From c944292047267dc67a70e66aefc6fa6bf518c216 Mon Sep 17 00:00:00 2001 From: "Kay Marquardt (Gnadelwartz)" Date: Sat, 13 Apr 2019 14:50:53 +0200 Subject: [PATCH] fix UTF-8 decode of message --- README.md | 4 +++- README.txt | 6 ++++-- bashbot.cron | 2 +- bashbot.rc | 2 +- bashbot.sh | 16 +++++++++++++--- commands.sh | 2 +- doc/1_firstbot.md | 2 +- doc/2_usage.md | 2 +- doc/3_advanced.md | 2 +- doc/4_expert.md | 11 ++++------- doc/5_practice.md | 2 +- notify | 2 +- question | 2 +- version | 2 +- 14 files changed, 34 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 641b9bb..4ff80dd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ A Telegram bot written in bash. Depends on [tmux](http://github.com/tmux/tmux). Uses [JSON.sh](http://github.com/dominictarr/JSON.sh). +For full UTF-8 support you need python on your system (optional). + Written by Drew (@topkecleon), Daniil Gentili (@danogentili), and Kay M (@gnadelwartz). Contributions by JuanPotato, BigNerd95, TiagoDanin, and iicc1. @@ -75,4 +77,4 @@ No - its not less (in)secure as any other Bot written in any other language. But If you feel that there's something missing or if you found a bug, feel free to submit a pull request! -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec diff --git a/README.txt b/README.txt index b9c77c4..fba9ed9 100644 --- a/README.txt +++ b/README.txt @@ -6,6 +6,8 @@ A Telegram bot written in bash. Depends on http://github.com/tmux/tmux[tmux]. Uses http://github.com/dominictarr/JSON.sh[JSON.sh]. +For full UTF-8 support you need python on your system (optional). + Written by Drew (@topkecleon), Daniil Gentili (@danogentili), and Kay M (@gnadelwartz). @@ -119,5 +121,5 @@ That's it! If you feel that there's something missing or if you found a bug, feel free to submit a pull request! 
-latexmath:[\[VERSION\]] v0.6-dev3-0-gae157c4 -++++++++++++++++++++++++++++++++++++++++++++ +latexmath:[\[VERSION\]] v0.60-dev3-0-g2550aec ++++++++++++++++++++++++++++++++++++++++++++++ diff --git a/bashbot.cron b/bashbot.cron index af518ff..a608261 100644 --- a/bashbot.cron +++ b/bashbot.cron @@ -7,7 +7,7 @@ # This file is public domain in the USA and all free countries. # Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying) # -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec SHELL=/bin/sh diff --git a/bashbot.rc b/bashbot.rc index 9d58275..ebf016e 100755 --- a/bashbot.rc +++ b/bashbot.rc @@ -1,7 +1,7 @@ #!/bin/sh # description: Start or stop telegram-bash-bot # -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec # shellcheck disable=SC2009 # shellcheck disable=SC2181 diff --git a/bashbot.sh b/bashbot.sh index 0ff5c25..3aa3b41 100755 --- a/bashbot.sh +++ b/bashbot.sh @@ -10,7 +10,7 @@ # This file is public domain in the USA and all free countries. # Elsewhere, consider it to be WTFPLv2. 
(wtfpl.net/txt/copying) # -#### $$VERSION$$ v0.6-dev3-3-g03b6929 +#### $$VERSION$$ v0.60-dev3-0-g2550aec # # Exit Codes: # - 0 sucess (hopefully) @@ -149,6 +149,17 @@ urlencode() { echo "$*" | sed 's:%:%25:g;s: :%20:g;s:<:%3C:g;s:>:%3E:g;s:#:%23:g;s:{:%7B:g;s:}:%7D:g;s:|:%7C:g;s:\\:%5C:g;s:\^:%5E:g;s:~:%7E:g;s:\[:%5B:g;s:\]:%5D:g;s:`:%60:g;s:;:%3B:g;s:/:%2F:g;s:?:%3F:g;s^:^%3A^g;s:@:%40:g;s:=:%3D:g;s:&:%26:g;s:\$:%24:g;s:\!:%21:g;s:\*:%2A:g' } +# use python (2 or 3, first one found) to decode UTF-8 JSON, provide error prone echo -e as fallback +if BASHBOT_PYTHON="$(command -v python || command -v python3 || command -v python2)"; then + jsondecode() { + printf '"%s\\n"' "${1//\"/\\\"}" | "$BASHBOT_PYTHON" -c 'import json, sys; getattr(sys.stdout, "buffer", sys.stdout).write(json.load(sys.stdin).encode("utf-8"))' + } +else + echo -e "${ORANGE}WARNING: Fallback to non UTF mode, install python to have full UTF-8 support!${NC}" + jsondecode() { + echo -e "$1" + } +fi send_message() { local text arg keyboard file lat long title address sent @@ -157,7 +168,6 @@ send_message() { text="$(echo "$2" | sed 's/ mykeyboardstartshere.*//g;s/ myfilelocationstartshere.*//g;s/ mylatstartshere.*//g;s/ mylongstartshere.*//g;s/ mytitlestartshere.*//g;s/ myaddressstartshere.*//g;s/ mykeyboardendshere.*//g')" arg="$3" [ "$arg" != "safe" ] && { - #text="$(echo "$text" | sed 's/ mynewlinestartshere /\r\n/g')" # hack for linebreaks in startproc scripts text="${text// mynewlinestartshere /$'\r\n'}" no_keyboard="$(echo "$2" | sed '/mykeyboardendshere/!d;s/.*mykeyboardendshere.*/mykeyboardendshere/')" @@ -505,7 +515,7 @@ process_client() { local TMP="${TMPDIR:-.}/$RANDOM$RANDOM-MESSAGE" echo "$UPDATE" >"$TMP" # Message - MESSAGE[0]="$(echo -e "$(sed -n -e '/\["result",'$PROCESS_NUMBER',"message","text"\]/ s/.*\][ \t]"\(.*\)"$/\1/p' <"$TMP")" | sed 's#\\/#/#g')" + MESSAGE[0]="$(jsondecode "$(sed -n -e '/\["result",'$PROCESS_NUMBER',"message","text"\]/ s/.*\][ \t]"\(.*\)"$/\1/p' <"$TMP")" | sed 's#\\/#/#g')" MESSAGE[ID]="$(sed -n -e 
'/\["result",'$PROCESS_NUMBER',"message","message_id"\]/ s/.*\][ \t]//p' <"$TMP" )" # Chat diff --git a/commands.sh b/commands.sh index e8aaa75..ba8b78b 100755 --- a/commands.sh +++ b/commands.sh @@ -4,7 +4,7 @@ # This file is public domain in the USA and all free countries. # Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying) # -#### $$VERSION$$ v0.6-dev3-2-g121a113 +#### $$VERSION$$ v0.60-dev3-0-g2550aec # # shellcheck disable=SC2154 # shellcheck disable=SC2034 diff --git a/doc/1_firstbot.md b/doc/1_firstbot.md index 2cf1885..2b6f6aa 100644 --- a/doc/1_firstbot.md +++ b/doc/1_firstbot.md @@ -70,5 +70,5 @@ git clone --recursive https://github.com/topkecleon/telegram-bot-bash ``` 3. Change to directory ```telegram-bot.bash```, run ```./bashbot.sh init``` and follow the instructions. At this stage you are asked for your Bots token given by botfather. -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec diff --git a/doc/2_usage.md b/doc/2_usage.md index 5b3790c..188090a 100644 --- a/doc/2_usage.md +++ b/doc/2_usage.md @@ -147,6 +147,6 @@ Allowed values: typing for text messages, upload_photo for photos, record_video send_action "${CHAT[ID]}" "action" ``` -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec diff --git a/doc/3_advanced.md b/doc/3_advanced.md index 9685984..357405f 100644 --- a/doc/3_advanced.md +++ b/doc/3_advanced.md @@ -153,5 +153,5 @@ To send stickers through an *inline query*: answer_inline_query "$iQUERY_ID" "cached_sticker" "identifier for the sticker" ``` -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec diff --git a/doc/4_expert.md b/doc/4_expert.md index f2dcb3a..16abd9f 100644 --- a/doc/4_expert.md +++ b/doc/4_expert.md @@ -30,21 +30,18 @@ export 'LANGUAGE=den_US.UTF-8' To display all availible locales on your system run ```locale -a | more```. 
[Gentoo Wiki](https://wiki.gentoo.org/wiki/UTF-8) -#### UTF-8 in Telegram +#### UTF-8 Support for Telegram JSON ```UTF-8``` is a variable length encoding of Unicode. UTF-8 is recommended as the default encoding in JSON, XML and HTML, also Telegram make use of it. The first 128 characters are regular ASCII, so it's a superset of and compatible with ASCII environments. The next 1,920 characters need two bytes for encoding and covers almost all ```Latin``` alphabets, also ```Greek```, ```Cyrillic```, ```Hebrew```, ```Arabic``` and more. See [Wikipedia](https://en.wikipedia.org/wiki/UTF-8) for more deatils. -Telegram send Messages with all characters not fitting in one byte (256 bit) escaped as sequences of ```\uxxxx``` to be regular one byte ASCII (incl. iso-xxx-x), e.g. Emoticons and Arabic characters. -E.g. the Emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as: +Telegram sends JSON messages with all characters not fitting in one byte (8 bit) escaped as sequences of ```\uxxxx``` to be regular one byte ASCII (incl. iso-xxx-x), e.g. Emoticons and Arabic characters. For example, the Emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as: ``` \uD83D\uDE01 \uD83D\uDE18 \u2764\uFE0F \uD83D\uDE0A \uD83D\uDC4D ``` - -'\uXXXX' and '\UXXXXXXXX' escaped endocings are supported by zsh, bash, ksh93, mksh and FreeBSD sh, GNU 'printf' and GNU 'echo -e', see [this Stackexchange Answer](https://unix.stackexchange.com/questions/252286/how-to-convert-an-emoticon-specified-by-a-uxxxxx-code-to-utf-8/252295#252295) for more information. - +In theory you can decode these characters with ```echo -e``` or ```printf '%s\\n'``` in bash, but this works only for single byte characters! To also support multibyte characters in JSON strings you need a working python on your system. If no python is detected on your system, bashbot falls back to echo -e. 
See [longstanding issue #50](https://github.com/topkecleon/telegram-bot-bash/issues/50) ### Run as other user or system service @@ -102,5 +99,5 @@ An example crontab is provided in ```bashbot.cron```. - if you run bashbot as an other user or a system service edit ```bashbot.cron``` to fit your needs and replace username```nobody``` with the username you want to run bashbot. copy the modified file to ```/etc/cron.d/bashbot``` -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec diff --git a/doc/5_practice.md b/doc/5_practice.md index fe8b587..b153323 100644 --- a/doc/5_practice.md +++ b/doc/5_practice.md @@ -111,5 +111,5 @@ In bashbot.sh line 490: ``` As you can see there are only two warnings in bashbots scripts. The first is a hint you may use shell substitions instead of sed, but this is only possible for simple cases. The second warning is about an unused variable, this is true because in our examples CONTACT is not used but assigned in case you want to use it :-) -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec diff --git a/notify b/notify index 9cfbfbb..16472d8 100755 --- a/notify +++ b/notify @@ -2,7 +2,7 @@ # This file is public domain in the USA and all free countries. # Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying) -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec # adjust your language setting here # https://github.com/topkecleon/telegram-bot-bash#setting-up-your-environment diff --git a/question b/question index ed413ad..a886f0c 100755 --- a/question +++ b/question @@ -3,7 +3,7 @@ # This file is public domain in the USA and all free countries. # Elsewhere, consider it to be WTFPLv2. 
(wtfpl.net/txt/copying) -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec # adjust your language setting here # https://github.com/topkecleon/telegram-bot-bash#setting-up-your-environment diff --git a/version b/version index 421e430..57fbae3 100755 --- a/version +++ b/version @@ -1,6 +1,6 @@ #!/bin/bash # -#### $$VERSION$$ v0.6-dev3-0-gae157c4 +#### $$VERSION$$ v0.60-dev3-0-g2550aec # shellcheck disable=SC2016 # # Easy Versioning in git: