mirror of
https://github.com/octoleo/telegram-bot-bash.git
synced 2024-11-26 01:07:34 +00:00
fix UTF-8 decode of message
This commit is contained in:
parent
2550aecd3c
commit
c944292047
@ -4,6 +4,8 @@ A Telegram bot written in bash.
|
||||
Depends on [tmux](http://github.com/tmux/tmux).
|
||||
Uses [JSON.sh](http://github.com/dominictarr/JSON.sh).
|
||||
|
||||
For full UTF-8 support you need python on your system (optional).
|
||||
|
||||
Written by Drew (@topkecleon), Daniil Gentili (@danogentili), and Kay M (@gnadelwartz).
|
||||
|
||||
Contributions by JuanPotato, BigNerd95, TiagoDanin, and iicc1.
|
||||
@ -75,4 +77,4 @@ No - its not less (in)secure as any other Bot written in any other language. But
|
||||
|
||||
If you feel that there's something missing or if you found a bug, feel free to submit a pull request!
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
@ -6,6 +6,8 @@ A Telegram bot written in bash.
|
||||
Depends on http://github.com/tmux/tmux[tmux]. Uses
|
||||
http://github.com/dominictarr/JSON.sh[JSON.sh].
|
||||
|
||||
For full UTF-8 support you need python on your system (optional).
|
||||
|
||||
Written by Drew (@topkecleon), Daniil Gentili (@danogentili), and Kay M
|
||||
(@gnadelwartz).
|
||||
|
||||
@ -119,5 +121,5 @@ That's it!
|
||||
If you feel that there's something missing or if you found a bug, feel
|
||||
free to submit a pull request!
|
||||
|
||||
latexmath:[\[VERSION\]] v0.6-dev3-0-gae157c4
|
||||
++++++++++++++++++++++++++++++++++++++++++++
|
||||
latexmath:[\[VERSION\]] v0.60-dev3-0-g2550aec
|
||||
+++++++++++++++++++++++++++++++++++++++++++++
|
||||
|
@ -7,7 +7,7 @@
|
||||
# This file is public domain in the USA and all free countries.
|
||||
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
|
||||
#
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
|
||||
SHELL=/bin/sh
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/bin/sh
|
||||
# description: Start or stop telegram-bash-bot
|
||||
#
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
# shellcheck disable=SC2009
|
||||
# shellcheck disable=SC2181
|
||||
|
||||
|
16
bashbot.sh
16
bashbot.sh
@ -10,7 +10,7 @@
|
||||
# This file is public domain in the USA and all free countries.
|
||||
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
|
||||
#
|
||||
#### $$VERSION$$ v0.6-dev3-3-g03b6929
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
#
|
||||
# Exit Codes:
|
||||
# - 0 success (hopefully)
|
||||
@ -149,6 +149,17 @@ urlencode() {
|
||||
echo "$*" | sed 's:%:%25:g;s: :%20:g;s:<:%3C:g;s:>:%3E:g;s:#:%23:g;s:{:%7B:g;s:}:%7D:g;s:|:%7C:g;s:\\:%5C:g;s:\^:%5E:g;s:~:%7E:g;s:\[:%5B:g;s:\]:%5D:g;s:`:%60:g;s:;:%3B:g;s:/:%2F:g;s:?:%3F:g;s^:^%3A^g;s:@:%40:g;s:=:%3D:g;s:&:%26:g;s:\$:%24:g;s:\!:%21:g;s:\*:%2A:g'
|
||||
}
|
||||
|
||||
# Use python to decode UTF-8 JSON strings; provide the error-prone
# 'printf %b' (same escapes as 'echo -e') as fallback when no python exists.
#
# jsondecode RAW
#   RAW     - the body of a JSON string, i.e. the text between the enclosing
#             double quotes with its JSON escapes (\n, \", \uXXXX, ...) intact.
#   Outputs - the decoded text, UTF-8 encoded, followed by a newline.
#
# Remember which interpreter was actually found, so that jsondecode calls
# that one instead of assuming a binary named 'python' exists.
JSONDECODE_PYTHON=""
for _jsondecode_candidate in python python2 python3; do
  if command -v "$_jsondecode_candidate" >/dev/null 2>&1; then
    JSONDECODE_PYTHON="$_jsondecode_candidate"
    break
  fi
done
unset _jsondecode_candidate

if [ -n "$JSONDECODE_PYTHON" ]; then
  jsondecode() {
    # The raw body is fed through stdin as bytes, so the same script works on
    # Python 2 and 3 regardless of locale:
    #  - quotes that are not already escaped get escaped; existing escape
    #    pairs (\" \\ \n \uXXXX ...) are left alone, so \" is not doubled
    #  - the body is wrapped in quotes and parsed as a JSON string
    #  - the result is written as UTF-8 bytes (sys.stdout.buffer on Python 3)
    printf '%s' "$1" | "$JSONDECODE_PYTHON" -c '
import json, re, sys
raw = getattr(sys.stdin, "buffer", sys.stdin).read().decode("utf-8")
body = re.sub(r"(\\.)|\"", lambda m: m.group(1) or "\\\"", raw)
text = json.loads("\"" + body + "\"") + "\n"
getattr(sys.stdout, "buffer", sys.stdout).write(text.encode("utf-8"))
'
  }
else
  echo -e "${ORANGE}WARNING: Fallback to non UTF mode, install python to have full UTF-8 support!${NC}"
  jsondecode() {
    # printf %b expands the same backslash escapes as 'echo -e', but does not
    # mistake message text such as "-n" or "-e" for an option.
    printf '%b\n' "$1"
  }
fi
|
||||
|
||||
send_message() {
|
||||
local text arg keyboard file lat long title address sent
|
||||
@ -157,7 +168,6 @@ send_message() {
|
||||
text="$(echo "$2" | sed 's/ mykeyboardstartshere.*//g;s/ myfilelocationstartshere.*//g;s/ mylatstartshere.*//g;s/ mylongstartshere.*//g;s/ mytitlestartshere.*//g;s/ myaddressstartshere.*//g;s/ mykeyboardendshere.*//g')"
|
||||
arg="$3"
|
||||
[ "$arg" != "safe" ] && {
|
||||
#text="$(echo "$text" | sed 's/ mynewlinestartshere /\r\n/g')" # hack for linebreaks in startproc scripts
|
||||
text="${text// mynewlinestartshere /$'\r\n'}"
|
||||
no_keyboard="$(echo "$2" | sed '/mykeyboardendshere/!d;s/.*mykeyboardendshere.*/mykeyboardendshere/')"
|
||||
|
||||
@ -505,7 +515,7 @@ process_client() {
|
||||
local TMP="${TMPDIR:-.}/$RANDOM$RANDOM-MESSAGE"
|
||||
echo "$UPDATE" >"$TMP"
|
||||
# Message
|
||||
MESSAGE[0]="$(echo -e "$(sed -n -e '/\["result",'$PROCESS_NUMBER',"message","text"\]/ s/.*\][ \t]"\(.*\)"$/\1/p' <"$TMP")" | sed 's#\\/#/#g')"
|
||||
MESSAGE[0]="$(jsondecode "$(sed -n -e '/\["result",'$PROCESS_NUMBER',"message","text"\]/ s/.*\][ \t]"\(.*\)"$/\1/p' <"$TMP")" | sed 's#\\/#/#g')"
|
||||
MESSAGE[ID]="$(sed -n -e '/\["result",'$PROCESS_NUMBER',"message","message_id"\]/ s/.*\][ \t]//p' <"$TMP" )"
|
||||
|
||||
# Chat
|
||||
|
@ -4,7 +4,7 @@
|
||||
# This file is public domain in the USA and all free countries.
|
||||
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
|
||||
#
|
||||
#### $$VERSION$$ v0.6-dev3-2-g121a113
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
#
|
||||
# shellcheck disable=SC2154
|
||||
# shellcheck disable=SC2034
|
||||
|
@ -70,5 +70,5 @@ git clone --recursive https://github.com/topkecleon/telegram-bot-bash
|
||||
```
|
||||
3. Change to directory ```telegram-bot-bash```, run ```./bashbot.sh init``` and follow the instructions. At this stage you are asked for your bot's token given by botfather.
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
|
@ -147,6 +147,6 @@ Allowed values: typing for text messages, upload_photo for photos, record_video
|
||||
send_action "${CHAT[ID]}" "action"
|
||||
```
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
|
||||
|
@ -153,5 +153,5 @@ To send stickers through an *inline query*:
|
||||
answer_inline_query "$iQUERY_ID" "cached_sticker" "identifier for the sticker"
|
||||
```
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
|
@ -30,21 +30,18 @@ export 'LANGUAGE=den_US.UTF-8'
|
||||
|
||||
To display all available locales on your system run ```locale -a | more```. [Gentoo Wiki](https://wiki.gentoo.org/wiki/UTF-8)
|
||||
|
||||
#### UTF-8 in Telegram
|
||||
#### UTF-8 Support for Telegram JSON
|
||||
```UTF-8``` is a variable length encoding of Unicode. UTF-8 is recommended as the default encoding in JSON, XML and HTML, and Telegram makes use of it as well.
|
||||
|
||||
The first 128 characters are regular ASCII, so it's a superset of and compatible with ASCII environments. The next 1,920 characters need
|
||||
two bytes for encoding and covers almost all ```Latin``` alphabets, also ```Greek```, ```Cyrillic```,
|
||||
```Hebrew```, ```Arabic``` and more. See [Wikipedia](https://en.wikipedia.org/wiki/UTF-8) for more details.
|
||||
|
||||
Telegram send Messages with all characters not fitting in one byte (256 bit) escaped as sequences of ```\uxxxx``` to be regular one byte ASCII (incl. iso-xxx-x), e.g. Emoticons and Arabic characters.
|
||||
E.g. the Emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as:
|
||||
Telegram sends JSON messages with all characters not fitting in one byte (8 bit, i.e. 256 values) escaped as sequences of ```\uxxxx``` so that the message is regular one byte ASCII (incl. iso-xxx-x). This affects e.g. Emoticons and Arabic characters; for example, the Emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as:
|
||||
```
|
||||
\uD83D\uDE01 \uD83D\uDE18 \u2764\uFE0F \uD83D\uDE0A \uD83D\uDC4D
|
||||
```
|
||||
|
||||
'\uXXXX' and '\UXXXXXXXX' escaped encodings are supported by zsh, bash, ksh93, mksh and FreeBSD sh, GNU 'printf' and GNU 'echo -e', see [this Stackexchange Answer](https://unix.stackexchange.com/questions/252286/how-to-convert-an-emoticon-specified-by-a-uxxxxx-code-to-utf-8/252295#252295) for more information.
|
||||
|
||||
In theory you can decode these characters with ```echo -e``` or ```printf '%s\\n'``` in bash, but this works only for single byte characters! To also support multibyte characters in JSON strings you need a working python on your system. If no python is detected on your system, bashbot falls back to ```echo -e```. See [longstanding issue #50](https://github.com/topkecleon/telegram-bot-bash/issues/50)
|
||||
|
||||
|
||||
### Run as other user or system service
|
||||
@ -102,5 +99,5 @@ An example crontab is provided in ```bashbot.cron```.
|
||||
- If you run bashbot as another user or a system service, edit ```bashbot.cron``` to fit your needs and replace username ```nobody``` with the username you want to run bashbot. Copy the modified file to ```/etc/cron.d/bashbot```
|
||||
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
|
@ -111,5 +111,5 @@ In bashbot.sh line 490:
|
||||
```
|
||||
As you can see there are only two warnings in bashbot's scripts. The first is a hint that you may use shell substitutions instead of sed, but this is only possible for simple cases. The second warning is about an unused variable; this is true because in our examples CONTACT is not used, but it is assigned in case you want to use it :-)
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
|
2
notify
2
notify
@ -2,7 +2,7 @@
|
||||
|
||||
# This file is public domain in the USA and all free countries.
|
||||
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
# adjust your language setting here
|
||||
# https://github.com/topkecleon/telegram-bot-bash#setting-up-your-environment
|
||||
|
2
question
2
question
@ -3,7 +3,7 @@
|
||||
# This file is public domain in the USA and all free countries.
|
||||
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
|
||||
|
||||
#### $$VERSION$$ v0.6-dev3-0-gae157c4
|
||||
#### $$VERSION$$ v0.60-dev3-0-g2550aec
|
||||
|
||||
# adjust your language setting here
|
||||
# https://github.com/topkecleon/telegram-bot-bash#setting-up-your-environment
|
||||
|
Loading…
Reference in New Issue
Block a user