update doc, fix minor problem with CHAT[TYPE]

This commit is contained in:
Kay Marquardt (Gnadelwartz) 2019-04-14 17:43:12 +02:00
parent 5787908d72
commit f5538eff3a
2 changed files with 35 additions and 35 deletions

View File

@ -10,7 +10,7 @@
# This file is public domain in the USA and all free countries.
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
#
#### $$VERSION$$ v0.60-dev3-5-gaa1404d
#### $$VERSION$$ v0.60-dev3-6-g5787908
#
# Exit Codes:
# - 0 success (hopefully)
@ -145,28 +145,6 @@ OFFSET=0
declare -A USER MESSAGE URLS CONTACT LOCATION CHAT FORWARD REPLYTO
# JsonDecode: decode the escapes of a JSON string (\n, \", \uXXXX, UTF-16
# surrogate pairs) into UTF-8 text.
# Python's JSON module is used when an interpreter is found; a pure bash
# implementation is provided as fallback.
# Arguments: $1 - JSON string content without the surrounding double quotes
# Outputs:   decoded text on stdout
JSON_PYTHON=""
for _json_py in python python2 python3; do
	if command -v "${_json_py}" >/dev/null 2>&1; then
		JSON_PYTHON="${_json_py}"
		break
	fi
done
unset _json_py

if [ -n "${JSON_PYTHON}" ]; then
	JsonDecode() {
		# Re-wrap $1 as a JSON string literal and let python decode it.
		# stdin/stdout are accessed through their byte buffers when present so
		# the same snippet works with Python 2 and Python 3 (writing bytes to
		# sys.stdout directly raises TypeError on Python 3).
		printf '"%s\\n"' "${1//\"/\\\"}" |
			"${JSON_PYTHON}" -c 'import json, sys; i = getattr(sys.stdin, "buffer", sys.stdin); o = getattr(sys.stdout, "buffer", sys.stdout); o.write(json.loads(i.read().decode("utf-8")).encode("utf-8"))'
	}
else
	# pure bash implementation, done by KayM (@gnadelwartz)
	# see https://stackoverflow.com/a/55666449/9381171
	JsonDecode() {
		local out="$1" remain="" W1 W2 U
		# Match only a real surrogate pair: high D800-DBFF followed by low
		# DC00-DFFF, in either letter case. Plain \udXXX code units (e.g. Hangul
		# syllables U+D000-U+D7A3) must not be combined.
		local regexp='(.*)\\u[dD]([89abAB][0-9a-fA-F]{2})\\u[dD]([cdefCDEF][0-9a-fA-F]{2})(.*)'
		# The greedy (.*) makes every match the LAST pair in ${out}; converted
		# tails are collected in ${remain} from right to left.
		while [[ "${out}" =~ $regexp ]]; do
			W1="$(( (0xd${BASH_REMATCH[2]} & 0x3ff) << 10 ))"
			W2="$(( 0xd${BASH_REMATCH[3]} & 0x3ff ))"
			U="$(( (W1 | W2) + 0x10000 ))"
			remain="$(printf '\\U%8.8x' "${U}")${BASH_REMATCH[4]}${remain}"
			out="${BASH_REMATCH[1]}"
		done
		# %b expands the remaining escapes like echo -e, but a text such as
		# "-n" or "-e" is not mistaken for an option.
		printf '%b\n' "${out}${remain}"
	}
fi
send_message() {
local text arg keyboard file lat long title address sent
@ -538,7 +516,7 @@ process_client() {
CHAT[USERNAME]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","username"' <"$TMP")")"
CHAT[TITLE]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","title"' <"$TMP")")"
CHAT[TYPE]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","type"' <"$TMP")")"
CHAT[ALL_MEMBERS_ARE_ADMINISTRATORS]="$(JsonDecode "$(JsonGetString '/\["result",'$PROCESS_NUMBER',"message","chat","all_members_are_administrators"' <"$TMP")")"
CHAT[ALL_MEMBERS_ARE_ADMINISTRATORS]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","all_members_are_administrators"' <"$TMP")")"
# User
USER[ID]="$(JsonGetValue '"result",'$PROCESS_NUMBER',"message","from","id"' <"$TMP" )"
@ -603,6 +581,29 @@ process_client() {
# To get user count execute bash bashbot.sh count
}
# JsonDecode: decode the escapes of a JSON string (\n, \", \uXXXX, UTF-16
# surrogate pairs) into UTF-8 text.
# Python's JSON module is used when an interpreter is found; a pure bash
# implementation is provided as fallback.
# Arguments: $1 - JSON string content without the surrounding double quotes
# Outputs:   decoded text on stdout
JSON_PYTHON=""
for _json_py in python python2 python3; do
	if command -v "${_json_py}" >/dev/null 2>&1; then
		JSON_PYTHON="${_json_py}"
		break
	fi
done
unset _json_py

if [ -n "${JSON_PYTHON}" ]; then
	JsonDecode() {
		# Re-wrap $1 as a JSON string literal and let python decode it.
		# stdin/stdout are accessed through their byte buffers when present so
		# the same snippet works with Python 2 and Python 3 (writing bytes to
		# sys.stdout directly raises TypeError on Python 3).
		printf '"%s\\n"' "${1//\"/\\\"}" |
			"${JSON_PYTHON}" -c 'import json, sys; i = getattr(sys.stdin, "buffer", sys.stdin); o = getattr(sys.stdout, "buffer", sys.stdout); o.write(json.loads(i.read().decode("utf-8")).encode("utf-8"))'
	}
else
	# pure bash implementation, done by KayM (@gnadelwartz)
	# see https://stackoverflow.com/a/55666449/9381171
	JsonDecode() {
		local out="$1" remain="" W1 W2 U
		# Match only a real surrogate pair: high D800-DBFF followed by low
		# DC00-DFFF, in either letter case. Plain \udXXX code units (e.g. Hangul
		# syllables U+D000-U+D7A3) must not be combined.
		local regexp='(.*)\\u[dD]([89abAB][0-9a-fA-F]{2})\\u[dD]([cdefCDEF][0-9a-fA-F]{2})(.*)'
		# The greedy (.*) makes every match the LAST pair in ${out}; converted
		# tails are collected in ${remain} from right to left.
		while [[ "${out}" =~ $regexp ]]; do
			W1="$(( (0xd${BASH_REMATCH[2]} & 0x3ff) << 10 ))"
			W2="$(( 0xd${BASH_REMATCH[3]} & 0x3ff ))"
			U="$(( (W1 | W2) + 0x10000 ))"
			remain="$(printf '\\U%8.8x' "${U}")${BASH_REMATCH[4]}${remain}"
			out="${BASH_REMATCH[1]}"
		done
		# %b expands the remaining escapes like echo -e, but a text such as
		# "-n" or "-e" is not mistaken for an option.
		printf '%b\n' "${out}${remain}"
	}
fi
# Fetch $ME_URL, parse the JSON reply with JSON.sh and keep the value found
# at "result","username" (the bot's name) in ME.
ME="$(
	curl -s "$ME_URL" |
		./JSON.sh/JSON.sh -s |
		JsonGetString '"result","username"'
)"

View File

@ -1,6 +1,12 @@
## Expert Use
### Handling UTF-8 character sets
UTF-8 is a variable length encoding of Unicode. UTF-8 is recommended as the default encoding in JSON, XML and HTML, and Telegram makes use of it as well.
The first 128 characters are regular ASCII, so it's a superset of and compatible with ASCII environments. The next 1,920 characters need
two bytes for encoding and cover almost all ```Latin``` alphabets, also ```Greek```, ```Cyrillic```,
```Hebrew```, ```Arabic``` and more. See [Wikipedia](https://en.wikipedia.org/wiki/UTF-8) for more details.
#### Setting up your Environment
In general ```bash``` and ```GNU``` utilities are UTF-8 aware, but you have to set up your environment
and your scripts accordingly:
@ -31,18 +37,11 @@ export 'LANGUAGE=den_US.UTF-8'
To display all available locales on your system run ```locale -a | more```. [Gentoo Wiki](https://wiki.gentoo.org/wiki/UTF-8)
#### UTF-8 Support
```UTF-8``` is a variable length encoding of Unicode. UTF-8 is recommended as the default encoding in JSON, XML and HTML, and Telegram makes use of it as well.
Telegram sends JSON messages with all characters not fitting in one byte (values above 255) escaped as sequences of ```\uxxxx``` so that the message itself is regular one-byte ASCII. Multibyte UTF-8 characters, e.g. emoticons and Arabic characters, are sent in UTF-16 notation; e.g. the emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as: ``` \uD83D\uDE01 \uD83D\uDE18 \u2764\uFE0F \uD83D\uDE0A \uD83D\uDC4D ```
The first 128 characters are regular ASCII, so it's a superset of and compatible with ASCII environments. The next 1,920 characters need
two bytes for encoding and cover almost all ```Latin``` alphabets, also ```Greek```, ```Cyrillic```,
```Hebrew```, ```Arabic``` and more. See [Wikipedia](https://en.wikipedia.org/wiki/UTF-8) for more details.
**This mixed JSON encoding cannot be decoded by ```echo -e``` or ```printf '%s\\n'```; that works only for single byte characters!**
Telegram sends JSON messages with all characters not fitting in one byte (values above 255) escaped as sequences of ```\uxxxx``` so that the message itself is regular one-byte ASCII (incl. iso-xxx-x), e.g. emoticons and Arabic characters. For example, the emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as:
```
\uD83D\uDE01 \uD83D\uDE18 \u2764\uFE0F \uD83D\uDE0A \uD83D\uDC4D
```
In theory you can decode these characters with ```echo -e``` or ```printf '%s\\n'``` in bash, but this works only for single byte characters! To also support multibyte characters in JSON strings you need a working python on your system.
#### If no python is detected on your system, bashbot falls back to echo -e. See [longstanding issue #50](https://github.com/topkecleon/telegram-bot-bash/issues/50)
To fully support decoding of multibyte characters you need a working python2 installation on your system. If no python is detected, bashbot falls back to a **slow, pure bash solution which may not always work 100% correctly**.
### Run as other user or system service
@ -100,5 +99,5 @@ An example crontab is provided in ```bashbot.cron```.
- If you run bashbot as another user or as a system service, edit ```bashbot.cron``` to fit your needs and replace the username ```nobody``` with the username you want to run bashbot as. Copy the modified file to ```/etc/cron.d/bashbot```.
#### $$VERSION$$ v0.60-dev3-2-g9eddea5
#### $$VERSION$$ v0.60-dev3-6-g5787908