mirror of
https://github.com/octoleo/telegram-bot-bash.git
synced 2024-12-28 12:50:44 +00:00
update doc, fix minor problem with CHAT[TYPE]
This commit is contained in:
parent
5787908d72
commit
f5538eff3a
49
bashbot.sh
49
bashbot.sh
@ -10,7 +10,7 @@
|
||||
# This file is public domain in the USA and all free countries.
|
||||
# Elsewhere, consider it to be WTFPLv2. (wtfpl.net/txt/copying)
|
||||
#
|
||||
#### $$VERSION$$ v0.60-dev3-5-gaa1404d
|
||||
#### $$VERSION$$ v0.60-dev3-6-g5787908
|
||||
#
|
||||
# Exit Codes:
|
||||
# - 0 success (hopefully)
|
||||
@ -145,28 +145,6 @@ OFFSET=0
|
||||
declare -A USER MESSAGE URLS CONTACT LOCATION CHAT FORWARD REPLYTO
|
||||
|
||||
|
||||
# Use Python's JSON module to decode JSON-escaped UTF-8 strings; provide a
# pure bash implementation as fallback when no Python interpreter is found.
#
# JsonDecode STRING
#   STRING - contents of a JSON string without the surrounding quotes,
#            e.g. 'gr\u00fc\u00dfe \uD83D\uDE01'
#   Output - the decoded text on stdout, followed by a newline.

# Remember the interpreter that was actually found, so JsonDecode never calls
# a bare "python" that is not installed (e.g. systems shipping only python3).
JSON_DECODE_PYTHON=""
for _json_py in python python3 python2; do
	if command -v "${_json_py}" >/dev/null 2>&1; then
		JSON_DECODE_PYTHON="${_json_py}"
		break
	fi
done
unset _json_py

if [ -n "${JSON_DECODE_PYTHON}" ]; then
    JsonDecode() {
	# Wrap the argument in quotes (escaping embedded quotes) to form a JSON
	# string literal; the appended \n escape becomes the trailing newline.
	# The script reads and writes raw bytes so it behaves the same on
	# Python 2 and Python 3 (where sys.stdout.write() rejects bytes).
	printf '"%s\\n"' "${1//\"/\\\"}" | "${JSON_DECODE_PYTHON}" -c '
import json, sys
raw = getattr(sys.stdin, "buffer", sys.stdin).read().decode("utf-8")
out = getattr(sys.stdout, "buffer", sys.stdout)
out.write(json.loads(raw).encode("utf-8"))
'
    }
else
    # pure bash implementation, done by KayM (@gnadelwartz)
    # see https://stackoverflow.com/a/55666449/9381171
    JsonDecode() {
	local out="$1" remain="" high low cp utf8
	# A UTF-16 surrogate pair is a high surrogate (D800-DBFF) directly
	# followed by a low surrogate (DC00-DFFF). Telegram sends the hex
	# digits in upper case, so both cases of every digit are accepted.
	local regexp='(.*)\\u[dD]([89abAB][0-9a-fA-F]{2})\\u[dD]([cdefCDEF][0-9a-fA-F]{2})(.*)'
	# The leading (.*) is greedy, so pairs are converted from the end of
	# the string towards its start.
	while [[ "${out}" =~ ${regexp} ]]; do
		high="$(( (0xd${BASH_REMATCH[2]} & 0x3ff) << 10 ))"
		low="$(( 0xd${BASH_REMATCH[3]} & 0x3ff ))"
		cp="$(( (high | low) + 0x10000 ))"
		# Emit the code point as four explicit UTF-8 bytes, matching the
		# python variant which always outputs UTF-8.
		printf -v utf8 '\\x%02x\\x%02x\\x%02x\\x%02x' \
			"$(( 0xf0 | (cp >> 18) ))" \
			"$(( 0x80 | ((cp >> 12) & 0x3f) ))" \
			"$(( 0x80 | ((cp >> 6) & 0x3f) ))" \
			"$(( 0x80 | (cp & 0x3f) ))"
		remain="${utf8}${BASH_REMATCH[4]}${remain}"
		out="${BASH_REMATCH[1]}"
	done
	# echo -e expands the generated \xHH bytes and any remaining escapes.
	echo -e "${out}${remain}"
    }
fi
|
||||
|
||||
send_message() {
|
||||
local text arg keyboard file lat long title address sent
|
||||
@ -538,7 +516,7 @@ process_client() {
|
||||
CHAT[USERNAME]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","username"' <"$TMP")")"
|
||||
CHAT[TITLE]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","title"' <"$TMP")")"
|
||||
CHAT[TYPE]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","type"' <"$TMP")")"
|
||||
CHAT[ALL_MEMBERS_ARE_ADMINISTRATORS]="$(JsonDecode "$(JsonGetString '/\["result",'$PROCESS_NUMBER',"message","chat","all_members_are_administrators"' <"$TMP")")"
|
||||
CHAT[ALL_MEMBERS_ARE_ADMINISTRATORS]="$(JsonDecode "$(JsonGetString '"result",'$PROCESS_NUMBER',"message","chat","all_members_are_administrators"' <"$TMP")")"
|
||||
|
||||
# User
|
||||
USER[ID]="$(JsonGetValue '"result",'$PROCESS_NUMBER',"message","from","id"' <"$TMP" )"
|
||||
@ -603,6 +581,29 @@ process_client() {
|
||||
# To get user count execute bash bashbot.sh count
|
||||
}
|
||||
|
||||
# Use Python's JSON module to decode JSON-escaped UTF-8 strings; provide a
# pure bash implementation as fallback when no Python interpreter is found.
#
# JsonDecode STRING
#   STRING - contents of a JSON string without the surrounding quotes,
#            e.g. 'gr\u00fc\u00dfe \uD83D\uDE01'
#   Output - the decoded text on stdout, followed by a newline.

# Remember the interpreter that was actually found, so JsonDecode never calls
# a bare "python" that is not installed (e.g. systems shipping only python3).
JSON_DECODE_PYTHON=""
for _json_py in python python3 python2; do
	if command -v "${_json_py}" >/dev/null 2>&1; then
		JSON_DECODE_PYTHON="${_json_py}"
		break
	fi
done
unset _json_py

if [ -n "${JSON_DECODE_PYTHON}" ]; then
    JsonDecode() {
	# Wrap the argument in quotes (escaping embedded quotes) to form a JSON
	# string literal; the appended \n escape becomes the trailing newline.
	# The script reads and writes raw bytes so it behaves the same on
	# Python 2 and Python 3 (where sys.stdout.write() rejects bytes).
	printf '"%s\\n"' "${1//\"/\\\"}" | "${JSON_DECODE_PYTHON}" -c '
import json, sys
raw = getattr(sys.stdin, "buffer", sys.stdin).read().decode("utf-8")
out = getattr(sys.stdout, "buffer", sys.stdout)
out.write(json.loads(raw).encode("utf-8"))
'
    }
else
    # pure bash implementation, done by KayM (@gnadelwartz)
    # see https://stackoverflow.com/a/55666449/9381171
    JsonDecode() {
	local out="$1" remain="" high low cp utf8
	# A UTF-16 surrogate pair is a high surrogate (D800-DBFF) directly
	# followed by a low surrogate (DC00-DFFF). Telegram sends the hex
	# digits in upper case, so both cases of every digit are accepted.
	local regexp='(.*)\\u[dD]([89abAB][0-9a-fA-F]{2})\\u[dD]([cdefCDEF][0-9a-fA-F]{2})(.*)'
	# The leading (.*) is greedy, so pairs are converted from the end of
	# the string towards its start.
	while [[ "${out}" =~ ${regexp} ]]; do
		high="$(( (0xd${BASH_REMATCH[2]} & 0x3ff) << 10 ))"
		low="$(( 0xd${BASH_REMATCH[3]} & 0x3ff ))"
		cp="$(( (high | low) + 0x10000 ))"
		# Emit the code point as four explicit UTF-8 bytes, matching the
		# python variant which always outputs UTF-8.
		printf -v utf8 '\\x%02x\\x%02x\\x%02x\\x%02x' \
			"$(( 0xf0 | (cp >> 18) ))" \
			"$(( 0x80 | ((cp >> 12) & 0x3f) ))" \
			"$(( 0x80 | ((cp >> 6) & 0x3f) ))" \
			"$(( 0x80 | (cp & 0x3f) ))"
		remain="${utf8}${BASH_REMATCH[4]}${remain}"
		out="${BASH_REMATCH[1]}"
	done
	# echo -e expands the generated \xHH bytes and any remaining escapes.
	echo -e "${out}${remain}"
    }
fi
|
||||
|
||||
# get bot name
|
||||
ME="$(curl -s "$ME_URL" | ./JSON.sh/JSON.sh -s | JsonGetString '"result","username"')"
|
||||
|
||||
|
@ -1,6 +1,12 @@
|
||||
## Expert Use
|
||||
|
||||
### Handling UTF-8 character sets
|
||||
UTF-8 is a variable-length encoding of Unicode. UTF-8 is recommended as the default encoding in JSON, XML and HTML, and Telegram makes use of it as well.
|
||||
|
||||
The first 128 characters are regular ASCII, so it's a superset of and compatible with ASCII environments. The next 1,920 characters need
|
||||
two bytes for encoding and cover almost all ```Latin``` alphabets, as well as ```Greek```, ```Cyrillic```,
|
||||
```Hebrew```, ```Arabic``` and more. See [Wikipedia](https://en.wikipedia.org/wiki/UTF-8) for more details.
|
||||
|
||||
#### Setting up your Environment
|
||||
In general ```bash``` and ```GNU``` utilities are UTF-8 aware, but you have to set up your environment
|
||||
and your scripts accordingly:
|
||||
@ -31,18 +37,11 @@ export 'LANGUAGE=den_US.UTF-8'
|
||||
To display all available locales on your system run ```locale -a | more```. [Gentoo Wiki](https://wiki.gentoo.org/wiki/UTF-8)
|
||||
|
||||
#### UTF-8 Support
|
||||
```UTF-8``` is a variable length encoding of Unicode. UTF-8 is recommended as the default encoding in JSON, XML and HTML, also Telegram make use of it.
|
||||
Telegram sends JSON messages with all characters not fitting in one byte (a byte holds values below 256) escaped as ```\uxxxx``` sequences, so the message itself consists of regular one-byte ASCII. Multibyte UTF-8 characters, e.g. emoticons and Arabic characters, are sent in UTF-16 notation; e.g. the emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as: ``` \uD83D\uDE01 \uD83D\uDE18 \u2764\uFE0F \uD83D\uDE0A \uD83D\uDC4D ```
|
||||
|
||||
The first 128 characters are regular ASCII, so it's a superset of and compatible with ASCII environments. The next 1,920 characters need
|
||||
two bytes for encoding and covers almost all ```Latin``` alphabets, also ```Greek```, ```Cyrillic```,
|
||||
```Hebrew```, ```Arabic``` and more. See [Wikipedia](https://en.wikipedia.org/wiki/UTF-8) for more details.
|
||||
**This mixed JSON encoding cannot be decoded by ```echo -e``` or ```printf '%s\\n'```; these work only for single-byte characters!**
|
||||
|
||||
Telegram send JSON messages with all characters not fitting in one byte (256 bit) escaped as sequences of ```\uxxxx``` to be regular one byte ASCII (incl. iso-xxx-x), e.g. Emoticons and Arabic characters, e.g. the Emoticons ``` 😁 😘 ❤️ 😊 👍 ``` are encoded as:
|
||||
```
|
||||
\uD83D\uDE01 \uD83D\uDE18 \u2764\uFE0F \uD83D\uDE0A \uD83D\uDC4D
|
||||
```
|
||||
In theory you can decode these characters with ```echo -e``` or ```printf '%s\\n'``` in bash, but this works only for single-byte characters! To also support multibyte characters in JSON strings you need a working python on your system.
|
||||
#### If no python is detected on your system, bashbot falls back to echo -e. See [longstanding issue #50](https://github.com/topkecleon/telegram-bot-bash/issues/50)
|
||||
To fully support decoding of multibyte characters you need a working python2 installation on your system. If no python is detected, bashbot falls back to a **slow, pure bash solution which may not always work 100% correctly**.
|
||||
|
||||
|
||||
### Run as other user or system service
|
||||
@ -100,5 +99,5 @@ An example crontab is provided in ```bashbot.cron```.
|
||||
- if you run bashbot as another user or a system service, edit ```bashbot.cron``` to fit your needs and replace the username ```nobody``` with the username you want to run bashbot as. Copy the modified file to ```/etc/cron.d/bashbot```
|
||||
|
||||
|
||||
#### $$VERSION$$ v0.60-dev3-2-g9eddea5
|
||||
#### $$VERSION$$ v0.60-dev3-6-g5787908
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user