feature recoll in place of swish-e to search a tomb's contents

Swish is outdated and recoll works much better as a frontend to
xapian, so we adopt it to provide local search over file contents in a
tomb. This update also makes plocate optional, rather than mandatory,
for searching over file names. Recoll's GUI can be used as well by
passing its -c command-line option followed by the indexed tomb's path.
This commit is contained in:
Jaromil 2024-05-12 23:29:14 +02:00
parent b715917b31
commit 735d540fe7
13 changed files with 61 additions and 121 deletions

View File

@ -72,8 +72,8 @@ executable | function
steghide | bury and exhume keys inside images
resizefs | extend the size of existing tomb volumes
qrencode | engrave keys into printable qrcode sheets
mlocate | fast search of file names inside tombs
swish++ | fast search of file contents inside tombs
plocate | fast search of file names inside tombs
recoll | fast search of file contents inside tombs
unoconv | fast search of contents in PDF and DOC files
lesspipe | fast search of contents in compressed archives
haveged | fast entropy generation for key forging

View File

@ -122,19 +122,21 @@ internally to enumerate processes running in one or all tombs.
.IP "index"
Creates or updates the search indexes of all tombs currently open:
enables use of the \fIsearch\fR command using simple word patterns on
file names. Indexes are created using mlocate/plocate's updatedb(8) and
swish-e(1) if they are found on the system. Indexes allow one to search
file names. Indexes are created using plocate's updatedb(8) and
recoll(1) if they are found on the system. Indexes allow one to search
very fast for filenames and contents inside a tomb; they are stored
inside it and are not accessible if the Tomb is closed. To avoid
indexing a specific tomb simply touch a \fI.noindex\fR file in it.
Useful tools to have: poppler-utils, aspell, xdg-utils, plocate.
.B
.IP "search"
Takes one or more strings as arguments and searches for them through all tombs
currently open and previously indexed using the \fIindex\fR command.
The search matches filenames if mlocate/plocate is installed and then also
file contents if swish++ is present on the system, results are listed
on the console.
The search matches filenames if plocate is installed and also
file contents if recoll is installed; all results are listed on the
console.
One can also run recoll's GUI using \fIrecoll -c /media/tomb\fR.
.B
.IP "close"

View File

@ -22,8 +22,8 @@ RUN apt-get update -y && \
pinentry-curses \
file xxd \
steghide \
mlocate \
swish-e
plocate \
recoll
# Build and install Tomb from remote repo
RUN curl https://files.dyne.org/tomb/releases/Tomb-$TOMB_VERSION.tar.gz -o /tmp/Tomb-$TOMB_VERSION.tar.gz && \

View File

@ -1278,8 +1278,8 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Indexieren der Inhalte von ::1 tomb name:: läuft..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgstr "Kreiere eine neue 'Swish-e'-Konfigurationsdatei: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr "Kreiere eine neue 'search index'-Konfigurationsdatei: ::1 swish conf::"
#: tomb:Index and search:index_tombs:2929
msgid "Search index updated."

View File

@ -1259,8 +1259,8 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Indexando el contenido de ::1 tumba::..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgstr "Generando un nuevo fichero de configuración de swish-e: ::1 conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr "Generando un nuevo fichero de configuración de search index: ::1 conf::"
#: tomb:Index and search:index_tombs:2929
msgid "Search index updated."

View File

@ -1317,9 +1317,9 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Index des contenus de ::1 tomb name:: en cours..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr ""
"Génération d'un nouveau fichier de configuration pour 'swish-e' : ::1 swish "
"Génération d'un nouveau fichier de configuration pour 'search index' : ::1 swish "
"conf::"
#: tomb:Index and search:index_tombs:2929

View File

@ -1259,8 +1259,8 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Indicizzazione ::1 tomb name:: contenuti..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgstr "Genero un nuovo file di configurazione swish-e: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr "Genero un nuovo file di configurazione search index: ::1 swish conf::"
#: tomb:Index and search:index_tombs:2929
msgid "Search index updated."

View File

@ -1221,8 +1221,8 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Indexando ::1 nome da tumba:: conteúdo..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgstr "Gerando um novo arquivo de configuração swish-e: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr "Gerando um novo arquivo de configuração search index: ::1 swish conf::"
#: tomb:Index and search:index_tombs:2929
msgid "Search index updated."

View File

@ -1238,8 +1238,8 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Индексирую содержимое ::1 tomb name::..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgstr "Генерирую новый файл конфигурации swish-e: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr "Генерирую новый файл конфигурации search index: ::1 swish conf::"
#: tomb:Index and search:index_tombs:2929
msgid "Search index updated."

View File

@ -1231,8 +1231,8 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr "Indexerar ::1 tomb name:: innehåll..."
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgstr "Genererar en ny swish-e konfigutŕationsfil: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr "Genererar en ny search index konfigurationsfil: ::1 swish conf::"
#: tomb:Index and search:index_tombs:2929
msgid "Search index updated."

View File

@ -1122,7 +1122,7 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr ""
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr ""
#: tomb:Index and search:index_tombs:2929

View File

@ -1131,7 +1131,7 @@ msgid "Indexing ::1 tomb name:: contents..."
msgstr ""
#: tomb:Index and search:index_tombs:2859
msgid "Generating a new swish-e configuration file: ::1 swish conf::"
msgid "Generating a new search index configuration file: ::1 swish conf::"
msgstr ""
#: tomb:Index and search:index_tombs:2929

130
tomb
View File

@ -65,7 +65,7 @@ typeset -i CLOAKIFY=1
typeset -i DECLOAKIFY=1
typeset -i SPHINX=1
typeset -i RESIZER=1
typeset -i SWISH=1
typeset -i RECOLL=1
typeset -i QRENCODE=1
# Default mount options
@ -970,7 +970,7 @@ function _print() {
_list_optional_tools() {
typeset -a _deps
_deps=(gettext dcfldd shred steghide)
_deps+=(resize2fs tomb-kdb-pbkdf2 argon2 qrencode swish-e unoconv lsof)
_deps+=(resize2fs tomb-kdb-pbkdf2 argon2 qrencode recoll unoconv lsof)
for d in $_deps; do
_print "`which $d`"
done
@ -1026,8 +1026,8 @@ _ensure_dependencies() {
command -v tomb-kdb-pbkdf2 1>/dev/null 2>/dev/null || KDF=0
# Check for ARGON2 KDF auxiliary tools
command -v argon2 1>/dev/null 2>/dev/null || ARGON2=0
# Check for Swish-E file content indexer
command -v swish-e 1>/dev/null 2>/dev/null || SWISH=0
# Check for Recoll file content indexer
command -v recoll 1>/dev/null 2>/dev/null || RECOLL=0
# Check for QREncode for paper backups of keys
command -v qrencode 1>/dev/null 2>/dev/null || QRENCODE=0
}
@ -2815,17 +2815,6 @@ BEGIN { }
# index files in all tombs for search
# $1 is optional, to specify a tomb
index_tombs() {
{ command -v updatedb 1>/dev/null 2>/dev/null } || {
_failure "Cannot index tombs on this system: updatedb (mlocate/plocate) not installed." }
updatedbver=`updatedb --version | grep '^updatedb'`
[[ "$updatedbver" =~ "GNU findutils" ]] && {
_warning "Cannot use GNU findutils for index/search commands." }
[[ "$updatedbver" =~ "locate" ]] || {
_failure "Index command needs 'mlocate/plocate' to be installed." }
_verbose "$updatedbver"
mounted_tombs=(`list_tomb_mounts $1`)
[[ ${#mounted_tombs} == 0 ]] && {
# Considering one tomb
@ -2836,11 +2825,11 @@ index_tombs() {
_success "Creating and updating search indexes."
# start the LibreOffice document converter if installed
{ command -v unoconv 1>/dev/null 2>/dev/null } && {
unoconv -l 2>/dev/null &
_verbose "unoconv listener launched."
sleep 1 }
# # start the LibreOffice document converter if installed
# { command -v unoconv 1>/dev/null 2>/dev/null } && {
# unoconv -l 2>/dev/null &
# _verbose "unoconv listener launched."
# sleep 1 }
for t in ${mounted_tombs}; do
mapper=`basename ${t[(ws:;:)1]}`
@ -2849,82 +2838,31 @@ index_tombs() {
[[ -r "${tombmount}/.noindex" ]] && {
_message "Skipping ::1 tomb name:: (.noindex found)." $tombname
continue }
_message "Indexing ::1 tomb name:: filenames..." $tombname
updatedb -l 0 -o "${tombmount}/.updatedb" -U "${tombmount}"
# here we use swish to index file contents
[[ $SWISH == 1 ]] && {
{ command -v updatedb 1>/dev/null 2>/dev/null } && {
updatedbver=`updatedb --version | grep '^updatedb'`
if [[ "$updatedbver" =~ "GNU findutils" ]]; then
_warning "Cannot use GNU findutils for index/search commands."
# elif [[ "$updatedbver" =~ "locate" ]]; then
# _warning "Index command needs 'mlocate/plocate' to be installed."
else
_message "Indexing ::1 tomb name:: filenames..." $tombname
_verbose "$updatedbver"
updatedb -l 0 -o "${tombmount}/.updatedb" -U "${tombmount}"
fi
}
# here we use recoll to index file contents
[[ $RECOLL == 1 ]] && {
_message "Indexing ::1 tomb name:: contents..." $tombname
rm -f "${tombmount}/.swishrc"
_message "Generating a new swish-e configuration file: ::1 swish conf::" "${tombmount}/.swishrc"
cat <<EOF > "${tombmount}/.swishrc"
# index directives
DefaultContents TXT*
IndexDir "$tombmount"
IndexFile "$tombmount/.swish"
# exclude images
FileRules filename regex /\.jp.?g/i
FileRules filename regex /\.png/i
FileRules filename regex /\.gif/i
FileRules filename regex /\.tiff/i
FileRules filename regex /\.svg/i
FileRules filename regex /\.xcf/i
FileRules filename regex /\.eps/i
FileRules filename regex /\.ttf/i
# exclude audio
FileRules filename regex /\.mp3/i
FileRules filename regex /\.ogg/i
FileRules filename regex /\.wav/i
FileRules filename regex /\.mod/i
FileRules filename regex /\.xm/i
# exclude video
FileRules filename regex /\.mp4/i
FileRules filename regex /\.avi/i
FileRules filename regex /\.ogv/i
FileRules filename regex /\.ogm/i
FileRules filename regex /\.mkv/i
FileRules filename regex /\.mov/i
FileRules filename regex /\.flv/i
FileRules filename regex /\.webm/i
# exclude system
FileRules filename is ok
FileRules filename is lock
FileRules filename is control
FileRules filename is status
FileRules filename is proc
FileRules filename is sys
FileRules filename is supervise
FileRules filename regex /\.asc$/i
FileRules filename regex /\.gpg$/i
# pdf and postscript
FileFilter .pdf pdftotext "'%p' -"
FileFilter .ps ps2txt "'%p' -"
# compressed files
FileFilterMatch lesspipe "%p" /\.tgz$/i
FileFilterMatch lesspipe "%p" /\.zip$/i
FileFilterMatch lesspipe "%p" /\.gz$/i
FileFilterMatch lesspipe "%p" /\.bz2$/i
FileFilterMatch lesspipe "%p" /\.Z$/
# spreadsheets
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/i
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/i
FileFilter .ods unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .ots unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dbf unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dif unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .uos unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .sxc unoconv "-d spreadsheet -f csv --stdout %P"
# word documents
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.tex$/i
# native html support
IndexContents HTML* .htm .html .shtml
IndexContents XML* .xml
rm -f "${tombmount}/recoll.conf"
_message "Generating a new search index configuration: ::1 recoll conf::" "${tombmount}/recoll.conf"
mkdir -p "${tombmount}/.recoll"
cat <<EOF > "${tombmount}/recoll.conf"
topdirs = ${tombmount}
cachedir = ${tombmount}/.recoll
EOF
swish-e -c "${tombmount}/.swishrc" -S fs -v3
recollindex -c "${tombmount}"
}
_message "Search index updated."
done
@ -2961,10 +2899,10 @@ search_tombs() {
_message "Matches found: ::1 matches::" \
$(locate -d "${tombmount}/.updatedb" -e -i -c ${(f)@})
# Use swish-e to search over contents
[[ $SWISH == 1 && -r "$tombmount/.swish" ]] && {
# Use recoll to search over contents
[[ $RECOLL == 1 && -r "$tombmount/recoll.conf" ]] && {
_message "Searching contents in tomb ::1 tomb name::" $tombname
swish-e -w ${@} -f "$tombmount/.swish" -H0 }
recollq -c "${tombmount}" ${@} }
} || {
_warning "Skipping tomb ::1 tomb name::: not indexed." $tombname
_warning "Run 'tomb index' to create indexes." }