From 735d540fe7c9f123e9497d42fe3d71c71b0a9f77 Mon Sep 17 00:00:00 2001 From: Jaromil Date: Sun, 12 May 2024 23:29:14 +0200 Subject: [PATCH] feature recoll in place of swish-e to search a tomb's contents Swish is outdated and recoll works much better as a frontend to xapian, so we adopt it to provide local search over file contents in a tomb. This update also makes plocate optional to search over file names, not mandatory. Recoll works also in GUI using its -c commandline option followed by the indexed tomb's path. --- INSTALL.md | 4 +- doc/tomb.1 | 12 +-- extras/docker/Dockerfile | 4 +- extras/translations/de.po | 4 +- extras/translations/es.po | 4 +- extras/translations/fr.po | 4 +- extras/translations/it.po | 4 +- extras/translations/pt_BR.po | 4 +- extras/translations/ru.po | 4 +- extras/translations/sv.po | 4 +- extras/translations/tomb.pot | 2 +- extras/translations/zh_Hans.po | 2 +- tomb | 130 +++++++++------------------------ 13 files changed, 61 insertions(+), 121 deletions(-) diff --git a/INSTALL.md b/INSTALL.md index 0855e5b..9e430ec 100644 --- a/INSTALL.md +++ b/INSTALL.md @@ -72,8 +72,8 @@ executable | function steghide | bury and exhume keys inside images resizefs | extend the size of existing tomb volumes qrencode | engrave keys into printable qrcode sheets - mlocate | fast search of file names inside tombs - swish++ | fast search of file contents inside tombs + plocate | fast search of file names inside tombs + recoll | fast search of file contents inside tombs unoconv | fast search of contents in PDF and DOC files lesspipe | fast search of contents in compressed archives haveged | fast entropy generation for key forging diff --git a/doc/tomb.1 b/doc/tomb.1 index 2fe888d..602c1ea 100644 --- a/doc/tomb.1 +++ b/doc/tomb.1 @@ -122,19 +122,21 @@ internally to enumerate processes running in one or all tombs. 
.IP "index" Creates or updates the search indexes of all tombs currently open: enables use of the \fIsearch\fR command using simple word patterns on -file names. Indexes are created using mlocate/plocate's updatedb(8) and -swish-e(1) if they are found on the system. Indexes allow one to search +file names. Indexes are created using plocate's updatedb(8) and +recoll(1) if they are found on the system. Indexes allow one to search very fast for filenames and contents inside a tomb, they are stored inside it and are not accessible if the Tomb is closed. To avoid indexing a specific tomb simply touch a \fI.noindex\fR file in it. +Useful tools to have: poppler-utils, aspell, xdg-utils, plocate. .B .IP "search" Takes any string as argument and searches for them through all tombs currently open and previously indexed using the \fIindex\fR command. -The search matches filenames if mlocate/plocate is installed and then also -file contents if swish++ is present on the system, results are listed -on the console. +The search matches filenames if plocate is installed and then also +file contents if recoll is installed, all results are listed on the +console. +One can also run recoll's GUI using \fIrecoll -c /media/tomb\fR .B .IP "close" diff --git a/extras/docker/Dockerfile b/extras/docker/Dockerfile index 5f217a3..beeee67 100644 --- a/extras/docker/Dockerfile +++ b/extras/docker/Dockerfile @@ -22,8 +22,8 @@ RUN apt-get update -y && \ pinentry-curses \ file xxd \ steghide \ - mlocate \ - swish-e + plocate \ + recoll # Build and install Tomb from remote repo RUN curl https://files.dyne.org/tomb/releases/Tomb-$TOMB_VERSION.tar.gz -o /tmp/Tomb-$TOMB_VERSION.tar.gz && \ diff --git a/extras/translations/de.po b/extras/translations/de.po index 7f834a6..a68c082 100644 --- a/extras/translations/de.po +++ b/extras/translations/de.po @@ -1278,8 +1278,8 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Indexieren der Inhalte von ::1 tomb name:: läuft..." 
#: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" -msgstr "Kreiere eine neue 'Swish-e'-Konfigurationsdatei: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" +msgstr "Kreiere eine neue 'search index'-Konfigurationsdatei: ::1 swish conf::" #: tomb:Index and search:index_tombs:2929 msgid "Search index updated." diff --git a/extras/translations/es.po b/extras/translations/es.po index 7b7b489..bf911ed 100644 --- a/extras/translations/es.po +++ b/extras/translations/es.po @@ -1259,8 +1259,8 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Indexando el contenido de ::1 tumba::..." #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" -msgstr "Generando un nuevo fichero de configuración de swish-e: ::1 conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" +msgstr "Generando un nuevo fichero de configuración de search index: ::1 conf::" #: tomb:Index and search:index_tombs:2929 msgid "Search index updated." diff --git a/extras/translations/fr.po b/extras/translations/fr.po index 9faf82e..2589086 100644 --- a/extras/translations/fr.po +++ b/extras/translations/fr.po @@ -1317,9 +1317,9 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Index des contenus de ::1 tomb name:: en cours..." 
#: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" msgstr "" -"Génération d'un nouveau fichier de configuration pour 'swish-e' : ::1 swish " +"Génération d'un nouveau fichier de configuration pour 'search index' : ::1 swish " "conf::" #: tomb:Index and search:index_tombs:2929 diff --git a/extras/translations/it.po b/extras/translations/it.po index d9bd9d0..76b0604 100644 --- a/extras/translations/it.po +++ b/extras/translations/it.po @@ -1259,8 +1259,8 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Indicizzazione ::1 tomb name:: contenuti..." #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" -msgstr "Genero un nuovo file di configurazione swish-e: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" +msgstr "Genero un nuovo file di configurazione search index: ::1 swish conf::" #: tomb:Index and search:index_tombs:2929 msgid "Search index updated." diff --git a/extras/translations/pt_BR.po b/extras/translations/pt_BR.po index edb0d83..6c191a0 100644 --- a/extras/translations/pt_BR.po +++ b/extras/translations/pt_BR.po @@ -1221,8 +1221,8 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Indexando ::1 nome da tumba:: conteúdo..." #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" -msgstr "Gerando um novo arquivo de configuração swish-e: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" +msgstr "Gerando um novo arquivo de configuração search index: ::1 swish conf::" #: tomb:Index and search:index_tombs:2929 msgid "Search index updated." 
diff --git a/extras/translations/ru.po b/extras/translations/ru.po index 84e9132..c7007f1 100644 --- a/extras/translations/ru.po +++ b/extras/translations/ru.po @@ -1238,8 +1238,8 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Индексирую содержимое ::1 tomb name::..." #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" -msgstr "Генерирую новый файл конфигурации swish-e: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" +msgstr "Генерирую новый файл конфигурации search index: ::1 swish conf::" #: tomb:Index and search:index_tombs:2929 msgid "Search index updated." diff --git a/extras/translations/sv.po b/extras/translations/sv.po index fdf45f9..148ad3b 100644 --- a/extras/translations/sv.po +++ b/extras/translations/sv.po @@ -1231,8 +1231,8 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "Indexerar ::1 tomb name:: innehåll..." #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" -msgstr "Genererar en ny swish-e konfigutŕationsfil: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" +msgstr "Genererar en ny search index konfigutŕationsfil: ::1 swish conf::" #: tomb:Index and search:index_tombs:2929 msgid "Search index updated." diff --git a/extras/translations/tomb.pot b/extras/translations/tomb.pot index f00c2b2..a019224 100644 --- a/extras/translations/tomb.pot +++ b/extras/translations/tomb.pot @@ -1122,7 +1122,7 @@ msgid "Indexing ::1 tomb name:: contents..." 
msgstr "" #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" msgstr "" #: tomb:Index and search:index_tombs:2929 diff --git a/extras/translations/zh_Hans.po b/extras/translations/zh_Hans.po index 6f43b1c..60d0a16 100644 --- a/extras/translations/zh_Hans.po +++ b/extras/translations/zh_Hans.po @@ -1131,7 +1131,7 @@ msgid "Indexing ::1 tomb name:: contents..." msgstr "" #: tomb:Index and search:index_tombs:2859 -msgid "Generating a new swish-e configuration file: ::1 swish conf::" +msgid "Generating a new search index configuration file: ::1 swish conf::" msgstr "" #: tomb:Index and search:index_tombs:2929 diff --git a/tomb b/tomb index a6b509f..d5b0cd2 100755 --- a/tomb +++ b/tomb @@ -65,7 +65,7 @@ typeset -i CLOAKIFY=1 typeset -i DECLOAKIFY=1 typeset -i SPHINX=1 typeset -i RESIZER=1 -typeset -i SWISH=1 +typeset -i RECOLL=1 typeset -i QRENCODE=1 # Default mount options @@ -970,7 +970,7 @@ function _print() { _list_optional_tools() { typeset -a _deps _deps=(gettext dcfldd shred steghide) - _deps+=(resize2fs tomb-kdb-pbkdf2 argon2 qrencode swish-e unoconv lsof) + _deps+=(resize2fs tomb-kdb-pbkdf2 argon2 qrencode recoll unoconv lsof) for d in $_deps; do _print "`which $d`" done @@ -1026,8 +1026,8 @@ _ensure_dependencies() { command -v tomb-kdb-pbkdf2 1>/dev/null 2>/dev/null || KDF=0 # Check for ARGON2 KDF auxiliary tools command -v argon2 1>/dev/null 2>/dev/null || ARGON2=0 - # Check for Swish-E file content indexer - command -v swish-e 1>/dev/null 2>/dev/null || SWISH=0 + # Check for Recoll file content indexer + command -v recoll 1>/dev/null 2>/dev/null || RECOLL=0 # Check for QREncode for paper backups of keys command -v qrencode 1>/dev/null 2>/dev/null || QRENCODE=0 } @@ -2815,17 +2815,6 @@ BEGIN { } # index files in all tombs for search # $1 is optional, to specify a tomb index_tombs() { - { command -v updatedb 1>/dev/null 
2>/dev/null } || { - _failure "Cannot index tombs on this system: updatedb (mlocate/plocate) not installed." } - - updatedbver=`updatedb --version | grep '^updatedb'` - [[ "$updatedbver" =~ "GNU findutils" ]] && { - _warning "Cannot use GNU findutils for index/search commands." } - [[ "$updatedbver" =~ "locate" ]] || { - _failure "Index command needs 'mlocate/plocate' to be installed." } - - _verbose "$updatedbver" - mounted_tombs=(`list_tomb_mounts $1`) [[ ${#mounted_tombs} == 0 ]] && { # Considering one tomb @@ -2836,11 +2825,11 @@ index_tombs() { _success "Creating and updating search indexes." - # start the LibreOffice document converter if installed - { command -v unoconv 1>/dev/null 2>/dev/null } && { - unoconv -l 2>/dev/null & - _verbose "unoconv listener launched." - sleep 1 } + # # start the LibreOffice document converter if installed + # { command -v unoconv 1>/dev/null 2>/dev/null } && { + # unoconv -l 2>/dev/null & + # _verbose "unoconv listener launched." + # sleep 1 } for t in ${mounted_tombs}; do mapper=`basename ${t[(ws:;:)1]}` @@ -2849,82 +2838,31 @@ index_tombs() { [[ -r "${tombmount}/.noindex" ]] && { _message "Skipping ::1 tomb name:: (.noindex found)." $tombname continue } - _message "Indexing ::1 tomb name:: filenames..." $tombname - updatedb -l 0 -o "${tombmount}/.updatedb" -U "${tombmount}" - # here we use swish to index file contents - [[ $SWISH == 1 ]] && { + { command -v updatedb 1>/dev/null 2>/dev/null } && { + updatedbver=`updatedb --version | grep '^updatedb'` + if [[ "$updatedbver" =~ "GNU findutils" ]]; then + _warning "Cannot use GNU findutils for index/search commands." + # elif [[ "$updatedbver" =~ "locate" ]]; then + # _warning "Index command needs 'mlocate/plocate' to be installed." + else + _message "Indexing ::1 tomb name:: filenames..." 
$tombname + _verbose "$updatedbver" + updatedb -l 0 -o "${tombmount}/.updatedb" -U "${tombmount}" + fi + } + + # here we use recoll to index file contents + [[ $RECOLL == 1 ]] && { _message "Indexing ::1 tomb name:: contents..." $tombname - rm -f "${tombmount}/.swishrc" - _message "Generating a new swish-e configuration file: ::1 swish conf::" "${tombmount}/.swishrc" - cat <<EOF > "${tombmount}/.swishrc" -# index directives -DefaultContents TXT* -IndexDir "$tombmount" -IndexFile "$tombmount/.swish" -# exclude images -FileRules filename regex /\.jp.?g/i -FileRules filename regex /\.png/i -FileRules filename regex /\.gif/i -FileRules filename regex /\.tiff/i -FileRules filename regex /\.svg/i -FileRules filename regex /\.xcf/i -FileRules filename regex /\.eps/i -FileRules filename regex /\.ttf/i -# exclude audio -FileRules filename regex /\.mp3/i -FileRules filename regex /\.ogg/i -FileRules filename regex /\.wav/i -FileRules filename regex /\.mod/i -FileRules filename regex /\.xm/i -# exclude video -FileRules filename regex /\.mp4/i -FileRules filename regex /\.avi/i -FileRules filename regex /\.ogv/i -FileRules filename regex /\.ogm/i -FileRules filename regex /\.mkv/i -FileRules filename regex /\.mov/i -FileRules filename regex /\.flv/i -FileRules filename regex /\.webm/i -# exclude system -FileRules filename is ok -FileRules filename is lock -FileRules filename is control -FileRules filename is status -FileRules filename is proc -FileRules filename is sys -FileRules filename is supervise -FileRules filename regex /\.asc$/i -FileRules filename regex /\.gpg$/i -# pdf and postscript -FileFilter .pdf pdftotext "'%p' -" -FileFilter .ps ps2txt "'%p' -" -# compressed files -FileFilterMatch lesspipe "%p" /\.tgz$/i -FileFilterMatch lesspipe "%p" /\.zip$/i -FileFilterMatch lesspipe "%p" /\.gz$/i -FileFilterMatch lesspipe "%p" /\.bz2$/i -FileFilterMatch lesspipe "%p" /\.Z$/ -# spreadsheets -FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/i -FileFilterMatch 
unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/i -FileFilter .ods unoconv "-d spreadsheet -f csv --stdout %P" -FileFilter .ots unoconv "-d spreadsheet -f csv --stdout %P" -FileFilter .dbf unoconv "-d spreadsheet -f csv --stdout %P" -FileFilter .dif unoconv "-d spreadsheet -f csv --stdout %P" -FileFilter .uos unoconv "-d spreadsheet -f csv --stdout %P" -FileFilter .sxc unoconv "-d spreadsheet -f csv --stdout %P" -# word documents -FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/i -FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/i -FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/i -FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.tex$/i -# native html support -IndexContents HTML* .htm .html .shtml -IndexContents XML* .xml + rm -f "${tombmount}/recoll.conf" + _message "Generating a new search index configuration: ::1 recoll conf::" "${tombmount}/recoll.conf" + mkdir -p "${tombmount}/.recoll" + cat <<EOF > "${tombmount}/recoll.conf" +topdirs = ${tombmount} +cachedir = ${tombmount}/.recoll EOF - - swish-e -c "${tombmount}/.swishrc" -S fs -v3 + recollindex -c "${tombmount}" } _message "Search index updated." done @@ -2961,10 +2899,10 @@ search_tombs() { _message "Matches found: ::1 matches::" \ $(locate -d "${tombmount}/.updatedb" -e -i -c ${(f)@}) - # Use swish-e to search over contents - [[ $SWISH == 1 && -r "$tombmount/.swish" ]] && { + # Use recoll to search over contents + [[ $RECOLL == 1 && -r "$tombmount/recoll.conf" ]] && { _message "Searching contents in tomb ::1 tomb name::" $tombname - swish-e -w ${@} -f "$tombmount/.swish" -H0 } + recollq -c "${tombmount}" ${@} } } || { _warning "Skipping tomb ::1 tomb name::: not indexed." $tombname _warning "Run 'tomb index' to create indexes." }