Improved swish indexing

Usage of swish-e for indexing file contents is now stable.
This commit is contained in:
Jaromil 2013-06-20 10:26:12 +02:00
parent f3c18819d2
commit 064b9c964c

65
tomb
View File

@@ -1754,31 +1754,76 @@ index_tombs() {
swishrc=`safe_filename swish`
cat <<EOF > $swishrc
# index directives
DefaultContents TXT*
FileFilter .pdf pdftotext "'%p' -"
IndexDir $tombmount
IndexFile $tombmount/.swish
# exclude images
FileRules filename regex /\.jp.?g/i
FileRules filename regex /\.png/i
FileRules filename regex /\.gif/i
FileRules filename regex /\.tiff/i
FileRules filename regex /\.svg/i
FileRules filename regex /\.xcf/i
FileRules filename regex /\.eps/i
FileRules filename regex /\.ttf/i
# exclude audio
FileRules filename regex /\.mp3/i
FileRules filename regex /\.ogg/i
FileRules filename regex /\.wav/i
FileRules filename regex /\.mod/i
FileRules filename regex /\.xm/i
# exclude video
FileRules filename regex /\.mp4/i
FileRules filename regex /\.avi/i
FileRules filename regex /\.ogv/i
FileRules filename regex /\.ogm/i
FileRules filename regex /\.mkv/i
FileRules filename regex /\.mov/i
# exclude system
FileRules filename is ok
FileRules filename is lock
FileRules filename is control
FileRules filename is status
FileRules filename is proc
FileRules filename is sys
FileRules filename is supervise
FileRules filename regex /\.asc$/i
FileRules filename regex /\.gpg$/i
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/
# pdf and postscript
FileFilter .pdf pdftotext "'%p' -"
FileFilter .ps ps2txt "'%p' -"
# compressed files
FileFilterMatch lesspipe "%p" /\.tgz$/i
FileFilterMatch lesspipe "%p" /\.zip$/i
FileFilterMatch lesspipe "%p" /\.gz$/i
FileFilterMatch lesspipe "%p" /\.bz2$/i
FileFilterMatch lesspipe "%p" /\.Z$/
# spreadsheets
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/i
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/i
FileFilter .ods unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .ots unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dbf unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dif unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .uos unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .sxc unoconv "-d spreadsheet -f csv --stdout %P"
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/
# word documents
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.tex$/i
# native html support
IndexContents HTML* .htm .html .shtml
IndexContents XML* .xml
IndexDir $tombmount
IndexFile $tombmount/.swish
EOF
xxx "Using swish-e to create index"
swish-e -c $swishrc -S fs -v3
rm -f $swishrc
}
say "search index updated"
done
}