Improved swish indexing

Indexing of file contents with swish-e is stable
This commit is contained in:
Jaromil 2013-06-20 10:26:12 +02:00
parent f3c18819d2
commit 064b9c964c

65
tomb
View File

@ -1754,31 +1754,76 @@ index_tombs() {
swishrc=`safe_filename swish` swishrc=`safe_filename swish`
cat <<EOF > $swishrc cat <<EOF > $swishrc
# index directives
DefaultContents TXT* DefaultContents TXT*
FileFilter .pdf pdftotext "'%p' -" IndexDir $tombmount
IndexFile $tombmount/.swish
# exclude images
FileRules filename regex /\.jp.?g/i
FileRules filename regex /\.png/i
FileRules filename regex /\.gif/i
FileRules filename regex /\.tiff/i
FileRules filename regex /\.svg/i
FileRules filename regex /\.xcf/i
FileRules filename regex /\.eps/i
FileRules filename regex /\.ttf/i
# exclude audio
FileRules filename regex /\.mp3/i
FileRules filename regex /\.ogg/i
FileRules filename regex /\.wav/i
FileRules filename regex /\.mod/i
FileRules filename regex /\.xm/i
# exclude video
FileRules filename regex /\.mp4/i
FileRules filename regex /\.avi/i
FileRules filename regex /\.ogv/i
FileRules filename regex /\.ogm/i
FileRules filename regex /\.mkv/i
FileRules filename regex /\.mov/i
# exclude system
FileRules filename is ok
FileRules filename is lock
FileRules filename is control
FileRules filename is status
FileRules filename is proc
FileRules filename is sys
FileRules filename is supervise
FileRules filename regex /\.asc$/i
FileRules filename regex /\.gpg$/i
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/ # pdf and postscript
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/ FileFilter .pdf pdftotext "'%p' -"
FileFilter .ps ps2txt "'%p' -"
# compressed files
FileFilterMatch lesspipe "%p" /\.tgz$/i
FileFilterMatch lesspipe "%p" /\.zip$/i
FileFilterMatch lesspipe "%p" /\.gz$/i
FileFilterMatch lesspipe "%p" /\.bz2$/i
FileFilterMatch lesspipe "%p" /\.Z$/
# spreadsheets
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xls.*/i
FileFilterMatch unoconv "-d spreadsheet -f csv --stdout %P" /\.xlt.*/i
FileFilter .ods unoconv "-d spreadsheet -f csv --stdout %P" FileFilter .ods unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .ots unoconv "-d spreadsheet -f csv --stdout %P" FileFilter .ots unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dbf unoconv "-d spreadsheet -f csv --stdout %P" FileFilter .dbf unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .dif unoconv "-d spreadsheet -f csv --stdout %P" FileFilter .dif unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .uos unoconv "-d spreadsheet -f csv --stdout %P" FileFilter .uos unoconv "-d spreadsheet -f csv --stdout %P"
FileFilter .sxc unoconv "-d spreadsheet -f csv --stdout %P" FileFilter .sxc unoconv "-d spreadsheet -f csv --stdout %P"
# word documents
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/ FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.doc.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/ FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.odt.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/ FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.rtf.*/i
FileFilterMatch unoconv "-d document -f txt --stdout %P" /\.tex$/i
# native html support
IndexContents HTML* .htm .html .shtml IndexContents HTML* .htm .html .shtml
IndexContents XML* .xml IndexContents XML* .xml
IndexDir $tombmount
IndexFile $tombmount/.swish
EOF EOF
xxx "Using swish-e to create index" xxx "Using swish-e to create index"
swish-e -c $swishrc -S fs -v3 swish-e -c $swishrc -S fs -v3
rm -f $swishrc rm -f $swishrc
} }
say "search index updated" say "search index updated"
done done
} }