sqlite3/ext/fts5/tool/loadfts5.tcl

173 lines
3.8 KiB
Tcl
Raw Normal View History

2023-06-27 18:34:42 +08:00
proc loadfile {f} {
set fd [open $f]
set data [read $fd]
close $fd
return $data
}
set ::nRow 0
set ::nRowPerDot 1000
proc load_hierachy {dir} {
foreach f [glob -nocomplain -dir $dir *] {
if {$::O(limit) && $::nRow>=$::O(limit)} break
if {[file isdir $f]} {
load_hierachy $f
} else {
db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
incr ::nRow
if {$::O(trans) && ($::nRow % $::O(trans))==0} {
db eval { COMMIT }
db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
db eval { BEGIN }
}
if {($::nRow % $::nRowPerDot)==0} {
puts -nonewline .
if {($::nRow % (65*$::nRowPerDot))==0} { puts "" }
flush stdout
}
}
}
}
proc usage {} {
puts stderr "Usage: $::argv0 ?SWITCHES? DATABASE PATH"
puts stderr ""
puts stderr "Switches are:"
puts stderr " -fts4 (use fts4 instead of fts5)"
puts stderr " -fts5 (use fts5)"
puts stderr " -porter (use porter tokenizer)"
puts stderr " -delete (delete the database file before starting)"
puts stderr " -limit N (load no more than N documents)"
puts stderr " -automerge N (set the automerge parameter to N)"
puts stderr " -crisismerge N (set the crisismerge parameter to N)"
puts stderr " -prefix PREFIX (comma separated prefix= argument)"
puts stderr " -trans N (commit after N inserts - 0 == never)"
puts stderr " -hashsize N (set the fts5 hashsize parameter to N)"
puts stderr " -detail MODE (detail mode for fts5 tables)"
exit 1
}
set O(vtab) fts5
set O(tok) ""
set O(limit) 0
set O(delete) 0
set O(automerge) -1
set O(crisismerge) -1
set O(prefix) ""
set O(trans) 0
set O(hashsize) -1
set O(detail) full
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
set arg [lindex $argv $i]
switch -- [lindex $argv $i] {
-fts4 {
set O(vtab) fts4
}
-fts5 {
set O(vtab) fts5
}
-porter {
set O(tok) ", tokenize=porter"
}
-delete {
set O(delete) 1
}
-limit {
if { [incr i]>=$nOpt } usage
set O(limit) [lindex $argv $i]
}
-trans {
if { [incr i]>=$nOpt } usage
set O(trans) [lindex $argv $i]
}
-automerge {
if { [incr i]>=$nOpt } usage
set O(automerge) [lindex $argv $i]
}
-crisismerge {
if { [incr i]>=$nOpt } usage
set O(crisismerge) [lindex $argv $i]
}
-prefix {
if { [incr i]>=$nOpt } usage
set O(prefix) [lindex $argv $i]
}
-hashsize {
if { [incr i]>=$nOpt } usage
set O(hashsize) [lindex $argv $i]
}
-detail {
if { [incr i]>=$nOpt } usage
set O(detail) [lindex $argv $i]
}
default {
usage
}
}
}
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
catch { load_static_extension db fts5 }
db func loadfile loadfile
db eval "PRAGMA page_size=4096"
db eval BEGIN
set pref ""
if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
if {$O(vtab)=="fts5"} {
append pref ", detail=$O(detail)"
}
catch {
db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
}
if {$O(hashsize)>=0} {
catch {
db eval "INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize));"
}
}
if {$O(automerge)>=0} {
if {$O(vtab) == "fts5"} {
db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
} else {
db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
}
}
if {$O(crisismerge)>=0} {
if {$O(vtab) == "fts5"} {
db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
} else {
}
}
load_hierachy [lindex $argv end]
db eval COMMIT
puts ""