173 lines
3.8 KiB
Tcl
173 lines
3.8 KiB
Tcl
|
|
||
|
|
||
|
# Return the entire contents of the file named $f.
#
# The file is opened with [open]'s default access mode and consumed with a
# single [read]. The channel is closed on every path: if [read] raises an
# error the channel is closed first and the original error (message,
# errorInfo and errorCode) is then re-raised, so a failed read does not leak
# a file descriptor when many files are loaded in one run.
#
# Errors from [open] itself propagate unchanged.
proc loadfile {f} {
  set fd [open $f]
  if {[catch {read $fd} data]} {
    # Save the error state before [close] can overwrite it.
    set savedInfo $::errorInfo
    set savedCode $::errorCode
    catch {close $fd}
    error $data $savedInfo $savedCode
  }
  close $fd
  return $data
}
|
||
|
|
||
|
# Global progress state used by load_hierachy:
#   ::nRow        number of rows inserted so far
#   ::nRowPerDot  a progress dot is printed each time ::nRow reaches a
#                 multiple of this value
namespace eval :: {
  variable nRow       0
  variable nRowPerDot 1000
}
|
||
|
|
||
|
# Recursively walk directory $dir and insert one row into table t1 for each
# non-directory entry found, using the [db] database command.
#
# Globals read: O(limit), O(trans), nRowPerDot. Global updated: nRow.
#
#   * When O(limit) is non-zero, the walk stops once nRow has reached it.
#   * When O(trans) is non-zero, every O(trans) rows the current transaction
#     is committed, an 'integrity-check' command row is inserted into t1 and
#     a new transaction is begun.
#   * A "." is written to stdout every nRowPerDot rows, with a newline after
#     every 65 dots.
#
# NOTE: the loop variable must stay named "f" because the SQL text below
# refers to it as the bound parameter $f.
proc load_hierachy {dir} {
  global O nRow nRowPerDot

  foreach f [glob -nocomplain -directory $dir *] {
    # Stop as soon as the document limit (if any) has been reached.
    if {$O(limit) && ($nRow >= $O(limit))} {
      break
    }

    # Directories are descended into; only other entries are inserted.
    if {[file isdirectory $f]} {
      load_hierachy $f
      continue
    }

    db eval { INSERT INTO t1 VALUES($f, loadfile($f)) }
    incr nRow

    # Periodic commit when -trans was given.
    if {$O(trans) && (($nRow % $O(trans)) == 0)} {
      db eval { COMMIT }
      db eval { INSERT INTO t1(t1) VALUES('integrity-check') }
      db eval { BEGIN }
    }

    # Progress indicator.
    if {($nRow % $nRowPerDot) != 0} {
      continue
    }
    puts -nonewline .
    if {($nRow % (65 * $nRowPerDot)) == 0} {
      puts ""
    }
    flush stdout
  }
}
|
||
|
|
||
|
# Write the command-line synopsis and the list of supported switches to
# stderr, then terminate the process with exit status 1.
proc usage {} {
  set helpLines [list \
    "Usage: $::argv0 ?SWITCHES? DATABASE PATH" \
    "" \
    "Switches are:" \
    " -fts4 (use fts4 instead of fts5)" \
    " -fts5 (use fts5)" \
    " -porter (use porter tokenizer)" \
    " -delete (delete the database file before starting)" \
    " -limit N (load no more than N documents)" \
    " -automerge N (set the automerge parameter to N)" \
    " -crisismerge N (set the crisismerge parameter to N)" \
    " -prefix PREFIX (comma separated prefix= argument)" \
    " -trans N (commit after N inserts - 0 == never)" \
    " -hashsize N (set the fts5 hashsize parameter to N)" \
    " -detail MODE (detail mode for fts5 tables)" \
  ]

  foreach helpLine $helpLines {
    puts stderr $helpLine
  }
  exit 1
}
|
||
|
|
||
|
# Default values for every command-line option, stored in the array O.
# The option-parsing loop overwrites these entries; the -1 defaults for
# automerge, crisismerge and hashsize mean "not specified" (each is only
# applied later when its value is >= 0).
array set O {
  vtab        fts5
  tok         {}
  limit       0
  delete      0
  automerge   -1
  crisismerge -1
  prefix      {}
  trans       0
  hashsize    -1
  detail      full
}
|
||
|
|
||
|
# Parse the command line into the O array.
#
# The last two elements of argv are DATABASE and PATH; everything before them
# is treated as switches. Fewer than two arguments, an unrecognised switch, or
# a switch whose value is missing all result in a call to [usage].
#
# Switches that take a numeric argument are validated here with
# [string is integer -strict]. Without this check a typo such as "-limit abc"
# is only detected later, when the value is first used in an expression,
# which is after the database file may already have been deleted (-delete)
# and re-created.
if {[llength $argv]<2} usage
set nOpt [expr {[llength $argv]-2}]
for {set i 0} {$i < $nOpt} {incr i} {
  set arg [lindex $argv $i]
  switch -- $arg {
    -fts4 {
      set O(vtab) fts4
    }

    -fts5 {
      set O(vtab) fts5
    }

    -porter {
      set O(tok) ", tokenize=porter"
    }

    -delete {
      set O(delete) 1
    }

    -limit - -trans - -automerge - -crisismerge - -hashsize {
      # Integer-valued switches: the value is the next argument and is
      # stored under the switch name without its leading "-".
      if { [incr i]>=$nOpt } usage
      set optValue [lindex $argv $i]
      if {![string is integer -strict $optValue]} {
        puts stderr "Error: $arg requires an integer argument, got \"$optValue\""
        puts stderr ""
        usage
      }
      set O([string range $arg 1 end]) $optValue
    }

    -prefix - -detail {
      # Free-form string switches: stored as given.
      if { [incr i]>=$nOpt } usage
      set O([string range $arg 1 end]) [lindex $argv $i]
    }

    default {
      usage
    }
  }
}
|
||
|
|
||
|
# Main script: open the database, create and configure the full-text table,
# load the directory tree and commit.

# DATABASE is the second-to-last command-line argument.
set dbfile [lindex $argv end-1]
if {$O(delete)} { file delete -force $dbfile }
sqlite3 db $dbfile
# load_static_extension may not exist in this interpreter; failure is ignored.
catch { load_static_extension db fts5 }
# Expose the Tcl proc loadfile as the SQL function loadfile().
db func loadfile loadfile
db eval "PRAGMA page_size=4096"

db eval BEGIN

# Build the optional trailing arguments of the CREATE VIRTUAL TABLE statement.
# These cannot be bound as parameters, so they are spliced into the SQL text.
set pref ""
if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" }
if {$O(vtab)=="fts5"} {
  append pref ", detail=$O(detail)"
}

# Errors from either statement are deliberately ignored. Note that if the
# CREATE fails the 'pgsz' insert is skipped as well.
# NOTE(review): presumably this allows re-using an existing t1 and tolerates
# fts4 tables, which are not given a rank column here - confirm.
catch {
  db eval "CREATE VIRTUAL TABLE t1 USING $O(vtab) (path, content$O(tok)$pref)"
  db eval "INSERT INTO t1(t1, rank) VALUES('pgsz', 4050);"
}

# The hashsize value is passed as a bound parameter, in the same way as
# automerge and crisismerge below, rather than being interpolated into the
# SQL text. Failure is ignored, as before.
if {$O(hashsize)>=0} {
  catch {
    db eval {INSERT INTO t1(t1, rank) VALUES('hashsize', $O(hashsize))}
  }
}

if {$O(automerge)>=0} {
  if {$O(vtab) == "fts5"} {
    db eval { INSERT INTO t1(t1, rank) VALUES('automerge', $O(automerge)) }
  } else {
    db eval { INSERT INTO t1(t1) VALUES('automerge=' || $O(automerge)) }
  }
}

# crisismerge is only applied to fts5 tables; for any other table type the
# option is accepted but has no effect.
if {$O(crisismerge)>=0 && $O(vtab) == "fts5"} {
  db eval {INSERT INTO t1(t1, rank) VALUES('crisismerge', $O(crisismerge))}
}

# PATH is the last command-line argument.
load_hierachy [lindex $argv end]
db eval COMMIT
# Terminate the line of progress dots.
puts ""
|
||
|
|
||
|
|
||
|
|