line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
########################################### |
2
|
|
|
|
|
|
|
# SWISH::API::Common |
3
|
|
|
|
|
|
|
########################################### |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
########################################### |
6
|
|
|
|
|
|
|
package SWISH::API::Common; |
7
|
|
|
|
|
|
|
########################################### |
8
|
|
|
|
|
|
|
|
9
|
3
|
|
|
3
|
|
455468
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
99
|
|
10
|
3
|
|
|
3
|
|
22
|
use warnings; |
|
3
|
|
|
|
|
13
|
|
|
3
|
|
|
|
|
304
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $VERSION = "0.04"; |
13
|
|
|
|
|
|
|
our $SWISH_EXE = "swish-e"; |
14
|
|
|
|
|
|
|
our @SWISH_EXE_PATHS = qw(/usr/local/bin); |
15
|
|
|
|
|
|
|
|
16
|
3
|
|
|
3
|
|
1202
|
use SWISH::API; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use File::Path; |
18
|
|
|
|
|
|
|
use File::Find; |
19
|
|
|
|
|
|
|
use File::Spec; |
20
|
|
|
|
|
|
|
use File::Basename; |
21
|
|
|
|
|
|
|
use Log::Log4perl qw(:easy); |
22
|
|
|
|
|
|
|
use Sysadm::Install qw(:all); |
23
|
|
|
|
|
|
|
use File::Temp qw(tempfile); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
########################################### |
26
|
|
|
|
|
|
|
sub new { |
27
|
|
|
|
|
|
|
########################################### |
28
|
|
|
|
|
|
|
my($class, %options) = @_; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $self = { |
31
|
|
|
|
|
|
|
swish_adm_dir => "$ENV{HOME}/.swish-common", |
32
|
|
|
|
|
|
|
swish_exe => swish_find(), |
33
|
|
|
|
|
|
|
swish_fuzzy_indexing_mode => "Stemming_en", |
34
|
|
|
|
|
|
|
%options, |
35
|
|
|
|
|
|
|
}; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
my $defaults = { |
38
|
|
|
|
|
|
|
swish_idx_file => "$self->{swish_adm_dir}/default.idx", |
39
|
|
|
|
|
|
|
swish_cnf_file => "$self->{swish_adm_dir}/default.cnf", |
40
|
|
|
|
|
|
|
dirs_file => "$self->{swish_adm_dir}/default.dirs", |
41
|
|
|
|
|
|
|
streamer => "$self->{swish_adm_dir}/default.streamer", |
42
|
|
|
|
|
|
|
file_len_max => 100_000, |
43
|
|
|
|
|
|
|
atime_preserve => 0, |
44
|
|
|
|
|
|
|
}; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
for my $name (keys %$defaults) { |
47
|
|
|
|
|
|
|
if(! exists $self->{$name}) { |
48
|
|
|
|
|
|
|
$self->{$name} = $defaults->{$name}; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
LOGDIE "swish-e executable not found" unless -x $self->{swish_exe}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
bless $self, $class; |
55
|
|
|
|
|
|
|
} |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
########################################### |
58
|
|
|
|
|
|
|
sub index_remove { |
59
|
|
|
|
|
|
|
########################################### |
60
|
|
|
|
|
|
|
my($self) = @_; |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
unlink $self->{swish_idx_file}; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
########################################### |
66
|
|
|
|
|
|
|
sub search { |
67
|
|
|
|
|
|
|
########################################### |
68
|
|
|
|
|
|
|
my($self, $term) = @_; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
if(! -f $self->{swish_idx_file}) { |
71
|
|
|
|
|
|
|
ERROR "Index file $self->{swish_idx_file} not found"; |
72
|
|
|
|
|
|
|
return undef; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
my $swish = SWISH::API->new($self->{swish_idx_file}); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
$swish->AbortLastError |
78
|
|
|
|
|
|
|
if $swish->Error; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
my $results = $swish->Query($term); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
$swish->AbortLastError |
83
|
|
|
|
|
|
|
if $swish->Error; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# We might change this in the future to return an iterator |
86
|
|
|
|
|
|
|
# in scalar context |
87
|
|
|
|
|
|
|
my @results = (); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
while (my $r = $results->NextResult) { |
90
|
|
|
|
|
|
|
my $hit = SWISH::API::Common::Hit->new( |
91
|
|
|
|
|
|
|
path => $r->Property("swishdocpath") |
92
|
|
|
|
|
|
|
); |
93
|
|
|
|
|
|
|
push @results, $hit; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
return @results; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
########################################### |
100
|
|
|
|
|
|
|
sub files_stream { |
101
|
|
|
|
|
|
|
########################################### |
102
|
|
|
|
|
|
|
my($self) = @_; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
my @dirs = split /,/, slurp $self->{dirs_file}; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
my @files = grep { -f } @dirs; |
107
|
|
|
|
|
|
|
@dirs = grep { ! -f } @dirs; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
for(@files) { |
110
|
|
|
|
|
|
|
$self->file_stream($_); |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
return unless @dirs; |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
find(sub { |
116
|
|
|
|
|
|
|
return unless -f; |
117
|
|
|
|
|
|
|
return unless -T; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
my $full = $File::Find::name; |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
DEBUG "Indexing $full"; |
122
|
|
|
|
|
|
|
$self->file_stream(File::Spec->rel2abs($_)); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
}, @dirs); |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
############################################ |
128
|
|
|
|
|
|
|
sub file_stream { |
129
|
|
|
|
|
|
|
############################################ |
130
|
|
|
|
|
|
|
my($self, $file) = @_; |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
my @saved; |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
if($self->{atime_preserve}) { |
135
|
|
|
|
|
|
|
@saved = (stat($file))[8,9]; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
if(! open FILE, "<$file") { |
139
|
|
|
|
|
|
|
WARN "Cannot open $file ($!)"; |
140
|
|
|
|
|
|
|
return; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my $rc = sysread FILE, my $data, $self->{file_len_max}; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
unless(defined $rc) { |
146
|
|
|
|
|
|
|
WARN "Can't read $file $!"; |
147
|
|
|
|
|
|
|
return; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
close FILE; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
if($self->{atime_preserve}) { |
152
|
|
|
|
|
|
|
utime(@saved, $file); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
my $size = length $data; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
print "Path-Name: $file\n", |
158
|
|
|
|
|
|
|
"Document-Type: TXT*\n", |
159
|
|
|
|
|
|
|
"Content-Length: $size\n\n"; |
160
|
|
|
|
|
|
|
print $data; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
############################################ |
164
|
|
|
|
|
|
|
sub dir_prep { |
165
|
|
|
|
|
|
|
############################################ |
166
|
|
|
|
|
|
|
my($file) = @_; |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
my $dir = dirname($file); |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
if(! -d $dir) { |
171
|
|
|
|
|
|
|
mkd($dir) unless -d $dir; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
############################################ |
176
|
|
|
|
|
|
|
sub index_add { |
177
|
|
|
|
|
|
|
############################################ |
178
|
|
|
|
|
|
|
my($self, $dir) = @_; |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Index new doc in tmp idx file |
181
|
|
|
|
|
|
|
my $old_idx_name = $self->{swish_idx_file}; |
182
|
|
|
|
|
|
|
(my $dummy, my $old_idx) = tempfile(CLEANUP => 1); |
183
|
|
|
|
|
|
|
mv $old_idx_name, $old_idx; |
184
|
|
|
|
|
|
|
mv "$old_idx_name.prop", "$old_idx.prop"; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
($dummy, $self->{swish_idx_file}) = tempfile(CLEANUP => 1); |
187
|
|
|
|
|
|
|
$self->index($dir); |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Merge two indices |
190
|
|
|
|
|
|
|
my($stdout, $stderr, $rc) = tap($self->{swish_exe}, "-M", |
191
|
|
|
|
|
|
|
$old_idx, |
192
|
|
|
|
|
|
|
$self->{swish_idx_file}, |
193
|
|
|
|
|
|
|
$old_idx_name); |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
if($rc != 0) { |
196
|
|
|
|
|
|
|
ERROR "Merging failed: $stdout $stderr"; |
197
|
|
|
|
|
|
|
return undef; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
$self->{swish_idx_file} = $old_idx_name; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
############################################ |
204
|
|
|
|
|
|
|
sub index { |
205
|
|
|
|
|
|
|
############################################ |
206
|
|
|
|
|
|
|
my($self, @dirs) = @_; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# Make a new dirs file |
209
|
|
|
|
|
|
|
dir_prep($self->{dirs_file}); |
210
|
|
|
|
|
|
|
blurt join(',', @dirs), $self->{dirs_file}; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# Make a new swish conf file |
213
|
|
|
|
|
|
|
dir_prep($self->{swish_cnf_file}); |
214
|
|
|
|
|
|
|
blurt <{swish_cnf_file}; |
215
|
|
|
|
|
|
|
IndexDir $self->{streamer} |
216
|
|
|
|
|
|
|
IndexFile $self->{swish_idx_file} |
217
|
|
|
|
|
|
|
FuzzyIndexingMode $self->{swish_fuzzy_indexing_mode} |
218
|
|
|
|
|
|
|
EOT |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Make a new streamer |
221
|
|
|
|
|
|
|
dir_prep($self->{streamer}); |
222
|
|
|
|
|
|
|
my $perl = perl_find(); |
223
|
|
|
|
|
|
|
blurt <{streamer}; |
224
|
|
|
|
|
|
|
#!$perl |
225
|
|
|
|
|
|
|
use SWISH::API::Common; |
226
|
|
|
|
|
|
|
SWISH::API::Common->new( |
227
|
|
|
|
|
|
|
dirs_file => '$self->{dirs_file}', |
228
|
|
|
|
|
|
|
file_len_max => '$self->{file_len_max}', |
229
|
|
|
|
|
|
|
)->files_stream(); |
230
|
|
|
|
|
|
|
EOT |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
chmod 0755, $self->{streamer} or |
233
|
|
|
|
|
|
|
LOGDIE "chmod of $self->{streamer} failed ($!)"; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
my($stdout, $stderr, $rc) = tap($self->{swish_exe}, "-c", |
236
|
|
|
|
|
|
|
$self->{swish_cnf_file}, |
237
|
|
|
|
|
|
|
"-e", "-S", "prog"); |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
unless($stdout =~ /Indexing done!/) { |
240
|
|
|
|
|
|
|
ERROR "Indexing failed: $stdout $stderr"; |
241
|
|
|
|
|
|
|
return undef; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
DEBUG "$stdout"; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
1; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
########################################### |
250
|
|
|
|
|
|
|
sub perl_find { |
251
|
|
|
|
|
|
|
########################################### |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
if($^X =~ m#/#) { |
254
|
|
|
|
|
|
|
return $^X; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
return exe_find($^X); |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
########################################### |
261
|
|
|
|
|
|
|
sub swish_find { |
262
|
|
|
|
|
|
|
########################################### |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
for my $path (@SWISH_EXE_PATHS) { |
265
|
|
|
|
|
|
|
if(-f File::Spec->catfile($path, $SWISH_EXE)) { |
266
|
|
|
|
|
|
|
return File::Spec->catfile($path, $SWISH_EXE); |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
return exe_find($SWISH_EXE); |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
########################################### |
274
|
|
|
|
|
|
|
sub exe_find { |
275
|
|
|
|
|
|
|
########################################### |
276
|
|
|
|
|
|
|
my($exe) = @_; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
for my $path (split /:/, $ENV{PATH}) { |
279
|
|
|
|
|
|
|
if(-f File::Spec->catfile($path, $exe)) { |
280
|
|
|
|
|
|
|
return File::Spec->catfile($path, $exe); |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
return undef; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
########################################### |
288
|
|
|
|
|
|
|
package SWISH::API::Common::Hit; |
289
|
|
|
|
|
|
|
########################################### |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
make_accessor(__PACKAGE__, "path"); |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
########################################### |
294
|
|
|
|
|
|
|
sub new { |
295
|
|
|
|
|
|
|
########################################### |
296
|
|
|
|
|
|
|
my($class, %options) = @_; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
my $self = { |
299
|
|
|
|
|
|
|
%options, |
300
|
|
|
|
|
|
|
}; |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
bless $self, $class; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
################################################## |
306
|
|
|
|
|
|
|
# Poor man's Class::Struct |
307
|
|
|
|
|
|
|
################################################## |
308
|
|
|
|
|
|
|
sub make_accessor { |
309
|
|
|
|
|
|
|
################################################## |
310
|
|
|
|
|
|
|
my($package, $name) = @_; |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
no strict qw(refs); |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
my $code = <
|
315
|
|
|
|
|
|
|
*{"$package\\::$name"} = sub { |
316
|
|
|
|
|
|
|
my(\$self, \$value) = \@_; |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
if(defined \$value) { |
319
|
|
|
|
|
|
|
\$self->{$name} = \$value; |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
if(exists \$self->{$name}) { |
322
|
|
|
|
|
|
|
return (\$self->{$name}); |
323
|
|
|
|
|
|
|
} else { |
324
|
|
|
|
|
|
|
return ""; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
EOT |
328
|
|
|
|
|
|
|
if(! defined *{"$package\::$name"}) { |
329
|
|
|
|
|
|
|
eval $code or die "$@"; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
1; |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
__END__ |