| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package KinoSearch1::Index::SegReader; |
|
2
|
34
|
|
|
34
|
|
176
|
use strict; |
|
|
34
|
|
|
|
|
63
|
|
|
|
34
|
|
|
|
|
1058
|
|
|
3
|
34
|
|
|
34
|
|
183
|
use warnings; |
|
|
34
|
|
|
|
|
64
|
|
|
|
34
|
|
|
|
|
786
|
|
|
4
|
34
|
|
|
34
|
|
178
|
use KinoSearch1::Util::ToolSet; |
|
|
34
|
|
|
|
|
90
|
|
|
|
34
|
|
|
|
|
4621
|
|
|
5
|
34
|
|
|
34
|
|
183
|
use base qw( KinoSearch1::Index::IndexReader ); |
|
|
34
|
|
|
|
|
66
|
|
|
|
34
|
|
|
|
|
4075
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN { |
|
8
|
34
|
|
|
34
|
|
356
|
__PACKAGE__->init_instance_vars( |
|
9
|
|
|
|
|
|
|
# params/members |
|
10
|
|
|
|
|
|
|
invindex => undef, |
|
11
|
|
|
|
|
|
|
seg_name => undef, |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# members |
|
14
|
|
|
|
|
|
|
comp_file_reader => undef, |
|
15
|
|
|
|
|
|
|
tinfos_reader => undef, |
|
16
|
|
|
|
|
|
|
finfos => undef, |
|
17
|
|
|
|
|
|
|
fields_reader => undef, |
|
18
|
|
|
|
|
|
|
freq_stream => undef, |
|
19
|
|
|
|
|
|
|
prox_stream => undef, |
|
20
|
|
|
|
|
|
|
deldocs => undef, |
|
21
|
|
|
|
|
|
|
norms_readers => undef, |
|
22
|
|
|
|
|
|
|
); |
|
23
|
|
|
|
|
|
|
|
|
24
|
34
|
|
|
|
|
324
|
__PACKAGE__->ready_get( |
|
25
|
|
|
|
|
|
|
qw( |
|
26
|
|
|
|
|
|
|
finfos |
|
27
|
|
|
|
|
|
|
fields_reader |
|
28
|
|
|
|
|
|
|
freq_stream |
|
29
|
|
|
|
|
|
|
prox_stream |
|
30
|
|
|
|
|
|
|
deldocs |
|
31
|
|
|
|
|
|
|
seg_name |
|
32
|
|
|
|
|
|
|
) |
|
33
|
|
|
|
|
|
|
); |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
34
|
|
|
34
|
|
26452
|
use KinoSearch1::Index::CompoundFileReader; |
|
|
34
|
|
|
|
|
89
|
|
|
|
34
|
|
|
|
|
896
|
|
|
37
|
34
|
|
|
34
|
|
19800
|
use KinoSearch1::Index::TermInfosReader; |
|
|
34
|
|
|
|
|
316
|
|
|
|
34
|
|
|
|
|
1099
|
|
|
38
|
34
|
|
|
34
|
|
7249
|
use KinoSearch1::Index::FieldsReader; |
|
|
34
|
|
|
|
|
90
|
|
|
|
34
|
|
|
|
|
1581
|
|
|
39
|
34
|
|
|
34
|
|
207
|
use KinoSearch1::Index::FieldInfos; |
|
|
34
|
|
|
|
|
71
|
|
|
|
34
|
|
|
|
|
1391
|
|
|
40
|
34
|
|
|
34
|
|
24882
|
use KinoSearch1::Index::NormsReader; |
|
|
34
|
|
|
|
|
108
|
|
|
|
34
|
|
|
|
|
824
|
|
|
41
|
34
|
|
|
34
|
|
29395
|
use KinoSearch1::Index::SegTermDocs; |
|
|
34
|
|
|
|
|
101
|
|
|
|
34
|
|
|
|
|
893
|
|
|
42
|
34
|
|
|
34
|
|
18875
|
use KinoSearch1::Index::DelDocs; |
|
|
34
|
|
|
|
|
109
|
|
|
|
34
|
|
|
|
|
41449
|
|
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# use KinoSearch1::Util::Class's new() |
|
45
|
|
|
|
|
|
|
# Note: can't inherit IndexReader's new() without recursion problems |
|
46
|
|
|
|
|
|
|
*new = *KinoSearch1::Util::Class::new; |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub init_instance { |
|
49
|
94
|
|
|
94
|
1
|
175
|
my $self = shift; |
|
50
|
94
|
|
|
|
|
161
|
my ( $seg_name, $invindex ) = @{$self}{ 'seg_name', 'invindex' }; |
|
|
94
|
|
|
|
|
441
|
|
|
51
|
94
|
|
|
|
|
226
|
$self->{norms_readers} = {}; |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# initialize DelDocs |
|
54
|
94
|
|
|
|
|
909
|
$self->{deldocs} = KinoSearch1::Index::DelDocs->new( |
|
55
|
|
|
|
|
|
|
invindex => $invindex, |
|
56
|
|
|
|
|
|
|
seg_name => $seg_name, |
|
57
|
|
|
|
|
|
|
); |
|
58
|
94
|
100
|
|
|
|
450
|
$self->{deldocs}->read_deldocs( $invindex, "$seg_name.del" ) |
|
59
|
|
|
|
|
|
|
if ( $invindex->file_exists("$seg_name.del") ); |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# initialize a CompoundFileReader |
|
62
|
94
|
|
|
|
|
883
|
my $comp_file_reader = $self->{comp_file_reader} |
|
63
|
|
|
|
|
|
|
= KinoSearch1::Index::CompoundFileReader->new( |
|
64
|
|
|
|
|
|
|
invindex => $invindex, |
|
65
|
|
|
|
|
|
|
seg_name => $seg_name, |
|
66
|
|
|
|
|
|
|
); |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# initialize FieldInfos |
|
69
|
94
|
|
|
|
|
867
|
my $finfos = $self->{finfos} = KinoSearch1::Index::FieldInfos->new; |
|
70
|
94
|
|
|
|
|
494
|
$finfos->read_infos( $comp_file_reader->open_instream("$seg_name.fnm") ); |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# initialize FieldsReader |
|
73
|
94
|
|
|
|
|
896
|
$self->{fields_reader} = KinoSearch1::Index::FieldsReader->new( |
|
74
|
|
|
|
|
|
|
finfos => $finfos, |
|
75
|
|
|
|
|
|
|
fdata_stream => $comp_file_reader->open_instream("$seg_name.fdt"), |
|
76
|
|
|
|
|
|
|
findex_stream => $comp_file_reader->open_instream("$seg_name.fdx"), |
|
77
|
|
|
|
|
|
|
); |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# initialize TermInfosReader |
|
80
|
94
|
|
|
|
|
2003
|
$self->{tinfos_reader} = KinoSearch1::Index::TermInfosReader->new( |
|
81
|
|
|
|
|
|
|
invindex => $comp_file_reader, |
|
82
|
|
|
|
|
|
|
seg_name => $seg_name, |
|
83
|
|
|
|
|
|
|
finfos => $finfos, |
|
84
|
|
|
|
|
|
|
); |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# open the frequency data, the positional data, and the norms |
|
87
|
94
|
|
|
|
|
404
|
$self->{freq_stream} = $comp_file_reader->open_instream("$seg_name.frq"); |
|
88
|
94
|
|
|
|
|
406
|
$self->{prox_stream} = $comp_file_reader->open_instream("$seg_name.prx"); |
|
89
|
94
|
|
|
|
|
394
|
$self->_open_norms; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
|
|
92
|
1080
|
|
|
1080
|
0
|
4357
|
sub max_doc { shift->{fields_reader}->get_size } |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub num_docs { |
|
95
|
100
|
|
|
100
|
0
|
137
|
my $self = shift; |
|
96
|
100
|
|
|
|
|
201
|
return $self->max_doc - $self->{deldocs}->get_num_deletions; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub delete_docs_by_term { |
|
100
|
4
|
|
|
4
|
0
|
6
|
my ( $self, $term ) = @_; |
|
101
|
4
|
|
|
|
|
8
|
my $term_docs = $self->term_docs($term); |
|
102
|
4
|
|
|
|
|
16
|
$self->{deldocs}->delete_by_term_docs($term_docs); |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub commit_deletions { |
|
106
|
38
|
|
|
38
|
0
|
62
|
my $self = shift; |
|
107
|
38
|
100
|
|
|
|
160
|
return unless $self->{deldocs}->get_num_deletions; |
|
108
|
1
|
|
|
|
|
3
|
my $filename = $self->{seg_name} . ".del"; |
|
109
|
1
|
|
|
|
|
5
|
$self->{deldocs} |
|
110
|
|
|
|
|
|
|
->write_deldocs( $self->{invindex}, $filename, $self->max_doc ); |
|
111
|
|
|
|
|
|
|
} |
|
112
|
|
|
|
|
|
|
|
|
113
|
1
|
|
|
1
|
0
|
6
|
sub has_deletions { shift->{deldocs}->get_num_deletions } |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub _open_norms { |
|
116
|
94
|
|
|
94
|
|
174
|
my $self = shift; |
|
117
|
94
|
|
|
|
|
329
|
my ( $seg_name, $finfos, $comp_file_reader ) |
|
118
|
94
|
|
|
|
|
152
|
= @{$self}{ 'seg_name', 'finfos', 'comp_file_reader' }; |
|
119
|
94
|
|
|
|
|
322
|
my $max_doc = $self->max_doc; |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
# create a NormsReader for each indexed field. |
|
122
|
94
|
|
|
|
|
372
|
for my $finfo ( $finfos->get_infos ) { |
|
123
|
208
|
100
|
|
|
|
610
|
next unless $finfo->get_indexed; |
|
124
|
192
|
|
|
|
|
642
|
my $filename = "$seg_name.f" . $finfo->get_field_num; |
|
125
|
192
|
|
|
|
|
656
|
my $instream = $comp_file_reader->open_instream($filename); |
|
126
|
192
|
|
|
|
|
1504
|
$self->{norms_readers}{ $finfo->get_name } |
|
127
|
|
|
|
|
|
|
= KinoSearch1::Index::NormsReader->new( |
|
128
|
|
|
|
|
|
|
instream => $instream, |
|
129
|
|
|
|
|
|
|
max_doc => $max_doc, |
|
130
|
|
|
|
|
|
|
); |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub terms { |
|
135
|
17
|
|
|
17
|
0
|
31
|
my ( $self, $term ) = @_; |
|
136
|
17
|
|
|
|
|
533
|
return $self->{tinfos_reader}->terms($term); |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub fetch_term_info { |
|
140
|
644
|
|
|
644
|
0
|
931
|
my ( $self, $term ) = @_; |
|
141
|
644
|
|
|
|
|
2666
|
return $self->{tinfos_reader}->fetch_term_info($term); |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub get_skip_interval { |
|
145
|
654
|
|
|
654
|
0
|
2739
|
shift->{tinfos_reader}->get_skip_interval; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub doc_freq { |
|
149
|
1421
|
|
|
1421
|
0
|
1827
|
my ( $self, $term ) = @_; |
|
150
|
1421
|
|
|
|
|
4842
|
my $tinfo = $self->{tinfos_reader}->fetch_term_info($term); |
|
151
|
1421
|
100
|
|
|
|
12270
|
return defined $tinfo ? $tinfo->get_doc_freq : 0; |
|
152
|
|
|
|
|
|
|
} |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub term_docs { |
|
155
|
654
|
|
|
654
|
0
|
1205
|
my ( $self, $term ) = @_; |
|
156
|
654
|
|
|
|
|
4863
|
my $term_docs = KinoSearch1::Index::SegTermDocs->new( reader => $self, ); |
|
157
|
654
|
|
|
|
|
2374
|
$term_docs->seek($term); |
|
158
|
654
|
|
|
|
|
2105
|
return $term_docs; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub norms_reader { |
|
162
|
511
|
|
|
511
|
0
|
1807
|
my ( $self, $field_name ) = @_; |
|
163
|
511
|
100
|
|
|
|
1635
|
return unless exists $self->{norms_readers}{$field_name}; |
|
164
|
505
|
|
|
|
|
2218
|
return $self->{norms_readers}{$field_name}; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub get_field_names { |
|
168
|
45
|
|
|
45
|
0
|
144
|
my ( $self, %args ) = @_; |
|
169
|
45
|
|
|
|
|
284
|
my @fields = $self->{finfos}->get_infos; |
|
170
|
45
|
100
|
|
|
|
229
|
@fields = grep { $_->get_indexed } @fields |
|
|
92
|
|
|
|
|
272
|
|
|
171
|
|
|
|
|
|
|
if $args{indexed}; |
|
172
|
45
|
|
|
|
|
100
|
my @names = map { $_->get_name } @fields; |
|
|
103
|
|
|
|
|
353
|
|
|
173
|
45
|
|
|
|
|
242
|
return \@names; |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub generate_field_infos { |
|
177
|
38
|
|
|
38
|
0
|
63
|
my $self = shift; |
|
178
|
38
|
|
|
|
|
146
|
my $new_finfos = $self->{finfos}->clone; |
|
179
|
38
|
|
|
|
|
132
|
$new_finfos->set_from_file(0); |
|
180
|
38
|
|
|
|
|
152
|
return $new_finfos; |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub fetch_doc { |
|
184
|
48
|
|
|
48
|
0
|
256
|
$_[0]->{fields_reader}->fetch_doc( $_[1] ); |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub segreaders_to_merge { |
|
188
|
8
|
|
|
8
|
0
|
21
|
my ( $self, $all ) = @_; |
|
189
|
8
|
100
|
|
|
|
54
|
return $self if $all; |
|
190
|
4
|
|
|
|
|
16
|
return; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub close { |
|
194
|
39
|
|
|
39
|
0
|
62
|
my $self = shift; |
|
195
|
39
|
50
|
|
|
|
162
|
return unless $self->{close_invindex}; |
|
196
|
|
|
|
|
|
|
|
|
197
|
39
|
|
|
|
|
161
|
$self->{deldocs}->close; |
|
198
|
39
|
|
|
|
|
162
|
$self->{finfos}->close; |
|
199
|
39
|
|
|
|
|
179
|
$self->{fields_reader}->close; |
|
200
|
39
|
|
|
|
|
184
|
$self->{tinfos_reader}->close; |
|
201
|
39
|
|
|
|
|
177
|
$self->{comp_file_reader}->close; |
|
202
|
39
|
|
|
|
|
132
|
$self->{freq_stream}->close; |
|
203
|
39
|
|
|
|
|
127
|
$self->{prox_stream}->close; |
|
204
|
39
|
|
|
|
|
99
|
$_->close for values %{ $self->{norms_readers} }; |
|
|
39
|
|
|
|
|
247
|
|
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
1; |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
__END__ |