line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Index::SegReader; |
2
|
34
|
|
|
34
|
|
176
|
use strict; |
|
34
|
|
|
|
|
63
|
|
|
34
|
|
|
|
|
1058
|
|
3
|
34
|
|
|
34
|
|
183
|
use warnings; |
|
34
|
|
|
|
|
64
|
|
|
34
|
|
|
|
|
786
|
|
4
|
34
|
|
|
34
|
|
178
|
use KinoSearch1::Util::ToolSet; |
|
34
|
|
|
|
|
90
|
|
|
34
|
|
|
|
|
4621
|
|
5
|
34
|
|
|
34
|
|
183
|
use base qw( KinoSearch1::Index::IndexReader ); |
|
34
|
|
|
|
|
66
|
|
|
34
|
|
|
|
|
4075
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
BEGIN {
    # Instance variables for this class, each defaulting to undef.
    my @instance_vars = (

        # params/members
        invindex => undef,
        seg_name => undef,

        # members
        comp_file_reader => undef,
        tinfos_reader    => undef,
        finfos           => undef,
        fields_reader    => undef,
        freq_stream      => undef,
        prox_stream      => undef,
        deldocs          => undef,
        norms_readers    => undef,
    );
    __PACKAGE__->init_instance_vars(@instance_vars);

    # Members handed to ready_get() -- presumably the ones that receive
    # generated getters; confirm against the ready_get() implementation.
    my @gettable = qw(
        finfos
        fields_reader
        freq_stream
        prox_stream
        deldocs
        seg_name
    );
    __PACKAGE__->ready_get(@gettable);
}
35
|
|
|
|
|
|
|
|
36
|
34
|
|
|
34
|
|
26452
|
use KinoSearch1::Index::CompoundFileReader; |
|
34
|
|
|
|
|
89
|
|
|
34
|
|
|
|
|
896
|
|
37
|
34
|
|
|
34
|
|
19800
|
use KinoSearch1::Index::TermInfosReader; |
|
34
|
|
|
|
|
316
|
|
|
34
|
|
|
|
|
1099
|
|
38
|
34
|
|
|
34
|
|
7249
|
use KinoSearch1::Index::FieldsReader; |
|
34
|
|
|
|
|
90
|
|
|
34
|
|
|
|
|
1581
|
|
39
|
34
|
|
|
34
|
|
207
|
use KinoSearch1::Index::FieldInfos; |
|
34
|
|
|
|
|
71
|
|
|
34
|
|
|
|
|
1391
|
|
40
|
34
|
|
|
34
|
|
24882
|
use KinoSearch1::Index::NormsReader; |
|
34
|
|
|
|
|
108
|
|
|
34
|
|
|
|
|
824
|
|
41
|
34
|
|
|
34
|
|
29395
|
use KinoSearch1::Index::SegTermDocs; |
|
34
|
|
|
|
|
101
|
|
|
34
|
|
|
|
|
893
|
|
42
|
34
|
|
|
34
|
|
18875
|
use KinoSearch1::Index::DelDocs; |
|
34
|
|
|
|
|
109
|
|
|
34
|
|
|
|
|
41449
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Alias this package's new() to KinoSearch1::Util::Class's new().
# Note: IndexReader's new() cannot simply be inherited here, because doing so
# runs into recursion problems.
*new = *KinoSearch1::Util::Class::new;
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Build every per-segment helper this reader holds: DelDocs, the
# CompoundFileReader, FieldInfos, FieldsReader, TermInfosReader, the
# frequency and positions streams, and finally the norms readers.
sub init_instance {
    my $self     = shift;
    my $invindex = $self->{invindex};
    my $seg_name = $self->{seg_name};

    $self->{norms_readers} = {};

    # DelDocs: created unconditionally; a deletions file is only read when
    # the invindex reports that one exists for this segment.
    $self->{deldocs} = KinoSearch1::Index::DelDocs->new(
        invindex => $invindex,
        seg_name => $seg_name,
    );
    if ( $invindex->file_exists("$seg_name.del") ) {
        $self->{deldocs}->read_deldocs( $invindex, "$seg_name.del" );
    }

    # CompoundFileReader: all remaining streams are opened through it.
    my $cf_reader = KinoSearch1::Index::CompoundFileReader->new(
        invindex => $invindex,
        seg_name => $seg_name,
    );
    $self->{comp_file_reader} = $cf_reader;

    # FieldInfos, populated from the segment's .fnm stream.
    my $finfos = KinoSearch1::Index::FieldInfos->new;
    $self->{finfos} = $finfos;
    $finfos->read_infos( $cf_reader->open_instream("$seg_name.fnm") );

    # FieldsReader over the .fdt (data) and .fdx (index) streams.
    $self->{fields_reader} = KinoSearch1::Index::FieldsReader->new(
        finfos        => $finfos,
        fdata_stream  => $cf_reader->open_instream("$seg_name.fdt"),
        findex_stream => $cf_reader->open_instream("$seg_name.fdx"),
    );

    # TermInfosReader: note that the CompoundFileReader is what gets passed
    # in the invindex slot.
    $self->{tinfos_reader} = KinoSearch1::Index::TermInfosReader->new(
        invindex => $cf_reader,
        seg_name => $seg_name,
        finfos   => $finfos,
    );

    # Frequency data, positional data, then the norms.
    $self->{freq_stream} = $cf_reader->open_instream("$seg_name.frq");
    $self->{prox_stream} = $cf_reader->open_instream("$seg_name.prx");
    $self->_open_norms;
}
91
|
|
|
|
|
|
|
|
92
|
1080
|
|
|
1080
|
0
|
4357
|
# Return whatever size the FieldsReader reports via get_size().
sub max_doc {
    my $self = shift;
    return $self->{fields_reader}->get_size;
}
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# Return max_doc minus the number of deletions reported by DelDocs.
sub num_docs {
    my $self      = shift;
    my $total     = $self->max_doc;
    my $deletions = $self->{deldocs}->get_num_deletions;
    return $total - $deletions;
}
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Obtain a term_docs object for $term and pass it to DelDocs'
# delete_by_term_docs(); that call's result is the return value.
sub delete_docs_by_term {
    my $self = shift;
    my ($term) = @_;
    my $postings = $self->term_docs($term);
    return $self->{deldocs}->delete_by_term_docs($postings);
}
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Write the deletions out to "<seg_name>.del" via DelDocs' write_deldocs().
# Does nothing when DelDocs reports no deletions.
sub commit_deletions {
    my $self = shift;
    if ( !$self->{deldocs}->get_num_deletions ) {
        return;
    }
    my $del_file = "$self->{seg_name}.del";
    $self->{deldocs}
        ->write_deldocs( $self->{invindex}, $del_file, $self->max_doc );
}
112
|
|
|
|
|
|
|
|
113
|
1
|
|
|
1
|
0
|
6
|
# Return DelDocs' deletion count (callers can use it as a boolean).
sub has_deletions {
    my $self = shift;
    return $self->{deldocs}->get_num_deletions;
}
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# Create one NormsReader per indexed field, stored in
# $self->{norms_readers} under the field's name.  Each reader is fed the
# "<seg_name>.f<field_num>" stream from the CompoundFileReader.
sub _open_norms {
    my $self      = shift;
    my $seg_name  = $self->{seg_name};
    my $finfos    = $self->{finfos};
    my $cf_reader = $self->{comp_file_reader};
    my $max_doc   = $self->max_doc;

    for my $finfo ( $finfos->get_infos ) {

        # fields which are not indexed get no NormsReader
        if ( $finfo->get_indexed ) {
            my $norms_file = "$seg_name.f" . $finfo->get_field_num;
            my $instream   = $cf_reader->open_instream($norms_file);
            my $norms      = KinoSearch1::Index::NormsReader->new(
                instream => $instream,
                max_doc  => $max_doc,
            );
            $self->{norms_readers}{ $finfo->get_name } = $norms;
        }
    }
}
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Delegate to the TermInfosReader's terms() with the supplied term.
sub terms {
    my $self = shift;
    my $term = shift;
    return $self->{tinfos_reader}->terms($term);
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Delegate to the TermInfosReader's fetch_term_info() for the given term.
sub fetch_term_info {
    my $self = shift;
    my $term = shift;
    return $self->{tinfos_reader}->fetch_term_info($term);
}
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Return the skip interval as reported by the TermInfosReader.
sub get_skip_interval {
    my $self = shift;
    return $self->{tinfos_reader}->get_skip_interval;
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Return the doc_freq from the term's TermInfo, or 0 when the
# TermInfosReader has no (defined) TermInfo for the term.
sub doc_freq {
    my $self  = shift;
    my $term  = shift;
    my $tinfo = $self->{tinfos_reader}->fetch_term_info($term);
    return 0 unless defined $tinfo;
    return $tinfo->get_doc_freq;
}
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# Create a SegTermDocs bound to this reader, seek() it to the supplied
# term, and return it.
sub term_docs {
    my $self = shift;
    my ($term) = @_;
    my $postings = KinoSearch1::Index::SegTermDocs->new( reader => $self );
    $postings->seek($term);
    return $postings;
}
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Look up the NormsReader stored under $field_name.  Returns nothing
# (undef in scalar context, empty list in list context) when no entry
# exists for that name.
sub norms_reader {
    my $self       = shift;
    my $field_name = shift;
    if ( exists $self->{norms_readers}{$field_name} ) {
        return $self->{norms_readers}{$field_name};
    }
    return;
}
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Return an array ref of field names taken from FieldInfos.  Passing
# indexed => 1 (any true value) restricts the result to fields whose
# get_indexed() is true.
sub get_field_names {
    my $self = shift;
    my %args = @_;

    my @finfos = $self->{finfos}->get_infos;
    if ( $args{indexed} ) {
        @finfos = grep { $_->get_indexed } @finfos;
    }
    return [ map { $_->get_name } @finfos ];
}
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# Return a clone of this reader's FieldInfos on which set_from_file(0)
# has been called.
sub generate_field_infos {
    my ($self) = @_;
    my $copy = $self->{finfos}->clone;
    $copy->set_from_file(0);
    return $copy;
}
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# Delegate to the FieldsReader's fetch_doc().  The second argument is
# handed over straight from @_ rather than being copied into a lexical.
sub fetch_doc {
    my $fields_reader = $_[0]->{fields_reader};
    return $fields_reader->fetch_doc( $_[1] );
}
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Return this reader when $all is true; otherwise return nothing.
sub segreaders_to_merge {
    my $self = shift;
    my $all  = shift;
    return unless $all;
    return $self;
}
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Close every helper held by this reader, then every NormsReader.
# Nothing is closed unless $self->{close_invindex} is true.
sub close {
    my $self = shift;
    if ( !$self->{close_invindex} ) {
        return;
    }

    # the order of this list is the order in which close() is called
    my @members = qw(
        deldocs
        finfos
        fields_reader
        tinfos_reader
        comp_file_reader
        freq_stream
        prox_stream
    );
    for my $member (@members) {
        $self->{$member}->close;
    }

    for my $norms_reader ( values %{ $self->{norms_readers} } ) {
        $norms_reader->close;
    }
}
206
|
|
|
|
|
|
|
1; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
__END__ |