line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Index::FieldInfos; |
2
|
34
|
|
|
34
|
|
6028
|
use strict; |
|
34
|
|
|
|
|
72
|
|
|
34
|
|
|
|
|
1276
|
|
3
|
34
|
|
|
34
|
|
192
|
use warnings; |
|
34
|
|
|
|
|
71
|
|
|
34
|
|
|
|
|
1005
|
|
4
|
34
|
|
|
34
|
|
179
|
use KinoSearch1::Util::ToolSet; |
|
34
|
|
|
|
|
67
|
|
|
34
|
|
|
|
|
5272
|
|
5
|
34
|
|
|
34
|
|
200
|
use base qw( KinoSearch1::Util::Class Exporter ); |
|
34
|
|
|
|
|
69
|
|
|
34
|
|
|
|
|
4415
|
|
6
|
|
|
|
|
|
|
|
7
|
34
|
|
|
34
|
|
299
|
use constant INDEXED => "\x01"; |
|
34
|
|
|
|
|
69
|
|
|
34
|
|
|
|
|
2465
|
|
8
|
34
|
|
|
34
|
|
182
|
use constant VECTORIZED => "\x02"; |
|
34
|
|
|
|
|
86
|
|
|
34
|
|
|
|
|
2127
|
|
9
|
34
|
|
|
34
|
|
211
|
use constant OMIT_NORMS => "\x10"; |
|
34
|
|
|
|
|
150
|
|
|
34
|
|
|
|
|
3528
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @EXPORT_OK; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
BEGIN { |
14
|
34
|
|
|
34
|
|
355
|
__PACKAGE__->init_instance_vars( |
15
|
|
|
|
|
|
|
# members |
16
|
|
|
|
|
|
|
by_name => undef, |
17
|
|
|
|
|
|
|
by_num => undef, |
18
|
|
|
|
|
|
|
from_file => 0, |
19
|
|
|
|
|
|
|
); |
20
|
34
|
|
|
|
|
277
|
__PACKAGE__->ready_get_set(qw( from_file )); |
21
|
|
|
|
|
|
|
|
22
|
34
|
|
|
|
|
773
|
@EXPORT_OK = qw( |
23
|
|
|
|
|
|
|
INDEXED |
24
|
|
|
|
|
|
|
VECTORIZED |
25
|
|
|
|
|
|
|
OMIT_NORMS |
26
|
|
|
|
|
|
|
); |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
34
|
|
|
34
|
|
1259
|
use KinoSearch1::Document::Field; |
|
34
|
|
|
|
|
68
|
|
|
34
|
|
|
|
|
55828
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub init_instance { |
32
|
260
|
|
|
260
|
1
|
388
|
my $self = shift; |
33
|
260
|
|
|
|
|
772
|
$self->{by_name} = {}; |
34
|
260
|
|
|
|
|
789
|
$self->{by_num} = []; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub clone { |
38
|
100
|
|
|
100
|
0
|
183
|
my $self = shift; |
39
|
100
|
|
|
|
|
414
|
my $evil_twin = __PACKAGE__->new; |
40
|
100
|
|
|
|
|
246
|
$evil_twin->{from_file} = $self->{from_file}; |
41
|
100
|
|
|
|
|
199
|
my @by_num; |
42
|
|
|
|
|
|
|
my %by_name; |
43
|
100
|
|
|
|
|
162
|
for my $finfo ( @{ $self->{by_num} } ) { |
|
100
|
|
|
|
|
273
|
|
44
|
182
|
|
|
|
|
608
|
my $dupe = $finfo->clone; |
45
|
182
|
|
|
|
|
346
|
push @by_num, $dupe; |
46
|
182
|
|
|
|
|
627
|
$by_name{ $finfo->get_name } = $dupe; |
47
|
|
|
|
|
|
|
} |
48
|
100
|
|
|
|
|
255
|
$evil_twin->{by_num} = \@by_num; |
49
|
100
|
|
|
|
|
226
|
$evil_twin->{by_name} = \%by_name; |
50
|
100
|
|
|
|
|
781
|
return $evil_twin; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# Add a user-supplied Field object to the collection. |
54
|
|
|
|
|
|
|
sub add_field { |
55
|
158
|
|
|
158
|
0
|
262
|
my ( $self, $field ) = @_; |
56
|
158
|
50
|
|
|
|
541
|
croak("Not a KinoSearch1::Document::Field") |
57
|
|
|
|
|
|
|
unless a_isa_b( $field, 'KinoSearch1::Document::Field' ); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# don't mod Field objects for segments that are read back in |
60
|
158
|
50
|
|
|
|
502
|
croak("Can't update FieldInfos that were read in from file") |
61
|
|
|
|
|
|
|
if $self->{from_file}; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# add the field |
64
|
158
|
|
|
|
|
445
|
my $fieldname = $field->get_name; |
65
|
158
|
|
|
|
|
435
|
$self->{by_name}{$fieldname} = $field; |
66
|
158
|
|
|
|
|
428
|
$self->_assign_field_nums; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# Return the number of fields in the segment. |
70
|
208
|
|
|
208
|
0
|
274
|
sub size { scalar @{ $_[0]->{by_num} } } |
|
208
|
|
|
|
|
1281
|
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Return a list of the Field objects. |
73
|
341
|
|
|
341
|
0
|
514
|
sub get_infos { @{ $_[0]->{by_num} } } |
|
341
|
|
|
|
|
1381
|
|
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# Given a fieldname, return its number. |
76
|
|
|
|
|
|
|
sub get_field_num { |
77
|
2199
|
|
|
2199
|
0
|
3294
|
my ( $self, $name ) = @_; |
78
|
|
|
|
|
|
|
return undef |
79
|
2199
|
100
|
|
|
|
5909
|
unless exists $self->{by_name}{$name}; |
80
|
2153
|
|
|
|
|
6629
|
my $num = $self->{by_name}{$name}->get_field_num; |
81
|
2153
|
|
|
|
|
5985
|
return $num; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Given a fieldname, return its FieldInfo. |
85
|
1
|
|
|
1
|
0
|
7
|
sub info_by_name { $_[0]->{by_name}{ $_[1] } } |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Given a field number, return its fieldInfo. |
88
|
152
|
|
|
152
|
0
|
539
|
sub info_by_num { $_[0]->{by_num}[ $_[1] ] } |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Given the field number (new, not original), return the name of the field. |
91
|
|
|
|
|
|
|
sub field_name { |
92
|
10
|
|
|
10
|
0
|
12
|
my ( $self, $num ) = @_; |
93
|
10
|
|
|
|
|
25
|
my $name = $self->{by_num}[$num]->get_name; |
94
|
10
|
50
|
|
|
|
21
|
croak("Don't know about field number $num") |
95
|
|
|
|
|
|
|
unless defined $name; |
96
|
10
|
|
|
|
|
20
|
return $name; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Sort all the fields lexically by name and assign ascending numbers. |
100
|
|
|
|
|
|
|
sub _assign_field_nums { |
101
|
173
|
|
|
173
|
|
244
|
my $self = shift; |
102
|
173
|
50
|
|
|
|
479
|
confess("Can't _assign_field_nums when from_file") if $self->{from_file}; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# assign field nums according to lexical order of field names |
105
|
173
|
|
|
|
|
796
|
@{ $self->{by_num} } |
|
1410
|
|
|
|
|
3204
|
|
106
|
173
|
|
|
|
|
241
|
= sort { $a->get_name cmp $b->get_name } values %{ $self->{by_name} }; |
|
173
|
|
|
|
|
662
|
|
107
|
173
|
|
|
|
|
343
|
my $inc = 0; |
108
|
173
|
|
|
|
|
240
|
$_->set_field_num( $inc++ ) for @{ $self->{by_num} }; |
|
173
|
|
|
|
|
799
|
|
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# Decode an existing .fnm file. |
112
|
|
|
|
|
|
|
sub read_infos { |
113
|
100
|
|
|
100
|
0
|
302
|
my ( $self, $instream ) = @_; |
114
|
100
|
|
|
|
|
178
|
my ( $by_name, $by_num ) = @{$self}{qw( by_name by_num )}; |
|
100
|
|
|
|
|
256
|
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# set flag indicating that this FieldInfos object has been read in |
117
|
100
|
|
|
|
|
225
|
$self->{from_file} = 1; |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# read in infos from stream |
120
|
100
|
|
|
|
|
1170
|
my $num_fields = $instream->lu_read('V'); |
121
|
100
|
|
|
|
|
892
|
my @names_and_bits = $instream->lu_read( 'Ta' x $num_fields ); |
122
|
100
|
|
|
|
|
228
|
my $field_num = 0; |
123
|
100
|
|
|
|
|
343
|
while ( $field_num < $num_fields ) { |
124
|
247
|
|
|
|
|
811
|
my ( $name, $bits ) = splice( @names_and_bits, 0, 2 ); |
125
|
247
|
100
|
|
|
|
2000
|
my $info = KinoSearch1::Document::Field->new( |
|
|
100
|
|
|
|
|
|
126
|
|
|
|
|
|
|
field_num => $field_num, |
127
|
|
|
|
|
|
|
name => $name, |
128
|
|
|
|
|
|
|
indexed => ( "$bits" & INDEXED ) eq INDEXED ? 1 : 0, |
129
|
|
|
|
|
|
|
vectorized => ( "$bits" & VECTORIZED ) eq VECTORIZED ? 1 : 0, |
130
|
|
|
|
|
|
|
fnm_bits => $bits, |
131
|
|
|
|
|
|
|
); |
132
|
247
|
|
|
|
|
590
|
$by_name->{$name} = $info; |
133
|
|
|
|
|
|
|
# order of storage implies lexical order by name and field number |
134
|
247
|
|
|
|
|
440
|
push @$by_num, $info; |
135
|
247
|
|
|
|
|
778
|
$field_num++; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# Write .fnm file. |
140
|
|
|
|
|
|
|
sub write_infos { |
141
|
64
|
|
|
64
|
0
|
152
|
my ( $self, $outstream ) = @_; |
142
|
|
|
|
|
|
|
|
143
|
64
|
|
|
|
|
133
|
$outstream->lu_write( 'V', scalar @{ $self->{by_num} } ); |
|
64
|
|
|
|
|
346
|
|
144
|
64
|
|
|
|
|
106
|
for my $finfo ( @{ $self->{by_num} } ) { |
|
64
|
|
|
|
|
191
|
|
145
|
162
|
|
|
|
|
545
|
$outstream->lu_write( 'Ta', $finfo->get_name, $finfo->get_fnm_bits, ); |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Merge two FieldInfos objects, redefining fields as necessary and generating |
150
|
|
|
|
|
|
|
# new field numbers. |
151
|
|
|
|
|
|
|
sub consolidate { |
152
|
15
|
|
|
15
|
0
|
48
|
my ( $self, @others ) = @_; |
153
|
15
|
|
|
|
|
50
|
my $infos = $self->{by_name}; |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# Make *this* finfos the master FieldInfos object |
156
|
15
|
|
|
|
|
35
|
for my $other (@others) { |
157
|
36
|
|
|
|
|
53
|
while ( my ( $name, $other_finfo ) = each %{ $other->{by_name} } ) { |
|
84
|
|
|
|
|
363
|
|
158
|
48
|
100
|
|
|
|
133
|
if ( exists $infos->{$name} ) { |
159
|
28
|
|
|
|
|
100
|
$infos->{$name} = $other_finfo->breed_with( $infos->{$name} ); |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
else { |
162
|
20
|
|
|
|
|
68
|
$infos->{$name} = $other_finfo->clone; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
15
|
|
|
|
|
60
|
$self->_assign_field_nums; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# Generate a mapping of field numbers between two FieldInfos objects. Should |
171
|
|
|
|
|
|
|
# be called by the superset. |
172
|
|
|
|
|
|
|
sub generate_field_num_map { |
173
|
16
|
|
|
16
|
0
|
42
|
my ( $self, $other ) = @_; |
174
|
16
|
|
|
|
|
33
|
my $map = ''; |
175
|
16
|
|
|
|
|
23
|
for my $other_finfo ( @{ $other->{by_num} } ) { |
|
16
|
|
|
|
|
48
|
|
176
|
18
|
|
|
|
|
67
|
my $orig_finfo = $self->{by_name}{ $other_finfo->get_name }; |
177
|
18
|
|
|
|
|
64
|
$map .= pack( 'I', $orig_finfo->get_field_num ); |
178
|
|
|
|
|
|
|
} |
179
|
16
|
|
|
|
|
67
|
return KinoSearch1::Util::IntMap->new( \$map ); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub encode_fnm_bits { |
183
|
158
|
|
|
158
|
0
|
279
|
my ( undef, $field ) = @_; |
184
|
158
|
|
|
|
|
243
|
my $bits = "\0"; |
185
|
158
|
|
|
|
|
309
|
for ($bits) { |
186
|
158
|
100
|
|
|
|
483
|
$_ |= INDEXED if $field->get_indexed; |
187
|
158
|
100
|
|
|
|
509
|
$_ |= VECTORIZED if $field->get_vectorized; |
188
|
158
|
50
|
|
|
|
505
|
$_ |= OMIT_NORMS if $field->get_omit_norms; |
189
|
|
|
|
|
|
|
} |
190
|
158
|
|
|
|
|
727
|
return $bits; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub decode_fnm_bits { |
194
|
0
|
|
|
0
|
0
|
0
|
my ( undef, $field, $bits ) = @_; |
195
|
0
|
|
|
|
|
0
|
$field->set_indexed( ( $bits & INDEXED ) eq INDEXED ); |
196
|
0
|
|
|
|
|
0
|
$field->set_vectorized( ( $bits & VECTORIZED ) eq VECTORIZED ); |
197
|
0
|
|
|
|
|
0
|
$field->set_omit_norms( ( $bits & OMIT_NORMS ) eq OMIT_NORMS ); |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
39
|
|
|
39
|
0
|
83
|
sub close { } |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
1; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
__END__ |