line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package KinoSearch1::Index::FieldsReader; |
2
|
34
|
|
|
34
|
|
187
|
use strict; |
|
34
|
|
|
|
|
143
|
|
|
34
|
|
|
|
|
1262
|
|
3
|
34
|
|
|
34
|
|
191
|
use warnings; |
|
34
|
|
|
|
|
91
|
|
|
34
|
|
|
|
|
879
|
|
4
|
34
|
|
|
34
|
|
176
|
use KinoSearch1::Util::ToolSet; |
|
34
|
|
|
|
|
1844
|
|
|
34
|
|
|
|
|
4906
|
|
5
|
34
|
|
|
34
|
|
237
|
use base qw( KinoSearch1::Util::Class Exporter ); |
|
34
|
|
|
|
|
1764
|
|
|
34
|
|
|
|
|
4014
|
|
6
|
|
|
|
|
|
|
|
7
|
34
|
|
|
34
|
|
182
|
use constant ANALYZED => "\x01"; |
|
34
|
|
|
|
|
63
|
|
|
34
|
|
|
|
|
2594
|
|
8
|
34
|
|
|
34
|
|
349
|
use constant BINARY => "\x02"; |
|
34
|
|
|
|
|
79
|
|
|
34
|
|
|
|
|
1746
|
|
9
|
34
|
|
|
34
|
|
175
|
use constant COMPRESSED => "\x04"; |
|
34
|
|
|
|
|
174
|
|
|
34
|
|
|
|
|
3143
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our @EXPORT_OK; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
BEGIN { |
14
|
34
|
|
|
34
|
|
120
|
@EXPORT_OK = qw( ANALYZED BINARY COMPRESSED ); |
15
|
34
|
|
|
|
|
299
|
__PACKAGE__->init_instance_vars( |
16
|
|
|
|
|
|
|
# constructor params / members |
17
|
|
|
|
|
|
|
finfos => undef, |
18
|
|
|
|
|
|
|
fdata_stream => undef, |
19
|
|
|
|
|
|
|
findex_stream => undef, |
20
|
|
|
|
|
|
|
# members |
21
|
|
|
|
|
|
|
size => undef, |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
} |
25
|
|
|
|
|
|
|
|
26
|
34
|
|
|
34
|
|
43518
|
use Compress::Zlib qw( uncompress ); |
|
34
|
|
|
|
|
3133959
|
|
|
34
|
|
|
|
|
3826
|
|
27
|
34
|
|
|
34
|
|
8591
|
use KinoSearch1::Document::Field; |
|
34
|
|
|
|
|
104
|
|
|
34
|
|
|
|
|
1313
|
|
28
|
34
|
|
|
34
|
|
10781
|
use KinoSearch1::Document::Doc; |
|
34
|
|
|
|
|
96
|
|
|
34
|
|
|
|
|
25500
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub init_instance { |
31
|
95
|
|
|
95
|
1
|
164
|
my $self = shift; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# derive the number of documents in the segment |
34
|
95
|
|
|
|
|
769
|
$self->{size} = $self->{findex_stream}->length / 8; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Return number of documents in segment. |
38
|
1080
|
|
|
1080
|
0
|
20189
|
sub get_size { $_[0]->{size} } |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Retrieve raw field data from files. Either the data will be turned into |
41
|
|
|
|
|
|
|
# full-on Field and Doc objects by fetch_doc, or it will be passed on mostly |
42
|
|
|
|
|
|
|
# intact when merging segments (field numbers will be modified). |
43
|
|
|
|
|
|
|
sub fetch_raw { |
44
|
10187
|
|
|
10187
|
0
|
13996
|
my ( $self, $doc_num ) = @_; |
45
|
10187
|
|
|
|
|
17431
|
my ( $findex_stream, $fdata_stream ) |
46
|
10187
|
|
|
|
|
11130
|
= @{$self}{ 'findex_stream', 'fdata_stream' }; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# get data file pointer from index |
49
|
10187
|
|
|
|
|
50192
|
$findex_stream->seek( $doc_num * 8 ); |
50
|
10187
|
|
|
|
|
133997
|
my $start = $findex_stream->lu_read('Q'); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# retrieve one doc's worth of field data |
53
|
10187
|
|
|
|
|
46691
|
$fdata_stream->seek($start); |
54
|
10187
|
|
|
|
|
91656
|
my $num_fields = $fdata_stream->lu_read('V'); |
55
|
10187
|
|
|
|
|
21048
|
my $template = 'VaTT' x $num_fields; |
56
|
10187
|
|
|
|
|
43396
|
my @raw = $fdata_stream->lu_read($template); |
57
|
10187
|
|
|
|
|
35004
|
return ( $num_fields, \@raw ); |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Given a doc_num, rebuild a Doc object from the fields that were |
61
|
|
|
|
|
|
|
# stored. |
62
|
|
|
|
|
|
|
sub fetch_doc { |
63
|
49
|
|
|
49
|
0
|
89
|
my ( $self, $doc_num ) = @_; |
64
|
49
|
|
|
|
|
100
|
my $finfos = $self->{finfos}; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# start a new Doc object, read in data |
67
|
49
|
|
|
|
|
326
|
my $doc = KinoSearch1::Document::Doc->new; |
68
|
49
|
|
|
|
|
173
|
my ( $num_fields, $data ) = $self->fetch_raw($doc_num); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# docode stored data and build up the Doc object Field by Field. |
71
|
49
|
|
|
|
|
147
|
for ( 1 .. $num_fields ) { |
72
|
126
|
|
|
|
|
316
|
my ( $field_num, $bits, $string, $tv_string ) |
73
|
|
|
|
|
|
|
= splice( @$data, 0, 4 ); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
# decode fnm bits |
76
|
126
|
100
|
|
|
|
521
|
my $analyzed = ( $bits & ANALYZED ) eq ANALYZED ? 1 : 0; |
77
|
126
|
100
|
|
|
|
268
|
my $binary = ( $bits & BINARY ) eq BINARY ? 1 : 0; |
78
|
126
|
100
|
|
|
|
261
|
my $compressed = ( $bits & COMPRESSED ) eq COMPRESSED ? 1 : 0; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# create a field object, merging in the FieldInfo data, and add it |
81
|
126
|
|
|
|
|
435
|
my $finfo = $finfos->info_by_num($field_num); |
82
|
126
|
100
|
|
|
|
1171
|
my $field = KinoSearch1::Document::Field->new( |
83
|
|
|
|
|
|
|
%$finfo, |
84
|
|
|
|
|
|
|
field_num => $field_num, |
85
|
|
|
|
|
|
|
analyzed => $analyzed, |
86
|
|
|
|
|
|
|
binary => $binary, |
87
|
|
|
|
|
|
|
compressed => $compressed, |
88
|
|
|
|
|
|
|
fdt_bits => $bits, |
89
|
|
|
|
|
|
|
value => $compressed ? uncompress($string) : $string, |
90
|
|
|
|
|
|
|
tv_string => $tv_string, |
91
|
|
|
|
|
|
|
); |
92
|
126
|
|
|
|
|
694
|
$doc->add_field($field); |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
49
|
|
|
|
|
275
|
return $doc; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub decode_fdt_bits { |
99
|
0
|
|
|
0
|
0
|
0
|
my ( undef, $field, $bits ) = @_; |
100
|
0
|
|
|
|
|
0
|
$field->set_analyzed( ( $bits & ANALYZED ) eq ANALYZED ); |
101
|
0
|
|
|
|
|
0
|
$field->set_binary( ( $bits & BINARY ) eq BINARY ); |
102
|
0
|
|
|
|
|
0
|
$field->set_compressed( ( $bits & COMPRESSED ) eq COMPRESSED ); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub encode_fdt_bits { |
106
|
128
|
|
|
128
|
0
|
254
|
my ( undef, $field ) = @_; |
107
|
128
|
|
|
|
|
236
|
my $bits = "\0"; |
108
|
128
|
|
|
|
|
262
|
for ($bits) { |
109
|
128
|
100
|
|
|
|
497
|
$_ |= ANALYZED if $field->get_analyzed; |
110
|
128
|
100
|
|
|
|
421
|
$_ |= BINARY if $field->get_binary; |
111
|
128
|
100
|
|
|
|
435
|
$_ |= COMPRESSED if $field->get_compressed; |
112
|
|
|
|
|
|
|
} |
113
|
128
|
|
|
|
|
774
|
return $bits; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub close { |
117
|
39
|
|
|
39
|
0
|
61
|
my $self = shift; |
118
|
39
|
|
|
|
|
154
|
$self->{findex_stream}->close; |
119
|
39
|
|
|
|
|
131
|
$self->{fdata_stream}->close; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
1; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
__END__ |