line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
2
|
|
|
2
|
|
54235
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
147
|
|
2
|
2
|
|
|
2
|
|
13
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
116
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package KSx::Index::ZlibDocReader; |
5
|
2
|
|
|
2
|
|
12
|
use base qw( KinoSearch::Index::DocReader ); |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
52262
|
|
6
|
2
|
|
|
2
|
|
19
|
use KinoSearch::Util::StringHelper qw( utf8_valid utf8_flag_on ); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
158
|
|
7
|
2
|
|
|
2
|
|
14
|
use Compress::Zlib qw( uncompress ); |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
106
|
|
8
|
2
|
|
|
2
|
|
13
|
use Carp; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
2306
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Inside-out member vars. |
11
|
|
|
|
|
|
|
our %ix_in; |
12
|
|
|
|
|
|
|
our %dat_in; |
13
|
|
|
|
|
|
|
our %binary_fields; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub new { |
16
|
6
|
|
|
6
|
1
|
141
|
my ( $either, %args ) = @_; |
17
|
6
|
|
|
|
|
432
|
my $self = $either->SUPER::new(%args); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# Validate metadata. Only open streams if the segment has data we |
20
|
|
|
|
|
|
|
# recognize. |
21
|
6
|
|
|
|
|
34
|
my $segment = $self->get_segment; |
22
|
6
|
|
|
|
|
48
|
my $metadata = $segment->fetch_metadata("zdocs"); |
23
|
6
|
50
|
|
|
|
21
|
if ($metadata) { |
24
|
6
|
50
|
|
|
|
29
|
if ( $metadata->{format} != 1 ) { |
25
|
0
|
|
|
|
|
0
|
confess("Unrecognized format: '$metadata->{format}'"); |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# Open InStreams. |
29
|
6
|
|
|
|
|
37
|
my $dat_filename = $segment->get_name . "/zdocs.dat"; |
30
|
6
|
|
|
|
|
27
|
my $ix_filename = $segment->get_name . "/zdocs.ix"; |
31
|
6
|
|
|
|
|
29
|
my $folder = $self->get_folder; |
32
|
6
|
50
|
|
|
|
96
|
$ix_in{$$self} = $folder->open_in($ix_filename) |
33
|
|
|
|
|
|
|
or confess KinoSearch->error; |
34
|
6
|
50
|
|
|
|
66
|
$dat_in{$$self} = $folder->open_in($dat_filename) |
35
|
|
|
|
|
|
|
or confess KinoSearch->error; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Remember which fields are binary. |
38
|
6
|
|
|
|
|
25
|
my $schema = $self->get_schema; |
39
|
6
|
|
|
|
|
17
|
my $bin_fields = $binary_fields{$$self} = {}; |
40
|
24
|
|
|
|
|
209
|
$bin_fields->{$_} = 1 |
41
|
6
|
|
|
|
|
13
|
for grep { $schema->fetch_type($_)->binary } |
|
6
|
|
|
|
|
52
|
|
42
|
|
|
|
|
|
|
@{ $schema->all_fields }; |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
6
|
|
|
|
|
34
|
return $self; |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub fetch_doc { |
49
|
3
|
|
|
3
|
1
|
229
|
my ( $self, $doc_id ) = @_; |
50
|
3
|
|
|
|
|
9
|
my $dat_in = $dat_in{$$self}; |
51
|
3
|
|
|
|
|
7
|
my $ix_in = $ix_in{$$self}; |
52
|
3
|
|
|
|
|
5247
|
my $bin_fields = $binary_fields{$$self}; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Bail if no data in segment. |
55
|
3
|
50
|
|
|
|
22
|
return unless $ix_in; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Read index information. |
58
|
3
|
|
|
|
|
33
|
$ix_in->seek( $doc_id * 8 ); |
59
|
3
|
|
|
|
|
28
|
my $start = $ix_in->read_i64; |
60
|
3
|
|
|
|
|
13
|
my $len = $ix_in->read_i64 - $start; |
61
|
3
|
|
|
|
|
13
|
my $compressed; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Read main data. |
64
|
3
|
|
|
|
|
14
|
$dat_in->seek($start); |
65
|
3
|
|
|
|
|
17
|
$dat_in->read( $compressed, $len ); |
66
|
3
|
|
|
|
|
22
|
my $inflated = uncompress($compressed); |
67
|
3
|
|
|
|
|
347
|
my $num_fields = unpack( "w", $inflated ); |
68
|
3
|
|
|
|
|
6
|
my $pack_template = "w "; |
69
|
3
|
|
|
|
|
16
|
$pack_template .= "w/a*" x ( $num_fields * 2 ); |
70
|
3
|
|
|
|
|
29
|
my ( undef, %fields ) = unpack( $pack_template, $inflated ); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Turn on UTF-8 flag for string fields. |
73
|
3
|
|
|
|
|
16
|
for my $field ( keys %fields ) { |
74
|
9
|
100
|
|
|
|
28
|
next if $bin_fields->{$field}; |
75
|
6
|
|
|
|
|
17
|
utf8_flag_on( $fields{$field} ); |
76
|
6
|
50
|
|
|
|
26
|
confess("Invalid UTF-8 read for doc $doc_id field '$field'") |
77
|
|
|
|
|
|
|
unless utf8_valid( $fields{$field} ); |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
|
80
|
3
|
|
|
|
|
227
|
return KinoSearch::Document::HitDoc->new( |
81
|
|
|
|
|
|
|
fields => \%fields, |
82
|
|
|
|
|
|
|
doc_id => $doc_id, |
83
|
|
|
|
|
|
|
); |
84
|
|
|
|
|
|
|
} |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub read_record { |
87
|
21
|
|
|
21
|
0
|
33
|
my ( $self, $doc_id, $buf ) = @_; |
88
|
21
|
|
|
|
|
39
|
my $dat_in = $dat_in{$$self}; |
89
|
21
|
|
|
|
|
30
|
my $ix_in = $ix_in{$$self}; |
90
|
21
|
|
|
|
|
28
|
my $bin_fields = $binary_fields{$$self}; |
91
|
|
|
|
|
|
|
|
92
|
21
|
50
|
|
|
|
41
|
if ($ix_in) { |
93
|
21
|
|
|
|
|
236
|
$ix_in->seek( $doc_id * 8 ); |
94
|
21
|
|
|
|
|
70
|
my $start = $ix_in->read_i64; |
95
|
21
|
|
|
|
|
54
|
my $len = $ix_in->read_i64 - $start; |
96
|
21
|
|
|
|
|
50
|
$dat_in->seek($start); |
97
|
21
|
|
|
|
|
85
|
$dat_in->read( $$buf, $len ); |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub close { |
102
|
6
|
|
|
6
|
0
|
15
|
my $self = shift; |
103
|
6
|
|
|
|
|
21
|
delete $ix_in{$$self}; |
104
|
6
|
|
|
|
|
15
|
delete $dat_in{$$self}; |
105
|
6
|
|
|
|
|
60
|
delete $binary_fields{$$self}; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub DESTROY { |
109
|
6
|
|
|
6
|
|
737
|
my $self = shift; |
110
|
6
|
|
|
|
|
21
|
delete $ix_in{$$self}; |
111
|
6
|
|
|
|
|
24
|
delete $dat_in{$$self}; |
112
|
6
|
|
|
|
|
12
|
delete $binary_fields{$$self}; |
113
|
6
|
|
|
|
|
592
|
$self->SUPER::DESTROY; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
1; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
__END__ |