| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
|
2
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
|
3
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
|
4
|
|
|
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
|
5
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
|
6
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
|
11
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
|
12
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
13
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
|
14
|
|
|
|
|
|
|
# limitations under the License. |
|
15
|
|
|
|
|
|
|
|
|
16
|
2
|
|
|
2
|
|
1178
|
use strict; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
55
|
|
|
17
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
65
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package LucyX::Index::ZlibDocReader; |
|
20
|
2
|
|
|
2
|
|
7
|
use base qw( Lucy::Index::DocReader ); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
664
|
|
|
21
|
|
|
|
|
|
|
our $VERSION = '0.006000_002'; |
|
22
|
|
|
|
|
|
|
$VERSION = eval $VERSION; |
|
23
|
2
|
|
|
2
|
|
8
|
use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on ); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
75
|
|
|
24
|
2
|
|
|
2
|
|
6
|
use Compress::Zlib qw( uncompress ); |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
84
|
|
|
25
|
2
|
|
|
2
|
|
9
|
use Carp; |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
1115
|
|
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Inside-out member vars. |
|
28
|
|
|
|
|
|
|
our %ix_in; |
|
29
|
|
|
|
|
|
|
our %dat_in; |
|
30
|
|
|
|
|
|
|
our %binary_fields; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub new { |
|
33
|
6
|
|
|
6
|
1
|
509
|
my ( $either, %args ) = @_; |
|
34
|
6
|
|
|
|
|
58
|
my $self = $either->SUPER::new(%args); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Validate metadata. Only open streams if the segment has data we |
|
37
|
|
|
|
|
|
|
# recognize. |
|
38
|
6
|
|
|
|
|
153
|
my $segment = $self->get_segment; |
|
39
|
6
|
|
|
|
|
30
|
my $metadata = $segment->fetch_metadata("zdocs"); |
|
40
|
6
|
50
|
|
|
|
14
|
if ($metadata) { |
|
41
|
6
|
50
|
|
|
|
16
|
if ( $metadata->{format} != 1 ) { |
|
42
|
0
|
|
|
|
|
0
|
confess("Unrecognized format: '$metadata->{format}'"); |
|
43
|
|
|
|
|
|
|
} |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Open InStreams. |
|
46
|
6
|
|
|
|
|
18
|
my $dat_filename = $segment->get_name . "/zdocs.dat"; |
|
47
|
6
|
|
|
|
|
13
|
my $ix_filename = $segment->get_name . "/zdocs.ix"; |
|
48
|
6
|
|
|
|
|
13
|
my $folder = $self->get_folder; |
|
49
|
6
|
50
|
|
|
|
69
|
$ix_in{$$self} = $folder->open_in($ix_filename) |
|
50
|
|
|
|
|
|
|
or confess Clownfish->error; |
|
51
|
6
|
50
|
|
|
|
39
|
$dat_in{$$self} = $folder->open_in($dat_filename) |
|
52
|
|
|
|
|
|
|
or confess Clownfish->error; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Remember which fields are binary. |
|
55
|
6
|
|
|
|
|
10
|
my $schema = $self->get_schema; |
|
56
|
6
|
|
|
|
|
8
|
my $bin_fields = $binary_fields{$$self} = {}; |
|
57
|
|
|
|
|
|
|
$bin_fields->{$_} = 1 |
|
58
|
6
|
|
|
|
|
6
|
for grep { $schema->fetch_type($_)->binary } |
|
|
24
|
|
|
|
|
91
|
|
|
59
|
6
|
|
|
|
|
29
|
@{ $schema->all_fields }; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
6
|
|
|
|
|
22
|
return $self; |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub fetch_doc { |
|
66
|
3
|
|
|
3
|
1
|
204
|
my ( $self, $doc_id ) = @_; |
|
67
|
3
|
|
|
|
|
5
|
my $dat_in = $dat_in{$$self}; |
|
68
|
3
|
|
|
|
|
6
|
my $ix_in = $ix_in{$$self}; |
|
69
|
3
|
|
|
|
|
4
|
my $bin_fields = $binary_fields{$$self}; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Bail if no data in segment. |
|
72
|
3
|
50
|
|
|
|
9
|
return unless $ix_in; |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Read index information. |
|
75
|
3
|
|
|
|
|
13
|
$ix_in->seek( $doc_id * 8 ); |
|
76
|
3
|
|
|
|
|
13
|
my $start = $ix_in->read_i64; |
|
77
|
3
|
|
|
|
|
8
|
my $len = $ix_in->read_i64 - $start; |
|
78
|
3
|
|
|
|
|
5
|
my $compressed; |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Read main data. |
|
81
|
3
|
|
|
|
|
7
|
$dat_in->seek($start); |
|
82
|
3
|
|
|
|
|
11
|
$dat_in->read( $compressed, $len ); |
|
83
|
3
|
|
|
|
|
8
|
my $inflated = uncompress($compressed); |
|
84
|
3
|
|
|
|
|
144
|
my $num_fields = unpack( "w", $inflated ); |
|
85
|
3
|
|
|
|
|
4
|
my $pack_template = "w "; |
|
86
|
3
|
|
|
|
|
7
|
$pack_template .= "w/a*" x ( $num_fields * 2 ); |
|
87
|
3
|
|
|
|
|
16
|
my ( undef, %fields ) = unpack( $pack_template, $inflated ); |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# Turn on UTF-8 flag for string fields. |
|
90
|
3
|
|
|
|
|
10
|
for my $field ( keys %fields ) { |
|
91
|
9
|
100
|
|
|
|
18
|
next if $bin_fields->{$field}; |
|
92
|
6
|
|
|
|
|
7
|
utf8_flag_on( $fields{$field} ); |
|
93
|
|
|
|
|
|
|
confess("Invalid UTF-8 read for doc $doc_id field '$field'") |
|
94
|
6
|
50
|
|
|
|
15
|
unless utf8_valid( $fields{$field} ); |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
3
|
|
|
|
|
34
|
return Lucy::Document::HitDoc->new( |
|
98
|
|
|
|
|
|
|
fields => \%fields, |
|
99
|
|
|
|
|
|
|
doc_id => $doc_id, |
|
100
|
|
|
|
|
|
|
); |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub read_record { |
|
104
|
21
|
|
|
21
|
0
|
14
|
my ( $self, $doc_id, $buf ) = @_; |
|
105
|
21
|
|
|
|
|
18
|
my $dat_in = $dat_in{$$self}; |
|
106
|
21
|
|
|
|
|
15
|
my $ix_in = $ix_in{$$self}; |
|
107
|
21
|
|
|
|
|
15
|
my $bin_fields = $binary_fields{$$self}; |
|
108
|
|
|
|
|
|
|
|
|
109
|
21
|
50
|
|
|
|
28
|
if ($ix_in) { |
|
110
|
21
|
|
|
|
|
32
|
$ix_in->seek( $doc_id * 8 ); |
|
111
|
21
|
|
|
|
|
28
|
my $start = $ix_in->read_i64; |
|
112
|
21
|
|
|
|
|
27
|
my $len = $ix_in->read_i64 - $start; |
|
113
|
21
|
|
|
|
|
27
|
$dat_in->seek($start); |
|
114
|
21
|
|
|
|
|
43
|
$dat_in->read( $$buf, $len ); |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
} |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub close { |
|
119
|
6
|
|
|
6
|
0
|
7
|
my $self = shift; |
|
120
|
6
|
|
|
|
|
15
|
delete $ix_in{$$self}; |
|
121
|
6
|
|
|
|
|
8
|
delete $dat_in{$$self}; |
|
122
|
6
|
|
|
|
|
32
|
delete $binary_fields{$$self}; |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub DESTROY { |
|
126
|
6
|
|
|
6
|
|
263
|
my $self = shift; |
|
127
|
6
|
|
|
|
|
13
|
delete $ix_in{$$self}; |
|
128
|
6
|
|
|
|
|
7
|
delete $dat_in{$$self}; |
|
129
|
6
|
|
|
|
|
6
|
delete $binary_fields{$$self}; |
|
130
|
6
|
|
|
|
|
645
|
$self->SUPER::DESTROY; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
1; |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
__END__ |