line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more |
2
|
|
|
|
|
|
|
# contributor license agreements. See the NOTICE file distributed with |
3
|
|
|
|
|
|
|
# this work for additional information regarding copyright ownership. |
4
|
|
|
|
|
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0 |
5
|
|
|
|
|
|
|
# (the "License"); you may not use this file except in compliance with |
6
|
|
|
|
|
|
|
# the License. You may obtain a copy of the License at |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
11
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
12
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
14
|
|
|
|
|
|
|
# limitations under the License. |
15
|
|
|
|
|
|
|
|
16
|
2
|
|
|
2
|
|
1178
|
use strict; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
47
|
|
17
|
2
|
|
|
2
|
|
6
|
use warnings; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
61
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package LucyX::Index::ZlibDocReader; |
20
|
2
|
|
|
2
|
|
7
|
use base qw( Lucy::Index::DocReader ); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
666
|
|
21
|
|
|
|
|
|
|
our $VERSION = '0.006000_001'; |
22
|
|
|
|
|
|
|
$VERSION = eval $VERSION; |
23
|
2
|
|
|
2
|
|
8
|
use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on ); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
80
|
|
24
|
2
|
|
|
2
|
|
38
|
use Compress::Zlib qw( uncompress ); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
74
|
|
25
|
2
|
|
|
2
|
|
7
|
use Carp; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
1109
|
|
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Inside-out member vars. |
28
|
|
|
|
|
|
|
our %ix_in; |
29
|
|
|
|
|
|
|
our %dat_in; |
30
|
|
|
|
|
|
|
our %binary_fields; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub new { |
33
|
6
|
|
|
6
|
1
|
495
|
my ( $either, %args ) = @_; |
34
|
6
|
|
|
|
|
58
|
my $self = $either->SUPER::new(%args); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Validate metadata. Only open streams if the segment has data we |
37
|
|
|
|
|
|
|
# recognize. |
38
|
6
|
|
|
|
|
176
|
my $segment = $self->get_segment; |
39
|
6
|
|
|
|
|
33
|
my $metadata = $segment->fetch_metadata("zdocs"); |
40
|
6
|
50
|
|
|
|
13
|
if ($metadata) { |
41
|
6
|
50
|
|
|
|
18
|
if ( $metadata->{format} != 1 ) { |
42
|
0
|
|
|
|
|
0
|
confess("Unrecognized format: '$metadata->{format}'"); |
43
|
|
|
|
|
|
|
} |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Open InStreams. |
46
|
6
|
|
|
|
|
19
|
my $dat_filename = $segment->get_name . "/zdocs.dat"; |
47
|
6
|
|
|
|
|
13
|
my $ix_filename = $segment->get_name . "/zdocs.ix"; |
48
|
6
|
|
|
|
|
15
|
my $folder = $self->get_folder; |
49
|
6
|
50
|
|
|
|
69
|
$ix_in{$$self} = $folder->open_in($ix_filename) |
50
|
|
|
|
|
|
|
or confess Clownfish->error; |
51
|
6
|
50
|
|
|
|
39
|
$dat_in{$$self} = $folder->open_in($dat_filename) |
52
|
|
|
|
|
|
|
or confess Clownfish->error; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Remember which fields are binary. |
55
|
6
|
|
|
|
|
17
|
my $schema = $self->get_schema; |
56
|
6
|
|
|
|
|
9
|
my $bin_fields = $binary_fields{$$self} = {}; |
57
|
|
|
|
|
|
|
$bin_fields->{$_} = 1 |
58
|
6
|
|
|
|
|
6
|
for grep { $schema->fetch_type($_)->binary } |
|
24
|
|
|
|
|
102
|
|
59
|
6
|
|
|
|
|
32
|
@{ $schema->all_fields }; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
6
|
|
|
|
|
22
|
return $self; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub fetch_doc { |
66
|
3
|
|
|
3
|
1
|
204
|
my ( $self, $doc_id ) = @_; |
67
|
3
|
|
|
|
|
5
|
my $dat_in = $dat_in{$$self}; |
68
|
3
|
|
|
|
|
4
|
my $ix_in = $ix_in{$$self}; |
69
|
3
|
|
|
|
|
3
|
my $bin_fields = $binary_fields{$$self}; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Bail if no data in segment. |
72
|
3
|
50
|
|
|
|
7
|
return unless $ix_in; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# Read index information. |
75
|
3
|
|
|
|
|
12
|
$ix_in->seek( $doc_id * 8 ); |
76
|
3
|
|
|
|
|
9
|
my $start = $ix_in->read_i64; |
77
|
3
|
|
|
|
|
7
|
my $len = $ix_in->read_i64 - $start; |
78
|
3
|
|
|
|
|
4
|
my $compressed; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Read main data. |
81
|
3
|
|
|
|
|
4
|
$dat_in->seek($start); |
82
|
3
|
|
|
|
|
7
|
$dat_in->read( $compressed, $len ); |
83
|
3
|
|
|
|
|
7
|
my $inflated = uncompress($compressed); |
84
|
3
|
|
|
|
|
136
|
my $num_fields = unpack( "w", $inflated ); |
85
|
3
|
|
|
|
|
3
|
my $pack_template = "w "; |
86
|
3
|
|
|
|
|
8
|
$pack_template .= "w/a*" x ( $num_fields * 2 ); |
87
|
3
|
|
|
|
|
16
|
my ( undef, %fields ) = unpack( $pack_template, $inflated ); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
# Turn on UTF-8 flag for string fields. |
90
|
3
|
|
|
|
|
10
|
for my $field ( keys %fields ) { |
91
|
9
|
100
|
|
|
|
17
|
next if $bin_fields->{$field}; |
92
|
6
|
|
|
|
|
9
|
utf8_flag_on( $fields{$field} ); |
93
|
|
|
|
|
|
|
confess("Invalid UTF-8 read for doc $doc_id field '$field'") |
94
|
6
|
50
|
|
|
|
14
|
unless utf8_valid( $fields{$field} ); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
3
|
|
|
|
|
36
|
return Lucy::Document::HitDoc->new( |
98
|
|
|
|
|
|
|
fields => \%fields, |
99
|
|
|
|
|
|
|
doc_id => $doc_id, |
100
|
|
|
|
|
|
|
); |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub read_record { |
104
|
21
|
|
|
21
|
0
|
17
|
my ( $self, $doc_id, $buf ) = @_; |
105
|
21
|
|
|
|
|
17
|
my $dat_in = $dat_in{$$self}; |
106
|
21
|
|
|
|
|
14
|
my $ix_in = $ix_in{$$self}; |
107
|
21
|
|
|
|
|
18
|
my $bin_fields = $binary_fields{$$self}; |
108
|
|
|
|
|
|
|
|
109
|
21
|
50
|
|
|
|
24
|
if ($ix_in) { |
110
|
21
|
|
|
|
|
34
|
$ix_in->seek( $doc_id * 8 ); |
111
|
21
|
|
|
|
|
32
|
my $start = $ix_in->read_i64; |
112
|
21
|
|
|
|
|
33
|
my $len = $ix_in->read_i64 - $start; |
113
|
21
|
|
|
|
|
23
|
$dat_in->seek($start); |
114
|
21
|
|
|
|
|
45
|
$dat_in->read( $$buf, $len ); |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub close { |
119
|
6
|
|
|
6
|
0
|
7
|
my $self = shift; |
120
|
6
|
|
|
|
|
14
|
delete $ix_in{$$self}; |
121
|
6
|
|
|
|
|
9
|
delete $dat_in{$$self}; |
122
|
6
|
|
|
|
|
31
|
delete $binary_fields{$$self}; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub DESTROY { |
126
|
6
|
|
|
6
|
|
267
|
my $self = shift; |
127
|
6
|
|
|
|
|
13
|
delete $ix_in{$$self}; |
128
|
6
|
|
|
|
|
8
|
delete $dat_in{$$self}; |
129
|
6
|
|
|
|
|
6
|
delete $binary_fields{$$self}; |
130
|
6
|
|
|
|
|
690
|
$self->SUPER::DESTROY; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
1; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
__END__ |