File Coverage

blib/lib/KSx/Index/ZlibDocReader.pm
Criterion Covered Total %
statement 75 76 98.6
branch 9 16 56.2
condition n/a
subroutine 11 11 100.0
pod 2 4 50.0
total 97 107 90.6


line stmt bran cond sub pod time code
1 2     2   54235 use strict;
  2         5  
  2         147  
2 2     2   13 use warnings;
  2         4  
  2         116  
3              
4             package KSx::Index::ZlibDocReader;
5 2     2   12 use base qw( KinoSearch::Index::DocReader );
  2         6  
  2         52262  
6 2     2   19 use KinoSearch::Util::StringHelper qw( utf8_valid utf8_flag_on );
  2         5  
  2         158  
7 2     2   14 use Compress::Zlib qw( uncompress );
  2         8  
  2         106  
8 2     2   13 use Carp;
  2         6  
  2         2306  
9              
10             # Inside-out member vars.
11             our %ix_in;
12             our %dat_in;
13             our %binary_fields;
14              
15             sub new {
16 6     6 1 141 my ( $either, %args ) = @_;
17 6         432 my $self = $either->SUPER::new(%args);
18              
19             # Validate metadata. Only open streams if the segment has data we
20             # recognize.
21 6         34 my $segment = $self->get_segment;
22 6         48 my $metadata = $segment->fetch_metadata("zdocs");
23 6 50       21 if ($metadata) {
24 6 50       29 if ( $metadata->{format} != 1 ) {
25 0         0 confess("Unrecognized format: '$metadata->{format}'");
26             }
27              
28             # Open InStreams.
29 6         37 my $dat_filename = $segment->get_name . "/zdocs.dat";
30 6         27 my $ix_filename = $segment->get_name . "/zdocs.ix";
31 6         29 my $folder = $self->get_folder;
32 6 50       96 $ix_in{$$self} = $folder->open_in($ix_filename)
33             or confess KinoSearch->error;
34 6 50       66 $dat_in{$$self} = $folder->open_in($dat_filename)
35             or confess KinoSearch->error;
36              
37             # Remember which fields are binary.
38 6         25 my $schema = $self->get_schema;
39 6         17 my $bin_fields = $binary_fields{$$self} = {};
40 24         209 $bin_fields->{$_} = 1
41 6         13 for grep { $schema->fetch_type($_)->binary }
  6         52  
42             @{ $schema->all_fields };
43             }
44              
45 6         34 return $self;
46             }
47              
48             sub fetch_doc {
49 3     3 1 229 my ( $self, $doc_id ) = @_;
50 3         9 my $dat_in = $dat_in{$$self};
51 3         7 my $ix_in = $ix_in{$$self};
52 3         5247 my $bin_fields = $binary_fields{$$self};
53              
54             # Bail if no data in segment.
55 3 50       22 return unless $ix_in;
56              
57             # Read index information.
58 3         33 $ix_in->seek( $doc_id * 8 );
59 3         28 my $start = $ix_in->read_i64;
60 3         13 my $len = $ix_in->read_i64 - $start;
61 3         13 my $compressed;
62              
63             # Read main data.
64 3         14 $dat_in->seek($start);
65 3         17 $dat_in->read( $compressed, $len );
66 3         22 my $inflated = uncompress($compressed);
67 3         347 my $num_fields = unpack( "w", $inflated );
68 3         6 my $pack_template = "w ";
69 3         16 $pack_template .= "w/a*" x ( $num_fields * 2 );
70 3         29 my ( undef, %fields ) = unpack( $pack_template, $inflated );
71              
72             # Turn on UTF-8 flag for string fields.
73 3         16 for my $field ( keys %fields ) {
74 9 100       28 next if $bin_fields->{$field};
75 6         17 utf8_flag_on( $fields{$field} );
76 6 50       26 confess("Invalid UTF-8 read for doc $doc_id field '$field'")
77             unless utf8_valid( $fields{$field} );
78             }
79              
80 3         227 return KinoSearch::Document::HitDoc->new(
81             fields => \%fields,
82             doc_id => $doc_id,
83             );
84             }
85              
86             sub read_record {
87 21     21 0 33 my ( $self, $doc_id, $buf ) = @_;
88 21         39 my $dat_in = $dat_in{$$self};
89 21         30 my $ix_in = $ix_in{$$self};
90 21         28 my $bin_fields = $binary_fields{$$self};
91              
92 21 50       41 if ($ix_in) {
93 21         236 $ix_in->seek( $doc_id * 8 );
94 21         70 my $start = $ix_in->read_i64;
95 21         54 my $len = $ix_in->read_i64 - $start;
96 21         50 $dat_in->seek($start);
97 21         85 $dat_in->read( $$buf, $len );
98             }
99             }
100              
101             sub close {
102 6     6 0 15 my $self = shift;
103 6         21 delete $ix_in{$$self};
104 6         15 delete $dat_in{$$self};
105 6         60 delete $binary_fields{$$self};
106             }
107              
108             sub DESTROY {
109 6     6   737 my $self = shift;
110 6         21 delete $ix_in{$$self};
111 6         24 delete $dat_in{$$self};
112 6         12 delete $binary_fields{$$self};
113 6         592 $self->SUPER::DESTROY;
114             }
115              
116             1;
117              
118             __END__