File Coverage

blib/lib/LucyX/Index/ZlibDocWriter.pm
Criterion Covered Total %
statement 87 87 100.0
branch 14 20 70.0
condition 1 3 33.3
subroutine 15 15 100.0
pod 3 4 75.0
total 120 129 93.0


line stmt bran cond sub pod time code
1             # Licensed to the Apache Software Foundation (ASF) under one or more
2             # contributor license agreements. See the NOTICE file distributed with
3             # this work for additional information regarding copyright ownership.
4             # The ASF licenses this file to You under the Apache License, Version 2.0
5             # (the "License"); you may not use this file except in compliance with
6             # the License. You may obtain a copy of the License at
7             #
8             # http://www.apache.org/licenses/LICENSE-2.0
9             #
10             # Unless required by applicable law or agreed to in writing, software
11             # distributed under the License is distributed on an "AS IS" BASIS,
12             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13             # See the License for the specific language governing permissions and
14             # limitations under the License.
15              
16 2     2   994 use strict;
  2         2  
  2         63  
17              
18             package LucyX::Index::ZlibDocWriter;
19 2     2   7 use base qw( Lucy::Index::DataWriter );
  2         1  
  2         707  
20 2     2   8 use Carp;
  2         1  
  2         89  
21 2     2   6 use Scalar::Util qw( blessed );
  2         2  
  2         65  
22 2     2   1138 use Compress::Zlib qw( compress );
  2         91616  
  2         137  
23 2     2   750 use Lucy::Util::StringHelper qw( cat_bytes );
  2         2  
  2         96  
24 2     2   8 use Clownfish;
  2         2  
  2         50  
25 2     2   7 use bytes;
  2         3  
  2         6  
26 2     2   28 no bytes;
  2         1  
  2         6  
27              
28             our $VERSION = '0.006000_001';
29             $VERSION = eval $VERSION;
30              
31             # Inside-out member vars.
32             our %ix_out;
33             our %dat_out;
34              
35             # Inherit constructor.
36              
37             sub _lazy_init {
38 4     4   5 my $self = shift;
39              
40             # Get outstreams. Skip past non-doc #0.
41 4         13 my $folder = $self->get_folder;
42 4         28 my $ix_file = $self->get_segment->get_name . "/zdocs.ix";
43 4         12 my $dat_file = $self->get_segment->get_name . "/zdocs.dat";
44 4 50       52 $ix_out{$$self} = $folder->open_out($ix_file)
45             or confess Clownfish->error;
46 4 50       27 $dat_out{$$self} = $folder->open_out($dat_file)
47             or confess Clownfish->error;
48 4         15 $ix_out{$$self}->write_i64(0);
49             }
50              
51             sub add_inverted_doc {
52 13     13 0 2208 my ( $self, %args ) = @_;
53 13 100       44 _lazy_init($self) unless $ix_out{$$self};
54 13         16 my $inverter = $args{inverter};
55 13         14 my $ix_out = $ix_out{$$self};
56 13         12 my $dat_out = $dat_out{$$self};
57              
58             # Check doc id.
59 13         39 my $expected = $ix_out->tell / 8;
60             confess("Expected doc id $expected, got '$args{doc_id}'")
61 13 50       26 unless $args{doc_id} == $expected;
62              
63 13         14 my $to_compress = "";
64 13         6 my $count = 0;
65 13         28 my $schema = $self->get_schema;
66 13         30 $inverter->iterate;
67 13         31 while ( $inverter->next ) {
68 52 100       155 next unless $inverter->get_type->stored;
69 39         71 my $name = $inverter->get_field_name;
70 39         51 my $value = $inverter->get_value;
71 39         60 cat_bytes( $to_compress, pack( "w", bytes::length($name) ) );
72 39         866 cat_bytes( $to_compress, $name );
73 39         47 cat_bytes( $to_compress, pack( "w", bytes::length($value) ) );
74 39         80 cat_bytes( $to_compress, $value );
75 39         85 $count++;
76             }
77             # Prepend count of fields to store in this Doc.
78 13         21 $to_compress = pack( "w", $count ) . $to_compress;
79              
80             # Write file pointer to index file. Write compressed serialized string to
81             # main file.
82 13         35 $ix_out->write_i64( $dat_out->tell );
83 13         19 $dat_out->print( compress($to_compress) );
84             }
85              
86             sub add_segment {
87 3     3 1 694 my ( $self, %args ) = @_;
88 3         5 my $seg_reader = $args{reader};
89 3         3 my $doc_map = $args{doc_map};
90 3         10 my $doc_max = $seg_reader->doc_max;
91              
92             # Bail if the supplied segment is empty. */
93 3 50       12 return unless $doc_max;
94              
95 3 100       11 _lazy_init($self) unless $ix_out{$$self};
96 3         2 my $ix_out = $ix_out{$$self};
97 3         4 my $dat_out = $dat_out{$$self};
98 3         14 my $doc_reader = $seg_reader->obtain("Lucy::Index::DocReader");
99 3 50 33     24 confess("Not a ZlibDocReader")
100             unless ( blessed($doc_reader)
101             and $doc_reader->isa("LucyX::Index::ZlibDocReader") );
102              
103 3         8 for ( my $i = 1; $i <= $doc_max; $i++ ) {
104 24 100       51 next unless $doc_map->get($i);
105 21         17 my $buf;
106 21         34 $doc_reader->read_record( $i, \$buf );
107 21         42 $ix_out->write_i64( $dat_out->tell );
108 21         407 $dat_out->print($buf);
109             }
110             }
111              
112             sub finish {
113 4     4 1 194 my $self = shift;
114 4         7 my $ix_out = $ix_out{$$self};
115 4         4 my $dat_out = $dat_out{$$self};
116 4 50       10 if ($ix_out) {
117             # Write one extra file pointer so that we can always derive record
118             # length.
119 4         12 $ix_out->write_i64( $dat_out->tell );
120              
121             # Close streams and store metadata.
122 4         11 $ix_out->close;
123 4         8 $dat_out->close;
124 4         9 my $segment = $self->get_segment;
125 4         20 $segment->store_metadata(
126             key => 'zdocs',
127             metadata => $self->metadata,
128             );
129             }
130             }
131              
132 4     4 1 998 sub format {1}
133              
134             sub DESTROY {
135 4     4   286 my $self = shift;
136 4         15 delete $ix_out{$$self};
137 4         8 delete $dat_out{$$self};
138 4         164 $self->SUPER::DESTROY;
139             }
140              
141             1;
142              
143             __END__