| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WARC::Index::Volatile;					# -*- CPerl -*- | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 2 |  |  | 2 |  | 69726 | use strict; | 
|  | 2 |  |  |  |  | 13 |  | 
|  | 2 |  |  |  |  | 52 |  | 
| 4 | 2 |  |  | 2 |  | 10 | use warnings; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 42 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 2 |  |  | 2 |  | 9 | use Scalar::Util; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 64 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 2 |  |  | 2 |  | 404 | use WARC::Index; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 52 |  | 
| 9 | 2 |  |  | 2 |  | 774 | use WARC::Index::Builder; | 
|  | 2 |  |  |  |  | 5 |  | 
|  | 2 |  |  |  |  | 51 |  | 
| 10 | 2 |  |  | 2 |  | 9 | use WARC::Index::Entry; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 63 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our @ISA = qw(WARC::Index WARC::Index::Builder); | 
| 13 |  |  |  |  |  |  |  | 
| 14 | 2 |  |  | 2 |  | 9 | use WARC; *WARC::Index::Volatile::VERSION = \$WARC::VERSION; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 48 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 | 2 |  |  | 2 |  | 9 | use Carp; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 4267 |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | require WARC::Volume; | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | our @Default_Column_Set = qw/record_id/; | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | WARC::Index::register(filename => qr/[.]warc(?:[.]gz)?$/); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | # This implementation uses a hash as the underlying structure. | 
| 25 |  |  |  |  |  |  | #  Keys defined by this class: | 
| 26 |  |  |  |  |  |  | # | 
| 27 |  |  |  |  |  |  | #   volumes | 
| 28 |  |  |  |  |  |  | #	array of WARC::Volume objects known to this index | 
| 29 |  |  |  |  |  |  | #	-- This field is used to intern volume objects within an index. | 
| 30 |  |  |  |  |  |  | #   volume_position_by_tag | 
| 31 |  |  |  |  |  |  | #	hash mapping volume tags to positions in volumes array | 
| 32 |  |  |  |  |  |  | #	-- This field is used to intern volume objects within an index. | 
| 33 |  |  |  |  |  |  | #   entries | 
| 34 |  |  |  |  |  |  | #	hash mapping volume tags to arrays of index entries | 
| 35 |  |  |  |  |  |  | #	-- This field is used to intern index entries within an index. | 
| 36 |  |  |  |  |  |  | #   entry_position_by_tag_offset | 
| 37 |  |  |  |  |  |  | #	hash mapping volume tags to hashes mapping record offset pairs to | 
| 38 |  |  |  |  |  |  | #	positions in entries arrays | 
| 39 |  |  |  |  |  |  | #	-- This field is used to intern index entries within an index. | 
| 40 |  |  |  |  |  |  | #   by | 
| 41 |  |  |  |  |  |  | #	nested hash mapping indexed columns and values to arrays of entries | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | sub _dbg_dump { | 
| 44 | 3 |  |  | 3 |  | 451 | my $self = shift; | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | my $out = | 
| 47 | 3 |  |  |  |  | 11 | (ref $self)." over ".(scalar @{$self->{volumes}})." volume(s):\n"; | 
|  | 3 |  |  |  |  | 11 |  | 
| 48 | 3 |  |  |  |  | 9 | $out .= "  $_\n" for map $_->filename, @{$self->{volumes}}; | 
|  | 3 |  |  |  |  | 19 |  | 
| 49 |  |  |  |  |  |  |  | 
| 50 | 3 |  |  |  |  | 20 | return $out; | 
| 51 |  |  |  |  |  |  | } | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | # implement WARC::Index interface | 
| 54 | 5 |  |  | 5 | 1 | 650 | sub attach { my $class = shift; $class->build(from => \@_) } | 
|  | 5 |  |  |  |  | 20 |  | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | # override build method inherited from WARC::Index | 
| 57 |  |  |  |  |  |  | sub build { | 
| 58 | 13 |  |  | 13 | 1 | 3531 | my $class = shift; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 13 |  |  |  |  | 48 | my @columns = @Default_Column_Set; | 
| 61 | 13 |  |  |  |  | 29 | my @from = (); | 
| 62 |  |  |  |  |  |  |  | 
| 63 | 13 | 100 |  |  |  | 45 | unless (defined wantarray) | 
| 64 | 1 |  |  |  |  | 140 | { carp "building volatile index in void context"; return } | 
|  | 1 |  |  |  |  | 10 |  | 
| 65 |  |  |  |  |  |  |  | 
| 66 | 12 |  |  |  |  | 33 | while (@_) { | 
| 67 | 17 |  |  |  |  | 44 | my $key = shift; | 
| 68 | 17 | 100 |  |  |  | 56 | if ($key eq 'from') { | 
|  |  | 100 |  |  |  |  |  | 
| 69 | 10 | 100 |  |  |  | 47 | if (UNIVERSAL::isa($_[0], 'ARRAY'))	{ @from = @{(shift)} } | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 30 |  | 
| 70 | 2 |  |  |  |  | 12 | else					{ @from = splice @_ } | 
| 71 | 6 |  |  |  |  | 15 | } elsif ($key eq 'columns')			{ @columns = @{(shift)} } | 
|  | 6 |  |  |  |  | 24 |  | 
| 72 | 1 |  |  |  |  | 267 | else { croak "unknown option '$key' building volatile index" } | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  |  | 
| 75 | 11 |  |  |  |  | 31 | my $_dvmap = \%WARC::Index::Entry::_distance_value_map; | 
| 76 |  |  |  |  |  |  | croak "unknown index column requested" | 
| 77 | 11 | 100 |  |  |  | 188 | if grep !defined $_dvmap->{$_}, @columns; | 
| 78 |  |  |  |  |  |  |  | 
| 79 |  |  |  |  |  |  | my $index = bless | 
| 80 | 13 |  |  |  |  | 69 | { by => {(map {$_ => {}} (grep $_dvmap->{$_}[0] eq 'exact', @columns)), | 
| 81 | 10 |  |  |  |  | 40 | (map {$_ => []} (grep $_dvmap->{$_}[0] eq 'prefix', @columns))} | 
|  | 2 |  |  |  |  | 9 |  | 
| 82 |  |  |  |  |  |  | }, $class; | 
| 83 |  |  |  |  |  |  |  | 
| 84 | 10 |  |  |  |  | 49 | $index->add($_) for @from; | 
| 85 |  |  |  |  |  |  |  | 
| 86 | 10 |  |  |  |  | 24 | { our $_total_constructed; $_total_constructed++ } | 
|  | 10 |  |  |  |  | 14 |  | 
|  | 10 |  |  |  |  | 27 |  | 
| 87 |  |  |  |  |  |  |  | 
| 88 | 10 |  |  |  |  | 60 | return $index; | 
| 89 |  |  |  |  |  |  | } | 
| 90 | 10 |  |  | 10 |  | 2606 | sub DESTROY { our $_total_destroyed; $_total_destroyed++ } | 
|  | 10 |  |  |  |  | 481 |  | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | sub searchable { | 
| 93 | 3 |  |  | 3 | 1 | 8 | my $self = shift; | 
| 94 | 3 |  |  |  |  | 6 | my $key = shift; | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 3 |  |  |  |  | 15 | return defined $self->{by}{$key}; | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | sub search { | 
| 100 | 57 |  |  | 57 | 1 | 4265 | my $self = shift; | 
| 101 |  |  |  |  |  |  |  | 
| 102 | 57 | 100 |  |  |  | 125 | unless (defined wantarray) | 
| 103 | 1 |  |  |  |  | 201 | { carp "calling 'search' method in void context"; return } | 
|  | 1 |  |  |  |  | 79 |  | 
| 104 |  |  |  |  |  |  |  | 
| 105 | 56 | 100 |  |  |  | 208 | croak "no arguments given to 'search' method" | 
| 106 |  |  |  |  |  |  | unless scalar @_; | 
| 107 | 55 | 100 |  |  |  | 218 | croak "odd number of arguments given to 'search' method" | 
| 108 |  |  |  |  |  |  | if scalar @_ % 2; | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 54 |  |  |  |  | 78 | my $key = undef; my $val = undef; | 
|  | 54 |  |  |  |  | 64 |  | 
| 111 | 54 |  |  |  |  | 140 | for (my $i = 0; $i < $#_; $i += 2) | 
| 112 | 71 | 100 |  |  |  | 292 | { (($key, $val) = @_[$i,1+$i]), last if ref $self->{by}{$_[$i]} eq 'ARRAY' } | 
| 113 | 54 |  |  |  |  | 184 | for (my $i = 0; $i < $#_; $i += 2) | 
| 114 | 54 | 100 |  |  |  | 191 | { (($key, $val) = @_[$i,1+$i]), last if ref $self->{by}{$_[$i]} eq 'HASH' } | 
| 115 | 54 | 100 |  |  |  | 225 | croak "no usable search key" unless $key; | 
| 116 |  |  |  |  |  |  |  | 
| 117 | 53 |  |  |  |  | 105 | my $mode = $WARC::Index::Entry::_distance_value_map{$key}[0]; | 
| 118 | 53 |  |  |  |  | 78 | my $refkey = $WARC::Index::Entry::_distance_value_map{$key}[1]; | 
| 119 |  |  |  |  |  |  |  | 
| 120 | 53 |  |  |  |  | 74 | my $rows; | 
| 121 | 53 | 100 |  |  |  | 119 | if ($mode eq 'exact') { | 
|  |  | 100 |  |  |  |  |  | 
| 122 | 51 |  |  |  |  | 100 | $rows = $self->{by}{$key}{$val}; | 
| 123 |  |  |  |  |  |  | } elsif ($mode eq 'prefix') { | 
| 124 | 1 |  |  |  |  | 2 | $rows = [grep $_->value($refkey) =~ m/^\Q$val/, @{$self->{by}{$key}}]; | 
|  | 1 |  |  |  |  | 7 |  | 
| 125 | 1 |  |  |  |  | 16 | } else { die "unimplemented search mode $mode" } | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 52 | 100 |  |  |  | 97 | if (wantarray) | 
| 128 | 49 |  |  |  |  | 115 | { return grep { $_->distance(@_) >= 0 } @$rows } | 
|  | 68 |  |  |  |  | 189 |  | 
| 129 |  |  |  |  |  |  | else { | 
| 130 | 3 |  |  |  |  | 4 | my $result = undef; my $result_distance = -1; | 
|  | 3 |  |  |  |  | 6 |  | 
| 131 | 3 |  |  |  |  | 7 | foreach my $entry (@$rows) { | 
| 132 | 13 |  |  |  |  | 30 | my $distance = $entry->distance(@_); | 
| 133 | 13 | 100 |  |  |  | 28 | unless (0 > $distance) { | 
| 134 | 10 | 100 | 100 |  |  | 37 | if ($result_distance < 0		# first match found | 
| 135 |  |  |  |  |  |  | or $distance < $result_distance)	# or better match found | 
| 136 | 4 |  |  |  |  | 5 | { $result = $entry; $result_distance = $distance } | 
|  | 4 |  |  |  |  | 8 |  | 
| 137 |  |  |  |  |  |  | } | 
| 138 | 13 | 100 |  |  |  | 33 | return $result if $result_distance == 0;	# no better match possible | 
| 139 |  |  |  |  |  |  | } | 
| 140 | 1 |  |  |  |  | 5 | return $result; | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | sub first_entry { | 
| 145 | 9 |  |  | 9 | 1 | 2506 | my $self = shift; | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 9 |  |  |  |  | 35 | return $self->{entries}{$self->{volumes}[0]->_file_tag}[0]; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | # implement WARC::Index::Builder interface | 
| 151 |  |  |  |  |  |  | sub _intern_volume ($$) { | 
| 152 | 244 |  |  | 244 |  | 334 | my $index = shift; | 
| 153 | 244 |  |  |  |  | 312 | my $volume = shift; | 
| 154 |  |  |  |  |  |  |  | 
| 155 | 244 |  |  |  |  | 592 | my $voltag = $volume->_file_tag; | 
| 156 |  |  |  |  |  |  | $index->{volume_position_by_tag}{$voltag} = | 
| 157 | 13 |  |  |  |  | 75 | ((push @{$index->{volumes}}, $volume) - 1) | 
| 158 | 244 | 100 |  |  |  | 979 | unless defined $index->{volume_position_by_tag}{$voltag}; | 
| 159 | 244 |  |  |  |  | 484 | $volume = $index->{volumes}[$index->{volume_position_by_tag}{$voltag}]; | 
| 160 |  |  |  |  |  |  |  | 
| 161 | 244 |  |  |  |  | 664 | return $volume, $voltag; | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  | sub _index_entry ($$) { | 
| 164 | 238 |  |  | 238 |  | 332 | my $index = shift; | 
| 165 | 238 |  |  |  |  | 317 | my $entry = shift; | 
| 166 |  |  |  |  |  |  |  | 
| 167 | 238 |  |  |  |  | 351 | foreach my $key (keys %{$index->{by}}) { | 
|  | 238 |  |  |  |  | 657 |  | 
| 168 | 368 |  |  |  |  | 666 | my $refkey = $WARC::Index::Entry::_distance_value_map{$key}[1]; | 
| 169 | 368 | 100 |  |  |  | 647 | next unless defined $entry->value($refkey); | 
| 170 | 353 | 100 |  |  |  | 814 | if (ref $index->{by}{$key} eq 'HASH') | 
|  |  | 100 |  |  |  |  |  | 
| 171 | 306 |  |  |  |  | 396 | { push @{$index->{by}{$key}{$entry->{$refkey}}}, $entry } | 
|  | 306 |  |  |  |  | 1565 |  | 
| 172 |  |  |  |  |  |  | elsif (ref $index->{by}{$key} eq 'ARRAY') | 
| 173 | 46 |  |  |  |  | 60 | { push @{$index->{by}{$key}}, $entry } # defer sort to outer call | 
|  | 46 |  |  |  |  | 164 |  | 
| 174 | 1 |  |  |  |  | 11 | else { die "unknown object in $key index slot" } | 
| 175 |  |  |  |  |  |  | } | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  | sub _add_record ($$) { | 
| 178 | 218 |  |  | 218 |  | 347 | my $index = shift; | 
| 179 | 218 |  |  |  |  | 296 | my $record = shift; | 
| 180 |  |  |  |  |  |  |  | 
| 181 | 218 |  |  |  |  | 332 | my $volume; my $voltag; | 
| 182 | 218 |  |  |  |  | 479 | ($volume, $voltag) = _intern_volume $index, $record->volume; | 
| 183 |  |  |  |  |  |  |  | 
| 184 | 218 |  |  |  |  | 650 | my $offset = $record->offset; | 
| 185 | 218 |  |  |  |  | 1078 | my $entry = WARC::Index::Volatile::Entry->_new | 
| 186 |  |  |  |  |  |  | ( _index => $index, _volume => $volume, _record_offset => $offset ); | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | # intern entry | 
| 189 | 218 | 100 |  |  |  | 624 | return if defined $index->{entry_position_by_tag_offset}{$voltag}{$offset}; | 
| 190 |  |  |  |  |  |  | $index->{entry_position_by_tag_offset}{$voltag}{$offset} = | 
| 191 | 213 |  |  |  |  | 305 | ((push @{$index->{entries}{$voltag}}, $entry) - 1); | 
|  | 213 |  |  |  |  | 750 |  | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | # populate entry | 
| 194 | 213 |  |  |  |  | 600 | $entry->{time} = $record->date; | 
| 195 | 213 |  |  |  |  | 560 | $entry->{record_id} = $record->id; | 
| 196 |  |  |  |  |  |  | $entry->{segment_origin_id} = $record->field('WARC-Segment-Origin-ID') | 
| 197 |  |  |  |  |  |  | if exists $index->{by}{segment_origin_id} | 
| 198 | 213 | 50 | 33 |  |  | 663 | && $record->type eq 'continuation'; | 
| 199 |  |  |  |  |  |  | $entry->{url} = $record->field('WARC-Target-URI') | 
| 200 |  |  |  |  |  |  | if (exists $index->{by}{url} || exists $index->{by}{url_prefix}) | 
| 201 | 213 | 100 | 100 |  |  | 796 | && defined $record->field('WARC-Target-URI'); | 
|  |  |  | 100 |  |  |  |  | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 213 |  |  |  |  | 479 | _index_entry $index, $entry; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | sub _add_volume ($$) { | 
| 206 | 12 |  |  | 12 |  | 23 | my $index = shift; | 
| 207 | 12 |  |  |  |  | 24 | my $volume = shift; | 
| 208 |  |  |  |  |  |  |  | 
| 209 | 12 |  |  |  |  | 29 | for (my $record = $volume->first_record; $record; $record = $record->next) | 
| 210 | 192 |  |  |  |  | 484 | { _add_record $index, $record } | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | sub _add_entry ($$) { | 
| 214 | 51 |  |  | 51 |  | 74 | my $index = shift; | 
| 215 | 51 |  |  |  |  | 75 | my $source = shift; | 
| 216 |  |  |  |  |  |  |  | 
| 217 | 51 | 100 |  |  |  | 68 | if (grep !defined $source->value($_), keys %{$index->{by}}) | 
|  | 51 |  |  |  |  | 167 |  | 
| 218 |  |  |  |  |  |  | # at least one column not in source index; index the record instead | 
| 219 | 25 |  |  |  |  | 81 | { _add_record $index, $source->record } | 
| 220 |  |  |  |  |  |  | else { | 
| 221 |  |  |  |  |  |  | # volume and offset can be retrieved from a stub record without I/O | 
| 222 | 26 |  |  |  |  | 74 | my $rstub = $source->record; my $volume; my $voltag; | 
|  | 26 |  |  |  |  | 48 |  | 
| 223 | 26 |  |  |  |  | 59 | ($volume, $voltag) = _intern_volume $index, $rstub->volume; | 
| 224 | 26 |  |  |  |  | 75 | my $offset = $rstub->offset; | 
| 225 |  |  |  |  |  |  | my $entry = WARC::Index::Volatile::Entry->_new | 
| 226 |  |  |  |  |  |  | ( _index => $index, _volume => $volume, _record_offset => $offset, | 
| 227 | 26 |  |  |  |  | 44 | map { $_ => $source->value($_) } keys %{$index->{by}} ); | 
|  | 27 |  |  |  |  | 54 |  | 
|  | 26 |  |  |  |  | 63 |  | 
| 228 |  |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | # intern entry | 
| 230 | 26 | 100 |  |  |  | 79 | return if defined $index->{entry_position_by_tag_offset}{$voltag}{$offset}; | 
| 231 |  |  |  |  |  |  | $index->{entry_position_by_tag_offset}{$voltag}{$offset} = | 
| 232 | 25 |  |  |  |  | 39 | ((push @{$index->{entries}{$voltag}}, $entry) - 1); | 
|  | 25 |  |  |  |  | 74 |  | 
| 233 |  |  |  |  |  |  |  | 
| 234 | 25 |  |  |  |  | 47 | _index_entry $index, $entry; | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  | sub _add_index ($$) { | 
| 238 | 2 |  |  | 2 |  | 4 | my $index = shift; | 
| 239 | 2 |  |  |  |  | 5 | my $source = shift; | 
| 240 |  |  |  |  |  |  |  | 
| 241 | 2 |  |  |  |  | 21 | for (my $entry = $source->first_entry; $entry; $entry = $entry->next) | 
| 242 | 50 |  |  |  |  | 106 | { _add_entry $index, $entry } | 
| 243 |  |  |  |  |  |  | } | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | sub add { | 
| 246 | 17 |  |  | 17 | 1 | 904 | my $self = shift; | 
| 247 |  |  |  |  |  |  |  | 
| 248 | 17 |  |  |  |  | 44 | foreach (@_) { | 
| 249 | 17 | 100 |  |  |  | 43 | if (not ref) | 
| 250 |  |  |  |  |  |  | # treat a loose scalar as a WARC volume file name | 
| 251 | 10 |  |  |  |  | 62 | { _add_volume $self, (mount WARC::Volume ($_)) } | 
| 252 |  |  |  |  |  |  | else { | 
| 253 | 7 | 100 |  |  |  | 103 | if   ($_->isa('WARC::Volume'))		{ _add_volume	$self, $_ } | 
|  | 2 | 100 |  |  |  | 10 |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 254 | 2 |  |  |  |  | 8 | elsif($_->isa('WARC::Index'))		{ _add_index	$self, $_ } | 
| 255 | 1 |  |  |  |  | 4 | elsif($_->isa('WARC::Index::Entry'))	{ _add_entry	$self, $_ } | 
| 256 | 1 |  |  |  |  | 5 | elsif($_->isa('WARC::Record::FromVolume')){ _add_record	$self, $_ } | 
| 257 | 1 |  |  |  |  | 33 | else { croak "unrecognized object $_" } | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  | } | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | # sort any array-based columns | 
| 262 | 15 |  |  |  |  | 30 | foreach my $key (keys %{$self->{by}}) { | 
|  | 15 |  |  |  |  | 59 |  | 
| 263 | 23 | 100 |  |  |  | 91 | next unless ref $self->{by}{$key} eq 'ARRAY'; | 
| 264 | 2 |  |  |  |  | 7 | my $refkey = $WARC::Index::Entry::_distance_value_map{$key}[1]; | 
| 265 | 2 |  |  |  |  | 11 | @{$self->{by}{$key}} = | 
| 266 | 2 |  |  |  |  | 5 | sort { $a->value($refkey) cmp $b->value($refkey) } @{$self->{by}{$key}}; | 
|  | 144 |  |  |  |  | 230 |  | 
|  | 2 |  |  |  |  | 14 |  | 
| 267 |  |  |  |  |  |  | } | 
| 268 |  |  |  |  |  |  | } | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | { | 
| 271 |  |  |  |  |  |  | package WARC::Index::Volatile::Entry; | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | our @ISA = qw(WARC::Index::Entry); | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | # This implementation uses a hash as the underlying structure. | 
| 276 |  |  |  |  |  |  | # | 
| 277 |  |  |  |  |  |  | # Accessible search keys are stored directly in the hash, while internal | 
| 278 |  |  |  |  |  |  | #  values are stored with names with a leading underscore. | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | #  Keys defined by this class: | 
| 281 |  |  |  |  |  |  | # | 
| 282 |  |  |  |  |  |  | #   _index | 
| 283 |  |  |  |  |  |  | #	weak reference to parent index | 
| 284 |  |  |  |  |  |  | #   _volume | 
| 285 |  |  |  |  |  |  | #	reference to volume containing record | 
| 286 |  |  |  |  |  |  | #   _record_offset | 
| 287 |  |  |  |  |  |  | #	offset of record within containing volume | 
| 288 |  |  |  |  |  |  |  | 
| 289 | 208 |  |  | 208 |  | 346 | sub index { (shift)->{_index} } | 
| 290 | 489 |  |  | 489 |  | 1176 | sub volume { (shift)->{_volume} } | 
| 291 | 431 |  |  | 431 |  | 1549 | sub record_offset { (shift)->{_record_offset} } | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | sub next { | 
| 294 | 208 |  |  | 208 |  | 30465 | my $self = shift; | 
| 295 |  |  |  |  |  |  |  | 
| 296 | 208 |  |  |  |  | 455 | my $idx = $self->index; | 
| 297 | 208 |  |  |  |  | 408 | my $vt = $self->volume->_file_tag; | 
| 298 | 208 |  |  |  |  | 715 | my $off = $self->record_offset; | 
| 299 |  |  |  |  |  |  |  | 
| 300 |  |  |  |  |  |  | my $next = $idx->{entries}{$vt} | 
| 301 | 208 |  |  |  |  | 682 | [1+$idx->{entry_position_by_tag_offset}{$vt}{$off}]; | 
| 302 | 208 | 100 |  |  |  | 964 | return $next if defined $next; | 
| 303 |  |  |  |  |  |  |  | 
| 304 | 10 |  |  |  |  | 26 | my $nextvol = $idx->{volumes}[1+$idx->{volume_position_by_tag}{$vt}]; | 
| 305 | 10 | 100 |  |  |  | 29 | $next = $idx->{entries}{$nextvol->_file_tag}[0] if defined $nextvol; | 
| 306 |  |  |  |  |  |  |  | 
| 307 | 10 |  |  |  |  | 39 | return $next; | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | sub value { | 
| 311 | 1198 |  |  | 1198 |  | 3645 | my $self = shift; | 
| 312 | 1198 |  |  |  |  | 1609 | my $key = shift; | 
| 313 |  |  |  |  |  |  |  | 
| 314 | 1198 | 100 |  |  |  | 2379 | return undef if $key =~ m/^_/; | 
| 315 | 1197 |  |  |  |  | 2854 | return $self->{$key}; | 
| 316 |  |  |  |  |  |  | } | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | sub _new { | 
| 319 | 244 |  |  | 244 |  | 412 | my $class = shift; | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 244 |  |  |  |  | 784 | my $entry = bless { @_ }, $class; | 
| 322 | 244 |  |  |  |  | 955 | Scalar::Util::weaken $entry->{_index}; | 
| 323 | 244 |  |  |  |  | 470 | return $entry; | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | 1; | 
| 328 |  |  |  |  |  |  | __END__ |