| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package KSx::Highlight::Summarizer; | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | $VERSION = '0.06'; | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | @ISA = KinoSearch::Highlight::Highlighter; | 
| 6 | 1 |  |  | 1 |  | 1500 | use KinoSearch::Highlight::Highlighter; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 57 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 44 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 1 |  |  | 1 |  | 24 | use List::Util qw 'min'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 169 |  | 
| 11 | 1 |  |  | 1 |  | 1073 | use Number::Range; | 
|  | 1 |  |  |  |  | 26897 |  | 
|  | 1 |  |  |  |  | 76 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 1 |  |  | 1 |  | 1509 | use Hash::Util::FieldHash::Compat 'fieldhashes'; | 
|  | 1 |  |  |  |  | 25608 |  | 
|  | 1 |  |  |  |  | 10 |  | 
| 14 |  |  |  |  |  |  | fieldhashes \my( %ellipsis, %summ_len, %page_h, %encoder ); | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | sub _range_endpoints { | 
| 17 | 0 |  |  | 0 |  | 0 | my $range = shift; | 
| 18 | 0 |  |  |  |  | 0 | my @range = $range->range; | 
| 19 | 0 |  |  |  |  | 0 | my $previous = shift @range; | 
| 20 | 0 |  |  |  |  | 0 | my $subrange = [($previous) x 2]; | 
| 21 | 0 |  |  |  |  | 0 | my @arrays; | 
| 22 | 0 |  |  |  |  | 0 | foreach my $current (@range) { | 
| 23 | 0 | 0 |  |  |  | 0 | if ($current == ($previous + 1)) { | 
| 24 | 0 |  |  |  |  | 0 | $subrange->[1] = $current; | 
| 25 |  |  |  |  |  |  | } | 
| 26 |  |  |  |  |  |  | else { | 
| 27 | 0 |  |  |  |  | 0 | push @arrays, $subrange; | 
| 28 | 0 |  |  |  |  | 0 | $subrange = [($current) x 2]; | 
| 29 |  |  |  |  |  |  | } | 
| 30 | 0 |  |  |  |  | 0 | $previous = $current; | 
| 31 |  |  |  |  |  |  | } | 
| 32 | 0 |  |  |  |  | 0 | return @arrays, $subrange; # Make sure the last subrange isn’t left out! | 
| 33 |  |  |  |  |  |  | } | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | sub new { | 
| 36 | 1 |  |  | 1 | 1 | 31123 | my($pack, %args) = @_; | 
| 37 | 1 | 50 |  |  |  | 10 | my $ellipsis = exists $args{ellipsis} ? delete $args{ellipsis} | 
| 38 |  |  |  |  |  |  | : ' ... '; | 
| 39 | 1 | 50 |  |  |  | 4 | my $summ_len = exists $args{summary_length} | 
| 40 |  |  |  |  |  |  | ? delete $args{summary_length} : 0; | 
| 41 | 1 |  |  |  |  | 4 | my $page_h = delete $args{page_handler}; | 
| 42 | 1 |  |  |  |  | 3 | my $encoder   = delete $args{encoder}; | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | # accept args that the superclass only allows one to set through | 
| 45 |  |  |  |  |  |  | # accessor methods: | 
| 46 | 1 |  |  |  |  | 3 | my $pre_tag = delete $args{pre_tag}; | 
| 47 | 1 |  |  |  |  | 2 | my $post_tag = delete $args{post_tag}; | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 1 |  |  |  |  | 201 | my $self = SUPER::new $pack %args; | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 0 |  |  |  |  |  | $ellipsis{$self} = $ellipsis; | 
| 52 | 0 |  |  |  |  |  | $summ_len{$self} = $summ_len; | 
| 53 | 0 |  |  |  |  |  | $page_h{$self}   = $page_h; | 
| 54 | 0 |  |  |  |  |  | $encoder{$self}  = $encoder; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 0 | 0 |  |  |  |  | defined $pre_tag and $self->set_pre_tag($pre_tag); | 
| 57 | 0 | 0 |  |  |  |  | defined $post_tag and $self->set_post_tag($post_tag); | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 0 |  |  |  |  |  | return $self; | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | sub create_excerpt { | 
| 63 | 0 |  |  | 0 | 1 |  | my ($self, $hitdoc) = @_; | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 0 |  |  |  |  |  | my $field = $self->get_field; | 
| 66 | 0 |  |  |  |  |  | my $x_len = $self->get_excerpt_length; | 
| 67 | 0 |  |  |  |  |  | my $limit = int($x_len /3 ); | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | # retrieve the text from the chosen field | 
| 70 | 0 |  |  |  |  |  | my $text = $hitdoc->{$field}; | 
| 71 | 0 | 0 |  |  |  |  | return unless defined $text; | 
| 72 | 0 |  |  |  |  |  | my $text_length = length $text; | 
| 73 | 0 | 0 |  |  |  |  | return '' unless $text_length; | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | # get offsets and weights of words that match | 
| 76 | 0 |  |  |  |  |  | my $searcher = $self->get_searchable; | 
| 77 | 0 |  |  |  |  |  | my $posits = $self->get_compiler->highlight_spans( | 
| 78 |  |  |  |  |  |  | searchable => $searcher, | 
| 79 |  |  |  |  |  |  | field      => $field, | 
| 80 |  |  |  |  |  |  | doc_vec    => $searcher->fetch_doc_vec( | 
| 81 |  |  |  |  |  |  | $hitdoc->get_doc_id | 
| 82 |  |  |  |  |  |  | ), | 
| 83 |  |  |  |  |  |  | ); | 
| 84 | 0 |  |  |  |  |  | my @locs = map [$_->get_offset,$_->get_weight], @{ | 
| 85 | 0 |  |  |  |  |  | KinoSearch::Highlight::HeatMap->new( | 
| 86 |  |  |  |  |  |  | spans  => $posits, | 
| 87 |  |  |  |  |  |  | window => $limit*2 | 
| 88 |  |  |  |  |  |  | )->get_spans | 
| 89 |  |  |  |  |  |  | }; | 
| 90 | 0 |  |  |  |  |  | @locs = map $$_[0], sort { $$b[1] <=> $$a[1] } @locs; | 
|  | 0 |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 0 | 0 |  |  |  |  | @locs or @locs = 0; | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | #warn "@locs" if $summ_len{$self}; | 
| 95 |  |  |  |  |  |  | # determine the rough boundaries of the excerpts | 
| 96 | 0 |  |  |  |  |  | my $range = new Number::Range; | 
| 97 | 0 |  |  |  |  |  | my $summ_len = $summ_len{$self}; | 
| 98 | 0 |  |  |  |  |  | for(@locs) { | 
| 99 | 1 |  |  | 1 |  | 1316 | no warnings; # suppress Number::Range’s nasty warnings | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 1701 |  | 
| 100 | 0 |  |  |  |  |  | my $start = $_-$limit; | 
| 101 | 0 | 0 |  |  |  |  | $start = 0 if $start < 0; | 
| 102 | 0 |  |  |  |  |  | $range->addrange($start . '..' . min($start+$x_len, $text_length)); | 
| 103 | 0 | 0 | 0 |  |  |  | last if !$summ_len || $range->size >= $summ_len; | 
| 104 |  |  |  |  |  |  | } | 
| 105 | 0 |  |  |  |  |  | my @excerpt_bounds = _range_endpoints($range); | 
| 106 |  |  |  |  |  |  | #use DDS; warn Dump \@excerpt_bounds if $summ_len; | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | # close small gaps between ranges | 
| 109 | 0 |  |  |  |  |  | for(my $c = 1; $c < @excerpt_bounds;++$c) { | 
| 110 | 0 | 0 |  |  |  |  | $excerpt_bounds[$c][0] - $excerpt_bounds[$c-1][1] <= 10 and | 
| 111 |  |  |  |  |  |  | $excerpt_bounds[$c-1][1] = $excerpt_bounds[$c][1], | 
| 112 |  |  |  |  |  |  | splice(@excerpt_bounds, $c, 1), | 
| 113 |  |  |  |  |  |  | --$c; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | # extract the offsets from the highlight spans | 
| 117 | 0 |  |  |  |  |  | my(@starts, @ends); | 
| 118 | 0 |  |  |  |  |  | for(@$posits) { | 
| 119 | 0 |  |  |  |  |  | push(@starts, my $start = $_->get_offset); | 
| 120 | 0 |  |  |  |  |  | push(@ends,   $start + $_->get_length); | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | # make the summary | 
| 124 | 0 |  |  |  |  |  | my $summary = ''; | 
| 125 | 0 |  |  |  |  |  | my $ellipsis = $ellipsis{$self}; | 
| 126 | 0 |  |  |  |  |  | my $token_re = qr/\b\w+(?:'\w+)?\b/; | 
| 127 | 0 |  |  |  |  |  | my $prev_ellipsis; # whether the previous excerpt ended with an ellip. | 
| 128 | 0 |  |  |  |  |  | my $prev_page = 0; # last page number of previous excerpt | 
| 129 | 0 |  |  |  |  |  | my $page_h = $page_h{$self}; | 
| 130 | 0 |  |  |  |  |  | for(@excerpt_bounds) { | 
| 131 |  |  |  |  |  |  | # make the excerpt | 
| 132 | 0 |  |  |  |  |  | my ($start,$end) = @$_; | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | # determine the page number that $start falls within | 
| 135 | 0 |  |  |  |  |  | my $page_no; | 
| 136 | 0 | 0 |  |  |  |  | $page_h and $page_no = | 
| 137 |  |  |  |  |  |  | substr($text, 0,$start) =~ y/\014// + 1; | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 0 |  |  |  |  |  | my $x; # short for x-cerpt | 
| 140 |  |  |  |  |  |  | my $need_ellipsis; | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | #warn "<<".substr($text,$start,$limit).">>"; | 
| 143 |  |  |  |  |  |  | # look for a page break within $limit chars from $start (except we | 
| 144 |  |  |  |  |  |  | # shouldn’t do it if $start is 0 because there’s  a  good  chance | 
| 145 |  |  |  |  |  |  | # we’ll go past the very word for whose sake this excerpt exists) | 
| 146 |  |  |  |  |  |  | # ~~~ What about a case in which a page break plus maybe a few | 
| 147 |  |  |  |  |  |  | #     spaces occur just *before* $start. That shouldn’t get an | 
| 148 |  |  |  |  |  |  | #     ellipsis  (as in the  elsif  block  below),  should  it? | 
| 149 | 0 | 0 | 0 |  |  |  | if($page_h && $start && | 
|  |  | 0 | 0 |  |  |  |  | 
| 150 |  |  |  |  |  |  | substr($text,$start,$limit) =~ /^(.*)\014/s) { | 
| 151 | 0 |  |  |  |  |  | $start += length($1) + 1; | 
| 152 | 0 |  |  |  |  |  | $page_no += 1 + $1 =~ y/\014//; | 
| 153 | 0 |  |  |  |  |  | $x = substr $text, $start; | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  | elsif( $start ) { # if this is not the beginning of the doc | 
| 156 | 0 |  |  |  |  |  | my $sb = $self->find_sentences( | 
| 157 |  |  |  |  |  |  | text => $text, offset => $start, length => $limit | 
| 158 |  |  |  |  |  |  | ); | 
| 159 | 0 | 0 |  |  |  |  | if(@$sb) { | 
| 160 | 0 |  |  |  |  |  | $start = $$sb[0]; | 
| 161 |  |  |  |  |  |  | } | 
| 162 | 0 |  |  |  |  |  | else { ++ $need_ellipsis } | 
| 163 | 0 |  |  |  |  |  | $x = substr $text, $start; | 
| 164 | 0 | 0 |  |  |  |  | if($need_ellipsis) { | 
| 165 |  |  |  |  |  |  | # skip past possible partial tokens, but don’t insert an | 
| 166 |  |  |  |  |  |  | # ellipsis yet, because it might need to come after a | 
| 167 |  |  |  |  |  |  | # page marker | 
| 168 | 0 | 0 |  |  |  |  | if ($x =~ s/ | 
| 169 |  |  |  |  |  |  | \A | 
| 170 |  |  |  |  |  |  | ( | 
| 171 |  |  |  |  |  |  | .{1,$limit}?  # don't go outside the window | 
| 172 |  |  |  |  |  |  | ) | 
| 173 |  |  |  |  |  |  | (?=$token_re)  # just b4 the start of a full token | 
| 174 |  |  |  |  |  |  | //xsm | 
| 175 |  |  |  |  |  |  | ) | 
| 176 |  |  |  |  |  |  | { | 
| 177 | 0 |  |  |  |  |  | $start += length($1); | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  | } | 
| 180 |  |  |  |  |  |  | } | 
| 181 | 0 |  |  |  |  |  | else { $x = substr $text, $start } | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | # trim unwanted text from the end of the excerpt | 
| 184 | 0 |  |  |  |  |  | $x = substr $x, 0, $end-$start+1;  # +1 ’cos we need that extra | 
| 185 |  |  |  |  |  |  | #  char later | 
| 186 | 0 |  |  |  |  |  | my $end_with_ellipsis = 0; | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | # if we’ve trimmed the end of the text | 
| 189 | 0 | 0 |  |  |  |  | if ( $end < $text_length) {{ # doubled so ‘last’ will work | 
| 190 |  |  |  |  |  |  | # check to see whether there are page breaks after the high- | 
| 191 |  |  |  |  |  |  | # lighted word, and stop at the first one if so | 
| 192 | 0 | 0 | 0 |  |  |  | if ($page_h and substr($x, $limit*-2) =~ s/(\014[^\014]*)//) { | 
|  | 0 |  |  |  |  |  |  | 
| 193 | 0 |  |  |  |  |  | $end -= length $1; last; | 
|  | 0 |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | # remove possible partial tokens from the end of the excerpt | 
| 197 | 0 |  |  |  |  |  | my $extra_char = chop $x; # the char we left dangling earlier | 
| 198 |  |  |  |  |  |  | # if the extra char wasn't part of a token, then we’re not | 
| 199 |  |  |  |  |  |  | # splitting one | 
| 200 | 0 | 0 |  |  |  |  | if ( $extra_char =~ $token_re ) { | 
| 201 | 0 |  |  |  |  |  | $x =~ s/$token_re$//;  # if this fails, that's fine | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | # if the excerpt doesn't end with a full stop, end with | 
| 205 |  |  |  |  |  |  | # an ellipsis | 
| 206 | 0 | 0 |  |  |  |  | if ( $x !~ /\.\s*\Z/xsm ) { | 
| 207 | 0 |  |  |  |  |  | $x =~ s/\W+\Z//xsm; | 
| 208 | 0 |  |  |  |  |  | $x .= $ellipsis; | 
| 209 | 0 |  |  |  |  |  | ++$end_with_ellipsis; | 
| 210 |  |  |  |  |  |  | } | 
| 211 |  |  |  |  |  |  | }} | 
| 212 |  |  |  |  |  |  | #warn $x if $page_h; | 
| 213 |  |  |  |  |  |  |  | 
| 214 |  |  |  |  |  |  | # get the offsets that are within range for the excerpt, and make | 
| 215 |  |  |  |  |  |  | # them relative to $start | 
| 216 | 0 |  |  |  |  |  | my @relative_starts = map $_-$start, @starts; | 
| 217 | 0 |  |  |  |  |  | my @relative_ends   = map $_-$start, @ends; | 
| 218 | 0 |  |  |  |  |  | my $this_x_len = $end - $start; | 
| 219 | 0 |  | 0 |  |  |  | while ( @relative_starts and $relative_starts[0] < 0 ) { | 
| 220 | 0 |  |  |  |  |  | shift @relative_starts; | 
| 221 | 0 |  |  |  |  |  | shift @relative_ends; | 
| 222 |  |  |  |  |  |  | } | 
| 223 | 0 |  | 0 |  |  |  | while ( @relative_ends and $relative_ends[-1] > $this_x_len ) { | 
| 224 | 0 |  |  |  |  |  | pop @relative_starts; | 
| 225 | 0 |  |  |  |  |  | pop @relative_ends; | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | # insert highlight tags and page break markers | 
| 229 |  |  |  |  |  |  | # sstart and send stand for span start and end | 
| 230 | 0 |  |  |  |  |  | my ( $sstart, $send, $last_sstart, $last_send ) = | 
| 231 |  |  |  |  |  |  | (  undef,  undef,  0,            0 ); | 
| 232 | 0 | 0 |  |  |  |  | if($page_h) { # Some of this code *is* repeated redundantly, but it | 
| 233 |  |  |  |  |  |  | # should  theoretically  run  faster  since  the | 
| 234 |  |  |  |  |  |  | # if($page_h) check doesn’t have to be made every | 
| 235 |  |  |  |  |  |  | # time through the loop. | 
| 236 | 0 | 0 | 0 |  |  |  | $prev_page != $page_no | 
|  |  |  | 0 |  |  |  |  | 
| 237 |  |  |  |  |  |  | ? ( | 
| 238 |  |  |  |  |  |  | $summary .= &$page_h($hitdoc, $page_no), | 
| 239 |  |  |  |  |  |  | $need_ellipsis && ($summary .= $ellipsis) | 
| 240 |  |  |  |  |  |  | ) : $need_ellipsis && !$prev_ellipsis && | 
| 241 |  |  |  |  |  |  | ($summary .= $ellipsis) | 
| 242 |  |  |  |  |  |  | ; | 
| 243 | 0 |  |  |  |  |  | while (@relative_starts) { | 
| 244 | 0 |  |  |  |  |  | $send   = shift @relative_ends; | 
| 245 | 0 |  |  |  |  |  | $sstart = shift @relative_starts; | 
| 246 | 0 | 0 | 0 |  |  |  | $summary .= _encode_with_pb( $self, | 
| 247 |  |  |  |  |  |  | substr( $x, $last_send, $sstart - $last_send ), | 
| 248 |  |  |  |  |  |  | $page_h, \$page_no, $hitdoc | 
| 249 |  |  |  |  |  |  | ) unless !$last_send && !$sstart; | 
| 250 | 0 |  |  |  |  |  | $summary .= $self->highlight( | 
| 251 |  |  |  |  |  |  | _encode_with_pb( $self, | 
| 252 |  |  |  |  |  |  | substr( $x, $sstart, $send - $sstart ), | 
| 253 |  |  |  |  |  |  | $page_h, \$page_no, $hitdoc | 
| 254 |  |  |  |  |  |  | ) | 
| 255 |  |  |  |  |  |  | ); | 
| 256 | 0 |  |  |  |  |  | $last_send = $send; | 
| 257 |  |  |  |  |  |  | } | 
| 258 | 0 | 0 |  |  |  |  | $summary .= _encode_with_pb( $self, | 
| 259 |  |  |  |  |  |  | substr( $x, $last_send ), | 
| 260 |  |  |  |  |  |  | $page_h, \$page_no, $hitdoc | 
| 261 |  |  |  |  |  |  | ) unless $last_send == length $x; | 
| 262 | 0 |  |  |  |  |  | $prev_page = $page_no; | 
| 263 |  |  |  |  |  |  | } | 
| 264 |  |  |  |  |  |  | else { | 
| 265 | 0 | 0 | 0 |  |  |  | $need_ellipsis and !$prev_ellipsis and $summary .= $ellipsis; | 
| 266 | 0 |  |  |  |  |  | while (@relative_starts) { | 
| 267 | 0 |  |  |  |  |  | $send   = shift @relative_ends; | 
| 268 | 0 |  |  |  |  |  | $sstart = shift @relative_starts; | 
| 269 | 0 | 0 | 0 |  |  |  | $summary .= $self->encode( | 
| 270 |  |  |  |  |  |  | substr( $x, $last_send, $sstart - $last_send ) ) | 
| 271 |  |  |  |  |  |  | unless !$last_send && !$sstart; | 
| 272 | 0 |  |  |  |  |  | $summary .= $self->highlight( | 
| 273 |  |  |  |  |  |  | $self->encode( | 
| 274 |  |  |  |  |  |  | substr( $x, $sstart, $send - $sstart ) | 
| 275 |  |  |  |  |  |  | ) | 
| 276 |  |  |  |  |  |  | ); | 
| 277 | 0 |  |  |  |  |  | $last_send = $send; | 
| 278 |  |  |  |  |  |  | } | 
| 279 | 0 | 0 |  |  |  |  | $summary .= $self->encode( substr( $x, $last_send ) ) | 
| 280 |  |  |  |  |  |  | unless $last_send == length $x; | 
| 281 |  |  |  |  |  |  | } | 
| 282 |  |  |  |  |  |  |  | 
| 283 | 0 |  |  |  |  |  | $prev_ellipsis = $end_with_ellipsis; | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | } | 
| 286 |  |  |  |  |  |  |  | 
| 287 | 0 |  |  |  |  |  | return $summary; | 
| 288 |  |  |  |  |  |  | } | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | # This is not called as a method above, because it’s a private routine that | 
| 291 |  |  |  |  |  |  | # should not be overridden (it is not guaranteed to exist in future ver- | 
| 292 |  |  |  |  |  |  | # sions), and it’s faster to call it as a function. | 
| 293 |  |  |  |  |  |  | sub _encode_with_pb { # w/page breaks | 
| 294 | 0 |  |  | 0 |  |  | my ($self, $text, $page_h, $page_no_ref, $hitdoc) = @_; | 
| 295 | 0 |  |  |  |  |  | my @to_encode = split /\014/, $text, -1; # -1 to allow trailing | 
| 296 | 0 |  |  |  |  |  | my $ret = '';                            #  null fields | 
| 297 | 0 | 0 |  |  |  |  | $ret .= $self->encode(shift @to_encode) if length $to_encode[0]; | 
| 298 | 0 |  |  |  |  |  | for(@to_encode) { | 
| 299 | 0 |  |  |  |  |  | $ret .= &$page_h($hitdoc, ++$$page_no_ref); | 
| 300 | 0 | 0 |  |  |  |  | $ret .= $self->encode($_) if length; | 
| 301 |  |  |  |  |  |  | } | 
| 302 | 0 |  |  |  |  |  | $ret; | 
| 303 |  |  |  |  |  |  | } | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | sub encode { | 
| 306 | 0 |  |  | 0 | 1 |  | my @__ = @_; # workaround for perl5.8.8 bug | 
| 307 |  |  |  |  |  |  | &{ | 
| 308 | 0 | 0 |  |  |  |  | $encoder{$__[0]} or return shift(@__)->SUPER::encode(@__) | 
|  | 0 |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | }($__[1]) | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | 1; | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | __END__ |