| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # build and eval the code to efficiently merge several iterators in one iterator | 
| 2 |  |  |  |  |  |  | package Iterator::Merger; | 
| 3 | 2 |  |  | 2 |  | 2529 | use strict; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 50 |  | 
| 4 | 2 |  |  | 2 |  | 8 | use warnings; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 52 |  | 
| 5 | 2 |  |  | 2 |  | 9 | use Carp; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 122 |  | 
| 6 | 2 |  |  | 2 |  | 12 | use base 'Exporter'; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 264 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | our $VERSION = '0.62'; | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 2 |  |  | 2 |  | 14 | use constant DEBUG => 0; | 
|  | 2 |  |  |  |  | 11 |  | 
|  | 2 |  |  |  |  | 279 |  | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 13 |  |  |  |  |  |  | imerge | 
| 14 |  |  |  |  |  |  | imerge_num | 
| 15 |  |  |  |  |  |  | imerge_raw | 
| 16 |  |  |  |  |  |  | ); | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | our %EXPORT_TAGS = ( | 
| 19 |  |  |  |  |  |  | all => \@EXPORT_OK | 
| 20 |  |  |  |  |  |  | ); | 
| 21 |  |  |  |  |  |  |  | 
| 22 | 2 |  |  | 2 |  | 12 | use constant HAS_ARRAY_HEAP  => eval "use Array::Heap;1"; | 
|  | 2 |  |  | 2 |  | 4 |  | 
|  | 2 |  |  |  |  | 125 |  | 
|  | 2 |  |  |  |  | 891 |  | 
|  | 2 |  |  |  |  | 866 |  | 
|  | 2 |  |  |  |  | 129 |  | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | our $Max_generate; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | unless (defined $Max_generate) { | 
| 27 |  |  |  |  |  |  | $Max_generate = HAS_ARRAY_HEAP ? 9 : 12; # 10 => ~30KiB to eval (doubles each increment) | 
| 28 |  |  |  |  |  |  | } | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | my %Generator_cache; | 
| 31 |  |  |  |  |  |  |  | 
| 32 | 225 | 100 | 100 | 225 |  | 254466 | *imerge_raw = eval q! | 
|  | 213 | 50 | 100 |  |  | 794 |  | 
|  | 16 | 100 | 66 |  |  | 25 |  | 
|  | 16 | 50 |  |  |  | 89 |  | 
|  | 0 | 100 |  |  |  | 0 |  | 
|  | 2138 |  |  |  |  | 47194 |  | 
|  | 197 |  |  |  |  | 458 |  | 
|  | 1668 |  |  |  |  | 2796 |  | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 197 |  |  |  |  | 412 |  | 
|  | 1668 |  |  |  |  | 2782 |  | 
|  | 196 |  |  |  |  | 418 |  | 
|  | 196 |  |  |  |  | 977 |  | 
|  | 104302 |  |  |  |  | 3573900 |  | 
|  | 21238 |  |  |  |  | 346273 |  | 
|  | 21266 |  |  |  |  | 45081 |  | 
|  | 1470 |  |  |  |  | 2252 |  | 
| 33 |  |  |  |  |  |  | # try to use the defined-or operator | 
| 34 |  |  |  |  |  |  | sub { | 
| 35 |  |  |  |  |  |  | my @ites = @_ or return sub {}; | 
| 36 |  |  |  |  |  |  | if (@ites==1) { | 
| 37 |  |  |  |  |  |  | my $ite = shift; | 
| 38 |  |  |  |  |  |  | return ref($ite) eq 'GLOB' ? sub {scalar <$ite>} : sub {scalar &$ite}; | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  | for (@ites) { | 
| 41 |  |  |  |  |  |  | if (ref($_) eq 'GLOB') { | 
| 42 |  |  |  |  |  |  | my $fh = $_; | 
| 43 |  |  |  |  |  |  | $_ = sub {<$fh>} | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  | croak "arguments must be CODE references or filehandles" if grep {ref($_) ne 'CODE'} @ites; | 
| 47 |  |  |  |  |  |  | my $ite = shift(@ites); | 
| 48 |  |  |  |  |  |  | sub { | 
| 49 |  |  |  |  |  |  | &$ite // do { | 
| 50 |  |  |  |  |  |  | { # block for redo | 
| 51 |  |  |  |  |  |  | $ite = shift(@ites) || return; | 
| 52 |  |  |  |  |  |  | &$ite // redo | 
| 53 |  |  |  |  |  |  | } | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  | } | 
| 56 |  |  |  |  |  |  | } | 
| 57 |  |  |  |  |  |  | ! || eval q! | 
| 58 |  |  |  |  |  |  | # default to use defined() and a temporary variable | 
| 59 |  |  |  |  |  |  | sub { | 
| 60 |  |  |  |  |  |  | my @ites = @_ or return sub {}; | 
| 61 |  |  |  |  |  |  | if (@ites==1) { | 
| 62 |  |  |  |  |  |  | my $ite = shift; | 
| 63 |  |  |  |  |  |  | return ref($ite) eq 'GLOB' ? sub {scalar <$ite>} : sub {scalar &$ite}; | 
| 64 |  |  |  |  |  |  | } | 
| 65 |  |  |  |  |  |  | for (@ites) { | 
| 66 |  |  |  |  |  |  | if (ref($_) eq 'GLOB') { | 
| 67 |  |  |  |  |  |  | my $fh = $_; | 
| 68 |  |  |  |  |  |  | $_ = sub {<$fh>} | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  | croak "arguments must be CODE references or filehandles" if grep {ref($_) ne 'CODE'} @ites; | 
| 72 |  |  |  |  |  |  | my $ite = shift(@ites); | 
| 73 |  |  |  |  |  |  | sub { | 
| 74 |  |  |  |  |  |  | my $next = &$ite; | 
| 75 |  |  |  |  |  |  | until (defined $next) { | 
| 76 |  |  |  |  |  |  | $ite = shift(@ites) || return; | 
| 77 |  |  |  |  |  |  | $next = &$ite; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  | $next | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  | ! || die $@; | 
| 83 |  |  |  |  |  |  |  | 
| 84 |  |  |  |  |  |  | sub imerge { | 
| 85 | 225 |  |  | 225 | 1 | 307348 | _imerge(1, \@_) | 
| 86 |  |  |  |  |  |  | } | 
| 87 |  |  |  |  |  |  |  | 
| 88 |  |  |  |  |  |  | sub imerge_num { | 
| 89 | 225 |  |  | 225 | 1 | 652494 | _imerge(0, \@_) | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | sub _imerge { | 
| 93 | 450 |  |  | 450 |  | 1301 | my ($lex, $iterators) = @_; | 
| 94 | 450 |  |  |  |  | 1309 | my $nb = @$iterators; | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 450 | 100 |  |  |  | 951 | croak "arguments must be CODE references or filehandles" if grep {ref($_) !~ /^CODE$|^GLOB$/} @$iterators; | 
|  | 3368 |  |  |  |  | 9382 |  | 
| 97 |  |  |  |  |  |  |  | 
| 98 | 448 | 100 |  |  |  | 3202 | if ($nb==0) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 99 | 24 |  |  | 2424 |  | 125 | return sub {undef}; | 
|  | 2424 |  |  |  |  | 14736 |  | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  | elsif ($nb==1) { | 
| 102 |  |  |  |  |  |  | #return $iterators->[0]; | 
| 103 |  |  |  |  |  |  | # ensure scalar context | 
| 104 | 32 |  |  |  |  | 53 | my $ite = $iterators->[0]; | 
| 105 | 32 | 50 |  | 4383 |  | 182 | return ref($ite) eq 'GLOB' ? sub {scalar <$ite>} : sub {scalar &$ite}; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 4383 |  |  |  |  | 40070 |  | 
| 106 |  |  |  |  |  |  | } | 
| 107 |  |  |  |  |  |  | elsif ($nb <= $Max_generate) { | 
| 108 | 224 |  |  |  |  | 524 | DEBUG && warn "generate"; | 
| 109 | 224 | 50 |  |  |  | 524 | if ($nb == grep {ref($_) eq 'GLOB'} @$iterators) { | 
|  | 1232 |  |  |  |  | 2388 |  | 
| 110 |  |  |  |  |  |  | # only globs | 
| 111 | 0 |  | 0 |  |  | 0 | my $code = $Generator_cache{$nb, $lex, 1} ||= _merger_generator($nb, $lex, 1); | 
| 112 | 0 |  |  |  |  | 0 | return $code->(@$iterators); | 
| 113 |  |  |  |  |  |  | } else { | 
| 114 | 224 |  |  |  |  | 540 | for (@$iterators) { | 
| 115 | 1232 | 50 |  |  |  | 2276 | if (ref($_) eq 'GLOB') { | 
| 116 | 0 |  |  |  |  | 0 | my $fh = $_; | 
| 117 | 0 |  |  | 0 |  | 0 | $_ = sub {<$fh>} | 
| 118 | 0 |  |  |  |  | 0 | } | 
| 119 |  |  |  |  |  |  | } | 
| 120 | 224 |  | 66 |  |  | 1422 | my $code = $Generator_cache{$nb, $lex, 0} ||= _merger_generator($nb, $lex, 0); | 
| 121 | 224 |  |  |  |  | 5532 | return $code->(@$iterators); | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  | else { | 
| 125 |  |  |  |  |  |  | # no generation, giveup on some ultimate optim: lets turn all GLOBs to CODEs... | 
| 126 | 168 |  |  |  |  | 551 | for (@$iterators) { | 
| 127 | 2100 | 50 |  |  |  | 3837 | if (ref($_) eq 'GLOB') { | 
| 128 | 0 |  |  |  |  | 0 | my $fh = $_; | 
| 129 | 0 |  |  | 0 |  | 0 | $_ = sub {<$fh>} | 
| 130 | 0 |  |  |  |  | 0 | } | 
| 131 |  |  |  |  |  |  | } | 
| 132 | 168 |  |  |  |  | 365 | if (HAS_ARRAY_HEAP) { | 
| 133 | 168 |  |  |  |  | 272 | DEBUG && warn "heap"; | 
| 134 |  |  |  |  |  |  | # general case, use a heap | 
| 135 | 168 |  |  |  |  | 318 | my @heap; | 
| 136 |  |  |  |  |  |  | # cannot take references to *_heap_lex and *_heap functions, | 
| 137 |  |  |  |  |  |  | # due to prototype problems... | 
| 138 | 168 | 100 |  |  |  | 498 | if ($lex) { | 
| 139 | 84 |  |  |  |  | 208 | for my $ite (@$iterators) { | 
| 140 | 1050 |  |  |  |  | 1572 | my $val = &$ite; | 
| 141 | 1050 | 100 |  |  |  | 6257 | Array::Heap::push_heap_lex(@heap, [$val, $ite]) if defined $val; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | return sub { | 
| 144 | 61903 |  | 100 | 61903 |  | 675135 | my $data = Array::Heap::pop_heap_lex(@heap) || return undef; | 
| 145 | 53419 |  |  |  |  | 64735 | my $min = $data->[0]; | 
| 146 | 53419 | 100 |  |  |  | 70321 | if ( defined($data->[0] = $data->[1]->()) ) { | 
| 147 | 52387 |  |  |  |  | 227109 | Array::Heap::push_heap_lex(@heap, $data); | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  | $min | 
| 150 | 84 |  |  |  |  | 701 | }; | 
|  | 53419 |  |  |  |  | 85318 |  | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  | else { | 
| 153 | 84 |  |  |  |  | 201 | for my $ite (@$iterators) { | 
| 154 | 1050 |  |  |  |  | 1794 | my $val = &$ite; | 
| 155 | 1050 | 100 |  |  |  | 5579 | Array::Heap::push_heap(@heap, [$val, $ite]) if defined $val; | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  | return sub { | 
| 158 | 62468 |  | 100 | 62468 |  | 630994 | my $data = Array::Heap::pop_heap(@heap) || return undef; | 
| 159 | 53984 |  |  |  |  | 61933 | my $min = $data->[0]; | 
| 160 | 53984 | 100 |  |  |  | 68416 | if ( defined($data->[0] = $data->[1]->()) ) { | 
| 161 | 52952 |  |  |  |  | 208388 | Array::Heap::push_heap(@heap, $data); | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  | $min | 
| 164 | 84 |  |  |  |  | 716 | }; | 
|  | 53984 |  |  |  |  | 76916 |  | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  | else { | 
| 168 |  |  |  |  |  |  | DEBUG && warn "brutal"; | 
| 169 |  |  |  |  |  |  | # no heap available, lets be dirty | 
| 170 |  |  |  |  |  |  | my @values = map {scalar &$_} @$iterators; | 
| 171 |  |  |  |  |  |  | #		warn "values: ", join(", ", map {length($_)?1:0} @values), "\n"; | 
| 172 |  |  |  |  |  |  | if ($lex) { | 
| 173 |  |  |  |  |  |  | return sub { | 
| 174 | 0 |  |  | 0 |  | 0 | my $i=-1; | 
| 175 | 0 |  |  |  |  | 0 | my $min; | 
| 176 |  |  |  |  |  |  | my $min_i; | 
| 177 | 0 |  |  |  |  | 0 | for (@values) { | 
| 178 | 0 |  |  |  |  | 0 | ++$i; | 
| 179 | 0 | 0 | 0 |  |  | 0 | if (defined and ((not defined $min) or ($_ lt $min))) { | 
|  |  |  | 0 |  |  |  |  | 
| 180 | 0 |  |  |  |  | 0 | $min = $_; | 
| 181 | 0 |  |  |  |  | 0 | $min_i = $i; | 
| 182 |  |  |  |  |  |  | } | 
| 183 |  |  |  |  |  |  | } | 
| 184 | 0 | 0 |  |  |  | 0 | $values[$min_i] = $iterators->[$min_i]->() if defined $min_i; | 
| 185 |  |  |  |  |  |  | #				warn "value is ", (length($min)?1:0), " from $min_i"; | 
| 186 | 0 |  |  |  |  | 0 | $min | 
| 187 |  |  |  |  |  |  | }; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  | else { | 
| 190 |  |  |  |  |  |  | return sub { | 
| 191 | 0 |  |  | 0 |  | 0 | my $i=-1; | 
| 192 | 0 |  |  |  |  | 0 | my $min; | 
| 193 |  |  |  |  |  |  | my $min_i; | 
| 194 | 0 |  |  |  |  | 0 | for (@values) { | 
| 195 | 0 |  |  |  |  | 0 | ++$i; | 
| 196 | 0 | 0 | 0 |  |  | 0 | if (defined and ((not defined $min) or ($_ < $min))) { | 
|  |  |  | 0 |  |  |  |  | 
| 197 | 0 |  |  |  |  | 0 | $min = $_; | 
| 198 | 0 |  |  |  |  | 0 | $min_i = $i; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  | } | 
| 201 | 0 | 0 |  |  |  | 0 | $values[$min_i] = $iterators->[$min_i]->() if defined $min_i; | 
| 202 | 0 |  |  |  |  | 0 | $min | 
| 203 |  |  |  |  |  |  | }; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | } | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | sub _merger_generator { | 
| 210 | 16 |  |  | 16 |  | 57 | my ($nb, $lex, $globs) = @_; | 
| 211 | 16 |  |  |  |  | 38 | my $str = "no warnings;sub{"; | 
| 212 | 16 |  |  |  |  | 47 | $str .= "my(". join(',', map {"\$i$_"} 1..$nb). ")=\@_;"; | 
|  | 88 |  |  |  |  | 188 |  | 
| 213 | 16 | 50 |  |  |  | 150 | $str .= $globs ? "my\$n$_=<\$i$_>;" : "my\$n$_=&\$i$_;" for 1..$nb; | 
| 214 | 16 |  |  |  |  | 34 | $str .= "my\$r;sub{"; | 
| 215 | 16 | 100 |  |  |  | 48 | my $cmp = $lex ? ' lt' : '<'; | 
| 216 | 16 |  |  |  |  | 70 | $str .= _cmp($cmp, $globs, 1..$nb); | 
| 217 | 16 |  |  |  |  | 48 | $str .= ";\$r}}"; | 
| 218 |  |  |  |  |  |  |  | 
| 219 |  |  |  |  |  |  | # $str =~ s/;/;\n/g; | 
| 220 |  |  |  |  |  |  | # $str =~ s/\$/ \$/g; | 
| 221 |  |  |  |  |  |  | # $str =~ s/{/ {\n/g; | 
| 222 |  |  |  |  |  |  | # $str =~ s/}/ }\n/g; | 
| 223 |  |  |  |  |  |  | # warn "\n\n$str\n\n"; | 
| 224 |  |  |  |  |  |  |  | 
| 225 | 16 | 50 |  | 1 |  | 1167 | eval($str) || die "$@ in $str" | 
|  | 1 |  |  | 1 |  | 9 |  | 
|  | 1 |  |  | 1 |  | 2 |  | 
|  | 1 |  |  | 1 |  | 131 |  | 
|  | 1 |  |  | 1 |  | 9 |  | 
|  | 1 |  |  | 1 |  | 2 |  | 
|  | 1 |  |  | 1 |  | 187 |  | 
|  | 1 |  |  | 1 |  | 10 |  | 
|  | 1 |  |  | 1 |  | 1 |  | 
|  | 1 |  |  | 1 |  | 212 |  | 
|  | 1 |  |  | 1 |  | 8 |  | 
|  | 1 |  |  | 1 |  | 3 |  | 
|  | 1 |  |  | 1 |  | 265 |  | 
|  | 1 |  |  | 1 |  | 9 |  | 
|  | 1 |  |  | 1 |  | 2 |  | 
|  | 1 |  |  | 1 |  | 469 |  | 
|  | 1 |  |  |  |  | 8 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 759 |  | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 1389 |  | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 2683 |  | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 129 |  | 
|  | 1 |  |  |  |  | 10 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 163 |  | 
|  | 1 |  |  |  |  | 8 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 248 |  | 
|  | 1 |  |  |  |  | 8 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 290 |  | 
|  | 1 |  |  |  |  | 8 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 470 |  | 
|  | 1 |  |  |  |  | 7 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 731 |  | 
|  | 1 |  |  |  |  | 7 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 1399 |  | 
|  | 1 |  |  |  |  | 9 |  | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 2638 |  | 
| 226 |  |  |  |  |  |  | } | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | # recursive comparison expression building | 
| 229 |  |  |  |  |  |  | sub _cmp { | 
| 230 | 2024 |  |  | 2024 |  | 2916 | my ($cmp, $globs, $i, $j) = splice(@_, 0, 4); | 
| 231 | 2024 | 50 |  |  |  | 4984 | return $globs ? "(\$r=\$n$i,\$n$i=<\$i$i>)" : "(\$r=\$n$i,\$n$i=&\$i$i)" unless defined $j; | 
|  |  | 100 |  |  |  |  |  | 
| 232 | 1004 |  |  |  |  | 1897 | "(!defined\$n$j||defined\$n$i&&\$n$i$cmp\$n$j)?". _cmp($cmp, $globs, $i, @_). ":". _cmp($cmp, $globs, $j, @_) | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | 1 |