| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::Sets::Sort; | 
| 2 |  |  |  |  |  |  | $App::Sets::Sort::VERSION = '0.976'; | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 4 |  |  | 4 |  | 35155 | use strict; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 132 |  | 
| 6 | 4 |  |  | 4 |  | 16 | use warnings; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 121 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | # ABSTRACT: sort handling | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 4 |  |  | 4 |  | 18 | use English qw( -no_match_vars ); | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 20 |  | 
| 11 | 4 |  |  | 4 |  | 1571 | use 5.010; | 
|  | 4 |  |  |  |  | 11 |  | 
| 12 | 4 |  |  | 4 |  | 3014 | use File::Temp qw< tempfile >; | 
|  | 4 |  |  |  |  | 55044 |  | 
|  | 4 |  |  |  |  | 255 |  | 
| 13 | 4 |  |  | 4 |  | 24 | use Fcntl qw< :seek >; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 451 |  | 
| 14 | 4 |  |  | 4 |  | 609 | use Log::Log4perl::Tiny qw< :easy :dead_if_first >; | 
|  | 4 |  |  |  |  | 5474 |  | 
|  | 4 |  |  |  |  | 31 |  | 
| 15 | 4 |  |  | 4 |  | 934 | use base 'Exporter'; | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 3604 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | our @EXPORT_OK = qw< sort_filehandle internal_sort_filehandle >; | 
| 18 |  |  |  |  |  |  | our @EXPORT = qw< sort_filehandle >; | 
| 19 |  |  |  |  |  |  | our %EXPORT_TAGS = ( | 
| 20 |  |  |  |  |  |  | default => [ @EXPORT ], | 
| 21 |  |  |  |  |  |  | all => [ @EXPORT_OK ], | 
| 22 |  |  |  |  |  |  | ); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | sub _test_external_sort { | 
| 25 | 2 |  |  | 2 |  | 3 | my $filename; | 
| 26 |  |  |  |  |  |  |  | 
| 27 | 2 | 50 |  |  |  | 4 | eval { | 
| 28 | 2 |  |  |  |  | 9 | (my $fh, $filename) = tempfile(); # might croak | 
| 29 | 2 |  |  |  |  | 977 | binmode $fh, ':raw'; | 
| 30 | 2 | 50 |  |  |  | 5 | print {$fh} "one\ntwo\nthree\nfour\n" or die 'whatever'; | 
|  | 2 |  |  |  |  | 33 |  | 
| 31 | 2 | 50 |  |  |  | 102 | close $fh or die 'whatever'; | 
| 32 |  |  |  |  |  |  | } or return; | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 2 | 50 |  |  |  | 6 | my $fh = eval { | 
| 35 | 2 |  |  |  |  | 3891 | open my $tfh, '-|', 'sort', '-u', $filename; | 
| 36 | 2 |  |  |  |  | 43 | $tfh; | 
| 37 |  |  |  |  |  |  | } or return; | 
| 38 | 2 |  |  |  |  | 1660 | my @lines = <$fh>; | 
| 39 | 2 | 50 |  |  |  | 16 | return unless scalar(@lines) == 4; | 
| 40 | 2 | 50 |  |  |  | 8 | return unless defined $lines[3]; | 
| 41 | 2 |  |  |  |  | 26 | $lines[3] =~ s{\s+}{}gmxs; | 
| 42 | 2 | 50 |  |  |  | 47 | return unless $lines[3] eq 'two'; | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 2 |  |  |  |  | 77 | return 1; | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | sub sort_filehandle { | 
| 48 | 129 |  |  | 129 | 0 | 644 | my ($filename, $config) = @_; | 
| 49 | 129 |  | 100 |  |  | 246 | $config ||= {}; | 
| 50 | 129 |  | 66 |  |  | 154 | state $has_sort = (!$config->{internal_sort}) && _test_external_sort(); | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 129 | 100 |  |  |  | 213 | if ($has_sort) { | 
| 53 | 65 |  |  |  |  | 60 | my $fh; | 
| 54 | 65 | 50 |  |  |  | 75 | eval { open $fh, '-|', 'sort', '-u', $filename } and return $fh; | 
|  | 65 |  |  |  |  | 89954 |  | 
| 55 | 0 |  |  |  |  | 0 | WARN 'cannot use system sort, falling back to internal implementation'; | 
| 56 | 0 |  |  |  |  | 0 | $has_sort = 0; # from now on, use internal sort | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 64 |  |  |  |  | 99 | return internal_sort_filehandle($filename); | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | sub internal_sort_filehandle { | 
| 63 | 65 |  |  | 65 | 0 | 157 | my ($filename) = @_; | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | # Open input stream | 
| 66 | 65 | 50 |  |  |  | 1710 | open my $ifh, '<', $filename | 
| 67 |  |  |  |  |  |  | or LOGDIE "open('$filename'): $OS_ERROR"; | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | # Maximum values hints taken from Perl Power Tools' sort | 
| 70 | 65 |  | 100 |  |  | 240 | my $max_records = $ENV{SETS_MAX_RECORDS} || 200_000; | 
| 71 | 65 |  | 100 |  |  | 166 | my $max_files = $ENV{SETS_MAX_FILES} || 40; | 
| 72 | 65 |  |  |  |  | 66 | my (@records, @fhs); | 
| 73 | 65 |  |  |  |  | 665 | while (<$ifh>) { | 
| 74 | 618 |  |  |  |  | 477 | chomp; | 
| 75 | 618 |  |  |  |  | 590 | push @records, $_; | 
| 76 | 618 | 100 |  |  |  | 1468 | if (@records >= $max_records) { | 
| 77 | 3 |  |  |  |  | 7 | push @fhs, _flush_to_temp(\@records); | 
| 78 | 3 | 50 |  |  |  | 15 | _compact(\@fhs) if @fhs >= $max_files - 1; | 
| 79 |  |  |  |  |  |  | } | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 | 65 | 50 |  |  |  | 240 | push @fhs, _flush_to_temp(\@records) if @records; | 
| 83 | 65 |  |  |  |  | 153 | _compact(\@fhs); | 
| 84 | 65 | 50 |  |  |  | 799 | return $fhs[0] if @fhs; | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | # seems like the file was empty... so it's sorted | 
| 87 | 0 |  |  |  |  | 0 | seek $ifh, 0, SEEK_SET; | 
| 88 | 0 |  |  |  |  | 0 | return $ifh; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | sub _flush_to_temp { | 
| 92 | 68 |  |  | 68 |  | 72 | my ($records) = @_; | 
| 93 | 68 |  |  |  |  | 171 | my $tfh = tempfile(UNLINK => 1); | 
| 94 | 68 |  |  |  |  | 30553 | my $previous; | 
| 95 | 68 |  |  |  |  | 334 | for my $item (sort @$records) { | 
| 96 | 618 | 50 | 33 |  |  | 1043 | next if defined($previous) && $previous eq $item; | 
| 97 | 618 |  |  |  |  | 447 | print {$tfh} $item, $INPUT_RECORD_SEPARATOR; | 
|  | 618 |  |  |  |  | 1558 |  | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 68 |  |  |  |  | 140 | @$records = (); | 
| 100 | 68 |  |  |  |  | 1599 | seek $tfh, 0, SEEK_SET; | 
| 101 | 68 |  |  |  |  | 149 | return $tfh; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | sub _compact { | 
| 105 | 68 |  |  | 68 |  | 80 | my ($fhs) = @_; | 
| 106 | 68 | 100 |  |  |  | 157 | return if @$fhs == 1; | 
| 107 |  |  |  |  |  |  |  | 
| 108 |  |  |  |  |  |  | # where the output will end up | 
| 109 | 3 |  |  |  |  | 9 | my $ofh = tempfile(UNLINK => 1); | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | # convenience hash for tracking all contributors | 
| 112 |  |  |  |  |  |  | my %its = map { | 
| 113 | 3 |  |  |  |  | 1263 | my $fh = $fhs->[$_]; | 
|  | 6 |  |  |  |  | 9 |  | 
| 114 | 6 |  |  |  |  | 25 | my $head = <$fh>; | 
| 115 | 6 | 50 |  |  |  | 11 | if (defined $head) { | 
| 116 | 6 |  |  |  |  | 5 | chomp($head); | 
| 117 | 6 |  |  |  |  | 40 | $_ => [ $fh, $head ]; | 
| 118 |  |  |  |  |  |  | } | 
| 119 | 0 |  |  |  |  | 0 | else { () } | 
| 120 |  |  |  |  |  |  | } 0 .. $#$fhs; | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | # iterate until all contributors are exhausted | 
| 123 | 3 |  |  |  |  | 11 | while (scalar keys %its) { | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | # select the best (i.e. "lower"), cleanup on the way | 
| 126 | 25 |  |  |  |  | 35 | my ($fk, @keys) = keys %its; | 
| 127 | 25 |  |  |  |  | 25 | my $best = $its{$fk}[1]; | 
| 128 | 25 |  |  |  |  | 28 | for my $key (@keys) { | 
| 129 | 14 |  |  |  |  | 11 | my $head = $its{$key}[1]; | 
| 130 | 14 | 100 |  |  |  | 28 | $best = $head if $best gt $head; | 
| 131 |  |  |  |  |  |  | } | 
| 132 | 25 |  |  |  |  | 18 | print {$ofh} $best, $INPUT_RECORD_SEPARATOR; | 
|  | 25 |  |  |  |  | 66 |  | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | # get rid of the best in all iterators, cleanup on the way | 
| 135 |  |  |  |  |  |  | KEY: | 
| 136 | 25 |  |  |  |  | 25 | for my $key ($fk, @keys) { | 
| 137 | 39 |  |  |  |  | 32 | my $head = $its{$key}[1]; | 
| 138 | 39 |  |  |  |  | 68 | while ($head eq $best) { | 
| 139 | 25 |  |  |  |  | 41 | $head = readline $its{$key}[0]; | 
| 140 | 25 | 100 |  |  |  | 27 | if (defined $head) { | 
| 141 | 19 |  |  |  |  | 51 | chomp($its{$key}[1] = $head); | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | else { | 
| 144 | 6 |  |  |  |  | 10 | delete $its{$key}; | 
| 145 | 6 |  |  |  |  | 16 | next KEY; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  | } | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | # rewind, finalize compacting, return | 
| 152 | 3 |  |  |  |  | 64 | seek $ofh, 0, SEEK_SET; | 
| 153 | 3 |  |  |  |  | 7 | @$fhs = ($ofh); | 
| 154 | 3 |  |  |  |  | 10 | return; | 
| 155 |  |  |  |  |  |  | } | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | 1; | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | __END__ |