| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::Sets; | 
| 2 |  |  |  |  |  |  | $App::Sets::VERSION = '0.976'; | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 3 |  |  | 3 |  | 88058 | use strict; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 85 |  | 
| 6 | 3 |  |  | 3 |  | 11 | use warnings; | 
|  | 3 |  |  |  |  | 3 |  | 
|  | 3 |  |  |  |  | 77 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | # ABSTRACT: set operations in Perl | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 3 |  |  | 3 |  | 655 | use English qw( -no_match_vars ); | 
|  | 3 |  |  |  |  | 3323 |  | 
|  | 3 |  |  |  |  | 14 |  | 
| 11 | 3 |  |  | 3 |  | 1009 | use 5.010; | 
|  | 3 |  |  |  |  | 8 |  | 
| 12 |  |  |  |  |  |  | use Getopt::Long | 
| 13 | 3 |  |  | 3 |  | 2256 | qw< GetOptionsFromArray :config pass_through no_ignore_case bundling >; | 
|  | 3 |  |  |  |  | 32439 |  | 
|  | 3 |  |  |  |  | 13 |  | 
| 14 | 3 |  |  | 3 |  | 2329 | use Pod::Usage qw< pod2usage >; | 
|  | 3 |  |  |  |  | 123200 |  | 
|  | 3 |  |  |  |  | 262 |  | 
| 15 | 3 |  |  | 3 |  | 2062 | use Log::Log4perl::Tiny qw< :easy :dead_if_first LOGLEVEL >; | 
|  | 3 |  |  |  |  | 17308 |  | 
|  | 3 |  |  |  |  | 13 |  | 
| 16 | 3 |  |  | 3 |  | 2076 | use App::Sets::Parser; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 114 |  | 
| 17 | 3 |  |  | 3 |  | 1145 | use App::Sets::Iterator; | 
|  | 3 |  |  |  |  | 5 |  | 
|  | 3 |  |  |  |  | 75 |  | 
| 18 | 3 |  |  | 3 |  | 1052 | use App::Sets::Operations; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 85 |  | 
| 19 | 3 |  |  | 3 |  | 1169 | use App::Sets::Sort qw< sort_filehandle >; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 3275 |  | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | my %config = ( | 
| 22 |  |  |  |  |  |  | binmode => ':raw:encoding(UTF-8)', | 
| 23 |  |  |  |  |  |  | loglevel => 'INFO', | 
| 24 |  |  |  |  |  |  | parsedebug => 0, | 
| 25 |  |  |  |  |  |  | ); | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | sub populate_config { | 
| 28 | 64 |  |  | 64 | 0 | 262 | my (@args) = @_; | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 64 | 50 |  |  |  | 180 | $config{sorted} = 1                if $ENV{SETS_SORTED}; | 
| 31 | 64 | 50 |  |  |  | 142 | $config{trim}   = 1                if $ENV{SETS_TRIM}; | 
| 32 | 64 | 50 |  |  |  | 132 | $config{cache}  = $ENV{SETS_CACHE} if exists $ENV{SETS_CACHE}; | 
| 33 |  |  |  |  |  |  | $config{loglevel}  = $ENV{SETS_LOGLEVEL} | 
| 34 | 64 | 50 |  |  |  | 136 | if exists $ENV{SETS_LOGLEVEL}; | 
| 35 |  |  |  |  |  |  | $config{parsedebug}  = $ENV{SETS_PARSEDEBUG} | 
| 36 | 64 | 50 |  |  |  | 100 | if exists $ENV{SETS_PARSEDEBUG}; | 
| 37 |  |  |  |  |  |  | $config{internal_sort} = $ENV{SETS_INTERNAL_SORT} | 
| 38 | 64 | 100 |  |  |  | 142 | if exists $ENV{SETS_INTERNAL_SORT}; | 
| 39 | 64 | 50 |  |  |  | 112 | $config{binmode} = $ENV{SETS_BINMODE} if $ENV{SETS_BINMODE}; | 
| 40 | 64 | 50 |  |  |  | 302 | GetOptionsFromArray( | 
| 41 |  |  |  |  |  |  | \@args, \%config, qw< man help usage version | 
| 42 |  |  |  |  |  |  | binmode|b=s | 
| 43 |  |  |  |  |  |  | cache|cache-sorted|S=s | 
| 44 |  |  |  |  |  |  | internal_sort|internal-sort|I! | 
| 45 |  |  |  |  |  |  | loglevel|l=s | 
| 46 |  |  |  |  |  |  | sorted|s! | 
| 47 |  |  |  |  |  |  | trim|t! | 
| 48 |  |  |  |  |  |  | > | 
| 49 |  |  |  |  |  |  | ) | 
| 50 |  |  |  |  |  |  | or pod2usage( | 
| 51 |  |  |  |  |  |  | -verbose  => 99, | 
| 52 |  |  |  |  |  |  | -sections => 'USAGE', | 
| 53 |  |  |  |  |  |  | ); | 
| 54 | 64 | 50 | 0 |  |  | 39097 | $App::Sets::VERSION | 
| 55 |  |  |  |  |  |  | //= '0.972' unless defined $App::Sets::VERSION; | 
| 56 |  |  |  |  |  |  | pod2usage(message => "$0 $App::Sets::VERSION", -verbose => 99, | 
| 57 |  |  |  |  |  |  | -sections => ' ') | 
| 58 | 64 | 50 |  |  |  | 132 | if $config{version}; | 
| 59 |  |  |  |  |  |  | pod2usage( | 
| 60 |  |  |  |  |  |  | -verbose  => 99, | 
| 61 |  |  |  |  |  |  | -sections => 'USAGE' | 
| 62 | 64 | 50 |  |  |  | 106 | ) if $config{usage}; | 
| 63 |  |  |  |  |  |  | pod2usage( | 
| 64 |  |  |  |  |  |  | -verbose  => 99, | 
| 65 |  |  |  |  |  |  | -sections => 'USAGE|EXAMPLES|OPTIONS' | 
| 66 | 64 | 50 |  |  |  | 106 | ) if $config{help}; | 
| 67 | 64 | 50 |  |  |  | 98 | pod2usage(-verbose => 2) if $config{man}; | 
| 68 |  |  |  |  |  |  |  | 
| 69 | 64 |  |  |  |  | 259 | LOGLEVEL $config{loglevel}; | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | $config{cache} = '.sorted' | 
| 72 |  |  |  |  |  |  | if exists $config{cache} | 
| 73 | 64 | 50 | 0 |  |  | 1044 | && !(defined($config{cache}) && length($config{cache})); | 
|  |  |  | 33 |  |  |  |  | 
| 74 | 64 | 50 |  |  |  | 163 | $config{sorted} = 1 if exists $config{cache}; | 
| 75 |  |  |  |  |  |  |  | 
| 76 | 64 | 50 |  |  |  | 148 | if (exists $config{cache}) { | 
|  |  | 50 |  |  |  |  |  | 
| 77 | 0 |  |  |  |  | 0 | INFO "using sort cache or generating it when not available"; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  | elsif ($config{sorted}) { | 
| 80 | 0 |  |  |  |  | 0 | INFO "assuming input files are sorted"; | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  | INFO "trimming away leading/trailing whitespaces" | 
| 83 | 64 | 50 |  |  |  | 102 | if $config{trim}; | 
| 84 |  |  |  |  |  |  |  | 
| 85 | 64 | 50 |  |  |  | 108 | pod2usage( | 
| 86 |  |  |  |  |  |  | -verbose  => 99, | 
| 87 |  |  |  |  |  |  | -sections => 'USAGE', | 
| 88 |  |  |  |  |  |  | ) unless @args; | 
| 89 |  |  |  |  |  |  |  | 
| 90 | 64 |  |  |  |  | 183 | return @args; | 
| 91 |  |  |  |  |  |  | } ## end sub populate_config | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | sub run { | 
| 94 | 64 |  |  | 64 | 0 | 29034 | my $package = shift; | 
| 95 | 64 |  |  |  |  | 174 | my @args    = populate_config(@_); | 
| 96 |  |  |  |  |  |  |  | 
| 97 | 64 |  |  |  |  | 68 | my $input; | 
| 98 | 64 | 50 |  |  |  | 103 | if (@args > 1) { | 
| 99 | 64 | 50 |  |  |  | 112 | shift @args if $args[0] eq '--'; | 
| 100 | 64 | 50 |  |  |  | 122 | LOGDIE "only file op file [op file...] " | 
| 101 |  |  |  |  |  |  | . "with multiple parameters (@args)...\n" | 
| 102 |  |  |  |  |  |  | unless @args % 2; | 
| 103 | 64 |  |  |  |  | 55 | my @chunks; | 
| 104 | 64 |  |  |  |  | 106 | while (@args) { | 
| 105 | 128 |  |  |  |  | 201 | push @chunks, escape(shift @args); | 
| 106 | 128 | 100 |  |  |  | 360 | push @chunks, shift @args if @args; | 
| 107 |  |  |  |  |  |  | } | 
| 108 | 64 |  |  |  |  | 243 | $input = join ' ', @chunks; | 
| 109 |  |  |  |  |  |  | } ## end if (@args > 1) | 
| 110 |  |  |  |  |  |  | else { | 
| 111 | 0 |  |  |  |  | 0 | $input = shift @args; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 64 | 50 |  |  |  | 115 | LOGLEVEL('DEBUG') if $config{parsedebug}; | 
| 115 | 64 |  |  |  |  | 275 | DEBUG "parsing >$input<"; | 
| 116 | 64 |  |  |  |  | 1117 | my $expression = App::Sets::Parser::parse($input, 0); | 
| 117 | 64 |  |  |  |  | 172 | LOGLEVEL($config{loglevel}); | 
| 118 |  |  |  |  |  |  |  | 
| 119 | 64 |  |  | 2 |  | 1394 | binmode STDOUT, $config{binmode}; | 
|  | 2 |  |  |  |  | 16 |  | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 8 |  | 
| 120 |  |  |  |  |  |  |  | 
| 121 | 64 |  |  |  |  | 4656 | my $it = expression($expression); | 
| 122 | 64 |  |  |  |  | 222 | while (defined(my $item = $it->drop())) { | 
| 123 | 548 |  |  |  |  | 417 | print {*STDOUT} $item; | 
|  | 548 |  |  |  |  | 2806 |  | 
| 124 | 548 | 50 |  |  |  | 1763 | print {*STDOUT} "\n" if $config{trim}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 125 |  |  |  |  |  |  | } | 
| 126 | 64 |  |  |  |  | 1663 | return; | 
| 127 |  |  |  |  |  |  | } ## end sub run | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub escape { | 
| 130 | 128 |  |  | 128 | 0 | 118 | my ($text) = @_; | 
| 131 | 128 |  |  |  |  | 770 | $text =~ s{(\W)}{\\$1}gmxs; | 
| 132 | 128 |  |  |  |  | 226 | return $text; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | sub expression { | 
| 136 | 192 |  |  | 192 | 0 | 212 | my ($expression) = @_; | 
| 137 | 192 | 100 |  |  |  | 354 | if (ref $expression) {    # operation | 
| 138 | 64 |  |  |  |  | 86 | my ($op, $l, $r) = @$expression; | 
| 139 | 64 |  |  |  |  | 341 | my $sub = App::Sets::Operations->can($op); | 
| 140 | 64 |  |  |  |  | 101 | return $sub->(expression($l), expression($r)); | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  | else {                    # plain file | 
| 143 | 128 |  |  |  |  | 208 | return file($expression); | 
| 144 |  |  |  |  |  |  | } | 
| 145 |  |  |  |  |  |  | } ## end sub expression | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | sub file { | 
| 148 | 128 |  |  | 128 | 0 | 132 | my ($filename) = @_; | 
| 149 | 128 | 50 | 33 |  |  | 3315 | LOGDIE "invalid file '$filename'\n" | 
| 150 |  |  |  |  |  |  | unless -r $filename && !-d $filename; | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 128 | 50 |  |  |  | 264 | if ($config{cache}) { | 
| 153 | 0 |  |  |  |  | 0 | my $cache_filename = $filename . $config{cache}; | 
| 154 | 0 | 0 |  |  |  | 0 | if (!-e $cache_filename) {    # generate cache file | 
| 155 | 0 |  |  |  |  | 0 | WARN "generating cached sorted file " | 
| 156 |  |  |  |  |  |  | . "'$cache_filename', might wait a bit..."; | 
| 157 | 0 |  |  |  |  | 0 | my $ifh = sort_filehandle($filename, \%config); | 
| 158 | 0 | 0 |  |  |  | 0 | open my $ofh, '>', $cache_filename | 
| 159 |  |  |  |  |  |  | or LOGDIE "open('$cache_filename') for output: $OS_ERROR"; | 
| 160 | 0 |  |  |  |  | 0 | while (<$ifh>) { | 
| 161 | 0 |  |  |  |  | 0 | print {$ofh} $_; | 
|  | 0 |  |  |  |  | 0 |  | 
| 162 |  |  |  |  |  |  | } | 
| 163 | 0 | 0 |  |  |  | 0 | close $ofh or LOGDIE "close('$cache_filename'): $OS_ERROR"; | 
| 164 |  |  |  |  |  |  | } ## end if (!-e $cache_filename) | 
| 165 | 0 |  |  |  |  | 0 | INFO "using '$cache_filename' (assumed to be sorted) " | 
| 166 |  |  |  |  |  |  | . "instead of '$filename'"; | 
| 167 | 0 |  |  |  |  | 0 | $filename = $cache_filename; | 
| 168 |  |  |  |  |  |  | } ## end if ($config{cache}) | 
| 169 |  |  |  |  |  |  |  | 
| 170 | 128 |  |  |  |  | 121 | my $fh; | 
| 171 | 128 | 50 |  |  |  | 205 | if ($config{sorted}) { | 
| 172 |  |  |  |  |  |  | INFO "opening '$filename', assuming it is already sorted" | 
| 173 | 0 | 0 |  |  |  | 0 | unless $config{cache}; | 
| 174 | 0 | 0 |  |  |  | 0 | open $fh, '<', $filename | 
| 175 |  |  |  |  |  |  | or LOGDIE "open('$filename'): $OS_ERROR"; | 
| 176 |  |  |  |  |  |  | } ## end if ($config{sorted}) | 
| 177 |  |  |  |  |  |  | else { | 
| 178 | 128 |  |  |  |  | 675 | INFO "opening '$filename' and sorting on the fly"; | 
| 179 | 128 |  |  |  |  | 2209 | $fh = sort_filehandle($filename, \%config); | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  | return App::Sets::Iterator->new( | 
| 182 |  |  |  |  |  |  | sub { | 
| 183 | 1520 |  |  | 1520 |  | 18725 | my $retval = <$fh>; | 
| 184 | 1520 | 100 |  |  |  | 2609 | return unless defined $retval; | 
| 185 |  |  |  |  |  |  | $retval =~ s{\A\s+|\s+\z}{}gmxs | 
| 186 | 1216 | 50 |  |  |  | 1786 | if $config{trim}; | 
| 187 | 1216 |  |  |  |  | 2903 | return $retval; | 
| 188 |  |  |  |  |  |  | } | 
| 189 | 128 |  |  |  |  | 2359 | ); | 
| 190 |  |  |  |  |  |  | } ## end sub file | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | 1; | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | __END__ |