line  
 stmt  
 bran  
 cond  
 sub  
 pod  
 time  
 code  
 
1 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #-----------------------------------------------------------------  
 
2 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # App::combinesheets  
 
3 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Author: Martin Senger    
 
4 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # For copyright and disclaimer se below.  
 
5 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
6 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ABSTRACT: command-line tool merging CSV and TSV spreadsheets  
 
7 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # PODNAME: App::combinesheets  
 
8 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #-----------------------------------------------------------------  
 
9 
 
7
 
 
 
 
 
  
7
   
 
 
 
231286
 
 use warnings;  
 
  
 
7
 
 
 
 
 
 
 
 
 
20
 
    
 
  
 
7
 
 
 
 
 
 
 
 
 
226
 
    
 
10 
 
7
 
 
 
 
 
  
7
   
 
 
 
34
 
 use strict;  
 
  
 
7
 
 
 
 
 
 
 
 
 
16
 
    
 
  
 
7
 
 
 
 
 
 
 
 
 
436
 
    
 
11 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
12 
 
 
 
 
 
 
 
 
 
 
 
 
 
 package App::combinesheets;  
 
13 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
14 
 
 
 
 
 
 
 
 
 
 
 
 
 
 our $VERSION = '0.2.14'; # VERSION  
 
15 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
16 
 
7
 
 
 
 
 
  
7
   
 
 
 
44
 
 use base 'App::Cmd::Simple';  
 
  
 
7
 
 
 
 
 
 
 
 
 
12
 
    
 
  
 
7
 
 
 
 
 
 
 
 
 
7576
 
    
 
17 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
18 
 
7
 
 
 
 
 
  
7
   
 
 
 
669179
 
 use Pod::Usage;  
 
  
 
7
 
 
 
 
 
 
 
 
 
643404
 
    
 
  
 
7
 
 
 
 
 
 
 
 
 
1275
 
    
 
19 
 
7
 
 
 
 
 
  
7
   
 
 
 
83
 
 use Pod::Find qw(pod_where);  
 
  
 
7
 
 
 
 
 
 
 
 
 
18
 
    
 
  
 
7
 
 
 
 
 
 
 
 
 
413
 
    
 
20 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
21 
 
7
 
 
 
 
 
  
7
   
 
 
 
12925
 
 use Text::CSV::Simple;  
 
  
 
0
 
 
 
 
 
 
 
 
 
 
 
    
 
  
 
0
 
 
 
 
 
 
 
 
 
 
 
    
 
22 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use Text::CSV_XS;  
 
23 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use File::Spec;  
 
24 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use File::Temp;  
 
25 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use File::Which;  
 
26 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use File::BOM qw( :all );  
 
27 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use Algorithm::Loops qw( NestedLoops );  
 
28 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use autouse 'IO::CaptureOutput' => qw(capture_exec);  
 
29 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
30 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # reserved keywords in the configuration  
 
31 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use constant {  
 
32 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_MATCH => 'MATCH',  
 
33 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_PROG  => 'PROG',  
 
34 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_PROGS => 'PROGS',  
 
35 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_PERL  => 'PERL',  
 
36 
 
 
 
 
 
 
 
 
 
 
 
 
 
 };  
 
37 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
38 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # types of input files  
 
39 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use constant {  
 
40 
 
 
 
 
 
 
 
 
 
 
 
 
 
     TYPE_CSV => 'csv',  
 
41 
 
 
 
 
 
 
 
 
 
 
 
 
 
     TYPE_TSV => 'tsv',  
 
42 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #    TYPE_XSL => 'xsl',    # not-yet-supported  
 
43 
 
 
 
 
 
 
 
 
 
 
 
 
 
 };  
 
44 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
45 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # hash keys describing an input ($inputs)  
 
46 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use constant {  
 
47 
 
 
 
 
 
 
 
 
 
 
 
 
 
     INPUT_FILE              => 'file',  
 
48 
 
 
 
 
 
 
 
 
 
 
 
 
 
     INPUT_TYPE              => 'type',  
 
49 
 
 
 
 
 
 
 
 
 
 
 
 
 
     INPUT_MATCHED_BY        => 'matched_by',  
 
50 
 
 
 
 
 
 
 
 
 
 
 
 
 
     INPUT_MATCHED_BY_INDEX  => 'matched_by_index',  
 
51 
 
 
 
 
 
 
 
 
 
 
 
 
 
     INPUT_HEADERS           => 'headers',  
 
52 
 
 
 
 
 
 
 
 
 
 
 
 
 
     INPUT_CONTENT           => 'content',  
 
53 
 
 
 
 
 
 
 
 
 
 
 
 
 
 };  
 
54 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
55 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # hash keys describing wanted fields ($wanted_columns)  
 
56 
 
 
 
 
 
 
 
 
 
 
 
 
 
 use constant {  
 
57 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_TYPE     => 'type',   # what kind of input (MATCH, PROG, PROGS or PERL)  
 
58 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_OUT_COL  => 'ocol',   # a name for this column used in the output  
 
59 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # keys used for the normal (MATCH) columns  
 
60 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_ID       => 'id',     # which input  
 
61 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_IN_COL   => 'icol',   # a column in such input  
 
62 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # keys used for the calculated columns (i.e. of type PROG, PROGS or PERL)  
 
63 
 
 
 
 
 
 
 
 
 
 
 
 
 
     CFG_EXT      => 'id',     # name of the external program or Perl external subroutine  
 
64 
 
 
 
 
 
 
 
 
 
 
 
 
 
     PERL_DETAILS => '_perl_details_', # added during the config processing  
 
65 
 
 
 
 
 
 
 
 
 
 
 
 
 
 };  
 
66 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
67 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
68 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Command-line arguments and script usage  
 
69 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
70 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub usage_desc {  
 
71 
 
 
 
 
 
 
 
 
 
 
 
 
 
      my $self = shift;  
 
72 
 
 
 
 
 
 
 
 
 
 
 
 
 
      return "%c -config  -inputs  [other otions...]";    
 
73 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
74 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub opt_spec {  
 
75 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return (  
 
76 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'h'               => "display a short usage message" ],  
 
77 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'help'            => "display a full usage message"  ],  
 
78 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'man|m'           => "display a full manual page"    ],  
 
79 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'version|v'       => "display a version"             ],  
 
80 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [],  
 
81 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'config|cfg=s'    => ""          ],   
 
82 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'inputs|i=s@{1,}' => "=[,=...] (e.g. PERSON=,CAR=)"                                  ],        
 
83 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'outfile|o=s'     => ""                 ],   
 
84 
 
 
 
 
 
 
 
 
 
 
 
 
 
         [ 'check|c'         => "only check the configuration"  ],  
 
85 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
86 
 
 
 
 
 
 
 
 
 
 
 
 
 
         { getopt_conf => ['no_bundling', 'no_ignore_case', 'auto_abbrev'] }  
 
87 
 
 
 
 
 
 
 
 
 
 
 
 
 
         );  
 
88 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
89 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub validate_args {  
 
90 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($self, $opt, $args) = @_;  
 
91 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
92 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # show various levels of help and exit  
 
93 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $pod_where = pod_where ({-inc => 1}, __PACKAGE__);  
 
94 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($opt->h) {  
 
95 
 
 
 
 
 
 
 
 
 
 
 
 
 
         print "Usage: " . $self->usage();  
 
96 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($^S) { die "Okay\n" } else { exit (0) };  
 
97 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
98 
 
 
 
 
 
 
 
 
 
 
 
 
 
     pod2usage (-input => $pod_where, -verbose => 1, -exitval => 0) if $opt->help;  
 
99 
 
 
 
 
 
 
 
 
 
 
 
 
 
     pod2usage (-input => $pod_where, -verbose => 2, -exitval => 0) if $opt->man;  
 
100 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
101 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # show version and exit  
 
102 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($opt->version) {  
 
103 
 
 
 
 
 
 
 
 
 
 
 
 
 
         ## no critic  
 
104 
 
 
 
 
 
 
 
 
 
 
 
 
 
         no strict;    # because the $VERSION will be added only when  
 
105 
 
 
 
 
 
 
 
 
 
 
 
 
 
         no warnings;  # the distribution is fully built up  
 
106 
 
 
 
 
 
 
 
 
 
 
 
 
 
         print "$VERSION\n";  
 
107 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($^S) { die "Okay\n" } else { exit (0) };  
 
108 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
109 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
110 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # check required command-line arguments  
 
111 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $self->usage_error ("Parameter '-config' is required.")  
 
112 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless $opt->config;  
 
113 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $self->usage_error ("Parameter '-inputs' is required.")  
 
114 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless $opt->inputs;  
 
115 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
116 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return;  
 
117 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
118 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub usage_error {  
 
119 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ( $self, $error ) = @_;  
 
120 
 
 
 
 
 
 
 
 
 
 
 
 
 
     die "Error: $error\nUsage: " . $self->usage->text;  
 
121 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
122 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
123 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
124 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # The main part  
 
125 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
126 
 
 
 
 
 
 
 
 
 
 
 
 
 
 my $inputs;     # keys are input IDs  
 
127 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub execute {  
 
128 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($self, $opt, $args) = @_;  
 
129 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
130 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $inputs = {};   # just in case somebody calls execute() twice  
 
131 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
132 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my @opt_inputs = split (m{,}, join (',', @{ $opt->inputs }));  
 
133 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $opt_outfile = $opt->outfile;  
 
134 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $opt_cfgfile = $opt->config;  
 
135 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
136 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # prepare output handler  
 
137 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $combined;  
 
138 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($opt_outfile and not $opt->check) {  
 
139 
 
 
 
 
 
 
 
 
 
 
 
 
 
         open ($combined, '>', $opt_outfile)  
 
140 
 
 
 
 
 
 
 
 
 
 
 
 
 
             or die "[ER00] Cannot open file $opt_outfile for writing: $!\n";  
 
141 
 
 
 
 
 
 
 
 
 
 
 
 
 
     } else {  
 
142 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $combined = *STDOUT;  
 
143 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
144 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
145 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # read configuration  
 
146 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $wanted_cols = [];   # each element: { CFG_TYPE, CFG_ID, CFG_IN_COL, CFG_OUT_COL... }  
 
147 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $known_inputs = {};  # input ID => 1  ... the same input IDs as in $wanted_cols (for speed)  
 
148 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $matches = {};       # input ID => matching column/header  
 
149 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $config;  
 
150 
 
 
 
 
 
 
 
 
 
 
 
 
 
     open ($config, '<', $opt_cfgfile)  
 
151 
 
 
 
 
 
 
 
 
 
 
 
 
 
         or die "[ER00] Cannot read configuration file $opt_cfgfile: $!\n";  
 
152 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $line_count = 0;  
 
153 
 
 
 
 
 
 
 
 
 
 
 
 
 
     while (<$config>) {  
 
154 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $line_count++;  
 
155 
 
 
 
 
 
 
 
 
 
 
 
 
 
         chomp;  
 
156 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next if m{^\s*$};   # ignore empty lines  
 
157 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next if m{^\s*#};   # ignore comment lines  
 
158 
 
 
 
 
 
 
 
 
 
 
 
 
 
         s{^\s+|\s+$}{}g;    # trim whitespaces  
 
159 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my ($input_id, $input_col, $output_col) = split (m{\s*\t\s*}, $_, 3);  
 
160 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless ($input_id and defined $input_col) {  
 
161 
 
 
 
 
 
 
 
 
 
 
 
 
 
             warn "[WR01] Configuration line $line_count ignored: '$_'\n";  
 
162 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next;  
 
163 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
164 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $input_id = uc ($input_id);   # make config keys upper-case  
 
165 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($input_id eq CFG_MATCH) {  
 
166 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my ($input_id, $column) = split (m{\s*=\s*}, $input_col, 2);  
 
167 
 
 
 
 
 
 
 
 
 
 
 
 
 
             unless ($input_id and $column !~ m{^\s*$}) {  
 
168 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR02] Bad format in configuration line $line_count: '$input_col'. Ignored.\n";  
 
169 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 next;  
 
170 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
171 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $matches->{ uc ($input_id) } = $column;  
 
172 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next;  
 
173 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
174 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $wanted_col = {};  
 
175 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($input_id eq CFG_PROG or $input_id eq CFG_PROGS or $input_id eq CFG_PERL) {  
 
176 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $wanted_col->{CFG_TYPE()} = $input_id;  
 
177 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $wanted_col->{CFG_EXT()} = $input_col;  
 
178 
 
 
 
 
 
 
 
 
 
 
 
 
 
             if (defined $output_col) {  
 
179 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $wanted_col->{CFG_OUT_COL()} = $output_col;  
 
180 
 
 
 
 
 
 
 
 
 
 
 
 
 
             } else {  
 
181 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR10] Missing output column name in configuration line $line_count: '$input_col'.\n";  
 
182 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $wanted_col->{CFG_OUT_COL()} = '';  
 
183 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
184 
 
 
 
 
 
 
 
 
 
 
 
 
 
         } else {  
 
185 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $wanted_col->{CFG_TYPE()}    = CFG_MATCH;  
 
186 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $wanted_col->{CFG_ID()}      = $input_id;  
 
187 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $wanted_col->{CFG_IN_COL()}  = $input_col;  
 
188 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $wanted_col->{CFG_OUT_COL()} = (defined $output_col ? $output_col : $input_col);  
 
189 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $known_inputs->{$input_id} = 1;  
 
190 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
191 
 
 
 
 
 
 
 
 
 
 
 
 
 
         push (@$wanted_cols, $wanted_col);  
 
192 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
193 
 
 
 
 
 
 
 
 
 
 
 
 
 
     close $config;  
 
194 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
195 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # prepare for calculated columns  
 
196 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $col (@$wanted_cols) {  
 
197 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($col->{CFG_TYPE()} eq CFG_PROG or $col->{CFG_TYPE()} eq CFG_PROGS) {  
 
198 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
199 
 
 
 
 
 
 
 
 
 
 
 
 
 
             # locate the external program  
 
200 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $col->{CFG_EXT()} = find_prog ($col->{CFG_EXT()});  
 
201 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
202 
 
 
 
 
 
 
 
 
 
 
 
 
 
         } elsif ($col->{CFG_TYPE()} eq CFG_PERL) {  
 
203 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
204 
 
 
 
 
 
 
 
 
 
 
 
 
 
             # load the wanted Perl module  
 
205 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my $call = $col->{CFG_EXT()};  
 
206 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $call =~ m{^(.+)((::)|(->))(.*)$};  
 
207 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my $module = $1;  
 
208 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my $subroutine = $5;  
 
209 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my $how_to_call = $2;   # can be '::' or '->'  
 
210 
 
 
 
 
 
 
 
 
 
 
 
 
 
             unless ($module) {  
 
211 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR11] Missing module name in '[PERL] " . $col->{CFG_OUT_COL()} . "'. Column ignored.\n";  
 
212 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $col->{ignored} = 1;  
 
213 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 next;  
 
214 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
215 
 
 
 
 
 
 
 
 
 
 
 
 
 
             unless ($subroutine) {  
 
216 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR12] Missing subroutine name in '[PERL] " . $col->{CFG_OUT_COL()} . "'. Column ignored.\n";  
 
217 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $col->{ignored} = 1;  
 
218 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 next;  
 
219 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
220 
 
 
 
 
 
 
 
 
 
 
 
 
 
             if ($module =~ m{^:+}) {  
 
221 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR13] Uncomplete module name in '[PERL] " . $col->{CFG_OUT_COL()} . "'. Column ignored.\n";  
 
222 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $col->{ignored} = 1;  
 
223 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 next;  
 
224 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
225 
 
 
 
 
 
 
 
 
 
 
 
 
 
             eval "require $module";  ## no critic  
 
226 
 
 
 
 
 
 
 
 
 
 
 
 
 
             if ($@) {  
 
227 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR14] Cannot load module '$module': $@. Column '" . $col->{CFG_OUT_COL()} . " ignored\n";  
 
228 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $col->{ignored} = 1;  
 
229 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 next;  
 
230 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
231 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $module->import();  
 
232 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
233 
 
 
 
 
 
 
 
 
 
 
 
 
 
             # remember what we just parsed and checked  
 
234 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $col->{PERL_DETAILS()} = {};  
 
235 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $col->{PERL_DETAILS()}->{what_to_call} = $module . $how_to_call . $subroutine;  
 
236 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $col->{PERL_DETAILS()}->{module} = $module;  
 
237 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $col->{PERL_DETAILS()}->{subroutine} = $subroutine;  
 
238 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $col->{PERL_DETAILS()}->{how_to_call} = $how_to_call;  
 
239 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
240 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
241 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $wanted_cols = [ grep { not $_->{ignored} } @$wanted_cols ];  
 
242 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
243 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # locate expected inputs  
 
244 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $primary_input;   # ID of the first input  
 
245 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $opt_input (@opt_inputs) {  
 
246 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my ($key, $value) = split (m{\s*=\s*}, $opt_input, 2);  
 
247 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next unless $key;  
 
248 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next unless $value;  
 
249 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $key = uc ($key);  
 
250 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless (exists $known_inputs->{$key}) {  
 
251 
 
 
 
 
 
 
 
 
 
 
 
 
 
             warn "[WR03] Configuration does not recognize the input named '$key'. Input ignored.\n";  
 
252 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next;  
 
253 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
254 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless (exists $matches->{$key}) {  
 
255 
 
 
 
 
 
 
 
 
 
 
 
 
 
             warn "[WR04] Input named '$key' does not have any MATCH column defined in configuration. Input ignored.\n";  
 
256 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next;  
 
257 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
258 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $primary_input = $key unless $primary_input;   # remember which input came first  
 
259 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $input = { INPUT_FILE()       => $value,  
 
260 
 
 
 
 
 
 
 
 
 
 
 
 
 
                       INPUT_MATCHED_BY() => $matches->{$key} };  
 
261 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($value =~ m{\.csv$}i) {  
 
262 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $input->{INPUT_TYPE()} = TYPE_CSV;  
 
263 
 
 
 
 
 
 
 
 
 
 
 
 
 
         } else {  
 
264 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $input->{INPUT_TYPE()} = TYPE_TSV;  
 
265 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
266 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $inputs->{$key} = $input;  
 
267 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
268 
 
 
 
 
 
 
 
 
 
 
 
 
 
     die "[ER01] No valid inputs specified. Exiting.\n"  
 
269 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless scalar keys (%$inputs) > 0;  
 
270 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
271 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # read headers from all inputs  
 
272 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $headers_by_id = {};  # used for re-using the same headers once read, and for some checks  
 
273 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $input_id (keys %$inputs) {  
 
274 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $input = $inputs->{$input_id};  
 
275 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $headers;  
 
276 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if (exists $headers_by_id->{$input_id}) {  
 
277 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $headers = $headers_by_id->{$input_id};   # copy already known headers  
 
278 
 
 
 
 
 
 
 
 
 
 
 
 
 
         } else {  
 
279 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $headers = read_headers ($input);  
 
280 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
281 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
282 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # add new properties to $input  
 
283 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless (exists $headers->{ $input->{INPUT_MATCHED_BY()} }) {  
 
284 
 
 
 
 
 
 
 
 
 
 
 
 
 
             warn ("[WR05] Input '$input_id' does not contain the matching header '" . $input->{INPUT_MATCHED_BY()} .  
 
285 
 
 
 
 
 
 
 
 
 
 
 
 
 
                   "'. Input ignored\n");  
 
286 
 
 
 
 
 
 
 
 
 
 
 
 
 
             delete $inputs->{$input_id};  
 
287 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next;  
 
288 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
289 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $headers_by_id->{$input_id} = $headers  
 
290 
 
 
 
 
 
 
 
 
 
 
 
 
 
             unless exists $headers_by_id->{$input_id};  
 
291 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $input->{INPUT_HEADERS()} = $headers;  
 
292 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $input->{INPUT_MATCHED_BY_INDEX()} = $headers->{ $input->{INPUT_MATCHED_BY()} };  
 
293 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
294 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
295 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # check real headers vs. headers as defined in configuration  
 
296 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $already_reported = {};  
 
297 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $col (@$wanted_cols) {  
 
298 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next if $col->{CFG_TYPE()} ne CFG_MATCH;   # check is done only for normal columns  
 
299 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $input_id = $col->{CFG_ID()};  
 
300 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if (exists $headers_by_id->{$input_id}) {  
 
301 
 
 
 
 
 
 
 
 
 
 
 
 
 
             # does the requested column exist in this input's headers?  
 
302 
 
 
 
 
 
 
 
 
 
 
 
 
 
             unless (column_exists ($input_id, $col->{CFG_IN_COL()})) {  
 
303 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 warn "[WR06] Column '$col->{CFG_IN_COL()}' not found in the input '$input_id'. Column will be ignored.\n";  
 
304 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $col->{ignored} = 1;  
 
305 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
306 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next;  
 
307 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
308 
 
 
 
 
 
 
 
 
 
 
 
 
 
         } elsif (!exists $already_reported->{$input_id}) {  
 
309 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $already_reported->{$input_id} = 1;  
 
310 
 
 
 
 
 
 
 
 
 
 
 
 
 
             warn "[WR07] Configuration defines columns from an input '$input_id' but no such input given (or was ignored). These columns will be ignored.\n";  
 
311 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
312 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $col->{ignored} = 1;  
 
313 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
314 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $wanted_cols = [ grep { not $_->{ignored} } @$wanted_cols ];  
 
315 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
316 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $input_id (keys %$matches) {  
 
317 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next unless exists $inputs->{$input_id};   # ignoring matches whose inputs are already ignored  
 
318 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # does the matching column exist in this input's headers?  
 
319 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless (column_exists ($input_id, $matches->{$input_id})) {  
 
320 
 
 
 
 
 
 
 
 
 
 
 
 
 
             die "[ER02] Matching column '$matches->{$input_id}' not found in the input '$input_id'. Must exit.\n";  
 
321 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
322 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
323 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
324 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # do we still have a primary input?  
 
325 
 
 
 
 
 
 
 
 
 
 
 
 
 
     unless (exists $inputs->{$primary_input}) {  
 
326 
 
 
 
 
 
 
 
 
 
 
 
 
 
         die "[ER03] Due to errors, the primary input '$primary_input' is now ignored. Must exit.\n";  
 
327 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
328 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
329 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # end of checking  
 
330 
 
 
 
 
 
 
 
 
 
 
 
 
 
     exit (0) if $opt->check;  
 
331 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
332 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # read all inputs into memory  
 
333 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $input_id (keys %$inputs) {  
 
334 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $input = $inputs->{$input_id};  
 
335 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $content = read_content ($input);  
 
336 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $input->{INPUT_CONTENT()} = $content;  
 
337 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
338 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
339 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # output combined headers  
 
340 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my @header_line = ();  
 
341 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $col (@$wanted_cols) {  
 
342 
 
 
 
 
 
 
 
 
 
 
 
 
 
         push (@header_line, $col->{CFG_OUT_COL()});  
 
343 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
344 
 
 
 
 
 
 
 
 
 
 
 
 
 
     print $combined join ("\t", @header_line) . "\n"  
 
345 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless scalar @header_line == 0;  
 
346 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
347 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # combine all inputs and make output lines  
 
348 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $matching_content (sort keys %{ $inputs->{$primary_input}->{INPUT_CONTENT()} }) {  
 
349 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # $matching_content is, for example, a publication title ("An Atlas of....")  
 
350 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
351 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # inputs may have more lines with the same value in the matching columns  
 
352 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # therefore, extract first the matching lines from all inputs  
 
353 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $lines_to_combine = [];  
 
354 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $inputs_to_combine = {};  # keys are inputs' CFG_IDs, values are indeces into $lines_to_combine  
 
355 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
356 
 
 
 
 
 
 
 
 
 
 
 
 
 
         foreach my $col (@$wanted_cols) {  
 
357 
 
 
 
 
 
 
 
 
 
 
 
 
 
             if ($col->{CFG_TYPE()} eq CFG_MATCH) {  
 
358 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 unless (exists $inputs_to_combine->{ $col->{CFG_ID()} }) {  
 
359 
 
 
 
 
 
 
 
 
 
 
 
 
 
                     # remember the same lines (from the same input) only once  
 
360 
 
 
 
 
 
 
 
 
 
 
 
 
 
                     my $input = $inputs->{ $col->{CFG_ID()} };  
 
361 
 
 
 
 
 
 
 
 
 
 
 
 
 
                     push (@$lines_to_combine, $input->{INPUT_CONTENT()}->{$matching_content} || [undef]);  
 
362 
 
 
 
 
 
 
 
 
 
 
 
 
 
                     $inputs_to_combine->{ $col->{CFG_ID()} } = $#$lines_to_combine;  
 
363 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 }  
 
364 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
365 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
366 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
367 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # make all combinantions of matching lines  
 
368 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
369 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # let's have 3 inputs, identified by K, L and M  
 
370 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # there are three matching lines in K, two in L and one in M:  
 
371 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # my $lines_to_combine = [  
 
372 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #       [ "line1", "line2", "line3", ], # from input K  
 
373 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #       [ "lineX", "lineY", ],          # from input L  
 
374 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #       [ "lineQ", ],                   # from input M  
 
375 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #       );  
 
376 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # my $inputs_to_combine = { K => 0, L => 1, M => 2 };  
 
377 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #  
 
378 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # the subroutine create_output_line() will be called 6 times  
 
379 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # with the following arguments:  
 
380 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #   line1, lineX, lineQ  
 
381 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #   line1, lineY, lineQ  
 
382 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #   line2, lineX, lineQ  
 
383 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #   line2, lineY, lineQ  
 
384 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #   line3, lineX, lineQ  
 
385 
 
 
 
 
 
 
 
 
 
 
 
 
 
         #   line3, lineY, lineQ  
 
386 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
387 
 
 
 
 
 
 
 
 
 
 
 
 
 
         NestedLoops ($lines_to_combine,  
 
388 
 
 
 
 
 
 
 
 
 
 
 
 
 
                      sub {  
 
389 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          my @input_lines = @_;  
 
390 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          my @output_line = ();  
 
391 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          my @calculated = ();   # indeces of the yet-to-be-calculated elements  
 
392 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          my $column_count = -1;  
 
393 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          foreach my $col (@$wanted_cols) { # $col defines what data to push into @output_line  
 
394 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              $column_count++;  
 
395 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              if ($col->{CFG_TYPE()} eq CFG_MATCH) {  
 
396 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  my $input = $inputs->{ $col->{CFG_ID()} };  
 
397 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  my $input_line = @input_lines[$inputs_to_combine->{ $col->{CFG_ID()} }];  
 
398 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  # use Data::Dumper;  
 
399 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  # print Dumper (\@input_lines);  
 
400 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  # print Dumper ($inputs_to_combine);  
 
401 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  my $idx = $input->{INPUT_HEADERS()}->{ $col->{CFG_IN_COL()} };  
 
402 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  my $value = $input_line->[$idx] || '';  
 
403 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  push (@output_line, $value);  
 
404 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              } else {  
 
405 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  push (@calculated, $column_count);  
 
406 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  push (@output_line, '');  
 
407 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              }  
 
408 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          }  
 
409 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          # insert the calculated columns  
 
410 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          foreach my $idx (@calculated) {  
 
411 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              if ($wanted_cols->[$idx]->{CFG_TYPE()} eq CFG_PROG) {  
 
412 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  $output_line[$idx] = call_prog ($wanted_cols->[$idx], \@header_line, \@output_line);  
 
413 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              } elsif ($wanted_cols->[$idx]->{CFG_TYPE()} eq CFG_PROGS) {  
 
414 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  $output_line[$idx] = call_prog_simple ($wanted_cols->[$idx]);  
 
415 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              } elsif ($wanted_cols->[$idx]->{CFG_TYPE()} eq CFG_PERL) {  
 
416 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  $output_line[$idx] = call_perl ($wanted_cols->[$idx], \@header_line, \@output_line);  
 
417 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              }  
 
418 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          }  
 
419 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
420 
 
 
 
 
 
 
 
 
 
 
 
 
 
                          print $combined join ("\t", @output_line) . "\n"  
 
421 
 
 
 
 
 
 
 
 
 
 
 
 
 
                              unless scalar @output_line == 0;  
 
422 
 
 
 
 
 
 
 
 
 
 
 
 
 
                      });  
 
423 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
424 
 
 
 
 
 
 
 
 
 
 
 
 
 
     close $combined if $opt_outfile;  
 
425 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
426 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
427 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
428 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Call a Perl subroutine (from any module) in order to get a value for  
 
429 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # a "calculated" column. $column defines which column to fill,  
 
430 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # $header_line is an arra is an arrayref with column headers and the  
 
431 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # $data_line is another arrayref with the values for the current row.  
 
432 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
433 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # $column->{PERL_DETAILS} contains all details needed for the call  
 
434 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
435 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub call_perl {  
 
436 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($column, $header_line, $data_line) = @_;  
 
437 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
438 
 
 
 
 
 
 
 
 
 
 
 
 
 
     no strict;  ## no critic  
 
439 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $what_to_call = $column->{PERL_DETAILS()}->{what_to_call};  
 
440 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $how_to_call  = $column->{PERL_DETAILS()}->{how_to_call};  
 
441 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $module       = $column->{PERL_DETAILS()}->{module};  
 
442 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $subroutine   = $column->{PERL_DETAILS()}->{subroutine};  
 
443 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
444 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($how_to_call eq '->') {  
 
445 
 
 
 
 
 
 
 
 
 
 
 
 
 
         return $module->$subroutine ($column, $header_line, $data_line);  
 
446 
 
 
 
 
 
 
 
 
 
 
 
 
 
     } else {  
 
447 
 
 
 
 
 
 
 
 
 
 
 
 
 
         return &$what_to_call ($column, $header_line, $data_line);  
 
448 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
449 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
450 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
451 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
452 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Call an external program in order to get a value for a "calculated"  
 
453 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # column. $column defines which column to fill, $header_line is an  
 
454 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # arra is an arrayref with column headers and the $data_line is  
 
455 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # another arrayref with the values for the current row.  
 
456 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
457 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # $column->{CFG_EXT} contains a program name to call  
 
458 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
459 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub call_prog {  
 
460 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($column, $header_line, $data_line) = @_;  
 
461 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
462 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # prepare an input file for the external program  
 
463 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $tmp = File::Temp->new();  
 
464 
 
 
 
 
 
 
 
 
 
 
 
 
 
     for (my $i = 0; $i < @$header_line;$i++) {  
 
465 
 
 
 
 
 
 
 
 
 
 
 
 
 
         print $tmp $header_line->[$i] . "\t" . $data_line->[$i] . "\n";  
 
466 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
467 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
468 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # call it  
 
469 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return _call_it ($column->{CFG_EXT()}, $tmp);  
 
470 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
471 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
472 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
473 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Call an external program (without any command-line arguments) in  
 
474 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # order to get a value for a "calculated" column.  
 
475 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
476 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # $column->{CFG_EXT} contains a program name to call  
 
477 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
478 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub call_prog_simple {  
 
479 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($column) = @_;  
 
480 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return _call_it ($column->{CFG_EXT()});  
 
481 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
482 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
483 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
484 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
485 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
486 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub _call_it {  
 
487 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my @command = @_;  
 
488 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($stdout, $stderr, $success, $exit_code) = capture_exec (@command);  
 
489 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($exit_code != 0 or $stderr) {  
 
490 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my $errmsg = '[ER05] Failed command: ' . join (' ', map {"'$_'"} @command) . "\n";  
 
491 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $errmsg .= "STDERR: $stderr\n" if $stderr;  
 
492 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $errmsg .= "EXIT CODE: $exit_code\n";  
 
493 
 
 
 
 
 
 
 
 
 
 
 
 
 
         die $errmsg;  
 
494 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
495 
 
 
 
 
 
 
 
 
 
 
 
 
 
     chomp $stdout;         # remove the last newline  
 
496 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $stdout =~ s{\n}{ }g;  # better to replace newlines  
 
497 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $stdout;  
 
498 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
499 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
500 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
501 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Locate given $prgname and return it, usually with an added path. Or  
 
502 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # die if such program cannot be found or it is not executable.  
 
503 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
504 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub find_prog {  
 
505 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $prgname = shift;  
 
506 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $full_name;  
 
507 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
508 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # 1) try the name as it is (e.g. the ones with an absolute path)  
 
509 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if (-e $prgname and -x $prgname and  
 
510 
 
 
 
 
 
 
 
 
 
 
 
 
 
         File::Spec->file_name_is_absolute ($prgname)) {  
 
511 
 
 
 
 
 
 
 
 
 
 
 
 
 
         return $prgname;  
 
512 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
513 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
514 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # 2) try to find it on system PATH  
 
515 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $full_name = which ($prgname);  
 
516 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($full_name ) {  
 
517 
 
 
 
 
 
 
 
 
 
 
 
 
 
         chomp $full_name;  
 
518 
 
 
 
 
 
 
 
 
 
 
 
 
 
         return $full_name;  
 
519 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
520 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
521 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # 3) try the environment variable with a path  
 
522 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if (exists $ENV{COMBINE_SHEETS_EXT_PATH}) {  
 
523 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $full_name = File::Spec->catfile ($ENV{COMBINE_SHEETS_EXT_PATH}, $prgname);  
 
524 
 
 
 
 
 
 
 
 
 
 
 
 
 
         return maybe_die ($full_name);  
 
525 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
526 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
527 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # 4) try to find it in the current directory  
 
528 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $full_name = File::Spec->catfile ('./', $prgname);  
 
529 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return maybe_die ($full_name);  
 
530 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
531 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub maybe_die {  
 
532 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $prg = shift;  
 
533 
 
 
 
 
 
 
 
 
 
 
 
 
 
     die "[ER04] '$prg' not found or is not executable.\n"  
 
534 
 
 
 
 
 
 
 
 
 
 
 
 
 
         unless -e $prg and -x $prg;  
 
535 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $prg;  
 
536 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
537 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
538 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
539 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Does the requested $column exist in the given input's headers?  
 
540 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
541 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub column_exists {  
 
542 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($input_id, $column) = @_;  
 
543 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return exists $inputs->{$input_id}->{INPUT_HEADERS()}->{$column};  
 
544 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
545 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
546 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
547 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Read the headers (the first line) form an input file (given in  
 
548 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # hashref $input) and store them in the hashref $headers, each od them  
 
549 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # with its index as it appears in the read file. Do nothing if  
 
550 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # $headers already contains headers from the same input identifier.  
 
551 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
552 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_headers {  
 
553 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($input) = @_;  
 
554 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
555 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $headers;  
 
556 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($input->{INPUT_TYPE()} eq TYPE_CSV) {  
 
557 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $headers = read_csv_headers ($input->{INPUT_FILE()});  
 
558 
 
 
 
 
 
 
 
 
 
 
 
 
 
     } else {  
 
559 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $headers = read_tsv_headers ($input->{INPUT_FILE()});  
 
560 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
561 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $new_headers = {};  
 
562 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $column_index = 0;  
 
563 
 
 
 
 
 
 
 
 
 
 
 
 
 
     foreach my $column (@$headers) {  
 
564 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $new_headers->{$column} = $column_index++;  
 
565 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
566 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $new_headers;  
 
567 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
568 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
569 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
570 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
571 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
572 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_csv_headers {  
 
573 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($file) = @_;  
 
574 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $line = read_first_line ($file);  
 
575 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
576 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $parser = Text::CSV_XS->new ({  
 
577 
 
 
 
 
 
 
 
 
 
 
 
 
 
         allow_loose_quotes => 1,  
 
578 
 
 
 
 
 
 
 
 
 
 
 
 
 
         escape_char        => "\\",  
 
579 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                     });  
 
580 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($parser->parse ($line)) {  
 
581 
 
 
 
 
 
 
 
 
 
 
 
 
 
         return [ $parser->fields ];  
 
582 
 
 
 
 
 
 
 
 
 
 
 
 
 
     } else {  
 
583 
 
 
 
 
 
 
 
 
 
 
 
 
 
         die "[ER04] Parsing CSV file $file failed: " .  
 
584 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $parser->error_input  . "\n" .  
 
585 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $parser->error_diag() . "\n";  
 
586 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
587 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
588 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
589 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
590 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
591 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
592 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_tsv_headers {  
 
593 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($file) = @_;  
 
594 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $line = read_first_line ($file);  
 
595 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return [ split (m{\t}, $line) ];  
 
596 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
597 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
598 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
599 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
600 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
601 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_first_line {  
 
602 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($file) = @_;  
 
603 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $fh;  
 
604 
 
 
 
 
 
 
 
 
 
 
 
 
 
     open_bom ($fh, $file); # or open ($fh, '<', $file)  
 
605 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # or die "[ER00] Cannot read input file $file: $!\n";  
 
606 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $line = <$fh>;          # read just one line  
 
607 
 
 
 
 
 
 
 
 
 
 
 
 
 
     close $fh;  
 
608 
 
 
 
 
 
 
 
 
 
 
 
 
 
     $line =~ s{(\r|\n)+$}{};   # remove newlines of any kind  
 
609 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $line;  
 
610 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
611 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
612 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
613 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Stringify a hashref  
 
614 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
615 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub ph {  
 
616 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $hashref = shift;  
 
617 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $result = '';  
 
618 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($key, $value);  
 
619 
 
 
 
 
 
 
 
 
 
 
 
 
 
     while (($key, $value) = each (%$hashref)) {  
 
620 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $result .= "$key => $value,";  
 
621 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
622 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return substr ($result, 0, -1);  
 
623 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
624 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
625 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
626 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # Read contents...  
 
627 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
628 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_content {  
 
629 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($input) = @_;  
 
630 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $content;  
 
631 
 
 
 
 
 
 
 
 
 
 
 
 
 
     if ($input->{INPUT_TYPE()} eq TYPE_CSV) {  
 
632 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $content = read_csv_content ($input->{INPUT_FILE()}, $input->{INPUT_MATCHED_BY_INDEX()});  
 
633 
 
 
 
 
 
 
 
 
 
 
 
 
 
     } else {  
 
634 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $content = read_tsv_content ($input->{INPUT_FILE()}, $input->{INPUT_MATCHED_BY_INDEX()});  
 
635 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
636 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $content;  
 
637 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
638 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
639 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
640 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
641 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
642 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_tsv_content {  
 
643 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($file, $matched_index) = @_;  
 
644 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $fh;  
 
645 
 
 
 
 
 
 
 
 
 
 
 
 
 
     open_bom ($fh, $file); # or open ($fh, '<', $file)  
 
646 
 
 
 
 
 
 
 
 
 
 
 
 
 
         # or die "[ER00] Cannot read input file $file: $!\n";  
 
647 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $content = {};  
 
648 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $line_count = 0;  
 
649 
 
 
 
 
 
 
 
 
 
 
 
 
 
     while (my $line = <$fh>) {  
 
650 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next if $line_count++ == 0;  # skip header line  
 
651 
 
 
 
 
 
 
 
 
 
 
 
 
 
         next if $line =~ m{^\s*$};   # ignore empty lines  
 
652 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $line =~ s{(\r|\n)+$}{};     # remove newlines of any kind  
 
653 
 
 
 
 
 
 
 
 
 
 
 
 
 
         my @data = split (m{\t}, $line);  
 
654 
 
 
 
 
 
 
 
 
 
 
 
 
 
         $content->{ $data[$matched_index] } = [] unless $content->{ $data[$matched_index] };  
 
655 
 
 
 
 
 
 
 
 
 
 
 
 
 
         push (@{ $content->{ $data[$matched_index] } }, [@data]);  
 
656 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
657 
 
 
 
 
 
 
 
 
 
 
 
 
 
     close $fh;  
 
658 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $content;  
 
659 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
660 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
661 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
662 
 
 
 
 
 
 
 
 
 
 
 
 
 
 #  
 
663 
 
 
 
 
 
 
 
 
 
 
 
 
 
 # ----------------------------------------------------------------  
 
664 
 
 
 
 
 
 
 
 
 
 
 
 
 
 sub read_csv_content {  
 
665 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my ($file, $matched_index) = @_;  
 
666 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $count_lines = 0;  
 
667 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $content = {};  
 
668 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
669 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # create a CSV parser; any error in reading input will be fatal  
 
670 
 
 
 
 
 
 
 
 
 
 
 
 
 
     my $csv = Text::CSV_XS->new ({  
 
671 
 
 
 
 
 
 
 
 
 
 
 
 
 
         allow_loose_quotes => 1,  
 
672 
 
 
 
 
 
 
 
 
 
 
 
 
 
         escape_char        => "\\",  
 
673 
 
 
 
 
 
 
 
 
 
 
 
 
 
         auto_diag          => 1,  
 
674 
 
 
 
 
 
 
 
 
 
 
 
 
 
                                  });  
 
675 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
676 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # read the CSV input  
 
677 
 
 
 
 
 
 
 
 
 
 
 
 
 
     open_bom (my $fh, $file);  
 
678 
 
 
 
 
 
 
 
 
 
 
 
 
 
     while (<$fh>) {  
 
679 
 
 
 
 
 
 
 
 
 
 
 
 
 
         if ($csv->parse ($_)) {  
 
680 
 
 
 
 
 
 
 
 
 
 
 
 
 
             next if $count_lines++ == 0;   # headers are ignored  
 
681 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my @data = $csv->fields;  
 
682 
 
 
 
 
 
 
 
 
 
 
 
 
 
             if (@data) {  
 
683 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 push (@{ $content->{ $data[$matched_index] } }, \@data);  
 
684 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
685 
 
 
 
 
 
 
 
 
 
 
 
 
 
         } else {  
 
686 
 
 
 
 
 
 
 
 
 
 
 
 
 
             my $err = $csv->error_input;  
 
687 
 
 
 
 
 
 
 
 
 
 
 
 
 
             warn "[WR09] Possible a wrong or not-readable input file '$file': $err\n";  
 
688 
 
 
 
 
 
 
 
 
 
 
 
 
 
             exit (1);  
 
689 
 
 
 
 
 
 
 
 
 
 
 
 
 
         }  
 
690 
 
 
 
 
 
 
 
 
 
 
 
 
 
     }  
 
691 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
692 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # $parser->add_trigger (after_parse => sub {  
 
693 
 
 
 
 
 
 
 
 
 
 
 
 
 
     #     my ($self, $data) = @_;  
 
694 
 
 
 
 
 
 
 
 
 
 
 
 
 
     #     return if $count_lines++ == 0;   # headers are ignored  
 
695 
 
 
 
 
 
 
 
 
 
 
 
 
 
     #     $content->{ $data->[$matched_index] } = [] unless $content->{ $data->[$matched_index] };  
 
696 
 
 
 
 
 
 
 
 
 
 
 
 
 
     #     push (@{ $content->{ $data->[$matched_index] } }, $data);  
 
697 
 
 
 
 
 
 
 
 
 
 
 
 
 
     #                       });  
 
698 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # read CSV input (the result is not used here; everything is done in triggers)  
 
699 
 
 
 
 
 
 
 
 
 
 
 
 
 
     # $parser->read_file ($file);  
 
700 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
701 
 
 
 
 
 
 
 
 
 
 
 
 
 
     return $content;  
 
702 
 
 
 
 
 
 
 
 
 
 
 
 
 
 }  
 
703 
 
 
 
 
 
 
 
 
 
 
 
 
 
 1;  
 
704 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
705 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
706 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
707 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =pod  
 
708 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
709 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 NAME  
 
710 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
711 
 
 
 
 
 
 
 
 
 
 
 
 
 
 App::combinesheets - command-line tool merging CSV and TSV spreadsheets  
 
712 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
713 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 VERSION  
 
714 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
715 
 
 
 
 
 
 
 
 
 
 
 
 
 
 version 0.2.14  
 
716 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
717 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 SYNOPSIS  
 
718 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
719 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -h  
 
720 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -help  
 
721 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -man  
 
722 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -version  
 
723 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
724 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -config  -inputs  [] [-outfile ]      
 
725 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
726 
 
 
 
 
 
 
 
 
 
 
 
 
 
       where  has the form: = [=...]       
 
727 
 
 
 
 
 
 
 
 
 
 
 
 
 
       where  are: -check   
 
728 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
729 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 DESCRIPTION  
 
730 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
731 
 
 
 
 
 
 
 
 
 
 
 
 
 
 B is a command-line tool merging together two or more   
 
732 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheets. The spreadsheets can be COMMA-separated or TAB-separated  
 
733 
 
 
 
 
 
 
 
 
 
 
 
 
 
 files, each of them having the first line with column headers. Data in  
 
734 
 
 
 
 
 
 
 
 
 
 
 
 
 
 one of the column (it can be a different column in each input  
 
735 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheet) serve to match lines. For example, having two spreadsheets,  
 
736 
 
 
 
 
 
 
 
 
 
 
 
 
 
 PERSON and CAR, with the following contents:  
 
737 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
738 
 
 
 
 
 
 
 
 
 
 
 
 
 
    persons.tsv:  
 
739 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
740 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Surname      First name  Sex  Age  Nickname  
 
741 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Novak        Jan         M    52   Honza  
 
742 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Gudernova    Jitka       F    56  
 
743 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Senger       Martin      M    61   Tulak  
 
744 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
745 
 
 
 
 
 
 
 
 
 
 
 
 
 
    cars.tsv:  
 
746 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
747 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Model  Year  Owned by  
 
748 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Praga  1936  Someone else  
 
749 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Mini   1968  Gudernova  
 
750 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Skoda  2002  Senger  
 
751 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
752 
 
 
 
 
 
 
 
 
 
 
 
 
 
 we want to merge these spreadsheet by C in persons.tsv and by   
 
753 
 
 
 
 
 
 
 
 
 
 
 
 
 
 C in cars.tsv. There are two possible results, depending   
 
754 
 
 
 
 
 
 
 
 
 
 
 
 
 
 which spreadsheet is used as the first one (a primary one). If the  
 
755 
 
 
 
 
 
 
 
 
 
 
 
 
 
 persons.tsv is the first, the result will be (which columns are  
 
756 
 
 
 
 
 
 
 
 
 
 
 
 
 
 included in the result will be described later in this document):  
 
757 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
758 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -cfg config.cfg -in PERSON=persons.tsv CAR=cars.csv  
 
759 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
760 
 
 
 
 
 
 
 
 
 
 
 
 
 
    First name  Surname    Model  Sex  Nickname  Age  Year  Owned by  
 
761 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Jitka       Gudernova  Mini   F              56   1968  Gudernova  
 
762 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Jan         Novak             M    Honza     52  
 
763 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Martin      Senger     Skoda  M    Tulak     61   2002  Senger  
 
764 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
765 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Or, if the cars.tsv is the first, the result will be:  
 
766 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
767 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -cfg config.cfg -in CAR=cars.csv PERSON=persons.tsv  
 
768 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
769 
 
 
 
 
 
 
 
 
 
 
 
 
 
    First name  Surname    Model  Sex  Nickname  Age  Year  Owned by  
 
770 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Jitka       Gudernova  Mini   F              56   1968  Gudernova  
 
771 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Martin      Senger     Skoda  M    Tulak     61   2002  Senger  
 
772 
 
 
 
 
 
 
 
 
 
 
 
 
 
                           Praga                      1936  Someone else  
 
773 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
774 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Of course, if both input spreadsheets have only the matching lines,  
 
775 
 
 
 
 
 
 
 
 
 
 
 
 
 
 both results will be the same (it will not matter which one of them is  
 
776 
 
 
 
 
 
 
 
 
 
 
 
 
 
 considered the primary one).  
 
777 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
778 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The rows in the resulting spreadsheet are sorted by values in the  
 
779 
 
 
 
 
 
 
 
 
 
 
 
 
 
 column that was used as a matching column in the primary input.  
 
780 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
781 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The information which columns should be used to match the input  
 
782 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheets and which columns should appear in the resulting  
 
783 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheet is read from a configuration file (see the C<-config>  
 
784 
 
 
 
 
 
 
 
 
 
 
 
 
 
  - or C<-cfg> - argument).  
 
785 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
786 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The command-line arguments and options can be specified with single or  
 
787 
 
 
 
 
 
 
 
 
 
 
 
 
 
 double dash. Most of them can be abbreviated to the nearest non-biased  
 
788 
 
 
 
 
 
 
 
 
 
 
 
 
 
 length. They are case-sensitive.  
 
789 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
790 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head2 Duplicated values in the matching columns  
 
791 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
792 
 
 
 
 
 
 
 
 
 
 
 
 
 
 If there are repeated (the same) values in the column that serves as  
 
793 
 
 
 
 
 
 
 
 
 
 
 
 
 
 matching criterion then the resulting spreadsheet will have as many  
 
794 
 
 
 
 
 
 
 
 
 
 
 
 
 
 output lines (for a particular matching value) as is the number of all  
 
795 
 
 
 
 
 
 
 
 
 
 
 
 
 
 combinations of the lines with that matching values in all input  
 
796 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheets. For example, let's have C and C,    
 
797 
 
 
 
 
 
 
 
 
 
 
 
 
 
 assuming that a book can have more authors and any author can  
 
798 
 
 
 
 
 
 
 
 
 
 
 
 
 
 contribute to any number of books:  
 
799 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
800 
 
 
 
 
 
 
 
 
 
 
 
 
 
    books.tsv:  
 
801 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Title   Note    Author  
 
802 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Book 1  from B1-a       Kim  
 
803 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Book 2  from B2-b       Kim  
 
804 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Book 3  from B3-c       Katrin  
 
805 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Book 1  from B1-d       Blanka  
 
806 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Book 2  from B2-e       Katrin  
 
807 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
808 
 
 
 
 
 
 
 
 
 
 
 
 
 
    authors.tsv:  
 
809 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Age     Name  
 
810 
 
 
 
 
 
 
 
 
 
 
 
 
 
    28      Kim  
 
811 
 
 
 
 
 
 
 
 
 
 
 
 
 
    20      Katrin  
 
812 
 
 
 
 
 
 
 
 
 
 
 
 
 
    30      Blanka  
 
813 
 
 
 
 
 
 
 
 
 
 
 
 
 
    50      Lazy author  
 
814 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
815 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The output (again, depending on which input is considered a primary  
 
816 
 
 
 
 
 
 
 
 
 
 
 
 
 
 input) will be (a list of included column is defined in the  
 
817 
 
 
 
 
 
 
 
 
 
 
 
 
 
 configuration file - see later):  
 
818 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
819 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -cfg books_to_authors.cfg -in BOOK=books.tsv AUTHOR=authors.tsv  
 
820 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
821 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Name    Title   Age Note  
 
822 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Blanka  Book 1  30  from B1-d  
 
823 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Katrin  Book 3  20  from B3-c  
 
824 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Katrin  Book 2  20  from B2-e  
 
825 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Kim     Book 1  28  from B1-a  
 
826 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Kim     Book 2  28  from B2-b  
 
827 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
828 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -cfg books_to_authors.cfg -in AUTHOR=authors.tsv BOOK=books.tsv  
 
829 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
830 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Name        Title   Age  Note  
 
831 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Blanka      Book 1  30   from B1-d  
 
832 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Katrin      Book 3  20   from B3-c  
 
833 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Katrin      Book 2  20   from B2-e  
 
834 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Kim         Book 1  28   from B1-a  
 
835 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Kim         Book 2  28   from B2-b  
 
836 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Lazy author         50  
 
837 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
838 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 ADVANCED USAGE  
 
839 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
840 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Additionally to the merging columns from one or more spreadsheets,  
 
841 
 
 
 
 
 
 
 
 
 
 
 
 
 
 this script can also add completely new columns to the resulting  
 
842 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheet, the columns that do not exist in any of the input  
 
843 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheet. Such columns are called C.   
 
844 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
845 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Each C is created either by an external,   
 
846 
 
 
 
 
 
 
 
 
 
 
 
 
 
 command-line driven, program, or by a Perl subroutine. In both cases,  
 
847 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the user must create (write) such external program or such Perl  
 
848 
 
 
 
 
 
 
 
 
 
 
 
 
 
 subroutine. Therefore, this usage is meant more for developers than  
 
849 
 
 
 
 
 
 
 
 
 
 
 
 
 
 for the end users.  
 
850 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
851 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Note that this advanced feature is meant only for new columns, not for  
 
852 
 
 
 
 
 
 
 
 
 
 
 
 
 
 new rows. Therefore, it cannot be used, for example, to create rows  
 
853 
 
 
 
 
 
 
 
 
 
 
 
 
 
 with totals of columns.  
 
854 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
855 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head2 Calculated columns by external programs  
 
856 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
857 
 
 
 
 
 
 
 
 
 
 
 
 
 
 If specified, an external program is invoked for each row. It can be  
 
858 
 
 
 
 
 
 
 
 
 
 
 
 
 
 specified either by a keyword B or by a keyword B - see    
 
859 
 
 
 
 
 
 
 
 
 
 
 
 
 
 syntax in the I section. In both cases, the   
 
860 
 
 
 
 
 
 
 
 
 
 
 
 
 
 value of the standard output of these programs become the value of the  
 
861 
 
 
 
 
 
 
 
 
 
 
 
 
 
 calculated column (a trailing newline of this standard output is  
 
862 
 
 
 
 
 
 
 
 
 
 
 
 
 
 removed and other newlines are replaced by spaces).  
 
863 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
864 
 
 
 
 
 
 
 
 
 
 
 
 
 
 A program defined by the B is called without any arguments   
 
865 
 
 
 
 
 
 
 
 
 
 
 
 
 
 (C in I stands for a I). That's why it does not have     
 
866 
 
 
 
 
 
 
 
 
 
 
 
 
 
 any knowledge for which row it has been invoked. Its usage is,  
 
867 
 
 
 
 
 
 
 
 
 
 
 
 
 
 therefore, for column values that are not dependent on other values  
 
868 
 
 
 
 
 
 
 
 
 
 
 
 
 
 from the spreadsheet. For example, for the C shown above,   
 
869 
 
 
 
 
 
 
 
 
 
 
 
 
 
 you can add a column C by calling a UNIX program C    
 
870 
 
 
 
 
 
 
 
 
 
 
 
 
 
 - again, see an example the I   
 
871 
 
 
 
 
 
 
 
 
 
 
 
 
 
 section.  
 
872 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
873 
 
 
 
 
 
 
 
 
 
 
 
 
 
 A program defined by the B is called with one argument which is   
 
874 
 
 
 
 
 
 
 
 
 
 
 
 
 
 a filename. This file contains the current row; each of its lines has  
 
875 
 
 
 
 
 
 
 
 
 
 
 
 
 
 two, TAB-separated, fields. The first field is the column name and the  
 
876 
 
 
 
 
 
 
 
 
 
 
 
 
 
 second field is the column value. For example, when processing the  
 
877 
 
 
 
 
 
 
 
 
 
 
 
 
 
 last row of the C given above, the file will have the   
 
878 
 
 
 
 
 
 
 
 
 
 
 
 
 
 following content:  
 
879 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
880 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Model       Skoda  
 
881 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Year        2002  
 
882 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Owned by    Senger  
 
883 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
884 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The files are only temporary and will be removed when  
 
885 
 
 
 
 
 
 
 
 
 
 
 
 
 
 C finishes.   
 
886 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
887 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head2 Calculated columns by a Perl subroutine  
 
888 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
889 
 
 
 
 
 
 
 
 
 
 
 
 
 
 If specified by the keyword B, a Perl subroutine is called for   
 
890 
 
 
 
 
 
 
 
 
 
 
 
 
 
 each row with the three arguments:  
 
891 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
892 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =over  
 
893 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
894 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item 1  
 
895 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
896 
 
 
 
 
 
 
 
 
 
 
 
 
 
 A hashref with information about the current column. Not often used  
 
897 
 
 
 
 
 
 
 
 
 
 
 
 
 
 but may be handy if the same subroutine deals with more columns and,  
 
898 
 
 
 
 
 
 
 
 
 
 
 
 
 
 therefore, needs to know for which column it was invoked. See the  
 
899 
 
 
 
 
 
 
 
 
 
 
 
 
 
 I example in the I section.    
 
900 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
901 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item 2  
 
902 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
903 
 
 
 
 
 
 
 
 
 
 
 
 
 
 An arrayref with all column names.  
 
904 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
905 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item 3  
 
906 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
907 
 
 
 
 
 
 
 
 
 
 
 
 
 
 An arrayref with all column values - in the same order as the column  
 
908 
 
 
 
 
 
 
 
 
 
 
 
 
 
 names.  
 
909 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
910 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =back  
 
911 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
912 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Actually, depending how the subroutine is defined in the  
 
913 
 
 
 
 
 
 
 
 
 
 
 
 
 
 configuration, it may get as the first argument the module/class name  
 
914 
 
 
 
 
 
 
 
 
 
 
 
 
 
 where it belongs to. If you define it like this:  
 
915 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
916 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERL   Module::Example::test  
 
917 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
918 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the C subroutine is called, indeed, with the three arguments as   
 
919 
 
 
 
 
 
 
 
 
 
 
 
 
 
 described above. However, if your definition is rather:  
 
920 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
921 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERL   Module::Example->test  
 
922 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
923 
 
 
 
 
 
 
 
 
 
 
 
 
 
 then the C subroutine is considered a Perl method and its first   
 
924 
 
 
 
 
 
 
 
 
 
 
 
 
 
 argument is the module/class name. It is up to you to decide how you  
 
925 
 
 
 
 
 
 
 
 
 
 
 
 
 
 want/need to write your functions. Again, an example is available in  
 
926 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the I section.   
 
927 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
928 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The return value of the subroutine will become a new value in the  
 
929 
 
 
 
 
 
 
 
 
 
 
 
 
 
 calculated column. Do not return undef but rather an empty string if  
 
930 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the value cannot be created.  
 
931 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
932 
 
 
 
 
 
 
 
 
 
 
 
 
 
 What is an advantage of writing my own module/package if I can simply  
 
933 
 
 
 
 
 
 
 
 
 
 
 
 
 
 write an external program (perhaps also in Perl) doing exactly the  
 
934 
 
 
 
 
 
 
 
 
 
 
 
 
 
 same? The Perl module stays in the memory for the whole time of  
 
935 
 
 
 
 
 
 
 
 
 
 
 
 
 
 processing all input rows and, therefore, you can re-use some  
 
936 
 
 
 
 
 
 
 
 
 
 
 
 
 
 calculations done for the previous rows. An example about it  
 
937 
 
 
 
 
 
 
 
 
 
 
 
 
 
 (C) is given in the I    
 
938 
 
 
 
 
 
 
 
 
 
 
 
 
 
 section.  
 
939 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
940 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 ARGUMENTS and OPTIONS  
 
941 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
942 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =over 4  
 
943 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
944 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-config >   
 
945 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
946 
 
 
 
 
 
 
 
 
 
 
 
 
 
 A filename with a configuration file. This is a mandatory  
 
947 
 
 
 
 
 
 
 
 
 
 
 
 
 
 parameter. The configuration file describes:  
 
948 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
949 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =over  
 
950 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
951 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
952 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
953 
 
 
 
 
 
 
 
 
 
 
 
 
 
 which columns in individual input spreadsheets should be  
 
954 
 
 
 
 
 
 
 
 
 
 
 
 
 
 included in the resulting spreadsheet,  
 
955 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
956 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
957 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
958 
 
 
 
 
 
 
 
 
 
 
 
 
 
 what names should be given to the resulting columns  
 
959 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
960 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
961 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
962 
 
 
 
 
 
 
 
 
 
 
 
 
 
 in which order should be the columns in the resulting  
 
963 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheet  
 
964 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
965 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
966 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
967 
 
 
 
 
 
 
 
 
 
 
 
 
 
 which columns should be used to match individual lines,  
 
968 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
969 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =back  
 
970 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
971 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The configuration file is a TAB-separated file (with no header  
 
972 
 
 
 
 
 
 
 
 
 
 
 
 
 
 line). Empty lines and lines starting with a "#" character are  
 
973 
 
 
 
 
 
 
 
 
 
 
 
 
 
 ignored. Each line has two columns, in some cases there is an optional  
 
974 
 
 
 
 
 
 
 
 
 
 
 
 
 
 third column. Here is a configuration file used in the example above:  
 
975 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
976 
 
 
 
 
 
 
 
 
 
 
 
 
 
    # Columns to match records from individual inputs  
 
977 
 
 
 
 
 
 
 
 
 
 
 
 
 
    MATCH   PERSON=Surname  
 
978 
 
 
 
 
 
 
 
 
 
 
 
 
 
    MATCH   CAR=Owned by  
 
979 
 
 
 
 
 
 
 
 
 
 
 
 
 
    MATCH   CHILD=Parent  
 
980 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
981 
 
 
 
 
 
 
 
 
 
 
 
 
 
    # Columns - how they be in rows  
 
982 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERSON  First name  
 
983 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERSON  Surname  
 
984 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Model  
 
985 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERSON  Sex  
 
986 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CHILD   Name  
 
987 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CHILD   Born  
 
988 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERSON  Nickname  
 
989 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERSON  Age  
 
990 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Year  
 
991 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Owned by  
 
992 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
993 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The first column is either a reserved word C, or an identifier   
 
994 
 
 
 
 
 
 
 
 
 
 
 
 
 
 of an input spreadsheet. There are also few other reserved words - see  
 
995 
 
 
 
 
 
 
 
 
 
 
 
 
 
 more about them a bit later.  
 
996 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
997 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The identifier can be almost anything (and it does not appear in the  
 
998 
 
 
 
 
 
 
 
 
 
 
 
 
 
 input spreadsheet itself). It is also used in the command-line  
 
999 
 
 
 
 
 
 
 
 
 
 
 
 
 
 argument C<-inputs> where it corresponds to a real file name of the  
 
1000 
 
 
 
 
 
 
 
 
 
 
 
 
 
 input. The lines with identifiers define what columns will be in the  
 
1001 
 
 
 
 
 
 
 
 
 
 
 
 
 
 result: the second column is the header of the wanted columns and an  
 
1002 
 
 
 
 
 
 
 
 
 
 
 
 
 
 optional third column (not used in the example above) is the header  
 
1003 
 
 
 
 
 
 
 
 
 
 
 
 
 
 used in the result. The resulting columns will be in the same order as  
 
1004 
 
 
 
 
 
 
 
 
 
 
 
 
 
 are these lines in the configuration file.  
 
1005 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1006 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The reserved word C is used to define how to match lines in the   
 
1007 
 
 
 
 
 
 
 
 
 
 
 
 
 
 input spreadsheets. The format of its second column is:  
 
1008 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1009 
 
 
 
 
 
 
 
 
 
 
 
 
 
    =    
 
1010 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1011 
 
 
 
 
 
 
 
 
 
 
 
 
 
 There should be one MATCH line for each input spreadsheet. The data in  
 
1012 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the column defined by the "column-header" will be used to find the  
 
1013 
 
 
 
 
 
 
 
 
 
 
 
 
 
 corresponding lines. In our example, the data in the column I   
 
1014 
 
 
 
 
 
 
 
 
 
 
 
 
 
 in the C will be matched with the data in the column   
 
1015 
 
 
 
 
 
 
 
 
 
 
 
 
 
 I in the C (the rows having the same values in    
 
1016 
 
 
 
 
 
 
 
 
 
 
 
 
 
 these two columns will be merged into one resulting row).  
 
1017 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1018 
 
 
 
 
 
 
 
 
 
 
 
 
 
 B   
 
1019 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1020 
 
 
 
 
 
 
 
 
 
 
 
 
 
 If you want to add so-called I as described in the   
 
1021 
 
 
 
 
 
 
 
 
 
 
 
 
 
 L"ADVANCED USAGE"> you need to use few additional reserved words in the  
 
1022 
 
 
 
 
 
 
 
 
 
 
 
 
 
 configuration file. These words are B, B and/or    
 
1023 
 
 
 
 
 
 
 
 
 
 
 
 
 
 B. They are used in the place where the new calculated column   
 
1024 
 
 
 
 
 
 
 
 
 
 
 
 
 
 should be placed. Their lines have the program name or the Perl  
 
1025 
 
 
 
 
 
 
 
 
 
 
 
 
 
 subroutine name in the second column, and they have mandatory third  
 
1026 
 
 
 
 
 
 
 
 
 
 
 
 
 
 column with the resulting name of the calculated column.  
 
1027 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1028 
 
 
 
 
 
 
 
 
 
 
 
 
 
 For example, we wish to add two columns to the input spreadsheet  
 
1029 
 
 
 
 
 
 
 
 
 
 
 
 
 
 C. The input file (the same as in the introduction) is:   
 
1030 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1031 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Model  Year  Owned by  
 
1032 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Praga  1936  Someone else  
 
1033 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Mini   1968  Gudernova  
 
1034 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Skoda  2002  Senger  
 
1035 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1036 
 
 
 
 
 
 
 
 
 
 
 
 
 
 We wish to add a column I that shows the difference between   
 
1037 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the actual year and the value from the I column. We have a   
 
1038 
 
 
 
 
 
 
 
 
 
 
 
 
 
 shell script C doing it:   
 
1039 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1040 
 
 
 
 
 
 
 
 
 
 
 
 
 
    #!/bin/bash  
 
1041 
 
 
 
 
 
 
 
 
 
 
 
 
 
    YEAR=`grep Year $1 | cut -f2`  
 
1042 
 
 
 
 
 
 
 
 
 
 
 
 
 
    NOW=`date +%Y`  
 
1043 
 
 
 
 
 
 
 
 
 
 
 
 
 
    echo $(($NOW-$YEAR))  
 
1044 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1045 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The configuration file C (assuming that we want the other   
 
1046 
 
 
 
 
 
 
 
 
 
 
 
 
 
 columns to remain the same) is:  
 
1047 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1048 
 
 
 
 
 
 
 
 
 
 
 
 
 
    MATCH   CAR=Owned by  
 
1049 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1050 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Owned by  
 
1051 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Model  
 
1052 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Year  
 
1053 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PROG    age.sh  Car age  
 
1054 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1055 
 
 
 
 
 
 
 
 
 
 
 
 
 
 When we run:  
 
1056 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1057 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -config cars.cfg -inputs CAR=cars.tsv  
 
1058 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1059 
 
 
 
 
 
 
 
 
 
 
 
 
 
 we get this result:  
 
1060 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1061 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Owned by        Model   Year    Car age  
 
1062 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Gudernova       Mini    1968    44  
 
1063 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Senger          Skoda   2002    10  
 
1064 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Someone else    Praga   1936    76  
 
1065 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1066 
 
 
 
 
 
 
 
 
 
 
 
 
 
 You can see that there is no need to use C for really   
 
1067 
 
 
 
 
 
 
 
 
 
 
 
 
 
 combining I sheets, an input can be just one sheet.   
 
1068 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1069 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Another example adds a I column to the same input, a column   
 
1070 
 
 
 
 
 
 
 
 
 
 
 
 
 
 named I that gets its value from a UNIX command   
 
1071 
 
 
 
 
 
 
 
 
 
 
 
 
 
 C. This program does not get any information which row it has   
 
1072 
 
 
 
 
 
 
 
 
 
 
 
 
 
 been invoked for. The configuration file is now (note the new line  
 
1073 
 
 
 
 
 
 
 
 
 
 
 
 
 
 with the B):   
 
1074 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1075 
 
 
 
 
 
 
 
 
 
 
 
 
 
    MATCH   CAR=Owned by  
 
1076 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1077 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Owned by  
 
1078 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Model  
 
1079 
 
 
 
 
 
 
 
 
 
 
 
 
 
    CAR     Year  
 
1080 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PROG    age.sh  Car age  
 
1081 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PROGS   date    Last updated  
 
1082 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1083 
 
 
 
 
 
 
 
 
 
 
 
 
 
 and the result is now:  
 
1084 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1085 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Owned by        Model   Year    Car age   Last updated  
 
1086 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Gudernova       Mini    1968    44        Mon Feb 27 12:32:04 AST 2012  
 
1087 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Senger          Skoda   2002    10        Mon Feb 27 12:32:04 AST 2012  
 
1088 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Someone else    Praga   1936    76        Mon Feb 27 12:32:04 AST 2012  
 
1089 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1090 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The last possibility is to call a Perl subroutine - using the reserved  
 
1091 
 
 
 
 
 
 
 
 
 
 
 
 
 
 word B in the configuration file. Let's have an input   
 
1092 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheet (C) with data about flights:   
 
1093 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1094 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Date         Flight    Airport From      Airport To  
 
1095 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-01-18   AY838     London LHR        Helsinki Vantaa  
 
1096 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-01-22   AY839     Helsinki Vantaa   London LHR  
 
1097 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-03-15   NW2       Manila            Tokyo Narita  
 
1098 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-03-21   NW1       Tokyo Narita      Manila  
 
1099 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-05-06   SV326     Sharm El Sheik    Jeddah  
 
1100 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-07-31   RJ700     Amman             Jeddah  
 
1101 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-09-21   ME369     Jeddah            Beirut  
 
1102 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-09-24   ME368     Beirut            Jeddah  
 
1103 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-12-02   EZY3064   Prague            London Stansted  
 
1104 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-12-09   EZY3067   London Stansted   Prague  
 
1105 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2012-01-26   MS663     Cairo             Jeddah  
 
1106 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1107 
 
 
 
 
 
 
 
 
 
 
 
 
 
 We want to add columns with the international airport codes for both  
 
1108 
 
 
 
 
 
 
 
 
 
 
 
 
 
 I and I. The new columns will be named    
 
1109 
 
 
 
 
 
 
 
 
 
 
 
 
 
 I and I. The Perl subroutine will use a web  
 
1110 
 
 
 
 
 
 
 
 
 
 
 
 
 
 service to find the code. The subroutine will use a closure that will  
 
1111 
 
 
 
 
 
 
 
 
 
 
 
 
 
 remember already fetched codes so the web service does not need to be  
 
1112 
 
 
 
 
 
 
 
 
 
 
 
 
 
 called several times for the same airport name.  
 
1113 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1114 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The configuration file C is:   
 
1115 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1116 
 
 
 
 
 
 
 
 
 
 
 
 
 
    MATCH   FLY=Date  
 
1117 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1118 
 
 
 
 
 
 
 
 
 
 
 
 
 
    FLY     Date  
 
1119 
 
 
 
 
 
 
 
 
 
 
 
 
 
    FLY     Flight  
 
1120 
 
 
 
 
 
 
 
 
 
 
 
 
 
    FLY     Airport From  
 
1121 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERL    Airport->find_code      Code From  
 
1122 
 
 
 
 
 
 
 
 
 
 
 
 
 
    FLY     Airport To  
 
1123 
 
 
 
 
 
 
 
 
 
 
 
 
 
    PERL    Airport->find_code      Code To  
 
1124 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1125 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The name of the subroutine is attached to the module where it comes  
 
1126 
 
 
 
 
 
 
 
 
 
 
 
 
 
 from by either B<::> or B<-E> notation.   
 
1127 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1128 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The invocation is:  
 
1129 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1130 
 
 
 
 
 
 
 
 
 
 
 
 
 
    combinesheets -config flights.cfg -inputs FLY=flights.tsv  
 
1131 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1132 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The full code for the module C, the file C is    
 
1133 
 
 
 
 
 
 
 
 
 
 
 
 
 
 here:  
 
1134 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1135 
 
 
 
 
 
 
 
 
 
 
 
 
 
    package Airport;  
 
1136 
 
 
 
 
 
 
 
 
 
 
 
 
 
    use warnings;  
 
1137 
 
 
 
 
 
 
 
 
 
 
 
 
 
    use strict;  
 
1138 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1139 
 
 
 
 
 
 
 
 
 
 
 
 
 
    use LWP::Simple;  
 
1140 
 
 
 
 
 
 
 
 
 
 
 
 
 
    use JSON;  
 
1141 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1142 
 
 
 
 
 
 
 
 
 
 
 
 
 
    # preparing a closure in order not to fetch the same airport code again and again  
 
1143 
 
 
 
 
 
 
 
 
 
 
 
 
 
    my $already_found = make_already_found();  
 
1144 
 
 
 
 
 
 
 
 
 
 
 
 
 
    sub make_already_found {  
 
1145 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $already_found = {};  
 
1146 
 
 
 
 
 
 
 
 
 
 
 
 
 
       return sub {  
 
1147 
 
 
 
 
 
 
 
 
 
 
 
 
 
          my ($airport_name, $airport_code) = @_;  
 
1148 
 
 
 
 
 
 
 
 
 
 
 
 
 
          if (exists $already_found->{$airport_name}) {  
 
1149 
 
 
 
 
 
 
 
 
 
 
 
 
 
             if ($airport_code) {  
 
1150 
 
 
 
 
 
 
 
 
 
 
 
 
 
                 $already_found->{$airport_name} = $airport_code;  
 
1151 
 
 
 
 
 
 
 
 
 
 
 
 
 
             }  
 
1152 
 
 
 
 
 
 
 
 
 
 
 
 
 
             return $already_found->{$airport_name};  
 
1153 
 
 
 
 
 
 
 
 
 
 
 
 
 
          } else {  
 
1154 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $already_found->{$airport_name} = ($airport_code ? $airport_code : 1);  
 
1155 
 
 
 
 
 
 
 
 
 
 
 
 
 
             return 0;  
 
1156 
 
 
 
 
 
 
 
 
 
 
 
 
 
          }  
 
1157 
 
 
 
 
 
 
 
 
 
 
 
 
 
       }  
 
1158 
 
 
 
 
 
 
 
 
 
 
 
 
 
    }  
 
1159 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1160 
 
 
 
 
 
 
 
 
 
 
 
 
 
    sub find_code {  
 
1161 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my ($class, $column, $header_line, $data_line) = @_;  
 
1162 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1163 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $column_with_airport_name = $column->{ocol};  
 
1164 
 
 
 
 
 
 
 
 
 
 
 
 
 
       $column_with_airport_name =~ s{Code}{Airport};  
 
1165 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1166 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $airport_name;  
 
1167 
 
 
 
 
 
 
 
 
 
 
 
 
 
       for (my $i = 0; $i < @$header_line; $i++) {  
 
1168 
 
 
 
 
 
 
 
 
 
 
 
 
 
          if ($header_line->[$i] eq $column_with_airport_name) {  
 
1169 
 
 
 
 
 
 
 
 
 
 
 
 
 
             $airport_name = $data_line->[$i];  
 
1170 
 
 
 
 
 
 
 
 
 
 
 
 
 
             last;  
 
1171 
 
 
 
 
 
 
 
 
 
 
 
 
 
          }  
 
1172 
 
 
 
 
 
 
 
 
 
 
 
 
 
       }  
 
1173 
 
 
 
 
 
 
 
 
 
 
 
 
 
       return '' unless $airport_name;  
 
1174 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1175 
 
 
 
 
 
 
 
 
 
 
 
 
 
       # now we have an airport name...  
 
1176 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $airport_code = $already_found->($airport_name);  
 
1177 
 
 
 
 
 
 
 
 
 
 
 
 
 
       return $airport_code if $airport_code;  
 
1178 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1179 
 
 
 
 
 
 
 
 
 
 
 
 
 
       #... go and find its airport code  
 
1180 
 
 
 
 
 
 
 
 
 
 
 
 
 
       $airport_code = '';  
 
1181 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $escaped_airport_name = $airport_name;  
 
1182 
 
 
 
 
 
 
 
 
 
 
 
 
 
       $escaped_airport_name =~ tr{ }{+};  
 
1183 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $url = "http://airportcode.riobard.com/search?q=$escaped_airport_name&fmt=json";  
 
1184 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $content = get ($url);  
 
1185 
 
 
 
 
 
 
 
 
 
 
 
 
 
       warn "Cannot get a response for '$url'\n"  
 
1186 
 
 
 
 
 
 
 
 
 
 
 
 
 
          unless defined $content;  
 
1187 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $json = JSON->new->allow_nonref;  
 
1188 
 
 
 
 
 
 
 
 
 
 
 
 
 
       my $data = $json->decode ($content);  
 
1189 
 
 
 
 
 
 
 
 
 
 
 
 
 
       foreach my $code (@$data) {  
 
1190 
 
 
 
 
 
 
 
 
 
 
 
 
 
          $airport_code .= $code->{code} . ",";  
 
1191 
 
 
 
 
 
 
 
 
 
 
 
 
 
       }  
 
1192 
 
 
 
 
 
 
 
 
 
 
 
 
 
       chop ($airport_code) if $airport_code;  # removing the trailing comma  
 
1193 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1194 
 
 
 
 
 
 
 
 
 
 
 
 
 
       $already_found->($airport_name, $airport_code);  
 
1195 
 
 
 
 
 
 
 
 
 
 
 
 
 
       return $airport_code;  
 
1196 
 
 
 
 
 
 
 
 
 
 
 
 
 
    }  
 
1197 
 
 
 
 
 
 
 
 
 
 
 
 
 
    1;  
 
1198 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1199 
 
 
 
 
 
 
 
 
 
 
 
 
 
 When run it creates the following output. Note that some airports have  
 
1200 
 
 
 
 
 
 
 
 
 
 
 
 
 
 more than one code because the name was ambiguous. Well, this is just  
 
1201 
 
 
 
 
 
 
 
 
 
 
 
 
 
 an example, isn't it?  
 
1202 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1203 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Date         Flight    Airport From      Code From   Airport To       Code To  
 
1204 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-01-18   AY838     London LHR        LHR         Helsinki Vantaa  HEL  
 
1205 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-01-22   AY839     Helsinki Vantaa   HEL         London LHR       LHR  
 
1206 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-03-15   NW2       Manila            MXA,MNL     Tokyo Narita     NRT  
 
1207 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2009-03-21   NW1       Tokyo Narita      NRT         Manila           MXA,MNL  
 
1208 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-05-06   SV326     Sharm El Sheik    SSH         Jeddah           JED  
 
1209 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-07-31   RJ700     Amman             ADJ,AMM     Jeddah           JED  
 
1210 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-09-21   ME369     Jeddah            JED         Beirut           BEY  
 
1211 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-09-24   ME368     Beirut            BEY         Jeddah           JED  
 
1212 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-12-02   EZY3064   Prague            PRG         London Stansted  STN  
 
1213 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2011-12-09   EZY3067   London Stansted   STN         Prague           PRG  
 
1214 
 
 
 
 
 
 
 
 
 
 
 
 
 
    2012-01-26   MS663     Cairo             CAI,CIR     Jeddah           JED  
 
1215 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1216 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-inputs = [=...]>      
 
1217 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1218 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Each C<-inputs> can have one or more file names, and there can be one  
 
1219 
 
 
 
 
 
 
 
 
 
 
 
 
 
 or more C<-inputs> arguments. It defines what are the input  
 
1220 
 
 
 
 
 
 
 
 
 
 
 
 
 
 spreadsheets and how they are identified in the configuration file  
 
1221 
 
 
 
 
 
 
 
 
 
 
 
 
 
 (see the C<-config> argument). For example, the inputs for our example  
 
1222 
 
 
 
 
 
 
 
 
 
 
 
 
 
 above can be specified in any of these ways:  
 
1223 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1224 
 
 
 
 
 
 
 
 
 
 
 
 
 
    -inputs PERSON=persons.tsv -inputs CAR=cars.tsv  
 
1225 
 
 
 
 
 
 
 
 
 
 
 
 
 
    -inputs PERSON=persons.tsv CAR=cars.tsv  
 
1226 
 
 
 
 
 
 
 
 
 
 
 
 
 
    -inputs PERSON=persons.tsv,CAR=cars.tsv  
 
1227 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1228 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The first file name is considered to be the C input (see the   
 
1229 
 
 
 
 
 
 
 
 
 
 
 
 
 
 description above): the resulting spreadsheet will have the same  
 
1230 
 
 
 
 
 
 
 
 
 
 
 
 
 
 number of lines as the primary input.  
 
1231 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1232 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The file names ending with the C<.csv> are considered to be in the  
 
1233 
 
 
 
 
 
 
 
 
 
 
 
 
 
 COMMA-separated formats, all others are considered to be  
 
1234 
 
 
 
 
 
 
 
 
 
 
 
 
 
 TAB-separated.  
 
1235 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1236 
 
 
 
 
 
 
 
 
 
 
 
 
 
 This is a mandatory parameter.  
 
1237 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1238 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-outfile >   
 
1239 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1240 
 
 
 
 
 
 
 
 
 
 
 
 
 
 An optional parameter specifying a filename of the combined result. By  
 
1241 
 
 
 
 
 
 
 
 
 
 
 
 
 
 default, it is created on STDOUT. It is always in the TAB-separated  
 
1242 
 
 
 
 
 
 
 
 
 
 
 
 
 
 format.  
 
1243 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1244 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-check>  
 
1245 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1246 
 
 
 
 
 
 
 
 
 
 
 
 
 
 This option causes that the configuration file and the input files  
 
1247 
 
 
 
 
 
 
 
 
 
 
 
 
 
 (only their header lines will be read) will be checked for errors but  
 
1248 
 
 
 
 
 
 
 
 
 
 
 
 
 
 no resulting spreadsheet will be created.  
 
1249 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1250 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-ignorecases>  
 
1251 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1252 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Not yet implemented.  
 
1253 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1254 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B   
 
1255 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1256 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =over 8  
 
1257 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1258 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-h>  
 
1259 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1260 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Print a brief usage message and exits.  
 
1261 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1262 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-help>  
 
1263 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1264 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Print a brief usage message with options and exit.  
 
1265 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1266 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-man>  
 
1267 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1268 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Print a full usage message and exit.  
 
1269 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1270 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item B<-version>  
 
1271 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1272 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Print the version and exit.  
 
1273 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1274 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =back  
 
1275 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1276 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =back  
 
1277 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1278 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 ENVIRONMENT VARIABLES  
 
1279 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1280 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head3 COMBINE_SHEETS_EXT_PATH  
 
1281 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1282 
 
 
 
 
 
 
 
 
 
 
 
 
 
 It contains a path that is used when looking for external programs  
 
1283 
 
 
 
 
 
 
 
 
 
 
 
 
 
 (when the reserved words PROG or PROGS are used). For example, the  
 
1284 
 
 
 
 
 
 
 
 
 
 
 
 
 
 C directory in the source distribution of this package has an   
 
1285 
 
 
 
 
 
 
 
 
 
 
 
 
 
 external program C. The full invocation can be done by:   
 
1286 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1287 
 
 
 
 
 
 
 
 
 
 
 
 
 
    COMBINE_SHEETS_EXT_PATH=examples bin/combinesheets -cfg examples/cars.cfg --inputs CAR=examples/cars.csv  
 
1288 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1289 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 DEPENDENCIES  
 
1290 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1291 
 
 
 
 
 
 
 
 
 
 
 
 
 
 In order to run this tool you need Perl and the following Perl modules  
 
1292 
 
 
 
 
 
 
 
 
 
 
 
 
 
 to be installed:  
 
1293 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1294 
 
 
 
 
 
 
 
 
 
 
 
 
 
    App::Cmd::Simple  
 
1295 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Text::CSV::Simple  
 
1296 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Text::CSV_XS  
 
1297 
 
 
 
 
 
 
 
 
 
 
 
 
 
    File::BOM  
 
1298 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Getopt::Long::Descriptive  
 
1299 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Pod::Usage  
 
1300 
 
 
 
 
 
 
 
 
 
 
 
 
 
    Algorithm::Loops  
 
1301 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1302 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Optionally (if your configuration file uses the reserved word PROG or  
 
1303 
 
 
 
 
 
 
 
 
 
 
 
 
 
 PROGS for calculated columns):  
 
1304 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1305 
 
 
 
 
 
 
 
 
 
 
 
 
 
    IO::CaptureOutput  
 
1306 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1307 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 KNOWN BUGS, MISSING FEATURES  
 
1308 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1309 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =over  
 
1310 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1311 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
1312 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1313 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Columns are identified by their header names. There is no way  
 
1314 
 
 
 
 
 
 
 
 
 
 
 
 
 
 to identify them simply by their order (column number).  
 
1315 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1316 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
1317 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1318 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The input spreadsheet are read first into memory. Which may be  
 
1319 
 
 
 
 
 
 
 
 
 
 
 
 
 
 a problem with really huge spreadsheets.  
 
1320 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1321 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
1322 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1323 
 
 
 
 
 
 
 
 
 
 
 
 
 
 The inputs can be COMMA-separated or TAB-separated. It would  
 
1324 
 
 
 
 
 
 
 
 
 
 
 
 
 
 be perhaps nice to allow also the Excel spreadsheets.  
 
1325 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1326 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item *  
 
1327 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1328 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Comparing header names and rows is case-sensitive only. There  
 
1329 
 
 
 
 
 
 
 
 
 
 
 
 
 
 is a plan to implement the option C<-ignorecases>,  
 
1330 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1331 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =back  
 
1332 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1333 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Some of these missing features may be implemented later.  
 
1334 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1335 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 SUPPORT  
 
1336 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1337 
 
 
 
 
 
 
 
 
 
 
 
 
 
 You can find documentation for this module with the perldoc command.  
 
1338 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1339 
 
 
 
 
 
 
 
 
 
 
 
 
 
     perldoc App::combinesheets  
 
1340 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1341 
 
 
 
 
 
 
 
 
 
 
 
 
 
 You can also look for information at:  
 
1342 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1343 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =over 4  
 
1344 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1345 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item * RT: CPAN's request tracker  
 
1346 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1347 
 
 
 
 
 
 
 
 
 
 
 
 
 
 L   
 
1348 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1349 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item * AnnoCPAN: Annotated CPAN documentation  
 
1350 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1351 
 
 
 
 
 
 
 
 
 
 
 
 
 
 L   
 
1352 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1353 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item * CPAN Ratings  
 
1354 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1355 
 
 
 
 
 
 
 
 
 
 
 
 
 
 L   
 
1356 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1357 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =item * Search CPAN  
 
1358 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1359 
 
 
 
 
 
 
 
 
 
 
 
 
 
 L 
 
1360 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1361 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =back  
 
1362 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1363 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 AUTHOR  
 
1364 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1365 
 
 
 
 
 
 
 
 
 
 
 
 
 
 Martin Senger    
 
1366 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1367 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =head1 COPYRIGHT AND LICENSE  
 
1368 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1369 
 
 
 
 
 
 
 
 
 
 
 
 
 
 This software is copyright (c) 2013 by Martin Senger, CBRC - KAUST (Computational Biology Research Center - King Abdullah University of Science and Technology) All Rights Reserved..  
 
1370 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1371 
 
 
 
 
 
 
 
 
 
 
 
 
 
 This is free software; you can redistribute it and/or modify it under  
 
1372 
 
 
 
 
 
 
 
 
 
 
 
 
 
 the same terms as the Perl 5 programming language system itself.  
 
1373 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1374 
 
 
 
 
 
 
 
 
 
 
 
 
 
 =cut  
 
1375 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1376 
 
 
 
 
 
 
 
 
 
 
 
 
 
    
 
1377 
 
 
 
 
 
 
 
 
 
 
 
 
 
 __END__