File Coverage

blib/lib/App/combinesheets.pm
Criterion Covered Total %
statement 16 18 88.8
branch n/a
condition n/a
subroutine 6 6 100.0
pod n/a
total 22 24 91.6


line stmt bran cond sub pod time code
1             #-----------------------------------------------------------------
2             # App::combinesheets
3             # Author: Martin Senger
4             # For copyright and disclaimer se below.
5             #
6             # ABSTRACT: command-line tool merging CSV and TSV spreadsheets
7             # PODNAME: App::combinesheets
8             #-----------------------------------------------------------------
9 7     7   231286 use warnings;
  7         20  
  7         226  
10 7     7   34 use strict;
  7         16  
  7         436  
11              
12             package App::combinesheets;
13              
14             our $VERSION = '0.2.14'; # VERSION
15              
16 7     7   44 use base 'App::Cmd::Simple';
  7         12  
  7         7576  
17              
18 7     7   669179 use Pod::Usage;
  7         643404  
  7         1275  
19 7     7   83 use Pod::Find qw(pod_where);
  7         18  
  7         413  
20              
21 7     7   12925 use Text::CSV::Simple;
  0            
  0            
22             use Text::CSV_XS;
23             use File::Spec;
24             use File::Temp;
25             use File::Which;
26             use File::BOM qw( :all );
27             use Algorithm::Loops qw( NestedLoops );
28             use autouse 'IO::CaptureOutput' => qw(capture_exec);
29              
30             # reserved keywords in the configuration
31             use constant {
32             CFG_MATCH => 'MATCH',
33             CFG_PROG => 'PROG',
34             CFG_PROGS => 'PROGS',
35             CFG_PERL => 'PERL',
36             };
37              
38             # types of input files
39             use constant {
40             TYPE_CSV => 'csv',
41             TYPE_TSV => 'tsv',
42             # TYPE_XSL => 'xsl', # not-yet-supported
43             };
44              
45             # hash keys describing an input ($inputs)
46             use constant {
47             INPUT_FILE => 'file',
48             INPUT_TYPE => 'type',
49             INPUT_MATCHED_BY => 'matched_by',
50             INPUT_MATCHED_BY_INDEX => 'matched_by_index',
51             INPUT_HEADERS => 'headers',
52             INPUT_CONTENT => 'content',
53             };
54              
55             # hash keys describing wanted fields ($wanted_columns)
56             use constant {
57             CFG_TYPE => 'type', # what kind of input (MATCH, PROG, PROGS or PERL)
58             CFG_OUT_COL => 'ocol', # a name for this column used in the output
59             # keys used for the normal (MATCH) columns
60             CFG_ID => 'id', # which input
61             CFG_IN_COL => 'icol', # a column in such input
62             # keys used for the calculated columns (i.e. of type PROG, PROGS or PERL)
63             CFG_EXT => 'id', # name of the external program or Perl external subroutine
64             PERL_DETAILS => '_perl_details_', # added during the config processing
65             };
66              
67             # ----------------------------------------------------------------
68             # Command-line arguments and script usage
69             # ----------------------------------------------------------------
70             sub usage_desc {
71             my $self = shift;
72             return "%c -config -inputs [other otions...]";
73             }
74             sub opt_spec {
75             return (
76             [ 'h' => "display a short usage message" ],
77             [ 'help' => "display a full usage message" ],
78             [ 'man|m' => "display a full manual page" ],
79             [ 'version|v' => "display a version" ],
80             [],
81             [ 'config|cfg=s' => "" ],
82             [ 'inputs|i=s@{1,}' => " in the form: =[,=...] (e.g. PERSON=,CAR=)" ],
83             [ 'outfile|o=s' => "" ],
84             [ 'check|c' => "only check the configuration" ],
85              
86             { getopt_conf => ['no_bundling', 'no_ignore_case', 'auto_abbrev'] }
87             );
88             }
89             sub validate_args {
90             my ($self, $opt, $args) = @_;
91              
92             # show various levels of help and exit
93             my $pod_where = pod_where ({-inc => 1}, __PACKAGE__);
94             if ($opt->h) {
95             print "Usage: " . $self->usage();
96             if ($^S) { die "Okay\n" } else { exit (0) };
97             }
98             pod2usage (-input => $pod_where, -verbose => 1, -exitval => 0) if $opt->help;
99             pod2usage (-input => $pod_where, -verbose => 2, -exitval => 0) if $opt->man;
100              
101             # show version and exit
102             if ($opt->version) {
103             ## no critic
104             no strict; # because the $VERSION will be added only when
105             no warnings; # the distribution is fully built up
106             print "$VERSION\n";
107             if ($^S) { die "Okay\n" } else { exit (0) };
108             }
109              
110             # check required command-line arguments
111             $self->usage_error ("Parameter '-config' is required.")
112             unless $opt->config;
113             $self->usage_error ("Parameter '-inputs' is required.")
114             unless $opt->inputs;
115              
116             return;
117             }
118             sub usage_error {
119             my ( $self, $error ) = @_;
120             die "Error: $error\nUsage: " . $self->usage->text;
121             }
122              
123             # ----------------------------------------------------------------
124             # The main part
125             # ----------------------------------------------------------------
126             my $inputs; # keys are input IDs
127             sub execute {
128             my ($self, $opt, $args) = @_;
129              
130             $inputs = {}; # just in case somebody calls execute() twice
131              
132             my @opt_inputs = split (m{,}, join (',', @{ $opt->inputs }));
133             my $opt_outfile = $opt->outfile;
134             my $opt_cfgfile = $opt->config;
135              
136             # prepare output handler
137             my $combined;
138             if ($opt_outfile and not $opt->check) {
139             open ($combined, '>', $opt_outfile)
140             or die "[ER00] Cannot open file $opt_outfile for writing: $!\n";
141             } else {
142             $combined = *STDOUT;
143             }
144              
145             # read configuration
146             my $wanted_cols = []; # each element: { CFG_TYPE, CFG_ID, CFG_IN_COL, CFG_OUT_COL... }
147             my $known_inputs = {}; # input ID => 1 ... the same input IDs as in $wanted_cols (for speed)
148             my $matches = {}; # input ID => matching column/header
149             my $config;
150             open ($config, '<', $opt_cfgfile)
151             or die "[ER00] Cannot read configuration file $opt_cfgfile: $!\n";
152             my $line_count = 0;
153             while (<$config>) {
154             $line_count++;
155             chomp;
156             next if m{^\s*$}; # ignore empty lines
157             next if m{^\s*#}; # ignore comment lines
158             s{^\s+|\s+$}{}g; # trim whitespaces
159             my ($input_id, $input_col, $output_col) = split (m{\s*\t\s*}, $_, 3);
160             unless ($input_id and defined $input_col) {
161             warn "[WR01] Configuration line $line_count ignored: '$_'\n";
162             next;
163             }
164             $input_id = uc ($input_id); # make config keys upper-case
165             if ($input_id eq CFG_MATCH) {
166             my ($input_id, $column) = split (m{\s*=\s*}, $input_col, 2);
167             unless ($input_id and $column !~ m{^\s*$}) {
168             warn "[WR02] Bad format in configuration line $line_count: '$input_col'. Ignored.\n";
169             next;
170             }
171             $matches->{ uc ($input_id) } = $column;
172             next;
173             }
174             my $wanted_col = {};
175             if ($input_id eq CFG_PROG or $input_id eq CFG_PROGS or $input_id eq CFG_PERL) {
176             $wanted_col->{CFG_TYPE()} = $input_id;
177             $wanted_col->{CFG_EXT()} = $input_col;
178             if (defined $output_col) {
179             $wanted_col->{CFG_OUT_COL()} = $output_col;
180             } else {
181             warn "[WR10] Missing output column name in configuration line $line_count: '$input_col'.\n";
182             $wanted_col->{CFG_OUT_COL()} = '';
183             }
184             } else {
185             $wanted_col->{CFG_TYPE()} = CFG_MATCH;
186             $wanted_col->{CFG_ID()} = $input_id;
187             $wanted_col->{CFG_IN_COL()} = $input_col;
188             $wanted_col->{CFG_OUT_COL()} = (defined $output_col ? $output_col : $input_col);
189             $known_inputs->{$input_id} = 1;
190             }
191             push (@$wanted_cols, $wanted_col);
192             }
193             close $config;
194              
195             # prepare for calculated columns
196             foreach my $col (@$wanted_cols) {
197             if ($col->{CFG_TYPE()} eq CFG_PROG or $col->{CFG_TYPE()} eq CFG_PROGS) {
198              
199             # locate the external program
200             $col->{CFG_EXT()} = find_prog ($col->{CFG_EXT()});
201              
202             } elsif ($col->{CFG_TYPE()} eq CFG_PERL) {
203              
204             # load the wanted Perl module
205             my $call = $col->{CFG_EXT()};
206             $call =~ m{^(.+)((::)|(->))(.*)$};
207             my $module = $1;
208             my $subroutine = $5;
209             my $how_to_call = $2; # can be '::' or '->'
210             unless ($module) {
211             warn "[WR11] Missing module name in '[PERL] " . $col->{CFG_OUT_COL()} . "'. Column ignored.\n";
212             $col->{ignored} = 1;
213             next;
214             }
215             unless ($subroutine) {
216             warn "[WR12] Missing subroutine name in '[PERL] " . $col->{CFG_OUT_COL()} . "'. Column ignored.\n";
217             $col->{ignored} = 1;
218             next;
219             }
220             if ($module =~ m{^:+}) {
221             warn "[WR13] Uncomplete module name in '[PERL] " . $col->{CFG_OUT_COL()} . "'. Column ignored.\n";
222             $col->{ignored} = 1;
223             next;
224             }
225             eval "require $module"; ## no critic
226             if ($@) {
227             warn "[WR14] Cannot load module '$module': $@. Column '" . $col->{CFG_OUT_COL()} . " ignored\n";
228             $col->{ignored} = 1;
229             next;
230             }
231             $module->import();
232              
233             # remember what we just parsed and checked
234             $col->{PERL_DETAILS()} = {};
235             $col->{PERL_DETAILS()}->{what_to_call} = $module . $how_to_call . $subroutine;
236             $col->{PERL_DETAILS()}->{module} = $module;
237             $col->{PERL_DETAILS()}->{subroutine} = $subroutine;
238             $col->{PERL_DETAILS()}->{how_to_call} = $how_to_call;
239             }
240             }
241             $wanted_cols = [ grep { not $_->{ignored} } @$wanted_cols ];
242              
243             # locate expected inputs
244             my $primary_input; # ID of the first input
245             foreach my $opt_input (@opt_inputs) {
246             my ($key, $value) = split (m{\s*=\s*}, $opt_input, 2);
247             next unless $key;
248             next unless $value;
249             $key = uc ($key);
250             unless (exists $known_inputs->{$key}) {
251             warn "[WR03] Configuration does not recognize the input named '$key'. Input ignored.\n";
252             next;
253             }
254             unless (exists $matches->{$key}) {
255             warn "[WR04] Input named '$key' does not have any MATCH column defined in configuration. Input ignored.\n";
256             next;
257             }
258             $primary_input = $key unless $primary_input; # remember which input came first
259             my $input = { INPUT_FILE() => $value,
260             INPUT_MATCHED_BY() => $matches->{$key} };
261             if ($value =~ m{\.csv$}i) {
262             $input->{INPUT_TYPE()} = TYPE_CSV;
263             } else {
264             $input->{INPUT_TYPE()} = TYPE_TSV;
265             }
266             $inputs->{$key} = $input;
267             }
268             die "[ER01] No valid inputs specified. Exiting.\n"
269             unless scalar keys (%$inputs) > 0;
270              
271             # read headers from all inputs
272             my $headers_by_id = {}; # used for re-using the same headers once read, and for some checks
273             foreach my $input_id (keys %$inputs) {
274             my $input = $inputs->{$input_id};
275             my $headers;
276             if (exists $headers_by_id->{$input_id}) {
277             $headers = $headers_by_id->{$input_id}; # copy already known headers
278             } else {
279             $headers = read_headers ($input);
280             }
281              
282             # add new properties to $input
283             unless (exists $headers->{ $input->{INPUT_MATCHED_BY()} }) {
284             warn ("[WR05] Input '$input_id' does not contain the matching header '" . $input->{INPUT_MATCHED_BY()} .
285             "'. Input ignored\n");
286             delete $inputs->{$input_id};
287             next;
288             }
289             $headers_by_id->{$input_id} = $headers
290             unless exists $headers_by_id->{$input_id};
291             $input->{INPUT_HEADERS()} = $headers;
292             $input->{INPUT_MATCHED_BY_INDEX()} = $headers->{ $input->{INPUT_MATCHED_BY()} };
293             }
294              
295             # check real headers vs. headers as defined in configuration
296             my $already_reported = {};
297             foreach my $col (@$wanted_cols) {
298             next if $col->{CFG_TYPE()} ne CFG_MATCH; # check is done only for normal columns
299             my $input_id = $col->{CFG_ID()};
300             if (exists $headers_by_id->{$input_id}) {
301             # does the requested column exist in this input's headers?
302             unless (column_exists ($input_id, $col->{CFG_IN_COL()})) {
303             warn "[WR06] Column '$col->{CFG_IN_COL()}' not found in the input '$input_id'. Column will be ignored.\n";
304             $col->{ignored} = 1;
305             }
306             next;
307              
308             } elsif (!exists $already_reported->{$input_id}) {
309             $already_reported->{$input_id} = 1;
310             warn "[WR07] Configuration defines columns from an input '$input_id' but no such input given (or was ignored). These columns will be ignored.\n";
311             }
312             $col->{ignored} = 1;
313             }
314             $wanted_cols = [ grep { not $_->{ignored} } @$wanted_cols ];
315              
316             foreach my $input_id (keys %$matches) {
317             next unless exists $inputs->{$input_id}; # ignoring matches whose inputs are already ignored
318             # does the matching column exist in this input's headers?
319             unless (column_exists ($input_id, $matches->{$input_id})) {
320             die "[ER02] Matching column '$matches->{$input_id}' not found in the input '$input_id'. Must exit.\n";
321             }
322             }
323              
324             # do we still have a primary input?
325             unless (exists $inputs->{$primary_input}) {
326             die "[ER03] Due to errors, the primary input '$primary_input' is now ignored. Must exit.\n";
327             }
328              
329             # end of checking
330             exit (0) if $opt->check;
331              
332             # read all inputs into memory
333             foreach my $input_id (keys %$inputs) {
334             my $input = $inputs->{$input_id};
335             my $content = read_content ($input);
336             $input->{INPUT_CONTENT()} = $content;
337             }
338              
339             # output combined headers
340             my @header_line = ();
341             foreach my $col (@$wanted_cols) {
342             push (@header_line, $col->{CFG_OUT_COL()});
343             }
344             print $combined join ("\t", @header_line) . "\n"
345             unless scalar @header_line == 0;
346              
347             # combine all inputs and make output lines
348             foreach my $matching_content (sort keys %{ $inputs->{$primary_input}->{INPUT_CONTENT()} }) {
349             # $matching_content is, for example, a publication title ("An Atlas of....")
350              
351             # inputs may have more lines with the same value in the matching columns
352             # therefore, extract first the matching lines from all inputs
353             my $lines_to_combine = [];
354             my $inputs_to_combine = {}; # keys are inputs' CFG_IDs, values are indeces into $lines_to_combine
355              
356             foreach my $col (@$wanted_cols) {
357             if ($col->{CFG_TYPE()} eq CFG_MATCH) {
358             unless (exists $inputs_to_combine->{ $col->{CFG_ID()} }) {
359             # remember the same lines (from the same input) only once
360             my $input = $inputs->{ $col->{CFG_ID()} };
361             push (@$lines_to_combine, $input->{INPUT_CONTENT()}->{$matching_content} || [undef]);
362             $inputs_to_combine->{ $col->{CFG_ID()} } = $#$lines_to_combine;
363             }
364             }
365             }
366              
367             # make all combinantions of matching lines
368              
369             # let's have 3 inputs, identified by K, L and M
370             # there are three matching lines in K, two in L and one in M:
371             # my $lines_to_combine = [
372             # [ "line1", "line2", "line3", ], # from input K
373             # [ "lineX", "lineY", ], # from input L
374             # [ "lineQ", ], # from input M
375             # );
376             # my $inputs_to_combine = { K => 0, L => 1, M => 2 };
377             #
378             # the subroutine create_output_line() will be called 6 times
379             # with the following arguments:
380             # line1, lineX, lineQ
381             # line1, lineY, lineQ
382             # line2, lineX, lineQ
383             # line2, lineY, lineQ
384             # line3, lineX, lineQ
385             # line3, lineY, lineQ
386              
387             NestedLoops ($lines_to_combine,
388             sub {
389             my @input_lines = @_;
390             my @output_line = ();
391             my @calculated = (); # indeces of the yet-to-be-calculated elements
392             my $column_count = -1;
393             foreach my $col (@$wanted_cols) { # $col defines what data to push into @output_line
394             $column_count++;
395             if ($col->{CFG_TYPE()} eq CFG_MATCH) {
396             my $input = $inputs->{ $col->{CFG_ID()} };
397             my $input_line = @input_lines[$inputs_to_combine->{ $col->{CFG_ID()} }];
398             # use Data::Dumper;
399             # print Dumper (\@input_lines);
400             # print Dumper ($inputs_to_combine);
401             my $idx = $input->{INPUT_HEADERS()}->{ $col->{CFG_IN_COL()} };
402             my $value = $input_line->[$idx] || '';
403             push (@output_line, $value);
404             } else {
405             push (@calculated, $column_count);
406             push (@output_line, '');
407             }
408             }
409             # insert the calculated columns
410             foreach my $idx (@calculated) {
411             if ($wanted_cols->[$idx]->{CFG_TYPE()} eq CFG_PROG) {
412             $output_line[$idx] = call_prog ($wanted_cols->[$idx], \@header_line, \@output_line);
413             } elsif ($wanted_cols->[$idx]->{CFG_TYPE()} eq CFG_PROGS) {
414             $output_line[$idx] = call_prog_simple ($wanted_cols->[$idx]);
415             } elsif ($wanted_cols->[$idx]->{CFG_TYPE()} eq CFG_PERL) {
416             $output_line[$idx] = call_perl ($wanted_cols->[$idx], \@header_line, \@output_line);
417             }
418             }
419              
420             print $combined join ("\t", @output_line) . "\n"
421             unless scalar @output_line == 0;
422             });
423             }
424             close $combined if $opt_outfile;
425             }
426              
427             # ----------------------------------------------------------------
428             # Call a Perl subroutine (from any module) in order to get a value for
429             # a "calculated" column. $column defines which column to fill,
430             # $header_line is an arra is an arrayref with column headers and the
431             # $data_line is another arrayref with the values for the current row.
432             #
433             # $column->{PERL_DETAILS} contains all details needed for the call
434             # ----------------------------------------------------------------
435             sub call_perl {
436             my ($column, $header_line, $data_line) = @_;
437              
438             no strict; ## no critic
439             my $what_to_call = $column->{PERL_DETAILS()}->{what_to_call};
440             my $how_to_call = $column->{PERL_DETAILS()}->{how_to_call};
441             my $module = $column->{PERL_DETAILS()}->{module};
442             my $subroutine = $column->{PERL_DETAILS()}->{subroutine};
443              
444             if ($how_to_call eq '->') {
445             return $module->$subroutine ($column, $header_line, $data_line);
446             } else {
447             return &$what_to_call ($column, $header_line, $data_line);
448             }
449             }
450              
451             # ----------------------------------------------------------------
452             # Call an external program in order to get a value for a "calculated"
453             # column. $column defines which column to fill, $header_line is an
454             # arra is an arrayref with column headers and the $data_line is
455             # another arrayref with the values for the current row.
456             #
457             # $column->{CFG_EXT} contains a program name to call
458             # ----------------------------------------------------------------
459             sub call_prog {
460             my ($column, $header_line, $data_line) = @_;
461              
462             # prepare an input file for the external program
463             my $tmp = File::Temp->new();
464             for (my $i = 0; $i < @$header_line;$i++) {
465             print $tmp $header_line->[$i] . "\t" . $data_line->[$i] . "\n";
466             }
467              
468             # call it
469             return _call_it ($column->{CFG_EXT()}, $tmp);
470             }
471              
472             # ----------------------------------------------------------------
473             # Call an external program (without any command-line arguments) in
474             # order to get a value for a "calculated" column.
475             #
476             # $column->{CFG_EXT} contains a program name to call
477             # ----------------------------------------------------------------
478             sub call_prog_simple {
479             my ($column) = @_;
480             return _call_it ($column->{CFG_EXT()});
481             }
482              
483             # ----------------------------------------------------------------
484             #
485             # ----------------------------------------------------------------
486             sub _call_it {
487             my @command = @_;
488             my ($stdout, $stderr, $success, $exit_code) = capture_exec (@command);
489             if ($exit_code != 0 or $stderr) {
490             my $errmsg = '[ER05] Failed command: ' . join (' ', map {"'$_'"} @command) . "\n";
491             $errmsg .= "STDERR: $stderr\n" if $stderr;
492             $errmsg .= "EXIT CODE: $exit_code\n";
493             die $errmsg;
494             }
495             chomp $stdout; # remove the last newline
496             $stdout =~ s{\n}{ }g; # better to replace newlines
497             return $stdout;
498             }
499              
500             # ----------------------------------------------------------------
501             # Locate given $prgname and return it, usually with an added path. Or
502             # die if such program cannot be found or it is not executable.
503             # ----------------------------------------------------------------
504             sub find_prog {
505             my $prgname = shift;
506             my $full_name;
507              
508             # 1) try the name as it is (e.g. the ones with an absolute path)
509             if (-e $prgname and -x $prgname and
510             File::Spec->file_name_is_absolute ($prgname)) {
511             return $prgname;
512             }
513              
514             # 2) try to find it on system PATH
515             $full_name = which ($prgname);
516             if ($full_name ) {
517             chomp $full_name;
518             return $full_name;
519             }
520              
521             # 3) try the environment variable with a path
522             if (exists $ENV{COMBINE_SHEETS_EXT_PATH}) {
523             $full_name = File::Spec->catfile ($ENV{COMBINE_SHEETS_EXT_PATH}, $prgname);
524             return maybe_die ($full_name);
525             }
526              
527             # 4) try to find it in the current directory
528             $full_name = File::Spec->catfile ('./', $prgname);
529             return maybe_die ($full_name);
530             }
531             sub maybe_die {
532             my $prg = shift;
533             die "[ER04] '$prg' not found or is not executable.\n"
534             unless -e $prg and -x $prg;
535             return $prg;
536             }
537              
538             # ----------------------------------------------------------------
539             # Does the requested $column exist in the given input's headers?
540             # ----------------------------------------------------------------
541             sub column_exists {
542             my ($input_id, $column) = @_;
543             return exists $inputs->{$input_id}->{INPUT_HEADERS()}->{$column};
544             }
545              
546             # ----------------------------------------------------------------
547             # Read the headers (the first line) form an input file (given in
548             # hashref $input) and store them in the hashref $headers, each od them
549             # with its index as it appears in the read file. Do nothing if
550             # $headers already contains headers from the same input identifier.
551             # ----------------------------------------------------------------
552             sub read_headers {
553             my ($input) = @_;
554              
555             my $headers;
556             if ($input->{INPUT_TYPE()} eq TYPE_CSV) {
557             $headers = read_csv_headers ($input->{INPUT_FILE()});
558             } else {
559             $headers = read_tsv_headers ($input->{INPUT_FILE()});
560             }
561             my $new_headers = {};
562             my $column_index = 0;
563             foreach my $column (@$headers) {
564             $new_headers->{$column} = $column_index++;
565             }
566             return $new_headers;
567             }
568              
569             # ----------------------------------------------------------------
570             #
571             # ----------------------------------------------------------------
572             sub read_csv_headers {
573             my ($file) = @_;
574             my $line = read_first_line ($file);
575              
576             my $parser = Text::CSV_XS->new ({
577             allow_loose_quotes => 1,
578             escape_char => "\\",
579             });
580             if ($parser->parse ($line)) {
581             return [ $parser->fields ];
582             } else {
583             die "[ER04] Parsing CSV file $file failed: " .
584             $parser->error_input . "\n" .
585             $parser->error_diag() . "\n";
586             }
587             }
588              
589             # ----------------------------------------------------------------
590             #
591             # ----------------------------------------------------------------
592             sub read_tsv_headers {
593             my ($file) = @_;
594             my $line = read_first_line ($file);
595             return [ split (m{\t}, $line) ];
596             }
597              
598             # ----------------------------------------------------------------
599             #
600             # ----------------------------------------------------------------
601             sub read_first_line {
602             my ($file) = @_;
603             my $fh;
604             open_bom ($fh, $file); # or open ($fh, '<', $file)
605             # or die "[ER00] Cannot read input file $file: $!\n";
606             my $line = <$fh>; # read just one line
607             close $fh;
608             $line =~ s{(\r|\n)+$}{}; # remove newlines of any kind
609             return $line;
610             }
611              
612             # ----------------------------------------------------------------
613             # Stringify a hashref
614             # ----------------------------------------------------------------
615             sub ph {
616             my $hashref = shift;
617             my $result = '';
618             my ($key, $value);
619             while (($key, $value) = each (%$hashref)) {
620             $result .= "$key => $value,";
621             }
622             return substr ($result, 0, -1);
623             }
624              
625             # ----------------------------------------------------------------
626             # Read contents...
627             # ----------------------------------------------------------------
628             sub read_content {
629             my ($input) = @_;
630             my $content;
631             if ($input->{INPUT_TYPE()} eq TYPE_CSV) {
632             $content = read_csv_content ($input->{INPUT_FILE()}, $input->{INPUT_MATCHED_BY_INDEX()});
633             } else {
634             $content = read_tsv_content ($input->{INPUT_FILE()}, $input->{INPUT_MATCHED_BY_INDEX()});
635             }
636             return $content;
637             }
638              
639             # ----------------------------------------------------------------
640             #
641             # ----------------------------------------------------------------
642             sub read_tsv_content {
643             my ($file, $matched_index) = @_;
644             my $fh;
645             open_bom ($fh, $file); # or open ($fh, '<', $file)
646             # or die "[ER00] Cannot read input file $file: $!\n";
647             my $content = {};
648             my $line_count = 0;
649             while (my $line = <$fh>) {
650             next if $line_count++ == 0; # skip header line
651             next if $line =~ m{^\s*$}; # ignore empty lines
652             $line =~ s{(\r|\n)+$}{}; # remove newlines of any kind
653             my @data = split (m{\t}, $line);
654             $content->{ $data[$matched_index] } = [] unless $content->{ $data[$matched_index] };
655             push (@{ $content->{ $data[$matched_index] } }, [@data]);
656             }
657             close $fh;
658             return $content;
659             }
660              
661             # ----------------------------------------------------------------
662             #
663             # ----------------------------------------------------------------
664             sub read_csv_content {
665             my ($file, $matched_index) = @_;
666             my $count_lines = 0;
667             my $content = {};
668              
669             # create a CSV parser; any error in reading input will be fatal
670             my $csv = Text::CSV_XS->new ({
671             allow_loose_quotes => 1,
672             escape_char => "\\",
673             auto_diag => 1,
674             });
675              
676             # read the CSV input
677             open_bom (my $fh, $file);
678             while (<$fh>) {
679             if ($csv->parse ($_)) {
680             next if $count_lines++ == 0; # headers are ignored
681             my @data = $csv->fields;
682             if (@data) {
683             push (@{ $content->{ $data[$matched_index] } }, \@data);
684             }
685             } else {
686             my $err = $csv->error_input;
687             warn "[WR09] Possible a wrong or not-readable input file '$file': $err\n";
688             exit (1);
689             }
690             }
691              
692             # $parser->add_trigger (after_parse => sub {
693             # my ($self, $data) = @_;
694             # return if $count_lines++ == 0; # headers are ignored
695             # $content->{ $data->[$matched_index] } = [] unless $content->{ $data->[$matched_index] };
696             # push (@{ $content->{ $data->[$matched_index] } }, $data);
697             # });
698             # read CSV input (the result is not used here; everything is done in triggers)
699             # $parser->read_file ($file);
700              
701             return $content;
702             }
703             1;
704              
705              
706              
707             =pod
708              
709             =head1 NAME
710              
711             App::combinesheets - command-line tool merging CSV and TSV spreadsheets
712              
713             =head1 VERSION
714              
715             version 0.2.14
716              
717             =head1 SYNOPSIS
718              
719             combinesheets -h
720             combinesheets -help
721             combinesheets -man
722             combinesheets -version
723              
724             combinesheets -config -inputs [] [-outfile ]
725              
726             where has the form: = [=...]
727             where are: -check
728              
729             =head1 DESCRIPTION
730              
731             B is a command-line tool merging together two or more
732             spreadsheets. The spreadsheets can be COMMA-separated or TAB-separated
733             files, each of them having the first line with column headers. Data in
734             one of the column (it can be a different column in each input
735             spreadsheet) serve to match lines. For example, having two spreadsheets,
736             PERSON and CAR, with the following contents:
737              
738             persons.tsv:
739              
740             Surname First name Sex Age Nickname
741             Novak Jan M 52 Honza
742             Gudernova Jitka F 56
743             Senger Martin M 61 Tulak
744              
745             cars.tsv:
746              
747             Model Year Owned by
748             Praga 1936 Someone else
749             Mini 1968 Gudernova
750             Skoda 2002 Senger
751              
752             we want to merge these spreadsheet by C in persons.tsv and by
753             C in cars.tsv. There are two possible results, depending
754             which spreadsheet is used as the first one (a primary one). If the
755             persons.tsv is the first, the result will be (which columns are
756             included in the result will be described later in this document):
757              
758             combinesheets -cfg config.cfg -in PERSON=persons.tsv CAR=cars.csv
759              
760             First name Surname Model Sex Nickname Age Year Owned by
761             Jitka Gudernova Mini F 56 1968 Gudernova
762             Jan Novak M Honza 52
763             Martin Senger Skoda M Tulak 61 2002 Senger
764              
765             Or, if the cars.tsv is the first, the result will be:
766              
767             combinesheets -cfg config.cfg -in CAR=cars.csv PERSON=persons.tsv
768              
769             First name Surname Model Sex Nickname Age Year Owned by
770             Jitka Gudernova Mini F 56 1968 Gudernova
771             Martin Senger Skoda M Tulak 61 2002 Senger
772             Praga 1936 Someone else
773              
774             Of course, if both input spreadsheets have only the matching lines,
775             both results will be the same (it will not matter which one of them is
776             considered the primary one).
777              
778             The rows in the resulting spreadsheet are sorted by values in the
779             column that was used as a matching column in the primary input.
780              
781             The information which columns should be used to match the input
782             spreadsheets and which columns should appear in the resulting
783             spreadsheet is read from a configuration file (see the C<-config>
784             - or C<-cfg> - argument).
785              
786             The command-line arguments and options can be specified with single or
787             double dash. Most of them can be abbreviated to the nearest non-biased
788             length. They are case-sensitive.
789              
790             =head2 Duplicated values in the matching columns
791              
792             If there are repeated (the same) values in the column that serves as
793             matching criterion then the resulting spreadsheet will have as many
794             output lines (for a particular matching value) as is the number of all
795             combinations of the lines with that matching values in all input
796             spreadsheets. For example, let's have C and C,
797             assuming that a book can have more authors and any author can
798             contribute to any number of books:
799              
800             books.tsv:
801             Title Note Author
802             Book 1 from B1-a Kim
803             Book 2 from B2-b Kim
804             Book 3 from B3-c Katrin
805             Book 1 from B1-d Blanka
806             Book 2 from B2-e Katrin
807              
808             authors.tsv:
809             Age Name
810             28 Kim
811             20 Katrin
812             30 Blanka
813             50 Lazy author
814              
815             The output (again, depending on which input is considered a primary
816             input) will be (a list of included column is defined in the
817             configuration file - see later):
818              
819             combinesheets -cfg books_to_authors.cfg -in BOOK=books.tsv AUTHOR=authors.tsv
820              
821             Name Title Age Note
822             Blanka Book 1 30 from B1-d
823             Katrin Book 3 20 from B3-c
824             Katrin Book 2 20 from B2-e
825             Kim Book 1 28 from B1-a
826             Kim Book 2 28 from B2-b
827              
828             combinesheets -cfg books_to_authors.cfg -in AUTHOR=authors.tsv BOOK=books.tsv
829              
830             Name Title Age Note
831             Blanka Book 1 30 from B1-d
832             Katrin Book 3 20 from B3-c
833             Katrin Book 2 20 from B2-e
834             Kim Book 1 28 from B1-a
835             Kim Book 2 28 from B2-b
836             Lazy author 50
837              
838             =head1 ADVANCED USAGE
839              
840             Additionally to the merging columns from one or more spreadsheets,
841             this script can also add completely new columns to the resulting
842             spreadsheet, the columns that do not exist in any of the input
843             spreadsheet. Such columns are called C.
844              
845             Each C is created either by an external,
846             command-line driven, program, or by a Perl subroutine. In both cases,
847             the user must create (write) such external program or such Perl
848             subroutine. Therefore, this usage is meant more for developers than
849             for the end users.
850              
851             Note that this advanced feature is meant only for new columns, not for
852             new rows. Therefore, it cannot be used, for example, to create rows
853             with totals of columns.
854              
855             =head2 Calculated columns by external programs
856              
857             If specified, an external program is invoked for each row. It can be
858             specified either by a keyword B or by a keyword B - see
859             syntax in the I section. In both cases, the
860             value of the standard output of these programs become the value of the
861             calculated column (a trailing newline of this standard output is
862             removed and other newlines are replaced by spaces).
863              
864             A program defined by the B is called without any arguments
865             (C in I stands for a I). That's why it does not have
866             any knowledge for which row it has been invoked. Its usage is,
867             therefore, for column values that are not dependent on other values
868             from the spreadsheet. For example, for the C shown above,
869             you can add a column C by calling a UNIX program C
870             - again, see an example the I
871             section.
872              
873             A program defined by the B is called with one argument which is
874             a filename. This file contains the current row; each of its lines has
875             two, TAB-separated, fields. The first field is the column name and the
876             second field is the column value. For example, when processing the
877             last row of the C given above, the file will have the
878             following content:
879              
880             Model Skoda
881             Year 2002
882             Owned by Senger
883              
884             The files are only temporary and will be removed when
885             C finishes.
886              
887             =head2 Calculated columns by a Perl subroutine
888              
889             If specified by the keyword B, a Perl subroutine is called for
890             each row with the three arguments:
891              
892             =over
893              
894             =item 1
895              
896             A hashref with information about the current column. Not often used
897             but may be handy if the same subroutine deals with more columns and,
898             therefore, needs to know for which column it was invoked. See the
899             I example in the I section.
900              
901             =item 2
902              
903             An arrayref with all column names.
904              
905             =item 3
906              
907             An arrayref with all column values - in the same order as the column
908             names.
909              
910             =back
911              
912             Actually, depending how the subroutine is defined in the
913             configuration, it may get as the first argument the module/class name
914             where it belongs to. If you define it like this:
915              
916             PERL Module::Example::test
917              
918             the C subroutine is called, indeed, with the three arguments as
919             described above. However, if your definition is rather:
920              
921             PERL Module::Example->test
922              
923             then the C subroutine is considered a Perl method and its first
924             argument is the module/class name. It is up to you to decide how you
925             want/need to write your functions. Again, an example is available in
926             the I section.
927              
928             The return value of the subroutine will become a new value in the
929             calculated column. Do not return undef but rather an empty string if
930             the value cannot be created.
931              
932             What is an advantage of writing my own module/package if I can simply
933             write an external program (perhaps also in Perl) doing exactly the
934             same? The Perl module stays in the memory for the whole time of
935             processing all input rows and, therefore, you can re-use some
936             calculations done for the previous rows. An example about it
937             (C) is given in the I
938             section.
939              
940             =head1 ARGUMENTS and OPTIONS
941              
942             =over 4
943              
944             =item B<-config >
945              
946             A filename with a configuration file. This is a mandatory
947             parameter. The configuration file describes:
948              
949             =over
950              
951             =item *
952              
953             which columns in individual input spreadsheets should be
954             included in the resulting spreadsheet,
955              
956             =item *
957              
958             what names should be given to the resulting columns
959              
960             =item *
961              
962             in which order should be the columns in the resulting
963             spreadsheet
964              
965             =item *
966              
967             which columns should be used to match individual lines,
968              
969             =back
970              
971             The configuration file is a TAB-separated file (with no header
972             line). Empty lines and lines starting with a "#" character are
973             ignored. Each line has two columns, in some cases there is an optional
974             third column. Here is a configuration file used in the example above:
975              
976             # Columns to match records from individual inputs
977             MATCH PERSON=Surname
978             MATCH CAR=Owned by
979             MATCH CHILD=Parent
980              
981             # Columns - how they be in rows
982             PERSON First name
983             PERSON Surname
984             CAR Model
985             PERSON Sex
986             CHILD Name
987             CHILD Born
988             PERSON Nickname
989             PERSON Age
990             CAR Year
991             CAR Owned by
992              
993             The first column is either a reserved word C, or an identifier
994             of an input spreadsheet. There are also few other reserved words - see
995             more about them a bit later.
996              
997             The identifier can be almost anything (and it does not appear in the
998             input spreadsheet itself). It is also used in the command-line
999             argument C<-inputs> where it corresponds to a real file name of the
1000             input. The lines with identifiers define what columns will be in the
1001             result: the second column is the header of the wanted columns and an
1002             optional third column (not used in the example above) is the header
1003             used in the result. The resulting columns will be in the same order as
1004             are these lines in the configuration file.
1005              
1006             The reserved word C is used to define how to match lines in the
1007             input spreadsheets. The format of its second column is:
1008              
1009             =
1010              
1011             There should be one MATCH line for each input spreadsheet. The data in
1012             the column defined by the "column-header" will be used to find the
1013             corresponding lines. In our example, the data in the column I
1014             in the C will be matched with the data in the column
1015             I in the C (the rows having the same values in
1016             these two columns will be merged into one resulting row).
1017              
1018             B
1019              
1020             If you want to add so-called I as described in the
1021             L you need to use few additional reserved words in the
1022             configuration file. These words are B, B and/or
1023             B. They are used in the place where the new calculated column
1024             should be placed. Their lines have the program name or the Perl
1025             subroutine name in the second column, and they have mandatory third
1026             column with the resulting name of the calculated column.
1027              
1028             For example, we wish to add two columns to the input spreadsheet
1029             C. The input file (the same as in the introduction) is:
1030              
1031             Model Year Owned by
1032             Praga 1936 Someone else
1033             Mini 1968 Gudernova
1034             Skoda 2002 Senger
1035              
1036             We wish to add a column I that shows the difference between
1037             the actual year and the value from the I column. We have a
1038             shell script C doing it:
1039              
1040             #!/bin/bash
1041             YEAR=`grep Year $1 | cut -f2`
1042             NOW=`date +%Y`
1043             echo $(($NOW-$YEAR))
1044              
1045             The configuration file C (assuming that we want the other
1046             columns to remain the same) is:
1047              
1048             MATCH CAR=Owned by
1049              
1050             CAR Owned by
1051             CAR Model
1052             CAR Year
1053             PROG age.sh Car age
1054              
1055             When we run:
1056              
1057             combinesheets -config cars.cfg -inputs CAR=cars.tsv
1058              
1059             we get this result:
1060              
1061             Owned by Model Year Car age
1062             Gudernova Mini 1968 44
1063             Senger Skoda 2002 10
1064             Someone else Praga 1936 76
1065              
1066             You can see that there is no need to use C for really
1067             combining I sheets, an input can be just one sheet.
1068              
1069             Another example adds a I column to the same input, a column
1070             named I that gets its value from a UNIX command
1071             C. This program does not get any information which row it has
1072             been invoked for. The configuration file is now (note the new line
1073             with the B):
1074              
1075             MATCH CAR=Owned by
1076              
1077             CAR Owned by
1078             CAR Model
1079             CAR Year
1080             PROG age.sh Car age
1081             PROGS date Last updated
1082              
1083             and the result is now:
1084              
1085             Owned by Model Year Car age Last updated
1086             Gudernova Mini 1968 44 Mon Feb 27 12:32:04 AST 2012
1087             Senger Skoda 2002 10 Mon Feb 27 12:32:04 AST 2012
1088             Someone else Praga 1936 76 Mon Feb 27 12:32:04 AST 2012
1089              
1090             The last possibility is to call a Perl subroutine - using the reserved
1091             word B in the configuration file. Let's have an input
1092             spreadsheet (C) with data about flights:
1093              
1094             Date Flight Airport From Airport To
1095             2009-01-18 AY838 London LHR Helsinki Vantaa
1096             2009-01-22 AY839 Helsinki Vantaa London LHR
1097             2009-03-15 NW2 Manila Tokyo Narita
1098             2009-03-21 NW1 Tokyo Narita Manila
1099             2011-05-06 SV326 Sharm El Sheik Jeddah
1100             2011-07-31 RJ700 Amman Jeddah
1101             2011-09-21 ME369 Jeddah Beirut
1102             2011-09-24 ME368 Beirut Jeddah
1103             2011-12-02 EZY3064 Prague London Stansted
1104             2011-12-09 EZY3067 London Stansted Prague
1105             2012-01-26 MS663 Cairo Jeddah
1106              
1107             We want to add columns with the international airport codes for both
1108             I and I. The new columns will be named
1109             I and I. The Perl subroutine will use a web
1110             service to find the code. The subroutine will use a closure that will
1111             remember already fetched codes so the web service does not need to be
1112             called several times for the same airport name.
1113              
1114             The configuration file C is:
1115              
1116             MATCH FLY=Date
1117              
1118             FLY Date
1119             FLY Flight
1120             FLY Airport From
1121             PERL Airport->find_code Code From
1122             FLY Airport To
1123             PERL Airport->find_code Code To
1124              
1125             The name of the subroutine is attached to the module where it comes
1126             from by either B<::> or B<-E> notation.
1127              
1128             The invocation is:
1129              
1130             combinesheets -config flights.cfg -inputs FLY=flights.tsv
1131              
1132             The full code for the module C, the file C is
1133             here:
1134              
1135             package Airport;
1136             use warnings;
1137             use strict;
1138              
1139             use LWP::Simple;
1140             use JSON;
1141              
1142             # preparing a closure in order not to fetch the same airport code again and again
1143             my $already_found = make_already_found();
1144             sub make_already_found {
1145             my $already_found = {};
1146             return sub {
1147             my ($airport_name, $airport_code) = @_;
1148             if (exists $already_found->{$airport_name}) {
1149             if ($airport_code) {
1150             $already_found->{$airport_name} = $airport_code;
1151             }
1152             return $already_found->{$airport_name};
1153             } else {
1154             $already_found->{$airport_name} = ($airport_code ? $airport_code : 1);
1155             return 0;
1156             }
1157             }
1158             }
1159              
1160             sub find_code {
1161             my ($class, $column, $header_line, $data_line) = @_;
1162              
1163             my $column_with_airport_name = $column->{ocol};
1164             $column_with_airport_name =~ s{Code}{Airport};
1165              
1166             my $airport_name;
1167             for (my $i = 0; $i < @$header_line; $i++) {
1168             if ($header_line->[$i] eq $column_with_airport_name) {
1169             $airport_name = $data_line->[$i];
1170             last;
1171             }
1172             }
1173             return '' unless $airport_name;
1174              
1175             # now we have an airport name...
1176             my $airport_code = $already_found->($airport_name);
1177             return $airport_code if $airport_code;
1178              
1179             #... go and find its airport code
1180             $airport_code = '';
1181             my $escaped_airport_name = $airport_name;
1182             $escaped_airport_name =~ tr{ }{+};
1183             my $url = "http://airportcode.riobard.com/search?q=$escaped_airport_name&fmt=json";
1184             my $content = get ($url);
1185             warn "Cannot get a response for '$url'\n"
1186             unless defined $content;
1187             my $json = JSON->new->allow_nonref;
1188             my $data = $json->decode ($content);
1189             foreach my $code (@$data) {
1190             $airport_code .= $code->{code} . ",";
1191             }
1192             chop ($airport_code) if $airport_code; # removing the trailing comma
1193              
1194             $already_found->($airport_name, $airport_code);
1195             return $airport_code;
1196             }
1197             1;
1198              
1199             When run it creates the following output. Note that some airports have
1200             more than one code because the name was ambiguous. Well, this is just
1201             an example, isn't it?
1202              
1203             Date Flight Airport From Code From Airport To Code To
1204             2009-01-18 AY838 London LHR LHR Helsinki Vantaa HEL
1205             2009-01-22 AY839 Helsinki Vantaa HEL London LHR LHR
1206             2009-03-15 NW2 Manila MXA,MNL Tokyo Narita NRT
1207             2009-03-21 NW1 Tokyo Narita NRT Manila MXA,MNL
1208             2011-05-06 SV326 Sharm El Sheik SSH Jeddah JED
1209             2011-07-31 RJ700 Amman ADJ,AMM Jeddah JED
1210             2011-09-21 ME369 Jeddah JED Beirut BEY
1211             2011-09-24 ME368 Beirut BEY Jeddah JED
1212             2011-12-02 EZY3064 Prague PRG London Stansted STN
1213             2011-12-09 EZY3067 London Stansted STN Prague PRG
1214             2012-01-26 MS663 Cairo CAI,CIR Jeddah JED
1215              
1216             =item B<-inputs = [=...]>
1217              
1218             Each C<-inputs> can have one or more file names, and there can be one
1219             or more C<-inputs> arguments. It defines what are the input
1220             spreadsheets and how they are identified in the configuration file
1221             (see the C<-config> argument). For example, the inputs for our example
1222             above can be specified in any of these ways:
1223              
1224             -inputs PERSON=persons.tsv -inputs CAR=cars.tsv
1225             -inputs PERSON=persons.tsv CAR=cars.tsv
1226             -inputs PERSON=persons.tsv,CAR=cars.tsv
1227              
1228             The first file name is considered to be the C input (see the
1229             description above): the resulting spreadsheet will have the same
1230             number of lines as the primary input.
1231              
1232             The file names ending with the C<.csv> are considered to be in the
1233             COMMA-separated formats, all others are considered to be
1234             TAB-separated.
1235              
1236             This is a mandatory parameter.
1237              
1238             =item B<-outfile >
1239              
1240             An optional parameter specifying a filename of the combined result. By
1241             default, it is created on STDOUT. It is always in the TAB-separated
1242             format.
1243              
1244             =item B<-check>
1245              
1246             This option causes that the configuration file and the input files
1247             (only their header lines will be read) will be checked for errors but
1248             no resulting spreadsheet will be created.
1249              
1250             =item B<-ignorecases>
1251              
1252             Not yet implemented.
1253              
1254             =item B
1255              
1256             =over 8
1257              
1258             =item B<-h>
1259              
1260             Print a brief usage message and exits.
1261              
1262             =item B<-help>
1263              
1264             Print a brief usage message with options and exit.
1265              
1266             =item B<-man>
1267              
1268             Print a full usage message and exit.
1269              
1270             =item B<-version>
1271              
1272             Print the version and exit.
1273              
1274             =back
1275              
1276             =back
1277              
1278             =head1 ENVIRONMENT VARIABLES
1279              
1280             =head3 COMBINE_SHEETS_EXT_PATH
1281              
1282             It contains a path that is used when looking for external programs
1283             (when the reserved words PROG or PROGS are used). For example, the
1284             C directory in the source distribution of this package has an
1285             external program C. The full invocation can be done by:
1286              
1287             COMBINE_SHEETS_EXT_PATH=examples bin/combinesheets -cfg examples/cars.cfg --inputs CAR=examples/cars.csv
1288              
1289             =head1 DEPENDENCIES
1290              
1291             In order to run this tool you need Perl and the following Perl modules
1292             to be installed:
1293              
1294             App::Cmd::Simple
1295             Text::CSV::Simple
1296             Text::CSV_XS
1297             File::BOM
1298             Getopt::Long::Descriptive
1299             Pod::Usage
1300             Algorithm::Loops
1301              
1302             Optionally (if your configuration file uses the reserved word PROG or
1303             PROGS for calculated columns):
1304              
1305             IO::CaptureOutput
1306              
1307             =head1 KNOWN BUGS, MISSING FEATURES
1308              
1309             =over
1310              
1311             =item *
1312              
1313             Columns are identified by their header names. There is no way
1314             to identify them simply by their order (column number).
1315              
1316             =item *
1317              
1318             The input spreadsheet are read first into memory. Which may be
1319             a problem with really huge spreadsheets.
1320              
1321             =item *
1322              
1323             The inputs can be COMMA-separated or TAB-separated. It would
1324             be perhaps nice to allow also the Excel spreadsheets.
1325              
1326             =item *
1327              
1328             Comparing header names and rows is case-sensitive only. There
1329             is a plan to implement the option C<-ignorecases>,
1330              
1331             =back
1332              
1333             Some of these missing features may be implemented later.
1334              
1335             =head1 SUPPORT
1336              
1337             You can find documentation for this module with the perldoc command.
1338              
1339             perldoc App::combinesheets
1340              
1341             You can also look for information at:
1342              
1343             =over 4
1344              
1345             =item * RT: CPAN's request tracker
1346              
1347             L
1348              
1349             =item * AnnoCPAN: Annotated CPAN documentation
1350              
1351             L
1352              
1353             =item * CPAN Ratings
1354              
1355             L
1356              
1357             =item * Search CPAN
1358              
1359             L
1360              
1361             =back
1362              
1363             =head1 AUTHOR
1364              
1365             Martin Senger
1366              
1367             =head1 COPYRIGHT AND LICENSE
1368              
1369             This software is copyright (c) 2013 by Martin Senger, CBRC - KAUST (Computational Biology Research Center - King Abdullah University of Science and Technology) All Rights Reserved..
1370              
1371             This is free software; you can redistribute it and/or modify it under
1372             the same terms as the Perl 5 programming language system itself.
1373              
1374             =cut
1375              
1376              
1377             __END__