File Coverage

blib/lib/App/CSV.pm
Criterion Covered Total %
statement 103 126 81.7
branch 35 66 53.0
condition 6 18 33.3
subroutine 18 19 94.7
pod 0 3 0.0
total 162 232 69.8


line stmt bran cond sub pod time code
1             package App::CSV;
2              
3 2     2   53182 use Moose;
  2         1296479  
  2         20  
4 2     2   23289 use IO::Handle;
  2         16800  
  2         113  
5 2     2   3880 use Text::CSV;
  2         48960  
  2         83  
6              
7             our $VERSION = '0.08';
8              
9 2     2   582 BEGIN {
10             # One day, MooseX::Getopt will allow us to pass pass_through to Getopt::Long.
11             # Until then, do the following ugly thing unconditionally.
12             # (We don't need a BEGIN block here yet. But we will once we start fussing
13             # around with version numbers.)
14 2     2   3154 use Getopt::Long qw(:config pass_through);
  2         30603  
  2         16  
15             }
16              
17             with 'MooseX::Getopt';
18              
19             # Create "hasrw" and "hasro" sugar for less cumbersome attribute declarations.
20             # Why isn't this in Moose?
21             BEGIN {
22             my $mk_has = sub {
23 4         8 my($access) = @_;
24             return sub {
25 66     66   256 my($attr, @args) = @_;
26 66         380 has $attr => (
27             is => $access,
28             metaclass => 'Getopt', # For cmd_aliases
29             @args,
30             );
31 4         13604 };
32 2     2   9 };
33 2     2   23 no strict 'refs';
  2         5  
  2         174  
34 2         8 *hasrw = $mk_has->('rw');
35 2         8 *hasro = $mk_has->('ro');
36             }
37              
38             # Input and output filenames. Significant when we want to DWIM with TSV files.
39             hasrw input => (isa => 'Str', cmd_aliases => 'i');
40             hasrw output => (isa => 'Str', cmd_aliases => 'o');
41              
42             # isa => 'FileHandle' (or IO::String...)
43             hasrw _input_fh => ();
44             hasrw _output_fh => ();
45              
46             # TODO: command line aliases?
47             hasro from_tsv =>
48             (isa => 'Bool', cmd_aliases => 'from-tsv', predicate => 'has_from_tsv');
49             hasro to_tsv =>
50             (isa => 'Bool', cmd_aliases => 'to-tsv', predicate => 'has_to_tsv');
51              
52             hasrw _init => (isa => 'Bool');
53              
54             # Normalized column indexes.
55             hasrw columns => (isa => 'ArrayRef[Int]', cmd_aliases => 'c');
56              
57             # Named fields.
58             hasrw fields => (isa => 'ArrayRef[Str]', cmd_aliases => 'f');
59              
60             # The input and output CSV processors.
61             hasrw _input_csv => ();
62             hasrw _output_csv => ();
63              
64             # Text::CSV options, straight from the manpage.
65             # We override Text::CSV's default here... because it's WRONG.
66             our %TextCSVOptions = (
67             # name => [type, default, alias, @extra_opts]
68             quote_char => ['Str', '"', 'q'],
69             escape_char => ['Str', '"', 'e'],
70             sep_char => ['Str', ',', 's', is => 'rw'],
71             eol => ['Any', ''],
72             always_quote => ['Int', 0],
73             binary => ['Int', 1, 'b'],
74             keep_meta_info => ['Int', 0, 'k'],
75             allow_loose_quotes => ['Int', 0],
76             allow_loose_escapes => ['Int', 0],
77             allow_whitespace => ['Int', 0, 'w'],
78             verbatim => ['Int', 0],
79             );
80              
81             # output CSV processor options default to whatever the input option is.
82             # But you can override it just for output by saying --output_foo instead
83             # of --foo. (Thanks, gphat and t0m.)
84             while (my($attr, $opts) = each %TextCSVOptions) {
85             my($type, $default, $short, @extra_opts) = @$opts;
86             hasro $attr => (
87             isa => $type,
88             default => $default,
89             __aliases($attr, $short),
90             @extra_opts
91             );
92             hasro "output_$attr" => (
93             isa => $type,
94             lazy => 1,
95             default => sub { $_[0]->$attr },
96             __output_aliases($attr),
97             @extra_opts,
98             );
99             }
100              
101             sub __aliases {
102 44     44   106 my($attr, $short) = @_;
103 44         81 my @aliases;
104 44         228 (my $dashes = $attr) =~ s/_/-/g;
105 44 100       219 push @aliases, $dashes if $attr ne $dashes;
106 44 100       144 push @aliases, $short if $short;
107 44 100       303 return @aliases ? (cmd_aliases => \@aliases) : ();
108             }
109              
110             sub __output_aliases {
111 22     22   146 return __aliases("output_" . shift);
112             }
113              
114             sub __normalize_column {
115 10     10   19 my($in) = @_;
116 10 100       153 return ($in <= 0) ? $in : $in - 1;
117             }
118              
119             sub _get_header_map {
120 3     3   6 my ($self) = @_;
121              
122 3         15 my $header_line = $self->_peek_line;
123 3         1150 my %header_map;
124 3         8 my $field_number = 0;
125 3         8 for my $field (@$header_line) {
126 9         31 $header_map{$field} = ++$field_number;
127             }
128              
129 3         10 return \%header_map;
130             }
131              
132             sub _fields_to_columns {
133 4     4   8 my ($self, $fields) = @_;
134              
135 4 100       13 my @all_fields = map { /^(\d+)-(\d+)$/ ? $1 .. $2 : $_ } map { split "," } @$fields;
  9         49  
  5         25  
136 4         11 my @named_fields = grep { /\D/ } @all_fields;
  10         37  
137              
138 4         7 my @normalized_fields;
139             # If there is at least one named field, we need to read the header line from input and translate
140             # into column number.
141 4 100       12 if (@named_fields) {
142 3         13 my $header_map = $self->_get_header_map;
143 3 100       8 if (my @missing_fields = grep { not defined $header_map->{$_} } @named_fields) {
  4         19  
144 1         15 die "The following named fields aren't in the input header: ",
145             join(", ", @missing_fields), "\n";
146             }
147 2 100       6 @normalized_fields = map { __normalize_column(/^\d+/ ? $_ : $header_map->{$_} ) } @all_fields;
  4         22  
148             }
149             else {
150 1         3 @normalized_fields = map { __normalize_column($_) } @all_fields;
  3         6  
151             }
152              
153 3         13 return \@normalized_fields;
154             }
155              
156             # TODO: You know, I end up with something like this on a lot of projects.
157             # Why isn't this easier? Having to remember to "use IO::Handle" is sad, too.
158             sub _setup_fh {
159 12     12   29 my($self, $name) = @_;
160 12         39 my $fh_name = "_${name}_fh";
161 12 50       476 return if $self->$fh_name; # someone had already injected a fh.
162              
163             # ARGH. You can't open $fh, ">-", but you can't open $fh, "", "-" !?
164 0 0       0 my $dir2 = $name eq 'input' ? '' : '>';
165 0 0       0 my $dir3 = $name eq 'input' ? '<' : '>';
166              
167 0         0 my $fh;
168 0 0       0 $self->$name('-') if not defined $self->$name;
169 0 0       0 if ($self->$name eq '-') { # use stdio
170 0 0       0 open $fh, "$dir2-" or die "open: stdio: $!";
171             } else {
172 0 0       0 open $fh, $dir3, $self->$name or die "open: $self->$name: $!";
173             }
174 0         0 $self->$fh_name($fh);
175             }
176              
177             sub init {
178 11     11 0 438029 my($self) = @_;
179 11 100       436 return if $self->_init;
180 6         204 $self->_init(1);
181              
182             # TODO: zero-based field numbers as an option? nah?
183 6 50       220 my @columns = (($self->columns ? @{$self->columns} : ()), @{$self->extra_argv});
  0         0  
  6         240  
184 6 100       70 $self->columns([map { __normalize_column($_) } @columns]) if @columns;
  3         10  
185              
186 6         34 $self->_setup_fh($_) for qw(input output);
187              
188             # DWIMmy TSV
189 6 50 33     214 if ($self->from_tsv ||
      33        
      33        
190             (!$self->has_from_tsv && $self->input && $self->input =~ /\.tsv$/)) {
191 0         0 $self->sep_char("\t");
192             }
193 6 50 33     241 if ($self->to_tsv ||
      33        
      33        
194             (!$self->has_to_tsv && $self->output && $self->output =~ /\.tsv$/)) {
195 0         0 $self->output_sep_char("\t");
196             }
197              
198 66         2438 $self->_input_csv(Text::CSV->new({
199 6         43 map { $_ => $self->$_ } keys %TextCSVOptions }));
200 66         138 $self->_output_csv(Text::CSV->new({
201 6         54 map { my $o = "output_$_"; $_ => $self->$o } keys %TextCSVOptions }));
  66         2588  
202              
203             # If columns aren't specified, look for the --fields option. It allows
204             # use of named fields, list of comma-separated fields list, field
205             # ranges and so on.
206              
207 6 100       53 if (@columns) {
208 2 50       85 warn "--fields (-f) option is ignored since columns are also specified." if $self->fields;
209             } else {
210 4 50       138 my @fields = $self->fields ? @{$self->fields} : ();
  4         164  
211 4 50       18 if (@fields) {
212 4         18 my $columns = $self->_fields_to_columns(\@fields);
213 3         109 $self->columns($columns);
214             }
215             }
216             }
217              
218             {
219             my $line_read;
220              
221             # Read a line from input, but push it back for later reading.
222             sub _peek_line {
223 3     3   6 my ($self) = @_;
224              
225 3         102 $line_read = $self->_input_csv->getline($self->_input_fh);
226             }
227              
228             # If there is a line already read, return it; otherwise, read from input.
229             sub _get_line {
230 23     23   377 my ($self) = @_;
231              
232 23         32 my $line;
233 23 100       50 if ($line_read) {
234 2         6 $line = $line_read;
235 2         5 undef $line_read;
236 2         12 return $line;
237             } else {
238 21         776 return $self->_input_csv->getline($self->_input_fh);
239             }
240             }
241             }
242              
243             sub run {
244 5     5 0 16 my($self) = @_;
245 5         16 $self->init;
246              
247             # L<perlsyn/"modifiers don't take loop labels">
248 5         12 INPUT: { do {
  5         9  
249 5         8 my $data;
250 5         21 while (defined($data = $self->_get_line)) {
251 18 50       6031 if ($self->columns) {
252 18         30 @$data = @$data[@{ $self->columns }];
  18         623  
253             }
254            
255 18 50       655 if (!$self->_output_csv->print($self->_output_fh, $data)) {
256 0         0 warn $self->format_error("Warning - Output error", diag => [$self->_input_csv->error_diag]), "\n";
257 0         0 next INPUT;
258             }
259 18         2849 $self->_output_fh->print("\n");
260             }
261              
262             # Keeps us going on input errors.
263             # TODO: strict errors, according to command line, blah
264 5 50       337 if (not defined $data) {
265 5 50       176 last INPUT if $self->_input_csv->eof;
266 0           warn $self->format_error("Warning - Input error", line => $., diag => [$self->_input_csv->error_diag]), "\n";
267             }
268             } }
269             }
270              
271             sub format_error {
272 0     0 0   my $self = shift;
273 0           my $msg = shift;
274 0           my %args = @_;
275              
276 0 0         $msg .= ", line $args{line}"
277             if defined $args{line};
278              
279 0 0         if ($args{diag}) {
280 0 0         my ($code, $err, $pos, $record) = @{ $args{diag} || [] };
  0            
281 0 0         $msg .= ": " . join " - ",
282             $code, $err, "position $pos",
283             (defined $record ? "record $record" : ());
284             }
285 0           return $msg;
286             }
287              
288             1;
289              
290             __END__
291             =head1 NAME
292              
293             App::CSV - process CSV files
294              
295             =head1 REDIRECTION
296              
297             Please see L<csv>.
298              
299             =head1 COPYRIGHT (The "MIT" License)
300              
301             Copyright 2013 Gaal Yahas.
302              
303             Permission is hereby granted, free of charge, to any person obtaining a
304             copy of this software and associated documentation files (the "Software"),
305             to deal in the Software without restriction, including without limitation
306             the rights to use, copy, modify, merge, publish, distribute, sublicense,
307             and/or sell copies of the Software, and to permit persons to whom the
308             Software is furnished to do so, subject to the following conditions:
309              
310             The above copyright notice and this permission notice shall be included
311             in all copies or substantial portions of the Software.
312              
313             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
314             IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
315             FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
316             THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
317             OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
318             ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
319             OTHER DEALINGS IN THE SOFTWARE.
320              
321             =cut