line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::CSVUtils; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
113841
|
use 5.010001; |
|
1
|
|
|
|
|
15
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
19
|
|
5
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
22
|
|
6
|
1
|
|
|
1
|
|
2261
|
use Log::ger; |
|
1
|
|
|
|
|
82
|
|
|
1
|
|
|
|
|
5
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
262
|
use Cwd; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
63
|
|
9
|
1
|
|
|
1
|
|
6
|
use Exporter qw(import); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
621
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY |
12
|
|
|
|
|
|
|
our $DATE = '2023-07-25'; # DATE |
13
|
|
|
|
|
|
|
our $DIST = 'App-CSVUtils'; # DIST |
14
|
|
|
|
|
|
|
our $VERSION = '1.030'; # VERSION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
17
|
|
|
|
|
|
|
gen_csv_util |
18
|
|
|
|
|
|
|
compile_eval_code |
19
|
|
|
|
|
|
|
eval_code |
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our %SPEC; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $sch_req_str_or_code = ['any*', of=>['str*', 'code*']]; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Open an input file for reading as UTF-8 text.
#
# Arguments:
#   $filename - path of the file to read; the special name '-' selects STDIN.
#
# Returns the two-element list ($fh, $err). On success $err is undef and the
# handle has the ":encoding(utf8)" layer applied. When open() fails, $err is
# the envelope [500, message] and no layer is applied to the handle.
sub _open_file_read {
    my ($filename) = @_;

    my $fh;
    if ($filename eq '-') {
        $fh = *STDIN;
    } elsif (!open($fh, "<", $filename)) {
        # Bail out before binmode; the caller is expected to inspect $err.
        return ($fh, [500, "Can't open input filename '$filename': $!"]);
    }

    binmode $fh, ":encoding(utf8)";
    return ($fh, undef);
}
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Open an output file for writing as UTF-8 text (truncating any existing
# content, since the file is opened with mode ">").
#
# Arguments:
#   $filename - path of the file to write; the special name '-' selects STDOUT.
#
# Returns the two-element list ($fh, $err). On success $err is undef and the
# handle has the ":encoding(utf8)" layer applied. When open() fails, $err is
# the envelope [500, message] and no layer is applied to the handle.
sub _open_file_write {
    my ($filename) = @_;

    my $fh;
    if ($filename eq '-') {
        $fh = *STDOUT;
    } elsif (!open($fh, ">", $filename)) {
        # Bail out before binmode; the caller is expected to inspect $err.
        return ($fh, [500, "Can't open output filename '$filename': $!"]);
    }

    binmode $fh, ":encoding(utf8)";
    return ($fh, undef);
}
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Either hand back an enveloped result unchanged, or write its payload to a
# file and return the envelope without the payload.
#
# Arguments:
#   $res       - enveloped result [status, message, payload]
#   $filename  - undef to return $res as-is; '-' to print the payload to
#                STDOUT; anything else is a path to write the payload to
#   $overwrite - true to allow replacing an existing regular file
#
# Returns:
#   - $res itself when $filename is undef
#   - [412, message] when the file exists and $overwrite is false
#   - [500, message] when the file cannot be opened
#   - [$res->[0], $res->[1]] after the payload has been written
#
# A failure while closing the output file is reported with warn() only
# (best effort); it does not change the returned envelope.
sub _return_or_write_file {
    my ($res, $filename, $overwrite) = @_;
    return $res if !defined($filename);

    my $to_stdout = $filename eq '-';

    my $fh;
    if ($to_stdout) {
        $fh = \*STDOUT;
    } else {
        if (-f $filename) {
            if ($overwrite) {
                log_info "[csvutil] Overwriting output file $filename";
            } else {
                return [412, "Refusing to ovewrite existing output file '$filename', please select another path or specify --overwrite"];
            }
        }
        # Reuse the outer $fh: a second "my $fh" here would shadow it and
        # leave the STDOUT branch with nothing to print to.
        open($fh, ">", $filename)
            or return [500, "Can't open output file '$filename': $!"];
    }

    binmode $fh, ":encoding(utf8)";
    print {$fh} $res->[2];

    # STDOUT belongs to the process, so only close handles opened here.
    unless ($to_stdout) {
        close $fh or warn "Can't write to '$filename': $!";
    }

    return [$res->[0], $res->[1]];
}
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Turn user-supplied Perl source into a code reference.
#
# Arguments:
#   $str   - a coderef (returned unchanged) or a non-empty string of Perl code
#   $label - name of the option the code came from; used in error messages
#
# The string is wrapped in an anonymous sub compiled in package main with
# strict and warnings switched off, then compiled with string eval.
#
# Returns the resulting coderef. Dies with the envelope [400, message] when
# no code is given or when compilation fails.
sub compile_eval_code {
    my ($str, $label) = @_;

    # Already compiled: hand it back untouched.
    return $str if ref $str eq 'CODE';

    unless (defined($str) && length($str)) {
        die [400, "Please specify code ($label)"];
    }

    $str = "package main; no strict; no warnings; sub { $str }";
    log_trace "[csvutil] Compiling Perl code: $str";

    my $code = eval $str; ## no critic: BuiltinFunctions::ProhibitStringyEval
    if ($@) {
        die [400, "Can't compile code ($label) '$str': $@"];
    }

    return $code;
}
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# Run a compiled code reference with the per-row package variables in place.
#
# Arguments:
#   $code            - coderef to call
#   $r               - hashref whose input_row, input_rownum,
#                      input_data_rownum, input_parser and input_fields_idx
#                      entries are exposed to the code
#   $value_for_topic - value assigned to $_ (and passed as the sole argument)
#   $return_topic    - when true, return the value of $_ after the call
#                      instead of the code's own return value
#
# For the duration of the call, $_ and $main::r, $main::row, $main::rownum,
# $main::data_rownum, $main::csv and $main::fields_idx are localized.
sub eval_code {
    no warnings 'once';

    my ($code, $r, $value_for_topic, $return_topic) = @_;

    local $_ = $value_for_topic;
    local $main::r = $r;
    local $main::row = $r->{input_row};
    local $main::rownum = $r->{input_rownum};
    local $main::data_rownum = $r->{input_data_rownum};
    local $main::csv = $r->{input_parser};
    local $main::fields_idx = $r->{input_fields_idx};

    # Common case: the code's own return value is what the caller wants.
    return $code->($_) unless $return_topic;

    # Otherwise the code is run for its effect on $_.
    $code->($_);
    return $_;
}
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Look up the index of a field by name.
#
# Arguments:
#   $field      - field name to look up
#   $field_idxs - hashref mapping field name => index
#
# Returns the index stored for $field. Dies with a plain-text message when
# $field is undefined/empty or is not present in $field_idxs; in the latter
# case the message lists the known fields ordered by their index.
sub _get_field_idx {
    my ($field, $field_idxs) = @_;

    die "Please specify at least a field\n"
        unless defined($field) && length($field);

    my $idx = $field_idxs->{$field};
    return $idx if defined $idx;

    my @known = sort {$field_idxs->{$a} <=> $field_idxs->{$b}} keys %$field_idxs;
    die "Unknown field '$field' (known fields include: ".
        join(", ", map { "'$_'" } @known).")\n";
}
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Format one row as a line of CSV text.
#
# Arguments:
#   $csv            - object providing combine(), string() and error_input()
#   $row            - arrayref of field values
#   $i              - row number; row 1 is treated as the header row
#   $outputs_header - false to suppress the header row
#
# Returns the combined row followed by a newline, or the empty string for
# row 1 when the header is suppressed. Dies when combine() reports failure.
sub _get_csv_row {
    my ($csv, $row, $i, $outputs_header) = @_;

    if ($i == 1 && !$outputs_header) {
        return "";
    }

    unless ($csv->combine(@$row)) {
        die "Error in line $i: ".$csv->error_input."\n";
    }

    return $csv->string . "\n";
}
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Create a Text::CSV_XS object with only the "binary" attribute enabled.
# Text::CSV_XS is loaded on first use.
sub _instantiate_parser_default {
    require Text::CSV_XS;

    my %tcsv_opts = (binary=>1);
    return Text::CSV_XS->new(\%tcsv_opts);
}
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Create a Text::CSV_XS object configured from a set of arguments.
#
# Arguments:
#   $args   - hashref of options
#   $prefix - string prepended to every option name looked up in $args
#             (defaults to the empty string)
#
# Options consulted (each with $prefix prepended): sep_char, quote_char,
# escape_char, tsv, always_quote, quote_empty. If any of the three *_char
# options is defined, the defined ones are passed through and "tsv" is
# ignored; otherwise a true "tsv" selects a tab separator with quoting and
# escaping disabled. The "binary" attribute is always enabled.
sub _instantiate_parser {
    require Text::CSV_XS;

    my ($args, $prefix) = @_;
    $prefix //= '';

    my %tcsv_opts = (binary=>1);

    # Explicit character settings take precedence over the tsv shortcut.
    my @explicit_chars =
        grep { defined $args->{"${prefix}$_"} } qw(sep_char quote_char escape_char);
    if (@explicit_chars) {
        $tcsv_opts{$_} = $args->{"${prefix}$_"} for @explicit_chars;
    } elsif ($args->{"${prefix}tsv"}) {
        $tcsv_opts{"sep_char"}    = "\t";
        $tcsv_opts{"quote_char"}  = undef;
        $tcsv_opts{"escape_char"} = undef;
    }

    for my $flag (qw(always_quote quote_empty)) {
        $tcsv_opts{$flag} = 1 if $args->{"${prefix}$flag"};
    }

    return Text::CSV_XS->new(\%tcsv_opts);
}
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Create the object used for producing output: the same as
# _instantiate_parser(), but reading the "output_"-prefixed options of $args.
sub _instantiate_emitter {
    my ($args) = @_;
    return _instantiate_parser($args, 'output_');
}
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# Shared backend for the field-related tab-completion routines: offer the
# field names found in the first row of the input CSV as completion answers.
#
# Arguments:
#   $which - kind of completion wanted. A value matching /sort/ makes each
#            field also be offered with the "-", "+" and "~" prefixes; a
#            value matching /field_list/ completes a comma-separated list
#            instead of a single element.
#   %args  - completion arguments; "word", "cmdline" and "r" are used.
#
# Returns:
#   - undef when there is no "cmdline" object (not called from the CLI)
#   - a {message=>...} hashref when no input filename has been given yet or
#     the input is stdin
#   - an empty arrayref when the file cannot be opened or its first row
#     cannot be read
#   - otherwise whatever Complete::Util returns for the candidate list
sub _complete_field_or_field_list {
    my $which = shift;

    my %args = @_;
    my $word = $args{word} // '';
    my $cmdline = $args{cmdline};
    my $r = $args{r};

    # we are not called from cmdline, bail
    return undef unless $cmdline; ## no critic: Subroutines::ProhibitExplicitReturnUndef

    # let's parse argv first
    my $args;
    {
        # this is not activated yet
        $r->{read_config} = 1;

        my $res = $cmdline->parse_argv($r);
        #return undef unless $res->[0] == 200;

        $cmdline->_read_config($r) unless $r->{config};
        $args = $res->[2];
    }

    # user hasn't specified -f, bail
    return {message=>"Please specify input filename first"} unless defined $args && $args->{input_filename};

    # user wants to read CSV from stdin, bail
    return {message=>"Can't get field list when input is stdin"} if $args->{input_filename} eq '-';

    # can the file be opened?
    #
    # The parser must be configured from the parsed command-line arguments
    # ($args), not from the completion arguments (%args), otherwise options
    # such as input_sep_char or input_tsv given by the user are ignored.
    my $csv_parser = _instantiate_parser($args, 'input_');

    # The layer needs its leading colon; "<encoding(utf8)" is not a valid
    # open mode.
    open my($fh), "<:encoding(utf8)", $args->{input_filename} or do {
        #warn "csvutils: Cannot open file '$args->{input_filename}': $!\n";
        return [];
    };

    # can the header row be read?
    my $row = $csv_parser->getline($fh) or return [];

    # no header row in the input: fields are named field1, field2, ...
    if (defined $args->{input_header} && !$args->{input_header}) {
        $row = [map {"field$_"} 1 .. @$row];
    }

    if ($which =~ /sort/) {
        $row = [map {($_,"-$_","+$_","~$_")} @$row];
    }

    require Complete::Util;
    if ($which =~ /field_list/) {
        return Complete::Util::complete_comma_sep(
            word => $word,
            elems => $row,
            uniq => 1,
        );
    } else {
        return Complete::Util::complete_array_elem(
            word => $word,
            array => $row,
        );
    }
}
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
# Completion routine for a single field name; forwards its arguments to
# _complete_field_or_field_list() with the kind 'field'.
sub _complete_field {
    my @completion_args = @_;
    return _complete_field_or_field_list('field', @completion_args);
}
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# Completion routine for a comma-separated list of field names; forwards its
# arguments to _complete_field_or_field_list() with the kind 'field_list'.
sub _complete_field_list {
    my @completion_args = @_;
    return _complete_field_or_field_list('field_list', @completion_args);
}
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# Completion routine for a comma-separated list of sort fields; forwards its
# arguments to _complete_field_or_field_list() with the kind
# 'sort_field_list'.
sub _complete_sort_field_list {
    my @completion_args = @_;
    return _complete_field_or_field_list('sort_field_list', @completion_args);
}
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# Completion routine for a single sort field; forwards its arguments to
# _complete_field_or_field_list() with the kind 'sort_field'.
sub _complete_sort_field {
    my @completion_args = @_;
    return _complete_field_or_field_list('sort_field', @completion_args);
}
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# Convert a row given as an array into a hash keyed by field name.
#
# Arguments:
#   $row    - arrayref of values
#   $fields - arrayref of field names, positionally matching $row
#
# Returns a new hashref with one entry per element of $fields; the value is
# the element of $row at the same position (undef when $row is shorter).
sub _array2hash {
    my ($row, $fields) = @_;

    my %rowhash = map { ($fields->[$_] => $row->[$_]) } 0 .. $#{$fields};
    return \%rowhash;
}
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# check that the first N values of a field are all defined and numeric. if
# there are no rows or fewer than N values, return true (as long as every
# value that is present passes the check).
#
# Arguments:
#   $rows        - arrayref of rows, each row an arrayref of values
#   $field_idx   - index of the field to examine within each row
#   $num_samples - N, the maximum number of rows to examine (default 5)
#
# Returns 1 when every examined value is defined and accepted by
# Scalar::Util::Numeric::isnum(), 0 otherwise.
sub _is_numeric_field {
    require Scalar::Util::Numeric;

    my ($rows, $field_idx, $num_samples) = @_;
    $num_samples //= 5;

    my $num_checked = 0;
    for my $row (@$rows) {
        # Only sample the first N rows, as documented; do not scan them all.
        last if $num_checked++ >= $num_samples;

        my $val = $row->[$field_idx];
        return 0 unless defined $val;
        return 0 unless Scalar::Util::Numeric::isnum($val);
    }
    1;
}
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# find a single field by name or index (1-based), return index (0-based). die
# when requested field does not exist.
#
# Arguments:
#   $fields      - arrayref of field names
#   $name_or_idx - a field name, a positive 1-based index, or a negative
#                  index counting from the last field (-1 is the last one)
#
# A name match always wins over an index interpretation. Errors are thrown as
# envelopes: [400, ...] for index 0 or an out-of-range index, [404, ...] when
# nothing matches.
sub _find_field {
    my ($fields, $name_or_idx) = @_;

    # search by name first
    my $pos = 0;
    for my $field (@$fields) {
        return $pos if $field eq $name_or_idx;
        $pos++;
    }

    die [400, "Field index 0 is requested, you probably meant 1 for the first field?"]
        if $name_or_idx eq '0';

    my $num_fields = @$fields;

    # positive, 1-based index
    if ($name_or_idx =~ /\A[1-9][0-9]*\z/) {
        return $name_or_idx-1 unless $name_or_idx > $num_fields;
        die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
    }

    # negative index, counted from the end
    if ($name_or_idx =~ /\A-[1-9][0-9]*\z/) {
        return $num_fields + $name_or_idx unless -$name_or_idx > $num_fields;
        die [400, "There are only ".scalar(@$fields)." field(s) but field index $name_or_idx is requested"];
    }

    # not found
    die [404, "Unknown field name/index '$name_or_idx' (known fields include: ".
             join(", ", map { "'$_'" } @$fields).")"];
}
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# select one or more fields with options like --include-field, etc
#
# Arguments:
#   $fields                - arrayref of all field names, in order
#   $field_idxs            - hashref mapping field name => index
#   $args                  - hashref of options; consulted keys are
#                            include_field_pat, exclude_field_pat,
#                            include_fields, exclude_fields,
#                            ignore_unknown_fields, show_selected_fields
#   $default_select_choice - what to select when none of the four selection
#                            options is given: 'all', 'first', 'last' or
#                            'first-if-only-field'
#
# The selection options are applied in this fixed order: include_field_pat,
# exclude_field_pat, include_fields, exclude_fields.
#
# Returns an envelope:
#   [400, "Unknown field ..."]          - unknown field named and
#                                         ignore_unknown_fields is false
#   [200, "OK", \@names]                - when show_selected_fields is true
#   [100, "Continue", [\@names, \@idxs]] - otherwise
sub _select_fields {
    my ($fields, $field_idxs, $args, $default_select_choice) = @_;

    my @selected_fields;
    my $select_field_options_used;

    # 1. fields whose name matches the include pattern
    if (defined $args->{include_field_pat}) {
        $select_field_options_used++;
        push @selected_fields,
            grep { $_ =~ $args->{include_field_pat} } @$fields;
    }

    # 2. drop already-selected fields matching the exclude pattern
    if (defined $args->{exclude_field_pat}) {
        $select_field_options_used++;
        my @kept;
        for my $field (@selected_fields) {
            push @kept, $field unless $field =~ $args->{exclude_field_pat};
        }
        @selected_fields = @kept;
    }

    # 3. explicitly named fields, appended unless already selected
    if (defined $args->{include_fields}) {
        $select_field_options_used++;
        for my $field (@{ $args->{include_fields} }) {
            if (!defined $field_idxs->{$field}) {
                next if $args->{ignore_unknown_fields};
                return [400, "Unknown field '$field'"];
            }
            unless (grep { $field eq $_ } @selected_fields) {
                push @selected_fields, $field;
            }
        }
    }

    # 4. explicitly excluded fields
    if (defined $args->{exclude_fields}) {
        $select_field_options_used++;
        for my $field (@{ $args->{exclude_fields} }) {
            if (!defined $field_idxs->{$field}) {
                next if $args->{ignore_unknown_fields};
                return [400, "Unknown field '$field'"];
            }
            @selected_fields = grep { $field ne $_ } @selected_fields;
        }
    }

    # no selection option given at all: fall back to the caller's default
    if (!$select_field_options_used && $default_select_choice) {
        my $num_fields = @$fields;
        if ($default_select_choice eq 'all') {
            @selected_fields = @$fields;
        } elsif ($default_select_choice eq 'first') {
            @selected_fields = ($fields->[0]) if $num_fields;
        } elsif ($default_select_choice eq 'last') {
            @selected_fields = ($fields->[-1]) if $num_fields;
        } elsif ($default_select_choice eq 'first-if-only-field') {
            @selected_fields = ($fields->[0]) if $num_fields == 1;
        }
    }

    return [200, "OK", \@selected_fields] if $args->{show_selected_fields};

    my @selected_field_idxs_array = map { $field_idxs->{$_} } @selected_fields;

    return [100, "Continue", [\@selected_fields, \@selected_field_idxs_array]];
}
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
our $xcomp_csvfiles = [filename => {file_ext_filter => qr/^[tc]sv$/i}]; |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
our %argspecs_csv_input = ( |
382
|
|
|
|
|
|
|
input_header => { |
383
|
|
|
|
|
|
|
summary => 'Specify whether input CSV has a header row', |
384
|
|
|
|
|
|
|
'summary.alt.bool.not' => 'Specify that input CSV does not have a header row', |
385
|
|
|
|
|
|
|
schema => 'bool*', |
386
|
|
|
|
|
|
|
default => 1, |
387
|
|
|
|
|
|
|
description => <<'_', |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
By default, the first row of the input CSV will be assumed to contain field |
390
|
|
|
|
|
|
|
names (and the second row contains the first data row). When you declare that |
391
|
|
|
|
|
|
|
input CSV does not have header row (`--no-input-header`), the first row of the |
392
|
|
|
|
|
|
|
CSV is assumed to contain the first data row. Fields will be named `field1`, |
393
|
|
|
|
|
|
|
`field2`, and so on. |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
_ |
396
|
|
|
|
|
|
|
cmdline_aliases => { |
397
|
|
|
|
|
|
|
}, |
398
|
|
|
|
|
|
|
tags => ['category:input'], |
399
|
|
|
|
|
|
|
}, |
400
|
|
|
|
|
|
|
input_tsv => { |
401
|
|
|
|
|
|
|
summary => "Inform that input file is in TSV (tab-separated) format instead of CSV", |
402
|
|
|
|
|
|
|
schema => 'true*', |
403
|
|
|
|
|
|
|
description => <<'_', |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Overridden by `--input-sep-char`, `--input-quote-char`, `--input-escape-char` |
406
|
|
|
|
|
|
|
options. If one of those options is specified, then `--input-tsv` will be |
407
|
|
|
|
|
|
|
ignored. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
_ |
410
|
|
|
|
|
|
|
tags => ['category:input'], |
411
|
|
|
|
|
|
|
}, |
412
|
|
|
|
|
|
|
input_sep_char => { |
413
|
|
|
|
|
|
|
summary => 'Specify field separator character in input CSV, will be passed to Text::CSV_XS', |
414
|
|
|
|
|
|
|
schema => ['str*', len=>1], |
415
|
|
|
|
|
|
|
description => <<'_', |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Defaults to `,` (comma). Overrides `--input-tsv` option. |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
_ |
420
|
|
|
|
|
|
|
tags => ['category:input'], |
421
|
|
|
|
|
|
|
}, |
422
|
|
|
|
|
|
|
input_quote_char => { |
423
|
|
|
|
|
|
|
summary => 'Specify field quote character in input CSV, will be passed to Text::CSV_XS', |
424
|
|
|
|
|
|
|
schema => ['str*', len=>1], |
425
|
|
|
|
|
|
|
description => <<'_', |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Defaults to `"` (double quote). Overrides `--input-tsv` option. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
_ |
430
|
|
|
|
|
|
|
tags => ['category:input'], |
431
|
|
|
|
|
|
|
}, |
432
|
|
|
|
|
|
|
input_escape_char => { |
433
|
|
|
|
|
|
|
summary => 'Specify character to escape value in field in input CSV, will be passed to Text::CSV_XS', |
434
|
|
|
|
|
|
|
schema => ['str*', len=>1], |
435
|
|
|
|
|
|
|
description => <<'_', |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Defaults to `\\` (backslash). Overrides `--input-tsv` option. |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
_ |
440
|
|
|
|
|
|
|
tags => ['category:input'], |
441
|
|
|
|
|
|
|
}, |
442
|
|
|
|
|
|
|
); |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
our %argspecs_csv_output = ( |
445
|
|
|
|
|
|
|
output_header => { |
446
|
|
|
|
|
|
|
summary => 'Whether output CSV should have a header row', |
447
|
|
|
|
|
|
|
schema => 'bool*', |
448
|
|
|
|
|
|
|
description => <<'_', |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
By default, a header row will be output *if* input CSV has header row. Under |
451
|
|
|
|
|
|
|
`--output-header`, a header row will be output even if input CSV does not have |
452
|
|
|
|
|
|
|
header row (value will be something like "field1,field2,..."). Under |
453
|
|
|
|
|
|
|
`--no-output-header`, header row will *not* be printed even if input CSV has |
454
|
|
|
|
|
|
|
header row. So this option can be used to unconditionally add or remove header |
455
|
|
|
|
|
|
|
row. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
_ |
458
|
|
|
|
|
|
|
tags => ['category:output'], |
459
|
|
|
|
|
|
|
}, |
460
|
|
|
|
|
|
|
output_tsv => { |
461
|
|
|
|
|
|
|
summary => "Inform that output file is TSV (tab-separated) format instead of CSV", |
462
|
|
|
|
|
|
|
schema => 'bool*', |
463
|
|
|
|
|
|
|
description => <<'_', |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
This is like `--input-tsv` option but for output instead of input. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Overridden by `--output-sep-char`, `--output-quote-char`, `--output-escape-char` |
468
|
|
|
|
|
|
|
options. If one of those options is specified, then `--output-tsv` will be |
469
|
|
|
|
|
|
|
ignored. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
_ |
472
|
|
|
|
|
|
|
tags => ['category:output'], |
473
|
|
|
|
|
|
|
}, |
474
|
|
|
|
|
|
|
output_sep_char => { |
475
|
|
|
|
|
|
|
summary => 'Specify field separator character in output CSV, will be passed to Text::CSV_XS', |
476
|
|
|
|
|
|
|
schema => ['str*', len=>1], |
477
|
|
|
|
|
|
|
description => <<'_', |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
This is like `--input-sep-char` option but for output instead of input. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
Defaults to `,` (comma). Overrides `--output-tsv` option. |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
_ |
484
|
|
|
|
|
|
|
tags => ['category:output'], |
485
|
|
|
|
|
|
|
}, |
486
|
|
|
|
|
|
|
output_quote_char => { |
487
|
|
|
|
|
|
|
summary => 'Specify field quote character in output CSV, will be passed to Text::CSV_XS', |
488
|
|
|
|
|
|
|
schema => ['str*', len=>1], |
489
|
|
|
|
|
|
|
description => <<'_', |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
This is like `--input-quote-char` option but for output instead of input. |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
Defaults to `"` (double quote). Overrides `--output-tsv` option. |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
_ |
496
|
|
|
|
|
|
|
tags => ['category:output'], |
497
|
|
|
|
|
|
|
}, |
498
|
|
|
|
|
|
|
output_escape_char => { |
499
|
|
|
|
|
|
|
summary => 'Specify character to escape value in field in output CSV, will be passed to Text::CSV_XS', |
500
|
|
|
|
|
|
|
schema => ['str*', len=>1], |
501
|
|
|
|
|
|
|
description => <<'_', |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
This is like `--input-escape-char` option but for output instead of input. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
Defaults to `\\` (backslash). Overrides `--output-tsv` option. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
_ |
508
|
|
|
|
|
|
|
tags => ['category:output'], |
509
|
|
|
|
|
|
|
}, |
510
|
|
|
|
|
|
|
output_always_quote => { |
511
|
|
|
|
|
|
|
summary => 'Whether to always quote values', |
512
|
|
|
|
|
|
|
schema => 'bool*', |
513
|
|
|
|
|
|
|
default => 0, |
514
|
|
|
|
|
|
|
description => <<'_', |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
When set to false (the default), values are quoted only when necessary: |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
field1,field2,"field three contains comma (,)",field4 |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
When set to true, then all values will be quoted: |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
"field1","field2","field three contains comma (,)","field4" |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
_ |
525
|
|
|
|
|
|
|
tags => ['category:output'], |
526
|
|
|
|
|
|
|
}, |
527
|
|
|
|
|
|
|
output_quote_empty => { |
528
|
|
|
|
|
|
|
summary => 'Whether to quote empty values', |
529
|
|
|
|
|
|
|
schema => 'bool*', |
530
|
|
|
|
|
|
|
default => 0, |
531
|
|
|
|
|
|
|
description => <<'_', |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
When set to false (the default), empty values are not quoted: |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
field1,field2,,field4 |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
When set to true, then empty values will be quoted: |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
field1,field2,"",field4 |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
_ |
542
|
|
|
|
|
|
|
tags => ['category:output'], |
543
|
|
|
|
|
|
|
}, |
544
|
|
|
|
|
|
|
); |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
our %argspecopt_input_filename = ( |
547
|
|
|
|
|
|
|
input_filename => { |
548
|
|
|
|
|
|
|
summary => 'Input CSV file', |
549
|
|
|
|
|
|
|
description => <<'_', |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Use `-` to read from stdin. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
Encoding of input file is assumed to be UTF-8. |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
_ |
556
|
|
|
|
|
|
|
schema => 'filename*', |
557
|
|
|
|
|
|
|
default => '-', |
558
|
|
|
|
|
|
|
'x.completion' => $xcomp_csvfiles, |
559
|
|
|
|
|
|
|
tags => ['category:input'], |
560
|
|
|
|
|
|
|
}, |
561
|
|
|
|
|
|
|
); |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
our %argspecopt_input_filenames = ( |
564
|
|
|
|
|
|
|
input_filenames => { |
565
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
566
|
|
|
|
|
|
|
'x.name.singular' => 'input_filename', |
567
|
|
|
|
|
|
|
summary => 'Input CSV files', |
568
|
|
|
|
|
|
|
description => <<'_', |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
Use `-` to read from stdin. |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
Encoding of input file is assumed to be UTF-8. |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
_ |
575
|
|
|
|
|
|
|
schema => ['array*', of=>'filename*'], |
576
|
|
|
|
|
|
|
default => ['-'], |
577
|
|
|
|
|
|
|
'x.completion' => $xcomp_csvfiles, |
578
|
|
|
|
|
|
|
tags => ['category:input'], |
579
|
|
|
|
|
|
|
}, |
580
|
|
|
|
|
|
|
); |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
our %argspecopt_overwrite = ( |
583
|
|
|
|
|
|
|
overwrite => { |
584
|
|
|
|
|
|
|
summary => 'Whether to overwrite existing output file', |
585
|
|
|
|
|
|
|
schema => 'bool*', |
586
|
|
|
|
|
|
|
cmdline_aliases=>{O=>{}}, |
587
|
|
|
|
|
|
|
tags => ['category:output'], |
588
|
|
|
|
|
|
|
}, |
589
|
|
|
|
|
|
|
); |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
our %argspecsopt_inplace = ( |
592
|
|
|
|
|
|
|
inplace => { |
593
|
|
|
|
|
|
|
summary => 'Output to the same file as input', |
594
|
|
|
|
|
|
|
schema => 'true*', |
595
|
|
|
|
|
|
|
description => <<'_', |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
Normally, you output to a different file than input. If you try to output to the |
598
|
|
|
|
|
|
|
same file (`-o INPUT.csv -O`) you will clobber the input file; thus the utility |
599
|
|
|
|
|
|
|
prevents you from doing it. However, with this `--inplace` option, you can |
600
|
|
|
|
|
|
|
output to the same file. Like perl's `-i` option, this will first output to a |
601
|
|
|
|
|
|
|
temporary file in the same directory as the input file then rename to the final |
602
|
|
|
|
|
|
|
file at the end. You cannot specify output file (`-o`) when using this option, |
603
|
|
|
|
|
|
|
but you can specify backup extension with `-b` option. |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
Some caveats: |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
- if input file is a symbolic link, it will be replaced with a regular file; |
608
|
|
|
|
|
|
|
- renaming (implemented using `rename()`) can fail if input filename is too long; |
609
|
|
|
|
|
|
|
- value specified in `-b` is currently not checked for acceptable characters; |
610
|
|
|
|
|
|
|
- things can also fail if permissions are restrictive; |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
_ |
613
|
|
|
|
|
|
|
tags => ['category:output'], |
614
|
|
|
|
|
|
|
}, |
615
|
|
|
|
|
|
|
inplace_backup_ext => { |
616
|
|
|
|
|
|
|
summary => 'Extension to add for backup of input file', |
617
|
|
|
|
|
|
|
schema => 'str*', |
618
|
|
|
|
|
|
|
default => '', |
619
|
|
|
|
|
|
|
description => <<'_', |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
In inplace mode (`--inplace`), if this option is set to a non-empty string, will |
622
|
|
|
|
|
|
|
rename the input file using this extension as a backup. The old existing backup |
623
|
|
|
|
|
|
|
will be overwritten, if any. |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
_ |
626
|
|
|
|
|
|
|
cmdline_aliases => {b=>{}}, |
627
|
|
|
|
|
|
|
tags => ['category:output'], |
628
|
|
|
|
|
|
|
}, |
629
|
|
|
|
|
|
|
); |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
our %argspecopt_output_filename = ( |
632
|
|
|
|
|
|
|
output_filename => { |
633
|
|
|
|
|
|
|
summary => 'Output filename', |
634
|
|
|
|
|
|
|
description => <<'_', |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
Use `-` to output to stdout (the default if you don't specify this option). |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
Encoding of output file is assumed to be UTF-8. |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
_ |
641
|
|
|
|
|
|
|
schema => 'filename*', |
642
|
|
|
|
|
|
|
cmdline_aliases=>{o=>{}}, |
643
|
|
|
|
|
|
|
tags => ['category:output'], |
644
|
|
|
|
|
|
|
}, |
645
|
|
|
|
|
|
|
); |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
our %argspecopt_output_filenames = ( |
648
|
|
|
|
|
|
|
output_filenames => { |
649
|
|
|
|
|
|
|
summary => 'Output filenames', |
650
|
|
|
|
|
|
|
description => <<'_', |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Use `-` to output to stdout (the default if you don't specify this option). |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
Encoding of output file is assumed to be UTF-8. |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
_ |
657
|
|
|
|
|
|
|
schema => ['array*', of=>'filename*'], |
658
|
|
|
|
|
|
|
cmdline_aliases=>{o=>{}}, |
659
|
|
|
|
|
|
|
tags => ['category:output'], |
660
|
|
|
|
|
|
|
}, |
661
|
|
|
|
|
|
|
); |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
our %argspecopt_field = ( |
664
|
|
|
|
|
|
|
field => { |
665
|
|
|
|
|
|
|
summary => 'Field name', |
666
|
|
|
|
|
|
|
schema => 'str*', |
667
|
|
|
|
|
|
|
cmdline_aliases => { f=>{} }, |
668
|
|
|
|
|
|
|
completion => \&_complete_field, |
669
|
|
|
|
|
|
|
}, |
670
|
|
|
|
|
|
|
); |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
our %argspecopt_field_1 = ( |
673
|
|
|
|
|
|
|
field => { |
674
|
|
|
|
|
|
|
summary => 'Field name', |
675
|
|
|
|
|
|
|
schema => 'str*', |
676
|
|
|
|
|
|
|
pos => 1, |
677
|
|
|
|
|
|
|
cmdline_aliases => { f=>{} }, |
678
|
|
|
|
|
|
|
completion => \&_complete_field, |
679
|
|
|
|
|
|
|
}, |
680
|
|
|
|
|
|
|
); |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
our %argspec_field_1 = ( |
683
|
|
|
|
|
|
|
field => { |
684
|
|
|
|
|
|
|
summary => 'Field name', |
685
|
|
|
|
|
|
|
schema => 'str*', |
686
|
|
|
|
|
|
|
cmdline_aliases => { f=>{} }, |
687
|
|
|
|
|
|
|
req => 1, |
688
|
|
|
|
|
|
|
pos => 1, |
689
|
|
|
|
|
|
|
completion => \&_complete_field, |
690
|
|
|
|
|
|
|
}, |
691
|
|
|
|
|
|
|
); |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
our %argspec_fields_1plus = ( |
694
|
|
|
|
|
|
|
fields => { |
695
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
696
|
|
|
|
|
|
|
'x.name.singular' => 'field', |
697
|
|
|
|
|
|
|
summary => 'Field names', |
698
|
|
|
|
|
|
|
schema => ['array*', of=>['str*', min_len=>1], min_len=>1], |
699
|
|
|
|
|
|
|
req => 1, |
700
|
|
|
|
|
|
|
pos => 1, |
701
|
|
|
|
|
|
|
slurpy => 1, |
702
|
|
|
|
|
|
|
cmdline_aliases => {f=>{}}, |
703
|
|
|
|
|
|
|
element_completion => \&_complete_field, |
704
|
|
|
|
|
|
|
}, |
705
|
|
|
|
|
|
|
); |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# without completion, for adding new field |
708
|
|
|
|
|
|
|
our %argspec_field_1_nocomp = ( |
709
|
|
|
|
|
|
|
field => { |
710
|
|
|
|
|
|
|
summary => 'Field name', |
711
|
|
|
|
|
|
|
schema => 'str*', |
712
|
|
|
|
|
|
|
cmdline_aliases => { f=>{} }, |
713
|
|
|
|
|
|
|
req => 1, |
714
|
|
|
|
|
|
|
pos => 1, |
715
|
|
|
|
|
|
|
}, |
716
|
|
|
|
|
|
|
); |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
# without completion, for adding new fields |
719
|
|
|
|
|
|
|
our %argspec_fields_1plus_nocomp = ( |
720
|
|
|
|
|
|
|
fields => { |
721
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
722
|
|
|
|
|
|
|
'x.name.singular' => 'field', |
723
|
|
|
|
|
|
|
summary => 'Field names', |
724
|
|
|
|
|
|
|
'summary.alt.plurality.singular' => 'Field name', |
725
|
|
|
|
|
|
|
schema => ['array*', of=>['str*', min_len=>1], min_len=>1], |
726
|
|
|
|
|
|
|
cmdline_aliases => { f=>{} }, |
727
|
|
|
|
|
|
|
req => 1, |
728
|
|
|
|
|
|
|
pos => 1, |
729
|
|
|
|
|
|
|
slurpy => 1, |
730
|
|
|
|
|
|
|
}, |
731
|
|
|
|
|
|
|
); |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
our %argspec_fields = ( |
734
|
|
|
|
|
|
|
fields => { |
735
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
736
|
|
|
|
|
|
|
'x.name.singular' => 'field', |
737
|
|
|
|
|
|
|
summary => 'Field names', |
738
|
|
|
|
|
|
|
schema => ['array*', of=>['str*', min_len=>1], min_len=>1], |
739
|
|
|
|
|
|
|
req => 1, |
740
|
|
|
|
|
|
|
cmdline_aliases => {f=>{}}, |
741
|
|
|
|
|
|
|
element_completion => \&_complete_field, |
742
|
|
|
|
|
|
|
}, |
743
|
|
|
|
|
|
|
); |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
our %argspecopt_fields = ( |
746
|
|
|
|
|
|
|
fields => { |
747
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
748
|
|
|
|
|
|
|
'x.name.singular' => 'field', |
749
|
|
|
|
|
|
|
summary => 'Field names', |
750
|
|
|
|
|
|
|
schema => ['array*', of=>['str*', min_len=>1], min_len=>1], |
751
|
|
|
|
|
|
|
cmdline_aliases => {f=>{}}, |
752
|
|
|
|
|
|
|
element_completion => \&_complete_field, |
753
|
|
|
|
|
|
|
}, |
754
|
|
|
|
|
|
|
); |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
our %argspecsopt_field_selection = ( |
757
|
|
|
|
|
|
|
include_fields => { |
758
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
759
|
|
|
|
|
|
|
'x.name.singular' => 'include_field', |
760
|
|
|
|
|
|
|
summary => 'Field names to include, takes precedence over --exclude-field-pat', |
761
|
|
|
|
|
|
|
schema => ['array*', of=>'str*'], |
762
|
|
|
|
|
|
|
cmdline_aliases => { |
763
|
|
|
|
|
|
|
f => {}, |
764
|
|
|
|
|
|
|
field => {}, # backward compatibility |
765
|
|
|
|
|
|
|
}, |
766
|
|
|
|
|
|
|
element_completion => \&_complete_field, |
767
|
|
|
|
|
|
|
tags => ['category:field-selection'], |
768
|
|
|
|
|
|
|
}, |
769
|
|
|
|
|
|
|
include_field_pat => { |
770
|
|
|
|
|
|
|
summary => 'Field regex pattern to select, overridden by --exclude-field-pat', |
771
|
|
|
|
|
|
|
schema => 're*', |
772
|
|
|
|
|
|
|
cmdline_aliases => { |
773
|
|
|
|
|
|
|
field_pat => {}, # backward compatibility |
774
|
|
|
|
|
|
|
include_all_fields => { summary => 'Shortcut for --field-pat=.*, effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.*' } }, |
775
|
|
|
|
|
|
|
a => { summary => 'Shortcut for --field-pat=.*, effectively selecting all fields', is_flag=>1, code => sub { $_[0]{include_field_pat} = '.*' } }, |
776
|
|
|
|
|
|
|
}, |
777
|
|
|
|
|
|
|
tags => ['category:field-selection'], |
778
|
|
|
|
|
|
|
}, |
779
|
|
|
|
|
|
|
exclude_fields => { |
780
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
781
|
|
|
|
|
|
|
'x.name.singular' => 'exclude_field', |
782
|
|
|
|
|
|
|
summary => 'Field names to exclude, takes precedence over --fields', |
783
|
|
|
|
|
|
|
schema => ['array*', of=>'str*'], |
784
|
|
|
|
|
|
|
cmdline_aliases => { |
785
|
|
|
|
|
|
|
F => {}, |
786
|
|
|
|
|
|
|
}, |
787
|
|
|
|
|
|
|
element_completion => \&_complete_field, |
788
|
|
|
|
|
|
|
tags => ['category:field-selection'], |
789
|
|
|
|
|
|
|
}, |
790
|
|
|
|
|
|
|
exclude_field_pat => { |
791
|
|
|
|
|
|
|
summary => 'Field regex pattern to exclude, takes precedence over --field-pat', |
792
|
|
|
|
|
|
|
schema => 're*', |
793
|
|
|
|
|
|
|
cmdline_aliases => { |
794
|
|
|
|
|
|
|
exclude_all_fields => { summary => 'Shortcut for --exclude-field-pat=.*, effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.*' } }, |
795
|
|
|
|
|
|
|
A => { summary => 'Shortcut for --exclude-field-pat=.*, effectively excluding all fields', is_flag=>1, code => sub { $_[0]{exclude_field_pat} = '.*' } }, |
796
|
|
|
|
|
|
|
}, |
797
|
|
|
|
|
|
|
tags => ['category:field-selection'], |
798
|
|
|
|
|
|
|
}, |
799
|
|
|
|
|
|
|
ignore_unknown_fields => { |
800
|
|
|
|
|
|
|
summary => 'When unknown fields are specified in --include-field (--field) or --exclude-field options, ignore them instead of throwing an error', |
801
|
|
|
|
|
|
|
schema => 'bool*', |
802
|
|
|
|
|
|
|
}, |
803
|
|
|
|
|
|
|
show_selected_fields => { |
804
|
|
|
|
|
|
|
summary => 'Show selected fields and then immediately exit', |
805
|
|
|
|
|
|
|
schema => 'true*', |
806
|
|
|
|
|
|
|
}, |
807
|
|
|
|
|
|
|
); |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
our %argspec_eval = ( |
810
|
|
|
|
|
|
|
eval => { |
811
|
|
|
|
|
|
|
summary => 'Perl code', |
812
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
813
|
|
|
|
|
|
|
cmdline_aliases => { e=>{} }, |
814
|
|
|
|
|
|
|
req => 1, |
815
|
|
|
|
|
|
|
}, |
816
|
|
|
|
|
|
|
); |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
our %argspecopt_eval = ( |
819
|
|
|
|
|
|
|
eval => { |
820
|
|
|
|
|
|
|
summary => 'Perl code', |
821
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
822
|
|
|
|
|
|
|
cmdline_aliases => { e=>{} }, |
823
|
|
|
|
|
|
|
}, |
824
|
|
|
|
|
|
|
); |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
our %argspec_eval_1 = ( |
827
|
|
|
|
|
|
|
eval => { |
828
|
|
|
|
|
|
|
summary => 'Perl code', |
829
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
830
|
|
|
|
|
|
|
cmdline_aliases => { e=>{} }, |
831
|
|
|
|
|
|
|
req => 1, |
832
|
|
|
|
|
|
|
pos => 1, |
833
|
|
|
|
|
|
|
}, |
834
|
|
|
|
|
|
|
); |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
our %argspec_eval_2 = ( |
837
|
|
|
|
|
|
|
eval => { |
838
|
|
|
|
|
|
|
summary => 'Perl code', |
839
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
840
|
|
|
|
|
|
|
cmdline_aliases => { e=>{} }, |
841
|
|
|
|
|
|
|
req => 1, |
842
|
|
|
|
|
|
|
pos => 2, |
843
|
|
|
|
|
|
|
}, |
844
|
|
|
|
|
|
|
); |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
our %argspecopt_eval_2 = ( |
847
|
|
|
|
|
|
|
eval => { |
848
|
|
|
|
|
|
|
summary => 'Perl code', |
849
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
850
|
|
|
|
|
|
|
cmdline_aliases => { e=>{} }, |
851
|
|
|
|
|
|
|
pos => 2, |
852
|
|
|
|
|
|
|
}, |
853
|
|
|
|
|
|
|
); |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
our %argspecsopt_sortsub = ( |
856
|
|
|
|
|
|
|
by_sortsub => { |
857
|
|
|
|
|
|
|
schema => 'str*', |
858
|
|
|
|
|
|
|
description => <<'_', |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
When sorting rows, usually combined with `--key` because a typical Sort::Sub routine |
861
|
|
|
|
|
|
|
expects a string to be compared against. |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
When sorting fields, the Sort::Sub routine will get the field name as argument. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
_ |
866
|
|
|
|
|
|
|
summary => 'Sort using a Sort::Sub routine', |
867
|
|
|
|
|
|
|
'x.completion' => ['sortsub_spec'], |
868
|
|
|
|
|
|
|
}, |
869
|
|
|
|
|
|
|
sortsub_args => { |
870
|
|
|
|
|
|
|
summary => 'Arguments to pass to Sort::Sub routine', |
871
|
|
|
|
|
|
|
schema => ['hash*', of=>'str*'], |
872
|
|
|
|
|
|
|
}, |
873
|
|
|
|
|
|
|
); |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
our %argspecopt_key = ( |
876
|
|
|
|
|
|
|
key => { |
877
|
|
|
|
|
|
|
summary => 'Generate sort keys with this Perl code', |
878
|
|
|
|
|
|
|
description => <<'_', |
879
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
If specified, then will compute sort keys using Perl code and sort using the |
881
|
|
|
|
|
|
|
keys. Relevant when sorting using `--by-code` or `--by-sortsub`. If specified, |
882
|
|
|
|
|
|
|
then instead of row when sorting rows, the code (or Sort::Sub routine) will |
883
|
|
|
|
|
|
|
receive these sort keys to sort against. |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
The code will receive the row (arrayref, or if -H is specified, hashref) as the |
886
|
|
|
|
|
|
|
argument. |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
_ |
889
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
890
|
|
|
|
|
|
|
cmdline_aliases => {k=>{}}, |
891
|
|
|
|
|
|
|
}, |
892
|
|
|
|
|
|
|
); |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
our %argspecs_sort_rows = ( |
895
|
|
|
|
|
|
|
reverse => { |
896
|
|
|
|
|
|
|
schema => ['bool', is=>1], |
897
|
|
|
|
|
|
|
cmdline_aliases => {r=>{}}, |
898
|
|
|
|
|
|
|
}, |
899
|
|
|
|
|
|
|
ci => { |
900
|
|
|
|
|
|
|
schema => ['bool', is=>1], |
901
|
|
|
|
|
|
|
cmdline_aliases => {i=>{}}, |
902
|
|
|
|
|
|
|
}, |
903
|
|
|
|
|
|
|
by_fields => { |
904
|
|
|
|
|
|
|
summary => 'Sort by a list of field specifications', |
905
|
|
|
|
|
|
|
'summary.alt.plurality.singular' => 'Add a sort field specification', |
906
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
907
|
|
|
|
|
|
|
'x.name.singular' => 'by_field', |
908
|
|
|
|
|
|
|
description => <<'_', |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
Each field specification is a field name with an optional prefix. `FIELD` |
911
|
|
|
|
|
|
|
(without prefix) means sort asciibetically ascending (smallest to largest), |
912
|
|
|
|
|
|
|
`~FIELD` means sort asciibetically descending (largest to smallest), `+FIELD` |
913
|
|
|
|
|
|
|
means sort numerically ascending, `-FIELD` means sort numerically descending. |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
_ |
916
|
|
|
|
|
|
|
schema => ['array*', of=>'str*'], |
917
|
|
|
|
|
|
|
element_completion => \&_complete_sort_field, |
918
|
|
|
|
|
|
|
}, |
919
|
|
|
|
|
|
|
by_code => { |
920
|
|
|
|
|
|
|
summary => 'Sort by using Perl code', |
921
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
922
|
|
|
|
|
|
|
description => <<'_', |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
`$a` and `$b` (or the first and second argument) will contain the two rows to be |
925
|
|
|
|
|
|
|
compared. These are arrayrefs; or if `--hash` (`-H`) is specified, hashrefs; or |
926
|
|
|
|
|
|
|
if `--key` is specified, whatever the code in `--key` returns. |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
_ |
929
|
|
|
|
|
|
|
}, |
930
|
|
|
|
|
|
|
%argspecopt_key, |
931
|
|
|
|
|
|
|
%argspecsopt_sortsub, |
932
|
|
|
|
|
|
|
); |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
our %argspecs_sort_fields = ( |
935
|
|
|
|
|
|
|
reverse => { |
936
|
|
|
|
|
|
|
schema => ['bool', is=>1], |
937
|
|
|
|
|
|
|
cmdline_aliases => {r=>{}}, |
938
|
|
|
|
|
|
|
}, |
939
|
|
|
|
|
|
|
ci => { |
940
|
|
|
|
|
|
|
schema => ['bool', is=>1], |
941
|
|
|
|
|
|
|
cmdline_aliases => {i=>{}}, |
942
|
|
|
|
|
|
|
}, |
943
|
|
|
|
|
|
|
by_examples => { |
944
|
|
|
|
|
|
|
summary => 'Sort by a list of field names as examples', |
945
|
|
|
|
|
|
|
'summary.alt.plurality.singular' => 'Add a field to sort by example', |
946
|
|
|
|
|
|
|
'x.name.is_plural' => 1, |
947
|
|
|
|
|
|
|
'x.name.singular' => 'by_example', |
948
|
|
|
|
|
|
|
schema => ['array*', of=>'str*'], |
949
|
|
|
|
|
|
|
element_completion => \&_complete_field, |
950
|
|
|
|
|
|
|
}, |
951
|
|
|
|
|
|
|
by_code => { |
952
|
|
|
|
|
|
|
summary => 'Sort fields using Perl code', |
953
|
|
|
|
|
|
|
schema => $sch_req_str_or_code, |
954
|
|
|
|
|
|
|
description => <<'_', |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
`$a` and `$b` (or the first and second argument) will contain `[$field_name, |
957
|
|
|
|
|
|
|
$field_idx]`. |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
_ |
960
|
|
|
|
|
|
|
}, |
961
|
|
|
|
|
|
|
%argspecsopt_sortsub, |
962
|
|
|
|
|
|
|
); |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
our %argspecopt_with_data_rows = ( |
965
|
|
|
|
|
|
|
with_data_rows => { |
966
|
|
|
|
|
|
|
summary => 'Whether to also output data rows', |
967
|
|
|
|
|
|
|
schema => 'bool', |
968
|
|
|
|
|
|
|
}, |
969
|
|
|
|
|
|
|
); |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
our %argspecopt_hash = ( |
972
|
|
|
|
|
|
|
hash => { |
973
|
|
|
|
|
|
|
summary => 'Provide row in $_ as hashref instead of arrayref', |
974
|
|
|
|
|
|
|
schema => ['bool*', is=>1], |
975
|
|
|
|
|
|
|
cmdline_aliases => {H=>{}}, |
976
|
|
|
|
|
|
|
}, |
977
|
|
|
|
|
|
|
); |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
# add a position to specified argument, if possible |
980
|
|
|
|
|
|
|
sub _add_arg_pos { |
981
|
42
|
|
|
42
|
|
100
|
my ($args, $argname, $is_slurpy) = @_; |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
# argument already has a position, return |
984
|
42
|
50
|
|
|
|
105
|
return if defined $args->{$argname}{pos}; |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
# position of slurpy argument |
987
|
42
|
|
|
|
|
61
|
my $slurpy_pos; |
988
|
42
|
|
|
|
|
149
|
for (keys %$args) { |
989
|
509
|
100
|
|
|
|
993
|
next unless $args->{$_}{slurpy}; |
990
|
6
|
|
|
|
|
19
|
$slurpy_pos = $args->{$_}{pos}; |
991
|
6
|
|
|
|
|
13
|
last; |
992
|
|
|
|
|
|
|
} |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
# there is already a slurpy arg, return |
995
|
42
|
50
|
66
|
|
|
161
|
return if $is_slurpy && defined $slurpy_pos; |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
# find the lowest position that's not yet taken |
998
|
|
|
|
|
|
|
ARG: |
999
|
42
|
|
|
|
|
130
|
for my $j (0 .. scalar(keys %$args)-1) { |
1000
|
60
|
100
|
100
|
|
|
160
|
last if defined $slurpy_pos && $j >= $slurpy_pos; |
1001
|
56
|
|
|
|
|
177
|
for (keys %$args) { |
1002
|
648
|
100
|
100
|
|
|
1381
|
next ARG if defined $args->{$_}{pos} && $args->{$_}{pos} == $j; |
1003
|
|
|
|
|
|
|
} |
1004
|
38
|
|
|
|
|
104
|
$args->{$argname}{pos} = $j; |
1005
|
38
|
100
|
|
|
|
89
|
$args->{$argname}{slurpy} = 1 if $is_slurpy; |
1006
|
38
|
|
|
|
|
104
|
last; |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
sub _randext { |
1011
|
2
|
|
|
2
|
|
16
|
state $charset = [0..9, "A".."Z","a".."z"]; |
1012
|
2
|
|
|
|
|
6
|
my $len = shift; |
1013
|
2
|
|
|
|
|
3
|
my $ext = ""; |
1014
|
2
|
|
|
|
|
5
|
for (1..$len) { $ext .= $charset->[rand @$charset] } |
|
10
|
|
|
|
|
21
|
|
1015
|
2
|
|
|
|
|
8
|
$ext; |
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
$SPEC{gen_csv_util} = { |
1019
|
|
|
|
|
|
|
v => 1.1, |
1020
|
|
|
|
|
|
|
summary => 'Generate a CSV utility', |
1021
|
|
|
|
|
|
|
description => <<'_', |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
This routine is used to generate a CSV utility in the form of a <pm:Rinci> |
1024
|
|
|
|
|
|
|
function (code and metadata). You can then produce a CLI from the Rinci function |
1025
|
|
|
|
|
|
|
simply using <pm:Perinci::CmdLine::Gen> or, if you use <pm:Dist::Zilla>, |
1026
|
|
|
|
|
|
|
<pm:Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line, |
1027
|
|
|
|
|
|
|
<prog:gen-pericmd-script>. |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
Using this routine, by providing just one or a few hooks and setting some |
1030
|
|
|
|
|
|
|
parameters like a couple of extra arguments, you will get a complete CLI with |
1031
|
|
|
|
|
|
|
decent POD/manpage, ability to read one or multiple CSV's and write one or |
1032
|
|
|
|
|
|
|
multiple CSV's, some command-line options to customize how the input CSV's |
1033
|
|
|
|
|
|
|
should be parsed and how the output CSV's should be formatted and named. Your |
1034
|
|
|
|
|
|
|
CLI also has tab completion, usage and help message, and other features. |
1035
|
|
|
|
|
|
|
|
1036
|
|
|
|
|
|
|
To create a CSV utility, you specify a `name` (e.g. `csv_dump`; must be a valid |
1037
|
|
|
|
|
|
|
unqualified Perl identifier/function name) and optionally `summary`, |
1038
|
|
|
|
|
|
|
`description`, and other metadata like `links` or even `add_meta_props`. Then |
1039
|
|
|
|
|
|
|
you specify one or more of `on_*` or `before_*` or `after_*` arguments to supply |
1040
|
|
|
|
|
|
|
handlers (coderefs) for your CSV utility at various hook points. |
1041
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
*THE HOOKS* |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
All code for hooks should accept a single argument `r`. `r` is a stash (hashref) |
1046
|
|
|
|
|
|
|
of various data, the keys of which will depend on which hook point is being called. |
1047
|
|
|
|
|
|
|
You can also add more keys to store data or for flow control (see hook |
1048
|
|
|
|
|
|
|
documentation below for more details). |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
The order of the hooks, in processing chronological order: |
1051
|
|
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
* on_begin |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
Called when utility begins, before reading CSV. You can use this hook e.g. to |
1055
|
|
|
|
|
|
|
process arguments, set output filenames (if you allow custom output |
1056
|
|
|
|
|
|
|
filenames). |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
* before_read_input |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
Called before opening any input CSV file. This hook is *still* called even if |
1061
|
|
|
|
|
|
|
your utility sets `reads_csv` to false. |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
At this point, the `input_filenames` stash key (as well as other keys like |
1064
|
|
|
|
|
|
|
`input_filename`, `input_filenum`, etc) has not been set. You can use this |
1065
|
|
|
|
|
|
|
hook e.g. to set a custom `input_filenames`. |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
* before_open_input_files |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
Called before an input CSV file is about to be opened, including for stdin |
1070
|
|
|
|
|
|
|
(`-`). You can use this hook e.g. to check/preprocess input file. Flow control |
1071
|
|
|
|
|
|
|
is available by setting `$r->{wants_skip_files}` to skip reading all the input |
1072
|
|
|
|
|
|
|
files and go directly to the `after_read_input` hook. |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
* before_open_input_file |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
Called before an input CSV file is about to be opened, including for stdin |
1077
|
|
|
|
|
|
|
(`-`). For the first file, called after `before_open_input_files` hook. You can |
1078
|
|
|
|
|
|
|
use this hook e.g. to check/preprocess input file. Flow control is available |
1079
|
|
|
|
|
|
|
by setting `$r->{wants_skip_file}` to skip reading a single input file and go |
1080
|
|
|
|
|
|
|
to the next file, or `$r->{wants_skip_files}` to skip reading the rest of the |
1081
|
|
|
|
|
|
|
files and go directly to the `after_read_input` hook. |
1082
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
* on_input_header_row |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
Called when receiving header row. Will be called for every input file, and |
1086
|
|
|
|
|
|
|
called even when user specifies `--no-input-header`, in which case the header |
1087
|
|
|
|
|
|
|
row will be the generated `["field1", "field2", ...]`. You can use this hook |
1088
|
|
|
|
|
|
|
e.g. to add/remove/rearrange fields. |
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
You can set `$r->{wants_fill_rows}` to a defined false if you do not want |
1091
|
|
|
|
|
|
|
`$r->{input_row}` to be filled with empty string elements when it contains |
1092
|
|
|
|
|
|
|
less than the number of fields (in case of sparse values at the end). Normally |
1093
|
|
|
|
|
|
|
you only want to do this when you want to do checking, e.g. in |
1094
|
|
|
|
|
|
|
<prog:csv-check-rows>. |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
* on_input_data_row |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
Called when receiving each data row. You can use this hook e.g. to modify the |
1099
|
|
|
|
|
|
|
row or print output (for line-by-line transformation or filtering). |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
* after_close_input_file |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
Called after each input file is closed, including for stdin (`-`) (although |
1104
|
|
|
|
|
|
|
for stdin, the handle is not actually closed). Flow control is possible by |
1105
|
|
|
|
|
|
|
setting `$r->{wants_skip_files}` to skip reading the rest of the files and go |
1106
|
|
|
|
|
|
|
straight to the `after_close_input_files` hook. |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
* after_close_input_files |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
Called after the last input file is closed, after the last |
1111
|
|
|
|
|
|
|
`after_close_input_file` hook, including for stdin (`-`) (although for stdin, |
1112
|
|
|
|
|
|
|
the handle is not actually closed). |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
* after_read_input |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
Called after the last row of the last CSV file is read and the last file is |
1117
|
|
|
|
|
|
|
closed. This hook is *still* called even if you set `reads_csv` option to false. |
1118
|
|
|
|
|
|
|
At this point the stash keys related to CSV reading have all been cleared, |
1119
|
|
|
|
|
|
|
including `input_filenames`, `input_filename`, `input_fh`, etc. |
1120
|
|
|
|
|
|
|
|
1121
|
|
|
|
|
|
|
You can use this hook e.g. to print output if you buffer the output. |
1122
|
|
|
|
|
|
|
|
1123
|
|
|
|
|
|
|
* on_end |
1124
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
Called when utility is about to exit. You can use this hook e.g. to return the |
1126
|
|
|
|
|
|
|
final result. |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
|
1129
|
|
|
|
|
|
|
*THE STASH* |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
The common keys that `r` will contain: |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
- `gen_args`, hash. The arguments used to generate the CSV utility. |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
- `util_args`, hash. The arguments that your CSV utility accepts. Parsed from |
1136
|
|
|
|
|
|
|
command-line arguments (or configuration files, or environment variables). |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
- `name`, str. The name of the CSV utility, which can also be retrieved via |
1139
|
|
|
|
|
|
|
`gen_args`. |
1140
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
- `code_print`, coderef. Routine provided for you to print something. Accepts a |
1142
|
|
|
|
|
|
|
string. Takes care of opening the output files for you. |
1143
|
|
|
|
|
|
|
|
1144
|
|
|
|
|
|
|
- `code_print_row`, coderef. Routine provided for you to print a data row. You |
1145
|
|
|
|
|
|
|
pass the row (either arrayref or hashref). Takes care of opening the output |
1146
|
|
|
|
|
|
|
files for you, as well as printing header row the first time, if needed. |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
- `code_print_header_row`, coderef. Routine provided for you to print header |
1149
|
|
|
|
|
|
|
row. You don't need to pass any arguments. Will only print the header row once |
1150
|
|
|
|
|
|
|
per output file if output header is enabled, even if called multiple times. |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
If you are accepting CSV data (`reads_csv` gen argument set to true), the |
1153
|
|
|
|
|
|
|
following keys will also be available (in `on_input_header_row` and |
1154
|
|
|
|
|
|
|
`on_input_data_row` hooks): |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
- `input_parser`, a <pm:Text::CSV_XS> instance for input parsing. |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
- `input_fields`, array of str. Input CSV's field names. |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
- `input_fields_idx`, hash with field name as keys and field index (0-based |
1161
|
|
|
|
|
|
|
integer) as values. |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
- `input_filenames`, array of str. |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
- `input_filename`, str. The name of the current input file being read (`-` if |
1166
|
|
|
|
|
|
|
reading from stdin). |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
- `input_filenum`, uint. The number of the current input file, 1 being the first |
1169
|
|
|
|
|
|
|
file, 2 for the second, and so on. |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
- `input_fh`, the handle to the current file being read. |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
- `input_rownum`, uint. The number of rows that have been read (reset after each |
1174
|
|
|
|
|
|
|
input file). In `on_input_header_row` phase, this will be 1 since header row |
1175
|
|
|
|
|
|
|
(including the generated one) is the first row. Then in `on_input_data_row` |
1176
|
|
|
|
|
|
|
phase (called the first time for a file), it will be 2 for the first data row, |
1177
|
|
|
|
|
|
|
even if physically it is the first row for CSV file that does not have a |
1178
|
|
|
|
|
|
|
header. |
1179
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
- `input_data_rownum`, uint. The number of data rows that have been read (reset |
1181
|
|
|
|
|
|
|
after each input file). This will be equal to `input_rownum` less 1 if input |
1182
|
|
|
|
|
|
|
file has header. |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
- `input_row`, aos (array of str). The current input CSV row as an arrayref. |
1185
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
- `input_row_as_hashref`, hos (hash of str). The current input CSV row as a |
1187
|
|
|
|
|
|
|
hashref, with field names as hash keys and field values as hash values. This |
1188
|
|
|
|
|
|
|
will only be calculated if utility wants it. Utility can express so by setting |
1189
|
|
|
|
|
|
|
`$r->{wants_input_row_as_hashref}` to true, e.g. in the `on_begin` hook. |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
- `input_header_row_count`, uint. Contains the number of actual header rows that |
1192
|
|
|
|
|
|
|
have been read. If CLI user specifies `--no-input-header`, this will stay at |
1193
|
|
|
|
|
|
|
zero. Will be reset for each CSV file. |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
- `input_data_row_count`, int. Contains the number of actual data rows that have |
1196
|
|
|
|
|
|
|
been read. Will be reset for each CSV file. |
1197
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
If you are outputting CSV (`writes_csv` gen argument set to true), the following |
1199
|
|
|
|
|
|
|
keys will be available: |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
- `output_emitter`, a <pm:Text::CSV_XS> instance for output. |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
- `output_fields`, array of str. Should be set to list of output field names. If |
1204
|
|
|
|
|
|
|
unset, will be set to be the same as `input_fields`. |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
- `output_fields_idx`, hash with field names as keys and field indexes (0-based |
1207
|
|
|
|
|
|
|
integer) as values. Normally you do not need to set this manually; you just |
1208
|
|
|
|
|
|
|
need to set `output_fields` and this hash will be computed automatically for |
1209
|
|
|
|
|
|
|
you just before the first output row is outputted. |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
- `output_filenames`, array of str. |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
- `output_filename`, str, name of current output file. |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
- `output_filenum`, uint, the number of the current output file, 1 being the |
1216
|
|
|
|
|
|
|
first file, 2 for the second, and so on. |
1217
|
|
|
|
|
|
|
|
1218
|
|
|
|
|
|
|
- `output_fh`, handle to the current output file. |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
- `output_rownum`, uint. The number of rows that have been outputted (reset |
1221
|
|
|
|
|
|
|
after each output file). |
1222
|
|
|
|
|
|
|
|
1223
|
|
|
|
|
|
|
- `output_data_rownum`, uint. The number of data rows that have been outputted |
1224
|
|
|
|
|
|
|
(reset after each output file). This will be equal to `output_rownum` less 1 if |
1225
|
|
|
|
|
|
|
output file has header. |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
For other hook-specific keys, see the documentation for associated hook point. |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
*ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS* |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
As mentioned above, you will get additional command-line options/arguments in |
1233
|
|
|
|
|
|
|
`$r->{util_args}` hashref. Some options/arguments are already added by |
1234
|
|
|
|
|
|
|
`gen_csv_util`, e.g. `input_filename` or `input_filenames` along with |
1235
|
|
|
|
|
|
|
`input_sep_char`, etc (when your utility declares `reads_csv`), |
1236
|
|
|
|
|
|
|
`output_filename` or `output_filenames` along with `overwrite`, |
1237
|
|
|
|
|
|
|
`output_sep_char`, etc (when your utility declares `writes_csv`). |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
If you want to accept additional arguments/options, you specify them in |
1240
|
|
|
|
|
|
|
`add_args`. Each option/argument has to be specified |
1241
|
|
|
|
|
|
|
first via `add_args` (as hashref, with key being argument name and value the |
1242
|
|
|
|
|
|
|
argument specification as defined in <pm:Rinci::function>). Some argument |
1243
|
|
|
|
|
|
|
specifications have been defined in <pm:App::CSVUtils> and can be used. See |
1244
|
|
|
|
|
|
|
existing utilities for examples. |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
*READING CSV DATA* |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
To read CSV data, normally your utility would provide handler for the |
1250
|
|
|
|
|
|
|
`on_input_data_row` hook and sometimes additionally `on_input_header_row`. |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
*OUTPUTTING STRING OR RETURNING RESULT* |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
To output string, usually you call the provided routine `$r->{code_print}`. This |
1256
|
|
|
|
|
|
|
routine will open the output files for you. |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
You can also return enveloped result directly by setting `$r->{result}`. |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
*OUTPUTTING CSV DATA* |
1262
|
|
|
|
|
|
|
|
1263
|
|
|
|
|
|
|
To output CSV data, usually you call the provided routine `$r->{code_print_row}`. |
1264
|
|
|
|
|
|
|
This routine accepts a row (arrayref or hashref). This routine will open the |
1265
|
|
|
|
|
|
|
output files for you when needed, as well as print header row automatically. |
1266
|
|
|
|
|
|
|
|
1267
|
|
|
|
|
|
|
You can also buffer rows from input to e.g. `$r->{output_rows}`, then call |
1268
|
|
|
|
|
|
|
`$r->{code_print_row}` repeatedly in the `after_read_input` hook to print all the |
1269
|
|
|
|
|
|
|
rows. |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
*READING MULTIPLE CSV FILES* |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
To read multiple CSV files, you first specify `reads_multiple_csv`. Then, you |
1275
|
|
|
|
|
|
|
can supply handler for `on_input_header_row` and `on_input_data_row` as usual. |
1276
|
|
|
|
|
|
|
If you want to do something before/after each input file, you can also supply |
1277
|
|
|
|
|
|
|
handler for `before_open_input_file` or `after_close_input_file`. |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
*WRITING TO MULTIPLE CSV FILES* |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
Similarly, to write to many CSV files, you first specify `writes_multiple_csv`. |
1283
|
|
|
|
|
|
|
Then, you can supply handler for `on_input_header_row` and `on_input_data_row` |
1284
|
|
|
|
|
|
|
as usual. To switch to the next file, set |
1285
|
|
|
|
|
|
|
`$r->{wants_switch_to_next_output_file}` to true, in which case the next call to |
1286
|
|
|
|
|
|
|
`$r->{code_print_row}` will close the current file and open the next file. |
1287
|
|
|
|
|
|
|
|
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
*CHANGING THE OUTPUT FIELDS* |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
When calling `$r->{code_print_row}`, you can output whatever fields you want. By |
1292
|
|
|
|
|
|
|
convention, you can set `$r->{output_fields}` and `$r->{output_fields_idx}` to |
1293
|
|
|
|
|
|
|
let other handlers know about the output fields. For example, see the |
1294
|
|
|
|
|
|
|
implementation of <prog:csv-concat>. |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
_ |
1297
|
|
|
|
|
|
|
args => { |
1298
|
|
|
|
|
|
|
name => { |
1299
|
|
|
|
|
|
|
schema => 'perl::identifier::unqualified_ascii*', |
1300
|
|
|
|
|
|
|
req => 1, |
1301
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1302
|
|
|
|
|
|
|
}, |
1303
|
|
|
|
|
|
|
summary => { |
1304
|
|
|
|
|
|
|
schema => 'str*', |
1305
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1306
|
|
|
|
|
|
|
}, |
1307
|
|
|
|
|
|
|
description => { |
1308
|
|
|
|
|
|
|
schema => 'str*', |
1309
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1310
|
|
|
|
|
|
|
}, |
1311
|
|
|
|
|
|
|
links => { |
1312
|
|
|
|
|
|
|
schema => ['array*', of=>'hash*'], # XXX defhashes |
1313
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1314
|
|
|
|
|
|
|
}, |
1315
|
|
|
|
|
|
|
examples => { |
1316
|
|
|
|
|
|
|
schema => ['array*'], # defhashes |
1317
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1318
|
|
|
|
|
|
|
}, |
1319
|
|
|
|
|
|
|
add_meta_props => { |
1320
|
|
|
|
|
|
|
summary => 'Add additional Rinci function metadata properties', |
1321
|
|
|
|
|
|
|
schema => ['hash*'], |
1322
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1323
|
|
|
|
|
|
|
}, |
1324
|
|
|
|
|
|
|
add_args => { |
1325
|
|
|
|
|
|
|
schema => ['hash*'], |
1326
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1327
|
|
|
|
|
|
|
}, |
1328
|
|
|
|
|
|
|
add_args_rels => { |
1329
|
|
|
|
|
|
|
schema => ['hash*'], |
1330
|
|
|
|
|
|
|
tags => ['category:metadata'], |
1331
|
|
|
|
|
|
|
}, |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
reads_csv => { |
1334
|
|
|
|
|
|
|
summary => 'Whether utility reads CSV data', |
1335
|
|
|
|
|
|
|
'summary.alt.bool.not' => 'Specify that utility does not read CSV data', |
1336
|
|
|
|
|
|
|
schema => 'bool*', |
1337
|
|
|
|
|
|
|
default => 1, |
1338
|
|
|
|
|
|
|
}, |
1339
|
|
|
|
|
|
|
reads_multiple_csv => { |
1340
|
|
|
|
|
|
|
summary => 'Whether utility accepts CSV data', |
1341
|
|
|
|
|
|
|
schema => 'bool*', |
1342
|
|
|
|
|
|
|
description => <<'_', |
1343
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
Setting this option to true will implicitly set the `reads_csv` option to true, |
1345
|
|
|
|
|
|
|
obviously. |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
_ |
1348
|
|
|
|
|
|
|
}, |
1349
|
|
|
|
|
|
|
writes_csv => { |
1350
|
|
|
|
|
|
|
summary => 'Whether utility writes CSV data', |
1351
|
|
|
|
|
|
|
'summary.alt.bool.not' => 'Specify that utility does not write CSV data', |
1352
|
|
|
|
|
|
|
schema => 'bool*', |
1353
|
|
|
|
|
|
|
default => 1, |
1354
|
|
|
|
|
|
|
}, |
1355
|
|
|
|
|
|
|
writes_multiple_csv => { |
1356
|
|
|
|
|
|
|
summary => 'Whether utility outputs CSV data', |
1357
|
|
|
|
|
|
|
schema => 'bool*', |
1358
|
|
|
|
|
|
|
description => <<'_', |
1359
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
Setting this option to true will implicitly set the `writes_csv` option to true, |
1361
|
|
|
|
|
|
|
obviously. |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
_ |
1364
|
|
|
|
|
|
|
}, |
1365
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
on_begin => { |
1367
|
|
|
|
|
|
|
schema => 'code*', |
1368
|
|
|
|
|
|
|
}, |
1369
|
|
|
|
|
|
|
before_read_input => { |
1370
|
|
|
|
|
|
|
schema => 'code*', |
1371
|
|
|
|
|
|
|
}, |
1372
|
|
|
|
|
|
|
before_open_input_files => { |
1373
|
|
|
|
|
|
|
schema => 'code*', |
1374
|
|
|
|
|
|
|
}, |
1375
|
|
|
|
|
|
|
before_open_input_file => { |
1376
|
|
|
|
|
|
|
schema => 'code*', |
1377
|
|
|
|
|
|
|
}, |
1378
|
|
|
|
|
|
|
on_input_header_row => { |
1379
|
|
|
|
|
|
|
schema => 'code*', |
1380
|
|
|
|
|
|
|
}, |
1381
|
|
|
|
|
|
|
on_input_data_row => { |
1382
|
|
|
|
|
|
|
schema => 'code*', |
1383
|
|
|
|
|
|
|
}, |
1384
|
|
|
|
|
|
|
after_close_input_file => { |
1385
|
|
|
|
|
|
|
schema => 'code*', |
1386
|
|
|
|
|
|
|
}, |
1387
|
|
|
|
|
|
|
after_close_input_files => { |
1388
|
|
|
|
|
|
|
schema => 'code*', |
1389
|
|
|
|
|
|
|
}, |
1390
|
|
|
|
|
|
|
after_read_input => { |
1391
|
|
|
|
|
|
|
schema => 'code*', |
1392
|
|
|
|
|
|
|
}, |
1393
|
|
|
|
|
|
|
on_end => { |
1394
|
|
|
|
|
|
|
schema => 'code*', |
1395
|
|
|
|
|
|
|
}, |
1396
|
|
|
|
|
|
|
}, |
1397
|
|
|
|
|
|
|
result_naked => 1, |
1398
|
|
|
|
|
|
|
result => { |
1399
|
|
|
|
|
|
|
schema => 'bool*', |
1400
|
|
|
|
|
|
|
}, |
1401
|
|
|
|
|
|
|
}; |
1402
|
|
|
|
|
|
|
sub gen_csv_util { |
1403
|
25
|
|
|
25
|
1
|
204
|
my %gen_args = @_; |
1404
|
|
|
|
|
|
|
|
1405
|
25
|
50
|
|
|
|
470
|
my $name = delete($gen_args{name}) or die "Please specify name"; |
1406
|
25
|
|
50
|
|
|
89
|
my $summary = delete($gen_args{summary}) // '(No summary)'; |
1407
|
25
|
|
100
|
|
|
92
|
my $description = delete($gen_args{description}) // '(No description)'; |
1408
|
25
|
|
100
|
|
|
108
|
my $links = delete($gen_args{links}) // []; |
1409
|
25
|
|
100
|
|
|
83
|
my $examples = delete($gen_args{examples}) // []; |
1410
|
25
|
|
|
|
|
46
|
my $add_meta_props = delete $gen_args{add_meta_props}; |
1411
|
25
|
|
|
|
|
44
|
my $add_args = delete $gen_args{add_args}; |
1412
|
25
|
|
|
|
|
47
|
my $add_args_rels = delete $gen_args{add_args_rels}; |
1413
|
25
|
|
|
|
|
39
|
my $reads_multiple_csv = delete($gen_args{reads_multiple_csv}); |
1414
|
25
|
|
50
|
|
|
84
|
my $reads_csv = delete($gen_args{reads_csv}) // 1; |
1415
|
25
|
|
50
|
|
|
53
|
my $tags = [ @{ delete($gen_args{tags}) // [] } ]; |
|
25
|
|
|
|
|
90
|
|
1416
|
25
|
100
|
|
|
|
62
|
$reads_csv = 1 if $reads_multiple_csv; |
1417
|
25
|
|
|
|
|
36
|
my $writes_multiple_csv = delete($gen_args{writes_multiple_csv}); |
1418
|
25
|
|
100
|
|
|
83
|
my $writes_csv = delete($gen_args{writes_csv}) // 1; |
1419
|
25
|
50
|
|
|
|
68
|
$writes_csv = 1 if $writes_multiple_csv; |
1420
|
25
|
|
|
|
|
48
|
my $on_begin = delete $gen_args{on_begin}; |
1421
|
25
|
|
|
|
|
39
|
my $before_read_input = delete $gen_args{before_read_input}; |
1422
|
25
|
|
|
|
|
43
|
my $before_open_input_files = delete $gen_args{before_open_input_files}; |
1423
|
25
|
|
|
|
|
42
|
my $before_open_input_file = delete $gen_args{before_open_input_file}; |
1424
|
25
|
|
|
|
|
33
|
my $on_input_header_row = delete $gen_args{on_input_header_row}; |
1425
|
25
|
|
|
|
|
39
|
my $on_input_data_row = delete $gen_args{on_input_data_row}; |
1426
|
25
|
|
|
|
|
37
|
my $after_close_input_file = delete $gen_args{after_close_input_file}; |
1427
|
25
|
|
|
|
|
36
|
my $after_close_input_files = delete $gen_args{after_close_input_files}; |
1428
|
25
|
|
|
|
|
40
|
my $after_read_input = delete $gen_args{after_read_input}; |
1429
|
25
|
|
|
|
|
47
|
my $on_end = delete $gen_args{on_end}; |
1430
|
|
|
|
|
|
|
|
1431
|
25
|
50
|
|
|
|
77
|
scalar(keys %gen_args) and die "Unknown argument(s): ".join(", ", keys %gen_args); |
1432
|
|
|
|
|
|
|
|
1433
|
25
|
|
|
|
|
41
|
my $code; |
1434
|
|
|
|
|
|
|
CREATE_CODE: { |
1435
|
25
|
|
|
|
|
39
|
$code = sub { |
1436
|
119
|
|
|
119
|
|
302386
|
my %util_args = @_; |
1437
|
|
|
|
|
|
|
|
1438
|
119
|
|
100
|
|
|
576
|
my $has_header = $util_args{input_header} // 1; |
1439
|
119
|
|
66
|
|
|
381
|
my $outputs_header = $util_args{output_header} // $has_header; |
1440
|
|
|
|
|
|
|
|
1441
|
119
|
|
|
|
|
398
|
my $r = { |
1442
|
|
|
|
|
|
|
gen_args => \%gen_args, |
1443
|
|
|
|
|
|
|
util_args => \%util_args, |
1444
|
|
|
|
|
|
|
name => $name, |
1445
|
|
|
|
|
|
|
}; |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
# inside the main eval block, we call hook handlers. A handler can |
1448
|
|
|
|
|
|
|
# throw an exception (which can be a string or an enveloped response |
1449
|
|
|
|
|
|
|
# like [500, "some error message"], see Rinci::function). we trap |
1450
|
|
|
|
|
|
|
# the exception so we can return the appropriate enveloped response. |
1451
|
|
|
|
|
|
|
MAIN_EVAL: |
1452
|
119
|
|
|
|
|
211
|
eval { |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
# do some checking |
1455
|
119
|
50
|
33
|
|
|
342
|
if ($util_args{inplace} && (!$reads_csv || !$writes_csv)) { |
|
|
|
66
|
|
|
|
|
1456
|
0
|
|
|
|
|
0
|
die [412, "--inplace cannot be specified when we do not read & write CSV"]; |
1457
|
|
|
|
|
|
|
} |
1458
|
|
|
|
|
|
|
|
1459
|
119
|
100
|
|
|
|
251
|
if ($on_begin) { |
1460
|
37
|
|
|
|
|
122
|
log_trace "[csvutil] Calling on_begin hook handler ..."; |
1461
|
37
|
|
|
|
|
150
|
$on_begin->($r); |
1462
|
|
|
|
|
|
|
} |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
my $code_open_file = sub { |
1465
|
|
|
|
|
|
|
# set output filenames, if not yet |
1466
|
211
|
100
|
|
211
|
|
414
|
unless ($r->{output_filenames}) { |
1467
|
79
|
|
|
|
|
114
|
my @output_filenames; |
1468
|
79
|
100
|
|
|
|
188
|
if ($util_args{inplace}) { |
|
|
50
|
|
|
|
|
|
1469
|
2
|
|
|
|
|
3
|
for my $input_filename (@{ $r->{input_filenames} }) { |
|
2
|
|
|
|
|
5
|
|
1470
|
2
|
|
|
|
|
4
|
my $output_filename; |
1471
|
2
|
|
|
|
|
3
|
while (1) { |
1472
|
2
|
|
|
|
|
10
|
$output_filename = $input_filename . "." . _randext(5); |
1473
|
2
|
50
|
|
|
|
56
|
last unless -e $output_filename; |
1474
|
|
|
|
|
|
|
} |
1475
|
2
|
|
|
|
|
20
|
push @output_filenames, $output_filename; |
1476
|
|
|
|
|
|
|
} |
1477
|
|
|
|
|
|
|
} elsif ($writes_multiple_csv) { |
1478
|
0
|
|
0
|
|
|
0
|
@output_filenames = @{ $util_args{output_filenames} // ['-'] }; |
|
0
|
|
|
|
|
0
|
|
1479
|
|
|
|
|
|
|
} else { |
1480
|
77
|
|
50
|
|
|
340
|
@output_filenames = ($util_args{output_filename} // '-'); |
1481
|
|
|
|
|
|
|
} |
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME: { |
1484
|
79
|
|
|
|
|
129
|
my %seen_output_abs_path; # key = output filename |
|
79
|
|
|
|
|
101
|
|
1485
|
79
|
100
|
66
|
|
|
255
|
last unless $reads_csv && $writes_csv; |
1486
|
76
|
|
|
|
|
104
|
for my $input_filename (@{ $r->{input_filenames} }) { |
|
76
|
|
|
|
|
157
|
|
1487
|
78
|
50
|
|
|
|
156
|
next if $input_filename eq '-'; |
1488
|
78
|
|
|
|
|
2231
|
my $input_abs_path = Cwd::abs_path($input_filename); |
1489
|
78
|
50
|
|
|
|
269
|
die [500, "Can't get absolute path of input filename '$input_filename'"] unless $input_abs_path; |
1490
|
78
|
|
|
|
|
174
|
for my $output_filename (@output_filenames) { |
1491
|
78
|
100
|
|
|
|
269
|
next if $output_filename eq '-'; |
1492
|
2
|
50
|
|
|
|
5
|
next if $seen_output_abs_path{$output_filename}; |
1493
|
2
|
|
|
|
|
41
|
my $output_abs_path = Cwd::abs_path($output_filename); |
1494
|
2
|
50
|
|
|
|
8
|
die [500, "Can't get absolute path of output filename '$output_filename'"] unless $output_abs_path; |
1495
|
2
|
0
|
|
|
|
8
|
die [412, "Cannot set output filename to '$output_filename' ". |
|
|
50
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
($output_filename ne $output_abs_path ? "($output_abs_path) ":""). |
1497
|
|
|
|
|
|
|
"because it is the same as input filename and input will be clobbered; use --inplace to avoid clobbering<"] |
1498
|
|
|
|
|
|
|
if $output_abs_path eq $input_abs_path; |
1499
|
|
|
|
|
|
|
} |
1500
|
|
|
|
|
|
|
} |
1501
|
|
|
|
|
|
|
} # CHECK_OUTPUT_FILENAME_SAME_AS_INPUT_FILENAME |
1502
|
|
|
|
|
|
|
|
1503
|
79
|
|
|
|
|
211
|
$r->{output_filenames} = \@output_filenames; |
1504
|
79
|
|
50
|
|
|
369
|
$r->{output_num_of_files} //= scalar(@output_filenames); |
1505
|
|
|
|
|
|
|
} # set output filenames |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
# open the next file, if not yet |
1508
|
211
|
100
|
66
|
|
|
890
|
if (!$r->{output_fh} || $r->{wants_switch_to_next_output_file}) { |
1509
|
79
|
|
50
|
|
|
368
|
$r->{output_filenum} //= 0; |
1510
|
79
|
|
|
|
|
107
|
$r->{output_filenum}++; |
1511
|
|
|
|
|
|
|
|
1512
|
79
|
|
|
|
|
233
|
$r->{output_rownum} = 0; |
1513
|
79
|
|
|
|
|
142
|
$r->{output_data_rownum} = 0; |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
# close the previous file, if any |
1516
|
79
|
50
|
33
|
|
|
187
|
if ($r->{output_fh} && $r->{output_filename} ne '-') { |
1517
|
0
|
|
|
|
|
0
|
log_info "[csvutil] Closing output file '$r->{output_filename}' ..."; |
1518
|
0
|
0
|
|
|
|
0
|
close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"]; |
1519
|
0
|
|
|
|
|
0
|
delete $r->{has_printed_header}; |
1520
|
0
|
|
|
|
|
0
|
delete $r->{wants_switch_to_next_output_file}; |
1521
|
|
|
|
|
|
|
} |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
# we have exhausted all the files, do nothing & return |
1524
|
79
|
50
|
|
|
|
113
|
return if $r->{output_filenum} > @{ $r->{output_filenames} }; |
|
79
|
|
|
|
|
267
|
|
1525
|
|
|
|
|
|
|
|
1526
|
79
|
|
|
|
|
195
|
$r->{output_filename} = $r->{output_filenames}[ $r->{output_filenum}-1 ]; |
1527
|
|
|
|
|
|
|
log_info "[csvutil] [%d/%s] Opening output file %s ...", |
1528
|
79
|
|
|
|
|
305
|
$r->{output_filenum}, $r->{output_num_of_files}, $r->{output_filename}; |
1529
|
79
|
100
|
|
|
|
287
|
if ($r->{output_filename} eq '-') { |
1530
|
77
|
|
|
|
|
190
|
$r->{output_fh} = \*STDOUT; |
1531
|
|
|
|
|
|
|
} else { |
1532
|
2
|
50
|
|
|
|
21
|
if (-f $r->{output_filename}) { |
1533
|
0
|
0
|
|
|
|
0
|
if ($r->{util_args}{overwrite}) { |
1534
|
0
|
|
|
|
|
0
|
log_info "[csvutil] Will be overwriting output file %s", $r->{output_filename}; |
1535
|
|
|
|
|
|
|
} else { |
1536
|
0
|
|
|
|
|
0
|
die [412, "Refusing to overwrite existing output file '$r->{output_filename}', choose another name or use --overwrite (-O)"]; |
1537
|
|
|
|
|
|
|
} |
1538
|
|
|
|
|
|
|
} |
1539
|
2
|
|
|
|
|
11
|
my ($fh, $err) = _open_file_write($r->{output_filename}); |
1540
|
2
|
50
|
|
|
|
9
|
die $err if $err; |
1541
|
2
|
|
|
|
|
6
|
$r->{output_fh} = $fh; |
1542
|
|
|
|
|
|
|
} |
1543
|
|
|
|
|
|
|
} # open the next file |
1544
|
119
|
|
|
|
|
742
|
}; # code_open_file |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
my $code_print = sub { |
1547
|
9
|
|
|
9
|
|
15
|
my $str = shift; |
1548
|
9
|
|
|
|
|
23
|
$code_open_file->(); |
1549
|
9
|
|
|
|
|
14
|
print { $r->{output_fh} } $str; |
|
9
|
|
|
|
|
275
|
|
1550
|
119
|
|
|
|
|
365
|
}; # code_print |
1551
|
119
|
|
|
|
|
268
|
$r->{code_print} = $code_print; |
1552
|
|
|
|
|
|
|
|
1553
|
119
|
100
|
|
|
|
269
|
if ($writes_csv) { |
1554
|
94
|
|
|
|
|
219
|
my $output_emitter = _instantiate_emitter(\%util_args); |
1555
|
94
|
|
|
|
|
11219
|
$r->{output_emitter} = $output_emitter; |
1556
|
94
|
|
|
|
|
206
|
$r->{has_printed_header} = 0; |
1557
|
|
|
|
|
|
|
|
1558
|
|
|
|
|
|
|
my $code_print_header_row = sub { |
1559
|
|
|
|
|
|
|
# set output fields, if not yet |
1560
|
202
|
100
|
|
202
|
|
492
|
unless ($r->{output_fields}) { |
1561
|
|
|
|
|
|
|
# by default, use the input fields as the output fields |
1562
|
24
|
|
|
|
|
55
|
$r->{output_fields} = $r->{input_fields}; |
1563
|
|
|
|
|
|
|
} |
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
# index the output fields, if not yet |
1566
|
202
|
100
|
|
|
|
424
|
unless ($r->{output_fields_idx}) { |
1567
|
75
|
|
|
|
|
187
|
$r->{output_fields_idx} = {}; |
1568
|
75
|
|
|
|
|
107
|
for my $j (0 .. $#{ $r->{output_fields} }) { |
|
75
|
|
|
|
|
210
|
|
1569
|
206
|
|
|
|
|
430
|
$r->{output_fields_idx}{ $r->{output_fields}[$j] } = $j; |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
} |
1572
|
|
|
|
|
|
|
|
1573
|
202
|
|
|
|
|
473
|
$code_open_file->(); |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
# print header line, if not yet |
1576
|
202
|
100
|
100
|
|
|
682
|
if ($outputs_header && !$r->{has_printed_header}) { |
1577
|
74
|
|
|
|
|
110
|
$r->{has_printed_header}++; |
1578
|
74
|
|
|
|
|
1388
|
$r->{output_emitter}->print($r->{output_fh}, $r->{output_fields}); |
1579
|
74
|
|
|
|
|
3880
|
print { $r->{output_fh} } "\n"; |
|
74
|
|
|
|
|
969
|
|
1580
|
74
|
|
|
|
|
288
|
$r->{output_rownum}++; |
1581
|
|
|
|
|
|
|
} |
1582
|
94
|
|
|
|
|
419
|
}; |
1583
|
94
|
|
|
|
|
177
|
$r->{code_print_header_row} = $code_print_header_row; |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
my $code_print_row = sub { |
1586
|
202
|
|
|
202
|
|
337
|
my $row = shift; |
1587
|
|
|
|
|
|
|
|
1588
|
202
|
|
|
|
|
490
|
$code_print_header_row->(); |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
# print data line |
1591
|
202
|
50
|
|
|
|
506
|
if ($row) { |
1592
|
202
|
50
|
|
|
|
504
|
if (ref $row eq 'HASH') { |
1593
|
0
|
|
|
|
|
0
|
my $row0 = $row; |
1594
|
0
|
|
|
|
|
0
|
$row = []; |
1595
|
0
|
|
|
|
|
0
|
for my $j (0 .. $#{ $r->{output_fields} }) { |
|
0
|
|
|
|
|
0
|
|
1596
|
0
|
|
0
|
|
|
0
|
$row->[$j] = $row0->{ $r->{output_fields}[$j] } // ''; |
1597
|
|
|
|
|
|
|
} |
1598
|
|
|
|
|
|
|
} |
1599
|
202
|
|
|
|
|
1376
|
$r->{output_emitter}->print( $r->{output_fh}, $row ); |
1600
|
202
|
|
|
|
|
4434
|
print { $r->{output_fh} } "\n"; |
|
202
|
|
|
|
|
2113
|
|
1601
|
202
|
|
|
|
|
618
|
$r->{output_rownum}++; |
1602
|
202
|
|
|
|
|
1077
|
$r->{output_data_rownum}++; |
1603
|
|
|
|
|
|
|
} |
1604
|
94
|
|
|
|
|
288
|
}; # code_print_row |
1605
|
94
|
|
|
|
|
230
|
$r->{code_print_row} = $code_print_row; |
1606
|
|
|
|
|
|
|
} # if outputs csv |
1607
|
|
|
|
|
|
|
|
1608
|
119
|
50
|
|
|
|
269
|
if ($before_read_input) { |
1609
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Calling before_read_input handler ..."; |
1610
|
0
|
|
|
|
|
0
|
$before_read_input->($r); |
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
READ_CSV: { |
1614
|
119
|
50
|
|
|
|
178
|
last unless $reads_csv; |
|
119
|
|
|
|
|
235
|
|
1615
|
|
|
|
|
|
|
|
1616
|
119
|
|
|
|
|
257
|
my $input_parser = _instantiate_parser(\%util_args, 'input_'); |
1617
|
119
|
|
|
|
|
13022
|
$r->{input_parser} = $input_parser; |
1618
|
|
|
|
|
|
|
|
1619
|
119
|
|
|
|
|
206
|
my @input_filenames; |
1620
|
119
|
100
|
|
|
|
266
|
if ($reads_multiple_csv) { |
1621
|
18
|
|
50
|
|
|
32
|
@input_filenames = @{ $util_args{input_filenames} // ['-'] }; |
|
18
|
|
|
|
|
74
|
|
1622
|
|
|
|
|
|
|
} else { |
1623
|
101
|
|
50
|
|
|
334
|
@input_filenames = ($util_args{input_filename} // '-'); |
1624
|
|
|
|
|
|
|
} |
1625
|
119
|
|
50
|
|
|
552
|
$r->{input_filenames} //= \@input_filenames; |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
BEFORE_INPUT_FILENAME: |
1628
|
119
|
|
|
|
|
221
|
$r->{input_filenum} = 0; |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
INPUT_FILENAME: |
1631
|
119
|
|
|
|
|
223
|
for my $input_filename (@input_filenames) { |
1632
|
138
|
|
|
|
|
218
|
$r->{input_filenum}++; |
1633
|
138
|
|
|
|
|
269
|
$r->{input_filename} = $input_filename; |
1634
|
|
|
|
|
|
|
|
1635
|
138
|
100
|
100
|
|
|
478
|
if ($r->{input_filenum} == 1 && $before_open_input_files) { |
1636
|
1
|
|
|
|
|
6
|
log_trace "[csvutil] Calling before_open_input_files handler ..."; |
1637
|
1
|
|
|
|
|
7
|
$before_open_input_files->($r); |
1638
|
1
|
50
|
|
|
|
4
|
if (delete $r->{wants_skip_files}) { |
1639
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Handler wants to skip files, skipping all input files"; |
1640
|
0
|
|
|
|
|
0
|
last READ_CSV; |
1641
|
|
|
|
|
|
|
} |
1642
|
|
|
|
|
|
|
} |
1643
|
|
|
|
|
|
|
|
1644
|
138
|
50
|
|
|
|
311
|
if ($before_open_input_file) { |
1645
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Calling before_open_input_file handler ..."; |
1646
|
0
|
|
|
|
|
0
|
$before_open_input_file->($r); |
1647
|
0
|
0
|
|
|
|
0
|
if (delete $r->{wants_skip_file}) { |
|
|
0
|
|
|
|
|
|
1648
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Handler wants to skip this file, moving on to the next file"; |
1649
|
0
|
|
|
|
|
0
|
next INPUT_FILENAME; |
1650
|
|
|
|
|
|
|
} elsif (delete $r->{wants_skip_files}) { |
1651
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Handler wants to skip all files, skipping all input files"; |
1652
|
0
|
|
|
|
|
0
|
last READ_CSV; |
1653
|
|
|
|
|
|
|
} |
1654
|
|
|
|
|
|
|
} |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
log_info "[csvutil] [file %d/%d] Reading input file %s ...", |
1657
|
138
|
|
|
|
|
495
|
$r->{input_filenum}, scalar(@input_filenames), $input_filename; |
1658
|
138
|
|
|
|
|
470
|
my ($fh, $err) = _open_file_read($input_filename); |
1659
|
138
|
50
|
|
|
|
331
|
die $err if $err; |
1660
|
138
|
|
|
|
|
523
|
$r->{input_fh} = $r->{input_fhs}[ $r->{input_filenum}-1 ] = $fh; |
1661
|
|
|
|
|
|
|
|
1662
|
138
|
|
|
|
|
193
|
my $i; |
1663
|
138
|
|
|
|
|
224
|
$r->{input_header_row_count} = 0; |
1664
|
138
|
|
|
|
|
365
|
$r->{input_data_row_count} = 0; |
1665
|
138
|
|
|
|
|
255
|
$r->{input_fields} = []; # array, field names in order |
1666
|
138
|
|
|
|
|
258
|
$r->{input_field_idxs} = {}; # key=field name, value=index (0-based) |
1667
|
138
|
|
|
|
|
207
|
my $row0; |
1668
|
|
|
|
|
|
|
my $code_getline = sub { |
1669
|
661
|
50
|
33
|
661
|
|
3310
|
if ($r->{stdin_input_fields} && $r->{input_filename} eq '-') { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
1670
|
0
|
0
|
|
|
|
0
|
if ($i == 0) { |
1671
|
|
|
|
|
|
|
# we have read the header for stdin. since |
1672
|
|
|
|
|
|
|
# we can't seek to the beginning, we return |
1673
|
|
|
|
|
|
|
# the saved fields |
1674
|
0
|
|
|
|
|
0
|
$r->{input_header_row_count}++; |
1675
|
0
|
|
|
|
|
0
|
return $r->{stdin_input_fields}; |
1676
|
|
|
|
|
|
|
} else { |
1677
|
0
|
|
|
|
|
0
|
my $row = $input_parser->getline($r->{input_fh}); |
1678
|
0
|
0
|
|
|
|
0
|
$r->{input_data_row_count}++ if $row; |
1679
|
0
|
|
|
|
|
0
|
return $row; |
1680
|
|
|
|
|
|
|
} |
1681
|
|
|
|
|
|
|
} elsif ($i == 0 && !$has_header) { |
1682
|
|
|
|
|
|
|
# this is the first line of a file and user |
1683
|
|
|
|
|
|
|
# specifies there is no input header. we save |
1684
|
|
|
|
|
|
|
# the line and return the generated field names |
1685
|
|
|
|
|
|
|
# instead. |
1686
|
4
|
|
|
|
|
165
|
$row0 = $input_parser->getline($r->{input_fh}); |
1687
|
4
|
50
|
|
|
|
254
|
return unless $row0; |
1688
|
4
|
|
|
|
|
14
|
return [map { "field$_" } 1..@$row0]; |
|
12
|
|
|
|
|
53
|
|
1689
|
|
|
|
|
|
|
} elsif ($i == 1 && !$has_header) { |
1690
|
|
|
|
|
|
|
# we return the saved first line |
1691
|
4
|
50
|
|
|
|
12
|
$r->{input_data_row_count}++ if $row0; |
1692
|
4
|
|
|
|
|
12
|
return $row0; |
1693
|
|
|
|
|
|
|
} |
1694
|
653
|
|
|
|
|
17028
|
my $res = $input_parser->getline($r->{input_fh}); |
1695
|
653
|
100
|
|
|
|
23844
|
if ($res) { |
1696
|
537
|
100
|
|
|
|
1191
|
$r->{input_header_row_count}++ if $i==0; |
1697
|
537
|
100
|
|
|
|
1121
|
$r->{input_data_row_count}++ if $i; |
1698
|
|
|
|
|
|
|
} |
1699
|
653
|
|
|
|
|
1883
|
$res; |
1700
|
138
|
|
|
|
|
712
|
}; |
1701
|
138
|
|
|
|
|
363
|
$r->{code_getline} = $code_getline; |
1702
|
|
|
|
|
|
|
|
1703
|
138
|
|
|
|
|
218
|
$i = 0; |
1704
|
138
|
|
|
|
|
260
|
while ($r->{input_row} = $code_getline->()) { |
1705
|
545
|
|
|
|
|
733
|
$i++; |
1706
|
545
|
|
|
|
|
867
|
$r->{input_rownum} = $i; |
1707
|
545
|
100
|
|
|
|
1124
|
$r->{input_data_rownum} = $has_header ? $i-1 : $i; |
1708
|
545
|
100
|
|
|
|
983
|
if ($i == 1) { |
1709
|
|
|
|
|
|
|
# gather the list of fields |
1710
|
138
|
|
|
|
|
235
|
$r->{input_fields} = $r->{input_row}; |
1711
|
138
|
50
|
0
|
|
|
346
|
$r->{stdin_input_fields} //= $r->{input_row} if $input_filename eq '-'; |
1712
|
138
|
|
|
|
|
226
|
$r->{orig_input_fields} = $r->{input_fields}; |
1713
|
138
|
|
|
|
|
304
|
$r->{input_fields_idx} = {}; |
1714
|
138
|
|
|
|
|
199
|
for my $j (0 .. $#{ $r->{input_fields} }) { |
|
138
|
|
|
|
|
443
|
|
1715
|
370
|
|
|
|
|
893
|
$r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j; |
1716
|
|
|
|
|
|
|
} |
1717
|
|
|
|
|
|
|
|
1718
|
138
|
100
|
|
|
|
281
|
if ($on_input_header_row) { |
1719
|
124
|
|
|
|
|
436
|
log_trace "[csvutil] Calling on_input_header_row hook handler ..."; |
1720
|
124
|
|
|
|
|
545
|
$on_input_header_row->($r); |
1721
|
|
|
|
|
|
|
|
1722
|
115
|
100
|
|
|
|
422
|
if (delete $r->{wants_skip_file}) { |
|
|
100
|
|
|
|
|
|
1723
|
3
|
|
|
|
|
10
|
log_trace "[csvutil] Handler wants to skip this file, moving on to the next file"; |
1724
|
3
|
|
|
|
|
13
|
next INPUT_FILENAME; |
1725
|
|
|
|
|
|
|
} elsif (delete $r->{wants_skip_files}) { |
1726
|
1
|
|
|
|
|
8
|
log_trace "[csvutil] Handler wants to skip all files, skipping all input files"; |
1727
|
1
|
|
|
|
|
4
|
last READ_CSV; |
1728
|
|
|
|
|
|
|
} |
1729
|
|
|
|
|
|
|
} |
1730
|
|
|
|
|
|
|
|
1731
|
|
|
|
|
|
|
# reindex the fields, should the above hook |
1732
|
|
|
|
|
|
|
# handler add/remove fields. let's save the |
1733
|
|
|
|
|
|
|
# old input_fields_idx to orig_input_fields_idx. |
1734
|
125
|
|
|
|
|
219
|
$r->{orig_input_fields_idx} = $r->{input_fields_idx}; |
1735
|
125
|
|
|
|
|
192
|
$r->{input_fields_idx} = {}; |
1736
|
125
|
|
|
|
|
252
|
for my $j (0 .. $#{ $r->{input_fields} }) { |
|
125
|
|
|
|
|
307
|
|
1737
|
335
|
|
|
|
|
802
|
$r->{input_fields_idx}{ $r->{input_fields}[$j] } = $j; |
1738
|
|
|
|
|
|
|
} |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
} else { |
1741
|
|
|
|
|
|
|
# fill up the elements of row to the number of |
1742
|
|
|
|
|
|
|
# fields, in case the row contains sparse values |
1743
|
407
|
50
|
33
|
|
|
928
|
unless (defined $r->{wants_fill_rows} && !$r->{wants_fill_rows}) { |
1744
|
407
|
100
|
|
|
|
528
|
if (@{ $r->{input_row} } < @{ $r->{input_fields} }) { |
|
407
|
|
|
|
|
640
|
|
|
407
|
|
|
|
|
813
|
|
1745
|
3
|
|
|
|
|
7
|
splice @{ $r->{input_row} }, scalar(@{ $r->{input_row} }), 0, (("") x (@{ $r->{input_fields} } - @{ $r->{input_row} })); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
8
|
|
1746
|
|
|
|
|
|
|
} |
1747
|
|
|
|
|
|
|
} |
1748
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
# generate the hashref version of row if utility |
1750
|
|
|
|
|
|
|
# requires it |
1751
|
407
|
100
|
|
|
|
804
|
if ($r->{wants_input_row_as_hashref}) { |
1752
|
34
|
|
|
|
|
71
|
$r->{input_row_as_hashref} = {}; |
1753
|
34
|
|
|
|
|
55
|
for my $j (0 .. $#{ $r->{input_row} }) { |
|
34
|
|
|
|
|
84
|
|
1754
|
|
|
|
|
|
|
# ignore extraneous data fields |
1755
|
93
|
50
|
|
|
|
122
|
last if $j >= @{ $r->{input_fields} }; |
|
93
|
|
|
|
|
171
|
|
1756
|
93
|
|
|
|
|
231
|
$r->{input_row_as_hashref}{ $r->{input_fields}[$j] } = $r->{input_row}[$j]; |
1757
|
|
|
|
|
|
|
} |
1758
|
|
|
|
|
|
|
} |
1759
|
|
|
|
|
|
|
|
1760
|
407
|
50
|
|
|
|
804
|
if ($on_input_data_row) { |
1761
|
407
|
100
|
|
|
|
975
|
log_trace "[csvutil] Calling on_input_data_row hook handler (for first data row) ..." if $r->{input_rownum} <= 2; |
1762
|
407
|
|
|
|
|
1241
|
$on_input_data_row->($r); |
1763
|
|
|
|
|
|
|
|
1764
|
402
|
100
|
|
|
|
1405
|
if (delete $r->{wants_skip_file}) { |
|
|
100
|
|
|
|
|
|
1765
|
2
|
|
|
|
|
7
|
log_trace "[csvutil] Handler wants to skip this file, moving on to the next file"; |
1766
|
2
|
|
|
|
|
8
|
next INPUT_FILENAME; |
1767
|
|
|
|
|
|
|
} elsif (delete $r->{wants_skip_files}) { |
1768
|
2
|
|
|
|
|
8
|
log_trace "[csvutil] Handler wants to skip all files, skipping all input files"; |
1769
|
2
|
|
|
|
|
9
|
last READ_CSV; |
1770
|
|
|
|
|
|
|
} |
1771
|
|
|
|
|
|
|
} |
1772
|
|
|
|
|
|
|
} |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
} # while getline |
1775
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
# XXX actually close filehandle except stdin |
1777
|
|
|
|
|
|
|
|
1778
|
116
|
50
|
|
|
|
357
|
if ($after_close_input_file) { |
1779
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Calling after_close_input_file handler ..."; |
1780
|
0
|
|
|
|
|
0
|
$after_close_input_file->($r); |
1781
|
0
|
0
|
|
|
|
0
|
if (delete $r->{wants_skip_files}) { |
1782
|
0
|
|
|
|
|
0
|
log_trace "[csvutil] Handler wants to skip reading all file, skipping"; |
1783
|
0
|
|
|
|
|
0
|
last READ_CSV; |
1784
|
|
|
|
|
|
|
} |
1785
|
|
|
|
|
|
|
} |
1786
|
|
|
|
|
|
|
} # for input_filename |
1787
|
|
|
|
|
|
|
|
1788
|
102
|
100
|
|
|
|
577
|
if ($after_close_input_files) { |
1789
|
32
|
|
|
|
|
102
|
log_trace "[csvutil] Calling after_close_input_files handler ..."; |
1790
|
32
|
|
|
|
|
127
|
$after_close_input_files->($r); |
1791
|
|
|
|
|
|
|
} |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
} # READ_CSV |
1794
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
# cleanup stash from csv-reading-related keys |
1796
|
105
|
|
|
|
|
445
|
delete $r->{input_filenames}; |
1797
|
105
|
|
|
|
|
167
|
delete $r->{input_filenum}; |
1798
|
105
|
|
|
|
|
175
|
delete $r->{input_filename}; |
1799
|
105
|
|
|
|
|
152
|
delete $r->{input_fh}; |
1800
|
105
|
|
|
|
|
157
|
delete $r->{input_rownum}; |
1801
|
105
|
|
|
|
|
174
|
delete $r->{input_data_rownum}; |
1802
|
105
|
|
|
|
|
155
|
delete $r->{input_row}; |
1803
|
105
|
|
|
|
|
158
|
delete $r->{input_row_as_hashref}; |
1804
|
105
|
|
|
|
|
193
|
delete $r->{input_fields}; |
1805
|
105
|
|
|
|
|
216
|
delete $r->{input_fields_idx}; |
1806
|
105
|
|
|
|
|
174
|
delete $r->{orig_input_fields_idx}; |
1807
|
105
|
|
|
|
|
833
|
delete $r->{code_getline}; |
1808
|
105
|
|
|
|
|
158
|
delete $r->{wants_input_row_as_hashref}; |
1809
|
|
|
|
|
|
|
|
1810
|
105
|
100
|
|
|
|
222
|
if ($after_read_input) { |
1811
|
15
|
|
|
|
|
64
|
log_trace "[csvutil] Calling after_read_input handler ..."; |
1812
|
15
|
|
|
|
|
60
|
$after_read_input->($r); |
1813
|
|
|
|
|
|
|
} |
1814
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
# cleanup stash from csv-outputting-related keys |
1816
|
105
|
|
|
|
|
224
|
delete $r->{output_num_of_files}; |
1817
|
105
|
|
|
|
|
149
|
delete $r->{output_filenum}; |
1818
|
105
|
100
|
|
|
|
238
|
if ($r->{output_fh}) { |
1819
|
79
|
100
|
|
|
|
229
|
if ($r->{output_filename} ne '-') { |
1820
|
2
|
|
|
|
|
13
|
log_info "[csvutil] Closing output file '$r->{output_filename}' ..."; |
1821
|
2
|
50
|
|
|
|
134
|
close $r->{output_fh} or die [500, "Can't close output file '$r->{output_filename}': $!"]; |
1822
|
|
|
|
|
|
|
} |
1823
|
79
|
|
|
|
|
146
|
delete $r->{output_fh}; |
1824
|
|
|
|
|
|
|
} |
1825
|
105
|
100
|
|
|
|
244
|
if ($r->{util_args}{inplace}) { |
1826
|
2
|
|
50
|
|
|
15
|
my $output_filenum = $r->{output_filenum} // 0; |
1827
|
2
|
|
|
|
|
4
|
my $i = -1; |
1828
|
2
|
|
|
|
|
4
|
for my $output_filename (@{ $r->{output_filenames} }) { |
|
2
|
|
|
|
|
7
|
|
1829
|
2
|
|
|
|
|
3
|
$i++; |
1830
|
2
|
50
|
|
|
|
7
|
last if $i > $output_filenum; |
1831
|
2
|
50
|
|
|
|
24
|
(my $input_filename = $output_filename) =~ s/\.\w{5}\z// |
1832
|
|
|
|
|
|
|
or die [500, "BUG: Can't get original input file '$output_filename'"]; |
1833
|
2
|
100
|
|
|
|
10
|
if (length(my $ext = $r->{util_args}{inplace_backup_ext})) { |
1834
|
1
|
|
|
|
|
7
|
my $backup_filename = $input_filename . $ext; |
1835
|
1
|
|
|
|
|
7
|
log_info "[csvutil] Backing up input file '$output_filename' -> '$backup_filename' ..."; |
1836
|
1
|
50
|
|
|
|
43
|
rename $input_filename, $backup_filename or die [500, "Can't rename '$input_filename' -> '$backup_filename': $!"]; |
1837
|
|
|
|
|
|
|
} |
1838
|
2
|
|
|
|
|
14
|
log_info "[csvutil] Renaming from temporary output file '$output_filename' -> '$input_filename' ..."; |
1839
|
2
|
50
|
|
|
|
179
|
rename $output_filename, $input_filename or die [500, "Can't rename back '$output_filename' -> '$input_filename': $!"]; |
1840
|
|
|
|
|
|
|
} |
1841
|
|
|
|
|
|
|
} |
1842
|
105
|
|
|
|
|
164
|
delete $r->{output_filenames}; |
1843
|
105
|
|
|
|
|
153
|
delete $r->{output_filename}; |
1844
|
105
|
|
|
|
|
147
|
delete $r->{output_rownum}; |
1845
|
105
|
|
|
|
|
136
|
delete $r->{output_data_rownum}; |
1846
|
105
|
|
|
|
|
166
|
delete $r->{code_print}; |
1847
|
105
|
|
|
|
|
478
|
delete $r->{code_print_row}; |
1848
|
105
|
|
|
|
|
403
|
delete $r->{code_print_header_row}; |
1849
|
105
|
|
|
|
|
150
|
delete $r->{has_printed_header}; |
1850
|
105
|
|
|
|
|
138
|
delete $r->{wants_switch_to_next_output_file}; |
1851
|
|
|
|
|
|
|
|
1852
|
105
|
100
|
|
|
|
1654
|
if ($on_end) { |
1853
|
11
|
|
|
|
|
36
|
log_trace "[csvutil] Calling on_end hook handler ..."; |
1854
|
11
|
|
|
|
|
49
|
$on_end->($r); |
1855
|
|
|
|
|
|
|
} |
1856
|
|
|
|
|
|
|
|
1857
|
|
|
|
|
|
|
}; # MAIN_EVAL |
1858
|
|
|
|
|
|
|
|
1859
|
119
|
|
|
|
|
255
|
my $err = $@; |
1860
|
119
|
100
|
|
|
|
240
|
if ($err) { |
1861
|
14
|
50
|
|
|
|
45
|
$err = [500, $err] unless ref $err; |
1862
|
14
|
|
|
|
|
49
|
return $err; |
1863
|
|
|
|
|
|
|
} |
1864
|
|
|
|
|
|
|
|
1865
|
|
|
|
|
|
|
RETURN_RESULT: |
1866
|
105
|
100
|
|
|
|
302
|
if (!$r->{result}) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1867
|
91
|
|
|
|
|
229
|
$r->{result} = [200]; |
1868
|
|
|
|
|
|
|
} elsif (!ref($r->{result})) { |
1869
|
0
|
|
|
|
|
0
|
$r->{result} = [500, "BUG: Result (r->{result}) is set to a non-reference ($r->{result}), probably by one of the handlers"]; |
1870
|
|
|
|
|
|
|
} elsif (ref($r->{result}) ne 'ARRAY') { |
1871
|
0
|
|
|
|
|
0
|
$r->{result} = [500, "BUG: Result (r->{result}) is not set to an enveloped result (arrayref) ($r->{result}), probably by one of the handlers"]; |
1872
|
|
|
|
|
|
|
} |
1873
|
105
|
|
|
|
|
3490
|
$r->{result}; |
1874
|
25
|
|
|
|
|
290
|
}; |
1875
|
|
|
|
|
|
|
} # CREATE_CODE |
1876
|
|
|
|
|
|
|
|
1877
|
25
|
|
|
|
|
51
|
my $meta; |
1878
|
|
|
|
|
|
|
CREATE_META: { |
1879
|
|
|
|
|
|
|
|
1880
|
25
|
|
|
|
|
545
|
$meta = { |
1881
|
|
|
|
|
|
|
v => 1.1, |
1882
|
|
|
|
|
|
|
summary => $summary, |
1883
|
|
|
|
|
|
|
description => $description, |
1884
|
|
|
|
|
|
|
args => {}, |
1885
|
|
|
|
|
|
|
args_rels => {}, |
1886
|
|
|
|
|
|
|
links => $links, |
1887
|
|
|
|
|
|
|
examples => $examples, |
1888
|
|
|
|
|
|
|
tags => $tags, |
1889
|
|
|
|
|
|
|
}; |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
CREATE_ARGS_PROP: { |
1892
|
25
|
50
|
|
|
|
46
|
if ($add_args) { |
|
25
|
|
|
|
|
89
|
|
1893
|
25
|
|
|
|
|
167
|
$meta->{args}{$_} = $add_args->{$_} for keys %$add_args; |
1894
|
|
|
|
|
|
|
} |
1895
|
|
|
|
|
|
|
|
1896
|
25
|
50
|
|
|
|
76
|
if ($reads_csv) { |
1897
|
25
|
|
|
|
|
111
|
$meta->{args}{$_} = {%{$argspecs_csv_input{$_}}} for keys %argspecs_csv_input; |
|
125
|
|
|
|
|
557
|
|
1898
|
|
|
|
|
|
|
|
1899
|
25
|
100
|
|
|
|
66
|
if ($reads_multiple_csv) { |
1900
|
3
|
|
|
|
|
6
|
$meta->{args}{input_filenames} = {%{$argspecopt_input_filenames{input_filenames}}}; |
|
3
|
|
|
|
|
23
|
|
1901
|
3
|
|
|
|
|
16
|
_add_arg_pos($meta->{args}, 'input_filenames', 'slurpy'); |
1902
|
3
|
|
|
|
|
9
|
push @$tags, 'reads-multiple-csv'; |
1903
|
|
|
|
|
|
|
} else { |
1904
|
22
|
|
|
|
|
36
|
$meta->{args}{input_filename} = {%{$argspecopt_input_filename{input_filename}}}; |
|
22
|
|
|
|
|
118
|
|
1905
|
22
|
|
|
|
|
77
|
_add_arg_pos($meta->{args}, 'input_filename'); |
1906
|
|
|
|
|
|
|
} |
1907
|
|
|
|
|
|
|
|
1908
|
25
|
|
|
|
|
68
|
push @$tags, 'reads-csv'; |
1909
|
|
|
|
|
|
|
} # if reads_csv |
1910
|
|
|
|
|
|
|
|
1911
|
25
|
100
|
|
|
|
82
|
if ($writes_csv) { |
1912
|
17
|
|
|
|
|
66
|
$meta->{args}{$_} = {%{$argspecs_csv_output{$_}}} for keys %argspecs_csv_output; |
|
119
|
|
|
|
|
499
|
|
1913
|
|
|
|
|
|
|
|
1914
|
17
|
50
|
|
|
|
50
|
if ($reads_csv) { |
1915
|
17
|
|
|
|
|
56
|
$meta->{args}{$_} = {%{$argspecsopt_inplace{$_}}} for keys %argspecsopt_inplace; |
|
34
|
|
|
|
|
185
|
|
1916
|
17
|
|
50
|
|
|
139
|
$meta->{args_rels}{'dep_all&'} //= []; |
1917
|
17
|
|
|
|
|
34
|
push @{ $meta->{args_rels}{'dep_all&'} }, ['inplace_backup_ext', ['inplace']]; |
|
17
|
|
|
|
|
64
|
|
1918
|
17
|
|
50
|
|
|
79
|
$meta->{args_rels}{'choose_one&'} //= []; |
1919
|
17
|
|
|
|
|
29
|
push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filename']; |
|
17
|
|
|
|
|
50
|
|
1920
|
17
|
|
|
|
|
28
|
push @{ $meta->{args_rels}{'choose_one&'} }, ['inplace', 'output_filenames']; |
|
17
|
|
|
|
|
45
|
|
1921
|
|
|
|
|
|
|
} |
1922
|
|
|
|
|
|
|
|
1923
|
17
|
50
|
|
|
|
48
|
if ($writes_multiple_csv) { |
1924
|
0
|
|
|
|
|
0
|
$meta->{args}{output_filenames} = {%{$argspecopt_output_filenames{output_filenames}}}; |
|
0
|
|
|
|
|
0
|
|
1925
|
0
|
|
|
|
|
0
|
_add_arg_pos($meta->{args}, 'output_filenames', 'slurpy'); |
1926
|
0
|
0
|
|
|
|
0
|
if ($reads_csv) { |
1927
|
0
|
|
0
|
|
|
0
|
$meta->{args_rels}{'choose_one&'} //= []; |
1928
|
0
|
|
|
|
|
0
|
push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filenames inplace/]; |
|
0
|
|
|
|
|
0
|
|
1929
|
|
|
|
|
|
|
} |
1930
|
0
|
|
|
|
|
0
|
push @$tags, 'writes-multiple-csv'; |
1931
|
|
|
|
|
|
|
} else { |
1932
|
17
|
|
|
|
|
29
|
$meta->{args}{output_filename} = {%{$argspecopt_output_filename{output_filename}}}; |
|
17
|
|
|
|
|
81
|
|
1933
|
17
|
|
|
|
|
75
|
_add_arg_pos($meta->{args}, 'output_filename'); |
1934
|
17
|
50
|
|
|
|
56
|
if ($reads_csv) { |
1935
|
17
|
|
50
|
|
|
47
|
$meta->{args_rels}{'choose_one&'} //= []; |
1936
|
17
|
|
|
|
|
30
|
push @{ $meta->{args_rels}{'choose_one&'} }, [qw/output_filename inplace/]; |
|
17
|
|
|
|
|
88
|
|
1937
|
|
|
|
|
|
|
} |
1938
|
|
|
|
|
|
|
} |
1939
|
|
|
|
|
|
|
|
1940
|
17
|
|
|
|
|
23
|
$meta->{args}{overwrite} = {%{$argspecopt_overwrite{overwrite}}}; |
|
17
|
|
|
|
|
79
|
|
1941
|
17
|
|
50
|
|
|
98
|
$meta->{args_rels}{'dep_any&'} //= []; |
1942
|
17
|
|
|
|
|
54
|
push @{ $meta->{args_rels}{'dep_any&'} }, ['overwrite', ['output_filename', 'output_filenames']]; |
|
17
|
|
|
|
|
67
|
|
1943
|
|
|
|
|
|
|
|
1944
|
17
|
|
|
|
|
51
|
push @$tags, 'writes-csv'; |
1945
|
|
|
|
|
|
|
} # if writes csv |
1946
|
|
|
|
|
|
|
|
1947
|
|
|
|
|
|
|
} # CREATE_ARGS_PROP |
1948
|
|
|
|
|
|
|
|
1949
|
|
|
|
|
|
|
CREATE_ARGS_RELS_PROP: { |
1950
|
25
|
|
|
|
|
42
|
$meta->{args_rels} = {}; |
|
25
|
|
|
|
|
81
|
|
1951
|
25
|
100
|
|
|
|
59
|
if ($add_args_rels) { |
1952
|
3
|
|
|
|
|
17
|
$meta->{args_rels}{$_} = $add_args_rels->{$_} for keys %$add_args_rels; |
1953
|
|
|
|
|
|
|
} |
1954
|
|
|
|
|
|
|
} # CREATE_ARGS_RELS_PROP |
1955
|
|
|
|
|
|
|
|
1956
|
25
|
50
|
|
|
|
74
|
if ($add_meta_props) { |
1957
|
0
|
|
|
|
|
0
|
$meta->{$_} = $add_meta_props->{$_} for keys %$add_meta_props; |
1958
|
|
|
|
|
|
|
} |
1959
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
} # CREATE_META |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
{ |
1963
|
25
|
|
|
|
|
41
|
my $package = caller(); |
|
25
|
|
|
|
|
42
|
|
|
25
|
|
|
|
|
96
|
|
1964
|
1
|
|
|
1
|
|
9
|
no strict 'refs'; ## no critic: TestingAndDebugging::ProhibitNoStrict |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
336
|
|
1965
|
25
|
|
|
|
|
508
|
*{"$package\::$name"} = $code; |
|
25
|
|
|
|
|
154
|
|
1966
|
|
|
|
|
|
|
#use DD; dd $meta; |
1967
|
25
|
|
|
|
|
41
|
${"$package\::SPEC"}{$name} = $meta; |
|
25
|
|
|
|
|
150
|
|
1968
|
|
|
|
|
|
|
} |
1969
|
|
|
|
|
|
|
|
1970
|
25
|
|
|
|
|
89
|
1; |
1971
|
|
|
|
|
|
|
} |
1972
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
1; |
1974
|
|
|
|
|
|
|
# ABSTRACT: CLI utilities related to CSV |
1975
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
__END__ |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
=pod |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
=encoding UTF-8 |
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
=head1 NAME |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
App::CSVUtils - CLI utilities related to CSV |
1985
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
=head1 VERSION |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
This document describes version 1.030 of App::CSVUtils (from Perl distribution App-CSVUtils), released on 2023-07-25. |
1989
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
This distribution contains the following CLI utilities: |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=over |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
=item 1. L<csv-add-fields> |
1997
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
=item 2. L<csv-avg> |
1999
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
=item 3. L<csv-check-cell-values> |
2001
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
=item 4. L<csv-check-field-names> |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
=item 5. L<csv-check-field-values> |
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
=item 6. L<csv-check-rows> |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
=item 7. L<csv-cmp> |
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
=item 8. L<csv-concat> |
2011
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
=item 9. L<csv-convert-to-hash> |
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
=item 10. L<csv-csv> |
2015
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
=item 11. L<csv-delete-fields> |
2017
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
=item 12. L<csv-dump> |
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
=item 13. L<csv-each-row> |
2021
|
|
|
|
|
|
|
|
2022
|
|
|
|
|
|
|
=item 14. L<csv-fill-cells> |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
=item 15. L<csv-fill-template> |
2025
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
=item 16. L<csv-find-values> |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
=item 17. L<csv-freqtable> |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
=item 18. L<csv-gen> |
2031
|
|
|
|
|
|
|
|
2032
|
|
|
|
|
|
|
=item 19. L<csv-get-cells> |
2033
|
|
|
|
|
|
|
|
2034
|
|
|
|
|
|
|
=item 20. L<csv-grep> |
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
=item 21. L<csv-info> |
2037
|
|
|
|
|
|
|
|
2038
|
|
|
|
|
|
|
=item 22. L<csv-intrange> |
2039
|
|
|
|
|
|
|
|
2040
|
|
|
|
|
|
|
=item 23. L<csv-list-field-names> |
2041
|
|
|
|
|
|
|
|
2042
|
|
|
|
|
|
|
=item 24. L<csv-lookup-fields> |
2043
|
|
|
|
|
|
|
|
2044
|
|
|
|
|
|
|
=item 25. L<csv-ltrim> |
2045
|
|
|
|
|
|
|
|
2046
|
|
|
|
|
|
|
=item 26. L<csv-map> |
2047
|
|
|
|
|
|
|
|
2048
|
|
|
|
|
|
|
=item 27. L<csv-munge-field> |
2049
|
|
|
|
|
|
|
|
2050
|
|
|
|
|
|
|
=item 28. L<csv-munge-rows> |
2051
|
|
|
|
|
|
|
|
2052
|
|
|
|
|
|
|
=item 29. L<csv-pick> |
2053
|
|
|
|
|
|
|
|
2054
|
|
|
|
|
|
|
=item 30. L<csv-pick-cells> |
2055
|
|
|
|
|
|
|
|
2056
|
|
|
|
|
|
|
=item 31. L<csv-pick-fields> |
2057
|
|
|
|
|
|
|
|
2058
|
|
|
|
|
|
|
=item 32. L<csv-pick-rows> |
2059
|
|
|
|
|
|
|
|
2060
|
|
|
|
|
|
|
=item 33. L<csv-quote> |
2061
|
|
|
|
|
|
|
|
2062
|
|
|
|
|
|
|
=item 34. L<csv-replace-newline> |
2063
|
|
|
|
|
|
|
|
2064
|
|
|
|
|
|
|
=item 35. L<csv-rtrim> |
2065
|
|
|
|
|
|
|
|
2066
|
|
|
|
|
|
|
=item 36. L<csv-select-fields> |
2067
|
|
|
|
|
|
|
|
2068
|
|
|
|
|
|
|
=item 37. L<csv-select-rows> |
2069
|
|
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
=item 38. L<csv-setop> |
2071
|
|
|
|
|
|
|
|
2072
|
|
|
|
|
|
|
=item 39. L<csv-shuf> |
2073
|
|
|
|
|
|
|
|
2074
|
|
|
|
|
|
|
=item 40. L<csv-shuf-fields> |
2075
|
|
|
|
|
|
|
|
2076
|
|
|
|
|
|
|
=item 41. L<csv-shuf-rows> |
2077
|
|
|
|
|
|
|
|
2078
|
|
|
|
|
|
|
=item 42. L<csv-sort> |
2079
|
|
|
|
|
|
|
|
2080
|
|
|
|
|
|
|
=item 43. L<csv-sort-fields> |
2081
|
|
|
|
|
|
|
|
2082
|
|
|
|
|
|
|
=item 44. L<csv-sort-rows> |
2083
|
|
|
|
|
|
|
|
2084
|
|
|
|
|
|
|
=item 45. L<csv-sorted> |
2085
|
|
|
|
|
|
|
|
2086
|
|
|
|
|
|
|
=item 46. L<csv-sorted-fields> |
2087
|
|
|
|
|
|
|
|
2088
|
|
|
|
|
|
|
=item 47. L<csv-sorted-rows> |
2089
|
|
|
|
|
|
|
|
2090
|
|
|
|
|
|
|
=item 48. L<csv-split> |
2091
|
|
|
|
|
|
|
|
2092
|
|
|
|
|
|
|
=item 49. L<csv-sum> |
2093
|
|
|
|
|
|
|
|
2094
|
|
|
|
|
|
|
=item 50. L<csv-transpose> |
2095
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
=item 51. L<csv-trim> |
2097
|
|
|
|
|
|
|
|
2098
|
|
|
|
|
|
|
=item 52. L<csv-uniq> |
2099
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
=item 53. L<csv-unquote> |
2101
|
|
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
=item 54. L<csv2ltsv> |
2103
|
|
|
|
|
|
|
|
2104
|
|
|
|
|
|
|
=item 55. L<csv2paras> |
2105
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
=item 56. L<csv2td> |
2107
|
|
|
|
|
|
|
|
2108
|
|
|
|
|
|
|
=item 57. L<csv2tsv> |
2109
|
|
|
|
|
|
|
|
2110
|
|
|
|
|
|
|
=item 58. L<csv2vcf> |
2111
|
|
|
|
|
|
|
|
2112
|
|
|
|
|
|
|
=item 59. L<list-csvutils> |
2113
|
|
|
|
|
|
|
|
2114
|
|
|
|
|
|
|
=item 60. L<paras2csv> |
2115
|
|
|
|
|
|
|
|
2116
|
|
|
|
|
|
|
=item 61. L<tsv2csv> |
2117
|
|
|
|
|
|
|
|
2118
|
|
|
|
|
|
|
=back |
2119
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
=head1 FUNCTIONS |
2121
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
|
2123
|
|
|
|
|
|
|
=head2 gen_csv_util |
2124
|
|
|
|
|
|
|
|
2125
|
|
|
|
|
|
|
Usage: |
2126
|
|
|
|
|
|
|
|
2127
|
|
|
|
|
|
|
gen_csv_util(%args) -> bool |
2128
|
|
|
|
|
|
|
|
2129
|
|
|
|
|
|
|
Generate a CSV utility. |
2130
|
|
|
|
|
|
|
|
2131
|
|
|
|
|
|
|
This routine is used to generate a CSV utility in the form of a L<Rinci> |
2132
|
|
|
|
|
|
|
function (code and metadata). You can then produce a CLI from the Rinci function |
2133
|
|
|
|
|
|
|
simply using L<Perinci::CmdLine::Gen> or, if you use L<Dist::Zilla>, |
2134
|
|
|
|
|
|
|
L<Dist::Zilla::Plugin::GenPericmdScript> or, if on the command-line, |
2135
|
|
|
|
|
|
|
L<gen-pericmd-script>. |
2136
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
Using this routine, by providing just one or a few hooks and setting some |
2138
|
|
|
|
|
|
|
parameters like a couple of extra arguments, you will get a complete CLI with |
2139
|
|
|
|
|
|
|
decent POD/manpage, ability to read one or multiple CSV's and write one or |
2140
|
|
|
|
|
|
|
multiple CSV's, some command-line options to customize how the input CSV's |
2141
|
|
|
|
|
|
|
should be parsed and how the output CSV's should be formatted and named. Your |
2142
|
|
|
|
|
|
|
CLI also has tab completion, usage and help message, and other features. |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
To create a CSV utility, you specify a C<name> (e.g. C<csv_dump>; must be a valid |
2145
|
|
|
|
|
|
|
unqualified Perl identifier/function name) and optionally C<summary>, |
2146
|
|
|
|
|
|
|
C<description>, and other metadata like C<links> or even C<add_meta_props>. Then |
2147
|
|
|
|
|
|
|
you specify one or more of C<on_*> or C<before_*> or C<after_*> arguments to supply |
2148
|
|
|
|
|
|
|
handlers (coderefs) for your CSV utility at various hook points. |
2149
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
I<THE HOOKS> |
2151
|
|
|
|
|
|
|
|
2152
|
|
|
|
|
|
|
All code for hooks should accept a single argument C<r>. C<r> is a stash (hashref) |
2153
|
|
|
|
|
|
|
of various data, the keys of which will depend on which hook point is being called. |
2154
|
|
|
|
|
|
|
You can also add more keys to store data or for flow control (see hook |
2155
|
|
|
|
|
|
|
documentation below for more details). |
2156
|
|
|
|
|
|
|
|
2157
|
|
|
|
|
|
|
The order of the hooks, in processing chronological order: |
2158
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
=over |
2160
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
=item * on_begin |
2162
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
Called when utility begins, before reading CSV. You can use this hook e.g. to |
2164
|
|
|
|
|
|
|
process arguments, set output filenames (if you allow custom output |
2165
|
|
|
|
|
|
|
filenames). |
2166
|
|
|
|
|
|
|
|
2167
|
|
|
|
|
|
|
=item * before_read_input |
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
Called before opening any input CSV file. This hook is I<still> called even if |
2170
|
|
|
|
|
|
|
your utility sets C<reads_csv> to false. |
2171
|
|
|
|
|
|
|
|
2172
|
|
|
|
|
|
|
At this point, the C<input_filenames> stash key (as well as other keys like |
2173
|
|
|
|
|
|
|
C<input_filename>, C<input_filenum>, etc) has not been set. You can use this |
2174
|
|
|
|
|
|
|
hook e.g. to set a custom C<input_filenames>. |
2175
|
|
|
|
|
|
|
|
2176
|
|
|
|
|
|
|
=item * before_open_input_files |
2177
|
|
|
|
|
|
|
|
2178
|
|
|
|
|
|
|
Called once, before the first input CSV file is about to be opened, including for stdin |
2179
|
|
|
|
|
|
|
(C<->). You can use this hook e.g. to check/preprocess input file. Flow control |
2180
|
|
|
|
|
|
|
is available by setting C<< $r-E<gt>{wants_skip_files} >> to skip reading all the input |
2181
|
|
|
|
|
|
|
files and go directly to the C<after_read_input> hook. |
2182
|
|
|
|
|
|
|
|
2183
|
|
|
|
|
|
|
=item * before_open_input_file |
2184
|
|
|
|
|
|
|
|
2185
|
|
|
|
|
|
|
Called before an input CSV file is about to be opened, including for stdin |
2186
|
|
|
|
|
|
|
(C<->). For the first file, called after C<before_open_input_files> hook. You can |
2187
|
|
|
|
|
|
|
use this hook e.g. to check/preprocess input file. Flow control is available |
2188
|
|
|
|
|
|
|
by setting C<< $r-E<gt>{wants_skip_file} >> to skip reading a single input file and go |
2189
|
|
|
|
|
|
|
to the next file, or C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the |
2190
|
|
|
|
|
|
|
files and go directly to the C<after_read_input> hook. |
2191
|
|
|
|
|
|
|
|
2192
|
|
|
|
|
|
|
=item * on_input_header_row |
2193
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
Called when receiving header row. Will be called for every input file, and |
2195
|
|
|
|
|
|
|
called even when the user specifies C<--no-input-header>, in which case the header |
2196
|
|
|
|
|
|
|
row will be the generated C<["field1", "field2", ...]>. You can use this hook |
2197
|
|
|
|
|
|
|
e.g. to add/remove/rearrange fields. |
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
You can set C<< $r-E<gt>{wants_fill_rows} >> to a defined false if you do not want |
2200
|
|
|
|
|
|
|
C<< $r-E<gt>{input_row} >> to be filled with empty string elements when it contains |
2201
|
|
|
|
|
|
|
less than the number of fields (in case of sparse values at the end). Normally |
2202
|
|
|
|
|
|
|
you only want to do this when you want to do checking, e.g. in |
2203
|
|
|
|
|
|
|
L<csv-check-rows>. |
2204
|
|
|
|
|
|
|
|
2205
|
|
|
|
|
|
|
=item * on_input_data_row |
2206
|
|
|
|
|
|
|
|
2207
|
|
|
|
|
|
|
Called when receiving each data row. You can use this hook e.g. to modify the |
2208
|
|
|
|
|
|
|
row or print output (for line-by-line transformation or filtering). |
2209
|
|
|
|
|
|
|
|
2210
|
|
|
|
|
|
|
=item * after_close_input_file |
2211
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
Called after each input file is closed, including for stdin (C<->) (although |
2213
|
|
|
|
|
|
|
for stdin, the handle is not actually closed). Flow control is possible by |
2214
|
|
|
|
|
|
|
setting C<< $r-E<gt>{wants_skip_files} >> to skip reading the rest of the files and go |
2215
|
|
|
|
|
|
|
straight to the C<after_close_input_files> hook. |
2216
|
|
|
|
|
|
|
|
2217
|
|
|
|
|
|
|
=item * after_close_input_files |
2218
|
|
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
Called after the last input file is closed, after the last |
2220
|
|
|
|
|
|
|
C<after_close_input_file> hook, including for stdin (C<->) (although for stdin, |
2221
|
|
|
|
|
|
|
the handle is not actually closed). |
2222
|
|
|
|
|
|
|
|
2223
|
|
|
|
|
|
|
=item * after_read_input |
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
Called after the last row of the last CSV file is read and the last file is |
2226
|
|
|
|
|
|
|
closed. This hook is I<still> called even if you set the C<reads_csv> option to false. |
2227
|
|
|
|
|
|
|
At this point the stash keys related to CSV reading have all been cleared, |
2228
|
|
|
|
|
|
|
including C<input_filenames>, C<input_filename>, C<input_fh>, etc. |
2229
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
You can use this hook e.g. to print output if you buffer the output. |
2231
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
=item * on_end |
2233
|
|
|
|
|
|
|
|
2234
|
|
|
|
|
|
|
Called when utility is about to exit. You can use this hook e.g. to return the |
2235
|
|
|
|
|
|
|
final result. |
2236
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
=back |
2238
|
|
|
|
|
|
|
|
2239
|
|
|
|
|
|
|
I<THE STASH> |
2240
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
The common keys that C<r> will contain: |
2242
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
=over |
2244
|
|
|
|
|
|
|
|
2245
|
|
|
|
|
|
|
=item * C<gen_args>, hash. The arguments used to generate the CSV utility. |
2246
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
=item * C<util_args>, hash. The arguments that your CSV utility accepts. Parsed from |
2248
|
|
|
|
|
|
|
command-line arguments (or configuration files, or environment variables). |
2249
|
|
|
|
|
|
|
|
2250
|
|
|
|
|
|
|
=item * C<name>, str. The name of the CSV utility, which can also be retrieved via |
2251
|
|
|
|
|
|
|
C<gen_args>. |
2252
|
|
|
|
|
|
|
|
2253
|
|
|
|
|
|
|
=item * C<code_print>, coderef. Routine provided for you to print something. Accepts a |
2254
|
|
|
|
|
|
|
string. Takes care of opening the output files for you. |
2255
|
|
|
|
|
|
|
|
2256
|
|
|
|
|
|
|
=item * C<code_print_row>, coderef. Routine provided for you to print a data row. You |
2257
|
|
|
|
|
|
|
pass the row (either arrayref or hashref). Takes care of opening the output |
2258
|
|
|
|
|
|
|
files for you, as well as printing header row the first time, if needed. |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=item * C<code_print_header_row>, coderef. Routine provided for you to print header |
2261
|
|
|
|
|
|
|
row. You don't need to pass any arguments. Will only print the header row once |
2262
|
|
|
|
|
|
|
per output file if output header is enabled, even if called multiple times. |
2263
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
=back |
2265
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
If you are accepting CSV data (C<reads_csv> gen argument set to true), the |
2267
|
|
|
|
|
|
|
following keys will also be available (in C<on_input_header_row> and |
2268
|
|
|
|
|
|
|
C<on_input_data_row> hooks): |
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
=over |
2271
|
|
|
|
|
|
|
|
2272
|
|
|
|
|
|
|
=item * C<input_parser>, a L<Text::CSV_XS> instance for input parsing. |
2273
|
|
|
|
|
|
|
|
2274
|
|
|
|
|
|
|
=item * C<input_fields>, array of str. Input CSV's field names. |
2275
|
|
|
|
|
|
|
|
2276
|
|
|
|
|
|
|
=item * C<input_fields_idx>, hash with field name as keys and field index (0-based |
2277
|
|
|
|
|
|
|
integer) as values. |
2278
|
|
|
|
|
|
|
|
2279
|
|
|
|
|
|
|
=item * C<input_filenames>, array of str. |
2280
|
|
|
|
|
|
|
|
2281
|
|
|
|
|
|
|
=item * C<input_filename>, str. The name of the current input file being read (C<-> if |
2282
|
|
|
|
|
|
|
reading from stdin). |
2283
|
|
|
|
|
|
|
|
2284
|
|
|
|
|
|
|
=item * C<input_filenum>, uint. The number of the current input file, 1 being the first |
2285
|
|
|
|
|
|
|
file, 2 for the second, and so on. |
2286
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
=item * C<input_fh>, the handle to the current file being read. |
2288
|
|
|
|
|
|
|
|
2289
|
|
|
|
|
|
|
=item * C<input_rownum>, uint. The number of rows that have been read (reset after each |
2290
|
|
|
|
|
|
|
input file). In C<on_input_header_row> phase, this will be 1 since header row |
2291
|
|
|
|
|
|
|
(including the generated one) is the first row. Then in C<on_input_data_row> |
2292
|
|
|
|
|
|
|
phase (called the first time for a file), it will be 2 for the first data row, |
2293
|
|
|
|
|
|
|
even if physically it is the first row for CSV file that does not have a |
2294
|
|
|
|
|
|
|
header. |
2295
|
|
|
|
|
|
|
|
2296
|
|
|
|
|
|
|
=item * C<input_data_rownum>, uint. The number of data rows that have been read (reset |
2297
|
|
|
|
|
|
|
after each input file). This will be equal to C<input_rownum> less 1 if input |
2298
|
|
|
|
|
|
|
file has header. |
2299
|
|
|
|
|
|
|
|
2300
|
|
|
|
|
|
|
=item * C<input_row>, aos (array of str). The current input CSV row as an arrayref. |
2301
|
|
|
|
|
|
|
|
2302
|
|
|
|
|
|
|
=item * C<input_row_as_hashref>, hos (hash of str). The current input CSV row as a |
2303
|
|
|
|
|
|
|
hashref, with field names as hash keys and field values as hash values. This |
2304
|
|
|
|
|
|
|
will only be calculated if utility wants it. Utility can express so by setting |
2305
|
|
|
|
|
|
|
C<< $r-E<gt>{wants_input_row_as_hashref} >> to true, e.g. in the C<on_begin> hook. |
2306
|
|
|
|
|
|
|
|
2307
|
|
|
|
|
|
|
=item * C<input_header_row_count>, uint. Contains the number of actual header rows that |
2308
|
|
|
|
|
|
|
have been read. If CLI user specifies C<--no-input-header>, this will stay at |
2309
|
|
|
|
|
|
|
zero. Will be reset for each CSV file. |
2310
|
|
|
|
|
|
|
|
2311
|
|
|
|
|
|
|
=item * C<input_data_row_count>, int. Contains the number of actual data rows that have |
2312
|
|
|
|
|
|
|
been read. Will be reset for each CSV file. |
2313
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
=back |
2315
|
|
|
|
|
|
|
|
2316
|
|
|
|
|
|
|
If you are outputting CSV (C<writes_csv> gen argument set to true), the following |
2317
|
|
|
|
|
|
|
keys will be available: |
2318
|
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
=over |
2320
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
=item * C<output_emitter>, a L<Text::CSV_XS> instance for output. |
2322
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
=item * C<output_fields>, array of str. Should be set to list of output field names. If |
2324
|
|
|
|
|
|
|
unset, will be set to be the same as C<input_fields>. |
2325
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
=item * C<output_fields_idx>, hash with field names as keys and field indexes (0-based |
2327
|
|
|
|
|
|
|
integer) as values. Normally you do not need to set this manually; you just |
2328
|
|
|
|
|
|
|
need to set C<output_fields> and this hash will be computed automatically for |
2329
|
|
|
|
|
|
|
you just before the first output row is outputted. |
2330
|
|
|
|
|
|
|
|
2331
|
|
|
|
|
|
|
=item * C<output_filenames>, array of str. |
2332
|
|
|
|
|
|
|
|
2333
|
|
|
|
|
|
|
=item * C<output_filename>, str, name of current output file. |
2334
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
=item * C<output_filenum>, uint, the number of the current output file, 1 being the |
2336
|
|
|
|
|
|
|
first file, 2 for the second, and so on. |
2337
|
|
|
|
|
|
|
|
2338
|
|
|
|
|
|
|
=item * C<output_fh>, handle to the current output file. |
2339
|
|
|
|
|
|
|
|
2340
|
|
|
|
|
|
|
=item * C<output_rownum>, uint. The number of rows that have been outputted (reset |
2341
|
|
|
|
|
|
|
after each output file). |
2342
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
=item * C<output_data_rownum>, uint. The number of data rows that have been outputted |
2344
|
|
|
|
|
|
|
(reset after each output file). This will be equal to C<output_rownum> less 1 if |
2345
|
|
|
|
|
|
|
output file has header. |
2346
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
=back |
2348
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
For other hook-specific keys, see the documentation for associated hook point. |
2350
|
|
|
|
|
|
|
|
2351
|
|
|
|
|
|
|
I<ACCEPTING ADDITIONAL COMMAND-LINE OPTIONS/ARGUMENTS> |
2352
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
As mentioned above, you will get additional command-line options/arguments in |
2354
|
|
|
|
|
|
|
C<< $r-E<gt>{util_args} >> hashref. Some options/arguments are already added by |
2355
|
|
|
|
|
|
|
C<gen_csv_util>, e.g. C<input_filename> or C<input_filenames> along with |
2356
|
|
|
|
|
|
|
C<input_sep_char>, etc (when your utility declares C<reads_csv>), |
2357
|
|
|
|
|
|
|
C<output_filename> or C<output_filenames> along with C<overwrite>, |
2358
|
|
|
|
|
|
|
C<output_sep_char>, etc (when your utility declares C<writes_csv>). |
2359
|
|
|
|
|
|
|
|
2360
|
|
|
|
|
|
|
If you want to accept additional arguments/options, you specify them in |
2361
|
|
|
|
|
|
|
C<add_args>. Each option/argument has to be specified |
2362
|
|
|
|
|
|
|
first via C<add_args> (as hashref, with key being argument name and value the |
2363
|
|
|
|
|
|
|
argument specification as defined in L<Rinci::function>). Some argument |
2364
|
|
|
|
|
|
|
specifications have been defined in L<App::CSVUtils> and can be used. See |
2365
|
|
|
|
|
|
|
existing utilities for examples. |
2366
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
I<READING CSV DATA> |
2368
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
To read CSV data, normally your utility would provide handler for the |
2370
|
|
|
|
|
|
|
C<on_input_data_row> hook and sometimes additionally C<on_input_header_row>. |
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
I<OUTPUTTING STRING OR RETURNING RESULT> |
2373
|
|
|
|
|
|
|
|
2374
|
|
|
|
|
|
|
To output string, usually you call the provided routine C<< $r-E<gt>{code_print} >>. This |
2375
|
|
|
|
|
|
|
routine will open the output files for you. |
2376
|
|
|
|
|
|
|
|
2377
|
|
|
|
|
|
|
You can also return enveloped result directly by setting C<< $r-E<gt>{result} >>. |
2378
|
|
|
|
|
|
|
|
2379
|
|
|
|
|
|
|
I<OUTPUTTING CSV DATA> |
2380
|
|
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
To output CSV data, usually you call the provided routine C<< $r-E<gt>{code_print_row} >>. |
2382
|
|
|
|
|
|
|
This routine accepts a row (arrayref or hashref). This routine will open the |
2383
|
|
|
|
|
|
|
output files for you when needed, as well as print header row automatically. |
2384
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
You can also buffer rows from input to e.g. C<< $r-E<gt>{output_rows} >>, then call |
2386
|
|
|
|
|
|
|
C<< $r-E<gt>{code_print_row} >> repeatedly in the C<after_read_input> hook to print all the |
2387
|
|
|
|
|
|
|
rows. |
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
I<READING MULTIPLE CSV FILES> |
2390
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
To read multiple CSV files, you first specify C<reads_multiple_csv>. Then, you |
2392
|
|
|
|
|
|
|
can supply handler for C<on_input_header_row> and C<on_input_data_row> as usual. |
2393
|
|
|
|
|
|
|
If you want to do something before/after each input file, you can also supply |
2394
|
|
|
|
|
|
|
handler for C<before_open_input_file> or C<after_close_input_file>. |
2395
|
|
|
|
|
|
|
|
2396
|
|
|
|
|
|
|
I<WRITING TO MULTIPLE CSV FILES> |
2397
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
Similarly, to write to many CSV files, you first specify C<writes_multiple_csv>. |
2399
|
|
|
|
|
|
|
Then, you can supply handler for C<on_input_header_row> and C<on_input_data_row> |
2400
|
|
|
|
|
|
|
as usual. To switch to the next file, set |
2401
|
|
|
|
|
|
|
C<< $r-E<gt>{wants_switch_to_next_output_file} >> to true, in which case the next call to |
2402
|
|
|
|
|
|
|
C<< $r-E<gt>{code_print_row} >> will close the current file and open the next file. |
2403
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
I<CHANGING THE OUTPUT FIELDS> |
2405
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
When calling C<< $r-E<gt>{code_print_row} >>, you can output whatever fields you want. By |
2407
|
|
|
|
|
|
|
convention, you can set C<< $r-E<gt>{output_fields} >> and C<< $r-E<gt>{output_fields_idx} >> to |
2408
|
|
|
|
|
|
|
let other handlers know about the output fields. For example, see the |
2409
|
|
|
|
|
|
|
implementation of L<csv-concat>. |
2410
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
This function is not exported by default, but exportable. |
2412
|
|
|
|
|
|
|
|
2413
|
|
|
|
|
|
|
Arguments ('*' denotes required arguments): |
2414
|
|
|
|
|
|
|
|
2415
|
|
|
|
|
|
|
=over 4 |
2416
|
|
|
|
|
|
|
|
2417
|
|
|
|
|
|
|
=item * B<add_args> => I<hash> |
2418
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
(No description) |
2420
|
|
|
|
|
|
|
|
2421
|
|
|
|
|
|
|
=item * B<add_args_rels> => I<hash> |
2422
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
(No description) |
2424
|
|
|
|
|
|
|
|
2425
|
|
|
|
|
|
|
=item * B<add_meta_props> => I<hash> |
2426
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
Add additional Rinci function metadata properties. |
2428
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
=item * B<after_close_input_file> => I<code> |
2430
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
(No description) |
2432
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
=item * B<after_close_input_files> => I<code> |
2434
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
(No description) |
2436
|
|
|
|
|
|
|
|
2437
|
|
|
|
|
|
|
=item * B<after_read_input> => I<code> |
2438
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
(No description) |
2440
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
=item * B<before_open_input_file> => I<code> |
2442
|
|
|
|
|
|
|
|
2443
|
|
|
|
|
|
|
(No description) |
2444
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
=item * B<before_open_input_files> => I<code> |
2446
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
(No description) |
2448
|
|
|
|
|
|
|
|
2449
|
|
|
|
|
|
|
=item * B<before_read_input> => I<code> |
2450
|
|
|
|
|
|
|
|
2451
|
|
|
|
|
|
|
(No description) |
2452
|
|
|
|
|
|
|
|
2453
|
|
|
|
|
|
|
=item * B<description> => I<str> |
2454
|
|
|
|
|
|
|
|
2455
|
|
|
|
|
|
|
(No description) |
2456
|
|
|
|
|
|
|
|
2457
|
|
|
|
|
|
|
=item * B<examples> => I<array> |
2458
|
|
|
|
|
|
|
|
2459
|
|
|
|
|
|
|
(No description) |
2460
|
|
|
|
|
|
|
|
2461
|
|
|
|
|
|
|
=item * B<links> => I<array[hash]> |
2462
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
(No description) |
2464
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
=item * B<name>* => I<perl::identifier::unqualified_ascii> |
2466
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
(No description) |
2468
|
|
|
|
|
|
|
|
2469
|
|
|
|
|
|
|
=item * B<on_begin> => I<code> |
2470
|
|
|
|
|
|
|
|
2471
|
|
|
|
|
|
|
(No description) |
2472
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
=item * B<on_end> => I<code> |
2474
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
(No description) |
2476
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
=item * B<on_input_data_row> => I<code> |
2478
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
(No description) |
2480
|
|
|
|
|
|
|
|
2481
|
|
|
|
|
|
|
=item * B<on_input_header_row> => I<code> |
2482
|
|
|
|
|
|
|
|
2483
|
|
|
|
|
|
|
(No description) |
2484
|
|
|
|
|
|
|
|
2485
|
|
|
|
|
|
|
=item * B<reads_csv> => I<bool> (default: 1) |
2486
|
|
|
|
|
|
|
|
2487
|
|
|
|
|
|
|
Whether utility reads CSV data. |
2488
|
|
|
|
|
|
|
|
2489
|
|
|
|
|
|
|
=item * B<reads_multiple_csv> => I<bool> |
2490
|
|
|
|
|
|
|
|
2491
|
|
|
|
|
|
|
Whether utility accepts multiple CSV inputs. |
2492
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
Setting this option to true will implicitly set the C<reads_csv> option to true, |
2494
|
|
|
|
|
|
|
obviously. |
2495
|
|
|
|
|
|
|
|
2496
|
|
|
|
|
|
|
=item * B<summary> => I<str> |
2497
|
|
|
|
|
|
|
|
2498
|
|
|
|
|
|
|
(No description) |
2499
|
|
|
|
|
|
|
|
2500
|
|
|
|
|
|
|
=item * B<writes_csv> => I<bool> (default: 1) |
2501
|
|
|
|
|
|
|
|
2502
|
|
|
|
|
|
|
Whether utility writes CSV data. |
2503
|
|
|
|
|
|
|
|
2504
|
|
|
|
|
|
|
=item * B<writes_multiple_csv> => I<bool> |
2505
|
|
|
|
|
|
|
|
2506
|
|
|
|
|
|
|
Whether utility outputs multiple CSV files. |
2507
|
|
|
|
|
|
|
|
2508
|
|
|
|
|
|
|
Setting this option to true will implicitly set the C<writes_csv> option to true, |
2509
|
|
|
|
|
|
|
obviously. |
2510
|
|
|
|
|
|
|
|
2511
|
|
|
|
|
|
|
|
2512
|
|
|
|
|
|
|
=back |
2513
|
|
|
|
|
|
|
|
2514
|
|
|
|
|
|
|
Return value: (bool) |
2515
|
|
|
|
|
|
|
|
2516
|
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
=head2 compile_eval_code |
2518
|
|
|
|
|
|
|
|
2519
|
|
|
|
|
|
|
Usage: |
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
$coderef = compile_eval_code($str, $label); |
2522
|
|
|
|
|
|
|
|
2523
|
|
|
|
|
|
|
Compile string code C<$str> to coderef in 'main' package, without C<use strict> |
2524
|
|
|
|
|
|
|
or C<use warnings>. Die on compile error. |
2525
|
|
|
|
|
|
|
|
2526
|
|
|
|
|
|
|
=head2 eval_code |
2527
|
|
|
|
|
|
|
|
2528
|
|
|
|
|
|
|
Usage: |
2529
|
|
|
|
|
|
|
|
2530
|
|
|
|
|
|
|
$res = eval_code($coderef, $r, $topic_var_value, $return_topic_var); |
2531
|
|
|
|
|
|
|
|
2532
|
|
|
|
|
|
|
=for Pod::Coverage ^(csvutil)$ |
2533
|
|
|
|
|
|
|
|
2534
|
|
|
|
|
|
|
=head1 FAQ |
2535
|
|
|
|
|
|
|
|
2536
|
|
|
|
|
|
|
=head2 My CSV does not have a header? |
2537
|
|
|
|
|
|
|
|
2538
|
|
|
|
|
|
|
Use the C<--no-input-header> option. Fields will be named C<field1>, C<field2>, and so |
2539
|
|
|
|
|
|
|
on. |
2540
|
|
|
|
|
|
|
|
2541
|
|
|
|
|
|
|
=head2 My data is TSV, not CSV? |
2542
|
|
|
|
|
|
|
|
2543
|
|
|
|
|
|
|
Use the C<--tsv> option. |
2544
|
|
|
|
|
|
|
|
2545
|
|
|
|
|
|
|
=head2 I have a big CSV and the utilities are too slow or eat too much RAM! |
2546
|
|
|
|
|
|
|
|
2547
|
|
|
|
|
|
|
These utilities are not (yet) optimized, patches welcome. If your CSV is very |
2548
|
|
|
|
|
|
|
big, perhaps a C-based solution is what you need. |
2549
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
=head1 HOMEPAGE |
2551
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
Please visit the project's homepage at L<https://metacpan.org/release/App-CSVUtils>. |
2553
|
|
|
|
|
|
|
|
2554
|
|
|
|
|
|
|
=head1 SOURCE |
2555
|
|
|
|
|
|
|
|
2556
|
|
|
|
|
|
|
Source repository is at L<https://github.com/perlancar/perl-App-CSVUtils>. |
2557
|
|
|
|
|
|
|
|
2558
|
|
|
|
|
|
|
=head1 SEE ALSO |
2559
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
=head2 Similar CLI bundles for other formats |
2561
|
|
|
|
|
|
|
|
2562
|
|
|
|
|
|
|
L<App::TSVUtils>, L<App::LTSVUtils>, L<App::SerializeUtils>. |
2563
|
|
|
|
|
|
|
|
2564
|
|
|
|
|
|
|
=head2 Other CSV-related utilities |
2565
|
|
|
|
|
|
|
|
2566
|
|
|
|
|
|
|
L<xls2csv> and L<xlsx2csv> from L<Spreadsheet::Read> |
2567
|
|
|
|
|
|
|
|
2568
|
|
|
|
|
|
|
L<import-csv-to-sqlite> from L<App::SQLiteUtils> |
2569
|
|
|
|
|
|
|
|
2570
|
|
|
|
|
|
|
Query CSV with SQL using L<fsql> from L<App::fsql> |
2571
|
|
|
|
|
|
|
|
2572
|
|
|
|
|
|
|
L<csvgrep> from L<csvgrep> |
2573
|
|
|
|
|
|
|
|
2574
|
|
|
|
|
|
|
=head2 Other non-Perl-based CSV utilities |
2575
|
|
|
|
|
|
|
|
2576
|
|
|
|
|
|
|
=head3 Python |
2577
|
|
|
|
|
|
|
|
2578
|
|
|
|
|
|
|
B<csvkit>, L<https://csvkit.readthedocs.io/en/latest/> |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
=head1 AUTHOR |
2581
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
perlancar <perlancar@cpan.org> |
2583
|
|
|
|
|
|
|
|
2584
|
|
|
|
|
|
|
=head1 CONTRIBUTOR |
2585
|
|
|
|
|
|
|
|
2586
|
|
|
|
|
|
|
=for stopwords Adam Hopkins |
2587
|
|
|
|
|
|
|
|
2588
|
|
|
|
|
|
|
Adam Hopkins <violapiratejunky@gmail.com> |
2589
|
|
|
|
|
|
|
|
2590
|
|
|
|
|
|
|
=head1 CONTRIBUTING |
2591
|
|
|
|
|
|
|
|
2592
|
|
|
|
|
|
|
|
2593
|
|
|
|
|
|
|
To contribute, you can send patches by email/via RT, or send pull requests on |
2594
|
|
|
|
|
|
|
GitHub. |
2595
|
|
|
|
|
|
|
|
2596
|
|
|
|
|
|
|
Most of the time, you don't need to build the distribution yourself. You can |
2597
|
|
|
|
|
|
|
simply modify the code, then test via: |
2598
|
|
|
|
|
|
|
|
2599
|
|
|
|
|
|
|
% prove -l |
2600
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
If you want to build the distribution (e.g. to try to install it locally on your |
2602
|
|
|
|
|
|
|
system), you can install L<Dist::Zilla>, |
2603
|
|
|
|
|
|
|
L<Dist::Zilla::PluginBundle::Author::PERLANCAR>, |
2604
|
|
|
|
|
|
|
L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other |
2605
|
|
|
|
|
|
|
Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond |
2606
|
|
|
|
|
|
|
that are considered a bug and can be reported to me. |
2607
|
|
|
|
|
|
|
|
2608
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
2609
|
|
|
|
|
|
|
|
2610
|
|
|
|
|
|
|
This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2018, 2017, 2016 by perlancar <perlancar@cpan.org>. |
2611
|
|
|
|
|
|
|
|
2612
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
2613
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
2614
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
=head1 BUGS |
2616
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=App-CSVUtils> |
2618
|
|
|
|
|
|
|
|
2619
|
|
|
|
|
|
|
When submitting a bug or request, please include a test-file or a |
2620
|
|
|
|
|
|
|
patch to an existing test-file that illustrates the bug or desired |
2621
|
|
|
|
|
|
|
feature. |
2622
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
=cut |