line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# License: http://creativecommons.org/publicdomain/zero/1.0/ |
2
|
|
|
|
|
|
|
# (CC0 or Public Domain). To the extent possible under law, the author, |
3
|
|
|
|
|
|
|
# Jim Avera (email jim.avera at gmail dot com) has waived all copyright and |
4
|
|
|
|
|
|
|
# related or neighboring rights to this document. Attribution is requested |
5
|
|
|
|
|
|
|
# but not required. |
6
|
3
|
|
|
3
|
|
23
|
use strict; use warnings FATAL => 'all'; use utf8; |
|
3
|
|
|
3
|
|
7
|
|
|
3
|
|
|
3
|
|
114
|
|
|
3
|
|
|
|
|
18
|
|
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
111
|
|
|
3
|
|
|
|
|
17
|
|
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
21
|
|
7
|
3
|
|
|
3
|
|
96
|
use feature qw(say state lexical_subs current_sub); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
251
|
|
8
|
3
|
|
|
3
|
|
20
|
no warnings qw(experimental::lexical_subs); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
164
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Spreadsheet::Edit::IO; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Allow "use <thismodule. VERSION ..." in development sandbox to not bomb |
13
|
3
|
|
|
3
|
|
18
|
{ no strict 'refs'; ${__PACKAGE__."::VER"."SION"} = 1999.999; } |
|
3
|
|
|
|
|
1916
|
|
|
3
|
|
|
|
|
315
|
|
14
|
|
|
|
|
|
|
our $VERSION = '1000.009'; # VERSION from Dist::Zilla::Plugin::OurPkgVersion |
15
|
|
|
|
|
|
|
our $DATE = '2023-09-23'; # DATE from Dist::Zilla::Plugin::OurDate |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# This module is derived from the old never-released Text:CSV::Spreadsheet |
18
|
|
|
|
|
|
|
|
19
|
3
|
|
|
3
|
|
26
|
use Exporter 'import'; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
225
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our @EXPORT = qw/convert_spreadsheet OpenAsCsv cx2let let2cx cxrx2sheetaddr |
22
|
|
|
|
|
|
|
sheetname_from_spec filepath_from_spec |
23
|
|
|
|
|
|
|
form_spec_with_sheetname/; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
our @EXPORT_OK = qw/can_cvt_spreadsheets can_extract_allsheets can_extract_named_sheet |
26
|
|
|
|
|
|
|
openlibreoffice_path |
27
|
|
|
|
|
|
|
@sane_CSV_read_options @sane_CSV_write_options/; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# TODO: Provide "known_attributes" function ala Text::CSV::known_attributes() |
30
|
|
|
|
|
|
|
|
31
|
3
|
|
|
3
|
|
1922
|
use version (); |
|
3
|
|
|
|
|
7206
|
|
|
3
|
|
|
|
|
99
|
|
32
|
3
|
|
|
3
|
|
21
|
use Carp; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
175
|
|
33
|
|
|
|
|
|
|
|
34
|
3
|
|
|
3
|
|
19
|
use File::Find (); |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
65
|
|
35
|
3
|
|
|
3
|
|
2007
|
use File::Copy (); |
|
3
|
|
|
|
|
8399
|
|
|
3
|
|
|
|
|
91
|
|
36
|
3
|
|
|
3
|
|
3049
|
use File::Copy::Recursive (); |
|
3
|
|
|
|
|
14128
|
|
|
3
|
|
|
|
|
108
|
|
37
|
3
|
|
|
3
|
|
40
|
use File::Glob qw/bsd_glob/; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
349
|
|
38
|
|
|
|
|
|
|
|
39
|
3
|
|
|
3
|
|
913
|
use Path::Tiny qw/path/; |
|
3
|
|
|
|
|
13032
|
|
|
3
|
|
|
|
|
186
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Path::Tiny OBVIATES NEED for many but we still need this |
42
|
3
|
|
|
3
|
|
52
|
use File::Spec (); |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
96
|
|
43
|
3
|
|
|
3
|
|
536
|
use File::Spec::Functions qw/devnull tmpdir rootdir catdir catfile/; |
|
3
|
|
|
|
|
887
|
|
|
3
|
|
|
|
|
230
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Still sometimes convenient... |
46
|
3
|
|
|
3
|
|
35
|
use File::Basename qw(basename); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
169
|
|
47
|
|
|
|
|
|
|
|
48
|
3
|
|
|
3
|
|
2559
|
use File::Which qw/which/; |
|
3
|
|
|
|
|
3431
|
|
|
3
|
|
|
|
|
172
|
|
49
|
3
|
|
|
3
|
|
2250
|
use URI::file (); |
|
3
|
|
|
|
|
36410
|
|
|
3
|
|
|
|
|
109
|
|
50
|
3
|
|
|
3
|
|
26
|
use Guard qw(guard scope_guard); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
195
|
|
51
|
3
|
|
|
3
|
|
20
|
use Fcntl qw(:flock :seek); |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
441
|
|
52
|
3
|
|
|
3
|
|
23
|
use Scalar::Util qw/blessed/; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
151
|
|
53
|
3
|
|
|
3
|
|
22
|
use List::Util qw/none all notall first min max/; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
243
|
|
54
|
3
|
|
|
3
|
|
22
|
use Encode qw(encode decode); |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
149
|
|
55
|
3
|
|
|
3
|
|
17
|
use File::Glob qw/bsd_glob GLOB_NOCASE/; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
186
|
|
56
|
3
|
|
|
3
|
|
28
|
use Digest::MD5 qw/md5_base64/; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
166
|
|
57
|
3
|
|
|
3
|
|
19
|
use Text::CSV (); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
151
|
|
58
|
|
|
|
|
|
|
# DDI 5.025 is needed for Windows-aware qsh() |
59
|
3
|
|
|
3
|
|
30
|
use Data::Dumper::Interp 5.025 qw/vis visq dvis dvisq ivis ivisq avis qsh qshlist u/; |
|
3
|
|
|
|
|
71
|
|
|
3
|
|
|
|
|
61
|
|
60
|
|
|
|
|
|
|
|
61
|
3
|
|
|
|
|
28
|
use Spreadsheet::Edit::Log qw/log_call fmt_call log_methcall fmt_methcall oops/, |
62
|
3
|
|
|
3
|
|
1146
|
':btw=IO${lno}:'; |
|
3
|
|
|
|
|
6
|
|
63
|
|
|
|
|
|
|
our %SpreadsheetEdit_Log_Options = ( |
64
|
|
|
|
|
|
|
is_public_api => sub{ |
65
|
|
|
|
|
|
|
$_[1][3] =~ /(?: ::|^ )(?: [a-z][^:]* | OpenAsCsv | ConvertSpreadsheet )$/x |
66
|
|
|
|
|
|
|
}, |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
my $progname = path($0)->basename; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub _get_username(;$) { |
72
|
4
|
|
|
4
|
|
11
|
my ($uid) = @_; |
73
|
4
|
|
50
|
|
|
20
|
$uid //= eval{ $> } // -1; # default to EUID |
|
4
|
|
33
|
|
|
61
|
|
74
|
4
|
|
|
|
|
10
|
state $answer = {}; |
75
|
4
|
|
66
|
|
|
24
|
return $answer->{$uid} //= do { |
76
|
|
|
|
|
|
|
# https://stackoverflow.com/questions/12081246/how-to-get-system-user-full-name-on-windows-in-perl |
77
|
3
|
|
33
|
|
|
3806
|
eval { getpwuid($uid) // $uid } |
78
|
|
|
|
|
|
|
|| |
79
|
3
|
0
|
0
|
|
|
8
|
($^O =~ /MSWin/ && $uid == (eval{$>}//-1) && eval{ # untested... |
|
0
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
|
|
|
33
|
|
|
|
|
80
|
0
|
|
|
|
|
0
|
require Win32API::Net; |
81
|
0
|
|
0
|
|
|
0
|
Win32API::Net::UserGetInfo($ENV{LOGONSERVER}||'',Win32::LoginName(),10,my $info={}); |
82
|
|
|
|
|
|
|
$info->{fullName} |
83
|
0
|
|
|
|
|
0
|
}) |
84
|
|
|
|
|
|
|
|| |
85
|
|
|
|
|
|
|
"UID$uid" |
86
|
|
|
|
|
|
|
}; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# A private Libre/Open Office profile dir is needed to avoid conflicts |
91
|
|
|
|
|
|
|
# with interactive sessions, see |
92
|
|
|
|
|
|
|
# https://ask.libreoffice.org/en/question/290306/how-to-start-independent-lo-instance-process |
93
|
|
|
|
|
|
|
# |
94
|
|
|
|
|
|
|
# We use a persistent profile dir shared among processes for a given user |
95
|
|
|
|
|
|
|
# (actually one for each unique external tool which needs one). |
96
|
|
|
|
|
|
|
# Sharing is okay because we get an exclusive lock before actually using it. |
97
|
|
|
|
|
|
|
state $profile_parent_dir = do{ # also used for lockfile |
98
|
|
|
|
|
|
|
my $user = _get_username(); |
99
|
|
|
|
|
|
|
my $dname = __PACKAGE__."_${user}_profileparent"; |
100
|
|
|
|
|
|
|
$dname =~ s/::/-/g; |
101
|
|
|
|
|
|
|
$dname =~ s/[^-\w.:]/_/g; |
102
|
|
|
|
|
|
|
(my $path = path(File::Spec->tmpdir)->child($dname))->mkpath; |
103
|
|
|
|
|
|
|
$path # Path::Tiny |
104
|
|
|
|
|
|
|
}; |
105
|
|
|
|
|
|
|
sub _get_tool_profdir($) { |
106
|
0
|
|
|
0
|
|
0
|
my ($tool_path) = @_; |
107
|
0
|
|
|
|
|
0
|
my $fingerprint = _file_fingerprint($tool_path); |
108
|
0
|
|
|
|
|
0
|
(my $toolname = path($tool_path)->basename(qw/\.\w+$/)) =~ s/[^-\w.:]/_/g; |
109
|
0
|
|
|
|
|
0
|
my $path = $profile_parent_dir->child("${toolname}_$fingerprint"); |
110
|
0
|
|
|
|
|
0
|
$path->mkpath; |
111
|
0
|
|
|
|
|
0
|
$path |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Prevent concurrent document conversions. |
115
|
|
|
|
|
|
|
# LO & OO can't handle concurrent access to the same profile. |
116
|
|
|
|
|
|
|
my $locked_fh; |
117
|
|
|
|
|
|
|
sub _get_exclusive_lock($) { # returns lock object |
118
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
119
|
0
|
|
|
|
|
0
|
my $lockfile_path = $profile_parent_dir->child("LOCKFILE")->canonpath; |
120
|
0
|
|
|
|
|
0
|
my $sleeptime = 1; |
121
|
0
|
|
|
|
|
0
|
my $lock_fh; |
122
|
0
|
|
|
|
|
0
|
while (! defined $lock_fh) { |
123
|
|
|
|
|
|
|
#warn "$$ : ### AAA open $lockfile_path ...\n"; |
124
|
0
|
0
|
|
|
|
0
|
open $lock_fh, "+>>", $lockfile_path or die $!; |
125
|
|
|
|
|
|
|
#warn "$$ : ### AA2 open succeeded.\n"; |
126
|
0
|
|
|
|
|
0
|
eval { chmod 0666, $lock_fh; }; # sometimes not implemented |
|
0
|
|
|
|
|
0
|
|
127
|
|
|
|
|
|
|
#warn "$$ : ### AA3 flock ...\n"; |
128
|
0
|
0
|
|
|
|
0
|
if (! flock($lock_fh, LOCK_EX|LOCK_NB)) { |
129
|
|
|
|
|
|
|
#warn "$$ : ### AA4 flock FAILED\n"; |
130
|
0
|
0
|
|
|
|
0
|
seek($lock_fh, 0, SEEK_SET) or die; |
131
|
0
|
|
|
|
|
0
|
my @lines = <$lock_fh>; |
132
|
0
|
0
|
|
|
|
0
|
close($lock_fh) or die "close:$!"; $lock_fh = undef; |
|
0
|
|
|
|
|
0
|
|
133
|
|
|
|
|
|
|
#warn "$$ : ### AA6 fh closed...\n"; |
134
|
0
|
|
0
|
|
|
0
|
my $owner = $lines[-1] // ""; # pid NNN (progname) |
135
|
0
|
0
|
|
|
|
0
|
{ my ($pid) = ($owner =~ /pid (\d+)/) or last; |
|
0
|
|
|
|
|
0
|
|
136
|
0
|
0
|
|
|
|
0
|
my @s = stat("/proc/$pid") or last; |
137
|
0
|
|
|
|
|
0
|
$owner = _get_username($s[4])." ".$owner; |
138
|
|
|
|
|
|
|
} |
139
|
0
|
0
|
|
|
|
0
|
my $ownermsg = $owner ? " held by $owner" : ""; |
140
|
|
|
|
|
|
|
# Carp::longmess ... |
141
|
|
|
|
|
|
|
warn ">> ($$) Waiting for exclusive lock${ownermsg}...\n", |
142
|
|
|
|
|
|
|
" $lockfile_path\n" |
143
|
0
|
0
|
|
|
|
0
|
unless $opts->{silent}; |
144
|
0
|
|
|
|
|
0
|
sleep $sleeptime; |
145
|
|
|
|
|
|
|
} else { |
146
|
0
|
|
|
|
|
0
|
$locked_fh = $lock_fh; |
147
|
0
|
0
|
|
|
|
0
|
seek($lock_fh, 0, SEEK_END) or die; |
148
|
0
|
|
|
|
|
0
|
print $lock_fh "pid $$ ($progname)\n"; # always appends anyway on *nix |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
} |
151
|
0
|
|
|
|
|
0
|
$opts->{lockfile_fh} = $lock_fh; |
152
|
|
|
|
|
|
|
#warn "$$ : ### GOT LOCK\n"; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
END{ |
155
|
3
|
50
|
|
3
|
|
746
|
if (defined $locked_fh) { |
156
|
0
|
|
|
|
|
0
|
flock($locked_fh, LOCK_UN); |
157
|
0
|
|
|
|
|
0
|
close($locked_fh); |
158
|
0
|
|
|
|
|
0
|
$locked_fh = undef; |
159
|
0
|
|
|
|
|
0
|
warn "Did emergency unlock!\n"; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
#else { warn "(emergency unlock not needed)\n"; } |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
sub _release_lock($) { |
164
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
165
|
0
|
|
0
|
|
|
0
|
my $fh = delete($opts->{lockfile_fh}) // oops; |
166
|
0
|
0
|
|
|
|
0
|
oops unless $fh == $locked_fh; |
167
|
|
|
|
|
|
|
#seek($fh, 0, SEEK_SET) or die; |
168
|
|
|
|
|
|
|
#my @x = (<$fh>); |
169
|
|
|
|
|
|
|
#seek($fh, 0, SEEK_SET) or die; |
170
|
|
|
|
|
|
|
#warn dvis "$$ : Lockfile contains: @x\n"; |
171
|
|
|
|
|
|
|
##warn "$$ : ###BBB stalling before unlock...\n"; sleep 3; |
172
|
0
|
|
|
|
|
0
|
truncate($fh,0); |
173
|
0
|
0
|
|
|
|
0
|
flock($fh, LOCK_UN) or die "flock UN: $!"; |
174
|
0
|
|
|
|
|
0
|
close $fh; |
175
|
0
|
|
|
|
|
0
|
$locked_fh = undef; |
176
|
|
|
|
|
|
|
#warn "$$ : ###BB0 unlocked and closed.\n"; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# Libre Office text converter "charset" numbers |
180
|
|
|
|
|
|
|
my %LO_charsets = ( |
181
|
|
|
|
|
|
|
'WINDOWS1252' => 1, 'WINLATIN1' => 1, |
182
|
|
|
|
|
|
|
'APPLEWESTERN' => 2, |
183
|
|
|
|
|
|
|
'DOS/OS2437' => 3, |
184
|
|
|
|
|
|
|
'DOS/OS2850' => 4, |
185
|
|
|
|
|
|
|
'DOS/OS2860' => 5, |
186
|
|
|
|
|
|
|
'DOS/OS2861' => 6, |
187
|
|
|
|
|
|
|
'DOS/OS2863' => 7, |
188
|
|
|
|
|
|
|
'DOS/OS2865' => 8, |
189
|
|
|
|
|
|
|
'SYSTEM' => 9, 'SYSTEMDDEFAULT' => 9, |
190
|
|
|
|
|
|
|
'SYMBOL' => 10, |
191
|
|
|
|
|
|
|
'ASCII' => 11, |
192
|
|
|
|
|
|
|
'ISO88591' => 12, |
193
|
|
|
|
|
|
|
'ISO88592' => 13, |
194
|
|
|
|
|
|
|
'ISO88593' => 14, |
195
|
|
|
|
|
|
|
'ISO88594' => 15, |
196
|
|
|
|
|
|
|
'ISO88595' => 16, |
197
|
|
|
|
|
|
|
'ISO88596' => 17, |
198
|
|
|
|
|
|
|
'ISO88597' => 18, |
199
|
|
|
|
|
|
|
'ISO88598' => 19, |
200
|
|
|
|
|
|
|
'ISO88599' => 20, |
201
|
|
|
|
|
|
|
'ISO885914' => 21, |
202
|
|
|
|
|
|
|
'ISO885915' => 22, |
203
|
|
|
|
|
|
|
'OS2737' => 23, |
204
|
|
|
|
|
|
|
'OS2775' => 24, |
205
|
|
|
|
|
|
|
'OS2852' => 25, |
206
|
|
|
|
|
|
|
'OS2855' => 26, |
207
|
|
|
|
|
|
|
'OS2857' => 27, |
208
|
|
|
|
|
|
|
'OS2862' => 28, |
209
|
|
|
|
|
|
|
'OS2864' => 29, |
210
|
|
|
|
|
|
|
'OS2866' => 30, |
211
|
|
|
|
|
|
|
'OS2869' => 31, |
212
|
|
|
|
|
|
|
'WINDOWS874' => 32, |
213
|
|
|
|
|
|
|
'WINDOWS1250' => 33, 'WINLATIN2' => 33, |
214
|
|
|
|
|
|
|
'WINDOWS1251' => 34, |
215
|
|
|
|
|
|
|
'WINDOWS1253' => 35, |
216
|
|
|
|
|
|
|
'WINDOWS1254' => 36, |
217
|
|
|
|
|
|
|
'WINDOWS1255' => 37, |
218
|
|
|
|
|
|
|
'WINDOWS1256' => 38, |
219
|
|
|
|
|
|
|
'WINDOWS1257' => 39, |
220
|
|
|
|
|
|
|
'WINDOWS1258' => 40, |
221
|
|
|
|
|
|
|
'APPLEARABIC' => 41, |
222
|
|
|
|
|
|
|
'APPLECENTRALEUROPEAN' => 42, |
223
|
|
|
|
|
|
|
'APPLECROATIAN' => 43, |
224
|
|
|
|
|
|
|
'APPLECYRILLIC' => 44, |
225
|
|
|
|
|
|
|
'APPLEDEVANAGARI' => 45, |
226
|
|
|
|
|
|
|
'APPLEFARSI' => 46, |
227
|
|
|
|
|
|
|
'APPLEGREEK' => 47, |
228
|
|
|
|
|
|
|
'APPLEGUJARATI' => 48, |
229
|
|
|
|
|
|
|
'APPLEGURMUKHI' => 49, |
230
|
|
|
|
|
|
|
'APPLEHEBREW' => 50, |
231
|
|
|
|
|
|
|
'APPLEICELANDIC' => 51, |
232
|
|
|
|
|
|
|
'APPLEROMANIAN' => 52, |
233
|
|
|
|
|
|
|
'APPLETHAI' => 53, |
234
|
|
|
|
|
|
|
'APPLETURKISH' => 54, |
235
|
|
|
|
|
|
|
'APPLEUKRAINIAN' => 55, |
236
|
|
|
|
|
|
|
'APPLECHINESESIMPLIFIED' => 56, |
237
|
|
|
|
|
|
|
'APPLECHINESETRADITIONAL' => 57, |
238
|
|
|
|
|
|
|
'APPLEJAPANESE' => 58, |
239
|
|
|
|
|
|
|
'APPLEKOREAN' => 59, |
240
|
|
|
|
|
|
|
'WINDOWS932' => 60, |
241
|
|
|
|
|
|
|
'WINDOWS936' => 61, |
242
|
|
|
|
|
|
|
'WINDOWSWANSUNG949' => 62, |
243
|
|
|
|
|
|
|
'WINDOWS950' => 63, |
244
|
|
|
|
|
|
|
'SHIFTJIS' => 64, |
245
|
|
|
|
|
|
|
'GB2312' => 65, |
246
|
|
|
|
|
|
|
'GBT12345' => 66, |
247
|
|
|
|
|
|
|
'GBK' => 67, 'GB231280' => 67, |
248
|
|
|
|
|
|
|
'BIG5' => 68, |
249
|
|
|
|
|
|
|
'EUCJP' => 69, |
250
|
|
|
|
|
|
|
'EUCCN' => 70, |
251
|
|
|
|
|
|
|
'EUCTW' => 71, |
252
|
|
|
|
|
|
|
'ISO2022JP' => 72, |
253
|
|
|
|
|
|
|
'ISO2022CN' => 73, |
254
|
|
|
|
|
|
|
'KOI8R' => 74, |
255
|
|
|
|
|
|
|
'UTF7' => 75, |
256
|
|
|
|
|
|
|
'UTF8' => 76, |
257
|
|
|
|
|
|
|
'ISO885910' => 77, |
258
|
|
|
|
|
|
|
'ISO885913' => 78, |
259
|
|
|
|
|
|
|
'EUCKR' => 79, |
260
|
|
|
|
|
|
|
'ISO2022KR' => 80, |
261
|
|
|
|
|
|
|
'JIS0201' => 81, |
262
|
|
|
|
|
|
|
'JIS0208' => 82, |
263
|
|
|
|
|
|
|
'JIS0212' => 83, |
264
|
|
|
|
|
|
|
'WINDOWSJOHAB1361' => 84, |
265
|
|
|
|
|
|
|
'GB18030' => 85, |
266
|
|
|
|
|
|
|
'BIG5HKSCS' => 86, |
267
|
|
|
|
|
|
|
'TIS620' => 87, |
268
|
|
|
|
|
|
|
'KOI8U' => 88, |
269
|
|
|
|
|
|
|
'ISCIIDEVANAGARI' => 89, |
270
|
|
|
|
|
|
|
'JAVAUTF8' => 90, |
271
|
|
|
|
|
|
|
'ADOBESTANDARD' => 91, |
272
|
|
|
|
|
|
|
'ADOBESYMBOL' => 92, |
273
|
|
|
|
|
|
|
'PT154' => 93, |
274
|
|
|
|
|
|
|
'UCS4' => 65534, |
275
|
|
|
|
|
|
|
'UCS2' => 65535, |
276
|
|
|
|
|
|
|
); |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=for Pod::Coverage _name2LOcharsetnum |
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub _name2LOcharsetnum($) { |
282
|
0
|
|
|
0
|
|
0
|
my ($enc) = @_; |
283
|
0
|
|
|
|
|
0
|
local $_ = uc $enc; |
284
|
0
|
|
|
|
|
0
|
while (! $LO_charsets{$_}) { |
285
|
|
|
|
|
|
|
# successively remove - and other special characters |
286
|
0
|
0
|
|
|
|
0
|
s/\W//a or confess "LO charset: Unknown encoding name '$enc'"; |
287
|
|
|
|
|
|
|
} |
288
|
0
|
|
|
|
|
0
|
$LO_charsets{$_} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# convert between 0-based index and spreadsheet column letter code. |
292
|
|
|
|
|
|
|
# Default argument is $_ |
293
|
|
|
|
|
|
|
sub cx2let(_) { |
294
|
0
|
|
|
0
|
1
|
0
|
my $cx = shift; |
295
|
0
|
|
|
|
|
0
|
my $ABC="A"; ++$ABC for (1..$cx); |
|
0
|
|
|
|
|
0
|
|
296
|
0
|
|
|
|
|
0
|
return $ABC |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
sub let2cx(_) { |
299
|
0
|
|
|
0
|
1
|
0
|
my $ABC = shift; |
300
|
0
|
|
|
|
|
0
|
my $n = ord(substr($ABC,0,1,"")) - ord('A'); |
301
|
0
|
|
|
|
|
0
|
while (length $ABC) { |
302
|
0
|
|
|
|
|
0
|
my $letter = substr($ABC,0,1,""); |
303
|
0
|
|
|
|
|
0
|
$n = (($n+1) * 26) + (ord($letter) - ord('A')); |
304
|
|
|
|
|
|
|
} |
305
|
0
|
|
|
|
|
0
|
return $n; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
sub cxrx2sheetaddr($$) { # (1,99) -> "B100" |
308
|
0
|
|
|
0
|
0
|
0
|
my ($cx, $rx) = @_; |
309
|
0
|
|
|
|
|
0
|
return cx2let($cx) . ($rx + 1); |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=for Pod::Coverage cxrx2sheetaddr oops btw |
313
|
|
|
|
|
|
|
=cut |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
our @sane_CSV_read_options = ( |
316
|
|
|
|
|
|
|
# Text::CSV pod says to not specify 'eol' to allow embedded newlines, |
317
|
|
|
|
|
|
|
# and to automatically handle "\n", "\r", or "\r\n". |
318
|
|
|
|
|
|
|
#eol => $/, |
319
|
|
|
|
|
|
|
binary => 1, # Allow reading embedded newlines & unicode etc. |
320
|
|
|
|
|
|
|
sep_char => ",", |
321
|
|
|
|
|
|
|
quote_char => '"', |
322
|
|
|
|
|
|
|
escape_char => '"', # Embedded "s appear as "" |
323
|
|
|
|
|
|
|
allow_whitespace => 0, # Preserve leading & trailing white space |
324
|
|
|
|
|
|
|
auto_diag => 2, # die on errors |
325
|
|
|
|
|
|
|
); |
326
|
|
|
|
|
|
|
our @sane_CSV_write_options = ( |
327
|
|
|
|
|
|
|
eol => $/, # Necessary when WRITING csv files |
328
|
|
|
|
|
|
|
binary => 1, |
329
|
|
|
|
|
|
|
sep_char => ",", |
330
|
|
|
|
|
|
|
quote_char => '"', |
331
|
|
|
|
|
|
|
escape_char => '"', # Embedded "s appear as "" |
332
|
|
|
|
|
|
|
allow_whitespace => 0, # Preserve leading & trailing white space |
333
|
|
|
|
|
|
|
auto_diag => 2, # die on errors |
334
|
|
|
|
|
|
|
); |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
my %Saved_Sigs; |
337
|
|
|
|
|
|
|
sub _sighandler { |
338
|
0
|
0
|
0
|
0
|
|
0
|
if (! $Saved_Sigs{$_[0]} or $Saved_Sigs{$_[0]} eq 'DEFAULT') { |
339
|
|
|
|
|
|
|
# The user isn't catching this, so the process will abort without |
340
|
|
|
|
|
|
|
# running destructors: Call exit instead |
341
|
0
|
|
|
|
|
0
|
warn "($$)".__PACKAGE__." caught signal $_[0], exiting\n"; |
342
|
0
|
|
|
|
|
0
|
Carp::cluck "($$)".__PACKAGE__." caught signal $_[0], exiting\n"; |
343
|
0
|
|
|
|
|
0
|
exit 1; |
344
|
|
|
|
|
|
|
} |
345
|
0
|
|
|
|
|
0
|
$SIG{$_[0]} = $Saved_Sigs{$_[0]}; |
346
|
0
|
|
|
|
|
0
|
kill $_[0], $$; |
347
|
0
|
|
|
|
|
0
|
oops "Default (or user-defined) sig $_[0] action was to ignore!"; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
sub _signals_guard() { |
350
|
0
|
|
0
|
0
|
|
0
|
%Saved_Sigs = ( map{ ($_ => ($SIG{$_} // undef)) } qw/HUP INT QUIT TERM/ ); |
|
0
|
|
|
|
|
0
|
|
351
|
0
|
|
|
|
|
0
|
$SIG{HUP} = $SIG{INT} = $SIG{QUIT} = $SIG{TERM} = \&_sighandler; |
352
|
0
|
|
|
0
|
|
0
|
return guard { @SIG{keys %Saved_Sigs} = (values %Saved_Sigs) } |
353
|
0
|
|
|
|
|
0
|
} |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
# Create a probably-unique fingerprint for a particular file |
356
|
|
|
|
|
|
|
sub _file_fingerprint($) { |
357
|
0
|
|
|
0
|
|
0
|
my $path = shift; |
358
|
0
|
|
|
|
|
0
|
my $ctx = Digest::MD5->new; |
359
|
0
|
|
|
|
|
0
|
$ctx->add($_) for((stat($path))[0,1,9]); # dev,ino,mtime |
360
|
0
|
|
|
|
|
0
|
substr($ctx->b64digest,0,6) |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Find LibreOffice, or failing that OpenOffice |
364
|
|
|
|
|
|
|
our $OLpath_answer = $ENV{SPREADSHEET_EDIT_LOPATH}; |
365
|
|
|
|
|
|
|
sub openlibreoffice_path() { |
366
|
|
|
|
|
|
|
return $OLpath_answer if $OLpath_answer; |
367
|
|
|
|
|
|
|
unless ($ENV{SPREADSHEET_EDIT_IGNPATH}) { |
368
|
|
|
|
|
|
|
foreach my $short_name (qw(libreoffice loffice localc)) { |
369
|
|
|
|
|
|
|
if ($OLpath_answer = which($short_name)) { |
370
|
|
|
|
|
|
|
return( ($OLpath_answer=path($OLpath_answer)->canonpath) ); |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
# Search for an installation. On Windows it will usually be |
375
|
|
|
|
|
|
|
# C:\Program Files\LibreOffice\... |
376
|
|
|
|
|
|
|
# On *nix, a local/isolated install (i.e. the result of extracting files |
377
|
|
|
|
|
|
|
# from a .deb or other archive) will be |
378
|
|
|
|
|
|
|
# somwehere/opt/libreofficeA.B/... |
379
|
|
|
|
|
|
|
# and "..." is the same standard hierarchy on all platforms. |
380
|
|
|
|
|
|
|
# |
381
|
|
|
|
|
|
|
# If multiple are found try to pick the "latest". |
382
|
|
|
|
|
|
|
my sub _cmp_subpaths($$) { |
383
|
|
|
|
|
|
|
my ($sp1, $sp2) = @_; |
384
|
|
|
|
|
|
|
oops if !defined($sp1); |
385
|
|
|
|
|
|
|
return 1 if !defined($sp2); |
386
|
|
|
|
|
|
|
# Use longest version in the (sub-)path, e.g. "4.4.1/opt/openoffice4/..." |
387
|
|
|
|
|
|
|
my (@v1) = sort { length($a) <=> length($b) } ($sp1 =~ /(\d[.\da-z]*)/ag); |
388
|
|
|
|
|
|
|
my (@v2) = sort { length($a) <=> length($b) } ($sp2 =~ /(\d[.\da-z]*)/ag); |
389
|
|
|
|
|
|
|
my $v1 = $v1[-1]//0; |
390
|
|
|
|
|
|
|
my $v2 = $v2[-1]//0; |
391
|
|
|
|
|
|
|
if ($v1 =~ s/alpha/0/) { |
392
|
|
|
|
|
|
|
return -1 unless $v2 =~ s/alpha/0/; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
if ($v2 =~ /alpha/) { |
395
|
|
|
|
|
|
|
return +1 |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
version->parse($v1) <=> version->parse($v2) |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# I tried just doing File::Glob::bsd_glob('/*/*/*/opt/libre*/program') but |
401
|
|
|
|
|
|
|
# it silently failed even though the same glob works from the shell. Mmff... |
402
|
3
|
|
|
3
|
|
29
|
no warnings FATAL => 'all'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
37803
|
|
403
|
|
|
|
|
|
|
state $is_MSWin = ($^O eq "MSWin32"); |
404
|
|
|
|
|
|
|
my (@search_dirs, $searchfor_re, $maxdepth); |
405
|
|
|
|
|
|
|
if ($is_MSWin) { |
406
|
|
|
|
|
|
|
@search_dirs = ("C:\\Program Files","C:\\Program Files (x86)"); |
407
|
|
|
|
|
|
|
$searchfor_re = qr/^Program Files/; |
408
|
|
|
|
|
|
|
$maxdepth = 1; |
409
|
|
|
|
|
|
|
# depth: C:\Program Files\libreofficeXXX/program/ |
410
|
|
|
|
|
|
|
# 1 |
411
|
|
|
|
|
|
|
} else { |
412
|
|
|
|
|
|
|
@search_dirs = (File::Spec->rootdir()); |
413
|
|
|
|
|
|
|
push @search_dirs, $ENV{HOME} if $ENV{HOME}; |
414
|
|
|
|
|
|
|
$maxdepth = 4; |
415
|
|
|
|
|
|
|
$searchfor_re = qr/^opt$/; |
416
|
|
|
|
|
|
|
# depth: /*/*/<unpackparent>/opt/libreofficeXXX/program/ |
417
|
|
|
|
|
|
|
# 1 2 3 4 |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
my $debug = $ENV{SPREADSHEET_EDIT_FINDDEBUG}; |
421
|
|
|
|
|
|
|
my sub _Findvarsmsg() { |
422
|
|
|
|
|
|
|
if (u($_) eq u($File::Find::name) && u($_) eq u($File::Find::fullname)) { |
423
|
|
|
|
|
|
|
return qsh($_)."\n" |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
if (u($_) eq u($File::Find::name)) { |
426
|
|
|
|
|
|
|
return "\$_=name=".qsh($_)." -> ".qsh($File::Find::fullname)."\n"; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
"\$_=".qsh($_)." name=".qsh($File::Find::name)." fullname=".qsh($File::Find::fullname)."\n"; |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
my %results; |
432
|
|
|
|
|
|
|
$ENV{SPREADSHEET_EDIT_NOLOSEARCH} or |
433
|
|
|
|
|
|
|
File::Find::find( |
434
|
|
|
|
|
|
|
{ wanted => sub{ |
435
|
|
|
|
|
|
|
# Undef fullname OR invalid "_" filehandle implies a broken symlink, |
436
|
|
|
|
|
|
|
# see https://github.com/Perl/perl5/issues/21122 |
437
|
|
|
|
|
|
|
# Zero size on *nix implies /proc or something similar; do not enter. |
438
|
|
|
|
|
|
|
# File::Find::fullname unreadable implies followed link to inaccessable |
439
|
|
|
|
|
|
|
# (The initial "_" stat may be invalid, so "-l _" is useless) |
440
|
4866704
|
|
|
4866704
|
|
18601589
|
$! = 0; |
441
|
|
|
|
|
|
|
# https://github.com/Perl/perl5/issues/21143 |
442
|
4866704
|
|
|
|
|
8481339
|
my $fullname = $File::Find::fullname; |
443
|
4866704
|
50
|
33
|
|
|
15116855
|
if (!defined($fullname) && $is_MSWin) { |
444
|
0
|
0
|
|
|
|
0
|
warn "# _ MSWin undef fullname! ",_Findvarsmsg() if $debug; |
445
|
0
|
|
|
|
|
0
|
stat($_); # lstat was not done. Grr... |
446
|
0
|
|
|
|
|
0
|
$fullname = $File::Find::name; |
447
|
0
|
0
|
|
|
|
0
|
unless (-d _) { |
448
|
0
|
|
|
|
|
0
|
$File::Find::prune = 1; # in case it really is a dir |
449
|
0
|
|
|
|
|
0
|
return; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
} else { |
452
|
4866704
|
100
|
100
|
|
|
17096678
|
unless (-d _ or -l _) { |
453
|
4488482
|
100
|
|
|
|
10175996
|
warn "# _ notdir/symlink: ",_Findvarsmsg() if $debug; |
454
|
4488482
|
|
|
|
|
55521296
|
$File::Find::prune = 1; # in case it really is |
455
|
4488482
|
|
|
|
|
162594147
|
return; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
} |
458
|
378222
|
100
|
33
|
|
|
14234401
|
if ( |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
459
|
|
|
|
|
|
|
!defined($fullname) # broken link, per docs |
460
|
|
|
|
|
|
|
|| (! -r _) || (! -x _) # unreadable item or invalid "_" handle |
461
|
|
|
|
|
|
|
# https://github.com/Perl/perl5/issues/21122 |
462
|
|
|
|
|
|
|
|| (!$is_MSWin && (stat(_))[7] == 0) # zero size ==> /proc etc. |
463
|
|
|
|
|
|
|
|| /\$/ # $some_windows_special_thing$ |
464
|
|
|
|
|
|
|
|| ! -r $fullname # presumably a symlink to unreadable |
465
|
|
|
|
|
|
|
|| ! -x _ # or unsearchable dir |
466
|
|
|
|
|
|
|
|| m#^/snap/(?!.*ffice)# # snap other than e.g. /snap/libreoffice |
467
|
|
|
|
|
|
|
|| m#^/(proc|dev|sys|tmp|boot|run|lost+found|usr/(include|src))$# |
468
|
|
|
|
|
|
|
) { |
469
|
646
|
100
|
|
|
|
3959
|
warn "# PRUNING ",_Findvarsmsg() if $debug; |
470
|
646
|
|
|
|
|
11398
|
$File::Find::prune = 1; |
471
|
|
|
|
|
|
|
return |
472
|
646
|
|
|
|
|
123069
|
} |
473
|
377576
|
100
|
|
|
|
1670058
|
warn "# DIR: ",_Findvarsmsg() if $debug; |
474
|
|
|
|
|
|
|
# Maximum depth: /*/*/<unpackparent>/opt/libreofficeXXX/program/ |
475
|
377576
|
|
|
|
|
5678109
|
my $path = path($_); |
476
|
377576
|
|
|
|
|
14740453
|
my $depth = scalar(() = $path->stringify =~ m#(/)#g); |
477
|
377576
|
100
|
|
|
|
15200341
|
if (basename($_) =~ $searchfor_re) { # ^opt$ or ^Program Files |
478
|
6
|
|
|
|
|
42
|
my $prefix = path($_)->parent->parent; |
479
|
6
|
|
|
|
|
5518
|
for my $o_l (qw/libre open/) { |
480
|
12
|
|
|
|
|
136
|
my $pattern |
481
|
|
|
|
|
|
|
= path($_)->child("${o_l}*/program/soffice*")->canonpath; |
482
|
|
|
|
|
|
|
# eval because I'm suspicious of the glob on Windows |
483
|
12
|
|
|
|
|
1361
|
my @hits; eval{ @hits = sort +bsd_glob($pattern, GLOB_NOCASE) }; |
|
12
|
|
|
|
|
38
|
|
|
12
|
|
|
|
|
9284
|
|
484
|
12
|
50
|
|
|
|
113
|
if (@hits) { |
485
|
|
|
|
|
|
|
# On windows, use soffice.com not .exe because it writes messages |
486
|
|
|
|
|
|
|
# to stdout not a window. See https://help.libreoffice.org/7.5/en-GB/text/shared/guide/start_parameters.html?&DbPAR=SHARED&System=WIN |
487
|
|
|
|
|
|
|
my $path = (first{ /soffice\.com$/ } @hits) || |
488
|
0
|
|
0
|
|
|
0
|
(first{ /soffice$/ } @hits); |
489
|
0
|
0
|
|
|
|
0
|
if ($path) { |
490
|
0
|
0
|
|
|
|
0
|
$prefix->subsumes($path) or oops dvis '$prefix $path'; |
491
|
0
|
|
|
|
|
0
|
my $subpath = path($path)->relative($prefix); |
492
|
0
|
0
|
|
|
|
0
|
if (_cmp_subpaths($subpath, $results{$o_l}{subpath}) >= 0) { |
493
|
0
|
|
|
|
|
0
|
@{$results{$o_l}}{qw/path subpath/} = ($path, $subpath); |
|
0
|
|
|
|
|
0
|
|
494
|
|
|
|
|
|
|
# We found where installations are, don't look deeper |
495
|
0
|
|
|
|
|
0
|
$maxdepth = $depth; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
} |
498
|
|
|
|
|
|
|
} |
499
|
12
|
50
|
|
|
|
97
|
else { btw dvis '##glob failed: $pattern\n$@' if $@; } |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
377576
|
100
|
|
|
|
46883298
|
if ($depth == $maxdepth) { |
|
|
50
|
|
|
|
|
|
503
|
2070
|
100
|
|
|
|
6311
|
warn "# pruning at maxdepth $depth ",qsh($_),"\n" if $debug; |
504
|
2070
|
|
|
|
|
20560
|
$File::Find::prune = 1; |
505
|
2070
|
|
|
|
|
58184
|
return; |
506
|
|
|
|
|
|
|
} |
507
|
0
|
|
|
|
|
0
|
elsif ($depth > $maxdepth) { oops dvis '$depth $maxdepth $_' } |
508
|
|
|
|
|
|
|
}, |
509
|
|
|
|
|
|
|
follow_fast => 1, |
510
|
|
|
|
|
|
|
follow_skip => 2, |
511
|
|
|
|
|
|
|
dangling_symlinks => 0, |
512
|
|
|
|
|
|
|
no_chdir => 1 |
513
|
|
|
|
|
|
|
}, |
514
|
|
|
|
|
|
|
@search_dirs |
515
|
|
|
|
|
|
|
); |
516
|
|
|
|
|
|
|
$OLpath_answer = path( |
517
|
|
|
|
|
|
|
$results{libre}{path} // $results{open}{path} |
518
|
|
|
|
|
|
|
|| (!$ENV{SPREADSHEET_EDIT_IGNPATH} && which("soffice")) # installed OO? |
519
|
|
|
|
|
|
|
|| return(undef) |
520
|
|
|
|
|
|
|
)->realpath->canonpath |
521
|
|
|
|
|
|
|
}#openlibreoffice_path |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
sub _openlibre_features() { |
524
|
1
|
|
|
1
|
|
4
|
state $hash; |
525
|
1
|
50
|
|
|
|
11
|
return $hash if defined $hash; |
526
|
1
|
|
50
|
|
|
20
|
my $prog = openlibreoffice_path() // return(($hash={ available => 0 })); |
527
|
0
|
|
|
|
|
0
|
my $raw_version; |
528
|
|
|
|
|
|
|
# This is gross but fast and works in recent versions of LO |
529
|
0
|
0
|
|
|
|
0
|
if (my $fh = eval{ path($prog)->realpath->parent->child("types/offapi.rdb") |
|
0
|
|
|
|
|
0
|
|
530
|
|
|
|
|
|
|
->filehandle("<",":raw")} ) { |
531
|
0
|
|
|
|
|
0
|
my $octets; sysread $fh, $octets, 100; |
|
0
|
|
|
|
|
0
|
|
532
|
0
|
0
|
|
|
|
0
|
if ($octets =~ /Created by LibreOffice (\d+\.\d+\.\w+)/) { |
533
|
0
|
|
|
|
|
0
|
$raw_version = $1; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
} |
536
|
0
|
0
|
|
|
|
0
|
unless ($raw_version) { |
537
|
0
|
0
|
|
|
|
0
|
if (qx/$prog --version 2>&1/ =~ /Libre.*? (\d+\.\d+\.\w+)/) { |
538
|
0
|
|
|
|
|
0
|
$raw_version = $1; |
539
|
|
|
|
|
|
|
} else { |
540
|
0
|
|
|
|
|
0
|
warn "$prog --version DID NOT WORK\n"; |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
} |
543
|
0
|
0
|
|
|
|
0
|
unless ($raw_version) { |
544
|
0
|
|
|
|
|
0
|
warn "WARNING: Could not determine version of $prog\n"; |
545
|
0
|
|
|
|
|
0
|
$raw_version = "999.01"; |
546
|
|
|
|
|
|
|
} |
547
|
0
|
|
|
|
|
0
|
my $version = version->parse("v$raw_version"); |
548
|
0
|
|
|
|
|
0
|
$hash = { |
549
|
|
|
|
|
|
|
available => 1, |
550
|
|
|
|
|
|
|
# LibreOffice 7.2 allows extracting all sheets at once |
551
|
|
|
|
|
|
|
allsheets => ($version >= version->parse("v7.2")), |
552
|
|
|
|
|
|
|
# ...but not yet extracting a single sheet by name. |
553
|
|
|
|
|
|
|
# https://bugs.documentfoundation.org/show_bug.cgi?id=135762#c24 |
554
|
|
|
|
|
|
|
named_sheet => 0, |
555
|
|
|
|
|
|
|
# Supported output formats are too many to list |
556
|
|
|
|
|
|
|
ousuf_any => 1, |
557
|
|
|
|
|
|
|
raw_version => $raw_version, version => "$version", |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
0
|
|
|
0
|
|
0
|
sub _openlibre_supports_allsheets() { _openlibre_features()->{allsheets} } |
562
|
0
|
|
|
0
|
|
0
|
sub _openlibre_supports_named_sheet() { _openlibre_features()->{named_sheet} } |
563
|
0
|
|
|
0
|
|
0
|
sub _openlibre_supports_writing($) { _openlibre_features()->{available} } |
564
|
|
|
|
|
|
|
|
565
|
1
|
|
|
1
|
|
2046
|
sub _ssconvert_features() { return { availble => 0 } } # TODO add back? |
566
|
0
|
|
|
0
|
|
0
|
sub _ssconvert_supports_allsheets() { _ssconvert_features()->{allsheets} } |
567
|
0
|
|
|
0
|
|
0
|
sub _ssconvert_supports_named_sheet() { _ssconvert_features()->{named_sheet} } |
568
|
0
|
|
|
0
|
|
0
|
sub _ssconvert_supports_writing($) { _ssconvert_features()->{available} } |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
# These allow users (e.g. App-diff_spreadsheets tests) to determine |
571
|
|
|
|
|
|
|
# if external tool(s) are available to convert between spreadsheet formats |
572
|
|
|
|
|
|
|
# or to/from csv (CSVs are supported directly so can always be used) |
573
|
|
|
|
|
|
|
# Currently used by t/io.pl to skip tests |
574
|
|
|
|
|
|
|
sub can_cvt_spreadsheets() { |
575
|
|
|
|
|
|
|
_openlibre_features()->{available} || _ssconvert_features()->{availble} |
576
|
1
|
50
|
|
1
|
1
|
2630455
|
} |
577
|
|
|
|
|
|
|
sub can_extract_allsheets() { |
578
|
0
|
0
|
|
0
|
1
|
0
|
_openlibre_supports_allsheets() || _ssconvert_supports_allsheets() |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
sub can_extract_named_sheet() { |
581
|
0
|
0
|
0
|
0
|
1
|
0
|
can_extract_allsheets() # used to emulate |
582
|
|
|
|
|
|
|
|| _openlibre_supports_named_sheet() || _ssconvert_supports_named_sheet() |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=for Pod::Coverage can_cvt_spreadsheets can_extract_allsheets |
586
|
|
|
|
|
|
|
-for Pod::Coverage can_extract_named_sheet |
587
|
|
|
|
|
|
|
=cut |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
sub _runcmd($@) { |
590
|
0
|
|
|
0
|
|
0
|
my ($opts, @cmd) = @_; |
591
|
0
|
|
|
|
|
0
|
my $guard = _signals_guard; |
592
|
|
|
|
|
|
|
# This used to fork & exec but that blows up on MSWin32 because the child |
593
|
|
|
|
|
|
|
# pseudo-process executes all END{} blocks everywhere after "exec" |
594
|
0
|
|
|
|
|
0
|
my $redirs = ""; |
595
|
0
|
0
|
|
|
|
0
|
if ($opts->{suppress_stderr}) { |
596
|
0
|
|
|
|
|
0
|
$redirs .= " 2>".devnull(); |
597
|
|
|
|
|
|
|
} |
598
|
0
|
0
|
|
|
|
0
|
if ($opts->{stdout_to_stderr}) { |
599
|
0
|
|
|
|
|
0
|
confess "Not portable"; |
600
|
0
|
|
|
|
|
0
|
$redirs .= " 1>&2"; |
601
|
|
|
|
|
|
|
} |
602
|
0
|
0
|
|
|
|
0
|
if ($opts->{stderr_to_stdout}) { |
603
|
0
|
|
|
|
|
0
|
confess "Not portable"; |
604
|
0
|
|
|
|
|
0
|
$redirs .= " 2>&1"; |
605
|
|
|
|
|
|
|
} |
606
|
0
|
0
|
|
|
|
0
|
if ($opts->{suppress_stdout}) { |
607
|
0
|
|
|
|
|
0
|
$redirs .= " >".devnull(); |
608
|
|
|
|
|
|
|
} |
609
|
0
|
|
|
|
|
0
|
my $cmdstr = join(" ", map{qsh} @cmd) . $redirs; |
|
0
|
|
|
|
|
0
|
|
610
|
0
|
0
|
|
|
|
0
|
if ($redirs) { |
611
|
0
|
|
|
|
|
0
|
foreach (@cmd) { |
612
|
0
|
0
|
|
|
|
0
|
confess "Can not portably pass argument '$_'" if /["']/; |
613
|
|
|
|
|
|
|
} |
614
|
0
|
0
|
|
|
|
0
|
warn "> $cmdstr\n" if $opts->{verbose}; |
615
|
0
|
|
|
|
|
0
|
system $cmdstr; |
616
|
|
|
|
|
|
|
} else { |
617
|
0
|
0
|
|
|
|
0
|
warn "> $cmdstr\n" if $opts->{verbose}; |
618
|
0
|
|
|
|
|
0
|
system @cmd; |
619
|
|
|
|
|
|
|
} |
620
|
0
|
|
|
|
|
0
|
return $? |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
sub _fmt_outpath_contents($) { |
624
|
0
|
|
0
|
0
|
|
0
|
my $outpath = $_[0]->{outpath} // oops; |
625
|
0
|
0
|
|
|
|
0
|
return "(outpath does not exist)" unless -e $outpath; |
626
|
0
|
0
|
|
|
|
0
|
return "(outpath is a file)" if -f $outpath; |
627
|
0
|
0
|
|
|
|
0
|
return "(outpath is a STRANGE OBJECT)" unless -d $outpath; |
628
|
|
|
|
|
|
|
"\n outpath contains: " |
629
|
0
|
|
|
|
|
0
|
.join(", ",map{qsh basename $_} path($outpath)->children); |
|
0
|
|
|
|
|
0
|
|
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
my $tempdir; |
633
|
|
|
|
|
|
|
sub _create_tempdir_if_needed($) { |
634
|
2
|
|
|
2
|
|
4
|
my $opts = shift; |
635
|
|
|
|
|
|
|
# Keep a per-process persistent temp directory, deleted at process exit. |
636
|
|
|
|
|
|
|
# It contains result files when the user did not specify {outpath}, |
637
|
|
|
|
|
|
|
# plus a cache of as-yet unrequested sheet .csv files, used when the |
638
|
|
|
|
|
|
|
# external tool can only extract all sheets, not a single sheet by name: |
639
|
|
|
|
|
|
|
# |
640
|
|
|
|
|
|
|
# tempdir/ |
641
|
|
|
|
|
|
|
# <ifbase>_<sig>.xlsx etc. # single file returned to user |
642
|
|
|
|
|
|
|
# <ifbase>_<sig>/*.csv # directory returned to user |
643
|
|
|
|
|
|
|
# <ifbase>_<sig>_csvcache/*.csv |
644
|
|
|
|
|
|
|
# |
645
|
|
|
|
|
|
|
# <ifbase> is derived from the intput file name, and <sig> is a fingerprint |
646
|
|
|
|
|
|
|
# based on input file's dev, inode, and modification timestamp. |
647
|
|
|
|
|
|
|
# |
648
|
2
|
|
66
|
|
|
12
|
$tempdir //= do{ |
649
|
|
|
|
|
|
|
#(my $template = __PACKAGE__."_XXXXX") =~ s/::/-/g; |
650
|
|
|
|
|
|
|
#Path::Tiny->tempdir($template) |
651
|
1
|
|
|
|
|
3
|
my $pid = $$; |
652
|
1
|
|
|
|
|
5
|
my $user = _get_username(); |
653
|
1
|
|
|
|
|
9
|
(my $dname = __PACKAGE__."_${user}_${pid}_tempdir") =~ s/::/-/g; |
654
|
1
|
|
|
|
|
17
|
(my $path = path(File::Spec->tmpdir)->child($dname))->mkpath; |
655
|
1
|
|
|
|
|
374
|
$path |
656
|
|
|
|
|
|
|
}; |
657
|
|
|
|
|
|
|
} |
658
|
3
|
100
|
|
3
|
|
7623
|
END{ $tempdir->remove_tree if $tempdir; } |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# Compose a unique path under $tempdir. |
661
|
|
|
|
|
|
|
# This is *not* a "tempfile" or "tempdir" object which auto-destructs, |
662
|
|
|
|
|
|
|
# in fact it does not even exist yet and we don't know here which it will be. |
663
|
|
|
|
|
|
|
# Either the user must remove it when they are done with it, or it will |
664
|
|
|
|
|
|
|
# be removed when $tempdir is removed at process exit. |
665
|
|
|
|
|
|
|
# |
666
|
|
|
|
|
|
|
sub _path_under_tempdir($@) { |
667
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
668
|
|
|
|
|
|
|
my %args = ( |
669
|
0
|
|
|
|
|
0
|
words => [$opts->{ifbase}, $opts->{sheetname}], |
670
|
|
|
|
|
|
|
@_ |
671
|
|
|
|
|
|
|
); |
672
|
0
|
|
|
|
|
0
|
my $bname = join "_", grep{defined} @{$args{words}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
673
|
|
|
|
|
|
|
# Collisions occur when recursing to emulate Extract-by-name, |
674
|
|
|
|
|
|
|
# or if the user repeatedly reads the same thing, etc. |
675
|
0
|
|
|
|
|
0
|
state $seqnums = {}; |
676
|
0
|
0
|
|
|
|
0
|
if ($seqnums->{$bname}++) { |
677
|
0
|
|
|
|
|
0
|
$bname .= "_".$seqnums->{$bname}; # append unique sequence number |
678
|
|
|
|
|
|
|
} |
679
|
0
|
0
|
|
|
|
0
|
$bname .= ".$args{suf}" if $args{suf}; |
680
|
0
|
|
|
|
|
0
|
return $tempdir->child($bname); |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
# Compose csv cache subdir path |
684
|
|
|
|
|
|
|
sub _cachedir($) { |
685
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
686
|
0
|
|
|
|
|
0
|
_path_under_tempdir($opts,words => [$opts->{ifbase}, "csvcache"]); |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
## Copy an ephemeral temp file to a path under tempdir if needed |
690
|
|
|
|
|
|
|
#sub _make_file_permanent($$) { |
691
|
|
|
|
|
|
|
# my ($opts, $path) = @_; |
692
|
|
|
|
|
|
|
# if (eval{ $path->cached_temp }) { # didn't throw |
693
|
|
|
|
|
|
|
# my $suf = $path->basename =~ /\.(\w+)$/a ? $1 : undef; |
694
|
|
|
|
|
|
|
# my $newpath = _path_under_tempdir($opts, suf => $suf); |
695
|
|
|
|
|
|
|
# $path->move($newpath); |
696
|
|
|
|
|
|
|
# return $newpath |
697
|
|
|
|
|
|
|
# } else { |
698
|
|
|
|
|
|
|
# return $path |
699
|
|
|
|
|
|
|
# } |
700
|
|
|
|
|
|
|
#} |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
sub _convert_using_openlibre($$$) { |
703
|
0
|
|
|
0
|
|
0
|
my ($opts, $src, $dst) = @_; |
704
|
0
|
0
|
|
0
|
|
0
|
oops unless all{ $opts->{$_} } qw/cvt_from cvt_to/; |
|
0
|
|
|
|
|
0
|
|
705
|
0
|
0
|
0
|
|
|
0
|
oops if $opts->{allsheets} && ! _openlibre_supports_allsheets(); |
706
|
0
|
0
|
0
|
|
|
0
|
oops if $opts->{sheetname} && ! _openlibre_supports_named_sheet(); |
707
|
0
|
|
|
|
|
0
|
my $debug = $opts->{debug}; |
708
|
|
|
|
|
|
|
|
709
|
0
|
|
0
|
|
|
0
|
my $prog = openlibreoffice_path() // oops; |
710
|
|
|
|
|
|
|
|
711
|
0
|
|
|
|
|
0
|
my $saved_UserInstallation = $ENV{UserInstallation}; |
712
|
|
|
|
|
|
|
# URI format is file://server/path where 'server' is empty. "file://path" is |
713
|
|
|
|
|
|
|
# "never correct, but is often used" en.wikipedia.org/wiki/File_URI_scheme |
714
|
|
|
|
|
|
|
# Correct examples: file::///tmp/something file:///C:/somewhere |
715
|
0
|
|
|
|
|
0
|
$ENV{UserInstallation} = URI::file->new(_get_tool_profdir($prog)->canonpath); |
716
|
0
|
0
|
|
|
|
0
|
warn "Temporarily set UserInstallation=$ENV{UserInstallation}\n" if $debug; |
717
|
|
|
|
|
|
|
scope_guard { |
718
|
0
|
0
|
|
0
|
|
0
|
if (defined $saved_UserInstallation) { |
719
|
0
|
|
|
|
|
0
|
$ENV{UserInstallation} = $saved_UserInstallation; |
720
|
|
|
|
|
|
|
} else { |
721
|
|
|
|
|
|
|
delete $ENV{UserInstallation} |
722
|
0
|
|
|
|
|
0
|
} |
723
|
0
|
|
|
|
|
0
|
}; |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
# The --convert-to argument is "suffix:filtername:filteropts" |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
# I think (not certain) that we can only specify the encoding of CSV files, |
728
|
|
|
|
|
|
|
# either as input or output; .xlsx and .ods spreadsheets (which are based |
729
|
|
|
|
|
|
|
# on XML) could in principle use any encoding internally, but I'm not sure |
730
|
|
|
|
|
|
|
# we can control that, nor should anyone ever need to. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
# REFERENCES: |
733
|
|
|
|
|
|
|
# https://help.libreoffice.org/7.5/en-US/text/shared/guide/start_parameters.html?&DbPAR=SHARED&System=UNIX |
734
|
|
|
|
|
|
|
# http://wiki.openoffice.org/wiki/Documentation/DevGuide/Spreadsheets/Filter_Options |
735
|
|
|
|
|
|
|
# https://wiki.documentfoundation.org/Documentation/DevGuide/Spreadsheet_Documents#Filter_Options_for_the_CSV_Filter |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
# I think we never want to specify the filter unless we have parameters |
738
|
|
|
|
|
|
|
# for it. Currently that is only for csv. |
739
|
|
|
|
|
|
|
# If no filter is specified, the suffix (e.g. 'ods') should be enough |
740
|
0
|
|
|
|
|
0
|
state $suf2ofilter = { |
741
|
|
|
|
|
|
|
csv => "Text - txt - csv (StarCalc)", |
742
|
|
|
|
|
|
|
txt => "Text - txt - csv (StarCalc)", |
743
|
|
|
|
|
|
|
#xls => "MS Excel 97", |
744
|
|
|
|
|
|
|
#xlsx => "Calc MS Excel 2007 XML", |
745
|
|
|
|
|
|
|
#ods => "calc8", |
746
|
|
|
|
|
|
|
}; |
747
|
|
|
|
|
|
|
|
748
|
0
|
|
0
|
|
|
0
|
my $ifilter = $opts->{soffice_infilter} //= do{ |
749
|
0
|
0
|
|
|
|
0
|
if ($opts->{cvt_from} eq "csv") { |
750
|
0
|
0
|
|
|
|
0
|
my $filter_name = $suf2ofilter->{$opts->{cvt_from}} or oops; |
751
|
0
|
|
|
|
|
0
|
my $enc = $opts->{input_encoding}; |
752
|
0
|
|
|
|
|
0
|
my $charset = _name2LOcharsetnum($enc); # dies if unknown enc |
753
|
0
|
|
|
|
|
0
|
my $colformats = ""; |
754
|
0
|
0
|
|
|
|
0
|
if (my $cf = $opts->{col_formats}) { |
755
|
0
|
0
|
|
|
|
0
|
$cf = [split /[\/,]/, $cf] unless ref($cf); # fmtA/fmtB/... |
756
|
0
|
|
|
|
|
0
|
for (my $ix=0; $ix <= $#$cf; $ix++) { |
757
|
0
|
|
0
|
|
|
0
|
local $_ = $cf->[$ix] // 1; |
758
|
0
|
0
|
0
|
|
|
0
|
m#^([123459]|10)$# |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
759
|
|
|
|
|
|
|
|| s#^standard$#1#i |
760
|
|
|
|
|
|
|
|| s#^text$#2#i |
761
|
|
|
|
|
|
|
|| s#^M+/D+/Y+$#3#i |
762
|
|
|
|
|
|
|
|| s#^D+/M+/Y+$#4#i |
763
|
|
|
|
|
|
|
|| s#^Y+/M+/D+$#5#i |
764
|
|
|
|
|
|
|
|| s#^ignore$#9#i |
765
|
|
|
|
|
|
|
|| s#^US.*English$#10#i |
766
|
|
|
|
|
|
|
|| croak "Unknown format code '$_' in {col_formats}"; |
767
|
0
|
0
|
|
|
|
0
|
$colformats .= "/" if $colformats; |
768
|
0
|
|
|
|
|
0
|
$colformats .= ($ix+1)."/$_"; |
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
$filter_name.":" |
772
|
|
|
|
|
|
|
# Tokens 1-4: FldSep=',' TxtDelim='"' Charset FirstLineNum |
773
|
|
|
|
|
|
|
#. "44,34,$charset,1" |
774
|
|
|
|
|
|
|
. ord($opts->{sep_char}//",")."," |
775
|
0
|
|
0
|
|
|
0
|
. ord($opts->{quote_char}//'"')."," |
|
|
|
0
|
|
|
|
|
776
|
|
|
|
|
|
|
. "$charset,1" |
777
|
|
|
|
|
|
|
# Token 5: Cell format codes: |
778
|
|
|
|
|
|
|
# If variable-width cells (the norm): colnum/fmt/colnum/fmt... |
779
|
|
|
|
|
|
|
# colnum: 1-based column number |
780
|
|
|
|
|
|
|
# fmt: 1=Std 2=Text 3=MM/DD/YY 4=DD/MM/YY 5=YY/MM/DD 6-8 unused |
781
|
|
|
|
|
|
|
# 9=ignore field (do not import), |
782
|
|
|
|
|
|
|
# 10=US-English content (e.g. 3.14 not 3,14) |
783
|
|
|
|
|
|
|
# (I'm guessing 1=Std means use current lang [or per Tok 6?]) |
784
|
|
|
|
|
|
|
# If fixed-width cells... [something else] |
785
|
|
|
|
|
|
|
. ",$colformats" |
786
|
|
|
|
|
|
|
# Token 6: MS-LCID Language Id; 0 or omitted means UI language |
787
|
|
|
|
|
|
|
. "," # default: false |
788
|
|
|
|
|
|
|
# Token 7: On input: true => Quoted cells are always read a 'text', |
789
|
|
|
|
|
|
|
# effectively disabling Token 8. This must be false to recognize dates |
790
|
|
|
|
|
|
|
# like "Jan 1, 2000" which by necessity must be quoted for the comma, |
791
|
|
|
|
|
|
|
# but will **CORRUPT** zip codes with leading zeroes unless |
792
|
|
|
|
|
|
|
# col_formats overrides (which it does now by default). |
793
|
|
|
|
|
|
|
.",false" # default: false |
794
|
|
|
|
|
|
|
# Token 8: on input: "Detect Special Numbers", i.e. date or time values |
795
|
|
|
|
|
|
|
# in human form, numbers in scientific (expondntial) notation etc. |
796
|
|
|
|
|
|
|
# If false, ONLY decimal numbers (thousands separators ok). |
797
|
|
|
|
|
|
|
.",true" # default: false (for import) |
798
|
|
|
|
|
|
|
# Tokens 9-10: not used on import |
799
|
|
|
|
|
|
|
.",," |
800
|
|
|
|
|
|
|
# Token 11: Remove spaces; trim leading & trailing spaces when reading |
801
|
|
|
|
|
|
|
."," # default: false |
802
|
|
|
|
|
|
|
# Token 12: not use on import |
803
|
|
|
|
|
|
|
."," |
804
|
|
|
|
|
|
|
# Token 13: Import "=..." as formulas instead of text? |
805
|
|
|
|
|
|
|
."," # default: false i.e. do not recognize formulas |
806
|
|
|
|
|
|
|
# Token 14: "Automatically detected since LibreOffice 7.6" [BOM?] |
807
|
|
|
|
|
|
|
."," |
808
|
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
|
else { |
810
|
|
|
|
|
|
|
undef |
811
|
0
|
|
|
|
|
0
|
} |
812
|
|
|
|
|
|
|
}; |
813
|
|
|
|
|
|
|
|
814
|
0
|
|
0
|
|
|
0
|
my $ofilter = $opts->{soffice_outfilter} //= do{ |
815
|
|
|
|
|
|
|
# OutputFilterName[:paramtoken,paramtoken,...] |
816
|
0
|
0
|
|
|
|
0
|
if ($opts->{cvt_to} eq "csv") { |
817
|
0
|
0
|
|
|
|
0
|
my $filter_name = $suf2ofilter->{$opts->{cvt_to}} or oops; |
818
|
0
|
|
|
|
|
0
|
my $enc = $opts->{output_encoding}; |
819
|
0
|
|
|
|
|
0
|
my $charset = _name2LOcharsetnum($enc); # dies if unknown enc |
820
|
|
|
|
|
|
|
$filter_name.":" |
821
|
|
|
|
|
|
|
# Tokens 1-4: FldSep=, TxtDelim=" Charset FirstLineNum |
822
|
|
|
|
|
|
|
#."44,34,$charset,1" |
823
|
|
|
|
|
|
|
. ord($opts->{sep_char}//",")."," |
824
|
|
|
|
|
|
|
. ord($opts->{quote_char}//'"')."," |
825
|
|
|
|
|
|
|
. "$charset,1" |
826
|
|
|
|
|
|
|
# Token 5: Cell format codes. Only used for import? (see above) |
827
|
|
|
|
|
|
|
# What about fixed-width? |
828
|
|
|
|
|
|
|
."," |
829
|
|
|
|
|
|
|
# Token 6: Language identifier (uses Microsoft lang ids) |
830
|
|
|
|
|
|
|
# 1033 means US-English (omitted => use UI's language) |
831
|
|
|
|
|
|
|
."," |
832
|
|
|
|
|
|
|
# Token 7: QuoteAllTextCells |
833
|
|
|
|
|
|
|
# *** true will "quote" even single-bareword cells, which looks |
834
|
|
|
|
|
|
|
# *** bad and makes t/ tests messier, but preserves information |
835
|
|
|
|
|
|
|
# *** that such cells were not numbers or dates, etc. This ensures |
836
|
|
|
|
|
|
|
# *** that Zip codes, etc. with leading zeroes won't be corrupted |
837
|
|
|
|
|
|
|
# *** if converted back into a spreadsheet |
838
|
|
|
|
|
|
|
# Option #1: Specify true to quote all cells on export, then post-process |
839
|
|
|
|
|
|
|
# the result to un-quote obviously safe cells (for yet more overhead). |
840
|
|
|
|
|
|
|
# Option #2: Specify false, and assume the resulting CSV will never |
841
|
|
|
|
|
|
|
# be imported into a spreadsheet except via us, and we pre-scan |
842
|
|
|
|
|
|
|
# the data to generate {col_formats} so will usually be safe. |
843
|
|
|
|
|
|
|
# 5/30/23: Switching to Option #2... |
844
|
|
|
|
|
|
|
.",false" |
845
|
|
|
|
|
|
|
# Token 8: on output: true to store number as numbers; false to |
846
|
|
|
|
|
|
|
# store number cells as text. No UI equivalent. |
847
|
|
|
|
|
|
|
.",true" # default: documented as true (for export) BUT IS NOT! |
848
|
|
|
|
|
|
|
# Token 9: "Save cell contents as shown" |
849
|
|
|
|
|
|
|
# Generally we DO NOT want this because things like dates |
850
|
|
|
|
|
|
|
# can be formatted many different ways. |
851
|
|
|
|
|
|
|
##.",".($opts->{raw_values} ? "false" : "true") |
852
|
|
|
|
|
|
|
.",false" |
853
|
|
|
|
|
|
|
# Token 10: "Export cell formulas" |
854
|
|
|
|
|
|
|
.",false" |
855
|
|
|
|
|
|
|
# Token 11: not used for export |
856
|
|
|
|
|
|
|
."," |
857
|
|
|
|
|
|
|
# Token 12: (LO 7.2+) sheet selections: |
858
|
|
|
|
|
|
|
# 0 or absent => the "first" sheet |
859
|
|
|
|
|
|
|
# 1-N => the Nth sheet (arrgh, can not specify name!!) |
860
|
|
|
|
|
|
|
# -1 => export all sheets to files named filebasenamne.Sheetname.csv |
861
|
|
|
|
|
|
|
.",".($opts->{allsheets} ? -1 : |
862
|
0
|
0
|
0
|
|
|
0
|
$opts->{sheetname} ? die("add named-sheet support here") : |
|
|
0
|
0
|
|
|
|
|
863
|
|
|
|
|
|
|
0) |
864
|
|
|
|
|
|
|
# Token 13: Not used for export |
865
|
|
|
|
|
|
|
."," |
866
|
|
|
|
|
|
|
# Token 14: true to include BOM in the result |
867
|
|
|
|
|
|
|
#."," |
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
else { |
870
|
|
|
|
|
|
|
undef |
871
|
0
|
|
|
|
|
0
|
} |
872
|
|
|
|
|
|
|
}; |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
# We can only control the output directory path, not the name of |
875
|
|
|
|
|
|
|
# an individual result file. If $dst is a directory then the result |
876
|
|
|
|
|
|
|
# could theoretically output into it directly, but instead we always |
877
|
|
|
|
|
|
|
# output to an ephemeral temp directory and then move the results to $dst |
878
|
|
|
|
|
|
|
# |
879
|
|
|
|
|
|
|
# With 'allsheets' the resulting files must be renamed to conform to our |
880
|
|
|
|
|
|
|
# external API (namely SHEETNAME.csv). |
881
|
|
|
|
|
|
|
# |
882
|
|
|
|
|
|
|
# ERROR DETECTION: As of LO 7.5 we always get zero exist status and the |
883
|
|
|
|
|
|
|
# only way to detect errors is to notice that no files were written. |
884
|
|
|
|
|
|
|
# https://bugs.documentfoundation.org/show_bug.cgi?id=155415 |
885
|
|
|
|
|
|
|
# |
886
|
0
|
|
|
|
|
0
|
my $tdir = Path::Tiny->tempdir(path($dst)->basename."_XXXXX"); |
887
|
|
|
|
|
|
|
# will be deleted when $dirpath goes out of scope |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
my @cmd = ($prog, |
890
|
|
|
|
|
|
|
"--headless", "--invisible", |
891
|
|
|
|
|
|
|
"--nolockcheck", "--norestore", |
892
|
|
|
|
|
|
|
"--view", # open read-only in case can't create lockfile |
893
|
|
|
|
|
|
|
$ifilter ? ("--infilter=$ifilter") : (), |
894
|
|
|
|
|
|
|
"--convert-to", |
895
|
0
|
0
|
|
|
|
0
|
$opts->{cvt_to}.($ofilter ? ":$ofilter" : ""), |
|
|
0
|
|
|
|
|
|
896
|
|
|
|
|
|
|
"--outdir", $tdir->canonpath, |
897
|
|
|
|
|
|
|
path($src)->canonpath); |
898
|
|
|
|
|
|
|
|
899
|
0
|
0
|
|
|
|
0
|
unless ($debug) { |
900
|
0
|
|
|
|
|
0
|
$opts->{suppress_stdout} = 1; |
901
|
|
|
|
|
|
|
#$opts->{suppress_stderr} = 1; |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
|
904
|
0
|
|
|
|
|
0
|
my $cmdstatus = _runcmd($opts, @cmd); |
905
|
|
|
|
|
|
|
|
906
|
0
|
0
|
|
|
|
0
|
if ($cmdstatus != 0) { |
907
|
|
|
|
|
|
|
# This should never happen, see ERROR DETECTION above |
908
|
|
|
|
|
|
|
confess sprintf("($$) UNEXPECTED FAILURE, wstat=0x%04x\n",$cmdstatus), |
909
|
|
|
|
|
|
|
" converting '$opts->{inpath}' to $opts->{cvt_to}\n", |
910
|
0
|
|
|
|
|
0
|
" Command was: ",join(" ",map{qsh} @cmd); |
|
0
|
|
|
|
|
0
|
|
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
|
913
|
0
|
|
|
|
|
0
|
my @result_files = path($tdir)->children; |
914
|
0
|
0
|
|
|
|
0
|
btw dvis '>> @result_files' if $debug; |
915
|
0
|
0
|
|
|
|
0
|
if (@result_files == 0) { |
916
|
0
|
0
|
|
|
|
0
|
croak qsh($src)." is missing or unreadable\n", "cmd: @cmd\n" |
917
|
|
|
|
|
|
|
unless -r $src; |
918
|
0
|
|
|
|
|
0
|
croak "Something went wrong, ",path($prog)->basename," produced no output\n", |
919
|
|
|
|
|
|
|
"cmd: @cmd\n" |
920
|
|
|
|
|
|
|
} |
921
|
|
|
|
|
|
|
|
922
|
0
|
0
|
|
|
|
0
|
if ($opts->{allsheets}) { |
923
|
|
|
|
|
|
|
# Rename files to match our API (omit the spreadsheetbasename- prefix) |
924
|
0
|
|
|
|
|
0
|
foreach (@result_files) { |
925
|
0
|
|
|
|
|
0
|
my $dir = $_->parent; # Like dirname but including Volume: |
926
|
0
|
|
|
|
|
0
|
my $base = $_->basename; |
927
|
0
|
0
|
|
|
|
0
|
(my $newbase = $base) =~ s/^\Q$opts->{ifbase}\E-// or oops dvis '$base $opts'; |
928
|
0
|
|
|
|
|
0
|
my $newpath = $dir->child($newbase)->canonpath; |
929
|
0
|
|
|
|
|
0
|
my $oldpath = path($_)->canonpath; |
930
|
0
|
0
|
|
|
|
0
|
btw ">> Renaming $oldpath -> $newbase\n" if $debug; |
931
|
0
|
0
|
|
|
|
0
|
rename ($oldpath, $newpath) or oops "$!"; |
932
|
0
|
|
|
|
|
0
|
$_ = $newpath; # update @result_files |
933
|
|
|
|
|
|
|
} |
934
|
|
|
|
|
|
|
} |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# Move the results to $dst |
937
|
0
|
0
|
|
|
|
0
|
if (-e $dst) { |
938
|
0
|
0
|
|
|
|
0
|
croak "$dst must be a directory if it pre-exists\n" unless -d $dst; |
939
|
0
|
0
|
|
|
|
0
|
btw ">> Moving results -> $dst\n" if $debug; |
940
|
0
|
|
|
|
|
0
|
foreach (@result_files) { |
941
|
0
|
0
|
|
|
|
0
|
btw ">>> move $_ -> $dst" if $debug; |
942
|
0
|
|
|
|
|
0
|
File::Copy::move($_, $dst) |
943
|
|
|
|
|
|
|
} |
944
|
0
|
0
|
|
|
|
0
|
btw ">> Now $dst contains: ",avis($dst->children) if $debug; |
945
|
|
|
|
|
|
|
} else { |
946
|
0
|
0
|
|
|
|
0
|
if ($opts->{allsheets}) { |
947
|
0
|
0
|
|
|
|
0
|
btw ">> dirmove $tdir -> $dst\n" if $debug; |
948
|
0
|
0
|
|
|
|
0
|
rename($tdir, $dst) or File::Copy::dirmove($tdir, $dst); |
949
|
|
|
|
|
|
|
} else { |
950
|
0
|
0
|
|
|
|
0
|
croak "Expecting only one result file, not @result_files" |
951
|
|
|
|
|
|
|
if @result_files > 1; |
952
|
0
|
0
|
|
|
|
0
|
btw ">> move $result_files[0] -> $dst\n" if $debug; |
953
|
0
|
|
|
|
|
0
|
File::Copy::move($result_files[0], $dst); |
954
|
|
|
|
|
|
|
} |
955
|
|
|
|
|
|
|
} |
956
|
|
|
|
|
|
|
}#_convert_using_openlibre |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
sub _convert_using_ssconvert($$$) { |
959
|
0
|
|
|
0
|
|
0
|
my ($opts, $src, $dst) = @_; |
960
|
0
|
|
|
|
|
0
|
confess "Deprecated with extreme prejudice"; # no longer supported |
961
|
|
|
|
|
|
|
## |
962
|
|
|
|
|
|
|
## foreach (qw/inpath cvt_to /) |
963
|
|
|
|
|
|
|
## { oops "missing opts->{$_}" unless exists $opts->{$_} } |
964
|
|
|
|
|
|
|
## |
965
|
|
|
|
|
|
|
## my $eff_outpath = $opts->{outpath}; |
966
|
|
|
|
|
|
|
## if (my $prog=which("ssconvert")) { |
967
|
|
|
|
|
|
|
## my $enc = _get_encodings_from_opts($opts); |
968
|
|
|
|
|
|
|
## $enc //= "UTF-8"; # default |
969
|
|
|
|
|
|
|
## my @options; |
970
|
|
|
|
|
|
|
## if ($opts->{cvt_to} eq "csv") { |
971
|
|
|
|
|
|
|
## push @options, '--export-type=Gnumeric_stf:stf_assistant'; |
972
|
|
|
|
|
|
|
## my @dashO_terms = ("format=preserve", "transliterate-mode=escape"); |
973
|
|
|
|
|
|
|
## push @dashO_terms, "charset='${enc}'" if defined($enc); |
974
|
|
|
|
|
|
|
## if ($opts->{sheetname}) { |
975
|
|
|
|
|
|
|
## push @dashO_terms, "sheet='$opts->{sheetname}'"; |
976
|
|
|
|
|
|
|
## } |
977
|
|
|
|
|
|
|
## if ($opts->{allsheets}) { |
978
|
|
|
|
|
|
|
## #If both {allsheets} and {sheetname} are specified, only a single |
979
|
|
|
|
|
|
|
## # .csv file will be in the output directory |
980
|
|
|
|
|
|
|
## croak "'allsheets' option: 'outpath' must specify an existing directory" |
981
|
|
|
|
|
|
|
## unless -d $eff_outpath; |
982
|
|
|
|
|
|
|
## $eff_outpath = catfile($eff_outpath, "%s.csv"); |
983
|
|
|
|
|
|
|
## push @options, "--export-file-per-sheet"; |
984
|
|
|
|
|
|
|
## } |
985
|
|
|
|
|
|
|
## elsif ($opts->{sheetname}) { |
986
|
|
|
|
|
|
|
## # handled above |
987
|
|
|
|
|
|
|
## } |
988
|
|
|
|
|
|
|
## else { |
989
|
|
|
|
|
|
|
## # A backwards-incompatible change to ssconvert stopped extracting |
990
|
|
|
|
|
|
|
## # the "current" sheet by default; now all sheets are concatenated! |
991
|
|
|
|
|
|
|
## # See https://gitlab.gnome.org/GNOME/gnumeric/issues/461 |
992
|
|
|
|
|
|
|
## # ssconvert verison 1.12.45 supports a new "-O active-sheet=y" option |
993
|
|
|
|
|
|
|
## ## PORTABILITY BUG: Redirection syntax will not work on windows |
994
|
|
|
|
|
|
|
## my ($ssver) = (qx/ssconvert --version 2>&1/ =~ /ssconvert version '?(\d[\d\.]*)/); |
995
|
|
|
|
|
|
|
## if (version::is_lax($ssver) && version->parse($ssver) >= v1.12.45) { |
996
|
|
|
|
|
|
|
## push @dashO_terms, "active-sheet=y"; |
997
|
|
|
|
|
|
|
## } else { |
998
|
|
|
|
|
|
|
## croak("Due to an ssconvert bug, a sheetname must be given.\n", |
999
|
|
|
|
|
|
|
## "(for more information, see comment at ",__FILE__, |
1000
|
|
|
|
|
|
|
## " near line ", (__LINE__-10), ")\n"); |
1001
|
|
|
|
|
|
|
## } |
1002
|
|
|
|
|
|
|
## } |
1003
|
|
|
|
|
|
|
## push @options, '-O', join(" ",@dashO_terms); |
1004
|
|
|
|
|
|
|
## } |
1005
|
|
|
|
|
|
|
## elsif ($opts->{cvt_to} eq 'xlsx') { |
1006
|
|
|
|
|
|
|
## @options = ('--export-type=Gnumeric_Excel:xlsx2'); |
1007
|
|
|
|
|
|
|
## } |
1008
|
|
|
|
|
|
|
## elsif ($opts->{cvt_to} eq 'xls') { |
1009
|
|
|
|
|
|
|
## @options = ('--export-type=Gnumeric_Excel:excel_biff8'); # M'soft Excel 97/2000/XP |
1010
|
|
|
|
|
|
|
## } |
1011
|
|
|
|
|
|
|
## elsif ($opts->{cvt_to} =~ /^od/) { |
1012
|
|
|
|
|
|
|
## @options = ('--export-type=Gnumeric_OpenCalc:odf'); |
1013
|
|
|
|
|
|
|
## } |
1014
|
|
|
|
|
|
|
## elsif ($eff_outpath =~ /\.[a-z]{3,4}$/) { |
1015
|
|
|
|
|
|
|
## # let ssconvert choose based on the output file suffix |
1016
|
|
|
|
|
|
|
## } |
1017
|
|
|
|
|
|
|
## else { |
1018
|
|
|
|
|
|
|
## croak "unrecognized cvt_to='".u($opts->{cvt_to})."' and no outpath suffix"; |
1019
|
|
|
|
|
|
|
## } |
1020
|
|
|
|
|
|
|
## |
1021
|
|
|
|
|
|
|
## my $eff_inpath = $opts->{inpath}; |
1022
|
|
|
|
|
|
|
## if ($opts->{sheetname} && $opts->{inpath} =~ /.csv$/i) { |
1023
|
|
|
|
|
|
|
## # Control generated sheet name by using a symlink to the input file |
1024
|
|
|
|
|
|
|
## # See http://stackoverflow.com/questions/22550050/how-to-convert-csv-to-xls-with-ssconvert |
1025
|
|
|
|
|
|
|
## my $td = catdir($tempdir // oops, "Gnumeric"); |
1026
|
|
|
|
|
|
|
## remove_tree($td); mkdir($td) or die $!; |
1027
|
|
|
|
|
|
|
## $eff_inpath = catfile($td, $opts->{sheetname}); |
1028
|
|
|
|
|
|
|
## symlink $opts->{inpath}, $eff_inpath or die $!; |
1029
|
|
|
|
|
|
|
## fixme: handle unimplmented or no-perms symlink failures |
1030
|
|
|
|
|
|
|
## } |
1031
|
|
|
|
|
|
|
## my @cmd = ($prog, @options, $eff_inpath, $eff_outpath); |
1032
|
|
|
|
|
|
|
## |
1033
|
|
|
|
|
|
|
## my $suppress_stderr = !$opts->{debug}; |
1034
|
|
|
|
|
|
|
## if (0 != _runcmd({%$opts, suppress_stderr => $suppress_stderr}, @cmd)) { |
1035
|
|
|
|
|
|
|
## # Before showing a complicated ssconvert failure with backtrace, |
1036
|
|
|
|
|
|
|
## # check to see if the problem is just a non-existent input file |
1037
|
|
|
|
|
|
|
## { open my $dummy_fh, "<", $eff_inpath or croak "$eff_inpath : $!"; } |
1038
|
|
|
|
|
|
|
## my $failmsg = "($$) Conversion of '$opts->{inpath}' to $eff_outpath failed\n"."cmd: ".qshlist(@cmd)."\n"; |
1039
|
|
|
|
|
|
|
## if ($suppress_stderr) { # repeat showing all output |
1040
|
|
|
|
|
|
|
## if (0 == _runcmd({%$opts, suppress_stderr => 0}, @cmd)) { |
1041
|
|
|
|
|
|
|
## warn "Surprise! Command failed the first time but succeeded on 2nd try!\n"; |
1042
|
|
|
|
|
|
|
## } |
1043
|
|
|
|
|
|
|
## croak $failmsg; |
1044
|
|
|
|
|
|
|
## } |
1045
|
|
|
|
|
|
|
## } |
1046
|
|
|
|
|
|
|
## elsif (! -e $opts->{outpath}) { |
1047
|
|
|
|
|
|
|
## croak "($$) Conversion SILENTLY failed\n(using $prog)\n", |
1048
|
|
|
|
|
|
|
## " cmd: ",qshlist(@cmd),"\n" |
1049
|
|
|
|
|
|
|
## ; |
1050
|
|
|
|
|
|
|
## } |
1051
|
|
|
|
|
|
|
## return ($enc) |
1052
|
|
|
|
|
|
|
## } |
1053
|
|
|
|
|
|
|
## else { |
1054
|
|
|
|
|
|
|
## croak "Can not find ssconvert to convert '$opts->{inpath}' to $opts->{cvt_to}\n", |
1055
|
|
|
|
|
|
|
## "To install ssconvert: sudo apt-get install gnumeric\n"; |
1056
|
|
|
|
|
|
|
## } |
1057
|
|
|
|
|
|
|
} |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
# Extracts |||SHEETNAME or !SHEETNAME or [SHEETNAME] from a path+sheet |
1060
|
|
|
|
|
|
|
# specification, if present. |
1061
|
|
|
|
|
|
|
# (Lots of historical compatibility issues...) |
1062
|
|
|
|
|
|
|
# In scalar context, returns SHEETNAME or undef. |
1063
|
|
|
|
|
|
|
# INTERNAL USE ONLY: In array context, returns (filepath, SHEETNAME or undef) |
1064
|
|
|
|
|
|
|
sub sheetname_from_spec($) { |
1065
|
6
|
|
|
6
|
1
|
9
|
my $spec = shift; |
1066
|
6
|
|
|
|
|
11
|
local $_; |
1067
|
6
|
|
|
|
|
17
|
my $p = path($spec); |
1068
|
6
|
|
|
|
|
317
|
my $parent = $p->parent; |
1069
|
6
|
|
|
|
|
413
|
my ($base,$sn) = ($p->basename =~ /^(.*) (?| \|\|\|([^\!\[\|]+)$ |
1070
|
|
|
|
|
|
|
| \!([^\!\[\|]+)$ |
1071
|
|
|
|
|
|
|
| \[([^\[\]]+)\]$ |
1072
|
|
|
|
|
|
|
)/x); |
1073
|
6
|
100
|
33
|
|
|
102
|
wantarray ? ($parent->child($base//$p->basename)->stringify, $sn) : $sn |
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
sub filepath_from_spec($) { |
1076
|
2
|
|
|
2
|
1
|
6
|
my ($path, undef) = sheetname_from_spec($_[0]); |
1077
|
2
|
|
|
|
|
99
|
$path |
1078
|
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
|
#Tester |
1080
|
|
|
|
|
|
|
#foreach ("", "/a!b/c", "/a!b/c!sheet1", "/a/b/c[sheet2]", "/a/b/c[bozo]d.xls", |
1081
|
|
|
|
|
|
|
# ) { |
1082
|
|
|
|
|
|
|
# foreach($_, basename($_)) { |
1083
|
|
|
|
|
|
|
# my ($fp,$sn) = sheetname_from_spec($_); |
1084
|
|
|
|
|
|
|
# use open ':std', ':locale'; |
1085
|
|
|
|
|
|
|
# warn ivis '# $_ → $fp $sn\n'; |
1086
|
|
|
|
|
|
|
# my $sn2 = sheetname_from_spec($_); |
1087
|
|
|
|
|
|
|
# die "bug" unless u($sn) eq u($sn2); |
1088
|
|
|
|
|
|
|
# } |
1089
|
|
|
|
|
|
|
#} |
1090
|
|
|
|
|
|
|
#die "TEX"; |
1091
|
|
|
|
|
|
|
|
1092
|
|
|
|
|
|
|
# Construct a file + sheetname spec in the preferred form for humans to read |
1093
|
|
|
|
|
|
|
# If sheetname is undef, just return the file path |
1094
|
|
|
|
|
|
|
sub form_spec_with_sheetname($$) { |
1095
|
2
|
|
|
2
|
1
|
7
|
my ($filespec, $sheetname) = @_; |
1096
|
2
|
|
|
|
|
7
|
my $embedded_sheetname = sheetname_from_spec($filespec); |
1097
|
2
|
0
|
33
|
|
|
7
|
croak "conflicting embedded and separate sheetnames given" |
|
|
|
33
|
|
|
|
|
1098
|
|
|
|
|
|
|
if $embedded_sheetname && $sheetname && $embedded_sheetname ne $sheetname; |
1099
|
2
|
|
33
|
|
|
11
|
$sheetname //= $embedded_sheetname; |
1100
|
2
|
|
|
|
|
6
|
my $filepath = filepath_from_spec($filespec); |
1101
|
2
|
50
|
|
|
|
14
|
$sheetname ? "${filepath}[${sheetname}]" : $filepath |
1102
|
|
|
|
|
|
|
#$sheetname ? "${filepath}|||${sheetname}" : $filepath |
1103
|
|
|
|
|
|
|
} |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
our $default_input_encodings = "UTF-8,windows-1252,UTF-16BE,UTF-16LE"; |
1106
|
|
|
|
|
|
|
our $default_output_encoding = "UTF-8"; |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
# Return digested %opts setting |
1109
|
|
|
|
|
|
|
# sheetname, inpath_sans_sheet (as Path::Tiny), encoding or default |
1110
|
|
|
|
|
|
|
sub _process_args($;@) { |
1111
|
2
|
50
|
|
2
|
|
8
|
confess "fix obsolete call to pass linearized options" |
1112
|
|
|
|
|
|
|
if ref($_[0]) eq "HASH"; |
1113
|
2
|
50
|
|
|
|
7
|
my $leading_inpath = ( scalar(@_) % 2 == 1 ? shift(@_) : undef ); |
1114
|
2
|
|
|
|
|
12
|
my %opts = ( |
1115
|
|
|
|
|
|
|
cvt_from => "", |
1116
|
|
|
|
|
|
|
cvt_to => "", |
1117
|
|
|
|
|
|
|
@_, |
1118
|
|
|
|
|
|
|
#verbose => 999, tempdir => "/tmp/J", |
1119
|
|
|
|
|
|
|
); |
1120
|
2
|
50
|
|
|
|
6
|
if (defined $opts{inpath}) { |
1121
|
0
|
0
|
|
|
|
0
|
croak "Initial INPATH arg specified as well as inpath => ... in options" |
1122
|
|
|
|
|
|
|
if defined $leading_inpath; |
1123
|
|
|
|
|
|
|
} else { |
1124
|
2
|
|
33
|
|
|
9
|
$opts{inpath} = $leading_inpath // croak "No inpath was specified"; |
1125
|
|
|
|
|
|
|
} |
1126
|
2
|
50
|
|
|
|
7
|
$opts{verbose}=1 if $opts{debug}; |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
# inpath or outpath may have "!sheetname" appended (or alternate syntaxes), |
1129
|
|
|
|
|
|
|
# but may exist only if a separate 'sheetname' option is not specified. |
1130
|
|
|
|
|
|
|
# Input and output can not both be spreadsheets; one must be a CSV. |
1131
|
2
|
50
|
|
|
|
6
|
if (exists($opts{sheet})) { |
1132
|
0
|
|
|
|
|
0
|
carp "WARNING: Deprecated 'sheet' option found (use 'sheetname' instead)\n"; |
1133
|
0
|
0
|
|
|
|
0
|
croak "Both {sheet} and {sheetname} specified" if exists $opts{sheetname}; |
1134
|
0
|
|
|
|
|
0
|
$opts{sheetname} = delete $opts{sheet}; |
1135
|
|
|
|
|
|
|
} |
1136
|
2
|
|
|
|
|
4
|
{ my ($path_sans_sheet, $sheetname, $key); |
|
2
|
|
|
|
|
13
|
|
1137
|
2
|
|
|
|
|
5
|
for my $thiskey ('inpath', 'outpath') { |
1138
|
4
|
|
100
|
|
|
18
|
my $spec = $opts{$thiskey} || next; |
1139
|
2
|
|
|
|
|
11
|
my ($pssn, $sn) = sheetname_from_spec($spec); |
1140
|
2
|
50
|
|
|
|
113
|
if (defined $sn) { |
1141
|
0
|
0
|
|
|
|
0
|
croak "A sheetname is embeeded in both ", |
1142
|
|
|
|
|
|
|
"'$thiskey' ($opts{$thiskey}) and '$key' ($opts{$key})\n" |
1143
|
|
|
|
|
|
|
if $sheetname; |
1144
|
0
|
|
|
|
|
0
|
($path_sans_sheet, $sheetname, $key) = ($pssn, $sn, $thiskey); |
1145
|
|
|
|
|
|
|
} |
1146
|
|
|
|
|
|
|
} |
1147
|
2
|
50
|
|
|
|
8
|
if ($opts{sheetname}) { |
|
|
50
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
croak "'sheetname' option conflicts with embedded sheet name\n", |
1149
|
|
|
|
|
|
|
" sheetname => ", qsh($opts{sheetname}),"\n", |
1150
|
|
|
|
|
|
|
" $key => ", qsh($opts{$key}),"\n" |
1151
|
0
|
0
|
0
|
|
|
0
|
if defined($sheetname) && $sheetname ne $opts{sheetname}; |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
elsif (defined $sheetname) { |
1154
|
|
|
|
|
|
|
btw "(extracted sheet name \"$sheetname\" from $key)\n" |
1155
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
1156
|
0
|
|
|
|
|
0
|
$opts{sheetname} = $sheetname; |
1157
|
|
|
|
|
|
|
} |
1158
|
|
|
|
|
|
|
$opts{inpath_sans_sheet} = path( |
1159
|
|
|
|
|
|
|
($key && $key eq 'inpath') ? $path_sans_sheet : $opts{inpath} |
1160
|
2
|
50
|
33
|
|
|
10
|
); |
1161
|
|
|
|
|
|
|
} |
1162
|
|
|
|
|
|
|
# Input file basename sans any .suffix |
1163
|
2
|
|
|
|
|
101
|
$opts{ifbase} = $opts{inpath_sans_sheet}->basename(qr/\.[^.]+/); |
1164
|
|
|
|
|
|
|
|
1165
|
2
|
|
|
|
|
124
|
%opts |
1166
|
|
|
|
|
|
|
}#_process_args |
1167
|
|
|
|
|
|
|
|
1168
|
|
|
|
|
|
|
# Extract the of encoding(s) specified in an iolayers string |
1169
|
|
|
|
|
|
|
# Parse iolayers string, returning ($prefix,[encodings],$suffix) |
1170
|
|
|
|
|
|
|
# For example from ":raw:encodings(utf8,windows-1252):zz" the output |
1171
|
|
|
|
|
|
|
# would be (":raw", [:utf8","windows-1252"], ":zz") |
1172
|
|
|
|
|
|
|
sub _parse_iolayers($) { |
1173
|
0
|
|
0
|
0
|
|
0
|
local $_ = (shift) // ""; |
1174
|
0
|
0
|
|
|
|
0
|
/\A(<prefix>.*?) |
1175
|
|
|
|
|
|
|
(<encspec>:utf8|:encoding\(([^\)]+)\)) |
1176
|
|
|
|
|
|
|
(<suffix>.*?)\z/ or croak "Invalid iolayers spec '$_'\n"; |
1177
|
0
|
|
|
|
|
0
|
(my $prefix, $_, my $suffix) = ($+{prefix}, $+{encspec}, $+{suffix}); |
1178
|
0
|
0
|
0
|
|
|
0
|
/^:(utf8)$/ || /^:encoding\(([^\)]+)\)$/ or oops($_); |
1179
|
0
|
|
|
|
|
0
|
my $enclist = [split /,/, $1]; # comma,separated,list,of,encodings |
1180
|
0
|
|
|
|
|
0
|
($prefix, $enclist, $suffix); |
1181
|
|
|
|
|
|
|
} |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
# Detect cvt_from and cvt_to from filenames, or peeking at the data. |
1184
|
|
|
|
|
|
|
# If input is CSV, detect encoding, separator and quote characters; |
1185
|
|
|
|
|
|
|
# add quotes to values with leading zeroes (e.g. Zip codes) which would |
1186
|
|
|
|
|
|
|
# otherwise be corrupted by being read as numbers instead of text strings. |
1187
|
|
|
|
|
|
|
# The modified data is written to a temp file |
1188
|
|
|
|
|
|
|
# Set default output_encoding if not specified |
1189
|
|
|
|
|
|
|
# RETURNS: The effective input path, either inpath_sans_sheet or a tempfile |
1190
|
|
|
|
|
|
|
sub _determine_enc_tofrom($) { |
1191
|
|
|
|
|
|
|
my $opts = shift; |
1192
|
|
|
|
|
|
|
my $debug = $opts->{debug}; |
1193
|
|
|
|
|
|
|
# Skip to ==BODY== below |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
my sub determine_input_encoding($) { |
1196
|
|
|
|
|
|
|
my $r2octets = shift; |
1197
|
|
|
|
|
|
|
# If user specified one encoding, use it; if user specified list, try them. |
1198
|
|
|
|
|
|
|
# If user did not specify, the default is a list to try. |
1199
|
|
|
|
|
|
|
$opts->{input_encoding} //= $default_input_encodings; |
1200
|
|
|
|
|
|
|
my @enclist = split m#,#, $opts->{input_encoding}; |
1201
|
|
|
|
|
|
|
return |
1202
|
|
|
|
|
|
|
if @enclist == 1; |
1203
|
|
|
|
|
|
|
$$r2octets //= $opts->{inpath_sans_sheet}->slurp_raw; |
1204
|
|
|
|
|
|
|
for my $enc (@enclist) { |
1205
|
|
|
|
|
|
|
eval { decode($enc, $$r2octets, Encode::FB_CROAK|Encode::LEAVE_SRC) }; |
1206
|
|
|
|
|
|
|
if ($@) { |
1207
|
|
|
|
|
|
|
btw "Input encoding '$enc' did not work...($@)\n" if $debug; |
1208
|
|
|
|
|
|
|
next; |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
btw "Input encoding '$enc' seems to work.\n" if $debug; |
1211
|
|
|
|
|
|
|
@enclist = ($enc); |
1212
|
|
|
|
|
|
|
last |
1213
|
|
|
|
|
|
|
} |
1214
|
|
|
|
|
|
|
#croak "Could not detect encoding of $opts->{inpath_sans_sheet}\n" |
1215
|
|
|
|
|
|
|
confess "Could not detect encoding of $opts->{inpath_sans_sheet}\n" |
1216
|
|
|
|
|
|
|
if @enclist > 1; |
1217
|
|
|
|
|
|
|
$opts->{input_encoding} = $enclist[0]; |
1218
|
|
|
|
|
|
|
} #determine_input_encoding |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
my sub readparse_csv($@) { |
1221
|
|
|
|
|
|
|
my $fh = shift; |
1222
|
|
|
|
|
|
|
my %csvopts = ( |
1223
|
|
|
|
|
|
|
@sane_CSV_read_options, |
1224
|
|
|
|
|
|
|
defined($opts->{quote_char}) ? (quote_char=>$opts->{quote_char}) : (), |
1225
|
|
|
|
|
|
|
defined($opts->{sep_char}) ? (sep_char=>$opts->{sep_char}) : (), |
1226
|
|
|
|
|
|
|
auto_diag => 2, # throw on error |
1227
|
|
|
|
|
|
|
@_ |
1228
|
|
|
|
|
|
|
); |
1229
|
|
|
|
|
|
|
$csvopts{escape_char} = $csvopts{quote_char}; # must always be the same |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
my $csv = Text::CSV->new (\%csvopts) |
1232
|
|
|
|
|
|
|
or croak "Text::CSV->new: ", Text::CSV->error_diag(), |
1233
|
|
|
|
|
|
|
dvis('\n## %csvopts\n'); |
1234
|
|
|
|
|
|
|
my @rows; |
1235
|
|
|
|
|
|
|
while (my $F = $csv->getline( $fh )) { |
1236
|
|
|
|
|
|
|
push(@rows, $F); |
1237
|
|
|
|
|
|
|
} |
1238
|
|
|
|
|
|
|
\@rows |
1239
|
|
|
|
|
|
|
} |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
my sub open_input($) { |
1242
|
|
|
|
|
|
|
my $r2octets = shift; |
1243
|
|
|
|
|
|
|
oops unless $opts->{input_encoding}; |
1244
|
|
|
|
|
|
|
my $fh; |
1245
|
|
|
|
|
|
|
my $pathish = defined($$r2octets) |
1246
|
|
|
|
|
|
|
? \$$r2octets : $opts->{inpath_sans_sheet}; |
1247
|
1
|
|
|
1
|
|
7
|
open($fh, "<:encoding($opts->{input_encoding})", $pathish) |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7
|
|
1248
|
|
|
|
|
|
|
or die "$pathish : $!"; |
1249
|
|
|
|
|
|
|
$fh |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
my sub determine_csv_q_sep($$) { |
1253
|
|
|
|
|
|
|
my ($r2octets, $r2rows) = @_; |
1254
|
|
|
|
|
|
|
return |
1255
|
|
|
|
|
|
|
if defined($opts->{quote_char}) && defined($opts->{sep_char}); |
1256
|
|
|
|
|
|
|
|
1257
|
|
|
|
|
|
|
my $fh = open_input($r2octets); |
1258
|
|
|
|
|
|
|
|
1259
|
|
|
|
|
|
|
# my $chars; |
1260
|
|
|
|
|
|
|
# if (defined($$r2octets) |
1261
|
|
|
|
|
|
|
# $chars = decode($opts->{input_encoding},$$r2octets,Encode::FB_CROAK); |
1262
|
|
|
|
|
|
|
# } |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
# Try combinations starting with the most-common '"' and ',' while |
1265
|
|
|
|
|
|
|
# parsing the file for unsafe unquoted values (throws on syntax error). |
1266
|
|
|
|
|
|
|
# The expectation is that the first try usually succeeds |
1267
|
|
|
|
|
|
|
Q: |
1268
|
|
|
|
|
|
|
for my $q (defined($opts->{quote_char}) |
1269
|
|
|
|
|
|
|
? ($opts->{quote_char}) : ("\"", "'")) { |
1270
|
|
|
|
|
|
|
my $found_q; |
1271
|
|
|
|
|
|
|
SEP: |
1272
|
|
|
|
|
|
|
for my $sep (defined($opts->{sep_char}) |
1273
|
|
|
|
|
|
|
? ($opts->{sep_char}) : (",","\t")) { |
1274
|
|
|
|
|
|
|
btw dvis '--- TRYING $q $sep ---' if $debug; |
1275
|
|
|
|
|
|
|
# # Preliminary check for an illegal use of the quote char |
1276
|
|
|
|
|
|
|
# if (defined($chars) |
1277
|
|
|
|
|
|
|
# && $chars =~ /[^${q}${sep}\x{0D}\x{0A}] |
1278
|
|
|
|
|
|
|
# ${q} |
1279
|
|
|
|
|
|
|
# (?=[^${q}${sep}\x{0D}\x{0A}] | \z)/gx) { |
1280
|
|
|
|
|
|
|
# btw ivis '>>>quote_char CAN NOT BE $q with sep=$sep because q exists mid-field before pos ${\(pos($chars))}' |
1281
|
|
|
|
|
|
|
# if $debug; |
1282
|
|
|
|
|
|
|
# next SEP |
1283
|
|
|
|
|
|
|
# } |
1284
|
|
|
|
|
|
|
$$r2rows = eval{ readparse_csv($fh, quote_char=>$q, sep_char=>$sep) }; |
1285
|
|
|
|
|
|
|
if ($@ eq "") { |
1286
|
|
|
|
|
|
|
warn ivis '>> Detected quote_char=$q sep_char=$sep\n' if $debug; |
1287
|
|
|
|
|
|
|
$opts->{quote_char} = $q; |
1288
|
|
|
|
|
|
|
$opts->{sep_char} = $sep; |
1289
|
|
|
|
|
|
|
last Q; |
1290
|
|
|
|
|
|
|
} |
1291
|
|
|
|
|
|
|
warn vis '$@\nq=$q sep=$sep did not work...\n' if $debug; |
1292
|
|
|
|
|
|
|
seek $fh, 0, SEEK_SET; |
1293
|
|
|
|
|
|
|
} |
1294
|
|
|
|
|
|
|
} |
1295
|
|
|
|
|
|
|
unless (defined($$r2rows)) { |
1296
|
|
|
|
|
|
|
confess "Input file is not valid CSV (or we have a bug)\n" |
1297
|
|
|
|
|
|
|
} |
1298
|
|
|
|
|
|
|
}#determine_csv_q_sep |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
my sub determine_csv_col_formats($$) { |
1301
|
|
|
|
|
|
|
my ($r2octets, $r2rows) = @_; |
1302
|
|
|
|
|
|
|
return |
1303
|
|
|
|
|
|
|
if defined $opts->{col_formats}; |
1304
|
|
|
|
|
|
|
$$r2rows //= do{ |
1305
|
|
|
|
|
|
|
my $fh = open_input($r2octets); |
1306
|
|
|
|
|
|
|
readparse_csv($fh); |
1307
|
|
|
|
|
|
|
}; |
1308
|
|
|
|
|
|
|
my $max_cols = 0; for my $row (@{ $$r2rows }) { $max_cols = @$row if $max_cols < @$row } |
1309
|
|
|
|
|
|
|
state $curr_yy = (localtime(time))[5]; |
1310
|
|
|
|
|
|
|
my @col_formats; |
1311
|
|
|
|
|
|
|
my sub recognized($$$$;$) { |
1312
|
|
|
|
|
|
|
my ($cx, $rx, $thing, $format, $as_msg) = @_; |
1313
|
|
|
|
|
|
|
$col_formats[$cx] = $format; |
1314
|
|
|
|
|
|
|
return unless $debug; |
1315
|
|
|
|
|
|
|
$as_msg //= " as ".vis($col_formats[$cx])." format"; |
1316
|
|
|
|
|
|
|
if (length($thing) > 35) { $thing = substr($thing,0,32)."..."; } |
1317
|
|
|
|
|
|
|
@_ = ("Recognized ",$thing," in ", cxrx2sheetaddr($cx,$rx), $as_msg); |
1318
|
|
|
|
|
|
|
goto &btw |
1319
|
|
|
|
|
|
|
} |
1320
|
|
|
|
|
|
|
CX: |
1321
|
|
|
|
|
|
|
for my $cx (0..$max_cols-1) { |
1322
|
|
|
|
|
|
|
RX: |
1323
|
|
|
|
|
|
|
for my $rx (0..$#{$$r2rows}) { |
1324
|
|
|
|
|
|
|
my $row = $$r2rows->[$rx]; |
1325
|
|
|
|
|
|
|
next if $cx > $#$row; # row has fewer columns than others |
1326
|
|
|
|
|
|
|
for ($row->[$cx]) { |
1327
|
|
|
|
|
|
|
# recognize obvious Y/M/D or M/D/Y or D/M/Y date forms |
1328
|
|
|
|
|
|
|
if (m#\b(?<y>(?:[12]\d)?\d\d)/(?<m>\d\d)/(?<d>\d\d)\b#) { |
1329
|
|
|
|
|
|
|
if ($+{d} > 12 && $+{d} <= 31 && $+{m} >= 1 && $+{m} <= 12 |
1330
|
|
|
|
|
|
|
&& ($+{y} < 100 || $+{y} >= 1000)) { |
1331
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"YY/MM/DD"); |
1332
|
|
|
|
|
|
|
next CX; |
1333
|
|
|
|
|
|
|
} |
1334
|
|
|
|
|
|
|
# If ambiguous YYYY/??/?? we can still assume it is a date and not text |
1335
|
|
|
|
|
|
|
if (length($+{y})==4) { |
1336
|
|
|
|
|
|
|
#recognized($cx,$rx,$_,""," as some kind of date, fmt unknown"); |
1337
|
|
|
|
|
|
|
next RX; |
1338
|
|
|
|
|
|
|
} |
1339
|
|
|
|
|
|
|
} |
1340
|
|
|
|
|
|
|
if (m#\b(?<m>\d\d)/(?<d>\d\d)/(?<y>(?:[12]\d)?\d\d)\b#) { |
1341
|
|
|
|
|
|
|
if ($+{y} < 100 || $+{y} >= 1000) { |
1342
|
|
|
|
|
|
|
if ($+{d} > 12 && $+{d} <= 31 && $+{m} >= 1 && $+{m} <= 12) { |
1343
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"MM/DD/YY"); |
1344
|
|
|
|
|
|
|
next CX |
1345
|
|
|
|
|
|
|
} |
1346
|
|
|
|
|
|
|
elsif ($+{m} > 12 && $+{m} <= 31 && $+{d} >= 1 && $+{d} <= 12) { |
1347
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"DD/MM/YY"); |
1348
|
|
|
|
|
|
|
next CX |
1349
|
|
|
|
|
|
|
} |
1350
|
|
|
|
|
|
|
} |
1351
|
|
|
|
|
|
|
# If ambiguous ??/??/YYYY we can still assume it is a date and not text |
1352
|
|
|
|
|
|
|
if (length($+{y})==4) { |
1353
|
|
|
|
|
|
|
#recognized($cx,$rx,$_,""," as some kind of date, fmt unknown"); |
1354
|
|
|
|
|
|
|
next RX; |
1355
|
|
|
|
|
|
|
} |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
# Things to force to be read as text fields: |
1358
|
|
|
|
|
|
|
# 1. Leading zeroes |
1359
|
|
|
|
|
|
|
# 2. Leading ascii minus (\x{2D}) rather than math minus \N{U+2212}. |
1360
|
|
|
|
|
|
|
# This prevents conversion to the Unicode math minus when LO |
1361
|
|
|
|
|
|
|
# reads the CSV. The assumption is that if the input has an ascii |
1362
|
|
|
|
|
|
|
# minus then the original spreadsheet format was "text" not |
1363
|
|
|
|
|
|
|
# numeric. |
1364
|
|
|
|
|
|
|
if (/^[\x{2D}0]/) { |
1365
|
|
|
|
|
|
|
recognized($cx,$rx,$_,"text"); |
1366
|
|
|
|
|
|
|
next CX; |
1367
|
|
|
|
|
|
|
} |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
} |
1370
|
|
|
|
|
|
|
} |
1371
|
|
|
|
|
|
|
$opts->{col_formats} = \@col_formats; |
1372
|
|
|
|
|
|
|
}#determine_csv_col_formats |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
# ==BODY== |
1375
|
|
|
|
|
|
|
unless ($opts->{cvt_to}) { |
1376
|
|
|
|
|
|
|
if ($opts->{outpath} && $opts->{outpath} =~ /\.([^.]+)$/) { |
1377
|
|
|
|
|
|
|
$opts->{cvt_to} = $1; |
1378
|
|
|
|
|
|
|
} |
1379
|
|
|
|
|
|
|
croak "'cvt_to' was not specified and can not be intuited from 'outpath'" |
1380
|
|
|
|
|
|
|
,dvis('\n### $opts') ###TEMP |
1381
|
|
|
|
|
|
|
unless $opts->{cvt_to}; |
1382
|
|
|
|
|
|
|
} |
1383
|
|
|
|
|
|
|
unless ($opts->{cvt_from}) { |
1384
|
|
|
|
|
|
|
if ($opts->{inpath_sans_sheet} =~ /\.([^.]+)$/) { |
1385
|
|
|
|
|
|
|
$opts->{cvt_from} = $1; |
1386
|
|
|
|
|
|
|
} |
1387
|
|
|
|
|
|
|
} |
1388
|
|
|
|
|
|
|
$opts->{cvt_from} =~ s/^\.txt$/.csv/i if $opts->{cvt_from}; |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
# Detect file format and, if CSV, encoding |
1391
|
|
|
|
|
|
|
my ($octets, $rows); |
1392
|
|
|
|
|
|
|
if (!$opts->{cvt_from} || $opts->{cvt_from} eq "csv") { |
1393
|
|
|
|
|
|
|
determine_input_encoding(\$octets); |
1394
|
|
|
|
|
|
|
} |
1395
|
|
|
|
|
|
|
if (!$opts->{cvt_from}) { |
1396
|
|
|
|
|
|
|
# Detect the file format by looking at the data. Actually, we only |
1397
|
|
|
|
|
|
|
# support CSV in this case, so this is just a (half-baked) sanity check. |
1398
|
|
|
|
|
|
|
eval { |
1399
|
|
|
|
|
|
|
determine_csv_q_sep(\$octets, \$rows); |
1400
|
|
|
|
|
|
|
if (!$opts->{cvt_from}) { |
1401
|
|
|
|
|
|
|
$rows //= do{ |
1402
|
|
|
|
|
|
|
my $fh = open_input(\$octets); |
1403
|
|
|
|
|
|
|
readparse_csv($fh); |
1404
|
|
|
|
|
|
|
}; |
1405
|
|
|
|
|
|
|
} |
1406
|
|
|
|
|
|
|
}; |
1407
|
|
|
|
|
|
|
if ($@ eq "") { |
1408
|
|
|
|
|
|
|
warn "> Detected $opts->{inpath_sans_sheet} as a seemingly-valid CSV\n" |
1409
|
|
|
|
|
|
|
if $debug; |
1410
|
|
|
|
|
|
|
$opts->{cvt_from} = "csv"; |
1411
|
|
|
|
|
|
|
} else { |
1412
|
|
|
|
|
|
|
croak "Can not detect what kind of file ",qsh($opts->{inpath})," is\n"; |
1413
|
|
|
|
|
|
|
} |
1414
|
|
|
|
|
|
|
} |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
if ($opts->{cvt_from} eq "csv") { |
1417
|
|
|
|
|
|
|
determine_csv_col_formats(\$octets, \$rows); |
1418
|
|
|
|
|
|
|
} else { |
1419
|
|
|
|
|
|
|
oops if defined($octets) or defined($rows); |
1420
|
|
|
|
|
|
|
} |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
# Set default ouput_encoding if not specified |
1423
|
|
|
|
|
|
|
$opts->{output_encoding} //= $default_output_encoding |
1424
|
|
|
|
|
|
|
if $opts->{cvt_to} eq "csv"; |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
}#_determine_enc_tofrom |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
sub _tool_extract_all_csvs($$) { |
1429
|
0
|
|
|
0
|
|
0
|
my ($opts, $destdir) = @_; |
1430
|
|
|
|
|
|
|
|
1431
|
0
|
|
|
|
|
0
|
_get_exclusive_lock($opts); |
1432
|
0
|
|
|
0
|
|
0
|
scope_guard { _release_lock($opts); }; |
|
0
|
|
|
|
|
0
|
|
1433
|
|
|
|
|
|
|
|
1434
|
0
|
|
|
|
|
0
|
delete local $opts->{sheetname}; |
1435
|
0
|
|
|
|
|
0
|
local $opts->{allsheets} = 1; |
1436
|
0
|
0
|
|
|
|
0
|
if (_openlibre_supports_allsheets()) { |
|
|
0
|
|
|
|
|
|
1437
|
0
|
|
|
|
|
0
|
_convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destdir); |
1438
|
|
|
|
|
|
|
} |
1439
|
|
|
|
|
|
|
elsif (_ssconvert_supports_allsheets()) { |
1440
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destdir); |
1441
|
|
|
|
|
|
|
} |
1442
|
0
|
|
|
|
|
0
|
else { confess "Can't extract 'allsheets'. Please install LibreOffice 7.2 or newer" } |
1443
|
|
|
|
|
|
|
} |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
sub _tool_can_extract_csv_byname() { |
1446
|
0
|
0
|
|
0
|
|
0
|
_openlibre_supports_named_sheet() || _ssconvert_supports_named_sheet() |
1447
|
|
|
|
|
|
|
} |
1448
|
|
|
|
|
|
|
sub _tool_extract_one_csv($$) { |
1449
|
0
|
|
|
0
|
|
0
|
my ($opts, $destpath) = @_; |
1450
|
|
|
|
|
|
|
|
1451
|
0
|
|
|
|
|
0
|
_get_exclusive_lock($opts); |
1452
|
0
|
|
|
0
|
|
0
|
scope_guard { _release_lock($opts); }; |
|
0
|
|
|
|
|
0
|
|
1453
|
|
|
|
|
|
|
|
1454
|
0
|
0
|
|
|
|
0
|
if (_openlibre_features()->{available}) { |
1455
|
0
|
0
|
0
|
|
|
0
|
oops if $opts->{sheetname} && !_openlibre_supports_named_sheet(); |
1456
|
0
|
|
|
|
|
0
|
_convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destpath); |
1457
|
|
|
|
|
|
|
} else { |
1458
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath); |
1459
|
|
|
|
|
|
|
} |
1460
|
|
|
|
|
|
|
} |
1461
|
|
|
|
|
|
|
sub _tool_can_extract_current_sheet() { |
1462
|
|
|
|
|
|
|
_openlibre_features()->{available} || _ssconvert_features()->{available} |
1463
|
0
|
0
|
|
0
|
|
0
|
} |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
sub _tool_write_spreadsheet($$) { |
1466
|
0
|
|
|
0
|
|
0
|
my ($opts, $destpath) = @_; |
1467
|
|
|
|
|
|
|
|
1468
|
0
|
|
|
|
|
0
|
_get_exclusive_lock($opts); |
1469
|
0
|
|
|
0
|
|
0
|
scope_guard { _release_lock($opts); }; |
|
0
|
|
|
|
|
0
|
|
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
# ssconvert allows specifying the sheetname when importing a csv but not LO |
1472
|
0
|
0
|
0
|
|
|
0
|
if ($opts->{sheetname} && _ssconvert_supports_writing($opts->{cvt_to})) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1473
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath); |
1474
|
|
|
|
|
|
|
} |
1475
|
|
|
|
|
|
|
elsif (_openlibre_supports_writing($opts->{cvt_to})) { |
1476
|
0
|
0
|
|
|
|
0
|
if ($opts->{sheetname}) { |
1477
|
0
|
|
|
|
|
0
|
carp "WARNING: Sheet name when creating a spreadsheet will be ignored\n"; |
1478
|
0
|
|
|
|
|
0
|
delete $opts->{sheetname}; |
1479
|
|
|
|
|
|
|
} |
1480
|
0
|
|
|
|
|
0
|
_convert_using_openlibre($opts, $opts->{inpath_sans_sheet}, $destpath); |
1481
|
|
|
|
|
|
|
} |
1482
|
|
|
|
|
|
|
elsif (_ssconvert_supports_writing($opts->{cvt_to})) { |
1483
|
0
|
|
|
|
|
0
|
_convert_using_ssconvert($opts, $opts->{inpath_sans_sheet}, $destpath); |
1484
|
|
|
|
|
|
|
} |
1485
|
0
|
|
|
|
|
0
|
else { croak "Can't create $opts->{cvt_to} spreadsheets. Please install LibreOffice 7.2 or newer" } |
1486
|
|
|
|
|
|
|
} |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
# Extract CSVs for every sheet into {outpath} (setting to tmpdir if not preset). |
1490
|
|
|
|
|
|
|
# If cached CSVs are available they are moved into {outpath}/ . |
1491
|
|
|
|
|
|
|
sub _extract_all_csvs($) { |
1492
|
0
|
|
|
0
|
|
0
|
my ($opts) = @_; |
1493
|
0
|
|
|
|
|
0
|
my $outpath = _final_outpath($opts); |
1494
|
0
|
|
|
|
|
0
|
$outpath->mkpath; # nop if exists, croaks if conflicts with file |
1495
|
|
|
|
|
|
|
|
1496
|
0
|
|
|
|
|
0
|
_tool_extract_all_csvs($opts, $outpath); #logs |
1497
|
|
|
|
|
|
|
} |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
# Extract a single sheet into a CSV at {outpath} (defaulting to temp file). |
1501
|
|
|
|
|
|
|
# If a cached CSV is available it is moved to {outpath}. |
1502
|
|
|
|
|
|
|
sub _extract_one_csv($) { |
1503
|
|
|
|
|
|
|
my ($opts) = @_; |
1504
|
|
|
|
|
|
|
my $cachedirpath = _cachedir($opts); |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
my sub _fill_csv_cache() { |
1507
|
|
|
|
|
|
|
$cachedirpath->remove_tree; |
1508
|
|
|
|
|
|
|
$cachedirpath->mkpath; |
1509
|
|
|
|
|
|
|
{ #local $opts->{verbose} = 0; |
1510
|
|
|
|
|
|
|
#local $opts->{debug} = 0; |
1511
|
|
|
|
|
|
|
_tool_extract_all_csvs($opts, $cachedirpath); |
1512
|
|
|
|
|
|
|
} |
1513
|
|
|
|
|
|
|
} |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
my $outpath = _final_outpath($opts); |
1516
|
|
|
|
|
|
|
$outpath->remove unless -d $outpath; |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
if (defined($opts->{sheetname})) { |
1519
|
|
|
|
|
|
|
my $fname = $opts->{sheetname}.".csv"; |
1520
|
|
|
|
|
|
|
my $cached_path = $cachedirpath->child($fname); |
1521
|
|
|
|
|
|
|
if (! -e $cached_path) { |
1522
|
|
|
|
|
|
|
if (_tool_can_extract_csv_byname()) { |
1523
|
|
|
|
|
|
|
_tool_extract_one_csv($opts, $outpath); #logs |
1524
|
|
|
|
|
|
|
return |
1525
|
|
|
|
|
|
|
} |
1526
|
|
|
|
|
|
|
warn ">>Emulating extract-by-name by extracting all csvs into cache...\n" |
1527
|
|
|
|
|
|
|
if $opts->{debug}; |
1528
|
|
|
|
|
|
|
_fill_csv_cache; |
1529
|
|
|
|
|
|
|
} |
1530
|
|
|
|
|
|
|
croak "Sheet '$opts->{sheetname}' does not exist in $opts->{inpath_sans_sheet}\n" |
1531
|
|
|
|
|
|
|
unless -e $cached_path; |
1532
|
|
|
|
|
|
|
warn "> Moving cached $fname to $outpath\n" if $opts->{verbose}; |
1533
|
|
|
|
|
|
|
File::Copy::move($cached_path, $outpath); |
1534
|
|
|
|
|
|
|
return |
1535
|
|
|
|
|
|
|
} |
1536
|
|
|
|
|
|
|
elsif (_tool_can_extract_current_sheet()) { |
1537
|
|
|
|
|
|
|
_tool_extract_one_csv($opts, $outpath); #logs |
1538
|
|
|
|
|
|
|
return |
1539
|
|
|
|
|
|
|
} |
1540
|
|
|
|
|
|
|
else { |
1541
|
|
|
|
|
|
|
_fill_csv_cache; |
1542
|
|
|
|
|
|
|
my @children = $cachedirpath->children; |
1543
|
|
|
|
|
|
|
if (@children == 0) { |
1544
|
|
|
|
|
|
|
croak "$opts->{inpath_sans_sheet} appears to have zero sheets!\n" |
1545
|
|
|
|
|
|
|
} |
1546
|
|
|
|
|
|
|
elsif (@children == 1) { |
1547
|
|
|
|
|
|
|
my $fname = $children[0]->basename; |
1548
|
|
|
|
|
|
|
my $cached_path = $cachedirpath->child($fname); |
1549
|
|
|
|
|
|
|
warn "> Moving cached $fname to $outpath\n" if $opts->{verbose}; |
1550
|
|
|
|
|
|
|
File::Copy::move($cached_path, $outpath); |
1551
|
|
|
|
|
|
|
return |
1552
|
|
|
|
|
|
|
} |
1553
|
|
|
|
|
|
|
else { |
1554
|
|
|
|
|
|
|
croak "$opts->{inpath_sans_sheet} contains multiple sheets; you must specify a sheetname\n" |
1555
|
|
|
|
|
|
|
} |
1556
|
|
|
|
|
|
|
} |
1557
|
|
|
|
|
|
|
} |
1558
|
|
|
|
|
|
|
sub _write_spreadsheet($) { |
1559
|
0
|
|
|
0
|
|
0
|
my ($opts) = @_; |
1560
|
|
|
|
|
|
|
|
1561
|
0
|
|
|
|
|
0
|
my $outpath = _final_outpath($opts); |
1562
|
0
|
0
|
|
|
|
0
|
$outpath->remove unless -d $outpath; |
1563
|
|
|
|
|
|
|
|
1564
|
0
|
|
|
|
|
0
|
_tool_write_spreadsheet($opts, $outpath); |
1565
|
|
|
|
|
|
|
} |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
# If {outpath} is not set, set it to a unique output path in $tempdir |
1568
|
|
|
|
|
|
|
# Always returns {outpath} as a Path::Tiny object. |
1569
|
|
|
|
|
|
|
sub _final_outpath($) { |
1570
|
0
|
|
|
0
|
|
0
|
my $opts = shift; |
1571
|
0
|
0
|
|
|
|
0
|
if (defined $opts->{outpath}) { |
1572
|
0
|
|
|
|
|
0
|
return path($opts->{outpath}); |
1573
|
|
|
|
|
|
|
} else { |
1574
|
0
|
0
|
|
|
|
0
|
my $suf = $opts->{cvt_to} unless $opts->{allsheets}; |
1575
|
|
|
|
|
|
|
return( |
1576
|
0
|
|
|
|
|
0
|
($opts->{outpath}=_path_under_tempdir($opts, suf=>$suf)) |
1577
|
|
|
|
|
|
|
); |
1578
|
|
|
|
|
|
|
} |
1579
|
|
|
|
|
|
|
} |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
sub convert_spreadsheet(@) { |
1582
|
|
|
|
|
|
|
# Set inpath_sans_sheet, sheetname, ifbase, etc. |
1583
|
2
|
|
|
2
|
1
|
7
|
my %opts = &_process_args; |
1584
|
2
|
50
|
|
|
|
25
|
btw dvis('>>> convert_spreadsheet %opts\n') if $opts{debug}; |
1585
|
2
|
|
|
|
|
19
|
my %input_opts = %opts; |
1586
|
|
|
|
|
|
|
|
1587
|
2
|
|
|
|
|
12
|
_create_tempdir_if_needed(\%opts); |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
# intuit cvt_from & cvt_to, detect encoding, and pre-process .csv input |
1590
|
|
|
|
|
|
|
# if needed to avoid corruption of leading zeroes. |
1591
|
2
|
|
|
|
|
39
|
_determine_enc_tofrom(\%opts); |
1592
|
|
|
|
|
|
|
|
1593
|
2
|
|
|
|
|
6
|
my $input_enc = $opts{input_encoding}; |
1594
|
2
|
|
|
|
|
5
|
my $output_enc = $opts{output_encoding}; |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
croak "Either input or output must be 'csv'\n" |
1597
|
2
|
50
|
33
|
|
|
8
|
unless $opts{cvt_from} eq 'csv' || $opts{cvt_to} eq 'csv'; |
1598
|
2
|
50
|
|
|
|
6
|
if ($opts{allsheets}) { |
1599
|
|
|
|
|
|
|
croak "'allsheets' is allowed only with cvt_to => 'csv'" |
1600
|
0
|
0
|
0
|
|
|
0
|
unless ($opts{cvt_to}//"") eq "csv"; |
1601
|
|
|
|
|
|
|
croak "With 'allsheets', a sheet name may not be specified\n" |
1602
|
0
|
0
|
|
|
|
0
|
if $opts{sheetname}; |
1603
|
|
|
|
|
|
|
croak "With 'allsheets', 'outpath' must be a directory if it exists\n" |
1604
|
0
|
0
|
0
|
|
|
0
|
if $opts{outpath} && -e $opts{outpath} && ! -d _; |
|
|
|
0
|
|
|
|
|
1605
|
|
|
|
|
|
|
} |
1606
|
|
|
|
|
|
|
|
1607
|
2
|
|
|
|
|
3
|
my $done; |
1608
|
2
|
50
|
|
|
|
7
|
if ($opts{cvt_from} eq $opts{cvt_to}) { # csv to csv |
1609
|
2
|
50
|
|
|
|
7
|
if (!$opts{allsheets}) { |
1610
|
2
|
50
|
|
|
|
4
|
if ($input_enc ne $output_enc) { |
1611
|
|
|
|
|
|
|
# Special case #1: in & out are CSVs but different encodings. |
1612
|
|
|
|
|
|
|
warn "> Transcoding csv: $input_enc -> $output_enc\n" |
1613
|
0
|
0
|
|
|
|
0
|
if $opts{debug}; |
1614
|
0
|
|
|
|
|
0
|
my $octets = $opts{inpath_sans_sheet}->slurp_raw; |
1615
|
0
|
|
|
|
|
0
|
my $chars = decode($input_enc, $octets, Encode::FB_CROAK); |
1616
|
0
|
|
|
|
|
0
|
$octets = encode($output_enc, $chars, Encode::FB_CROAK); |
1617
|
0
|
|
|
|
|
0
|
path(_final_outpath(\%opts))->spew_raw($octets); |
1618
|
0
|
|
|
|
|
0
|
$done = 1; |
1619
|
|
|
|
|
|
|
} else { |
1620
|
|
|
|
|
|
|
# Special case #2: No conversion is needed: Just copy the file or |
1621
|
|
|
|
|
|
|
# return the input path itself as the output |
1622
|
2
|
50
|
|
|
|
6
|
if (defined $opts{outpath}) { |
1623
|
|
|
|
|
|
|
warn "> No conversion needed, copying into ",qsh($opts{outpath}),"\n" |
1624
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
1625
|
0
|
|
|
|
|
0
|
$opts{inpath_sans_sheet}->copy($opts{outpath}); |
1626
|
0
|
|
|
|
|
0
|
$done = 1; |
1627
|
|
|
|
|
|
|
} else { |
1628
|
2
|
|
|
|
|
5
|
$opts{outpath} = $opts{inpath_sans_sheet}; |
1629
|
|
|
|
|
|
|
warn "> No conversion needed, returning ", qsh($opts{outpath}),"\n" |
1630
|
2
|
50
|
|
|
|
6
|
if $opts{verbose}; |
1631
|
2
|
|
|
|
|
4
|
$done = 1; |
1632
|
|
|
|
|
|
|
} |
1633
|
|
|
|
|
|
|
} |
1634
|
|
|
|
|
|
|
} |
1635
|
|
|
|
|
|
|
else { |
1636
|
|
|
|
|
|
|
# Special case #2: <allsheets> with input already a csv: |
1637
|
|
|
|
|
|
|
# Leave a symlink to the input in the <outpath> directory. |
1638
|
0
|
0
|
|
|
|
0
|
croak "transcoding not implemented in this situation" |
1639
|
|
|
|
|
|
|
if ($input_enc ne $output_enc); |
1640
|
0
|
|
|
|
|
0
|
my $outpath = path(_final_outpath(\%opts)); |
1641
|
0
|
|
|
|
|
0
|
$outpath->mkpath; # nop if exists, croaks if conflicts with file |
1642
|
0
|
|
|
|
|
0
|
my $dest = $outpath->child( $opts{ifbase}.".csv" ); |
1643
|
0
|
|
|
|
|
0
|
my $inpath = $opts{inpath_sans_sheet}; |
1644
|
0
|
|
|
|
|
0
|
my $s = eval{ symlink($inpath, $dest) }; |
|
0
|
|
|
|
|
0
|
|
1645
|
0
|
0
|
0
|
|
|
0
|
if ($@ or !$s) { # symlink unimplmented or insufficient permissions |
1646
|
0
|
0
|
|
|
|
0
|
btw dvis '>> $@' if $opts{debug}; |
1647
|
|
|
|
|
|
|
warn "> No conversion needed! Copying into ", qsh($dest),"\n" |
1648
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
1649
|
0
|
|
|
|
|
0
|
$opts{inpath_sans_sheet}->copy($dest); |
1650
|
|
|
|
|
|
|
} else { |
1651
|
|
|
|
|
|
|
warn "> No conversion needed! Left symlink at ", qsh($dest),"\n" |
1652
|
0
|
0
|
|
|
|
0
|
if $opts{verbose}; |
1653
|
|
|
|
|
|
|
} |
1654
|
0
|
|
|
|
|
0
|
$done = 1; |
1655
|
|
|
|
|
|
|
} |
1656
|
|
|
|
|
|
|
} |
1657
|
2
|
50
|
|
|
|
6
|
if (! $done) { |
1658
|
0
|
0
|
|
|
|
0
|
if ($opts{allsheets}) { |
1659
|
0
|
|
|
|
|
0
|
_extract_all_csvs(\%opts); |
1660
|
|
|
|
|
|
|
} |
1661
|
|
|
|
|
|
|
else { |
1662
|
|
|
|
|
|
|
# Result will be a single file. |
1663
|
0
|
0
|
|
|
|
0
|
if ($opts{cvt_to} eq "csv") { |
1664
|
0
|
|
|
|
|
0
|
_extract_one_csv(\%opts); |
1665
|
|
|
|
|
|
|
} else { |
1666
|
0
|
|
|
|
|
0
|
_write_spreadsheet(\%opts); |
1667
|
|
|
|
|
|
|
} |
1668
|
|
|
|
|
|
|
} |
1669
|
|
|
|
|
|
|
} |
1670
|
|
|
|
|
|
|
my $result = { |
1671
|
|
|
|
|
|
|
defined($output_enc) ? (encoding => $output_enc):(), |
1672
|
8
|
|
|
|
|
43
|
(map{ my $v = $opts{$_}; |
1673
|
8
|
100
|
|
|
|
39
|
($_ => (blessed($v) ? $v->stringify : $v)) |
1674
|
2
|
50
|
|
|
|
6
|
} grep{ defined $opts{$_} } |
|
10
|
|
|
|
|
23
|
|
1675
|
|
|
|
|
|
|
qw/inpath_sans_sheet outpath cvt_from cvt_to sheetname/) |
1676
|
|
|
|
|
|
|
}; |
1677
|
|
|
|
|
|
|
log_call [\%input_opts], [$result, \_fmt_outpath_contents($result)] |
1678
|
2
|
50
|
|
|
|
8
|
if $opts{verbose}; |
1679
|
|
|
|
|
|
|
|
1680
|
2
|
|
|
|
|
8
|
$result; |
1681
|
|
|
|
|
|
|
}#convert_spreadsheet |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
# Open as a CSV, intuiting input encoding, converting from spreadsheet if |
1684
|
|
|
|
|
|
|
# necessary. |
1685
|
|
|
|
|
|
|
# |
1686
|
|
|
|
|
|
|
# :crlf translation is enabled on the resulting file handle, which converts |
1687
|
|
|
|
|
|
|
# DOS CR,LF to \n while passing *nix bare LF through unmolested. |
1688
|
|
|
|
|
|
|
# |
1689
|
|
|
|
|
|
|
# Input argument(s) are the same as for convert_spreadsheet (except |
1690
|
|
|
|
|
|
|
# outpath may not be specified). |
1691
|
|
|
|
|
|
|
# |
1692
|
|
|
|
|
|
|
# Returns a hash containing the file handle and other information. |
1693
|
|
|
|
|
|
|
sub OpenAsCsv { |
1694
|
2
|
50
|
|
2
|
1
|
16
|
my %opts = ( |
1695
|
|
|
|
|
|
|
(@_ == 1 ? (inpath => $_[0]) : (@_)), |
1696
|
|
|
|
|
|
|
cvt_to => 'csv', |
1697
|
|
|
|
|
|
|
); |
1698
|
|
|
|
|
|
|
# TODO: Rename {path} to {inpath} in all usages and rm this cruft; |
1699
|
|
|
|
|
|
|
carp "Obsolete OpenAsCsv usage: Change path to inpath\n" |
1700
|
2
|
50
|
33
|
|
|
9
|
if exists($opts{path}) and !$opts{silent}; |
1701
|
2
|
|
33
|
|
|
12
|
$opts{inpath} //= delete $opts{path}; # be compatible with old API |
1702
|
|
|
|
|
|
|
|
1703
|
2
|
|
|
|
|
6
|
my $inpath = delete $opts{inpath}; |
1704
|
2
|
50
|
|
|
|
8
|
croak "OpenAsCsv: missing 'inpath' option\n" unless $inpath; |
1705
|
2
|
50
|
|
|
|
16
|
croak "OpenAsCsv: outpath may not be specified\n" if $opts{outpath}; |
1706
|
|
|
|
|
|
|
|
1707
|
2
|
|
|
|
|
12
|
my $h = convert_spreadsheet($inpath, %opts, verbose => $opts{debug}); |
1708
|
2
|
50
|
|
|
|
9
|
oops "sheetname key bug" if exists $h->{sheet}; |
1709
|
|
|
|
|
|
|
|
1710
|
2
|
|
33
|
|
|
6
|
my $csvpath = $h->{outpath} // oops; # same as {inpath} if already a CSV |
1711
|
2
|
50
|
|
|
|
81
|
open my $fh, "<", $csvpath or croak "$csvpath : $!\n"; |
1712
|
2
|
50
|
|
|
|
46
|
binmode $fh, ":crlf:encoding(".$h->{encoding}.")" or die "binmode:$!"; |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
my $r = { |
1715
|
|
|
|
|
|
|
fh => $fh, |
1716
|
|
|
|
|
|
|
csvpath => $csvpath, |
1717
|
|
|
|
|
|
|
inpath => $inpath, |
1718
|
2
|
100
|
|
|
|
134
|
(map{ exists($h->{$_}) ? ($_ => $h->{$_}) : () } |
|
10
|
|
|
|
|
31
|
|
1719
|
|
|
|
|
|
|
qw/inpath_sans_sheet sheetname encoding tempdir raw_values/), |
1720
|
|
|
|
|
|
|
}; |
1721
|
|
|
|
|
|
|
|
1722
|
2
|
|
|
|
|
14
|
return $r; |
1723
|
|
|
|
|
|
|
} |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
1; |
1726
|
|
|
|
|
|
|
__END__ |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
=pod |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
=head1 NAME |
1731
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
Spreadsheet::Edit::IO - convert between spreadsheet and csv files |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
=head1 SYNOPSIS |
1735
|
|
|
|
|
|
|
|
1736
|
|
|
|
|
|
|
use Spreadsheet::Edit::IO qw/ |
1737
|
|
|
|
|
|
|
convert_spreadsheet OpenAsCsv |
1738
|
|
|
|
|
|
|
cx2let let2cx |
1739
|
|
|
|
|
|
|
@sane_CSV_read_options @sane_CSV_write_options/; |
1740
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
# Open a CSV file or result of converting a sheet from a spreadsheet |
1742
|
|
|
|
|
|
|
my $hash = OpenAsCsv("/path/to/spreadsheet.odt!Sheet1"); # single-arg form |
1743
|
|
|
|
|
|
|
my $hash = OpenAsCsv(inpath => "/path/to/spreadsheet.odt", |
1744
|
|
|
|
|
|
|
sheetname -> "Sheet1"); |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
print "Reading ",$hash->{csvpath}," with encoding ",$hash->{encoding},"\n"; |
1747
|
|
|
|
|
|
|
while (<$hash->{fh}>) { ... } |
1748
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
# Convert CSV to spreadsheet in temp file (deleted at process exit) |
1750
|
|
|
|
|
|
|
$hash = convert_spreadsheet(inpath => "mycsv.csv", cvt_to => "xlsx"); |
1751
|
|
|
|
|
|
|
print "Output is $hash->{outpath}\n"; # e.g. "/tmp/dwYT6qf/mycsv.xlsx" |
1752
|
|
|
|
|
|
|
|
1753
|
|
|
|
|
|
|
# Convert *all* sheets to CSV files in a temp directory |
1754
|
|
|
|
|
|
|
$hash = convert_spreadsheet(inpath => "mywork.xls", cvt_to => "csv", |
1755
|
|
|
|
|
|
|
allsheets => 1); |
1756
|
|
|
|
|
|
|
opendir $dh, $hash->{outpath}; |
1757
|
|
|
|
|
|
|
while (readrir($h)) { say "Output csv file is $_" } |
1758
|
|
|
|
|
|
|
|
1759
|
|
|
|
|
|
|
# Transcode a CSV from windows-1252 to UTF-8 |
1760
|
|
|
|
|
|
|
convert_spreadsheet( |
1761
|
|
|
|
|
|
|
inpath => "input.csv", input_encoding => 'windows-1252', |
1762
|
|
|
|
|
|
|
outpath => "output.csv", output_encodoutg => 'UTF-8', |
1763
|
|
|
|
|
|
|
); |
1764
|
|
|
|
|
|
|
|
1765
|
|
|
|
|
|
|
# Translate between 0-based column index and letter code (A, B, etc.) |
1766
|
|
|
|
|
|
|
print cx2let(0); # "A" |
1767
|
|
|
|
|
|
|
print let2cx("A"); # 0 |
1768
|
|
|
|
|
|
|
print cx2let(26); # "AA" |
1769
|
|
|
|
|
|
|
print let2cx("ABC"); # 730 |
1770
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
# Extract components from "filepath!SHEETNAME" specifiers |
1772
|
|
|
|
|
|
|
my $path = filepath_from_spec("/path/to/spreasheet.xls!Sheet1") |
1773
|
|
|
|
|
|
|
my $sheetname = sheetname_from_spec("/path/to/spreasheet.xls!Sheet1") |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
# Parse a csv file with sane options |
1776
|
|
|
|
|
|
|
my $csv = Text::CSV->new({ @sane_CSV_read_options, eol => $hash->{eol} }) |
1777
|
|
|
|
|
|
|
or die "ERROR: ".Text::CSV->error_diag (); |
1778
|
|
|
|
|
|
|
my @rows |
1779
|
|
|
|
|
|
|
while (my $F = $csv->getline( $infh )) { |
1780
|
|
|
|
|
|
|
push @rows, $F; |
1781
|
|
|
|
|
|
|
} |
1782
|
|
|
|
|
|
|
close $infh or die "Error reading ", $hash->csvpath(), ": $!"; |
1783
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
# Write a csv file with sane options |
1785
|
|
|
|
|
|
|
my $ocsv = Text::CSV->new({ @sane_CSV_write_options }) |
1786
|
|
|
|
|
|
|
or die "ERROR: ".Text::CSV->error_diag (); |
1787
|
|
|
|
|
|
|
open my $outfh, ">:encoding(utf8)", $outpath |
1788
|
|
|
|
|
|
|
or die "$outpath: $!"; |
1789
|
|
|
|
|
|
|
foreach (@rows) { $ocsv->print($outfh, $_) } |
1790
|
|
|
|
|
|
|
close $outfh or die "Error writing $outpath: $!"; |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
Convert between CSV and spreadsheet files using external tools, plus some utility functions |
1795
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
Currently this uses LibreOffice or OpenOffice (whatever is installed). An external tool is not needed when only CSV files are involved. |
1797
|
|
|
|
|
|
|
|
1798
|
|
|
|
|
|
|
=head2 $hash = OpenAsCsv INPUT |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
=head2 $hash = OpenAsCsv inpath => INPUT, sheetname => SHEETNAME, ... |
1801
|
|
|
|
|
|
|
|
1802
|
|
|
|
|
|
|
This is a thin wrapper for C<convert_spreadsheet> followed by C<open> |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
If a single argument is given it specifies INPUT; otherwise all arguments must |
1805
|
|
|
|
|
|
|
be specified as key => value pairs, and may include any options supported |
1806
|
|
|
|
|
|
|
by C<convert_spreadsheet>. |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
INPUT may be a csv or spreadsheet workbook path; if a spreadsheet, |
1809
|
|
|
|
|
|
|
then a single "sheet" is converted, specified by either a !SHEETNAME suffix |
1810
|
|
|
|
|
|
|
in the INPUT path, a separate C<< sheetname => SHEETNAME >> option, |
1811
|
|
|
|
|
|
|
or unspecified to extract the only sheet (croaks if there is more than one). |
1812
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
The resulting file handle refers to a guaranteed-seekable CSV file; |
1814
|
|
|
|
|
|
|
this will either be a temporary file (auto-removed at process exit), |
1815
|
|
|
|
|
|
|
or the original INPUT if it was already a seekable csv file. |
1816
|
|
|
|
|
|
|
|
1817
|
|
|
|
|
|
|
RETURNS: A ref to a hash containing the following: |
1818
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
{ |
1820
|
|
|
|
|
|
|
fh => the resulting open file handle |
1821
|
|
|
|
|
|
|
csvpath => the path {fh} refers to, which might be a temporary file |
1822
|
|
|
|
|
|
|
sheetname => sheet name if the input was a spreadsheet |
1823
|
|
|
|
|
|
|
} |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
=head2 convert_spreadsheet INPUT, cvt_to=>suffix, OPTIONS |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
=head2 convert_spreadsheet INPUT, cvt_to=>"csv", allsheets => 1, OPTIONS |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
Convert CSV to spreadsheet or vice-versa, or transcode CSV to CSV. |
1830
|
|
|
|
|
|
|
|
1831
|
|
|
|
|
|
|
RETURNS: A ref to a hash containing: |
1832
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
{ |
1834
|
|
|
|
|
|
|
outpath => path to the output file (or directory with 'allsheets') |
1835
|
|
|
|
|
|
|
(a temporary file/dir or as you specified in OPTIONS). |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
encoding => the encoding used when writing .csv files |
1838
|
|
|
|
|
|
|
} |
1839
|
|
|
|
|
|
|
|
1840
|
|
|
|
|
|
|
INPUT is the input file path; it may be a separate first argument as |
1841
|
|
|
|
|
|
|
shown above, or else included in OPTIONS as C<< inpath =E<gt> INPUT >>. |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
If C<outpath =E<gt> OUTPATH> is specifed then results are I<always> saved |
1844
|
|
|
|
|
|
|
to that path. With C<allsheets> this is a directory, which will be created |
1845
|
|
|
|
|
|
|
if necessary. |
1846
|
|
|
|
|
|
|
|
1847
|
|
|
|
|
|
|
If C<outpath> is NOT specified in OPTIONS then, with one exception, |
1848
|
|
|
|
|
|
|
results are saved to a temporary file or directory and that path is returned |
1849
|
|
|
|
|
|
|
as C<outpath> in the result hash. |
1850
|
|
|
|
|
|
|
The exception is if no conversion is necessary |
1851
|
|
|
|
|
|
|
(i.e. C<cvt_from> is the same as C<cvt_to>), when the |
1852
|
|
|
|
|
|
|
input file itself is returned as C<outpath>. |
1853
|
|
|
|
|
|
|
|
1854
|
|
|
|
|
|
|
In all cases C<outpath> in the result hash points to the results. |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
C<cvt_to> or C<cvt_from> are filename suffixes (sans dot) |
1857
|
|
|
|
|
|
|
e.g. "csv", "xlsx", etc., and need not be specified when indicated by |
1858
|
|
|
|
|
|
|
C<outpath> or INPUT parameters. |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
OPTIONS may also include: |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
=over 4 |
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
=item sheetname => "sheet name" |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
The workbook 'sheet' name used when reading or writing a spreadsheet. |
1867
|
|
|
|
|
|
|
An input sheet name may also be specified as "!sheetname" appended to |
1868
|
|
|
|
|
|
|
the INPUT path. |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
=item allsheets => BOOL |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
B<All> sheets in the input |
1873
|
|
|
|
|
|
|
are converted to separate .csv files named "SHEETNAME.csv" in |
1874
|
|
|
|
|
|
|
the 'outpath' directory. C<< cvt_to =E<gt> 'csv' >> is also requred. |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
=item input_encoding => ENCODING |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
Specifies the encoding of INPUT if it is a csv file. |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
ENCODING may be a comma-separated list of encoding |
1881
|
|
|
|
|
|
|
names which will be tried in the order until one seems to work. |
1882
|
|
|
|
|
|
|
If only one is specified it will be used without trying it first. |
1883
|
|
|
|
|
|
|
The default is "UTF-8,windows-1252". |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
=item output_encoding => ENCODING |
1886
|
|
|
|
|
|
|
|
1887
|
|
|
|
|
|
|
Used when writing csv file(s), defaults to 'UTF-8'. |
1888
|
|
|
|
|
|
|
|
1889
|
|
|
|
|
|
|
=item col_formats => [...] |
1890
|
|
|
|
|
|
|
|
1891
|
|
|
|
|
|
|
This specifies how CSV data is imported into a spreadsheet. Each element |
1892
|
|
|
|
|
|
|
of the array may contain: |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
undef, "standard" or "" (LibreOffice will auto-detect) |
1895
|
|
|
|
|
|
|
"text" (imported as unmolested text) |
1896
|
|
|
|
|
|
|
"MM/DD/YY", |
1897
|
|
|
|
|
|
|
"DD/MM/YY", |
1898
|
|
|
|
|
|
|
"YY/MM/DD", |
1899
|
|
|
|
|
|
|
"ignore" (do not import this column) |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
Elements may also contain the numeric format codes defined by LibreOffice |
1902
|
|
|
|
|
|
|
at L<https://wiki.documentfoundation.org/Documentation/DevGuide/Spreadsheet_Documents#Filter_Options_for_the_CSV_Filter> |
1903
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
B<Automatic format detection:> |
1905
|
|
|
|
|
|
|
Input CSV data is pre-scanned to auto-detect column formats |
1906
|
|
|
|
|
|
|
as much as possible. This usually works well as long as dates are |
1907
|
|
|
|
|
|
|
represented unambiguously, e.g. "2021-01-01" or "Jan 1, 2023". |
1908
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
Specifically, this detects leading zeroes such as in U.S. Zip Codes, |
1910
|
|
|
|
|
|
|
and MM/DD/YY or DD/MM/YY dates when a DD happens to be more than 12. |
1911
|
|
|
|
|
|
|
|
1912
|
|
|
|
|
|
|
=item verbose => BOOL |
1913
|
|
|
|
|
|
|
|
1914
|
|
|
|
|
|
|
=back |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
=head3 B<'binmode' Argument For Reading result CSVs> |
1917
|
|
|
|
|
|
|
|
1918
|
|
|
|
|
|
|
It is not possible to control the line-ending style in output CSV files, |
1919
|
|
|
|
|
|
|
but the following incantation will correctly read either DOS/Windows (CR,LF) |
1920
|
|
|
|
|
|
|
or *nix (LF) line endings properly, i.e. as a single \n: |
1921
|
|
|
|
|
|
|
|
1922
|
|
|
|
|
|
|
open my $fh, "<", $resulthash->{outpath}; |
1923
|
|
|
|
|
|
|
my $enc = $resulthash->{encoding}; |
1924
|
|
|
|
|
|
|
binmode($fh, ":raw:encoding($enc):crlf"); |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
=head2 @sane_CSV_read_options |
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
=head2 @sane_CSV_write_options |
1929
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
These contain options you will always want to use with |
1931
|
|
|
|
|
|
|
S<<< C<< Text::CSV->new() >> >>>. |
1932
|
|
|
|
|
|
|
Specifically, quotes and embedded newlines are handled correctly. |
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
Not exported by default. |
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
=head2 cx2let COLUMNINDEX |
1937
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
=head2 let2cx LETTERCODE |
1939
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
Functions which translate between spreadsheet-column |
1941
|
|
|
|
|
|
|
letter codes ("A", "B", etc.) and 0-based column indicies. |
1942
|
|
|
|
|
|
|
Not exported by default. |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
=head2 filepath_from_spec EXPR |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
=head2 sheetname_from_spec EXPR |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
Functions which decompose strings containing a spreadsheet path and possibly sheetname |
1949
|
|
|
|
|
|
|
suffix, of the form "FILEPATH!SHEETNAME", "FILEPATH|||SHEETNAME", or "FILEPATH[SHEETNAME]". |
1950
|
|
|
|
|
|
|
C<sheetname_from_spec> returns C<undef> if the input does not have a |
1951
|
|
|
|
|
|
|
a sheetname suffix. |
1952
|
|
|
|
|
|
|
Not exported by default. |
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
=head2 form_spec_with_sheetname(PATH, SHEENAME) |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
Composes a combined string in a "preferred" format (currently "PATH!SHEETNAME"). |
1957
|
|
|
|
|
|
|
Not exported by default. |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
=head1 Feature Test Functions |
1960
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
=head2 $bool = can_cvt_spreadsheets(); |
1962
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
=head2 $bool = can_extract_allsheets(); |
1964
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
=head2 $bool = can_extract_named_sheet(); |
1966
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
These functions return false if the corresponding operations |
1968
|
|
|
|
|
|
|
are not possible because LibreOffice (or, someday gnumeric) is not installed |
1969
|
|
|
|
|
|
|
or is an older version which does not have needed capabilities. |
1970
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
=head2 $path = openlibreoffice_path(); |
1972
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
Returns the detected path of I<soffice> (Apache Open Office or Libre Office) |
1974
|
|
|
|
|
|
|
or undef if not found. |
1975
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
These are not exported by default. |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
=head1 SEE ALSO |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
L<Spreadsheet::Edit> and L<Text::CSV> |
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
=cut |
1983
|
|
|
|
|
|
|
|