line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package SeaBASS::File; |
2
|
|
|
|
|
|
|
|
3
|
23
|
|
|
23
|
|
1199445
|
use strict; |
|
23
|
|
|
|
|
34
|
|
|
23
|
|
|
|
|
593
|
|
4
|
23
|
|
|
23
|
|
81
|
use warnings; |
|
23
|
|
|
|
|
26
|
|
|
23
|
|
|
|
|
1304
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
SeaBASS::File - Object-oriented interface for reading/writing SeaBASS files |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
version 0.170190 |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=cut |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.170190'; # VERSION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
To read SeaBASS files: |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input.txt"); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Calculate the average chlorophyll value using next |
27
|
|
|
|
|
|
|
my $chl_total = 0; |
28
|
|
|
|
|
|
|
my $measurements = 0; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
while (my $row = $sb_file->next()){ |
31
|
|
|
|
|
|
|
if (defined($row->{'chl'})){ |
32
|
|
|
|
|
|
|
$chl_total += $row->{'chl'}; |
33
|
|
|
|
|
|
|
$measurements++; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
if ($measurements){ |
37
|
|
|
|
|
|
|
print $chl_total/$measurements; |
38
|
|
|
|
|
|
|
} else { |
39
|
|
|
|
|
|
|
print "No chl values."; |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
#alternatively: |
43
|
|
|
|
|
|
|
$sb_file->rewind(); |
44
|
|
|
|
|
|
|
while (my %row = $sb_file->next()){ |
45
|
|
|
|
|
|
|
if (defined($row{'chl'})){ |
46
|
|
|
|
|
|
|
$chl_total += $row{'chl'}; |
47
|
|
|
|
|
|
|
$measurements++; |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Calculate the average chlorophyll value using where |
52
|
|
|
|
|
|
|
my $chl_total2 = 0; |
53
|
|
|
|
|
|
|
my $measurements2 = 0; |
54
|
|
|
|
|
|
|
$sb_file->where(sub { |
55
|
|
|
|
|
|
|
if (defined($_->{'chl'})){ |
56
|
|
|
|
|
|
|
$chl_total2 += $_->{'chl'}; |
57
|
|
|
|
|
|
|
$measurements2++; |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
}); |
60
|
|
|
|
|
|
|
if ($measurements2){ |
61
|
|
|
|
|
|
|
print $chl_total2/$measurements2; |
62
|
|
|
|
|
|
|
} else { |
63
|
|
|
|
|
|
|
print "No chl values."; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Or to modify SeaBASS files: |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input.txt"); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Add a one degree bias to water temperature |
73
|
|
|
|
|
|
|
while (my $row = $sb_file->next()){ |
74
|
|
|
|
|
|
|
$row->{'wt'} += 1; |
75
|
|
|
|
|
|
|
$sb_file->update($row); |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
$sb_file->write(); # to STDOUT |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# Remove the one degree bias to water temperature |
81
|
|
|
|
|
|
|
$sb_file->where(sub { |
82
|
|
|
|
|
|
|
$_->{'wt'} -= 1; |
83
|
|
|
|
|
|
|
}); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
$sb_file->write("output_file.txt"); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Or to start a SeaBASS file from scratch: |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
use SeaBASS::File qw(STRICT_READ STRICT_WRITE INSERT_BEGINNING INSERT_END); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new({strict => 0, add_empty_headers => 1}); |
92
|
|
|
|
|
|
|
$sb_file->add_field('lat','degrees'); |
93
|
|
|
|
|
|
|
$sb_file->add_field('lon','degrees'); |
94
|
|
|
|
|
|
|
$sb_file->append({'lat' => 1, 'lon' => 2}); |
95
|
|
|
|
|
|
|
$sb_file->append("3,4"); # or if you're reading from a CSV file |
96
|
|
|
|
|
|
|
$sb_file->write(); |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 DESCRIPTION |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
C provides an easy to use, object-oriented interface for |
101
|
|
|
|
|
|
|
reading, writing, and modifying SeaBASS data files. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 What is SeaBASS? |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
L Bio-optical Archive and Storage |
106
|
|
|
|
|
|
|
System housed at Goddard Space Flight Center. |
107
|
|
|
|
|
|
|
L provides the permanent public |
108
|
|
|
|
|
|
|
repository for data collected under the auspices of the NASA Ocean Biology and |
109
|
|
|
|
|
|
|
Biogeochemistry Program. It also houses data collected by participants in the |
110
|
|
|
|
|
|
|
NASA Sensor Intercomparision and Merger for Biological and Oceanic |
111
|
|
|
|
|
|
|
Interdisciplinary Studies (SIMBIOS) Program. SeaBASS includes marine |
112
|
|
|
|
|
|
|
bio-optical, biogeochemical, and (some) atmospheric data. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head2 SeaBASS File Format |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
SeaBASS files are plain ASCII files with a special header and a matrix of |
117
|
|
|
|
|
|
|
values. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head3 Header |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
The SeaBASS header block consists of many lines of header-keyword pairs. Some |
122
|
|
|
|
|
|
|
headers are optional but most, although technically not required for reading, |
123
|
|
|
|
|
|
|
are required to be ingested into the system. More detailed information is |
124
|
|
|
|
|
|
|
available in the SeaBASS L
|
125
|
|
|
|
|
|
|
article|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=metadataheaders>. |
126
|
|
|
|
|
|
|
The only absolutely required header for this module to work is the /fields |
127
|
|
|
|
|
|
|
line. This module turns fields and units lowercase at all times. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
/begin_header |
130
|
|
|
|
|
|
|
/delimiter=space |
131
|
|
|
|
|
|
|
/missing=-999 |
132
|
|
|
|
|
|
|
/fields=date,time,lat,lon,depth,wt,sal |
133
|
|
|
|
|
|
|
/end_header |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head3 Body |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
The SeaBASS body is a matrix of data values, organized much like a spreadsheet. |
138
|
|
|
|
|
|
|
Each column is separated by the value presented in the /delimiter header. |
139
|
|
|
|
|
|
|
Likewise, missing values are indicated by the value presented in the /missing |
140
|
|
|
|
|
|
|
header. The /fields header identifies the geophysical parameter presented in |
141
|
|
|
|
|
|
|
each column. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
/begin_header |
144
|
|
|
|
|
|
|
/delimiter=space |
145
|
|
|
|
|
|
|
/missing=-999 |
146
|
|
|
|
|
|
|
/fields=date,time,lat,lon,depth,wt,sal |
147
|
|
|
|
|
|
|
/end_header |
148
|
|
|
|
|
|
|
19920109 16:30:00 31.389 -64.702 3.4 20.7320 -999 |
149
|
|
|
|
|
|
|
19920109 16:30:00 31.389 -64.702 19.1 20.7350 -999 |
150
|
|
|
|
|
|
|
19920109 16:30:00 31.389 -64.702 38.3 20.7400 -999 |
151
|
|
|
|
|
|
|
19920109 16:30:00 31.389 -64.702 59.6 20.7450 -999 |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head3 Strictly Speaking |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
SeaBASS files are run through a program called |
156
|
|
|
|
|
|
|
L before |
157
|
|
|
|
|
|
|
they are submitted and before they are ingested into a NASA relational database |
158
|
|
|
|
|
|
|
management system. Some of the things it checks for are required |
159
|
|
|
|
|
|
|
L |
160
|
|
|
|
|
|
|
and proper L
|
161
|
|
|
|
|
|
|
names|http://seabass.gsfc.nasa.gov/wiki/article.cgi?article=stdfields>. All |
162
|
|
|
|
|
|
|
data must always have an associated depth, time, and location, though these |
163
|
|
|
|
|
|
|
fields may be placed in the header and are not always required in the data. |
164
|
|
|
|
|
|
|
Just because this module writes the files does not mean they will pass FCHECK. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Files are case-INsensitive. Headers are not allowed to have any whitespace. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
169
|
|
|
|
|
|
|
|
170
|
23
|
|
|
23
|
|
80
|
use Carp qw(:DEFAULT); |
|
23
|
|
|
|
|
30
|
|
|
23
|
|
|
|
|
3095
|
|
171
|
23
|
|
|
23
|
|
91
|
use Fcntl qw(SEEK_SET); |
|
23
|
|
|
|
|
27
|
|
|
23
|
|
|
|
|
893
|
|
172
|
23
|
|
|
23
|
|
5776
|
use List::MoreUtils qw(firstidx each_arrayref); |
|
23
|
|
|
|
|
85184
|
|
|
23
|
|
|
|
|
121
|
|
173
|
23
|
|
|
23
|
|
18832
|
use Date::Calc qw(Add_Delta_Days); |
|
23
|
|
|
|
|
112397
|
|
|
23
|
|
|
|
|
1659
|
|
174
|
23
|
|
|
23
|
|
137
|
use Scalar::Util qw(looks_like_number); |
|
23
|
|
|
|
|
30
|
|
|
23
|
|
|
|
|
215980
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
require Exporter; |
177
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
178
|
|
|
|
|
|
|
our @EXPORT_OK = qw(STRICT_READ STRICT_WRITE STRICT_ALL INSERT_BEGINNING INSERT_END); |
179
|
|
|
|
|
|
|
our @EXPORT = qw(); |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
our $AUTOLOAD; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 EXPORT |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
This module does not export anything by default. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head2 STRICT_READ |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
C is used with the C option, enabling error messages when |
190
|
|
|
|
|
|
|
reading header lines and inserting header data. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head2 STRICT_WRITE |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
C is used with the C option, enabling error messages when |
195
|
|
|
|
|
|
|
writing the data to a file/stream. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head2 STRICT_ALL |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
C is used with the C option, enabling C and |
200
|
|
|
|
|
|
|
C. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 INSERT_BEGINNING |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
C is used with L
|
205
|
|
|
|
|
|
|
\@data_row | $data_row | %data_row)"> or L
|
206
|
|
|
|
|
|
|
$unit [, $position]])"> to insert a data row or field at the beginning of their |
207
|
|
|
|
|
|
|
respective lists. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 INSERT_END |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
C is used with L
|
212
|
|
|
|
|
|
|
$data_row | %data_row)"> or L
|
213
|
|
|
|
|
|
|
$position]])"> to insert a data row or field at the end of their respective |
214
|
|
|
|
|
|
|
lists. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=cut |
217
|
|
|
|
|
|
|
|
218
|
280
|
|
|
280
|
1
|
7367
|
sub STRICT_READ {1} |
219
|
440
|
|
|
440
|
1
|
667
|
sub STRICT_WRITE {2} |
220
|
1
|
|
|
1
|
1
|
1757
|
sub STRICT_ALL {3} |
221
|
22
|
|
|
22
|
1
|
46
|
sub INSERT_BEGINNING {0} |
222
|
104
|
|
|
104
|
1
|
178
|
sub INSERT_END {-1} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
my %DEFAULT_OPTIONS = ( |
225
|
|
|
|
|
|
|
default_headers => {}, |
226
|
|
|
|
|
|
|
headers => {}, |
227
|
|
|
|
|
|
|
preserve_case => 1, |
228
|
|
|
|
|
|
|
keep_slashes => 0, |
229
|
|
|
|
|
|
|
cache => 1, |
230
|
|
|
|
|
|
|
delete_missing_headers => 0, |
231
|
|
|
|
|
|
|
missing_data_to_undef => 1, |
232
|
|
|
|
|
|
|
preserve_comments => 1, |
233
|
|
|
|
|
|
|
add_empty_headers => 0, |
234
|
|
|
|
|
|
|
strict => STRICT_WRITE, |
235
|
|
|
|
|
|
|
fill_ancillary_data => 0, |
236
|
|
|
|
|
|
|
preserve_header => 0, |
237
|
|
|
|
|
|
|
); |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
#return values for ref() |
240
|
|
|
|
|
|
|
my %OPTION_TYPES = ( |
241
|
|
|
|
|
|
|
default_headers => [ 'ARRAY', 'HASH' ], |
242
|
|
|
|
|
|
|
headers => [ 'ARRAY', 'HASH' ], |
243
|
|
|
|
|
|
|
preserve_case => [''], |
244
|
|
|
|
|
|
|
keep_slashes => [''], |
245
|
|
|
|
|
|
|
cache => [''], |
246
|
|
|
|
|
|
|
delete_missing_headers => [''], |
247
|
|
|
|
|
|
|
missing_data_to_undef => [''], |
248
|
|
|
|
|
|
|
preserve_comments => [''], |
249
|
|
|
|
|
|
|
add_empty_headers => [''], |
250
|
|
|
|
|
|
|
strict => [''], |
251
|
|
|
|
|
|
|
fill_ancillary_data => [''], |
252
|
|
|
|
|
|
|
preserve_header => [''], |
253
|
|
|
|
|
|
|
); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# All headers required by STRICT_READ and STRICT_WRITE |
256
|
|
|
|
|
|
|
my @REQUIRED_HEADERS = qw( |
257
|
|
|
|
|
|
|
begin_header |
258
|
|
|
|
|
|
|
investigators |
259
|
|
|
|
|
|
|
affiliations |
260
|
|
|
|
|
|
|
contact |
261
|
|
|
|
|
|
|
experiment |
262
|
|
|
|
|
|
|
cruise |
263
|
|
|
|
|
|
|
data_file_name |
264
|
|
|
|
|
|
|
documents |
265
|
|
|
|
|
|
|
calibration_files |
266
|
|
|
|
|
|
|
data_type |
267
|
|
|
|
|
|
|
start_date |
268
|
|
|
|
|
|
|
end_date |
269
|
|
|
|
|
|
|
start_time |
270
|
|
|
|
|
|
|
end_time |
271
|
|
|
|
|
|
|
north_latitude |
272
|
|
|
|
|
|
|
south_latitude |
273
|
|
|
|
|
|
|
east_longitude |
274
|
|
|
|
|
|
|
west_longitude |
275
|
|
|
|
|
|
|
missing |
276
|
|
|
|
|
|
|
delimiter |
277
|
|
|
|
|
|
|
units |
278
|
|
|
|
|
|
|
end_header |
279
|
|
|
|
|
|
|
); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# Headers that must be specified, regardless of strictness. |
282
|
|
|
|
|
|
|
my @ABSOLUTELY_REQUIRED_HEADERS = qw( |
283
|
|
|
|
|
|
|
fields |
284
|
|
|
|
|
|
|
); |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Valid headers used for STRICT_READ and STRICT_WRITE |
287
|
|
|
|
|
|
|
our @ALL_HEADERS = qw( |
288
|
|
|
|
|
|
|
begin_header |
289
|
|
|
|
|
|
|
investigators |
290
|
|
|
|
|
|
|
affiliations |
291
|
|
|
|
|
|
|
contact |
292
|
|
|
|
|
|
|
experiment |
293
|
|
|
|
|
|
|
cruise |
294
|
|
|
|
|
|
|
station |
295
|
|
|
|
|
|
|
data_file_name |
296
|
|
|
|
|
|
|
documents |
297
|
|
|
|
|
|
|
calibration_files |
298
|
|
|
|
|
|
|
data_type |
299
|
|
|
|
|
|
|
data_status |
300
|
|
|
|
|
|
|
start_date |
301
|
|
|
|
|
|
|
end_date |
302
|
|
|
|
|
|
|
start_time |
303
|
|
|
|
|
|
|
end_time |
304
|
|
|
|
|
|
|
north_latitude |
305
|
|
|
|
|
|
|
south_latitude |
306
|
|
|
|
|
|
|
east_longitude |
307
|
|
|
|
|
|
|
west_longitude |
308
|
|
|
|
|
|
|
cloud_percent |
309
|
|
|
|
|
|
|
measurement_depth |
310
|
|
|
|
|
|
|
secchi_depth |
311
|
|
|
|
|
|
|
water_depth |
312
|
|
|
|
|
|
|
wave_height |
313
|
|
|
|
|
|
|
wind_speed |
314
|
|
|
|
|
|
|
missing |
315
|
|
|
|
|
|
|
below_detection_limit |
316
|
|
|
|
|
|
|
above_detection_limit |
317
|
|
|
|
|
|
|
delimiter |
318
|
|
|
|
|
|
|
fields |
319
|
|
|
|
|
|
|
units |
320
|
|
|
|
|
|
|
end_header |
321
|
|
|
|
|
|
|
); |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# headers that are allowed but are not to be added during add_empty_headers |
324
|
|
|
|
|
|
|
my @HIDDEN_HEADERS = qw( |
325
|
|
|
|
|
|
|
received |
326
|
|
|
|
|
|
|
optical_depth_warning |
327
|
|
|
|
|
|
|
); |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# what to set the missing value to if it's not defined |
330
|
|
|
|
|
|
|
our $DEFAULT_MISSING = -999; |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# overly complex data structure only understandable by idiots |
333
|
|
|
|
|
|
|
# your IQ is the percentage chance that it won't make sense |
334
|
|
|
|
|
|
|
# IE: my IQ = 40, I have a 60% chance of it making sense to me |
335
|
|
|
|
|
|
|
my %ANCILLARY = ( |
336
|
|
|
|
|
|
|
'lat' => [ { 'north_latitude' => qr/^(.*?)$/ }, { 'south_latitude' => qr/^(.*?)$/ } ], |
337
|
|
|
|
|
|
|
'lon' => [ { 'east_longitude' => qr/^(.*?)$/ }, { 'west_longitude' => qr/^(.*?)$/ } ], |
338
|
|
|
|
|
|
|
'depth' => [ { 'measurement_depth' => qr/^(.*?)$/ }, ], |
339
|
|
|
|
|
|
|
'date_time' => [ '$date $time', ], |
340
|
|
|
|
|
|
|
'date' => [ [ \&julian_to_greg, '$year$julian' ], [ \&julian_to_greg, '$year$jd' ], '$year$month$day', ], |
341
|
|
|
|
|
|
|
'year' => [ { '$date' => qr/^(\d{4})/ }, { 'start_date' => qr/^(\d{4})/ }, ], |
342
|
|
|
|
|
|
|
'month' => [ { '$date' => qr/^\d{4}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}(\d{2})\d{2}$/, '$year$jd' ], { 'start_date' => qr/^\d{4}(\d{2})/ }, ], |
343
|
|
|
|
|
|
|
'day' => [ { '$date' => qr/^\d{6}(\d{2})/ }, [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$julian' ], [ \&julian_to_greg, qr/^\d{4}\d{2}(\d{2})$/, '$year$jd' ], { 'start_date' => qr/^\d{6}(\d{2})/ }, ], |
344
|
|
|
|
|
|
|
'time' => [ '$hour:$minute:$second', ], |
345
|
|
|
|
|
|
|
'hour' => [ { '$time' => qr/^(\d+):/ }, { 'start_time' => qr/^(\d+):/ }, ], |
346
|
|
|
|
|
|
|
'minute' => [ { '$time' => qr/:(\d+):/ }, { 'start_time' => qr/:(\d+):/ }, ], |
347
|
|
|
|
|
|
|
'second' => [ { '$time' => qr/:(\d+)(?:[^:\d]|$)/ }, { 'start_time' => qr/:(\d+)(?:[^:\d]|$)/ }, ], |
348
|
|
|
|
|
|
|
'station' => [ { 'station' => qr/^(.*?)$/ }, ], |
349
|
|
|
|
|
|
|
); |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# what fill_ancillary_data adds to each row |
352
|
|
|
|
|
|
|
#my @FILL_ANCILLARY_DATA = qw(date time date_time lat lon depth); |
353
|
|
|
|
|
|
|
my @FILL_ANCILLARY_DATA = keys(%ANCILLARY); |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
my %FIELD_FORMATTING = ( |
356
|
|
|
|
|
|
|
'year' => '%04d', |
357
|
|
|
|
|
|
|
'month' => '%02d', |
358
|
|
|
|
|
|
|
'day' => '%02d', |
359
|
|
|
|
|
|
|
'julian' => '%03d', |
360
|
|
|
|
|
|
|
'hour' => '%02d', |
361
|
|
|
|
|
|
|
'minute' => '%02d', |
362
|
|
|
|
|
|
|
'second' => '%02d', |
363
|
|
|
|
|
|
|
); |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=head2 new([$filename,] [\%options]) |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input_file.txt"); |
370
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input_file.txt", { delete_missing_headers => 1 }); |
371
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("output_file.txt", { add_empty_headers => 1 }); |
372
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new({ add_empty_headers => 1 }); |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
Creates a C object. If the file specified exists, the object |
375
|
|
|
|
|
|
|
can be used to read the file. If the file specified does not exist, an empty |
376
|
|
|
|
|
|
|
object is created and will be written to the specified file by default when |
377
|
|
|
|
|
|
|
invoking C. |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Options should be given in a hash reference to ensure proper argument parsing. |
380
|
|
|
|
|
|
|
If a file is specified, options can be given as a hash list. |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=over 4 |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item * default_headers |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=item * headers |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
These two options accept either an array reference or a hash reference. They |
389
|
|
|
|
|
|
|
are used to set or override header information. First, headers are read from |
390
|
|
|
|
|
|
|
C, then from the data file itself, then are overridden by |
391
|
|
|
|
|
|
|
whatever is in C. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Arguments are an array reference of header lines, or a hash reference of |
394
|
|
|
|
|
|
|
header/value pairs. |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new({ |
397
|
|
|
|
|
|
|
default_headers => [ |
398
|
|
|
|
|
|
|
'/cruise=fake_cruise', |
399
|
|
|
|
|
|
|
'/experiment=default_experiment', |
400
|
|
|
|
|
|
|
], |
401
|
|
|
|
|
|
|
headers => { |
402
|
|
|
|
|
|
|
'experiment' => 'real_experiment', |
403
|
|
|
|
|
|
|
}, |
404
|
|
|
|
|
|
|
}); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
B Modifying the delimiter or missing value will likely break the |
407
|
|
|
|
|
|
|
object. Modifying these will change the expected format for all rows. Do so |
408
|
|
|
|
|
|
|
with caution. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=item * preserve_case |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
C<1> or C<0>, default C<1>. Setting this to C<0> will change all values in the |
413
|
|
|
|
|
|
|
header to lowercase. Header descriptors (the /header part) are always turned |
414
|
|
|
|
|
|
|
to lowercase, as well as all fields and units. |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=item * keep_slashes |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
C<1> or C<0>, default C<0>. Forces the object to keep the / in the beginning of |
419
|
|
|
|
|
|
|
headers when accessed. If set to C<1>, when using the L
|
420
|
|
|
|
|
|
|
\%new_headers | \@get_headers | @get_headers ])"> function, they will be |
421
|
|
|
|
|
|
|
returned with leading slash. |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=item * cache |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
C<1> or C<0>, default C<1>. Enables caching data rows as they are read. This |
426
|
|
|
|
|
|
|
speeds up re-reads and allows the data to be modified. This is required for |
427
|
|
|
|
|
|
|
writing files. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=item * delete_missing_headers |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
C<1> or C<0>, default C<0>. Any headers that are equal to the /missing header, |
432
|
|
|
|
|
|
|
NA, or are not defined (when using the C options) are |
433
|
|
|
|
|
|
|
deleted. They cannot be retrieved using C and will not be written. |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item * missing_data_to_undef |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
C<1> or C<0>, default C<1>. If any values in the data block are equal to the |
438
|
|
|
|
|
|
|
/missing, /above_detection_limit, /below_detection_limit headers, they are set |
439
|
|
|
|
|
|
|
to undef when they are retrieved. |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=item * preserve_comments |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
C<1> or C<0>, default C<1>. Setting this option to zero will discard any |
444
|
|
|
|
|
|
|
comments found in the header. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=item * add_empty_headers |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
C<0>, C<1>, or a string. If set to a string, this will populate any missing |
449
|
|
|
|
|
|
|
headers, including optional ones, and will set their value to the string given. |
450
|
|
|
|
|
|
|
If set to 1, the string 'NA' is used. This option disables C. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=item * strict |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_ALL}); |
455
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input_file.txt", {strict => (STRICT_READ | STRICT_WRITE)}); |
456
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input_file.txt", {strict => 0}); |
457
|
|
|
|
|
|
|
my $sb_file = SeaBASS::File->new("input_file.txt", {strict => STRICT_WRITE}); #default |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over 4 |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item * C |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
C will throw errors when reading invalid headers, missing required |
464
|
|
|
|
|
|
|
ones, or an invalid delimiter. This may change in future revisions. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=item * C |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
C will throw the same errors when writing the data to a file or |
469
|
|
|
|
|
|
|
stream. C only checks for required headers and invalid headers, |
470
|
|
|
|
|
|
|
but does not check their values to see if they are actually filled. This may |
471
|
|
|
|
|
|
|
change in future revisions. |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=back |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=item * fill_ancillary_data |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
C<0> or C<1>, default C<0>. Insert date, time, measurement depth, station, and |
478
|
|
|
|
|
|
|
location values to the data rows from the headers. Values are not overridden |
479
|
|
|
|
|
|
|
if they are already present. This option is only useful when reading files. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
B It is bad practice to include these fields in the data if they don't |
482
|
|
|
|
|
|
|
change throughout the file. This option is used to remove the burden of |
483
|
|
|
|
|
|
|
checking whether they are in the data or header. |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
B This option will also combine individual date/time |
486
|
|
|
|
|
|
|
parts in the data (year/month/day/etc) to create more uniform date/time fields. |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
B If any part of a date/time is missing, the fields |
489
|
|
|
|
|
|
|
dependent on it will not be added to the row. |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=item * preserve_header |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
C<0> or C<1>, default C<0>. Preserves header and comment order. This option |
494
|
|
|
|
|
|
|
disables modifying the header, as well, but will not error if you try -- it will |
495
|
|
|
|
|
|
|
simply not be reflected in the output. |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=back |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=cut |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
sub new { |
502
|
137
|
|
|
137
|
1
|
163101
|
my ( $class, $file ) = ( shift, shift ); |
503
|
|
|
|
|
|
|
|
504
|
137
|
|
|
|
|
219
|
my $self = bless( {}, $class ); |
505
|
|
|
|
|
|
|
|
506
|
137
|
|
|
|
|
155
|
my %myoptions; |
507
|
|
|
|
|
|
|
|
508
|
137
|
100
|
|
|
|
472
|
if ( ref($file) eq 'HASH' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
509
|
4
|
|
|
|
|
32
|
%myoptions = ( %DEFAULT_OPTIONS, %$file ); |
510
|
4
|
|
|
|
|
8
|
$file = ''; |
511
|
|
|
|
|
|
|
} elsif ( ref( $_[0] ) eq 'HASH' ) { |
512
|
125
|
|
|
|
|
403
|
%myoptions = ( %DEFAULT_OPTIONS, %{ $_[0] } ); |
|
125
|
|
|
|
|
692
|
|
513
|
|
|
|
|
|
|
} elsif ( !ref( $_[0] ) ) { |
514
|
8
|
100
|
|
|
|
23
|
if ( $#_ % 2 == 1 ) { |
515
|
7
|
|
|
|
|
65
|
%myoptions = ( %DEFAULT_OPTIONS, @_ ); |
516
|
|
|
|
|
|
|
} else { |
517
|
1
|
|
|
|
|
187
|
croak('Even sized list expected'); |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} else { |
520
|
0
|
|
|
|
|
0
|
croak("Arguments not understood."); |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
|
523
|
136
|
|
|
|
|
363
|
$self->{'options'} = \%myoptions; |
524
|
136
|
|
|
|
|
330
|
$self->check_options(); |
525
|
|
|
|
|
|
|
|
526
|
132
|
50
|
|
|
|
371
|
if ( ref($file) eq 'GLOB' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
527
|
0
|
|
|
|
|
0
|
$self->{'handle'} = $file; |
528
|
|
|
|
|
|
|
} elsif ( ref($file) eq 'SCALAR' ) { |
529
|
126
|
|
|
|
|
1368
|
open( my $fh, "<", $file ); |
530
|
126
|
|
|
|
|
250
|
$self->{'handle'} = $fh; |
531
|
|
|
|
|
|
|
} elsif ($file) { |
532
|
2
|
100
|
|
|
|
7
|
if ( !ref($file) ) { |
533
|
1
|
50
|
|
|
|
31
|
if ( -r $file ) { |
|
|
0
|
|
|
|
|
|
534
|
1
|
|
|
|
|
33
|
open( my $fh, "<", $file ); |
535
|
1
|
|
|
|
|
22
|
$self->{'handle'} = $fh; |
536
|
|
|
|
|
|
|
} elsif ( $self->{'options'}{'strict'} & STRICT_READ ) { |
537
|
0
|
|
|
|
|
0
|
croak("Strict read set, but input file not found or unreadable."); |
538
|
|
|
|
|
|
|
} else { |
539
|
0
|
|
|
|
|
0
|
$self->{'default_write_to'} = $file; |
540
|
0
|
|
|
|
|
0
|
$file = ''; |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
} else { |
543
|
1
|
|
|
|
|
94
|
croak("Invalid parameter, expected file path or file handle."); |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
} ## end elsif ($file) |
546
|
131
|
100
|
|
|
|
284
|
if ($file) { |
547
|
127
|
50
|
|
|
|
527
|
unless ( $self->read_headers() ) { |
548
|
0
|
0
|
|
|
|
0
|
unless ( $self->{'options'}{'strict'} & STRICT_READ ) { |
549
|
0
|
|
|
|
|
0
|
return; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
} else { |
553
|
4
|
|
|
|
|
11
|
$self->create_blank_file(); |
554
|
|
|
|
|
|
|
} |
555
|
127
|
|
|
|
|
303
|
return $self; |
556
|
|
|
|
|
|
|
} ## end sub new |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
=head1 OBJECT METHODS |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=head2 add_headers(\%headers | \@header_lines | @header_lines) |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
$sb_file->add_headers({'investigators' => 'jason_lefler'}); |
563
|
|
|
|
|
|
|
$sb_file->add_headers(['/investigators=jason_lefler']); |
564
|
|
|
|
|
|
|
$sb_file->add_headers('/investigators=jason_lefler'); |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
C is used to add or override metadata for a C, as |
567
|
|
|
|
|
|
|
well as add comments. |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
This function can not be used to change fields/units, see |
570
|
|
|
|
|
|
|
L and |
571
|
|
|
|
|
|
|
L for that. |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
B Modifying the delimiter or missing value halfway through |
574
|
|
|
|
|
|
|
reading/writing will likely break the object. Modifying these will change the |
575
|
|
|
|
|
|
|
expected format for any new or non-cached rows. Do so with caution. |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=cut |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
sub add_headers { |
580
|
394
|
|
|
394
|
1
|
334
|
my $self = shift; |
581
|
394
|
|
|
|
|
315
|
my $success = 1; |
582
|
394
|
|
|
|
|
556
|
my $strict = $self->{'options'}{'strict'} & STRICT_WRITE; |
583
|
394
|
100
|
|
|
|
806
|
if ( ref( $_[0] ) eq 'HASH' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
584
|
257
|
|
|
|
|
222
|
while ( my ( $k, $v ) = each( %{ $_[0] } ) ) { |
|
265
|
|
|
|
|
729
|
|
585
|
8
|
|
|
|
|
17
|
$success &= $self->validate_header( $k, $v, $strict ); |
586
|
8
|
|
|
|
|
17
|
$self->{'headers'}{$k} = $v; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
} elsif ( ref( $_[0] ) eq 'ARRAY' ) { |
589
|
136
|
|
|
|
|
111
|
foreach ( @{ $_[0] } ) { |
|
136
|
|
|
|
|
251
|
|
590
|
2563
|
100
|
100
|
|
|
5354
|
if ( $_ =~ /^\s*!/ ) { |
|
|
100
|
|
|
|
|
|
591
|
11
|
|
|
|
|
6
|
push( @{ $self->{'comments'} }, $_ ); |
|
11
|
|
|
|
|
18
|
|
592
|
|
|
|
|
|
|
} elsif ( $strict && $_ !~ m"^/" ) { |
593
|
2
|
|
|
|
|
340
|
carp("Invalid header line: $_"); |
594
|
2
|
|
|
|
|
197
|
$success = 0; |
595
|
|
|
|
|
|
|
} else { |
596
|
2550
|
|
|
|
|
4223
|
my ( $k, $v ) = split( /=/, $_, 2 ); |
597
|
2550
|
|
|
|
|
3444
|
$success &= $self->validate_header( $k, $v, $strict ); |
598
|
2550
|
|
|
|
|
4186
|
$self->{'headers'}{$k} = $v; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
} ## end foreach (@{$_[0]}) |
601
|
|
|
|
|
|
|
} elsif ( !ref( $_[0] ) ) { |
602
|
1
|
|
|
|
|
4
|
foreach (@_) { |
603
|
1
|
50
|
33
|
|
|
10
|
if ( $_ =~ /^\s*!/ ) { |
|
|
50
|
|
|
|
|
|
604
|
0
|
|
|
|
|
0
|
push( @{ $self->{'comments'} }, $_ ); |
|
0
|
|
|
|
|
0
|
|
605
|
|
|
|
|
|
|
} elsif ( $strict && $_ !~ m"^/" ) { |
606
|
0
|
|
|
|
|
0
|
carp("Invalid header line: $_"); |
607
|
0
|
|
|
|
|
0
|
$success = 0; |
608
|
|
|
|
|
|
|
} else { |
609
|
1
|
|
|
|
|
5
|
my ( $k, $v ) = split( /=/, $_, 2 ); |
610
|
1
|
|
|
|
|
4
|
$success &= $self->validate_header( $k, $v, $strict ); |
611
|
1
|
|
|
|
|
3
|
$self->{'headers'}{$k} = $v; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
} ## end foreach (@_) |
614
|
|
|
|
|
|
|
} else { |
615
|
0
|
|
|
|
|
0
|
$success = 0; |
616
|
|
|
|
|
|
|
} |
617
|
394
|
|
|
|
|
437
|
return $success; |
618
|
|
|
|
|
|
|
} ## end sub add_headers |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
=head2 headers([ \%new_headers | \@get_headers | @get_headers ]) |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=head2 head |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=head2 h |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
my %headers = $sb_file->headers(['investigators']); |
627
|
|
|
|
|
|
|
print Dumper(\%headers); # { investigators => 'jason_lefler' } |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
my ($inv) = $sb_file->headers('investigators'); |
630
|
|
|
|
|
|
|
print $inv; # jason_lefler |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
$sb_file->headers({investigators => 'jason_lefler'}); |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
$sb_file->headers()->{'investigators'} = 'jason_lefler'; |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
C is used to read or modify header values. Given an array reference |
637
|
|
|
|
|
|
|
of header names, it will return a hash/hash reference with header/value pairs. |
638
|
|
|
|
|
|
|
Given a plain list of header names, it will return an array/array reference of |
639
|
|
|
|
|
|
|
the given header values. Given a hash reference, this function is a proxy for |
640
|
|
|
|
|
|
|
L. |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
If C is set, then headers will be returned as such, IE: C<< |
643
|
|
|
|
|
|
|
{'/investigators' => 'jason_lefler'} >>. |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
This function can also be used to set header values without going through the |
646
|
|
|
|
|
|
|
normal validation. |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
C and C are aliases to C. |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
651
|
|
|
|
|
|
|
|
652
|
0
|
|
|
0
|
1
|
0
|
sub head { shift->headers(@_); } |
653
|
22
|
|
|
22
|
1
|
839
|
sub h { shift->headers(@_); } |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
sub headers { |
656
|
25
|
|
|
25
|
1
|
718
|
my $self = shift; |
657
|
25
|
100
|
|
|
|
55
|
if ( !@_ ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
658
|
21
|
|
|
|
|
104
|
return $self->{'headers'}; |
659
|
|
|
|
|
|
|
} elsif ( ref( $_[0] ) eq 'HASH' ) { |
660
|
1
|
|
|
|
|
3
|
return $self->add_headers(@_); |
661
|
|
|
|
|
|
|
} elsif ( ref( $_[0] ) eq 'ARRAY' ) { |
662
|
1
|
|
|
|
|
27
|
my %ret; |
663
|
1
|
|
|
|
|
2
|
for my $header ( @{ $_[0] } ) { |
|
1
|
|
|
|
|
5
|
|
664
|
2
|
|
|
|
|
5
|
$ret{$header} = $self->{'headers'}{ lc($header) }; |
665
|
|
|
|
|
|
|
} |
666
|
1
|
50
|
|
|
|
2
|
if (wantarray) { |
667
|
1
|
|
|
|
|
8
|
return %ret; |
668
|
|
|
|
|
|
|
} else { |
669
|
0
|
|
|
|
|
0
|
return \%ret; |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
} else { |
672
|
2
|
|
|
|
|
2
|
my @ret; |
673
|
2
|
|
|
|
|
5
|
foreach (@_) { |
674
|
4
|
50
|
|
|
|
7
|
if ( !ref ) { |
675
|
4
|
|
|
|
|
9
|
my $value = $self->{'headers'}{ lc($_) }; |
676
|
4
|
50
|
|
|
|
10
|
push( @ret, defined($value) ? $value : undef ); |
677
|
|
|
|
|
|
|
} else { |
678
|
0
|
|
|
|
|
0
|
croak("Argument not understood: $_"); |
679
|
|
|
|
|
|
|
} |
680
|
|
|
|
|
|
|
} ## end foreach (@_) |
681
|
2
|
100
|
|
|
|
8
|
if (wantarray) { |
|
|
50
|
|
|
|
|
|
682
|
1
|
|
|
|
|
4
|
return @ret; |
683
|
|
|
|
|
|
|
} elsif ( $#ret == 0 ) { |
684
|
0
|
|
|
|
|
0
|
return $ret[0]; |
685
|
|
|
|
|
|
|
} else { |
686
|
1
|
|
|
|
|
3
|
return \@ret; |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
} ## end else [ if (!@_) ] |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
} ## end sub headers |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=head2 data([$index]) |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=head2 d |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=head2 body |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head2 b |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=head2 all |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
my $row = $sb_file->data(1); |
703
|
|
|
|
|
|
|
my @rows = $sb_file->all(); |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
C is responsible for returning either a data line via an index or all of |
706
|
|
|
|
|
|
|
the data lines at once. |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
Data is returned as C<< field => value >> pairs. |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
If given an index: in list context, returns the hash of the row; in scalar |
711
|
|
|
|
|
|
|
context, returns a reference to the row. |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
If not given an index: in list context, returns an array of the rows; in scalar |
714
|
|
|
|
|
|
|
context, returns a reference to an array of the rows. |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
If given an index out of range, returns undef. If given a negative index, |
717
|
|
|
|
|
|
|
Cs the file, then returns undef. |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
If C is enabled and the row has already been read, it is retrieved from |
720
|
|
|
|
|
|
|
the cache. If it has not already be read, all rows leading up to the desired |
721
|
|
|
|
|
|
|
row will be read and cached, and the desired row returned. |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
If C is disabled and either all rows are retrieved or a previously |
724
|
|
|
|
|
|
|
retrieved row is called again, the file will C, then seek to the |
725
|
|
|
|
|
|
|
desired row. |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
C, C, C, and C are all aliases to C. (Yes, that means |
728
|
|
|
|
|
|
|
C can be used with arguments, it would just look silly.) |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=cut |
731
|
|
|
|
|
|
|
|
732
|
0
|
|
|
0
|
1
|
0
|
sub body { shift->data(@_); } |
733
|
0
|
|
|
0
|
1
|
0
|
sub b { shift->data(@_); } |
734
|
2
|
|
|
2
|
1
|
998
|
sub d { shift->data(@_); } |
735
|
42
|
|
|
42
|
1
|
4898
|
sub all { shift->data(@_); } |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
sub data { |
738
|
130
|
|
|
130
|
1
|
4594
|
my ( $self, $index ) = @_; |
739
|
130
|
100
|
|
|
|
215
|
if ( defined($index) ) { |
740
|
78
|
100
|
|
|
|
152
|
if ( $index < 0 ) { |
741
|
12
|
|
|
|
|
17
|
$self->rewind(); |
742
|
12
|
|
|
|
|
12
|
return; |
743
|
|
|
|
|
|
|
} |
744
|
66
|
100
|
|
|
|
122
|
if ( $self->{'options'}{'cache'} ) { |
745
|
60
|
100
|
|
|
|
109
|
if ( $index > $self->{'max_dataidx'} ) { |
746
|
36
|
|
|
|
|
32
|
my $startidx = $self->{'dataidx'}; |
747
|
36
|
|
|
|
|
80
|
for ( my $i = 0; $i < ( $index - $startidx ); $i++ ) { |
748
|
70
|
100
|
|
|
|
109
|
if ( !$self->next() ) { |
749
|
4
|
|
|
|
|
9
|
return; |
750
|
|
|
|
|
|
|
} |
751
|
|
|
|
|
|
|
} |
752
|
|
|
|
|
|
|
} ## end if ($index > $self->{'max_dataidx'...}) |
753
|
|
|
|
|
|
|
|
754
|
56
|
|
|
|
|
50
|
$self->{'dataidx'} = $index; |
755
|
|
|
|
|
|
|
|
756
|
56
|
50
|
|
|
|
79
|
if (wantarray) { |
757
|
0
|
|
|
|
|
0
|
return %{ $self->{'data'}[$index] }; |
|
0
|
|
|
|
|
0
|
|
758
|
|
|
|
|
|
|
} else { |
759
|
56
|
|
|
|
|
239
|
return $self->{'data'}[$index]; |
760
|
|
|
|
|
|
|
} |
761
|
|
|
|
|
|
|
} else { |
762
|
6
|
100
|
|
|
|
17
|
if ( $index <= $self->{'dataidx'} ) { |
763
|
3
|
|
|
|
|
7
|
$self->rewind(); |
764
|
|
|
|
|
|
|
} |
765
|
6
|
|
|
|
|
8
|
my $startidx = $self->{'dataidx'}; |
766
|
6
|
|
|
|
|
16
|
for ( my $i = 0; $i < ( $index - $startidx - 1 ); $i++ ) { |
767
|
7
|
50
|
|
|
|
16
|
if ( !$self->next() ) { |
768
|
0
|
|
|
|
|
0
|
return; |
769
|
|
|
|
|
|
|
} |
770
|
|
|
|
|
|
|
} |
771
|
6
|
|
|
|
|
11
|
return $self->next(); |
772
|
|
|
|
|
|
|
} ## end else [ if ($self->{'options'}...)] |
773
|
|
|
|
|
|
|
} else { |
774
|
52
|
100
|
|
|
|
114
|
if ( $self->{'options'}{'cache'} ) { |
775
|
45
|
|
|
|
|
88
|
while ( $self->next() ) { |
776
|
|
|
|
|
|
|
# noop |
777
|
|
|
|
|
|
|
} |
778
|
45
|
100
|
|
|
|
83
|
if (wantarray) { |
779
|
7
|
|
|
|
|
5
|
return @{ $self->{'data'} }; |
|
7
|
|
|
|
|
27
|
|
780
|
|
|
|
|
|
|
} else { |
781
|
38
|
|
|
|
|
131
|
return $self->{'data'}; |
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
} else { |
784
|
7
|
|
|
|
|
18
|
$self->rewind(); |
785
|
7
|
|
|
|
|
6
|
my @data_rows; |
786
|
7
|
|
|
|
|
15
|
while ( my $data = $self->next() ) { |
787
|
28
|
|
|
|
|
57
|
push( @data_rows, $data ); |
788
|
|
|
|
|
|
|
} |
789
|
7
|
100
|
|
|
|
17
|
if (wantarray) { |
790
|
6
|
|
|
|
|
25
|
return @data_rows; |
791
|
|
|
|
|
|
|
} else { |
792
|
1
|
|
|
|
|
4
|
return \@data_rows; |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
} ## end else [ if ($self->{'options'}...)] |
795
|
|
|
|
|
|
|
} ## end else [ if (defined($index)) ] |
796
|
|
|
|
|
|
|
} ## end sub data |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=head2 next() |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
while (my $row = $sb_file->next()){ |
801
|
|
|
|
|
|
|
print $row->{'lat'}; |
802
|
|
|
|
|
|
|
... |
803
|
|
|
|
|
|
|
} |
804
|
|
|
|
|
|
|
while (my %row = $sb_file->next()){ |
805
|
|
|
|
|
|
|
print $row{'lat'}; |
806
|
|
|
|
|
|
|
... |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
Returns the next data row in the file, returning C when it runs out of |
810
|
|
|
|
|
|
|
rows. |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
Data is returned as C<< field => value >> pairs. |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
In list context, returns a hash of the row. In scalar context, returns a |
815
|
|
|
|
|
|
|
reference to the hash of a row. |
816
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
After a C, C will return the very first data hash, then each row |
818
|
|
|
|
|
|
|
in turn. If the row has been cached, it's retrieved from the cache instead of |
819
|
|
|
|
|
|
|
rereading from the file. |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
=cut |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
sub next { |
824
|
426
|
|
|
426
|
1
|
12639
|
my $self = shift; |
825
|
426
|
50
|
|
|
|
641
|
if (@_) { |
826
|
0
|
|
|
|
|
0
|
croak("invalid number of arguments on next(), expected 0."); |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
|
829
|
426
|
100
|
100
|
|
|
1588
|
if ( $self->{'options'}{'cache'} && $self->{'dataidx'} < $self->{'max_dataidx'} ) { |
|
|
100
|
|
|
|
|
|
830
|
73
|
|
|
|
|
68
|
$self->{'dataidx'}++; |
831
|
|
|
|
|
|
|
|
832
|
73
|
50
|
|
|
|
92
|
if (wantarray) { |
833
|
0
|
|
|
|
|
0
|
return %{ $self->{'data'}[ $self->{'dataidx'} ] }; |
|
0
|
|
|
|
|
0
|
|
834
|
|
|
|
|
|
|
} else { |
835
|
73
|
|
|
|
|
221
|
return $self->{'data'}[ $self->{'dataidx'} ]; |
836
|
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
|
} elsif ( $self->{'handle'} ) { |
838
|
341
|
|
|
|
|
292
|
my $handle = $self->{'handle'}; |
839
|
341
|
|
|
|
|
279
|
my $line_number = $self->{'line_number'}; |
840
|
|
|
|
|
|
|
|
841
|
341
|
|
|
|
|
1025
|
while ( my $line = <$handle> ) { |
842
|
263
|
|
|
|
|
195
|
$line_number++; |
843
|
263
|
|
|
|
|
376
|
strip($line); |
844
|
263
|
50
|
|
|
|
386
|
if ($line) { |
845
|
263
|
|
|
|
|
469
|
my $data_row = $self->make_data_hash($line); |
846
|
263
|
|
|
|
|
318
|
$self->{'line_number'} = $line_number; |
847
|
263
|
100
|
|
|
|
417
|
if ( $self->{'options'}{'cache'} ) { |
848
|
201
|
|
|
|
|
152
|
push( @{ $self->{'data'} }, $data_row ); |
|
201
|
|
|
|
|
271
|
|
849
|
|
|
|
|
|
|
} |
850
|
263
|
|
|
|
|
243
|
$self->{'dataidx'}++; |
851
|
263
|
100
|
|
|
|
415
|
if ( $self->{'dataidx'} > $self->{'max_dataidx'} ) { |
852
|
235
|
|
|
|
|
219
|
$self->{'max_dataidx'} = $self->{'dataidx'}; |
853
|
|
|
|
|
|
|
} |
854
|
263
|
50
|
|
|
|
304
|
if (wantarray) { |
855
|
0
|
|
|
|
|
0
|
return %{$data_row}; |
|
0
|
|
|
|
|
0
|
|
856
|
|
|
|
|
|
|
} else { |
857
|
263
|
|
|
|
|
779
|
return $data_row; |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
} ## end if ($line) |
860
|
|
|
|
|
|
|
} ## end while (my $line = <$handle>) |
861
|
|
|
|
|
|
|
} ## end elsif ($self->{'handle'}) |
862
|
90
|
|
|
|
|
194
|
return; |
863
|
|
|
|
|
|
|
} ## end sub next |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=head2 rewind() |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
C seeks to the start of the data. The next C will return the |
868
|
|
|
|
|
|
|
very first row (or C). If caching is enabled, it will not actually |
869
|
|
|
|
|
|
|
perform a seek, it will merely reset the index interator. If caching is |
870
|
|
|
|
|
|
|
disabled, a seek is performed on the file handle to return to the start of the |
871
|
|
|
|
|
|
|
data. |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
=cut |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
sub rewind { |
876
|
61
|
|
|
61
|
1
|
991
|
my ($self) = @_; |
877
|
61
|
100
|
|
|
|
131
|
if ( $self->{'dataidx'} != -1 ) { |
878
|
31
|
100
|
|
|
|
62
|
if ( !$self->{'options'}{'cache'} ) { |
879
|
9
|
|
|
|
|
20
|
seek( $self->{'handle'}, $self->{'data_start_position'}, SEEK_SET ); |
880
|
|
|
|
|
|
|
} |
881
|
31
|
|
|
|
|
32
|
$self->{'line_number'} = $self->{'data_start_line'}; |
882
|
31
|
|
|
|
|
40
|
$self->{'dataidx'} = -1; |
883
|
|
|
|
|
|
|
} ## end if ($self->{'dataidx'}...) |
884
|
|
|
|
|
|
|
} ## end sub rewind |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=head2 update(\%data_row | \@data_row | $data_row | %data_row) |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
while (my %row = $sb_file->next()){ |
889
|
|
|
|
|
|
|
if ($row{'depth'} == -999){ |
890
|
|
|
|
|
|
|
$row{'depth'} = 0; |
891
|
|
|
|
|
|
|
} |
892
|
|
|
|
|
|
|
$sb_file->update(\%row); |
893
|
|
|
|
|
|
|
} |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
# Less useful for update(): |
896
|
|
|
|
|
|
|
print join(',',@{$sb_file->actual_fields()}); #lat,lon,depth,chl |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
while (my %row = $sb_file->next()){ |
899
|
|
|
|
|
|
|
if ($row{'depth'} == -999){ |
900
|
|
|
|
|
|
|
$row{'depth'} = 0; |
901
|
|
|
|
|
|
|
} |
902
|
|
|
|
|
|
|
$sb_file->update(@row{'lat','lon','depth','chl'}); |
903
|
|
|
|
|
|
|
# or |
904
|
|
|
|
|
|
|
$sb_file->update([@row{'lat','lon','depth','chl'}]); |
905
|
|
|
|
|
|
|
} |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
C replaces the last row read (using C) with the input. |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
Caching must be enabled to use C, C, or C. |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=cut |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
sub update { |
914
|
12
|
|
|
12
|
1
|
50
|
my $self = shift; |
915
|
12
|
100
|
|
|
|
32
|
if ( !$self->{'options'}{'cache'} ) { |
|
|
100
|
|
|
|
|
|
916
|
1
|
|
|
|
|
123
|
croak("Caching must be enabled to write."); |
917
|
|
|
|
|
|
|
} elsif ( $self->{'dataidx'} == -1 ) { |
918
|
1
|
|
|
|
|
223
|
croak("No rows read yet."); |
919
|
|
|
|
|
|
|
} |
920
|
10
|
|
|
|
|
19
|
my $new_row = $self->ingest_row(@_); |
921
|
10
|
100
|
|
|
|
20
|
unless ( defined($new_row) ) { |
922
|
2
|
|
|
|
|
141
|
croak("Error parsing inputs"); |
923
|
|
|
|
|
|
|
} |
924
|
8
|
|
|
|
|
14
|
$self->{'data'}[ $self->{'dataidx'} ] = $new_row; |
925
|
|
|
|
|
|
|
} ## end sub update |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=head2 set($index, \%data_row | \@data_row | $data_row | %data_row) |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
my %row = (lat => 1, lon => 2, chl => 1); |
930
|
|
|
|
|
|
|
$sb_file->set(0, \%row); |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl |
933
|
|
|
|
|
|
|
$sb_file->set(0, [1, 2, 1]); |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
C replaces the row at the given index with the input. Seeks to the |
937
|
|
|
|
|
|
|
given index if it has not been read to yet. Cs if the file does not go |
938
|
|
|
|
|
|
|
up to the index specified. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
Caching must be enabled to use C, C, or C. |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
=cut |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
sub set { |
945
|
3
|
|
|
3
|
1
|
13
|
my $self = shift; |
946
|
3
|
|
|
|
|
4
|
my $index = shift; |
947
|
3
|
50
|
|
|
|
9
|
if ( !$self->{'options'}{'cache'} ) { |
948
|
0
|
|
|
|
|
0
|
croak("Caching must be enabled to write"); |
949
|
|
|
|
|
|
|
} |
950
|
3
|
100
|
|
|
|
6
|
if ( $index < 0 ) { |
951
|
1
|
|
|
|
|
161
|
croak("Index must be positive integer"); |
952
|
|
|
|
|
|
|
} |
953
|
2
|
|
|
|
|
6
|
my $new_row = $self->ingest_row(@_); |
954
|
2
|
50
|
|
|
|
5
|
unless ( defined($new_row) ) { |
955
|
0
|
|
|
|
|
0
|
croak("Error parsing inputs"); |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
|
958
|
2
|
50
|
|
|
|
4
|
if ( $index > $self->{'max_dataidx'} ) { |
959
|
2
|
|
|
|
|
2
|
my $current_idx = $self->{'dataidx'}; |
960
|
2
|
|
|
|
|
5
|
$self->data($index); |
961
|
2
|
|
|
|
|
2
|
$self->{'dataidx'} = $current_idx; |
962
|
|
|
|
|
|
|
|
963
|
2
|
100
|
|
|
|
5
|
if ( $index > $self->{'max_dataidx'} ) { |
964
|
1
|
|
|
|
|
81
|
croak("Index out of bounds."); |
965
|
|
|
|
|
|
|
} |
966
|
|
|
|
|
|
|
} ## end if ($index > $self->{'max_dataidx'...}) |
967
|
|
|
|
|
|
|
|
968
|
1
|
|
|
|
|
3
|
$self->{'data'}[$index] = $new_row; |
969
|
|
|
|
|
|
|
} ## end sub set |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
=head2 insert($index, \%data_row | \@data_row | $data_row | %data_row) |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
use SeaBASS::File qw(INSERT_BEGINNING INSERT_END); |
974
|
|
|
|
|
|
|
... |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
my %row = (lat => 1, lon => 2, chl => 1); |
977
|
|
|
|
|
|
|
$sb_file->insert(INSERT_BEGINNING, \%row); |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
print join(',',@{$sb_file->actual_fields()}); #lat,lon,chl |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
$sb_file->insert(1, [1, 2, 1]); |
982
|
|
|
|
|
|
|
$sb_file->insert(INSERT_END, [1, 2, 1]); |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
Inserts the row into the given position. C inserts a new row |
985
|
|
|
|
|
|
|
at the start of the data, C inserts one at the end of the data |
986
|
|
|
|
|
|
|
block. |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
The index must be a positive integer, C, or C. |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
If a row is inserted at the end, the entire data block is read from the file to |
991
|
|
|
|
|
|
|
cache every row, the row is appended to the end, and the current position is |
992
|
|
|
|
|
|
|
reset to the original position, so C will still return the real next |
993
|
|
|
|
|
|
|
row from the data. |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
If a row is inserted before the current position, the current position is |
996
|
|
|
|
|
|
|
shifted accordingly and will still return the C real row. |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
Caching must be enabled to use C, C, or C. |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
=cut |
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
sub insert { |
1003
|
16
|
|
|
16
|
1
|
25
|
my $self = shift; |
1004
|
16
|
|
|
|
|
12
|
my $index = shift; |
1005
|
16
|
50
|
|
|
|
31
|
if ( !$self->{'options'}{'cache'} ) { |
1006
|
0
|
|
|
|
|
0
|
croak("Caching must be enabled to write."); |
1007
|
|
|
|
|
|
|
} |
1008
|
16
|
100
|
|
|
|
20
|
if ( $index < INSERT_END ) { |
1009
|
1
|
|
|
|
|
158
|
croak("Index must be positive integer, or INSERT_BEGINNING (beginning), or INSERT_END (end)"); |
1010
|
|
|
|
|
|
|
} |
1011
|
15
|
|
|
|
|
30
|
my $new_row = $self->ingest_row(@_); |
1012
|
15
|
50
|
|
|
|
27
|
unless ( defined($new_row) ) { |
1013
|
0
|
|
|
|
|
0
|
croak("Error parsing inputs"); |
1014
|
|
|
|
|
|
|
} |
1015
|
|
|
|
|
|
|
|
1016
|
15
|
100
|
|
|
|
22
|
if ( $index == INSERT_END ) { |
|
|
100
|
|
|
|
|
|
1017
|
10
|
|
|
|
|
11
|
my $current_idx = $self->{'dataidx'}; |
1018
|
10
|
|
|
|
|
17
|
$self->data(); |
1019
|
10
|
|
|
|
|
11
|
$self->{'dataidx'} = $current_idx; |
1020
|
|
|
|
|
|
|
} elsif ( $index > $self->{'max_dataidx'} ) { |
1021
|
2
|
|
|
|
|
2
|
my $current_idx = $self->{'dataidx'}; |
1022
|
2
|
|
|
|
|
6
|
$self->data($index); |
1023
|
2
|
|
|
|
|
3
|
$self->{'dataidx'} = $current_idx; |
1024
|
|
|
|
|
|
|
|
1025
|
2
|
50
|
|
|
|
7
|
if ( $index == $self->{'max_dataidx'} + 1 ) { |
|
|
100
|
|
|
|
|
|
1026
|
0
|
|
|
|
|
0
|
$index = INSERT_END; |
1027
|
|
|
|
|
|
|
} elsif ( $index > $self->{'max_dataidx'} ) { |
1028
|
1
|
|
|
|
|
87
|
croak("Index out of bounds."); |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
} ## end elsif ($index > $self->{'max_dataidx'...}) |
1031
|
|
|
|
|
|
|
|
1032
|
14
|
100
|
100
|
|
|
37
|
if ( $index <= $self->{'dataidx'} && $index != INSERT_END ) { |
1033
|
3
|
|
|
|
|
3
|
$self->{'dataidx'}++; |
1034
|
|
|
|
|
|
|
} |
1035
|
|
|
|
|
|
|
|
1036
|
14
|
|
|
|
|
16
|
$self->{'max_dataidx'}++; |
1037
|
|
|
|
|
|
|
|
1038
|
14
|
100
|
|
|
|
19
|
if ( $index == INSERT_BEGINNING ) { |
|
|
100
|
|
|
|
|
|
1039
|
2
|
|
|
|
|
2
|
unshift( @{ $self->{'data'} }, $new_row ); |
|
2
|
|
|
|
|
6
|
|
1040
|
|
|
|
|
|
|
} elsif ( $index == INSERT_END ) { |
1041
|
10
|
|
|
|
|
9
|
push( @{ $self->{'data'} }, $new_row ); |
|
10
|
|
|
|
|
24
|
|
1042
|
|
|
|
|
|
|
} else { |
1043
|
2
|
|
|
|
|
1
|
splice( @{ $self->{'data'} }, $index, 0, $new_row ); |
|
2
|
|
|
|
|
6
|
|
1044
|
|
|
|
|
|
|
} |
1045
|
|
|
|
|
|
|
} ## end sub insert |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
=head2 prepend(\%data_row | \@data_row | $data_row | %data_row) |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
C is short for C. |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=cut |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
sub prepend { |
1054
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
1055
|
1
|
|
|
|
|
2
|
$self->insert( INSERT_BEGINNING, @_ ); |
1056
|
|
|
|
|
|
|
} |
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
=head2 append(\%data_row | \@data_row | $data_row | %data_row) |
1059
|
|
|
|
|
|
|
|
1060
|
|
|
|
|
|
|
C is short for C. |
1061
|
|
|
|
|
|
|
|
1062
|
|
|
|
|
|
|
=cut |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
sub append { |
1065
|
9
|
|
|
9
|
1
|
37
|
my $self = shift; |
1066
|
9
|
|
|
|
|
10
|
$self->insert( INSERT_END, @_ ); |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
=head2 remove([$index]) |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
If index is specified, it deletes the desired index. If it is omitted, the |
1072
|
|
|
|
|
|
|
last row read is deleted. The current position is modified accordingly. |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
=cut |
1075
|
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
sub remove { |
1077
|
6
|
|
|
6
|
1
|
15
|
my ( $self, $index ) = @_; |
1078
|
|
|
|
|
|
|
|
1079
|
6
|
100
|
66
|
|
|
28
|
if ( !$self->{'options'}{'cache'} ) { |
|
|
50
|
|
|
|
|
|
1080
|
1
|
|
|
|
|
119
|
croak("Caching must be enabled to write."); |
1081
|
|
|
|
|
|
|
} elsif ( !defined($index) && $self->{'dataidx'} < 0 ) { |
1082
|
0
|
|
|
|
|
0
|
croak("No rows read yet."); |
1083
|
|
|
|
|
|
|
} |
1084
|
|
|
|
|
|
|
|
1085
|
5
|
100
|
|
|
|
16
|
if ( !defined($index) ) { |
1086
|
2
|
|
|
|
|
4
|
$index = $self->{'dataidx'}; |
1087
|
|
|
|
|
|
|
} |
1088
|
|
|
|
|
|
|
|
1089
|
5
|
50
|
|
|
|
14
|
if ( $index < 0 ) { |
|
|
100
|
|
|
|
|
|
1090
|
0
|
|
|
|
|
0
|
croak("Index must be positive integer"); |
1091
|
|
|
|
|
|
|
} elsif ( $index > $self->{'max_dataidx'} ) { |
1092
|
2
|
|
|
|
|
3
|
my $current_idx = $self->{'dataidx'}; |
1093
|
2
|
|
|
|
|
3
|
$self->data($index); |
1094
|
2
|
|
|
|
|
3
|
$self->{'dataidx'} = $current_idx; |
1095
|
|
|
|
|
|
|
|
1096
|
2
|
100
|
|
|
|
4
|
if ( $index > $self->{'max_dataidx'} ) { |
1097
|
1
|
|
|
|
|
155
|
croak("Index out of bounds."); |
1098
|
|
|
|
|
|
|
} |
1099
|
|
|
|
|
|
|
} ## end elsif ($index > $self->{'max_dataidx'...}) |
1100
|
|
|
|
|
|
|
|
1101
|
4
|
100
|
|
|
|
10
|
if ( $index <= $self->{'dataidx'} ) { |
1102
|
3
|
|
|
|
|
3
|
$self->{'dataidx'}--; |
1103
|
|
|
|
|
|
|
} |
1104
|
4
|
|
|
|
|
4
|
$self->{'max_dataidx'}--; |
1105
|
|
|
|
|
|
|
|
1106
|
4
|
|
|
|
|
3
|
splice( @{ $self->{'data'} }, $index, 1 ); |
|
4
|
|
|
|
|
16
|
|
1107
|
|
|
|
|
|
|
} ## end sub remove |
1108
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
=head2 where(\&function) |
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
# Find all rows with depth greater than 10 meters |
1112
|
|
|
|
|
|
|
my @ret = $sb_file->where(sub { |
1113
|
|
|
|
|
|
|
if ($_->{'depth'} > 10){ |
1114
|
|
|
|
|
|
|
return $_; |
1115
|
|
|
|
|
|
|
} else { |
1116
|
|
|
|
|
|
|
return undef; |
1117
|
|
|
|
|
|
|
} |
1118
|
|
|
|
|
|
|
}); |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
# Delete all measurements with depth less than 10 meters |
1121
|
|
|
|
|
|
|
$sb_file->where(sub { |
1122
|
|
|
|
|
|
|
if ($_->{'depth'} < 10){ |
1123
|
|
|
|
|
|
|
$_ = undef; |
1124
|
|
|
|
|
|
|
} |
1125
|
|
|
|
|
|
|
}); |
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
# Calculate the average chlorophyll value |
1128
|
|
|
|
|
|
|
my $chl_total = 0; |
1129
|
|
|
|
|
|
|
my $measurements = 0; |
1130
|
|
|
|
|
|
|
$sb_file->where(sub { |
1131
|
|
|
|
|
|
|
if (defined($_->{'chl'})){ |
1132
|
|
|
|
|
|
|
$chl_total += $_->{'chl'}; |
1133
|
|
|
|
|
|
|
$measurements++; |
1134
|
|
|
|
|
|
|
} |
1135
|
|
|
|
|
|
|
}); |
1136
|
|
|
|
|
|
|
if ($measurements){ |
1137
|
|
|
|
|
|
|
print $chl_total/$measurements; |
1138
|
|
|
|
|
|
|
} else { |
1139
|
|
|
|
|
|
|
print "No chl values."; |
1140
|
|
|
|
|
|
|
} |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
Traverses through each data line, running the given function on each row. |
1144
|
|
|
|
|
|
|
C<$_> is set to the current row. If C<$_> is set to undefined, C is |
1145
|
|
|
|
|
|
|
called. Any changes in C<$_> will be reflected in the data. |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
Any defined value returned is added to the return array. If nothing is |
1148
|
|
|
|
|
|
|
returned, a 0 is added. |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
=cut |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
sub where { |
1153
|
7
|
|
|
7
|
1
|
31
|
my ( $self, $function ) = ( shift, shift ); |
1154
|
7
|
100
|
|
|
|
67
|
if ( ref($function) ne 'CODE' ) { |
1155
|
1
|
|
|
|
|
152
|
croak("Invalid arguments."); |
1156
|
|
|
|
|
|
|
} |
1157
|
6
|
|
|
|
|
8
|
my $currentidx = $self->{'dataidx'}; |
1158
|
6
|
|
|
|
|
12
|
$self->rewind(); |
1159
|
|
|
|
|
|
|
|
1160
|
6
|
|
|
|
|
5
|
my @new_rows; |
1161
|
|
|
|
|
|
|
|
1162
|
6
|
|
|
|
|
12
|
while ( my $row = $self->next() ) { |
1163
|
19
|
|
|
|
|
23
|
local *_ = \$row; |
1164
|
19
|
|
|
|
|
30
|
my $ret = $function->(); |
1165
|
19
|
100
|
100
|
|
|
125
|
if ( defined($ret) && defined(wantarray) ) { |
1166
|
12
|
|
|
|
|
9
|
push( @new_rows, $ret ); |
1167
|
|
|
|
|
|
|
} |
1168
|
19
|
100
|
|
|
|
52
|
if ( !defined($row) ) { |
1169
|
2
|
100
|
|
|
|
5
|
if ( $self->{'dataidx'} <= $currentidx ) { |
1170
|
1
|
|
|
|
|
1
|
$currentidx--; |
1171
|
|
|
|
|
|
|
} |
1172
|
2
|
|
|
|
|
5
|
$self->remove(); |
1173
|
|
|
|
|
|
|
} ## end if (!defined($row)) |
1174
|
|
|
|
|
|
|
} ## end while (my $row = $self->next...) |
1175
|
|
|
|
|
|
|
|
1176
|
5
|
|
|
|
|
10
|
$self->data($currentidx); |
1177
|
|
|
|
|
|
|
|
1178
|
5
|
|
|
|
|
12
|
return @new_rows; |
1179
|
|
|
|
|
|
|
} ## end sub where |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
=head2 get_all($field_name [, ... ] [, \%options]) |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
Returns an array/arrayref of all the values matching each given field name. |
1184
|
|
|
|
|
|
|
This function errors out if no field names are passed in or a non-existent |
1185
|
|
|
|
|
|
|
field is requested. |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
Available options are: |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
=over 4 |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
=item * delete_missing |
1192
|
|
|
|
|
|
|
|
1193
|
|
|
|
|
|
|
If any of the fields are missing, the row will not be added to any of the |
1194
|
|
|
|
|
|
|
return arrays. (Useful for plotting or statistics that don't work well with |
1195
|
|
|
|
|
|
|
bad values.) |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
=back |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
=cut |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
sub get_all { |
1202
|
13
|
|
|
13
|
1
|
3630
|
my $self = shift; |
1203
|
13
|
|
|
|
|
25
|
my %options = ( 'delete_missing' => 0 ); |
1204
|
13
|
100
|
|
|
|
35
|
if ( ref( $_[$#_] ) eq 'HASH' ) { |
1205
|
5
|
|
|
|
|
5
|
%options = %{ pop(@_) }; |
|
5
|
|
|
|
|
13
|
|
1206
|
|
|
|
|
|
|
} |
1207
|
13
|
50
|
|
|
|
24
|
if ( !@_ ) { |
1208
|
0
|
|
|
|
|
0
|
croak("get_all must be called with at least one field name"); |
1209
|
|
|
|
|
|
|
} |
1210
|
|
|
|
|
|
|
|
1211
|
13
|
100
|
|
|
|
41
|
my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} ); |
1212
|
|
|
|
|
|
|
|
1213
|
13
|
|
|
|
|
11
|
my $currentidx = $self->{'dataidx'}; |
1214
|
13
|
|
|
|
|
26
|
$self->rewind(); |
1215
|
|
|
|
|
|
|
|
1216
|
13
|
|
|
|
|
16
|
my @fields = map {lc} @_; # turn all inputs lowercase |
|
22
|
|
|
|
|
42
|
|
1217
|
|
|
|
|
|
|
|
1218
|
13
|
|
|
|
|
20
|
foreach my $field (@fields) { |
1219
|
21
|
100
|
|
130
|
|
44
|
if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) { |
|
130
|
|
|
|
|
115
|
|
|
21
|
|
|
|
|
47
|
|
1220
|
4
|
100
|
66
|
4
|
|
19
|
if ( !$self->{'options'}{'fill_ancillary_data'} || ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) < 0 ) { |
|
4
|
|
|
|
|
12
|
|
|
1
|
|
|
|
|
7
|
|
1221
|
3
|
|
|
|
|
388
|
croak("Field $field does not exist"); |
1222
|
|
|
|
|
|
|
} |
1223
|
|
|
|
|
|
|
} |
1224
|
|
|
|
|
|
|
} ## end foreach my $field (@fields) |
1225
|
|
|
|
|
|
|
|
1226
|
10
|
|
|
|
|
14
|
my @ret = map { [] } @fields; # make return array of arrays |
|
17
|
|
|
|
|
24
|
|
1227
|
|
|
|
|
|
|
|
1228
|
10
|
|
|
|
|
23
|
while ( my $row = $self->next() ) { |
1229
|
40
|
100
|
|
|
|
55
|
if ( $options{'delete_missing'} ) { |
1230
|
20
|
|
|
|
|
11
|
my $has_all = 1; |
1231
|
20
|
|
|
|
|
21
|
foreach my $field (@fields) { |
1232
|
36
|
100
|
100
|
|
|
119
|
unless ( defined( $row->{$field} ) && ( !defined($missing) || $row->{$field} != $missing ) ) { |
|
|
|
66
|
|
|
|
|
1233
|
4
|
|
|
|
|
4
|
$has_all = 0; |
1234
|
4
|
|
|
|
|
6
|
last; |
1235
|
|
|
|
|
|
|
} |
1236
|
|
|
|
|
|
|
} ## end foreach my $field (@fields) |
1237
|
20
|
100
|
|
|
|
29
|
unless ($has_all) { |
1238
|
4
|
|
|
|
|
6
|
next; |
1239
|
|
|
|
|
|
|
} |
1240
|
|
|
|
|
|
|
} ## end if ($options{'delete_missing'...}) |
1241
|
|
|
|
|
|
|
|
1242
|
36
|
|
|
|
|
45
|
for ( my $i = 0; $i <= $#fields; $i++ ) { |
1243
|
60
|
|
|
|
|
38
|
push( @{ $ret[$i] }, $row->{ $fields[$i] } ); |
|
60
|
|
|
|
|
159
|
|
1244
|
|
|
|
|
|
|
} |
1245
|
|
|
|
|
|
|
} ## end while (my $row = $self->next...) |
1246
|
|
|
|
|
|
|
|
1247
|
10
|
|
|
|
|
19
|
$self->data($currentidx); |
1248
|
|
|
|
|
|
|
|
1249
|
10
|
100
|
|
|
|
18
|
if ( $#_ == 0 ) { |
|
|
100
|
|
|
|
|
|
1250
|
3
|
100
|
|
|
|
5
|
if (wantarray) { |
1251
|
1
|
|
|
|
|
2
|
return @{ $ret[0] }; |
|
1
|
|
|
|
|
4
|
|
1252
|
|
|
|
|
|
|
} else { |
1253
|
2
|
|
|
|
|
7
|
return $ret[0]; |
1254
|
|
|
|
|
|
|
} |
1255
|
|
|
|
|
|
|
} elsif (wantarray) { |
1256
|
4
|
|
|
|
|
14
|
return @ret; |
1257
|
|
|
|
|
|
|
} else { |
1258
|
3
|
|
|
|
|
9
|
return \@ret; |
1259
|
|
|
|
|
|
|
} |
1260
|
|
|
|
|
|
|
} ## end sub get_all |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=head2 remove_field($field_name [, ... ]) |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
Removes a field from the file. C is called to remove the field |
1265
|
|
|
|
|
|
|
from cached rows. Any new rows grabbed will have the removed fields omitted, |
1266
|
|
|
|
|
|
|
as well. A warning is issued if the field does not exist. |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
=cut |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
sub remove_field { |
1271
|
8
|
|
|
8
|
1
|
23
|
my $self = shift; |
1272
|
8
|
100
|
|
|
|
19
|
if ( !@_ ) { |
1273
|
1
|
|
|
|
|
166
|
croak("Field(s) must be specified."); |
1274
|
|
|
|
|
|
|
} |
1275
|
7
|
|
|
|
|
13
|
foreach my $field_orig (@_) { |
1276
|
9
|
|
|
|
|
9
|
my $field = lc($field_orig); |
1277
|
|
|
|
|
|
|
|
1278
|
9
|
|
|
30
|
|
26
|
my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} }; |
|
30
|
|
|
|
|
47
|
|
|
9
|
|
|
|
|
24
|
|
1279
|
|
|
|
|
|
|
|
1280
|
9
|
100
|
|
|
|
24
|
if ( $field_idx < 0 ) { |
1281
|
1
|
|
|
|
|
93
|
carp("Field $field does not exist."); |
1282
|
|
|
|
|
|
|
} else { |
1283
|
8
|
|
|
|
|
9
|
splice( @{ $self->{'actual_fields'} }, $field_idx, 1 ); |
|
8
|
|
|
|
|
13
|
|
1284
|
8
|
|
|
|
|
6
|
splice( @{ $self->{'actual_units'} }, $field_idx, 1 ); |
|
8
|
|
|
|
|
15
|
|
1285
|
|
|
|
|
|
|
} |
1286
|
|
|
|
|
|
|
} ## end foreach my $field_orig (@_) |
1287
|
7
|
|
|
|
|
123
|
$self->update_fields(); |
1288
|
|
|
|
|
|
|
} ## end sub remove_field |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
=head2 add_field($field_name [, $unit [, $position]]) |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
Adds a field to the file. C is called to populate all cached |
1293
|
|
|
|
|
|
|
rows. Any rows retrieved will have the new field set to undefined or /missing, |
1294
|
|
|
|
|
|
|
depending on if the option C is set. |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
If the unit is not specified, it is set to unitless. |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
If the position is not specified, the field is added to the end. |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
=cut |
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
sub add_field { |
1303
|
25
|
|
|
25
|
1
|
86
|
my ( $self, $field, $unit, $position ) = @_; |
1304
|
25
|
50
|
|
|
|
65
|
if ( !$self->{'options'}{'cache'} ) { |
|
|
100
|
|
|
|
|
|
1305
|
0
|
|
|
|
|
0
|
croak("Caching must be enabled to write."); |
1306
|
|
|
|
|
|
|
} elsif ( !$field ) { |
1307
|
1
|
|
|
|
|
79
|
croak("Field must be specified."); |
1308
|
|
|
|
|
|
|
} |
1309
|
24
|
|
|
|
|
26
|
$field = lc($field); |
1310
|
|
|
|
|
|
|
|
1311
|
24
|
|
|
121
|
|
58
|
my $field_idx = firstidx { $_ eq $field } @{ $self->{'actual_fields'} }; |
|
121
|
|
|
|
|
79
|
|
|
24
|
|
|
|
|
55
|
|
1312
|
24
|
100
|
|
|
|
65
|
if ( $field_idx >= 0 ) { |
1313
|
3
|
|
|
|
|
433
|
croak("Field already exists."); |
1314
|
|
|
|
|
|
|
} |
1315
|
21
|
100
|
|
|
|
33
|
if ( !defined($position) ) { |
1316
|
15
|
|
|
|
|
25
|
$position = INSERT_END; |
1317
|
|
|
|
|
|
|
} |
1318
|
21
|
|
100
|
|
|
39
|
$unit ||= 'unitless'; |
1319
|
21
|
|
|
|
|
26
|
$unit = lc($unit); |
1320
|
|
|
|
|
|
|
|
1321
|
21
|
100
|
|
|
|
24
|
if ( $position == INSERT_END ) { |
|
|
100
|
|
|
|
|
|
1322
|
17
|
|
|
|
|
16
|
push( @{ $self->{'actual_fields'} }, $field ); |
|
17
|
|
|
|
|
27
|
|
1323
|
17
|
|
|
|
|
13
|
push( @{ $self->{'actual_units'} }, $unit ); |
|
17
|
|
|
|
|
23
|
|
1324
|
|
|
|
|
|
|
} elsif ( $position == INSERT_BEGINNING ) { |
1325
|
2
|
|
|
|
|
1
|
unshift( @{ $self->{'actual_fields'} }, $field ); |
|
2
|
|
|
|
|
4
|
|
1326
|
2
|
|
|
|
|
3
|
unshift( @{ $self->{'actual_units'} }, $unit ); |
|
2
|
|
|
|
|
3
|
|
1327
|
|
|
|
|
|
|
} else { |
1328
|
2
|
|
|
|
|
2
|
splice( @{ $self->{'actual_fields'} }, $position, 0, $field ); |
|
2
|
|
|
|
|
5
|
|
1329
|
2
|
|
|
|
|
2
|
splice( @{ $self->{'actual_units'} }, $position, 0, $unit ); |
|
2
|
|
|
|
|
3
|
|
1330
|
|
|
|
|
|
|
} |
1331
|
21
|
|
|
|
|
35
|
$self->update_fields(); |
1332
|
|
|
|
|
|
|
} ## end sub add_field |
1333
|
|
|
|
|
|
|
|
1334
|
|
|
|
|
|
|
=head2 find_fields($string | qr/match/ [, ... ]) |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
Finds fields matching the string or regex given. If given a string, it must |
1337
|
|
|
|
|
|
|
match a field exactly and entirely to be found. To find a substring, use |
1338
|
|
|
|
|
|
|
C. Fields are returned in the order that they will be output. This |
1339
|
|
|
|
|
|
|
function takes into account fields that are added or removed. All fields are |
1340
|
|
|
|
|
|
|
always lowercase, so all matches are case insensitive. |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
Given one argument, returns an array of the fields found. An empty array is |
1343
|
|
|
|
|
|
|
returned if no fields match. |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
Given multiple arguments, returns an array/arrayref of arrays of fields found. |
1346
|
|
|
|
|
|
|
IE: C would return something like |
1347
|
|
|
|
|
|
|
C<[['lw510','lw550'],['es510','es550']]>. If no field is matched, the inner |
1348
|
|
|
|
|
|
|
array will be empty. IE: C<[[],[]]>. |
1349
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
=cut |
1351
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
sub find_fields { |
1353
|
10
|
|
|
10
|
1
|
3411
|
my $self = shift; |
1354
|
10
|
50
|
|
|
|
25
|
if ( $#_ < 0 ) { |
1355
|
0
|
|
|
|
|
0
|
croak("Input must be a string or regex object."); |
1356
|
|
|
|
|
|
|
} |
1357
|
|
|
|
|
|
|
|
1358
|
10
|
|
|
|
|
10
|
my @ret; |
1359
|
|
|
|
|
|
|
|
1360
|
10
|
|
|
|
|
16
|
foreach my $find (@_) { |
1361
|
13
|
|
|
|
|
7
|
my ( $regex, @matching ); |
1362
|
13
|
50
|
|
|
|
21
|
if ( defined($find) ) { |
1363
|
13
|
100
|
|
|
|
28
|
if ( !ref($find) ) { |
|
|
50
|
|
|
|
|
|
1364
|
7
|
|
|
|
|
70
|
$regex = lc(qr/^$find$/i); |
1365
|
|
|
|
|
|
|
} elsif ( ref($find) eq 'Regexp' ) { |
1366
|
6
|
|
|
|
|
20
|
$regex = lc(qr/$find/i); |
1367
|
|
|
|
|
|
|
} else { |
1368
|
0
|
|
|
|
|
0
|
croak("Input must be a string or regex object."); |
1369
|
|
|
|
|
|
|
} |
1370
|
|
|
|
|
|
|
|
1371
|
13
|
|
|
|
|
15
|
foreach my $field ( @{ $self->{'actual_fields'} } ) { |
|
13
|
|
|
|
|
21
|
|
1372
|
92
|
100
|
|
|
|
215
|
if ( $field =~ $regex ) { |
1373
|
23
|
|
|
|
|
28
|
push( @matching, $field ); |
1374
|
|
|
|
|
|
|
} |
1375
|
|
|
|
|
|
|
} |
1376
|
|
|
|
|
|
|
} |
1377
|
13
|
|
|
|
|
22
|
push( @ret, \@matching ); |
1378
|
|
|
|
|
|
|
} ## end foreach my $find (@_) |
1379
|
|
|
|
|
|
|
|
1380
|
10
|
100
|
|
|
|
17
|
if ( $#_ == 0 ) { |
1381
|
7
|
|
|
|
|
7
|
return @{ $ret[0] }; |
|
7
|
|
|
|
|
22
|
|
1382
|
|
|
|
|
|
|
} else { |
1383
|
3
|
100
|
|
|
|
6
|
if (wantarray) { |
1384
|
2
|
|
|
|
|
6
|
return @ret; |
1385
|
|
|
|
|
|
|
} else { |
1386
|
1
|
|
|
|
|
3
|
return \@ret; |
1387
|
|
|
|
|
|
|
} |
1388
|
|
|
|
|
|
|
} ## end else [ if ($#_ == 0) ] |
1389
|
|
|
|
|
|
|
} ## end sub find_fields |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
=head2 add_comment(@comments) |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
Adds comments to the output file, which are printed, in bulk, after C. |
1394
|
|
|
|
|
|
|
Comments are trimmed before entry and !s are added, if required. |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
=cut |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
sub add_comment { |
1399
|
2
|
|
|
2
|
1
|
3
|
my $self = shift; |
1400
|
2
|
|
|
|
|
5
|
push(@{$self->{'comments'}}, map { |
1401
|
2
|
|
|
|
|
23
|
my $c = $_; |
|
3
|
|
|
|
|
4
|
|
1402
|
3
|
|
|
|
|
11
|
$c =~ s/^\s+|\s+$//g; |
1403
|
3
|
100
|
|
|
|
7
|
if ($c =~ /^!/){ |
1404
|
1
|
|
|
|
|
3
|
$c |
1405
|
|
|
|
|
|
|
} else { |
1406
|
2
|
|
|
|
|
5
|
"! $c" |
1407
|
|
|
|
|
|
|
} |
1408
|
|
|
|
|
|
|
} @_); |
1409
|
|
|
|
|
|
|
} |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
=head2 get_comments([@indices]) |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
Returns a list of the comments at the given indices. If no indices are passed |
1414
|
|
|
|
|
|
|
in, return them all. |
1415
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
=cut |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
sub get_comments { |
1419
|
6
|
|
|
6
|
1
|
10
|
my $self = shift; |
1420
|
6
|
|
|
|
|
6
|
my @ret; |
1421
|
6
|
100
|
|
|
|
10
|
if (@_){ |
1422
|
2
|
|
|
|
|
4
|
@ret = map {$self->{'comments'}[$_]} @_; |
|
3
|
|
|
|
|
6
|
|
1423
|
|
|
|
|
|
|
} else { |
1424
|
4
|
|
|
|
|
4
|
@ret = @{$self->{'comments'}}; |
|
4
|
|
|
|
|
9
|
|
1425
|
|
|
|
|
|
|
} |
1426
|
6
|
50
|
|
|
|
9
|
if (wantarray){ |
1427
|
0
|
|
|
|
|
0
|
return @ret; |
1428
|
|
|
|
|
|
|
} else { |
1429
|
6
|
|
|
|
|
26
|
return \@ret; |
1430
|
|
|
|
|
|
|
} |
1431
|
|
|
|
|
|
|
} |
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
=head2 set_comments(@comments) |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
Overwrites all of the comments in the file. For now, this is the proper way |
1436
|
|
|
|
|
|
|
to remove comments. Comments are trimmed before entry and !s are added, if |
1437
|
|
|
|
|
|
|
required. |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
=cut |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
sub set_comments { |
1442
|
1
|
|
|
1
|
1
|
1
|
my $self = shift; |
1443
|
|
|
|
|
|
|
$self->{'comments'} = [map { |
1444
|
1
|
|
|
|
|
3
|
my $c = $_; |
|
2
|
|
|
|
|
2
|
|
1445
|
2
|
|
|
|
|
5
|
$c =~ s/^\s+|\s+$//g; |
1446
|
2
|
100
|
|
|
|
6
|
if ($c =~ /^!/){ |
1447
|
1
|
|
|
|
|
3
|
$c |
1448
|
|
|
|
|
|
|
} else { |
1449
|
1
|
|
|
|
|
3
|
"! $c" |
1450
|
|
|
|
|
|
|
} |
1451
|
|
|
|
|
|
|
} @_]; |
1452
|
|
|
|
|
|
|
} |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
=head2 write([$filename | $file_handle | \*GLOB]) |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
Outputs the current header and data to the given handle or glob. If no |
1457
|
|
|
|
|
|
|
arguments are given, and a non-existent filename was given to C, the |
1458
|
|
|
|
|
|
|
contents are output into that. If an output file was not given, C |
1459
|
|
|
|
|
|
|
outputs to STDOUT. |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
If C is enabled, the headers are checked for invalid headers and |
1462
|
|
|
|
|
|
|
missing required headers and errors/warnings can be thrown accordingly. |
1463
|
|
|
|
|
|
|
|
1464
|
|
|
|
|
|
|
The headers are output in a somewhat-arbitrary but consistent order. If |
1465
|
|
|
|
|
|
|
C is enabled, placeholders are added for every header that |
1466
|
|
|
|
|
|
|
does not exist. A comment section is also added if one is not present. |
1467
|
|
|
|
|
|
|
|
1468
|
|
|
|
|
|
|
=cut |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
sub write { |
1471
|
15
|
|
|
15
|
1
|
9032
|
my ( $self, $write_to_h ) = @_; |
1472
|
|
|
|
|
|
|
|
1473
|
15
|
|
|
|
|
32
|
my $strict_write = $self->{'options'}{'strict'} & STRICT_WRITE; |
1474
|
15
|
100
|
|
|
|
34
|
my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' ); |
1475
|
15
|
|
|
|
|
14
|
my $error = 0; |
1476
|
|
|
|
|
|
|
|
1477
|
15
|
100
|
|
|
|
40
|
if ($strict_write) { |
1478
|
3
|
|
|
|
|
3
|
foreach my $header ( keys( %{ $self->{'headers'} } ) ) { |
|
3
|
|
|
|
|
20
|
|
1479
|
65
|
|
|
|
|
48
|
( my $header_no_slash = $header ) =~ s"^/""; |
1480
|
65
|
50
|
33
|
1104
|
|
140
|
if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header_no_slash } @HIDDEN_HEADERS ) < 0 ) { |
|
1104
|
|
|
|
|
726
|
|
|
0
|
|
|
|
|
0
|
|
1481
|
0
|
|
|
|
|
0
|
carp("Invalid header: $header"); |
1482
|
0
|
|
|
|
|
0
|
$error = 1; |
1483
|
|
|
|
|
|
|
} |
1484
|
|
|
|
|
|
|
} ## end foreach my $header (keys(%{...})) |
1485
|
|
|
|
|
|
|
|
1486
|
3
|
|
|
|
|
8
|
foreach my $header (@REQUIRED_HEADERS) { |
1487
|
66
|
100
|
|
|
|
126
|
if ( !exists( $self->{'headers'}{$header} ) ) { |
1488
|
20
|
|
|
|
|
1285
|
carp("Missing required header: $header"); |
1489
|
20
|
|
|
|
|
696
|
$error = 1; |
1490
|
|
|
|
|
|
|
} |
1491
|
|
|
|
|
|
|
} ## end foreach my $header (@REQUIRED_HEADERS) |
1492
|
|
|
|
|
|
|
} ## end if ($strict_write) |
1493
|
|
|
|
|
|
|
|
1494
|
15
|
100
|
|
|
|
26
|
if ( !$error ) { |
1495
|
14
|
|
|
|
|
33
|
my $close_write_to = 0; |
1496
|
14
|
|
|
|
|
27
|
my $old_fh = select(); |
1497
|
|
|
|
|
|
|
|
1498
|
14
|
50
|
33
|
|
|
53
|
if ( !$write_to_h && exists( $self->{'default_write_to'} ) ) { |
1499
|
0
|
|
0
|
|
|
0
|
$write_to_h ||= $self->{'default_write_to'}; |
1500
|
|
|
|
|
|
|
} |
1501
|
|
|
|
|
|
|
|
1502
|
14
|
50
|
|
|
|
26
|
if ( defined($write_to_h) ) { |
1503
|
0
|
0
|
|
|
|
0
|
if ( ref($write_to_h) eq 'GLOB' ) { |
|
|
0
|
|
|
|
|
|
1504
|
0
|
|
|
|
|
0
|
select($write_to_h); |
1505
|
|
|
|
|
|
|
} elsif ( !ref($write_to_h) ) { |
1506
|
0
|
|
|
|
|
0
|
my $write_to = $write_to_h; |
1507
|
0
|
|
|
|
|
0
|
$write_to_h = undef; |
1508
|
0
|
0
|
|
|
|
0
|
open( $write_to_h, ">", $write_to ) || croak("Invalid argument for write()."); |
1509
|
0
|
|
|
|
|
0
|
$close_write_to = 1; |
1510
|
0
|
|
|
|
|
0
|
select($write_to_h); |
1511
|
|
|
|
|
|
|
} else { |
1512
|
0
|
|
|
|
|
0
|
croak("Invalid argument for write()."); |
1513
|
|
|
|
|
|
|
} |
1514
|
|
|
|
|
|
|
} ## end if (defined($write_to_h...)) |
1515
|
|
|
|
|
|
|
|
1516
|
14
|
|
50
|
|
|
63
|
$self->{'headers'}{"${slash}delimiter"} ||= 'comma'; |
1517
|
14
|
|
|
|
|
27
|
my $actual_delim = lc( $self->{'headers'}{"${slash}delimiter"} ); |
1518
|
14
|
100
|
|
|
|
47
|
if ( $actual_delim eq 'comma' ) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1519
|
2
|
|
|
|
|
3
|
$actual_delim = ','; |
1520
|
|
|
|
|
|
|
} elsif ( $actual_delim eq 'space' ) { |
1521
|
12
|
|
|
|
|
16
|
$actual_delim = ' '; |
1522
|
|
|
|
|
|
|
} elsif ( $actual_delim eq 'tab' ) { |
1523
|
0
|
|
|
|
|
0
|
$actual_delim = "\t"; |
1524
|
|
|
|
|
|
|
} elsif ( $actual_delim eq 'semicolon' ) { |
1525
|
0
|
|
|
|
|
0
|
$actual_delim = ';'; |
1526
|
|
|
|
|
|
|
} else { |
1527
|
0
|
|
|
|
|
0
|
$actual_delim = ','; |
1528
|
0
|
|
|
|
|
0
|
$self->{'headers'}{"${slash}delimiter"} = 'comma'; |
1529
|
|
|
|
|
|
|
} |
1530
|
|
|
|
|
|
|
|
1531
|
14
|
50
|
|
|
|
31
|
my $missing = ( exists( $self->{'missing'} ) ? $self->{'missing'} : $DEFAULT_MISSING ); |
1532
|
14
|
100
|
|
|
|
41
|
my $bdl = ( exists( $self->{'below_detection_limit'} ) ? $self->{'below_detection_limit'} : $DEFAULT_MISSING ); |
1533
|
14
|
100
|
|
|
|
29
|
my $adl = ( exists( $self->{'above_detection_limit'} ) ? $self->{'above_detection_limit'} : $DEFAULT_MISSING ); |
1534
|
|
|
|
|
|
|
|
1535
|
14
|
100
|
|
|
|
23
|
if ( $self->{'options'}{'preserve_header'} ) { |
1536
|
1
|
|
|
|
|
2
|
print join("\n", @{ $self->{'preserved_header'} }, ''); |
|
1
|
|
|
|
|
51
|
|
1537
|
|
|
|
|
|
|
} else { |
1538
|
13
|
100
|
|
|
|
34
|
if ( !exists( $self->{'headers'}{"${slash}begin_header"} ) ) { |
1539
|
3
|
|
|
|
|
88
|
print "/begin_header\n"; |
1540
|
|
|
|
|
|
|
} |
1541
|
|
|
|
|
|
|
|
1542
|
13
|
|
|
|
|
17
|
my $add_missing_headers = $self->{'options'}{'add_empty_headers'}; |
1543
|
13
|
100
|
66
|
|
|
31
|
if ( $add_missing_headers && $add_missing_headers eq '1' ) { |
1544
|
1
|
|
|
|
|
1
|
$add_missing_headers = 'NA'; |
1545
|
|
|
|
|
|
|
} |
1546
|
13
|
100
|
|
|
|
26
|
if ( !$self->{'options'}{'preserve_case'} ) { |
1547
|
1
|
|
50
|
|
|
2
|
$add_missing_headers = lc( $add_missing_headers || '' ); |
1548
|
|
|
|
|
|
|
} |
1549
|
|
|
|
|
|
|
|
1550
|
13
|
|
|
|
|
15
|
my @headers_to_print; |
1551
|
|
|
|
|
|
|
|
1552
|
13
|
|
|
|
|
63
|
@headers_to_print = @ALL_HEADERS; |
1553
|
13
|
100
|
66
|
|
|
41
|
if ($missing eq $adl || $self->{'options'}{'missing_data_to_undef'}){ |
1554
|
12
|
|
|
|
|
118
|
@headers_to_print = grep(!/above_detection_limit/i, @headers_to_print); |
1555
|
|
|
|
|
|
|
} |
1556
|
13
|
100
|
66
|
|
|
45
|
if ($missing eq $bdl || $self->{'options'}{'missing_data_to_undef'}){ |
1557
|
12
|
|
|
|
|
87
|
@headers_to_print = grep(!/below_detection_limit/i, @headers_to_print); |
1558
|
|
|
|
|
|
|
} |
1559
|
|
|
|
|
|
|
|
1560
|
13
|
100
|
|
|
|
19
|
unless (grep($_ ne 'unitless', @{ $self->{'actual_units'} })){ |
|
13
|
|
|
|
|
43
|
|
1561
|
6
|
|
|
|
|
62
|
@headers_to_print = grep(!/units/i, @headers_to_print); |
1562
|
|
|
|
|
|
|
} |
1563
|
|
|
|
|
|
|
|
1564
|
13
|
|
|
|
|
23
|
foreach my $header (@headers_to_print) { |
1565
|
399
|
100
|
|
|
|
991
|
if ( $header eq 'missing' ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1566
|
13
|
|
|
|
|
15
|
while ( my ( $h, $k ) = each( %{ $self->{'headers'} } ) ) { |
|
165
|
|
|
|
|
319
|
|
1567
|
152
|
|
|
|
|
144
|
( my $header_no_slash = $h ) =~ s"^/""; |
1568
|
152
|
50
|
|
3097
|
|
344
|
if ( ( firstidx { $_ eq $header_no_slash } @ALL_HEADERS ) < 0 ) { |
|
3097
|
|
|
|
|
1880
|
|
1569
|
0
|
|
|
|
|
0
|
print "/$h=$k\n"; |
1570
|
|
|
|
|
|
|
} |
1571
|
|
|
|
|
|
|
} ## end while (my ($h, $k) = each...) |
1572
|
|
|
|
|
|
|
|
1573
|
13
|
|
|
|
|
13
|
foreach my $comment ( @{ $self->{'comments'} } ) { |
|
13
|
|
|
|
|
26
|
|
1574
|
18
|
|
|
|
|
103
|
print "$comment\n"; |
1575
|
|
|
|
|
|
|
} |
1576
|
13
|
100
|
100
|
|
|
16
|
if ( !@{ $self->{'comments'} } && $add_missing_headers ) { |
|
13
|
|
|
|
|
61
|
|
1577
|
1
|
|
|
|
|
13
|
print "! Comments: \n!\n"; |
1578
|
|
|
|
|
|
|
} |
1579
|
13
|
100
|
|
|
|
31
|
if ( !exists( $self->{'headers'}{"$slash$header"} ) ) { |
1580
|
3
|
|
|
|
|
40
|
print "/missing=$missing\n"; |
1581
|
|
|
|
|
|
|
} else { |
1582
|
10
|
|
|
|
|
100
|
print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n"; |
1583
|
|
|
|
|
|
|
} |
1584
|
|
|
|
|
|
|
} elsif ( $header eq 'fields' ) { |
1585
|
13
|
|
|
|
|
22
|
print "/$header=", join( ',', @{ $self->{'actual_fields'} } ), "\n"; |
|
13
|
|
|
|
|
104
|
|
1586
|
|
|
|
|
|
|
} elsif ( $header eq 'units' ) { |
1587
|
7
|
|
|
|
|
10
|
print "/$header=", join( ',', @{ $self->{'actual_units'} } ), "\n"; |
|
7
|
|
|
|
|
67
|
|
1588
|
|
|
|
|
|
|
} elsif ( exists( $self->{'headers'}{"$slash$header"} ) ) { |
1589
|
110
|
100
|
|
|
|
269
|
if ( $header =~ /_header/ ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1590
|
20
|
|
|
|
|
432
|
print "/$header\n"; |
1591
|
|
|
|
|
|
|
} elsif (length($self->{'headers'}{"$slash$header"})) { |
1592
|
65
|
|
|
|
|
65
|
my $v = $self->{'headers'}{"$slash$header"}; |
1593
|
65
|
100
|
|
|
|
124
|
if ( $header =~ /_latitude|_longitude/ ) { |
|
|
100
|
|
|
|
|
|
1594
|
8
|
|
|
|
|
51
|
print "/$header=$v\[deg]\n"; |
1595
|
|
|
|
|
|
|
} elsif ( $header =~ /_time/ ) { |
1596
|
4
|
|
|
|
|
25
|
print "/$header=$v\[gmt]\n"; |
1597
|
|
|
|
|
|
|
} else { |
1598
|
53
|
|
|
|
|
366
|
print "/$header=$v\n"; |
1599
|
|
|
|
|
|
|
} |
1600
|
|
|
|
|
|
|
# print '/', $header, '=', $self->{'headers'}{"$slash$header"}, "\n"; |
1601
|
|
|
|
|
|
|
} elsif ($add_missing_headers) { |
1602
|
25
|
100
|
|
|
|
41
|
if ( $header =~ /_latitude|_longitude/ ) { |
|
|
100
|
|
|
|
|
|
1603
|
4
|
|
|
|
|
26
|
print "/$header=$add_missing_headers\[deg]\n"; |
1604
|
|
|
|
|
|
|
} elsif ( $header =~ /_time/ ) { |
1605
|
2
|
|
|
|
|
13
|
print "/$header=$add_missing_headers\[gmt]\n"; |
1606
|
|
|
|
|
|
|
} else { |
1607
|
19
|
|
|
|
|
111
|
print "/$header=$add_missing_headers\n"; |
1608
|
|
|
|
|
|
|
} |
1609
|
|
|
|
|
|
|
} |
1610
|
|
|
|
|
|
|
} elsif ($add_missing_headers) { |
1611
|
0
|
0
|
|
|
|
0
|
if ( $header =~ /_latitude|_longitude/ ) { |
|
|
0
|
|
|
|
|
|
1612
|
0
|
|
|
|
|
0
|
print "/$header=$add_missing_headers\[deg]\n"; |
1613
|
|
|
|
|
|
|
} elsif ( $header =~ /_time/ ) { |
1614
|
0
|
|
|
|
|
0
|
print "/$header=$add_missing_headers\[gmt]\n"; |
1615
|
|
|
|
|
|
|
} else { |
1616
|
0
|
|
|
|
|
0
|
print "/$header=$add_missing_headers\n"; |
1617
|
|
|
|
|
|
|
} |
1618
|
|
|
|
|
|
|
} ## end elsif ($add_missing_headers) |
1619
|
|
|
|
|
|
|
} ## end foreach my $header (@ALL_HEADERS) |
1620
|
|
|
|
|
|
|
|
1621
|
13
|
100
|
|
|
|
47
|
if ( !exists( $self->{'headers'}{"${slash}end_header"} ) ) { |
1622
|
3
|
|
|
|
|
22
|
print "/end_header\n"; |
1623
|
|
|
|
|
|
|
} |
1624
|
|
|
|
|
|
|
} ## end else [ if ($self->{'options'}...)] |
1625
|
|
|
|
|
|
|
|
1626
|
14
|
|
|
|
|
37
|
$self->rewind(); |
1627
|
|
|
|
|
|
|
|
1628
|
14
|
|
|
|
|
28
|
while ( my $row = $self->next() ) { |
1629
|
24
|
|
|
|
|
14
|
my @values; |
1630
|
24
|
|
|
|
|
19
|
foreach my $field ( @{ $self->{'actual_fields'} } ) { |
|
24
|
|
|
|
|
33
|
|
1631
|
118
|
100
|
|
|
|
181
|
push( @values, ( defined( $row->{$field} ) ? $row->{$field} : $missing ) ); |
1632
|
|
|
|
|
|
|
} |
1633
|
24
|
|
|
|
|
291
|
print join( $actual_delim, @values ), "\n"; |
1634
|
|
|
|
|
|
|
} ## end while (my $row = $self->next...) |
1635
|
|
|
|
|
|
|
|
1636
|
14
|
|
|
|
|
41
|
select($old_fh); |
1637
|
14
|
50
|
|
|
|
37
|
if ($close_write_to) { |
1638
|
0
|
|
|
|
|
0
|
close($write_to_h); |
1639
|
|
|
|
|
|
|
} |
1640
|
|
|
|
|
|
|
} else { |
1641
|
1
|
|
|
|
|
58
|
croak("Error(s) writing file"); |
1642
|
|
|
|
|
|
|
} |
1643
|
14
|
|
|
|
|
45
|
return; |
1644
|
|
|
|
|
|
|
} ## end sub write |
1645
|
|
|
|
|
|
|
|
1646
|
|
|
|
|
|
|
=head2 close() |
1647
|
|
|
|
|
|
|
|
1648
|
|
|
|
|
|
|
If a file handle is opened for reading, this function closes it. This is |
1649
|
|
|
|
|
|
|
automatically called when the object is destroyed. This is useful to replace |
1650
|
|
|
|
|
|
|
the file being read with the current changes. |
1651
|
|
|
|
|
|
|
|
1652
|
|
|
|
|
|
|
=cut |
1653
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
sub close { |
1655
|
137
|
|
|
137
|
1
|
153
|
my ($self) = @_; |
1656
|
137
|
100
|
|
|
|
263
|
if ( $self->{'handle'} ) { |
1657
|
127
|
|
|
|
|
342
|
my $ret = close( $self->{'handle'} ); |
1658
|
127
|
|
|
|
|
286
|
delete( $self->{'handle'} ); |
1659
|
127
|
|
|
|
|
1610
|
return $ret; |
1660
|
|
|
|
|
|
|
} else { |
1661
|
10
|
|
|
|
|
58
|
return; |
1662
|
|
|
|
|
|
|
} |
1663
|
|
|
|
|
|
|
} ## end sub close |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
=head2 make_data_hash($line [,\@field_list]) |
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
my %row = $sb_file->make_data_hash("1.5,2,2.5"); |
1668
|
|
|
|
|
|
|
my %row = $sb_file->make_data_hash("1.5,2,2.5", [qw(lat lon sal)]); |
1669
|
|
|
|
|
|
|
my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->fields()]); |
1670
|
|
|
|
|
|
|
my %row = $sb_file->make_data_hash("1.5,2,2.5", [$sb_file->actual_fields()]); |
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
For mostly internal use. This function parses a data line. It first splits |
1673
|
|
|
|
|
|
|
the data via the delimiter, assigns a field to each value, and returns a hash |
1674
|
|
|
|
|
|
|
or hash reference. |
1675
|
|
|
|
|
|
|
|
1676
|
|
|
|
|
|
|
If C<@field_list> is not set, C<< $sb_file->fields() >> is used. |
1677
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
If a delimiter is not set (a blank file was created, a file without a |
1679
|
|
|
|
|
|
|
/delimiter header is read, etc), the delimiter is guessed and set using |
1680
|
|
|
|
|
|
|
L. |
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
Cs if the delimiter could not be guessed or the number of fields the |
1683
|
|
|
|
|
|
|
line is split into does not match up with the field list. |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
=cut |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
sub make_data_hash { |
1688
|
263
|
|
|
263
|
1
|
278
|
my ( $self, $line, $field_list ) = @_; |
1689
|
263
|
50
|
66
|
|
|
509
|
if ( !$self->{'delim'} && !$self->guess_delim($line) ) { |
1690
|
0
|
|
|
|
|
0
|
croak("Need a delimiter"); |
1691
|
|
|
|
|
|
|
} |
1692
|
263
|
|
|
|
|
1532
|
my @values = split( $self->{'delim'}, $line ); |
1693
|
263
|
|
33
|
|
|
781
|
$field_list ||= $self->{'fields'}; |
1694
|
|
|
|
|
|
|
|
1695
|
263
|
|
|
|
|
202
|
my ( $num_expected, $num_got ) = ( scalar( @{ $self->{'fields'} } ), scalar(@values) ); |
|
263
|
|
|
|
|
382
|
|
1696
|
263
|
50
|
|
|
|
406
|
if ( $num_expected != $num_got ) { |
1697
|
0
|
|
|
|
|
0
|
croak("Incorrect number of fields or elements: got $num_got, expected $num_expected"); |
1698
|
|
|
|
|
|
|
} |
1699
|
|
|
|
|
|
|
|
1700
|
263
|
|
|
|
|
212
|
my %ret; |
1701
|
|
|
|
|
|
|
|
1702
|
263
|
|
|
|
|
1202
|
my $iterator = each_arrayref( $field_list, \@values ); |
1703
|
263
|
|
|
|
|
845
|
while ( my ( $k, $v ) = $iterator->() ) { |
1704
|
1848
|
100
|
|
|
|
1914
|
if ( $self->{'options'}{'missing_data_to_undef'} ) { |
1705
|
937
|
100
|
100
|
|
|
8689
|
if ( $self->{'missing_is_number'} && looks_like_number($v) && $v == $self->{'missing'} ) { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
1706
|
67
|
|
|
|
|
168
|
$ret{$k} = undef; |
1707
|
|
|
|
|
|
|
} elsif ( $self->{'adl_is_number'} && looks_like_number($v) && $v == $self->{'above_detection_limit'} ){ |
1708
|
1
|
|
|
|
|
3
|
$ret{$k} = undef; |
1709
|
|
|
|
|
|
|
} elsif ( $self->{'bdl_is_number'} && looks_like_number($v) && $v == $self->{'below_detection_limit'} ){ |
1710
|
1
|
|
|
|
|
4
|
$ret{$k} = undef; |
1711
|
|
|
|
|
|
|
} elsif ($v eq $self->{'missing'} || $v eq $self->{'below_detection_limit'} || $v eq $self->{'above_detection_limit'}) { |
1712
|
8
|
|
|
|
|
21
|
$ret{$k} = undef; |
1713
|
|
|
|
|
|
|
} else { |
1714
|
860
|
|
|
|
|
2595
|
$ret{$k} = $v; |
1715
|
|
|
|
|
|
|
} |
1716
|
|
|
|
|
|
|
} else { |
1717
|
911
|
|
|
|
|
2232
|
$ret{$k} = $v; |
1718
|
|
|
|
|
|
|
} |
1719
|
|
|
|
|
|
|
} ## end while (my ($k, $v) = $iterator...) |
1720
|
263
|
|
|
|
|
481
|
$self->add_and_remove_fields( \%ret ); |
1721
|
263
|
50
|
|
|
|
325
|
if (wantarray) { |
1722
|
0
|
|
|
|
|
0
|
return %ret; |
1723
|
|
|
|
|
|
|
} else { |
1724
|
263
|
|
|
|
|
1022
|
return \%ret; |
1725
|
|
|
|
|
|
|
} |
1726
|
|
|
|
|
|
|
} ## end sub make_data_hash |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
=head2 AUTOLOAD |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
print $sb_file->missing(); |
1731
|
|
|
|
|
|
|
print $sb_file->dataidx(); |
1732
|
|
|
|
|
|
|
print $sb_file->actual_fields(); |
1733
|
|
|
|
|
|
|
... |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
Returns a few internal variables. The accessor is read only, but some |
1736
|
|
|
|
|
|
|
variables can be returned as a reference, and can be modified afterwards. |
1737
|
|
|
|
|
|
|
Though, do it knowing this is a terrible idea. |
1738
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
If the variable retrieved is an array or hash reference and this is called in a |
1740
|
|
|
|
|
|
|
list context, the variable is dereferenced first. |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
Here are a few "useful" variables: |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
=over 4 |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
=item * dataidx |
1747
|
|
|
|
|
|
|
|
1748
|
|
|
|
|
|
|
The current row index. |
1749
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
=item * max_dataidx |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
The highest row index read so far. |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
=item * fields |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
An array of the original fields. |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=item * actual_fields |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
An array of the current fields, as modified by C or C. |
1761
|
|
|
|
|
|
|
|
1762
|
|
|
|
|
|
|
=item * delim |
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
The regex used to split data lines. |
1765
|
|
|
|
|
|
|
|
1766
|
|
|
|
|
|
|
=item * missing |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
The null/fill/missing value of the SeaBASS file. |
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
=item * delim |
1771
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
The current line delimiter regex. |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
=back |
1775
|
|
|
|
|
|
|
|
1776
|
|
|
|
|
|
|
=cut |
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
sub AUTOLOAD { |
1779
|
109
|
|
|
109
|
|
137
|
my $self = shift; |
1780
|
109
|
50
|
|
|
|
156
|
if ( !ref($self) ) { |
1781
|
0
|
|
|
|
|
0
|
croak("$self is not an object"); |
1782
|
|
|
|
|
|
|
} |
1783
|
|
|
|
|
|
|
|
1784
|
109
|
|
|
|
|
92
|
my $name = $AUTOLOAD; |
1785
|
109
|
50
|
|
|
|
181
|
if ($name) { |
1786
|
109
|
|
|
|
|
249
|
$name =~ s/.*://; |
1787
|
109
|
|
|
|
|
118
|
my $value = $self->{$name}; |
1788
|
109
|
100
|
|
|
|
173
|
if ( !defined($value) ) { |
1789
|
1
|
|
|
|
|
5
|
return; |
1790
|
|
|
|
|
|
|
} |
1791
|
108
|
100
|
66
|
|
|
353
|
if ( ref($value) eq 'ARRAY' && wantarray ) { |
|
|
50
|
33
|
|
|
|
|
1792
|
104
|
|
|
|
|
75
|
return @{$value}; |
|
104
|
|
|
|
|
281
|
|
1793
|
|
|
|
|
|
|
} elsif ( ref($value) eq 'HASH' && wantarray ) { |
1794
|
0
|
|
|
|
|
0
|
return %{$value}; |
|
0
|
|
|
|
|
0
|
|
1795
|
|
|
|
|
|
|
} |
1796
|
4
|
|
|
|
|
17
|
return $value; |
1797
|
|
|
|
|
|
|
} ## end if ($name) |
1798
|
|
|
|
|
|
|
} ## end sub AUTOLOAD |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
sub DESTROY { |
1801
|
137
|
|
|
137
|
|
50238
|
my $self = shift; |
1802
|
137
|
|
|
|
|
294
|
$self->close(); |
1803
|
|
|
|
|
|
|
} |
1804
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
1806
|
|
|
|
|
|
|
|
1807
|
|
|
|
|
|
|
=head2 check_options() |
1808
|
|
|
|
|
|
|
|
1809
|
|
|
|
|
|
|
For internal use only. This function is in charge of checking the options to |
1810
|
|
|
|
|
|
|
make sure they are of the right type (array/hash reference where appropriate). |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
If add_empty_headers is set, this function turns off C. |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
Called by the object, accepts no arguments. |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
=cut |
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
#<<< perltidy destroys this function |
1819
|
|
|
|
|
|
|
sub check_options { |
1820
|
136
|
|
|
136
|
1
|
140
|
my $self = shift; |
1821
|
136
|
|
|
|
|
135
|
while (my ($k, $v) = each(%{$self->{'options'}})) { |
|
1744
|
|
|
|
|
3175
|
|
1822
|
1612
|
100
|
|
|
|
2984
|
if (!exists($DEFAULT_OPTIONS{$k})) { |
|
|
100
|
|
|
|
|
|
1823
|
2
|
|
|
|
|
193
|
croak("Option not understood: $k"); |
1824
|
1871
|
|
|
1871
|
|
3927
|
} elsif ((firstidx { $_ eq ref($v) } @{$OPTION_TYPES{$k}}) < 0) { |
|
1610
|
|
|
|
|
2523
|
|
1825
|
2
|
|
|
|
|
4
|
my $expected_ref = join('/', @{$OPTION_TYPES{$k}}); |
|
2
|
|
|
|
|
5
|
|
1826
|
2
|
100
|
|
|
|
197
|
croak("Option $k not of the right type, expected: " . ($expected_ref ? "$expected_ref reference" : 'scalar')); |
1827
|
|
|
|
|
|
|
} ## end elsif ((firstidx { $_ eq ...})) |
1828
|
|
|
|
|
|
|
} ## end while (my ($k, $v) = each...) |
1829
|
132
|
100
|
|
|
|
351
|
if ($self->{'options'}{'add_empty_headers'}) { |
1830
|
1
|
|
|
|
|
3
|
$self->{'options'}{'strict'} &= STRICT_READ; |
1831
|
|
|
|
|
|
|
} |
1832
|
|
|
|
|
|
|
} ## end sub check_options |
1833
|
|
|
|
|
|
|
#>>> |
1834
|
|
|
|
|
|
|
|
1835
|
|
|
|
|
|
|
=head2 create_blank_file() |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
For internal use only. C populates the object with proper |
1838
|
|
|
|
|
|
|
internal variables, as well as adding blank headers if C is |
1839
|
|
|
|
|
|
|
set. |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
By default, the missing value is set to C<$DEFAULT_MISSING> (C<-999>). |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
This function turns on the C option, as C must be enabled to |
1844
|
|
|
|
|
|
|
write. |
1845
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
The delimiter is left undefined and will be guessed upon reading the first data |
1847
|
|
|
|
|
|
|
line using the L function. |
1848
|
|
|
|
|
|
|
|
1849
|
|
|
|
|
|
|
Called by the object, accepts no arguments. |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
=cut |
1852
|
|
|
|
|
|
|
|
1853
|
|
|
|
|
|
|
sub create_blank_file { |
1854
|
4
|
|
|
4
|
1
|
4
|
my ($self) = @_; |
1855
|
4
|
|
|
|
|
7
|
$self->{'actual_fields'} = []; |
1856
|
4
|
|
|
|
|
6
|
$self->{'actual_units'} = []; |
1857
|
4
|
|
|
|
|
5
|
$self->{'fields'} = []; |
1858
|
4
|
|
|
|
|
5
|
$self->{'units'} = []; |
1859
|
4
|
|
|
|
|
5
|
$self->{'headers'} = {}; |
1860
|
4
|
|
|
|
|
5
|
$self->{'comments'} = []; |
1861
|
4
|
|
|
|
|
5
|
$self->{'data'} = []; |
1862
|
4
|
|
|
|
|
5
|
$self->{'dataidx'} = -1; |
1863
|
4
|
|
|
|
|
5
|
$self->{'max_dataidx'} = -1; |
1864
|
4
|
|
|
|
|
5
|
$self->{'delim'} = undef; |
1865
|
4
|
|
|
|
|
4
|
$self->{'missing'} = $DEFAULT_MISSING; |
1866
|
|
|
|
|
|
|
|
1867
|
4
|
|
|
|
|
5
|
$self->{'options'}{'cache'} = 1; |
1868
|
4
|
|
|
|
|
3
|
$self->{'options'}{'fill_ancillary_data'} = 0; |
1869
|
|
|
|
|
|
|
|
1870
|
4
|
100
|
|
|
|
8
|
my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' ); |
1871
|
4
|
100
|
|
|
|
7
|
if ($self->{'options'}{'add_empty_headers'}) { |
1872
|
1
|
|
|
|
|
2
|
foreach (@ALL_HEADERS) { |
1873
|
33
|
50
|
|
|
|
46
|
if ( !exists( $self->{'headers'}{"${slash}$_"} ) ) { |
1874
|
33
|
|
|
|
|
37
|
$self->{'headers'}{"${slash}$_"} = ''; |
1875
|
33
|
100
|
|
|
|
40
|
if ( $_ eq 'missing' ) { |
1876
|
1
|
|
|
|
|
3
|
$self->{'headers'}{"${slash}missing"} = $DEFAULT_MISSING; |
1877
|
|
|
|
|
|
|
} |
1878
|
|
|
|
|
|
|
} ## end if (!exists($self->{'headers'...})) |
1879
|
|
|
|
|
|
|
} ## end foreach (@ALL_HEADERS) |
1880
|
|
|
|
|
|
|
} ## end if ($add_missing_headers) |
1881
|
|
|
|
|
|
|
|
1882
|
4
|
|
|
|
|
5
|
my $success = 1; |
1883
|
4
|
50
|
|
|
|
9
|
if ( $self->{'options'}{'default_headers'} ) { |
1884
|
4
|
|
|
|
|
8
|
$success &= $self->add_headers( $self->{'options'}{'default_headers'} ); |
1885
|
|
|
|
|
|
|
} |
1886
|
4
|
50
|
|
|
|
8
|
if ( $self->{'options'}{'headers'} ) { |
1887
|
4
|
|
|
|
|
6
|
$success &= $self->add_headers( $self->{'options'}{'headers'} ); |
1888
|
|
|
|
|
|
|
} |
1889
|
4
|
50
|
|
|
|
11
|
unless ($success) { |
1890
|
0
|
|
|
|
|
0
|
croak("Error creating blank file."); |
1891
|
|
|
|
|
|
|
} |
1892
|
|
|
|
|
|
|
} ## end sub create_blank_file |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
=head2 read_headers() |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
For internal use only. C reads the metadata at the beginning of |
1897
|
|
|
|
|
|
|
a SeaBASS file. |
1898
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
Called by the object, accepts no arguments. |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
=cut |
1902
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
sub read_headers { |
1904
|
127
|
|
|
127
|
1
|
136
|
my $self = shift; |
1905
|
|
|
|
|
|
|
|
1906
|
127
|
50
|
|
|
|
265
|
if ( $self->{'headers'} ) { |
1907
|
0
|
|
|
|
|
0
|
return; |
1908
|
|
|
|
|
|
|
} |
1909
|
|
|
|
|
|
|
|
1910
|
127
|
100
|
|
|
|
254
|
my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' ); |
1911
|
127
|
|
|
|
|
122
|
my $success = 1; |
1912
|
127
|
|
|
|
|
96
|
my @comments; |
1913
|
|
|
|
|
|
|
|
1914
|
127
|
|
|
|
|
183
|
$self->{'headers'} = {}; |
1915
|
127
|
|
|
|
|
192
|
$self->{'comments'} = []; |
1916
|
|
|
|
|
|
|
|
1917
|
127
|
50
|
|
|
|
258
|
if ( $self->{'options'}{'default_headers'} ) { |
1918
|
127
|
|
|
|
|
285
|
$success &= $self->add_headers( $self->{'options'}{'default_headers'} ); |
1919
|
|
|
|
|
|
|
} |
1920
|
|
|
|
|
|
|
|
1921
|
127
|
|
|
|
|
142
|
my $handle = $self->{'handle'}; |
1922
|
127
|
|
|
|
|
127
|
my $position = my $line_number = 0; |
1923
|
127
|
|
|
|
|
88
|
my @header_lines; |
1924
|
|
|
|
|
|
|
|
1925
|
127
|
|
|
|
|
135
|
my $strict = $self->{'options'}{'strict'}; |
1926
|
|
|
|
|
|
|
|
1927
|
127
|
|
|
|
|
466
|
while ( my $line = <$handle> ) { |
1928
|
3256
|
|
|
|
|
1796
|
$line_number++; |
1929
|
3256
|
|
|
|
|
3026
|
strip($line); |
1930
|
3256
|
50
|
|
|
|
3739
|
if ($line) { |
1931
|
3256
|
100
|
|
|
|
6695
|
if ( $line =~ m'^(/end_header)\@?$'i ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1932
|
112
|
|
|
|
|
242
|
push( @header_lines, $1 ); |
1933
|
112
|
|
|
|
|
122
|
$position = tell($handle); |
1934
|
112
|
|
|
|
|
149
|
last; |
1935
|
|
|
|
|
|
|
} elsif ( $line =~ m"^/" ) { |
1936
|
2433
|
|
|
|
|
2234
|
push( @header_lines, $line ); |
1937
|
|
|
|
|
|
|
} elsif ( $line =~ m"^!" ) { |
1938
|
696
|
|
|
|
|
620
|
push( @comments, $line ); |
1939
|
696
|
100
|
|
|
|
1017
|
if ( $self->{'options'}{'preserve_header'} ) { |
1940
|
10
|
|
|
|
|
10
|
push( @header_lines, $line ); |
1941
|
|
|
|
|
|
|
} |
1942
|
|
|
|
|
|
|
} else { #TODO: search ahead for more headers or comments (in case of merely comment missing !) and fail if READ_STRICT |
1943
|
15
|
|
|
|
|
19
|
seek( $handle, $position, SEEK_SET ); |
1944
|
15
|
50
|
|
|
|
21
|
if ( $strict & STRICT_READ ) { |
1945
|
0
|
|
|
|
|
0
|
carp("File missing /end_header or comment missing !, assuming data start: line #$line_number ($line)"); |
1946
|
|
|
|
|
|
|
} |
1947
|
15
|
|
|
|
|
17
|
last; |
1948
|
|
|
|
|
|
|
} |
1949
|
|
|
|
|
|
|
} ## end if ($line) |
1950
|
3129
|
|
|
|
|
5953
|
$position = tell($handle); |
1951
|
|
|
|
|
|
|
} ## end while (my $line = <$handle>) |
1952
|
|
|
|
|
|
|
|
1953
|
|
|
|
|
|
|
# add_headers looks at STRICT_WRITE, not STRICT_READ |
1954
|
127
|
100
|
|
|
|
222
|
if ( $strict & STRICT_READ ) { |
1955
|
5
|
|
|
|
|
15
|
$self->{'options'}{'strict'} |= STRICT_WRITE; |
1956
|
|
|
|
|
|
|
} else { |
1957
|
122
|
|
|
|
|
165
|
$self->{'options'}{'strict'} = 0; |
1958
|
|
|
|
|
|
|
} |
1959
|
|
|
|
|
|
|
|
1960
|
127
|
100
|
|
|
|
252
|
if ( $self->{'options'}{'preserve_header'} ) { |
1961
|
1
|
|
|
|
|
7
|
$self->{'preserved_header'} = [@header_lines]; |
1962
|
|
|
|
|
|
|
} |
1963
|
|
|
|
|
|
|
|
1964
|
127
|
|
|
|
|
232
|
$success &= $self->add_headers( \@header_lines ); |
1965
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
# restore strictness |
1967
|
127
|
|
|
|
|
183
|
$self->{'options'}{'strict'} = $strict; |
1968
|
|
|
|
|
|
|
|
1969
|
127
|
50
|
|
|
|
251
|
if ( $self->{'options'}{'headers'} ) { |
1970
|
127
|
|
|
|
|
223
|
$success &= $self->add_headers( $self->{'options'}{'headers'} ); |
1971
|
|
|
|
|
|
|
} |
1972
|
|
|
|
|
|
|
|
1973
|
127
|
|
|
|
|
139
|
my %headers = %{ $self->{'headers'} }; |
|
127
|
|
|
|
|
1246
|
|
1974
|
|
|
|
|
|
|
|
1975
|
127
|
100
|
|
|
|
319
|
if ( $self->{'options'}{'preserve_comments'} ) { |
1976
|
126
|
|
|
|
|
109
|
push( @{ $self->{'comments'} }, @comments ); |
|
126
|
|
|
|
|
327
|
|
1977
|
|
|
|
|
|
|
} |
1978
|
|
|
|
|
|
|
|
1979
|
127
|
|
66
|
|
|
352
|
my $missing = $headers{"${slash}missing"} || $DEFAULT_MISSING; |
1980
|
127
|
100
|
|
|
|
244
|
if ( $self->{'options'}{'delete_missing_headers'} ) { |
1981
|
1
|
|
|
|
|
5
|
while ( my ( $k, $v ) = each(%headers) ) { |
1982
|
31
|
100
|
|
|
|
64
|
if ( $k =~ m"/?(?:end|begin)_header$|^/?missing$" ) { |
1983
|
3
|
|
|
|
|
4
|
next; |
1984
|
|
|
|
|
|
|
} |
1985
|
28
|
100
|
66
|
|
|
140
|
if ( !defined($v) || $v =~ m"^n/?a(?:\[.*?\])?$"i || lc($v) eq lc($missing) ) { |
|
|
|
66
|
|
|
|
|
1986
|
7
|
|
|
|
|
14
|
delete( $headers{$k} ); |
1987
|
|
|
|
|
|
|
} |
1988
|
|
|
|
|
|
|
} ## end while (my ($k, $v) = each...) |
1989
|
|
|
|
|
|
|
} ## end if ($self->{'options'}...) |
1990
|
|
|
|
|
|
|
|
1991
|
127
|
100
|
|
|
|
197
|
if ( $strict & STRICT_READ ) { |
1992
|
5
|
|
|
|
|
11
|
foreach (@REQUIRED_HEADERS) { |
1993
|
110
|
100
|
|
|
|
3137
|
if ( !exists( $headers{"${slash}$_"} ) ) { |
1994
|
58
|
|
|
|
|
54
|
$success = 0; |
1995
|
58
|
|
|
|
|
4940
|
carp("Missing required header: $_"); |
1996
|
|
|
|
|
|
|
} |
1997
|
|
|
|
|
|
|
} ## end foreach (@REQUIRED_HEADERS) |
1998
|
5
|
|
|
|
|
28
|
while ( my ( $header, $value ) = each(%headers) ) { |
1999
|
72
|
50
|
|
|
|
79
|
if ($slash) { |
2000
|
0
|
|
|
|
|
0
|
$header =~ s"^/""; |
2001
|
|
|
|
|
|
|
} |
2002
|
72
|
50
|
33
|
1220
|
|
155
|
if ( ( firstidx { $_ eq $header } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $header } @HIDDEN_HEADERS ) < 0 ) { |
|
1312
|
|
|
|
|
895
|
|
|
0
|
|
|
|
|
0
|
|
2003
|
0
|
|
|
|
|
0
|
$success = 0; |
2004
|
0
|
|
|
|
|
0
|
carp("$header not a standard header."); |
2005
|
|
|
|
|
|
|
} |
2006
|
|
|
|
|
|
|
} ## end while (my ($header, $value...)) |
2007
|
5
|
50
|
33
|
|
|
37
|
if ( $headers{"${slash}begin_header"} || $headers{"${slash}end_header"} ) { |
2008
|
0
|
|
|
|
|
0
|
$success = 0; |
2009
|
0
|
|
|
|
|
0
|
carp("begin_ or end_header incorrect"); |
2010
|
|
|
|
|
|
|
} |
2011
|
|
|
|
|
|
|
} ## end if ($strict & STRICT_READ) |
2012
|
127
|
|
|
|
|
211
|
foreach (@ABSOLUTELY_REQUIRED_HEADERS) { |
2013
|
127
|
100
|
|
|
|
323
|
if ( !exists( $headers{"${slash}$_"} ) ) { |
2014
|
1
|
|
|
|
|
1
|
$success = 0; |
2015
|
1
|
50
|
|
|
|
2
|
if ( $strict & STRICT_READ ) { |
2016
|
1
|
|
|
|
|
95
|
carp("Missing absolutely required header: $_"); |
2017
|
|
|
|
|
|
|
} |
2018
|
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
} ## end foreach (@ABSOLUTELY_REQUIRED_HEADERS) |
2020
|
|
|
|
|
|
|
|
2021
|
127
|
|
100
|
|
|
1384
|
$self->{'fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ]; |
2022
|
127
|
|
100
|
|
|
1055
|
$self->{'actual_fields'} = [ split( /\s*,\s*/, $headers{"${slash}fields"} || '' ) ]; |
2023
|
|
|
|
|
|
|
|
2024
|
127
|
100
|
|
|
|
311
|
if ( $headers{"${slash}units"} ) { |
2025
|
79
|
|
|
|
|
543
|
$self->{'units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ]; |
2026
|
79
|
|
|
|
|
573
|
$self->{'actual_units'} = [ split( /\s*,\s*/, $headers{"${slash}units"} ) ]; |
2027
|
|
|
|
|
|
|
} else { |
2028
|
48
|
|
|
|
|
87
|
my (@new_units1); |
2029
|
48
|
|
|
|
|
41
|
foreach ( @{ $self->{'fields'} } ) { |
|
48
|
|
|
|
|
116
|
|
2030
|
338
|
|
|
|
|
319
|
push( @new_units1, 'unitless' ); |
2031
|
|
|
|
|
|
|
} |
2032
|
48
|
|
|
|
|
96
|
my @new_units2 = @new_units1; |
2033
|
48
|
|
|
|
|
66
|
$self->{'units'} = \@new_units1; |
2034
|
48
|
|
|
|
|
69
|
$self->{'actual_units'} = \@new_units2; |
2035
|
48
|
|
|
|
|
163
|
$headers{"${slash}units"} = join( ',', @new_units1 ); |
2036
|
|
|
|
|
|
|
} ## end else [ if ($headers{"${slash}units"...})] |
2037
|
|
|
|
|
|
|
|
2038
|
127
|
100
|
|
|
|
146
|
if ( @{$self->{'fields'}} != @{$self->{'units'}} ) { |
|
127
|
|
|
|
|
171
|
|
|
127
|
|
|
|
|
265
|
|
2039
|
1
|
50
|
|
|
|
3
|
if ( $strict & STRICT_READ ) { |
2040
|
1
|
|
|
|
|
86
|
carp("/fields and /units don't match up"); |
2041
|
1
|
|
|
|
|
48
|
$success = 0; |
2042
|
|
|
|
|
|
|
} else { |
2043
|
0
|
|
|
|
|
0
|
while (@{$self->{'fields'}} > @{$self->{'units'}}){ |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2044
|
0
|
|
|
|
|
0
|
push(@{$self->{'units'}}, 'unitless'); |
|
0
|
|
|
|
|
0
|
|
2045
|
|
|
|
|
|
|
} |
2046
|
0
|
|
|
|
|
0
|
while (@{$self->{'fields'}} < @{$self->{'units'}}){ |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2047
|
0
|
|
|
|
|
0
|
pop(@{$self->{'units'}}); |
|
0
|
|
|
|
|
0
|
|
2048
|
|
|
|
|
|
|
} |
2049
|
|
|
|
|
|
|
} |
2050
|
|
|
|
|
|
|
} |
2051
|
|
|
|
|
|
|
|
2052
|
127
|
100
|
|
|
|
229
|
unless ($success) { |
2053
|
4
|
50
|
|
|
|
11
|
if ( $strict & STRICT_READ ) { |
2054
|
4
|
|
|
|
|
344
|
croak("Error(s) reading SeaBASS file"); |
2055
|
|
|
|
|
|
|
} else { |
2056
|
0
|
|
|
|
|
0
|
return; |
2057
|
|
|
|
|
|
|
} |
2058
|
|
|
|
|
|
|
} |
2059
|
|
|
|
|
|
|
|
2060
|
123
|
|
|
|
|
137
|
$self->{'missing'} = $missing; |
2061
|
123
|
|
66
|
|
|
481
|
$self->{'below_detection_limit'} = $headers{"${slash}below_detection_limit"} || $missing; |
2062
|
123
|
|
66
|
|
|
416
|
$self->{'above_detection_limit'} = $headers{"${slash}above_detection_limit"} || $missing; |
2063
|
123
|
|
|
|
|
132
|
$self->{'line_number'} = $line_number; |
2064
|
123
|
|
|
|
|
143
|
$self->{'data_start_line'} = $line_number; |
2065
|
123
|
|
|
|
|
196
|
$self->{'data_start_position'} = $position; |
2066
|
|
|
|
|
|
|
|
2067
|
123
|
100
|
|
|
|
216
|
if ( $self->{'options'}{'cache'} ) { |
2068
|
113
|
|
|
|
|
187
|
$self->{'data'} = []; |
2069
|
|
|
|
|
|
|
} |
2070
|
123
|
|
|
|
|
161
|
$self->{'dataidx'} = -1; |
2071
|
123
|
|
|
|
|
126
|
$self->{'max_dataidx'} = -1; |
2072
|
|
|
|
|
|
|
|
2073
|
123
|
|
|
|
|
151
|
$self->{'headers'} = \%headers; |
2074
|
|
|
|
|
|
|
|
2075
|
123
|
100
|
|
|
|
469
|
if ( $self->{'options'}{'fill_ancillary_data'} ) { |
2076
|
16
|
|
|
|
|
27
|
my @fields_lc = map {lc} @{ $self->{'fields'} }; |
|
110
|
|
|
|
|
122
|
|
|
16
|
|
|
|
|
23
|
|
2077
|
16
|
|
|
|
|
24
|
$self->{'fields_lc'} = \@fields_lc; |
2078
|
16
|
|
|
|
|
21
|
$self->{'ancillary'} = {}; |
2079
|
|
|
|
|
|
|
|
2080
|
16
|
|
|
|
|
20
|
foreach my $field (@FILL_ANCILLARY_DATA) { |
2081
|
208
|
|
|
|
|
251
|
$self->find_ancillaries($field); |
2082
|
|
|
|
|
|
|
} |
2083
|
|
|
|
|
|
|
|
2084
|
16
|
|
|
|
|
24
|
$self->{'case_conversion'} = {}; |
2085
|
|
|
|
|
|
|
|
2086
|
16
|
|
|
|
|
15
|
while ( my ( $field, $value ) = each( %{ $self->{'ancillary'} } ) ) { |
|
159
|
|
|
|
|
344
|
|
2087
|
143
|
|
|
801
|
|
242
|
my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} }; |
|
801
|
|
|
|
|
484
|
|
|
143
|
|
|
|
|
216
|
|
2088
|
143
|
|
|
|
|
168
|
my $new_field = $field; |
2089
|
143
|
100
|
|
|
|
171
|
if ( $idx >= 0 ) { |
2090
|
77
|
|
|
|
|
84
|
$new_field = $self->{'fields'}[$idx]; |
2091
|
|
|
|
|
|
|
} |
2092
|
|
|
|
|
|
|
|
2093
|
143
|
100
|
|
|
|
149
|
if ( ref($value) ) { |
2094
|
29
|
|
|
|
|
55
|
for ( my $i = 1; $i < @$value; $i++ ) { |
2095
|
52
|
|
|
|
|
44
|
my $new_arg = $value->[$i]; |
2096
|
52
|
|
|
|
|
136
|
for ( $value->[$i] =~ /\$(\{\w+\}|\w+)/g ) { |
2097
|
37
|
|
|
|
|
82
|
( my $variable = $_ ) =~ s/^\{|\}$//g; |
2098
|
|
|
|
|
|
|
|
2099
|
37
|
|
|
196
|
|
79
|
my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} }; |
|
196
|
|
|
|
|
125
|
|
|
37
|
|
|
|
|
62
|
|
2100
|
37
|
|
|
|
|
55
|
my $new_variable = $self->{'fields'}[$idx]; |
2101
|
|
|
|
|
|
|
|
2102
|
37
|
|
|
|
|
277
|
$new_arg =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g; |
2103
|
37
|
|
|
|
|
193
|
$new_arg =~ s/\$\{$variable\}/\$$new_variable/g; |
2104
|
|
|
|
|
|
|
} ## end for ($value->[$i] =~ /\$(\{\w+\}|\w+)/g) |
2105
|
52
|
|
|
|
|
107
|
$value->[$i] = $new_arg; |
2106
|
|
|
|
|
|
|
} ## end for (my $i = 1; $i <= length...) |
2107
|
|
|
|
|
|
|
} else { |
2108
|
114
|
|
|
|
|
85
|
my $new_value = $value; |
2109
|
114
|
|
|
|
|
323
|
for ( $value =~ /\$(\{\w+\}|\w+)/g ) { |
2110
|
151
|
|
|
|
|
397
|
( my $variable = $_ ) =~ s/^\{|\}$//g; |
2111
|
|
|
|
|
|
|
|
2112
|
151
|
|
|
740
|
|
245
|
my $idx = firstidx { $_ eq $variable } @{ $self->{'fields_lc'} }; |
|
740
|
|
|
|
|
437
|
|
|
151
|
|
|
|
|
226
|
|
2113
|
151
|
100
|
|
|
|
298
|
if ( $idx >= 0 ) { |
2114
|
145
|
|
|
|
|
159
|
my $new_variable = $self->{'fields'}[$idx]; |
2115
|
|
|
|
|
|
|
|
2116
|
145
|
|
|
|
|
981
|
$new_value =~ s/\$$variable(\W|\b|$)/\$$new_variable$1/g; |
2117
|
145
|
|
|
|
|
959
|
$new_value =~ s/\$\{$variable\}/\$$new_variable/g; |
2118
|
|
|
|
|
|
|
} ## end if ($idx >= 0) |
2119
|
|
|
|
|
|
|
} ## end for ($value =~ /\$(\{\w+\}|\w+)/g) |
2120
|
114
|
|
|
|
|
146
|
$value = $new_value; |
2121
|
|
|
|
|
|
|
} ## end else [ if (ref($value)) ] |
2122
|
143
|
50
|
|
|
|
206
|
if ( $field ne $new_field ) { |
2123
|
0
|
|
|
|
|
0
|
delete( $self->{'ancillary'}{$field} ); |
2124
|
0
|
|
|
|
|
0
|
$self->{'case_conversion'}{$field} = $new_field; |
2125
|
|
|
|
|
|
|
} |
2126
|
143
|
|
|
|
|
205
|
$self->{'ancillary'}{$new_field} = $value; |
2127
|
|
|
|
|
|
|
} ## end while (my ($field, $value...)) |
2128
|
|
|
|
|
|
|
|
2129
|
16
|
|
|
|
|
30
|
delete( $self->{'fields_lc'} ); |
2130
|
|
|
|
|
|
|
} ## end if ($self->{'options'}...) |
2131
|
|
|
|
|
|
|
|
2132
|
123
|
|
|
|
|
475
|
return 1; |
2133
|
|
|
|
|
|
|
} ## end sub read_headers |
2134
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
=head2 validate_header($header, $value, $strict) |
2136
|
|
|
|
|
|
|
|
2137
|
|
|
|
|
|
|
my ($k, $v, $string) = ('investigators','jason_lefler',0) |
2138
|
|
|
|
|
|
|
$sb_file->validate_header($k, $v, $strict); |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
For internal use only. C is in charge of properly formatting |
2141
|
|
|
|
|
|
|
key/value pairs to add to the object. This function will modify the input |
2142
|
|
|
|
|
|
|
variables in place to prepare them for use. |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
Returns false if there was a problem with the inputs, such as C is set |
2145
|
|
|
|
|
|
|
and an invalid header was passed in. |
2146
|
|
|
|
|
|
|
|
2147
|
|
|
|
|
|
|
C will set C to C<$DEFAULT_MISSING> (C<-999>) if it |
2148
|
|
|
|
|
|
|
is blank or undefined. |
2149
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
This function will also change the expected delimiter for rows that have not |
2151
|
|
|
|
|
|
|
yet been cached. |
2152
|
|
|
|
|
|
|
|
2153
|
|
|
|
|
|
|
=cut |
2154
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
sub validate_header { |
2156
|
2559
|
|
|
2559
|
1
|
2295
|
my ( $self, $k, $v, $strict ) = @_; |
2157
|
|
|
|
|
|
|
|
2158
|
2559
|
|
|
|
|
1583
|
my $success = 1; |
2159
|
|
|
|
|
|
|
|
2160
|
2559
|
100
|
|
|
|
2549
|
if ( !defined($v) ) { |
2161
|
199
|
|
|
|
|
189
|
$v = ''; |
2162
|
|
|
|
|
|
|
} else { |
2163
|
2360
|
|
|
|
|
2175
|
strip($v); |
2164
|
|
|
|
|
|
|
} |
2165
|
|
|
|
|
|
|
|
2166
|
2559
|
|
|
|
|
2345
|
strip($k); |
2167
|
|
|
|
|
|
|
|
2168
|
2559
|
|
|
|
|
2414
|
$k = lc($k); |
2169
|
|
|
|
|
|
|
|
2170
|
2559
|
50
|
66
|
|
|
4261
|
if ( length($v) == 0 && $k !~ /_header/ ) { |
2171
|
0
|
0
|
|
|
|
0
|
if ($strict) { |
2172
|
0
|
|
|
|
|
0
|
carp("$k missing value"); |
2173
|
0
|
|
|
|
|
0
|
$success = 0; |
2174
|
|
|
|
|
|
|
} else { |
2175
|
0
|
|
|
|
|
0
|
$v = ""; |
2176
|
|
|
|
|
|
|
} |
2177
|
|
|
|
|
|
|
} ## end if (length($v) == 0 &&...) |
2178
|
|
|
|
|
|
|
|
2179
|
2559
|
100
|
100
|
|
|
6977
|
if ( !$self->{'options'}{'preserve_case'} || $k =~ /fields|units/ ) { |
2180
|
1307
|
|
|
|
|
1184
|
$v = lc($v); |
2181
|
|
|
|
|
|
|
} |
2182
|
|
|
|
|
|
|
|
2183
|
2559
|
100
|
|
|
|
2668
|
if ( $self->{'options'}{'keep_slashes'} ) { |
2184
|
139
|
100
|
|
|
|
221
|
if ( $k =~ m"^[^/]" ) { |
2185
|
2
|
|
|
|
|
5
|
$k = "/$k"; |
2186
|
|
|
|
|
|
|
} |
2187
|
|
|
|
|
|
|
|
2188
|
139
|
50
|
66
|
0
|
|
225
|
if ( $strict && ( firstidx { "/$_" eq $k } @ALL_HEADERS ) < 0 && ( firstidx { "/$_" eq $k } @HIDDEN_HEADERS ) < 0 ) { |
|
12
|
|
33
|
|
|
32
|
|
|
0
|
|
|
|
|
0
|
|
2189
|
0
|
|
|
|
|
0
|
carp("Invalid header, $k"); |
2190
|
0
|
|
|
|
|
0
|
$success = 0; |
2191
|
|
|
|
|
|
|
} |
2192
|
|
|
|
|
|
|
} else { |
2193
|
2420
|
100
|
|
|
|
4089
|
if ( $k =~ m"^/" ) { |
2194
|
2418
|
|
|
|
|
3578
|
$k =~ s"^/""; |
2195
|
|
|
|
|
|
|
} |
2196
|
|
|
|
|
|
|
|
2197
|
2420
|
50
|
66
|
0
|
|
3748
|
if ( $strict && ( firstidx { $_ eq $k } @ALL_HEADERS ) < 0 && ( firstidx { $_ eq $k } @HIDDEN_HEADERS ) < 0 ) { |
|
1328
|
|
33
|
|
|
899
|
|
|
0
|
|
|
|
|
0
|
|
2198
|
0
|
|
|
|
|
0
|
carp("Invalid header, $k"); |
2199
|
0
|
|
|
|
|
0
|
$success = 0; |
2200
|
|
|
|
|
|
|
} |
2201
|
|
|
|
|
|
|
} ## end else [ if ($self->{'options'}...)] |
2202
|
|
|
|
|
|
|
|
2203
|
2559
|
100
|
|
|
|
7465
|
if ( $k =~ /_latitude|_longitude/){ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2204
|
308
|
|
|
|
|
431
|
$v =~ s/\[deg\]$//i; |
2205
|
|
|
|
|
|
|
} elsif ( $k =~ /_time/){ |
2206
|
158
|
|
|
|
|
287
|
$v =~ s/\[gmt\]$//i; |
2207
|
|
|
|
|
|
|
} elsif ( $k =~ m"^/?delimiter$" ) { |
2208
|
89
|
100
|
|
|
|
175
|
unless ( $self->set_delim( $strict, $v ) ) { |
2209
|
1
|
50
|
|
|
|
4
|
if ($strict) { |
2210
|
0
|
|
|
|
|
0
|
$success = 0; |
2211
|
|
|
|
|
|
|
} |
2212
|
|
|
|
|
|
|
} |
2213
|
|
|
|
|
|
|
} elsif ( $k =~ m"^/?missing$" ) { |
2214
|
110
|
50
|
|
|
|
275
|
$self->{'missing'} = ( length($v) ? $v : $DEFAULT_MISSING ); |
2215
|
110
|
|
|
|
|
389
|
$self->{'missing_is_number'} = looks_like_number( $self->{'missing'} ); |
2216
|
|
|
|
|
|
|
} elsif ( $k =~ m"^/?above_detection_limit" ) { |
2217
|
3
|
50
|
|
|
|
7
|
$self->{'above_detection_limit'} = ( length($v) ? $v : $self->{'missing'} ); |
2218
|
3
|
|
|
|
|
13
|
$self->{'adl_is_number'} = looks_like_number( $self->{'above_detection_limit'} ); |
2219
|
|
|
|
|
|
|
} elsif ( $k =~ m"^/?below_detection_limit" ) { |
2220
|
3
|
50
|
|
|
|
5
|
$self->{'below_detection_limit'} = ( length($v) ? $v : $self->{'missing'} ); |
2221
|
3
|
|
|
|
|
7
|
$self->{'bdl_is_number'} = looks_like_number( $self->{'below_detection_limit'} ); |
2222
|
|
|
|
|
|
|
} |
2223
|
|
|
|
|
|
|
|
2224
|
2559
|
|
|
|
|
1969
|
$_[1] = $k; |
2225
|
2559
|
|
|
|
|
1688
|
$_[2] = $v; |
2226
|
|
|
|
|
|
|
|
2227
|
2559
|
|
|
|
|
2396
|
return $success; |
2228
|
|
|
|
|
|
|
} ## end sub validate_header |
2229
|
|
|
|
|
|
|
|
2230
|
|
|
|
|
|
|
=head2 set_delim($strict, $delim) |
2231
|
|
|
|
|
|
|
|
2232
|
|
|
|
|
|
|
Takes a string declaring the delim (IE: 'comma', 'space', etc) and updates the |
2233
|
|
|
|
|
|
|
object's internal delimiter regex. |
2234
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
=cut |
2236
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
sub set_delim { |
2238
|
89
|
|
|
89
|
1
|
102
|
my $self = shift; |
2239
|
89
|
|
|
|
|
74
|
my $strict = shift; |
2240
|
89
|
|
50
|
|
|
163
|
my $delim = shift || ''; |
2241
|
89
|
50
|
|
|
|
275
|
if ( $delim eq 'comma' ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2242
|
0
|
|
|
|
|
0
|
$delim = qr/\s*,\s*/; |
2243
|
|
|
|
|
|
|
} elsif ( $delim eq 'semicolon' ) { |
2244
|
0
|
|
|
|
|
0
|
$delim = qr/\s*;\s*/; |
2245
|
|
|
|
|
|
|
} elsif ( $delim eq 'space' ) { |
2246
|
87
|
|
|
|
|
322
|
$delim = qr/\s+/; |
2247
|
|
|
|
|
|
|
} elsif ( $delim eq 'tab' ) { |
2248
|
0
|
|
|
|
|
0
|
$delim = qr/\t/; |
2249
|
|
|
|
|
|
|
} elsif ($strict) { |
2250
|
1
|
|
|
|
|
185
|
carp("delimiter not understood"); |
2251
|
|
|
|
|
|
|
} else { |
2252
|
1
|
50
|
|
|
|
5
|
my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' ); |
2253
|
1
|
|
|
|
|
3
|
$self->{'headers'}{"${slash}delimiter"} = 'comma'; |
2254
|
1
|
|
|
|
|
3
|
$delim = undef; |
2255
|
|
|
|
|
|
|
} |
2256
|
89
|
|
|
|
|
230
|
$self->{'delim'} = $delim; |
2257
|
89
|
100
|
|
|
|
246
|
return ( $delim ? 1 : 0 ); |
2258
|
|
|
|
|
|
|
} ## end sub set_delim |
2259
|
|
|
|
|
|
|
|
2260
|
|
|
|
|
|
|
=head2 update_fields() |
2261
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
C runs through the currently cached rows and calls |
2263
|
|
|
|
|
|
|
C on each row. It then updates the /fields and /units |
2264
|
|
|
|
|
|
|
headers in the header hash. |
2265
|
|
|
|
|
|
|
|
2266
|
|
|
|
|
|
|
=cut |
2267
|
|
|
|
|
|
|
|
2268
|
|
|
|
|
|
|
sub update_fields { |
2269
|
28
|
|
|
28
|
1
|
26
|
my ($self) = @_; |
2270
|
28
|
100
|
66
|
|
|
111
|
if ( $self->{'options'}{'cache'} && $self->{'max_dataidx'} >= 0 ) { |
2271
|
4
|
|
|
|
|
6
|
foreach my $hash ( @{ $self->{'data'} } ) { |
|
4
|
|
|
|
|
7
|
|
2272
|
9
|
|
|
|
|
14
|
$self->add_and_remove_fields($hash); |
2273
|
|
|
|
|
|
|
} |
2274
|
|
|
|
|
|
|
} |
2275
|
|
|
|
|
|
|
|
2276
|
28
|
100
|
|
|
|
70
|
my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' ); |
2277
|
28
|
|
|
|
|
22
|
$self->{'headers'}{"${slash}fields"} = join( ',', @{ $self->{'actual_fields'} } ); |
|
28
|
|
|
|
|
80
|
|
2278
|
28
|
|
|
|
|
31
|
$self->{'headers'}{"${slash}units"} = join( ',', @{ $self->{'actual_units'} } ); |
|
28
|
|
|
|
|
85
|
|
2279
|
|
|
|
|
|
|
} ## end sub update_fields |
2280
|
|
|
|
|
|
|
|
2281
|
|
|
|
|
|
|
=head2 add_and_remove_fields(\%row) |
2282
|
|
|
|
|
|
|
|
2283
|
|
|
|
|
|
|
Given a reference to a row, this function deletes any fields removed with |
2284
|
|
|
|
|
|
|
C and adds an undefined or /missing value for each field added |
2285
|
|
|
|
|
|
|
via C. If C is set, an undefined value is |
2286
|
|
|
|
|
|
|
given, otherwise, it is filled with the /missing value. |
2287
|
|
|
|
|
|
|
|
2288
|
|
|
|
|
|
|
If C is set, this function adds missing date, time, |
2289
|
|
|
|
|
|
|
date_time, lat, lon, and depth fields to the retrieved row from the header. |
2290
|
|
|
|
|
|
|
|
2291
|
|
|
|
|
|
|
Needlessly returns the hash reference passed in. |
2292
|
|
|
|
|
|
|
|
2293
|
|
|
|
|
|
|
=cut |
2294
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
sub add_and_remove_fields { |
2296
|
274
|
|
|
274
|
1
|
246
|
my ( $self, $hash ) = @_; |
2297
|
274
|
|
|
|
|
657
|
foreach my $field ( keys(%$hash) ) { |
2298
|
1895
|
100
|
|
7730
|
|
2556
|
if ( ( firstidx { $_ eq $field } @{ $self->{'actual_fields'} } ) < 0 ) { |
|
7730
|
|
|
|
|
6993
|
|
|
1895
|
|
|
|
|
2485
|
|
2299
|
21
|
50
|
33
|
0
|
|
49
|
unless ( $self->{'options'}{'fill_ancillary_data'} && ( firstidx { $_ eq $field } keys( %{ $self->{'ancillary'} } ) ) >= 0 ) { |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
2300
|
21
|
|
|
|
|
38
|
delete( $hash->{$field} ); |
2301
|
|
|
|
|
|
|
} |
2302
|
|
|
|
|
|
|
} |
2303
|
|
|
|
|
|
|
} ## end foreach my $field (keys(%$hash...)) |
2304
|
|
|
|
|
|
|
|
2305
|
274
|
100
|
|
|
|
525
|
my $missing = ( $self->{'options'}{'missing_data_to_undef'} ? undef : $self->{'missing'} ); |
2306
|
274
|
|
|
|
|
623
|
while ( my ( $variable, $pad ) = each(%FIELD_FORMATTING) ) { |
2307
|
1918
|
|
33
|
|
|
4157
|
my $case_var = $self->{'case_conversion'}{$variable} || $variable; |
2308
|
1918
|
50
|
66
|
|
|
4248
|
if ( defined( $hash->{$case_var} ) && ( !defined($missing) || $hash->{$case_var} != $missing ) ) { |
|
|
|
66
|
|
|
|
|
2309
|
57
|
|
|
|
|
160
|
$hash->{$case_var} = sprintf( $pad, $hash->{$case_var} ); |
2310
|
|
|
|
|
|
|
} |
2311
|
|
|
|
|
|
|
} ## end while (my ($variable, $pad...)) |
2312
|
|
|
|
|
|
|
|
2313
|
274
|
100
|
|
|
|
415
|
if ( defined($self->{'ancillary'}) ) { |
2314
|
19
|
|
|
|
|
27
|
$self->{'ancillary_tmp'} = {}; |
2315
|
19
|
|
|
|
|
35
|
for my $variable (@FILL_ANCILLARY_DATA) { |
2316
|
247
|
100
|
|
|
|
318
|
if ( defined($self->{'ancillary'}{$variable}) ) { |
2317
|
173
|
|
|
|
|
224
|
my $value = $self->extrapolate_variables( $missing, $self->{'ancillary'}{$variable}, $hash ); |
2318
|
173
|
50
|
|
|
|
240
|
if ( defined($value) ) { |
2319
|
173
|
|
|
|
|
277
|
$hash->{$variable} = $value; |
2320
|
|
|
|
|
|
|
} |
2321
|
|
|
|
|
|
|
} ## end if ($self->{'ancillary'...}) |
2322
|
|
|
|
|
|
|
} ## end for my $variable (@FILL_ANCILLARY_DATA) |
2323
|
|
|
|
|
|
|
} ## end if ($self->{'ancillary'...}) |
2324
|
|
|
|
|
|
|
|
2325
|
274
|
|
|
|
|
188
|
foreach my $field ( @{ $self->{'actual_fields'} } ) { |
|
274
|
|
|
|
|
379
|
|
2326
|
1951
|
100
|
|
|
|
2451
|
if ( !exists( $hash->{$field} ) ) { |
2327
|
61
|
|
|
|
|
93
|
$hash->{$field} = $missing; |
2328
|
|
|
|
|
|
|
} |
2329
|
|
|
|
|
|
|
} |
2330
|
|
|
|
|
|
|
|
2331
|
274
|
|
|
|
|
294
|
return $hash; |
2332
|
|
|
|
|
|
|
} ## end sub add_and_remove_fields |
2333
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
=head2 guess_delim($line) |
2335
|
|
|
|
|
|
|
|
2336
|
|
|
|
|
|
|
C is is used to guess the delimiter of a line. It is not very |
2337
|
|
|
|
|
|
|
intelligent. If it sees any commas, it will assume the delimiter is a comma. |
2338
|
|
|
|
|
|
|
Then, it checks for tabs, spaces, then semi-colons. Returns 1 on success. If |
2339
|
|
|
|
|
|
|
it doesn't find any, it will throw a warning and return undef. |
2340
|
|
|
|
|
|
|
|
2341
|
|
|
|
|
|
|
=cut |
2342
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
sub guess_delim { |
2344
|
35
|
|
|
35
|
1
|
39
|
my ( $self, $line ) = @_; |
2345
|
35
|
|
|
|
|
36
|
my $delim_string = ''; |
2346
|
35
|
100
|
|
|
|
180
|
if ( $line =~ /,/ ) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2347
|
2
|
|
|
|
|
7
|
my $delim = qr/\s*,\s*/; |
2348
|
2
|
|
|
|
|
3
|
$self->{'delim'} = $delim; |
2349
|
2
|
|
|
|
|
3
|
$delim_string = 'comma'; |
2350
|
|
|
|
|
|
|
} elsif ( $line =~ /\t/ ) { |
2351
|
0
|
|
|
|
|
0
|
my $delim = qr/\t/; |
2352
|
0
|
|
|
|
|
0
|
$self->{'delim'} = $delim; |
2353
|
0
|
|
|
|
|
0
|
$delim_string = 'tab'; |
2354
|
|
|
|
|
|
|
} elsif ( $line =~ /\s+/ ) { |
2355
|
33
|
|
|
|
|
116
|
my $delim = qr/\s+/; |
2356
|
33
|
|
|
|
|
47
|
$self->{'delim'} = $delim; |
2357
|
33
|
|
|
|
|
51
|
$delim_string = 'space'; |
2358
|
|
|
|
|
|
|
} elsif ( $line =~ /;/ ) { |
2359
|
0
|
|
|
|
|
0
|
my $delim = qr/\s*;\s*/; |
2360
|
0
|
|
|
|
|
0
|
$self->{'delim'} = $delim; |
2361
|
0
|
|
|
|
|
0
|
$delim_string = 'semicolon'; |
2362
|
|
|
|
|
|
|
} else { |
2363
|
0
|
|
|
|
|
0
|
carp("No delimiter defined or can be guessed"); |
2364
|
0
|
|
|
|
|
0
|
return; |
2365
|
|
|
|
|
|
|
} |
2366
|
35
|
100
|
|
|
|
128
|
$self->{'headers'}{ ( $self->{'options'}{'keep_slashes'} ? '/' : '' ) . 'delimiter' } = $delim_string; |
2367
|
35
|
|
|
|
|
68
|
return 1; |
2368
|
|
|
|
|
|
|
} ## end sub guess_delim |
2369
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
=head2 ingest_row(\%data_row | \@data_row | $data_row | %data_row) |
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
For mostly internal use, parses arguments for C, C, and C |
2373
|
|
|
|
|
|
|
and returns a hash or hash reference of the data row. Given a hash reference, |
2374
|
|
|
|
|
|
|
it will merely return it. |
2375
|
|
|
|
|
|
|
|
2376
|
|
|
|
|
|
|
Given an array or array reference, it will assume each element is a field as |
2377
|
|
|
|
|
|
|
listed in either C or C. If the number of elements |
2378
|
|
|
|
|
|
|
matches C, it uses assumes it's that. If it doesn't match, it is |
2379
|
|
|
|
|
|
|
tried to match against C. If it doesn't match either, a warning is |
2380
|
|
|
|
|
|
|
issued and the return is undefined. |
2381
|
|
|
|
|
|
|
|
2382
|
|
|
|
|
|
|
Given a non-reference scalar, it will split the scalar based on the current |
2383
|
|
|
|
|
|
|
delimiter. If one is not defined, it is guessed. If it cannot be guessed, the |
2384
|
|
|
|
|
|
|
return is undefined. |
2385
|
|
|
|
|
|
|
|
2386
|
|
|
|
|
|
|
If the inputs are successfully parsed, all keys are turned lowercase. |
2387
|
|
|
|
|
|
|
|
2388
|
|
|
|
|
|
|
=cut |
2389
|
|
|
|
|
|
|
|
2390
|
|
|
|
|
|
|
sub ingest_row { |
2391
|
27
|
|
|
27
|
1
|
25
|
my $self = shift; |
2392
|
27
|
|
|
|
|
21
|
my %new_row; |
2393
|
27
|
100
|
|
|
|
52
|
if ( $#_ < 0 ) { |
2394
|
1
|
|
|
|
|
163
|
carp("Incorrect number of arguments to ingest_row()"); |
2395
|
1
|
|
|
|
|
108
|
return; |
2396
|
|
|
|
|
|
|
} |
2397
|
26
|
|
|
|
|
47
|
my $arrayref; |
2398
|
26
|
100
|
|
|
|
82
|
if ( ref( $_[0] ) eq 'HASH' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2399
|
14
|
|
|
|
|
15
|
%new_row = %{ shift(@_) }; |
|
14
|
|
|
|
|
44
|
|
2400
|
|
|
|
|
|
|
} elsif ( ref( $_[0] ) eq 'ARRAY' ) { |
2401
|
3
|
|
|
|
|
4
|
$arrayref = $_[0]; |
2402
|
|
|
|
|
|
|
} elsif ( !ref( $_[0] ) ) { |
2403
|
9
|
100
|
|
|
|
17
|
if ( $#_ == 0 ) { |
|
|
100
|
|
|
|
|
|
2404
|
7
|
50
|
66
|
|
|
23
|
if ( !$self->{'delim'} && !$self->guess_delim( $_[0] ) ) { |
2405
|
0
|
|
|
|
|
0
|
return; |
2406
|
|
|
|
|
|
|
} |
2407
|
7
|
|
|
|
|
50
|
$arrayref = [ split( $self->{'delim'}, $_[0] ) ]; |
2408
|
|
|
|
|
|
|
} elsif ( $#_ % 2 == 1 ) { |
2409
|
1
|
|
|
|
|
3
|
%new_row = @_; |
2410
|
|
|
|
|
|
|
} else { |
2411
|
1
|
|
|
|
|
107
|
carp('Even sized list, scalar, or hash/array reference expected'); |
2412
|
1
|
|
|
|
|
93
|
return; |
2413
|
|
|
|
|
|
|
} |
2414
|
|
|
|
|
|
|
} else { |
2415
|
0
|
|
|
|
|
0
|
carp("Arguments to ingest_row() not understood."); |
2416
|
0
|
|
|
|
|
0
|
return; |
2417
|
|
|
|
|
|
|
} |
2418
|
|
|
|
|
|
|
|
2419
|
25
|
100
|
|
|
|
51
|
if ($arrayref) { |
2420
|
10
|
|
|
|
|
6
|
my $iterator; |
2421
|
10
|
100
|
|
|
|
11
|
if ( scalar( @{ $self->{'actual_fields'} } ) == scalar( @{$arrayref} ) ) { |
|
10
|
50
|
|
|
|
12
|
|
|
10
|
|
|
|
|
14
|
|
2422
|
8
|
|
|
|
|
39
|
$iterator = each_arrayref( $self->{'actual_fields'}, $arrayref ); |
2423
|
2
|
|
|
|
|
2
|
} elsif ( scalar( @{ $self->{'fields'} } ) == scalar( @{$arrayref} ) ) { |
|
2
|
|
|
|
|
4
|
|
2424
|
2
|
|
|
|
|
9
|
$iterator = each_arrayref( $self->{'fields'}, $arrayref ); |
2425
|
2
|
|
|
|
|
4
|
$self->add_and_remove_fields( \%new_row ); |
2426
|
|
|
|
|
|
|
} else { |
2427
|
0
|
|
|
|
|
0
|
my $actual_fields = scalar( @{ $self->{'actual_fields'} } ); |
|
0
|
|
|
|
|
0
|
|
2428
|
0
|
|
|
|
|
0
|
my $fields = scalar( @{ $self->{'fields'} } ); |
|
0
|
|
|
|
|
0
|
|
2429
|
0
|
0
|
|
|
|
0
|
if ( $actual_fields == $fields ) { |
2430
|
0
|
|
|
|
|
0
|
carp("Invalid number of elements, expected $fields"); |
2431
|
|
|
|
|
|
|
} else { |
2432
|
0
|
|
|
|
|
0
|
carp("Invalid number of elements, expected $actual_fields or $fields"); |
2433
|
|
|
|
|
|
|
} |
2434
|
0
|
|
|
|
|
0
|
return; |
2435
|
|
|
|
|
|
|
} ## end else [ if (scalar(@{$self->{'actual_fields'...}}))] |
2436
|
10
|
|
|
|
|
41
|
while ( my ( $k, $v ) = $iterator->() ) { |
2437
|
52
|
|
|
|
|
142
|
$new_row{$k} = $v; |
2438
|
|
|
|
|
|
|
} |
2439
|
|
|
|
|
|
|
} ## end if ($arrayref) |
2440
|
|
|
|
|
|
|
|
2441
|
25
|
|
|
|
|
45
|
%new_row = map { lc($_) => $new_row{$_} } keys %new_row; |
|
71
|
|
|
|
|
152
|
|
2442
|
|
|
|
|
|
|
|
2443
|
25
|
50
|
|
|
|
52
|
if (wantarray) { |
2444
|
0
|
|
|
|
|
0
|
return %new_row; |
2445
|
|
|
|
|
|
|
} else { |
2446
|
25
|
|
|
|
|
47
|
return \%new_row; |
2447
|
|
|
|
|
|
|
} |
2448
|
|
|
|
|
|
|
} ## end sub ingest_row |
2449
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
=head2 find_ancillaries($field_name) |
2451
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
Used by C to traverse through a field's possible |
2453
|
|
|
|
|
|
|
substitutes in C<%ANCILLARY> and try to find the most suitable replacement. |
2454
|
|
|
|
|
|
|
Values of fields in C<%ANCILLARY> are array references, where each element is |
2455
|
|
|
|
|
|
|
either: |
2456
|
|
|
|
|
|
|
|
2457
|
|
|
|
|
|
|
=over 4 |
2458
|
|
|
|
|
|
|
|
2459
|
|
|
|
|
|
|
=item * a string of existing field names used to create the value |
2460
|
|
|
|
|
|
|
|
2461
|
|
|
|
|
|
|
=item * an array reference of the form [converter function, parsing regex |
2462
|
|
|
|
|
|
|
(optional), arguments to converter, ... ] |
2463
|
|
|
|
|
|
|
|
2464
|
|
|
|
|
|
|
=item * a hash reference of the form { header => qr/parsing_regex/ } |
2465
|
|
|
|
|
|
|
|
2466
|
|
|
|
|
|
|
=back |
2467
|
|
|
|
|
|
|
|
2468
|
|
|
|
|
|
|
If the element is an array reference and an argument requires a field from the |
2469
|
|
|
|
|
|
|
file, all arguments are parsed and the variables within them extrapolated, then |
2470
|
|
|
|
|
|
|
the array is put into C<< $self->{'ancillary'} >>. |
2471
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
If no value can be ascertained, it will not be added to the data rows. |
2473
|
|
|
|
|
|
|
|
2474
|
|
|
|
|
|
|
The value found is stored in C<< $self->{'ancillary'} >>. Returns 1 on |
2475
|
|
|
|
|
|
|
success, 0 if the field cannot be filled in. |
2476
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
=cut |
2478
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
sub find_ancillaries { |
2480
|
377
|
|
|
377
|
1
|
303
|
my ( $self, $field ) = @_; |
2481
|
377
|
100
|
|
|
|
552
|
if ( $self->{'ancillary'}{$field} ) { |
2482
|
95
|
|
|
|
|
104
|
return 1; |
2483
|
|
|
|
|
|
|
} |
2484
|
282
|
|
|
1679
|
|
475
|
my $idx = firstidx { $_ eq $field } @{ $self->{'fields_lc'} }; |
|
1679
|
|
|
|
|
1032
|
|
|
282
|
|
|
|
|
403
|
|
2485
|
282
|
100
|
|
|
|
524
|
if ( $idx >= 0 ) { |
2486
|
77
|
|
|
|
|
159
|
$self->{'ancillary'}{$field} = "\$\{$field\}"; |
2487
|
77
|
|
|
|
|
87
|
return 1; |
2488
|
|
|
|
|
|
|
} |
2489
|
|
|
|
|
|
|
|
2490
|
205
|
50
|
|
|
|
243
|
my $slash = ( $self->{'options'}{'keep_slashes'} ? '/' : '' ); |
2491
|
205
|
100
|
|
|
|
123
|
foreach my $attempt ( @{ $ANCILLARY{$field} || [] } ) { |
|
205
|
|
|
|
|
364
|
|
2492
|
349
|
100
|
|
|
|
529
|
if ( ref($attempt) eq 'HASH' ) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2493
|
242
|
|
|
|
|
145
|
keys( %{$attempt} ); #reset each() iterator between calls |
|
242
|
|
|
|
|
202
|
|
2494
|
242
|
|
|
|
|
170
|
while ( my ( $where, $regex ) = each( %{$attempt} ) ) { |
|
457
|
|
|
|
|
867
|
|
2495
|
242
|
100
|
66
|
|
|
772
|
if ( $where =~ /^\$/ ) { |
|
|
100
|
66
|
|
|
|
|
2496
|
101
|
100
|
|
564
|
|
411
|
if ( ( firstidx { "\$$_" eq $where } $self->fields() ) >= 0 ) { |
|
564
|
|
|
|
|
612
|
|
2497
|
15
|
|
|
33
|
|
57
|
$self->{'ancillary'}{$field} = [ sub { return shift; }, $regex, $where ]; |
|
33
|
|
|
|
|
35
|
|
2498
|
15
|
|
|
|
|
44
|
return 1; |
2499
|
|
|
|
|
|
|
} |
2500
|
|
|
|
|
|
|
} elsif ( defined( $self->{'headers'}{"$slash$where"} ) && $self->{'headers'}{"$slash$where"} =~ $regex && lc($1) ne 'na' ) { |
2501
|
12
|
|
|
|
|
35
|
$self->{'ancillary'}{$field} = $1; |
2502
|
12
|
|
|
|
|
22
|
return 1; |
2503
|
|
|
|
|
|
|
} |
2504
|
|
|
|
|
|
|
} ## end while (my ($where, $regex...)) |
2505
|
|
|
|
|
|
|
} elsif ( ref($attempt) eq 'ARRAY' ) { |
2506
|
64
|
|
|
|
|
87
|
my @attempt = @$attempt; |
2507
|
64
|
|
|
|
|
52
|
my $function = shift(@attempt); |
2508
|
64
|
|
|
|
|
41
|
my $regex; |
2509
|
64
|
100
|
|
|
|
120
|
if ( ref( $attempt[0] ) eq 'Regexp' ) { |
2510
|
30
|
|
|
|
|
21
|
$regex = shift(@attempt); |
2511
|
|
|
|
|
|
|
} |
2512
|
64
|
|
|
|
|
41
|
my $success = 1; |
2513
|
64
|
|
|
|
|
47
|
my @args; |
2514
|
64
|
|
|
|
|
54
|
foreach my $argument (@attempt) { |
2515
|
64
|
|
|
|
|
42
|
my $tmparg = $argument; |
2516
|
64
|
|
|
|
|
231
|
for ( $argument =~ /\$(\{\w+\}|\w+)/g ) { |
2517
|
86
|
|
|
|
|
168
|
( my $variable = $_ ) =~ s/^\{|\}$//g; |
2518
|
86
|
|
|
|
|
113
|
$success &= $self->find_ancillaries($variable); |
2519
|
86
|
100
|
|
|
|
92
|
if ($success) { |
2520
|
36
|
50
|
|
|
|
43
|
if ( ref( $self->{'ancillary'}{$variable} ) ) { |
2521
|
0
|
|
|
|
|
0
|
$tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g; |
2522
|
|
|
|
|
|
|
} else { |
2523
|
36
|
|
|
|
|
30
|
my $value = $self->{'ancillary'}{$variable}; |
2524
|
36
|
|
|
|
|
402
|
$tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g; |
2525
|
36
|
|
|
|
|
212
|
$tmparg =~ s/\$\{$variable\}/$value/g; |
2526
|
|
|
|
|
|
|
} |
2527
|
|
|
|
|
|
|
} else { |
2528
|
50
|
|
|
|
|
44
|
last; |
2529
|
|
|
|
|
|
|
} |
2530
|
|
|
|
|
|
|
} ## end for ($argument =~ /\$(\{\w+\}|\w+)/g) |
2531
|
64
|
|
|
|
|
101
|
push( @args, $tmparg ); |
2532
|
|
|
|
|
|
|
} ## end foreach my $argument (@attempt) |
2533
|
64
|
100
|
|
|
|
103
|
if ($success) { |
2534
|
14
|
100
|
|
|
|
24
|
if ($regex) { |
2535
|
8
|
|
|
|
|
8
|
unshift( @args, $regex ); |
2536
|
|
|
|
|
|
|
} |
2537
|
14
|
|
|
|
|
25
|
$self->{'ancillary'}{$field} = [ $function, @args ]; |
2538
|
14
|
|
|
|
|
28
|
return 1; |
2539
|
|
|
|
|
|
|
} ## end if ($success) |
2540
|
|
|
|
|
|
|
} elsif ( !ref($attempt) ) { |
2541
|
43
|
|
|
|
|
32
|
my $success = 1; |
2542
|
43
|
|
|
|
|
37
|
my $tmparg = $attempt; |
2543
|
43
|
|
|
|
|
177
|
for ( $attempt =~ /\$(\{\w+\}|\w+)/g ) { |
2544
|
83
|
|
|
|
|
151
|
( my $variable = $_ ) =~ s/^\{|\}$//g; |
2545
|
83
|
|
|
|
|
109
|
$success &= $self->find_ancillaries($variable); |
2546
|
83
|
100
|
|
|
|
94
|
if ($success) { |
2547
|
65
|
100
|
|
|
|
86
|
if ( ref( $self->{'ancillary'}{$variable} ) ) { |
2548
|
6
|
|
|
|
|
43
|
$tmparg =~ s/\$$variable(\W|\b|$)/\$\{$variable\}$1/g; |
2549
|
|
|
|
|
|
|
} else { |
2550
|
59
|
|
|
|
|
51
|
my $value = $self->{'ancillary'}{$variable}; |
2551
|
59
|
|
|
|
|
688
|
$tmparg =~ s/\$$variable(\W|\b|$)/$value$1/g; |
2552
|
59
|
|
|
|
|
427
|
$tmparg =~ s/\$\{$variable\}/$value/g; |
2553
|
|
|
|
|
|
|
} |
2554
|
|
|
|
|
|
|
} else { |
2555
|
18
|
|
|
|
|
18
|
last; |
2556
|
|
|
|
|
|
|
} |
2557
|
|
|
|
|
|
|
} ## end for ($attempt =~ /\$(\{\w+\}|\w+)/g) |
2558
|
43
|
100
|
|
|
|
66
|
if ($success) { |
2559
|
25
|
|
|
|
|
40
|
$self->{'ancillary'}{$field} = $tmparg; |
2560
|
25
|
|
|
|
|
41
|
return 1; |
2561
|
|
|
|
|
|
|
} |
2562
|
|
|
|
|
|
|
} ## end elsif (!ref($attempt)) |
2563
|
|
|
|
|
|
|
} ## end foreach my $attempt (@{$ANCILLARY...}) |
2564
|
|
|
|
|
|
|
|
2565
|
139
|
|
|
|
|
153
|
return 0; |
2566
|
|
|
|
|
|
|
} ## end sub find_ancillaries |
2567
|
|
|
|
|
|
|
|
2568
|
|
|
|
|
|
|
=head2 extrapolate_variables($missing, $expression, \%row) |
2569
|
|
|
|
|
|
|
|
2570
|
|
|
|
|
|
|
Used by C to convert a parsed ancillary string, such as |
2571
|
|
|
|
|
|
|
C<'$year$month$day'>, into a real value using the fields from the C<\%row>. |
2572
|
|
|
|
|
|
|
C<$expression>s are strings figured out by C and stored in |
2573
|
|
|
|
|
|
|
C<< $self->{'ancillary'} >>. |
2574
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
The return is undefined if a value cannot be created (IE: a required field is |
2576
|
|
|
|
|
|
|
missing). |
2577
|
|
|
|
|
|
|
|
2578
|
|
|
|
|
|
|
=cut |
2579
|
|
|
|
|
|
|
|
2580
|
|
|
|
|
|
|
sub extrapolate_variables { |
2581
|
220
|
|
|
220
|
1
|
189
|
my ( $self, $missing, $expression, $row ) = @_; |
2582
|
|
|
|
|
|
|
|
2583
|
220
|
100
|
|
|
|
237
|
if ( ref($expression) ) { |
2584
|
47
|
|
|
|
|
57
|
return $self->extrapolate_function( $missing, $expression, $row ); |
2585
|
|
|
|
|
|
|
} else { |
2586
|
173
|
|
|
|
|
116
|
my $tmpexpr = $expression; |
2587
|
173
|
|
|
|
|
472
|
for ( $expression =~ /\$(\{\w+\}|\w+)/g ) { |
2588
|
221
|
|
|
|
|
399
|
( my $variable = $_ ) =~ s/^\{|\}$//g; |
2589
|
221
|
|
|
|
|
104
|
my $value; |
2590
|
221
|
100
|
66
|
|
|
531
|
if ( $self->{'ancillary_tmp'}{$variable} ) { |
|
|
50
|
33
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2591
|
123
|
|
|
|
|
110
|
$value = $self->{'ancillary_tmp'}{$variable}; |
2592
|
|
|
|
|
|
|
} elsif ( defined( $row->{$variable} ) && ( !defined($missing) || $row->{$variable} != $missing ) ) { |
2593
|
98
|
|
|
|
|
85
|
$value = $row->{$variable}; |
2594
|
98
|
|
|
|
|
127
|
$self->{'ancillary_tmp'}{$variable} = $value; |
2595
|
|
|
|
|
|
|
} elsif ( ref( $self->{'ancillary'}{$variable} ) ) { |
2596
|
0
|
|
|
|
|
0
|
$value = $self->extrapolate_function( $missing, $self->{'ancillary'}{$variable}, $row ); |
2597
|
0
|
0
|
|
|
|
0
|
if ( !defined($value) ) { |
2598
|
0
|
|
|
|
|
0
|
return; |
2599
|
|
|
|
|
|
|
} |
2600
|
|
|
|
|
|
|
} else { |
2601
|
0
|
|
|
|
|
0
|
return; |
2602
|
|
|
|
|
|
|
} |
2603
|
|
|
|
|
|
|
|
2604
|
221
|
|
|
|
|
1908
|
$tmpexpr =~ s/\$$variable(\W|\b|$)/$value$1/g; |
2605
|
221
|
|
|
|
|
1132
|
$tmpexpr =~ s/\$\{$variable\}/$value/g; |
2606
|
|
|
|
|
|
|
} ## end for ($expression =~ /\$(\{\w+\}|\w+)/g) |
2607
|
173
|
|
|
|
|
271
|
return $tmpexpr; |
2608
|
|
|
|
|
|
|
} ## end else [ if (ref($expression)) ] |
2609
|
|
|
|
|
|
|
} ## end sub extrapolate_variables |
2610
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
=head2 extrapolate_function($missing, $expression, \%row) |
2612
|
|
|
|
|
|
|
|
2613
|
|
|
|
|
|
|
If the value stored in C<< $self->{'ancillary'} >> is an array reference, this |
2614
|
|
|
|
|
|
|
function uses the array to create an actual value. See |
2615
|
|
|
|
|
|
|
L for an explanation of the |
2616
|
|
|
|
|
|
|
array. |
2617
|
|
|
|
|
|
|
|
2618
|
|
|
|
|
|
|
=cut |
2619
|
|
|
|
|
|
|
|
2620
|
|
|
|
|
|
|
sub extrapolate_function { |
2621
|
47
|
|
|
47
|
1
|
39
|
my ( $self, $missing, $expression, $row ) = @_; |
2622
|
47
|
|
|
|
|
27
|
my $value; |
2623
|
47
|
|
|
|
|
58
|
my ( $function, @args ) = @$expression; |
2624
|
47
|
|
|
|
|
31
|
my $regex; |
2625
|
|
|
|
|
|
|
|
2626
|
47
|
100
|
|
|
|
73
|
if ( ref( $args[0] ) eq 'Regexp' ) { |
2627
|
41
|
|
|
|
|
37
|
$regex = shift(@args); |
2628
|
|
|
|
|
|
|
} |
2629
|
47
|
|
|
|
|
53
|
for (@args) { |
2630
|
47
|
|
|
|
|
63
|
$_ = $self->extrapolate_variables( $missing, $_, $row ); |
2631
|
47
|
50
|
|
|
|
79
|
if ( !defined($_) ) { |
2632
|
0
|
|
|
|
|
0
|
return; |
2633
|
|
|
|
|
|
|
} |
2634
|
|
|
|
|
|
|
} ## end for (@args) |
2635
|
47
|
|
|
|
|
67
|
$value = &$function(@args); |
2636
|
47
|
100
|
|
|
|
69
|
if ($regex) { |
2637
|
41
|
50
|
|
|
|
144
|
if ( $value =~ $regex ) { |
2638
|
41
|
|
|
|
|
63
|
$value = $1; |
2639
|
|
|
|
|
|
|
} |
2640
|
|
|
|
|
|
|
} |
2641
|
|
|
|
|
|
|
|
2642
|
47
|
|
|
|
|
99
|
return $value; |
2643
|
|
|
|
|
|
|
} ## end sub extrapolate_function |
2644
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
=head1 STATIC METHODS |
2646
|
|
|
|
|
|
|
|
2647
|
|
|
|
|
|
|
=head2 strip(@list) |
2648
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
my @space_filled_lines = (' line1 ', ' line2', 'line3 ', 'line4'); |
2650
|
|
|
|
|
|
|
strip(@space_filled_lines); |
2651
|
|
|
|
|
|
|
print @space_filled_lines; #line1line2line3line4 |
2652
|
|
|
|
|
|
|
|
2653
|
|
|
|
|
|
|
Runs through the list and removes leading and trailing whitespace. All changes |
2654
|
|
|
|
|
|
|
are made in place. |
2655
|
|
|
|
|
|
|
|
2656
|
|
|
|
|
|
|
It is literally this: |
2657
|
|
|
|
|
|
|
|
2658
|
|
|
|
|
|
|
sub strip { |
2659
|
|
|
|
|
|
|
s/^\s+|\s+$//g for @_; |
2660
|
|
|
|
|
|
|
} |
2661
|
|
|
|
|
|
|
|
2662
|
|
|
|
|
|
|
=cut |
2663
|
|
|
|
|
|
|
|
2664
|
|
|
|
|
|
|
# Not an object method! |
2665
|
|
|
|
|
|
|
sub strip { |
2666
|
8438
|
|
|
8438
|
1
|
30754
|
s/^\s+|\s+$//g for @_; |
2667
|
|
|
|
|
|
|
} |
2668
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
=head2 julian_to_greg($yyyyjjj) |
2670
|
|
|
|
|
|
|
|
2671
|
|
|
|
|
|
|
Converts a date in the day of year format YYYYJJJ into YYYYMMDD. Returns the |
2672
|
|
|
|
|
|
|
newly formatted string or undefined if the input does not match the required |
2673
|
|
|
|
|
|
|
format. |
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
This uses the C function from C to do the heavy |
2676
|
|
|
|
|
|
|
lifting. |
2677
|
|
|
|
|
|
|
|
2678
|
|
|
|
|
|
|
=cut |
2679
|
|
|
|
|
|
|
|
2680
|
|
|
|
|
|
|
# Not an object method! |
2681
|
|
|
|
|
|
|
sub julian_to_greg { |
2682
|
14
|
|
|
14
|
1
|
13
|
my ($yyyyjjj) = @_; |
2683
|
14
|
50
|
|
|
|
39
|
if ( $yyyyjjj =~ /^(\d{4})(\d{3})$/ ) { |
2684
|
14
|
|
|
|
|
72
|
my ( $y, $m, $d ) = Add_Delta_Days( $1, 1, 1, $2 - 1 ); |
2685
|
14
|
|
|
|
|
42
|
return sprintf( '%04d%02d%02d', $y, $m, $d ); |
2686
|
|
|
|
|
|
|
} |
2687
|
0
|
|
|
|
|
|
return; |
2688
|
|
|
|
|
|
|
} ## end sub julian_to_greg |
2689
|
|
|
|
|
|
|
|
2690
|
|
|
|
|
|
|
=head1 CAVEATS/ODDITIES |
2691
|
|
|
|
|
|
|
|
2692
|
|
|
|
|
|
|
=head2 Duplicate Fields |
2693
|
|
|
|
|
|
|
|
2694
|
|
|
|
|
|
|
This class will not allow a field to be added to the object if a field of the |
2695
|
|
|
|
|
|
|
same name already exists. If a file being read has duplicate field names, only |
2696
|
|
|
|
|
|
|
the B one is used. No warning is issued. If C is used to |
2697
|
|
|
|
|
|
|
remove it, only the first instance will be deleted. To delete all instances, |
2698
|
|
|
|
|
|
|
use C<< $sb_file->remove_field($sb_file->find_fields('chl')) >>. This may |
2699
|
|
|
|
|
|
|
change in future releases. |
2700
|
|
|
|
|
|
|
|
2701
|
|
|
|
|
|
|
=head2 Changing Delimiter or Missing Value |
2702
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
Modifying the delimiter header on a file that is being read will cause any |
2704
|
|
|
|
|
|
|
non-cached rows to be split by the new delimiter, which should break most/all |
2705
|
|
|
|
|
|
|
files. If the delimiter must be changed, call C to cache all the rows, |
2706
|
|
|
|
|
|
|
then change it. This will obviously not work if caching is turned off. The |
2707
|
|
|
|
|
|
|
same is true for setting the missing value, but only really applies when the |
2708
|
|
|
|
|
|
|
C option is used (same goes to below detection limit). |
2709
|
|
|
|
|
|
|
|
2710
|
|
|
|
|
|
|
=head2 Below Detection Limit |
2711
|
|
|
|
|
|
|
|
2712
|
|
|
|
|
|
|
Below detection limit is only partially supported. If C is |
2713
|
|
|
|
|
|
|
used, fields equal to C will be set to C, as |
2714
|
|
|
|
|
|
|
well. Files modified while using C will have all data |
2715
|
|
|
|
|
|
|
equal to C written out set to the missing value instead |
2716
|
|
|
|
|
|
|
of the below detection limit value. If the below detection limit value is equal |
2717
|
|
|
|
|
|
|
to the missing value or C is used, the |
2718
|
|
|
|
|
|
|
C header will not be written. |
2719
|
|
|
|
|
|
|
|
2720
|
|
|
|
|
|
|
=head1 AUTHOR |
2721
|
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
Jason Lefler, C<< >> |
2723
|
|
|
|
|
|
|
|
2724
|
|
|
|
|
|
|
=head1 BUGS |
2725
|
|
|
|
|
|
|
|
2726
|
|
|
|
|
|
|
Please report any bugs or feature requests to C
|
2727
|
|
|
|
|
|
|
rt.cpan.org>, or through the web interface at |
2728
|
|
|
|
|
|
|
L. I will be |
2729
|
|
|
|
|
|
|
notified, and then you'll automatically be notified of progress on your bug as |
2730
|
|
|
|
|
|
|
I make changes. |
2731
|
|
|
|
|
|
|
|
2732
|
|
|
|
|
|
|
=head1 SUPPORT |
2733
|
|
|
|
|
|
|
|
2734
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
2735
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
perldoc SeaBASS::File |
2737
|
|
|
|
|
|
|
|
2738
|
|
|
|
|
|
|
You can also look for information at: |
2739
|
|
|
|
|
|
|
|
2740
|
|
|
|
|
|
|
=over 4 |
2741
|
|
|
|
|
|
|
|
2742
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
2743
|
|
|
|
|
|
|
|
2744
|
|
|
|
|
|
|
L |
2745
|
|
|
|
|
|
|
|
2746
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
2747
|
|
|
|
|
|
|
|
2748
|
|
|
|
|
|
|
L |
2749
|
|
|
|
|
|
|
|
2750
|
|
|
|
|
|
|
=item * CPAN Ratings |
2751
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
L |
2753
|
|
|
|
|
|
|
|
2754
|
|
|
|
|
|
|
=item * Search CPAN |
2755
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
L |
2757
|
|
|
|
|
|
|
|
2758
|
|
|
|
|
|
|
=back |
2759
|
|
|
|
|
|
|
|
2760
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
2761
|
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
Copyright 2014 Jason Lefler. |
2763
|
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under |
2765
|
|
|
|
|
|
|
the terms of either: the GNU General Public License as published by the Free |
2766
|
|
|
|
|
|
|
Software Foundation; or the Artistic License. |
2767
|
|
|
|
|
|
|
|
2768
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
2769
|
|
|
|
|
|
|
|
2770
|
|
|
|
|
|
|
=cut |
2771
|
|
|
|
|
|
|
|
2772
|
|
|
|
|
|
|
1; # End of SeaBASS::File |