line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Date::Reformat; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Date::Reformat - Rearrange date strings |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Date::Reformat; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $parser = Date::Reformat->new( |
12
|
|
|
|
|
|
|
parser => { |
13
|
|
|
|
|
|
|
regex => qr/^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)$/, |
14
|
|
|
|
|
|
|
params => [qw(year month day hour minute second)], |
15
|
|
|
|
|
|
|
}, |
16
|
|
|
|
|
|
|
defaults => { |
17
|
|
|
|
|
|
|
time_zone => 'America/New_York', |
18
|
|
|
|
|
|
|
}, |
19
|
|
|
|
|
|
|
transformations => [ |
20
|
|
|
|
|
|
|
{ |
21
|
|
|
|
|
|
|
from => 'year', |
22
|
|
|
|
|
|
|
to => 'century', |
23
|
|
|
|
|
|
|
coderef => sub { int($_[0] / 100) }, |
24
|
|
|
|
|
|
|
}, |
25
|
|
|
|
|
|
|
], |
26
|
|
|
|
|
|
|
formatter => { |
27
|
|
|
|
|
|
|
sprintf => '%s-%02d-%02dT%02d:%02d:02d %s', |
28
|
|
|
|
|
|
|
params => [qw(year month day hour minute second time_zone)], |
29
|
|
|
|
|
|
|
}, |
30
|
|
|
|
|
|
|
); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my $parser = Date::Reformat->new( |
33
|
|
|
|
|
|
|
parser => { |
34
|
|
|
|
|
|
|
strptime => '%Y-%m-%dT%M:%H:%S', |
35
|
|
|
|
|
|
|
# or heuristic => 'ymd', # http://www.postgresql.org/docs/9.2/static/datetime-input-rules.html |
36
|
|
|
|
|
|
|
}, |
37
|
|
|
|
|
|
|
defaults => { |
38
|
|
|
|
|
|
|
time_zone => 'America/New_York', |
39
|
|
|
|
|
|
|
}, |
40
|
|
|
|
|
|
|
formatter => { |
41
|
|
|
|
|
|
|
strftime => '%Y-%m-%dT%M:%H:%S %Z', |
42
|
|
|
|
|
|
|
# or data_structure => 'hashref' || 'hash' || 'arrayref' || 'array' |
43
|
|
|
|
|
|
|
# or coderef => sub { my ($y, $m, $d) = @_; DateTime->new(year => $y, month => $m, day => $d) }, |
44
|
|
|
|
|
|
|
# params => [qw(year month day)], |
45
|
|
|
|
|
|
|
}, |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
my $reformatted_string = $parser->reformat_date($date_string); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 DESCRIPTION |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This module aims to be a lightweight and flexible tool for rearranging |
53
|
|
|
|
|
|
|
components of a date string, then returning the components in the order |
54
|
|
|
|
|
|
|
and structure specified. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=cut |
57
|
|
|
|
|
|
|
|
58
|
11
|
|
|
11
|
|
197296
|
use 5.010000; |
|
11
|
|
|
|
|
37
|
|
|
11
|
|
|
|
|
418
|
|
59
|
11
|
|
|
11
|
|
54
|
use strict; |
|
11
|
|
|
|
|
17
|
|
|
11
|
|
|
|
|
335
|
|
60
|
11
|
|
|
11
|
|
66
|
use warnings; |
|
11
|
|
|
|
|
17
|
|
|
11
|
|
|
|
|
17763
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
our $VERSION = '0.02'; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
my $MONTH_LOOKUP = { |
65
|
|
|
|
|
|
|
}; |
66
|
|
|
|
|
|
|
{ |
67
|
|
|
|
|
|
|
# Lookups for month abbreviations. |
68
|
|
|
|
|
|
|
my $c = 0; |
69
|
|
|
|
|
|
|
foreach my $abbr (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) { |
70
|
|
|
|
|
|
|
$MONTH_LOOKUP->{'abbr'}->{lc($abbr)} = ++$c; |
71
|
|
|
|
|
|
|
$MONTH_LOOKUP->{'number'}->{$c}->{'abbr'} = $abbr; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
my $TOKENS = { |
76
|
|
|
|
|
|
|
'year' => { |
77
|
|
|
|
|
|
|
'regex' => q/(?\d{4})/, |
78
|
|
|
|
|
|
|
'sprintf' => '%04d', |
79
|
|
|
|
|
|
|
}, |
80
|
|
|
|
|
|
|
'year_abbr' => { |
81
|
|
|
|
|
|
|
'regex' => q/(?\d{2})/, |
82
|
|
|
|
|
|
|
'sprintf' => '%02d', |
83
|
|
|
|
|
|
|
}, |
84
|
|
|
|
|
|
|
'month' => { |
85
|
|
|
|
|
|
|
'regex' => q/(?\d\d?)/, |
86
|
|
|
|
|
|
|
'sprintf' => '%02d', |
87
|
|
|
|
|
|
|
}, |
88
|
|
|
|
|
|
|
'month_no_padding' => { |
89
|
|
|
|
|
|
|
'regex' => q/(?\d\d?)/, |
90
|
|
|
|
|
|
|
'sprintf' => '%d', |
91
|
|
|
|
|
|
|
'storage' => 'month', |
92
|
|
|
|
|
|
|
}, |
93
|
|
|
|
|
|
|
'month_name' => { |
94
|
|
|
|
|
|
|
'regex' => q/(?January|February|March|April|May|June|July|August|September|October|November|December)/, |
95
|
|
|
|
|
|
|
'sprintf' => '%s', |
96
|
|
|
|
|
|
|
}, |
97
|
|
|
|
|
|
|
'month_abbr' => { |
98
|
|
|
|
|
|
|
'regex' => q/(?Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/, |
99
|
|
|
|
|
|
|
'sprintf' => '%s', |
100
|
|
|
|
|
|
|
}, |
101
|
|
|
|
|
|
|
'day' => { |
102
|
|
|
|
|
|
|
'regex' => q/(?\d\d?)/, |
103
|
|
|
|
|
|
|
'sprintf' => '%02d', |
104
|
|
|
|
|
|
|
}, |
105
|
|
|
|
|
|
|
'day_name' => { |
106
|
|
|
|
|
|
|
'regex' => q/(?Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)/, |
107
|
|
|
|
|
|
|
'sprintf' => '%s', |
108
|
|
|
|
|
|
|
}, |
109
|
|
|
|
|
|
|
'day_abbr' => { |
110
|
|
|
|
|
|
|
'regex' => q/(?Mon|Tues?|Wed|Thur?|Fri|Sat|Sun)/, |
111
|
|
|
|
|
|
|
'sprintf' => '%s', |
112
|
|
|
|
|
|
|
}, |
113
|
|
|
|
|
|
|
'day_of_year' => { |
114
|
|
|
|
|
|
|
'regex' => q/(?\d\d?\d?)/, |
115
|
|
|
|
|
|
|
'sprintf' => '%03d', |
116
|
|
|
|
|
|
|
}, |
117
|
|
|
|
|
|
|
'julian_day' => { |
118
|
|
|
|
|
|
|
'regex' => q/J(?\d+)/, |
119
|
|
|
|
|
|
|
'sprintf' => '%s', |
120
|
|
|
|
|
|
|
'constraint' => sub { $_[0] >= 0 }, |
121
|
|
|
|
|
|
|
}, |
122
|
|
|
|
|
|
|
'era_abbr' => { |
123
|
|
|
|
|
|
|
'regex' => q/(?BC|AD|BCE|CE)/, |
124
|
|
|
|
|
|
|
'sprintf' => '%s', |
125
|
|
|
|
|
|
|
}, |
126
|
|
|
|
|
|
|
'hour' => { |
127
|
|
|
|
|
|
|
'regex' => q/(?\d\d?)/, |
128
|
|
|
|
|
|
|
'sprintf' => '%02d', |
129
|
|
|
|
|
|
|
'constraint' => sub { $_[0] >= 0 && $_[0] < 24 }, |
130
|
|
|
|
|
|
|
}, |
131
|
|
|
|
|
|
|
'hour_12' => { |
132
|
|
|
|
|
|
|
'regex' => q/(?\d\d?)/, |
133
|
|
|
|
|
|
|
'sprintf' => '%d', |
134
|
|
|
|
|
|
|
}, |
135
|
|
|
|
|
|
|
'minute' => { |
136
|
|
|
|
|
|
|
'regex' => q/(?\d\d)/, |
137
|
|
|
|
|
|
|
'sprintf' => '%02d', |
138
|
|
|
|
|
|
|
'constraint' => sub { $_[0] >= 0 && $_[0] < 60 }, |
139
|
|
|
|
|
|
|
}, |
140
|
|
|
|
|
|
|
'second' => { |
141
|
|
|
|
|
|
|
'regex' => q/(?\d\d)/, |
142
|
|
|
|
|
|
|
'sprintf' => '%02d', |
143
|
|
|
|
|
|
|
}, |
144
|
|
|
|
|
|
|
'am_or_pm' => { |
145
|
|
|
|
|
|
|
'regex' => q/(?(?i)[ap]\.?m\.?)/, |
146
|
|
|
|
|
|
|
'sprintf' => '%s', |
147
|
|
|
|
|
|
|
}, |
148
|
|
|
|
|
|
|
'time_zone' => { |
149
|
|
|
|
|
|
|
'regex' => q/(?Z|UTC|[[:alpha:]]{3,}(?:\/[[:alpha:]]+)?)/, |
150
|
|
|
|
|
|
|
'sprintf' => '%s', |
151
|
|
|
|
|
|
|
}, |
152
|
|
|
|
|
|
|
'time_zone_offset' => { |
153
|
|
|
|
|
|
|
'regex' => q|(?[-+]\d\d?(?:\d\d)?)|, |
154
|
|
|
|
|
|
|
'sprintf' => '%s', |
155
|
|
|
|
|
|
|
}, |
156
|
|
|
|
|
|
|
'phrase' => { |
157
|
|
|
|
|
|
|
'regex' => q/(?(?i)today|tomorrow|yesterday|(?:next|last)\w+(?:week|month|year)|\d+\w+(?:seconds?|minutes?|hours?|days?|weeks?|months?|years?)\w+(?:ago|from\w+now))/, |
158
|
|
|
|
|
|
|
'sprintf' => '%s', |
159
|
|
|
|
|
|
|
}, |
160
|
|
|
|
|
|
|
}; |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
my $STRPTIME_PREPROCESS = [ |
163
|
|
|
|
|
|
|
{ |
164
|
|
|
|
|
|
|
'token' => '%c', |
165
|
|
|
|
|
|
|
'replacement' => '%c', # TODO: Perhaps use Scalar::Defer, and look up locale datetime format only if needed. |
166
|
|
|
|
|
|
|
}, |
167
|
|
|
|
|
|
|
{ |
168
|
|
|
|
|
|
|
'token' => '%D', |
169
|
|
|
|
|
|
|
'replacement' => '%m/%d/%y', |
170
|
|
|
|
|
|
|
}, |
171
|
|
|
|
|
|
|
{ |
172
|
|
|
|
|
|
|
'token' => '%F', |
173
|
|
|
|
|
|
|
'replacement' => '%Y-%m-%d', |
174
|
|
|
|
|
|
|
}, |
175
|
|
|
|
|
|
|
{ |
176
|
|
|
|
|
|
|
'token' => '%R', |
177
|
|
|
|
|
|
|
'replacement' => '%H:%M', |
178
|
|
|
|
|
|
|
}, |
179
|
|
|
|
|
|
|
{ |
180
|
|
|
|
|
|
|
'token' => '%r', |
181
|
|
|
|
|
|
|
'replacement' => '%I:%M:%S %p', # TODO: This may be affected by locale. |
182
|
|
|
|
|
|
|
}, |
183
|
|
|
|
|
|
|
{ |
184
|
|
|
|
|
|
|
'token' => '%T', |
185
|
|
|
|
|
|
|
'replacement' => '%H:%M:%S', |
186
|
|
|
|
|
|
|
}, |
187
|
|
|
|
|
|
|
{ |
188
|
|
|
|
|
|
|
'token' => '%X', |
189
|
|
|
|
|
|
|
'replacement' => '%X', # TODO: Perhaps use Scalar::Defer, and look up locale time format only if needed. |
190
|
|
|
|
|
|
|
}, |
191
|
|
|
|
|
|
|
{ |
192
|
|
|
|
|
|
|
'token' => '%x', |
193
|
|
|
|
|
|
|
'replacement' => '%x', # TODO: Perhaps use Scalar::Defer, and look up locale date format only if needed. |
194
|
|
|
|
|
|
|
}, |
195
|
|
|
|
|
|
|
]; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
my $STRPTIME_POSTPROCESS = [ |
198
|
|
|
|
|
|
|
{ |
199
|
|
|
|
|
|
|
'token' => '%n', |
200
|
|
|
|
|
|
|
'replacement' => '\s+', |
201
|
|
|
|
|
|
|
}, |
202
|
|
|
|
|
|
|
{ |
203
|
|
|
|
|
|
|
'token' => '%t', |
204
|
|
|
|
|
|
|
'replacement' => '\s+', |
205
|
|
|
|
|
|
|
}, |
206
|
|
|
|
|
|
|
{ |
207
|
|
|
|
|
|
|
'token' => '%%', |
208
|
|
|
|
|
|
|
'replacement' => quotemeta('%'), |
209
|
|
|
|
|
|
|
}, |
210
|
|
|
|
|
|
|
]; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
my $STRFTIME_POSTPROCESS = [ |
213
|
|
|
|
|
|
|
{ |
214
|
|
|
|
|
|
|
'token' => '%n', |
215
|
|
|
|
|
|
|
'replacement' => "\n", |
216
|
|
|
|
|
|
|
}, |
217
|
|
|
|
|
|
|
{ |
218
|
|
|
|
|
|
|
'token' => '%t', |
219
|
|
|
|
|
|
|
'replacement' => "\t", |
220
|
|
|
|
|
|
|
}, |
221
|
|
|
|
|
|
|
]; |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
my $DEFAULT_STRPTIME_MAPPINGS = { |
224
|
|
|
|
|
|
|
'%A' => 'day_name', # TODO |
225
|
|
|
|
|
|
|
'%a' => 'day_abbr', |
226
|
|
|
|
|
|
|
'%B' => 'month_name', # TODO |
227
|
|
|
|
|
|
|
'%b' => 'month_abbr', |
228
|
|
|
|
|
|
|
'%C' => 'century', # TODO |
229
|
|
|
|
|
|
|
'%d' => 'day', |
230
|
|
|
|
|
|
|
'%e' => 'day', # TODO: This one is space-padded. |
231
|
|
|
|
|
|
|
'%G' => 'week_year', # TODO |
232
|
|
|
|
|
|
|
'%g' => 'week_year_abbr', # TODO |
233
|
|
|
|
|
|
|
'%H' => 'hour', |
234
|
|
|
|
|
|
|
'%h' => 'month_abbr', |
235
|
|
|
|
|
|
|
'%I' => 'hour_12', |
236
|
|
|
|
|
|
|
'%j' => 'day_of_year', |
237
|
|
|
|
|
|
|
'%k' => 'hour', # TODO: This one is space-padded. |
238
|
|
|
|
|
|
|
'%l' => 'hour_12', # TODO: This one is space-padded. |
239
|
|
|
|
|
|
|
'%M' => 'minute', |
240
|
|
|
|
|
|
|
'%m' => 'month', |
241
|
|
|
|
|
|
|
'%-m' => 'month_no_padding', |
242
|
|
|
|
|
|
|
'%N' => 'fractional_seconds', # TODO |
243
|
|
|
|
|
|
|
'%P' => 'am_or_pm', |
244
|
|
|
|
|
|
|
'%p' => 'am_or_pm', # TODO: This is uppercase. |
245
|
|
|
|
|
|
|
'%S' => 'second', |
246
|
|
|
|
|
|
|
'%s' => 'epoch', # TODO |
247
|
|
|
|
|
|
|
'%U' => 'week_number_0', # TODO |
248
|
|
|
|
|
|
|
'%u' => 'day_of_week', # TODO |
249
|
|
|
|
|
|
|
'%V' => 'week_number', # TODO |
250
|
|
|
|
|
|
|
'%W' => 'week_number_1', # TODO |
251
|
|
|
|
|
|
|
'%w' => 'day_of_week_0', # TODO |
252
|
|
|
|
|
|
|
'%Y' => 'year', |
253
|
|
|
|
|
|
|
'%y' => 'year_abbr', |
254
|
|
|
|
|
|
|
'%Z' => 'time_zone', |
255
|
|
|
|
|
|
|
'%z' => 'time_zone_offset', |
256
|
|
|
|
|
|
|
}; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
my $DEFAULT_STRFTIME_MAPPINGS = { |
259
|
|
|
|
|
|
|
}; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
my $DEFAULT_TRANSFORMATIONS = { |
262
|
|
|
|
|
|
|
# to => { |
263
|
|
|
|
|
|
|
# from => \&transformation_coderef, |
264
|
|
|
|
|
|
|
# }, |
265
|
|
|
|
|
|
|
'year' => { |
266
|
|
|
|
|
|
|
'year_abbr' => sub { |
267
|
|
|
|
|
|
|
my ($date) = @_; |
268
|
|
|
|
|
|
|
return $date->{'year'} if defined($date->{'year'}); |
269
|
|
|
|
|
|
|
return $date->{'year_abbr'} < 70 |
270
|
|
|
|
|
|
|
? $date->{'year_abbr'} + 2000 |
271
|
|
|
|
|
|
|
: $date->{'year_abbr'} + 1900; |
272
|
|
|
|
|
|
|
}, |
273
|
|
|
|
|
|
|
}, |
274
|
|
|
|
|
|
|
'year_abbr' => { |
275
|
|
|
|
|
|
|
'year' => sub { |
276
|
|
|
|
|
|
|
my ($date) = @_; |
277
|
|
|
|
|
|
|
return $date->{'year_abbr'} if defined($date->{'year_abbr'}); |
278
|
|
|
|
|
|
|
return substr($date->{'year'}, -2, 2); |
279
|
|
|
|
|
|
|
}, |
280
|
|
|
|
|
|
|
}, |
281
|
|
|
|
|
|
|
'month' => { |
282
|
|
|
|
|
|
|
'month_abbr' => sub { |
283
|
|
|
|
|
|
|
my ($date) = @_; |
284
|
|
|
|
|
|
|
return $date->{'month'} if defined($date->{'month'}); |
285
|
|
|
|
|
|
|
return $MONTH_LOOKUP->{'abbr'}->{ lc($date->{'month_abbr'}) } // undef; |
286
|
|
|
|
|
|
|
}, |
287
|
|
|
|
|
|
|
}, |
288
|
|
|
|
|
|
|
'month_abbr' => { |
289
|
|
|
|
|
|
|
'month' => sub { |
290
|
|
|
|
|
|
|
my ($date) = @_; |
291
|
|
|
|
|
|
|
return $date->{'month_abbr'} if defined($date->{'month_abbr'}); |
292
|
|
|
|
|
|
|
return $MONTH_LOOKUP->{'number'}->{ $date->{'month'}+0 }->{'abbr'} // undef; |
293
|
|
|
|
|
|
|
}, |
294
|
|
|
|
|
|
|
}, |
295
|
|
|
|
|
|
|
'hour' => { |
296
|
|
|
|
|
|
|
'hour_12' => sub { |
297
|
|
|
|
|
|
|
my ($date) = @_; |
298
|
|
|
|
|
|
|
return $date->{'hour'} if defined($date->{'hour'}); |
299
|
|
|
|
|
|
|
if (lc($date->{'am_or_pm'}) eq 'pm') { |
300
|
|
|
|
|
|
|
return $date->{'hour_12'} == 12 |
301
|
|
|
|
|
|
|
? $date->{'hour_12'} |
302
|
|
|
|
|
|
|
: $date->{'hour_12'} + 12; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
return $date->{'hour_12'} == 12 |
305
|
|
|
|
|
|
|
? 0 |
306
|
|
|
|
|
|
|
: $date->{'hour_12'}; |
307
|
|
|
|
|
|
|
}, |
308
|
|
|
|
|
|
|
}, |
309
|
|
|
|
|
|
|
'hour_12' => { |
310
|
|
|
|
|
|
|
'hour' => sub { |
311
|
|
|
|
|
|
|
my ($date) = @_; |
312
|
|
|
|
|
|
|
return $date->{'hour_12'} if defined($date->{'hour_12'}); |
313
|
|
|
|
|
|
|
if ($date->{'hour'} == 0) { |
314
|
|
|
|
|
|
|
return 12; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
return $date->{'hour'} < 13 |
317
|
|
|
|
|
|
|
? $date->{'hour'} |
318
|
|
|
|
|
|
|
: $date->{'hour'} - 12; |
319
|
|
|
|
|
|
|
}, |
320
|
|
|
|
|
|
|
}, |
321
|
|
|
|
|
|
|
'am_or_pm' => { |
322
|
|
|
|
|
|
|
'hour' => sub { |
323
|
|
|
|
|
|
|
my ($date) = @_; |
324
|
|
|
|
|
|
|
return $date->{'am_or_pm'} if defined($date->{'am_or_pm'}); |
325
|
|
|
|
|
|
|
if ($date->{'hour'} == 0) { |
326
|
|
|
|
|
|
|
return 'am'; |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
return $date->{'hour'} >= 12 |
329
|
|
|
|
|
|
|
? 'pm' |
330
|
|
|
|
|
|
|
: 'am'; |
331
|
|
|
|
|
|
|
}, |
332
|
|
|
|
|
|
|
}, |
333
|
|
|
|
|
|
|
}; |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head2 METHODS |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=over 4 |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=item new() |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=cut |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub new { |
344
|
161
|
|
|
161
|
1
|
139551
|
my ($class, %args) = @_; |
345
|
161
|
|
|
|
|
682
|
my $self = bless {}, $class; |
346
|
161
|
|
|
|
|
273
|
foreach my $parameter ( |
347
|
|
|
|
|
|
|
'debug', |
348
|
|
|
|
|
|
|
'parser', |
349
|
|
|
|
|
|
|
'formatter', |
350
|
|
|
|
|
|
|
'transformations', |
351
|
|
|
|
|
|
|
'defaults', |
352
|
|
|
|
|
|
|
) |
353
|
|
|
|
|
|
|
{ |
354
|
805
|
|
|
|
|
1354
|
my $method = 'initialize_' . $parameter; |
355
|
805
|
|
|
|
|
2414
|
$self->$method($args{$parameter}); |
356
|
|
|
|
|
|
|
} |
357
|
161
|
|
|
|
|
701
|
return $self; |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=item initialize_parser() |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=cut |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
sub initialize_parser { |
365
|
161
|
|
|
161
|
1
|
225
|
my ($self, $definition) = @_; |
366
|
|
|
|
|
|
|
# TODO: Verify $definition is a hashref with one of the approved parser parameters (regex, strptime, etc.). |
367
|
161
|
100
|
|
|
|
503
|
if (defined($definition->{'regex'})) { |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Initialize the right kind of regex parser (simple capture or named capture). |
370
|
4
|
100
|
|
|
|
10
|
if (defined($definition->{'params'})) { |
371
|
3
|
|
|
|
|
9
|
return $self->initialize_parser_for_regex_with_params( |
372
|
|
|
|
|
|
|
{ |
373
|
|
|
|
|
|
|
'regex' => $definition->{'regex'}, |
374
|
|
|
|
|
|
|
'params' => $definition->{'params'}, |
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
); |
377
|
|
|
|
|
|
|
} |
378
|
1
|
|
|
|
|
5
|
return $self->initialize_parser_for_regex_named_capture( |
379
|
|
|
|
|
|
|
{ |
380
|
|
|
|
|
|
|
'regex' => $definition->{'regex'}, |
381
|
|
|
|
|
|
|
}, |
382
|
|
|
|
|
|
|
); |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
} |
385
|
|
|
|
|
|
|
|
386
|
157
|
100
|
|
|
|
376
|
if (defined($definition->{'strptime'})) { |
387
|
10
|
|
|
|
|
38
|
return $self->initialize_parser_for_strptime( |
388
|
|
|
|
|
|
|
{ |
389
|
|
|
|
|
|
|
'strptime' => $definition->{'strptime'}, |
390
|
|
|
|
|
|
|
}, |
391
|
|
|
|
|
|
|
); |
392
|
|
|
|
|
|
|
} |
393
|
|
|
|
|
|
|
|
394
|
147
|
100
|
|
|
|
287
|
if (defined($definition->{'heuristic'})) { |
395
|
132
|
|
|
|
|
453
|
return $self->initialize_parser_heuristic( |
396
|
|
|
|
|
|
|
{ |
397
|
|
|
|
|
|
|
'heuristic' => $definition->{'heuristic'}, |
398
|
|
|
|
|
|
|
}, |
399
|
|
|
|
|
|
|
); |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# Nothing initialized. |
403
|
15
|
|
|
|
|
29
|
return; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=item initialize_formatter() |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=cut |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub initialize_formatter { |
411
|
161
|
|
|
161
|
1
|
193
|
my ($self, $definition) = @_; |
412
|
|
|
|
|
|
|
# TODO: Verify $definition is a hashref with one of the approved formatter parameters (sprintf, strftime, etc.). |
413
|
161
|
100
|
|
|
|
353
|
if (defined($definition->{'sprintf'})) { |
414
|
5
|
|
|
|
|
24
|
return $self->initialize_formatter_for_sprintf( |
415
|
|
|
|
|
|
|
{ |
416
|
|
|
|
|
|
|
'sprintf' => $definition->{'sprintf'}, |
417
|
|
|
|
|
|
|
'params' => $definition->{'params'}, |
418
|
|
|
|
|
|
|
}, |
419
|
|
|
|
|
|
|
); |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
156
|
100
|
|
|
|
303
|
if (defined($definition->{'strftime'})) { |
423
|
8
|
|
|
|
|
30
|
return $self->initialize_formatter_for_strftime( |
424
|
|
|
|
|
|
|
{ |
425
|
|
|
|
|
|
|
'strftime' => $definition->{'strftime'}, |
426
|
|
|
|
|
|
|
}, |
427
|
|
|
|
|
|
|
); |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
148
|
100
|
|
|
|
322
|
if (defined($definition->{'structure'})) { |
431
|
6
|
100
|
|
|
|
20
|
if ($definition->{'structure'} =~ /^hash(?:ref)?$/) { |
432
|
3
|
|
|
|
|
15
|
return $self->initialize_formatter_for_hashref( |
433
|
|
|
|
|
|
|
{ |
434
|
|
|
|
|
|
|
'structure' => $definition->{'structure'}, |
435
|
|
|
|
|
|
|
'params' => $definition->{'params'}, |
436
|
|
|
|
|
|
|
}, |
437
|
|
|
|
|
|
|
); |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
3
|
50
|
|
|
|
12
|
if ($definition->{'structure'} =~ /^array(?:ref)?$/) { |
441
|
3
|
|
|
|
|
16
|
return $self->initialize_formatter_for_arrayref( |
442
|
|
|
|
|
|
|
{ |
443
|
|
|
|
|
|
|
'structure' => $definition->{'structure'}, |
444
|
|
|
|
|
|
|
'params' => $definition->{'params'}, |
445
|
|
|
|
|
|
|
}, |
446
|
|
|
|
|
|
|
); |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
142
|
100
|
|
|
|
353
|
if (defined($definition->{'coderef'})) { |
451
|
1
|
|
|
|
|
5
|
return $self->initialize_formatter_for_coderef( |
452
|
|
|
|
|
|
|
{ |
453
|
|
|
|
|
|
|
'coderef' => $definition->{'coderef'}, |
454
|
|
|
|
|
|
|
'params' => $definition->{'params'}, |
455
|
|
|
|
|
|
|
}, |
456
|
|
|
|
|
|
|
); |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# Nothing initialized. |
460
|
141
|
|
|
|
|
228
|
return; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=item initialize_transformations() |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=cut |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
sub initialize_transformations { |
468
|
161
|
|
|
161
|
1
|
297
|
my ($self, $transformations) = @_; |
469
|
|
|
|
|
|
|
# TODO: Verify $transformations is an arrayref. |
470
|
161
|
|
|
|
|
164
|
my $count = 0; |
471
|
161
|
|
|
|
|
375
|
foreach my $t (@$transformations) { |
472
|
1
|
|
|
|
|
3
|
$self->{'transformations'}->{$t->{'to'}}->{$t->{'from'}} = $t->{'transformation'}; |
473
|
1
|
|
|
|
|
2
|
$count++; |
474
|
|
|
|
|
|
|
} |
475
|
161
|
|
|
|
|
324
|
return $count; |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=item initialize_defaults() |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=cut |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub initialize_defaults { |
483
|
161
|
|
|
161
|
1
|
216
|
my ($self, $args) = @_; |
484
|
|
|
|
|
|
|
# TODO: Verify $args is a hashref. |
485
|
161
|
|
100
|
|
|
1152
|
return $self->{'defaults'} = $args // {}; |
486
|
|
|
|
|
|
|
} |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
=item initialize_debug() |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
sub initialize_debug { |
493
|
161
|
|
|
161
|
1
|
220
|
my ($self, $value) = @_; |
494
|
161
|
|
100
|
|
|
744
|
return $self->{'debug'} = $value // 0; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=item initialize_parser_for_regex_with_params() |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=cut |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
sub initialize_parser_for_regex_with_params { |
502
|
3
|
|
|
3
|
1
|
4
|
my ($self, $definition) = @_; |
503
|
3
|
|
|
|
|
4
|
my $regex = $definition->{'regex'}; |
504
|
3
|
|
|
|
|
4
|
my $params = $definition->{'params'}; |
505
|
|
|
|
|
|
|
my $success = $self->add_parser( |
506
|
|
|
|
|
|
|
sub { |
507
|
3
|
|
|
3
|
|
4
|
my ($date_string) = @_; |
508
|
3
|
|
|
|
|
26
|
my (@components) = $date_string =~ $regex; |
509
|
3
|
50
|
|
|
|
9
|
return if ! @components; |
510
|
3
|
|
|
|
|
3
|
my %date = (); |
511
|
3
|
|
|
|
|
13
|
@date{@$params} = @components; |
512
|
|
|
|
|
|
|
# TODO: Add named capture values to %date. |
513
|
3
|
|
|
|
|
7
|
return \%date; |
514
|
|
|
|
|
|
|
}, |
515
|
3
|
|
|
|
|
14
|
); |
516
|
3
|
|
|
|
|
8
|
return $success; |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=item initialize_parser_for_regex_named_capture() |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=cut |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
sub initialize_parser_for_regex_named_capture { |
524
|
107
|
|
|
107
|
1
|
150
|
my ($self, $definition) = @_; |
525
|
107
|
|
|
|
|
161
|
my $regex = $definition->{'regex'}; |
526
|
|
|
|
|
|
|
my $success = $self->add_parser( |
527
|
|
|
|
|
|
|
sub { |
528
|
11
|
|
|
11
|
|
12
|
my ($date_string) = @_; |
529
|
11
|
|
|
|
|
134
|
my $success = $date_string =~ $regex; |
530
|
11
|
50
|
|
|
|
27
|
return if ! $success; |
531
|
11
|
|
|
11
|
|
6324
|
my %date = %+; |
|
11
|
|
|
|
|
4912
|
|
|
11
|
|
|
|
|
55720
|
|
|
11
|
|
|
|
|
261
|
|
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
# Move 'hour_12' if the wrong value. |
534
|
11
|
50
|
33
|
|
|
73
|
if ( |
|
|
|
66
|
|
|
|
|
535
|
|
|
|
|
|
|
defined($date{'hour_12'}) |
536
|
|
|
|
|
|
|
&& |
537
|
|
|
|
|
|
|
( |
538
|
|
|
|
|
|
|
$date{'hour_12'} > 12 |
539
|
|
|
|
|
|
|
|| |
540
|
|
|
|
|
|
|
$date{'hour_12'} == 0 |
541
|
|
|
|
|
|
|
) |
542
|
|
|
|
|
|
|
) { |
543
|
0
|
|
|
|
|
0
|
$date{'hour'} = delete $date{'hour_12'}; |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
11
|
|
|
|
|
25
|
return \%date; |
547
|
|
|
|
|
|
|
}, |
548
|
107
|
|
|
|
|
649
|
); |
549
|
107
|
|
|
|
|
141
|
return $success; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
=item initialize_parser_for_strptime() |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
sub initialize_parser_for_strptime { |
557
|
10
|
|
|
10
|
1
|
12
|
my ($self, $definition) = @_; |
558
|
10
|
|
|
|
|
16
|
my $strptime = $definition->{'strptime'}; |
559
|
10
|
|
|
|
|
15
|
my $format = $strptime; |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# Preprocess some tokens that expand into other tokens. |
562
|
10
|
|
|
|
|
17
|
foreach my $preprocess (@$STRPTIME_PREPROCESS) { |
563
|
80
|
|
|
|
|
462
|
$format =~ s/$preprocess->{'token'}/$preprocess->{'replacement'}/g; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# Escape everything in the strptime string so we can turn it into a regex. |
567
|
10
|
|
|
|
|
25
|
$format = quotemeta($format); |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# Unescape the parts that we will replace as tokens. |
570
|
|
|
|
|
|
|
# regex from DateTime::Format::Strptime |
571
|
10
|
|
|
|
|
74
|
$format =~ s/(?
|
572
|
10
|
|
|
|
|
19
|
$format =~ s/%\\\{([^\}]+)\\\}/%{$1}/g; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# Replace expanded tokens: %{year} |
575
|
10
|
|
|
|
|
14
|
$format =~ |
576
|
|
|
|
|
|
|
s/ |
577
|
|
|
|
|
|
|
%{(\w+)} |
578
|
|
|
|
|
|
|
/ |
579
|
0
|
0
|
|
|
|
0
|
$TOKENS->{$1} ? $TOKENS->{$1}->{'regex'} : "\%{$1}" |
580
|
|
|
|
|
|
|
/sgex; |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# Replace single character tokens: %Y |
583
|
10
|
|
|
|
|
34
|
$format =~ |
584
|
|
|
|
|
|
|
s/ |
585
|
|
|
|
|
|
|
(%[%a-zA-Z]) |
586
|
|
|
|
|
|
|
/ |
587
|
68
|
|
|
|
|
133
|
$self->strptime_token_to_regex($1) |
588
|
|
|
|
|
|
|
/sgex; |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
# Postprocess some tokens that expand into special characters. |
591
|
10
|
|
|
|
|
72
|
foreach my $postprocess (@$STRPTIME_POSTPROCESS) { |
592
|
30
|
|
|
|
|
234
|
$format =~ s/$postprocess->{'token'}/$postprocess->{'replacement'}/g; |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
|
595
|
10
|
50
|
|
|
|
766
|
say "Crafted regex: $strptime -> $format" if $self->{'debug'}; |
596
|
10
|
|
|
|
|
498
|
my $success = $self->initialize_parser_for_regex_named_capture( |
597
|
|
|
|
|
|
|
{ |
598
|
|
|
|
|
|
|
'regex' => qr/$format/, |
599
|
|
|
|
|
|
|
}, |
600
|
|
|
|
|
|
|
); |
601
|
10
|
|
|
|
|
46
|
return $success; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=item initialize_parser_heuristic() |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=cut |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
sub initialize_parser_heuristic { |
609
|
132
|
|
|
132
|
1
|
143
|
my ($self, $definition) = @_; |
610
|
132
|
|
|
|
|
184
|
my $hint = $definition->{'heuristic'}; |
611
|
132
|
|
|
|
|
196
|
my $known_parsers = {}; # Populated when we add a parser to the stack in front of this one. |
612
|
132
|
|
|
|
|
585
|
my $regex_for_date = qr{ \w+ [-/\.] \w+ (?:[-/\.] \w+) }x; |
613
|
132
|
|
|
|
|
273
|
my $regex_for_time = qr/ \d\d? : \d\d (?::\d\d) /x; |
614
|
132
|
|
|
|
|
308
|
my $regex_for_time_zone_offset = qr/ [-+] \d\d? (?:\d\d) /x; |
615
|
132
|
|
|
|
|
341
|
my $regex_for_time_zone_long_name = qr{ [[:alpha:]]+ / [[:alpha:]]+ (?:_ [[:alpha:]]+) }x; |
616
|
132
|
|
|
|
|
269
|
my $regex_for_julian_day = qr/ J\d+ /x; |
617
|
132
|
|
|
|
|
460
|
my $regex_for_number = qr/ \d+ /x; |
618
|
132
|
|
|
|
|
310
|
my $regex_for_string = qr/ [[:alpha:]]+ /x; |
619
|
132
|
|
|
|
|
274
|
my $regex_for_whitespace = qr/ \s+ /x; |
620
|
132
|
|
|
|
|
1273
|
my $token_regex = qr{ |
621
|
|
|
|
|
|
|
# time zone offset |
622
|
|
|
|
|
|
|
( $regex_for_time_zone_offset ) |
623
|
|
|
|
|
|
|
# time |
624
|
|
|
|
|
|
|
| ( $regex_for_time ) |
625
|
|
|
|
|
|
|
# time zone long name |
626
|
|
|
|
|
|
|
| ( $regex_for_time_zone_long_name ) |
627
|
|
|
|
|
|
|
# date |
628
|
|
|
|
|
|
|
| ( $regex_for_date ) |
629
|
|
|
|
|
|
|
# Julian day |
630
|
|
|
|
|
|
|
| ( $regex_for_julian_day ) |
631
|
|
|
|
|
|
|
# number |
632
|
|
|
|
|
|
|
| ( $regex_for_number ) |
633
|
|
|
|
|
|
|
# string |
634
|
|
|
|
|
|
|
| ( $regex_for_string ) |
635
|
|
|
|
|
|
|
# whitespace |
636
|
|
|
|
|
|
|
| ( $regex_for_whitespace ) |
637
|
|
|
|
|
|
|
# anything else |
638
|
|
|
|
|
|
|
| ( . ) |
639
|
|
|
|
|
|
|
}x; |
640
|
|
|
|
|
|
|
my $success = $self->add_parser( |
641
|
|
|
|
|
|
|
sub { |
642
|
132
|
|
|
132
|
|
120
|
my ($date_string) = @_; |
643
|
132
|
|
|
|
|
104
|
my $order_string; # Will be set with ymd|dmy|mdy when we have enough information. |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
# Split string into parts that can be identified later. |
646
|
132
|
50
|
|
|
|
2475
|
say "Parsing date string into parts: $date_string" if $self->{'debug'}; |
647
|
132
|
|
|
|
|
2329
|
my @parts = $date_string =~ /$token_regex/g; |
648
|
132
|
50
|
|
|
|
381
|
return if ! @parts; |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
# Try to identify what each part is, based on what it looks like, and what order it is in. |
651
|
132
|
|
|
|
|
204
|
my @parser_parts = (); |
652
|
132
|
|
|
|
|
172
|
my $date = {}; |
653
|
132
|
|
|
|
|
199
|
foreach my $part (grep { defined($_) } @parts) { |
|
3375
|
|
|
|
|
3802
|
|
654
|
343
|
50
|
|
|
|
1555
|
say "Trying to identify part: '$part'" if $self->{'debug'}; |
655
|
343
|
50
|
|
|
|
5287
|
if ($part =~ /^$regex_for_time_zone_offset$/) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
656
|
0
|
0
|
|
|
|
0
|
say " time_zone_offset ($part)" if $self->{'debug'}; |
657
|
0
|
|
|
|
|
0
|
push @parser_parts, $TOKENS->{'time_zone_offset'}->{'regex'}; |
658
|
0
|
|
|
|
|
0
|
$date->{'time_zone_offset'} = $part; |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_time$/) { |
661
|
5
|
|
|
|
|
24
|
my @time = split(/:/, $part); |
662
|
|
|
|
|
|
|
|
663
|
5
|
50
|
|
|
|
26
|
say " hour ($time[0])" if $self->{'debug'}; |
664
|
5
|
|
|
|
|
13
|
push @parser_parts, $TOKENS->{'hour'}->{'regex'}; |
665
|
5
|
|
|
|
|
11
|
$date->{'hour'} = $time[0]; |
666
|
|
|
|
|
|
|
|
667
|
5
|
50
|
|
|
|
23
|
say " minute ($time[1])" if $self->{'debug'}; |
668
|
5
|
|
|
|
|
13
|
push @parser_parts, quotemeta(':'), $TOKENS->{'minute'}->{'regex'}; |
669
|
5
|
|
|
|
|
10
|
$date->{'minute'} = $time[1]; |
670
|
|
|
|
|
|
|
|
671
|
5
|
50
|
|
|
|
13
|
if (@time > 2) { |
672
|
5
|
50
|
|
|
|
18
|
say " second ($time[2])" if $self->{'debug'}; |
673
|
5
|
|
|
|
|
14
|
push @parser_parts, quotemeta(':'), $TOKENS->{'second'}->{'regex'}; |
674
|
5
|
|
|
|
|
12
|
$date->{'second'} = $time[2]; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_time_zone_long_name$/) { |
678
|
0
|
|
|
|
|
0
|
say " time_zone ($part)"; |
679
|
0
|
|
|
|
|
0
|
push @parser_parts, $TOKENS->{'time_zone'}->{'regex'}; |
680
|
0
|
|
|
|
|
0
|
$date->{'time_zone'} = $part; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_date$/) { |
683
|
67
|
|
|
|
|
295
|
my @date_parts = split(m|[-/\.]|, $part); |
684
|
67
|
|
|
|
|
122
|
my @order = (); |
685
|
|
|
|
|
|
|
# PostgreSQL forces reliance on the hint. |
686
|
|
|
|
|
|
|
#foreach my $index (0..2) { |
687
|
|
|
|
|
|
|
# if ($date_parts[$index] =~ /^\d+$/) { |
688
|
|
|
|
|
|
|
# if ($date_parts[$index] > 31) { |
689
|
|
|
|
|
|
|
# $order[$index] = 'y'; |
690
|
|
|
|
|
|
|
# } |
691
|
|
|
|
|
|
|
# elsif ($date_parts[$index] > 12) { |
692
|
|
|
|
|
|
|
# $order[$index] = 'd'; |
693
|
|
|
|
|
|
|
# } |
694
|
|
|
|
|
|
|
# else { |
695
|
|
|
|
|
|
|
# $order[$index] = 'm'; |
696
|
|
|
|
|
|
|
# } |
697
|
|
|
|
|
|
|
# } |
698
|
|
|
|
|
|
|
# elsif ($date_parts[$index] =~ $TOKENS->{'month_abbr'}->{'regex'}) { |
699
|
|
|
|
|
|
|
# $order[$index] = 'm'; |
700
|
|
|
|
|
|
|
# } |
701
|
|
|
|
|
|
|
#} |
702
|
67
|
|
|
|
|
172
|
$order_string = join('', @order); |
703
|
67
|
100
|
66
|
|
|
1306
|
if ( |
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
704
|
|
|
|
|
|
|
$date_parts[0] =~ /^$TOKENS->{'year'}->{'regex'}$/ |
705
|
|
|
|
|
|
|
&& |
706
|
|
|
|
|
|
|
scalar(keys %$date) == 0 |
707
|
|
|
|
|
|
|
) { |
708
|
21
|
|
|
|
|
71
|
$order_string = 'ymd'; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
elsif ( |
711
|
|
|
|
|
|
|
$hint eq 'dmy' |
712
|
|
|
|
|
|
|
&& |
713
|
|
|
|
|
|
|
( |
714
|
|
|
|
|
|
|
$date_parts[0] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/ |
715
|
|
|
|
|
|
|
|| |
716
|
|
|
|
|
|
|
$date_parts[0] =~ /^$TOKENS->{'month_name'}->{'regex'}$/ |
717
|
|
|
|
|
|
|
) |
718
|
|
|
|
|
|
|
) { |
719
|
2
|
|
|
|
|
4
|
$order_string = 'mdy'; |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
elsif ( |
722
|
|
|
|
|
|
|
$hint eq 'mdy' |
723
|
|
|
|
|
|
|
&& |
724
|
|
|
|
|
|
|
( |
725
|
|
|
|
|
|
|
$date_parts[1] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/ |
726
|
|
|
|
|
|
|
|| |
727
|
|
|
|
|
|
|
$date_parts[1] =~ /^$TOKENS->{'month_name'}->{'regex'}$/ |
728
|
|
|
|
|
|
|
) |
729
|
|
|
|
|
|
|
) { |
730
|
3
|
|
|
|
|
6
|
$order_string = 'dmy'; |
731
|
|
|
|
|
|
|
} |
732
|
67
|
100
|
|
|
|
299
|
if ($order_string !~ /^ymd|dmy|mdy$/) { |
733
|
41
|
50
|
|
|
|
246
|
say "Using date token order hint: $hint" if $self->{'debug'}; |
734
|
41
|
|
|
|
|
53
|
$order_string = $hint; |
735
|
|
|
|
|
|
|
} |
736
|
67
|
|
|
|
|
204
|
@order = split(//, $order_string); |
737
|
67
|
|
|
|
|
140
|
foreach my $index (0..2) { |
738
|
176
|
100
|
|
|
|
491
|
if ($order[$index] eq 'y') { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
739
|
57
|
100
|
|
|
|
401
|
if ($date_parts[$index] =~ /^$TOKENS->{'year'}->{'regex'}$/) { |
|
|
100
|
|
|
|
|
|
740
|
34
|
50
|
|
|
|
203
|
say " year ($date_parts[$index])" if $self->{'debug'}; |
741
|
34
|
|
|
|
|
78
|
push @parser_parts, $TOKENS->{'year'}->{'regex'}; |
742
|
34
|
|
|
|
|
79
|
$date->{'year'} = $date_parts[$index]; |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
elsif ($date_parts[$index] =~ /^$TOKENS->{'year_abbr'}->{'regex'}$/) { |
745
|
20
|
50
|
|
|
|
100
|
say " year_abbr ($date_parts[$index])" if $self->{'debug'}; |
746
|
20
|
|
|
|
|
61
|
push @parser_parts, $TOKENS->{'year_abbr'}->{'regex'}; |
747
|
20
|
|
|
|
|
50
|
$date->{'year_abbr'} = $date_parts[$index]; |
748
|
|
|
|
|
|
|
} |
749
|
|
|
|
|
|
|
else { |
750
|
3
|
|
|
|
|
35
|
warn "Error parsing year: " |
751
|
|
|
|
|
|
|
. "value '$date_parts[$index]' out of range ($part); " |
752
|
|
|
|
|
|
|
. "Perhaps you need a different heuristic hint than '$hint'\n"; |
753
|
3
|
|
|
|
|
28
|
return; |
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
} |
756
|
|
|
|
|
|
|
elsif ($order[$index] eq 'm') { |
757
|
59
|
100
|
100
|
|
|
789
|
if ( |
|
|
100
|
|
|
|
|
|
758
|
|
|
|
|
|
|
$date_parts[$index] =~ /^$TOKENS->{'month'}->{'regex'}$/ |
759
|
|
|
|
|
|
|
&& |
760
|
|
|
|
|
|
|
$date_parts[$index] <= 12 |
761
|
|
|
|
|
|
|
) { |
762
|
42
|
50
|
|
|
|
240
|
say " month ($date_parts[$index])" if $self->{'debug'}; |
763
|
42
|
|
|
|
|
84
|
push @parser_parts, $TOKENS->{'month'}->{'regex'}; |
764
|
42
|
|
|
|
|
86
|
$date->{'month'} = $date_parts[$index]; |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
elsif ($date_parts[$index] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/) { |
767
|
12
|
50
|
|
|
|
65
|
say " month_abbr ($date_parts[$index])" if $self->{'debug'}; |
768
|
12
|
|
|
|
|
21
|
push @parser_parts, $TOKENS->{'month_abbr'}->{'regex'}; |
769
|
12
|
|
|
|
|
29
|
$date->{'month_abbr'} = $date_parts[$index]; |
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
else { |
772
|
5
|
|
|
|
|
101
|
warn "Error parsing month: " |
773
|
|
|
|
|
|
|
. "value '$date_parts[$index]' out of range ($part); " |
774
|
|
|
|
|
|
|
. "Perhaps you need a different heuristic hint than '$hint'\n"; |
775
|
5
|
|
|
|
|
84
|
return; |
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
} |
778
|
|
|
|
|
|
|
elsif ($order[$index] eq 'd') { |
779
|
60
|
100
|
100
|
|
|
615
|
if ( |
780
|
|
|
|
|
|
|
$date_parts[$index] !~ /^$TOKENS->{'day'}->{'regex'}$/ |
781
|
|
|
|
|
|
|
|| |
782
|
|
|
|
|
|
|
$date_parts[$index] > 31 |
783
|
|
|
|
|
|
|
) { |
784
|
14
|
|
|
|
|
198
|
warn "Error parsing day: " |
785
|
|
|
|
|
|
|
. "value '$date_parts[$index]' out of range ($part); " |
786
|
|
|
|
|
|
|
. "Perhaps you need a different heuristic hint than '$hint'\n"; |
787
|
14
|
|
|
|
|
125
|
return; |
788
|
|
|
|
|
|
|
} |
789
|
46
|
50
|
|
|
|
210
|
say " day ($date_parts[$index])" if $self->{'debug'}; |
790
|
46
|
|
|
|
|
145
|
push @parser_parts, $TOKENS->{'day'}->{'regex'}; |
791
|
46
|
|
|
|
|
109
|
$date->{'day'} = $date_parts[$index]; |
792
|
|
|
|
|
|
|
} |
793
|
154
|
100
|
|
|
|
802
|
push @parser_parts, qr|[-/\.]| if $index < 2; |
794
|
|
|
|
|
|
|
} |
795
|
|
|
|
|
|
|
} |
796
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_julian_day$/) { |
797
|
3
|
|
|
|
|
32
|
my $success = $part =~ $TOKENS->{'julian_day'}->{'regex'}; |
798
|
3
|
|
|
|
|
11
|
say " julian_day ($part)\n"; |
799
|
3
|
|
|
|
|
8
|
push @parser_parts, $TOKENS->{'julian_day'}->{'regex'}; |
800
|
3
|
|
|
|
|
33
|
$date->{'julian_day'} = $+{'julian_day'}; |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_number$/) { |
803
|
130
|
100
|
66
|
|
|
627
|
if (length($part) == 8) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
804
|
4
|
|
|
|
|
83
|
my $regex_date = |
805
|
|
|
|
|
|
|
qr/ |
806
|
|
|
|
|
|
|
$TOKENS->{'year'}->{'regex'} |
807
|
|
|
|
|
|
|
$TOKENS->{'month'}->{'regex'} |
808
|
|
|
|
|
|
|
$TOKENS->{'day'}->{'regex'} |
809
|
|
|
|
|
|
|
/x; |
810
|
4
|
|
|
|
|
22
|
my $success = $part =~ $regex_date; |
811
|
4
|
|
|
|
|
83
|
my %ymd = %+; |
812
|
4
|
|
|
|
|
14
|
foreach my $token ('year', 'month', 'day') { |
813
|
12
|
|
|
|
|
37
|
say " $token ($ymd{$token})"; |
814
|
12
|
|
|
|
|
20
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
815
|
12
|
|
|
|
|
35
|
$date->{$token} = $ymd{$token}; |
816
|
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
elsif (length($part) == 6) { |
819
|
4
|
100
|
|
|
|
14
|
if (defined($date->{'year'})) { |
820
|
|
|
|
|
|
|
# This is a concatenated time: HHMM |
821
|
1
|
|
|
|
|
90
|
my $regex_time = |
822
|
|
|
|
|
|
|
qr/ |
823
|
|
|
|
|
|
|
$TOKENS->{'hour'}->{'regex'} |
824
|
|
|
|
|
|
|
$TOKENS->{'minute'}->{'regex'} |
825
|
|
|
|
|
|
|
$TOKENS->{'second'}->{'regex'} |
826
|
|
|
|
|
|
|
/x; |
827
|
1
|
|
|
|
|
6
|
my $success = $part =~ $regex_time; |
828
|
1
|
|
|
|
|
24
|
my %hms = %+; |
829
|
1
|
|
|
|
|
5
|
foreach my $token ('hour', 'minute', 'second') { |
830
|
3
|
|
|
|
|
11
|
say " $token ($hms{$token})"; |
831
|
3
|
|
|
|
|
6
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
832
|
3
|
|
|
|
|
9
|
$date->{$token} = $hms{$token}; |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
} |
835
|
|
|
|
|
|
|
else { |
836
|
|
|
|
|
|
|
# This is a concatenated date: YYMMDD |
837
|
3
|
|
|
|
|
77
|
my $regex_date = |
838
|
|
|
|
|
|
|
qr/ |
839
|
|
|
|
|
|
|
$TOKENS->{'year_abbr'}->{'regex'} |
840
|
|
|
|
|
|
|
$TOKENS->{'month'}->{'regex'} |
841
|
|
|
|
|
|
|
$TOKENS->{'day'}->{'regex'} |
842
|
|
|
|
|
|
|
/x; |
843
|
3
|
|
|
|
|
20
|
my $success = $part =~ $regex_date; |
844
|
3
|
|
|
|
|
40
|
my %ymd = %+; |
845
|
3
|
|
|
|
|
11
|
foreach my $token ('year_abbr', 'month', 'day') { |
846
|
9
|
|
|
|
|
29
|
say " $token ($ymd{$token})"; |
847
|
9
|
|
|
|
|
14
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
848
|
9
|
|
|
|
|
72
|
$date->{$token} = $ymd{$token}; |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
elsif (length($part) == 3 && defined($date->{'year'})) { |
853
|
|
|
|
|
|
|
# day_of_year |
854
|
3
|
50
|
|
|
|
15
|
say " day_of_year ($part)" if $self->{'debug'}; |
855
|
3
|
|
|
|
|
21
|
push @parser_parts, $TOKENS->{'day_of_year'}->{'regex'}; |
856
|
3
|
|
|
|
|
10
|
$date->{'day_of_year'} = $part; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
elsif (length($part) == 4) { |
859
|
29
|
100
|
66
|
|
|
156
|
if (defined($date->{'year'}) || defined($date->{'year_abbr'})) { |
860
|
|
|
|
|
|
|
# This is a concatenated time without seconds: HHMM |
861
|
2
|
|
|
|
|
25
|
my $regex_time = |
862
|
|
|
|
|
|
|
qr/ |
863
|
|
|
|
|
|
|
$TOKENS->{'hour'}->{'regex'} |
864
|
|
|
|
|
|
|
$TOKENS->{'minute'}->{'regex'} |
865
|
|
|
|
|
|
|
/x; |
866
|
2
|
|
|
|
|
17
|
my $success = $part =~ $regex_time; |
867
|
2
|
|
|
|
|
107
|
my %hm = %+; |
868
|
2
|
|
|
|
|
7
|
foreach my $token ('hour', 'minute') { |
869
|
4
|
100
|
|
|
|
13
|
if (! $TOKENS->{$token}->{'constraint'}->($hm{$token})) { |
870
|
2
|
|
|
|
|
41
|
warn "Error parsing $token: " |
871
|
|
|
|
|
|
|
. "value '$hm{$token}' out of range ($date_string)\n"; |
872
|
2
|
|
|
|
|
25
|
return; |
873
|
|
|
|
|
|
|
} |
874
|
2
|
|
|
|
|
12
|
say " $token ($hm{$token})"; |
875
|
2
|
|
|
|
|
6
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
876
|
2
|
|
|
|
|
5
|
$date->{$token} = $hm{$token}; |
877
|
|
|
|
|
|
|
} |
878
|
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
else { |
880
|
|
|
|
|
|
|
# year (if month and day have not been set, order is now ymd). |
881
|
27
|
|
33
|
|
|
211
|
my $token = $self->most_likely_token( |
882
|
|
|
|
|
|
|
'possible_tokens' => ['year'], |
883
|
|
|
|
|
|
|
'already_claimed' => $date, |
884
|
|
|
|
|
|
|
'heuristic' => ($order_string // $hint), |
885
|
|
|
|
|
|
|
'date_string' => $date_string, |
886
|
|
|
|
|
|
|
'value' => $part, |
887
|
|
|
|
|
|
|
); |
888
|
27
|
50
|
|
|
|
62
|
return if ! defined $token; |
889
|
27
|
50
|
|
|
|
292
|
say " $token ($part)" if $self->{'debug'}; |
890
|
27
|
|
|
|
|
72
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
891
|
27
|
|
|
|
|
56
|
$date->{$token} = $part; |
892
|
27
|
50
|
66
|
|
|
207
|
if ( |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
893
|
|
|
|
|
|
|
! defined($date->{'day'}) |
894
|
|
|
|
|
|
|
&& |
895
|
|
|
|
|
|
|
! defined($date->{'month'}) |
896
|
|
|
|
|
|
|
&& |
897
|
|
|
|
|
|
|
! defined($date->{'month_abbr'}) |
898
|
|
|
|
|
|
|
&& |
899
|
|
|
|
|
|
|
! defined($date->{'month_name'}) |
900
|
|
|
|
|
|
|
) { |
901
|
15
|
|
50
|
|
|
75
|
$order_string ||= 'ymd'; |
902
|
|
|
|
|
|
|
} |
903
|
|
|
|
|
|
|
} |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
else { |
906
|
|
|
|
|
|
|
# Either month, or day, or year (based on $order_string or $hint or what has been set already). |
907
|
90
|
100
|
66
|
|
|
589
|
if (($order_string // $hint) eq 'dmy') { |
|
|
100
|
66
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
908
|
23
|
|
33
|
|
|
176
|
my $token = $self->most_likely_token( |
909
|
|
|
|
|
|
|
'possible_tokens' => ['day', 'month', 'year', 'year_abbr'], |
910
|
|
|
|
|
|
|
'already_claimed' => $date, |
911
|
|
|
|
|
|
|
'heuristic' => ($order_string // $hint), |
912
|
|
|
|
|
|
|
'date_string' => $date_string, |
913
|
|
|
|
|
|
|
'value' => $part, |
914
|
|
|
|
|
|
|
); |
915
|
23
|
100
|
|
|
|
94
|
return if ! defined $token; |
916
|
19
|
50
|
33
|
|
|
208
|
say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'}; |
917
|
19
|
|
|
|
|
42
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
918
|
19
|
|
|
|
|
91
|
$date->{$token} = $part; |
919
|
|
|
|
|
|
|
} |
920
|
|
|
|
|
|
|
elsif (($order_string // $hint) eq 'mdy') { |
921
|
24
|
|
33
|
|
|
145
|
my $token = $self->most_likely_token( |
922
|
|
|
|
|
|
|
'possible_tokens' => ['month', 'day', 'year', 'year_abbr'], |
923
|
|
|
|
|
|
|
'already_claimed' => $date, |
924
|
|
|
|
|
|
|
'heuristic' => ($order_string // $hint), |
925
|
|
|
|
|
|
|
'date_string' => $date_string, |
926
|
|
|
|
|
|
|
'value' => $part, |
927
|
|
|
|
|
|
|
); |
928
|
24
|
100
|
|
|
|
70
|
return if ! defined $token; |
929
|
20
|
50
|
33
|
|
|
204
|
say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'}; |
930
|
20
|
|
|
|
|
42
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
931
|
20
|
|
|
|
|
60
|
$date->{$token} = $part; |
932
|
|
|
|
|
|
|
} |
933
|
|
|
|
|
|
|
elsif (($order_string // $hint) eq 'ymd') { |
934
|
43
|
|
66
|
|
|
239
|
my $token = $self->most_likely_token( |
935
|
|
|
|
|
|
|
'possible_tokens' => ['year', 'year_abbr', 'month', 'day'], |
936
|
|
|
|
|
|
|
'already_claimed' => $date, |
937
|
|
|
|
|
|
|
'heuristic' => ($order_string // $hint), |
938
|
|
|
|
|
|
|
'date_string' => $date_string, |
939
|
|
|
|
|
|
|
'value' => $part, |
940
|
|
|
|
|
|
|
); |
941
|
43
|
100
|
|
|
|
119
|
return if ! defined $token; |
942
|
39
|
50
|
66
|
|
|
442
|
say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'}; |
943
|
39
|
|
|
|
|
133
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
944
|
39
|
|
|
|
|
122
|
$date->{$token} = $part; |
945
|
|
|
|
|
|
|
} |
946
|
|
|
|
|
|
|
else { |
947
|
0
|
0
|
|
|
|
0
|
say " number ($part)" if $self->{'debug'}; |
948
|
0
|
|
|
|
|
0
|
push @parser_parts, $regex_for_number; |
949
|
|
|
|
|
|
|
} |
950
|
|
|
|
|
|
|
} |
951
|
|
|
|
|
|
|
} |
952
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_string$/) { |
953
|
|
|
|
|
|
|
# TODO: Look for time zone abbreviation. |
954
|
31
|
|
|
|
|
156
|
my $token = $self->most_likely_token( |
955
|
|
|
|
|
|
|
'possible_tokens' => ['am_or_pm', 'era_abbr', 'month_name', 'month_abbr', 'day_name', 'day_abbr', 'phrase', 'time_zone'], |
956
|
|
|
|
|
|
|
'already_claimed' => $date, |
957
|
|
|
|
|
|
|
'date_string' => $date_string, |
958
|
|
|
|
|
|
|
'value' => $part, |
959
|
|
|
|
|
|
|
); |
960
|
31
|
100
|
|
|
|
71
|
if ($token) { |
961
|
29
|
100
|
100
|
|
|
123
|
if ($token eq 'month_name' || $token eq 'month_abbr') { |
962
|
24
|
100
|
|
|
|
74
|
if (defined($date->{'month'})) { |
963
|
6
|
|
|
|
|
105
|
say " $token will need to take the place of month"; |
964
|
6
|
50
|
66
|
|
|
34
|
if (($order_string // $hint) =~ /md/) { |
965
|
6
|
50
|
|
|
|
30
|
say " day ($date->{'month'}) moved from month" if $self->{'debug'}; |
966
|
6
|
|
|
|
|
10
|
foreach my $parser_part (@parser_parts) { |
967
|
20
|
100
|
|
|
|
43
|
if ($parser_part =~ /\?/) { |
968
|
6
|
|
|
|
|
14
|
$parser_part = $TOKENS->{'day'}->{'regex'}; |
969
|
|
|
|
|
|
|
} |
970
|
|
|
|
|
|
|
} |
971
|
6
|
|
|
|
|
22
|
$date->{'day'} = delete $date->{'month'}; |
972
|
|
|
|
|
|
|
} |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
} |
975
|
29
|
50
|
|
|
|
515
|
say " $token ($part)" if $self->{'debug'}; |
976
|
29
|
|
|
|
|
105
|
push @parser_parts, $TOKENS->{$token}->{'regex'}; |
977
|
29
|
|
|
|
|
103
|
$date->{$token} = $part; |
978
|
|
|
|
|
|
|
} |
979
|
|
|
|
|
|
|
else { |
980
|
2
|
50
|
|
|
|
54
|
say " literal ($part)" if $self->{'debug'}; |
981
|
2
|
|
|
|
|
61
|
push @parser_parts, quotemeta($part); |
982
|
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
} |
984
|
|
|
|
|
|
|
elsif ($part =~ /^$regex_for_whitespace$/) { |
985
|
98
|
50
|
|
|
|
487
|
say " whitespace ($part)" if $self->{'debug'}; |
986
|
98
|
|
|
|
|
211
|
push @parser_parts, $regex_for_whitespace; |
987
|
|
|
|
|
|
|
} |
988
|
|
|
|
|
|
|
else { |
989
|
9
|
50
|
|
|
|
48
|
say " literal ($part)" if $self->{'debug'}; |
990
|
9
|
|
|
|
|
34
|
push @parser_parts, quotemeta($part); |
991
|
|
|
|
|
|
|
} |
992
|
|
|
|
|
|
|
} |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
# If am_or_pm is pm, and hour is < 12, change from hour to hour_12 (and the parser). |
995
|
96
|
100
|
66
|
|
|
306
|
if (defined($date->{'am_or_pm'}) && lc($date->{'am_or_pm'}) eq 'pm' ) { |
996
|
1
|
50
|
33
|
|
|
8
|
if (defined($date->{'hour'}) && $date->{'hour'} < 12) { |
997
|
1
|
|
|
|
|
4
|
$date->{'hour_12'} = delete $date->{'hour'}; |
998
|
1
|
|
|
|
|
3
|
foreach my $parser_part (@parser_parts) { |
999
|
13
|
100
|
|
|
|
23
|
if ($parser_part =~ /\?/) { |
1000
|
1
|
|
|
|
|
4
|
$parser_part =~ s/\?/?/; |
1001
|
|
|
|
|
|
|
} |
1002
|
|
|
|
|
|
|
} |
1003
|
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
} |
1005
|
96
|
|
|
|
|
248
|
my $parser_regex = join('', @parser_parts); |
1006
|
96
|
50
|
|
|
|
489
|
say "Crafted regex: $date_string -> $parser_regex" if $self->{'debug'}; |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
# Add a new parser that will match this date format. |
1009
|
96
|
50
|
|
|
|
202
|
if (! defined($known_parsers->{$parser_regex}) ) { |
1010
|
96
|
|
|
|
|
286
|
$known_parsers->{$parser_regex} = 1; |
1011
|
96
|
|
|
|
|
3697
|
$self->initialize_parser_for_regex_named_capture( |
1012
|
|
|
|
|
|
|
{ |
1013
|
|
|
|
|
|
|
'regex' => qr/$parser_regex/, |
1014
|
|
|
|
|
|
|
}, |
1015
|
|
|
|
|
|
|
); |
1016
|
|
|
|
|
|
|
# Move the heuristic parser to the last slot again. |
1017
|
96
|
|
|
|
|
138
|
push( |
1018
|
96
|
|
|
|
|
168
|
@{ $self->{'active_parsers'} }, |
1019
|
|
|
|
|
|
|
splice( |
1020
|
96
|
|
|
|
|
184
|
@{ $self->{'active_parsers'} }, -2, 1 |
1021
|
|
|
|
|
|
|
), |
1022
|
|
|
|
|
|
|
); |
1023
|
|
|
|
|
|
|
} |
1024
|
|
|
|
|
|
|
|
1025
|
96
|
|
|
|
|
317
|
return $date; |
1026
|
|
|
|
|
|
|
}, |
1027
|
132
|
|
|
|
|
2729
|
); |
1028
|
132
|
|
|
|
|
604
|
return $success; |
1029
|
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
=item initialize_formatter_for_arrayref() |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
=cut |
1034
|
|
|
|
|
|
|
|
1035
|
|
|
|
|
|
|
sub initialize_formatter_for_arrayref { |
1036
|
7
|
|
|
7
|
1
|
9
|
my ($self, $definition) = @_; |
1037
|
7
|
|
50
|
|
|
16
|
my $structure = $definition->{'structure'} // 'arrayref'; |
1038
|
7
|
|
50
|
|
|
14
|
my $params = $definition->{'params'} // die "Unable to create $structure formatter: No 'params' argument defined."; |
1039
|
|
|
|
|
|
|
# TODO: Validate parameters. |
1040
|
|
|
|
|
|
|
my $success = $self->add_formatter( |
1041
|
|
|
|
|
|
|
sub { |
1042
|
7
|
|
|
7
|
|
10
|
my ($date) = @_; |
1043
|
46
|
|
33
|
|
|
121
|
my @formatted = ( |
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
1044
|
|
|
|
|
|
|
map |
1045
|
|
|
|
|
|
|
{ |
1046
|
|
|
|
|
|
|
# Use the value, if available. |
1047
|
7
|
|
|
|
|
11
|
$date->{$_} |
1048
|
|
|
|
|
|
|
// |
1049
|
|
|
|
|
|
|
# Or see if we can determine the value by transforming another field. |
1050
|
|
|
|
|
|
|
$self->transform_token_value( |
1051
|
|
|
|
|
|
|
'target_token' => $_, |
1052
|
|
|
|
|
|
|
'date' => $date, |
1053
|
|
|
|
|
|
|
) |
1054
|
|
|
|
|
|
|
// |
1055
|
|
|
|
|
|
|
# Or see if there is a default value for the field. |
1056
|
|
|
|
|
|
|
$self->{'defaults'}->{$_} |
1057
|
|
|
|
|
|
|
// |
1058
|
|
|
|
|
|
|
# Or just use a value of empty string. |
1059
|
|
|
|
|
|
|
'' |
1060
|
|
|
|
|
|
|
} |
1061
|
|
|
|
|
|
|
@$params, |
1062
|
|
|
|
|
|
|
); |
1063
|
7
|
100
|
|
|
|
32
|
return \@formatted if $structure eq 'arrayref'; |
1064
|
1
|
|
|
|
|
6
|
return @formatted; |
1065
|
|
|
|
|
|
|
}, |
1066
|
7
|
|
|
|
|
37
|
); |
1067
|
7
|
|
|
|
|
13
|
return $success; |
1068
|
|
|
|
|
|
|
} |
1069
|
|
|
|
|
|
|
|
1070
|
|
|
|
|
|
|
=item initialize_formatter_for_hashref() |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
=cut |
1073
|
|
|
|
|
|
|
|
1074
|
|
|
|
|
|
|
sub initialize_formatter_for_hashref { |
1075
|
3
|
|
|
3
|
1
|
3
|
my ($self, $definition) = @_; |
1076
|
3
|
|
50
|
|
|
8
|
my $structure = $definition->{'structure'} // 'hashref'; |
1077
|
3
|
|
50
|
|
|
12
|
my $params = $definition->{'params'} // die "Unable to create $structure formatter: No 'params' argument defined."; |
1078
|
|
|
|
|
|
|
# TODO: Validate parameters. |
1079
|
|
|
|
|
|
|
|
1080
|
3
|
|
|
|
|
13
|
$self->initialize_formatter_for_arrayref( |
1081
|
|
|
|
|
|
|
{ |
1082
|
|
|
|
|
|
|
'structure' => 'arrayref', |
1083
|
|
|
|
|
|
|
'params' => $params, |
1084
|
|
|
|
|
|
|
}, |
1085
|
|
|
|
|
|
|
); |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
my $success = $self->add_formatter( |
1088
|
|
|
|
|
|
|
sub { |
1089
|
3
|
|
|
3
|
|
5
|
my ($date) = @_; |
1090
|
3
|
|
|
|
|
4
|
my %formatted = (); |
1091
|
3
|
|
|
|
|
11
|
@formatted{@$params} = @$date; |
1092
|
3
|
50
|
|
|
|
17
|
return \%formatted if $structure eq 'hashref'; |
1093
|
0
|
|
|
|
|
0
|
return %formatted; |
1094
|
|
|
|
|
|
|
}, |
1095
|
3
|
|
|
|
|
22
|
); |
1096
|
3
|
|
|
|
|
8
|
return $success; |
1097
|
|
|
|
|
|
|
} |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
=item initialize_formatter_for_coderef() |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
=cut |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
sub initialize_formatter_for_coderef { |
1104
|
1
|
|
|
1
|
1
|
2
|
my ($self, $definition) = @_; |
1105
|
1
|
|
50
|
0
|
|
4
|
my $coderef = $definition->{'coderef'} // sub { @_ }; |
|
0
|
|
|
|
|
0
|
|
1106
|
1
|
|
50
|
|
|
2
|
my $params = $definition->{'params'} // die "Unable to create coderef formatter: No 'params' argument defined."; |
1107
|
|
|
|
|
|
|
# TODO: Validate parameters. |
1108
|
|
|
|
|
|
|
|
1109
|
1
|
|
|
|
|
6
|
$self->initialize_formatter_for_arrayref( |
1110
|
|
|
|
|
|
|
{ |
1111
|
|
|
|
|
|
|
'structure' => 'array', |
1112
|
|
|
|
|
|
|
'params' => $params, |
1113
|
|
|
|
|
|
|
}, |
1114
|
|
|
|
|
|
|
); |
1115
|
|
|
|
|
|
|
|
1116
|
1
|
|
|
|
|
20
|
my $success = $self->add_formatter( |
1117
|
|
|
|
|
|
|
$coderef, |
1118
|
|
|
|
|
|
|
); |
1119
|
1
|
|
|
|
|
3
|
return $success; |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
=item initialize_formatter_for_sprintf() |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
=cut |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
sub initialize_formatter_for_sprintf { |
1127
|
13
|
|
|
13
|
1
|
18
|
my ($self, $definition) = @_; |
1128
|
13
|
|
|
|
|
17
|
my $sprintf = $definition->{'sprintf'}; |
1129
|
13
|
|
50
|
|
|
37
|
my $params = $definition->{'params'} // die "Unable to create sprintf formatter: No 'params' argument defined."; |
1130
|
|
|
|
|
|
|
# TODO: Validate parameters. |
1131
|
|
|
|
|
|
|
my $success = $self->add_formatter( |
1132
|
|
|
|
|
|
|
sub { |
1133
|
13
|
|
|
13
|
|
13
|
my ($date) = @_; |
1134
|
85
|
|
100
|
|
|
313
|
my $formatted = sprintf( |
|
|
|
66
|
|
|
|
|
|
|
|
50
|
|
|
|
|
1135
|
|
|
|
|
|
|
$sprintf, |
1136
|
|
|
|
|
|
|
map |
1137
|
|
|
|
|
|
|
{ |
1138
|
|
|
|
|
|
|
# Use the value, if available. |
1139
|
13
|
|
|
|
|
26
|
$date->{$_} |
1140
|
|
|
|
|
|
|
// |
1141
|
|
|
|
|
|
|
# Or see if we can determine the value by transforming another field. |
1142
|
|
|
|
|
|
|
$self->transform_token_value( |
1143
|
|
|
|
|
|
|
'target_token' => $_, |
1144
|
|
|
|
|
|
|
'date' => $date, |
1145
|
|
|
|
|
|
|
) |
1146
|
|
|
|
|
|
|
// |
1147
|
|
|
|
|
|
|
# Or see if there is a default value for the field. |
1148
|
|
|
|
|
|
|
$self->{'defaults'}->{$_} |
1149
|
|
|
|
|
|
|
// |
1150
|
|
|
|
|
|
|
# Or just use a value of empty string. |
1151
|
|
|
|
|
|
|
'' |
1152
|
|
|
|
|
|
|
} |
1153
|
|
|
|
|
|
|
@$params, |
1154
|
|
|
|
|
|
|
); |
1155
|
13
|
|
|
|
|
39
|
return $formatted; |
1156
|
|
|
|
|
|
|
}, |
1157
|
13
|
|
|
|
|
84
|
); |
1158
|
13
|
|
|
|
|
37
|
return $success; |
1159
|
|
|
|
|
|
|
} |
1160
|
|
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
=item initialize_formatter_for_strftime() |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
=cut |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
sub initialize_formatter_for_strftime { |
1166
|
8
|
|
|
8
|
1
|
7
|
my ($self, $definition) = @_; |
1167
|
8
|
|
|
|
|
11
|
my $strftime = $definition->{'strftime'}; |
1168
|
8
|
|
|
|
|
9
|
my $format = $strftime; |
1169
|
8
|
|
|
|
|
10
|
my $params = []; |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
# Preprocess some tokens that expand into other tokens. |
1172
|
8
|
|
|
|
|
14
|
foreach my $preprocess (@$STRPTIME_PREPROCESS) { |
1173
|
64
|
|
|
|
|
351
|
$format =~ s/$preprocess->{'token'}/$preprocess->{'replacement'}/g; |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
|
1176
|
|
|
|
|
|
|
# Replace single character tokens with expanded tokens: %Y -> %{year} |
1177
|
|
|
|
|
|
|
$format =~ |
1178
|
8
|
|
|
|
|
46
|
s/ |
1179
|
|
|
|
|
|
|
(%[-_^]?[%a-zA-Z]) |
1180
|
|
|
|
|
|
|
/ |
1181
|
52
|
|
|
|
|
83
|
$self->strftime_token_to_internal($1) |
1182
|
|
|
|
|
|
|
/sgex; |
1183
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
# Find all tokens. |
1185
|
8
|
|
|
|
|
83
|
my @tokens = $format =~ m/(%{\w+})/g; |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
# Replace tokens in order, and build $params list. |
1188
|
8
|
|
|
|
|
17
|
foreach my $token (@tokens) { |
1189
|
|
|
|
|
|
|
# Replace expanded tokens: %{year} |
1190
|
52
|
50
|
|
|
|
171
|
if ($token =~ m/%{(\w+)}/) { |
1191
|
52
|
|
|
|
|
69
|
my $internal = $1; |
1192
|
52
|
|
50
|
|
|
107
|
my $sprintf = $TOKENS->{$internal}->{'sprintf'} // |
1193
|
|
|
|
|
|
|
die "Unable to find sprintf definition for token '$internal'"; |
1194
|
|
|
|
|
|
|
|
1195
|
52
|
50
|
|
|
|
810
|
say "Internal token $internal maps to sprintf token '$sprintf'." if $self->{'debug'}; |
1196
|
52
|
|
|
|
|
505
|
$format =~ s/\Q$token\E/$sprintf/; |
1197
|
52
|
|
|
|
|
52
|
my $alias; |
1198
|
52
|
100
|
|
|
|
119
|
if (defined($TOKENS->{$internal}->{'storage'})) { |
1199
|
2
|
|
|
|
|
4
|
$alias = $TOKENS->{$internal}->{'storage'}; |
1200
|
|
|
|
|
|
|
} |
1201
|
52
|
|
66
|
|
|
196
|
push @$params, ($alias // $internal); |
1202
|
|
|
|
|
|
|
} |
1203
|
|
|
|
|
|
|
} |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
# Postprocess some tokens that expand into special characters. |
1206
|
8
|
|
|
|
|
15
|
foreach my $postprocess (@$STRFTIME_POSTPROCESS) { |
1207
|
16
|
|
|
|
|
2264
|
$format =~ s/$postprocess->{'token'}/$postprocess->{'replacement'}/g; |
1208
|
|
|
|
|
|
|
} |
1209
|
|
|
|
|
|
|
|
1210
|
8
|
50
|
|
|
|
171
|
say "Crafted sprintf: $strftime -> $format [" . join(', ', @$params) . "]" if $self->{'debug'}; |
1211
|
8
|
|
|
|
|
45
|
my $success = $self->initialize_formatter_for_sprintf( |
1212
|
|
|
|
|
|
|
{ |
1213
|
|
|
|
|
|
|
'sprintf' => $format, |
1214
|
|
|
|
|
|
|
'params' => $params, |
1215
|
|
|
|
|
|
|
}, |
1216
|
|
|
|
|
|
|
); |
1217
|
8
|
|
|
|
|
39
|
return $success; |
1218
|
|
|
|
|
|
|
} |
1219
|
|
|
|
|
|
|
|
1220
|
|
|
|
|
|
|
=item strptime_token_to_regex() |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=cut |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
sub strptime_token_to_regex { |
1225
|
68
|
|
|
68
|
1
|
122
|
my ($self, $token) = @_; |
1226
|
68
|
|
|
|
|
57
|
my $internal; |
1227
|
68
|
50
|
|
|
|
4937
|
say "Attempting to convert strptime token $token into a regex." if $self->{'debug'}; |
1228
|
68
|
50
|
|
|
|
325
|
if (defined($self->{'strptime_mappings'}->{$token})) { |
|
|
100
|
|
|
|
|
|
1229
|
0
|
|
|
|
|
0
|
$internal = $self->{'strptime_mappings'}->{$token}; |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
elsif (defined($DEFAULT_STRPTIME_MAPPINGS->{$token})) { |
1232
|
65
|
|
|
|
|
98
|
$internal = $DEFAULT_STRPTIME_MAPPINGS->{$token}; |
1233
|
|
|
|
|
|
|
} |
1234
|
|
|
|
|
|
|
|
1235
|
68
|
100
|
|
|
|
104
|
if (! defined($internal)) { |
1236
|
3
|
50
|
|
|
|
272
|
say "No mapping found" if $self->{'debug'}; |
1237
|
3
|
|
|
|
|
26
|
return $token; # Perform no substitution. |
1238
|
|
|
|
|
|
|
} |
1239
|
|
|
|
|
|
|
|
1240
|
65
|
50
|
|
|
|
148
|
if (! defined($TOKENS->{$internal}->{'regex'})) { |
1241
|
0
|
|
|
|
|
0
|
die "Unable to find regex definition for token '$internal'"; |
1242
|
|
|
|
|
|
|
} |
1243
|
65
|
50
|
|
|
|
4580
|
say "Strptime token $token maps to internal token '$internal'." if $self->{'debug'}; |
1244
|
|
|
|
|
|
|
|
1245
|
65
|
|
|
|
|
462
|
return $TOKENS->{$internal}->{'regex'}; |
1246
|
|
|
|
|
|
|
} |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=item strftime_token_to_internal |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
=cut |
1251
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
sub strftime_token_to_internal { |
1253
|
52
|
|
|
52
|
1
|
71
|
my ($self, $token) = @_; |
1254
|
52
|
|
|
|
|
39
|
my $internal; |
1255
|
52
|
50
|
|
|
|
1362
|
say "Attempting to convert strftime token $token into an internal token." if $self->{'debug'}; |
1256
|
52
|
50
|
|
|
|
114
|
if (defined($self->{'strftime_mappings'}->{$token})) { |
1257
|
0
|
|
|
|
|
0
|
$internal = $self->{'strftime_mappings'}->{$token}; |
1258
|
|
|
|
|
|
|
} |
1259
|
52
|
50
|
|
|
|
163
|
if (defined($self->{'strptime_mappings'}->{$token})) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1260
|
0
|
|
|
|
|
0
|
$internal = $self->{'strptime_mappings'}->{$token}; |
1261
|
|
|
|
|
|
|
} |
1262
|
|
|
|
|
|
|
elsif (defined($DEFAULT_STRFTIME_MAPPINGS->{$token})) { |
1263
|
0
|
|
|
|
|
0
|
$internal = $DEFAULT_STRFTIME_MAPPINGS->{$token}; |
1264
|
|
|
|
|
|
|
} |
1265
|
|
|
|
|
|
|
elsif (defined($DEFAULT_STRPTIME_MAPPINGS->{$token})) { |
1266
|
52
|
|
|
|
|
55
|
$internal = $DEFAULT_STRPTIME_MAPPINGS->{$token}; |
1267
|
|
|
|
|
|
|
} |
1268
|
|
|
|
|
|
|
|
1269
|
52
|
50
|
|
|
|
76
|
if (! defined($internal)) { |
1270
|
0
|
0
|
|
|
|
0
|
say "No mapping found" if $self->{'debug'}; |
1271
|
0
|
|
|
|
|
0
|
return '%' . $token; # Perform no substitution, but escape token for sprintf. |
1272
|
|
|
|
|
|
|
} |
1273
|
|
|
|
|
|
|
|
1274
|
52
|
50
|
|
|
|
119
|
if (! defined($TOKENS->{$internal}->{'sprintf'})) { |
1275
|
0
|
|
|
|
|
0
|
die "Unable to find sprintf definition for token '$internal'"; |
1276
|
|
|
|
|
|
|
} |
1277
|
52
|
50
|
|
|
|
1098
|
say "Strftime token $token maps to internal token '$internal'." if $self->{'debug'}; |
1278
|
|
|
|
|
|
|
|
1279
|
52
|
|
|
|
|
221
|
return '%{' . $internal . '}'; |
1280
|
|
|
|
|
|
|
} |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
=item transform_token_value() |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
=cut |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
sub transform_token_value { |
1287
|
8
|
|
|
8
|
1
|
20
|
my ($self, %args) = @_; |
1288
|
8
|
|
|
|
|
13
|
my $target_token = $args{'target_token'}; |
1289
|
8
|
|
|
|
|
9
|
my $date = $args{'date'}; |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
# Return the value, if it is already set. |
1292
|
8
|
50
|
|
|
|
19
|
return $date->{$target_token} if defined($date->{$target_token}); |
1293
|
|
|
|
|
|
|
|
1294
|
8
|
|
|
|
|
13
|
foreach my $transformations ($self->{'transformations'}, $DEFAULT_TRANSFORMATIONS) { |
1295
|
|
|
|
|
|
|
# Look up transformations to $target_token from a field that is defined in $date. |
1296
|
15
|
100
|
|
|
|
34
|
if (defined($transformations->{$target_token})) { |
1297
|
6
|
|
|
|
|
5
|
foreach my $source_token (keys %{$transformations->{$target_token}}) { |
|
6
|
|
|
|
|
19
|
|
1298
|
6
|
50
|
33
|
|
|
30
|
if (defined($date->{$source_token}) && defined($transformations->{$target_token}->{$source_token})) { |
1299
|
|
|
|
|
|
|
# Run the transformation and return the value. |
1300
|
6
|
|
|
|
|
17
|
return $transformations->{$target_token}->{$source_token}->($date); |
1301
|
|
|
|
|
|
|
} |
1302
|
|
|
|
|
|
|
} |
1303
|
|
|
|
|
|
|
} |
1304
|
|
|
|
|
|
|
} |
1305
|
|
|
|
|
|
|
|
1306
|
2
|
|
|
|
|
36
|
return; |
1307
|
|
|
|
|
|
|
} |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=item most_likely_token() |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
=cut |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
sub most_likely_token { |
1314
|
148
|
|
|
148
|
1
|
551
|
my ($self, %args) = @_; |
1315
|
148
|
|
50
|
|
|
312
|
my $already_claimed = $args{'already_claimed'} // {}; |
1316
|
148
|
|
50
|
|
|
294
|
my $possible_tokens = $args{'possible_tokens'} // return; |
1317
|
148
|
|
100
|
|
|
296
|
my $hint = $args{'heuristic'} // ''; |
1318
|
148
|
|
50
|
|
|
356
|
my $date_part = $args{'value'} // return; |
1319
|
148
|
|
33
|
|
|
360
|
my $date_string = $args{'date_string'} // $date_part; |
1320
|
|
|
|
|
|
|
|
1321
|
148
|
|
|
|
|
230
|
foreach my $token (@$possible_tokens) { |
1322
|
379
|
100
|
|
|
|
768
|
if ($token eq 'day') { |
1323
|
54
|
100
|
|
|
|
110
|
next if defined($already_claimed->{'day'}); |
1324
|
40
|
50
|
|
|
|
381
|
next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/); |
1325
|
40
|
100
|
|
|
|
114
|
if ($date_part > 31) { |
1326
|
8
|
|
|
|
|
136
|
warn "Error parsing day: " |
1327
|
|
|
|
|
|
|
. "value '$date_part' out of range ($date_string); " |
1328
|
|
|
|
|
|
|
. "Perhaps you need a different heuristic hint than '$hint'\n"; |
1329
|
8
|
|
|
|
|
58
|
return; |
1330
|
|
|
|
|
|
|
} |
1331
|
32
|
|
|
|
|
118
|
return $token; |
1332
|
|
|
|
|
|
|
} |
1333
|
325
|
100
|
|
|
|
491
|
if ($token eq 'month') { |
1334
|
66
|
100
|
|
|
|
192
|
next if defined($already_claimed->{'month'}); |
1335
|
48
|
100
|
|
|
|
95
|
next if defined($already_claimed->{'month_abbr'}); |
1336
|
35
|
100
|
|
|
|
102
|
next if defined($already_claimed->{'month_name'}); |
1337
|
30
|
50
|
|
|
|
290
|
next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/); |
1338
|
30
|
100
|
|
|
|
110
|
if ($date_part > 12) { |
1339
|
4
|
|
|
|
|
101
|
warn "Error parsing month: " |
1340
|
|
|
|
|
|
|
. "value '$date_part' out of range ($date_string); " |
1341
|
|
|
|
|
|
|
. "Perhaps you need a different heuristic hint than '$hint'\n"; |
1342
|
4
|
|
|
|
|
34
|
return; |
1343
|
|
|
|
|
|
|
} |
1344
|
26
|
|
|
|
|
92
|
return $token; |
1345
|
|
|
|
|
|
|
} |
1346
|
259
|
100
|
100
|
|
|
761
|
if ($token eq 'year' || $token eq 'year_abbr') { |
1347
|
133
|
100
|
|
|
|
264
|
next if defined($already_claimed->{'year'}); |
1348
|
97
|
100
|
|
|
|
242
|
next if defined($already_claimed->{'year_abbr'}); |
1349
|
69
|
100
|
|
|
|
1341
|
next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/); |
1350
|
47
|
|
|
|
|
257
|
return $token; |
1351
|
|
|
|
|
|
|
} |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
# Any other type of token does not need special handling. |
1354
|
126
|
50
|
|
|
|
224
|
next if defined($already_claimed->{$token}); |
1355
|
126
|
100
|
|
|
|
5455
|
next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/); |
1356
|
29
|
|
|
|
|
168
|
return $token; |
1357
|
|
|
|
|
|
|
} |
1358
|
|
|
|
|
|
|
|
1359
|
2
|
50
|
|
|
|
6
|
if ($hint) { |
1360
|
0
|
|
|
|
|
0
|
warn "Error parsing $possible_tokens->[0]: " |
1361
|
|
|
|
|
|
|
. "elements out of order ($date_string); " |
1362
|
|
|
|
|
|
|
. "Perhaps you need a different heuristic hint than '$hint'\n"; |
1363
|
|
|
|
|
|
|
} |
1364
|
|
|
|
|
|
|
|
1365
|
2
|
|
|
|
|
8
|
return; |
1366
|
|
|
|
|
|
|
} |
1367
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=item add_parser() |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=cut |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
sub add_parser { |
1374
|
242
|
|
|
242
|
1
|
275
|
my ($self, $parser) = @_; |
1375
|
242
|
|
|
|
|
204
|
my $count = push @{ $self->{'active_parsers'} }, $parser; |
|
242
|
|
|
|
|
616
|
|
1376
|
242
|
50
|
|
|
|
607
|
return $count ? 1 : 0; |
1377
|
|
|
|
|
|
|
} |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
=item add_formatter() |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
=cut |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
sub add_formatter { |
1384
|
24
|
|
|
24
|
1
|
28
|
my ($self, $formatter) = @_; |
1385
|
24
|
|
|
|
|
21
|
my $count = push @{ $self->{'active_formatters'} }, $formatter; |
|
24
|
|
|
|
|
63
|
|
1386
|
24
|
50
|
|
|
|
65
|
return $count ? 1 : 0; |
1387
|
|
|
|
|
|
|
} |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
=item parse_date() |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
=cut |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
sub parse_date { |
1394
|
146
|
|
|
146
|
1
|
2336
|
my ($self, $date_string) = @_; |
1395
|
146
|
|
|
|
|
162
|
foreach my $parser (@{ $self->{'active_parsers'} }) { |
|
146
|
|
|
|
|
242
|
|
1396
|
146
|
|
|
|
|
262
|
my $date = $parser->($date_string); |
1397
|
|
|
|
|
|
|
# TODO: Add formatting step here. |
1398
|
146
|
100
|
|
|
|
707
|
return $date if defined($date); |
1399
|
|
|
|
|
|
|
} |
1400
|
|
|
|
|
|
|
# None of the parsers were able to extract the date components. |
1401
|
36
|
|
|
|
|
98
|
return; |
1402
|
|
|
|
|
|
|
} |
1403
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
=item format_date() |
1405
|
|
|
|
|
|
|
|
1406
|
|
|
|
|
|
|
=cut |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
sub format_date { |
1409
|
20
|
|
|
20
|
1
|
89
|
my ($self, $date) = @_; |
1410
|
20
|
|
|
|
|
35
|
my @formatted = ($date); |
1411
|
20
|
|
|
|
|
18
|
foreach my $formatter (@{ $self->{'active_formatters'} }) { |
|
20
|
|
|
|
|
36
|
|
1412
|
24
|
|
|
|
|
43
|
@formatted = $formatter->(@formatted); |
1413
|
|
|
|
|
|
|
} |
1414
|
20
|
50
|
|
|
|
85
|
return $formatted[0] if (scalar(@formatted) == 1); |
1415
|
0
|
|
|
|
|
0
|
return @formatted; |
1416
|
|
|
|
|
|
|
} |
1417
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
=item reformat_date() |
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
=cut |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
sub reformat_date { |
1423
|
5
|
|
|
5
|
1
|
23
|
my ($self, $date_string) = @_; |
1424
|
5
|
|
|
|
|
12
|
my $date = $self->parse_date($date_string); |
1425
|
5
|
|
|
|
|
14
|
my @formatted = $self->format_date($date); |
1426
|
5
|
50
|
|
|
|
22
|
return $formatted[0] if (scalar(@formatted) == 1); |
1427
|
0
|
|
|
|
|
|
return @formatted; |
1428
|
|
|
|
|
|
|
}; |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=back |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
=cut |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
1; |
1435
|
|
|
|
|
|
|
__END__ |