| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Date::Find 0.03; |
|
2
|
2
|
|
|
2
|
|
224891
|
use 5.020; |
|
|
2
|
|
|
|
|
8
|
|
|
3
|
2
|
|
|
2
|
|
1025
|
use experimental 'signatures'; |
|
|
2
|
|
|
|
|
4950
|
|
|
|
2
|
|
|
|
|
13
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
2
|
|
|
2
|
|
1085
|
use utf8; # we store month names in this file |
|
|
2
|
|
|
|
|
406
|
|
|
|
2
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
98
|
use Exporter 'import'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
76
|
|
|
8
|
2
|
|
|
2
|
|
9
|
use Carp 'croak'; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
6413
|
|
|
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw(find_ymd find_all_ymd guess_ymd |
|
10
|
|
|
|
|
|
|
%date_type %longname |
|
11
|
|
|
|
|
|
|
); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Date::Find - find year, month, day from (filename) strings |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use 5.020; |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my $info = guess_ymd('statement_20221201.pdf'); |
|
22
|
|
|
|
|
|
|
say "$info->{value} - $info->{year} - $info->{month} - $info->{day}"; |
|
23
|
|
|
|
|
|
|
# statement_20221201.pdf - 2022 - 12 - 01 |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my @dates = guess_ymd(['statement_20221201.pdf', |
|
26
|
|
|
|
|
|
|
'statement_02.12.2022.pdf', |
|
27
|
|
|
|
|
|
|
'random.pdf', |
|
28
|
|
|
|
|
|
|
], components => 'ym'); |
|
29
|
|
|
|
|
|
|
for my $info (@dates) { |
|
30
|
|
|
|
|
|
|
say "$info->{value} - $info->{year} - $info->{month} - $info->{day}"; |
|
31
|
|
|
|
|
|
|
} |
|
32
|
|
|
|
|
|
|
# statement_20221201.pdf - 2022 - 12 - 00 |
|
33
|
|
|
|
|
|
|
# statement_02.12.2022.pdf - 2022 - 12 - 00 |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
my @dates = guess_ymd(['statement_20221201.pdf', |
|
36
|
|
|
|
|
|
|
'statement_02.12.2022.pdf', |
|
37
|
|
|
|
|
|
|
'random.pdf', |
|
38
|
|
|
|
|
|
|
], components => 'ym', mode => 'strict'); |
|
39
|
|
|
|
|
|
|
for my $info (@dates) { |
|
40
|
|
|
|
|
|
|
say "$info->{value} - $info->{year} - $info->{month} - $info->{day}"; |
|
41
|
|
|
|
|
|
|
} |
|
42
|
|
|
|
|
|
|
# statement_20221201.pdf - 2022 - 12 - 00 |
|
43
|
|
|
|
|
|
|
# statement_02.12.2022.pdf - 2022 - 12 - 00 |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=cut |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
our %month_names = ( |
|
48
|
|
|
|
|
|
|
# English |
|
49
|
|
|
|
|
|
|
'january' => 1, |
|
50
|
|
|
|
|
|
|
'february' => 2, |
|
51
|
|
|
|
|
|
|
'march' => 3, |
|
52
|
|
|
|
|
|
|
'april' => 4, |
|
53
|
|
|
|
|
|
|
'may' => 5, |
|
54
|
|
|
|
|
|
|
'june' => 6, |
|
55
|
|
|
|
|
|
|
'july' => 7, |
|
56
|
|
|
|
|
|
|
'august' => 8, |
|
57
|
|
|
|
|
|
|
'september' => 9, |
|
58
|
|
|
|
|
|
|
'october' => 10, |
|
59
|
|
|
|
|
|
|
'november' => 11, |
|
60
|
|
|
|
|
|
|
'december' => 12, |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# German |
|
63
|
|
|
|
|
|
|
'januar' => 1, |
|
64
|
|
|
|
|
|
|
'februar' => 2, |
|
65
|
|
|
|
|
|
|
'maerz' => 3, |
|
66
|
|
|
|
|
|
|
'märz' => 3, |
|
67
|
|
|
|
|
|
|
'april' => 4, |
|
68
|
|
|
|
|
|
|
'mai' => 5, |
|
69
|
|
|
|
|
|
|
'juni' => 6, |
|
70
|
|
|
|
|
|
|
'juli' => 7, |
|
71
|
|
|
|
|
|
|
'august' => 8, |
|
72
|
|
|
|
|
|
|
'september' => 9, |
|
73
|
|
|
|
|
|
|
'oktober' => 10, |
|
74
|
|
|
|
|
|
|
'november' => 11, |
|
75
|
|
|
|
|
|
|
'dezember' => 12, |
|
76
|
|
|
|
|
|
|
); |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
our $dxy = |
|
79
|
|
|
|
|
|
|
qr/(?[12]\d|3[01]|0?\d)(?:[.]?)\s*/ |
|
80
|
|
|
|
|
|
|
. "(?(?i)" |
|
81
|
|
|
|
|
|
|
. join( "|", |
|
82
|
|
|
|
|
|
|
map { /^(...)(.*)$/; $2 ? "$1($2)?" : $1 } reverse sort keys %month_names) |
|
83
|
|
|
|
|
|
|
. ")" |
|
84
|
|
|
|
|
|
|
. qr/\s+(?(?:20)\d\d)\b/ |
|
85
|
|
|
|
|
|
|
; |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Februar 27, 2023 |
|
88
|
|
|
|
|
|
|
our $xdy = |
|
89
|
|
|
|
|
|
|
"(?(?i)" |
|
90
|
|
|
|
|
|
|
. join( "|", |
|
91
|
|
|
|
|
|
|
map { /^(...)(.*)$/; $2 ? "$1($2)?" : $1 } reverse sort keys %month_names) |
|
92
|
|
|
|
|
|
|
. ")" |
|
93
|
|
|
|
|
|
|
. qr/\s*(?[12]\d|3[01]|0?\d)(?:[.,]?)\s*/ |
|
94
|
|
|
|
|
|
|
. qr/\s+(?(?:20)\d\d)\b/ |
|
95
|
|
|
|
|
|
|
; |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
our %date_type = ( |
|
98
|
|
|
|
|
|
|
ymd => qr/(?(?:20)\d\d)([-.]?)(?0\d|1[012])(\2)(?[012]\d|3[01])/, |
|
99
|
|
|
|
|
|
|
dmy => qr/(?[012]\d|3[01])([-.]?)(?0\d|1[012])(\2)(?(?:20)\d\d)/, |
|
100
|
|
|
|
|
|
|
dxy => $dxy, |
|
101
|
|
|
|
|
|
|
xdy => $xdy, |
|
102
|
|
|
|
|
|
|
ym => qr/(?(?:20)\d\d)([-.]?)(?0\d|1[012])/, |
|
103
|
|
|
|
|
|
|
my => qr/(?0\d|1[012])([-.]?)(?(?:20)\d\d)/, |
|
104
|
|
|
|
|
|
|
y => [qr/\D(?(?:20)\d\d)\D/, qr/(?(?:20)\d\d)/], |
|
105
|
|
|
|
|
|
|
); |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
our @default_preference = sort { length $b <=> length $a || $b cmp $a } keys %date_type; |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Should we also support hour, minute, second?! |
|
111
|
|
|
|
|
|
|
our %longname = ( |
|
112
|
|
|
|
|
|
|
'y' => 'year', |
|
113
|
|
|
|
|
|
|
'm' => 'month', |
|
114
|
|
|
|
|
|
|
'x' => 'monthname', |
|
115
|
|
|
|
|
|
|
'd' => 'day', |
|
116
|
|
|
|
|
|
|
); |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head2 C<< find_ymd >> |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=cut |
|
122
|
|
|
|
|
|
|
|
|
123
|
102
|
|
|
102
|
1
|
160
|
sub find_ymd( $date_regex, $source, $date_regex_order='ymd' ) { |
|
|
102
|
|
|
|
|
203
|
|
|
|
102
|
|
|
|
|
146
|
|
|
|
102
|
|
|
|
|
166
|
|
|
|
102
|
|
|
|
|
134
|
|
|
124
|
102
|
100
|
|
|
|
7089
|
if( $source !~ /$date_regex/ ) { |
|
125
|
66
|
|
|
|
|
308
|
return; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
36
|
|
|
|
|
136
|
my %ymd; |
|
129
|
36
|
50
|
|
|
|
212
|
if( keys %- ) { # we have named captures |
|
130
|
36
|
|
|
|
|
125
|
for (keys %longname) { |
|
131
|
144
|
|
66
|
|
|
1225
|
$ymd{ $longname{ $_ } } //= $+{ $longname{ $_ }} // $+{ $_ }; |
|
|
|
|
66
|
|
|
|
|
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
} else { |
|
134
|
0
|
|
|
|
|
0
|
my @ymd = split //, $date_regex_order; |
|
135
|
0
|
|
|
|
|
0
|
for my $i (0..$#ymd) { |
|
136
|
0
|
|
0
|
|
|
0
|
$ymd{ $longname{ $ymd[$i] }} //= substr( $source, $-[$i+1], $+[$i+1] ); |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
}; |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# map month names to month numbers |
|
141
|
36
|
100
|
|
|
|
130
|
if( $ymd{monthname} ) { |
|
142
|
|
|
|
|
|
|
|
|
143
|
2
|
50
|
|
|
|
11
|
if( ! exists $month_names{ lc $ymd{ monthname }}) { |
|
144
|
0
|
|
|
|
|
0
|
die "Whoops unknown month '$ymd{ monthname }'"; |
|
145
|
|
|
|
|
|
|
}; |
|
146
|
|
|
|
|
|
|
|
|
147
|
2
|
|
|
|
|
7
|
$ymd{month} = $month_names{ lc( delete $ymd{ monthname })}; |
|
148
|
|
|
|
|
|
|
} else { |
|
149
|
34
|
|
|
|
|
66
|
delete $ymd{ monthname }; |
|
150
|
|
|
|
|
|
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
36
|
50
|
|
|
|
142
|
$ymd{ year } += 2000 if $ymd{ year } < 100; |
|
153
|
36
|
|
|
|
|
87
|
for my $n ( values %longname ) { |
|
154
|
144
|
100
|
|
|
|
306
|
next if $n eq 'monthname'; |
|
155
|
|
|
|
|
|
|
|
|
156
|
108
|
|
|
|
|
360
|
$ymd{ $n } = sprintf '%02d', $ymd{ $n }; |
|
157
|
|
|
|
|
|
|
}; |
|
158
|
|
|
|
|
|
|
|
|
159
|
36
|
|
|
|
|
56
|
delete $ymd{ monthname }; |
|
160
|
|
|
|
|
|
|
|
|
161
|
36
|
|
|
|
|
98
|
return \%ymd; |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
|
|
164
|
13
|
|
|
13
|
0
|
229303
|
sub find_all_ymd( $source, %options ) { |
|
|
13
|
|
|
|
|
28
|
|
|
|
13
|
|
|
|
|
21
|
|
|
|
13
|
|
|
|
|
21
|
|
|
165
|
|
|
|
|
|
|
# $options{ preference } //= \@default_preference; |
|
166
|
13
|
|
|
|
|
21
|
my %res; |
|
167
|
13
|
|
|
|
|
86
|
for my $dt (sort keys %date_type) { |
|
168
|
91
|
100
|
|
|
|
340
|
my @attempts = ref $date_type{ $dt } eq 'ARRAY' ? @{ $date_type{ $dt } } : $date_type{ $dt }; |
|
|
13
|
|
|
|
|
38
|
|
|
169
|
91
|
|
|
|
|
168
|
for my $candidate (@attempts) { |
|
170
|
102
|
|
|
|
|
218
|
my $r = find_ymd( $candidate, $source ); |
|
171
|
102
|
100
|
|
|
|
327
|
if( $r ) { |
|
172
|
36
|
|
|
|
|
92
|
$res{ $dt } = $r; |
|
173
|
|
|
|
|
|
|
last |
|
174
|
36
|
|
|
|
|
94
|
} |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#if( $options{ }) { |
|
179
|
|
|
|
|
|
|
#} |
|
180
|
13
|
|
|
|
|
82
|
return %res |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
6
|
|
|
6
|
0
|
9
|
sub guess_date_format( $sources, %options ) { |
|
|
6
|
|
|
|
|
11
|
|
|
|
6
|
|
|
|
|
14
|
|
|
|
6
|
|
|
|
|
11
|
|
|
184
|
6
|
50
|
|
|
|
14
|
$sources = [$sources] unless ref $sources eq 'ARRAY'; |
|
185
|
6
|
|
|
|
|
11
|
my %res; |
|
186
|
6
|
|
|
|
|
16
|
for my $s (@$sources) { |
|
187
|
11
|
|
|
|
|
26
|
my %fmts = find_all_ymd( $s ); |
|
188
|
11
|
50
|
|
|
|
32
|
if( scalar keys %fmts ) { |
|
189
|
11
|
|
|
|
|
29
|
for (keys %fmts) { |
|
190
|
30
|
|
100
|
|
|
117
|
$res{ $_ } //= []; |
|
191
|
30
|
|
|
|
|
42
|
push @{$res{$_}}, { value => $s, %{$fmts{ $_ }} }; |
|
|
30
|
|
|
|
|
56
|
|
|
|
30
|
|
|
|
|
223
|
|
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
} else { |
|
194
|
0
|
|
0
|
|
|
0
|
$res{ 'no_date' } //= []; |
|
195
|
0
|
|
|
|
|
0
|
push @{$res{$_}}, { value => $s, date => undef }; |
|
|
0
|
|
|
|
|
0
|
|
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
} |
|
198
|
6
|
|
|
|
|
22
|
return \%res |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
6
|
|
|
6
|
0
|
14447
|
sub guess_ymd( $sources, %options ) { |
|
|
6
|
|
|
|
|
14
|
|
|
|
6
|
|
|
|
|
12
|
|
|
|
6
|
|
|
|
|
11
|
|
|
202
|
6
|
|
50
|
|
|
99
|
$options{ mode } //= 'lax'; |
|
203
|
6
|
|
50
|
|
|
33
|
$options{ preference } //= \@default_preference; |
|
204
|
|
|
|
|
|
|
|
|
205
|
6
|
50
|
|
|
|
18
|
croak "Need an array of filenames" |
|
206
|
|
|
|
|
|
|
unless defined $sources; |
|
207
|
6
|
50
|
|
|
|
21
|
$sources = [$sources] unless ref $sources eq 'ARRAY'; |
|
208
|
6
|
|
|
|
|
21
|
my $values = guess_date_format( $sources, %options ); |
|
209
|
|
|
|
|
|
|
|
|
210
|
6
|
|
|
|
|
19
|
my $fmt; |
|
211
|
6
|
0
|
33
|
|
|
22
|
if( $values->{no_date} and @{ $values->{no_date}} and $options{ mode } eq 'strict') { |
|
|
0
|
|
33
|
|
|
0
|
|
|
212
|
|
|
|
|
|
|
# Maybe we don't want croak?! |
|
213
|
0
|
|
|
|
|
0
|
croak "Entries without a date found: " . join " ", @{ $values->{no_date} }; |
|
|
0
|
|
|
|
|
0
|
|
|
214
|
|
|
|
|
|
|
}; |
|
215
|
6
|
|
|
|
|
10
|
delete $values->{no_date}; # we can't do anything about them |
|
216
|
|
|
|
|
|
|
|
|
217
|
6
|
50
|
|
|
|
27
|
if( scalar keys %$values == 1 ) { |
|
|
|
100
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# Only one kind of format, so we use that |
|
219
|
0
|
|
|
|
|
0
|
($fmt) = keys %$values; |
|
220
|
|
|
|
|
|
|
} elsif( $options{ components }) { |
|
221
|
|
|
|
|
|
|
# Find all entries that have the wanted components and be done with it |
|
222
|
4
|
|
|
|
|
23
|
my $s = join "", sort split //, $options{ components }; |
|
223
|
|
|
|
|
|
|
|
|
224
|
4
|
|
|
|
|
9
|
my %res; |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
# First, fill in from what the user specified, then fill in the remaining |
|
227
|
|
|
|
|
|
|
# components from other guesses |
|
228
|
4
|
100
|
|
|
|
15
|
if( $values->{ $options{ components }}) { |
|
229
|
3
|
|
|
|
|
6
|
my $dt = $options{ components }; |
|
230
|
|
|
|
|
|
|
$res{ $_->{value} } //= $_ |
|
231
|
3
|
|
33
|
|
|
5
|
for @{$values->{$dt}}; |
|
|
3
|
|
|
|
|
25
|
|
|
232
|
|
|
|
|
|
|
} |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# Fill in the remaining stuff from other guesses |
|
235
|
4
|
50
|
|
|
|
22
|
for my $dt (sort { length $b <=> length $a || $a cmp $b } keys %$values) { |
|
|
12
|
|
|
|
|
36
|
|
|
236
|
14
|
|
|
|
|
77
|
my $comp = join "", sort split //, $dt; |
|
237
|
14
|
100
|
|
|
|
139
|
if( $comp =~ /$s/ ) { |
|
238
|
|
|
|
|
|
|
$res{ $_->{value} } //= $_ |
|
239
|
10
|
|
66
|
|
|
16
|
for @{$values->{$dt}}; |
|
|
10
|
|
|
|
|
55
|
|
|
240
|
|
|
|
|
|
|
} |
|
241
|
|
|
|
|
|
|
} |
|
242
|
4
|
50
|
|
|
|
13
|
my @res = map { $res{ $_ } ? $res{ $_ } : () } @$sources; |
|
|
8
|
|
|
|
|
30
|
|
|
243
|
4
|
50
|
|
|
|
50
|
return wantarray ? @res : $res[0]; |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
} else { |
|
246
|
2
|
|
|
|
|
12
|
(my $max) = sort { @$b <=> @$a } values %$values; |
|
|
6
|
|
|
|
|
15
|
|
|
247
|
2
|
|
|
|
|
3
|
$max = @$max; |
|
248
|
|
|
|
|
|
|
|
|
249
|
2
|
|
|
|
|
6
|
my %mode = map { $_ => 1 } grep { @{$values->{$_}} == $max } keys %$values; |
|
|
6
|
|
|
|
|
15
|
|
|
|
6
|
|
|
|
|
10
|
|
|
|
6
|
|
|
|
|
15
|
|
|
250
|
2
|
|
|
|
|
5
|
my @fmt = grep { $mode{ $_ }} @{ $options{ preference }}; |
|
|
14
|
|
|
|
|
28
|
|
|
|
2
|
|
|
|
|
5
|
|
|
251
|
2
|
50
|
33
|
|
|
16
|
if( scalar keys %mode != 1 && $options{mode} eq 'strict') { |
|
252
|
0
|
|
|
|
|
0
|
croak "Multiple possibilities found, specify one: " . join ",", keys %mode; |
|
253
|
|
|
|
|
|
|
} else { |
|
254
|
2
|
|
|
|
|
8
|
$fmt = $fmt[0] |
|
255
|
|
|
|
|
|
|
} |
|
256
|
|
|
|
|
|
|
} |
|
257
|
2
|
50
|
|
|
|
5
|
return wantarray ? @{ $values->{$fmt} } : $values->{$fmt}->[0]; |
|
|
2
|
|
|
|
|
19
|
|
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
1; |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
L - extract dates from more arbitrary text |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
L - extract date and time from filenames, with timezone |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |