line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
1
|
|
|
1
|
|
42401
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
2
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
3
|
|
|
|
|
|
|
package Date::Extract::Surprise; |
4
|
|
|
|
|
|
|
BEGIN { |
5
|
1
|
|
|
1
|
|
18
|
$Date::Extract::Surprise::VERSION = '0.006'; |
6
|
|
|
|
|
|
|
} |
7
|
|
|
|
|
|
|
BEGIN { |
8
|
1
|
|
|
1
|
|
18
|
$Date::Extract::Surprise::DIST = 'Date-Extract-Surprise'; |
9
|
|
|
|
|
|
|
} |
10
|
|
|
|
|
|
|
# ABSTRACT: extract probable dates from strings *with surprises* |
11
|
|
|
|
|
|
|
|
12
|
1
|
|
|
1
|
|
4
|
use Carp qw( croak ); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
47
|
|
13
|
1
|
|
|
1
|
|
4
|
use Scalar::Util qw( blessed ); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
95
|
|
14
|
|
|
|
|
|
|
# just trying to be helpful. |
15
|
|
|
|
|
|
|
use Exporter::Easy ( |
16
|
1
|
|
|
|
|
5
|
OK => [qw( extract_datetimes )], |
17
|
1
|
|
|
1
|
|
750
|
); |
|
1
|
|
|
|
|
1362
|
|
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
1105
|
use DateTime::Format::Flexible qw(); |
|
1
|
|
|
|
|
278705
|
|
|
1
|
|
|
|
|
974
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub new { |
23
|
5
|
|
|
5
|
1
|
30
|
my $class = shift; |
24
|
5
|
|
|
|
|
28
|
my $self = |
25
|
|
|
|
|
|
|
bless { |
26
|
|
|
|
|
|
|
DEBUG => 0, |
27
|
|
|
|
|
|
|
@_, |
28
|
|
|
|
|
|
|
}, |
29
|
|
|
|
|
|
|
$class; |
30
|
|
|
|
|
|
|
|
31
|
5
|
|
|
|
|
13
|
return $self; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
sub extract { |
37
|
6
|
50
|
|
6
|
1
|
2208
|
return unless @_; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# can be called as an object method, class method, or function |
40
|
|
|
|
|
|
|
# there's probably better ways to support this. |
41
|
6
|
50
|
66
|
|
|
87
|
my $self = blessed( $_[0] ) && $_[0]->isa( __PACKAGE__ ) ? shift |
|
|
100
|
|
|
|
|
|
42
|
|
|
|
|
|
|
: $_[0] eq __PACKAGE__ ? shift->new() |
43
|
|
|
|
|
|
|
: croak "Please call as a class or object method!\n"; |
44
|
|
|
|
|
|
|
|
45
|
6
|
|
|
|
|
14
|
my $text = shift; |
46
|
|
|
|
|
|
|
|
47
|
6
|
|
|
|
|
13
|
my %args = @_; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# set a base date for ambiguous DTs we find, default to epoch. |
50
|
|
|
|
|
|
|
# if a string value is passed and can't be parsed, croak. |
51
|
6
|
50
|
33
|
|
|
89
|
my $base = blessed( $args{base} ) && $args{base}->isa( 'DateTime' ) ? delete $args{base} |
|
|
50
|
|
|
|
|
|
52
|
|
|
|
|
|
|
: defined $args{base} ? DateTime::Format::Flexible->parse_datetime( $args{base} ) |
53
|
|
|
|
|
|
|
: DateTime->new( year => 1970, month => 1, day => 1 ); |
54
|
|
|
|
|
|
|
|
55
|
6
|
|
|
|
|
1647
|
my @timestamps; # populate this |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# there's no immediate need to split into lines, but it should make |
58
|
|
|
|
|
|
|
# some future features easier (like reporting which lines matched) |
59
|
6
|
|
|
|
|
28
|
for my $line ( split /[\n\r]+/, $text ) { |
60
|
|
|
|
|
|
|
|
61
|
6
|
50
|
|
|
|
25
|
warn " {$line}\n" if $self->{DEBUG} > 0; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# split it into terms and remove chars that may trip us up |
64
|
6
|
|
|
|
|
30
|
my @terms = map { (my $s = $_) =~ s/[,]/ /g; $s } split q[ ], $line; |
|
69
|
|
|
|
|
106
|
|
|
69
|
|
|
|
|
125
|
|
65
|
|
|
|
|
|
|
|
66
|
6
|
|
|
|
|
27
|
for my $i ( 0 .. $#terms ) { |
67
|
69
|
|
|
|
|
405092
|
for my $j ( $i .. $#terms ) { |
68
|
450
|
|
|
|
|
1559841
|
my $search_str = join ' ', @terms[$i .. $j]; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# clean up other crap that DT::F::F chokes on? |
71
|
450
|
|
|
|
|
1604
|
$search_str =~ s/at//ig; |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# clean up whitespace |
74
|
450
|
|
|
|
|
2159
|
$search_str =~ s/(\s){2,}/$1/g; |
75
|
450
|
|
|
|
|
2822
|
$search_str =~ s/^\s+|\s+$//g; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# it almost certainly has some *numbers* in it! |
78
|
450
|
100
|
|
|
|
1852
|
next unless $search_str =~ /\d/; |
79
|
|
|
|
|
|
|
|
80
|
276
|
50
|
|
|
|
1044
|
warn " {$search_str}\n" if $self->{DEBUG} > 1; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# if we can't determine the *date*, assume epoch |
83
|
276
|
|
|
|
|
1242
|
DateTime::Format::Flexible->base( $base ); |
84
|
|
|
|
|
|
|
|
85
|
276
|
100
|
|
|
|
13229
|
next unless my $dt = eval { |
86
|
276
|
|
|
|
|
1184
|
DateTime::Format::Flexible->parse_datetime( $search_str ); |
87
|
|
|
|
|
|
|
}; |
88
|
|
|
|
|
|
|
|
89
|
36
|
|
|
|
|
158103
|
push @timestamps, $dt; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
6
|
|
|
|
|
144
|
return @timestamps; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub extract_datetimes { |
100
|
2
|
50
|
|
2
|
1
|
1428
|
return unless @_; |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# can be called as an object method, class method, or function |
103
|
|
|
|
|
|
|
# there's probably better ways to support this. |
104
|
2
|
50
|
33
|
|
|
27
|
croak "This is a function. Use extract() if you need a method!\n" |
|
|
|
33
|
|
|
|
|
105
|
|
|
|
|
|
|
if ( blessed( $_[0] ) && $_[0]->isa( __PACKAGE__ ) ) or |
106
|
|
|
|
|
|
|
( $_[0] eq __PACKAGE__ ); |
107
|
|
|
|
|
|
|
|
108
|
2
|
|
|
|
|
15
|
my $self = __PACKAGE__->new(); |
109
|
|
|
|
|
|
|
|
110
|
2
|
|
|
|
|
7
|
return $self->extract( @_ ); |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
1 || q{life without coffee isn't worth living}; #truth |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=pod |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head1 NAME |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Date::Extract::Surprise - extract probable dates from strings *with surprises* |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 VERSION |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
version 0.006 |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 SYNOPSIS |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
use Date::Extract::Surprise; |
129
|
|
|
|
|
|
|
my $des = Date::Extract::Surprise->new(); |
130
|
|
|
|
|
|
|
my @datetimes = $des->extract( $arbitrary_text ); |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# or... |
133
|
|
|
|
|
|
|
use Date::Extract::Surprise; |
134
|
|
|
|
|
|
|
my @datetimes = Date::Extract::Surprise->extract( $arbitrary_text ); |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
# or... |
137
|
|
|
|
|
|
|
use Date::Extract::Surprise qw( extract_datetimes ); |
138
|
|
|
|
|
|
|
my @datetimes = extract_datetimes( $arbitrary_text ); |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 DESCRIPTION |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
This is modeled on Sartak's excellent L, a proven |
143
|
|
|
|
|
|
|
and capable module that you can use to extract references to dates |
144
|
|
|
|
|
|
|
and times from otherwise arbitrary text. For example: |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
"The package will be delivered at 3:15 PM, March 15, 2007, on the dot." |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Upon parsing that, you should end up with a L object |
149
|
|
|
|
|
|
|
representing March 15, 2007 at 3:15PM in your timezone. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
L is designed to try to minimize "false-positives" |
152
|
|
|
|
|
|
|
(ie. detecting things that *aren't* actually dates or times), but |
153
|
|
|
|
|
|
|
at the expense of potentially missing some dates. As its |
154
|
|
|
|
|
|
|
documentation states, "I welcome here.>" |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Because I had the I need - to find dates in strings I
|
157
|
|
|
|
|
|
|
if some were going to be bogus>, I created L |
158
|
|
|
|
|
|
|
which will gladly detect anything that even I like |
159
|
|
|
|
|
|
|
it could be a date or time. |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
B at least I of the dates this will 'detect' in some |
162
|
|
|
|
|
|
|
text should be what you wanted. It's up to you to figure out which one |
163
|
|
|
|
|
|
|
that is! :-) |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head1 METHODS |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head2 new |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Just your basic object constructor. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
my $des = Date::Extract::Surprise->new(); |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Currently takes only one argument: |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head2 extract |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
This is designed to (more or less) mirror the interface of |
178
|
|
|
|
|
|
|
Date::Extract->extract(). However, at this time, it supports |
179
|
|
|
|
|
|
|
almost none of its namesake's extra options, and adds one |
180
|
|
|
|
|
|
|
additional option. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
This can be called as either a class method or as a method on |
183
|
|
|
|
|
|
|
an object, as seen in the L. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 FUNCTIONS |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head2 extract_datetimes |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
If you're old-skool and prefer things to export a function, you can have it. |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
It takes the same arguments as the L method and returns the same values. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=for :list = DEBUG |
194
|
|
|
|
|
|
|
integer greater than 0 for debugging level. higher numbers |
195
|
|
|
|
|
|
|
give more detail |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
It will probably take more in the future. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head1 SEE ALSO |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=for :list * Date::Extract |
202
|
|
|
|
|
|
|
* DateTime::Format::Flexible |
203
|
|
|
|
|
|
|
* Time::ParseDate |
204
|
|
|
|
|
|
|
* Date::Manip |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=head1 NOTES |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Yes, this code is slow and dumb, but it helped me solve a problem and |
209
|
|
|
|
|
|
|
I hope it may help others, too. Let me know if you need anything changed! |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
I'm hoping this will work on perl 5.6 and before, because I want |
212
|
|
|
|
|
|
|
to be helpful to as many people as possible, but I am too lazy |
213
|
|
|
|
|
|
|
to test it myself. Bug reports and/or patches please! |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head1 TODO |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=for :list * more test strings in the tests |
218
|
|
|
|
|
|
|
* support more options from L |
219
|
|
|
|
|
|
|
* more rigorous tests beyond basic functionality |
220
|
|
|
|
|
|
|
* eat a sandwich |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head1 AUTHOR |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Stephen R. Scaffidi |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
This software is copyright (c) 2010 by Stephen R. Scaffidi. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
231
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=cut |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
__END__ |