| blib/lib/Date/Extract/Surprise.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 51 | 51 | 100.0 |
| branch | 14 | 22 | 63.6 |
| condition | 5 | 12 | 41.6 |
| subroutine | 11 | 11 | 100.0 |
| pod | 3 | 3 | 100.0 |
| total | 84 | 99 | 84.8 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | 1 | 1 | 42401 | use strict; | |||
| 1 | 3 | ||||||
| 1 | 30 | ||||||
| 2 | 1 | 1 | 5 | use warnings; | |||
| 1 | 2 | ||||||
| 1 | 41 | ||||||
| 3 | package Date::Extract::Surprise; | ||||||
| 4 | BEGIN { | ||||||
| 5 | 1 | 1 | 18 | $Date::Extract::Surprise::VERSION = '0.006'; | |||
| 6 | } | ||||||
| 7 | BEGIN { | ||||||
| 8 | 1 | 1 | 18 | $Date::Extract::Surprise::DIST = 'Date-Extract-Surprise'; | |||
| 9 | } | ||||||
| 10 | # ABSTRACT: extract probable dates from strings *with surprises* | ||||||
| 11 | |||||||
| 12 | 1 | 1 | 4 | use Carp qw( croak ); | |||
| 1 | 2 | ||||||
| 1 | 47 | ||||||
| 13 | 1 | 1 | 4 | use Scalar::Util qw( blessed ); | |||
| 1 | 2 | ||||||
| 1 | 95 | ||||||
| 14 | # just trying to be helpful. | ||||||
| 15 | use Exporter::Easy ( | ||||||
| 16 | 1 | 5 | OK => [qw( extract_datetimes )], | ||||
| 17 | 1 | 1 | 750 | ); | |||
| 1 | 1362 | ||||||
| 18 | |||||||
| 19 | 1 | 1 | 1105 | use DateTime::Format::Flexible qw(); | |||
| 1 | 278705 | ||||||
| 1 | 974 | ||||||
| 20 | |||||||
| 21 | |||||||
| 22 | sub new { | ||||||
| 23 | 5 | 5 | 1 | 30 | my $class = shift; | ||
| 24 | 5 | 28 | my $self = | ||||
| 25 | bless { | ||||||
| 26 | DEBUG => 0, | ||||||
| 27 | @_, | ||||||
| 28 | }, | ||||||
| 29 | $class; | ||||||
| 30 | |||||||
| 31 | 5 | 13 | return $self; | ||||
| 32 | } | ||||||
| 33 | |||||||
| 34 | |||||||
| 35 | |||||||
| 36 | sub extract { | ||||||
| 37 | 6 | 50 | 6 | 1 | 2208 | return unless @_; | |
| 38 | |||||||
| 39 | # can be called as an object method, class method, or function | ||||||
| 40 | # there's probably better ways to support this. | ||||||
| 41 | 6 | 50 | 66 | 87 | my $self = blessed( $_[0] ) && $_[0]->isa( __PACKAGE__ ) ? shift | ||
| 100 | |||||||
| 42 | : $_[0] eq __PACKAGE__ ? shift->new() | ||||||
| 43 | : croak "Please call as a class or object method!\n"; | ||||||
| 44 | |||||||
| 45 | 6 | 14 | my $text = shift; | ||||
| 46 | |||||||
| 47 | 6 | 13 | my %args = @_; | ||||
| 48 | |||||||
| 49 | # set a base date for ambiguous DTs we find, default to epoch. | ||||||
| 50 | # if a string value is passed and can't be parsed, croak. | ||||||
| 51 | 6 | 50 | 33 | 89 | my $base = blessed( $args{base} ) && $args{base}->isa( 'DateTime' ) ? delete $args{base} | ||
| 50 | |||||||
| 52 | : defined $args{base} ? DateTime::Format::Flexible->parse_datetime( $args{base} ) | ||||||
| 53 | : DateTime->new( year => 1970, month => 1, day => 1 ); | ||||||
| 54 | |||||||
| 55 | 6 | 1647 | my @timestamps; # populate this | ||||
| 56 | |||||||
| 57 | # there's no immediate need to split into lines, but it should make | ||||||
| 58 | # some future features easier (like reporting which lines matched) | ||||||
| 59 | 6 | 28 | for my $line ( split /[\n\r]+/, $text ) { | ||||
| 60 | |||||||
| 61 | 6 | 50 | 25 | warn " {$line}\n" if $self->{DEBUG} > 0; | |||
| 62 | |||||||
| 63 | # split it into terms and remove chars that may trip us up | ||||||
| 64 | 6 | 30 | my @terms = map { (my $s = $_) =~ s/[,]/ /g; $s } split q[ ], $line; | ||||
| 69 | 106 | ||||||
| 69 | 125 | ||||||
| 65 | |||||||
| 66 | 6 | 27 | for my $i ( 0 .. $#terms ) { | ||||
| 67 | 69 | 405092 | for my $j ( $i .. $#terms ) { | ||||
| 68 | 450 | 1559841 | my $search_str = join ' ', @terms[$i .. $j]; | ||||
| 69 | |||||||
| 70 | # clean up other crap that DT::F::F chokes on? | ||||||
| 71 | 450 | 1604 | $search_str =~ s/at//ig; | ||||
| 72 | |||||||
| 73 | # clean up whitespace | ||||||
| 74 | 450 | 2159 | $search_str =~ s/(\s){2,}/$1/g; | ||||
| 75 | 450 | 2822 | $search_str =~ s/^\s+|\s+$//g; | ||||
| 76 | |||||||
| 77 | # it almost certainly has some *numbers* in it! | ||||||
| 78 | 450 | 100 | 1852 | next unless $search_str =~ /\d/; | |||
| 79 | |||||||
| 80 | 276 | 50 | 1044 | warn " {$search_str}\n" if $self->{DEBUG} > 1; | |||
| 81 | |||||||
| 82 | # if we can't determine the *date*, assume epoch | ||||||
| 83 | 276 | 1242 | DateTime::Format::Flexible->base( $base ); | ||||
| 84 | |||||||
| 85 | 276 | 100 | 13229 | next unless my $dt = eval { | |||
| 86 | 276 | 1184 | DateTime::Format::Flexible->parse_datetime( $search_str ); | ||||
| 87 | }; | ||||||
| 88 | |||||||
| 89 | 36 | 158103 | push @timestamps, $dt; | ||||
| 90 | } | ||||||
| 91 | } | ||||||
| 92 | } | ||||||
| 93 | |||||||
| 94 | 6 | 144 | return @timestamps; | ||||
| 95 | } | ||||||
| 96 | |||||||
| 97 | |||||||
| 98 | |||||||
| 99 | sub extract_datetimes { | ||||||
| 100 | 2 | 50 | 2 | 1 | 1428 | return unless @_; | |
| 101 | |||||||
| 102 | # can be called as an object method, class method, or function | ||||||
| 103 | # there's probably better ways to support this. | ||||||
| 104 | 2 | 50 | 33 | 27 | croak "This is a function. Use extract() if you need a method!\n" | ||
| 33 | |||||||
| 105 | if ( blessed( $_[0] ) && $_[0]->isa( __PACKAGE__ ) ) or | ||||||
| 106 | ( $_[0] eq __PACKAGE__ ); | ||||||
| 107 | |||||||
| 108 | 2 | 15 | my $self = __PACKAGE__->new(); | ||||
| 109 | |||||||
| 110 | 2 | 7 | return $self->extract( @_ ); | ||||
| 111 | } | ||||||
| 112 | |||||||
| 113 | 1 || q{life without coffee isn't worth living}; #truth | ||||||
| 114 | |||||||
| 115 | |||||||
| 116 | =pod | ||||||
| 117 | |||||||
| 118 | =head1 NAME | ||||||
| 119 | |||||||
| 120 | Date::Extract::Surprise - extract probable dates from strings *with surprises* | ||||||
| 121 | |||||||
| 122 | =head1 VERSION | ||||||
| 123 | |||||||
| 124 | version 0.006 | ||||||
| 125 | |||||||
| 126 | =head1 SYNOPSIS | ||||||
| 127 | |||||||
| 128 | use Date::Extract::Surprise; | ||||||
| 129 | my $des = Date::Extract::Surprise->new(); | ||||||
| 130 | my @datetimes = $des->extract( $arbitrary_text ); | ||||||
| 131 | |||||||
| 132 | # or... | ||||||
| 133 | use Date::Extract::Surprise; | ||||||
| 134 | my @datetimes = Date::Extract::Surprise->extract( $arbitrary_text ); | ||||||
| 135 | |||||||
| 136 | # or... | ||||||
| 137 | use Date::Extract::Surprise qw( extract_datetimes ); | ||||||
| 138 | my @datetimes = extract_datetimes( $arbitrary_text ); | ||||||
| 139 | |||||||
| 140 | =head1 DESCRIPTION | ||||||
| 141 | |||||||
| 142 | This is modeled on Sartak's excellent L |
||||||
| 143 | and capable module that you can use to extract references to dates | ||||||
| 144 | and times from otherwise arbitrary text. For example: | ||||||
| 145 | |||||||
| 146 | "The package will be delivered at 3:15 PM, March 15, 2007, on the dot." | ||||||
| 147 | |||||||
| 148 | Upon parsing that, you should end up with a L |
||||||
| 149 | representing March 15, 2007 at 3:15PM in your timezone. | ||||||
| 150 | |||||||
| 151 | L |
||||||
| 152 | (ie. detecting things that *aren't* actually dates or times), but | ||||||
| 153 | at the expense of potentially missing some dates. As its | ||||||
| 154 | documentation states, "I |
||||||
| 155 | |||||||
| 156 | Because I had the I | ||||||
| 157 | if some were going to be bogus>, I created L |
||||||
| 158 | which will gladly detect anything that even I |
||||||
| 159 | it could be a date or time. | ||||||
| 160 | |||||||
| 161 | B |
||||||
| 162 | text should be what you wanted. It's up to you to figure out which one | ||||||
| 163 | that is! :-) | ||||||
| 164 | |||||||
| 165 | =head1 METHODS | ||||||
| 166 | |||||||
| 167 | =head2 new | ||||||
| 168 | |||||||
| 169 | Just your basic object constructor. | ||||||
| 170 | |||||||
| 171 | my $des = Date::Extract::Surprise->new(); | ||||||
| 172 | |||||||
| 173 | Currently takes only one argument: | ||||||
| 174 | |||||||
| 175 | =head2 extract | ||||||
| 176 | |||||||
| 177 | This is designed to (more or less) mirror the interface of | ||||||
| 178 | Date::Extract->extract(). However, at this time, it supports | ||||||
| 179 | almost none of its namesake's extra options, and adds one | ||||||
| 180 | additional option. | ||||||
| 181 | |||||||
| 182 | This can be called as either a class method or as a method on | ||||||
| 183 | an object, as seen in the L |
||||||
| 184 | |||||||
| 185 | =head1 FUNCTIONS | ||||||
| 186 | |||||||
| 187 | =head2 extract_datetimes | ||||||
| 188 | |||||||
| 189 | If you're old-skool and prefer things to export a function, you can have it. | ||||||
| 190 | |||||||
| 191 | It takes the same arguments as the L |
||||||
| 192 | |||||||
| 193 | =for :list = DEBUG | ||||||
| 194 | integer greater than 0 for debugging level. higher numbers | ||||||
| 195 | give more detail | ||||||
| 196 | |||||||
| 197 | It will probably take more in the future. | ||||||
| 198 | |||||||
| 199 | =head1 SEE ALSO | ||||||
| 200 | |||||||
| 201 | =for :list * Date::Extract | ||||||
| 202 | * DateTime::Format::Flexible | ||||||
| 203 | * Time::ParseDate | ||||||
| 204 | * Date::Manip | ||||||
| 205 | |||||||
| 206 | =head1 NOTES | ||||||
| 207 | |||||||
| 208 | Yes, this code is slow and dumb, but it helped me solve a problem and | ||||||
| 209 | I hope it may help others, too. Let me know if you need anything changed! | ||||||
| 210 | |||||||
| 211 | I'm hoping this will work on perl 5.6 and before, because I want | ||||||
| 212 | to be helpful to as many people as possible, but I am too lazy | ||||||
| 213 | to test it myself. Bug reports and/or patches please! | ||||||
| 214 | |||||||
| 215 | =head1 TODO | ||||||
| 216 | |||||||
| 217 | =for :list * more test strings in the tests | ||||||
| 218 | * support more options from L |
||||||
| 219 | * more rigorous tests beyond basic functionality | ||||||
| 220 | * eat a sandwich | ||||||
| 221 | |||||||
| 222 | =head1 AUTHOR | ||||||
| 223 | |||||||
| 224 | Stephen R. Scaffidi |
||||||
| 225 | |||||||
| 226 | =head1 COPYRIGHT AND LICENSE | ||||||
| 227 | |||||||
| 228 | This software is copyright (c) 2010 by Stephen R. Scaffidi. | ||||||
| 229 | |||||||
| 230 | This is free software; you can redistribute it and/or modify it under | ||||||
| 231 | the same terms as the Perl 5 programming language system itself. | ||||||
| 232 | |||||||
| 233 | =cut | ||||||
| 234 | |||||||
| 235 | |||||||
| 236 | __END__ |