File Coverage

blib/lib/Date/Reformat.pm

Criterion	Covered	Total	%
statement	452	478	94.5
branch	198	276	71.7
condition	75	128	58.5
subroutine	38	38	100.0
pod	26	26	100.0
total	789	946	83.4

line	stmt	bran	cond	sub	pod	time	code
1							package Date::Reformat;
2
3							=head1 NAME
4
5							Date::Reformat - Rearrange date strings
6
7							=head1 SYNOPSIS
8
9							use Date::Reformat;
10
11							my $reformat = Date::Reformat->new(
12							parser => {
13							regex => qr/^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)$/,
14							params => [qw(year month day hour minute second)],
15							},
16							defaults => {
17							time_zone => 'America/New_York',
18							},
19							transformations => [
20							{
21							from => 'year',
22							to => 'century',
23							coderef => sub { int($_[0] / 100) },
24							},
25							],
26							formatter => {
27							sprintf => '%s-%02d-%02dT%02d:%02d:02d %s',
28							params => [qw(year month day hour minute second time_zone)],
29							},
30							);
31
32							my $reformat = Date::Reformat->new(
33							parser => {
34							strptime => '%Y-%m-%dT%M:%H:%S',
35							# or heuristic => 'ymd', # http://www.postgresql.org/docs/9.2/static/datetime-input-rules.html
36							},
37							defaults => {
38							time_zone => 'America/New_York',
39							},
40							formatter => {
41							strftime => '%Y-%m-%dT%M:%H:%S %Z',
42							# or data_structure => 'hashref' \|\| 'hash' \|\| 'arrayref' \|\| 'array'
43							# or coderef => sub { my ($y, $m, $d) = @_; DateTime->new(year => $y, month => $m, day => $d) },
44							# params => [qw(year month day)],
45							},
46							);
47
48							my $reformatted_string = $reformat->reformat_date($date_string);
49
50							=head1 DESCRIPTION
51
52							This module aims to be a lightweight and flexible tool for rearranging
53							components of a date string, then returning the components in the order
54							and structure specified.
55
56							My motivation was a month of trying to compare data from spreadsheets from
57							several sources, and every single one used a different date format, which
58							made comparison difficult.
59
60							There are so many modules for doing date math, or parsing a specific date
61							format. I needed something that could take in pretty much any format
62							and turn it into a single format that I could then use for comparison.
63
64							=cut
65
66	11			11		143968	use 5.010000;
	11					26
67	11			11		35	use strict;
	11					12
	11					214
68	11			11		33	use warnings;
	11					14
	11					316
69
70	11			11		5107	use Types::Standard qw(ClassName Object Maybe Optional slurpy Dict HashRef ArrayRef RegexpRef CodeRef Enum Str Int);
	11					521086
	11					105
71	11			11		18588	use Type::Params qw();
	11					87590
	11					17167
72
73							our $VERSION = '0.04';
74
75							my $MONTH_LOOKUP = {
76							};
77							{
78							# Lookups for month abbreviations.
79							my $c = 0;
80							foreach my $abbr (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
81							$MONTH_LOOKUP->{'abbr'}->{lc($abbr)} = ++$c;
82							$MONTH_LOOKUP->{'number'}->{$c}->{'abbr'} = $abbr;
83							}
84							}
85
86							my $TOKENS = {
87							'year' => {
88							'regex' => q/(?\d{4})/,
89							'sprintf' => '%04d',
90							},
91							'year_abbr' => {
92							'regex' => q/(?\d{2})/,
93							'sprintf' => '%02d',
94							},
95							'month' => {
96							'regex' => q/(?\d\d?)/,
97							'sprintf' => '%02d',
98							},
99							'month_no_padding' => {
100							'regex' => q/(?\d\d?)/,
101							'sprintf' => '%d',
102							'storage' => 'month',
103							},
104							'month_name' => {
105							'regex' => q/(?January\|February\|March\|April\|May\|June\|July\|August\|September\|October\|November\|December)/,
106							'sprintf' => '%s',
107							},
108							'month_abbr' => {
109							'regex' => q/(?Jan\|Feb\|Mar\|Apr\|May\|Jun\|Jul\|Aug\|Sep\|Oct\|Nov\|Dec)/,
110							'sprintf' => '%s',
111							},
112							'day' => {
113							'regex' => q/(?\d\d?)/,
114							'sprintf' => '%02d',
115							},
116							'day_name' => {
117							'regex' => q/(?Monday\|Tuesday\|Wednesday\|Thursday\|Friday\|Saturday\|Sunday)/,
118							'sprintf' => '%s',
119							},
120							'day_abbr' => {
121							'regex' => q/(?Mon\|Tues?\|Wed\|Thur?\|Fri\|Sat\|Sun)/,
122							'sprintf' => '%s',
123							},
124							'day_of_year' => {
125							'regex' => q/(?\d\d?\d?)/,
126							'sprintf' => '%03d',
127							},
128							'julian_day' => {
129							'regex' => q/J(?\d+)/,
130							'sprintf' => '%s',
131							'constraint' => sub { $_[0] >= 0 },
132							},
133							'era_abbr' => {
134							'regex' => q/(?BC\|AD\|BCE\|CE)/,
135							'sprintf' => '%s',
136							},
137							'hour' => {
138							'regex' => q/(?\d\d?)/,
139							'sprintf' => '%02d',
140							'constraint' => sub { $_[0] >= 0 && $_[0] < 24 },
141							},
142							'hour_12' => {
143							'regex' => q/(?\d\d?)/,
144							'sprintf' => '%d',
145							},
146							'minute' => {
147							'regex' => q/(?\d\d)/,
148							'sprintf' => '%02d',
149							'constraint' => sub { $_[0] >= 0 && $_[0] < 60 },
150							},
151							'second' => {
152							'regex' => q/(?\d\d)/,
153							'sprintf' => '%02d',
154							},
155							'am_or_pm' => {
156							'regex' => q/(?(?i)[ap]\.?m\.?)/,
157							'sprintf' => '%s',
158							},
159							'time_zone' => {
160							'regex' => q/(?Z\|UTC\|[[:alpha:]]{3,}(?:\/[[:alpha:]]+)?)/,
161							'sprintf' => '%s',
162							},
163							'time_zone_offset' => {
164							'regex' => q\|(?[-+]\d\d?(?:\d\d)?)\|,
165							'sprintf' => '%s',
166							},
167							'phrase' => {
168							'regex' => q/(?(?i)today\|tomorrow\|yesterday\|(?:next\|last)\w+(?:week\|month\|year)\|\d+\w+(?:seconds?\|minutes?\|hours?\|days?\|weeks?\|months?\|years?)\w+(?:ago\|from\w+now))/,
169							'sprintf' => '%s',
170							},
171							};
172
173							my $STRPTIME_PREPROCESS = [
174							{
175							'token' => '%c',
176							'replacement' => '%c', # TODO: Perhaps use Scalar::Defer, and look up locale datetime format only if needed.
177							},
178							{
179							'token' => '%D',
180							'replacement' => '%m/%d/%y',
181							},
182							{
183							'token' => '%F',
184							'replacement' => '%Y-%m-%d',
185							},
186							{
187							'token' => '%R',
188							'replacement' => '%H:%M',
189							},
190							{
191							'token' => '%r',
192							'replacement' => '%I:%M:%S %p', # TODO: This may be affected by locale.
193							},
194							{
195							'token' => '%T',
196							'replacement' => '%H:%M:%S',
197							},
198							{
199							'token' => '%X',
200							'replacement' => '%X', # TODO: Perhaps use Scalar::Defer, and look up locale time format only if needed.
201							},
202							{
203							'token' => '%x',
204							'replacement' => '%x', # TODO: Perhaps use Scalar::Defer, and look up locale date format only if needed.
205							},
206							];
207
208							my $STRPTIME_POSTPROCESS = [
209							{
210							'token' => '%n',
211							'replacement' => '\s+',
212							},
213							{
214							'token' => '%t',
215							'replacement' => '\s+',
216							},
217							{
218							'token' => '%%',
219							'replacement' => quotemeta('%'),
220							},
221							];
222
223							my $STRFTIME_POSTPROCESS = [
224							{
225							'token' => '%n',
226							'replacement' => "\n",
227							},
228							{
229							'token' => '%t',
230							'replacement' => "\t",
231							},
232							];
233
234							my $DEFAULT_STRPTIME_MAPPINGS = {
235							'%A' => 'day_name', # TODO
236							'%a' => 'day_abbr',
237							'%B' => 'month_name', # TODO
238							'%b' => 'month_abbr',
239							'%C' => 'century', # TODO
240							'%d' => 'day',
241							'%e' => 'day', # TODO: This one is space-padded.
242							'%G' => 'week_year', # TODO
243							'%g' => 'week_year_abbr', # TODO
244							'%H' => 'hour',
245							'%h' => 'month_abbr',
246							'%I' => 'hour_12',
247							'%j' => 'day_of_year',
248							'%k' => 'hour', # TODO: This one is space-padded.
249							'%l' => 'hour_12', # TODO: This one is space-padded.
250							'%M' => 'minute',
251							'%m' => 'month',
252							'%-m' => 'month_no_padding',
253							'%N' => 'fractional_seconds', # TODO
254							'%P' => 'am_or_pm',
255							'%p' => 'am_or_pm', # TODO: This is uppercase.
256							'%S' => 'second',
257							'%s' => 'epoch', # TODO
258							'%U' => 'week_number_0', # TODO
259							'%u' => 'day_of_week', # TODO
260							'%V' => 'week_number', # TODO
261							'%W' => 'week_number_1', # TODO
262							'%w' => 'day_of_week_0', # TODO
263							'%Y' => 'year',
264							'%y' => 'year_abbr',
265							'%Z' => 'time_zone',
266							'%z' => 'time_zone_offset',
267							};
268
269							my $DEFAULT_STRFTIME_MAPPINGS = {
270							};
271
272							my $DEFAULT_TRANSFORMATIONS = {
273							# to => {
274							# from => \&transformation_coderef,
275							# },
276							'year' => {
277							'year_abbr' => sub {
278							my ($date) = @_;
279							return $date->{'year'} if defined($date->{'year'});
280							return $date->{'year_abbr'} < 70
281							? $date->{'year_abbr'} + 2000
282							: $date->{'year_abbr'} + 1900;
283							},
284							},
285							'year_abbr' => {
286							'year' => sub {
287							my ($date) = @_;
288							return $date->{'year_abbr'} if defined($date->{'year_abbr'});
289							return substr($date->{'year'}, -2, 2);
290							},
291							},
292							'month' => {
293							'month_abbr' => sub {
294							my ($date) = @_;
295							return $date->{'month'} if defined($date->{'month'});
296							return $MONTH_LOOKUP->{'abbr'}->{ lc($date->{'month_abbr'}) } // undef;
297							},
298							},
299							'month_abbr' => {
300							'month' => sub {
301							my ($date) = @_;
302							return $date->{'month_abbr'} if defined($date->{'month_abbr'});
303							return $MONTH_LOOKUP->{'number'}->{ $date->{'month'}+0 }->{'abbr'} // undef;
304							},
305							},
306							'hour' => {
307							'hour_12' => sub {
308							my ($date) = @_;
309							return $date->{'hour'} if defined($date->{'hour'});
310							if (lc($date->{'am_or_pm'}) eq 'pm') {
311							return $date->{'hour_12'} == 12
312							? $date->{'hour_12'}
313							: $date->{'hour_12'} + 12;
314							}
315							return $date->{'hour_12'} == 12
316							? 0
317							: $date->{'hour_12'};
318							},
319							},
320							'hour_12' => {
321							'hour' => sub {
322							my ($date) = @_;
323							return $date->{'hour_12'} if defined($date->{'hour_12'});
324							if ($date->{'hour'} == 0) {
325							return 12;
326							}
327							return $date->{'hour'} < 13
328							? $date->{'hour'}
329							: $date->{'hour'} - 12;
330							},
331							},
332							'am_or_pm' => {
333							'hour' => sub {
334							my ($date) = @_;
335							return $date->{'am_or_pm'} if defined($date->{'am_or_pm'});
336							if ($date->{'hour'} == 0) {
337							return 'am';
338							}
339							return $date->{'hour'} >= 12
340							? 'pm'
341							: 'am';
342							},
343							},
344							};
345
346							=head2 METHODS
347
348							=over 4
349
350							=item new()
351
352							Returns a new reformatter instance.
353
354							my $reformat = Date::Reformat->new(
355							'parser' => $parsing_instructions,
356							'transformations' => $transformation_instructions,
357							'defaults' => $default_values,
358							'formatter' => $formatting_instructions,
359							'debug' => 0,
360							);
361
362							Parameters:
363
364							=over 4
365
366							=item parser
367
368							A hashref of instructions used to initialize a parser.
369
370							See L for details.
371
372							=item transformations
373
374							An arrayref of hashrefs containing instructions on how to
375							convert values of one token into values for another token
376							(such as C to C).
377
378							See L for details.
379
380							=item defaults
381
382							A hashref specifying values to use if the date string does
383							not contain a specific token (such as a time_zone value).
384
385							See L for details.
386
387							=item formatter
388
389							A hashref of instructions used to initialize a formatter.
390
391							See L for details.
392
393							=item debug
394
395							Either a 1 or a 0, to turn debugging on or off, respectively.
396
397							=back
398
399							=cut
400
401							sub new {
402	161			161	1	135249	state $check = Type::Params::compile(
403							ClassName,
404							slurpy Dict[
405							'debug' => Optional[Int],
406							'parser' => Optional[HashRef],
407							'formatter' => Optional[HashRef],
408							'transformations' => Optional[ArrayRef[HashRef]],
409							'defaults' => Optional[HashRef],
410							],
411							);
412	161					146188	my ($class, $args) = $check->(@_);
413	161					8271	my $self = bless {}, $class;
414
415	161					370	$self->debug($args->{'debug'});
416
417	161					242	foreach my $parameter (
418							'parser',
419							'formatter',
420							'transformations',
421							'defaults',
422							)
423							{
424	644	100				1329	next if ! defined $args->{$parameter};
425
426	169					260	my $initialize = 'prepare_' . $parameter;
427	169					414	my @data = $self->$initialize($args->{$parameter});
428
429	169					405	my $add = 'add_' . $parameter;
430	169					463	$self->$add(@data);
431							}
432	161					380	return $self;
433							}
434
435							=item prepare_parser()
436
437							Builds a parser based on the given instructions. To add it to
438							the currently active parsers, see L.
439
440							If several parsers are active, the first one to successfully parse
441							the current date string returns the results of the parse, and subsequent
442							parsers are not utilized. See L for more information.
443
444							The types of parsers that can be initialized via this method are:
445
446							=over 4
447
448							=item regex
449
450							The regex must specify what parts should be captured, and a list
451							of token names must be supplied to identify which token each captured
452							value will be assigned to.
453
454							$reformat->prepare_parser(
455							{
456							regex => qr/^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)$/,
457							params => [qw(year month day hour minute second)],
458							},
459							);
460
461							=item regex with named capture
462
463							The regex must specify what parts should be captured, using named
464							capture syntax.
465
466							$reformat->prepare_parser(
467							{
468							regex => qr/^(?\d{4})-(?\d\d)-(?\d\d) (?\d\d?):(?\d\d):(?\d\d)$/,
469							},
470							);
471
472							=item strptime
473
474							The format string must be in strptime() format.
475
476							$reformat->prepare_parser(
477							{
478							strptime => '%Y-%m-%dT%M:%H:%S',
479							},
480							);
481
482							=item heuristic
483
484							A hint must be provided that will help the parser determine the meaning
485							of numbers if the ordering is ambiguous.
486
487							Currently the heuristic parsing mimics the PostgreSQL date parser (though
488							I have not copied over all the test cases from the PostgreSQL regression
489							tests, so there are likely to be differences/flaws).
490
491							$reformat->prepare_parser(
492							{
493							heuristic => 'ymd', # or 'mdy' or 'dmy'
494							},
495							);
496
497							Currently when the heuristic parser parses a date string, it creates a
498							named regex parser which it injects into the active parsers directly in
499							front of itself, so that subsequent date strings that are in the same
500							format will be parsed via the regex.
501
502							I plan to add a parameter that will control whether parsers are generated
503							by the heuristic parser (I also plan to refactor that method quite a bit,
504							because it kind of makes me cringe to look at it).
505
506							=back
507
508							=cut
509
510							sub prepare_parser {
511	146			146	1	117	state $check = Type::Params::compile(
512							Object,
513							Dict[
514							'regex' => Optional[RegexpRef],
515							'params' => Optional[ArrayRef],
516							'strptime' => Optional[Str],
517							'heuristic' => Optional[Enum[qw(ymd dmy mdy)]],
518							],
519							);
520	146					35402	my ($self, $definition) = $check->(@_);
521
522	146	100				4373	if (defined($definition->{'regex'})) {
523
524							# Initialize the right kind of regex parser (simple capture or named capture).
525	4	100				10	if (defined($definition->{'params'})) {
526							return $self->prepare_parser_for_regex_with_params(
527							{
528							'regex' => $definition->{'regex'},
529	3					11	'params' => $definition->{'params'},
530							}
531							);
532							}
533							return $self->prepare_parser_for_regex_named_capture(
534							{
535	1					4	'regex' => $definition->{'regex'},
536							},
537							);
538
539							}
540
541	142	100				269	if (defined($definition->{'strptime'})) {
542							return $self->prepare_parser_for_strptime(
543							{
544	10					36	'strptime' => $definition->{'strptime'},
545							},
546							);
547							}
548
549	132	50				228	if (defined($definition->{'heuristic'})) {
550							return $self->prepare_parser_heuristic(
551							{
552	132					353	'heuristic' => $definition->{'heuristic'},
553							},
554							);
555							}
556
557							# Nothing initialized.
558	0					0	return;
559							}
560
561							=item prepare_formatter()
562
563							Builds a formatter based on the given instructions. To add it to the
564							currently active formatters, see L.
565
566							If several formatters are active, they are each called in turn, receiving
567							the output from the previous parser.
568
569							The types of parsers that can be initialized via this method are:
570
571							=over 4
572
573							=item sprintf
574
575							The format string must be in sprintf() format, and a list of token names
576							must be supplied to identify which token values to send to the formatter.
577
578							$reformat->prepare_formatter(
579							{
580							sprintf => '%s-%02d-%02dT%02d:%02d:02d %s',
581							params => [qw(year month day hour minute second time_zone)],
582							},
583							);
584
585							=item strftime
586
587							The format string must be in strftime() format.
588
589							$reformat->prepare_formatter(
590							{
591							strftime => '%Y-%m-%dT%M:%H:%S %Z',
592							},
593							);
594
595							=item data_structure
596
597							The type of the desired data structure must be specified, and a list of
598							token names to identify which token values to include in the data structure.
599
600							Valid data structure types are:
601
602							=over 4
603
604							=item hash
605
606							=item hashref
607
608							=item array
609
610							=item arrayref
611
612							=back
613
614							$reformat->prepare_formatter(
615							{
616							data_structure => 'hashref',
617							params => [qw(year month day hour minute second time_zone)],
618							},
619							);
620
621							=item coderef
622
623							The supplied coderef will be passed the token values specified. Whatever the
624							coderef returns will be passed to the next active formatter, or will be returned,
625							if this is the final formatter.
626
627							$reformat->prepare_formatter(
628							{
629							coderef => sub { my ($y, $m, $d) = @_; DateTime->new(year => $y, month => $m, day => $d) },
630							params => [qw(year month day)],
631							},
632							);
633
634							=back
635
636							=cut
637
638							sub prepare_formatter {
639	20			20	1	34	state $check = Type::Params::compile(
640							Object,
641							Dict[
642							'sprintf' => Optional[Str],
643							'params' => Optional[ArrayRef],
644							'strftime' => Optional[Str],
645							'data_structure' => Optional[Enum[qw(hash hashref array arrayref)]],
646							'coderef' => Optional[CodeRef],
647							],
648							);
649	20					66177	my ($self, $definition) = $check->(@_);
650
651	20	100				1523	if (defined($definition->{'sprintf'})) {
652							return $self->prepare_formatter_for_sprintf(
653							{
654							'sprintf' => $definition->{'sprintf'},
655	5					25	'params' => $definition->{'params'},
656							},
657							);
658							}
659
660	15	100				30	if (defined($definition->{'strftime'})) {
661							return $self->prepare_formatter_for_strftime(
662							{
663	8					27	'strftime' => $definition->{'strftime'},
664							},
665							);
666							}
667
668	7	100				14	if (defined($definition->{'data_structure'})) {
669	6	100				18	if ($definition->{'data_structure'} =~ /^hash(?:ref)?$/) {
670							return $self->prepare_formatter_for_hashref(
671							{
672							'structure' => $definition->{'data_structure'},
673	3					14	'params' => $definition->{'params'},
674							},
675							);
676							}
677
678	3	50				11	if ($definition->{'data_structure'} =~ /^array(?:ref)?$/) {
679							return $self->prepare_formatter_for_arrayref(
680							{
681							'structure' => $definition->{'data_structure'},
682	3					11	'params' => $definition->{'params'},
683							},
684							);
685							}
686							}
687
688	1	50				2	if (defined($definition->{'coderef'})) {
689							return $self->prepare_formatter_for_coderef(
690							{
691							'coderef' => $definition->{'coderef'},
692	1					7	'params' => $definition->{'params'},
693							},
694							);
695							}
696
697							# Nothing initialized.
698	0					0	return;
699							}
700
701							=item prepare_transformations()
702
703							Accepts an arrayref of hashrefs that specify how to transform
704							token values from one token type to another.
705
706							Returns the same arrayref. To add it to the currently active
707							transformers, see L.
708
709							=cut
710
711							sub prepare_transformations {
712	1			1	1	2	my ($self, $transformations) = @_;
713	1		50			4	return $transformations // [];
714							}
715
716							=item add_transformations()
717
718							Accepts an arrayref of hashrefs that specify how to transform
719							token values from one token type to another. Adds each
720							transformation instruction to the list of active transformers.
721							A transformation instruction with the same C and C
722							values as a previous instruction will overwrite the previous
723							version.
724
725							$reformat->add_transformations(
726							[
727							{
728							'to' => 'hour',
729							'from' => 'hour_12',
730							'transformation' => sub {
731							my ($date) = @_;
732							# Use the value of $date->{'hour_12'} (and $date->{'am_or_pm'})
733							# to calculate what the value of $date->{'hour'} should be.
734							# ...
735							return $hour;
736							},
737							},
738							],
739							);
740
741							The values in each hashref are:
742
743							=over 4
744
745							=item to
746
747							The name of the token type that is desired (for instance
748							'hour', meaning the 24-hour format).
749
750							=item from
751
752							The name of the token type that is available in the date
753							string (for instance 'hour_12', meaning the 12-hour format).
754
755							=item transformation
756
757							A coderef which accepts a hashref containing the information
758							which has been parsed out of the date string. The coderef
759							is expected to examine the date information, transform the
760							token type specified via C into the correct value for the
761							token type specified via C, and return that value.
762
763							=back
764
765							Several transformations have been built into this module.
766							Search for C<$DEFAULT_TRANSFORMATIONS> in the source code.
767
768							Transformations added via this method will take precedence
769							over built-in transformations.
770
771							=cut
772
773							sub add_transformations {
774	1			1	1	4	state $check = Type::Params::compile(
775							Object,
776							ArrayRef[
777							Dict[
778							'to' => Str,
779							'from' => Str,
780							'transformation' => CodeRef,
781							],
782							],
783							);
784	1					9945	my ($self, $transformations) = $check->(@_);
785
786	1					171	my $count = 0;
787	1					3	foreach my $t (@$transformations) {
788	1					4	$self->{'transformations'}->{$t->{'to'}}->{$t->{'from'}} = $t->{'transformation'};
789	1					2	$count++;
790							}
791	1					4	return $count;
792							}
793
794							=item prepare_defaults()
795
796							Accepts a hashref of default values to use when transforming
797							or formatting a date which is missing tokens that are needed.
798
799							This method clears out any defaults which had been set
800							previously.
801
802							Returns the same hashref it was given, but does not set them.
803							To add defaults, see L.
804
805							=cut
806
807							sub prepare_defaults {
808	2			2	1	4	my ($self, $args) = @_;
809	2					3	$self->{'defaults'} = {};
810	2					4	return $args;
811							}
812
813							=item add_defaults()
814
815							Accepts a hashref of default values to use when transforming
816							or formatting a date which is missing tokens that are needed.
817
818							Each key should be the name of a token, and the corresponding
819							value is the default value that will be used when a date is
820							missing that token.
821
822							$reformat->add_defaults(
823							{
824							'time_zone' => 'America/New_York',
825							},
826							);
827
828							=cut
829
830							sub add_defaults {
831	2			2	1	7	state $check = Type::Params::compile(
832							Object,
833							HashRef,
834							);
835	2					1043	my ($self, $args) = $check->(@_);
836
837	2					21	foreach my $token (keys %$args) {
838	2					6	$self->{'defaults'}->{$token} = $args->{$token};
839							}
840	2					5	return $self->{'defaults'};
841							}
842
843							=item debug()
844
845							Turns debugging statements on or off, or returns the
846							current debug setting.
847
848							Expects a true value to turn debugging on, and a false value
849							to turn debugging off.
850
851							$reformat->debug(1); # 1 or 0
852
853							=cut
854
855							sub debug {
856	161			161	1	184	state $check = Type::Params::compile(
857							Object,
858							Maybe[Int],
859							);
860	161					8414	my ($self, $value) = $check->(@_);
861	161	100				2007	$self->{'debug'} = $value if (defined $value);
862	161		100			429	return $self->{'debug'} //= 0;
863							}
864
865							=item prepare_parser_for_regex_with_params()
866
867							Internal method called by L.
868
869							=cut
870
871							sub prepare_parser_for_regex_with_params {
872	3			3	1	4	state $check = Type::Params::compile(
873							Object,
874							Dict[
875							'regex' => RegexpRef,
876							'params' => ArrayRef,
877							],
878							);
879	3					4670	my ($self, $definition) = $check->(@_);
880
881	3					119	my $regex = $definition->{'regex'};
882	3					3	my $params = $definition->{'params'};
883
884	3					6	state $sub_check = Type::Params::compile(
885							Str,
886							);
887
888							return (
889							sub {
890	3			3		8	my ($date_string) = $sub_check->(@_);
891	3					38	my (@components) = $date_string =~ $regex;
892	3	50				7	return if ! @components;
893	3					4	my %date = ();
894	3					17	@date{@$params} = @components;
895							# TODO: Add named capture values to %date.
896	3					6	return \%date;
897							},
898	3					319	);
899							}
900
901							=item prepare_parser_for_regex_named_capture()
902
903							Internal method called by L.
904
905							=cut
906
907							sub prepare_parser_for_regex_named_capture {
908	107			107	1	126	state $check = Type::Params::compile(
909							Object,
910							Dict[
911							'regex' => RegexpRef,
912							],
913							);
914	107					12823	my ($self, $definition) = $check->(@_);
915
916	107					2821	my $regex = $definition->{'regex'};
917
918	107					101	state $sub_check = Type::Params::compile(
919							Str,
920							);
921
922							return (
923							sub {
924	11			11		24	my ($date_string) = $sub_check->(@_);
925	11					141	my $success = $date_string =~ $regex;
926	11	50				24	return if ! $success;
927	11			11		4625	my %date = %+;
	11					3582
	11					46420
	11					216
928
929							# Move 'hour_12' if the wrong value.
930	11	50	33			56	if (
			66
931							defined($date{'hour_12'})
932							&&
933							(
934							$date{'hour_12'} > 12
935							\|\|
936							$date{'hour_12'} == 0
937							)
938							) {
939	0					0	$date{'hour'} = delete $date{'hour_12'};
940							}
941
942	11					18	return \%date;
943							},
944	107					1719	);
945							}
946
947							=item prepare_parser_for_strptime()
948
949							Internal method called by L.
950
951							=cut
952
953							sub prepare_parser_for_strptime {
954	10			10	1	13	state $check = Type::Params::compile(
955							Object,
956							Dict[
957							'strptime' => Str,
958							],
959							);
960	10					6170	my ($self, $definition) = $check->(@_);
961
962	10					273	my $strptime = $definition->{'strptime'};
963	10					10	my $format = $strptime;
964
965							# Preprocess some tokens that expand into other tokens.
966	10					16	foreach my $preprocess (@$STRPTIME_PREPROCESS) {
967	80					357	$format =~ s/$preprocess->{'token'}/$preprocess->{'replacement'}/g;
968							}
969
970							# Escape everything in the strptime string so we can turn it into a regex.
971	10					19	$format = quotemeta($format);
972
973							# Unescape the parts that we will replace as tokens.
974							# regex from DateTime::Format::Strptime
975	10					56	$format =~ s/(?
976	10					45	$format =~ s/%\\\{([^\}]+)\\\}/%{$1}/g;
977
978							# Replace expanded tokens: %{year}
979	10					12	$format =~
980							s/
981							%\{(\w+)\}
982							/
983	0	0				0	$TOKENS->{$1} ? $TOKENS->{$1}->{'regex'} : "\%{$1}"
984							/sgex;
985
986							# Replace single character tokens: %Y
987	10					29	$format =~
988							s/
989							(%[%a-zA-Z])
990							/
991	68					147	$self->strptime_token_to_regex($1)
992							/sgex;
993
994							# Postprocess some tokens that expand into special characters.
995	10					26	foreach my $postprocess (@$STRPTIME_POSTPROCESS) {
996	30					200	$format =~ s/$postprocess->{'token'}/$postprocess->{'replacement'}/g;
997							}
998
999	10	50				784	say "Crafted regex: $strptime -> $format" if $self->{'debug'};
1000	10					381	return $self->prepare_parser_for_regex_named_capture(
1001							{
1002							'regex' => qr/$format/,
1003							},
1004							);
1005							}
1006
1007							=item prepare_parser_heuristic()
1008
1009							Internal method called by L.
1010
1011							=cut
1012
1013							sub prepare_parser_heuristic {
1014	132			132	1	114	state $check = Type::Params::compile(
1015							Object,
1016							Dict[
1017							'heuristic' => Enum[qw(ymd dmy mdy)],
1018							],
1019							);
1020	132					3951	my ($self, $definition) = $check->(@_);
1021
1022	132					2411	my $hint = $definition->{'heuristic'};
1023	132					135	my $known_parsers = {}; # Populated when we add a parser to the stack in front of this one.
1024	132					374	my $regex_for_date = qr{ \w+ [-/\.] \w+ (?:[-/\.] \w+) }x;
1025	132					199	my $regex_for_time = qr/ \d\d? : \d\d (?::\d\d) /x;
1026	132					243	my $regex_for_time_zone_offset = qr/ [-+] \d\d? (?:\d\d) /x;
1027	132					267	my $regex_for_time_zone_long_name = qr{ [[:alpha:]]+ / [[:alpha:]]+ (?:_ [[:alpha:]]+) }x;
1028	132					286	my $regex_for_julian_day = qr/ J\d+ /x;
1029	132					231	my $regex_for_number = qr/ \d+ /x;
1030	132					179	my $regex_for_string = qr/ [[:alpha:]]+ /x;
1031	132					186	my $regex_for_whitespace = qr/ \s+ /x;
1032	132					1013	my $token_regex = qr{
1033							# time zone offset
1034							( $regex_for_time_zone_offset )
1035							# time
1036							\| ( $regex_for_time )
1037							# time zone long name
1038							\| ( $regex_for_time_zone_long_name )
1039							# date
1040							\| ( $regex_for_date )
1041							# Julian day
1042							\| ( $regex_for_julian_day )
1043							# number
1044							\| ( $regex_for_number )
1045							# string
1046							\| ( $regex_for_string )
1047							# whitespace
1048							\| ( $regex_for_whitespace )
1049							# anything else
1050							\| ( . )
1051							}x;
1052
1053	132					129	state $sub_check = Type::Params::compile(
1054							Str,
1055							);
1056
1057							return (
1058							sub {
1059	132			132		293	my ($date_string) = $sub_check->(@_);
1060	132					717	my $order_string; # Will be set with ymd\|dmy\|mdy when we have enough information.
1061
1062							# Split string into parts that can be identified later.
1063	132	50				9782	say "Parsing date string into parts: $date_string" if $self->{'debug'};
1064	132					1890	my @parts = $date_string =~ /$token_regex/g;
1065	132	50				325	return if ! @parts;
1066
1067							# Try to identify what each part is, based on what it looks like, and what order it is in.
1068	132					128	my @parser_parts = ();
1069	132					162	my $date = {};
1070	132					181	foreach my $part (grep { defined($_) } @parts) {
	3375					2301
1071	343	50				20562	say "Trying to identify part: '$part'" if $self->{'debug'};
1072	343	50				4945	if ($part =~ /^$regex_for_time_zone_offset$/) {
		100
		50
		100
		100
		100
		100
		100
1073	0	0				0	say " time_zone_offset ($part)" if $self->{'debug'};
1074	0					0	push @parser_parts, $TOKENS->{'time_zone_offset'}->{'regex'};
1075	0					0	$date->{'time_zone_offset'} = $part;
1076							}
1077							elsif ($part =~ /^$regex_for_time$/) {
1078	5					16	my @time = split(/:/, $part);
1079
1080	5	50				295	say " hour ($time[0])" if $self->{'debug'};
1081	5					19	push @parser_parts, $TOKENS->{'hour'}->{'regex'};
1082	5					11	$date->{'hour'} = $time[0];
1083
1084	5	50				294	say " minute ($time[1])" if $self->{'debug'};
1085	5					19	push @parser_parts, quotemeta(':'), $TOKENS->{'minute'}->{'regex'};
1086	5					10	$date->{'minute'} = $time[1];
1087
1088	5	50				14	if (@time > 2) {
1089	5	50				289	say " second ($time[2])" if $self->{'debug'};
1090	5					21	push @parser_parts, quotemeta(':'), $TOKENS->{'second'}->{'regex'};
1091	5					20	$date->{'second'} = $time[2];
1092							}
1093							}
1094							elsif ($part =~ /^$regex_for_time_zone_long_name$/) {
1095	0					0	say " time_zone ($part)";
1096	0					0	push @parser_parts, $TOKENS->{'time_zone'}->{'regex'};
1097	0					0	$date->{'time_zone'} = $part;
1098							}
1099							elsif ($part =~ /^$regex_for_date$/) {
1100	67					247	my @date_parts = split(m\|[-/\.]\|, $part);
1101	67					77	my @order = ();
1102							# PostgreSQL forces reliance on the hint.
1103							#foreach my $index (0..2) {
1104							# if ($date_parts[$index] =~ /^\d+$/) {
1105							# if ($date_parts[$index] > 31) {
1106							# $order[$index] = 'y';
1107							# }
1108							# elsif ($date_parts[$index] > 12) {
1109							# $order[$index] = 'd';
1110							# }
1111							# else {
1112							# $order[$index] = 'm';
1113							# }
1114							# }
1115							# elsif ($date_parts[$index] =~ $TOKENS->{'month_abbr'}->{'regex'}) {
1116							# $order[$index] = 'm';
1117							# }
1118							#}
1119	67					114	$order_string = join('', @order);
1120	67	100	66			893	if (
		100	66
		100	66
			66
			66
1121							$date_parts[0] =~ /^$TOKENS->{'year'}->{'regex'}$/
1122							&&
1123							scalar(keys %$date) == 0
1124							) {
1125	21					27	$order_string = 'ymd';
1126							}
1127							elsif (
1128							$hint eq 'dmy'
1129							&&
1130							(
1131							$date_parts[0] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/
1132							\|\|
1133							$date_parts[0] =~ /^$TOKENS->{'month_name'}->{'regex'}$/
1134							)
1135							) {
1136	2					4	$order_string = 'mdy';
1137							}
1138							elsif (
1139							$hint eq 'mdy'
1140							&&
1141							(
1142							$date_parts[1] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/
1143							\|\|
1144							$date_parts[1] =~ /^$TOKENS->{'month_name'}->{'regex'}$/
1145							)
1146							) {
1147	3					4	$order_string = 'dmy';
1148							}
1149	67	100				128	if ($order_string !~ /^ymd\|dmy\|mdy$/) {
1150	41	50				2608	say "Using date token order hint: $hint" if $self->{'debug'};
1151	41					78	$order_string = $hint;
1152							}
1153	67					202	@order = split(//, $order_string);
1154	67					134	foreach my $index (0..2) {
1155	176	100				430	if ($order[$index] eq 'y') {
		100
		50
1156	57	100				357	if ($date_parts[$index] =~ /^$TOKENS->{'year'}->{'regex'}$/) {
		100
1157	34	50				2004	say " year ($date_parts[$index])" if $self->{'debug'};
1158	34					107	push @parser_parts, $TOKENS->{'year'}->{'regex'};
1159	34					84	$date->{'year'} = $date_parts[$index];
1160							}
1161							elsif ($date_parts[$index] =~ /^$TOKENS->{'year_abbr'}->{'regex'}$/) {
1162	20	50				5626	say " year_abbr ($date_parts[$index])" if $self->{'debug'};
1163	20					69	push @parser_parts, $TOKENS->{'year_abbr'}->{'regex'};
1164	20					59	$date->{'year_abbr'} = $date_parts[$index];
1165							}
1166							else {
1167	3					30	warn "Error parsing year: "
1168							. "value '$date_parts[$index]' out of range ($part); "
1169							. "Perhaps you need a different heuristic hint than '$hint'\n";
1170	3					23	return;
1171							}
1172							}
1173							elsif ($order[$index] eq 'm') {
1174	59	100	100			628	if (
		100
1175							$date_parts[$index] =~ /^$TOKENS->{'month'}->{'regex'}$/
1176							&&
1177							$date_parts[$index] <= 12
1178							) {
1179	42	50				2654	say " month ($date_parts[$index])" if $self->{'debug'};
1180	42					123	push @parser_parts, $TOKENS->{'month'}->{'regex'};
1181	42					117	$date->{'month'} = $date_parts[$index];
1182							}
1183							elsif ($date_parts[$index] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/) {
1184	12	50				840	say " month_abbr ($date_parts[$index])" if $self->{'debug'};
1185	12					45	push @parser_parts, $TOKENS->{'month_abbr'}->{'regex'};
1186	12					41	$date->{'month_abbr'} = $date_parts[$index];
1187							}
1188							else {
1189	5					60	warn "Error parsing month: "
1190							. "value '$date_parts[$index]' out of range ($part); "
1191							. "Perhaps you need a different heuristic hint than '$hint'\n";
1192	5					43	return;
1193							}
1194							}
1195							elsif ($order[$index] eq 'd') {
1196	60	100	100			575	if (
1197							$date_parts[$index] !~ /^$TOKENS->{'day'}->{'regex'}$/
1198							\|\|
1199							$date_parts[$index] > 31
1200							) {
1201	14					140	warn "Error parsing day: "
1202							. "value '$date_parts[$index]' out of range ($part); "
1203							. "Perhaps you need a different heuristic hint than '$hint'\n";
1204	14					119	return;
1205							}
1206	46	50				2809	say " day ($date_parts[$index])" if $self->{'debug'};
1207	46					128	push @parser_parts, $TOKENS->{'day'}->{'regex'};
1208	46					134	$date->{'day'} = $date_parts[$index];
1209							}
1210	154	100				669	push @parser_parts, qr\|[-/\.]\| if $index < 2;
1211							}
1212							}
1213							elsif ($part =~ /^$regex_for_julian_day$/) {
1214	3					34	my $success = $part =~ $TOKENS->{'julian_day'}->{'regex'};
1215	3					281	say " julian_day ($part)\n";
1216	3					15	push @parser_parts, $TOKENS->{'julian_day'}->{'regex'};
1217	3					32	$date->{'julian_day'} = $+{'julian_day'};
1218							}
1219							elsif ($part =~ /^$regex_for_number$/) {
1220	130	100	66			541	if (length($part) == 8) {
		100
		100
		100
1221	4					44	my $regex_date =
1222							qr/
1223							$TOKENS->{'year'}->{'regex'}
1224							$TOKENS->{'month'}->{'regex'}
1225							$TOKENS->{'day'}->{'regex'}
1226							/x;
1227	4					22	my $success = $part =~ $regex_date;
1228	4					76	my %ymd = %+;
1229	4					15	foreach my $token ('year', 'month', 'day') {
1230	12					999	say " $token ($ymd{$token})";
1231	12					40	push @parser_parts, $TOKENS->{$token}->{'regex'};
1232	12					48	$date->{$token} = $ymd{$token};
1233							}
1234							}
1235							elsif (length($part) == 6) {
1236	4	100				13	if (defined($date->{'year'})) {
1237							# This is a concatenated time: HHMM
1238	1					16	my $regex_time =
1239							qr/
1240							$TOKENS->{'hour'}->{'regex'}
1241							$TOKENS->{'minute'}->{'regex'}
1242							$TOKENS->{'second'}->{'regex'}
1243							/x;
1244	1					5	my $success = $part =~ $regex_time;
1245	1					11	my %hms = %+;
1246	1					5	foreach my $token ('hour', 'minute', 'second') {
1247	3					215	say " $token ($hms{$token})";
1248	3					11	push @parser_parts, $TOKENS->{$token}->{'regex'};
1249	3					11	$date->{$token} = $hms{$token};
1250							}
1251							}
1252							else {
1253							# This is a concatenated date: YYMMDD
1254	3					43	my $regex_date =
1255							qr/
1256							$TOKENS->{'year_abbr'}->{'regex'}
1257							$TOKENS->{'month'}->{'regex'}
1258							$TOKENS->{'day'}->{'regex'}
1259							/x;
1260	3					17	my $success = $part =~ $regex_date;
1261	3					50	my %ymd = %+;
1262	3					10	foreach my $token ('year_abbr', 'month', 'day') {
1263	9					779	say " $token ($ymd{$token})";
1264	9					31	push @parser_parts, $TOKENS->{$token}->{'regex'};
1265	9					38	$date->{$token} = $ymd{$token};
1266							}
1267							}
1268							}
1269							elsif (length($part) == 3 && defined($date->{'year'})) {
1270							# day_of_year
1271	3	50				266	say " day_of_year ($part)" if $self->{'debug'};
1272	3					11	push @parser_parts, $TOKENS->{'day_of_year'}->{'regex'};
1273	3					13	$date->{'day_of_year'} = $part;
1274							}
1275							elsif (length($part) == 4) {
1276	29	100	66			153	if (defined($date->{'year'}) \|\| defined($date->{'year_abbr'})) {
1277							# This is a concatenated time without seconds: HHMM
1278	2					21	my $regex_time =
1279							qr/
1280							$TOKENS->{'hour'}->{'regex'}
1281							$TOKENS->{'minute'}->{'regex'}
1282							/x;
1283	2					9	my $success = $part =~ $regex_time;
1284	2					26	my %hm = %+;
1285	2					6	foreach my $token ('hour', 'minute') {
1286	4	100				12	if (! $TOKENS->{$token}->{'constraint'}->($hm{$token})) {
1287	2					23	warn "Error parsing $token: "
1288							. "value '$hm{$token}' out of range ($date_string)\n";
1289	2					17	return;
1290							}
1291	2					150	say " $token ($hm{$token})";
1292	2					8	push @parser_parts, $TOKENS->{$token}->{'regex'};
1293	2					4	$date->{$token} = $hm{$token};
1294							}
1295							}
1296							else {
1297							# year (if month and day have not been set, order is now ymd).
1298	27		33			159	my $token = $self->most_likely_token(
1299							'possible_tokens' => ['year'],
1300							'already_claimed' => $date,
1301							'heuristic' => ($order_string // $hint),
1302							'date_string' => $date_string,
1303							'value' => $part,
1304							);
1305	27	50				48	return if ! defined $token;
1306	27	50				1724	say " $token ($part)" if $self->{'debug'};
1307	27					94	push @parser_parts, $TOKENS->{$token}->{'regex'};
1308	27					63	$date->{$token} = $part;
1309	27	50	66			170	if (
			33
			33
1310							! defined($date->{'day'})
1311							&&
1312							! defined($date->{'month'})
1313							&&
1314							! defined($date->{'month_abbr'})
1315							&&
1316							! defined($date->{'month_name'})
1317							) {
1318	15		50			75	$order_string \|\|= 'ymd';
1319							}
1320							}
1321							}
1322							else {
1323							# Either month, or day, or year (based on $order_string or $hint or what has been set already).
1324	90	100	66			519	if (($order_string // $hint) eq 'dmy') {
		100	66
		50	66
1325	23		33			130	my $token = $self->most_likely_token(
1326							'possible_tokens' => ['day', 'month', 'year', 'year_abbr'],
1327							'already_claimed' => $date,
1328							'heuristic' => ($order_string // $hint),
1329							'date_string' => $date_string,
1330							'value' => $part,
1331							);
1332	23	100				54	return if ! defined $token;
1333	19	50	33			1513	say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'};
1334	19					61	push @parser_parts, $TOKENS->{$token}->{'regex'};
1335	19					65	$date->{$token} = $part;
1336							}
1337							elsif (($order_string // $hint) eq 'mdy') {
1338	24		33			178	my $token = $self->most_likely_token(
1339							'possible_tokens' => ['month', 'day', 'year', 'year_abbr'],
1340							'already_claimed' => $date,
1341							'heuristic' => ($order_string // $hint),
1342							'date_string' => $date_string,
1343							'value' => $part,
1344							);
1345	24	100				74	return if ! defined $token;
1346	20	50	33			797	say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'};
1347	20					64	push @parser_parts, $TOKENS->{$token}->{'regex'};
1348	20					82	$date->{$token} = $part;
1349							}
1350							elsif (($order_string // $hint) eq 'ymd') {
1351	43		66			219	my $token = $self->most_likely_token(
1352							'possible_tokens' => ['year', 'year_abbr', 'month', 'day'],
1353							'already_claimed' => $date,
1354							'heuristic' => ($order_string // $hint),
1355							'date_string' => $date_string,
1356							'value' => $part,
1357							);
1358	43	100				94	return if ! defined $token;
1359	39	50	66			2745	say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'};
1360	39					120	push @parser_parts, $TOKENS->{$token}->{'regex'};
1361	39					132	$date->{$token} = $part;
1362							}
1363							else {
1364	0	0				0	say " number ($part)" if $self->{'debug'};
1365	0					0	push @parser_parts, $regex_for_number;
1366							}
1367							}
1368							}
1369							elsif ($part =~ /^$regex_for_string$/) {
1370							# TODO: Look for time zone abbreviation.
1371	31					144	my $token = $self->most_likely_token(
1372							'possible_tokens' => ['am_or_pm', 'era_abbr', 'month_name', 'month_abbr', 'day_name', 'day_abbr', 'phrase', 'time_zone'],
1373							'already_claimed' => $date,
1374							'date_string' => $date_string,
1375							'value' => $part,
1376							);
1377	31	100				69	if ($token) {
1378	29	100	100			115	if ($token eq 'month_name' \|\| $token eq 'month_abbr') {
1379	24	100				50	if (defined($date->{'month'})) {
1380	6					320	say " $token will need to take the place of month";
1381	6	50	66			42	if (($order_string // $hint) =~ /md/) {
1382	6	50				244	say " day ($date->{'month'}) moved from month" if $self->{'debug'};
1383	6					15	foreach my $parser_part (@parser_parts) {
1384	20	100				49	if ($parser_part =~ /\?/) {
1385	6					16	$parser_part = $TOKENS->{'day'}->{'regex'};
1386							}
1387							}
1388	6					15	$date->{'day'} = delete $date->{'month'};
1389							}
1390							}
1391							}
1392	29	50				1933	say " $token ($part)" if $self->{'debug'};
1393	29					88	push @parser_parts, $TOKENS->{$token}->{'regex'};
1394	29					110	$date->{$token} = $part;
1395							}
1396							else {
1397	2	50				168	say " literal ($part)" if $self->{'debug'};
1398	2					11	push @parser_parts, quotemeta($part);
1399							}
1400							}
1401							elsif ($part =~ /^$regex_for_whitespace$/) {
1402	98	50				5455	say " whitespace ($part)" if $self->{'debug'};
1403	98					345	push @parser_parts, $regex_for_whitespace;
1404							}
1405							else {
1406	9	50				711	say " literal ($part)" if $self->{'debug'};
1407	9					43	push @parser_parts, quotemeta($part);
1408							}
1409							}
1410
1411							# If am_or_pm is pm, and hour is < 12, change from hour to hour_12 (and the parser).
1412	96	100	66			226	if (defined($date->{'am_or_pm'}) && lc($date->{'am_or_pm'}) eq 'pm' ) {
1413	1	50	33			6	if (defined($date->{'hour'}) && $date->{'hour'} < 12) {
1414	1					3	$date->{'hour_12'} = delete $date->{'hour'};
1415	1					2	foreach my $parser_part (@parser_parts) {
1416	13	100				20	if ($parser_part =~ /\?/) {
1417	1					3	$parser_part =~ s/\?/?/;
1418							}
1419							}
1420							}
1421							}
1422	96					241	my $parser_regex = join('', @parser_parts);
1423	96	50				5799	say "Crafted regex: $date_string -> $parser_regex" if $self->{'debug'};
1424
1425							# Add a new parser that will match this date format.
1426	96	50				306	if (! defined($known_parsers->{$parser_regex}) ) {
1427	96					281	$known_parsers->{$parser_regex} = 1;
1428	96					3133	$self->add_parser(
1429							$self->prepare_parser_for_regex_named_capture(
1430							{
1431							'regex' => qr/$parser_regex/,
1432							},
1433							),
1434							);
1435							# Move the heuristic parser to the last slot again.
1436							push(
1437	96					110	@{ $self->{'active_parsers'} },
1438							splice(
1439	96					184	@{ $self->{'active_parsers'} }, -2, 1
	96					137
1440							),
1441							);
1442							}
1443
1444	96					270	return $date;
1445							},
1446	132					2362	);
1447							}
1448
1449							=item prepare_formatter_for_arrayref()
1450
1451							Internal method called by L.
1452
1453							=cut
1454
1455							sub prepare_formatter_for_arrayref {
1456	7			7	1	14	state $check = Type::Params::compile(
1457							Object,
1458							Dict[
1459							'params' => ArrayRef[Str],
1460							'structure' => Optional[Enum[qw(array arrayref)]],
1461							],
1462							);
1463	7					10823	my ($self, $definition) = $check->(@_);
1464
1465	7		50			359	my $structure = $definition->{'structure'} // 'arrayref';
1466	7					10	my $params = $definition->{'params'};
1467
1468	7					11	state $sub_check = Type::Params::compile(
1469							HashRef,
1470							);
1471
1472							return (
1473							sub {
1474	7			7		16	my ($date) = $sub_check->(@_);
1475							my @formatted = (
1476							map
1477							{
1478							# Use the value, if available.
1479	7					38	$date->{$_}
1480							//
1481							# Or see if we can determine the value by transforming another field.
1482							$self->transform_token_value(
1483							'target_token' => $_,
1484							'date' => $date,
1485							)
1486							//
1487							# Or see if there is a default value for the field.
1488	46		33			106	$self->{'defaults'}->{$_}
			33
			0
1489							//
1490							# Or just use a value of empty string.
1491							''
1492							}
1493							@$params,
1494							);
1495	7	100				23	return \@formatted if $structure eq 'arrayref';
1496	1					3	return @formatted;
1497							},
1498	7					633	);
1499							}
1500
1501							=item prepare_formatter_for_hashref()
1502
1503							Internal method called by L.
1504
1505							=cut
1506
1507							sub prepare_formatter_for_hashref {
1508	3			3	1	5	state $check = Type::Params::compile(
1509							Object,
1510							Dict[
1511							'params' => ArrayRef[Str],
1512							'structure' => Optional[Enum[qw(hash hashref)]],
1513							],
1514							);
1515	3					5285	my ($self, $definition) = $check->(@_);
1516
1517	3		50			190	my $structure = $definition->{'structure'} // 'hashref';
1518	3					4	my $params = $definition->{'params'};
1519
1520	3					11	my @formatters = $self->prepare_formatter_for_arrayref(
1521							{
1522							'structure' => 'arrayref',
1523							'params' => $params,
1524							},
1525							);
1526
1527	3					7	state $sub_check = Type::Params::compile(
1528							ArrayRef,
1529							);
1530
1531							push @formatters, (
1532							sub {
1533	3			3		7	my ($date) = $sub_check->(@_);
1534	3					16	my %formatted = ();
1535	3					11	@formatted{@$params} = @$date;
1536	3	50				11	return \%formatted if $structure eq 'hashref';
1537	0					0	return %formatted;
1538							},
1539	3					298	);
1540	3					10	return @formatters;
1541							}
1542
1543							=item prepare_formatter_for_coderef()
1544
1545							Internal method called by L.
1546
1547							=cut
1548
1549							sub prepare_formatter_for_coderef {
1550	1			1	1	4	state $check = Type::Params::compile(
1551							Object,
1552							Dict[
1553							'params' => ArrayRef[Str],
1554							'coderef' => CodeRef,
1555							],
1556							);
1557	1					5381	my ($self, $definition) = $check->(@_);
1558
1559	1					101	my $coderef = $definition->{'coderef'};
1560	1					2	my $params = $definition->{'params'};
1561
1562	1					7	my @formatters = $self->prepare_formatter_for_arrayref(
1563							{
1564							'structure' => 'array',
1565							'params' => $params,
1566							},
1567							);
1568
1569	1					3	push @formatters, (
1570							$coderef,
1571							);
1572	1					4	return @formatters;
1573							}
1574
1575							=item prepare_formatter_for_sprintf()
1576
1577							Internal method called by L.
1578
1579							=cut
1580
1581							sub prepare_formatter_for_sprintf {
1582	13			13	1	25	state $check = Type::Params::compile(
1583							Object,
1584							Dict[
1585							'params' => ArrayRef[Str],
1586							'sprintf' => Str,
1587							],
1588							);
1589	13					26801	my ($self, $definition) = $check->(@_);
1590
1591	13					723	my $sprintf = $definition->{'sprintf'};
1592	13					18	my $params = $definition->{'params'};
1593
1594	13					25	state $sub_check = Type::Params::compile(
1595							HashRef,
1596							);
1597
1598							return (
1599							sub {
1600	13			13		31	my ($date) = $sub_check->(@_);
1601							my $formatted = sprintf(
1602							$sprintf,
1603							map
1604							{
1605							# Use the value, if available.
1606	13					78	$date->{$_}
1607							//
1608							# Or see if we can determine the value by transforming another field.
1609							$self->transform_token_value(
1610							'target_token' => $_,
1611							'date' => $date,
1612							)
1613							//
1614							# Or see if there is a default value for the field.
1615	85		100			261	$self->{'defaults'}->{$_}
			66
			50
1616							//
1617							# Or just use a value of empty string.
1618							''
1619							}
1620							@$params,
1621							);
1622	13					38	return $formatted;
1623							},
1624	13					1613	);
1625							}
1626
1627							=item prepare_formatter_for_strftime()
1628
1629							Internal method called by L.
1630
1631							=cut
1632
1633							sub prepare_formatter_for_strftime {
1634	8			8	1	15	state $check = Type::Params::compile(
1635							Object,
1636							Dict[
1637							'strftime' => Str,
1638							],
1639							);
1640	8					6074	my ($self, $definition) = $check->(@_);
1641
1642	8					256	my $strftime = $definition->{'strftime'};
1643	8					6	my $format = $strftime;
1644	8					9	my $params = [];
1645
1646							# Preprocess some tokens that expand into other tokens.
1647	8					17	foreach my $preprocess (@$STRPTIME_PREPROCESS) {
1648	64					273	$format =~ s/$preprocess->{'token'}/$preprocess->{'replacement'}/g;
1649							}
1650
1651							# Replace single character tokens with expanded tokens: %Y -> %{year}
1652							$format =~
1653	8					31	s/
1654							(%[-_^]?[%a-zA-Z])
1655							/
1656	52					128	$self->strftime_token_to_internal($1)
1657							/sgex;
1658
1659							# Find all tokens.
1660	8					80	my @tokens = $format =~ m/(%\{\w+\})/g;
1661
1662							# Replace tokens in order, and build $params list.
1663	8					15	foreach my $token (@tokens) {
1664							# Replace expanded tokens: %{year}
1665	52	50				213	if ($token =~ m/%\{(\w+)\}/) {
1666	52					73	my $internal = $1;
1667	52		50			108	my $sprintf = $TOKENS->{$internal}->{'sprintf'} //
1668							die "Unable to find sprintf definition for token '$internal'";
1669
1670	52	50				3911	say "Internal token $internal maps to sprintf token '$sprintf'." if $self->{'debug'};
1671	52					676	$format =~ s/\Q$token\E/$sprintf/;
1672	52					64	my $alias;
1673	52	100				150	if (defined($TOKENS->{$internal}->{'storage'})) {
1674	2					4	$alias = $TOKENS->{$internal}->{'storage'};
1675							}
1676	52		66			220	push @$params, ($alias // $internal);
1677							}
1678							}
1679
1680							# Postprocess some tokens that expand into special characters.
1681	8					13	foreach my $postprocess (@$STRFTIME_POSTPROCESS) {
1682	16					76	$format =~ s/$postprocess->{'token'}/$postprocess->{'replacement'}/g;
1683							}
1684
1685	8	50				622	say "Crafted sprintf: $strftime -> $format [" . join(', ', @$params) . "]" if $self->{'debug'};
1686	8					52	return $self->prepare_formatter_for_sprintf(
1687							{
1688							'sprintf' => $format,
1689							'params' => $params,
1690							},
1691							);
1692							}
1693
1694							=item strptime_token_to_regex()
1695
1696							Internal method called by L.
1697
1698							=cut
1699
1700							sub strptime_token_to_regex {
1701	68			68	1	68	state $check = Type::Params::compile(
1702							Object,
1703							Str,
1704							);
1705	68					1133	my ($self, $token) = $check->(@_);
1706
1707	68					667	my $internal;
1708	68	50				5473	say "Attempting to convert strptime token $token into a regex." if $self->{'debug'};
1709	68	50				311	if (defined($self->{'strptime_mappings'}->{$token})) {
		100
1710	0					0	$internal = $self->{'strptime_mappings'}->{$token};
1711							}
1712							elsif (defined($DEFAULT_STRPTIME_MAPPINGS->{$token})) {
1713	65					97	$internal = $DEFAULT_STRPTIME_MAPPINGS->{$token};
1714							}
1715
1716	68	100				101	if (! defined($internal)) {
1717	3	50				213	say "No mapping found" if $self->{'debug'};
1718	3					19	return $token; # Perform no substitution.
1719							}
1720
1721	65	50				144	if (! defined($TOKENS->{$internal}->{'regex'})) {
1722	0					0	die "Unable to find regex definition for token '$internal'";
1723							}
1724	65	50				4786	say "Strptime token $token maps to internal token '$internal'." if $self->{'debug'};
1725
1726	65					469	return $TOKENS->{$internal}->{'regex'};
1727							}
1728
1729							=item strftime_token_to_internal
1730
1731							Internal method called by L.
1732
1733							=cut
1734
1735							sub strftime_token_to_internal {
1736	52			52	1	44	state $check = Type::Params::compile(
1737							Object,
1738							Str,
1739							);
1740	52					1126	my ($self, $token) = $check->(@_);
1741
1742	52					495	my $internal;
1743	52	50				4212	say "Attempting to convert strftime token $token into an internal token." if $self->{'debug'};
1744	52	50				157	if (defined($self->{'strftime_mappings'}->{$token})) {
1745	0					0	$internal = $self->{'strftime_mappings'}->{$token};
1746							}
1747	52	50				186	if (defined($self->{'strptime_mappings'}->{$token})) {
		50
		50
1748	0					0	$internal = $self->{'strptime_mappings'}->{$token};
1749							}
1750							elsif (defined($DEFAULT_STRFTIME_MAPPINGS->{$token})) {
1751	0					0	$internal = $DEFAULT_STRFTIME_MAPPINGS->{$token};
1752							}
1753							elsif (defined($DEFAULT_STRPTIME_MAPPINGS->{$token})) {
1754	52					62	$internal = $DEFAULT_STRPTIME_MAPPINGS->{$token};
1755							}
1756
1757	52	50				78	if (! defined($internal)) {
1758	0	0				0	say "No mapping found" if $self->{'debug'};
1759	0					0	return '%' . $token; # Perform no substitution, but escape token for sprintf.
1760							}
1761
1762	52	50				115	if (! defined($TOKENS->{$internal}->{'sprintf'})) {
1763	0					0	die "Unable to find sprintf definition for token '$internal'";
1764							}
1765	52	50				3823	say "Strftime token $token maps to internal token '$internal'." if $self->{'debug'};
1766
1767	52					353	return '%{' . $internal . '}';
1768							}
1769
1770							=item transform_token_value()
1771
1772							Internal method called by L.
1773
1774							=cut
1775
1776							sub transform_token_value {
1777	8			8	1	12	state $check = Type::Params::compile(
1778							Object,
1779							slurpy Dict[
1780							'target_token' => Str,
1781							'date' => HashRef,
1782							],
1783							);
1784	8					17120	my ($self, $args) = $check->(@_);
1785
1786	8					375	my $target_token = $args->{'target_token'};
1787	8					9	my $date = $args->{'date'};
1788
1789							# Return the value, if it is already set.
1790	8	50				22	return $date->{$target_token} if defined($date->{$target_token});
1791
1792	8					13	foreach my $transformations ($self->{'transformations'}, $DEFAULT_TRANSFORMATIONS) {
1793							# Look up transformations to $target_token from a field that is defined in $date.
1794	15	100				52	if (defined($transformations->{$target_token})) {
1795	6					7	foreach my $source_token (keys %{$transformations->{$target_token}}) {
	6					15
1796	6	50	33			26	if (defined($date->{$source_token}) && defined($transformations->{$target_token}->{$source_token})) {
1797							# Run the transformation and return the value.
1798	6					15	return $transformations->{$target_token}->{$source_token}->($date);
1799							}
1800							}
1801							}
1802							}
1803
1804	2					24	return;
1805							}
1806
1807							=item most_likely_token()
1808
1809							Internal method called by L.
1810
1811							=cut
1812
1813							sub most_likely_token {
1814	148			148	1	119	state $check = Type::Params::compile(
1815							Object,
1816							slurpy Dict[
1817							'already_claimed' => Optional[HashRef],
1818							'possible_tokens' => ArrayRef,
1819							'heuristic' => Optional[Str],
1820							'value' => Str,
1821							'date_string' => Optional[Str],
1822							],
1823							);
1824	148					10343	my ($self, $args) = $check->(@_);
1825
1826	148		50			6792	my $already_claimed = $args->{'already_claimed'} // {};
1827	148					150	my $possible_tokens = $args->{'possible_tokens'};
1828	148		100			291	my $hint = $args->{'heuristic'} // '';
1829	148					149	my $date_part = $args->{'value'};
1830	148		33			213	my $date_string = $args->{'date_string'} // $date_part;
1831
1832	148					236	foreach my $token (@$possible_tokens) {
1833	379	100				547	if ($token eq 'day') {
1834	54	100				90	next if defined($already_claimed->{'day'});
1835	40	50				254	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1836	40	100				89	if ($date_part > 31) {
1837	8					80	warn "Error parsing day: "
1838							. "value '$date_part' out of range ($date_string); "
1839							. "Perhaps you need a different heuristic hint than '$hint'\n";
1840	8					44	return;
1841							}
1842	32					101	return $token;
1843							}
1844	325	100				416	if ($token eq 'month') {
1845	66	100				140	next if defined($already_claimed->{'month'});
1846	48	100				75	next if defined($already_claimed->{'month_abbr'});
1847	35	100				64	next if defined($already_claimed->{'month_name'});
1848	30	50				222	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1849	30	100				81	if ($date_part > 12) {
1850	4					60	warn "Error parsing month: "
1851							. "value '$date_part' out of range ($date_string); "
1852							. "Perhaps you need a different heuristic hint than '$hint'\n";
1853	4					37	return;
1854							}
1855	26					87	return $token;
1856							}
1857	259	100	100			682	if ($token eq 'year' \|\| $token eq 'year_abbr') {
1858	133	100				218	next if defined($already_claimed->{'year'});
1859	97	100				134	next if defined($already_claimed->{'year_abbr'});
1860	69	100				1221	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1861	47					169	return $token;
1862							}
1863
1864							# Any other type of token does not need special handling.
1865	126	50				195	next if defined($already_claimed->{$token});
1866	126	100				4651	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1867	29					169	return $token;
1868							}
1869
1870	2	50				5	if ($hint) {
1871	0					0	warn "Error parsing $possible_tokens->[0]: "
1872							. "elements out of order ($date_string); "
1873							. "Perhaps you need a different heuristic hint than '$hint'\n";
1874							}
1875
1876	2					5	return;
1877							}
1878
1879
1880							=item add_parser()
1881
1882							Adds a parser to the active parsers. When parsing a date string,
1883							the parser will be called if each preceeding parser has failed to
1884							parse the date.
1885
1886							See L for generating a parser in the correct
1887							format.
1888
1889							$reformat->add_parser(
1890							$reformat->prepare_parser( ... ),
1891							);
1892
1893							=cut
1894
1895							sub add_parser {
1896	242			242	1	203	state $check = Type::Params::compile(
1897							Object,
1898							slurpy ArrayRef[CodeRef],
1899							);
1900	242					11030	my ($self, $parsers) = $check->(@_);
1901
1902	242					2699	my $count = push @{ $self->{'active_parsers'} }, @$parsers;
	242					496
1903	242	50				589	return $count ? 1 : 0;
1904							}
1905
1906							=item add_formatter()
1907
1908							Adds a formatter to the active formatters. When formatting a date,
1909							the formatter will be called after each preceeding formatter, receiving
1910							as input the output from the previous formatter.
1911
1912							See L for generating a formatter in the correct
1913							format.
1914
1915							$reformat->add_formatter(
1916							$reformat->prepare_formatter( ... ),
1917							);
1918
1919							=cut
1920
1921							sub add_formatter {
1922	20			20	1	35	state $check = Type::Params::compile(
1923							Object,
1924							slurpy ArrayRef[CodeRef],
1925							);
1926	20					18166	my ($self, $formatters) = $check->(@_);
1927
1928	20					345	my $count = push @{ $self->{'active_formatters'} }, @$formatters;
	20					50
1929	20	50				68	return $count ? 1 : 0;
1930							}
1931
1932							=item parse_date()
1933
1934							Given a date string, attempts to parse it via the active parsers.
1935							Returns a hashref containing the tokens that were extracted
1936							from the date string.
1937
1938							my $date_hashref = $reformat->parse_date($date_string);
1939
1940							=cut
1941
1942							sub parse_date {
1943	146			146	1	1652	state $check = Type::Params::compile(
1944							Object,
1945							Str,
1946							);
1947	146					2293	my ($self, $date_string) = $check->(@_);
1948
1949	146					1101	state $has_parser = ArrayRef[CodeRef];
1950	146	50				1491	if (! $has_parser->($self->{'active_parsers'}) ) {
1951	0					0	die "No parsers defined. Have you called add_parser()?";
1952							}
1953
1954	146					1997	foreach my $parser (@{ $self->{'active_parsers'} }) {
	146					243
1955	146					195	my $date = $parser->($date_string);
1956	146	100				489	return $date if defined($date);
1957							}
1958							# None of the parsers were able to extract the date components.
1959	36					63	return;
1960							}
1961
1962							=item format_date()
1963
1964							Given a hashref containing the tokens that were extracted from a
1965							date string, formats the date using each of the active parsers,
1966							passing the output from the previous formatter to the next formatter.
1967
1968							my $date_string = $reformat->format_date($date_hashref);
1969
1970							=cut
1971
1972							sub format_date {
1973	20			20	1	71	state $check = Type::Params::compile(
1974							Object,
1975							Maybe[HashRef],
1976							);
1977	20					5468	my ($self, $date) = $check->(@_);
1978
1979	20	50				325	return if ! defined($date);
1980
1981	20					33	state $has_formatter = ArrayRef[CodeRef];
1982	20	50				1472	if (! $has_formatter->($self->{'active_formatters'}) ) {
1983	0					0	die "No formatters defined. Have you called add_formatter()?";
1984							}
1985
1986	20					290	my @formatted = ($date);
1987	20					21	foreach my $formatter (@{ $self->{'active_formatters'} }) {
	20					42
1988	24					39	@formatted = $formatter->(@formatted);
1989							}
1990	20	50				67	return $formatted[0] if (scalar(@formatted) == 1);
1991	0					0	return @formatted;
1992							}
1993
1994							=item reformat_date()
1995
1996							Given a date string, attempts to parse it and format it using the
1997							active parsers and formaters.
1998
1999							my $date_string = $reformat->reformat_date($date_string);
2000
2001							=cut
2002
2003							sub reformat_date {
2004	5			5	1	20	state $check = Type::Params::compile(
2005							Object,
2006							Str,
2007							);
2008	5					541	my ($self, $date_string) = $check->(@_);
2009
2010	5					41	my $date = $self->parse_date($date_string);
2011	5					10	my @formatted = $self->format_date($date);
2012	5	50				19	return $formatted[0] if (scalar(@formatted) == 1);
2013	0						return @formatted;
2014							};
2015
2016							=back
2017
2018							=cut
2019
2020							1;
2021							__END__