File Coverage

blib/lib/Date/Reformat.pm

Criterion	Covered	Total	%
statement	453	479	94.5
branch	198	276	71.7
condition	75	128	58.5
subroutine	38	38	100.0
pod	26	26	100.0
total	790	947	83.4

line	stmt	bran	cond	sub	pod	time	code
1							package Date::Reformat;
2
3							=head1 NAME
4
5							Date::Reformat - Rearrange date strings
6
7							=head1 SYNOPSIS
8
9							use Date::Reformat;
10
11							my $reformat = Date::Reformat->new(
12							parser => {
13							regex => qr/^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)$/,
14							params => [qw(year month day hour minute second)],
15							},
16							defaults => {
17							time_zone => 'America/New_York',
18							},
19							transformations => [
20							{
21							from => 'year',
22							to => 'century',
23							coderef => sub { int($_[0] / 100) },
24							},
25							],
26							formatter => {
27							sprintf => '%s-%02d-%02dT%02d:%02d:02d %s',
28							params => [qw(year month day hour minute second time_zone)],
29							},
30							);
31
32							my $reformat = Date::Reformat->new(
33							parser => {
34							strptime => '%Y-%m-%dT%M:%H:%S',
35							# or heuristic => 'ymd', # http://www.postgresql.org/docs/9.2/static/datetime-input-rules.html
36							},
37							defaults => {
38							time_zone => 'America/New_York',
39							},
40							formatter => {
41							strftime => '%Y-%m-%dT%M:%H:%S %Z',
42							# or data_structure => 'hashref' \|\| 'hash' \|\| 'arrayref' \|\| 'array'
43							# or coderef => sub { my ($y, $m, $d) = @_; DateTime->new(year => $y, month => $m, day => $d) },
44							# params => [qw(year month day)],
45							},
46							);
47
48							my $reformatted_string = $reformat->reformat_date($date_string);
49
50							=head1 DESCRIPTION
51
52							This module aims to be a lightweight and flexible tool for rearranging
53							components of a date string, then returning the components in the order
54							and structure specified.
55
56							My motivation was a month of trying to compare data from spreadsheets from
57							several sources, and every single one used a different date format, which
58							made comparison difficult.
59
60							There are so many modules for doing date math, or parsing a specific date
61							format. I needed something that could take in pretty much any format
62							and turn it into a single format that I could then use for comparison.
63
64							=cut
65
66	11			11		159231	use 5.010000;
	11					30
	11					332
67	11			11		43	use strict;
	11					12
	11					336
68	11			11		37	use warnings;
	11					16
	11					327
69
70	11			11		5388	use Types::Standard qw(ClassName Object Maybe Optional slurpy Dict HashRef ArrayRef RegexpRef CodeRef Enum Str Int);
	11					558276
	11					149
71	11			11		21419	use Type::Params qw();
	11					96202
	11					18749
72
73							our $VERSION = '0.03';
74
75							my $MONTH_LOOKUP = {
76							};
77							{
78							# Lookups for month abbreviations.
79							my $c = 0;
80							foreach my $abbr (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
81							$MONTH_LOOKUP->{'abbr'}->{lc($abbr)} = ++$c;
82							$MONTH_LOOKUP->{'number'}->{$c}->{'abbr'} = $abbr;
83							}
84							}
85
86							my $TOKENS = {
87							'year' => {
88							'regex' => q/(?\d{4})/,
89							'sprintf' => '%04d',
90							},
91							'year_abbr' => {
92							'regex' => q/(?\d{2})/,
93							'sprintf' => '%02d',
94							},
95							'month' => {
96							'regex' => q/(?\d\d?)/,
97							'sprintf' => '%02d',
98							},
99							'month_no_padding' => {
100							'regex' => q/(?\d\d?)/,
101							'sprintf' => '%d',
102							'storage' => 'month',
103							},
104							'month_name' => {
105							'regex' => q/(?January\|February\|March\|April\|May\|June\|July\|August\|September\|October\|November\|December)/,
106							'sprintf' => '%s',
107							},
108							'month_abbr' => {
109							'regex' => q/(?Jan\|Feb\|Mar\|Apr\|May\|Jun\|Jul\|Aug\|Sep\|Oct\|Nov\|Dec)/,
110							'sprintf' => '%s',
111							},
112							'day' => {
113							'regex' => q/(?\d\d?)/,
114							'sprintf' => '%02d',
115							},
116							'day_name' => {
117							'regex' => q/(?Monday\|Tuesday\|Wednesday\|Thursday\|Friday\|Saturday\|Sunday)/,
118							'sprintf' => '%s',
119							},
120							'day_abbr' => {
121							'regex' => q/(?Mon\|Tues?\|Wed\|Thur?\|Fri\|Sat\|Sun)/,
122							'sprintf' => '%s',
123							},
124							'day_of_year' => {
125							'regex' => q/(?\d\d?\d?)/,
126							'sprintf' => '%03d',
127							},
128							'julian_day' => {
129							'regex' => q/J(?\d+)/,
130							'sprintf' => '%s',
131							'constraint' => sub { $_[0] >= 0 },
132							},
133							'era_abbr' => {
134							'regex' => q/(?BC\|AD\|BCE\|CE)/,
135							'sprintf' => '%s',
136							},
137							'hour' => {
138							'regex' => q/(?\d\d?)/,
139							'sprintf' => '%02d',
140							'constraint' => sub { $_[0] >= 0 && $_[0] < 24 },
141							},
142							'hour_12' => {
143							'regex' => q/(?\d\d?)/,
144							'sprintf' => '%d',
145							},
146							'minute' => {
147							'regex' => q/(?\d\d)/,
148							'sprintf' => '%02d',
149							'constraint' => sub { $_[0] >= 0 && $_[0] < 60 },
150							},
151							'second' => {
152							'regex' => q/(?\d\d)/,
153							'sprintf' => '%02d',
154							},
155							'am_or_pm' => {
156							'regex' => q/(?(?i)[ap]\.?m\.?)/,
157							'sprintf' => '%s',
158							},
159							'time_zone' => {
160							'regex' => q/(?Z\|UTC\|[[:alpha:]]{3,}(?:\/[[:alpha:]]+)?)/,
161							'sprintf' => '%s',
162							},
163							'time_zone_offset' => {
164							'regex' => q\|(?[-+]\d\d?(?:\d\d)?)\|,
165							'sprintf' => '%s',
166							},
167							'phrase' => {
168							'regex' => q/(?(?i)today\|tomorrow\|yesterday\|(?:next\|last)\w+(?:week\|month\|year)\|\d+\w+(?:seconds?\|minutes?\|hours?\|days?\|weeks?\|months?\|years?)\w+(?:ago\|from\w+now))/,
169							'sprintf' => '%s',
170							},
171							};
172
173							my $STRPTIME_PREPROCESS = [
174							{
175							'token' => '%c',
176							'replacement' => '%c', # TODO: Perhaps use Scalar::Defer, and look up locale datetime format only if needed.
177							},
178							{
179							'token' => '%D',
180							'replacement' => '%m/%d/%y',
181							},
182							{
183							'token' => '%F',
184							'replacement' => '%Y-%m-%d',
185							},
186							{
187							'token' => '%R',
188							'replacement' => '%H:%M',
189							},
190							{
191							'token' => '%r',
192							'replacement' => '%I:%M:%S %p', # TODO: This may be affected by locale.
193							},
194							{
195							'token' => '%T',
196							'replacement' => '%H:%M:%S',
197							},
198							{
199							'token' => '%X',
200							'replacement' => '%X', # TODO: Perhaps use Scalar::Defer, and look up locale time format only if needed.
201							},
202							{
203							'token' => '%x',
204							'replacement' => '%x', # TODO: Perhaps use Scalar::Defer, and look up locale date format only if needed.
205							},
206							];
207
208							my $STRPTIME_POSTPROCESS = [
209							{
210							'token' => '%n',
211							'replacement' => '\s+',
212							},
213							{
214							'token' => '%t',
215							'replacement' => '\s+',
216							},
217							{
218							'token' => '%%',
219							'replacement' => quotemeta('%'),
220							},
221							];
222
223							my $STRFTIME_POSTPROCESS = [
224							{
225							'token' => '%n',
226							'replacement' => "\n",
227							},
228							{
229							'token' => '%t',
230							'replacement' => "\t",
231							},
232							];
233
234							my $DEFAULT_STRPTIME_MAPPINGS = {
235							'%A' => 'day_name', # TODO
236							'%a' => 'day_abbr',
237							'%B' => 'month_name', # TODO
238							'%b' => 'month_abbr',
239							'%C' => 'century', # TODO
240							'%d' => 'day',
241							'%e' => 'day', # TODO: This one is space-padded.
242							'%G' => 'week_year', # TODO
243							'%g' => 'week_year_abbr', # TODO
244							'%H' => 'hour',
245							'%h' => 'month_abbr',
246							'%I' => 'hour_12',
247							'%j' => 'day_of_year',
248							'%k' => 'hour', # TODO: This one is space-padded.
249							'%l' => 'hour_12', # TODO: This one is space-padded.
250							'%M' => 'minute',
251							'%m' => 'month',
252							'%-m' => 'month_no_padding',
253							'%N' => 'fractional_seconds', # TODO
254							'%P' => 'am_or_pm',
255							'%p' => 'am_or_pm', # TODO: This is uppercase.
256							'%S' => 'second',
257							'%s' => 'epoch', # TODO
258							'%U' => 'week_number_0', # TODO
259							'%u' => 'day_of_week', # TODO
260							'%V' => 'week_number', # TODO
261							'%W' => 'week_number_1', # TODO
262							'%w' => 'day_of_week_0', # TODO
263							'%Y' => 'year',
264							'%y' => 'year_abbr',
265							'%Z' => 'time_zone',
266							'%z' => 'time_zone_offset',
267							};
268
269							my $DEFAULT_STRFTIME_MAPPINGS = {
270							};
271
272							my $DEFAULT_TRANSFORMATIONS = {
273							# to => {
274							# from => \&transformation_coderef,
275							# },
276							'year' => {
277							'year_abbr' => sub {
278							my ($date) = @_;
279							return $date->{'year'} if defined($date->{'year'});
280							return $date->{'year_abbr'} < 70
281							? $date->{'year_abbr'} + 2000
282							: $date->{'year_abbr'} + 1900;
283							},
284							},
285							'year_abbr' => {
286							'year' => sub {
287							my ($date) = @_;
288							return $date->{'year_abbr'} if defined($date->{'year_abbr'});
289							return substr($date->{'year'}, -2, 2);
290							},
291							},
292							'month' => {
293							'month_abbr' => sub {
294							my ($date) = @_;
295							return $date->{'month'} if defined($date->{'month'});
296							return $MONTH_LOOKUP->{'abbr'}->{ lc($date->{'month_abbr'}) } // undef;
297							},
298							},
299							'month_abbr' => {
300							'month' => sub {
301							my ($date) = @_;
302							return $date->{'month_abbr'} if defined($date->{'month_abbr'});
303							return $MONTH_LOOKUP->{'number'}->{ $date->{'month'}+0 }->{'abbr'} // undef;
304							},
305							},
306							'hour' => {
307							'hour_12' => sub {
308							my ($date) = @_;
309							return $date->{'hour'} if defined($date->{'hour'});
310							if (lc($date->{'am_or_pm'}) eq 'pm') {
311							return $date->{'hour_12'} == 12
312							? $date->{'hour_12'}
313							: $date->{'hour_12'} + 12;
314							}
315							return $date->{'hour_12'} == 12
316							? 0
317							: $date->{'hour_12'};
318							},
319							},
320							'hour_12' => {
321							'hour' => sub {
322							my ($date) = @_;
323							return $date->{'hour_12'} if defined($date->{'hour_12'});
324							if ($date->{'hour'} == 0) {
325							return 12;
326							}
327							return $date->{'hour'} < 13
328							? $date->{'hour'}
329							: $date->{'hour'} - 12;
330							},
331							},
332							'am_or_pm' => {
333							'hour' => sub {
334							my ($date) = @_;
335							return $date->{'am_or_pm'} if defined($date->{'am_or_pm'});
336							if ($date->{'hour'} == 0) {
337							return 'am';
338							}
339							return $date->{'hour'} >= 12
340							? 'pm'
341							: 'am';
342							},
343							},
344							};
345
346							=head2 METHODS
347
348							=over 4
349
350							=item new()
351
352							Returns a new reformatter instance.
353
354							my $reformat = Date::Reformat->new(
355							'parser' => $parsing_instructions,
356							'transformations' => $transformation_instructions,
357							'defaults' => $default_values,
358							'formatter' => $formatting_instructions,
359							'debug' => 0,
360							);
361
362							Parameters:
363
364							=over 4
365
366							=item parser
367
368							A hashref of instructions used to initialize a parser.
369
370							See L for details.
371
372							=item transformations
373
374							An arrayref of hashrefs containing instructions on how to
375							convert values of one token into values for another token
376							(such as C to C).
377
378							See L for details.
379
380							=item defaults
381
382							A hashref specifying values to use if the date string does
383							not contain a specific token (such as a time_zone value).
384
385							See L for details.
386
387							=item formatter
388
389							A hashref of instructions used to initialize a formatter.
390
391							See L for details.
392
393							=item debug
394
395							Either a 1 or a 0, to turn debugging on or off, respectively.
396
397							=back
398
399							=cut
400
401							sub new {
402	161			161	1	101257	state $check = Type::Params::compile(
403							ClassName,
404							slurpy Dict[
405							'debug' => Optional[Int],
406							'parser' => Optional[HashRef],
407							'formatter' => Optional[HashRef],
408							'transformations' => Optional[ArrayRef[HashRef]],
409							'defaults' => Optional[HashRef],
410							],
411							);
412	161					151281	my ($class, $args) = $check->(@_);
413	161					8071	my $self = bless {}, $class;
414
415	161					375	$self->debug($args->{'debug'});
416
417	161					195	foreach my $parameter (
418							'parser',
419							'formatter',
420							'transformations',
421							'defaults',
422							)
423							{
424	644	100				1253	next if ! defined $args->{$parameter};
425
426	169					224	my $initialize = 'prepare_' . $parameter;
427	169					377	my @data = $self->$initialize($args->{$parameter});
428
429	169					393	my $add = 'add_' . $parameter;
430	169					409	$self->$add(@data);
431							}
432	161					378	return $self;
433							}
434
435							=item prepare_parser()
436
437							Builds a parser based on the given instructions. To add it to
438							the currently active parsers, see L.
439
440							If several parsers are active, the first one to successfully parse
441							the current date string returns the results of the parse, and subsequent
442							parsers are not utilized. See L for more information.
443
444							The types of parsers that can be initialized via this method are:
445
446							=over 4
447
448							=item regex
449
450							The regex must specify what parts should be captured, and a list
451							of token names must be supplied to identify which token each captured
452							value will be assigned to.
453
454							$reformat->prepare_parser(
455							{
456							regex => qr/^(\d{4})-(\d\d)-(\d\d)T(\d\d):(\d\d):(\d\d)$/,
457							params => [qw(year month day hour minute second)],
458							},
459							);
460
461							=item regex with named capture
462
463							The regex must specify what parts should be captured, using named
464							capture syntax.
465
466							$reformat->prepare_parser(
467							{
468							regex => qr/^(?\d{4})-(?\d\d)-(?\d\d) (?\d\d?):(?\d\d):(?\d\d)$/,
469							},
470							);
471
472							=item strptime
473
474							The format string must be in strptime() format.
475
476							$reformat->prepare_parser(
477							{
478							strptime => '%Y-%m-%dT%M:%H:%S',
479							},
480							);
481
482							=item heuristic
483
484							A hint must be provided that will help the parser determine the meaning
485							of numbers if the ordering is ambiguous.
486
487							Currently the heuristic parsing mimics the PostgreSQL date parser (though
488							I have not copied over all the test cases from the PostgreSQL regression
489							tests, so there are likely to be differences/flaws).
490
491							$reformat->prepare_parser(
492							{
493							heuristic => 'ymd', # or 'mdy' or 'dmy'
494							},
495							);
496
497							Currently when the heuristic parser parses a date string, it creates a
498							named regex parser which it injects into the active parsers directly in
499							front of itself, so that subsequent date strings that are in the same
500							format will be parsed via the regex.
501
502							I plan to add a parameter that will control whether parsers are generated
503							by the heuristic parser (I also plan to refactor that method quite a bit,
504							because it kind of makes me cringe to look at it).
505
506							=back
507
508							=cut
509
510							sub prepare_parser {
511	146			146	1	147	state $check = Type::Params::compile(
512							Object,
513							Dict[
514							'regex' => Optional[RegexpRef],
515							'params' => Optional[ArrayRef],
516							'strptime' => Optional[Str],
517							'heuristic' => Optional[Enum[qw(ymd dmy mdy)]],
518							],
519							);
520	146					36889	my ($self, $definition) = $check->(@_);
521
522	146	100				4192	if (defined($definition->{'regex'})) {
523
524							# Initialize the right kind of regex parser (simple capture or named capture).
525	4	100				9	if (defined($definition->{'params'})) {
526	3					18	return $self->prepare_parser_for_regex_with_params(
527							{
528							'regex' => $definition->{'regex'},
529							'params' => $definition->{'params'},
530							}
531							);
532							}
533	1					5	return $self->prepare_parser_for_regex_named_capture(
534							{
535							'regex' => $definition->{'regex'},
536							},
537							);
538
539							}
540
541	142	100				227	if (defined($definition->{'strptime'})) {
542	10					42	return $self->prepare_parser_for_strptime(
543							{
544							'strptime' => $definition->{'strptime'},
545							},
546							);
547							}
548
549	132	50				188	if (defined($definition->{'heuristic'})) {
550	132					337	return $self->prepare_parser_heuristic(
551							{
552							'heuristic' => $definition->{'heuristic'},
553							},
554							);
555							}
556
557							# Nothing initialized.
558	0					0	return;
559							}
560
561							=item prepare_formatter()
562
563							Builds a formatter based on the given instructions. To add it to the
564							currently active formatters, see L.
565
566							If several formatters are active, they are each called in turn, receiving
567							the output from the previous parser.
568
569							The types of parsers that can be initialized via this method are:
570
571							=over 4
572
573							=item sprintf
574
575							The format string must be in sprintf() format, and a list of token names
576							must be supplied to identify which token values to send to the formatter.
577
578							$reformat->prepare_formatter(
579							{
580							sprintf => '%s-%02d-%02dT%02d:%02d:02d %s',
581							params => [qw(year month day hour minute second time_zone)],
582							},
583							);
584
585							=item strftime
586
587							The format string must be in strftime() format.
588
589							$reformat->prepare_formatter(
590							{
591							strftime => '%Y-%m-%dT%M:%H:%S %Z',
592							},
593							);
594
595							=item data_structure
596
597							The type of the desired data structure must be specified, and a list of
598							token names to identify which token values to include in the data structure.
599
600							Valid data structure types are:
601
602							=over 4
603
604							=item hash
605
606							=item hashref
607
608							=item array
609
610							=item arrayref
611
612							=back
613
614							$reformat->prepare_formatter(
615							{
616							data_structure => 'hashref',
617							params => [qw(year month day hour minute second time_zone)],
618							},
619							);
620
621							=item coderef
622
623							The supplied coderef will be passed the token values specified. Whatever the
624							coderef returns will be passed to the next active formatter, or will be returned,
625							if this is the final formatter.
626
627							$reformat->prepare_formatter(
628							{
629							coderef => sub { my ($y, $m, $d) = @_; DateTime->new(year => $y, month => $m, day => $d) },
630							params => [qw(year month day)],
631							},
632							);
633
634							=back
635
636							=cut
637
638							sub prepare_formatter {
639	20			20	1	40	state $check = Type::Params::compile(
640							Object,
641							Dict[
642							'sprintf' => Optional[Str],
643							'params' => Optional[ArrayRef],
644							'strftime' => Optional[Str],
645							'data_structure' => Optional[Enum[qw(hash hashref array arrayref)]],
646							'coderef' => Optional[CodeRef],
647							],
648							);
649	20					70238	my ($self, $definition) = $check->(@_);
650
651	20	100				2040	if (defined($definition->{'sprintf'})) {
652	5					30	return $self->prepare_formatter_for_sprintf(
653							{
654							'sprintf' => $definition->{'sprintf'},
655							'params' => $definition->{'params'},
656							},
657							);
658							}
659
660	15	100				38	if (defined($definition->{'strftime'})) {
661	8					38	return $self->prepare_formatter_for_strftime(
662							{
663							'strftime' => $definition->{'strftime'},
664							},
665							);
666							}
667
668	7	100				16	if (defined($definition->{'data_structure'})) {
669	6	100				13	if ($definition->{'data_structure'} =~ /^hash(?:ref)?$/) {
670	3					13	return $self->prepare_formatter_for_hashref(
671							{
672							'structure' => $definition->{'data_structure'},
673							'params' => $definition->{'params'},
674							},
675							);
676							}
677
678	3	50				12	if ($definition->{'data_structure'} =~ /^array(?:ref)?$/) {
679	3					15	return $self->prepare_formatter_for_arrayref(
680							{
681							'structure' => $definition->{'data_structure'},
682							'params' => $definition->{'params'},
683							},
684							);
685							}
686							}
687
688	1	50				3	if (defined($definition->{'coderef'})) {
689	1					5	return $self->prepare_formatter_for_coderef(
690							{
691							'coderef' => $definition->{'coderef'},
692							'params' => $definition->{'params'},
693							},
694							);
695							}
696
697							# Nothing initialized.
698	0					0	return;
699							}
700
701							=item prepare_transformations()
702
703							Accepts an arrayref of hashrefs that specify how to transform
704							token values from one token type to another.
705
706							Returns the same arrayref. To add it to the currently active
707							transformers, see L.
708
709							=cut
710
711							sub prepare_transformations {
712	1			1	1	2	my ($self, $transformations) = @_;
713	1		50			4	return $transformations // [];
714							}
715
716							=item add_transformations()
717
718							Accepts an arrayref of hashrefs that specify how to transform
719							token values from one token type to another. Adds each
720							transformation instruction to the list of active transformers.
721							A transformation instruction with the same C and C
722							values as a previous instruction will overwrite the previous
723							version.
724
725							$reformat->add_transformations(
726							[
727							{
728							'to' => 'hour',
729							'from' => 'hour_12',
730							'transformation' => sub {
731							my ($date) = @_;
732							# Use the value of $date->{'hour_12'} (and $date->{'am_or_pm'})
733							# to calculate what the value of $date->{'hour'} should be.
734							# ...
735							return $hour;
736							},
737							},
738							],
739							);
740
741							The values in each hashref are:
742
743							=over 4
744
745							=item to
746
747							The name of the token type that is desired (for instance
748							'hour', meaning the 24-hour format).
749
750							=item from
751
752							The name of the token type that is available in the date
753							string (for instance 'hour_12', meaning the 12-hour format).
754
755							=item transformation
756
757							A coderef which accepts a hashref containing the information
758							which has been parsed out of the date string. The coderef
759							is expected to examine the date information, transform the
760							token type specified via C into the correct value for the
761							token type specified via C, and return that value.
762
763							=back
764
765							Several transformations have been built into this module.
766							Search for C<$DEFAULT_TRANSFORMATIONS> in the source code.
767
768							Transformations added via this method will take precedence
769							over built-in transformations.
770
771							=cut
772
773							sub add_transformations {
774	1			1	1	5	state $check = Type::Params::compile(
775							Object,
776							ArrayRef[
777							Dict[
778							'to' => Str,
779							'from' => Str,
780							'transformation' => CodeRef,
781							],
782							],
783							);
784	1					10890	my ($self, $transformations) = $check->(@_);
785
786	1					230	my $count = 0;
787	1					2	foreach my $t (@$transformations) {
788	1					5	$self->{'transformations'}->{$t->{'to'}}->{$t->{'from'}} = $t->{'transformation'};
789	1					2	$count++;
790							}
791	1					4	return $count;
792							}
793
794							=item prepare_defaults()
795
796							Accepts a hashref of default values to use when transforming
797							or formatting a date which is missing tokens that are needed.
798
799							This method clears out any defaults which had been set
800							previously.
801
802							Returns the same hashref it was given, but does not set them.
803							To add defaults, see L.
804
805							=cut
806
807							sub prepare_defaults {
808	2			2	1	4	my ($self, $args) = @_;
809	2					6	$self->{'defaults'} = {};
810	2					4	return $args;
811							}
812
813							=item add_defaults()
814
815							Accepts a hashref of default values to use when transforming
816							or formatting a date which is missing tokens that are needed.
817
818							Each key should be the name of a token, and the corresponding
819							value is the default value that will be used when a date is
820							missing that token.
821
822							$reformat->add_defaults(
823							{
824							'time_zone' => 'America/New_York',
825							},
826							);
827
828							=cut
829
830							sub add_defaults {
831	2			2	1	11	state $check = Type::Params::compile(
832							Object,
833							HashRef,
834							);
835	2					1244	my ($self, $args) = $check->(@_);
836
837	2					21	foreach my $token (keys %$args) {
838	2					8	$self->{'defaults'}->{$token} = $args->{$token};
839							}
840	2					6	return $self->{'defaults'};
841							}
842
843							=item debug()
844
845							Turns debugging statements on or off, or returns the
846							current debug setting.
847
848							Expects a true value to turn debugging on, and a false value
849							to turn debugging off.
850
851							$reformat->debug(1); # 1 or 0
852
853							=cut
854
855							sub debug {
856	161			161	1	176	state $check = Type::Params::compile(
857							Object,
858							Maybe[Int],
859							);
860	161					9660	my ($self, $value) = $check->(@_);
861	161	100				1765	$self->{'debug'} = $value if (defined $value);
862	161		100			383	return $self->{'debug'} //= 0;
863							}
864
865							=item prepare_parser_for_regex_with_params()
866
867							Internal method called by L.
868
869							=cut
870
871							sub prepare_parser_for_regex_with_params {
872	3			3	1	9	state $check = Type::Params::compile(
873							Object,
874							Dict[
875							'regex' => RegexpRef,
876							'params' => ArrayRef,
877							],
878							);
879	3					5221	my ($self, $definition) = $check->(@_);
880
881	3					157	my $regex = $definition->{'regex'};
882	3					5	my $params = $definition->{'params'};
883
884	3					7	state $sub_check = Type::Params::compile(
885							Str,
886							);
887
888							return (
889							sub {
890	3			3		9	my ($date_string) = $sub_check->(@_);
891	3					46	my (@components) = $date_string =~ $regex;
892	3	50				7	return if ! @components;
893	3					5	my %date = ();
894	3					17	@date{@$params} = @components;
895							# TODO: Add named capture values to %date.
896	3					7	return \%date;
897							},
898	3					316	);
899							}
900
901							=item prepare_parser_for_regex_named_capture()
902
903							Internal method called by L.
904
905							=cut
906
907							sub prepare_parser_for_regex_named_capture {
908	107			107	1	97	state $check = Type::Params::compile(
909							Object,
910							Dict[
911							'regex' => RegexpRef,
912							],
913							);
914	107					14833	my ($self, $definition) = $check->(@_);
915
916	107					2534	my $regex = $definition->{'regex'};
917
918	107					103	state $sub_check = Type::Params::compile(
919							Str,
920							);
921
922							return (
923							sub {
924	11			11		28	my ($date_string) = $sub_check->(@_);
925	11					166	my $success = $date_string =~ $regex;
926	11	50				22	return if ! $success;
927	11			11		5563	my %date = %+;
	11					4276
	11					51382
	11					239
928
929							# Move 'hour_12' if the wrong value.
930	11	50	33			74	if (
			66
931							defined($date{'hour_12'})
932							&&
933							(
934							$date{'hour_12'} > 12
935							\|\|
936							$date{'hour_12'} == 0
937							)
938							) {
939	0					0	$date{'hour'} = delete $date{'hour_12'};
940							}
941
942	11					21	return \%date;
943							},
944	107					1697	);
945							}
946
947							=item prepare_parser_for_strptime()
948
949							Internal method called by L.
950
951							=cut
952
953							sub prepare_parser_for_strptime {
954	10			10	1	17	state $check = Type::Params::compile(
955							Object,
956							Dict[
957							'strptime' => Str,
958							],
959							);
960	10					6335	my ($self, $definition) = $check->(@_);
961
962	10					302	my $strptime = $definition->{'strptime'};
963	10					15	my $format = $strptime;
964
965							# Preprocess some tokens that expand into other tokens.
966	10					18	foreach my $preprocess (@$STRPTIME_PREPROCESS) {
967	80					398	$format =~ s/$preprocess->{'token'}/$preprocess->{'replacement'}/g;
968							}
969
970							# Escape everything in the strptime string so we can turn it into a regex.
971	10					23	$format = quotemeta($format);
972
973							# Unescape the parts that we will replace as tokens.
974							# regex from DateTime::Format::Strptime
975	10					61	$format =~ s/(?
976	10					50	$format =~ s/%\\\{([^\}]+)\\\}/%{$1}/g;
977
978							# Replace expanded tokens: %{year}
979	10					15	$format =~
980							s/
981							%{(\w+)}
982							/
983	0	0				0	$TOKENS->{$1} ? $TOKENS->{$1}->{'regex'} : "\%{$1}"
984							/sgex;
985
986							# Replace single character tokens: %Y
987	10					32	$format =~
988							s/
989							(%[%a-zA-Z])
990							/
991	68					112	$self->strptime_token_to_regex($1)
992							/sgex;
993
994							# Postprocess some tokens that expand into special characters.
995	10					22	foreach my $postprocess (@$STRPTIME_POSTPROCESS) {
996	30					195	$format =~ s/$postprocess->{'token'}/$postprocess->{'replacement'}/g;
997							}
998
999	10	50				273	say "Crafted regex: $strptime -> $format" if $self->{'debug'};
1000	10					387	return $self->prepare_parser_for_regex_named_capture(
1001							{
1002							'regex' => qr/$format/,
1003							},
1004							);
1005							}
1006
1007							=item prepare_parser_heuristic()
1008
1009							Internal method called by L.
1010
1011							=cut
1012
1013							sub prepare_parser_heuristic {
1014	132			132	1	98	state $check = Type::Params::compile(
1015							Object,
1016							Dict[
1017							'heuristic' => Enum[qw(ymd dmy mdy)],
1018							],
1019							);
1020	132					4586	my ($self, $definition) = $check->(@_);
1021
1022	132					2081	my $hint = $definition->{'heuristic'};
1023	132					127	my $known_parsers = {}; # Populated when we add a parser to the stack in front of this one.
1024	132					352	my $regex_for_date = qr{ \w+ [-/\.] \w+ (?:[-/\.] \w+) }x;
1025	132					216	my $regex_for_time = qr/ \d\d? : \d\d (?::\d\d) /x;
1026	132					205	my $regex_for_time_zone_offset = qr/ [-+] \d\d? (?:\d\d) /x;
1027	132					211	my $regex_for_time_zone_long_name = qr{ [[:alpha:]]+ / [[:alpha:]]+ (?:_ [[:alpha:]]+) }x;
1028	132					213	my $regex_for_julian_day = qr/ J\d+ /x;
1029	132					186	my $regex_for_number = qr/ \d+ /x;
1030	132					180	my $regex_for_string = qr/ [[:alpha:]]+ /x;
1031	132					188	my $regex_for_whitespace = qr/ \s+ /x;
1032	132					971	my $token_regex = qr{
1033							# time zone offset
1034							( $regex_for_time_zone_offset )
1035							# time
1036							\| ( $regex_for_time )
1037							# time zone long name
1038							\| ( $regex_for_time_zone_long_name )
1039							# date
1040							\| ( $regex_for_date )
1041							# Julian day
1042							\| ( $regex_for_julian_day )
1043							# number
1044							\| ( $regex_for_number )
1045							# string
1046							\| ( $regex_for_string )
1047							# whitespace
1048							\| ( $regex_for_whitespace )
1049							# anything else
1050							\| ( . )
1051							}x;
1052
1053	132					109	state $sub_check = Type::Params::compile(
1054							Str,
1055							);
1056
1057							return (
1058							sub {
1059	132			132		211	my ($date_string) = $sub_check->(@_);
1060	132					627	my $order_string; # Will be set with ymd\|dmy\|mdy when we have enough information.
1061
1062							# Split string into parts that can be identified later.
1063	132	50				2902	say "Parsing date string into parts: $date_string" if $self->{'debug'};
1064	132					2116	my @parts = $date_string =~ /$token_regex/g;
1065	132	50				294	return if ! @parts;
1066
1067							# Try to identify what each part is, based on what it looks like, and what order it is in.
1068	132					167	my @parser_parts = ();
1069	132					160	my $date = {};
1070	132					163	foreach my $part (grep { defined($_) } @parts) {
	3375					2046
1071	343	50				1089	say "Trying to identify part: '$part'" if $self->{'debug'};
1072	343	50				3310	if ($part =~ /^$regex_for_time_zone_offset$/) {
		100
		50
		100
		100
		100
		100
		100
1073	0	0				0	say " time_zone_offset ($part)" if $self->{'debug'};
1074	0					0	push @parser_parts, $TOKENS->{'time_zone_offset'}->{'regex'};
1075	0					0	$date->{'time_zone_offset'} = $part;
1076							}
1077							elsif ($part =~ /^$regex_for_time$/) {
1078	5					15	my @time = split(/:/, $part);
1079
1080	5	50				18	say " hour ($time[0])" if $self->{'debug'};
1081	5					8	push @parser_parts, $TOKENS->{'hour'}->{'regex'};
1082	5					9	$date->{'hour'} = $time[0];
1083
1084	5	50				12	say " minute ($time[1])" if $self->{'debug'};
1085	5					10	push @parser_parts, quotemeta(':'), $TOKENS->{'minute'}->{'regex'};
1086	5					7	$date->{'minute'} = $time[1];
1087
1088	5	50				10	if (@time > 2) {
1089	5	50				16	say " second ($time[2])" if $self->{'debug'};
1090	5					9	push @parser_parts, quotemeta(':'), $TOKENS->{'second'}->{'regex'};
1091	5					13	$date->{'second'} = $time[2];
1092							}
1093							}
1094							elsif ($part =~ /^$regex_for_time_zone_long_name$/) {
1095	0					0	say " time_zone ($part)";
1096	0					0	push @parser_parts, $TOKENS->{'time_zone'}->{'regex'};
1097	0					0	$date->{'time_zone'} = $part;
1098							}
1099							elsif ($part =~ /^$regex_for_date$/) {
1100	67					216	my @date_parts = split(m\|[-/\.]\|, $part);
1101	67					57	my @order = ();
1102							# PostgreSQL forces reliance on the hint.
1103							#foreach my $index (0..2) {
1104							# if ($date_parts[$index] =~ /^\d+$/) {
1105							# if ($date_parts[$index] > 31) {
1106							# $order[$index] = 'y';
1107							# }
1108							# elsif ($date_parts[$index] > 12) {
1109							# $order[$index] = 'd';
1110							# }
1111							# else {
1112							# $order[$index] = 'm';
1113							# }
1114							# }
1115							# elsif ($date_parts[$index] =~ $TOKENS->{'month_abbr'}->{'regex'}) {
1116							# $order[$index] = 'm';
1117							# }
1118							#}
1119	67					101	$order_string = join('', @order);
1120	67	100	66			747	if (
		100	66
		100	66
			66
			66
1121							$date_parts[0] =~ /^$TOKENS->{'year'}->{'regex'}$/
1122							&&
1123							scalar(keys %$date) == 0
1124							) {
1125	21					46	$order_string = 'ymd';
1126							}
1127							elsif (
1128							$hint eq 'dmy'
1129							&&
1130							(
1131							$date_parts[0] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/
1132							\|\|
1133							$date_parts[0] =~ /^$TOKENS->{'month_name'}->{'regex'}$/
1134							)
1135							) {
1136	2					2	$order_string = 'mdy';
1137							}
1138							elsif (
1139							$hint eq 'mdy'
1140							&&
1141							(
1142							$date_parts[1] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/
1143							\|\|
1144							$date_parts[1] =~ /^$TOKENS->{'month_name'}->{'regex'}$/
1145							)
1146							) {
1147	3					3	$order_string = 'dmy';
1148							}
1149	67	100				125	if ($order_string !~ /^ymd\|dmy\|mdy$/) {
1150	41	50				159	say "Using date token order hint: $hint" if $self->{'debug'};
1151	41					45	$order_string = $hint;
1152							}
1153	67					153	@order = split(//, $order_string);
1154	67					99	foreach my $index (0..2) {
1155	176	100				326	if ($order[$index] eq 'y') {
		100
		50
1156	57	100				248	if ($date_parts[$index] =~ /^$TOKENS->{'year'}->{'regex'}$/) {
		100
1157	34	50				110	say " year ($date_parts[$index])" if $self->{'debug'};
1158	34					52	push @parser_parts, $TOKENS->{'year'}->{'regex'};
1159	34					49	$date->{'year'} = $date_parts[$index];
1160							}
1161							elsif ($date_parts[$index] =~ /^$TOKENS->{'year_abbr'}->{'regex'}$/) {
1162	20	50				65	say " year_abbr ($date_parts[$index])" if $self->{'debug'};
1163	20					30	push @parser_parts, $TOKENS->{'year_abbr'}->{'regex'};
1164	20					35	$date->{'year_abbr'} = $date_parts[$index];
1165							}
1166							else {
1167	3					20	warn "Error parsing year: "
1168							. "value '$date_parts[$index]' out of range ($part); "
1169							. "Perhaps you need a different heuristic hint than '$hint'\n";
1170	3					18	return;
1171							}
1172							}
1173							elsif ($order[$index] eq 'm') {
1174	59	100	100			430	if (
		100
1175							$date_parts[$index] =~ /^$TOKENS->{'month'}->{'regex'}$/
1176							&&
1177							$date_parts[$index] <= 12
1178							) {
1179	42	50				134	say " month ($date_parts[$index])" if $self->{'debug'};
1180	42					54	push @parser_parts, $TOKENS->{'month'}->{'regex'};
1181	42					61	$date->{'month'} = $date_parts[$index];
1182							}
1183							elsif ($date_parts[$index] =~ /^$TOKENS->{'month_abbr'}->{'regex'}$/) {
1184	12	50				42	say " month_abbr ($date_parts[$index])" if $self->{'debug'};
1185	12					16	push @parser_parts, $TOKENS->{'month_abbr'}->{'regex'};
1186	12					22	$date->{'month_abbr'} = $date_parts[$index];
1187							}
1188							else {
1189	5					56	warn "Error parsing month: "
1190							. "value '$date_parts[$index]' out of range ($part); "
1191							. "Perhaps you need a different heuristic hint than '$hint'\n";
1192	5					43	return;
1193							}
1194							}
1195							elsif ($order[$index] eq 'd') {
1196	60	100	100			391	if (
1197							$date_parts[$index] !~ /^$TOKENS->{'day'}->{'regex'}$/
1198							\|\|
1199							$date_parts[$index] > 31
1200							) {
1201	14					141	warn "Error parsing day: "
1202							. "value '$date_parts[$index]' out of range ($part); "
1203							. "Perhaps you need a different heuristic hint than '$hint'\n";
1204	14					99	return;
1205							}
1206	46	50				150	say " day ($date_parts[$index])" if $self->{'debug'};
1207	46					53	push @parser_parts, $TOKENS->{'day'}->{'regex'};
1208	46					67	$date->{'day'} = $date_parts[$index];
1209							}
1210	154	100				514	push @parser_parts, qr\|[-/\.]\| if $index < 2;
1211							}
1212							}
1213							elsif ($part =~ /^$regex_for_julian_day$/) {
1214	3					23	my $success = $part =~ $TOKENS->{'julian_day'}->{'regex'};
1215	3					9	say " julian_day ($part)\n";
1216	3					7	push @parser_parts, $TOKENS->{'julian_day'}->{'regex'};
1217	3					28	$date->{'julian_day'} = $+{'julian_day'};
1218							}
1219							elsif ($part =~ /^$regex_for_number$/) {
1220	130	100	66			489	if (length($part) == 8) {
		100
		100
		100
1221	4					46	my $regex_date =
1222							qr/
1223							$TOKENS->{'year'}->{'regex'}
1224							$TOKENS->{'month'}->{'regex'}
1225							$TOKENS->{'day'}->{'regex'}
1226							/x;
1227	4					18	my $success = $part =~ $regex_date;
1228	4					60	my %ymd = %+;
1229	4					12	foreach my $token ('year', 'month', 'day') {
1230	12					30	say " $token ($ymd{$token})";
1231	12					15	push @parser_parts, $TOKENS->{$token}->{'regex'};
1232	12					26	$date->{$token} = $ymd{$token};
1233							}
1234							}
1235							elsif (length($part) == 6) {
1236	4	100				9	if (defined($date->{'year'})) {
1237							# This is a concatenated time: HHMM
1238	1					15	my $regex_time =
1239							qr/
1240							$TOKENS->{'hour'}->{'regex'}
1241							$TOKENS->{'minute'}->{'regex'}
1242							$TOKENS->{'second'}->{'regex'}
1243							/x;
1244	1					4	my $success = $part =~ $regex_time;
1245	1					10	my %hms = %+;
1246	1					5	foreach my $token ('hour', 'minute', 'second') {
1247	3					8	say " $token ($hms{$token})";
1248	3					5	push @parser_parts, $TOKENS->{$token}->{'regex'};
1249	3					6	$date->{$token} = $hms{$token};
1250							}
1251							}
1252							else {
1253							# This is a concatenated date: YYMMDD
1254	3					32	my $regex_date =
1255							qr/
1256							$TOKENS->{'year_abbr'}->{'regex'}
1257							$TOKENS->{'month'}->{'regex'}
1258							$TOKENS->{'day'}->{'regex'}
1259							/x;
1260	3					17	my $success = $part =~ $regex_date;
1261	3					41	my %ymd = %+;
1262	3					9	foreach my $token ('year_abbr', 'month', 'day') {
1263	9					20	say " $token ($ymd{$token})";
1264	9					13	push @parser_parts, $TOKENS->{$token}->{'regex'};
1265	9					23	$date->{$token} = $ymd{$token};
1266							}
1267							}
1268							}
1269							elsif (length($part) == 3 && defined($date->{'year'})) {
1270							# day_of_year
1271	3	50				10	say " day_of_year ($part)" if $self->{'debug'};
1272	3					6	push @parser_parts, $TOKENS->{'day_of_year'}->{'regex'};
1273	3					8	$date->{'day_of_year'} = $part;
1274							}
1275							elsif (length($part) == 4) {
1276	29	100	66			100	if (defined($date->{'year'}) \|\| defined($date->{'year_abbr'})) {
1277							# This is a concatenated time without seconds: HHMM
1278	2					19	my $regex_time =
1279							qr/
1280							$TOKENS->{'hour'}->{'regex'}
1281							$TOKENS->{'minute'}->{'regex'}
1282							/x;
1283	2					8	my $success = $part =~ $regex_time;
1284	2					20	my %hm = %+;
1285	2					5	foreach my $token ('hour', 'minute') {
1286	4	100				10	if (! $TOKENS->{$token}->{'constraint'}->($hm{$token})) {
1287	2					20	warn "Error parsing $token: "
1288							. "value '$hm{$token}' out of range ($date_string)\n";
1289	2					16	return;
1290							}
1291	2					7	say " $token ($hm{$token})";
1292	2					4	push @parser_parts, $TOKENS->{$token}->{'regex'};
1293	2					2	$date->{$token} = $hm{$token};
1294							}
1295							}
1296							else {
1297							# year (if month and day have not been set, order is now ymd).
1298	27		33			124	my $token = $self->most_likely_token(
1299							'possible_tokens' => ['year'],
1300							'already_claimed' => $date,
1301							'heuristic' => ($order_string // $hint),
1302							'date_string' => $date_string,
1303							'value' => $part,
1304							);
1305	27	50				40	return if ! defined $token;
1306	27	50				172	say " $token ($part)" if $self->{'debug'};
1307	27					42	push @parser_parts, $TOKENS->{$token}->{'regex'};
1308	27					56	$date->{$token} = $part;
1309	27	50	66			125	if (
			33
			33
1310							! defined($date->{'day'})
1311							&&
1312							! defined($date->{'month'})
1313							&&
1314							! defined($date->{'month_abbr'})
1315							&&
1316							! defined($date->{'month_name'})
1317							) {
1318	15		50			58	$order_string \|\|= 'ymd';
1319							}
1320							}
1321							}
1322							else {
1323							# Either month, or day, or year (based on $order_string or $hint or what has been set already).
1324	90	100	66			511	if (($order_string // $hint) eq 'dmy') {
		100	66
		50	66
1325	23		33			102	my $token = $self->most_likely_token(
1326							'possible_tokens' => ['day', 'month', 'year', 'year_abbr'],
1327							'already_claimed' => $date,
1328							'heuristic' => ($order_string // $hint),
1329							'date_string' => $date_string,
1330							'value' => $part,
1331							);
1332	23	100				47	return if ! defined $token;
1333	19	50	33			162	say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'};
1334	19					28	push @parser_parts, $TOKENS->{$token}->{'regex'};
1335	19					47	$date->{$token} = $part;
1336							}
1337							elsif (($order_string // $hint) eq 'mdy') {
1338	24		33			105	my $token = $self->most_likely_token(
1339							'possible_tokens' => ['month', 'day', 'year', 'year_abbr'],
1340							'already_claimed' => $date,
1341							'heuristic' => ($order_string // $hint),
1342							'date_string' => $date_string,
1343							'value' => $part,
1344							);
1345	24	100				48	return if ! defined $token;
1346	20	50	33			175	say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'};
1347	20					30	push @parser_parts, $TOKENS->{$token}->{'regex'};
1348	20					53	$date->{$token} = $part;
1349							}
1350							elsif (($order_string // $hint) eq 'ymd') {
1351	43		66			160	my $token = $self->most_likely_token(
1352							'possible_tokens' => ['year', 'year_abbr', 'month', 'day'],
1353							'already_claimed' => $date,
1354							'heuristic' => ($order_string // $hint),
1355							'date_string' => $date_string,
1356							'value' => $part,
1357							);
1358	43	100				93	return if ! defined $token;
1359	39	50	66			388	say " $token ($part) based on " . ($order_string // $hint) if $self->{'debug'};
1360	39					62	push @parser_parts, $TOKENS->{$token}->{'regex'};
1361	39					99	$date->{$token} = $part;
1362							}
1363							else {
1364	0	0				0	say " number ($part)" if $self->{'debug'};
1365	0					0	push @parser_parts, $regex_for_number;
1366							}
1367							}
1368							}
1369							elsif ($part =~ /^$regex_for_string$/) {
1370							# TODO: Look for time zone abbreviation.
1371	31					117	my $token = $self->most_likely_token(
1372							'possible_tokens' => ['am_or_pm', 'era_abbr', 'month_name', 'month_abbr', 'day_name', 'day_abbr', 'phrase', 'time_zone'],
1373							'already_claimed' => $date,
1374							'date_string' => $date_string,
1375							'value' => $part,
1376							);
1377	31	100				58	if ($token) {
1378	29	100	100			99	if ($token eq 'month_name' \|\| $token eq 'month_abbr') {
1379	24	100				47	if (defined($date->{'month'})) {
1380	6					127	say " $token will need to take the place of month";
1381	6	50	66			31	if (($order_string // $hint) =~ /md/) {
1382	6	50				27	say " day ($date->{'month'}) moved from month" if $self->{'debug'};
1383	6					7	foreach my $parser_part (@parser_parts) {
1384	20	100				38	if ($parser_part =~ /\?/) {
1385	6					12	$parser_part = $TOKENS->{'day'}->{'regex'};
1386							}
1387							}
1388	6					14	$date->{'day'} = delete $date->{'month'};
1389							}
1390							}
1391							}
1392	29	50				433	say " $token ($part)" if $self->{'debug'};
1393	29					61	push @parser_parts, $TOKENS->{$token}->{'regex'};
1394	29					84	$date->{$token} = $part;
1395							}
1396							else {
1397	2	50				45	say " literal ($part)" if $self->{'debug'};
1398	2					7	push @parser_parts, quotemeta($part);
1399							}
1400							}
1401							elsif ($part =~ /^$regex_for_whitespace$/) {
1402	98	50				282	say " whitespace ($part)" if $self->{'debug'};
1403	98					172	push @parser_parts, $regex_for_whitespace;
1404							}
1405							else {
1406	9	50				29	say " literal ($part)" if $self->{'debug'};
1407	9					24	push @parser_parts, quotemeta($part);
1408							}
1409							}
1410
1411							# If am_or_pm is pm, and hour is < 12, change from hour to hour_12 (and the parser).
1412	96	100	66			211	if (defined($date->{'am_or_pm'}) && lc($date->{'am_or_pm'}) eq 'pm' ) {
1413	1	50	33			5	if (defined($date->{'hour'}) && $date->{'hour'} < 12) {
1414	1					3	$date->{'hour_12'} = delete $date->{'hour'};
1415	1					2	foreach my $parser_part (@parser_parts) {
1416	13	100				19	if ($parser_part =~ /\?/) {
1417	1					3	$parser_part =~ s/\?/?/;
1418							}
1419							}
1420							}
1421							}
1422	96					186	my $parser_regex = join('', @parser_parts);
1423	96	50				314	say "Crafted regex: $date_string -> $parser_regex" if $self->{'debug'};
1424
1425							# Add a new parser that will match this date format.
1426	96	50				164	if (! defined($known_parsers->{$parser_regex}) ) {
1427	96					160	$known_parsers->{$parser_regex} = 1;
1428	96					2440	$self->add_parser(
1429							$self->prepare_parser_for_regex_named_capture(
1430							{
1431							'regex' => qr/$parser_regex/,
1432							},
1433							),
1434							);
1435							# Move the heuristic parser to the last slot again.
1436	96					96	push(
1437	96					184	@{ $self->{'active_parsers'} },
1438							splice(
1439	96					171	@{ $self->{'active_parsers'} }, -2, 1
1440							),
1441							);
1442							}
1443
1444	96					257	return $date;
1445							},
1446	132					2343	);
1447							}
1448
1449							=item prepare_formatter_for_arrayref()
1450
1451							Internal method called by L.
1452
1453							=cut
1454
1455							sub prepare_formatter_for_arrayref {
1456	7			7	1	14	state $check = Type::Params::compile(
1457							Object,
1458							Dict[
1459							'params' => ArrayRef[Str],
1460							'structure' => Optional[Enum[qw(array arrayref)]],
1461							],
1462							);
1463	7					10729	my ($self, $definition) = $check->(@_);
1464
1465	7		50			372	my $structure = $definition->{'structure'} // 'arrayref';
1466	7					9	my $params = $definition->{'params'};
1467
1468	7					10	state $sub_check = Type::Params::compile(
1469							HashRef,
1470							);
1471
1472							return (
1473							sub {
1474	7			7		15	my ($date) = $sub_check->(@_);
1475	46		33			102	my @formatted = (
			33
			0
1476							map
1477							{
1478							# Use the value, if available.
1479	7					38	$date->{$_}
1480							//
1481							# Or see if we can determine the value by transforming another field.
1482							$self->transform_token_value(
1483							'target_token' => $_,
1484							'date' => $date,
1485							)
1486							//
1487							# Or see if there is a default value for the field.
1488							$self->{'defaults'}->{$_}
1489							//
1490							# Or just use a value of empty string.
1491							''
1492							}
1493							@$params,
1494							);
1495	7	100				22	return \@formatted if $structure eq 'arrayref';
1496	1					3	return @formatted;
1497							},
1498	7					697	);
1499							}
1500
1501							=item prepare_formatter_for_hashref()
1502
1503							Internal method called by L.
1504
1505							=cut
1506
1507							sub prepare_formatter_for_hashref {
1508	3			3	1	5	state $check = Type::Params::compile(
1509							Object,
1510							Dict[
1511							'params' => ArrayRef[Str],
1512							'structure' => Optional[Enum[qw(hash hashref)]],
1513							],
1514							);
1515	3					5337	my ($self, $definition) = $check->(@_);
1516
1517	3		50			159	my $structure = $definition->{'structure'} // 'hashref';
1518	3					3	my $params = $definition->{'params'};
1519
1520	3					10	my @formatters = $self->prepare_formatter_for_arrayref(
1521							{
1522							'structure' => 'arrayref',
1523							'params' => $params,
1524							},
1525							);
1526
1527	3					6	state $sub_check = Type::Params::compile(
1528							ArrayRef,
1529							);
1530
1531							push @formatters, (
1532							sub {
1533	3			3		6	my ($date) = $sub_check->(@_);
1534	3					13	my %formatted = ();
1535	3					10	@formatted{@$params} = @$date;
1536	3	50				10	return \%formatted if $structure eq 'hashref';
1537	0					0	return %formatted;
1538							},
1539	3					268	);
1540	3					8	return @formatters;
1541							}
1542
1543							=item prepare_formatter_for_coderef()
1544
1545							Internal method called by L.
1546
1547							=cut
1548
1549							sub prepare_formatter_for_coderef {
1550	1			1	1	4	state $check = Type::Params::compile(
1551							Object,
1552							Dict[
1553							'params' => ArrayRef[Str],
1554							'coderef' => CodeRef,
1555							],
1556							);
1557	1					5413	my ($self, $definition) = $check->(@_);
1558
1559	1					90	my $coderef = $definition->{'coderef'};
1560	1					2	my $params = $definition->{'params'};
1561
1562	1					5	my @formatters = $self->prepare_formatter_for_arrayref(
1563							{
1564							'structure' => 'array',
1565							'params' => $params,
1566							},
1567							);
1568
1569	1					3	push @formatters, (
1570							$coderef,
1571							);
1572	1					4	return @formatters;
1573							}
1574
1575							=item prepare_formatter_for_sprintf()
1576
1577							Internal method called by L.
1578
1579							=cut
1580
1581							sub prepare_formatter_for_sprintf {
1582	13			13	1	37	state $check = Type::Params::compile(
1583							Object,
1584							Dict[
1585							'params' => ArrayRef[Str],
1586							'sprintf' => Str,
1587							],
1588							);
1589	13					30513	my ($self, $definition) = $check->(@_);
1590
1591	13					1042	my $sprintf = $definition->{'sprintf'};
1592	13					28	my $params = $definition->{'params'};
1593
1594	13					36	state $sub_check = Type::Params::compile(
1595							HashRef,
1596							);
1597
1598							return (
1599							sub {
1600	13			13		39	my ($date) = $sub_check->(@_);
1601	85		100			275	my $formatted = sprintf(
			66
			50
1602							$sprintf,
1603							map
1604							{
1605							# Use the value, if available.
1606	13					97	$date->{$_}
1607							//
1608							# Or see if we can determine the value by transforming another field.
1609							$self->transform_token_value(
1610							'target_token' => $_,
1611							'date' => $date,
1612							)
1613							//
1614							# Or see if there is a default value for the field.
1615							$self->{'defaults'}->{$_}
1616							//
1617							# Or just use a value of empty string.
1618							''
1619							}
1620							@$params,
1621							);
1622	13					50	return $formatted;
1623							},
1624	13					1793	);
1625							}
1626
1627							=item prepare_formatter_for_strftime()
1628
1629							Internal method called by L.
1630
1631							=cut
1632
1633							sub prepare_formatter_for_strftime {
1634	8			8	1	19	state $check = Type::Params::compile(
1635							Object,
1636							Dict[
1637							'strftime' => Str,
1638							],
1639							);
1640	8					7167	my ($self, $definition) = $check->(@_);
1641
1642	8					343	my $strftime = $definition->{'strftime'};
1643	8					10	my $format = $strftime;
1644	8					14	my $params = [];
1645
1646							# Preprocess some tokens that expand into other tokens.
1647	8					19	foreach my $preprocess (@$STRPTIME_PREPROCESS) {
1648	64					315	$format =~ s/$preprocess->{'token'}/$preprocess->{'replacement'}/g;
1649							}
1650
1651							# Replace single character tokens with expanded tokens: %Y -> %{year}
1652							$format =~
1653	8					53	s/
1654							(%[-_^]?[%a-zA-Z])
1655							/
1656	52					102	$self->strftime_token_to_internal($1)
1657							/sgex;
1658
1659							# Find all tokens.
1660	8					79	my @tokens = $format =~ m/(%{\w+})/g;
1661
1662							# Replace tokens in order, and build $params list.
1663	8					18	foreach my $token (@tokens) {
1664							# Replace expanded tokens: %{year}
1665	52	50				184	if ($token =~ m/%{(\w+)}/) {
1666	52					65	my $internal = $1;
1667	52		50			111	my $sprintf = $TOKENS->{$internal}->{'sprintf'} //
1668							die "Unable to find sprintf definition for token '$internal'";
1669
1670	52	50				1648	say "Internal token $internal maps to sprintf token '$sprintf'." if $self->{'debug'};
1671	52					541	$format =~ s/\Q$token\E/$sprintf/;
1672	52					51	my $alias;
1673	52	100				109	if (defined($TOKENS->{$internal}->{'storage'})) {
1674	2					3	$alias = $TOKENS->{$internal}->{'storage'};
1675							}
1676	52		66			188	push @$params, ($alias // $internal);
1677							}
1678							}
1679
1680							# Postprocess some tokens that expand into special characters.
1681	8					19	foreach my $postprocess (@$STRFTIME_POSTPROCESS) {
1682	16					85	$format =~ s/$postprocess->{'token'}/$postprocess->{'replacement'}/g;
1683							}
1684
1685	8	50				278	say "Crafted sprintf: $strftime -> $format [" . join(', ', @$params) . "]" if $self->{'debug'};
1686	8					55	return $self->prepare_formatter_for_sprintf(
1687							{
1688							'sprintf' => $format,
1689							'params' => $params,
1690							},
1691							);
1692							}
1693
1694							=item strptime_token_to_regex()
1695
1696							Internal method called by L.
1697
1698							=cut
1699
1700							sub strptime_token_to_regex {
1701	68			68	1	54	state $check = Type::Params::compile(
1702							Object,
1703							Str,
1704							);
1705	68					1087	my ($self, $token) = $check->(@_);
1706
1707	68					540	my $internal;
1708	68	50				2056	say "Attempting to convert strptime token $token into a regex." if $self->{'debug'};
1709	68	50				228	if (defined($self->{'strptime_mappings'}->{$token})) {
		100
1710	0					0	$internal = $self->{'strptime_mappings'}->{$token};
1711							}
1712							elsif (defined($DEFAULT_STRPTIME_MAPPINGS->{$token})) {
1713	65					75	$internal = $DEFAULT_STRPTIME_MAPPINGS->{$token};
1714							}
1715
1716	68	100				93	if (! defined($internal)) {
1717	3	50				8	say "No mapping found" if $self->{'debug'};
1718	3					7	return $token; # Perform no substitution.
1719							}
1720
1721	65	50				131	if (! defined($TOKENS->{$internal}->{'regex'})) {
1722	0					0	die "Unable to find regex definition for token '$internal'";
1723							}
1724	65	50				1668	say "Strptime token $token maps to internal token '$internal'." if $self->{'debug'};
1725
1726	65					330	return $TOKENS->{$internal}->{'regex'};
1727							}
1728
1729							=item strftime_token_to_internal
1730
1731							Internal method called by L.
1732
1733							=cut
1734
1735							sub strftime_token_to_internal {
1736	52			52	1	42	state $check = Type::Params::compile(
1737							Object,
1738							Str,
1739							);
1740	52					1138	my ($self, $token) = $check->(@_);
1741
1742	52					451	my $internal;
1743	52	50				1831	say "Attempting to convert strftime token $token into an internal token." if $self->{'debug'};
1744	52	50				121	if (defined($self->{'strftime_mappings'}->{$token})) {
1745	0					0	$internal = $self->{'strftime_mappings'}->{$token};
1746							}
1747	52	50				173	if (defined($self->{'strptime_mappings'}->{$token})) {
		50
		50
1748	0					0	$internal = $self->{'strptime_mappings'}->{$token};
1749							}
1750							elsif (defined($DEFAULT_STRFTIME_MAPPINGS->{$token})) {
1751	0					0	$internal = $DEFAULT_STRFTIME_MAPPINGS->{$token};
1752							}
1753							elsif (defined($DEFAULT_STRPTIME_MAPPINGS->{$token})) {
1754	52					60	$internal = $DEFAULT_STRPTIME_MAPPINGS->{$token};
1755							}
1756
1757	52	50				76	if (! defined($internal)) {
1758	0	0				0	say "No mapping found" if $self->{'debug'};
1759	0					0	return '%' . $token; # Perform no substitution, but escape token for sprintf.
1760							}
1761
1762	52	50				118	if (! defined($TOKENS->{$internal}->{'sprintf'})) {
1763	0					0	die "Unable to find sprintf definition for token '$internal'";
1764							}
1765	52	50				1589	say "Strftime token $token maps to internal token '$internal'." if $self->{'debug'};
1766
1767	52					260	return '%{' . $internal . '}';
1768							}
1769
1770							=item transform_token_value()
1771
1772							Internal method called by L.
1773
1774							=cut
1775
1776							sub transform_token_value {
1777	8			8	1	16	state $check = Type::Params::compile(
1778							Object,
1779							slurpy Dict[
1780							'target_token' => Str,
1781							'date' => HashRef,
1782							],
1783							);
1784	8					19706	my ($self, $args) = $check->(@_);
1785
1786	8					502	my $target_token = $args->{'target_token'};
1787	8					12	my $date = $args->{'date'};
1788
1789							# Return the value, if it is already set.
1790	8	50				21	return $date->{$target_token} if defined($date->{$target_token});
1791
1792	8					20	foreach my $transformations ($self->{'transformations'}, $DEFAULT_TRANSFORMATIONS) {
1793							# Look up transformations to $target_token from a field that is defined in $date.
1794	15	100				69	if (defined($transformations->{$target_token})) {
1795	6					5	foreach my $source_token (keys %{$transformations->{$target_token}}) {
	6					19
1796	6	50	33			30	if (defined($date->{$source_token}) && defined($transformations->{$target_token}->{$source_token})) {
1797							# Run the transformation and return the value.
1798	6					19	return $transformations->{$target_token}->{$source_token}->($date);
1799							}
1800							}
1801							}
1802							}
1803
1804	2					30	return;
1805							}
1806
1807							=item most_likely_token()
1808
1809							Internal method called by L.
1810
1811							=cut
1812
1813							sub most_likely_token {
1814	148			148	1	120	state $check = Type::Params::compile(
1815							Object,
1816							slurpy Dict[
1817							'already_claimed' => Optional[HashRef],
1818							'possible_tokens' => ArrayRef,
1819							'heuristic' => Optional[Str],
1820							'value' => Str,
1821							'date_string' => Optional[Str],
1822							],
1823							);
1824	148					12327	my ($self, $args) = $check->(@_);
1825
1826	148		50			5890	my $already_claimed = $args->{'already_claimed'} // {};
1827	148					122	my $possible_tokens = $args->{'possible_tokens'};
1828	148		100			289	my $hint = $args->{'heuristic'} // '';
1829	148					106	my $date_part = $args->{'value'};
1830	148		33			195	my $date_string = $args->{'date_string'} // $date_part;
1831
1832	148					181	foreach my $token (@$possible_tokens) {
1833	379	100				499	if ($token eq 'day') {
1834	54	100				82	next if defined($already_claimed->{'day'});
1835	40	50				238	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1836	40	100				79	if ($date_part > 31) {
1837	8					83	warn "Error parsing day: "
1838							. "value '$date_part' out of range ($date_string); "
1839							. "Perhaps you need a different heuristic hint than '$hint'\n";
1840	8					46	return;
1841							}
1842	32					104	return $token;
1843							}
1844	325	100				389	if ($token eq 'month') {
1845	66	100				104	next if defined($already_claimed->{'month'});
1846	48	100				77	next if defined($already_claimed->{'month_abbr'});
1847	35	100				58	next if defined($already_claimed->{'month_name'});
1848	30	50				188	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1849	30	100				53	if ($date_part > 12) {
1850	4					53	warn "Error parsing month: "
1851							. "value '$date_part' out of range ($date_string); "
1852							. "Perhaps you need a different heuristic hint than '$hint'\n";
1853	4					24	return;
1854							}
1855	26					79	return $token;
1856							}
1857	259	100	100			590	if ($token eq 'year' \|\| $token eq 'year_abbr') {
1858	133	100				182	next if defined($already_claimed->{'year'});
1859	97	100				131	next if defined($already_claimed->{'year_abbr'});
1860	69	100				955	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1861	47					134	return $token;
1862							}
1863
1864							# Any other type of token does not need special handling.
1865	126	50				182	next if defined($already_claimed->{$token});
1866	126	100				3861	next if ($date_part !~ /^$TOKENS->{$token}->{'regex'}$/);
1867	29					129	return $token;
1868							}
1869
1870	2	50				4	if ($hint) {
1871	0					0	warn "Error parsing $possible_tokens->[0]: "
1872							. "elements out of order ($date_string); "
1873							. "Perhaps you need a different heuristic hint than '$hint'\n";
1874							}
1875
1876	2					6	return;
1877							}
1878
1879
1880							=item add_parser()
1881
1882							Adds a parser to the active parsers. When parsing a date string,
1883							the parser will be called if each preceeding parser has failed to
1884							parse the date.
1885
1886							See L for generating a parser in the correct
1887							format.
1888
1889							$reformat->add_parser(
1890							$reformat->prepare_parser( ... ),
1891							);
1892
1893							=cut
1894
1895							sub add_parser {
1896	242			242	1	208	state $check = Type::Params::compile(
1897							Object,
1898							slurpy ArrayRef[CodeRef],
1899							);
1900	242					12219	my ($self, $parsers) = $check->(@_);
1901
1902	242					2401	my $count = push @{ $self->{'active_parsers'} }, @$parsers;
	242					427
1903	242	50				529	return $count ? 1 : 0;
1904							}
1905
1906							=item add_formatter()
1907
1908							Adds a formatter to the active formatters. When formatting a date,
1909							the formatter will be called after each preceeding formatter, receiving
1910							as input the output from the previous formatter.
1911
1912							See L for generating a formatter in the correct
1913							format.
1914
1915							$reformat->add_formatter(
1916							$reformat->prepare_formatter( ... ),
1917							);
1918
1919							=cut
1920
1921							sub add_formatter {
1922	20			20	1	48	state $check = Type::Params::compile(
1923							Object,
1924							slurpy ArrayRef[CodeRef],
1925							);
1926	20					20623	my ($self, $formatters) = $check->(@_);
1927
1928	20					444	my $count = push @{ $self->{'active_formatters'} }, @$formatters;
	20					59
1929	20	50				73	return $count ? 1 : 0;
1930							}
1931
1932							=item parse_date()
1933
1934							Given a date string, attempts to parse it via the active parsers.
1935							Returns a hashref containing the tokens that were extracted
1936							from the date string.
1937
1938							my $date_hashref = $reformat->parse_date($date_string);
1939
1940							=cut
1941
1942							sub parse_date {
1943	146			146	1	1542	state $check = Type::Params::compile(
1944							Object,
1945							Str,
1946							);
1947	146					2258	my ($self, $date_string) = $check->(@_);
1948
1949	146					1007	state $has_parser = ArrayRef[CodeRef];
1950	146	50				1606	if (! $has_parser->($self->{'active_parsers'}) ) {
1951	0					0	die "No parsers defined. Have you called add_parser()?";
1952							}
1953
1954	146					1900	foreach my $parser (@{ $self->{'active_parsers'} }) {
	146					222
1955	146					192	my $date = $parser->($date_string);
1956	146	100				472	return $date if defined($date);
1957							}
1958							# None of the parsers were able to extract the date components.
1959	36					57	return;
1960							}
1961
1962							=item format_date()
1963
1964							Given a hashref containing the tokens that were extracted from a
1965							date string, formats the date using each of the active parsers,
1966							passing the output from the previous formatter to the next formatter.
1967
1968							my $date_string = $reformat->format_date($date_hashref);
1969
1970							=cut
1971
1972							sub format_date {
1973	20			20	1	85	state $check = Type::Params::compile(
1974							Object,
1975							Maybe[HashRef],
1976							);
1977	20					6010	my ($self, $date) = $check->(@_);
1978
1979	20	50				384	return if ! defined($date);
1980
1981	20					42	state $has_formatter = ArrayRef[CodeRef];
1982	20	50				1572	if (! $has_formatter->($self->{'active_formatters'}) ) {
1983	0					0	die "No formatters defined. Have you called add_formatter()?";
1984							}
1985
1986	20					322	my @formatted = ($date);
1987	20					23	foreach my $formatter (@{ $self->{'active_formatters'} }) {
	20					44
1988	24					47	@formatted = $formatter->(@formatted);
1989							}
1990	20	50				72	return $formatted[0] if (scalar(@formatted) == 1);
1991	0					0	return @formatted;
1992							}
1993
1994							=item reformat_date()
1995
1996							Given a date string, attempts to parse it and format it using the
1997							active parsers and formaters.
1998
1999							my $date_string = $reformat->reformat_date($date_string);
2000
2001							=cut
2002
2003							sub reformat_date {
2004	5			5	1	35	state $check = Type::Params::compile(
2005							Object,
2006							Str,
2007							);
2008	5					628	my ($self, $date_string) = $check->(@_);
2009
2010	5					71	my $date = $self->parse_date($date_string);
2011	5					17	my @formatted = $self->format_date($date);
2012	5	50				24	return $formatted[0] if (scalar(@formatted) == 1);
2013	0						return @formatted;
2014							};
2015
2016							=back
2017
2018							=cut
2019
2020							1;
2021							__END__