File Coverage

blib/lib/String/Errf.pm
Criterion Covered Total %
statement 120 121 99.1
branch 58 64 90.6
condition 15 21 71.4
subroutine 30 31 96.7
pod 1 6 16.6
total 224 243 92.1


line stmt bran cond sub pod time code
1 2     2   38699 use strict;
  2         3  
  2         46  
2 2     2   6 use warnings;
  2         1  
  2         77  
3             package String::Errf; # I really wanted to call it String::Fister.
4             $String::Errf::VERSION = '0.008';
5 2     2   775 use String::Formatter 0.102081 ();
  2         20586  
  2         50  
6 2     2   776 use parent 'String::Formatter';
  2         415  
  2         6  
7             # ABSTRACT: a simple sprintf-like dialect
8              
9 2     2   80 use Scalar::Util ();
  2         2  
  2         85  
10              
11             #pod =head1 SYNOPSIS
12             #pod
13             #pod use String::Errf qw(errf);
14             #pod
15             #pod print errf "This process was started at %{start}t with %{args;argument}n.\n",
16             #pod { start => $^T, args => 0 + @ARGV };
17             #pod
18             #pod ...might print something like:
19             #pod
20             #pod This process was started at 2010-10-17 14:05:29 with 0 arguments.
21             #pod
22             #pod =head1 DESCRIPTION
23             #pod
24             #pod String::Errf provides C, a simple string formatter that works something
25             #pod like C>. It is implemented using
26             #pod L and L. Their documentation may be useful
27             #pod in understanding or extending String::Errf. The C subroutine is only
28             #pod available when imported. Calling L will not do what you
29             #pod want.
30             #pod
31             #pod =head1 DIFFERENCES FROM SPRINTF
32             #pod
33             #pod The data passed to C should be organized in a single hashref, not a list.
34             #pod
35             #pod Formatting codes require named parameters, and the available codes are
36             #pod different. See L below.
37             #pod
38             #pod As with most String::Formatter formatters, C<%> is not a format code. If you
39             #pod want a literal C<%>, do not put anything between the two percent signs, just
40             #pod write C<%%>.
41             #pod
42             #pod =head2 UNDEF HANDLING
43             #pod
44             #pod By default, formatting codes tend to treat C like Perl does: coercing it
45             #pod to an empty string or zero. This was a bad initial decision and will probably
46             #pod change. A C handler can be provided when importing C to setup
47             #pod a callback for how undefs should be handled. These two possibilities seem
48             #pod useful:
49             #pod
50             #pod # Very lax; undefs always turn into the same string:
51             #pod use String::Errf errf => { on_undef => sub { '(undef)' } };
52             #pod
53             #pod # Strict; undefs are never valid:
54             #pod use String::Errf errf => { on_undef => sub {
55             #pod Carp::croak("undef passed to $_[1]{literal}") } };
56             #pod } };
57             #pod
58             #pod =head2 FORMATTING CODES
59             #pod
60             #pod C formatting codes I a set of arguments between the C<%> and the
61             #pod formatting code letter. These arguments are placed in curly braces and
62             #pod separated by semicolons. The first argument is the name of the data to look
63             #pod for in the format data. For example, this is a valid use of C:
64             #pod
65             #pod errf "The current time in %{tz}s is %{now;local}t.", {
66             #pod tz => $ENV{TZ},
67             #pod now => time,
68             #pod };
69             #pod
70             #pod The second argument, if present, may be a compact form for multiple named
71             #pod arguments. The rest of the arguments will be named values in the form
72             #pod C. The examples below should help clarify how arguments are
73             #pod passed. When an argument appears in both a compact and named form, the named
74             #pod form trumps the compact form.
75             #pod
76             #pod The specific codes and their arguments are:
77             #pod
78             #pod =head3 s for string
79             #pod
80             #pod The C format code is for any string, and takes no arguments. It just
81             #pod includes the named item from the input data.
82             #pod
83             #pod errf "%{name}s", { name => 'John Smith' }; # returns "John Smith"
84             #pod
85             #pod Remember, C does I have any of the left- or right-padding formatting
86             #pod that C provides. It is not meant for building tables, only strings.
87             #pod
88             #pod =head3 i for integer
89             #pod
90             #pod The C format code is used for integers. It takes one optional argument,
91             #pod C, which defaults to the empty string. C may be given as the
92             #pod compact argument, standing alone. C is used to prefix non-negative
93             #pod integers. It may only be a plus sign.
94             #pod
95             #pod errf "%{x}i", { x => 10 }; # returns "10"
96             #pod errf "%{x;+}i", { x => 10 }; # returns "+10"
97             #pod
98             #pod errf "%{x;prefix=+}i", { x => 10 }; # returns "+10"
99             #pod
100             #pod The rounding behavior for non-integer values I.
101             #pod
102             #pod =head3 f for float (or fractional)
103             #pod
104             #pod The C format code is for numbers with sub-integer precision. It works just
105             #pod like C, but adds a C argument which specifies how many decimal
106             #pod places of precision to display. The compact argument may be just the prefix or
107             #pod the prefix followed by a period followed by the precision.
108             #pod
109             #pod errf "%{x}f", { x => 10.1234 }; # returns "10";
110             #pod errf "%{x;+}f", { x => 10.1234 }; # returns "+10";
111             #pod
112             #pod errf "%{x;.2}f", { x => 10.1234 }; # returns "10.12";
113             #pod errf "%{x;+.2}f", { x => 10.1234 }; # returns "+10.12";
114             #pod
115             #pod errf "%{x;precision=.2}f", { x => 10.1234 }; # returns "10.12";
116             #pod errf "%{x;prefix=+;precision=.2}f", { x => 10.1234 }; # returns "+10.12";
117             #pod
118             #pod =head3 t for time
119             #pod
120             #pod The C format code is used to format timestamps provided in epoch seconds.
121             #pod It can be given two arguments: C and C.
122             #pod
123             #pod C can be either date, time, or datetime, and indicates what part of the
124             #pod timestamp should be displayed. The default is datetime. C requests that
125             #pod the timestamp be displayed in either UTC or the local time zone. The default
126             #pod is local.
127             #pod
128             #pod The compact form is just C alone.
129             #pod
130             #pod # Assuming our local time zone is America/New_York...
131             #pod
132             #pod errf "%{x}t", { x => 1280530906 }; # "2010-07-30 19:01:46"
133             #pod errf "%{x;type=date}t", { x => 1280530906 }; # "2010-07-30"
134             #pod errf "%{x;type=time}t", { x => 1280530906 }; # "19:01:46"
135             #pod errf "%{x;type=datetime}t", { x => 1280530906 }; # "2010-07-30 19:01:46"
136             #pod
137             #pod errf "%{x;tz=UTC}t", { x => 1280530906 }; # "2010-07-30 23:01:46 UTC"
138             #pod errf "%{x;tz=UTC;type=date}t", { x => 1280530906 }; # "2010-07-30 UTC"
139             #pod errf "%{x;tz=UTC;type=time}t", { x => 1280530906 }; # "23:01:46 UTC"
140             #pod errf "%{x;tz=UTC;type=datetime}t", { x => 1280530906 }; # "2010-07-30 23:01:46 UTC"
141             #pod
142             #pod =head3 n and N for numbered
143             #pod
144             #pod The C and C format codes are for picking words based on number. It takes
145             #pod two of its own arguments, C and C, as well as C and
146             #pod C which may be used for formatting the number itself.
147             #pod
148             #pod If the value being formatted is 1, the singular word is used. Otherwise, the
149             #pod plural form is used.
150             #pod
151             #pod errf "%{x;singular=dog;plural=dogs}n", { x => 0 }; # 0 dogs
152             #pod errf "%{x;singular=dog;plural=dogs}n", { x => 1 }; # 1 dog
153             #pod errf "%{x;singular=dog;plural=dogs}n", { x => 2 }; # 2 dogs
154             #pod
155             #pod errf "%{x;singular=dog;plural=dogs}n", { x => 1.4 }; # 1.4 dogs
156             #pod errf "%{x;singular=dog;plural=dogs;precision=1}n", { x => 1.4 }; # 1.4 dogs
157             #pod errf "%{x;singular=dog;plural=dogs;precision=0}n", { x => 1.4 }; # 1 dog
158             #pod
159             #pod If C is used instead of C, the number will not be included, only the
160             #pod chosen word.
161             #pod
162             #pod errf "%{x;singular=is;plural=are}N", { x => 0 }; # are
163             #pod errf "%{x;singular=is;plural=are}N", { x => 1 }; # is
164             #pod errf "%{x;singular=is;plural=are}N", { x => 2 }; # are
165             #pod
166             #pod errf "%{x;singular=is;plural=are}N", { x => 1.4 }; # 1.4 are
167             #pod errf "%{x;singular=is;plural=are;precision=1}N", { x => 1.4 }; # 1.4 are
168             #pod errf "%{x;singular=is;plural=are;precision=0}N", { x => 1.4 }; # 1 is
169             #pod
170             #pod The compact form may take any of the following forms:
171             #pod
172             #pod word - equivalent to singular=word
173             #pod
174             #pod word+suffix - equivalent to singular=word;plural=wordsuffix
175             #pod
176             #pod word1/word2 - equivalent to singular=word;plural=word2
177             #pod
178             #pod If no singular form is given, an exception is thrown. If no plural form is
179             #pod given, one will be generated according to some basic rules of English
180             #pod noun orthography.
181             #pod
182             #pod =head3
183             #pod
184             #pod =cut
185              
186 2     2   5 use Carp ();
  2         2  
  2         45  
187 2     2   972 use Time::Piece ();
  2         13481  
  2         37  
188 2     2   8 use Params::Util ();
  2         2  
  2         131  
189              
190             use Sub::Exporter -setup => {
191             exports => {
192             errf => sub {
193 4         219 my ($class, $name, $arg) = @_;
194 4         6 my $fmt = $class->new($arg);
195 4     72   13 return sub { $fmt->format(@_) };
  72         23683  
196             },
197             }
198 2     2   6 };
  2         2  
  2         17  
199              
200             sub new {
201 4     4 1 4 my $class = shift;
202 4         14 my $self = $class->SUPER::new(@_);
203 4         23 my $arg = shift;
204 4         5 $self->{'String::Errf'} = {};
205 4 100 33     13 if ($arg && $arg->{on_undef}) {
206 2         3 $self->{'String::Errf'}{on_undef} = $arg->{on_undef};
207             }
208              
209 4         4 return $self;
210             }
211              
212             sub default_codes {
213             return {
214 4     4 0 24 i => '_format_int',
215             f => '_format_float',
216             t => '_format_timestamp',
217             s => '_format_string',
218             n => '_format_numbered',
219             N => '_format_numbered',
220             };
221             }
222              
223 4     4 0 41 sub default_input_processor { 'require_named_input' }
224 4     4 0 35 sub default_format_hunker { '__hunk_errf' }
225 4     4 0 23 sub default_string_replacer { '__replace_errf' }
226 4     4 0 20 sub default_hunk_formatter { '__format_errf' }
227              
228             my $regex = qr/
229             (% # leading '%'
230             (?:{ # {
231             ([^;]*?) # mandatory argument name
232             (?: ; ([^\}]*?) )? # optional extras after semicolon
233             }) # }
234             ($|.) # potential conversion character
235             )
236             /xi;
237              
238             sub __hunk_errf {
239 72     72   379 my ($self, $string) = @_;
240              
241 72         54 my @to_fmt;
242 72         62 my $pos = 0;
243              
244 72         668 while ($string =~ m{\G(.*?)$regex}gs) {
245 80         350 push @to_fmt, $1, {
246             literal => $2,
247             argument => $3,
248             extra => $4,
249             conversion => $5,
250             };
251              
252 80         290 $pos = pos $string;
253             }
254              
255 72 100       115 push @to_fmt, substr $string, $pos if $pos < length $string;
256              
257 72         143 return \@to_fmt;
258             }
259              
260             sub __replace_errf {
261 72     72   692 my ($self, $hunks, $input) = @_;
262              
263 72         63 my $heap = {};
264 72         118 my $code = $self->codes;
265              
266 72         178 for my $i (grep { ref $hunks->[$_] } 0 .. $#$hunks) {
  161         268  
267 76         63 my $hunk = $hunks->[ $i ];
268 76         93 my $conv = $code->{ $hunk->{conversion} };
269              
270 76 100       400 Carp::croak("Unknown conversion in stringf: $hunk->{conversion}")
271             unless defined $conv;
272              
273 74         80 $hunk->{replacement} = $input->{ $hunk->{argument} };
274 74 100       295 $hunk->{args} = [ $hunk->{extra} ? split /;/, $hunk->{extra} : () ];
275             }
276             }
277              
278             sub __format_errf {
279 74     74   352 my ($self, $hunk) = @_;
280              
281 74         94 my $conv = $self->codes->{ $hunk->{conversion} };
282              
283 74 50       187 Carp::croak("Unknown conversion in stringf: $hunk->{conversion}")
284             unless defined $conv;
285              
286 74 100 66     140 if (
287             ! defined $hunk->{replacement}
288             && (my $on_undef = $self->{'String::Errf'}{on_undef})
289             ) {
290 2         5 return $self->$on_undef($hunk);
291             }
292              
293 72         146 return $self->$conv($hunk->{replacement}, $hunk->{args}, $hunk);
294             }
295              
296             sub _proc_args {
297 105     105   84 my ($self, $input, $parse_compact) = @_;
298              
299 105 100       186 return $input if ref $input eq 'HASH';
300              
301             $parse_compact ||= sub {
302 0     0   0 Carp::croak("no compact format allowed, but compact format found");
303 69   50     83 };
304              
305 69         94 my @args = @$input;
306              
307 69 100 100     344 my $first = (defined $args[0] and length $args[0] and $args[0] !~ /=/)
308             ? shift @args
309             : undef;
310              
311             my %param = (
312 53         65 ($first ? %{ $parse_compact->($first) } : ()),
313 69 100       83 (map {; split /=/, $_, 2 } @args),
  25         71  
314             );
315              
316 69         140 return \%param;
317             }
318              
319             # Likely integer formatting options are:
320             # prefix (+ for positive numbers)
321             #
322             # Other options like (minwidth, precision, fillchar) are not out of the
323             # question, but if this system is to be used for formatting simple
324             # user-oriented error messages, they seem really unlikely to be used. Put off
325             # supplying them! -- rjbs, 2010-07-30
326             sub _format_int {
327 6     6   6 my ($self, $value, $rest) = @_;
328              
329             my $arg = $self->_proc_args($rest, sub {
330 3 50   3   15 return { prefix => $_[0] eq '+' ? '+' : '', }
331 6         19 });
332              
333 6         12 my $int_value = int $value;
334 6 50       9 $value = sprintf '%.0f', $value unless $int_value == $value;
335              
336 6 100       12 return $value if $value < 0;
337              
338 4 100       7 $arg->{prefix} = '' unless defined $arg->{prefix};
339              
340 4         17 return "$arg->{prefix}$value";
341             }
342              
343              
344             # Likely float formatting options are:
345             # prefix (+ for positive numbers)
346             # precision
347             #
348             # My remarks above for "int" go for floats, too. -- rjbs, 2010-07-30
349             sub _format_float {
350 54     54   45 my ($self, $value, $rest) = @_;
351              
352             my $arg = $self->_proc_args($rest, sub {
353 12     12   41 my ($prefix_str, $prec) = $_[0] =~ /\A(\+?)(?:\.(\d+))?\z/;
354 12         57 return { prefix => $prefix_str, precision => $prec };
355 54         123 });
356              
357             undef $arg->{precision}
358 54 100 66     177 unless defined $arg->{precision} and length $arg->{precision};
359              
360 54 100       84 $arg->{prefix} = '' unless defined $arg->{prefix};
361              
362             $value = defined $arg->{precision}
363 54 100       125 ? sprintf("%0.$arg->{precision}f", $value)
364             : $value;
365              
366 54 100       180 return $value < 0 ? $value : "$arg->{prefix}$value";
367             }
368              
369             sub _format_timestamp {
370 9     9   9 my ($self, $value, $rest) = @_;
371              
372             my $arg = $self->_proc_args($rest, sub {
373 6     6   19 return { type => $_[0] };
374 9         25 });
375              
376 9   100     32 my $type = $arg->{type} || 'datetime';
377 9   100     19 my $zone = $arg->{tz} || 'local';
378              
379 9 50       18 my $format = $type eq 'datetime' ? '%Y-%m-%d %H:%M:%S'
    100          
    100          
380             : $type eq 'date' ? '%Y-%m-%d'
381             : $type eq 'time' ? '%H:%M:%S'
382             : Carp::croak("unknown format type for %t: $type");
383              
384             # Supplying a time zone is *strictly informational*. -- rjbs, 2010-10-15
385 9 50 66     21 Carp::croak("illegal time zone for %t: $zone")
386             unless $zone eq 'local' or $zone eq 'UTC';
387              
388 9 100       9 my $method = $zone eq 'UTC' ? 'gmtime' : 'localtime';
389 9         28 my $piece = Time::Piece->$method($value);
390              
391 9         325 my $str = $piece->strftime($format);
392              
393 9 100       244 return $zone eq 'UTC' ? "$str UTC" : $str;
394             }
395              
396             sub _format_string {
397 3     3   3 my ($self, $value, $rest) = @_;
398 3         7 return $value;
399             }
400              
401             sub _pluralize {
402 29     29   23 my ($singular) = @_;
403              
404 29 100       105 return $singular =~ /(?:[xzs]|sh|ch)\z/ ? "${singular}es"
    100          
405             : $singular =~ s/y\z/ies/ ? $singular
406             : "${singular}s";
407             }
408              
409             sub _format_numbered {
410 36     36   35 my ($self, $value, $rest, $hunk) = @_;
411              
412             my $arg = $self->_proc_args($rest, sub {
413 32     32   31 my ($word) = @_;
414              
415 32         127 my ($singular, $divider, $extra) = $word =~ m{\A(.+?)(?: ([/+]) (.+) )?\z}x;
416              
417 32 100       50 $divider = '' unless defined $divider; # just to avoid warnings
418              
419 32 100       87 my $plural = $divider eq '/' ? $extra
    100          
420             : $divider eq '+' ? "$singular$extra"
421             : _pluralize($singular);
422              
423 32         141 return { singular => $singular, plural => $plural };
424 36         115 });
425              
426             $value = $self->_format_float($value, {
427             prefix => $arg->{prefix},
428             precision => $arg->{precision},
429 36         152 });
430              
431             Carp::croak("no word given to number-based formatter")
432 36 50       62 unless defined $arg->{singular};
433              
434 36 100       52 $arg->{plural} = _pluralize($arg->{singular}) unless defined $arg->{plural};
435              
436 36 100       64 my $formed = abs($value) == 1 ? $arg->{singular} : $arg->{plural};
437              
438 36 100       100 return $formed if $hunk->{conversion} eq 'N';
439 18         66 return "$value $formed";
440             }
441              
442             1;
443              
444             __END__