File Coverage

blib/lib/Locale/Maketext.pm
Criterion Covered Total %
statement 328 386 84.9
branch 113 176 64.2
condition 46 100 46.0
subroutine 40 44 90.9
pod 12 19 63.1
total 539 725 74.3


line stmt bran cond sub pod time code
1             package Locale::Maketext;
2 19     19   1975798 use strict;
  19         42  
  19         1000  
3             our $USE_LITERALS;
4 17     17   116 use Carp ();
  17         34  
  17         478  
5 17     17   10156 use I18N::LangTags ();
  17         63267  
  17         594  
6 17     17   8427 use I18N::LangTags::Detect ();
  17         36921  
  17         971  
7              
8             #--------------------------------------------------------------------------
9              
10 17 50   17   128 BEGIN { unless(defined &DEBUG) { *DEBUG = sub () {0} } }
  17         1687  
11             # define the constant 'DEBUG' at compile-time
12              
13             # turn on utf8 if we have it (this is what GutsLoader.pm used to do essentially )
14             # use if (exists $INC{'utf8.pm'} || eval 'use utf8'), 'utf8';
15             BEGIN {
16              
17             # if we have it || we can load it
18 17 50 33 17   116 if ( exists $INC{'utf8.pm'} || eval { local $SIG{'__DIE__'};require utf8; } ) {
  17         133  
  17         7980  
19 17         4692 utf8->import();
20 17         11211 DEBUG and warn " utf8 on for _compile()\n";
21             }
22             else {
23 0         0 DEBUG and warn " utf8 not available for _compile() ($INC{'utf8.pm'})\n$@\n";
24             }
25             }
26              
27              
28             our $VERSION = '1.33';
29             our @ISA = ();
30              
31             our $MATCH_SUPERS = 1;
32             our $MATCH_SUPERS_TIGHTLY = 1;
33             our $USING_LANGUAGE_TAGS = 1;
34             # Turning this off is somewhat of a security risk in that little or no
35             # checking will be done on the legality of tokens passed to the
36             # eval("use $module_name") in _try_use. If you turn this off, you have
37             # to do your own taint checking.
38              
39             $USE_LITERALS = 1 unless defined $USE_LITERALS;
40             # a hint for compiling bracket-notation things.
41              
42             my %isa_scan = ();
43              
44             ###########################################################################
45              
46             sub quant {
47 2     2 1 6 my($handle, $num, @forms) = @_;
48              
49 2 50       6 return $num if @forms == 0; # what should this mean?
50 2 50 33     6 return $forms[2] if @forms > 2 and $num == 0; # special zeroth case
51              
52             # Normal case:
53             # Note that the formatting of $num is preserved.
54 2         9 return( $handle->numf($num) . ' ' . $handle->numerate($num, @forms) );
55             # Most human languages put the number phrase before the qualified phrase.
56             }
57              
58              
59             sub numerate {
60             # return this lexical item in a form appropriate to this number
61 0     0 1 0 my($handle, $num, @forms) = @_;
62 0         0 my $s = ($num == 1);
63              
64 0 0       0 return '' unless @forms;
65 0 0       0 if(@forms == 1) { # only the headword form specified
66 0 0       0 return $s ? $forms[0] : ($forms[0] . 's'); # very cheap hack.
67             }
68             else { # sing and plural were specified
69 0 0       0 return $s ? $forms[0] : $forms[1];
70             }
71             }
72              
73             #--------------------------------------------------------------------------
74              
75             sub numf {
76 2     2 1 5 my($handle, $num) = @_[0,1];
77 2 50 33     10 if($num < 10_000_000_000 and $num > -10_000_000_000 and $num == int($num)) {
      33        
78 2         4 $num += 0; # Just use normal integer stringification.
79             # Specifically, don't let %G turn ten million into 1E+007
80             }
81             else {
82 0         0 $num = CORE::sprintf('%G', $num);
83             # "CORE::" is there to avoid confusion with the above sub sprintf.
84             }
85 2         5 while( $num =~ s/^([-+]?\d+)(\d{3})/$1,$2/s ) {1} # right from perlfaq5
  0         0  
86             # The initial \d+ gobbles as many digits as it can, and then we
87             # backtrack so it un-eats the rightmost three, and then we
88             # insert the comma there.
89              
90 2 50 33     8 $num =~ tr<.,><,.> if ref($handle) and $handle->{'numf_comma'};
91             # This is just a lame hack instead of using Number::Format
92 2         9 return $num;
93             }
94              
95             sub sprintf {
96 17     17   137 no integer;
  17         29  
  17         171  
97 6     6 1 44 my($handle, $format, @params) = @_;
98 6         48 return CORE::sprintf($format, @params);
99             # "CORE::" is there to avoid confusion with myself!
100             }
101              
102             #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#
103              
104 17     17   1319 use integer; # vroom vroom... applies to the whole rest of the module
  17         57  
  17         86  
105              
106             sub language_tag {
107 0   0 0 1 0 my $it = ref($_[0]) || $_[0];
108 0 0       0 return undef unless $it =~ m/([^':]+)(?:::)?$/s;
109 0         0 $it = lc($1);
110 0         0 $it =~ tr<_><->;
111 0         0 return $it;
112             }
113              
114             sub encoding {
115 0     0 1 0 my $it = $_[0];
116             return(
117 0   0     0 (ref($it) && $it->{'encoding'})
118             || 'iso-8859-1' # Latin-1
119             );
120             }
121              
122             #--------------------------------------------------------------------------
123              
124 17     17 0 63 sub fallback_languages { return('i-default', 'en', 'en-US') }
125              
126 17     17 0 39 sub fallback_language_classes { return () }
127              
128             #--------------------------------------------------------------------------
129              
130             sub fail_with { # an actual attribute method!
131 1     1 1 7 my($handle, @params) = @_;
132 1 50       3 return unless ref($handle);
133 1 50       4 $handle->{'fail'} = $params[0] if @params;
134 1         3 return $handle->{'fail'};
135             }
136              
137             #--------------------------------------------------------------------------
138              
139             sub _exclude {
140 40     40   78 my ( $handle, @methods ) = @_;
141              
142 40 100       161 unless ( defined $handle->{'denylist'} ) {
143 17     17   6115 no strict 'refs';
  17         34  
  17         47167  
144              
145             # Don't let people call methods they're not supposed to from maketext.
146             # Explicitly exclude all methods in this package that start with an
147             # underscore on principle.
148             $handle->{'denylist'} = {
149 408         1186 map { $_ => 1 } (
150             qw/
151             blacklist
152             denylist
153             encoding
154             fail_with
155             failure_handler_auto
156             fallback_language_classes
157             fallback_languages
158             get_handle
159             init
160             language_tag
161             maketext
162             new
163             whitelist
164             allowlist
165 17         66 /, grep { /^_/ } keys %{ __PACKAGE__ . "::" }
  650         1127  
  17         428  
166             ),
167             };
168             }
169              
170 40 100       226 if ( scalar @methods ) {
171 6         13 $handle->{'denylist'} = { %{ $handle->{'denylist'} }, map { $_ => 1 } @methods };
  6         59  
  8         76  
172             }
173              
174 40         113 delete $handle->{'_external_lex_cache'};
175 40         89 return;
176             }
177              
178             sub blacklist {
179 20     20 1 3405 my ( $handle, @methods ) = @_;
180 20         86 _exclude ( $handle, @methods );
181 20         38 return;
182             }
183              
184             sub denylist {
185 20     20 1 3681 my ( $handle, @methods ) = @_;
186 20         75 _exclude ( $handle, @methods );
187 20         36 return;
188             }
189              
190             sub _include {
191 6     6   18 my ( $handle, @methods ) = @_;
192 6 50       37 if ( scalar @methods ) {
193 6 100       21 $handle->{'allowlist'} = {} unless defined $handle->{'allowlist'};
194 6         13 $handle->{'allowlist'} = { %{ $handle->{'allowlist'} }, map { $_ => 1 } @methods };
  6         22  
  6         31  
195             }
196              
197 6         42 delete $handle->{'_external_lex_cache'};
198 6         14 return;
199             }
200              
201             sub whitelist {
202 3     3 1 4375 my ( $handle, @methods ) = @_;
203 3         31 _include ( $handle, @methods );
204 3         6 return;
205             }
206              
207             sub allowlist {
208 3     3 1 5317 my ( $handle, @methods ) = @_;
209 3         11 _include ( $handle, @methods );
210 3         10 return;
211             }
212              
213             #--------------------------------------------------------------------------
214              
215             sub failure_handler_auto {
216             # Meant to be used like:
217             # $handle->fail_with('failure_handler_auto')
218              
219 5     5 1 8 my $handle = shift;
220 5         7 my $phrase = shift;
221              
222 5   100     13 $handle->{'failure_lex'} ||= {};
223 5         8 my $lex = $handle->{'failure_lex'};
224              
225 5   66     33 my $value ||= ($lex->{$phrase} ||= $handle->_compile($phrase));
      33        
226              
227             # Dumbly copied from sub maketext:
228 5 100       15 return ${$value} if ref($value) eq 'SCALAR';
  3         20  
229 2 50       6 return $value if ref($value) ne 'CODE';
230             {
231 2         4 local $SIG{'__DIE__'};
  2         6  
232 2         4 eval { $value = &$value($handle, @_) };
  2         24  
233             }
234             # If we make it here, there was an exception thrown in the
235             # call to $value, and so scream:
236 2 100       13 if($@) {
237             # pretty up the error message
238 1         3 $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
239             {\n in bracket code [compiled line $1],}s;
240             #$err =~ s/\n?$/\n/s;
241 1         140 Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
242             # Rather unexpected, but suppose that the sub tried calling
243             # a method that didn't exist.
244             }
245             else {
246 1         7 return $value;
247             }
248             }
249              
250             #==========================================================================
251              
252             sub new {
253             # Nothing fancy!
254 17   33 17 0 142 my $class = ref($_[0]) || $_[0];
255 17         48 my $handle = bless {}, $class;
256 17         102 $handle->blacklist;
257 17         156 $handle->denylist;
258 17         106 $handle->init;
259 17         127 return $handle;
260             }
261              
262 17     17 0 28 sub init { return } # no-op
263              
264             ###########################################################################
265              
266             sub maketext {
267             # Remember, this can fail. Failure is controllable many ways.
268 52 50   52 0 37195 Carp::croak 'maketext requires at least one parameter' unless @_ > 1;
269              
270 52         221 my($handle, $phrase) = splice(@_,0,2);
271 52 50 33     270 Carp::confess('No handle/phrase') unless (defined($handle) && defined($phrase));
272              
273             # backup $@ in case it's still being used in the calling code.
274             # If no failures, we'll re-set it back to what it was later.
275 52         95 my $at = $@;
276              
277             # Copy @_ case one of its elements is $@.
278 52         133 @_ = @_;
279              
280             # Look up the value:
281              
282 52         87 my $value;
283 52 100       1573 if (exists $handle->{'_external_lex_cache'}{$phrase}) {
284 1         3 DEBUG and warn "* Using external lex cache version of \"$phrase\"\n";
285 1         3 $value = $handle->{'_external_lex_cache'}{$phrase};
286             }
287             else {
288 51         147 foreach my $h_r (
289 51 100 33     385 @{ $isa_scan{ref($handle) || $handle} || $handle->_lex_refs }
290             ) {
291 51         103 DEBUG and warn "* Looking up \"$phrase\" in $h_r\n";
292 51 50 66     409 if(exists $h_r->{$phrase}) {
    100          
    100          
    100          
293 11         17 DEBUG and warn " Found \"$phrase\" in $h_r\n";
294 11 50       61 unless(ref($value = $h_r->{$phrase})) {
295             # Nonref means it's not yet compiled. Compile and replace.
296 11 100       33 if ($handle->{'use_external_lex_cache'}) {
297 1         8 $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($value);
298             }
299             else {
300 10         53 $value = $h_r->{$phrase} = $handle->_compile($value);
301             }
302             }
303 11         35 last;
304             }
305             # extending packages need to be able to localize _AUTO and if readonly can't "local $h_r->{'_AUTO'} = 1;"
306             # but they can "local $handle->{'_external_lex_cache'}{'_AUTO'} = 1;"
307             elsif($phrase !~ m/^_/s and ($handle->{'use_external_lex_cache'} ? ( exists $handle->{'_external_lex_cache'}{'_AUTO'} ? $handle->{'_external_lex_cache'}{'_AUTO'} : $h_r->{'_AUTO'} ) : $h_r->{'_AUTO'})) {
308             # it's an auto lex, and this is an autoable key!
309 36         57 DEBUG and warn " Automaking \"$phrase\" into $h_r\n";
310 36 100       91 if ($handle->{'use_external_lex_cache'}) {
311 33         129 $value = $handle->{'_external_lex_cache'}{$phrase} = $handle->_compile($phrase);
312             }
313             else {
314 3         11 $value = $h_r->{$phrase} = $handle->_compile($phrase);
315             }
316 17         52 last;
317             }
318 4         9 DEBUG>1 and print " Not found in $h_r, nor automakable\n";
319             # else keep looking
320             }
321             }
322              
323 33 100       102 unless(defined($value)) {
324 4         5 DEBUG and warn "! Lookup of \"$phrase\" in/under ", ref($handle) || $handle, " fails.\n";
325 4 50 33     15 if(ref($handle) and $handle->{'fail'}) {
326 4         4 DEBUG and warn "WARNING0: maketext fails looking for <$phrase>\n";
327 4         4 my $fail;
328 4 50       10 if(ref($fail = $handle->{'fail'}) eq 'CODE') { # it's a sub reference
329 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
330 0         0 return &{$fail}($handle, $phrase, @_);
  0         0  
331             # If it ever returns, it should return a good value.
332             }
333             else { # It's a method name
334 4         5 $@ = $at; # Put $@ back in case we altered it along the way.
335 4         15 return $handle->$fail($phrase, @_);
336             # If it ever returns, it should return a good value.
337             }
338             }
339             else {
340             # All we know how to do is this;
341 0         0 Carp::croak("maketext doesn't know how to say:\n$phrase\nas needed");
342             }
343             }
344              
345 29 100       122 if(ref($value) eq 'SCALAR'){
346 6         13 $@ = $at; # Put $@ back in case we altered it along the way.
347 6         76 return $$value ;
348             }
349 23 50       84 if(ref($value) ne 'CODE'){
350 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
351 0         0 return $value ;
352             }
353              
354             {
355 23         41 local $SIG{'__DIE__'};
  23         95  
356 23         54 eval { $value = &$value($handle, @_) };
  23         835  
357             }
358             # If we make it here, there was an exception thrown in the
359             # call to $value, and so scream:
360 23 50       163 if ($@) {
361             # pretty up the error message
362 0         0 $@ =~ s{\s+at\s+\(eval\s+\d+\)\s+line\s+(\d+)\.?\n?}
363             {\n in bracket code [compiled line $1],}s;
364             #$err =~ s/\n?$/\n/s;
365 0         0 Carp::croak "Error in maketexting \"$phrase\":\n$@ as used";
366             # Rather unexpected, but suppose that the sub tried calling
367             # a method that didn't exist.
368             }
369             else {
370 23         47 $@ = $at; # Put $@ back in case we altered it along the way.
371 23         149 return $value;
372             }
373 0         0 $@ = $at; # Put $@ back in case we altered it along the way.
374             }
375              
376             ###########################################################################
377              
378             sub get_handle { # This is a constructor and, yes, it CAN FAIL.
379             # Its class argument has to be the base class for the current
380             # application's l10n files.
381              
382 17     17 0 2356186 my($base_class, @languages) = @_;
383 17   33     135 $base_class = ref($base_class) || $base_class;
384             # Complain if they use __PACKAGE__ as a project base class?
385              
386 17 100       60 if( @languages ) {
387 11         23 DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
388 11 50       47 if($USING_LANGUAGE_TAGS) { # An explicit language-list was given!
389             @languages =
390 11         86 map {; $_, I18N::LangTags::alternate_language_tags($_) }
  11         367  
391             # Catch alternation
392             map I18N::LangTags::locale2language_tag($_),
393             # If it's a lg tag, fine, pass thru (untainted)
394             # If it's a locale ID, try converting to a lg tag (untainted),
395             # otherwise nix it.
396             @languages;
397 11         419 DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
398             }
399             }
400             else {
401 6         27 @languages = $base_class->_ambient_langprefs;
402             }
403              
404 17         1404 @languages = $base_class->_langtag_munging(@languages);
405              
406 17         81 my %seen;
407 17         49 foreach my $module_name ( map { $base_class . '::' . $_ } @languages ) {
  104         249  
408 24 50       170 next unless length $module_name; # sanity
409 24 100 66     204 next if $seen{$module_name}++ # Already been here, and it was no-go
410             || !&_try_use($module_name); # Try to use() it, but can't it.
411 17         201 return($module_name->new); # Make it!
412             }
413              
414 0         0 return undef; # Fail!
415             }
416              
417             ###########################################################################
418              
419             sub _langtag_munging {
420 17     17   62 my($base_class, @languages) = @_;
421              
422             # We have all these DEBUG statements because otherwise it's hard as hell
423             # to diagnose if/when something goes wrong.
424              
425 17         31 DEBUG and warn 'Lgs1: ', map("<$_>", @languages), "\n";
426              
427 17 50       64 if($USING_LANGUAGE_TAGS) {
428 17         33 DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
429 17         100 @languages = $base_class->_add_supers( @languages );
430              
431 17         64 push @languages, I18N::LangTags::panic_languages(@languages);
432 17         490 DEBUG and warn "After adding panic languages:\n",
433             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
434              
435 17         151 push @languages, $base_class->fallback_languages;
436             # You are free to override fallback_languages to return empty-list!
437 17         33 DEBUG and warn 'Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
438              
439             @languages = # final bit of processing to turn them into classname things
440             map {
441 17         52 my $it = $_; # copy
  104         145  
442 104         180 $it =~ tr<-A-Z><_a-z>; # lc, and turn - to _
443 104         135 $it =~ tr<_a-z0-9><>cd; # remove all but a-z0-9_
444 104         212 $it;
445             } @languages
446             ;
447 17         33 DEBUG and warn "Nearing end of munging:\n",
448             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
449             }
450             else {
451 0         0 DEBUG and warn "Bypassing language-tags.\n",
452             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
453             }
454              
455 17         32 DEBUG and warn "Before adding fallback classes:\n",
456             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
457              
458 17         103 push @languages, $base_class->fallback_language_classes;
459             # You are free to override that to return whatever.
460              
461 17         44 DEBUG and warn "Finally:\n",
462             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
463              
464 17         118 return @languages;
465             }
466              
467             ###########################################################################
468              
469             sub _ambient_langprefs {
470 6     6   23 return I18N::LangTags::Detect::detect();
471             }
472              
473             ###########################################################################
474              
475             sub _add_supers {
476 60     60   326142 my($base_class, @languages) = @_;
477              
478 60 50       220 if (!$MATCH_SUPERS) {
    100          
479             # Nothing
480 0         0 DEBUG and warn "Bypassing any super-matching.\n",
481             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
482              
483             }
484             elsif( $MATCH_SUPERS_TIGHTLY ) {
485 42         71 DEBUG and warn "Before adding new supers tightly:\n",
486             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
487 42         128 @languages = I18N::LangTags::implicate_supers( @languages );
488 42         6895 DEBUG and warn "After adding new supers tightly:\n",
489             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
490              
491             }
492             else {
493 18         17 DEBUG and warn "Before adding supers to end:\n",
494             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
495 18         41 @languages = I18N::LangTags::implicate_supers_strictly( @languages );
496 18         1261 DEBUG and warn "After adding supers to end:\n",
497             ' Lgs@', __LINE__, ': ', map("<$_>", @languages), "\n";
498             }
499              
500 60         191 return @languages;
501             }
502              
503             ###########################################################################
504             #
505             # This is where most people should stop reading.
506             #
507             ###########################################################################
508              
509             my %tried = ();
510             # memoization of whether we've used this module, or found it unusable.
511              
512             sub _try_use { # Basically a wrapper around "require Modulename"
513             # "Many men have tried..." "They tried and failed?" "They tried and died."
514 26 100   26   952 return $tried{$_[0]} if exists $tried{$_[0]}; # memoization
515              
516 18         39 my $module = $_[0]; # ASSUME sane module name!
517 17     17   172 { no strict 'refs';
  17         33  
  17         766  
  18         52  
518 17     17   92 no warnings 'once';
  17         27  
  17         4375  
519             return($tried{$module} = 1)
520 18 100 100     40 if %{$module . '::Lexicon'} or @{$module . '::ISA'};
  18         3579  
  7         52  
521             # weird case: we never use'd it, but there it is!
522             }
523              
524 6         8 DEBUG and warn " About to use $module ...\n";
525              
526 6         29 local $SIG{'__DIE__'};
527 6         11 local $@;
528 6         31 local @INC = @INC;
529 6 100       25 pop @INC if $INC[-1] eq '.';
530 6         419 eval "require $module"; # used to be "use $module", but no point in that.
531              
532 6 100       31 if($@) {
533 5         7 DEBUG and warn "Error using $module \: $@\n";
534 5         49 return $tried{$module} = 0;
535             }
536             else {
537 1         1 DEBUG and warn " OK, $module is used\n";
538 1         9 return $tried{$module} = 1;
539             }
540             }
541              
542             #--------------------------------------------------------------------------
543              
544             sub _lex_refs { # report the lexicon references for this handle's class
545             # returns an arrayREF!
546 17     17   111 no strict 'refs';
  17         46  
  17         621  
547 17     17   77 no warnings 'once';
  17         26  
  17         44923  
548 36   66 36   173 my $class = ref($_[0]) || $_[0];
549 36         52 DEBUG and warn "Lex refs lookup on $class\n";
550 36 50       115 return $isa_scan{$class} if exists $isa_scan{$class}; # memoization!
551              
552 36         80 my @lex_refs;
553 36 100       108 my $seen_r = ref($_[1]) ? $_[1] : {};
554              
555 36 100       160 if( defined( *{$class . '::Lexicon'}{'HASH'} )) {
  36         249  
556 18         58 push @lex_refs, *{$class . '::Lexicon'}{'HASH'};
  18         99  
557             DEBUG and warn '%' . $class . '::Lexicon contains ',
558 18         50 scalar(keys %{$class . '::Lexicon'}), " entries\n";
559             }
560              
561             # Implements depth(height?)-first recursive searching of superclasses.
562             # In hindsight, I suppose I could have just used Class::ISA!
563 36         55 foreach my $superclass (@{$class . '::ISA'}) {
  36         137  
564 24         39 DEBUG and warn " Super-class search into $superclass\n";
565 24 50       90 next if $seen_r->{$superclass}++;
566 24         37 push @lex_refs, @{&_lex_refs($superclass, $seen_r)}; # call myself
  24         151  
567             }
568              
569 36         73 $isa_scan{$class} = \@lex_refs; # save for next time
570 36         133 return \@lex_refs;
571             }
572              
573 0     0 0 0 sub clear_isa_scan { %isa_scan = (); return; } # end on a note of simplicity!
  0         0  
574              
575             #--------------------------------------------------------------------------
576              
577             sub _compile {
578             # This big scary routine compiles an entry.
579             # It returns either a coderef if there's brackety bits in this, or
580             # otherwise a ref to a scalar.
581              
582 51     51   213577 my $string_to_compile = $_[1]; # There are taint issues using regex on @_ - perlbug 60378,27344
583              
584             # The while() regex is more expensive than this check on strings that don't need a compile.
585             # this op causes a ~2% speed hit for strings that need compile and a 250% speed improvement
586             # on strings that don't need compiling.
587 51 100       307 return \"$string_to_compile" if($string_to_compile !~ m/[\[~\]]/ms); # return a string ref if chars [~] are not in the string
588              
589 43         91 my $handle = $_[0];
590              
591 43         83 my(@code);
592 43         101 my(@c) = (''); # "chunks" -- scratch.
593 43         69 my $call_count = 0;
594 43         77 my $big_pile = '';
595             {
596 43         67 my $in_group = 0; # start out outside a group
  43         61  
597 43         81 my($m, @params); # scratch
598              
599 43         299 while($string_to_compile =~ # Iterate over chunks.
600             m/(
601             [^\~\[\]]+ # non-~[] stuff (Capture everything else here)
602             |
603             ~. # ~[, ~], ~~, ~other
604             |
605             \[ # [ presumably opening a group
606             |
607             \] # ] presumably closing a group
608             |
609             ~ # terminal ~ ?
610             |
611             $
612             )/xgs
613             ) {
614 162         256 DEBUG>2 and warn qq{ "$1"\n};
615              
616 162 100 100     912 if($1 eq '[' or $1 eq '') { # "[" or end
    100          
    50          
    0          
    0          
    0          
    0          
    0          
617             # Whether this is "[" or end, force processing of any
618             # preceding literal.
619 66 50       241 if($in_group) {
620 0 0       0 if($1 eq '') {
621 0         0 $handle->_die_pointing($string_to_compile, 'Unterminated bracket group');
622             }
623             else {
624 0         0 $handle->_die_pointing($string_to_compile, 'You can\'t nest bracket groups');
625             }
626             }
627             else {
628 66 100       180 if ($1 eq '') {
629 24         47 DEBUG>2 and warn " [end-string]\n";
630             }
631             else {
632 42         79 $in_group = 1;
633             }
634 66 50       214 die "How come \@c is empty?? in <$string_to_compile>" unless @c; # sanity
635 66 100       387 if(length $c[-1]) {
636             # Now actually processing the preceding literal
637 10         19 $big_pile .= $c[-1];
638 10 100 66     68 if($USE_LITERALS and (
639             (ord('A') == 65)
640             ? $c[-1] !~ m/[^\x20-\x7E]/s
641             # ASCII very safe chars
642             : $c[-1] !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
643             # EBCDIC very safe chars
644             )) {
645             # normal case -- all very safe chars
646 7         18 $c[-1] =~ s/'/\\'/g;
647 7         19 push @code, q{ '} . $c[-1] . "',\n";
648 7         38 $c[-1] = ''; # reuse this slot
649             }
650             else {
651 3         14 $c[-1] =~ s/\\\\/\\/g;
652 3         14 push @code, ' $c[' . $#c . "],\n";
653 3         18 push @c, ''; # new chunk
654             }
655             }
656             # else just ignore the empty string.
657             }
658              
659             }
660             elsif($1 eq ']') { # "]"
661             # close group -- go back in-band
662 43 100       109 if($in_group) {
663 42         70 $in_group = 0;
664              
665 42         80 DEBUG>2 and warn " --Closing group [$c[-1]]\n";
666              
667             # And now process the group...
668              
669 42 50 33     243 if(!length($c[-1]) or $c[-1] =~ m/^\s+$/s) {
670 0         0 DEBUG>2 and warn " -- (Ignoring)\n";
671 0         0 $c[-1] = ''; # reset out chink
672 0         0 next;
673             }
674              
675             #$c[-1] =~ s/^\s+//s;
676             #$c[-1] =~ s/\s+$//s;
677 42         183 ($m,@params) = split(/,/, $c[-1], -1); # was /\s*,\s*/
678              
679             # A bit of a hack -- we've turned "~,"'s into DELs, so turn
680             # 'em into real commas here.
681 42         71 if (ord('A') == 65) { # ASCII, etc
682 42         115 foreach($m, @params) { tr/\x7F/,/ }
  73         181  
683             }
684             else { # EBCDIC (1047, 0037, POSIX-BC)
685             # Thanks to Peter Prymmer for the EBCDIC handling
686             foreach($m, @params) { tr/\x07/,/ }
687             }
688              
689             # Special-case handling of some method names:
690 42 100 66     346 if($m eq '_*' or $m =~ m/^_(-?\d+)$/s) {
    100          
    50          
691             # Treat [_1,...] as [,_1,...], etc.
692 5         16 unshift @params, $m;
693 5         12 $m = '';
694             }
695             elsif($m eq '*') {
696 1         2 $m = 'quant'; # "*" for "times": "4 cars" is 4 times "cars"
697             }
698             elsif($m eq '#') {
699 0         0 $m = 'numf'; # "#" for "number": [#,_1] for "the number _1"
700             }
701              
702             # Most common case: a simple, legal-looking method name
703 42 100 33     586 if($m eq '') {
    100 66        
      33        
      66        
      66        
      33        
704             # 0-length method name means to just interpolate:
705 5         13 push @code, ' (';
706             }
707             elsif($m =~ /^\w+$/s
708             && !$handle->{'blacklist'}{$m}
709             && !$handle->{'denylist'}{$m}
710             && ( !defined $handle->{'whitelist'} || $handle->{'whitelist'}{$m} )
711             && ( !defined $handle->{'allowlist'} || $handle->{'allowlist'}{$m} )
712             # exclude anything fancy and restrict to the allowlist/denylist (and historical whitelist/blacklist).
713             ) {
714 19         64 push @code, ' $_[0]->' . $m . '(';
715             }
716             else {
717             # TODO: implement something? or just too icky to consider?
718 18         117 $handle->_die_pointing(
719             $string_to_compile,
720             "Can't use \"$m\" as a method name in bracket group",
721             2 + length($c[-1])
722             );
723             }
724              
725 24         45 pop @c; # we don't need that chunk anymore
726 24         52 ++$call_count;
727              
728 24         54 foreach my $p (@params) {
729 24 50 33     191 if($p eq '_*') {
    100          
    50          
730             # Meaning: all parameters except $_[0]
731 0         0 $code[-1] .= ' @_[1 .. $#_], ';
732             # and yes, that does the right thing for all @_ < 3
733             }
734             elsif($p =~ m/^_(-?\d+)$/s) {
735             # _3 meaning $_[3]
736 10         46 $code[-1] .= '$_[' . (0 + $1) . '], ';
737             }
738             elsif($USE_LITERALS and (
739             (ord('A') == 65)
740             ? $p !~ m/[^\x20-\x7E]/s
741             # ASCII very safe chars
742             : $p !~ m/[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~\x07]/s
743             # EBCDIC very safe chars
744             )) {
745             # Normal case: a literal containing only safe characters
746 14         31 $p =~ s/'/\\'/g;
747 14         70 $code[-1] .= q{'} . $p . q{', };
748             }
749             else {
750             # Stow it on the chunk-stack, and just refer to that.
751 0         0 push @c, $p;
752 0         0 push @code, ' $c[' . $#c . '], ';
753             }
754             }
755 24         56 $code[-1] .= "),\n";
756              
757 24         133 push @c, '';
758             }
759             else {
760 1         7 $handle->_die_pointing($string_to_compile, q{Unbalanced ']'});
761             }
762              
763             }
764             elsif(substr($1,0,1) ne '~') {
765             # it's stuff not containing "~" or "[" or "]"
766             # i.e., a literal blob
767 53         113 my $text = $1;
768 53         184 $text =~ s/\\/\\\\/g;
769 53         281 $c[-1] .= $text;
770              
771             }
772             elsif($1 eq '~~') { # "~~"
773 0         0 $c[-1] .= '~';
774              
775             }
776             elsif($1 eq '~[') { # "~["
777 0         0 $c[-1] .= '[';
778              
779             }
780             elsif($1 eq '~]') { # "~]"
781 0         0 $c[-1] .= ']';
782              
783             }
784             elsif($1 eq '~,') { # "~,"
785 0 0       0 if($in_group) {
786             # This is a hack, based on the assumption that no-one will actually
787             # want a DEL inside a bracket group. Let's hope that's it's true.
788 0         0 if (ord('A') == 65) { # ASCII etc
789 0         0 $c[-1] .= "\x7F";
790             }
791             else { # EBCDIC (cp 1047, 0037, POSIX-BC)
792             $c[-1] .= "\x07";
793             }
794             }
795             else {
796 0         0 $c[-1] .= '~,';
797             }
798              
799             }
800             elsif($1 eq '~') { # possible only at string-end, it seems.
801 0         0 $c[-1] .= '~';
802              
803             }
804             else {
805             # It's a "~X" where X is not a special character.
806             # Consider it a literal ~ and X.
807 0         0 my $text = $1;
808 0         0 $text =~ s/\\/\\\\/g;
809 0         0 $c[-1] .= $text;
810             }
811             }
812             }
813              
814 24 50       107 if($call_count) {
815 24         52 undef $big_pile; # Well, nevermind that.
816             }
817             else {
818             # It's all literals! Ahwell, that can happen.
819             # So don't bother with the eval. Return a SCALAR reference.
820 0         0 return \$big_pile;
821             }
822              
823 24 50 33     119 die q{Last chunk isn't null??} if @c and length $c[-1]; # sanity
824 24         44 DEBUG and warn scalar(@c), " chunks under closure\n";
825 24 50       111 if(@code == 0) { # not possible?
    100          
826 0         0 DEBUG and warn "Empty code\n";
827 0         0 return \'';
828             }
829             elsif(@code > 1) { # most cases, presumably!
830 10         27 unshift @code, "join '',\n";
831             }
832 24         62 unshift @code, "use strict; sub {\n";
833 24         53 push @code, "}\n";
834              
835 24         41 DEBUG and warn @code;
836 24     10   2502 my $sub = eval(join '', @code);
  10     6   85  
  10     6   18  
  10         765  
  6         50  
  6         14  
  6         372  
  6         51  
  6         11  
  6         320  
837 24 50       97 die "$@ while evalling" . join('', @code) if $@; # Should be impossible.
838 24         163 return $sub;
839             }
840              
841             #--------------------------------------------------------------------------
842              
843             sub _die_pointing {
844             # This is used by _compile to throw a fatal error
845 19     19   42 my $target = shift;
846 19   33     64 $target = ref($target) || $target; # class name
847             # ...leaving $_[0] the error-causing text, and $_[1] the error message
848              
849 19         68 my $i = index($_[0], "\n");
850              
851 19         36 my $pointy;
852 19 100       74 my $pos = pos($_[0]) - (defined($_[2]) ? $_[2] : 0) - 1;
853 19 100       85 if($pos < 1) {
854 18         36 $pointy = "^=== near there\n";
855             }
856             else { # we need to space over
857 1         3 my $first_tab = index($_[0], "\t");
858 1 50 33     6 if($pos > 2 and ( -1 == $first_tab or $first_tab > pos($_[0]))) {
      33        
859             # No tabs, or the first tab is harmlessly after where we will point to,
860             # AND we're far enough from the margin that we can draw a proper arrow.
861 1         4 $pointy = ('=' x $pos) . "^ near there\n";
862             }
863             else {
864             # tabs screw everything up!
865 0         0 $pointy = substr($_[0],0,$pos);
866 0         0 $pointy =~ tr/\t //cd;
867             # make everything into whitespace, but preserving tabs
868 0         0 $pointy .= "^=== near there\n";
869             }
870             }
871              
872 19         44 my $errmsg = "$_[1], in\:\n$_[0]";
873              
874 19 50       48 if($i == -1) {
    0          
875             # No newline.
876 19         49 $errmsg .= "\n" . $pointy;
877             }
878             elsif($i == (length($_[0]) - 1) ) {
879             # Already has a newline at end.
880 0         0 $errmsg .= $pointy;
881             }
882             else {
883             # don't bother with the pointy bit, I guess.
884             }
885 19         2976 Carp::croak( "$errmsg via $target, as used" );
886             }
887              
888             1;