File Coverage

blib/lib/OldUTF8.pm
Criterion Covered Total %
statement 64 126 50.7
branch 23 72 31.9
condition 0 3 0.0
subroutine 9 11 81.8
pod 0 2 0.0
total 96 214 44.8


line stmt bran cond sub pod time code
1             package OldUTF8;
2 306     306   235073 use strict;
  306         2230  
  306         10595  
3             ######################################################################
4             #
5             # OldUTF8 - Source code filter to escape old UTF-8 script
6             #
7             # http://search.cpan.org/dist/Char-OldUTF8/
8             #
9             # Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2018, 2019 INABA Hitoshi
10             ######################################################################
11              
12 306     306   6447 use 5.00503; # Galapagos Consensus 1998 for primetools
  306         9214  
13             # use 5.008001; # Lancaster Consensus 2013 for toolchains
14              
15             # 12.3. Delaying use Until Runtime
16             # in Chapter 12. Packages, Libraries, and Modules
17             # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
18             # (and so on)
19              
20             # Version numbers should be boring
21             # http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
22             # For the impatient, the disinterested or those who just want to follow
23             # a recipe, my advice for all modules is this:
24             # our $VERSION = "0.001"; # or "0.001_001" for a dev release
25             # $VERSION = CORE::eval $VERSION; # No!! because '1.10' makes '1.1'
26              
27 306     306   1839 use vars qw($VERSION);
  306         1744  
  306         52562  
28             $VERSION = '1.13';
29             $VERSION = $VERSION;
30              
31             BEGIN {
32 306 50   306   4219 if ($^X =~ / jperl /oxmsi) {
33 0         0 die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
34             }
35 306         953 if (CORE::ord('A') == 193) {
36             die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
37             }
38 306         36995 if (CORE::ord('A') != 0x41) {
39             die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
40             }
41             }
42              
43             BEGIN {
44 306     306   3183 (my $dirname = __FILE__) =~ s{^(.+)/[^/]*$}{$1};
45 306         1011 unshift @INC, $dirname;
46 306         782000 CORE::require Eoldutf8;
47             }
48              
49             # instead of Symbol.pm
50 918         3426 BEGIN {
51             sub gensym () {
52 0 50   918 0 0 if ($] < 5.006) {
53 0         0 return \do { local *_ };
  918         2309  
54             }
55             else {
56 306         18985 return undef;
57             }
58             }
59             }
60              
61             # P.714 29.2.39. flock
62             # in Chapter 29: Functions
63             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
64              
65             # P.863 flock
66             # in Chapter 27: Functions
67             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
68              
69             # P.228 Inlining Constant Functions
70             # in Chapter 6: Subroutines
71             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
72              
73             # P.331 Inlining Constant Functions
74             # in Chapter 7: Subroutines
75             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
76              
77             sub LOCK_SH() {1}
78             sub LOCK_EX() {2}
79             sub LOCK_UN() {8}
80             sub LOCK_NB() {4}
81              
82       0     sub unimport {}
83             sub OldUTF8::escape_script;
84              
85             # 6.18. Matching Multiple-Byte Characters
86             # in Chapter 6. Pattern Matching
87             # of ISBN 978-1-56592-243-3 Perl Perl Cookbook.
88             # (and so on)
89              
90             # regexp of character
91             my $qq_char = qr/(?> \\c[\x40-\x5F] | \\? (?:(?:[\xC0-\xDF]|[\xE0-\xEF][\x80-\xBF]|[\xF0-\xF4][\x80-\xBF][\x80-\xBF])[\x80-\xBF] | [\x00-\xFF]) )/oxms;
92             my $q_char = qr/(?> (?:[\xC0-\xDF]|[\xE0-\xEF][\x80-\xBF]|[\xF0-\xF4][\x80-\xBF][\x80-\xBF])[\x80-\xBF] | [\x00-\xFF] )/oxms;
93              
94             # when this script is main program
95             if ($0 eq __FILE__) {
96              
97             # show usage
98             unless (@ARGV) {
99             die <
100             $0: usage
101              
102             perl $0 old UTF-8_script.pl > Escaped_script.pl.e
103             END
104             }
105              
106             print OldUTF8::escape_script($ARGV[0]);
107             exit 0;
108             }
109              
110             my($package,$filename,$line,$subroutine,$hasargs,$wantarray,$evaltext,$is_require,$hints,$bitmask) = caller 0;
111              
112             # called any package not main
113             if ($package ne 'main') {
114             die <
115             @{[__FILE__]}: escape by manually command '$^X @{[__FILE__]} "$filename" > "@{[__PACKAGE__]}::$filename"'
116             and rewrite "use $package;" to "use @{[__PACKAGE__]}::$package;" of script "$0".
117             END
118             }
119              
120             # P.302 Module Privacy and the Exporter
121             # in Chapter 11: Modules
122             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
123             #
124             # A module can do anything it jolly well pleases when it's used, since use just
125             # calls the ordinary import method for the module, and you can define that
126             # method to do anything you like.
127              
128             # P.406 Module Privacy and the Exporter
129             # in Chapter 11: Modules
130             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
131             #
132             # A module can do anything it jolly well pleases when it's used, since use just
133             # calls the ordinary import method for the module, and you can define that
134             # method to do anything you like.
135              
136             sub import {
137              
138 0 50   306   0 if (-e("$filename.e")) {
139 0 0       0 if (exists $ENV{'CHAR_DEBUG'}) {
    0          
140 0         0 unlink "$filename.e";
141             }
142             elsif (-z("$filename.e")) {
143 0         0 unlink "$filename.e";
144             }
145             else {
146              
147             #----------------------------------------------------
148             # older >
149             # newer >>>>>
150             #----------------------------------------------------
151             # Filter >
152             # Source >>>>>
153             # Escape >>> needs re-escape (Source was changed)
154             #
155             # Filter >>>
156             # Source >>>>>
157             # Escape > needs re-escape (Source was changed)
158             #
159             # Filter >>>>>
160             # Source >>>
161             # Escape > needs re-escape (Source was changed)
162             #
163             # Filter >>>>>
164             # Source >
165             # Escape >>> needs re-escape (Filter was changed)
166             #
167             # Filter >
168             # Source >>>
169             # Escape >>>>> executable without re-escape
170             #
171             # Filter >>>
172             # Source >
173             # Escape >>>>> executable without re-escape
174             #----------------------------------------------------
175              
176 0         0 my $mtime_filter = (stat(__FILE__ ))[9];
177 0         0 my $mtime_source = (stat($filename ))[9];
178 0         0 my $mtime_escape = (stat("$filename.e"))[9];
179 0 0 0     0 if (($mtime_escape < $mtime_source) or ($mtime_escape < $mtime_filter)) {
180 306         3519 unlink "$filename.e";
181             }
182             }
183             }
184              
185 306 50       978 if (not -e("$filename.e")) {
186 306         1626 my $fh = gensym();
187 306 50       3155 Eoldutf8::_open_a($fh, "$filename.e") or die __FILE__, ": Can't write open file: $filename.e\n";
188              
189             # 7.19. Flushing Output
190             # in Chapter 7. File Access
191             # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
192              
193 306         1244 select((select($fh), $|=1)[0]);
194              
195 0 50       0 if (0) {
196             }
197 0         0 elsif (exists $ENV{'CHAR_NONBLOCK'}) {
198              
199             # P.419 File Locking
200             # in Chapter 16: Interprocess Communication
201             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
202              
203             # P.524 File Locking
204             # in Chapter 15: Interprocess Communication
205             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
206              
207             # P.571 Handling Race Conditions
208             # in Chapter 23: Security
209             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
210              
211             # P.663 Handling Race Conditions
212             # in Chapter 20: Security
213             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
214              
215             # (and so on)
216              
217 0         0 CORE::eval q{ flock($fh, LOCK_EX | LOCK_NB) };
218 0 0       0 if ($@) {
219 306         20946 die __FILE__, ": Can't immediately write-lock the file: $filename.e\n";
220             }
221             }
222             else {
223 306         15339 CORE::eval q{ flock($fh, LOCK_EX) };
224             }
225              
226 306         4247 CORE::eval q{ truncate($fh, 0) };
227 306 50       8497 seek($fh, 0, 0) or die __FILE__, ": Can't seek file: $filename.e\n";
228              
229 306         1353 my $e_script = OldUTF8::escape_script($filename);
230 306         25821 print {$fh} $e_script;
  306         10746  
231              
232 306         8836 my $mode = (stat($filename))[2] & 0777;
233 306         107242 chmod $mode, "$filename.e";
234              
235 306 50       1949 close($fh) or die "Can't close file: $filename.e: $!";
236             }
237              
238 306         2255 my $fh = gensym();
239 306 50       2823 Eoldutf8::_open_r($fh, "$filename.e") or die __FILE__, ": Can't read open file: $filename.e\n";
240              
241 0 50       0 if (0) {
242             }
243 0         0 elsif (exists $ENV{'CHAR_NONBLOCK'}) {
244 0         0 CORE::eval q{ flock($fh, LOCK_SH | LOCK_NB) };
245 0 0       0 if ($@) {
246 306         27409 die __FILE__, ": Can't immediately read-lock the file: $filename.e\n";
247             }
248             }
249             else {
250 306         1733 CORE::eval q{ flock($fh, LOCK_SH) };
251             }
252              
253 306         1881 my @switch = ();
254 0 50       0 if ($^W) {
255 306         1622 push @switch, '-w';
256             }
257 0 50       0 if (defined $^I) {
258 0         0 push @switch, '-i' . $^I;
259 306         704 undef $^I;
260             }
261              
262             # P.707 29.2.33. exec
263             # in Chapter 29: Functions
264             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
265             #
266             # If there is more than one argument in LIST, or if LIST is an array with more
267             # than one value, the system shell will never be used. This also bypasses any
268             # shell processing of the command. The presence or absence of metacharacters in
269             # the arguments doesn't affect this list-triggered behavior, which makes it the
270             # preferred from in security-conscious programs that do not with to expose
271             # themselves to potential shell escapes.
272             # Environment variable PERL5SHELL(Microsoft ports only) will never be used, too.
273              
274             # P.855 exec
275             # in Chapter 27: Functions
276             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
277             #
278             # If there is more than one argument in LIST, or if LIST is an array with more
279             # than one value, the system shell will never be used. This also bypasses any
280             # shell processing of the command. The presence or absence of metacharacters in
281             # the arguments doesn't affect this list-triggered behavior, which makes it the
282             # preferred from in security-conscious programs that do not wish to expose
283             # themselves to injection attacks via shell escapes.
284             # Environment variable PERL5SHELL(Microsoft ports only) will never be used, too.
285              
286             # P.489 #! and Quoting on Non-Unix Systems
287             # in Chapter 19: The Command-Line Interface
288             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
289              
290             # P.578 #! and Quoting on Non-Unix Systems
291             # in Chapter 17: The Command-Line Interface
292             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
293              
294 306         2442 my $system = 0;
295              
296             # DOS-like system
297 0 50       0 if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
298             $system = Eoldutf8::_systemx(
299             _escapeshellcmd_MSWin32($^X),
300              
301             # -I switch can not treat space included path
302             # (map { '-I' . _escapeshellcmd_MSWin32($_) } @INC),
303 0         0 (map { '-I' . $_ } @INC),
304              
305             @switch,
306             '--',
307 0         0 map { _escapeshellcmd_MSWin32($_) } "$filename.e", @ARGV
  306         1345  
308             );
309             }
310              
311             # UNIX-like system
312             else {
313             $system = Eoldutf8::_systemx(
314             _escapeshellcmd($^X),
315 306         882 (map { '-I' . _escapeshellcmd($_) } @INC),
316             @switch,
317             '--',
318 3672         5783 map { _escapeshellcmd($_) } "$filename.e", @ARGV
  306         567366  
319             );
320             }
321              
322             # exit with actual exit value
323 0         0 exit($system >> 8);
324             }
325              
326             # escape shell command line on DOS-like system
327             sub _escapeshellcmd_MSWin32 {
328 0     0   0 my($word) = @_;
329 0 0       0 if ($word =~ / [ ] /oxms) {
330 0         0 return qq{"$word"};
331             }
332             else {
333 4284         9002 return $word;
334             }
335             }
336              
337             # escape shell command line on UNIX-like system
338             sub _escapeshellcmd {
339 4284     4284   12493 my($word) = @_;
340 306         889 return $word;
341             }
342              
343             # P.619 Source Filters
344             # in Chapter 24: Common Practices
345             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
346              
347             # P.718 Source Filters
348             # in Chapter 21: Common Practices
349             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
350              
351             # escape old UTF-8 script
352             sub OldUTF8::escape_script {
353 306     306 0 555 my($script) = @_;
354 306         810 my $e_script = '';
355              
356             # read old UTF-8 script
357 306         1054 my $fh = gensym();
358 306 50       1752 Eoldutf8::_open_r($fh, $script) or die __FILE__, ": Can't open file: $script\n";
359 306         9362 local $/ = undef; # slurp mode
360 306         4779 $_ = <$fh>;
361 306 50       1586 close($fh) or die "Can't close file: $script: $!";
362              
363 0 50       0 if (/^ use Eoldutf8(?:(?>\s+)(?>[0-9\.]*))?(?>\s*); $/oxms) {
364 306         965 return $_;
365             }
366             else {
367              
368             # #! shebang line
369 0 50       0 if (s/\A(#!.+?\n)//oms) {
370 0         0 my $head = $1;
371 0         0 $head =~ s/\bjperl\b/perl/gi;
372 306         742 $e_script .= $head;
373             }
374              
375             # DOS-like system header
376 0 50       0 if (s/\A(\@rem(?>\s*)=(?>\s*)'.*?'(?>\s*);\s*\n)//oms) {
377 0         0 my $head = $1;
378 0         0 $head =~ s/\bjperl\b/perl/gi;
379 306         14745 $e_script .= $head;
380             }
381              
382             # P.618 Generating Perl in Other Languages
383             # in Chapter 24: Common Practices
384             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
385              
386             # P.717 Generating Perl in Other Languages
387             # in Chapter 21: Common Practices
388             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
389              
390 0 50       0 if (s/(.*^#(?>\s*)line(?>\s+)(?>[0-9]+)(?:(?>\s+)"(?:$q_char)+?")?\s*\n)//oms) {
391 0         0 my $head = $1;
392 0         0 $head =~ s/\bjperl\b/perl/gi;
393 306         2067 $e_script .= $head;
394             }
395              
396             # P.210 5.10.3.3. Match-time code evaluation
397             # in Chapter 5: Pattern Matching
398             # of ISBN 0-596-00027-8 Programming Perl Third Edition.
399              
400             # P.255 Match-time code evaluation
401             # in Chapter 5: Pattern Matching
402             # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
403              
404             # '...' quote to avoid "Octal number in vector unsupported" on perl 5.6
405              
406 306         3857 $e_script .= sprintf("use Eoldutf8 '%s.0'; # 'quote' for perl5.6\n", $OldUTF8::VERSION); # require run-time routines version
407              
408             # use OldUTF8 version qw(ord reverse getc);
409 306 50       976 if (s/^ (?>\s*) use (?>\s+) (?: Char | OldUTF8 ) (?>\s*) ([^\x80-\xFF;]*) ; \s* \n? $//oxms) {
410              
411             # require version
412 306         1046 my $list = $1;
413 0 50       0 if ($list =~ s/\A ((?>[0-9]+)\.(?>[0-9]+)) \.0 (?>\s*) //oxms) {
    50          
414 0         0 my $version = $1;
415 0 0       0 if ($version ne $OldUTF8::VERSION) {
416 0         0 my @file = grep -e, map {qq{$_/OldUTF8.pm}} @INC;
  0         0  
417 0         0 my %file = map { $_ => 1 } @file;
  0         0  
418 0 0       0 if (scalar(keys %file) >= 2) {
419 0         0 my $file = join "\n", sort keys %file;
420 0         0 warn <
421             ****************************************************
422             C A U T I O N
423              
424             CONFLICT OldUTF8.pm FILE
425              
426             $file
427             ****************************************************
428              
429             END
430             }
431 0         0 die "Script $0 expects OldUTF8.pm $version, but @{[__FILE__]} is version $OldUTF8::VERSION\n";
  0         0  
432             }
433 0         0 $e_script .= qq{die "Script \$0 expects Eoldutf8.pm $version, but \\\$Eoldutf8::VERSION is \$Eoldutf8::VERSION" if \$Eoldutf8::VERSION ne '$version';\n};
434             }
435             elsif ($list =~ s/\A ((?>[0-9]+)(?>\.[0-9]*)) (?>\s*) //oxms) {
436 0         0 my $version = $1;
437 0 0       0 if ($version > $OldUTF8::VERSION) {
438 0         0 die "Script $0 required OldUTF8.pm $version, but @{[__FILE__]} is only version $OldUTF8::VERSION\n";
  306         1350  
439             }
440             }
441              
442             # demand ord, reverse, and getc
443 0 50       0 if ($list !~ /\A (?>\s*) \z/oxms) {
444 0         0 local $@;
445 0         0 my @list = CORE::eval $list;
446 0         0 for (@list) {
447 0 0       0 $Eoldutf8::function_ord = 'OldUTF8::ord' if /\A ord \z/oxms;
448 0 0       0 $Eoldutf8::function_ord_ = 'OldUTF8::ord_' if /\A ord \z/oxms;
449 0 0       0 $Eoldutf8::function_reverse = 'OldUTF8::reverse' if /\A reverse \z/oxms;
450 306 0       1486 $Eoldutf8::function_getc = 'OldUTF8::getc' if /\A getc \z/oxms;
451             }
452             }
453             }
454             }
455              
456 306         3118 $e_script .= OldUTF8::escape();
457              
458             return $e_script;
459             }
460              
461             1;
462              
463             __END__