File Coverage

Char/Egreek.pm

Criterion	Covered	Total	%
statement	51	954	5.3
branch	4	562	0.7
condition	1	180	0.5
subroutine	20	85	23.5
pod	7	50	14.0
total	83	1831	4.5

line	stmt	bran	cond	sub	pod	time	code
1							package Char::Egreek;
2							######################################################################
3							#
4							# Char::Egreek - Run-time routines for Char/Greek.pm
5							#
6							# http://search.cpan.org/dist/Char-Greek/
7							#
8							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 INABA Hitoshi
9							######################################################################
10
11	176			176		5092	use 5.00503; # Galapagos Consensus 1998 for primetools
	176					671
	176					11421
12							# use 5.008001; # Lancaster Consensus 2013 for toolchains
13
14							# 12.3. Delaying use Until Runtime
15							# in Chapter 12. Packages, Libraries, and Modules
16							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
17							# (and so on)
18
19	176			176		15106	BEGIN { eval q{ use vars qw($VERSION) } }
	176			176		3503
	176					388
	176					47046
20							$VERSION = sprintf '%d.%02d', q$Revision: 0.99 $ =~ /(\d+)/xmsg;
21
22							BEGIN {
23	176	50		176		1575	if ($^X =~ / jperl /oxmsi) {
24	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)";
25							}
26	176					320	if (CORE::ord('A') == 193) {
27							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).";
28							}
29	176					45072	if (CORE::ord('A') != 0x41) {
30							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).";
31							}
32							}
33
34							BEGIN {
35
36							# instead of utf8.pm
37	176			176		29833	eval q{
	176			176		1590
	176			62		324
	176					34124
	62					10032
	57					21641
	54					12629
	68					26553
	62					12004
	49					10145
38							no warnings qw(redefine);
39							*utf8::upgrade = sub { CORE::length $_[0] };
40							*utf8::downgrade = sub { 1 };
41							*utf8::encode = sub { };
42							*utf8::decode = sub { 1 };
43							*utf8::is_utf8 = sub { };
44							*utf8::valid = sub { 1 };
45							};
46	176	50				157786	if ($@) {
47	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
48	0					0	*utf8::downgrade = sub { 1 };
	0					0
49	0					0	*utf8::encode = sub { };
	0					0
50	0					0	*utf8::decode = sub { 1 };
	0					0
51	0					0	*utf8::is_utf8 = sub { };
	0					0
52	0					0	*utf8::valid = sub { 1 };
	0					0
53							}
54							}
55
56							# instead of Symbol.pm
57							BEGIN {
58	176			176		1439	my $genpkg = "Symbol::";
59	176					19041	my $genseq = 0;
60
61							sub gensym () {
62	0			0	0	0	my $name = "GEN" . $genseq++;
63
64							# here, no strict qw(refs); if strict.pm exists
65
66	0					0	my $ref = \*{$genpkg . $name};
	0					0
67	0					0	delete $$genpkg{$name};
68	0					0	return $ref;
69							}
70
71							sub qualify ($;$) {
72	0			0	0	0	my ($name) = @_;
73	0	0	0			0	if (!ref($name) && (Char::Egreek::index($name, '::') == -1) && (Char::Egreek::index($name, "'") == -1)) {
			0
74	0					0	my $pkg;
75	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
76
77							# Global names: special character, "^xyz", or other.
78	0	0	0			0	if ($name =~ /^(([^a-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
79							# RGS 2001-11-05 : translate leading ^X to control-char
80	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
81	0					0	$pkg = "main";
82							}
83							else {
84	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
85							}
86	0					0	$name = $pkg . "::" . $name;
87							}
88	0					0	return $name;
89							}
90
91							sub qualify_to_ref ($;$) {
92
93							# here, no strict qw(refs); if strict.pm exists
94
95	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
96							}
97							}
98
99							# Column: local $@
100							# in Chapter 9. Osaete okitai Perl no kiso
101							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
102							# (and so on)
103
104							# use strict; if strict.pm exists
105							BEGIN {
106	176	50		176		420	if (eval { local $@; CORE::require strict }) {
	176					1632
	176					2346
107	176					51313	strict::->import;
108							}
109							}
110
111							# P.714 29.2.39. flock
112							# in Chapter 29: Functions
113							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
114
115							# P.863 flock
116							# in Chapter 27: Functions
117							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
118
119							sub LOCK_SH() {1}
120							sub LOCK_EX() {2}
121							sub LOCK_UN() {8}
122							sub LOCK_NB() {4}
123
124							# instead of Carp.pm
125							sub carp;
126							sub croak;
127							sub cluck;
128							sub confess;
129
130							my $your_char = q{[\x00-\xFF]};
131
132							# regexp of character
133	176			176		12242	BEGIN { eval q{ use vars qw($q_char) } }
	176			176		1996
	176					372
	176					20003
134							$q_char = qr/$your_char/oxms;
135
136							#
137							# Greek character range per length
138							#
139							my %range_tr = ();
140
141							#
142							# alias of encoding name
143							#
144	176			176		22955	BEGIN { eval q{ use vars qw($encoding_alias) } }
	176			176		1083
	176					353
	176					688153
145
146							#
147							# Greek case conversion
148							#
149							my %lc = ();
150							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
151							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
152							my %uc = ();
153							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
154							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
155							my %fc = ();
156							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
157							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
158
159							if (0) {
160							}
161
162							elsif (__PACKAGE__ =~ / \b Egreek \z/oxms) {
163							%range_tr = (
164							1 => [ [0x00..0xFF],
165							],
166							);
167							$encoding_alias = qr/ \b (?: iso[-_ ]?8859-7 \| iec[- ]?8859-7 \| greek ) \b /oxmsi;
168
169							%lc = (%lc,
170							"\xB6" => "\xDC", # GREEK LETTER ALPHA WITH TONOS
171							"\xB8" => "\xDD", # GREEK LETTER EPSILON WITH TONOS
172							"\xB9" => "\xDE", # GREEK LETTER ETA WITH TONOS
173							"\xBA" => "\xDF", # GREEK LETTER IOTA WITH TONOS
174							"\xBC" => "\xFC", # GREEK LETTER OMICRON WITH TONOS
175							"\xBE" => "\xFD", # GREEK LETTER UPSILON WITH TONOS
176							"\xBF" => "\xFE", # GREEK LETTER OMEGA WITH TONOS
177							"\xC1" => "\xE1", # GREEK LETTER ALPHA
178							"\xC2" => "\xE2", # GREEK LETTER BETA
179							"\xC3" => "\xE3", # GREEK LETTER GAMMA
180							"\xC4" => "\xE4", # GREEK LETTER DELTA
181							"\xC5" => "\xE5", # GREEK LETTER EPSILON
182							"\xC6" => "\xE6", # GREEK LETTER ZETA
183							"\xC7" => "\xE7", # GREEK LETTER ETA
184							"\xC8" => "\xE8", # GREEK LETTER THETA
185							"\xC9" => "\xE9", # GREEK LETTER IOTA
186							"\xCA" => "\xEA", # GREEK LETTER KAPPA
187							"\xCB" => "\xEB", # GREEK LETTER LAMDA
188							"\xCC" => "\xEC", # GREEK LETTER MU
189							"\xCD" => "\xED", # GREEK LETTER NU
190							"\xCE" => "\xEE", # GREEK LETTER XI
191							"\xCF" => "\xEF", # GREEK LETTER OMICRON
192							"\xD0" => "\xF0", # GREEK LETTER PI
193							"\xD1" => "\xF1", # GREEK LETTER RHO
194							"\xD3" => "\xF3", # GREEK LETTER SIGMA
195							"\xD4" => "\xF4", # GREEK LETTER TAU
196							"\xD5" => "\xF5", # GREEK LETTER UPSILON
197							"\xD6" => "\xF6", # GREEK LETTER PHI
198							"\xD7" => "\xF7", # GREEK LETTER CHI
199							"\xD8" => "\xF8", # GREEK LETTER PSI
200							"\xD9" => "\xF9", # GREEK LETTER OMEGA
201							"\xDA" => "\xFA", # GREEK LETTER IOTA WITH DIALYTIKA
202							"\xDB" => "\xFB", # GREEK LETTER UPSILON WITH DIALYTIKA
203							);
204
205							%uc = (%uc,
206							"\xDC" => "\xB6", # GREEK LETTER ALPHA WITH TONOS
207							"\xDD" => "\xB8", # GREEK LETTER EPSILON WITH TONOS
208							"\xDE" => "\xB9", # GREEK LETTER ETA WITH TONOS
209							"\xDF" => "\xBA", # GREEK LETTER IOTA WITH TONOS
210							"\xE1" => "\xC1", # GREEK LETTER ALPHA
211							"\xE2" => "\xC2", # GREEK LETTER BETA
212							"\xE3" => "\xC3", # GREEK LETTER GAMMA
213							"\xE4" => "\xC4", # GREEK LETTER DELTA
214							"\xE5" => "\xC5", # GREEK LETTER EPSILON
215							"\xE6" => "\xC6", # GREEK LETTER ZETA
216							"\xE7" => "\xC7", # GREEK LETTER ETA
217							"\xE8" => "\xC8", # GREEK LETTER THETA
218							"\xE9" => "\xC9", # GREEK LETTER IOTA
219							"\xEA" => "\xCA", # GREEK LETTER KAPPA
220							"\xEB" => "\xCB", # GREEK LETTER LAMDA
221							"\xEC" => "\xCC", # GREEK LETTER MU
222							"\xED" => "\xCD", # GREEK LETTER NU
223							"\xEE" => "\xCE", # GREEK LETTER XI
224							"\xEF" => "\xCF", # GREEK LETTER OMICRON
225							"\xF0" => "\xD0", # GREEK LETTER PI
226							"\xF1" => "\xD1", # GREEK LETTER RHO
227							"\xF3" => "\xD3", # GREEK LETTER SIGMA
228							"\xF4" => "\xD4", # GREEK LETTER TAU
229							"\xF5" => "\xD5", # GREEK LETTER UPSILON
230							"\xF6" => "\xD6", # GREEK LETTER PHI
231							"\xF7" => "\xD7", # GREEK LETTER CHI
232							"\xF8" => "\xD8", # GREEK LETTER PSI
233							"\xF9" => "\xD9", # GREEK LETTER OMEGA
234							"\xFA" => "\xDA", # GREEK LETTER IOTA WITH DIALYTIKA
235							"\xFB" => "\xDB", # GREEK LETTER UPSILON WITH DIALYTIKA
236							"\xFC" => "\xBC", # GREEK LETTER OMICRON WITH TONOS
237							"\xFD" => "\xBE", # GREEK LETTER UPSILON WITH TONOS
238							"\xFE" => "\xBF", # GREEK LETTER OMEGA WITH TONOS
239							);
240
241							%fc = (%fc,
242							"\xB6" => "\xDC", # GREEK CAPITAL LETTER ALPHA WITH TONOS --> GREEK SMALL LETTER ALPHA WITH TONOS
243							"\xB8" => "\xDD", # GREEK CAPITAL LETTER EPSILON WITH TONOS --> GREEK SMALL LETTER EPSILON WITH TONOS
244							"\xB9" => "\xDE", # GREEK CAPITAL LETTER ETA WITH TONOS --> GREEK SMALL LETTER ETA WITH TONOS
245							"\xBA" => "\xDF", # GREEK CAPITAL LETTER IOTA WITH TONOS --> GREEK SMALL LETTER IOTA WITH TONOS
246							"\xBC" => "\xFC", # GREEK CAPITAL LETTER OMICRON WITH TONOS --> GREEK SMALL LETTER OMICRON WITH TONOS
247							"\xBE" => "\xFD", # GREEK CAPITAL LETTER UPSILON WITH TONOS --> GREEK SMALL LETTER UPSILON WITH TONOS
248							"\xBF" => "\xFE", # GREEK CAPITAL LETTER OMEGA WITH TONOS --> GREEK SMALL LETTER OMEGA WITH TONOS
249							"\xC1" => "\xE1", # GREEK CAPITAL LETTER ALPHA --> GREEK SMALL LETTER ALPHA
250							"\xC2" => "\xE2", # GREEK CAPITAL LETTER BETA --> GREEK SMALL LETTER BETA
251							"\xC3" => "\xE3", # GREEK CAPITAL LETTER GAMMA --> GREEK SMALL LETTER GAMMA
252							"\xC4" => "\xE4", # GREEK CAPITAL LETTER DELTA --> GREEK SMALL LETTER DELTA
253							"\xC5" => "\xE5", # GREEK CAPITAL LETTER EPSILON --> GREEK SMALL LETTER EPSILON
254							"\xC6" => "\xE6", # GREEK CAPITAL LETTER ZETA --> GREEK SMALL LETTER ZETA
255							"\xC7" => "\xE7", # GREEK CAPITAL LETTER ETA --> GREEK SMALL LETTER ETA
256							"\xC8" => "\xE8", # GREEK CAPITAL LETTER THETA --> GREEK SMALL LETTER THETA
257							"\xC9" => "\xE9", # GREEK CAPITAL LETTER IOTA --> GREEK SMALL LETTER IOTA
258							"\xCA" => "\xEA", # GREEK CAPITAL LETTER KAPPA --> GREEK SMALL LETTER KAPPA
259							"\xCB" => "\xEB", # GREEK CAPITAL LETTER LAMDA --> GREEK SMALL LETTER LAMDA
260							"\xCC" => "\xEC", # GREEK CAPITAL LETTER MU --> GREEK SMALL LETTER MU
261							"\xCD" => "\xED", # GREEK CAPITAL LETTER NU --> GREEK SMALL LETTER NU
262							"\xCE" => "\xEE", # GREEK CAPITAL LETTER XI --> GREEK SMALL LETTER XI
263							"\xCF" => "\xEF", # GREEK CAPITAL LETTER OMICRON --> GREEK SMALL LETTER OMICRON
264							"\xD0" => "\xF0", # GREEK CAPITAL LETTER PI --> GREEK SMALL LETTER PI
265							"\xD1" => "\xF1", # GREEK CAPITAL LETTER RHO --> GREEK SMALL LETTER RHO
266							"\xD3" => "\xF3", # GREEK CAPITAL LETTER SIGMA --> GREEK SMALL LETTER SIGMA
267							"\xD4" => "\xF4", # GREEK CAPITAL LETTER TAU --> GREEK SMALL LETTER TAU
268							"\xD5" => "\xF5", # GREEK CAPITAL LETTER UPSILON --> GREEK SMALL LETTER UPSILON
269							"\xD6" => "\xF6", # GREEK CAPITAL LETTER PHI --> GREEK SMALL LETTER PHI
270							"\xD7" => "\xF7", # GREEK CAPITAL LETTER CHI --> GREEK SMALL LETTER CHI
271							"\xD8" => "\xF8", # GREEK CAPITAL LETTER PSI --> GREEK SMALL LETTER PSI
272							"\xD9" => "\xF9", # GREEK CAPITAL LETTER OMEGA --> GREEK SMALL LETTER OMEGA
273							"\xDA" => "\xFA", # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA --> GREEK SMALL LETTER IOTA WITH DIALYTIKA
274							"\xDB" => "\xFB", # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA --> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
275							"\xF2" => "\xF3", # GREEK SMALL LETTER FINAL SIGMA --> GREEK SMALL LETTER SIGMA
276							);
277							}
278
279							else {
280							croak "Don't know my package name '@{[__PACKAGE__]}'";
281							}
282
283							#
284							# @ARGV wildcard globbing
285							#
286							sub import {
287
288	0	0		0		0	if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
289	0					0	my @argv = ();
290	0					0	for (@ARGV) {
291
292							# has space
293	0	0				0	if (/\A (?:$q_char)*? [ ] /oxms) {
		0
294	0	0				0	if (my @glob = Char::Egreek::glob(qq{"$_"})) {
295	0					0	push @argv, @glob;
296							}
297							else {
298	0					0	push @argv, $_;
299							}
300							}
301
302							# has wildcard metachar
303							elsif (/\A (?:$q_char)? [?] /oxms) {
304	0	0				0	if (my @glob = Char::Egreek::glob($_)) {
305	0					0	push @argv, @glob;
306							}
307							else {
308	0					0	push @argv, $_;
309							}
310							}
311
312							# no wildcard globbing
313							else {
314	0					0	push @argv, $_;
315							}
316							}
317	0					0	@ARGV = @argv;
318							}
319							}
320
321							# P.230 Care with Prototypes
322							# in Chapter 6: Subroutines
323							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
324							#
325							# If you aren't careful, you can get yourself into trouble with prototypes.
326							# But if you are careful, you can do a lot of neat things with them. This is
327							# all very powerful, of course, and should only be used in moderation to make
328							# the world a better place.
329
330							# P.332 Care with Prototypes
331							# in Chapter 7: Subroutines
332							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
333							#
334							# If you aren't careful, you can get yourself into trouble with prototypes.
335							# But if you are careful, you can do a lot of neat things with them. This is
336							# all very powerful, of course, and should only be used in moderation to make
337							# the world a better place.
338
339							#
340							# Prototypes of subroutines
341							#
342	0			0		0	sub unimport {}
343							sub Char::Egreek::split(;$$$);
344							sub Char::Egreek::tr($$$$;$);
345							sub Char::Egreek::chop(@);
346							sub Char::Egreek::index($$;$);
347							sub Char::Egreek::rindex($$;$);
348							sub Char::Egreek::lcfirst(@);
349							sub Char::Egreek::lcfirst_();
350							sub Char::Egreek::lc(@);
351							sub Char::Egreek::lc_();
352							sub Char::Egreek::ucfirst(@);
353							sub Char::Egreek::ucfirst_();
354							sub Char::Egreek::uc(@);
355							sub Char::Egreek::uc_();
356							sub Char::Egreek::fc(@);
357							sub Char::Egreek::fc_();
358							sub Char::Egreek::ignorecase;
359							sub Char::Egreek::classic_character_class;
360							sub Char::Egreek::capture;
361							sub Char::Egreek::chr(;$);
362							sub Char::Egreek::chr_();
363							sub Char::Egreek::glob($);
364							sub Char::Egreek::glob_();
365
366							sub Char::Greek::ord(;$);
367							sub Char::Greek::ord_();
368							sub Char::Greek::reverse(@);
369							sub Char::Greek::getc(;*@);
370							sub Char::Greek::length(;$);
371							sub Char::Greek::substr($$;$$);
372							sub Char::Greek::index($$;$);
373							sub Char::Greek::rindex($$;$);
374
375							#
376							# Regexp work
377							#
378	176			176		18613	BEGIN { eval q{ use vars qw(
	176			176		2663
	176					475
	176					115911
379							$Char::Greek::re_a
380							$Char::Greek::re_t
381							$Char::Greek::re_n
382							$Char::Greek::re_r
383							) } }
384
385							#
386							# Character class
387							#
388	176			176		17138	BEGIN { eval q{ use vars qw(
	176			176		11903
	176					379
	176					4384814
389							$dot
390							$dot_s
391							$eD
392							$eS
393							$eW
394							$eH
395							$eV
396							$eR
397							$eN
398							$not_alnum
399							$not_alpha
400							$not_ascii
401							$not_blank
402							$not_cntrl
403							$not_digit
404							$not_graph
405							$not_lower
406							$not_lower_i
407							$not_print
408							$not_punct
409							$not_space
410							$not_upper
411							$not_upper_i
412							$not_word
413							$not_xdigit
414							$eb
415							$eB
416							) } }
417
418							${Char::Egreek::dot} = qr{(?:[^\x0A])};
419							${Char::Egreek::dot_s} = qr{(?:[\x00-\xFF])};
420							${Char::Egreek::eD} = qr{(?:[^0-9])};
421
422							# Vertical tabs are now whitespace
423							# \s in a regex now matches a vertical tab in all circumstances.
424							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
425							# ${Char::Egreek::eS} = qr{(?:[^\x09\x0A \x0C\x0D\x20])};
426							# ${Char::Egreek::eS} = qr{(?:[^\x09\x0A\x0B\x0C\x0D\x20])};
427							${Char::Egreek::eS} = qr{(?:[^\s])};
428
429							${Char::Egreek::eW} = qr{(?:[^0-9A-Z_a-z])};
430							${Char::Egreek::eH} = qr{(?:[^\x09\x20])};
431							${Char::Egreek::eV} = qr{(?:[^\x0A\x0B\x0C\x0D])};
432							${Char::Egreek::eR} = qr{(?:\x0D\x0A\|[\x0A\x0D])};
433							${Char::Egreek::eN} = qr{(?:[^\x0A])};
434							${Char::Egreek::not_alnum} = qr{(?:[^\x30-\x39\x41-\x5A\x61-\x7A])};
435							${Char::Egreek::not_alpha} = qr{(?:[^\x41-\x5A\x61-\x7A])};
436							${Char::Egreek::not_ascii} = qr{(?:[^\x00-\x7F])};
437							${Char::Egreek::not_blank} = qr{(?:[^\x09\x20])};
438							${Char::Egreek::not_cntrl} = qr{(?:[^\x00-\x1F\x7F])};
439							${Char::Egreek::not_digit} = qr{(?:[^\x30-\x39])};
440							${Char::Egreek::not_graph} = qr{(?:[^\x21-\x7F])};
441							${Char::Egreek::not_lower} = qr{(?:[^\x61-\x7A])};
442							${Char::Egreek::not_lower_i} = qr{(?:[\x00-\xFF])};
443							${Char::Egreek::not_print} = qr{(?:[^\x20-\x7F])};
444							${Char::Egreek::not_punct} = qr{(?:[^\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E])};
445							${Char::Egreek::not_space} = qr{(?:[^\s\x0B])};
446							${Char::Egreek::not_upper} = qr{(?:[^\x41-\x5A])};
447							${Char::Egreek::not_upper_i} = qr{(?:[\x00-\xFF])};
448							${Char::Egreek::not_word} = qr{(?:[^\x30-\x39\x41-\x5A\x5F\x61-\x7A])};
449							${Char::Egreek::not_xdigit} = qr{(?:[^\x30-\x39\x41-\x46\x61-\x66])};
450							${Char::Egreek::eb} = qr{(?:\A(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])\|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]\|\z))};
451							${Char::Egreek::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])\|(?<=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x40\x5B-\x5E\x60\x7B-\xFF]))};
452
453							# avoid: Name "Char::Egreek::foo" used only once: possible typo at here.
454							${Char::Egreek::dot} = ${Char::Egreek::dot};
455							${Char::Egreek::dot_s} = ${Char::Egreek::dot_s};
456							${Char::Egreek::eD} = ${Char::Egreek::eD};
457							${Char::Egreek::eS} = ${Char::Egreek::eS};
458							${Char::Egreek::eW} = ${Char::Egreek::eW};
459							${Char::Egreek::eH} = ${Char::Egreek::eH};
460							${Char::Egreek::eV} = ${Char::Egreek::eV};
461							${Char::Egreek::eR} = ${Char::Egreek::eR};
462							${Char::Egreek::eN} = ${Char::Egreek::eN};
463							${Char::Egreek::not_alnum} = ${Char::Egreek::not_alnum};
464							${Char::Egreek::not_alpha} = ${Char::Egreek::not_alpha};
465							${Char::Egreek::not_ascii} = ${Char::Egreek::not_ascii};
466							${Char::Egreek::not_blank} = ${Char::Egreek::not_blank};
467							${Char::Egreek::not_cntrl} = ${Char::Egreek::not_cntrl};
468							${Char::Egreek::not_digit} = ${Char::Egreek::not_digit};
469							${Char::Egreek::not_graph} = ${Char::Egreek::not_graph};
470							${Char::Egreek::not_lower} = ${Char::Egreek::not_lower};
471							${Char::Egreek::not_lower_i} = ${Char::Egreek::not_lower_i};
472							${Char::Egreek::not_print} = ${Char::Egreek::not_print};
473							${Char::Egreek::not_punct} = ${Char::Egreek::not_punct};
474							${Char::Egreek::not_space} = ${Char::Egreek::not_space};
475							${Char::Egreek::not_upper} = ${Char::Egreek::not_upper};
476							${Char::Egreek::not_upper_i} = ${Char::Egreek::not_upper_i};
477							${Char::Egreek::not_word} = ${Char::Egreek::not_word};
478							${Char::Egreek::not_xdigit} = ${Char::Egreek::not_xdigit};
479							${Char::Egreek::eb} = ${Char::Egreek::eb};
480							${Char::Egreek::eB} = ${Char::Egreek::eB};
481
482							#
483							# Greek split
484							#
485							sub Char::Egreek::split(;$$$) {
486
487							# P.794 29.2.161. split
488							# in Chapter 29: Functions
489							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
490
491							# P.951 split
492							# in Chapter 27: Functions
493							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
494
495	0			0	0	0	my $pattern = $_[0];
496	0					0	my $string = $_[1];
497	0					0	my $limit = $_[2];
498
499							# if $pattern is also omitted or is the literal space, " "
500	0	0				0	if (not defined $pattern) {
501	0					0	$pattern = ' ';
502							}
503
504							# if $string is omitted, the function splits the $_ string
505	0	0				0	if (not defined $string) {
506	0	0				0	if (defined $_) {
507	0					0	$string = $_;
508							}
509							else {
510	0					0	$string = '';
511							}
512							}
513
514	0					0	my @split = ();
515
516							# when string is empty
517	0	0				0	if ($string eq '') {
		0
518
519							# resulting list value in list context
520	0	0				0	if (wantarray) {
521	0					0	return @split;
522							}
523
524							# count of substrings in scalar context
525							else {
526	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
527	0					0	@_ = @split;
528	0					0	return scalar @_;
529							}
530							}
531
532							# split's first argument is more consistently interpreted
533							#
534							# After some changes earlier in v5.17, split's behavior has been simplified:
535							# if the PATTERN argument evaluates to a string containing one space, it is
536							# treated the way that a literal string containing one space once was.
537							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted
538
539							# if $pattern is also omitted or is the literal space, " ", the function splits
540							# on whitespace, /\s+/, after skipping any leading whitespace
541							# (and so on)
542
543							elsif ($pattern eq ' ') {
544	0	0				0	if (not defined $limit) {
545	0					0	return CORE::split(' ', $string);
546							}
547							else {
548	0					0	return CORE::split(' ', $string, $limit);
549							}
550							}
551
552							# if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
553	0	0	0			0	if ((not defined $limit) or ($limit <= 0)) {
		0
554
555							# a pattern capable of matching either the null string or something longer than the
556							# null string will split the value of $string into separate characters wherever it
557							# matches the null string between characters
558							# (and so on)
559
560	0	0				0	if ('' =~ / \A $pattern \z /xms) {
561	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
562	0					0	my $limit = scalar(() = $string =~ /($pattern)/oxmsg);
563
564							# P.1024 Appendix W.10 Multibyte Processing
565							# of ISBN 1-56592-224-7 CJKV Information Processing
566							# (and so on)
567
568							# the //m modifier is assumed when you split on the pattern /^/
569							# (and so on)
570
571							# V
572	0		0			0	while ((--$limit > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {
573
574							# if the $pattern contains parentheses, then the substring matched by each pair of parentheses
575							# is included in the resulting list, interspersed with the fields that are ordinarily returned
576							# (and so on)
577
578	0					0	local $@;
579	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
580	0					0	push @split, eval('$' . $digit);
581							}
582							}
583							}
584
585							else {
586	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
587
588							# V
589	0					0	while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
590	0					0	local $@;
591	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
592	0					0	push @split, eval('$' . $digit);
593							}
594							}
595							}
596							}
597
598							elsif ($limit > 0) {
599	0	0				0	if ('' =~ / \A $pattern \z /xms) {
600	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
601	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
602
603							# V
604	0	0				0	if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
605	0					0	local $@;
606	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
607	0					0	push @split, eval('$' . $digit);
608							}
609							}
610							}
611							}
612							else {
613	0					0	my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
614	0		0			0	while ((--$limit > 0) and (CORE::length($string) > 0)) {
615
616							# V
617	0	0				0	if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
618	0					0	local $@;
619	0					0	for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
620	0					0	push @split, eval('$' . $digit);
621							}
622							}
623							}
624							}
625							}
626
627	0	0				0	if (CORE::length($string) > 0) {
628	0					0	push @split, $string;
629							}
630
631							# if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
632	0	0	0			0	if ((not defined $_[2]) or ($_[2] == 0)) {
633	0		0			0	while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
634	0					0	pop @split;
635							}
636							}
637
638							# resulting list value in list context
639	0	0				0	if (wantarray) {
640	0					0	return @split;
641							}
642
643							# count of substrings in scalar context
644							else {
645	0	0				0	carp "Use of implicit split to \@_ is deprecated" if $^W;
646	0					0	@_ = @split;
647	0					0	return scalar @_;
648							}
649							}
650
651							#
652							# get last subexpression offsets
653							#
654							sub _last_subexpression_offsets {
655	0			0		0	my $pattern = $_[0];
656
657							# remove comment
658	0					0	$pattern =~ s/$\?\# .*? $//oxmsg;
659
660	0					0	my $modifier = '';
661	0	0				0	if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
662	0					0	$modifier = $1;
663	0					0	$modifier =~ s/-[A-Za-z]*//;
664							}
665
666							# with /x modifier
667	0					0	my @char = ();
668	0	0				0	if ($modifier =~ /x/oxms) {
669	0					0	@char = $pattern =~ /\G(
670							\\ (?:$q_char) \|
671							\# (?:$q_char)*? $ \|
672							\[ (?: \\\] \| (?:$q_char))+? \] \|
673							\(\? \|
674							(?:$q_char)
675							)/oxmsg;
676							}
677
678							# without /x modifier
679							else {
680	0					0	@char = $pattern =~ /\G(
681							\\ (?:$q_char) \|
682							\[ (?: \\\] \| (?:$q_char))+? \] \|
683							\(\? \|
684							(?:$q_char)
685							)/oxmsg;
686							}
687
688	0					0	return scalar grep { $_ eq '(' } @char;
	0					0
689							}
690
691							#
692							# Greek transliteration (tr///)
693							#
694							sub Char::Egreek::tr($$$$;$) {
695
696	0			0	0	0	my $bind_operator = $_[1];
697	0					0	my $searchlist = $_[2];
698	0					0	my $replacementlist = $_[3];
699	0		0			0	my $modifier = $_[4] \|\| '';
700
701	0	0				0	if ($modifier =~ /r/oxms) {
702	0	0				0	if ($bind_operator =~ / !~ /oxms) {
703	0					0	croak "Using !~ with tr///r doesn't make sense";
704							}
705							}
706
707	0					0	my @char = $_[0] =~ /\G ($q_char) /oxmsg;
708	0					0	my @searchlist = _charlist_tr($searchlist);
709	0					0	my @replacementlist = _charlist_tr($replacementlist);
710
711	0					0	my %tr = ();
712	0					0	for (my $i=0; $i <= $#searchlist; $i++) {
713	0	0				0	if (not exists $tr{$searchlist[$i]}) {
714	0	0	0			0	if (defined $replacementlist[$i] and ($replacementlist[$i] ne '')) {
		0	0
		0
715	0					0	$tr{$searchlist[$i]} = $replacementlist[$i];
716							}
717							elsif ($modifier =~ /d/oxms) {
718	0					0	$tr{$searchlist[$i]} = '';
719							}
720							elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
721	0					0	$tr{$searchlist[$i]} = $replacementlist[-1];
722							}
723							else {
724	0					0	$tr{$searchlist[$i]} = $searchlist[$i];
725							}
726							}
727							}
728
729	0					0	my $tr = 0;
730	0					0	my $replaced = '';
731	0	0				0	if ($modifier =~ /c/oxms) {
732	0					0	while (defined(my $char = shift @char)) {
733	0	0				0	if (not exists $tr{$char}) {
734	0	0				0	if (defined $replacementlist[0]) {
735	0					0	$replaced .= $replacementlist[0];
736							}
737	0					0	$tr++;
738	0	0				0	if ($modifier =~ /s/oxms) {
739	0		0			0	while (@char and (not exists $tr{$char[0]})) {
740	0					0	shift @char;
741	0					0	$tr++;
742							}
743							}
744							}
745							else {
746	0					0	$replaced .= $char;
747							}
748							}
749							}
750							else {
751	0					0	while (defined(my $char = shift @char)) {
752	0	0				0	if (exists $tr{$char}) {
753	0					0	$replaced .= $tr{$char};
754	0					0	$tr++;
755	0	0				0	if ($modifier =~ /s/oxms) {
756	0		0			0	while (@char and (exists $tr{$char[0]}) and ($tr{$char[0]} eq $tr{$char})) {
			0
757	0					0	shift @char;
758	0					0	$tr++;
759							}
760							}
761							}
762							else {
763	0					0	$replaced .= $char;
764							}
765							}
766							}
767
768	0	0				0	if ($modifier =~ /r/oxms) {
769	0					0	return $replaced;
770							}
771							else {
772	0					0	$_[0] = $replaced;
773	0	0				0	if ($bind_operator =~ / !~ /oxms) {
774	0					0	return not $tr;
775							}
776							else {
777	0					0	return $tr;
778							}
779							}
780							}
781
782							#
783							# Greek chop
784							#
785							sub Char::Egreek::chop(@) {
786
787	0			0	0	0	my $chop;
788	0	0				0	if (@_ == 0) {
789	0					0	my @char = /\G ($q_char) /oxmsg;
790	0					0	$chop = pop @char;
791	0					0	$_ = join '', @char;
792							}
793							else {
794	0					0	for (@_) {
795	0					0	my @char = /\G ($q_char) /oxmsg;
796	0					0	$chop = pop @char;
797	0					0	$_ = join '', @char;
798							}
799							}
800	0					0	return $chop;
801							}
802
803							#
804							# Greek index by octet
805							#
806							sub Char::Egreek::index($$;$) {
807
808	0			0	1	0	my($str,$substr,$position) = @_;
809	0		0			0	$position \|\|= 0;
810	0					0	my $pos = 0;
811
812	0					0	while ($pos < CORE::length($str)) {
813	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
814	0	0				0	if ($pos >= $position) {
815	0					0	return $pos;
816							}
817							}
818	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
819	0					0	$pos += CORE::length($1);
820							}
821							else {
822	0					0	$pos += 1;
823							}
824							}
825	0					0	return -1;
826							}
827
828							#
829							# Greek reverse index
830							#
831							sub Char::Egreek::rindex($$;$) {
832
833	0			0	0	0	my($str,$substr,$position) = @_;
834	0		0			0	$position \|\|= CORE::length($str) - 1;
835	0					0	my $pos = 0;
836	0					0	my $rindex = -1;
837
838	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
839	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
840	0					0	$rindex = $pos;
841							}
842	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
843	0					0	$pos += CORE::length($1);
844							}
845							else {
846	0					0	$pos += 1;
847							}
848							}
849	0					0	return $rindex;
850							}
851
852							#
853							# Greek lower case first with parameter
854							#
855							sub Char::Egreek::lcfirst(@) {
856	0	0		0	0	0	if (@_) {
857	0					0	my $s = shift @_;
858	0	0	0			0	if (@_ and wantarray) {
859	0					0	return Char::Egreek::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
860							}
861							else {
862	0					0	return Char::Egreek::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
863							}
864							}
865							else {
866	0					0	return Char::Egreek::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
867							}
868							}
869
870							#
871							# Greek lower case first without parameter
872							#
873							sub Char::Egreek::lcfirst_() {
874	0			0	0	0	return Char::Egreek::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
875							}
876
877							#
878							# Greek lower case with parameter
879							#
880							sub Char::Egreek::lc(@) {
881	0	0		0	0	0	if (@_) {
882	0					0	my $s = shift @_;
883	0	0	0			0	if (@_ and wantarray) {
884	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
885							}
886							else {
887	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
888							}
889							}
890							else {
891	0					0	return Char::Egreek::lc_();
892							}
893							}
894
895							#
896							# Greek lower case without parameter
897							#
898							sub Char::Egreek::lc_() {
899	0			0	0	0	my $s = $_;
900	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
901							}
902
903							#
904							# Greek upper case first with parameter
905							#
906							sub Char::Egreek::ucfirst(@) {
907	0	0		0	0	0	if (@_) {
908	0					0	my $s = shift @_;
909	0	0	0			0	if (@_ and wantarray) {
910	0					0	return Char::Egreek::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
911							}
912							else {
913	0					0	return Char::Egreek::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
914							}
915							}
916							else {
917	0					0	return Char::Egreek::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
918							}
919							}
920
921							#
922							# Greek upper case first without parameter
923							#
924							sub Char::Egreek::ucfirst_() {
925	0			0	0	0	return Char::Egreek::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
926							}
927
928							#
929							# Greek upper case with parameter
930							#
931							sub Char::Egreek::uc(@) {
932	0	0		0	0	0	if (@_) {
933	0					0	my $s = shift @_;
934	0	0	0			0	if (@_ and wantarray) {
935	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
936							}
937							else {
938	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
939							}
940							}
941							else {
942	0					0	return Char::Egreek::uc_();
943							}
944							}
945
946							#
947							# Greek upper case without parameter
948							#
949							sub Char::Egreek::uc_() {
950	0			0	0	0	my $s = $_;
951	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
952							}
953
954							#
955							# Greek fold case with parameter
956							#
957							sub Char::Egreek::fc(@) {
958	0	0		0	0	0	if (@_) {
959	0					0	my $s = shift @_;
960	0	0	0			0	if (@_ and wantarray) {
961	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
962							}
963							else {
964	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
965							}
966							}
967							else {
968	0					0	return Char::Egreek::fc_();
969							}
970							}
971
972							#
973							# Greek fold case without parameter
974							#
975							sub Char::Egreek::fc_() {
976	0			0	0	0	my $s = $_;
977	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
978							}
979
980							#
981							# Greek regexp capture
982							#
983							{
984							sub Char::Egreek::capture {
985	0			0	1	0	return $_[0];
986							}
987							}
988
989							#
990							# Greek regexp ignore case modifier
991							#
992							sub Char::Egreek::ignorecase {
993
994	0			0	0	0	my @string = @_;
995	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
996
997							# ignore case of $scalar or @array
998	0					0	for my $string (@string) {
999
1000							# split regexp
1001	0					0	my @char = $string =~ /\G(
1002							\[\^ \|
1003							\\? (?:$q_char)
1004							)/oxmsg;
1005
1006							# unescape character
1007	0					0	for (my $i=0; $i <= $#char; $i++) {
1008	0	0				0	next if not defined $char[$i];
1009
1010							# open character class [...]
1011	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
1012	0					0	my $left = $i;
1013
1014							# [] make die "unmatched [] in regexp ..."
1015
1016	0	0				0	if ($char[$i+1] eq ']') {
1017	0					0	$i++;
1018							}
1019
1020	0					0	while (1) {
1021	0	0				0	if (++$i > $#char) {
1022	0					0	croak "Unmatched [] in regexp";
1023							}
1024	0	0				0	if ($char[$i] eq ']') {
1025	0					0	my $right = $i;
1026	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
1027
1028							# escape character
1029	0					0	for my $char (@charlist) {
1030	0	0				0	if (0) {
1031							}
1032
1033	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1034	0					0	$char = $1 . '\\' . $char;
1035							}
1036							}
1037
1038							# [...]
1039	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
1040
1041	0					0	$i = $left;
1042	0					0	last;
1043							}
1044							}
1045							}
1046
1047							# open character class [^...]
1048							elsif ($char[$i] eq '[^') {
1049	0					0	my $left = $i;
1050
1051							# [^] make die "unmatched [] in regexp ..."
1052
1053	0	0				0	if ($char[$i+1] eq ']') {
1054	0					0	$i++;
1055							}
1056
1057	0					0	while (1) {
1058	0	0				0	if (++$i > $#char) {
1059	0					0	croak "Unmatched [] in regexp";
1060							}
1061	0	0				0	if ($char[$i] eq ']') {
1062	0					0	my $right = $i;
1063	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
1064
1065							# escape character
1066	0					0	for my $char (@charlist) {
1067	0	0				0	if (0) {
1068							}
1069
1070	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
1071	0					0	$char = '\\' . $char;
1072							}
1073							}
1074
1075							# [^...]
1076	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
1077
1078	0					0	$i = $left;
1079	0					0	last;
1080							}
1081							}
1082							}
1083
1084							# rewrite classic character class or escape character
1085							elsif (my $char = classic_character_class($char[$i])) {
1086	0					0	$char[$i] = $char;
1087							}
1088
1089							# with /i modifier
1090							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1091	0					0	my $uc = Char::Egreek::uc($char[$i]);
1092	0					0	my $fc = Char::Egreek::fc($char[$i]);
1093	0	0				0	if ($uc ne $fc) {
1094	0	0				0	if (CORE::length($fc) == 1) {
1095	0					0	$char[$i] = '[' . $uc . $fc . ']';
1096							}
1097							else {
1098	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
1099							}
1100							}
1101							}
1102							}
1103
1104							# characterize
1105	0					0	for (my $i=0; $i <= $#char; $i++) {
1106	0	0				0	next if not defined $char[$i];
1107
1108	0	0				0	if (0) {
1109							}
1110
1111							# quote character before ? + * {
1112	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
1113	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
1114	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
1115							}
1116							}
1117							}
1118
1119	0					0	$string = join '', @char;
1120							}
1121
1122							# make regexp string
1123	0					0	return @string;
1124							}
1125
1126							#
1127							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
1128							#
1129							sub Char::Egreek::classic_character_class {
1130	0			0	0	0	my($char) = @_;
1131
1132							return {
1133	0		0			0	'\D' => '${Char::Egreek::eD}',
1134							'\S' => '${Char::Egreek::eS}',
1135							'\W' => '${Char::Egreek::eW}',
1136							'\d' => '[0-9]',
1137
1138							# Before Perl 5.6, \s only matched the five whitespace characters
1139							# tab, newline, form-feed, carriage return, and the space character
1140							# itself, which, taken together, is the character class [\t\n\f\r ].
1141
1142							# Vertical tabs are now whitespace
1143							# \s in a regex now matches a vertical tab in all circumstances.
1144							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1145							# \t \n \v \f \r space
1146							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1147							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1148							'\s' => '\s',
1149
1150							'\w' => '[0-9A-Z_a-z]',
1151							'\C' => '[\x00-\xFF]',
1152							'\X' => 'X',
1153
1154							# \h \v \H \V
1155
1156							# P.114 Character Class Shortcuts
1157							# in Chapter 7: In the World of Regular Expressions
1158							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
1159
1160							# P.357 13.2.3 Whitespace
1161							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
1162							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1163							#
1164							# 0x00009 CHARACTER TABULATION h s
1165							# 0x0000a LINE FEED (LF) vs
1166							# 0x0000b LINE TABULATION v
1167							# 0x0000c FORM FEED (FF) vs
1168							# 0x0000d CARRIAGE RETURN (CR) vs
1169							# 0x00020 SPACE h s
1170
1171							# P.196 Table 5-9. Alphanumeric regex metasymbols
1172							# in Chapter 5. Pattern Matching
1173							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1174
1175							# (and so on)
1176
1177							'\H' => '${Char::Egreek::eH}',
1178							'\V' => '${Char::Egreek::eV}',
1179							'\h' => '[\x09\x20]',
1180							'\v' => '[\x0A\x0B\x0C\x0D]',
1181							'\R' => '${Char::Egreek::eR}',
1182
1183							# \N
1184							#
1185							# http://perldoc.perl.org/perlre.html
1186							# Character Classes and other Special Escapes
1187							# Any character but \n (experimental). Not affected by /s modifier
1188
1189							'\N' => '${Char::Egreek::eN}',
1190
1191							# \b \B
1192
1193							# P.180 Boundaries: The \b and \B Assertions
1194							# in Chapter 5: Pattern Matching
1195							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1196
1197							# P.219 Boundaries: The \b and \B Assertions
1198							# in Chapter 5: Pattern Matching
1199							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1200
1201							# '\b' => '(?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))',
1202							'\b' => '${Char::Egreek::eb}',
1203
1204							# '\B' => '(?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))',
1205							'\B' => '${Char::Egreek::eB}',
1206
1207							}->{$char} \|\| '';
1208							}
1209
1210							#
1211							# prepare Greek characters per length
1212							#
1213
1214							# 1 octet characters
1215							my @chars1 = ();
1216							sub chars1 {
1217	0	0		0	0	0	if (@chars1) {
1218	0					0	return @chars1;
1219							}
1220	0	0				0	if (exists $range_tr{1}) {
1221	0					0	my @ranges = @{ $range_tr{1} };
	0					0
1222	0					0	while (my @range = splice(@ranges,0,1)) {
1223	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1224	0					0	push @chars1, pack 'C', $oct0;
1225							}
1226							}
1227							}
1228	0					0	return @chars1;
1229							}
1230
1231							# 2 octets characters
1232							my @chars2 = ();
1233							sub chars2 {
1234	0	0		0	0	0	if (@chars2) {
1235	0					0	return @chars2;
1236							}
1237	0	0				0	if (exists $range_tr{2}) {
1238	0					0	my @ranges = @{ $range_tr{2} };
	0					0
1239	0					0	while (my @range = splice(@ranges,0,2)) {
1240	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1241	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1242	0					0	push @chars2, pack 'CC', $oct0,$oct1;
1243							}
1244							}
1245							}
1246							}
1247	0					0	return @chars2;
1248							}
1249
1250							# 3 octets characters
1251							my @chars3 = ();
1252							sub chars3 {
1253	0	0		0	0	0	if (@chars3) {
1254	0					0	return @chars3;
1255							}
1256	0	0				0	if (exists $range_tr{3}) {
1257	0					0	my @ranges = @{ $range_tr{3} };
	0					0
1258	0					0	while (my @range = splice(@ranges,0,3)) {
1259	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1260	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1261	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1262	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
1263							}
1264							}
1265							}
1266							}
1267							}
1268	0					0	return @chars3;
1269							}
1270
1271							# 4 octets characters
1272							my @chars4 = ();
1273							sub chars4 {
1274	0	0		0	0	0	if (@chars4) {
1275	0					0	return @chars4;
1276							}
1277	0	0				0	if (exists $range_tr{4}) {
1278	0					0	my @ranges = @{ $range_tr{4} };
	0					0
1279	0					0	while (my @range = splice(@ranges,0,4)) {
1280	0					0	for my $oct0 (@{$range[0]}) {
	0					0
1281	0					0	for my $oct1 (@{$range[1]}) {
	0					0
1282	0					0	for my $oct2 (@{$range[2]}) {
	0					0
1283	0					0	for my $oct3 (@{$range[3]}) {
	0					0
1284	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
1285							}
1286							}
1287							}
1288							}
1289							}
1290							}
1291	0					0	return @chars4;
1292							}
1293
1294							#
1295							# Greek open character list for tr
1296							#
1297							sub _charlist_tr {
1298
1299	0			0		0	local $_ = shift @_;
1300
1301							# unescape character
1302	0					0	my @char = ();
1303	0					0	while (not /\G \z/oxmsgc) {
1304	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
1305	0					0	push @char, '\-';
1306							}
1307							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
1308	0					0	push @char, CORE::chr(oct $1);
1309							}
1310							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
1311	0					0	push @char, CORE::chr(hex $1);
1312							}
1313							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
1314	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
1315							}
1316							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
1317	0					0	push @char, {
1318							'\0' => "\0",
1319							'\n' => "\n",
1320							'\r' => "\r",
1321							'\t' => "\t",
1322							'\f' => "\f",
1323							'\b' => "\x08", # \b means backspace in character class
1324							'\a' => "\a",
1325							'\e' => "\e",
1326							}->{$1};
1327							}
1328							elsif (/\G \\ ($q_char) /oxmsgc) {
1329	0					0	push @char, $1;
1330							}
1331							elsif (/\G ($q_char) /oxmsgc) {
1332	0					0	push @char, $1;
1333							}
1334							}
1335
1336							# join separated multiple-octet
1337	0					0	@char = join('',@char) =~ /\G (\\-\|$q_char) /oxmsg;
1338
1339							# unescape '-'
1340	0					0	my @i = ();
1341	0					0	for my $i (0 .. $#char) {
1342	0	0				0	if ($char[$i] eq '\-') {
		0
1343	0					0	$char[$i] = '-';
1344							}
1345							elsif ($char[$i] eq '-') {
1346	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1347	0					0	push @i, $i;
1348							}
1349							}
1350							}
1351
1352							# open character list (reverse for splice)
1353	0					0	for my $i (CORE::reverse @i) {
1354	0					0	my @range = ();
1355
1356							# range error
1357	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
1358	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1359							}
1360
1361							# range of multiple-octet code
1362	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
1363	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
1364	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
1365							}
1366							elsif (CORE::length($char[$i+1]) == 2) {
1367	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1368	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
1369							}
1370							elsif (CORE::length($char[$i+1]) == 3) {
1371	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1372	0					0	push @range, chars2();
1373	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1374							}
1375							elsif (CORE::length($char[$i+1]) == 4) {
1376	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
1377	0					0	push @range, chars2();
1378	0					0	push @range, chars3();
1379	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1380							}
1381							else {
1382	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1383							}
1384							}
1385							elsif (CORE::length($char[$i-1]) == 2) {
1386	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
1387	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
1388							}
1389							elsif (CORE::length($char[$i+1]) == 3) {
1390	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1391	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
1392							}
1393							elsif (CORE::length($char[$i+1]) == 4) {
1394	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
1395	0					0	push @range, chars3();
1396	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1397							}
1398							else {
1399	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1400							}
1401							}
1402							elsif (CORE::length($char[$i-1]) == 3) {
1403	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
1404	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
1405							}
1406							elsif (CORE::length($char[$i+1]) == 4) {
1407	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
1408	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
1409							}
1410							else {
1411	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1412							}
1413							}
1414							elsif (CORE::length($char[$i-1]) == 4) {
1415	0	0				0	if (CORE::length($char[$i+1]) == 4) {
1416	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
1417							}
1418							else {
1419	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1420							}
1421							}
1422							else {
1423	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
1424							}
1425
1426	0					0	splice @char, $i-1, 3, @range;
1427							}
1428
1429	0					0	return @char;
1430							}
1431
1432							#
1433							# Greek open character class
1434							#
1435							sub _cc {
1436	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
1437	0					0	die __FILE__, ": subroutine cc got no parameter.";
1438							}
1439							elsif (scalar(@_) == 1) {
1440	0					0	return sprintf('\x%02X',$_[0]);
1441							}
1442							elsif (scalar(@_) == 2) {
1443	0	0				0	if ($_[0] > $_[1]) {
		0
		0
1444	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).";
1445							}
1446							elsif ($_[0] == $_[1]) {
1447	0					0	return sprintf('\x%02X',$_[0]);
1448							}
1449							elsif (($_[0]+1) == $_[1]) {
1450	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
1451							}
1452							else {
1453	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
1454							}
1455							}
1456							else {
1457	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).";
	0					0
1458							}
1459							}
1460
1461							#
1462							# Greek octet range
1463							#
1464							sub _octets {
1465	0			0		0	my $length = shift @_;
1466
1467	0	0				0	if ($length == 1) {
1468	0					0	my($a1) = unpack 'C', $_[0];
1469	0					0	my($z1) = unpack 'C', $_[1];
1470
1471	0	0				0	if ($a1 > $z1) {
1472	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
1473							}
1474
1475	0	0				0	if ($a1 == $z1) {
		0
1476	0					0	return sprintf('\x%02X',$a1);
1477							}
1478							elsif (($a1+1) == $z1) {
1479	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
1480							}
1481							else {
1482	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
1483							}
1484							}
1485							else {
1486	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).";
1487							}
1488							}
1489
1490							#
1491							# Greek range regexp
1492							#
1493							sub _range_regexp {
1494	0			0		0	my($length,$first,$last) = @_;
1495
1496	0					0	my @range_regexp = ();
1497	0	0				0	if (not exists $range_tr{$length}) {
1498	0					0	return @range_regexp;
1499							}
1500
1501	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
1502	0					0	while (my @range = splice(@ranges,0,$length)) {
1503	0					0	my $min = '';
1504	0					0	my $max = '';
1505	0					0	for (my $i=0; $i < $length; $i++) {
1506	0					0	$min .= pack 'C', $range[$i][0];
1507	0					0	$max .= pack 'C', $range[$i][-1];
1508							}
1509
1510							# min___max
1511							# FIRST_____________LAST
1512							# (nothing)
1513
1514	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
1515							}
1516
1517							# **********
1518							# min_________max
1519							# FIRST_____________LAST
1520							# **********
1521
1522							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
1523	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
1524							}
1525
1526							# **********************
1527							# min________________max
1528							# FIRST_____________LAST
1529							# **********************
1530
1531							elsif (($min eq $first) and ($max eq $last)) {
1532	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1533							}
1534
1535							# *********
1536							# min___max
1537							# FIRST_____________LAST
1538							# *********
1539
1540							elsif (($first le $min) and ($max le $last)) {
1541	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
1542							}
1543
1544							# **********************
1545							# min__________________________max
1546							# FIRST_____________LAST
1547							# **********************
1548
1549							elsif (($min le $first) and ($last le $max)) {
1550	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
1551							}
1552
1553							# *********
1554							# min________max
1555							# FIRST_____________LAST
1556							# *********
1557
1558							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
1559	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
1560							}
1561
1562							# min___max
1563							# FIRST_____________LAST
1564							# (nothing)
1565
1566							elsif ($last lt $min) {
1567							}
1568
1569							else {
1570	0					0	die __FILE__, ": subroutine _range_regexp panic.";
1571							}
1572							}
1573
1574	0					0	return @range_regexp;
1575							}
1576
1577							#
1578							# Greek open character list for qr and not qr
1579							#
1580							sub _charlist {
1581
1582	0			0		0	my $modifier = pop @_;
1583	0					0	my @char = @_;
1584
1585	0	0				0	my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
1586
1587							# unescape character
1588	0					0	for (my $i=0; $i <= $#char; $i++) {
1589
1590							# escape - to ...
1591	0	0	0			0	if ($char[$i] eq '-') {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
1592	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
1593	0					0	$char[$i] = '...';
1594							}
1595							}
1596
1597							# octal escape sequence
1598							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
1599	0					0	$char[$i] = octchr($1);
1600							}
1601
1602							# hexadecimal escape sequence
1603							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
1604	0					0	$char[$i] = hexchr($1);
1605							}
1606
1607							# \N{CHARNAME} --> N\{CHARNAME}
1608							elsif ($char[$i] =~ /\A \\ (N) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1609	0					0	$char[$i] = $1 . '\\' . $2;
1610							}
1611
1612							# \p{PROPERTY} --> p\{PROPERTY}
1613							elsif ($char[$i] =~ /\A \\ (p) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1614	0					0	$char[$i] = $1 . '\\' . $2;
1615							}
1616
1617							# \P{PROPERTY} --> P\{PROPERTY}
1618							elsif ($char[$i] =~ /\A \\ (P) ( \{ ([^0-9\}][^\}]*) \} ) \z/oxms) {
1619	0					0	$char[$i] = $1 . '\\' . $2;
1620							}
1621
1622							# \p, \P, \X --> p, P, X
1623							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
1624	0					0	$char[$i] = $1;
1625							}
1626
1627							elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
1628	0					0	$char[$i] = CORE::chr oct $1;
1629							}
1630							elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
1631	0					0	$char[$i] = CORE::chr hex $1;
1632							}
1633							elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
1634	0					0	$char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
1635							}
1636							elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
1637	0					0	$char[$i] = {
1638							'\0' => "\0",
1639							'\n' => "\n",
1640							'\r' => "\r",
1641							'\t' => "\t",
1642							'\f' => "\f",
1643							'\b' => "\x08", # \b means backspace in character class
1644							'\a' => "\a",
1645							'\e' => "\e",
1646							'\d' => '[0-9]',
1647
1648							# Vertical tabs are now whitespace
1649							# \s in a regex now matches a vertical tab in all circumstances.
1650							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
1651							# \t \n \v \f \r space
1652							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
1653							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
1654							'\s' => '\s',
1655
1656							'\w' => '[0-9A-Z_a-z]',
1657							'\D' => '${Char::Egreek::eD}',
1658							'\S' => '${Char::Egreek::eS}',
1659							'\W' => '${Char::Egreek::eW}',
1660
1661							'\H' => '${Char::Egreek::eH}',
1662							'\V' => '${Char::Egreek::eV}',
1663							'\h' => '[\x09\x20]',
1664							'\v' => '[\x0A\x0B\x0C\x0D]',
1665							'\R' => '${Char::Egreek::eR}',
1666
1667							}->{$1};
1668							}
1669
1670							# POSIX-style character classes
1671							elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower\|upper) :\] ) \z/oxms)) {
1672	0					0	$char[$i] = {
1673
1674							'[:lower:]' => '[\x41-\x5A\x61-\x7A]',
1675							'[:upper:]' => '[\x41-\x5A\x61-\x7A]',
1676							'[:^lower:]' => '${Char::Egreek::not_lower_i}',
1677							'[:^upper:]' => '${Char::Egreek::not_upper_i}',
1678
1679							}->{$1};
1680							}
1681							elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum\|alpha\|ascii\|blank\|cntrl\|digit\|graph\|lower\|print\|punct\|space\|upper\|word\|xdigit) :\] ) \z/oxms) {
1682	0					0	$char[$i] = {
1683
1684							'[:alnum:]' => '[\x30-\x39\x41-\x5A\x61-\x7A]',
1685							'[:alpha:]' => '[\x41-\x5A\x61-\x7A]',
1686							'[:ascii:]' => '[\x00-\x7F]',
1687							'[:blank:]' => '[\x09\x20]',
1688							'[:cntrl:]' => '[\x00-\x1F\x7F]',
1689							'[:digit:]' => '[\x30-\x39]',
1690							'[:graph:]' => '[\x21-\x7F]',
1691							'[:lower:]' => '[\x61-\x7A]',
1692							'[:print:]' => '[\x20-\x7F]',
1693							'[:punct:]' => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',
1694
1695							# P.174 POSIX-Style Character Classes
1696							# in Chapter 5: Pattern Matching
1697							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1698
1699							# P.311 11.2.4 Character Classes and other Special Escapes
1700							# in Chapter 11: perlre: Perl regular expressions
1701							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
1702
1703							# P.210 POSIX-Style Character Classes
1704							# in Chapter 5: Pattern Matching
1705							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1706
1707							'[:space:]' => '[\s\x0B]', # "\s" plus vertical tab ("\cK")
1708
1709							'[:upper:]' => '[\x41-\x5A]',
1710							'[:word:]' => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
1711							'[:xdigit:]' => '[\x30-\x39\x41-\x46\x61-\x66]',
1712							'[:^alnum:]' => '${Char::Egreek::not_alnum}',
1713							'[:^alpha:]' => '${Char::Egreek::not_alpha}',
1714							'[:^ascii:]' => '${Char::Egreek::not_ascii}',
1715							'[:^blank:]' => '${Char::Egreek::not_blank}',
1716							'[:^cntrl:]' => '${Char::Egreek::not_cntrl}',
1717							'[:^digit:]' => '${Char::Egreek::not_digit}',
1718							'[:^graph:]' => '${Char::Egreek::not_graph}',
1719							'[:^lower:]' => '${Char::Egreek::not_lower}',
1720							'[:^print:]' => '${Char::Egreek::not_print}',
1721							'[:^punct:]' => '${Char::Egreek::not_punct}',
1722							'[:^space:]' => '${Char::Egreek::not_space}',
1723							'[:^upper:]' => '${Char::Egreek::not_upper}',
1724							'[:^word:]' => '${Char::Egreek::not_word}',
1725							'[:^xdigit:]' => '${Char::Egreek::not_xdigit}',
1726
1727							}->{$1};
1728							}
1729							elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
1730	0					0	$char[$i] = $1;
1731							}
1732							}
1733
1734							# open character list
1735	0					0	my @singleoctet = ();
1736	0					0	my @multipleoctet = ();
1737	0					0	for (my $i=0; $i <= $#char; ) {
1738
1739							# escaped -
1740	0	0	0			0	if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
		0
		0
		0
		0
		0
1741	0					0	$i += 1;
1742	0					0	next;
1743							}
1744
1745							# make range regexp
1746							elsif ($char[$i] eq '...') {
1747
1748							# range error
1749	0	0				0	if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
		0
1750	0					0	croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1751							}
1752							elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
1753	0	0				0	if ($char[$i-1] gt $char[$i+1]) {
1754	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]);
1755							}
1756							}
1757
1758							# make range regexp per length
1759	0					0	for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
1760	0					0	my @regexp = ();
1761
1762							# is first and last
1763	0	0	0			0	if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
		0	0
		0
		0
1764	0					0	push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
1765							}
1766
1767							# is first
1768							elsif ($length == CORE::length($char[$i-1])) {
1769	0					0	push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
1770							}
1771
1772							# is inside in first and last
1773							elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
1774	0					0	push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
1775							}
1776
1777							# is last
1778							elsif ($length == CORE::length($char[$i+1])) {
1779	0					0	push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
1780							}
1781
1782							else {
1783	0					0	die __FILE__, ": subroutine make_regexp panic.";
1784							}
1785
1786	0	0				0	if ($length == 1) {
1787	0					0	push @singleoctet, @regexp;
1788							}
1789							else {
1790	0					0	push @multipleoctet, @regexp;
1791							}
1792							}
1793
1794	0					0	$i += 2;
1795							}
1796
1797							# with /i modifier
1798							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
1799	0	0				0	if ($modifier =~ /i/oxms) {
1800	0					0	my $uc = Char::Egreek::uc($char[$i]);
1801	0					0	my $fc = Char::Egreek::fc($char[$i]);
1802	0	0				0	if ($uc ne $fc) {
1803	0	0				0	if (CORE::length($fc) == 1) {
1804	0					0	push @singleoctet, $uc, $fc;
1805							}
1806							else {
1807	0					0	push @singleoctet, $uc;
1808	0					0	push @multipleoctet, $fc;
1809							}
1810							}
1811							else {
1812	0					0	push @singleoctet, $char[$i];
1813							}
1814							}
1815							else {
1816	0					0	push @singleoctet, $char[$i];
1817							}
1818	0					0	$i += 1;
1819							}
1820
1821							# single character of single octet code
1822							elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
1823	0					0	push @singleoctet, "\t", "\x20";
1824	0					0	$i += 1;
1825							}
1826							elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
1827	0					0	push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
1828	0					0	$i += 1;
1829							}
1830							elsif ($char[$i] =~ /\A (?: \\d \| \\s \| \\w ) \z/oxms) {
1831	0					0	push @singleoctet, $char[$i];
1832	0					0	$i += 1;
1833							}
1834
1835							# single character of multiple-octet code
1836							else {
1837	0					0	push @multipleoctet, $char[$i];
1838	0					0	$i += 1;
1839							}
1840							}
1841
1842							# quote metachar
1843	0					0	for (@singleoctet) {
1844	0	0				0	if ($_ eq '...') {
		0
		0
		0
		0
1845	0					0	$_ = '-';
1846							}
1847							elsif (/\A \n \z/oxms) {
1848	0					0	$_ = '\n';
1849							}
1850							elsif (/\A \r \z/oxms) {
1851	0					0	$_ = '\r';
1852							}
1853							elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
1854	0					0	$_ = sprintf('\x%02X', CORE::ord $1);
1855							}
1856							elsif (/\A [\x00-\xFF] \z/oxms) {
1857	0					0	$_ = quotemeta $_;
1858							}
1859							}
1860
1861							# return character list
1862	0					0	return \@singleoctet, \@multipleoctet;
1863							}
1864
1865							#
1866							# Greek octal escape sequence
1867							#
1868							sub octchr {
1869	0			0	0	0	my($octdigit) = @_;
1870
1871	0					0	my @binary = ();
1872	0					0	for my $octal (split(//,$octdigit)) {
1873	0					0	push @binary, {
1874							'0' => '000',
1875							'1' => '001',
1876							'2' => '010',
1877							'3' => '011',
1878							'4' => '100',
1879							'5' => '101',
1880							'6' => '110',
1881							'7' => '111',
1882							}->{$octal};
1883							}
1884	0					0	my $binary = join '', @binary;
1885
1886	0					0	my $octchr = {
1887							# 1234567
1888							1 => pack('B*', "0000000$binary"),
1889							2 => pack('B*', "000000$binary"),
1890							3 => pack('B*', "00000$binary"),
1891							4 => pack('B*', "0000$binary"),
1892							5 => pack('B*', "000$binary"),
1893							6 => pack('B*', "00$binary"),
1894							7 => pack('B*', "0$binary"),
1895							0 => pack('B*', "$binary"),
1896
1897							}->{CORE::length($binary) % 8};
1898
1899	0					0	return $octchr;
1900							}
1901
1902							#
1903							# Greek hexadecimal escape sequence
1904							#
1905							sub hexchr {
1906	0			0	0	0	my($hexdigit) = @_;
1907
1908	0					0	my $hexchr = {
1909							1 => pack('H*', "0$hexdigit"),
1910							0 => pack('H*', "$hexdigit"),
1911
1912							}->{CORE::length($_[0]) % 2};
1913
1914	0					0	return $hexchr;
1915							}
1916
1917							#
1918							# Greek open character list for qr
1919							#
1920							sub charlist_qr {
1921
1922	0			0	0	0	my $modifier = pop @_;
1923	0					0	my @char = @_;
1924
1925	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
1926	0					0	my @singleoctet = @$singleoctet;
1927	0					0	my @multipleoctet = @$multipleoctet;
1928
1929							# return character list
1930	0	0				0	if (scalar(@singleoctet) >= 1) {
1931
1932							# with /i modifier
1933	0	0				0	if ($modifier =~ m/i/oxms) {
1934	0					0	my %singleoctet_ignorecase = ();
1935	0					0	for (@singleoctet) {
1936	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
1937	0					0	for my $ord (hex($1) .. hex($2)) {
1938	0					0	my $char = CORE::chr($ord);
1939	0					0	my $uc = Char::Egreek::uc($char);
1940	0					0	my $fc = Char::Egreek::fc($char);
1941	0	0				0	if ($uc eq $fc) {
1942	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
1943							}
1944							else {
1945	0	0				0	if (CORE::length($fc) == 1) {
1946	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1947	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
1948							}
1949							else {
1950	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
1951	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
1952							}
1953							}
1954							}
1955							}
1956	0	0				0	if ($_ ne '') {
1957	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
1958							}
1959							}
1960	0					0	my $i = 0;
1961	0					0	my @singleoctet_ignorecase = ();
1962	0					0	for my $ord (0 .. 255) {
1963	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
1964	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
1965							}
1966							else {
1967	0					0	$i++;
1968							}
1969							}
1970	0					0	@singleoctet = ();
1971	0					0	for my $range (@singleoctet_ignorecase) {
1972	0	0				0	if (ref $range) {
1973	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
1974	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
1975							}
1976							elsif (scalar(@{$range}) == 2) {
1977	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1978							}
1979							else {
1980	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
1981							}
1982							}
1983							}
1984							}
1985
1986	0					0	my $not_anchor = '';
1987
1988	0					0	push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
1989							}
1990	0	0				0	if (scalar(@multipleoctet) >= 2) {
1991	0					0	return '(?:' . join('\|', @multipleoctet) . ')';
1992							}
1993							else {
1994	0					0	return $multipleoctet[0];
1995							}
1996							}
1997
1998							#
1999							# Greek open character list for not qr
2000							#
2001							sub charlist_not_qr {
2002
2003	0			0	0	0	my $modifier = pop @_;
2004	0					0	my @char = @_;
2005
2006	0					0	my($singleoctet, $multipleoctet) = _charlist(@char, $modifier);
2007	0					0	my @singleoctet = @$singleoctet;
2008	0					0	my @multipleoctet = @$multipleoctet;
2009
2010							# with /i modifier
2011	0	0				0	if ($modifier =~ m/i/oxms) {
2012	0					0	my %singleoctet_ignorecase = ();
2013	0					0	for (@singleoctet) {
2014	0		0			0	while (s/ \A \\x(..) - \\x(..) //oxms or s/ \A \\x((..)) //oxms) {
2015	0					0	for my $ord (hex($1) .. hex($2)) {
2016	0					0	my $char = CORE::chr($ord);
2017	0					0	my $uc = Char::Egreek::uc($char);
2018	0					0	my $fc = Char::Egreek::fc($char);
2019	0	0				0	if ($uc eq $fc) {
2020	0					0	$singleoctet_ignorecase{unpack 'C*', $char} = 1;
2021							}
2022							else {
2023	0	0				0	if (CORE::length($fc) == 1) {
2024	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2025	0					0	$singleoctet_ignorecase{unpack 'C*', $fc} = 1;
2026							}
2027							else {
2028	0					0	$singleoctet_ignorecase{unpack 'C*', $uc} = 1;
2029	0					0	push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
	0					0
2030							}
2031							}
2032							}
2033							}
2034	0	0				0	if ($_ ne '') {
2035	0					0	$singleoctet_ignorecase{unpack 'C*', $_} = 1;
2036							}
2037							}
2038	0					0	my $i = 0;
2039	0					0	my @singleoctet_ignorecase = ();
2040	0					0	for my $ord (0 .. 255) {
2041	0	0				0	if (exists $singleoctet_ignorecase{$ord}) {
2042	0					0	push @{$singleoctet_ignorecase[$i]}, $ord;
	0					0
2043							}
2044							else {
2045	0					0	$i++;
2046							}
2047							}
2048	0					0	@singleoctet = ();
2049	0					0	for my $range (@singleoctet_ignorecase) {
2050	0	0				0	if (ref $range) {
2051	0	0				0	if (scalar(@{$range}) == 1) {
	0	0				0
	0					0
2052	0					0	push @singleoctet, sprintf('\x%02X', @{$range}[0]);
	0					0
2053							}
2054							elsif (scalar(@{$range}) == 2) {
2055	0					0	push @singleoctet, sprintf('\x%02X\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2056							}
2057							else {
2058	0					0	push @singleoctet, sprintf('\x%02X-\x%02X', @{$range}[0], @{$range}[-1]);
	0					0
	0					0
2059							}
2060							}
2061							}
2062							}
2063
2064							# return character list
2065	0	0				0	if (scalar(@multipleoctet) >= 1) {
2066	0	0				0	if (scalar(@singleoctet) >= 1) {
2067
2068							# any character other than multiple-octet and single octet character class
2069	0					0	return '(?!' . join('\|', @multipleoctet) . ')(?:[^' . join('', @singleoctet) . '])';
2070							}
2071							else {
2072
2073							# any character other than multiple-octet character class
2074	0					0	return '(?!' . join('\|', @multipleoctet) . ")(?:$your_char)";
2075							}
2076							}
2077							else {
2078	0	0				0	if (scalar(@singleoctet) >= 1) {
2079
2080							# any character other than single octet character class
2081	0					0	return '(?:[^' . join('', @singleoctet) . '])';
2082							}
2083							else {
2084
2085							# any character
2086	0					0	return "(?:$your_char)";
2087							}
2088							}
2089							}
2090
2091							#
2092							# open file in read mode
2093							#
2094							sub _open_r {
2095	176			176		608	my(undef,$file) = @_;
2096	176					908	$file =~ s#\A (\s) #./$1#oxms;
2097	176		33			16890	return eval(q{open($_[0],'<',$_[1])}) \|\|
2098							open($_[0],"< $file\0");
2099							}
2100
2101							#
2102							# open file in write mode
2103							#
2104							sub _open_w {
2105	0			0		0	my(undef,$file) = @_;
2106	0					0	$file =~ s#\A (\s) #./$1#oxms;
2107	0		0			0	return eval(q{open($_[0],'>',$_[1])}) \|\|
2108							open($_[0],"> $file\0");
2109							}
2110
2111							#
2112							# open file in append mode
2113							#
2114							sub _open_a {
2115	0			0		0	my(undef,$file) = @_;
2116	0					0	$file =~ s#\A (\s) #./$1#oxms;
2117	0		0			0	return eval(q{open($_[0],'>>',$_[1])}) \|\|
2118							open($_[0],">> $file\0");
2119							}
2120
2121							#
2122							# safe system
2123							#
2124							sub _systemx {
2125
2126							# P.707 29.2.33. exec
2127							# in Chapter 29: Functions
2128							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2129							#
2130							# Be aware that in older releases of Perl, exec (and system) did not flush
2131							# your output buffer, so you needed to enable command buffering by setting $\|
2132							# on one or more filehandles to avoid lost output in the case of exec, or
2133							# misordererd output in the case of system. This situation was largely remedied
2134							# in the 5.6 release of Perl. (So, 5.005 release not yet.)
2135
2136							# P.855 exec
2137							# in Chapter 27: Functions
2138							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2139							#
2140							# In very old release of Perl (before v5.6), exec (and system) did not flush
2141							# your output buffer, so you needed to enable command buffering by setting $\|
2142							# on one or more filehandles to avoid lost output with exec or misordered
2143							# output with system.
2144
2145	176			176		719	$\| = 1;
2146
2147							# P.565 23.1.2. Cleaning Up Your Environment
2148							# in Chapter 23: Security
2149							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2150
2151							# P.656 Cleaning Up Your Environment
2152							# in Chapter 20: Security
2153							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2154
2155							# local $ENV{'PATH'} = '.';
2156	176					3760	local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer
2157
2158							# P.707 29.2.33. exec
2159							# in Chapter 29: Functions
2160							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2161							#
2162							# As we mentioned earlier, exec treats a discrete list of arguments as an
2163							# indication that it should bypass shell processing. However, there is one
2164							# place where you might still get tripped up. The exec call (and system, too)
2165							# will not distinguish between a single scalar argument and an array containing
2166							# only one element.
2167							#
2168							# @args = ("echo surprise"); # just one element in list
2169							# exec @args # still subject to shell escapes
2170							# or die "exec: $!"; # because @args == 1
2171							#
2172							# To avoid this, you can use the PATHNAME syntax, explicitly duplicating the
2173							# first argument as the pathname, which forces the rest of the arguments to be
2174							# interpreted as a list, even if there is only one of them:
2175							#
2176							# exec { $args[0] } @args # safe even with one-argument list
2177							# or die "can't exec @args: $!";
2178
2179							# P.855 exec
2180							# in Chapter 27: Functions
2181							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2182							#
2183							# As we mentioned earlier, exec treats a discrete list of arguments as a
2184							# directive to bypass shell processing. However, there is one place where
2185							# you might still get tripped up. The exec call (and system, too) cannot
2186							# distinguish between a single scalar argument and an array containing
2187							# only one element.
2188							#
2189							# @args = ("echo surprise"); # just one element in list
2190							# exec @args # still subject to shell escapes
2191							# \|\| die "exec: $!"; # because @args == 1
2192							#
2193							# To avoid this, use the PATHNAME syntax, explicitly duplicating the first
2194							# argument as the pathname, which forces the rest of the arguments to be
2195							# interpreted as a list, even if there is only one of them:
2196							#
2197							# exec { $args[0] } @args # safe even with one-argument list
2198							# \|\| die "can't exec @args: $!";
2199
2200	176					403	return CORE::system { $_[0] } @_; # safe even with one-argument list
	176					14598122
2201							}
2202
2203							#
2204							# Greek order to character (with parameter)
2205							#
2206							sub Char::Egreek::chr(;$) {
2207
2208	0	0		0	0		my $c = @_ ? $_[0] : $_;
2209
2210	0	0					if ($c == 0x00) {
2211	0						return "\x00";
2212							}
2213							else {
2214	0						my @chr = ();
2215	0						while ($c > 0) {
2216	0						unshift @chr, ($c % 0x100);
2217	0						$c = int($c / 0x100);
2218							}
2219	0						return pack 'C*', @chr;
2220							}
2221							}
2222
2223							#
2224							# Greek order to character (without parameter)
2225							#
2226							sub Char::Egreek::chr_() {
2227
2228	0			0	0		my $c = $_;
2229
2230	0	0					if ($c == 0x00) {
2231	0						return "\x00";
2232							}
2233							else {
2234	0						my @chr = ();
2235	0						while ($c > 0) {
2236	0						unshift @chr, ($c % 0x100);
2237	0						$c = int($c / 0x100);
2238							}
2239	0						return pack 'C*', @chr;
2240							}
2241							}
2242
2243							#
2244							# Greek path globbing (with parameter)
2245							#
2246							sub Char::Egreek::glob($) {
2247
2248	0	0		0	0		if (wantarray) {
2249	0						my @glob = _DOS_like_glob(@_);
2250	0						for my $glob (@glob) {
2251	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2252							}
2253	0						return @glob;
2254							}
2255							else {
2256	0						my $glob = _DOS_like_glob(@_);
2257	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2258	0						return $glob;
2259							}
2260							}
2261
2262							#
2263							# Greek path globbing (without parameter)
2264							#
2265							sub Char::Egreek::glob_() {
2266
2267	0	0		0	0		if (wantarray) {
2268	0						my @glob = _DOS_like_glob();
2269	0						for my $glob (@glob) {
2270	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2271							}
2272	0						return @glob;
2273							}
2274							else {
2275	0						my $glob = _DOS_like_glob();
2276	0						$glob =~ s{ \A (?:\./)+ }{}oxms;
2277	0						return $glob;
2278							}
2279							}
2280
2281							#
2282							# Greek path globbing via File::DosGlob 1.10
2283							#
2284							# Often I confuse "_dosglob" and "_doglob".
2285							# So, I renamed "_dosglob" to "_DOS_like_glob".
2286							#
2287							my %iter;
2288							my %entries;
2289							sub _DOS_like_glob {
2290
2291							# context (keyed by second cxix argument provided by core)
2292	0			0			my($expr,$cxix) = @_;
2293
2294							# glob without args defaults to $_
2295	0	0					$expr = $_ if not defined $expr;
2296
2297							# represents the current user's home directory
2298							#
2299							# 7.3. Expanding Tildes in Filenames
2300							# in Chapter 7. File Access
2301							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2302							#
2303							# and File::HomeDir, File::HomeDir::Windows module
2304
2305							# DOS-like system
2306	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2307	0						$expr =~ s{ \A ~ (?= [^/\\] ) }
2308	0						{ my_home_MSWin32() }oxmse;
2309							}
2310
2311							# UNIX-like system
2312							else {
2313	0						$expr =~ s{ \A ~ ( (?:[^/])* ) }
2314	0	0	0				{ $1 ? (eval(q{(getpwnam($1))[7]})\|\|my_home()) : my_home() }oxmse;
2315							}
2316
2317							# assume global context if not provided one
2318	0	0					$cxix = '_G_' if not defined $cxix;
2319	0	0					$iter{$cxix} = 0 if not exists $iter{$cxix};
2320
2321							# if we're just beginning, do it all first
2322	0	0					if ($iter{$cxix} == 0) {
2323	0						$entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ];
2324							}
2325
2326							# chuck it all out, quick or slow
2327	0	0					if (wantarray) {
2328	0						delete $iter{$cxix};
2329	0						return @{delete $entries{$cxix}};
	0
2330							}
2331							else {
2332	0	0					if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
	0
2333	0						return shift @{$entries{$cxix}};
	0
2334							}
2335							else {
2336							# return undef for EOL
2337	0						delete $iter{$cxix};
2338	0						delete $entries{$cxix};
2339	0						return undef;
2340							}
2341							}
2342							}
2343
2344							#
2345							# Greek path globbing subroutine
2346							#
2347							sub _do_glob {
2348
2349	0			0			my($cond,@expr) = @_;
2350	0						my @glob = ();
2351	0						my $fix_drive_relative_paths = 0;
2352
2353							OUTER:
2354	0						for my $expr (@expr) {
2355	0	0					next OUTER if not defined $expr;
2356	0	0					next OUTER if $expr eq '';
2357
2358	0						my @matched = ();
2359	0						my @globdir = ();
2360	0						my $head = '.';
2361	0						my $pathsep = '/';
2362	0						my $tail;
2363
2364							# if argument is within quotes strip em and do no globbing
2365	0	0					if ($expr =~ /\A " ((?:$q_char)*) " \z/oxms) {
2366	0						$expr = $1;
2367	0	0					if ($cond eq 'd') {
2368	0	0					if (-d $expr) {
2369	0						push @glob, $expr;
2370							}
2371							}
2372							else {
2373	0	0					if (-e $expr) {
2374	0						push @glob, $expr;
2375							}
2376							}
2377	0						next OUTER;
2378							}
2379
2380							# wildcards with a drive prefix such as h:*.pm must be changed
2381							# to h:./*.pm to expand correctly
2382	0	0					if ($^O =~ /\A (?: MSWin32 \| NetWare \| symbian \| dos ) \z/oxms) {
2383	0	0					if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^/\\]) #$1./$2#oxms) {
2384	0						$fix_drive_relative_paths = 1;
2385							}
2386							}
2387
2388	0	0					if (($head, $tail) = _parse_path($expr,$pathsep)) {
2389	0	0					if ($tail eq '') {
2390	0						push @glob, $expr;
2391	0						next OUTER;
2392							}
2393	0	0					if ($head =~ / \A (?:$q_char)? [?] /oxms) {
2394	0	0					if (@globdir = _do_glob('d', $head)) {
2395	0						push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
	0
2396	0						next OUTER;
2397							}
2398							}
2399	0	0	0				if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
2400	0						$head .= $pathsep;
2401							}
2402	0						$expr = $tail;
2403							}
2404
2405							# If file component has no wildcards, we can avoid opendir
2406	0	0					if ($expr !~ / \A (?:$q_char)? [?] /oxms) {
2407	0	0					if ($head eq '.') {
2408	0						$head = '';
2409							}
2410	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2411	0						$head .= $pathsep;
2412							}
2413	0						$head .= $expr;
2414	0	0					if ($cond eq 'd') {
2415	0	0					if (-d $head) {
2416	0						push @glob, $head;
2417							}
2418							}
2419							else {
2420	0	0					if (-e $head) {
2421	0						push @glob, $head;
2422							}
2423							}
2424	0						next OUTER;
2425							}
2426	0	0					opendir(*DIR, $head) or next OUTER;
2427	0						my @leaf = readdir DIR;
2428	0						closedir DIR;
2429
2430	0	0					if ($head eq '.') {
2431	0						$head = '';
2432							}
2433	0	0	0				if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
2434	0						$head .= $pathsep;
2435							}
2436
2437	0						my $pattern = '';
2438	0						while ($expr =~ / \G ($q_char) /oxgc) {
2439	0						my $char = $1;
2440
2441							# 6.9. Matching Shell Globs as Regular Expressions
2442							# in Chapter 6. Pattern Matching
2443							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
2444							# (and so on)
2445
2446	0	0					if ($char eq '*') {
		0
		0
2447	0						$pattern .= "(?:$your_char)*",
2448							}
2449							elsif ($char eq '?') {
2450	0						$pattern .= "(?:$your_char)?", # DOS style
2451							# $pattern .= "(?:$your_char)", # UNIX style
2452							}
2453							elsif ((my $fc = Char::Egreek::fc($char)) ne $char) {
2454	0						$pattern .= $fc;
2455							}
2456							else {
2457	0						$pattern .= quotemeta $char;
2458							}
2459							}
2460	0			0			my $matchsub = sub { Char::Egreek::fc($_[0]) =~ /\A $pattern \z/xms };
	0
2461
2462							# if ($@) {
2463							# print STDERR "$0: $@\n";
2464							# next OUTER;
2465							# }
2466
2467							INNER:
2468	0						for my $leaf (@leaf) {
2469	0	0	0				if ($leaf eq '.' or $leaf eq '..') {
2470	0						next INNER;
2471							}
2472	0	0	0				if ($cond eq 'd' and not -d "$head$leaf") {
2473	0						next INNER;
2474							}
2475
2476	0	0					if (&$matchsub($leaf)) {
2477	0						push @matched, "$head$leaf";
2478	0						next INNER;
2479							}
2480
2481							# [DOS compatibility special case]
2482							# Failed, add a trailing dot and try again, but only...
2483
2484	0	0	0				if (Char::Egreek::index($leaf,'.') == -1 and # if name does not have a dot in it and
			0
2485							CORE::length($leaf) <= 8 and # name is shorter than or equal to 8 chars and
2486							Char::Egreek::index($pattern,'\\.') != -1 # pattern has a dot.
2487							) {
2488	0	0					if (&$matchsub("$leaf.")) {
2489	0						push @matched, "$head$leaf";
2490	0						next INNER;
2491							}
2492							}
2493							}
2494	0	0					if (@matched) {
2495	0						push @glob, @matched;
2496							}
2497							}
2498	0	0					if ($fix_drive_relative_paths) {
2499	0						for my $glob (@glob) {
2500	0						$glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
2501							}
2502							}
2503	0						return @glob;
2504							}
2505
2506							#
2507							# Greek parse line
2508							#
2509							sub _parse_line {
2510
2511	0			0			my($line) = @_;
2512
2513	0						$line .= ' ';
2514	0						my @piece = ();
2515	0						while ($line =~ /
2516							" ( (?: [^"] )* ) " \s+ \|
2517							( (?: [^"\s] )* ) \s+
2518							/oxmsg
2519							) {
2520	0	0					push @piece, defined($1) ? $1 : $2;
2521							}
2522	0						return @piece;
2523							}
2524
2525							#
2526							# Greek parse path
2527							#
2528							sub _parse_path {
2529
2530	0			0			my($path,$pathsep) = @_;
2531
2532	0						$path .= '/';
2533	0						my @subpath = ();
2534	0						while ($path =~ /
2535							((?: [^\/\\] )+?) [\/\\]
2536							/oxmsg
2537							) {
2538	0						push @subpath, $1;
2539							}
2540
2541	0						my $tail = pop @subpath;
2542	0						my $head = join $pathsep, @subpath;
2543	0						return $head, $tail;
2544							}
2545
2546							#
2547							# via File::HomeDir::Windows 1.00
2548							#
2549							sub my_home_MSWin32 {
2550
2551							# A lot of unix people and unix-derived tools rely on
2552							# the ability to overload HOME. We will support it too
2553							# so that they can replace raw HOME calls with File::HomeDir.
2554	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
2555	0						return $ENV{'HOME'};
2556							}
2557
2558							# Do we have a user profile?
2559							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
2560	0						return $ENV{'USERPROFILE'};
2561							}
2562
2563							# Some Windows use something like $ENV{'HOME'}
2564							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
2565	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
2566							}
2567
2568	0						return undef;
2569							}
2570
2571							#
2572							# via File::HomeDir::Unix 1.00
2573							#
2574							sub my_home {
2575	0			0	0		my $home;
2576
2577	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
2578	0						$home = $ENV{'HOME'};
2579							}
2580
2581							# This is from the original code, but I'm guessing
2582							# it means "login directory" and exists on some Unixes.
2583							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
2584	0						$home = $ENV{'LOGDIR'};
2585							}
2586
2587							### More-desperate methods
2588
2589							# Light desperation on any (Unixish) platform
2590							else {
2591	0						$home = eval q{ (getpwuid($<))[7] };
2592							}
2593
2594							# On Unix in general, a non-existant home means "no home"
2595							# For example, "nobody"-like users might use /nonexistant
2596	0	0	0				if (defined $home and ! -d($home)) {
2597	0						$home = undef;
2598							}
2599	0						return $home;
2600							}
2601
2602							#
2603							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
2604							#
2605							sub Char::Egreek::PREMATCH {
2606	0			0	0		return $`;
2607							}
2608
2609							#
2610							# ${^MATCH}, $MATCH, $& the string that matched
2611							#
2612							sub Char::Egreek::MATCH {
2613	0			0	0		return $&;
2614							}
2615
2616							#
2617							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
2618							#
2619							sub Char::Egreek::POSTMATCH {
2620	0			0	0		return $';
2621							}
2622
2623							#
2624							# Greek character to order (with parameter)
2625							#
2626							sub Char::Greek::ord(;$) {
2627
2628	0	0		0	1		local $_ = shift if @_;
2629
2630	0	0					if (/\A ($q_char) /oxms) {
2631	0						my @ord = unpack 'C*', $1;
2632	0						my $ord = 0;
2633	0						while (my $o = shift @ord) {
2634	0						$ord = $ord * 0x100 + $o;
2635							}
2636	0						return $ord;
2637							}
2638							else {
2639	0						return CORE::ord $_;
2640							}
2641							}
2642
2643							#
2644							# Greek character to order (without parameter)
2645							#
2646							sub Char::Greek::ord_() {
2647
2648	0	0		0	0		if (/\A ($q_char) /oxms) {
2649	0						my @ord = unpack 'C*', $1;
2650	0						my $ord = 0;
2651	0						while (my $o = shift @ord) {
2652	0						$ord = $ord * 0x100 + $o;
2653							}
2654	0						return $ord;
2655							}
2656							else {
2657	0						return CORE::ord $_;
2658							}
2659							}
2660
2661							#
2662							# Greek reverse
2663							#
2664							sub Char::Greek::reverse(@) {
2665
2666	0	0		0	0		if (wantarray) {
2667	0						return CORE::reverse @_;
2668							}
2669							else {
2670
2671							# One of us once cornered Larry in an elevator and asked him what
2672							# problem he was solving with this, but he looked as far off into
2673							# the distance as he could in an elevator and said, "It seemed like
2674							# a good idea at the time."
2675
2676	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
2677							}
2678							}
2679
2680							#
2681							# Greek getc (with parameter, without parameter)
2682							#
2683							sub Char::Greek::getc(;*@) {
2684
2685	0			0	0		my($package) = caller;
2686	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
2687	0	0	0				croak 'Too many arguments for Char::Greek::getc' if @_ and not wantarray;
2688
2689	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
2690	0						my $getc = '';
2691	0						for my $length ($length[0] .. $length[-1]) {
2692	0						$getc .= CORE::getc($fh);
2693	0	0					if (exists $range_tr{CORE::length($getc)}) {
2694	0	0					if ($getc =~ /\A ${Char::Egreek::dot_s} \z/oxms) {
2695	0	0					return wantarray ? ($getc,@_) : $getc;
2696							}
2697							}
2698							}
2699	0	0					return wantarray ? ($getc,@_) : $getc;
2700							}
2701
2702							#
2703							# Greek length by character
2704							#
2705							sub Char::Greek::length(;$) {
2706
2707	0	0		0	1		local $_ = shift if @_;
2708
2709	0						local @_ = /\G ($q_char) /oxmsg;
2710	0						return scalar @_;
2711							}
2712
2713							#
2714							# Greek substr by character
2715							#
2716							BEGIN {
2717
2718							# P.232 The lvalue Attribute
2719							# in Chapter 6: Subroutines
2720							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2721
2722							# P.336 The lvalue Attribute
2723							# in Chapter 7: Subroutines
2724							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2725
2726							# P.144 8.4 Lvalue subroutines
2727							# in Chapter 8: perlsub: Perl subroutines
2728							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2729
2730	176	50	0	176	1	406278	eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
2731							# vv----------------*******
2732							sub Char::Greek::substr($$;$$) %s {
2733
2734							my @char = $_[0] =~ /\G ($q_char) /oxmsg;
2735
2736							# If the substring is beyond either end of the string, substr() returns the undefined
2737							# value and produces a warning. When used as an lvalue, specifying a substring that
2738							# is entirely outside the string raises an exception.
2739							# http://perldoc.perl.org/functions/substr.html
2740
2741							# A return with no argument returns the scalar value undef in scalar context,
2742							# an empty list () in list context, and (naturally) nothing at all in void
2743							# context.
2744
2745							my $offset = $_[1];
2746							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
2747							return;
2748							}
2749
2750							# substr($string,$offset,$length,$replacement)
2751							if (@_ == 4) {
2752							my(undef,undef,$length,$replacement) = @_;
2753							my $substr = join '', splice(@char, $offset, $length, $replacement);
2754							$_[0] = join '', @char;
2755
2756							# return $substr; this doesn't work, don't say "return"
2757							$substr;
2758							}
2759
2760							# substr($string,$offset,$length)
2761							elsif (@_ == 3) {
2762							my(undef,undef,$length) = @_;
2763							my $octet_offset = 0;
2764							my $octet_length = 0;
2765							if ($offset == 0) {
2766							$octet_offset = 0;
2767							}
2768							elsif ($offset > 0) {
2769							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2770							}
2771							else {
2772							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2773							}
2774							if ($length == 0) {
2775							$octet_length = 0;
2776							}
2777							elsif ($length > 0) {
2778							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
2779							}
2780							else {
2781							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
2782							}
2783							CORE::substr($_[0], $octet_offset, $octet_length);
2784							}
2785
2786							# substr($string,$offset)
2787							else {
2788							my $octet_offset = 0;
2789							if ($offset == 0) {
2790							$octet_offset = 0;
2791							}
2792							elsif ($offset > 0) {
2793							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
2794							}
2795							else {
2796							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
2797							}
2798							CORE::substr($_[0], $octet_offset);
2799							}
2800							}
2801							END
2802							}
2803
2804							#
2805							# Greek index by character
2806							#
2807							sub Char::Greek::index($$;$) {
2808
2809	0			0	1		my $index;
2810	0	0					if (@_ == 3) {
2811	0						$index = Char::Egreek::index($_[0], $_[1], CORE::length(Char::Greek::substr($_[0], 0, $_[2])));
2812							}
2813							else {
2814	0						$index = Char::Egreek::index($_[0], $_[1]);
2815							}
2816
2817	0	0					if ($index == -1) {
2818	0						return -1;
2819							}
2820							else {
2821	0						return Char::Greek::length(CORE::substr $_[0], 0, $index);
2822							}
2823							}
2824
2825							#
2826							# Greek rindex by character
2827							#
2828							sub Char::Greek::rindex($$;$) {
2829
2830	0			0	1		my $rindex;
2831	0	0					if (@_ == 3) {
2832	0						$rindex = Char::Egreek::rindex($_[0], $_[1], CORE::length(Char::Greek::substr($_[0], 0, $_[2])));
2833							}
2834							else {
2835	0						$rindex = Char::Egreek::rindex($_[0], $_[1]);
2836							}
2837
2838	0	0					if ($rindex == -1) {
2839	0						return -1;
2840							}
2841							else {
2842	0						return Char::Greek::length(CORE::substr $_[0], 0, $rindex);
2843							}
2844							}
2845
2846							#
2847							# instead of Carp::carp
2848							#
2849							sub carp {
2850	0			0	0		my($package,$filename,$line) = caller(1);
2851	0						print STDERR "@_ at $filename line $line.\n";
2852							}
2853
2854							#
2855							# instead of Carp::croak
2856							#
2857							sub croak {
2858	0			0	0		my($package,$filename,$line) = caller(1);
2859	0						print STDERR "@_ at $filename line $line.\n";
2860	0						die "\n";
2861							}
2862
2863							#
2864							# instead of Carp::cluck
2865							#
2866							sub cluck {
2867	0			0	0		my $i = 0;
2868	0						my @cluck = ();
2869	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2870	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
2871	0						$i++;
2872							}
2873	0						print STDERR CORE::reverse @cluck;
2874	0						print STDERR "\n";
2875	0						carp @_;
2876							}
2877
2878							#
2879							# instead of Carp::confess
2880							#
2881							sub confess {
2882	0			0	0		my $i = 0;
2883	0						my @confess = ();
2884	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
2885	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
2886	0						$i++;
2887							}
2888	0						print STDERR CORE::reverse @confess;
2889	0						print STDERR "\n";
2890	0						croak @_;
2891							}
2892
2893							1;
2894
2895							__END__