File Coverage

blib/lib/Eutf2.pm

Criterion	Covered	Total	%
statement	83	3119	2.6
branch	4	2704	0.1
condition	1	373	0.2
subroutine	36	125	28.8
pod	7	74	9.4
total	131	6395	2.0

line	stmt	bran	cond	sub	pod	time	code
1							package Eutf2;
2							######################################################################
3							#
4							# Eutf2 - Run-time routines for UTF2.pm
5							#
6							# http://search.cpan.org/dist/Char-UTF2/
7							#
8							# Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015 INABA Hitoshi
9							######################################################################
10
11	302			302		6438	use 5.00503; # Galapagos Consensus 1998 for primetools
	302					908
	302					15887
12							# use 5.008001; # Lancaster Consensus 2013 for toolchains
13
14							# 12.3. Delaying use Until Runtime
15							# in Chapter 12. Packages, Libraries, and Modules
16							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
17							# (and so on)
18
19							# Version numbers should be boring
20							# http://www.dagolden.com/index.php/369/version-numbers-should-be-boring/
21							# For the impatient, the disinterested or those who just want to follow
22							# a recipe, my advice for all modules is this:
23							# our $VERSION = "0.001"; # or "0.001_001" for a dev release
24							# $VERSION = eval $VERSION;
25
26	302			302		20780	BEGIN { CORE::eval q{ use vars qw($VERSION) } }
	302			302		1605
	302					474
	302					44273
27							$VERSION = '1.05';
28							$VERSION = CORE::eval $VERSION;
29
30							BEGIN {
31	302	50		302		2160	if ($^X =~ / jperl /oxmsi) {
32	0					0	die __FILE__, ": needs perl(not jperl) 5.00503 or later. (\$^X==$^X)\n";
33							}
34	302					422	if (CORE::ord('A') == 193) {
35							die __FILE__, ": is not US-ASCII script (may be EBCDIC or EBCDIK script).\n";
36							}
37	302					38780	if (CORE::ord('A') != 0x41) {
38							die __FILE__, ": is not US-ASCII script (must be US-ASCII script).\n";
39							}
40							}
41
42							BEGIN {
43
44							# instead of utf8.pm
45	302			302		19091	CORE::eval q{
	302			302		1633
	302			118		587
	302					37253
	118					19286
	97					15997
	95					15074
	99					14751
	90					14269
	105					17819
46							no warnings qw(redefine);
47							*utf8::upgrade = sub { CORE::length $_[0] };
48							*utf8::downgrade = sub { 1 };
49							*utf8::encode = sub { };
50							*utf8::decode = sub { 1 };
51							*utf8::is_utf8 = sub { };
52							*utf8::valid = sub { 1 };
53							};
54	302	50				158603	if ($@) {
55	0					0	*utf8::upgrade = sub { CORE::length $_[0] };
	0					0
56	0					0	*utf8::downgrade = sub { 1 };
	0					0
57	0					0	*utf8::encode = sub { };
	0					0
58	0					0	*utf8::decode = sub { 1 };
	0					0
59	0					0	*utf8::is_utf8 = sub { };
	0					0
60	0					0	*utf8::valid = sub { 1 };
	0					0
61							}
62							}
63
64							# instead of Symbol.pm
65							BEGIN {
66	302			302		691	my $genpkg = "Symbol::";
67	302					12811	my $genseq = 0;
68
69							sub gensym () {
70	0			0	0	0	my $name = "GEN" . $genseq++;
71
72							# here, no strict qw(refs); if strict.pm exists
73
74	0					0	my $ref = \*{$genpkg . $name};
	0					0
75	0					0	delete $$genpkg{$name};
76	0					0	return $ref;
77							}
78
79							sub qualify ($;$) {
80	0			0	0	0	my ($name) = @_;
81	0	0	0			0	if (!ref($name) && (Eutf2::index($name, '::') == -1) && (Eutf2::index($name, "'") == -1)) {
			0
82	0					0	my $pkg;
83	0					0	my %global = map {$_ => 1} qw(ARGV ARGVOUT ENV INC SIG STDERR STDIN STDOUT DATA);
	0					0
84
85							# Global names: special character, "^xyz", or other.
86	0	0	0			0	if ($name =~ /^(([^\x80-\xFFa-z])\|(\^[a-z_]+))\z/i \|\| $global{$name}) {
87							# RGS 2001-11-05 : translate leading ^X to control-char
88	0					0	$name =~ s/^\^([a-z_])/'qq(\c'.$1.')'/eei;
	0					0
89	0					0	$pkg = "main";
90							}
91							else {
92	0	0				0	$pkg = (@_ > 1) ? $_[1] : caller;
93							}
94	0					0	$name = $pkg . "::" . $name;
95							}
96	0					0	return $name;
97							}
98
99							sub qualify_to_ref ($;$) {
100
101							# here, no strict qw(refs); if strict.pm exists
102
103	0	0		0	0	0	return \*{ qualify $_[0], @_ > 1 ? $_[1] : caller };
	0					0
104							}
105							}
106
107							# Column: local $@
108							# in Chapter 9. Osaete okitai Perl no kiso
109							# of ISBN 10: 4798119172 \| ISBN 13: 978-4798119175 MODAN Perl NYUMON
110							# (and so on)
111
112							# use strict; if strict.pm exists
113							BEGIN {
114	302	50		302		532	if (CORE::eval { local $@; CORE::require strict }) {
	302					483
	302					2966
115	302					34363	strict::->import;
116							}
117							}
118
119							# P.714 29.2.39. flock
120							# in Chapter 29: Functions
121							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
122
123							# P.863 flock
124							# in Chapter 27: Functions
125							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
126
127							sub LOCK_SH() {1}
128							sub LOCK_EX() {2}
129							sub LOCK_UN() {8}
130							sub LOCK_NB() {4}
131
132							# instead of Carp.pm
133							sub carp;
134							sub croak;
135							sub cluck;
136							sub confess;
137
138							# 6.18. Matching Multiple-Byte Characters
139							# in Chapter 6. Pattern Matching
140							# of ISBN 978-1-56592-243-3 Perl Perl Cookbook.
141							# (and so on)
142
143							# regexp of character
144	302			302		19521	BEGIN { CORE::eval q{ use vars qw($your_char) } } $your_char = q{(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]\|[\x00-\x7F\xF5-\xFF]};
	302			302		1577
	302					447
	302					17977
145	302			302		18048	BEGIN { CORE::eval q{ use vars qw($qq_char ) } } $qq_char = qr/\\c[\x40-\x5F]\|\\?(?:$your_char)/oxms;
	302			302		1390
	302					423
	302					17073
146	302			302		16339	BEGIN { CORE::eval q{ use vars qw($q_char ) } } $q_char = qr/$your_char/oxms;
	302			302		1373
	302					518
	302					19849
147
148							#
149							# UTF-8 character range per length
150							#
151							my %range_tr = ();
152
153							#
154							# alias of encoding name
155							#
156	302			302		17685	BEGIN { CORE::eval q{ use vars qw($encoding_alias) } }
	302			302		1439
	302					451
	302					3015864
157
158							#
159							# UTF-8 case conversion
160							#
161							my %lc = ();
162							@lc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
163							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
164							my %uc = ();
165							@uc{qw(a b c d e f g h i j k l m n o p q r s t u v w x y z)} =
166							qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
167							my %fc = ();
168							@fc{qw(A B C D E F G H I J K L M N O P Q R S T U V W X Y Z)} =
169							qw(a b c d e f g h i j k l m n o p q r s t u v w x y z);
170
171							if (0) {
172							}
173
174							elsif (__PACKAGE__ =~ / \b Eutf2 \z/oxms) {
175							%range_tr = (
176							1 => [ [0x00..0x7F],
177							[0xF5..0xFF], # malformed octet
178							],
179							2 => [ [0xC2..0xDF],[0x80..0xBF],
180							],
181							3 => [ [0xE0..0xE0],[0xA0..0xBF],[0x80..0xBF],
182							[0xE1..0xEC],[0x80..0xBF],[0x80..0xBF],
183							[0xED..0xED],[0x80..0x9F],[0x80..0xBF],
184							[0xEE..0xEF],[0x80..0xBF],[0x80..0xBF],
185							],
186							4 => [ [0xF0..0xF0],[0x90..0xBF],[0x80..0xBF],[0x80..0xBF],
187							[0xF1..0xF3],[0x80..0xBF],[0x80..0xBF],[0x80..0xBF],
188							[0xF4..0xF4],[0x80..0x8F],[0x80..0xBF],[0x80..0xBF],
189							],
190							);
191							$encoding_alias = qr/ \b (?: utf-8 \| utf-8-strict \| utf-?2 ) \b /oxmsi;
192
193							# CaseFolding-7.0.0.txt
194							# Date: 2014-04-09, 20:00:56 GMT [MD]
195							#
196							# Unicode Character Database
197							# Copyright (c) 1991-2014 Unicode, Inc.
198							# For terms of use, see http://www.unicode.org/terms_of_use.html
199							# For documentation, see http://www.unicode.org/reports/tr44/
200
201							# you can use "make_CaseFolding.pl" to update this hash
202
203							%fc = (
204							"\x41" => "\x61", # LATIN CAPITAL LETTER A
205							"\x42" => "\x62", # LATIN CAPITAL LETTER B
206							"\x43" => "\x63", # LATIN CAPITAL LETTER C
207							"\x44" => "\x64", # LATIN CAPITAL LETTER D
208							"\x45" => "\x65", # LATIN CAPITAL LETTER E
209							"\x46" => "\x66", # LATIN CAPITAL LETTER F
210							"\x47" => "\x67", # LATIN CAPITAL LETTER G
211							"\x48" => "\x68", # LATIN CAPITAL LETTER H
212							"\x49" => "\x69", # LATIN CAPITAL LETTER I
213							"\x4A" => "\x6A", # LATIN CAPITAL LETTER J
214							"\x4B" => "\x6B", # LATIN CAPITAL LETTER K
215							"\x4C" => "\x6C", # LATIN CAPITAL LETTER L
216							"\x4D" => "\x6D", # LATIN CAPITAL LETTER M
217							"\x4E" => "\x6E", # LATIN CAPITAL LETTER N
218							"\x4F" => "\x6F", # LATIN CAPITAL LETTER O
219							"\x50" => "\x70", # LATIN CAPITAL LETTER P
220							"\x51" => "\x71", # LATIN CAPITAL LETTER Q
221							"\x52" => "\x72", # LATIN CAPITAL LETTER R
222							"\x53" => "\x73", # LATIN CAPITAL LETTER S
223							"\x54" => "\x74", # LATIN CAPITAL LETTER T
224							"\x55" => "\x75", # LATIN CAPITAL LETTER U
225							"\x56" => "\x76", # LATIN CAPITAL LETTER V
226							"\x57" => "\x77", # LATIN CAPITAL LETTER W
227							"\x58" => "\x78", # LATIN CAPITAL LETTER X
228							"\x59" => "\x79", # LATIN CAPITAL LETTER Y
229							"\x5A" => "\x7A", # LATIN CAPITAL LETTER Z
230							"\xC2\xB5" => "\xCE\xBC", # MICRO SIGN
231							"\xC3\x80" => "\xC3\xA0", # LATIN CAPITAL LETTER A WITH GRAVE
232							"\xC3\x81" => "\xC3\xA1", # LATIN CAPITAL LETTER A WITH ACUTE
233							"\xC3\x82" => "\xC3\xA2", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
234							"\xC3\x83" => "\xC3\xA3", # LATIN CAPITAL LETTER A WITH TILDE
235							"\xC3\x84" => "\xC3\xA4", # LATIN CAPITAL LETTER A WITH DIAERESIS
236							"\xC3\x85" => "\xC3\xA5", # LATIN CAPITAL LETTER A WITH RING ABOVE
237							"\xC3\x86" => "\xC3\xA6", # LATIN CAPITAL LETTER AE
238							"\xC3\x87" => "\xC3\xA7", # LATIN CAPITAL LETTER C WITH CEDILLA
239							"\xC3\x88" => "\xC3\xA8", # LATIN CAPITAL LETTER E WITH GRAVE
240							"\xC3\x89" => "\xC3\xA9", # LATIN CAPITAL LETTER E WITH ACUTE
241							"\xC3\x8A" => "\xC3\xAA", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
242							"\xC3\x8B" => "\xC3\xAB", # LATIN CAPITAL LETTER E WITH DIAERESIS
243							"\xC3\x8C" => "\xC3\xAC", # LATIN CAPITAL LETTER I WITH GRAVE
244							"\xC3\x8D" => "\xC3\xAD", # LATIN CAPITAL LETTER I WITH ACUTE
245							"\xC3\x8E" => "\xC3\xAE", # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
246							"\xC3\x8F" => "\xC3\xAF", # LATIN CAPITAL LETTER I WITH DIAERESIS
247							"\xC3\x90" => "\xC3\xB0", # LATIN CAPITAL LETTER ETH
248							"\xC3\x91" => "\xC3\xB1", # LATIN CAPITAL LETTER N WITH TILDE
249							"\xC3\x92" => "\xC3\xB2", # LATIN CAPITAL LETTER O WITH GRAVE
250							"\xC3\x93" => "\xC3\xB3", # LATIN CAPITAL LETTER O WITH ACUTE
251							"\xC3\x94" => "\xC3\xB4", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
252							"\xC3\x95" => "\xC3\xB5", # LATIN CAPITAL LETTER O WITH TILDE
253							"\xC3\x96" => "\xC3\xB6", # LATIN CAPITAL LETTER O WITH DIAERESIS
254							"\xC3\x98" => "\xC3\xB8", # LATIN CAPITAL LETTER O WITH STROKE
255							"\xC3\x99" => "\xC3\xB9", # LATIN CAPITAL LETTER U WITH GRAVE
256							"\xC3\x9A" => "\xC3\xBA", # LATIN CAPITAL LETTER U WITH ACUTE
257							"\xC3\x9B" => "\xC3\xBB", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
258							"\xC3\x9C" => "\xC3\xBC", # LATIN CAPITAL LETTER U WITH DIAERESIS
259							"\xC3\x9D" => "\xC3\xBD", # LATIN CAPITAL LETTER Y WITH ACUTE
260							"\xC3\x9E" => "\xC3\xBE", # LATIN CAPITAL LETTER THORN
261							"\xC3\x9F" => "\x73\x73", # LATIN SMALL LETTER SHARP S
262							"\xC4\x80" => "\xC4\x81", # LATIN CAPITAL LETTER A WITH MACRON
263							"\xC4\x82" => "\xC4\x83", # LATIN CAPITAL LETTER A WITH BREVE
264							"\xC4\x84" => "\xC4\x85", # LATIN CAPITAL LETTER A WITH OGONEK
265							"\xC4\x86" => "\xC4\x87", # LATIN CAPITAL LETTER C WITH ACUTE
266							"\xC4\x88" => "\xC4\x89", # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
267							"\xC4\x8A" => "\xC4\x8B", # LATIN CAPITAL LETTER C WITH DOT ABOVE
268							"\xC4\x8C" => "\xC4\x8D", # LATIN CAPITAL LETTER C WITH CARON
269							"\xC4\x8E" => "\xC4\x8F", # LATIN CAPITAL LETTER D WITH CARON
270							"\xC4\x90" => "\xC4\x91", # LATIN CAPITAL LETTER D WITH STROKE
271							"\xC4\x92" => "\xC4\x93", # LATIN CAPITAL LETTER E WITH MACRON
272							"\xC4\x94" => "\xC4\x95", # LATIN CAPITAL LETTER E WITH BREVE
273							"\xC4\x96" => "\xC4\x97", # LATIN CAPITAL LETTER E WITH DOT ABOVE
274							"\xC4\x98" => "\xC4\x99", # LATIN CAPITAL LETTER E WITH OGONEK
275							"\xC4\x9A" => "\xC4\x9B", # LATIN CAPITAL LETTER E WITH CARON
276							"\xC4\x9C" => "\xC4\x9D", # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
277							"\xC4\x9E" => "\xC4\x9F", # LATIN CAPITAL LETTER G WITH BREVE
278							"\xC4\xA0" => "\xC4\xA1", # LATIN CAPITAL LETTER G WITH DOT ABOVE
279							"\xC4\xA2" => "\xC4\xA3", # LATIN CAPITAL LETTER G WITH CEDILLA
280							"\xC4\xA4" => "\xC4\xA5", # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
281							"\xC4\xA6" => "\xC4\xA7", # LATIN CAPITAL LETTER H WITH STROKE
282							"\xC4\xA8" => "\xC4\xA9", # LATIN CAPITAL LETTER I WITH TILDE
283							"\xC4\xAA" => "\xC4\xAB", # LATIN CAPITAL LETTER I WITH MACRON
284							"\xC4\xAC" => "\xC4\xAD", # LATIN CAPITAL LETTER I WITH BREVE
285							"\xC4\xAE" => "\xC4\xAF", # LATIN CAPITAL LETTER I WITH OGONEK
286							"\xC4\xB0" => "\x69\xCC\x87", # LATIN CAPITAL LETTER I WITH DOT ABOVE
287							"\xC4\xB2" => "\xC4\xB3", # LATIN CAPITAL LIGATURE IJ
288							"\xC4\xB4" => "\xC4\xB5", # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
289							"\xC4\xB6" => "\xC4\xB7", # LATIN CAPITAL LETTER K WITH CEDILLA
290							"\xC4\xB9" => "\xC4\xBA", # LATIN CAPITAL LETTER L WITH ACUTE
291							"\xC4\xBB" => "\xC4\xBC", # LATIN CAPITAL LETTER L WITH CEDILLA
292							"\xC4\xBD" => "\xC4\xBE", # LATIN CAPITAL LETTER L WITH CARON
293							"\xC4\xBF" => "\xC5\x80", # LATIN CAPITAL LETTER L WITH MIDDLE DOT
294							"\xC5\x81" => "\xC5\x82", # LATIN CAPITAL LETTER L WITH STROKE
295							"\xC5\x83" => "\xC5\x84", # LATIN CAPITAL LETTER N WITH ACUTE
296							"\xC5\x85" => "\xC5\x86", # LATIN CAPITAL LETTER N WITH CEDILLA
297							"\xC5\x87" => "\xC5\x88", # LATIN CAPITAL LETTER N WITH CARON
298							"\xC5\x89" => "\xCA\xBC\x6E", # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
299							"\xC5\x8A" => "\xC5\x8B", # LATIN CAPITAL LETTER ENG
300							"\xC5\x8C" => "\xC5\x8D", # LATIN CAPITAL LETTER O WITH MACRON
301							"\xC5\x8E" => "\xC5\x8F", # LATIN CAPITAL LETTER O WITH BREVE
302							"\xC5\x90" => "\xC5\x91", # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
303							"\xC5\x92" => "\xC5\x93", # LATIN CAPITAL LIGATURE OE
304							"\xC5\x94" => "\xC5\x95", # LATIN CAPITAL LETTER R WITH ACUTE
305							"\xC5\x96" => "\xC5\x97", # LATIN CAPITAL LETTER R WITH CEDILLA
306							"\xC5\x98" => "\xC5\x99", # LATIN CAPITAL LETTER R WITH CARON
307							"\xC5\x9A" => "\xC5\x9B", # LATIN CAPITAL LETTER S WITH ACUTE
308							"\xC5\x9C" => "\xC5\x9D", # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
309							"\xC5\x9E" => "\xC5\x9F", # LATIN CAPITAL LETTER S WITH CEDILLA
310							"\xC5\xA0" => "\xC5\xA1", # LATIN CAPITAL LETTER S WITH CARON
311							"\xC5\xA2" => "\xC5\xA3", # LATIN CAPITAL LETTER T WITH CEDILLA
312							"\xC5\xA4" => "\xC5\xA5", # LATIN CAPITAL LETTER T WITH CARON
313							"\xC5\xA6" => "\xC5\xA7", # LATIN CAPITAL LETTER T WITH STROKE
314							"\xC5\xA8" => "\xC5\xA9", # LATIN CAPITAL LETTER U WITH TILDE
315							"\xC5\xAA" => "\xC5\xAB", # LATIN CAPITAL LETTER U WITH MACRON
316							"\xC5\xAC" => "\xC5\xAD", # LATIN CAPITAL LETTER U WITH BREVE
317							"\xC5\xAE" => "\xC5\xAF", # LATIN CAPITAL LETTER U WITH RING ABOVE
318							"\xC5\xB0" => "\xC5\xB1", # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
319							"\xC5\xB2" => "\xC5\xB3", # LATIN CAPITAL LETTER U WITH OGONEK
320							"\xC5\xB4" => "\xC5\xB5", # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
321							"\xC5\xB6" => "\xC5\xB7", # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
322							"\xC5\xB8" => "\xC3\xBF", # LATIN CAPITAL LETTER Y WITH DIAERESIS
323							"\xC5\xB9" => "\xC5\xBA", # LATIN CAPITAL LETTER Z WITH ACUTE
324							"\xC5\xBB" => "\xC5\xBC", # LATIN CAPITAL LETTER Z WITH DOT ABOVE
325							"\xC5\xBD" => "\xC5\xBE", # LATIN CAPITAL LETTER Z WITH CARON
326							"\xC5\xBF" => "\x73", # LATIN SMALL LETTER LONG S
327							"\xC6\x81" => "\xC9\x93", # LATIN CAPITAL LETTER B WITH HOOK
328							"\xC6\x82" => "\xC6\x83", # LATIN CAPITAL LETTER B WITH TOPBAR
329							"\xC6\x84" => "\xC6\x85", # LATIN CAPITAL LETTER TONE SIX
330							"\xC6\x86" => "\xC9\x94", # LATIN CAPITAL LETTER OPEN O
331							"\xC6\x87" => "\xC6\x88", # LATIN CAPITAL LETTER C WITH HOOK
332							"\xC6\x89" => "\xC9\x96", # LATIN CAPITAL LETTER AFRICAN D
333							"\xC6\x8A" => "\xC9\x97", # LATIN CAPITAL LETTER D WITH HOOK
334							"\xC6\x8B" => "\xC6\x8C", # LATIN CAPITAL LETTER D WITH TOPBAR
335							"\xC6\x8E" => "\xC7\x9D", # LATIN CAPITAL LETTER REVERSED E
336							"\xC6\x8F" => "\xC9\x99", # LATIN CAPITAL LETTER SCHWA
337							"\xC6\x90" => "\xC9\x9B", # LATIN CAPITAL LETTER OPEN E
338							"\xC6\x91" => "\xC6\x92", # LATIN CAPITAL LETTER F WITH HOOK
339							"\xC6\x93" => "\xC9\xA0", # LATIN CAPITAL LETTER G WITH HOOK
340							"\xC6\x94" => "\xC9\xA3", # LATIN CAPITAL LETTER GAMMA
341							"\xC6\x96" => "\xC9\xA9", # LATIN CAPITAL LETTER IOTA
342							"\xC6\x97" => "\xC9\xA8", # LATIN CAPITAL LETTER I WITH STROKE
343							"\xC6\x98" => "\xC6\x99", # LATIN CAPITAL LETTER K WITH HOOK
344							"\xC6\x9C" => "\xC9\xAF", # LATIN CAPITAL LETTER TURNED M
345							"\xC6\x9D" => "\xC9\xB2", # LATIN CAPITAL LETTER N WITH LEFT HOOK
346							"\xC6\x9F" => "\xC9\xB5", # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
347							"\xC6\xA0" => "\xC6\xA1", # LATIN CAPITAL LETTER O WITH HORN
348							"\xC6\xA2" => "\xC6\xA3", # LATIN CAPITAL LETTER OI
349							"\xC6\xA4" => "\xC6\xA5", # LATIN CAPITAL LETTER P WITH HOOK
350							"\xC6\xA6" => "\xCA\x80", # LATIN LETTER YR
351							"\xC6\xA7" => "\xC6\xA8", # LATIN CAPITAL LETTER TONE TWO
352							"\xC6\xA9" => "\xCA\x83", # LATIN CAPITAL LETTER ESH
353							"\xC6\xAC" => "\xC6\xAD", # LATIN CAPITAL LETTER T WITH HOOK
354							"\xC6\xAE" => "\xCA\x88", # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
355							"\xC6\xAF" => "\xC6\xB0", # LATIN CAPITAL LETTER U WITH HORN
356							"\xC6\xB1" => "\xCA\x8A", # LATIN CAPITAL LETTER UPSILON
357							"\xC6\xB2" => "\xCA\x8B", # LATIN CAPITAL LETTER V WITH HOOK
358							"\xC6\xB3" => "\xC6\xB4", # LATIN CAPITAL LETTER Y WITH HOOK
359							"\xC6\xB5" => "\xC6\xB6", # LATIN CAPITAL LETTER Z WITH STROKE
360							"\xC6\xB7" => "\xCA\x92", # LATIN CAPITAL LETTER EZH
361							"\xC6\xB8" => "\xC6\xB9", # LATIN CAPITAL LETTER EZH REVERSED
362							"\xC6\xBC" => "\xC6\xBD", # LATIN CAPITAL LETTER TONE FIVE
363							"\xC7\x84" => "\xC7\x86", # LATIN CAPITAL LETTER DZ WITH CARON
364							"\xC7\x85" => "\xC7\x86", # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
365							"\xC7\x87" => "\xC7\x89", # LATIN CAPITAL LETTER LJ
366							"\xC7\x88" => "\xC7\x89", # LATIN CAPITAL LETTER L WITH SMALL LETTER J
367							"\xC7\x8A" => "\xC7\x8C", # LATIN CAPITAL LETTER NJ
368							"\xC7\x8B" => "\xC7\x8C", # LATIN CAPITAL LETTER N WITH SMALL LETTER J
369							"\xC7\x8D" => "\xC7\x8E", # LATIN CAPITAL LETTER A WITH CARON
370							"\xC7\x8F" => "\xC7\x90", # LATIN CAPITAL LETTER I WITH CARON
371							"\xC7\x91" => "\xC7\x92", # LATIN CAPITAL LETTER O WITH CARON
372							"\xC7\x93" => "\xC7\x94", # LATIN CAPITAL LETTER U WITH CARON
373							"\xC7\x95" => "\xC7\x96", # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
374							"\xC7\x97" => "\xC7\x98", # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
375							"\xC7\x99" => "\xC7\x9A", # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
376							"\xC7\x9B" => "\xC7\x9C", # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
377							"\xC7\x9E" => "\xC7\x9F", # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
378							"\xC7\xA0" => "\xC7\xA1", # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
379							"\xC7\xA2" => "\xC7\xA3", # LATIN CAPITAL LETTER AE WITH MACRON
380							"\xC7\xA4" => "\xC7\xA5", # LATIN CAPITAL LETTER G WITH STROKE
381							"\xC7\xA6" => "\xC7\xA7", # LATIN CAPITAL LETTER G WITH CARON
382							"\xC7\xA8" => "\xC7\xA9", # LATIN CAPITAL LETTER K WITH CARON
383							"\xC7\xAA" => "\xC7\xAB", # LATIN CAPITAL LETTER O WITH OGONEK
384							"\xC7\xAC" => "\xC7\xAD", # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
385							"\xC7\xAE" => "\xC7\xAF", # LATIN CAPITAL LETTER EZH WITH CARON
386							"\xC7\xB0" => "\x6A\xCC\x8C", # LATIN SMALL LETTER J WITH CARON
387							"\xC7\xB1" => "\xC7\xB3", # LATIN CAPITAL LETTER DZ
388							"\xC7\xB2" => "\xC7\xB3", # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
389							"\xC7\xB4" => "\xC7\xB5", # LATIN CAPITAL LETTER G WITH ACUTE
390							"\xC7\xB6" => "\xC6\x95", # LATIN CAPITAL LETTER HWAIR
391							"\xC7\xB7" => "\xC6\xBF", # LATIN CAPITAL LETTER WYNN
392							"\xC7\xB8" => "\xC7\xB9", # LATIN CAPITAL LETTER N WITH GRAVE
393							"\xC7\xBA" => "\xC7\xBB", # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
394							"\xC7\xBC" => "\xC7\xBD", # LATIN CAPITAL LETTER AE WITH ACUTE
395							"\xC7\xBE" => "\xC7\xBF", # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
396							"\xC8\x80" => "\xC8\x81", # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
397							"\xC8\x82" => "\xC8\x83", # LATIN CAPITAL LETTER A WITH INVERTED BREVE
398							"\xC8\x84" => "\xC8\x85", # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
399							"\xC8\x86" => "\xC8\x87", # LATIN CAPITAL LETTER E WITH INVERTED BREVE
400							"\xC8\x88" => "\xC8\x89", # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
401							"\xC8\x8A" => "\xC8\x8B", # LATIN CAPITAL LETTER I WITH INVERTED BREVE
402							"\xC8\x8C" => "\xC8\x8D", # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
403							"\xC8\x8E" => "\xC8\x8F", # LATIN CAPITAL LETTER O WITH INVERTED BREVE
404							"\xC8\x90" => "\xC8\x91", # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
405							"\xC8\x92" => "\xC8\x93", # LATIN CAPITAL LETTER R WITH INVERTED BREVE
406							"\xC8\x94" => "\xC8\x95", # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
407							"\xC8\x96" => "\xC8\x97", # LATIN CAPITAL LETTER U WITH INVERTED BREVE
408							"\xC8\x98" => "\xC8\x99", # LATIN CAPITAL LETTER S WITH COMMA BELOW
409							"\xC8\x9A" => "\xC8\x9B", # LATIN CAPITAL LETTER T WITH COMMA BELOW
410							"\xC8\x9C" => "\xC8\x9D", # LATIN CAPITAL LETTER YOGH
411							"\xC8\x9E" => "\xC8\x9F", # LATIN CAPITAL LETTER H WITH CARON
412							"\xC8\xA0" => "\xC6\x9E", # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
413							"\xC8\xA2" => "\xC8\xA3", # LATIN CAPITAL LETTER OU
414							"\xC8\xA4" => "\xC8\xA5", # LATIN CAPITAL LETTER Z WITH HOOK
415							"\xC8\xA6" => "\xC8\xA7", # LATIN CAPITAL LETTER A WITH DOT ABOVE
416							"\xC8\xA8" => "\xC8\xA9", # LATIN CAPITAL LETTER E WITH CEDILLA
417							"\xC8\xAA" => "\xC8\xAB", # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
418							"\xC8\xAC" => "\xC8\xAD", # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
419							"\xC8\xAE" => "\xC8\xAF", # LATIN CAPITAL LETTER O WITH DOT ABOVE
420							"\xC8\xB0" => "\xC8\xB1", # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
421							"\xC8\xB2" => "\xC8\xB3", # LATIN CAPITAL LETTER Y WITH MACRON
422							"\xC8\xBA" => "\xE2\xB1\xA5", # LATIN CAPITAL LETTER A WITH STROKE
423							"\xC8\xBB" => "\xC8\xBC", # LATIN CAPITAL LETTER C WITH STROKE
424							"\xC8\xBD" => "\xC6\x9A", # LATIN CAPITAL LETTER L WITH BAR
425							"\xC8\xBE" => "\xE2\xB1\xA6", # LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
426							"\xC9\x81" => "\xC9\x82", # LATIN CAPITAL LETTER GLOTTAL STOP
427							"\xC9\x83" => "\xC6\x80", # LATIN CAPITAL LETTER B WITH STROKE
428							"\xC9\x84" => "\xCA\x89", # LATIN CAPITAL LETTER U BAR
429							"\xC9\x85" => "\xCA\x8C", # LATIN CAPITAL LETTER TURNED V
430							"\xC9\x86" => "\xC9\x87", # LATIN CAPITAL LETTER E WITH STROKE
431							"\xC9\x88" => "\xC9\x89", # LATIN CAPITAL LETTER J WITH STROKE
432							"\xC9\x8A" => "\xC9\x8B", # LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
433							"\xC9\x8C" => "\xC9\x8D", # LATIN CAPITAL LETTER R WITH STROKE
434							"\xC9\x8E" => "\xC9\x8F", # LATIN CAPITAL LETTER Y WITH STROKE
435							"\xCD\x85" => "\xCE\xB9", # COMBINING GREEK YPOGEGRAMMENI
436							"\xCD\xB0" => "\xCD\xB1", # GREEK CAPITAL LETTER HETA
437							"\xCD\xB2" => "\xCD\xB3", # GREEK CAPITAL LETTER ARCHAIC SAMPI
438							"\xCD\xB6" => "\xCD\xB7", # GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
439							"\xCD\xBF" => "\xCF\xB3", # GREEK CAPITAL LETTER YOT
440							"\xCE\x86" => "\xCE\xAC", # GREEK CAPITAL LETTER ALPHA WITH TONOS
441							"\xCE\x88" => "\xCE\xAD", # GREEK CAPITAL LETTER EPSILON WITH TONOS
442							"\xCE\x89" => "\xCE\xAE", # GREEK CAPITAL LETTER ETA WITH TONOS
443							"\xCE\x8A" => "\xCE\xAF", # GREEK CAPITAL LETTER IOTA WITH TONOS
444							"\xCE\x8C" => "\xCF\x8C", # GREEK CAPITAL LETTER OMICRON WITH TONOS
445							"\xCE\x8E" => "\xCF\x8D", # GREEK CAPITAL LETTER UPSILON WITH TONOS
446							"\xCE\x8F" => "\xCF\x8E", # GREEK CAPITAL LETTER OMEGA WITH TONOS
447							"\xCE\x90" => "\xCE\xB9\xCC\x88\xCC\x81", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
448							"\xCE\x91" => "\xCE\xB1", # GREEK CAPITAL LETTER ALPHA
449							"\xCE\x92" => "\xCE\xB2", # GREEK CAPITAL LETTER BETA
450							"\xCE\x93" => "\xCE\xB3", # GREEK CAPITAL LETTER GAMMA
451							"\xCE\x94" => "\xCE\xB4", # GREEK CAPITAL LETTER DELTA
452							"\xCE\x95" => "\xCE\xB5", # GREEK CAPITAL LETTER EPSILON
453							"\xCE\x96" => "\xCE\xB6", # GREEK CAPITAL LETTER ZETA
454							"\xCE\x97" => "\xCE\xB7", # GREEK CAPITAL LETTER ETA
455							"\xCE\x98" => "\xCE\xB8", # GREEK CAPITAL LETTER THETA
456							"\xCE\x99" => "\xCE\xB9", # GREEK CAPITAL LETTER IOTA
457							"\xCE\x9A" => "\xCE\xBA", # GREEK CAPITAL LETTER KAPPA
458							"\xCE\x9B" => "\xCE\xBB", # GREEK CAPITAL LETTER LAMDA
459							"\xCE\x9C" => "\xCE\xBC", # GREEK CAPITAL LETTER MU
460							"\xCE\x9D" => "\xCE\xBD", # GREEK CAPITAL LETTER NU
461							"\xCE\x9E" => "\xCE\xBE", # GREEK CAPITAL LETTER XI
462							"\xCE\x9F" => "\xCE\xBF", # GREEK CAPITAL LETTER OMICRON
463							"\xCE\xA0" => "\xCF\x80", # GREEK CAPITAL LETTER PI
464							"\xCE\xA1" => "\xCF\x81", # GREEK CAPITAL LETTER RHO
465							"\xCE\xA3" => "\xCF\x83", # GREEK CAPITAL LETTER SIGMA
466							"\xCE\xA4" => "\xCF\x84", # GREEK CAPITAL LETTER TAU
467							"\xCE\xA5" => "\xCF\x85", # GREEK CAPITAL LETTER UPSILON
468							"\xCE\xA6" => "\xCF\x86", # GREEK CAPITAL LETTER PHI
469							"\xCE\xA7" => "\xCF\x87", # GREEK CAPITAL LETTER CHI
470							"\xCE\xA8" => "\xCF\x88", # GREEK CAPITAL LETTER PSI
471							"\xCE\xA9" => "\xCF\x89", # GREEK CAPITAL LETTER OMEGA
472							"\xCE\xAA" => "\xCF\x8A", # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
473							"\xCE\xAB" => "\xCF\x8B", # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
474							"\xCE\xB0" => "\xCF\x85\xCC\x88\xCC\x81", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
475							"\xCF\x82" => "\xCF\x83", # GREEK SMALL LETTER FINAL SIGMA
476							"\xCF\x8F" => "\xCF\x97", # GREEK CAPITAL KAI SYMBOL
477							"\xCF\x90" => "\xCE\xB2", # GREEK BETA SYMBOL
478							"\xCF\x91" => "\xCE\xB8", # GREEK THETA SYMBOL
479							"\xCF\x95" => "\xCF\x86", # GREEK PHI SYMBOL
480							"\xCF\x96" => "\xCF\x80", # GREEK PI SYMBOL
481							"\xCF\x98" => "\xCF\x99", # GREEK LETTER ARCHAIC KOPPA
482							"\xCF\x9A" => "\xCF\x9B", # GREEK LETTER STIGMA
483							"\xCF\x9C" => "\xCF\x9D", # GREEK LETTER DIGAMMA
484							"\xCF\x9E" => "\xCF\x9F", # GREEK LETTER KOPPA
485							"\xCF\xA0" => "\xCF\xA1", # GREEK LETTER SAMPI
486							"\xCF\xA2" => "\xCF\xA3", # COPTIC CAPITAL LETTER SHEI
487							"\xCF\xA4" => "\xCF\xA5", # COPTIC CAPITAL LETTER FEI
488							"\xCF\xA6" => "\xCF\xA7", # COPTIC CAPITAL LETTER KHEI
489							"\xCF\xA8" => "\xCF\xA9", # COPTIC CAPITAL LETTER HORI
490							"\xCF\xAA" => "\xCF\xAB", # COPTIC CAPITAL LETTER GANGIA
491							"\xCF\xAC" => "\xCF\xAD", # COPTIC CAPITAL LETTER SHIMA
492							"\xCF\xAE" => "\xCF\xAF", # COPTIC CAPITAL LETTER DEI
493							"\xCF\xB0" => "\xCE\xBA", # GREEK KAPPA SYMBOL
494							"\xCF\xB1" => "\xCF\x81", # GREEK RHO SYMBOL
495							"\xCF\xB4" => "\xCE\xB8", # GREEK CAPITAL THETA SYMBOL
496							"\xCF\xB5" => "\xCE\xB5", # GREEK LUNATE EPSILON SYMBOL
497							"\xCF\xB7" => "\xCF\xB8", # GREEK CAPITAL LETTER SHO
498							"\xCF\xB9" => "\xCF\xB2", # GREEK CAPITAL LUNATE SIGMA SYMBOL
499							"\xCF\xBA" => "\xCF\xBB", # GREEK CAPITAL LETTER SAN
500							"\xCF\xBD" => "\xCD\xBB", # GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL
501							"\xCF\xBE" => "\xCD\xBC", # GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL
502							"\xCF\xBF" => "\xCD\xBD", # GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL
503							"\xD0\x80" => "\xD1\x90", # CYRILLIC CAPITAL LETTER IE WITH GRAVE
504							"\xD0\x81" => "\xD1\x91", # CYRILLIC CAPITAL LETTER IO
505							"\xD0\x82" => "\xD1\x92", # CYRILLIC CAPITAL LETTER DJE
506							"\xD0\x83" => "\xD1\x93", # CYRILLIC CAPITAL LETTER GJE
507							"\xD0\x84" => "\xD1\x94", # CYRILLIC CAPITAL LETTER UKRAINIAN IE
508							"\xD0\x85" => "\xD1\x95", # CYRILLIC CAPITAL LETTER DZE
509							"\xD0\x86" => "\xD1\x96", # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
510							"\xD0\x87" => "\xD1\x97", # CYRILLIC CAPITAL LETTER YI
511							"\xD0\x88" => "\xD1\x98", # CYRILLIC CAPITAL LETTER JE
512							"\xD0\x89" => "\xD1\x99", # CYRILLIC CAPITAL LETTER LJE
513							"\xD0\x8A" => "\xD1\x9A", # CYRILLIC CAPITAL LETTER NJE
514							"\xD0\x8B" => "\xD1\x9B", # CYRILLIC CAPITAL LETTER TSHE
515							"\xD0\x8C" => "\xD1\x9C", # CYRILLIC CAPITAL LETTER KJE
516							"\xD0\x8D" => "\xD1\x9D", # CYRILLIC CAPITAL LETTER I WITH GRAVE
517							"\xD0\x8E" => "\xD1\x9E", # CYRILLIC CAPITAL LETTER SHORT U
518							"\xD0\x8F" => "\xD1\x9F", # CYRILLIC CAPITAL LETTER DZHE
519							"\xD0\x90" => "\xD0\xB0", # CYRILLIC CAPITAL LETTER A
520							"\xD0\x91" => "\xD0\xB1", # CYRILLIC CAPITAL LETTER BE
521							"\xD0\x92" => "\xD0\xB2", # CYRILLIC CAPITAL LETTER VE
522							"\xD0\x93" => "\xD0\xB3", # CYRILLIC CAPITAL LETTER GHE
523							"\xD0\x94" => "\xD0\xB4", # CYRILLIC CAPITAL LETTER DE
524							"\xD0\x95" => "\xD0\xB5", # CYRILLIC CAPITAL LETTER IE
525							"\xD0\x96" => "\xD0\xB6", # CYRILLIC CAPITAL LETTER ZHE
526							"\xD0\x97" => "\xD0\xB7", # CYRILLIC CAPITAL LETTER ZE
527							"\xD0\x98" => "\xD0\xB8", # CYRILLIC CAPITAL LETTER I
528							"\xD0\x99" => "\xD0\xB9", # CYRILLIC CAPITAL LETTER SHORT I
529							"\xD0\x9A" => "\xD0\xBA", # CYRILLIC CAPITAL LETTER KA
530							"\xD0\x9B" => "\xD0\xBB", # CYRILLIC CAPITAL LETTER EL
531							"\xD0\x9C" => "\xD0\xBC", # CYRILLIC CAPITAL LETTER EM
532							"\xD0\x9D" => "\xD0\xBD", # CYRILLIC CAPITAL LETTER EN
533							"\xD0\x9E" => "\xD0\xBE", # CYRILLIC CAPITAL LETTER O
534							"\xD0\x9F" => "\xD0\xBF", # CYRILLIC CAPITAL LETTER PE
535							"\xD0\xA0" => "\xD1\x80", # CYRILLIC CAPITAL LETTER ER
536							"\xD0\xA1" => "\xD1\x81", # CYRILLIC CAPITAL LETTER ES
537							"\xD0\xA2" => "\xD1\x82", # CYRILLIC CAPITAL LETTER TE
538							"\xD0\xA3" => "\xD1\x83", # CYRILLIC CAPITAL LETTER U
539							"\xD0\xA4" => "\xD1\x84", # CYRILLIC CAPITAL LETTER EF
540							"\xD0\xA5" => "\xD1\x85", # CYRILLIC CAPITAL LETTER HA
541							"\xD0\xA6" => "\xD1\x86", # CYRILLIC CAPITAL LETTER TSE
542							"\xD0\xA7" => "\xD1\x87", # CYRILLIC CAPITAL LETTER CHE
543							"\xD0\xA8" => "\xD1\x88", # CYRILLIC CAPITAL LETTER SHA
544							"\xD0\xA9" => "\xD1\x89", # CYRILLIC CAPITAL LETTER SHCHA
545							"\xD0\xAA" => "\xD1\x8A", # CYRILLIC CAPITAL LETTER HARD SIGN
546							"\xD0\xAB" => "\xD1\x8B", # CYRILLIC CAPITAL LETTER YERU
547							"\xD0\xAC" => "\xD1\x8C", # CYRILLIC CAPITAL LETTER SOFT SIGN
548							"\xD0\xAD" => "\xD1\x8D", # CYRILLIC CAPITAL LETTER E
549							"\xD0\xAE" => "\xD1\x8E", # CYRILLIC CAPITAL LETTER YU
550							"\xD0\xAF" => "\xD1\x8F", # CYRILLIC CAPITAL LETTER YA
551							"\xD1\xA0" => "\xD1\xA1", # CYRILLIC CAPITAL LETTER OMEGA
552							"\xD1\xA2" => "\xD1\xA3", # CYRILLIC CAPITAL LETTER YAT
553							"\xD1\xA4" => "\xD1\xA5", # CYRILLIC CAPITAL LETTER IOTIFIED E
554							"\xD1\xA6" => "\xD1\xA7", # CYRILLIC CAPITAL LETTER LITTLE YUS
555							"\xD1\xA8" => "\xD1\xA9", # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
556							"\xD1\xAA" => "\xD1\xAB", # CYRILLIC CAPITAL LETTER BIG YUS
557							"\xD1\xAC" => "\xD1\xAD", # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
558							"\xD1\xAE" => "\xD1\xAF", # CYRILLIC CAPITAL LETTER KSI
559							"\xD1\xB0" => "\xD1\xB1", # CYRILLIC CAPITAL LETTER PSI
560							"\xD1\xB2" => "\xD1\xB3", # CYRILLIC CAPITAL LETTER FITA
561							"\xD1\xB4" => "\xD1\xB5", # CYRILLIC CAPITAL LETTER IZHITSA
562							"\xD1\xB6" => "\xD1\xB7", # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
563							"\xD1\xB8" => "\xD1\xB9", # CYRILLIC CAPITAL LETTER UK
564							"\xD1\xBA" => "\xD1\xBB", # CYRILLIC CAPITAL LETTER ROUND OMEGA
565							"\xD1\xBC" => "\xD1\xBD", # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
566							"\xD1\xBE" => "\xD1\xBF", # CYRILLIC CAPITAL LETTER OT
567							"\xD2\x80" => "\xD2\x81", # CYRILLIC CAPITAL LETTER KOPPA
568							"\xD2\x8A" => "\xD2\x8B", # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
569							"\xD2\x8C" => "\xD2\x8D", # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
570							"\xD2\x8E" => "\xD2\x8F", # CYRILLIC CAPITAL LETTER ER WITH TICK
571							"\xD2\x90" => "\xD2\x91", # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
572							"\xD2\x92" => "\xD2\x93", # CYRILLIC CAPITAL LETTER GHE WITH STROKE
573							"\xD2\x94" => "\xD2\x95", # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
574							"\xD2\x96" => "\xD2\x97", # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
575							"\xD2\x98" => "\xD2\x99", # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
576							"\xD2\x9A" => "\xD2\x9B", # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
577							"\xD2\x9C" => "\xD2\x9D", # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
578							"\xD2\x9E" => "\xD2\x9F", # CYRILLIC CAPITAL LETTER KA WITH STROKE
579							"\xD2\xA0" => "\xD2\xA1", # CYRILLIC CAPITAL LETTER BASHKIR KA
580							"\xD2\xA2" => "\xD2\xA3", # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
581							"\xD2\xA4" => "\xD2\xA5", # CYRILLIC CAPITAL LIGATURE EN GHE
582							"\xD2\xA6" => "\xD2\xA7", # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
583							"\xD2\xA8" => "\xD2\xA9", # CYRILLIC CAPITAL LETTER ABKHASIAN HA
584							"\xD2\xAA" => "\xD2\xAB", # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
585							"\xD2\xAC" => "\xD2\xAD", # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
586							"\xD2\xAE" => "\xD2\xAF", # CYRILLIC CAPITAL LETTER STRAIGHT U
587							"\xD2\xB0" => "\xD2\xB1", # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
588							"\xD2\xB2" => "\xD2\xB3", # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
589							"\xD2\xB4" => "\xD2\xB5", # CYRILLIC CAPITAL LIGATURE TE TSE
590							"\xD2\xB6" => "\xD2\xB7", # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
591							"\xD2\xB8" => "\xD2\xB9", # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
592							"\xD2\xBA" => "\xD2\xBB", # CYRILLIC CAPITAL LETTER SHHA
593							"\xD2\xBC" => "\xD2\xBD", # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
594							"\xD2\xBE" => "\xD2\xBF", # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
595							"\xD3\x80" => "\xD3\x8F", # CYRILLIC LETTER PALOCHKA
596							"\xD3\x81" => "\xD3\x82", # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
597							"\xD3\x83" => "\xD3\x84", # CYRILLIC CAPITAL LETTER KA WITH HOOK
598							"\xD3\x85" => "\xD3\x86", # CYRILLIC CAPITAL LETTER EL WITH TAIL
599							"\xD3\x87" => "\xD3\x88", # CYRILLIC CAPITAL LETTER EN WITH HOOK
600							"\xD3\x89" => "\xD3\x8A", # CYRILLIC CAPITAL LETTER EN WITH TAIL
601							"\xD3\x8B" => "\xD3\x8C", # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
602							"\xD3\x8D" => "\xD3\x8E", # CYRILLIC CAPITAL LETTER EM WITH TAIL
603							"\xD3\x90" => "\xD3\x91", # CYRILLIC CAPITAL LETTER A WITH BREVE
604							"\xD3\x92" => "\xD3\x93", # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
605							"\xD3\x94" => "\xD3\x95", # CYRILLIC CAPITAL LIGATURE A IE
606							"\xD3\x96" => "\xD3\x97", # CYRILLIC CAPITAL LETTER IE WITH BREVE
607							"\xD3\x98" => "\xD3\x99", # CYRILLIC CAPITAL LETTER SCHWA
608							"\xD3\x9A" => "\xD3\x9B", # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
609							"\xD3\x9C" => "\xD3\x9D", # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
610							"\xD3\x9E" => "\xD3\x9F", # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
611							"\xD3\xA0" => "\xD3\xA1", # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
612							"\xD3\xA2" => "\xD3\xA3", # CYRILLIC CAPITAL LETTER I WITH MACRON
613							"\xD3\xA4" => "\xD3\xA5", # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
614							"\xD3\xA6" => "\xD3\xA7", # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
615							"\xD3\xA8" => "\xD3\xA9", # CYRILLIC CAPITAL LETTER BARRED O
616							"\xD3\xAA" => "\xD3\xAB", # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
617							"\xD3\xAC" => "\xD3\xAD", # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
618							"\xD3\xAE" => "\xD3\xAF", # CYRILLIC CAPITAL LETTER U WITH MACRON
619							"\xD3\xB0" => "\xD3\xB1", # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
620							"\xD3\xB2" => "\xD3\xB3", # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
621							"\xD3\xB4" => "\xD3\xB5", # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
622							"\xD3\xB6" => "\xD3\xB7", # CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
623							"\xD3\xB8" => "\xD3\xB9", # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
624							"\xD3\xBA" => "\xD3\xBB", # CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
625							"\xD3\xBC" => "\xD3\xBD", # CYRILLIC CAPITAL LETTER HA WITH HOOK
626							"\xD3\xBE" => "\xD3\xBF", # CYRILLIC CAPITAL LETTER HA WITH STROKE
627							"\xD4\x80" => "\xD4\x81", # CYRILLIC CAPITAL LETTER KOMI DE
628							"\xD4\x82" => "\xD4\x83", # CYRILLIC CAPITAL LETTER KOMI DJE
629							"\xD4\x84" => "\xD4\x85", # CYRILLIC CAPITAL LETTER KOMI ZJE
630							"\xD4\x86" => "\xD4\x87", # CYRILLIC CAPITAL LETTER KOMI DZJE
631							"\xD4\x88" => "\xD4\x89", # CYRILLIC CAPITAL LETTER KOMI LJE
632							"\xD4\x8A" => "\xD4\x8B", # CYRILLIC CAPITAL LETTER KOMI NJE
633							"\xD4\x8C" => "\xD4\x8D", # CYRILLIC CAPITAL LETTER KOMI SJE
634							"\xD4\x8E" => "\xD4\x8F", # CYRILLIC CAPITAL LETTER KOMI TJE
635							"\xD4\x90" => "\xD4\x91", # CYRILLIC CAPITAL LETTER REVERSED ZE
636							"\xD4\x92" => "\xD4\x93", # CYRILLIC CAPITAL LETTER EL WITH HOOK
637							"\xD4\x94" => "\xD4\x95", # CYRILLIC CAPITAL LETTER LHA
638							"\xD4\x96" => "\xD4\x97", # CYRILLIC CAPITAL LETTER RHA
639							"\xD4\x98" => "\xD4\x99", # CYRILLIC CAPITAL LETTER YAE
640							"\xD4\x9A" => "\xD4\x9B", # CYRILLIC CAPITAL LETTER QA
641							"\xD4\x9C" => "\xD4\x9D", # CYRILLIC CAPITAL LETTER WE
642							"\xD4\x9E" => "\xD4\x9F", # CYRILLIC CAPITAL LETTER ALEUT KA
643							"\xD4\xA0" => "\xD4\xA1", # CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
644							"\xD4\xA2" => "\xD4\xA3", # CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
645							"\xD4\xA4" => "\xD4\xA5", # CYRILLIC CAPITAL LETTER PE WITH DESCENDER
646							"\xD4\xA6" => "\xD4\xA7", # CYRILLIC CAPITAL LETTER SHHA WITH DESCENDER
647							"\xD4\xA8" => "\xD4\xA9", # CYRILLIC CAPITAL LETTER EN WITH LEFT HOOK
648							"\xD4\xAA" => "\xD4\xAB", # CYRILLIC CAPITAL LETTER DZZHE
649							"\xD4\xAC" => "\xD4\xAD", # CYRILLIC CAPITAL LETTER DCHE
650							"\xD4\xAE" => "\xD4\xAF", # CYRILLIC CAPITAL LETTER EL WITH DESCENDER
651							"\xD4\xB1" => "\xD5\xA1", # ARMENIAN CAPITAL LETTER AYB
652							"\xD4\xB2" => "\xD5\xA2", # ARMENIAN CAPITAL LETTER BEN
653							"\xD4\xB3" => "\xD5\xA3", # ARMENIAN CAPITAL LETTER GIM
654							"\xD4\xB4" => "\xD5\xA4", # ARMENIAN CAPITAL LETTER DA
655							"\xD4\xB5" => "\xD5\xA5", # ARMENIAN CAPITAL LETTER ECH
656							"\xD4\xB6" => "\xD5\xA6", # ARMENIAN CAPITAL LETTER ZA
657							"\xD4\xB7" => "\xD5\xA7", # ARMENIAN CAPITAL LETTER EH
658							"\xD4\xB8" => "\xD5\xA8", # ARMENIAN CAPITAL LETTER ET
659							"\xD4\xB9" => "\xD5\xA9", # ARMENIAN CAPITAL LETTER TO
660							"\xD4\xBA" => "\xD5\xAA", # ARMENIAN CAPITAL LETTER ZHE
661							"\xD4\xBB" => "\xD5\xAB", # ARMENIAN CAPITAL LETTER INI
662							"\xD4\xBC" => "\xD5\xAC", # ARMENIAN CAPITAL LETTER LIWN
663							"\xD4\xBD" => "\xD5\xAD", # ARMENIAN CAPITAL LETTER XEH
664							"\xD4\xBE" => "\xD5\xAE", # ARMENIAN CAPITAL LETTER CA
665							"\xD4\xBF" => "\xD5\xAF", # ARMENIAN CAPITAL LETTER KEN
666							"\xD5\x80" => "\xD5\xB0", # ARMENIAN CAPITAL LETTER HO
667							"\xD5\x81" => "\xD5\xB1", # ARMENIAN CAPITAL LETTER JA
668							"\xD5\x82" => "\xD5\xB2", # ARMENIAN CAPITAL LETTER GHAD
669							"\xD5\x83" => "\xD5\xB3", # ARMENIAN CAPITAL LETTER CHEH
670							"\xD5\x84" => "\xD5\xB4", # ARMENIAN CAPITAL LETTER MEN
671							"\xD5\x85" => "\xD5\xB5", # ARMENIAN CAPITAL LETTER YI
672							"\xD5\x86" => "\xD5\xB6", # ARMENIAN CAPITAL LETTER NOW
673							"\xD5\x87" => "\xD5\xB7", # ARMENIAN CAPITAL LETTER SHA
674							"\xD5\x88" => "\xD5\xB8", # ARMENIAN CAPITAL LETTER VO
675							"\xD5\x89" => "\xD5\xB9", # ARMENIAN CAPITAL LETTER CHA
676							"\xD5\x8A" => "\xD5\xBA", # ARMENIAN CAPITAL LETTER PEH
677							"\xD5\x8B" => "\xD5\xBB", # ARMENIAN CAPITAL LETTER JHEH
678							"\xD5\x8C" => "\xD5\xBC", # ARMENIAN CAPITAL LETTER RA
679							"\xD5\x8D" => "\xD5\xBD", # ARMENIAN CAPITAL LETTER SEH
680							"\xD5\x8E" => "\xD5\xBE", # ARMENIAN CAPITAL LETTER VEW
681							"\xD5\x8F" => "\xD5\xBF", # ARMENIAN CAPITAL LETTER TIWN
682							"\xD5\x90" => "\xD6\x80", # ARMENIAN CAPITAL LETTER REH
683							"\xD5\x91" => "\xD6\x81", # ARMENIAN CAPITAL LETTER CO
684							"\xD5\x92" => "\xD6\x82", # ARMENIAN CAPITAL LETTER YIWN
685							"\xD5\x93" => "\xD6\x83", # ARMENIAN CAPITAL LETTER PIWR
686							"\xD5\x94" => "\xD6\x84", # ARMENIAN CAPITAL LETTER KEH
687							"\xD5\x95" => "\xD6\x85", # ARMENIAN CAPITAL LETTER OH
688							"\xD5\x96" => "\xD6\x86", # ARMENIAN CAPITAL LETTER FEH
689							"\xD6\x87" => "\xD5\xA5\xD6\x82", # ARMENIAN SMALL LIGATURE ECH YIWN
690							"\xE1\x82\xA0" => "\xE2\xB4\x80", # GEORGIAN CAPITAL LETTER AN
691							"\xE1\x82\xA1" => "\xE2\xB4\x81", # GEORGIAN CAPITAL LETTER BAN
692							"\xE1\x82\xA2" => "\xE2\xB4\x82", # GEORGIAN CAPITAL LETTER GAN
693							"\xE1\x82\xA3" => "\xE2\xB4\x83", # GEORGIAN CAPITAL LETTER DON
694							"\xE1\x82\xA4" => "\xE2\xB4\x84", # GEORGIAN CAPITAL LETTER EN
695							"\xE1\x82\xA5" => "\xE2\xB4\x85", # GEORGIAN CAPITAL LETTER VIN
696							"\xE1\x82\xA6" => "\xE2\xB4\x86", # GEORGIAN CAPITAL LETTER ZEN
697							"\xE1\x82\xA7" => "\xE2\xB4\x87", # GEORGIAN CAPITAL LETTER TAN
698							"\xE1\x82\xA8" => "\xE2\xB4\x88", # GEORGIAN CAPITAL LETTER IN
699							"\xE1\x82\xA9" => "\xE2\xB4\x89", # GEORGIAN CAPITAL LETTER KAN
700							"\xE1\x82\xAA" => "\xE2\xB4\x8A", # GEORGIAN CAPITAL LETTER LAS
701							"\xE1\x82\xAB" => "\xE2\xB4\x8B", # GEORGIAN CAPITAL LETTER MAN
702							"\xE1\x82\xAC" => "\xE2\xB4\x8C", # GEORGIAN CAPITAL LETTER NAR
703							"\xE1\x82\xAD" => "\xE2\xB4\x8D", # GEORGIAN CAPITAL LETTER ON
704							"\xE1\x82\xAE" => "\xE2\xB4\x8E", # GEORGIAN CAPITAL LETTER PAR
705							"\xE1\x82\xAF" => "\xE2\xB4\x8F", # GEORGIAN CAPITAL LETTER ZHAR
706							"\xE1\x82\xB0" => "\xE2\xB4\x90", # GEORGIAN CAPITAL LETTER RAE
707							"\xE1\x82\xB1" => "\xE2\xB4\x91", # GEORGIAN CAPITAL LETTER SAN
708							"\xE1\x82\xB2" => "\xE2\xB4\x92", # GEORGIAN CAPITAL LETTER TAR
709							"\xE1\x82\xB3" => "\xE2\xB4\x93", # GEORGIAN CAPITAL LETTER UN
710							"\xE1\x82\xB4" => "\xE2\xB4\x94", # GEORGIAN CAPITAL LETTER PHAR
711							"\xE1\x82\xB5" => "\xE2\xB4\x95", # GEORGIAN CAPITAL LETTER KHAR
712							"\xE1\x82\xB6" => "\xE2\xB4\x96", # GEORGIAN CAPITAL LETTER GHAN
713							"\xE1\x82\xB7" => "\xE2\xB4\x97", # GEORGIAN CAPITAL LETTER QAR
714							"\xE1\x82\xB8" => "\xE2\xB4\x98", # GEORGIAN CAPITAL LETTER SHIN
715							"\xE1\x82\xB9" => "\xE2\xB4\x99", # GEORGIAN CAPITAL LETTER CHIN
716							"\xE1\x82\xBA" => "\xE2\xB4\x9A", # GEORGIAN CAPITAL LETTER CAN
717							"\xE1\x82\xBB" => "\xE2\xB4\x9B", # GEORGIAN CAPITAL LETTER JIL
718							"\xE1\x82\xBC" => "\xE2\xB4\x9C", # GEORGIAN CAPITAL LETTER CIL
719							"\xE1\x82\xBD" => "\xE2\xB4\x9D", # GEORGIAN CAPITAL LETTER CHAR
720							"\xE1\x82\xBE" => "\xE2\xB4\x9E", # GEORGIAN CAPITAL LETTER XAN
721							"\xE1\x82\xBF" => "\xE2\xB4\x9F", # GEORGIAN CAPITAL LETTER JHAN
722							"\xE1\x83\x80" => "\xE2\xB4\xA0", # GEORGIAN CAPITAL LETTER HAE
723							"\xE1\x83\x81" => "\xE2\xB4\xA1", # GEORGIAN CAPITAL LETTER HE
724							"\xE1\x83\x82" => "\xE2\xB4\xA2", # GEORGIAN CAPITAL LETTER HIE
725							"\xE1\x83\x83" => "\xE2\xB4\xA3", # GEORGIAN CAPITAL LETTER WE
726							"\xE1\x83\x84" => "\xE2\xB4\xA4", # GEORGIAN CAPITAL LETTER HAR
727							"\xE1\x83\x85" => "\xE2\xB4\xA5", # GEORGIAN CAPITAL LETTER HOE
728							"\xE1\x83\x87" => "\xE2\xB4\xA7", # GEORGIAN CAPITAL LETTER YN
729							"\xE1\x83\x8D" => "\xE2\xB4\xAD", # GEORGIAN CAPITAL LETTER AEN
730							"\xE1\xB8\x80" => "\xE1\xB8\x81", # LATIN CAPITAL LETTER A WITH RING BELOW
731							"\xE1\xB8\x82" => "\xE1\xB8\x83", # LATIN CAPITAL LETTER B WITH DOT ABOVE
732							"\xE1\xB8\x84" => "\xE1\xB8\x85", # LATIN CAPITAL LETTER B WITH DOT BELOW
733							"\xE1\xB8\x86" => "\xE1\xB8\x87", # LATIN CAPITAL LETTER B WITH LINE BELOW
734							"\xE1\xB8\x88" => "\xE1\xB8\x89", # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
735							"\xE1\xB8\x8A" => "\xE1\xB8\x8B", # LATIN CAPITAL LETTER D WITH DOT ABOVE
736							"\xE1\xB8\x8C" => "\xE1\xB8\x8D", # LATIN CAPITAL LETTER D WITH DOT BELOW
737							"\xE1\xB8\x8E" => "\xE1\xB8\x8F", # LATIN CAPITAL LETTER D WITH LINE BELOW
738							"\xE1\xB8\x90" => "\xE1\xB8\x91", # LATIN CAPITAL LETTER D WITH CEDILLA
739							"\xE1\xB8\x92" => "\xE1\xB8\x93", # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
740							"\xE1\xB8\x94" => "\xE1\xB8\x95", # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
741							"\xE1\xB8\x96" => "\xE1\xB8\x97", # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
742							"\xE1\xB8\x98" => "\xE1\xB8\x99", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
743							"\xE1\xB8\x9A" => "\xE1\xB8\x9B", # LATIN CAPITAL LETTER E WITH TILDE BELOW
744							"\xE1\xB8\x9C" => "\xE1\xB8\x9D", # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
745							"\xE1\xB8\x9E" => "\xE1\xB8\x9F", # LATIN CAPITAL LETTER F WITH DOT ABOVE
746							"\xE1\xB8\xA0" => "\xE1\xB8\xA1", # LATIN CAPITAL LETTER G WITH MACRON
747							"\xE1\xB8\xA2" => "\xE1\xB8\xA3", # LATIN CAPITAL LETTER H WITH DOT ABOVE
748							"\xE1\xB8\xA4" => "\xE1\xB8\xA5", # LATIN CAPITAL LETTER H WITH DOT BELOW
749							"\xE1\xB8\xA6" => "\xE1\xB8\xA7", # LATIN CAPITAL LETTER H WITH DIAERESIS
750							"\xE1\xB8\xA8" => "\xE1\xB8\xA9", # LATIN CAPITAL LETTER H WITH CEDILLA
751							"\xE1\xB8\xAA" => "\xE1\xB8\xAB", # LATIN CAPITAL LETTER H WITH BREVE BELOW
752							"\xE1\xB8\xAC" => "\xE1\xB8\xAD", # LATIN CAPITAL LETTER I WITH TILDE BELOW
753							"\xE1\xB8\xAE" => "\xE1\xB8\xAF", # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
754							"\xE1\xB8\xB0" => "\xE1\xB8\xB1", # LATIN CAPITAL LETTER K WITH ACUTE
755							"\xE1\xB8\xB2" => "\xE1\xB8\xB3", # LATIN CAPITAL LETTER K WITH DOT BELOW
756							"\xE1\xB8\xB4" => "\xE1\xB8\xB5", # LATIN CAPITAL LETTER K WITH LINE BELOW
757							"\xE1\xB8\xB6" => "\xE1\xB8\xB7", # LATIN CAPITAL LETTER L WITH DOT BELOW
758							"\xE1\xB8\xB8" => "\xE1\xB8\xB9", # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
759							"\xE1\xB8\xBA" => "\xE1\xB8\xBB", # LATIN CAPITAL LETTER L WITH LINE BELOW
760							"\xE1\xB8\xBC" => "\xE1\xB8\xBD", # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
761							"\xE1\xB8\xBE" => "\xE1\xB8\xBF", # LATIN CAPITAL LETTER M WITH ACUTE
762							"\xE1\xB9\x80" => "\xE1\xB9\x81", # LATIN CAPITAL LETTER M WITH DOT ABOVE
763							"\xE1\xB9\x82" => "\xE1\xB9\x83", # LATIN CAPITAL LETTER M WITH DOT BELOW
764							"\xE1\xB9\x84" => "\xE1\xB9\x85", # LATIN CAPITAL LETTER N WITH DOT ABOVE
765							"\xE1\xB9\x86" => "\xE1\xB9\x87", # LATIN CAPITAL LETTER N WITH DOT BELOW
766							"\xE1\xB9\x88" => "\xE1\xB9\x89", # LATIN CAPITAL LETTER N WITH LINE BELOW
767							"\xE1\xB9\x8A" => "\xE1\xB9\x8B", # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
768							"\xE1\xB9\x8C" => "\xE1\xB9\x8D", # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
769							"\xE1\xB9\x8E" => "\xE1\xB9\x8F", # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
770							"\xE1\xB9\x90" => "\xE1\xB9\x91", # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
771							"\xE1\xB9\x92" => "\xE1\xB9\x93", # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
772							"\xE1\xB9\x94" => "\xE1\xB9\x95", # LATIN CAPITAL LETTER P WITH ACUTE
773							"\xE1\xB9\x96" => "\xE1\xB9\x97", # LATIN CAPITAL LETTER P WITH DOT ABOVE
774							"\xE1\xB9\x98" => "\xE1\xB9\x99", # LATIN CAPITAL LETTER R WITH DOT ABOVE
775							"\xE1\xB9\x9A" => "\xE1\xB9\x9B", # LATIN CAPITAL LETTER R WITH DOT BELOW
776							"\xE1\xB9\x9C" => "\xE1\xB9\x9D", # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
777							"\xE1\xB9\x9E" => "\xE1\xB9\x9F", # LATIN CAPITAL LETTER R WITH LINE BELOW
778							"\xE1\xB9\xA0" => "\xE1\xB9\xA1", # LATIN CAPITAL LETTER S WITH DOT ABOVE
779							"\xE1\xB9\xA2" => "\xE1\xB9\xA3", # LATIN CAPITAL LETTER S WITH DOT BELOW
780							"\xE1\xB9\xA4" => "\xE1\xB9\xA5", # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
781							"\xE1\xB9\xA6" => "\xE1\xB9\xA7", # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
782							"\xE1\xB9\xA8" => "\xE1\xB9\xA9", # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
783							"\xE1\xB9\xAA" => "\xE1\xB9\xAB", # LATIN CAPITAL LETTER T WITH DOT ABOVE
784							"\xE1\xB9\xAC" => "\xE1\xB9\xAD", # LATIN CAPITAL LETTER T WITH DOT BELOW
785							"\xE1\xB9\xAE" => "\xE1\xB9\xAF", # LATIN CAPITAL LETTER T WITH LINE BELOW
786							"\xE1\xB9\xB0" => "\xE1\xB9\xB1", # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
787							"\xE1\xB9\xB2" => "\xE1\xB9\xB3", # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
788							"\xE1\xB9\xB4" => "\xE1\xB9\xB5", # LATIN CAPITAL LETTER U WITH TILDE BELOW
789							"\xE1\xB9\xB6" => "\xE1\xB9\xB7", # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
790							"\xE1\xB9\xB8" => "\xE1\xB9\xB9", # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
791							"\xE1\xB9\xBA" => "\xE1\xB9\xBB", # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
792							"\xE1\xB9\xBC" => "\xE1\xB9\xBD", # LATIN CAPITAL LETTER V WITH TILDE
793							"\xE1\xB9\xBE" => "\xE1\xB9\xBF", # LATIN CAPITAL LETTER V WITH DOT BELOW
794							"\xE1\xBA\x80" => "\xE1\xBA\x81", # LATIN CAPITAL LETTER W WITH GRAVE
795							"\xE1\xBA\x82" => "\xE1\xBA\x83", # LATIN CAPITAL LETTER W WITH ACUTE
796							"\xE1\xBA\x84" => "\xE1\xBA\x85", # LATIN CAPITAL LETTER W WITH DIAERESIS
797							"\xE1\xBA\x86" => "\xE1\xBA\x87", # LATIN CAPITAL LETTER W WITH DOT ABOVE
798							"\xE1\xBA\x88" => "\xE1\xBA\x89", # LATIN CAPITAL LETTER W WITH DOT BELOW
799							"\xE1\xBA\x8A" => "\xE1\xBA\x8B", # LATIN CAPITAL LETTER X WITH DOT ABOVE
800							"\xE1\xBA\x8C" => "\xE1\xBA\x8D", # LATIN CAPITAL LETTER X WITH DIAERESIS
801							"\xE1\xBA\x8E" => "\xE1\xBA\x8F", # LATIN CAPITAL LETTER Y WITH DOT ABOVE
802							"\xE1\xBA\x90" => "\xE1\xBA\x91", # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
803							"\xE1\xBA\x92" => "\xE1\xBA\x93", # LATIN CAPITAL LETTER Z WITH DOT BELOW
804							"\xE1\xBA\x94" => "\xE1\xBA\x95", # LATIN CAPITAL LETTER Z WITH LINE BELOW
805							"\xE1\xBA\x96" => "\x68\xCC\xB1", # LATIN SMALL LETTER H WITH LINE BELOW
806							"\xE1\xBA\x97" => "\x74\xCC\x88", # LATIN SMALL LETTER T WITH DIAERESIS
807							"\xE1\xBA\x98" => "\x77\xCC\x8A", # LATIN SMALL LETTER W WITH RING ABOVE
808							"\xE1\xBA\x99" => "\x79\xCC\x8A", # LATIN SMALL LETTER Y WITH RING ABOVE
809							"\xE1\xBA\x9A" => "\x61\xCA\xBE", # LATIN SMALL LETTER A WITH RIGHT HALF RING
810							"\xE1\xBA\x9B" => "\xE1\xB9\xA1", # LATIN SMALL LETTER LONG S WITH DOT ABOVE
811							"\xE1\xBA\x9E" => "\x73\x73", # LATIN CAPITAL LETTER SHARP S
812							"\xE1\xBA\xA0" => "\xE1\xBA\xA1", # LATIN CAPITAL LETTER A WITH DOT BELOW
813							"\xE1\xBA\xA2" => "\xE1\xBA\xA3", # LATIN CAPITAL LETTER A WITH HOOK ABOVE
814							"\xE1\xBA\xA4" => "\xE1\xBA\xA5", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
815							"\xE1\xBA\xA6" => "\xE1\xBA\xA7", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
816							"\xE1\xBA\xA8" => "\xE1\xBA\xA9", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
817							"\xE1\xBA\xAA" => "\xE1\xBA\xAB", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
818							"\xE1\xBA\xAC" => "\xE1\xBA\xAD", # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
819							"\xE1\xBA\xAE" => "\xE1\xBA\xAF", # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
820							"\xE1\xBA\xB0" => "\xE1\xBA\xB1", # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
821							"\xE1\xBA\xB2" => "\xE1\xBA\xB3", # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
822							"\xE1\xBA\xB4" => "\xE1\xBA\xB5", # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
823							"\xE1\xBA\xB6" => "\xE1\xBA\xB7", # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
824							"\xE1\xBA\xB8" => "\xE1\xBA\xB9", # LATIN CAPITAL LETTER E WITH DOT BELOW
825							"\xE1\xBA\xBA" => "\xE1\xBA\xBB", # LATIN CAPITAL LETTER E WITH HOOK ABOVE
826							"\xE1\xBA\xBC" => "\xE1\xBA\xBD", # LATIN CAPITAL LETTER E WITH TILDE
827							"\xE1\xBA\xBE" => "\xE1\xBA\xBF", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
828							"\xE1\xBB\x80" => "\xE1\xBB\x81", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
829							"\xE1\xBB\x82" => "\xE1\xBB\x83", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
830							"\xE1\xBB\x84" => "\xE1\xBB\x85", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
831							"\xE1\xBB\x86" => "\xE1\xBB\x87", # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
832							"\xE1\xBB\x88" => "\xE1\xBB\x89", # LATIN CAPITAL LETTER I WITH HOOK ABOVE
833							"\xE1\xBB\x8A" => "\xE1\xBB\x8B", # LATIN CAPITAL LETTER I WITH DOT BELOW
834							"\xE1\xBB\x8C" => "\xE1\xBB\x8D", # LATIN CAPITAL LETTER O WITH DOT BELOW
835							"\xE1\xBB\x8E" => "\xE1\xBB\x8F", # LATIN CAPITAL LETTER O WITH HOOK ABOVE
836							"\xE1\xBB\x90" => "\xE1\xBB\x91", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
837							"\xE1\xBB\x92" => "\xE1\xBB\x93", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
838							"\xE1\xBB\x94" => "\xE1\xBB\x95", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
839							"\xE1\xBB\x96" => "\xE1\xBB\x97", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
840							"\xE1\xBB\x98" => "\xE1\xBB\x99", # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
841							"\xE1\xBB\x9A" => "\xE1\xBB\x9B", # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
842							"\xE1\xBB\x9C" => "\xE1\xBB\x9D", # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
843							"\xE1\xBB\x9E" => "\xE1\xBB\x9F", # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
844							"\xE1\xBB\xA0" => "\xE1\xBB\xA1", # LATIN CAPITAL LETTER O WITH HORN AND TILDE
845							"\xE1\xBB\xA2" => "\xE1\xBB\xA3", # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
846							"\xE1\xBB\xA4" => "\xE1\xBB\xA5", # LATIN CAPITAL LETTER U WITH DOT BELOW
847							"\xE1\xBB\xA6" => "\xE1\xBB\xA7", # LATIN CAPITAL LETTER U WITH HOOK ABOVE
848							"\xE1\xBB\xA8" => "\xE1\xBB\xA9", # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
849							"\xE1\xBB\xAA" => "\xE1\xBB\xAB", # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
850							"\xE1\xBB\xAC" => "\xE1\xBB\xAD", # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
851							"\xE1\xBB\xAE" => "\xE1\xBB\xAF", # LATIN CAPITAL LETTER U WITH HORN AND TILDE
852							"\xE1\xBB\xB0" => "\xE1\xBB\xB1", # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
853							"\xE1\xBB\xB2" => "\xE1\xBB\xB3", # LATIN CAPITAL LETTER Y WITH GRAVE
854							"\xE1\xBB\xB4" => "\xE1\xBB\xB5", # LATIN CAPITAL LETTER Y WITH DOT BELOW
855							"\xE1\xBB\xB6" => "\xE1\xBB\xB7", # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
856							"\xE1\xBB\xB8" => "\xE1\xBB\xB9", # LATIN CAPITAL LETTER Y WITH TILDE
857							"\xE1\xBB\xBA" => "\xE1\xBB\xBB", # LATIN CAPITAL LETTER MIDDLE-WELSH LL
858							"\xE1\xBB\xBC" => "\xE1\xBB\xBD", # LATIN CAPITAL LETTER MIDDLE-WELSH V
859							"\xE1\xBB\xBE" => "\xE1\xBB\xBF", # LATIN CAPITAL LETTER Y WITH LOOP
860							"\xE1\xBC\x88" => "\xE1\xBC\x80", # GREEK CAPITAL LETTER ALPHA WITH PSILI
861							"\xE1\xBC\x89" => "\xE1\xBC\x81", # GREEK CAPITAL LETTER ALPHA WITH DASIA
862							"\xE1\xBC\x8A" => "\xE1\xBC\x82", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
863							"\xE1\xBC\x8B" => "\xE1\xBC\x83", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
864							"\xE1\xBC\x8C" => "\xE1\xBC\x84", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
865							"\xE1\xBC\x8D" => "\xE1\xBC\x85", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
866							"\xE1\xBC\x8E" => "\xE1\xBC\x86", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
867							"\xE1\xBC\x8F" => "\xE1\xBC\x87", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
868							"\xE1\xBC\x98" => "\xE1\xBC\x90", # GREEK CAPITAL LETTER EPSILON WITH PSILI
869							"\xE1\xBC\x99" => "\xE1\xBC\x91", # GREEK CAPITAL LETTER EPSILON WITH DASIA
870							"\xE1\xBC\x9A" => "\xE1\xBC\x92", # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
871							"\xE1\xBC\x9B" => "\xE1\xBC\x93", # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
872							"\xE1\xBC\x9C" => "\xE1\xBC\x94", # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
873							"\xE1\xBC\x9D" => "\xE1\xBC\x95", # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
874							"\xE1\xBC\xA8" => "\xE1\xBC\xA0", # GREEK CAPITAL LETTER ETA WITH PSILI
875							"\xE1\xBC\xA9" => "\xE1\xBC\xA1", # GREEK CAPITAL LETTER ETA WITH DASIA
876							"\xE1\xBC\xAA" => "\xE1\xBC\xA2", # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
877							"\xE1\xBC\xAB" => "\xE1\xBC\xA3", # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
878							"\xE1\xBC\xAC" => "\xE1\xBC\xA4", # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
879							"\xE1\xBC\xAD" => "\xE1\xBC\xA5", # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
880							"\xE1\xBC\xAE" => "\xE1\xBC\xA6", # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
881							"\xE1\xBC\xAF" => "\xE1\xBC\xA7", # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
882							"\xE1\xBC\xB8" => "\xE1\xBC\xB0", # GREEK CAPITAL LETTER IOTA WITH PSILI
883							"\xE1\xBC\xB9" => "\xE1\xBC\xB1", # GREEK CAPITAL LETTER IOTA WITH DASIA
884							"\xE1\xBC\xBA" => "\xE1\xBC\xB2", # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
885							"\xE1\xBC\xBB" => "\xE1\xBC\xB3", # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
886							"\xE1\xBC\xBC" => "\xE1\xBC\xB4", # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
887							"\xE1\xBC\xBD" => "\xE1\xBC\xB5", # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
888							"\xE1\xBC\xBE" => "\xE1\xBC\xB6", # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
889							"\xE1\xBC\xBF" => "\xE1\xBC\xB7", # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
890							"\xE1\xBD\x88" => "\xE1\xBD\x80", # GREEK CAPITAL LETTER OMICRON WITH PSILI
891							"\xE1\xBD\x89" => "\xE1\xBD\x81", # GREEK CAPITAL LETTER OMICRON WITH DASIA
892							"\xE1\xBD\x8A" => "\xE1\xBD\x82", # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
893							"\xE1\xBD\x8B" => "\xE1\xBD\x83", # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
894							"\xE1\xBD\x8C" => "\xE1\xBD\x84", # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
895							"\xE1\xBD\x8D" => "\xE1\xBD\x85", # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
896							"\xE1\xBD\x90" => "\xCF\x85\xCC\x93", # GREEK SMALL LETTER UPSILON WITH PSILI
897							"\xE1\xBD\x92" => "\xCF\x85\xCC\x93\xCC\x80", # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
898							"\xE1\xBD\x94" => "\xCF\x85\xCC\x93\xCC\x81", # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
899							"\xE1\xBD\x96" => "\xCF\x85\xCC\x93\xCD\x82", # GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
900							"\xE1\xBD\x99" => "\xE1\xBD\x91", # GREEK CAPITAL LETTER UPSILON WITH DASIA
901							"\xE1\xBD\x9B" => "\xE1\xBD\x93", # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
902							"\xE1\xBD\x9D" => "\xE1\xBD\x95", # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
903							"\xE1\xBD\x9F" => "\xE1\xBD\x97", # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
904							"\xE1\xBD\xA8" => "\xE1\xBD\xA0", # GREEK CAPITAL LETTER OMEGA WITH PSILI
905							"\xE1\xBD\xA9" => "\xE1\xBD\xA1", # GREEK CAPITAL LETTER OMEGA WITH DASIA
906							"\xE1\xBD\xAA" => "\xE1\xBD\xA2", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
907							"\xE1\xBD\xAB" => "\xE1\xBD\xA3", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
908							"\xE1\xBD\xAC" => "\xE1\xBD\xA4", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
909							"\xE1\xBD\xAD" => "\xE1\xBD\xA5", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
910							"\xE1\xBD\xAE" => "\xE1\xBD\xA6", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
911							"\xE1\xBD\xAF" => "\xE1\xBD\xA7", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
912							"\xE1\xBE\x80" => "\xE1\xBC\x80\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
913							"\xE1\xBE\x81" => "\xE1\xBC\x81\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
914							"\xE1\xBE\x82" => "\xE1\xBC\x82\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
915							"\xE1\xBE\x83" => "\xE1\xBC\x83\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
916							"\xE1\xBE\x84" => "\xE1\xBC\x84\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
917							"\xE1\xBE\x85" => "\xE1\xBC\x85\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
918							"\xE1\xBE\x86" => "\xE1\xBC\x86\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
919							"\xE1\xBE\x87" => "\xE1\xBC\x87\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
920							"\xE1\xBE\x88" => "\xE1\xBC\x80\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
921							"\xE1\xBE\x89" => "\xE1\xBC\x81\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
922							"\xE1\xBE\x8A" => "\xE1\xBC\x82\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
923							"\xE1\xBE\x8B" => "\xE1\xBC\x83\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
924							"\xE1\xBE\x8C" => "\xE1\xBC\x84\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
925							"\xE1\xBE\x8D" => "\xE1\xBC\x85\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
926							"\xE1\xBE\x8E" => "\xE1\xBC\x86\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
927							"\xE1\xBE\x8F" => "\xE1\xBC\x87\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
928							"\xE1\xBE\x90" => "\xE1\xBC\xA0\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
929							"\xE1\xBE\x91" => "\xE1\xBC\xA1\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
930							"\xE1\xBE\x92" => "\xE1\xBC\xA2\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
931							"\xE1\xBE\x93" => "\xE1\xBC\xA3\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
932							"\xE1\xBE\x94" => "\xE1\xBC\xA4\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
933							"\xE1\xBE\x95" => "\xE1\xBC\xA5\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
934							"\xE1\xBE\x96" => "\xE1\xBC\xA6\xCE\xB9", # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
935							"\xE1\xBE\x97" => "\xE1\xBC\xA7\xCE\xB9", # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
936							"\xE1\xBE\x98" => "\xE1\xBC\xA0\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
937							"\xE1\xBE\x99" => "\xE1\xBC\xA1\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
938							"\xE1\xBE\x9A" => "\xE1\xBC\xA2\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
939							"\xE1\xBE\x9B" => "\xE1\xBC\xA3\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
940							"\xE1\xBE\x9C" => "\xE1\xBC\xA4\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
941							"\xE1\xBE\x9D" => "\xE1\xBC\xA5\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
942							"\xE1\xBE\x9E" => "\xE1\xBC\xA6\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
943							"\xE1\xBE\x9F" => "\xE1\xBC\xA7\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
944							"\xE1\xBE\xA0" => "\xE1\xBD\xA0\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
945							"\xE1\xBE\xA1" => "\xE1\xBD\xA1\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
946							"\xE1\xBE\xA2" => "\xE1\xBD\xA2\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
947							"\xE1\xBE\xA3" => "\xE1\xBD\xA3\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
948							"\xE1\xBE\xA4" => "\xE1\xBD\xA4\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
949							"\xE1\xBE\xA5" => "\xE1\xBD\xA5\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
950							"\xE1\xBE\xA6" => "\xE1\xBD\xA6\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
951							"\xE1\xBE\xA7" => "\xE1\xBD\xA7\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
952							"\xE1\xBE\xA8" => "\xE1\xBD\xA0\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
953							"\xE1\xBE\xA9" => "\xE1\xBD\xA1\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
954							"\xE1\xBE\xAA" => "\xE1\xBD\xA2\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
955							"\xE1\xBE\xAB" => "\xE1\xBD\xA3\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
956							"\xE1\xBE\xAC" => "\xE1\xBD\xA4\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
957							"\xE1\xBE\xAD" => "\xE1\xBD\xA5\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
958							"\xE1\xBE\xAE" => "\xE1\xBD\xA6\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
959							"\xE1\xBE\xAF" => "\xE1\xBD\xA7\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
960							"\xE1\xBE\xB2" => "\xE1\xBD\xB0\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
961							"\xE1\xBE\xB3" => "\xCE\xB1\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
962							"\xE1\xBE\xB4" => "\xCE\xAC\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
963							"\xE1\xBE\xB6" => "\xCE\xB1\xCD\x82", # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
964							"\xE1\xBE\xB7" => "\xCE\xB1\xCD\x82\xCE\xB9", # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
965							"\xE1\xBE\xB8" => "\xE1\xBE\xB0", # GREEK CAPITAL LETTER ALPHA WITH VRACHY
966							"\xE1\xBE\xB9" => "\xE1\xBE\xB1", # GREEK CAPITAL LETTER ALPHA WITH MACRON
967							"\xE1\xBE\xBA" => "\xE1\xBD\xB0", # GREEK CAPITAL LETTER ALPHA WITH VARIA
968							"\xE1\xBE\xBB" => "\xE1\xBD\xB1", # GREEK CAPITAL LETTER ALPHA WITH OXIA
969							"\xE1\xBE\xBC" => "\xCE\xB1\xCE\xB9", # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
970							"\xE1\xBE\xBE" => "\xCE\xB9", # GREEK PROSGEGRAMMENI
971							"\xE1\xBF\x82" => "\xE1\xBD\xB4\xCE\xB9", # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
972							"\xE1\xBF\x83" => "\xCE\xB7\xCE\xB9", # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
973							"\xE1\xBF\x84" => "\xCE\xAE\xCE\xB9", # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
974							"\xE1\xBF\x86" => "\xCE\xB7\xCD\x82", # GREEK SMALL LETTER ETA WITH PERISPOMENI
975							"\xE1\xBF\x87" => "\xCE\xB7\xCD\x82\xCE\xB9", # GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
976							"\xE1\xBF\x88" => "\xE1\xBD\xB2", # GREEK CAPITAL LETTER EPSILON WITH VARIA
977							"\xE1\xBF\x89" => "\xE1\xBD\xB3", # GREEK CAPITAL LETTER EPSILON WITH OXIA
978							"\xE1\xBF\x8A" => "\xE1\xBD\xB4", # GREEK CAPITAL LETTER ETA WITH VARIA
979							"\xE1\xBF\x8B" => "\xE1\xBD\xB5", # GREEK CAPITAL LETTER ETA WITH OXIA
980							"\xE1\xBF\x8C" => "\xCE\xB7\xCE\xB9", # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
981							"\xE1\xBF\x92" => "\xCE\xB9\xCC\x88\xCC\x80", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
982							"\xE1\xBF\x93" => "\xCE\xB9\xCC\x88\xCC\x81", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
983							"\xE1\xBF\x96" => "\xCE\xB9\xCD\x82", # GREEK SMALL LETTER IOTA WITH PERISPOMENI
984							"\xE1\xBF\x97" => "\xCE\xB9\xCC\x88\xCD\x82", # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
985							"\xE1\xBF\x98" => "\xE1\xBF\x90", # GREEK CAPITAL LETTER IOTA WITH VRACHY
986							"\xE1\xBF\x99" => "\xE1\xBF\x91", # GREEK CAPITAL LETTER IOTA WITH MACRON
987							"\xE1\xBF\x9A" => "\xE1\xBD\xB6", # GREEK CAPITAL LETTER IOTA WITH VARIA
988							"\xE1\xBF\x9B" => "\xE1\xBD\xB7", # GREEK CAPITAL LETTER IOTA WITH OXIA
989							"\xE1\xBF\xA2" => "\xCF\x85\xCC\x88\xCC\x80", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
990							"\xE1\xBF\xA3" => "\xCF\x85\xCC\x88\xCC\x81", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
991							"\xE1\xBF\xA4" => "\xCF\x81\xCC\x93", # GREEK SMALL LETTER RHO WITH PSILI
992							"\xE1\xBF\xA6" => "\xCF\x85\xCD\x82", # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
993							"\xE1\xBF\xA7" => "\xCF\x85\xCC\x88\xCD\x82", # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
994							"\xE1\xBF\xA8" => "\xE1\xBF\xA0", # GREEK CAPITAL LETTER UPSILON WITH VRACHY
995							"\xE1\xBF\xA9" => "\xE1\xBF\xA1", # GREEK CAPITAL LETTER UPSILON WITH MACRON
996							"\xE1\xBF\xAA" => "\xE1\xBD\xBA", # GREEK CAPITAL LETTER UPSILON WITH VARIA
997							"\xE1\xBF\xAB" => "\xE1\xBD\xBB", # GREEK CAPITAL LETTER UPSILON WITH OXIA
998							"\xE1\xBF\xAC" => "\xE1\xBF\xA5", # GREEK CAPITAL LETTER RHO WITH DASIA
999							"\xE1\xBF\xB2" => "\xE1\xBD\xBC\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
1000							"\xE1\xBF\xB3" => "\xCF\x89\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
1001							"\xE1\xBF\xB4" => "\xCF\x8E\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
1002							"\xE1\xBF\xB6" => "\xCF\x89\xCD\x82", # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
1003							"\xE1\xBF\xB7" => "\xCF\x89\xCD\x82\xCE\xB9", # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
1004							"\xE1\xBF\xB8" => "\xE1\xBD\xB8", # GREEK CAPITAL LETTER OMICRON WITH VARIA
1005							"\xE1\xBF\xB9" => "\xE1\xBD\xB9", # GREEK CAPITAL LETTER OMICRON WITH OXIA
1006							"\xE1\xBF\xBA" => "\xE1\xBD\xBC", # GREEK CAPITAL LETTER OMEGA WITH VARIA
1007							"\xE1\xBF\xBB" => "\xE1\xBD\xBD", # GREEK CAPITAL LETTER OMEGA WITH OXIA
1008							"\xE1\xBF\xBC" => "\xCF\x89\xCE\xB9", # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
1009							"\xE2\x84\xA6" => "\xCF\x89", # OHM SIGN
1010							"\xE2\x84\xAA" => "\x6B", # KELVIN SIGN
1011							"\xE2\x84\xAB" => "\xC3\xA5", # ANGSTROM SIGN
1012							"\xE2\x84\xB2" => "\xE2\x85\x8E", # TURNED CAPITAL F
1013							"\xE2\x85\xA0" => "\xE2\x85\xB0", # ROMAN NUMERAL ONE
1014							"\xE2\x85\xA1" => "\xE2\x85\xB1", # ROMAN NUMERAL TWO
1015							"\xE2\x85\xA2" => "\xE2\x85\xB2", # ROMAN NUMERAL THREE
1016							"\xE2\x85\xA3" => "\xE2\x85\xB3", # ROMAN NUMERAL FOUR
1017							"\xE2\x85\xA4" => "\xE2\x85\xB4", # ROMAN NUMERAL FIVE
1018							"\xE2\x85\xA5" => "\xE2\x85\xB5", # ROMAN NUMERAL SIX
1019							"\xE2\x85\xA6" => "\xE2\x85\xB6", # ROMAN NUMERAL SEVEN
1020							"\xE2\x85\xA7" => "\xE2\x85\xB7", # ROMAN NUMERAL EIGHT
1021							"\xE2\x85\xA8" => "\xE2\x85\xB8", # ROMAN NUMERAL NINE
1022							"\xE2\x85\xA9" => "\xE2\x85\xB9", # ROMAN NUMERAL TEN
1023							"\xE2\x85\xAA" => "\xE2\x85\xBA", # ROMAN NUMERAL ELEVEN
1024							"\xE2\x85\xAB" => "\xE2\x85\xBB", # ROMAN NUMERAL TWELVE
1025							"\xE2\x85\xAC" => "\xE2\x85\xBC", # ROMAN NUMERAL FIFTY
1026							"\xE2\x85\xAD" => "\xE2\x85\xBD", # ROMAN NUMERAL ONE HUNDRED
1027							"\xE2\x85\xAE" => "\xE2\x85\xBE", # ROMAN NUMERAL FIVE HUNDRED
1028							"\xE2\x85\xAF" => "\xE2\x85\xBF", # ROMAN NUMERAL ONE THOUSAND
1029							"\xE2\x86\x83" => "\xE2\x86\x84", # ROMAN NUMERAL REVERSED ONE HUNDRED
1030							"\xE2\x92\xB6" => "\xE2\x93\x90", # CIRCLED LATIN CAPITAL LETTER A
1031							"\xE2\x92\xB7" => "\xE2\x93\x91", # CIRCLED LATIN CAPITAL LETTER B
1032							"\xE2\x92\xB8" => "\xE2\x93\x92", # CIRCLED LATIN CAPITAL LETTER C
1033							"\xE2\x92\xB9" => "\xE2\x93\x93", # CIRCLED LATIN CAPITAL LETTER D
1034							"\xE2\x92\xBA" => "\xE2\x93\x94", # CIRCLED LATIN CAPITAL LETTER E
1035							"\xE2\x92\xBB" => "\xE2\x93\x95", # CIRCLED LATIN CAPITAL LETTER F
1036							"\xE2\x92\xBC" => "\xE2\x93\x96", # CIRCLED LATIN CAPITAL LETTER G
1037							"\xE2\x92\xBD" => "\xE2\x93\x97", # CIRCLED LATIN CAPITAL LETTER H
1038							"\xE2\x92\xBE" => "\xE2\x93\x98", # CIRCLED LATIN CAPITAL LETTER I
1039							"\xE2\x92\xBF" => "\xE2\x93\x99", # CIRCLED LATIN CAPITAL LETTER J
1040							"\xE2\x93\x80" => "\xE2\x93\x9A", # CIRCLED LATIN CAPITAL LETTER K
1041							"\xE2\x93\x81" => "\xE2\x93\x9B", # CIRCLED LATIN CAPITAL LETTER L
1042							"\xE2\x93\x82" => "\xE2\x93\x9C", # CIRCLED LATIN CAPITAL LETTER M
1043							"\xE2\x93\x83" => "\xE2\x93\x9D", # CIRCLED LATIN CAPITAL LETTER N
1044							"\xE2\x93\x84" => "\xE2\x93\x9E", # CIRCLED LATIN CAPITAL LETTER O
1045							"\xE2\x93\x85" => "\xE2\x93\x9F", # CIRCLED LATIN CAPITAL LETTER P
1046							"\xE2\x93\x86" => "\xE2\x93\xA0", # CIRCLED LATIN CAPITAL LETTER Q
1047							"\xE2\x93\x87" => "\xE2\x93\xA1", # CIRCLED LATIN CAPITAL LETTER R
1048							"\xE2\x93\x88" => "\xE2\x93\xA2", # CIRCLED LATIN CAPITAL LETTER S
1049							"\xE2\x93\x89" => "\xE2\x93\xA3", # CIRCLED LATIN CAPITAL LETTER T
1050							"\xE2\x93\x8A" => "\xE2\x93\xA4", # CIRCLED LATIN CAPITAL LETTER U
1051							"\xE2\x93\x8B" => "\xE2\x93\xA5", # CIRCLED LATIN CAPITAL LETTER V
1052							"\xE2\x93\x8C" => "\xE2\x93\xA6", # CIRCLED LATIN CAPITAL LETTER W
1053							"\xE2\x93\x8D" => "\xE2\x93\xA7", # CIRCLED LATIN CAPITAL LETTER X
1054							"\xE2\x93\x8E" => "\xE2\x93\xA8", # CIRCLED LATIN CAPITAL LETTER Y
1055							"\xE2\x93\x8F" => "\xE2\x93\xA9", # CIRCLED LATIN CAPITAL LETTER Z
1056							"\xE2\xB0\x80" => "\xE2\xB0\xB0", # GLAGOLITIC CAPITAL LETTER AZU
1057							"\xE2\xB0\x81" => "\xE2\xB0\xB1", # GLAGOLITIC CAPITAL LETTER BUKY
1058							"\xE2\xB0\x82" => "\xE2\xB0\xB2", # GLAGOLITIC CAPITAL LETTER VEDE
1059							"\xE2\xB0\x83" => "\xE2\xB0\xB3", # GLAGOLITIC CAPITAL LETTER GLAGOLI
1060							"\xE2\xB0\x84" => "\xE2\xB0\xB4", # GLAGOLITIC CAPITAL LETTER DOBRO
1061							"\xE2\xB0\x85" => "\xE2\xB0\xB5", # GLAGOLITIC CAPITAL LETTER YESTU
1062							"\xE2\xB0\x86" => "\xE2\xB0\xB6", # GLAGOLITIC CAPITAL LETTER ZHIVETE
1063							"\xE2\xB0\x87" => "\xE2\xB0\xB7", # GLAGOLITIC CAPITAL LETTER DZELO
1064							"\xE2\xB0\x88" => "\xE2\xB0\xB8", # GLAGOLITIC CAPITAL LETTER ZEMLJA
1065							"\xE2\xB0\x89" => "\xE2\xB0\xB9", # GLAGOLITIC CAPITAL LETTER IZHE
1066							"\xE2\xB0\x8A" => "\xE2\xB0\xBA", # GLAGOLITIC CAPITAL LETTER INITIAL IZHE
1067							"\xE2\xB0\x8B" => "\xE2\xB0\xBB", # GLAGOLITIC CAPITAL LETTER I
1068							"\xE2\xB0\x8C" => "\xE2\xB0\xBC", # GLAGOLITIC CAPITAL LETTER DJERVI
1069							"\xE2\xB0\x8D" => "\xE2\xB0\xBD", # GLAGOLITIC CAPITAL LETTER KAKO
1070							"\xE2\xB0\x8E" => "\xE2\xB0\xBE", # GLAGOLITIC CAPITAL LETTER LJUDIJE
1071							"\xE2\xB0\x8F" => "\xE2\xB0\xBF", # GLAGOLITIC CAPITAL LETTER MYSLITE
1072							"\xE2\xB0\x90" => "\xE2\xB1\x80", # GLAGOLITIC CAPITAL LETTER NASHI
1073							"\xE2\xB0\x91" => "\xE2\xB1\x81", # GLAGOLITIC CAPITAL LETTER ONU
1074							"\xE2\xB0\x92" => "\xE2\xB1\x82", # GLAGOLITIC CAPITAL LETTER POKOJI
1075							"\xE2\xB0\x93" => "\xE2\xB1\x83", # GLAGOLITIC CAPITAL LETTER RITSI
1076							"\xE2\xB0\x94" => "\xE2\xB1\x84", # GLAGOLITIC CAPITAL LETTER SLOVO
1077							"\xE2\xB0\x95" => "\xE2\xB1\x85", # GLAGOLITIC CAPITAL LETTER TVRIDO
1078							"\xE2\xB0\x96" => "\xE2\xB1\x86", # GLAGOLITIC CAPITAL LETTER UKU
1079							"\xE2\xB0\x97" => "\xE2\xB1\x87", # GLAGOLITIC CAPITAL LETTER FRITU
1080							"\xE2\xB0\x98" => "\xE2\xB1\x88", # GLAGOLITIC CAPITAL LETTER HERU
1081							"\xE2\xB0\x99" => "\xE2\xB1\x89", # GLAGOLITIC CAPITAL LETTER OTU
1082							"\xE2\xB0\x9A" => "\xE2\xB1\x8A", # GLAGOLITIC CAPITAL LETTER PE
1083							"\xE2\xB0\x9B" => "\xE2\xB1\x8B", # GLAGOLITIC CAPITAL LETTER SHTA
1084							"\xE2\xB0\x9C" => "\xE2\xB1\x8C", # GLAGOLITIC CAPITAL LETTER TSI
1085							"\xE2\xB0\x9D" => "\xE2\xB1\x8D", # GLAGOLITIC CAPITAL LETTER CHRIVI
1086							"\xE2\xB0\x9E" => "\xE2\xB1\x8E", # GLAGOLITIC CAPITAL LETTER SHA
1087							"\xE2\xB0\x9F" => "\xE2\xB1\x8F", # GLAGOLITIC CAPITAL LETTER YERU
1088							"\xE2\xB0\xA0" => "\xE2\xB1\x90", # GLAGOLITIC CAPITAL LETTER YERI
1089							"\xE2\xB0\xA1" => "\xE2\xB1\x91", # GLAGOLITIC CAPITAL LETTER YATI
1090							"\xE2\xB0\xA2" => "\xE2\xB1\x92", # GLAGOLITIC CAPITAL LETTER SPIDERY HA
1091							"\xE2\xB0\xA3" => "\xE2\xB1\x93", # GLAGOLITIC CAPITAL LETTER YU
1092							"\xE2\xB0\xA4" => "\xE2\xB1\x94", # GLAGOLITIC CAPITAL LETTER SMALL YUS
1093							"\xE2\xB0\xA5" => "\xE2\xB1\x95", # GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL
1094							"\xE2\xB0\xA6" => "\xE2\xB1\x96", # GLAGOLITIC CAPITAL LETTER YO
1095							"\xE2\xB0\xA7" => "\xE2\xB1\x97", # GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS
1096							"\xE2\xB0\xA8" => "\xE2\xB1\x98", # GLAGOLITIC CAPITAL LETTER BIG YUS
1097							"\xE2\xB0\xA9" => "\xE2\xB1\x99", # GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS
1098							"\xE2\xB0\xAA" => "\xE2\xB1\x9A", # GLAGOLITIC CAPITAL LETTER FITA
1099							"\xE2\xB0\xAB" => "\xE2\xB1\x9B", # GLAGOLITIC CAPITAL LETTER IZHITSA
1100							"\xE2\xB0\xAC" => "\xE2\xB1\x9C", # GLAGOLITIC CAPITAL LETTER SHTAPIC
1101							"\xE2\xB0\xAD" => "\xE2\xB1\x9D", # GLAGOLITIC CAPITAL LETTER TROKUTASTI A
1102							"\xE2\xB0\xAE" => "\xE2\xB1\x9E", # GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
1103							"\xE2\xB1\xA0" => "\xE2\xB1\xA1", # LATIN CAPITAL LETTER L WITH DOUBLE BAR
1104							"\xE2\xB1\xA2" => "\xC9\xAB", # LATIN CAPITAL LETTER L WITH MIDDLE TILDE
1105							"\xE2\xB1\xA3" => "\xE1\xB5\xBD", # LATIN CAPITAL LETTER P WITH STROKE
1106							"\xE2\xB1\xA4" => "\xC9\xBD", # LATIN CAPITAL LETTER R WITH TAIL
1107							"\xE2\xB1\xA7" => "\xE2\xB1\xA8", # LATIN CAPITAL LETTER H WITH DESCENDER
1108							"\xE2\xB1\xA9" => "\xE2\xB1\xAA", # LATIN CAPITAL LETTER K WITH DESCENDER
1109							"\xE2\xB1\xAB" => "\xE2\xB1\xAC", # LATIN CAPITAL LETTER Z WITH DESCENDER
1110							"\xE2\xB1\xAD" => "\xC9\x91", # LATIN CAPITAL LETTER ALPHA
1111							"\xE2\xB1\xAE" => "\xC9\xB1", # LATIN CAPITAL LETTER M WITH HOOK
1112							"\xE2\xB1\xAF" => "\xC9\x90", # LATIN CAPITAL LETTER TURNED A
1113							"\xE2\xB1\xB0" => "\xC9\x92", # LATIN CAPITAL LETTER TURNED ALPHA
1114							"\xE2\xB1\xB2" => "\xE2\xB1\xB3", # LATIN CAPITAL LETTER W WITH HOOK
1115							"\xE2\xB1\xB5" => "\xE2\xB1\xB6", # LATIN CAPITAL LETTER HALF H
1116							"\xE2\xB1\xBE" => "\xC8\xBF", # LATIN CAPITAL LETTER S WITH SWASH TAIL
1117							"\xE2\xB1\xBF" => "\xC9\x80", # LATIN CAPITAL LETTER Z WITH SWASH TAIL
1118							"\xE2\xB2\x80" => "\xE2\xB2\x81", # COPTIC CAPITAL LETTER ALFA
1119							"\xE2\xB2\x82" => "\xE2\xB2\x83", # COPTIC CAPITAL LETTER VIDA
1120							"\xE2\xB2\x84" => "\xE2\xB2\x85", # COPTIC CAPITAL LETTER GAMMA
1121							"\xE2\xB2\x86" => "\xE2\xB2\x87", # COPTIC CAPITAL LETTER DALDA
1122							"\xE2\xB2\x88" => "\xE2\xB2\x89", # COPTIC CAPITAL LETTER EIE
1123							"\xE2\xB2\x8A" => "\xE2\xB2\x8B", # COPTIC CAPITAL LETTER SOU
1124							"\xE2\xB2\x8C" => "\xE2\xB2\x8D", # COPTIC CAPITAL LETTER ZATA
1125							"\xE2\xB2\x8E" => "\xE2\xB2\x8F", # COPTIC CAPITAL LETTER HATE
1126							"\xE2\xB2\x90" => "\xE2\xB2\x91", # COPTIC CAPITAL LETTER THETHE
1127							"\xE2\xB2\x92" => "\xE2\xB2\x93", # COPTIC CAPITAL LETTER IAUDA
1128							"\xE2\xB2\x94" => "\xE2\xB2\x95", # COPTIC CAPITAL LETTER KAPA
1129							"\xE2\xB2\x96" => "\xE2\xB2\x97", # COPTIC CAPITAL LETTER LAULA
1130							"\xE2\xB2\x98" => "\xE2\xB2\x99", # COPTIC CAPITAL LETTER MI
1131							"\xE2\xB2\x9A" => "\xE2\xB2\x9B", # COPTIC CAPITAL LETTER NI
1132							"\xE2\xB2\x9C" => "\xE2\xB2\x9D", # COPTIC CAPITAL LETTER KSI
1133							"\xE2\xB2\x9E" => "\xE2\xB2\x9F", # COPTIC CAPITAL LETTER O
1134							"\xE2\xB2\xA0" => "\xE2\xB2\xA1", # COPTIC CAPITAL LETTER PI
1135							"\xE2\xB2\xA2" => "\xE2\xB2\xA3", # COPTIC CAPITAL LETTER RO
1136							"\xE2\xB2\xA4" => "\xE2\xB2\xA5", # COPTIC CAPITAL LETTER SIMA
1137							"\xE2\xB2\xA6" => "\xE2\xB2\xA7", # COPTIC CAPITAL LETTER TAU
1138							"\xE2\xB2\xA8" => "\xE2\xB2\xA9", # COPTIC CAPITAL LETTER UA
1139							"\xE2\xB2\xAA" => "\xE2\xB2\xAB", # COPTIC CAPITAL LETTER FI
1140							"\xE2\xB2\xAC" => "\xE2\xB2\xAD", # COPTIC CAPITAL LETTER KHI
1141							"\xE2\xB2\xAE" => "\xE2\xB2\xAF", # COPTIC CAPITAL LETTER PSI
1142							"\xE2\xB2\xB0" => "\xE2\xB2\xB1", # COPTIC CAPITAL LETTER OOU
1143							"\xE2\xB2\xB2" => "\xE2\xB2\xB3", # COPTIC CAPITAL LETTER DIALECT-P ALEF
1144							"\xE2\xB2\xB4" => "\xE2\xB2\xB5", # COPTIC CAPITAL LETTER OLD COPTIC AIN
1145							"\xE2\xB2\xB6" => "\xE2\xB2\xB7", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
1146							"\xE2\xB2\xB8" => "\xE2\xB2\xB9", # COPTIC CAPITAL LETTER DIALECT-P KAPA
1147							"\xE2\xB2\xBA" => "\xE2\xB2\xBB", # COPTIC CAPITAL LETTER DIALECT-P NI
1148							"\xE2\xB2\xBC" => "\xE2\xB2\xBD", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
1149							"\xE2\xB2\xBE" => "\xE2\xB2\xBF", # COPTIC CAPITAL LETTER OLD COPTIC OOU
1150							"\xE2\xB3\x80" => "\xE2\xB3\x81", # COPTIC CAPITAL LETTER SAMPI
1151							"\xE2\xB3\x82" => "\xE2\xB3\x83", # COPTIC CAPITAL LETTER CROSSED SHEI
1152							"\xE2\xB3\x84" => "\xE2\xB3\x85", # COPTIC CAPITAL LETTER OLD COPTIC SHEI
1153							"\xE2\xB3\x86" => "\xE2\xB3\x87", # COPTIC CAPITAL LETTER OLD COPTIC ESH
1154							"\xE2\xB3\x88" => "\xE2\xB3\x89", # COPTIC CAPITAL LETTER AKHMIMIC KHEI
1155							"\xE2\xB3\x8A" => "\xE2\xB3\x8B", # COPTIC CAPITAL LETTER DIALECT-P HORI
1156							"\xE2\xB3\x8C" => "\xE2\xB3\x8D", # COPTIC CAPITAL LETTER OLD COPTIC HORI
1157							"\xE2\xB3\x8E" => "\xE2\xB3\x8F", # COPTIC CAPITAL LETTER OLD COPTIC HA
1158							"\xE2\xB3\x90" => "\xE2\xB3\x91", # COPTIC CAPITAL LETTER L-SHAPED HA
1159							"\xE2\xB3\x92" => "\xE2\xB3\x93", # COPTIC CAPITAL LETTER OLD COPTIC HEI
1160							"\xE2\xB3\x94" => "\xE2\xB3\x95", # COPTIC CAPITAL LETTER OLD COPTIC HAT
1161							"\xE2\xB3\x96" => "\xE2\xB3\x97", # COPTIC CAPITAL LETTER OLD COPTIC GANGIA
1162							"\xE2\xB3\x98" => "\xE2\xB3\x99", # COPTIC CAPITAL LETTER OLD COPTIC DJA
1163							"\xE2\xB3\x9A" => "\xE2\xB3\x9B", # COPTIC CAPITAL LETTER OLD COPTIC SHIMA
1164							"\xE2\xB3\x9C" => "\xE2\xB3\x9D", # COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
1165							"\xE2\xB3\x9E" => "\xE2\xB3\x9F", # COPTIC CAPITAL LETTER OLD NUBIAN NGI
1166							"\xE2\xB3\xA0" => "\xE2\xB3\xA1", # COPTIC CAPITAL LETTER OLD NUBIAN NYI
1167							"\xE2\xB3\xA2" => "\xE2\xB3\xA3", # COPTIC CAPITAL LETTER OLD NUBIAN WAU
1168							"\xE2\xB3\xAB" => "\xE2\xB3\xAC", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
1169							"\xE2\xB3\xAD" => "\xE2\xB3\xAE", # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
1170							"\xE2\xB3\xB2" => "\xE2\xB3\xB3", # COPTIC CAPITAL LETTER BOHAIRIC KHEI
1171							"\xEA\x99\x80" => "\xEA\x99\x81", # CYRILLIC CAPITAL LETTER ZEMLYA
1172							"\xEA\x99\x82" => "\xEA\x99\x83", # CYRILLIC CAPITAL LETTER DZELO
1173							"\xEA\x99\x84" => "\xEA\x99\x85", # CYRILLIC CAPITAL LETTER REVERSED DZE
1174							"\xEA\x99\x86" => "\xEA\x99\x87", # CYRILLIC CAPITAL LETTER IOTA
1175							"\xEA\x99\x88" => "\xEA\x99\x89", # CYRILLIC CAPITAL LETTER DJERV
1176							"\xEA\x99\x8A" => "\xEA\x99\x8B", # CYRILLIC CAPITAL LETTER MONOGRAPH UK
1177							"\xEA\x99\x8C" => "\xEA\x99\x8D", # CYRILLIC CAPITAL LETTER BROAD OMEGA
1178							"\xEA\x99\x8E" => "\xEA\x99\x8F", # CYRILLIC CAPITAL LETTER NEUTRAL YER
1179							"\xEA\x99\x90" => "\xEA\x99\x91", # CYRILLIC CAPITAL LETTER YERU WITH BACK YER
1180							"\xEA\x99\x92" => "\xEA\x99\x93", # CYRILLIC CAPITAL LETTER IOTIFIED YAT
1181							"\xEA\x99\x94" => "\xEA\x99\x95", # CYRILLIC CAPITAL LETTER REVERSED YU
1182							"\xEA\x99\x96" => "\xEA\x99\x97", # CYRILLIC CAPITAL LETTER IOTIFIED A
1183							"\xEA\x99\x98" => "\xEA\x99\x99", # CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
1184							"\xEA\x99\x9A" => "\xEA\x99\x9B", # CYRILLIC CAPITAL LETTER BLENDED YUS
1185							"\xEA\x99\x9C" => "\xEA\x99\x9D", # CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
1186							"\xEA\x99\x9E" => "\xEA\x99\x9F", # CYRILLIC CAPITAL LETTER YN
1187							"\xEA\x99\xA0" => "\xEA\x99\xA1", # CYRILLIC CAPITAL LETTER REVERSED TSE
1188							"\xEA\x99\xA2" => "\xEA\x99\xA3", # CYRILLIC CAPITAL LETTER SOFT DE
1189							"\xEA\x99\xA4" => "\xEA\x99\xA5", # CYRILLIC CAPITAL LETTER SOFT EL
1190							"\xEA\x99\xA6" => "\xEA\x99\xA7", # CYRILLIC CAPITAL LETTER SOFT EM
1191							"\xEA\x99\xA8" => "\xEA\x99\xA9", # CYRILLIC CAPITAL LETTER MONOCULAR O
1192							"\xEA\x99\xAA" => "\xEA\x99\xAB", # CYRILLIC CAPITAL LETTER BINOCULAR O
1193							"\xEA\x99\xAC" => "\xEA\x99\xAD", # CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
1194							"\xEA\x9A\x80" => "\xEA\x9A\x81", # CYRILLIC CAPITAL LETTER DWE
1195							"\xEA\x9A\x82" => "\xEA\x9A\x83", # CYRILLIC CAPITAL LETTER DZWE
1196							"\xEA\x9A\x84" => "\xEA\x9A\x85", # CYRILLIC CAPITAL LETTER ZHWE
1197							"\xEA\x9A\x86" => "\xEA\x9A\x87", # CYRILLIC CAPITAL LETTER CCHE
1198							"\xEA\x9A\x88" => "\xEA\x9A\x89", # CYRILLIC CAPITAL LETTER DZZE
1199							"\xEA\x9A\x8A" => "\xEA\x9A\x8B", # CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
1200							"\xEA\x9A\x8C" => "\xEA\x9A\x8D", # CYRILLIC CAPITAL LETTER TWE
1201							"\xEA\x9A\x8E" => "\xEA\x9A\x8F", # CYRILLIC CAPITAL LETTER TSWE
1202							"\xEA\x9A\x90" => "\xEA\x9A\x91", # CYRILLIC CAPITAL LETTER TSSE
1203							"\xEA\x9A\x92" => "\xEA\x9A\x93", # CYRILLIC CAPITAL LETTER TCHE
1204							"\xEA\x9A\x94" => "\xEA\x9A\x95", # CYRILLIC CAPITAL LETTER HWE
1205							"\xEA\x9A\x96" => "\xEA\x9A\x97", # CYRILLIC CAPITAL LETTER SHWE
1206							"\xEA\x9A\x98" => "\xEA\x9A\x99", # CYRILLIC CAPITAL LETTER DOUBLE O
1207							"\xEA\x9A\x9A" => "\xEA\x9A\x9B", # CYRILLIC CAPITAL LETTER CROSSED O
1208							"\xEA\x9C\xA2" => "\xEA\x9C\xA3", # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
1209							"\xEA\x9C\xA4" => "\xEA\x9C\xA5", # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
1210							"\xEA\x9C\xA6" => "\xEA\x9C\xA7", # LATIN CAPITAL LETTER HENG
1211							"\xEA\x9C\xA8" => "\xEA\x9C\xA9", # LATIN CAPITAL LETTER TZ
1212							"\xEA\x9C\xAA" => "\xEA\x9C\xAB", # LATIN CAPITAL LETTER TRESILLO
1213							"\xEA\x9C\xAC" => "\xEA\x9C\xAD", # LATIN CAPITAL LETTER CUATRILLO
1214							"\xEA\x9C\xAE" => "\xEA\x9C\xAF", # LATIN CAPITAL LETTER CUATRILLO WITH COMMA
1215							"\xEA\x9C\xB2" => "\xEA\x9C\xB3", # LATIN CAPITAL LETTER AA
1216							"\xEA\x9C\xB4" => "\xEA\x9C\xB5", # LATIN CAPITAL LETTER AO
1217							"\xEA\x9C\xB6" => "\xEA\x9C\xB7", # LATIN CAPITAL LETTER AU
1218							"\xEA\x9C\xB8" => "\xEA\x9C\xB9", # LATIN CAPITAL LETTER AV
1219							"\xEA\x9C\xBA" => "\xEA\x9C\xBB", # LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
1220							"\xEA\x9C\xBC" => "\xEA\x9C\xBD", # LATIN CAPITAL LETTER AY
1221							"\xEA\x9C\xBE" => "\xEA\x9C\xBF", # LATIN CAPITAL LETTER REVERSED C WITH DOT
1222							"\xEA\x9D\x80" => "\xEA\x9D\x81", # LATIN CAPITAL LETTER K WITH STROKE
1223							"\xEA\x9D\x82" => "\xEA\x9D\x83", # LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
1224							"\xEA\x9D\x84" => "\xEA\x9D\x85", # LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
1225							"\xEA\x9D\x86" => "\xEA\x9D\x87", # LATIN CAPITAL LETTER BROKEN L
1226							"\xEA\x9D\x88" => "\xEA\x9D\x89", # LATIN CAPITAL LETTER L WITH HIGH STROKE
1227							"\xEA\x9D\x8A" => "\xEA\x9D\x8B", # LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
1228							"\xEA\x9D\x8C" => "\xEA\x9D\x8D", # LATIN CAPITAL LETTER O WITH LOOP
1229							"\xEA\x9D\x8E" => "\xEA\x9D\x8F", # LATIN CAPITAL LETTER OO
1230							"\xEA\x9D\x90" => "\xEA\x9D\x91", # LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
1231							"\xEA\x9D\x92" => "\xEA\x9D\x93", # LATIN CAPITAL LETTER P WITH FLOURISH
1232							"\xEA\x9D\x94" => "\xEA\x9D\x95", # LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
1233							"\xEA\x9D\x96" => "\xEA\x9D\x97", # LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
1234							"\xEA\x9D\x98" => "\xEA\x9D\x99", # LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
1235							"\xEA\x9D\x9A" => "\xEA\x9D\x9B", # LATIN CAPITAL LETTER R ROTUNDA
1236							"\xEA\x9D\x9C" => "\xEA\x9D\x9D", # LATIN CAPITAL LETTER RUM ROTUNDA
1237							"\xEA\x9D\x9E" => "\xEA\x9D\x9F", # LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
1238							"\xEA\x9D\xA0" => "\xEA\x9D\xA1", # LATIN CAPITAL LETTER VY
1239							"\xEA\x9D\xA2" => "\xEA\x9D\xA3", # LATIN CAPITAL LETTER VISIGOTHIC Z
1240							"\xEA\x9D\xA4" => "\xEA\x9D\xA5", # LATIN CAPITAL LETTER THORN WITH STROKE
1241							"\xEA\x9D\xA6" => "\xEA\x9D\xA7", # LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
1242							"\xEA\x9D\xA8" => "\xEA\x9D\xA9", # LATIN CAPITAL LETTER VEND
1243							"\xEA\x9D\xAA" => "\xEA\x9D\xAB", # LATIN CAPITAL LETTER ET
1244							"\xEA\x9D\xAC" => "\xEA\x9D\xAD", # LATIN CAPITAL LETTER IS
1245							"\xEA\x9D\xAE" => "\xEA\x9D\xAF", # LATIN CAPITAL LETTER CON
1246							"\xEA\x9D\xB9" => "\xEA\x9D\xBA", # LATIN CAPITAL LETTER INSULAR D
1247							"\xEA\x9D\xBB" => "\xEA\x9D\xBC", # LATIN CAPITAL LETTER INSULAR F
1248							"\xEA\x9D\xBD" => "\xE1\xB5\xB9", # LATIN CAPITAL LETTER INSULAR G
1249							"\xEA\x9D\xBE" => "\xEA\x9D\xBF", # LATIN CAPITAL LETTER TURNED INSULAR G
1250							"\xEA\x9E\x80" => "\xEA\x9E\x81", # LATIN CAPITAL LETTER TURNED L
1251							"\xEA\x9E\x82" => "\xEA\x9E\x83", # LATIN CAPITAL LETTER INSULAR R
1252							"\xEA\x9E\x84" => "\xEA\x9E\x85", # LATIN CAPITAL LETTER INSULAR S
1253							"\xEA\x9E\x86" => "\xEA\x9E\x87", # LATIN CAPITAL LETTER INSULAR T
1254							"\xEA\x9E\x8B" => "\xEA\x9E\x8C", # LATIN CAPITAL LETTER SALTILLO
1255							"\xEA\x9E\x8D" => "\xC9\xA5", # LATIN CAPITAL LETTER TURNED H
1256							"\xEA\x9E\x90" => "\xEA\x9E\x91", # LATIN CAPITAL LETTER N WITH DESCENDER
1257							"\xEA\x9E\x92" => "\xEA\x9E\x93", # LATIN CAPITAL LETTER C WITH BAR
1258							"\xEA\x9E\x96" => "\xEA\x9E\x97", # LATIN CAPITAL LETTER B WITH FLOURISH
1259							"\xEA\x9E\x98" => "\xEA\x9E\x99", # LATIN CAPITAL LETTER F WITH STROKE
1260							"\xEA\x9E\x9A" => "\xEA\x9E\x9B", # LATIN CAPITAL LETTER VOLAPUK AE
1261							"\xEA\x9E\x9C" => "\xEA\x9E\x9D", # LATIN CAPITAL LETTER VOLAPUK OE
1262							"\xEA\x9E\x9E" => "\xEA\x9E\x9F", # LATIN CAPITAL LETTER VOLAPUK UE
1263							"\xEA\x9E\xA0" => "\xEA\x9E\xA1", # LATIN CAPITAL LETTER G WITH OBLIQUE STROKE
1264							"\xEA\x9E\xA2" => "\xEA\x9E\xA3", # LATIN CAPITAL LETTER K WITH OBLIQUE STROKE
1265							"\xEA\x9E\xA4" => "\xEA\x9E\xA5", # LATIN CAPITAL LETTER N WITH OBLIQUE STROKE
1266							"\xEA\x9E\xA6" => "\xEA\x9E\xA7", # LATIN CAPITAL LETTER R WITH OBLIQUE STROKE
1267							"\xEA\x9E\xA8" => "\xEA\x9E\xA9", # LATIN CAPITAL LETTER S WITH OBLIQUE STROKE
1268							"\xEA\x9E\xAA" => "\xC9\xA6", # LATIN CAPITAL LETTER H WITH HOOK
1269							"\xEA\x9E\xAB" => "\xC9\x9C", # LATIN CAPITAL LETTER REVERSED OPEN E
1270							"\xEA\x9E\xAC" => "\xC9\xA1", # LATIN CAPITAL LETTER SCRIPT G
1271							"\xEA\x9E\xAD" => "\xC9\xAC", # LATIN CAPITAL LETTER L WITH BELT
1272							"\xEA\x9E\xB0" => "\xCA\x9E", # LATIN CAPITAL LETTER TURNED K
1273							"\xEA\x9E\xB1" => "\xCA\x87", # LATIN CAPITAL LETTER TURNED T
1274							"\xEF\xAC\x80" => "\x66\x66", # LATIN SMALL LIGATURE FF
1275							"\xEF\xAC\x81" => "\x66\x69", # LATIN SMALL LIGATURE FI
1276							"\xEF\xAC\x82" => "\x66\x6C", # LATIN SMALL LIGATURE FL
1277							"\xEF\xAC\x83" => "\x66\x66\x69", # LATIN SMALL LIGATURE FFI
1278							"\xEF\xAC\x84" => "\x66\x66\x6C", # LATIN SMALL LIGATURE FFL
1279							"\xEF\xAC\x85" => "\x73\x74", # LATIN SMALL LIGATURE LONG S T
1280							"\xEF\xAC\x86" => "\x73\x74", # LATIN SMALL LIGATURE ST
1281							"\xEF\xAC\x93" => "\xD5\xB4\xD5\xB6", # ARMENIAN SMALL LIGATURE MEN NOW
1282							"\xEF\xAC\x94" => "\xD5\xB4\xD5\xA5", # ARMENIAN SMALL LIGATURE MEN ECH
1283							"\xEF\xAC\x95" => "\xD5\xB4\xD5\xAB", # ARMENIAN SMALL LIGATURE MEN INI
1284							"\xEF\xAC\x96" => "\xD5\xBE\xD5\xB6", # ARMENIAN SMALL LIGATURE VEW NOW
1285							"\xEF\xAC\x97" => "\xD5\xB4\xD5\xAD", # ARMENIAN SMALL LIGATURE MEN XEH
1286							"\xEF\xBC\xA1" => "\xEF\xBD\x81", # FULLWIDTH LATIN CAPITAL LETTER A
1287							"\xEF\xBC\xA2" => "\xEF\xBD\x82", # FULLWIDTH LATIN CAPITAL LETTER B
1288							"\xEF\xBC\xA3" => "\xEF\xBD\x83", # FULLWIDTH LATIN CAPITAL LETTER C
1289							"\xEF\xBC\xA4" => "\xEF\xBD\x84", # FULLWIDTH LATIN CAPITAL LETTER D
1290							"\xEF\xBC\xA5" => "\xEF\xBD\x85", # FULLWIDTH LATIN CAPITAL LETTER E
1291							"\xEF\xBC\xA6" => "\xEF\xBD\x86", # FULLWIDTH LATIN CAPITAL LETTER F
1292							"\xEF\xBC\xA7" => "\xEF\xBD\x87", # FULLWIDTH LATIN CAPITAL LETTER G
1293							"\xEF\xBC\xA8" => "\xEF\xBD\x88", # FULLWIDTH LATIN CAPITAL LETTER H
1294							"\xEF\xBC\xA9" => "\xEF\xBD\x89", # FULLWIDTH LATIN CAPITAL LETTER I
1295							"\xEF\xBC\xAA" => "\xEF\xBD\x8A", # FULLWIDTH LATIN CAPITAL LETTER J
1296							"\xEF\xBC\xAB" => "\xEF\xBD\x8B", # FULLWIDTH LATIN CAPITAL LETTER K
1297							"\xEF\xBC\xAC" => "\xEF\xBD\x8C", # FULLWIDTH LATIN CAPITAL LETTER L
1298							"\xEF\xBC\xAD" => "\xEF\xBD\x8D", # FULLWIDTH LATIN CAPITAL LETTER M
1299							"\xEF\xBC\xAE" => "\xEF\xBD\x8E", # FULLWIDTH LATIN CAPITAL LETTER N
1300							"\xEF\xBC\xAF" => "\xEF\xBD\x8F", # FULLWIDTH LATIN CAPITAL LETTER O
1301							"\xEF\xBC\xB0" => "\xEF\xBD\x90", # FULLWIDTH LATIN CAPITAL LETTER P
1302							"\xEF\xBC\xB1" => "\xEF\xBD\x91", # FULLWIDTH LATIN CAPITAL LETTER Q
1303							"\xEF\xBC\xB2" => "\xEF\xBD\x92", # FULLWIDTH LATIN CAPITAL LETTER R
1304							"\xEF\xBC\xB3" => "\xEF\xBD\x93", # FULLWIDTH LATIN CAPITAL LETTER S
1305							"\xEF\xBC\xB4" => "\xEF\xBD\x94", # FULLWIDTH LATIN CAPITAL LETTER T
1306							"\xEF\xBC\xB5" => "\xEF\xBD\x95", # FULLWIDTH LATIN CAPITAL LETTER U
1307							"\xEF\xBC\xB6" => "\xEF\xBD\x96", # FULLWIDTH LATIN CAPITAL LETTER V
1308							"\xEF\xBC\xB7" => "\xEF\xBD\x97", # FULLWIDTH LATIN CAPITAL LETTER W
1309							"\xEF\xBC\xB8" => "\xEF\xBD\x98", # FULLWIDTH LATIN CAPITAL LETTER X
1310							"\xEF\xBC\xB9" => "\xEF\xBD\x99", # FULLWIDTH LATIN CAPITAL LETTER Y
1311							"\xEF\xBC\xBA" => "\xEF\xBD\x9A", # FULLWIDTH LATIN CAPITAL LETTER Z
1312							"\xF0\x90\x90\x80" => "\xF0\x90\x90\xA8", # DESERET CAPITAL LETTER LONG I
1313							"\xF0\x90\x90\x81" => "\xF0\x90\x90\xA9", # DESERET CAPITAL LETTER LONG E
1314							"\xF0\x90\x90\x82" => "\xF0\x90\x90\xAA", # DESERET CAPITAL LETTER LONG A
1315							"\xF0\x90\x90\x83" => "\xF0\x90\x90\xAB", # DESERET CAPITAL LETTER LONG AH
1316							"\xF0\x90\x90\x84" => "\xF0\x90\x90\xAC", # DESERET CAPITAL LETTER LONG O
1317							"\xF0\x90\x90\x85" => "\xF0\x90\x90\xAD", # DESERET CAPITAL LETTER LONG OO
1318							"\xF0\x90\x90\x86" => "\xF0\x90\x90\xAE", # DESERET CAPITAL LETTER SHORT I
1319							"\xF0\x90\x90\x87" => "\xF0\x90\x90\xAF", # DESERET CAPITAL LETTER SHORT E
1320							"\xF0\x90\x90\x88" => "\xF0\x90\x90\xB0", # DESERET CAPITAL LETTER SHORT A
1321							"\xF0\x90\x90\x89" => "\xF0\x90\x90\xB1", # DESERET CAPITAL LETTER SHORT AH
1322							"\xF0\x90\x90\x8A" => "\xF0\x90\x90\xB2", # DESERET CAPITAL LETTER SHORT O
1323							"\xF0\x90\x90\x8B" => "\xF0\x90\x90\xB3", # DESERET CAPITAL LETTER SHORT OO
1324							"\xF0\x90\x90\x8C" => "\xF0\x90\x90\xB4", # DESERET CAPITAL LETTER AY
1325							"\xF0\x90\x90\x8D" => "\xF0\x90\x90\xB5", # DESERET CAPITAL LETTER OW
1326							"\xF0\x90\x90\x8E" => "\xF0\x90\x90\xB6", # DESERET CAPITAL LETTER WU
1327							"\xF0\x90\x90\x8F" => "\xF0\x90\x90\xB7", # DESERET CAPITAL LETTER YEE
1328							"\xF0\x90\x90\x90" => "\xF0\x90\x90\xB8", # DESERET CAPITAL LETTER H
1329							"\xF0\x90\x90\x91" => "\xF0\x90\x90\xB9", # DESERET CAPITAL LETTER PEE
1330							"\xF0\x90\x90\x92" => "\xF0\x90\x90\xBA", # DESERET CAPITAL LETTER BEE
1331							"\xF0\x90\x90\x93" => "\xF0\x90\x90\xBB", # DESERET CAPITAL LETTER TEE
1332							"\xF0\x90\x90\x94" => "\xF0\x90\x90\xBC", # DESERET CAPITAL LETTER DEE
1333							"\xF0\x90\x90\x95" => "\xF0\x90\x90\xBD", # DESERET CAPITAL LETTER CHEE
1334							"\xF0\x90\x90\x96" => "\xF0\x90\x90\xBE", # DESERET CAPITAL LETTER JEE
1335							"\xF0\x90\x90\x97" => "\xF0\x90\x90\xBF", # DESERET CAPITAL LETTER KAY
1336							"\xF0\x90\x90\x98" => "\xF0\x90\x91\x80", # DESERET CAPITAL LETTER GAY
1337							"\xF0\x90\x90\x99" => "\xF0\x90\x91\x81", # DESERET CAPITAL LETTER EF
1338							"\xF0\x90\x90\x9A" => "\xF0\x90\x91\x82", # DESERET CAPITAL LETTER VEE
1339							"\xF0\x90\x90\x9B" => "\xF0\x90\x91\x83", # DESERET CAPITAL LETTER ETH
1340							"\xF0\x90\x90\x9C" => "\xF0\x90\x91\x84", # DESERET CAPITAL LETTER THEE
1341							"\xF0\x90\x90\x9D" => "\xF0\x90\x91\x85", # DESERET CAPITAL LETTER ES
1342							"\xF0\x90\x90\x9E" => "\xF0\x90\x91\x86", # DESERET CAPITAL LETTER ZEE
1343							"\xF0\x90\x90\x9F" => "\xF0\x90\x91\x87", # DESERET CAPITAL LETTER ESH
1344							"\xF0\x90\x90\xA0" => "\xF0\x90\x91\x88", # DESERET CAPITAL LETTER ZHEE
1345							"\xF0\x90\x90\xA1" => "\xF0\x90\x91\x89", # DESERET CAPITAL LETTER ER
1346							"\xF0\x90\x90\xA2" => "\xF0\x90\x91\x8A", # DESERET CAPITAL LETTER EL
1347							"\xF0\x90\x90\xA3" => "\xF0\x90\x91\x8B", # DESERET CAPITAL LETTER EM
1348							"\xF0\x90\x90\xA4" => "\xF0\x90\x91\x8C", # DESERET CAPITAL LETTER EN
1349							"\xF0\x90\x90\xA5" => "\xF0\x90\x91\x8D", # DESERET CAPITAL LETTER ENG
1350							"\xF0\x90\x90\xA6" => "\xF0\x90\x91\x8E", # DESERET CAPITAL LETTER OI
1351							"\xF0\x90\x90\xA7" => "\xF0\x90\x91\x8F", # DESERET CAPITAL LETTER EW
1352							"\xF0\x91\xA2\xA0" => "\xF0\x91\xA3\x80", # WARANG CITI CAPITAL LETTER NGAA
1353							"\xF0\x91\xA2\xA1" => "\xF0\x91\xA3\x81", # WARANG CITI CAPITAL LETTER A
1354							"\xF0\x91\xA2\xA2" => "\xF0\x91\xA3\x82", # WARANG CITI CAPITAL LETTER WI
1355							"\xF0\x91\xA2\xA3" => "\xF0\x91\xA3\x83", # WARANG CITI CAPITAL LETTER YU
1356							"\xF0\x91\xA2\xA4" => "\xF0\x91\xA3\x84", # WARANG CITI CAPITAL LETTER YA
1357							"\xF0\x91\xA2\xA5" => "\xF0\x91\xA3\x85", # WARANG CITI CAPITAL LETTER YO
1358							"\xF0\x91\xA2\xA6" => "\xF0\x91\xA3\x86", # WARANG CITI CAPITAL LETTER II
1359							"\xF0\x91\xA2\xA7" => "\xF0\x91\xA3\x87", # WARANG CITI CAPITAL LETTER UU
1360							"\xF0\x91\xA2\xA8" => "\xF0\x91\xA3\x88", # WARANG CITI CAPITAL LETTER E
1361							"\xF0\x91\xA2\xA9" => "\xF0\x91\xA3\x89", # WARANG CITI CAPITAL LETTER O
1362							"\xF0\x91\xA2\xAA" => "\xF0\x91\xA3\x8A", # WARANG CITI CAPITAL LETTER ANG
1363							"\xF0\x91\xA2\xAB" => "\xF0\x91\xA3\x8B", # WARANG CITI CAPITAL LETTER GA
1364							"\xF0\x91\xA2\xAC" => "\xF0\x91\xA3\x8C", # WARANG CITI CAPITAL LETTER KO
1365							"\xF0\x91\xA2\xAD" => "\xF0\x91\xA3\x8D", # WARANG CITI CAPITAL LETTER ENY
1366							"\xF0\x91\xA2\xAE" => "\xF0\x91\xA3\x8E", # WARANG CITI CAPITAL LETTER YUJ
1367							"\xF0\x91\xA2\xAF" => "\xF0\x91\xA3\x8F", # WARANG CITI CAPITAL LETTER UC
1368							"\xF0\x91\xA2\xB0" => "\xF0\x91\xA3\x90", # WARANG CITI CAPITAL LETTER ENN
1369							"\xF0\x91\xA2\xB1" => "\xF0\x91\xA3\x91", # WARANG CITI CAPITAL LETTER ODD
1370							"\xF0\x91\xA2\xB2" => "\xF0\x91\xA3\x92", # WARANG CITI CAPITAL LETTER TTE
1371							"\xF0\x91\xA2\xB3" => "\xF0\x91\xA3\x93", # WARANG CITI CAPITAL LETTER NUNG
1372							"\xF0\x91\xA2\xB4" => "\xF0\x91\xA3\x94", # WARANG CITI CAPITAL LETTER DA
1373							"\xF0\x91\xA2\xB5" => "\xF0\x91\xA3\x95", # WARANG CITI CAPITAL LETTER AT
1374							"\xF0\x91\xA2\xB6" => "\xF0\x91\xA3\x96", # WARANG CITI CAPITAL LETTER AM
1375							"\xF0\x91\xA2\xB7" => "\xF0\x91\xA3\x97", # WARANG CITI CAPITAL LETTER BU
1376							"\xF0\x91\xA2\xB8" => "\xF0\x91\xA3\x98", # WARANG CITI CAPITAL LETTER PU
1377							"\xF0\x91\xA2\xB9" => "\xF0\x91\xA3\x99", # WARANG CITI CAPITAL LETTER HIYO
1378							"\xF0\x91\xA2\xBA" => "\xF0\x91\xA3\x9A", # WARANG CITI CAPITAL LETTER HOLO
1379							"\xF0\x91\xA2\xBB" => "\xF0\x91\xA3\x9B", # WARANG CITI CAPITAL LETTER HORR
1380							"\xF0\x91\xA2\xBC" => "\xF0\x91\xA3\x9C", # WARANG CITI CAPITAL LETTER HAR
1381							"\xF0\x91\xA2\xBD" => "\xF0\x91\xA3\x9D", # WARANG CITI CAPITAL LETTER SSUU
1382							"\xF0\x91\xA2\xBE" => "\xF0\x91\xA3\x9E", # WARANG CITI CAPITAL LETTER SII
1383							"\xF0\x91\xA2\xBF" => "\xF0\x91\xA3\x9F", # WARANG CITI CAPITAL LETTER VIYO
1384							);
1385							}
1386
1387							else {
1388							croak "Don't know my package name '@{[__PACKAGE__]}'";
1389							}
1390
1391							#
1392							# @ARGV wildcard globbing
1393							#
sub import {

    # Runs when the module is use'd.  It does two things:
    #   1. on the platforms listed below, expands wildcards found in @ARGV
    #      with Eutf2::glob (presumably because the command shell there
    #      leaves them unexpanded -- confirm against the platform docs);
    #   2. installs Char::* aliases for the UTF2::* character functions.
    if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
        my @argv = ();
        for (@ARGV) {

            # has space: glob the whole argument as one double-quoted pattern
            if (/\A (?:$q_char)*? [ ] /oxms) {
                if (my @glob = Eutf2::glob(qq{"$_"})) {
                    push @argv, @glob;
                }
                else {
                    push @argv, $_;
                }
            }

            # has wildcard metachar: a "*" or "?" after any number of whole
            # characters (scanning by $q_char keeps the test from matching an
            # octet in the middle of a multi-octet character)
            elsif (/\A (?:$q_char)*? [*?] /oxms) {
                if (my @glob = Eutf2::glob($_)) {
                    push @argv, @glob;
                }
                else {
                    push @argv, $_;
                }
            }

            # no wildcard globbing
            else {
                push @argv, $_;
            }
        }

        # an argument that matched nothing was kept verbatim above
        @ARGV = @argv;
    }

    # make the character-oriented functions reachable under the Char:: name too
    *Char::ord           = \&UTF2::ord;
    *Char::ord_          = \&UTF2::ord_;
    *Char::reverse       = \&UTF2::reverse;
    *Char::getc          = \&UTF2::getc;
    *Char::length        = \&UTF2::length;
    *Char::substr        = \&UTF2::substr;
    *Char::index         = \&UTF2::index;
    *Char::rindex        = \&UTF2::rindex;
    *Char::eval          = \&UTF2::eval;
    *Char::escape        = \&UTF2::escape;
    *Char::escape_token  = \&UTF2::escape_token;
    *Char::escape_script = \&UTF2::escape_script;
}
1441
1442							# P.230 Care with Prototypes
1443							# in Chapter 6: Subroutines
1444							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
1445							#
1446							# If you aren't careful, you can get yourself into trouble with prototypes.
1447							# But if you are careful, you can do a lot of neat things with them. This is
1448							# all very powerful, of course, and should only be used in moderation to make
1449							# the world a better place.
1450
1451							# P.332 Care with Prototypes
1452							# in Chapter 7: Subroutines
1453							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
1454							#
1455							# If you aren't careful, you can get yourself into trouble with prototypes.
1456							# But if you are careful, you can do a lot of neat things with them. This is
1457							# all very powerful, of course, and should only be used in moderation to make
1458							# the world a better place.
1459
1460							#
1461							# Prototypes of subroutines
1462							#
# "no Eutf2" is accepted and does nothing.
sub unimport {}

# Forward declarations, so that calls compiled before the definitions are
# parsed with the right prototype.  The order of the declarations is irrelevant.

# case conversion
sub Eutf2::lc(@);
sub Eutf2::lc_();
sub Eutf2::lcfirst(@);
sub Eutf2::lcfirst_();
sub Eutf2::uc(@);
sub Eutf2::uc_();
sub Eutf2::ucfirst(@);
sub Eutf2::ucfirst_();
sub Eutf2::fc(@);
sub Eutf2::fc_();

# string functions
sub Eutf2::split(;$$$);
sub Eutf2::tr($$$$;$);
sub Eutf2::chop(@);
sub Eutf2::index($$;$);
sub Eutf2::rindex($$;$);
sub Eutf2::chr(;$);
sub Eutf2::chr_();

# file name globbing
sub Eutf2::glob($);
sub Eutf2::glob_();

# regexp helpers (declared without a prototype)
sub Eutf2::ignorecase;
sub Eutf2::classic_character_class;
sub Eutf2::capture;

# character-oriented functions in the UTF2:: namespace
sub UTF2::ord(;$);
sub UTF2::ord_();
sub UTF2::reverse(@);
sub UTF2::getc(;*@);
sub UTF2::length(;$);
sub UTF2::substr($$;$$);
sub UTF2::index($$;$);
sub UTF2::rindex($$;$);
sub UTF2::escape(;$);
1496
1497							#
1498							# Regexp work
1499							#
# Package variables are declared through a string eval of "use vars" at
# compile time (BEGIN), so a failure of the declaration itself is not fatal.

# Regexp work variables.
BEGIN {
    CORE::eval q{ use vars qw( $UTF2::re_a $UTF2::re_t $UTF2::re_n $UTF2::re_r ) }
}

#
# Character class
#
BEGIN {
    CORE::eval q{ use vars qw(
        $dot        $dot_s
        $eD         $eS          $eW         $eH         $eV   $eR   $eN
        $not_alnum  $not_alpha   $not_ascii  $not_blank  $not_cntrl
        $not_digit  $not_graph   $not_lower  $not_lower_i
        $not_print  $not_punct   $not_space  $not_upper  $not_upper_i
        $not_word   $not_xdigit
        $eb         $eB
    ) }
}
1539
# Precompiled patterns used in place of ".", \D, \S, \W, \H, \V, \R, \N,
# the negated POSIX classes, \b and \B.  Each "negated" pattern matches exactly
# one character: either a single octet below \x80 that is not in the excluded
# set, or one well-formed multi-octet sequence.
{
    # one multi-octet character: lead octet(s) followed by the final
    # continuation octet
    my $multibyte = '(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]';

    # builds "any one character except the given single-octet class members";
    # $excluded is spliced verbatim into a bracketed character class
    my $any_char_except = sub {
        my($excluded) = @_;
        return qr/(?>[^\x80-\xFF${excluded}]|${multibyte})/;
    };

    ${Eutf2::dot}   = $any_char_except->('\x0A');
    ${Eutf2::dot_s} = $any_char_except->('');
    ${Eutf2::eD}    = $any_char_except->('0-9');

    # Vertical tabs are now whitespace
    # \s in a regex now matches a vertical tab in all circumstances.
    # http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
    # earlier forms: $any_char_except->('\x09\x0A \x0C\x0D\x20')
    #                $any_char_except->('\x09\x0A\x0B\x0C\x0D\x20')
    ${Eutf2::eS}    = $any_char_except->('\s');

    ${Eutf2::eW}    = $any_char_except->('0-9A-Z_a-z');
    ${Eutf2::eH}    = $any_char_except->('\x09\x20');
    ${Eutf2::eV}    = $any_char_except->('\x0A\x0B\x0C\x0D');
    ${Eutf2::eR}    = qr{(?>\x0D\x0A|[\x0A\x0D])};
    ${Eutf2::eN}    = $any_char_except->('\x0A');

    ${Eutf2::not_alnum}   = $any_char_except->('\x30-\x39\x41-\x5A\x61-\x7A');
    ${Eutf2::not_alpha}   = $any_char_except->('\x41-\x5A\x61-\x7A');
    ${Eutf2::not_ascii}   = $any_char_except->('\x00-\x7F');
    ${Eutf2::not_blank}   = $any_char_except->('\x09\x20');
    ${Eutf2::not_cntrl}   = $any_char_except->('\x00-\x1F\x7F');
    ${Eutf2::not_digit}   = $any_char_except->('\x30-\x39');

    # NOTE(review): \x7F (DEL) is treated as a graph/print character here and
    # in not_print below -- confirm that this is intended.
    ${Eutf2::not_graph}   = $any_char_except->('\x21-\x7F');
    ${Eutf2::not_lower}   = $any_char_except->('\x61-\x7A');
    ${Eutf2::not_lower_i} = $any_char_except->('\x41-\x5A\x61-\x7A'); # Perl 5.16 compatible
    # ${Eutf2::not_lower_i} = $any_char_except->('');                 # older Perl compatible
    ${Eutf2::not_print}   = $any_char_except->('\x20-\x7F');
    ${Eutf2::not_punct}   = $any_char_except->('\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E');
    ${Eutf2::not_space}   = $any_char_except->('\s\x0B');
    ${Eutf2::not_upper}   = $any_char_except->('\x41-\x5A');
    ${Eutf2::not_upper_i} = $any_char_except->('\x41-\x5A\x61-\x7A'); # Perl 5.16 compatible
    # ${Eutf2::not_upper_i} = $any_char_except->('');                 # older Perl compatible
    ${Eutf2::not_word}    = $any_char_except->('\x30-\x39\x41-\x5A\x5F\x61-\x7A');
    ${Eutf2::not_xdigit}  = $any_char_except->('\x30-\x39\x41-\x46\x61-\x66');
}

# \b and \B emulation.  A "word" octet is [0-9A-Z_a-z]; every other octet is a
# non-word octet, and that includes \x3A-\x40 (: ; < = > ? @) -- the range
# \x3A-\x3F used to be missing, so no boundary was found between a word
# character and one of those punctuation characters.
${Eutf2::eb} = qr{(?:\A(?=[0-9A-Z_a-z])|(?<=[\x00-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\xFF])(?=[0-9A-Z_a-z])|(?<=[0-9A-Z_a-z])(?=[\x00-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\xFF]|\z))};
${Eutf2::eB} = qr{(?:(?<=[0-9A-Z_a-z])(?=[0-9A-Z_a-z])|(?<=[\x00-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\xFF])(?=[\x00-\x2F\x3A-\x40\x5B-\x5E\x60\x7B-\xFF]))};

# avoid: Name "Eutf2::foo" used only once: possible typo at here.
${Eutf2::dot} = ${Eutf2::dot};
${Eutf2::dot_s} = ${Eutf2::dot_s};
${Eutf2::eD} = ${Eutf2::eD};
${Eutf2::eS} = ${Eutf2::eS};
${Eutf2::eW} = ${Eutf2::eW};
${Eutf2::eH} = ${Eutf2::eH};
${Eutf2::eV} = ${Eutf2::eV};
${Eutf2::eR} = ${Eutf2::eR};
${Eutf2::eN} = ${Eutf2::eN};
${Eutf2::not_alnum} = ${Eutf2::not_alnum};
${Eutf2::not_alpha} = ${Eutf2::not_alpha};
${Eutf2::not_ascii} = ${Eutf2::not_ascii};
${Eutf2::not_blank} = ${Eutf2::not_blank};
${Eutf2::not_cntrl} = ${Eutf2::not_cntrl};
${Eutf2::not_digit} = ${Eutf2::not_digit};
${Eutf2::not_graph} = ${Eutf2::not_graph};
${Eutf2::not_lower} = ${Eutf2::not_lower};
${Eutf2::not_lower_i} = ${Eutf2::not_lower_i};
${Eutf2::not_print} = ${Eutf2::not_print};
${Eutf2::not_punct} = ${Eutf2::not_punct};
${Eutf2::not_space} = ${Eutf2::not_space};
${Eutf2::not_upper} = ${Eutf2::not_upper};
${Eutf2::not_upper_i} = ${Eutf2::not_upper_i};
${Eutf2::not_word} = ${Eutf2::not_word};
${Eutf2::not_xdigit} = ${Eutf2::not_xdigit};
${Eutf2::eb} = ${Eutf2::eb};
${Eutf2::eB} = ${Eutf2::eB};
1605
1606							#
1607							# UTF-8 split
1608							#
sub Eutf2::split(;$$$) {

    # P.794 29.2.161. split
    # in Chapter 29: Functions
    # of ISBN 0-596-00027-8 Programming Perl Third Edition.

    # P.951 split
    # in Chapter 27: Functions
    # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.

    # Character-aware replacement for split.
    #   $_[0]  pattern (defaults to ' ')
    #   $_[1]  string  (defaults to $_, or '' when $_ is undefined)
    #   $_[2]  limit   (optional)
    # Returns the list of fields in list context.  In scalar context the
    # fields are stored in @_ and their count is returned.
    # Fields are cut only where $pattern matches after a whole number of
    # $q_char characters, so a match never starts inside a character.

    my $pattern = $_[0];
    my $string  = $_[1];
    my $limit   = $_[2];

    # if $pattern is also omitted or is the literal space, " "
    if (not defined $pattern) {
        $pattern = ' ';
    }

    # if $string is omitted, the function splits the $_ string
    if (not defined $string) {
        if (defined $_) {
            $string = $_;
        }
        else {
            $string = '';
        }
    }

    my @split = ();

    # when string is empty
    if ($string eq '') {

        # resulting list value in list context
        if (wantarray) {
            return @split;
        }

        # count of substrings in scalar context
        else {
            carp "Use of implicit split to \@_ is deprecated" if $^W;
            @_ = @split;
            return scalar @_;
        }
    }

    # split's first argument is more consistently interpreted
    #
    # After some changes earlier in v5.17, split's behavior has been simplified:
    # if the PATTERN argument evaluates to a string containing one space, it is
    # treated the way that a literal string containing one space once was.
    # http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#split's_first_argument_is_more_consistently_interpreted

    # if $pattern is also omitted or is the literal space, " ", the function splits
    # on whitespace, /\s+/, after skipping any leading whitespace
    # (and so on)

    elsif ($pattern eq ' ') {
        if (not defined $limit) {
            return CORE::split(' ', $string);
        }
        else {
            return CORE::split(' ', $string, $limit);
        }
    }

    # if $limit is negative, it is treated as if an arbitrarily large $limit has been specified
    if ((not defined $limit) or ($limit <= 0)) {

        # a pattern capable of matching either the null string or something longer than the
        # null string will split the value of $string into separate characters wherever it
        # matches the null string between characters
        # (and so on)

        if ('' =~ / \A $pattern \z /xms) {
            my $last_subexpression_offsets = _last_subexpression_offsets($pattern);

            # Number of places where $pattern matches in this $string; it
            # bounds the loop below.  This must be recomputed for every call,
            # so the match is NOT compiled with /o: /o would keep the pattern
            # of the first call and count the wrong matches afterwards.
            my $match_count = scalar(() = $string =~ /($pattern)/xmsg);

            # P.1024 Appendix W.10 Multibyte Processing
            # of ISBN 1-56592-224-7 CJKV Information Processing
            # (and so on)

            # the //m modifier is assumed when you split on the pattern /^/
            # (and so on)

            # at least one character (+?) must precede the separator, otherwise
            # a null-matching pattern would never consume anything
            while ((--$match_count > 0) and ($string =~ s/\A((?:$q_char)+?)$pattern//m)) {

                # if the $pattern contains parentheses, then the substring matched by each pair of parentheses
                # is included in the resulting list, interspersed with the fields that are ordinarily returned
                # (and so on)

                # $1 is the field itself, $2.. are the groups of $pattern
                local $@;
                for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
                    push @split, CORE::eval('$' . $digit);
                }
            }
        }

        else {
            my $last_subexpression_offsets = _last_subexpression_offsets($pattern);

            # the field may be empty here (*?)
            while ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
                local $@;
                for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
                    push @split, CORE::eval('$' . $digit);
                }
            }
        }
    }

    # positive limit: cut off at most $limit - 1 fields, the rest of the
    # string becomes the last field
    elsif ($limit > 0) {
        if ('' =~ / \A $pattern \z /xms) {
            my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
            while ((--$limit > 0) and (CORE::length($string) > 0)) {
                if ($string =~ s/\A((?:$q_char)+?)$pattern//m) {
                    local $@;
                    for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
                        push @split, CORE::eval('$' . $digit);
                    }
                }
            }
        }
        else {
            my $last_subexpression_offsets = _last_subexpression_offsets($pattern);
            while ((--$limit > 0) and (CORE::length($string) > 0)) {
                if ($string =~ s/\A((?:$q_char)*?)$pattern//m) {
                    local $@;
                    for (my $digit=1; $digit <= ($last_subexpression_offsets + 1); $digit++) {
                        push @split, CORE::eval('$' . $digit);
                    }
                }
            }
        }
    }

    # whatever was not consumed is the last field
    if (CORE::length($string) > 0) {
        push @split, $string;
    }

    # if $_[2] (NOT "$limit") is omitted or zero, trailing null fields are stripped from the result
    if ((not defined $_[2]) or ($_[2] == 0)) {
        while ((scalar(@split) >= 1) and ($split[-1] eq '')) {
            pop @split;
        }
    }

    # resulting list value in list context
    if (wantarray) {
        return @split;
    }

    # count of substrings in scalar context
    else {
        carp "Use of implicit split to \@_ is deprecated" if $^W;
        @_ = @split;
        return scalar @_;
    }
}
1774
1775							#
1776							# get last subexpression offsets
1777							#
sub _last_subexpression_offsets {

    # Returns the number of capturing groups in the pattern $_[0].
    # The pattern is cut into tokens and the tokens that are exactly "(" are
    # counted.  An escaped "\(" , a whole bracketed character class and a
    # "(?" group opener are each consumed as one token, so none of them is
    # counted.
    my $pattern = $_[0];

    # remove comment: embedded "(?# ... )" comments are dropped first so that
    # their text is not scanned
    $pattern =~ s/\(\?\# .*? \)//oxmsg;

    # modifiers that are switched on by the first "(?flags:" or "(?^flags:"
    # group (the part after "-" switches modifiers off and is discarded)
    my $modifier = '';
    if ($pattern =~ /\(\?\^? ([\-A-Za-z]+) :/oxms) {
        $modifier = $1;
        $modifier =~ s/-[A-Za-z]*//;
    }

    # with /x modifier
    # (a "# ..." comment up to the end of the line is one token as well)
    my @char = ();
    if ($modifier =~ /x/oxms) {
        @char = $pattern =~ /\G((?>
            [^\x80-\xFF\\\#\[\(]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] |
            \\ $q_char |
            \# (?>[^\n]*) $ |
            \[ (?>(?:[^\x80-\xFF\\\]]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]|\\\\|\\\]|$q_char)+) \] |
            \(\? |
            $q_char
        ))/oxmsg;
    }

    # without /x modifier
    else {
        @char = $pattern =~ /\G((?>
            [^\x80-\xFF\\\[\(]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] |
            \\ $q_char |
            \[ (?>(?:[^\x80-\xFF\\\]]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]|\\\\|\\\]|$q_char)+) \] |
            \(\? |
            $q_char
        ))/oxmsg;
    }

    # only a bare "(" opens a capturing group
    return scalar grep { $_ eq '(' } @char;
}
1816
1817							#
1818							# UTF-8 transliteration (tr///)
1819							#
sub Eutf2::tr($$$$;$) {

    # Character-aware transliteration.
    #   $_[0]  target string (replaced in place unless the r modifier is given)
    #   $_[1]  text of the binding operator; a "!~" in it negates the result
    #   $_[2]  search list
    #   $_[3]  replacement list
    #   $_[4]  modifiers, any of c d s r (optional)
    # Returns the new string with r; otherwise the number of transliterated
    # characters, or its negation when bound with "!~".

    my $bind_operator   = $_[1];
    my $searchlist      = $_[2];
    my $replacementlist = $_[3];
    my $modifier        = $_[4] || '';

    if (($modifier =~ /r/oxms) and ($bind_operator =~ / !~ /oxms)) {
        croak "Using !~ with tr///r doesn't make sense";
    }

    my @char            = $_[0] =~ /\G (?>$q_char) /oxmsg;
    my @searchlist      = _charlist_tr($searchlist);
    my @replacementlist = _charlist_tr($replacementlist);

    # translation table; the first occurrence of a search character wins
    my %tr = ();
    for my $i (0 .. $#searchlist) {
        my $from = $searchlist[$i];
        next if exists $tr{$from};

        my $to = $replacementlist[$i];
        if (defined $to and ($to ne '')) {
            $tr{$from} = $to;
        }

        # replacement list is too short: delete, ...
        elsif ($modifier =~ /d/oxms) {
            $tr{$from} = '';
        }

        # ... repeat its last character, ...
        elsif (defined $replacementlist[-1] and ($replacementlist[-1] ne '')) {
            $tr{$from} = $replacementlist[-1];
        }

        # ... or map the character to itself
        else {
            $tr{$from} = $from;
        }
    }

    my $squeeze  = ($modifier =~ /s/oxms);
    my $tr       = 0;
    my $replaced = '';
    my $i        = 0;

    # complement: the characters that are NOT in the search list are affected
    if ($modifier =~ /c/oxms) {
        while ($i <= $#char) {
            my $char = $char[$i++];
            if (exists $tr{$char}) {
                $replaced .= $char;
                next;
            }
            if (defined $replacementlist[0]) {
                $replaced .= $replacementlist[0];
            }
            $tr++;
            next if not $squeeze;

            # the rest of the run is counted but produces no output
            while (($i <= $#char) and (not exists $tr{$char[$i]})) {
                $i++;
                $tr++;
            }
        }
    }
    else {
        while ($i <= $#char) {
            my $char = $char[$i++];
            if (not exists $tr{$char}) {
                $replaced .= $char;
                next;
            }
            $replaced .= $tr{$char};
            $tr++;
            next if not $squeeze;

            # following characters that translate to the same character are
            # counted but produce no output
            while (($i <= $#char) and (exists $tr{$char[$i]}) and ($tr{$char[$i]} eq $tr{$char})) {
                $i++;
                $tr++;
            }
        }
    }

    return $replaced if $modifier =~ /r/oxms;

    $_[0] = $replaced;
    if ($bind_operator =~ / !~ /oxms) {
        return not $tr;
    }
    return $tr;
}
1907
1908							#
1909							# UTF-8 chop
1910							#
sub Eutf2::chop(@) {

    # Removes the last character (as matched by $q_char) from every argument,
    # or from $_ when called without arguments.  The strings are changed in
    # place.  Returns the character removed from the last string handled.
    my $chop;

    # works on the current $_
    my $chop_topic = sub {
        my @char = /\G (?>$q_char) /oxmsg;
        $chop = pop @char;
        $_ = join '', @char;
    };

    if (@_) {

        # $_ is aliased to each argument in turn, so the caller's variable changes
        $chop_topic->() for @_;
    }
    else {
        $chop_topic->();
    }
    return $chop;
}
1928
1929							#
1930							# UTF-8 index by octet
1931							#
sub Eutf2::index($$;$) {

    # Returns the octet offset of the first occurrence of $substr in $str that
    # starts on a character boundary at or after $position (default 0),
    # or -1 when there is none.
    my($str,$substr,$position) = @_;
    $position ||= 0;

    my $length = CORE::length($str);
    for (my $pos = 0; $pos < $length; ) {
        if (($pos >= $position) and (CORE::substr($str,$pos,CORE::length($substr)) eq $substr)) {
            return $pos;
        }

        # step over one whole character; one octet if nothing matches $q_char
        $pos += (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) ? CORE::length($1) : 1;
    }
    return -1;
}
1953
1954							#
1955							# UTF-8 reverse index
1956							#
1957							sub Eutf2::rindex($$;$) {
1958
1959	0			0	0	0	my($str,$substr,$position) = @_;
1960	0		0			0	$position \|\|= CORE::length($str) - 1;
1961	0					0	my $pos = 0;
1962	0					0	my $rindex = -1;
1963
1964	0		0			0	while (($pos < CORE::length($str)) and ($pos <= $position)) {
1965	0	0				0	if (CORE::substr($str,$pos,CORE::length($substr)) eq $substr) {
1966	0					0	$rindex = $pos;
1967							}
1968	0	0				0	if (CORE::substr($str,$pos) =~ /\A ($q_char) /oxms) {
1969	0					0	$pos += CORE::length($1);
1970							}
1971							else {
1972	0					0	$pos += 1;
1973							}
1974							}
1975	0					0	return $rindex;
1976							}
1977
1978							#
1979							# UTF-8 lower case first with parameter
1980							#
1981							sub Eutf2::lcfirst(@) {
1982	0	0		0	0	0	if (@_) {
1983	0					0	my $s = shift @_;
1984	0	0	0			0	if (@_ and wantarray) {
1985	0					0	return Eutf2::lc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
1986							}
1987							else {
1988	0					0	return Eutf2::lc(CORE::substr($s,0,1)) . CORE::substr($s,1);
1989							}
1990							}
1991							else {
1992	0					0	return Eutf2::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
1993							}
1994							}
1995
1996							#
1997							# UTF-8 lower case first without parameter
1998							#
1999							sub Eutf2::lcfirst_() {
2000	0			0	0	0	return Eutf2::lc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2001							}
2002
2003							#
2004							# UTF-8 lower case with parameter
2005							#
2006							sub Eutf2::lc(@) {
2007	0	0		0	0	0	if (@_) {
2008	0					0	my $s = shift @_;
2009	0	0	0			0	if (@_ and wantarray) {
2010	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
2011							}
2012							else {
2013	0	0				0	return join('', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
2014							}
2015							}
2016							else {
2017	0					0	return Eutf2::lc_();
2018							}
2019							}
2020
2021							#
2022							# UTF-8 lower case without parameter
2023							#
2024							sub Eutf2::lc_() {
2025	0			0	0	0	my $s = $_;
2026	0	0				0	return join '', map {defined($lc{$_}) ? $lc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
2027							}
2028
2029							#
2030							# UTF-8 upper case first with parameter
2031							#
2032							sub Eutf2::ucfirst(@) {
2033	0	0		0	0	0	if (@_) {
2034	0					0	my $s = shift @_;
2035	0	0	0			0	if (@_ and wantarray) {
2036	0					0	return Eutf2::uc(CORE::substr($s,0,1)) . CORE::substr($s,1), @_;
2037							}
2038							else {
2039	0					0	return Eutf2::uc(CORE::substr($s,0,1)) . CORE::substr($s,1);
2040							}
2041							}
2042							else {
2043	0					0	return Eutf2::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2044							}
2045							}
2046
2047							#
2048							# UTF-8 upper case first without parameter
2049							#
2050							sub Eutf2::ucfirst_() {
2051	0			0	0	0	return Eutf2::uc(CORE::substr($_,0,1)) . CORE::substr($_,1);
2052							}
2053
2054							#
2055							# UTF-8 upper case with parameter
2056							#
2057							sub Eutf2::uc(@) {
2058	0	0		0	0	0	if (@_) {
2059	0					0	my $s = shift @_;
2060	0	0	0			0	if (@_ and wantarray) {
2061	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
2062							}
2063							else {
2064	0	0				0	return join('', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
2065							}
2066							}
2067							else {
2068	0					0	return Eutf2::uc_();
2069							}
2070							}
2071
2072							#
2073							# UTF-8 upper case without parameter
2074							#
2075							sub Eutf2::uc_() {
2076	0			0	0	0	my $s = $_;
2077	0	0				0	return join '', map {defined($uc{$_}) ? $uc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
2078							}
2079
2080							#
2081							# UTF-8 fold case with parameter
2082							#
2083							sub Eutf2::fc(@) {
2084	0	0		0	0	0	if (@_) {
2085	0					0	my $s = shift @_;
2086	0	0	0			0	if (@_ and wantarray) {
2087	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg)), @_;
	0					0
2088							}
2089							else {
2090	0	0				0	return join('', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg));
	0					0
2091							}
2092							}
2093							else {
2094	0					0	return Eutf2::fc_();
2095							}
2096							}
2097
2098							#
2099							# UTF-8 fold case without parameter
2100							#
2101							sub Eutf2::fc_() {
2102	0			0	0	0	my $s = $_;
2103	0	0				0	return join '', map {defined($fc{$_}) ? $fc{$_} : $_} ($s =~ /\G ($q_char) /oxmsg);
	0					0
2104							}
2105
2106							#
2107							# UTF-8 regexp capture
2108							#
2109							{
2110							sub Eutf2::capture {
2111	0			0	1	0	return $_[0];
2112							}
2113							}
2114
2115							#
2116							# UTF-8 regexp ignore case modifier
2117							#
2118							sub Eutf2::ignorecase {
2119
2120	0			0	0	0	my @string = @_;
2121	0					0	my $metachar = qr/[\@\\\|[\]{]/oxms;
2122
2123							# ignore case of $scalar or @array
2124	0					0	for my $string (@string) {
2125
2126							# split regexp
2127	0					0	my @char = $string =~ /\G (?>\[\^\|\\$q_char\|$q_char) /oxmsg;
2128
2129							# unescape character
2130	0					0	for (my $i=0; $i <= $#char; $i++) {
2131	0	0				0	next if not defined $char[$i];
2132
2133							# open character class [...]
2134	0	0				0	if ($char[$i] eq '[') {
		0
		0
		0
2135	0					0	my $left = $i;
2136
2137							# [] make die "unmatched [] in regexp ...\n"
2138
2139	0	0				0	if ($char[$i+1] eq ']') {
2140	0					0	$i++;
2141							}
2142
2143	0					0	while (1) {
2144	0	0				0	if (++$i > $#char) {
2145	0					0	croak "Unmatched [] in regexp";
2146							}
2147	0	0				0	if ($char[$i] eq ']') {
2148	0					0	my $right = $i;
2149	0					0	my @charlist = charlist_qr(@char[$left+1..$right-1], 'i');
2150
2151							# escape character
2152	0					0	for my $char (@charlist) {
2153	0	0				0	if (0) {
2154							}
2155
2156	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
2157	0					0	$char = '\\' . $char;
2158							}
2159							}
2160
2161							# [...]
2162	0					0	splice @char, $left, $right-$left+1, '(?:' . join('\|', @charlist) . ')';
2163
2164	0					0	$i = $left;
2165	0					0	last;
2166							}
2167							}
2168							}
2169
2170							# open character class [^...]
2171							elsif ($char[$i] eq '[^') {
2172	0					0	my $left = $i;
2173
2174							# [^] make die "unmatched [] in regexp ...\n"
2175
2176	0	0				0	if ($char[$i+1] eq ']') {
2177	0					0	$i++;
2178							}
2179
2180	0					0	while (1) {
2181	0	0				0	if (++$i > $#char) {
2182	0					0	croak "Unmatched [] in regexp";
2183							}
2184	0	0				0	if ($char[$i] eq ']') {
2185	0					0	my $right = $i;
2186	0					0	my @charlist = charlist_not_qr(@char[$left+1..$right-1], 'i');
2187
2188							# escape character
2189	0					0	for my $char (@charlist) {
2190	0	0				0	if (0) {
2191							}
2192
2193	0					0	elsif ($char =~ /\A [.\|)] \z/oxms) {
2194	0					0	$char = '\\' . $char;
2195							}
2196							}
2197
2198							# [^...]
2199	0					0	splice @char, $left, $right-$left+1, '(?!' . join('\|', @charlist) . ")(?:$your_char)";
2200
2201	0					0	$i = $left;
2202	0					0	last;
2203							}
2204							}
2205							}
2206
2207							# rewrite classic character class or escape character
2208							elsif (my $char = classic_character_class($char[$i])) {
2209	0					0	$char[$i] = $char;
2210							}
2211
2212							# with /i modifier
2213							elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
2214	0					0	my $uc = Eutf2::uc($char[$i]);
2215	0					0	my $fc = Eutf2::fc($char[$i]);
2216	0	0				0	if ($uc ne $fc) {
2217	0	0				0	if (CORE::length($fc) == 1) {
2218	0					0	$char[$i] = '[' . $uc . $fc . ']';
2219							}
2220							else {
2221	0					0	$char[$i] = '(?:' . $uc . '\|' . $fc . ')';
2222							}
2223							}
2224							}
2225							}
2226
2227							# characterize
2228	0					0	for (my $i=0; $i <= $#char; $i++) {
2229	0	0				0	next if not defined $char[$i];
2230
2231	0	0				0	if (0) {
2232							}
2233
2234							# quote character before ? + * {
2235	0	0				0	elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
2236	0	0				0	if ($char[$i-1] !~ /\A [\x00-\xFF] \z/oxms) {
2237	0					0	$char[$i-1] = '(?:' . $char[$i-1] . ')';
2238							}
2239							}
2240							}
2241
2242	0					0	$string = join '', @char;
2243							}
2244
2245							# make regexp string
2246	0					0	return @string;
2247							}
2248
2249							#
2250							# classic character class ( \D \S \W \d \s \w \C \X \H \V \h \v \R \N \b \B )
2251							#
2252							sub Eutf2::classic_character_class {
2253	0			0	0	0	my($char) = @_;
2254
2255							return {
2256	0		0			0	'\D' => '${Eutf2::eD}',
2257							'\S' => '${Eutf2::eS}',
2258							'\W' => '${Eutf2::eW}',
2259							'\d' => '[0-9]',
2260
2261							# Before Perl 5.6, \s only matched the five whitespace characters
2262							# tab, newline, form-feed, carriage return, and the space character
2263							# itself, which, taken together, is the character class [\t\n\f\r ].
2264
2265							# Vertical tabs are now whitespace
2266							# \s in a regex now matches a vertical tab in all circumstances.
2267							# http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
2268							# \t \n \v \f \r space
2269							# '\s' => '[\x09\x0A \x0C\x0D\x20]',
2270							# '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
2271							'\s' => '\s',
2272
2273							'\w' => '[0-9A-Z_a-z]',
2274							'\C' => '[\x00-\xFF]',
2275							'\X' => 'X',
2276
2277							# \h \v \H \V
2278
2279							# P.114 Character Class Shortcuts
2280							# in Chapter 7: In the World of Regular Expressions
2281							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
2282
2283							# P.357 13.2.3 Whitespace
2284							# in Chapter 13: perlrecharclass: Perl Regular Expression Character Classes
2285							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
2286							#
2287							# 0x00009 CHARACTER TABULATION h s
2288							# 0x0000a LINE FEED (LF) vs
2289							# 0x0000b LINE TABULATION v
2290							# 0x0000c FORM FEED (FF) vs
2291							# 0x0000d CARRIAGE RETURN (CR) vs
2292							# 0x00020 SPACE h s
2293
2294							# P.196 Table 5-9. Alphanumeric regex metasymbols
2295							# in Chapter 5. Pattern Matching
2296							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2297
2298							# (and so on)
2299
2300							'\H' => '${Eutf2::eH}',
2301							'\V' => '${Eutf2::eV}',
2302							'\h' => '[\x09\x20]',
2303							'\v' => '[\x0A\x0B\x0C\x0D]',
2304							'\R' => '${Eutf2::eR}',
2305
2306							# \N
2307							#
2308							# http://perldoc.perl.org/perlre.html
2309							# Character Classes and other Special Escapes
2310							# Any character but \n (experimental). Not affected by /s modifier
2311
2312							'\N' => '${Eutf2::eN}',
2313
2314							# \b \B
2315
2316							# P.180 Boundaries: The \b and \B Assertions
2317							# in Chapter 5: Pattern Matching
2318							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
2319
2320							# P.219 Boundaries: The \b and \B Assertions
2321							# in Chapter 5: Pattern Matching
2322							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
2323
2324							# \b really means (?:(?<=\w)(?!\w)\|(?<!\w)(?=\w))
2325							# or (?:(?<=\A\|\W)(?=\w)\|(?<=\w)(?=\W\|\z))
2326							'\b' => '${Eutf2::eb}',
2327
2328							# \B really means (?:(?<=\w)(?=\w)\|(?<!\w)(?!\w))
2329							# or (?:(?<=\w)(?=\w)\|(?<=\W)(?=\W))
2330							'\B' => '${Eutf2::eB}',
2331
2332							}->{$char} \|\| '';
2333							}
2334
2335							#
2336							# prepare UTF-8 characters per length
2337							#
2338
2339							# 1 octet characters
2340							my @chars1 = ();
2341							sub chars1 {
2342	0	0		0	0	0	if (@chars1) {
2343	0					0	return @chars1;
2344							}
2345	0	0				0	if (exists $range_tr{1}) {
2346	0					0	my @ranges = @{ $range_tr{1} };
	0					0
2347	0					0	while (my @range = splice(@ranges,0,1)) {
2348	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2349	0					0	push @chars1, pack 'C', $oct0;
2350							}
2351							}
2352							}
2353	0					0	return @chars1;
2354							}
2355
2356							# 2 octets characters
2357							my @chars2 = ();
2358							sub chars2 {
2359	0	0		0	0	0	if (@chars2) {
2360	0					0	return @chars2;
2361							}
2362	0	0				0	if (exists $range_tr{2}) {
2363	0					0	my @ranges = @{ $range_tr{2} };
	0					0
2364	0					0	while (my @range = splice(@ranges,0,2)) {
2365	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2366	0					0	for my $oct1 (@{$range[1]}) {
	0					0
2367	0					0	push @chars2, pack 'CC', $oct0,$oct1;
2368							}
2369							}
2370							}
2371							}
2372	0					0	return @chars2;
2373							}
2374
2375							# 3 octets characters
2376							my @chars3 = ();
2377							sub chars3 {
2378	0	0		0	0	0	if (@chars3) {
2379	0					0	return @chars3;
2380							}
2381	0	0				0	if (exists $range_tr{3}) {
2382	0					0	my @ranges = @{ $range_tr{3} };
	0					0
2383	0					0	while (my @range = splice(@ranges,0,3)) {
2384	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2385	0					0	for my $oct1 (@{$range[1]}) {
	0					0
2386	0					0	for my $oct2 (@{$range[2]}) {
	0					0
2387	0					0	push @chars3, pack 'CCC', $oct0,$oct1,$oct2;
2388							}
2389							}
2390							}
2391							}
2392							}
2393	0					0	return @chars3;
2394							}
2395
2396							# 4 octets characters
2397							my @chars4 = ();
2398							sub chars4 {
2399	0	0		0	0	0	if (@chars4) {
2400	0					0	return @chars4;
2401							}
2402	0	0				0	if (exists $range_tr{4}) {
2403	0					0	my @ranges = @{ $range_tr{4} };
	0					0
2404	0					0	while (my @range = splice(@ranges,0,4)) {
2405	0					0	for my $oct0 (@{$range[0]}) {
	0					0
2406	0					0	for my $oct1 (@{$range[1]}) {
	0					0
2407	0					0	for my $oct2 (@{$range[2]}) {
	0					0
2408	0					0	for my $oct3 (@{$range[3]}) {
	0					0
2409	0					0	push @chars4, pack 'CCCC', $oct0,$oct1,$oct2,$oct3;
2410							}
2411							}
2412							}
2413							}
2414							}
2415							}
2416	0					0	return @chars4;
2417							}
2418
2419							#
2420							# UTF-8 open character list for tr
2421							#
2422							sub _charlist_tr {
2423
2424	0			0		0	local $_ = shift @_;
2425
2426							# unescape character
2427	0					0	my @char = ();
2428	0					0	while (not /\G \z/oxmsgc) {
2429	0	0				0	if (/\G (\\0?55\|\\x2[Dd]\|\\-) /oxmsgc) {
		0
		0
		0
		0
		0
		0
2430	0					0	push @char, '\-';
2431							}
2432							elsif (/\G \\ ([0-7]{2,3}) /oxmsgc) {
2433	0					0	push @char, CORE::chr(oct $1);
2434							}
2435							elsif (/\G \\x ([0-9A-Fa-f]{1,2}) /oxmsgc) {
2436	0					0	push @char, CORE::chr(hex $1);
2437							}
2438							elsif (/\G \\c ([\x40-\x5F]) /oxmsgc) {
2439	0					0	push @char, CORE::chr(CORE::ord($1) & 0x1F);
2440							}
2441							elsif (/\G (\\ [0nrtfbae]) /oxmsgc) {
2442	0					0	push @char, {
2443							'\0' => "\0",
2444							'\n' => "\n",
2445							'\r' => "\r",
2446							'\t' => "\t",
2447							'\f' => "\f",
2448							'\b' => "\x08", # \b means backspace in character class
2449							'\a' => "\a",
2450							'\e' => "\e",
2451							}->{$1};
2452							}
2453							elsif (/\G \\ ($q_char) /oxmsgc) {
2454	0					0	push @char, $1;
2455							}
2456							elsif (/\G ($q_char) /oxmsgc) {
2457	0					0	push @char, $1;
2458							}
2459							}
2460
2461							# join separated multiple-octet
2462	0					0	@char = join('',@char) =~ /\G (?>\\-\|$q_char) /oxmsg;
2463
2464							# unescape '-'
2465	0					0	my @i = ();
2466	0					0	for my $i (0 .. $#char) {
2467	0	0				0	if ($char[$i] eq '\-') {
		0
2468	0					0	$char[$i] = '-';
2469							}
2470							elsif ($char[$i] eq '-') {
2471	0	0	0			0	if ((0 < $i) and ($i < $#char)) {
2472	0					0	push @i, $i;
2473							}
2474							}
2475							}
2476
2477							# open character list (reverse for splice)
2478	0					0	for my $i (CORE::reverse @i) {
2479	0					0	my @range = ();
2480
2481							# range error
2482	0	0	0			0	if ((CORE::length($char[$i-1]) > CORE::length($char[$i+1])) or ($char[$i-1] gt $char[$i+1])) {
2483	0					0	croak "Invalid tr/// range \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2484							}
2485
2486							# range of multiple-octet code
2487	0	0				0	if (CORE::length($char[$i-1]) == 1) {
		0
		0
		0
2488	0	0				0	if (CORE::length($char[$i+1]) == 1) {
		0
		0
		0
2489	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars1();
	0					0
2490							}
2491							elsif (CORE::length($char[$i+1]) == 2) {
2492	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
2493	0					0	push @range, grep {$_ le $char[$i+1]} chars2();
	0					0
2494							}
2495							elsif (CORE::length($char[$i+1]) == 3) {
2496	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
2497	0					0	push @range, chars2();
2498	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
2499							}
2500							elsif (CORE::length($char[$i+1]) == 4) {
2501	0					0	push @range, grep {$char[$i-1] le $_} chars1();
	0					0
2502	0					0	push @range, chars2();
2503	0					0	push @range, chars3();
2504	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
2505							}
2506							else {
2507	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2508							}
2509							}
2510							elsif (CORE::length($char[$i-1]) == 2) {
2511	0	0				0	if (CORE::length($char[$i+1]) == 2) {
		0
		0
2512	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars2();
	0					0
2513							}
2514							elsif (CORE::length($char[$i+1]) == 3) {
2515	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
2516	0					0	push @range, grep {$_ le $char[$i+1]} chars3();
	0					0
2517							}
2518							elsif (CORE::length($char[$i+1]) == 4) {
2519	0					0	push @range, grep {$char[$i-1] le $_} chars2();
	0					0
2520	0					0	push @range, chars3();
2521	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
2522							}
2523							else {
2524	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2525							}
2526							}
2527							elsif (CORE::length($char[$i-1]) == 3) {
2528	0	0				0	if (CORE::length($char[$i+1]) == 3) {
		0
2529	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars3();
	0					0
2530							}
2531							elsif (CORE::length($char[$i+1]) == 4) {
2532	0					0	push @range, grep {$char[$i-1] le $_} chars3();
	0					0
2533	0					0	push @range, grep {$_ le $char[$i+1]} chars4();
	0					0
2534							}
2535							else {
2536	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2537							}
2538							}
2539							elsif (CORE::length($char[$i-1]) == 4) {
2540	0	0				0	if (CORE::length($char[$i+1]) == 4) {
2541	0	0				0	push @range, grep {($char[$i-1] le $_) and ($_ le $char[$i+1])} chars4();
	0					0
2542							}
2543							else {
2544	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2545							}
2546							}
2547							else {
2548	0					0	croak "Invalid tr/// range (over 4octets) \"\\x" . unpack('H',$char[$i-1]) . '-\x' . unpack('H',$char[$i+1]) . '"';
2549							}
2550
2551	0					0	splice @char, $i-1, 3, @range;
2552							}
2553
2554	0					0	return @char;
2555							}
2556
2557							#
2558							# UTF-8 open character class
2559							#
2560							sub _cc {
2561	0	0		0		0	if (scalar(@_) == 0) {
		0
		0
2562	0					0	die __FILE__, ": subroutine cc got no parameter.\n";
2563							}
2564							elsif (scalar(@_) == 1) {
2565	0					0	return sprintf('\x%02X',$_[0]);
2566							}
2567							elsif (scalar(@_) == 2) {
2568	0	0				0	if ($_[0] > $_[1]) {
		0
		0
2569	0					0	die __FILE__, ": subroutine cc got \$_[0] > \$_[1] parameters).\n";
2570							}
2571							elsif ($_[0] == $_[1]) {
2572	0					0	return sprintf('\x%02X',$_[0]);
2573							}
2574							elsif (($_[0]+1) == $_[1]) {
2575	0					0	return sprintf('[\\x%02X\\x%02X]',$_[0],$_[1]);
2576							}
2577							else {
2578	0					0	return sprintf('[\\x%02X-\\x%02X]',$_[0],$_[1]);
2579							}
2580							}
2581							else {
2582	0					0	die __FILE__, ": subroutine cc got 3 or more parameters (@{[scalar(@_)]} parameters).\n";
	0					0
2583							}
2584							}
2585
2586							#
2587							# UTF-8 octet range
2588							#
2589							sub _octets {
2590	0			0		0	my $length = shift @_;
2591
2592	0	0				0	if ($length == 1) {
		0
		0
		0
2593	0					0	my($a1) = unpack 'C', $_[0];
2594	0					0	my($z1) = unpack 'C', $_[1];
2595
2596	0	0				0	if ($a1 > $z1) {
2597	0					0	croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H',$a1) . '-\x' . unpack('H',$z1);
2598							}
2599
2600	0	0				0	if ($a1 == $z1) {
		0
2601	0					0	return sprintf('\x%02X',$a1);
2602							}
2603							elsif (($a1+1) == $z1) {
2604	0					0	return sprintf('\x%02X\x%02X',$a1,$z1);
2605							}
2606							else {
2607	0					0	return sprintf('\x%02X-\x%02X',$a1,$z1);
2608							}
2609							}
2610							elsif ($length == 2) {
2611	0					0	my($a1,$a2) = unpack 'CC', $_[0];
2612	0					0	my($z1,$z2) = unpack 'CC', $_[1];
2613	0					0	my($A1,$A2) = unpack 'CC', $_[2];
2614	0					0	my($Z1,$Z2) = unpack 'CC', $_[3];
2615
2616	0	0				0	if ($a1 == $z1) {
		0
2617							return (
2618							# 11111111 222222222222
2619							# A A Z
2620	0					0	_cc($a1) . _cc($a2,$z2), # a2-z2
2621							);
2622							}
2623							elsif (($a1+1) == $z1) {
2624							return (
2625							# 11111111111 222222222222
2626							# A Z A Z
2627	0					0	_cc($a1) . _cc($a2,$Z2), # a2-
2628							_cc( $z1) . _cc($A2,$z2), # -z2
2629							);
2630							}
2631							else {
2632							return (
2633							# 1111111111111111 222222222222
2634							# A Z A Z
2635	0					0	_cc($a1) . _cc($a2,$Z2), # a2-
2636							_cc($a1+1,$z1-1) . _cc($A2,$Z2), # -
2637							_cc( $z1) . _cc($A2,$z2), # -z2
2638							);
2639							}
2640							}
2641							elsif ($length == 3) {
2642	0					0	my($a1,$a2,$a3) = unpack 'CCC', $_[0];
2643	0					0	my($z1,$z2,$z3) = unpack 'CCC', $_[1];
2644	0					0	my($A1,$A2,$A3) = unpack 'CCC', $_[2];
2645	0					0	my($Z1,$Z2,$Z3) = unpack 'CCC', $_[3];
2646
2647	0	0				0	if ($a1 == $z1) {
		0
2648	0	0				0	if ($a2 == $z2) {
		0
2649							return (
2650							# 11111111 22222222 333333333333
2651							# A A A Z
2652	0					0	_cc($a1) . _cc($a2) . _cc($a3,$z3), # a3-z3
2653							);
2654							}
2655							elsif (($a2+1) == $z2) {
2656							return (
2657							# 11111111 22222222222 333333333333
2658							# A A Z A Z
2659	0					0	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2660							_cc($a1) . _cc( $z2) . _cc($A3,$z3), # -z3
2661							);
2662							}
2663							else {
2664							return (
2665							# 11111111 2222222222222222 333333333333
2666							# A A Z A Z
2667	0					0	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2668							_cc($a1) . _cc($a2+1,$z2-1) . _cc($A3,$Z3), # -
2669							_cc($a1) . _cc( $z2) . _cc($A3,$z3), # -z3
2670							);
2671							}
2672							}
2673							elsif (($a1+1) == $z1) {
2674							return (
2675							# 11111111111 22222222222222 333333333333
2676							# A Z A Z A Z
2677	0					0	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2678							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3), # -
2679							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3), # -
2680							_cc( $z1) . _cc( $z2) . _cc($A3,$z3), # -z3
2681							);
2682							}
2683							else {
2684							return (
2685							# 1111111111111111 22222222222222 333333333333
2686							# A Z A Z A Z
2687	0					0	_cc($a1) . _cc($a2) . _cc($a3,$Z3), # a3-
2688							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3), # -
2689							_cc($a1+1,$z1-1) . _cc($A2,$Z2) . _cc($A3,$Z3), # -
2690							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3), # -
2691							_cc( $z1) . _cc( $z2) . _cc($A3,$z3), # -z3
2692							);
2693							}
2694							}
2695							elsif ($length == 4) {
2696	0					0	my($a1,$a2,$a3,$a4) = unpack 'CCCC', $_[0];
2697	0					0	my($z1,$z2,$z3,$z4) = unpack 'CCCC', $_[1];
2698	0					0	my($A1,$A2,$A3,$A4) = unpack 'CCCC', $_[0];
2699	0					0	my($Z1,$Z2,$Z3,$Z4) = unpack 'CCCC', $_[1];
2700
2701	0	0				0	if ($a1 == $z1) {
		0
2702	0	0				0	if ($a2 == $z2) {
		0
2703	0	0				0	if ($a3 == $z3) {
		0
2704							return (
2705							# 11111111 22222222 33333333 444444444444
2706							# A A A A Z
2707	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$z4), # a4-z4
2708							);
2709							}
2710							elsif (($a3+1) == $z3) {
2711							return (
2712							# 11111111 22222222 33333333333 444444444444
2713							# A A A Z A Z
2714	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2715							_cc($a1) . _cc($a2) . _cc( $z3) . _cc($A4,$z4), # -z4
2716							);
2717							}
2718							else {
2719							return (
2720							# 11111111 22222222 3333333333333333 444444444444
2721							# A A A Z A Z
2722	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2723							_cc($a1) . _cc($a2) . _cc($a3+1,$z3-1) . _cc($A4,$Z4), # -
2724							_cc($a1) . _cc($a2) . _cc( $z3) . _cc($A4,$z4), # -z4
2725							);
2726							}
2727							}
2728							elsif (($a2+1) == $z2) {
2729							return (
2730							# 11111111 22222222222 33333333333333 444444444444
2731							# A A Z A Z A Z
2732	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2733							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2734							_cc($a1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2735							_cc($a1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2736							);
2737							}
2738							else {
2739							return (
2740							# 11111111 2222222222222222 33333333333333 444444444444
2741							# A A Z A Z A Z
2742	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2743							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2744							_cc($a1) . _cc($a2+1,$z2-1) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2745							_cc($a1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2746							_cc($a1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2747							);
2748							}
2749							}
2750							elsif (($a1+1) == $z1) {
2751							return (
2752							# 11111111111 22222222222222 33333333333333 444444444444
2753							# A Z A Z A Z A Z
2754	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2755							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2756							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2757							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2758							_cc( $z1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2759							_cc( $z1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2760							);
2761							}
2762							else {
2763							return (
2764							# 1111111111111111 22222222222222 33333333333333 444444444444
2765							# A Z A Z A Z A Z
2766	0					0	_cc($a1) . _cc($a2) . _cc($a3) . _cc($a4,$Z4), # a4-
2767							_cc($a1) . _cc($a2) . _cc($a3+1,$Z3) . _cc($A4,$Z4), # -
2768							_cc($a1) . _cc($a2+1,$Z2) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2769							_cc($a1+1,$z1-1) . _cc($A2,$Z2) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2770							_cc( $z1) . _cc($A2,$z2-1) . _cc($A3,$Z3) . _cc($A4,$Z4), # -
2771							_cc( $z1) . _cc( $z2) . _cc($A3,$z3-1) . _cc($A4,$Z4), # -
2772							_cc( $z1) . _cc( $z2) . _cc( $z3) . _cc($A4,$z4), # -z4
2773							);
2774							}
2775							}
2776							else {
2777	0					0	die __FILE__, ": subroutine _octets got invalid length ($length).\n";
2778							}
2779							}
2780
2781							#
2782							# UTF-8 range regexp
2783							#
2784							sub _range_regexp {
2785	0			0		0	my($length,$first,$last) = @_;
2786
2787	0					0	my @range_regexp = ();
2788	0	0				0	if (not exists $range_tr{$length}) {
2789	0					0	return @range_regexp;
2790							}
2791
2792	0					0	my @ranges = @{ $range_tr{$length} };
	0					0
2793	0					0	while (my @range = splice(@ranges,0,$length)) {
2794	0					0	my $min = '';
2795	0					0	my $max = '';
2796	0					0	for (my $i=0; $i < $length; $i++) {
2797	0					0	$min .= pack 'C', $range[$i][0];
2798	0					0	$max .= pack 'C', $range[$i][-1];
2799							}
2800
2801							# min___max
2802							# FIRST_____________LAST
2803							# (nothing)
2804
2805	0	0	0			0	if ($max lt $first) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
2806							}
2807
2808							# **********
2809							# min_________max
2810							# FIRST_____________LAST
2811							# **********
2812
2813							elsif (($min le $first) and ($first le $max) and ($max le $last)) {
2814	0					0	push @range_regexp, _octets($length,$first,$max,$min,$max);
2815							}
2816
2817							# **********************
2818							# min________________max
2819							# FIRST_____________LAST
2820							# **********************
2821
2822							elsif (($min eq $first) and ($max eq $last)) {
2823	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
2824							}
2825
2826							# *********
2827							# min___max
2828							# FIRST_____________LAST
2829							# *********
2830
2831							elsif (($first le $min) and ($max le $last)) {
2832	0					0	push @range_regexp, _octets($length,$min,$max,$min,$max);
2833							}
2834
2835							# **********************
2836							# min__________________________max
2837							# FIRST_____________LAST
2838							# **********************
2839
2840							elsif (($min le $first) and ($last le $max)) {
2841	0					0	push @range_regexp, _octets($length,$first,$last,$min,$max);
2842							}
2843
2844							# *********
2845							# min________max
2846							# FIRST_____________LAST
2847							# *********
2848
2849							elsif (($first le $min) and ($min le $last) and ($last le $max)) {
2850	0					0	push @range_regexp, _octets($length,$min,$last,$min,$max);
2851							}
2852
2853							# min___max
2854							# FIRST_____________LAST
2855							# (nothing)
2856
2857							elsif ($last lt $min) {
2858							}
2859
2860							else {
2861	0					0	die __FILE__, ": subroutine _range_regexp panic.\n";
2862							}
2863							}
2864
2865	0					0	return @range_regexp;
2866							}
2867
2868							#
2869							# UTF-8 open character list for qr and not qr
2870							#
#
# _charlist(@char, $modifier)
#
# Expands the already-tokenized members of a bracketed character class.
#
#   @char      one element per class member: a literal character, an escape
#              sequence, a POSIX class such as [:alpha:], or '-'.
#   $modifier  regexp modifier string; only the presence of 'i' is examined.
#
# Returns two array references: (\@singleoctet, \@multipleoctet).
# @singleoctet holds fragments that may be placed inside a [...] class;
# @multipleoctet holds fragments that must be used as alternatives.
#
# Croaks when a range A-B has length(A) > length(B), or when both have the
# same length and A sorts after B as a byte string.
#
sub _charlist {

    my $modifier = pop @_;
    my @char = @_;

    my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;

    # pass 1: unescape each member in place
    for (my $i=0; $i <= $#char; $i++) {

        # a '-' that is neither first nor last is a range operator;
        # mark it with the placeholder '...'
        if ($char[$i] eq '-') {
            if ((0 < $i) and ($i < $#char)) {
                $char[$i] = '...';
            }
        }

        # octal escape sequence
        elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
            $char[$i] = octchr($1);
        }

        # hexadecimal escape sequence
        elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
            $char[$i] = hexchr($1);
        }

        # \b{...}      --> b\{...}
        # \B{...}      --> B\{...}
        # \N{CHARNAME} --> N\{CHARNAME}
        # \p{PROPERTY} --> p\{PROPERTY}
        # \P{PROPERTY} --> P\{PROPERTY}
        elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
            $char[$i] = $1 . '\\' . $2;
        }

        # \p, \P, \X --> p, P, X
        elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
            $char[$i] = $1;
        }

        elsif ($char[$i] =~ /\A \\ ([0-7]{2,3}) \z/oxms) {
            $char[$i] = CORE::chr oct $1;
        }
        elsif ($char[$i] =~ /\A \\x ([0-9A-Fa-f]{1,2}) \z/oxms) {
            $char[$i] = CORE::chr hex $1;
        }
        elsif ($char[$i] =~ /\A \\c ([\x40-\x5F]) \z/oxms) {
            $char[$i] = CORE::chr(CORE::ord($1) & 0x1F);
        }
        elsif ($char[$i] =~ /\A (\\ [0nrtfbaedswDSWHVhvR]) \z/oxms) {
            $char[$i] = {
                '\0' => "\0",
                '\n' => "\n",
                '\r' => "\r",
                '\t' => "\t",
                '\f' => "\f",
                '\b' => "\x08", # \b means backspace in character class
                '\a' => "\a",
                '\e' => "\e",
                '\d' => '[0-9]',

                # Vertical tabs are now whitespace
                # \s in a regex now matches a vertical tab in all circumstances.
                # http://search.cpan.org/dist/perl-5.18.0/pod/perldelta.pod#Vertical_tabs_are_now_whitespace
                #            \t  \n  \v  \f  \r space
                # '\s' => '[\x09\x0A    \x0C\x0D\x20]',
                # '\s' => '[\x09\x0A\x0B\x0C\x0D\x20]',
                '\s' => '\s',

                '\w' => '[0-9A-Z_a-z]',
                '\D' => '${Eutf2::eD}',
                '\S' => '${Eutf2::eS}',
                '\W' => '${Eutf2::eW}',

                '\H' => '${Eutf2::eH}',
                '\V' => '${Eutf2::eV}',
                '\h' => '[\x09\x20]',
                '\v' => '[\x0A\x0B\x0C\x0D]',
                '\R' => '${Eutf2::eR}',

            }->{$1};
        }

        # POSIX-style character classes
        # (under /i, [:lower:] and [:upper:] both cover every ASCII letter)
        elsif ($ignorecase and ($char[$i] =~ /\A ( \[\: \^? (?:lower|upper) :\] ) \z/oxms)) {
            $char[$i] = {

                '[:lower:]'   => '[\x41-\x5A\x61-\x7A]',
                '[:upper:]'   => '[\x41-\x5A\x61-\x7A]',
                '[:^lower:]'  => '${Eutf2::not_lower_i}',
                '[:^upper:]'  => '${Eutf2::not_upper_i}',

            }->{$1};
        }
        elsif ($char[$i] =~ /\A ( \[\: \^? (?:alnum|alpha|ascii|blank|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit) :\] ) \z/oxms) {
            $char[$i] = {

                '[:alnum:]'   => '[\x30-\x39\x41-\x5A\x61-\x7A]',
                '[:alpha:]'   => '[\x41-\x5A\x61-\x7A]',
                '[:ascii:]'   => '[\x00-\x7F]',
                '[:blank:]'   => '[\x09\x20]',
                '[:cntrl:]'   => '[\x00-\x1F\x7F]',
                '[:digit:]'   => '[\x30-\x39]',
                '[:graph:]'   => '[\x21-\x7F]',
                '[:lower:]'   => '[\x61-\x7A]',
                '[:print:]'   => '[\x20-\x7F]',
                '[:punct:]'   => '[\x21-\x2F\x3A-\x3F\x40\x5B-\x5F\x60\x7B-\x7E]',

                # P.174 POSIX-Style Character Classes
                # in Chapter 5: Pattern Matching
                # of ISBN 0-596-00027-8 Programming Perl Third Edition.

                # P.311 11.2.4 Character Classes and other Special Escapes
                # in Chapter 11: perlre: Perl regular expressions
                # of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)

                # P.210 POSIX-Style Character Classes
                # in Chapter 5: Pattern Matching
                # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.

                '[:space:]'   => '[\s\x0B]', # "\s" plus vertical tab ("\cK")

                '[:upper:]'   => '[\x41-\x5A]',
                '[:word:]'    => '[\x30-\x39\x41-\x5A\x5F\x61-\x7A]',
                '[:xdigit:]'  => '[\x30-\x39\x41-\x46\x61-\x66]',
                '[:^alnum:]'  => '${Eutf2::not_alnum}',
                '[:^alpha:]'  => '${Eutf2::not_alpha}',
                '[:^ascii:]'  => '${Eutf2::not_ascii}',
                '[:^blank:]'  => '${Eutf2::not_blank}',
                '[:^cntrl:]'  => '${Eutf2::not_cntrl}',
                '[:^digit:]'  => '${Eutf2::not_digit}',
                '[:^graph:]'  => '${Eutf2::not_graph}',
                '[:^lower:]'  => '${Eutf2::not_lower}',
                '[:^print:]'  => '${Eutf2::not_print}',
                '[:^punct:]'  => '${Eutf2::not_punct}',
                '[:^space:]'  => '${Eutf2::not_space}',
                '[:^upper:]'  => '${Eutf2::not_upper}',
                '[:^word:]'   => '${Eutf2::not_word}',
                '[:^xdigit:]' => '${Eutf2::not_xdigit}',

            }->{$1};
        }

        # any other backslashed character stands for itself
        elsif ($char[$i] =~ /\A \\ ($q_char) \z/oxms) {
            $char[$i] = $1;
        }
    }

    # pass 2: open character list
    my @singleoctet   = ();
    my @multipleoctet = ();
    for (my $i=0; $i <= $#char; ) {

        # this member is the start of a range; it is consumed when the
        # '...' placeholder itself is processed
        if (defined($char[$i+1]) and ($char[$i+1] eq '...')) {
            $i += 1;
            next;
        }

        # make range regexp
        elsif ($char[$i] eq '...') {

            # range error
            # ('H*' dumps every octet of the character; a bare 'H' would
            # show only the first hex digit)
            if (CORE::length($char[$i-1]) > CORE::length($char[$i+1])) {
                croak 'Invalid [] range in regexp (length(A) > length(B)) ' . '\x' . unpack('H*',$char[$i-1]) . '-\x' . unpack('H*',$char[$i+1]);
            }
            elsif (CORE::length($char[$i-1]) == CORE::length($char[$i+1])) {
                if ($char[$i-1] gt $char[$i+1]) {
                    croak 'Invalid [] range in regexp (CORE::ord(A) > CORE::ord(B)) ' . '\x' . unpack('H*',$char[$i-1]) . '-\x' . unpack('H*',$char[$i+1]);
                }
            }

            # make range regexp per length
            for my $length (CORE::length($char[$i-1]) .. CORE::length($char[$i+1])) {
                my @regexp = ();

                # is first and last
                if (($length == CORE::length($char[$i-1])) and ($length == CORE::length($char[$i+1]))) {
                    push @regexp, _range_regexp($length, $char[$i-1], $char[$i+1]);
                }

                # is first
                elsif ($length == CORE::length($char[$i-1])) {
                    push @regexp, _range_regexp($length, $char[$i-1], "\xFF" x $length);
                }

                # is inside in first and last
                elsif ((CORE::length($char[$i-1]) < $length) and ($length < CORE::length($char[$i+1]))) {
                    push @regexp, _range_regexp($length, "\x00" x $length, "\xFF" x $length);
                }

                # is last
                elsif ($length == CORE::length($char[$i+1])) {
                    push @regexp, _range_regexp($length, "\x00" x $length, $char[$i+1]);
                }

                else {
                    die __FILE__, ": subroutine make_regexp panic.\n";
                }

                if ($length == 1) {
                    push @singleoctet, @regexp;
                }
                else {
                    push @multipleoctet, @regexp;
                }
            }

            # skip the placeholder and the range end
            $i += 2;
        }

        # single octet, possibly with /i modifier
        elsif ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) {
            if ($ignorecase) {
                my $uc = Eutf2::uc($char[$i]);
                my $fc = Eutf2::fc($char[$i]);
                if ($uc ne $fc) {
                    if (CORE::length($fc) == 1) {
                        push @singleoctet, $uc, $fc;
                    }
                    else {
                        push @singleoctet,   $uc;
                        push @multipleoctet, $fc;
                    }
                }
                else {
                    push @singleoctet, $char[$i];
                }
            }
            else {
                push @singleoctet, $char[$i];
            }
            $i += 1;
        }

        # single character of single octet code
        elsif ($char[$i] =~ /\A (?: \\h ) \z/oxms) {
            push @singleoctet, "\t", "\x20";
            $i += 1;
        }
        elsif ($char[$i] =~ /\A (?: \\v ) \z/oxms) {
            push @singleoctet, "\x0A", "\x0B", "\x0C", "\x0D";
            $i += 1;
        }
        elsif ($char[$i] =~ /\A (?: \\d | \\s | \\w ) \z/oxms) {
            push @singleoctet, $char[$i];
            $i += 1;
        }

        # single character of multiple-octet code
        else {
            push @multipleoctet, $char[$i];
            $i += 1;
        }
    }

    # quote metachar
    for (@singleoctet) {
        if ($_ eq '...') {
            $_ = '-';
        }
        elsif (/\A \n \z/oxms) {
            $_ = '\n';
        }
        elsif (/\A \r \z/oxms) {
            $_ = '\r';
        }
        elsif (/\A ([\x00-\x20\x7F-\xFF]) \z/oxms) {
            $_ = sprintf('\x%02X', CORE::ord $1);
        }
        elsif (/\A [\x00-\xFF] \z/oxms) {
            $_ = quotemeta $_;
        }
    }

    # return character list
    return \@singleoctet, \@multipleoctet;
}
3149
3150							#
3151							# UTF-8 octal escape sequence
3152							#
#
# octchr($octdigit)
#
# Converts the digits of an \o{...} escape to an octet string.  Every octal
# digit contributes three bits; the bit string is left-padded with zero bits
# up to a whole number of octets and packed.
#
sub octchr {
    my($octdigit) = @_;

    my %bits_of = (
        '0' => '000',
        '1' => '001',
        '2' => '010',
        '3' => '011',
        '4' => '100',
        '5' => '101',
        '6' => '110',
        '7' => '111',
    );

    # one three-bit group per octal digit
    my $binary = join '', map { $bits_of{$_} } split(//,$octdigit);

    # zero bits needed in front to reach an octet boundary
    my $padding = (8 - CORE::length($binary) % 8) % 8;

    return pack('B*', ('0' x $padding) . $binary);
}
3186
3187							#
3188							# UTF-8 hexadecimal escape sequence
3189							#
#
# hexchr($hexdigit)
#
# Converts the digits of an \x{...} escape to an octet string.  An odd
# number of digits is left-padded with one '0' before packing.
#
sub hexchr {
    my($hexdigit) = @_;

    my $digits = (CORE::length($_[0]) % 2) ? "0$hexdigit" : "$hexdigit";

    return pack('H*', $digits);
}
3201
3202							#
3203							# UTF-8 open character list for qr
3204							#
#
# charlist_qr(@char, $modifier)
#
# Builds the regexp fragment for a positive character class.  The members
# are expanded by _charlist(); single-octet members become one [...] class
# guarded by a negative lookahead for a multiple-octet lead sequence, and
# that class is offered as an alternative next to the multiple-octet members.
#
sub charlist_qr {

    my $modifier = pop @_;
    my @char = @_;

    my($single_ref, $multiple_ref) = _charlist(@char, $modifier);
    my @singleoctet   = @$single_ref;
    my @multipleoctet = @$multiple_ref;

    if (scalar(@singleoctet) >= 1) {

        # with /i modifier: re-collect the single octets with case folding
        if ($modifier =~ m/i/oxms) {
            my %folded_ord = ();
            for my $item (@singleoctet) {

                # consume leading \xHH-\xHH ranges and \xHH singles
                while ($item =~ s/ \A \\x(..) - \\x(..) //oxms or $item =~ s/ \A \\x((..)) //oxms) {
                    for my $ord (hex($1) .. hex($2)) {
                        my $char = CORE::chr($ord);
                        my $uc = Eutf2::uc($char);
                        my $fc = Eutf2::fc($char);
                        if ($uc eq $fc) {
                            $folded_ord{unpack 'C*', $char} = 1;
                            next;
                        }
                        $folded_ord{unpack 'C*', $uc} = 1;
                        if (CORE::length($fc) == 1) {
                            $folded_ord{unpack 'C*', $fc} = 1;
                        }
                        else {
                            push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
                        }
                    }
                }

                # whatever is left over is recorded as it stands
                if ($item ne '') {
                    $folded_ord{unpack 'C*', $item} = 1;
                }
            }

            # group consecutive ordinals into runs
            my $run_index = 0;
            my @run = ();
            for my $ord (0 .. 255) {
                if (exists $folded_ord{$ord}) {
                    push @{$run[$run_index]}, $ord;
                }
                else {
                    $run_index++;
                }
            }

            # write every run back as \xHH, \xHH\xHH or \xHH-\xHH
            @singleoctet = ();
            for my $members (@run) {
                next if not ref $members;
                my $count = scalar(@{$members});
                my($low, $high) = (@{$members}[0], @{$members}[-1]);
                if ($count == 1) {
                    push @singleoctet, sprintf('\x%02X', $low);
                }
                elsif ($count == 2) {
                    push @singleoctet, sprintf('\x%02X\x%02X', $low, $high);
                }
                else {
                    push @singleoctet, sprintf('\x%02X-\x%02X', $low, $high);
                }
            }
        }

        # the class must not match where a multiple-octet lead sequence starts
        my $not_anchor = '(?!(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF]))';

        push @multipleoctet, join('', $not_anchor, '[', @singleoctet, ']' );
    }

    # return character list
    return $multipleoctet[0] if scalar(@multipleoctet) < 2;
    return '(?:' . join('|', @multipleoctet) . ')';
}
3283
3284							#
3285							# UTF-8 open character list for not qr
3286							#
#
# charlist_not_qr(@char, $modifier)
#
# Builds the regexp fragment for a negated character class.  The members are
# expanded by _charlist(); multiple-octet members are excluded by a negative
# lookahead and single-octet members by a negated [...] class.
#
sub charlist_not_qr {

    my $modifier = pop @_;
    my @char = @_;

    my($single_ref, $multiple_ref) = _charlist(@char, $modifier);
    my @singleoctet   = @$single_ref;
    my @multipleoctet = @$multiple_ref;

    # with /i modifier: re-collect the single octets with case folding
    if ($modifier =~ m/i/oxms) {
        my %folded_ord = ();
        for my $item (@singleoctet) {

            # consume leading \xHH-\xHH ranges and \xHH singles
            while ($item =~ s/ \A \\x(..) - \\x(..) //oxms or $item =~ s/ \A \\x((..)) //oxms) {
                for my $ord (hex($1) .. hex($2)) {
                    my $char = CORE::chr($ord);
                    my $uc = Eutf2::uc($char);
                    my $fc = Eutf2::fc($char);
                    if ($uc eq $fc) {
                        $folded_ord{unpack 'C*', $char} = 1;
                        next;
                    }
                    $folded_ord{unpack 'C*', $uc} = 1;
                    if (CORE::length($fc) == 1) {
                        $folded_ord{unpack 'C*', $fc} = 1;
                    }
                    else {
                        push @multipleoctet, join '', map {sprintf('\x%02X',$_)} unpack 'C*', $fc;
                    }
                }
            }

            # whatever is left over is recorded as it stands
            if ($item ne '') {
                $folded_ord{unpack 'C*', $item} = 1;
            }
        }

        # group consecutive ordinals into runs
        my $run_index = 0;
        my @run = ();
        for my $ord (0 .. 255) {
            if (exists $folded_ord{$ord}) {
                push @{$run[$run_index]}, $ord;
            }
            else {
                $run_index++;
            }
        }

        # write every run back as \xHH, \xHH\xHH or \xHH-\xHH
        @singleoctet = ();
        for my $members (@run) {
            next if not ref $members;
            my $count = scalar(@{$members});
            my($low, $high) = (@{$members}[0], @{$members}[-1]);
            if ($count == 1) {
                push @singleoctet, sprintf('\x%02X', $low);
            }
            elsif ($count == 2) {
                push @singleoctet, sprintf('\x%02X\x%02X', $low, $high);
            }
            else {
                push @singleoctet, sprintf('\x%02X-\x%02X', $low, $high);
            }
        }
    }

    # lookahead that rejects the listed multiple-octet members ('' if none)
    my $reject = '';
    if (scalar(@multipleoctet) >= 1) {
        $reject = '(?!' . join('|', @multipleoctet) . ')';
    }

    # return character list
    if (scalar(@singleoctet) >= 1) {

        # any character other than the listed single octets
        # (and, when $reject is set, other than the multiple-octet members)
        return $reject . '(?:[^\x80-\xFF' . join('', @singleoctet) . ']|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])';
    }

    # no single octets listed: any character
    # (minus the multiple-octet members when $reject is set)
    return $reject . "(?:$your_char)";
}
3376
3377							#
3378							# open file in read mode
3379							#
#
# _open_r($handle, $file)
#
# Opens $file for reading on the handle passed as the first argument.
# The three-argument open is attempted through a string eval; if that does
# not return true, the two-argument form is used, with a leading whitespace
# character protected by "./" and the name terminated by "\0".
#
sub _open_r {
    my $path = $_[1];
    $path =~ s#\A (\s) #./$1#oxms;

    my $opened = CORE::eval(q{open($_[0],'<',$_[1])});
    return $opened if $opened;

    return open($_[0],"< $path\0");
}
3386
3387							#
3388							# open file in write mode
3389							#
#
# _open_w($handle, $file)
#
# Opens $file for writing on the handle passed as the first argument.
# The three-argument open is attempted through a string eval; if that does
# not return true, the two-argument form is used, with a leading whitespace
# character protected by "./" and the name terminated by "\0".
#
sub _open_w {
    my $path = $_[1];
    $path =~ s#\A (\s) #./$1#oxms;

    my $opened = CORE::eval(q{open($_[0],'>',$_[1])});
    return $opened if $opened;

    return open($_[0],"> $path\0");
}
3396
3397							#
3398							# open file in append mode
3399							#
#
# _open_a($handle, $file)
#
# Opens $file for appending on the handle passed as the first argument.
# The three-argument open is attempted through a string eval; if that does
# not return true, the two-argument form is used, with a leading whitespace
# character protected by "./" and the name terminated by "\0".
#
sub _open_a {
    my $path = $_[1];
    $path =~ s#\A (\s) #./$1#oxms;

    my $opened = CORE::eval(q{open($_[0],'>>',$_[1])});
    return $opened if $opened;

    return open($_[0],">> $path\0");
}
3406
3407							#
3408							# safe system
3409							#
#
# _systemx($program, @arguments)
#
# Runs an external program without shell processing and returns whatever
# CORE::system returns.
#
sub _systemx {

    # P.707 29.2.33. exec
    # in Chapter 29: Functions
    # of ISBN 0-596-00027-8 Programming Perl Third Edition.
    #
    # P.855 exec
    # in Chapter 27: Functions
    # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
    #
    # Before the 5.6 release of Perl, exec and system did not flush the
    # output buffer, so output could be lost (exec) or misordered (system)
    # unless command buffering was enabled by setting $| on the filehandle.
    # This module still supports 5.005, so autoflush is switched on here.

    $| = 1;

    # P.565 23.1.2. Cleaning Up Your Environment
    # in Chapter 23: Security
    # of ISBN 0-596-00027-8 Programming Perl Third Edition.
    #
    # P.656 Cleaning Up Your Environment
    # in Chapter 20: Security
    # of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.

    # local $ENV{'PATH'} = '.';
    local @ENV{qw(IFS CDPATH ENV BASH_ENV)}; # Make %ENV safer

    # The same book sections point out that exec and system cannot tell a
    # single scalar argument from a one-element list:
    #
    #   @args = ("echo surprise");  # just one element in list
    #   system @args;               # still subject to shell escapes
    #
    # Naming the program explicitly in the indirect-object slot forces the
    # remaining arguments to be treated as a list, even when there is only
    # one of them:
    #
    #   system { $args[0] } @args;  # safe even with one-argument list

    my $program = $_[0];
    return CORE::system { $program } @_; # safe even with one-argument list
}
3488
3489							#
3490							# UTF-8 order to character (with parameter)
3491							#
#
# Eutf2::chr($code) / Eutf2::chr()
#
# Turns a number into the octet string whose big-endian base-256 digits are
# that number.  Without an argument $_ is used.  Zero yields "\x00".
#
sub Eutf2::chr(;$) {

    my $code = @_ ? $_[0] : $_;

    return "\x00" if $code == 0x00;

    # peel off the low octet until nothing is left; most significant first
    my @octet = ();
    for (; $code > 0; $code = int($code / 0x100)) {
        unshift @octet, ($code % 0x100);
    }
    return pack 'C*', @octet;
}
3508
3509							#
3510							# UTF-8 order to character (without parameter)
3511							#
#
# Eutf2::chr_()
#
# Same conversion as Eutf2::chr, always applied to $_: the number becomes
# the octet string of its big-endian base-256 digits.  Zero yields "\x00".
#
sub Eutf2::chr_() {

    my $code = $_;

    return "\x00" if $code == 0x00;

    # collect octets least significant first, then reverse for packing
    my @low_first = ();
    while ($code > 0) {
        push @low_first, ($code % 0x100);
        $code = int($code / 0x100);
    }
    return pack 'C*', reverse @low_first;
}
3528
3529							#
3530							# UTF-8 path globbing (with parameter)
3531							#
#
# Eutf2::glob($expr)
#
# Globs $expr through _DOS_like_glob() and strips any leading "./"
# sequences from the result.
#
# List context:   returns every matching path.
# Scalar context: returns the next path of the iteration, or undef when the
#                 list is exhausted.
#
sub Eutf2::glob($) {

    if (wantarray) {
        my @glob = _DOS_like_glob(@_);
        for my $glob (@glob) {
            $glob =~ s{ \A (?:\./)+ }{}oxms;
        }
        return @glob;
    }
    else {
        my $glob = _DOS_like_glob(@_);

        # undef marks the end of the iteration; do not run s/// on it
        # (that would raise an "uninitialized value" warning under -w)
        if (defined $glob) {
            $glob =~ s{ \A (?:\./)+ }{}oxms;
        }
        return $glob;
    }
}
3547
3548							#
3549							# UTF-8 path globbing (without parameter)
3550							#
#
# Eutf2::glob_()
#
# Calls _DOS_like_glob() without arguments (which then globs the pattern
# held in $_) and strips any leading "./" sequences from the result.
#
# List context:   returns every matching path.
# Scalar context: returns the next path of the iteration, or undef when the
#                 list is exhausted.
#
sub Eutf2::glob_() {

    if (wantarray) {
        my @glob = _DOS_like_glob();
        for my $glob (@glob) {
            $glob =~ s{ \A (?:\./)+ }{}oxms;
        }
        return @glob;
    }
    else {
        my $glob = _DOS_like_glob();

        # undef marks the end of the iteration; do not run s/// on it
        # (that would raise an "uninitialized value" warning under -w)
        if (defined $glob) {
            $glob =~ s{ \A (?:\./)+ }{}oxms;
        }
        return $glob;
    }
}
3566
3567							#
3568							# UTF-8 path globbing via File::DosGlob 1.10
3569							#
3570							# Often I confuse "_dosglob" and "_doglob".
3571							# So, I renamed "_dosglob" to "_DOS_like_glob".
3572							#
# per-context iteration state for scalar-context calls, keyed by $cxix
my %iter;
my %entries;

#
# _DOS_like_glob($expr, $cxix)
#
#   $expr  glob expression; defaults to $_ when undefined.
#   $cxix  context key; defaults to '_G_' when undefined.
#
# List context:   returns all matches and discards the state for $cxix.
# Scalar context: returns one match per call and undef once the list is
#                 exhausted, at which point the state for $cxix is discarded.
#
sub _DOS_like_glob {

    # context (keyed by second cxix argument provided by core)
    my($expr,$cxix) = @_;

    # glob without args defaults to $_
    $expr = $_ unless defined $expr;

    # represents the current user's home directory
    #
    # 7.3. Expanding Tildes in Filenames
    # in Chapter 7. File Access
    # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
    #
    # and File::HomeDir, File::HomeDir::Windows module

    # DOS-like system
    if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
        $expr =~ s{ \A ~ (?= [^/\\] ) }
                  { my_home_MSWin32() }oxmse;
    }

    # UNIX-like system: "~name" is looked up with getpwnam, falling back to
    # the current user's home; a bare "~" is the current user's home
    else {
        $expr =~ s{ \A ~ ( (?:[^\x80-\xFF/]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])* ) }
                  { $1 ? (CORE::eval(q{(getpwnam($1))[7]})||my_home()) : my_home() }oxmse;
    }

    # assume global context if not provided one
    $cxix = '_G_' unless defined $cxix;
    $iter{$cxix} = 0 unless exists $iter{$cxix};

    # if we're just beginning, do it all first
    $entries{$cxix} = [ _do_glob(1, _parse_line($expr)) ] if $iter{$cxix} == 0;

    # list context: chuck it all out at once
    if (wantarray) {
        delete $iter{$cxix};
        return @{delete $entries{$cxix}};
    }

    # scalar context: hand out one entry while any remain
    if ($iter{$cxix} = scalar @{$entries{$cxix}}) {
        return shift @{$entries{$cxix}};
    }

    # return undef for EOL
    delete $iter{$cxix};
    delete $entries{$cxix};
    return undef;
}
3629
3630							#
3631							# UTF-8 path globbing subroutine
3632							#
#
# _do_glob($cond, @expr)
#
#   $cond  'd' to accept directories only (tested with -d); any other value
#          accepts every existing entry (tested with -e).
#   @expr  glob expressions, one path each; undefined and empty ones are
#          skipped.
#
# Returns the list of matching paths.  An expression wrapped in double
# quotes is not globbed.  A wildcard in the directory part is expanded by a
# recursive call with $cond 'd'.  The file component is matched against the
# directory entries after Eutf2::fc() folding.
#
sub _do_glob {

    my($cond,@expr) = @_;
    my @glob = ();
    my $fix_drive_relative_paths = 0;

OUTER:
    for my $expr (@expr) {
        next OUTER if not defined $expr;
        next OUTER if $expr eq '';

        my @matched = ();
        my @globdir = ();
        my $head = '.';
        my $pathsep = '/';
        my $tail;

        # if argument is within quotes strip em and do no globbing
        if ($expr =~ /\A " ((?:$q_char)*?) " \z/oxms) {
            $expr = $1;
            if ($cond eq 'd') {
                if (-d $expr) {
                    push @glob, $expr;
                }
            }
            else {
                if (-e $expr) {
                    push @glob, $expr;
                }
            }
            next OUTER;
        }

        # wildcards with a drive prefix such as h:*.pm must be changed
        # to h:./*.pm to expand correctly
        if ($^O =~ /\A (?: MSWin32 | NetWare | symbian | dos ) \z/oxms) {
            if ($expr =~ s# \A ((?:[A-Za-z]:)?) ([^\x80-\xFF/\\]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF]) #$1./$2#oxms) {
                $fix_drive_relative_paths = 1;
            }
        }

        if (($head, $tail) = _parse_path($expr,$pathsep)) {
            if ($tail eq '') {
                push @glob, $expr;
                next OUTER;
            }

            # a wildcard ('*' or '?') anywhere in the directory part:
            # expand the directories first, then glob the tail in each
            if ($head =~ / \A (?:$q_char)*? [*?] /oxms) {
                if (@globdir = _do_glob('d', $head)) {
                    push @glob, _do_glob($cond, map {"$_$pathsep$tail"} @globdir);
                    next OUTER;
                }
            }
            if ($head eq '' or $head =~ /\A [A-Za-z]: \z/oxms) {
                $head .= $pathsep;
            }
            $expr = $tail;
        }

        # If file component has no wildcards, we can avoid opendir
        if ($expr !~ / \A (?:$q_char)*? [*?] /oxms) {
            if ($head eq '.') {
                $head = '';
            }
            if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
                $head .= $pathsep;
            }
            $head .= $expr;
            if ($cond eq 'd') {
                if (-d $head) {
                    push @glob, $head;
                }
            }
            else {
                if (-e $head) {
                    push @glob, $head;
                }
            }
            next OUTER;
        }

        # read the whole directory and release the handle before matching
        opendir(*DIR, $head) or next OUTER;
        my @leaf = readdir DIR;
        closedir DIR;

        if ($head eq '.') {
            $head = '';
        }
        if ($head ne '' and ($head =~ / \G ($q_char) /oxmsg)[-1] ne $pathsep) {
            $head .= $pathsep;
        }

        # translate the file component, character by character, to a regexp
        my $pattern = '';
        while ($expr =~ / \G ($q_char) /oxgc) {
            my $char = $1;

            # 6.9. Matching Shell Globs as Regular Expressions
            # in Chapter 6. Pattern Matching
            # of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
            # (and so on)

            if ($char eq '*') {
                $pattern .= "(?:$your_char)*";
            }
            elsif ($char eq '?') {
                $pattern .= "(?:$your_char)?";  # DOS style
                # $pattern .= "(?:$your_char)"; # UNIX style
            }
            elsif ((my $fc = Eutf2::fc($char)) ne $char) {
                $pattern .= $fc;
            }
            else {
                $pattern .= quotemeta $char;
            }
        }
        my $matchsub = sub { Eutf2::fc($_[0]) =~ /\A $pattern \z/xms };

        # if ($@) {
        #     print STDERR "$0: $@\n";
        #     next OUTER;
        # }

INNER:
        for my $leaf (@leaf) {
            if ($leaf eq '.' or $leaf eq '..') {
                next INNER;
            }
            if ($cond eq 'd' and not -d "$head$leaf") {
                next INNER;
            }

            if ($matchsub->($leaf)) {
                push @matched, "$head$leaf";
                next INNER;
            }

            # [DOS compatibility special case]
            # Failed, add a trailing dot and try again, but only...

            if (Eutf2::index($leaf,'.') == -1 and   # if name does not have a dot in it *and*
                CORE::length($leaf) <= 8 and        # name is shorter than or equal to 8 chars *and*
                Eutf2::index($pattern,'\\.') != -1  # pattern has a dot.
            ) {
                if ($matchsub->("$leaf.")) {
                    push @matched, "$head$leaf";
                    next INNER;
                }
            }
        }
        if (@matched) {
            push @glob, @matched;
        }
    }

    # undo the "h:./" rewrite applied to drive-relative expressions above
    if ($fix_drive_relative_paths) {
        for my $glob (@glob) {
            $glob =~ s# \A ([A-Za-z]:) \./ #$1#oxms;
        }
    }
    return @glob;
}
3791
3792							#
3793							# UTF-8 parse line
3794							#
#
# _parse_line($line)
#
# Splits $line into whitespace-separated pieces.  A piece enclosed in double
# quotes may contain whitespace and is returned without its quotes.
# Returns the list of pieces.
#
sub _parse_line {

    my($text) = @_;

    # a trailing blank lets the last piece end with whitespace like the others
    $text .= ' ';

    my @piece = ();
    while ($text =~ /
        " ( (?>(?: [^\x80-\xFF"] |(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] )* ) ) " (?>\s+) |
        ( (?>(?: [^\x80-\xFF"\s]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] )* ) ) (?>\s+)
        /oxmsg
    ) {
        # $1 is set for a quoted piece, $2 for a bare one
        if (defined($1)) {
            push @piece, $1;
        }
        else {
            push @piece, $2;
        }
    }
    return @piece;
}
3810
3811							#
3812							# UTF-8 parse path
3813							#
#
# _parse_path($path, $pathsep)
#
# Splits $path at "/" and "\" into its non-empty components and returns
# ($head, $tail): $tail is the last component and $head is the remaining
# components joined with $pathsep.
#
sub _parse_path {

    my($path,$pathsep) = @_;

    # a trailing separator terminates the last component for the match below;
    # in list context the match yields every captured component in order
    my @component = ("$path/" =~ /
        ((?: [^\x80-\xFF\/\\]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] )+?) [\/\\]
        /oxmsg
    );

    my $tail = pop @component;
    my $head = join $pathsep, @component;
    return $head, $tail;
}
3831
3832							#
3833							# via File::HomeDir::Windows 1.00
3834							#
3835							sub my_home_MSWin32 {
3836
3837							# A lot of unix people and unix-derived tools rely on
3838							# the ability to overload HOME. We will support it too
3839							# so that they can replace raw HOME calls with File::HomeDir.
3840	0	0	0	0	0		if (exists $ENV{'HOME'} and $ENV{'HOME'}) {
		0	0
		0	0
			0
			0
3841	0						return $ENV{'HOME'};
3842							}
3843
3844							# Do we have a user profile?
3845							elsif (exists $ENV{'USERPROFILE'} and $ENV{'USERPROFILE'}) {
3846	0						return $ENV{'USERPROFILE'};
3847							}
3848
3849							# Some Windows use something like $ENV{'HOME'}
3850							elsif (exists $ENV{'HOMEDRIVE'} and exists $ENV{'HOMEPATH'} and $ENV{'HOMEDRIVE'} and $ENV{'HOMEPATH'}) {
3851	0						return join '', $ENV{'HOMEDRIVE'}, $ENV{'HOMEPATH'};
3852							}
3853
3854	0						return undef;
3855							}
3856
3857							#
3858							# via File::HomeDir::Unix 1.00
3859							#
3860							sub my_home {
3861	0			0	0		my $home;
3862
3863	0	0	0				if (exists $ENV{'HOME'} and defined $ENV{'HOME'}) {
		0	0
3864	0						$home = $ENV{'HOME'};
3865							}
3866
3867							# This is from the original code, but I'm guessing
3868							# it means "login directory" and exists on some Unixes.
3869							elsif (exists $ENV{'LOGDIR'} and $ENV{'LOGDIR'}) {
3870	0						$home = $ENV{'LOGDIR'};
3871							}
3872
3873							### More-desperate methods
3874
3875							# Light desperation on any (Unixish) platform
3876							else {
3877	0						$home = CORE::eval q{ (getpwuid($<))[7] };
3878							}
3879
3880							# On Unix in general, a non-existant home means "no home"
3881							# For example, "nobody"-like users might use /nonexistant
3882	0	0	0				if (defined $home and ! -d($home)) {
3883	0						$home = undef;
3884							}
3885	0						return $home;
3886							}
3887
3888							#
3889							# ${^PREMATCH}, $PREMATCH, $` the string preceding what was matched
3890							#
3891							sub Eutf2::PREMATCH {
3892	0			0	0		return $`;
3893							}
3894
3895							#
3896							# ${^MATCH}, $MATCH, $& the string that matched
3897							#
3898							sub Eutf2::MATCH {
3899	0			0	0		return $&;
3900							}
3901
3902							#
3903							# ${^POSTMATCH}, $POSTMATCH, $' the string following what was matched
3904							#
3905							sub Eutf2::POSTMATCH {
3906	0			0	0		return $';
3907							}
3908
3909							#
3910							# UTF-8 character to order (with parameter)
3911							#
3912							sub UTF2::ord(;$) {
3913
3914	0	0		0	1		local $_ = shift if @_;
3915
3916	0	0					if (/\A ($q_char) /oxms) {
3917	0						my @ord = unpack 'C*', $1;
3918	0						my $ord = 0;
3919	0						while (my $o = shift @ord) {
3920	0						$ord = $ord * 0x100 + $o;
3921							}
3922	0						return $ord;
3923							}
3924							else {
3925	0						return CORE::ord $_;
3926							}
3927							}
3928
3929							#
3930							# UTF-8 character to order (without parameter)
3931							#
3932							sub UTF2::ord_() {
3933
3934	0	0		0	0		if (/\A ($q_char) /oxms) {
3935	0						my @ord = unpack 'C*', $1;
3936	0						my $ord = 0;
3937	0						while (my $o = shift @ord) {
3938	0						$ord = $ord * 0x100 + $o;
3939							}
3940	0						return $ord;
3941							}
3942							else {
3943	0						return CORE::ord $_;
3944							}
3945							}
3946
3947							#
3948							# UTF-8 reverse
3949							#
3950							sub UTF2::reverse(@) {
3951
3952	0	0		0	0		if (wantarray) {
3953	0						return CORE::reverse @_;
3954							}
3955							else {
3956
3957							# One of us once cornered Larry in an elevator and asked him what
3958							# problem he was solving with this, but he looked as far off into
3959							# the distance as he could in an elevator and said, "It seemed like
3960							# a good idea at the time."
3961
3962	0						return join '', CORE::reverse(join('',@_) =~ /\G ($q_char) /oxmsg);
3963							}
3964							}
3965
3966							#
3967							# UTF-8 getc (with parameter, without parameter)
3968							#
3969							sub UTF2::getc(;*@) {
3970
3971	0			0	0		my($package) = caller;
3972	0	0					my $fh = @_ ? qualify_to_ref(shift,$package) : \*STDIN;
3973	0	0	0				croak 'Too many arguments for UTF2::getc' if @_ and not wantarray;
3974
3975	0						my @length = sort { $a <=> $b } keys %range_tr;
	0
3976	0						my $getc = '';
3977	0						for my $length ($length[0] .. $length[-1]) {
3978	0						$getc .= CORE::getc($fh);
3979	0	0					if (exists $range_tr{CORE::length($getc)}) {
3980	0	0					if ($getc =~ /\A ${Eutf2::dot_s} \z/oxms) {
3981	0	0					return wantarray ? ($getc,@_) : $getc;
3982							}
3983							}
3984							}
3985	0	0					return wantarray ? ($getc,@_) : $getc;
3986							}
3987
3988							#
3989							# UTF-8 length by character
3990							#
3991							sub UTF2::length(;$) {
3992
3993	0	0		0	1		local $_ = shift if @_;
3994
3995	0						local @_ = /\G ($q_char) /oxmsg;
3996	0						return scalar @_;
3997							}
3998
3999							#
4000							# UTF-8 substr by character
4001							#
4002							BEGIN {
4003
4004							# P.232 The lvalue Attribute
4005							# in Chapter 6: Subroutines
4006							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
4007
4008							# P.336 The lvalue Attribute
4009							# in Chapter 7: Subroutines
4010							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
4011
4012							# P.144 8.4 Lvalue subroutines
4013							# in Chapter 8: perlsub: Perl subroutines
4014							# of ISBN-13: 978-1-906966-02-7 The Perl Language Reference Manual (for Perl version 5.12.1)
4015
4016	302	50	0	302	1	266930	CORE::eval sprintf(<<'END', ($] >= 5.014000) ? ':lvalue' : '');
	0	0		0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
	0
4017							# vv----------------------*******
4018							sub UTF2::substr($$;$$) %s {
4019
4020							my @char = $_[0] =~ /\G (?>$q_char) /oxmsg;
4021
4022							# If the substring is beyond either end of the string, substr() returns the undefined
4023							# value and produces a warning. When used as an lvalue, specifying a substring that
4024							# is entirely outside the string raises an exception.
4025							# http://perldoc.perl.org/functions/substr.html
4026
4027							# A return with no argument returns the scalar value undef in scalar context,
4028							# an empty list () in list context, and (naturally) nothing at all in void
4029							# context.
4030
4031							my $offset = $_[1];
4032							if (($offset > scalar(@char)) or ($offset < (-1 * scalar(@char)))) {
4033							return;
4034							}
4035
4036							# substr($string,$offset,$length,$replacement)
4037							if (@_ == 4) {
4038							my(undef,undef,$length,$replacement) = @_;
4039							my $substr = join '', splice(@char, $offset, $length, $replacement);
4040							$_[0] = join '', @char;
4041
4042							# return $substr; this doesn't work, don't say "return"
4043							$substr;
4044							}
4045
4046							# substr($string,$offset,$length)
4047							elsif (@_ == 3) {
4048							my(undef,undef,$length) = @_;
4049							my $octet_offset = 0;
4050							my $octet_length = 0;
4051							if ($offset == 0) {
4052							$octet_offset = 0;
4053							}
4054							elsif ($offset > 0) {
4055							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
4056							}
4057							else {
4058							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
4059							}
4060							if ($length == 0) {
4061							$octet_length = 0;
4062							}
4063							elsif ($length > 0) {
4064							$octet_length = CORE::length(join '', @char[$offset..$offset+$length-1]);
4065							}
4066							else {
4067							$octet_length = -1 * CORE::length(join '', @char[$#char+$length+1..$#char]);
4068							}
4069							CORE::substr($_[0], $octet_offset, $octet_length);
4070							}
4071
4072							# substr($string,$offset)
4073							else {
4074							my $octet_offset = 0;
4075							if ($offset == 0) {
4076							$octet_offset = 0;
4077							}
4078							elsif ($offset > 0) {
4079							$octet_offset = CORE::length(join '', @char[0..$offset-1]);
4080							}
4081							else {
4082							$octet_offset = -1 * CORE::length(join '', @char[$#char+$offset+1..$#char]);
4083							}
4084							CORE::substr($_[0], $octet_offset);
4085							}
4086							}
4087							END
4088							}
4089
4090							#
4091							# UTF-8 index by character
4092							#
4093							sub UTF2::index($$;$) {
4094
4095	0			0	1		my $index;
4096	0	0					if (@_ == 3) {
4097	0						$index = Eutf2::index($_[0], $_[1], CORE::length(UTF2::substr($_[0], 0, $_[2])));
4098							}
4099							else {
4100	0						$index = Eutf2::index($_[0], $_[1]);
4101							}
4102
4103	0	0					if ($index == -1) {
4104	0						return -1;
4105							}
4106							else {
4107	0						return UTF2::length(CORE::substr $_[0], 0, $index);
4108							}
4109							}
4110
4111							#
4112							# UTF-8 rindex by character
4113							#
4114							sub UTF2::rindex($$;$) {
4115
4116	0			0	1		my $rindex;
4117	0	0					if (@_ == 3) {
4118	0						$rindex = Eutf2::rindex($_[0], $_[1], CORE::length(UTF2::substr($_[0], 0, $_[2])));
4119							}
4120							else {
4121	0						$rindex = Eutf2::rindex($_[0], $_[1]);
4122							}
4123
4124	0	0					if ($rindex == -1) {
4125	0						return -1;
4126							}
4127							else {
4128	0						return UTF2::length(CORE::substr $_[0], 0, $rindex);
4129							}
4130							}
4131
4132							# when 'm//', '/' means regexp match 'm//' and '?' means regexp match '??'
4133							# when 'div', '/' means division operator and '?' means conditional operator (condition ? then : else)
4134	302			302		28423	BEGIN { CORE::eval q{ use vars qw($slash) } } $slash = 'm//';
	302			302		2823
	302					611
	302					27931
4135
4136							# ord() to ord() or UTF2::ord()
4137	302			302		20488	BEGIN { CORE::eval q{ use vars qw($function_ord) } } $function_ord = 'ord';
	302			302		1571
	302					510
	302					22221
4138
4139							# ord to ord or UTF2::ord_
4140	302			302		19109	BEGIN { CORE::eval q{ use vars qw($function_ord_) } } $function_ord_ = 'ord';
	302			302		1497
	302					522
	302					22214
4141
4142							# reverse to reverse or UTF2::reverse
4143	302			302		19316	BEGIN { CORE::eval q{ use vars qw($function_reverse) } } $function_reverse = 'reverse';
	302			302		1641
	302					491
	302					23972
4144
4145							# getc to getc or UTF2::getc
4146	302			302		18398	BEGIN { CORE::eval q{ use vars qw($function_getc) } } $function_getc = 'getc';
	302			302		1487
	302					516
	302					26484
4147
4148							# P.1023 Appendix W.9 Multibyte Anchoring
4149							# of ISBN 1-56592-224-7 CJKV Information Processing
4150
4151							my $anchor = '';
4152
4153	302			302		19477	BEGIN { CORE::eval q{ use vars qw($nest) } }
	302			302		1622
	302					513
	302					20977107
4154
4155							# regexp of nested parens in qqXX
4156
4157							# P.340 Matching Nested Constructs with Embedded Code
4158							# in Chapter 7: Perl
4159							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4160
4161							my $qq_paren = qr{(?{local $nest=0}) (?>(?:
4162							[^\x80-\xFF\\()] \|
4163							\( (?{$nest++}) \|
4164							\) (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4165							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4166							\\ [^\x80-\xFFc] \|
4167							\\c[\x40-\x5F] \|
4168							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4169							[\x00-\xFF]
4170							}xms;
4171
4172							my $qq_brace = qr{(?{local $nest=0}) (?>(?:
4173							[^\x80-\xFF\\{}] \|
4174							\{ (?{$nest++}) \|
4175							\} (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4176							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4177							\\ [^\x80-\xFFc] \|
4178							\\c[\x40-\x5F] \|
4179							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4180							[\x00-\xFF]
4181							}xms;
4182
4183							my $qq_bracket = qr{(?{local $nest=0}) (?>(?:
4184							[^\x80-\xFF\\\[\]] \|
4185							\[ (?{$nest++}) \|
4186							\] (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4187							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4188							\\ [^\x80-\xFFc] \|
4189							\\c[\x40-\x5F] \|
4190							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4191							[\x00-\xFF]
4192							}xms;
4193
4194							my $qq_angle = qr{(?{local $nest=0}) (?>(?:
4195							[^\x80-\xFF\\<>] \|
4196							\< (?{$nest++}) \|
4197							\> (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4198							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4199							\\ [^\x80-\xFFc] \|
4200							\\c[\x40-\x5F] \|
4201							\\ (?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4202							[\x00-\xFF]
4203							}xms;
4204
4205							my $qq_scalar = qr{(?: \{ (?:$qq_brace)*? \} \|
4206							(?: ::)? (?:
4207							(?> [a-zA-Z_][a-zA-Z_0-9]* (?: ::[a-zA-Z_][a-zA-Z_0-9]) )
4208							(?>(?: \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| \{ (?:$qq_brace)? \} )*)
4209							(?>(?: (?: -> )? (?: [\$\@\%\&\]\ \| \$\#\* \| [\@\%]? \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| [\@\%\]? \{ (?:$qq_brace)? \} ) ))
4210							))
4211							}xms;
4212
4213							my $qq_variable = qr{(?: \{ (?:$qq_brace)*? \} \|
4214							(?: ::)? (?:
4215							(?>[0-9]+) \|
4216							[^\x80-\xFFa-zA-Z_0-9\[\]] \|
4217							^[A-Z] \|
4218							(?> [a-zA-Z_][a-zA-Z_0-9]* (?: ::[a-zA-Z_][a-zA-Z_0-9]) )
4219							(?>(?: \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| \{ (?:$qq_brace)? \} )*)
4220							(?>(?: (?: -> )? (?: [\$\@\%\&\]\ \| \$\#\* \| [\@\%]? \[ (?: \$\[ \| \$\] \| $qq_char )? \] \| [\@\%\]? \{ (?:$qq_brace)? \} ) ))
4221							))
4222							}xms;
4223
4224							my $qq_substr = qr{(?> Char::substr \| UTF2::substr \| CORE::substr \| substr ) (?>\s*) $ $qq_paren $
4225							}xms;
4226
4227							# regexp of nested parens in qXX
4228							my $q_paren = qr{(?{local $nest=0}) (?>(?:
4229							[^\x80-\xFF()] \|
4230							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4231							\( (?{$nest++}) \|
4232							\) (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4233							[\x00-\xFF]
4234							}xms;
4235
4236							my $q_brace = qr{(?{local $nest=0}) (?>(?:
4237							[^\x80-\xFF\{\}] \|
4238							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4239							\{ (?{$nest++}) \|
4240							\} (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4241							[\x00-\xFF]
4242							}xms;
4243
4244							my $q_bracket = qr{(?{local $nest=0}) (?>(?:
4245							[^\x80-\xFF\[\]] \|
4246							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4247							\[ (?{$nest++}) \|
4248							\] (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4249							[\x00-\xFF]
4250							}xms;
4251
4252							my $q_angle = qr{(?{local $nest=0}) (?>(?:
4253							[^\x80-\xFF<>] \|
4254							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
4255							\< (?{$nest++}) \|
4256							\> (?(?{$nest>0})(?{$nest--})\|(?!)))*) (?(?{$nest!=0})(?!)) \|
4257							[\x00-\xFF]
4258							}xms;
4259
4260							my $matched = '';
4261							my $s_matched = '';
4262
4263							my $tr_variable = ''; # variable of tr///
4264							my $sub_variable = ''; # variable of s///
4265							my $bind_operator = ''; # =~ or !~
4266
4267							my @heredoc = (); # here document
4268							my @heredoc_delimiter = ();
4269							my $here_script = ''; # here script
4270
4271							#
4272							# escape UTF-8 script
4273							#
4274							sub UTF2::escape(;$) {
4275	0	0		0	0		local($_) = $_[0] if @_;
4276
4277							# P.359 The Study Function
4278							# in Chapter 7: Perl
4279							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4280
4281	0						study $_; # Yes, I studied study yesterday.
4282
4283							# while all script
4284
4285							# 6.14. Matching from Where the Last Pattern Left Off
4286							# in Chapter 6. Pattern Matching
4287							# of ISBN 0-596-00313-7 Perl Cookbook, 2nd Edition.
4288							# (and so on)
4289
4290							# one member of Tag-team
4291							#
4292							# P.128 Start of match (or end of previous match): \G
4293							# P.130 Advanced Use of \G with Perl
4294							# in Chapter 3: Overview of Regular Expression Features and Flavors
4295							# P.255 Use leading anchors
4296							# P.256 Expose ^ and \G at the front expressions
4297							# in Chapter 6: Crafting an Efficient Expression
4298							# P.315 "Tag-team" matching with /gc
4299							# in Chapter 7: Perl
4300							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4301
4302	0						my $e_script = '';
4303	0						while (not /\G \z/oxgc) { # member
4304	0						$e_script .= UTF2::escape_token();
4305							}
4306
4307	0						return $e_script;
4308							}
4309
4310							#
4311							# escape UTF-8 token of script
4312							#
4313							sub UTF2::escape_token {
4314
4315							# \n output here document
4316
4317	0			0	0		my $ignore_modules = join('\|', qw(
4318							utf8
4319							bytes
4320							charnames
4321							I18N::Japanese
4322							I18N::Collate
4323							I18N::JExt
4324							File::DosGlob
4325							Wild
4326							Wildcard
4327							Japanese
4328							));
4329
4330							# another member of Tag-team
4331							#
4332							# P.315 "Tag-team" matching with /gc
4333							# in Chapter 7: Perl
4334							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
4335
4336	0	0	0				if (/\G ( \n ) /oxgc) { # another member (and so on)
		0	0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
4337	0						my $heredoc = '';
4338	0	0					if (scalar(@heredoc_delimiter) >= 1) {
4339	0						$slash = 'm//';
4340
4341	0						$heredoc = join '', @heredoc;
4342	0						@heredoc = ();
4343
4344							# skip here document
4345	0						for my $heredoc_delimiter (@heredoc_delimiter) {
4346	0						/\G .*? \n $heredoc_delimiter \n/xmsgc;
4347							}
4348	0						@heredoc_delimiter = ();
4349
4350	0						$here_script = '';
4351							}
4352	0						return "\n" . $heredoc;
4353							}
4354
4355							# ignore space, comment
4356	0						elsif (/\G ((?>\s+)\|\#.*) /oxgc) { return $1; }
4357
4358							# if (, elsif (, unless (, while (, until (, given (, and when (
4359
4360							# given, when
4361
4362							# P.225 The given Statement
4363							# in Chapter 15: Smart Matching and given-when
4364							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
4365
4366							# P.133 The given Statement
4367							# in Chapter 4: Statements and Declarations
4368							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
4369
4370							elsif (/\G ( (?: if \| elsif \| unless \| while \| until \| given \| when ) (?>\s*) \( ) /oxgc) {
4371	0						$slash = 'm//';
4372	0						return $1;
4373							}
4374
4375							# scalar variable ($scalar = ...) =~ tr///;
4376							# scalar variable ($scalar = ...) =~ s///;
4377
4378							# state
4379
4380							# P.68 Persistent, Private Variables
4381							# in Chapter 4: Subroutines
4382							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
4383
4384							# P.160 Persistent Lexically Scoped Variables: state
4385							# in Chapter 4: Statements and Declarations
4386							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
4387
4388							# (and so on)
4389
4390							elsif (/\G ( \( (?>\s) (?: local \b \| my \b \| our \b \| state \b )? (?>\s) \$ $qq_scalar ) /oxgc) {
4391	0						my $e_string = e_string($1);
4392
4393	0	0					if (/\G ( (?>\s) = $qq_paren \) ) ( (?>\s) (?: =~ \| !~ ) (?>\s*) ) (?= (?: tr \| y ) \b ) /oxgc) {
		0
4394	0						$tr_variable = $e_string . e_string($1);
4395	0						$bind_operator = $2;
4396	0						$slash = 'm//';
4397	0						return '';
4398							}
4399							elsif (/\G ( (?>\s) = $qq_paren \) ) ( (?>\s) (?: =~ \| !~ ) (?>\s*) ) (?= s \b ) /oxgc) {
4400	0						$sub_variable = $e_string . e_string($1);
4401	0						$bind_operator = $2;
4402	0						$slash = 'm//';
4403	0						return '';
4404							}
4405							else {
4406	0						$slash = 'div';
4407	0						return $e_string;
4408							}
4409							}
4410
4411							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
4412							elsif (/\G ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \b \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) /oxmsgc) {
4413	0						$slash = 'div';
4414	0						return q{Eutf2::PREMATCH()};
4415							}
4416
4417							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
4418							elsif (/\G ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \b \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) /oxmsgc) {
4419	0						$slash = 'div';
4420	0						return q{Eutf2::MATCH()};
4421							}
4422
4423							# $', ${'} --> $', ${'}
4424							elsif (/\G ( \$' \| \$\{'\} ) /oxmsgc) {
4425	0						$slash = 'div';
4426	0						return $1;
4427							}
4428
4429							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
4430							elsif (/\G ( \$ (?>\s) POSTMATCH \b \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) /oxmsgc) {
4431	0						$slash = 'div';
4432	0						return q{Eutf2::POSTMATCH()};
4433							}
4434
4435							# scalar variable $scalar =~ tr///;
4436							# scalar variable $scalar =~ s///;
4437							# substr() =~ tr///;
4438							# substr() =~ s///;
4439							elsif (/\G ( \$ $qq_scalar \| $qq_substr ) /oxgc) {
4440	0						my $scalar = e_string($1);
4441
4442	0	0					if (/\G ( (?>\s) (?: =~ \| !~ ) (?>\s) ) (?= (?: tr \| y ) \b ) /oxgc) {
		0
4443	0						$tr_variable = $scalar;
4444	0						$bind_operator = $1;
4445	0						$slash = 'm//';
4446	0						return '';
4447							}
4448							elsif (/\G ( (?>\s) (?: =~ \| !~ ) (?>\s) ) (?= s \b ) /oxgc) {
4449	0						$sub_variable = $scalar;
4450	0						$bind_operator = $1;
4451	0						$slash = 'm//';
4452	0						return '';
4453							}
4454							else {
4455	0						$slash = 'div';
4456	0						return $scalar;
4457							}
4458							}
4459
4460							# end of statement
4461							elsif (/\G ( [,;] ) /oxgc) {
4462	0						$slash = 'm//';
4463
4464							# clear tr/// variable
4465	0						$tr_variable = '';
4466
4467							# clear s/// variable
4468	0						$sub_variable = '';
4469
4470	0						$bind_operator = '';
4471
4472	0						return $1;
4473							}
4474
4475							# bareword
4476							elsif (/\G ( \{ (?>\s) (?: tr \| index \| rindex \| reverse ) (?>\s) \} ) /oxmsgc) {
4477	0						return $1;
4478							}
4479
4480							# $0 --> $0
4481							elsif (/\G ( \$ 0 ) /oxmsgc) {
4482	0						$slash = 'div';
4483	0						return $1;
4484							}
4485							elsif (/\G ( \$ \{ (?>\s) 0 (?>\s) \} ) /oxmsgc) {
4486	0						$slash = 'div';
4487	0						return $1;
4488							}
4489
4490							# $$ --> $$
4491							elsif (/\G ( \$ \$ ) (?![\w\{]) /oxmsgc) {
4492	0						$slash = 'div';
4493	0						return $1;
4494							}
4495
4496							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
4497							# $1, $2, $3 --> $1, $2, $3 otherwise
4498							elsif (/\G \$ ((?>[1-9][0-9]*)) /oxmsgc) {
4499	0						$slash = 'div';
4500	0						return e_capture($1);
4501							}
4502							elsif (/\G \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} /oxmsgc) {
4503	0						$slash = 'div';
4504	0						return e_capture($1);
4505							}
4506
4507							# $$foo[ ... ] --> $ $foo->[ ... ]
4508							elsif (/\G \$ ( \$ (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ .+? \] ) /oxmsgc) {
4509	0						$slash = 'div';
4510	0						return e_capture($1.'->'.$2);
4511							}
4512
4513							# $$foo{ ... } --> $ $foo->{ ... }
4514							elsif (/\G \$ ( \$ (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ .+? \} ) /oxmsgc) {
4515	0						$slash = 'div';
4516	0						return e_capture($1.'->'.$2);
4517							}
4518
4519							# $$foo
4520							elsif (/\G \$ ( \$ (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) /oxmsgc) {
4521	0						$slash = 'div';
4522	0						return e_capture($1);
4523							}
4524
4525							# ${ foo }
4526							elsif (/\G \$ (?>\s) \{ ( (?>\s) (?> [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* ) (?>\s*) ) \} /oxmsgc) {
4527	0						$slash = 'div';
4528	0						return '${' . $1 . '}';
4529							}
4530
4531							# ${ ... }
4532							elsif (/\G \$ (?>\s) \{ (?>\s) ( $qq_brace ) (?>\s*) \} /oxmsgc) {
4533	0						$slash = 'div';
4534	0						return e_capture($1);
4535							}
4536
4537							# variable or function
4538							# $ @ % & * $ #
4539							elsif (/\G ( (?: [\$\@\%\&\] \| \$\# \| -> \| \b sub \b) (?>\s) (?: split \| chop \| index \| rindex \| lc \| uc \| fc \| chr \| ord \| reverse \| getc \| tr \| y \| q \| qq \| qx \| qw \| m \| s \| qr \| glob \| lstat \| opendir \| stat \| unlink \| chdir ) ) \b /oxmsgc) {
4540	0						$slash = 'div';
4541	0						return $1;
4542							}
4543							# $ $ $ $ $ $ $ $ $ $ $ $ $ $
4544							# $ @ # \ ' " / ? ( ) [ ] < >
4545							elsif (/\G ( \$[\$\@\#\\\'\"\/\?\[\]\<\>] ) /oxmsgc) {
4546	0						$slash = 'div';
4547	0						return $1;
4548							}
4549
4550							# while ()
4551							elsif (/\G \b (while (?>\s) $ (?>\s) <[\$]?[A-Za-z_][A-Za-z_0-9]> (?>\s) $) \b /oxgc) {
4552	0						return $1;
4553							}
4554
4555							# while () --- glob
4556
4557							# avoid "Error: Runtime exception" of perl version 5.005_03
4558
4559							elsif (/\G \b while (?>\s) $ (?>\s) < ((?:[^\x80-\xFF>\0\a\e\f\n\r\t]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])+?) > (?>\s*) $ \b /oxgc) {
4560	0						return 'while ($_ = Eutf2::glob("' . $1 . '"))';
4561							}
4562
4563							# while (glob)
4564							elsif (/\G \b while (?>\s) $ (?>\s) glob (?>\s*) $ /oxgc) {
4565	0						return 'while ($_ = Eutf2::glob_)';
4566							}
4567
4568							# while (glob(WILDCARD))
4569							elsif (/\G \b while (?>\s) \( (?>\s) glob \b /oxgc) {
4570	0						return 'while ($_ = Eutf2::glob';
4571							}
4572
4573							# doit if, doit unless, doit while, doit until, doit for, doit when
4574	0						elsif (/\G \b ( if \| unless \| while \| until \| for \| when ) \b /oxgc) { $slash = 'm//'; return $1; }
	0
4575
4576							# subroutines of package Eutf2
4577	0						elsif (/\G \b (CORE:: \| ->(>?\s*) (?: atan2 \| [a-z]{2,})) \b /oxgc) { $slash = 'm//'; return $1; }
	0
4578	0						elsif (/\G \b Char::eval (?= (?>\s*) \{ ) /oxgc) { $slash = 'm//'; return 'eval'; }
	0
4579	0						elsif (/\G \b UTF2::eval (?= (?>\s*) \{ ) /oxgc) { $slash = 'm//'; return 'eval'; }
	0
4580	0						elsif (/\G \b Char::eval \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'eval Char::escape'; }
	0
4581	0						elsif (/\G \b UTF2::eval \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'eval UTF2::escape'; }
	0
4582	0						elsif (/\G \b bytes::substr \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'substr'; }
	0
4583	0						elsif (/\G \b chop \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::chop'; }
	0
4584	0						elsif (/\G \b bytes::index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'index'; }
	0
4585	0						elsif (/\G \b Char::index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Char::index'; }
	0
4586	0						elsif (/\G \b UTF2::index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'UTF2::index'; }
	0
4587	0						elsif (/\G \b index \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::index'; }
	0
4588	0						elsif (/\G \b bytes::rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'rindex'; }
	0
4589	0						elsif (/\G \b Char::rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Char::rindex'; }
	0
4590	0						elsif (/\G \b UTF2::rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'UTF2::rindex'; }
	0
4591	0						elsif (/\G \b rindex \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::rindex'; }
	0
4592	0						elsif (/\G \b lc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::lc'; }
	0
4593	0						elsif (/\G \b lcfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::lcfirst'; }
	0
4594	0						elsif (/\G \b uc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::uc'; }
	0
4595	0						elsif (/\G \b ucfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::ucfirst'; }
	0
4596	0						elsif (/\G \b fc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::fc'; }
	0
4597
4598							# "-s '' ..." means file test "-s 'filename' ..." (not means "- s/// ...")
4599	0						elsif (/\G -s (?>\s*) (\") ((?:$qq_char)+?) (\") /oxgc) { $slash = 'm//'; return '-s ' . e_qq('', $1,$3,$2); }
	0
4600	0						elsif (/\G -s (?>\s+) qq (?>\s*) (\#) ((?:$qq_char)+?) (\#) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0
4601	0						elsif (/\G -s (?>\s+) qq (?>\s*) ($) ((?:$qq_paren)+?) ($) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0
4602	0						elsif (/\G -s (?>\s+) qq (?>\s*) (\{) ((?:$qq_brace)+?) (\}) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0
4603	0						elsif (/\G -s (?>\s+) qq (?>\s*) (\[) ((?:$qq_bracket)+?) (\]) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0
4604	0						elsif (/\G -s (?>\s+) qq (?>\s*) (\<) ((?:$qq_angle)+?) (\>) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0
4605	0						elsif (/\G -s (?>\s+) qq (?>\s*) (\S) ((?:$qq_char)+?) (\1) /oxgc) { $slash = 'm//'; return '-s ' . e_qq('qq',$1,$3,$2); }
	0
4606
4607	0						elsif (/\G -s (?>\s*) (\') ((?:\\\'\|\\\\\|$q_char)+?) (\') /oxgc) { $slash = 'm//'; return '-s ' . e_q ('', $1,$3,$2); }
	0
4608	0						elsif (/\G -s (?>\s+) q (?>\s*) (\#) ((?:\\\#\|\\\\\|$q_char)+?) (\#) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0
4609	0						elsif (/\G -s (?>\s+) q (?>\s*) ($) ((?:\\$\|\\\\\|$q_paren)+?) (\)) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0
4610	0						elsif (/\G -s (?>\s+) q (?>\s*) (\{) ((?:\\\}\|\\\\\|$q_brace)+?) (\}) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0
4611	0						elsif (/\G -s (?>\s+) q (?>\s*) (\[) ((?:\\\]\|\\\\\|$q_bracket)+?) (\]) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0
4612	0						elsif (/\G -s (?>\s+) q (?>\s*) (\<) ((?:\\\>\|\\\\\|$q_angle)+?) (\>) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0
4613	0						elsif (/\G -s (?>\s+) q (?>\s*) (\S) ((?:\\\1\|\\\\\|$q_char)+?) (\1) /oxgc) { $slash = 'm//'; return '-s ' . e_q ('q', $1,$3,$2); }
	0
4614
4615							elsif (/\G -s (?>\s) (\$ (?> \w+ (?: ::\w+) ) (?: (?: ->)? (?: [\$\@\%\&\]\ \| \$\#\* \| $ (?:$qq_paren)? $ \| [\@\%\]? \{ (?:$qq_brace)+? \} \| [\@\%]? \[ (?:$qq_bracket)+? \] ) )*) /oxgc)
4616	0						{ $slash = 'm//'; return "-s $1"; }
	0
4617	0						elsif (/\G -s (?>\s) $ ((?:$qq_paren)?) $ /oxgc) { $slash = 'm//'; return "-s ($1)"; }
	0
4618	0						elsif (/\G -s (?= (?>\s+) [a-z]+) /oxgc) { $slash = 'm//'; return '-s'; }
	0
4619	0						elsif (/\G -s (?>\s+) ((?>\w+)) /oxgc) { $slash = 'm//'; return "-s $1"; }
	0
4620
4621	0						elsif (/\G \b bytes::length (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'length'; }
	0
4622	0						elsif (/\G \b bytes::chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'chr'; }
	0
4623	0						elsif (/\G \b chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::chr'; }
	0
4624	0						elsif (/\G \b bytes::ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'div'; return 'ord'; }
	0
4625	0						elsif (/\G \b ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'div'; return $function_ord; }
	0
4626	0						elsif (/\G \b glob (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $slash = 'm//'; return 'Eutf2::glob'; }
	0
4627	0						elsif (/\G \b lc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::lc_'; }
	0
4628	0						elsif (/\G \b lcfirst \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::lcfirst_'; }
	0
4629	0						elsif (/\G \b uc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::uc_'; }
	0
4630	0						elsif (/\G \b ucfirst \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::ucfirst_'; }
	0
4631	0						elsif (/\G \b fc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::fc_'; }
	0
4632	0						elsif (/\G -s \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return '-s '; }
	0
4633
4634	0						elsif (/\G \b bytes::length \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'length'; }
	0
4635	0						elsif (/\G \b bytes::chr \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'chr'; }
	0
4636	0						elsif (/\G \b chr \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::chr_'; }
	0
4637	0						elsif (/\G \b bytes::ord \b (?! (?>\s*) => ) /oxgc) { $slash = 'div'; return 'ord'; }
	0
4638	0						elsif (/\G \b ord \b (?! (?>\s*) => ) /oxgc) { $slash = 'div'; return $function_ord_; }
	0
4639	0						elsif (/\G \b glob \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return 'Eutf2::glob_'; }
	0
4640	0						elsif (/\G \b reverse \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return $function_reverse; }
	0
4641	0						elsif (/\G \b getc \b (?! (?>\s*) => ) /oxgc) { $slash = 'm//'; return $function_getc; }
	0
4642							# split
4643							elsif (/\G \b (split) \b (?! (?>\s*) => ) /oxgc) {
4644	0						$slash = 'm//';
4645
4646	0						my $e = '';
4647	0						while (/\G ( (?>\s+) \| \( \| \#.* ) /oxgc) {
4648	0						$e .= $1;
4649							}
4650
4651							# end of split
4652	0	0					if (/\G (?= [,;\)\}\]] ) /oxgc) { return 'Eutf2::split' . $e; }
	0	0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
4653
4654							# split scalar value
4655	0						elsif (/\G ( [\$\@\&\*] $qq_scalar ) /oxgc) { return 'Eutf2::split' . $e . e_string($1); }
4656
4657							# split literal space
4658	0						elsif (/\G \b qq (\#) [ ] (\#) /oxgc) { return 'Eutf2::split' . $e . qq {qq$1 $2}; }
4659	0						elsif (/\G \b qq ((?>\s*)) ($) [ ] ($) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4660	0						elsif (/\G \b qq ((?>\s*)) (\{) [ ] (\}) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4661	0						elsif (/\G \b qq ((?>\s*)) (\[) [ ] (\]) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4662	0						elsif (/\G \b qq ((?>\s*)) (\<) [ ] (\>) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4663	0						elsif (/\G \b qq ((?>\s*)) (\S) [ ] (\2) /oxgc) { return 'Eutf2::split' . $e . qq{$1qq$2 $3}; }
4664	0						elsif (/\G \b q (\#) [ ] (\#) /oxgc) { return 'Eutf2::split' . $e . qq {q$1 $2}; }
4665	0						elsif (/\G \b q ((?>\s*)) ($) [ ] ($) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4666	0						elsif (/\G \b q ((?>\s*)) (\{) [ ] (\}) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4667	0						elsif (/\G \b q ((?>\s*)) (\[) [ ] (\]) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4668	0						elsif (/\G \b q ((?>\s*)) (\<) [ ] (\>) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4669	0						elsif (/\G \b q ((?>\s*)) (\S) [ ] (\2) /oxgc) { return 'Eutf2::split' . $e . qq {$1q$2 $3}; }
4670	0						elsif (/\G ' [ ] ' /oxgc) { return 'Eutf2::split' . $e . qq {' '}; }
4671	0						elsif (/\G " [ ] " /oxgc) { return 'Eutf2::split' . $e . qq {" "}; }
4672
4673							# split qq//
4674							elsif (/\G \b (qq) \b /oxgc) {
4675	0	0					if (/\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq# # --> qr # #
	0
4676							else {
4677	0						while (not /\G \z/oxgc) {
4678	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
4679	0						elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq ( ) --> qr ( )
4680	0						elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq { } --> qr { }
4681	0						elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq [ ] --> qr [ ]
4682	0						elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq < > --> qr < >
4683	0						elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) /oxgc) { return e_split($e.'qr','{','}',$2,''); } # qq \| \| --> qr { }
4684	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) /oxgc) { return e_split($e.'qr',$1,$3,$2,''); } # qq * --> qr * *
4685							}
4686	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4687							}
4688							}
4689
4690							# split qr//
4691							elsif (/\G \b (qr) \b /oxgc) {
4692	0	0					if (/\G (\#) ((?:$qq_char)?) (\#) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1,$3,$2,$4); } # qr# #
	0
4693							else {
4694	0						while (not /\G \z/oxgc) {
4695	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
4696	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr ( )
4697	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr { }
4698	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr [ ]
4699	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr < >
4700	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([imosxpadlunbB]) /oxgc) { return e_split_q($e.'qr',$1, $3, $2,$4); } # qr ' '
4701	0						elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([imosxpadlunbB]*) /oxgc) { return e_split ($e.'qr','{','}',$2,$4); } # qr \| \| --> qr { }
4702	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([imosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # qr * *
4703							}
4704	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4705							}
4706							}
4707
4708							# split q//
4709							elsif (/\G \b (q) \b /oxgc) {
4710	0	0					if (/\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q# # --> qr # #
	0
4711							else {
4712	0						while (not /\G \z/oxgc) {
4713	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
4714	0						elsif (/\G ($) ((?:\\\\\|\\$\|\\$\|$q_paren)*?) ($) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q ( ) --> qr ( )
4715	0						elsif (/\G (\{) ((?:\\\\\|\\\}\|\\\{\|$q_brace)*?) (\}) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q { } --> qr { }
4716	0						elsif (/\G (\[) ((?:\\\\\|\\\]\|\\\[\|$q_bracket)*?) (\]) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q [ ] --> qr [ ]
4717	0						elsif (/\G (\<) ((?:\\\\\|\\\>\|\\\<\|$q_angle)*?) (\>) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q < > --> qr < >
4718	0						elsif (/\G ([\-:?\\^\|]) ((?:$q_char)?) (\1) /oxgc) { return e_split_q($e.'qr','{','}',$2,''); } # q \| \| --> qr { }
4719	0						elsif (/\G (\S) ((?:\\\\\|\\\1\| $q_char)?) (\1) /oxgc) { return e_split_q($e.'qr',$1,$3,$2,''); } # q * --> qr * *
4720							}
4721	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4722							}
4723							}
4724
4725							# split m//
4726							elsif (/\G \b (m) \b /oxgc) {
4727	0	0					if (/\G (\#) ((?:$qq_char)?) (\#) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1,$3,$2,$4); } # m# # --> qr # #
	0
4728							else {
4729	0						while (not /\G \z/oxgc) {
4730	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
4731	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m ( ) --> qr ( )
4732	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m { } --> qr { }
4733	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m [ ] --> qr [ ]
4734	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m < > --> qr < >
4735	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([cgimosxpadlunbB]) /oxgc) { return e_split_q($e.'qr',$1, $3, $2,$4); } # m ' ' --> qr ' '
4736	0						elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]*) /oxgc) { return e_split ($e.'qr','{','}',$2,$4); } # m \| \| --> qr { }
4737	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]) /oxgc) { return e_split ($e.'qr',$1, $3, $2,$4); } # m * * --> qr * *
4738							}
4739	0						die __FILE__, ": Search pattern not terminated\n";
4740							}
4741							}
4742
4743							# split ''
4744							elsif (/\G (\') /oxgc) {
4745	0						my $q_string = '';
4746	0						while (not /\G \z/oxgc) {
4747	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
4748	0						elsif (/\G (\\\') /oxgc) { $q_string .= $1; } # splitqr'' --> split qr''
4749	0						elsif (/\G \' /oxgc) { return e_split_q($e.q{ qr},"'","'",$q_string,''); } # ' ' --> qr ' '
4750	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
4751							}
4752	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4753							}
4754
4755							# split ""
4756							elsif (/\G (\") /oxgc) {
4757	0						my $qq_string = '';
4758	0						while (not /\G \z/oxgc) {
4759	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
4760	0						elsif (/\G (\\\") /oxgc) { $qq_string .= $1; } # splitqr"" --> split qr""
4761	0						elsif (/\G \" /oxgc) { return e_split($e.q{ qr},'"','"',$qq_string,''); } # " " --> qr " "
4762	0						elsif (/\G ($q_char) /oxgc) { $qq_string .= $1; }
4763							}
4764	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4765							}
4766
4767							# split //
4768							elsif (/\G (\/) /oxgc) {
4769	0						my $regexp = '';
4770	0						while (not /\G \z/oxgc) {
4771	0	0					if (/\G (\\\\) /oxgc) { $regexp .= $1; }
	0	0
		0
		0
4772	0						elsif (/\G (\\\/) /oxgc) { $regexp .= $1; } # splitqr// --> split qr//
4773	0						elsif (/\G \/ ([cgimosxpadlunbB]*) /oxgc) { return e_split($e.q{ qr}, '/','/',$regexp,$1); } # / / --> qr / /
4774	0						elsif (/\G ($q_char) /oxgc) { $regexp .= $1; }
4775							}
4776	0						die __FILE__, ": Search pattern not terminated\n";
4777							}
4778							}
4779
4780							# tr/// or y///
4781
4782							# about [cdsrbB]* (/B modifier)
4783							#
4784							# P.559 appendix C
4785							# of ISBN 4-89052-384-7 Programming perl
4786							# (Japanese title is: Perl puroguramingu)
4787
4788							elsif (/\G \b ( tr \| y ) \b /oxgc) {
4789	0						my $ope = $1;
4790
4791							# $1 $2 $3 $4 $5 $6
4792	0	0					if (/\G (\#) ((?:$qq_char)?) (\#) ((?:$qq_char)?) (\#) ([cdsrbB]*) /oxgc) { # tr# # #
4793	0						my @tr = ($tr_variable,$2);
4794	0						return e_tr(@tr,'',$4,$6);
4795							}
4796							else {
4797	0						my $e = '';
4798	0						while (not /\G \z/oxgc) {
4799	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
4800							elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) {
4801	0						my @tr = ($tr_variable,$2);
4802	0						while (not /\G \z/oxgc) {
4803	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
4804	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) ( )
4805	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) { }
4806	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) [ ]
4807	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) < >
4808	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr ( ) * *
4809							}
4810	0						die __FILE__, ": Transliteration replacement not terminated\n";
4811							}
4812							elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) {
4813	0						my @tr = ($tr_variable,$2);
4814	0						while (not /\G \z/oxgc) {
4815	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
4816	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } ( )
4817	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } { }
4818	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } [ ]
4819	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } < >
4820	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr { } * *
4821							}
4822	0						die __FILE__, ": Transliteration replacement not terminated\n";
4823							}
4824							elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) {
4825	0						my @tr = ($tr_variable,$2);
4826	0						while (not /\G \z/oxgc) {
4827	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
4828	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] ( )
4829	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] { }
4830	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] [ ]
4831	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] < >
4832	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr [ ] * *
4833							}
4834	0						die __FILE__, ": Transliteration replacement not terminated\n";
4835							}
4836							elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) {
4837	0						my @tr = ($tr_variable,$2);
4838	0						while (not /\G \z/oxgc) {
4839	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
4840	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > ( )
4841	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > { }
4842	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > [ ]
4843	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > < >
4844	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { return e_tr(@tr,$e,$2,$4); } # tr < > * *
4845							}
4846	0						die __FILE__, ": Transliteration replacement not terminated\n";
4847							}
4848							# $1 $2 $3 $4 $5 $6
4849							elsif (/\G (\S) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cdsrbB]) /oxgc) { # tr * *
4850	0						my @tr = ($tr_variable,$2);
4851	0						return e_tr(@tr,'',$4,$6);
4852							}
4853							}
4854	0						die __FILE__, ": Transliteration pattern not terminated\n";
4855							}
4856							}
4857
4858							# qq//
4859							elsif (/\G \b (qq) \b /oxgc) {
4860	0						my $ope = $1;
4861
4862							# if (/\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { return e_qq($ope,$1,$3,$2); } # qq# #
4863	0	0					if (/\G (\#) /oxgc) { # qq# #
4864	0						my $qq_string = '';
4865	0						while (not /\G \z/oxgc) {
4866	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
4867	0						elsif (/\G (\\\#) /oxgc) { $qq_string .= $1; }
4868	0						elsif (/\G (\#) /oxgc) { return e_qq($ope,'#','#',$qq_string); }
4869	0						elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
4870							}
4871	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4872							}
4873
4874							else {
4875	0						my $e = '';
4876	0						while (not /\G \z/oxgc) {
4877	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
4878
4879							# elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq ( )
4880							elsif (/\G (\() /oxgc) { # qq ( )
4881	0						my $qq_string = '';
4882	0						local $nest = 1;
4883	0						while (not /\G \z/oxgc) {
4884	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
		0
4885	0						elsif (/\G (\\\)) /oxgc) { $qq_string .= $1; }
4886	0						elsif (/\G (\() /oxgc) { $qq_string .= $1; $nest++; }
	0
4887							elsif (/\G (\)) /oxgc) {
4888	0	0					if (--$nest == 0) { return $e . e_qq($ope,'(',')',$qq_string); }
	0
4889	0						else { $qq_string .= $1; }
4890							}
4891	0						elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
4892							}
4893	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4894							}
4895
4896							# elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq { }
4897							elsif (/\G (\{) /oxgc) { # qq { }
4898	0						my $qq_string = '';
4899	0						local $nest = 1;
4900	0						while (not /\G \z/oxgc) {
4901	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
		0
4902	0						elsif (/\G (\\\}) /oxgc) { $qq_string .= $1; }
4903	0						elsif (/\G (\{) /oxgc) { $qq_string .= $1; $nest++; }
	0
4904							elsif (/\G (\}) /oxgc) {
4905	0	0					if (--$nest == 0) { return $e . e_qq($ope,'{','}',$qq_string); }
	0
4906	0						else { $qq_string .= $1; }
4907							}
4908	0						elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
4909							}
4910	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4911							}
4912
4913							# elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq [ ]
4914							elsif (/\G (\[) /oxgc) { # qq [ ]
4915	0						my $qq_string = '';
4916	0						local $nest = 1;
4917	0						while (not /\G \z/oxgc) {
4918	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
		0
4919	0						elsif (/\G (\\\]) /oxgc) { $qq_string .= $1; }
4920	0						elsif (/\G (\[) /oxgc) { $qq_string .= $1; $nest++; }
	0
4921							elsif (/\G (\]) /oxgc) {
4922	0	0					if (--$nest == 0) { return $e . e_qq($ope,'[',']',$qq_string); }
	0
4923	0						else { $qq_string .= $1; }
4924							}
4925	0						elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
4926							}
4927	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4928							}
4929
4930							# elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq < >
4931							elsif (/\G (\<) /oxgc) { # qq < >
4932	0						my $qq_string = '';
4933	0						local $nest = 1;
4934	0						while (not /\G \z/oxgc) {
4935	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
		0
4936	0						elsif (/\G (\\\>) /oxgc) { $qq_string .= $1; }
4937	0						elsif (/\G (\<) /oxgc) { $qq_string .= $1; $nest++; }
	0
4938							elsif (/\G (\>) /oxgc) {
4939	0	0					if (--$nest == 0) { return $e . e_qq($ope,'<','>',$qq_string); }
	0
4940	0						else { $qq_string .= $1; }
4941							}
4942	0						elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
4943							}
4944	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4945							}
4946
4947							# elsif (/\G (\S) ((?:$qq_char)?) (\1) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qq *
4948							elsif (/\G (\S) /oxgc) { # qq * *
4949	0						my $delimiter = $1;
4950	0						my $qq_string = '';
4951	0						while (not /\G \z/oxgc) {
4952	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
4953	0						elsif (/\G (\\\Q$delimiter\E) /oxgc) { $qq_string .= $1; }
4954	0						elsif (/\G (\Q$delimiter\E) /oxgc) { return $e . e_qq($ope,$delimiter,$delimiter,$qq_string); }
4955	0						elsif (/\G ($qq_char) /oxgc) { $qq_string .= $1; }
4956							}
4957	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4958							}
4959							}
4960	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4961							}
4962							}
4963
4964							# qr//
4965							elsif (/\G \b (qr) \b /oxgc) {
4966	0						my $ope = $1;
4967	0	0					if (/\G (\#) ((?:$qq_char)?) (\#) ([imosxpadlunbB]) /oxgc) { # qr# # #
4968	0						return e_qr($ope,$1,$3,$2,$4);
4969							}
4970							else {
4971	0						my $e = '';
4972	0						while (not /\G \z/oxgc) {
4973	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
4974	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr ( )
4975	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr { }
4976	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr [ ]
4977	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr < >
4978	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([imosxpadlunbB]) /oxgc) { return $e . e_qr_q($ope,$1, $3, $2,$4); } # qr ' '
4979	0						elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([imosxpadlunbB]*) /oxgc) { return $e . e_qr ($ope,'{','}',$2,$4); } # qr \| \| --> qr { }
4980	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([imosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # qr * *
4981							}
4982	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
4983							}
4984							}
4985
4986							# qw//
4987							elsif (/\G \b (qw) \b /oxgc) {
4988	0						my $ope = $1;
4989	0	0					if (/\G (\#) (.*?) (\#) /oxmsgc) { # qw# #
4990	0						return e_qw($ope,$1,$3,$2);
4991							}
4992							else {
4993	0						my $e = '';
4994	0						while (not /\G \z/oxgc) {
4995	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
		0
		0
		0
4996
4997	0						elsif (/\G ($) ([^(]*?) ($) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw ( )
4998	0						elsif (/\G ($) ((?:$q_paren)*?) ($) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw ( )
4999
5000	0						elsif (/\G (\{) ([^{]*?) (\}) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw { }
5001	0						elsif (/\G (\{) ((?:$q_brace)*?) (\}) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw { }
5002
5003	0						elsif (/\G (\[) ([^[]*?) (\]) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw [ ]
5004	0						elsif (/\G (\[) ((?:$q_bracket)*?) (\]) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw [ ]
5005
5006	0						elsif (/\G (\<) ([^<]*?) (\>) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw < >
5007	0						elsif (/\G (\<) ((?:$q_angle)*?) (\>) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw < >
5008
5009	0						elsif (/\G ([\x21-\x3F]) (.?) (\1) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw *
5010	0						elsif (/\G (\S) ((?:$q_char)?) (\1) /oxmsgc) { return $e . e_qw($ope,$1,$3,$2); } # qw *
5011							}
5012	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5013							}
5014							}
5015
5016							# qx//
5017							elsif (/\G \b (qx) \b /oxgc) {
5018	0						my $ope = $1;
5019	0	0					if (/\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { # qx# #
5020	0						return e_qq($ope,$1,$3,$2);
5021							}
5022							else {
5023	0						my $e = '';
5024	0						while (not /\G \z/oxgc) {
5025	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
5026	0						elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx ( )
5027	0						elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx { }
5028	0						elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx [ ]
5029	0						elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx < >
5030	0						elsif (/\G (\') ((?:$qq_char)*?) (\') /oxgc) { return $e . e_q ($ope,$1,$3,$2); } # qx ' '
5031	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) /oxgc) { return $e . e_qq($ope,$1,$3,$2); } # qx *
5032							}
5033	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5034							}
5035							}
5036
5037							# q//
5038							elsif (/\G \b (q) \b /oxgc) {
5039	0						my $ope = $1;
5040
5041							# if (/\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { return e_q($ope,$1,$3,$2); } # q# #
5042
5043							# avoid "Error: Runtime exception" of perl version 5.005_03
5044							# (and so on)
5045
5046	0	0					if (/\G (\#) /oxgc) { # q# #
5047	0						my $q_string = '';
5048	0						while (not /\G \z/oxgc) {
5049	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
5050	0						elsif (/\G (\\\#) /oxgc) { $q_string .= $1; }
5051	0						elsif (/\G (\#) /oxgc) { return e_q($ope,'#','#',$q_string); }
5052	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5053							}
5054	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5055							}
5056
5057							else {
5058	0						my $e = '';
5059	0						while (not /\G \z/oxgc) {
5060	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
5061
5062							# elsif (/\G ($) ((?:\\$\|\\\\\|$q_paren)*?) (\)) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q ( )
5063							elsif (/\G (\() /oxgc) { # q ( )
5064	0						my $q_string = '';
5065	0						local $nest = 1;
5066	0						while (not /\G \z/oxgc) {
5067	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
		0
		0
5068	0						elsif (/\G (\\\)) /oxgc) { $q_string .= $1; }
5069	0						elsif (/\G (\\\() /oxgc) { $q_string .= $1; }
5070	0						elsif (/\G (\() /oxgc) { $q_string .= $1; $nest++; }
	0
5071							elsif (/\G (\)) /oxgc) {
5072	0	0					if (--$nest == 0) { return $e . e_q($ope,'(',')',$q_string); }
	0
5073	0						else { $q_string .= $1; }
5074							}
5075	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5076							}
5077	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5078							}
5079
5080							# elsif (/\G (\{) ((?:\\\}\|\\\\\|$q_brace)*?) (\}) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q { }
5081							elsif (/\G (\{) /oxgc) { # q { }
5082	0						my $q_string = '';
5083	0						local $nest = 1;
5084	0						while (not /\G \z/oxgc) {
5085	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
		0
		0
5086	0						elsif (/\G (\\\}) /oxgc) { $q_string .= $1; }
5087	0						elsif (/\G (\\\{) /oxgc) { $q_string .= $1; }
5088	0						elsif (/\G (\{) /oxgc) { $q_string .= $1; $nest++; }
	0
5089							elsif (/\G (\}) /oxgc) {
5090	0	0					if (--$nest == 0) { return $e . e_q($ope,'{','}',$q_string); }
	0
5091	0						else { $q_string .= $1; }
5092							}
5093	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5094							}
5095	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5096							}
5097
5098							# elsif (/\G (\[) ((?:\\\]\|\\\\\|$q_bracket)*?) (\]) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q [ ]
5099							elsif (/\G (\[) /oxgc) { # q [ ]
5100	0						my $q_string = '';
5101	0						local $nest = 1;
5102	0						while (not /\G \z/oxgc) {
5103	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
		0
		0
5104	0						elsif (/\G (\\\]) /oxgc) { $q_string .= $1; }
5105	0						elsif (/\G (\\\[) /oxgc) { $q_string .= $1; }
5106	0						elsif (/\G (\[) /oxgc) { $q_string .= $1; $nest++; }
	0
5107							elsif (/\G (\]) /oxgc) {
5108	0	0					if (--$nest == 0) { return $e . e_q($ope,'[',']',$q_string); }
	0
5109	0						else { $q_string .= $1; }
5110							}
5111	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5112							}
5113	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5114							}
5115
5116							# elsif (/\G (\<) ((?:\\\>\|\\\\\|$q_angle)*?) (\>) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q < >
5117							elsif (/\G (\<) /oxgc) { # q < >
5118	0						my $q_string = '';
5119	0						local $nest = 1;
5120	0						while (not /\G \z/oxgc) {
5121	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
		0
		0
5122	0						elsif (/\G (\\\>) /oxgc) { $q_string .= $1; }
5123	0						elsif (/\G (\\\<) /oxgc) { $q_string .= $1; }
5124	0						elsif (/\G (\<) /oxgc) { $q_string .= $1; $nest++; }
	0
5125							elsif (/\G (\>) /oxgc) {
5126	0	0					if (--$nest == 0) { return $e . e_q($ope,'<','>',$q_string); }
	0
5127	0						else { $q_string .= $1; }
5128							}
5129	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5130							}
5131	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5132							}
5133
5134							# elsif (/\G (\S) ((?:\\\1\|\\\\\|$q_char)?) (\1) /oxgc) { return $e . e_q($ope,$1,$3,$2); } # q *
5135							elsif (/\G (\S) /oxgc) { # q * *
5136	0						my $delimiter = $1;
5137	0						my $q_string = '';
5138	0						while (not /\G \z/oxgc) {
5139	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
5140	0						elsif (/\G (\\\Q$delimiter\E) /oxgc) { $q_string .= $1; }
5141	0						elsif (/\G (\Q$delimiter\E) /oxgc) { return $e . e_q($ope,$delimiter,$delimiter,$q_string); }
5142	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5143							}
5144	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5145							}
5146							}
5147	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5148							}
5149							}
5150
5151							# m//
5152							elsif (/\G \b (m) \b /oxgc) {
5153	0						my $ope = $1;
5154	0	0					if (/\G (\#) ((?:$qq_char)?) (\#) ([cgimosxpadlunbB]) /oxgc) { # m# #
5155	0						return e_qr($ope,$1,$3,$2,$4);
5156							}
5157							else {
5158	0						my $e = '';
5159	0						while (not /\G \z/oxgc) {
5160	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
		0
5161	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m ( )
5162	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m { }
5163	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m [ ]
5164	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m < >
5165	0						elsif (/\G (\?) ((?:$qq_char)?) (\?) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m ? ?
5166	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr_q($ope,$1, $3, $2,$4); } # m ' '
5167	0						elsif (/\G ([\-:\\^\|]) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]*) /oxgc) { return $e . e_qr ($ope,'{','}',$2,$4); } # m \| \| --> m { }
5168	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]) /oxgc) { return $e . e_qr ($ope,$1, $3, $2,$4); } # m * *
5169							}
5170	0						die __FILE__, ": Search pattern not terminated\n";
5171							}
5172							}
5173
5174							# s///
5175
5176							# about [cegimosxpradlunbB]* (/cg modifier)
5177							#
5178							# P.67 Pattern-Matching Operators
5179							# of ISBN 0-596-00241-6 Perl in a Nutshell, Second Edition.
5180
5181							elsif (/\G \b (s) \b /oxgc) {
5182	0						my $ope = $1;
5183
5184							# $1 $2 $3 $4 $5 $6
5185	0	0					if (/\G (\#) ((?:$qq_char)?) (\#) ((?:$qq_char)?) (\#) ([cegimosxpradlunbB]*) /oxgc) { # s# # #
5186	0						return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5187							}
5188							else {
5189	0						my $e = '';
5190	0						while (not /\G \z/oxgc) {
5191	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
		0
5192							elsif (/\G ($) ((?:$qq_paren)*?) ($) /oxgc) {
5193	0						my @s = ($1,$2,$3);
5194	0						while (not /\G \z/oxgc) {
5195	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
		0
		0
5196							# $1 $2 $3 $4
5197	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5198	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5199	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5200	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5201	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5202	0						elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5203	0						elsif (/\G (\:) ((?:$qq_char)?) (\:) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5204	0						elsif (/\G (\@) ((?:$qq_char)?) (\@) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5205	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5206							}
5207	0						die __FILE__, ": Substitution replacement not terminated\n";
5208							}
5209							elsif (/\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) {
5210	0						my @s = ($1,$2,$3);
5211	0						while (not /\G \z/oxgc) {
5212	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
		0
		0
5213							# $1 $2 $3 $4
5214	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5215	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5216	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5217	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5218	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5219	0						elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5220	0						elsif (/\G (\:) ((?:$qq_char)?) (\:) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5221	0						elsif (/\G (\@) ((?:$qq_char)?) (\@) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5222	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5223							}
5224	0						die __FILE__, ": Substitution replacement not terminated\n";
5225							}
5226							elsif (/\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) {
5227	0						my @s = ($1,$2,$3);
5228	0						while (not /\G \z/oxgc) {
5229	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
5230							# $1 $2 $3 $4
5231	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5232	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5233	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5234	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5235	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5236	0						elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5237	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5238							}
5239	0						die __FILE__, ": Substitution replacement not terminated\n";
5240							}
5241							elsif (/\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) {
5242	0						my @s = ($1,$2,$3);
5243	0						while (not /\G \z/oxgc) {
5244	0	0					if (/\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
		0
		0
		0
5245							# $1 $2 $3 $4
5246	0						elsif (/\G ($) ((?:$qq_paren)?) ($) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5247	0						elsif (/\G (\{) ((?:$qq_brace)?) (\}) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5248	0						elsif (/\G (\[) ((?:$qq_bracket)?) (\]) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5249	0						elsif (/\G (\<) ((?:$qq_angle)?) (\>) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5250	0						elsif (/\G (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5251	0						elsif (/\G (\$) ((?:$qq_char)?) (\$) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5252	0						elsif (/\G (\:) ((?:$qq_char)?) (\:) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5253	0						elsif (/\G (\@) ((?:$qq_char)?) (\@) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5254	0						elsif (/\G (\S) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) { return e_sub($sub_variable,@s,$1,$2,$3,$4); }
5255							}
5256	0						die __FILE__, ": Substitution replacement not terminated\n";
5257							}
5258							# $1 $2 $3 $4 $5 $6
5259							elsif (/\G (\') ((?:$qq_char)?) (\') ((?:$qq_char)?) (\') ([cegimosxpradlunbB]*) /oxgc) {
5260	0						return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5261							}
5262							# $1 $2 $3 $4 $5 $6
5263							elsif (/\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]) /oxgc) {
5264	0						return e_sub($sub_variable,'{',$2,'}','{',$4,'}',$6); # s \| \| \| --> s { } { }
5265							}
5266							# $1 $2 $3 $4 $5 $6
5267							elsif (/\G (\$) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]*) /oxgc) {
5268	0						return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5269							}
5270							# $1 $2 $3 $4 $5 $6
5271							elsif (/\G (\S) ((?:$qq_char)?) (\1) ((?:$qq_char)?) (\1) ([cegimosxpradlunbB]*) /oxgc) {
5272	0						return e_sub($sub_variable,$1,$2,$3,$3,$4,$5,$6);
5273							}
5274							}
5275	0						die __FILE__, ": Substitution pattern not terminated\n";
5276							}
5277							}
5278
5279							# require ignore module
5280	0						elsif (/\G \b require ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [#\n]) /oxmsgc) { return "# require$1$2"; }
5281	0						elsif (/\G \b require ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "# require$1\n$2"; }
5282	0						elsif (/\G \b require ((?>\s+) (?:$ignore_modules)) \b /oxmsgc) { return "# require$1"; }
5283
5284							# use strict; --> use strict; no strict qw(refs);
5285	0						elsif (/\G \b use ((?>\s+) strict .? ;) ([ \t] [#\n]) /oxmsgc) { return "use$1 no strict qw(refs);$2"; }
5286	0						elsif (/\G \b use ((?>\s+) strict .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "use$1 no strict qw(refs);\n$2"; }
5287	0						elsif (/\G \b use ((?>\s+) strict) \b /oxmsgc) { return "use$1; no strict qw(refs)"; }
5288
5289							# use 5.12.0; --> use 5.12.0; no strict qw(refs);
5290							elsif (/\G \b use (?>\s+) ((?>([1-9][0-9_])(?:\.([0-9_]+)))) (?>\s*) ; /oxmsgc) {
5291	0	0	0				if (($2 >= 6) or (($2 == 5) and ($3 ge '012'))) {
			0
5292	0						return "use $1; no strict qw(refs);";
5293							}
5294							else {
5295	0						return "use $1;";
5296							}
5297							}
5298							elsif (/\G \b use (?>\s+) ((?>v([0-9][0-9_])(?:\.([0-9_]+)))) (?>\s*) ; /oxmsgc) {
5299	0	0	0				if (($2 >= 6) or (($2 == 5) and ($3 >= 12))) {
			0
5300	0						return "use $1; no strict qw(refs);";
5301							}
5302							else {
5303	0						return "use $1;";
5304							}
5305							}
5306
5307							# ignore use module
5308	0						elsif (/\G \b use ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [#\n]) /oxmsgc) { return "# use$1$2"; }
5309	0						elsif (/\G \b use ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "# use$1\n$2"; }
5310	0						elsif (/\G \b use ((?>\s+) (?:$ignore_modules)) \b /oxmsgc) { return "# use$1"; }
5311
5312							# ignore no module
5313	0						elsif (/\G \b no ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [#\n]) /oxmsgc) { return "# no$1$2"; }
5314	0						elsif (/\G \b no ((?>\s+) (?:$ignore_modules) .? ;) ([ \t] [^\x80-\xFF#]) /oxmsgc) { return "# no$1\n$2"; }
5315	0						elsif (/\G \b no ((?>\s+) (?:$ignore_modules)) \b /oxmsgc) { return "# no$1"; }
5316
5317							# use else
5318	0						elsif (/\G \b use \b /oxmsgc) { return "use"; }
5319
5320							# use else
5321	0						elsif (/\G \b no \b /oxmsgc) { return "no"; }
5322
5323							# ''
5324							elsif (/\G (?
5325	0						my $q_string = '';
5326	0						while (not /\G \z/oxgc) {
5327	0	0					if (/\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
5328	0						elsif (/\G (\\\') /oxgc) { $q_string .= $1; }
5329	0						elsif (/\G \' /oxgc) { return e_q('', "'","'",$q_string); }
5330	0						elsif (/\G ($q_char) /oxgc) { $q_string .= $1; }
5331							}
5332	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5333							}
5334
5335							# ""
5336							elsif (/\G (\") /oxgc) {
5337	0						my $qq_string = '';
5338	0						while (not /\G \z/oxgc) {
5339	0	0					if (/\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
5340	0						elsif (/\G (\\\") /oxgc) { $qq_string .= $1; }
5341	0						elsif (/\G \" /oxgc) { return e_qq('', '"','"',$qq_string); }
5342	0						elsif (/\G ($q_char) /oxgc) { $qq_string .= $1; }
5343							}
5344	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5345							}
5346
5347							# ``
5348							elsif (/\G (\`) /oxgc) {
5349	0						my $qx_string = '';
5350	0						while (not /\G \z/oxgc) {
5351	0	0					if (/\G (\\\\) /oxgc) { $qx_string .= $1; }
	0	0
		0
		0
5352	0						elsif (/\G (\\\`) /oxgc) { $qx_string .= $1; }
5353	0						elsif (/\G \` /oxgc) { return e_qq('', '`','`',$qx_string); }
5354	0						elsif (/\G ($q_char) /oxgc) { $qx_string .= $1; }
5355							}
5356	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5357							}
5358
5359							# // --- not divide operator (num / num), not defined-or
5360							elsif (($slash eq 'm//') and /\G (\/) /oxgc) {
5361	0						my $regexp = '';
5362	0						while (not /\G \z/oxgc) {
5363	0	0					if (/\G (\\\\) /oxgc) { $regexp .= $1; }
	0	0
		0
		0
5364	0						elsif (/\G (\\\/) /oxgc) { $regexp .= $1; }
5365	0						elsif (/\G \/ ([cgimosxpadlunbB]*) /oxgc) { return e_qr('', '/','/',$regexp,$1); }
5366	0						elsif (/\G ($q_char) /oxgc) { $regexp .= $1; }
5367							}
5368	0						die __FILE__, ": Search pattern not terminated\n";
5369							}
5370
5371							# ?? --- not conditional operator (condition ? then : else)
5372							elsif (($slash eq 'm//') and /\G (\?) /oxgc) {
5373	0						my $regexp = '';
5374	0						while (not /\G \z/oxgc) {
5375	0	0					if (/\G (\\\\) /oxgc) { $regexp .= $1; }
	0	0
		0
		0
5376	0						elsif (/\G (\\\?) /oxgc) { $regexp .= $1; }
5377	0						elsif (/\G \? ([cgimosxpadlunbB]*) /oxgc) { return e_qr('m','?','?',$regexp,$1); }
5378	0						elsif (/\G ($q_char) /oxgc) { $regexp .= $1; }
5379							}
5380	0						die __FILE__, ": Search pattern not terminated\n";
5381							}
5382
5383							# <<>> (a safer ARGV)
5384	0						elsif (/\G ( <<>> ) /oxgc) { $slash = 'm//'; return $1; }
	0
5385
5386							# << (bit shift) --- not here document
5387	0						elsif (/\G ( << (?>\s*) ) (?= [0-9\$\@\&] ) /oxgc) { $slash = 'm//'; return $1; }
	0
5388
5389							# <<'HEREDOC'
5390							elsif (/\G ( << '([a-zA-Z_0-9]*)' ) /oxgc) {
5391	0						$slash = 'm//';
5392	0						my $here_quote = $1;
5393	0						my $delimiter = $2;
5394
5395							# get here document
5396	0	0					if ($here_script eq '') {
5397	0						$here_script = CORE::substr $_, pos $_;
5398	0						$here_script =~ s/.*?\n//oxm;
5399							}
5400	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5401	0						push @heredoc, $1 . qq{\n$delimiter\n};
5402	0						push @heredoc_delimiter, $delimiter;
5403							}
5404							else {
5405	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5406							}
5407	0						return $here_quote;
5408							}
5409
5410							# <<\HEREDOC
5411
5412							# P.66 2.6.6. "Here" Documents
5413							# in Chapter 2: Bits and Pieces
5414							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
5415
5416							# P.73 "Here" Documents
5417							# in Chapter 2: Bits and Pieces
5418							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5419
5420							elsif (/\G ( << \\([a-zA-Z_0-9]+) ) /oxgc) {
5421	0						$slash = 'm//';
5422	0						my $here_quote = $1;
5423	0						my $delimiter = $2;
5424
5425							# get here document
5426	0	0					if ($here_script eq '') {
5427	0						$here_script = CORE::substr $_, pos $_;
5428	0						$here_script =~ s/.*?\n//oxm;
5429							}
5430	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5431	0						push @heredoc, $1 . qq{\n$delimiter\n};
5432	0						push @heredoc_delimiter, $delimiter;
5433							}
5434							else {
5435	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5436							}
5437	0						return $here_quote;
5438							}
5439
5440							# <<"HEREDOC"
5441							elsif (/\G ( << "([a-zA-Z_0-9]*)" ) /oxgc) {
5442	0						$slash = 'm//';
5443	0						my $here_quote = $1;
5444	0						my $delimiter = $2;
5445
5446							# get here document
5447	0	0					if ($here_script eq '') {
5448	0						$here_script = CORE::substr $_, pos $_;
5449	0						$here_script =~ s/.*?\n//oxm;
5450							}
5451	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5452	0						push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
5453	0						push @heredoc_delimiter, $delimiter;
5454							}
5455							else {
5456	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5457							}
5458	0						return $here_quote;
5459							}
5460
5461							# <<HEREDOC
5462							elsif (/\G ( << ([a-zA-Z_0-9]+) ) /oxgc) {
5463	0						$slash = 'm//';
5464	0						my $here_quote = $1;
5465	0						my $delimiter = $2;
5466
5467							# get here document
5468	0	0					if ($here_script eq '') {
5469	0						$here_script = CORE::substr $_, pos $_;
5470	0						$here_script =~ s/.*?\n//oxm;
5471							}
5472	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5473	0						push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
5474	0						push @heredoc_delimiter, $delimiter;
5475							}
5476							else {
5477	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5478							}
5479	0						return $here_quote;
5480							}
5481
5482							# <<`HEREDOC`
5483							elsif (/\G ( << `([a-zA-Z_0-9]*)` ) /oxgc) {
5484	0						$slash = 'm//';
5485	0						my $here_quote = $1;
5486	0						my $delimiter = $2;
5487
5488							# get here document
5489	0	0					if ($here_script eq '') {
5490	0						$here_script = CORE::substr $_, pos $_;
5491	0						$here_script =~ s/.*?\n//oxm;
5492							}
5493	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
5494	0						push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
5495	0						push @heredoc_delimiter, $delimiter;
5496							}
5497							else {
5498	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
5499							}
5500	0						return $here_quote;
5501							}
5502
5503							# <<= <=> <= < operator
5504							elsif (/\G ( <<= \| <=> \| <= \| < ) (?= (?>\s) [A-Za-z_0-9'"`\$\@\&\\(\+\-] )/oxgc) {
5505	0						return $1;
5506							}
5507
5508							# <FILEHANDLE>, <$filehandle>
5509							elsif (/\G (<[\$]?[A-Za-z_][A-Za-z_0-9]*>) /oxgc) {
5510	0						return $1;
5511							}
5512
5513							# <WILDCARD> --- glob
5514
5515							# avoid "Error: Runtime exception" of perl version 5.005_03
5516
5517							elsif (/\G < ((?:[^\x80-\xFF>\0\a\e\f\n\r\t]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF])+?) > /oxgc) {
5518	0						return 'Eutf2::glob("' . $1 . '")';
5519							}
5520
5521							# __DATA__
5522	0						elsif (/\G ^ ( __DATA__ \n .*) \z /oxmsgc) { return $1; }
5523
5524							# __END__
5525	0						elsif (/\G ^ ( __END__ \n .*) \z /oxmsgc) { return $1; }
5526
5527							# \cD Control-D
5528
5529							# P.68 2.6.8. Other Literal Tokens
5530							# in Chapter 2: Bits and Pieces
5531							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
5532
5533							# P.76 Other Literal Tokens
5534							# in Chapter 2: Bits and Pieces
5535							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5536
5537	0						elsif (/\G ( \cD .*) \z /oxmsgc) { return $1; }
5538
5539							# \cZ Control-Z
5540	0						elsif (/\G ( \cZ .*) \z /oxmsgc) { return $1; }
5541
5542							# any operator before div
5543							elsif (/\G (
5544							-- \| \+\+ \|
5545							[\)\}\]]
5546
5547	0						) /oxgc) { $slash = 'div'; return $1; }
	0
5548
5549							# yada-yada or triple-dot operator
5550							elsif (/\G (
5551							\.\.\.
5552
5553	0						) /oxgc) { $slash = 'm//'; return q{die('Unimplemented')}; }
	0
5554
5555							# any operator before m//
5556
5557							# //, //= (defined-or)
5558
5559							# P.164 Logical Operators
5560							# in Chapter 10: More Control Structures
5561							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
5562
5563							# P.119 C-Style Logical (Short-Circuit) Operators
5564							# in Chapter 3: Unary and Binary Operators
5565							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5566
5567							# (and so on)
5568
5569							# ~~
5570
5571							# P.221 The Smart Match Operator
5572							# in Chapter 15: Smart Matching and given-when
5573							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
5574
5575							# P.112 Smartmatch Operator
5576							# in Chapter 3: Unary and Binary Operators
5577							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
5578
5579							# (and so on)
5580
5581							elsif (/\G ((?>
5582
5583							!~~ \| !~ \| != \| ! \|
5584							%= \| % \|
5585							&&= \| && \| &= \| &\.= \| &\. \| & \|
5586							-= \| -> \| - \|
5587							:(?>\s*)= \|
5588							: \|
5589							<<>> \|
5590							<<= \| <=> \| <= \| < \|
5591							== \| => \| =~ \| = \|
5592							>>= \| >> \| >= \| > \|
5593							\\= \| \\ \| \= \| \ \|
5594							\+= \| \+ \|
5595							\.\. \| \.= \| \. \|
5596							\/\/= \| \/\/ \|
5597							\/= \| \/ \|
5598							\? \|
5599							\\ \|
5600							\^= \| \^\.= \| \^\. \| \^ \|
5601							\b x= \|
5602							\\|\\|= \| \\|\\| \| \\|= \| \\|\.= \| \\|\. \| \\| \|
5603							~~ \| ~\. \| ~ \|
5604							\b(?: and \| cmp \| eq \| ge \| gt \| le \| lt \| ne \| not \| or \| xor \| x )\b \|
5605							\b(?: print )\b \|
5606
5607							[,;\(\{\[]
5608
5609	0						)) /oxgc) { $slash = 'm//'; return $1; }
	0
5610
5611							# other any character
5612	0						elsif (/\G ($q_char) /oxgc) { $slash = 'div'; return $1; }
	0
5613
5614							# system error
5615							else {
5616	0						die __FILE__, ": Oops, this shouldn't happen!\n";
5617							}
5618							}
5619
5620							# escape UTF-8 string
5621							sub e_string {
5622	0			0	0		my($string) = @_;
5623	0						my $e_string = '';
5624
5625	0						local $slash = 'm//';
5626
5627							# P.1024 Appendix W.10 Multibyte Processing
5628							# of ISBN 1-56592-224-7 CJKV Information Processing
5629							# (and so on)
5630
5631	0						my @char = $string =~ / \G (?>[^\x80-\xFF\\]\|\\$q_char\|$q_char) /oxmsg;
5632
5633							# without { ... }
5634	0	0	0				if (not (grep(/\A \{ \z/xms, @char) and grep(/\A \} \z/xms, @char))) {
5635	0	0					if ($string !~ /<
5636	0						return $string;
5637							}
5638							}
5639
5640							E_STRING_LOOP:
5641	0						while ($string !~ /\G \z/oxgc) {
5642	0	0					if (0) {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
5643							}
5644
5645							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> @{[Eutf2::PREMATCH()]}
5646	0						elsif ($string =~ /\G ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \b \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) /oxmsgc) {
5647	0						$e_string .= q{Eutf2::PREMATCH()};
5648	0						$slash = 'div';
5649							}
5650
5651							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> @{[Eutf2::MATCH()]}
5652							elsif ($string =~ /\G ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \b \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) /oxmsgc) {
5653	0						$e_string .= q{Eutf2::MATCH()};
5654	0						$slash = 'div';
5655							}
5656
5657							# $', ${'} --> $', ${'}
5658							elsif ($string =~ /\G ( \$' \| \$\{'\} ) /oxmsgc) {
5659	0						$e_string .= $1;
5660	0						$slash = 'div';
5661							}
5662
5663							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> @{[Eutf2::POSTMATCH()]}
5664							elsif ($string =~ /\G ( \$ (?>\s) POSTMATCH \b \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) /oxmsgc) {
5665	0						$e_string .= q{Eutf2::POSTMATCH()};
5666	0						$slash = 'div';
5667							}
5668
5669							# bareword
5670							elsif ($string =~ /\G ( \{ (?>\s) (?: tr \| index \| rindex \| reverse ) (?>\s) \} ) /oxmsgc) {
5671	0						$e_string .= $1;
5672	0						$slash = 'div';
5673							}
5674
5675							# $0 --> $0
5676							elsif ($string =~ /\G ( \$ 0 ) /oxmsgc) {
5677	0						$e_string .= $1;
5678	0						$slash = 'div';
5679							}
5680							elsif ($string =~ /\G ( \$ \{ (?>\s) 0 (?>\s) \} ) /oxmsgc) {
5681	0						$e_string .= $1;
5682	0						$slash = 'div';
5683							}
5684
5685							# $$ --> $$
5686							elsif ($string =~ /\G ( \$ \$ ) (?![\w\{]) /oxmsgc) {
5687	0						$e_string .= $1;
5688	0						$slash = 'div';
5689							}
5690
5691							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
5692							# $1, $2, $3 --> $1, $2, $3 otherwise
5693							elsif ($string =~ /\G \$ ((?>[1-9][0-9]*)) /oxmsgc) {
5694	0						$e_string .= e_capture($1);
5695	0						$slash = 'div';
5696							}
5697							elsif ($string =~ /\G \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} /oxmsgc) {
5698	0						$e_string .= e_capture($1);
5699	0						$slash = 'div';
5700							}
5701
5702							# $$foo[ ... ] --> $ $foo->[ ... ]
5703							elsif ($string =~ /\G \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ .+? \] ) /oxmsgc) {
5704	0						$e_string .= e_capture($1.'->'.$2);
5705	0						$slash = 'div';
5706							}
5707
5708							# $$foo{ ... } --> $ $foo->{ ... }
5709							elsif ($string =~ /\G \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ .+? \} ) /oxmsgc) {
5710	0						$e_string .= e_capture($1.'->'.$2);
5711	0						$slash = 'div';
5712							}
5713
5714							# $$foo
5715							elsif ($string =~ /\G \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) /oxmsgc) {
5716	0						$e_string .= e_capture($1);
5717	0						$slash = 'div';
5718							}
5719
5720							# ${ foo }
5721							elsif ($string =~ /\G \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} /oxmsgc) {
5722	0						$e_string .= '${' . $1 . '}';
5723	0						$slash = 'div';
5724							}
5725
5726							# ${ ... }
5727							elsif ($string =~ /\G \$ (?>\s) \{ (?>\s) ( $qq_brace ) (?>\s*) \} /oxmsgc) {
5728	0						$e_string .= e_capture($1);
5729	0						$slash = 'div';
5730							}
5731
5732							# variable or function
5733							# $ @ % & * $ #
5734							elsif ($string =~ /\G ( (?: [\$\@\%\&\] \| \$\# \| -> \| \b sub \b) (?>\s) (?: split \| chop \| index \| rindex \| lc \| uc \| fc \| chr \| ord \| reverse \| getc \| tr \| y \| q \| qq \| qx \| qw \| m \| s \| qr \| glob \| lstat \| opendir \| stat \| unlink \| chdir ) ) \b /oxmsgc) {
5735	0						$e_string .= $1;
5736	0						$slash = 'div';
5737							}
5738							# $ $ $ $ $ $ $ $ $ $ $ $ $ $
5739							# $ @ # \ ' " / ? ( ) [ ] < >
5740							elsif ($string =~ /\G ( \$[\$\@\#\\\'\"\/\?\[\]\<\>] ) /oxmsgc) {
5741	0						$e_string .= $1;
5742	0						$slash = 'div';
5743							}
5744
5745							# subroutines of package Eutf2
5746	0						elsif ($string =~ /\G \b (CORE:: \| ->(>?\s*) (?: atan2 \| [a-z]{2,})) \b /oxgc) { $e_string .= $1; $slash = 'm//'; }
	0
5747	0						elsif ($string =~ /\G \b Char::eval (?= (?>\s*) \{ ) /oxgc) { $e_string .= 'eval'; $slash = 'm//'; }
	0
5748	0						elsif ($string =~ /\G \b UTF2::eval (?= (?>\s*) \{ ) /oxgc) { $e_string .= 'eval'; $slash = 'm//'; }
	0
5749	0						elsif ($string =~ /\G \b Char::eval \b /oxgc) { $e_string .= 'eval Char::escape'; $slash = 'm//'; }
	0
5750	0						elsif ($string =~ /\G \b UTF2::eval \b /oxgc) { $e_string .= 'eval UTF2::escape'; $slash = 'm//'; }
	0
5751	0						elsif ($string =~ /\G \b bytes::substr \b /oxgc) { $e_string .= 'substr'; $slash = 'm//'; }
	0
5752	0						elsif ($string =~ /\G \b chop \b /oxgc) { $e_string .= 'Eutf2::chop'; $slash = 'm//'; }
	0
5753	0						elsif ($string =~ /\G \b bytes::index \b /oxgc) { $e_string .= 'index'; $slash = 'm//'; }
	0
5754	0						elsif ($string =~ /\G \b Char::index \b /oxgc) { $e_string .= 'Char::index'; $slash = 'm//'; }
	0
5755	0						elsif ($string =~ /\G \b UTF2::index \b /oxgc) { $e_string .= 'UTF2::index'; $slash = 'm//'; }
	0
5756	0						elsif ($string =~ /\G \b index \b /oxgc) { $e_string .= 'Eutf2::index'; $slash = 'm//'; }
	0
5757	0						elsif ($string =~ /\G \b bytes::rindex \b /oxgc) { $e_string .= 'rindex'; $slash = 'm//'; }
	0
5758	0						elsif ($string =~ /\G \b Char::rindex \b /oxgc) { $e_string .= 'Char::rindex'; $slash = 'm//'; }
	0
5759	0						elsif ($string =~ /\G \b UTF2::rindex \b /oxgc) { $e_string .= 'UTF2::rindex'; $slash = 'm//'; }
	0
5760	0						elsif ($string =~ /\G \b rindex \b /oxgc) { $e_string .= 'Eutf2::rindex'; $slash = 'm//'; }
	0
5761	0						elsif ($string =~ /\G \b lc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::lc'; $slash = 'm//'; }
	0
5762	0						elsif ($string =~ /\G \b lcfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::lcfirst'; $slash = 'm//'; }
	0
5763	0						elsif ($string =~ /\G \b uc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::uc'; $slash = 'm//'; }
	0
5764	0						elsif ($string =~ /\G \b ucfirst (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::ucfirst'; $slash = 'm//'; }
	0
5765	0						elsif ($string =~ /\G \b fc (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::fc'; $slash = 'm//'; }
	0
5766
5767							# "-s '' ..." means file test "-s 'filename' ..." (not means "- s/// ...")
5768	0						elsif ($string =~ /\G -s (?>\s*) (\") ((?:$qq_char)+?) (\") /oxgc) { $e_string .= '-s ' . e_qq('', $1,$3,$2); $slash = 'm//'; }
	0
5769	0						elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\#) ((?:$qq_char)+?) (\#) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0
5770	0						elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) ($) ((?:$qq_paren)+?) ($) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0
5771	0						elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\{) ((?:$qq_brace)+?) (\}) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0
5772	0						elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\[) ((?:$qq_bracket)+?) (\]) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0
5773	0						elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\<) ((?:$qq_angle)+?) (\>) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0
5774	0						elsif ($string =~ /\G -s (?>\s+) qq (?>\s*) (\S) ((?:$qq_char)+?) (\1) /oxgc) { $e_string .= '-s ' . e_qq('qq',$1,$3,$2); $slash = 'm//'; }
	0
5775
5776	0						elsif ($string =~ /\G -s (?>\s*) (\') ((?:\\\'\|\\\\\|$q_char)+?) (\') /oxgc) { $e_string .= '-s ' . e_q ('', $1,$3,$2); $slash = 'm//'; }
	0
5777	0						elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\#) ((?:\\\#\|\\\\\|$q_char)+?) (\#) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0
5778	0						elsif ($string =~ /\G -s (?>\s+) q (?>\s*) ($) ((?:\\$\|\\\\\|$q_paren)+?) (\)) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0
5779	0						elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\{) ((?:\\\}\|\\\\\|$q_brace)+?) (\}) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0
5780	0						elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\[) ((?:\\\]\|\\\\\|$q_bracket)+?) (\]) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0
5781	0						elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\<) ((?:\\\>\|\\\\\|$q_angle)+?) (\>) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0
5782	0						elsif ($string =~ /\G -s (?>\s+) q (?>\s*) (\S) ((?:\\\1\|\\\\\|$q_char)+?) (\1) /oxgc) { $e_string .= '-s ' . e_q ('q', $1,$3,$2); $slash = 'm//'; }
	0
5783
5784							elsif ($string =~ /\G -s (?>\s) (\$ (?> \w+ (?: ::\w+)) (?: (?: ->)? (?: [\$\@\%\&\]\ \| \$\#\* \| $ (?:$qq_paren)? $ \| [\@\%\]? \{ (?:$qq_brace)+? \} \| [\@\%]? \[ (?:$qq_bracket)+? \] ))*) /oxgc)
5785	0						{ $e_string .= "-s $1"; $slash = 'm//'; }
	0
5786	0						elsif ($string =~ /\G -s (?>\s) $ ((?:$qq_paren)?) $ /oxgc) { $e_string .= "-s ($1)"; $slash = 'm//'; }
	0
5787	0						elsif ($string =~ /\G -s (?= (?>\s+) [a-z]+) /oxgc) { $e_string .= '-s'; $slash = 'm//'; }
	0
5788	0						elsif ($string =~ /\G -s (?>\s+) ((?>\w+)) /oxgc) { $e_string .= "-s $1"; $slash = 'm//'; }
	0
5789
5790	0						elsif ($string =~ /\G \b bytes::length (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'length'; $slash = 'm//'; }
	0
5791	0						elsif ($string =~ /\G \b bytes::chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'chr'; $slash = 'm//'; }
	0
5792	0						elsif ($string =~ /\G \b chr (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::chr'; $slash = 'm//'; }
	0
5793	0						elsif ($string =~ /\G \b bytes::ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'ord'; $slash = 'div'; }
	0
5794	0						elsif ($string =~ /\G \b ord (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= $function_ord; $slash = 'div'; }
	0
5795	0						elsif ($string =~ /\G \b glob (?= (?>\s+)[A-Za-z_]\|(?>\s)['"`\$\@\&\\(]) /oxgc) { $e_string .= 'Eutf2::glob'; $slash = 'm//'; }
	0
5796	0						elsif ($string =~ /\G \b lc \b /oxgc) { $e_string .= 'Eutf2::lc_'; $slash = 'm//'; }
	0
5797	0						elsif ($string =~ /\G \b lcfirst \b /oxgc) { $e_string .= 'Eutf2::lcfirst_'; $slash = 'm//'; }
	0
5798	0						elsif ($string =~ /\G \b uc \b /oxgc) { $e_string .= 'Eutf2::uc_'; $slash = 'm//'; }
	0
5799	0						elsif ($string =~ /\G \b ucfirst \b /oxgc) { $e_string .= 'Eutf2::ucfirst_'; $slash = 'm//'; }
	0
5800	0						elsif ($string =~ /\G \b fc \b /oxgc) { $e_string .= 'Eutf2::fc_'; $slash = 'm//'; }
	0
5801	0						elsif ($string =~ /\G -s \b /oxgc) { $e_string .= '-s '; $slash = 'm//'; }
	0
5802
5803	0						elsif ($string =~ /\G \b bytes::length \b /oxgc) { $e_string .= 'length'; $slash = 'm//'; }
	0
5804	0						elsif ($string =~ /\G \b bytes::chr \b /oxgc) { $e_string .= 'chr'; $slash = 'm//'; }
	0
5805	0						elsif ($string =~ /\G \b chr \b /oxgc) { $e_string .= 'Eutf2::chr_'; $slash = 'm//'; }
	0
5806	0						elsif ($string =~ /\G \b bytes::ord \b /oxgc) { $e_string .= 'ord'; $slash = 'div'; }
	0
5807	0						elsif ($string =~ /\G \b ord \b /oxgc) { $e_string .= $function_ord_; $slash = 'div'; }
	0
5808	0						elsif ($string =~ /\G \b glob \b /oxgc) { $e_string .= 'Eutf2::glob_'; $slash = 'm//'; }
	0
5809	0						elsif ($string =~ /\G \b reverse \b /oxgc) { $e_string .= $function_reverse; $slash = 'm//'; }
	0
5810	0						elsif ($string =~ /\G \b getc \b /oxgc) { $e_string .= $function_getc; $slash = 'm//'; }
	0
5811							# split
5812							elsif ($string =~ /\G \b (split) \b (?! (?>\s*) => ) /oxgc) {
5813	0						$slash = 'm//';
5814
5815	0						my $e = '';
5816	0						while ($string =~ /\G ( (?>\s+) \| \( \| \#.* ) /oxgc) {
5817	0						$e .= $1;
5818							}
5819
5820							# end of split
5821	0	0					if ($string =~ /\G (?= [,;\)\}\]] ) /oxgc) { return 'Eutf2::split' . $e; }
	0	0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
5822
5823							# split scalar value
5824	0						elsif ($string =~ /\G ( [\$\@\&\*] $qq_scalar ) /oxgc) { $e_string .= 'Eutf2::split' . $e . e_string($1); next E_STRING_LOOP; }
	0
5825
5826							# split literal space
5827	0						elsif ($string =~ /\G \b qq (\#) [ ] (\#) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {qq$1 $2}; next E_STRING_LOOP; }
	0
5828	0						elsif ($string =~ /\G \b qq ((?>\s*)) ($) [ ] ($) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0
5829	0						elsif ($string =~ /\G \b qq ((?>\s*)) (\{) [ ] (\}) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0
5830	0						elsif ($string =~ /\G \b qq ((?>\s*)) (\[) [ ] (\]) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0
5831	0						elsif ($string =~ /\G \b qq ((?>\s*)) (\<) [ ] (\>) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0
5832	0						elsif ($string =~ /\G \b qq ((?>\s*)) (\S) [ ] (\2) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq{$1qq$2 $3}; next E_STRING_LOOP; }
	0
5833	0						elsif ($string =~ /\G \b q (\#) [ ] (\#) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {q$1 $2}; next E_STRING_LOOP; }
	0
5834	0						elsif ($string =~ /\G \b q ((?>\s*)) ($) [ ] ($) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0
5835	0						elsif ($string =~ /\G \b q ((?>\s*)) (\{) [ ] (\}) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0
5836	0						elsif ($string =~ /\G \b q ((?>\s*)) (\[) [ ] (\]) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0
5837	0						elsif ($string =~ /\G \b q ((?>\s*)) (\<) [ ] (\>) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0
5838	0						elsif ($string =~ /\G \b q ((?>\s*)) (\S) [ ] (\2) /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {$1q$2 $3}; next E_STRING_LOOP; }
	0
5839	0						elsif ($string =~ /\G ' [ ] ' /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {' '}; next E_STRING_LOOP; }
	0
5840	0						elsif ($string =~ /\G " [ ] " /oxgc) { $e_string .= 'Eutf2::split' . $e . qq {" "}; next E_STRING_LOOP; }
	0
5841
5842							# split qq//
5843							elsif ($string =~ /\G \b (qq) \b /oxgc) {
5844	0	0					if ($string =~ /\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq# # --> qr # #
	0
	0
5845							else {
5846	0						while ($string !~ /\G \z/oxgc) {
5847	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0
		0
		0
		0
		0
		0
5848	0						elsif ($string =~ /\G ($) ((?:$qq_paren)*?) ($) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq ( ) --> qr ( )
	0
5849	0						elsif ($string =~ /\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq { } --> qr { }
	0
5850	0						elsif ($string =~ /\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq [ ] --> qr [ ]
	0
5851	0						elsif ($string =~ /\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq < > --> qr < >
	0
5852	0						elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= e_split($e.'qr','{','}',$2,''); next E_STRING_LOOP; } # qq \| \| --> qr { }
	0
5853	0						elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= e_split($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # qq * --> qr * *
	0
5854							}
5855	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5856							}
5857							}
5858
5859							# split qr//
5860							elsif ($string =~ /\G \b (qr) \b /oxgc) {
5861	0	0					if ($string =~ /\G (\#) ((?:$qq_char)?) (\#) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1,$3,$2,$4); next E_STRING_LOOP; } # qr# #
	0
	0
5862							else {
5863	0						while ($string !~ /\G \z/oxgc) {
5864	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0
		0
		0
		0
		0
		0
		0
5865	0						elsif ($string =~ /\G ($) ((?:$qq_paren)?) ($) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr ( )
	0
5866	0						elsif ($string =~ /\G (\{) ((?:$qq_brace)?) (\}) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr { }
	0
5867	0						elsif ($string =~ /\G (\[) ((?:$qq_bracket)?) (\]) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr [ ]
	0
5868	0						elsif ($string =~ /\G (\<) ((?:$qq_angle)?) (\>) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr < >
	0
5869	0						elsif ($string =~ /\G (\') ((?:$qq_char)?) (\') ([imosxpadlunbB]) /oxgc) { $e_string .= e_split_q($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr ' '
	0
5870	0						elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([imosxpadlunbB]*) /oxgc) { $e_string .= e_split ($e.'qr','{','}',$2,$4); next E_STRING_LOOP; } # qr \| \| --> qr { }
	0
5871	0						elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) ([imosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # qr * *
	0
5872							}
5873	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5874							}
5875							}
5876
5877							# split q//
5878							elsif ($string =~ /\G \b (q) \b /oxgc) {
5879	0	0					if ($string =~ /\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q# # --> qr # #
	0
	0
5880							else {
5881	0						while ($string !~ /\G \z/oxgc) {
5882	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0
		0
		0
		0
		0
		0
5883	0						elsif ($string =~ /\G ($) ((?:\\\\\|\\$\|\\$\|$q_paren)*?) ($) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q ( ) --> qr ( )
	0
5884	0						elsif ($string =~ /\G (\{) ((?:\\\\\|\\\}\|\\\{\|$q_brace)*?) (\}) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q { } --> qr { }
	0
5885	0						elsif ($string =~ /\G (\[) ((?:\\\\\|\\\]\|\\\[\|$q_bracket)*?) (\]) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q [ ] --> qr [ ]
	0
5886	0						elsif ($string =~ /\G (\<) ((?:\\\\\|\\\>\|\\\<\|$q_angle)*?) (\>) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q < > --> qr < >
	0
5887	0						elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$q_char)?) (\1) /oxgc) { $e_string .= e_split_q($e.'qr','{','}',$2,''); next E_STRING_LOOP; } # q \| \| --> qr { }
	0
5888	0						elsif ($string =~ /\G (\S) ((?:\\\\\|\\\1\| $q_char)?) (\1) /oxgc) { $e_string .= e_split_q($e.'qr',$1,$3,$2,''); next E_STRING_LOOP; } # q * --> qr * *
	0
5889							}
5890	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5891							}
5892							}
5893
5894							# split m//
5895							elsif ($string =~ /\G \b (m) \b /oxgc) {
5896	0	0					if ($string =~ /\G (\#) ((?:$qq_char)?) (\#) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1,$3,$2,$4); next E_STRING_LOOP; } # m# # --> qr # #
	0
	0
5897							else {
5898	0						while ($string !~ /\G \z/oxgc) {
5899	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e_string .= $e . $1; }
	0	0
		0
		0
		0
		0
		0
		0
5900	0						elsif ($string =~ /\G ($) ((?:$qq_paren)?) ($) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m ( ) --> qr ( )
	0
5901	0						elsif ($string =~ /\G (\{) ((?:$qq_brace)?) (\}) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m { } --> qr { }
	0
5902	0						elsif ($string =~ /\G (\[) ((?:$qq_bracket)?) (\]) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m [ ] --> qr [ ]
	0
5903	0						elsif ($string =~ /\G (\<) ((?:$qq_angle)?) (\>) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m < > --> qr < >
	0
5904	0						elsif ($string =~ /\G (\') ((?:$qq_char)?) (\') ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split_q($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m ' ' --> qr ' '
	0
5905	0						elsif ($string =~ /\G ([\-:?\\^\|]) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]*) /oxgc) { $e_string .= e_split ($e.'qr','{','}',$2,$4); next E_STRING_LOOP; } # m \| \| --> qr { }
	0
5906	0						elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) ([cgimosxpadlunbB]) /oxgc) { $e_string .= e_split ($e.'qr',$1, $3, $2,$4); next E_STRING_LOOP; } # m * * --> qr * *
	0
5907							}
5908	0						die __FILE__, ": Search pattern not terminated\n";
5909							}
5910							}
5911
5912							# split ''
5913							elsif ($string =~ /\G (\') /oxgc) {
5914	0						my $q_string = '';
5915	0						while ($string !~ /\G \z/oxgc) {
5916	0	0					if ($string =~ /\G (\\\\) /oxgc) { $q_string .= $1; }
	0	0
		0
		0
5917	0						elsif ($string =~ /\G (\\\') /oxgc) { $q_string .= $1; } # splitqr'' --> split qr''
5918	0						elsif ($string =~ /\G \' /oxgc) { $e_string .= e_split_q($e.q{ qr},"'","'",$q_string,''); next E_STRING_LOOP; } # ' ' --> qr ' '
	0
5919	0						elsif ($string =~ /\G ($q_char) /oxgc) { $q_string .= $1; }
5920							}
5921	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5922							}
5923
5924							# split ""
5925							elsif ($string =~ /\G (\") /oxgc) {
5926	0						my $qq_string = '';
5927	0						while ($string !~ /\G \z/oxgc) {
5928	0	0					if ($string =~ /\G (\\\\) /oxgc) { $qq_string .= $1; }
	0	0
		0
		0
5929	0						elsif ($string =~ /\G (\\\") /oxgc) { $qq_string .= $1; } # splitqr"" --> split qr""
5930	0						elsif ($string =~ /\G \" /oxgc) { $e_string .= e_split($e.q{ qr},'"','"',$qq_string,''); next E_STRING_LOOP; } # " " --> qr " "
	0
5931	0						elsif ($string =~ /\G ($q_char) /oxgc) { $qq_string .= $1; }
5932							}
5933	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5934							}
5935
5936							# split //
5937							elsif ($string =~ /\G (\/) /oxgc) {
5938	0						my $regexp = '';
5939	0						while ($string !~ /\G \z/oxgc) {
5940	0	0					if ($string =~ /\G (\\\\) /oxgc) { $regexp .= $1; }
	0	0
		0
		0
5941	0						elsif ($string =~ /\G (\\\/) /oxgc) { $regexp .= $1; } # splitqr// --> split qr//
5942	0						elsif ($string =~ /\G \/ ([cgimosxpadlunbB]*) /oxgc) { $e_string .= e_split($e.q{ qr}, '/','/',$regexp,$1); next E_STRING_LOOP; } # / / --> qr / /
	0
5943	0						elsif ($string =~ /\G ($q_char) /oxgc) { $regexp .= $1; }
5944							}
5945	0						die __FILE__, ": Search pattern not terminated\n";
5946							}
5947							}
5948
5949							# qq//
5950							elsif ($string =~ /\G \b (qq) \b /oxgc) {
5951	0						my $ope = $1;
5952	0	0					if ($string =~ /\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { # qq# #
5953	0						$e_string .= e_qq($ope,$1,$3,$2);
5954							}
5955							else {
5956	0						my $e = '';
5957	0						while ($string !~ /\G \z/oxgc) {
5958	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
5959	0						elsif ($string =~ /\G ($) ((?:$qq_paren)*?) ($) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq ( )
	0
5960	0						elsif ($string =~ /\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq { }
	0
5961	0						elsif ($string =~ /\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq [ ]
	0
5962	0						elsif ($string =~ /\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq < >
	0
5963	0						elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qq *
	0
5964							}
5965	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5966							}
5967							}
5968
5969							# qx//
5970							elsif ($string =~ /\G \b (qx) \b /oxgc) {
5971	0						my $ope = $1;
5972	0	0					if ($string =~ /\G (\#) ((?:$qq_char)*?) (\#) /oxgc) { # qx# #
5973	0						$e_string .= e_qq($ope,$1,$3,$2);
5974							}
5975							else {
5976	0						my $e = '';
5977	0						while ($string !~ /\G \z/oxgc) {
5978	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
		0
5979	0						elsif ($string =~ /\G ($) ((?:$qq_paren)*?) ($) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx ( )
	0
5980	0						elsif ($string =~ /\G (\{) ((?:$qq_brace)*?) (\}) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx { }
	0
5981	0						elsif ($string =~ /\G (\[) ((?:$qq_bracket)*?) (\]) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx [ ]
	0
5982	0						elsif ($string =~ /\G (\<) ((?:$qq_angle)*?) (\>) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx < >
	0
5983	0						elsif ($string =~ /\G (\') ((?:$qq_char)*?) (\') /oxgc) { $e_string .= $e . e_q ($ope,$1,$3,$2); next E_STRING_LOOP; } # qx ' '
	0
5984	0						elsif ($string =~ /\G (\S) ((?:$qq_char)?) (\1) /oxgc) { $e_string .= $e . e_qq($ope,$1,$3,$2); next E_STRING_LOOP; } # qx *
	0
5985							}
5986	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
5987							}
5988							}
5989
5990							# q//
5991							elsif ($string =~ /\G \b (q) \b /oxgc) {
5992	0						my $ope = $1;
5993	0	0					if ($string =~ /\G (\#) ((?:\\\#\|\\\\\|$q_char)*?) (\#) /oxgc) { # q# #
5994	0						$e_string .= e_q($ope,$1,$3,$2);
5995							}
5996							else {
5997	0						my $e = '';
5998	0						while ($string !~ /\G \z/oxgc) {
5999	0	0					if ($string =~ /\G ((?>\s+)\|\#.*) /oxgc) { $e .= $1; }
	0	0
		0
		0
		0
		0
6000	0						elsif ($string =~ /\G ($) ((?:\\\\\|\\$\|\\$\|$q_paren)*?) ($) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q ( )
	0
6001	0						elsif ($string =~ /\G (\{) ((?:\\\\\|\\\}\|\\\{\|$q_brace)*?) (\}) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q { }
	0
6002	0						elsif ($string =~ /\G (\[) ((?:\\\\\|\\\]\|\\\[\|$q_bracket)*?) (\]) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q [ ]
	0
6003	0						elsif ($string =~ /\G (\<) ((?:\\\\\|\\\>\|\\\<\|$q_angle)*?) (\>) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q < >
	0
6004	0						elsif ($string =~ /\G (\S) ((?:\\\\\|\\\1\| $q_char)?) (\1) /oxgc) { $e_string .= $e . e_q($ope,$1,$3,$2); next E_STRING_LOOP; } # q *
	0
6005							}
6006	0						die __FILE__, ": Can't find string terminator anywhere before EOF\n";
6007							}
6008							}
6009
6010							# ''
6011	0						elsif ($string =~ /\G (?
6012
6013							# ""
6014	0						elsif ($string =~ /\G (\") ((?:$qq_char)*?) (\") /oxgc) { $e_string .= e_qq('',$1,$3,$2); }
6015
6016							# ``
6017	0						elsif ($string =~ /\G (\`) ((?:$qq_char)*?) (\`) /oxgc) { $e_string .= e_qq('',$1,$3,$2); }
6018
6019							# <<>> (a safer ARGV)
6020	0						elsif ($string =~ /\G ( <<>> ) /oxgc) { $e_string .= $1; }
6021
6022							# <<= <=> <= < operator
6023	0						elsif ($string =~ /\G ( <<= \| <=> \| <= \| < ) (?= (?>\s) [A-Za-z_0-9'"`\$\@\&\\(\+\-] )/oxgc) { $e_string .= $1; }
6024
6025							# <FILEHANDLE>
6026	0						elsif ($string =~ /\G (<[\$]?[A-Za-z_][A-Za-z_0-9]*>) /oxgc) { $e_string .= $1; }
6027
6028							# --- glob
6029							elsif ($string =~ /\G < ((?:$q_char)+?) > /oxgc) {
6030	0						$e_string .= 'Eutf2::glob("' . $1 . '")';
6031							}
6032
6033							# << (bit shift) --- not here document
6034	0						elsif ($string =~ /\G ( << (?>\s*) ) (?= [0-9\$\@\&] ) /oxgc) { $slash = 'm//'; $e_string .= $1; }
	0
6035
6036							# <<'HEREDOC'
6037							elsif ($string =~ /\G ( << '([a-zA-Z_0-9]*)' ) /oxgc) {
6038	0						$slash = 'm//';
6039	0						my $here_quote = $1;
6040	0						my $delimiter = $2;
6041
6042							# get here document
6043	0	0					if ($here_script eq '') {
6044	0						$here_script = CORE::substr $_, pos $_;
6045	0						$here_script =~ s/.*?\n//oxm;
6046							}
6047	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6048	0						push @heredoc, $1 . qq{\n$delimiter\n};
6049	0						push @heredoc_delimiter, $delimiter;
6050							}
6051							else {
6052	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6053							}
6054	0						$e_string .= $here_quote;
6055							}
6056
6057							# <<\HEREDOC
6058							elsif ($string =~ /\G ( << \\([a-zA-Z_0-9]+) ) /oxgc) {
6059	0						$slash = 'm//';
6060	0						my $here_quote = $1;
6061	0						my $delimiter = $2;
6062
6063							# get here document
6064	0	0					if ($here_script eq '') {
6065	0						$here_script = CORE::substr $_, pos $_;
6066	0						$here_script =~ s/.*?\n//oxm;
6067							}
6068	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6069	0						push @heredoc, $1 . qq{\n$delimiter\n};
6070	0						push @heredoc_delimiter, $delimiter;
6071							}
6072							else {
6073	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6074							}
6075	0						$e_string .= $here_quote;
6076							}
6077
6078							# <<"HEREDOC"
6079							elsif ($string =~ /\G ( << "([a-zA-Z_0-9]*)" ) /oxgc) {
6080	0						$slash = 'm//';
6081	0						my $here_quote = $1;
6082	0						my $delimiter = $2;
6083
6084							# get here document
6085	0	0					if ($here_script eq '') {
6086	0						$here_script = CORE::substr $_, pos $_;
6087	0						$here_script =~ s/.*?\n//oxm;
6088							}
6089	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6090	0						push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
6091	0						push @heredoc_delimiter, $delimiter;
6092							}
6093							else {
6094	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6095							}
6096	0						$e_string .= $here_quote;
6097							}
6098
6099							# <<HEREDOC
6100							elsif ($string =~ /\G ( << ([a-zA-Z_0-9]+) ) /oxgc) {
6101	0						$slash = 'm//';
6102	0						my $here_quote = $1;
6103	0						my $delimiter = $2;
6104
6105							# get here document
6106	0	0					if ($here_script eq '') {
6107	0						$here_script = CORE::substr $_, pos $_;
6108	0						$here_script =~ s/.*?\n//oxm;
6109							}
6110	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6111	0						push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
6112	0						push @heredoc_delimiter, $delimiter;
6113							}
6114							else {
6115	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6116							}
6117	0						$e_string .= $here_quote;
6118							}
6119
6120							# <<`HEREDOC`
6121							elsif ($string =~ /\G ( << `([a-zA-Z_0-9]*)` ) /oxgc) {
6122	0						$slash = 'm//';
6123	0						my $here_quote = $1;
6124	0						my $delimiter = $2;
6125
6126							# get here document
6127	0	0					if ($here_script eq '') {
6128	0						$here_script = CORE::substr $_, pos $_;
6129	0						$here_script =~ s/.*?\n//oxm;
6130							}
6131	0	0					if ($here_script =~ s/\A (.*?) \n $delimiter \n //xms) {
6132	0						push @heredoc, e_heredoc($1) . qq{\n$delimiter\n};
6133	0						push @heredoc_delimiter, $delimiter;
6134							}
6135							else {
6136	0						die __FILE__, ": Can't find string terminator $delimiter anywhere before EOF\n";
6137							}
6138	0						$e_string .= $here_quote;
6139							}
6140
6141							# any operator before div
6142							elsif ($string =~ /\G (
6143							-- \| \+\+ \|
6144							[\)\}\]]
6145
6146	0						) /oxgc) { $slash = 'div'; $e_string .= $1; }
	0
6147
6148							# yada-yada or triple-dot operator
6149							elsif ($string =~ /\G (
6150							\.\.\.
6151
6152	0						) /oxgc) { $slash = 'm//'; $e_string .= q{die('Unimplemented')}; }
	0
6153
6154							# any operator before m//
6155							elsif ($string =~ /\G ((?>
6156
6157							!~~ \| !~ \| != \| ! \|
6158							%= \| % \|
6159							&&= \| && \| &= \| &\.= \| &\. \| & \|
6160							-= \| -> \| - \|
6161							:(?>\s*)= \|
6162							: \|
6163							<<>> \|
6164							<<= \| <=> \| <= \| < \|
6165							== \| => \| =~ \| = \|
6166							>>= \| >> \| >= \| > \|
6167							\\= \| \\ \| \= \| \ \|
6168							\+= \| \+ \|
6169							\.\. \| \.= \| \. \|
6170							\/\/= \| \/\/ \|
6171							\/= \| \/ \|
6172							\? \|
6173							\\ \|
6174							\^= \| \^\.= \| \^\. \| \^ \|
6175							\b x= \|
6176							\\|\\|= \| \\|\\| \| \\|= \| \\|\.= \| \\|\. \| \\| \|
6177							~~ \| ~\. \| ~ \|
6178							\b(?: and \| cmp \| eq \| ge \| gt \| le \| lt \| ne \| not \| or \| xor \| x )\b \|
6179							\b(?: print )\b \|
6180
6181							[,;\(\{\[]
6182
6183	0						)) /oxgc) { $slash = 'm//'; $e_string .= $1; }
	0
6184
6185							# other any character
6186	0						elsif ($string =~ /\G ($q_char) /oxgc) { $e_string .= $1; }
6187
6188							# system error
6189							else {
6190	0						die __FILE__, ": Oops, this shouldn't happen!\n";
6191							}
6192							}
6193
6194	0						return $e_string;
6195							}
6196
6197							#
6198							# character class
6199							#
6200							sub character_class {
							# Map one character-class token to the text that replaces it.
							#   $char     - the token; only '.' is handled here
							#   $modifier - modifier string; only the presence of 's' is tested
							# Returns a string. For '.', this is the literal (single-quoted, so
							# not interpolated here) text '${Eutf2::dot_s}' when $modifier
							# contains 's', otherwise '${Eutf2::dot}'. Any other token is passed
							# to Eutf2::classic_character_class and its result is returned.
6201	0			0	0		my($char,$modifier) = @_;
6202
6203	0	0					if ($char eq '.') {
6204	0	0					if ($modifier =~ /s/) {
6205	0						return '${Eutf2::dot_s}';
6206							}
6207							else {
6208	0						return '${Eutf2::dot}';
6209							}
6210							}
6211							else {
6212	0						return Eutf2::classic_character_class($char);
6213							}
6214							}
6215
6216							#
6217							# escape capture ($1, $2, $3, ...)
6218							#
6219							sub e_capture {
							# Wrap the first argument in '${' ... '}' and return the resulting
							# string; for example '1' becomes '${1}'. The argument is read
							# directly from $_[0] and is not modified.
6220
6221	0			0	0		return join '', '${', $_[0], '}';
6222							}
6223
6224							#
6225							# escape transliteration (tr/// or y///)
6226							#
6227							sub e_tr {
							# Build the replacement source text for one tr/// (or y///) operation.
							#   $variable   - text of the bound variable, or '' when none was given
							#   $charclass  - search list
							#   $e          - text placed between the two lists in the output
							#                 (presumably whitespace/comments from the script -- TODO confirm)
							#   $charclass2 - replacement list
							#   $modifier   - modifier letters; a false value is treated as ''
							# Returns the generated code as a string.
							# Side effects: sets $slash to 'div' and clears $tr_variable and
							# $bind_operator before returning.
6228	0			0	0		my($variable,$charclass,$e,$charclass2,$modifier) = @_;
6229	0						my $e_tr = '';
6230	0		0				$modifier \|\|= '';
6231
6232	0						$slash = 'div';
6233
6234							# quote character class 1
6235	0						$charclass = q_tr($charclass);
6236
6237							# quote character class 2
6238	0						$charclass2 = q_tr($charclass2);
6239
6240							# /b /B modifier
							# tr/bB//d deletes every 'b' and 'B' from $modifier and returns how
							# many were removed, so this branch is taken only when one was present
							# and the emitted modifier no longer contains it. In that case a
							# plain tr is emitted instead of a call to Eutf2::tr.
6241	0	0					if ($modifier =~ tr/bB//d) {
6242	0	0					if ($variable eq '') {
6243	0						$e_tr = qq{tr$charclass$e$charclass2$modifier};
6244							}
6245							else {
6246	0						$e_tr = qq{$variable${bind_operator}tr$charclass$e$charclass2$modifier};
6247							}
6248							}
6249							else {
							# No bound variable: the generated call operates on $_ with a
							# literal ' =~ ' as the operator argument.
6250	0	0					if ($variable eq '') {
6251	0						$e_tr = qq{Eutf2::tr(\$_,' =~ ',$charclass,$e$charclass2,'$modifier')};
6252							}
6253							else {
6254	0						$e_tr = qq{Eutf2::tr($variable,'$bind_operator',$charclass,$e$charclass2,'$modifier')};
6255							}
6256							}
6257
6258							# clear tr/// variable
6259	0						$tr_variable = '';
6260	0						$bind_operator = '';
6261
6262	0						return $e_tr;
6263							}
6264
6265							#
6266							# quote for escape transliteration (tr/// or y///)
6267							#
6268							sub q_tr {
							# Quote a tr/// list as a single-quoted string literal, choosing a
							# delimiter that does not occur inside the list.
							#   $charclass - the list text to quote
							# Returns the string produced by e_q() for the first usable form,
							# tried in this order: ' ', q/ /, q# #, q< >, q( ), q{ }, then each
							# fallback delimiter character from the list in the final branch.
							# A bracketing pair is usable only when neither of its two
							# characters appears in $charclass. If nothing qualifies, q{ } is
							# used anyway.
6269	0			0	0		my($charclass) = @_;
6270
6271							# quote character class
6272	0	0					if ($charclass !~ /'/oxms) {
		0
		0
		0
		0
		0
6273	0						return e_q('', "'", "'", $charclass); # --> q' '
6274							}
6275							elsif ($charclass !~ /\//oxms) {
6276	0						return e_q('q', '/', '/', $charclass); # --> q/ /
6277							}
6278							elsif ($charclass !~ /\#/oxms) {
6279	0						return e_q('q', '#', '#', $charclass); # --> q# #
6280							}
6281							elsif ($charclass !~ /[\<\>]/oxms) {
6282	0						return e_q('q', '<', '>', $charclass); # --> q< >
6283							}
							# neither '(' nor ')' may occur in the list for q( ) to be usable
6284							elsif ($charclass !~ /[\(\)]/oxms) {
6285	0						return e_q('q', '(', ')', $charclass); # --> q( )
6286							}
6287							elsif ($charclass !~ /[\{\}]/oxms) {
6288	0						return e_q('q', '{', '}', $charclass); # --> q{ }
6289							}
6290							else {
							# every preferred delimiter is taken: use the first fallback
							# character that is absent from the list as both delimiters
6291	0						for my $char (qw( ! " $ % & * + . : = ? @ ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
6292	0	0					if ($charclass !~ /\Q$char\E/xms) {
6293	0						return e_q('q', $char, $char, $charclass);
6294							}
6295							}
6296							}
6297
							# last resort when no candidate delimiter is free
6298	0						return e_q('q', '{', '}', $charclass);
6299							}
6300
6301							#
6302							# escape q string (q//, '')
6303							#
6304							sub e_q {
							# Reassemble a single-quoted literal from its parts.
							#   $ope           - operator text placed first (callers pass '' or 'q', among others)
							#   $delimiter     - opening delimiter
							#   $end_delimiter - closing delimiter
							#   $string        - body of the literal, emitted unchanged
							# Returns $ope . $delimiter . $string . $end_delimiter.
							# Side effect: sets $slash to 'div'.
6305	0			0	0		my($ope,$delimiter,$end_delimiter,$string) = @_;
6306
6307	0						$slash = 'div';
6308
6309	0						return join '', $ope, $delimiter, $string, $end_delimiter;
6310							}
6311
6312							#
6313							# escape qq string (qq//, "", qx//, ``)
6314							#
6315							sub e_qq {
6316	0			0	0		my($ope,$delimiter,$end_delimiter,$string) = @_;
6317
6318	0						$slash = 'div';
6319
6320	0						my $left_e = 0;
6321	0						my $right_e = 0;
6322
6323							# split regexp
6324	0						my @char = $string =~ /\G((?>
6325							[^\x80-\xFF\\\$]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
6326							\\x\{ (?>[0-9A-Fa-f]+) \} \|
6327							\\o\{ (?>[0-7]+) \} \|
6328							\\N\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
6329							\\ $q_char \|
6330							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
6331							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
6332							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
6333							\$ (?>\s* [0-9]+) \|
6334							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
6335							\$ \$ (?![\w\{]) \|
6336							\$ (?>\s) \$ (?>\s) $qq_variable \|
6337							$q_char
6338							))/oxmsg;
6339
6340	0						for (my $i=0; $i <= $#char; $i++) {
6341
6342							# "\L\u" --> "\u\L"
6343	0	0	0				if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		0	0
		0
		0
		0
6344	0						@char[$i,$i+1] = @char[$i+1,$i];
6345							}
6346
6347							# "\U\l" --> "\l\U"
6348							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
6349	0						@char[$i,$i+1] = @char[$i+1,$i];
6350							}
6351
6352							# octal escape sequence
6353							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
6354	0						$char[$i] = Eutf2::octchr($1);
6355							}
6356
6357							# hexadecimal escape sequence
6358							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
6359	0						$char[$i] = Eutf2::hexchr($1);
6360							}
6361
6362							# \N{CHARNAME} --> N{CHARNAME}
6363							elsif ($char[$i] =~ /\A \\ ( N\{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
6364	0						$char[$i] = $1;
6365							}
6366
6367	0	0					if (0) {
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
6368							}
6369
6370							# \F
6371							#
6372							# P.69 Table 2-6. Translation escapes
6373							# in Chapter 2: Bits and Pieces
6374							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
6375							# (and so on)
6376
6377							# \u \l \U \L \F \Q \E
6378	0						elsif ($char[$i] =~ /\A ([<>]) \z/oxms) {
6379	0	0					if ($right_e < $left_e) {
6380	0						$char[$i] = '\\' . $char[$i];
6381							}
6382							}
6383							elsif ($char[$i] eq '\u') {
6384
6385							# "STRING @{[ LIST EXPR ]} MORE STRING"
6386
6387							# P.257 Other Tricks You Can Do with Hard References
6388							# in Chapter 8: References
6389							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
6390
6391							# P.353 Other Tricks You Can Do with Hard References
6392							# in Chapter 8: References
6393							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
6394
6395							# (and so on)
6396
6397	0						$char[$i] = '@{[Eutf2::ucfirst qq<';
6398	0						$left_e++;
6399							}
6400							elsif ($char[$i] eq '\l') {
6401	0						$char[$i] = '@{[Eutf2::lcfirst qq<';
6402	0						$left_e++;
6403							}
6404							elsif ($char[$i] eq '\U') {
6405	0						$char[$i] = '@{[Eutf2::uc qq<';
6406	0						$left_e++;
6407							}
6408							elsif ($char[$i] eq '\L') {
6409	0						$char[$i] = '@{[Eutf2::lc qq<';
6410	0						$left_e++;
6411							}
6412							elsif ($char[$i] eq '\F') {
6413	0						$char[$i] = '@{[Eutf2::fc qq<';
6414	0						$left_e++;
6415							}
6416							elsif ($char[$i] eq '\Q') {
6417	0						$char[$i] = '@{[CORE::quotemeta qq<';
6418	0						$left_e++;
6419							}
6420							elsif ($char[$i] eq '\E') {
6421	0	0					if ($right_e < $left_e) {
6422	0						$char[$i] = '>]}';
6423	0						$right_e++;
6424							}
6425							else {
6426	0						$char[$i] = '';
6427							}
6428							}
6429							elsif ($char[$i] eq '\Q') {
6430	0						while (1) {
6431	0	0					if (++$i > $#char) {
6432	0						last;
6433							}
6434	0	0					if ($char[$i] eq '\E') {
6435	0						last;
6436							}
6437							}
6438							}
6439							elsif ($char[$i] eq '\E') {
6440							}
6441
6442							# $0 --> $0
6443							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
6444							}
6445							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
6446							}
6447
6448							# $$ --> $$
6449							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
6450							}
6451
6452							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
6453							# $1, $2, $3 --> $1, $2, $3 otherwise
6454							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
6455	0						$char[$i] = e_capture($1);
6456							}
6457							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
6458	0						$char[$i] = e_capture($1);
6459							}
6460
6461							# $$foo[ ... ] --> $ $foo->[ ... ]
6462							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
6463	0						$char[$i] = e_capture($1.'->'.$2);
6464							}
6465
6466							# $$foo{ ... } --> $ $foo->{ ... }
6467							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
6468	0						$char[$i] = e_capture($1.'->'.$2);
6469							}
6470
6471							# $$foo
6472							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
6473	0						$char[$i] = e_capture($1);
6474							}
6475
6476							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
6477							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
6478	0						$char[$i] = '@{[Eutf2::PREMATCH()]}';
6479							}
6480
6481							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
6482							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
6483	0						$char[$i] = '@{[Eutf2::MATCH()]}';
6484							}
6485
6486							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
6487							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
6488	0						$char[$i] = '@{[Eutf2::POSTMATCH()]}';
6489							}
6490
6491							# ${ foo } --> ${ foo }
6492							elsif ($char[$i] =~ /\A \$ (?>\s) \{ (?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* ) \} \z/oxms) {
6493							}
6494
6495							# ${ ... }
6496							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
6497	0						$char[$i] = e_capture($1);
6498							}
6499							}
6500
6501							# return string
6502	0	0					if ($left_e > $right_e) {
6503	0						return join '', $ope, $delimiter, @char, '>]}' x ($left_e - $right_e), $end_delimiter;
6504							}
6505	0						return join '', $ope, $delimiter, @char, $end_delimiter;
6506							}
6507
6508							#
6509							# escape qw string (qw//)
6510							#
# Arguments:
#   $ope            operator text, emitted verbatim in front of the delimiter
#   $delimiter      opening delimiter found in the script
#   $end_delimiter  closing delimiter found in the script
#   $string         text between the delimiters
# Returns the rewritten source text: either "$ope<open>$string<close>" with a
# delimiter pair whose closing character does not occur in $string, or, when no
# such delimiter exists, an explicit list of single-quoted words.
# Side effect: sets the file-level $slash to 'div'.
sub e_qw {
    my($ope,$delimiter,$end_delimiter,$string) = @_;

    $slash = 'div';

    # choice again delimiter
    my %octet = map {$_ => 1} ($string =~ /\G ([\x00-\xFF]) /oxmsg);
    if (not $octet{$end_delimiter}) {
        return join '', $ope, $delimiter, $string, $end_delimiter;
    }
    elsif (not $octet{')'}) {
        return join '', $ope, '(', $string, ')';
    }
    elsif (not $octet{'}'}) {
        return join '', $ope, '{', $string, '}';
    }
    elsif (not $octet{']'}) {
        return join '', $ope, '[', $string, ']';
    }
    elsif (not $octet{'>'}) {
        return join '', $ope, '<', $string, '>';
    }
    else {

        # The string range operator ("\x00".."\x1F") returns only its first
        # element for operands like these, so the control characters are
        # enumerated by code point. Whitespace (\x09-\x0D) is left out because
        # it cannot act as a quote-like delimiter.
        my @control = map { CORE::chr($_) } 0x00 .. 0x08, 0x0E .. 0x1F;
        for my $char (qw( ! " $ % & * + - . / : = ? @ ^ ` | ~ ), @control, "\x7F", "\xFF") {
            if (not $octet{$char}) {
                return join '', $ope, $char, $string, $char;
            }
        }
    }

    # qw/AAA BBB C'CC/ --> ('AAA', 'BBB', 'C\'CC')
    # split ' ' (awk mode) drops leading whitespace the way qw// does;
    # split /\s+/ would produce a spurious leading empty word.
    my @string = CORE::split(' ', $string);
    for my $string (@string) {
        my @octet = $string =~ /\G ([\x00-\xFF]) /oxmsg;
        for my $octet (@octet) {
            if ($octet =~ /\A (['\\]) \z/oxms) {
                $octet = '\\' . $1;
            }
        }
        $string = join '', @octet;
    }
    return join '', '(', (join ', ', map { "'$_'" } @string), ')';
}
6554
6555							#
6556							# escape here document (<<"HEREDOC", <<HEREDOC, <<`HEREDOC`)
6557							#
# Argument:
#   $string  body text of an interpolating here document
# Returns the body with escapes and variables rewritten:
#   \o{...} and \x{...} go through Eutf2::octchr / Eutf2::hexchr,
#   \u \l \U \L \F \Q ... \E become '@{[Eutf2::xxx qq<' ... '>]}' wrappers,
#   capture variables and ${ ... } go through e_capture(),
#   $` $& $POSTMATCH families become Eutf2::PREMATCH()/MATCH()/POSTMATCH().
# Side effect: sets the file-level $slash to 'm//'.
sub e_heredoc {
    my($string) = @_;

    $slash = 'm//';

    my $left_e  = 0;   # number of case/quote wrappers opened so far
    my $right_e = 0;   # number of wrappers closed by \E so far

    # split regexp
    my @char = $string =~ /\G((?>
        [^\x80-\xFF\\\$]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] |
        \\x\{ (?>[0-9A-Fa-f]+) \} |
        \\o\{ (?>[0-7]+) \} |
        \\N\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} |
        \\ $q_char |
        \$` | \$\{`\} | \$ (?>\s*) PREMATCH | \$ (?>\s*) \{ (?>\s*) PREMATCH (?>\s*) \} | \$ (?>\s*) \{\^PREMATCH\} |
        \$& | \$\{&\} | \$ (?>\s*) MATCH | \$ (?>\s*) \{ (?>\s*) MATCH (?>\s*) \} | \$ (?>\s*) \{\^MATCH\} |
        \$ (?>\s*) POSTMATCH | \$ (?>\s*) \{ (?>\s*) POSTMATCH (?>\s*) \} | \$ (?>\s*) \{\^POSTMATCH\} |
        \$ (?>\s* [0-9]+) |
        \$ (?>\s*) \{ (?>\s* [0-9]+ \s*) \} |
        \$ \$ (?![\w\{]) |
        \$ (?>\s*) \$ (?>\s*) $qq_variable |
        $q_char
    ))/oxmsg;

    for (my $i=0; $i <= $#char; $i++) {

        # The look-ahead at $char[$i+1] is guarded so that a trailing \L or \U
        # does not read past the end of @char.

        # "\L\u" --> "\u\L"
        if (($i < $#char) and ($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
            @char[$i,$i+1] = @char[$i+1,$i];
        }

        # "\U\l" --> "\l\U"
        elsif (($i < $#char) and ($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
            @char[$i,$i+1] = @char[$i+1,$i];
        }

        # octal escape sequence
        elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
            $char[$i] = Eutf2::octchr($1);
        }

        # hexadecimal escape sequence
        elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
            $char[$i] = Eutf2::hexchr($1);
        }

        # \N{CHARNAME} --> N{CHARNAME}
        elsif ($char[$i] =~ /\A \\ ( N\{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
            $char[$i] = $1;
        }

        # Second, independent chain: it runs even when the chain above has just
        # swapped two tokens, so the swapped-in \u or \l is processed now.

        # \u \l \U \L \F \Q \E
        if ($char[$i] =~ /\A ([<>]) \z/oxms) {

            # inside an open qq< ... > wrapper a bare < or > must be escaped
            if ($right_e < $left_e) {
                $char[$i] = '\\' . $char[$i];
            }
        }
        elsif ($char[$i] eq '\u') {
            $char[$i] = '@{[Eutf2::ucfirst qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\l') {
            $char[$i] = '@{[Eutf2::lcfirst qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\U') {
            $char[$i] = '@{[Eutf2::uc qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\L') {
            $char[$i] = '@{[Eutf2::lc qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\F') {
            $char[$i] = '@{[Eutf2::fc qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\Q') {
            $char[$i] = '@{[CORE::quotemeta qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\E') {
            if ($right_e < $left_e) {
                $char[$i] = '>]}';
                $right_e++;
            }
            else {
                $char[$i] = '';
            }
        }

        # $0 --> $0
        elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
        }
        elsif ($char[$i] =~ /\A \$ \{ (?>\s*) 0 (?>\s*) \} \z/oxms) {
        }

        # $$ --> $$
        elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
        }

        # $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
        # $1, $2, $3 --> $1, $2, $3 otherwise
        elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
            $char[$i] = e_capture($1);
        }
        elsif ($char[$i] =~ /\A \$ \{ (?>\s*) ((?>[1-9][0-9]*)) (?>\s*) \} \z/oxms) {
            $char[$i] = e_capture($1);
        }

        # $$foo[ ... ] --> $ $foo->[ ... ]
        elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
            $char[$i] = e_capture($1.'->'.$2);
        }

        # $$foo{ ... } --> $ $foo->{ ... }
        elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
            $char[$i] = e_capture($1.'->'.$2);
        }

        # $$foo
        elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) \z/oxms) {
            $char[$i] = e_capture($1);
        }

        # $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
        elsif ($char[$i] =~ /\A ( \$` | \$\{`\} | \$ (?>\s*) PREMATCH | \$ (?>\s*) \{ (?>\s*) PREMATCH (?>\s*) \} | \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
            $char[$i] = '@{[Eutf2::PREMATCH()]}';
        }

        # $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
        elsif ($char[$i] =~ /\A ( \$& | \$\{&\} | \$ (?>\s*) MATCH | \$ (?>\s*) \{ (?>\s*) MATCH (?>\s*) \} | \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
            $char[$i] = '@{[Eutf2::MATCH()]}';
        }

        # $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
        elsif ($char[$i] =~ /\A ( \$ (?>\s*) POSTMATCH | \$ (?>\s*) \{ (?>\s*) POSTMATCH (?>\s*) \} | \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
            $char[$i] = '@{[Eutf2::POSTMATCH()]}';
        }

        # ${ foo } --> ${ foo }
        elsif ($char[$i] =~ /\A \$ (?>\s*) \{ (?> \s* [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* \s* ) \} \z/oxms) {
        }

        # ${ ... }
        elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
            $char[$i] = e_capture($1);
        }
    }

    # return string (close every wrapper that has no matching \E)
    if ($left_e > $right_e) {
        return join '', @char, '>]}' x ($left_e - $right_e);
    }
    return join '', @char;
}
6732
6733							#
6734							# escape regexp (m//, qr//)
6735							#
# Arguments:
#   $ope            operator text, emitted verbatim in front of the delimiter
#   $delimiter      opening delimiter found in the script
#   $end_delimiter  closing delimiter found in the script
#   $string         pattern text between the delimiters
#   $modifier       modifier letters (optional)
# Returns the rewritten regexp source text.
# Dies on modifiers a, d, l, u, on an unmatched [ ... ], and on a quantified
# multi-octet token inside m?? (which cannot be wrapped in a group here).
# Side effect: sets the file-level $slash to 'div'.
sub e_qr {
    my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
    $modifier ||= '';

    $modifier =~ tr/p//d;
    if ($modifier =~ /([adlu])/oxms) {
        my $line = 0;
        for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
            if ($filename ne __FILE__) {
                $line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
                last;
            }
        }
        die qq{Unsupported modifier "$1" used at line $line.\n};
    }

    $slash = 'div';

    # Pick a delimiter pair whose closing character is not among the given
    # tokens; returns the current pair unchanged when every candidate is taken.
    # The string range operator ("\x00".."\x1F") returns only its first element
    # for operands like these, so the control characters are enumerated by code
    # point; whitespace (\x09-\x0D) cannot act as a delimiter and is left out.
    my $choose_delimiter = sub {
        my %octet = map {$_ => 1} @_;
        if (not $octet{')'}) {
            return ('(', ')');
        }
        elsif (not $octet{'}'}) {
            return ('{', '}');
        }
        elsif (not $octet{']'}) {
            return ('[', ']');
        }
        elsif (not $octet{'>'}) {
            return ('<', '>');
        }
        my @control = map { CORE::chr($_) } 0x00 .. 0x08, 0x0E .. 0x1F;
        for my $char (qw( ! " $ % & * + - . / = ? ^ ` | ~ ), @control, "\x7F", "\xFF") {
            if (not $octet{$char}) {
                return ($char, $char);
            }
        }
        return ($delimiter, $end_delimiter);
    };

    # literal null string pattern
    if ($string eq '') {
        $modifier =~ tr/bB//d;
        $modifier =~ tr/i//d;
        return join '', $ope, $delimiter, $end_delimiter, $modifier;
    }

    # /b /B modifier
    elsif ($modifier =~ tr/bB//d) {

        # choice again delimiter
        if ($delimiter =~ / [\@:] /oxms) {
            ($delimiter, $end_delimiter) = $choose_delimiter->($string =~ /\G ([\x00-\xFF]) /oxmsg);
        }

        if (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
            return join '', $ope, $delimiter, $string, $matched, $end_delimiter, $modifier;
        }
        else {
            return join '', $ope, $delimiter, '(?:', $string, ')', $matched, $end_delimiter, $modifier;
        }
    }

    my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;

    # split regexp
    my @char = $string =~ /\G((?>
        [^\x80-\xFF\\\$\@\[\(]|(?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] |
        \\x (?>[0-9A-Fa-f]{1,2}) |
        \\ (?>[0-7]{2,3}) |
        \\c [\x40-\x5F] |
        \\x\{ (?>[0-9A-Fa-f]+) \} |
        \\o\{ (?>[0-7]+) \} |
        \\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} |
        \\ $q_char |
        \$` | \$\{`\} | \$ (?>\s*) PREMATCH | \$ (?>\s*) \{ (?>\s*) PREMATCH (?>\s*) \} | \$ (?>\s*) \{\^PREMATCH\} |
        \$& | \$\{&\} | \$ (?>\s*) MATCH | \$ (?>\s*) \{ (?>\s*) MATCH (?>\s*) \} | \$ (?>\s*) \{\^MATCH\} |
        \$ (?>\s*) POSTMATCH | \$ (?>\s*) \{ (?>\s*) POSTMATCH (?>\s*) \} | \$ (?>\s*) \{\^POSTMATCH\} |
        [\$\@] $qq_variable |
        \$ (?>\s* [0-9]+) |
        \$ (?>\s*) \{ (?>\s* [0-9]+ \s*) \} |
        \$ \$ (?![\w\{]) |
        \$ (?>\s*) \$ (?>\s*) $qq_variable |
        \[\^ |
        \[\: (?>[a-z]+) :\] |
        \[\:\^ (?>[a-z]+) :\] |
        \(\? |
        $q_char
    ))/oxmsg;

    # choice again delimiter
    if ($delimiter =~ / [\@:] /oxms) {
        ($delimiter, $end_delimiter) = $choose_delimiter->(@char);
    }

    # one token that is a two/three digit octal escape or a \xHH escape
    my $octet_escape = qr/\A (?: \\ [0-7]{2,3} | \\x [0-9A-Fa-f]{1,2}) \z/oxms;

    my $left_e  = 0;   # number of case/quote wrappers opened so far
    my $right_e = 0;   # number of wrappers closed by \E so far
    for (my $i=0; $i <= $#char; $i++) {

        # The look-ahead at $char[$i+1] is guarded so that a trailing token
        # does not read past the end of @char.

        # "\L\u" --> "\u\L"
        if (($i < $#char) and ($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
            @char[$i,$i+1] = @char[$i+1,$i];
        }

        # "\U\l" --> "\l\U"
        elsif (($i < $#char) and ($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
            @char[$i,$i+1] = @char[$i+1,$i];
        }

        # octal escape sequence
        elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
            $char[$i] = Eutf2::octchr($1);
        }

        # hexadecimal escape sequence
        elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
            $char[$i] = Eutf2::hexchr($1);
        }

        # \b{...}      --> b\{...}
        # \B{...}      --> B\{...}
        # \N{CHARNAME} --> N\{CHARNAME}
        # \p{PROPERTY} --> p\{PROPERTY}
        # \P{PROPERTY} --> P\{PROPERTY}
        elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
            $char[$i] = $1 . '\\' . $2;
        }

        # \p, \P, \X --> p, P, X
        elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
            $char[$i] = $1;
        }

        # Second, independent chain: it runs even when the chain above has just
        # swapped two tokens, so the swapped-in \u or \l is processed now.

        # join separated multiple-octet
        if ($char[$i] =~ /$octet_escape/) {
            if (   ($i+3 <= $#char) and (grep(/$octet_escape/, @char[$i+1..$i+3]) == 3) and (CORE::eval(sprintf '"%s%s%s%s"', @char[$i..$i+3]) =~ /\A $q_char \z/oxms)) {
                $char[$i] .= join '', splice @char, $i+1, 3;
            }
            elsif (($i+2 <= $#char) and (grep(/$octet_escape/, @char[$i+1..$i+2]) == 2) and (CORE::eval(sprintf '"%s%s%s"',   @char[$i..$i+2]) =~ /\A $q_char \z/oxms)) {
                $char[$i] .= join '', splice @char, $i+1, 2;
            }
            elsif (($i+1 <= $#char) and (grep(/$octet_escape/, $char[$i+1      ]) == 1) and (CORE::eval(sprintf '"%s%s"',     @char[$i..$i+1]) =~ /\A $q_char \z/oxms)) {
                $char[$i] .= join '', splice @char, $i+1, 1;
            }
        }

        # open character class [...]
        elsif ($char[$i] eq '[') {
            my $left = $i;

            # [] make die "Unmatched [] in regexp ...\n"
            # (and so on)

            if (($i < $#char) and ($char[$i+1] eq ']')) {
                $i++;
            }

            while (1) {
                if (++$i > $#char) {
                    die __FILE__, ": Unmatched [] in regexp\n";
                }
                if ($char[$i] eq ']') {
                    my $right = $i;

                    # [...]
                    # a class containing variables is expanded at run time
                    if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
                        splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
                    }
                    else {
                        splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
                    }

                    $i = $left;
                    last;
                }
            }
        }

        # open character class [^...]
        elsif ($char[$i] eq '[^') {
            my $left = $i;

            # [^] make die "Unmatched [] in regexp ...\n"
            # (and so on)

            if (($i < $#char) and ($char[$i+1] eq ']')) {
                $i++;
            }

            while (1) {
                if (++$i > $#char) {
                    die __FILE__, ": Unmatched [] in regexp\n";
                }
                if ($char[$i] eq ']') {
                    my $right = $i;

                    # [^...]
                    # a class containing variables is expanded at run time
                    if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
                        splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
                    }
                    else {
                        splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
                    }

                    $i = $left;
                    last;
                }
            }
        }

        # rewrite character class or escape character
        elsif (my $char = character_class($char[$i],$modifier)) {
            $char[$i] = $char;
        }

        # /i modifier
        elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
            if (CORE::length(Eutf2::fc($char[$i])) == 1) {
                $char[$i] = '['   . Eutf2::uc($char[$i])       . Eutf2::fc($char[$i]) . ']';
            }
            else {
                $char[$i] = '(?:' . Eutf2::uc($char[$i]) . '|' . Eutf2::fc($char[$i]) . ')';
            }
        }

        # \u \l \U \L \F \Q \E
        elsif ($char[$i] =~ /\A [<>] \z/oxms) {

            # inside an open qq< ... > wrapper a bare < or > must be escaped
            if ($right_e < $left_e) {
                $char[$i] = '\\' . $char[$i];
            }
        }
        elsif ($char[$i] eq '\u') {
            $char[$i] = '@{[Eutf2::ucfirst qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\l') {
            $char[$i] = '@{[Eutf2::lcfirst qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\U') {
            $char[$i] = '@{[Eutf2::uc qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\L') {
            $char[$i] = '@{[Eutf2::lc qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\F') {
            $char[$i] = '@{[Eutf2::fc qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\Q') {
            $char[$i] = '@{[CORE::quotemeta qq<';
            $left_e++;
        }
        elsif ($char[$i] eq '\E') {
            if ($right_e < $left_e) {
                $char[$i] = '>]}';
                $right_e++;
            }
            else {
                $char[$i] = '';
            }
        }

        # $0 --> $0
        elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }
        elsif ($char[$i] =~ /\A \$ \{ (?>\s*) 0 (?>\s*) \} \z/oxms) {
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # $$ --> $$
        elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
        }

        # $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
        # $1, $2, $3 --> $1, $2, $3 otherwise
        elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
            $char[$i] = e_capture($1);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }
        elsif ($char[$i] =~ /\A \$ \{ (?>\s*) ((?>[1-9][0-9]*)) (?>\s*) \} \z/oxms) {
            $char[$i] = e_capture($1);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # $$foo[ ... ] --> $ $foo->[ ... ]
        elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
            $char[$i] = e_capture($1.'->'.$2);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # $$foo{ ... } --> $ $foo->{ ... }
        elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
            $char[$i] = e_capture($1.'->'.$2);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # $$foo
        elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* )) \z/oxms) {
            $char[$i] = e_capture($1);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
        elsif ($char[$i] =~ /\A ( \$` | \$\{`\} | \$ (?>\s*) PREMATCH | \$ (?>\s*) \{ (?>\s*) PREMATCH (?>\s*) \} | \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(Eutf2::PREMATCH())]}';
            }
            else {
                $char[$i] = '@{[Eutf2::PREMATCH()]}';
            }
        }

        # $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
        elsif ($char[$i] =~ /\A ( \$& | \$\{&\} | \$ (?>\s*) MATCH | \$ (?>\s*) \{ (?>\s*) MATCH (?>\s*) \} | \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(Eutf2::MATCH())]}';
            }
            else {
                $char[$i] = '@{[Eutf2::MATCH()]}';
            }
        }

        # $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
        elsif ($char[$i] =~ /\A ( \$ (?>\s*) POSTMATCH | \$ (?>\s*) \{ (?>\s*) POSTMATCH (?>\s*) \} | \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(Eutf2::POSTMATCH())]}';
            }
            else {
                $char[$i] = '@{[Eutf2::POSTMATCH()]}';
            }
        }

        # ${ foo }
        elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ((?> \s* [A-Za-z_][A-Za-z0-9_]*(?: ::[A-Za-z_][A-Za-z0-9_]*)* \s* )) \} \z/oxms) {
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # ${ ... }
        elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
            $char[$i] = e_capture($1);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # $scalar or @array
        elsif ($char[$i] =~ /\A [\$\@].+ /oxms) {
            $char[$i] = e_string($char[$i]);
            if ($ignorecase) {
                $char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
            }
        }

        # quote character before ? + * {
        elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {

            # a single octet or a single numeric escape needs no grouping
            if ($char[$i-1] =~ /\A (?:[\x00-\xFF]|\\[0-7]{2,3}|\\x[0-9A-Fa-f]{1,2}) \z/oxms) {
            }
            elsif (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
                my $char = $char[$i-1];
                if ($char[$i] eq '{') {
                    die __FILE__, qq{: "MULTIBYTE{n}" should be "(MULTIBYTE){n}" in m?? (and shift \$1,\$2,\$3,...) ($char){n}\n};
                }
                else {
                    die __FILE__, qq{: "MULTIBYTE$char[$i]" should be "(MULTIBYTE)$char[$i]" in m?? (and shift \$1,\$2,\$3,...) ($char)$char[$i]\n};
                }
            }
            else {
                $char[$i-1] = '(?:' . $char[$i-1] . ')';
            }
        }
    }

    # make regexp string (close every wrapper that has no matching \E)
    $modifier =~ tr/i//d;
    if ($left_e > $right_e) {
        if (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
            return join '', $ope, $delimiter, $anchor,        @char, '>]}' x ($left_e - $right_e),      $matched, $end_delimiter, $modifier;
        }
        else {
            return join '', $ope, $delimiter, $anchor, '(?:', @char, '>]}' x ($left_e - $right_e), ')', $matched, $end_delimiter, $modifier;
        }
    }
    if (($ope =~ /\A m? \z/oxms) and ($delimiter eq '?')) {
        return join '', $ope, $delimiter, $anchor,        @char,      $matched, $end_delimiter, $modifier;
    }
    else {
        return join '', $ope, $delimiter, $anchor, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
    }
}
7190
7191							#
7192							# double quote stuff
7193							#
# Arguments:
#   $delimiter      opening delimiter already in use around the caller's text
#   $end_delimiter  closing delimiter already in use around the caller's text
#   $stuff          one token to be quoted
# Returns $stuff unchanged when it starts with $ or @ (a variable); otherwise
# returns it wrapped as qq<c>$stuff<c>, where <c> is a character that is
# neither of the two delimiters nor present in $stuff. Falls back to qq<...>.
sub qq_stuff {
    my($delimiter,$end_delimiter,$stuff) = @_;

    # scalar variable or array variable
    if ($stuff =~ /\A [\$\@] /oxms) {
        return $stuff;
    }

    # quote by delimiter
    # The string range operator ("\x00".."\x1F") returns only its first element
    # for operands like these, so the control characters are enumerated by code
    # point; whitespace (\x09-\x0D) cannot act as a qq delimiter and is left out.
    my %octet   = map {$_ => 1} ($stuff =~ /\G ([\x00-\xFF]) /oxmsg);
    my @control = map { CORE::chr($_) } 0x00 .. 0x08, 0x0E .. 0x1F;
    for my $char (qw( ! " $ % & * + - . / : = ? @ ^ ` | ~ ), @control, "\x7F", "\xFF") {
        next if $char eq $delimiter;
        next if $char eq $end_delimiter;
        if (not $octet{$char}) {
            return join '', 'qq', $char, $stuff, $char;
        }
    }
    return join '', 'qq', '<', $stuff, '>';
}
7213
7214							#
7215							# escape regexp (m'', qr'', and m''b, qr''b)
7216							#
# Arguments:
#   $ope, $delimiter, $end_delimiter  operator and delimiters found in the script
#   $string                           pattern text between the delimiters
#   $modifier                         modifier letters (optional)
# Returns the rewritten regexp source text. An empty pattern is returned as is
# (minus the b, B, i modifiers); otherwise the work is handed to e_qr_qb() when
# a b or B modifier is present and to e_qr_qt() when it is not.
# Dies on modifiers a, d, l, u. Side effect: sets the file-level $slash to 'div'.
sub e_qr_q {
    my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
    $modifier ||= '';

    $modifier =~ tr/p//d;
    if ($modifier =~ /([adlu])/oxms) {
        my $unsupported = $1;

        # report the line of the first caller frame outside this file
        my $line  = 0;
        my $frame = 0;
        while (my @caller = caller($frame++)) {
            next if $caller[1] eq __FILE__;
            $line = $caller[2] + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
            last;
        }
        die qq{Unsupported modifier "$unsupported" used at line $line.\n};
    }

    $slash = 'div';

    # literal null string pattern
    if ($string eq '') {
        $modifier =~ tr/bB//d;
        $modifier =~ tr/i//d;
        return join '', $ope, $delimiter, $end_delimiter, $modifier;
    }

    # with /b /B modifier (the modifier letters are stripped by the test itself)
    return e_qr_qb($ope,$delimiter,$end_delimiter,$string,$modifier) if $modifier =~ tr/bB//d;

    # without /b /B modifier
    return e_qr_qt($ope,$delimiter,$end_delimiter,$string,$modifier);
}
7252
7253							#
7254							# escape regexp (m'', qr'')
7255							#
# Arguments:
#   $ope, $delimiter, $end_delimiter  operator and delimiters found in the script
#   $string                           pattern text between the delimiters
#   $modifier                         modifier letters
# Returns the rewritten regexp source text, always delimited by '/': $ @ / and
# a lone \ are backslash-escaped, character classes go through
# Eutf2::charlist_qr / Eutf2::charlist_not_qr, and the i modifier is expanded
# into explicit alternatives and then removed.
# Dies on an unmatched [ ... ].
sub e_qr_qt {
    my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;

    my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;

    # split regexp
    my @char = $string =~ /\G((?>
        [^\x80-\xFF\\\[\$\@\/] |
        (?:[\xC2-\xDF]|[\xE0-\xE0][\xA0-\xBF]|[\xE1-\xEC][\x80-\xBF]|[\xED-\xED][\x80-\x9F]|[\xEE-\xEF][\x80-\xBF]|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] |
        \[\^ |
        \[\: (?>[a-z]+) \:\] |
        \[\:\^ (?>[a-z]+) \:\] |
        [\$\@\/] |
        \\ (?:$q_char) |
        (?:$q_char)
    ))/oxmsg;

    # unescape character
    for (my $i=0; $i <= $#char; $i++) {

        # open character class [...]
        if ($char[$i] eq '[') {
            my $left = $i;

            # "[]" puts a literal ] first in the class; the look-ahead is
            # guarded so that a trailing [ does not read past the end of @char
            if (($i < $#char) and ($char[$i+1] eq ']')) {
                $i++;
            }
            while (1) {
                if (++$i > $#char) {
                    die __FILE__, ": Unmatched [] in regexp\n";
                }
                if ($char[$i] eq ']') {
                    my $right = $i;

                    # [...]
                    splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);

                    $i = $left;
                    last;
                }
            }
        }

        # open character class [^...]
        elsif ($char[$i] eq '[^') {
            my $left = $i;
            if (($i < $#char) and ($char[$i+1] eq ']')) {
                $i++;
            }
            while (1) {
                if (++$i > $#char) {
                    die __FILE__, ": Unmatched [] in regexp\n";
                }
                if ($char[$i] eq ']') {
                    my $right = $i;

                    # [^...]
                    splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);

                    $i = $left;
                    last;
                }
            }
        }

        # escape $ @ / and \
        elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
            $char[$i] = '\\' . $char[$i];
        }

        # rewrite character class or escape character
        elsif (my $char = character_class($char[$i],$modifier)) {
            $char[$i] = $char;
        }

        # /i modifier
        elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
            if (CORE::length(Eutf2::fc($char[$i])) == 1) {
                $char[$i] = '['   . Eutf2::uc($char[$i])       . Eutf2::fc($char[$i]) . ']';
            }
            else {
                $char[$i] = '(?:' . Eutf2::uc($char[$i]) . '|' . Eutf2::fc($char[$i]) . ')';
            }
        }

        # quote character before ? + * {
        elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {

            # a single octet needs no grouping
            if ($char[$i-1] =~ /\A [\x00-\xFF] \z/oxms) {
            }
            else {
                $char[$i-1] = '(?:' . $char[$i-1] . ')';
            }
        }
    }

    $delimiter     = '/';
    $end_delimiter = '/';

    $modifier =~ tr/i//d;
    return join '', $ope, $delimiter, $anchor, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
}
7358
7359							#
7360							# escape regexp (m''b, qr''b)
							#
							# Builds the text of a match operator from a pattern that is handled octet by octet.
							# Arguments: ($ope, $delimiter, $end_delimiter, $string, $modifier).
							# Returns one string made by joining $ope, '/', '(?:', the pattern pieces, ')',
							# $matched, '/' and $modifier.
							# The delimiters passed in are not used for the result; both are reset to '/'.
							# $matched is not declared in this sub; it comes from the enclosing file.
							#
							# NOTE(review): each "\|" in this listing looks like a "|" that was escaped when the
							# coverage table was rendered -- confirm against the real source file.
7361							#
7362							sub e_qr_qb {
7363	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7364
7365							# split regexp
							# Each list element is one piece of $string; a doubled backslash is meant to stay
							# together as a single piece (assuming "\|" stands for alternation, see above).
7366	0						my @char = $string =~ /\G ((?>[^\\]\|\\\\\|[\x00-\xFF])) /oxmsg;
7367
7368							# unescape character
							# "if (0)" never runs; it only lets every real case below be written as an elsif.
7369	0						for (my $i=0; $i <= $#char; $i++) {
7370	0	0					if (0) {
		0
7371							}
7372
7373							# remain \\
							# A piece that is exactly two backslashes is left untouched.
7374	0						elsif ($char[$i] eq '\\\\') {
7375							}
7376
7377							# escape $ @ / and \
							# A piece that is exactly one of these four characters gets a backslash in front.
7378							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
7379	0						$char[$i] = '\\' . $char[$i];
7380							}
7381							}
7382
							# The result always uses '/' on both sides, whatever delimiters were passed in.
7383	0						$delimiter = '/';
7384	0						$end_delimiter = '/';
7385	0						return join '', $ope, $delimiter, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
7386							}
7387
7388							#
7389							# escape regexp (s/here//)
							#
							# Produces the text of the pattern half of a substitution.
							# Arguments: ($ope, $delimiter, $end_delimiter, $string, $modifier); $modifier may be
							# undefined or empty.
							# Returns a single string assembled with join from the operator, the delimiters,
							# "(?:" ... ")" around the rewritten pattern, $matched and the remaining modifiers.
							# Dies with "Unsupported modifier" when $modifier contains a, d, l or u.
							# Side effect: assigns 'div' to $slash, which is not declared inside this sub.
							#
							# NOTE(review): this listing appears damaged by extraction: "|" shows up as "\|"
							# (so "\|\|=" below is not valid Perl as shown) and several quantifiers seem to have
							# lost their "*" (compare "(?>\s)" with "(?>\s*)" on the same lines). Confirm every
							# pattern against the real source file before relying on it.
7390							#
7391							sub e_s1 {
7392	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7393	0		0				$modifier \|\|= '';
7394
							# The p modifier is dropped silently; a, d, l and u are rejected.
							# The reported line is taken from the first caller frame outside this file, plus
							# the number of newlines found in $_ before pos($_), plus one.
7395	0						$modifier =~ tr/p//d;
7396	0	0					if ($modifier =~ /([adlu])/oxms) {
7397	0						my $line = 0;
7398	0						for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
7399	0	0					if ($filename ne __FILE__) {
7400	0						$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
7401	0						last;
7402							}
7403							}
7404	0						die qq{Unsupported modifier "$1" used at line $line.\n};
7405							}
7406
							# presumably tells the surrounding translator how to read a following "/" -- TODO confirm
7407	0						$slash = 'div';
7408
7409							# literal null string pattern
							# An empty pattern is emitted as-is; b, B and i are removed from the modifiers.
7410	0	0					if ($string eq '') {
		0
7411	0						$modifier =~ tr/bB//d;
7412	0						$modifier =~ tr/i//d;
7413	0						return join '', $ope, $delimiter, $end_delimiter, $modifier;
7414							}
7415
7416							# /b /B modifier
							# tr///d returns how many b/B characters it removed, so this branch runs only when
							# at least one was present. The pattern text itself is passed through unchanged.
7417							elsif ($modifier =~ tr/bB//d) {
7418
7419							# choice again delimiter
							# Only when the current delimiter contains "@" or ":": pick the first bracket pair
							# whose closing character does not occur in the pattern, else the first single
							# character from the list below that does not occur in it.
							# NOTE(review): per perlop, a string range whose first value is not alphanumeric
							# yields only that first value, so "\x00".."\x1F" probably contributes just "\x00" --
							# verify.
7420	0	0					if ($delimiter =~ / [\@:] /oxms) {
7421	0						my @char = $string =~ /\G ([\x00-\xFF]) /oxmsg;
7422	0						my %octet = map {$_ => 1} @char;
	0
7423	0	0					if (not $octet{')'}) {
		0
		0
		0
7424	0						$delimiter = '(';
7425	0						$end_delimiter = ')';
7426							}
7427							elsif (not $octet{'}'}) {
7428	0						$delimiter = '{';
7429	0						$end_delimiter = '}';
7430							}
7431							elsif (not $octet{']'}) {
7432	0						$delimiter = '[';
7433	0						$end_delimiter = ']';
7434							}
7435							elsif (not $octet{'>'}) {
7436	0						$delimiter = '<';
7437	0						$end_delimiter = '>';
7438							}
7439							else {
7440	0						for my $char (qw( ! " $ % & * + - . / = ? ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7441	0	0					if (not $octet{$char}) {
7442	0						$delimiter = $char;
7443	0						$end_delimiter = $char;
7444	0						last;
7445							}
7446							}
7447							}
7448							}
7449
7450	0						my $prematch = '';
7451	0						return join '', $ope, $delimiter, $prematch, '(?:', $string, ')', $matched, $end_delimiter, $modifier;
7452							}
7453
							# From here on: neither an empty pattern nor /b /B.
							# NOTE(review): $metachar is assigned here but not referenced again in this sub.
7454	0	0					my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
7455	0						my $metachar = qr/[\@\\\|[\]{^]/oxms;
7456
7457							# split regexp
							# Cuts the pattern into pieces: single characters, backslash escapes, variables,
							# "[^", POSIX-style "[:name:]" classes and "(?". $q_char and $qq_variable come from
							# the enclosing file.
7458	0						my @char = $string =~ /\G((?>
7459							[^\x80-\xFF\\\$\@\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
7460							\\ (?>[1-9][0-9]*) \|
7461							\\g (?>\s) (?>[1-9][0-9]) \|
7462							\\g (?>\s) \{ (?>\s) (?>[1-9][0-9]) (?>\s) \} \|
7463							\\g (?>\s) \{ (?>\s) - (?>\s) (?>[1-9][0-9]) (?>\s*) \} \|
7464							\\x (?>[0-9A-Fa-f]{1,2}) \|
7465							\\ (?>[0-7]{2,3}) \|
7466							\\c [\x40-\x5F] \|
7467							\\x\{ (?>[0-9A-Fa-f]+) \} \|
7468							\\o\{ (?>[0-7]+) \} \|
7469							\\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
7470							\\ $q_char \|
7471							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
7472							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
7473							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
7474							[\$\@] $qq_variable \|
7475							\$ (?>\s* [0-9]+) \|
7476							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
7477							\$ \$ (?![\w\{]) \|
7478							\$ (?>\s) \$ (?>\s) $qq_variable \|
7479							\[\^ \|
7480							\[\: (?>[a-z]+) :\] \|
7481							\[\:\^ (?>[a-z]+) :\] \|
7482							\(\? \|
7483							$q_char
7484							))/oxmsg;
7485
7486							# choice again delimiter
							# Same selection as in the /b branch above, but the "does it occur" table is built
							# from the pieces in @char rather than from single octets.
7487	0	0					if ($delimiter =~ / [\@:] /oxms) {
7488	0						my %octet = map {$_ => 1} @char;
	0
7489	0	0					if (not $octet{')'}) {
		0
		0
		0
7490	0						$delimiter = '(';
7491	0						$end_delimiter = ')';
7492							}
7493							elsif (not $octet{'}'}) {
7494	0						$delimiter = '{';
7495	0						$end_delimiter = '}';
7496							}
7497							elsif (not $octet{']'}) {
7498	0						$delimiter = '[';
7499	0						$end_delimiter = ']';
7500							}
7501							elsif (not $octet{'>'}) {
7502	0						$delimiter = '<';
7503	0						$end_delimiter = '>';
7504							}
7505							else {
7506	0						for my $char (qw( ! " $ % & * + - . / = ? ^ ` \| ~ ), "\x00".."\x1F", "\x7F", "\xFF") {
7507	0	0					if (not $octet{$char}) {
7508	0						$delimiter = $char;
7509	0						$end_delimiter = $char;
7510	0						last;
7511							}
7512							}
7513							}
7514							}
7515
7516							# count '('
							# NOTE(review): $parens is computed but not referenced again in this sub.
7517	0						my $parens = grep { $_ eq '(' } @char;
	0
7518
							# $left_e counts case/quote openers (\u \l \U \L \F \Q) that were turned into
							# "@{[... qq<"; $right_e counts the \E pieces that closed one of them.
7519	0						my $left_e = 0;
7520	0						my $right_e = 0;
7521	0						for (my $i=0; $i <= $#char; $i++) {
7522
							# First if/elsif chain: reorder or normalize the current piece.
7523							# "\L\u" --> "\u\L"
7524	0	0	0				if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		0	0
		0
		0
		0
		0
7525	0						@char[$i,$i+1] = @char[$i+1,$i];
7526							}
7527
7528							# "\U\l" --> "\l\U"
7529							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
7530	0						@char[$i,$i+1] = @char[$i+1,$i];
7531							}
7532
7533							# octal escape sequence
7534							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
7535	0						$char[$i] = Eutf2::octchr($1);
7536							}
7537
7538							# hexadecimal escape sequence
7539							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
7540	0						$char[$i] = Eutf2::hexchr($1);
7541							}
7542
7543							# \b{...} --> b\{...}
7544							# \B{...} --> B\{...}
7545							# \N{CHARNAME} --> N\{CHARNAME}
7546							# \p{PROPERTY} --> p\{PROPERTY}
7547							# \P{PROPERTY} --> P\{PROPERTY}
7548							elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
7549	0						$char[$i] = $1 . '\\' . $2;
7550							}
7551
7552							# \p, \P, \X --> p, P, X
7553							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
7554	0						$char[$i] = $1;
7555							}
7556
							# Second, independent if/elsif chain: it tests the piece again after the chain
							# above may have changed it. "if (0)" never runs; it only lets every real case be
							# an elsif.
7557	0	0	0				if (0) {
		0	0
		0	0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
7558							}
7559
7560							# join separated multiple-octet
							# When this piece and the next three, two or one pieces are all octal/hex escapes and
							# evaluating them together as a double-quoted string gives something that matches
							# $q_char, the following pieces are appended to this one and removed from @char.
7561	0						elsif ($char[$i] =~ /\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms) {
7562	0	0	0				if ( ($i+3 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+3]) == 3) and (CORE::eval(sprintf '"%s%s%s%s"', @char[$i..$i+3]) =~ /\A $q_char \z/oxms)) {
		0	0
		0	0
			0
			0
			0
7563	0						$char[$i] .= join '', splice @char, $i+1, 3;
7564							}
7565							elsif (($i+2 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+2]) == 2) and (CORE::eval(sprintf '"%s%s%s"', @char[$i..$i+2]) =~ /\A $q_char \z/oxms)) {
7566	0						$char[$i] .= join '', splice @char, $i+1, 2;
7567							}
7568							elsif (($i+1 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, $char[$i+1 ]) == 1) and (CORE::eval(sprintf '"%s%s"', @char[$i..$i+1]) =~ /\A $q_char \z/oxms)) {
7569	0						$char[$i] .= join '', splice @char, $i+1, 1;
7570							}
7571							}
7572
7573							# open character class [...]
							# A "]" directly after "[" is stepped over so it is not taken as the closing
							# bracket. The whole "[ ... ]" run is then replaced by one piece. If any member
							# starts with "$" or "@", the replacement is the text of a call to
							# Eutf2::charlist_qr (built with qq_stuff) rather than the result of calling it here.
7574							elsif ($char[$i] eq '[') {
7575	0						my $left = $i;
7576	0	0					if ($char[$i+1] eq ']') {
7577	0						$i++;
7578							}
7579	0						while (1) {
7580	0	0					if (++$i > $#char) {
7581	0						die __FILE__, ": Unmatched [] in regexp\n";
7582							}
7583	0	0					if ($char[$i] eq ']') {
7584	0						my $right = $i;
7585
7586							# [...]
7587	0	0					if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
7588	0						splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0
7589							}
7590							else {
7591	0						splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
7592							}
7593
7594	0						$i = $left;
7595	0						last;
7596							}
7597							}
7598							}
7599
7600							# open character class [^...]
							# Same procedure as for "[...]", using Eutf2::charlist_not_qr.
7601							elsif ($char[$i] eq '[^') {
7602	0						my $left = $i;
7603	0	0					if ($char[$i+1] eq ']') {
7604	0						$i++;
7605							}
7606	0						while (1) {
7607	0	0					if (++$i > $#char) {
7608	0						die __FILE__, ": Unmatched [] in regexp\n";
7609							}
7610	0	0					if ($char[$i] eq ']') {
7611	0						my $right = $i;
7612
7613							# [^...]
7614	0	0					if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
7615	0						splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0
7616							}
7617							else {
7618	0						splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
7619							}
7620
7621	0						$i = $left;
7622	0						last;
7623							}
7624							}
7625							}
7626
7627							# rewrite character class or escape character
							# character_class() is defined elsewhere; a true return value replaces the piece.
7628							elsif (my $char = character_class($char[$i],$modifier)) {
7629	0						$char[$i] = $char;
7630							}
7631
7632							# /i modifier
							# Applies to a single-octet piece whose Eutf2::uc and Eutf2::fc forms differ: the
							# piece becomes a two-member bracket class when the fc form is one octet long,
							# otherwise a "(?:" ... ")" group holding both forms.
7633							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
7634	0	0					if (CORE::length(Eutf2::fc($char[$i])) == 1) {
7635	0						$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
7636							}
7637							else {
7638	0						$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
7639							}
7640							}
7641
7642							# \u \l \U \L \F \Q \E
							# Each opener becomes the start of an interpolated call that quotes its argument with
							# qq< >, which is why a literal "<" or ">" is backslash-escaped while an opener is
							# still unclosed. \E closes the most recent opener, or is dropped when none is open.
7643							elsif ($char[$i] =~ /\A [<>] \z/oxms) {
7644	0	0					if ($right_e < $left_e) {
7645	0						$char[$i] = '\\' . $char[$i];
7646							}
7647							}
7648							elsif ($char[$i] eq '\u') {
7649	0						$char[$i] = '@{[Eutf2::ucfirst qq<';
7650	0						$left_e++;
7651							}
7652							elsif ($char[$i] eq '\l') {
7653	0						$char[$i] = '@{[Eutf2::lcfirst qq<';
7654	0						$left_e++;
7655							}
7656							elsif ($char[$i] eq '\U') {
7657	0						$char[$i] = '@{[Eutf2::uc qq<';
7658	0						$left_e++;
7659							}
7660							elsif ($char[$i] eq '\L') {
7661	0						$char[$i] = '@{[Eutf2::lc qq<';
7662	0						$left_e++;
7663							}
7664							elsif ($char[$i] eq '\F') {
7665	0						$char[$i] = '@{[Eutf2::fc qq<';
7666	0						$left_e++;
7667							}
7668							elsif ($char[$i] eq '\Q') {
7669	0						$char[$i] = '@{[CORE::quotemeta qq<';
7670	0						$left_e++;
7671							}
7672							elsif ($char[$i] eq '\E') {
7673	0	0					if ($right_e < $left_e) {
7674	0						$char[$i] = '>]}';
7675	0						$right_e++;
7676							}
7677							else {
7678	0						$char[$i] = '';
7679							}
7680							}
							# NOTE(review): the next two branches repeat the '\Q' and '\E' tests already made
							# earlier in this same chain, so they can never be reached.
7681							elsif ($char[$i] eq '\Q') {
7682	0						while (1) {
7683	0	0					if (++$i > $#char) {
7684	0						last;
7685							}
7686	0	0					if ($char[$i] eq '\E') {
7687	0						last;
7688							}
7689							}
7690							}
7691							elsif ($char[$i] eq '\E') {
7692							}
7693
							# The empty branches below recognize a piece and deliberately leave it unchanged.
7694							# \0 --> \0
7695							elsif ($char[$i] =~ /\A \\ (?>\s*) 0 \z/oxms) {
7696							}
7697
7698							# \g{N}, \g{-N}
7699
7700							# P.108 Using Simple Patterns
7701							# in Chapter 7: In the World of Regular Expressions
7702							# of ISBN 978-0-596-52010-6 Learning Perl, Fifth Edition
7703
7704							# P.221 Capturing
7705							# in Chapter 5: Pattern Matching
7706							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
7707
7708							# \g{-1}, \g{-2}, \g{-3} --> \g{-1}, \g{-2}, \g{-3}
7709							elsif ($char[$i] =~ /\A \\g (?>\s) \{ (?>\s) - (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
7710							}
7711
7712							# \g{1}, \g{2}, \g{3} --> \g{2}, \g{3}, \g{4} (only when multibyte anchoring is enable)
7713							elsif ($char[$i] =~ /\A \\g (?>\s) \{ (?>\s) ((?>[1-9][0-9])) (?>\s) \} \z/oxms) {
7714							}
7715
7716							# \g1, \g2, \g3 --> \g2, \g3, \g4 (only when multibyte anchoring is enable)
7717							elsif ($char[$i] =~ /\A \\g (?>\s) ((?>[1-9][0-9])) \z/oxms) {
7718							}
7719
7720							# \1, \2, \3 --> \2, \3, \4 (only when multibyte anchoring is enable)
7721							elsif ($char[$i] =~ /\A \\ (?>\s) ((?>[1-9][0-9])) \z/oxms) {
7722							}
7723
							# In all variable branches below, /i wraps the piece in the text of a call to
							# Eutf2::ignorecase so the wrapping happens when the generated code is interpolated.
7724							# $0 --> $0
7725							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
7726	0	0					if ($ignorecase) {
7727	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7728							}
7729							}
7730							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
7731	0	0					if ($ignorecase) {
7732	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7733							}
7734							}
7735
7736							# $$ --> $$
7737							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
7738							}
7739
7740							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
7741							# $1, $2, $3 --> $1, $2, $3 otherwise
							# The capture number is handed to e_capture(), which is defined elsewhere.
7742							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
7743	0						$char[$i] = e_capture($1);
7744	0	0					if ($ignorecase) {
7745	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7746							}
7747							}
7748							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
7749	0						$char[$i] = e_capture($1);
7750	0	0					if ($ignorecase) {
7751	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7752							}
7753							}
7754
7755							# $$foo[ ... ] --> $ $foo->[ ... ]
7756							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
7757	0						$char[$i] = e_capture($1.'->'.$2);
7758	0	0					if ($ignorecase) {
7759	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7760							}
7761							}
7762
7763							# $$foo{ ... } --> $ $foo->{ ... }
7764							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
7765	0						$char[$i] = e_capture($1.'->'.$2);
7766	0	0					if ($ignorecase) {
7767	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7768							}
7769							}
7770
7771							# $$foo
7772							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
7773	0						$char[$i] = e_capture($1);
7774	0	0					if ($ignorecase) {
7775	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7776							}
7777							}
7778
7779							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
7780							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
7781	0	0					if ($ignorecase) {
7782	0						$char[$i] = '@{[Eutf2::ignorecase(Eutf2::PREMATCH())]}';
7783							}
7784							else {
7785	0						$char[$i] = '@{[Eutf2::PREMATCH()]}';
7786							}
7787							}
7788
7789							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
7790							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
7791	0	0					if ($ignorecase) {
7792	0						$char[$i] = '@{[Eutf2::ignorecase(Eutf2::MATCH())]}';
7793							}
7794							else {
7795	0						$char[$i] = '@{[Eutf2::MATCH()]}';
7796							}
7797							}
7798
7799							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
7800							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
7801	0	0					if ($ignorecase) {
7802	0						$char[$i] = '@{[Eutf2::ignorecase(Eutf2::POSTMATCH())]}';
7803							}
7804							else {
7805	0						$char[$i] = '@{[Eutf2::POSTMATCH()]}';
7806							}
7807							}
7808
7809							# ${ foo }
							# Left as written unless /i is in effect.
7810							elsif ($char[$i] =~ /\A \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} \z/oxms) {
7811	0	0					if ($ignorecase) {
7812	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7813							}
7814							}
7815
7816							# ${ ... }
7817							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
7818	0						$char[$i] = e_capture($1);
7819	0	0					if ($ignorecase) {
7820	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7821							}
7822							}
7823
7824							# $scalar or @array
							# Any other piece starting with "$" or "@" is passed through e_string(), which is
							# defined elsewhere.
7825							elsif ($char[$i] =~ /\A [\$\@].+ /oxms) {
7826	0						$char[$i] = e_string($char[$i]);
7827	0	0					if ($ignorecase) {
7828	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
7829							}
7830							}
7831
7832							# quote character before ? + * {
							# When the piece before a quantifier is more than one octet and not an octal/hex
							# escape, it is wrapped in "(?:" ... ")" so the quantifier applies to all of it.
							# NOTE(review): the class "[0-9-A-Fa-f]" as written also accepts a literal "-";
							# the other hex classes in this sub are "[0-9A-Fa-f]" -- confirm which is intended.
7833							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
7834	0	0					if ($char[$i-1] =~ /\A (?:[\x00-\xFF]\|\\[0-7]{2,3}\|\\x[0-9-A-Fa-f]{1,2}) \z/oxms) {
7835							}
7836							else {
7837	0						$char[$i-1] = '(?:' . $char[$i-1] . ')';
7838							}
7839							}
7840							}
7841
7842							# make regexp string
							# The i modifier is removed from the emitted modifiers. If more openers than \E
							# were seen, the missing ">]}" closers are appended after the pieces.
7843	0						my $prematch = '';
7844	0						$modifier =~ tr/i//d;
7845	0	0					if ($left_e > $right_e) {
7846	0						return join '', $ope, $delimiter, $prematch, '(?:', @char, '>]}' x ($left_e - $right_e), ')', $matched, $end_delimiter, $modifier;
7847							}
7848	0						return join '', $ope, $delimiter, $prematch, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
7849							}
7850
7851							#
7852							# escape regexp (s'here'' or s'here''b)
							#
							# Entry point for the pattern half of a substitution written with single quotes.
							# Arguments: ($ope, $delimiter, $end_delimiter, $string, $modifier); $modifier may be
							# undefined or empty.
							# Returns the string produced by e_s1_qb (when b or B was among the modifiers) or by
							# e_s1_qt (otherwise); for an empty pattern it returns the operator, the delimiters
							# and the modifiers with b, B and i removed.
							# Dies with "Unsupported modifier" when $modifier contains a, d, l or u.
							# Side effect: assigns 'div' to $slash, which is not declared inside this sub.
							#
							# NOTE(review): "\|\|=" below is not valid Perl as shown; it looks like "||=" with the
							# pipes escaped by the table rendering of this listing -- confirm against the source.
7853							#
7854							sub e_s1_q {
7855	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7856	0		0				$modifier \|\|= '';
7857
							# The p modifier is dropped silently; a, d, l and u are rejected.
							# The reported line is taken from the first caller frame outside this file, plus
							# the number of newlines found in $_ before pos($_), plus one.
7858	0						$modifier =~ tr/p//d;
7859	0	0					if ($modifier =~ /([adlu])/oxms) {
7860	0						my $line = 0;
7861	0						for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
7862	0	0					if ($filename ne __FILE__) {
7863	0						$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
7864	0						last;
7865							}
7866							}
7867	0						die qq{Unsupported modifier "$1" used at line $line.\n};
7868							}
7869
							# presumably tells the surrounding translator how to read a following "/" -- TODO confirm
7870	0						$slash = 'div';
7871
7872							# literal null string pattern
7873	0	0					if ($string eq '') {
		0
7874	0						$modifier =~ tr/bB//d;
7875	0						$modifier =~ tr/i//d;
7876	0						return join '', $ope, $delimiter, $end_delimiter, $modifier;
7877							}
7878
7879							# with /b /B modifier
							# tr///d both tests for b/B and removes them, so e_s1_qb receives the modifiers
							# without b or B.
7880							elsif ($modifier =~ tr/bB//d) {
7881	0						return e_s1_qb($ope,$delimiter,$end_delimiter,$string,$modifier);
7882							}
7883
7884							# without /b /B modifier
7885							else {
7886	0						return e_s1_qt($ope,$delimiter,$end_delimiter,$string,$modifier);
7887							}
7888							}
7889
7890							#
7891							# escape regexp (s'here'')
							#
							# Rewrites a single-quoted substitution pattern that is used without /b or /B.
							# Arguments: ($ope, $delimiter, $end_delimiter, $string, $modifier).
							# Returns one string: $ope, '/', '(?:', the rewritten pieces, ')', $matched, '/' and
							# the modifiers with i removed. The delimiters passed in are replaced by '/'.
							# Dies with "Unmatched [] in regexp" when a "[" or "[^" has no closing "]".
							# $q_char and $matched are not declared in this sub; they come from the enclosing file.
							#
							# NOTE(review): each "\|" in this listing looks like a "|" that was escaped when the
							# coverage table was rendered -- confirm against the real source file.
7892							#
7893							sub e_s1_qt {
7894	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
7895
7896	0	0					my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
7897
7898							# split regexp
							# Cuts the pattern into pieces: single characters, "[^", POSIX-style "[:name:]"
							# classes, lone "$" "@" "/", and backslash escapes.
7899	0						my @char = $string =~ /\G((?>
7900							[^\x80-\xFF\\\[\$\@\/] \|
7901							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
7902							\[\^ \|
7903							\[\: (?>[a-z]+) \:\] \|
7904							\[\:\^ (?>[a-z]+) \:\] \|
7905							[\$\@\/] \|
7906							\\ (?:$q_char) \|
7907							(?:$q_char)
7908							))/oxmsg;
7909
7910							# unescape character
							# "if (0)" never runs; it only lets every real case below be written as an elsif.
7911	0						for (my $i=0; $i <= $#char; $i++) {
7912	0	0	0				if (0) {
		0	0
		0	0
		0
		0
		0
7913							}
7914
7915							# open character class [...]
							# A "]" directly after "[" is stepped over so it is not taken as the closing
							# bracket. The whole "[ ... ]" run is replaced by the single value returned from
							# Eutf2::charlist_qr, and $i is put back on that value.
7916	0						elsif ($char[$i] eq '[') {
7917	0						my $left = $i;
7918	0	0					if ($char[$i+1] eq ']') {
7919	0						$i++;
7920							}
7921	0						while (1) {
7922	0	0					if (++$i > $#char) {
7923	0						die __FILE__, ": Unmatched [] in regexp\n";
7924							}
7925	0	0					if ($char[$i] eq ']') {
7926	0						my $right = $i;
7927
7928							# [...]
7929	0						splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
7930
7931	0						$i = $left;
7932	0						last;
7933							}
7934							}
7935							}
7936
7937							# open character class [^...]
							# Same procedure as for "[...]", using Eutf2::charlist_not_qr.
7938							elsif ($char[$i] eq '[^') {
7939	0						my $left = $i;
7940	0	0					if ($char[$i+1] eq ']') {
7941	0						$i++;
7942							}
7943	0						while (1) {
7944	0	0					if (++$i > $#char) {
7945	0						die __FILE__, ": Unmatched [] in regexp\n";
7946							}
7947	0	0					if ($char[$i] eq ']') {
7948	0						my $right = $i;
7949
7950							# [^...]
7951	0						splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
7952
7953	0						$i = $left;
7954	0						last;
7955							}
7956							}
7957							}
7958
7959							# escape $ @ / and \
							# A piece that is exactly one of these four characters gets a backslash in front.
7960							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
7961	0						$char[$i] = '\\' . $char[$i];
7962							}
7963
7964							# rewrite character class or escape character
							# character_class() is defined elsewhere; a true return value replaces the piece.
7965							elsif (my $char = character_class($char[$i],$modifier)) {
7966	0						$char[$i] = $char;
7967							}
7968
7969							# /i modifier
							# Applies to a single-octet piece whose Eutf2::uc and Eutf2::fc forms differ: the
							# piece becomes a two-member bracket class when the fc form is one octet long,
							# otherwise a "(?:" ... ")" group holding both forms.
7970							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
7971	0	0					if (CORE::length(Eutf2::fc($char[$i])) == 1) {
7972	0						$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
7973							}
7974							else {
7975	0						$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
7976							}
7977							}
7978
7979							# quote character before ? + * {
							# When the piece before a quantifier is more than one octet, it is wrapped in
							# "(?:" ... ")" so the quantifier applies to all of it.
7980							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
7981	0	0					if ($char[$i-1] =~ /\A [\x00-\xFF] \z/oxms) {
7982							}
7983							else {
7984	0						$char[$i-1] = '(?:' . $char[$i-1] . ')';
7985							}
7986							}
7987							}
7988
							# The i modifier is removed from the emitted modifiers; the result always uses '/'
							# on both sides.
7989	0						$modifier =~ tr/i//d;
7990	0						$delimiter = '/';
7991	0						$end_delimiter = '/';
7992	0						my $prematch = '';
7993	0						return join '', $ope, $delimiter, $prematch, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
7994							}
7995
7996							#
7997							# escape regexp (s'here''b)
							#
							# Rewrites a single-quoted substitution pattern that is handled octet by octet.
							# Arguments: ($ope, $delimiter, $end_delimiter, $string, $modifier).
							# Returns one string: $ope, '/', '(?:', the pattern pieces, ')', $matched, '/' and
							# $modifier. The delimiters passed in are replaced by '/'.
							# $matched is not declared in this sub; it comes from the enclosing file.
							#
							# NOTE(review): each "\|" in this listing looks like a "|" that was escaped when the
							# coverage table was rendered -- confirm against the real source file.
7998							#
7999							sub e_s1_qb {
8000	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8001
8002							# split regexp
							# The pattern has no capture group, so the list-context /g match returns each whole
							# match as one piece.
8003	0						my @char = $string =~ /\G (?>[^\\]\|\\\\\|[\x00-\xFF]) /oxmsg;
8004
8005							# unescape character
							# "if (0)" never runs; it only lets every real case below be written as an elsif.
8006	0						for (my $i=0; $i <= $#char; $i++) {
8007	0	0					if (0) {
		0
8008							}
8009
8010							# remain \\
							# A piece that is exactly two backslashes is left untouched.
8011	0						elsif ($char[$i] eq '\\\\') {
8012							}
8013
8014							# escape $ @ / and \
							# A piece that is exactly one of these four characters gets a backslash in front.
8015							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
8016	0						$char[$i] = '\\' . $char[$i];
8017							}
8018							}
8019
8020	0						$delimiter = '/';
8021	0						$end_delimiter = '/';
8022	0						my $prematch = '';
8023	0						return join '', $ope, $delimiter, $prematch, '(?:', @char, ')', $matched, $end_delimiter, $modifier;
8024							}
8025
8026							#
8027							# escape regexp (s''here')
							#
							# Rewrites the replacement half of a single-quoted substitution.
							# Arguments: ($ope, $delimiter, $end_delimiter, $string).
							# Returns one string: $ope, $delimiter, the pieces of $string, $end_delimiter.
							# Unlike the pattern-side subs, the delimiters passed in are used as given.
							# Side effect: assigns 'div' to $slash, which is not declared inside this sub.
							# $q_char is not declared in this sub; it comes from the enclosing file.
							#
							# NOTE(review): each "\|" in this listing looks like a "|" that was escaped when the
							# coverage table was rendered -- confirm against the real source file.
8028							#
8029							sub e_s2_q {
8030	0			0	0		my($ope,$delimiter,$end_delimiter,$string) = @_;
8031
8032	0						$slash = 'div';
8033
							# The pattern has no capture group, so the list-context /g match returns each whole
							# match as one piece.
8034	0						my @char = $string =~ / \G (?>[^\x80-\xFF\\]\|\\\\\|$q_char) /oxmsg;
							# "if (0)" never runs; it only lets every real case below be written as an elsif.
8035	0						for (my $i=0; $i <= $#char; $i++) {
8036	0	0					if (0) {
		0
8037							}
8038
8039							# not escape \\
							# A piece that is exactly two backslashes is left untouched.
8040	0						elsif ($char[$i] =~ /\A \\\\ \z/oxms) {
8041							}
8042
8043							# escape $ @ / and \
							# A piece that is exactly one of these four characters gets a backslash in front.
8044							elsif ($char[$i] =~ /\A [\$\@\/\\] \z/oxms) {
8045	0						$char[$i] = '\\' . $char[$i];
8046							}
8047							}
8048
8049	0						return join '', $ope, $delimiter, @char, $end_delimiter;
8050							}
8051
8052							#
8053							# escape regexp (s/here/and here/modifier)
							#
							# Builds the text of an expression that stands in for a whole substitution.
							# Arguments: ($variable, $delimiter1, $pattern, $end_delimiter1, $delimiter2,
							# $replacement, $end_delimiter2, $modifier); $modifier may be undefined or empty.
							# Returns the generated expression as one string.
							# Dies with "Unsupported modifier" when $modifier contains a, d, l or u.
							# Side effects: assigns 'div' to $slash and clears $sub_variable and $bind_operator
							# before returning; none of the three is declared inside this sub. $bind_operator
							# and $s_matched are also read from the enclosing file.
							#
							# NOTE(review): this listing appears damaged by extraction: "|" shows up as "\|"
							# (so "\|\|=" below is not valid Perl as shown), and two sprintf templates are
							# reduced to a bare "q," -- confirm against the real source file.
8054							#
8055							sub e_sub {
8056	0			0	0		my($variable,$delimiter1,$pattern,$end_delimiter1,$delimiter2,$replacement,$end_delimiter2,$modifier) = @_;
8057	0		0				$modifier \|\|= '';
8058
							# The p modifier is dropped silently; a, d, l and u are rejected.
							# The reported line is taken from the first caller frame outside this file, plus
							# the number of newlines found in $_ before pos($_), plus one.
8059	0						$modifier =~ tr/p//d;
8060	0	0					if ($modifier =~ /([adlu])/oxms) {
8061	0						my $line = 0;
8062	0						for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8063	0	0					if ($filename ne __FILE__) {
8064	0						$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8065	0						last;
8066							}
8067							}
8068	0						die qq{Unsupported modifier "$1" used at line $line.\n};
8069							}
8070
							# With no explicit target the substitution applies to $_ with the =~ operator.
8071	0	0					if ($variable eq '') {
8072	0						$variable = '$_';
8073	0						$bind_operator = ' =~ ';
8074							}
8075
8076	0						$slash = 'div';
8077
8078							# P.128 Start of match (or end of previous match): \G
8079							# P.130 Advanced Use of \G with Perl
8080							# in Chapter 3: Overview of Regular Expression Features and Flavors
8081							# P.312 Iterative Matching: Scalar Context, with /g
8082							# in Chapter 7: Perl
8083							# of ISBN 0-596-00289-0 Mastering Regular Expressions, Second edition
8084
8085							# P.181 Where You Left Off: The \G Assertion
8086							# in Chapter 5: Pattern Matching
8087							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
8088
8089							# P.220 Where You Left Off: The \G Assertion
8090							# in Chapter 5: Pattern Matching
8091							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
8092
							# tr///d returns the number of characters removed, so these hold how many "e" and
							# "r" modifiers were given; both letters are removed from $modifier.
8093	0						my $e_modifier = $modifier =~ tr/e//d;
8094	0						my $r_modifier = $modifier =~ tr/r//d;
8095
							# $my keeps the target text when it has the shape handled by the final
							# "(my $foo = $bar) =~ s///" step; $variable is then cut down by removing a leading
							# local/my/our/state keyword and everything from "=" onward.
							# NOTE(review): the two bare "$" in the pattern below look like "\(" and "\)" that
							# were altered by extraction -- confirm against the real source file.
8096	0						my $my = '';
8097	0	0					if ($variable =~ s/\A $ (?>\s*) ( (?>(?: local \b \| my \b \| our \b \| state \b )?) .+ ) $ \z/$1/oxms) {
8098	0						$my = $variable;
8099	0						$variable =~ s/ (?: local \b \| my \b \| our \b \| state \b ) (?>\s*) //oxms;
8100	0						$variable =~ s/ = .+ \z//oxms;
8101							}
8102
							# NOTE(review): $variable_basename is computed but not referenced again in this sub.
8103	0						(my $variable_basename = $variable) =~ s/ [\[\{].* \z//oxms;
8104	0						$variable_basename =~ s/ \s+ \z//oxms;
8105
8106							# quote replacement string
							# With at least one "e" the replacement goes through e_qq with empty operator and
							# delimiters and one "e" is used up; otherwise it is quoted as qq, through e_s2_q
							# when the replacement delimiter is a single quote and through e_qq when it is not.
8107	0						my $e_replacement = '';
8108	0	0					if ($e_modifier >= 1) {
8109	0						$e_replacement = e_qq('', '', '', $replacement);
8110	0						$e_modifier--;
8111							}
8112							else {
8113	0	0					if ($delimiter2 eq "'") {
8114	0						$e_replacement = e_s2_q('qq', '/', '/', $replacement);
8115							}
8116							else {
8117	0						$e_replacement = e_qq ('qq', $delimiter2, $end_delimiter2, $replacement);
8118							}
8119							}
8120
8121	0						my $sub = '';
8122
8123							# with /r
8124	0	0					if ($r_modifier) {
8125	0	0					if (0) {
8126							}
8127
8128							# s///gr without multibyte anchoring
							# NOTE(review): the sprintf template below is shown only as "q," -- its text seems
							# to be missing from this listing.
8129	0						elsif ($modifier =~ /g/oxms) {
8130	0	0					$sub = sprintf(
8131							# 1 2 3 4 5
8132							q,
8133
8134							$variable, # 1
8135							($delimiter1 eq "'") ? # 2
8136							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8137							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8138							$s_matched, # 3
8139							$e_replacement, # 4
8140							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 5
8141							);
8142							}
8143
8144							# s///r
							# The pattern is emitted as an m operator through e_s1_q (single-quote delimiter) or
							# e_s1 (any other delimiter); each remaining "e" adds one more eval step.
8145							else {
8146
8147	0						my $prematch = q{$`};
8148
8149	0	0					$sub = sprintf(
8150							# 1 2 3 4 5 6 7
8151							q<(%s =~ %s) ? CORE::eval{%s local $^W=0; local $UTF2::re_r=%s; %s"%s$UTF2::re_r$'" } : %s>,
8152
8153							$variable, # 1
8154							($delimiter1 eq "'") ? # 2
8155							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8156							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8157							$s_matched, # 3
8158							$e_replacement, # 4
8159							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 5
8160							$prematch, # 6
8161							$variable, # 7
8162							);
8163							}
8164
8165							# $var !~ s///r doesn't make sense
							# The generated code is prefixed with a die call in that case.
8166	0	0					if ($bind_operator =~ / !~ /oxms) {
8167	0						$sub = q{die("$0: Using !~ with s///r doesn't make sense"), } . $sub;
8168							}
8169							}
8170
8171							# without /r
8172							else {
8173	0	0					if (0) {
8174							}
8175
8176							# s///g without multibyte anchoring
							# NOTE(review): the sprintf template below is shown only as "q," -- its text seems
							# to be missing from this listing.
8177	0						elsif ($modifier =~ /g/oxms) {
8178	0	0					$sub = sprintf(
		0
8179							# 1 2 3 4 5 6 7 8
8180							q,
8181
8182							$variable, # 1
8183							($delimiter1 eq "'") ? # 2
8184							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8185							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8186							$s_matched, # 3
8187							$e_replacement, # 4
8188							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 5
8189							$variable, # 6
8190							$variable, # 7
8191							($bind_operator =~ / !~ /oxms) ? '!' : '', # 8
8192							);
8193							}
8194
8195							# s///
							# Two templates: the first is used when the bind operator is =~, the second
							# otherwise; they differ in which side of the "?:" performs the assignment and in
							# the value (1 or undef) each side yields.
8196							else {
8197
8198	0						my $prematch = q{$`};
8199
8200	0	0					$sub = sprintf(
		0
8201
8202							($bind_operator =~ / =~ /oxms) ?
8203
8204							# 1 2 3 4 5 6 7 8
8205							q<(%s%s%s) ? CORE::eval{%s local $^W=0; local $UTF2::re_r=%s; %s%s="%s$UTF2::re_r$'"; 1 } : undef> :
8206
8207							# 1 2 3 4 5 6 7 8
8208							q<(%s%s%s) ? 1 : CORE::eval{%s local $^W=0; local $UTF2::re_r=%s; %s%s="%s$UTF2::re_r$'"; undef }>,
8209
8210							$variable, # 1
8211							$bind_operator, # 2
8212							($delimiter1 eq "'") ? # 3
8213							e_s1_q('m', $delimiter1, $end_delimiter1, $pattern, $modifier) : # :
8214							e_s1 ('m', $delimiter1, $end_delimiter1, $pattern, $modifier), # :
8215							$s_matched, # 4
8216							$e_replacement, # 5
8217							'$UTF2::re_r=CORE::eval $UTF2::re_r; ' x $e_modifier, # 6
8218							$variable, # 7
8219							$prematch, # 8
8220							);
8221							}
8222							}
8223
8224							# (my $foo = $bar) =~ s/// --> (my $foo = $bar, CORE::eval { ... })[1]
8225	0	0					if ($my ne '') {
8226	0						$sub = "($my, $sub)[1]";
8227							}
8228
8229							# clear s/// variable
8230	0						$sub_variable = '';
8231	0						$bind_operator = '';
8232
8233	0						return $sub;
8234							}
8235
8236							#
8237							# escape regexp of split qr//
8238							#
8239							sub e_split {
8240	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8241	0		0				$modifier \|\|= '';
8242
8243	0						$modifier =~ tr/p//d;
8244	0	0					if ($modifier =~ /([adlu])/oxms) {
8245	0						my $line = 0;
8246	0						for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8247	0	0					if ($filename ne __FILE__) {
8248	0						$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8249	0						last;
8250							}
8251							}
8252	0						die qq{Unsupported modifier "$1" used at line $line.\n};
8253							}
8254
8255	0						$slash = 'div';
8256
8257							# /b /B modifier
8258	0	0					if ($modifier =~ tr/bB//d) {
8259	0						return join '', 'split', $ope, $delimiter, $string, $end_delimiter, $modifier;
8260							}
8261
8262	0	0					my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
8263	0						my $metachar = qr/[\@\\\|[\]{^]/oxms;
8264
8265							# split regexp
8266	0						my @char = $string =~ /\G((?>
8267							[^\x80-\xFF\\\$\@\[\(]\|(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
8268							\\x (?>[0-9A-Fa-f]{1,2}) \|
8269							\\ (?>[0-7]{2,3}) \|
8270							\\c [\x40-\x5F] \|
8271							\\x\{ (?>[0-9A-Fa-f]+) \} \|
8272							\\o\{ (?>[0-7]+) \} \|
8273							\\[bBNpP]\{ (?>[^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} \|
8274							\\ $q_char \|
8275							\$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} \|
8276							\$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} \|
8277							\$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} \|
8278							[\$\@] $qq_variable \|
8279							\$ (?>\s* [0-9]+) \|
8280							\$ (?>\s) \{ (?>\s [0-9]+ \s*) \} \|
8281							\$ \$ (?![\w\{]) \|
8282							\$ (?>\s) \$ (?>\s) $qq_variable \|
8283							\[\^ \|
8284							\[\: (?>[a-z]+) :\] \|
8285							\[\:\^ (?>[a-z]+) :\] \|
8286							\(\? \|
8287							$q_char
8288							))/oxmsg;
8289
8290	0						my $left_e = 0;
8291	0						my $right_e = 0;
8292	0						for (my $i=0; $i <= $#char; $i++) {
8293
8294							# "\L\u" --> "\u\L"
8295	0	0	0				if (($char[$i] eq '\L') and ($char[$i+1] eq '\u')) {
		0	0
		0
		0
		0
		0
8296	0						@char[$i,$i+1] = @char[$i+1,$i];
8297							}
8298
8299							# "\U\l" --> "\l\U"
8300							elsif (($char[$i] eq '\U') and ($char[$i+1] eq '\l')) {
8301	0						@char[$i,$i+1] = @char[$i+1,$i];
8302							}
8303
8304							# octal escape sequence
8305							elsif ($char[$i] =~ /\A \\o \{ ([0-7]+) \} \z/oxms) {
8306	0						$char[$i] = Eutf2::octchr($1);
8307							}
8308
8309							# hexadecimal escape sequence
8310							elsif ($char[$i] =~ /\A \\x \{ ([0-9A-Fa-f]+) \} \z/oxms) {
8311	0						$char[$i] = Eutf2::hexchr($1);
8312							}
8313
8314							# \b{...} --> b\{...}
8315							# \B{...} --> B\{...}
8316							# \N{CHARNAME} --> N\{CHARNAME}
8317							# \p{PROPERTY} --> p\{PROPERTY}
8318							# \P{PROPERTY} --> P\{PROPERTY}
8319							elsif ($char[$i] =~ /\A \\ ([bBNpP]) ( \{ ([^\x80-\xFF0-9\}][^\x80-\xFF\}]*) \} ) \z/oxms) {
8320	0						$char[$i] = $1 . '\\' . $2;
8321							}
8322
8323							# \p, \P, \X --> p, P, X
8324							elsif ($char[$i] =~ /\A \\ ( [pPX] ) \z/oxms) {
8325	0						$char[$i] = $1;
8326							}
8327
8328	0	0	0				if (0) {
		0	0
		0	0
		0	0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
8329							}
8330
8331							# join separated multiple-octet
8332	0						elsif ($char[$i] =~ /\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms) {
8333	0	0	0				if ( ($i+3 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+3]) == 3) and (CORE::eval(sprintf '"%s%s%s%s"', @char[$i..$i+3]) =~ /\A $q_char \z/oxms)) {
		0	0
		0	0
			0
			0
			0
8334	0						$char[$i] .= join '', splice @char, $i+1, 3;
8335							}
8336							elsif (($i+2 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, @char[$i+1..$i+2]) == 2) and (CORE::eval(sprintf '"%s%s%s"', @char[$i..$i+2]) =~ /\A $q_char \z/oxms)) {
8337	0						$char[$i] .= join '', splice @char, $i+1, 2;
8338							}
8339							elsif (($i+1 <= $#char) and (grep(/\A (?: \\ [0-7]{2,3} \| \\x [0-9A-Fa-f]{1,2}) \z/oxms, $char[$i+1 ]) == 1) and (CORE::eval(sprintf '"%s%s"', @char[$i..$i+1]) =~ /\A $q_char \z/oxms)) {
8340	0						$char[$i] .= join '', splice @char, $i+1, 1;
8341							}
8342							}
8343
8344							# open character class [...]
8345							elsif ($char[$i] eq '[') {
8346	0						my $left = $i;
8347	0	0					if ($char[$i+1] eq ']') {
8348	0						$i++;
8349							}
8350	0						while (1) {
8351	0	0					if (++$i > $#char) {
8352	0						die __FILE__, ": Unmatched [] in regexp\n";
8353							}
8354	0	0					if ($char[$i] eq ']') {
8355	0						my $right = $i;
8356
8357							# [...]
8358	0	0					if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
8359	0						splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0
8360							}
8361							else {
8362	0						splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
8363							}
8364
8365	0						$i = $left;
8366	0						last;
8367							}
8368							}
8369							}
8370
8371							# open character class [^...]
8372							elsif ($char[$i] eq '[^') {
8373	0						my $left = $i;
8374	0	0					if ($char[$i+1] eq ']') {
8375	0						$i++;
8376							}
8377	0						while (1) {
8378	0	0					if (++$i > $#char) {
8379	0						die __FILE__, ": Unmatched [] in regexp\n";
8380							}
8381	0	0					if ($char[$i] eq ']') {
8382	0						my $right = $i;
8383
8384							# [^...]
8385	0	0					if (grep(/\A [\$\@]/oxms,@char[$left+1..$right-1]) >= 1) {
8386	0						splice @char, $left, $right-$left+1, sprintf(q{@{[Eutf2::charlist_not_qr(%s,'%s')]}}, join(',', map {qq_stuff($delimiter,$end_delimiter,$_)} @char[$left+1..$right-1]), $modifier);
	0
8387							}
8388							else {
8389	0						splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
8390							}
8391
8392	0						$i = $left;
8393	0						last;
8394							}
8395							}
8396							}
8397
8398							# rewrite character class or escape character
8399							elsif (my $char = character_class($char[$i],$modifier)) {
8400	0						$char[$i] = $char;
8401							}
8402
8403							# P.794 29.2.161. split
8404							# in Chapter 29: Functions
8405							# of ISBN 0-596-00027-8 Programming Perl Third Edition.
8406
8407							# P.951 split
8408							# in Chapter 27: Functions
8409							# of ISBN 978-0-596-00492-7 Programming Perl 4th Edition.
8410
8411							# said "The //m modifier is assumed when you split on the pattern /^/",
8412							# but perl5.008 is not so. Therefore, this software adds //m.
8413							# (and so on)
8414
8415							# split(m/^/) --> split(m/^/m)
8416							elsif (($char[$i] eq '^') and ($modifier !~ /m/oxms)) {
8417	0						$modifier .= 'm';
8418							}
8419
8420							# /i modifier
8421							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
8422	0	0					if (CORE::length(Eutf2::fc($char[$i])) == 1) {
8423	0						$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
8424							}
8425							else {
8426	0						$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
8427							}
8428							}
8429
8430							# \u \l \U \L \F \Q \E
8431							elsif ($char[$i] =~ /\A ([<>]) \z/oxms) {
8432	0	0					if ($right_e < $left_e) {
8433	0						$char[$i] = '\\' . $char[$i];
8434							}
8435							}
8436							elsif ($char[$i] eq '\u') {
8437	0						$char[$i] = '@{[Eutf2::ucfirst qq<';
8438	0						$left_e++;
8439							}
8440							elsif ($char[$i] eq '\l') {
8441	0						$char[$i] = '@{[Eutf2::lcfirst qq<';
8442	0						$left_e++;
8443							}
8444							elsif ($char[$i] eq '\U') {
8445	0						$char[$i] = '@{[Eutf2::uc qq<';
8446	0						$left_e++;
8447							}
8448							elsif ($char[$i] eq '\L') {
8449	0						$char[$i] = '@{[Eutf2::lc qq<';
8450	0						$left_e++;
8451							}
8452							elsif ($char[$i] eq '\F') {
8453	0						$char[$i] = '@{[Eutf2::fc qq<';
8454	0						$left_e++;
8455							}
8456							elsif ($char[$i] eq '\Q') {
8457	0						$char[$i] = '@{[CORE::quotemeta qq<';
8458	0						$left_e++;
8459							}
8460							elsif ($char[$i] eq '\E') {
8461	0	0					if ($right_e < $left_e) {
8462	0						$char[$i] = '>]}';
8463	0						$right_e++;
8464							}
8465							else {
8466	0						$char[$i] = '';
8467							}
8468							}
8469							elsif ($char[$i] eq '\Q') {
8470	0						while (1) {
8471	0	0					if (++$i > $#char) {
8472	0						last;
8473							}
8474	0	0					if ($char[$i] eq '\E') {
8475	0						last;
8476							}
8477							}
8478							}
8479							elsif ($char[$i] eq '\E') {
8480							}
8481
8482							# $0 --> $0
8483							elsif ($char[$i] =~ /\A \$ 0 \z/oxms) {
8484	0	0					if ($ignorecase) {
8485	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8486							}
8487							}
8488							elsif ($char[$i] =~ /\A \$ \{ (?>\s) 0 (?>\s) \} \z/oxms) {
8489	0	0					if ($ignorecase) {
8490	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8491							}
8492							}
8493
8494							# $$ --> $$
8495							elsif ($char[$i] =~ /\A \$\$ \z/oxms) {
8496							}
8497
8498							# $1, $2, $3 --> $2, $3, $4 after s/// with multibyte anchoring
8499							# $1, $2, $3 --> $1, $2, $3 otherwise
8500							elsif ($char[$i] =~ /\A \$ ((?>[1-9][0-9]*)) \z/oxms) {
8501	0						$char[$i] = e_capture($1);
8502	0	0					if ($ignorecase) {
8503	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8504							}
8505							}
8506							elsif ($char[$i] =~ /\A \$ \{ (?>\s) ((?>[1-9][0-9])) (?>\s*) \} \z/oxms) {
8507	0						$char[$i] = e_capture($1);
8508	0	0					if ($ignorecase) {
8509	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8510							}
8511							}
8512
8513							# $$foo[ ... ] --> $ $foo->[ ... ]
8514							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \[ (?:$qq_bracket)*? \] ) \z/oxms) {
8515	0						$char[$i] = e_capture($1.'->'.$2);
8516	0	0					if ($ignorecase) {
8517	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8518							}
8519							}
8520
8521							# $$foo{ ... } --> $ $foo->{ ... }
8522							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) ( \{ (?:$qq_brace)*? \} ) \z/oxms) {
8523	0						$char[$i] = e_capture($1.'->'.$2);
8524	0	0					if ($ignorecase) {
8525	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8526							}
8527							}
8528
8529							# $$foo
8530							elsif ($char[$i] =~ /\A \$ ((?> \$ [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* )) \z/oxms) {
8531	0						$char[$i] = e_capture($1);
8532	0	0					if ($ignorecase) {
8533	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8534							}
8535							}
8536
8537							# $`, ${`}, $PREMATCH, ${PREMATCH}, ${^PREMATCH} --> Eutf2::PREMATCH()
8538							elsif ($char[$i] =~ /\A ( \$` \| \$\{`\} \| \$ (?>\s) PREMATCH \| \$ (?>\s) \{ (?>\s) PREMATCH (?>\s) \} \| \$ (?>\s*) \{\^PREMATCH\} ) \z/oxmsgc) {
8539	0	0					if ($ignorecase) {
8540	0						$char[$i] = '@{[Eutf2::ignorecase(Eutf2::PREMATCH())]}';
8541							}
8542							else {
8543	0						$char[$i] = '@{[Eutf2::PREMATCH()]}';
8544							}
8545							}
8546
8547							# $&, ${&}, $MATCH, ${MATCH}, ${^MATCH} --> Eutf2::MATCH()
8548							elsif ($char[$i] =~ /\A ( \$& \| \$\{&\} \| \$ (?>\s) MATCH \| \$ (?>\s) \{ (?>\s) MATCH (?>\s) \} \| \$ (?>\s*) \{\^MATCH\} ) \z/oxmsgc) {
8549	0	0					if ($ignorecase) {
8550	0						$char[$i] = '@{[Eutf2::ignorecase(Eutf2::MATCH())]}';
8551							}
8552							else {
8553	0						$char[$i] = '@{[Eutf2::MATCH()]}';
8554							}
8555							}
8556
8557							# $POSTMATCH, ${POSTMATCH}, ${^POSTMATCH} --> Eutf2::POSTMATCH()
8558							elsif ($char[$i] =~ /\A ( \$ (?>\s) POSTMATCH \| \$ (?>\s) \{ (?>\s) POSTMATCH (?>\s) \} \| \$ (?>\s*) \{\^POSTMATCH\} ) \z/oxmsgc) {
8559	0	0					if ($ignorecase) {
8560	0						$char[$i] = '@{[Eutf2::ignorecase(Eutf2::POSTMATCH())]}';
8561							}
8562							else {
8563	0						$char[$i] = '@{[Eutf2::POSTMATCH()]}';
8564							}
8565							}
8566
8567							# ${ foo }
8568							elsif ($char[$i] =~ /\A \$ (?>\s) \{ ((?> \s [A-Za-z_][A-Za-z0-9_](?: ::[A-Za-z_][A-Za-z0-9_])* \s* )) \} \z/oxms) {
8569	0	0					if ($ignorecase) {
8570	0						$char[$i] = '@{[Eutf2::ignorecase(' . $1 . ')]}';
8571							}
8572							}
8573
8574							# ${ ... }
8575							elsif ($char[$i] =~ /\A \$ (?>\s*) \{ ( .+ ) \} \z/oxms) {
8576	0						$char[$i] = e_capture($1);
8577	0	0					if ($ignorecase) {
8578	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8579							}
8580							}
8581
8582							# $scalar or @array
8583							elsif ($char[$i] =~ /\A [\$\@].+ /oxms) {
8584	0						$char[$i] = e_string($char[$i]);
8585	0	0					if ($ignorecase) {
8586	0						$char[$i] = '@{[Eutf2::ignorecase(' . $char[$i] . ')]}';
8587							}
8588							}
8589
8590							# quote character before ? + * {
8591							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
8592	0	0					if ($char[$i-1] =~ /\A (?:[\x00-\xFF]\|\\[0-7]{2,3}\|\\x[0-9-A-Fa-f]{1,2}) \z/oxms) {
8593							}
8594							else {
8595	0						$char[$i-1] = '(?:' . $char[$i-1] . ')';
8596							}
8597							}
8598							}
8599
8600							# make regexp string
8601	0						$modifier =~ tr/i//d;
8602	0	0					if ($left_e > $right_e) {
8603	0						return join '', 'Eutf2::split', $ope, $delimiter, @char, '>]}' x ($left_e - $right_e), $end_delimiter, $modifier;
8604							}
8605	0						return join '', 'Eutf2::split', $ope, $delimiter, @char, $end_delimiter, $modifier;
8606							}
8607
8608							#
8609							# escape regexp of split qr''
8610							#
8611							sub e_split_q {
8612	0			0	0		my($ope,$delimiter,$end_delimiter,$string,$modifier) = @_;
8613	0		0				$modifier \|\|= '';
8614
8615	0						$modifier =~ tr/p//d;
8616	0	0					if ($modifier =~ /([adlu])/oxms) {
8617	0						my $line = 0;
8618	0						for (my $i=0; my($package,$filename,$use_line,$subroutine) = caller($i); $i++) {
8619	0	0					if ($filename ne __FILE__) {
8620	0						$line = $use_line + (CORE::substr($_,0,pos($_)) =~ tr/\n//) + 1;
8621	0						last;
8622							}
8623							}
8624	0						die qq{Unsupported modifier "$1" used at line $line.\n};
8625							}
8626
8627	0						$slash = 'div';
8628
8629							# /b /B modifier
8630	0	0					if ($modifier =~ tr/bB//d) {
8631	0						return join '', 'split', $ope, $delimiter, $string, $end_delimiter, $modifier;
8632							}
8633
8634	0	0					my $ignorecase = ($modifier =~ /i/oxms) ? 1 : 0;
8635
8636							# split regexp
8637	0						my @char = $string =~ /\G((?>
8638							[^\x80-\xFF\\\[] \|
8639							(?:[\xC2-\xDF]\|[\xE0-\xE0][\xA0-\xBF]\|[\xE1-\xEC][\x80-\xBF]\|[\xED-\xED][\x80-\x9F]\|[\xEE-\xEF][\x80-\xBF]\|[\xF0-\xF0][\x90-\xBF][\x80-\xBF]\|[\xF1-\xF3][\x80-\xBF][\x80-\xBF]\|[\xF4-\xF4][\x80-\x8F][\x80-\xBF])[\x80-\xBF] \|
8640							\[\^ \|
8641							\[\: (?>[a-z]+) \:\] \|
8642							\[\:\^ (?>[a-z]+) \:\] \|
8643							\\ (?:$q_char) \|
8644							(?:$q_char)
8645							))/oxmsg;
8646
8647							# unescape character
8648	0						for (my $i=0; $i <= $#char; $i++) {
8649	0	0	0				if (0) {
		0	0
		0	0
		0	0
		0
		0
8650							}
8651
8652							# open character class [...]
8653	0						elsif ($char[$i] eq '[') {
8654	0						my $left = $i;
8655	0	0					if ($char[$i+1] eq ']') {
8656	0						$i++;
8657							}
8658	0						while (1) {
8659	0	0					if (++$i > $#char) {
8660	0						die __FILE__, ": Unmatched [] in regexp\n";
8661							}
8662	0	0					if ($char[$i] eq ']') {
8663	0						my $right = $i;
8664
8665							# [...]
8666	0						splice @char, $left, $right-$left+1, Eutf2::charlist_qr(@char[$left+1..$right-1], $modifier);
8667
8668	0						$i = $left;
8669	0						last;
8670							}
8671							}
8672							}
8673
8674							# open character class [^...]
8675							elsif ($char[$i] eq '[^') {
8676	0						my $left = $i;
8677	0	0					if ($char[$i+1] eq ']') {
8678	0						$i++;
8679							}
8680	0						while (1) {
8681	0	0					if (++$i > $#char) {
8682	0						die __FILE__, ": Unmatched [] in regexp\n";
8683							}
8684	0	0					if ($char[$i] eq ']') {
8685	0						my $right = $i;
8686
8687							# [^...]
8688	0						splice @char, $left, $right-$left+1, Eutf2::charlist_not_qr(@char[$left+1..$right-1], $modifier);
8689
8690	0						$i = $left;
8691	0						last;
8692							}
8693							}
8694							}
8695
8696							# rewrite character class or escape character
8697							elsif (my $char = character_class($char[$i],$modifier)) {
8698	0						$char[$i] = $char;
8699							}
8700
8701							# split(m/^/) --> split(m/^/m)
8702							elsif (($char[$i] eq '^') and ($modifier !~ /m/oxms)) {
8703	0						$modifier .= 'm';
8704							}
8705
8706							# /i modifier
8707							elsif ($ignorecase and ($char[$i] =~ /\A [\x00-\xFF] \z/oxms) and (Eutf2::uc($char[$i]) ne Eutf2::fc($char[$i]))) {
8708	0	0					if (CORE::length(Eutf2::fc($char[$i])) == 1) {
8709	0						$char[$i] = '[' . Eutf2::uc($char[$i]) . Eutf2::fc($char[$i]) . ']';
8710							}
8711							else {
8712	0						$char[$i] = '(?:' . Eutf2::uc($char[$i]) . '\|' . Eutf2::fc($char[$i]) . ')';
8713							}
8714							}
8715
8716							# quote character before ? + * {
8717							elsif (($i >= 1) and ($char[$i] =~ /\A [\?\+\*\{] \z/oxms)) {
8718	0	0					if ($char[$i-1] =~ /\A [\x00-\xFF] \z/oxms) {
8719							}
8720							else {
8721	0						$char[$i-1] = '(?:' . $char[$i-1] . ')';
8722							}
8723							}
8724							}
8725
8726	0						$modifier =~ tr/i//d;
8727	0						return join '', 'Eutf2::split', $ope, $delimiter, @char, $end_delimiter, $modifier;
8728							}
8729
8730							#
8731							# instead of Carp::carp
8732							#
8733							sub carp {
8734	0			0	0		my($package,$filename,$line) = caller(1);
8735	0						print STDERR "@_ at $filename line $line.\n";
8736							}
8737
8738							#
8739							# instead of Carp::croak
8740							#
8741							sub croak {
8742	0			0	0		my($package,$filename,$line) = caller(1);
8743	0						print STDERR "@_ at $filename line $line.\n";
8744	0						die "\n";
8745							}
8746
8747							#
8748							# instead of Carp::cluck
8749							#
8750							sub cluck {
8751	0			0	0		my $i = 0;
8752	0						my @cluck = ();
8753	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
8754	0						push @cluck, "[$i] $filename($line) $package::$subroutine\n";
8755	0						$i++;
8756							}
8757	0						print STDERR CORE::reverse @cluck;
8758	0						print STDERR "\n";
8759	0						carp @_;
8760							}
8761
8762							#
8763							# instead of Carp::confess
8764							#
8765							sub confess {
8766	0			0	0		my $i = 0;
8767	0						my @confess = ();
8768	0						while (my($package,$filename,$line,$subroutine) = caller($i)) {
8769	0						push @confess, "[$i] $filename($line) $package::$subroutine\n";
8770	0						$i++;
8771							}
8772	0						print STDERR CORE::reverse @confess;
8773	0						print STDERR "\n";
8774	0						croak @_;
8775							}
8776
8777							1;
8778
8779							__END__