| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
;;;;# -*-coding:utf-8;-*- µ ← col73 |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5; |
|
4
|
11
|
|
|
11
|
|
80664
|
use 5.8.0; |
|
|
11
|
|
|
|
|
42
|
|
|
5
|
|
|
|
|
|
|
package Text::Unidecode; |
|
6
|
|
|
|
|
|
|
$Last_Modified =' Time-stamp: "2015-10-21 06:43:24 MDT sburke@cpan.org"'; |
|
7
|
11
|
|
|
11
|
|
2623
|
use utf8; |
|
|
11
|
|
|
|
|
49
|
|
|
|
11
|
|
|
|
|
75
|
|
|
8
|
11
|
|
|
11
|
|
275
|
use strict; |
|
|
11
|
|
|
|
|
31
|
|
|
|
11
|
|
|
|
|
278
|
|
|
9
|
11
|
|
|
11
|
|
8856
|
use integer; # vroom vroom! |
|
|
11
|
|
|
|
|
114
|
|
|
|
11
|
|
|
|
|
59
|
|
|
10
|
11
|
|
|
11
|
|
368
|
use vars qw($VERSION @ISA @EXPORT @Char $UNKNOWN $NULLMAP $TABLE_SIZE $Last_Modified); |
|
|
11
|
|
|
|
|
20
|
|
|
|
11
|
|
|
|
|
1719
|
|
|
11
|
|
|
|
|
|
|
# --- Module identity and exports -----------------------------------------
$VERSION = '1.27';

require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw(unidecode);

# DEBUG is a constant sub; whoever loads us may define it first (to a
# true value) to turn on the diagnostic prints.  Otherwise it is 0.
BEGIN {
  unless( defined &DEBUG ) {
    *DEBUG = sub () {0};
  }
}

# --- Table parameters -----------------------------------------------------
$UNKNOWN    = '[?] ';   # replacement text for characters we cannot map
$TABLE_SIZE = 256;      # number of entries in one bank (one per low byte)

# A bank made entirely of $UNKNOWN, used for blocks we can't load.
$NULLMAP = [
  ( $UNKNOWN ) x $TABLE_SIZE
];
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
|
22
|
|
|
|
|
|
|
{
  # Load-time self-check: every 7-bit character (0x00 through 0x7F) must
  # pass through unidecode() unchanged.
  #
  # The string is built from a NUMERIC range plus chr().  A string range
  # such as "\x00" .. "\x7F" does not work here: its starting value is
  # not a "magical increment" string, so the range operator returns only
  # that first element and the check would cover just the NUL character.
  my $x = join '', map { chr } 0x00 .. 0x7F;
  die "the 7-bit purity test fails!" unless $x eq unidecode($x);
}
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub unidecode {
  # Replace every character outside \x00-\x7f in the given strings with
  # the text found for it in the @Char tables.
  #
  # Context decides what happens to the arguments:
  #   void   -- the caller's own variables are altered in place, and
  #             nothing is returned;
  #   list   -- the arguments are left alone, and altered copies are
  #             returned;
  #   scalar -- the arguments are left alone; the altered copy is returned
  #             if there was exactly one argument, otherwise all the
  #             altered copies joined together.
  # Undefined arguments are skipped and stay undefined.

  # Sanity: nothing coming in means nothing (or '') going out.
  if( !@_ ) {
    return wantarray ? () : '';
  }

  # true = list context, defined-but-false = scalar, undef = void.
  my $context = wantarray;

  # Outside void context, sever the aliasing between @_ and the caller's
  # variables so that the edits below land on copies.  In void context we
  # deliberately keep the aliases and edit the caller's data.
  if( defined $context ) {
    @_ = map { $_ } @_;
  }

  foreach my $text (@_) {
    defined $text or next;

    # Shut up potentially fatal warnings about UTF-16 surrogate
    # characters when running under perl -w.
    # This is per https://rt.cpan.org/Ticket/Display.html?id=97456
    no warnings 'utf8';

    # The warp core.  For character number 0xABCD: the high bits (0xAB)
    # choose a table, the low bits (0xCD) choose the entry in it.  If
    # table 0xAB isn't in @Char yet, t() is asked for it; t() hands back
    # the table for that character's bank.
    $text =~ s{([^\x00-\x7f])}{
      my $codepoint = ord $1;
      ( $Char[ $codepoint >> 8 ] || t($1) )->[ $codepoint & 255 ];
    }egs;
  }

  return    unless defined $context;   # void context
  return @_ if $context;               # list context -- return the copies

  # Scalar context.  A single argument comes back as-is (so an undefined
  # one stays undefined); the rarer list-in, scalar-out case is joined.
  return @_ == 1 ? $_[0] : join '', @_;
}
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#====================================================================== |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub make_placeholder_map {
  # Build and return a new arrayref holding $TABLE_SIZE copies of the
  # $UNKNOWN placeholder text.
  my @placeholders = ( $UNKNOWN ) x $TABLE_SIZE;
  return \@placeholders;
}
|
119
|
|
|
|
|
|
|
sub make_placeholder_map_nulls {
  # Build and return a new arrayref holding $TABLE_SIZE empty strings.
  my @empties = ( "" ) x $TABLE_SIZE;
  return \@empties;
}
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
#====================================================================== |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub t {   # "t" is for "t"able.
  # Given a character, return the table (arrayref) for the bank that the
  # character lives in, loading that bank first if @Char doesn't have it.
  # The result is also stored in @Char, so this is meant to be reached
  # only once per table per session.
  #
  # Whatever gets loaded is tidied up before being returned:
  #   * no usable arrayref after loading -> the shared $NULLMAP is used;
  #   * too many entries                 -> the overage is cut off;
  #   * too few entries                  -> padded out with $UNKNOWN;
  #   * undefined entries                -> replaced with ''.
  my $bank = ord($_[0]) >> 8;
  return $Char[$bank] if $Char[$bank];

  load_bank($bank);

  # Now see how that fared...
  unless( ref($Char[$bank] || '') eq 'ARRAY' ) {
    DEBUG > 1 and print
      " Loading failed for bank $bank (err $@). Using null map.\n";
    return $Char[$bank] = $NULLMAP;
  }

  DEBUG > 1 and print " Loading succeeded.\n";
  my $table = $Char[$bank];
  my $have  = @$table;    # entry count as loaded

  # Sanity-check the size.  Having exactly $TABLE_SIZE entries is the
  # expected case and needs no work.
  if( $have > $TABLE_SIZE ) {
    DEBUG and print "Bank $bank is too large-- it has ", scalar @$table,
      " entries in it. Pruning.\n";
    # Two-argument splice: drop everything from the first overage
    # entry onward.
    splice @$table, $TABLE_SIZE;

  } elsif( $have < $TABLE_SIZE ) {
    DEBUG and print "Bank $bank is too small-- it has ", scalar @$table,
      " entries in it. Now padding it.\n";
    DEBUG and $have == 0 and print " (Yes, ZERO entries!)\n";
    # One $UNKNOWN for each missing entry.
    push @$table, ( $UNKNOWN ) x ( $TABLE_SIZE - $have );

  } elsif( $have != $TABLE_SIZE ) {
    die "UNREACHABLE CODE HERE (INSANE)";
  }

  # Check for undefness in block:
  foreach my $i ( 0 .. $TABLE_SIZE - 1 ) {
    next if defined $table->[$i];
    DEBUG and printf "Undef at position %d in block x%02x\n",
      $i, $bank;
    $table->[$i] = '';
  }

  return $Char[$bank];
}
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Set to 1 by the eval'd code in load_bank() once its require has
# completed; reset to 0 at the start of every load_bank() call.
our $eval_loaded_okay;

sub load_bank {
  # Try to load the table module for one bank.
  #
  # Takes the bank number, just as an integer value.  The module asked
  # for is <this package>::xNN, where NN is the bank number formatted
  # with %02x (lowercase hex, at least two digits).
  #
  # Returns 1 if the require completed, 0 if it did not.  On failure $@
  # is left as the eval set it, for the caller to report.
  #
  # This is in its own sub for the sake of sweeping the scary thing
  # (namely, a string eval) under the rug.  Only __PACKAGE__ and a
  # %02x-formatted number are ever interpolated into the eval'd code.

  my($banknum) = @_;

  # The format has two conversions, so it needs both arguments; without
  # them the message shows an empty package name and bank 00.
  DEBUG and printf
    "# Eval-loading %s::x%02x ...\n", __PACKAGE__, $banknum;

  $eval_loaded_okay = 0;
  my $code = sprintf(
    "require %s::x%02x; \$eval_loaded_okay = 1;\n",
    __PACKAGE__,
    $banknum,
  );

  {
    # Switch off any __DIE__ handler for the duration, so that a bank
    # that fails to load doesn't trigger it.
    local $SIG{'__DIE__'};
    eval($code);
  }

  return $eval_loaded_okay ? 1 : 0;
}
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
#====================================================================== |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
1; |
|
222
|
|
|
|
|
|
|
__END__ |