line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
;;;;# -*-coding:utf-8;-*- µ ← col73 |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5; |
4
|
11
|
|
|
11
|
|
80664
|
use 5.8.0; |
|
11
|
|
|
|
|
42
|
|
5
|
|
|
|
|
|
|
package Text::Unidecode; |
6
|
|
|
|
|
|
|
$Last_Modified =' Time-stamp: "2015-10-21 06:43:24 MDT sburke@cpan.org"'; |
7
|
11
|
|
|
11
|
|
2623
|
use utf8; |
|
11
|
|
|
|
|
49
|
|
|
11
|
|
|
|
|
75
|
|
8
|
11
|
|
|
11
|
|
275
|
use strict; |
|
11
|
|
|
|
|
31
|
|
|
11
|
|
|
|
|
278
|
|
9
|
11
|
|
|
11
|
|
8856
|
use integer; # vroom vroom! |
|
11
|
|
|
|
|
114
|
|
|
11
|
|
|
|
|
59
|
|
10
|
11
|
|
|
11
|
|
368
|
use vars qw($VERSION @ISA @EXPORT @Char $UNKNOWN $NULLMAP $TABLE_SIZE $Last_Modified); |
|
11
|
|
|
|
|
20
|
|
|
11
|
|
|
|
|
1719
|
|
11
|
|
|
|
|
|
|
# Release number of this module.
$VERSION = '1.27';

# unidecode() is exported into the caller's namespace by default.
require Exporter;
@ISA    = qw(Exporter);
@EXPORT = qw(unidecode);

# DEBUG is a constant sub.  It is 0 unless something already defined
# a DEBUG sub in this package before this BEGIN block ran.
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }

# Replacement text used for table slots that have no transliteration.
$UNKNOWN    = '[?] ';

# Number of entries each per-bank table is expected to hold.
$TABLE_SIZE = 256;

# A single shared table made entirely of $UNKNOWN entries,
# for blocks we can't load.
$NULLMAP    = [ ($UNKNOWN) x $TABLE_SIZE ];
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
22
|
|
|
|
|
|
|
{
  # Load-time self-check: a string holding every 7-bit character
  # (code points 0x00 through 0x7F) must pass through unidecode()
  # unchanged.
  #
  # The string is built with chr() over a numeric range on purpose.
  # A string range such as "\x00" .. "\x7F" does not enumerate the
  # characters in between: its left operand is not a "magical
  # increment" string, so the range yields only its first element and
  # the check would exercise just the NUL character.
  my $x = join '', map chr($_), 0x00 .. 0x7F;
  die "the 7-bit purity test fails!" unless $x eq unidecode($x);
}
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub unidecode {
  # Replace every character outside \x00-\x7f in each argument with
  # the replacement text stored for it in the per-bank tables of @Char.
  #
  # The calling context decides what happens to the arguments:
  #   void   -- the arguments are aliases of the caller's variables
  #             and are rewritten IN PLACE;
  #   list   -- converted copies are returned, one per argument;
  #   scalar -- a single converted copy is returned; if several
  #             arguments came in, their copies are concatenated.
  # Called with no arguments at all, it returns the empty list in
  # list context and the empty string otherwise.

  return( wantarray ? () : '' ) unless @_;

  # true => list context, defined-but-false => scalar, undef => void
  my $context = wantarray;

  # Outside void context, detach @_ from the caller's variables so the
  # substitution below only ever touches private copies.
  @_ = map { $_ } @_ if defined $context;

  for my $text (@_) {
    defined $text or next;

    # Shut up potentially fatal warnings about UTF-16 surrogate
    # characters when running under perl -w
    # This is per https://rt.cpan.org/Ticket/Display.html?id=97456
    no warnings 'utf8';

    # The warp core.  For a character numbered 0xABCD:
    #   * the high bits (0xAB) select a table, $Char[0xAB];
    #   * if that table is not there yet, t() is called with the
    #     character; it loads the table and returns the arrayref;
    #   * the low bits (0xCD) index into that table, and the entry
    #     found there is the replacement text for the character.
    $text =~ s{([^\x00-\x7f])}{
      my $charnum = ord $1;
      ( $Char[ $charnum >> 8 ] || t($1) )->[ $charnum & 255 ]
    }egs;
  }

  return unless defined $context;   # void context
  return @_ if $context;            # list context -- return the copies

  # Scalar context: usually one item in, one item out; the rarer case
  # is a list in but a scalar out, which gets joined together.
  return @_ == 1 ? $_[0] : join( '', @_ );
}
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
#====================================================================== |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub make_placeholder_map {
  # Build and return a brand-new arrayref holding $TABLE_SIZE copies
  # of $UNKNOWN.  Every call produces a separate array.
  my @placeholders = ( $UNKNOWN ) x $TABLE_SIZE;
  return \@placeholders;
}
119
|
|
|
|
|
|
|
sub make_placeholder_map_nulls {
  # Build and return a brand-new arrayref holding $TABLE_SIZE empty
  # strings.  Every call produces a separate array.
  my @empties = ( "" ) x $TABLE_SIZE;
  return \@empties;
}
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
#====================================================================== |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub t { # "t" is for "t"able.
  # Given a character, return the table (an arrayref) for the bank
  # that character lives in, loading and tidying the table first if
  # it is not already in @Char.  The bank number is the character's
  # number shifted right by eight bits.
  #
  # This should get called only once per table per session, because
  # whatever it returns is also left in $Char[$bank].
  my $bank = ord($_[0]) >> 8;

  my $already = $Char[$bank];
  return $already if $already;

  load_bank($bank);

  # Now see how that fared.  If nothing usable turned up in @Char,
  # settle for the shared all-$UNKNOWN table.
  unless( ref($Char[$bank] || '') eq 'ARRAY' ) {
    DEBUG > 1 and print
      " Loading failed for bank $bank (err $@). Using null map.\n";
    return $Char[$bank] = $NULLMAP;
  }

  DEBUG > 1 and print " Loading succeeded.\n";
  my $table = $Char[$bank];

  # Sanity-check the size.  Nothing to do when it is exactly
  # $TABLE_SIZE entries long.
  my $have = scalar @$table;

  if( $have > $TABLE_SIZE ) {
    DEBUG and print "Bank $bank is too large-- it has ", $have,
      " entries in it. Pruning.\n";
    # Two-argument splice: drop everything from the first overage
    # entry onward.
    splice @$table, $TABLE_SIZE;

  } elsif( $have < $TABLE_SIZE ) {
    DEBUG and print "Bank $bank is too small-- it has ", $have,
      " entries in it. Now padding it.\n";
    $have == 0 and DEBUG and print " (Yes, ZERO entries!)\n";

    # Pad with one $UNKNOWN per missing entry.
    push @$table, ( $UNKNOWN ) x ( $TABLE_SIZE - $have );

  } elsif( $have != $TABLE_SIZE ) {
    die "UNREACHABLE CODE HERE (INSANE)";
  }

  # Check for undefness in block: any undefined entry becomes the
  # empty string.
  foreach my $i ( 0 .. $TABLE_SIZE - 1 ) {
    next if defined $table->[$i];
    DEBUG and printf "Undef at position %d in block x%02x\n",
      $i, $bank;
    $table->[$i] = '';
  }

  return $table;
}
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Set to 1 by the code that load_bank() evals, once its require has
# completed without dying.
our $eval_loaded_okay;

sub load_bank {
  # Try to load the table module for one bank.
  #
  # Takes the bank number, just as an integer value, and requires the
  # module named <this package>::xNN, where NN is the bank number
  # formatted with %02x.  Returns 1 if the require completed, 0 if
  # not.  When it fails, $@ is left as the eval set it.
  #
  # This is in its own sub, for sake of sweeping the scary thing
  # (namely, a call to eval) under the rug.  The eval'd string is
  # built only from __PACKAGE__ and the %02x-formatted bank number.

  my($banknum) = @_;

  # The format has two conversions, so it needs both arguments;
  # without them the debug line names no package and always says x00.
  DEBUG and printf
      "# Eval-loading %s::x%02x ...\n", __PACKAGE__, $banknum;

  $eval_loaded_okay = 0;
  my $code =
    sprintf( "require %s::x%02x; \$eval_loaded_okay = 1;\n",
      __PACKAGE__,
      $banknum );

  {
    # Keep any caller-installed __DIE__ handler from firing when the
    # require fails inside the eval.
    local $SIG{'__DIE__'};
    eval($code);
  }

  return $eval_loaded_okay ? 1 : 0;
}
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
#====================================================================== |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
1; |
222
|
|
|
|
|
|
|
__END__ |