line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
;;;;# -*-coding:utf-8;-*- µ ← col73 |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5; |
4
|
11
|
|
|
11
|
|
89424
|
use 5.8.0; |
|
11
|
|
|
|
|
74
|
|
5
|
|
|
|
|
|
|
package Text::Unidecode; |
6
|
|
|
|
|
|
|
$Last_Modified =' Time-stamp: "2016-11-26 05:01:56 MST"'; |
7
|
11
|
|
|
11
|
|
2723
|
use utf8; |
|
11
|
|
|
|
|
83
|
|
|
11
|
|
|
|
|
64
|
|
8
|
11
|
|
|
11
|
|
402
|
use strict; |
|
11
|
|
|
|
|
28
|
|
|
11
|
|
|
|
|
372
|
|
9
|
11
|
|
|
11
|
|
8654
|
use integer; # vroom vroom! |
|
11
|
|
|
|
|
130
|
|
|
11
|
|
|
|
|
63
|
|
10
|
11
|
|
|
|
|
2610
|
use vars qw($VERSION @ISA @EXPORT @Char $UNKNOWN $NULLMAP $TABLE_SIZE $Last_Modified |
11
|
|
|
|
|
|
|
$Note_Broken_Tables %Broken_Table_Size %Broken_Table_Copy |
12
|
11
|
|
|
11
|
|
636
|
); |
|
11
|
|
|
|
|
47
|
|
13
|
|
|
|
|
|
|
$VERSION = '1.30'; |
14
|
|
|
|
|
|
|
require Exporter; |
15
|
|
|
|
|
|
|
@ISA = ('Exporter'); |
16
|
|
|
|
|
|
|
@EXPORT = ('unidecode'); |
17
|
|
|
|
|
|
|
$Note_Broken_Tables = 0; |
18
|
11
|
50
|
|
11
|
|
2574
|
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } |
19
|
|
|
|
|
|
|
$UNKNOWN = '[?] '; |
20
|
|
|
|
|
|
|
$TABLE_SIZE = 256; |
21
|
|
|
|
|
|
|
$NULLMAP = [( $UNKNOWN ) x $TABLE_SIZE]; # for blocks we can't load |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
24
|
|
|
|
|
|
|
{ |
25
|
|
|
|
|
|
|
my $x = join '', "\x00" .. "\x7F"; |
26
|
|
|
|
|
|
|
die "the 7-bit purity test fails!" unless $x eq unidecode($x); |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
#-------------------------------------------------------------------------- |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub unidecode { |
32
|
|
|
|
|
|
|
# Destructive in void context -- in other contexts, nondestructive. |
33
|
|
|
|
|
|
|
|
34
|
685
|
50
|
|
685
|
1
|
30287
|
unless(@_) { # Sanity: Nothing coming in! |
35
|
0
|
0
|
|
|
|
0
|
return() if wantarray; |
36
|
0
|
|
|
|
|
0
|
return ''; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
685
|
50
|
|
|
|
1378
|
if( defined wantarray ) { |
40
|
|
|
|
|
|
|
# We're in list or scalar context (i.e., just not void context.) |
41
|
|
|
|
|
|
|
# So make @_'s items no longer be aliases. |
42
|
685
|
|
|
|
|
2756
|
@_ = map $_, @_; |
43
|
|
|
|
|
|
|
} else { |
44
|
|
|
|
|
|
|
# Otherwise (if we're in void context), then just let @_ stay |
45
|
|
|
|
|
|
|
# aliases, and alter their elements IN-PLACE! |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
|
48
|
685
|
|
|
|
|
1469
|
foreach my $n (@_) { |
49
|
685
|
50
|
|
|
|
1485
|
next unless defined $n; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Shut up potentially fatal warnings about UTF-16 surrogate |
52
|
|
|
|
|
|
|
# characters when running under perl -w |
53
|
|
|
|
|
|
|
# This is per https://rt.cpan.org/Ticket/Display.html?id=97456 |
54
|
11
|
|
|
11
|
|
72
|
no warnings 'utf8'; |
|
11
|
|
|
|
|
30
|
|
|
11
|
|
|
|
|
10298
|
|
55
|
|
|
|
|
|
|
|
56
|
685
|
100
|
|
|
|
4254
|
$n =~ s~([^\x00-\x7f])~${$Char[ord($1)>>8]||t($1)}[ord($1)&255]~egs; |
|
702
|
|
|
|
|
808
|
|
|
702
|
|
|
|
|
3437
|
|
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
# That means: |
59
|
|
|
|
|
|
|
# Replace character 0xABCD with $Char[0xAB][0xCD], loading |
60
|
|
|
|
|
|
|
# the table 0xAB as needed. |
61
|
|
|
|
|
|
|
# |
62
|
|
|
|
|
|
|
#====================================================================== |
63
|
|
|
|
|
|
|
# |
64
|
|
|
|
|
|
|
# Yes, that's dense code. It's the warp core! |
65
|
|
|
|
|
|
|
# Here is an expansion into pseudocode... as best as I can manage it... |
66
|
|
|
|
|
|
|
# |
67
|
|
|
|
|
|
|
# $character = $1; |
68
|
|
|
|
|
|
|
# $charnum = ord($character); |
69
|
|
|
|
|
|
|
# $charnum_lowbits = $charnum & 255; |
70
|
|
|
|
|
|
|
# $charnum_highbits = $charnum >> 8; |
71
|
|
|
|
|
|
|
# |
72
|
|
|
|
|
|
|
# $table_ref = $Char->[$charnum_highbits]; |
73
|
|
|
|
|
|
|
# |
74
|
|
|
|
|
|
|
# if($table_ref) { |
75
|
|
|
|
|
|
|
# # As expected, we got the arrayref for this table. |
76
|
|
|
|
|
|
|
# } else { |
77
|
|
|
|
|
|
|
# # Uhoh, we couldn't find the arrayref for this table. |
78
|
|
|
|
|
|
|
# # So we call t($character). |
79
|
|
|
|
|
|
|
# # It loads a table. Namely, it does: |
80
|
|
|
|
|
|
|
# Load_Table_For( $charnum_highbits ); |
81
|
|
|
|
|
|
|
# # ...which does magic, and puts something in |
82
|
|
|
|
|
|
|
# # $Char->[$charnum_highbits], |
83
|
|
|
|
|
|
|
# # so NOW we actually CAN do: |
84
|
|
|
|
|
|
|
# $table_ref = $Char->[$charnum_highbits]; |
85
|
|
|
|
|
|
|
# } |
86
|
|
|
|
|
|
|
# |
87
|
|
|
|
|
|
|
# $for_this_char |
88
|
|
|
|
|
|
|
# = $table_ref->[ $charnum_lowbits ]; |
89
|
|
|
|
|
|
|
# |
90
|
|
|
|
|
|
|
# # Although the syntax we actually use is the odd |
91
|
|
|
|
|
|
|
# but COMPLETE EQUIVALENT to this syntax: |
92
|
|
|
|
|
|
|
# |
93
|
|
|
|
|
|
|
# $for_this_char |
94
|
|
|
|
|
|
|
# = ${ $table_ref }[ $charnum_lowbits ]; |
95
|
|
|
|
|
|
|
# |
96
|
|
|
|
|
|
|
# and $for_this_char is the replacement text for this |
97
|
|
|
|
|
|
|
# character, in: |
98
|
|
|
|
|
|
|
# $n =~ s~(char)~replacement~egs |
99
|
|
|
|
|
|
|
# |
100
|
|
|
|
|
|
|
# (And why did I use s~x~y~ instead of s/x/y/ ? |
101
|
|
|
|
|
|
|
# It's all the same for Perl: perldoc perlretut says: |
102
|
|
|
|
|
|
|
# As with the match "m//" operator, "s///" can |
103
|
|
|
|
|
|
|
# use other delimiters, such as "s!!!" and "s{}{}", |
104
|
|
|
|
|
|
|
# I didn't do it for sake of obscurity. I think it's just to |
105
|
|
|
|
|
|
|
# keep my editor's syntax highlighter from crashing, |
106
|
|
|
|
|
|
|
# which was a problem with s/// when the insides are as gory |
107
|
|
|
|
|
|
|
# as we have here. |
108
|
|
|
|
|
|
|
|
109
|
685
|
50
|
|
|
|
1674
|
return unless defined wantarray; # void context |
110
|
685
|
100
|
|
|
|
2146
|
return @_ if wantarray; # normal list context -- return the copies |
111
|
|
|
|
|
|
|
# Else normal scalar context: |
112
|
437
|
50
|
|
|
|
3415
|
return $_[0] if @_ == 1; |
113
|
0
|
|
|
|
|
0
|
return join '', @_; # rarer fallthru: a list in, but a scalar out. |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
#====================================================================== |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub make_placeholder_map { |
119
|
134
|
|
|
134
|
0
|
7892
|
return [( $UNKNOWN ) x $TABLE_SIZE ]; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
sub make_placeholder_map_nulls { |
122
|
0
|
|
|
0
|
0
|
0
|
return [( "" ) x $TABLE_SIZE ]; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
#====================================================================== |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub t { # "t" is for "t"able. |
128
|
|
|
|
|
|
|
# Load (and return) a char table for this character |
129
|
|
|
|
|
|
|
# this should get called only once per table per session. |
130
|
538
|
|
|
538
|
0
|
887
|
my $bank = ord($_[0]) >> 8; |
131
|
538
|
50
|
|
|
|
1253
|
return $Char[$bank] if $Char[$bank]; |
132
|
|
|
|
|
|
|
|
133
|
538
|
|
|
|
|
841
|
load_bank($bank); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Now see how that fared... |
136
|
|
|
|
|
|
|
|
137
|
538
|
50
|
50
|
|
|
2510
|
if(ref($Char[$bank] || '') ne 'ARRAY') { |
138
|
0
|
|
|
|
|
0
|
DEBUG > 1 and print |
139
|
|
|
|
|
|
|
" Loading failed for bank $bank (err $@). Using null map.\n"; |
140
|
0
|
|
|
|
|
0
|
return $Char[$bank] = $NULLMAP; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
|
144
|
538
|
|
|
|
|
528
|
DEBUG > 1 and print " Loading succeeded.\n"; |
145
|
538
|
|
|
|
|
782
|
my $cb = $Char[$bank]; |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Sanity-check it: |
148
|
538
|
50
|
|
|
|
1599
|
if(@$cb == $TABLE_SIZE) { |
149
|
|
|
|
|
|
|
# As expected. Fallthru. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
} else { |
152
|
0
|
0
|
|
|
|
0
|
if($Note_Broken_Tables) { |
153
|
0
|
|
|
|
|
0
|
$Broken_Table_Size{$bank} = scalar @$cb; |
154
|
0
|
|
|
|
|
0
|
$Broken_Table_Copy{$bank} = [ @$cb ]; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
0
|
0
|
|
|
|
0
|
if(@$cb > $TABLE_SIZE) { |
|
|
0
|
|
|
|
|
|
158
|
0
|
|
|
|
|
0
|
DEBUG and print "Bank $bank is too large-- it has ", scalar @$cb, |
159
|
|
|
|
|
|
|
" entries in it. Pruning.\n"; |
160
|
0
|
|
|
|
|
0
|
splice @$cb, $TABLE_SIZE; |
161
|
|
|
|
|
|
|
# That two-argument form splices everything off into nowhere, |
162
|
|
|
|
|
|
|
# starting with the first overage character. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
} elsif( @$cb < $TABLE_SIZE) { |
165
|
0
|
|
|
|
|
0
|
DEBUG and print "Bank $bank is too small-- it has ", scalar @$cb, |
166
|
|
|
|
|
|
|
" entries in it. Now padding it.\n"; |
167
|
0
|
0
|
|
|
|
0
|
if(@$cb == 0) { |
168
|
0
|
|
|
|
|
0
|
DEBUG and print " (Yes, ZERO entries!)\n"; |
169
|
|
|
|
|
|
|
} |
170
|
0
|
|
|
|
|
0
|
push @$cb, |
171
|
|
|
|
|
|
|
( $UNKNOWN ) x ( $TABLE_SIZE - @$cb) |
172
|
|
|
|
|
|
|
# i.e., however many items, times the deficit |
173
|
|
|
|
|
|
|
; |
174
|
|
|
|
|
|
|
# And fallthru... |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
} else { |
177
|
0
|
|
|
|
|
0
|
die "UNREACHABLE CODE HERE (INSANE)"; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Check for undefness in block: |
182
|
|
|
|
|
|
|
|
183
|
538
|
|
|
|
|
1404
|
for(my $i = 0; $i < $TABLE_SIZE; ++$i) { |
184
|
137728
|
50
|
|
|
|
314842
|
unless(defined $cb->[$i]) { |
185
|
0
|
|
|
|
|
0
|
DEBUG and printf "Undef at position %d in block x%02x\n", |
186
|
|
|
|
|
|
|
$i, $bank; |
187
|
0
|
|
|
|
|
0
|
$cb->[$i] = ''; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
538
|
|
|
|
|
5765
|
return $Char[$bank]; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
#----------------------------------------------------------------------- |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
our $eval_loaded_okay; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
sub load_bank { |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# This is in its own sub, for sake of sweeping the scary thing |
201
|
|
|
|
|
|
|
# (namely, a call to eval) under the rug. |
202
|
|
|
|
|
|
|
# I.e., to paraphrase what Larry Wall once said to me: if |
203
|
|
|
|
|
|
|
# you're going to do something odd, maybe you should do it |
204
|
|
|
|
|
|
|
# in private. |
205
|
|
|
|
|
|
|
|
206
|
538
|
|
|
538
|
0
|
836
|
my($banknum) = @_; # just as an integer value |
207
|
|
|
|
|
|
|
|
208
|
538
|
|
|
|
|
432
|
DEBUG and printf |
209
|
|
|
|
|
|
|
"# Eval-loading %s::x%02x ...\n"; |
210
|
|
|
|
|
|
|
|
211
|
538
|
|
|
|
|
574
|
$eval_loaded_okay = 0; |
212
|
538
|
|
|
|
|
1961
|
my $code = |
213
|
|
|
|
|
|
|
sprintf( "require %s::x%02x; \$eval_loaded_okay = 1;\n", |
214
|
|
|
|
|
|
|
__PACKAGE__, |
215
|
|
|
|
|
|
|
$banknum); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
{ |
218
|
538
|
|
|
|
|
549
|
local $SIG{'__DIE__'}; |
|
538
|
|
|
|
|
2082
|
|
219
|
538
|
|
|
|
|
47875
|
eval($code); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
538
|
50
|
|
|
|
2435
|
return 1 if $eval_loaded_okay; |
223
|
0
|
|
|
|
|
|
return 0; |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
#====================================================================== |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
1; |
229
|
|
|
|
|
|
|
__END__ |