line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT /* we want efficiency */ |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
/* I guese no private function needs pTHX_ and aTHX_ */ |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include "EXTERN.h" |
7
|
|
|
|
|
|
|
#include "perl.h" |
8
|
|
|
|
|
|
|
#include "XSUB.h" |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
/* This file is prepared by mkheader */ |
11
|
|
|
|
|
|
|
#include "ucatbl.h" |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
/* At present, char > 0x10ffff are unaffected without complaint, right? */ |
14
|
|
|
|
|
|
|
#define VALID_UTF_MAX (0x10ffff) |
15
|
|
|
|
|
|
|
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
#define MAX_DIV_16 (UV_MAX / 16) |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
/* Supported Levels */ |
20
|
|
|
|
|
|
|
#define MinLevel (1) |
21
|
|
|
|
|
|
|
#define MaxLevel (4) |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* Shifted weight at 4th level */ |
24
|
|
|
|
|
|
|
#define Shift4Wt (0xFFFF) |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#define VCE_Length (9) |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
#define Hangul_SBase (0xAC00) |
29
|
|
|
|
|
|
|
#define Hangul_SIni (0xAC00) |
30
|
|
|
|
|
|
|
#define Hangul_SFin (0xD7A3) |
31
|
|
|
|
|
|
|
#define Hangul_NCount (588) |
32
|
|
|
|
|
|
|
#define Hangul_TCount (28) |
33
|
|
|
|
|
|
|
#define Hangul_LBase (0x1100) |
34
|
|
|
|
|
|
|
#define Hangul_LIni (0x1100) |
35
|
|
|
|
|
|
|
#define Hangul_LFin (0x1159) |
36
|
|
|
|
|
|
|
#define Hangul_LFill (0x115F) |
37
|
|
|
|
|
|
|
#define Hangul_LEnd (0x115F) /* Unicode 5.2 */ |
38
|
|
|
|
|
|
|
#define Hangul_VBase (0x1161) |
39
|
|
|
|
|
|
|
#define Hangul_VIni (0x1160) /* from Vowel Filler */ |
40
|
|
|
|
|
|
|
#define Hangul_VFin (0x11A2) |
41
|
|
|
|
|
|
|
#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */ |
42
|
|
|
|
|
|
|
#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */ |
43
|
|
|
|
|
|
|
#define Hangul_TIni (0x11A8) |
44
|
|
|
|
|
|
|
#define Hangul_TFin (0x11F9) |
45
|
|
|
|
|
|
|
#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */ |
46
|
|
|
|
|
|
|
#define HangulL2Ini (0xA960) /* Unicode 5.2 */ |
47
|
|
|
|
|
|
|
#define HangulL2Fin (0xA97C) /* Unicode 5.2 */ |
48
|
|
|
|
|
|
|
#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */ |
49
|
|
|
|
|
|
|
#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */ |
50
|
|
|
|
|
|
|
#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */ |
51
|
|
|
|
|
|
|
#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */ |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
#define CJK_UidIni (0x4E00) |
54
|
|
|
|
|
|
|
#define CJK_UidFin (0x9FA5) |
55
|
|
|
|
|
|
|
#define CJK_UidF41 (0x9FBB) /* Unicode 4.1 */ |
56
|
|
|
|
|
|
|
#define CJK_UidF51 (0x9FC3) /* Unicode 5.1 */ |
57
|
|
|
|
|
|
|
#define CJK_UidF52 (0x9FCB) /* Unicode 5.2 */ |
58
|
|
|
|
|
|
|
#define CJK_UidF61 (0x9FCC) /* Unicode 6.1 */ |
59
|
|
|
|
|
|
|
#define CJK_UidF80 (0x9FD5) /* Unicode 8.0 */ |
60
|
|
|
|
|
|
|
#define CJK_UidF100 (0x9FEA) /* Unicode 10.0 */ |
61
|
|
|
|
|
|
|
#define CJK_UidF110 (0x9FEF) /* Unicode 11.0 */ |
62
|
|
|
|
|
|
|
#define CJK_UidF130 (0x9FFC) /* Unicode 13.0 */ |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */ |
65
|
|
|
|
|
|
|
#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */ |
66
|
|
|
|
|
|
|
#define CJK_ExtA130 (0x4DBF) /* Unicode 13.0 */ |
67
|
|
|
|
|
|
|
#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */ |
68
|
|
|
|
|
|
|
#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */ |
69
|
|
|
|
|
|
|
#define CJK_ExtB130 (0x2A6DD) /* Unicode 13.0 */ |
70
|
|
|
|
|
|
|
#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */ |
71
|
|
|
|
|
|
|
#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */ |
72
|
|
|
|
|
|
|
#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */ |
73
|
|
|
|
|
|
|
#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */ |
74
|
|
|
|
|
|
|
#define CJK_ExtEIni (0x2B820) /* Unicode 8.0 */ |
75
|
|
|
|
|
|
|
#define CJK_ExtEFin (0x2CEA1) /* Unicode 8.0 */ |
76
|
|
|
|
|
|
|
#define CJK_ExtFIni (0x2CEB0) /* Unicode 10.0 */ |
77
|
|
|
|
|
|
|
#define CJK_ExtFFin (0x2EBE0) /* Unicode 10.0 */ |
78
|
|
|
|
|
|
|
#define CJK_ExtGIni (0x30000) /* Unicode 13.0 */ |
79
|
|
|
|
|
|
|
#define CJK_ExtGFin (0x3134A) /* Unicode 13.0 */ |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
#define CJK_CompIni (0xFA0E) |
82
|
|
|
|
|
|
|
#define CJK_CompFin (0xFA29) |
83
|
|
|
|
|
|
|
static const STDCHAR UnifiedCompat[] = { |
84
|
|
|
|
|
|
|
1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1 |
85
|
|
|
|
|
|
|
}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */ |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
#define TangIdeoIni (0x17000) /* Unicode 9.0 */ |
88
|
|
|
|
|
|
|
#define TangIdeoFin (0x187EC) /* Unicode 9.0 */ |
89
|
|
|
|
|
|
|
#define TangIdeo110 (0x187F1) /* Unicode 11.0 */ |
90
|
|
|
|
|
|
|
#define TangIdeo120 (0x187F7) /* Unicode 12.0 */ |
91
|
|
|
|
|
|
|
#define TangCompIni (0x18800) /* Unicode 9.0 */ |
92
|
|
|
|
|
|
|
#define TangCompFin (0x18AF2) /* Unicode 9.0 */ |
93
|
|
|
|
|
|
|
#define TangComp130 (0x18AFF) /* Unicode 13.0 */ |
94
|
|
|
|
|
|
|
#define TangSuppIni (0x18D00) /* Unicode 13.0 */ |
95
|
|
|
|
|
|
|
#define TangSuppFin (0x18D08) /* Unicode 13.0 */ |
96
|
|
|
|
|
|
|
#define NushuIni (0x1B170) /* Unicode 10.0 */ |
97
|
|
|
|
|
|
|
#define NushuFin (0x1B2FB) /* Unicode 10.0 */ |
98
|
|
|
|
|
|
|
#define KhitanIni (0x18B00) /* Unicode 13.0 */ |
99
|
|
|
|
|
|
|
#define KhitanFin (0x18CD5) /* Unicode 13.0 */ |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode)) |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
MODULE = Unicode::Collate PACKAGE = Unicode::Collate |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
void |
108
|
|
|
|
|
|
|
_fetch_rest () |
109
|
|
|
|
|
|
|
PREINIT: |
110
|
|
|
|
|
|
|
char ** rest; |
111
|
|
|
|
|
|
|
PPCODE: |
112
|
120015
|
100
|
|
|
|
|
for (rest = (char **)UCA_rest; *rest; ++rest) { |
113
|
119888
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0))); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
void |
118
|
|
|
|
|
|
|
_fetch_simple (uv) |
119
|
|
|
|
|
|
|
UV uv |
120
|
|
|
|
|
|
|
PREINIT: |
121
|
|
|
|
|
|
|
U8 ***plane, **row; |
122
|
16798
|
|
|
|
|
|
U8* result = NULL; |
123
|
|
|
|
|
|
|
PPCODE: |
124
|
16798
|
50
|
|
|
|
|
if (!OVER_UTF_MAX(uv)){ |
125
|
16798
|
|
|
|
|
|
plane = (U8***)UCA_simple[uv >> 16]; |
126
|
16798
|
50
|
|
|
|
|
if (plane) { |
127
|
16798
|
|
|
|
|
|
row = plane[(uv >> 8) & 0xff]; |
128
|
16798
|
50
|
|
|
|
|
result = row ? row[uv & 0xff] : NULL; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
} |
131
|
16798
|
50
|
|
|
|
|
if (result) { |
132
|
|
|
|
|
|
|
int i; |
133
|
16798
|
|
|
|
|
|
int num = (int)*result; |
134
|
16798
|
|
|
|
|
|
++result; |
135
|
16798
|
50
|
|
|
|
|
EXTEND(SP, num); |
|
|
50
|
|
|
|
|
|
136
|
38682
|
100
|
|
|
|
|
for (i = 0; i < num; ++i) { |
137
|
21884
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length))); |
138
|
21884
|
|
|
|
|
|
result += VCE_Length; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} else { |
141
|
0
|
|
|
|
|
|
PUSHs(sv_2mortal(newSViv(0))); |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
SV* |
145
|
|
|
|
|
|
|
_ignorable_simple (uv) |
146
|
|
|
|
|
|
|
UV uv |
147
|
|
|
|
|
|
|
ALIAS: |
148
|
|
|
|
|
|
|
_exists_simple = 1 |
149
|
|
|
|
|
|
|
PREINIT: |
150
|
|
|
|
|
|
|
U8 ***plane, **row; |
151
|
58109
|
|
|
|
|
|
int num = -1; |
152
|
58109
|
|
|
|
|
|
U8* result = NULL; |
153
|
|
|
|
|
|
|
CODE: |
154
|
58109
|
50
|
|
|
|
|
if (!OVER_UTF_MAX(uv)){ |
155
|
58109
|
|
|
|
|
|
plane = (U8***)UCA_simple[uv >> 16]; |
156
|
58109
|
100
|
|
|
|
|
if (plane) { |
157
|
58105
|
|
|
|
|
|
row = plane[(uv >> 8) & 0xff]; |
158
|
58105
|
100
|
|
|
|
|
result = row ? row[uv & 0xff] : NULL; |
159
|
|
|
|
|
|
|
} |
160
|
58109
|
100
|
|
|
|
|
if (result) |
161
|
33570
|
|
|
|
|
|
num = (int)*result; /* assuming 0 <= num < 128 */ |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
58109
|
100
|
|
|
|
|
if (ix) |
165
|
29006
|
100
|
|
|
|
|
RETVAL = boolSV(num >0); |
166
|
|
|
|
|
|
|
else |
167
|
29103
|
100
|
|
|
|
|
RETVAL = boolSV(num==0); |
168
|
|
|
|
|
|
|
OUTPUT: |
169
|
|
|
|
|
|
|
RETVAL |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
void |
173
|
|
|
|
|
|
|
_getHexArray (src) |
174
|
|
|
|
|
|
|
SV* src |
175
|
|
|
|
|
|
|
PREINIT: |
176
|
|
|
|
|
|
|
char *s, *e; |
177
|
|
|
|
|
|
|
STRLEN byte; |
178
|
|
|
|
|
|
|
UV value; |
179
|
574242
|
|
|
|
|
|
bool overflowed = FALSE; |
180
|
|
|
|
|
|
|
const char *hexdigit; |
181
|
|
|
|
|
|
|
PPCODE: |
182
|
574242
|
50
|
|
|
|
|
s = SvPV(src,byte); |
183
|
2556048
|
100
|
|
|
|
|
for (e = s + byte; s < e;) { |
184
|
1981806
|
|
|
|
|
|
hexdigit = strchr((char *) PL_hexdigit, *s++); |
185
|
1981806
|
100
|
|
|
|
|
if (! hexdigit) |
186
|
514116
|
|
|
|
|
|
continue; |
187
|
1467690
|
|
|
|
|
|
value = (hexdigit - PL_hexdigit) & 0xF; |
188
|
5894710
|
100
|
|
|
|
|
while (*s) { |
189
|
5539079
|
|
|
|
|
|
hexdigit = strchr((char *) PL_hexdigit, *s++); |
190
|
5539079
|
100
|
|
|
|
|
if (! hexdigit) |
191
|
1112059
|
|
|
|
|
|
break; |
192
|
4427020
|
50
|
|
|
|
|
if (overflowed) |
193
|
0
|
|
|
|
|
|
continue; |
194
|
4427020
|
50
|
|
|
|
|
if (value > MAX_DIV_16) { |
195
|
0
|
|
|
|
|
|
overflowed = TRUE; |
196
|
0
|
|
|
|
|
|
continue; |
197
|
|
|
|
|
|
|
} |
198
|
4427020
|
|
|
|
|
|
value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF); |
199
|
|
|
|
|
|
|
} |
200
|
1467690
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value))); |
|
|
50
|
|
|
|
|
|
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
SV* |
205
|
|
|
|
|
|
|
_isIllegal (sv) |
206
|
|
|
|
|
|
|
SV* sv |
207
|
|
|
|
|
|
|
PREINIT: |
208
|
|
|
|
|
|
|
UV uv; |
209
|
|
|
|
|
|
|
CODE: |
210
|
4888
|
50
|
|
|
|
|
if (!sv || !SvIOK(sv)) |
|
|
50
|
|
|
|
|
|
211
|
0
|
|
|
|
|
|
XSRETURN_YES; |
212
|
4888
|
|
|
|
|
|
uv = SvUVX(sv); |
213
|
4888
|
100
|
|
|
|
|
RETVAL = boolSV( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
214
|
|
|
|
|
|
|
0x10FFFF < uv /* out of range */ |
215
|
|
|
|
|
|
|
|| ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] */ |
216
|
|
|
|
|
|
|
|| (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */ |
217
|
|
|
|
|
|
|
|| (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */ |
218
|
|
|
|
|
|
|
); |
219
|
|
|
|
|
|
|
OUTPUT: |
220
|
|
|
|
|
|
|
RETVAL |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
void |
224
|
|
|
|
|
|
|
_decompHangul (code) |
225
|
|
|
|
|
|
|
UV code |
226
|
|
|
|
|
|
|
PREINIT: |
227
|
|
|
|
|
|
|
UV sindex, lindex, vindex, tindex; |
228
|
|
|
|
|
|
|
PPCODE: |
229
|
|
|
|
|
|
|
/* code *must* be in Hangul syllable. |
230
|
|
|
|
|
|
|
* Check it before you enter here. */ |
231
|
105
|
|
|
|
|
|
sindex = code - Hangul_SBase; |
232
|
105
|
|
|
|
|
|
lindex = sindex / Hangul_NCount; |
233
|
105
|
|
|
|
|
|
vindex = (sindex % Hangul_NCount) / Hangul_TCount; |
234
|
105
|
|
|
|
|
|
tindex = sindex % Hangul_TCount; |
235
|
|
|
|
|
|
|
|
236
|
105
|
100
|
|
|
|
|
EXTEND(SP, tindex ? 3 : 2); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
237
|
105
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase))); |
238
|
105
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase))); |
239
|
105
|
100
|
|
|
|
|
if (tindex) |
240
|
60
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase))); |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
SV* |
244
|
|
|
|
|
|
|
getHST (code, uca_vers = 0) |
245
|
|
|
|
|
|
|
UV code; |
246
|
|
|
|
|
|
|
IV uca_vers; |
247
|
|
|
|
|
|
|
PREINIT: |
248
|
|
|
|
|
|
|
const char * hangtype; |
249
|
|
|
|
|
|
|
STRLEN typelen; |
250
|
|
|
|
|
|
|
CODE: |
251
|
1198
|
100
|
|
|
|
|
if (codeRange(Hangul_SIni, Hangul_SFin)) { |
|
|
100
|
|
|
|
|
|
252
|
222
|
100
|
|
|
|
|
if ((code - Hangul_SBase) % Hangul_TCount) { |
253
|
66
|
|
|
|
|
|
hangtype = "LVT"; typelen = 3; |
254
|
|
|
|
|
|
|
} else { |
255
|
45
|
|
|
|
|
|
hangtype = "LV"; typelen = 2; |
256
|
|
|
|
|
|
|
} |
257
|
1087
|
100
|
|
|
|
|
} else if (uca_vers < 20) { |
258
|
270
|
100
|
|
|
|
|
if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
259
|
24
|
|
|
|
|
|
hangtype = "L"; typelen = 1; |
260
|
246
|
100
|
|
|
|
|
} else if (codeRange(Hangul_VIni, Hangul_VFin)) { |
|
|
100
|
|
|
|
|
|
261
|
24
|
|
|
|
|
|
hangtype = "V"; typelen = 1; |
262
|
222
|
100
|
|
|
|
|
} else if (codeRange(Hangul_TIni, Hangul_TFin)) { |
|
|
100
|
|
|
|
|
|
263
|
24
|
|
|
|
|
|
hangtype = "T"; typelen = 1; |
264
|
|
|
|
|
|
|
} else { |
265
|
270
|
|
|
|
|
|
hangtype = ""; typelen = 0; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
} else { |
268
|
817
|
100
|
|
|
|
|
if (codeRange(Hangul_LIni, Hangul_LEnd) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
269
|
286
|
100
|
|
|
|
|
codeRange(HangulL2Ini, HangulL2Fin)) { |
270
|
192
|
|
|
|
|
|
hangtype = "L"; typelen = 1; |
271
|
625
|
100
|
|
|
|
|
} else if (codeRange(Hangul_VIni, Hangul_VEnd) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
272
|
195
|
100
|
|
|
|
|
codeRange(HangulV2Ini, HangulV2Fin)) { |
273
|
197
|
|
|
|
|
|
hangtype = "V"; typelen = 1; |
274
|
428
|
100
|
|
|
|
|
} else if (codeRange(Hangul_TIni, Hangul_TEnd) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
275
|
130
|
100
|
|
|
|
|
codeRange(HangulT2Ini, HangulT2Fin)) { |
276
|
151
|
|
|
|
|
|
hangtype = "T"; typelen = 1; |
277
|
|
|
|
|
|
|
} else { |
278
|
277
|
|
|
|
|
|
hangtype = ""; typelen = 0; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
1198
|
|
|
|
|
|
RETVAL = newSVpvn(hangtype, typelen); |
283
|
|
|
|
|
|
|
OUTPUT: |
284
|
|
|
|
|
|
|
RETVAL |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
void |
288
|
|
|
|
|
|
|
_derivCE_9 (code) |
289
|
|
|
|
|
|
|
UV code |
290
|
|
|
|
|
|
|
ALIAS: |
291
|
|
|
|
|
|
|
_derivCE_14 = 1 |
292
|
|
|
|
|
|
|
_derivCE_18 = 2 |
293
|
|
|
|
|
|
|
_derivCE_20 = 3 |
294
|
|
|
|
|
|
|
_derivCE_22 = 4 |
295
|
|
|
|
|
|
|
_derivCE_24 = 5 |
296
|
|
|
|
|
|
|
_derivCE_32 = 6 |
297
|
|
|
|
|
|
|
_derivCE_34 = 7 |
298
|
|
|
|
|
|
|
_derivCE_36 = 8 |
299
|
|
|
|
|
|
|
_derivCE_38 = 9 |
300
|
|
|
|
|
|
|
_derivCE_40 = 10 |
301
|
|
|
|
|
|
|
_derivCE_43 = 11 |
302
|
|
|
|
|
|
|
PREINIT: |
303
|
|
|
|
|
|
|
UV base, aaaa, bbbb; |
304
|
12743
|
|
|
|
|
|
U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
305
|
12743
|
|
|
|
|
|
U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
306
|
12743
|
|
|
|
|
|
bool basic_unified = 0, tangut = 0, nushu = 0, khitan = 0; |
307
|
|
|
|
|
|
|
PPCODE: |
308
|
12743
|
100
|
|
|
|
|
if (codeRange(CJK_UidIni, CJK_CompFin)) { |
|
|
100
|
|
|
|
|
|
309
|
12246
|
100
|
|
|
|
|
if (codeRange(CJK_CompIni, CJK_CompFin)) |
|
|
50
|
|
|
|
|
|
310
|
792
|
|
|
|
|
|
basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; |
311
|
|
|
|
|
|
|
else |
312
|
5331
|
100
|
|
|
|
|
basic_unified = (ix >= 11 ? (code <= CJK_UidF130) : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
313
|
|
|
|
|
|
|
ix >= 9 ? (code <= CJK_UidF110) : |
314
|
|
|
|
|
|
|
ix == 8 ? (code <= CJK_UidF100) : |
315
|
|
|
|
|
|
|
ix >= 6 ? (code <= CJK_UidF80) : |
316
|
|
|
|
|
|
|
ix == 5 ? (code <= CJK_UidF61) : |
317
|
|
|
|
|
|
|
ix >= 3 ? (code <= CJK_UidF52) : |
318
|
|
|
|
|
|
|
ix == 2 ? (code <= CJK_UidF51) : |
319
|
|
|
|
|
|
|
ix == 1 ? (code <= CJK_UidF41) : |
320
|
|
|
|
|
|
|
(code <= CJK_UidFin)); |
321
|
|
|
|
|
|
|
} else { |
322
|
6620
|
100
|
|
|
|
|
if (ix >= 7) { |
323
|
5353
|
100
|
|
|
|
|
tangut = (ix >= 11) ? (codeRange(TangIdeoIni, TangIdeo120) || |
|
|
100
|
|
|
|
|
|
324
|
5189
|
100
|
|
|
|
|
codeRange(TangCompIni, TangComp130) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
325
|
7691
|
100
|
|
|
|
|
codeRange(TangSuppIni, TangSuppFin)) : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
326
|
682
|
100
|
|
|
|
|
(ix == 10) ? (codeRange(TangIdeoIni, TangIdeo120) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
327
|
186
|
100
|
|
|
|
|
codeRange(TangCompIni, TangCompFin)) : |
328
|
341
|
100
|
|
|
|
|
(ix == 9) ? (codeRange(TangIdeoIni, TangIdeo110) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
329
|
93
|
100
|
|
|
|
|
codeRange(TangCompIni, TangCompFin)) : |
330
|
702
|
100
|
|
|
|
|
(codeRange(TangIdeoIni, TangIdeoFin) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
331
|
195
|
100
|
|
|
|
|
codeRange(TangCompIni, TangCompFin)); |
332
|
|
|
|
|
|
|
} |
333
|
6620
|
100
|
|
|
|
|
if (ix >= 8) |
334
|
3048
|
100
|
|
|
|
|
nushu = (codeRange(NushuIni, NushuFin)); |
|
|
100
|
|
|
|
|
|
335
|
6620
|
100
|
|
|
|
|
if (ix >= 11) |
336
|
2051
|
100
|
|
|
|
|
khitan = (codeRange(KhitanIni, KhitanFin)); |
|
|
100
|
|
|
|
|
|
337
|
|
|
|
|
|
|
} |
338
|
28223
|
100
|
|
|
|
|
base = tangut |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
339
|
|
|
|
|
|
|
? 0xFB00 : |
340
|
|
|
|
|
|
|
nushu |
341
|
|
|
|
|
|
|
? 0xFB01 : |
342
|
|
|
|
|
|
|
khitan |
343
|
|
|
|
|
|
|
? 0xFB02 : |
344
|
|
|
|
|
|
|
basic_unified |
345
|
|
|
|
|
|
|
? 0xFB40 : /* CJK */ |
346
|
2044
|
100
|
|
|
|
|
((ix >= 11 ? codeRange(CJK_ExtAIni, CJK_ExtA130) |
|
|
100
|
|
|
|
|
|
347
|
6180
|
100
|
|
|
|
|
: codeRange(CJK_ExtAIni, CJK_ExtAFin)) |
|
|
100
|
|
|
|
|
|
348
|
13436
|
100
|
|
|
|
|
|| |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
349
|
1486
|
100
|
|
|
|
|
(ix >= 11 ? codeRange(CJK_ExtBIni, CJK_ExtB130) |
|
|
100
|
|
|
|
|
|
350
|
5770
|
100
|
|
|
|
|
: codeRange(CJK_ExtBIni, CJK_ExtBFin)) |
|
|
100
|
|
|
|
|
|
351
|
6158
|
100
|
|
|
|
|
|| |
352
|
4182
|
100
|
|
|
|
|
(ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) |
|
|
100
|
|
|
|
|
|
353
|
6106
|
100
|
|
|
|
|
|| |
354
|
3785
|
100
|
|
|
|
|
(ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) |
|
|
100
|
|
|
|
|
|
355
|
6058
|
100
|
|
|
|
|
|| |
356
|
1963
|
100
|
|
|
|
|
(ix >= 6 && codeRange(CJK_ExtEIni, CJK_ExtEFin)) |
|
|
100
|
|
|
|
|
|
357
|
6030
|
100
|
|
|
|
|
|| |
358
|
1363
|
100
|
|
|
|
|
(ix >= 8 && codeRange(CJK_ExtFIni, CJK_ExtFFin)) |
|
|
100
|
|
|
|
|
|
359
|
6010
|
100
|
|
|
|
|
|| |
360
|
459
|
100
|
|
|
|
|
(ix >= 11 && codeRange(CJK_ExtGIni, CJK_ExtGFin))) |
|
|
100
|
|
|
|
|
|
361
|
|
|
|
|
|
|
? 0xFB80 /* CJK ext. */ |
362
|
|
|
|
|
|
|
: 0xFBC0; /* others */ |
363
|
12743
|
100
|
|
|
|
|
aaaa = tangut || nushu || khitan ? base : base + (code >> 15); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
364
|
12956
|
|
|
|
|
|
bbbb = (tangut ? (code - TangIdeoIni) : |
365
|
69
|
|
|
|
|
|
nushu ? (code - NushuIni) : |
366
|
12743
|
100
|
|
|
|
|
khitan ? (code - KhitanIni) : (code & 0x7FFF)) | 0x8000; |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
367
|
12743
|
|
|
|
|
|
a[1] = (U8)(aaaa >> 8); |
368
|
12743
|
|
|
|
|
|
a[2] = (U8)(aaaa & 0xFF); |
369
|
12743
|
|
|
|
|
|
b[1] = (U8)(bbbb >> 8); |
370
|
12743
|
|
|
|
|
|
b[2] = (U8)(bbbb & 0xFF); |
371
|
12743
|
|
|
|
|
|
a[4] = (U8)(0x20); /* second octet of level 2 */ |
372
|
12743
|
|
|
|
|
|
a[6] = (U8)(0x02); /* second octet of level 3 */ |
373
|
12743
|
|
|
|
|
|
a[7] = b[7] = (U8)(code >> 8); |
374
|
12743
|
|
|
|
|
|
a[8] = b[8] = (U8)(code & 0xFF); |
375
|
12743
|
50
|
|
|
|
|
EXTEND(SP, 2); |
376
|
12743
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); |
377
|
12743
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
void |
381
|
|
|
|
|
|
|
_derivCE_8 (code) |
382
|
|
|
|
|
|
|
UV code |
383
|
|
|
|
|
|
|
PREINIT: |
384
|
|
|
|
|
|
|
UV aaaa, bbbb; |
385
|
495
|
|
|
|
|
|
U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
386
|
495
|
|
|
|
|
|
U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
387
|
|
|
|
|
|
|
PPCODE: |
388
|
495
|
|
|
|
|
|
aaaa = 0xFF80 + (code >> 15); |
389
|
495
|
|
|
|
|
|
bbbb = (code & 0x7FFF) | 0x8000; |
390
|
495
|
|
|
|
|
|
a[1] = (U8)(aaaa >> 8); |
391
|
495
|
|
|
|
|
|
a[2] = (U8)(aaaa & 0xFF); |
392
|
495
|
|
|
|
|
|
b[1] = (U8)(bbbb >> 8); |
393
|
495
|
|
|
|
|
|
b[2] = (U8)(bbbb & 0xFF); |
394
|
495
|
|
|
|
|
|
a[4] = (U8)(0x02); /* second octet of level 2 */ |
395
|
495
|
|
|
|
|
|
a[6] = (U8)(0x01); /* second octet of level 3 */ |
396
|
495
|
|
|
|
|
|
a[7] = b[7] = (U8)(code >> 8); |
397
|
495
|
|
|
|
|
|
a[8] = b[8] = (U8)(code & 0xFF); |
398
|
495
|
50
|
|
|
|
|
EXTEND(SP, 2); |
399
|
495
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); |
400
|
495
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
void |
404
|
|
|
|
|
|
|
_uideoCE_8 (code) |
405
|
|
|
|
|
|
|
UV code |
406
|
|
|
|
|
|
|
PREINIT: |
407
|
96
|
|
|
|
|
|
U8 uice[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
408
|
|
|
|
|
|
|
PPCODE: |
409
|
96
|
|
|
|
|
|
uice[1] = uice[7] = (U8)(code >> 8); |
410
|
96
|
|
|
|
|
|
uice[2] = uice[8] = (U8)(code & 0xFF); |
411
|
96
|
|
|
|
|
|
uice[4] = (U8)(0x20); /* second octet of level 2 */ |
412
|
96
|
|
|
|
|
|
uice[6] = (U8)(0x02); /* second octet of level 3 */ |
413
|
96
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length))); |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
SV* |
417
|
|
|
|
|
|
|
_isUIdeo (code, uca_vers) |
418
|
|
|
|
|
|
|
UV code; |
419
|
|
|
|
|
|
|
IV uca_vers; |
420
|
|
|
|
|
|
|
bool basic_unified = 0; |
421
|
|
|
|
|
|
|
CODE: |
422
|
|
|
|
|
|
|
/* uca_vers = 0 for _uideoCE_8() */ |
423
|
14307
|
100
|
|
|
|
|
if (CJK_UidIni <= code) { |
424
|
13119
|
100
|
|
|
|
|
if (codeRange(CJK_CompIni, CJK_CompFin)) |
|
|
100
|
|
|
|
|
|
425
|
576
|
|
|
|
|
|
basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; |
426
|
|
|
|
|
|
|
else |
427
|
12543
|
100
|
|
|
|
|
basic_unified = (uca_vers >= 43 ? (code <= CJK_UidF130) : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
428
|
|
|
|
|
|
|
uca_vers >= 38 ? (code <= CJK_UidF110) : |
429
|
|
|
|
|
|
|
uca_vers >= 36 ? (code <= CJK_UidF100) : |
430
|
|
|
|
|
|
|
uca_vers >= 32 ? (code <= CJK_UidF80) : |
431
|
|
|
|
|
|
|
uca_vers >= 24 ? (code <= CJK_UidF61) : |
432
|
|
|
|
|
|
|
uca_vers >= 20 ? (code <= CJK_UidF52) : |
433
|
|
|
|
|
|
|
uca_vers >= 18 ? (code <= CJK_UidF51) : |
434
|
|
|
|
|
|
|
uca_vers >= 14 ? (code <= CJK_UidF41) : |
435
|
|
|
|
|
|
|
(code <= CJK_UidFin)); |
436
|
|
|
|
|
|
|
} |
437
|
14307
|
100
|
|
|
|
|
RETVAL = boolSV( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
438
|
|
|
|
|
|
|
(basic_unified) |
439
|
|
|
|
|
|
|
|| |
440
|
|
|
|
|
|
|
(codeRange(CJK_ExtAIni, CJK_ExtAFin)) |
441
|
|
|
|
|
|
|
|| |
442
|
|
|
|
|
|
|
(uca_vers >= 43 && codeRange(CJK_ExtAIni, CJK_ExtA130)) |
443
|
|
|
|
|
|
|
|| |
444
|
|
|
|
|
|
|
(uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin)) |
445
|
|
|
|
|
|
|
|| |
446
|
|
|
|
|
|
|
(uca_vers >= 43 && codeRange(CJK_ExtBIni, CJK_ExtB130)) |
447
|
|
|
|
|
|
|
|| |
448
|
|
|
|
|
|
|
(uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) |
449
|
|
|
|
|
|
|
|| |
450
|
|
|
|
|
|
|
(uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) |
451
|
|
|
|
|
|
|
|| |
452
|
|
|
|
|
|
|
(uca_vers >= 32 && codeRange(CJK_ExtEIni, CJK_ExtEFin)) |
453
|
|
|
|
|
|
|
|| |
454
|
|
|
|
|
|
|
(uca_vers >= 36 && codeRange(CJK_ExtFIni, CJK_ExtFFin)) |
455
|
|
|
|
|
|
|
|| |
456
|
|
|
|
|
|
|
(uca_vers >= 43 && codeRange(CJK_ExtGIni, CJK_ExtGFin)) |
457
|
|
|
|
|
|
|
); |
458
|
|
|
|
|
|
|
OUTPUT: |
459
|
|
|
|
|
|
|
RETVAL |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
SV* |
463
|
|
|
|
|
|
|
mk_SortKey (self, buf) |
464
|
|
|
|
|
|
|
SV* self; |
465
|
|
|
|
|
|
|
SV* buf; |
466
|
|
|
|
|
|
|
PREINIT: |
467
|
|
|
|
|
|
|
SV *dst, **svp; |
468
|
|
|
|
|
|
|
STRLEN dlen, vlen; |
469
|
|
|
|
|
|
|
U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel]; |
470
|
|
|
|
|
|
|
AV *bufAV; |
471
|
|
|
|
|
|
|
HV *selfHV; |
472
|
|
|
|
|
|
|
UV back_flag; |
473
|
|
|
|
|
|
|
I32 i, buf_len; |
474
|
|
|
|
|
|
|
IV lv, level, uca_vers; |
475
|
|
|
|
|
|
|
bool upper_lower, kata_hira, v2i, last_is_var; |
476
|
|
|
|
|
|
|
CODE: |
477
|
59153
|
50
|
|
|
|
|
if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) |
|
|
50
|
|
|
|
|
|
478
|
59153
|
|
|
|
|
|
selfHV = (HV*)SvRV(self); |
479
|
|
|
|
|
|
|
else |
480
|
0
|
|
|
|
|
|
croak("$self is not a HASHREF."); |
481
|
|
|
|
|
|
|
|
482
|
59153
|
50
|
|
|
|
|
if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV) |
|
|
50
|
|
|
|
|
|
483
|
59153
|
|
|
|
|
|
bufAV = (AV*)SvRV(buf); |
484
|
|
|
|
|
|
|
else |
485
|
0
|
|
|
|
|
|
croak("XSUB, not an ARRAYREF."); |
486
|
|
|
|
|
|
|
|
487
|
59153
|
|
|
|
|
|
buf_len = av_len(bufAV); |
488
|
|
|
|
|
|
|
|
489
|
59153
|
100
|
|
|
|
|
if (buf_len < 0) { /* empty: -1 */ |
490
|
2241
|
|
|
|
|
|
dlen = 2 * (MaxLevel - 1); |
491
|
2241
|
|
|
|
|
|
dst = newSV(dlen); |
492
|
2241
|
|
|
|
|
|
(void)SvPOK_only(dst); |
493
|
2241
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
494
|
15687
|
100
|
|
|
|
|
while (dlen--) |
495
|
13446
|
|
|
|
|
|
*d++ = '\0'; |
496
|
|
|
|
|
|
|
} else { |
497
|
56912
|
|
|
|
|
|
svp = hv_fetch(selfHV, "level", 5, FALSE); |
498
|
56912
|
50
|
|
|
|
|
level = svp ? SvIV(*svp) : MaxLevel; |
|
|
50
|
|
|
|
|
|
499
|
|
|
|
|
|
|
|
500
|
218319
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
501
|
161407
|
|
|
|
|
|
New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8); |
502
|
161407
|
|
|
|
|
|
s[lv] = eachlevel[lv]; |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
|
505
|
56912
|
|
|
|
|
|
svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE); |
506
|
56912
|
100
|
|
|
|
|
upper_lower = svp ? SvTRUE(*svp) : FALSE; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
507
|
56912
|
|
|
|
|
|
svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE); |
508
|
56912
|
100
|
|
|
|
|
kata_hira = svp ? SvTRUE(*svp) : FALSE; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
509
|
56912
|
|
|
|
|
|
svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); |
510
|
56912
|
50
|
|
|
|
|
uca_vers = SvIV(*svp); |
511
|
56912
|
|
|
|
|
|
svp = hv_fetch(selfHV, "variable", 8, FALSE); |
512
|
56305
|
50
|
|
|
|
|
v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */ |
513
|
2552
|
100
|
|
|
|
|
? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13)) |
514
|
115769
|
100
|
|
|
|
|
: FALSE; |
|
|
100
|
|
|
|
|
|
515
|
|
|
|
|
|
|
|
516
|
56912
|
|
|
|
|
|
last_is_var = FALSE; |
517
|
157511
|
100
|
|
|
|
|
for (i = 0; i <= buf_len; i++) { |
518
|
100599
|
|
|
|
|
|
svp = av_fetch(bufAV, i, FALSE); |
519
|
|
|
|
|
|
|
|
520
|
100599
|
50
|
|
|
|
|
if (svp && SvPOK(*svp)) |
|
|
50
|
|
|
|
|
|
521
|
100599
|
50
|
|
|
|
|
v = (U8*)SvPV(*svp, vlen); |
522
|
|
|
|
|
|
|
else |
523
|
0
|
|
|
|
|
|
croak("not a vwt."); |
524
|
|
|
|
|
|
|
|
525
|
100599
|
50
|
|
|
|
|
if (vlen < VCE_Length) /* ignore short VCE (unexpected) */ |
526
|
0
|
|
|
|
|
|
continue; |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
/* "Ignorable (L1, L2) after Variable" since track. v. 9 */ |
529
|
100599
|
100
|
|
|
|
|
if (v2i) { |
530
|
93608
|
100
|
|
|
|
|
if (*v) |
531
|
1777
|
|
|
|
|
|
last_is_var = TRUE; |
532
|
91831
|
100
|
|
|
|
|
else if (v[1] || v[2]) /* non zero primary weight */ |
|
|
100
|
|
|
|
|
|
533
|
84532
|
|
|
|
|
|
last_is_var = FALSE; |
534
|
7299
|
100
|
|
|
|
|
else if (last_is_var) /* zero primary weight; skipped */ |
535
|
43
|
|
|
|
|
|
continue; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
100556
|
50
|
|
|
|
|
if (v[5] == 0) { /* tert wt < 256 */ |
539
|
100556
|
100
|
|
|
|
|
if (upper_lower) { |
540
|
4279
|
100
|
|
|
|
|
if (0x8 <= v[6] && v[6] <= 0xC) /* lower */ |
|
|
100
|
|
|
|
|
|
541
|
571
|
|
|
|
|
|
v[6] -= 6; |
542
|
3708
|
100
|
|
|
|
|
else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */ |
|
|
100
|
|
|
|
|
|
543
|
2102
|
|
|
|
|
|
v[6] += 6; |
544
|
1606
|
100
|
|
|
|
|
else if (v[6] == 0x1C) /* square upper */ |
545
|
57
|
|
|
|
|
|
v[6]++; |
546
|
1549
|
100
|
|
|
|
|
else if (v[6] == 0x1D) /* square lower */ |
547
|
61
|
|
|
|
|
|
v[6]--; |
548
|
|
|
|
|
|
|
} |
549
|
100556
|
100
|
|
|
|
|
if (kata_hira) { |
550
|
378
|
100
|
|
|
|
|
if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */ |
|
|
50
|
|
|
|
|
|
551
|
202
|
|
|
|
|
|
v[6] -= 2; |
552
|
176
|
100
|
|
|
|
|
else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */ |
|
|
50
|
|
|
|
|
|
553
|
30
|
|
|
|
|
|
v[6] += 5; |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
|
557
|
394173
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
558
|
293617
|
100
|
|
|
|
|
if (v[2 * lv + 1] || v[2 * lv + 2]) { |
|
|
100
|
|
|
|
|
|
559
|
241913
|
|
|
|
|
|
*s[lv]++ = v[2 * lv + 1]; |
560
|
241913
|
|
|
|
|
|
*s[lv]++ = v[2 * lv + 2]; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
56912
|
|
|
|
|
|
dlen = 2 * (MaxLevel - 1); |
566
|
218319
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) |
567
|
161407
|
|
|
|
|
|
dlen += s[lv] - eachlevel[lv]; |
568
|
|
|
|
|
|
|
|
569
|
56912
|
|
|
|
|
|
dst = newSV(dlen); |
570
|
56912
|
|
|
|
|
|
(void)SvPOK_only(dst); |
571
|
56912
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
572
|
|
|
|
|
|
|
|
573
|
56912
|
|
|
|
|
|
svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE); |
574
|
56912
|
50
|
|
|
|
|
back_flag = svp ? SvUV(*svp) : (UV)0; |
|
|
50
|
|
|
|
|
|
575
|
|
|
|
|
|
|
|
576
|
218319
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
577
|
161407
|
100
|
|
|
|
|
if (back_flag & (1 << (lv + 1))) { |
578
|
381
|
|
|
|
|
|
p = s[lv]; |
579
|
381
|
|
|
|
|
|
e = eachlevel[lv]; |
580
|
2520
|
100
|
|
|
|
|
for ( ; e < p; p -= 2) { |
581
|
2139
|
|
|
|
|
|
*d++ = p[-2]; |
582
|
2139
|
|
|
|
|
|
*d++ = p[-1]; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
else { |
586
|
161026
|
|
|
|
|
|
p = eachlevel[lv]; |
587
|
161026
|
|
|
|
|
|
e = s[lv]; |
588
|
640574
|
100
|
|
|
|
|
while (p < e) |
589
|
479548
|
|
|
|
|
|
*d++ = *p++; |
590
|
|
|
|
|
|
|
} |
591
|
161407
|
100
|
|
|
|
|
if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ |
592
|
149270
|
|
|
|
|
|
*d++ = '\0'; |
593
|
149270
|
|
|
|
|
|
*d++ = '\0'; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
} |
596
|
|
|
|
|
|
|
|
597
|
123153
|
100
|
|
|
|
|
for (lv = level; lv < MaxLevel; lv++) { |
598
|
66241
|
100
|
|
|
|
|
if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ |
599
|
21466
|
|
|
|
|
|
*d++ = '\0'; |
600
|
21466
|
|
|
|
|
|
*d++ = '\0'; |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
218319
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
605
|
161407
|
|
|
|
|
|
Safefree(eachlevel[lv]); |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
} |
608
|
59153
|
|
|
|
|
|
*d = '\0'; |
609
|
59153
|
|
|
|
|
|
SvCUR_set(dst, d - (U8*)SvPVX(dst)); |
610
|
59153
|
|
|
|
|
|
RETVAL = dst; |
611
|
|
|
|
|
|
|
OUTPUT: |
612
|
|
|
|
|
|
|
RETVAL |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
SV* |
616
|
|
|
|
|
|
|
varCE (self, vce) |
617
|
|
|
|
|
|
|
SV* self; |
618
|
|
|
|
|
|
|
SV* vce; |
619
|
|
|
|
|
|
|
PREINIT: |
620
|
|
|
|
|
|
|
SV *dst, *vbl, **svp; |
621
|
|
|
|
|
|
|
HV *selfHV; |
622
|
|
|
|
|
|
|
U8 *a, *v, *d; |
623
|
|
|
|
|
|
|
STRLEN alen, vlen; |
624
|
|
|
|
|
|
|
bool ig_l2; |
625
|
|
|
|
|
|
|
IV uca_vers; |
626
|
|
|
|
|
|
|
UV totwt; |
627
|
|
|
|
|
|
|
CODE: |
628
|
102824
|
50
|
|
|
|
|
if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) |
|
|
50
|
|
|
|
|
|
629
|
102824
|
|
|
|
|
|
selfHV = (HV*)SvRV(self); |
630
|
|
|
|
|
|
|
else |
631
|
0
|
|
|
|
|
|
croak("$self is not a HASHREF."); |
632
|
|
|
|
|
|
|
|
633
|
102824
|
|
|
|
|
|
svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE); |
634
|
102824
|
100
|
|
|
|
|
ig_l2 = svp ? SvTRUE(*svp) : FALSE; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
635
|
|
|
|
|
|
|
|
636
|
102824
|
|
|
|
|
|
svp = hv_fetch(selfHV, "variable", 8, FALSE); |
637
|
102824
|
50
|
|
|
|
|
vbl = svp ? *svp : &PL_sv_no; |
638
|
102824
|
50
|
|
|
|
|
a = (U8*)SvPV(vbl, alen); |
639
|
102824
|
50
|
|
|
|
|
v = (U8*)SvPV(vce, vlen); |
640
|
|
|
|
|
|
|
|
641
|
102824
|
|
|
|
|
|
dst = newSV(vlen); |
642
|
102824
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
643
|
102824
|
|
|
|
|
|
(void)SvPOK_only(dst); |
644
|
102824
|
|
|
|
|
|
Copy(v, d, vlen, U8); |
645
|
102824
|
|
|
|
|
|
SvCUR_set(dst, vlen); |
646
|
102824
|
|
|
|
|
|
d[vlen] = '\0'; |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
/* primary weight == 0 && secondary weight != 0 */ |
649
|
102824
|
100
|
|
|
|
|
if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
650
|
21
|
|
|
|
|
|
d[3] = d[4] = d[5] = d[6] = '\0'; |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
/* variable: checked only the first char and the length, |
654
|
|
|
|
|
|
|
trusting checkCollator() and %VariableOK in Perl ... */ |
655
|
|
|
|
|
|
|
|
656
|
102824
|
50
|
|
|
|
|
if (vlen >= VCE_Length && *a != 'n') { |
|
|
100
|
|
|
|
|
|
657
|
97080
|
100
|
|
|
|
|
if (*v) { |
658
|
2124
|
100
|
|
|
|
|
if (*a == 's') { /* shifted or shift-trimmed */ |
659
|
2064
|
|
|
|
|
|
d[7] = d[1]; /* wt level 1 to 4 */ |
660
|
2064
|
|
|
|
|
|
d[8] = d[2]; |
661
|
|
|
|
|
|
|
} /* else blanked */ |
662
|
2124
|
|
|
|
|
|
d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0'; |
663
|
94956
|
100
|
|
|
|
|
} else if (*a == 's') { /* shifted or shift-trimmed */ |
664
|
94736
|
|
|
|
|
|
totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6]; |
665
|
94736
|
100
|
|
|
|
|
if (alen == 7 && totwt != 0) { /* shifted */ |
|
|
100
|
|
|
|
|
|
666
|
189034
|
100
|
|
|
|
|
if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */ |
|
|
100
|
|
|
|
|
|
667
|
66
|
|
|
|
|
|
d[7] = d[1]; /* wt level 1 to 4 */ |
668
|
66
|
|
|
|
|
|
d[8] = d[2]; |
669
|
|
|
|
|
|
|
} else { |
670
|
94451
|
|
|
|
|
|
svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); |
671
|
94451
|
50
|
|
|
|
|
if (!svp) |
672
|
0
|
|
|
|
|
|
croak("Panic: no $self->{UCA_Version} in varCE"); |
673
|
94451
|
50
|
|
|
|
|
uca_vers = SvIV(*svp); |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
/* completely ignorable or the second derived CE */ |
676
|
94451
|
100
|
|
|
|
|
if (uca_vers >= 36 && d[3] + d[4] + d[5] + d[6] == 0) { |
|
|
100
|
|
|
|
|
|
677
|
11629
|
|
|
|
|
|
d[7] = d[8] = '\0'; |
678
|
|
|
|
|
|
|
} else { |
679
|
82822
|
|
|
|
|
|
d[7] = (U8)(Shift4Wt >> 8); |
680
|
82822
|
|
|
|
|
|
d[8] = (U8)(Shift4Wt & 0xFF); |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
} else { /* shift-trimmed or completely ignorable */ |
684
|
219
|
|
|
|
|
|
d[7] = d[8] = '\0'; |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
} /* else blanked */ |
687
|
|
|
|
|
|
|
} /* else non-ignorable */ |
688
|
102824
|
|
|
|
|
|
RETVAL = dst; |
689
|
|
|
|
|
|
|
OUTPUT: |
690
|
|
|
|
|
|
|
RETVAL |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
SV* |
695
|
|
|
|
|
|
|
visualizeSortKey (self, key) |
696
|
|
|
|
|
|
|
SV * self |
697
|
|
|
|
|
|
|
SV * key |
698
|
|
|
|
|
|
|
PREINIT: |
699
|
|
|
|
|
|
|
HV *selfHV; |
700
|
|
|
|
|
|
|
SV **svp, *dst; |
701
|
|
|
|
|
|
|
U8 *s, *e, *d; |
702
|
|
|
|
|
|
|
STRLEN klen, dlen; |
703
|
|
|
|
|
|
|
UV uv; |
704
|
258
|
|
|
|
|
|
IV uca_vers, sep = 0; |
705
|
258
|
|
|
|
|
|
const char *upperhex = "0123456789ABCDEF"; |
706
|
|
|
|
|
|
|
CODE: |
707
|
258
|
50
|
|
|
|
|
if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) |
|
|
50
|
|
|
|
|
|
708
|
258
|
|
|
|
|
|
selfHV = (HV*)SvRV(self); |
709
|
|
|
|
|
|
|
else |
710
|
0
|
|
|
|
|
|
croak("$self is not a HASHREF."); |
711
|
|
|
|
|
|
|
|
712
|
258
|
|
|
|
|
|
svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); |
713
|
258
|
50
|
|
|
|
|
if (!svp) |
714
|
0
|
|
|
|
|
|
croak("Panic: no $self->{UCA_Version} in visualizeSortKey"); |
715
|
258
|
50
|
|
|
|
|
uca_vers = SvIV(*svp); |
716
|
|
|
|
|
|
|
|
717
|
258
|
50
|
|
|
|
|
s = (U8*)SvPV(key, klen); |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
/* slightly *longer* than the need, but I'm afraid of miscounting; |
720
|
|
|
|
|
|
|
= (klen / 2) * 5 - 1 |
721
|
|
|
|
|
|
|
# FFFF and ' ' for each 16bit units but ' ' is less by 1; |
722
|
|
|
|
|
|
|
# ' ' and '|' for level boundaries including the identical level |
723
|
|
|
|
|
|
|
+ 2 # '[' and ']' |
724
|
|
|
|
|
|
|
+ 1 # '\0' |
725
|
|
|
|
|
|
|
(a) if klen is odd (not expected), maybe more 5 bytes. |
726
|
258
|
|
|
|
|
|
(b) there is not always the identical level. |
727
|
258
|
|
|
|
|
|
*/ |
728
|
258
|
|
|
|
|
|
dlen = (klen / 2) * 5 + MaxLevel * 2 + 2; |
729
|
258
|
|
|
|
|
|
dst = newSV(dlen); |
730
|
|
|
|
|
|
|
(void)SvPOK_only(dst); |
731
|
258
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
732
|
2708
|
100
|
|
|
|
|
|
733
|
2450
|
|
|
|
|
|
*d++ = '['; |
734
|
2450
|
100
|
|
|
|
|
for (e = s + klen; s < e; s += 2) { |
|
|
100
|
|
|
|
|
|
735
|
1601
|
100
|
|
|
|
|
uv = (U16)(*s << 8 | s[1]); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
736
|
1298
|
|
|
|
|
|
if (uv || sep >= MaxLevel) { |
737
|
1601
|
|
|
|
|
|
if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|'))) |
738
|
1601
|
|
|
|
|
|
*d++ = ' '; |
739
|
1601
|
|
|
|
|
|
*d++ = upperhex[ (s[0] >> 4) & 0xF ]; |
740
|
1601
|
|
|
|
|
|
*d++ = upperhex[ s[0] & 0xF ]; |
741
|
|
|
|
|
|
|
*d++ = upperhex[ (s[1] >> 4) & 0xF ]; |
742
|
849
|
100
|
|
|
|
|
*d++ = upperhex[ s[1] & 0xF ]; |
|
|
100
|
|
|
|
|
|
743
|
749
|
|
|
|
|
|
} else { |
744
|
849
|
|
|
|
|
|
if ((9 <= uca_vers) && (d[-1] != '[')) |
745
|
849
|
|
|
|
|
|
*d++ = ' '; |
746
|
|
|
|
|
|
|
*d++ = '|'; |
747
|
|
|
|
|
|
|
++sep; |
748
|
258
|
|
|
|
|
|
} |
749
|
258
|
|
|
|
|
|
} |
750
|
258
|
|
|
|
|
|
*d++ = ']'; |
751
|
258
|
|
|
|
|
|
*d = '\0'; |
752
|
|
|
|
|
|
|
SvCUR_set(dst, d - (U8*)SvPVX(dst)); |
753
|
|
|
|
|
|
|
RETVAL = dst; |
754
|
|
|
|
|
|
|
OUTPUT: |
755
|
|
|
|
|
|
|
RETVAL |