line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT /* we want efficiency */ |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
/* I guess no private function needs pTHX_ and aTHX_ */ |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include "EXTERN.h" |
7
|
|
|
|
|
|
|
#include "perl.h" |
8
|
|
|
|
|
|
|
#include "XSUB.h" |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
/* This file is prepared by mkheader */ |
11
|
|
|
|
|
|
|
#include "ucatbl.h" |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
/* At present, char > 0x10ffff are unaffected without complaint, right? */ |
14
|
|
|
|
|
|
|
#define VALID_UTF_MAX (0x10ffff) |
15
|
|
|
|
|
|
|
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
#define MAX_DIV_16 (UV_MAX / 16) |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
/* Supported Levels */ |
20
|
|
|
|
|
|
|
#define MinLevel (1) |
21
|
|
|
|
|
|
|
#define MaxLevel (4) |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* Shifted weight at 4th level */ |
24
|
|
|
|
|
|
|
#define Shift4Wt (0xFFFF) |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#define VCE_Length (9) |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
#define Hangul_SBase (0xAC00) |
29
|
|
|
|
|
|
|
#define Hangul_SIni (0xAC00) |
30
|
|
|
|
|
|
|
#define Hangul_SFin (0xD7A3) |
31
|
|
|
|
|
|
|
#define Hangul_NCount (588) |
32
|
|
|
|
|
|
|
#define Hangul_TCount (28) |
33
|
|
|
|
|
|
|
#define Hangul_LBase (0x1100) |
34
|
|
|
|
|
|
|
#define Hangul_LIni (0x1100) |
35
|
|
|
|
|
|
|
#define Hangul_LFin (0x1159) |
36
|
|
|
|
|
|
|
#define Hangul_LFill (0x115F) |
37
|
|
|
|
|
|
|
#define Hangul_LEnd (0x115F) /* Unicode 5.2 */ |
38
|
|
|
|
|
|
|
#define Hangul_VBase (0x1161) |
39
|
|
|
|
|
|
|
#define Hangul_VIni (0x1160) /* from Vowel Filler */ |
40
|
|
|
|
|
|
|
#define Hangul_VFin (0x11A2) |
41
|
|
|
|
|
|
|
#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */ |
42
|
|
|
|
|
|
|
#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */ |
43
|
|
|
|
|
|
|
#define Hangul_TIni (0x11A8) |
44
|
|
|
|
|
|
|
#define Hangul_TFin (0x11F9) |
45
|
|
|
|
|
|
|
#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */ |
46
|
|
|
|
|
|
|
#define HangulL2Ini (0xA960) /* Unicode 5.2 */ |
47
|
|
|
|
|
|
|
#define HangulL2Fin (0xA97C) /* Unicode 5.2 */ |
48
|
|
|
|
|
|
|
#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */ |
49
|
|
|
|
|
|
|
#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */ |
50
|
|
|
|
|
|
|
#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */ |
51
|
|
|
|
|
|
|
#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */ |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
#define CJK_UidIni (0x4E00) |
54
|
|
|
|
|
|
|
#define CJK_UidFin (0x9FA5) |
55
|
|
|
|
|
|
|
#define CJK_UidF41 (0x9FBB) /* Unicode 4.1 */ |
56
|
|
|
|
|
|
|
#define CJK_UidF51 (0x9FC3) /* Unicode 5.1 */ |
57
|
|
|
|
|
|
|
#define CJK_UidF52 (0x9FCB) /* Unicode 5.2 */ |
58
|
|
|
|
|
|
|
#define CJK_UidF61 (0x9FCC) /* Unicode 6.1 */ |
59
|
|
|
|
|
|
|
#define CJK_UidF80 (0x9FD5) /* Unicode 8.0 */ |
60
|
|
|
|
|
|
|
#define CJK_UidF100 (0x9FEA) /* Unicode 10.0 */ |
61
|
|
|
|
|
|
|
#define CJK_UidF110 (0x9FEF) /* Unicode 11.0 */ |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */ |
64
|
|
|
|
|
|
|
#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */ |
65
|
|
|
|
|
|
|
#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */ |
66
|
|
|
|
|
|
|
#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */ |
67
|
|
|
|
|
|
|
#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */ |
68
|
|
|
|
|
|
|
#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */ |
69
|
|
|
|
|
|
|
#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */ |
70
|
|
|
|
|
|
|
#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */ |
71
|
|
|
|
|
|
|
#define CJK_ExtEIni (0x2B820) /* Unicode 8.0 */ |
72
|
|
|
|
|
|
|
#define CJK_ExtEFin (0x2CEA1) /* Unicode 8.0 */ |
73
|
|
|
|
|
|
|
#define CJK_ExtFIni (0x2CEB0) /* Unicode 10.0 */ |
74
|
|
|
|
|
|
|
#define CJK_ExtFFin (0x2EBE0) /* Unicode 10.0 */ |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
#define CJK_CompIni (0xFA0E) |
77
|
|
|
|
|
|
|
#define CJK_CompFin (0xFA29) |
78
|
|
|
|
|
|
|
static const STDCHAR UnifiedCompat[] = { |
79
|
|
|
|
|
|
|
1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1 |
80
|
|
|
|
|
|
|
}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */ |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
#define TangIdeoIni (0x17000) /* Unicode 9.0 */ |
83
|
|
|
|
|
|
|
#define TangIdeoFin (0x187EC) /* Unicode 9.0 */ |
84
|
|
|
|
|
|
|
#define TangIdeo110 (0x187F1) /* Unicode 11.0 */ |
85
|
|
|
|
|
|
|
#define TangIdeo120 (0x187F7) /* Unicode 12.0 */ |
86
|
|
|
|
|
|
|
#define TangCompIni (0x18800) /* Unicode 9.0 */ |
87
|
|
|
|
|
|
|
#define TangCompFin (0x18AF2) /* Unicode 9.0 */ |
88
|
|
|
|
|
|
|
#define NushuIni (0x1B170) /* Unicode 10.0 */ |
89
|
|
|
|
|
|
|
#define NushuFin (0x1B2FB) /* Unicode 10.0 */ |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
#define codeRange(bcode, ecode) ((bcode) <= code && code <= (ecode)) |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
MODULE = Unicode::Collate PACKAGE = Unicode::Collate |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
void |
98
|
|
|
|
|
|
|
_fetch_rest () |
99
|
|
|
|
|
|
|
PREINIT: |
100
|
|
|
|
|
|
|
char ** rest; |
101
|
|
|
|
|
|
|
PPCODE: |
102
|
119634
|
100
|
|
|
|
|
for (rest = (char **)UCA_rest; *rest; ++rest) { |
103
|
119507
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVpv((char *) *rest, 0))); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
void |
108
|
|
|
|
|
|
|
_fetch_simple (uv) |
109
|
|
|
|
|
|
|
UV uv |
110
|
|
|
|
|
|
|
PREINIT: |
111
|
|
|
|
|
|
|
U8 ***plane, **row; |
112
|
16798
|
|
|
|
|
|
U8* result = NULL; |
113
|
|
|
|
|
|
|
PPCODE: |
114
|
16798
|
50
|
|
|
|
|
if (!OVER_UTF_MAX(uv)){ |
115
|
16798
|
|
|
|
|
|
plane = (U8***)UCA_simple[uv >> 16]; |
116
|
16798
|
50
|
|
|
|
|
if (plane) { |
117
|
16798
|
|
|
|
|
|
row = plane[(uv >> 8) & 0xff]; |
118
|
16798
|
50
|
|
|
|
|
result = row ? row[uv & 0xff] : NULL; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
} |
121
|
16798
|
50
|
|
|
|
|
if (result) { |
122
|
|
|
|
|
|
|
int i; |
123
|
16798
|
|
|
|
|
|
int num = (int)*result; |
124
|
16798
|
|
|
|
|
|
++result; |
125
|
16798
|
50
|
|
|
|
|
EXTEND(SP, num); |
|
|
50
|
|
|
|
|
|
126
|
38682
|
100
|
|
|
|
|
for (i = 0; i < num; ++i) { |
127
|
21884
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) result, VCE_Length))); |
128
|
21884
|
|
|
|
|
|
result += VCE_Length; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
} else { |
131
|
0
|
|
|
|
|
|
PUSHs(sv_2mortal(newSViv(0))); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
SV* |
135
|
|
|
|
|
|
|
_ignorable_simple (uv) |
136
|
|
|
|
|
|
|
UV uv |
137
|
|
|
|
|
|
|
ALIAS: |
138
|
|
|
|
|
|
|
_exists_simple = 1 |
139
|
|
|
|
|
|
|
PREINIT: |
140
|
|
|
|
|
|
|
U8 ***plane, **row; |
141
|
58109
|
|
|
|
|
|
int num = -1; |
142
|
58109
|
|
|
|
|
|
U8* result = NULL; |
143
|
|
|
|
|
|
|
CODE: |
144
|
58109
|
50
|
|
|
|
|
if (!OVER_UTF_MAX(uv)){ |
145
|
58109
|
|
|
|
|
|
plane = (U8***)UCA_simple[uv >> 16]; |
146
|
58109
|
100
|
|
|
|
|
if (plane) { |
147
|
58105
|
|
|
|
|
|
row = plane[(uv >> 8) & 0xff]; |
148
|
58105
|
100
|
|
|
|
|
result = row ? row[uv & 0xff] : NULL; |
149
|
|
|
|
|
|
|
} |
150
|
58109
|
100
|
|
|
|
|
if (result) |
151
|
33570
|
|
|
|
|
|
num = (int)*result; /* assuming 0 <= num < 128 */ |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
58109
|
100
|
|
|
|
|
if (ix) |
155
|
29006
|
100
|
|
|
|
|
RETVAL = boolSV(num >0); |
156
|
|
|
|
|
|
|
else |
157
|
29103
|
100
|
|
|
|
|
RETVAL = boolSV(num==0); |
158
|
|
|
|
|
|
|
OUTPUT: |
159
|
|
|
|
|
|
|
RETVAL |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
void |
163
|
|
|
|
|
|
|
_getHexArray (src) |
164
|
|
|
|
|
|
|
SV* src |
165
|
|
|
|
|
|
|
PREINIT: |
166
|
|
|
|
|
|
|
char *s, *e; |
167
|
|
|
|
|
|
|
STRLEN byte; |
168
|
|
|
|
|
|
|
UV value; |
169
|
571226
|
|
|
|
|
|
bool overflowed = FALSE; |
170
|
|
|
|
|
|
|
const char *hexdigit; |
171
|
|
|
|
|
|
|
PPCODE: |
172
|
571226
|
50
|
|
|
|
|
s = SvPV(src,byte); |
173
|
2543309
|
100
|
|
|
|
|
for (e = s + byte; s < e;) { |
174
|
1972083
|
|
|
|
|
|
hexdigit = strchr((char *) PL_hexdigit, *s++); |
175
|
1972083
|
100
|
|
|
|
|
if (! hexdigit) |
176
|
511662
|
|
|
|
|
|
continue; |
177
|
1460421
|
|
|
|
|
|
value = (hexdigit - PL_hexdigit) & 0xF; |
178
|
5864944
|
100
|
|
|
|
|
while (*s) { |
179
|
5510887
|
|
|
|
|
|
hexdigit = strchr((char *) PL_hexdigit, *s++); |
180
|
5510887
|
100
|
|
|
|
|
if (! hexdigit) |
181
|
1106364
|
|
|
|
|
|
break; |
182
|
4404523
|
50
|
|
|
|
|
if (overflowed) |
183
|
0
|
|
|
|
|
|
continue; |
184
|
4404523
|
50
|
|
|
|
|
if (value > MAX_DIV_16) { |
185
|
0
|
|
|
|
|
|
overflowed = TRUE; |
186
|
0
|
|
|
|
|
|
continue; |
187
|
|
|
|
|
|
|
} |
188
|
4404523
|
|
|
|
|
|
value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF); |
189
|
|
|
|
|
|
|
} |
190
|
1460421
|
50
|
|
|
|
|
XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value))); |
|
|
50
|
|
|
|
|
|
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
SV* |
195
|
|
|
|
|
|
|
_isIllegal (sv) |
196
|
|
|
|
|
|
|
SV* sv |
197
|
|
|
|
|
|
|
PREINIT: |
198
|
|
|
|
|
|
|
UV uv; |
199
|
|
|
|
|
|
|
CODE: |
200
|
4454
|
50
|
|
|
|
|
if (!sv || !SvIOK(sv)) |
|
|
50
|
|
|
|
|
|
201
|
0
|
|
|
|
|
|
XSRETURN_YES; |
202
|
4454
|
|
|
|
|
|
uv = SvUVX(sv); |
203
|
4454
|
100
|
|
|
|
|
RETVAL = boolSV( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
204
|
|
|
|
|
|
|
0x10FFFF < uv /* out of range */ |
205
|
|
|
|
|
|
|
|| ((uv & 0xFFFE) == 0xFFFE) /* ??FFF[EF] */ |
206
|
|
|
|
|
|
|
|| (0xD800 <= uv && uv <= 0xDFFF) /* unpaired surrogates */ |
207
|
|
|
|
|
|
|
|| (0xFDD0 <= uv && uv <= 0xFDEF) /* other non-characters */ |
208
|
|
|
|
|
|
|
); |
209
|
|
|
|
|
|
|
OUTPUT: |
210
|
|
|
|
|
|
|
RETVAL |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
void |
214
|
|
|
|
|
|
|
_decompHangul (code) |
215
|
|
|
|
|
|
|
UV code |
216
|
|
|
|
|
|
|
PREINIT: |
217
|
|
|
|
|
|
|
UV sindex, lindex, vindex, tindex; |
218
|
|
|
|
|
|
|
PPCODE: |
219
|
|
|
|
|
|
|
/* code *must* be in Hangul syllable. |
220
|
|
|
|
|
|
|
* Check it before you enter here. */ |
221
|
105
|
|
|
|
|
|
sindex = code - Hangul_SBase; |
222
|
105
|
|
|
|
|
|
lindex = sindex / Hangul_NCount; |
223
|
105
|
|
|
|
|
|
vindex = (sindex % Hangul_NCount) / Hangul_TCount; |
224
|
105
|
|
|
|
|
|
tindex = sindex % Hangul_TCount; |
225
|
|
|
|
|
|
|
|
226
|
105
|
100
|
|
|
|
|
EXTEND(SP, tindex ? 3 : 2); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
227
|
105
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVuv(lindex + Hangul_LBase))); |
228
|
105
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVuv(vindex + Hangul_VBase))); |
229
|
105
|
100
|
|
|
|
|
if (tindex) |
230
|
60
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVuv(tindex + Hangul_TBase))); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
SV* |
234
|
|
|
|
|
|
|
getHST (code, uca_vers = 0) |
235
|
|
|
|
|
|
|
UV code; |
236
|
|
|
|
|
|
|
IV uca_vers; |
237
|
|
|
|
|
|
|
PREINIT: |
238
|
|
|
|
|
|
|
const char * hangtype; |
239
|
|
|
|
|
|
|
STRLEN typelen; |
240
|
|
|
|
|
|
|
CODE: |
241
|
1148
|
100
|
|
|
|
|
if (codeRange(Hangul_SIni, Hangul_SFin)) { |
|
|
100
|
|
|
|
|
|
242
|
212
|
100
|
|
|
|
|
if ((code - Hangul_SBase) % Hangul_TCount) { |
243
|
63
|
|
|
|
|
|
hangtype = "LVT"; typelen = 3; |
244
|
|
|
|
|
|
|
} else { |
245
|
43
|
|
|
|
|
|
hangtype = "LV"; typelen = 2; |
246
|
|
|
|
|
|
|
} |
247
|
1042
|
100
|
|
|
|
|
} else if (uca_vers < 20) { |
248
|
270
|
100
|
|
|
|
|
if (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
249
|
24
|
|
|
|
|
|
hangtype = "L"; typelen = 1; |
250
|
246
|
100
|
|
|
|
|
} else if (codeRange(Hangul_VIni, Hangul_VFin)) { |
|
|
100
|
|
|
|
|
|
251
|
24
|
|
|
|
|
|
hangtype = "V"; typelen = 1; |
252
|
222
|
100
|
|
|
|
|
} else if (codeRange(Hangul_TIni, Hangul_TFin)) { |
|
|
100
|
|
|
|
|
|
253
|
24
|
|
|
|
|
|
hangtype = "T"; typelen = 1; |
254
|
|
|
|
|
|
|
} else { |
255
|
270
|
|
|
|
|
|
hangtype = ""; typelen = 0; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} else { |
258
|
772
|
100
|
|
|
|
|
if (codeRange(Hangul_LIni, Hangul_LEnd) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
259
|
264
|
100
|
|
|
|
|
codeRange(HangulL2Ini, HangulL2Fin)) { |
260
|
183
|
|
|
|
|
|
hangtype = "L"; typelen = 1; |
261
|
589
|
100
|
|
|
|
|
} else if (codeRange(Hangul_VIni, Hangul_VEnd) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
262
|
180
|
100
|
|
|
|
|
codeRange(HangulV2Ini, HangulV2Fin)) { |
263
|
188
|
|
|
|
|
|
hangtype = "V"; typelen = 1; |
264
|
401
|
100
|
|
|
|
|
} else if (codeRange(Hangul_TIni, Hangul_TEnd) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
265
|
120
|
100
|
|
|
|
|
codeRange(HangulT2Ini, HangulT2Fin)) { |
266
|
142
|
|
|
|
|
|
hangtype = "T"; typelen = 1; |
267
|
|
|
|
|
|
|
} else { |
268
|
259
|
|
|
|
|
|
hangtype = ""; typelen = 0; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
1148
|
|
|
|
|
|
RETVAL = newSVpvn(hangtype, typelen); |
273
|
|
|
|
|
|
|
OUTPUT: |
274
|
|
|
|
|
|
|
RETVAL |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
void |
278
|
|
|
|
|
|
|
_derivCE_9 (code) |
279
|
|
|
|
|
|
|
UV code |
280
|
|
|
|
|
|
|
ALIAS: |
281
|
|
|
|
|
|
|
_derivCE_14 = 1 |
282
|
|
|
|
|
|
|
_derivCE_18 = 2 |
283
|
|
|
|
|
|
|
_derivCE_20 = 3 |
284
|
|
|
|
|
|
|
_derivCE_22 = 4 |
285
|
|
|
|
|
|
|
_derivCE_24 = 5 |
286
|
|
|
|
|
|
|
_derivCE_32 = 6 |
287
|
|
|
|
|
|
|
_derivCE_34 = 7 |
288
|
|
|
|
|
|
|
_derivCE_36 = 8 |
289
|
|
|
|
|
|
|
_derivCE_38 = 9 |
290
|
|
|
|
|
|
|
_derivCE_40 = 10 |
291
|
|
|
|
|
|
|
PREINIT: |
292
|
|
|
|
|
|
|
UV base, aaaa, bbbb; |
293
|
11423
|
|
|
|
|
|
U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
294
|
11423
|
|
|
|
|
|
U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
295
|
11423
|
|
|
|
|
|
bool basic_unified = 0, tangut = 0, nushu = 0; |
296
|
|
|
|
|
|
|
PPCODE: |
297
|
11423
|
100
|
|
|
|
|
if (codeRange(CJK_UidIni, CJK_CompFin)) { |
|
|
100
|
|
|
|
|
|
298
|
11118
|
100
|
|
|
|
|
if (codeRange(CJK_CompIni, CJK_CompFin)) |
|
|
50
|
|
|
|
|
|
299
|
748
|
|
|
|
|
|
basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; |
300
|
|
|
|
|
|
|
else |
301
|
4811
|
100
|
|
|
|
|
basic_unified = (ix >= 9 ? (code <= CJK_UidF110) : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
302
|
|
|
|
|
|
|
ix >= 8 ? (code <= CJK_UidF100) : |
303
|
|
|
|
|
|
|
ix >= 6 ? (code <= CJK_UidF80) : |
304
|
|
|
|
|
|
|
ix == 5 ? (code <= CJK_UidF61) : |
305
|
|
|
|
|
|
|
ix >= 3 ? (code <= CJK_UidF52) : |
306
|
|
|
|
|
|
|
ix == 2 ? (code <= CJK_UidF51) : |
307
|
|
|
|
|
|
|
ix == 1 ? (code <= CJK_UidF41) : |
308
|
|
|
|
|
|
|
(code <= CJK_UidFin)); |
309
|
|
|
|
|
|
|
} else { |
310
|
5864
|
100
|
|
|
|
|
if (ix >= 7) { |
311
|
6304
|
100
|
|
|
|
|
tangut = (ix >= 10) ? (codeRange(TangIdeoIni, TangIdeo120) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
312
|
6969
|
100
|
|
|
|
|
codeRange(TangCompIni, TangCompFin)) : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
313
|
289
|
100
|
|
|
|
|
(ix >= 9) ? (codeRange(TangIdeoIni, TangIdeo110) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
314
|
71
|
100
|
|
|
|
|
codeRange(TangCompIni, TangCompFin)) : |
315
|
598
|
100
|
|
|
|
|
(codeRange(TangIdeoIni, TangIdeoFin) || |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
316
|
151
|
100
|
|
|
|
|
codeRange(TangCompIni, TangCompFin)); |
317
|
|
|
|
|
|
|
} |
318
|
5864
|
100
|
|
|
|
|
if (ix >= 8) |
319
|
2682
|
100
|
|
|
|
|
nushu = (codeRange(NushuIni, NushuFin)); |
|
|
100
|
|
|
|
|
|
320
|
|
|
|
|
|
|
} |
321
|
18631
|
100
|
|
|
|
|
base = tangut |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
322
|
|
|
|
|
|
|
? 0xFB00 : |
323
|
|
|
|
|
|
|
nushu |
324
|
|
|
|
|
|
|
? 0xFB01 : |
325
|
|
|
|
|
|
|
basic_unified |
326
|
|
|
|
|
|
|
? 0xFB40 : /* CJK */ |
327
|
5411
|
100
|
|
|
|
|
((codeRange(CJK_ExtAIni, CJK_ExtAFin)) |
328
|
6300
|
100
|
|
|
|
|
|| |
329
|
1837
|
100
|
|
|
|
|
(codeRange(CJK_ExtBIni, CJK_ExtBFin)) |
330
|
5209
|
100
|
|
|
|
|
|| |
331
|
3453
|
100
|
|
|
|
|
(ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) |
|
|
100
|
|
|
|
|
|
332
|
5161
|
100
|
|
|
|
|
|| |
333
|
3104
|
100
|
|
|
|
|
(ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) |
|
|
100
|
|
|
|
|
|
334
|
5117
|
100
|
|
|
|
|
|| |
335
|
1506
|
100
|
|
|
|
|
(ix >= 6 && codeRange(CJK_ExtEIni, CJK_ExtEFin)) |
|
|
100
|
|
|
|
|
|
336
|
5093
|
100
|
|
|
|
|
|| |
337
|
998
|
100
|
|
|
|
|
(ix >= 8 && codeRange(CJK_ExtFIni, CJK_ExtFFin))) |
|
|
100
|
|
|
|
|
|
338
|
|
|
|
|
|
|
? 0xFB80 /* CJK ext. */ |
339
|
|
|
|
|
|
|
: 0xFBC0; /* others */ |
340
|
11423
|
100
|
|
|
|
|
aaaa = tangut || nushu ? base : base + (code >> 15); |
|
|
100
|
|
|
|
|
|
341
|
11594
|
|
|
|
|
|
bbbb = (tangut ? (code - TangIdeoIni) : |
342
|
11423
|
100
|
|
|
|
|
nushu ? (code - NushuIni) : (code & 0x7FFF)) | 0x8000; |
|
|
100
|
|
|
|
|
|
343
|
11423
|
|
|
|
|
|
a[1] = (U8)(aaaa >> 8); |
344
|
11423
|
|
|
|
|
|
a[2] = (U8)(aaaa & 0xFF); |
345
|
11423
|
|
|
|
|
|
b[1] = (U8)(bbbb >> 8); |
346
|
11423
|
|
|
|
|
|
b[2] = (U8)(bbbb & 0xFF); |
347
|
11423
|
|
|
|
|
|
a[4] = (U8)(0x20); /* second octet of level 2 */ |
348
|
11423
|
|
|
|
|
|
a[6] = (U8)(0x02); /* second octet of level 3 */ |
349
|
11423
|
|
|
|
|
|
a[7] = b[7] = (U8)(code >> 8); |
350
|
11423
|
|
|
|
|
|
a[8] = b[8] = (U8)(code & 0xFF); |
351
|
11423
|
50
|
|
|
|
|
EXTEND(SP, 2); |
352
|
11423
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); |
353
|
11423
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
void |
357
|
|
|
|
|
|
|
_derivCE_8 (code) |
358
|
|
|
|
|
|
|
UV code |
359
|
|
|
|
|
|
|
PREINIT: |
360
|
|
|
|
|
|
|
UV aaaa, bbbb; |
361
|
451
|
|
|
|
|
|
U8 a[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
362
|
451
|
|
|
|
|
|
U8 b[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
363
|
|
|
|
|
|
|
PPCODE: |
364
|
451
|
|
|
|
|
|
aaaa = 0xFF80 + (code >> 15); |
365
|
451
|
|
|
|
|
|
bbbb = (code & 0x7FFF) | 0x8000; |
366
|
451
|
|
|
|
|
|
a[1] = (U8)(aaaa >> 8); |
367
|
451
|
|
|
|
|
|
a[2] = (U8)(aaaa & 0xFF); |
368
|
451
|
|
|
|
|
|
b[1] = (U8)(bbbb >> 8); |
369
|
451
|
|
|
|
|
|
b[2] = (U8)(bbbb & 0xFF); |
370
|
451
|
|
|
|
|
|
a[4] = (U8)(0x02); /* second octet of level 2 */ |
371
|
451
|
|
|
|
|
|
a[6] = (U8)(0x01); /* second octet of level 3 */ |
372
|
451
|
|
|
|
|
|
a[7] = b[7] = (U8)(code >> 8); |
373
|
451
|
|
|
|
|
|
a[8] = b[8] = (U8)(code & 0xFF); |
374
|
451
|
50
|
|
|
|
|
EXTEND(SP, 2); |
375
|
451
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length))); |
376
|
451
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length))); |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
void |
380
|
|
|
|
|
|
|
_uideoCE_8 (code) |
381
|
|
|
|
|
|
|
UV code |
382
|
|
|
|
|
|
|
PREINIT: |
383
|
86
|
|
|
|
|
|
U8 uice[VCE_Length + 1] = "\x00\x00\x00\x00\x00\x00\x00\x00\x00"; |
384
|
|
|
|
|
|
|
PPCODE: |
385
|
86
|
|
|
|
|
|
uice[1] = uice[7] = (U8)(code >> 8); |
386
|
86
|
|
|
|
|
|
uice[2] = uice[8] = (U8)(code & 0xFF); |
387
|
86
|
|
|
|
|
|
uice[4] = (U8)(0x20); /* second octet of level 2 */ |
388
|
86
|
|
|
|
|
|
uice[6] = (U8)(0x02); /* second octet of level 3 */ |
389
|
86
|
|
|
|
|
|
PUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length))); |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
SV* |
393
|
|
|
|
|
|
|
_isUIdeo (code, uca_vers) |
394
|
|
|
|
|
|
|
UV code; |
395
|
|
|
|
|
|
|
IV uca_vers; |
396
|
|
|
|
|
|
|
bool basic_unified = 0; |
397
|
|
|
|
|
|
|
CODE: |
398
|
|
|
|
|
|
|
/* uca_vers = 0 for _uideoCE_8() */ |
399
|
13795
|
100
|
|
|
|
|
if (CJK_UidIni <= code) { |
400
|
12643
|
100
|
|
|
|
|
if (codeRange(CJK_CompIni, CJK_CompFin)) |
|
|
100
|
|
|
|
|
|
401
|
548
|
|
|
|
|
|
basic_unified = (bool)UnifiedCompat[code - CJK_CompIni]; |
402
|
|
|
|
|
|
|
else |
403
|
12095
|
100
|
|
|
|
|
basic_unified = (uca_vers >= 38 ? (code <= CJK_UidF110) : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
404
|
|
|
|
|
|
|
uca_vers >= 36 ? (code <= CJK_UidF100) : |
405
|
|
|
|
|
|
|
uca_vers >= 32 ? (code <= CJK_UidF80) : |
406
|
|
|
|
|
|
|
uca_vers >= 24 ? (code <= CJK_UidF61) : |
407
|
|
|
|
|
|
|
uca_vers >= 20 ? (code <= CJK_UidF52) : |
408
|
|
|
|
|
|
|
uca_vers >= 18 ? (code <= CJK_UidF51) : |
409
|
|
|
|
|
|
|
uca_vers >= 14 ? (code <= CJK_UidF41) : |
410
|
|
|
|
|
|
|
(code <= CJK_UidFin)); |
411
|
|
|
|
|
|
|
} |
412
|
13795
|
100
|
|
|
|
|
RETVAL = boolSV( |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
413
|
|
|
|
|
|
|
(basic_unified) |
414
|
|
|
|
|
|
|
|| |
415
|
|
|
|
|
|
|
(codeRange(CJK_ExtAIni, CJK_ExtAFin)) |
416
|
|
|
|
|
|
|
|| |
417
|
|
|
|
|
|
|
(uca_vers >= 8 && codeRange(CJK_ExtBIni, CJK_ExtBFin)) |
418
|
|
|
|
|
|
|
|| |
419
|
|
|
|
|
|
|
(uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin)) |
420
|
|
|
|
|
|
|
|| |
421
|
|
|
|
|
|
|
(uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin)) |
422
|
|
|
|
|
|
|
|| |
423
|
|
|
|
|
|
|
(uca_vers >= 32 && codeRange(CJK_ExtEIni, CJK_ExtEFin)) |
424
|
|
|
|
|
|
|
|| |
425
|
|
|
|
|
|
|
(uca_vers >= 36 && codeRange(CJK_ExtFIni, CJK_ExtFFin)) |
426
|
|
|
|
|
|
|
); |
427
|
|
|
|
|
|
|
OUTPUT: |
428
|
|
|
|
|
|
|
RETVAL |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
SV* |
432
|
|
|
|
|
|
|
mk_SortKey (self, buf) |
433
|
|
|
|
|
|
|
SV* self; |
434
|
|
|
|
|
|
|
SV* buf; |
435
|
|
|
|
|
|
|
PREINIT: |
436
|
|
|
|
|
|
|
SV *dst, **svp; |
437
|
|
|
|
|
|
|
STRLEN dlen, vlen; |
438
|
|
|
|
|
|
|
U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel]; |
439
|
|
|
|
|
|
|
AV *bufAV; |
440
|
|
|
|
|
|
|
HV *selfHV; |
441
|
|
|
|
|
|
|
UV back_flag; |
442
|
|
|
|
|
|
|
I32 i, buf_len; |
443
|
|
|
|
|
|
|
IV lv, level, uca_vers; |
444
|
|
|
|
|
|
|
bool upper_lower, kata_hira, v2i, last_is_var; |
445
|
|
|
|
|
|
|
CODE: |
446
|
57449
|
50
|
|
|
|
|
if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) |
|
|
50
|
|
|
|
|
|
447
|
57449
|
|
|
|
|
|
selfHV = (HV*)SvRV(self); |
448
|
|
|
|
|
|
|
else |
449
|
0
|
|
|
|
|
|
croak("$self is not a HASHREF."); |
450
|
|
|
|
|
|
|
|
451
|
57449
|
50
|
|
|
|
|
if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV) |
|
|
50
|
|
|
|
|
|
452
|
57449
|
|
|
|
|
|
bufAV = (AV*)SvRV(buf); |
453
|
|
|
|
|
|
|
else |
454
|
0
|
|
|
|
|
|
croak("XSUB, not an ARRAYREF."); |
455
|
|
|
|
|
|
|
|
456
|
57449
|
|
|
|
|
|
buf_len = av_len(bufAV); |
457
|
|
|
|
|
|
|
|
458
|
57449
|
100
|
|
|
|
|
if (buf_len < 0) { /* empty: -1 */ |
459
|
1969
|
|
|
|
|
|
dlen = 2 * (MaxLevel - 1); |
460
|
1969
|
|
|
|
|
|
dst = newSV(dlen); |
461
|
1969
|
|
|
|
|
|
(void)SvPOK_only(dst); |
462
|
1969
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
463
|
13783
|
100
|
|
|
|
|
while (dlen--) |
464
|
11814
|
|
|
|
|
|
*d++ = '\0'; |
465
|
|
|
|
|
|
|
} else { |
466
|
55480
|
|
|
|
|
|
svp = hv_fetch(selfHV, "level", 5, FALSE); |
467
|
55480
|
50
|
|
|
|
|
level = svp ? SvIV(*svp) : MaxLevel; |
|
|
50
|
|
|
|
|
|
468
|
|
|
|
|
|
|
|
469
|
211187
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
470
|
155707
|
|
|
|
|
|
New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8); |
471
|
155707
|
|
|
|
|
|
s[lv] = eachlevel[lv]; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
55480
|
|
|
|
|
|
svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE); |
475
|
55480
|
100
|
|
|
|
|
upper_lower = svp ? SvTRUE(*svp) : FALSE; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
476
|
55480
|
|
|
|
|
|
svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE); |
477
|
55480
|
100
|
|
|
|
|
kata_hira = svp ? SvTRUE(*svp) : FALSE; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
478
|
55480
|
|
|
|
|
|
svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); |
479
|
55480
|
50
|
|
|
|
|
uca_vers = SvIV(*svp); |
480
|
55480
|
|
|
|
|
|
svp = hv_fetch(selfHV, "variable", 8, FALSE); |
481
|
54927
|
50
|
|
|
|
|
v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */ |
482
|
2548
|
100
|
|
|
|
|
? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13)) |
483
|
112955
|
100
|
|
|
|
|
: FALSE; |
|
|
100
|
|
|
|
|
|
484
|
|
|
|
|
|
|
|
485
|
55480
|
|
|
|
|
|
last_is_var = FALSE; |
486
|
153071
|
100
|
|
|
|
|
for (i = 0; i <= buf_len; i++) { |
487
|
97591
|
|
|
|
|
|
svp = av_fetch(bufAV, i, FALSE); |
488
|
|
|
|
|
|
|
|
489
|
97591
|
50
|
|
|
|
|
if (svp && SvPOK(*svp)) |
|
|
50
|
|
|
|
|
|
490
|
97591
|
50
|
|
|
|
|
v = (U8*)SvPV(*svp, vlen); |
491
|
|
|
|
|
|
|
else |
492
|
0
|
|
|
|
|
|
croak("not a vwt."); |
493
|
|
|
|
|
|
|
|
494
|
97591
|
50
|
|
|
|
|
if (vlen < VCE_Length) /* ignore short VCE (unexpected) */ |
495
|
0
|
|
|
|
|
|
continue; |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
/* "Ignorable (L1, L2) after Variable" since track. v. 9 */ |
498
|
97591
|
100
|
|
|
|
|
if (v2i) { |
499
|
90711
|
100
|
|
|
|
|
if (*v) |
500
|
1774
|
|
|
|
|
|
last_is_var = TRUE; |
501
|
88937
|
100
|
|
|
|
|
else if (v[1] || v[2]) /* non zero primary weight */ |
|
|
100
|
|
|
|
|
|
502
|
81638
|
|
|
|
|
|
last_is_var = FALSE; |
503
|
7299
|
100
|
|
|
|
|
else if (last_is_var) /* zero primary weight; skipped */ |
504
|
43
|
|
|
|
|
|
continue; |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
97548
|
50
|
|
|
|
|
if (v[5] == 0) { /* tert wt < 256 */ |
508
|
97548
|
100
|
|
|
|
|
if (upper_lower) { |
509
|
4279
|
100
|
|
|
|
|
if (0x8 <= v[6] && v[6] <= 0xC) /* lower */ |
|
|
100
|
|
|
|
|
|
510
|
571
|
|
|
|
|
|
v[6] -= 6; |
511
|
3708
|
100
|
|
|
|
|
else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */ |
|
|
100
|
|
|
|
|
|
512
|
2102
|
|
|
|
|
|
v[6] += 6; |
513
|
1606
|
100
|
|
|
|
|
else if (v[6] == 0x1C) /* square upper */ |
514
|
57
|
|
|
|
|
|
v[6]++; |
515
|
1549
|
100
|
|
|
|
|
else if (v[6] == 0x1D) /* square lower */ |
516
|
61
|
|
|
|
|
|
v[6]--; |
517
|
|
|
|
|
|
|
} |
518
|
97548
|
100
|
|
|
|
|
if (kata_hira) { |
519
|
378
|
100
|
|
|
|
|
if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */ |
|
|
50
|
|
|
|
|
|
520
|
202
|
|
|
|
|
|
v[6] -= 2; |
521
|
176
|
100
|
|
|
|
|
else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */ |
|
|
50
|
|
|
|
|
|
522
|
30
|
|
|
|
|
|
v[6] += 5; |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
379188
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
527
|
281640
|
100
|
|
|
|
|
if (v[2 * lv + 1] || v[2 * lv + 2]) { |
|
|
100
|
|
|
|
|
|
528
|
233277
|
|
|
|
|
|
*s[lv]++ = v[2 * lv + 1]; |
529
|
233277
|
|
|
|
|
|
*s[lv]++ = v[2 * lv + 2]; |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
55480
|
|
|
|
|
|
dlen = 2 * (MaxLevel - 1); |
535
|
211187
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) |
536
|
155707
|
|
|
|
|
|
dlen += s[lv] - eachlevel[lv]; |
537
|
|
|
|
|
|
|
|
538
|
55480
|
|
|
|
|
|
dst = newSV(dlen); |
539
|
55480
|
|
|
|
|
|
(void)SvPOK_only(dst); |
540
|
55480
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
541
|
|
|
|
|
|
|
|
542
|
55480
|
|
|
|
|
|
svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE); |
543
|
55480
|
50
|
|
|
|
|
back_flag = svp ? SvUV(*svp) : (UV)0; |
|
|
50
|
|
|
|
|
|
544
|
|
|
|
|
|
|
|
545
|
211187
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
546
|
155707
|
100
|
|
|
|
|
if (back_flag & (1 << (lv + 1))) { |
547
|
381
|
|
|
|
|
|
p = s[lv]; |
548
|
381
|
|
|
|
|
|
e = eachlevel[lv]; |
549
|
2520
|
100
|
|
|
|
|
for ( ; e < p; p -= 2) { |
550
|
2139
|
|
|
|
|
|
*d++ = p[-2]; |
551
|
2139
|
|
|
|
|
|
*d++ = p[-1]; |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
else { |
555
|
155326
|
|
|
|
|
|
p = eachlevel[lv]; |
556
|
155326
|
|
|
|
|
|
e = s[lv]; |
557
|
617602
|
100
|
|
|
|
|
while (p < e) |
558
|
462276
|
|
|
|
|
|
*d++ = *p++; |
559
|
|
|
|
|
|
|
} |
560
|
155707
|
100
|
|
|
|
|
if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ |
561
|
144990
|
|
|
|
|
|
*d++ = '\0'; |
562
|
144990
|
|
|
|
|
|
*d++ = '\0'; |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
121693
|
100
|
|
|
|
|
for (lv = level; lv < MaxLevel; lv++) { |
567
|
66213
|
100
|
|
|
|
|
if (lv + 1 < MaxLevel) { /* lv + 1 == real level */ |
568
|
21450
|
|
|
|
|
|
*d++ = '\0'; |
569
|
21450
|
|
|
|
|
|
*d++ = '\0'; |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
211187
|
100
|
|
|
|
|
for (lv = 0; lv < level; lv++) { |
574
|
155707
|
|
|
|
|
|
Safefree(eachlevel[lv]); |
575
|
|
|
|
|
|
|
} |
576
|
|
|
|
|
|
|
} |
577
|
57449
|
|
|
|
|
|
*d = '\0'; |
578
|
57449
|
|
|
|
|
|
SvCUR_set(dst, d - (U8*)SvPVX(dst)); |
579
|
57449
|
|
|
|
|
|
RETVAL = dst; |
580
|
|
|
|
|
|
|
OUTPUT: |
581
|
|
|
|
|
|
|
RETVAL |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
SV* |
585
|
|
|
|
|
|
|
varCE (self, vce) |
586
|
|
|
|
|
|
|
SV* self; |
587
|
|
|
|
|
|
|
SV* vce; |
588
|
|
|
|
|
|
|
PREINIT: |
589
|
|
|
|
|
|
|
SV *dst, *vbl, **svp; |
590
|
|
|
|
|
|
|
HV *selfHV; |
591
|
|
|
|
|
|
|
U8 *a, *v, *d; |
592
|
|
|
|
|
|
|
STRLEN alen, vlen; |
593
|
|
|
|
|
|
|
bool ig_l2; |
594
|
|
|
|
|
|
|
IV uca_vers; |
595
|
|
|
|
|
|
|
UV totwt; |
596
|
|
|
|
|
|
|
CODE: |
597
|
99816
|
50
|
|
|
|
|
if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) |
|
|
50
|
|
|
|
|
|
598
|
99816
|
|
|
|
|
|
selfHV = (HV*)SvRV(self); |
599
|
|
|
|
|
|
|
else |
600
|
0
|
|
|
|
|
|
croak("$self is not a HASHREF."); |
601
|
|
|
|
|
|
|
|
602
|
99816
|
|
|
|
|
|
svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE); |
603
|
99816
|
100
|
|
|
|
|
ig_l2 = svp ? SvTRUE(*svp) : FALSE; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
604
|
|
|
|
|
|
|
|
605
|
99816
|
|
|
|
|
|
svp = hv_fetch(selfHV, "variable", 8, FALSE); |
606
|
99816
|
50
|
|
|
|
|
vbl = svp ? *svp : &PL_sv_no; |
607
|
99816
|
50
|
|
|
|
|
a = (U8*)SvPV(vbl, alen); |
608
|
99816
|
50
|
|
|
|
|
v = (U8*)SvPV(vce, vlen); |
609
|
|
|
|
|
|
|
|
610
|
99816
|
|
|
|
|
|
dst = newSV(vlen); |
611
|
99816
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
612
|
99816
|
|
|
|
|
|
(void)SvPOK_only(dst); |
613
|
99816
|
|
|
|
|
|
Copy(v, d, vlen, U8); |
614
|
99816
|
|
|
|
|
|
SvCUR_set(dst, vlen); |
615
|
99816
|
|
|
|
|
|
d[vlen] = '\0'; |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
/* primary weight == 0 && secondary weight != 0 */ |
618
|
99816
|
100
|
|
|
|
|
if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
619
|
21
|
|
|
|
|
|
d[3] = d[4] = d[5] = d[6] = '\0'; |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
/* variable: checked only the first char and the length, |
623
|
|
|
|
|
|
|
trusting checkCollator() and %VariableOK in Perl ... */ |
624
|
|
|
|
|
|
|
|
625
|
99816
|
50
|
|
|
|
|
if (vlen >= VCE_Length && *a != 'n') { |
|
|
100
|
|
|
|
|
|
626
|
94077
|
100
|
|
|
|
|
if (*v) { |
627
|
2121
|
100
|
|
|
|
|
if (*a == 's') { /* shifted or shift-trimmed */ |
628
|
2062
|
|
|
|
|
|
d[7] = d[1]; /* wt level 1 to 4 */ |
629
|
2062
|
|
|
|
|
|
d[8] = d[2]; |
630
|
|
|
|
|
|
|
} /* else blanked */ |
631
|
2121
|
|
|
|
|
|
d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0'; |
632
|
91956
|
100
|
|
|
|
|
} else if (*a == 's') { /* shifted or shift-trimmed */ |
633
|
91740
|
|
|
|
|
|
totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6]; |
634
|
91740
|
100
|
|
|
|
|
if (alen == 7 && totwt != 0) { /* shifted */ |
|
|
100
|
|
|
|
|
|
635
|
183050
|
100
|
|
|
|
|
if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */ |
|
|
100
|
|
|
|
|
|
636
|
66
|
|
|
|
|
|
d[7] = d[1]; /* wt level 1 to 4 */ |
637
|
66
|
|
|
|
|
|
d[8] = d[2]; |
638
|
|
|
|
|
|
|
} else { |
639
|
91459
|
|
|
|
|
|
svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); |
640
|
91459
|
50
|
|
|
|
|
if (!svp) |
641
|
0
|
|
|
|
|
|
croak("Panic: no $self->{UCA_Version} in varCE"); |
642
|
91459
|
50
|
|
|
|
|
uca_vers = SvIV(*svp); |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
/* completely ignorable or the second derived CE */ |
645
|
91459
|
100
|
|
|
|
|
if (uca_vers >= 36 && d[3] + d[4] + d[5] + d[6] == 0) { |
|
|
100
|
|
|
|
|
|
646
|
11014
|
|
|
|
|
|
d[7] = d[8] = '\0'; |
647
|
|
|
|
|
|
|
} else { |
648
|
80445
|
|
|
|
|
|
d[7] = (U8)(Shift4Wt >> 8); |
649
|
80445
|
|
|
|
|
|
d[8] = (U8)(Shift4Wt & 0xFF); |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
} else { /* shift-trimmed or completely ignorable */ |
653
|
215
|
|
|
|
|
|
d[7] = d[8] = '\0'; |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
} /* else blanked */ |
656
|
|
|
|
|
|
|
} /* else non-ignorable */ |
657
|
99816
|
|
|
|
|
|
RETVAL = dst; |
658
|
|
|
|
|
|
|
OUTPUT: |
659
|
|
|
|
|
|
|
RETVAL |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
SV* |
664
|
|
|
|
|
|
|
visualizeSortKey (self, key) |
665
|
|
|
|
|
|
|
SV * self |
666
|
|
|
|
|
|
|
SV * key |
667
|
|
|
|
|
|
|
PREINIT: |
668
|
|
|
|
|
|
|
HV *selfHV; |
669
|
|
|
|
|
|
|
SV **svp, *dst; |
670
|
|
|
|
|
|
|
U8 *s, *e, *d; |
671
|
|
|
|
|
|
|
STRLEN klen, dlen; |
672
|
|
|
|
|
|
|
UV uv; |
673
|
246
|
|
|
|
|
|
IV uca_vers, sep = 0; |
674
|
246
|
|
|
|
|
|
const char *upperhex = "0123456789ABCDEF"; |
675
|
|
|
|
|
|
|
CODE: |
676
|
246
|
50
|
|
|
|
|
if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV) |
|
|
50
|
|
|
|
|
|
677
|
246
|
|
|
|
|
|
selfHV = (HV*)SvRV(self); |
678
|
|
|
|
|
|
|
else |
679
|
0
|
|
|
|
|
|
croak("$self is not a HASHREF."); |
680
|
|
|
|
|
|
|
|
681
|
246
|
|
|
|
|
|
svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE); |
682
|
246
|
50
|
|
|
|
|
if (!svp) |
683
|
0
|
|
|
|
|
|
croak("Panic: no $self->{UCA_Version} in visualizeSortKey"); |
684
|
246
|
50
|
|
|
|
|
uca_vers = SvIV(*svp); |
685
|
|
|
|
|
|
|
|
686
|
246
|
50
|
|
|
|
|
s = (U8*)SvPV(key, klen); |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
/* slightly *longer* than the need, but I'm afraid of miscounting; |
689
|
|
|
|
|
|
|
= (klen / 2) * 5 - 1 |
690
|
|
|
|
|
|
|
# FFFF and ' ' for each 16bit units but ' ' is less by 1; |
691
|
|
|
|
|
|
|
# ' ' and '|' for level boundaries including the identical level |
692
|
|
|
|
|
|
|
+ 2 # '[' and ']' |
693
|
|
|
|
|
|
|
+ 1 # '\0' |
694
|
|
|
|
|
|
|
(a) if klen is odd (not expected), maybe more 5 bytes. |
695
|
246
|
|
|
|
|
|
(b) there is not always the identical level. |
696
|
246
|
|
|
|
|
|
*/ |
697
|
246
|
|
|
|
|
|
dlen = (klen / 2) * 5 + MaxLevel * 2 + 2; |
698
|
246
|
|
|
|
|
|
dst = newSV(dlen); |
699
|
|
|
|
|
|
|
(void)SvPOK_only(dst); |
700
|
246
|
|
|
|
|
|
d = (U8*)SvPVX(dst); |
701
|
2584
|
100
|
|
|
|
|
|
702
|
2338
|
|
|
|
|
|
*d++ = '['; |
703
|
2338
|
100
|
|
|
|
|
for (e = s + klen; s < e; s += 2) { |
|
|
100
|
|
|
|
|
|
704
|
1533
|
100
|
|
|
|
|
uv = (U16)(*s << 8 | s[1]); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
705
|
1242
|
|
|
|
|
|
if (uv || sep >= MaxLevel) { |
706
|
1533
|
|
|
|
|
|
if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|'))) |
707
|
1533
|
|
|
|
|
|
*d++ = ' '; |
708
|
1533
|
|
|
|
|
|
*d++ = upperhex[ (s[0] >> 4) & 0xF ]; |
709
|
1533
|
|
|
|
|
|
*d++ = upperhex[ s[0] & 0xF ]; |
710
|
|
|
|
|
|
|
*d++ = upperhex[ (s[1] >> 4) & 0xF ]; |
711
|
805
|
100
|
|
|
|
|
*d++ = upperhex[ s[1] & 0xF ]; |
|
|
100
|
|
|
|
|
|
712
|
705
|
|
|
|
|
|
} else { |
713
|
805
|
|
|
|
|
|
if ((9 <= uca_vers) && (d[-1] != '[')) |
714
|
805
|
|
|
|
|
|
*d++ = ' '; |
715
|
|
|
|
|
|
|
*d++ = '|'; |
716
|
|
|
|
|
|
|
++sep; |
717
|
246
|
|
|
|
|
|
} |
718
|
246
|
|
|
|
|
|
} |
719
|
246
|
|
|
|
|
|
*d++ = ']'; |
720
|
246
|
|
|
|
|
|
*d = '\0'; |
721
|
|
|
|
|
|
|
SvCUR_set(dst, d - (U8*)SvPVX(dst)); |
722
|
|
|
|
|
|
|
RETVAL = dst; |
723
|
|
|
|
|
|
|
OUTPUT: |
724
|
|
|
|
|
|
|
RETVAL |