line |
stmt |
bran |
cond |
sub |
time |
code |
1
|
|
|
|
|
|
|
2
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT /* we want efficiency */ |
3
|
|
|
|
|
|
|
4
|
|
|
|
|
|
/* I guess no private function needs pTHX_ and aTHX_ */ |
5
|
|
|
|
|
|
|
6
|
|
|
|
|
|
#include "EXTERN.h" |
7
|
|
|
|
|
|
#include "perl.h" |
8
|
|
|
|
|
|
#include "XSUB.h" |
9
|
|
|
|
|
|
|
10
|
|
|
|
|
|
/* This file is prepared by mkheader */ |
11
|
|
|
|
|
|
#include "ucatbl.h" |
12
|
|
|
|
|
|
|
13
|
|
|
|
|
|
/* Perl 5.6.1 ? */ |
14
|
|
|
|
|
|
#ifndef utf8n_to_uvuni |
15
|
|
|
|
|
|
#define utf8n_to_uvuni utf8_to_uv |
16
|
|
|
|
|
|
#endif /* utf8n_to_uvuni */ |
17
|
|
|
|
|
|
|
18
|
|
|
|
|
|
/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ |
19
|
|
|
|
|
|
#ifndef UTF8_ALLOW_BOM |
20
|
|
|
|
|
|
#define UTF8_ALLOW_BOM (0) |
21
|
|
|
|
|
|
#endif /* UTF8_ALLOW_BOM */ |
22
|
|
|
|
|
|
|
23
|
|
|
|
|
|
#ifndef UTF8_ALLOW_SURROGATE |
24
|
|
|
|
|
|
#define UTF8_ALLOW_SURROGATE (0) |
25
|
|
|
|
|
|
#endif /* UTF8_ALLOW_SURROGATE */ |
26
|
|
|
|
|
|
|
27
|
|
|
|
|
|
#ifndef UTF8_ALLOW_FE_FF |
28
|
|
|
|
|
|
#define UTF8_ALLOW_FE_FF (0) |
29
|
|
|
|
|
|
#endif /* UTF8_ALLOW_FE_FF */ |
30
|
|
|
|
|
|
|
31
|
|
|
|
|
|
#ifndef UTF8_ALLOW_FFFF |
32
|
|
|
|
|
|
#define UTF8_ALLOW_FFFF (0) |
33
|
|
|
|
|
|
#endif /* UTF8_ALLOW_FFFF */ |
34
|
|
|
|
|
|
|
35
|
|
|
|
|
|
#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF) |
36
|
|
|
|
|
|
|
37
|
|
|
|
|
|
/* if utf8n_to_uvuni() sets retlen to 0 (?) */ |
38
|
|
|
|
|
|
#define ErrRetlenIsZero "panic (Unicode::Collate): zero-length character" |
39
|
|
|
|
|
|
|
40
|
|
|
|
|
|
/* At present, char > 0x10ffff are unaffected without complaint, right? */ |
41
|
|
|
|
|
|
#define VALID_UTF_MAX (0x10ffff) |
42
|
|
|
|
|
|
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) |
43
|
|
|
|
|
|
|
44
|
|
|
|
|
|
static const UV max_div_16 = UV_MAX / 16; |
45
|
|
|
|
|
|
|
46
|
|
|
|
|
|
/* Supported Levels */ |
47
|
|
|
|
|
|
#define MinLevel (1) |
48
|
|
|
|
|
|
#define MaxLevel (4) |
49
|
|
|
|
|
|
|
50
|
|
|
|
|
|
/* Shifted weight at 4th level */ |
51
|
|
|
|
|
|
#define Shift4Wt (0xFFFF) |
52
|
|
|
|
|
|
|
53
|
|
|
|
|
|
#define VCE_Length (9) |
54
|
|
|
|
|
|
|
55
|
|
|
|
|
|
#define Hangul_SBase (0xAC00) |
56
|
|
|
|
|
|
#define Hangul_SIni (0xAC00) |
57
|
|
|
|
|
|
#define Hangul_SFin (0xD7A3) |
58
|
|
|
|
|
|
#define Hangul_NCount (588) |
59
|
|
|
|
|
|
#define Hangul_TCount (28) |
60
|
|
|
|
|
|
#define Hangul_LBase (0x1100) |
61
|
|
|
|
|
|
#define Hangul_LIni (0x1100) |
62
|
|
|
|
|
|
#define Hangul_LFin (0x1159) |
63
|
|
|
|
|
|
#define Hangul_LFill (0x115F) |
64
|
|
|
|
|
|
#define Hangul_LEnd (0x115F) /* Unicode 5.2 */ |
65
|
|
|
|
|
|
#define Hangul_VBase (0x1161) |
66
|
|
|
|
|
|
#define Hangul_VIni (0x1160) /* from Vowel Filler */ |
67
|
|
|
|
|
|
#define Hangul_VFin (0x11A2) |
68
|
|
|
|
|
|
#define Hangul_VEnd (0x11A7) /* Unicode 5.2 */ |
69
|
|
|
|
|
|
#define Hangul_TBase (0x11A7) /* from "no-final" codepoint */ |
70
|
|
|
|
|
|
#define Hangul_TIni (0x11A8) |
71
|
|
|
|
|
|
#define Hangul_TFin (0x11F9) |
72
|
|
|
|
|
|
#define Hangul_TEnd (0x11FF) /* Unicode 5.2 */ |
73
|
|
|
|
|
|
#define HangulL2Ini (0xA960) /* Unicode 5.2 */ |
74
|
|
|
|
|
|
#define HangulL2Fin (0xA97C) /* Unicode 5.2 */ |
75
|
|
|
|
|
|
#define HangulV2Ini (0xD7B0) /* Unicode 5.2 */ |
76
|
|
|
|
|
|
#define HangulV2Fin (0xD7C6) /* Unicode 5.2 */ |
77
|
|
|
|
|
|
#define HangulT2Ini (0xD7CB) /* Unicode 5.2 */ |
78
|
|
|
|
|
|
#define HangulT2Fin (0xD7FB) /* Unicode 5.2 */ |
79
|
|
|
|
|
|
|
80
|
|
|
|
|
|
#define CJK_UidIni (0x4E00) |
81
|
|
|
|
|
|
#define CJK_UidFin (0x9FA5) |
82
|
|
|
|
|
|
#define CJK_UidF41 (0x9FBB) |
83
|
|
|
|
|
|
#define CJK_UidF51 (0x9FC3) |
84
|
|
|
|
|
|
#define CJK_UidF52 (0x9FCB) |
85
|
|
|
|
|
|
#define CJK_UidF61 (0x9FCC) |
86
|
|
|
|
|
|
#define CJK_ExtAIni (0x3400) /* Unicode 3.0 */ |
87
|
|
|
|
|
|
#define CJK_ExtAFin (0x4DB5) /* Unicode 3.0 */ |
88
|
|
|
|
|
|
#define CJK_ExtBIni (0x20000) /* Unicode 3.1 */ |
89
|
|
|
|
|
|
#define CJK_ExtBFin (0x2A6D6) /* Unicode 3.1 */ |
90
|
|
|
|
|
|
#define CJK_ExtCIni (0x2A700) /* Unicode 5.2 */ |
91
|
|
|
|
|
|
#define CJK_ExtCFin (0x2B734) /* Unicode 5.2 */ |
92
|
|
|
|
|
|
#define CJK_ExtDIni (0x2B740) /* Unicode 6.0 */ |
93
|
|
|
|
|
|
#define CJK_ExtDFin (0x2B81D) /* Unicode 6.0 */ |
94
|
|
|
|
|
|
|
95
|
|
|
|
|
|
#define CJK_CompIni (0xFA0E) |
96
|
|
|
|
|
|
#define CJK_CompFin (0xFA29) |
97
|
|
|
|
|
|
/* Per-code-point flags for the range CJK_CompIni..CJK_CompFin
 * (U+FA0E..U+FA29, 28 entries).  The table is indexed by
 * (code - CJK_CompIni); a non-zero entry makes the XSUBs below treat that
 * code point like a basic unified ideograph.  It is read-only, hence const. */
static const STDCHAR UnifiedCompat[] = {
    1,1,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,1,0,0,1,1,1
}; /* E F 0 1 2 3 4 5 6 7 8 9 A B C D E F 0 1 2 3 4 5 6 7 8 9 */

/* True when bcode <= code <= ecode.
 * NOTE: this macro is deliberately unhygienic: it reads a variable named
 * `code` from the scope in which it is expanded, so it can only be used
 * inside functions that have such a variable. */
#define codeRange(bcode, ecode)	((bcode) <= code && code <= (ecode))
102
|
|
|
|
|
|
|
103
|
|
|
|
|
|
MODULE = Unicode::Collate PACKAGE = Unicode::Collate |
104
|
|
|
|
|
|
|
105
|
|
|
|
|
|
PROTOTYPES: DISABLE |
106
|
|
|
|
|
|
|
107
|
|
|
|
|
|
void
_fetch_rest ()
  PREINIT:
    /* Returns (as a list) one string per entry of UCA_rest, walking the
     * table until its terminating NULL pointer. */
    char **entry = UCA_rest;
  PPCODE:
    while (*entry) {
        /* a length of 0 asks newSVpv() to measure the string itself */
        SV *item = newSVpv((char *) *entry, 0);
        XPUSHs(sv_2mortal(item));
        ++entry;
    }
115
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
117
|
|
|
|
|
|
void
_fetch_simple (uv)
    UV uv
  PREINIT:
    /* Looks uv up in the three-stage UCA_simple table (plane / row / cell).
     * A found entry starts with a count byte followed by that many
     * VCE_Length-byte collation elements; each element is returned as one
     * string.  When there is no entry, the single integer 0 is returned. */
    U8 ***plane, **row;
    U8 *entry = NULL;
  PPCODE:
    if (!OVER_UTF_MAX(uv)) {
        plane = (U8***)UCA_simple[uv >> 16];
        row = plane ? plane[(uv >> 8) & 0xff] : NULL;
        if (row)
            entry = row[uv & 0xff];
    }

    if (!entry) {
        XPUSHs(sv_2mortal(newSViv(0)));
    } else {
        int remaining = (int)*entry++;	/* leading count byte */

        for ( ; remaining > 0; --remaining, entry += VCE_Length)
            XPUSHs(sv_2mortal(newSVpvn((char *) entry, VCE_Length)));
    }
142
|
|
|
|
|
|
|
143
|
|
|
|
|
|
SV*
_ignorable_simple (uv)
    UV uv
  ALIAS:
    _exists_simple = 1
  PREINIT:
    /* Shared body for two predicates on the UCA_simple entry of uv:
     *   _ignorable_simple (ix == 0): true when the entry exists and holds
     *                                zero collation elements;
     *   _exists_simple    (ix == 1): true when the entry holds at least one.
     * Both are false when uv has no entry or is above VALID_UTF_MAX. */
    U8 ***plane, **row;
    U8 *entry = NULL;
    int count = -1;	/* -1 means "no entry" */
  CODE:
    if (!OVER_UTF_MAX(uv)) {
        plane = (U8***)UCA_simple[uv >> 16];
        row = plane ? plane[(uv >> 8) & 0xff] : NULL;
        if (row)
            entry = row[uv & 0xff];
        if (entry)
            count = (int)*entry; /* assuming 0 <= count < 128 */
    }

    RETVAL = boolSV(ix ? (count > 0) : (count == 0));
  OUTPUT:
    RETVAL
169
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
171
|
|
|
|
|
|
void
_getHexArray (src)
    SV* src
  PREINIT:
    /* Scans src for runs of hexadecimal digits and returns one unsigned
     * integer per run.  Any non-hex byte acts as a separator.  A run whose
     * value does not fit in a UV is returned as UV_MAX. */
    char *s, *e;
    STRLEN byte;
    UV value;
    bool overflowed;
    const char *hexdigit;
  PPCODE:
    s = SvPV(src,byte);
    for (e = s + byte; s < e;) {
        /* strchr() also matches the terminating NUL of PL_hexdigit, so a
         * NUL byte in the input must be rejected explicitly instead of
         * being parsed as a digit. */
        hexdigit = *s ? strchr((char *) PL_hexdigit, *s) : NULL;
        ++s;
        if (! hexdigit)
            continue;

        /* the overflow state belongs to this number only */
        overflowed = FALSE;
        value = (hexdigit - PL_hexdigit) & 0xF;

        while (s < e && *s) {
            hexdigit = strchr((char *) PL_hexdigit, *s++);
            if (! hexdigit)
                break;
            if (overflowed)	/* keep consuming the rest of the run */
                continue;
            if (value > max_div_16) {	/* next shift would overflow */
                overflowed = TRUE;
                continue;
            }
            value = (value << 4) | ((hexdigit - PL_hexdigit) & 0xF);
        }
        XPUSHs(sv_2mortal(newSVuv(overflowed ? UV_MAX : value)));
    }
201
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
203
|
|
|
|
|
|
SV*
_isIllegal (sv)
    SV* sv
  PREINIT:
    /* True when sv is not an integer, or when its value is not an
     * acceptable code point: above 0x10FFFF, ending in FFFE/FFFF, in the
     * surrogate range, or in FDD0..FDEF. */
    UV uv;
    bool illegal;
  CODE:
    if (!sv || !SvIOK(sv))
        XSRETURN_YES;

    uv = SvUVX(sv);
    if (0x10FFFF < uv)				/* out of range */
        illegal = TRUE;
    else if ((uv & 0xFFFE) == 0xFFFE)		/* ??FFF[EF] (cf. utf8.c) */
        illegal = TRUE;
    else if (0xD800 <= uv && uv <= 0xDFFF)	/* unpaired surrogates */
        illegal = TRUE;
    else if (0xFDD0 <= uv && uv <= 0xFDEF)	/* other non-characters */
        illegal = TRUE;
    else
        illegal = FALSE;

    RETVAL = boolSV(illegal);
  OUTPUT:
    RETVAL
220
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
222
|
|
|
|
|
|
void
_decompHangul (code)
    UV code
  PREINIT:
    /* Arithmetic decomposition of a precomposed Hangul syllable into its
     * leading consonant, vowel and (when present) trailing consonant.
     * The caller must guarantee that code is a Hangul syllable; no range
     * check is done here. */
    UV sindex, lead, vowel, trail;
  PPCODE:
    sindex = code - Hangul_SBase;
    lead   = Hangul_LBase + sindex / Hangul_NCount;
    vowel  = Hangul_VBase + (sindex % Hangul_NCount) / Hangul_TCount;
    trail  = sindex % Hangul_TCount;	/* 0: no trailing consonant */

    XPUSHs(sv_2mortal(newSVuv(lead)));
    XPUSHs(sv_2mortal(newSVuv(vowel)));
    if (trail != 0)
        XPUSHs(sv_2mortal(newSVuv(Hangul_TBase + trail)));
239
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
241
|
|
|
|
|
|
SV*
getHST (code, uca_vers = 0)
    UV code;
    IV uca_vers;
  PREINIT:
    /* Returns the Hangul syllable type of code as a string: "LV" or "LVT"
     * for precomposed syllables, "L", "V" or "T" for jamo, and "" for
     * anything else.  For uca_vers >= 20 the wider jamo ranges marked
     * "Unicode 5.2" are used; older versions use the narrower ranges. */
    const char * hangtype = "";
    STRLEN typelen = 0;
  CODE:
    if (codeRange(Hangul_SIni, Hangul_SFin)) {
        /* a non-zero trailing index means a final consonant is present */
        if ((code - Hangul_SBase) % Hangul_TCount != 0) {
            hangtype = "LVT"; typelen = 3;
        } else {
            hangtype = "LV"; typelen = 2;
        }
    } else if (uca_vers < 20
            ? (codeRange(Hangul_LIni, Hangul_LFin) || code == Hangul_LFill)
            : (codeRange(Hangul_LIni, Hangul_LEnd) ||
               codeRange(HangulL2Ini, HangulL2Fin))) {
        hangtype = "L"; typelen = 1;
    } else if (uca_vers < 20
            ? (codeRange(Hangul_VIni, Hangul_VFin))
            : (codeRange(Hangul_VIni, Hangul_VEnd) ||
               codeRange(HangulV2Ini, HangulV2Fin))) {
        hangtype = "V"; typelen = 1;
    } else if (uca_vers < 20
            ? (codeRange(Hangul_TIni, Hangul_TFin))
            : (codeRange(Hangul_TIni, Hangul_TEnd) ||
               codeRange(HangulT2Ini, HangulT2Fin))) {
        hangtype = "T"; typelen = 1;
    }

    RETVAL = newSVpvn(hangtype, typelen);
  OUTPUT:
    RETVAL
283
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
285
|
|
|
|
|
|
void
_derivCE_9 (code)
    UV code
  ALIAS:
    _derivCE_14 = 1
    _derivCE_18 = 2
    _derivCE_20 = 3
    _derivCE_22 = 4
    _derivCE_24 = 5
  PREINIT:
    /* Returns the two derived collation elements for a code point that
     * has no explicit table entry.  The alias index ix selects which set
     * of CJK ranges applies.  The first element carries base + (code >> 15)
     * as primary weight, the second carries (code & 0x7FFF) | 0x8000;
     * both store the low 16 bits of code in their last two bytes. */
    UV base, aaaa, bbbb, uid_fin;
    U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
    U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
    bool basic_unified = 0;
    bool extension;
  PPCODE:
    /* last code point of the basic unified block for this alias */
    if (ix >= 5)
        uid_fin = CJK_UidF61;
    else if (ix >= 3)
        uid_fin = CJK_UidF52;
    else if (ix == 2)
        uid_fin = CJK_UidF51;
    else if (ix == 1)
        uid_fin = CJK_UidF41;
    else
        uid_fin = CJK_UidFin;

    if (CJK_UidIni <= code) {
        basic_unified = codeRange(CJK_CompIni, CJK_CompFin)
            ? (bool)UnifiedCompat[code - CJK_CompIni]
            : (bool)(code <= uid_fin);
    }

    extension = codeRange(CJK_ExtAIni, CJK_ExtAFin)
        || codeRange(CJK_ExtBIni, CJK_ExtBFin)
        || (ix >= 3 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
        || (ix >= 4 && codeRange(CJK_ExtDIni, CJK_ExtDFin));

    if (basic_unified)
        base = 0xFB40;	/* CJK */
    else if (extension)
        base = 0xFB80;	/* CJK ext. */
    else
        base = 0xFBC0;	/* others */

    aaaa = base + (code >> 15);
    bbbb = (code & 0x7FFF) | 0x8000;

    a[1] = (U8)(aaaa >> 8);
    a[2] = (U8)(aaaa & 0xFF);
    a[7] = (U8)(code >> 8);
    a[8] = (U8)(code & 0xFF);

    b[1] = (U8)(bbbb >> 8);
    b[2] = (U8)(bbbb & 0xFF);
    b[7] = a[7];
    b[8] = a[8];

    XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
    XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
331
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
333
|
|
|
|
|
|
void
_derivCE_8 (code)
    UV code
  PREINIT:
    /* Returns the two derived collation elements for code using the
     * fixed base 0xFF80: the first carries 0xFF80 + (code >> 15) as
     * primary weight, the second (code & 0x7FFF) | 0x8000.  Both store
     * the low 16 bits of code in their last two bytes. */
    UV upper, lower;
    U8 code_hi, code_lo;
    U8 a[VCE_Length + 1] = "\x00\xFF\xFF\x00\x02\x00\x01\xFF\xFF";
    U8 b[VCE_Length + 1] = "\x00\xFF\xFF\x00\x00\x00\x00\xFF\xFF";
  PPCODE:
    upper = 0xFF80 + (code >> 15);
    lower = (code & 0x7FFF) | 0x8000;
    code_hi = (U8)(code >> 8);
    code_lo = (U8)(code & 0xFF);

    a[1] = (U8)(upper >> 8);
    a[2] = (U8)(upper & 0xFF);
    a[7] = code_hi;
    a[8] = code_lo;

    b[1] = (U8)(lower >> 8);
    b[2] = (U8)(lower & 0xFF);
    b[7] = code_hi;
    b[8] = code_lo;

    XPUSHs(sv_2mortal(newSVpvn((char *) a, VCE_Length)));
    XPUSHs(sv_2mortal(newSVpvn((char *) b, VCE_Length)));
351
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
353
|
|
|
|
|
|
void
_uideoCE_8 (code)
    UV code
  PREINIT:
    /* Returns a single collation element whose primary weight (bytes 1-2)
     * and last two bytes (7-8) are both the low 16 bits of code. */
    U8 uice[VCE_Length + 1] = "\x00\xFF\xFF\x00\x20\x00\x02\xFF\xFF";
    U8 code_hi, code_lo;
  PPCODE:
    code_hi = (U8)(code >> 8);
    code_lo = (U8)(code & 0xFF);

    uice[1] = code_hi;
    uice[2] = code_lo;
    uice[7] = code_hi;
    uice[8] = code_lo;

    XPUSHs(sv_2mortal(newSVpvn((char *) uice, VCE_Length)));
362
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
364
|
|
|
|
|
|
SV*
_isUIdeo (code, uca_vers)
    UV code;
    IV uca_vers;
  PREINIT:
    /* True when code is treated as a unified ideograph under the given
     * UCA version: the basic unified block (whose end depends on
     * uca_vers), the flagged compatibility code points in
     * CJK_CompIni..CJK_CompFin, Extension A, and Extensions B/C/D from
     * uca_vers 8/20/22 on.
     *
     * basic_unified is a local, not a parameter, so it is declared here
     * in PREINIT (as _derivCE_9 does) rather than among the parameters. */
    bool basic_unified = 0;
  CODE:
    /* uca_vers = 0 for _uideoCE_8() */
    if (CJK_UidIni <= code) {
        if (codeRange(CJK_CompIni, CJK_CompFin))
            basic_unified = (bool)UnifiedCompat[code - CJK_CompIni];
        else
            basic_unified = (uca_vers >= 24 ? (code <= CJK_UidF61) :
                             uca_vers >= 20 ? (code <= CJK_UidF52) :
                             uca_vers >= 18 ? (code <= CJK_UidF51) :
                             uca_vers >= 14 ? (code <= CJK_UidF41) :
                                              (code <= CJK_UidFin));
    }
    RETVAL = boolSV(
        (basic_unified)
            ||
        (codeRange(CJK_ExtAIni, CJK_ExtAFin))
            ||
        (uca_vers >=  8 && codeRange(CJK_ExtBIni, CJK_ExtBFin))
            ||
        (uca_vers >= 20 && codeRange(CJK_ExtCIni, CJK_ExtCFin))
            ||
        (uca_vers >= 22 && codeRange(CJK_ExtDIni, CJK_ExtDFin))
    );
  OUTPUT:
    RETVAL
394
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
396
|
|
|
|
|
|
SV*
mk_SortKey (self, buf)
    SV* self;
    SV* buf;
  PREINIT:
    /* Builds the binary sort key for a list of collation elements.
     *
     * self: hash reference; the keys "level", "upper_before_lower",
     *       "katakana_before_hiragana", "UCA_Version", "variable" and
     *       "backwardsFlag" are read from it.
     * buf:  array reference of strings, each holding one VCE of at least
     *       VCE_Length bytes (shorter strings are skipped).
     *
     * For every level below "level" the non-zero 16-bit weights of that
     * level are appended, in reverse order when the matching bit of
     * backwardsFlag is set.  Levels are separated by two NUL bytes, and
     * the separators are emitted up to MaxLevel even for unused levels.
     *
     * Croaks when self/buf have the wrong type, when an element of buf is
     * not a string, when "level" is outside 0..MaxLevel, or when
     * "UCA_Version" is missing.
     */
    SV *dst, **svp;
    STRLEN dlen, vlen;
    U8 *d, *p, *e, *v, *s[MaxLevel], *eachlevel[MaxLevel];
    U8 vce[VCE_Length];	/* private copy of the element being processed */
    AV *bufAV;
    HV *selfHV;
    UV back_flag;
    I32 i, buf_len;
    IV lv, level, uca_vers;
    bool upper_lower, kata_hira, v2i, last_is_var;
  CODE:
    if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
        selfHV = (HV*)SvRV(self);
    else
        croak("$self is not a HASHREF.");

    if (SvROK(buf) && SvTYPE(SvRV(buf)) == SVt_PVAV)
        bufAV = (AV*)SvRV(buf);
    else
        croak("XSUB, not an ARRAYREF.");

    buf_len = av_len(bufAV);

    if (buf_len < 0) { /* empty: -1 */
        /* only the level separators */
        dlen = 2 * (MaxLevel - 1);
        dst = newSV(dlen);
        (void)SvPOK_only(dst);
        d = (U8*)SvPVX(dst);
        while (dlen--)
            *d++ = '\0';
    } else {
        /* Read and validate every setting before anything is allocated,
         * so that a croak here cannot leak memory. */
        svp = hv_fetch(selfHV, "level", 5, FALSE);
        level = svp ? SvIV(*svp) : MaxLevel;
        /* s[] and eachlevel[] have MaxLevel slots and a VCE carries
         * MaxLevel weights; a negative level would overrun dst below. */
        if (level < 0 || MaxLevel < level)
            croak("Panic: level %ld out of range in mk_SortKey", (long)level);

        svp = hv_fetch(selfHV, "upper_before_lower", 18, FALSE);
        upper_lower = svp ? SvTRUE(*svp) : FALSE;
        svp = hv_fetch(selfHV, "katakana_before_hiragana", 24, FALSE);
        kata_hira = svp ? SvTRUE(*svp) : FALSE;
        svp = hv_fetch(selfHV, "backwardsFlag", 13, FALSE);
        back_flag = svp ? SvUV(*svp) : (UV)0;
        svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
        if (!svp)
            croak("Panic: no $self->{UCA_Version} in mk_SortKey");
        uca_vers = SvIV(*svp);
        svp = hv_fetch(selfHV, "variable", 8, FALSE);
        v2i = uca_vers >= 9 && svp /* (vers >= 9) and not (non-ignorable) */
            ? !(SvCUR(*svp) == 13 && memEQ(SvPVX(*svp), "non-ignorable", 13))
            : FALSE;

        /* one scratch buffer per level: at most 2 bytes per element */
        for (lv = 0; lv < level; lv++) {
            New(0, eachlevel[lv], 2 * (1 + buf_len) + 1, U8);
            s[lv] = eachlevel[lv];
        }

        last_is_var = FALSE;
        for (i = 0; i <= buf_len; i++) {
            svp = av_fetch(bufAV, i, FALSE);

            if (svp && SvPOK(*svp)) {
                v = (U8*)SvPV(*svp, vlen);
            } else {
                /* release the scratch buffers before bailing out */
                for (lv = 0; lv < level; lv++)
                    Safefree(eachlevel[lv]);
                croak("not a vwt.");
            }

            if (vlen < VCE_Length) /* ignore short VCE (unexpected) */
                continue;

            /* Work on a copy: the tertiary adjustments below must not
             * write into the caller's (possibly shared) string buffer. */
            Copy(v, vce, VCE_Length, U8);
            v = vce;

            /* "Ignorable (L1, L2) after Variable" since track. v. 9 */
            if (v2i) {
                if (*v)
                    last_is_var = TRUE;
                else if (v[1] || v[2]) /* non zero primary weight */
                    last_is_var = FALSE;
                else if (last_is_var) /* zero primary weight; skipped */
                    continue;
            }

            if (v[5] == 0) { /* tert wt < 256 */
                if (upper_lower) {
                    if (0x8 <= v[6] && v[6] <= 0xC) /* lower */
                        v[6] -= 6;
                    else if (0x2 <= v[6] && v[6] <= 0x6) /* upper */
                        v[6] += 6;
                    else if (v[6] == 0x1C) /* square upper */
                        v[6]++;
                    else if (v[6] == 0x1D) /* square lower */
                        v[6]--;
                }
                if (kata_hira) {
                    if (0x0F <= v[6] && v[6] <= 0x13) /* katakana */
                        v[6] -= 2;
                    else if (0xD <= v[6] && v[6] <= 0xE) /* hiragana */
                        v[6] += 5;
                }
            }

            /* append each non-zero weight to its level's buffer */
            for (lv = 0; lv < level; lv++) {
                if (v[2 * lv + 1] || v[2 * lv + 2]) {
                    *s[lv]++ = v[2 * lv + 1];
                    *s[lv]++ = v[2 * lv + 2];
                }
            }
        }

        dlen = 2 * (MaxLevel - 1);
        for (lv = 0; lv < level; lv++)
            dlen += s[lv] - eachlevel[lv];

        dst = newSV(dlen);
        (void)SvPOK_only(dst);
        d = (U8*)SvPVX(dst);

        for (lv = 0; lv < level; lv++) {
            if (back_flag & (1 << (lv + 1))) {
                /* backwards level: copy the 16-bit units last to first */
                p = s[lv];
                e = eachlevel[lv];
                for ( ; e < p; p -= 2) {
                    *d++ = p[-2];
                    *d++ = p[-1];
                }
            }
            else {
                p = eachlevel[lv];
                e = s[lv];
                while (p < e)
                    *d++ = *p++;
            }
            if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
                *d++ = '\0';
                *d++ = '\0';
            }
        }

        /* separators for the levels that were not requested */
        for (lv = level; lv < MaxLevel; lv++) {
            if (lv + 1 < MaxLevel) { /* lv + 1 == real level */
                *d++ = '\0';
                *d++ = '\0';
            }
        }

        for (lv = 0; lv < level; lv++) {
            Safefree(eachlevel[lv]);
        }
    }
    *d = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    RETVAL = dst;
  OUTPUT:
    RETVAL
547
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
549
|
|
|
|
|
|
SV*
varCE (self, vce)
    SV* self;
    SV* vce;
  PREINIT:
    /* Returns a copy of vce adjusted for the collator's variable-weighting
     * mode.
     *
     * self: hash reference; "ignore_level2" and "variable" are read.
     * vce:  one collation element.  Byte 0 is the variable flag, bytes
     *       1-2 / 3-4 / 5-6 / 7-8 are the level 1 to 4 weights.
     *
     * Only the first character of "variable" (and, for 's', its length)
     * is examined: 'n' non-ignorable, 'b' blanked, 's' shifted (length 7)
     * or shift-trimmed.  A vce shorter than VCE_Length is returned as an
     * unmodified copy.  Croaks when self is not a hash reference or when
     * a non-variable element meets an unknown "variable" value.
     */
    SV *dst, *vbl, **svp;
    HV *selfHV;
    U8 *a, *v, *d;
    STRLEN alen, vlen;
    bool ig_l2;
    UV totwt;
  CODE:
    if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
        selfHV = (HV*)SvRV(self);
    else
        croak("$self is not a HASHREF.");

    svp = hv_fetch(selfHV, "ignore_level2", 13, FALSE);
    ig_l2 = svp ? SvTRUE(*svp) : FALSE;

    svp = hv_fetch(selfHV, "variable", 8, FALSE);
    vbl = svp ? *svp : &PL_sv_no;
    a = (U8*)SvPV(vbl, alen);
    v = (U8*)SvPV(vce, vlen);

    /* newSV(0) would return an SV without a string buffer, so always
     * request at least one byte. */
    dst = newSV(vlen + 1);
    d = (U8*)SvPVX(dst);
    (void)SvPOK_only(dst);
    Copy(v, d, vlen, U8);
    SvCUR_set(dst, vlen);
    d[vlen] = '\0';

    /* variable: checked only the first char and the length,
       trusting checkCollator() and %VariableOK in Perl ... */

    /* Every access to d[1]..d[8] below needs a full-length element;
     * a short VCE (unexpected) is returned as copied. */
    if (vlen >= VCE_Length) {
        /* primary weight == 0 && secondary weight != 0 */
        if (ig_l2 && !d[1] && !d[2] && (d[3] || d[4])) {
            d[3] = d[4] = d[5] = d[6] = '\0';
        }

        if (*a == 'n') {
            /* non-ignorable: nothing to adjust */
        }
        else if (*v) {
            /* variable element */
            if (*a == 's') { /* shifted or shift-trimmed */
                d[7] = d[1]; /* wt level 1 to 4 */
                d[8] = d[2];
            } /* else blanked */

            d[1] = d[2] = d[3] = d[4] = d[5] = d[6] = '\0';
        }
        else if (*a == 'b') {
            /* blanked: non-variable elements stay as they are */
        }
        else if (*a == 's') { /* shifted or shift-trimmed */
            totwt = d[1] + d[2] + d[3] + d[4] + d[5] + d[6];
            if (alen == 7 && totwt != 0) { /* shifted */
                if (d[1] == 0 && d[2] == 1) { /* XXX: CollationAuxiliary-6.2.0 */
                    d[7] = d[1]; /* wt level 1 to 4 */
                    d[8] = d[2];
                } else {
                    d[7] = (U8)(Shift4Wt >> 8);
                    d[8] = (U8)(Shift4Wt & 0xFF);
                }
            } else { /* shift-trimmed or completely ignorable */
                d[7] = d[8] = '\0';
            }
        }
        else {
            /* dst is not mortal yet; drop it so that croak cannot leak it */
            SvREFCNT_dec(dst);
            croak("unknown variable value '%s'", a);
        }
    }
    RETVAL = dst;
  OUTPUT:
    RETVAL
622
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
625
|
|
|
|
|
|
SV*
visualizeSortKey (self, key)
    SV * self
    SV * key
  PREINIT:
    /* Renders a binary sort key as text such as "[0A1B 0C2D | 0020 | ...]".
     * The key is read as big-endian 16-bit units.  A zero unit is a level
     * boundary and is shown as '|' until MaxLevel boundaries have been
     * seen; every other unit is shown as four upper-case hex digits.
     * Blanks around '|' are written only when UCA_Version >= 9.
     * Croaks when self is not a hash reference or lacks "UCA_Version".
     */
    HV *selfHV;
    SV **svp, *dst;
    U8 *s, *d;
    U8 hi, lo;
    STRLEN klen, dlen, i;
    UV uv;
    IV uca_vers, sep = 0;
    static const char *upperhex = "0123456789ABCDEF";
  CODE:
    if (SvROK(self) && SvTYPE(SvRV(self)) == SVt_PVHV)
        selfHV = (HV*)SvRV(self);
    else
        croak("$self is not a HASHREF.");

    svp = hv_fetch(selfHV, "UCA_Version", 11, FALSE);
    if (!svp)
        croak("Panic: no $self->{UCA_Version} in visualizeSortKey");
    uca_vers = SvIV(*svp);

    s = (U8*)SvPV(key, klen);

   /* slightly *longer* than the need, but I'm afraid of miscounting;
      = (klen / 2) * 5 - 1
             # FFFF and ' ' for each 16bit units but ' ' is less by 1;
             # ' ' and '|' for level boundaries including the identical level
       + 2   # '[' and ']'
       + 1   # '\0'
       (a) if klen is odd (not expected), maybe more 5 bytes.
       (b) there is not always the identical level.
   */
    dlen = (klen / 2) * 5 + MaxLevel * 2 + 2;
    dst = newSV(dlen);
    (void)SvPOK_only(dst);
    d = (U8*)SvPVX(dst);

    *d++ = '[';
    for (i = 0; i < klen; i += 2) {
        hi = s[i];
        /* an odd-length key has no low byte for its last unit; use 0
         * rather than reading past the end of the key */
        lo = (i + 1 < klen) ? s[i + 1] : 0;
        uv = (U16)(hi << 8 | lo);
        if (uv || sep >= MaxLevel) {
            if ((d[-1] != '[') && ((9 <= uca_vers) || (d[-1] != '|')))
                *d++ = ' ';
            *d++ = upperhex[ (hi >> 4) & 0xF ];
            *d++ = upperhex[  hi       & 0xF ];
            *d++ = upperhex[ (lo >> 4) & 0xF ];
            *d++ = upperhex[  lo       & 0xF ];
        } else {
            if ((9 <= uca_vers) && (d[-1] != '['))
                *d++ = ' ';
            *d++ = '|';
            ++sep;
        }
    }
    *d++ = ']';
    *d   = '\0';
    SvCUR_set(dst, d - (U8*)SvPVX(dst));
    RETVAL = dst;
  OUTPUT:
    RETVAL
687
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
690
|
|
|
|
|
|
void
unpack_U (src)
    SV* src
  PREINIT:
    /* Returns the code points of src as a list of unsigned integers.
     * When src is not flagged as UTF-8, a mortal copy is upgraded and
     * decoded instead, so src itself is left unchanged.  Croaks when the
     * decoder reports a zero-length character. */
    STRLEN srclen, retlen;
    U8 *s, *p, *e;
    UV uv;
  PPCODE:
    s = (U8*)SvPV(src,srclen);
    if (!SvUTF8(src)) {
        SV* upgraded = sv_mortalcopy(src);
        if (!SvPOK(upgraded))
            (void)sv_pvn_force(upgraded,&srclen);
        sv_utf8_upgrade(upgraded);
        s = (U8*)SvPV(upgraded,srclen);
    }

    p = s;
    e = s + srclen;
    while (p < e) {
        uv = utf8n_to_uvuni(p, e - p, &retlen, AllowAnyUTF);
        if (retlen == 0)
            croak(ErrRetlenIsZero);
        XPUSHs(sv_2mortal(newSVuv(uv)));
        p += retlen;	/* advance by the bytes just decoded */
    }
714
|
|
|
|
|
|
|