| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT /* we want efficiency */ |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
/* private functions which need pTHX_ and aTHX_ |
|
5
|
|
|
|
|
|
|
pv_cat_decompHangul |
|
6
|
|
|
|
|
|
|
sv_2pvunicode |
|
7
|
|
|
|
|
|
|
pv_utf8_decompose |
|
8
|
|
|
|
|
|
|
pv_utf8_reorder |
|
9
|
|
|
|
|
|
|
pv_utf8_compose |
|
10
|
|
|
|
|
|
|
*/ |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
13
|
|
|
|
|
|
|
#include "perl.h" |
|
14
|
|
|
|
|
|
|
#include "XSUB.h" |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
/* These 5 files are prepared by mkheader */ |
|
17
|
|
|
|
|
|
|
#include "unfcmb.h" |
|
18
|
|
|
|
|
|
|
#include "unfcan.h" |
|
19
|
|
|
|
|
|
|
#include "unfcpt.h" |
|
20
|
|
|
|
|
|
|
#include "unfcmp.h" |
|
21
|
|
|
|
|
|
|
#include "unfexc.h" |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* The generated normalization tables since v5.20 are in native character set |
|
24
|
|
|
|
|
|
|
* terms. Prior to that, they were in Unicode terms. So we use 'uvchr' for |
|
25
|
|
|
|
|
|
|
* later perls, and redefine that to be 'uvuni' for earlier ones */ |
|
26
|
|
|
|
|
|
|
#if PERL_VERSION < 20 |
|
27
|
|
|
|
|
|
|
# undef uvchr_to_utf8 |
|
28
|
|
|
|
|
|
|
# ifdef uvuni_to_utf8 |
|
29
|
|
|
|
|
|
|
# define uvchr_to_utf8 uvuni_to_utf8 |
|
30
|
|
|
|
|
|
|
# else /* Perl 5.6.1 */ |
|
31
|
|
|
|
|
|
|
# define uvchr_to_utf8 uv_to_utf8 |
|
32
|
|
|
|
|
|
|
# endif |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# undef utf8n_to_uvchr |
|
35
|
|
|
|
|
|
|
# ifdef utf8n_to_uvuni |
|
36
|
|
|
|
|
|
|
# define utf8n_to_uvchr utf8n_to_uvuni |
|
37
|
|
|
|
|
|
|
# else /* Perl 5.6.1 */ |
|
38
|
|
|
|
|
|
|
# define utf8n_to_uvchr utf8_to_uv |
|
39
|
|
|
|
|
|
|
# endif |
|
40
|
|
|
|
|
|
|
#endif |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
/* UTF8_ALLOW_BOM is used before Perl 5.8.0 */ |
|
43
|
|
|
|
|
|
|
#ifndef UTF8_ALLOW_BOM |
|
44
|
|
|
|
|
|
|
#define UTF8_ALLOW_BOM (0) |
|
45
|
|
|
|
|
|
|
#endif /* UTF8_ALLOW_BOM */ |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#ifndef UTF8_ALLOW_SURROGATE |
|
48
|
|
|
|
|
|
|
#define UTF8_ALLOW_SURROGATE (0) |
|
49
|
|
|
|
|
|
|
#endif /* UTF8_ALLOW_SURROGATE */ |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
#ifndef UTF8_ALLOW_FE_FF |
|
52
|
|
|
|
|
|
|
#define UTF8_ALLOW_FE_FF (0) |
|
53
|
|
|
|
|
|
|
#endif /* UTF8_ALLOW_FE_FF */ |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
#ifndef UTF8_ALLOW_FFFF |
|
56
|
|
|
|
|
|
|
#define UTF8_ALLOW_FFFF (0) |
|
57
|
|
|
|
|
|
|
#endif /* UTF8_ALLOW_FFFF */ |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
#ifndef PERL_UNUSED_VAR |
|
60
|
|
|
|
|
|
|
# define PERL_UNUSED_VAR(x) ((void)sizeof(x)) |
|
61
|
|
|
|
|
|
|
#endif |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
#define AllowAnyUTF (UTF8_ALLOW_SURROGATE|UTF8_ALLOW_BOM|UTF8_ALLOW_FE_FF|UTF8_ALLOW_FFFF) |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
/* Make sure the output buffer can take `need` more bytes before a write such
 * as uvchr_to_utf8().
 *
 * The caller must have three variables in scope, all of which may be updated:
 *   dstart - start of the output buffer (allocated as dlen+1 bytes)
 *   d      - current write position inside that buffer
 *   dlen   - current capacity counted from dstart
 *
 * The expansion is one self-contained compound statement, so the scratch
 * variable neither leaks into nor clashes with the caller's scope, and the
 * macro can be used several times in the same block or as the body of an
 * unbraced if/for.  Call sites write it without a trailing semicolon. */
#define Renew_d_if_not_enough_to(need)	{ \
	STRLEN curlen = d - dstart;	/* bytes written so far */ \
	if (dlen < curlen + (need)) {	\
	    dlen += (need);		\
	    Renew(dstart, dlen+1, U8);	\
	    d = dstart + curlen;	/* buffer may have moved */ \
	}				\
    }
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
/* if utf8n_to_uvchr() sets retlen to 0 (if broken?) */ |
|
75
|
|
|
|
|
|
|
#define ErrRetlenIsZero "panic (Unicode::Normalize %s): zero-length character" |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
/* utf8_hop() hops back before start. Maybe broken UTF-8 */ |
|
78
|
|
|
|
|
|
|
#define ErrHopBeforeStart "panic (Unicode::Normalize): hopping before start" |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/* At present, char > 0x10ffff are unaffected without complaint, right? */ |
|
81
|
|
|
|
|
|
|
#define VALID_UTF_MAX (0x10ffff) |
|
82
|
|
|
|
|
|
|
#define OVER_UTF_MAX(uv) (VALID_UTF_MAX < (uv)) |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
/* size of array for combining characters */ |
|
85
|
|
|
|
|
|
|
/* enough as an initial value? */ |
|
86
|
|
|
|
|
|
|
#define CC_SEQ_SIZE (10) |
|
87
|
|
|
|
|
|
|
#define CC_SEQ_STEP (5) |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
/* HANGUL begin */ |
|
90
|
|
|
|
|
|
|
#define Hangul_SBase 0xAC00 |
|
91
|
|
|
|
|
|
|
#define Hangul_SFinal 0xD7A3 |
|
92
|
|
|
|
|
|
|
#define Hangul_SCount 11172 |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
#define Hangul_NCount 588 |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
#define Hangul_LBase 0x1100 |
|
97
|
|
|
|
|
|
|
#define Hangul_LFinal 0x1112 |
|
98
|
|
|
|
|
|
|
#define Hangul_LCount 19 |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
#define Hangul_VBase 0x1161 |
|
101
|
|
|
|
|
|
|
#define Hangul_VFinal 0x1175 |
|
102
|
|
|
|
|
|
|
#define Hangul_VCount 21 |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
#define Hangul_TBase 0x11A7 |
|
105
|
|
|
|
|
|
|
#define Hangul_TFinal 0x11C2 |
|
106
|
|
|
|
|
|
|
#define Hangul_TCount 28 |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
#define Hangul_IsS(u) ((Hangul_SBase <= (u)) && ((u) <= Hangul_SFinal)) |
|
109
|
|
|
|
|
|
|
#define Hangul_IsN(u) (((u) - Hangul_SBase) % Hangul_TCount == 0) |
|
110
|
|
|
|
|
|
|
#define Hangul_IsLV(u) (Hangul_IsS(u) && Hangul_IsN(u)) |
|
111
|
|
|
|
|
|
|
#define Hangul_IsL(u) ((Hangul_LBase <= (u)) && ((u) <= Hangul_LFinal)) |
|
112
|
|
|
|
|
|
|
#define Hangul_IsV(u) ((Hangul_VBase <= (u)) && ((u) <= Hangul_VFinal)) |
|
113
|
|
|
|
|
|
|
#define Hangul_IsT(u) ((Hangul_TBase < (u)) && ((u) <= Hangul_TFinal)) |
|
114
|
|
|
|
|
|
|
/* HANGUL end */ |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
/* this is used for canonical ordering of combining characters (c.c.). */ |
|
117
|
|
|
|
|
|
|
typedef struct { |
|
118
|
|
|
|
|
|
|
U8 cc; /* combining class */ |
|
119
|
|
|
|
|
|
|
UV uv; /* codepoint */ |
|
120
|
|
|
|
|
|
|
STRLEN pos; /* position */ |
|
121
|
|
|
|
|
|
|
} UNF_cc; |
|
122
|
|
|
|
|
|
|
|
|
123
|
1083
|
|
|
|
|
|
static int compare_cc(const void *a, const void *b) |
|
124
|
|
|
|
|
|
|
{ |
|
125
|
|
|
|
|
|
|
int ret_cc; |
|
126
|
1083
|
|
|
|
|
|
ret_cc = ((UNF_cc*) a)->cc - ((UNF_cc*) b)->cc; |
|
127
|
1083
|
100
|
|
|
|
|
if (ret_cc) |
|
128
|
184
|
|
|
|
|
|
return ret_cc; |
|
129
|
|
|
|
|
|
|
|
|
130
|
899
|
|
|
|
|
|
return ( ((UNF_cc*) a)->pos > ((UNF_cc*) b)->pos ) |
|
131
|
899
|
|
|
|
|
|
- ( ((UNF_cc*) a)->pos < ((UNF_cc*) b)->pos ); |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
1526
|
|
|
|
|
|
/* Look uv up in the three-level UNF_canon table (indexed by bits 16+,
 * bits 8-15 and bits 0-7 of the code point).  Returns the pointer stored in
 * the cell, or NULL when uv is above VALID_UTF_MAX or any level is absent. */
static U8* dec_canonical(UV uv)
{
    U8 ***plane;
    U8 **row;

    if (OVER_UTF_MAX(uv))
        return NULL;

    plane = (U8***)UNF_canon[uv >> 16];
    if (plane == NULL)
        return NULL;

    row = plane[(uv >> 8) & 0xff];
    if (row == NULL)
        return NULL;

    return row[uv & 0xff];
}
|
145
|
|
|
|
|
|
|
|
|
146
|
429
|
|
|
|
|
|
/* Look uv up in the three-level UNF_compat table (indexed by bits 16+,
 * bits 8-15 and bits 0-7 of the code point).  Returns the pointer stored in
 * the cell, or NULL when uv is above VALID_UTF_MAX or any level is absent. */
static U8* dec_compat(UV uv)
{
    U8 ***plane;
    U8 **row;

    if (OVER_UTF_MAX(uv))
        return NULL;

    plane = (U8***)UNF_compat[uv >> 16];
    if (plane == NULL)
        return NULL;

    row = plane[(uv >> 8) & 0xff];
    if (row == NULL)
        return NULL;

    return row[uv & 0xff];
}
|
157
|
|
|
|
|
|
|
|
|
158
|
438
|
|
|
|
|
|
/* Return the code point that the pair (uv, uv2) composes to, or 0 when the
 * pair has no composite.
 *
 * Hangul L+V and LV+T pairs are computed arithmetically; every other pair is
 * searched for in the UNF_compos table, whose cells are lists terminated by
 * an entry with nextchar == 0. */
static UV composite_uv(UV uv, UV uv2)
{
    UNF_complist ***plane;
    UNF_complist **row;
    UNF_complist *entry;

    if (uv2 == 0)
        return 0;
    if (OVER_UTF_MAX(uv) || OVER_UTF_MAX(uv2))
        return 0;

    /* Hangul: leading consonant + vowel -> LV syllable */
    if (Hangul_IsL(uv) && Hangul_IsV(uv2)) {
        UV l_off = uv - Hangul_LBase;
        UV v_off = uv2 - Hangul_VBase;
        return(Hangul_SBase + (l_off * Hangul_VCount + v_off) *
               Hangul_TCount);
    }

    /* Hangul: LV syllable + trailing consonant -> LVT syllable */
    if (Hangul_IsLV(uv) && Hangul_IsT(uv2)) {
        UV t_off = uv2 - Hangul_TBase;
        return(uv + t_off);
    }

    /* everything else: table lookup keyed on the first code point */
    plane = UNF_compos[uv >> 16];
    if (plane == NULL)
        return 0;

    row = plane[(uv >> 8) & 0xff];
    if (row == NULL)
        return 0;

    entry = row[uv & 0xff];
    if (entry == NULL)
        return 0;

    while (entry->nextchar) {
        if (entry->nextchar == uv2)
            return entry->composite;
        entry++;
    }
    return 0;
}
|
190
|
|
|
|
|
|
|
|
|
191
|
2979
|
|
|
|
|
|
/* Return the combining class recorded for uv in the two-level UNF_combin
 * table, or 0 when uv is above VALID_UTF_MAX or has no table entry. */
static U8 getCombinClass(UV uv)
{
    U8 **plane;
    U8 *row;

    if (OVER_UTF_MAX(uv))
        return 0;

    plane = (U8**)UNF_combin[uv >> 16];
    if (plane == NULL)
        return 0;

    row = plane[(uv >> 8) & 0xff];
    if (row == NULL)
        return 0;

    return row[uv & 0xff];
}
|
202
|
|
|
|
|
|
|
|
|
203
|
233
|
|
|
|
|
|
/* Append the jamo decomposition of the Hangul syllable uv at d and return
 * the advanced write pointer.  Two code points (L, V) are always written and
 * a third (T) only when the syllable has a trailing consonant.  If uv is not
 * in the Hangul syllable range, nothing is written and d is returned as is.
 * The caller is responsible for having enough room at d. */
static U8* pv_cat_decompHangul(pTHX_ U8* d, UV uv)
{
    UV syllable, lead, vowel, trail;

    if (! Hangul_IsS(uv))
        return d;

    syllable = uv - Hangul_SBase;
    lead     = syllable / Hangul_NCount;
    vowel    = (syllable % Hangul_NCount) / Hangul_TCount;
    trail    = syllable % Hangul_TCount;

    d = uvchr_to_utf8(d, (lead + Hangul_LBase));
    d = uvchr_to_utf8(d, (vowel + Hangul_VBase));
    if (trail != 0)
        d = uvchr_to_utf8(d, (trail + Hangul_TBase));

    return d;
}
|
219
|
|
|
|
|
|
|
|
|
220
|
671
|
|
|
|
|
|
/* Return the string buffer of sv as UTF-8 and, when lp is non-NULL, store
 * its byte length in *lp.
 *
 * If sv is not flagged as UTF-8, the bytes are copied into a new mortal SV,
 * that copy is upgraded, and the returned pointer refers to the copy's
 * buffer; sv itself is not upgraded. */
static char* sv_2pvunicode(pTHX_ SV *sv, STRLEN *lp)
{
    char *pv;
    STRLEN pvlen;

    pv = SvPV(sv,pvlen);

    if (!SvUTF8(sv)) {
        SV* copy = sv_2mortal(newSVpvn(pv, pvlen));

        if (!SvPOK(copy))
            pv = SvPV_force(copy,pvlen);
        sv_utf8_upgrade(copy);
        pv = SvPV(copy,pvlen);
    }

    if (lp != NULL)
        *lp = pvlen;
    return pv;
}
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
static |
|
238
|
405
|
|
|
|
|
|
U8* pv_utf8_decompose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscompat) |
|
239
|
|
|
|
|
|
|
{ |
|
240
|
405
|
|
|
|
|
|
U8* p = s; |
|
241
|
405
|
|
|
|
|
|
U8* e = s + slen; |
|
242
|
405
|
|
|
|
|
|
U8* dstart = *dp; |
|
243
|
405
|
|
|
|
|
|
U8* d = dstart; |
|
244
|
|
|
|
|
|
|
|
|
245
|
2183
|
100
|
|
|
|
|
while (p < e) { |
|
246
|
|
|
|
|
|
|
STRLEN retlen; |
|
247
|
1778
|
|
|
|
|
|
UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); |
|
248
|
1778
|
50
|
|
|
|
|
if (!retlen) |
|
249
|
0
|
|
|
|
|
|
croak(ErrRetlenIsZero, "decompose"); |
|
250
|
1778
|
|
|
|
|
|
p += retlen; |
|
251
|
|
|
|
|
|
|
|
|
252
|
2003
|
100
|
|
|
|
|
if (Hangul_IsS(uv)) { |
|
|
|
100
|
|
|
|
|
|
|
253
|
225
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN * 3) |
|
254
|
225
|
|
|
|
|
|
d = pv_cat_decompHangul(aTHX_ d, uv); |
|
255
|
|
|
|
|
|
|
} |
|
256
|
|
|
|
|
|
|
else { |
|
257
|
1553
|
100
|
|
|
|
|
U8* r = iscompat ? dec_compat(uv) : dec_canonical(uv); |
|
258
|
|
|
|
|
|
|
|
|
259
|
1553
|
100
|
|
|
|
|
if (r) { |
|
260
|
265
|
|
|
|
|
|
STRLEN len = (STRLEN)strlen((char *)r); |
|
261
|
265
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(len) |
|
262
|
1682
|
100
|
|
|
|
|
while (len--) |
|
263
|
1417
|
|
|
|
|
|
*d++ = *r++; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
else { |
|
266
|
1288
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN) |
|
267
|
1778
|
|
|
|
|
|
d = uvchr_to_utf8(d, uv); |
|
268
|
|
|
|
|
|
|
} |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
} |
|
271
|
405
|
|
|
|
|
|
*dp = dstart; |
|
272
|
405
|
|
|
|
|
|
return d; |
|
273
|
|
|
|
|
|
|
} |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
static |
|
276
|
380
|
|
|
|
|
|
U8* pv_utf8_reorder(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen) |
|
277
|
|
|
|
|
|
|
{ |
|
278
|
380
|
|
|
|
|
|
U8* p = s; |
|
279
|
380
|
|
|
|
|
|
U8* e = s + slen; |
|
280
|
380
|
|
|
|
|
|
U8* dstart = *dp; |
|
281
|
380
|
|
|
|
|
|
U8* d = dstart; |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
UNF_cc seq_ary[CC_SEQ_SIZE]; |
|
284
|
380
|
|
|
|
|
|
UNF_cc* seq_ptr = seq_ary; /* use array at the beginning */ |
|
285
|
380
|
|
|
|
|
|
UNF_cc* seq_ext = NULL; /* extend if need */ |
|
286
|
380
|
|
|
|
|
|
STRLEN seq_max = CC_SEQ_SIZE; |
|
287
|
380
|
|
|
|
|
|
STRLEN cc_pos = 0; |
|
288
|
|
|
|
|
|
|
|
|
289
|
1791
|
100
|
|
|
|
|
while (p < e) { |
|
290
|
|
|
|
|
|
|
U8 curCC; |
|
291
|
|
|
|
|
|
|
STRLEN retlen; |
|
292
|
1411
|
|
|
|
|
|
UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); |
|
293
|
1411
|
50
|
|
|
|
|
if (!retlen) |
|
294
|
0
|
|
|
|
|
|
croak(ErrRetlenIsZero, "reorder"); |
|
295
|
1411
|
|
|
|
|
|
p += retlen; |
|
296
|
|
|
|
|
|
|
|
|
297
|
1411
|
|
|
|
|
|
curCC = getCombinClass(uv); |
|
298
|
|
|
|
|
|
|
|
|
299
|
1411
|
100
|
|
|
|
|
if (curCC != 0) { |
|
300
|
734
|
100
|
|
|
|
|
if (seq_max < cc_pos + 1) { /* extend if need */ |
|
301
|
54
|
|
|
|
|
|
seq_max = cc_pos + CC_SEQ_STEP; /* new size */ |
|
302
|
54
|
100
|
|
|
|
|
if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */ |
|
303
|
|
|
|
|
|
|
STRLEN i; |
|
304
|
12
|
50
|
|
|
|
|
New(0, seq_ext, seq_max, UNF_cc); |
|
305
|
132
|
100
|
|
|
|
|
for (i = 0; i < cc_pos; i++) |
|
306
|
120
|
|
|
|
|
|
seq_ext[i] = seq_ary[i]; |
|
307
|
|
|
|
|
|
|
} |
|
308
|
|
|
|
|
|
|
else { |
|
309
|
42
|
50
|
|
|
|
|
Renew(seq_ext, seq_max, UNF_cc); |
|
310
|
|
|
|
|
|
|
} |
|
311
|
54
|
|
|
|
|
|
seq_ptr = seq_ext; /* use seq_ext from now */ |
|
312
|
|
|
|
|
|
|
} |
|
313
|
|
|
|
|
|
|
|
|
314
|
734
|
|
|
|
|
|
seq_ptr[cc_pos].cc = curCC; |
|
315
|
734
|
|
|
|
|
|
seq_ptr[cc_pos].uv = uv; |
|
316
|
734
|
|
|
|
|
|
seq_ptr[cc_pos].pos = cc_pos; |
|
317
|
734
|
|
|
|
|
|
++cc_pos; |
|
318
|
|
|
|
|
|
|
|
|
319
|
734
|
100
|
|
|
|
|
if (p < e) |
|
320
|
635
|
|
|
|
|
|
continue; |
|
321
|
|
|
|
|
|
|
} |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
/* output */ |
|
324
|
776
|
100
|
|
|
|
|
if (cc_pos) { |
|
325
|
|
|
|
|
|
|
STRLEN i; |
|
326
|
|
|
|
|
|
|
|
|
327
|
217
|
100
|
|
|
|
|
if (cc_pos > 1) /* reordered if there are two c.c.'s */ |
|
328
|
101
|
|
|
|
|
|
qsort((void*)seq_ptr, cc_pos, sizeof(UNF_cc), compare_cc); |
|
329
|
|
|
|
|
|
|
|
|
330
|
951
|
100
|
|
|
|
|
for (i = 0; i < cc_pos; i++) { |
|
331
|
734
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN) |
|
332
|
734
|
|
|
|
|
|
d = uvchr_to_utf8(d, seq_ptr[i].uv); |
|
333
|
|
|
|
|
|
|
} |
|
334
|
217
|
|
|
|
|
|
cc_pos = 0; |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
|
|
337
|
776
|
100
|
|
|
|
|
if (curCC == 0) { |
|
338
|
677
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN) |
|
339
|
776
|
|
|
|
|
|
d = uvchr_to_utf8(d, uv); |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
} |
|
342
|
380
|
100
|
|
|
|
|
if (seq_ext) |
|
343
|
12
|
|
|
|
|
|
Safefree(seq_ext); |
|
344
|
380
|
|
|
|
|
|
*dp = dstart; |
|
345
|
380
|
|
|
|
|
|
return d; |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
static |
|
349
|
273
|
|
|
|
|
|
U8* pv_utf8_compose(pTHX_ U8* s, STRLEN slen, U8** dp, STRLEN dlen, bool iscontig) |
|
350
|
|
|
|
|
|
|
{ |
|
351
|
273
|
|
|
|
|
|
U8* p = s; |
|
352
|
273
|
|
|
|
|
|
U8* e = s + slen; |
|
353
|
273
|
|
|
|
|
|
U8* dstart = *dp; |
|
354
|
273
|
|
|
|
|
|
U8* d = dstart; |
|
355
|
|
|
|
|
|
|
|
|
356
|
273
|
|
|
|
|
|
UV uvS = 0; /* code point of the starter */ |
|
357
|
273
|
|
|
|
|
|
bool valid_uvS = FALSE; /* if FALSE, uvS isn't initialized yet */ |
|
358
|
273
|
|
|
|
|
|
U8 preCC = 0; |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
UV seq_ary[CC_SEQ_SIZE]; |
|
361
|
273
|
|
|
|
|
|
UV* seq_ptr = seq_ary; /* use array at the beginning */ |
|
362
|
273
|
|
|
|
|
|
UV* seq_ext = NULL; /* extend if need */ |
|
363
|
273
|
|
|
|
|
|
STRLEN seq_max = CC_SEQ_SIZE; |
|
364
|
273
|
|
|
|
|
|
STRLEN cc_pos = 0; |
|
365
|
|
|
|
|
|
|
|
|
366
|
1336
|
100
|
|
|
|
|
while (p < e) { |
|
367
|
|
|
|
|
|
|
U8 curCC; |
|
368
|
|
|
|
|
|
|
STRLEN retlen; |
|
369
|
1063
|
|
|
|
|
|
UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); |
|
370
|
1063
|
50
|
|
|
|
|
if (!retlen) |
|
371
|
0
|
|
|
|
|
|
croak(ErrRetlenIsZero, "compose"); |
|
372
|
1063
|
|
|
|
|
|
p += retlen; |
|
373
|
|
|
|
|
|
|
|
|
374
|
1063
|
|
|
|
|
|
curCC = getCombinClass(uv); |
|
375
|
|
|
|
|
|
|
|
|
376
|
1063
|
100
|
|
|
|
|
if (!valid_uvS) { |
|
377
|
294
|
100
|
|
|
|
|
if (curCC == 0) { |
|
378
|
259
|
|
|
|
|
|
uvS = uv; /* the first Starter is found */ |
|
379
|
259
|
|
|
|
|
|
valid_uvS = TRUE; |
|
380
|
259
|
100
|
|
|
|
|
if (p < e) |
|
381
|
725
|
|
|
|
|
|
continue; |
|
382
|
|
|
|
|
|
|
} |
|
383
|
|
|
|
|
|
|
else { |
|
384
|
35
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN) |
|
385
|
35
|
|
|
|
|
|
d = uvchr_to_utf8(d, uv); |
|
386
|
35
|
|
|
|
|
|
continue; |
|
387
|
|
|
|
|
|
|
} |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
else { |
|
390
|
|
|
|
|
|
|
bool composed; |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
/* blocked */ |
|
393
|
769
|
100
|
|
|
|
|
if ((iscontig && cc_pos) || /* discontiguous combination */ |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
394
|
636
|
100
|
|
|
|
|
(curCC != 0 && preCC == curCC) || /* blocked by same CC */ |
|
|
|
100
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
(preCC > curCC)) /* blocked by higher CC: revised D2 */ |
|
396
|
352
|
|
|
|
|
|
composed = FALSE; |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
/* not blocked: |
|
399
|
|
|
|
|
|
|
iscontig && cc_pos == 0 -- contiguous combination |
|
400
|
|
|
|
|
|
|
curCC == 0 && preCC == 0 -- starter + starter |
|
401
|
|
|
|
|
|
|
curCC != 0 && preCC < curCC -- lower CC */ |
|
402
|
|
|
|
|
|
|
else { |
|
403
|
|
|
|
|
|
|
/* try composition */ |
|
404
|
417
|
|
|
|
|
|
UV uvComp = composite_uv(uvS, uv); |
|
405
|
|
|
|
|
|
|
|
|
406
|
417
|
100
|
|
|
|
|
if (uvComp && !isExclusion(uvComp)) { |
|
|
|
50
|
|
|
|
|
|
|
407
|
184
|
|
|
|
|
|
uvS = uvComp; |
|
408
|
184
|
|
|
|
|
|
composed = TRUE; |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
/* preCC should not be changed to curCC */ |
|
411
|
|
|
|
|
|
|
/* e.g. 1E14 = 0045 0304 0300 where CC(0304) == CC(0300) */ |
|
412
|
184
|
100
|
|
|
|
|
if (p < e) |
|
413
|
120
|
|
|
|
|
|
continue; |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
else |
|
416
|
233
|
|
|
|
|
|
composed = FALSE; |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
|
|
419
|
649
|
100
|
|
|
|
|
if (!composed) { |
|
420
|
585
|
|
|
|
|
|
preCC = curCC; |
|
421
|
585
|
100
|
|
|
|
|
if (curCC != 0 || !(p < e)) { |
|
|
|
100
|
|
|
|
|
|
|
422
|
506
|
100
|
|
|
|
|
if (seq_max < cc_pos + 1) { /* extend if need */ |
|
423
|
50
|
|
|
|
|
|
seq_max = cc_pos + CC_SEQ_STEP; /* new size */ |
|
424
|
50
|
100
|
|
|
|
|
if (CC_SEQ_SIZE == cc_pos) { /* seq_ary full */ |
|
425
|
10
|
50
|
|
|
|
|
New(0, seq_ext, seq_max, UV); |
|
426
|
10
|
50
|
|
|
|
|
Copy(seq_ary, seq_ext, cc_pos, UV); |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
else { |
|
429
|
40
|
50
|
|
|
|
|
Renew(seq_ext, seq_max, UV); |
|
430
|
|
|
|
|
|
|
} |
|
431
|
50
|
|
|
|
|
|
seq_ptr = seq_ext; /* use seq_ext from now */ |
|
432
|
|
|
|
|
|
|
} |
|
433
|
506
|
|
|
|
|
|
seq_ptr[cc_pos] = uv; |
|
434
|
506
|
|
|
|
|
|
++cc_pos; |
|
435
|
|
|
|
|
|
|
} |
|
436
|
585
|
100
|
|
|
|
|
if (curCC != 0 && p < e) |
|
|
|
100
|
|
|
|
|
|
|
437
|
388
|
|
|
|
|
|
continue; |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
} |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
/* output */ |
|
442
|
|
|
|
|
|
|
{ |
|
443
|
338
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN) |
|
444
|
338
|
|
|
|
|
|
d = uvchr_to_utf8(d, uvS); /* starter (composed or not) */ |
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
|
|
447
|
338
|
100
|
|
|
|
|
if (cc_pos) { |
|
448
|
|
|
|
|
|
|
STRLEN i; |
|
449
|
|
|
|
|
|
|
|
|
450
|
643
|
100
|
|
|
|
|
for (i = 0; i < cc_pos; i++) { |
|
451
|
506
|
100
|
|
|
|
|
Renew_d_if_not_enough_to(UTF8_MAXLEN) |
|
452
|
506
|
|
|
|
|
|
d = uvchr_to_utf8(d, seq_ptr[i]); |
|
453
|
|
|
|
|
|
|
} |
|
454
|
137
|
|
|
|
|
|
cc_pos = 0; |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
|
|
457
|
338
|
|
|
|
|
|
uvS = uv; |
|
458
|
|
|
|
|
|
|
} |
|
459
|
273
|
100
|
|
|
|
|
if (seq_ext) |
|
460
|
10
|
|
|
|
|
|
Safefree(seq_ext); |
|
461
|
273
|
|
|
|
|
|
*dp = dstart; |
|
462
|
273
|
|
|
|
|
|
return d; |
|
463
|
|
|
|
|
|
|
} |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
MODULE = Unicode::Normalize PACKAGE = Unicode::Normalize |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
SV*
decompose(src, compat = &PL_sv_no)
    SV * src
    SV * compat
  PROTOTYPE: $;$
  PREINIT:
    SV* result;
    U8 *in, *buf, *buf_end;
    STRLEN inlen, buflen;
  CODE:
    /* Decompose src (compatibility mapping when compat is true, canonical
       otherwise) and return the outcome as a new UTF-8 flagged scalar. */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);
    result = newSVpvn("", 0);

    /* start with a buffer as large as the input; the worker grows it */
    buflen = inlen;
    New(0, buf, buflen+1, U8);
    buf_end = pv_utf8_decompose(aTHX_ in, inlen, &buf, buflen, (bool)SvTRUE(compat));

    sv_setpvn(result, (char *)buf, buf_end - buf);
    SvUTF8_on(result);
    Safefree(buf);
    RETVAL = result;
  OUTPUT:
    RETVAL
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
SV*
reorder(src)
    SV * src
  PROTOTYPE: $
  PREINIT:
    SV* result;
    U8 *in, *buf, *buf_end;
    STRLEN inlen, buflen;
  CODE:
    /* Reorder the combining characters of src and return the outcome as a
       new UTF-8 flagged scalar. */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);
    result = newSVpvn("", 0);

    /* start with a buffer as large as the input; the worker grows it */
    buflen = inlen;
    New(0, buf, buflen+1, U8);
    buf_end = pv_utf8_reorder(aTHX_ in, inlen, &buf, buflen);

    sv_setpvn(result, (char *)buf, buf_end - buf);
    SvUTF8_on(result);
    Safefree(buf);
    RETVAL = result;
  OUTPUT:
    RETVAL
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
SV*
compose(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    composeContiguous = 1
  PREINIT:
    SV* result;
    U8 *in, *buf, *buf_end;
    STRLEN inlen, buflen;
  CODE:
    /* Compose src and return the outcome as a new UTF-8 flagged scalar.
       The alias index is passed on as the "contiguous only" flag, so
       composeContiguous (ix == 1) enables it and compose (ix == 0) does not. */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);
    result = newSVpvn("", 0);

    /* start with a buffer as large as the input; the worker grows it */
    buflen = inlen;
    New(0, buf, buflen+1, U8);
    buf_end = pv_utf8_compose(aTHX_ in, inlen, &buf, buflen, (bool)ix);

    sv_setpvn(result, (char *)buf, buf_end - buf);
    SvUTF8_on(result);
    Safefree(buf);
    RETVAL = result;
  OUTPUT:
    RETVAL
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
SV*
NFD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    NFKD = 1
  PREINIT:
    SV *result;
    U8 *in, *dec, *dec_end, *ord, *ord_end;
    STRLEN inlen, declen, ordlen;
  CODE:
    /* Two passes: decompose (compatibility mapping for NFKD, i.e. ix == 1,
       canonical otherwise), then reorder.  The reordered text is returned
       as a new UTF-8 flagged scalar. */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);

    /* pass 1: decompose */
    declen = inlen;
    New(0, dec, declen+1, U8);
    dec_end = pv_utf8_decompose(aTHX_ in, inlen, &dec, declen, (bool)(ix==1));
    *dec_end = '\0';
    declen = dec_end - dec;	/* bytes used; the allocated size is unknown now */

    /* pass 2: reorder */
    ordlen = declen;
    New(0, ord, ordlen+1, U8);
    ord_end = pv_utf8_reorder(aTHX_ dec, declen, &ord, ordlen);
    *ord_end = '\0';
    ordlen = ord_end - ord;	/* bytes used; the allocated size is unknown now */

    /* build the return value and release both work buffers */
    result = newSVpvn("", 0);
    sv_setpvn(result, (char *)ord, ordlen);
    SvUTF8_on(result);

    Safefree(dec);
    Safefree(ord);
    RETVAL = result;
  OUTPUT:
    RETVAL
|
573
|
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
SV*
NFC(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    NFKC = 1
    FCC = 2
  PREINIT:
    SV *result;
    U8 *in, *dec, *dec_end, *ord, *ord_end, *cmp, *cmp_end;
    STRLEN inlen, declen, ordlen, cmplen;
  CODE:
    /* Three passes: decompose (compatibility mapping for NFKC, i.e. ix == 1,
       canonical otherwise), reorder, then compose (contiguous only for FCC,
       i.e. ix == 2).  The composed text is returned as a new UTF-8 flagged
       scalar. */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);

    /* pass 1: decompose */
    declen = inlen;
    New(0, dec, declen+1, U8);
    dec_end = pv_utf8_decompose(aTHX_ in, inlen, &dec, declen, (bool)(ix==1));
    *dec_end = '\0';
    declen = dec_end - dec;	/* bytes used; the allocated size is unknown now */

    /* pass 2: reorder */
    ordlen = declen;
    New(0, ord, ordlen+1, U8);
    ord_end = pv_utf8_reorder(aTHX_ dec, declen, &ord, ordlen);
    *ord_end = '\0';
    ordlen = ord_end - ord;	/* bytes used; the allocated size is unknown now */

    /* pass 3: compose */
    cmplen = ordlen;
    New(0, cmp, cmplen+1, U8);
    cmp_end = pv_utf8_compose(aTHX_ ord, ordlen, &cmp, cmplen, (bool)(ix==2));
    *cmp_end = '\0';
    cmplen = cmp_end - cmp;	/* bytes used; the allocated size is unknown now */

    /* build the return value and release all work buffers */
    result = newSVpvn("", 0);
    sv_setpvn(result, (char *)cmp, cmplen);
    SvUTF8_on(result);

    Safefree(dec);
    Safefree(ord);
    Safefree(cmp);
    RETVAL = result;
  OUTPUT:
    RETVAL
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
SV*
checkNFD(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkNFKD = 1
  PREINIT:
    STRLEN inlen, chlen;
    U8 *in, *in_end, *cur, ccc, prev_ccc;
    bool ok = TRUE;
  CODE:
    /* Return true unless src contains a combining class that is non-zero
       and lower than the one before it, a Hangul syllable, or a character
       with a decomposition (compatibility table for checkNFKD, i.e. ix != 0,
       canonical table otherwise). */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);
    in_end = in + inlen;

    prev_ccc = 0;
    cur = in;
    while (cur < in_end) {
        UV uv = utf8n_to_uvchr(cur, in_end - cur, &chlen, AllowAnyUTF);
        if (!chlen)
            croak(ErrRetlenIsZero, "checkNFD or -NFKD");

        ccc = getCombinClass(uv);

        /* canonical ordering violated */
        if (prev_ccc > ccc && ccc != 0) {
            ok = FALSE;
            break;
        }

        /* decomposable character present */
        if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) {
            ok = FALSE;
            break;
        }

        prev_ccc = ccc;
        cur += chlen;
    }
    RETVAL = boolSV(ok);
  OUTPUT:
    RETVAL
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
SV*
checkNFC(src)
    SV * src
  PROTOTYPE: $
  ALIAS:
    checkNFKC = 1
  PREINIT:
    STRLEN inlen, chlen;
    U8 *in, *in_end, *cur, ccc, prev_ccc;
    bool ok = TRUE;
    bool maybe = FALSE;
  CODE:
    /* Three-valued check: returns false as soon as a character rules the
       form out, undef when nothing ruled it out but some character was
       flagged by isComp2nd() (MAYBE), and true otherwise. */
    in = (U8*)sv_2pvunicode(aTHX_ src,&inlen);
    in_end = in + inlen;

    prev_ccc = 0;
    cur = in;
    while (cur < in_end) {
        UV uv = utf8n_to_uvchr(cur, in_end - cur, &chlen, AllowAnyUTF);
        if (!chlen)
            croak(ErrRetlenIsZero, "checkNFC or -NFKC");

        ccc = getCombinClass(uv);

        /* canonical ordering violated */
        if (prev_ccc > ccc && ccc != 0) {
            ok = FALSE;
            break;
        }

        /* get NFC/NFKC property; Hangul syllables are canonical
           composites and therefore always YES */
        if (!Hangul_IsS(uv)) {
            if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) {
                ok = FALSE;
                break;
            }

            if (isComp2nd(uv)) {
                maybe = TRUE;
            }
            else if (ix) {
                /* NFKC_NO when having compatibility mapping, i.e. a
                   compat entry that is not identical to the canonical one */
                char *canon  = (char *) dec_canonical(uv);
                char *compat = (char *) dec_compat(uv);

                if (compat && !(canon && strEQ(canon, compat))) {
                    ok = FALSE;
                    break;
                }
            }
        }

        prev_ccc = ccc;
        cur += chlen;
    }

    if (maybe && ok)	/* NO precedes MAYBE */
        XSRETURN_UNDEF;
    RETVAL = boolSV(ok);
  OUTPUT:
    RETVAL
|
713
|
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
SV* |
|
716
|
|
|
|
|
|
|
checkFCD(src) |
|
717
|
|
|
|
|
|
|
SV * src |
|
718
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
719
|
|
|
|
|
|
|
ALIAS: |
|
720
|
|
|
|
|
|
|
checkFCC = 1 |
|
721
|
|
|
|
|
|
|
PREINIT: |
|
722
|
|
|
|
|
|
|
STRLEN srclen, retlen; |
|
723
|
|
|
|
|
|
|
U8 *s, *e, *p, curCC, preCC; |
|
724
|
67
|
|
|
|
|
|
bool result = TRUE; |
|
725
|
67
|
|
|
|
|
|
bool isMAYBE = FALSE; |
|
726
|
|
|
|
|
|
|
CODE: |
|
727
|
67
|
|
|
|
|
|
s = (U8*)sv_2pvunicode(aTHX_ src,&srclen); |
|
728
|
67
|
|
|
|
|
|
e = s + srclen; |
|
729
|
67
|
|
|
|
|
|
preCC = 0; |
|
730
|
225
|
100
|
|
|
|
|
for (p = s; p < e; p += retlen) { |
|
731
|
|
|
|
|
|
|
U8 *sCan; |
|
732
|
|
|
|
|
|
|
UV uvLead; |
|
733
|
179
|
|
|
|
|
|
STRLEN canlen = 0; |
|
734
|
179
|
|
|
|
|
|
UV uv = utf8n_to_uvchr(p, e - p, &retlen, AllowAnyUTF); |
|
735
|
179
|
50
|
|
|
|
|
if (!retlen) |
|
736
|
0
|
|
|
|
|
|
croak(ErrRetlenIsZero, "checkFCD or -FCC"); |
|
737
|
|
|
|
|
|
|
|
|
738
|
179
|
|
|
|
|
|
sCan = (U8*) dec_canonical(uv); |
|
739
|
|
|
|
|
|
|
|
|
740
|
179
|
100
|
|
|
|
|
if (sCan) { |
|
741
|
|
|
|
|
|
|
STRLEN canret; |
|
742
|
24
|
|
|
|
|
|
canlen = (STRLEN)strlen((char *) sCan); |
|
743
|
24
|
|
|
|
|
|
uvLead = utf8n_to_uvchr(sCan, canlen, &canret, AllowAnyUTF); |
|
744
|
24
|
50
|
|
|
|
|
if (!canret) |
|
745
|
24
|
|
|
|
|
|
croak(ErrRetlenIsZero, "checkFCD or -FCC"); |
|
746
|
|
|
|
|
|
|
} |
|
747
|
|
|
|
|
|
|
else { |
|
748
|
155
|
|
|
|
|
|
uvLead = uv; |
|
749
|
|
|
|
|
|
|
} |
|
750
|
|
|
|
|
|
|
|
|
751
|
179
|
|
|
|
|
|
curCC = getCombinClass(uvLead); |
|
752
|
|
|
|
|
|
|
|
|
753
|
179
|
100
|
|
|
|
|
if (curCC != 0 && curCC < preCC) { /* canonical ordering violated */ |
|
|
|
100
|
|
|
|
|
|
|
754
|
18
|
|
|
|
|
|
result = FALSE; |
|
755
|
18
|
|
|
|
|
|
break; |
|
756
|
|
|
|
|
|
|
} |
|
757
|
|
|
|
|
|
|
|
|
758
|
161
|
100
|
|
|
|
|
if (ix) { |
|
759
|
21
|
50
|
|
|
|
|
if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
760
|
3
|
|
|
|
|
|
result = FALSE; |
|
761
|
3
|
|
|
|
|
|
break; |
|
762
|
|
|
|
|
|
|
} |
|
763
|
18
|
100
|
|
|
|
|
else if (isComp2nd(uv)) |
|
764
|
8
|
|
|
|
|
|
isMAYBE = TRUE; |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
|
|
767
|
158
|
100
|
|
|
|
|
if (sCan) { |
|
768
|
|
|
|
|
|
|
STRLEN canret; |
|
769
|
|
|
|
|
|
|
UV uvTrail; |
|
770
|
21
|
|
|
|
|
|
U8* eCan = sCan + canlen; |
|
771
|
21
|
|
|
|
|
|
U8* pCan = utf8_hop(eCan, -1); |
|
772
|
21
|
50
|
|
|
|
|
if (pCan < sCan) |
|
773
|
0
|
|
|
|
|
|
croak(ErrHopBeforeStart); |
|
774
|
21
|
|
|
|
|
|
uvTrail = utf8n_to_uvchr(pCan, eCan - pCan, &canret, AllowAnyUTF); |
|
775
|
21
|
50
|
|
|
|
|
if (!canret) |
|
776
|
0
|
|
|
|
|
|
croak(ErrRetlenIsZero, "checkFCD or -FCC"); |
|
777
|
21
|
|
|
|
|
|
preCC = getCombinClass(uvTrail); |
|
778
|
|
|
|
|
|
|
} |
|
779
|
|
|
|
|
|
|
else { |
|
780
|
137
|
|
|
|
|
|
preCC = curCC; |
|
781
|
|
|
|
|
|
|
} |
|
782
|
|
|
|
|
|
|
} |
|
783
|
67
|
100
|
|
|
|
|
if (isMAYBE && result) /* NO precedes MAYBE */ |
|
|
|
100
|
|
|
|
|
|
|
784
|
5
|
|
|
|
|
|
XSRETURN_UNDEF; |
|
785
|
62
|
100
|
|
|
|
|
RETVAL = boolSV(result); |
|
786
|
|
|
|
|
|
|
OUTPUT: |
|
787
|
|
|
|
|
|
|
RETVAL |
|
788
|
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
U8 |
|
791
|
|
|
|
|
|
|
getCombinClass(uv) |
|
792
|
|
|
|
|
|
|
UV uv |
|
793
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
bool |
|
796
|
|
|
|
|
|
|
isExclusion(uv) |
|
797
|
|
|
|
|
|
|
UV uv |
|
798
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
bool |
|
801
|
|
|
|
|
|
|
isSingleton(uv) |
|
802
|
|
|
|
|
|
|
UV uv |
|
803
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
bool |
|
806
|
|
|
|
|
|
|
isNonStDecomp(uv) |
|
807
|
|
|
|
|
|
|
UV uv |
|
808
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
bool |
|
811
|
|
|
|
|
|
|
isComp2nd(uv) |
|
812
|
|
|
|
|
|
|
UV uv |
|
813
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
814
|
|
|
|
|
|
|
ALIAS: |
|
815
|
|
|
|
|
|
|
isNFC_MAYBE = 1 |
|
816
|
|
|
|
|
|
|
isNFKC_MAYBE = 2 |
|
817
|
|
|
|
|
|
|
INIT: |
|
818
|
|
|
|
|
|
|
PERL_UNUSED_VAR(ix); |
|
819
|
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
SV* |
|
821
|
|
|
|
|
|
|
isNFD_NO(uv) |
|
822
|
|
|
|
|
|
|
UV uv |
|
823
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
824
|
|
|
|
|
|
|
ALIAS: |
|
825
|
|
|
|
|
|
|
isNFKD_NO = 1 |
|
826
|
|
|
|
|
|
|
PREINIT: |
|
827
|
44
|
|
|
|
|
|
bool result = FALSE; |
|
828
|
|
|
|
|
|
|
CODE: |
|
829
|
44
|
100
|
|
|
|
|
if (Hangul_IsS(uv) || (ix ? dec_compat(uv) : dec_canonical(uv))) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
830
|
28
|
|
|
|
|
|
result = TRUE; /* NFD_NO or NFKD_NO */ |
|
831
|
44
|
100
|
|
|
|
|
RETVAL = boolSV(result); |
|
832
|
|
|
|
|
|
|
OUTPUT: |
|
833
|
|
|
|
|
|
|
RETVAL |
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
SV* |
|
837
|
|
|
|
|
|
|
isComp_Ex(uv) |
|
838
|
|
|
|
|
|
|
UV uv |
|
839
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
840
|
|
|
|
|
|
|
ALIAS: |
|
841
|
|
|
|
|
|
|
isNFC_NO = 0 |
|
842
|
|
|
|
|
|
|
isNFKC_NO = 1 |
|
843
|
|
|
|
|
|
|
PREINIT: |
|
844
|
66
|
|
|
|
|
|
bool result = FALSE; |
|
845
|
|
|
|
|
|
|
CODE: |
|
846
|
66
|
100
|
|
|
|
|
if (isExclusion(uv) || isSingleton(uv) || isNonStDecomp(uv)) |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
847
|
33
|
|
|
|
|
|
result = TRUE; /* NFC_NO or NFKC_NO */ |
|
848
|
33
|
100
|
|
|
|
|
else if (ix) { |
|
849
|
|
|
|
|
|
|
char *canon, *compat; |
|
850
|
11
|
|
|
|
|
|
canon = (char *) dec_canonical(uv); |
|
851
|
11
|
|
|
|
|
|
compat = (char *) dec_compat(uv); |
|
852
|
11
|
100
|
|
|
|
|
if (compat && (!canon || strNE(canon, compat))) |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
853
|
2
|
|
|
|
|
|
result = TRUE; /* NFC_NO or NFKC_NO */ |
|
854
|
|
|
|
|
|
|
} |
|
855
|
66
|
100
|
|
|
|
|
RETVAL = boolSV(result); |
|
856
|
|
|
|
|
|
|
OUTPUT: |
|
857
|
|
|
|
|
|
|
RETVAL |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
SV* |
|
860
|
|
|
|
|
|
|
getComposite(uv, uv2) |
|
861
|
|
|
|
|
|
|
UV uv |
|
862
|
|
|
|
|
|
|
UV uv2 |
|
863
|
|
|
|
|
|
|
PROTOTYPE: $$ |
|
864
|
|
|
|
|
|
|
PREINIT: |
|
865
|
|
|
|
|
|
|
UV composite; |
|
866
|
|
|
|
|
|
|
CODE: |
|
867
|
21
|
|
|
|
|
|
composite = composite_uv(uv, uv2); |
|
868
|
21
|
100
|
|
|
|
|
RETVAL = composite ? newSVuv(composite) : &PL_sv_undef; |
|
869
|
|
|
|
|
|
|
OUTPUT: |
|
870
|
|
|
|
|
|
|
RETVAL |
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
SV* |
|
875
|
|
|
|
|
|
|
getCanon(uv) |
|
876
|
|
|
|
|
|
|
UV uv |
|
877
|
|
|
|
|
|
|
PROTOTYPE: $ |
|
878
|
|
|
|
|
|
|
ALIAS: |
|
879
|
|
|
|
|
|
|
getCompat = 1 |
|
880
|
|
|
|
|
|
|
CODE: |
|
881
|
46
|
100
|
|
|
|
|
if (Hangul_IsS(uv)) { |
|
|
|
100
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
U8 tmp[3 * UTF8_MAXLEN + 1]; |
|
883
|
8
|
|
|
|
|
|
U8 *t = tmp; |
|
884
|
8
|
|
|
|
|
|
U8 *e = pv_cat_decompHangul(aTHX_ t, uv); |
|
885
|
8
|
|
|
|
|
|
RETVAL = newSVpvn((char *)t, e - t); |
|
886
|
|
|
|
|
|
|
} else { |
|
887
|
30
|
100
|
|
|
|
|
U8* rstr = ix ? dec_compat(uv) : dec_canonical(uv); |
|
888
|
30
|
100
|
|
|
|
|
if (!rstr) |
|
889
|
8
|
|
|
|
|
|
XSRETURN_UNDEF; |
|
890
|
22
|
|
|
|
|
|
RETVAL = newSVpvn((char *)rstr, strlen((char *)rstr)); |
|
891
|
|
|
|
|
|
|
} |
|
892
|
30
|
|
|
|
|
|
SvUTF8_on(RETVAL); |
|
893
|
|
|
|
|
|
|
OUTPUT: |
|
894
|
|
|
|
|
|
|
RETVAL |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
void |
|
898
|
|
|
|
|
|
|
splitOnLastStarter(src) |
|
899
|
|
|
|
|
|
|
SV * src |
|
900
|
|
|
|
|
|
|
PREINIT: |
|
901
|
|
|
|
|
|
|
SV *svp; |
|
902
|
|
|
|
|
|
|
STRLEN srclen; |
|
903
|
|
|
|
|
|
|
U8 *s, *e, *p; |
|
904
|
|
|
|
|
|
|
PPCODE: |
|
905
|
76
|
|
|
|
|
|
s = (U8*)sv_2pvunicode(aTHX_ src,&srclen); |
|
906
|
76
|
|
|
|
|
|
e = s + srclen; |
|
907
|
76
|
|
|
|
|
|
p = e; |
|
908
|
135
|
100
|
|
|
|
|
while (s < p) { |
|
909
|
|
|
|
|
|
|
UV uv; |
|
910
|
133
|
|
|
|
|
|
p = utf8_hop(p, -1); |
|
911
|
133
|
50
|
|
|
|
|
if (p < s) |
|
912
|
0
|
|
|
|
|
|
croak(ErrHopBeforeStart); |
|
913
|
133
|
|
|
|
|
|
uv = utf8n_to_uvchr(p, e - p, NULL, AllowAnyUTF); |
|
914
|
133
|
100
|
|
|
|
|
if (getCombinClass(uv) == 0) /* Last Starter found */ |
|
915
|
74
|
|
|
|
|
|
break; |
|
916
|
|
|
|
|
|
|
} |
|
917
|
|
|
|
|
|
|
|
|
918
|
76
|
|
|
|
|
|
svp = sv_2mortal(newSVpvn((char*)s, p - s)); |
|
919
|
76
|
|
|
|
|
|
SvUTF8_on(svp); |
|
920
|
76
|
50
|
|
|
|
|
XPUSHs(svp); |
|
921
|
|
|
|
|
|
|
|
|
922
|
76
|
|
|
|
|
|
svp = sv_2mortal(newSVpvn((char*)p, e - p)); |
|
923
|
76
|
|
|
|
|
|
SvUTF8_on(svp); |
|
924
|
76
|
50
|
|
|
|
|
XPUSHs(svp); |
|
925
|
|
|
|
|
|
|
|