line |
stmt |
bran |
cond |
sub |
time |
code |
1
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
$Id: Unicode.xs,v 2.10 2013/04/26 18:30:46 dankogai Exp $ |
3
|
|
|
|
|
|
*/ |
4
|
|
|
|
|
|
|
5
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT |
6
|
|
|
|
|
|
#include "EXTERN.h" |
7
|
|
|
|
|
|
#include "perl.h" |
8
|
|
|
|
|
|
#include "XSUB.h" |
9
|
|
|
|
|
|
#define U8 U8 |
10
|
|
|
|
|
|
#include "../Encode/encode.h" |
11
|
|
|
|
|
|
|
12
|
|
|
|
|
|
#define FBCHAR 0xFFFd |
13
|
|
|
|
|
|
#define BOM_BE 0xFeFF |
14
|
|
|
|
|
|
#define BOM16LE 0xFFFe |
15
|
|
|
|
|
|
#define BOM32LE 0xFFFe0000 |
16
|
|
|
|
|
|
#define issurrogate(x) (0xD800 <= (x) && (x) <= 0xDFFF ) |
17
|
|
|
|
|
|
#define isHiSurrogate(x) (0xD800 <= (x) && (x) < 0xDC00 ) |
18
|
|
|
|
|
|
#define isLoSurrogate(x) (0xDC00 <= (x) && (x) <= 0xDFFF ) |
19
|
|
|
|
|
|
#define invalid_ucs2(x) ( issurrogate(x) || 0xFFFF < (x) ) |
20
|
|
|
|
|
|
|
21
|
|
|
|
|
|
/* For pre-5.14 source compatibility */ |
22
|
|
|
|
|
|
#ifndef UNICODE_WARN_ILLEGAL_INTERCHANGE |
23
|
|
|
|
|
|
# define UNICODE_WARN_ILLEGAL_INTERCHANGE 0 |
24
|
|
|
|
|
|
# define UTF8_DISALLOW_SURROGATE 0 |
25
|
|
|
|
|
|
# define UTF8_WARN_SURROGATE 0 |
26
|
|
|
|
|
|
# define UTF8_DISALLOW_FE_FF 0 |
27
|
|
|
|
|
|
# define UTF8_WARN_FE_FF 0 |
28
|
|
|
|
|
|
# define UTF8_WARN_NONCHAR 0 |
29
|
|
|
|
|
|
#endif |
30
|
|
|
|
|
|
|
31
|
|
|
|
|
|
#define PERLIO_BUFSIZ 1024 /* XXX value comes from PerlIOEncode_get_base */ |
32
|
|
|
|
|
|
|
33
|
|
|
|
|
|
/* Avoid wasting too much space in the result buffer */ |
34
|
|
|
|
|
|
/* static void */ |
35
|
|
|
|
|
|
/* shrink_buffer(SV *result) */ |
36
|
|
|
|
|
|
/* { */ |
37
|
|
|
|
|
|
/* if (SvLEN(result) > 42 + SvCUR(result)) { */ |
38
|
|
|
|
|
|
/* char *buf; */ |
39
|
|
|
|
|
|
/* STRLEN len = 1 + SvCUR(result); /\* include the NUL byte *\/ */ |
40
|
|
|
|
|
|
/* New(0, buf, len, char); */ |
41
|
|
|
|
|
|
/* Copy(SvPVX(result), buf, len, char); */ |
42
|
|
|
|
|
|
/* Safefree(SvPVX(result)); */ |
43
|
|
|
|
|
|
/* SvPV_set(result, buf); */ |
44
|
|
|
|
|
|
/* SvLEN_set(result, len); */ |
45
|
|
|
|
|
|
/* } */ |
46
|
|
|
|
|
|
/* } */ |
47
|
|
|
|
|
|
|
48
|
|
|
|
|
|
#define shrink_buffer(result) { \ |
49
|
|
|
|
|
|
if (SvLEN(result) > 42 + SvCUR(result)) { \ |
50
|
|
|
|
|
|
char *newpv; \ |
51
|
|
|
|
|
|
STRLEN newlen = 1 + SvCUR(result); /* include the NUL byte */ \ |
52
|
|
|
|
|
|
New(0, newpv, newlen, char); \ |
53
|
|
|
|
|
|
Copy(SvPVX(result), newpv, newlen, char); \ |
54
|
|
|
|
|
|
Safefree(SvPVX(result)); \ |
55
|
|
|
|
|
|
SvPV_set(result, newpv); \ |
56
|
|
|
|
|
|
SvLEN_set(result, newlen); \ |
57
|
|
|
|
|
|
} \ |
58
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
60
|
|
|
|
|
|
static UV |
61
|
3276906
|
|
|
|
|
enc_unpack(pTHX_ U8 **sp, U8 *e, STRLEN size, U8 endian) |
62
|
|
|
|
|
|
{ |
63
|
3276906
|
|
|
|
|
U8 *s = *sp; |
64
|
|
|
|
|
|
UV v = 0; |
65
|
3276906
|
|
|
|
|
if (s+size > e) { |
66
|
0
|
|
|
|
|
croak("Partial character %c",(char) endian); |
67
|
|
|
|
|
|
} |
68
|
3276906
|
|
|
|
|
switch(endian) { |
69
|
|
|
|
|
|
case 'N': |
70
|
788628
|
|
|
|
|
v = *s++; |
71
|
788628
|
|
|
|
|
v = (v << 8) | *s++; |
72
|
|
|
|
|
|
case 'n': |
73
|
1699380
|
|
|
|
|
v = (v << 8) | *s++; |
74
|
1699380
|
|
|
|
|
v = (v << 8) | *s++; |
75
|
1699380
|
|
|
|
|
break; |
76
|
|
|
|
|
|
case 'V': |
77
|
|
|
|
|
|
case 'v': |
78
|
1577526
|
|
|
|
|
v |= *s++; |
79
|
1577526
|
|
|
|
|
v |= (*s++ << 8); |
80
|
1577526
|
|
|
|
|
if (endian == 'v') |
81
|
|
|
|
|
|
break; |
82
|
788620
|
|
|
|
|
v |= (*s++ << 16); |
83
|
788620
|
|
|
|
|
v |= (*s++ << 24); |
84
|
788620
|
|
|
|
|
break; |
85
|
|
|
|
|
|
default: |
86
|
0
|
|
|
|
|
croak("Unknown endian %c",(char) endian); |
87
|
|
|
|
|
|
break; |
88
|
|
|
|
|
|
} |
89
|
3276906
|
|
|
|
|
*sp = s; |
90
|
3276906
|
|
|
|
|
return v; |
91
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
93
|
|
|
|
|
|
void |
94
|
3354972
|
|
|
|
|
enc_pack(pTHX_ SV *result, STRLEN size, U8 endian, UV value) |
95
|
|
|
|
|
|
{ |
96
|
3354972
|
|
|
|
|
U8 *d = (U8 *) SvPV_nolen(result); |
97
|
|
|
|
|
|
|
98
|
3354972
|
|
|
|
|
switch(endian) { |
99
|
|
|
|
|
|
case 'v': |
100
|
|
|
|
|
|
case 'V': |
101
|
1578980
|
|
|
|
|
d += SvCUR(result); |
102
|
1578980
|
|
|
|
|
SvCUR_set(result,SvCUR(result)+size); |
103
|
7893300
|
|
|
|
|
while (size--) { |
104
|
4735340
|
|
|
|
|
*d++ = (U8)(value & 0xFF); |
105
|
4735340
|
|
|
|
|
value >>= 8; |
106
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
break; |
108
|
|
|
|
|
|
case 'n': |
109
|
|
|
|
|
|
case 'N': |
110
|
1775992
|
|
|
|
|
SvCUR_set(result,SvCUR(result)+size); |
111
|
1775992
|
|
|
|
|
d += SvCUR(result); |
112
|
8756528
|
|
|
|
|
while (size--) { |
113
|
5204544
|
|
|
|
|
*--d = (U8)(value & 0xFF); |
114
|
5204544
|
|
|
|
|
value >>= 8; |
115
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
break; |
117
|
|
|
|
|
|
default: |
118
|
0
|
|
|
|
|
croak("Unknown endian %c",(char) endian); |
119
|
|
|
|
|
|
break; |
120
|
|
|
|
|
|
} |
121
|
3354972
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
123
|
|
|
|
|
|
MODULE = Encode::Unicode PACKAGE = Encode::Unicode |
124
|
|
|
|
|
|
|
125
|
|
|
|
|
|
PROTOTYPES: DISABLE |
126
|
|
|
|
|
|
|
127
|
|
|
|
|
|
#define attr(k, l) (hv_exists((HV *)SvRV(obj),k,l) ? \ |
128
|
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),k,l,0) : &PL_sv_undef) |
129
|
|
|
|
|
|
|
130
|
|
|
|
|
|
void |
131
|
|
|
|
|
|
decode_xs(obj, str, check = 0) |
132
|
|
|
|
|
|
SV * obj |
133
|
|
|
|
|
|
SV * str |
134
|
|
|
|
|
|
IV check |
135
|
|
|
|
|
|
CODE: |
136
|
|
|
|
|
|
{ |
137
|
4510
|
|
|
|
|
U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6))); |
138
|
4510
|
|
|
|
|
int size = SvIV(attr("size", 4)); |
139
|
|
|
|
|
|
int ucs2 = -1; /* only needed in the event of surrogate pairs */ |
140
|
4510
|
|
|
|
|
SV *result = newSVpvn("",0); |
141
|
4510
|
|
|
|
|
STRLEN usize = (size > 0 ? size : 1); /* protect against rogue size<=0 */ |
142
|
|
|
|
|
|
STRLEN ulen; |
143
|
|
|
|
|
|
STRLEN resultbuflen; |
144
|
|
|
|
|
|
U8 *resultbuf; |
145
|
4510
|
|
|
|
|
U8 *s = (U8 *)SvPVbyte(str,ulen); |
146
|
4510
|
|
|
|
|
U8 *e = (U8 *)SvEND(str); |
147
|
|
|
|
|
|
/* Optimise for the common case of being called from PerlIOEncode_fill() |
148
|
|
|
|
|
|
with a standard length buffer. In this case the result SV's buffer is |
149
|
|
|
|
|
|
only used temporarily, so we can afford to allocate the maximum needed |
150
|
|
|
|
|
|
and not care about unused space. */ |
151
|
4510
|
|
|
|
|
const bool temp_result = (ulen == PERLIO_BUFSIZ); |
152
|
|
|
|
|
|
|
153
|
4510
|
|
|
|
|
ST(0) = sv_2mortal(result); |
154
|
4510
|
|
|
|
|
SvUTF8_on(result); |
155
|
|
|
|
|
|
|
156
|
4510
|
|
|
|
|
if (!endian && s+size <= e) { |
157
|
|
|
|
|
|
UV bom; |
158
|
16
|
|
|
|
|
endian = (size == 4) ? 'N' : 'n'; |
159
|
16
|
|
|
|
|
bom = enc_unpack(aTHX_ &s,e,size,endian); |
160
|
16
|
|
|
|
|
if (bom != BOM_BE) { |
161
|
8
|
|
|
|
|
if (bom == BOM16LE) { |
162
|
4
|
|
|
|
|
endian = 'v'; |
163
|
|
|
|
|
|
} |
164
|
4
|
|
|
|
|
else if (bom == BOM32LE) { |
165
|
4
|
|
|
|
|
endian = 'V'; |
166
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
else { |
168
|
0
|
|
|
|
|
croak("%"SVf":Unrecognised BOM %"UVxf, |
169
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), |
170
|
|
|
|
|
|
bom); |
171
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
#if 1 |
174
|
|
|
|
|
|
/* Update endian for next sequence */ |
175
|
16
|
|
|
|
|
if (SvTRUE(attr("renewed", 7))) { |
176
|
8
|
|
|
|
|
hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0); |
177
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
#endif |
179
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
181
|
4510
|
|
|
|
|
if (temp_result) { |
182
|
216
|
|
|
|
|
resultbuflen = 1 + ulen/usize * UTF8_MAXLEN; |
183
|
|
|
|
|
|
} else { |
184
|
|
|
|
|
|
/* Preallocate the buffer to the minimum possible space required. */ |
185
|
4294
|
|
|
|
|
resultbuflen = ulen/usize + UTF8_MAXLEN + 1; |
186
|
|
|
|
|
|
} |
187
|
4510
|
|
|
|
|
resultbuf = (U8 *) SvGROW(result, resultbuflen); |
188
|
|
|
|
|
|
|
189
|
3285888
|
|
|
|
|
while (s < e && s+size <= e) { |
190
|
3276872
|
|
|
|
|
UV ord = enc_unpack(aTHX_ &s,e,size,endian); |
191
|
|
|
|
|
|
U8 *d; |
192
|
3276872
|
|
|
|
|
if (issurrogate(ord)) { |
193
|
58
|
|
|
|
|
if (ucs2 == -1) { |
194
|
52
|
|
|
|
|
ucs2 = SvTRUE(attr("ucs2", 4)); |
195
|
|
|
|
|
|
} |
196
|
58
|
|
|
|
|
if (ucs2 || size == 4) { |
197
|
28
|
|
|
|
|
if (check) { |
198
|
4
|
|
|
|
|
croak("%"SVf":no surrogates allowed %"UVxf, |
199
|
4
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), |
200
|
|
|
|
|
|
ord); |
201
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
ord = FBCHAR; |
203
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
else { |
205
|
|
|
|
|
|
UV lo; |
206
|
30
|
|
|
|
|
if (!isHiSurrogate(ord)) { |
207
|
4
|
|
|
|
|
if (check) { |
208
|
0
|
|
|
|
|
croak("%"SVf":Malformed HI surrogate %"UVxf, |
209
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), |
210
|
|
|
|
|
|
ord); |
211
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
else { |
213
|
|
|
|
|
|
ord = FBCHAR; |
214
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
} |
216
|
26
|
|
|
|
|
else if (s+size > e) { |
217
|
8
|
|
|
|
|
if (check) { |
218
|
0
|
|
|
|
|
if (check & ENCODE_STOP_AT_PARTIAL) { |
219
|
0
|
|
|
|
|
s -= size; |
220
|
0
|
|
|
|
|
break; |
221
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
else { |
223
|
0
|
|
|
|
|
croak("%"SVf":Malformed HI surrogate %"UVxf, |
224
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), |
225
|
|
|
|
|
|
ord); |
226
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
else { |
229
|
|
|
|
|
|
ord = FBCHAR; |
230
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
else { |
233
|
18
|
|
|
|
|
lo = enc_unpack(aTHX_ &s,e,size,endian); |
234
|
18
|
|
|
|
|
if (!isLoSurrogate(lo)) { |
235
|
4
|
|
|
|
|
if (check) { |
236
|
0
|
|
|
|
|
croak("%"SVf":Malformed LO surrogate %"UVxf, |
237
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), |
238
|
|
|
|
|
|
ord); |
239
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
else { |
241
|
4
|
|
|
|
|
s -= size; |
242
|
|
|
|
|
|
ord = FBCHAR; |
243
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
else { |
246
|
14
|
|
|
|
|
ord = 0x10000 + ((ord - 0xD800) << 10) + (lo - 0xDC00); |
247
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
252
|
3276868
|
|
|
|
|
if ((ord & 0xFFFE) == 0xFFFE || (ord >= 0xFDD0 && ord <= 0xFDEF)) { |
253
|
0
|
|
|
|
|
if (check) { |
254
|
0
|
|
|
|
|
croak("%"SVf":Unicode character %"UVxf" is illegal", |
255
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), |
256
|
|
|
|
|
|
ord); |
257
|
|
|
|
|
|
} else { |
258
|
|
|
|
|
|
ord = FBCHAR; |
259
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
262
|
3276868
|
|
|
|
|
if (resultbuflen < SvCUR(result) + UTF8_MAXLEN + 1) { |
263
|
|
|
|
|
|
/* Do not allocate >8Mb more than the minimum needed. |
264
|
|
|
|
|
|
This prevents allocating too much in the rogue case of a large |
265
|
|
|
|
|
|
input consisting initially of long sequence uft8-byte unicode |
266
|
|
|
|
|
|
chars followed by single utf8-byte chars. */ |
267
|
|
|
|
|
|
/* +1 |
268
|
|
|
|
|
|
fixes Unicode.xs!decode_xs n-byte heap-overflow |
269
|
|
|
|
|
|
*/ |
270
|
4220
|
|
|
|
|
STRLEN remaining = (e - s)/usize + 1; /* +1 to avoid the leak */ |
271
|
4220
|
|
|
|
|
STRLEN max_alloc = remaining + (8*1024*1024); |
272
|
4220
|
|
|
|
|
STRLEN est_alloc = remaining * UTF8_MAXLEN; |
273
|
8440
|
|
|
|
|
STRLEN newlen = SvLEN(result) + /* min(max_alloc, est_alloc) */ |
274
|
4220
|
|
|
|
|
(est_alloc > max_alloc ? max_alloc : est_alloc); |
275
|
4220
|
|
|
|
|
resultbuf = (U8 *) SvGROW(result, newlen); |
276
|
4220
|
|
|
|
|
resultbuflen = SvLEN(result); |
277
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
279
|
3276868
|
|
|
|
|
d = uvuni_to_utf8_flags(resultbuf+SvCUR(result), ord, |
280
|
|
|
|
|
|
UNICODE_WARN_ILLEGAL_INTERCHANGE); |
281
|
3276868
|
|
|
|
|
SvCUR_set(result, d - (U8 *)SvPVX(result)); |
282
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
284
|
4506
|
|
|
|
|
if (s < e) { |
285
|
|
|
|
|
|
/* unlikely to happen because it's fixed-length -- dankogai */ |
286
|
0
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR) { |
287
|
0
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8),"%"SVf":Partial character", |
288
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0)); |
289
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
} |
291
|
4506
|
|
|
|
|
if (check && !(check & ENCODE_LEAVE_SRC)) { |
292
|
238
|
|
|
|
|
if (s < e) { |
293
|
0
|
|
|
|
|
Move(s,SvPVX(str),e-s,U8); |
294
|
0
|
|
|
|
|
SvCUR_set(str,(e-s)); |
295
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
else { |
297
|
238
|
|
|
|
|
SvCUR_set(str,0); |
298
|
|
|
|
|
|
} |
299
|
238
|
|
|
|
|
*SvEND(str) = '\0'; |
300
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
302
|
8620
|
|
|
|
|
if (!temp_result) shrink_buffer(result); |
303
|
4506
|
|
|
|
|
if (SvTAINTED(str)) SvTAINTED_on(result); /* propagate taintedness */ |
304
|
4506
|
|
|
|
|
XSRETURN(1); |
305
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
307
|
|
|
|
|
|
void |
308
|
|
|
|
|
|
encode_xs(obj, utf8, check = 0) |
309
|
|
|
|
|
|
SV * obj |
310
|
|
|
|
|
|
SV * utf8 |
311
|
|
|
|
|
|
IV check |
312
|
|
|
|
|
|
CODE: |
313
|
|
|
|
|
|
{ |
314
|
4832
|
|
|
|
|
U8 endian = *((U8 *)SvPV_nolen(attr("endian", 6))); |
315
|
4832
|
|
|
|
|
const int size = SvIV(attr("size", 4)); |
316
|
|
|
|
|
|
int ucs2 = -1; /* only needed if there is invalid_ucs2 input */ |
317
|
4832
|
|
|
|
|
const STRLEN usize = (size > 0 ? size : 1); |
318
|
4832
|
|
|
|
|
SV *result = newSVpvn("", 0); |
319
|
|
|
|
|
|
STRLEN ulen; |
320
|
4832
|
|
|
|
|
U8 *s = (U8 *) SvPVutf8(utf8, ulen); |
321
|
4832
|
|
|
|
|
const U8 *e = (U8 *) SvEND(utf8); |
322
|
|
|
|
|
|
/* Optimise for the common case of being called from PerlIOEncode_flush() |
323
|
|
|
|
|
|
with a standard length buffer. In this case the result SV's buffer is |
324
|
|
|
|
|
|
only used temporarily, so we can afford to allocate the maximum needed |
325
|
|
|
|
|
|
and not care about unused space. */ |
326
|
4832
|
|
|
|
|
const bool temp_result = (ulen == PERLIO_BUFSIZ); |
327
|
|
|
|
|
|
|
328
|
4832
|
|
|
|
|
ST(0) = sv_2mortal(result); |
329
|
|
|
|
|
|
|
330
|
|
|
|
|
|
/* Preallocate the result buffer to the maximum possible size. |
331
|
|
|
|
|
|
ie. assume each UTF8 byte is 1 character. |
332
|
|
|
|
|
|
Then shrink the result's buffer if necesary at the end. */ |
333
|
4832
|
|
|
|
|
SvGROW(result, ((ulen+1) * usize)); |
334
|
|
|
|
|
|
|
335
|
4832
|
|
|
|
|
if (!endian) { |
336
|
12
|
|
|
|
|
endian = (size == 4) ? 'N' : 'n'; |
337
|
12
|
|
|
|
|
enc_pack(aTHX_ result,size,endian,BOM_BE); |
338
|
|
|
|
|
|
#if 1 |
339
|
|
|
|
|
|
/* Update endian for next sequence */ |
340
|
12
|
|
|
|
|
if (SvTRUE(attr("renewed", 7))) { |
341
|
4
|
|
|
|
|
hv_store((HV *)SvRV(obj),"endian",6,newSVpv((char *)&endian,1),0); |
342
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
#endif |
344
|
|
|
|
|
|
} |
345
|
3359636
|
|
|
|
|
while (s < e && s+UTF8SKIP(s) <= e) { |
346
|
|
|
|
|
|
STRLEN len; |
347
|
3354808
|
|
|
|
|
UV ord = utf8n_to_uvuni(s, e-s, &len, (UTF8_DISALLOW_SURROGATE |
348
|
|
|
|
|
|
|UTF8_WARN_SURROGATE |
349
|
|
|
|
|
|
|UTF8_DISALLOW_FE_FF |
350
|
|
|
|
|
|
|UTF8_WARN_FE_FF |
351
|
|
|
|
|
|
|UTF8_WARN_NONCHAR)); |
352
|
3354808
|
|
|
|
|
s += len; |
353
|
3354808
|
|
|
|
|
if (size != 4 && invalid_ucs2(ord)) { |
354
|
168
|
|
|
|
|
if (!issurrogate(ord)) { |
355
|
168
|
|
|
|
|
if (ucs2 == -1) { |
356
|
166
|
|
|
|
|
ucs2 = SvTRUE(attr("ucs2", 4)); |
357
|
|
|
|
|
|
} |
358
|
168
|
|
|
|
|
if (ucs2 || ord > 0x10FFFF) { |
359
|
12
|
|
|
|
|
if (check) { |
360
|
4
|
|
|
|
|
croak("%"SVf":code point \"\\x{%"UVxf"}\" too high", |
361
|
4
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0),ord); |
362
|
|
|
|
|
|
} |
363
|
8
|
|
|
|
|
enc_pack(aTHX_ result,size,endian,FBCHAR); |
364
|
|
|
|
|
|
} else { |
365
|
156
|
|
|
|
|
UV hi = ((ord - 0x10000) >> 10) + 0xD800; |
366
|
156
|
|
|
|
|
UV lo = ((ord - 0x10000) & 0x3FF) + 0xDC00; |
367
|
156
|
|
|
|
|
enc_pack(aTHX_ result,size,endian,hi); |
368
|
156
|
|
|
|
|
enc_pack(aTHX_ result,size,endian,lo); |
369
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
else { |
372
|
|
|
|
|
|
/* not supposed to happen */ |
373
|
0
|
|
|
|
|
enc_pack(aTHX_ result,size,endian,FBCHAR); |
374
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
else { |
377
|
3354640
|
|
|
|
|
enc_pack(aTHX_ result,size,endian,ord); |
378
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
} |
380
|
4828
|
|
|
|
|
if (s < e) { |
381
|
|
|
|
|
|
/* UTF-8 partial char happens often on PerlIO. |
382
|
|
|
|
|
|
Since this is okay and normal, we do not warn. |
383
|
|
|
|
|
|
But this is critical when you choose to LEAVE_SRC |
384
|
|
|
|
|
|
in which case we die */ |
385
|
64
|
|
|
|
|
if (check & (ENCODE_DIE_ON_ERR|ENCODE_LEAVE_SRC)) { |
386
|
0
|
|
|
|
|
Perl_croak(aTHX_ "%"SVf":partial character is not allowed " |
387
|
|
|
|
|
|
"when CHECK = 0x%" UVuf, |
388
|
0
|
|
|
|
|
*hv_fetch((HV *)SvRV(obj),"Name",4,0), check); |
389
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
} |
391
|
4828
|
|
|
|
|
if (check && !(check & ENCODE_LEAVE_SRC)) { |
392
|
96
|
|
|
|
|
if (s < e) { |
393
|
64
|
|
|
|
|
Move(s,SvPVX(utf8),e-s,U8); |
394
|
64
|
|
|
|
|
SvCUR_set(utf8,(e-s)); |
395
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
else { |
397
|
32
|
|
|
|
|
SvCUR_set(utf8,0); |
398
|
|
|
|
|
|
} |
399
|
96
|
|
|
|
|
*SvEND(utf8) = '\0'; |
400
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
402
|
8814
|
|
|
|
|
if (!temp_result) shrink_buffer(result); |
403
|
4828
|
|
|
|
|
if (SvTAINTED(utf8)) SvTAINTED_on(result); /* propagate taintedness */ |
404
|
|
|
|
|
|
|
405
|
4828
|
|
|
|
|
SvSETMAGIC(utf8); |
406
|
|
|
|
|
|
|
407
|
4828
|
|
|
|
|
XSRETURN(1); |
408
|
|
|
|
|
|
} |