line |
stmt |
bran |
cond |
sub |
time |
code |
1
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
$Id: Encode.xs,v 2.24 2013/08/29 16:47:39 dankogai Exp dankogai $ |
3
|
|
|
|
|
|
*/ |
4
|
|
|
|
|
|
|
5
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT |
6
|
|
|
|
|
|
#include "EXTERN.h" |
7
|
|
|
|
|
|
#include "perl.h" |
8
|
|
|
|
|
|
#include "XSUB.h" |
9
|
|
|
|
|
|
#define U8 U8 |
10
|
|
|
|
|
|
#include "encode.h" |
11
|
|
|
|
|
|
|
12
|
|
|
|
|
|
# define PERLIO_MODNAME "PerlIO::encoding" |
13
|
|
|
|
|
|
# define PERLIO_FILENAME "PerlIO/encoding.pm" |
14
|
|
|
|
|
|
|
15
|
|
|
|
|
|
/* set 1 or more to profile. t/encoding.t dumps core because of |
16
|
|
|
|
|
|
Perl_warner and PerlIO don't work well */ |
17
|
|
|
|
|
|
#define ENCODE_XS_PROFILE 0 |
18
|
|
|
|
|
|
|
19
|
|
|
|
|
|
/* set 0 to disable floating point to calculate buffer size for |
20
|
|
|
|
|
|
encode_method(). 1 is recommended. 2 restores NI-S original */ |
21
|
|
|
|
|
|
#define ENCODE_XS_USEFP 1 |
22
|
|
|
|
|
|
|
23
|
|
|
|
|
|
#define UNIMPLEMENTED(x,y) y x (SV *sv, char *encoding) {dTHX; \ |
24
|
|
|
|
|
|
Perl_croak(aTHX_ "panic_unimplemented"); \ |
25
|
|
|
|
|
|
return (y)0; /* fool picky compilers */ \ |
26
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
/**/ |
28
|
|
|
|
|
|
|
29
|
0
|
|
|
|
|
UNIMPLEMENTED(_encoded_utf8_to_bytes, I32) |
30
|
0
|
|
|
|
|
UNIMPLEMENTED(_encoded_bytes_to_utf8, I32) |
31
|
|
|
|
|
|
|
32
|
|
|
|
|
|
#ifdef UTF8_DISALLOW_ILLEGAL_INTERCHANGE |
33
|
|
|
|
|
|
# define UTF8_ALLOW_STRICT UTF8_DISALLOW_ILLEGAL_INTERCHANGE |
34
|
|
|
|
|
|
#else |
35
|
|
|
|
|
|
# define UTF8_ALLOW_STRICT 0 |
36
|
|
|
|
|
|
#endif |
37
|
|
|
|
|
|
|
38
|
|
|
|
|
|
#define UTF8_ALLOW_NONSTRICT (UTF8_ALLOW_ANY & \ |
39
|
|
|
|
|
|
~(UTF8_ALLOW_CONTINUATION | \ |
40
|
|
|
|
|
|
UTF8_ALLOW_NON_CONTINUATION | \ |
41
|
|
|
|
|
|
UTF8_ALLOW_LONG)) |
42
|
|
|
|
|
|
|
43
|
|
|
|
|
|
void |
44
|
1752
|
|
|
|
|
Encode_XSEncoding(pTHX_ encode_t * enc) |
45
|
|
|
|
|
|
{ |
46
|
1752
|
|
|
|
|
dSP; |
47
|
1752
|
|
|
|
|
HV *stash = gv_stashpv("Encode::XS", TRUE); |
48
|
1752
|
|
|
|
|
SV *sv = sv_bless(newRV_noinc(newSViv(PTR2IV(enc))), stash); |
49
|
|
|
|
|
|
int i = 0; |
50
|
1752
|
|
|
|
|
PUSHMARK(sp); |
51
|
1752
|
|
|
|
|
XPUSHs(sv); |
52
|
5256
|
|
|
|
|
while (enc->name[i]) { |
53
|
1752
|
|
|
|
|
const char *name = enc->name[i++]; |
54
|
1752
|
|
|
|
|
XPUSHs(sv_2mortal(newSVpvn(name, strlen(name)))); |
55
|
|
|
|
|
|
} |
56
|
1752
|
|
|
|
|
PUTBACK; |
57
|
1752
|
|
|
|
|
call_pv("Encode::define_encoding", G_DISCARD); |
58
|
1752
|
|
|
|
|
SvREFCNT_dec(sv); |
59
|
1752
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
61
|
|
|
|
|
|
void |
62
|
0
|
|
|
|
|
call_failure(SV * routine, U8 * done, U8 * dest, U8 * orig) |
63
|
|
|
|
|
|
{ |
64
|
|
|
|
|
|
/* Exists for breakpointing */ |
65
|
0
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
68
|
|
|
|
|
|
#define ERR_ENCODE_NOMAP "\"\\x{%04" UVxf "}\" does not map to %s" |
69
|
|
|
|
|
|
#define ERR_DECODE_NOMAP "%s \"\\x%02" UVXf "\" does not map to Unicode" |
70
|
|
|
|
|
|
|
71
|
|
|
|
|
|
static SV * |
72
|
524
|
|
|
|
|
do_fallback_cb(pTHX_ UV ch, SV *fallback_cb) |
73
|
|
|
|
|
|
{ |
74
|
524
|
|
|
|
|
dSP; |
75
|
|
|
|
|
|
int argc; |
76
|
524
|
|
|
|
|
SV *retval = newSVpv("",0); |
77
|
524
|
|
|
|
|
ENTER; |
78
|
524
|
|
|
|
|
SAVETMPS; |
79
|
524
|
|
|
|
|
PUSHMARK(sp); |
80
|
524
|
|
|
|
|
XPUSHs(sv_2mortal(newSVnv((UV)ch))); |
81
|
524
|
|
|
|
|
PUTBACK; |
82
|
524
|
|
|
|
|
argc = call_sv(fallback_cb, G_SCALAR); |
83
|
524
|
|
|
|
|
SPAGAIN; |
84
|
524
|
|
|
|
|
if (argc != 1){ |
85
|
0
|
|
|
|
|
croak("fallback sub must return scalar!"); |
86
|
|
|
|
|
|
} |
87
|
524
|
|
|
|
|
sv_catsv(retval, POPs); |
88
|
524
|
|
|
|
|
PUTBACK; |
89
|
524
|
|
|
|
|
FREETMPS; |
90
|
524
|
|
|
|
|
LEAVE; |
91
|
524
|
|
|
|
|
return retval; |
92
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
94
|
|
|
|
|
|
static SV * |
95
|
226120
|
|
|
|
|
encode_method(pTHX_ const encode_t * enc, const encpage_t * dir, SV * src, |
96
|
|
|
|
|
|
int check, STRLEN * offset, SV * term, int * retcode, |
97
|
|
|
|
|
|
SV *fallback_cb) |
98
|
|
|
|
|
|
{ |
99
|
|
|
|
|
|
STRLEN slen; |
100
|
226120
|
|
|
|
|
U8 *s = (U8 *) SvPV(src, slen); |
101
|
226120
|
|
|
|
|
STRLEN tlen = slen; |
102
|
|
|
|
|
|
STRLEN ddone = 0; |
103
|
|
|
|
|
|
STRLEN sdone = 0; |
104
|
|
|
|
|
|
/* We allocate slen+1. |
105
|
|
|
|
|
|
PerlIO dumps core if this value is smaller than this. */ |
106
|
226120
|
|
|
|
|
SV *dst = sv_2mortal(newSV(slen+1)); |
107
|
226120
|
|
|
|
|
U8 *d = (U8 *)SvPVX(dst); |
108
|
226120
|
|
|
|
|
STRLEN dlen = SvLEN(dst)-1; |
109
|
|
|
|
|
|
int code = 0; |
110
|
226120
|
|
|
|
|
STRLEN trmlen = 0; |
111
|
226120
|
|
|
|
|
U8 *trm = term ? (U8*) SvPV(term, trmlen) : NULL; |
112
|
|
|
|
|
|
|
113
|
226120
|
|
|
|
|
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ |
114
|
|
|
|
|
|
|
115
|
226120
|
|
|
|
|
if (offset) { |
116
|
1530
|
|
|
|
|
s += *offset; |
117
|
1530
|
|
|
|
|
if (slen > *offset){ /* safeguard against slen overflow */ |
118
|
1530
|
|
|
|
|
slen -= *offset; |
119
|
|
|
|
|
|
}else{ |
120
|
0
|
|
|
|
|
slen = 0; |
121
|
|
|
|
|
|
} |
122
|
1530
|
|
|
|
|
tlen = slen; |
123
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
125
|
226120
|
|
|
|
|
if (slen == 0){ |
126
|
12214
|
|
|
|
|
SvCUR_set(dst, 0); |
127
|
12214
|
|
|
|
|
SvPOK_only(dst); |
128
|
12214
|
|
|
|
|
goto ENCODE_END; |
129
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
131
|
219502
|
|
|
|
|
while( (code = do_encode(dir, s, &slen, d, dlen, &dlen, !check, |
132
|
|
|
|
|
|
trm, trmlen)) ) |
133
|
|
|
|
|
|
{ |
134
|
7510
|
|
|
|
|
SvCUR_set(dst, dlen+ddone); |
135
|
7510
|
|
|
|
|
SvPOK_only(dst); |
136
|
|
|
|
|
|
|
137
|
7510
|
|
|
|
|
if (code == ENCODE_FALLBACK || code == ENCODE_PARTIAL || |
138
|
|
|
|
|
|
code == ENCODE_FOUND_TERM) { |
139
|
|
|
|
|
|
break; |
140
|
|
|
|
|
|
} |
141
|
5686
|
|
|
|
|
switch (code) { |
142
|
|
|
|
|
|
case ENCODE_NOSPACE: |
143
|
|
|
|
|
|
{ |
144
|
|
|
|
|
|
STRLEN more = 0; /* make sure you initialize! */ |
145
|
|
|
|
|
|
STRLEN sleft; |
146
|
2928
|
|
|
|
|
sdone += slen; |
147
|
2928
|
|
|
|
|
ddone += dlen; |
148
|
2928
|
|
|
|
|
sleft = tlen - sdone; |
149
|
|
|
|
|
|
#if ENCODE_XS_PROFILE >= 2 |
150
|
|
|
|
|
|
Perl_warn(aTHX_ |
151
|
|
|
|
|
|
"more=%d, sdone=%d, sleft=%d, SvLEN(dst)=%d\n", |
152
|
|
|
|
|
|
more, sdone, sleft, SvLEN(dst)); |
153
|
|
|
|
|
|
#endif |
154
|
2928
|
|
|
|
|
if (sdone != 0) { /* has src ever been processed ? */ |
155
|
|
|
|
|
|
#if ENCODE_XS_USEFP == 2 |
156
|
|
|
|
|
|
more = (1.0*tlen*SvLEN(dst)+sdone-1)/sdone |
157
|
|
|
|
|
|
- SvLEN(dst); |
158
|
|
|
|
|
|
#elif ENCODE_XS_USEFP |
159
|
2928
|
|
|
|
|
more = (STRLEN)((1.0*SvLEN(dst)+1)/sdone * sleft); |
160
|
|
|
|
|
|
#else |
161
|
|
|
|
|
|
/* safe until SvLEN(dst) == MAX_INT/16 */ |
162
|
|
|
|
|
|
more = (16*SvLEN(dst)+1)/sdone/16 * sleft; |
163
|
|
|
|
|
|
#endif |
164
|
|
|
|
|
|
} |
165
|
2928
|
|
|
|
|
more += UTF8_MAXLEN; /* insurance policy */ |
166
|
2928
|
|
|
|
|
d = (U8 *) SvGROW(dst, SvLEN(dst) + more); |
167
|
|
|
|
|
|
/* dst need to grow need MORE bytes! */ |
168
|
2928
|
|
|
|
|
if (ddone >= SvLEN(dst)) { |
169
|
0
|
|
|
|
|
Perl_croak(aTHX_ "Destination couldn't be grown."); |
170
|
|
|
|
|
|
} |
171
|
2928
|
|
|
|
|
dlen = SvLEN(dst)-ddone-1; |
172
|
2928
|
|
|
|
|
d += ddone; |
173
|
2928
|
|
|
|
|
s += slen; |
174
|
2928
|
|
|
|
|
slen = tlen-sdone; |
175
|
2928
|
|
|
|
|
continue; |
176
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
case ENCODE_NOREP: |
178
|
|
|
|
|
|
/* encoding */ |
179
|
2758
|
|
|
|
|
if (dir == enc->f_utf8) { |
180
|
|
|
|
|
|
STRLEN clen; |
181
|
1560
|
|
|
|
|
UV ch = |
182
|
1560
|
|
|
|
|
utf8n_to_uvuni(s+slen, (SvCUR(src)-slen), |
183
|
|
|
|
|
|
&clen, UTF8_ALLOW_ANY|UTF8_CHECK_ONLY); |
184
|
|
|
|
|
|
/* if non-representable multibyte prefix at end of current buffer - break*/ |
185
|
1560
|
|
|
|
|
if (clen > tlen - sdone) break; |
186
|
1560
|
|
|
|
|
if (check & ENCODE_DIE_ON_ERR) { |
187
|
2
|
|
|
|
|
Perl_croak(aTHX_ ERR_ENCODE_NOMAP, |
188
|
|
|
|
|
|
(UV)ch, enc->name[0]); |
189
|
|
|
|
|
|
return &PL_sv_undef; /* never reaches but be safe */ |
190
|
|
|
|
|
|
} |
191
|
1558
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR){ |
192
|
260
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8), |
193
|
|
|
|
|
|
ERR_ENCODE_NOMAP, (UV)ch, enc->name[0]); |
194
|
|
|
|
|
|
} |
195
|
1558
|
|
|
|
|
if (check & ENCODE_RETURN_ON_ERR){ |
196
|
|
|
|
|
|
goto ENCODE_SET_SRC; |
197
|
|
|
|
|
|
} |
198
|
1554
|
|
|
|
|
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
199
|
|
|
|
|
|
SV* subchar = |
200
|
|
|
|
|
|
(fallback_cb != &PL_sv_undef) |
201
|
|
|
|
|
|
? do_fallback_cb(aTHX_ ch, fallback_cb) |
202
|
1564
|
|
|
|
|
: newSVpvf(check & ENCODE_PERLQQ ? "\\x{%04"UVxf"}" : |
203
|
522
|
|
|
|
|
check & ENCODE_HTMLCREF ? "%" UVuf ";" : |
204
|
|
|
|
|
|
"%" UVxf ";", (UV)ch); |
205
|
1042
|
|
|
|
|
SvUTF8_off(subchar); /* make sure no decoded string gets in */ |
206
|
1042
|
|
|
|
|
sdone += slen + clen; |
207
|
1042
|
|
|
|
|
ddone += dlen + SvCUR(subchar); |
208
|
1042
|
|
|
|
|
sv_catsv(dst, subchar); |
209
|
1042
|
|
|
|
|
SvREFCNT_dec(subchar); |
210
|
|
|
|
|
|
} else { |
211
|
|
|
|
|
|
/* fallback char */ |
212
|
512
|
|
|
|
|
sdone += slen + clen; |
213
|
512
|
|
|
|
|
ddone += dlen + enc->replen; |
214
|
512
|
|
|
|
|
sv_catpvn(dst, (char*)enc->rep, enc->replen); |
215
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
/* decoding */ |
218
|
|
|
|
|
|
else { |
219
|
1198
|
|
|
|
|
if (check & ENCODE_DIE_ON_ERR){ |
220
|
6
|
|
|
|
|
Perl_croak(aTHX_ ERR_DECODE_NOMAP, |
221
|
6
|
|
|
|
|
enc->name[0], (UV)s[slen]); |
222
|
|
|
|
|
|
return &PL_sv_undef; /* never reaches but be safe */ |
223
|
|
|
|
|
|
} |
224
|
1192
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR){ |
225
|
0
|
|
|
|
|
Perl_warner( |
226
|
|
|
|
|
|
aTHX_ packWARN(WARN_UTF8), |
227
|
|
|
|
|
|
ERR_DECODE_NOMAP, |
228
|
0
|
|
|
|
|
enc->name[0], (UV)s[slen]); |
229
|
|
|
|
|
|
} |
230
|
1192
|
|
|
|
|
if (check & ENCODE_RETURN_ON_ERR){ |
231
|
|
|
|
|
|
goto ENCODE_SET_SRC; |
232
|
|
|
|
|
|
} |
233
|
1114
|
|
|
|
|
if (check & |
234
|
|
|
|
|
|
(ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
235
|
|
|
|
|
|
SV* subchar = |
236
|
|
|
|
|
|
(fallback_cb != &PL_sv_undef) |
237
|
260
|
|
|
|
|
? do_fallback_cb(aTHX_ (UV)s[slen], fallback_cb) |
238
|
1314
|
|
|
|
|
: newSVpvf("\\x%02" UVXf, (UV)s[slen]); |
239
|
1054
|
|
|
|
|
sdone += slen + 1; |
240
|
1054
|
|
|
|
|
ddone += dlen + SvCUR(subchar); |
241
|
1054
|
|
|
|
|
sv_catsv(dst, subchar); |
242
|
1054
|
|
|
|
|
SvREFCNT_dec(subchar); |
243
|
|
|
|
|
|
} else { |
244
|
60
|
|
|
|
|
sdone += slen + 1; |
245
|
60
|
|
|
|
|
ddone += dlen + strlen(FBCHAR_UTF8); |
246
|
60
|
|
|
|
|
sv_catpv(dst, FBCHAR_UTF8); |
247
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
/* settle variables when fallback */ |
250
|
2668
|
|
|
|
|
d = (U8 *)SvEND(dst); |
251
|
2668
|
|
|
|
|
dlen = SvLEN(dst) - ddone - 1; |
252
|
2668
|
|
|
|
|
s = (U8*)SvPVX(src) + sdone; |
253
|
2668
|
|
|
|
|
slen = tlen - sdone; |
254
|
2668
|
|
|
|
|
break; |
255
|
|
|
|
|
|
|
256
|
|
|
|
|
|
default: |
257
|
0
|
|
|
|
|
Perl_croak(aTHX_ "Unexpected code %d converting %s %s", |
258
|
0
|
|
|
|
|
code, (dir == enc->f_utf8) ? "to" : "from", |
259
|
|
|
|
|
|
enc->name[0]); |
260
|
|
|
|
|
|
return &PL_sv_undef; |
261
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
ENCODE_SET_SRC: |
264
|
213898
|
|
|
|
|
if (check && !(check & ENCODE_LEAVE_SRC)){ |
265
|
36794
|
|
|
|
|
sdone = SvCUR(src) - (slen+sdone); |
266
|
36794
|
|
|
|
|
if (sdone) { |
267
|
288
|
|
|
|
|
sv_setpvn(src, (char*)s+slen, sdone); |
268
|
|
|
|
|
|
} |
269
|
36794
|
|
|
|
|
SvCUR_set(src, sdone); |
270
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
/* warn("check = 0x%X, code = 0x%d\n", check, code); */ |
272
|
|
|
|
|
|
|
273
|
213898
|
|
|
|
|
SvCUR_set(dst, dlen+ddone); |
274
|
213898
|
|
|
|
|
SvPOK_only(dst); |
275
|
|
|
|
|
|
|
276
|
|
|
|
|
|
#if ENCODE_XS_PROFILE |
277
|
|
|
|
|
|
if (SvCUR(dst) > SvCUR(src)){ |
278
|
|
|
|
|
|
Perl_warn(aTHX_ |
279
|
|
|
|
|
|
"SvLEN(dst)=%d, SvCUR(dst)=%d. %d bytes unused(%f %%)\n", |
280
|
|
|
|
|
|
SvLEN(dst), SvCUR(dst), SvLEN(dst) - SvCUR(dst), |
281
|
|
|
|
|
|
(SvLEN(dst) - SvCUR(dst))*1.0/SvLEN(dst)*100.0); |
282
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
#endif |
284
|
|
|
|
|
|
|
285
|
213898
|
|
|
|
|
if (offset) |
286
|
1530
|
|
|
|
|
*offset += sdone + slen; |
287
|
|
|
|
|
|
|
288
|
|
|
|
|
|
ENCODE_END: |
289
|
226112
|
|
|
|
|
*SvEND(dst) = '\0'; |
290
|
226112
|
|
|
|
|
if (retcode) *retcode = code; |
291
|
|
|
|
|
|
return dst; |
292
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
294
|
|
|
|
|
|
static bool |
295
|
43202
|
|
|
|
|
strict_utf8(pTHX_ SV* sv) |
296
|
|
|
|
|
|
{ |
297
|
|
|
|
|
|
HV* hv; |
298
|
|
|
|
|
|
SV** svp; |
299
|
43202
|
|
|
|
|
sv = SvRV(sv); |
300
|
43202
|
|
|
|
|
if (!sv || SvTYPE(sv) != SVt_PVHV) |
301
|
|
|
|
|
|
return 0; |
302
|
|
|
|
|
|
hv = (HV*)sv; |
303
|
43202
|
|
|
|
|
svp = hv_fetch(hv, "strict_utf8", 11, 0); |
304
|
43202
|
|
|
|
|
if (!svp) |
305
|
|
|
|
|
|
return 0; |
306
|
820
|
|
|
|
|
return SvTRUE(*svp); |
307
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
309
|
|
|
|
|
|
static U8* |
310
|
43126
|
|
|
|
|
process_utf8(pTHX_ SV* dst, U8* s, U8* e, SV *check_sv, |
311
|
|
|
|
|
|
bool encode, bool strict, bool stop_at_partial) |
312
|
|
|
|
|
|
{ |
313
|
|
|
|
|
|
UV uv; |
314
|
|
|
|
|
|
STRLEN ulen; |
315
|
|
|
|
|
|
SV *fallback_cb; |
316
|
|
|
|
|
|
int check; |
317
|
|
|
|
|
|
|
318
|
43126
|
|
|
|
|
if (SvROK(check_sv)) { |
319
|
|
|
|
|
|
/* croak("UTF-8 decoder doesn't support callback CHECK"); */ |
320
|
|
|
|
|
|
fallback_cb = check_sv; |
321
|
|
|
|
|
|
check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as perlqq */ |
322
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
else { |
324
|
|
|
|
|
|
fallback_cb = &PL_sv_undef; |
325
|
43122
|
|
|
|
|
check = SvIV(check_sv); |
326
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
328
|
43126
|
|
|
|
|
SvPOK_only(dst); |
329
|
43126
|
|
|
|
|
SvCUR_set(dst,0); |
330
|
|
|
|
|
|
|
331
|
387030
|
|
|
|
|
while (s < e) { |
332
|
300858
|
|
|
|
|
if (UTF8_IS_INVARIANT(*s)) { |
333
|
200734
|
|
|
|
|
sv_catpvn(dst, (char *)s, 1); |
334
|
200734
|
|
|
|
|
s++; |
335
|
200734
|
|
|
|
|
continue; |
336
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
338
|
100124
|
|
|
|
|
if (UTF8_IS_START(*s)) { |
339
|
99590
|
|
|
|
|
U8 skip = UTF8SKIP(s); |
340
|
99590
|
|
|
|
|
if ((s + skip) > e) { |
341
|
|
|
|
|
|
/* Partial character */ |
342
|
|
|
|
|
|
/* XXX could check that rest of bytes are UTF8_IS_CONTINUATION(ch) */ |
343
|
328
|
|
|
|
|
if (stop_at_partial || (check & ENCODE_STOP_AT_PARTIAL)) |
344
|
|
|
|
|
|
break; |
345
|
|
|
|
|
|
|
346
|
|
|
|
|
|
goto malformed_byte; |
347
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
349
|
99262
|
|
|
|
|
uv = utf8n_to_uvuni(s, e - s, &ulen, |
350
|
|
|
|
|
|
UTF8_CHECK_ONLY | (strict ? UTF8_ALLOW_STRICT : |
351
|
|
|
|
|
|
UTF8_ALLOW_NONSTRICT) |
352
|
|
|
|
|
|
); |
353
|
|
|
|
|
|
#if 1 /* perl-5.8.6 and older do not check UTF8_ALLOW_LONG */ |
354
|
99262
|
|
|
|
|
if (strict && uv > PERL_UNICODE_MAX) |
355
|
0
|
|
|
|
|
ulen = (STRLEN) -1; |
356
|
|
|
|
|
|
#endif |
357
|
99262
|
|
|
|
|
if (ulen == -1) { |
358
|
272
|
|
|
|
|
if (strict) { |
359
|
34
|
|
|
|
|
uv = utf8n_to_uvuni(s, e - s, &ulen, |
360
|
|
|
|
|
|
UTF8_CHECK_ONLY | UTF8_ALLOW_NONSTRICT); |
361
|
34
|
|
|
|
|
if (ulen == -1) |
362
|
|
|
|
|
|
goto malformed_byte; |
363
|
|
|
|
|
|
goto malformed; |
364
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
goto malformed_byte; |
366
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
369
|
|
|
|
|
|
/* Whole char is good */ |
370
|
98990
|
|
|
|
|
sv_catpvn(dst,(char *)s,skip); |
371
|
98990
|
|
|
|
|
s += skip; |
372
|
98990
|
|
|
|
|
continue; |
373
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
375
|
|
|
|
|
|
/* If we get here there is something wrong with alleged UTF-8 */ |
376
|
|
|
|
|
|
malformed_byte: |
377
|
1094
|
|
|
|
|
uv = (UV)*s; |
378
|
1094
|
|
|
|
|
ulen = 1; |
379
|
|
|
|
|
|
|
380
|
|
|
|
|
|
malformed: |
381
|
1122
|
|
|
|
|
if (check & ENCODE_DIE_ON_ERR){ |
382
|
36
|
|
|
|
|
if (encode) |
383
|
26
|
|
|
|
|
Perl_croak(aTHX_ ERR_ENCODE_NOMAP, uv, "utf8"); |
384
|
|
|
|
|
|
else |
385
|
10
|
|
|
|
|
Perl_croak(aTHX_ ERR_DECODE_NOMAP, "utf8", uv); |
386
|
|
|
|
|
|
} |
387
|
1086
|
|
|
|
|
if (check & ENCODE_WARN_ON_ERR){ |
388
|
272
|
|
|
|
|
if (encode) |
389
|
0
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8), |
390
|
|
|
|
|
|
ERR_ENCODE_NOMAP, uv, "utf8"); |
391
|
|
|
|
|
|
else |
392
|
272
|
|
|
|
|
Perl_warner(aTHX_ packWARN(WARN_UTF8), |
393
|
|
|
|
|
|
ERR_DECODE_NOMAP, "utf8", uv); |
394
|
|
|
|
|
|
} |
395
|
1086
|
|
|
|
|
if (check & ENCODE_RETURN_ON_ERR) { |
396
|
|
|
|
|
|
break; |
397
|
|
|
|
|
|
} |
398
|
1054
|
|
|
|
|
if (check & (ENCODE_PERLQQ|ENCODE_HTMLCREF|ENCODE_XMLCREF)){ |
399
|
|
|
|
|
|
SV* subchar = |
400
|
|
|
|
|
|
(fallback_cb != &PL_sv_undef) |
401
|
|
|
|
|
|
? do_fallback_cb(aTHX_ uv, fallback_cb) |
402
|
32
|
|
|
|
|
: newSVpvf(check & ENCODE_PERLQQ |
403
|
14
|
|
|
|
|
? (ulen == 1 ? "\\x%02" UVXf : "\\x{%04" UVXf "}") |
404
|
14
|
|
|
|
|
: check & ENCODE_HTMLCREF ? "%" UVuf ";" |
405
|
|
|
|
|
|
: "%" UVxf ";", uv); |
406
|
18
|
|
|
|
|
if (encode){ |
407
|
0
|
|
|
|
|
SvUTF8_off(subchar); /* make sure no decoded string gets in */ |
408
|
|
|
|
|
|
} |
409
|
18
|
|
|
|
|
sv_catsv(dst, subchar); |
410
|
18
|
|
|
|
|
SvREFCNT_dec(subchar); |
411
|
|
|
|
|
|
} else { |
412
|
1036
|
|
|
|
|
sv_catpv(dst, FBCHAR_UTF8); |
413
|
|
|
|
|
|
} |
414
|
1054
|
|
|
|
|
s += ulen; |
415
|
|
|
|
|
|
} |
416
|
43090
|
|
|
|
|
*SvEND(dst) = '\0'; |
417
|
|
|
|
|
|
|
418
|
43090
|
|
|
|
|
return s; |
419
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
422
|
|
|
|
|
|
MODULE = Encode PACKAGE = Encode::utf8 PREFIX = Method_ |
423
|
|
|
|
|
|
|
424
|
|
|
|
|
|
PROTOTYPES: DISABLE |
425
|
|
|
|
|
|
|
426
|
|
|
|
|
|
void |
427
|
|
|
|
|
|
Method_decode_xs(obj,src,check_sv = &PL_sv_no) |
428
|
|
|
|
|
|
SV * obj |
429
|
|
|
|
|
|
SV * src |
430
|
|
|
|
|
|
SV * check_sv |
431
|
|
|
|
|
|
PREINIT: |
432
|
|
|
|
|
|
STRLEN slen; |
433
|
|
|
|
|
|
U8 *s; |
434
|
|
|
|
|
|
U8 *e; |
435
|
|
|
|
|
|
SV *dst; |
436
|
|
|
|
|
|
bool renewed = 0; |
437
|
|
|
|
|
|
int check; |
438
|
|
|
|
|
|
CODE: |
439
|
|
|
|
|
|
{ |
440
|
42798
|
|
|
|
|
dSP; ENTER; SAVETMPS; |
441
|
42798
|
|
|
|
|
if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); |
442
|
42798
|
|
|
|
|
s = (U8 *) SvPV(src, slen); |
443
|
42798
|
|
|
|
|
e = (U8 *) SvEND(src); |
444
|
42798
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); |
445
|
|
|
|
|
|
/* |
446
|
|
|
|
|
|
* PerlIO check -- we assume the object is of PerlIO if renewed |
447
|
|
|
|
|
|
*/ |
448
|
42798
|
|
|
|
|
PUSHMARK(sp); |
449
|
42798
|
|
|
|
|
XPUSHs(obj); |
450
|
42798
|
|
|
|
|
PUTBACK; |
451
|
42798
|
|
|
|
|
if (call_method("renewed",G_SCALAR) == 1) { |
452
|
42798
|
|
|
|
|
SPAGAIN; |
453
|
42798
|
|
|
|
|
renewed = (bool)POPi; |
454
|
42798
|
|
|
|
|
PUTBACK; |
455
|
|
|
|
|
|
#if 0 |
456
|
|
|
|
|
|
fprintf(stderr, "renewed == %d\n", renewed); |
457
|
|
|
|
|
|
#endif |
458
|
|
|
|
|
|
} |
459
|
42798
|
|
|
|
|
FREETMPS; LEAVE; |
460
|
|
|
|
|
|
/* end PerlIO check */ |
461
|
|
|
|
|
|
|
462
|
42798
|
|
|
|
|
if (SvUTF8(src)) { |
463
|
10
|
|
|
|
|
s = utf8_to_bytes(s,&slen); |
464
|
10
|
|
|
|
|
if (s) { |
465
|
10
|
|
|
|
|
SvCUR_set(src,slen); |
466
|
10
|
|
|
|
|
SvUTF8_off(src); |
467
|
10
|
|
|
|
|
e = s+slen; |
468
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
else { |
470
|
0
|
|
|
|
|
croak("Cannot decode string with wide characters"); |
471
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
474
|
42798
|
|
|
|
|
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ |
475
|
85596
|
|
|
|
|
s = process_utf8(aTHX_ dst, s, e, check_sv, 0, strict_utf8(aTHX_ obj), renewed); |
476
|
|
|
|
|
|
|
477
|
|
|
|
|
|
/* Clear out translated part of source unless asked not to */ |
478
|
42788
|
|
|
|
|
if (check && !(check & ENCODE_LEAVE_SRC)){ |
479
|
144
|
|
|
|
|
slen = e-s; |
480
|
144
|
|
|
|
|
if (slen) { |
481
|
44
|
|
|
|
|
sv_setpvn(src, (char*)s, slen); |
482
|
|
|
|
|
|
} |
483
|
144
|
|
|
|
|
SvCUR_set(src, slen); |
484
|
|
|
|
|
|
} |
485
|
42788
|
|
|
|
|
SvUTF8_on(dst); |
486
|
42788
|
|
|
|
|
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ |
487
|
42788
|
|
|
|
|
ST(0) = dst; |
488
|
42788
|
|
|
|
|
XSRETURN(1); |
489
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
491
|
|
|
|
|
|
void |
492
|
|
|
|
|
|
Method_encode_xs(obj,src,check_sv = &PL_sv_no) |
493
|
|
|
|
|
|
SV * obj |
494
|
|
|
|
|
|
SV * src |
495
|
|
|
|
|
|
SV * check_sv |
496
|
|
|
|
|
|
PREINIT: |
497
|
|
|
|
|
|
STRLEN slen; |
498
|
|
|
|
|
|
U8 *s; |
499
|
|
|
|
|
|
U8 *e; |
500
|
|
|
|
|
|
SV *dst; |
501
|
|
|
|
|
|
bool renewed = 0; |
502
|
|
|
|
|
|
int check; |
503
|
|
|
|
|
|
CODE: |
504
|
|
|
|
|
|
{ |
505
|
570
|
|
|
|
|
check = SvROK(check_sv) ? ENCODE_PERLQQ|ENCODE_LEAVE_SRC : SvIV(check_sv); |
506
|
570
|
|
|
|
|
if (src == &PL_sv_undef || SvROK(src)) src = sv_2mortal(newSV(0)); |
507
|
570
|
|
|
|
|
s = (U8 *) SvPV(src, slen); |
508
|
570
|
|
|
|
|
e = (U8 *) SvEND(src); |
509
|
570
|
|
|
|
|
dst = sv_2mortal(newSV(slen>0?slen:1)); /* newSV() abhors 0 -- inaba */ |
510
|
570
|
|
|
|
|
if (SvUTF8(src)) { |
511
|
|
|
|
|
|
/* Already encoded */ |
512
|
404
|
|
|
|
|
if (strict_utf8(aTHX_ obj)) { |
513
|
328
|
|
|
|
|
s = process_utf8(aTHX_ dst, s, e, check_sv, 1, 1, 0); |
514
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
else { |
516
|
|
|
|
|
|
/* trust it and just copy the octets */ |
517
|
76
|
|
|
|
|
sv_setpvn(dst,(char *)s,(e-s)); |
518
|
|
|
|
|
|
s = e; |
519
|
|
|
|
|
|
} |
520
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
else { |
522
|
|
|
|
|
|
/* Native bytes - can always encode */ |
523
|
166
|
|
|
|
|
U8 *d = (U8 *) SvGROW(dst, 2*slen+1); /* +1 or assertion will botch */ |
524
|
18996
|
|
|
|
|
while (s < e) { |
525
|
18664
|
|
|
|
|
UV uv = NATIVE_TO_UNI((UV) *s); |
526
|
18664
|
|
|
|
|
s++; /* Above expansion of NATIVE_TO_UNI() is safer this way. */ |
527
|
18664
|
|
|
|
|
if (UNI_IS_INVARIANT(uv)) |
528
|
18642
|
|
|
|
|
*d++ = (U8)UTF_TO_NATIVE(uv); |
529
|
|
|
|
|
|
else { |
530
|
22
|
|
|
|
|
*d++ = (U8)UTF8_EIGHT_BIT_HI(uv); |
531
|
22
|
|
|
|
|
*d++ = (U8)UTF8_EIGHT_BIT_LO(uv); |
532
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
} |
534
|
166
|
|
|
|
|
SvCUR_set(dst, d- (U8 *)SvPVX(dst)); |
535
|
166
|
|
|
|
|
*SvEND(dst) = '\0'; |
536
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
538
|
|
|
|
|
|
/* Clear out translated part of source unless asked not to */ |
539
|
544
|
|
|
|
|
if (check && !(check & ENCODE_LEAVE_SRC)){ |
540
|
116
|
|
|
|
|
slen = e-s; |
541
|
116
|
|
|
|
|
if (slen) { |
542
|
0
|
|
|
|
|
sv_setpvn(src, (char*)s, slen); |
543
|
|
|
|
|
|
} |
544
|
116
|
|
|
|
|
SvCUR_set(src, slen); |
545
|
|
|
|
|
|
} |
546
|
544
|
|
|
|
|
SvPOK_only(dst); |
547
|
544
|
|
|
|
|
SvUTF8_off(dst); |
548
|
544
|
|
|
|
|
if (SvTAINTED(src)) SvTAINTED_on(dst); /* propagate taintedness */ |
549
|
544
|
|
|
|
|
ST(0) = dst; |
550
|
544
|
|
|
|
|
XSRETURN(1); |
551
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
553
|
|
|
|
|
|
MODULE = Encode PACKAGE = Encode::XS PREFIX = Method_ |
554
|
|
|
|
|
|
|
555
|
|
|
|
|
|
PROTOTYPES: ENABLE |
556
|
|
|
|
|
|
|
557
|
|
|
|
|
|
void |
558
|
|
|
|
|
|
Method_renew(obj) |
559
|
|
|
|
|
|
SV * obj |
560
|
|
|
|
|
|
CODE: |
561
|
|
|
|
|
|
{ |
562
|
194
|
|
|
|
|
XSRETURN(1); |
563
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
565
|
|
|
|
|
|
int |
566
|
|
|
|
|
|
Method_renewed(obj) |
567
|
|
|
|
|
|
SV * obj |
568
|
|
|
|
|
|
CODE: |
569
|
|
|
|
|
|
RETVAL = 0; |
570
|
|
|
|
|
|
OUTPUT: |
571
|
|
|
|
|
|
RETVAL |
572
|
|
|
|
|
|
|
573
|
|
|
|
|
|
void |
574
|
|
|
|
|
|
Method_name(obj) |
575
|
|
|
|
|
|
SV * obj |
576
|
|
|
|
|
|
CODE: |
577
|
|
|
|
|
|
{ |
578
|
1484
|
|
|
|
|
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
579
|
1484
|
|
|
|
|
ST(0) = sv_2mortal(newSVpvn(enc->name[0],strlen(enc->name[0]))); |
580
|
1484
|
|
|
|
|
XSRETURN(1); |
581
|
|
|
|
|
|
} |
582
|
|
|
|
|
|
|
583
|
|
|
|
|
|
void |
584
|
|
|
|
|
|
Method_cat_decode(obj, dst, src, off, term, check_sv = &PL_sv_no) |
585
|
|
|
|
|
|
SV * obj |
586
|
|
|
|
|
|
SV * dst |
587
|
|
|
|
|
|
SV * src |
588
|
|
|
|
|
|
SV * off |
589
|
|
|
|
|
|
SV * term |
590
|
|
|
|
|
|
SV * check_sv |
591
|
|
|
|
|
|
CODE: |
592
|
|
|
|
|
|
{ |
593
|
|
|
|
|
|
int check; |
594
|
|
|
|
|
|
SV *fallback_cb = &PL_sv_undef; |
595
|
1530
|
|
|
|
|
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
596
|
1530
|
|
|
|
|
STRLEN offset = (STRLEN)SvIV(off); |
597
|
1530
|
|
|
|
|
int code = 0; |
598
|
1530
|
|
|
|
|
if (SvUTF8(src)) { |
599
|
0
|
|
|
|
|
sv_utf8_downgrade(src, FALSE); |
600
|
|
|
|
|
|
} |
601
|
1530
|
|
|
|
|
if (SvROK(check_sv)){ |
602
|
|
|
|
|
|
fallback_cb = check_sv; |
603
|
|
|
|
|
|
check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ |
604
|
|
|
|
|
|
}else{ |
605
|
1530
|
|
|
|
|
check = SvIV(check_sv); |
606
|
|
|
|
|
|
} |
607
|
1530
|
|
|
|
|
sv_catsv(dst, encode_method(aTHX_ enc, enc->t_utf8, src, check, |
608
|
|
|
|
|
|
&offset, term, &code, fallback_cb)); |
609
|
1530
|
|
|
|
|
SvIV_set(off, (IV)offset); |
610
|
1530
|
|
|
|
|
if (code == ENCODE_FOUND_TERM) { |
611
|
1530
|
|
|
|
|
ST(0) = &PL_sv_yes; |
612
|
|
|
|
|
|
}else{ |
613
|
0
|
|
|
|
|
ST(0) = &PL_sv_no; |
614
|
|
|
|
|
|
} |
615
|
1530
|
|
|
|
|
XSRETURN(1); |
616
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
618
|
|
|
|
|
|
void |
619
|
|
|
|
|
|
Method_decode(obj,src,check_sv = &PL_sv_no) |
620
|
|
|
|
|
|
SV * obj |
621
|
|
|
|
|
|
SV * src |
622
|
|
|
|
|
|
SV * check_sv |
623
|
|
|
|
|
|
CODE: |
624
|
|
|
|
|
|
{ |
625
|
|
|
|
|
|
int check; |
626
|
|
|
|
|
|
SV *fallback_cb = &PL_sv_undef; |
627
|
154418
|
|
|
|
|
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
628
|
154418
|
|
|
|
|
if (SvUTF8(src)) { |
629
|
12
|
|
|
|
|
sv_utf8_downgrade(src, FALSE); |
630
|
|
|
|
|
|
} |
631
|
154418
|
|
|
|
|
if (SvROK(check_sv)){ |
632
|
|
|
|
|
|
fallback_cb = check_sv; |
633
|
|
|
|
|
|
check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ |
634
|
|
|
|
|
|
}else{ |
635
|
154412
|
|
|
|
|
check = SvIV(check_sv); |
636
|
|
|
|
|
|
} |
637
|
154418
|
|
|
|
|
ST(0) = encode_method(aTHX_ enc, enc->t_utf8, src, check, |
638
|
|
|
|
|
|
NULL, Nullsv, NULL, fallback_cb); |
639
|
154412
|
|
|
|
|
SvUTF8_on(ST(0)); |
640
|
154412
|
|
|
|
|
XSRETURN(1); |
641
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
643
|
|
|
|
|
|
void |
644
|
|
|
|
|
|
Method_encode(obj,src,check_sv = &PL_sv_no) |
645
|
|
|
|
|
|
SV * obj |
646
|
|
|
|
|
|
SV * src |
647
|
|
|
|
|
|
SV * check_sv |
648
|
|
|
|
|
|
CODE: |
649
|
|
|
|
|
|
{ |
650
|
|
|
|
|
|
int check; |
651
|
|
|
|
|
|
SV *fallback_cb = &PL_sv_undef; |
652
|
70172
|
|
|
|
|
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
653
|
70172
|
|
|
|
|
sv_utf8_upgrade(src); |
654
|
70172
|
|
|
|
|
if (SvROK(check_sv)){ |
655
|
|
|
|
|
|
fallback_cb = check_sv; |
656
|
|
|
|
|
|
check = ENCODE_PERLQQ|ENCODE_LEAVE_SRC; /* same as FB_PERLQQ */ |
657
|
|
|
|
|
|
}else{ |
658
|
70166
|
|
|
|
|
check = SvIV(check_sv); |
659
|
|
|
|
|
|
} |
660
|
70172
|
|
|
|
|
ST(0) = encode_method(aTHX_ enc, enc->f_utf8, src, check, |
661
|
|
|
|
|
|
NULL, Nullsv, NULL, fallback_cb); |
662
|
70170
|
|
|
|
|
XSRETURN(1); |
663
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
665
|
|
|
|
|
|
void |
666
|
|
|
|
|
|
Method_needs_lines(obj) |
667
|
|
|
|
|
|
SV * obj |
668
|
|
|
|
|
|
CODE: |
669
|
|
|
|
|
|
{ |
670
|
|
|
|
|
|
/* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ |
671
|
194
|
|
|
|
|
ST(0) = &PL_sv_no; |
672
|
194
|
|
|
|
|
XSRETURN(1); |
673
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
675
|
|
|
|
|
|
void |
676
|
|
|
|
|
|
Method_perlio_ok(obj) |
677
|
|
|
|
|
|
SV * obj |
678
|
|
|
|
|
|
CODE: |
679
|
|
|
|
|
|
{ |
680
|
|
|
|
|
|
/* encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); */ |
681
|
|
|
|
|
|
/* require_pv(PERLIO_FILENAME); */ |
682
|
|
|
|
|
|
|
683
|
16
|
|
|
|
|
eval_pv("require PerlIO::encoding", 0); |
684
|
|
|
|
|
|
|
685
|
16
|
|
|
|
|
if (SvTRUE(get_sv("@", 0))) { |
686
|
0
|
|
|
|
|
ST(0) = &PL_sv_no; |
687
|
|
|
|
|
|
}else{ |
688
|
16
|
|
|
|
|
ST(0) = &PL_sv_yes; |
689
|
|
|
|
|
|
} |
690
|
16
|
|
|
|
|
XSRETURN(1); |
691
|
|
|
|
|
|
} |
692
|
|
|
|
|
|
|
693
|
|
|
|
|
|
void |
694
|
|
|
|
|
|
Method_mime_name(obj) |
695
|
|
|
|
|
|
SV * obj |
696
|
|
|
|
|
|
CODE: |
697
|
|
|
|
|
|
{ |
698
|
108
|
|
|
|
|
encode_t *enc = INT2PTR(encode_t *, SvIV(SvRV(obj))); |
699
|
|
|
|
|
|
SV *retval; |
700
|
108
|
|
|
|
|
eval_pv("require Encode::MIME::Name", 0); |
701
|
|
|
|
|
|
|
702
|
108
|
|
|
|
|
if (SvTRUE(get_sv("@", 0))) { |
703
|
0
|
|
|
|
|
ST(0) = &PL_sv_undef; |
704
|
|
|
|
|
|
}else{ |
705
|
108
|
|
|
|
|
ENTER; |
706
|
108
|
|
|
|
|
SAVETMPS; |
707
|
108
|
|
|
|
|
PUSHMARK(sp); |
708
|
108
|
|
|
|
|
XPUSHs(sv_2mortal(newSVpvn(enc->name[0], strlen(enc->name[0])))); |
709
|
108
|
|
|
|
|
PUTBACK; |
710
|
108
|
|
|
|
|
call_pv("Encode::MIME::Name::get_mime_name", G_SCALAR); |
711
|
108
|
|
|
|
|
SPAGAIN; |
712
|
108
|
|
|
|
|
retval = newSVsv(POPs); |
713
|
108
|
|
|
|
|
PUTBACK; |
714
|
108
|
|
|
|
|
FREETMPS; |
715
|
108
|
|
|
|
|
LEAVE; |
716
|
|
|
|
|
|
/* enc->name[0] */ |
717
|
108
|
|
|
|
|
ST(0) = retval; |
718
|
|
|
|
|
|
} |
719
|
108
|
|
|
|
|
XSRETURN(1); |
720
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
722
|
|
|
|
|
|
MODULE = Encode PACKAGE = Encode |
723
|
|
|
|
|
|
|
724
|
|
|
|
|
|
PROTOTYPES: ENABLE |
725
|
|
|
|
|
|
|
726
|
|
|
|
|
|
I32 |
727
|
|
|
|
|
|
_bytes_to_utf8(sv, ...) |
728
|
|
|
|
|
|
SV * sv |
729
|
|
|
|
|
|
CODE: |
730
|
|
|
|
|
|
{ |
731
|
0
|
|
|
|
|
SV * encoding = items == 2 ? ST(1) : Nullsv; |
732
|
|
|
|
|
|
|
733
|
0
|
|
|
|
|
if (encoding) |
734
|
0
|
|
|
|
|
RETVAL = _encoded_bytes_to_utf8(sv, SvPV_nolen(encoding)); |
735
|
|
|
|
|
|
else { |
736
|
|
|
|
|
|
STRLEN len; |
737
|
0
|
|
|
|
|
U8* s = (U8*)SvPV(sv, len); |
738
|
|
|
|
|
|
U8* converted; |
739
|
|
|
|
|
|
|
740
|
0
|
|
|
|
|
converted = bytes_to_utf8(s, &len); /* This allocs */ |
741
|
0
|
|
|
|
|
sv_setpvn(sv, (char *)converted, len); |
742
|
0
|
|
|
|
|
SvUTF8_on(sv); /* XXX Should we? */ |
743
|
0
|
|
|
|
|
Safefree(converted); /* ... so free it */ |
744
|
0
|
|
|
|
|
RETVAL = len; |
745
|
|
|
|
|
|
} |
746
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
OUTPUT: |
748
|
|
|
|
|
|
RETVAL |
749
|
|
|
|
|
|
|
750
|
|
|
|
|
|
I32 |
751
|
|
|
|
|
|
_utf8_to_bytes(sv, ...) |
752
|
|
|
|
|
|
SV * sv |
753
|
|
|
|
|
|
CODE: |
754
|
|
|
|
|
|
{ |
755
|
0
|
|
|
|
|
SV * to = items > 1 ? ST(1) : Nullsv; |
756
|
0
|
|
|
|
|
SV * check = items > 2 ? ST(2) : Nullsv; |
757
|
|
|
|
|
|
|
758
|
0
|
|
|
|
|
if (to) { |
759
|
0
|
|
|
|
|
RETVAL = _encoded_utf8_to_bytes(sv, SvPV_nolen(to)); |
760
|
|
|
|
|
|
} else { |
761
|
|
|
|
|
|
STRLEN len; |
762
|
0
|
|
|
|
|
U8 *s = (U8*)SvPV(sv, len); |
763
|
|
|
|
|
|
|
764
|
|
|
|
|
|
RETVAL = 0; |
765
|
0
|
|
|
|
|
if (SvTRUE(check)) { |
766
|
|
|
|
|
|
/* Must do things the slow way */ |
767
|
|
|
|
|
|
U8 *dest; |
768
|
|
|
|
|
|
/* We need a copy to pass to check() */ |
769
|
|
|
|
|
|
U8 *src = s; |
770
|
0
|
|
|
|
|
U8 *send = s + len; |
771
|
|
|
|
|
|
U8 *d0; |
772
|
|
|
|
|
|
|
773
|
0
|
|
|
|
|
New(83, dest, len, U8); /* I think */ |
774
|
|
|
|
|
|
d0 = dest; |
775
|
|
|
|
|
|
|
776
|
0
|
|
|
|
|
while (s < send) { |
777
|
0
|
|
|
|
|
if (*s < 0x80){ |
778
|
0
|
|
|
|
|
*dest++ = *s++; |
779
|
|
|
|
|
|
} else { |
780
|
|
|
|
|
|
STRLEN ulen; |
781
|
0
|
|
|
|
|
UV uv = *s++; |
782
|
|
|
|
|
|
|
783
|
|
|
|
|
|
/* Have to do it all ourselves because of error routine, |
784
|
|
|
|
|
|
aargh. */ |
785
|
0
|
|
|
|
|
if (!(uv & 0x40)){ goto failure; } |
786
|
0
|
|
|
|
|
if (!(uv & 0x20)) { ulen = 2; uv &= 0x1f; } |
787
|
0
|
|
|
|
|
else if (!(uv & 0x10)) { ulen = 3; uv &= 0x0f; } |
788
|
0
|
|
|
|
|
else if (!(uv & 0x08)) { ulen = 4; uv &= 0x07; } |
789
|
0
|
|
|
|
|
else if (!(uv & 0x04)) { ulen = 5; uv &= 0x03; } |
790
|
0
|
|
|
|
|
else if (!(uv & 0x02)) { ulen = 6; uv &= 0x01; } |
791
|
0
|
|
|
|
|
else if (!(uv & 0x01)) { ulen = 7; uv = 0; } |
792
|
|
|
|
|
|
else { ulen = 13; uv = 0; } |
793
|
|
|
|
|
|
|
794
|
|
|
|
|
|
/* Note change to utf8.c variable naming, for variety */ |
795
|
0
|
|
|
|
|
while (ulen--) { |
796
|
0
|
|
|
|
|
if ((*s & 0xc0) != 0x80){ |
797
|
|
|
|
|
|
goto failure; |
798
|
|
|
|
|
|
} else { |
799
|
0
|
|
|
|
|
uv = (uv << 6) | (*s++ & 0x3f); |
800
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
} |
802
|
0
|
|
|
|
|
if (uv > 256) { |
803
|
|
|
|
|
|
failure: |
804
|
0
|
|
|
|
|
call_failure(check, s, dest, src); |
805
|
|
|
|
|
|
/* Now what happens? */ |
806
|
|
|
|
|
|
} |
807
|
0
|
|
|
|
|
*dest++ = (U8)uv; |
808
|
|
|
|
|
|
} |
809
|
|
|
|
|
|
} |
810
|
0
|
|
|
|
|
RETVAL = dest - d0; |
811
|
0
|
|
|
|
|
sv_usepvn(sv, (char *)dest, RETVAL); |
812
|
0
|
|
|
|
|
SvUTF8_off(sv); |
813
|
|
|
|
|
|
} else { |
814
|
0
|
|
|
|
|
RETVAL = (utf8_to_bytes(s, &len) ? len : 0); |
815
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
} |
817
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
OUTPUT: |
819
|
|
|
|
|
|
RETVAL |
820
|
|
|
|
|
|
|
821
|
|
|
|
|
|
bool |
822
|
|
|
|
|
|
is_utf8(sv, check = 0) |
823
|
|
|
|
|
|
SV * sv |
824
|
|
|
|
|
|
int check |
825
|
|
|
|
|
|
CODE: |
826
|
|
|
|
|
|
{ |
827
|
5656
|
|
|
|
|
if (SvGMAGICAL(sv)) /* it could be $1, for example */ |
828
|
2
|
|
|
|
|
sv = newSVsv(sv); /* GMAGIG will be done */ |
829
|
5656
|
|
|
|
|
RETVAL = SvUTF8(sv) ? TRUE : FALSE; |
830
|
5656
|
|
|
|
|
if (RETVAL && |
831
|
0
|
|
|
|
|
check && |
832
|
0
|
|
|
|
|
!is_utf8_string((U8*)SvPVX(sv), SvCUR(sv))) |
833
|
|
|
|
|
|
RETVAL = FALSE; |
834
|
5656
|
|
|
|
|
if (sv != ST(0)) |
835
|
2
|
|
|
|
|
SvREFCNT_dec(sv); /* it was a temp copy */ |
836
|
|
|
|
|
|
} |
837
|
|
|
|
|
|
OUTPUT: |
838
|
|
|
|
|
|
RETVAL |
839
|
|
|
|
|
|
|
840
|
|
|
|
|
|
#ifndef SvIsCOW |
841
|
|
|
|
|
|
# define SvIsCOW (SvREADONLY(sv) && SvFAKE(sv)) |
842
|
|
|
|
|
|
#endif |
843
|
|
|
|
|
|
|
844
|
|
|
|
|
|
SV * |
845
|
|
|
|
|
|
_utf8_on(sv) |
846
|
|
|
|
|
|
SV * sv |
847
|
|
|
|
|
|
CODE: |
848
|
|
|
|
|
|
{ |
849
|
20
|
|
|
|
|
if (SvPOK(sv)) { |
850
|
20
|
|
|
|
|
SV *rsv = newSViv(SvUTF8(sv)); |
851
|
|
|
|
|
|
RETVAL = rsv; |
852
|
20
|
|
|
|
|
if (SvIsCOW(sv)) sv_force_normal(sv); |
853
|
20
|
|
|
|
|
SvUTF8_on(sv); |
854
|
|
|
|
|
|
} else { |
855
|
|
|
|
|
|
RETVAL = &PL_sv_undef; |
856
|
|
|
|
|
|
} |
857
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
OUTPUT: |
859
|
|
|
|
|
|
RETVAL |
860
|
|
|
|
|
|
|
861
|
|
|
|
|
|
SV * |
862
|
|
|
|
|
|
_utf8_off(sv) |
863
|
|
|
|
|
|
SV * sv |
864
|
|
|
|
|
|
CODE: |
865
|
|
|
|
|
|
{ |
866
|
6
|
|
|
|
|
if (SvPOK(sv)) { |
867
|
6
|
|
|
|
|
SV *rsv = newSViv(SvUTF8(sv)); |
868
|
|
|
|
|
|
RETVAL = rsv; |
869
|
6
|
|
|
|
|
if (SvIsCOW(sv)) sv_force_normal(sv); |
870
|
6
|
|
|
|
|
SvUTF8_off(sv); |
871
|
|
|
|
|
|
} else { |
872
|
|
|
|
|
|
RETVAL = &PL_sv_undef; |
873
|
|
|
|
|
|
} |
874
|
|
|
|
|
|
} |
875
|
|
|
|
|
|
OUTPUT: |
876
|
|
|
|
|
|
RETVAL |
877
|
|
|
|
|
|
|
878
|
|
|
|
|
|
int |
879
|
|
|
|
|
|
DIE_ON_ERR() |
880
|
|
|
|
|
|
CODE: |
881
|
|
|
|
|
|
RETVAL = ENCODE_DIE_ON_ERR; |
882
|
|
|
|
|
|
OUTPUT: |
883
|
|
|
|
|
|
RETVAL |
884
|
|
|
|
|
|
|
885
|
|
|
|
|
|
int |
886
|
|
|
|
|
|
WARN_ON_ERR() |
887
|
|
|
|
|
|
CODE: |
888
|
|
|
|
|
|
RETVAL = ENCODE_WARN_ON_ERR; |
889
|
|
|
|
|
|
OUTPUT: |
890
|
|
|
|
|
|
RETVAL |
891
|
|
|
|
|
|
|
892
|
|
|
|
|
|
int |
893
|
|
|
|
|
|
LEAVE_SRC() |
894
|
|
|
|
|
|
CODE: |
895
|
|
|
|
|
|
RETVAL = ENCODE_LEAVE_SRC; |
896
|
|
|
|
|
|
OUTPUT: |
897
|
|
|
|
|
|
RETVAL |
898
|
|
|
|
|
|
|
899
|
|
|
|
|
|
int |
900
|
|
|
|
|
|
RETURN_ON_ERR() |
901
|
|
|
|
|
|
CODE: |
902
|
|
|
|
|
|
RETVAL = ENCODE_RETURN_ON_ERR; |
903
|
|
|
|
|
|
OUTPUT: |
904
|
|
|
|
|
|
RETVAL |
905
|
|
|
|
|
|
|
906
|
|
|
|
|
|
int |
907
|
|
|
|
|
|
PERLQQ() |
908
|
|
|
|
|
|
CODE: |
909
|
|
|
|
|
|
RETVAL = ENCODE_PERLQQ; |
910
|
|
|
|
|
|
OUTPUT: |
911
|
|
|
|
|
|
RETVAL |
912
|
|
|
|
|
|
|
913
|
|
|
|
|
|
int |
914
|
|
|
|
|
|
HTMLCREF() |
915
|
|
|
|
|
|
CODE: |
916
|
|
|
|
|
|
RETVAL = ENCODE_HTMLCREF; |
917
|
|
|
|
|
|
OUTPUT: |
918
|
|
|
|
|
|
RETVAL |
919
|
|
|
|
|
|
|
920
|
|
|
|
|
|
int |
921
|
|
|
|
|
|
XMLCREF() |
922
|
|
|
|
|
|
CODE: |
923
|
|
|
|
|
|
RETVAL = ENCODE_XMLCREF; |
924
|
|
|
|
|
|
OUTPUT: |
925
|
|
|
|
|
|
RETVAL |
926
|
|
|
|
|
|
|
927
|
|
|
|
|
|
int |
928
|
|
|
|
|
|
STOP_AT_PARTIAL() |
929
|
|
|
|
|
|
CODE: |
930
|
|
|
|
|
|
RETVAL = ENCODE_STOP_AT_PARTIAL; |
931
|
|
|
|
|
|
OUTPUT: |
932
|
|
|
|
|
|
RETVAL |
933
|
|
|
|
|
|
|
934
|
|
|
|
|
|
int |
935
|
|
|
|
|
|
FB_DEFAULT() |
936
|
|
|
|
|
|
CODE: |
937
|
|
|
|
|
|
RETVAL = ENCODE_FB_DEFAULT; |
938
|
|
|
|
|
|
OUTPUT: |
939
|
|
|
|
|
|
RETVAL |
940
|
|
|
|
|
|
|
941
|
|
|
|
|
|
int |
942
|
|
|
|
|
|
FB_CROAK() |
943
|
|
|
|
|
|
CODE: |
944
|
|
|
|
|
|
RETVAL = ENCODE_FB_CROAK; |
945
|
|
|
|
|
|
OUTPUT: |
946
|
|
|
|
|
|
RETVAL |
947
|
|
|
|
|
|
|
948
|
|
|
|
|
|
int |
949
|
|
|
|
|
|
FB_QUIET() |
950
|
|
|
|
|
|
CODE: |
951
|
|
|
|
|
|
RETVAL = ENCODE_FB_QUIET; |
952
|
|
|
|
|
|
OUTPUT: |
953
|
|
|
|
|
|
RETVAL |
954
|
|
|
|
|
|
|
955
|
|
|
|
|
|
int |
956
|
|
|
|
|
|
FB_WARN() |
957
|
|
|
|
|
|
CODE: |
958
|
|
|
|
|
|
RETVAL = ENCODE_FB_WARN; |
959
|
|
|
|
|
|
OUTPUT: |
960
|
|
|
|
|
|
RETVAL |
961
|
|
|
|
|
|
|
962
|
|
|
|
|
|
int |
963
|
|
|
|
|
|
FB_PERLQQ() |
964
|
|
|
|
|
|
CODE: |
965
|
|
|
|
|
|
RETVAL = ENCODE_FB_PERLQQ; |
966
|
|
|
|
|
|
OUTPUT: |
967
|
|
|
|
|
|
RETVAL |
968
|
|
|
|
|
|
|
969
|
|
|
|
|
|
int |
970
|
|
|
|
|
|
FB_HTMLCREF() |
971
|
|
|
|
|
|
CODE: |
972
|
|
|
|
|
|
RETVAL = ENCODE_FB_HTMLCREF; |
973
|
|
|
|
|
|
OUTPUT: |
974
|
|
|
|
|
|
RETVAL |
975
|
|
|
|
|
|
|
976
|
|
|
|
|
|
int |
977
|
|
|
|
|
|
FB_XMLCREF() |
978
|
|
|
|
|
|
CODE: |
979
|
|
|
|
|
|
RETVAL = ENCODE_FB_XMLCREF; |
980
|
|
|
|
|
|
OUTPUT: |
981
|
|
|
|
|
|
RETVAL |
982
|
|
|
|
|
|
|
983
|
|
|
|
|
|
BOOT: |
984
|
|
|
|
|
|
{ |
985
|
|
|
|
|
|
#include "def_t.h" |
986
|
|
|
|
|
|
#include "def_t.exh" |
987
|
|
|
|
|
|
} |