line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#include "EXTERN.h" |
2
|
|
|
|
|
|
|
#include "perl.h" |
3
|
|
|
|
|
|
|
#include "XSUB.h" |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
#ifdef XS_VERSION |
6
|
|
|
|
|
|
|
#undef XS_VERSION |
7
|
|
|
|
|
|
|
#endif |
8
|
|
|
|
|
|
|
#define XS_VERSION "2.500" |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
/*
 * Punycode parameters.  These are the values RFC 3492, section 5 assigns
 * to the Bootstring parameters for Punycode.
 */
#define BASE 36
#define TMIN 1
#define TMAX 26
#define SKEW 38
#define DAMP 700
#define INITIAL_BIAS 72
#define INITIAL_N 128

/* True if x is a single-byte (UTF-8 invariant) character.  The argument is
   parenthesized so that any expression can be passed safely. */
#define isBASE(x) UTF8_IS_INVARIANT((unsigned char)(x))
/* Separates the literal basic code points from the encoded deltas. */
#define DELIM '-'

/* Clamp a threshold t into the range TMIN..TMAX. */
#define TMIN_MAX(t) (((t) < TMIN) ? (TMIN) : ((t) > TMAX) ? (TMAX) : (t))

/* Fallback used when perl.h does not provide utf8_to_uvchr_buf: the end
   pointer is ignored.  No trailing semicolon, so the macro expands to an
   expression exactly like the real function call would. */
#ifndef utf8_to_uvchr_buf
#define utf8_to_uvchr_buf(in_p,in_e,u8) utf8_to_uvchr((in_p),(u8))
#endif
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
/* Maps a digit value 0..BASE-1 to its output character: 'a'..'z' stand for
   0..25 and '0'..'9' for 26..35.  Read-only, hence const. */
static const char enc_digit[BASE] = {
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
};
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
/* Maps a 7-bit character to its digit value, or -1 if the character is not
   a digit.  Upper- and lower-case letters both map to 0..25; '0'..'9' map
   to 26..35.  Read-only, hence const. */
static const IV dec_digit[0x80] = {
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 00..0F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10..1F */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20..2F */
  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1, /* 30..3F */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 40..4F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 50..5F */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, /* 60..6F */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, /* 70..7F */
};
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
static int adapt(int delta, int numpoints, int first) { |
45
|
|
|
|
|
|
|
int k; |
46
|
|
|
|
|
|
|
|
47
|
22587
|
100
|
|
|
|
|
delta /= first ? DAMP : 2; |
|
|
100
|
|
|
|
|
|
48
|
22587
|
|
|
|
|
|
delta += delta/numpoints; |
49
|
|
|
|
|
|
|
|
50
|
41035
|
100
|
|
|
|
|
for(k=0; delta > ((BASE-TMIN) * TMAX)/2; k += BASE) |
|
|
100
|
|
|
|
|
|
51
|
18448
|
|
|
|
|
|
delta /= BASE-TMIN; |
52
|
|
|
|
|
|
|
|
53
|
22587
|
|
|
|
|
|
return k + (((BASE-TMIN+1) * delta) / (delta+SKEW)); |
54
|
|
|
|
|
|
|
}; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
static void |
57
|
124540
|
|
|
|
|
|
grow_string(SV *const sv, char **start, char **current, char **end, STRLEN add) |
58
|
|
|
|
|
|
|
{ |
59
|
|
|
|
|
|
|
STRLEN len; |
60
|
|
|
|
|
|
|
|
61
|
124540
|
100
|
|
|
|
|
if(*current + add <= *end) |
62
|
|
|
|
|
|
|
return; |
63
|
|
|
|
|
|
|
|
64
|
22
|
|
|
|
|
|
len = (*current - *start); |
65
|
22
|
50
|
|
|
|
|
*start = SvGROW(sv, (len + add + 15) & ~15); |
|
|
50
|
|
|
|
|
|
66
|
22
|
|
|
|
|
|
*current = *start + len; |
67
|
22
|
|
|
|
|
|
*end = *start + SvLEN(sv); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
MODULE = Net::IDN::Punycode PACKAGE = Net::IDN::Punycode |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
SV* |
73
|
|
|
|
|
|
|
encode_punycode(input) |
74
|
|
|
|
|
|
|
SV * input |
75
|
|
|
|
|
|
|
PREINIT: |
76
|
|
|
|
|
|
|
UV c, m, n = INITIAL_N; |
77
|
|
|
|
|
|
|
int k, q, t; |
78
|
|
|
|
|
|
|
int bias = INITIAL_BIAS; |
79
|
|
|
|
|
|
|
int delta = 0, skip_delta; |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
const char *in_s, *in_p, *in_e, *skip_p; |
82
|
|
|
|
|
|
|
char *re_s, *re_p, *re_e; |
83
|
|
|
|
|
|
|
int first = 1; |
84
|
|
|
|
|
|
|
STRLEN length_guess, len, h, u8; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
CODE: |
87
|
1735
|
100
|
|
|
|
|
in_s = in_p = SvPVutf8(input, len); |
88
|
1735
|
|
|
|
|
|
in_e = in_s + len; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
length_guess = len; |
91
|
1735
|
100
|
|
|
|
|
if(length_guess < 64) length_guess = 64; /* optimise for maximum length of domain names */ |
92
|
1735
|
|
|
|
|
|
length_guess += 2; /* plus DELIM + '\0' */ |
93
|
|
|
|
|
|
|
|
94
|
1735
|
|
|
|
|
|
RETVAL = NEWSV('P',length_guess); |
95
|
1735
|
|
|
|
|
|
SvPOK_only(RETVAL); |
96
|
1735
|
50
|
|
|
|
|
re_s = re_p = SvPV_nolen(RETVAL); |
97
|
1735
|
|
|
|
|
|
re_e = re_s + SvLEN(RETVAL); |
98
|
|
|
|
|
|
|
h = 0; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
/* copy basic code points */ |
101
|
18065
|
100
|
|
|
|
|
while(in_p < in_e) { |
102
|
16330
|
100
|
|
|
|
|
if( isBASE(*in_p) ) { |
103
|
7282
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
104
|
7282
|
|
|
|
|
|
*re_p++ = *in_p; |
105
|
7282
|
|
|
|
|
|
h++; |
106
|
|
|
|
|
|
|
} |
107
|
16330
|
|
|
|
|
|
in_p++; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
/* add DELIM if needed */ |
111
|
1735
|
100
|
|
|
|
|
if(h) { |
112
|
455
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
113
|
1735
|
|
|
|
|
|
*re_p++ = DELIM; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
for(;;) { |
117
|
|
|
|
|
|
|
/* find smallest code point not yet handled */ |
118
|
|
|
|
|
|
|
m = UV_MAX; |
119
|
|
|
|
|
|
|
q = skip_delta = 0; |
120
|
|
|
|
|
|
|
|
121
|
36383
|
100
|
|
|
|
|
for(in_p = skip_p = in_s; in_p < in_e;) { |
122
|
31390
|
50
|
|
|
|
|
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8); |
123
|
|
|
|
|
|
|
c = NATIVE_TO_UNI(c); |
124
|
|
|
|
|
|
|
|
125
|
31390
|
100
|
|
|
|
|
if(c >= n && c < m) { |
126
|
|
|
|
|
|
|
m = c; |
127
|
|
|
|
|
|
|
skip_p = in_p; |
128
|
|
|
|
|
|
|
skip_delta = q; |
129
|
|
|
|
|
|
|
} |
130
|
31390
|
100
|
|
|
|
|
if(c < n) |
131
|
24390
|
|
|
|
|
|
++q; |
132
|
31390
|
|
|
|
|
|
in_p += u8; |
133
|
|
|
|
|
|
|
} |
134
|
4993
|
100
|
|
|
|
|
if(m == UV_MAX) |
135
|
|
|
|
|
|
|
break; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
/* increase delta to the state corresponding to |
138
|
|
|
|
|
|
|
the m code point at the beginning of the string */ |
139
|
3258
|
|
|
|
|
|
delta += (m-n) * (h+1); |
140
|
|
|
|
|
|
|
n = m; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
/* now find the chars to be encoded in this round */ |
143
|
|
|
|
|
|
|
|
144
|
3258
|
|
|
|
|
|
delta += skip_delta; |
145
|
16691
|
100
|
|
|
|
|
for(in_p = skip_p; in_p < in_e;) { |
146
|
13433
|
50
|
|
|
|
|
c = utf8_to_uvchr_buf((U8*)in_p, (U8*)in_e, &u8); |
147
|
|
|
|
|
|
|
c = NATIVE_TO_UNI(c); |
148
|
|
|
|
|
|
|
|
149
|
13433
|
100
|
|
|
|
|
if(c < n) { |
150
|
7904
|
|
|
|
|
|
++delta; |
151
|
5529
|
100
|
|
|
|
|
} else if( c == n ) { |
152
|
|
|
|
|
|
|
q = delta; |
153
|
|
|
|
|
|
|
|
154
|
6793
|
|
|
|
|
|
for(k = BASE;; k += BASE) { |
155
|
10156
|
100
|
|
|
|
|
t = TMIN_MAX(k - bias); |
156
|
10156
|
100
|
|
|
|
|
if(q < t) break; |
157
|
6793
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
158
|
6793
|
|
|
|
|
|
*re_p++ = enc_digit[t + ((q-t) % (BASE-t))]; |
159
|
6793
|
|
|
|
|
|
q = (q-t) / (BASE-t); |
160
|
6793
|
|
|
|
|
|
} |
161
|
3363
|
50
|
|
|
|
|
if(q > BASE) croak("input exceeds punycode limit"); |
162
|
3363
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
163
|
3363
|
|
|
|
|
|
*re_p++ = enc_digit[q]; |
164
|
3363
|
|
|
|
|
|
bias = adapt(delta, h+1, first); |
165
|
|
|
|
|
|
|
delta = first = 0; |
166
|
3363
|
|
|
|
|
|
++h; |
167
|
|
|
|
|
|
|
} |
168
|
13433
|
|
|
|
|
|
in_p += u8; |
169
|
|
|
|
|
|
|
} |
170
|
3258
|
|
|
|
|
|
++delta; |
171
|
3258
|
|
|
|
|
|
++n; |
172
|
3258
|
|
|
|
|
|
} |
173
|
1735
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, sizeof(char)); |
174
|
1735
|
|
|
|
|
|
*re_p = 0; |
175
|
1735
|
|
|
|
|
|
SvCUR_set(RETVAL, re_p - re_s); |
176
|
|
|
|
|
|
|
OUTPUT: |
177
|
|
|
|
|
|
|
RETVAL |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
SV* |
180
|
|
|
|
|
|
|
decode_punycode(input) |
181
|
|
|
|
|
|
|
SV * input |
182
|
|
|
|
|
|
|
PREINIT: |
183
|
|
|
|
|
|
|
UV c, n = INITIAL_N; |
184
|
|
|
|
|
|
|
IV dc; |
185
|
|
|
|
|
|
|
int i = 0, oldi, j, k, t, w; |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
int bias = INITIAL_BIAS; |
188
|
|
|
|
|
|
|
int delta = 0, skip_delta; |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
const char *in_s, *in_p, *in_e, *skip_p; |
191
|
|
|
|
|
|
|
char *re_s, *re_p, *re_e; |
192
|
|
|
|
|
|
|
int first = 1; |
193
|
|
|
|
|
|
|
STRLEN length_guess, len, h, u8; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
CODE: |
196
|
8401
|
100
|
|
|
|
|
in_s = in_p = SvPV_nolen(input); |
197
|
8401
|
|
|
|
|
|
in_e = SvEND(input); |
198
|
|
|
|
|
|
|
|
199
|
8401
|
|
|
|
|
|
length_guess = SvCUR(input) * 2; |
200
|
8401
|
50
|
|
|
|
|
if(length_guess < 256) length_guess = 256; |
201
|
|
|
|
|
|
|
|
202
|
8401
|
|
|
|
|
|
RETVAL = NEWSV('D',length_guess); |
203
|
8401
|
|
|
|
|
|
SvPOK_only(RETVAL); |
204
|
8401
|
50
|
|
|
|
|
re_s = re_p = SvPV_nolen(RETVAL); |
205
|
8401
|
|
|
|
|
|
re_e = re_s + SvLEN(RETVAL); |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
skip_p = NULL; |
208
|
85694
|
100
|
|
|
|
|
for(in_p = in_s; in_p < in_e; in_p++) { |
209
|
77293
|
|
|
|
|
|
c = *in_p; /* we don't care whether it's UTF-8 */ |
210
|
77293
|
50
|
|
|
|
|
if(!isBASE(c)) croak("non-base character in input for decode_punycode"); |
211
|
77293
|
100
|
|
|
|
|
if(c == DELIM) skip_p = in_p; |
212
|
77293
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, 1); |
213
|
77293
|
|
|
|
|
|
*re_p++ = c; /* copy it */ |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
|
216
|
8401
|
100
|
|
|
|
|
if(skip_p) { |
217
|
2061
|
|
|
|
|
|
h = skip_p - in_s; /* base chars handled */ |
218
|
2061
|
|
|
|
|
|
re_p = re_s + h; /* points to end of base chars */ |
219
|
8401
|
|
|
|
|
|
skip_p++; /* skip over DELIM */ |
220
|
|
|
|
|
|
|
} else { |
221
|
|
|
|
|
|
|
h = 0; /* no base chars */ |
222
|
6340
|
|
|
|
|
|
re_p = re_s; |
223
|
|
|
|
|
|
|
skip_p = in_s; /* read everything */ |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
27625
|
100
|
|
|
|
|
for(in_p = skip_p; in_p < in_e; i++) { |
227
|
|
|
|
|
|
|
oldi = i; |
228
|
|
|
|
|
|
|
w = 1; |
229
|
|
|
|
|
|
|
|
230
|
51247
|
|
|
|
|
|
for(k = BASE;; k+= BASE) { |
231
|
70477
|
100
|
|
|
|
|
if(!(in_p < in_e)) croak("incomplete encoded code point in decode_punycode"); |
232
|
70471
|
|
|
|
|
|
dc = dec_digit[*in_p++]; /* we already know it's in 0..127 */ |
233
|
70471
|
50
|
|
|
|
|
if(dc < 0) croak("invalid digit in input for decode_punycode"); |
234
|
70471
|
|
|
|
|
|
c = (UV)dc; |
235
|
70471
|
|
|
|
|
|
i += c * w; |
236
|
70471
|
100
|
|
|
|
|
t = TMIN_MAX(k - bias); |
237
|
70471
|
100
|
|
|
|
|
if(c < t) break; |
238
|
51247
|
|
|
|
|
|
w *= BASE-t; |
239
|
51247
|
|
|
|
|
|
} |
240
|
19224
|
|
|
|
|
|
h++; |
241
|
19224
|
|
|
|
|
|
bias = adapt(i-oldi, h, first); |
242
|
|
|
|
|
|
|
first = 0; |
243
|
19224
|
|
|
|
|
|
n += i / h; /* code point n to insert */ |
244
|
19224
|
|
|
|
|
|
i = i % h; /* at position i */ |
245
|
|
|
|
|
|
|
|
246
|
19224
|
50
|
|
|
|
|
u8 = UNISKIP(n); /* how many bytes we need */ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
j = i; |
249
|
36412
|
100
|
|
|
|
|
for(skip_p = re_s; j > 0; j--) /* find position in UTF-8 */ |
250
|
17188
|
|
|
|
|
|
skip_p+=UTF8SKIP(skip_p); |
251
|
|
|
|
|
|
|
|
252
|
19224
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, u8); |
253
|
19224
|
100
|
|
|
|
|
if(skip_p < re_p) /* move succeeding chars */ |
254
|
7777
|
|
|
|
|
|
Move(skip_p, skip_p + u8, re_p - skip_p, char); |
255
|
19224
|
|
|
|
|
|
re_p += u8; |
256
|
19224
|
|
|
|
|
|
uvuni_to_utf8_flags((U8*)skip_p, n, UNICODE_ALLOW_ANY); |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
8395
|
50
|
|
|
|
|
if(!first) SvUTF8_on(RETVAL); /* UTF-8 chars have been inserted */ |
260
|
8395
|
|
|
|
|
|
grow_string(RETVAL, &re_s, &re_p, &re_e, 1); |
261
|
8395
|
|
|
|
|
|
*re_p = 0; |
262
|
8395
|
|
|
|
|
|
SvCUR_set(RETVAL, re_p - re_s); |
263
|
|
|
|
|
|
|
OUTPUT: |
264
|
|
|
|
|
|
|
RETVAL |