line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
Copyright (C) 2016-2017 Alexander Borisov |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
5
|
|
|
|
|
|
|
modify it under the terms of the GNU Lesser General Public |
6
|
|
|
|
|
|
|
License as published by the Free Software Foundation; either |
7
|
|
|
|
|
|
|
version 2.1 of the License, or (at your option) any later version. |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful, |
10
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
11
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12
|
|
|
|
|
|
|
Lesser General Public License for more details. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public |
15
|
|
|
|
|
|
|
License along with this library; if not, write to the Free Software |
16
|
|
|
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Author: lex.borisov@gmail.com (Alexander Borisov) |
19
|
|
|
|
|
|
|
*/ |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#include "myurl/punycode.h" |
22
|
|
|
|
|
|
|
|
23
|
0
|
|
|
|
|
|
/*
 * Copies the single-byte characters of `data` into `str_raw` and, if
 * `str_raw` is non-empty afterwards, appends the Punycode delimiter.
 *
 * data      - input bytes; every lead byte is classified with
 *             myencoding_ascii_utf_8_length()
 * data_size - number of bytes available in `data`
 * str_raw   - output string; each copied byte is stored at
 *             str_raw->data[str_raw->length] and `length` is advanced
 *
 * Returns MyURL_STATUS_ERROR as soon as a lead byte is reported with
 * length 0 (bytes copied before that point stay in `str_raw`), otherwise
 * MyURL_STATUS_OK.
 *
 * NOTE(review): sequences longer than one byte are only skipped; nothing is
 * emitted for them, so the non-basic part of the Punycode encoding is not
 * produced by this function yet.
 * NOTE(review): no capacity check on str_raw->data is performed here;
 * presumably the caller guarantees enough room -- confirm.
 * NOTE(review): the delimiter test uses str_raw->length, so content that was
 * already in `str_raw` before the call also triggers the delimiter -- confirm
 * this is intended.
 */
mystatus_t myurl_punycode_encode_with_callback(const unsigned char* data, size_t data_size, mycore_string_raw_t* str_raw)
{
    size_t i = 0;

    while(i < data_size) {
        /* presumably the byte length of the UTF-8 sequence starting at data[i] */
        size_t n = myencoding_ascii_utf_8_length(data[i]);

        /* a length of 0 marks a byte that cannot start a sequence */
        if(n == 0) {
            return MyURL_STATUS_ERROR;
        }

        /* single-byte characters are copied to the output unchanged */
        if(n == 1) {
            str_raw->data[ str_raw->length++ ] = data[i];
        }

        /* step over the whole sequence, copied or not */
        i += n;
    }

    if(str_raw->length) {
        str_raw->data[ str_raw->length++ ] = MyURL_PUNYCODE_CONST_DELIMITER;
    }

    return MyURL_STATUS_OK;
}
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
///* |
54
|
|
|
|
|
|
|
// punycode.c from RFC 3492 |
55
|
|
|
|
|
|
|
// http://www.nicemice.net/idn/ |
56
|
|
|
|
|
|
|
// Adam M. Costello |
57
|
|
|
|
|
|
|
// http://www.nicemice.net/amc/ |
58
|
|
|
|
|
|
|
// |
59
|
|
|
|
|
|
|
// This is ANSI C code (C89) implementing Punycode (RFC 3492). |
60
|
|
|
|
|
|
|
// |
61
|
|
|
|
|
|
|
// */ |
62
|
|
|
|
|
|
|
// |
63
|
|
|
|
|
|
|
// |
64
|
|
|
|
|
|
|
///************************************************************/ |
65
|
|
|
|
|
|
|
///* Public interface (would normally go in its own .h file): */ |
66
|
|
|
|
|
|
|
// |
67
|
|
|
|
|
|
|
//#include <limits.h> |
68
|
|
|
|
|
|
|
// |
69
|
|
|
|
|
|
|
//enum punycode_status { |
70
|
|
|
|
|
|
|
// punycode_success, |
71
|
|
|
|
|
|
|
// punycode_bad_input, /* Input is invalid. */ |
72
|
|
|
|
|
|
|
// punycode_big_output, /* Output would exceed the space provided. */ |
73
|
|
|
|
|
|
|
// punycode_overflow /* Input needs wider integers to process. */ |
74
|
|
|
|
|
|
|
//}; |
75
|
|
|
|
|
|
|
// |
76
|
|
|
|
|
|
|
//#if UINT_MAX >= (1 << 26) - 1 |
77
|
|
|
|
|
|
|
//typedef unsigned int punycode_uint; |
78
|
|
|
|
|
|
|
//#else |
79
|
|
|
|
|
|
|
//typedef unsigned long punycode_uint; |
80
|
|
|
|
|
|
|
//#endif |
81
|
|
|
|
|
|
|
// |
82
|
|
|
|
|
|
|
//enum punycode_status punycode_encode(punycode_uint input_length, |
83
|
|
|
|
|
|
|
// const punycode_uint input[], |
84
|
|
|
|
|
|
|
// const unsigned char case_flags[], |
85
|
|
|
|
|
|
|
// punycode_uint *output_length, |
86
|
|
|
|
|
|
|
// char output[] ); |
87
|
|
|
|
|
|
|
// |
88
|
|
|
|
|
|
|
///* punycode_encode() converts Unicode to Punycode. The input */ |
89
|
|
|
|
|
|
|
///* is represented as an array of Unicode code points (not code */ |
90
|
|
|
|
|
|
|
///* units; surrogate pairs are not allowed), and the output */ |
91
|
|
|
|
|
|
|
///* will be represented as an array of ASCII code points. The */ |
92
|
|
|
|
|
|
|
///* output string is *not* null-terminated; it will contain */ |
93
|
|
|
|
|
|
|
///* zeros if and only if the input contains zeros. (Of course */ |
94
|
|
|
|
|
|
|
///* the caller can leave room for a terminator and add one if */ |
95
|
|
|
|
|
|
|
///* needed.) The input_length is the number of code points in */ |
96
|
|
|
|
|
|
|
///* the input. The output_length is an in/out argument: the */ |
97
|
|
|
|
|
|
|
///* caller passes in the maximum number of code points that it */ |
98
|
|
|
|
|
|
|
///* can receive, and on successful return it will contain the */ |
99
|
|
|
|
|
|
|
///* number of code points actually output. The case_flags array */ |
100
|
|
|
|
|
|
|
///* holds input_length boolean values, where nonzero suggests that */ |
101
|
|
|
|
|
|
|
///* the corresponding Unicode character be forced to uppercase */ |
102
|
|
|
|
|
|
|
///* after being decoded (if possible), and zero suggests that */ |
103
|
|
|
|
|
|
|
///* it be forced to lowercase (if possible). ASCII code points */ |
104
|
|
|
|
|
|
|
///* are encoded literally, except that ASCII letters are forced */ |
105
|
|
|
|
|
|
|
///* to uppercase or lowercase according to the corresponding */ |
106
|
|
|
|
|
|
|
///* uppercase flags. If case_flags is a null pointer then ASCII */ |
107
|
|
|
|
|
|
|
///* letters are left as they are, and other code points are */ |
108
|
|
|
|
|
|
|
///* treated as if their uppercase flags were zero. The return */ |
109
|
|
|
|
|
|
|
///* value can be any of the punycode_status values defined above */ |
110
|
|
|
|
|
|
|
///* except punycode_bad_input; if not punycode_success, then */ |
111
|
|
|
|
|
|
|
///* output_size and output might contain garbage. */ |
112
|
|
|
|
|
|
|
// |
113
|
|
|
|
|
|
|
//enum punycode_status punycode_decode(punycode_uint input_length, |
114
|
|
|
|
|
|
|
// const char input[], |
115
|
|
|
|
|
|
|
// punycode_uint *output_length, |
116
|
|
|
|
|
|
|
// punycode_uint output[], |
117
|
|
|
|
|
|
|
// unsigned char case_flags[] ); |
118
|
|
|
|
|
|
|
// |
119
|
|
|
|
|
|
|
///* punycode_decode() converts Punycode to Unicode. The input is */ |
120
|
|
|
|
|
|
|
///* represented as an array of ASCII code points, and the output */ |
121
|
|
|
|
|
|
|
///* will be represented as an array of Unicode code points. The */ |
122
|
|
|
|
|
|
|
///* input_length is the number of code points in the input. The */ |
123
|
|
|
|
|
|
|
///* output_length is an in/out argument: the caller passes in */ |
124
|
|
|
|
|
|
|
///* the maximum number of code points that it can receive, and */ |
125
|
|
|
|
|
|
|
///* on successful return it will contain the actual number of */ |
126
|
|
|
|
|
|
|
///* code points output. The case_flags array needs room for at */ |
127
|
|
|
|
|
|
|
///* least output_length values, or it can be a null pointer if the */ |
128
|
|
|
|
|
|
|
///* case information is not needed. A nonzero flag suggests that */ |
129
|
|
|
|
|
|
|
///* the corresponding Unicode character be forced to uppercase */ |
130
|
|
|
|
|
|
|
///* by the caller (if possible), while zero suggests that it be */ |
131
|
|
|
|
|
|
|
///* forced to lowercase (if possible). ASCII code points are */ |
132
|
|
|
|
|
|
|
///* output already in the proper case, but their flags will be set */ |
133
|
|
|
|
|
|
|
///* appropriately so that applying the flags would be harmless. */ |
134
|
|
|
|
|
|
|
///* The return value can be any of the punycode_status values */ |
135
|
|
|
|
|
|
|
///* defined above; if not punycode_success, then output_length, */ |
136
|
|
|
|
|
|
|
///* output, and case_flags might contain garbage. On success, the */ |
137
|
|
|
|
|
|
|
///* decoder will never need to write an output_length greater than */ |
138
|
|
|
|
|
|
|
///* input_length, because of how the encoding is defined. */ |
139
|
|
|
|
|
|
|
// |
140
|
|
|
|
|
|
|
///**********************************************************/ |
141
|
|
|
|
|
|
|
///* Implementation (would normally go in its own .c file): */ |
142
|
|
|
|
|
|
|
// |
143
|
|
|
|
|
|
|
//#include <string.h> |
144
|
|
|
|
|
|
|
// |
145
|
|
|
|
|
|
|
///*** Bootstring parameters for Punycode ***/ |
146
|
|
|
|
|
|
|
// |
147
|
|
|
|
|
|
|
//enum { base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700, |
148
|
|
|
|
|
|
|
// initial_bias = 72, initial_n = 0x80, delimiter = 0x2D }; |
149
|
|
|
|
|
|
|
// |
150
|
|
|
|
|
|
|
///* basic(cp) tests whether cp is a basic code point: */ |
151
|
|
|
|
|
|
|
//#define basic(cp) ((punycode_uint)(cp) < 0x80) |
152
|
|
|
|
|
|
|
// |
153
|
|
|
|
|
|
|
///* delim(cp) tests whether cp is a delimiter: */ |
154
|
|
|
|
|
|
|
//#define delim(cp) ((cp) == delimiter) |
155
|
|
|
|
|
|
|
// |
156
|
|
|
|
|
|
|
///* decode_digit(cp) returns the numeric value of a basic code */ |
157
|
|
|
|
|
|
|
///* point (for use in representing integers) in the range 0 to */ |
158
|
|
|
|
|
|
|
///* base-1, or base if cp does not represent a value. */ |
159
|
|
|
|
|
|
|
// |
160
|
|
|
|
|
|
|
//static punycode_uint decode_digit(punycode_uint cp) |
161
|
|
|
|
|
|
|
//{ |
162
|
|
|
|
|
|
|
// return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : |
163
|
|
|
|
|
|
|
// cp - 97 < 26 ? cp - 97 : base; |
164
|
|
|
|
|
|
|
//} |
165
|
|
|
|
|
|
|
// |
166
|
|
|
|
|
|
|
///* encode_digit(d,flag) returns the basic code point whose value */ |
167
|
|
|
|
|
|
|
///* (when used for representing integers) is d, which needs to be in */ |
168
|
|
|
|
|
|
|
///* the range 0 to base-1. The lowercase form is used unless flag is */ |
169
|
|
|
|
|
|
|
///* nonzero, in which case the uppercase form is used. The behavior */ |
170
|
|
|
|
|
|
|
///* is undefined if flag is nonzero and digit d has no uppercase form. */ |
171
|
|
|
|
|
|
|
// |
172
|
|
|
|
|
|
|
//static char encode_digit(punycode_uint d, int flag) |
173
|
|
|
|
|
|
|
//{ |
174
|
|
|
|
|
|
|
// return d + 22 + 75 * (d < 26) - ((flag != 0) << 5); |
175
|
|
|
|
|
|
|
// /* 0..25 map to ASCII a..z or A..Z */ |
176
|
|
|
|
|
|
|
// /* 26..35 map to ASCII 0..9 */ |
177
|
|
|
|
|
|
|
//} |
178
|
|
|
|
|
|
|
// |
179
|
|
|
|
|
|
|
///* flagged(bcp) tests whether a basic code point is flagged */ |
180
|
|
|
|
|
|
|
///* (uppercase). The behavior is undefined if bcp is not a */ |
181
|
|
|
|
|
|
|
///* basic code point. */ |
182
|
|
|
|
|
|
|
// |
183
|
|
|
|
|
|
|
//#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26) |
184
|
|
|
|
|
|
|
// |
185
|
|
|
|
|
|
|
///* encode_basic(bcp,flag) forces a basic code point to lowercase */ |
186
|
|
|
|
|
|
|
///* if flag is zero, uppercase if flag is nonzero, and returns */ |
187
|
|
|
|
|
|
|
///* the resulting code point. The code point is unchanged if it */ |
188
|
|
|
|
|
|
|
///* is caseless. The behavior is undefined if bcp is not a basic */ |
189
|
|
|
|
|
|
|
///* code point. */ |
190
|
|
|
|
|
|
|
// |
191
|
|
|
|
|
|
|
//static char encode_basic(punycode_uint bcp, int flag) |
192
|
|
|
|
|
|
|
//{ |
193
|
|
|
|
|
|
|
// bcp -= (bcp - 97 < 26) << 5; |
194
|
|
|
|
|
|
|
// return bcp + ((!flag && (bcp - 65 < 26)) << 5); |
195
|
|
|
|
|
|
|
//} |
196
|
|
|
|
|
|
|
// |
197
|
|
|
|
|
|
|
///*** Platform-specific constants ***/ |
198
|
|
|
|
|
|
|
// |
199
|
|
|
|
|
|
|
///* maxint is the maximum value of a punycode_uint variable: */ |
200
|
|
|
|
|
|
|
//static const punycode_uint maxint = -1; |
201
|
|
|
|
|
|
|
///* Because maxint is unsigned, -1 becomes the maximum value. */ |
202
|
|
|
|
|
|
|
// |
203
|
|
|
|
|
|
|
///*** Bias adaptation function ***/ |
204
|
|
|
|
|
|
|
// |
205
|
|
|
|
|
|
|
//static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime) |
206
|
|
|
|
|
|
|
//{ |
207
|
|
|
|
|
|
|
// punycode_uint k; |
208
|
|
|
|
|
|
|
// |
209
|
|
|
|
|
|
|
// delta = firsttime ? delta / damp : delta >> 1; |
210
|
|
|
|
|
|
|
// /* delta >> 1 is a faster way of doing delta / 2 */ |
211
|
|
|
|
|
|
|
// delta += delta / numpoints; |
212
|
|
|
|
|
|
|
// |
213
|
|
|
|
|
|
|
// for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) { |
214
|
|
|
|
|
|
|
// delta /= base - tmin; |
215
|
|
|
|
|
|
|
// } |
216
|
|
|
|
|
|
|
// |
217
|
|
|
|
|
|
|
// return k + (base - tmin + 1) * delta / (delta + skew); |
218
|
|
|
|
|
|
|
//} |
219
|
|
|
|
|
|
|
// |
220
|
|
|
|
|
|
|
///*** Main encode function ***/ |
221
|
|
|
|
|
|
|
// |
222
|
|
|
|
|
|
|
//enum punycode_status punycode_encode(punycode_uint input_length, |
223
|
|
|
|
|
|
|
// const punycode_uint input[], |
224
|
|
|
|
|
|
|
// const unsigned char case_flags[], |
225
|
|
|
|
|
|
|
// punycode_uint *output_length, |
226
|
|
|
|
|
|
|
// char output[] ) |
227
|
|
|
|
|
|
|
//{ |
228
|
|
|
|
|
|
|
// punycode_uint n, delta, h, b, out, max_out, bias, j, m, q, k, t; |
229
|
|
|
|
|
|
|
// |
230
|
|
|
|
|
|
|
// /* Initialize the state: */ |
231
|
|
|
|
|
|
|
// |
232
|
|
|
|
|
|
|
// n = initial_n; |
233
|
|
|
|
|
|
|
// delta = out = 0; |
234
|
|
|
|
|
|
|
// max_out = *output_length; |
235
|
|
|
|
|
|
|
// bias = initial_bias; |
236
|
|
|
|
|
|
|
// |
237
|
|
|
|
|
|
|
// /* Handle the basic code points: */ |
238
|
|
|
|
|
|
|
// for (j = 0; j < input_length; ++j) { |
239
|
|
|
|
|
|
|
// if (basic(input[j])) { |
240
|
|
|
|
|
|
|
// if (max_out - out < 2) return punycode_big_output; |
241
|
|
|
|
|
|
|
// output[out++] = |
242
|
|
|
|
|
|
|
// case_flags ? encode_basic(input[j], case_flags[j]) : input[j]; |
243
|
|
|
|
|
|
|
// } |
244
|
|
|
|
|
|
|
// /* else if (input[j] < n) return punycode_bad_input; */ |
245
|
|
|
|
|
|
|
// /* (not needed for Punycode with unsigned code points) */ |
246
|
|
|
|
|
|
|
// } |
247
|
|
|
|
|
|
|
// |
248
|
|
|
|
|
|
|
// h = b = out; |
249
|
|
|
|
|
|
|
// |
250
|
|
|
|
|
|
|
// /* h is the number of code points that have been handled, b is the */ |
251
|
|
|
|
|
|
|
// /* number of basic code points, and out is the number of characters */ |
252
|
|
|
|
|
|
|
// /* that have been output. */ |
253
|
|
|
|
|
|
|
// |
254
|
|
|
|
|
|
|
// if (b > 0) output[out++] = delimiter; |
255
|
|
|
|
|
|
|
// |
256
|
|
|
|
|
|
|
// /* Main encoding loop: */ |
257
|
|
|
|
|
|
|
// |
258
|
|
|
|
|
|
|
// while (h < input_length) { |
259
|
|
|
|
|
|
|
// /* All non-basic code points < n have been */ |
260
|
|
|
|
|
|
|
// /* handled already. Find the next larger one: */ |
261
|
|
|
|
|
|
|
// |
262
|
|
|
|
|
|
|
// for (m = maxint, j = 0; j < input_length; ++j) { |
263
|
|
|
|
|
|
|
// /* if (basic(input[j])) continue; */ |
264
|
|
|
|
|
|
|
// /* (not needed for Punycode) */ |
265
|
|
|
|
|
|
|
// if (input[j] >= n && input[j] < m) m = input[j]; |
266
|
|
|
|
|
|
|
// } |
267
|
|
|
|
|
|
|
// |
268
|
|
|
|
|
|
|
// /* Increase delta enough to advance the decoder's */ |
269
|
|
|
|
|
|
|
// /* <n,i> state to <m,0>, but guard against overflow: */ |
270
|
|
|
|
|
|
|
// |
271
|
|
|
|
|
|
|
// if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow; |
272
|
|
|
|
|
|
|
// delta += (m - n) * (h + 1); |
273
|
|
|
|
|
|
|
// n = m; |
274
|
|
|
|
|
|
|
// |
275
|
|
|
|
|
|
|
// for (j = 0; j < input_length; ++j) { |
276
|
|
|
|
|
|
|
// /* Punycode does not need to check whether input[j] is basic: */ |
277
|
|
|
|
|
|
|
// if (input[j] < n /* || basic(input[j]) */ ) { |
278
|
|
|
|
|
|
|
// if (++delta == 0) return punycode_overflow; |
279
|
|
|
|
|
|
|
// } |
280
|
|
|
|
|
|
|
// |
281
|
|
|
|
|
|
|
// if (input[j] == n) { |
282
|
|
|
|
|
|
|
// /* Represent delta as a generalized variable-length integer: */ |
283
|
|
|
|
|
|
|
// |
284
|
|
|
|
|
|
|
// for (q = delta, k = base; ; k += base) { |
285
|
|
|
|
|
|
|
// if (out >= max_out) return punycode_big_output; |
286
|
|
|
|
|
|
|
// |
287
|
|
|
|
|
|
|
// t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */ |
288
|
|
|
|
|
|
|
// k >= bias + tmax ? tmax : k - bias; |
289
|
|
|
|
|
|
|
// if (q < t) break; |
290
|
|
|
|
|
|
|
// output[out++] = encode_digit(t + (q - t) % (base - t), 0); |
291
|
|
|
|
|
|
|
// q = (q - t) / (base - t); |
292
|
|
|
|
|
|
|
// } |
293
|
|
|
|
|
|
|
// |
294
|
|
|
|
|
|
|
// output[out++] = encode_digit(q, case_flags && case_flags[j]); |
295
|
|
|
|
|
|
|
// bias = adapt(delta, h + 1, h == b); |
296
|
|
|
|
|
|
|
// delta = 0; |
297
|
|
|
|
|
|
|
// ++h; |
298
|
|
|
|
|
|
|
// } |
299
|
|
|
|
|
|
|
// } |
300
|
|
|
|
|
|
|
// |
301
|
|
|
|
|
|
|
// ++delta, ++n; |
302
|
|
|
|
|
|
|
// } |
303
|
|
|
|
|
|
|
// |
304
|
|
|
|
|
|
|
// *output_length = out; |
305
|
|
|
|
|
|
|
// return punycode_success; |
306
|
|
|
|
|
|
|
//} |
307
|
|
|
|
|
|
|
// |
308
|
|
|
|
|
|
|
///*** Main decode function ***/ |
309
|
|
|
|
|
|
|
// |
310
|
|
|
|
|
|
|
//enum punycode_status punycode_decode(punycode_uint input_length, |
311
|
|
|
|
|
|
|
// const char input[], |
312
|
|
|
|
|
|
|
// punycode_uint *output_length, |
313
|
|
|
|
|
|
|
// punycode_uint output[], |
314
|
|
|
|
|
|
|
// unsigned char case_flags[] ) |
315
|
|
|
|
|
|
|
//{ |
316
|
|
|
|
|
|
|
// punycode_uint n, out, i, max_out, bias, |
317
|
|
|
|
|
|
|
// b, j, in, oldi, w, k, digit, t; |
318
|
|
|
|
|
|
|
// |
319
|
|
|
|
|
|
|
// /* Initialize the state: */ |
320
|
|
|
|
|
|
|
// |
321
|
|
|
|
|
|
|
// n = initial_n; |
322
|
|
|
|
|
|
|
// out = i = 0; |
323
|
|
|
|
|
|
|
// max_out = *output_length; |
324
|
|
|
|
|
|
|
// bias = initial_bias; |
325
|
|
|
|
|
|
|
// |
326
|
|
|
|
|
|
|
// /* Handle the basic code points: Let b be the number of input code */ |
327
|
|
|
|
|
|
|
// /* points before the last delimiter, or 0 if there is none, then */ |
328
|
|
|
|
|
|
|
// /* copy the first b code points to the output. */ |
329
|
|
|
|
|
|
|
// |
330
|
|
|
|
|
|
|
// for (b = j = 0; j < input_length; ++j) if (delim(input[j])) b = j; |
331
|
|
|
|
|
|
|
// if (b > max_out) return punycode_big_output; |
332
|
|
|
|
|
|
|
// |
333
|
|
|
|
|
|
|
// for (j = 0; j < b; ++j) { |
334
|
|
|
|
|
|
|
// if (case_flags) case_flags[out] = flagged(input[j]); |
335
|
|
|
|
|
|
|
// if (!basic(input[j])) return punycode_bad_input; |
336
|
|
|
|
|
|
|
// output[out++] = input[j]; |
337
|
|
|
|
|
|
|
// } |
338
|
|
|
|
|
|
|
// |
339
|
|
|
|
|
|
|
// /* Main decoding loop: Start just after the last delimiter if any */ |
340
|
|
|
|
|
|
|
// /* basic code points were copied; start at the beginning otherwise. */ |
341
|
|
|
|
|
|
|
// |
342
|
|
|
|
|
|
|
// for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) { |
343
|
|
|
|
|
|
|
// |
344
|
|
|
|
|
|
|
// /* in is the index of the next character to be consumed, and */ |
345
|
|
|
|
|
|
|
// /* out is the number of code points in the output array. */ |
346
|
|
|
|
|
|
|
// |
347
|
|
|
|
|
|
|
// /* Decode a generalized variable-length integer into delta, */ |
348
|
|
|
|
|
|
|
// /* which gets added to i. The overflow checking is easier */ |
349
|
|
|
|
|
|
|
// /* if we increase i as we go, then subtract off its starting */ |
350
|
|
|
|
|
|
|
// /* value at the end to obtain delta. */ |
351
|
|
|
|
|
|
|
// |
352
|
|
|
|
|
|
|
// for (oldi = i, w = 1, k = base; ; k += base) { |
353
|
|
|
|
|
|
|
// if (in >= input_length) return punycode_bad_input; |
354
|
|
|
|
|
|
|
// digit = decode_digit(input[in++]); |
355
|
|
|
|
|
|
|
// if (digit >= base) return punycode_bad_input; |
356
|
|
|
|
|
|
|
// if (digit > (maxint - i) / w) return punycode_overflow; |
357
|
|
|
|
|
|
|
// i += digit * w; |
358
|
|
|
|
|
|
|
// t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */ |
359
|
|
|
|
|
|
|
// k >= bias + tmax ? tmax : k - bias; |
360
|
|
|
|
|
|
|
// if (digit < t) break; |
361
|
|
|
|
|
|
|
// if (w > maxint / (base - t)) return punycode_overflow; |
362
|
|
|
|
|
|
|
// w *= (base - t); |
363
|
|
|
|
|
|
|
// } |
364
|
|
|
|
|
|
|
// |
365
|
|
|
|
|
|
|
// bias = adapt(i - oldi, out + 1, oldi == 0); |
366
|
|
|
|
|
|
|
// |
367
|
|
|
|
|
|
|
// /* i was supposed to wrap around from out+1 to 0, */ |
368
|
|
|
|
|
|
|
// /* incrementing n each time, so we'll fix that now: */ |
369
|
|
|
|
|
|
|
// |
370
|
|
|
|
|
|
|
// if (i / (out + 1) > maxint - n) return punycode_overflow; |
371
|
|
|
|
|
|
|
// n += i / (out + 1); |
372
|
|
|
|
|
|
|
// i %= (out + 1); |
373
|
|
|
|
|
|
|
// |
374
|
|
|
|
|
|
|
// /* Insert n at position i of the output: */ |
375
|
|
|
|
|
|
|
// |
376
|
|
|
|
|
|
|
// /* not needed for Punycode: */ |
377
|
|
|
|
|
|
|
// /* if (decode_digit(n) <= base) return punycode_invalid_input; */ |
378
|
|
|
|
|
|
|
// if (out >= max_out) return punycode_big_output; |
379
|
|
|
|
|
|
|
// |
380
|
|
|
|
|
|
|
// if (case_flags) { |
381
|
|
|
|
|
|
|
// memmove(case_flags + i + 1, case_flags + i, out - i); |
382
|
|
|
|
|
|
|
// |
383
|
|
|
|
|
|
|
// /* Case of last character determines uppercase flag: */ |
384
|
|
|
|
|
|
|
// case_flags[i] = flagged(input[in - 1]); |
385
|
|
|
|
|
|
|
// } |
386
|
|
|
|
|
|
|
// |
387
|
|
|
|
|
|
|
// memmove(output + i + 1, output + i, (out - i) * sizeof *output); |
388
|
|
|
|
|
|
|
// output[i++] = n; |
389
|
|
|
|
|
|
|
// } |
390
|
|
|
|
|
|
|
// |
391
|
|
|
|
|
|
|
// *output_length = out; |
392
|
|
|
|
|
|
|
// return punycode_success; |
393
|
|
|
|
|
|
|
//} |
394
|
|
|
|
|
|
|
// |
395
|
|
|
|
|
|
|
// |