| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
Copyright (C) 2016-2017 Alexander Borisov |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
|
5
|
|
|
|
|
|
|
modify it under the terms of the GNU Lesser General Public |
|
6
|
|
|
|
|
|
|
License as published by the Free Software Foundation; either |
|
7
|
|
|
|
|
|
|
version 2.1 of the License, or (at your option) any later version. |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful, |
|
10
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
11
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
12
|
|
|
|
|
|
|
Lesser General Public License for more details. |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public |
|
15
|
|
|
|
|
|
|
License along with this library; if not, write to the Free Software |
|
16
|
|
|
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Author: lex.borisov@gmail.com (Alexander Borisov) |
|
19
|
|
|
|
|
|
|
*/ |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#include "myurl/punycode.h" |
|
22
|
|
|
|
|
|
|
|
|
23
|
0
|
|
|
|
|
|
/*
 * First stage of a Punycode encoder: copies the single-byte (ASCII) code
 * points of a UTF-8 buffer into str_raw and, if str_raw is then non-empty,
 * appends MyURL_PUNYCODE_CONST_DELIMITER.
 *
 * The encoding of the non-ASCII code points is not implemented here; bytes
 * belonging to multi-byte sequences are skipped, not emitted.
 *
 * data       input bytes; each lead byte is classified with
 *            myencoding_ascii_utf_8_length()
 * data_size  number of bytes in data
 * str_raw    output; bytes are appended at str_raw->length, which is advanced
 *            (it is not reset, so existing content is kept and also counts
 *            towards the "non-empty" test for the delimiter)
 *
 * Returns MyURL_STATUS_OK on success. Returns MyURL_STATUS_ERROR when an
 * argument is a null pointer, when a lead byte is reported with length 0, or
 * when a multi-byte sequence runs past the end of the input. On error,
 * str_raw may already hold the bytes copied before the failure.
 *
 * NOTE(review): no capacity check is made on str_raw->data. This function
 * writes up to data_size + 1 bytes past the current length, so the caller
 * presumably has to reserve that much room -- confirm against the callers.
 */
mystatus_t myurl_punycode_encode_with_callback(const unsigned char* data, size_t data_size, mycore_string_raw_t* str_raw)
{
    size_t pos = 0;

    if(!str_raw || (!data && data_size)) {
        return MyURL_STATUS_ERROR;
    }

    while(pos < data_size) {
        /* presumably the byte length of the sequence introduced by this lead
         * byte, with 0 meaning "not a valid lead byte" -- confirm */
        size_t seq_len = myencoding_ascii_utf_8_length(data[pos]);

        if(seq_len == 0) {
            return MyURL_STATUS_ERROR;
        }

        /* A sequence that claims more bytes than remain is truncated input;
         * reject it like any other malformed byte instead of accepting it. */
        if(seq_len > data_size - pos) {
            return MyURL_STATUS_ERROR;
        }

        if(seq_len == 1) {
            str_raw->data[ str_raw->length++ ] = data[pos];
        }

        pos += seq_len;
    }

    /* The delimiter separates the literal ASCII part from the encoded part. */
    if(str_raw->length) {
        str_raw->data[ str_raw->length++ ] = MyURL_PUNYCODE_CONST_DELIMITER;
    }

    return MyURL_STATUS_OK;
}
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
///* |
|
54
|
|
|
|
|
|
|
// punycode.c from RFC 3492 |
|
55
|
|
|
|
|
|
|
// http://www.nicemice.net/idn/ |
|
56
|
|
|
|
|
|
|
// Adam M. Costello |
|
57
|
|
|
|
|
|
|
// http://www.nicemice.net/amc/ |
|
58
|
|
|
|
|
|
|
// |
|
59
|
|
|
|
|
|
|
// This is ANSI C code (C89) implementing Punycode (RFC 3492). |
|
60
|
|
|
|
|
|
|
// |
|
61
|
|
|
|
|
|
|
// */ |
|
62
|
|
|
|
|
|
|
// |
|
63
|
|
|
|
|
|
|
// |
|
64
|
|
|
|
|
|
|
///************************************************************/ |
|
65
|
|
|
|
|
|
|
///* Public interface (would normally go in its own .h file): */ |
|
66
|
|
|
|
|
|
|
// |
|
67
|
|
|
|
|
|
|
//#include <limits.h> |
|
68
|
|
|
|
|
|
|
// |
|
69
|
|
|
|
|
|
|
//enum punycode_status { |
|
70
|
|
|
|
|
|
|
// punycode_success, |
|
71
|
|
|
|
|
|
|
// punycode_bad_input, /* Input is invalid. */ |
|
72
|
|
|
|
|
|
|
// punycode_big_output, /* Output would exceed the space provided. */ |
|
73
|
|
|
|
|
|
|
// punycode_overflow /* Input needs wider integers to process. */ |
|
74
|
|
|
|
|
|
|
//}; |
|
75
|
|
|
|
|
|
|
// |
|
76
|
|
|
|
|
|
|
//#if UINT_MAX >= (1 << 26) - 1 |
|
77
|
|
|
|
|
|
|
//typedef unsigned int punycode_uint; |
|
78
|
|
|
|
|
|
|
//#else |
|
79
|
|
|
|
|
|
|
//typedef unsigned long punycode_uint; |
|
80
|
|
|
|
|
|
|
//#endif |
|
81
|
|
|
|
|
|
|
// |
|
82
|
|
|
|
|
|
|
//enum punycode_status punycode_encode(punycode_uint input_length, |
|
83
|
|
|
|
|
|
|
// const punycode_uint input[], |
|
84
|
|
|
|
|
|
|
// const unsigned char case_flags[], |
|
85
|
|
|
|
|
|
|
// punycode_uint *output_length, |
|
86
|
|
|
|
|
|
|
// char output[] ); |
|
87
|
|
|
|
|
|
|
// |
|
88
|
|
|
|
|
|
|
///* punycode_encode() converts Unicode to Punycode. The input */ |
|
89
|
|
|
|
|
|
|
///* is represented as an array of Unicode code points (not code */ |
|
90
|
|
|
|
|
|
|
///* units; surrogate pairs are not allowed), and the output */ |
|
91
|
|
|
|
|
|
|
///* will be represented as an array of ASCII code points. The */ |
|
92
|
|
|
|
|
|
|
///* output string is *not* null-terminated; it will contain */ |
|
93
|
|
|
|
|
|
|
///* zeros if and only if the input contains zeros. (Of course */ |
|
94
|
|
|
|
|
|
|
///* the caller can leave room for a terminator and add one if */ |
|
95
|
|
|
|
|
|
|
///* needed.) The input_length is the number of code points in */ |
|
96
|
|
|
|
|
|
|
///* the input. The output_length is an in/out argument: the */ |
|
97
|
|
|
|
|
|
|
///* caller passes in the maximum number of code points that it */ |
|
98
|
|
|
|
|
|
|
///* can receive, and on successful return it will contain the */ |
|
99
|
|
|
|
|
|
|
///* number of code points actually output. The case_flags array */ |
|
100
|
|
|
|
|
|
|
///* holds input_length boolean values, where nonzero suggests that */ |
|
101
|
|
|
|
|
|
|
///* the corresponding Unicode character be forced to uppercase */ |
|
102
|
|
|
|
|
|
|
///* after being decoded (if possible), and zero suggests that */ |
|
103
|
|
|
|
|
|
|
///* it be forced to lowercase (if possible). ASCII code points */ |
|
104
|
|
|
|
|
|
|
///* are encoded literally, except that ASCII letters are forced */ |
|
105
|
|
|
|
|
|
|
///* to uppercase or lowercase according to the corresponding */ |
|
106
|
|
|
|
|
|
|
///* uppercase flags. If case_flags is a null pointer then ASCII */ |
|
107
|
|
|
|
|
|
|
///* letters are left as they are, and other code points are */ |
|
108
|
|
|
|
|
|
|
///* treated as if their uppercase flags were zero. The return */ |
|
109
|
|
|
|
|
|
|
///* value can be any of the punycode_status values defined above */ |
|
110
|
|
|
|
|
|
|
///* except punycode_bad_input; if not punycode_success, then */ |
|
111
|
|
|
|
|
|
|
///* output_size and output might contain garbage. */ |
|
112
|
|
|
|
|
|
|
// |
|
113
|
|
|
|
|
|
|
//enum punycode_status punycode_decode(punycode_uint input_length, |
|
114
|
|
|
|
|
|
|
// const char input[], |
|
115
|
|
|
|
|
|
|
// punycode_uint *output_length, |
|
116
|
|
|
|
|
|
|
// punycode_uint output[], |
|
117
|
|
|
|
|
|
|
// unsigned char case_flags[] ); |
|
118
|
|
|
|
|
|
|
// |
|
119
|
|
|
|
|
|
|
///* punycode_decode() converts Punycode to Unicode. The input is */ |
|
120
|
|
|
|
|
|
|
///* represented as an array of ASCII code points, and the output */ |
|
121
|
|
|
|
|
|
|
///* will be represented as an array of Unicode code points. The */ |
|
122
|
|
|
|
|
|
|
///* input_length is the number of code points in the input. The */ |
|
123
|
|
|
|
|
|
|
///* output_length is an in/out argument: the caller passes in */ |
|
124
|
|
|
|
|
|
|
///* the maximum number of code points that it can receive, and */ |
|
125
|
|
|
|
|
|
|
///* on successful return it will contain the actual number of */ |
|
126
|
|
|
|
|
|
|
///* code points output. The case_flags array needs room for at */ |
|
127
|
|
|
|
|
|
|
///* least output_length values, or it can be a null pointer if the */ |
|
128
|
|
|
|
|
|
|
///* case information is not needed. A nonzero flag suggests that */ |
|
129
|
|
|
|
|
|
|
///* the corresponding Unicode character be forced to uppercase */ |
|
130
|
|
|
|
|
|
|
///* by the caller (if possible), while zero suggests that it be */ |
|
131
|
|
|
|
|
|
|
///* forced to lowercase (if possible). ASCII code points are */ |
|
132
|
|
|
|
|
|
|
///* output already in the proper case, but their flags will be set */ |
|
133
|
|
|
|
|
|
|
///* appropriately so that applying the flags would be harmless. */ |
|
134
|
|
|
|
|
|
|
///* The return value can be any of the punycode_status values */ |
|
135
|
|
|
|
|
|
|
///* defined above; if not punycode_success, then output_length, */ |
|
136
|
|
|
|
|
|
|
///* output, and case_flags might contain garbage. On success, the */ |
|
137
|
|
|
|
|
|
|
///* decoder will never need to write an output_length greater than */ |
|
138
|
|
|
|
|
|
|
///* input_length, because of how the encoding is defined. */ |
|
139
|
|
|
|
|
|
|
// |
|
140
|
|
|
|
|
|
|
///**********************************************************/ |
|
141
|
|
|
|
|
|
|
///* Implementation (would normally go in its own .c file): */ |
|
142
|
|
|
|
|
|
|
// |
|
143
|
|
|
|
|
|
|
//#include <string.h> |
|
144
|
|
|
|
|
|
|
// |
|
145
|
|
|
|
|
|
|
///*** Bootstring parameters for Punycode ***/ |
|
146
|
|
|
|
|
|
|
// |
|
147
|
|
|
|
|
|
|
//enum { base = 36, tmin = 1, tmax = 26, skew = 38, damp = 700, |
|
148
|
|
|
|
|
|
|
// initial_bias = 72, initial_n = 0x80, delimiter = 0x2D }; |
|
149
|
|
|
|
|
|
|
// |
|
150
|
|
|
|
|
|
|
///* basic(cp) tests whether cp is a basic code point: */ |
|
151
|
|
|
|
|
|
|
//#define basic(cp) ((punycode_uint)(cp) < 0x80) |
|
152
|
|
|
|
|
|
|
// |
|
153
|
|
|
|
|
|
|
///* delim(cp) tests whether cp is a delimiter: */ |
|
154
|
|
|
|
|
|
|
//#define delim(cp) ((cp) == delimiter) |
|
155
|
|
|
|
|
|
|
// |
|
156
|
|
|
|
|
|
|
///* decode_digit(cp) returns the numeric value of a basic code */ |
|
157
|
|
|
|
|
|
|
///* point (for use in representing integers) in the range 0 to */ |
|
158
|
|
|
|
|
|
|
///* base-1, or base if cp does not represent a value. */ |
|
159
|
|
|
|
|
|
|
// |
|
160
|
|
|
|
|
|
|
//static punycode_uint decode_digit(punycode_uint cp) |
|
161
|
|
|
|
|
|
|
//{ |
|
162
|
|
|
|
|
|
|
// return cp - 48 < 10 ? cp - 22 : cp - 65 < 26 ? cp - 65 : |
|
163
|
|
|
|
|
|
|
// cp - 97 < 26 ? cp - 97 : base; |
|
164
|
|
|
|
|
|
|
//} |
|
165
|
|
|
|
|
|
|
// |
|
166
|
|
|
|
|
|
|
///* encode_digit(d,flag) returns the basic code point whose value */ |
|
167
|
|
|
|
|
|
|
///* (when used for representing integers) is d, which needs to be in */ |
|
168
|
|
|
|
|
|
|
///* the range 0 to base-1. The lowercase form is used unless flag is */ |
|
169
|
|
|
|
|
|
|
///* nonzero, in which case the uppercase form is used. The behavior */ |
|
170
|
|
|
|
|
|
|
///* is undefined if flag is nonzero and digit d has no uppercase form. */ |
|
171
|
|
|
|
|
|
|
// |
|
172
|
|
|
|
|
|
|
//static char encode_digit(punycode_uint d, int flag) |
|
173
|
|
|
|
|
|
|
//{ |
|
174
|
|
|
|
|
|
|
// return d + 22 + 75 * (d < 26) - ((flag != 0) << 5); |
|
175
|
|
|
|
|
|
|
// /* 0..25 map to ASCII a..z or A..Z */ |
|
176
|
|
|
|
|
|
|
// /* 26..35 map to ASCII 0..9 */ |
|
177
|
|
|
|
|
|
|
//} |
|
178
|
|
|
|
|
|
|
// |
|
179
|
|
|
|
|
|
|
///* flagged(bcp) tests whether a basic code point is flagged */ |
|
180
|
|
|
|
|
|
|
///* (uppercase). The behavior is undefined if bcp is not a */ |
|
181
|
|
|
|
|
|
|
///* basic code point. */ |
|
182
|
|
|
|
|
|
|
// |
|
183
|
|
|
|
|
|
|
//#define flagged(bcp) ((punycode_uint)(bcp) - 65 < 26) |
|
184
|
|
|
|
|
|
|
// |
|
185
|
|
|
|
|
|
|
///* encode_basic(bcp,flag) forces a basic code point to lowercase */ |
|
186
|
|
|
|
|
|
|
///* if flag is zero, uppercase if flag is nonzero, and returns */ |
|
187
|
|
|
|
|
|
|
///* the resulting code point. The code point is unchanged if it */ |
|
188
|
|
|
|
|
|
|
///* is caseless. The behavior is undefined if bcp is not a basic */ |
|
189
|
|
|
|
|
|
|
///* code point. */ |
|
190
|
|
|
|
|
|
|
// |
|
191
|
|
|
|
|
|
|
//static char encode_basic(punycode_uint bcp, int flag) |
|
192
|
|
|
|
|
|
|
//{ |
|
193
|
|
|
|
|
|
|
// bcp -= (bcp - 97 < 26) << 5; |
|
194
|
|
|
|
|
|
|
// return bcp + ((!flag && (bcp - 65 < 26)) << 5); |
|
195
|
|
|
|
|
|
|
//} |
|
196
|
|
|
|
|
|
|
// |
|
197
|
|
|
|
|
|
|
///*** Platform-specific constants ***/ |
|
198
|
|
|
|
|
|
|
// |
|
199
|
|
|
|
|
|
|
///* maxint is the maximum value of a punycode_uint variable: */ |
|
200
|
|
|
|
|
|
|
//static const punycode_uint maxint = -1; |
|
201
|
|
|
|
|
|
|
///* Because maxint is unsigned, -1 becomes the maximum value. */ |
|
202
|
|
|
|
|
|
|
// |
|
203
|
|
|
|
|
|
|
///*** Bias adaptation function ***/ |
|
204
|
|
|
|
|
|
|
// |
|
205
|
|
|
|
|
|
|
//static punycode_uint adapt(punycode_uint delta, punycode_uint numpoints, int firsttime) |
|
206
|
|
|
|
|
|
|
//{ |
|
207
|
|
|
|
|
|
|
// punycode_uint k; |
|
208
|
|
|
|
|
|
|
// |
|
209
|
|
|
|
|
|
|
// delta = firsttime ? delta / damp : delta >> 1; |
|
210
|
|
|
|
|
|
|
// /* delta >> 1 is a faster way of doing delta / 2 */ |
|
211
|
|
|
|
|
|
|
// delta += delta / numpoints; |
|
212
|
|
|
|
|
|
|
// |
|
213
|
|
|
|
|
|
|
// for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) { |
|
214
|
|
|
|
|
|
|
// delta /= base - tmin; |
|
215
|
|
|
|
|
|
|
// } |
|
216
|
|
|
|
|
|
|
// |
|
217
|
|
|
|
|
|
|
// return k + (base - tmin + 1) * delta / (delta + skew); |
|
218
|
|
|
|
|
|
|
//} |
|
219
|
|
|
|
|
|
|
// |
|
220
|
|
|
|
|
|
|
///*** Main encode function ***/ |
|
221
|
|
|
|
|
|
|
// |
|
222
|
|
|
|
|
|
|
//enum punycode_status punycode_encode(punycode_uint input_length, |
|
223
|
|
|
|
|
|
|
// const punycode_uint input[], |
|
224
|
|
|
|
|
|
|
// const unsigned char case_flags[], |
|
225
|
|
|
|
|
|
|
// punycode_uint *output_length, |
|
226
|
|
|
|
|
|
|
// char output[] ) |
|
227
|
|
|
|
|
|
|
//{ |
|
228
|
|
|
|
|
|
|
// punycode_uint n, delta, h, b, out, max_out, bias, j, m, q, k, t; |
|
229
|
|
|
|
|
|
|
// |
|
230
|
|
|
|
|
|
|
// /* Initialize the state: */ |
|
231
|
|
|
|
|
|
|
// |
|
232
|
|
|
|
|
|
|
// n = initial_n; |
|
233
|
|
|
|
|
|
|
// delta = out = 0; |
|
234
|
|
|
|
|
|
|
// max_out = *output_length; |
|
235
|
|
|
|
|
|
|
// bias = initial_bias; |
|
236
|
|
|
|
|
|
|
// |
|
237
|
|
|
|
|
|
|
// /* Handle the basic code points: */ |
|
238
|
|
|
|
|
|
|
// for (j = 0; j < input_length; ++j) { |
|
239
|
|
|
|
|
|
|
// if (basic(input[j])) { |
|
240
|
|
|
|
|
|
|
// if (max_out - out < 2) return punycode_big_output; |
|
241
|
|
|
|
|
|
|
// output[out++] = |
|
242
|
|
|
|
|
|
|
// case_flags ? encode_basic(input[j], case_flags[j]) : input[j]; |
|
243
|
|
|
|
|
|
|
// } |
|
244
|
|
|
|
|
|
|
// /* else if (input[j] < n) return punycode_bad_input; */ |
|
245
|
|
|
|
|
|
|
// /* (not needed for Punycode with unsigned code points) */ |
|
246
|
|
|
|
|
|
|
// } |
|
247
|
|
|
|
|
|
|
// |
|
248
|
|
|
|
|
|
|
// h = b = out; |
|
249
|
|
|
|
|
|
|
// |
|
250
|
|
|
|
|
|
|
// /* h is the number of code points that have been handled, b is the */ |
|
251
|
|
|
|
|
|
|
// /* number of basic code points, and out is the number of characters */ |
|
252
|
|
|
|
|
|
|
// /* that have been output. */ |
|
253
|
|
|
|
|
|
|
// |
|
254
|
|
|
|
|
|
|
// if (b > 0) output[out++] = delimiter; |
|
255
|
|
|
|
|
|
|
// |
|
256
|
|
|
|
|
|
|
// /* Main encoding loop: */ |
|
257
|
|
|
|
|
|
|
// |
|
258
|
|
|
|
|
|
|
// while (h < input_length) { |
|
259
|
|
|
|
|
|
|
// /* All non-basic code points < n have been */ |
|
260
|
|
|
|
|
|
|
// /* handled already. Find the next larger one: */ |
|
261
|
|
|
|
|
|
|
// |
|
262
|
|
|
|
|
|
|
// for (m = maxint, j = 0; j < input_length; ++j) { |
|
263
|
|
|
|
|
|
|
// /* if (basic(input[j])) continue; */ |
|
264
|
|
|
|
|
|
|
// /* (not needed for Punycode) */ |
|
265
|
|
|
|
|
|
|
// if (input[j] >= n && input[j] < m) m = input[j]; |
|
266
|
|
|
|
|
|
|
// } |
|
267
|
|
|
|
|
|
|
// |
|
268
|
|
|
|
|
|
|
// /* Increase delta enough to advance the decoder's */ |
|
269
|
|
|
|
|
|
|
// /* <n,i> state to <m,0>, but guard against overflow: */ |
|
270
|
|
|
|
|
|
|
// |
|
271
|
|
|
|
|
|
|
// if (m - n > (maxint - delta) / (h + 1)) return punycode_overflow; |
|
272
|
|
|
|
|
|
|
// delta += (m - n) * (h + 1); |
|
273
|
|
|
|
|
|
|
// n = m; |
|
274
|
|
|
|
|
|
|
// |
|
275
|
|
|
|
|
|
|
// for (j = 0; j < input_length; ++j) { |
|
276
|
|
|
|
|
|
|
// /* Punycode does not need to check whether input[j] is basic: */ |
|
277
|
|
|
|
|
|
|
// if (input[j] < n /* || basic(input[j]) */ ) { |
|
278
|
|
|
|
|
|
|
// if (++delta == 0) return punycode_overflow; |
|
279
|
|
|
|
|
|
|
// } |
|
280
|
|
|
|
|
|
|
// |
|
281
|
|
|
|
|
|
|
// if (input[j] == n) { |
|
282
|
|
|
|
|
|
|
// /* Represent delta as a generalized variable-length integer: */ |
|
283
|
|
|
|
|
|
|
// |
|
284
|
|
|
|
|
|
|
// for (q = delta, k = base; ; k += base) { |
|
285
|
|
|
|
|
|
|
// if (out >= max_out) return punycode_big_output; |
|
286
|
|
|
|
|
|
|
// |
|
287
|
|
|
|
|
|
|
// t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */ |
|
288
|
|
|
|
|
|
|
// k >= bias + tmax ? tmax : k - bias; |
|
289
|
|
|
|
|
|
|
// if (q < t) break; |
|
290
|
|
|
|
|
|
|
// output[out++] = encode_digit(t + (q - t) % (base - t), 0); |
|
291
|
|
|
|
|
|
|
// q = (q - t) / (base - t); |
|
292
|
|
|
|
|
|
|
// } |
|
293
|
|
|
|
|
|
|
// |
|
294
|
|
|
|
|
|
|
// output[out++] = encode_digit(q, case_flags && case_flags[j]); |
|
295
|
|
|
|
|
|
|
// bias = adapt(delta, h + 1, h == b); |
|
296
|
|
|
|
|
|
|
// delta = 0; |
|
297
|
|
|
|
|
|
|
// ++h; |
|
298
|
|
|
|
|
|
|
// } |
|
299
|
|
|
|
|
|
|
// } |
|
300
|
|
|
|
|
|
|
// |
|
301
|
|
|
|
|
|
|
// ++delta, ++n; |
|
302
|
|
|
|
|
|
|
// } |
|
303
|
|
|
|
|
|
|
// |
|
304
|
|
|
|
|
|
|
// *output_length = out; |
|
305
|
|
|
|
|
|
|
// return punycode_success; |
|
306
|
|
|
|
|
|
|
//} |
|
307
|
|
|
|
|
|
|
// |
|
308
|
|
|
|
|
|
|
///*** Main decode function ***/ |
|
309
|
|
|
|
|
|
|
// |
|
310
|
|
|
|
|
|
|
//enum punycode_status punycode_decode(punycode_uint input_length, |
|
311
|
|
|
|
|
|
|
// const char input[], |
|
312
|
|
|
|
|
|
|
// punycode_uint *output_length, |
|
313
|
|
|
|
|
|
|
// punycode_uint output[], |
|
314
|
|
|
|
|
|
|
// unsigned char case_flags[] ) |
|
315
|
|
|
|
|
|
|
//{ |
|
316
|
|
|
|
|
|
|
// punycode_uint n, out, i, max_out, bias, |
|
317
|
|
|
|
|
|
|
// b, j, in, oldi, w, k, digit, t; |
|
318
|
|
|
|
|
|
|
// |
|
319
|
|
|
|
|
|
|
// /* Initialize the state: */ |
|
320
|
|
|
|
|
|
|
// |
|
321
|
|
|
|
|
|
|
// n = initial_n; |
|
322
|
|
|
|
|
|
|
// out = i = 0; |
|
323
|
|
|
|
|
|
|
// max_out = *output_length; |
|
324
|
|
|
|
|
|
|
// bias = initial_bias; |
|
325
|
|
|
|
|
|
|
// |
|
326
|
|
|
|
|
|
|
// /* Handle the basic code points: Let b be the number of input code */ |
|
327
|
|
|
|
|
|
|
// /* points before the last delimiter, or 0 if there is none, then */ |
|
328
|
|
|
|
|
|
|
// /* copy the first b code points to the output. */ |
|
329
|
|
|
|
|
|
|
// |
|
330
|
|
|
|
|
|
|
// for (b = j = 0; j < input_length; ++j) if (delim(input[j])) b = j; |
|
331
|
|
|
|
|
|
|
// if (b > max_out) return punycode_big_output; |
|
332
|
|
|
|
|
|
|
// |
|
333
|
|
|
|
|
|
|
// for (j = 0; j < b; ++j) { |
|
334
|
|
|
|
|
|
|
// if (case_flags) case_flags[out] = flagged(input[j]); |
|
335
|
|
|
|
|
|
|
// if (!basic(input[j])) return punycode_bad_input; |
|
336
|
|
|
|
|
|
|
// output[out++] = input[j]; |
|
337
|
|
|
|
|
|
|
// } |
|
338
|
|
|
|
|
|
|
// |
|
339
|
|
|
|
|
|
|
// /* Main decoding loop: Start just after the last delimiter if any */ |
|
340
|
|
|
|
|
|
|
// /* basic code points were copied; start at the beginning otherwise. */ |
|
341
|
|
|
|
|
|
|
// |
|
342
|
|
|
|
|
|
|
// for (in = b > 0 ? b + 1 : 0; in < input_length; ++out) { |
|
343
|
|
|
|
|
|
|
// |
|
344
|
|
|
|
|
|
|
// /* in is the index of the next character to be consumed, and */ |
|
345
|
|
|
|
|
|
|
// /* out is the number of code points in the output array. */ |
|
346
|
|
|
|
|
|
|
// |
|
347
|
|
|
|
|
|
|
// /* Decode a generalized variable-length integer into delta, */ |
|
348
|
|
|
|
|
|
|
// /* which gets added to i. The overflow checking is easier */ |
|
349
|
|
|
|
|
|
|
// /* if we increase i as we go, then subtract off its starting */ |
|
350
|
|
|
|
|
|
|
// /* value at the end to obtain delta. */ |
|
351
|
|
|
|
|
|
|
// |
|
352
|
|
|
|
|
|
|
// for (oldi = i, w = 1, k = base; ; k += base) { |
|
353
|
|
|
|
|
|
|
// if (in >= input_length) return punycode_bad_input; |
|
354
|
|
|
|
|
|
|
// digit = decode_digit(input[in++]); |
|
355
|
|
|
|
|
|
|
// if (digit >= base) return punycode_bad_input; |
|
356
|
|
|
|
|
|
|
// if (digit > (maxint - i) / w) return punycode_overflow; |
|
357
|
|
|
|
|
|
|
// i += digit * w; |
|
358
|
|
|
|
|
|
|
// t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */ |
|
359
|
|
|
|
|
|
|
// k >= bias + tmax ? tmax : k - bias; |
|
360
|
|
|
|
|
|
|
// if (digit < t) break; |
|
361
|
|
|
|
|
|
|
// if (w > maxint / (base - t)) return punycode_overflow; |
|
362
|
|
|
|
|
|
|
// w *= (base - t); |
|
363
|
|
|
|
|
|
|
// } |
|
364
|
|
|
|
|
|
|
// |
|
365
|
|
|
|
|
|
|
// bias = adapt(i - oldi, out + 1, oldi == 0); |
|
366
|
|
|
|
|
|
|
// |
|
367
|
|
|
|
|
|
|
// /* i was supposed to wrap around from out+1 to 0, */ |
|
368
|
|
|
|
|
|
|
// /* incrementing n each time, so we'll fix that now: */ |
|
369
|
|
|
|
|
|
|
// |
|
370
|
|
|
|
|
|
|
// if (i / (out + 1) > maxint - n) return punycode_overflow; |
|
371
|
|
|
|
|
|
|
// n += i / (out + 1); |
|
372
|
|
|
|
|
|
|
// i %= (out + 1); |
|
373
|
|
|
|
|
|
|
// |
|
374
|
|
|
|
|
|
|
// /* Insert n at position i of the output: */ |
|
375
|
|
|
|
|
|
|
// |
|
376
|
|
|
|
|
|
|
// /* not needed for Punycode: */ |
|
377
|
|
|
|
|
|
|
// /* if (decode_digit(n) <= base) return punycode_invalid_input; */ |
|
378
|
|
|
|
|
|
|
// if (out >= max_out) return punycode_big_output; |
|
379
|
|
|
|
|
|
|
// |
|
380
|
|
|
|
|
|
|
// if (case_flags) { |
|
381
|
|
|
|
|
|
|
// memmove(case_flags + i + 1, case_flags + i, out - i); |
|
382
|
|
|
|
|
|
|
// |
|
383
|
|
|
|
|
|
|
// /* Case of last character determines uppercase flag: */ |
|
384
|
|
|
|
|
|
|
// case_flags[i] = flagged(input[in - 1]); |
|
385
|
|
|
|
|
|
|
// } |
|
386
|
|
|
|
|
|
|
// |
|
387
|
|
|
|
|
|
|
// memmove(output + i + 1, output + i, (out - i) * sizeof *output); |
|
388
|
|
|
|
|
|
|
// output[i++] = n; |
|
389
|
|
|
|
|
|
|
// } |
|
390
|
|
|
|
|
|
|
// |
|
391
|
|
|
|
|
|
|
// *output_length = out; |
|
392
|
|
|
|
|
|
|
// return punycode_success; |
|
393
|
|
|
|
|
|
|
//} |
|
394
|
|
|
|
|
|
|
// |
|
395
|
|
|
|
|
|
|
// |