line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
Copyright (C) 2015-2017 Alexander Borisov |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or |
5
|
|
|
|
|
|
|
modify it under the terms of the GNU Lesser General Public |
6
|
|
|
|
|
|
|
License as published by the Free Software Foundation; either |
7
|
|
|
|
|
|
|
version 2.1 of the License, or (at your option) any later version. |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
This library is distributed in the hope that it will be useful, |
10
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
11
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12
|
|
|
|
|
|
|
Lesser General Public License for more details. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public |
15
|
|
|
|
|
|
|
License along with this library; if not, write to the Free Software |
16
|
|
|
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Author: lex.borisov@gmail.com (Alexander Borisov) |
19
|
|
|
|
|
|
|
*/ |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#include "myencoding/encoding.h" |
22
|
|
|
|
|
|
|
#include "myencoding/resource.h" |
23
|
|
|
|
|
|
|
#include "mycore/utils/resources.h" |
24
|
|
|
|
|
|
|
|
25
|
12
|
|
|
|
|
|
/*
 * Look up the decoder callback registered for an encoding id.
 * The id is used directly as an index into myencoding_function_index;
 * no range check is performed here.
 */
myencoding_custom_f myencoding_get_function_by_id(myencoding_t idx)
{
    myencoding_custom_f decoder = myencoding_function_index[idx];

    return decoder;
}
29
|
|
|
|
|
|
|
|
30
|
0
|
|
|
|
|
|
/*
 * Incremental UTF-8 decoder: consumes exactly one byte per call.
 *
 * State carried in *res between calls:
 *   res->first  -- lower boundary for the next continuation byte
 *   res->second -- upper boundary for the next continuation byte
 *   res->result -- code point (accumulated)
 *   res->third  -- bytes seen
 *   res->flag   -- bytes needed
 *
 * Returns:
 *   MyENCODING_STATUS_OK        res->result holds a complete code point
 *   MyENCODING_STATUS_CONTINUE  more bytes are required
 *   MyENCODING_STATUS_ERROR     the byte cannot start a sequence
 *   MyENCODING_STATUS_DONE|MyENCODING_STATUS_ERROR
 *                               a continuation byte was out of range; the
 *                               decoder state has been reset
 */
myencoding_status_t myencoding_decode_utf_8(unsigned const char data, myencoding_result_t *res)
{
    if(res->flag == 0)
    {
        if(data <= 0x7F) {
            res->result = data;
            return MyENCODING_STATUS_OK;
        }
        else if(data >= 0xC2 && data <= 0xDF) {
            res->flag   = 1;
            res->result = data - 0xC0;
        }
        else if(data >= 0xE0 && data <= 0xEF) {
            // 0xE0 must not be followed by an overlong form,
            // 0xED must not encode a surrogate
            if(data == 0xE0) {
                res->first = 0xA0;
            }
            else if(data == 0xED) {
                res->second = 0x9F;
            }

            res->flag   = 2;
            res->result = data - 0xE0;
        }
        else if(data >= 0xF0 && data <= 0xF4) {
            // 0xF0 must not be overlong, 0xF4 must stay below U+110000
            if(data == 0xF0) {
                res->first = 0x90;
            }
            else if(data == 0xF4) {
                res->second = 0x8F;
            }

            // a four-byte sequence needs three continuation bytes
            res->flag   = 3;
            res->result = data - 0xF0;
        }
        else
            return MyENCODING_STATUS_ERROR;

        // move the lead byte payload above the continuation payloads
        res->result = res->result << (6 * res->flag);
        return MyENCODING_STATUS_CONTINUE;
    }

    // A boundary of zero means "never set" (for example a zero-filled
    // result structure); use the generic continuation range in that case.
    unsigned long lower = res->first  ? res->first  : 0x80;
    unsigned long upper = res->second ? res->second : 0xBF;

    // reject the byte when it lies outside [lower, upper]
    if(data < lower || data > upper)
    {
        res->result = 0x00;
        res->flag   = 0x00;
        res->third  = 0x00;
        res->first  = 0x80;
        res->second = 0xBF;

        return MyENCODING_STATUS_DONE|MyENCODING_STATUS_ERROR;
    }

    // only the first continuation byte has a restricted range
    res->first  = 0x80;
    res->second = 0xBF;

    res->third++;
    res->result += (unsigned long)(data - 0x80) << (6 * (res->flag - res->third));

    if(res->third != res->flag)
        return MyENCODING_STATUS_CONTINUE;

    res->flag  = 0x00;
    res->third = 0x00;

    return MyENCODING_STATUS_OK;
}
102
|
|
|
|
|
|
|
|
103
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for ibm866.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_ibm866 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_ibm866(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_ibm866[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
112
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-2.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_2 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_2(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_2[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
122
|
|
|
|
|
|
|
|
123
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-3.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_3 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_3(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_3[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
132
|
|
|
|
|
|
|
|
133
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-4.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_4 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_4(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_4[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
142
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-5.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_5 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_5(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_5[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
152
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-6.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_6 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_6(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_6[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
162
|
|
|
|
|
|
|
|
163
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-7.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_7 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_7(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_7[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
172
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-8.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_8 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_8(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_8[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
182
|
|
|
|
|
|
|
|
183
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-8-i.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_8 (the same table the
 * iso-8859-8 decoder uses, indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_8_i(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_8[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
192
|
|
|
|
|
|
|
|
193
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-10.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_10 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_10(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_10[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
202
|
|
|
|
|
|
|
|
203
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-13.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_13 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_13(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_13[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
212
|
|
|
|
|
|
|
|
213
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-14.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_14 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_14(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_14[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
222
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-15.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_15 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_15(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_15[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
232
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for iso-8859-16.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_iso_8859_16 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_iso_8859_16(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_iso_8859_16[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
242
|
|
|
|
|
|
|
|
243
|
36
|
|
|
|
|
|
/*
 * Single-byte decoder for koi8-r.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_koi8_r (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_koi8_r(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_koi8_r[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
252
|
|
|
|
|
|
|
|
253
|
36
|
|
|
|
|
|
/*
 * Single-byte decoder for koi8-u.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_koi8_u (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_koi8_u(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_koi8_u[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
262
|
|
|
|
|
|
|
|
263
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for macintosh.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_macintosh (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_macintosh(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_macintosh[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
272
|
|
|
|
|
|
|
|
273
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-874.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_874 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_874(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_874[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
282
|
|
|
|
|
|
|
|
283
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1250.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1250 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1250(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1250[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
292
|
|
|
|
|
|
|
|
293
|
36
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1251.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1251 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1251(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1251[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
302
|
|
|
|
|
|
|
|
303
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1252.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1252 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1252(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1252[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
312
|
|
|
|
|
|
|
|
313
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1253.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1253 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1253(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1253[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1254.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1254 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1254(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1254[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
332
|
|
|
|
|
|
|
|
333
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1255.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1255 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1255(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1255[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
342
|
|
|
|
|
|
|
|
343
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1256.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1256 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1256(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1256[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
352
|
|
|
|
|
|
|
|
353
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1257.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1257 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1257(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1257[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
362
|
|
|
|
|
|
|
|
363
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for windows-1258.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_windows_1258 (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_windows_1258(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_windows_1258[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
372
|
|
|
|
|
|
|
|
373
|
0
|
|
|
|
|
|
/*
 * Single-byte decoder for x-mac-cyrillic.
 * Bytes 0x00..0x7F are stored unchanged in res->result; bytes 0x80..0xFF
 * are translated through myencoding_map_x_mac_cyrillic (indexed from 0x80).
 * Always returns MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_x_mac_cyrillic(unsigned const char data, myencoding_result_t *res)
{
    if(data > 0x7F) {
        // upper half: table lookup, index is the byte minus 0x80
        res->result = myencoding_map_x_mac_cyrillic[data - 0x80];
        return MyENCODING_STATUS_OK;
    }

    // lower half: the byte value is the code point
    res->result = data;
    return MyENCODING_STATUS_OK;
}
382
|
|
|
|
|
|
|
|
383
|
0
|
|
|
|
|
|
/*
 * GBK decoder: forwards the byte and the decoder state unchanged to
 * myencoding_decode_gb18030() and returns whatever status it reports.
 */
myencoding_status_t myencoding_decode_gbk(unsigned const char data, myencoding_result_t *res)
{
    myencoding_status_t status = myencoding_decode_gb18030(data, res);

    return status;
}
387
|
|
|
|
|
|
|
|
388
|
0
|
|
|
|
|
|
unsigned long myencoding_index_gb18030_ranges_code_point(unsigned long pointer) |
389
|
|
|
|
|
|
|
{ |
390
|
|
|
|
|
|
|
// step 1 |
391
|
0
|
0
|
|
|
|
|
if((pointer > 39419 && pointer < 189000) || pointer > 1237575) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
392
|
0
|
|
|
|
|
|
return 0; |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
// step 2 |
395
|
0
|
0
|
|
|
|
|
if(pointer == 7457) |
396
|
0
|
|
|
|
|
|
return 0xe7c7; |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
// step 3 |
399
|
0
|
|
|
|
|
|
unsigned long offset = sizeof(myencoding_map_gb18030_ranges) / (sizeof(unsigned long) * 2); |
400
|
|
|
|
|
|
|
|
401
|
0
|
|
|
|
|
|
unsigned long code_point_offset = 0; |
402
|
0
|
0
|
|
|
|
|
while (offset) { |
403
|
0
|
|
|
|
|
|
offset--; |
404
|
|
|
|
|
|
|
|
405
|
0
|
0
|
|
|
|
|
if(myencoding_map_gb18030_ranges[offset][0] == pointer || |
|
|
0
|
|
|
|
|
|
406
|
0
|
|
|
|
|
|
myencoding_map_gb18030_ranges[offset][0] < pointer) |
407
|
|
|
|
|
|
|
{ |
408
|
0
|
|
|
|
|
|
code_point_offset = myencoding_map_gb18030_ranges[offset][1]; |
409
|
0
|
|
|
|
|
|
break; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
// step 4 |
414
|
0
|
|
|
|
|
|
return (code_point_offset + pointer - offset); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
0
|
|
|
|
|
|
/*
 * Incremental gb18030 decoder: consumes exactly one byte per call.
 *
 * State carried in *res between calls:
 *   res->first  -- first byte of a pending sequence (0 when idle)
 *   res->second -- second byte of a pending four-byte sequence
 *   res->third  -- third byte of a pending four-byte sequence
 *
 * Returns:
 *   MyENCODING_STATUS_OK        res->result holds a code point
 *   MyENCODING_STATUS_CONTINUE  more bytes are required
 *   MyENCODING_STATUS_ERROR     the byte is invalid in the current state
 */
myencoding_status_t myencoding_decode_gb18030(unsigned const char data, myencoding_result_t *res)
{
    if(res->third) {
        // Fourth byte. Drop any code point left over from an earlier
        // character so that an invalid byte is reported as an error
        // instead of returning that stale value.
        res->result = 0;

        if(data >= 0x30 && data <= 0x39) {
            res->result = myencoding_index_gb18030_ranges_code_point((((res->first - 0x81) * 10 + res->second - 0x30) *
                                                                      126 + res->third - 0x81) * 10 + data - 0x30);
        }

        res->first  = 0;
        res->second = 0;
        res->third  = 0;

        if(res->result)
            return MyENCODING_STATUS_OK;

        return MyENCODING_STATUS_ERROR;
    }
    else if(res->second) {
        // Third byte of a four-byte sequence
        if(data >= 0x81 && data <= 0xFE) {
            res->third = data;
            return MyENCODING_STATUS_CONTINUE;
        }

        res->first  = 0;
        res->second = 0;

        return MyENCODING_STATUS_ERROR;
    }
    else if(res->first) {
        // Second byte: a digit continues a four-byte sequence
        if(data >= 0x30 && data <= 0x39) {
            res->second = data;
            return MyENCODING_STATUS_CONTINUE;
        }

        // otherwise this byte has to complete a two-byte sequence
        unsigned long lead = res->first, pointer = 0x00;
        res->first = 0x00;

        unsigned char offset;

        if(data < 0x7F)
            offset = 0x40;
        else
            offset = 0x41;

        if((data >= 0x40 && data <= 0x7E) ||
           (data >= 0x80 && data <= 0xFE))
        {
            pointer = (lead - 0x81) * 190 + (data - offset);
        }
        else {
            return MyENCODING_STATUS_ERROR;
        }

        res->result = myencoding_map_gb18030[pointer];
        return MyENCODING_STATUS_OK;
    }

    // Idle state: start of a new character
    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if(data == 0x80) {
        res->result = 0x20ac;
        return MyENCODING_STATUS_OK;
    }

    if(data >= 0x81 && data <= 0xFE) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}
491
|
|
|
|
|
|
|
|
492
|
0
|
|
|
|
|
|
/*
 * Incremental Big5 decoder: consumes exactly one byte per call.
 *
 * State carried in *res between calls:
 *   res->first -- pending lead byte (0 when idle)
 *
 * Output:
 *   res->result     -- decoded code point
 *   res->result_aux -- second code point for the four pointers that
 *                      decode to a base letter plus a combining mark,
 *                      0 otherwise
 *
 * Returns:
 *   MyENCODING_STATUS_OK        res->result (and result_aux) are valid
 *   MyENCODING_STATUS_CONTINUE  a lead byte was stored
 *   MyENCODING_STATUS_ERROR     invalid byte or unmapped pointer
 */
myencoding_status_t myencoding_decode_big5(unsigned const char data, myencoding_result_t *res)
{
    if(res->first)
    {
        unsigned long lead    = res->first;
        unsigned long pointer = 0x00;
        unsigned long offset  = (data < 0x7F) ? 0x40 : 0x62;

        res->first = 0x00;

        if((data >= 0x40 && data <= 0x7E) ||
           (data >= 0xA1 && data <= 0xFE))
        {
            pointer = (lead - 0x81) * 157 + (data - offset);
        }

        switch (pointer) {
            case 1133:
                // U+00CA U+0304
                res->result     = 0x00ca;
                res->result_aux = 0x0304;
                return MyENCODING_STATUS_OK;
            case 1135:
                // U+00CA U+030C
                res->result     = 0x00ca;
                res->result_aux = 0x030c;
                return MyENCODING_STATUS_OK;
            case 1164:
                // U+00EA U+0304
                res->result     = 0x00ea;
                res->result_aux = 0x0304;
                return MyENCODING_STATUS_OK;
            case 1166:
                // U+00EA U+030C
                res->result     = 0x00ea;
                res->result_aux = 0x030c;
                return MyENCODING_STATUS_OK;
            default:
                break;
        }

        // pointer stays 0 when the trail byte was out of range
        if(pointer == 0)
            return MyENCODING_STATUS_ERROR;

        res->result     = myencoding_map_big5[pointer];
        res->result_aux = 0;

        // a zero table entry is treated as "no mapping", the same way
        // the euc-jp and iso-2022-jp decoders treat their tables
        if(res->result == 0)
            return MyENCODING_STATUS_ERROR;

        return MyENCODING_STATUS_OK;
    }

    if(data <= 0x7F) {
        res->result     = data;
        // do not leave the combining mark of an earlier character behind
        res->result_aux = 0;
        return MyENCODING_STATUS_OK;
    }

    if(data >= 0x81 && data <= 0xFE) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}
559
|
|
|
|
|
|
|
|
560
|
0
|
|
|
|
|
|
/*
 * Incremental EUC-JP decoder: consumes exactly one byte per call.
 *
 * State carried in *res between calls:
 *   res->first -- pending lead byte (0 when idle)
 *   res->flag  -- non-zero while inside a 0x8F-prefixed sequence, which
 *                 selects myencoding_map_jis0212 instead of
 *                 myencoding_map_jis0208
 *
 * Returns:
 *   MyENCODING_STATUS_OK        res->result holds a code point
 *   MyENCODING_STATUS_CONTINUE  more bytes are required
 *   MyENCODING_STATUS_ERROR     invalid byte or unmapped table entry
 */
myencoding_status_t myencoding_decode_euc_jp(unsigned const char data, myencoding_result_t *res)
{
    if(res->first == 0x8E && (data >= 0xA1 && data <= 0xDF)) {
        // 0x8E prefix: half-width katakana
        res->first  = 0x00;
        res->result = 0xFF61 + data - 0xA1;

        return MyENCODING_STATUS_OK;
    }
    else if(res->first == 0x8F && (data >= 0xA1 && data <= 0xFE)) {
        // 0x8F prefix: remember that the next pair uses jis0212
        res->flag  = 1;
        res->first = data;

        return MyENCODING_STATUS_CONTINUE;
    }
    else if(res->first)
    {
        unsigned long lead = res->first;
        int use_jis0212    = (res->flag != 0);

        // The pending sequence ends here whether or not the trail byte
        // is valid, so the table selector must not survive into the
        // next character.
        res->first = 0x00;
        res->flag  = 0;

        if((lead >= 0xA1 && lead <= 0xFE) &&
           (data >= 0xA1 && data <= 0xFE))
        {
            unsigned long idx = (lead - 0xA1) * 94 + data - 0xA1;

            if(use_jis0212) {
                res->result = myencoding_map_jis0212[idx];
            }
            else {
                res->result = myencoding_map_jis0208[idx];
            }

            if(res->result == 0)
                return MyENCODING_STATUS_ERROR;

            return MyENCODING_STATUS_OK;
        }

        // invalid pair: the current byte is handled below as the
        // start of a new character
    }

    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    if((data >= 0x8E && data <= 0x8F) ||
       (data >= 0xA1 && data <= 0xFE))
    {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}
613
|
|
|
|
|
|
|
|
614
|
0
|
|
|
|
|
|
/*
 * Incremental ISO-2022-JP decoder: consumes exactly one byte per call.
 *
 * State carried in *res between calls:
 *   res->first  -- lead
 *   res->second -- state
 *   res->third  -- output state
 *   res->flag   -- output flag
 *
 * State values used in res->second / res->third:
 *   0 ASCII, 1 Roman, 2 or 3 Katakana, 4 lead byte, 5 trail byte,
 *   6 escape start, 7 escape
 *
 * Returns:
 *   MyENCODING_STATUS_OK        res->result holds a code point
 *   MyENCODING_STATUS_CONTINUE  the byte was consumed, no output yet
 *   MyENCODING_STATUS_ERROR     invalid byte or unmapped table entry
 *   MyENCODING_STATUS_CONTINUE|MyENCODING_STATUS_ERROR
 *                               ESC arrived where a trail byte was expected
 *   0                           res->second holds an unknown state
 */
myencoding_status_t myencoding_decode_iso_2022_jp(unsigned const char data, myencoding_result_t *res)
{
    switch (res->second) {
        case 0: // ASCII
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data <= 0x7F && data != 0x0E && data != 0x0F) {
                res->flag   = 0;
                res->result = data;
                return MyENCODING_STATUS_OK;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 1: // Roman
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data == 0x5C) {
                res->flag   = 0;
                res->result = 0x00A5;
                return MyENCODING_STATUS_OK;
            }
            else if(data == 0x7E) {
                res->flag   = 0;
                res->result = 0x203E;
                return MyENCODING_STATUS_OK;
            }
            else if(data <= 0x7F && data != 0x0E && data != 0x0F) {
                res->flag   = 0;
                res->result = data;
                return MyENCODING_STATUS_OK;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        // The escape handler below selects Katakana with the value 2;
        // 3 is accepted as well for state that was stored with the
        // label this case originally carried.
        case 2: // Katakana
        case 3:
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data >= 0x21 && data <= 0x5F) {
                res->flag   = 0;
                res->result = 0xFF61 + data - 0x21;

                return MyENCODING_STATUS_OK;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 4: // Lead byte
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE;
            }
            else if(data >= 0x21 && data <= 0x7E) {
                res->flag   = 0;
                res->first  = data;
                res->second = 5;

                return MyENCODING_STATUS_CONTINUE;
            }

            res->flag = 0;
            return MyENCODING_STATUS_ERROR;
        }

        case 5: // Trail byte
        {
            if(data == 0x1B) {
                res->second = 6;
                return MyENCODING_STATUS_CONTINUE|MyENCODING_STATUS_ERROR;
            }
            else if(data >= 0x21 && data <= 0x7E) {
                res->second = 4;

                unsigned long pointer = (res->first - 0x21) * 94 + data - 0x21;
                res->result = myencoding_map_jis0208[pointer];

                if(res->result == 0)
                    return MyENCODING_STATUS_ERROR;

                return MyENCODING_STATUS_OK;
            }

            res->second = 4;
            return MyENCODING_STATUS_ERROR;
        }

        case 6: // Escape start
        {
            if(data == 0x24 || data == 0x28) {
                res->first  = data;
                res->second = 7;

                return MyENCODING_STATUS_CONTINUE;
            }

            // not an escape sequence: go back to the last output state
            res->flag   = 0;
            res->second = res->third;

            return MyENCODING_STATUS_ERROR;
        }

        case 7: // Escape
        {
            unsigned long lead      = res->first;
            unsigned long new_state = 0;
            // Tracked separately from new_state because state 0 (ASCII)
            // is itself a valid escape target.
            int recognized = 1;

            res->first = 0x00;

            if(lead == 0x28 && data == 0x42) {
                new_state = 0; // ESC ( B
            }
            else if(lead == 0x28 && data == 0x4A) {
                new_state = 1; // ESC ( J
            }
            else if(lead == 0x28 && data == 0x49) {
                new_state = 2; // ESC ( I
            }
            else if(lead == 0x24 && (data == 0x40 || data == 0x42)) {
                new_state = 4; // ESC $ @  /  ESC $ B
            }
            else {
                recognized = 0;
            }

            if(recognized)
            {
                res->second = new_state;
                res->third  = new_state;

                unsigned long output_flag = res->flag;
                res->flag = 1;

                // two escape sequences in a row with no output between them
                if(output_flag)
                    return MyENCODING_STATUS_ERROR;

                return MyENCODING_STATUS_CONTINUE;
            }

            // unknown escape sequence: go back to the last output state
            res->flag   = 0;
            res->second = res->third;

            return MyENCODING_STATUS_ERROR;
        }

        default:
            break;
    }

    return 0;
}
787
|
|
|
|
|
|
|
|
788
|
0
|
|
|
|
|
|
/*
 * Shift_JIS stream decoder; consumes one byte per call.
 *
 * State kept in res:
 *   res->first  -- pending lead byte (0x00 when no lead is pending)
 *   res->result -- decoded code point, meaningful when
 *                  MyENCODING_STATUS_OK is returned
 *
 * Returns MyENCODING_STATUS_OK when a code point was produced,
 * MyENCODING_STATUS_CONTINUE when a lead byte was stored and a trail
 * byte is expected next, and MyENCODING_STATUS_ERROR for an invalid
 * byte or an unmapped lead/trail pair.
 */
myencoding_status_t myencoding_decode_shift_jis(unsigned const char data, myencoding_result_t *res)
{
    if(res->first)
    {
        /* The lead is consumed by this byte however decoding ends; leaving
           it set would make every following byte look like a trail byte. */
        unsigned long lead = res->first;
        res->first = 0x00;

        unsigned long offset      = (data < 0x7F) ? 0x40 : 0x41;
        unsigned long lead_offset = (lead < 0xA0) ? 0x81 : 0xC1;

        /* Pointer 0 is a legitimate index (lead 0x81, trail 0x40), so
           validity is tracked separately instead of testing pointer != 0. */
        int has_pointer = 0;
        unsigned long pointer = 0x00;

        if((data >= 0x40 && data <= 0x7E) ||
           (data >= 0x80 && data <= 0xFC))
        {
            pointer = (lead - lead_offset) * 188 + data - offset;
            has_pointer = 1;
        }

        res->result = has_pointer ? myencoding_map_jis0208[pointer] : 0x00;

        /* Unmapped pointers inside this window are mapped linearly from 0xE000. */
        /* NOTE(review): the current WHATWG Encoding text appears to use 10715
           as the upper bound of this window -- confirm before changing. */
        if(res->result == 0x00 && has_pointer && (pointer >= 8836 && pointer <= 10528)) {
            res->result = 0xE000 + pointer - 8836;
        }

        if(res->result)
            return MyENCODING_STATUS_OK;

        return MyENCODING_STATUS_ERROR;
    }

    /* Single-byte range: passed through unchanged. */
    if((data <= 0x7F) || data == 0x80) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    /* 0xA1..0xDF map linearly onto 0xFF61..0xFF9F. */
    if(data >= 0xA1 && data <= 0xDF) {
        res->result = 0xFF61 + data - 0xA1;
        return MyENCODING_STATUS_OK;
    }

    /* Lead byte of a double-byte sequence: remember it and ask for more. */
    if((data >= 0x81 && data <= 0x9F) ||
       (data >= 0xE0 && data <= 0xFC)) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}
847
|
|
|
|
|
|
|
|
848
|
0
|
|
|
|
|
|
/*
 * EUC-KR stream decoder; consumes one byte per call.
 *
 * State kept in res:
 *   res->first  -- pending lead byte (0x00 when no lead is pending)
 *   res->result -- decoded code point, meaningful when
 *                  MyENCODING_STATUS_OK is returned
 *
 * Returns MyENCODING_STATUS_OK when a code point was produced,
 * MyENCODING_STATUS_CONTINUE when a lead byte was stored, and
 * MyENCODING_STATUS_ERROR for an invalid byte or an unmapped pair.
 */
myencoding_status_t myencoding_decode_euc_kr(unsigned const char data, myencoding_result_t *res)
{
    if(res->first)
    {
        unsigned long lead = res->first;
        res->first = 0x00;

        /* Drop whatever an earlier call left behind, so an out-of-range
           trail byte cannot report a stale code point as a success. */
        res->result = 0x00;

        /* Pointer 0 (lead 0x81, trail 0x41) is a valid index, so the lookup
           is keyed on the trail-byte range, not on pointer != 0. */
        if(data >= 0x41 && data <= 0xFE) {
            unsigned long pointer = (lead - 0x81) * 190 + (data - 0x41);
            res->result = myencoding_map_euc_kr[pointer];
        }

        if(res->result)
            return MyENCODING_STATUS_OK;

        return MyENCODING_STATUS_ERROR;
    }

    /* Single-byte range: passed through unchanged. */
    if(data <= 0x7F) {
        res->result = data;
        return MyENCODING_STATUS_OK;
    }

    /* Lead byte of a double-byte sequence. */
    if(data >= 0x81 && data <= 0xFE) {
        res->first = data;
        return MyENCODING_STATUS_CONTINUE;
    }

    return MyENCODING_STATUS_ERROR;
}
882
|
|
|
|
|
|
|
|
883
|
72
|
|
|
|
|
|
/*
 * Byte-order independent core of the UTF-16 decoders; consumes one byte
 * per call.
 *
 * State kept in res:
 *   res->first  -- first byte of the code unit being assembled
 *   res->third  -- non-zero while a first byte is pending; needed because
 *                  0x00 is a valid first byte and cannot mark "empty"
 *   res->second -- pending lead surrogate (0x00 when none)
 *   res->flag   -- non-zero: the first byte is the high byte;
 *                  zero: the first byte is the low byte
 *
 * Returns:
 *   MyENCODING_STATUS_CONTINUE -- a byte or a lead surrogate was stored
 *   MyENCODING_STATUS_OK       -- res->result holds a code point
 *   MyENCODING_STATUS_ERROR    -- a trail surrogate arrived without a lead
 *   MyENCODING_STATUS_DONE|MyENCODING_STATUS_ERROR
 *                              -- a lead surrogate was followed by a code
 *                                 unit that is not a trail surrogate;
 *                                 res->result and res->result_aux hold the
 *                                 two bytes of that unit in stream order
 */
myencoding_status_t myencoding_decode_shared_utf_16(unsigned const char data, myencoding_result_t *res)
{
    /* A non-zero res->first is still honoured as "pending" so that state
       prepared by the previous convention keeps working. */
    if(res->first == 0x00 && res->third == 0x00) {
        res->first = data;
        res->third = 1;
        return MyENCODING_STATUS_CONTINUE;
    }

    unsigned long lead_byte = res->first;
    res->first = 0x00;
    res->third = 0x00;

    unsigned long code_unit;
    if(res->flag)
        code_unit = (lead_byte << 8) + data;
    else
        code_unit = ((unsigned long)data << 8) + lead_byte;

    if(res->second) {
        unsigned long lead_surrogate = res->second;
        res->second = 0x00;

        if(code_unit >= 0xDC00 && code_unit <= 0xDFFF) {
            res->result = 0x10000 + ((lead_surrogate - 0xD800) << 10) + (code_unit - 0xDC00);
            return MyENCODING_STATUS_OK;
        }

        /* Unpaired lead surrogate: hand the two bytes of the current unit
           back to the caller in the order they were received. */
        unsigned char byte1 = (unsigned char)(code_unit >> 8);
        unsigned char byte2 = (unsigned char)(code_unit & 0x00FF);

        if(res->flag) {
            res->result     = byte1;
            res->result_aux = byte2;
        }
        else {
            res->result     = byte2;
            res->result_aux = byte1;
        }

        return MyENCODING_STATUS_DONE|MyENCODING_STATUS_ERROR;
    }

    /* Lead surrogate: keep it and wait for the trail surrogate. */
    if(code_unit >= 0xD800 && code_unit <= 0xDBFF) {
        res->second = code_unit;
        return MyENCODING_STATUS_CONTINUE;
    }

    /* Trail surrogate with no lead before it. */
    if(code_unit >= 0xDC00 && code_unit <= 0xDFFF) {
        return MyENCODING_STATUS_ERROR;
    }

    res->result = code_unit;

    return MyENCODING_STATUS_OK;
}
939
|
|
|
|
|
|
|
|
940
|
0
|
|
|
|
|
|
/*
 * UTF-16BE entry point: makes sure res->flag is non-zero, which tells the
 * shared decoder to treat the first byte of each pair as the high byte,
 * then forwards the byte to it. A flag that is already non-zero is left
 * untouched.
 */
myencoding_status_t myencoding_decode_utf_16be(unsigned const char data, myencoding_result_t *res)
{
    if(!res->flag) {
        res->flag = 1;
    }

    return myencoding_decode_shared_utf_16(data, res);
}
947
|
|
|
|
|
|
|
|
948
|
72
|
|
|
|
|
|
/*
 * UTF-16LE entry point: forces res->flag to zero, which tells the shared
 * decoder to treat the first byte of each pair as the low byte, then
 * forwards the byte to it.
 */
myencoding_status_t myencoding_decode_utf_16le(unsigned const char data, myencoding_result_t *res)
{
    res->flag = 0;

    return myencoding_decode_shared_utf_16(data, res);
}
955
|
|
|
|
|
|
|
|
956
|
0
|
|
|
|
|
|
/*
 * x-user-defined decoder: bytes up to 0x7F are passed through unchanged,
 * bytes 0x80..0xFF are mapped linearly onto 0xF780..0xF7FF.
 * Every byte yields a code point, so the status is always
 * MyENCODING_STATUS_OK.
 */
myencoding_status_t myencoding_decode_x_user_defined(unsigned const char data, myencoding_result_t *res)
{
    res->result = (data <= 0x7F) ? data : (0xF780 + data - 0x80);

    return MyENCODING_STATUS_OK;
}
965
|
|
|
|
|
|
|
|
966
|
0
|
|
|
|
|
|
/* Reset every field of the decoder state to zero. */
void myencoding_result_clean(myencoding_result_t *res)
{
    memset(res, 0, sizeof(*res));
}
970
|
|
|
|
|
|
|
|
971
|
0
|
|
|
|
|
|
/*
 * Number of bytes needed to store `codepoint` in UTF-8 style encoding,
 * including the historical 5- and 6-byte forms that Unicode does not use.
 * Returns 0 for values above 0x7FFFFFFF.
 */
size_t myencoding_codepoint_ascii_length(size_t codepoint)
{
    /* upper_bound[k] is the largest value that fits in k + 1 bytes. */
    static const size_t upper_bound[] = {
        0x0000007F,
        0x000007FF,
        0x0000FFFF,
        0x001FFFFF,
        0x03FFFFFF,
        0x7FFFFFFF
    };
    const size_t count = sizeof(upper_bound) / sizeof(upper_bound[0]);

    for(size_t idx = 0; idx < count; idx++) {
        if(codepoint <= upper_bound[idx])
            return idx + 1;
    }

    return 0;
}
995
|
|
|
|
|
|
|
|
996
|
0
|
|
|
|
|
|
/*
 * Length in bytes of the UTF-8 style sequence introduced by the lead byte
 * `data`, including the historical 5- and 6-byte forms.
 *
 *   0xxxxxxx -> 1      11110xxx -> 4
 *   110xxxxx -> 2      111110xx -> 5
 *   1110xxxx -> 3      1111110x -> 6
 *
 * Returns 0 when `data` cannot start a sequence (continuation bytes
 * 10xxxxxx and the values 0xFE / 0xFF).
 */
size_t myencoding_ascii_utf_8_length(const unsigned char data)
{
    if (data < 0x80) {
        return 1;
    }
    else if ((data & 0xe0) == 0xc0) {
        return 2;
    }
    else if ((data & 0xf0) == 0xe0) {
        return 3;
    }
    else if ((data & 0xf8) == 0xf0) {
        return 4;
    }
    else if ((data & 0xfc) == 0xf8) {
        return 5;
    }
    /* The pattern is 1111110x, so the lowest bit must be masked out:
       both 0xFC and 0xFD introduce a 6-byte sequence. */
    else if ((data & 0xfe) == 0xfc) {
        return 6;
    }

    return 0;
}
1019
|
|
|
|
|
|
|
|
1020
|
272
|
|
|
|
|
|
/*
 * Encode `codepoint` as UTF-8 into `data` and return the number of bytes
 * written (1..4). Nothing is written and 0 is returned for values above
 * 0x1FFFFF; the 5- and 6-byte forms are not produced. No terminator is
 * appended, and `data` must have room for up to 4 bytes.
 *
 *   lead byte markers: 0xC0 = 110xxxxx, 0xE0 = 1110xxxx, 0xF0 = 11110xxx
 *   continuation:      0x80 = 10xxxxxx
 */
size_t myencoding_codepoint_to_ascii_utf_8(size_t codepoint, char *data)
{
    size_t total;
    size_t lead_marker;

    if (codepoint <= 0x0000007F) {
        /* 0xxxxxxx */
        data[0] = (char)codepoint;
        return 1;
    }

    if (codepoint <= 0x000007FF) {
        total = 2;
        lead_marker = 0xC0;
    }
    else if (codepoint <= 0x0000FFFF) {
        total = 3;
        lead_marker = 0xE0;
    }
    else if (codepoint <= 0x001FFFFF) {
        total = 4;
        lead_marker = 0xF0;
    }
    else {
        return 0;
    }

    /* Fill the continuation bytes from the tail, six payload bits each. */
    size_t rest = codepoint;
    for (size_t pos = total - 1; pos > 0; pos--) {
        data[pos] = (char)(0x80 | (rest & 0x3F));
        rest >>= 6;
    }

    /* Whatever is left fits into the payload bits of the lead byte. */
    data[0] = (char)(lead_marker | rest);

    return total;
}
1083
|
|
|
|
|
|
|
|
1084
|
0
|
|
|
|
|
|
size_t myencoding_codepoint_to_lowercase_ascii_utf_8(size_t codepoint, char *data) |
1085
|
|
|
|
|
|
|
{ |
1086
|
|
|
|
|
|
|
/* 0x80 -- 10xxxxxx */ |
1087
|
|
|
|
|
|
|
/* 0xC0 -- 110xxxxx */ |
1088
|
|
|
|
|
|
|
/* 0xE0 -- 1110xxxx */ |
1089
|
|
|
|
|
|
|
/* 0xF0 -- 11110xxx */ |
1090
|
|
|
|
|
|
|
/* 0xF8 -- 111110xx */ |
1091
|
|
|
|
|
|
|
/* 0xFC -- 1111110x */ |
1092
|
|
|
|
|
|
|
|
1093
|
0
|
0
|
|
|
|
|
if (codepoint <= 0x0000007F) { |
1094
|
|
|
|
|
|
|
/* 0xxxxxxx */ |
1095
|
0
|
|
|
|
|
|
data[0] = (char)mycore_string_chars_lowercase_map[ codepoint ]; |
1096
|
0
|
|
|
|
|
|
return 1; |
1097
|
|
|
|
|
|
|
} |
1098
|
0
|
0
|
|
|
|
|
else if (codepoint <= 0x000007FF) { |
1099
|
|
|
|
|
|
|
/* 110xxxxx 10xxxxxx */ |
1100
|
0
|
|
|
|
|
|
data[0] = (char)(0xC0 | (codepoint >> 6 )); |
1101
|
0
|
|
|
|
|
|
data[1] = (char)(0x80 | (codepoint & 0x3F)); |
1102
|
|
|
|
|
|
|
|
1103
|
0
|
|
|
|
|
|
return 2; |
1104
|
|
|
|
|
|
|
} |
1105
|
0
|
0
|
|
|
|
|
else if (codepoint <= 0x0000FFFF) { |
1106
|
|
|
|
|
|
|
/* 1110xxxx 10xxxxxx 10xxxxxx */ |
1107
|
0
|
|
|
|
|
|
data[0] = (char)(0xE0 | ((codepoint >> 12))); |
1108
|
0
|
|
|
|
|
|
data[1] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F)); |
1109
|
0
|
|
|
|
|
|
data[2] = (char)(0x80 | ( codepoint & 0x3F)); |
1110
|
|
|
|
|
|
|
|
1111
|
0
|
|
|
|
|
|
return 3; |
1112
|
|
|
|
|
|
|
} |
1113
|
0
|
0
|
|
|
|
|
else if (codepoint <= 0x001FFFFF) { |
1114
|
|
|
|
|
|
|
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
1115
|
0
|
|
|
|
|
|
data[0] = (char)(0xF0 | ( codepoint >> 18)); |
1116
|
0
|
|
|
|
|
|
data[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F)); |
1117
|
0
|
|
|
|
|
|
data[2] = (char)(0x80 | ((codepoint >> 6 ) & 0x3F)); |
1118
|
0
|
|
|
|
|
|
data[3] = (char)(0x80 | ( codepoint & 0x3F)); |
1119
|
|
|
|
|
|
|
|
1120
|
0
|
|
|
|
|
|
return 4; |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
/* not uses in unicode */ |
1123
|
|
|
|
|
|
|
// else if (codepoint <= 0x03FFFFFF) { |
1124
|
|
|
|
|
|
|
// /* 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
1125
|
|
|
|
|
|
|
// data[0] = 0xF8 | ( codepoint >> 24); |
1126
|
|
|
|
|
|
|
// data[1] = 0x80 | ((codepoint >> 18) & 0x3F); |
1127
|
|
|
|
|
|
|
// data[2] = 0x80 | ((codepoint >> 12) & 0x3F); |
1128
|
|
|
|
|
|
|
// data[3] = 0x80 | ((codepoint >> 6 ) & 0x3F); |
1129
|
|
|
|
|
|
|
// data[4] = 0x80 | ( codepoint & 0x3F); |
1130
|
|
|
|
|
|
|
// |
1131
|
|
|
|
|
|
|
// return 5; |
1132
|
|
|
|
|
|
|
// } |
1133
|
|
|
|
|
|
|
// else if (codepoint <= 0x7FFFFFFF) { |
1134
|
|
|
|
|
|
|
// /* 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
1135
|
|
|
|
|
|
|
// data[0] = 0xFC | ( codepoint >> 30); |
1136
|
|
|
|
|
|
|
// data[1] = 0x80 | ((codepoint >> 24) & 0x3F); |
1137
|
|
|
|
|
|
|
// data[2] = 0x80 | ((codepoint >> 18) & 0x3F); |
1138
|
|
|
|
|
|
|
// data[3] = 0x80 | ((codepoint >> 12) & 0x3F); |
1139
|
|
|
|
|
|
|
// data[4] = 0x80 | ((codepoint >> 6 ) & 0x3F); |
1140
|
|
|
|
|
|
|
// data[5] = 0x80 | ( codepoint & 0x3F); |
1141
|
|
|
|
|
|
|
// |
1142
|
|
|
|
|
|
|
// return 6; |
1143
|
|
|
|
|
|
|
// } |
1144
|
|
|
|
|
|
|
|
1145
|
0
|
|
|
|
|
|
return 0; |
1146
|
|
|
|
|
|
|
} |
1147
|
|
|
|
|
|
|
|
1148
|
0
|
|
|
|
|
|
size_t myencoding_ascii_utf_8_to_codepoint(const unsigned char* data, size_t* codepoint) |
1149
|
|
|
|
|
|
|
{ |
1150
|
0
|
0
|
|
|
|
|
if (*data < 0x80){ |
1151
|
|
|
|
|
|
|
/* 0xxxxxxx */ |
1152
|
0
|
|
|
|
|
|
*codepoint = (size_t)*data; |
1153
|
0
|
|
|
|
|
|
return 1; |
1154
|
|
|
|
|
|
|
} |
1155
|
0
|
0
|
|
|
|
|
else if ((*data & 0xe0) == 0xc0) { |
1156
|
|
|
|
|
|
|
/* 110xxxxx 10xxxxxx */ |
1157
|
0
|
|
|
|
|
|
*codepoint = (data[0] ^ (0xC0 & data[0])) << 6; |
1158
|
0
|
|
|
|
|
|
*codepoint |= (data[1] ^ (0x80 & data[1])); |
1159
|
|
|
|
|
|
|
|
1160
|
0
|
|
|
|
|
|
return 2; |
1161
|
|
|
|
|
|
|
} |
1162
|
0
|
0
|
|
|
|
|
else if ((*data & 0xf0) == 0xe0) { |
1163
|
|
|
|
|
|
|
/* 1110xxxx 10xxxxxx 10xxxxxx */ |
1164
|
0
|
|
|
|
|
|
*codepoint = (data[0] ^ (0xE0 & data[0])) << 12; |
1165
|
0
|
|
|
|
|
|
*codepoint |= (data[1] ^ (0x80 & data[1])) << 6; |
1166
|
0
|
|
|
|
|
|
*codepoint |= (data[2] ^ (0x80 & data[2])); |
1167
|
|
|
|
|
|
|
|
1168
|
0
|
|
|
|
|
|
return 3; |
1169
|
|
|
|
|
|
|
} |
1170
|
0
|
0
|
|
|
|
|
else if ((*data & 0xf8) == 0xf0) { |
1171
|
|
|
|
|
|
|
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
1172
|
0
|
|
|
|
|
|
*codepoint = (data[0] ^ (0xF0 & data[0])) << 18; |
1173
|
0
|
|
|
|
|
|
*codepoint |= (data[1] ^ (0x80 & data[1])) << 12; |
1174
|
0
|
|
|
|
|
|
*codepoint |= (data[2] ^ (0x80 & data[2])) << 6; |
1175
|
0
|
|
|
|
|
|
*codepoint |= (data[3] ^ (0x80 & data[3])); |
1176
|
|
|
|
|
|
|
|
1177
|
0
|
|
|
|
|
|
return 4; |
1178
|
|
|
|
|
|
|
} |
1179
|
|
|
|
|
|
|
|
1180
|
0
|
|
|
|
|
|
return 0; |
1181
|
|
|
|
|
|
|
} |
1182
|
|
|
|
|
|
|
|
1183
|
0
|
|
|
|
|
|
/*
 * Write `codepoint` into `data` as UTF-16 code units, high byte first.
 * Values that fit in 16 bits take one unit (2 bytes); anything larger is
 * split into a surrogate pair (4 bytes). Returns the number of bytes
 * written. The input is not validated.
 */
size_t myencoding_codepoint_to_ascii_utf_16(size_t codepoint, char *data)
{
    if((codepoint >> 16) == 0) {
        /* Single code unit. */
        data[0] = (char)(codepoint >> 8);
        data[1] = (char)codepoint;

        return 2;
    }

    /* Surrogate pair: 10 payload bits in each half. */
    const size_t shifted   = codepoint - 0x10000;
    const size_t lead_unit  = 0xD800 | (shifted >> 10);
    const size_t trail_unit = 0xDC00 | (shifted & 0x3FF);

    data[0] = (char)(lead_unit >> 8);
    data[1] = (char)lead_unit;
    data[2] = (char)(trail_unit >> 8);
    data[3] = (char)trail_unit;

    return 4;
}
1204
|
|
|
|
|
|
|
|
1205
|
0
|
|
|
|
|
|
/*
 * Decode `length` bytes of `buff` with the decoder selected by `encoding`
 * and append the result, re-encoded as UTF-8, to `raw_str`.
 *
 * raw_str  -- output string; when raw_str->data is NULL a buffer of
 *             length + 1 bytes is allocated and raw_str->length is reset
 *             to 0, otherwise output is appended at raw_str->length.
 *             The buffer is grown with mycore_realloc as needed.
 * buff     -- input bytes
 * length   -- number of input bytes
 * encoding -- decoder id passed to myencoding_get_function_by_id
 *
 * Only bytes for which the decoder reports MyENCODING_STATUS_OK produce
 * output; every other status is skipped silently.
 *
 * Returns the number of input bytes processed (`length`), or 0 when no
 * decoder is available or an allocation fails. On a failed reallocation
 * the data written so far stays valid in raw_str.
 */
size_t myencoding_convert_to_ascii_utf_8(mycore_string_raw_t* raw_str, const char* buff, size_t length, myencoding_t encoding)
{
    const myencoding_custom_f func = myencoding_get_function_by_id(encoding);

    /* Calling through a missing decoder would crash. */
    if(func == NULL)
        return 0;

    if(raw_str->data == NULL) {
        size_t initial_size = length + 1;
        char *initial_data = mycore_malloc(sizeof(char) * initial_size);

        /* Leave raw_str untouched on failure so size never describes
           a buffer that does not exist. */
        if(initial_data == NULL)
            return 0;

        raw_str->data   = initial_data;
        raw_str->size   = initial_size;
        raw_str->length = 0;
    }

    myencoding_result_t res = {0};

    unsigned const char* u_buff = (unsigned const char*)buff;

    size_t i;
    for (i = 0; i < length; i++)
    {
        if(func(u_buff[i], &res) == MyENCODING_STATUS_OK) {
            /* Keep at least 6 spare bytes ahead of the write position. */
            if((raw_str->length + 6) >= raw_str->size) {
                size_t new_size = raw_str->length + 6 + (length / 2);
                char *new_data = mycore_realloc(raw_str->data, sizeof(char) * new_size);

                if(new_data == NULL) {
                    return 0;
                }

                raw_str->data = new_data;
                raw_str->size = new_size;
            }

            raw_str->length += myencoding_codepoint_to_ascii_utf_8(res.result, &raw_str->data[raw_str->length]);
        }
    }

    /* Terminate the output when there is room for it; the spare bytes kept
       above guarantee room after any write made by this call. */
    if(raw_str->length < raw_str->size)
        raw_str->data[raw_str->length] = '\0';

    return i;
}
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
|