line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
// This file is part of NameTag . |
2
|
|
|
|
|
|
|
// |
3
|
|
|
|
|
|
|
// This file is a bundle of all sources and headers of NameTag library. |
4
|
|
|
|
|
|
|
// Comments and copyrights of all individual files are kept. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include |
7
|
|
|
|
|
|
|
#include |
8
|
|
|
|
|
|
|
#include |
9
|
|
|
|
|
|
|
#include |
10
|
|
|
|
|
|
|
#include |
11
|
|
|
|
|
|
|
#include |
12
|
|
|
|
|
|
|
#include |
13
|
|
|
|
|
|
|
#include |
14
|
|
|
|
|
|
|
#include |
15
|
|
|
|
|
|
|
#include |
16
|
|
|
|
|
|
|
#include |
17
|
|
|
|
|
|
|
#include |
18
|
|
|
|
|
|
|
#include |
19
|
|
|
|
|
|
|
#include |
20
|
|
|
|
|
|
|
#include |
21
|
|
|
|
|
|
|
#include |
22
|
|
|
|
|
|
|
#include |
23
|
|
|
|
|
|
|
#include |
24
|
|
|
|
|
|
|
#include |
25
|
|
|
|
|
|
|
#include |
26
|
|
|
|
|
|
|
#include |
27
|
|
|
|
|
|
|
#include |
28
|
|
|
|
|
|
|
#include |
29
|
|
|
|
|
|
|
#include |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
namespace ufal { |
32
|
|
|
|
|
|
|
namespace nametag { |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
///////// |
35
|
|
|
|
|
|
|
// File: utils/common.h |
36
|
|
|
|
|
|
|
///////// |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
39
|
|
|
|
|
|
|
// |
40
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
41
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
42
|
|
|
|
|
|
|
// |
43
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
44
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
45
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
// Headers available in all sources |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
namespace utils {

// Every source of the bundle relies on unqualified standard-library names.
using namespace std;

// Platform requirement: `int` has to hold at least a 32-bit value.
static_assert(sizeof(int) >= sizeof(int32_t), "Int must be at least 4B wide!");

// Platform requirement: little endian byte order. The check is skipped when
// the compiler does not provide __BYTE_ORDER__.
#if defined(__BYTE_ORDER__)
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Only little endian systems are supported!");
#endif

// Writes `message` (any chain of stream insertions) to cerr, then terminates
// the process with exit status 1. Expands to a single expression.
#define runtime_failure(message) exit((cerr << message << endl, 1))

} // namespace utils
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
///////// |
66
|
|
|
|
|
|
|
// File: common.h |
67
|
|
|
|
|
|
|
///////// |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
// This file is part of NameTag . |
70
|
|
|
|
|
|
|
// |
71
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
72
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
73
|
|
|
|
|
|
|
// |
74
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
75
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
76
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
using namespace utils; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
///////// |
81
|
|
|
|
|
|
|
// File: classifier/classifier_outcome.h |
82
|
|
|
|
|
|
|
///////// |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
// This file is part of NameTag . |
85
|
|
|
|
|
|
|
// |
86
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
87
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
88
|
|
|
|
|
|
|
// |
89
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
90
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
91
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
// Outcome produced by a classifier, stored as an unsigned 32-bit value.
using classifier_outcome = std::uint32_t;
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
///////// |
96
|
|
|
|
|
|
|
// File: bilou/entity_type.h |
97
|
|
|
|
|
|
|
///////// |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
// This file is part of NameTag . |
100
|
|
|
|
|
|
|
// |
101
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
102
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
103
|
|
|
|
|
|
|
// |
104
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
105
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
106
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
typedef classifier_outcome entity_type; |
109
|
|
|
|
|
|
|
enum :entity_type { entity_type_unknown = ~0U }; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
///////// |
112
|
|
|
|
|
|
|
// File: bilou/bilou_type.h |
113
|
|
|
|
|
|
|
///////// |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
// This file is part of NameTag . |
116
|
|
|
|
|
|
|
// |
117
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
118
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
119
|
|
|
|
|
|
|
// |
120
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
121
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
122
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
// Position tag of a word (presumably Begin / Inside / Last / Outside / Unit of
// the BILOU scheme -- the letters are all SOURCE shows).
using bilou_type = unsigned;

enum : bilou_type {
  bilou_type_B,
  bilou_type_I,
  bilou_type_L,
  bilou_type_O,
  bilou_type_U,
  bilou_type_total,           // number of real tags; used as an array size
  bilou_type_unknown = ~0U    // reserved "no tag" marker
};
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
///////// |
128
|
|
|
|
|
|
|
// File: bilou/bilou_probabilities.h |
129
|
|
|
|
|
|
|
///////// |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
// This file is part of NameTag . |
132
|
|
|
|
|
|
|
// |
133
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
134
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
135
|
|
|
|
|
|
|
// |
136
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
137
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
138
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
class bilou_probabilities { |
141
|
|
|
|
|
|
|
public: |
142
|
|
|
|
|
|
|
struct probability_info { |
143
|
|
|
|
|
|
|
double probability; |
144
|
|
|
|
|
|
|
entity_type entity; |
145
|
|
|
|
|
|
|
}; |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
array bilou; |
148
|
|
|
|
|
|
|
}; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
class bilou_probabilities_global : public bilou_probabilities { |
151
|
|
|
|
|
|
|
public: |
152
|
|
|
|
|
|
|
bilou_type best; |
153
|
|
|
|
|
|
|
array previous; |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
void init(const bilou_probabilities& local); |
156
|
|
|
|
|
|
|
void update(const bilou_probabilities& local, const bilou_probabilities_global& prev); |
157
|
|
|
|
|
|
|
}; |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
///////// |
160
|
|
|
|
|
|
|
// File: bilou/bilou_probabilities.cpp |
161
|
|
|
|
|
|
|
///////// |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
// This file is part of NameTag . |
164
|
|
|
|
|
|
|
// |
165
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
166
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
167
|
|
|
|
|
|
|
// |
168
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
169
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
170
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
171
|
|
|
|
|
|
|
|
172
|
4
|
|
|
|
|
|
void bilou_probabilities_global::init(const bilou_probabilities& local) { |
173
|
4
|
|
|
|
|
|
bilou = local.bilou; |
174
|
|
|
|
|
|
|
previous.fill(bilou_type_unknown); |
175
|
|
|
|
|
|
|
|
176
|
4
|
|
|
|
|
|
best = bilou_type_B; |
177
|
4
|
|
|
|
|
|
bilou[bilou_type_I].probability = 0; |
178
|
4
|
|
|
|
|
|
bilou[bilou_type_L].probability = 0; |
179
|
4
|
50
|
|
|
|
|
if (bilou[bilou_type_O].probability > bilou[best].probability) best = bilou_type_O; |
180
|
4
|
100
|
|
|
|
|
if (bilou[bilou_type_U].probability > bilou[best].probability) best = bilou_type_U; |
181
|
4
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
10
|
|
|
|
|
|
void bilou_probabilities_global::update(const bilou_probabilities& local, const bilou_probabilities_global& prev) { |
184
|
|
|
|
|
|
|
// Find the best of previous LOU |
185
|
|
|
|
|
|
|
bilou_type best_LOU = bilou_type_L; |
186
|
10
|
|
|
|
|
|
double best_LOU_prob = prev.bilou[bilou_type_L].probability; |
187
|
10
|
50
|
|
|
|
|
if (prev.bilou[bilou_type_O].probability > best_LOU_prob) { |
188
|
|
|
|
|
|
|
best_LOU = bilou_type_O; |
189
|
|
|
|
|
|
|
best_LOU_prob = prev.bilou[bilou_type_O].probability; |
190
|
|
|
|
|
|
|
} |
191
|
10
|
100
|
|
|
|
|
if (prev.bilou[bilou_type_U].probability > best_LOU_prob) { |
192
|
|
|
|
|
|
|
best_LOU = bilou_type_U; |
193
|
|
|
|
|
|
|
best_LOU_prob = prev.bilou[bilou_type_U].probability; |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
// Find the best of previous BI |
197
|
|
|
|
|
|
|
bilou_type best_BI = bilou_type_B; |
198
|
10
|
|
|
|
|
|
double best_BI_prob = prev.bilou[bilou_type_B].probability; |
199
|
10
|
50
|
|
|
|
|
if (prev.bilou[bilou_type_I].probability > best_BI_prob) { |
200
|
|
|
|
|
|
|
best_BI = bilou_type_I; |
201
|
|
|
|
|
|
|
best_BI_prob = prev.bilou[bilou_type_I].probability; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
// Normalize the best_*_prob probabilities |
205
|
10
|
50
|
|
|
|
|
if (best_BI_prob > best_LOU_prob) { |
206
|
0
|
|
|
|
|
|
best_LOU_prob /= best_BI_prob; |
207
|
|
|
|
|
|
|
best_BI_prob = 1; |
208
|
|
|
|
|
|
|
} else { |
209
|
10
|
|
|
|
|
|
best_BI_prob /= best_LOU_prob; |
210
|
|
|
|
|
|
|
best_LOU_prob = 1; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
// Store update probabilites |
214
|
10
|
|
|
|
|
|
bilou[bilou_type_B].probability = best_LOU_prob * local.bilou[bilou_type_B].probability; |
215
|
10
|
|
|
|
|
|
bilou[bilou_type_B].entity = local.bilou[bilou_type_B].entity; |
216
|
10
|
|
|
|
|
|
previous[bilou_type_B] = best_LOU; |
217
|
10
|
|
|
|
|
|
best = bilou_type_B; |
218
|
|
|
|
|
|
|
|
219
|
10
|
|
|
|
|
|
bilou[bilou_type_I].probability = best_BI_prob * local.bilou[bilou_type_I].probability; |
220
|
10
|
|
|
|
|
|
bilou[bilou_type_I].entity = prev.bilou[best_BI].entity; |
221
|
10
|
|
|
|
|
|
previous[bilou_type_I] = best_BI; |
222
|
10
|
50
|
|
|
|
|
if (bilou[bilou_type_I].probability > bilou[best].probability) best = bilou_type_I; |
223
|
|
|
|
|
|
|
|
224
|
10
|
|
|
|
|
|
bilou[bilou_type_L].probability = best_BI_prob * local.bilou[bilou_type_L].probability; |
225
|
10
|
|
|
|
|
|
bilou[bilou_type_L].entity = prev.bilou[best_BI].entity; |
226
|
10
|
|
|
|
|
|
previous[bilou_type_L] = best_BI; |
227
|
10
|
50
|
|
|
|
|
if (bilou[bilou_type_L].probability > bilou[best].probability) best = bilou_type_L; |
228
|
|
|
|
|
|
|
|
229
|
10
|
|
|
|
|
|
bilou[bilou_type_O].probability = best_LOU_prob * local.bilou[bilou_type_O].probability; |
230
|
10
|
|
|
|
|
|
bilou[bilou_type_O].entity = local.bilou[bilou_type_O].entity; |
231
|
10
|
|
|
|
|
|
previous[bilou_type_O] = best_LOU; |
232
|
10
|
50
|
|
|
|
|
if (bilou[bilou_type_O].probability > bilou[best].probability) best = bilou_type_O; |
233
|
|
|
|
|
|
|
|
234
|
10
|
|
|
|
|
|
bilou[bilou_type_U].probability = best_LOU_prob * local.bilou[bilou_type_U].probability; |
235
|
10
|
|
|
|
|
|
bilou[bilou_type_U].entity = local.bilou[bilou_type_U].entity; |
236
|
10
|
|
|
|
|
|
previous[bilou_type_U] = best_LOU; |
237
|
10
|
100
|
|
|
|
|
if (bilou[bilou_type_U].probability > bilou[best].probability) best = bilou_type_U; |
238
|
10
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
///////// |
241
|
|
|
|
|
|
|
// File: classifier/classifier_feature.h |
242
|
|
|
|
|
|
|
///////// |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
// This file is part of NameTag . |
245
|
|
|
|
|
|
|
// |
246
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
247
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
248
|
|
|
|
|
|
|
// |
249
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
250
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
251
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
// A single classifier feature, stored as an unsigned 32-bit value.
typedef std::uint32_t classifier_feature;
// Collection of the features of one classified item.
typedef std::vector<classifier_feature> classifier_features;
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
///////// |
257
|
|
|
|
|
|
|
// File: features/ner_feature.h |
258
|
|
|
|
|
|
|
///////// |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
// This file is part of NameTag . |
261
|
|
|
|
|
|
|
// |
262
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
263
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
264
|
|
|
|
|
|
|
// |
265
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
266
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
267
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
typedef classifier_feature ner_feature; |
270
|
|
|
|
|
|
|
enum :ner_feature { ner_feature_unknown = ~0U }; |
271
|
|
|
|
|
|
|
typedef classifier_features ner_features; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
///////// |
274
|
|
|
|
|
|
|
// File: bilou/ner_word.h |
275
|
|
|
|
|
|
|
///////// |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
// This file is part of NameTag . |
278
|
|
|
|
|
|
|
// |
279
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
280
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
281
|
|
|
|
|
|
|
// |
282
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
283
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
284
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
285
|
|
|
|
|
|
|
|
286
|
9
|
|
|
|
|
|
// Textual attributes of one word of a sentence. Which component fills which
// field is not visible in this block.
struct ner_word {
  std::string form;
  std::string raw_lemma;
  // NOTE(review): the element type was lost in SOURCE; std::string is assumed
  // from the sibling `raw_lemma` field -- confirm against users of this member.
  std::vector<std::string> raw_lemmas_all;
  std::string lemma_id;
  std::string lemma_comments;
  std::string tag;

  // Creates a word with all fields empty.
  ner_word() {}
  // Creates a word with the given form; the remaining fields stay empty.
  // Intentionally not `explicit`, so a string still converts to a ner_word.
  ner_word(const std::string& form) : form(form) {}
};
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
///////// |
299
|
|
|
|
|
|
|
// File: bilou/ner_sentence.h |
300
|
|
|
|
|
|
|
///////// |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
// This file is part of NameTag . |
303
|
|
|
|
|
|
|
// |
304
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
305
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
306
|
|
|
|
|
|
|
// |
307
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
308
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
309
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
310
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
struct ner_sentence { |
312
|
|
|
|
|
|
|
unsigned size = 0; |
313
|
|
|
|
|
|
|
vector words; |
314
|
|
|
|
|
|
|
vector features; |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
struct probability_info { |
317
|
|
|
|
|
|
|
bilou_probabilities local; |
318
|
|
|
|
|
|
|
bool local_filled; |
319
|
|
|
|
|
|
|
bilou_probabilities_global global; |
320
|
|
|
|
|
|
|
}; |
321
|
|
|
|
|
|
|
vector probabilities; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
struct previous_stage_info { |
324
|
|
|
|
|
|
|
bilou_type bilou; |
325
|
|
|
|
|
|
|
entity_type entity; |
326
|
|
|
|
|
|
|
}; |
327
|
|
|
|
|
|
|
vector previous_stage; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
void resize(unsigned size); |
330
|
|
|
|
|
|
|
void clear_features(); |
331
|
|
|
|
|
|
|
void clear_probabilities_local_filled(); |
332
|
|
|
|
|
|
|
void clear_previous_stage(); |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
void compute_best_decoding(); |
335
|
|
|
|
|
|
|
void fill_previous_stage(); |
336
|
|
|
|
|
|
|
}; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
///////// |
339
|
|
|
|
|
|
|
// File: bilou/ner_sentence.cpp |
340
|
|
|
|
|
|
|
///////// |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
// This file is part of NameTag . |
343
|
|
|
|
|
|
|
// |
344
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
345
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
346
|
|
|
|
|
|
|
// |
347
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
348
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
349
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
350
|
|
|
|
|
|
|
|
351
|
4
|
|
|
|
|
|
void ner_sentence::resize(unsigned size) { |
352
|
4
|
|
|
|
|
|
this->size = size; |
353
|
4
|
100
|
|
|
|
|
if (words.size() < size) words.resize(size); |
354
|
4
|
100
|
|
|
|
|
if (features.size() < size) features.resize(size); |
355
|
4
|
100
|
|
|
|
|
if (probabilities.size() < size) probabilities.resize(size); |
356
|
4
|
100
|
|
|
|
|
if (previous_stage.size() < size) previous_stage.resize(size); |
357
|
4
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
0
|
|
|
|
|
|
void ner_sentence::clear_features() { |
360
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) |
|
|
0
|
|
|
|
|
|
361
|
14
|
|
|
|
|
|
features[i].clear(); |
362
|
0
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
|
void ner_sentence::clear_probabilities_local_filled() { |
365
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) |
|
|
0
|
|
|
|
|
|
366
|
28
|
|
|
|
|
|
probabilities[i].local_filled = false; |
367
|
0
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
0
|
|
|
|
|
|
void ner_sentence::clear_previous_stage() { |
370
|
9
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) { |
|
|
0
|
|
|
|
|
|
371
|
14
|
|
|
|
|
|
previous_stage[i].bilou = bilou_type_unknown; |
372
|
7
|
|
|
|
|
|
previous_stage[i].entity = entity_type_unknown; |
373
|
|
|
|
|
|
|
} |
374
|
0
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
4
|
|
|
|
|
|
void ner_sentence::compute_best_decoding() { |
377
|
|
|
|
|
|
|
// Find best L O U bilou_type for the last word |
378
|
|
|
|
|
|
|
bilou_type best = bilou_type_L; |
379
|
4
|
|
|
|
|
|
auto& last_bilou = probabilities[size - 1].global.bilou; |
380
|
4
|
50
|
|
|
|
|
if (last_bilou[bilou_type_O].probability > last_bilou[best].probability) best = bilou_type_O; |
381
|
4
|
50
|
|
|
|
|
if (last_bilou[bilou_type_U].probability > last_bilou[best].probability) best = bilou_type_U; |
382
|
4
|
|
|
|
|
|
probabilities[size - 1].global.best = best; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
// Store the best bilou_type for all preceeding words |
385
|
14
|
100
|
|
|
|
|
for (unsigned i = size - 1; i; i--) { |
386
|
20
|
|
|
|
|
|
best = probabilities[i].global.previous[best]; |
387
|
20
|
|
|
|
|
|
probabilities[i - 1].global.best = best; |
388
|
|
|
|
|
|
|
} |
389
|
4
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
8
|
|
|
|
|
|
void ner_sentence::fill_previous_stage() { |
392
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) { |
393
|
28
|
|
|
|
|
|
previous_stage[i].bilou = probabilities[i].global.best; |
394
|
14
|
|
|
|
|
|
previous_stage[i].entity = probabilities[i].global.bilou[probabilities[i].global.best].entity; |
395
|
|
|
|
|
|
|
} |
396
|
4
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
///////// |
399
|
|
|
|
|
|
|
// File: classifier/classifier_instance.h |
400
|
|
|
|
|
|
|
///////// |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
// This file is part of NameTag . |
403
|
|
|
|
|
|
|
// |
404
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
405
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
406
|
|
|
|
|
|
|
// |
407
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
408
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
409
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
class classifier_instance { |
412
|
|
|
|
|
|
|
public: |
413
|
|
|
|
|
|
|
classifier_features features; |
414
|
|
|
|
|
|
|
classifier_outcome outcome; |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
classifier_instance(const classifier_features& features, const classifier_outcome& outcome) : features(features), outcome(outcome) {} |
417
|
|
|
|
|
|
|
}; |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
///////// |
420
|
|
|
|
|
|
|
// File: classifier/network_parameters.h |
421
|
|
|
|
|
|
|
///////// |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
// This file is part of NameTag . |
424
|
|
|
|
|
|
|
// |
425
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
426
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
427
|
|
|
|
|
|
|
// |
428
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
429
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
430
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
// Plain bundle of settings for the network classifier. Only the field names
// are visible here; their exact effect is defined by the code that reads them.
struct network_parameters {
  int iterations;
  double missing_weight;
  double initial_learning_rate;
  double final_learning_rate;
  double gaussian_sigma;
  // Experimental use only.
  int hidden_layer;
};
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
///////// |
442
|
|
|
|
|
|
|
// File: utils/binary_decoder.h |
443
|
|
|
|
|
|
|
///////// |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
446
|
|
|
|
|
|
|
// |
447
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
448
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
449
|
|
|
|
|
|
|
// |
450
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
451
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
452
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
namespace utils {

//
// Declarations
//

// Thrown by binary_decoder when a read or a seek would leave the buffer.
class binary_decoder_error : public std::runtime_error {
 public:
  explicit binary_decoder_error(const char* description) : std::runtime_error(description) {}
};

// Sequential reader over an owned byte buffer. The caller obtains the buffer
// with fill(), writes the raw bytes into it, and then consumes them with the
// next_* methods. Multi-byte values are read in host byte order.
class binary_decoder {
 public:
  // Resizes the buffer to `len` bytes, rewinds the read position to its start
  // and returns a pointer through which the caller stores the data.
  inline unsigned char* fill(unsigned len);

  // Read an unsigned value of 1, 2 or 4 bytes.
  // Throw binary_decoder_error when not enough bytes remain.
  inline unsigned next_1B();
  inline unsigned next_2B();
  inline unsigned next_4B();
  // Reads a length-prefixed string into `str`: one length byte, where the
  // value 255 means that the real length follows as 4 bytes.
  inline void next_str(std::string& str);
  // Returns a pointer to `elements` consecutive values of T inside the buffer
  // and skips over them. The pointer is valid until the next fill().
  // Throws binary_decoder_error when not enough bytes remain.
  template <class T> inline const T* next(unsigned elements);

  // True when no unread bytes remain.
  inline bool is_end();
  // Current read position, in bytes from the start of the buffer.
  inline unsigned tell();
  // Moves the read position; throws binary_decoder_error if `pos` lies past
  // the end of the buffer.
  inline void seek(unsigned pos);

 private:
  std::vector<unsigned char> buffer;
  // Read cursor and one-past-the-end of the buffer. Initialised so that a
  // decoder which was never filled behaves as an empty one.
  const unsigned char* data = nullptr;
  const unsigned char* data_end = nullptr;
};

//
// Definitions
//

unsigned char* binary_decoder::fill(unsigned len) {
  buffer.resize(len);
  data = buffer.data();
  data_end = buffer.data() + len;

  return buffer.data();
}

unsigned binary_decoder::next_1B() {
  if (data >= data_end) throw binary_decoder_error("No more data in binary_decoder");
  return *data++;
}

unsigned binary_decoder::next_2B() {
  // Compare against the number of remaining bytes instead of forming
  // `data + n`, which is not a valid pointer when n exceeds the remainder.
  if (std::size_t(data_end - data) < sizeof(std::uint16_t)) throw binary_decoder_error("No more data in binary_decoder");
  std::uint16_t result;
  std::memcpy(&result, data, sizeof(std::uint16_t));
  data += sizeof(std::uint16_t);
  return result;
}

unsigned binary_decoder::next_4B() {
  if (std::size_t(data_end - data) < sizeof(std::uint32_t)) throw binary_decoder_error("No more data in binary_decoder");
  std::uint32_t result;
  std::memcpy(&result, data, sizeof(std::uint32_t));
  data += sizeof(std::uint32_t);
  return result;
}

void binary_decoder::next_str(std::string& str) {
  unsigned len = next_1B();
  if (len == 255) len = next_4B();
  str.assign(next<char>(len), len);
}

template <class T> const T* binary_decoder::next(unsigned elements) {
  // Dividing the remainder cannot overflow, unlike sizeof(T) * elements added
  // to a pointer.
  if (elements > std::size_t(data_end - data) / sizeof(T)) throw binary_decoder_error("No more data in binary_decoder");
  const T* result = reinterpret_cast<const T*>(data);
  data += sizeof(T) * elements;
  return result;
}

bool binary_decoder::is_end() {
  return data >= data_end;
}

unsigned binary_decoder::tell() {
  return static_cast<unsigned>(data - buffer.data());
}

void binary_decoder::seek(unsigned pos) {
  if (pos > buffer.size()) throw binary_decoder_error("Cannot seek past end of binary_decoder");
  data = buffer.data() + pos;
}

} // namespace utils
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
///////// |
547
|
|
|
|
|
|
|
// File: utils/string_piece.h |
548
|
|
|
|
|
|
|
///////// |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
551
|
|
|
|
|
|
|
// |
552
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
553
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
554
|
|
|
|
|
|
|
// |
555
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
556
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
557
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
namespace utils {

// Non-owning view of `len` characters starting at `str`. The referenced
// characters must outlive the string_piece; they need not be NUL-terminated.
struct string_piece {
  const char* str;
  size_t len;

  // Empty piece (null pointer, zero length).
  string_piece() : str(nullptr), len(0) {}
  // Views a NUL-terminated C string; a null pointer yields an empty piece
  // instead of passing it to strlen.
  string_piece(const char* str) : str(str), len(str ? std::strlen(str) : 0) {}
  // Views exactly `len` characters starting at `str`.
  string_piece(const char* str, size_t len) : str(str), len(len) {}
  // Views the contents of a std::string.
  string_piece(const std::string& str) : str(str.c_str()), len(str.size()) {}
};

// Writes the viewed characters to the stream.
inline std::ostream& operator<<(std::ostream& os, const string_piece& str) {
  return os.write(str.str, str.len);
}

// Two pieces are equal when they have the same length and the same bytes.
// memcmp is skipped for empty pieces, because their pointers may be null and
// passing a null pointer to memcmp is undefined even for a zero length.
inline bool operator==(const string_piece& a, const string_piece& b) {
  return a.len == b.len && (a.len == 0 || std::memcmp(a.str, b.str, a.len) == 0);
}

inline bool operator!=(const string_piece& a, const string_piece& b) {
  return !(a == b);
}

} // namespace utils
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
///////// |
586
|
|
|
|
|
|
|
// File: utils/binary_encoder.h |
587
|
|
|
|
|
|
|
///////// |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
590
|
|
|
|
|
|
|
// |
591
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
592
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
593
|
|
|
|
|
|
|
// |
594
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
595
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
596
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
namespace utils { |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
// |
601
|
|
|
|
|
|
|
// Declarations |
602
|
|
|
|
|
|
|
// |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
class binary_encoder { |
605
|
|
|
|
|
|
|
public: |
606
|
|
|
|
|
|
|
inline binary_encoder(); |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
inline void add_1B(unsigned val); |
609
|
|
|
|
|
|
|
inline void add_2B(unsigned val); |
610
|
|
|
|
|
|
|
inline void add_4B(unsigned val); |
611
|
|
|
|
|
|
|
inline void add_float(double val); |
612
|
|
|
|
|
|
|
inline void add_double(double val); |
613
|
|
|
|
|
|
|
inline void add_str(string_piece str); |
614
|
|
|
|
|
|
|
inline void add_data(string_piece data); |
615
|
|
|
|
|
|
|
template inline void add_data(const vector& data); |
616
|
|
|
|
|
|
|
template inline void add_data(const T* data, size_t elements); |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
vector data; |
619
|
|
|
|
|
|
|
}; |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
// |
622
|
|
|
|
|
|
|
// Definitions |
623
|
|
|
|
|
|
|
// |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
binary_encoder::binary_encoder() { |
626
|
|
|
|
|
|
|
data.reserve(16); |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
|
void binary_encoder::add_1B(unsigned val) { |
630
|
0
|
0
|
|
|
|
|
if (uint8_t(val) != val) runtime_failure("Should encode value " << val << " in one byte!"); |
631
|
0
|
|
|
|
|
|
data.push_back(val); |
632
|
0
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
void binary_encoder::add_2B(unsigned val) { |
635
|
|
|
|
|
|
|
if (uint16_t(val) != val) runtime_failure("Should encode value " << val << " in two bytes!"); |
636
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(uint16_t)); |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
void binary_encoder::add_4B(unsigned val) { |
640
|
|
|
|
|
|
|
if (uint32_t(val) != val) runtime_failure("Should encode value " << val << " in four bytes!"); |
641
|
0
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(uint32_t)); |
642
|
|
|
|
|
|
|
} |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
// Append `val` as a single-precision float (sizeof(float) bytes, host byte order).
void binary_encoder::add_float(double val) {
  // Convert first: copying the leading sizeof(float) bytes of the double
  // itself would store a fragment of the double's representation, not a float.
  const float narrowed = static_cast<float>(val);
  const unsigned char* first = reinterpret_cast<const unsigned char*>(&narrowed);
  data.insert(data.end(), first, first + sizeof(float));
}
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
// Append the sizeof(double) bytes of `val` in host byte order.
void binary_encoder::add_double(double val) {
  const unsigned char* first = (const unsigned char*) &val;
  data.insert(data.end(), first, first + sizeof(double));
}
651
|
|
|
|
|
|
|
|
652
|
0
|
|
|
|
|
|
// Append a length-prefixed string. Lengths below 255 take a single length
// byte; longer strings store the marker byte 255 followed by a 4-byte length.
// The string bytes follow the prefix.
void binary_encoder::add_str(string_piece str) {
  if (str.len < 255) {
    add_1B(str.len);
  } else {
    add_1B(255);
    add_4B(str.len);
  }
  add_data(str);
}
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
// Append the data.len bytes starting at data.str, without a length prefix.
void binary_encoder::add_data(string_piece data) {
  const unsigned char* first = (const unsigned char*) data.str;
  const unsigned char* last = (const unsigned char*) (data.str + data.len);
  this->data.insert(this->data.end(), first, last);
}
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
template |
663
|
|
|
|
|
|
|
void binary_encoder::add_data(const vector& data) { |
664
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data.data(), (const unsigned char*) (data.data() + data.size())); |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
template |
668
|
|
|
|
|
|
|
void binary_encoder::add_data(const T* data, size_t elements) { |
669
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data, (const unsigned char*) (data + elements)); |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
} // namespace utils |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
///////// |
675
|
|
|
|
|
|
|
// File: classifier/network_classifier.h |
676
|
|
|
|
|
|
|
///////// |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
// This file is part of NameTag . |
679
|
|
|
|
|
|
|
// |
680
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
681
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
682
|
|
|
|
|
|
|
// |
683
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
684
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
685
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
686
|
|
|
|
|
|
|
|
687
|
6
|
0
|
|
|
|
|
class network_classifier { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
688
|
|
|
|
|
|
|
public: |
689
|
|
|
|
|
|
|
bool load(istream& is); |
690
|
|
|
|
|
|
|
bool save(ostream& os); |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
bool train(unsigned features, unsigned outcomes, const vector& train, |
693
|
|
|
|
|
|
|
const vector& heldout, const network_parameters& parameters, bool verbose); |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
void classify(const classifier_features& features, vector& outcomes, vector& buffer) const; |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
private: |
698
|
|
|
|
|
|
|
// Direct connections |
699
|
|
|
|
|
|
|
vector> weights; |
700
|
|
|
|
|
|
|
vector> indices; |
701
|
|
|
|
|
|
|
double missing_weight; |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
// Hidden layer, experimental use only |
704
|
|
|
|
|
|
|
vector> hidden_weights[2]; |
705
|
|
|
|
|
|
|
vector hidden_layer, hidden_error; |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
// Output layer |
708
|
|
|
|
|
|
|
vector output_layer, output_error; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
inline void propagate(const classifier_features& features); |
711
|
|
|
|
|
|
|
inline void propagate(const classifier_features& features, vector& hidden_layer, vector& output_layer) const; |
712
|
|
|
|
|
|
|
inline void backpropagate(const classifier_instance& instance, double learning_rate, double gaussian_sigma); |
713
|
|
|
|
|
|
|
inline classifier_outcome best_outcome(); |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
template void load_matrix(binary_decoder& data, vector>& m); |
716
|
|
|
|
|
|
|
template void save_matrix(binary_encoder& enc, const vector>& m); |
717
|
|
|
|
|
|
|
}; |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
///////// |
720
|
|
|
|
|
|
|
// File: utils/compressor.h |
721
|
|
|
|
|
|
|
///////// |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
724
|
|
|
|
|
|
|
// |
725
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
726
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
727
|
|
|
|
|
|
|
// |
728
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
729
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
730
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
namespace utils { |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
class binary_decoder; |
735
|
|
|
|
|
|
|
class binary_encoder; |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
class compressor { |
738
|
|
|
|
|
|
|
public: |
739
|
|
|
|
|
|
|
static bool load(istream& is, binary_decoder& data); |
740
|
|
|
|
|
|
|
static bool save(ostream& os, const binary_encoder& enc); |
741
|
|
|
|
|
|
|
}; |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
} // namespace utils |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
///////// |
746
|
|
|
|
|
|
|
// File: utils/unaligned_access.h |
747
|
|
|
|
|
|
|
///////// |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
750
|
|
|
|
|
|
|
// |
751
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
752
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
753
|
|
|
|
|
|
|
// |
754
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
755
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
756
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
namespace utils { |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
// |
761
|
|
|
|
|
|
|
// Declarations |
762
|
|
|
|
|
|
|
// |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
// NOTE(review): T is the value type, P the pointee type of the pointer that is
// passed in (always deducible from the argument). Confirm against upstream
// whether P carried a default template argument.

// Read a T from `ptr`; `ptr` need not be aligned for T.
template <class T, class P>
inline T unaligned_load(const P* ptr);

// As unaligned_load, then advance `ptr` by sizeof(T) bytes.
template <class T, class P>
inline T unaligned_load_inc(const P*& ptr);

// Write `value` to `ptr`; `ptr` need not be aligned for T.
template <class T, class P>
inline void unaligned_store(P* ptr, T value);

// As unaligned_store, then advance `ptr` by sizeof(T) bytes.
template <class T, class P>
inline void unaligned_store_inc(P*& ptr, T value);

// Binary search in the sorted range [first, first + size) whose elements may
// be unaligned: pointer to the first element that is not less than `val`.
template <class T>
T* unaligned_lower_bound(T* first, size_t size, T val);

// As above: pointer to the first element that is greater than `val`.
template <class T>
T* unaligned_upper_bound(T* first, size_t size, T val);
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
// |
783
|
|
|
|
|
|
|
// Definitions |
784
|
|
|
|
|
|
|
// |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
// Read a T from `ptr` by copying sizeof(T) bytes, so `ptr` does not have to
// be aligned for T.
template <class T, class P>
inline T unaligned_load(const P* ptr) {
  T value;
  memcpy(&value, ptr, sizeof(T));
  return value;
}
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
// Read a T from `ptr` (no alignment requirement) and advance `ptr` by
// sizeof(T) bytes.
template <class T, class P>
inline T unaligned_load_inc(const P*& ptr) {
  T value;
  memcpy(&value, ptr, sizeof(T));
  // Advance in bytes through a converted pointer *value*; reinterpreting the
  // pointer object itself as `const char*&` would access it through an
  // unrelated pointer type.
  ptr = reinterpret_cast<const P*>(reinterpret_cast<const char*>(ptr) + sizeof(T));
  return value;
}
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
// Write `value` to `ptr` by copying sizeof(T) bytes, so `ptr` does not have
// to be aligned for T.
template <class T, class P>
inline void unaligned_store(P* ptr, T value) {
  memcpy(ptr, &value, sizeof(T));
}
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
// Write `value` to `ptr` (no alignment requirement) and advance `ptr` by
// sizeof(T) bytes.
template <class T, class P>
inline void unaligned_store_inc(P*& ptr, T value) {
  memcpy(ptr, &value, sizeof(T));
  // Byte-wise advance via a converted pointer value rather than by punning
  // the pointer object as `char*&`.
  ptr = reinterpret_cast<P*>(reinterpret_cast<char*>(ptr) + sizeof(T));
}
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
// Binary search over the sorted range [first, first + size): returns a pointer
// to the first element that is not less than `val` (first + size if there is
// none). Elements are fetched with memcpy, so the range may be unaligned.
template <class T>
T* unaligned_lower_bound(T* first, size_t size, T val) {
  while (size) {
    const size_t half = size >> 1;

    // Unaligned read of the middle element.
    T middle;
    memcpy(&middle, first + half, sizeof(T));

    if (middle < val) {
      // Answer lies strictly to the right of the middle element.
      first += half + 1;
      size -= half + 1;
    } else {
      size = half;
    }
  }
  return first;
}
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
// Binary search over the sorted range [first, first + size): returns a pointer
// to the first element that is greater than `val` (first + size if there is
// none). Elements are fetched with memcpy, so the range may be unaligned.
template <class T>
T* unaligned_upper_bound(T* first, size_t size, T val) {
  while (size) {
    const size_t half = size >> 1;

    // Unaligned read of the middle element.
    T middle;
    memcpy(&middle, first + half, sizeof(T));

    if (!(val < middle)) {
      // Middle element is <= val, so the answer lies to its right.
      first += half + 1;
      size -= half + 1;
    } else {
      size = half;
    }
  }
  return first;
}
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
} // namespace utils |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
///////// |
843
|
|
|
|
|
|
|
// File: classifier/network_classifier.cpp |
844
|
|
|
|
|
|
|
///////// |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
// This file is part of NameTag . |
847
|
|
|
|
|
|
|
// |
848
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
849
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
850
|
|
|
|
|
|
|
// |
851
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
852
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
853
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
854
|
|
|
|
|
|
|
|
855
|
2
|
|
|
|
|
|
// Load a model from `is`.
//
// Layout read here, in order: indices matrix, missing_weight, weights matrix,
// 2-byte hidden layer size (followed by the two hidden weight matrices when it
// is non-zero), 2-byte number of outcomes.
// Returns false when compressor::load fails, when the decoder throws
// binary_decoder_error, or when unread data remains after the last field.
bool network_classifier::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  try {
    // Direct connections
    load_matrix(data, indices);
    // NOTE(review): the stored type of missing_weight is assumed to match the
    // member (double); confirm against the matching save() implementation.
    missing_weight = unaligned_load<double>(data.next<double>(1));
    load_matrix(data, weights);

    // Hidden layer
    hidden_weights[0].clear();
    hidden_weights[1].clear();
    hidden_layer.resize(data.next_2B());
    if (!hidden_layer.empty()) {
      load_matrix(data, hidden_weights[0]);
      load_matrix(data, hidden_weights[1]);
    }

    // Output layer
    unsigned outcomes = data.next_2B();
    output_layer.resize(outcomes);
    output_error.resize(outcomes);
  } catch (const binary_decoder_error&) {
    return false;
  }

  // Trailing bytes mean the stream did not contain exactly one model.
  return data.is_end();
}
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
template |
886
|
4
|
|
|
|
|
|
void network_classifier::load_matrix(binary_decoder& data, vector>& m) { |
887
|
4
|
|
|
|
|
|
m.resize(data.next_4B()); |
888
|
664
|
100
|
|
|
|
|
for (auto&& row : m) { |
|
|
100
|
|
|
|
|
|
889
|
660
|
|
|
|
|
|
row.resize(data.next_2B()); |
890
|
660
|
|
|
|
|
|
if (!row.empty()) |
891
|
232
|
|
|
|
|
|
memcpy((unsigned char*) row.data(), data.next(row.size()), row.size() * sizeof(T)); |
892
|
|
|
|
|
|
|
} |
893
|
4
|
|
|
|
|
|
} |
894
|
|
|
|
|
|
|
|
895
|
0
|
|
|
|
|
|
bool network_classifier::train(unsigned features, unsigned outcomes, const vector& train, |
896
|
|
|
|
|
|
|
const vector& heldout, const network_parameters& parameters, bool verbose) { |
897
|
|
|
|
|
|
|
// Assertions |
898
|
0
|
0
|
|
|
|
|
if (features <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; } |
|
|
0
|
|
|
|
|
|
899
|
0
|
0
|
|
|
|
|
if (outcomes <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; } |
|
|
0
|
|
|
|
|
|
900
|
0
|
0
|
|
|
|
|
if (train.empty()) { if (verbose) cerr << "No training data!" << endl; return false; } |
|
|
0
|
|
|
|
|
|
901
|
0
|
0
|
|
|
|
|
for (auto&& instance : train) { |
902
|
0
|
0
|
|
|
|
|
if (instance.outcome >= outcomes) { if (verbose) cerr << "Training instances out of range!" << endl; return false; } |
|
|
0
|
|
|
|
|
|
903
|
0
|
0
|
|
|
|
|
for(auto& feature : instance.features) |
904
|
0
|
0
|
|
|
|
|
if (feature >= features) { if (verbose) cerr << "Training instances out of range!" << endl; return false; } |
|
|
0
|
|
|
|
|
|
905
|
|
|
|
|
|
|
} |
906
|
0
|
0
|
|
|
|
|
for (auto&& instance : heldout) |
907
|
0
|
0
|
|
|
|
|
for(auto& feature : instance.features) |
908
|
0
|
0
|
|
|
|
|
if (feature >= features) { if (verbose) cerr << "Heldout instances out of range!" << endl; return false; } |
|
|
0
|
|
|
|
|
|
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
mt19937 generator(42); |
911
|
|
|
|
|
|
|
uniform_real_distribution uniform(-0.1f, 0.1f); |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
// Compute indices from existing feature-outcome pairs |
914
|
0
|
|
|
|
|
|
indices.clear(); |
915
|
0
|
|
|
|
|
|
indices.resize(features); |
916
|
0
|
0
|
|
|
|
|
for (auto&& instance : train) |
917
|
0
|
0
|
|
|
|
|
for (auto&& feature : instance.features) |
918
|
0
|
|
|
|
|
|
indices[feature].emplace_back(instance.outcome); |
919
|
|
|
|
|
|
|
|
920
|
0
|
0
|
|
|
|
|
for (auto&& row : indices) { |
921
|
|
|
|
|
|
|
sort(row.begin(), row.end()); |
922
|
0
|
|
|
|
|
|
row.resize(unique(row.begin(), row.end()) - row.begin()); |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
// Initialize direct connections |
926
|
0
|
|
|
|
|
|
weights.clear(); |
927
|
0
|
0
|
|
|
|
|
for (auto&& row : indices) |
928
|
0
|
|
|
|
|
|
weights.emplace_back(row.size()); |
929
|
0
|
|
|
|
|
|
missing_weight = parameters.missing_weight; |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
// Initialize hidden layer |
932
|
0
|
|
|
|
|
|
hidden_layer.resize(parameters.hidden_layer); |
933
|
0
|
0
|
|
|
|
|
if (!hidden_layer.empty()) { |
934
|
0
|
|
|
|
|
|
hidden_error.resize(hidden_layer.size()); |
935
|
|
|
|
|
|
|
|
936
|
0
|
|
|
|
|
|
hidden_weights[0].resize(features); |
937
|
0
|
0
|
|
|
|
|
for (auto&& row : hidden_weights[0]) |
938
|
0
|
0
|
|
|
|
|
for (auto&& weight : row.resize(hidden_layer.size()), row) |
939
|
0
|
|
|
|
|
|
weight = uniform(generator) + uniform(generator) + uniform(generator); |
940
|
|
|
|
|
|
|
|
941
|
0
|
|
|
|
|
|
hidden_weights[1].resize(hidden_layer.size()); |
942
|
0
|
0
|
|
|
|
|
for (auto&& row : hidden_weights[1]) |
943
|
0
|
0
|
|
|
|
|
for (auto&& weight : row.resize(outcomes), row) |
944
|
0
|
|
|
|
|
|
weight = uniform(generator) + uniform(generator) + uniform(generator); |
945
|
|
|
|
|
|
|
} |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
// Initialize output layer |
948
|
0
|
|
|
|
|
|
output_layer.resize(outcomes); |
949
|
0
|
|
|
|
|
|
output_error.resize(outcomes); |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
// Normalize gaussian_sigma |
952
|
0
|
|
|
|
|
|
double gaussian_sigma = parameters.gaussian_sigma / train.size(); |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
// Train |
955
|
|
|
|
|
|
|
vector permutation; |
956
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < train.size(); i++) |
957
|
0
|
|
|
|
|
|
permutation.push_back(i); |
958
|
|
|
|
|
|
|
|
959
|
0
|
0
|
|
|
|
|
for (int iteration = 0; iteration < parameters.iterations; iteration++) { |
960
|
0
|
0
|
|
|
|
|
if (verbose) cerr << "Iteration " << iteration + 1 << ": "; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
961
|
|
|
|
|
|
|
|
962
|
0
|
0
|
|
|
|
|
double learning_rate = parameters.final_learning_rate && parameters.iterations > 1 ? |
963
|
0
|
|
|
|
|
|
exp(((parameters.iterations - 1 - iteration) * log(parameters.initial_learning_rate) + iteration * log(parameters.final_learning_rate)) / (parameters.iterations-1)) : |
964
|
0
|
0
|
|
|
|
|
parameters.initial_learning_rate; |
965
|
|
|
|
|
|
|
double logprob = 0; |
966
|
|
|
|
|
|
|
int training_correct = 0; |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
// Process instances in random order |
969
|
0
|
|
|
|
|
|
shuffle(permutation.begin(), permutation.end(), generator); |
970
|
0
|
0
|
|
|
|
|
for (auto&& train_index : permutation) { |
971
|
0
|
|
|
|
|
|
auto& instance = train[train_index]; |
972
|
0
|
|
|
|
|
|
propagate(instance.features); |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
// Update logprob and training_correct |
975
|
0
|
|
|
|
|
|
logprob += log(output_layer[instance.outcome]); |
976
|
0
|
|
|
|
|
|
training_correct += best_outcome() == instance.outcome; |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
// Improve network weights according to correct outcome |
979
|
0
|
|
|
|
|
|
backpropagate(instance, learning_rate, gaussian_sigma); |
980
|
|
|
|
|
|
|
} |
981
|
0
|
0
|
|
|
|
|
if (verbose) |
982
|
|
|
|
|
|
|
cerr << "a " << fixed << setprecision(3) << learning_rate |
983
|
|
|
|
|
|
|
<< ", logprob " << scientific << setprecision(4) << logprob |
984
|
0
|
|
|
|
|
|
<< ", training acc " << fixed << setprecision(2) << training_correct * 100. / train.size() |
985
|
0
|
0
|
|
|
|
|
<< "%, "; |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
// Evaluate heldout accuracy if heldout data are present |
988
|
0
|
0
|
|
|
|
|
if (!heldout.empty()) { |
989
|
|
|
|
|
|
|
int heldout_correct = 0; |
990
|
0
|
0
|
|
|
|
|
for (auto&& instance : heldout) { |
991
|
0
|
|
|
|
|
|
propagate(instance.features); |
992
|
0
|
|
|
|
|
|
heldout_correct += best_outcome() == instance.outcome; |
993
|
|
|
|
|
|
|
} |
994
|
0
|
0
|
|
|
|
|
if (verbose) cerr << "heldout acc " << heldout_correct * 100. / heldout.size() << ", "; |
|
|
0
|
|
|
|
|
|
995
|
|
|
|
|
|
|
} |
996
|
0
|
0
|
|
|
|
|
if (verbose) cerr << "done." << endl; |
997
|
|
|
|
|
|
|
} |
998
|
|
|
|
|
|
|
return true; |
999
|
|
|
|
|
|
|
} |
1000
|
|
|
|
|
|
|
|
1001
|
14
|
|
|
|
|
|
void network_classifier::classify(const classifier_features& features, vector& outcomes, vector& buffer) const { |
1002
|
14
|
100
|
|
|
|
|
if (outcomes.size() != output_layer.size()) outcomes.resize(output_layer.size()); |
1003
|
14
|
50
|
|
|
|
|
if (buffer.size() != hidden_layer.size()) buffer.resize(hidden_layer.size()); |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
// Propagation |
1006
|
14
|
|
|
|
|
|
propagate(features, buffer, outcomes); |
1007
|
14
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
// Forward pass into the classifier's own hidden_layer / output_layer members.
void network_classifier::propagate(const classifier_features& features)
{
  propagate(features, hidden_layer, output_layer);
}
1012
|
|
|
|
|
|
|
|
1013
|
14
|
|
|
|
|
|
void network_classifier::propagate(const classifier_features& features, vector& hidden_layer, vector& output_layer) const { |
1014
|
14
|
|
|
|
|
|
output_layer.assign(output_layer.size(), features.size() * missing_weight); |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
// Direct connections |
1017
|
305
|
100
|
|
|
|
|
for (auto&& feature : features) |
1018
|
291
|
50
|
|
|
|
|
if (feature < indices.size()) |
1019
|
705
|
100
|
|
|
|
|
for (unsigned i = 0; i < indices[feature].size(); i++) |
1020
|
1242
|
|
|
|
|
|
output_layer[indices[feature][i]] += weights[feature][i] - missing_weight; |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
// Hidden layer |
1023
|
14
|
50
|
|
|
|
|
if (!hidden_layer.empty()) { |
1024
|
0
|
0
|
|
|
|
|
for (auto&& weight : hidden_layer) |
1025
|
0
|
|
|
|
|
|
weight = 0; |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
// Propagate to hidden layer |
1028
|
0
|
0
|
|
|
|
|
for (auto&& feature : features) |
1029
|
0
|
0
|
|
|
|
|
if (feature < hidden_weights[0].size()) |
1030
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < hidden_layer.size(); i++) { |
1031
|
0
|
|
|
|
|
|
hidden_layer[i] += hidden_weights[0][feature][i]; |
1032
|
|
|
|
|
|
|
} |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
// Apply logistic sigmoid to hidden layer |
1035
|
0
|
0
|
|
|
|
|
for (auto&& weight : hidden_layer) |
1036
|
0
|
|
|
|
|
|
weight = 1 / (1 + exp(-weight)); |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
// Propagate to output_layer |
1039
|
14
|
0
|
|
|
|
|
for (unsigned h = 0; h < hidden_layer.size(); h++) |
1040
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
1041
|
0
|
|
|
|
|
|
output_layer[i] += hidden_layer[h] * hidden_weights[1][h][i]; |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
// Apply softmax sigmoid to output_layer layer |
1045
|
|
|
|
|
|
|
double sum = 0; |
1046
|
140
|
100
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); sum += output_layer[i], i++) |
1047
|
126
|
|
|
|
|
|
output_layer[i] = exp(output_layer[i]); |
1048
|
14
|
|
|
|
|
|
sum = 1 / sum; |
1049
|
140
|
100
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
1050
|
126
|
|
|
|
|
|
output_layer[i] *= sum; |
1051
|
14
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
// Index of the largest value in output_layer; on ties the lowest index wins,
// and 0 is returned when the layer holds fewer than two values.
classifier_outcome network_classifier::best_outcome() {
  classifier_outcome winner = 0;

  for (unsigned candidate = 1; candidate < output_layer.size(); candidate++) {
    if (output_layer[candidate] > output_layer[winner]) {
      winner = candidate;
    }
  }

  return winner;
}
1061
|
|
|
|
|
|
|
|
1062
|
0
|
|
|
|
|
|
void network_classifier::backpropagate(const classifier_instance& instance, double learning_rate, double gaussian_sigma) { |
1063
|
|
|
|
|
|
|
// Compute error vector |
1064
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_error.size(); i++) |
1065
|
0
|
0
|
|
|
|
|
output_error[i] = (i == instance.outcome) - output_layer[i]; |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
// Update direct connections |
1068
|
0
|
0
|
|
|
|
|
for (auto&& feature : instance.features) |
1069
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < indices[feature].size(); i++) |
1070
|
0
|
|
|
|
|
|
weights[feature][i] += learning_rate * output_error[indices[feature][i]] - weights[feature][i] * gaussian_sigma; |
1071
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
// Update hidden layer |
1073
|
0
|
0
|
|
|
|
|
if (!hidden_layer.empty()) { |
1074
|
|
|
|
|
|
|
// Backpropagate output_error into hidden_error |
1075
|
0
|
0
|
|
|
|
|
for (unsigned h = 0; h < hidden_layer.size(); h++) { |
1076
|
0
|
|
|
|
|
|
hidden_error[h] = 0; |
1077
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
1078
|
0
|
|
|
|
|
|
hidden_error[h] += hidden_weights[1][h][i] * output_error[i]; |
1079
|
0
|
|
|
|
|
|
hidden_error[h] *= hidden_layer[h] * (1-hidden_layer[h]); |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
// Update hidden_weights[1] |
1083
|
0
|
0
|
|
|
|
|
for (unsigned h = 0; h < hidden_layer.size(); h++) |
1084
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
1085
|
0
|
|
|
|
|
|
hidden_weights[1][h][i] += learning_rate * hidden_layer[h] * output_error[i] - hidden_weights[1][h][i] * gaussian_sigma; |
1086
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
// Update hidden_weights[0] |
1088
|
0
|
0
|
|
|
|
|
for (auto&& feature : instance.features) |
1089
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < hidden_layer.size(); i++) |
1090
|
0
|
|
|
|
|
|
hidden_weights[0][feature][i] += learning_rate * hidden_error[i] - hidden_weights[0][feature][i] * gaussian_sigma; |
1091
|
|
|
|
|
|
|
} |
1092
|
0
|
|
|
|
|
|
} |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
///////// |
1095
|
|
|
|
|
|
|
// File: ner/entity_map.h |
1096
|
|
|
|
|
|
|
///////// |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
// This file is part of NameTag . |
1099
|
|
|
|
|
|
|
// |
1100
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1101
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1102
|
|
|
|
|
|
|
// |
1103
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1104
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1105
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1106
|
|
|
|
|
|
|
|
1107
|
0
|
|
|
|
|
|
class entity_map { |
1108
|
|
|
|
|
|
|
public: |
1109
|
|
|
|
|
|
|
entity_type parse(const char* str, bool add_entity = false) const; |
1110
|
|
|
|
|
|
|
const string& name(entity_type entity) const; |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
bool load(istream& is); |
1113
|
|
|
|
|
|
|
bool save(ostream& os) const; |
1114
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
entity_type size() const; |
1116
|
|
|
|
|
|
|
private: |
1117
|
|
|
|
|
|
|
mutable unordered_map str2id; |
1118
|
|
|
|
|
|
|
mutable vector id2str; |
1119
|
|
|
|
|
|
|
string empty; |
1120
|
|
|
|
|
|
|
}; |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
///////// |
1123
|
|
|
|
|
|
|
// File: tokenizer/tokenizer.h |
1124
|
|
|
|
|
|
|
///////// |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
// This file is part of NameTag . |
1127
|
|
|
|
|
|
|
// |
1128
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1129
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1130
|
|
|
|
|
|
|
// |
1131
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1132
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1133
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
// Range of a token, measured in Unicode characters, not UTF8 bytes. |
1136
|
|
|
|
|
|
|
// It must be in sync with morphodita::token_range. |
1137
|
|
|
|
|
|
|
struct token_range {
  size_t start;
  size_t length;

  // Zero-initialize so a default-constructed range never exposes
  // indeterminate values; the member layout is unchanged.
  token_range() : start(0), length(0) {}
  token_range(size_t start, size_t length) : start(start), length(length) {}
};
1144
|
|
|
|
|
|
|
|
1145
|
4
|
|
|
|
|
|
class tokenizer { |
1146
|
|
|
|
|
|
|
public: |
1147
|
4
|
|
|
|
|
|
virtual ~tokenizer() {} |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) = 0; |
1150
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) = 0; |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
// Static factory method |
1153
|
|
|
|
|
|
|
static tokenizer* new_vertical_tokenizer(); |
1154
|
|
|
|
|
|
|
}; |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
///////// |
1157
|
|
|
|
|
|
|
// File: ner/ner.h |
1158
|
|
|
|
|
|
|
///////// |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
// This file is part of NameTag . |
1161
|
|
|
|
|
|
|
// |
1162
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1163
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1164
|
|
|
|
|
|
|
// |
1165
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1166
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1167
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1168
|
|
|
|
|
|
|
|
1169
|
0
|
|
|
|
|
|
// A recognized entity: a start position, a length and the entity type name.
struct named_entity {
  size_t start;
  size_t length;
  string type;

  // Zero-initialize the position so a default-constructed entity never
  // exposes indeterminate values.
  named_entity() : start(0), length(0) {}
  named_entity(size_t start, size_t length, const string& type) : start(start), length(length), type(type) {}
};
1177
|
|
|
|
|
|
|
|
1178
|
1
|
|
|
|
|
|
class ner { |
1179
|
|
|
|
|
|
|
public: |
1180
|
0
|
|
|
|
|
|
virtual ~ner() {} |
1181
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
static ner* load(const char* fname); |
1183
|
|
|
|
|
|
|
static ner* load(istream& is); |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
// Perform named entity recognition on a tokenizes sentence and return found |
1186
|
|
|
|
|
|
|
// named entities in the given vector. |
1187
|
|
|
|
|
|
|
virtual void recognize(const vector& forms, vector& entities) const = 0; |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
// Return the possible entity types |
1190
|
|
|
|
|
|
|
virtual void entity_types(vector& types) const = 0; |
1191
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
// Return gazetteers used by the recognizer, if any, optionally with the index of entity type |
1193
|
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const = 0; |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this recognizer. |
1196
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
1197
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const = 0; |
1198
|
|
|
|
|
|
|
}; |
1199
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
///////// |
1201
|
|
|
|
|
|
|
// File: tagger/tagger_ids.h |
1202
|
|
|
|
|
|
|
///////// |
1203
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
// This file is part of NameTag . |
1205
|
|
|
|
|
|
|
// |
1206
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1207
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1208
|
|
|
|
|
|
|
// |
1209
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1210
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1211
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
// Identifiers of the available tagger implementations and their textual names.
class tagger_ids {
 public:
  enum tagger_id { TRIVIAL, EXTERNAL, MORPHODITA };

  // Translate a lowercase tagger name to its id. Returns true and stores the
  // id on success; on an unknown name returns false and leaves `id` untouched.
  static bool parse(const string& str, tagger_id& id) {
    if (str == "trivial") {
      id = TRIVIAL;
      return true;
    }
    if (str == "external") {
      id = EXTERNAL;
      return true;
    }
    if (str == "morphodita") {
      id = MORPHODITA;
      return true;
    }
    return false;
  }
};
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
typedef tagger_ids::tagger_id tagger_id; |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
///////// |
1228
|
|
|
|
|
|
|
// File: tagger/tagger.h |
1229
|
|
|
|
|
|
|
///////// |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
// This file is part of NameTag . |
1232
|
|
|
|
|
|
|
// |
1233
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1234
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1235
|
|
|
|
|
|
|
// |
1236
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1237
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1238
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1239
|
|
|
|
|
|
|
|
1240
|
1
|
|
|
|
|
|
class tagger { |
1241
|
|
|
|
|
|
|
public: |
1242
|
0
|
|
|
|
|
|
virtual ~tagger() {} |
1243
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const = 0; |
1245
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
// Factory methods |
1247
|
|
|
|
|
|
|
static tagger* load_instance(istream& is); |
1248
|
|
|
|
|
|
|
static tagger* create_and_encode_instance(const string& tagger_id_and_params, ostream& os); |
1249
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
protected: |
1251
|
|
|
|
|
|
|
virtual bool load(istream& is) = 0; |
1252
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) = 0; |
1253
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
private: |
1255
|
|
|
|
|
|
|
static tagger* create(tagger_id id); |
1256
|
|
|
|
|
|
|
}; |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
///////// |
1259
|
|
|
|
|
|
|
// File: features/nlp_pipeline.h |
1260
|
|
|
|
|
|
|
///////// |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
// This file is part of NameTag . |
1263
|
|
|
|
|
|
|
// |
1264
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1265
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1266
|
|
|
|
|
|
|
// |
1267
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1268
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1269
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
struct nlp_pipeline { |
1272
|
|
|
|
|
|
|
ufal::nametag::tokenizer* tokenizer; |
1273
|
|
|
|
|
|
|
const ufal::nametag::tagger* tagger; |
1274
|
|
|
|
|
|
|
|
1275
|
1
|
|
|
|
|
|
nlp_pipeline(ufal::nametag::tokenizer* tokenizer, const ufal::nametag::tagger* tagger) : tokenizer(tokenizer), tagger(tagger) {} |
1276
|
|
|
|
|
|
|
}; |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
///////// |
1279
|
|
|
|
|
|
|
// File: features/feature_processor.h |
1280
|
|
|
|
|
|
|
///////// |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
// This file is part of NameTag . |
1283
|
|
|
|
|
|
|
// |
1284
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1285
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1286
|
|
|
|
|
|
|
// |
1287
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1288
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1289
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1290
|
|
|
|
|
|
|
|
1291
|
8
|
|
|
|
|
|
class feature_processor { |
1292
|
|
|
|
|
|
|
public: |
1293
|
|
|
|
|
|
|
virtual ~feature_processor(); |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
1296
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline); |
1297
|
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline); |
1298
|
|
|
|
|
|
|
virtual void save(binary_encoder& enc); |
1299
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const; |
1301
|
|
|
|
|
|
|
virtual void process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const; |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const; |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
protected: |
1306
|
|
|
|
|
|
|
int window; |
1307
|
|
|
|
|
|
|
|
1308
|
99
|
|
|
|
|
|
inline ner_feature lookup(const string& key, ner_feature* total_features) const { |
1309
|
|
|
|
|
|
|
auto it = map.find(key); |
1310
|
99
|
100
|
|
|
|
|
if (it == map.end() && total_features) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1311
|
0
|
|
|
|
|
|
it = map.emplace(key, window + *total_features).first; |
1312
|
0
|
|
|
|
|
|
*total_features += 2*window + 1; |
1313
|
|
|
|
|
|
|
} |
1314
|
99
|
100
|
|
|
|
|
return it != map.end() ? it->second : ner_feature_unknown; |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
mutable unordered_map map; |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
// Factory method |
1320
|
|
|
|
|
|
|
public: |
1321
|
|
|
|
|
|
|
static feature_processor* create(const string& name); |
1322
|
|
|
|
|
|
|
}; |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
///////// |
1325
|
|
|
|
|
|
|
// File: features/feature_processor.cpp |
1326
|
|
|
|
|
|
|
///////// |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
// This file is part of NameTag . |
1329
|
|
|
|
|
|
|
// |
1330
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1331
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1332
|
|
|
|
|
|
|
// |
1333
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1334
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1335
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
// Feature processor -- methods and virtual methods |
1338
|
0
|
|
|
|
|
|
// Out-of-line virtual destructor; there is nothing to release here.
feature_processor::~feature_processor() = default;
1339
|
|
|
|
|
|
|
|
1340
|
0
|
|
|
|
|
|
bool feature_processor::parse(int window, const vector& /*args*/, entity_map& /*entities*/, |
1341
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& /*pipeline*/) { |
1342
|
0
|
0
|
|
|
|
|
if (window < 0) return false; |
1343
|
0
|
0
|
|
|
|
|
if (!total_features) return false; |
1344
|
|
|
|
|
|
|
|
1345
|
0
|
|
|
|
|
|
this->window = window; |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
map.clear(); |
1348
|
0
|
0
|
|
|
|
|
lookup(string(), total_features); // Always add an empty string to the map |
1349
|
|
|
|
|
|
|
|
1350
|
0
|
|
|
|
|
|
return true; |
1351
|
|
|
|
|
|
|
} |
1352
|
|
|
|
|
|
|
|
1353
|
8
|
|
|
|
|
|
// Restore the processor state from `data`. The values are read in this
// order: window size, bucket count for the map, number of entries, and
// then for every entry its key string followed by its 4-byte value.
void feature_processor::load(binary_decoder& data, const nlp_pipeline& /*pipeline*/) {
  window = data.next_4B();

  map.clear();
  map.rehash(data.next_4B());

  unsigned remaining = data.next_4B();
  while (remaining > 0) {
    string key;
    data.next_str(key);
    map.emplace(key, data.next_4B());
    --remaining;
  }
}
1364
|
|
|
|
|
|
|
|
1365
|
0
|
|
|
|
|
|
// Store the processor state into `enc`: window size, bucket count of the
// map, number of entries, and then every (key, value) pair.
void feature_processor::save(binary_encoder& enc) {
  enc.add_4B(window);

  enc.add_4B(map.bucket_count());
  enc.add_4B(map.size());

  // Copy the entries out and sort them, so they are written in sorted
  // order rather than in the iteration order of the hash map.
  vector<pair<string, ner_feature>> map_elements(map.begin(), map.end());
  sort(map_elements.begin(), map_elements.end());
  for (const auto& element : map_elements) {
    enc.add_str(element.first);
    enc.add_4B(element.second);
  }
}
1378
|
|
|
|
|
|
|
|
1379
|
0
|
|
|
|
|
|
// Default implementation: intentionally does nothing.
void feature_processor::process_sentence(ner_sentence& /*sentence*/,
                                         ner_feature* /*total_features*/,
                                         string& /*buffer*/) const {
}
1380
|
|
|
|
|
|
|
|
1381
|
16
|
|
|
|
|
|
void feature_processor::process_entities(ner_sentence& /*sentence*/, vector& /*entities*/, vector& /*buffer*/) const {} |
1382
|
|
|
|
|
|
|
|
1383
|
0
|
|
|
|
|
|
void feature_processor::gazetteers(vector& /*gazetteers*/, vector* /*gazetteer_types*/) const {} |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
///////// |
1386
|
|
|
|
|
|
|
// File: unilib/unicode.h |
1387
|
|
|
|
|
|
|
///////// |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
// This file is part of UniLib . |
1390
|
|
|
|
|
|
|
// |
1391
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
1392
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1393
|
|
|
|
|
|
|
// |
1394
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1395
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1396
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1397
|
|
|
|
|
|
|
// |
1398
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
1399
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
namespace unilib { |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
// Unicode general categories and simple case mappings, backed by two-level
// lookup tables (an index per 256-code-point page, then a block per page).
class unicode {
  // Category numbers as stored in category_block; each public category
  // constant below is the bit `1 << number`.
  enum : uint8_t {
    _Lu = 1, _Ll = 2, _Lt = 3, _Lm = 4, _Lo = 5,
    _Mn = 6, _Mc = 7, _Me = 8,
    _Nd = 9, _Nl = 10, _No = 11,
    _Pc = 12, _Pd = 13, _Ps = 14, _Pe = 15, _Pi = 16, _Pf = 17, _Po = 18,
    _Sm = 19, _Sc = 20, _Sk = 21, _So = 22,
    _Zs = 23, _Zl = 24, _Zp = 25,
    _Cc = 26, _Cf = 27, _Cs = 28, _Co = 29, _Cn = 30
  };

 public:
  // Bit mask of categories; single categories occupy one bit, the
  // aggregate names (Lut, LC, L, M, N, P, S, Z, C) are unions of them.
  using category_t = uint32_t;
  enum : category_t {
    // Letters
    Lu = 1 << _Lu, Ll = 1 << _Ll, Lt = 1 << _Lt,
    Lut = Lu | Lt, LC = Lu | Ll | Lt,
    Lm = 1 << _Lm, Lo = 1 << _Lo,
    L = Lu | Ll | Lt | Lm | Lo,
    // Marks
    Mn = 1 << _Mn, Mc = 1 << _Mc, Me = 1 << _Me,
    M = Mn | Mc | Me,
    // Numbers
    Nd = 1 << _Nd, Nl = 1 << _Nl, No = 1 << _No,
    N = Nd | Nl | No,
    // Punctuation
    Pc = 1 << _Pc, Pd = 1 << _Pd, Ps = 1 << _Ps, Pe = 1 << _Pe,
    Pi = 1 << _Pi, Pf = 1 << _Pf, Po = 1 << _Po,
    P = Pc | Pd | Ps | Pe | Pi | Pf | Po,
    // Symbols
    Sm = 1 << _Sm, Sc = 1 << _Sc, Sk = 1 << _Sk, So = 1 << _So,
    S = Sm | Sc | Sk | So,
    // Separators
    Zs = 1 << _Zs, Zl = 1 << _Zl, Zp = 1 << _Zp,
    Z = Zs | Zl | Zp,
    // Other
    Cc = 1 << _Cc, Cf = 1 << _Cf, Cs = 1 << _Cs, Co = 1 << _Co, Cn = 1 << _Cn,
    C = Cc | Cf | Cs | Co | Cn
  };

  // Category bit of the given code point.
  static inline category_t category(char32_t chr);

  // Case mappings of a single code point.
  static inline char32_t lowercase(char32_t chr);
  static inline char32_t uppercase(char32_t chr);
  static inline char32_t titlecase(char32_t chr);

 private:
  // Number of code points covered by the tables, and the category
  // reported for anything at or beyond that limit.
  static const char32_t CHARS = 0x110000;
  static const int32_t DEFAULT_CAT = Cn;

  // Two-level tables indexed by [index[chr >> 8]][chr & 0xFF].
  static const uint8_t category_index[CHARS >> 8];
  static const uint8_t category_block[][256];
  static const uint8_t othercase_index[CHARS >> 8];
  static const char32_t othercase_block[][256];

  // Tag kept in the low 8 bits of an othercase_block entry; the remaining
  // bits hold a code point.
  enum othercase_type {
    LOWER_ONLY = 1,
    UPPERTITLE_ONLY = 2,
    UPPER_ONLY = 3,
    LOWER_THEN_UPPER = 4,
    UPPER_THEN_TITLE = 5,
    TITLE_THEN_LOWER = 6
  };
};
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
// Return the category bit of `chr`: the table entry for the code point is
// a category number, which is turned into the mask `1 << number`.
// Values at or above CHARS yield DEFAULT_CAT.
unicode::category_t unicode::category(char32_t chr) {
  if (chr >= CHARS) return DEFAULT_CAT;

  return 1 << category_block[category_index[chr >> 8]][chr & 0xFF];
}
1449
|
|
|
|
|
|
|
|
1450
|
4
|
|
|
|
|
|
char32_t unicode::lowercase(char32_t chr) { |
1451
|
4
|
50
|
|
|
|
|
if (chr < CHARS) { |
1452
|
4
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
1453
|
4
|
50
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_ONLY) return othercase >> 8; |
1454
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase >> 8; |
1455
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
return chr; |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
|
1460
|
0
|
|
|
|
|
|
char32_t unicode::uppercase(char32_t chr) { |
1461
|
0
|
0
|
|
|
|
|
if (chr < CHARS) { |
1462
|
0
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
1463
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8; |
1464
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_ONLY) return othercase >> 8; |
1465
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase >> 8; |
1466
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1467
|
|
|
|
|
|
|
} |
1468
|
|
|
|
|
|
|
return chr; |
1469
|
|
|
|
|
|
|
} |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
char32_t unicode::titlecase(char32_t chr) { |
1472
|
|
|
|
|
|
|
if (chr < CHARS) { |
1473
|
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
1474
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8; |
1475
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase >> 8; |
1476
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1477
|
|
|
|
|
|
|
} |
1478
|
|
|
|
|
|
|
return chr; |
1479
|
|
|
|
|
|
|
} |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
} // namespace unilib |
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
///////// |
1484
|
|
|
|
|
|
|
// File: unilib/utf8.h |
1485
|
|
|
|
|
|
|
///////// |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
// This file is part of UniLib . |
1488
|
|
|
|
|
|
|
// |
1489
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
1490
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1491
|
|
|
|
|
|
|
// |
1492
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1493
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1494
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1495
|
|
|
|
|
|
|
// |
1496
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
1497
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
namespace unilib { |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
// UTF-8 helpers: validation, decoding to code points, encoding and
// per-code-point mapping. Input comes either as a NUL-terminated string,
// as a (pointer, length) buffer, or as a std::string.
class utf8 {
 public:
  static bool valid(const char* str);
  static bool valid(const char* str, size_t len);
  static inline bool valid(const std::string& str);

  // decode: read one code point and advance `str` (and decrease `len`).
  // first: read one code point without modifying the caller's arguments.
  static inline char32_t decode(const char*& str);
  static inline char32_t decode(const char*& str, size_t& len);
  static inline char32_t first(const char* str);
  static inline char32_t first(const char* str, size_t len);
  static inline char32_t first(const std::string& str);

  // Decode a whole string into `decoded`.
  static void decode(const char* str, std::u32string& decoded);
  static void decode(const char* str, size_t len, std::u32string& decoded);
  static inline void decode(const std::string& str, std::u32string& decoded);

  // Range over the code points of a NUL-terminated string; created only
  // through utf8::decoder (the constructor is private).
  class string_decoder {
   public:
    class iterator;
    inline iterator begin();
    inline iterator end();
   private:
    inline string_decoder(const char* str);
    const char* str;
    friend class utf8;
  };
  static inline string_decoder decoder(const char* str);
  static inline string_decoder decoder(const std::string& str);

  // Range over the code points of a (pointer, length) buffer; created only
  // through utf8::decoder (the constructor is private).
  class buffer_decoder {
   public:
    class iterator;
    inline iterator begin();
    inline iterator end();
   private:
    inline buffer_decoder(const char* str, size_t len);
    const char* str;
    size_t len;
    friend class utf8;
  };
  static inline buffer_decoder decoder(const char* str, size_t len);

  // Encode one code point (append) or a whole string (encode).
  static inline void append(char*& str, char32_t chr);
  static inline void append(std::string& str, char32_t chr);
  static void encode(const std::u32string& str, std::string& encoded);

  // Apply `f` to every code point of the input and store the re-encoded
  // sequence in `result`.
  template<class F> static void map(F f, const char* str, std::string& result);
  template<class F> static void map(F f, const char* str, size_t len, std::string& result);
  template<class F> static void map(F f, const std::string& str, std::string& result);

 private:
  // Emitted in place of input that cannot be decoded or encoded.
  static const char REPLACEMENT_CHAR = '?';
};
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
// std::string convenience overload; delegates to the NUL-terminated
// variant using the string's c_str().
bool utf8::valid(const std::string& str) {
  const char* text = str.c_str();
  return valid(text);
}
1558
|
|
|
|
|
|
|
|
1559
|
68
|
|
|
|
|
|
char32_t utf8::decode(const char*& str) { |
1560
|
68
|
100
|
|
|
|
|
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
1561
|
10
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
1562
|
10
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xE0) { |
1563
|
10
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x1F) << 6; |
1564
|
10
|
50
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
50
|
|
|
|
|
|
1565
|
10
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
1566
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF0) { |
1567
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x0F) << 12; |
1568
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1569
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 6; |
1570
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1571
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
1572
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF8) { |
1573
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x07) << 18; |
1574
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1575
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 12; |
1576
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1577
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 6; |
1578
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1579
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
1580
|
0
|
|
|
|
|
|
} else return ++str, REPLACEMENT_CHAR; |
1581
|
|
|
|
|
|
|
} |
1582
|
|
|
|
|
|
|
|
1583
|
218
|
|
|
|
|
|
char32_t utf8::decode(const char*& str, size_t& len) { |
1584
|
218
|
50
|
|
|
|
|
if (!len) return 0; |
1585
|
218
|
|
|
|
|
|
--len; |
1586
|
218
|
100
|
|
|
|
|
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
1587
|
23
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
1588
|
23
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xE0) { |
1589
|
23
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x1F) << 6; |
1590
|
23
|
50
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1591
|
23
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
1592
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF0) { |
1593
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x0F) << 12; |
1594
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1595
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 6; |
1596
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1597
|
0
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
1598
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF8) { |
1599
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x07) << 18; |
1600
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1601
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 12; |
1602
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1603
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 6; |
1604
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1605
|
0
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
1606
|
0
|
|
|
|
|
|
} else return ++str, REPLACEMENT_CHAR; |
1607
|
|
|
|
|
|
|
} |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
char32_t utf8::first(const char* str) { |
1610
|
|
|
|
|
|
|
return decode(str); |
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
char32_t utf8::first(const char* str, size_t len) { |
1614
|
|
|
|
|
|
|
return decode(str, len); |
1615
|
|
|
|
|
|
|
} |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
// std::string convenience overload; delegates to the NUL-terminated
// variant using the string's c_str().
char32_t utf8::first(const std::string& str) {
  const char* text = str.c_str();
  return first(text);
}
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
// std::string convenience overload; delegates to the NUL-terminated
// variant using the string's c_str().
void utf8::decode(const std::string& str, std::u32string& decoded) {
  const char* text = str.c_str();
  decode(text, decoded);
}
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
// Iterator over the code points of a NUL-terminated UTF-8 string.
// `next` points at the not-yet-decoded rest of the input; it becomes
// nullptr once the terminating 0 code point has been decoded, and the end
// iterator is represented by next == nullptr. Equality compares only
// `next`.
// NOTE(review): the template arguments of the std::iterator base were
// missing in this copy of the source; reconstructed from the class's
// behavior (single pass, yields char32_t) -- confirm.
class utf8::string_decoder::iterator : public std::iterator<std::input_iterator_tag, char32_t> {
 public:
  // Decodes the first code point immediately (unless str is nullptr).
  iterator(const char* str) : codepoint(0), next(str) { operator++(); }
  iterator(const iterator& it) : codepoint(it.codepoint), next(it.next) {}
  iterator& operator++() {
    if (next) {
      codepoint = decode(next);
      if (!codepoint) next = nullptr;
    }
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
  bool operator==(const iterator& other) const { return next == other.next; }
  bool operator!=(const iterator& other) const { return next != other.next; }
  const char32_t& operator*() { return codepoint; }
 private:
  char32_t codepoint;
  const char* next;
};
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
// Remember the start of the NUL-terminated input; nothing is decoded yet.
utf8::string_decoder::string_decoder(const char* str)
  : str(str) {
}
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
// Iterator positioned at the first code point of the stored string.
utf8::string_decoder::iterator utf8::string_decoder::begin() {
  iterator first_position(str);
  return first_position;
}
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
// End iterator, constructed from a null pointer.
utf8::string_decoder::iterator utf8::string_decoder::end() {
  iterator past_the_end(nullptr);
  return past_the_end;
}
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
// Create a code point range over a NUL-terminated string.
utf8::string_decoder utf8::decoder(const char* str) {
  string_decoder range(str);
  return range;
}
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
// Create a code point range over the c_str() of a std::string. The range
// stores only the pointer, so `str` must stay alive while it is used.
utf8::string_decoder utf8::decoder(const std::string& str) {
  string_decoder range(str.c_str());
  return range;
}
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
// Iterator over the code points of a (pointer, length) UTF-8 buffer.
// `next`/`len` describe the not-yet-decoded rest of the input; `next`
// becomes nullptr when an increment finds no bytes left, and the end
// iterator is represented by next == nullptr. Equality compares only
// `next`.
// NOTE(review): the template arguments of the std::iterator base were
// missing in this copy of the source; reconstructed from the class's
// behavior (single pass, yields char32_t) -- confirm.
class utf8::buffer_decoder::iterator : public std::iterator<std::input_iterator_tag, char32_t> {
 public:
  // Decodes the first code point immediately (unless the buffer is empty).
  iterator(const char* str, size_t len) : codepoint(0), next(str), len(len) { operator++(); }
  iterator(const iterator& it) : codepoint(it.codepoint), next(it.next), len(it.len) {}
  iterator& operator++() {
    if (!len) next = nullptr;
    if (next) codepoint = decode(next, len);
    return *this;
  }
  iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
  bool operator==(const iterator& other) const { return next == other.next; }
  bool operator!=(const iterator& other) const { return next != other.next; }
  const char32_t& operator*() { return codepoint; }
 private:
  char32_t codepoint;
  const char* next;
  size_t len;
};
1671
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
// Remember the input buffer and its length; nothing is decoded yet.
utf8::buffer_decoder::buffer_decoder(const char* str, size_t len)
  : str(str), len(len) {
}
1673
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
// Iterator positioned at the first code point of the stored buffer.
utf8::buffer_decoder::iterator utf8::buffer_decoder::begin() {
  iterator first_position(str, len);
  return first_position;
}
1677
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
// End iterator, constructed from a null pointer and zero length.
utf8::buffer_decoder::iterator utf8::buffer_decoder::end() {
  iterator past_the_end(nullptr, 0);
  return past_the_end;
}
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
// Create a code point range over a buffer of `len` bytes.
utf8::buffer_decoder utf8::decoder(const char* str, size_t len) {
  buffer_decoder range(str, len);
  return range;
}
1685
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
void utf8::append(char*& str, char32_t chr) { |
1687
|
|
|
|
|
|
|
if (chr < 0x80) *str++ = chr; |
1688
|
|
|
|
|
|
|
else if (chr < 0x800) { *str++ = 0xC0 + (chr >> 6); *str++ = 0x80 + (chr & 0x3F); } |
1689
|
|
|
|
|
|
|
else if (chr < 0x10000) { *str++ = 0xE0 + (chr >> 12); *str++ = 0x80 + ((chr >> 6) & 0x3F); *str++ = 0x80 + (chr & 0x3F); } |
1690
|
|
|
|
|
|
|
else if (chr < 0x200000) { *str++ = 0xF0 + (chr >> 18); *str++ = 0x80 + ((chr >> 12) & 0x3F); *str++ = 0x80 + ((chr >> 6) & 0x3F); *str++ = 0x80 + (chr & 0x3F); } |
1691
|
|
|
|
|
|
|
else *str++ = REPLACEMENT_CHAR; |
1692
|
|
|
|
|
|
|
} |
1693
|
|
|
|
|
|
|
|
1694
|
4
|
|
|
|
|
|
void utf8::append(std::string& str, char32_t chr) { |
1695
|
4
|
50
|
|
|
|
|
if (chr < 0x80) str += chr; |
1696
|
0
|
0
|
|
|
|
|
else if (chr < 0x800) { str += 0xC0 + (chr >> 6); str += 0x80 + (chr & 0x3F); } |
1697
|
0
|
0
|
|
|
|
|
else if (chr < 0x10000) { str += 0xE0 + (chr >> 12); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
1698
|
0
|
0
|
|
|
|
|
else if (chr < 0x200000) { str += 0xF0 + (chr >> 18); str += 0x80 + ((chr >> 12) & 0x3F); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
1699
|
|
|
|
|
|
|
else str += REPLACEMENT_CHAR; |
1700
|
4
|
|
|
|
|
|
} |
1701
|
|
|
|
|
|
|
|
1702
|
0
|
|
|
|
|
|
template void utf8::map(F f, const char* str, std::string& result) { |
1703
|
|
|
|
|
|
|
result.clear(); |
1704
|
|
|
|
|
|
|
|
1705
|
0
|
0
|
|
|
|
|
for (char32_t chr; (chr = decode(str)); ) |
1706
|
0
|
|
|
|
|
|
append(result, f(chr)); |
1707
|
0
|
|
|
|
|
|
} |
1708
|
|
|
|
|
|
|
|
1709
|
0
|
|
|
|
|
|
template void utf8::map(F f, const char* str, size_t len, std::string& result) { |
1710
|
|
|
|
|
|
|
result.clear(); |
1711
|
|
|
|
|
|
|
|
1712
|
0
|
0
|
|
|
|
|
while (len) |
1713
|
0
|
|
|
|
|
|
append(result, f(decode(str, len))); |
1714
|
0
|
|
|
|
|
|
} |
1715
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
// std::string convenience overload; delegates to the NUL-terminated
// variant using the string's c_str().
template<class F> void utf8::map(F f, const std::string& str, std::string& result) {
  map(f, str.c_str(), result);
}
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
} // namespace unilib |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
///////// |
1723
|
|
|
|
|
|
|
// File: utils/parse_int.h |
1724
|
|
|
|
|
|
|
///////// |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
1727
|
|
|
|
|
|
|
// |
1728
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1729
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1730
|
|
|
|
|
|
|
// |
1731
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1732
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1733
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
namespace utils { |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
// |
1738
|
|
|
|
|
|
|
// Declarations |
1739
|
|
|
|
|
|
|
// |
1740
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
// Try to parse an int from given string. If the int cannot be parsed or does |
1742
|
|
|
|
|
|
|
// not fit into int, false is returned and the error string is filled using the |
1743
|
|
|
|
|
|
|
// value_name argument. |
1744
|
|
|
|
|
|
|
inline bool parse_int(string_piece str, const char* value_name, int& value, string& error); |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
// Try to parse an int from given string. If the int cannot be parsed or does |
1747
|
|
|
|
|
|
|
// not fit into int, an error is displayed and program exits. |
1748
|
|
|
|
|
|
|
inline int parse_int(string_piece str, const char* value_name); |
1749
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
// |
1751
|
|
|
|
|
|
|
// Definitions |
1752
|
|
|
|
|
|
|
// |
1753
|
|
|
|
|
|
|
|
1754
|
0
|
|
|
|
|
|
bool parse_int(string_piece str, const char* value_name, int& value, string& error) { |
1755
|
|
|
|
|
|
|
string_piece original = str; |
1756
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
// Skip spaces |
1758
|
0
|
0
|
|
|
|
|
while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v')) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1759
|
0
|
|
|
|
|
|
str.str++, str.len--; |
1760
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
// Allow minus |
1762
|
|
|
|
|
|
|
bool positive = true; |
1763
|
0
|
0
|
|
|
|
|
if (str.len && (str.str[0] == '+' || str.str[0] == '-')) { |
|
|
0
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
positive = str.str[0] == '+'; |
1765
|
0
|
|
|
|
|
|
str.str++, str.len--; |
1766
|
|
|
|
|
|
|
} |
1767
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
// Parse value, checking for overflow/underflow |
1769
|
0
|
0
|
|
|
|
|
if (!str.len) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': empty string."), false; |
1770
|
|
|
|
|
|
|
if (!(str.str[0] >= '0' || str.str[0] <= '9')) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': non-digit character found."), false; |
1771
|
|
|
|
|
|
|
|
1772
|
0
|
|
|
|
|
|
value = 0; |
1773
|
0
|
0
|
|
|
|
|
while (str.len && str.str[0] >= '0' && str.str[0] <= '9') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1774
|
0
|
0
|
|
|
|
|
if (positive) { |
1775
|
0
|
0
|
|
|
|
|
if (value > (numeric_limits::max() - (str.str[0] - '0')) / 10) |
1776
|
0
|
|
|
|
|
|
return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': overflow occured."), false; |
1777
|
0
|
|
|
|
|
|
value = 10 * value + (str.str[0] - '0'); |
1778
|
|
|
|
|
|
|
} else { |
1779
|
0
|
0
|
|
|
|
|
if (value < (numeric_limits::min() + (str.str[0] - '0')) / 10) |
1780
|
0
|
|
|
|
|
|
return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': underflow occured."), false; |
1781
|
0
|
|
|
|
|
|
value = 10 * value - (str.str[0] - '0'); |
1782
|
|
|
|
|
|
|
} |
1783
|
0
|
|
|
|
|
|
str.str++, str.len--; |
1784
|
|
|
|
|
|
|
} |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
// Skip spaces |
1787
|
0
|
0
|
|
|
|
|
while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v')) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1788
|
0
|
|
|
|
|
|
str.str++, str.len--; |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
// Check for remaining characters |
1791
|
0
|
0
|
|
|
|
|
if (str.len) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': non-digit character found."), false; |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
return true; |
1794
|
|
|
|
|
|
|
} |
1795
|
|
|
|
|
|
|
|
1796
|
0
|
|
|
|
|
|
// Convenience overload: parses `str` as an int and returns the value.
// `value_name` is forwarded to the four-argument overload, which uses it in
// its error messages. When that overload reports failure, the error text is
// passed to runtime_failure (defined elsewhere in the bundle).
int parse_int(string_piece str, const char* value_name) {
  string error;
  int result;

  const bool parsed = parse_int(str, value_name, result, error);
  if (!parsed) {
    runtime_failure(error);
  }

  return result;
}
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
} // namespace utils |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
///////// |
1809
|
|
|
|
|
|
|
// File: utils/path_from_utf8.h |
1810
|
|
|
|
|
|
|
///////// |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
1813
|
|
|
|
|
|
|
// |
1814
|
|
|
|
|
|
|
// Copyright 2022 Institute of Formal and Applied Linguistics, Faculty of |
1815
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1816
|
|
|
|
|
|
|
// |
1817
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1818
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1819
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
namespace utils { |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
// |
1824
|
|
|
|
|
|
|
// Declarations |
1825
|
|
|
|
|
|
|
// |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
#ifdef _WIN32
inline wstring path_from_utf8(const char* str);
inline wstring path_from_utf8(const string& str);
#else
inline string path_from_utf8(const char* str);
inline const string& path_from_utf8(const string& str);
#endif

//
// Definitions
//

#ifdef _WIN32

// Converts a NUL-terminated UTF-8 string to a wstring of UTF-16 code units.
// Bytes that cannot start a sequence, sequences cut short by a non-continuation
// byte, and code points above 0x10FFFF are each replaced by '?'.
inline wstring path_from_utf8(const char* str) {
  // We could implement this using codecvt_utf8_utf16, but it is not available
  // in GCC 4.9, which we still use. We could also use MultiByteToWideChar,
  // but using it would require changing our build infrastructure -- hence
  // we implement the conversion manually.
  wstring result;
  while (*str) {
    // The lead byte is always consumed; it determines the payload bits and
    // how many continuation bytes are expected.
    const unsigned char lead = (unsigned char)*str++;
    char32_t chr;
    int continuation_bytes = 0;

    if (lead < 0x80) chr = lead;
    else if (lead < 0xC0) chr = '?';
    else if (lead < 0xE0) chr = lead & 0x1F, continuation_bytes = 1;
    else if (lead < 0xF0) chr = lead & 0x0F, continuation_bytes = 2;
    else if (lead < 0xF8) chr = lead & 0x07, continuation_bytes = 3;
    else chr = '?';

    // A byte outside 0x80..0xBF ends the sequence early; it is left
    // unconsumed so that the outer loop processes it as a new lead byte.
    for (; continuation_bytes > 0; continuation_bytes--) {
      const unsigned char next = (unsigned char)*str;
      if (next < 0x80 || next >= 0xC0) {
        chr = '?';
        break;
      }
      chr = (chr << 6) + (next & 0x3F);
      ++str;
    }

    if (chr > 0x10FFFF) {
      result.push_back('?');
    } else if (chr > 0xFFFF) {
      // Encode as a UTF-16 surrogate pair.
      const char32_t offset = chr - 0x10000;
      result.push_back(0xD800 + (offset >> 10));
      result.push_back(0xDC00 + (offset & 0x3FF));
    } else {
      result.push_back(chr);
    }
  }
  return result;
}

// std::string overload; forwards the C string to the overload above.
inline wstring path_from_utf8(const string& str) {
  const char* utf8 = str.c_str();
  return path_from_utf8(utf8);
}

#else

// Non-Windows: the bytes are used as the path without conversion.
inline string path_from_utf8(const char* str) {
  return string(str);
}

// Non-Windows: returns the argument itself, without copying.
inline const string& path_from_utf8(const string& str) {
  const string& unchanged = str;
  return unchanged;
}

#endif
1903
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
} // namespace utils |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
///////// |
1907
|
|
|
|
|
|
|
// File: utils/split.h |
1908
|
|
|
|
|
|
|
///////// |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
1911
|
|
|
|
|
|
|
// |
1912
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1913
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1914
|
|
|
|
|
|
|
// |
1915
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1916
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1917
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
namespace utils { |
1920
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
// |
1922
|
|
|
|
|
|
|
// Declarations |
1923
|
|
|
|
|
|
|
// |
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
// Split given text on the separator character. |
1926
|
|
|
|
|
|
|
inline void split(const string& text, char sep, vector& tokens); |
1927
|
|
|
|
|
|
|
inline void split(string_piece text, char sep, vector& tokens); |
1928
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
// |
1930
|
|
|
|
|
|
|
// Definitions |
1931
|
|
|
|
|
|
|
// |
1932
|
|
|
|
|
|
|
|
1933
|
0
|
|
|
|
|
|
// Splits `text` on every occurrence of `sep`, storing copies of the fields in
// `tokens` (which is cleared first). An empty `text` produces no tokens;
// otherwise N separators produce N+1 tokens, and adjacent, leading or trailing
// separators produce empty tokens.
void split(const string& text, char sep, vector<string>& tokens) {
  tokens.clear();
  if (text.empty()) return;

  string::size_type start = 0;
  for (string::size_type sep_pos = text.find(sep, start); sep_pos != string::npos; sep_pos = text.find(sep, start)) {
    tokens.emplace_back(text, start, sep_pos - start);
    start = sep_pos + 1;
  }

  // Remainder after the last separator (possibly empty).
  tokens.emplace_back(text, start);
}
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
// Splits `text` on every occurrence of `sep`, storing the fields in `tokens`
// (which is cleared first) as pieces pointing into the memory of `text`.
// An empty `text` produces no tokens; otherwise N separators produce N+1
// tokens, some of which may be empty.
void split(string_piece text, char sep, vector<string_piece>& tokens) {
  tokens.clear();
  if (!text.len) return;

  const char* str = text.str;
  const char* const end = text.str + text.len;
  while (const char* next = (const char*) memchr(str, sep, end - str)) {
    tokens.emplace_back(str, next - str);
    str = next + 1;
  }

  // Remainder after the last separator (possibly empty).
  tokens.emplace_back(str, end - str);
}
1954
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
} // namespace utils |
1956
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
///////// |
1958
|
|
|
|
|
|
|
// File: utils/url_detector.h |
1959
|
|
|
|
|
|
|
///////// |
1960
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
1962
|
|
|
|
|
|
|
// |
1963
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1964
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1965
|
|
|
|
|
|
|
// |
1966
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1967
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1968
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1969
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
namespace utils { |
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
// Classifies a string as an URL, an e-mail address, or neither.
// Only the declaration is present here; detect() is defined elsewhere.
class url_detector {
 public:
  // Result of detect().
  enum url_type { NO_URL = 0, URL = 1, EMAIL = 2 };

  // Detect whether given string is an url or an email.
  // If length==nullptr, the whole str must match.
  // If length!=nullptr, length of longest matching prefix is returned.
  static url_type detect(string_piece str, size_t* length = nullptr);
};
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
} // namespace utils |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
///////// |
1985
|
|
|
|
|
|
|
// File: features/feature_processor_instances.cpp |
1986
|
|
|
|
|
|
|
///////// |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
// This file is part of NameTag . |
1989
|
|
|
|
|
|
|
// |
1990
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1991
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1992
|
|
|
|
|
|
|
// |
1993
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1994
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1995
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1996
|
|
|
|
|
|
|
|
1997
|
|
|
|
|
|
|
// Helper functions defined as macros so that they can access arguments without passing them
// The expansions refer to the identifiers `sentence` and `window` directly,
// so both must be in scope wherever a macro is used.

// apply_in_window(I, Feature): apply_in_range over offsets -window..window around I.
#define apply_in_window(I, Feature) apply_in_range(I, Feature, -window, window)

// apply_in_range(I, Feature, Left, Right): evaluates Feature once and, unless
// it equals ner_feature_unknown, appends (Feature + w - I) to
// sentence.features[w] for every index w from I+Left to I+Right inclusive,
// clamped to the range [0, sentence.size).
#define apply_in_range(I, Feature, Left, Right) { \
  ner_feature _feature = (Feature); \
  if (_feature != ner_feature_unknown) \
    for (int _w = int(I) + (Left) < 0 ? 0 : int(I) + (Left), \
           _end = int(I) + (Right) + 1 < int(sentence.size) ? int(I) + (Right) + 1 : sentence.size; \
         _w < _end; _w++) \
      sentence.features[_w].emplace_back(_feature + _w - int(I)); \
}

// apply_outer_words_in_window(Feature): evaluates Feature once and, unless it
// equals ner_feature_unknown, applies it in window around each of the `window`
// positions before the sentence (-1, -2, ...) and after it (sentence.size,
// sentence.size + 1, ...); apply_in_range clamps the affected words to the
// sentence itself.
#define apply_outer_words_in_window(Feature) { \
  ner_feature _outer_feature = (Feature); \
  if (_outer_feature != ner_feature_unknown) \
    for (int _i = 1; _i <= window; _i++) { \
      apply_in_window(-_i, _outer_feature); \
      apply_in_window(sentence.size - 1 + _i, _outer_feature); \
    } \
}

// Expands to (window), the value the embedded comment states that
// lookup(string()) always returns.
#define lookup_empty() /* lookup(string()) always returns */(window)
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////// |
2021
|
|
|
|
|
|
|
// Feature processor instances (ordered lexicographically) // |
2022
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////// |
2023
|
|
|
|
|
|
|
namespace feature_processors { |
2024
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
// BrownClusters |
2026
|
0
|
|
|
|
|
|
class brown_clusters : public feature_processor { |
2027
|
|
|
|
|
|
|
public: |
2028
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
2029
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
2030
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
2031
|
0
|
0
|
|
|
|
|
if (args.size() < 1) return cerr << "BrownCluster requires a cluster file as the first argument!" << endl, false; |
2032
|
|
|
|
|
|
|
|
2033
|
0
|
|
|
|
|
|
ifstream in(path_from_utf8(args[0]).c_str()); |
2034
|
0
|
0
|
|
|
|
|
if (!in.is_open()) return cerr << "Cannot open Brown clusters file '" << args[0] << "'!" << endl, false; |
2035
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
vector substrings; |
2037
|
0
|
0
|
|
|
|
|
substrings.emplace_back(string::npos); |
2038
|
0
|
0
|
|
|
|
|
for (unsigned i = 1; i < args.size(); i++) { |
2039
|
0
|
0
|
|
|
|
|
int len = parse_int(args[i].c_str(), "BrownCluster_prefix_length"); |
2040
|
0
|
0
|
|
|
|
|
if (len <= 0) |
2041
|
0
|
0
|
|
|
|
|
return cerr << "Wrong prefix length '" << len << "' in BrownCluster specification!" << endl, false; |
2042
|
|
|
|
|
|
|
else |
2043
|
0
|
0
|
|
|
|
|
substrings.emplace_back(len); |
2044
|
|
|
|
|
|
|
} |
2045
|
|
|
|
|
|
|
|
2046
|
0
|
|
|
|
|
|
clusters.clear(); |
2047
|
|
|
|
|
|
|
unordered_map cluster_map; |
2048
|
|
|
|
|
|
|
unordered_map prefixes_map; |
2049
|
|
|
|
|
|
|
string line; |
2050
|
0
|
|
|
|
|
|
vector tokens; |
2051
|
0
|
0
|
|
|
|
|
while (getline(in, line)) { |
|
|
0
|
|
|
|
|
|
2052
|
0
|
0
|
|
|
|
|
split(line, '\t', tokens); |
2053
|
0
|
0
|
|
|
|
|
if (tokens.size() != 2) return cerr << "Wrong line '" << line << "' in Brown cluster file '" << args[0] << "'!" << endl, false; |
2054
|
|
|
|
|
|
|
|
2055
|
|
|
|
|
|
|
string cluster = tokens[0], form = tokens[1]; |
2056
|
|
|
|
|
|
|
auto it = cluster_map.find(cluster); |
2057
|
0
|
0
|
|
|
|
|
if (it == cluster_map.end()) { |
2058
|
0
|
|
|
|
|
|
unsigned id = clusters.size(); |
2059
|
0
|
0
|
|
|
|
|
clusters.emplace_back(); |
2060
|
0
|
0
|
|
|
|
|
for (auto&& substring : substrings) |
2061
|
0
|
0
|
|
|
|
|
if (substring == string::npos || substring < cluster.size()) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2062
|
0
|
0
|
|
|
|
|
clusters.back().emplace_back(prefixes_map.emplace(cluster.substr(0, substring), *total_features + (2*window + 1) * (int)prefixes_map.size() + window).first->second); |
|
|
0
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
it = cluster_map.emplace(cluster, id).first; |
2064
|
|
|
|
|
|
|
} |
2065
|
0
|
0
|
|
|
|
|
if (!map.emplace(form, it->second).second) return cerr << "Form '" << form << "' is present twice in Brown cluster file '" << args[0] << "'!" << endl, false; |
2066
|
|
|
|
|
|
|
} |
2067
|
|
|
|
|
|
|
|
2068
|
0
|
|
|
|
|
|
*total_features += (2*window + 1) * prefixes_map.size(); |
2069
|
0
|
|
|
|
|
|
return true; |
2070
|
|
|
|
|
|
|
} |
2071
|
|
|
|
|
|
|
|
2072
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
2073
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
2074
|
|
|
|
|
|
|
|
2075
|
0
|
|
|
|
|
|
clusters.resize(data.next_4B()); |
2076
|
0
|
0
|
|
|
|
|
for (auto&& cluster : clusters) { |
2077
|
0
|
|
|
|
|
|
cluster.resize(data.next_4B()); |
2078
|
0
|
0
|
|
|
|
|
for (auto&& feature : cluster) |
2079
|
0
|
|
|
|
|
|
feature = data.next_4B(); |
2080
|
|
|
|
|
|
|
} |
2081
|
0
|
|
|
|
|
|
} |
2082
|
|
|
|
|
|
|
|
2083
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
2084
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
2085
|
|
|
|
|
|
|
|
2086
|
0
|
|
|
|
|
|
enc.add_4B(clusters.size()); |
2087
|
0
|
0
|
|
|
|
|
for (auto&& cluster : clusters) { |
2088
|
0
|
|
|
|
|
|
enc.add_4B(cluster.size()); |
2089
|
0
|
0
|
|
|
|
|
for (auto&& feature : cluster) |
2090
|
0
|
|
|
|
|
|
enc.add_4B(feature); |
2091
|
|
|
|
|
|
|
} |
2092
|
0
|
|
|
|
|
|
} |
2093
|
|
|
|
|
|
|
|
2094
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& /*buffer*/) const override { |
2095
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2096
|
0
|
|
|
|
|
|
auto it = map.find(sentence.words[i].raw_lemma); |
2097
|
0
|
0
|
|
|
|
|
if (it != map.end()) { |
2098
|
0
|
|
|
|
|
|
auto& cluster = clusters[it->second]; |
2099
|
0
|
0
|
|
|
|
|
for (auto&& feature : cluster) |
2100
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature); |
|
|
0
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
} |
2102
|
|
|
|
|
|
|
} |
2103
|
0
|
|
|
|
|
|
} |
2104
|
|
|
|
|
|
|
|
2105
|
|
|
|
|
|
|
private: |
2106
|
|
|
|
|
|
|
vector> clusters; |
2107
|
|
|
|
|
|
|
}; |
2108
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
// CzechAddContainers |
2110
|
0
|
|
|
|
|
|
class czech_add_containers : public feature_processor { |
2111
|
|
|
|
|
|
|
public: |
2112
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, ner_feature* total_features, const nlp_pipeline& pipeline) override { |
2113
|
0
|
0
|
|
|
|
|
if (window) return cerr << "CzechAddContainers cannot have non-zero window!" << endl, false; |
2114
|
|
|
|
|
|
|
|
2115
|
0
|
|
|
|
|
|
return feature_processor::parse(window, args, entities, total_features, pipeline); |
2116
|
|
|
|
|
|
|
} |
2117
|
|
|
|
|
|
|
|
2118
|
0
|
|
|
|
|
|
virtual void process_entities(ner_sentence& /*sentence*/, vector& entities, vector& buffer) const override { |
2119
|
0
|
|
|
|
|
|
buffer.clear(); |
2120
|
|
|
|
|
|
|
|
2121
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < entities.size(); i++) { |
2122
|
|
|
|
|
|
|
// P if ps+ pf+ |
2123
|
0
|
0
|
|
|
|
|
if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2124
|
0
|
|
|
|
|
|
unsigned j = i + 1; |
2125
|
0
|
0
|
|
|
|
|
while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("pf") == 0) j++; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2126
|
0
|
0
|
|
|
|
|
if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2127
|
0
|
|
|
|
|
|
j++; |
2128
|
0
|
0
|
|
|
|
|
while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) j++; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2129
|
0
|
|
|
|
|
|
buffer.emplace_back(entities[i].start, entities[j - 1].start + entities[j - 1].length - entities[i].start, "P"); |
2130
|
|
|
|
|
|
|
} |
2131
|
|
|
|
|
|
|
} |
2132
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
// T if td tm ty | td tm |
2134
|
0
|
0
|
|
|
|
|
if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2135
|
0
|
|
|
|
|
|
unsigned j = i + 2; |
2136
|
0
|
0
|
|
|
|
|
if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ty") == 0) j++; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2137
|
0
|
|
|
|
|
|
buffer.emplace_back(entities[i].start, entities[j - 1].start + entities[j - 1].length - entities[i].start, "T"); |
2138
|
|
|
|
|
|
|
} |
2139
|
|
|
|
|
|
|
// T if !td tm ty |
2140
|
0
|
0
|
|
|
|
|
if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0)) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2141
|
0
|
0
|
|
|
|
|
if (i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("ty") == 0) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2142
|
0
|
|
|
|
|
|
buffer.emplace_back(entities[i].start, entities[i + 1].start + entities[i + 1].length - entities[i].start, "T"); |
2143
|
|
|
|
|
|
|
|
2144
|
0
|
|
|
|
|
|
buffer.push_back(entities[i]); |
2145
|
|
|
|
|
|
|
} |
2146
|
|
|
|
|
|
|
|
2147
|
0
|
0
|
|
|
|
|
if (buffer.size() > entities.size()) entities = buffer; |
2148
|
0
|
|
|
|
|
|
} |
2149
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
// CzechAddContainers used to be entity_processor which had empty load and save methods. |
2151
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& /*data*/, const nlp_pipeline& /*pipeline*/) override {} |
2152
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& /*enc*/) override {} |
2153
|
|
|
|
|
|
|
}; |
2154
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
// CzechLemmaTerm |
2156
|
0
|
|
|
|
|
|
class czech_lemma_term : public feature_processor { |
2157
|
|
|
|
|
|
|
public: |
2158
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2159
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2160
|
0
|
0
|
|
|
|
|
for (unsigned pos = 0; pos + 2 < sentence.words[i].lemma_comments.size(); pos++) |
2161
|
0
|
0
|
|
|
|
|
if (sentence.words[i].lemma_comments[pos] == '_' && sentence.words[i].lemma_comments[pos+1] == ';') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2162
|
0
|
|
|
|
|
|
buffer.assign(1, sentence.words[i].lemma_comments[pos+2]); |
2163
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
0
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
} |
2165
|
|
|
|
|
|
|
} |
2166
|
0
|
|
|
|
|
|
} |
2167
|
|
|
|
|
|
|
}; |
2168
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
// Form |
2170
|
0
|
|
|
|
|
|
class form : public feature_processor { |
2171
|
|
|
|
|
|
|
public: |
2172
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
2173
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2174
|
54
|
100
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].form, total_features)); |
|
|
100
|
|
|
|
|
|
2175
|
|
|
|
|
|
|
|
2176
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2177
|
4
|
|
|
|
|
|
} |
2178
|
|
|
|
|
|
|
}; |
2179
|
|
|
|
|
|
|
|
2180
|
|
|
|
|
|
|
// FormCapitalization |
2181
|
0
|
|
|
|
|
|
class form_capitalization : public feature_processor { |
2182
|
|
|
|
|
|
|
public: |
2183
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2184
|
|
|
|
|
|
|
using namespace unilib; |
2185
|
|
|
|
|
|
|
|
2186
|
0
|
|
|
|
|
|
ner_feature fst_cap = lookup(buffer.assign("f"), total_features); |
2187
|
0
|
|
|
|
|
|
ner_feature all_cap = lookup(buffer.assign("a"), total_features); |
2188
|
0
|
|
|
|
|
|
ner_feature mixed_cap = lookup(buffer.assign("m"), total_features); |
2189
|
|
|
|
|
|
|
|
2190
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2191
|
|
|
|
|
|
|
bool was_upper = false, was_lower = false; |
2192
|
|
|
|
|
|
|
|
2193
|
0
|
|
|
|
|
|
auto* form = sentence.words[i].form.c_str(); |
2194
|
|
|
|
|
|
|
char32_t chr; |
2195
|
0
|
0
|
|
|
|
|
for (bool first = true; (chr = utf8::decode(form)); first = false) { |
2196
|
|
|
|
|
|
|
auto category = unicode::category(chr); |
2197
|
0
|
0
|
|
|
|
|
was_upper = was_upper || category & unicode::Lut; |
|
|
0
|
|
|
|
|
|
2198
|
0
|
0
|
|
|
|
|
was_lower = was_lower || category & unicode::Ll; |
|
|
0
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
|
2200
|
0
|
0
|
|
|
|
|
if (first && was_upper) apply_in_window(i, fst_cap); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
} |
2202
|
0
|
0
|
|
|
|
|
if (was_upper && !was_lower) apply_in_window(i, all_cap); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2203
|
0
|
0
|
|
|
|
|
if (was_upper && was_lower) apply_in_window(i, mixed_cap); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
} |
2205
|
0
|
|
|
|
|
|
} |
2206
|
|
|
|
|
|
|
}; |
2207
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
// FormCaseNormalized |
2209
|
0
|
|
|
|
|
|
class form_case_normalized : public feature_processor { |
2210
|
|
|
|
|
|
|
public: |
2211
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2212
|
|
|
|
|
|
|
using namespace unilib; |
2213
|
|
|
|
|
|
|
|
2214
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2215
|
|
|
|
|
|
|
buffer.clear(); |
2216
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(sentence.words[i].form)) |
2217
|
0
|
0
|
|
|
|
|
utf8::append(buffer, buffer.empty() ? chr : unicode::lowercase(chr)); |
2218
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
0
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
} |
2220
|
|
|
|
|
|
|
|
2221
|
0
|
0
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2222
|
0
|
|
|
|
|
|
} |
2223
|
|
|
|
|
|
|
}; |
2224
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
// Gazetteers |
2226
|
0
|
|
|
|
|
|
class gazetteers : public feature_processor { |
2227
|
|
|
|
|
|
|
public: |
2228
|
|
|
|
|
|
|
enum { G = 0, U = 1, B = 2, L = 3, I = 4 }; |
2229
|
|
|
|
|
|
|
|
2230
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
2231
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
2232
|
|
|
|
|
|
|
cerr << "The 'Gazetteers' feature template is deprecated, use 'GazetteersEnhanced' !" << endl; |
2233
|
|
|
|
|
|
|
|
2234
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
2235
|
|
|
|
|
|
|
|
2236
|
0
|
|
|
|
|
|
gazetteers_info.clear(); |
2237
|
0
|
0
|
|
|
|
|
for (auto&& arg : args) { |
2238
|
0
|
0
|
|
|
|
|
ifstream in(path_from_utf8(arg).c_str()); |
2239
|
0
|
0
|
|
|
|
|
if (!in.is_open()) return cerr << "Cannot open gazetteers file '" << arg << "'!" << endl, false; |
2240
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
unsigned longest = 0; |
2242
|
|
|
|
|
|
|
string gazetteer; |
2243
|
|
|
|
|
|
|
string line; |
2244
|
0
|
|
|
|
|
|
vector tokens; |
2245
|
0
|
0
|
|
|
|
|
while (getline(in, line)) { |
|
|
0
|
|
|
|
|
|
2246
|
0
|
0
|
|
|
|
|
split(line, ' ', tokens); |
2247
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < tokens.size(); i++) |
2248
|
0
|
0
|
|
|
|
|
if (!tokens[i][0]) |
2249
|
0
|
|
|
|
|
|
tokens.erase(tokens.begin() + i--); |
2250
|
0
|
0
|
|
|
|
|
if (tokens.size() > longest) longest = tokens.size(); |
2251
|
|
|
|
|
|
|
|
2252
|
|
|
|
|
|
|
gazetteer.clear(); |
2253
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < tokens.size(); i++) { |
2254
|
0
|
0
|
|
|
|
|
if (i) gazetteer += ' '; |
2255
|
|
|
|
|
|
|
gazetteer += tokens[i]; |
2256
|
0
|
|
|
|
|
|
auto it = map.emplace(gazetteer, (int)gazetteers_info.size()).first; |
2257
|
0
|
0
|
|
|
|
|
if (it->second == gazetteers_info.size()) gazetteers_info.emplace_back(); |
|
|
0
|
|
|
|
|
|
2258
|
0
|
|
|
|
|
|
auto& info = gazetteers_info[it->second]; |
2259
|
0
|
0
|
|
|
|
|
if (i + 1 < tokens.size()) |
2260
|
0
|
|
|
|
|
|
info.prefix_of_longer |= true; |
2261
|
|
|
|
|
|
|
else |
2262
|
0
|
0
|
|
|
|
|
if (find(info.features.begin(), info.features.end(), *total_features + window) == info.features.end()) |
2263
|
0
|
0
|
|
|
|
|
info.features.emplace_back(*total_features + window); |
2264
|
|
|
|
|
|
|
} |
2265
|
|
|
|
|
|
|
} |
2266
|
0
|
0
|
|
|
|
|
*total_features += (2*window + 1) * (longest == 0 ? 0 : longest == 1 ? U+1 : longest == 2 ? L+1 : I+1); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
} |
2268
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
return true; |
2270
|
|
|
|
|
|
|
} |
2271
|
|
|
|
|
|
|
|
2272
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
2273
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
2274
|
|
|
|
|
|
|
|
2275
|
0
|
|
|
|
|
|
gazetteers_info.resize(data.next_4B()); |
2276
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteers_info) { |
2277
|
0
|
|
|
|
|
|
gazetteer.prefix_of_longer = data.next_1B(); |
2278
|
0
|
|
|
|
|
|
gazetteer.features.resize(data.next_1B()); |
2279
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteer.features) |
2280
|
0
|
|
|
|
|
|
feature = data.next_4B(); |
2281
|
|
|
|
|
|
|
} |
2282
|
0
|
|
|
|
|
|
} |
2283
|
|
|
|
|
|
|
|
2284
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
2285
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
2286
|
|
|
|
|
|
|
|
2287
|
0
|
|
|
|
|
|
enc.add_4B(gazetteers_info.size()); |
2288
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteers_info) { |
2289
|
0
|
|
|
|
|
|
enc.add_1B(gazetteer.prefix_of_longer); |
2290
|
0
|
|
|
|
|
|
enc.add_1B(gazetteer.features.size()); |
2291
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteer.features) |
2292
|
0
|
|
|
|
|
|
enc.add_4B(feature); |
2293
|
|
|
|
|
|
|
} |
2294
|
0
|
|
|
|
|
|
} |
2295
|
|
|
|
|
|
|
|
2296
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& buffer) const override { |
2297
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2298
|
0
|
|
|
|
|
|
auto it = map.find(sentence.words[i].raw_lemma); |
2299
|
0
|
0
|
|
|
|
|
if (it == map.end()) continue; |
2300
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
// Apply regular gazetteer feature G + unigram gazetteer feature U |
2302
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteers_info[it->second].features) { |
2303
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature + G * (2*window + 1)); |
|
|
0
|
|
|
|
|
|
2304
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature + U * (2*window + 1)); |
|
|
0
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
} |
2306
|
|
|
|
|
|
|
|
2307
|
0
|
0
|
|
|
|
|
for (unsigned j = i + 1; gazetteers_info[it->second].prefix_of_longer && j < sentence.size; j++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2308
|
0
|
0
|
|
|
|
|
if (j == i + 1) buffer.assign(sentence.words[i].raw_lemma); |
2309
|
|
|
|
|
|
|
buffer += ' '; |
2310
|
0
|
|
|
|
|
|
buffer += sentence.words[j].raw_lemma; |
2311
|
|
|
|
|
|
|
it = map.find(buffer); |
2312
|
0
|
0
|
|
|
|
|
if (it == map.end()) break; |
2313
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
// Apply regular gazetteer feature G + position specific gazetteers B, I, L |
2315
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteers_info[it->second].features) |
2316
|
0
|
0
|
|
|
|
|
for (unsigned g = i; g <= j; g++) { |
2317
|
0
|
0
|
|
|
|
|
apply_in_window(g, feature + G * (2*window + 1)); |
|
|
0
|
|
|
|
|
|
2318
|
0
|
0
|
|
|
|
|
apply_in_window(g, feature + (g == i ? B : g == j ? L : I) * (2*window + 1)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
} |
2320
|
|
|
|
|
|
|
} |
2321
|
|
|
|
|
|
|
} |
2322
|
0
|
|
|
|
|
|
} |
2323
|
|
|
|
|
|
|
|
2324
|
|
|
|
|
|
|
private: |
2325
|
0
|
|
|
|
|
|
struct gazetteer_info { |
2326
|
|
|
|
|
|
|
vector features; |
2327
|
|
|
|
|
|
|
bool prefix_of_longer; |
2328
|
|
|
|
|
|
|
}; |
2329
|
|
|
|
|
|
|
vector gazetteers_info; |
2330
|
|
|
|
|
|
|
}; |
2331
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
// GazetteersEnhanced |
2333
|
0
|
|
|
|
|
|
class gazetteers_enhanced : public feature_processor { |
2334
|
|
|
|
|
|
|
public: |
2335
|
|
|
|
|
|
|
enum { G = 0, U = 1, B = 2, L = 3, I = 4, TOTAL = 5 }; |
2336
|
|
|
|
|
|
|
|
2337
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
2338
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
2339
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
2340
|
|
|
|
|
|
|
|
2341
|
0
|
|
|
|
|
|
gazetteer_metas.clear(); |
2342
|
|
|
|
|
|
|
gazetteer_lists.clear(); |
2343
|
|
|
|
|
|
|
|
2344
|
0
|
0
|
|
|
|
|
if (args.size() < 4) return cerr << "Not enough parameters to GazetteersEnhanced!" << endl, false; |
2345
|
0
|
0
|
|
|
|
|
if (args.size() & 1) return cerr << "Odd number of parameters to GazetteersEnhanced!" << endl, false; |
2346
|
|
|
|
|
|
|
|
2347
|
0
|
0
|
|
|
|
|
if (args[0] == "form") match = MATCH_FORM; |
2348
|
0
|
0
|
|
|
|
|
else if (args[0] == "rawlemma") match = MATCH_RAWLEMMA; |
2349
|
0
|
0
|
|
|
|
|
else if (args[0] == "rawlemmas") match = MATCH_RAWLEMMAS; |
2350
|
0
|
|
|
|
|
|
else return cerr << "First parameter of GazetteersEnhanced not one of form/rawlemma/rawlemmas!" << endl, false; |
2351
|
|
|
|
|
|
|
|
2352
|
0
|
0
|
|
|
|
|
if (args[1] == "embed_in_model") embed = EMBED_IN_MODEL; |
2353
|
0
|
0
|
|
|
|
|
else if (args[1] == "out_of_model") embed = OUT_OF_MODEL; |
2354
|
0
|
|
|
|
|
|
else return cerr << "Second parameter of GazetteersEnhanced not one of [embed_in|out_of]_model!" << endl, false; |
2355
|
|
|
|
|
|
|
|
2356
|
0
|
0
|
|
|
|
|
for (unsigned i = 2; i < args.size(); i += 2) { |
2357
|
0
|
|
|
|
|
|
gazetteer_metas.emplace_back(); |
2358
|
0
|
|
|
|
|
|
gazetteer_metas.back().basename = args[i]; |
2359
|
0
|
|
|
|
|
|
gazetteer_metas.back().feature = *total_features + window; *total_features += TOTAL * (2 * window + 1); |
2360
|
0
|
0
|
|
|
|
|
gazetteer_metas.back().entity = args[i + 1] == "NONE" ? -1 : entities.parse(args[i + 1].c_str(), true); |
2361
|
|
|
|
|
|
|
} |
2362
|
|
|
|
|
|
|
|
2363
|
0
|
|
|
|
|
|
entity_list.clear(); |
2364
|
0
|
0
|
|
|
|
|
for (entity_type i = 0; i < entities.size(); i++) |
2365
|
0
|
|
|
|
|
|
entity_list.push_back(entities.name(i)); |
2366
|
|
|
|
|
|
|
|
2367
|
0
|
0
|
|
|
|
|
if (!load_gazetteer_lists(pipeline, embed == EMBED_IN_MODEL)) return false; |
2368
|
|
|
|
|
|
|
|
2369
|
0
|
|
|
|
|
|
return true; |
2370
|
|
|
|
|
|
|
} |
2371
|
|
|
|
|
|
|
|
2372
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
2373
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
2374
|
|
|
|
|
|
|
|
2375
|
0
|
|
|
|
|
|
match = data.next_4B(); |
2376
|
0
|
|
|
|
|
|
embed = OUT_OF_MODEL; |
2377
|
|
|
|
|
|
|
|
2378
|
0
|
|
|
|
|
|
gazetteer_metas.resize(data.next_4B()); |
2379
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_meta : gazetteer_metas) { |
2380
|
0
|
|
|
|
|
|
data.next_str(gazetteer_meta.basename); |
2381
|
0
|
|
|
|
|
|
gazetteer_meta.feature = data.next_4B(); |
2382
|
0
|
|
|
|
|
|
gazetteer_meta.entity = data.next_4B(); |
2383
|
|
|
|
|
|
|
} |
2384
|
|
|
|
|
|
|
|
2385
|
0
|
|
|
|
|
|
gazetteer_lists.resize(data.next_4B()); |
2386
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) { |
2387
|
0
|
|
|
|
|
|
gazetteer_list.gazetteers.resize(data.next_4B()); |
2388
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) |
2389
|
0
|
|
|
|
|
|
data.next_str(gazetteer); |
2390
|
0
|
|
|
|
|
|
gazetteer_list.feature = data.next_4B(); |
2391
|
0
|
|
|
|
|
|
gazetteer_list.entity = data.next_4B(); |
2392
|
0
|
|
|
|
|
|
gazetteer_list.mode = data.next_4B(); |
2393
|
|
|
|
|
|
|
} |
2394
|
|
|
|
|
|
|
|
2395
|
0
|
|
|
|
|
|
entity_list.resize(data.next_4B()); |
2396
|
0
|
0
|
|
|
|
|
for (auto&& entity : entity_list) |
2397
|
0
|
|
|
|
|
|
data.next_str(entity); |
2398
|
|
|
|
|
|
|
|
2399
|
0
|
|
|
|
|
|
load_gazetteer_lists(pipeline, false); |
2400
|
0
|
|
|
|
|
|
} |
2401
|
|
|
|
|
|
|
|
2402
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
2403
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
2404
|
|
|
|
|
|
|
|
2405
|
0
|
|
|
|
|
|
enc.add_4B(match); |
2406
|
|
|
|
|
|
|
|
2407
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_metas.size()); |
2408
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_meta : gazetteer_metas) { |
2409
|
0
|
|
|
|
|
|
enc.add_str(gazetteer_meta.basename); |
2410
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_meta.feature); |
2411
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_meta.entity); |
2412
|
|
|
|
|
|
|
} |
2413
|
|
|
|
|
|
|
|
2414
|
0
|
0
|
|
|
|
|
if (embed == EMBED_IN_MODEL) { |
2415
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_lists.size()); |
2416
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) { |
2417
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.gazetteers.size()); |
2418
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) |
2419
|
0
|
|
|
|
|
|
enc.add_str(gazetteer); |
2420
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.feature); |
2421
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.entity); |
2422
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.mode); |
2423
|
|
|
|
|
|
|
} |
2424
|
|
|
|
|
|
|
} else { |
2425
|
|
|
|
|
|
|
enc.add_4B(0); |
2426
|
|
|
|
|
|
|
} |
2427
|
|
|
|
|
|
|
|
2428
|
0
|
|
|
|
|
|
enc.add_4B(entity_list.size()); |
2429
|
0
|
0
|
|
|
|
|
for (auto&& entity : entity_list) |
2430
|
0
|
|
|
|
|
|
enc.add_str(entity); |
2431
|
0
|
|
|
|
|
|
} |
2432
|
|
|
|
|
|
|
|
2433
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& /*buffer*/) const override { |
2434
|
|
|
|
|
|
|
vector nodes, new_nodes; |
2435
|
0
|
0
|
|
|
|
|
vector> features(sentence.size); |
2436
|
|
|
|
|
|
|
|
2437
|
0
|
0
|
|
|
|
|
vector> recased_match_sources(sentence.size); |
2438
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2439
|
0
|
0
|
|
|
|
|
recase_match_source(sentence.words[i], RECASE_ANY, recased_match_sources[i]); |
2440
|
|
|
|
|
|
|
|
2441
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2442
|
|
|
|
|
|
|
unsigned hard_pre_length = 0, hard_pre_node = -1; |
2443
|
|
|
|
|
|
|
bool hard_pre_possible = true; |
2444
|
0
|
|
|
|
|
|
nodes.assign(1, 0); |
2445
|
0
|
0
|
|
|
|
|
for (unsigned j = i; j < sentence.size && !nodes.empty(); j++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
new_nodes.clear(); |
2447
|
0
|
0
|
|
|
|
|
for (auto&& node : nodes) |
2448
|
0
|
0
|
|
|
|
|
if (!gazetteers_trie[node].children.empty()) |
2449
|
0
|
0
|
|
|
|
|
for (auto&& match_source : recased_match_sources[j]) { |
2450
|
0
|
|
|
|
|
|
auto range = gazetteers_trie[node].children.equal_range(match_source); |
2451
|
0
|
0
|
|
|
|
|
for (auto&& it = range.first; it != range.second; it++) |
2452
|
0
|
0
|
|
|
|
|
append_unless_exists(new_nodes, it->second); |
2453
|
|
|
|
|
|
|
} |
2454
|
|
|
|
|
|
|
|
2455
|
0
|
0
|
|
|
|
|
hard_pre_possible = hard_pre_possible && !sentence.probabilities[j].local_filled; |
|
|
0
|
|
|
|
|
|
2456
|
0
|
0
|
|
|
|
|
if (hard_pre_possible) |
2457
|
0
|
0
|
|
|
|
|
for (auto&& node : new_nodes) |
2458
|
0
|
0
|
|
|
|
|
if (gazetteers_trie[node].mode == HARD_PRE && |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2459
|
0
|
0
|
|
|
|
|
((j - i + 1) > hard_pre_length || node < hard_pre_node)) |
2460
|
0
|
|
|
|
|
|
hard_pre_length = j - i + 1, hard_pre_node = node; |
2461
|
|
|
|
|
|
|
|
2462
|
|
|
|
|
|
|
// Fill features |
2463
|
0
|
0
|
|
|
|
|
for (auto&& node : new_nodes) |
2464
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteers_trie[node].features) |
2465
|
0
|
0
|
|
|
|
|
for (unsigned k = i; k <= j; k++) { |
2466
|
0
|
0
|
|
|
|
|
bilou_type type = j == i ? bilou_type_U : k == i ? bilou_type_B : k == j ? bilou_type_L : bilou_type_I; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2467
|
0
|
0
|
|
|
|
|
append_unless_exists(features[k], feature + G * (2 * window + 1)); |
2468
|
0
|
0
|
|
|
|
|
append_unless_exists(features[k], feature + type * (2 * window + 1)); |
2469
|
|
|
|
|
|
|
} |
2470
|
|
|
|
|
|
|
|
2471
|
|
|
|
|
|
|
nodes.swap(new_nodes); |
2472
|
|
|
|
|
|
|
} |
2473
|
|
|
|
|
|
|
|
2474
|
0
|
0
|
|
|
|
|
if (hard_pre_length) |
2475
|
0
|
0
|
|
|
|
|
for (unsigned j = i; j < i + hard_pre_length; j++) { |
2476
|
0
|
0
|
|
|
|
|
for (auto&& bilou : sentence.probabilities[j].local.bilou) { |
2477
|
0
|
|
|
|
|
|
bilou.probability = 0.; |
2478
|
0
|
|
|
|
|
|
bilou.entity = entity_type_unknown; |
2479
|
|
|
|
|
|
|
} |
2480
|
|
|
|
|
|
|
bilou_type type = hard_pre_length == 1 ? bilou_type_U : |
2481
|
0
|
0
|
|
|
|
|
j == i ? bilou_type_B : j + 1 == i + hard_pre_length ? bilou_type_L : bilou_type_I; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2482
|
0
|
|
|
|
|
|
sentence.probabilities[j].local.bilou[type].probability = 1.; |
2483
|
0
|
|
|
|
|
|
sentence.probabilities[j].local.bilou[type].entity = gazetteers_trie[hard_pre_node].entity; |
2484
|
0
|
|
|
|
|
|
sentence.probabilities[j].local_filled = true; |
2485
|
|
|
|
|
|
|
} |
2486
|
|
|
|
|
|
|
} |
2487
|
|
|
|
|
|
|
|
2488
|
|
|
|
|
|
|
// Apply generated features |
2489
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2490
|
0
|
0
|
|
|
|
|
for (auto&& feature : features[i]) |
2491
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2492
|
0
|
|
|
|
|
|
} |
2493
|
|
|
|
|
|
|
|
2494
|
0
|
|
|
|
|
|
virtual void process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const override { |
2495
|
|
|
|
|
|
|
vector nodes, new_nodes; |
2496
|
|
|
|
|
|
|
|
2497
|
0
|
0
|
|
|
|
|
vector> recased_match_sources(sentence.size); |
2498
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2499
|
0
|
0
|
|
|
|
|
recase_match_source(sentence.words[i], RECASE_ANY, recased_match_sources[i]); |
2500
|
|
|
|
|
|
|
|
2501
|
0
|
|
|
|
|
|
buffer.clear(); |
2502
|
|
|
|
|
|
|
unsigned entity_until = 0; |
2503
|
0
|
0
|
|
|
|
|
for (unsigned i = 0, e = 0; i < sentence.size; i++) { |
2504
|
0
|
0
|
|
|
|
|
while (e < entities.size() && entities[e].start == i) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2505
|
0
|
0
|
|
|
|
|
if (i + entities[e].length > entity_until) |
2506
|
0
|
|
|
|
|
|
entity_until = i + entities[e].length; |
2507
|
0
|
0
|
|
|
|
|
buffer.push_back(entities[e++]); |
2508
|
|
|
|
|
|
|
} |
2509
|
|
|
|
|
|
|
|
2510
|
0
|
0
|
|
|
|
|
if (entity_until <= i) { |
2511
|
|
|
|
|
|
|
// There is place for a possible POST gazetteer |
2512
|
0
|
0
|
|
|
|
|
unsigned free_until = e < entities.size() ? entities[e].start : sentence.size; |
2513
|
|
|
|
|
|
|
|
2514
|
0
|
|
|
|
|
|
unsigned hard_post_length = 0, hard_post_node = -1; |
2515
|
0
|
|
|
|
|
|
nodes.assign(1, 0); |
2516
|
0
|
0
|
|
|
|
|
for (unsigned j = i; j < free_until && !nodes.empty(); j++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
new_nodes.clear(); |
2518
|
0
|
0
|
|
|
|
|
for (auto&& node : nodes) |
2519
|
0
|
0
|
|
|
|
|
if (!gazetteers_trie[node].children.empty()) |
2520
|
0
|
0
|
|
|
|
|
for (auto&& match_source : recased_match_sources[j]) { |
2521
|
0
|
|
|
|
|
|
auto range = gazetteers_trie[node].children.equal_range(match_source); |
2522
|
0
|
0
|
|
|
|
|
for (auto&& it = range.first; it != range.second; it++) |
2523
|
0
|
0
|
|
|
|
|
append_unless_exists(new_nodes, it->second); |
2524
|
|
|
|
|
|
|
} |
2525
|
|
|
|
|
|
|
|
2526
|
0
|
0
|
|
|
|
|
for (auto&& node : new_nodes) |
2527
|
0
|
0
|
|
|
|
|
if (gazetteers_trie[node].mode == HARD_POST && |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2528
|
0
|
0
|
|
|
|
|
((j - i + 1) > hard_post_length || node < hard_post_node)) |
2529
|
0
|
|
|
|
|
|
hard_post_length = j - i + 1, hard_post_node = node; |
2530
|
|
|
|
|
|
|
|
2531
|
|
|
|
|
|
|
nodes.swap(new_nodes); |
2532
|
|
|
|
|
|
|
} |
2533
|
|
|
|
|
|
|
|
2534
|
0
|
0
|
|
|
|
|
if (hard_post_length) { |
2535
|
0
|
0
|
|
|
|
|
buffer.emplace_back(i, hard_post_length, entity_list[gazetteers_trie[hard_post_node].entity]); |
2536
|
0
|
|
|
|
|
|
entity_until = i + hard_post_length; |
2537
|
|
|
|
|
|
|
} |
2538
|
|
|
|
|
|
|
} |
2539
|
|
|
|
|
|
|
} |
2540
|
|
|
|
|
|
|
|
2541
|
0
|
0
|
|
|
|
|
if (buffer.size() != entities.size()) |
2542
|
|
|
|
|
|
|
entities.swap(buffer); |
2543
|
0
|
|
|
|
|
|
} |
2544
|
|
|
|
|
|
|
|
2545
|
0
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const override { |
2546
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) |
2547
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) { |
2548
|
0
|
0
|
|
|
|
|
gazetteers.push_back(gazetteer); |
2549
|
0
|
0
|
|
|
|
|
if (gazetteer_types) gazetteer_types->push_back(gazetteer_list.entity); |
|
|
0
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
} |
2551
|
0
|
|
|
|
|
|
} |
2552
|
|
|
|
|
|
|
|
2553
|
|
|
|
|
|
|
private: |
2554
|
|
|
|
|
|
|
enum { MATCH_FORM = 0, MATCH_RAWLEMMA = 1, MATCH_RAWLEMMAS = 2 }; |
2555
|
|
|
|
|
|
|
int match; |
2556
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
enum { EMBED_IN_MODEL = 0, OUT_OF_MODEL = 1 }; |
2558
|
|
|
|
|
|
|
int embed; |
2559
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
enum { SOFT, HARD_PRE, HARD_POST, MODES_TOTAL }; |
2561
|
|
|
|
|
|
|
const static vector basename_suffixes; |
2562
|
|
|
|
|
|
|
|
2563
|
0
|
|
|
|
|
|
struct gazetteer_meta_info { |
2564
|
|
|
|
|
|
|
string basename; |
2565
|
|
|
|
|
|
|
ner_feature feature; |
2566
|
|
|
|
|
|
|
int entity; |
2567
|
|
|
|
|
|
|
}; |
2568
|
|
|
|
|
|
|
vector gazetteer_metas; |
2569
|
|
|
|
|
|
|
|
2570
|
0
|
|
|
|
|
|
struct gazetteer_list_info { |
2571
|
|
|
|
|
|
|
vector gazetteers; |
2572
|
|
|
|
|
|
|
ner_feature feature; |
2573
|
|
|
|
|
|
|
int entity; |
2574
|
|
|
|
|
|
|
int mode; |
2575
|
|
|
|
|
|
|
}; |
2576
|
|
|
|
|
|
|
vector gazetteer_lists; |
2577
|
|
|
|
|
|
|
|
2578
|
0
|
|
|
|
|
|
struct gazetteer_trie_node { |
2579
|
|
|
|
|
|
|
vector features; |
2580
|
|
|
|
|
|
|
unordered_multimap children; |
2581
|
|
|
|
|
|
|
int mode = SOFT, entity = -1; |
2582
|
|
|
|
|
|
|
}; |
2583
|
|
|
|
|
|
|
vector gazetteers_trie; |
2584
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
vector entity_list; |
2586
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
template |
2588
|
0
|
|
|
|
|
|
inline static void append_unless_exists(vector& array, T value) { |
2589
|
|
|
|
|
|
|
size_t i; |
2590
|
0
|
0
|
|
|
|
|
for (i = array.size(); i; i--) |
2591
|
0
|
0
|
|
|
|
|
if (array[i - 1] == value) |
2592
|
|
|
|
|
|
|
break; |
2593
|
|
|
|
|
|
|
|
2594
|
0
|
0
|
|
|
|
|
if (!i) |
2595
|
0
|
|
|
|
|
|
array.push_back(value); |
2596
|
0
|
|
|
|
|
|
} |
2597
|
|
|
|
|
|
|
|
2598
|
0
|
|
|
|
|
|
bool load_gazetteer_lists(const nlp_pipeline& pipeline, bool files_must_exist) { |
2599
|
|
|
|
|
|
|
string file_name, line; |
2600
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
// Load raw gazetteers (maybe additional during inference) |
2602
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_meta : gazetteer_metas) |
2603
|
0
|
0
|
|
|
|
|
for (int mode = 0; mode < MODES_TOTAL; mode++) { |
2604
|
0
|
|
|
|
|
|
file_name.assign(gazetteer_meta.basename).append(basename_suffixes[mode]); |
2605
|
|
|
|
|
|
|
|
2606
|
0
|
0
|
|
|
|
|
ifstream file(path_from_utf8(file_name).c_str()); |
2607
|
0
|
0
|
|
|
|
|
if (!file.is_open()) { |
2608
|
0
|
0
|
|
|
|
|
if (mode == SOFT && files_must_exist) |
2609
|
0
|
|
|
|
|
|
return cerr << "Cannot open gazetteers file '" << file_name << "'!" << endl, false; |
2610
|
0
|
|
|
|
|
|
continue; |
2611
|
|
|
|
|
|
|
} |
2612
|
|
|
|
|
|
|
|
2613
|
0
|
0
|
|
|
|
|
gazetteer_lists.emplace_back(); |
2614
|
0
|
|
|
|
|
|
gazetteer_lists.back().feature = gazetteer_meta.feature; |
2615
|
0
|
|
|
|
|
|
gazetteer_lists.back().entity = gazetteer_meta.entity; |
2616
|
0
|
|
|
|
|
|
gazetteer_lists.back().mode = mode; |
2617
|
|
|
|
|
|
|
|
2618
|
0
|
0
|
|
|
|
|
while (getline(file, line)) |
|
|
0
|
|
|
|
|
|
2619
|
0
|
0
|
|
|
|
|
if (!line.empty() && line[0] != '#') |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2620
|
0
|
0
|
|
|
|
|
gazetteer_lists.back().gazetteers.push_back(line); |
2621
|
|
|
|
|
|
|
} |
2622
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
// Build the gazetteers_trie |
2624
|
|
|
|
|
|
|
unordered_map gazetteer_prefixes; |
2625
|
0
|
0
|
|
|
|
|
vector gazetteer_tokens, gazetteer_tokens_additional, gazetteer_token(1); |
2626
|
0
|
|
|
|
|
|
ner_sentence gazetteer_token_tagged; |
2627
|
0
|
|
|
|
|
|
vector gazetteer_recased_match_sources; |
2628
|
|
|
|
|
|
|
|
2629
|
|
|
|
|
|
|
gazetteers_trie.clear(); |
2630
|
0
|
0
|
|
|
|
|
gazetteers_trie.emplace_back(); |
2631
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) |
2632
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) { |
2633
|
0
|
0
|
|
|
|
|
pipeline.tokenizer->set_text(gazetteer); |
2634
|
0
|
0
|
|
|
|
|
if (!pipeline.tokenizer->next_sentence(&gazetteer_tokens, nullptr)) continue; |
|
|
0
|
|
|
|
|
|
2635
|
0
|
0
|
|
|
|
|
while (pipeline.tokenizer->next_sentence(&gazetteer_tokens_additional, nullptr)) |
|
|
0
|
|
|
|
|
|
2636
|
0
|
0
|
|
|
|
|
gazetteer_tokens.insert(gazetteer_tokens.end(), gazetteer_tokens_additional.begin(), gazetteer_tokens_additional.end()); |
2637
|
|
|
|
|
|
|
|
2638
|
|
|
|
|
|
|
unsigned node = 0; |
2639
|
|
|
|
|
|
|
string prefix; |
2640
|
0
|
0
|
|
|
|
|
for (unsigned token = 0; token < gazetteer_tokens.size(); token++) { |
2641
|
0
|
0
|
|
|
|
|
if (token) prefix.push_back('\t'); |
|
|
0
|
|
|
|
|
|
2642
|
0
|
0
|
|
|
|
|
prefix.append(gazetteer_tokens[token].str, gazetteer_tokens[token].len); |
2643
|
|
|
|
|
|
|
auto prefix_it = gazetteer_prefixes.find(prefix); |
2644
|
0
|
0
|
|
|
|
|
if (prefix_it == gazetteer_prefixes.end()) { |
2645
|
0
|
|
|
|
|
|
unsigned new_node = gazetteers_trie.size(); |
2646
|
0
|
0
|
|
|
|
|
gazetteers_trie.emplace_back(); |
2647
|
|
|
|
|
|
|
gazetteer_prefixes.emplace(prefix, new_node); |
2648
|
|
|
|
|
|
|
|
2649
|
0
|
|
|
|
|
|
gazetteer_token[0] = string_piece(gazetteer_tokens[token]); |
2650
|
0
|
0
|
|
|
|
|
pipeline.tagger->tag(gazetteer_token, gazetteer_token_tagged); |
2651
|
0
|
0
|
|
|
|
|
recase_match_source(gazetteer_token_tagged.words[0], RECASE_NATIVE, gazetteer_recased_match_sources); |
2652
|
0
|
0
|
|
|
|
|
for (auto&& match_source : gazetteer_recased_match_sources) |
2653
|
0
|
|
|
|
|
|
gazetteers_trie[node].children.emplace(match_source, new_node); |
2654
|
|
|
|
|
|
|
|
2655
|
0
|
|
|
|
|
|
node = new_node; |
2656
|
|
|
|
|
|
|
} else { |
2657
|
0
|
|
|
|
|
|
node = prefix_it->second; |
2658
|
|
|
|
|
|
|
} |
2659
|
|
|
|
|
|
|
} |
2660
|
|
|
|
|
|
|
|
2661
|
0
|
0
|
|
|
|
|
append_unless_exists(gazetteers_trie[node].features, gazetteer_list.feature); |
2662
|
0
|
0
|
|
|
|
|
if ((gazetteer_list.mode == HARD_PRE && gazetteers_trie[node].mode != HARD_PRE) || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2663
|
0
|
0
|
|
|
|
|
(gazetteer_list.mode == HARD_POST && gazetteers_trie[node].mode == SOFT)) { |
2664
|
0
|
|
|
|
|
|
gazetteers_trie[node].mode = gazetteer_list.mode; |
2665
|
0
|
|
|
|
|
|
gazetteers_trie[node].entity = gazetteer_list.entity; |
2666
|
|
|
|
|
|
|
} |
2667
|
|
|
|
|
|
|
} |
2668
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
return true; |
2670
|
|
|
|
|
|
|
} |
2671
|
|
|
|
|
|
|
|
2672
|
|
|
|
|
|
|
enum { TO_LOWER, TO_TITLE, TO_UPPER, TO_TOTAL }; |
2673
|
0
|
|
|
|
|
|
static void recase_text(const string& text, int mode, vector& recased) { |
2674
|
|
|
|
|
|
|
using namespace unilib; |
2675
|
|
|
|
|
|
|
|
2676
|
0
|
|
|
|
|
|
recased.emplace_back(); |
2677
|
|
|
|
|
|
|
|
2678
|
0
|
0
|
|
|
|
|
if (mode == TO_UPPER) |
2679
|
|
|
|
|
|
|
utf8::map(unicode::uppercase, text, recased.back()); |
2680
|
0
|
0
|
|
|
|
|
else if (mode == TO_LOWER) |
2681
|
|
|
|
|
|
|
utf8::map(unicode::lowercase, text, recased.back()); |
2682
|
0
|
0
|
|
|
|
|
else if (mode == TO_TITLE) |
2683
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(text)) |
2684
|
0
|
0
|
|
|
|
|
utf8::append(recased.back(), recased.back().empty() ? unicode::uppercase(chr) : unicode::lowercase(chr)); |
2685
|
0
|
|
|
|
|
|
} |
2686
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
enum { RECASE_NATIVE, RECASE_ANY }; |
2688
|
0
|
|
|
|
|
|
void recase_match_source(const ner_word& word, int mode, vector& recased) const { |
2689
|
|
|
|
|
|
|
using namespace unilib; |
2690
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
bool any_lower = false, first_uc = false, first = true; |
2692
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(word.form)) { |
2693
|
0
|
0
|
|
|
|
|
any_lower = any_lower || (unicode::category(chr) & unicode::Ll); |
|
|
0
|
|
|
|
|
|
2694
|
0
|
0
|
|
|
|
|
if (first) first_uc = unicode::category(chr) & unicode::Lut; |
2695
|
|
|
|
|
|
|
first = false; |
2696
|
|
|
|
|
|
|
} |
2697
|
|
|
|
|
|
|
|
2698
|
0
|
|
|
|
|
|
recased.clear(); |
2699
|
|
|
|
|
|
|
|
2700
|
0
|
0
|
|
|
|
|
for (int perform = 0; perform < TO_TOTAL; perform++) { |
2701
|
0
|
0
|
|
|
|
|
if (mode == RECASE_NATIVE) { |
2702
|
0
|
0
|
|
|
|
|
if (perform == TO_UPPER && !(first_uc && !any_lower)) continue; |
|
|
0
|
|
|
|
|
|
2703
|
0
|
0
|
|
|
|
|
if (perform == TO_TITLE && !(first_uc && any_lower)) continue; |
|
|
0
|
|
|
|
|
|
2704
|
0
|
0
|
|
|
|
|
if (perform == TO_LOWER && first_uc) continue; |
2705
|
|
|
|
|
|
|
} |
2706
|
0
|
0
|
|
|
|
|
if (mode == RECASE_ANY) { |
2707
|
0
|
0
|
|
|
|
|
if (perform == TO_UPPER && !(first_uc && !any_lower)) continue; |
|
|
0
|
|
|
|
|
|
2708
|
0
|
0
|
|
|
|
|
if (perform == TO_TITLE && !first_uc) continue; |
2709
|
|
|
|
|
|
|
} |
2710
|
|
|
|
|
|
|
|
2711
|
0
|
0
|
|
|
|
|
if (match == MATCH_FORM) |
2712
|
0
|
|
|
|
|
|
recase_text(word.form, perform, recased); |
2713
|
0
|
0
|
|
|
|
|
else if (match == MATCH_RAWLEMMA) |
2714
|
0
|
|
|
|
|
|
recase_text(word.raw_lemma, perform, recased); |
2715
|
0
|
0
|
|
|
|
|
else if (match == MATCH_RAWLEMMAS) |
2716
|
0
|
0
|
|
|
|
|
for (auto&& raw_lemma : word.raw_lemmas_all) |
2717
|
0
|
|
|
|
|
|
recase_text(raw_lemma, perform, recased); |
2718
|
|
|
|
|
|
|
} |
2719
|
0
|
|
|
|
|
|
} |
2720
|
|
|
|
|
|
|
}; |
2721
|
12
|
50
|
|
|
|
|
const vector gazetteers_enhanced::basename_suffixes = {".txt", ".hard_pre.txt", ".hard_post.txt"}; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
// Lemma |
2724
|
0
|
|
|
|
|
|
class lemma : public feature_processor { |
2725
|
|
|
|
|
|
|
public: |
2726
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
2727
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2728
|
60
|
50
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].lemma_id, total_features)); |
|
|
100
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
|
2730
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2731
|
4
|
|
|
|
|
|
} |
2732
|
|
|
|
|
|
|
}; |
2733
|
|
|
|
|
|
|
|
2734
|
|
|
|
|
|
|
// NumericTimeValue |
2735
|
0
|
|
|
|
|
|
class number_time_value : public feature_processor { |
2736
|
|
|
|
|
|
|
public: |
2737
|
4
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2738
|
4
|
|
|
|
|
|
ner_feature hour = lookup(buffer.assign("H"), total_features); |
2739
|
4
|
|
|
|
|
|
ner_feature minute = lookup(buffer.assign("M"), total_features); |
2740
|
4
|
|
|
|
|
|
ner_feature time = lookup(buffer.assign("t"), total_features); |
2741
|
4
|
|
|
|
|
|
ner_feature day = lookup(buffer.assign("d"), total_features); |
2742
|
4
|
|
|
|
|
|
ner_feature month = lookup(buffer.assign("m"), total_features); |
2743
|
4
|
|
|
|
|
|
ner_feature year = lookup(buffer.assign("y"), total_features); |
2744
|
|
|
|
|
|
|
|
2745
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2746
|
14
|
|
|
|
|
|
const char* form = sentence.words[i].form.c_str(); |
2747
|
|
|
|
|
|
|
unsigned num; |
2748
|
|
|
|
|
|
|
bool digit; |
2749
|
|
|
|
|
|
|
|
2750
|
14
|
50
|
|
|
|
|
for (digit = false, num = 0; *form; form++) { |
2751
|
14
|
50
|
|
|
|
|
if (*form < '0' || *form > '9') break; |
2752
|
|
|
|
|
|
|
digit = true; |
2753
|
0
|
|
|
|
|
|
num = num * 10 + *form - '0'; |
2754
|
|
|
|
|
|
|
} |
2755
|
14
|
50
|
|
|
|
|
if (digit && !*form) { |
|
|
0
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
// We have a number |
2757
|
0
|
0
|
|
|
|
|
if (num < 24) apply_in_window(i, hour); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2758
|
0
|
0
|
|
|
|
|
if (num < 60) apply_in_window(i, minute); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2759
|
0
|
0
|
|
|
|
|
if (num >= 1 && num <= 31) apply_in_window(i, day); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2760
|
0
|
0
|
|
|
|
|
if (num >= 1 && num <= 12) apply_in_window(i, month); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2761
|
0
|
0
|
|
|
|
|
if (num >= 1000 && num <= 2200) apply_in_window(i, year);; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
} |
2763
|
14
|
50
|
|
|
|
|
if (digit && num < 24 && (*form == '.' || *form == ':')) { |
|
|
0
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
// Maybe time |
2765
|
0
|
0
|
|
|
|
|
for (digit = false, num = 0, form++; *form; form++) { |
2766
|
0
|
0
|
|
|
|
|
if (*form < '0' || *form > '9') break; |
2767
|
|
|
|
|
|
|
digit = true; |
2768
|
0
|
|
|
|
|
|
num = num * 10 + *form - '0'; |
2769
|
|
|
|
|
|
|
} |
2770
|
0
|
0
|
|
|
|
|
if (digit && !*form && num < 60) apply_in_window(i, time); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
} |
2772
|
|
|
|
|
|
|
} |
2773
|
4
|
|
|
|
|
|
} |
2774
|
|
|
|
|
|
|
}; |
2775
|
|
|
|
|
|
|
|
2776
|
|
|
|
|
|
|
// PreviousStage |
2777
|
0
|
|
|
|
|
|
class previous_stage : public feature_processor { |
2778
|
|
|
|
|
|
|
public: |
2779
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2780
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2781
|
14
|
100
|
|
|
|
|
if (sentence.previous_stage[i].bilou != bilou_type_unknown) { |
2782
|
|
|
|
|
|
|
buffer.clear(); |
2783
|
7
|
|
|
|
|
|
append_encoded(buffer, sentence.previous_stage[i].bilou); |
2784
|
7
|
|
|
|
|
|
buffer.push_back(' '); |
2785
|
7
|
|
|
|
|
|
append_encoded(buffer, sentence.previous_stage[i].entity); |
2786
|
16
|
50
|
|
|
|
|
apply_in_range(i, lookup(buffer, total_features), 1, window); |
|
|
100
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
} |
2788
|
4
|
|
|
|
|
|
} |
2789
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
private: |
2791
|
14
|
|
|
|
|
|
static void append_encoded(string& str, int value) { |
2792
|
14
|
100
|
|
|
|
|
if (value < 0) { |
2793
|
4
|
|
|
|
|
|
str.push_back('-'); |
2794
|
14
|
|
|
|
|
|
value = -value; |
2795
|
|
|
|
|
|
|
} |
2796
|
25
|
100
|
|
|
|
|
for (; value; value >>= 4) |
2797
|
11
|
|
|
|
|
|
str.push_back("0123456789abcdef"[value & 0xF]); |
2798
|
14
|
|
|
|
|
|
} |
2799
|
|
|
|
|
|
|
}; |
2800
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
// RawLemma |
2802
|
0
|
|
|
|
|
|
class raw_lemma : public feature_processor { |
2803
|
|
|
|
|
|
|
public: |
2804
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
2805
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2806
|
60
|
50
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].raw_lemma, total_features)); |
|
|
100
|
|
|
|
|
|
2807
|
|
|
|
|
|
|
|
2808
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2809
|
4
|
|
|
|
|
|
} |
2810
|
|
|
|
|
|
|
}; |
2811
|
|
|
|
|
|
|
|
2812
|
|
|
|
|
|
|
// RawLemmaCapitalization |
2813
|
0
|
|
|
|
|
|
class raw_lemma_capitalization : public feature_processor { |
2814
|
|
|
|
|
|
|
public: |
2815
|
4
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2816
|
|
|
|
|
|
|
using namespace unilib; |
2817
|
|
|
|
|
|
|
|
2818
|
4
|
|
|
|
|
|
ner_feature fst_cap = lookup(buffer.assign("f"), total_features); |
2819
|
4
|
|
|
|
|
|
ner_feature all_cap = lookup(buffer.assign("a"), total_features); |
2820
|
4
|
|
|
|
|
|
ner_feature mixed_cap = lookup(buffer.assign("m"), total_features); |
2821
|
|
|
|
|
|
|
|
2822
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2823
|
|
|
|
|
|
|
bool was_upper = false, was_lower = false; |
2824
|
|
|
|
|
|
|
|
2825
|
68
|
|
|
|
|
|
auto* raw_lemma = sentence.words[i].raw_lemma.c_str(); |
2826
|
|
|
|
|
|
|
char32_t chr; |
2827
|
68
|
100
|
|
|
|
|
for (bool first = true; (chr = utf8::decode(raw_lemma)); first = false) { |
2828
|
|
|
|
|
|
|
auto category = unicode::category(chr); |
2829
|
54
|
50
|
|
|
|
|
was_upper = was_upper || category & unicode::Lut; |
|
|
50
|
|
|
|
|
|
2830
|
54
|
100
|
|
|
|
|
was_lower = was_lower || category & unicode::Ll; |
|
|
100
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
|
2832
|
54
|
50
|
|
|
|
|
if (first && was_upper) apply_in_window(i, fst_cap); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2833
|
|
|
|
|
|
|
} |
2834
|
14
|
50
|
|
|
|
|
if (was_upper && !was_lower) apply_in_window(i, all_cap); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2835
|
14
|
50
|
|
|
|
|
if (was_upper && was_lower) apply_in_window(i, mixed_cap); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2836
|
|
|
|
|
|
|
} |
2837
|
4
|
|
|
|
|
|
} |
2838
|
|
|
|
|
|
|
}; |
2839
|
|
|
|
|
|
|
|
2840
|
|
|
|
|
|
|
// RawLemmaCaseNormalized |
2841
|
0
|
|
|
|
|
|
class raw_lemma_case_normalized : public feature_processor { |
2842
|
|
|
|
|
|
|
public: |
2843
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2844
|
|
|
|
|
|
|
using namespace unilib; |
2845
|
|
|
|
|
|
|
|
2846
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2847
|
|
|
|
|
|
|
buffer.clear(); |
2848
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(sentence.words[i].raw_lemma)) |
2849
|
0
|
0
|
|
|
|
|
utf8::append(buffer, buffer.empty() ? chr : unicode::lowercase(chr)); |
2850
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
0
|
|
|
|
|
|
2851
|
|
|
|
|
|
|
} |
2852
|
|
|
|
|
|
|
|
2853
|
0
|
0
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2854
|
0
|
|
|
|
|
|
} |
2855
|
|
|
|
|
|
|
}; |
2856
|
|
|
|
|
|
|
|
2857
|
|
|
|
|
|
|
// *Suffix |
2858
|
|
|
|
|
|
|
enum { SUFFIX_SOURCE_FORM, SUFFIX_SOURCE_RAWLEMMA }; |
2859
|
|
|
|
|
|
|
enum { SUFFIX_CASE_ORIGINAL, SUFFIX_CASE_NORMALIZED }; |
2860
|
0
|
|
|
|
|
|
class suffix : public feature_processor { |
2861
|
|
|
|
|
|
|
public: |
2862
|
0
|
|
|
|
|
|
suffix(int source, int casing) : source(source), casing(casing) {} |
2863
|
|
|
|
|
|
|
|
2864
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
2865
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
2866
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
2867
|
0
|
0
|
|
|
|
|
if (args.size() != 2) return cerr << "*Suffix features require exactly two arguments -- shortest and longest suffix length!" << endl, false; |
2868
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
string error; |
2870
|
0
|
0
|
|
|
|
|
if (!parse_int(args[0], "*Suffix shortest length", shortest, error)) return cerr << error << endl, false; |
|
|
0
|
|
|
|
|
|
2871
|
0
|
0
|
|
|
|
|
if (!parse_int(args[1], "*Suffix longest length", longest, error)) return cerr << error << endl, false; |
|
|
0
|
|
|
|
|
|
2872
|
|
|
|
|
|
|
return true; |
2873
|
|
|
|
|
|
|
} |
2874
|
|
|
|
|
|
|
|
2875
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
2876
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
2877
|
|
|
|
|
|
|
|
2878
|
0
|
|
|
|
|
|
shortest = data.next_4B(); |
2879
|
0
|
|
|
|
|
|
longest = data.next_4B(); |
2880
|
0
|
|
|
|
|
|
} |
2881
|
|
|
|
|
|
|
|
2882
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
2883
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
2884
|
|
|
|
|
|
|
|
2885
|
0
|
|
|
|
|
|
enc.add_4B(shortest); |
2886
|
0
|
|
|
|
|
|
enc.add_4B(longest); |
2887
|
0
|
|
|
|
|
|
} |
2888
|
|
|
|
|
|
|
|
2889
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
2890
|
|
|
|
|
|
|
using namespace unilib; |
2891
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
vector chrs; |
2893
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2894
|
|
|
|
|
|
|
chrs.clear(); |
2895
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(source == SUFFIX_SOURCE_FORM ? sentence.words[i].form : sentence.words[i].raw_lemma)) |
|
|
0
|
|
|
|
|
|
2896
|
0
|
0
|
|
|
|
|
chrs.push_back((casing == SUFFIX_CASE_ORIGINAL || chrs.empty()) ? chr : unicode::lowercase(chr)); |
|
|
0
|
|
|
|
|
|
2897
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
buffer.clear(); |
2899
|
0
|
0
|
|
|
|
|
for (int s = 1; s <= longest && s <= int(chrs.size()); s++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2900
|
0
|
0
|
|
|
|
|
utf8::append(buffer, chrs[chrs.size() - s]); |
2901
|
0
|
0
|
|
|
|
|
if (s >= shortest) { |
2902
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2903
|
|
|
|
|
|
|
} |
2904
|
|
|
|
|
|
|
} |
2905
|
|
|
|
|
|
|
} |
2906
|
|
|
|
|
|
|
|
2907
|
0
|
0
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2908
|
0
|
|
|
|
|
|
} |
2909
|
|
|
|
|
|
|
|
2910
|
|
|
|
|
|
|
private: |
2911
|
|
|
|
|
|
|
int shortest, longest; |
2912
|
|
|
|
|
|
|
int source, casing; |
2913
|
|
|
|
|
|
|
}; |
2914
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
// Tag |
2916
|
0
|
|
|
|
|
|
class tag : public feature_processor { |
2917
|
|
|
|
|
|
|
public: |
2918
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
2919
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
2920
|
54
|
100
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].tag, total_features)); |
|
|
100
|
|
|
|
|
|
2921
|
|
|
|
|
|
|
|
2922
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
2923
|
4
|
|
|
|
|
|
} |
2924
|
|
|
|
|
|
|
}; |
2925
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
// URLEmailDetector |
2927
|
0
|
|
|
|
|
|
class url_email_detector : public feature_processor { |
2928
|
|
|
|
|
|
|
public: |
2929
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
2930
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
2931
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
2932
|
0
|
0
|
|
|
|
|
if (args.size() != 2) return cerr << "URLEmailDetector requires exactly two arguments -- named entity types for URL and email!" << endl, false; |
2933
|
|
|
|
|
|
|
|
2934
|
0
|
|
|
|
|
|
url = entities.parse(args[0].c_str(), true); |
2935
|
0
|
|
|
|
|
|
email = entities.parse(args[1].c_str(), true); |
2936
|
|
|
|
|
|
|
|
2937
|
0
|
0
|
|
|
|
|
if (url == entity_type_unknown || email == entity_type_unknown) |
|
|
0
|
|
|
|
|
|
2938
|
0
|
|
|
|
|
|
return cerr << "Cannot create entities '" << args[0] << "' and '" << args[1] << "' in URLEmailDetector!" << endl, false; |
2939
|
|
|
|
|
|
|
return true; |
2940
|
|
|
|
|
|
|
} |
2941
|
|
|
|
|
|
|
|
2942
|
1
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
2943
|
1
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
2944
|
|
|
|
|
|
|
|
2945
|
1
|
|
|
|
|
|
url = data.next_4B(); |
2946
|
1
|
|
|
|
|
|
email = data.next_4B(); |
2947
|
1
|
|
|
|
|
|
} |
2948
|
|
|
|
|
|
|
|
2949
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
2950
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
2951
|
|
|
|
|
|
|
|
2952
|
0
|
|
|
|
|
|
enc.add_4B(url); |
2953
|
0
|
|
|
|
|
|
enc.add_4B(email); |
2954
|
0
|
|
|
|
|
|
} |
2955
|
|
|
|
|
|
|
|
2956
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& /*buffer*/) const override { |
2957
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
2958
|
14
|
|
|
|
|
|
auto type = url_detector::detect(sentence.words[i].form); |
2959
|
14
|
50
|
|
|
|
|
if (type == url_detector::NO_URL || sentence.probabilities[i].local_filled) continue; |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
|
2961
|
|
|
|
|
|
|
// We have found URL or email and the word has not yet been determined |
2962
|
0
|
0
|
|
|
|
|
for (auto&& bilou : sentence.probabilities[i].local.bilou) { |
2963
|
0
|
|
|
|
|
|
bilou.probability = 0.; |
2964
|
0
|
|
|
|
|
|
bilou.entity = entity_type_unknown; |
2965
|
|
|
|
|
|
|
} |
2966
|
0
|
|
|
|
|
|
sentence.probabilities[i].local.bilou[bilou_type_U].probability = 1.; |
2967
|
0
|
0
|
|
|
|
|
sentence.probabilities[i].local.bilou[bilou_type_U].entity = type == url_detector::EMAIL ? email : url; |
2968
|
0
|
|
|
|
|
|
sentence.probabilities[i].local_filled = true; |
2969
|
|
|
|
|
|
|
} |
2970
|
4
|
|
|
|
|
|
} |
2971
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
private: |
2973
|
|
|
|
|
|
|
entity_type url, email; |
2974
|
|
|
|
|
|
|
}; |
2975
|
|
|
|
|
|
|
|
2976
|
|
|
|
|
|
|
} // namespace feature_processors |
2977
|
|
|
|
|
|
|
|
2978
|
|
|
|
|
|
|
// Feature processor factory method |
2979
|
8
|
|
|
|
|
|
feature_processor* feature_processor::create(const string& name) { |
2980
|
|
|
|
|
|
|
using namespace feature_processors; |
2981
|
|
|
|
|
|
|
|
2982
|
8
|
50
|
|
|
|
|
if (name.compare("BrownClusters") == 0) return new brown_clusters(); |
2983
|
8
|
50
|
|
|
|
|
if (name.compare("CzechAddContainers") == 0) return new czech_add_containers(); |
2984
|
8
|
50
|
|
|
|
|
if (name.compare("CzechLemmaTerm") == 0) return new czech_lemma_term(); |
2985
|
8
|
100
|
|
|
|
|
if (name.compare("Form") == 0) return new form(); |
2986
|
7
|
50
|
|
|
|
|
if (name.compare("FormCapitalization") == 0) return new form_capitalization(); |
2987
|
7
|
50
|
|
|
|
|
if (name.compare("FormCaseNormalized") == 0) return new form_case_normalized(); |
2988
|
7
|
50
|
|
|
|
|
if (name.compare("FormCaseNormalizedSuffix") == 0) return new suffix(SUFFIX_SOURCE_FORM, SUFFIX_CASE_NORMALIZED); |
2989
|
7
|
50
|
|
|
|
|
if (name.compare("FormSuffix") == 0) return new suffix(SUFFIX_SOURCE_FORM, SUFFIX_CASE_ORIGINAL); |
2990
|
7
|
50
|
|
|
|
|
if (name.compare("Gazetteers") == 0) return new feature_processors::gazetteers(); |
2991
|
7
|
50
|
|
|
|
|
if (name.compare("GazetteersEnhanced") == 0) return new gazetteers_enhanced(); |
2992
|
7
|
100
|
|
|
|
|
if (name.compare("Lemma") == 0) return new lemma(); |
2993
|
6
|
100
|
|
|
|
|
if (name.compare("NumericTimeValue") == 0) return new number_time_value(); |
2994
|
5
|
100
|
|
|
|
|
if (name.compare("PreviousStage") == 0) return new previous_stage(); |
2995
|
4
|
100
|
|
|
|
|
if (name.compare("RawLemma") == 0) return new raw_lemma(); |
2996
|
3
|
100
|
|
|
|
|
if (name.compare("RawLemmaCapitalization") == 0) return new raw_lemma_capitalization(); |
2997
|
2
|
50
|
|
|
|
|
if (name.compare("RawLemmaCaseNormalized") == 0) return new raw_lemma_case_normalized(); |
2998
|
2
|
50
|
|
|
|
|
if (name.compare("RawLemmaCaseNormalizedSuffix") == 0) return new suffix(SUFFIX_SOURCE_RAWLEMMA, SUFFIX_CASE_NORMALIZED); |
2999
|
2
|
50
|
|
|
|
|
if (name.compare("RawLemmaSuffix") == 0) return new suffix(SUFFIX_SOURCE_RAWLEMMA, SUFFIX_CASE_ORIGINAL); |
3000
|
2
|
100
|
|
|
|
|
if (name.compare("Tag") == 0) return new tag(); |
3001
|
1
|
50
|
|
|
|
|
if (name.compare("URLEmailDetector") == 0) return new url_email_detector(); |
3002
|
|
|
|
|
|
|
return nullptr; |
3003
|
|
|
|
|
|
|
} |
3004
|
|
|
|
|
|
|
|
3005
|
|
|
|
|
|
|
///////// |
3006
|
|
|
|
|
|
|
// File: features/feature_templates.h |
3007
|
|
|
|
|
|
|
///////// |
3008
|
|
|
|
|
|
|
|
3009
|
|
|
|
|
|
|
// This file is part of NameTag . |
3010
|
|
|
|
|
|
|
// |
3011
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3012
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3013
|
|
|
|
|
|
|
// |
3014
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3015
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3016
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3017
|
|
|
|
|
|
|
|
3018
|
0
|
|
|
|
|
|
class feature_templates { |
3019
|
|
|
|
|
|
|
public: |
3020
|
|
|
|
|
|
|
void parse(istream& is, entity_map& entities, const nlp_pipeline& pipeline); |
3021
|
|
|
|
|
|
|
|
3022
|
|
|
|
|
|
|
bool load(istream& is, const nlp_pipeline& pipeline); |
3023
|
|
|
|
|
|
|
bool save(ostream& os); |
3024
|
|
|
|
|
|
|
|
3025
|
|
|
|
|
|
|
void process_sentence(ner_sentence& sentence, string& buffer, bool add_features = false) const; |
3026
|
|
|
|
|
|
|
void process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const; |
3027
|
|
|
|
|
|
|
ner_feature get_total_features() const; |
3028
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
void gazetteers(vector& gazetteers, vector* gazetteer_types) const; |
3030
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
private: |
3032
|
|
|
|
|
|
|
mutable ner_feature total_features; |
3033
|
|
|
|
|
|
|
|
3034
|
7
|
|
|
|
|
|
struct feature_processor_info { |
3035
|
|
|
|
|
|
|
string name; |
3036
|
|
|
|
|
|
|
unique_ptr processor; |
3037
|
|
|
|
|
|
|
|
3038
|
8
|
|
|
|
|
|
feature_processor_info(const string& name, feature_processor* processor) : name(name), processor(processor) {} |
3039
|
|
|
|
|
|
|
}; |
3040
|
|
|
|
|
|
|
vector processors; |
3041
|
|
|
|
|
|
|
}; |
3042
|
|
|
|
|
|
|
|
3043
|
|
|
|
|
|
|
///////// |
3044
|
|
|
|
|
|
|
// File: features/feature_templates.cpp |
3045
|
|
|
|
|
|
|
///////// |
3046
|
|
|
|
|
|
|
|
3047
|
|
|
|
|
|
|
// This file is part of NameTag . |
3048
|
|
|
|
|
|
|
// |
3049
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3050
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3051
|
|
|
|
|
|
|
// |
3052
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3053
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3054
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3055
|
|
|
|
|
|
|
|
3056
|
1
|
|
|
|
|
|
bool feature_templates::load(istream& is, const nlp_pipeline& pipeline) { |
3057
|
|
|
|
|
|
|
binary_decoder data; |
3058
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
50
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
|
3060
|
|
|
|
|
|
|
try { |
3061
|
1
|
50
|
|
|
|
|
total_features = data.next_4B(); |
3062
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
processors.clear(); |
3064
|
9
|
50
|
|
|
|
|
for (unsigned i = data.next_4B(); i; i--) { |
|
|
100
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
string name; |
3066
|
8
|
50
|
|
|
|
|
data.next_str(name); |
3067
|
|
|
|
|
|
|
|
3068
|
|
|
|
|
|
|
// Try creating the processor |
3069
|
8
|
50
|
|
|
|
|
auto* processor = feature_processor::create(name); |
3070
|
8
|
50
|
|
|
|
|
if (processor) { |
3071
|
8
|
50
|
|
|
|
|
processor->load(data, pipeline); |
3072
|
8
|
50
|
|
|
|
|
processors.emplace_back(name, processor); |
3073
|
|
|
|
|
|
|
continue; |
3074
|
|
|
|
|
|
|
} |
3075
|
|
|
|
|
|
|
|
3076
|
|
|
|
|
|
|
// Could not find processor with specified name |
3077
|
|
|
|
|
|
|
return false; |
3078
|
|
0
|
|
|
|
|
} |
3079
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
3080
|
|
|
|
|
|
|
return false; |
3081
|
|
|
|
|
|
|
} |
3082
|
|
|
|
|
|
|
|
3083
|
1
|
|
|
|
|
|
return data.is_end(); |
3084
|
|
|
|
|
|
|
} |
3085
|
|
|
|
|
|
|
|
3086
|
8
|
|
|
|
|
|
void feature_templates::process_sentence(ner_sentence& sentence, string& buffer, bool adding_features) const { |
3087
|
|
|
|
|
|
|
// Start with omnipresent feature |
3088
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
3089
|
14
|
|
|
|
|
|
sentence.features[i].clear(); |
3090
|
14
|
|
|
|
|
|
sentence.features[i].emplace_back(0); |
3091
|
|
|
|
|
|
|
} |
3092
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
// Add features from feature processors |
3094
|
36
|
100
|
|
|
|
|
for (auto&& processor : processors) |
3095
|
32
|
50
|
|
|
|
|
processor.processor->process_sentence(sentence, adding_features ? &total_features : nullptr, buffer); |
3096
|
4
|
|
|
|
|
|
} |
3097
|
|
|
|
|
|
|
|
3098
|
0
|
|
|
|
|
|
void feature_templates::process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const { |
3099
|
18
|
100
|
|
|
|
|
for (auto&& processor : processors) |
|
|
0
|
|
|
|
|
|
3100
|
16
|
|
|
|
|
|
processor.processor->process_entities(sentence, entities, buffer); |
3101
|
0
|
|
|
|
|
|
} |
3102
|
|
|
|
|
|
|
|
3103
|
0
|
|
|
|
|
|
ner_feature feature_templates::get_total_features() const { |
3104
|
0
|
|
|
|
|
|
return total_features; |
3105
|
|
|
|
|
|
|
} |
3106
|
|
|
|
|
|
|
|
3107
|
0
|
|
|
|
|
|
void feature_templates::gazetteers(vector& gazetteers, vector* gazetteer_types) const { |
3108
|
0
|
0
|
|
|
|
|
for (auto&& processor : processors) |
|
|
0
|
|
|
|
|
|
3109
|
0
|
|
|
|
|
|
processor.processor->gazetteers(gazetteers, gazetteer_types); |
3110
|
0
|
|
|
|
|
|
} |
3111
|
|
|
|
|
|
|
|
3112
|
|
|
|
|
|
|
///////// |
3113
|
|
|
|
|
|
|
// File: morphodita/derivator/derivator.h |
3114
|
|
|
|
|
|
|
///////// |
3115
|
|
|
|
|
|
|
|
3116
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
3117
|
|
|
|
|
|
|
// |
3118
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3119
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3120
|
|
|
|
|
|
|
// |
3121
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3122
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3123
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3124
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
namespace morphodita { |
3126
|
|
|
|
|
|
|
|
3127
|
0
|
|
|
|
|
|
struct derivated_lemma { |
3128
|
|
|
|
|
|
|
string lemma; |
3129
|
|
|
|
|
|
|
}; |
3130
|
|
|
|
|
|
|
|
3131
|
0
|
|
|
|
|
|
class derivator { |
3132
|
|
|
|
|
|
|
public: |
3133
|
0
|
|
|
|
|
|
virtual ~derivator() {} |
3134
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
// For given lemma, return the parent in the derivation graph. |
3136
|
|
|
|
|
|
|
// The lemma is assumed to be lemma id and any lemma comments are ignored. |
3137
|
|
|
|
|
|
|
virtual bool parent(string_piece lemma, derivated_lemma& parent) const = 0; |
3138
|
|
|
|
|
|
|
|
3139
|
|
|
|
|
|
|
// For given lemma, return the children in the derivation graph. |
3140
|
|
|
|
|
|
|
// The lemma is assumed to be lemma id and any lemma comments are ignored. |
3141
|
|
|
|
|
|
|
virtual bool children(string_piece lemma, vector& children) const = 0; |
3142
|
|
|
|
|
|
|
}; |
3143
|
|
|
|
|
|
|
|
3144
|
|
|
|
|
|
|
} // namespace morphodita |
3145
|
|
|
|
|
|
|
|
3146
|
|
|
|
|
|
|
///////// |
3147
|
|
|
|
|
|
|
// File: morphodita/tokenizer/tokenizer.h |
3148
|
|
|
|
|
|
|
///////// |
3149
|
|
|
|
|
|
|
|
3150
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
3151
|
|
|
|
|
|
|
// |
3152
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
3153
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3154
|
|
|
|
|
|
|
// |
3155
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3156
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3157
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3158
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
namespace morphodita { |
3160
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
// Range of a token, measured in Unicode characters, not UTF8 bytes. |
3162
|
|
|
|
|
|
|
struct token_range { |
3163
|
|
|
|
|
|
|
size_t start; |
3164
|
|
|
|
|
|
|
size_t length; |
3165
|
|
|
|
|
|
|
|
3166
|
|
|
|
|
|
|
token_range() {} |
3167
|
33
|
|
|
|
|
|
token_range(size_t start, size_t length) : start(start), length(length) {} |
3168
|
|
|
|
|
|
|
}; |
3169
|
|
|
|
|
|
|
|
3170
|
4
|
|
|
|
|
|
class tokenizer { |
3171
|
|
|
|
|
|
|
public: |
3172
|
4
|
|
|
|
|
|
virtual ~tokenizer() {} |
3173
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) = 0; |
3175
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) = 0; |
3176
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
// Static factory methods |
3178
|
|
|
|
|
|
|
static tokenizer* new_vertical_tokenizer(); |
3179
|
|
|
|
|
|
|
|
3180
|
|
|
|
|
|
|
static tokenizer* new_czech_tokenizer(); |
3181
|
|
|
|
|
|
|
static tokenizer* new_english_tokenizer(); |
3182
|
|
|
|
|
|
|
static tokenizer* new_generic_tokenizer(); |
3183
|
|
|
|
|
|
|
}; |
3184
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
} // namespace morphodita |
3186
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
///////// |
3188
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho.h |
3189
|
|
|
|
|
|
|
///////// |
3190
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
3192
|
|
|
|
|
|
|
// |
3193
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
3194
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3195
|
|
|
|
|
|
|
// |
3196
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3197
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3198
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3199
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
namespace morphodita { |
3201
|
|
|
|
|
|
|
|
3202
|
0
|
|
|
|
|
|
struct tagged_form { |
3203
|
|
|
|
|
|
|
string form; |
3204
|
|
|
|
|
|
|
string tag; |
3205
|
|
|
|
|
|
|
|
3206
|
|
|
|
|
|
|
tagged_form() {} |
3207
|
0
|
|
|
|
|
|
tagged_form(const string& form, const string& tag) : form(form), tag(tag) {} |
3208
|
|
|
|
|
|
|
}; |
3209
|
|
|
|
|
|
|
|
3210
|
43
|
|
|
|
|
|
struct tagged_lemma { |
3211
|
|
|
|
|
|
|
string lemma; |
3212
|
|
|
|
|
|
|
string tag; |
3213
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
tagged_lemma() {} |
3215
|
22
|
|
|
|
|
|
tagged_lemma(const string& lemma, const string& tag) : lemma(lemma), tag(tag) {} |
3216
|
|
|
|
|
|
|
}; |
3217
|
|
|
|
|
|
|
|
3218
|
0
|
|
|
|
|
|
struct tagged_lemma_forms { |
3219
|
|
|
|
|
|
|
string lemma; |
3220
|
|
|
|
|
|
|
vector forms; |
3221
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
tagged_lemma_forms() {} |
3223
|
0
|
|
|
|
|
|
tagged_lemma_forms(const string& lemma) : lemma(lemma) {} |
3224
|
|
|
|
|
|
|
}; |
3225
|
|
|
|
|
|
|
|
3226
|
1
|
|
|
|
|
|
class morpho { |
3227
|
|
|
|
|
|
|
public: |
3228
|
0
|
|
|
|
|
|
virtual ~morpho() {} |
3229
|
|
|
|
|
|
|
|
3230
|
|
|
|
|
|
|
static morpho* load(istream& is); |
3231
|
|
|
|
|
|
|
static morpho* load(const char* fname); |
3232
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
enum guesser_mode { NO_GUESSER = 0, GUESSER = 1, GUESSER_UNSPECIFIED = -1 }; |
3234
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
// Perform morphologic analysis of a form. The form is given by a pointer and |
3236
|
|
|
|
|
|
|
// length and therefore does not need to be '\0' terminated. The guesser |
3237
|
|
|
|
|
|
|
// parameter specifies whether a guesser can be used if the form is not found |
3238
|
|
|
|
|
|
|
// in the dictionary. Output is assigned to the lemmas vector. |
3239
|
|
|
|
|
|
|
// |
3240
|
|
|
|
|
|
|
// If the form is found in the dictionary, analyses are assigned to lemmas |
3241
|
|
|
|
|
|
|
// and NO_GUESSER returned. If guesser == GUESSER and the form analyses are |
3242
|
|
|
|
|
|
|
// found using a guesser, they are assigned to lemmas and GUESSER is |
3243
|
|
|
|
|
|
|
// returned. Otherwise <0 is returned and lemmas are filled with one |
3244
|
|
|
|
|
|
|
// analysis containing given form as lemma and a tag for unknown word. |
3245
|
|
|
|
|
|
|
virtual int analyze(string_piece form, guesser_mode guesser, vector& lemmas) const = 0; |
3246
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
// Perform morphologic generation of a lemma. The lemma is given by a pointer |
3248
|
|
|
|
|
|
|
// and length and therefore does not need to be '\0' terminated. Optionally |
3249
|
|
|
|
|
|
|
// a tag_wildcard can be specified (or be NULL) and if so, results are |
3250
|
|
|
|
|
|
|
// filtered using this wildcard. The guesser parameter speficies whether |
3251
|
|
|
|
|
|
|
// a guesser can be used if the lemma is not found in the dictionary. Output |
3252
|
|
|
|
|
|
|
// is assigned to the forms vector. |
3253
|
|
|
|
|
|
|
// |
3254
|
|
|
|
|
|
|
// Tag_wildcard can be either NULL or a wildcard applied to the results. |
3255
|
|
|
|
|
|
|
// A ? in the wildcard matches any character, [bytes] matches any of the |
3256
|
|
|
|
|
|
|
// bytes and [^bytes] matches any byte different from the specified ones. |
3257
|
|
|
|
|
|
|
// A - has no special meaning inside the bytes and if ] is first in bytes, it |
3258
|
|
|
|
|
|
|
// does not end the bytes group. |
3259
|
|
|
|
|
|
|
// |
3260
|
|
|
|
|
|
|
// If the given lemma is only a raw lemma, all lemma ids with this raw lemma |
3261
|
|
|
|
|
|
|
// are returned. Otherwise only matching lemma ids are returned, ignoring any |
3262
|
|
|
|
|
|
|
// lemma comments. For every found lemma, matching forms are filtered using |
3263
|
|
|
|
|
|
|
// the tag_wildcard. If at least one lemma is found in the dictionary, |
3264
|
|
|
|
|
|
|
// NO_GUESSER is returned. If guesser == GUESSER and the lemma is found by |
3265
|
|
|
|
|
|
|
// the guesser, GUESSER is returned. Otherwise, forms are cleared and <0 is |
3266
|
|
|
|
|
|
|
// returned. |
3267
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const = 0; |
3268
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
// Rawlemma and lemma id identification |
3270
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const = 0; |
3271
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const = 0; |
3272
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
// Rawform identification |
3274
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const = 0; |
3275
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this morphology. |
3277
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
3278
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const = 0; |
3279
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
// Return a derivator for this morphology, or NULL if it does not exist. |
3281
|
|
|
|
|
|
|
// The returned instance is owned by the morphology and should not be deleted. |
3282
|
|
|
|
|
|
|
virtual const derivator* get_derivator() const; |
3283
|
|
|
|
|
|
|
|
3284
|
|
|
|
|
|
|
protected: |
3285
|
|
|
|
|
|
|
unique_ptr derinet; |
3286
|
|
|
|
|
|
|
}; |
3287
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
} // namespace morphodita |
3289
|
|
|
|
|
|
|
|
3290
|
|
|
|
|
|
|
///////// |
3291
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/tagset_converter.h |
3292
|
|
|
|
|
|
|
///////// |
3293
|
|
|
|
|
|
|
|
3294
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
3295
|
|
|
|
|
|
|
// |
3296
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
3297
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3298
|
|
|
|
|
|
|
// |
3299
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3300
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3301
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3302
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
namespace morphodita { |
3304
|
|
|
|
|
|
|
|
3305
|
0
|
|
|
|
|
|
class tagset_converter { |
3306
|
|
|
|
|
|
|
public: |
3307
|
0
|
|
|
|
|
|
virtual ~tagset_converter() {} |
3308
|
|
|
|
|
|
|
|
3309
|
|
|
|
|
|
|
// Convert a tag-lemma pair to a different tag set. |
3310
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const = 0; |
3311
|
|
|
|
|
|
|
// Convert a result of analysis to a different tag set. Apart from calling |
3312
|
|
|
|
|
|
|
// convert, any repeated entry is removed. |
3313
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const = 0; |
3314
|
|
|
|
|
|
|
// Convert a result of generation to a different tag set. Apart from calling |
3315
|
|
|
|
|
|
|
// convert, any repeated entry is removed. |
3316
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const = 0; |
3317
|
|
|
|
|
|
|
|
3318
|
|
|
|
|
|
|
// Static factory methods |
3319
|
|
|
|
|
|
|
static tagset_converter* new_identity_converter(); |
3320
|
|
|
|
|
|
|
|
3321
|
|
|
|
|
|
|
static tagset_converter* new_pdt_to_conll2009_converter(); |
3322
|
|
|
|
|
|
|
static tagset_converter* new_strip_lemma_comment_converter(const morpho& dictionary); |
3323
|
|
|
|
|
|
|
static tagset_converter* new_strip_lemma_id_converter(const morpho& dictionary); |
3324
|
|
|
|
|
|
|
}; |
3325
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
// Helper method for creating tagset_converter from instance name. |
3327
|
|
|
|
|
|
|
tagset_converter* new_tagset_converter(const string& name, const morpho& dictionary); |
3328
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
// Helper methods making sure remapped results are unique. |
3330
|
|
|
|
|
|
|
void tagset_converter_unique_analyzed(vector& tagged_lemmas); |
3331
|
|
|
|
|
|
|
void tagset_converter_unique_generated(vector& forms); |
3332
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
} // namespace morphodita |
3334
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
///////// |
3336
|
|
|
|
|
|
|
// File: morphodita/derivator/derivation_formatter.h |
3337
|
|
|
|
|
|
|
///////// |
3338
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
3340
|
|
|
|
|
|
|
// |
3341
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3342
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3343
|
|
|
|
|
|
|
// |
3344
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3345
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3346
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3347
|
|
|
|
|
|
|
|
3348
|
|
|
|
|
|
|
namespace morphodita { |
3349
|
|
|
|
|
|
|
|
3350
|
0
|
|
|
|
|
|
class derivation_formatter { |
3351
|
|
|
|
|
|
|
public: |
3352
|
0
|
|
|
|
|
|
virtual ~derivation_formatter() {} |
3353
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
// Perform the required derivation and store it directly in the lemma. |
3355
|
|
|
|
|
|
|
virtual void format_derivation(string& lemma) const; |
3356
|
|
|
|
|
|
|
|
3357
|
|
|
|
|
|
|
// Perform the required derivation and store it directly in the tagged_lemma. |
3358
|
|
|
|
|
|
|
// If a tagset_converter is given, it is also applied. |
3359
|
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter = nullptr) const = 0; |
3360
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
// Perform the required derivation on a list of tagged_lemmas. |
3362
|
|
|
|
|
|
|
// If a tagset_converter is given, it is also applied. |
3363
|
|
|
|
|
|
|
// Either way, only unique entries are returned. |
3364
|
|
|
|
|
|
|
virtual void format_tagged_lemmas(vector& lemmas, const tagset_converter* converter = nullptr) const; |
3365
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
// Static factory methods. |
3367
|
|
|
|
|
|
|
static derivation_formatter* new_none_derivation_formatter(); |
3368
|
|
|
|
|
|
|
static derivation_formatter* new_root_derivation_formatter(const derivator* derinet); |
3369
|
|
|
|
|
|
|
static derivation_formatter* new_path_derivation_formatter(const derivator* derinet); |
3370
|
|
|
|
|
|
|
static derivation_formatter* new_tree_derivation_formatter(const derivator* derinet); |
3371
|
|
|
|
|
|
|
// String version of static factory method. |
3372
|
|
|
|
|
|
|
static derivation_formatter* new_derivation_formatter(string_piece name, const derivator* derinet); |
3373
|
|
|
|
|
|
|
}; |
3374
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
} // namespace morphodita |
3376
|
|
|
|
|
|
|
|
3377
|
|
|
|
|
|
|
///////// |
3378
|
|
|
|
|
|
|
// File: morphodita/derivator/derivation_formatter.cpp |
3379
|
|
|
|
|
|
|
///////// |
3380
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
3382
|
|
|
|
|
|
|
// |
3383
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3384
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3385
|
|
|
|
|
|
|
// |
3386
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3387
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3388
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3389
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
namespace morphodita { |
3391
|
|
|
|
|
|
|
|
3392
|
0
|
|
|
|
|
|
void derivation_formatter::format_derivation(string& lemma) const { |
3393
|
0
|
|
|
|
|
|
tagged_lemma result; |
3394
|
0
|
|
|
|
|
|
result.lemma.swap(lemma); |
3395
|
0
|
0
|
|
|
|
|
format_tagged_lemma(result); |
3396
|
0
|
|
|
|
|
|
lemma.swap(result.lemma); |
3397
|
0
|
|
|
|
|
|
} |
3398
|
|
|
|
|
|
|
|
3399
|
0
|
|
|
|
|
|
void derivation_formatter::format_tagged_lemmas(vector& lemmas, const tagset_converter* converter) const { |
3400
|
0
|
0
|
|
|
|
|
for (auto&& lemma : lemmas) |
3401
|
0
|
|
|
|
|
|
format_tagged_lemma(lemma, converter); |
3402
|
|
|
|
|
|
|
|
3403
|
0
|
0
|
|
|
|
|
if (lemmas.size() > 1) |
3404
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(lemmas); |
3405
|
0
|
|
|
|
|
|
} |
3406
|
|
|
|
|
|
|
|
3407
|
0
|
|
|
|
|
|
class none_derivation_formatter : public derivation_formatter { |
3408
|
0
|
|
|
|
|
|
virtual void format_derivation(string& /*lemma*/) const override {} |
3409
|
|
|
|
|
|
|
|
3410
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
3411
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
3412
|
0
|
|
|
|
|
|
} |
3413
|
|
|
|
|
|
|
|
3414
|
0
|
|
|
|
|
|
virtual void format_tagged_lemmas(vector& lemmas, const tagset_converter* converter) const override { |
3415
|
0
|
0
|
|
|
|
|
if (converter) converter->convert_analyzed(lemmas); |
3416
|
0
|
|
|
|
|
|
} |
3417
|
|
|
|
|
|
|
}; |
3418
|
|
|
|
|
|
|
|
3419
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_none_derivation_formatter() { |
3420
|
0
|
|
|
|
|
|
return new none_derivation_formatter(); |
3421
|
|
|
|
|
|
|
} |
3422
|
|
|
|
|
|
|
|
3423
|
0
|
|
|
|
|
|
class root_derivation_formatter : public derivation_formatter { |
3424
|
|
|
|
|
|
|
public: |
3425
|
0
|
|
|
|
|
|
root_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
3426
|
|
|
|
|
|
|
|
3427
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
3428
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); ) |
|
|
0
|
|
|
|
|
|
3429
|
0
|
|
|
|
|
|
lemma.lemma.assign(parent.lemma); |
3430
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
3431
|
0
|
|
|
|
|
|
} |
3432
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
private: |
3434
|
|
|
|
|
|
|
const derivator* derinet; |
3435
|
|
|
|
|
|
|
}; |
3436
|
|
|
|
|
|
|
|
3437
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_root_derivation_formatter(const derivator* derinet) { |
3438
|
0
|
0
|
|
|
|
|
return derinet ? new root_derivation_formatter(derinet) : nullptr; |
|
|
0
|
|
|
|
|
|
3439
|
|
|
|
|
|
|
} |
3440
|
|
|
|
|
|
|
|
3441
|
0
|
|
|
|
|
|
class path_derivation_formatter : public derivation_formatter { |
3442
|
|
|
|
|
|
|
public: |
3443
|
0
|
|
|
|
|
|
path_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
3444
|
|
|
|
|
|
|
|
3445
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
3446
|
0
|
|
|
|
|
|
tagged_lemma current(lemma); |
3447
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
|
0
|
|
|
|
|
|
3448
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) { |
|
|
0
|
|
|
|
|
|
3449
|
0
|
0
|
|
|
|
|
tagged_lemma parrent_lemma(parent.lemma, current.tag); |
3450
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(parrent_lemma); |
|
|
0
|
|
|
|
|
|
3451
|
0
|
0
|
|
|
|
|
lemma.lemma.append(" ").append(parrent_lemma.lemma); |
3452
|
|
|
|
|
|
|
} |
3453
|
0
|
|
|
|
|
|
} |
3454
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
private: |
3456
|
|
|
|
|
|
|
const derivator* derinet; |
3457
|
|
|
|
|
|
|
}; |
3458
|
|
|
|
|
|
|
|
3459
|
0
|
|
|
|
|
|
// Factory for the path formatter. Returns nullptr when no derivator is given,
// because the path formatter cannot work without one.
derivation_formatter* derivation_formatter::new_path_derivation_formatter(const derivator* derinet) {
  return derinet ? new path_derivation_formatter(derinet) : nullptr;
}
3462
|
|
|
|
|
|
|
|
3463
|
0
|
|
|
|
|
|
class tree_derivation_formatter : public derivation_formatter { |
3464
|
|
|
|
|
|
|
public: |
3465
|
0
|
|
|
|
|
|
tree_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
3466
|
|
|
|
|
|
|
|
3467
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
3468
|
|
|
|
|
|
|
string root(lemma.lemma), tag(lemma.tag); |
3469
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
|
0
|
|
|
|
|
|
3470
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {} |
|
|
0
|
|
|
|
|
|
3471
|
0
|
0
|
|
|
|
|
format_tree(root, tag, lemma, converter); |
3472
|
0
|
|
|
|
|
|
} |
3473
|
|
|
|
|
|
|
|
3474
|
0
|
|
|
|
|
|
void format_tree(const string& root, const string& tag, tagged_lemma& tree, const tagset_converter* converter) const { |
3475
|
0
|
|
|
|
|
|
vector children; |
3476
|
|
|
|
|
|
|
|
3477
|
0
|
0
|
|
|
|
|
if (converter) { |
3478
|
0
|
0
|
|
|
|
|
tagged_lemma current(root, tag); |
3479
|
0
|
0
|
|
|
|
|
converter->convert(current); |
3480
|
0
|
0
|
|
|
|
|
tree.lemma.append(" ").append(current.lemma); |
3481
|
|
|
|
|
|
|
} else { |
3482
|
0
|
0
|
|
|
|
|
tree.lemma.append(" ").append(root); |
3483
|
|
|
|
|
|
|
} |
3484
|
|
|
|
|
|
|
|
3485
|
0
|
0
|
|
|
|
|
if (derinet->children(root, children)) |
|
|
0
|
|
|
|
|
|
3486
|
0
|
0
|
|
|
|
|
for (auto&& child : children) |
3487
|
0
|
0
|
|
|
|
|
format_tree(child.lemma, tag, tree, converter); |
3488
|
0
|
0
|
|
|
|
|
tree.lemma.push_back(' '); |
3489
|
0
|
|
|
|
|
|
} |
3490
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
private: |
3492
|
|
|
|
|
|
|
const derivator* derinet; |
3493
|
|
|
|
|
|
|
}; |
3494
|
|
|
|
|
|
|
|
3495
|
0
|
|
|
|
|
|
// Factory for the tree formatter. Returns nullptr when no derivator is given,
// because the tree formatter cannot work without one.
derivation_formatter* derivation_formatter::new_tree_derivation_formatter(const derivator* derinet) {
  return derinet ? new tree_derivation_formatter(derinet) : nullptr;
}
3498
|
|
|
|
|
|
|
|
3499
|
0
|
|
|
|
|
|
// Create a derivation formatter by name. Recognized names are "none", "root",
// "path" and "tree". Returns nullptr for an unknown name, and also for
// "root"/"path"/"tree" when `derinet` is null (see the individual factories).
derivation_formatter* derivation_formatter::new_derivation_formatter(string_piece name, const derivator* derinet) {
  if (name == "none") return new_none_derivation_formatter();
  if (name == "root") return new_root_derivation_formatter(derinet);
  if (name == "path") return new_path_derivation_formatter(derinet);
  if (name == "tree") return new_tree_derivation_formatter(derinet);
  return nullptr;
}
3506
|
|
|
|
|
|
|
|
3507
|
|
|
|
|
|
|
} // namespace morphodita |
3508
|
|
|
|
|
|
|
|
3509
|
|
|
|
|
|
|
///////// |
3510
|
|
|
|
|
|
|
// File: morphodita/morpho/small_stringops.h |
3511
|
|
|
|
|
|
|
///////// |
3512
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>.
3514
|
|
|
|
|
|
|
// |
3515
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
3516
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3517
|
|
|
|
|
|
|
// |
3518
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3519
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3520
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3521
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
namespace morphodita { |
3523
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
// Declarations |
3525
|
|
|
|
|
|
|
// Byte-wise replacements of memcmp()==0 and memcpy written as plain loops.
// NOTE(review): judging by the names they are meant for short buffers -- confirm.
inline bool small_memeq(const void* a, const void* b, size_t len);
inline void small_memcpy(void* dest, const void* src, size_t len);

// Definitions

// Return true iff the first `len` bytes of both buffers are equal.
// Zero `len` yields true and neither buffer is read.
bool small_memeq(const void* a_void, const void* b_void, size_t len) {
  const char* a = static_cast<const char*>(a_void);
  const char* b = static_cast<const char*>(b_void);

  while (len--)
    if (*a++ != *b++)
      return false;
  return true;
}

// Copy `len` bytes from `src_void` to `dest_void`, front to back.
// Zero `len` touches neither buffer.
void small_memcpy(void* dest_void, const void* src_void, size_t len) {
  char* dest = static_cast<char*>(dest_void);
  const char* src = static_cast<const char*>(src_void);

  while (len--)
    *dest++ = *src++;
}
3546
|
|
|
|
|
|
|
|
3547
|
|
|
|
|
|
|
} // namespace morphodita |
3548
|
|
|
|
|
|
|
|
3549
|
|
|
|
|
|
|
///////// |
3550
|
|
|
|
|
|
|
// File: utils/pointer_decoder.h |
3551
|
|
|
|
|
|
|
///////// |
3552
|
|
|
|
|
|
|
|
3553
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils <http://github.com/ufal/cpp_utils/>.
3554
|
|
|
|
|
|
|
// |
3555
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
3556
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3557
|
|
|
|
|
|
|
// |
3558
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3559
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3560
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3561
|
|
|
|
|
|
|
|
3562
|
|
|
|
|
|
|
namespace utils { |
3563
|
|
|
|
|
|
|
|
3564
|
|
|
|
|
|
|
// |
3565
|
|
|
|
|
|
|
// Declarations |
3566
|
|
|
|
|
|
|
// |
3567
|
|
|
|
|
|
|
|
3568
|
|
|
|
|
|
|
// Sequential reader of binary data addressed by a caller-owned pointer.
// The decoder keeps a REFERENCE to the pointer, so every read advances the
// caller's pointer as well. No bounds checking is performed.
class pointer_decoder {
 public:
  inline pointer_decoder(const unsigned char*& data);
  inline unsigned next_1B();
  inline unsigned next_2B();
  inline unsigned next_4B();
  inline void next_str(std::string& str);
  template <class T> inline const T* next(unsigned elements);

 private:
  const unsigned char*& data;
};

//
// Definitions
//

pointer_decoder::pointer_decoder(const unsigned char*& data) : data(data) {}

// Read one byte and advance by 1.
unsigned pointer_decoder::next_1B() {
  return *data++;
}

// Read a 16-bit value in native byte order and advance by 2.
// memcpy is used because the position need not be aligned.
unsigned pointer_decoder::next_2B() {
  uint16_t result;
  memcpy(&result, data, sizeof(uint16_t));
  data += sizeof(uint16_t);
  return result;
}

// Read a 32-bit value in native byte order and advance by 4.
// memcpy is used because the position need not be aligned.
unsigned pointer_decoder::next_4B() {
  uint32_t result;
  memcpy(&result, data, sizeof(uint32_t));
  data += sizeof(uint32_t);
  return result;
}

// Read a length-prefixed string: a 1-byte length, where the value 255 means
// that the real length follows as a 4-byte value, and then the characters.
void pointer_decoder::next_str(std::string& str) {
  unsigned len = next_1B();
  if (len == 255) len = next_4B();
  str.assign(next<char>(len), len);
}

// Return a pointer to the current position interpreted as an array of T and
// skip `elements` items of it. The pointer is a plain cast, so it is only
// as aligned as the underlying data happens to be.
template <class T> const T* pointer_decoder::next(unsigned elements) {
  const T* result = reinterpret_cast<const T*>(data);
  data += sizeof(T) * elements;
  return result;
}
3616
|
|
|
|
|
|
|
|
3617
|
|
|
|
|
|
|
} // namespace utils |
3618
|
|
|
|
|
|
|
|
3619
|
|
|
|
|
|
|
///////// |
3620
|
|
|
|
|
|
|
// File: morphodita/morpho/persistent_unordered_map.h |
3621
|
|
|
|
|
|
|
///////// |
3622
|
|
|
|
|
|
|
|
3623
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>.
3624
|
|
|
|
|
|
|
// |
3625
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
3626
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3627
|
|
|
|
|
|
|
// |
3628
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3629
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3630
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3631
|
|
|
|
|
|
|
|
3632
|
|
|
|
|
|
|
namespace morphodita { |
3633
|
|
|
|
|
|
|
|
3634
|
|
|
|
|
|
|
// Declarations |
3635
|
0
|
|
|
|
|
|
class persistent_unordered_map { |
3636
|
|
|
|
|
|
|
public: |
3637
|
|
|
|
|
|
|
// Accessing function |
3638
|
|
|
|
|
|
|
template |
3639
|
|
|
|
|
|
|
inline const unsigned char* at(const char* str, int len, EntrySize entry_size) const; |
3640
|
|
|
|
|
|
|
|
3641
|
|
|
|
|
|
|
template |
3642
|
|
|
|
|
|
|
inline const T* at_typed(const char* str, int len) const; |
3643
|
|
|
|
|
|
|
|
3644
|
|
|
|
|
|
|
template |
3645
|
|
|
|
|
|
|
inline void iter(const char* str, int len, EntryProcess entry_process) const; |
3646
|
|
|
|
|
|
|
|
3647
|
|
|
|
|
|
|
template |
3648
|
|
|
|
|
|
|
inline void iter_all(EntryProcess entry_process) const; |
3649
|
|
|
|
|
|
|
|
3650
|
|
|
|
|
|
|
// Two helper functions accessing some internals |
3651
|
|
|
|
|
|
|
inline int max_length() const; |
3652
|
|
|
|
|
|
|
inline const unsigned char* data_start(int len) const; |
3653
|
|
|
|
|
|
|
|
3654
|
|
|
|
|
|
|
// Creation functions |
3655
|
|
|
|
|
|
|
persistent_unordered_map() {} |
3656
|
|
|
|
|
|
|
template |
3657
|
|
|
|
|
|
|
persistent_unordered_map(const unordered_map& map, double load_factor, EntryEncode entry_encode); |
3658
|
|
|
|
|
|
|
template |
3659
|
|
|
|
|
|
|
persistent_unordered_map(const unordered_map& map, double load_factor, bool add_prefixes, bool add_suffixes, EntryEncode entry_encode); |
3660
|
|
|
|
|
|
|
|
3661
|
|
|
|
|
|
|
// Manual creation functions |
3662
|
|
|
|
|
|
|
inline void resize(unsigned elems); |
3663
|
|
|
|
|
|
|
inline void add(const char* str, int str_len, int data_len); |
3664
|
|
|
|
|
|
|
inline void done_adding(); |
3665
|
|
|
|
|
|
|
inline unsigned char* fill(const char* str, int str_len, int data_len); |
3666
|
|
|
|
|
|
|
inline void done_filling(); |
3667
|
|
|
|
|
|
|
|
3668
|
|
|
|
|
|
|
// Serialization |
3669
|
|
|
|
|
|
|
inline void load(binary_decoder& data); |
3670
|
|
|
|
|
|
|
inline void save(binary_encoder& enc); |
3671
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
private: |
3673
|
|
|
|
|
|
|
struct fnv_hash; |
3674
|
|
|
|
|
|
|
vector hashes; |
3675
|
|
|
|
|
|
|
|
3676
|
|
|
|
|
|
|
template |
3677
|
|
|
|
|
|
|
void construct(const map& map, double load_factor, EntryEncode entry_encode); |
3678
|
|
|
|
|
|
|
}; |
3679
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
// Definitions |
3681
|
147
|
|
|
|
|
|
// One hash table of persistent_unordered_map, holding all keys of one length.
//   hash -- bucket offsets into `data`; bucket i occupies
//           [hash[i], hash[i+1]), hence one extra trailing element
//   data -- concatenated "key bytes + payload" records
//   mask -- number of buckets minus one (bucket count is a power of two)
struct persistent_unordered_map::fnv_hash {
  // Create an empty table with the smallest power-of-two number of buckets
  // which is at least `num` (and at least 1).
  fnv_hash(unsigned num) {
    mask = 1;
    while (mask < num)
      mask <<= 1;
    hash.resize(mask + 1);  // one extra element terminating the last bucket
    mask--;
  }

  // Load a table from binary data: a 4B number of `hash` elements followed by
  // the elements, and a 4B size of `data` followed by the bytes.
  fnv_hash(binary_decoder& data) {
    uint32_t size = data.next_4B();
    mask = size - 2;  // `size` is bucket count + 1, the mask is bucket count - 1
    hash.resize(size);
    memcpy(hash.data(), data.next<uint32_t>(size), size * sizeof(uint32_t));

    size = data.next_4B();
    this->data.resize(size);
    if (size) memcpy(this->data.data(), data.next<char>(size), size);
  }

  // Bucket index of a key. Keys of length 1 and 2 are indexed directly by
  // their value (NOT masked), longer keys by an FNV-1a style hash (offset
  // basis 2166136261, prime 16777619) over sign-extended chars, masked to the
  // table size.
  inline uint32_t index(const char* data, int len) const {
    if (len <= 0) return 0;
    if (len == 1) return unaligned_load<uint8_t>(data);
    if (len == 2) return unaligned_load<uint16_t>(data);

    uint32_t hash = 2166136261U;
    while (len--)
      hash = (hash ^ unsigned((signed char)*data++)) * 16777619U;
    return hash & mask;
  }

  inline void save(binary_encoder& enc);

  unsigned mask;
  vector<uint32_t> hash;
  vector<unsigned char> data;
};
3717
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
template |
3719
|
48
|
|
|
|
|
|
const unsigned char* persistent_unordered_map::at(const char* str, int len, EntrySize entry_size) const { |
3720
|
48
|
0
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3721
|
|
|
|
|
|
|
|
3722
|
48
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
3723
|
96
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
3724
|
96
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
3725
|
|
|
|
|
|
|
|
3726
|
48
|
0
|
|
|
|
|
if (len <= 2) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3727
|
48
|
0
|
|
|
|
|
return data != end ? data + len : nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3728
|
|
|
|
|
|
|
|
3729
|
0
|
0
|
|
|
|
|
while (data < end) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3730
|
0
|
0
|
|
|
|
|
if (small_memeq(str, data, len)) return data + len; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3731
|
0
|
|
|
|
|
|
data += len; |
3732
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
3733
|
0
|
|
|
|
|
|
entry_size(decoder); |
3734
|
|
|
|
|
|
|
} |
3735
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
return nullptr; |
3737
|
|
|
|
|
|
|
} |
3738
|
|
|
|
|
|
|
|
3739
|
|
|
|
|
|
|
template |
3740
|
204
|
|
|
|
|
|
const T* persistent_unordered_map::at_typed(const char* str, int len) const { |
3741
|
204
|
50
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return nullptr; |
|
|
100
|
|
|
|
|
|
3742
|
|
|
|
|
|
|
|
3743
|
149
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
3744
|
298
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
3745
|
298
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
3746
|
|
|
|
|
|
|
|
3747
|
149
|
100
|
|
|
|
|
if (len <= 2) |
|
|
100
|
|
|
|
|
|
3748
|
99
|
100
|
|
|
|
|
return data != end ? (const T*)(data + len) : nullptr; |
|
|
50
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
|
3750
|
79
|
100
|
|
|
|
|
while (data < end) { |
|
|
100
|
|
|
|
|
|
3751
|
58
|
100
|
|
|
|
|
if (small_memeq(str, data, len)) return (const T*)(data + len); |
|
|
100
|
|
|
|
|
|
3752
|
29
|
|
|
|
|
|
data += len + sizeof(T); |
3753
|
|
|
|
|
|
|
} |
3754
|
|
|
|
|
|
|
|
3755
|
|
|
|
|
|
|
return nullptr; |
3756
|
|
|
|
|
|
|
} |
3757
|
|
|
|
|
|
|
|
3758
|
|
|
|
|
|
|
template |
3759
|
30
|
|
|
|
|
|
void persistent_unordered_map::iter(const char* str, int len, EntryProcess entry_process) const { |
3760
|
30
|
0
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3761
|
|
|
|
|
|
|
|
3762
|
30
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
3763
|
60
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
3764
|
30
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
3765
|
|
|
|
|
|
|
|
3766
|
60
|
0
|
|
|
|
|
while (data < end) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
auto start = (const char*) data; |
3768
|
30
|
|
|
|
|
|
data += len; |
3769
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
3770
|
30
|
|
|
|
|
|
entry_process(start, decoder); |
3771
|
|
|
|
|
|
|
} |
3772
|
|
|
|
|
|
|
} |
3773
|
|
|
|
|
|
|
|
3774
|
|
|
|
|
|
|
template |
3775
|
2
|
|
|
|
|
|
void persistent_unordered_map::iter_all(EntryProcess entry_process) const { |
3776
|
4
|
100
|
|
|
|
|
for (unsigned len = 0; len < hashes.size(); len++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3777
|
3
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data(); |
3778
|
|
|
|
|
|
|
const unsigned char* end = data + hashes[len].data.size(); |
3779
|
|
|
|
|
|
|
|
3780
|
16
|
100
|
|
|
|
|
while (data < end) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
auto start = (const char*) data; |
3782
|
13
|
|
|
|
|
|
data += len; |
3783
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
3784
|
13
|
|
|
|
|
|
entry_process(start, len, decoder); |
3785
|
|
|
|
|
|
|
} |
3786
|
|
|
|
|
|
|
} |
3787
|
1
|
|
|
|
|
|
} |
3788
|
|
|
|
|
|
|
|
3789
|
|
|
|
|
|
|
int persistent_unordered_map::max_length() const { |
3790
|
52
|
|
|
|
|
|
return hashes.size(); |
3791
|
|
|
|
|
|
|
} |
3792
|
|
|
|
|
|
|
|
3793
|
|
|
|
|
|
|
const unsigned char* persistent_unordered_map::data_start(int len) const { |
3794
|
19
|
0
|
|
|
|
|
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3795
|
|
|
|
|
|
|
} |
3796
|
|
|
|
|
|
|
|
3797
|
16
|
|
|
|
|
|
void persistent_unordered_map::resize(unsigned elems) { |
3798
|
16
|
100
|
|
|
|
|
if (hashes.size() == 0) hashes.emplace_back(1); |
3799
|
14
|
100
|
|
|
|
|
else if (hashes.size() == 1) hashes.emplace_back(1<<8); |
3800
|
12
|
100
|
|
|
|
|
else if (hashes.size() == 2) hashes.emplace_back(1<<16); |
3801
|
10
|
|
|
|
|
|
else hashes.emplace_back(elems); |
3802
|
16
|
|
|
|
|
|
} |
3803
|
|
|
|
|
|
|
|
3804
|
9
|
|
|
|
|
|
void persistent_unordered_map::add(const char* str, int str_len, int data_len) { |
3805
|
9
|
50
|
|
|
|
|
if (unsigned(str_len) < hashes.size()) |
3806
|
9
|
|
|
|
|
|
hashes[str_len].hash[hashes[str_len].index(str, str_len)] += str_len + data_len; |
3807
|
9
|
|
|
|
|
|
} |
3808
|
|
|
|
|
|
|
|
3809
|
2
|
|
|
|
|
|
void persistent_unordered_map::done_adding() { |
3810
|
18
|
100
|
|
|
|
|
for (auto&& hash : hashes) { |
3811
|
|
|
|
|
|
|
int total = 0; |
3812
|
131632
|
100
|
|
|
|
|
for (auto&& len : hash.hash) total += len, len = total - len; |
3813
|
16
|
|
|
|
|
|
hash.data.resize(total); |
3814
|
|
|
|
|
|
|
} |
3815
|
2
|
|
|
|
|
|
} |
3816
|
|
|
|
|
|
|
|
3817
|
9
|
|
|
|
|
|
unsigned char* persistent_unordered_map::fill(const char* str, int str_len, int data_len) { |
3818
|
9
|
50
|
|
|
|
|
if (unsigned(str_len) < hashes.size()) { |
3819
|
9
|
|
|
|
|
|
unsigned index = hashes[str_len].index(str, str_len); |
3820
|
18
|
|
|
|
|
|
unsigned offset = hashes[str_len].hash[index]; |
3821
|
9
|
|
|
|
|
|
small_memcpy(hashes[str_len].data.data() + offset, str, str_len); |
3822
|
9
|
|
|
|
|
|
hashes[str_len].hash[index] += str_len + data_len; |
3823
|
9
|
|
|
|
|
|
return hashes[str_len].data.data() + offset + str_len; |
3824
|
|
|
|
|
|
|
} |
3825
|
|
|
|
|
|
|
return nullptr; |
3826
|
|
|
|
|
|
|
} |
3827
|
|
|
|
|
|
|
|
3828
|
2
|
|
|
|
|
|
void persistent_unordered_map::done_filling() { |
3829
|
18
|
100
|
|
|
|
|
for (auto&& hash : hashes) |
3830
|
131632
|
100
|
|
|
|
|
for (int i = hash.hash.size() - 1; i >= 0; i--) |
3831
|
131616
|
100
|
|
|
|
|
hash.hash[i] = i > 0 ? hash.hash[i-1] : 0; |
3832
|
2
|
|
|
|
|
|
} |
3833
|
|
|
|
|
|
|
|
3834
|
48
|
|
|
|
|
|
// Replace the contents of the map by data from the decoder: a 1-byte number
// of hash tables followed by the serialized tables.
void persistent_unordered_map::load(binary_decoder& data) {
  unsigned sizes = data.next_1B();

  hashes.clear();
  for (unsigned i = 0; i < sizes; i++)
    hashes.emplace_back(data);
}
3841
|
|
|
|
|
|
|
|
3842
|
|
|
|
|
|
|
} // namespace morphodita |
3843
|
|
|
|
|
|
|
|
3844
|
|
|
|
|
|
|
///////// |
3845
|
|
|
|
|
|
|
// File: morphodita/derivator/derivator_dictionary.h |
3846
|
|
|
|
|
|
|
///////// |
3847
|
|
|
|
|
|
|
|
3848
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>.
3849
|
|
|
|
|
|
|
// |
3850
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3851
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3852
|
|
|
|
|
|
|
// |
3853
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3854
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3855
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3856
|
|
|
|
|
|
|
|
3857
|
|
|
|
|
|
|
namespace morphodita { |
3858
|
|
|
|
|
|
|
|
3859
|
0
|
|
|
|
|
|
class derivator_dictionary : public derivator { |
3860
|
|
|
|
|
|
|
public: |
3861
|
|
|
|
|
|
|
virtual bool parent(string_piece lemma, derivated_lemma& parent) const override; |
3862
|
|
|
|
|
|
|
virtual bool children(string_piece lemma, vector& children) const override; |
3863
|
|
|
|
|
|
|
|
3864
|
|
|
|
|
|
|
bool load(istream& is); |
3865
|
|
|
|
|
|
|
|
3866
|
|
|
|
|
|
|
private: |
3867
|
|
|
|
|
|
|
friend class morpho; |
3868
|
|
|
|
|
|
|
const morpho* dictionary; |
3869
|
|
|
|
|
|
|
persistent_unordered_map derinet; |
3870
|
|
|
|
|
|
|
}; |
3871
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
} // namespace morphodita |
3873
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
///////// |
3875
|
|
|
|
|
|
|
// File: morphodita/derivator/derivator_dictionary.cpp |
3876
|
|
|
|
|
|
|
///////// |
3877
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>.
3879
|
|
|
|
|
|
|
// |
3880
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
3881
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
3882
|
|
|
|
|
|
|
// |
3883
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
3884
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
3885
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
3886
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
namespace morphodita { |
3888
|
|
|
|
|
|
|
|
3889
|
0
|
|
|
|
|
|
// Find the derivational parent of the given lemma.
// On success the parent lemma (including its stored lemma comment, if any) is
// assigned to `parent.lemma` and true is returned; otherwise `parent.lemma`
// is cleared and false is returned.
//
// Record layout of a lemma in `derinet` (after the key bytes):
//   1B comment length, comment bytes, 4B encoded parent,
//   2B number of children, 4B encoded child for every child
// An encoded lemma is `(offset << 8) | length`, the offset being relative to
// the record storage of keys of that length; zero means "no parent".
bool derivator_dictionary::parent(string_piece lemma, derivated_lemma& parent) const {
  if (dictionary) lemma.len = dictionary->lemma_id_len(lemma);

  auto lemma_data = derinet.at(lemma.str, lemma.len, [](pointer_decoder& data) {
    data.next<char>(data.next_1B());
    data.next_4B();
    data.next<uint32_t>(data.next_2B());
  });
  if (lemma_data) {
    // The record starts at an arbitrary byte offset, so the 4B value must not
    // be read through a uint32_t pointer (misaligned access).
    auto parent_encoded = unaligned_load<uint32_t>(lemma_data + 1 + *lemma_data);
    if (parent_encoded) {
      unsigned parent_len = parent_encoded & 0xFF;
      auto parent_data = derinet.data_start(parent_len) + (parent_encoded >> 8);
      parent.lemma.assign((const char*) parent_data, parent_len);
      // Append the lemma comment stored right after the key, if non-empty.
      if (parent_data[parent_len])
        parent.lemma.append((const char*) parent_data + parent_len + 1, parent_data[parent_len]);
      return true;
    }
  }
  parent.lemma.clear();
  return false;
}
3911
|
|
|
|
|
|
|
|
3912
|
0
|
|
|
|
|
|
bool derivator_dictionary::children(string_piece lemma, vector& children) const { |
3913
|
0
|
0
|
|
|
|
|
if (dictionary) lemma.len = dictionary->lemma_id_len(lemma); |
3914
|
|
|
|
|
|
|
|
3915
|
0
|
|
|
|
|
|
auto lemma_data = derinet.at(lemma.str, lemma.len, [](pointer_decoder& data) { |
3916
|
|
|
|
|
|
|
data.next(data.next_1B()); |
3917
|
|
|
|
|
|
|
data.next_4B(); |
3918
|
|
|
|
|
|
|
data.next(data.next_2B()); |
3919
|
0
|
|
|
|
|
|
}); |
3920
|
0
|
0
|
|
|
|
|
if (lemma_data) { |
3921
|
0
|
|
|
|
|
|
auto children_len = *(uint16_t*)(lemma_data + 1 + *lemma_data + 4); |
3922
|
0
|
|
|
|
|
|
auto children_encoded = (uint32_t*)(lemma_data + 1 + *lemma_data + 4 + 2); |
3923
|
0
|
0
|
|
|
|
|
if (children_len) { |
3924
|
0
|
|
|
|
|
|
children.resize(children_len); |
3925
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < children_len; i++) { |
3926
|
0
|
|
|
|
|
|
unsigned child_len = children_encoded[i] & 0xFF; |
3927
|
0
|
|
|
|
|
|
auto child_data = derinet.data_start(child_len) + (children_encoded[i] >> 8); |
3928
|
0
|
|
|
|
|
|
children[i].lemma.assign((const char*) child_data, child_len); |
3929
|
0
|
0
|
|
|
|
|
if (child_data[child_len]) |
3930
|
0
|
|
|
|
|
|
children[i].lemma.append((const char*) child_data + child_len + 1, child_data[child_len]); |
3931
|
|
|
|
|
|
|
} |
3932
|
|
|
|
|
|
|
return true; |
3933
|
|
|
|
|
|
|
} |
3934
|
|
|
|
|
|
|
} |
3935
|
0
|
|
|
|
|
|
children.clear(); |
3936
|
0
|
|
|
|
|
|
return false; |
3937
|
|
|
|
|
|
|
} |
3938
|
|
|
|
|
|
|
|
3939
|
0
|
|
|
|
|
|
bool derivator_dictionary::load(istream& is) { |
3940
|
|
|
|
|
|
|
binary_decoder data; |
3941
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
0
|
|
|
|
|
|
3942
|
|
|
|
|
|
|
|
3943
|
|
|
|
|
|
|
try { |
3944
|
0
|
0
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
0
|
|
|
|
|
|
3945
|
0
|
0
|
|
|
|
|
derinet.resize(data.next_4B()); |
|
|
0
|
|
|
|
|
|
3946
|
|
|
|
|
|
|
|
3947
|
|
|
|
|
|
|
unsigned data_position = data.tell(); |
3948
|
|
|
|
|
|
|
vector lemma, parent; |
3949
|
0
|
0
|
|
|
|
|
for (int pass = 1; pass <= 3; pass++) { |
3950
|
0
|
0
|
|
|
|
|
if (pass > 1) data.seek(data_position); |
|
|
0
|
|
|
|
|
|
3951
|
|
|
|
|
|
|
|
3952
|
|
|
|
|
|
|
lemma.clear(); |
3953
|
0
|
0
|
|
|
|
|
for (int i = data.next_4B(); i > 0; i--) { |
|
|
0
|
|
|
|
|
|
3954
|
0
|
0
|
|
|
|
|
lemma.resize(lemma.size() - data.next_1B()); |
|
|
0
|
|
|
|
|
|
3955
|
0
|
0
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
0
|
|
|
|
|
|
3956
|
0
|
0
|
|
|
|
|
lemma.push_back(data.next_1B()); |
3957
|
|
|
|
|
|
|
|
3958
|
0
|
0
|
|
|
|
|
unsigned char lemma_comment_len = data.next_1B(); |
3959
|
0
|
0
|
|
|
|
|
const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr; |
|
|
0
|
|
|
|
|
|
3960
|
|
|
|
|
|
|
|
3961
|
0
|
0
|
|
|
|
|
unsigned children = data.next_2B(); |
3962
|
|
|
|
|
|
|
|
3963
|
0
|
0
|
|
|
|
|
if (pass == 3) parent.clear(); |
3964
|
|
|
|
|
|
|
enum { REMOVE_START = 1, REMOVE_END = 2, ADD_START = 4, ADD_END = 8 }; |
3965
|
0
|
0
|
|
|
|
|
int operations = data.next_1B(); |
3966
|
0
|
0
|
|
|
|
|
if (operations) { |
3967
|
0
|
0
|
|
|
|
|
int remove_start = operations & REMOVE_START ? data.next_1B() : 0; |
|
|
0
|
|
|
|
|
|
3968
|
0
|
0
|
|
|
|
|
int remove_end = operations & REMOVE_END ? data.next_1B() : 0; |
|
|
0
|
|
|
|
|
|
3969
|
0
|
0
|
|
|
|
|
if (operations & ADD_START) { |
3970
|
0
|
0
|
|
|
|
|
int add_start = data.next_1B(); |
3971
|
0
|
0
|
|
|
|
|
const char* str = data.next(add_start); |
3972
|
0
|
0
|
|
|
|
|
if (pass == 3) parent.assign(str, str + add_start); |
3973
|
|
|
|
|
|
|
} |
3974
|
0
|
0
|
|
|
|
|
if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end); |
|
|
0
|
|
|
|
|
|
3975
|
0
|
0
|
|
|
|
|
if (operations & ADD_END) { |
3976
|
0
|
0
|
|
|
|
|
int add_end = data.next_1B(); |
3977
|
0
|
0
|
|
|
|
|
const char* str = data.next(add_end); |
3978
|
0
|
0
|
|
|
|
|
if (pass == 3) parent.insert(parent.end(), str, str + add_end); |
3979
|
|
|
|
|
|
|
} |
3980
|
|
|
|
|
|
|
} |
3981
|
|
|
|
|
|
|
|
3982
|
0
|
0
|
|
|
|
|
if (pass == 1) { |
3983
|
0
|
|
|
|
|
|
derinet.add(lemma.data(), lemma.size(), 1 + lemma_comment_len + 4 + 2 + 4 * children); |
3984
|
0
|
0
|
|
|
|
|
} else if (pass == 2) { |
3985
|
0
|
|
|
|
|
|
unsigned char* lemma_data = derinet.fill(lemma.data(), lemma.size(), 1 + lemma_comment_len + 4 + 2 + 4 * children); |
3986
|
0
|
|
|
|
|
|
*lemma_data++ = lemma_comment_len; |
3987
|
0
|
0
|
|
|
|
|
while (lemma_comment_len--) *lemma_data++ = *lemma_comment++; |
3988
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, 0); |
3989
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, children); |
3990
|
0
|
0
|
|
|
|
|
if (children) unaligned_store(((uint32_t*)lemma_data) + children - 1, 0); |
3991
|
0
|
0
|
|
|
|
|
} else if (pass == 3 && !parent.empty()) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3992
|
0
|
|
|
|
|
|
auto lemma_data = derinet.at(lemma.data(), lemma.size(), [](pointer_decoder& data) { |
3993
|
|
|
|
|
|
|
data.next(data.next_1B()); |
3994
|
|
|
|
|
|
|
data.next_4B(); |
3995
|
|
|
|
|
|
|
data.next(data.next_2B()); |
3996
|
0
|
|
|
|
|
|
}); |
3997
|
0
|
|
|
|
|
|
auto parent_data = derinet.at(parent.data(), parent.size(), [](pointer_decoder& data) { |
3998
|
|
|
|
|
|
|
data.next(data.next_1B()); |
3999
|
|
|
|
|
|
|
data.next_4B(); |
4000
|
|
|
|
|
|
|
data.next(data.next_2B()); |
4001
|
0
|
|
|
|
|
|
}); |
4002
|
0
|
0
|
|
|
|
|
assert(lemma_data && parent_data); |
4003
|
|
|
|
|
|
|
|
4004
|
0
|
|
|
|
|
|
unsigned parent_offset = parent_data - parent.size() - derinet.data_start(parent.size()); |
4005
|
0
|
0
|
|
|
|
|
assert(parent.size() < (1<<8) && parent_offset < (1<<24)); |
|
|
0
|
|
|
|
|
|
4006
|
0
|
|
|
|
|
|
unaligned_store((void *)(lemma_data + 1 + *lemma_data), (parent_offset << 8) | parent.size()); |
4007
|
|
|
|
|
|
|
|
4008
|
0
|
|
|
|
|
|
unsigned lemma_offset = lemma_data - lemma.size() - derinet.data_start(lemma.size()); |
4009
|
0
|
0
|
|
|
|
|
assert(lemma.size() < (1<<8) && lemma_offset < (1<<24)); |
|
|
0
|
|
|
|
|
|
4010
|
0
|
|
|
|
|
|
auto children_len = unaligned_load(parent_data + 1 + *parent_data + 4); |
4011
|
0
|
|
|
|
|
|
auto children = (uint32_t*)(parent_data + 1 + *parent_data + 4 + 2); |
4012
|
0
|
|
|
|
|
|
auto child_index = unaligned_load(children + children_len - 1); |
4013
|
0
|
|
|
|
|
|
unaligned_store(children + child_index, (lemma_offset << 8) | lemma.size()); |
4014
|
0
|
0
|
|
|
|
|
if (child_index+1 < children_len) |
4015
|
0
|
|
|
|
|
|
unaligned_store(children + children_len - 1, unaligned_load(children + children_len - 1) + 1); |
4016
|
|
|
|
|
|
|
} |
4017
|
|
|
|
|
|
|
} |
4018
|
|
|
|
|
|
|
|
4019
|
0
|
0
|
|
|
|
|
if (pass == 1) |
4020
|
0
|
0
|
|
|
|
|
derinet.done_adding(); |
4021
|
0
|
0
|
|
|
|
|
if (pass == 2) |
4022
|
0
|
|
|
|
|
|
derinet.done_filling(); |
4023
|
|
0
|
|
|
|
|
} |
4024
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
4025
|
|
|
|
|
|
|
return false; |
4026
|
|
|
|
|
|
|
} |
4027
|
0
|
|
|
|
|
|
return true; |
4028
|
|
|
|
|
|
|
} |
4029
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
} // namespace morphodita |
4031
|
|
|
|
|
|
|
|
4032
|
|
|
|
|
|
|
///////// |
4033
|
|
|
|
|
|
|
// File: morphodita/morpho/casing_variants.h |
4034
|
|
|
|
|
|
|
///////// |
4035
|
|
|
|
|
|
|
|
4036
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4037
|
|
|
|
|
|
|
// |
4038
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4039
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4040
|
|
|
|
|
|
|
// |
4041
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4042
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4043
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4044
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
namespace morphodita { |
4046
|
|
|
|
|
|
|
|
4047
|
14
|
|
|
|
|
|
inline void generate_casing_variants(string_piece form, string& form_uclc, string& form_lc) { |
4048
|
|
|
|
|
|
|
using namespace unilib; |
4049
|
|
|
|
|
|
|
|
4050
|
|
|
|
|
|
|
// Detect uppercase+titlecase characters. |
4051
|
|
|
|
|
|
|
bool first_Lut = false; // first character is uppercase or titlecase |
4052
|
|
|
|
|
|
|
bool rest_has_Lut = false; // any character but first is uppercase or titlecase |
4053
|
|
|
|
|
|
|
{ |
4054
|
14
|
|
|
|
|
|
string_piece form_tmp = form; |
4055
|
28
|
|
|
|
|
|
first_Lut = unicode::category(utf8::decode(form_tmp.str, form_tmp.len)) & unicode::Lut; |
4056
|
52
|
100
|
|
|
|
|
while (form_tmp.len && !rest_has_Lut) |
|
|
50
|
|
|
|
|
|
4057
|
38
|
|
|
|
|
|
rest_has_Lut = unicode::category(utf8::decode(form_tmp.str, form_tmp.len)) & unicode::Lut; |
4058
|
|
|
|
|
|
|
} |
4059
|
|
|
|
|
|
|
|
4060
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
4061
|
|
|
|
|
|
|
// We only replace letters with their lowercase variants. |
4062
|
|
|
|
|
|
|
// - form_uclc: first uppercase, rest lowercase |
4063
|
|
|
|
|
|
|
// - form_lc: all lowercase |
4064
|
|
|
|
|
|
|
|
4065
|
14
|
100
|
|
|
|
|
if (first_Lut && !rest_has_Lut) { // common case allowing fast execution |
4066
|
4
|
|
|
|
|
|
form_lc.reserve(form.len); |
4067
|
4
|
|
|
|
|
|
string_piece form_tmp = form; |
4068
|
4
|
|
|
|
|
|
utf8::append(form_lc, unicode::lowercase(utf8::decode(form_tmp.str, form_tmp.len))); |
4069
|
4
|
|
|
|
|
|
form_lc.append(form_tmp.str, form_tmp.len); |
4070
|
10
|
50
|
|
|
|
|
} else if (!first_Lut && rest_has_Lut) { |
4071
|
0
|
|
|
|
|
|
form_lc.reserve(form.len); |
4072
|
0
|
|
|
|
|
|
utf8::map(unicode::lowercase, form.str, form.len, form_lc); |
4073
|
10
|
50
|
|
|
|
|
} else if (first_Lut && rest_has_Lut) { |
4074
|
0
|
|
|
|
|
|
form_lc.reserve(form.len); |
4075
|
0
|
|
|
|
|
|
form_uclc.reserve(form.len); |
4076
|
0
|
|
|
|
|
|
string_piece form_tmp = form; |
4077
|
0
|
|
|
|
|
|
char32_t first = utf8::decode(form_tmp.str, form_tmp.len); |
4078
|
0
|
|
|
|
|
|
utf8::append(form_lc, unicode::lowercase(first)); |
4079
|
0
|
|
|
|
|
|
utf8::append(form_uclc, first); |
4080
|
0
|
0
|
|
|
|
|
while (form_tmp.len) { |
4081
|
0
|
|
|
|
|
|
char32_t lowercase = unicode::lowercase(utf8::decode(form_tmp.str, form_tmp.len)); |
4082
|
0
|
|
|
|
|
|
utf8::append(form_lc, lowercase); |
4083
|
0
|
|
|
|
|
|
utf8::append(form_uclc, lowercase); |
4084
|
|
|
|
|
|
|
} |
4085
|
|
|
|
|
|
|
} |
4086
|
14
|
|
|
|
|
|
} |
4087
|
|
|
|
|
|
|
|
4088
|
|
|
|
|
|
|
} // namespace morphodita |
4089
|
|
|
|
|
|
|
|
4090
|
|
|
|
|
|
|
///////// |
4091
|
|
|
|
|
|
|
// File: morphodita/morpho/czech_lemma_addinfo.h |
4092
|
|
|
|
|
|
|
///////// |
4093
|
|
|
|
|
|
|
|
4094
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4095
|
|
|
|
|
|
|
// |
4096
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4097
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4098
|
|
|
|
|
|
|
// |
4099
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4100
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4101
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4102
|
|
|
|
|
|
|
|
4103
|
|
|
|
|
|
|
namespace morphodita { |
4104
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
// Declarations |
4106
|
0
|
|
|
|
|
|
struct czech_lemma_addinfo { |
4107
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
4108
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
4109
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
4110
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
4111
|
|
|
|
|
|
|
|
4112
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
4113
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
4114
|
|
|
|
|
|
|
|
4115
|
|
|
|
|
|
|
vector data; |
4116
|
|
|
|
|
|
|
}; |
4117
|
|
|
|
|
|
|
|
4118
|
|
|
|
|
|
|
// Definitions |
4119
|
0
|
|
|
|
|
|
int czech_lemma_addinfo::raw_lemma_len(string_piece lemma) { |
4120
|
|
|
|
|
|
|
// Lemma ends by a '-[0-9]', '`' or '_' on non-first position. |
4121
|
0
|
0
|
|
|
|
|
for (unsigned len = 1; len < lemma.len; len++) |
4122
|
0
|
0
|
|
|
|
|
if (lemma.str[len] == '`' || lemma.str[len] == '_' || |
|
|
0
|
|
|
|
|
|
4123
|
0
|
0
|
|
|
|
|
(lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9')) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4124
|
0
|
|
|
|
|
|
return len; |
4125
|
0
|
|
|
|
|
|
return lemma.len; |
4126
|
|
|
|
|
|
|
} |
4127
|
|
|
|
|
|
|
|
4128
|
0
|
|
|
|
|
|
int czech_lemma_addinfo::lemma_id_len(string_piece lemma) { |
4129
|
|
|
|
|
|
|
// Lemma ends by a '-[0-9]', '`' or '_' on non-first position. |
4130
|
0
|
0
|
|
|
|
|
for (unsigned len = 1; len < lemma.len; len++) { |
4131
|
0
|
0
|
|
|
|
|
if (lemma.str[len] == '`' || lemma.str[len] == '_') |
4132
|
0
|
|
|
|
|
|
return len; |
4133
|
0
|
0
|
|
|
|
|
if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4134
|
0
|
|
|
|
|
|
len += 2; |
4135
|
0
|
0
|
|
|
|
|
while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4136
|
0
|
|
|
|
|
|
return len; |
4137
|
|
|
|
|
|
|
} |
4138
|
|
|
|
|
|
|
} |
4139
|
0
|
|
|
|
|
|
return lemma.len; |
4140
|
|
|
|
|
|
|
} |
4141
|
|
|
|
|
|
|
|
4142
|
0
|
|
|
|
|
|
string czech_lemma_addinfo::format(const unsigned char* addinfo, int addinfo_len) { |
4143
|
|
|
|
|
|
|
string res; |
4144
|
|
|
|
|
|
|
|
4145
|
0
|
0
|
|
|
|
|
if (addinfo_len) { |
4146
|
0
|
0
|
|
|
|
|
res.reserve(addinfo_len + 4); |
4147
|
0
|
0
|
|
|
|
|
if (addinfo[0] != 255) { |
4148
|
|
|
|
|
|
|
char num[5]; |
4149
|
0
|
|
|
|
|
|
snprintf(num, sizeof(num), "-%u", addinfo[0]); |
4150
|
|
|
|
|
|
|
res += num; |
4151
|
|
|
|
|
|
|
} |
4152
|
0
|
0
|
|
|
|
|
for (int i = 1; i < addinfo_len; i++) |
4153
|
0
|
|
|
|
|
|
res += addinfo[i]; |
4154
|
|
|
|
|
|
|
} |
4155
|
|
|
|
|
|
|
|
4156
|
0
|
|
|
|
|
|
return res; |
4157
|
|
|
|
|
|
|
} |
4158
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
bool czech_lemma_addinfo::generatable(const unsigned char* addinfo, int addinfo_len) { |
4160
|
0
|
0
|
|
|
|
|
for (int i = 1; i + 2 < addinfo_len; i++) |
4161
|
0
|
0
|
|
|
|
|
if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x') |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4162
|
|
|
|
|
|
|
return false; |
4163
|
|
|
|
|
|
|
|
4164
|
|
|
|
|
|
|
return true; |
4165
|
|
|
|
|
|
|
} |
4166
|
|
|
|
|
|
|
|
4167
|
0
|
|
|
|
|
|
int czech_lemma_addinfo::parse(string_piece lemma, bool die_on_failure) { |
4168
|
|
|
|
|
|
|
data.clear(); |
4169
|
|
|
|
|
|
|
|
4170
|
0
|
|
|
|
|
|
const char* lemma_info = lemma.str + raw_lemma_len(lemma); |
4171
|
0
|
0
|
|
|
|
|
if (lemma_info < lemma.str + lemma.len) { |
4172
|
0
|
|
|
|
|
|
int lemma_num = 255; |
4173
|
|
|
|
|
|
|
const char* lemma_additional_info = lemma_info; |
4174
|
|
|
|
|
|
|
|
4175
|
0
|
0
|
|
|
|
|
if (*lemma_info == '-') { |
4176
|
0
|
|
|
|
|
|
lemma_num = 0; |
4177
|
0
|
|
|
|
|
|
for (lemma_additional_info = lemma_info + 1; |
4178
|
0
|
0
|
|
|
|
|
lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9'); |
|
|
0
|
|
|
|
|
|
4179
|
|
|
|
|
|
|
lemma_additional_info++) |
4180
|
0
|
|
|
|
|
|
lemma_num = 10 * lemma_num + (*lemma_additional_info - '0'); |
4181
|
|
|
|
|
|
|
|
4182
|
0
|
0
|
|
|
|
|
if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4183
|
0
|
0
|
|
|
|
|
if (die_on_failure) |
4184
|
0
|
|
|
|
|
|
runtime_failure("Lemma number " << lemma_num << " in lemma " << lemma << " out of range!"); |
4185
|
|
|
|
|
|
|
else |
4186
|
0
|
|
|
|
|
|
lemma_num = 255; |
4187
|
|
|
|
|
|
|
} |
4188
|
|
|
|
|
|
|
} |
4189
|
0
|
|
|
|
|
|
data.emplace_back(lemma_num); |
4190
|
0
|
0
|
|
|
|
|
while (lemma_additional_info < lemma.str + lemma.len) |
4191
|
0
|
|
|
|
|
|
data.push_back(*(unsigned char*)lemma_additional_info++); |
4192
|
|
|
|
|
|
|
|
4193
|
0
|
0
|
|
|
|
|
if (data.size() > 255) { |
4194
|
0
|
0
|
|
|
|
|
if (die_on_failure) |
4195
|
0
|
|
|
|
|
|
runtime_failure("Too long lemma info " << lemma_info << " in lemma " << lemma << '!'); |
4196
|
|
|
|
|
|
|
else |
4197
|
0
|
|
|
|
|
|
data.resize(255); |
4198
|
|
|
|
|
|
|
} |
4199
|
|
|
|
|
|
|
} |
4200
|
|
|
|
|
|
|
|
4201
|
0
|
|
|
|
|
|
return lemma_info - lemma.str; |
4202
|
|
|
|
|
|
|
} |
4203
|
|
|
|
|
|
|
|
4204
|
|
|
|
|
|
|
bool czech_lemma_addinfo::match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len) { |
4205
|
0
|
0
|
|
|
|
|
if (data.empty()) return true; |
4206
|
0
|
0
|
|
|
|
|
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4207
|
|
|
|
|
|
|
return true; |
4208
|
|
|
|
|
|
|
} |
4209
|
|
|
|
|
|
|
|
4210
|
|
|
|
|
|
|
} // namespace morphodita |
4211
|
|
|
|
|
|
|
|
4212
|
|
|
|
|
|
|
///////// |
4213
|
|
|
|
|
|
|
// File: morphodita/morpho/tag_filter.h |
4214
|
|
|
|
|
|
|
///////// |
4215
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4217
|
|
|
|
|
|
|
// |
4218
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4219
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4220
|
|
|
|
|
|
|
// |
4221
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4222
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4223
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4224
|
|
|
|
|
|
|
|
4225
|
|
|
|
|
|
|
namespace morphodita { |
4226
|
|
|
|
|
|
|
|
4227
|
|
|
|
|
|
|
// Declarations |
4228
|
0
|
|
|
|
|
|
class tag_filter { |
4229
|
|
|
|
|
|
|
public: |
4230
|
|
|
|
|
|
|
tag_filter(const char* filter = nullptr); |
4231
|
|
|
|
|
|
|
|
4232
|
|
|
|
|
|
|
inline bool matches(const char* tag) const; |
4233
|
|
|
|
|
|
|
|
4234
|
|
|
|
|
|
|
private: |
4235
|
|
|
|
|
|
|
struct char_filter { |
4236
|
|
|
|
|
|
|
char_filter(int pos, bool negate, int chars_offset, int chars_len) |
4237
|
0
|
|
|
|
|
|
: pos(pos), negate(negate), chars_offset(chars_offset), chars_len(chars_len) {} |
4238
|
|
|
|
|
|
|
|
4239
|
|
|
|
|
|
|
int pos; |
4240
|
|
|
|
|
|
|
bool negate; |
4241
|
|
|
|
|
|
|
int chars_offset, chars_len; |
4242
|
|
|
|
|
|
|
}; |
4243
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
string wildcard; |
4245
|
|
|
|
|
|
|
std::vector filters; |
4246
|
|
|
|
|
|
|
}; |
4247
|
|
|
|
|
|
|
|
4248
|
|
|
|
|
|
|
// Definitions |
4249
|
0
|
|
|
|
|
|
inline bool tag_filter::matches(const char* tag) const { |
4250
|
0
|
0
|
|
|
|
|
if (filters.empty()) return true; |
4251
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
int tag_pos = 0; |
4253
|
0
|
0
|
|
|
|
|
for (auto&& filter : filters) { |
4254
|
|
|
|
|
|
|
// Skip until next filter position. If the tag ends prematurely, accept. |
4255
|
0
|
0
|
|
|
|
|
while (tag_pos < filter.pos) |
4256
|
0
|
0
|
|
|
|
|
if (!tag[tag_pos++]) |
4257
|
|
|
|
|
|
|
return true; |
4258
|
0
|
0
|
|
|
|
|
if (!tag[tag_pos]) |
4259
|
|
|
|
|
|
|
return true; |
4260
|
|
|
|
|
|
|
|
4261
|
|
|
|
|
|
|
// We assume filter.chars_len >= 1. |
4262
|
0
|
|
|
|
|
|
bool matched = (wildcard[filter.chars_offset] == tag[tag_pos]) ^ filter.negate; |
4263
|
0
|
0
|
|
|
|
|
for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++) |
|
|
0
|
|
|
|
|
|
4264
|
0
|
|
|
|
|
|
matched = (wildcard[filter.chars_offset + i] == tag[tag_pos]) ^ filter.negate; |
4265
|
0
|
0
|
|
|
|
|
if (!matched) return false; |
4266
|
|
|
|
|
|
|
} |
4267
|
|
|
|
|
|
|
return true; |
4268
|
|
|
|
|
|
|
} |
4269
|
|
|
|
|
|
|
|
4270
|
|
|
|
|
|
|
} // namespace morphodita |
4271
|
|
|
|
|
|
|
|
4272
|
|
|
|
|
|
|
///////// |
4273
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_dictionary.h |
4274
|
|
|
|
|
|
|
///////// |
4275
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4277
|
|
|
|
|
|
|
// |
4278
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4279
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4280
|
|
|
|
|
|
|
// |
4281
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4282
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4283
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4284
|
|
|
|
|
|
|
|
4285
|
|
|
|
|
|
|
namespace morphodita { |
4286
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
// Declarations |
4288
|
|
|
|
|
|
|
template |
4289
|
0
|
|
|
|
|
|
class morpho_dictionary { |
4290
|
|
|
|
|
|
|
public: |
4291
|
|
|
|
|
|
|
void load(binary_decoder& data); |
4292
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas) const; |
4293
|
|
|
|
|
|
|
bool generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms) const; |
4294
|
|
|
|
|
|
|
private: |
4295
|
|
|
|
|
|
|
persistent_unordered_map lemmas, roots, suffixes; |
4296
|
|
|
|
|
|
|
|
4297
|
|
|
|
|
|
|
vector tags; |
4298
|
|
|
|
|
|
|
vector>>> classes; |
4299
|
|
|
|
|
|
|
}; |
4300
|
|
|
|
|
|
|
|
4301
|
|
|
|
|
|
|
// Definitions |
4302
|
|
|
|
|
|
|
template |
4303
|
1
|
|
|
|
|
|
void morpho_dictionary::load(binary_decoder& data) { |
4304
|
|
|
|
|
|
|
// Prepare lemmas and roots hashes |
4305
|
8
|
100
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4306
|
7
|
|
|
|
|
|
lemmas.resize(data.next_4B()); |
4307
|
10
|
100
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4308
|
9
|
|
|
|
|
|
roots.resize(data.next_4B()); |
4309
|
|
|
|
|
|
|
|
4310
|
|
|
|
|
|
|
// Perform two pass over the lemmas and roots data, filling the hashes. |
4311
|
|
|
|
|
|
|
|
4312
|
1
|
|
|
|
|
|
vector lemma(max(lemmas.max_length(), roots.max_length())); |
4313
|
1
|
50
|
|
|
|
|
vector root(max(lemmas.max_length(), roots.max_length())); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4314
|
|
|
|
|
|
|
unsigned data_position = data.tell(); |
4315
|
3
|
100
|
|
|
|
|
for (int pass = 1; pass <= 2; pass++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4316
|
2
|
100
|
|
|
|
|
if (pass > 1) data.seek(data_position); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
|
4318
|
|
|
|
|
|
|
int lemma_len = 0; |
4319
|
|
|
|
|
|
|
int root_len = 0; |
4320
|
|
|
|
|
|
|
|
4321
|
6
|
50
|
|
|
|
|
for (int i = data.next_4B(); i > 0; i--) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4322
|
4
|
50
|
|
|
|
|
lemma_len -= data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4323
|
28
|
50
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4324
|
24
|
50
|
|
|
|
|
lemma[lemma_len++] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4325
|
4
|
50
|
|
|
|
|
unsigned char lemma_info_len = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4326
|
4
|
50
|
|
|
|
|
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4327
|
4
|
50
|
|
|
|
|
unsigned lemma_roots = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
|
4329
|
|
|
|
|
|
|
unsigned char* lemma_data /* to keep compiler happy */ = nullptr; |
4330
|
|
|
|
|
|
|
unsigned lemma_offset /* to keep compiler happy */ = 0; |
4331
|
|
|
|
|
|
|
|
4332
|
4
|
100
|
|
|
|
|
if (pass == 1) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4333
|
2
|
|
|
|
|
|
lemmas.add(lemma.data(), lemma_len, 1 + lemma_info_len + 1 + lemma_roots * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
4334
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
4335
|
2
|
|
|
|
|
|
lemma_data = lemmas.fill(lemma.data(), lemma_len, 1 + lemma_info_len + 1 + lemma_roots * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
4336
|
4
|
|
|
|
|
|
lemma_offset = lemma_data - lemma_len - lemmas.data_start(lemma_len); |
4337
|
|
|
|
|
|
|
|
4338
|
2
|
|
|
|
|
|
*lemma_data++ = lemma_info_len; |
4339
|
2
|
50
|
|
|
|
|
if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4340
|
2
|
|
|
|
|
|
*lemma_data++ = lemma_roots; |
4341
|
|
|
|
|
|
|
} |
4342
|
|
|
|
|
|
|
|
4343
|
4
|
|
|
|
|
|
small_memcpy(root.data(), lemma.data(), lemma_len); root_len = lemma_len; |
4344
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < lemma_roots; i++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
enum { REMOVE_START = 1, REMOVE_END = 2, ADD_START = 4, ADD_END = 8 }; |
4346
|
14
|
50
|
|
|
|
|
int operations = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4347
|
14
|
50
|
|
|
|
|
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4348
|
14
|
100
|
|
|
|
|
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4349
|
14
|
50
|
|
|
|
|
if (operations & ADD_START) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4350
|
0
|
0
|
|
|
|
|
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4351
|
0
|
0
|
|
|
|
|
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4352
|
|
|
|
|
|
|
} |
4353
|
14
|
50
|
|
|
|
|
if (operations & ADD_END) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4354
|
42
|
50
|
|
|
|
|
for (int len = data.next_1B(); len > 0; len--) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4355
|
28
|
50
|
|
|
|
|
root[root_len++] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4356
|
14
|
50
|
|
|
|
|
uint16_t clas = data.next_2B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4357
|
|
|
|
|
|
|
|
4358
|
14
|
100
|
|
|
|
|
if (pass == 1) { // for each root |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4359
|
7
|
|
|
|
|
|
roots.add(root.data(), root_len, sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t)); |
4360
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
4361
|
7
|
|
|
|
|
|
unsigned char* root_data = roots.fill(root.data(), root_len, sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t)); |
4362
|
14
|
|
|
|
|
|
unsigned root_offset = root_data - root_len - roots.data_start(root_len); |
4363
|
|
|
|
|
|
|
|
4364
|
|
|
|
|
|
|
unaligned_store_inc(root_data, clas); |
4365
|
|
|
|
|
|
|
unaligned_store_inc(root_data, lemma_offset); |
4366
|
|
|
|
|
|
|
unaligned_store_inc(root_data, lemma_len); |
4367
|
7
|
50
|
|
|
|
|
assert(uint8_t(lemma_len) == lemma_len); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4368
|
|
|
|
|
|
|
|
4369
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, root_offset); |
4370
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, root_len); |
4371
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, clas); |
4372
|
7
|
50
|
|
|
|
|
assert(uint8_t(root_len) == root_len); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
} |
4374
|
|
|
|
|
|
|
} |
4375
|
|
|
|
|
|
|
} |
4376
|
|
|
|
|
|
|
|
4377
|
2
|
100
|
|
|
|
|
if (pass == 1) { // after the whole pass |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4378
|
1
|
50
|
|
|
|
|
lemmas.done_adding(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4379
|
1
|
50
|
|
|
|
|
roots.done_adding(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
4381
|
1
|
|
|
|
|
|
lemmas.done_filling(); |
4382
|
1
|
|
|
|
|
|
roots.done_filling(); |
4383
|
|
|
|
|
|
|
} |
4384
|
|
|
|
|
|
|
} |
4385
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
// Load tags |
4387
|
1
|
50
|
|
|
|
|
tags.resize(data.next_2B()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4388
|
21
|
100
|
|
|
|
|
for (auto&& tag : tags) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4389
|
20
|
50
|
|
|
|
|
tag.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4390
|
80
|
100
|
|
|
|
|
for (unsigned i = 0; i < tag.size(); i++) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4391
|
60
|
50
|
|
|
|
|
tag[i] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
} |
4393
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
// Load suffixes |
4395
|
1
|
50
|
|
|
|
|
suffixes.load(data); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4396
|
|
|
|
|
|
|
|
4397
|
|
|
|
|
|
|
// Fill classes from suffixes |
4398
|
14
|
50
|
|
|
|
|
suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
unsigned classes_len = data.next_2B(); |
4400
|
|
|
|
|
|
|
const uint16_t* classes_ptr = data.next(classes_len); |
4401
|
|
|
|
|
|
|
const uint16_t* indices_ptr = data.next(classes_len); |
4402
|
|
|
|
|
|
|
const uint16_t* tags_ptr = data.next(data.next_2B()); |
4403
|
|
|
|
|
|
|
|
4404
|
13
|
|
|
|
|
|
string suffix_str(suffix, len); |
4405
|
28
|
100
|
|
|
|
|
for (unsigned i = 0; i < classes_len; i++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4406
|
15
|
|
|
|
|
|
auto classes_ptr_i = unaligned_load(classes_ptr + i); |
4407
|
15
|
100
|
|
|
|
|
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4408
|
15
|
50
|
|
|
|
|
classes[classes_ptr_i].emplace_back(suffix_str, vector()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4409
|
35
|
100
|
|
|
|
|
for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i), |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4410
|
15
|
|
|
|
|
|
* end = tags_ptr + unaligned_load(indices_ptr + i + 1); |
4411
|
|
|
|
|
|
|
ptr < end; ptr++) |
4412
|
20
|
50
|
|
|
|
|
classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4413
|
|
|
|
|
|
|
} |
4414
|
13
|
|
|
|
|
|
}); |
4415
|
1
|
|
|
|
|
|
} |
4416
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
template |
4418
|
18
|
|
|
|
|
|
void morpho_dictionary::analyze(string_piece form, vector& lemmas) const { |
4419
|
|
|
|
|
|
|
int max_suffix_len = suffixes.max_length(); |
4420
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
uint16_t* suff_stack[16]; vector suff_heap; |
4422
|
18
|
50
|
|
|
|
|
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
int suff_len = 0; |
4424
|
48
|
50
|
|
|
|
|
for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4425
|
48
|
|
|
|
|
|
suff[suff_len] = (uint16_t*) suffixes.at(form.str + i, suff_len, [](pointer_decoder& data) { |
4426
|
0
|
|
|
|
|
|
data.next(2 * data.next_2B()); |
4427
|
|
|
|
|
|
|
data.next(data.next_2B()); |
4428
|
0
|
|
|
|
|
|
}); |
4429
|
48
|
|
|
|
|
|
if (!suff[suff_len]) break; |
4430
|
|
|
|
|
|
|
} |
4431
|
|
|
|
|
|
|
|
4432
|
48
|
100
|
|
|
|
|
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4433
|
30
|
50
|
|
|
|
|
if (unaligned_load(suff[suff_len])) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4434
|
30
|
|
|
|
|
|
unsigned suff_classes = unaligned_load(suff[suff_len]); |
4435
|
30
|
|
|
|
|
|
uint16_t* suff_data = suff[suff_len] + 1; |
4436
|
|
|
|
|
|
|
|
4437
|
60
|
50
|
|
|
|
|
roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4438
|
|
|
|
|
|
|
uint16_t root_class = root_data.next_2B(); |
4439
|
|
|
|
|
|
|
unsigned lemma_offset = root_data.next_4B(); |
4440
|
|
|
|
|
|
|
unsigned lemma_len = root_data.next_1B(); |
4441
|
|
|
|
|
|
|
|
4442
|
60
|
100
|
|
|
|
|
if (small_memeq(form.str, root, root_len)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4443
|
16
|
|
|
|
|
|
uint16_t* suffix_class_ptr = unaligned_lower_bound(suff_data, suff_classes, root_class); |
4444
|
10
|
50
|
|
|
|
|
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4445
|
38
|
|
|
|
|
|
const unsigned char* lemma_data = this->lemmas.data_start(lemma_len) + lemma_offset; |
4446
|
|
|
|
|
|
|
string lemma((const char*)lemma_data, lemma_len); |
4447
|
10
|
50
|
|
|
|
|
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4448
|
|
|
|
|
|
|
|
4449
|
20
|
|
|
|
|
|
uint16_t* suff_tag_indices = suff_data + suff_classes; |
4450
|
10
|
|
|
|
|
|
uint16_t* suff_tags = suff_tag_indices + suff_classes + 1; |
4451
|
28
|
100
|
|
|
|
|
for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4452
|
28
|
|
|
|
|
|
i < unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data) + 1); i++) |
4453
|
18
|
50
|
|
|
|
|
lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
} |
4455
|
|
|
|
|
|
|
} |
4456
|
30
|
|
|
|
|
|
}); |
4457
|
|
|
|
|
|
|
} |
4458
|
18
|
|
|
|
|
|
} |
4459
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
template |
4461
|
0
|
|
|
|
|
|
bool morpho_dictionary::generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms) const { |
4462
|
|
|
|
|
|
|
LemmaAddinfo addinfo; |
4463
|
0
|
0
|
|
|
|
|
int raw_lemma_len = addinfo.parse(lemma); |
|
|
0
|
|
|
|
|
|
4464
|
0
|
|
|
|
|
|
bool matched_lemma = false; |
4465
|
|
|
|
|
|
|
|
4466
|
0
|
0
|
|
|
|
|
lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4467
|
|
|
|
|
|
|
unsigned lemma_info_len = data.next_1B(); |
4468
|
|
|
|
|
|
|
const auto* lemma_info = data.next(lemma_info_len); |
4469
|
|
|
|
|
|
|
unsigned lemma_roots_len = data.next_1B(); |
4470
|
0
|
|
|
|
|
|
auto* lemma_roots_ptr = data.next(lemma_roots_len * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
4471
|
|
|
|
|
|
|
|
4472
|
0
|
0
|
|
|
|
|
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4473
|
0
|
|
|
|
|
|
matched_lemma = true; |
4474
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
vector* forms = nullptr; |
4476
|
|
|
|
|
|
|
pointer_decoder lemma_roots(lemma_roots_ptr); |
4477
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < lemma_roots_len; i++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4478
|
|
|
|
|
|
|
unsigned root_offset = lemma_roots.next_4B(); |
4479
|
|
|
|
|
|
|
unsigned root_len = lemma_roots.next_1B(); |
4480
|
|
|
|
|
|
|
unsigned clas = lemma_roots.next_2B(); |
4481
|
|
|
|
|
|
|
|
4482
|
0
|
|
|
|
|
|
const unsigned char* root_data = roots.data_start(root_len) + root_offset; |
4483
|
0
|
0
|
|
|
|
|
for (auto&& suffix : classes[clas]) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
string root_with_suffix; |
4485
|
0
|
0
|
|
|
|
|
for (auto&& tag : suffix.second) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4486
|
0
|
0
|
|
|
|
|
if (filter.matches(tags[tag].c_str())) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4487
|
0
|
0
|
|
|
|
|
if (!forms) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4488
|
0
|
0
|
|
|
|
|
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4489
|
0
|
|
|
|
|
|
forms = &lemmas_forms.back().forms; |
4490
|
|
|
|
|
|
|
} |
4491
|
|
|
|
|
|
|
|
4492
|
0
|
0
|
|
|
|
|
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4493
|
0
|
0
|
|
|
|
|
root_with_suffix.reserve(root_len + suffix.first.size()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4494
|
|
|
|
|
|
|
root_with_suffix.assign((const char*)root_data, root_len); |
4495
|
|
|
|
|
|
|
root_with_suffix.append(suffix.first); |
4496
|
|
|
|
|
|
|
} |
4497
|
|
|
|
|
|
|
|
4498
|
0
|
0
|
|
|
|
|
forms->emplace_back(root_with_suffix, tags[tag]); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
} |
4500
|
|
|
|
|
|
|
} |
4501
|
|
|
|
|
|
|
} |
4502
|
|
|
|
|
|
|
} |
4503
|
0
|
|
|
|
|
|
}); |
4504
|
|
|
|
|
|
|
|
4505
|
0
|
|
|
|
|
|
return matched_lemma; |
4506
|
|
|
|
|
|
|
} |
4507
|
|
|
|
|
|
|
|
4508
|
|
|
|
|
|
|
} // namespace morphodita |
4509
|
|
|
|
|
|
|
|
4510
|
|
|
|
|
|
|
///////// |
4511
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_prefix_guesser.h |
4512
|
|
|
|
|
|
|
///////// |
4513
|
|
|
|
|
|
|
|
4514
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4515
|
|
|
|
|
|
|
// |
4516
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4517
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4518
|
|
|
|
|
|
|
// |
4519
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4520
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4521
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4522
|
|
|
|
|
|
|
|
4523
|
|
|
|
|
|
|
namespace morphodita { |
4524
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
// Declarations |
4526
|
|
|
|
|
|
|
template |
4527
|
0
|
|
|
|
|
|
class morpho_prefix_guesser { |
4528
|
|
|
|
|
|
|
public: |
4529
|
0
|
|
|
|
|
|
morpho_prefix_guesser(const MorphoDictionary& dictionary) : dictionary(dictionary) {} |
4530
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
void load(binary_decoder& data); |
4532
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas); |
4533
|
|
|
|
|
|
|
bool generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms); |
4534
|
|
|
|
|
|
|
|
4535
|
|
|
|
|
|
|
private: |
4536
|
|
|
|
|
|
|
const MorphoDictionary& dictionary; |
4537
|
|
|
|
|
|
|
vector tag_filters; |
4538
|
|
|
|
|
|
|
persistent_unordered_map prefixes_initial, prefixes_middle; |
4539
|
|
|
|
|
|
|
}; |
4540
|
|
|
|
|
|
|
|
4541
|
|
|
|
|
|
|
// Definitions |
4542
|
|
|
|
|
|
|
template |
4543
|
0
|
|
|
|
|
|
void morpho_prefix_guesser::load(binary_decoder& data) { |
4544
|
|
|
|
|
|
|
// Load and construct tag filters |
4545
|
0
|
0
|
|
|
|
|
for (unsigned tag_filters_len = data.next_1B(); tag_filters_len; tag_filters_len--) { |
4546
|
0
|
|
|
|
|
|
unsigned tag_filter_len = data.next_1B(); |
4547
|
0
|
|
|
|
|
|
string tag_filter(data.next(tag_filter_len), tag_filter_len); |
4548
|
|
|
|
|
|
|
|
4549
|
0
|
0
|
|
|
|
|
tag_filters.emplace_back(tag_filter.c_str()); |
4550
|
|
|
|
|
|
|
} |
4551
|
|
|
|
|
|
|
|
4552
|
|
|
|
|
|
|
// Load prefixes |
4553
|
0
|
|
|
|
|
|
prefixes_initial.load(data); |
4554
|
0
|
|
|
|
|
|
prefixes_middle.load(data); |
4555
|
0
|
|
|
|
|
|
} |
4556
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
// Analyze can return non-unique lemma-tag pairs. |
4558
|
|
|
|
|
|
|
template |
4559
|
0
|
|
|
|
|
|
void morpho_prefix_guesser::analyze(string_piece form, vector& lemmas) { |
4560
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
4561
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
vector form_tmp; |
4563
|
|
|
|
|
|
|
vector middle_masks; |
4564
|
0
|
0
|
|
|
|
|
middle_masks.reserve(form.len); |
4565
|
|
|
|
|
|
|
|
4566
|
0
|
0
|
|
|
|
|
for (unsigned initial = 0; initial < form.len; initial++) { |
4567
|
|
|
|
|
|
|
// Match the initial prefix. |
4568
|
0
|
|
|
|
|
|
unsigned initial_mask = (1<
|
4569
|
0
|
0
|
|
|
|
|
if (initial) { |
4570
|
0
|
|
|
|
|
|
auto found = prefixes_initial.at_typed(form.str, initial); |
4571
|
0
|
0
|
|
|
|
|
if (!found) break; |
4572
|
0
|
|
|
|
|
|
initial_mask = unaligned_load(found); |
4573
|
|
|
|
|
|
|
} |
4574
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
// If we have found an initial prefix (including the empty one), match middle prefixes. |
4576
|
0
|
0
|
|
|
|
|
if (initial_mask) { |
4577
|
0
|
0
|
|
|
|
|
middle_masks.resize(initial); |
4578
|
0
|
0
|
|
|
|
|
middle_masks.emplace_back(initial_mask); |
4579
|
0
|
0
|
|
|
|
|
for (unsigned middle = initial; middle < middle_masks.size(); middle++) { |
4580
|
0
|
0
|
|
|
|
|
if (!middle_masks[middle]) continue; |
4581
|
|
|
|
|
|
|
// Try matching middle prefixes from current index. |
4582
|
0
|
0
|
|
|
|
|
for (unsigned i = middle + 1; i < form.len; i++) { |
4583
|
0
|
|
|
|
|
|
auto found = prefixes_middle.at_typed(form.str + middle, i - middle); |
4584
|
0
|
0
|
|
|
|
|
if (!found) break; |
4585
|
0
|
0
|
|
|
|
|
if (unaligned_load(found)) { |
4586
|
0
|
0
|
|
|
|
|
if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1); |
|
|
0
|
|
|
|
|
|
4587
|
0
|
|
|
|
|
|
middle_masks[i] |= middle_masks[middle] & unaligned_load(found); |
4588
|
|
|
|
|
|
|
} |
4589
|
|
|
|
|
|
|
} |
4590
|
|
|
|
|
|
|
|
4591
|
|
|
|
|
|
|
// Try matching word forms if at least one middle prefix was found. |
4592
|
0
|
0
|
|
|
|
|
if (middle > initial && middle < form.len ) { |
|
|
0
|
|
|
|
|
|
4593
|
0
|
0
|
|
|
|
|
if (initial) { |
4594
|
0
|
0
|
|
|
|
|
if (form_tmp.empty()) form_tmp.assign(form.str, form.str + form.len); |
4595
|
0
|
|
|
|
|
|
small_memcpy(form_tmp.data() + middle - initial, form.str, initial); |
4596
|
|
|
|
|
|
|
} |
4597
|
0
|
|
|
|
|
|
unsigned lemmas_ori_size = lemmas.size(); |
4598
|
0
|
0
|
|
|
|
|
dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas); |
|
|
0
|
|
|
|
|
|
4599
|
|
|
|
|
|
|
unsigned lemmas_new_size = lemmas_ori_size; |
4600
|
0
|
0
|
|
|
|
|
for (unsigned i = lemmas_ori_size; i < lemmas.size(); i++) { |
4601
|
0
|
0
|
|
|
|
|
for (unsigned filter = 0; filter < tag_filters.size(); filter++) |
4602
|
0
|
0
|
|
|
|
|
if ((middle_masks[middle] & (1<
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4603
|
0
|
0
|
|
|
|
|
if (i == lemmas_new_size) { |
4604
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.insert(0, form.str + initial, middle - initial); |
4605
|
|
|
|
|
|
|
} else { |
4606
|
0
|
0
|
|
|
|
|
lemmas[lemmas_new_size].lemma.reserve(lemmas[i].lemma.size() + middle - initial); |
4607
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.assign(form.str + initial, middle - initial); |
4608
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.append(lemmas[i].lemma); |
4609
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].tag = lemmas[i].tag; |
4610
|
|
|
|
|
|
|
} |
4611
|
0
|
|
|
|
|
|
lemmas_new_size++; |
4612
|
0
|
|
|
|
|
|
break; |
4613
|
|
|
|
|
|
|
} |
4614
|
|
|
|
|
|
|
} |
4615
|
0
|
0
|
|
|
|
|
if (lemmas_new_size < lemmas.size()) lemmas.erase(lemmas.begin() + lemmas_new_size, lemmas.end()); |
4616
|
|
|
|
|
|
|
} |
4617
|
|
|
|
|
|
|
} |
4618
|
|
|
|
|
|
|
} |
4619
|
|
|
|
|
|
|
} |
4620
|
|
|
|
|
|
|
} |
4621
|
|
|
|
|
|
|
|
4622
|
|
|
|
|
|
|
template |
4623
|
|
|
|
|
|
|
bool morpho_prefix_guesser::generate(string_piece /*lemma*/, const tag_filter& /*filter*/, vector& /*lemmas_forms*/) { |
4624
|
|
|
|
|
|
|
// Not implemented yet. Is it actually needed? |
4625
|
|
|
|
|
|
|
return false; |
4626
|
|
|
|
|
|
|
} |
4627
|
|
|
|
|
|
|
} // namespace morphodita |
4628
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
///////// |
4630
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_statistical_guesser.h |
4631
|
|
|
|
|
|
|
///////// |
4632
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4634
|
|
|
|
|
|
|
// |
4635
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4636
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4637
|
|
|
|
|
|
|
// |
4638
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4639
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4640
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4641
|
|
|
|
|
|
|
|
4642
|
|
|
|
|
|
|
namespace morphodita { |
4643
|
|
|
|
|
|
|
|
4644
|
0
|
|
|
|
|
|
class morpho_statistical_guesser { |
4645
|
|
|
|
|
|
|
public: |
4646
|
|
|
|
|
|
|
void load(binary_decoder& data); |
4647
|
|
|
|
|
|
|
typedef vector used_rules; |
4648
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas, used_rules* used); |
4649
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
private: |
4651
|
|
|
|
|
|
|
vector tags; |
4652
|
|
|
|
|
|
|
unsigned default_tag; |
4653
|
|
|
|
|
|
|
persistent_unordered_map rules; |
4654
|
|
|
|
|
|
|
}; |
4655
|
|
|
|
|
|
|
|
4656
|
|
|
|
|
|
|
} // namespace morphodita |
4657
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
///////// |
4659
|
|
|
|
|
|
|
// File: morphodita/tokenizer/unicode_tokenizer.h |
4660
|
|
|
|
|
|
|
///////// |
4661
|
|
|
|
|
|
|
|
4662
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4663
|
|
|
|
|
|
|
// |
4664
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4665
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4666
|
|
|
|
|
|
|
// |
4667
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4668
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4669
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4670
|
|
|
|
|
|
|
|
4671
|
|
|
|
|
|
|
namespace morphodita { |
4672
|
|
|
|
|
|
|
|
4673
|
4
|
|
|
|
|
|
class unicode_tokenizer : public tokenizer { |
4674
|
|
|
|
|
|
|
public: |
4675
|
|
|
|
|
|
|
enum { URL_EMAIL_LATEST = 2 }; |
4676
|
|
|
|
|
|
|
unicode_tokenizer(unsigned url_email_tokenizer); |
4677
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) override; |
4679
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) override; |
4680
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) = 0; |
4682
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
protected: |
4684
|
|
|
|
|
|
|
struct char_info { |
4685
|
|
|
|
|
|
|
char32_t chr; |
4686
|
|
|
|
|
|
|
unilib::unicode::category_t cat; |
4687
|
|
|
|
|
|
|
const char* str; |
4688
|
|
|
|
|
|
|
|
4689
|
152
|
|
|
|
|
|
char_info(char32_t chr, const char* str) : chr(chr), cat(unilib::unicode::category(chr)), str(str) {} |
4690
|
|
|
|
|
|
|
}; |
4691
|
|
|
|
|
|
|
vector chars; |
4692
|
|
|
|
|
|
|
size_t current; |
4693
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
bool tokenize_url_email(vector& tokens); |
4695
|
|
|
|
|
|
|
bool emergency_sentence_split(const vector& tokens); |
4696
|
|
|
|
|
|
|
bool is_eos(const vector& tokens, char32_t eos_chr, const unordered_set* abbreviations); |
4697
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
private: |
4699
|
|
|
|
|
|
|
unsigned url_email_tokenizer; |
4700
|
|
|
|
|
|
|
string text_buffer; |
4701
|
|
|
|
|
|
|
vector tokens_buffer; |
4702
|
|
|
|
|
|
|
string eos_buffer; |
4703
|
|
|
|
|
|
|
}; |
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
} // namespace morphodita |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
///////// |
4708
|
|
|
|
|
|
|
// File: morphodita/tokenizer/ragel_tokenizer.h |
4709
|
|
|
|
|
|
|
///////// |
4710
|
|
|
|
|
|
|
|
4711
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4712
|
|
|
|
|
|
|
// |
4713
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4714
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4715
|
|
|
|
|
|
|
// |
4716
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4717
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4718
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4719
|
|
|
|
|
|
|
|
4720
|
|
|
|
|
|
|
namespace morphodita { |
4721
|
|
|
|
|
|
|
|
4722
|
3
|
|
|
|
|
|
class ragel_tokenizer : public unicode_tokenizer { |
4723
|
|
|
|
|
|
|
public: |
4724
|
|
|
|
|
|
|
ragel_tokenizer(unsigned url_email_tokenizer); |
4725
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
protected: |
4727
|
|
|
|
|
|
|
static inline uint8_t ragel_char(const char_info& chr); |
4728
|
|
|
|
|
|
|
|
4729
|
|
|
|
|
|
|
private: |
4730
|
|
|
|
|
|
|
static void initialize_ragel_map(); |
4731
|
|
|
|
|
|
|
static vector ragel_map; |
4732
|
|
|
|
|
|
|
static atomic_flag ragel_map_flag; |
4733
|
|
|
|
|
|
|
static void ragel_map_add(char32_t chr, uint8_t mapping); |
4734
|
|
|
|
|
|
|
|
4735
|
|
|
|
|
|
|
friend class unicode_tokenizer; |
4736
|
|
|
|
|
|
|
static bool ragel_url_email(unsigned version, const vector& chars, size_t& current_char, vector& tokens); |
4737
|
|
|
|
|
|
|
}; |
4738
|
|
|
|
|
|
|
|
4739
|
|
|
|
|
|
|
uint8_t ragel_tokenizer::ragel_char(const char_info& chr) { |
4740
|
59
|
50
|
|
|
|
|
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4741
|
|
|
|
|
|
|
} |
4742
|
|
|
|
|
|
|
|
4743
|
|
|
|
|
|
|
} // namespace morphodita |
4744
|
|
|
|
|
|
|
|
4745
|
|
|
|
|
|
|
///////// |
4746
|
|
|
|
|
|
|
// File: morphodita/tokenizer/czech_tokenizer.h |
4747
|
|
|
|
|
|
|
///////// |
4748
|
|
|
|
|
|
|
|
4749
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4750
|
|
|
|
|
|
|
// |
4751
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4752
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4753
|
|
|
|
|
|
|
// |
4754
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4755
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4756
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4757
|
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
namespace morphodita { |
4759
|
|
|
|
|
|
|
|
4760
|
0
|
|
|
|
|
|
class czech_tokenizer : public ragel_tokenizer { |
4761
|
|
|
|
|
|
|
public: |
4762
|
|
|
|
|
|
|
enum tokenizer_language { CZECH = 0, SLOVAK = 1 }; |
4763
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
4764
|
|
|
|
|
|
|
czech_tokenizer(tokenizer_language language, unsigned version, const morpho* m = nullptr); |
4765
|
|
|
|
|
|
|
|
4766
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
4767
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
private: |
4769
|
|
|
|
|
|
|
const morpho* m; |
4770
|
|
|
|
|
|
|
const unordered_set* abbreviations; |
4771
|
|
|
|
|
|
|
vector lemmas; |
4772
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
void merge_hyphenated(vector& tokens); |
4774
|
|
|
|
|
|
|
|
4775
|
|
|
|
|
|
|
static const unordered_set abbreviations_czech; |
4776
|
|
|
|
|
|
|
static const unordered_set abbreviations_slovak; |
4777
|
|
|
|
|
|
|
}; |
4778
|
|
|
|
|
|
|
|
4779
|
|
|
|
|
|
|
} // namespace morphodita |
4780
|
|
|
|
|
|
|
|
4781
|
|
|
|
|
|
|
///////// |
4782
|
|
|
|
|
|
|
// File: morphodita/morpho/czech_morpho.h |
4783
|
|
|
|
|
|
|
///////// |
4784
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4786
|
|
|
|
|
|
|
// |
4787
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4788
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4789
|
|
|
|
|
|
|
// |
4790
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4791
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4792
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4793
|
|
|
|
|
|
|
|
4794
|
|
|
|
|
|
|
namespace morphodita { |
4795
|
|
|
|
|
|
|
|
4796
|
0
|
|
|
|
|
|
class czech_morpho : public morpho { |
4797
|
|
|
|
|
|
|
public: |
4798
|
|
|
|
|
|
|
using morpho_language = czech_tokenizer::tokenizer_language; |
4799
|
|
|
|
|
|
|
|
4800
|
0
|
0
|
|
|
|
|
czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4801
|
|
|
|
|
|
|
|
4802
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
4803
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
4804
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
4805
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
4806
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
4807
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
4808
|
|
|
|
|
|
|
|
4809
|
|
|
|
|
|
|
bool load(istream& is); |
4810
|
|
|
|
|
|
|
private: |
4811
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
4812
|
|
|
|
|
|
|
|
4813
|
|
|
|
|
|
|
morpho_language language; |
4814
|
|
|
|
|
|
|
unsigned version; |
4815
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
4816
|
|
|
|
|
|
|
unique_ptr> prefix_guesser; |
4817
|
|
|
|
|
|
|
unique_ptr statistical_guesser; |
4818
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
string unknown_tag = "X@-------------"; |
4820
|
|
|
|
|
|
|
string number_tag = "C=-------------"; |
4821
|
|
|
|
|
|
|
string punctuation_tag = "Z:-------------"; |
4822
|
|
|
|
|
|
|
}; |
4823
|
|
|
|
|
|
|
|
4824
|
|
|
|
|
|
|
} // namespace morphodita |
4825
|
|
|
|
|
|
|
|
4826
|
|
|
|
|
|
|
///////// |
4827
|
|
|
|
|
|
|
// File: morphodita/morpho/czech_morpho.cpp |
4828
|
|
|
|
|
|
|
///////// |
4829
|
|
|
|
|
|
|
|
4830
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
4831
|
|
|
|
|
|
|
// |
4832
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
4833
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
4834
|
|
|
|
|
|
|
// |
4835
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
4836
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
4837
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
4838
|
|
|
|
|
|
|
|
4839
|
|
|
|
|
|
|
namespace morphodita { |
4840
|
|
|
|
|
|
|
|
4841
|
0
|
|
|
|
|
|
bool czech_morpho::load(istream& is) { |
4842
|
|
|
|
|
|
|
binary_decoder data; |
4843
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
0
|
|
|
|
|
|
4844
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
try { |
4846
|
|
|
|
|
|
|
// Load tag length |
4847
|
0
|
0
|
|
|
|
|
unsigned tag_length = data.next_1B(); |
4848
|
0
|
0
|
|
|
|
|
if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length); |
|
|
0
|
|
|
|
|
|
4849
|
0
|
0
|
|
|
|
|
if (tag_length < number_tag.size()) number_tag.erase(tag_length); |
|
|
0
|
|
|
|
|
|
4850
|
0
|
0
|
|
|
|
|
if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length); |
|
|
0
|
|
|
|
|
|
4851
|
|
|
|
|
|
|
|
4852
|
|
|
|
|
|
|
// Load dictionary |
4853
|
0
|
0
|
|
|
|
|
dictionary.load(data); |
4854
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
// Optionally prefix guesser if present |
4856
|
0
|
|
|
|
|
|
prefix_guesser.reset(); |
4857
|
0
|
0
|
|
|
|
|
if (data.next_1B()) { |
|
|
0
|
|
|
|
|
|
4858
|
0
|
0
|
|
|
|
|
prefix_guesser.reset(new morpho_prefix_guesser(dictionary)); |
4859
|
0
|
0
|
|
|
|
|
prefix_guesser->load(data); |
4860
|
|
|
|
|
|
|
} |
4861
|
|
|
|
|
|
|
|
4862
|
|
|
|
|
|
|
// Optionally statistical guesser if present |
4863
|
|
|
|
|
|
|
statistical_guesser.reset(); |
4864
|
0
|
0
|
|
|
|
|
if (data.next_1B()) { |
|
|
0
|
|
|
|
|
|
4865
|
0
|
0
|
|
|
|
|
statistical_guesser.reset(new morpho_statistical_guesser()); |
4866
|
0
|
0
|
|
|
|
|
statistical_guesser->load(data); |
4867
|
|
0
|
|
|
|
|
} |
4868
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
4869
|
|
|
|
|
|
|
return false; |
4870
|
|
|
|
|
|
|
} |
4871
|
|
|
|
|
|
|
|
4872
|
0
|
|
|
|
|
|
return data.is_end(); |
4873
|
|
|
|
|
|
|
} |
4874
|
|
|
|
|
|
|
|
4875
|
0
|
|
|
|
|
|
int czech_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
4876
|
|
|
|
|
|
|
lemmas.clear(); |
4877
|
|
|
|
|
|
|
|
4878
|
0
|
0
|
|
|
|
|
if (form.len) { |
4879
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
4880
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
4881
|
|
|
|
|
|
|
string form_lc; // all lowercase |
4882
|
0
|
0
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
4883
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
4885
|
0
|
0
|
|
|
|
|
dictionary.analyze(form, lemmas); |
4886
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
0
|
|
|
|
|
|
4887
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
0
|
|
|
|
|
|
4888
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
4889
|
|
|
|
|
|
|
|
4890
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers and punctuation. |
4891
|
0
|
0
|
|
|
|
|
analyze_special(form, lemmas); |
4892
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
4893
|
|
|
|
|
|
|
|
4894
|
|
|
|
|
|
|
// For the prefix guesser, use only form_lc. |
4895
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && prefix_guesser) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4896
|
0
|
0
|
|
|
|
|
prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas); |
|
|
0
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
bool prefix_guesser_guesses = !lemmas.empty(); |
4898
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
// For the statistical guesser, use all casing variants. |
4900
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && statistical_guesser) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4901
|
0
|
0
|
|
|
|
|
if (form_uclc.empty() && form_lc.empty()) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
4902
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, nullptr); |
4903
|
|
|
|
|
|
|
else { |
4904
|
0
|
0
|
|
|
|
|
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
4905
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, &used_rules); |
4906
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
4907
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
4908
|
|
|
|
|
|
|
} |
4909
|
|
|
|
|
|
|
} |
4910
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
// Make sure results are unique lemma-tag pairs. Statistical guesser produces |
4912
|
|
|
|
|
|
|
// unique lemma-tag pairs, but prefix guesser does not. |
4913
|
0
|
0
|
|
|
|
|
if (prefix_guesser_guesses) { |
4914
|
0
|
|
|
|
|
|
sort(lemmas.begin(), lemmas.end(), [](const tagged_lemma& a, const tagged_lemma& b) { |
4915
|
0
|
|
|
|
|
|
int lemma_compare = a.lemma.compare(b.lemma); |
4916
|
0
|
0
|
|
|
|
|
return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); |
4917
|
|
|
|
|
|
|
}); |
4918
|
0
|
|
|
|
|
|
auto lemmas_end = unique(lemmas.begin(), lemmas.end(), [](const tagged_lemma& a, const tagged_lemma& b) { |
4919
|
0
|
0
|
|
|
|
|
return a.lemma == b.lemma && a.tag == b.tag; |
|
|
0
|
|
|
|
|
|
4920
|
0
|
|
|
|
|
|
}); |
4921
|
0
|
0
|
|
|
|
|
if (lemmas_end != lemmas.end()) lemmas.erase(lemmas_end, lemmas.end()); |
4922
|
|
|
|
|
|
|
} |
4923
|
|
|
|
|
|
|
|
4924
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
4925
|
|
|
|
|
|
|
} |
4926
|
|
|
|
|
|
|
|
4927
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
4928
|
0
|
|
|
|
|
|
return -1; |
4929
|
|
|
|
|
|
|
} |
4930
|
|
|
|
|
|
|
|
4931
|
0
|
|
|
|
|
|
int czech_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode guesser, vector& forms) const { |
4932
|
|
|
|
|
|
|
forms.clear(); |
4933
|
|
|
|
|
|
|
|
4934
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
4935
|
|
|
|
|
|
|
|
4936
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
4937
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
0
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
return NO_GUESSER; |
4939
|
|
|
|
|
|
|
|
4940
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && prefix_guesser) |
|
|
0
|
|
|
|
|
|
4941
|
|
|
|
|
|
|
if (prefix_guesser->generate(lemma, filter, forms)) |
4942
|
|
|
|
|
|
|
return GUESSER; |
4943
|
|
|
|
|
|
|
} |
4944
|
|
|
|
|
|
|
|
4945
|
|
|
|
|
|
|
return -1; |
4946
|
|
|
|
|
|
|
} |
4947
|
|
|
|
|
|
|
|
4948
|
0
|
|
|
|
|
|
int czech_morpho::raw_lemma_len(string_piece lemma) const { |
4949
|
0
|
|
|
|
|
|
return czech_lemma_addinfo::raw_lemma_len(lemma); |
4950
|
|
|
|
|
|
|
} |
4951
|
|
|
|
|
|
|
|
4952
|
0
|
|
|
|
|
|
int czech_morpho::lemma_id_len(string_piece lemma) const { |
4953
|
0
|
|
|
|
|
|
return czech_lemma_addinfo::lemma_id_len(lemma); |
4954
|
|
|
|
|
|
|
} |
4955
|
|
|
|
|
|
|
|
4956
|
0
|
|
|
|
|
|
int czech_morpho::raw_form_len(string_piece form) const { |
4957
|
0
|
|
|
|
|
|
return form.len; |
4958
|
|
|
|
|
|
|
} |
4959
|
|
|
|
|
|
|
|
4960
|
0
|
|
|
|
|
|
tokenizer* czech_morpho::new_tokenizer() const { |
4961
|
0
|
0
|
|
|
|
|
return new czech_tokenizer(language, version, this); |
4962
|
|
|
|
|
|
|
} |
4963
|
|
|
|
|
|
|
|
4964
|
|
|
|
|
|
|
// What characters are considered punctuation except for the ones in unicode Punctuation category. |
4965
|
|
|
|
|
|
|
static bool punctuation_additional[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*$*/, |
4966
|
|
|
|
|
|
|
0,0,0,0,0,0,1/*+*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*<*/,1/*=*/,1/*>*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4967
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,1/*^*/,0,1/*`*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*|*/,0,1/*~*/,0,0,0,0,0,0,0,0, |
4968
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4969
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4970
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4971
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4972
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4973
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4974
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4975
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4976
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4977
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*caron*/}; |
4978
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
// What characters of unicode Punctuation category are not considered punctuation. |
4980
|
|
|
|
|
|
|
static bool punctuation_exceptions[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4981
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4982
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
4983
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,1/*paragraph*/}; |
4984
|
|
|
|
|
|
|
|
4985
|
0
|
|
|
|
|
|
void czech_morpho::analyze_special(string_piece form, vector& lemmas) const { |
4986
|
|
|
|
|
|
|
using namespace unilib; |
4987
|
|
|
|
|
|
|
|
4988
|
|
|
|
|
|
|
// Analyzer for numbers and punctuation. |
4989
|
|
|
|
|
|
|
// Number is anything matching [+-]? is_Pn* ([.,] is_Pn*)? ([Ee] [+-]? is_Pn+)? for at least one is_Pn* nonempty. |
4990
|
|
|
|
|
|
|
// Punctuation is any form beginning with either unicode punctuation or punctuation_exceptions character. |
4991
|
|
|
|
|
|
|
// Beware that numbers takes precedence, so - is punctuation, -3 is number, -. is punctuation, -.3 is number. |
4992
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
4993
|
|
|
|
|
|
|
|
4994
|
0
|
|
|
|
|
|
string_piece form_ori = form; |
4995
|
0
|
|
|
|
|
|
char32_t first = utf8::decode(form.str, form.len); |
4996
|
|
|
|
|
|
|
|
4997
|
|
|
|
|
|
|
// Try matching a number. |
4998
|
|
|
|
|
|
|
char32_t codepoint = first; |
4999
|
|
|
|
|
|
|
bool any_digit = false; |
5000
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
5001
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
5002
|
0
|
0
|
|
|
|
|
if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5003
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
5004
|
0
|
0
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
0
|
|
|
|
|
|
5005
|
0
|
|
|
|
|
|
codepoint = utf8::decode(form.str, form.len); |
5006
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
5007
|
|
|
|
|
|
|
any_digit = false; |
5008
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
5009
|
|
|
|
|
|
|
} |
5010
|
|
|
|
|
|
|
|
5011
|
0
|
0
|
|
|
|
|
if (any_digit && !form.len && (!codepoint || codepoint == '.')) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5012
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), number_tag); |
5013
|
0
|
0
|
|
|
|
|
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5014
|
0
|
0
|
|
|
|
|
((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first]))) |
|
|
0
|
|
|
|
|
|
5015
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
5016
|
|
|
|
|
|
|
} |
5017
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
} // namespace morphodita |
5019
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
///////// |
5021
|
|
|
|
|
|
|
// File: morphodita/morpho/english_lemma_addinfo.h |
5022
|
|
|
|
|
|
|
///////// |
5023
|
|
|
|
|
|
|
|
5024
|
|
|
|
|
|
|
// This file is part of MorphoDiTa.
5025
|
|
|
|
|
|
|
// |
5026
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5027
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5028
|
|
|
|
|
|
|
// |
5029
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5030
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5031
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5032
|
|
|
|
|
|
|
|
5033
|
|
|
|
|
|
|
namespace morphodita { |
5034
|
|
|
|
|
|
|
|
5035
|
|
|
|
|
|
|
// Declarations |
5036
|
0
|
|
|
|
|
|
struct english_lemma_addinfo { |
5037
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
5038
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
5039
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
5040
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
5041
|
|
|
|
|
|
|
|
5042
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
5043
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
5044
|
|
|
|
|
|
|
|
5045
|
|
|
|
|
|
|
vector data; |
5046
|
|
|
|
|
|
|
}; |
5047
|
|
|
|
|
|
|
|
5048
|
|
|
|
|
|
|
// Definitions |
5049
|
0
|
|
|
|
|
|
// Returns the length of the raw lemma contained in `lemma`, i.e. the offset at
// which the additional info starts, or lemma.len when there is none.
//
// The raw lemma ends at the first non-initial position holding either
// - '^' followed by nothing or by [A-Za-z][-A-Za-z]*, or
// - '+' followed by nothing.
int english_lemma_addinfo::raw_lemma_len(string_piece lemma) {
  for (unsigned pos = 1; pos < lemma.len; ++pos) {
    const char marker = lemma.str[pos];

    if (pos + 1 == lemma.len) {
      // Marker on the very last position: both '^' and '+' end the raw lemma.
      if (marker == '^' || marker == '+') return pos;
      continue;
    }

    // Before the last position only '^' may start the additional info.
    if (marker != '^') continue;

    // Everything after '^' must be ASCII letters; '-' is allowed from the
    // second character of the suffix on.
    unsigned i = pos + 1;
    for (; i < lemma.len; ++i) {
      const char c = lemma.str[i];
      const bool letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
      const bool inner_dash = i > pos + 1 && c == '-';
      if (!letter && !inner_dash) break;
    }
    if (i == lemma.len) return pos;
  }

  return lemma.len;
}
5067
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
// English lemmas contain no lemma comments, therefore the lemma id always
// spans the whole lemma.
int english_lemma_addinfo::lemma_id_len(string_piece lemma) {
  return lemma.len;
}
5072
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
// Returns the `addinfo_len` bytes starting at `addinfo` as a string; the
// additional info is stored verbatim, so no decoding takes place.
string english_lemma_addinfo::format(const unsigned char* addinfo, int addinfo_len) {
  const char* text = reinterpret_cast<const char*>(addinfo);
  return string(text, addinfo_len);
}
5076
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
// Every English lemma is generatable regardless of its additional info, so
// both arguments are ignored.
bool english_lemma_addinfo::generatable(const unsigned char* /*addinfo*/, int /*addinfo_len*/) {
  return true;
}
5080
|
|
|
|
|
|
|
|
5081
|
0
|
|
|
|
|
|
// Splits `lemma` into the raw lemma and its additional info. The additional
// info (everything from raw_lemma_len(lemma) to the end) replaces the contents
// of `data`; the raw lemma length is returned. The second argument is unused.
int english_lemma_addinfo::parse(string_piece lemma, bool /*die_on_failure*/) {
  const size_t raw_len = raw_lemma_len(lemma);

  // Replace the previous contents with the bytes following the raw lemma.
  data.assign(lemma.str + raw_len, lemma.str + lemma.len);

  return raw_len;
}
5090
|
|
|
|
|
|
|
|
5091
|
0
|
|
|
|
|
|
bool english_lemma_addinfo::match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len) { |
5092
|
0
|
0
|
|
|
|
|
if (data.empty()) return true; |
5093
|
0
|
0
|
|
|
|
|
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5094
|
0
|
0
|
|
|
|
|
if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5095
|
0
|
0
|
|
|
|
|
return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len); |
|
|
0
|
|
|
|
|
|
5096
|
|
|
|
|
|
|
} |
5097
|
|
|
|
|
|
|
|
5098
|
|
|
|
|
|
|
} // namespace morphodita |
5099
|
|
|
|
|
|
|
|
5100
|
|
|
|
|
|
|
///////// |
5101
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho_guesser.h |
5102
|
|
|
|
|
|
|
///////// |
5103
|
|
|
|
|
|
|
|
5104
|
|
|
|
|
|
|
// This file is part of MorphoDiTa.
5105
|
|
|
|
|
|
|
// |
5106
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5107
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5108
|
|
|
|
|
|
|
// |
5109
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5110
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5111
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5112
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
namespace morphodita { |
5114
|
|
|
|
|
|
|
|
5115
|
0
|
0
|
|
|
|
|
class english_morpho_guesser { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5116
|
|
|
|
|
|
|
public: |
5117
|
|
|
|
|
|
|
void load(binary_decoder& data); |
5118
|
|
|
|
|
|
|
void analyze(string_piece form, string_piece form_lc, vector& lemmas) const; |
5119
|
|
|
|
|
|
|
bool analyze_proper_names(string_piece form, string_piece form_lc, vector& lemmas) const; |
5120
|
|
|
|
|
|
|
|
5121
|
|
|
|
|
|
|
private: |
5122
|
|
|
|
|
|
|
inline void add(const string& tag, const string& form, vector& lemmas) const; |
5123
|
|
|
|
|
|
|
inline void add(const string& tag, const string& tag2, const string& form, vector& lemmas) const; |
5124
|
|
|
|
|
|
|
inline void add(const string& tag, const string& form, unsigned negation_len, vector& lemmas) const; |
5125
|
|
|
|
|
|
|
inline void add(const string& tag, const string& tag2, const string& form, unsigned negation_len, vector& lemmas) const; |
5126
|
|
|
|
|
|
|
void add_NNS(const string& form, unsigned negation_len, vector& lemmas) const; |
5127
|
|
|
|
|
|
|
void add_NNPS(const string& form, vector& lemmas) const; |
5128
|
|
|
|
|
|
|
void add_VBG(const string& form, vector& lemmas) const; |
5129
|
|
|
|
|
|
|
void add_VBD_VBN(const string& form, vector& lemmas) const; |
5130
|
|
|
|
|
|
|
void add_VBZ(const string& form, vector& lemmas) const; |
5131
|
|
|
|
|
|
|
void add_JJR_RBR(const string& form, unsigned negation_len, vector& lemmas) const; |
5132
|
|
|
|
|
|
|
void add_JJS_RBS(const string& form, unsigned negation_len, vector& lemmas) const; |
5133
|
|
|
|
|
|
|
|
5134
|
|
|
|
|
|
|
enum { NEGATION_LEN = 0, TO_FOLLOW = 1, TOTAL = 2 }; |
5135
|
|
|
|
|
|
|
vector exceptions_tags; |
5136
|
|
|
|
|
|
|
persistent_unordered_map exceptions; |
5137
|
|
|
|
|
|
|
persistent_unordered_map negations; |
5138
|
|
|
|
|
|
|
string CD = "CD", FW = "FW", JJ = "JJ", JJR = "JJR", JJS = "JJS", |
5139
|
|
|
|
|
|
|
NN = "NN", NNP = "NNP", NNPS = "NNPS", NNS = "NNS", RB = "RB", |
5140
|
|
|
|
|
|
|
RBR = "RBR", RBS = "RBS", SYM = "SYM", VB = "VB", VBD = "VBD", |
5141
|
|
|
|
|
|
|
VBG = "VBG", VBN = "VBN", VBP = "VBP", VBZ = "VBZ"; |
5142
|
|
|
|
|
|
|
}; |
5143
|
|
|
|
|
|
|
|
5144
|
|
|
|
|
|
|
} // namespace morphodita |
5145
|
|
|
|
|
|
|
|
5146
|
|
|
|
|
|
|
///////// |
5147
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho.h |
5148
|
|
|
|
|
|
|
///////// |
5149
|
|
|
|
|
|
|
|
5150
|
|
|
|
|
|
|
// This file is part of MorphoDiTa.
5151
|
|
|
|
|
|
|
// |
5152
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5153
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5154
|
|
|
|
|
|
|
// |
5155
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5156
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5157
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5158
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
namespace morphodita { |
5160
|
|
|
|
|
|
|
|
5161
|
0
|
|
|
|
|
|
class english_morpho : public morpho { |
5162
|
|
|
|
|
|
|
public: |
5163
|
0
|
0
|
|
|
|
|
english_morpho(unsigned version) : version(version) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
5166
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
5167
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
5168
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
5169
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
5170
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
5171
|
|
|
|
|
|
|
|
5172
|
|
|
|
|
|
|
bool load(istream& is); |
5173
|
|
|
|
|
|
|
private: |
5174
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
5175
|
|
|
|
|
|
|
|
5176
|
|
|
|
|
|
|
unsigned version; |
5177
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
5178
|
|
|
|
|
|
|
english_morpho_guesser morpho_guesser; |
5179
|
|
|
|
|
|
|
|
5180
|
|
|
|
|
|
|
string unknown_tag = "UNK"; |
5181
|
|
|
|
|
|
|
string number_tag = "CD", nnp_tag = "NNP", ls_tag = "LS"; |
5182
|
|
|
|
|
|
|
string open_quotation_tag = "``", close_quotation_tag = "''"; |
5183
|
|
|
|
|
|
|
string open_parenthesis_tag = "(", close_parenthesis_tag = ")"; |
5184
|
|
|
|
|
|
|
string comma_tag = ",", dot_tag = ".", punctuation_tag = ":", hash_tag = "#", dollar_tag = "$"; |
5185
|
|
|
|
|
|
|
string sym_tag = "SYM", jj_tag = "JJ", nn_tag = "NN", nns_tag = "NNS", cc_tag = "CC", pos_tag = "POS", in_tag = "IN"; |
5186
|
|
|
|
|
|
|
}; |
5187
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
} // namespace morphodita |
5189
|
|
|
|
|
|
|
|
5190
|
|
|
|
|
|
|
///////// |
5191
|
|
|
|
|
|
|
// File: morphodita/tokenizer/english_tokenizer.h |
5192
|
|
|
|
|
|
|
///////// |
5193
|
|
|
|
|
|
|
|
5194
|
|
|
|
|
|
|
// This file is part of MorphoDiTa.
5195
|
|
|
|
|
|
|
// |
5196
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5197
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5198
|
|
|
|
|
|
|
// |
5199
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5200
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5201
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5202
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
namespace morphodita { |
5204
|
|
|
|
|
|
|
|
5205
|
0
|
|
|
|
|
|
class english_tokenizer : public ragel_tokenizer { |
5206
|
|
|
|
|
|
|
public: |
5207
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
5208
|
|
|
|
|
|
|
english_tokenizer(unsigned version); |
5209
|
|
|
|
|
|
|
|
5210
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
5211
|
|
|
|
|
|
|
|
5212
|
|
|
|
|
|
|
private: |
5213
|
|
|
|
|
|
|
void split_token(vector& tokens); |
5214
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
static const unordered_set abbreviations; |
5216
|
|
|
|
|
|
|
}; |
5217
|
|
|
|
|
|
|
|
5218
|
|
|
|
|
|
|
} // namespace morphodita |
5219
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
///////// |
5221
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho.cpp |
5222
|
|
|
|
|
|
|
///////// |
5223
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
// This file is part of MorphoDiTa.
5225
|
|
|
|
|
|
|
// |
5226
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5227
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5228
|
|
|
|
|
|
|
// |
5229
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5230
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5231
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5232
|
|
|
|
|
|
|
|
5233
|
|
|
|
|
|
|
namespace morphodita { |
5234
|
|
|
|
|
|
|
|
5235
|
0
|
|
|
|
|
|
// Loads the model from `is`: the stream is decoded by compressor::load and the
// dictionary and the guesser are then read from the decoded data, in this
// order. Returns true only if every step succeeds and no data is left over.
bool english_morpho::load(istream& is) {
  binary_decoder data;

  bool loaded = compressor::load(is, data);
  if (loaded) {
    try {
      dictionary.load(data);
      morpho_guesser.load(data);
    } catch (binary_decoder_error&) {
      // Malformed data are reported as a plain failure.
      loaded = false;
    }
  }

  // Trailing bytes after the guesser data count as a failure too.
  return loaded && data.is_end();
}
5248
|
|
|
|
|
|
|
|
5249
|
0
|
|
|
|
|
|
// Analyzes `form` and stores the analyses in `lemmas` (cleared first).
//
// Returns NO_GUESSER when the analyses come from the dictionary or from
// analyze_special, GUESSER when the guesser contributed, and -1 when nothing
// was found; in that case `lemmas` holds the form itself tagged as unknown.
//
// NOTE(review): the element type of `lemmas` is missing in this listing
// (angle-bracket contents appear to have been stripped).
int english_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const {
  lemmas.clear();

  if (form.len) {
    // Casing variants are generated only if they differ from the given form.
    string form_uclc; // first uppercase, rest lowercase
    string form_lc;   // all lowercase
    generate_casing_variants(form, form_uclc, form_lc);

    // Lowercased form handed to the guesser: the form itself when no distinct
    // lowercase variant exists.
    string_piece lowercased = form_lc.empty() ? form : form_lc;

    // Dictionary lookup of the form and of all its casing variants.
    dictionary.analyze(form, lemmas);
    if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
    if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
    if (!lemmas.empty()) {
      if (guesser == NO_GUESSER) return NO_GUESSER;
      return morpho_guesser.analyze_proper_names(form, lowercased, lemmas) ? GUESSER : NO_GUESSER;
    }

    // Numbers, punctuation and symbols.
    analyze_special(form, lemmas);
    if (!lemmas.empty()) return NO_GUESSER;

    // English guesser, if allowed.
    if (guesser == GUESSER) {
      morpho_guesser.analyze(form, lowercased, lemmas);
      if (!lemmas.empty()) return GUESSER;
    }
  }

  // Nothing found (or an empty form): report the form as unknown.
  lemmas.emplace_back(string(form.str, form.len), unknown_tag);
  return -1;
}
5278
|
|
|
|
|
|
|
|
5279
|
0
|
|
|
|
|
|
// Generates forms of `lemma` whose tags pass `tag_wildcard` and stores them in
// `forms` (cleared first). Only the dictionary is used, the guesser mode is
// ignored. Returns NO_GUESSER when the dictionary generated something, -1
// otherwise (including an empty lemma).
//
// NOTE(review): the element type of `forms` is missing in this listing
// (angle-bracket contents appear to have been stripped).
int english_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const {
  forms.clear();

  tag_filter filter(tag_wildcard);

  if (!lemma.len) return -1;
  if (!dictionary.generate(lemma, filter, forms)) return -1;
  return NO_GUESSER;
}
5291
|
|
|
|
|
|
|
|
5292
|
0
|
|
|
|
|
|
// Length of the raw lemma in `lemma`; delegates to english_lemma_addinfo.
int english_morpho::raw_lemma_len(string_piece lemma) const {
  const int raw_len = english_lemma_addinfo::raw_lemma_len(lemma);
  return raw_len;
}
5295
|
|
|
|
|
|
|
|
5296
|
0
|
|
|
|
|
|
// Length of the lemma id in `lemma`; delegates to english_lemma_addinfo.
int english_morpho::lemma_id_len(string_piece lemma) const {
  const int id_len = english_lemma_addinfo::lemma_id_len(lemma);
  return id_len;
}
5299
|
|
|
|
|
|
|
|
5300
|
0
|
|
|
|
|
|
// The raw form is always the whole form, so its length is returned unchanged.
int english_morpho::raw_form_len(string_piece form) const {
  return form.len;
}
5303
|
|
|
|
|
|
|
|
5304
|
0
|
|
|
|
|
|
// Creates a new english_tokenizer: tokenizer version 1 for models of version
// 2 and lower, tokenizer version 2 otherwise. The result is allocated with
// `new` and returned as a raw pointer.
tokenizer* english_morpho::new_tokenizer() const {
  const unsigned tokenizer_version = version <= 2 ? 1 : 2;
  return new english_tokenizer(tokenizer_version);
}
5307
|
|
|
|
|
|
|
|
5308
|
0
|
|
|
|
|
|
// Analyzes forms that need no dictionary and appends the analyses to `lemmas`:
// 1. single-character punctuation and symbols with fixed tags,
// 2. numbers,
// 3. forms consisting solely of opening quotes, closing quotes, opening
//    brackets, closing brackets, symbols, or other punctuation.
// When nothing matches, `lemmas` is left untouched.
//
// NOTE(review): the element type of `lemmas` is missing in this listing
// (angle-bracket contents appear to have been stripped).
void english_morpho::analyze_special(string_piece form, vector& lemmas) const {
  using namespace unilib;

  // Analyzer for numbers and punctuation.
  if (!form.len) return;

  // One-letter punctuation exceptions.
  if (form.len == 1)
    switch(*form.str) {
      case '.':
      case '!':
      case '?': lemmas.emplace_back(string(form.str, form.len), dot_tag); return;
      case ',': lemmas.emplace_back(string(form.str, form.len), comma_tag); return;
      case '#': lemmas.emplace_back(string(form.str, form.len), hash_tag); return;
      case '$': lemmas.emplace_back(string(form.str, form.len), dollar_tag); return;
      case '[': lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
      case ']': lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
      case '%': lemmas.emplace_back(string(form.str, form.len), jj_tag);
                lemmas.emplace_back(string(form.str, form.len), nn_tag); return;
      case '&': lemmas.emplace_back(string(form.str, form.len), cc_tag);
                lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
      case '*': lemmas.emplace_back(string(form.str, form.len), sym_tag);
                lemmas.emplace_back(string(form.str, form.len), nn_tag); return;
      case '@': lemmas.emplace_back(string(form.str, form.len), sym_tag);
                lemmas.emplace_back(string(form.str, form.len), in_tag); return;
      case '\'': lemmas.emplace_back(string(form.str, form.len), close_quotation_tag);
                 lemmas.emplace_back(string(form.str, form.len), pos_tag); return;
    }

  // Try matching a number: [+-]? is_Pn* (, is_Pn{3})* (. is_Pn*)? (s | [Ee] [+-]? is_Pn+)? with at least one digit
  string_piece number = form;
  char32_t codepoint = utf8::decode(number.str, number.len);
  bool any_digit = false;
  if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
  while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
  while (codepoint == ',') {
    // A comma belongs to the number only when three digits follow it. The
    // digits are consumed from a copy, so a failed group leaves `number` and
    // `codepoint` at the comma. The category of each decoded character is
    // tested; the previous code applied the mask to the decoded codepoint
    // instead of to its category.
    string_piece group = number;
    bool three_digits = true;
    for (int digit = 0; three_digits && digit < 3; digit++)
      three_digits = unicode::category(utf8::decode(group.str, group.len)) & unicode::N;
    if (!three_digits) break;
    any_digit = true;
    number = group;
    codepoint = utf8::decode(number.str, number.len);
  }
  // A dot starts the fractional part only when something follows it; a dot on
  // the last position is handled by the final check below.
  if (codepoint == '.' && number.len) {
    codepoint = utf8::decode(number.str, number.len);
    while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
  }
  // Since version 2, digits followed by a final 's' are a number and also a
  // plural noun whose lemma is the form without the 's'.
  if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
    lemmas.emplace_back(string(form.str, form.len), number_tag);
    lemmas.emplace_back(string(form.str, form.len - 1), nns_tag);
    return;
  }
  // Exponent; it must contain at least one digit of its own.
  if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
    codepoint = utf8::decode(number.str, number.len);
    if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
    any_digit = false;
    while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
  }
  // The whole form must have been consumed, optionally ending with a dot.
  if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
    lemmas.emplace_back(string(form.str, form.len), number_tag);
    lemmas.emplace_back(string(form.str, form.len), nnp_tag);
    // A single digit 1-9, optionally followed by a dot, may also be a list item marker.
    if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
      lemmas.emplace_back(string(form.str, form.len), ls_tag);
    return;
  }

  // Open quotation, end quotation, open parentheses, end parentheses, symbol, or other
  string_piece punctuation = form;
  bool open_quotation = true, close_quotation = true, open_parenthesis = true, close_parenthesis = true, any_punctuation = true, symbol = true;
  // Every flag stays set only while all characters seen so far belong to its
  // class; scanning stops once the form can be neither a symbol nor punctuation.
  while ((symbol || any_punctuation) && punctuation.len) {
    codepoint = utf8::decode(punctuation.str, punctuation.len);
    if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
    if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
    if (open_parenthesis) open_parenthesis = unicode::category(codepoint) & unicode::Ps;
    if (close_parenthesis) close_parenthesis = unicode::category(codepoint) & unicode::Pe;
    if (any_punctuation) any_punctuation = unicode::category(codepoint) & unicode::P;
    if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
  }
  // The first matching class wins; the form must have been consumed completely.
  if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
  if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
  if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
  if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
  if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
  if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
}
5394
|
|
|
|
|
|
|
|
5395
|
|
|
|
|
|
|
} // namespace morphodita |
5396
|
|
|
|
|
|
|
|
5397
|
|
|
|
|
|
|
///////// |
5398
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho_guesser.cpp |
5399
|
|
|
|
|
|
|
///////// |
5400
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
// This file is part of MorphoDiTa.
5402
|
|
|
|
|
|
|
// |
5403
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5404
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5405
|
|
|
|
|
|
|
// |
5406
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5407
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5408
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5409
|
|
|
|
|
|
|
|
5410
|
|
|
|
|
|
|
// This code is a reimplementation of morphologic analyzer Morphium |
5411
|
|
|
|
|
|
|
// by Johanka Spoustova (Treex::Tool::EnglishMorpho::Analysis Perl module) |
5412
|
|
|
|
|
|
|
// and reimplementation of morphologic lemmatizer by Martin Popel |
5413
|
|
|
|
|
|
|
// (Treex::Tool::EnglishMorpho::Lemmatizer Perl module). The latter is based |
5414
|
|
|
|
|
|
|
// on morpha: |
5415
|
|
|
|
|
|
|
// Minnen, G., J. Carroll and D. Pearce (2001). Applied morphological |
5416
|
|
|
|
|
|
|
// processing of English, Natural Language Engineering, 7(3). 207-223. |
5417
|
|
|
|
|
|
|
// Morpha has been released under LGPL as a part of RASP system |
5418
|
|
|
|
|
|
|
// http://ilexir.co.uk/applications/rasp/. |
5419
|
|
|
|
|
|
|
|
5420
|
|
|
|
|
|
|
namespace morphodita { |
5421
|
|
|
|
|
|
|
|
5422
|
0
|
|
|
|
|
|
// Reads the guesser data from `data`: a 2-byte tag count followed by that many
// tags (each a 1-byte length and the tag bytes), then the `exceptions` and the
// `negations` maps. Previously loaded tags are discarded.
void english_morpho_guesser::load(binary_decoder& data) {
  const unsigned tag_count = data.next_2B();

  exceptions_tags.clear();
  exceptions_tags.reserve(tag_count);
  for (unsigned tag = 0; tag < tag_count; ++tag) {
    const unsigned tag_len = data.next_1B();
    exceptions_tags.emplace_back(string(data.next(tag_len), tag_len));
  }

  exceptions.load(data);
  negations.load(data);
}
5434
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
static const char _tag_guesser_actions[] = { |
5436
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
5437
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
5438
|
|
|
|
|
|
|
7, 2, 2, 6, 2, 2, 7, 2, |
5439
|
|
|
|
|
|
|
4, 6, 2, 4, 7, 2, 5, 6, |
5440
|
|
|
|
|
|
|
2, 5, 7, 2, 6, 7, 3, 2, |
5441
|
|
|
|
|
|
|
6, 7, 3, 4, 6, 7, 3, 5, |
5442
|
|
|
|
|
|
|
6, 7 |
5443
|
|
|
|
|
|
|
}; |
5444
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
static const unsigned char _tag_guesser_key_offsets[] = { |
5446
|
|
|
|
|
|
|
0, 19, 26, 34, 42, 50, 58, 66, |
5447
|
|
|
|
|
|
|
74, 82, 90, 100, 108, 116, 124, 132, |
5448
|
|
|
|
|
|
|
145, 153, 161, 168, 179, 195, 212, 220, |
5449
|
|
|
|
|
|
|
228, 236 |
5450
|
|
|
|
|
|
|
}; |
5451
|
|
|
|
|
|
|
|
5452
|
|
|
|
|
|
|
static const char _tag_guesser_trans_keys[] = { |
5453
|
|
|
|
|
|
|
45, 46, 99, 100, 103, 105, 109, 110, |
5454
|
|
|
|
|
|
|
114, 115, 116, 118, 120, 48, 57, 65, |
5455
|
|
|
|
|
|
|
90, 97, 122, 45, 48, 57, 65, 90, |
5456
|
|
|
|
|
|
|
97, 122, 45, 114, 48, 57, 65, 90, |
5457
|
|
|
|
|
|
|
97, 122, 45, 111, 48, 57, 65, 90, |
5458
|
|
|
|
|
|
|
97, 122, 45, 109, 48, 57, 65, 90, |
5459
|
|
|
|
|
|
|
97, 122, 45, 101, 48, 57, 65, 90, |
5460
|
|
|
|
|
|
|
97, 122, 45, 115, 48, 57, 65, 90, |
5461
|
|
|
|
|
|
|
97, 122, 45, 101, 48, 57, 65, 90, |
5462
|
|
|
|
|
|
|
97, 122, 45, 108, 48, 57, 65, 90, |
5463
|
|
|
|
|
|
|
97, 122, 45, 115, 48, 57, 65, 90, |
5464
|
|
|
|
|
|
|
97, 122, 45, 97, 101, 111, 48, 57, |
5465
|
|
|
|
|
|
|
65, 90, 98, 122, 45, 101, 48, 57, |
5466
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 108, 48, 57, |
5467
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 109, 48, 57, |
5468
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 105, 48, 57, |
5469
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 97, 101, 105, |
5470
|
|
|
|
|
|
|
111, 117, 121, 48, 57, 65, 90, 98, |
5471
|
|
|
|
|
|
|
122, 45, 115, 48, 57, 65, 90, 97, |
5472
|
|
|
|
|
|
|
122, 45, 101, 48, 57, 65, 90, 97, |
5473
|
|
|
|
|
|
|
122, 45, 48, 57, 65, 90, 97, 122, |
5474
|
|
|
|
|
|
|
45, 101, 114, 115, 116, 48, 57, 65, |
5475
|
|
|
|
|
|
|
90, 97, 122, 45, 46, 105, 109, 118, |
5476
|
|
|
|
|
|
|
120, 48, 57, 65, 90, 97, 98, 99, |
5477
|
|
|
|
|
|
|
100, 101, 122, 45, 46, 101, 105, 109, |
5478
|
|
|
|
|
|
|
118, 120, 48, 57, 65, 90, 97, 98, |
5479
|
|
|
|
|
|
|
99, 100, 102, 122, 45, 110, 48, 57, |
5480
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 105, 48, 57, |
5481
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 101, 48, 57, |
5482
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 115, 48, 57, |
5483
|
|
|
|
|
|
|
65, 90, 97, 122, 0 |
5484
|
|
|
|
|
|
|
}; |
5485
|
|
|
|
|
|
|
|
5486
|
|
|
|
|
|
|
static const char _tag_guesser_single_lengths[] = { |
5487
|
|
|
|
|
|
|
13, 1, 2, 2, 2, 2, 2, 2, |
5488
|
|
|
|
|
|
|
2, 2, 4, 2, 2, 2, 2, 7, |
5489
|
|
|
|
|
|
|
2, 2, 1, 5, 6, 7, 2, 2, |
5490
|
|
|
|
|
|
|
2, 2 |
5491
|
|
|
|
|
|
|
}; |
5492
|
|
|
|
|
|
|
|
5493
|
|
|
|
|
|
|
static const char _tag_guesser_range_lengths[] = { |
5494
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
5495
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
5496
|
|
|
|
|
|
|
3, 3, 3, 3, 5, 5, 3, 3, |
5497
|
|
|
|
|
|
|
3, 3 |
5498
|
|
|
|
|
|
|
}; |
5499
|
|
|
|
|
|
|
|
5500
|
|
|
|
|
|
|
static const unsigned char _tag_guesser_index_offsets[] = { |
5501
|
|
|
|
|
|
|
0, 17, 22, 28, 34, 40, 46, 52, |
5502
|
|
|
|
|
|
|
58, 64, 70, 78, 84, 90, 96, 102, |
5503
|
|
|
|
|
|
|
113, 119, 125, 130, 139, 151, 164, 170, |
5504
|
|
|
|
|
|
|
176, 182 |
5505
|
|
|
|
|
|
|
}; |
5506
|
|
|
|
|
|
|
|
5507
|
|
|
|
|
|
|
// Transition tables of the tag_guesser state machine (the layout looks
// machine-generated; presumably regenerate rather than hand-edit -- TODO confirm).

// Maps a raw transition slot -- the state's index offset plus the position of
// the matched key, or the trailing default slot -- to a transition id.
static const char _tag_guesser_indicies[] = {
	1, 2, 5, 6, 7, 5, 5, 8,
	9, 10, 11, 5, 5, 3, 4, 4,
	0, 13, 14, 15, 15, 12, 13, 16,
	14, 15, 15, 12, 13, 17, 14, 15,
	15, 12, 13, 18, 14, 15, 15, 12,
	13, 18, 14, 15, 15, 12, 13, 19,
	14, 15, 15, 12, 13, 20, 14, 15,
	15, 12, 13, 18, 14, 15, 15, 12,
	13, 21, 14, 15, 15, 12, 13, 22,
	23, 24, 14, 15, 15, 12, 13, 25,
	14, 15, 15, 12, 13, 23, 14, 15,
	15, 12, 13, 23, 14, 15, 15, 12,
	13, 26, 14, 15, 15, 12, 28, 15,
	15, 15, 15, 15, 15, 29, 26, 26,
	27, 31, 4, 32, 33, 33, 30, 13,
	23, 14, 15, 15, 12, 13, 14, 15,
	15, 12, 13, 34, 35, 36, 37, 14,
	15, 15, 12, 13, 38, 39, 39, 39,
	39, 14, 15, 15, 39, 15, 12, 13,
	38, 40, 39, 39, 39, 39, 14, 15,
	15, 39, 15, 12, 13, 41, 14, 15,
	15, 12, 13, 42, 14, 15, 15, 12,
	13, 18, 14, 15, 15, 12, 13, 43,
	14, 15, 15, 12, 0
};

// Indexed by transition id: the state entered when that transition is taken.
static const char _tag_guesser_trans_targs[] = {
	18, 19, 20, 18, 18, 20, 21, 22,
	23, 24, 16, 25, 18, 19, 18, 1,
	3, 4, 18, 7, 8, 10, 11, 18,
	13, 12, 18, 18, 19, 18, 18, 19,
	18, 18, 2, 5, 6, 9, 20, 20,
	18, 14, 15, 17
};

// Indexed by transition id: offset into _tag_guesser_actions of the action
// list to run on that transition; 0 means the transition has no actions.
static const char _tag_guesser_trans_actions[] = {
	29, 46, 29, 32, 11, 11, 11, 11,
	11, 11, 0, 11, 13, 35, 15, 0,
	0, 0, 1, 0, 0, 0, 0, 3,
	0, 0, 5, 17, 38, 20, 23, 42,
	26, 9, 0, 0, 0, 0, 13, 0,
	7, 0, 0, 0
};

// Indexed by state: offset into _tag_guesser_actions of the action list that
// is run when the input ends while the machine is in that state.
static const char _tag_guesser_eof_actions[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 15, 15, 0, 0,
	0, 0
};

// Initial state of the tag_guesser machine.
static const int tag_guesser_start = 0;
5560
|
|
|
|
|
|
|
|
5561
|
0
|
|
|
|
|
|
// Appends candidate (lemma, tag) analyses of a word to `lemmas`.
//
//   form     - the word as given; only forwarded to analyze_proper_names.
//   form_lc  - the lowercased word; used for the exceptions lookup, the
//              negation-prefix search and the suffix state machine.
//   lemmas   - output; entries are appended, nothing already present is removed.
//
// If `form_lc` is in the exceptions table, only the stored analyses are added.
// Otherwise the default tags (FW, JJ, RB, NN, NNS) are added and the
// tag_guesser state machine adds suffix-dependent tags. In both cases
// analyze_proper_names is called last.
//
// NOTE(review): in this listing `vector` and `data.next(...)` carry no template
// arguments; presumably they were lost in extraction -- confirm against the
// upstream source before compiling.
void english_morpho_guesser::analyze(string_piece form, string_piece form_lc, vector& lemmas) const {
  // Try exceptions list. The lambda walks over one stored entry: a 1-byte
  // count followed by that many pairs of length-prefixed runs.
  auto* exception = exceptions.at(form_lc.str, form_lc.len, [](pointer_decoder& data){
    for (unsigned len = data.next_1B(); len; len--) {
      data.next(data.next_1B());
      data.next(data.next_1B());
    }
  });

  if (exception) {
    // Found in exceptions list: decode each lemma and emit it once per
    // attached tag (tags are 2-byte indices into exceptions_tags).
    pointer_decoder data(exception);
    for (unsigned len = data.next_1B(); len; len--) {
      unsigned lemma_len = data.next_1B();
      string lemma(data.next(lemma_len), lemma_len);
      for (unsigned tags = data.next_1B(); tags; tags--)
        lemmas.emplace_back(lemma, exceptions_tags[data.next_2B()]);
    }
  } else {
    // Try stripping negative prefix and use rule guesser
    string lemma_lc(form_lc.str, form_lc.len);
    // Try finding negative prefix: probe successively longer prefixes and stop
    // at the first one missing from `negations`. A hit with a nonzero
    // NEGATION_LEN counts only when at least TO_FOLLOW characters remain after
    // the prefix; a later (longer) qualifying hit overwrites an earlier one.
    unsigned negation_len = 0;
    for (unsigned prefix = 1; prefix <= form_lc.len; prefix++) {
      auto found = negations.at(form_lc.str, prefix, [](pointer_decoder& data){ data.next(TOTAL); });
      if (!found) break;
      if (found[NEGATION_LEN]) {
        if (form_lc.len - prefix >= found[TO_FOLLOW]) negation_len = found[NEGATION_LEN];
      }
    }

    // Add default tags
    add(FW, lemma_lc, lemmas);
    add(JJ, lemma_lc, negation_len, lemmas);
    add(RB, lemma_lc, negation_len, lemmas);
    add(NN, lemma_lc, negation_len, lemmas);
    add_NNS(lemma_lc, negation_len, lemmas);

    // Add specialized tags. `p` counts steps forward from the start of the
    // form, but every character is fetched from the mirrored position, so the
    // machine consumes the word from its last character to its first.
    const char* p = form_lc.str; int cs;
    // Guards so that each of these tag groups is added at most once.
    bool added_JJR_RBR = false, added_JJS_RBS = false, added_SYM = false, added_CD = false;

	{
	cs = tag_guesser_start;
	}

	{
	int _klen;
	unsigned int _trans;
	const char *_acts;
	unsigned int _nacts;
	const char *_keys;

	// Empty input: skip straight to the end-of-input handling.
	if ( p == ( (form_lc.str + form_lc.len)) )
		goto _test_eof;
_resume:
	_keys = _tag_guesser_trans_keys + _tag_guesser_key_offsets[cs];
	_trans = _tag_guesser_index_offsets[cs];

	// Binary search among the single-character keys of the current state.
	_klen = _tag_guesser_single_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + _klen - 1;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + ((_upper-_lower) >> 1);
			if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < *_mid )
				_upper = _mid - 1;
			else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > *_mid )
				_lower = _mid + 1;
			else {
				_trans += (unsigned int)(_mid - _keys);
				goto _match;
			}
		}
		_keys += _klen;
		_trans += _klen;
	}

	// Binary search among the range keys, stored as consecutive [low, high] pairs.
	_klen = _tag_guesser_range_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + (_klen<<1) - 2;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
			if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < _mid[0] )
				_upper = _mid - 2;
			else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > _mid[1] )
				_lower = _mid + 2;
			else {
				_trans += (unsigned int)((_mid - _keys)>>1);
				goto _match;
			}
		}
		// Nothing matched: _trans now addresses the slot after all keys.
		_trans += _klen;
	}

_match:
	_trans = _tag_guesser_indicies[_trans];
	cs = _tag_guesser_trans_targs[_trans];

	if ( _tag_guesser_trans_actions[_trans] == 0 )
		goto _again;

	// Action list layout: a count followed by that many action ids.
	_acts = _tag_guesser_actions + _tag_guesser_trans_actions[_trans];
	_nacts = (unsigned int) *_acts++;
	while ( _nacts-- > 0 )
	{
		switch ( *_acts++ )
		{
	case 0:
	{ if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); }
	break;
	case 1:
	{ if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); }
	break;
	case 2:
	{ add_VBG(lemma_lc, lemmas); }
	break;
	case 3:
	{ add_VBD_VBN(lemma_lc, lemmas); }
	break;
	case 4:
	{ add_VBZ(lemma_lc, lemmas); }
	break;
	case 5:
	{ add(VB, lemma_lc, lemmas); add(VBP, lemma_lc, lemmas); }
	break;
	case 6:
	{ if (!added_SYM) added_SYM = true, add(SYM, lemma_lc, lemmas); }
	break;
	case 7:
	{ if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); }
	break;
		}
	}

_again:
	if ( ++p != ( (form_lc.str + form_lc.len)) )
		goto _resume;
	_test_eof: {}
	// End of input: run the end-of-input actions of the final state; the only
	// action handled here is the CD one.
	if ( p == ( (form_lc.str + form_lc.len)) )
	{
	const char *__acts = _tag_guesser_actions + _tag_guesser_eof_actions[cs];
	unsigned int __nacts = (unsigned int) *__acts++;
	while ( __nacts-- > 0 ) {
		switch ( *__acts++ ) {
	case 7:
	{ if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); }
	break;
		}
	}
	}

	}

  }

  // Add proper names
  analyze_proper_names(form, form_lc, lemmas);
}
5729
|
|
|
|
|
|
|
|
5730
|
0
|
|
|
|
|
|
// Adds proper-noun analyses (NNP and/or NNPS) of `form` to `lemmas` when they
// are applicable and not already present.
//
// The form qualifies for both NNP and NNPS when `form` and `form_lc` point to
// different buffers. NOTE(review): this is a pointer comparison, not a text
// comparison -- presumably callers pass the very same buffer when lowercasing
// changed nothing; confirm against the caller. A form starting with an
// apostrophe or an ASCII digit additionally qualifies for NNP only.
//
// Returns true if at least one analysis was appended, false otherwise.
bool english_morpho_guesser::analyze_proper_names(string_piece form, string_piece form_lc, vector& lemmas) const {
  const bool buffers_differ = form.str != form_lc.str;

  // NNP if form_lc != form or form.str[0] =~ /[0-9']/, NNPS if form_lc != form
  const bool may_be_NNP = buffers_differ ||
      (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
  const bool may_be_NNPS = buffers_differ;
  if (!(may_be_NNP || may_be_NNPS)) return false;

  // Find out which of the two tags some existing analysis already carries.
  bool has_NNP = false;
  bool has_NNPS = false;
  for (const auto& entry : lemmas) {
    if (entry.tag == NNP) has_NNP = true;
    if (entry.tag == NNPS) has_NNPS = true;
  }

  const bool need_NNP = may_be_NNP && !has_NNP;
  const bool need_NNPS = may_be_NNPS && !has_NNPS;
  if (!need_NNP && !need_NNPS) return false;

  // Proper-name lemmas are built from the original (non-lowercased) form.
  string lemma(form.str, form.len);
  if (need_NNP) add(NNP, lemma, lemmas);
  if (need_NNPS) add_NNPS(lemma, lemmas);
  return true;
}
5748
|
|
|
|
|
|
|
|
5749
|
|
|
|
|
|
|
// Appends one analysis to `lemmas`: `form` is used verbatim as the lemma and
// `tag` as its tag.
inline void english_morpho_guesser::add(const string& tag, const string& form, vector& lemmas) const {
  // The element is constructed in place from (lemma, tag), in that order.
  lemmas.emplace_back(form, tag);
}
5752
|
|
|
|
|
|
|
|
5753
|
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& tag2, const string& form, vector& lemmas) const { |
5754
|
|
|
|
|
|
|
add(tag, form, lemmas); |
5755
|
|
|
|
|
|
|
add(tag2, form, lemmas); |
5756
|
|
|
|
|
|
|
} |
5757
|
|
|
|
|
|
|
|
5758
|
0
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& form, unsigned negation_len, vector& lemmas) const { |
5759
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5760
|
0
|
|
|
|
|
|
} |
5761
|
|
|
|
|
|
|
|
5762
|
0
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& tag2, const string& form, unsigned negation_len, vector& lemmas) const { |
5763
|
0
|
|
|
|
|
|
add(tag, form, negation_len, lemmas); |
5764
|
0
|
|
|
|
|
|
add(tag2, form, negation_len, lemmas); |
5765
|
0
|
|
|
|
|
|
} |
5766
|
|
|
|
|
|
|
|
5767
|
|
|
|
|
|
|
// Common definitions (written backwards)

// REM(str, len): copy of `str` without its last `len` characters.
// REM_ADD(str, len, add): the same copy with `add` appended.
//
// Every macro parameter is parenthesized so that compound arguments such as
// `a + b` expand with the intended precedence. `str` is evaluated twice, so it
// must be free of side effects. `len` must not exceed `str.size()`: the
// subtraction is unsigned and would wrap around.
#define REM(str, len) ((str).substr(0, (str).size() - (len)))
#define REM_ADD(str, len, add) ((str).substr(0, (str).size() - (len)).append(add))
5770
|
|
|
|
|
|
|
|
5771
|
|
|
|
|
|
|
// Tables of the NNS state machine consumed by add_NNS (the layout looks
// machine-generated; presumably regenerate rather than hand-edit -- TODO confirm).

// Action lists: each list is a count followed by that many action ids.
// Transitions refer to a list by its offset in this array.
static const char _NNS_actions[] = {
	0, 1, 0, 1, 1, 1, 2, 1,
	3, 1, 4, 1, 5, 1, 6, 1,
	7, 1, 8, 1, 9, 1, 10, 1,
	11, 1, 12, 1, 13
};

// Indexed by state: offset of the state's first key in _NNS_trans_keys.
static const char _NNS_key_offsets[] = {
	0, 0, 2, 3, 4, 5, 7, 17,
	17, 29, 30, 35, 35, 36, 37, 37,
	37, 44, 45, 53, 63, 72
};

// Character codes compared against the input. Per state: first the single
// keys, then the range keys as consecutive [low, high] pairs.
static const char _NNS_trans_keys[] = {
	110, 115, 101, 109, 101, 99, 115, 98,
	100, 102, 104, 106, 110, 112, 116, 118,
	122, 104, 122, 98, 100, 102, 103, 106,
	110, 112, 116, 118, 120, 111, 97, 101,
	105, 111, 117, 105, 119, 104, 105, 111,
	115, 118, 120, 122, 115, 97, 101, 105,
	110, 111, 114, 115, 117, 98, 100, 102,
	104, 106, 110, 112, 116, 118, 122, 97,
	101, 105, 111, 117, 121, 122, 98, 120,
	0
};

// Indexed by state: number of single-character keys.
static const char _NNS_single_lengths[] = {
	0, 2, 1, 1, 1, 2, 0, 0,
	2, 1, 5, 0, 1, 1, 0, 0,
	7, 1, 8, 0, 7, 0
};

// Indexed by state: number of range keys ([low, high] pairs).
static const char _NNS_range_lengths[] = {
	0, 0, 0, 0, 0, 0, 5, 0,
	5, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 5, 1, 0
};

// Indexed by state: offset of the state's first slot in _NNS_indicies.
static const char _NNS_index_offsets[] = {
	0, 0, 3, 5, 7, 9, 12, 18,
	19, 27, 29, 35, 36, 38, 40, 41,
	42, 50, 52, 61, 67, 76
};

// Maps a raw transition slot (index offset plus matched key position, or the
// trailing default slot) to a transition id.
static const char _NNS_indicies[] = {
	0, 2, 1, 3, 1, 4, 1, 6,
	5, 7, 7, 1, 8, 8, 8, 8,
	8, 1, 9, 11, 10, 10, 10, 10,
	10, 10, 1, 12, 1, 13, 13, 13,
	13, 13, 1, 14, 15, 1, 16, 1,
	17, 1, 18, 19, 20, 21, 22, 7,
	23, 1, 24, 1, 25, 25, 25, 26,
	25, 27, 28, 29, 1, 30, 30, 30,
	30, 30, 1, 31, 31, 31, 31, 31,
	31, 33, 32, 1, 17, 0
};

// Indexed by transition id: the state entered; state 0 stops the machine.
static const char _NNS_trans_targs[] = {
	2, 0, 4, 3, 15, 15, 16, 15,
	7, 15, 15, 17, 15, 11, 15, 13,
	15, 15, 5, 6, 8, 18, 12, 20,
	15, 15, 9, 10, 15, 19, 15, 15,
	14, 21
};

// Indexed by transition id: offset into _NNS_actions, 0 meaning no actions.
static const char _NNS_trans_actions[] = {
	0, 0, 0, 0, 1, 27, 27, 21,
	0, 23, 25, 25, 19, 0, 17, 0,
	5, 11, 0, 0, 0, 21, 0, 21,
	3, 9, 0, 0, 15, 9, 7, 13,
	0, 15
};

// Initial state of the NNS machine.
static const int NNS_start = 1;
5845
|
|
|
|
|
|
|
|
5846
|
0
|
|
|
|
|
|
// Appends an NNS analysis of `form` to `lemmas`.
//
//   form          - the word to analyse.
//   negation_len  - length of a negation prefix at the start of `form`; those
//                   characters are never fed to the state machine and the
//                   value is forwarded to add().
//   lemmas        - output; exactly one entry is appended.
//
// The NNS state machine reads the word from its last character towards the
// negation prefix. Each action proposes a rewrite (`remove` trailing
// characters, then `append` a string) with a priority letter; a proposal is
// taken only if its letter is lower than the best so far. If no action fires,
// the lemma is `form` unchanged.
void english_morpho_guesser::add_NNS(const string& form, unsigned negation_len, vector& lemmas) const {
  const char* p = form.c_str() + negation_len; int cs;
  // 'z' is above every priority letter used by the actions below.
  char best = 'z'; unsigned remove = 0; const char* append = nullptr;

	{
	cs = NNS_start;
	}

	{
	int _klen;
	unsigned int _trans;
	const char *_acts;
	unsigned int _nacts;
	const char *_keys;

	// Nothing to read (empty remainder): leave the defaults in place.
	if ( p == ( (form.c_str() + form.size())) )
		goto _test_eof;
	// State 0 stops the machine.
	if ( cs == 0 )
		goto _out;
_resume:
	_keys = _NNS_trans_keys + _NNS_key_offsets[cs];
	_trans = _NNS_index_offsets[cs];

	// Binary search among the single-character keys of the current state.
	_klen = _NNS_single_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + _klen - 1;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + ((_upper-_lower) >> 1);
			if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
				_upper = _mid - 1;
			else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
				_lower = _mid + 1;
			else {
				_trans += (unsigned int)(_mid - _keys);
				goto _match;
			}
		}
		_keys += _klen;
		_trans += _klen;
	}

	// Binary search among the range keys, stored as consecutive [low, high] pairs.
	_klen = _NNS_range_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + (_klen<<1) - 2;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
			if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
				_upper = _mid - 2;
			else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
				_lower = _mid + 2;
			else {
				_trans += (unsigned int)((_mid - _keys)>>1);
				goto _match;
			}
		}
		// Nothing matched: _trans now addresses the slot after all keys.
		_trans += _klen;
	}

_match:
	_trans = _NNS_indicies[_trans];
	cs = _NNS_trans_targs[_trans];

	if ( _NNS_trans_actions[_trans] == 0 )
		goto _again;

	// Action list layout: a count followed by that many action ids.
	_acts = _NNS_actions + _NNS_trans_actions[_trans];
	_nacts = (unsigned int) *_acts++;
	while ( _nacts-- > 0 )
	{
		switch ( *_acts++ )
		{
	case 0:
	{ if (best > 'a') best = 'a', remove = 2, append = "an"; }
	break;
	case 1:
	{ if (best > 'b') best = 'b', remove = 1, append = nullptr; }
	break;
	case 2:
	{ if (best > 'c') best = 'c', remove = 3, append = "fe"; }
	break;
	case 3:
	{ if (best > 'd') best = 'd', remove = 2, append = nullptr; }
	break;
	case 4:
	{ if (best > 'e') best = 'e', remove = 1, append = nullptr; }
	break;
	case 5:
	{ if (best > 'f') best = 'f', remove = 2, append = nullptr; }
	break;
	case 6:
	{ if (best > 'g') best = 'g', remove = 1, append = nullptr; }
	break;
	case 7:
	{ if (best > 'h') best = 'h', remove = 2, append = nullptr; }
	break;
	case 8:
	{ if (best > 'i') best = 'i', remove = 1, append = nullptr; }
	break;
	case 9:
	{ if (best > 'j') best = 'j', remove = 1, append = nullptr; }
	break;
	case 10:
	{ if (best > 'k') best = 'k', remove = 2, append = nullptr; }
	break;
	case 11:
	{ if (best > 'l') best = 'l', remove = 3, append = "y"; }
	break;
	case 12:
	{ if (best > 'm') best = 'm', remove = 2, append = nullptr; }
	break;
	case 13:
	{ if (best > 'n') best = 'n', remove = 1, append = nullptr; }
	break;
		}
	}

_again:
	if ( cs == 0 )
		goto _out;
	if ( ++p != ( (form.c_str() + form.size())) )
		goto _resume;
	_test_eof: {}
	_out: {}
	}

  // Apply the winning rewrite; add() deals with the negation prefix.
  add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
}
5983
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
// Tables of the NNPS state machine consumed by add_NNPS (the layout looks
// machine-generated; presumably regenerate rather than hand-edit -- TODO confirm).

// Action lists: each list is a count followed by that many action ids.
// Transitions refer to a list by its offset in this array.
static const char _NNPS_actions[] = {
	0, 1, 1, 1, 2, 1, 4, 1,
	5, 1, 6, 1, 7, 1, 8, 1,
	9, 1, 10, 1, 11, 1, 12, 1,
	14, 1, 15, 1, 16, 2, 0, 1,
	2, 3, 4, 2, 13, 14
};

// Indexed by state: offset of the state's first key in _NNPS_trans_keys.
static const unsigned char _NNPS_key_offsets[] = {
	0, 0, 4, 6, 8, 10, 12, 16,
	36, 36, 60, 62, 72, 72, 74, 76,
	78, 78, 98, 98, 100, 102, 104, 104,
	118, 120, 136, 156, 174, 174
};

// Character codes compared against the input. Per state: first the single
// keys, then the range keys as consecutive [low, high] pairs.
static const char _NNPS_trans_keys[] = {
	78, 83, 110, 115, 69, 101, 77, 109,
	77, 109, 69, 101, 67, 83, 99, 115,
	66, 68, 70, 72, 74, 78, 80, 84,
	86, 90, 98, 100, 102, 104, 106, 110,
	112, 116, 118, 122, 72, 90, 104, 122,
	66, 68, 70, 71, 74, 78, 80, 84,
	86, 88, 98, 100, 102, 103, 106, 110,
	112, 116, 118, 120, 79, 111, 65, 69,
	73, 79, 85, 97, 101, 105, 111, 117,
	73, 105, 87, 119, 87, 119, 66, 68,
	70, 72, 74, 78, 80, 84, 86, 90,
	98, 100, 102, 104, 106, 110, 112, 116,
	118, 122, 73, 105, 69, 101, 69, 101,
	72, 73, 79, 83, 86, 88, 90, 104,
	105, 111, 115, 118, 120, 122, 83, 115,
	65, 69, 73, 78, 79, 82, 83, 85,
	97, 101, 105, 110, 111, 114, 115, 117,
	66, 68, 70, 72, 74, 78, 80, 84,
	86, 90, 98, 100, 102, 104, 106, 110,
	112, 116, 118, 122, 65, 69, 73, 79,
	85, 89, 90, 97, 101, 105, 111, 117,
	121, 122, 66, 88, 98, 120, 72, 73,
	79, 83, 86, 88, 90, 104, 105, 111,
	115, 118, 120, 122, 0
};

// Indexed by state: number of single-character keys.
static const char _NNPS_single_lengths[] = {
	0, 4, 2, 2, 2, 2, 4, 0,
	0, 4, 2, 10, 0, 2, 2, 2,
	0, 0, 0, 2, 2, 2, 0, 14,
	2, 16, 0, 14, 0, 14
};

// Indexed by state: number of range keys ([low, high] pairs).
static const char _NNPS_range_lengths[] = {
	0, 0, 0, 0, 0, 0, 0, 10,
	0, 10, 0, 0, 0, 0, 0, 0,
	0, 10, 0, 0, 0, 0, 0, 0,
	0, 0, 10, 2, 0, 0
};

// Indexed by state: offset of the state's first slot in _NNPS_indicies.
static const unsigned char _NNPS_index_offsets[] = {
	0, 0, 5, 8, 11, 14, 17, 22,
	33, 34, 49, 52, 63, 64, 67, 70,
	73, 74, 85, 86, 89, 92, 95, 96,
	111, 114, 131, 142, 159, 160
};

// Maps a raw transition slot (index offset plus matched key position, or the
// trailing default slot) to a transition id.
static const char _NNPS_indicies[] = {
	0, 2, 3, 4, 1, 5, 6, 1,
	7, 8, 1, 8, 8, 1, 10, 11,
	9, 12, 12, 12, 12, 1, 13, 13,
	13, 13, 13, 13, 13, 13, 13, 13,
	1, 14, 16, 15, 16, 15, 15, 15,
	15, 15, 15, 15, 15, 15, 15, 15,
	1, 17, 17, 1, 18, 18, 18, 18,
	18, 18, 18, 18, 18, 18, 1, 19,
	20, 21, 1, 22, 23, 1, 23, 23,
	1, 24, 25, 25, 25, 25, 25, 25,
	25, 25, 25, 25, 1, 26, 21, 21,
	1, 6, 6, 1, 11, 11, 9, 1,
	27, 28, 29, 30, 31, 12, 32, 27,
	33, 29, 30, 34, 12, 32, 1, 35,
	35, 1, 36, 36, 36, 37, 36, 38,
	39, 40, 36, 36, 36, 37, 36, 38,
	39, 40, 1, 41, 41, 41, 41, 41,
	41, 41, 41, 41, 41, 1, 42, 42,
	42, 42, 42, 42, 44, 42, 42, 42,
	42, 42, 42, 44, 43, 43, 1, 24,
	27, 33, 29, 30, 34, 12, 32, 27,
	33, 29, 30, 34, 12, 32, 1, 0
};

// Indexed by transition id: the state entered; state 0 stops the machine.
static const char _NNPS_trans_targs[] = {
	2, 0, 5, 20, 21, 3, 4, 22,
	22, 22, 23, 29, 22, 8, 22, 22,
	24, 22, 12, 22, 14, 15, 22, 22,
	22, 18, 22, 6, 7, 9, 25, 13,
	27, 17, 19, 22, 22, 10, 11, 22,
	26, 22, 22, 16, 28
};

// Indexed by transition id: offset into _NNPS_actions, 0 meaning no actions.
static const char _NNPS_trans_actions[] = {
	0, 0, 0, 0, 0, 0, 0, 29,
	1, 27, 27, 27, 21, 0, 35, 25,
	25, 19, 0, 17, 0, 0, 32, 5,
	11, 0, 23, 0, 0, 0, 21, 0,
	21, 0, 0, 3, 9, 0, 0, 15,
	9, 7, 13, 0, 15
};

// Initial state of the NNPS machine.
static const int NNPS_start = 1;
6091
|
|
|
|
|
|
|
|
6092
|
0
|
|
|
|
|
|
// Appends an NNPS analysis of `form` to `lemmas`.
//
//   form    - the word to analyse, in its original casing.
//   lemmas  - output; exactly one entry is appended.
//
// The NNPS state machine reads the word from its last character to its first.
// Each action proposes a rewrite (`remove` trailing characters, then `append`
// a string) with a priority letter; a proposal is taken only if its letter is
// lower than the best so far. Several rewrites come in an upper-case and a
// lower-case variant ("AN"/"an", "FE"/"fe", "Y"/"y"). If no action fires, the
// lemma is `form` unchanged.
void english_morpho_guesser::add_NNPS(const string& form, vector& lemmas) const {
  const char* p = form.c_str(); int cs;
  // 'z' is above every priority letter used by the actions below.
  char best = 'z'; unsigned remove = 0; const char* append = nullptr;

	{
	cs = NNPS_start;
	}

	{
	int _klen;
	unsigned int _trans;
	const char *_acts;
	unsigned int _nacts;
	const char *_keys;

	// Empty form: leave the defaults in place.
	if ( p == ( (form.c_str() + form.size())) )
		goto _test_eof;
	// State 0 stops the machine.
	if ( cs == 0 )
		goto _out;
_resume:
	_keys = _NNPS_trans_keys + _NNPS_key_offsets[cs];
	_trans = _NNPS_index_offsets[cs];

	// Binary search among the single-character keys of the current state.
	_klen = _NNPS_single_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + _klen - 1;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + ((_upper-_lower) >> 1);
			if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
				_upper = _mid - 1;
			else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
				_lower = _mid + 1;
			else {
				_trans += (unsigned int)(_mid - _keys);
				goto _match;
			}
		}
		_keys += _klen;
		_trans += _klen;
	}

	// Binary search among the range keys, stored as consecutive [low, high] pairs.
	_klen = _NNPS_range_lengths[cs];
	if ( _klen > 0 ) {
		const char *_lower = _keys;
		const char *_mid;
		const char *_upper = _keys + (_klen<<1) - 2;
		while (1) {
			if ( _upper < _lower )
				break;

			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
			if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
				_upper = _mid - 2;
			else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
				_lower = _mid + 2;
			else {
				_trans += (unsigned int)((_mid - _keys)>>1);
				goto _match;
			}
		}
		// Nothing matched: _trans now addresses the slot after all keys.
		_trans += _klen;
	}

_match:
	_trans = _NNPS_indicies[_trans];
	cs = _NNPS_trans_targs[_trans];

	if ( _NNPS_trans_actions[_trans] == 0 )
		goto _again;

	// Action list layout: a count followed by that many action ids.
	_acts = _NNPS_actions + _NNPS_trans_actions[_trans];
	_nacts = (unsigned int) *_acts++;
	while ( _nacts-- > 0 )
	{
		switch ( *_acts++ )
		{
	case 0:
	{ if (best > 'a') best = 'a', remove = 2, append = "AN"; }
	break;
	case 1:
	{ if (best > 'b') best = 'b', remove = 2, append = "an"; }
	break;
	case 2:
	{ if (best > 'c') best = 'c', remove = 1, append = nullptr; }
	break;
	case 3:
	{ if (best > 'd') best = 'd', remove = 3, append = "FE"; }
	break;
	case 4:
	{ if (best > 'e') best = 'e', remove = 3, append = "fe"; }
	break;
	case 5:
	{ if (best > 'f') best = 'f', remove = 2, append = nullptr; }
	break;
	case 6:
	{ if (best > 'g') best = 'g', remove = 1, append = nullptr; }
	break;
	case 7:
	{ if (best > 'h') best = 'h', remove = 2, append = nullptr; }
	break;
	case 8:
	{ if (best > 'i') best = 'i', remove = 1, append = nullptr; }
	break;
	case 9:
	{ if (best > 'j') best = 'j', remove = 2, append = nullptr; }
	break;
	case 10:
	{ if (best > 'k') best = 'k', remove = 1, append = nullptr; }
	break;
	case 11:
	{ if (best > 'l') best = 'l', remove = 1, append = nullptr; }
	break;
	case 12:
	{ if (best > 'm') best = 'm', remove = 2, append = nullptr; }
	break;
	case 13:
	{ if (best > 'n') best = 'n', remove = 3, append = "Y"; }
	break;
	case 14:
	{ if (best > 'o') best = 'o', remove = 3, append = "y"; }
	break;
	case 15:
	{ if (best > 'p') best = 'p', remove = 2, append = nullptr; }
	break;
	case 16:
	{ if (best > 'q') best = 'q', remove = 1, append = nullptr; }
	break;
		}
	}

_again:
	if ( cs == 0 )
		goto _out;
	if ( ++p != ( (form.c_str() + form.size())) )
		goto _resume;
	_test_eof: {}
	_out: {}
	}

  // Apply the winning rewrite to build the lemma.
  add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
}
6238
|
|
|
|
|
|
|
|
6239
|
|
|
|
|
|
|
static const char _VBG_actions[] = { |
6240
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
6241
|
|
|
|
|
|
|
5, 1, 6, 1, 7, 1, 9, 1, |
6242
|
|
|
|
|
|
|
10, 1, 11, 1, 12, 1, 13, 1, |
6243
|
|
|
|
|
|
|
14, 1, 15, 1, 16, 1, 17, 2, |
6244
|
|
|
|
|
|
|
0, 12, 2, 3, 4, 2, 5, 9, |
6245
|
|
|
|
|
|
|
2, 5, 10, 2, 8, 9, 2, 9, |
6246
|
|
|
|
|
|
|
10, 2, 11, 12, 3, 0, 2, 12, |
6247
|
|
|
|
|
|
|
3, 2, 11, 12 |
6248
|
|
|
|
|
|
|
}; |
6249
|
|
|
|
|
|
|
|
6250
|
|
|
|
|
|
|
static const short _VBG_key_offsets[] = { |
6251
|
|
|
|
|
|
|
0, 0, 1, 2, 3, 9, 14, 24, |
6252
|
|
|
|
|
|
|
29, 34, 44, 46, 47, 48, 49, 50, |
6253
|
|
|
|
|
|
|
51, 52, 59, 66, 68, 70, 71, 72, |
6254
|
|
|
|
|
|
|
73, 74, 75, 76, 81, 89, 90, 91, |
6255
|
|
|
|
|
|
|
92, 93, 94, 96, 97, 98, 99, 100, |
6256
|
|
|
|
|
|
|
101, 102, 127, 127, 136, 137, 142, 153, |
6257
|
|
|
|
|
|
|
162, 171, 181, 186, 191, 197, 207, 207, |
6258
|
|
|
|
|
|
|
216, 228, 229, 240, 240, 249, 258, 267, |
6259
|
|
|
|
|
|
|
276, 285, 290, 302, 313, 318, 324, 334, |
6260
|
|
|
|
|
|
|
344, 355, 362, 373, 382, 391, 391, 402, |
6261
|
|
|
|
|
|
|
413, 415, 416, 417, 417, 418, 426, 437, |
6262
|
|
|
|
|
|
|
442, 448, 458, 468, 479, 486, 497, 504, |
6263
|
|
|
|
|
|
|
510, 519, 528, 537, 543 |
6264
|
|
|
|
|
|
|
}; |
6265
|
|
|
|
|
|
|
|
6266
|
|
|
|
|
|
|
static const char _VBG_trans_keys[] = { |
6267
|
|
|
|
|
|
|
103, 110, 105, 97, 101, 105, 111, 117, |
6268
|
|
|
|
|
|
|
121, 97, 101, 105, 111, 117, 98, 100, |
6269
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 122, |
6270
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 97, 101, 105, |
6271
|
|
|
|
|
|
|
111, 117, 98, 100, 102, 104, 106, 110, |
6272
|
|
|
|
|
|
|
112, 116, 118, 122, 98, 114, 105, 114, |
6273
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
6274
|
|
|
|
|
|
|
117, 98, 122, 97, 101, 105, 111, 117, |
6275
|
|
|
|
|
|
|
98, 122, 97, 122, 98, 114, 105, 114, |
6276
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
6277
|
|
|
|
|
|
|
117, 97, 101, 105, 110, 111, 115, 117, |
6278
|
|
|
|
|
|
|
120, 105, 112, 105, 109, 101, 98, 114, |
6279
|
|
|
|
|
|
|
105, 114, 112, 105, 109, 101, 98, 99, |
6280
|
|
|
|
|
|
|
100, 102, 103, 104, 106, 107, 108, 109, |
6281
|
|
|
|
|
|
|
110, 111, 112, 113, 114, 115, 116, 117, |
6282
|
|
|
|
|
|
|
118, 119, 120, 121, 122, 97, 105, 97, |
6283
|
|
|
|
|
|
|
98, 101, 105, 111, 117, 122, 99, 120, |
6284
|
|
|
|
|
|
|
113, 97, 101, 105, 111, 117, 98, 99, |
6285
|
|
|
|
|
|
|
100, 105, 111, 117, 122, 97, 101, 102, |
6286
|
|
|
|
|
|
|
120, 97, 100, 101, 105, 111, 117, 122, |
6287
|
|
|
|
|
|
|
98, 120, 97, 101, 102, 105, 111, 117, |
6288
|
|
|
|
|
|
|
122, 98, 120, 97, 101, 103, 105, 110, |
6289
|
|
|
|
|
|
|
111, 117, 122, 98, 120, 97, 101, 105, |
6290
|
|
|
|
|
|
|
111, 117, 101, 110, 111, 115, 120, 101, |
6291
|
|
|
|
|
|
|
110, 111, 112, 115, 120, 97, 101, 104, |
6292
|
|
|
|
|
|
|
105, 111, 116, 117, 122, 98, 120, 97, |
6293
|
|
|
|
|
|
|
101, 105, 106, 111, 117, 122, 98, 120, |
6294
|
|
|
|
|
|
|
98, 99, 100, 105, 107, 111, 117, 122, |
6295
|
|
|
|
|
|
|
97, 101, 102, 120, 105, 97, 101, 105, |
6296
|
|
|
|
|
|
|
108, 111, 114, 117, 119, 122, 98, 120, |
6297
|
|
|
|
|
|
|
97, 101, 105, 109, 111, 117, 122, 98, |
6298
|
|
|
|
|
|
|
120, 97, 101, 105, 110, 111, 117, 122, |
6299
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 112, 117, |
6300
|
|
|
|
|
|
|
122, 98, 120, 97, 101, 105, 111, 113, |
6301
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 111, |
6302
|
|
|
|
|
|
|
114, 117, 122, 98, 120, 97, 101, 105, |
6303
|
|
|
|
|
|
|
111, 117, 98, 99, 100, 105, 108, 111, |
6304
|
|
|
|
|
|
|
116, 117, 97, 101, 102, 122, 101, 110, |
6305
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
6306
|
|
|
|
|
|
|
122, 101, 110, 111, 115, 120, 101, 110, |
6307
|
|
|
|
|
|
|
111, 112, 115, 120, 101, 105, 110, 111, |
6308
|
|
|
|
|
|
|
115, 120, 98, 116, 118, 122, 101, 105, |
6309
|
|
|
|
|
|
|
110, 111, 115, 120, 98, 116, 118, 122, |
6310
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
6311
|
|
|
|
|
|
|
116, 118, 122, 98, 101, 110, 111, 114, |
6312
|
|
|
|
|
|
|
115, 120, 101, 110, 111, 115, 120, 98, |
6313
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 97, 101, 105, |
6314
|
|
|
|
|
|
|
111, 115, 117, 122, 98, 120, 97, 101, |
6315
|
|
|
|
|
|
|
105, 111, 116, 117, 122, 98, 120, 122, |
6316
|
|
|
|
|
|
|
98, 100, 102, 104, 106, 110, 112, 116, |
6317
|
|
|
|
|
|
|
118, 120, 122, 98, 100, 102, 104, 106, |
6318
|
|
|
|
|
|
|
110, 112, 116, 118, 120, 98, 114, 112, |
6319
|
|
|
|
|
|
|
114, 113, 97, 101, 105, 108, 111, 117, |
6320
|
|
|
|
|
|
|
98, 122, 101, 110, 111, 115, 120, 98, |
6321
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 101, 110, 111, |
6322
|
|
|
|
|
|
|
115, 120, 101, 110, 111, 112, 115, 120, |
6323
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 120, 98, 116, |
6324
|
|
|
|
|
|
|
118, 122, 101, 105, 110, 111, 115, 120, |
6325
|
|
|
|
|
|
|
98, 116, 118, 122, 101, 110, 111, 115, |
6326
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 98, |
6327
|
|
|
|
|
|
|
101, 110, 111, 114, 115, 120, 101, 110, |
6328
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
6329
|
|
|
|
|
|
|
122, 97, 101, 105, 111, 117, 98, 122, |
6330
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 121, 97, 101, |
6331
|
|
|
|
|
|
|
105, 111, 117, 118, 122, 98, 120, 97, |
6332
|
|
|
|
|
|
|
101, 105, 111, 117, 119, 122, 98, 120, |
6333
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 120, 122, 98, |
6334
|
|
|
|
|
|
|
119, 97, 101, 105, 111, 117, 121, 97, |
6335
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
6336
|
|
|
|
|
|
|
0 |
6337
|
|
|
|
|
|
|
}; |
6338
|
|
|
|
|
|
|
|
6339
|
|
|
|
|
|
|
static const char _VBG_single_lengths[] = { |
6340
|
|
|
|
|
|
|
0, 1, 1, 1, 6, 5, 0, 5, |
6341
|
|
|
|
|
|
|
5, 0, 2, 1, 1, 1, 1, 1, |
6342
|
|
|
|
|
|
|
1, 5, 5, 0, 2, 1, 1, 1, |
6343
|
|
|
|
|
|
|
1, 1, 1, 5, 8, 1, 1, 1, |
6344
|
|
|
|
|
|
|
1, 1, 2, 1, 1, 1, 1, 1, |
6345
|
|
|
|
|
|
|
1, 23, 0, 7, 1, 5, 7, 7, |
6346
|
|
|
|
|
|
|
7, 8, 5, 5, 6, 8, 0, 7, |
6347
|
|
|
|
|
|
|
8, 1, 9, 0, 7, 7, 7, 7, |
6348
|
|
|
|
|
|
|
7, 5, 8, 5, 5, 6, 6, 6, |
6349
|
|
|
|
|
|
|
5, 7, 5, 7, 7, 0, 1, 1, |
6350
|
|
|
|
|
|
|
2, 1, 1, 0, 1, 6, 5, 5, |
6351
|
|
|
|
|
|
|
6, 6, 6, 5, 7, 5, 5, 6, |
6352
|
|
|
|
|
|
|
7, 7, 7, 6, 7 |
6353
|
|
|
|
|
|
|
}; |
6354
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
static const char _VBG_range_lengths[] = { |
6356
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 5, 0, |
6357
|
|
|
|
|
|
|
0, 5, 0, 0, 0, 0, 0, 0, |
6358
|
|
|
|
|
|
|
0, 1, 1, 1, 0, 0, 0, 0, |
6359
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6360
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6361
|
|
|
|
|
|
|
0, 1, 0, 1, 0, 0, 2, 1, |
6362
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 1, 0, 1, |
6363
|
|
|
|
|
|
|
2, 0, 1, 0, 1, 1, 1, 1, |
6364
|
|
|
|
|
|
|
1, 0, 2, 3, 0, 0, 2, 2, |
6365
|
|
|
|
|
|
|
3, 0, 3, 1, 1, 0, 5, 5, |
6366
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 3, 0, |
6367
|
|
|
|
|
|
|
0, 2, 2, 3, 0, 3, 1, 0, |
6368
|
|
|
|
|
|
|
1, 1, 1, 0, 1 |
6369
|
|
|
|
|
|
|
}; |
6370
|
|
|
|
|
|
|
|
6371
|
|
|
|
|
|
|
static const short _VBG_index_offsets[] = { |
6372
|
|
|
|
|
|
|
0, 0, 2, 4, 6, 13, 19, 25, |
6373
|
|
|
|
|
|
|
31, 37, 43, 46, 48, 50, 52, 54, |
6374
|
|
|
|
|
|
|
56, 58, 65, 72, 74, 77, 79, 81, |
6375
|
|
|
|
|
|
|
83, 85, 87, 89, 95, 104, 106, 108, |
6376
|
|
|
|
|
|
|
110, 112, 114, 117, 119, 121, 123, 125, |
6377
|
|
|
|
|
|
|
127, 129, 154, 155, 164, 166, 172, 182, |
6378
|
|
|
|
|
|
|
191, 200, 210, 216, 222, 229, 239, 240, |
6379
|
|
|
|
|
|
|
249, 260, 262, 273, 274, 283, 292, 301, |
6380
|
|
|
|
|
|
|
310, 319, 325, 336, 345, 351, 358, 367, |
6381
|
|
|
|
|
|
|
376, 385, 393, 402, 411, 420, 421, 428, |
6382
|
|
|
|
|
|
|
435, 438, 440, 442, 443, 445, 453, 462, |
6383
|
|
|
|
|
|
|
468, 475, 484, 493, 502, 510, 519, 526, |
6384
|
|
|
|
|
|
|
533, 542, 551, 560, 567 |
6385
|
|
|
|
|
|
|
}; |
6386
|
|
|
|
|
|
|
|
6387
|
|
|
|
|
|
|
static const unsigned char _VBG_indicies[] = { |
6388
|
|
|
|
|
|
|
0, 1, 2, 1, 3, 1, 4, 4, |
6389
|
|
|
|
|
|
|
4, 4, 4, 4, 1, 5, 5, 5, |
6390
|
|
|
|
|
|
|
5, 6, 1, 7, 7, 7, 7, 7, |
6391
|
|
|
|
|
|
|
1, 8, 8, 8, 8, 9, 1, 5, |
6392
|
|
|
|
|
|
|
5, 5, 5, 10, 1, 11, 11, 11, |
6393
|
|
|
|
|
|
|
11, 11, 1, 11, 12, 1, 11, 1, |
6394
|
|
|
|
|
|
|
13, 1, 11, 1, 14, 1, 11, 1, |
6395
|
|
|
|
|
|
|
11, 1, 5, 5, 5, 5, 6, 15, |
6396
|
|
|
|
|
|
|
1, 5, 5, 5, 5, 6, 16, 1, |
6397
|
|
|
|
|
|
|
4, 1, 17, 18, 1, 17, 1, 19, |
6398
|
|
|
|
|
|
|
1, 17, 1, 20, 1, 17, 1, 17, |
6399
|
|
|
|
|
|
|
1, 21, 22, 21, 23, 24, 1, 25, |
6400
|
|
|
|
|
|
|
26, 25, 27, 28, 29, 25, 30, 1, |
6401
|
|
|
|
|
|
|
31, 1, 31, 1, 32, 1, 31, 1, |
6402
|
|
|
|
|
|
|
31, 1, 33, 34, 1, 33, 1, 35, |
6403
|
|
|
|
|
|
|
1, 33, 1, 36, 1, 33, 1, 33, |
6404
|
|
|
|
|
|
|
1, 38, 39, 40, 41, 42, 43, 44, |
6405
|
|
|
|
|
|
|
45, 46, 47, 48, 49, 50, 51, 52, |
6406
|
|
|
|
|
|
|
53, 54, 55, 56, 57, 58, 59, 60, |
6407
|
|
|
|
|
|
|
37, 1, 1, 61, 62, 61, 61, 61, |
6408
|
|
|
|
|
|
|
61, 63, 63, 1, 64, 1, 65, 65, |
6409
|
|
|
|
|
|
|
65, 65, 65, 1, 67, 68, 67, 66, |
6410
|
|
|
|
|
|
|
66, 66, 67, 66, 67, 1, 69, 62, |
6411
|
|
|
|
|
|
|
69, 69, 69, 69, 63, 63, 1, 61, |
6412
|
|
|
|
|
|
|
61, 62, 61, 61, 61, 63, 63, 1, |
6413
|
|
|
|
|
|
|
66, 66, 68, 66, 70, 66, 66, 67, |
6414
|
|
|
|
|
|
|
67, 1, 71, 71, 71, 71, 71, 1, |
6415
|
|
|
|
|
|
|
72, 73, 74, 75, 76, 1, 72, 73, |
6416
|
|
|
|
|
|
|
74, 11, 75, 76, 1, 61, 61, 62, |
6417
|
|
|
|
|
|
|
61, 61, 77, 61, 63, 63, 1, 78, |
6418
|
|
|
|
|
|
|
61, 61, 61, 62, 61, 61, 63, 63, |
6419
|
|
|
|
|
|
|
1, 63, 79, 63, 61, 62, 61, 61, |
6420
|
|
|
|
|
|
|
63, 61, 63, 1, 7, 1, 61, 61, |
6421
|
|
|
|
|
|
|
61, 68, 61, 80, 61, 80, 67, 67, |
6422
|
|
|
|
|
|
|
1, 5, 61, 61, 61, 62, 61, 61, |
6423
|
|
|
|
|
|
|
63, 63, 1, 81, 81, 82, 62, 81, |
6424
|
|
|
|
|
|
|
81, 63, 63, 1, 81, 81, 81, 81, |
6425
|
|
|
|
|
|
|
62, 81, 63, 63, 1, 61, 61, 61, |
6426
|
|
|
|
|
|
|
61, 62, 61, 63, 63, 1, 61, 83, |
6427
|
|
|
|
|
|
|
61, 84, 62, 61, 63, 63, 1, 5, |
6428
|
|
|
|
|
|
|
5, 5, 5, 6, 1, 85, 86, 85, |
6429
|
|
|
|
|
|
|
5, 86, 5, 86, 6, 5, 85, 1, |
6430
|
|
|
|
|
|
|
87, 88, 89, 90, 91, 85, 85, 85, |
6431
|
|
|
|
|
|
|
1, 87, 92, 89, 93, 94, 1, 87, |
6432
|
|
|
|
|
|
|
92, 89, 17, 93, 94, 1, 87, 17, |
6433
|
|
|
|
|
|
|
88, 89, 90, 91, 85, 85, 1, 87, |
6434
|
|
|
|
|
|
|
20, 88, 89, 90, 91, 85, 85, 1, |
6435
|
|
|
|
|
|
|
95, 88, 89, 90, 91, 85, 85, 85, |
6436
|
|
|
|
|
|
|
1, 17, 87, 92, 89, 18, 93, 94, |
6437
|
|
|
|
|
|
|
1, 87, 97, 89, 98, 99, 96, 96, |
6438
|
|
|
|
|
|
|
96, 1, 66, 66, 66, 66, 100, 66, |
6439
|
|
|
|
|
|
|
67, 67, 1, 101, 102, 103, 61, 62, |
6440
|
|
|
|
|
|
|
61, 63, 63, 1, 104, 106, 106, 106, |
6441
|
|
|
|
|
|
|
106, 106, 106, 105, 107, 107, 107, 107, |
6442
|
|
|
|
|
|
|
107, 107, 1, 31, 108, 1, 31, 1, |
6443
|
|
|
|
|
|
|
109, 1, 105, 110, 104, 5, 5, 5, |
6444
|
|
|
|
|
|
|
112, 5, 6, 111, 1, 113, 114, 115, |
6445
|
|
|
|
|
|
|
116, 117, 111, 111, 111, 1, 113, 118, |
6446
|
|
|
|
|
|
|
115, 119, 120, 1, 113, 118, 115, 33, |
6447
|
|
|
|
|
|
|
119, 120, 1, 113, 33, 114, 115, 116, |
6448
|
|
|
|
|
|
|
117, 111, 111, 1, 113, 36, 114, 115, |
6449
|
|
|
|
|
|
|
116, 117, 111, 111, 1, 121, 114, 115, |
6450
|
|
|
|
|
|
|
116, 117, 111, 111, 111, 1, 33, 113, |
6451
|
|
|
|
|
|
|
118, 115, 34, 119, 120, 1, 113, 123, |
6452
|
|
|
|
|
|
|
115, 124, 125, 122, 122, 122, 1, 5, |
6453
|
|
|
|
|
|
|
5, 5, 5, 6, 111, 1, 4, 4, |
6454
|
|
|
|
|
|
|
4, 4, 4, 4, 1, 66, 66, 66, |
6455
|
|
|
|
|
|
|
66, 66, 68, 67, 67, 1, 81, 81, |
6456
|
|
|
|
|
|
|
81, 81, 81, 62, 63, 63, 1, 81, |
6457
|
|
|
|
|
|
|
81, 81, 81, 81, 62, 63, 63, 1, |
6458
|
|
|
|
|
|
|
126, 126, 126, 126, 126, 4, 1, 127, |
6459
|
|
|
|
|
|
|
127, 127, 127, 127, 129, 130, 128, 1, |
6460
|
|
|
|
|
|
|
0 |
6461
|
|
|
|
|
|
|
}; |
6462
|
|
|
|
|
|
|
|
6463
|
|
|
|
|
|
|
static const char _VBG_trans_targs[] = { |
6464
|
|
|
|
|
|
|
2, 0, 3, 41, 42, 42, 44, 42, |
6465
|
|
|
|
|
|
|
42, 44, 44, 51, 52, 13, 15, 42, |
6466
|
|
|
|
|
|
|
42, 68, 69, 23, 25, 77, 78, 83, |
6467
|
|
|
|
|
|
|
84, 42, 80, 29, 82, 31, 33, 42, |
6468
|
|
|
|
|
|
|
32, 87, 88, 37, 39, 4, 43, 46, |
6469
|
|
|
|
|
|
|
47, 48, 49, 53, 55, 56, 58, 60, |
6470
|
|
|
|
|
|
|
61, 19, 62, 63, 64, 75, 76, 95, |
6471
|
|
|
|
|
|
|
96, 97, 98, 99, 100, 5, 45, 42, |
6472
|
|
|
|
|
|
|
42, 6, 7, 42, 45, 8, 50, 9, |
6473
|
|
|
|
|
|
|
10, 11, 12, 14, 16, 54, 42, 57, |
6474
|
|
|
|
|
|
|
59, 17, 18, 65, 66, 67, 74, 20, |
6475
|
|
|
|
|
|
|
70, 22, 71, 72, 21, 24, 26, 73, |
6476
|
|
|
|
|
|
|
67, 70, 71, 72, 45, 27, 85, 94, |
6477
|
|
|
|
|
|
|
42, 42, 79, 28, 81, 30, 42, 86, |
6478
|
|
|
|
|
|
|
93, 34, 89, 36, 90, 91, 35, 38, |
6479
|
|
|
|
|
|
|
40, 92, 86, 89, 90, 91, 65, 65, |
6480
|
|
|
|
|
|
|
42, 42, 45 |
6481
|
|
|
|
|
|
|
}; |
6482
|
|
|
|
|
|
|
|
6483
|
|
|
|
|
|
|
static const char _VBG_trans_actions[] = { |
6484
|
|
|
|
|
|
|
0, 0, 0, 29, 23, 15, 15, 3, |
6485
|
|
|
|
|
|
|
46, 46, 40, 0, 0, 0, 0, 5, |
6486
|
|
|
|
|
|
|
34, 0, 0, 0, 0, 15, 15, 15, |
6487
|
|
|
|
|
|
|
15, 11, 11, 0, 11, 0, 0, 9, |
6488
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6489
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6490
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 21, |
6491
|
|
|
|
|
|
|
0, 0, 0, 23, 0, 0, 19, 19, |
6492
|
|
|
|
|
|
|
7, 0, 0, 49, 49, 0, 49, 0, |
6493
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 19, 17, 19, |
6494
|
|
|
|
|
|
|
49, 0, 0, 27, 27, 0, 0, 0, |
6495
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6496
|
|
|
|
|
|
|
25, 25, 25, 25, 56, 0, 9, 9, |
6497
|
|
|
|
|
|
|
13, 43, 43, 0, 9, 0, 37, 0, |
6498
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6499
|
|
|
|
|
|
|
0, 0, 7, 7, 7, 7, 23, 1, |
6500
|
|
|
|
|
|
|
31, 1, 52 |
6501
|
|
|
|
|
|
|
}; |
6502
|
|
|
|
|
|
|
|
6503
|
|
|
|
|
|
|
static const char _VBG_eof_actions[] = { |
6504
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6505
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6506
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6507
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6508
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6509
|
|
|
|
|
|
|
0, 0, 0, 3, 0, 0, 3, 3, |
6510
|
|
|
|
|
|
|
3, 3, 0, 3, 3, 3, 0, 3, |
6511
|
|
|
|
|
|
|
3, 0, 3, 0, 3, 3, 3, 3, |
6512
|
|
|
|
|
|
|
3, 0, 0, 25, 25, 25, 25, 25, |
6513
|
|
|
|
|
|
|
25, 25, 25, 3, 3, 0, 0, 0, |
6514
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 7, 7, |
6515
|
|
|
|
|
|
|
7, 7, 7, 7, 7, 7, 0, 0, |
6516
|
|
|
|
|
|
|
3, 3, 3, 0, 3 |
6517
|
|
|
|
|
|
|
}; |
6518
|
|
|
|
|
|
|
|
6519
|
|
|
|
|
|
|
static const int VBG_start = 1; |
6520
|
|
|
|
|
|
|
|
6521
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBG(const string& form, vector& lemmas) const { |
6522
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
6523
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
6524
|
|
|
|
|
|
|
|
6525
|
|
|
|
|
|
|
{ |
6526
|
|
|
|
|
|
|
cs = VBG_start; |
6527
|
|
|
|
|
|
|
} |
6528
|
|
|
|
|
|
|
|
6529
|
|
|
|
|
|
|
{ |
6530
|
|
|
|
|
|
|
int _klen; |
6531
|
|
|
|
|
|
|
unsigned int _trans; |
6532
|
|
|
|
|
|
|
const char *_acts; |
6533
|
|
|
|
|
|
|
unsigned int _nacts; |
6534
|
|
|
|
|
|
|
const char *_keys; |
6535
|
|
|
|
|
|
|
|
6536
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
6537
|
|
|
|
|
|
|
goto _test_eof; |
6538
|
|
|
|
|
|
|
if ( cs == 0 ) |
6539
|
|
|
|
|
|
|
goto _out; |
6540
|
|
|
|
|
|
|
_resume: |
6541
|
0
|
|
|
|
|
|
_keys = _VBG_trans_keys + _VBG_key_offsets[cs]; |
6542
|
0
|
|
|
|
|
|
_trans = _VBG_index_offsets[cs]; |
6543
|
|
|
|
|
|
|
|
6544
|
0
|
|
|
|
|
|
_klen = _VBG_single_lengths[cs]; |
6545
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
6546
|
|
|
|
|
|
|
const char *_lower = _keys; |
6547
|
|
|
|
|
|
|
const char *_mid; |
6548
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
6549
|
|
|
|
|
|
|
while (1) { |
6550
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
6551
|
|
|
|
|
|
|
break; |
6552
|
|
|
|
|
|
|
|
6553
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
6554
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
6555
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
6556
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
6557
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
6558
|
|
|
|
|
|
|
else { |
6559
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
6560
|
0
|
|
|
|
|
|
goto _match; |
6561
|
|
|
|
|
|
|
} |
6562
|
|
|
|
|
|
|
} |
6563
|
0
|
|
|
|
|
|
_keys += _klen; |
6564
|
0
|
|
|
|
|
|
_trans += _klen; |
6565
|
|
|
|
|
|
|
} |
6566
|
|
|
|
|
|
|
|
6567
|
0
|
|
|
|
|
|
_klen = _VBG_range_lengths[cs]; |
6568
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
6569
|
|
|
|
|
|
|
const char *_lower = _keys; |
6570
|
|
|
|
|
|
|
const char *_mid; |
6571
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
6572
|
|
|
|
|
|
|
while (1) { |
6573
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
6574
|
|
|
|
|
|
|
break; |
6575
|
|
|
|
|
|
|
|
6576
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
6577
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
6578
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
6579
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
6580
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
6581
|
|
|
|
|
|
|
else { |
6582
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
6583
|
0
|
|
|
|
|
|
goto _match; |
6584
|
|
|
|
|
|
|
} |
6585
|
|
|
|
|
|
|
} |
6586
|
0
|
|
|
|
|
|
_trans += _klen; |
6587
|
|
|
|
|
|
|
} |
6588
|
|
|
|
|
|
|
|
6589
|
|
|
|
|
|
|
_match: |
6590
|
0
|
|
|
|
|
|
_trans = _VBG_indicies[_trans]; |
6591
|
0
|
|
|
|
|
|
cs = _VBG_trans_targs[_trans]; |
6592
|
|
|
|
|
|
|
|
6593
|
0
|
0
|
|
|
|
|
if ( _VBG_trans_actions[_trans] == 0 ) |
6594
|
|
|
|
|
|
|
goto _again; |
6595
|
|
|
|
|
|
|
|
6596
|
0
|
|
|
|
|
|
_acts = _VBG_actions + _VBG_trans_actions[_trans]; |
6597
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
6598
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
6599
|
|
|
|
|
|
|
{ |
6600
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
6601
|
|
|
|
|
|
|
{ |
6602
|
|
|
|
|
|
|
case 0: |
6603
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
6604
|
|
|
|
|
|
|
break; |
6605
|
|
|
|
|
|
|
case 1: |
6606
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 3, append = "e"; } |
6607
|
|
|
|
|
|
|
break; |
6608
|
|
|
|
|
|
|
case 2: |
6609
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
6610
|
|
|
|
|
|
|
break; |
6611
|
|
|
|
|
|
|
case 3: |
6612
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = "e"; } |
6613
|
|
|
|
|
|
|
break; |
6614
|
|
|
|
|
|
|
case 4: |
6615
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 3, append = nullptr; } |
6616
|
|
|
|
|
|
|
break; |
6617
|
|
|
|
|
|
|
case 5: |
6618
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
6619
|
|
|
|
|
|
|
break; |
6620
|
|
|
|
|
|
|
case 6: |
6621
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 3, append = nullptr; } |
6622
|
|
|
|
|
|
|
break; |
6623
|
|
|
|
|
|
|
case 7: |
6624
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 3, append = "e"; } |
6625
|
|
|
|
|
|
|
break; |
6626
|
|
|
|
|
|
|
case 8: |
6627
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 3, append = nullptr; } |
6628
|
|
|
|
|
|
|
break; |
6629
|
|
|
|
|
|
|
case 9: |
6630
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 3, append = "e"; } |
6631
|
|
|
|
|
|
|
break; |
6632
|
|
|
|
|
|
|
case 10: |
6633
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 3, append = nullptr; } |
6634
|
|
|
|
|
|
|
break; |
6635
|
|
|
|
|
|
|
case 11: |
6636
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 3, append = "e"; } |
6637
|
|
|
|
|
|
|
break; |
6638
|
|
|
|
|
|
|
case 12: |
6639
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 3, append = nullptr; } |
6640
|
|
|
|
|
|
|
break; |
6641
|
|
|
|
|
|
|
case 13: |
6642
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 3, append = "e"; } |
6643
|
|
|
|
|
|
|
break; |
6644
|
|
|
|
|
|
|
case 14: |
6645
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 3, append = nullptr; } |
6646
|
|
|
|
|
|
|
break; |
6647
|
|
|
|
|
|
|
case 15: |
6648
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
6649
|
|
|
|
|
|
|
break; |
6650
|
|
|
|
|
|
|
case 16: |
6651
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 3, append = nullptr; } |
6652
|
|
|
|
|
|
|
break; |
6653
|
|
|
|
|
|
|
case 17: |
6654
|
0
|
0
|
|
|
|
|
{ if (best > 'r') best = 'r', remove = 3, append = "e"; } |
6655
|
|
|
|
|
|
|
break; |
6656
|
|
|
|
|
|
|
} |
6657
|
|
|
|
|
|
|
} |
6658
|
|
|
|
|
|
|
|
6659
|
|
|
|
|
|
|
_again: |
6660
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
6661
|
|
|
|
|
|
|
goto _out; |
6662
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
6663
|
|
|
|
|
|
|
goto _resume; |
6664
|
|
|
|
|
|
|
_test_eof: {} |
6665
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
6666
|
|
|
|
|
|
|
{ |
6667
|
0
|
|
|
|
|
|
const char *__acts = _VBG_actions + _VBG_eof_actions[cs]; |
6668
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
6669
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
6670
|
0
|
|
|
|
|
|
switch ( *__acts++ ) { |
6671
|
|
|
|
|
|
|
case 2: |
6672
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
6673
|
|
|
|
|
|
|
break; |
6674
|
|
|
|
|
|
|
case 5: |
6675
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
6676
|
|
|
|
|
|
|
break; |
6677
|
|
|
|
|
|
|
case 15: |
6678
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
6679
|
|
|
|
|
|
|
break; |
6680
|
|
|
|
|
|
|
} |
6681
|
|
|
|
|
|
|
} |
6682
|
|
|
|
|
|
|
} |
6683
|
|
|
|
|
|
|
|
6684
|
|
|
|
|
|
|
_out: {} |
6685
|
|
|
|
|
|
|
} |
6686
|
|
|
|
|
|
|
|
6687
|
0
|
0
|
|
|
|
|
add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
6688
|
0
|
|
|
|
|
|
} |
6689
|
|
|
|
|
|
|
|
6690
|
|
|
|
|
|
|
static const char _VBD_VBN_actions[] = { |
6691
|
|
|
|
|
|
|
0, 1, 0, 1, 2, 1, 3, 1, |
6692
|
|
|
|
|
|
|
4, 1, 5, 1, 6, 1, 7, 1, |
6693
|
|
|
|
|
|
|
8, 1, 9, 1, 10, 1, 11, 1, |
6694
|
|
|
|
|
|
|
13, 1, 14, 1, 15, 1, 16, 1, |
6695
|
|
|
|
|
|
|
17, 2, 1, 16, 2, 4, 5, 2, |
6696
|
|
|
|
|
|
|
8, 16, 2, 9, 13, 2, 9, 14, |
6697
|
|
|
|
|
|
|
2, 12, 13, 2, 13, 14, 2, 15, |
6698
|
|
|
|
|
|
|
16, 3, 1, 3, 16, 3, 3, 15, |
6699
|
|
|
|
|
|
|
16 |
6700
|
|
|
|
|
|
|
}; |
6701
|
|
|
|
|
|
|
|
6702
|
|
|
|
|
|
|
static const short _VBD_VBN_key_offsets[] = { |
6703
|
|
|
|
|
|
|
0, 0, 2, 3, 9, 14, 24, 29, |
6704
|
|
|
|
|
|
|
34, 44, 46, 47, 48, 49, 50, 51, |
6705
|
|
|
|
|
|
|
52, 60, 67, 74, 76, 77, 78, 79, |
6706
|
|
|
|
|
|
|
80, 81, 82, 87, 95, 96, 97, 98, |
6707
|
|
|
|
|
|
|
99, 100, 102, 103, 104, 105, 106, 107, |
6708
|
|
|
|
|
|
|
108, 114, 115, 140, 140, 149, 150, 155, |
6709
|
|
|
|
|
|
|
166, 175, 184, 194, 199, 204, 210, 220, |
6710
|
|
|
|
|
|
|
220, 229, 241, 242, 253, 253, 262, 271, |
6711
|
|
|
|
|
|
|
280, 289, 298, 303, 316, 327, 332, 338, |
6712
|
|
|
|
|
|
|
348, 358, 369, 376, 387, 396, 405, 405, |
6713
|
|
|
|
|
|
|
416, 427, 429, 430, 431, 431, 432, 440, |
6714
|
|
|
|
|
|
|
451, 456, 462, 472, 482, 493, 500, 511, |
6715
|
|
|
|
|
|
|
518, 524, 533, 542, 551 |
6716
|
|
|
|
|
|
|
}; |
6717
|
|
|
|
|
|
|
|
6718
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_keys[] = { |
6719
|
|
|
|
|
|
|
100, 110, 101, 97, 101, 105, 111, 117, |
6720
|
|
|
|
|
|
|
121, 97, 101, 105, 111, 117, 98, 100, |
6721
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 122, |
6722
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 97, 101, 105, |
6723
|
|
|
|
|
|
|
111, 117, 98, 100, 102, 104, 106, 110, |
6724
|
|
|
|
|
|
|
112, 116, 118, 122, 98, 114, 105, 114, |
6725
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
6726
|
|
|
|
|
|
|
117, 121, 98, 122, 97, 101, 105, 111, |
6727
|
|
|
|
|
|
|
117, 98, 122, 97, 101, 105, 111, 117, |
6728
|
|
|
|
|
|
|
98, 122, 98, 114, 105, 114, 112, 105, |
6729
|
|
|
|
|
|
|
109, 101, 97, 101, 105, 111, 117, 97, |
6730
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 117, 120, 105, |
6731
|
|
|
|
|
|
|
112, 105, 109, 101, 98, 114, 105, 114, |
6732
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
6733
|
|
|
|
|
|
|
117, 121, 101, 98, 99, 100, 102, 103, |
6734
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
6735
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 120, |
6736
|
|
|
|
|
|
|
121, 122, 97, 111, 97, 98, 101, 105, |
6737
|
|
|
|
|
|
|
111, 117, 122, 99, 120, 113, 97, 101, |
6738
|
|
|
|
|
|
|
105, 111, 117, 98, 99, 100, 105, 111, |
6739
|
|
|
|
|
|
|
117, 122, 97, 101, 102, 120, 97, 100, |
6740
|
|
|
|
|
|
|
101, 105, 111, 117, 122, 98, 120, 97, |
6741
|
|
|
|
|
|
|
101, 102, 105, 111, 117, 122, 98, 120, |
6742
|
|
|
|
|
|
|
97, 101, 103, 105, 110, 111, 117, 122, |
6743
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 117, 101, |
6744
|
|
|
|
|
|
|
110, 111, 115, 120, 101, 110, 111, 112, |
6745
|
|
|
|
|
|
|
115, 120, 97, 101, 104, 105, 111, 116, |
6746
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 106, |
6747
|
|
|
|
|
|
|
111, 117, 122, 98, 120, 98, 99, 100, |
6748
|
|
|
|
|
|
|
105, 107, 111, 117, 122, 97, 101, 102, |
6749
|
|
|
|
|
|
|
120, 105, 97, 101, 105, 108, 111, 114, |
6750
|
|
|
|
|
|
|
117, 119, 122, 98, 120, 97, 101, 105, |
6751
|
|
|
|
|
|
|
109, 111, 117, 122, 98, 120, 97, 101, |
6752
|
|
|
|
|
|
|
105, 110, 111, 117, 122, 98, 120, 97, |
6753
|
|
|
|
|
|
|
101, 105, 111, 112, 117, 122, 98, 120, |
6754
|
|
|
|
|
|
|
97, 101, 105, 111, 113, 117, 122, 98, |
6755
|
|
|
|
|
|
|
120, 97, 101, 105, 111, 114, 117, 122, |
6756
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 117, 98, |
6757
|
|
|
|
|
|
|
99, 100, 105, 108, 110, 111, 116, 117, |
6758
|
|
|
|
|
|
|
97, 101, 102, 122, 101, 110, 111, 115, |
6759
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 101, |
6760
|
|
|
|
|
|
|
110, 111, 115, 120, 101, 110, 111, 112, |
6761
|
|
|
|
|
|
|
115, 120, 101, 105, 110, 111, 115, 120, |
6762
|
|
|
|
|
|
|
98, 116, 118, 122, 101, 105, 110, 111, |
6763
|
|
|
|
|
|
|
115, 120, 98, 116, 118, 122, 101, 110, |
6764
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
6765
|
|
|
|
|
|
|
122, 98, 101, 110, 111, 114, 115, 120, |
6766
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
6767
|
|
|
|
|
|
|
116, 118, 122, 97, 101, 105, 111, 115, |
6768
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 111, |
6769
|
|
|
|
|
|
|
116, 117, 122, 98, 120, 122, 98, 100, |
6770
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 120, |
6771
|
|
|
|
|
|
|
122, 98, 100, 102, 104, 106, 110, 112, |
6772
|
|
|
|
|
|
|
116, 118, 120, 98, 114, 112, 114, 113, |
6773
|
|
|
|
|
|
|
97, 101, 105, 108, 111, 117, 98, 122, |
6774
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
6775
|
|
|
|
|
|
|
116, 118, 122, 101, 110, 111, 115, 120, |
6776
|
|
|
|
|
|
|
101, 110, 111, 112, 115, 120, 101, 105, |
6777
|
|
|
|
|
|
|
110, 111, 115, 120, 98, 116, 118, 122, |
6778
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 120, 98, 116, |
6779
|
|
|
|
|
|
|
118, 122, 101, 110, 111, 115, 120, 98, |
6780
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 98, 101, 110, |
6781
|
|
|
|
|
|
|
111, 114, 115, 120, 101, 110, 111, 115, |
6782
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 97, |
6783
|
|
|
|
|
|
|
101, 105, 111, 117, 98, 122, 97, 101, |
6784
|
|
|
|
|
|
|
105, 111, 117, 121, 97, 101, 105, 111, |
6785
|
|
|
|
|
|
|
117, 118, 122, 98, 120, 97, 101, 105, |
6786
|
|
|
|
|
|
|
111, 117, 119, 122, 98, 120, 97, 101, |
6787
|
|
|
|
|
|
|
105, 111, 117, 120, 122, 98, 119, 97, |
6788
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
6789
|
|
|
|
|
|
|
0 |
6790
|
|
|
|
|
|
|
}; |
6791
|
|
|
|
|
|
|
|
6792
|
|
|
|
|
|
|
static const char _VBD_VBN_single_lengths[] = { |
6793
|
|
|
|
|
|
|
0, 2, 1, 6, 5, 0, 5, 5, |
6794
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 1, |
6795
|
|
|
|
|
|
|
6, 5, 5, 2, 1, 1, 1, 1, |
6796
|
|
|
|
|
|
|
1, 1, 5, 8, 1, 1, 1, 1, |
6797
|
|
|
|
|
|
|
1, 2, 1, 1, 1, 1, 1, 1, |
6798
|
|
|
|
|
|
|
6, 1, 23, 0, 7, 1, 5, 7, |
6799
|
|
|
|
|
|
|
7, 7, 8, 5, 5, 6, 8, 0, |
6800
|
|
|
|
|
|
|
7, 8, 1, 9, 0, 7, 7, 7, |
6801
|
|
|
|
|
|
|
7, 7, 5, 9, 5, 5, 6, 6, |
6802
|
|
|
|
|
|
|
6, 5, 7, 5, 7, 7, 0, 1, |
6803
|
|
|
|
|
|
|
1, 2, 1, 1, 0, 1, 6, 5, |
6804
|
|
|
|
|
|
|
5, 6, 6, 6, 5, 7, 5, 5, |
6805
|
|
|
|
|
|
|
6, 7, 7, 7, 7 |
6806
|
|
|
|
|
|
|
}; |
6807
|
|
|
|
|
|
|
|
6808
|
|
|
|
|
|
|
static const char _VBD_VBN_range_lengths[] = { |
6809
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 5, 0, 0, |
6810
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 0, 0, |
6811
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 0, 0, |
6812
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6813
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6814
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 0, 0, 2, |
6815
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 1, 0, |
6816
|
|
|
|
|
|
|
1, 2, 0, 1, 0, 1, 1, 1, |
6817
|
|
|
|
|
|
|
1, 1, 0, 2, 3, 0, 0, 2, |
6818
|
|
|
|
|
|
|
2, 3, 0, 3, 1, 1, 0, 5, |
6819
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 1, 3, |
6820
|
|
|
|
|
|
|
0, 0, 2, 2, 3, 0, 3, 1, |
6821
|
|
|
|
|
|
|
0, 1, 1, 1, 1 |
6822
|
|
|
|
|
|
|
}; |
6823
|
|
|
|
|
|
|
|
6824
|
|
|
|
|
|
|
static const short _VBD_VBN_index_offsets[] = { |
6825
|
|
|
|
|
|
|
0, 0, 3, 5, 12, 18, 24, 30, |
6826
|
|
|
|
|
|
|
36, 42, 45, 47, 49, 51, 53, 55, |
6827
|
|
|
|
|
|
|
57, 65, 72, 79, 82, 84, 86, 88, |
6828
|
|
|
|
|
|
|
90, 92, 94, 100, 109, 111, 113, 115, |
6829
|
|
|
|
|
|
|
117, 119, 122, 124, 126, 128, 130, 132, |
6830
|
|
|
|
|
|
|
134, 141, 143, 168, 169, 178, 180, 186, |
6831
|
|
|
|
|
|
|
196, 205, 214, 224, 230, 236, 243, 253, |
6832
|
|
|
|
|
|
|
254, 263, 274, 276, 287, 288, 297, 306, |
6833
|
|
|
|
|
|
|
315, 324, 333, 339, 351, 360, 366, 373, |
6834
|
|
|
|
|
|
|
382, 391, 400, 408, 417, 426, 435, 436, |
6835
|
|
|
|
|
|
|
443, 450, 453, 455, 457, 458, 460, 468, |
6836
|
|
|
|
|
|
|
477, 483, 490, 499, 508, 517, 525, 534, |
6837
|
|
|
|
|
|
|
541, 548, 557, 566, 575 |
6838
|
|
|
|
|
|
|
}; |
6839
|
|
|
|
|
|
|
|
6840
|
|
|
|
|
|
|
static const unsigned char _VBD_VBN_indicies[] = { |
6841
|
|
|
|
|
|
|
0, 2, 1, 3, 1, 4, 4, 4, |
6842
|
|
|
|
|
|
|
4, 4, 4, 1, 5, 5, 5, 5, |
6843
|
|
|
|
|
|
|
6, 1, 7, 7, 7, 7, 7, 1, |
6844
|
|
|
|
|
|
|
8, 8, 8, 8, 9, 1, 5, 5, |
6845
|
|
|
|
|
|
|
5, 5, 10, 1, 11, 11, 11, 11, |
6846
|
|
|
|
|
|
|
11, 1, 11, 12, 1, 11, 1, 13, |
6847
|
|
|
|
|
|
|
1, 11, 1, 14, 1, 11, 1, 11, |
6848
|
|
|
|
|
|
|
1, 4, 4, 4, 4, 4, 16, 15, |
6849
|
|
|
|
|
|
|
1, 5, 5, 5, 5, 6, 17, 1, |
6850
|
|
|
|
|
|
|
5, 5, 5, 5, 6, 18, 1, 19, |
6851
|
|
|
|
|
|
|
20, 1, 19, 1, 21, 1, 19, 1, |
6852
|
|
|
|
|
|
|
22, 1, 19, 1, 19, 1, 23, 24, |
6853
|
|
|
|
|
|
|
23, 25, 26, 1, 27, 28, 27, 29, |
6854
|
|
|
|
|
|
|
30, 31, 27, 32, 1, 33, 1, 33, |
6855
|
|
|
|
|
|
|
1, 34, 1, 33, 1, 33, 1, 35, |
6856
|
|
|
|
|
|
|
36, 1, 35, 1, 37, 1, 35, 1, |
6857
|
|
|
|
|
|
|
38, 1, 35, 1, 35, 1, 39, 39, |
6858
|
|
|
|
|
|
|
39, 39, 39, 4, 1, 40, 1, 42, |
6859
|
|
|
|
|
|
|
43, 44, 45, 46, 47, 48, 49, 50, |
6860
|
|
|
|
|
|
|
51, 52, 53, 54, 55, 56, 57, 58, |
6861
|
|
|
|
|
|
|
59, 60, 61, 62, 63, 64, 41, 1, |
6862
|
|
|
|
|
|
|
1, 65, 66, 65, 65, 65, 65, 4, |
6863
|
|
|
|
|
|
|
4, 1, 67, 1, 68, 68, 68, 68, |
6864
|
|
|
|
|
|
|
68, 1, 70, 71, 70, 69, 69, 69, |
6865
|
|
|
|
|
|
|
70, 69, 70, 1, 72, 66, 72, 72, |
6866
|
|
|
|
|
|
|
72, 72, 4, 4, 1, 65, 65, 66, |
6867
|
|
|
|
|
|
|
65, 65, 65, 4, 4, 1, 69, 69, |
6868
|
|
|
|
|
|
|
71, 69, 73, 69, 69, 70, 70, 1, |
6869
|
|
|
|
|
|
|
74, 74, 74, 74, 74, 1, 75, 76, |
6870
|
|
|
|
|
|
|
77, 78, 79, 1, 75, 76, 77, 11, |
6871
|
|
|
|
|
|
|
78, 79, 1, 65, 65, 66, 65, 65, |
6872
|
|
|
|
|
|
|
80, 65, 4, 4, 1, 81, 65, 65, |
6873
|
|
|
|
|
|
|
65, 66, 65, 65, 4, 4, 1, 4, |
6874
|
|
|
|
|
|
|
82, 4, 65, 66, 65, 65, 4, 65, |
6875
|
|
|
|
|
|
|
4, 1, 7, 1, 65, 65, 65, 71, |
6876
|
|
|
|
|
|
|
65, 83, 65, 83, 70, 70, 1, 5, |
6877
|
|
|
|
|
|
|
65, 65, 65, 66, 65, 65, 4, 4, |
6878
|
|
|
|
|
|
|
1, 84, 84, 85, 66, 84, 84, 4, |
6879
|
|
|
|
|
|
|
4, 1, 84, 84, 84, 84, 66, 84, |
6880
|
|
|
|
|
|
|
4, 4, 1, 65, 65, 65, 65, 66, |
6881
|
|
|
|
|
|
|
65, 4, 4, 1, 65, 86, 65, 87, |
6882
|
|
|
|
|
|
|
66, 65, 4, 4, 1, 5, 5, 5, |
6883
|
|
|
|
|
|
|
5, 6, 1, 88, 89, 88, 5, 89, |
6884
|
|
|
|
|
|
|
89, 5, 89, 6, 5, 88, 1, 90, |
6885
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 88, 1, |
6886
|
|
|
|
|
|
|
90, 95, 92, 96, 97, 1, 90, 95, |
6887
|
|
|
|
|
|
|
92, 19, 96, 97, 1, 90, 19, 91, |
6888
|
|
|
|
|
|
|
92, 93, 94, 88, 88, 1, 90, 22, |
6889
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 1, 98, |
6890
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 88, 1, |
6891
|
|
|
|
|
|
|
19, 90, 95, 92, 20, 96, 97, 1, |
6892
|
|
|
|
|
|
|
90, 100, 92, 101, 102, 99, 99, 99, |
6893
|
|
|
|
|
|
|
1, 69, 69, 69, 69, 103, 69, 70, |
6894
|
|
|
|
|
|
|
70, 1, 104, 105, 106, 65, 66, 65, |
6895
|
|
|
|
|
|
|
4, 4, 1, 107, 109, 109, 109, 109, |
6896
|
|
|
|
|
|
|
109, 109, 108, 110, 110, 110, 110, 110, |
6897
|
|
|
|
|
|
|
110, 1, 33, 111, 1, 33, 1, 112, |
6898
|
|
|
|
|
|
|
1, 108, 113, 107, 5, 5, 5, 115, |
6899
|
|
|
|
|
|
|
5, 6, 114, 1, 116, 117, 118, 119, |
6900
|
|
|
|
|
|
|
120, 114, 114, 114, 1, 116, 121, 118, |
6901
|
|
|
|
|
|
|
122, 123, 1, 116, 121, 118, 35, 122, |
6902
|
|
|
|
|
|
|
123, 1, 116, 35, 117, 118, 119, 120, |
6903
|
|
|
|
|
|
|
114, 114, 1, 116, 38, 117, 118, 119, |
6904
|
|
|
|
|
|
|
120, 114, 114, 1, 124, 117, 118, 119, |
6905
|
|
|
|
|
|
|
120, 114, 114, 114, 1, 35, 116, 121, |
6906
|
|
|
|
|
|
|
118, 36, 122, 123, 1, 116, 126, 118, |
6907
|
|
|
|
|
|
|
127, 128, 125, 125, 125, 1, 5, 5, |
6908
|
|
|
|
|
|
|
5, 5, 6, 114, 1, 4, 4, 4, |
6909
|
|
|
|
|
|
|
4, 4, 4, 1, 69, 69, 69, 69, |
6910
|
|
|
|
|
|
|
69, 71, 70, 70, 1, 84, 84, 84, |
6911
|
|
|
|
|
|
|
84, 84, 66, 4, 4, 1, 84, 84, |
6912
|
|
|
|
|
|
|
84, 84, 84, 66, 4, 4, 1, 129, |
6913
|
|
|
|
|
|
|
129, 129, 129, 129, 131, 132, 130, 1, |
6914
|
|
|
|
|
|
|
0 |
6915
|
|
|
|
|
|
|
}; |
6916
|
|
|
|
|
|
|
|
6917
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_targs[] = { |
6918
|
|
|
|
|
|
|
2, 0, 41, 42, 43, 43, 45, 43, |
6919
|
|
|
|
|
|
|
43, 45, 45, 52, 53, 12, 14, 43, |
6920
|
|
|
|
|
|
|
43, 43, 43, 69, 70, 22, 24, 78, |
6921
|
|
|
|
|
|
|
79, 84, 85, 43, 81, 28, 83, 30, |
6922
|
|
|
|
|
|
|
32, 43, 31, 88, 89, 36, 38, 66, |
6923
|
|
|
|
|
|
|
43, 3, 44, 47, 48, 49, 50, 54, |
6924
|
|
|
|
|
|
|
16, 56, 57, 59, 61, 62, 63, 64, |
6925
|
|
|
|
|
|
|
65, 76, 77, 96, 97, 98, 99, 40, |
6926
|
|
|
|
|
|
|
100, 4, 46, 43, 5, 6, 43, 46, |
6927
|
|
|
|
|
|
|
7, 51, 8, 9, 10, 11, 13, 15, |
6928
|
|
|
|
|
|
|
55, 43, 58, 60, 17, 18, 66, 67, |
6929
|
|
|
|
|
|
|
68, 75, 19, 71, 21, 72, 73, 20, |
6930
|
|
|
|
|
|
|
23, 25, 74, 68, 71, 72, 73, 46, |
6931
|
|
|
|
|
|
|
26, 86, 95, 43, 43, 80, 27, 82, |
6932
|
|
|
|
|
|
|
29, 43, 87, 94, 33, 90, 35, 91, |
6933
|
|
|
|
|
|
|
92, 34, 37, 39, 93, 87, 90, 91, |
6934
|
|
|
|
|
|
|
92, 66, 43, 43, 46 |
6935
|
|
|
|
|
|
|
}; |
6936
|
|
|
|
|
|
|
|
6937
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_actions[] = { |
6938
|
|
|
|
|
|
|
0, 0, 0, 31, 29, 25, 25, 5, |
6939
|
|
|
|
|
|
|
51, 51, 45, 0, 0, 0, 0, 15, |
6940
|
|
|
|
|
|
|
39, 9, 36, 0, 0, 0, 0, 25, |
6941
|
|
|
|
|
|
|
25, 25, 25, 21, 21, 0, 21, 0, |
6942
|
|
|
|
|
|
|
0, 19, 0, 0, 0, 0, 0, 29, |
6943
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
6944
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6945
|
|
|
|
|
|
|
0, 0, 0, 27, 0, 0, 0, 0, |
6946
|
|
|
|
|
|
|
0, 0, 29, 17, 0, 0, 54, 54, |
6947
|
|
|
|
|
|
|
0, 54, 0, 0, 0, 0, 0, 0, |
6948
|
|
|
|
|
|
|
29, 27, 29, 54, 0, 0, 13, 13, |
6949
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6950
|
|
|
|
|
|
|
0, 0, 0, 7, 7, 7, 7, 61, |
6951
|
|
|
|
|
|
|
0, 19, 19, 23, 48, 48, 0, 19, |
6952
|
|
|
|
|
|
|
0, 42, 0, 0, 0, 0, 0, 0, |
6953
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 17, 17, 17, |
6954
|
|
|
|
|
|
|
17, 3, 33, 3, 57 |
6955
|
|
|
|
|
|
|
}; |
6956
|
|
|
|
|
|
|
|
6957
|
|
|
|
|
|
|
static const char _VBD_VBN_eof_actions[] = { |
6958
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6959
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6960
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6961
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6962
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
6963
|
|
|
|
|
|
|
0, 0, 0, 0, 5, 0, 0, 5, |
6964
|
|
|
|
|
|
|
5, 5, 5, 0, 5, 5, 5, 0, |
6965
|
|
|
|
|
|
|
5, 5, 0, 5, 0, 5, 5, 5, |
6966
|
|
|
|
|
|
|
5, 5, 0, 0, 11, 11, 11, 11, |
6967
|
|
|
|
|
|
|
11, 11, 11, 11, 5, 5, 0, 0, |
6968
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 17, |
6969
|
|
|
|
|
|
|
17, 17, 17, 17, 17, 17, 17, 0, |
6970
|
|
|
|
|
|
|
0, 5, 5, 5, 5 |
6971
|
|
|
|
|
|
|
}; |
6972
|
|
|
|
|
|
|
|
6973
|
|
|
|
|
|
|
static const int VBD_VBN_start = 1; |
6974
|
|
|
|
|
|
|
|
6975
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBD_VBN(const string& form, vector& lemmas) const { |
6976
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
6977
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
6978
|
|
|
|
|
|
|
|
6979
|
|
|
|
|
|
|
{ |
6980
|
|
|
|
|
|
|
cs = VBD_VBN_start; |
6981
|
|
|
|
|
|
|
} |
6982
|
|
|
|
|
|
|
|
6983
|
|
|
|
|
|
|
{ |
6984
|
|
|
|
|
|
|
int _klen; |
6985
|
|
|
|
|
|
|
unsigned int _trans; |
6986
|
|
|
|
|
|
|
const char *_acts; |
6987
|
|
|
|
|
|
|
unsigned int _nacts; |
6988
|
|
|
|
|
|
|
const char *_keys; |
6989
|
|
|
|
|
|
|
|
6990
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
6991
|
|
|
|
|
|
|
goto _test_eof; |
6992
|
|
|
|
|
|
|
if ( cs == 0 ) |
6993
|
|
|
|
|
|
|
goto _out; |
6994
|
|
|
|
|
|
|
_resume: |
6995
|
0
|
|
|
|
|
|
_keys = _VBD_VBN_trans_keys + _VBD_VBN_key_offsets[cs]; |
6996
|
0
|
|
|
|
|
|
_trans = _VBD_VBN_index_offsets[cs]; |
6997
|
|
|
|
|
|
|
|
6998
|
0
|
|
|
|
|
|
_klen = _VBD_VBN_single_lengths[cs]; |
6999
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7000
|
|
|
|
|
|
|
const char *_lower = _keys; |
7001
|
|
|
|
|
|
|
const char *_mid; |
7002
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
7003
|
|
|
|
|
|
|
while (1) { |
7004
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7005
|
|
|
|
|
|
|
break; |
7006
|
|
|
|
|
|
|
|
7007
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
7008
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
7009
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
7010
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
7011
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
7012
|
|
|
|
|
|
|
else { |
7013
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
7014
|
0
|
|
|
|
|
|
goto _match; |
7015
|
|
|
|
|
|
|
} |
7016
|
|
|
|
|
|
|
} |
7017
|
0
|
|
|
|
|
|
_keys += _klen; |
7018
|
0
|
|
|
|
|
|
_trans += _klen; |
7019
|
|
|
|
|
|
|
} |
7020
|
|
|
|
|
|
|
|
7021
|
0
|
|
|
|
|
|
_klen = _VBD_VBN_range_lengths[cs]; |
7022
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7023
|
|
|
|
|
|
|
const char *_lower = _keys; |
7024
|
|
|
|
|
|
|
const char *_mid; |
7025
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
7026
|
|
|
|
|
|
|
while (1) { |
7027
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7028
|
|
|
|
|
|
|
break; |
7029
|
|
|
|
|
|
|
|
7030
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
7031
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
7032
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
7033
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
7034
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
7035
|
|
|
|
|
|
|
else { |
7036
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
7037
|
0
|
|
|
|
|
|
goto _match; |
7038
|
|
|
|
|
|
|
} |
7039
|
|
|
|
|
|
|
} |
7040
|
0
|
|
|
|
|
|
_trans += _klen; |
7041
|
|
|
|
|
|
|
} |
7042
|
|
|
|
|
|
|
|
7043
|
|
|
|
|
|
|
_match: |
7044
|
0
|
|
|
|
|
|
_trans = _VBD_VBN_indicies[_trans]; |
7045
|
0
|
|
|
|
|
|
cs = _VBD_VBN_trans_targs[_trans]; |
7046
|
|
|
|
|
|
|
|
7047
|
0
|
0
|
|
|
|
|
if ( _VBD_VBN_trans_actions[_trans] == 0 ) |
7048
|
|
|
|
|
|
|
goto _again; |
7049
|
|
|
|
|
|
|
|
7050
|
0
|
|
|
|
|
|
_acts = _VBD_VBN_actions + _VBD_VBN_trans_actions[_trans]; |
7051
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
7052
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
7053
|
|
|
|
|
|
|
{ |
7054
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
7055
|
|
|
|
|
|
|
{ |
7056
|
|
|
|
|
|
|
case 0: |
7057
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
7058
|
|
|
|
|
|
|
break; |
7059
|
|
|
|
|
|
|
case 1: |
7060
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
7061
|
|
|
|
|
|
|
break; |
7062
|
|
|
|
|
|
|
case 2: |
7063
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
7064
|
|
|
|
|
|
|
break; |
7065
|
|
|
|
|
|
|
case 3: |
7066
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
7067
|
|
|
|
|
|
|
break; |
7068
|
|
|
|
|
|
|
case 4: |
7069
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
7070
|
|
|
|
|
|
|
break; |
7071
|
|
|
|
|
|
|
case 5: |
7072
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
7073
|
|
|
|
|
|
|
break; |
7074
|
|
|
|
|
|
|
case 7: |
7075
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
7076
|
|
|
|
|
|
|
break; |
7077
|
|
|
|
|
|
|
case 8: |
7078
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 3, append = "y"; } |
7079
|
|
|
|
|
|
|
break; |
7080
|
|
|
|
|
|
|
case 9: |
7081
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
7082
|
|
|
|
|
|
|
break; |
7083
|
|
|
|
|
|
|
case 10: |
7084
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
7085
|
|
|
|
|
|
|
break; |
7086
|
|
|
|
|
|
|
case 11: |
7087
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
7088
|
|
|
|
|
|
|
break; |
7089
|
|
|
|
|
|
|
case 12: |
7090
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
7091
|
|
|
|
|
|
|
break; |
7092
|
|
|
|
|
|
|
case 13: |
7093
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
7094
|
|
|
|
|
|
|
break; |
7095
|
|
|
|
|
|
|
case 14: |
7096
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 2, append = nullptr; } |
7097
|
|
|
|
|
|
|
break; |
7098
|
|
|
|
|
|
|
case 15: |
7099
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 1, append = nullptr; } |
7100
|
|
|
|
|
|
|
break; |
7101
|
|
|
|
|
|
|
case 16: |
7102
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 2, append = nullptr; } |
7103
|
|
|
|
|
|
|
break; |
7104
|
|
|
|
|
|
|
case 17: |
7105
|
0
|
0
|
|
|
|
|
{ if (best > 'r') best = 'r', remove = 1, append = nullptr; } |
7106
|
|
|
|
|
|
|
break; |
7107
|
|
|
|
|
|
|
} |
7108
|
|
|
|
|
|
|
} |
7109
|
|
|
|
|
|
|
|
7110
|
|
|
|
|
|
|
_again: |
7111
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
7112
|
|
|
|
|
|
|
goto _out; |
7113
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
7114
|
|
|
|
|
|
|
goto _resume; |
7115
|
|
|
|
|
|
|
_test_eof: {} |
7116
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
7117
|
|
|
|
|
|
|
{ |
7118
|
0
|
|
|
|
|
|
const char *__acts = _VBD_VBN_actions + _VBD_VBN_eof_actions[cs]; |
7119
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
7120
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
7121
|
0
|
|
|
|
|
|
switch ( *__acts++ ) { |
7122
|
|
|
|
|
|
|
case 3: |
7123
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
7124
|
|
|
|
|
|
|
break; |
7125
|
|
|
|
|
|
|
case 6: |
7126
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
7127
|
|
|
|
|
|
|
break; |
7128
|
|
|
|
|
|
|
case 9: |
7129
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
7130
|
|
|
|
|
|
|
break; |
7131
|
|
|
|
|
|
|
} |
7132
|
|
|
|
|
|
|
} |
7133
|
|
|
|
|
|
|
} |
7134
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
_out: {} |
7136
|
|
|
|
|
|
|
} |
7137
|
|
|
|
|
|
|
|
7138
|
0
|
0
|
|
|
|
|
add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
7139
|
0
|
|
|
|
|
|
} |
7140
|
|
|
|
|
|
|
|
7141
|
|
|
|
|
|
|
static const char _VBZ_actions[] = { |
7142
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
7143
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
7144
|
|
|
|
|
|
|
7, 1, 8 |
7145
|
|
|
|
|
|
|
}; |
7146
|
|
|
|
|
|
|
|
7147
|
|
|
|
|
|
|
static const char _VBZ_key_offsets[] = { |
7148
|
|
|
|
|
|
|
0, 0, 1, 2, 4, 14, 14, 25, |
7149
|
|
|
|
|
|
|
26, 31, 31, 31, 31, 37, 45, 54 |
7150
|
|
|
|
|
|
|
}; |
7151
|
|
|
|
|
|
|
|
7152
|
|
|
|
|
|
|
static const char _VBZ_trans_keys[] = { |
7153
|
|
|
|
|
|
|
115, 101, 99, 115, 98, 100, 102, 104, |
7154
|
|
|
|
|
|
|
106, 110, 112, 116, 118, 122, 122, 98, |
7155
|
|
|
|
|
|
|
100, 102, 104, 106, 110, 112, 116, 118, |
7156
|
|
|
|
|
|
|
120, 111, 97, 101, 105, 111, 117, 104, |
7157
|
|
|
|
|
|
|
105, 111, 115, 120, 122, 97, 101, 105, |
7158
|
|
|
|
|
|
|
110, 111, 114, 115, 117, 97, 101, 105, |
7159
|
|
|
|
|
|
|
111, 117, 121, 122, 98, 120, 0 |
7160
|
|
|
|
|
|
|
}; |
7161
|
|
|
|
|
|
|
|
7162
|
|
|
|
|
|
|
static const char _VBZ_single_lengths[] = { |
7163
|
|
|
|
|
|
|
0, 1, 1, 2, 0, 0, 1, 1, |
7164
|
|
|
|
|
|
|
5, 0, 0, 0, 6, 8, 7, 0 |
7165
|
|
|
|
|
|
|
}; |
7166
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
static const char _VBZ_range_lengths[] = { |
7168
|
|
|
|
|
|
|
0, 0, 0, 0, 5, 0, 5, 0, |
7169
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 0 |
7170
|
|
|
|
|
|
|
}; |
7171
|
|
|
|
|
|
|
|
7172
|
|
|
|
|
|
|
static const char _VBZ_index_offsets[] = { |
7173
|
|
|
|
|
|
|
0, 0, 2, 4, 7, 13, 14, 21, |
7174
|
|
|
|
|
|
|
23, 29, 30, 31, 32, 39, 48, 57 |
7175
|
|
|
|
|
|
|
}; |
7176
|
|
|
|
|
|
|
|
7177
|
|
|
|
|
|
|
static const char _VBZ_indicies[] = { |
7178
|
|
|
|
|
|
|
0, 1, 3, 2, 4, 4, 1, 5, |
7179
|
|
|
|
|
|
|
5, 5, 5, 5, 1, 6, 7, 7, |
7180
|
|
|
|
|
|
|
7, 7, 7, 7, 1, 8, 1, 9, |
7181
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 8, 10, 1, |
7182
|
|
|
|
|
|
|
11, 12, 13, 14, 4, 15, 1, 16, |
7183
|
|
|
|
|
|
|
16, 16, 17, 16, 18, 19, 16, 1, |
7184
|
|
|
|
|
|
|
20, 20, 20, 20, 20, 20, 22, 21, |
7185
|
|
|
|
|
|
|
1, 10, 0 |
7186
|
|
|
|
|
|
|
}; |
7187
|
|
|
|
|
|
|
|
7188
|
|
|
|
|
|
|
static const char _VBZ_trans_targs[] = { |
7189
|
|
|
|
|
|
|
2, 0, 11, 12, 11, 5, 11, 11, |
7190
|
|
|
|
|
|
|
11, 9, 11, 3, 4, 6, 13, 14, |
7191
|
|
|
|
|
|
|
11, 7, 8, 11, 11, 10, 15 |
7192
|
|
|
|
|
|
|
}; |
7193
|
|
|
|
|
|
|
|
7194
|
|
|
|
|
|
|
static const char _VBZ_trans_actions[] = { |
7195
|
|
|
|
|
|
|
0, 0, 17, 17, 11, 0, 13, 15, |
7196
|
|
|
|
|
|
|
9, 0, 3, 0, 0, 0, 11, 11, |
7197
|
|
|
|
|
|
|
1, 0, 0, 7, 5, 0, 7 |
7198
|
|
|
|
|
|
|
}; |
7199
|
|
|
|
|
|
|
|
7200
|
|
|
|
|
|
|
static const int VBZ_start = 1; |
7201
|
|
|
|
|
|
|
|
7202
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBZ(const string& form, vector& lemmas) const { |
7203
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
7204
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
7205
|
|
|
|
|
|
|
|
7206
|
|
|
|
|
|
|
{ |
7207
|
|
|
|
|
|
|
cs = VBZ_start; |
7208
|
|
|
|
|
|
|
} |
7209
|
|
|
|
|
|
|
|
7210
|
|
|
|
|
|
|
{ |
7211
|
|
|
|
|
|
|
int _klen; |
7212
|
|
|
|
|
|
|
unsigned int _trans; |
7213
|
|
|
|
|
|
|
const char *_acts; |
7214
|
|
|
|
|
|
|
unsigned int _nacts; |
7215
|
|
|
|
|
|
|
const char *_keys; |
7216
|
|
|
|
|
|
|
|
7217
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
7218
|
|
|
|
|
|
|
goto _test_eof; |
7219
|
|
|
|
|
|
|
if ( cs == 0 ) |
7220
|
|
|
|
|
|
|
goto _out; |
7221
|
|
|
|
|
|
|
_resume: |
7222
|
0
|
|
|
|
|
|
_keys = _VBZ_trans_keys + _VBZ_key_offsets[cs]; |
7223
|
0
|
|
|
|
|
|
_trans = _VBZ_index_offsets[cs]; |
7224
|
|
|
|
|
|
|
|
7225
|
0
|
|
|
|
|
|
_klen = _VBZ_single_lengths[cs]; |
7226
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7227
|
|
|
|
|
|
|
const char *_lower = _keys; |
7228
|
|
|
|
|
|
|
const char *_mid; |
7229
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
7230
|
|
|
|
|
|
|
while (1) { |
7231
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7232
|
|
|
|
|
|
|
break; |
7233
|
|
|
|
|
|
|
|
7234
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
7235
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
7236
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
7237
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
7238
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
7239
|
|
|
|
|
|
|
else { |
7240
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
7241
|
0
|
|
|
|
|
|
goto _match; |
7242
|
|
|
|
|
|
|
} |
7243
|
|
|
|
|
|
|
} |
7244
|
0
|
|
|
|
|
|
_keys += _klen; |
7245
|
0
|
|
|
|
|
|
_trans += _klen; |
7246
|
|
|
|
|
|
|
} |
7247
|
|
|
|
|
|
|
|
7248
|
0
|
|
|
|
|
|
_klen = _VBZ_range_lengths[cs]; |
7249
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7250
|
|
|
|
|
|
|
const char *_lower = _keys; |
7251
|
|
|
|
|
|
|
const char *_mid; |
7252
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
7253
|
|
|
|
|
|
|
while (1) { |
7254
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7255
|
|
|
|
|
|
|
break; |
7256
|
|
|
|
|
|
|
|
7257
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
7258
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
7259
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
7260
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
7261
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
7262
|
|
|
|
|
|
|
else { |
7263
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
7264
|
0
|
|
|
|
|
|
goto _match; |
7265
|
|
|
|
|
|
|
} |
7266
|
|
|
|
|
|
|
} |
7267
|
0
|
|
|
|
|
|
_trans += _klen; |
7268
|
|
|
|
|
|
|
} |
7269
|
|
|
|
|
|
|
|
7270
|
|
|
|
|
|
|
_match: |
7271
|
0
|
|
|
|
|
|
_trans = _VBZ_indicies[_trans]; |
7272
|
0
|
|
|
|
|
|
cs = _VBZ_trans_targs[_trans]; |
7273
|
|
|
|
|
|
|
|
7274
|
0
|
0
|
|
|
|
|
if ( _VBZ_trans_actions[_trans] == 0 ) |
7275
|
|
|
|
|
|
|
goto _again; |
7276
|
|
|
|
|
|
|
|
7277
|
0
|
|
|
|
|
|
_acts = _VBZ_actions + _VBZ_trans_actions[_trans]; |
7278
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
7279
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
7280
|
|
|
|
|
|
|
{ |
7281
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
7282
|
|
|
|
|
|
|
{ |
7283
|
|
|
|
|
|
|
case 0: |
7284
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
7285
|
|
|
|
|
|
|
break; |
7286
|
|
|
|
|
|
|
case 1: |
7287
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
7288
|
|
|
|
|
|
|
break; |
7289
|
|
|
|
|
|
|
case 2: |
7290
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
7291
|
|
|
|
|
|
|
break; |
7292
|
|
|
|
|
|
|
case 3: |
7293
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
7294
|
|
|
|
|
|
|
break; |
7295
|
|
|
|
|
|
|
case 4: |
7296
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
7297
|
|
|
|
|
|
|
break; |
7298
|
|
|
|
|
|
|
case 5: |
7299
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
7300
|
|
|
|
|
|
|
break; |
7301
|
|
|
|
|
|
|
case 6: |
7302
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 3, append = "y"; } |
7303
|
|
|
|
|
|
|
break; |
7304
|
|
|
|
|
|
|
case 7: |
7305
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
7306
|
|
|
|
|
|
|
break; |
7307
|
|
|
|
|
|
|
case 8: |
7308
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
7309
|
|
|
|
|
|
|
break; |
7310
|
|
|
|
|
|
|
} |
7311
|
|
|
|
|
|
|
} |
7312
|
|
|
|
|
|
|
|
7313
|
|
|
|
|
|
|
_again: |
7314
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
7315
|
|
|
|
|
|
|
goto _out; |
7316
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
7317
|
|
|
|
|
|
|
goto _resume; |
7318
|
|
|
|
|
|
|
_test_eof: {} |
7319
|
|
|
|
|
|
|
_out: {} |
7320
|
|
|
|
|
|
|
} |
7321
|
|
|
|
|
|
|
|
7322
|
0
|
0
|
|
|
|
|
add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
7323
|
0
|
|
|
|
|
|
} |
7324
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
static const char _JJR_RBR_actions[] = { |
7326
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 3, 1, |
7327
|
|
|
|
|
|
|
4, 1, 5, 2, 1, 4, 2, 2, |
7328
|
|
|
|
|
|
|
5, 2, 4, 5 |
7329
|
|
|
|
|
|
|
}; |
7330
|
|
|
|
|
|
|
|
7331
|
|
|
|
|
|
|
static const unsigned char _JJR_RBR_key_offsets[] = { |
7332
|
|
|
|
|
|
|
0, 0, 1, 2, 26, 26, 32, 37, |
7333
|
|
|
|
|
|
|
50, 56, 62, 73, 79, 85, 91, 102, |
7334
|
|
|
|
|
|
|
103, 109, 115, 117, 123, 129, 135, 146, |
7335
|
|
|
|
|
|
|
152, 163, 169, 175, 181 |
7336
|
|
|
|
|
|
|
}; |
7337
|
|
|
|
|
|
|
|
7338
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_keys[] = { |
7339
|
|
|
|
|
|
|
114, 101, 98, 99, 100, 101, 102, 103, |
7340
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
7341
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 120, |
7342
|
|
|
|
|
|
|
121, 122, 97, 98, 101, 105, 111, 117, |
7343
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 98, 99, 100, |
7344
|
|
|
|
|
|
|
105, 111, 117, 122, 97, 101, 102, 109, |
7345
|
|
|
|
|
|
|
112, 120, 97, 100, 101, 105, 111, 117, |
7346
|
|
|
|
|
|
|
97, 101, 102, 105, 111, 117, 97, 101, |
7347
|
|
|
|
|
|
|
103, 105, 111, 117, 122, 98, 109, 112, |
7348
|
|
|
|
|
|
|
120, 97, 101, 104, 105, 111, 117, 97, |
7349
|
|
|
|
|
|
|
101, 105, 106, 111, 117, 97, 101, 105, |
7350
|
|
|
|
|
|
|
107, 111, 117, 97, 101, 105, 108, 111, |
7351
|
|
|
|
|
|
|
117, 122, 98, 109, 112, 120, 101, 97, |
7352
|
|
|
|
|
|
|
101, 105, 109, 111, 117, 97, 101, 105, |
7353
|
|
|
|
|
|
|
110, 111, 117, 97, 122, 97, 101, 105, |
7354
|
|
|
|
|
|
|
111, 112, 117, 97, 101, 105, 111, 113, |
7355
|
|
|
|
|
|
|
117, 97, 101, 105, 111, 114, 117, 97, |
7356
|
|
|
|
|
|
|
101, 105, 111, 115, 117, 122, 98, 109, |
7357
|
|
|
|
|
|
|
112, 120, 97, 101, 105, 111, 116, 117, |
7358
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 118, 122, 98, |
7359
|
|
|
|
|
|
|
109, 112, 120, 97, 101, 105, 111, 117, |
7360
|
|
|
|
|
|
|
119, 97, 101, 105, 111, 117, 120, 97, |
7361
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 97, 101, 105, |
7362
|
|
|
|
|
|
|
111, 117, 122, 0 |
7363
|
|
|
|
|
|
|
}; |
7364
|
|
|
|
|
|
|
|
7365
|
|
|
|
|
|
|
static const char _JJR_RBR_single_lengths[] = { |
7366
|
|
|
|
|
|
|
0, 1, 1, 24, 0, 6, 5, 7, |
7367
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 7, 1, |
7368
|
|
|
|
|
|
|
6, 6, 0, 6, 6, 6, 7, 6, |
7369
|
|
|
|
|
|
|
7, 6, 6, 6, 6 |
7370
|
|
|
|
|
|
|
}; |
7371
|
|
|
|
|
|
|
|
7372
|
|
|
|
|
|
|
static const char _JJR_RBR_range_lengths[] = { |
7373
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 3, |
7374
|
|
|
|
|
|
|
0, 0, 2, 0, 0, 0, 2, 0, |
7375
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 2, 0, |
7376
|
|
|
|
|
|
|
2, 0, 0, 0, 0 |
7377
|
|
|
|
|
|
|
}; |
7378
|
|
|
|
|
|
|
|
7379
|
|
|
|
|
|
|
static const unsigned char _JJR_RBR_index_offsets[] = { |
7380
|
|
|
|
|
|
|
0, 0, 2, 4, 29, 30, 37, 43, |
7381
|
|
|
|
|
|
|
54, 61, 68, 78, 85, 92, 99, 109, |
7382
|
|
|
|
|
|
|
111, 118, 125, 127, 134, 141, 148, 158, |
7383
|
|
|
|
|
|
|
165, 175, 182, 189, 196 |
7384
|
|
|
|
|
|
|
}; |
7385
|
|
|
|
|
|
|
|
7386
|
|
|
|
|
|
|
static const char _JJR_RBR_indicies[] = { |
7387
|
|
|
|
|
|
|
0, 1, 2, 1, 4, 5, 6, 7, |
7388
|
|
|
|
|
|
|
8, 9, 10, 11, 12, 13, 14, 15, |
7389
|
|
|
|
|
|
|
16, 17, 18, 19, 20, 21, 7, 22, |
7390
|
|
|
|
|
|
|
23, 24, 25, 26, 3, 1, 27, 28, |
7391
|
|
|
|
|
|
|
27, 27, 27, 27, 1, 29, 29, 29, |
7392
|
|
|
|
|
|
|
29, 29, 1, 30, 31, 30, 27, 27, |
7393
|
|
|
|
|
|
|
27, 30, 27, 30, 30, 1, 27, 28, |
7394
|
|
|
|
|
|
|
27, 27, 27, 27, 1, 27, 27, 28, |
7395
|
|
|
|
|
|
|
27, 27, 27, 1, 27, 27, 31, 27, |
7396
|
|
|
|
|
|
|
27, 27, 30, 30, 30, 1, 27, 27, |
7397
|
|
|
|
|
|
|
28, 27, 27, 27, 1, 27, 27, 27, |
7398
|
|
|
|
|
|
|
28, 27, 27, 1, 27, 27, 27, 28, |
7399
|
|
|
|
|
|
|
27, 27, 1, 27, 27, 27, 32, 27, |
7400
|
|
|
|
|
|
|
27, 30, 30, 30, 1, 1, 33, 27, |
7401
|
|
|
|
|
|
|
27, 27, 28, 27, 27, 1, 34, 34, |
7402
|
|
|
|
|
|
|
34, 28, 34, 34, 1, 29, 1, 34, |
7403
|
|
|
|
|
|
|
34, 34, 34, 28, 34, 1, 27, 27, |
7404
|
|
|
|
|
|
|
27, 27, 28, 27, 1, 27, 27, 27, |
7405
|
|
|
|
|
|
|
27, 28, 27, 1, 27, 27, 27, 27, |
7406
|
|
|
|
|
|
|
31, 27, 30, 30, 30, 1, 27, 27, |
7407
|
|
|
|
|
|
|
27, 27, 28, 27, 1, 27, 27, 27, |
7408
|
|
|
|
|
|
|
27, 27, 31, 30, 30, 30, 1, 34, |
7409
|
|
|
|
|
|
|
34, 34, 34, 34, 28, 1, 34, 34, |
7410
|
|
|
|
|
|
|
34, 34, 34, 28, 1, 27, 27, 27, |
7411
|
|
|
|
|
|
|
27, 27, 28, 1, 27, 27, 27, 27, |
7412
|
|
|
|
|
|
|
27, 28, 1, 0 |
7413
|
|
|
|
|
|
|
}; |
7414
|
|
|
|
|
|
|
|
7415
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_targs[] = { |
7416
|
|
|
|
|
|
|
2, 0, 3, 4, 5, 7, 8, 4, |
7417
|
|
|
|
|
|
|
9, 10, 11, 4, 12, 13, 14, 16, |
7418
|
|
|
|
|
|
|
17, 19, 20, 21, 22, 23, 24, 25, |
7419
|
|
|
|
|
|
|
26, 27, 28, 6, 4, 4, 4, 4, |
7420
|
|
|
|
|
|
|
15, 4, 18 |
7421
|
|
|
|
|
|
|
}; |
7422
|
|
|
|
|
|
|
|
7423
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_actions[] = { |
7424
|
|
|
|
|
|
|
0, 0, 0, 9, 9, 9, 9, 17, |
7425
|
|
|
|
|
|
|
9, 9, 9, 14, 9, 9, 9, 9, |
7426
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 9, 9, 9, |
7427
|
|
|
|
|
|
|
9, 9, 9, 7, 3, 5, 7, 11, |
7428
|
|
|
|
|
|
|
11, 1, 7 |
7429
|
|
|
|
|
|
|
}; |
7430
|
|
|
|
|
|
|
|
7431
|
|
|
|
|
|
|
static const int JJR_RBR_start = 1; |
7432
|
|
|
|
|
|
|
|
7433
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_JJR_RBR(const string& form, unsigned negation_len, vector& lemmas) const { |
7434
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
7435
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
7436
|
|
|
|
|
|
|
|
7437
|
|
|
|
|
|
|
{ |
7438
|
|
|
|
|
|
|
cs = JJR_RBR_start; |
7439
|
|
|
|
|
|
|
} |
7440
|
|
|
|
|
|
|
|
7441
|
|
|
|
|
|
|
{ |
7442
|
|
|
|
|
|
|
int _klen; |
7443
|
|
|
|
|
|
|
unsigned int _trans; |
7444
|
|
|
|
|
|
|
const char *_acts; |
7445
|
|
|
|
|
|
|
unsigned int _nacts; |
7446
|
|
|
|
|
|
|
const char *_keys; |
7447
|
|
|
|
|
|
|
|
7448
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
7449
|
|
|
|
|
|
|
goto _test_eof; |
7450
|
|
|
|
|
|
|
if ( cs == 0 ) |
7451
|
|
|
|
|
|
|
goto _out; |
7452
|
|
|
|
|
|
|
_resume: |
7453
|
0
|
|
|
|
|
|
_keys = _JJR_RBR_trans_keys + _JJR_RBR_key_offsets[cs]; |
7454
|
0
|
|
|
|
|
|
_trans = _JJR_RBR_index_offsets[cs]; |
7455
|
|
|
|
|
|
|
|
7456
|
0
|
|
|
|
|
|
_klen = _JJR_RBR_single_lengths[cs]; |
7457
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7458
|
|
|
|
|
|
|
const char *_lower = _keys; |
7459
|
|
|
|
|
|
|
const char *_mid; |
7460
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
7461
|
|
|
|
|
|
|
while (1) { |
7462
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7463
|
|
|
|
|
|
|
break; |
7464
|
|
|
|
|
|
|
|
7465
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
7466
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
7467
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
7468
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
7469
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
7470
|
|
|
|
|
|
|
else { |
7471
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
7472
|
0
|
|
|
|
|
|
goto _match; |
7473
|
|
|
|
|
|
|
} |
7474
|
|
|
|
|
|
|
} |
7475
|
0
|
|
|
|
|
|
_keys += _klen; |
7476
|
0
|
|
|
|
|
|
_trans += _klen; |
7477
|
|
|
|
|
|
|
} |
7478
|
|
|
|
|
|
|
|
7479
|
0
|
|
|
|
|
|
_klen = _JJR_RBR_range_lengths[cs]; |
7480
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7481
|
|
|
|
|
|
|
const char *_lower = _keys; |
7482
|
|
|
|
|
|
|
const char *_mid; |
7483
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
7484
|
|
|
|
|
|
|
while (1) { |
7485
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7486
|
|
|
|
|
|
|
break; |
7487
|
|
|
|
|
|
|
|
7488
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
7489
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
7490
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
7491
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
7492
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
7493
|
|
|
|
|
|
|
else { |
7494
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
7495
|
0
|
|
|
|
|
|
goto _match; |
7496
|
|
|
|
|
|
|
} |
7497
|
|
|
|
|
|
|
} |
7498
|
0
|
|
|
|
|
|
_trans += _klen; |
7499
|
|
|
|
|
|
|
} |
7500
|
|
|
|
|
|
|
|
7501
|
|
|
|
|
|
|
_match: |
7502
|
0
|
|
|
|
|
|
_trans = _JJR_RBR_indicies[_trans]; |
7503
|
0
|
|
|
|
|
|
cs = _JJR_RBR_trans_targs[_trans]; |
7504
|
|
|
|
|
|
|
|
7505
|
0
|
0
|
|
|
|
|
if ( _JJR_RBR_trans_actions[_trans] == 0 ) |
7506
|
|
|
|
|
|
|
goto _again; |
7507
|
|
|
|
|
|
|
|
7508
|
0
|
|
|
|
|
|
_acts = _JJR_RBR_actions + _JJR_RBR_trans_actions[_trans]; |
7509
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
7510
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
7511
|
|
|
|
|
|
|
{ |
7512
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
7513
|
|
|
|
|
|
|
{ |
7514
|
|
|
|
|
|
|
case 0: |
7515
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = nullptr; } |
7516
|
|
|
|
|
|
|
break; |
7517
|
|
|
|
|
|
|
case 1: |
7518
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 3, append = nullptr; } |
7519
|
|
|
|
|
|
|
break; |
7520
|
|
|
|
|
|
|
case 2: |
7521
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = "y"; } |
7522
|
|
|
|
|
|
|
break; |
7523
|
|
|
|
|
|
|
case 3: |
7524
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
7525
|
|
|
|
|
|
|
break; |
7526
|
|
|
|
|
|
|
case 4: |
7527
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
7528
|
|
|
|
|
|
|
break; |
7529
|
|
|
|
|
|
|
case 5: |
7530
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
7531
|
|
|
|
|
|
|
break; |
7532
|
|
|
|
|
|
|
} |
7533
|
|
|
|
|
|
|
} |
7534
|
|
|
|
|
|
|
|
7535
|
|
|
|
|
|
|
_again: |
7536
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
7537
|
|
|
|
|
|
|
goto _out; |
7538
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
7539
|
|
|
|
|
|
|
goto _resume; |
7540
|
|
|
|
|
|
|
_test_eof: {} |
7541
|
|
|
|
|
|
|
_out: {} |
7542
|
|
|
|
|
|
|
} |
7543
|
|
|
|
|
|
|
|
7544
|
0
|
0
|
|
|
|
|
add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7545
|
0
|
|
|
|
|
|
} |
7546
|
|
|
|
|
|
|
|
7547
|
|
|
|
|
|
|
static const char _JJS_RBS_actions[] = { |
7548
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
7549
|
|
|
|
|
|
|
5, 2, 0, 5, 2, 1, 4, 2, |
7550
|
|
|
|
|
|
|
3, 5 |
7551
|
|
|
|
|
|
|
}; |
7552
|
|
|
|
|
|
|
|
7553
|
|
|
|
|
|
|
static const unsigned char _JJS_RBS_key_offsets[] = { |
7554
|
|
|
|
|
|
|
0, 0, 1, 2, 3, 25, 25, 25, |
7555
|
|
|
|
|
|
|
31, 44, 50, 56, 67, 73, 79, 85, |
7556
|
|
|
|
|
|
|
96, 102, 108, 114, 120, 126, 137, 143, |
7557
|
|
|
|
|
|
|
154, 160, 166, 172, 178, 178, 183, 183, |
7558
|
|
|
|
|
|
|
183, 184 |
7559
|
|
|
|
|
|
|
}; |
7560
|
|
|
|
|
|
|
|
7561
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_keys[] = { |
7562
|
|
|
|
|
|
|
116, 115, 101, 98, 99, 100, 102, 103, |
7563
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
7564
|
|
|
|
|
|
|
113, 114, 115, 116, 118, 119, 120, 121, |
7565
|
|
|
|
|
|
|
122, 97, 98, 101, 105, 111, 117, 98, |
7566
|
|
|
|
|
|
|
99, 100, 105, 111, 117, 122, 97, 101, |
7567
|
|
|
|
|
|
|
102, 109, 112, 120, 97, 100, 101, 105, |
7568
|
|
|
|
|
|
|
111, 117, 97, 101, 102, 105, 111, 117, |
7569
|
|
|
|
|
|
|
97, 101, 103, 105, 111, 117, 122, 98, |
7570
|
|
|
|
|
|
|
109, 112, 120, 97, 101, 104, 105, 111, |
7571
|
|
|
|
|
|
|
117, 97, 101, 105, 106, 111, 117, 97, |
7572
|
|
|
|
|
|
|
101, 105, 107, 111, 117, 97, 101, 105, |
7573
|
|
|
|
|
|
|
108, 111, 117, 122, 98, 109, 112, 120, |
7574
|
|
|
|
|
|
|
97, 101, 105, 109, 111, 117, 97, 101, |
7575
|
|
|
|
|
|
|
105, 110, 111, 117, 97, 101, 105, 111, |
7576
|
|
|
|
|
|
|
112, 117, 97, 101, 105, 111, 113, 117, |
7577
|
|
|
|
|
|
|
97, 101, 105, 111, 114, 117, 97, 101, |
7578
|
|
|
|
|
|
|
105, 111, 115, 117, 122, 98, 109, 112, |
7579
|
|
|
|
|
|
|
120, 97, 101, 105, 111, 116, 117, 97, |
7580
|
|
|
|
|
|
|
101, 105, 111, 117, 118, 122, 98, 109, |
7581
|
|
|
|
|
|
|
112, 120, 97, 101, 105, 111, 117, 119, |
7582
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 120, 97, 101, |
7583
|
|
|
|
|
|
|
105, 111, 117, 121, 97, 101, 105, 111, |
7584
|
|
|
|
|
|
|
117, 122, 97, 101, 105, 111, 117, 101, |
7585
|
|
|
|
|
|
|
97, 122, 0 |
7586
|
|
|
|
|
|
|
}; |
7587
|
|
|
|
|
|
|
|
7588
|
|
|
|
|
|
|
static const char _JJS_RBS_single_lengths[] = { |
7589
|
|
|
|
|
|
|
0, 1, 1, 1, 22, 0, 0, 6, |
7590
|
|
|
|
|
|
|
7, 6, 6, 7, 6, 6, 6, 7, |
7591
|
|
|
|
|
|
|
6, 6, 6, 6, 6, 7, 6, 7, |
7592
|
|
|
|
|
|
|
6, 6, 6, 6, 0, 5, 0, 0, |
7593
|
|
|
|
|
|
|
1, 0 |
7594
|
|
|
|
|
|
|
}; |
7595
|
|
|
|
|
|
|
|
7596
|
|
|
|
|
|
|
static const char _JJS_RBS_range_lengths[] = { |
7597
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7598
|
|
|
|
|
|
|
3, 0, 0, 2, 0, 0, 0, 2, |
7599
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 2, 0, 2, |
7600
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7601
|
|
|
|
|
|
|
0, 1 |
7602
|
|
|
|
|
|
|
}; |
7603
|
|
|
|
|
|
|
|
7604
|
|
|
|
|
|
|
static const unsigned char _JJS_RBS_index_offsets[] = { |
7605
|
|
|
|
|
|
|
0, 0, 2, 4, 6, 29, 30, 31, |
7606
|
|
|
|
|
|
|
38, 49, 56, 63, 73, 80, 87, 94, |
7607
|
|
|
|
|
|
|
104, 111, 118, 125, 132, 139, 149, 156, |
7608
|
|
|
|
|
|
|
166, 173, 180, 187, 194, 195, 201, 202, |
7609
|
|
|
|
|
|
|
203, 205 |
7610
|
|
|
|
|
|
|
}; |
7611
|
|
|
|
|
|
|
|
7612
|
|
|
|
|
|
|
// State-machine table: maps a transition slot (state offset from
// _JJS_RBS_index_offsets plus the position of the matched key, or the default
// slot when nothing matched) to a transition number. The transition number
// indexes _JJS_RBS_trans_targs and _JJS_RBS_trans_actions.
static const char _JJS_RBS_indicies[] = {
	0, 1, 2, 1, 3, 1, 5, 6,
	7, 8, 9, 10, 11, 12, 13, 14,
	15, 16, 17, 18, 19, 20, 21, 22,
	23, 24, 25, 26, 4, 27, 28, 29,
	30, 29, 29, 29, 29, 27, 31, 32,
	31, 29, 29, 29, 31, 29, 31, 31,
	27, 29, 30, 29, 29, 29, 29, 27,
	29, 29, 30, 29, 29, 29, 27, 29,
	29, 32, 29, 29, 29, 31, 31, 31,
	27, 29, 29, 30, 29, 29, 29, 27,
	29, 29, 29, 30, 29, 29, 27, 29,
	29, 29, 30, 29, 29, 27, 29, 29,
	29, 33, 29, 29, 31, 31, 31, 27,
	29, 29, 29, 30, 29, 29, 27, 34,
	34, 34, 30, 34, 34, 27, 34, 34,
	34, 34, 30, 34, 27, 29, 29, 29,
	29, 30, 29, 27, 29, 29, 29, 29,
	30, 29, 27, 29, 29, 29, 29, 32,
	29, 31, 31, 31, 27, 29, 29, 29,
	29, 30, 29, 27, 29, 29, 29, 29,
	29, 32, 31, 31, 31, 27, 34, 34,
	34, 34, 34, 30, 27, 34, 34, 34,
	34, 34, 30, 27, 29, 29, 29, 29,
	29, 30, 27, 29, 29, 29, 29, 29,
	30, 27, 1, 35, 35, 35, 35, 35,
	28, 28, 27, 28, 36, 35, 28, 0
};
7640
|
|
|
|
|
|
|
|
7641
|
|
|
|
|
|
|
// State-machine table: target state of every transition number. Target 0
// makes add_JJS_RBS leave its scanning loop.
static const char _JJS_RBS_trans_targs[] = {
	2, 0, 3, 4, 5, 7, 8, 9,
	10, 11, 12, 31, 13, 14, 15, 16,
	17, 18, 19, 20, 21, 22, 23, 24,
	25, 26, 27, 6, 28, 29, 30, 30,
	30, 32, 33, 28, 28
};
7648
|
|
|
|
|
|
|
|
7649
|
|
|
|
|
|
|
// State-machine table: for every transition number, the offset of its action
// list inside _JJS_RBS_actions. Offset 0 means the transition runs no action.
static const char _JJS_RBS_trans_actions[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 3, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 7, 5, 1, 5,
	12, 12, 5, 15, 9
};
7656
|
|
|
|
|
|
|
|
7657
|
|
|
|
|
|
|
// Initial state of the JJS_RBS state machine (loaded into `cs` by add_JJS_RBS).
static const int JJS_RBS_start = 1;
7658
|
|
|
|
|
|
|
|
7659
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_JJS_RBS(const string& form, unsigned negation_len, vector& lemmas) const { |
7660
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
7661
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
7662
|
|
|
|
|
|
|
|
7663
|
|
|
|
|
|
|
{ |
7664
|
|
|
|
|
|
|
cs = JJS_RBS_start; |
7665
|
|
|
|
|
|
|
} |
7666
|
|
|
|
|
|
|
|
7667
|
|
|
|
|
|
|
{ |
7668
|
|
|
|
|
|
|
int _klen; |
7669
|
|
|
|
|
|
|
unsigned int _trans; |
7670
|
|
|
|
|
|
|
const char *_acts; |
7671
|
|
|
|
|
|
|
unsigned int _nacts; |
7672
|
|
|
|
|
|
|
const char *_keys; |
7673
|
|
|
|
|
|
|
|
7674
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
7675
|
|
|
|
|
|
|
goto _test_eof; |
7676
|
|
|
|
|
|
|
if ( cs == 0 ) |
7677
|
|
|
|
|
|
|
goto _out; |
7678
|
|
|
|
|
|
|
_resume: |
7679
|
0
|
|
|
|
|
|
_keys = _JJS_RBS_trans_keys + _JJS_RBS_key_offsets[cs]; |
7680
|
0
|
|
|
|
|
|
_trans = _JJS_RBS_index_offsets[cs]; |
7681
|
|
|
|
|
|
|
|
7682
|
0
|
|
|
|
|
|
_klen = _JJS_RBS_single_lengths[cs]; |
7683
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7684
|
|
|
|
|
|
|
const char *_lower = _keys; |
7685
|
|
|
|
|
|
|
const char *_mid; |
7686
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
7687
|
|
|
|
|
|
|
while (1) { |
7688
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7689
|
|
|
|
|
|
|
break; |
7690
|
|
|
|
|
|
|
|
7691
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
7692
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
7693
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
7694
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
7695
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
7696
|
|
|
|
|
|
|
else { |
7697
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
7698
|
0
|
|
|
|
|
|
goto _match; |
7699
|
|
|
|
|
|
|
} |
7700
|
|
|
|
|
|
|
} |
7701
|
0
|
|
|
|
|
|
_keys += _klen; |
7702
|
0
|
|
|
|
|
|
_trans += _klen; |
7703
|
|
|
|
|
|
|
} |
7704
|
|
|
|
|
|
|
|
7705
|
0
|
|
|
|
|
|
_klen = _JJS_RBS_range_lengths[cs]; |
7706
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
7707
|
|
|
|
|
|
|
const char *_lower = _keys; |
7708
|
|
|
|
|
|
|
const char *_mid; |
7709
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
7710
|
|
|
|
|
|
|
while (1) { |
7711
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
7712
|
|
|
|
|
|
|
break; |
7713
|
|
|
|
|
|
|
|
7714
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
7715
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
7716
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
7717
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
7718
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
7719
|
|
|
|
|
|
|
else { |
7720
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
7721
|
0
|
|
|
|
|
|
goto _match; |
7722
|
|
|
|
|
|
|
} |
7723
|
|
|
|
|
|
|
} |
7724
|
0
|
|
|
|
|
|
_trans += _klen; |
7725
|
|
|
|
|
|
|
} |
7726
|
|
|
|
|
|
|
|
7727
|
|
|
|
|
|
|
_match: |
7728
|
0
|
|
|
|
|
|
_trans = _JJS_RBS_indicies[_trans]; |
7729
|
0
|
|
|
|
|
|
cs = _JJS_RBS_trans_targs[_trans]; |
7730
|
|
|
|
|
|
|
|
7731
|
0
|
0
|
|
|
|
|
if ( _JJS_RBS_trans_actions[_trans] == 0 ) |
7732
|
|
|
|
|
|
|
goto _again; |
7733
|
|
|
|
|
|
|
|
7734
|
0
|
|
|
|
|
|
_acts = _JJS_RBS_actions + _JJS_RBS_trans_actions[_trans]; |
7735
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
7736
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
7737
|
|
|
|
|
|
|
{ |
7738
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
7739
|
|
|
|
|
|
|
{ |
7740
|
|
|
|
|
|
|
case 0: |
7741
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
7742
|
|
|
|
|
|
|
break; |
7743
|
|
|
|
|
|
|
case 1: |
7744
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 4, append = nullptr; } |
7745
|
|
|
|
|
|
|
break; |
7746
|
|
|
|
|
|
|
case 2: |
7747
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 4, append = "y"; } |
7748
|
|
|
|
|
|
|
break; |
7749
|
|
|
|
|
|
|
case 3: |
7750
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = nullptr; } |
7751
|
|
|
|
|
|
|
break; |
7752
|
|
|
|
|
|
|
case 4: |
7753
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 2, append = nullptr; } |
7754
|
|
|
|
|
|
|
break; |
7755
|
|
|
|
|
|
|
case 5: |
7756
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = nullptr; } |
7757
|
|
|
|
|
|
|
break; |
7758
|
|
|
|
|
|
|
} |
7759
|
|
|
|
|
|
|
} |
7760
|
|
|
|
|
|
|
|
7761
|
|
|
|
|
|
|
_again: |
7762
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
7763
|
|
|
|
|
|
|
goto _out; |
7764
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
7765
|
|
|
|
|
|
|
goto _resume; |
7766
|
|
|
|
|
|
|
_test_eof: {} |
7767
|
|
|
|
|
|
|
_out: {} |
7768
|
|
|
|
|
|
|
} |
7769
|
|
|
|
|
|
|
|
7770
|
0
|
0
|
|
|
|
|
add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7771
|
0
|
|
|
|
|
|
} |
7772
|
|
|
|
|
|
|
|
7773
|
|
|
|
|
|
|
} // namespace morphodita |
7774
|
|
|
|
|
|
|
|
7775
|
|
|
|
|
|
|
///////// |
7776
|
|
|
|
|
|
|
// File: morphodita/morpho/external_morpho.h |
7777
|
|
|
|
|
|
|
///////// |
7778
|
|
|
|
|
|
|
|
7779
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7780
|
|
|
|
|
|
|
// |
7781
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7782
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7783
|
|
|
|
|
|
|
// |
7784
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7785
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7786
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7787
|
|
|
|
|
|
|
|
7788
|
|
|
|
|
|
|
namespace morphodita { |
7789
|
|
|
|
|
|
|
|
7790
|
0
|
|
|
|
|
|
class external_morpho : public morpho { |
7791
|
|
|
|
|
|
|
public: |
7792
|
0
|
|
|
|
|
|
external_morpho(unsigned version) : version(version) {} |
7793
|
|
|
|
|
|
|
|
7794
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
7795
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
7796
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
7797
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
7798
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
7799
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
7800
|
|
|
|
|
|
|
|
7801
|
|
|
|
|
|
|
bool load(istream& is); |
7802
|
|
|
|
|
|
|
|
7803
|
|
|
|
|
|
|
private: |
7804
|
|
|
|
|
|
|
unsigned version; |
7805
|
|
|
|
|
|
|
|
7806
|
|
|
|
|
|
|
string unknown_tag; |
7807
|
|
|
|
|
|
|
}; |
7808
|
|
|
|
|
|
|
|
7809
|
|
|
|
|
|
|
} // namespace morphodita |
7810
|
|
|
|
|
|
|
|
7811
|
|
|
|
|
|
|
///////// |
7812
|
|
|
|
|
|
|
// File: morphodita/tokenizer/generic_tokenizer.h |
7813
|
|
|
|
|
|
|
///////// |
7814
|
|
|
|
|
|
|
|
7815
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7816
|
|
|
|
|
|
|
// |
7817
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7818
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7819
|
|
|
|
|
|
|
// |
7820
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7821
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7822
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7823
|
|
|
|
|
|
|
|
7824
|
|
|
|
|
|
|
namespace morphodita { |
7825
|
|
|
|
|
|
|
|
7826
|
6
|
|
|
|
|
|
class generic_tokenizer : public ragel_tokenizer { |
7827
|
|
|
|
|
|
|
public: |
7828
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
7829
|
|
|
|
|
|
|
generic_tokenizer(unsigned version); |
7830
|
|
|
|
|
|
|
|
7831
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
7832
|
|
|
|
|
|
|
}; |
7833
|
|
|
|
|
|
|
|
7834
|
|
|
|
|
|
|
} // namespace morphodita |
7835
|
|
|
|
|
|
|
|
7836
|
|
|
|
|
|
|
///////// |
7837
|
|
|
|
|
|
|
// File: morphodita/morpho/external_morpho.cpp |
7838
|
|
|
|
|
|
|
///////// |
7839
|
|
|
|
|
|
|
|
7840
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7841
|
|
|
|
|
|
|
// |
7842
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7843
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7844
|
|
|
|
|
|
|
// |
7845
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7846
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7847
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7848
|
|
|
|
|
|
|
|
7849
|
|
|
|
|
|
|
namespace morphodita { |
7850
|
|
|
|
|
|
|
|
7851
|
0
|
|
|
|
|
|
// Load the model from `is`: decompress it and read the unknown tag, which is
// stored as a one-byte length followed by that many characters.
// Returns true only when decoding succeeds and the whole payload was consumed.
bool external_morpho::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  try {
    // Load unknown_tag
    unsigned length = data.next_1B();
    unknown_tag.assign(data.next<char>(length), length);
  } catch (binary_decoder_error&) {
    // Truncated or malformed payload.
    return false;
  }

  // Trailing bytes mean the data is not in the expected format.
  return data.is_end();
}
7865
|
|
|
|
|
|
|
|
7866
|
0
|
|
|
|
|
|
int external_morpho::analyze(string_piece form, guesser_mode /*guesser*/, vector& lemmas) const { |
7867
|
|
|
|
|
|
|
lemmas.clear(); |
7868
|
|
|
|
|
|
|
|
7869
|
0
|
0
|
|
|
|
|
if (form.len) { |
7870
|
|
|
|
|
|
|
// Start by skipping the first form |
7871
|
|
|
|
|
|
|
string_piece lemmatags = form; |
7872
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
0
|
|
|
|
|
|
7873
|
0
|
0
|
|
|
|
|
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
7874
|
|
|
|
|
|
|
|
7875
|
|
|
|
|
|
|
// Split lemmatags using ' ' into lemma-tag pairs. |
7876
|
0
|
0
|
|
|
|
|
while (lemmatags.len) { |
7877
|
|
|
|
|
|
|
auto lemma_start = lemmatags.str; |
7878
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
0
|
|
|
|
|
|
7879
|
0
|
0
|
|
|
|
|
if (!lemmatags.len) break; |
7880
|
|
|
|
|
|
|
auto lemma_len = lemmatags.str - lemma_start; |
7881
|
0
|
|
|
|
|
|
lemmatags.len--, lemmatags.str++; |
7882
|
|
|
|
|
|
|
|
7883
|
|
|
|
|
|
|
auto tag_start = lemmatags.str; |
7884
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
0
|
|
|
|
|
|
7885
|
|
|
|
|
|
|
auto tag_len = lemmatags.str - tag_start; |
7886
|
0
|
0
|
|
|
|
|
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
7887
|
|
|
|
|
|
|
|
7888
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(lemma_start, lemma_len), string(tag_start, tag_len)); |
7889
|
|
|
|
|
|
|
} |
7890
|
|
|
|
|
|
|
|
7891
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
7892
|
|
|
|
|
|
|
} |
7893
|
|
|
|
|
|
|
|
7894
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
7895
|
0
|
|
|
|
|
|
return -1; |
7896
|
|
|
|
|
|
|
} |
7897
|
|
|
|
|
|
|
|
7898
|
0
|
|
|
|
|
|
int external_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
7899
|
|
|
|
|
|
|
forms.clear(); |
7900
|
|
|
|
|
|
|
|
7901
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
7902
|
|
|
|
|
|
|
|
7903
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
7904
|
|
|
|
|
|
|
// Start by locating the lemma |
7905
|
|
|
|
|
|
|
string_piece formtags = lemma; |
7906
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
0
|
|
|
|
|
|
7907
|
0
|
|
|
|
|
|
string_piece real_lemma(lemma.str, lemma.len - formtags.len); |
7908
|
0
|
0
|
|
|
|
|
if (formtags.len) formtags.len--, formtags.str++; |
7909
|
|
|
|
|
|
|
|
7910
|
|
|
|
|
|
|
// Split formtags using ' ' into form-tag pairs. |
7911
|
|
|
|
|
|
|
bool any_result = false; |
7912
|
0
|
0
|
|
|
|
|
while (formtags.len) { |
7913
|
|
|
|
|
|
|
auto form_start = formtags.str; |
7914
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
0
|
|
|
|
|
|
7915
|
0
|
0
|
|
|
|
|
if (!formtags.len) break; |
7916
|
|
|
|
|
|
|
auto form_len = formtags.str - form_start; |
7917
|
0
|
|
|
|
|
|
formtags.len--, formtags.str++; |
7918
|
|
|
|
|
|
|
|
7919
|
|
|
|
|
|
|
auto tag_start = formtags.str; |
7920
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
0
|
|
|
|
|
|
7921
|
|
|
|
|
|
|
auto tag_len = formtags.str - tag_start; |
7922
|
0
|
0
|
|
|
|
|
if (formtags.len) formtags.len--, formtags.str++; |
7923
|
|
|
|
|
|
|
|
7924
|
|
|
|
|
|
|
any_result = true; |
7925
|
|
|
|
|
|
|
string tag(tag_start, tag_len); |
7926
|
0
|
0
|
|
|
|
|
if (filter.matches(tag.c_str())) { |
7927
|
0
|
0
|
|
|
|
|
if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len)); |
|
|
0
|
|
|
|
|
|
7928
|
0
|
0
|
|
|
|
|
forms.back().forms.emplace_back(string(form_start, form_len), tag); |
7929
|
|
|
|
|
|
|
} |
7930
|
|
|
|
|
|
|
} |
7931
|
|
|
|
|
|
|
|
7932
|
0
|
0
|
|
|
|
|
if (any_result) return NO_GUESSER; |
7933
|
|
|
|
|
|
|
} |
7934
|
|
|
|
|
|
|
|
7935
|
|
|
|
|
|
|
return -1; |
7936
|
|
|
|
|
|
|
} |
7937
|
|
|
|
|
|
|
|
7938
|
0
|
|
|
|
|
|
// Length of the raw lemma: the number of characters before the first space
// (or the whole input when it contains no space).
int external_morpho::raw_lemma_len(string_piece lemma) const {
  unsigned lemma_len = 0;
  while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
  return lemma_len;
}
7943
|
|
|
|
|
|
|
|
7944
|
0
|
|
|
|
|
|
// Length of the lemma id: the number of characters before the first space
// (or the whole input when it contains no space). Computed the same way as
// raw_lemma_len.
int external_morpho::lemma_id_len(string_piece lemma) const {
  unsigned lemma_len = 0;
  while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
  return lemma_len;
}
7949
|
|
|
|
|
|
|
|
7950
|
0
|
|
|
|
|
|
// Length of the raw form: the number of characters before the first space
// (or the whole input when it contains no space).
int external_morpho::raw_form_len(string_piece form) const {
  unsigned form_len = 0;
  while (form_len < form.len && form.str[form_len] != ' ') form_len++;
  return form_len;
}
7955
|
|
|
|
|
|
|
|
7956
|
0
|
|
|
|
|
|
// Create a new generic_tokenizer for this model's version.
// The object is allocated with `new` and returned as a raw pointer.
tokenizer* external_morpho::new_tokenizer() const {
  return new generic_tokenizer(version);
}
7959
|
|
|
|
|
|
|
|
7960
|
|
|
|
|
|
|
} // namespace morphodita |
7961
|
|
|
|
|
|
|
|
7962
|
|
|
|
|
|
|
///////// |
7963
|
|
|
|
|
|
|
// File: morphodita/morpho/generic_lemma_addinfo.h |
7964
|
|
|
|
|
|
|
///////// |
7965
|
|
|
|
|
|
|
|
7966
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7967
|
|
|
|
|
|
|
// |
7968
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7969
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7970
|
|
|
|
|
|
|
// |
7971
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7972
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7973
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7974
|
|
|
|
|
|
|
|
7975
|
|
|
|
|
|
|
namespace morphodita { |
7976
|
|
|
|
|
|
|
|
7977
|
|
|
|
|
|
|
// Declarations |
7978
|
0
|
|
|
|
|
|
struct generic_lemma_addinfo { |
7979
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
7980
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
7981
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
7982
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
7983
|
|
|
|
|
|
|
|
7984
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
7985
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
7986
|
|
|
|
|
|
|
|
7987
|
|
|
|
|
|
|
vector data; |
7988
|
|
|
|
|
|
|
}; |
7989
|
|
|
|
|
|
|
|
7990
|
|
|
|
|
|
|
// Definitions |
7991
|
|
|
|
|
|
|
int generic_lemma_addinfo::raw_lemma_len(string_piece lemma) { |
7992
|
18
|
|
|
|
|
|
return lemma.len; |
7993
|
|
|
|
|
|
|
} |
7994
|
|
|
|
|
|
|
|
7995
|
|
|
|
|
|
|
int generic_lemma_addinfo::lemma_id_len(string_piece lemma) { |
7996
|
7
|
|
|
|
|
|
return lemma.len; |
7997
|
|
|
|
|
|
|
} |
7998
|
|
|
|
|
|
|
|
7999
|
|
|
|
|
|
|
string generic_lemma_addinfo::format(const unsigned char* /*addinfo*/, int /*addinfo_len*/) { |
8000
|
|
|
|
|
|
|
return string(); |
8001
|
|
|
|
|
|
|
} |
8002
|
|
|
|
|
|
|
|
8003
|
|
|
|
|
|
|
bool generic_lemma_addinfo::generatable(const unsigned char* /*addinfo*/, int /*addinfo_len*/) { |
8004
|
|
|
|
|
|
|
return true; |
8005
|
|
|
|
|
|
|
} |
8006
|
|
|
|
|
|
|
|
8007
|
|
|
|
|
|
|
int generic_lemma_addinfo::parse(string_piece lemma, bool /*die_on_failure*/) { |
8008
|
0
|
|
|
|
|
|
return lemma.len; |
8009
|
|
|
|
|
|
|
} |
8010
|
|
|
|
|
|
|
|
8011
|
|
|
|
|
|
|
bool generic_lemma_addinfo::match_lemma_id(const unsigned char* /*other_addinfo*/, int /*other_addinfo_len*/) { |
8012
|
|
|
|
|
|
|
return true; |
8013
|
|
|
|
|
|
|
} |
8014
|
|
|
|
|
|
|
|
8015
|
|
|
|
|
|
|
} // namespace morphodita |
8016
|
|
|
|
|
|
|
|
8017
|
|
|
|
|
|
|
///////// |
8018
|
|
|
|
|
|
|
// File: morphodita/morpho/generic_morpho.h |
8019
|
|
|
|
|
|
|
///////// |
8020
|
|
|
|
|
|
|
|
8021
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
8022
|
|
|
|
|
|
|
// |
8023
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8024
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8025
|
|
|
|
|
|
|
// |
8026
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8027
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8028
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8029
|
|
|
|
|
|
|
|
8030
|
|
|
|
|
|
|
namespace morphodita { |
8031
|
|
|
|
|
|
|
|
8032
|
0
|
|
|
|
|
|
class generic_morpho : public morpho { |
8033
|
|
|
|
|
|
|
public: |
8034
|
1
|
|
|
|
|
|
generic_morpho(unsigned version) : version(version) {} |
8035
|
|
|
|
|
|
|
|
8036
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
8037
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
8038
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
8039
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
8040
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
8041
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
8042
|
|
|
|
|
|
|
|
8043
|
|
|
|
|
|
|
bool load(istream& is); |
8044
|
|
|
|
|
|
|
private: |
8045
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
8046
|
|
|
|
|
|
|
|
8047
|
|
|
|
|
|
|
unsigned version; |
8048
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
8049
|
|
|
|
|
|
|
unique_ptr statistical_guesser; |
8050
|
|
|
|
|
|
|
|
8051
|
|
|
|
|
|
|
string unknown_tag, number_tag, punctuation_tag, symbol_tag; |
8052
|
|
|
|
|
|
|
}; |
8053
|
|
|
|
|
|
|
|
8054
|
|
|
|
|
|
|
} // namespace morphodita |
8055
|
|
|
|
|
|
|
|
8056
|
|
|
|
|
|
|
///////// |
8057
|
|
|
|
|
|
|
// File: morphodita/morpho/generic_morpho.cpp |
8058
|
|
|
|
|
|
|
///////// |
8059
|
|
|
|
|
|
|
|
8060
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
8061
|
|
|
|
|
|
|
// |
8062
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8063
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8064
|
|
|
|
|
|
|
// |
8065
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8066
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8067
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8068
|
|
|
|
|
|
|
|
8069
|
|
|
|
|
|
|
namespace morphodita { |
8070
|
|
|
|
|
|
|
|
8071
|
1
|
|
|
|
|
|
// Load the model from `is`. After decompression the payload holds, in order:
// four tags (unknown, number, punctuation, symbol), each stored as a one-byte
// length followed by that many characters; the dictionary; and a one-byte
// flag which, when non-zero, is followed by a statistical guesser.
// Returns true only when decoding succeeds and the whole payload was consumed.
bool generic_morpho::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  try {
    // Load tags
    unsigned length = data.next_1B();
    unknown_tag.assign(data.next<char>(length), length);
    length = data.next_1B();
    number_tag.assign(data.next<char>(length), length);
    length = data.next_1B();
    punctuation_tag.assign(data.next<char>(length), length);
    length = data.next_1B();
    symbol_tag.assign(data.next<char>(length), length);

    // Load dictionary
    dictionary.load(data);

    // Optionally statistical guesser if present
    statistical_guesser.reset();
    if (data.next_1B()) {
      statistical_guesser.reset(new morpho_statistical_guesser());
      statistical_guesser->load(data);
    }
  } catch (binary_decoder_error&) {
    // Truncated or malformed payload.
    return false;
  }

  // Trailing bytes mean the data is not in the expected format.
  return data.is_end();
}
8101
|
|
|
|
|
|
|
|
8102
|
14
|
|
|
|
|
|
int generic_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
8103
|
|
|
|
|
|
|
lemmas.clear(); |
8104
|
|
|
|
|
|
|
|
8105
|
14
|
50
|
|
|
|
|
if (form.len) { |
8106
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
8107
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
8108
|
|
|
|
|
|
|
string form_lc; // all lowercase |
8109
|
14
|
50
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
8110
|
|
|
|
|
|
|
|
8111
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
8112
|
14
|
50
|
|
|
|
|
dictionary.analyze(form, lemmas); |
8113
|
14
|
50
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
0
|
|
|
|
|
|
8114
|
14
|
100
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
50
|
|
|
|
|
|
8115
|
14
|
100
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
8116
|
|
|
|
|
|
|
|
8117
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers, punctuation and symbols. |
8118
|
4
|
50
|
|
|
|
|
analyze_special(form, lemmas); |
8119
|
4
|
50
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
8120
|
|
|
|
|
|
|
|
8121
|
|
|
|
|
|
|
// For the statistical guesser, use all casing variants. |
8122
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && statistical_guesser) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8123
|
0
|
0
|
|
|
|
|
if (form_uclc.empty() && form_lc.empty()) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8124
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, nullptr); |
8125
|
|
|
|
|
|
|
else { |
8126
|
0
|
0
|
|
|
|
|
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
8127
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, &used_rules); |
8128
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
8129
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
8130
|
|
|
|
|
|
|
} |
8131
|
|
|
|
|
|
|
} |
8132
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
8133
|
|
|
|
|
|
|
} |
8134
|
|
|
|
|
|
|
|
8135
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
8136
|
14
|
|
|
|
|
|
return -1; |
8137
|
|
|
|
|
|
|
} |
8138
|
|
|
|
|
|
|
|
8139
|
0
|
|
|
|
|
|
int generic_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
8140
|
|
|
|
|
|
|
forms.clear(); |
8141
|
|
|
|
|
|
|
|
8142
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
8143
|
|
|
|
|
|
|
|
8144
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
8145
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
0
|
|
|
|
|
|
8146
|
|
|
|
|
|
|
return NO_GUESSER; |
8147
|
|
|
|
|
|
|
} |
8148
|
|
|
|
|
|
|
|
8149
|
|
|
|
|
|
|
return -1; |
8150
|
|
|
|
|
|
|
} |
8151
|
|
|
|
|
|
|
|
8152
|
18
|
|
|
|
|
|
// Length of the raw lemma, delegated to generic_lemma_addinfo.
int generic_morpho::raw_lemma_len(string_piece lemma) const {
  return generic_lemma_addinfo::raw_lemma_len(lemma);
}
8155
|
|
|
|
|
|
|
|
8156
|
7
|
|
|
|
|
|
// Length of the lemma id, delegated to generic_lemma_addinfo.
int generic_morpho::lemma_id_len(string_piece lemma) const {
  return generic_lemma_addinfo::lemma_id_len(lemma);
}
8159
|
|
|
|
|
|
|
|
8160
|
14
|
|
|
|
|
|
// The raw form is the whole form, so its length is returned unchanged.
int generic_morpho::raw_form_len(string_piece form) const {
  return form.len;
}
8163
|
|
|
|
|
|
|
|
8164
|
0
|
|
|
|
|
|
// Create a new generic_tokenizer for this model's version.
// The object is allocated with `new` and returned as a raw pointer.
tokenizer* generic_morpho::new_tokenizer() const {
  return new generic_tokenizer(version);
}
8167
|
|
|
|
|
|
|
|
8168
|
4
|
|
|
|
|
|
void generic_morpho::analyze_special(string_piece form, vector& lemmas) const { |
8169
|
|
|
|
|
|
|
using namespace unilib; |
8170
|
|
|
|
|
|
|
|
8171
|
|
|
|
|
|
|
// Analyzer for numbers, punctuation and symbols. |
8172
|
|
|
|
|
|
|
// Number is anything matching [+-]? is_Pn* ([.,] is_Pn*)? ([Ee] [+-]? is_Pn+)? for at least one is_Pn* nonempty. |
8173
|
|
|
|
|
|
|
// Punctuation is any form beginning with either unicode punctuation or punctuation_exceptions character. |
8174
|
|
|
|
|
|
|
// Beware that numbers takes precedence, so - is punctuation, -3 is number, -. is punctuation, -.3 is number. |
8175
|
8
|
50
|
|
|
|
|
if (!form.len) return; |
8176
|
|
|
|
|
|
|
|
8177
|
4
|
|
|
|
|
|
string_piece number = form; |
8178
|
4
|
|
|
|
|
|
char32_t first = utf8::decode(number.str, number.len); |
8179
|
|
|
|
|
|
|
|
8180
|
|
|
|
|
|
|
// Try matching a number. |
8181
|
|
|
|
|
|
|
char32_t codepoint = first; |
8182
|
|
|
|
|
|
|
bool any_digit = false; |
8183
|
4
|
50
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
8184
|
4
|
50
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
8185
|
4
|
50
|
|
|
|
|
if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len); |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
8186
|
4
|
50
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
8187
|
4
|
50
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
0
|
|
|
|
|
|
8188
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
8189
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
8190
|
|
|
|
|
|
|
any_digit = false; |
8191
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
8192
|
|
|
|
|
|
|
} |
8193
|
|
|
|
|
|
|
|
8194
|
4
|
50
|
|
|
|
|
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8195
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
8196
|
0
|
|
|
|
|
|
return; |
8197
|
|
|
|
|
|
|
} |
8198
|
|
|
|
|
|
|
|
8199
|
|
|
|
|
|
|
// Try matching punctuation or symbol. |
8200
|
|
|
|
|
|
|
bool punctuation = true, symbol = true; |
8201
|
8
|
|
|
|
|
|
string_piece form_ori = form; |
8202
|
8
|
100
|
|
|
|
|
while (form.len) { |
8203
|
4
|
|
|
|
|
|
codepoint = utf8::decode(form.str, form.len); |
8204
|
8
|
50
|
|
|
|
|
punctuation = punctuation && unicode::category(codepoint) & unicode::P; |
|
|
50
|
|
|
|
|
|
8205
|
8
|
50
|
|
|
|
|
symbol = symbol && unicode::category(codepoint) & unicode::S; |
|
|
50
|
|
|
|
|
|
8206
|
|
|
|
|
|
|
} |
8207
|
4
|
50
|
|
|
|
|
if (punctuation) |
8208
|
8
|
50
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
8209
|
0
|
0
|
|
|
|
|
else if (symbol) |
8210
|
4
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), symbol_tag); |
8211
|
|
|
|
|
|
|
} |
8212
|
|
|
|
|
|
|
|
8213
|
|
|
|
|
|
|
} // namespace morphodita |
8214
|
|
|
|
|
|
|
|
8215
|
|
|
|
|
|
|
///////// |
8216
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_ids.h |
8217
|
|
|
|
|
|
|
///////// |
8218
|
|
|
|
|
|
|
|
8219
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
8220
|
|
|
|
|
|
|
// |
8221
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8222
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8223
|
|
|
|
|
|
|
// |
8224
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8225
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8226
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8227
|
|
|
|
|
|
|
|
8228
|
|
|
|
|
|
|
namespace morphodita { |
8229
|
|
|
|
|
|
|
|
8230
|
|
|
|
|
|
|
// Identifiers of the supported morphological dictionary kinds.
//
// morpho::load casts the first byte of a model stream to morpho_id and
// dispatches on it, which is why every enumerator has an explicit value.
class morpho_ids {
 public:
  enum morpho_id {
    CZECH = 0,
    ENGLISH_V1 = 1,
    GENERIC = 2,
    EXTERNAL = 3,
    ENGLISH_V2 = 4,
    ENGLISH_V3 = 5, ENGLISH = ENGLISH_V3,  // ENGLISH aliases the newest English version
    SLOVAK_PDT = 6,
    DERIVATOR_DICTIONARY = 7,
  };

  // Translates a textual dictionary name into its identifier.
  //
  // Recognized names are "czech", "english", "external", "generic" and
  // "slovak_pdt". On success the identifier is stored in `id` and true is
  // returned; for any other string false is returned and `id` is left
  // untouched.
  static bool parse(const string& str, morpho_id& id) {
    if (str == "czech") id = CZECH;
    else if (str == "english") id = ENGLISH;
    else if (str == "external") id = EXTERNAL;
    else if (str == "generic") id = GENERIC;
    else if (str == "slovak_pdt") id = SLOVAK_PDT;
    else return false;

    return true;
  }
};

typedef morpho_ids::morpho_id morpho_id;
8254
|
|
|
|
|
|
|
|
8255
|
|
|
|
|
|
|
} // namespace morphodita |
8256
|
|
|
|
|
|
|
|
8257
|
|
|
|
|
|
|
///////// |
8258
|
|
|
|
|
|
|
// File: utils/new_unique_ptr.h |
8259
|
|
|
|
|
|
|
///////// |
8260
|
|
|
|
|
|
|
|
8261
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils <http://github.com/ufal/cpp_utils/>. |
8262
|
|
|
|
|
|
|
// |
8263
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8264
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8265
|
|
|
|
|
|
|
// |
8266
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8267
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8268
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8269
|
|
|
|
|
|
|
|
8270
|
|
|
|
|
|
|
namespace utils { |
8271
|
|
|
|
|
|
|
|
8272
|
|
|
|
|
|
|
// Allocates a T constructed from the perfectly forwarded arguments and wraps
// it in a unique_ptr, so no raw owning pointer is exposed to the caller.
//
// T must be given explicitly at the call site; the constructor argument types
// are deduced.
template <class T, class... Args>
unique_ptr<T> new_unique_ptr(Args&&... args) {
  return unique_ptr<T>(new T(std::forward<Args>(args)...));
}
8276
|
|
|
|
|
|
|
|
8277
|
|
|
|
|
|
|
} // namespace utils |
8278
|
|
|
|
|
|
|
|
8279
|
|
|
|
|
|
|
///////// |
8280
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho.cpp |
8281
|
|
|
|
|
|
|
///////// |
8282
|
|
|
|
|
|
|
|
8283
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8284
|
|
|
|
|
|
|
// |
8285
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8286
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8287
|
|
|
|
|
|
|
// |
8288
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8289
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8290
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8291
|
|
|
|
|
|
|
|
8292
|
|
|
|
|
|
|
namespace morphodita { |
8293
|
|
|
|
|
|
|
|
8294
|
1
|
|
|
|
|
|
// Loads a morphological dictionary from a binary stream.
//
// The first byte of the stream is interpreted as a morpho_id and selects the
// implementation that reads the rest. Returns a heap-allocated dictionary
// released to the caller, or nullptr when the identifier is not handled or
// the selected implementation fails to load.
//
// NOTE(review): the explicit template arguments of new_unique_ptr and
// unique_ptr were missing in this copy. czech_morpho follows from the
// constructor arguments; english_morpho, external_morpho, generic_morpho and
// derivator_dictionary are reconstructed from the case labels and upstream
// MorphoDiTa -- confirm against the original source.
morpho* morpho::load(istream& is) {
  morpho_id id = morpho_id(is.get());
  switch (id) {
    case morpho_ids::CZECH:
      {
        auto res = new_unique_ptr<czech_morpho>(czech_morpho::morpho_language::CZECH, 1);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::ENGLISH_V1:
    case morpho_ids::ENGLISH_V2:
    case morpho_ids::ENGLISH_V3:
      {
        // The constructor argument is the English format version (1, 2 or 3).
        auto res = new_unique_ptr<english_morpho>(id == morpho_ids::ENGLISH_V1 ? 1 :
                                                  id == morpho_ids::ENGLISH_V2 ? 2 :
                                                  3);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::EXTERNAL:
      {
        auto res = new_unique_ptr<external_morpho>(1);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::GENERIC:
      {
        auto res = new_unique_ptr<generic_morpho>(1);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::SLOVAK_PDT:
      {
        auto res = new_unique_ptr<czech_morpho>(czech_morpho::morpho_language::SLOVAK, 3);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::DERIVATOR_DICTIONARY:
      {
        // The derivator comes first in the stream, followed by a complete
        // nested dictionary which is loaded recursively.
        auto derinet = new_unique_ptr<derivator_dictionary>();
        if (!derinet->load(is)) return nullptr;

        unique_ptr<morpho> dictionary(load(is));
        if (!dictionary) return nullptr;

        // Link both ways: the derivator refers to the dictionary, and the
        // dictionary takes over ownership of the derivator.
        derinet->dictionary = dictionary.get();
        dictionary->derinet.reset(derinet.release());
        return dictionary.release();
      }
  }

  return nullptr;
}
8346
|
|
|
|
|
|
|
|
8347
|
0
|
|
|
|
|
|
// Loads a morphological dictionary from the file named by `fname`.
//
// The name is passed through path_from_utf8 and the file is opened in binary
// mode. Returns nullptr when the file cannot be opened; otherwise returns
// whatever the stream overload of load returns.
morpho* morpho::load(const char* fname) {
  ifstream f(path_from_utf8(fname).c_str(), ifstream::binary);
  if (!f) return nullptr;

  return load(f);
}
8353
|
|
|
|
|
|
|
|
8354
|
0
|
|
|
|
|
|
// Returns the derivator held in `derinet` as a non-owning pointer; the
// result is whatever derinet.get() yields, so it is null when no derivator
// has been attached.
const derivator* morpho::get_derivator() const {
  return derinet.get();
}
8357
|
|
|
|
|
|
|
|
8358
|
|
|
|
|
|
|
} // namespace morphodita |
8359
|
|
|
|
|
|
|
|
8360
|
|
|
|
|
|
|
///////// |
8361
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_statistical_guesser.cpp |
8362
|
|
|
|
|
|
|
///////// |
8363
|
|
|
|
|
|
|
|
8364
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8365
|
|
|
|
|
|
|
// |
8366
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8367
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8368
|
|
|
|
|
|
|
// |
8369
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8370
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8371
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8372
|
|
|
|
|
|
|
|
8373
|
|
|
|
|
|
|
namespace morphodita { |
8374
|
|
|
|
|
|
|
|
8375
|
0
|
|
|
|
|
|
// Reads the guesser model from `data`.
//
// Layout, in reading order: a 2-byte tag count; for each tag a 1-byte length
// followed by that many 1-byte characters; a 2-byte index of the default
// tag; and finally the rules, which are read by rules.load.
void morpho_statistical_guesser::load(binary_decoder& data) {
  // Load tags and default tag
  tags.resize(data.next_2B());
  for (auto&& tag : tags) {
    tag.resize(data.next_1B());
    for (unsigned i = 0; i < tag.size(); i++)
      tag[i] = data.next_1B();
  }
  default_tag = data.next_2B();

  // Load rules
  rules.load(data);
}
8388
|
|
|
|
|
|
|
|
8389
|
|
|
|
|
|
|
// Helper method for analyze. |
8390
|
0
|
|
|
|
|
|
static bool contains(morpho_statistical_guesser::used_rules* used, const string& rule) { |
8391
|
0
|
0
|
|
|
|
|
if (!used) return false; |
8392
|
|
|
|
|
|
|
|
8393
|
0
|
0
|
|
|
|
|
for (auto&& used_rule : *used) |
8394
|
0
|
0
|
|
|
|
|
if (used_rule == rule) |
8395
|
|
|
|
|
|
|
return true; |
8396
|
|
|
|
|
|
|
|
8397
|
|
|
|
|
|
|
return false; |
8398
|
|
|
|
|
|
|
} |
8399
|
|
|
|
|
|
|
|
8400
|
|
|
|
|
|
|
// Produces unique lemma-tag pairs. |
8401
|
0
|
|
|
|
|
|
void morpho_statistical_guesser::analyze(string_piece form, vector& lemmas, morpho_statistical_guesser::used_rules* used) { |
8402
|
|
|
|
|
|
|
unsigned lemmas_initial_size = lemmas.size(); |
8403
|
|
|
|
|
|
|
|
8404
|
|
|
|
|
|
|
// We have rules in format "suffix prefix" in rules. |
8405
|
|
|
|
|
|
|
// Find the matching rule with longest suffix and of those with longest prefix. |
8406
|
0
|
0
|
|
|
|
|
string rule_label; rule_label.reserve(12); |
8407
|
|
|
|
|
|
|
unsigned suffix_len = 0; |
8408
|
0
|
0
|
|
|
|
|
for (; suffix_len < form.len; suffix_len++) { |
8409
|
0
|
0
|
|
|
|
|
rule_label.push_back(form.str[form.len - (suffix_len + 1)]); |
8410
|
0
|
0
|
|
|
|
|
if (!rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); })) |
8411
|
|
|
|
|
|
|
break; |
8412
|
|
|
|
|
|
|
} |
8413
|
|
|
|
|
|
|
|
8414
|
0
|
0
|
|
|
|
|
for (suffix_len++; suffix_len--; ) { |
8415
|
0
|
|
|
|
|
|
rule_label.resize(suffix_len); |
8416
|
0
|
0
|
|
|
|
|
rule_label.push_back(' '); |
8417
|
|
|
|
|
|
|
|
8418
|
|
|
|
|
|
|
const unsigned char* rule = nullptr; |
8419
|
|
|
|
|
|
|
unsigned rule_prefix_len = 0; |
8420
|
0
|
0
|
|
|
|
|
for (unsigned prefix_len = 0; prefix_len + suffix_len <= form.len; prefix_len++) { |
8421
|
0
|
0
|
|
|
|
|
if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]); |
|
|
0
|
|
|
|
|
|
8422
|
0
|
|
|
|
|
|
const unsigned char* found = rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); }); |
8423
|
0
|
0
|
|
|
|
|
if (!found) break; |
8424
|
0
|
0
|
|
|
|
|
if (*(found += sizeof(uint16_t))) { |
8425
|
|
|
|
|
|
|
rule = found; |
8426
|
|
|
|
|
|
|
rule_prefix_len = prefix_len; |
8427
|
|
|
|
|
|
|
} |
8428
|
|
|
|
|
|
|
} |
8429
|
|
|
|
|
|
|
|
8430
|
0
|
0
|
|
|
|
|
if (rule) { |
8431
|
0
|
|
|
|
|
|
rule_label.resize(suffix_len + 1 + rule_prefix_len); |
8432
|
0
|
0
|
|
|
|
|
if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' ' |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8433
|
0
|
0
|
|
|
|
|
if (used) used->push_back(rule_label); |
|
|
0
|
|
|
|
|
|
8434
|
0
|
0
|
|
|
|
|
for (int rules_len = *rule++; rules_len; rules_len--) { |
8435
|
0
|
|
|
|
|
|
unsigned pref_del_len = *rule++; const char* pref_del = (const char*)rule; rule += pref_del_len; |
8436
|
0
|
|
|
|
|
|
unsigned pref_add_len = *rule++; const char* pref_add = (const char*)rule; rule += pref_add_len; |
8437
|
0
|
|
|
|
|
|
unsigned suff_del_len = *rule++; const char* suff_del = (const char*)rule; rule += suff_del_len; |
8438
|
0
|
|
|
|
|
|
unsigned suff_add_len = *rule++; const char* suff_add = (const char*)rule; rule += suff_add_len; |
8439
|
0
|
|
|
|
|
|
unsigned tags_len = *rule++; const uint16_t* tags = (const uint16_t*)rule; rule += tags_len * sizeof(uint16_t); |
8440
|
|
|
|
|
|
|
|
8441
|
0
|
0
|
|
|
|
|
if (pref_del_len + suff_del_len > form.len || |
|
|
0
|
|
|
|
|
|
8442
|
0
|
0
|
|
|
|
|
(pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) || |
|
|
0
|
|
|
|
|
|
8443
|
0
|
0
|
|
|
|
|
(suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8444
|
0
|
|
|
|
|
|
(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len == 0)) |
8445
|
0
|
|
|
|
|
|
continue; |
8446
|
|
|
|
|
|
|
|
8447
|
|
|
|
|
|
|
string lemma; |
8448
|
0
|
0
|
|
|
|
|
lemma.reserve(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len); |
8449
|
0
|
0
|
|
|
|
|
if (pref_add_len) lemma.append(pref_add, pref_add_len); |
|
|
0
|
|
|
|
|
|
8450
|
0
|
0
|
|
|
|
|
if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len); |
|
|
0
|
|
|
|
|
|
8451
|
0
|
0
|
|
|
|
|
if (suff_add_len) lemma.append(suff_add, suff_add_len); |
|
|
0
|
|
|
|
|
|
8452
|
0
|
0
|
|
|
|
|
while (tags_len--) |
8453
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(lemma, this->tags[unaligned_load_inc(tags)]); |
8454
|
|
|
|
|
|
|
} |
8455
|
|
|
|
|
|
|
} |
8456
|
|
|
|
|
|
|
break; |
8457
|
|
|
|
|
|
|
} |
8458
|
|
|
|
|
|
|
} |
8459
|
|
|
|
|
|
|
|
8460
|
|
|
|
|
|
|
// If nothing was found, use default tag. |
8461
|
0
|
0
|
|
|
|
|
if (lemmas.size() == lemmas_initial_size) |
8462
|
0
|
0
|
|
|
|
|
if (!contains(used, string())) { |
8463
|
0
|
0
|
|
|
|
|
if (used) used->push_back(string()); |
8464
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), tags[default_tag]); |
8465
|
|
|
|
|
|
|
} |
8466
|
0
|
|
|
|
|
|
} |
8467
|
|
|
|
|
|
|
|
8468
|
|
|
|
|
|
|
} // namespace morphodita |
8469
|
|
|
|
|
|
|
|
8470
|
|
|
|
|
|
|
///////// |
8471
|
|
|
|
|
|
|
// File: morphodita/morpho/tag_filter.cpp |
8472
|
|
|
|
|
|
|
///////// |
8473
|
|
|
|
|
|
|
|
8474
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8475
|
|
|
|
|
|
|
// |
8476
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8477
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8478
|
|
|
|
|
|
|
// |
8479
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8480
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8481
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8482
|
|
|
|
|
|
|
|
8483
|
|
|
|
|
|
|
namespace morphodita { |
8484
|
|
|
|
|
|
|
|
8485
|
0
|
|
|
|
|
|
// Compiles a tag wildcard into a list of per-position character filters.
//
// Each element of the pattern constrains one tag position:
//   ?        leaves the position unconstrained (no filter is stored),
//   [chars]  stores a filter over the listed characters; a leading ^ sets the
//            filter's negate flag, and a ] directly after [ or [^ is taken
//            as a listed character rather than as the terminator,
//   other    stores a one-character filter for that character.
// A null `filter` leaves the object with no filters. The pattern is copied
// into `wildcard`, and the stored filters refer to it by offset and length.
tag_filter::tag_filter(const char* filter) {
  if (!filter) return;

  // Work on the private copy from here on.
  wildcard.assign(filter);
  filter = wildcard.c_str();

  for (int tag_pos = 0, filter_pos = 0; filter[filter_pos]; tag_pos++, filter_pos++) {
    if (filter[filter_pos] == '?') continue;
    if (filter[filter_pos] == '[') {
      filter_pos++;

      bool negate = false;
      if (filter[filter_pos] == '^') negate = true, filter_pos++;

      // Consume the listed characters; `first` lets a leading ']' through.
      int chars_start = filter_pos;
      for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
        filter_pos++;

      filters.emplace_back(tag_pos, negate, chars_start, filter_pos - chars_start);
      // Unterminated set: stop here so the loop increment cannot step past
      // the terminating NUL.
      if (!filter[filter_pos]) break;
    } else {
      filters.emplace_back(tag_pos, false, filter_pos, 1);
    }
  }
}
8510
|
|
|
|
|
|
|
|
8511
|
|
|
|
|
|
|
} // namespace morphodita |
8512
|
|
|
|
|
|
|
|
8513
|
|
|
|
|
|
|
///////// |
8514
|
|
|
|
|
|
|
// File: morphodita/tagger/tagger.h |
8515
|
|
|
|
|
|
|
///////// |
8516
|
|
|
|
|
|
|
|
8517
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8518
|
|
|
|
|
|
|
// |
8519
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8520
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8521
|
|
|
|
|
|
|
// |
8522
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8523
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8524
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8525
|
|
|
|
|
|
|
|
8526
|
|
|
|
|
|
|
namespace morphodita { |
8527
|
|
|
|
|
|
|
|
8528
|
1
|
|
|
|
|
|
class tagger { |
8529
|
|
|
|
|
|
|
public: |
8530
|
0
|
|
|
|
|
|
virtual ~tagger() {} |
8531
|
|
|
|
|
|
|
|
8532
|
|
|
|
|
|
|
static tagger* load(const char* fname); |
8533
|
|
|
|
|
|
|
static tagger* load(istream& is); |
8534
|
|
|
|
|
|
|
|
8535
|
|
|
|
|
|
|
// Return morpho associated with the tagger. Do not delete the pointer, it is |
8536
|
|
|
|
|
|
|
// owned by the tagger instance and deleted in the tagger destructor. |
8537
|
|
|
|
|
|
|
virtual const morpho* get_morpho() const = 0; |
8538
|
|
|
|
|
|
|
|
8539
|
|
|
|
|
|
|
// Perform morphologic analysis and subsequent disambiguation. |
8540
|
|
|
|
|
|
|
virtual void tag(const vector& forms, vector& tags, morpho::guesser_mode guesser = morpho::GUESSER_UNSPECIFIED) const = 0; |
8541
|
|
|
|
|
|
|
|
8542
|
|
|
|
|
|
|
// Perform disambiguation only on given analyses. |
8543
|
|
|
|
|
|
|
virtual void tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const = 0; |
8544
|
|
|
|
|
|
|
|
8545
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this tagger. |
8546
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
8547
|
|
|
|
|
|
|
// Is equal to get_morpho()->new_tokenizer. |
8548
|
|
|
|
|
|
|
tokenizer* new_tokenizer() const; |
8549
|
|
|
|
|
|
|
}; |
8550
|
|
|
|
|
|
|
|
8551
|
|
|
|
|
|
|
} // namespace morphodita |
8552
|
|
|
|
|
|
|
|
8553
|
|
|
|
|
|
|
///////// |
8554
|
|
|
|
|
|
|
// File: morphodita/tagger/elementary_features.h |
8555
|
|
|
|
|
|
|
///////// |
8556
|
|
|
|
|
|
|
|
8557
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8558
|
|
|
|
|
|
|
// |
8559
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8560
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8561
|
|
|
|
|
|
|
// |
8562
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8563
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8564
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8565
|
|
|
|
|
|
|
|
8566
|
|
|
|
|
|
|
namespace morphodita { |
8567
|
|
|
|
|
|
|
|
8568
|
|
|
|
|
|
|
// Declarations |
8569
|
|
|
|
|
|
|
// Kind of an elementary feature. feature_sequences::load stores a 4-byte
// value read from the model into a field of this type, so the numeric values
// matter.
enum elementary_feature_type { PER_FORM, PER_TAG, DYNAMIC };

// Range of an elementary feature.
enum elementary_feature_range { ONLY_CURRENT, ANY_OFFSET };

// Value of an elementary feature; 0 and 1 are reserved for the two special
// values below.
typedef uint32_t elementary_feature_value;
enum :elementary_feature_value { elementary_feature_unknown = 0, elementary_feature_empty = 1 };

// Static description of one elementary feature.
struct elementary_feature_description {
  string name;
  elementary_feature_type type;
  elementary_feature_range range;
  int index;
  int map_index;
};
8582
|
|
|
|
|
|
|
|
8583
|
|
|
|
|
|
|
// Container of the elementary feature maps of a tagger model.
//
// Map is the per-feature map type; load() resizes `maps` and calls load() on
// every element.
//
// NOTE(review): the template parameter list and the tail of the member
// declaration were truncated in this copy. `vector<Map> maps;` is
// reconstructed from the uses of `maps` in elementary_features::load.
template <class Map>
class elementary_features {
 public:
  bool load(istream& is);
  bool save(ostream& out);

  vector<Map> maps;
};
8591
|
|
|
|
|
|
|
|
8592
|
0
|
|
|
|
|
|
class persistent_elementary_feature_map : public persistent_unordered_map { |
8593
|
|
|
|
|
|
|
public: |
8594
|
|
|
|
|
|
|
persistent_elementary_feature_map() : persistent_unordered_map() {} |
8595
|
|
|
|
|
|
|
persistent_elementary_feature_map(const persistent_unordered_map&& map) : persistent_unordered_map(map) {} |
8596
|
|
|
|
|
|
|
|
8597
|
|
|
|
|
|
|
elementary_feature_value value(const char* feature, int len) const { |
8598
|
96
|
|
|
|
|
|
auto* it = at_typed(feature, len); |
8599
|
96
|
0
|
|
|
|
|
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8600
|
|
|
|
|
|
|
} |
8601
|
|
|
|
|
|
|
}; |
8602
|
|
|
|
|
|
|
|
8603
|
|
|
|
|
|
|
// Definitions |
8604
|
|
|
|
|
|
|
template |
8605
|
1
|
|
|
|
|
|
inline bool elementary_features |
8606
|
|
|
|
|
|
|
binary_decoder data; |
8607
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
50
|
|
|
|
|
|
8608
|
|
|
|
|
|
|
|
8609
|
|
|
|
|
|
|
try { |
8610
|
1
|
50
|
|
|
|
|
maps.resize(data.next_1B()); |
|
|
50
|
|
|
|
|
|
8611
|
27
|
100
|
|
|
|
|
for (auto&& map : maps) |
8612
|
26
|
50
|
|
|
|
|
map.load(data); |
|
|
0
|
|
|
|
|
|
8613
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
8614
|
|
|
|
|
|
|
return false; |
8615
|
|
|
|
|
|
|
} |
8616
|
|
|
|
|
|
|
|
8617
|
1
|
|
|
|
|
|
return data.is_end(); |
8618
|
|
|
|
|
|
|
} |
8619
|
|
|
|
|
|
|
|
8620
|
|
|
|
|
|
|
} // namespace morphodita |
8621
|
|
|
|
|
|
|
|
8622
|
|
|
|
|
|
|
///////// |
8623
|
|
|
|
|
|
|
// File: morphodita/tagger/vli.h |
8624
|
|
|
|
|
|
|
///////// |
8625
|
|
|
|
|
|
|
|
8626
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8627
|
|
|
|
|
|
|
// |
8628
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8629
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8630
|
|
|
|
|
|
|
// |
8631
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8632
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8633
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8634
|
|
|
|
|
|
|
|
8635
|
|
|
|
|
|
|
namespace morphodita { |
8636
|
|
|
|
|
|
|
|
8637
|
|
|
|
|
|
|
// Declarations |
8638
|
|
|
|
|
|
|
// Variable-length integer codec.
//
// A value is written most significant group first, 7 bits per byte. Every
// byte except the last has its top bit set, so the last byte of an encoded
// value is the first one whose top bit is clear.
template <class T>
class vli {
 public:
  // Maximum number of bytes encode() can emit for one value.
  static int max_length();
  // Writes `value` at `where` and advances `where` past the written bytes.
  static void encode(T value, char*& where);
  // Reads one value at `from` and advances `from` past the consumed bytes.
  static T decode(const char*& from);
};

// Definitions
template <>
inline int vli<uint32_t>::max_length() {
  // 32 bits in groups of 7 need at most five bytes.
  return 5;
}

template <>
inline void vli<uint32_t>::encode(uint32_t value, char*& where) {
  // Emit the continuation groups which are needed, highest first ...
  if (value >= 0x10000000) *where++ = char((value >> 28) | 0x80u);
  if (value >= 0x200000) *where++ = char(((value >> 21) & 0x7Fu) | 0x80u);
  if (value >= 0x4000) *where++ = char(((value >> 14) & 0x7Fu) | 0x80u);
  if (value >= 0x80) *where++ = char(((value >> 7) & 0x7Fu) | 0x80u);
  // ... and always the final group with the top bit clear.
  *where++ = char(value & 0x7Fu);
}

template <>
inline uint32_t vli<uint32_t>::decode(const char*& from) {
  uint32_t value = 0;
  // The caller must supply well-formed data; no input length is checked here.
  while (((unsigned char)(*from)) & 0x80u) value = (value << 7) | (((unsigned char)(*from++)) ^ 0x80u);
  value = (value << 7) | ((unsigned char)(*from++));
  return value;
}
8668
|
|
|
|
|
|
|
|
8669
|
|
|
|
|
|
|
} // namespace morphodita |
8670
|
|
|
|
|
|
|
|
8671
|
|
|
|
|
|
|
///////// |
8672
|
|
|
|
|
|
|
// File: morphodita/tagger/feature_sequences.h |
8673
|
|
|
|
|
|
|
///////// |
8674
|
|
|
|
|
|
|
|
8675
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
8676
|
|
|
|
|
|
|
// |
8677
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8678
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8679
|
|
|
|
|
|
|
// |
8680
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8681
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8682
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8683
|
|
|
|
|
|
|
|
8684
|
|
|
|
|
|
|
namespace morphodita { |
8685
|
|
|
|
|
|
|
|
8686
|
|
|
|
|
|
|
// Declarations |
8687
|
|
|
|
|
|
|
typedef int32_t feature_sequence_score; |
8688
|
|
|
|
|
|
|
typedef int64_t feature_sequences_score; |
8689
|
|
|
|
|
|
|
|
8690
|
|
|
|
|
|
|
struct feature_sequence_element { |
8691
|
|
|
|
|
|
|
elementary_feature_type type; |
8692
|
|
|
|
|
|
|
int elementary_index; |
8693
|
|
|
|
|
|
|
int sequence_index; |
8694
|
|
|
|
|
|
|
|
8695
|
|
|
|
|
|
|
feature_sequence_element() {} |
8696
|
|
|
|
|
|
|
feature_sequence_element(elementary_feature_type type, int elementary_index, int sequence_index) : type(type), elementary_index(elementary_index), sequence_index(sequence_index) {} |
8697
|
|
|
|
|
|
|
}; |
8698
|
|
|
|
|
|
|
|
8699
|
21
|
|
|
|
|
|
struct feature_sequence { |
8700
|
|
|
|
|
|
|
vector elements; |
8701
|
|
|
|
|
|
|
int dependant_range = 1; |
8702
|
|
|
|
|
|
|
}; |
8703
|
|
|
|
|
|
|
|
8704
|
|
|
|
|
|
|
template |
8705
|
1
|
0
|
|
|
|
|
class feature_sequences { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8706
|
|
|
|
|
|
|
public: |
8707
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::per_form_features per_form_features; |
8708
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::per_tag_features per_tag_features; |
8709
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::dynamic_features dynamic_features; |
8710
|
|
|
|
|
|
|
|
8711
|
|
|
|
|
|
|
void parse(int window_size, istream& is); |
8712
|
|
|
|
|
|
|
bool load(istream& is); |
8713
|
|
|
|
|
|
|
bool save(ostream& os); |
8714
|
|
|
|
|
|
|
|
8715
|
|
|
|
|
|
|
struct cache; |
8716
|
|
|
|
|
|
|
|
8717
|
|
|
|
|
|
|
inline void initialize_sentence(const vector& forms, const vector>& analyses, cache& c) const; |
8718
|
|
|
|
|
|
|
inline void compute_dynamic_features(int form_index, int tag_index, const dynamic_features* prev_dynamic, dynamic_features& dynamic, cache& c) const; |
8719
|
|
|
|
|
|
|
inline feature_sequences_score score(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, cache& c) const; |
8720
|
|
|
|
|
|
|
void feature_keys(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, vector& keys, cache& c) const; |
8721
|
|
|
|
|
|
|
|
8722
|
|
|
|
|
|
|
ElementaryFeatures elementary; |
8723
|
|
|
|
|
|
|
vector |
8724
|
|
|
|
|
|
|
vector sequences; |
8725
|
|
|
|
|
|
|
}; |
8726
|
|
|
|
|
|
|
|
8727
|
0
|
|
|
|
|
|
class persistent_feature_sequence_map : public persistent_unordered_map { |
8728
|
|
|
|
|
|
|
public: |
8729
|
|
|
|
|
|
|
persistent_feature_sequence_map() : persistent_unordered_map() {} |
8730
|
|
|
|
|
|
|
persistent_feature_sequence_map(const persistent_unordered_map&& map) : persistent_unordered_map(map) {} |
8731
|
|
|
|
|
|
|
|
8732
|
|
|
|
|
|
|
feature_sequence_score score(const char* feature, int len) const { |
8733
|
108
|
|
|
|
|
|
auto* it = at_typed(feature, len); |
8734
|
108
|
0
|
|
|
|
|
return it ? unaligned_load(it) : 0; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8735
|
|
|
|
|
|
|
} |
8736
|
|
|
|
|
|
|
}; |
8737
|
|
|
|
|
|
|
|
8738
|
|
|
|
|
|
|
template using persistent_feature_sequences = feature_sequences; |
8739
|
|
|
|
|
|
|
|
8740
|
|
|
|
|
|
|
// Definitions |
8741
|
|
|
|
|
|
|
template |
8742
|
1
|
|
|
|
|
|
inline bool feature_sequences::load(istream& is) { |
8743
|
1
|
0
|
|
|
|
|
if (!elementary.load(is)) return false; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8744
|
|
|
|
|
|
|
|
8745
|
|
|
|
|
|
|
binary_decoder data; |
8746
|
1
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8747
|
|
|
|
|
|
|
|
8748
|
|
|
|
|
|
|
try { |
8749
|
1
|
0
|
|
|
|
|
sequences.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8750
|
22
|
0
|
|
|
|
|
for (auto&& sequence : sequences) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8751
|
21
|
0
|
|
|
|
|
sequence.dependant_range = data.next_4B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8752
|
21
|
0
|
|
|
|
|
sequence.elements.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8753
|
66
|
0
|
|
|
|
|
for (auto&& element : sequence.elements) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8754
|
45
|
0
|
|
|
|
|
element.type = elementary_feature_type(data.next_4B()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8755
|
45
|
0
|
|
|
|
|
element.elementary_index = data.next_4B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8756
|
45
|
0
|
|
|
|
|
element.sequence_index = data.next_4B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8757
|
|
|
|
|
|
|
} |
8758
|
|
|
|
|
|
|
} |
8759
|
|
|
|
|
|
|
|
8760
|
1
|
0
|
|
|
|
|
scores.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8761
|
22
|
0
|
|
|
|
|
for (auto&& score : scores) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8762
|
21
|
0
|
|
|
|
|
score.load(data); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8763
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
8764
|
|
|
|
|
|
|
return false; |
8765
|
|
|
|
|
|
|
} |
8766
|
|
|
|
|
|
|
|
8767
|
1
|
|
|
|
|
|
return data.is_end(); |
8768
|
|
|
|
|
|
|
} |
8769
|
|
|
|
|
|
|
|
8770
|
|
|
|
|
|
|
template |
8771
|
0
|
|
|
|
|
|
struct feature_sequences::cache { |
8772
|
|
|
|
|
|
|
const vector* forms; |
8773
|
|
|
|
|
|
|
const vector>* analyses; |
8774
|
|
|
|
|
|
|
vector elementary_per_form; |
8775
|
|
|
|
|
|
|
vector> elementary_per_tag; |
8776
|
|
|
|
|
|
|
|
8777
|
0
|
|
|
|
|
|
struct cache_element { |
8778
|
|
|
|
|
|
|
vector key; |
8779
|
|
|
|
|
|
|
int key_size; |
8780
|
|
|
|
|
|
|
feature_sequence_score score; |
8781
|
|
|
|
|
|
|
|
8782
|
21
|
0
|
|
|
|
|
cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8783
|
|
|
|
|
|
|
}; |
8784
|
|
|
|
|
|
|
vector caches; |
8785
|
|
|
|
|
|
|
vector window; |
8786
|
|
|
|
|
|
|
vector key; |
8787
|
|
|
|
|
|
|
feature_sequences_score score; |
8788
|
|
|
|
|
|
|
|
8789
|
1
|
|
|
|
|
|
cache(const feature_sequences& self) : score(0) { |
8790
|
1
|
0
|
|
|
|
|
caches.reserve(self.sequences.size()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8791
|
|
|
|
|
|
|
int max_sequence_elements = 0, max_window_size = 1; |
8792
|
22
|
0
|
|
|
|
|
for (auto&& sequence : self.sequences) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8793
|
21
|
0
|
|
|
|
|
caches.emplace_back(int(sequence.elements.size())); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8794
|
21
|
0
|
|
|
|
|
if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size(); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8795
|
66
|
0
|
|
|
|
|
for (auto&& element : sequence.elements) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8796
|
45
|
0
|
|
|
|
|
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8797
|
|
|
|
|
|
|
max_window_size = 1 - element.sequence_index; |
8798
|
|
|
|
|
|
|
} |
8799
|
1
|
0
|
|
|
|
|
key.resize(max_sequence_elements * vli::max_length()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8800
|
1
|
0
|
|
|
|
|
window.resize(max_window_size); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8801
|
1
|
|
|
|
|
|
} |
8802
|
|
|
|
|
|
|
}; |
8803
|
|
|
|
|
|
|
|
8804
|
|
|
|
|
|
|
template |
8805
|
2
|
|
|
|
|
|
void feature_sequences::initialize_sentence(const vector& forms, const vector>& analyses, cache& c) const { |
8806
|
|
|
|
|
|
|
// Store forms and forms_size |
8807
|
2
|
|
|
|
|
|
c.forms = &forms; |
8808
|
2
|
|
|
|
|
|
c.analyses = &analyses; |
8809
|
|
|
|
|
|
|
|
8810
|
|
|
|
|
|
|
// Enlarge elementary features vectors if needed |
8811
|
2
|
0
|
|
|
|
|
if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8812
|
2
|
0
|
|
|
|
|
if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8813
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8814
|
7
|
0
|
|
|
|
|
if (analyses[i].size() > c.elementary_per_tag[i].size()) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8815
|
5
|
|
|
|
|
|
c.elementary_per_tag[i].resize(analyses[i].size() * 2); |
8816
|
|
|
|
|
|
|
|
8817
|
|
|
|
|
|
|
// Compute elementary features |
8818
|
2
|
|
|
|
|
|
elementary.compute_features(forms, analyses, c.elementary_per_form, c.elementary_per_tag); |
8819
|
|
|
|
|
|
|
|
8820
|
|
|
|
|
|
|
// Clear score cache, because scores may have been modified |
8821
|
2
|
|
|
|
|
|
c.score = 0; |
8822
|
44
|
0
|
|
|
|
|
for (auto&& cache : c.caches) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8823
|
42
|
|
|
|
|
|
cache.key_size = cache.score = 0; |
8824
|
2
|
|
|
|
|
|
} |
8825
|
|
|
|
|
|
|
|
8826
|
|
|
|
|
|
|
template |
8827
|
44
|
|
|
|
|
|
void feature_sequences::compute_dynamic_features(int form_index, int tag_index, const dynamic_features* prev_dynamic, dynamic_features& dynamic, cache& c) const { |
8828
|
22
|
0
|
|
|
|
|
elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8829
|
22
|
|
|
|
|
|
} |
8830
|
|
|
|
|
|
|
|
8831
|
|
|
|
|
|
|
template |
8832
|
36
|
|
|
|
|
|
feature_sequences_score feature_sequences::score(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, cache& c) const { |
8833
|
|
|
|
|
|
|
// Start by creating a window of per_tag_features* |
8834
|
70
|
0
|
|
|
|
|
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8835
|
156
|
|
|
|
|
|
c.window[i] = &c.elementary_per_tag[form_index - i][tags_window[i]]; |
8836
|
|
|
|
|
|
|
|
8837
|
|
|
|
|
|
|
// Compute the score |
8838
|
18
|
|
|
|
|
|
feature_sequences_score result = c.score; |
8839
|
208
|
0
|
|
|
|
|
for (unsigned i = 0; i < sequences.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8840
|
190
|
0
|
|
|
|
|
if (tags_unchanged >= sequences[i].dependant_range) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8841
|
|
|
|
|
|
|
break; |
8842
|
|
|
|
|
|
|
|
8843
|
179
|
|
|
|
|
|
char* key = c.key.data(); |
8844
|
446
|
0
|
|
|
|
|
for (unsigned j = 0; j < sequences[i].elements.size(); j++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8845
|
|
|
|
|
|
|
auto& element = sequences[i].elements[j]; |
8846
|
|
|
|
|
|
|
elementary_feature_value value; |
8847
|
|
|
|
|
|
|
|
8848
|
332
|
|
|
|
|
|
switch (element.type) { |
8849
|
|
|
|
|
|
|
case PER_FORM: |
8850
|
95
|
0
|
|
|
|
|
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8851
|
|
|
|
|
|
|
break; |
8852
|
|
|
|
|
|
|
case PER_TAG: |
8853
|
237
|
0
|
|
|
|
|
value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index]; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8854
|
|
|
|
|
|
|
break; |
8855
|
|
|
|
|
|
|
case DYNAMIC: |
8856
|
|
|
|
|
|
|
default: |
8857
|
0
|
|
|
|
|
|
value = dynamic.values[element.elementary_index]; |
8858
|
|
|
|
|
|
|
} |
8859
|
|
|
|
|
|
|
|
8860
|
332
|
0
|
|
|
|
|
if (value == elementary_feature_unknown) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8861
|
65
|
|
|
|
|
|
key = c.key.data(); |
8862
|
65
|
|
|
|
|
|
break; |
8863
|
|
|
|
|
|
|
} |
8864
|
267
|
|
|
|
|
|
vli::encode(value, key); |
8865
|
|
|
|
|
|
|
} |
8866
|
|
|
|
|
|
|
|
8867
|
179
|
|
|
|
|
|
result -= c.caches[i].score; |
8868
|
179
|
|
|
|
|
|
int key_size = key - c.key.data(); |
8869
|
179
|
0
|
|
|
|
|
if (!key_size) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8870
|
65
|
|
|
|
|
|
c.caches[i].score = 0; |
8871
|
65
|
|
|
|
|
|
c.caches[i].key_size = 0; |
8872
|
198
|
0
|
|
|
|
|
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8873
|
108
|
|
|
|
|
|
c.caches[i].score = scores[i].score(c.key.data(), key_size); |
8874
|
108
|
|
|
|
|
|
c.caches[i].key_size = key_size; |
8875
|
108
|
|
|
|
|
|
small_memcpy(c.caches[i].key.data(), c.key.data(), key_size); |
8876
|
|
|
|
|
|
|
} |
8877
|
179
|
|
|
|
|
|
result += c.caches[i].score; |
8878
|
|
|
|
|
|
|
} |
8879
|
|
|
|
|
|
|
|
8880
|
18
|
|
|
|
|
|
c.score = result; |
8881
|
18
|
|
|
|
|
|
return result; |
8882
|
|
|
|
|
|
|
} |
8883
|
|
|
|
|
|
|
|
8884
|
|
|
|
|
|
|
template |
8885
|
|
|
|
|
|
|
void feature_sequences::feature_keys(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, vector& keys, cache& c) const { |
8886
|
|
|
|
|
|
|
score(form_index, tags_window, tags_unchanged, dynamic, c); |
8887
|
|
|
|
|
|
|
|
8888
|
|
|
|
|
|
|
keys.resize(c.caches.size()); |
8889
|
|
|
|
|
|
|
for (unsigned i = 0; i < c.caches.size(); i++) |
8890
|
|
|
|
|
|
|
keys[i].assign(c.caches[i].key.data(), c.caches[i].key_size); |
8891
|
|
|
|
|
|
|
} |
8892
|
|
|
|
|
|
|
|
8893
|
|
|
|
|
|
|
} // namespace morphodita |
8894
|
|
|
|
|
|
|
|
8895
|
|
|
|
|
|
|
///////// |
8896
|
|
|
|
|
|
|
// File: morphodita/tagger/viterbi.h |
8897
|
|
|
|
|
|
|
///////// |
8898
|
|
|
|
|
|
|
|
8899
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
8900
|
|
|
|
|
|
|
// |
8901
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8902
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8903
|
|
|
|
|
|
|
// |
8904
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8905
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8906
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8907
|
|
|
|
|
|
|
|
8908
|
|
|
|
|
|
|
namespace morphodita { |
8909
|
|
|
|
|
|
|
|
8910
|
|
|
|
|
|
|
// Declarations |
8911
|
|
|
|
|
|
|
template |
8912
|
|
|
|
|
|
|
class viterbi { |
8913
|
|
|
|
|
|
|
public: |
8914
|
|
|
|
|
|
|
viterbi(const FeatureSequences& features, int decoding_order, int window_size) |
8915
|
1
|
|
|
|
|
|
: features(features), decoding_order(decoding_order), window_size(window_size) {} |
8916
|
|
|
|
|
|
|
|
8917
|
|
|
|
|
|
|
struct cache; |
8918
|
|
|
|
|
|
|
void tag(const vector& forms, const vector>& analyses, cache& c, vector& tags) const; |
8919
|
|
|
|
|
|
|
|
8920
|
|
|
|
|
|
|
private: |
8921
|
|
|
|
|
|
|
struct node; |
8922
|
|
|
|
|
|
|
|
8923
|
|
|
|
|
|
|
const FeatureSequences& features; |
8924
|
|
|
|
|
|
|
int decoding_order, window_size; |
8925
|
|
|
|
|
|
|
}; |
8926
|
|
|
|
|
|
|
|
8927
|
|
|
|
|
|
|
// Definitions |
8928
|
|
|
|
|
|
|
template |
8929
|
0
|
|
|
|
|
|
struct viterbi::cache { |
8930
|
|
|
|
|
|
|
vector nodes; |
8931
|
|
|
|
|
|
|
typename FeatureSequences::cache features_cache; |
8932
|
|
|
|
|
|
|
|
8933
|
1
|
0
|
|
|
|
|
cache(const viterbi& self) : features_cache(self.features) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8934
|
|
|
|
|
|
|
}; |
8935
|
|
|
|
|
|
|
|
8936
|
|
|
|
|
|
|
template |
8937
|
|
|
|
|
|
|
struct viterbi::node { |
8938
|
|
|
|
|
|
|
int tag; |
8939
|
|
|
|
|
|
|
int prev; |
8940
|
|
|
|
|
|
|
feature_sequences_score score; |
8941
|
|
|
|
|
|
|
typename FeatureSequences::dynamic_features dynamic; |
8942
|
|
|
|
|
|
|
}; |
8943
|
|
|
|
|
|
|
|
8944
|
|
|
|
|
|
|
template |
8945
|
2
|
|
|
|
|
|
void viterbi::tag(const vector& forms, const vector>& analyses, cache& c, vector& tags) const { |
8946
|
4
|
0
|
|
|
|
|
if (!forms.size()) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8947
|
|
|
|
|
|
|
|
8948
|
|
|
|
|
|
|
// Count number of nodes and allocate |
8949
|
|
|
|
|
|
|
unsigned nodes = 0; |
8950
|
9
|
0
|
|
|
|
|
for (unsigned i = 0, states = 1; i < forms.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8951
|
7
|
0
|
|
|
|
|
if (analyses[i].empty()) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8952
|
7
|
0
|
|
|
|
|
states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size(); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8953
|
7
|
|
|
|
|
|
nodes += states; |
8954
|
|
|
|
|
|
|
} |
8955
|
2
|
0
|
|
|
|
|
if (nodes > c.nodes.size()) c.nodes.resize(nodes); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8956
|
|
|
|
|
|
|
|
8957
|
|
|
|
|
|
|
// Init feature sequences |
8958
|
2
|
|
|
|
|
|
features.initialize_sentence(forms, analyses, c.features_cache); |
8959
|
|
|
|
|
|
|
|
8960
|
|
|
|
|
|
|
int window_stack[16]; vector window_heap; |
8961
|
2
|
0
|
|
|
|
|
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8962
|
|
|
|
|
|
|
typename FeatureSequences::dynamic_features dynamic; |
8963
|
|
|
|
|
|
|
feature_sequences_score score; |
8964
|
|
|
|
|
|
|
|
8965
|
|
|
|
|
|
|
// Compute all nodes score |
8966
|
|
|
|
|
|
|
int nodes_prev = -1, nodes_now = 0; |
8967
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8968
|
|
|
|
|
|
|
int nodes_next = nodes_now; |
8969
|
|
|
|
|
|
|
|
8970
|
28
|
0
|
|
|
|
|
for (int j = 0; j < window_size; j++) window[j] = -1; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8971
|
18
|
0
|
|
|
|
|
for (int tag = 0; tag < int(analyses[i].size()); tag++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8972
|
33
|
0
|
|
|
|
|
for (int prev = nodes_prev; prev < nodes_now; prev++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8973
|
|
|
|
|
|
|
// Compute predecessors and number of unchanges |
8974
|
22
|
|
|
|
|
|
int same_tags = window[0] == tag; |
8975
|
22
|
|
|
|
|
|
window[0] = tag; |
8976
|
59
|
0
|
|
|
|
|
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8977
|
37
|
0
|
|
|
|
|
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8978
|
74
|
|
|
|
|
|
window[n] = c.nodes[p].tag; |
8979
|
|
|
|
|
|
|
} |
8980
|
|
|
|
|
|
|
|
8981
|
|
|
|
|
|
|
// Compute dynamic elementary features and score |
8982
|
22
|
0
|
|
|
|
|
features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8983
|
22
|
0
|
|
|
|
|
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8984
|
20
|
|
|
|
|
|
(prev >= 0 ? c.nodes[prev].score : 0); |
8985
|
|
|
|
|
|
|
|
8986
|
|
|
|
|
|
|
// Update existing node or create a new one |
8987
|
22
|
0
|
|
|
|
|
if (same_tags >= decoding_order-1) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8988
|
4
|
0
|
|
|
|
|
if (score <= c.nodes[nodes_next-1].score) continue; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8989
|
|
|
|
|
|
|
nodes_next--; |
8990
|
|
|
|
|
|
|
} |
8991
|
44
|
|
|
|
|
|
c.nodes[nodes_next].tag = tag; |
8992
|
22
|
|
|
|
|
|
c.nodes[nodes_next].prev = prev; |
8993
|
22
|
|
|
|
|
|
c.nodes[nodes_next].score = score; |
8994
|
22
|
|
|
|
|
|
c.nodes[nodes_next++].dynamic = dynamic; |
8995
|
|
|
|
|
|
|
} |
8996
|
|
|
|
|
|
|
|
8997
|
|
|
|
|
|
|
nodes_prev = nodes_now; |
8998
|
|
|
|
|
|
|
nodes_now = nodes_next; |
8999
|
|
|
|
|
|
|
} |
9000
|
|
|
|
|
|
|
|
9001
|
|
|
|
|
|
|
// Choose the best ending node |
9002
|
|
|
|
|
|
|
int best = nodes_prev; |
9003
|
5
|
0
|
|
|
|
|
for (int node = nodes_prev + 1; node < nodes_now; node++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9004
|
3
|
0
|
|
|
|
|
if (c.nodes[node].score > c.nodes[best].score) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9005
|
|
|
|
|
|
|
best = node; |
9006
|
|
|
|
|
|
|
|
9007
|
9
|
0
|
|
|
|
|
for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9008
|
21
|
|
|
|
|
|
tags[i] = c.nodes[best].tag; |
9009
|
|
|
|
|
|
|
} |
9010
|
|
|
|
|
|
|
|
9011
|
|
|
|
|
|
|
} // namespace morphodita |
9012
|
|
|
|
|
|
|
|
9013
|
|
|
|
|
|
|
///////// |
9014
|
|
|
|
|
|
|
// File: morphodita/tagger/conllu_elementary_features.h |
9015
|
|
|
|
|
|
|
///////// |
9016
|
|
|
|
|
|
|
|
9017
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9018
|
|
|
|
|
|
|
// |
9019
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9020
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9021
|
|
|
|
|
|
|
// |
9022
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9023
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9024
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9025
|
|
|
|
|
|
|
|
9026
|
|
|
|
|
|
|
namespace morphodita { |
9027
|
|
|
|
|
|
|
|
9028
|
|
|
|
|
|
|
// Declarations |
9029
|
|
|
|
|
|
|
template |
9030
|
0
|
|
|
|
|
|
class conllu_elementary_features : public elementary_features |
9031
|
|
|
|
|
|
|
public: |
9032
|
|
|
|
|
|
|
conllu_elementary_features(); |
9033
|
|
|
|
|
|
|
|
9034
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_FORM, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, PREFIX5, PREFIX6, PREFIX7, PREFIX8, PREFIX9, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, SUFFIX5, SUFFIX6, SUFFIX7, SUFFIX8, SUFFIX9, PER_FORM_TOTAL }; |
9035
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG_UPOS, TAG_CASE, TAG_GENDER, TAG_NUMBER, TAG_NEGATIVE, TAG_PERSON, LEMMA, PER_TAG_TOTAL }; |
9036
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_FORM, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_FORM, DYNAMIC_TOTAL }; |
9037
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_PREFIX5, MAP_PREFIX6, MAP_PREFIX7, MAP_PREFIX8, MAP_PREFIX9, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_SUFFIX5, MAP_SUFFIX6, MAP_SUFFIX7, MAP_SUFFIX8, MAP_SUFFIX9, MAP_TAG, MAP_TAG_UPOS, MAP_TAG_CASE, MAP_TAG_GENDER, MAP_TAG_NUMBER, MAP_TAG_NEGATIVE, MAP_TAG_PERSON, MAP_LEMMA, MAP_TOTAL } ; |
9038
|
|
|
|
|
|
|
|
9039
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
9040
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
9041
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
9042
|
|
|
|
|
|
|
|
9043
|
|
|
|
|
|
|
static vector descriptions; |
9044
|
|
|
|
|
|
|
|
9045
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
9046
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
9047
|
|
|
|
|
|
|
|
9048
|
|
|
|
|
|
|
using elementary_features |
9049
|
|
|
|
|
|
|
}; |
9050
|
|
|
|
|
|
|
|
9051
|
|
|
|
|
|
|
typedef conllu_elementary_features persistent_conllu_elementary_features; |
9052
|
|
|
|
|
|
|
|
9053
|
|
|
|
|
|
|
// Definitions |
9054
|
|
|
|
|
|
|
template |
9055
|
0
|
|
|
|
|
|
conllu_elementary_features |
9056
|
0
|
0
|
|
|
|
|
maps.resize(MAP_TOTAL); |
9057
|
0
|
|
|
|
|
|
} |
9058
|
|
|
|
|
|
|
|
9059
|
|
|
|
|
|
|
template |
9060
|
|
|
|
|
|
|
vector conllu_elementary_features |
9061
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
9062
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
9063
|
|
|
|
|
|
|
{"FollowingVerbForm", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_FORM, MAP_FORM}, |
9064
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
9065
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
9066
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
9067
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
9068
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
9069
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
9070
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
9071
|
|
|
|
|
|
|
{"Prefix5", PER_FORM, ONLY_CURRENT, PREFIX5, MAP_PREFIX5}, |
9072
|
|
|
|
|
|
|
{"Prefix6", PER_FORM, ONLY_CURRENT, PREFIX6, MAP_PREFIX6}, |
9073
|
|
|
|
|
|
|
{"Prefix7", PER_FORM, ONLY_CURRENT, PREFIX7, MAP_PREFIX7}, |
9074
|
|
|
|
|
|
|
{"Prefix8", PER_FORM, ONLY_CURRENT, PREFIX8, MAP_PREFIX8}, |
9075
|
|
|
|
|
|
|
{"Prefix9", PER_FORM, ONLY_CURRENT, PREFIX9, MAP_PREFIX9}, |
9076
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
9077
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
9078
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
9079
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
9080
|
|
|
|
|
|
|
{"Suffix5", PER_FORM, ONLY_CURRENT, SUFFIX5, MAP_SUFFIX5}, |
9081
|
|
|
|
|
|
|
{"Suffix6", PER_FORM, ONLY_CURRENT, SUFFIX6, MAP_SUFFIX6}, |
9082
|
|
|
|
|
|
|
{"Suffix7", PER_FORM, ONLY_CURRENT, SUFFIX7, MAP_SUFFIX7}, |
9083
|
|
|
|
|
|
|
{"Suffix8", PER_FORM, ONLY_CURRENT, SUFFIX8, MAP_SUFFIX8}, |
9084
|
|
|
|
|
|
|
{"Suffix9", PER_FORM, ONLY_CURRENT, SUFFIX9, MAP_SUFFIX9}, |
9085
|
|
|
|
|
|
|
|
9086
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
9087
|
|
|
|
|
|
|
{"TagUPos", PER_TAG, ANY_OFFSET, TAG_UPOS, MAP_TAG_UPOS}, |
9088
|
|
|
|
|
|
|
{"TagCase", PER_TAG, ANY_OFFSET, TAG_CASE, MAP_TAG_CASE}, |
9089
|
|
|
|
|
|
|
{"TagGender", PER_TAG, ANY_OFFSET, TAG_GENDER, MAP_TAG_GENDER}, |
9090
|
|
|
|
|
|
|
{"TagNumber", PER_TAG, ANY_OFFSET, TAG_NUMBER, MAP_TAG_NUMBER}, |
9091
|
|
|
|
|
|
|
{"TagNegative", PER_TAG, ANY_OFFSET, TAG_NEGATIVE, MAP_TAG_NEGATIVE}, |
9092
|
|
|
|
|
|
|
{"TagPerson", PER_TAG, ANY_OFFSET, TAG_PERSON, MAP_TAG_PERSON}, |
9093
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
9094
|
|
|
|
|
|
|
|
9095
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
9096
|
|
|
|
|
|
|
{"PreviousVerbForm", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_FORM, MAP_FORM}, |
9097
|
|
|
|
|
|
|
}; |
9098
|
|
|
|
|
|
|
|
9099
|
|
|
|
|
|
|
template |
9100
|
0
|
|
|
|
|
|
void conllu_elementary_features |
9101
|
|
|
|
|
|
|
using namespace unilib; |
9102
|
|
|
|
|
|
|
|
9103
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
9104
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_form = elementary_feature_empty; |
9105
|
0
|
0
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
9106
|
|
|
|
|
|
|
int verb_candidate = -1; |
9107
|
|
|
|
|
|
|
|
9108
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
9109
|
0
|
0
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
9110
|
0
|
|
|
|
|
|
const string& tag = analyses[i][j].tag; |
9111
|
0
|
|
|
|
|
|
const string& lemma = analyses[i][j].lemma; |
9112
|
|
|
|
|
|
|
|
9113
|
|
|
|
|
|
|
// Tag consists of three parts separated by tag[0] character |
9114
|
|
|
|
|
|
|
// - first is TAG_UPOS, |
9115
|
|
|
|
|
|
|
// - second is TAG_LPOS, |
9116
|
|
|
|
|
|
|
// - then there is any number of | separated named fields in format Name=Value |
9117
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(tag.c_str(), tag.size()); |
9118
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG_UPOS] = per_tag[i][j].values[TAG_CASE] = per_tag[i][j].values[TAG_GENDER] = elementary_feature_empty; |
9119
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG_NUMBER] = per_tag[i][j].values[TAG_NEGATIVE] = per_tag[i][j].values[TAG_PERSON] = elementary_feature_empty; |
9120
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
0
|
|
|
|
|
|
9121
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(lemma.c_str(), lemma.size()); |
9122
|
|
|
|
|
|
|
|
9123
|
0
|
|
|
|
|
|
char separator = tag[0]; |
9124
|
0
|
|
|
|
|
|
size_t index = tag.find(separator, 1); |
9125
|
0
|
0
|
|
|
|
|
if (index == string::npos) index = tag.size(); |
9126
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0)); |
|
|
0
|
|
|
|
|
|
9127
|
|
|
|
|
|
|
|
9128
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index++; |
9129
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index = tag.find(separator, index); |
9130
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index++; |
9131
|
0
|
0
|
|
|
|
|
for (size_t length; index < tag.size(); index += length + 1) { |
9132
|
0
|
|
|
|
|
|
length = tag.find('|', index); |
9133
|
0
|
0
|
|
|
|
|
length = (length == string::npos ? tag.size() : length) - index; |
9134
|
|
|
|
|
|
|
|
9135
|
0
|
0
|
|
|
|
|
for (size_t equal_sign = 0; equal_sign + 1 < length; equal_sign++) |
9136
|
0
|
0
|
|
|
|
|
if (tag[index + equal_sign] == '=') { |
9137
|
|
|
|
|
|
|
int value = -1, map; |
9138
|
0
|
|
|
|
|
|
switch (equal_sign) { |
9139
|
|
|
|
|
|
|
case 4: |
9140
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Case") == 0) value = TAG_CASE, map = MAP_TAG_CASE; |
9141
|
|
|
|
|
|
|
break; |
9142
|
|
|
|
|
|
|
case 6: |
9143
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Gender") == 0) value = TAG_GENDER, map = MAP_TAG_GENDER; |
9144
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Number") == 0) value = TAG_NUMBER, map = MAP_TAG_NUMBER; |
9145
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Person") == 0) value = TAG_PERSON, map = MAP_TAG_PERSON; |
9146
|
|
|
|
|
|
|
break; |
9147
|
|
|
|
|
|
|
case 8: |
9148
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Negative") == 0) value = TAG_NEGATIVE, map = MAP_TAG_NEGATIVE; |
9149
|
|
|
|
|
|
|
break; |
9150
|
|
|
|
|
|
|
} |
9151
|
|
|
|
|
|
|
|
9152
|
0
|
0
|
|
|
|
|
if (value >= 0) |
9153
|
0
|
|
|
|
|
|
per_tag[i][j].values[value] = maps[map].value(tag.c_str() + index + equal_sign + 1, length - equal_sign - 1); |
9154
|
|
|
|
|
|
|
break; |
9155
|
|
|
|
|
|
|
} |
9156
|
|
|
|
|
|
|
} |
9157
|
|
|
|
|
|
|
|
9158
|
0
|
0
|
|
|
|
|
if (tag.size() >= 2 && tag[1] == 'V') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9159
|
|
|
|
|
|
|
int tag_compare; |
9160
|
0
|
0
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
0
|
|
|
|
|
|
9161
|
|
|
|
|
|
|
} |
9162
|
|
|
|
|
|
|
} |
9163
|
|
|
|
|
|
|
|
9164
|
|
|
|
|
|
|
// Per_form features |
9165
|
0
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
9166
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
9167
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_FORM] = following_verb_form; |
9168
|
|
|
|
|
|
|
|
9169
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
9170
|
0
|
0
|
|
|
|
|
if (verb_candidate >= 0) { |
9171
|
0
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
9172
|
0
|
|
|
|
|
|
following_verb_form = per_form[i].values[FORM]; |
9173
|
|
|
|
|
|
|
} |
9174
|
|
|
|
|
|
|
|
9175
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
9176
|
0
|
0
|
|
|
|
|
if (analyses[i].size() == 1) { |
9177
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
9178
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_unknown; |
9179
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_unknown; |
9180
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_unknown; |
9181
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_unknown; |
9182
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_unknown; |
9183
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_unknown; |
9184
|
0
|
0
|
|
|
|
|
} else if (forms[i].len <= 0) { |
9185
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
9186
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_empty; |
9187
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_empty; |
9188
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_empty; |
9189
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_empty; |
9190
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_empty; |
9191
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_empty; |
9192
|
|
|
|
|
|
|
} else { |
9193
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
9194
|
0
|
|
|
|
|
|
const char* form_start = form.str; |
9195
|
|
|
|
|
|
|
|
9196
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
9197
|
0
|
|
|
|
|
|
size_t indices[18] = {0, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, 0, 0, 0, 0, 0, 0, 0, 0}; // careful here regarding forms shorter than 9 characters |
9198
|
|
|
|
|
|
|
int index = 0; |
9199
|
0
|
0
|
|
|
|
|
while (form.len) { |
9200
|
0
|
|
|
|
|
|
indices[(index++) % 18] = form.str - form_start; |
9201
|
|
|
|
|
|
|
|
9202
|
0
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
9203
|
0
|
0
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
0
|
|
|
|
|
|
9204
|
0
|
0
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
0
|
|
|
|
|
|
9205
|
0
|
0
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
0
|
|
|
|
|
|
9206
|
|
|
|
|
|
|
|
9207
|
0
|
0
|
|
|
|
|
if (index == 10 || (!form.len && index < 10)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9208
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
9209
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
9210
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
9211
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
9212
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX5] = maps[MAP_PREFIX5].value(form_start, indices[5]); |
9213
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX6] = maps[MAP_PREFIX6].value(form_start, indices[6]); |
9214
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = maps[MAP_PREFIX7].value(form_start, indices[7]); |
9215
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX8] = maps[MAP_PREFIX8].value(form_start, indices[8]); |
9216
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX9] = maps[MAP_PREFIX9].value(form_start, indices[9]); |
9217
|
|
|
|
|
|
|
} |
9218
|
|
|
|
|
|
|
} |
9219
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index+18-1) % 18], form.str - form_start - indices[(index+18-1) % 18]); |
9220
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index+18-2) % 18], form.str - form_start - indices[(index+18-2) % 18]); |
9221
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index+18-3) % 18], form.str - form_start - indices[(index+18-3) % 18]); |
9222
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index+18-4) % 18], form.str - form_start - indices[(index+18-4) % 18]); |
9223
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX5] = maps[MAP_SUFFIX5].value(form_start + indices[(index+18-5) % 18], form.str - form_start - indices[(index+18-5) % 18]); |
9224
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX6] = maps[MAP_SUFFIX6].value(form_start + indices[(index+18-6) % 18], form.str - form_start - indices[(index+18-6) % 18]); |
9225
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = maps[MAP_SUFFIX7].value(form_start + indices[(index+18-7) % 18], form.str - form_start - indices[(index+18-7) % 18]); |
9226
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX8] = maps[MAP_SUFFIX8].value(form_start + indices[(index+18-8) % 18], form.str - form_start - indices[(index+18-8) % 18]); |
9227
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX9] = maps[MAP_SUFFIX9].value(form_start + indices[(index+18-9) % 18], form.str - form_start - indices[(index+18-9) % 18]); |
9228
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
9229
|
0
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
9230
|
0
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
9231
|
|
|
|
|
|
|
} |
9232
|
|
|
|
|
|
|
} |
9233
|
0
|
|
|
|
|
|
} |
9234
|
|
|
|
|
|
|
|
9235
|
|
|
|
|
|
|
template |
9236
|
|
|
|
|
|
|
void conllu_elementary_features |
9237
|
0
|
0
|
|
|
|
|
if (prev_dynamic) { |
9238
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
9239
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_FORM] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_FORM]; |
9240
|
|
|
|
|
|
|
} else { |
9241
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
9242
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_FORM] = elementary_feature_empty; |
9243
|
|
|
|
|
|
|
} |
9244
|
|
|
|
|
|
|
|
9245
|
0
|
0
|
|
|
|
|
if (tag.tag.size() >= 2 && tag.tag[1] == 'V') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9246
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
9247
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_FORM] = per_form.values[FORM]; |
9248
|
|
|
|
|
|
|
} else { |
9249
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
9250
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_FORM] = dynamic.values[PREVIOUS_VERB_FORM]; |
9251
|
|
|
|
|
|
|
} |
9252
|
|
|
|
|
|
|
} |
9253
|
|
|
|
|
|
|
|
9254
|
|
|
|
|
|
|
} // namespace morphodita |
9255
|
|
|
|
|
|
|
|
9256
|
|
|
|
|
|
|
///////// |
9257
|
|
|
|
|
|
|
// File: morphodita/tagger/czech_elementary_features.h |
9258
|
|
|
|
|
|
|
///////// |
9259
|
|
|
|
|
|
|
|
9260
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9261
|
|
|
|
|
|
|
// |
9262
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9263
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9264
|
|
|
|
|
|
|
// |
9265
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9266
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9267
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9268
|
|
|
|
|
|
|
|
9269
|
|
|
|
|
|
|
namespace morphodita { |
9270
|
|
|
|
|
|
|
|
9271
|
|
|
|
|
|
|
// Declarations |
9272
|
|
|
|
|
|
|
template |
9273
|
0
|
|
|
|
|
|
class czech_elementary_features : public elementary_features |
9274
|
|
|
|
|
|
|
public: |
9275
|
|
|
|
|
|
|
czech_elementary_features(); |
9276
|
|
|
|
|
|
|
|
9277
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_LEMMA, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, PER_FORM_TOTAL }; |
9278
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG3, TAG5, TAG25, LEMMA, PER_TAG_TOTAL }; |
9279
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_LEMMA, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_LEMMA, DYNAMIC_TOTAL }; |
9280
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_LEMMA, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_TAG, MAP_TAG3, MAP_TAG5, MAP_TAG25, MAP_TOTAL } ; |
9281
|
|
|
|
|
|
|
|
9282
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
9283
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
9284
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
9285
|
|
|
|
|
|
|
|
9286
|
|
|
|
|
|
|
static vector descriptions; |
9287
|
|
|
|
|
|
|
|
9288
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
9289
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
9290
|
|
|
|
|
|
|
|
9291
|
|
|
|
|
|
|
using elementary_features |
9292
|
|
|
|
|
|
|
}; |
9293
|
|
|
|
|
|
|
|
9294
|
|
|
|
|
|
|
typedef czech_elementary_features persistent_czech_elementary_features; |
9295
|
|
|
|
|
|
|
|
9296
|
|
|
|
|
|
|
// Definitions |
9297
|
|
|
|
|
|
|
template |
9298
|
0
|
|
|
|
|
|
czech_elementary_features |
9299
|
0
|
0
|
|
|
|
|
maps.resize(MAP_TOTAL); |
9300
|
0
|
|
|
|
|
|
} |
9301
|
|
|
|
|
|
|
|
9302
|
|
|
|
|
|
|
template |
9303
|
|
|
|
|
|
|
vector czech_elementary_features |
9304
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
9305
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
9306
|
|
|
|
|
|
|
{"FollowingVerbLemma", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_LEMMA, MAP_LEMMA }, |
9307
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
9308
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
9309
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
9310
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
9311
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
9312
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
9313
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
9314
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
9315
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
9316
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
9317
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
9318
|
|
|
|
|
|
|
|
9319
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
9320
|
|
|
|
|
|
|
{"Tag3", PER_TAG, ANY_OFFSET, TAG3, MAP_TAG3}, |
9321
|
|
|
|
|
|
|
{"Tag5", PER_TAG, ANY_OFFSET, TAG5, MAP_TAG5}, |
9322
|
|
|
|
|
|
|
{"Tag25", PER_TAG, ANY_OFFSET, TAG25, MAP_TAG25}, |
9323
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
9324
|
|
|
|
|
|
|
|
9325
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
9326
|
|
|
|
|
|
|
{"PreviousVerbLemma", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_LEMMA, MAP_LEMMA} |
9327
|
|
|
|
|
|
|
}; |
9328
|
|
|
|
|
|
|
|
9329
|
|
|
|
|
|
|
template |
9330
|
0
|
|
|
|
|
|
void czech_elementary_features |
9331
|
|
|
|
|
|
|
using namespace unilib; |
9332
|
|
|
|
|
|
|
|
9333
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
9334
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_lemma = elementary_feature_empty; |
9335
|
0
|
0
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
9336
|
|
|
|
|
|
|
int verb_candidate = -1; |
9337
|
|
|
|
|
|
|
|
9338
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
9339
|
0
|
0
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
9340
|
|
|
|
|
|
|
char tag25[2]; |
9341
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(analyses[i][j].tag.c_str(), analyses[i][j].tag.size()); |
9342
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
9343
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
9344
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG25] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG25].value((tag25[0] = analyses[i][j].tag[1], tag25[1] = analyses[i][j].tag[4], tag25), 2) : elementary_feature_empty; |
9345
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
0
|
|
|
|
|
|
9346
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(analyses[i][j].lemma.c_str(), analyses[i][j].lemma.size()); |
9347
|
|
|
|
|
|
|
|
9348
|
0
|
0
|
|
|
|
|
if (analyses[i][j].tag[0] == 'V') { |
9349
|
|
|
|
|
|
|
int tag_compare; |
9350
|
0
|
0
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
0
|
|
|
|
|
|
9351
|
|
|
|
|
|
|
} |
9352
|
|
|
|
|
|
|
} |
9353
|
|
|
|
|
|
|
|
9354
|
|
|
|
|
|
|
// Per_form features |
9355
|
0
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
9356
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
9357
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_LEMMA] = following_verb_lemma; |
9358
|
|
|
|
|
|
|
|
9359
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
9360
|
0
|
0
|
|
|
|
|
if (verb_candidate >= 0) { |
9361
|
0
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
9362
|
0
|
|
|
|
|
|
following_verb_lemma = per_tag[i][verb_candidate].values[LEMMA]; |
9363
|
|
|
|
|
|
|
} |
9364
|
|
|
|
|
|
|
|
9365
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
9366
|
0
|
0
|
|
|
|
|
if (analyses[i].size() == 1) { |
9367
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
9368
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = per_form[i].values[PREFIX4] = elementary_feature_unknown; |
9369
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = per_form[i].values[SUFFIX4] = elementary_feature_unknown; |
9370
|
0
|
0
|
|
|
|
|
} else if (forms[i].len <= 0) { |
9371
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
9372
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = per_form[i].values[PREFIX4] = elementary_feature_empty; |
9373
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = per_form[i].values[SUFFIX4] = elementary_feature_empty; |
9374
|
|
|
|
|
|
|
} else { |
9375
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
9376
|
0
|
|
|
|
|
|
const char* form_start = form.str; |
9377
|
|
|
|
|
|
|
|
9378
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
9379
|
0
|
|
|
|
|
|
size_t indices[8] = {0, form.len, form.len, form.len, form.len, 0, 0, 0}; // careful here regarding forms shorter than 4 characters |
9380
|
|
|
|
|
|
|
int index = 0; |
9381
|
0
|
0
|
|
|
|
|
while (form.len) { |
9382
|
0
|
|
|
|
|
|
indices[(index++)&7] = form.str - form_start; |
9383
|
|
|
|
|
|
|
|
9384
|
0
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
9385
|
0
|
0
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
0
|
|
|
|
|
|
9386
|
0
|
0
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
0
|
|
|
|
|
|
9387
|
0
|
0
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
0
|
|
|
|
|
|
9388
|
|
|
|
|
|
|
|
9389
|
0
|
0
|
|
|
|
|
if (index == 5 || (!form.len && index < 5)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9390
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
9391
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
9392
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
9393
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
9394
|
|
|
|
|
|
|
} |
9395
|
|
|
|
|
|
|
} |
9396
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index-1)&7], form.str - form_start - indices[(index-1)&7]); |
9397
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index-2)&7], form.str - form_start - indices[(index-2)&7]); |
9398
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index-3)&7], form.str - form_start - indices[(index-3)&7]); |
9399
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index-4)&7], form.str - form_start - indices[(index-4)&7]); |
9400
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
9401
|
0
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
9402
|
0
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
9403
|
|
|
|
|
|
|
} |
9404
|
|
|
|
|
|
|
} |
9405
|
0
|
|
|
|
|
|
} |
9406
|
|
|
|
|
|
|
|
9407
|
|
|
|
|
|
|
template |
9408
|
|
|
|
|
|
|
void czech_elementary_features |
9409
|
0
|
0
|
|
|
|
|
if (prev_dynamic) { |
9410
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
9411
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_LEMMA]; |
9412
|
|
|
|
|
|
|
} else { |
9413
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
9414
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = elementary_feature_empty; |
9415
|
|
|
|
|
|
|
} |
9416
|
|
|
|
|
|
|
|
9417
|
0
|
0
|
|
|
|
|
if (tag.tag[0] == 'V') { |
9418
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
9419
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = per_tag.values[LEMMA]; |
9420
|
|
|
|
|
|
|
} else { |
9421
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
9422
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = dynamic.values[PREVIOUS_VERB_LEMMA]; |
9423
|
|
|
|
|
|
|
} |
9424
|
|
|
|
|
|
|
} |
9425
|
|
|
|
|
|
|
|
9426
|
|
|
|
|
|
|
} // namespace morphodita |
9427
|
|
|
|
|
|
|
|
9428
|
|
|
|
|
|
|
///////// |
9429
|
|
|
|
|
|
|
// File: morphodita/tagger/generic_elementary_features.h |
9430
|
|
|
|
|
|
|
///////// |
9431
|
|
|
|
|
|
|
|
9432
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9433
|
|
|
|
|
|
|
// |
9434
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9435
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9436
|
|
|
|
|
|
|
// |
9437
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9438
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9439
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9440
|
|
|
|
|
|
|
|
9441
|
|
|
|
|
|
|
namespace morphodita { |
9442
|
|
|
|
|
|
|
|
9443
|
|
|
|
|
|
|
// Declarations |
9444
|
|
|
|
|
|
|
template |
9445
|
0
|
|
|
|
|
|
class generic_elementary_features : public elementary_features |
9446
|
|
|
|
|
|
|
public: |
9447
|
|
|
|
|
|
|
generic_elementary_features(); |
9448
|
|
|
|
|
|
|
|
9449
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_LEMMA, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, PREFIX5, PREFIX6, PREFIX7, PREFIX8, PREFIX9, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, SUFFIX5, SUFFIX6, SUFFIX7, SUFFIX8, SUFFIX9, PER_FORM_TOTAL }; |
9450
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG1, TAG2, TAG3, TAG4, TAG5, LEMMA, PER_TAG_TOTAL }; |
9451
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_LEMMA, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_LEMMA, DYNAMIC_TOTAL }; |
9452
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_PREFIX5, MAP_PREFIX6, MAP_PREFIX7, MAP_PREFIX8, MAP_PREFIX9, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_SUFFIX5, MAP_SUFFIX6, MAP_SUFFIX7, MAP_SUFFIX8, MAP_SUFFIX9, MAP_TAG, MAP_TAG1, MAP_TAG2, MAP_TAG3, MAP_TAG4, MAP_TAG5, MAP_LEMMA, MAP_TOTAL } ; |
9453
|
|
|
|
|
|
|
|
9454
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
9455
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
9456
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
9457
|
|
|
|
|
|
|
|
9458
|
|
|
|
|
|
|
static vector descriptions; |
9459
|
|
|
|
|
|
|
|
9460
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
9461
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
9462
|
|
|
|
|
|
|
|
9463
|
|
|
|
|
|
|
using elementary_features |
9464
|
|
|
|
|
|
|
}; |
9465
|
|
|
|
|
|
|
|
9466
|
|
|
|
|
|
|
typedef generic_elementary_features persistent_generic_elementary_features; |
9467
|
|
|
|
|
|
|
|
9468
|
|
|
|
|
|
|
// Definitions |
9469
|
|
|
|
|
|
|
template |
9470
|
1
|
|
|
|
|
|
generic_elementary_features |
9471
|
1
|
50
|
|
|
|
|
maps.resize(MAP_TOTAL); |
9472
|
1
|
|
|
|
|
|
} |
9473
|
|
|
|
|
|
|
|
9474
|
|
|
|
|
|
|
template |
9475
|
|
|
|
|
|
|
vector generic_elementary_features |
9476
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
9477
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
9478
|
|
|
|
|
|
|
{"FollowingVerbLemma", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_LEMMA, MAP_LEMMA }, |
9479
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
9480
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
9481
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
9482
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
9483
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
9484
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
9485
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
9486
|
|
|
|
|
|
|
{"Prefix5", PER_FORM, ONLY_CURRENT, PREFIX5, MAP_PREFIX5}, |
9487
|
|
|
|
|
|
|
{"Prefix6", PER_FORM, ONLY_CURRENT, PREFIX6, MAP_PREFIX6}, |
9488
|
|
|
|
|
|
|
{"Prefix7", PER_FORM, ONLY_CURRENT, PREFIX7, MAP_PREFIX7}, |
9489
|
|
|
|
|
|
|
{"Prefix8", PER_FORM, ONLY_CURRENT, PREFIX8, MAP_PREFIX8}, |
9490
|
|
|
|
|
|
|
{"Prefix9", PER_FORM, ONLY_CURRENT, PREFIX9, MAP_PREFIX9}, |
9491
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
9492
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
9493
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
9494
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
9495
|
|
|
|
|
|
|
{"Suffix5", PER_FORM, ONLY_CURRENT, SUFFIX5, MAP_SUFFIX5}, |
9496
|
|
|
|
|
|
|
{"Suffix6", PER_FORM, ONLY_CURRENT, SUFFIX6, MAP_SUFFIX6}, |
9497
|
|
|
|
|
|
|
{"Suffix7", PER_FORM, ONLY_CURRENT, SUFFIX7, MAP_SUFFIX7}, |
9498
|
|
|
|
|
|
|
{"Suffix8", PER_FORM, ONLY_CURRENT, SUFFIX8, MAP_SUFFIX8}, |
9499
|
|
|
|
|
|
|
{"Suffix9", PER_FORM, ONLY_CURRENT, SUFFIX9, MAP_SUFFIX9}, |
9500
|
|
|
|
|
|
|
|
9501
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
9502
|
|
|
|
|
|
|
{"Tag1", PER_TAG, ANY_OFFSET, TAG1, MAP_TAG1}, |
9503
|
|
|
|
|
|
|
{"Tag2", PER_TAG, ANY_OFFSET, TAG2, MAP_TAG2}, |
9504
|
|
|
|
|
|
|
{"Tag3", PER_TAG, ANY_OFFSET, TAG3, MAP_TAG3}, |
9505
|
|
|
|
|
|
|
{"Tag4", PER_TAG, ANY_OFFSET, TAG4, MAP_TAG4}, |
9506
|
|
|
|
|
|
|
{"Tag5", PER_TAG, ANY_OFFSET, TAG5, MAP_TAG5}, |
9507
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
9508
|
|
|
|
|
|
|
|
9509
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
9510
|
|
|
|
|
|
|
{"PreviousVerbLemma", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_LEMMA, MAP_LEMMA} |
9511
|
|
|
|
|
|
|
}; |
9512
|
|
|
|
|
|
|
|
9513
|
|
|
|
|
|
|
template |
9514
|
2
|
|
|
|
|
|
void generic_elementary_features |
9515
|
|
|
|
|
|
|
using namespace unilib; |
9516
|
|
|
|
|
|
|
|
9517
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
9518
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_lemma = elementary_feature_empty; |
9519
|
9
|
100
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
9520
|
|
|
|
|
|
|
int verb_candidate = -1; |
9521
|
|
|
|
|
|
|
|
9522
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
9523
|
18
|
100
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
9524
|
22
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(analyses[i][j].tag.c_str(), analyses[i][j].tag.size()); |
9525
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG1] = analyses[i][j].tag.size() >= 1 ? maps[MAP_TAG1].value(analyses[i][j].tag.c_str() + 0, 1) : elementary_feature_empty; |
9526
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG2] = analyses[i][j].tag.size() >= 2 ? maps[MAP_TAG2].value(analyses[i][j].tag.c_str() + 1, 1) : elementary_feature_empty; |
9527
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
9528
|
11
|
100
|
|
|
|
|
per_tag[i][j].values[TAG4] = analyses[i][j].tag.size() >= 4 ? maps[MAP_TAG4].value(analyses[i][j].tag.c_str() + 3, 1) : elementary_feature_empty; |
9529
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
9530
|
11
|
100
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
50
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(analyses[i][j].lemma.c_str(), analyses[i][j].lemma.size()); |
9532
|
|
|
|
|
|
|
|
9533
|
11
|
100
|
|
|
|
|
if (analyses[i][j].tag[0] == 'V') { |
9534
|
|
|
|
|
|
|
int tag_compare; |
9535
|
3
|
100
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
50
|
|
|
|
|
|
9536
|
|
|
|
|
|
|
} |
9537
|
|
|
|
|
|
|
} |
9538
|
|
|
|
|
|
|
|
9539
|
|
|
|
|
|
|
// Per_form features |
9540
|
14
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
9541
|
7
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
9542
|
7
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_LEMMA] = following_verb_lemma; |
9543
|
|
|
|
|
|
|
|
9544
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
9545
|
7
|
100
|
|
|
|
|
if (verb_candidate >= 0) { |
9546
|
4
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
9547
|
2
|
|
|
|
|
|
following_verb_lemma = per_tag[i][verb_candidate].values[LEMMA]; |
9548
|
|
|
|
|
|
|
} |
9549
|
|
|
|
|
|
|
|
9550
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
9551
|
7
|
100
|
|
|
|
|
if (analyses[i].size() == 1) { |
9552
|
5
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
9553
|
5
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_unknown; |
9554
|
5
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_unknown; |
9555
|
5
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_unknown; |
9556
|
5
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_unknown; |
9557
|
5
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_unknown; |
9558
|
5
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_unknown; |
9559
|
2
|
50
|
|
|
|
|
} else if (forms[i].len <= 0) { |
9560
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
9561
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_empty; |
9562
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_empty; |
9563
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_empty; |
9564
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_empty; |
9565
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_empty; |
9566
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_empty; |
9567
|
|
|
|
|
|
|
} else { |
9568
|
2
|
|
|
|
|
|
string_piece form = forms[i]; |
9569
|
2
|
|
|
|
|
|
const char* form_start = form.str; |
9570
|
|
|
|
|
|
|
|
9571
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
9572
|
11
|
|
|
|
|
|
size_t indices[18] = {0, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, 0, 0, 0, 0, 0, 0, 0, 0}; // careful here regarding forms shorter than 9 characters |
9573
|
|
|
|
|
|
|
int index = 0; |
9574
|
11
|
100
|
|
|
|
|
while (form.len) { |
9575
|
9
|
|
|
|
|
|
indices[(index++) % 18] = form.str - form_start; |
9576
|
|
|
|
|
|
|
|
9577
|
9
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
9578
|
9
|
50
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
50
|
|
|
|
|
|
9579
|
9
|
50
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
50
|
|
|
|
|
|
9580
|
9
|
50
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
50
|
|
|
|
|
|
9581
|
|
|
|
|
|
|
|
9582
|
9
|
50
|
|
|
|
|
if (index == 10 || (!form.len && index < 10)) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9583
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
9584
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
9585
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
9586
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
9587
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX5] = maps[MAP_PREFIX5].value(form_start, indices[5]); |
9588
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX6] = maps[MAP_PREFIX6].value(form_start, indices[6]); |
9589
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX7] = maps[MAP_PREFIX7].value(form_start, indices[7]); |
9590
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX8] = maps[MAP_PREFIX8].value(form_start, indices[8]); |
9591
|
2
|
|
|
|
|
|
per_form[i].values[PREFIX9] = maps[MAP_PREFIX9].value(form_start, indices[9]); |
9592
|
|
|
|
|
|
|
} |
9593
|
|
|
|
|
|
|
} |
9594
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index+18-1) % 18], form.str - form_start - indices[(index+18-1) % 18]); |
9595
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index+18-2) % 18], form.str - form_start - indices[(index+18-2) % 18]); |
9596
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index+18-3) % 18], form.str - form_start - indices[(index+18-3) % 18]); |
9597
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index+18-4) % 18], form.str - form_start - indices[(index+18-4) % 18]); |
9598
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX5] = maps[MAP_SUFFIX5].value(form_start + indices[(index+18-5) % 18], form.str - form_start - indices[(index+18-5) % 18]); |
9599
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX6] = maps[MAP_SUFFIX6].value(form_start + indices[(index+18-6) % 18], form.str - form_start - indices[(index+18-6) % 18]); |
9600
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = maps[MAP_SUFFIX7].value(form_start + indices[(index+18-7) % 18], form.str - form_start - indices[(index+18-7) % 18]); |
9601
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX8] = maps[MAP_SUFFIX8].value(form_start + indices[(index+18-8) % 18], form.str - form_start - indices[(index+18-8) % 18]); |
9602
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX9] = maps[MAP_SUFFIX9].value(form_start + indices[(index+18-9) % 18], form.str - form_start - indices[(index+18-9) % 18]); |
9603
|
2
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
9604
|
2
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
9605
|
2
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
9606
|
|
|
|
|
|
|
} |
9607
|
|
|
|
|
|
|
} |
9608
|
2
|
|
|
|
|
|
} |
9609
|
|
|
|
|
|
|
|
9610
|
|
|
|
|
|
|
template |
9611
|
|
|
|
|
|
|
void generic_elementary_features |
9612
|
22
|
100
|
|
|
|
|
if (prev_dynamic) { |
9613
|
20
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
9614
|
20
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_LEMMA]; |
9615
|
|
|
|
|
|
|
} else { |
9616
|
2
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
9617
|
2
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = elementary_feature_empty; |
9618
|
|
|
|
|
|
|
} |
9619
|
|
|
|
|
|
|
|
9620
|
22
|
100
|
|
|
|
|
if (tag.tag[0] == 'V') { |
9621
|
3
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
9622
|
3
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = per_tag.values[LEMMA]; |
9623
|
|
|
|
|
|
|
} else { |
9624
|
19
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
9625
|
19
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = dynamic.values[PREVIOUS_VERB_LEMMA]; |
9626
|
|
|
|
|
|
|
} |
9627
|
|
|
|
|
|
|
} |
9628
|
|
|
|
|
|
|
|
9629
|
|
|
|
|
|
|
} // namespace morphodita |
9630
|
|
|
|
|
|
|
|
9631
|
|
|
|
|
|
|
///////// |
9632
|
|
|
|
|
|
|
// File: utils/threadsafe_stack.h |
9633
|
|
|
|
|
|
|
///////// |
9634
|
|
|
|
|
|
|
|
9635
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
9636
|
|
|
|
|
|
|
// |
9637
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9638
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9639
|
|
|
|
|
|
|
// |
9640
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9641
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9642
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9643
|
|
|
|
|
|
|
|
9644
|
|
|
|
|
|
|
namespace utils { |
9645
|
|
|
|
|
|
|
|
9646
|
|
|
|
|
|
|
// |
9647
|
|
|
|
|
|
|
// Declarations |
9648
|
|
|
|
|
|
|
// |
9649
|
|
|
|
|
|
|
|
9650
|
|
|
|
|
|
|
template |
9651
|
0
|
|
|
|
|
|
class threadsafe_stack { |
9652
|
|
|
|
|
|
|
public: |
9653
|
|
|
|
|
|
|
inline void push(T* t); |
9654
|
|
|
|
|
|
|
inline T* pop(); |
9655
|
|
|
|
|
|
|
|
9656
|
|
|
|
|
|
|
private: |
9657
|
|
|
|
|
|
|
vector> stack; |
9658
|
|
|
|
|
|
|
atomic_flag lock = ATOMIC_FLAG_INIT; |
9659
|
|
|
|
|
|
|
}; |
9660
|
|
|
|
|
|
|
|
9661
|
|
|
|
|
|
|
// |
9662
|
|
|
|
|
|
|
// Definitions |
9663
|
|
|
|
|
|
|
// |
9664
|
|
|
|
|
|
|
|
9665
|
|
|
|
|
|
|
template |
9666
|
12
|
|
|
|
|
|
void threadsafe_stack::push(T* t) { |
9667
|
6
|
0
|
|
|
|
|
while (lock.test_and_set(memory_order_acquire)) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9668
|
6
|
|
|
|
|
|
stack.emplace_back(t); |
9669
|
|
|
|
|
|
|
lock.clear(memory_order_release); |
9670
|
6
|
|
|
|
|
|
} |
9671
|
|
|
|
|
|
|
|
9672
|
|
|
|
|
|
|
template |
9673
|
12
|
|
|
|
|
|
T* threadsafe_stack::pop() { |
9674
|
|
|
|
|
|
|
T* res = nullptr; |
9675
|
|
|
|
|
|
|
|
9676
|
6
|
0
|
|
|
|
|
while (lock.test_and_set(memory_order_acquire)) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9677
|
6
|
0
|
|
|
|
|
if (!stack.empty()) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
9678
|
|
|
|
|
|
|
res = stack.back().release(); |
9679
|
|
|
|
|
|
|
stack.pop_back(); |
9680
|
|
|
|
|
|
|
} |
9681
|
|
|
|
|
|
|
lock.clear(memory_order_release); |
9682
|
|
|
|
|
|
|
|
9683
|
6
|
|
|
|
|
|
return res; |
9684
|
|
|
|
|
|
|
} |
9685
|
|
|
|
|
|
|
|
9686
|
|
|
|
|
|
|
} // namespace utils |
9687
|
|
|
|
|
|
|
|
9688
|
|
|
|
|
|
|
///////// |
9689
|
|
|
|
|
|
|
// File: morphodita/tagger/perceptron_tagger.h |
9690
|
|
|
|
|
|
|
///////// |
9691
|
|
|
|
|
|
|
|
9692
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9693
|
|
|
|
|
|
|
// |
9694
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9695
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9696
|
|
|
|
|
|
|
// |
9697
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9698
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9699
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9700
|
|
|
|
|
|
|
|
9701
|
|
|
|
|
|
|
namespace morphodita { |
9702
|
|
|
|
|
|
|
|
9703
|
|
|
|
|
|
|
// Declarations |
9704
|
|
|
|
|
|
|
template |
9705
|
0
|
|
|
|
|
|
class perceptron_tagger : public tagger { |
9706
|
|
|
|
|
|
|
public: |
9707
|
|
|
|
|
|
|
perceptron_tagger(int decoding_order, int window_size); |
9708
|
|
|
|
|
|
|
|
9709
|
|
|
|
|
|
|
bool load(istream& is); |
9710
|
|
|
|
|
|
|
virtual const morpho* get_morpho() const override; |
9711
|
|
|
|
|
|
|
virtual void tag(const vector& forms, vector& tags, morpho::guesser_mode guesser = morpho::guesser_mode(-1)) const override; |
9712
|
|
|
|
|
|
|
virtual void tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const override; |
9713
|
|
|
|
|
|
|
|
9714
|
|
|
|
|
|
|
private: |
9715
|
|
|
|
|
|
|
int decoding_order, window_size; |
9716
|
|
|
|
|
|
|
|
9717
|
|
|
|
|
|
|
unique_ptr dict; |
9718
|
|
|
|
|
|
|
bool use_guesser; |
9719
|
|
|
|
|
|
|
FeatureSequences features; |
9720
|
|
|
|
|
|
|
typedef viterbi viterbi_decoder; |
9721
|
|
|
|
|
|
|
viterbi_decoder decoder; |
9722
|
0
|
|
|
|
|
|
struct cache { |
9723
|
|
|
|
|
|
|
vector forms; |
9724
|
|
|
|
|
|
|
vector> analyses; |
9725
|
|
|
|
|
|
|
vector tags; |
9726
|
|
|
|
|
|
|
typename viterbi_decoder::cache decoder_cache; |
9727
|
|
|
|
|
|
|
|
9728
|
1
|
0
|
|
|
|
|
cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9729
|
|
|
|
|
|
|
}; |
9730
|
|
|
|
|
|
|
|
9731
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
9732
|
|
|
|
|
|
|
}; |
9733
|
|
|
|
|
|
|
|
9734
|
|
|
|
|
|
|
// Definitions |
9735
|
|
|
|
|
|
|
|
9736
|
|
|
|
|
|
|
template |
9737
|
1
|
|
|
|
|
|
perceptron_tagger::perceptron_tagger(int decoding_order, int window_size) |
9738
|
1
|
|
|
|
|
|
: decoding_order(decoding_order), window_size(window_size), decoder(features, decoding_order, window_size) {} |
9739
|
|
|
|
|
|
|
|
9740
|
|
|
|
|
|
|
template |
9741
|
1
|
|
|
|
|
|
bool perceptron_tagger::load(istream& is) { |
9742
|
2
|
0
|
|
|
|
|
if (dict.reset(morpho::load(is)), !dict) return false; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9743
|
1
|
|
|
|
|
|
use_guesser = is.get(); |
9744
|
1
|
0
|
|
|
|
|
if (!features.load(is)) return false; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9745
|
1
|
|
|
|
|
|
return true; |
9746
|
|
|
|
|
|
|
} |
9747
|
|
|
|
|
|
|
|
9748
|
|
|
|
|
|
|
template |
9749
|
1
|
|
|
|
|
|
const morpho* perceptron_tagger::get_morpho() const { |
9750
|
1
|
|
|
|
|
|
return dict.get(); |
9751
|
|
|
|
|
|
|
} |
9752
|
|
|
|
|
|
|
|
9753
|
|
|
|
|
|
|
template |
9754
|
2
|
|
|
|
|
|
void perceptron_tagger::tag(const vector& forms, vector& tags, morpho::guesser_mode guesser) const { |
9755
|
|
|
|
|
|
|
tags.clear(); |
9756
|
2
|
0
|
|
|
|
|
if (!dict) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9757
|
|
|
|
|
|
|
|
9758
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
9759
|
2
|
0
|
|
|
|
|
if (!c) c = new cache(*this); |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9760
|
|
|
|
|
|
|
|
9761
|
2
|
|
|
|
|
|
c->forms.resize(forms.size()); |
9762
|
2
|
0
|
|
|
|
|
if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9763
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9764
|
7
|
|
|
|
|
|
c->forms[i] = forms[i]; |
9765
|
7
|
|
|
|
|
|
c->forms[i].len = dict->raw_form_len(forms[i]); |
9766
|
7
|
0
|
|
|
|
|
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9767
|
|
|
|
|
|
|
} |
9768
|
|
|
|
|
|
|
|
9769
|
2
|
0
|
|
|
|
|
if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9770
|
2
|
|
|
|
|
|
decoder.tag(c->forms, c->analyses, c->decoder_cache, c->tags); |
9771
|
|
|
|
|
|
|
|
9772
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9773
|
7
|
|
|
|
|
|
tags.emplace_back(c->analyses[i][c->tags[i]]); |
9774
|
|
|
|
|
|
|
|
9775
|
2
|
|
|
|
|
|
caches.push(c); |
9776
|
|
|
|
|
|
|
} |
9777
|
|
|
|
|
|
|
|
9778
|
|
|
|
|
|
|
template |
9779
|
0
|
|
|
|
|
|
void perceptron_tagger::tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const { |
9780
|
|
|
|
|
|
|
tags.clear(); |
9781
|
|
|
|
|
|
|
|
9782
|
0
|
|
|
|
|
|
cache* c = caches.pop(); |
9783
|
0
|
0
|
|
|
|
|
if (!c) c = new cache(*this); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9784
|
|
|
|
|
|
|
|
9785
|
0
|
|
|
|
|
|
tags.resize(forms.size()); |
9786
|
0
|
|
|
|
|
|
decoder.tag(forms, analyses, c->decoder_cache, tags); |
9787
|
|
|
|
|
|
|
|
9788
|
0
|
|
|
|
|
|
caches.push(c); |
9789
|
0
|
|
|
|
|
|
} |
9790
|
|
|
|
|
|
|
|
9791
|
|
|
|
|
|
|
} // namespace morphodita |
9792
|
|
|
|
|
|
|
|
9793
|
|
|
|
|
|
|
///////// |
9794
|
|
|
|
|
|
|
// File: morphodita/tagger/tagger_ids.h |
9795
|
|
|
|
|
|
|
///////// |
9796
|
|
|
|
|
|
|
|
9797
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9798
|
|
|
|
|
|
|
// |
9799
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9800
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9801
|
|
|
|
|
|
|
// |
9802
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9803
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9804
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9805
|
|
|
|
|
|
|
|
9806
|
|
|
|
|
|
|
namespace morphodita { |
9807
|
|
|
|
|
|
|
|
9808
|
|
|
|
|
|
|
class tagger_ids { |
9809
|
|
|
|
|
|
|
public: |
9810
|
|
|
|
|
|
|
enum tagger_id { |
9811
|
|
|
|
|
|
|
CZECH2 = 0, CZECH3 = 1, CZECH2_3 = 6, |
9812
|
|
|
|
|
|
|
/* 2 was used internally for ENGLISH3, but never released publicly */ |
9813
|
|
|
|
|
|
|
GENERIC2 = 3, GENERIC3 = 4, GENERIC4 = 5, GENERIC2_3 = 7, |
9814
|
|
|
|
|
|
|
CONLLU2 = 8, CONLLU2_3 = 9, CONLLU3 = 10, |
9815
|
|
|
|
|
|
|
}; |
9816
|
|
|
|
|
|
|
|
9817
|
|
|
|
|
|
|
static bool parse(const string& str, tagger_id& id) { |
9818
|
|
|
|
|
|
|
if (str == "czech2") return id = CZECH2, true; |
9819
|
|
|
|
|
|
|
if (str == "czech2_3") return id = CZECH2_3, true; |
9820
|
|
|
|
|
|
|
if (str == "czech3") return id = CZECH3, true; |
9821
|
|
|
|
|
|
|
if (str == "generic2") return id = GENERIC2, true; |
9822
|
|
|
|
|
|
|
if (str == "generic2_3") return id = GENERIC2_3, true; |
9823
|
|
|
|
|
|
|
if (str == "generic3") return id = GENERIC3, true; |
9824
|
|
|
|
|
|
|
if (str == "generic4") return id = GENERIC4, true; |
9825
|
|
|
|
|
|
|
if (str == "conllu2") return id = CONLLU2, true; |
9826
|
|
|
|
|
|
|
if (str == "conllu2_3") return id = CONLLU2_3, true; |
9827
|
|
|
|
|
|
|
if (str == "conllu3") return id = CONLLU3, true; |
9828
|
|
|
|
|
|
|
return false; |
9829
|
|
|
|
|
|
|
} |
9830
|
|
|
|
|
|
|
|
9831
|
|
|
|
|
|
|
static int decoding_order(tagger_id id) { |
9832
|
|
|
|
|
|
|
switch (id) { |
9833
|
|
|
|
|
|
|
case CZECH2: return 2; |
9834
|
|
|
|
|
|
|
case CZECH2_3: return 2; |
9835
|
|
|
|
|
|
|
case CZECH3: return 3; |
9836
|
|
|
|
|
|
|
case GENERIC2: return 2; |
9837
|
|
|
|
|
|
|
case GENERIC2_3: return 2; |
9838
|
|
|
|
|
|
|
case GENERIC3: return 3; |
9839
|
|
|
|
|
|
|
case GENERIC4: return 4; |
9840
|
|
|
|
|
|
|
case CONLLU2: return 2; |
9841
|
|
|
|
|
|
|
case CONLLU2_3: return 2; |
9842
|
|
|
|
|
|
|
case CONLLU3: return 3; |
9843
|
|
|
|
|
|
|
} |
9844
|
|
|
|
|
|
|
return 0; |
9845
|
|
|
|
|
|
|
} |
9846
|
|
|
|
|
|
|
|
9847
|
|
|
|
|
|
|
static int window_size(tagger_id id) { |
9848
|
|
|
|
|
|
|
switch (id) { |
9849
|
|
|
|
|
|
|
case CZECH2_3: return 3; |
9850
|
|
|
|
|
|
|
case GENERIC2_3: return 3; |
9851
|
|
|
|
|
|
|
case CONLLU2_3: return 3; |
9852
|
|
|
|
|
|
|
default: break; |
9853
|
|
|
|
|
|
|
} |
9854
|
|
|
|
|
|
|
return decoding_order(id); |
9855
|
|
|
|
|
|
|
} |
9856
|
|
|
|
|
|
|
}; |
9857
|
|
|
|
|
|
|
|
9858
|
|
|
|
|
|
|
typedef tagger_ids::tagger_id tagger_id; |
9859
|
|
|
|
|
|
|
|
9860
|
|
|
|
|
|
|
} // namespace morphodita |
9861
|
|
|
|
|
|
|
|
9862
|
|
|
|
|
|
|
///////// |
9863
|
|
|
|
|
|
|
// File: morphodita/tagger/tagger.cpp |
9864
|
|
|
|
|
|
|
///////// |
9865
|
|
|
|
|
|
|
|
9866
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9867
|
|
|
|
|
|
|
// |
9868
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9869
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9870
|
|
|
|
|
|
|
// |
9871
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9872
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9873
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9874
|
|
|
|
|
|
|
|
9875
|
|
|
|
|
|
|
namespace morphodita { |
9876
|
|
|
|
|
|
|
|
9877
|
1
|
|
|
|
|
|
tagger* tagger::load(istream& is) { |
9878
|
1
|
50
|
|
|
|
|
tagger_id id = tagger_id(is.get()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9879
|
|
|
|
|
|
|
switch (id) { |
9880
|
|
|
|
|
|
|
case tagger_ids::CZECH2: |
9881
|
|
|
|
|
|
|
case tagger_ids::CZECH2_3: |
9882
|
|
|
|
|
|
|
case tagger_ids::CZECH3: |
9883
|
|
|
|
|
|
|
{ |
9884
|
0
|
0
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
9885
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
0
|
|
|
|
|
|
9886
|
|
|
|
|
|
|
break; |
9887
|
|
|
|
|
|
|
} |
9888
|
|
|
|
|
|
|
case tagger_ids::GENERIC2: |
9889
|
|
|
|
|
|
|
case tagger_ids::GENERIC2_3: |
9890
|
|
|
|
|
|
|
case tagger_ids::GENERIC3: |
9891
|
|
|
|
|
|
|
case tagger_ids::GENERIC4: |
9892
|
|
|
|
|
|
|
{ |
9893
|
1
|
50
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
9894
|
1
|
50
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
50
|
|
|
|
|
|
9895
|
|
|
|
|
|
|
break; |
9896
|
|
|
|
|
|
|
} |
9897
|
|
|
|
|
|
|
case tagger_ids::CONLLU2: |
9898
|
|
|
|
|
|
|
case tagger_ids::CONLLU2_3: |
9899
|
|
|
|
|
|
|
case tagger_ids::CONLLU3: |
9900
|
|
|
|
|
|
|
{ |
9901
|
0
|
0
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
9902
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
0
|
|
|
|
|
|
9903
|
|
|
|
|
|
|
break; |
9904
|
|
|
|
|
|
|
} |
9905
|
|
|
|
|
|
|
} |
9906
|
|
|
|
|
|
|
|
9907
|
|
|
|
|
|
|
return nullptr; |
9908
|
|
|
|
|
|
|
} |
9909
|
|
|
|
|
|
|
|
9910
|
0
|
|
|
|
|
|
tagger* tagger::load(const char* fname) { |
9911
|
0
|
0
|
|
|
|
|
ifstream f(path_from_utf8(fname).c_str(), ifstream::binary); |
9912
|
0
|
0
|
|
|
|
|
if (!f) return nullptr; |
9913
|
|
|
|
|
|
|
|
9914
|
0
|
0
|
|
|
|
|
return load(f); |
9915
|
|
|
|
|
|
|
} |
9916
|
|
|
|
|
|
|
|
9917
|
0
|
|
|
|
|
|
tokenizer* tagger::new_tokenizer() const { |
9918
|
0
|
|
|
|
|
|
auto morpho = get_morpho(); |
9919
|
0
|
0
|
|
|
|
|
return morpho ? morpho->new_tokenizer() : nullptr; |
9920
|
|
|
|
|
|
|
} |
9921
|
|
|
|
|
|
|
|
9922
|
|
|
|
|
|
|
} // namespace morphodita |
9923
|
|
|
|
|
|
|
|
9924
|
|
|
|
|
|
|
///////// |
9925
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/identity_tagset_converter.h |
9926
|
|
|
|
|
|
|
///////// |
9927
|
|
|
|
|
|
|
|
9928
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9929
|
|
|
|
|
|
|
// |
9930
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9931
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9932
|
|
|
|
|
|
|
// |
9933
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9934
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9935
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9936
|
|
|
|
|
|
|
|
9937
|
|
|
|
|
|
|
namespace morphodita { |
9938
|
|
|
|
|
|
|
|
9939
|
0
|
|
|
|
|
|
class identity_tagset_converter : public tagset_converter { |
9940
|
|
|
|
|
|
|
public: |
9941
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
9942
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
9943
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
9944
|
|
|
|
|
|
|
}; |
9945
|
|
|
|
|
|
|
|
9946
|
|
|
|
|
|
|
} // namespace morphodita |
9947
|
|
|
|
|
|
|
|
9948
|
|
|
|
|
|
|
///////// |
9949
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/identity_tagset_converter.cpp |
9950
|
|
|
|
|
|
|
///////// |
9951
|
|
|
|
|
|
|
|
9952
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9953
|
|
|
|
|
|
|
// |
9954
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9955
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9956
|
|
|
|
|
|
|
// |
9957
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9958
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9959
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9960
|
|
|
|
|
|
|
|
9961
|
|
|
|
|
|
|
namespace morphodita { |
9962
|
|
|
|
|
|
|
|
9963
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert(tagged_lemma& /*tagged_lemma*/) const {} |
9964
|
|
|
|
|
|
|
|
9965
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert_analyzed(vector& /*tagged_lemmas*/) const {} |
9966
|
|
|
|
|
|
|
|
9967
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert_generated(vector& /*forms*/) const {} |
9968
|
|
|
|
|
|
|
|
9969
|
|
|
|
|
|
|
} // namespace morphodita |
9970
|
|
|
|
|
|
|
|
9971
|
|
|
|
|
|
|
///////// |
9972
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/pdt_to_conll2009_tagset_converter.h |
9973
|
|
|
|
|
|
|
///////// |
9974
|
|
|
|
|
|
|
|
9975
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9976
|
|
|
|
|
|
|
// |
9977
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9978
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9979
|
|
|
|
|
|
|
// |
9980
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9981
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9982
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9983
|
|
|
|
|
|
|
|
9984
|
|
|
|
|
|
|
namespace morphodita { |
9985
|
|
|
|
|
|
|
|
9986
|
0
|
|
|
|
|
|
class pdt_to_conll2009_tagset_converter : public tagset_converter { |
9987
|
|
|
|
|
|
|
public: |
9988
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
9989
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
9990
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
9991
|
|
|
|
|
|
|
|
9992
|
|
|
|
|
|
|
private: |
9993
|
|
|
|
|
|
|
inline void convert_tag(const string& lemma, string& tag) const; |
9994
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
9995
|
|
|
|
|
|
|
}; |
9996
|
|
|
|
|
|
|
|
9997
|
|
|
|
|
|
|
} // namespace morphodita |
9998
|
|
|
|
|
|
|
|
9999
|
|
|
|
|
|
|
///////// |
10000
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/pdt_to_conll2009_tagset_converter.cpp |
10001
|
|
|
|
|
|
|
///////// |
10002
|
|
|
|
|
|
|
|
10003
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10004
|
|
|
|
|
|
|
// |
10005
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10006
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10007
|
|
|
|
|
|
|
// |
10008
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10009
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10010
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10011
|
|
|
|
|
|
|
|
10012
|
|
|
|
|
|
|
namespace morphodita { |
10013
|
|
|
|
|
|
|
|
10014
|
|
|
|
|
|
|
static const char* names[15] = {"POS", "SubPOS", "Gen", "Num", "Cas", "PGe", "PNu", "Per", "Ten", "Gra", "Neg", "Voi", "", "", "Var"}; |
10015
|
|
|
|
|
|
|
|
10016
|
0
|
|
|
|
|
|
inline void pdt_to_conll2009_tagset_converter::convert_tag(const string& lemma, string& tag) const { |
10017
|
|
|
|
|
|
|
char pdt_tag[16]; |
10018
|
|
|
|
|
|
|
strncpy(pdt_tag, tag.c_str(), 15); |
10019
|
|
|
|
|
|
|
|
10020
|
|
|
|
|
|
|
// Clear the tag |
10021
|
|
|
|
|
|
|
tag.clear(); |
10022
|
|
|
|
|
|
|
|
10023
|
|
|
|
|
|
|
// Fill FEAT of filled tag characters |
10024
|
0
|
0
|
|
|
|
|
for (int i = 0; i < 15 && pdt_tag[i]; i++) |
|
|
0
|
|
|
|
|
|
10025
|
0
|
0
|
|
|
|
|
if (pdt_tag[i] != '-') { |
10026
|
0
|
0
|
|
|
|
|
if (!tag.empty()) tag.push_back('|'); |
10027
|
0
|
|
|
|
|
|
tag.append(names[i]); |
10028
|
0
|
|
|
|
|
|
tag.push_back('='); |
10029
|
0
|
|
|
|
|
|
tag.push_back(pdt_tag[i]); |
10030
|
|
|
|
|
|
|
} |
10031
|
|
|
|
|
|
|
|
10032
|
|
|
|
|
|
|
// Try adding Sem FEAT |
10033
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i + 2 < lemma.size(); i++) |
10034
|
0
|
0
|
|
|
|
|
if (lemma[i] == '_' && lemma[i + 1] == ';') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10035
|
0
|
0
|
|
|
|
|
if (!tag.empty()) tag.push_back('|'); |
10036
|
0
|
|
|
|
|
|
tag.append("Sem="); |
10037
|
0
|
|
|
|
|
|
tag.push_back(lemma[i + 2]); |
10038
|
|
|
|
|
|
|
break; |
10039
|
|
|
|
|
|
|
} |
10040
|
0
|
|
|
|
|
|
} |
10041
|
|
|
|
|
|
|
|
10042
|
0
|
|
|
|
|
|
inline bool pdt_to_conll2009_tagset_converter::convert_lemma(string& lemma) const { |
10043
|
0
|
|
|
|
|
|
unsigned raw_lemma = czech_lemma_addinfo::raw_lemma_len(lemma); |
10044
|
0
|
0
|
|
|
|
|
return raw_lemma < lemma.size() ? (lemma.resize(raw_lemma), true) : false; |
10045
|
|
|
|
|
|
|
} |
10046
|
|
|
|
|
|
|
|
10047
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
10048
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma.lemma, tagged_lemma.tag); |
10049
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
10050
|
0
|
|
|
|
|
|
} |
10051
|
|
|
|
|
|
|
|
10052
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
10053
|
|
|
|
|
|
|
bool lemma_changed = false; |
10054
|
|
|
|
|
|
|
|
10055
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) { |
10056
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma.lemma, tagged_lemma.tag); |
10057
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
10058
|
|
|
|
|
|
|
} |
10059
|
|
|
|
|
|
|
|
10060
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
10061
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10062
|
|
|
|
|
|
|
|
10063
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
10064
|
|
|
|
|
|
|
} |
10065
|
|
|
|
|
|
|
|
10066
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert_generated(vector& forms) const { |
10067
|
|
|
|
|
|
|
bool lemma_changed = false; |
10068
|
|
|
|
|
|
|
|
10069
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) { |
10070
|
0
|
0
|
|
|
|
|
for (auto&& tagged_form : tagged_lemma_forms.forms) |
10071
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma_forms.lemma, tagged_form.tag); |
10072
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
10073
|
|
|
|
|
|
|
} |
10074
|
|
|
|
|
|
|
|
10075
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
10076
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10077
|
|
|
|
|
|
|
|
10078
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
10079
|
|
|
|
|
|
|
} |
10080
|
|
|
|
|
|
|
|
10081
|
|
|
|
|
|
|
} // namespace morphodita |
10082
|
|
|
|
|
|
|
|
10083
|
|
|
|
|
|
|
///////// |
10084
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_comment_tagset_converter.h |
10085
|
|
|
|
|
|
|
///////// |
10086
|
|
|
|
|
|
|
|
10087
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10088
|
|
|
|
|
|
|
// |
10089
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10090
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10091
|
|
|
|
|
|
|
// |
10092
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10093
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10094
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10095
|
|
|
|
|
|
|
|
10096
|
|
|
|
|
|
|
namespace morphodita { |
10097
|
|
|
|
|
|
|
|
10098
|
0
|
|
|
|
|
|
class strip_lemma_comment_tagset_converter : public tagset_converter { |
10099
|
|
|
|
|
|
|
public: |
10100
|
0
|
|
|
|
|
|
strip_lemma_comment_tagset_converter(const morpho& dictionary) : dictionary(dictionary) {} |
10101
|
|
|
|
|
|
|
|
10102
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
10103
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
10104
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
10105
|
|
|
|
|
|
|
|
10106
|
|
|
|
|
|
|
private: |
10107
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
10108
|
|
|
|
|
|
|
const morpho& dictionary; |
10109
|
|
|
|
|
|
|
}; |
10110
|
|
|
|
|
|
|
|
10111
|
|
|
|
|
|
|
} // namespace morphodita |
10112
|
|
|
|
|
|
|
|
10113
|
|
|
|
|
|
|
///////// |
10114
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_comment_tagset_converter.cpp |
10115
|
|
|
|
|
|
|
///////// |
10116
|
|
|
|
|
|
|
|
10117
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10118
|
|
|
|
|
|
|
// |
10119
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10120
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10121
|
|
|
|
|
|
|
// |
10122
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10123
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10124
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10125
|
|
|
|
|
|
|
|
10126
|
|
|
|
|
|
|
namespace morphodita { |
10127
|
|
|
|
|
|
|
|
10128
|
0
|
|
|
|
|
|
inline bool strip_lemma_comment_tagset_converter::convert_lemma(string& lemma) const { |
10129
|
0
|
|
|
|
|
|
unsigned lemma_id_len = dictionary.lemma_id_len(lemma); |
10130
|
0
|
0
|
|
|
|
|
return lemma_id_len < lemma.size() ? (lemma.resize(lemma_id_len), true) : false; |
10131
|
|
|
|
|
|
|
} |
10132
|
|
|
|
|
|
|
|
10133
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
10134
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
10135
|
0
|
|
|
|
|
|
} |
10136
|
|
|
|
|
|
|
|
10137
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
10138
|
|
|
|
|
|
|
bool lemma_changed = false; |
10139
|
|
|
|
|
|
|
|
10140
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) |
10141
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
10142
|
|
|
|
|
|
|
|
10143
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
10144
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10145
|
|
|
|
|
|
|
|
10146
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
10147
|
|
|
|
|
|
|
} |
10148
|
|
|
|
|
|
|
|
10149
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert_generated(vector& forms) const { |
10150
|
|
|
|
|
|
|
bool lemma_changed = false; |
10151
|
|
|
|
|
|
|
|
10152
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) |
10153
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
10154
|
|
|
|
|
|
|
|
10155
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
10156
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10157
|
|
|
|
|
|
|
|
10158
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
10159
|
|
|
|
|
|
|
} |
10160
|
|
|
|
|
|
|
|
10161
|
|
|
|
|
|
|
} // namespace morphodita |
10162
|
|
|
|
|
|
|
|
10163
|
|
|
|
|
|
|
///////// |
10164
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_id_tagset_converter.h |
10165
|
|
|
|
|
|
|
///////// |
10166
|
|
|
|
|
|
|
|
10167
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10168
|
|
|
|
|
|
|
// |
10169
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10170
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10171
|
|
|
|
|
|
|
// |
10172
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10173
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10174
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10175
|
|
|
|
|
|
|
|
10176
|
|
|
|
|
|
|
namespace morphodita { |
10177
|
|
|
|
|
|
|
|
10178
|
0
|
|
|
|
|
|
class strip_lemma_id_tagset_converter : public tagset_converter { |
10179
|
|
|
|
|
|
|
public: |
10180
|
0
|
|
|
|
|
|
strip_lemma_id_tagset_converter(const morpho& dictionary) : dictionary(dictionary) {} |
10181
|
|
|
|
|
|
|
|
10182
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
10183
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
10184
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
10185
|
|
|
|
|
|
|
|
10186
|
|
|
|
|
|
|
private: |
10187
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
10188
|
|
|
|
|
|
|
const morpho& dictionary; |
10189
|
|
|
|
|
|
|
}; |
10190
|
|
|
|
|
|
|
|
10191
|
|
|
|
|
|
|
} // namespace morphodita |
10192
|
|
|
|
|
|
|
|
10193
|
|
|
|
|
|
|
///////// |
10194
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_id_tagset_converter.cpp |
10195
|
|
|
|
|
|
|
///////// |
10196
|
|
|
|
|
|
|
|
10197
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10198
|
|
|
|
|
|
|
// |
10199
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10200
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10201
|
|
|
|
|
|
|
// |
10202
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10203
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10204
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10205
|
|
|
|
|
|
|
|
10206
|
|
|
|
|
|
|
namespace morphodita { |
10207
|
|
|
|
|
|
|
|
10208
|
0
|
|
|
|
|
|
inline bool strip_lemma_id_tagset_converter::convert_lemma(string& lemma) const { |
10209
|
0
|
|
|
|
|
|
unsigned raw_lemma_len = dictionary.raw_lemma_len(lemma); |
10210
|
0
|
0
|
|
|
|
|
return raw_lemma_len < lemma.size() ? (lemma.resize(raw_lemma_len), true) : false; |
10211
|
|
|
|
|
|
|
} |
10212
|
|
|
|
|
|
|
|
10213
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
10214
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
10215
|
0
|
|
|
|
|
|
} |
10216
|
|
|
|
|
|
|
|
10217
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
10218
|
|
|
|
|
|
|
bool lemma_changed = false; |
10219
|
|
|
|
|
|
|
|
10220
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) |
10221
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
10222
|
|
|
|
|
|
|
|
10223
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
10224
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10225
|
|
|
|
|
|
|
|
10226
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
10227
|
|
|
|
|
|
|
} |
10228
|
|
|
|
|
|
|
|
10229
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert_generated(vector& forms) const { |
10230
|
|
|
|
|
|
|
bool lemma_changed = false; |
10231
|
|
|
|
|
|
|
|
10232
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) |
10233
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
10234
|
|
|
|
|
|
|
|
10235
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
10236
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10237
|
|
|
|
|
|
|
|
10238
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
10239
|
|
|
|
|
|
|
} |
10240
|
|
|
|
|
|
|
|
10241
|
|
|
|
|
|
|
} // namespace morphodita |
10242
|
|
|
|
|
|
|
|
10243
|
|
|
|
|
|
|
///////// |
10244
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/tagset_converter.cpp |
10245
|
|
|
|
|
|
|
///////// |
10246
|
|
|
|
|
|
|
|
10247
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10248
|
|
|
|
|
|
|
// |
10249
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10250
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10251
|
|
|
|
|
|
|
// |
10252
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10253
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10254
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10255
|
|
|
|
|
|
|
|
10256
|
|
|
|
|
|
|
namespace morphodita { |
10257
|
|
|
|
|
|
|
|
10258
|
0
|
|
|
|
|
|
// Factory: returns a freshly heap-allocated identity_tagset_converter.
// The object is created with plain `new`; presumably the caller is expected
// to delete it.
tagset_converter* tagset_converter::new_identity_converter() {
  tagset_converter* converter = new identity_tagset_converter();
  return converter;
}
10261
|
|
|
|
|
|
|
|
10262
|
0
|
|
|
|
|
|
// Factory: returns a freshly heap-allocated pdt_to_conll2009_tagset_converter.
// The object is created with plain `new`; presumably the caller is expected
// to delete it.
tagset_converter* tagset_converter::new_pdt_to_conll2009_converter() {
  tagset_converter* converter = new pdt_to_conll2009_tagset_converter();
  return converter;
}
10265
|
|
|
|
|
|
|
|
10266
|
0
|
|
|
|
|
|
// Factory: returns a freshly heap-allocated
// strip_lemma_comment_tagset_converter constructed from `dictionary`.
// The object is created with plain `new`; presumably the caller is expected
// to delete it.
tagset_converter* tagset_converter::new_strip_lemma_comment_converter(const morpho& dictionary) {
  tagset_converter* converter = new strip_lemma_comment_tagset_converter(dictionary);
  return converter;
}
10269
|
|
|
|
|
|
|
|
10270
|
0
|
|
|
|
|
|
// Factory: returns a freshly heap-allocated strip_lemma_id_tagset_converter
// constructed from `dictionary`.
// The object is created with plain `new`; presumably the caller is expected
// to delete it.
tagset_converter* tagset_converter::new_strip_lemma_id_converter(const morpho& dictionary) {
  tagset_converter* converter = new strip_lemma_id_tagset_converter(dictionary);
  return converter;
}
10273
|
|
|
|
|
|
|
|
10274
|
0
|
|
|
|
|
|
// Creates a tagset converter selected by name.
//
// name:       one of "pdt_to_conll2009", "strip_lemma_comment" or
//             "strip_lemma_id" (compared exactly).
// dictionary: passed on to the two strip_lemma_* converters; unused for
//             "pdt_to_conll2009".
//
// Returns the new converter, or nullptr when `name` is none of the above.
// Note that the identity converter cannot be requested through this function.
tagset_converter* new_tagset_converter(const string& name, const morpho& dictionary) {
  tagset_converter* converter = nullptr;

  if (name == "pdt_to_conll2009")
    converter = tagset_converter::new_pdt_to_conll2009_converter();
  else if (name == "strip_lemma_comment")
    converter = tagset_converter::new_strip_lemma_comment_converter(dictionary);
  else if (name == "strip_lemma_id")
    converter = tagset_converter::new_strip_lemma_id_converter(dictionary);

  return converter;
}
10280
|
|
|
|
|
|
|
|
10281
|
0
|
|
|
|
|
|
void tagset_converter_unique_analyzed(vector& tagged_lemmas) { |
10282
|
|
|
|
|
|
|
// Remove possible lemma-tag pair duplicates |
10283
|
|
|
|
|
|
|
struct tagged_lemma_comparator { |
10284
|
0
|
0
|
|
|
|
|
inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; } |
|
|
0
|
|
|
|
|
|
10285
|
0
|
0
|
|
|
|
|
inline static bool lt(const tagged_lemma& a, const tagged_lemma& b) { int lemma_compare = a.lemma.compare(b.lemma); return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); } |
10286
|
|
|
|
|
|
|
}; |
10287
|
|
|
|
|
|
|
|
10288
|
|
|
|
|
|
|
sort(tagged_lemmas.begin(), tagged_lemmas.end(), tagged_lemma_comparator::lt); |
10289
|
0
|
|
|
|
|
|
tagged_lemmas.resize(unique(tagged_lemmas.begin(), tagged_lemmas.end(), tagged_lemma_comparator::eq) - tagged_lemmas.begin()); |
10290
|
0
|
|
|
|
|
|
} |
10291
|
|
|
|
|
|
|
|
10292
|
0
|
|
|
|
|
|
void tagset_converter_unique_generated(vector& forms) { |
10293
|
|
|
|
|
|
|
// Regroup and if needed remove duplicate form-tag pairs for each lemma |
10294
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
10295
|
|
|
|
|
|
|
bool any_merged = false; |
10296
|
0
|
0
|
|
|
|
|
for (unsigned j = forms.size() - 1; j > i; j--) |
10297
|
0
|
0
|
|
|
|
|
if (forms[j].lemma == forms[i].lemma) { |
10298
|
|
|
|
|
|
|
// Same lemma was found. Merge form-tag pairs |
10299
|
0
|
0
|
|
|
|
|
for (auto&& tagged_form : forms[j].forms) |
10300
|
0
|
|
|
|
|
|
forms[i].forms.emplace_back(move(tagged_form)); |
10301
|
|
|
|
|
|
|
|
10302
|
|
|
|
|
|
|
// Remove lemma j by moving it to end and deleting |
10303
|
0
|
0
|
|
|
|
|
if (j < forms.size() - 1) { |
10304
|
0
|
|
|
|
|
|
forms[j].lemma.swap(forms[forms.size() - 1].lemma); |
10305
|
0
|
|
|
|
|
|
forms[j].forms.swap(forms[forms.size() - 1].forms); |
10306
|
|
|
|
|
|
|
} |
10307
|
|
|
|
|
|
|
forms.pop_back(); |
10308
|
|
|
|
|
|
|
any_merged = true; |
10309
|
|
|
|
|
|
|
} |
10310
|
|
|
|
|
|
|
|
10311
|
0
|
0
|
|
|
|
|
if (any_merged && forms[i].forms.size() > 1) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10312
|
|
|
|
|
|
|
// Remove duplicate form-tag pairs |
10313
|
|
|
|
|
|
|
struct tagged_form_comparator { |
10314
|
0
|
0
|
|
|
|
|
inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; } |
|
|
0
|
|
|
|
|
|
10315
|
0
|
0
|
|
|
|
|
inline static bool lt(const tagged_form& a, const tagged_form& b) { int tag_compare = a.tag.compare(b.tag); return tag_compare < 0 || (tag_compare == 0 && a.form < b.form); } |
10316
|
|
|
|
|
|
|
}; |
10317
|
|
|
|
|
|
|
|
10318
|
|
|
|
|
|
|
sort(forms[i].forms.begin(), forms[i].forms.end(), tagged_form_comparator::lt); |
10319
|
0
|
|
|
|
|
|
forms[i].forms.resize(unique(forms[i].forms.begin(), forms[i].forms.end(), tagged_form_comparator::eq) - forms[i].forms.begin()); |
10320
|
|
|
|
|
|
|
} |
10321
|
|
|
|
|
|
|
} |
10322
|
0
|
|
|
|
|
|
} |
10323
|
|
|
|
|
|
|
|
10324
|
|
|
|
|
|
|
} // namespace morphodita |
10325
|
|
|
|
|
|
|
|
10326
|
|
|
|
|
|
|
///////// |
10327
|
|
|
|
|
|
|
// File: morphodita/tokenizer/czech_tokenizer.cpp |
10328
|
|
|
|
|
|
|
///////// |
10329
|
|
|
|
|
|
|
|
10330
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10331
|
|
|
|
|
|
|
// |
10332
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10333
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10334
|
|
|
|
|
|
|
// |
10335
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10336
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10337
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10338
|
|
|
|
|
|
|
|
10339
|
|
|
|
|
|
|
namespace morphodita { |
10340
|
|
|
|
|
|
|
|
10341
|
|
|
|
|
|
|
// Transition tables of the Czech tokenizer state machine driven by
// czech_tokenizer::next_sentence. The layout looks like table-driven output
// of the Ragel state machine compiler, so the values should be regenerated
// rather than edited by hand. Tables indexed by state have 23 entries
// (states 0..22); state 0 stops the machine and state 7 is the start state.

// Per state: index of the first condition range of that state. It is
// multiplied by two to address _czech_tokenizer_cond_keys and used directly
// to address _czech_tokenizer_cond_spaces.
static const char _czech_tokenizer_cond_offsets[] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2
};

// Per state: number of condition ranges. Only state 7 has any (two).
static const char _czech_tokenizer_cond_lengths[] = {
  0, 0, 0, 0, 0, 0, 0, 2,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0
};

// Condition ranges as (low, high) pairs: 43..43 ('+') and 45..45 ('-').
// The trailing 0 is presumably padding.
static const short _czech_tokenizer_cond_keys[] = {
  43u, 43u, 45u, 45u, 0
};

// Per condition range: which condition block of next_sentence to evaluate
// ('+' selects case 1, '-' selects case 0).
static const char _czech_tokenizer_cond_spaces[] = {
  1, 0, 0
};

// Per state: offset of the state's first key in _czech_tokenizer_trans_keys.
static const unsigned char _czech_tokenizer_key_offsets[] = {
  0, 0, 17, 29, 43, 46, 51, 54,
  89, 94, 98, 101, 105, 110, 111, 116,
  117, 122, 136, 143, 148, 151, 163
};

// Transition keys. For each state: first its sorted single keys, then its
// sorted (low, high) range pairs. Keys above 255 (301, 557, 811, 1067) are
// the widened codes next_sentence produces for '-' and '+' depending on the
// preceding character.
static const short _czech_tokenizer_trans_keys[] = {
  13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u,
  133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u,
  90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u,
  135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u,
  39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u,
  161u, 9u, 10u, 159u, 48u, 57u, 43u, 45u,
  159u, 48u, 57u, 159u, 48u, 57u, 9u, 10u,
  13u, 32u, 33u, 44u, 46u, 47u, 63u, 129u,
  131u, 135u, 142u, 147u, 157u, 159u, 160u, 301u,
  557u, 811u, 1067u, 0u, 42u, 48u, 57u, 58u,
  64u, 65u, 90u, 91u, 96u, 97u, 122u, 123u,
  255u, 9u, 10u, 13u, 32u, 147u, 9u, 13u,
  32u, 147u, 9u, 32u, 147u, 9u, 10u, 32u,
  147u, 9u, 10u, 13u, 32u, 147u, 13u, 9u,
  10u, 13u, 32u, 147u, 10u, 9u, 10u, 13u,
  32u, 147u, 13u, 32u, 34u, 39u, 41u, 59u,
  93u, 125u, 139u, 141u, 147u, 161u, 9u, 10u,
  44u, 46u, 69u, 101u, 159u, 48u, 57u, 69u,
  101u, 159u, 48u, 57u, 159u, 48u, 57u, 129u,
  131u, 135u, 151u, 155u, 157u, 65u, 90u, 97u,
  122u, 142u, 143u, 159u, 48u, 57u, 0
};

// Per state: number of single keys.
static const char _czech_tokenizer_single_lengths[] = {
  0, 13, 10, 12, 1, 3, 1, 21,
  5, 4, 3, 4, 5, 1, 5, 1,
  5, 12, 5, 3, 1, 6, 1
};

// Per state: number of (low, high) key ranges.
static const char _czech_tokenizer_range_lengths[] = {
  0, 2, 1, 1, 1, 1, 1, 7,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 1, 1, 1, 1, 3, 1
};

// Per state: offset of the state's first slot in _czech_tokenizer_indicies.
// Slots follow the order single keys, ranges, then one default slot.
static const unsigned char _czech_tokenizer_index_offsets[] = {
  0, 0, 16, 28, 42, 45, 50, 53,
  82, 88, 93, 97, 102, 108, 110, 116,
  118, 124, 138, 145, 150, 153, 163
};

// Maps a slot to a transition id, i.e. an index into
// _czech_tokenizer_trans_targs and _czech_tokenizer_trans_actions.
static const char _czech_tokenizer_indicies[] = {
  1, 1, 2, 2, 2, 2, 2, 3,
  2, 3, 1, 2, 2, 1, 3, 0,
  2, 2, 2, 2, 2, 3, 2, 3,
  2, 2, 3, 0, 4, 4, 5, 5,
  5, 5, 5, 5, 5, 5, 4, 5,
  4, 0, 6, 6, 0, 7, 7, 8,
  8, 0, 8, 8, 0, 10, 11, 12,
  10, 13, 9, 13, 9, 13, 16, 16,
  16, 16, 10, 16, 15, 13, 9, 17,
  9, 17, 9, 15, 9, 16, 9, 16,
  9, 14, 10, 19, 20, 10, 10, 18,
  10, 21, 10, 10, 18, 10, 10, 10,
  18, 10, 21, 10, 10, 18, 10, 22,
  23, 10, 10, 18, 25, 24, 10, 22,
  26, 10, 10, 18, 25, 24, 10, 23,
  26, 10, 10, 18, 4, 4, 5, 5,
  5, 5, 5, 5, 5, 5, 4, 5,
  4, 27, 28, 28, 29, 29, 15, 15,
  27, 29, 29, 6, 6, 27, 8, 8,
  27, 16, 16, 16, 16, 16, 16, 16,
  16, 16, 27, 15, 15, 27, 0
};

// Per transition: the state entered.
static const char _czech_tokenizer_trans_targs[] = {
  7, 1, 2, 7, 1, 3, 19, 6,
  20, 7, 8, 12, 16, 17, 0, 18,
  21, 22, 7, 9, 11, 10, 13, 14,
  7, 7, 15, 7, 4, 5
};

// Per transition: id of the action executed in next_sentence (0 = none).
static const char _czech_tokenizer_trans_actions[] = {
  1, 0, 0, 2, 3, 0, 4, 0,
  0, 7, 0, 0, 0, 4, 0, 4,
  0, 0, 8, 0, 0, 0, 0, 0,
  9, 10, 0, 11, 0, 0
};

// Per state: action run after entering the state (5 at state 7 resets ts).
static const char _czech_tokenizer_to_state_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 5,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0
};

// Per state: action run before reading a character in the state
// (6 at state 7 records the token start in ts).
static const char _czech_tokenizer_from_state_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 6,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0
};

// Per state: transition id plus one to take when the input ends in that
// state; 0 means there is no end-of-input transition.
static const unsigned char _czech_tokenizer_eof_trans[] = {
  0, 1, 1, 1, 1, 1, 1, 0,
  19, 19, 19, 19, 19, 25, 19, 25,
  19, 28, 28, 28, 28, 28, 28
};

// Initial state of the machine.
static const int czech_tokenizer_start = 7;
10466
|
|
|
|
|
|
|
|
10467
|
|
|
|
|
|
|
// Lowercased Czech words that do not end a sentence when they directly
// precede an end-of-sentence character (i.e. abbreviations such as "prof.").
//
// Note: because of VS, the abbreviations cannot be listed directly in UTF-8,
// because the compilation of UTF-8 encoded sources fails on some locales
// (e.g., Japanese). Non-ASCII characters are therefore written as octal
// escapes of their UTF-8 bytes.
// To convert non-ASCII characters to octal escapes:
// perl -CS -ple 'use Encode;s/([^[:ascii:]])/join("", map {sprintf "\\%o", ord($_)} split(m@@, encode("utf-8", $1)))/ge'
// To convert pairs of octal escapes back to characters:
// perl -CS -ple 'use Encode;s/\\([0-7]{3})\\([0-7]{3})/decode("utf-8", chr(oct($1)).chr(oct($2)))/ge'
//
// Several entries are listed more than once (e.g. "p", "sv"); this is
// harmless because the container is a set.
const unordered_set<string> czech_tokenizer::abbreviations_czech = {
  // Titles
  "prof", "csc", "drsc", "doc", "phd", "ph", "d",
  "judr", "mddr", "mudr", "mvdr", "paeddr", "paedr", "phdr", "rndr", "rsdr", "dr",
  "ing", "arch", "mgr", "bc", "mag", "mba", "bca", "mga",
  "gen", "plk", "pplk", "npor", "por", "ppor", "kpt", "mjr", "sgt", "pls", "p", "s",
  "p", "p\303\255", "fa", "fy", "mr", "mrs", "ms", "miss", "tr", "sv",
  // Geographic names
  "angl", "fr", "\304\215es", "ces", "\304\215s", "cs", "slov", "n\304\233m", "nem", "it", "pol", "ma\304\217", "mad", "rus",
  "sev", "v\303\275ch", "vych", "ji\305\276", "jiz", "z\303\241p", "zap",
  // Common abbreviations
  "adr", "\304\215", "c", "eg", "ev", "g", "hod", "j", "kr", "m", "max", "min", "mj", "nap\305\231", "napr",
  "okr", "pop\305\231", "popr", "pozn", "r", "\305\231", "red", "rep", "resp", "srov", "st", "st\305\231", "str",
  "sv", "tel", "tj", "tzv", "\303\272", "u", "uh", "ul", "um", "zl", "zn",
};
10488
|
|
|
|
|
|
|
|
10489
|
315
|
100
|
|
|
|
|
// Lowercased Slovak abbreviations, selected by the czech_tokenizer
// constructor when the language is SLOVAK. Non-ASCII characters are written
// as octal escapes of their UTF-8 bytes. Several entries are listed more than
// once (e.g. "p", "sv"); this is harmless because the container is a set.
const unordered_set<string> czech_tokenizer::abbreviations_slovak = {
  // Titles
  "prof", "csc", "drsc", "doc", "phd", "ph", "d",
  "judr", "mddr", "mudr", "mvdr", "paeddr", "paedr", "phdr", "rndr", "rsdr", "dr",
  "ing", "arch", "mgr", "bc", "mag", "mba", "bca", "mga",
  "gen", "plk", "pplk", "npor", "por", "ppor", "kpt", "mjr", "sgt", "pls", "p", "s",
  "p", "p\303\255", "fa", "fy", "mr", "mrs", "ms", "miss", "tr", "sv",
  // Geographic names
  "angl", "fr", "\304\215es", "ces", "\304\215s", "cs", "slov", "nem", "it", "po\304\276", "pol", "ma\304\217", "mad",
  "rus", "sev", "v\303\275ch", "vych", "ju\305\276", "juz", "z\303\241p", "zap",
  // Common abbreviations
  "adr", "\304\215", "c", "eg", "ev", "g", "hod", "j", "kr", "m", "max", "min", "mj", "napr",
  "okr", "popr", "pozn", "r", "red", "rep", "resp", "srov", "st", "str",
  "sv", "tel", "tj", "tzv", "\303\272", "u", "uh", "ul", "um", "zl", "zn",
};
10504
|
|
|
|
|
|
|
|
10505
|
0
|
|
|
|
|
|
// Constructs a tokenizer for Czech or Slovak.
//
// language: selects which abbreviation list is consulted (CZECH or SLOVAK).
// version:  values 0 and 1 are passed to the ragel_tokenizer base as 1,
//           anything larger as 2.
// m:        optional dictionary, stored as is; merge_hyphenated does nothing
//           when it is null.
//
// NOTE(review): for any other `language` value this constructor leaves
// `abbreviations` unassigned; whether the member has a default initializer
// is not visible here -- confirm against the class declaration.
czech_tokenizer::czech_tokenizer(tokenizer_language language, unsigned version, const morpho* m)
  : ragel_tokenizer(version <= 1 ? 1 : 2), m(m) {
  if (language == CZECH)
    abbreviations = &abbreviations_czech;
  else if (language == SLOVAK)
    abbreviations = &abbreviations_slovak;
}
10516
|
|
|
|
|
|
|
|
10517
|
0
|
|
|
|
|
|
void czech_tokenizer::merge_hyphenated(vector& tokens) { |
10518
|
|
|
|
|
|
|
using namespace unilib; |
10519
|
|
|
|
|
|
|
|
10520
|
0
|
0
|
|
|
|
|
if (!m) return; |
10521
|
0
|
0
|
|
|
|
|
if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10522
|
|
|
|
|
|
|
|
10523
|
|
|
|
|
|
|
unsigned matched_hyphens = 0; |
10524
|
0
|
0
|
|
|
|
|
for (unsigned hyphens = 1; hyphens <= 2; hyphens++) { |
10525
|
|
|
|
|
|
|
// Are the tokens a sequence of 'hyphens' hyphenated tokens? |
10526
|
0
|
0
|
|
|
|
|
if (tokens.size() < 2*hyphens + 1) break; |
10527
|
0
|
|
|
|
|
|
unsigned first_hyphen = tokens.size() - 2*hyphens; |
10528
|
0
|
0
|
|
|
|
|
if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10529
|
0
|
0
|
|
|
|
|
tokens[first_hyphen].start + tokens[first_hyphen].length != tokens[first_hyphen + 1].start || |
10530
|
0
|
0
|
|
|
|
|
tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start || |
|
|
0
|
|
|
|
|
|
10531
|
0
|
|
|
|
|
|
chars[tokens[first_hyphen-1].start].cat & ~unicode::L) |
10532
|
|
|
|
|
|
|
break; |
10533
|
|
|
|
|
|
|
|
10534
|
0
|
0
|
|
|
|
|
if (m->analyze(string_piece(chars[tokens[first_hyphen-1].start].str, chars[tokens.back().start + tokens.back().length].str - chars[tokens[first_hyphen-1].start].str), morpho::NO_GUESSER, lemmas) >= 0) |
10535
|
|
|
|
|
|
|
matched_hyphens = hyphens; |
10536
|
|
|
|
|
|
|
} |
10537
|
|
|
|
|
|
|
|
10538
|
0
|
0
|
|
|
|
|
if (matched_hyphens) { |
10539
|
0
|
|
|
|
|
|
unsigned first = tokens.size() - 2*matched_hyphens - 1; |
10540
|
0
|
|
|
|
|
|
tokens[first].length = tokens.back().start + tokens.back().length - tokens[first].start; |
10541
|
0
|
|
|
|
|
|
tokens.resize(first + 1); |
10542
|
|
|
|
|
|
|
} |
10543
|
|
|
|
|
|
|
} |
10544
|
|
|
|
|
|
|
|
10545
|
0
|
|
|
|
|
|
// Reads tokens of the next sentence into `tokens`, advancing `current`
// through `chars` (both declared outside this function).
//
// Returns true as soon as emergency_sentence_split reports a split during the
// initial URL/e-mail scan; otherwise returns whether any token was produced.
//
// The body is a table-driven state machine over the _czech_tokenizer_* tables
// (it looks like Ragel-generated code). Statement order and the goto labels
// are significant; regenerate rather than edit by hand.
// Scanning stops at index chars.size() - 1 -- presumably the last element of
// `chars` is an end sentinel; confirm against the code that fills `chars`.
bool czech_tokenizer::next_sentence(vector<token_range>& tokens) {
  using namespace unilib;

  int cs, act;       // cs: current state of the machine
  size_t ts, te;     // start and end (exclusive) of the token being matched
  size_t whitespace = 0; // Suppress "may be uninitialized" warning

  // Consume leading URLs/e-mails before running the state machine.
  while (tokenize_url_email(tokens))
    if (emergency_sentence_split(tokens))
      return true;

  // Machine initialization.
  {
  cs = czech_tokenizer_start;
  ts = 0;
  te = 0;
  act = 0;
  }

  // Machine execution.
  {
  int _klen;
  const short *_keys;
  int _trans;
  short _widec;

  if ( ( current) == ( (chars.size() - 1)) )
    goto _test_eof;
  if ( cs == 0 )
    goto _out;
_resume:
  // From-state action: remember where the token starts.
  switch ( _czech_tokenizer_from_state_actions[cs] ) {
  case 6:
  {ts = ( current);}
  break;
  }

  // Widen the current character code when the state has condition ranges
  // covering it: binary search over (low, high) pairs in cond_keys.
  _widec = ( ragel_char(chars[current]));
  _klen = _czech_tokenizer_cond_lengths[cs];
  _keys = _czech_tokenizer_cond_keys + (_czech_tokenizer_cond_offsets[cs]*2);
  if ( _klen > 0 ) {
    const short *_lower = _keys;
    const short *_mid;
    const short *_upper = _keys + (_klen<<1) - 2;
    while (1) {
      if ( _upper < _lower )
        break;

      _mid = _lower + (((_upper-_lower) >> 1) & ~1);
      if ( _widec < _mid[0] )
        _upper = _mid - 2;
      else if ( _widec > _mid[1] )
        _lower = _mid + 2;
      else {
        switch ( _czech_tokenizer_cond_spaces[_czech_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) {
  // '-' : code becomes 256 + c, and 256 more when there is no previous
  // character or its category has bits outside L | M | N | Pd.
  case 0: {
    _widec = (short)(256u + (( ragel_char(chars[current])) - 0u));
    if (
 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
    break;
  }
  // '+' : code becomes 768 + c, and 256 more when there is no previous
  // character, or its category has bits outside L | M | N and it is not '+'.
  case 1: {
    _widec = (short)(768u + (( ragel_char(chars[current])) - 0u));
    if (
 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
    break;
  }
        }
        break;
      }
    }
  }

  // Find the transition slot for _widec in the current state.
  _keys = _czech_tokenizer_trans_keys + _czech_tokenizer_key_offsets[cs];
  _trans = _czech_tokenizer_index_offsets[cs];

  // 1) Binary search among the state's single keys.
  _klen = _czech_tokenizer_single_lengths[cs];
  if ( _klen > 0 ) {
    const short *_lower = _keys;
    const short *_mid;
    const short *_upper = _keys + _klen - 1;
    while (1) {
      if ( _upper < _lower )
        break;

      _mid = _lower + ((_upper-_lower) >> 1);
      if ( _widec < *_mid )
        _upper = _mid - 1;
      else if ( _widec > *_mid )
        _lower = _mid + 1;
      else {
        _trans += (unsigned int)(_mid - _keys);
        goto _match;
      }
    }
    _keys += _klen;
    _trans += _klen;
  }

  // 2) Binary search among the state's (low, high) key ranges.
  _klen = _czech_tokenizer_range_lengths[cs];
  if ( _klen > 0 ) {
    const short *_lower = _keys;
    const short *_mid;
    const short *_upper = _keys + (_klen<<1) - 2;
    while (1) {
      if ( _upper < _lower )
        break;

      _mid = _lower + (((_upper-_lower) >> 1) & ~1);
      if ( _widec < _mid[0] )
        _upper = _mid - 2;
      else if ( _widec > _mid[1] )
        _lower = _mid + 2;
      else {
        _trans += (unsigned int)((_mid - _keys)>>1);
        goto _match;
      }
    }
    // 3) Nothing matched: _trans now addresses the state's default slot.
    _trans += _klen;
  }

_match:
  _trans = _czech_tokenizer_indicies[_trans];
_eof_trans:
  cs = _czech_tokenizer_trans_targs[_trans];

  if ( _czech_tokenizer_trans_actions[_trans] == 0 )
    goto _again;

  // Transition actions. Most of them end with the same tail: continue at te,
  // scan for URLs/e-mails, and leave when emergency_sentence_split fires.
  switch ( _czech_tokenizer_trans_actions[_trans] ) {
  // Remember the current position in `whitespace`.
  case 3:
  { whitespace = current; }
  break;
  // Mark a possible token end after the current character.
  case 4:
  {te = ( current)+1;}
  break;
  // Token ends after the current character: emit [ts, te) and try to merge
  // it with preceding hyphenated tokens.
  case 7:
  {te = ( current)+1;{ tokens.emplace_back(ts, te - ts);
      merge_hyphenated(tokens);
      current = te;
      do
        if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
      while (tokenize_url_email(tokens));
      ( current)--;
    }}
  break;
  // Possible end of sentence: ask is_eos about the character at ts, emit one
  // single-character token for every position in [ts, whitespace), resume at
  // `whitespace`, and stop when is_eos said yes.
  case 2:
  {te = ( current)+1;{
      bool eos = is_eos(tokens, chars[ts].chr, abbreviations);
      for (current = ts; current < whitespace; current++)
        tokens.emplace_back(current, 1);
      {( current) = (( whitespace))-1;}
      if (eos) {( current)++; goto _out; }
    }}
  break;
  // Match ending after the current character that closes the sentence when
  // tokens were already collected; otherwise it is skipped.
  case 10:
  {te = ( current)+1;{
      if (!tokens.empty()) {( current)++; goto _out; }
      current = te;
      do
        if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
      while (tokenize_url_email(tokens));
      ( current)--;
    }}
  break;
  // Token ends before the current character: emit [ts, te) and try to merge
  // it with preceding hyphenated tokens.
  case 11:
  {te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts);
      merge_hyphenated(tokens);
      current = te;
      do
        if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
      while (tokenize_url_email(tokens));
      ( current)--;
    }}
  break;
  // Match ending before the current character that produces no token.
  case 8:
  {te = ( current);( current)--;{
      current = te;
      do
        if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
      while (tokenize_url_email(tokens));
      ( current)--;
    }}
  break;
  // Like case 10, but the match ends before the current character.
  case 9:
  {te = ( current);( current)--;{
      if (!tokens.empty()) {( current)++; goto _out; }
      current = te;
      do
        if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
      while (tokenize_url_email(tokens));
      ( current)--;
    }}
  break;
  // Fall back to the last recorded token end te and emit [ts, te).
  case 1:
  {{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts);
      merge_hyphenated(tokens);
      current = te;
      do
        if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
      while (tokenize_url_email(tokens));
      ( current)--;
    }}
  break;
  }

_again:
  // To-state action: reset the token start.
  switch ( _czech_tokenizer_to_state_actions[cs] ) {
  case 5:
  {ts = 0;}
  break;
  }

  if ( cs == 0 )
    goto _out;
  if ( ++( current) != ( (chars.size() - 1)) )
    goto _resume;
  _test_eof: {}
  // At the end of input, take the state's end-of-input transition, if any
  // (the table stores the transition id plus one).
  if ( ( current) == ( (chars.size() - 1)) )
  {
  if ( _czech_tokenizer_eof_trans[cs] > 0 ) {
    _trans = _czech_tokenizer_eof_trans[cs] - 1;
    goto _eof_trans;
  }
  }

  _out: {}
  }

  (void)act; // Suppress unused variable warning

  return !tokens.empty();
}
10776
|
|
|
|
|
|
|
|
10777
|
|
|
|
|
|
|
} // namespace morphodita |
10778
|
|
|
|
|
|
|
|
10779
|
|
|
|
|
|
|
///////// |
10780
|
|
|
|
|
|
|
// File: morphodita/tokenizer/english_tokenizer.cpp |
10781
|
|
|
|
|
|
|
///////// |
10782
|
|
|
|
|
|
|
|
10783
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
10784
|
|
|
|
|
|
|
// |
10785
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10786
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10787
|
|
|
|
|
|
|
// |
10788
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10789
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10790
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10791
|
|
|
|
|
|
|
|
10792
|
|
|
|
|
|
|
namespace morphodita { |
10793
|
|
|
|
|
|
|
|
10794
|
|
|
|
|
|
|
// Lowercased English words that do not end a sentence when they directly
// precede an end-of-sentence character (i.e. abbreviations such as "dr.").
// Several entries are listed more than once (e.g. "dr", "mr", "gen", "rep");
// this is harmless because the container is a set.
const unordered_set<string> english_tokenizer::abbreviations = {
  // Titles
  "adj", "adm", "adv", "assoc", "asst", "bart", "bldg", "brig", "bros", "capt",
  "cmdr", "col", "comdr", "con", "corp", "cpl", "d", "dr", "dr", "drs", "ens",
  "gen", "gov", "hon", "hosp", "hr", "insp", "lt", "mm", "mr", "mrs", "ms",
  "maj", "messrs", "mlle", "mme", "mr", "mrs", "ms", "msgr", "op", "ord",
  "pfc", "ph", "phd", "prof", "pvt", "rep", "reps", "res", "rev", "rt", "sen",
  "sens", "sfc", "sgt", "sr", "st", "supt", "surg", "univ",
  // Common abbreviations
  "addr", "approx", "apr", "aug", "calif", "co", "corp", "dec", "def", "e",
  "e.g", "eg", "feb", "fla", "ft", "gen", "gov", "hrs", "i.", "i.e", "ie",
  "inc", "jan", "jr", "ltd", "mar", "max", "min", "mph", "mt", "n", "nov",
  "oct", "ont", "pa", "pres", "rep", "rev", "s", "sec", "sen", "sep", "sept",
  "sgt", "sr", "tel", "un", "univ", "v", "va", "vs", "w", "yrs",
};
10810
|
|
|
|
|
|
|
|
10811
|
|
|
|
|
|
|
// Transition tables of the "split_token" state machine of the English
// tokenizer. The layout looks like table-driven output of the Ragel state
// machine compiler, so the values should be regenerated rather than edited by
// hand. Tables indexed by state have 30 entries (states 0..29).
// NOTE(review): the code consuming these tables is not visible from here
// (presumably english_tokenizer::split_token); the per-table descriptions
// below follow the table names and the usual meaning of such tables --
// confirm against the consumer.

// Per state: offset of the state's first key in the trans_keys table.
static const char _english_tokenizer_split_token_key_offsets[] = {
  0, 0, 16, 20, 22, 26, 28, 30,
  32, 34, 36, 44, 46, 50, 52, 54,
  56, 58, 60, 62, 64, 66, 68, 72,
  74, 76, 78, 80, 82, 82
};

// Transition keys (character codes), grouped per state. The keys are ASCII
// letters in both cases plus 39 (apostrophe) and 161; the trailing 0 is
// presumably padding.
static const unsigned char _english_tokenizer_split_token_trans_keys[] = {
  65u, 68u, 69u, 76u, 77u, 78u, 83u, 84u,
  97u, 100u, 101u, 108u, 109u, 110u, 115u, 116u,
  78u, 84u, 110u, 116u, 78u, 110u, 65u, 79u,
  97u, 111u, 87u, 119u, 71u, 103u, 84u, 116u,
  79u, 111u, 39u, 161u, 77u, 82u, 86u, 89u,
  109u, 114u, 118u, 121u, 77u, 109u, 69u, 73u,
  101u, 105u, 76u, 108u, 39u, 161u, 68u, 100u,
  76u, 108u, 39u, 161u, 69u, 101u, 82u, 114u,
  79u, 111u, 77u, 109u, 39u, 79u, 111u, 161u,
  78u, 110u, 78u, 110u, 78u, 110u, 65u, 97u,
  67u, 99u, 0
};

// Per state: number of single keys.
static const char _english_tokenizer_split_token_single_lengths[] = {
  0, 16, 4, 2, 4, 2, 2, 2,
  2, 2, 8, 2, 4, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 4, 2,
  2, 2, 2, 2, 0, 0
};

// Per state: number of key ranges (this machine uses none).
static const char _english_tokenizer_split_token_range_lengths[] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0
};

// Per state: offset of the state's first slot in the indicies table.
static const unsigned char _english_tokenizer_split_token_index_offsets[] = {
  0, 0, 17, 22, 25, 30, 33, 36,
  39, 42, 45, 54, 57, 62, 65, 68,
  71, 74, 77, 80, 83, 86, 89, 94,
  97, 100, 103, 106, 109, 110
};

// Maps a slot to a transition id, i.e. an index into the trans_targs and
// trans_actions tables.
static const char _english_tokenizer_split_token_indicies[] = {
  0, 2, 3, 4, 2, 5, 2, 6,
  0, 2, 3, 4, 2, 5, 2, 6,
  1, 7, 8, 7, 8, 1, 9, 9,
  1, 10, 11, 10, 11, 1, 12, 12,
  1, 12, 12, 1, 13, 13, 1, 11,
  11, 1, 14, 14, 1, 15, 2, 2,
  16, 15, 2, 2, 16, 1, 17, 17,
  1, 18, 11, 18, 11, 1, 12, 12,
  1, 19, 19, 1, 12, 12, 1, 2,
  2, 1, 20, 20, 1, 21, 21, 1,
  22, 22, 1, 23, 23, 1, 12, 12,
  1, 24, 25, 25, 24, 1, 14, 14,
  1, 26, 26, 1, 27, 27, 1, 28,
  28, 1, 12, 12, 1, 1, 1, 0
};

// Per transition: the state entered.
static const char _english_tokenizer_split_token_trans_targs[] = {
  2, 0, 9, 10, 16, 17, 22, 3,
  7, 4, 5, 6, 28, 8, 29, 11,
  14, 12, 13, 15, 18, 19, 20, 21,
  23, 24, 25, 26, 27
};

// Per transition: id of the action to execute (0 = none).
static const char _english_tokenizer_split_token_trans_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 1,
  1, 0, 0, 0, 0, 0, 2, 1,
  1, 0, 0, 0, 1, 0, 0, 0,
  0, 0, 1, 0, 0
};

// Per state: id of the action to execute when the input ends in that state
// (0 = none); only state 28 has one.
static const char _english_tokenizer_split_token_eof_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 0, 3, 0
};

// Initial state of the machine.
static const int english_tokenizer_split_token_start = 1;
10892
|
|
|
|
|
|
|
|
10893
|
0
|
|
|
|
|
|
void english_tokenizer::split_token(vector& tokens) { |
10894
|
0
|
0
|
|
|
|
|
if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
10895
|
|
|
|
|
|
|
|
10896
|
0
|
|
|
|
|
|
size_t index = tokens.back().start, end = index + tokens.back().length; |
10897
|
|
|
|
|
|
|
int cs; |
10898
|
0
|
|
|
|
|
|
size_t split_mark = 0, split_len = 0; |
10899
|
|
|
|
|
|
|
|
10900
|
|
|
|
|
|
|
{ |
10901
|
|
|
|
|
|
|
cs = english_tokenizer_split_token_start; |
10902
|
|
|
|
|
|
|
} |
10903
|
|
|
|
|
|
|
|
10904
|
|
|
|
|
|
|
{ |
10905
|
|
|
|
|
|
|
int _klen; |
10906
|
|
|
|
|
|
|
const unsigned char *_keys; |
10907
|
|
|
|
|
|
|
int _trans; |
10908
|
|
|
|
|
|
|
|
10909
|
0
|
0
|
|
|
|
|
if ( ( index) == ( end) ) |
10910
|
|
|
|
|
|
|
goto _test_eof; |
10911
|
|
|
|
|
|
|
if ( cs == 0 ) |
10912
|
|
|
|
|
|
|
goto _out; |
10913
|
|
|
|
|
|
|
_resume: |
10914
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_split_token_trans_keys + _english_tokenizer_split_token_key_offsets[cs]; |
10915
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_split_token_index_offsets[cs]; |
10916
|
|
|
|
|
|
|
|
10917
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_split_token_single_lengths[cs]; |
10918
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
10919
|
|
|
|
|
|
|
const unsigned char *_lower = _keys; |
10920
|
|
|
|
|
|
|
const unsigned char *_mid; |
10921
|
0
|
|
|
|
|
|
const unsigned char *_upper = _keys + _klen - 1; |
10922
|
|
|
|
|
|
|
while (1) { |
10923
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
10924
|
|
|
|
|
|
|
break; |
10925
|
|
|
|
|
|
|
|
10926
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
10927
|
0
|
0
|
|
|
|
|
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < *_mid ) |
10928
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
10929
|
0
|
0
|
|
|
|
|
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > *_mid ) |
10930
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
10931
|
|
|
|
|
|
|
else { |
10932
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
10933
|
0
|
|
|
|
|
|
goto _match; |
10934
|
|
|
|
|
|
|
} |
10935
|
|
|
|
|
|
|
} |
10936
|
0
|
|
|
|
|
|
_keys += _klen; |
10937
|
0
|
|
|
|
|
|
_trans += _klen; |
10938
|
|
|
|
|
|
|
} |
10939
|
|
|
|
|
|
|
|
10940
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_split_token_range_lengths[cs]; |
10941
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
10942
|
|
|
|
|
|
|
const unsigned char *_lower = _keys; |
10943
|
|
|
|
|
|
|
const unsigned char *_mid; |
10944
|
0
|
|
|
|
|
|
const unsigned char *_upper = _keys + (_klen<<1) - 2; |
10945
|
|
|
|
|
|
|
while (1) { |
10946
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
10947
|
|
|
|
|
|
|
break; |
10948
|
|
|
|
|
|
|
|
10949
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
10950
|
0
|
0
|
|
|
|
|
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < _mid[0] ) |
10951
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
10952
|
0
|
0
|
|
|
|
|
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > _mid[1] ) |
10953
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
10954
|
|
|
|
|
|
|
else { |
10955
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
10956
|
0
|
|
|
|
|
|
goto _match; |
10957
|
|
|
|
|
|
|
} |
10958
|
|
|
|
|
|
|
} |
10959
|
0
|
|
|
|
|
|
_trans += _klen; |
10960
|
|
|
|
|
|
|
} |
10961
|
|
|
|
|
|
|
|
10962
|
|
|
|
|
|
|
_match: |
10963
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_split_token_indicies[_trans]; |
10964
|
0
|
|
|
|
|
|
cs = _english_tokenizer_split_token_trans_targs[_trans]; |
10965
|
|
|
|
|
|
|
|
10966
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_split_token_trans_actions[_trans] == 0 ) |
10967
|
|
|
|
|
|
|
goto _again; |
10968
|
|
|
|
|
|
|
|
10969
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_split_token_trans_actions[_trans] ) { |
10970
|
|
|
|
|
|
|
case 1: |
10971
|
0
|
|
|
|
|
|
{ split_mark = index - tokens.back().start + 1; } |
10972
|
0
|
|
|
|
|
|
break; |
10973
|
|
|
|
|
|
|
case 2: |
10974
|
0
|
|
|
|
|
|
{ split_mark = index - tokens.back().start + 1; } |
10975
|
0
|
|
|
|
|
|
{ split_len = split_mark; {( index)++; goto _out; } } |
10976
|
|
|
|
|
|
|
break; |
10977
|
|
|
|
|
|
|
} |
10978
|
|
|
|
|
|
|
|
10979
|
|
|
|
|
|
|
_again: |
10980
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
10981
|
|
|
|
|
|
|
goto _out; |
10982
|
0
|
0
|
|
|
|
|
if ( ++( index) != ( end) ) |
10983
|
|
|
|
|
|
|
goto _resume; |
10984
|
|
|
|
|
|
|
_test_eof: {} |
10985
|
0
|
0
|
|
|
|
|
if ( ( index) == ( end) ) |
10986
|
|
|
|
|
|
|
{ |
10987
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_split_token_eof_actions[cs] ) { |
10988
|
|
|
|
|
|
|
case 3: |
10989
|
0
|
|
|
|
|
|
{ split_len = split_mark; {( index)++; goto _out; } } |
10990
|
|
|
|
|
|
|
break; |
10991
|
|
|
|
|
|
|
} |
10992
|
|
|
|
|
|
|
} |
10993
|
|
|
|
|
|
|
|
10994
|
|
|
|
|
|
|
_out: {} |
10995
|
|
|
|
|
|
|
} |
10996
|
|
|
|
|
|
|
|
10997
|
0
|
0
|
|
|
|
|
if (split_len && split_len < end) { |
10998
|
0
|
|
|
|
|
|
tokens.back().length -= split_len; |
10999
|
0
|
|
|
|
|
|
tokens.emplace_back(end - split_len, split_len); |
11000
|
|
|
|
|
|
|
} |
11001
|
|
|
|
|
|
|
} |
11002
|
|
|
|
|
|
|
|
11003
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_offsets[] = { |
11004
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11005
|
|
|
|
|
|
|
0, 0, 0, 2, 2, 2, 2, 2, |
11006
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
11007
|
|
|
|
|
|
|
2, 2, 2, 2, 2 |
11008
|
|
|
|
|
|
|
}; |
11009
|
|
|
|
|
|
|
|
11010
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_lengths[] = { |
11011
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11012
|
|
|
|
|
|
|
0, 0, 2, 0, 0, 0, 0, 0, |
11013
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11014
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
11015
|
|
|
|
|
|
|
}; |
11016
|
|
|
|
|
|
|
|
11017
|
|
|
|
|
|
|
static const short _english_tokenizer_cond_keys[] = { |
11018
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
11019
|
|
|
|
|
|
|
}; |
11020
|
|
|
|
|
|
|
|
11021
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_spaces[] = { |
11022
|
|
|
|
|
|
|
1, 0, 0 |
11023
|
|
|
|
|
|
|
}; |
11024
|
|
|
|
|
|
|
|
11025
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_key_offsets[] = { |
11026
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 49, 52, |
11027
|
|
|
|
|
|
|
55, 60, 63, 98, 103, 107, 110, 114, |
11028
|
|
|
|
|
|
|
119, 120, 125, 126, 131, 145, 152, 156, |
11029
|
|
|
|
|
|
|
161, 164, 179, 192, 206 |
11030
|
|
|
|
|
|
|
}; |
11031
|
|
|
|
|
|
|
|
11032
|
|
|
|
|
|
|
static const short _english_tokenizer_trans_keys[] = { |
11033
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
11034
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
11035
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
11036
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
11037
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
11038
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 159u, 48u, |
11039
|
|
|
|
|
|
|
57u, 159u, 48u, 57u, 159u, 48u, 57u, 43u, |
11040
|
|
|
|
|
|
|
45u, 159u, 48u, 57u, 159u, 48u, 57u, 9u, |
11041
|
|
|
|
|
|
|
10u, 13u, 32u, 33u, 44u, 46u, 47u, 63u, |
11042
|
|
|
|
|
|
|
129u, 131u, 135u, 142u, 147u, 157u, 159u, 160u, |
11043
|
|
|
|
|
|
|
301u, 557u, 811u, 1067u, 0u, 42u, 48u, 57u, |
11044
|
|
|
|
|
|
|
58u, 64u, 65u, 90u, 91u, 96u, 97u, 122u, |
11045
|
|
|
|
|
|
|
123u, 255u, 9u, 10u, 13u, 32u, 147u, 9u, |
11046
|
|
|
|
|
|
|
13u, 32u, 147u, 9u, 32u, 147u, 9u, 10u, |
11047
|
|
|
|
|
|
|
32u, 147u, 9u, 10u, 13u, 32u, 147u, 13u, |
11048
|
|
|
|
|
|
|
9u, 10u, 13u, 32u, 147u, 10u, 9u, 10u, |
11049
|
|
|
|
|
|
|
13u, 32u, 147u, 13u, 32u, 34u, 39u, 41u, |
11050
|
|
|
|
|
|
|
59u, 93u, 125u, 139u, 141u, 147u, 161u, 9u, |
11051
|
|
|
|
|
|
|
10u, 44u, 46u, 69u, 101u, 159u, 48u, 57u, |
11052
|
|
|
|
|
|
|
44u, 46u, 69u, 101u, 69u, 101u, 159u, 48u, |
11053
|
|
|
|
|
|
|
57u, 159u, 48u, 57u, 39u, 45u, 129u, 131u, |
11054
|
|
|
|
|
|
|
135u, 151u, 155u, 157u, 161u, 65u, 90u, 97u, |
11055
|
|
|
|
|
|
|
122u, 142u, 143u, 45u, 129u, 131u, 135u, 151u, |
11056
|
|
|
|
|
|
|
155u, 157u, 65u, 90u, 97u, 122u, 142u, 143u, |
11057
|
|
|
|
|
|
|
39u, 129u, 131u, 135u, 151u, 155u, 157u, 161u, |
11058
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 142u, 143u, 159u, 48u, |
11059
|
|
|
|
|
|
|
57u, 0 |
11060
|
|
|
|
|
|
|
}; |
11061
|
|
|
|
|
|
|
|
11062
|
|
|
|
|
|
|
static const char _english_tokenizer_single_lengths[] = { |
11063
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 1, 1, 1, |
11064
|
|
|
|
|
|
|
3, 1, 21, 5, 4, 3, 4, 5, |
11065
|
|
|
|
|
|
|
1, 5, 1, 5, 12, 5, 4, 3, |
11066
|
|
|
|
|
|
|
1, 9, 7, 8, 1 |
11067
|
|
|
|
|
|
|
}; |
11068
|
|
|
|
|
|
|
|
11069
|
|
|
|
|
|
|
static const char _english_tokenizer_range_lengths[] = { |
11070
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 1, |
11071
|
|
|
|
|
|
|
1, 1, 7, 0, 0, 0, 0, 0, |
11072
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 0, 1, |
11073
|
|
|
|
|
|
|
1, 3, 3, 3, 1 |
11074
|
|
|
|
|
|
|
}; |
11075
|
|
|
|
|
|
|
|
11076
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_index_offsets[] = { |
11077
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 48, 51, |
11078
|
|
|
|
|
|
|
54, 59, 62, 91, 97, 102, 106, 111, |
11079
|
|
|
|
|
|
|
117, 119, 125, 127, 133, 147, 154, 159, |
11080
|
|
|
|
|
|
|
164, 167, 180, 191, 203 |
11081
|
|
|
|
|
|
|
}; |
11082
|
|
|
|
|
|
|
|
11083
|
|
|
|
|
|
|
static const char _english_tokenizer_indicies[] = { |
11084
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
11085
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
11086
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
11087
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
11088
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
11089
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 0, |
11090
|
|
|
|
|
|
|
8, 8, 0, 9, 9, 0, 10, 10, |
11091
|
|
|
|
|
|
|
11, 11, 0, 11, 11, 0, 13, 14, |
11092
|
|
|
|
|
|
|
15, 13, 16, 12, 16, 12, 16, 19, |
11093
|
|
|
|
|
|
|
19, 19, 19, 13, 19, 18, 16, 12, |
11094
|
|
|
|
|
|
|
20, 12, 20, 12, 18, 12, 19, 12, |
11095
|
|
|
|
|
|
|
19, 12, 17, 13, 22, 23, 13, 13, |
11096
|
|
|
|
|
|
|
21, 13, 24, 13, 13, 21, 13, 13, |
11097
|
|
|
|
|
|
|
13, 21, 13, 24, 13, 13, 21, 13, |
11098
|
|
|
|
|
|
|
25, 26, 13, 13, 21, 28, 27, 13, |
11099
|
|
|
|
|
|
|
25, 29, 13, 13, 21, 28, 27, 13, |
11100
|
|
|
|
|
|
|
26, 29, 13, 13, 21, 4, 4, 5, |
11101
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 4, |
11102
|
|
|
|
|
|
|
5, 4, 30, 31, 32, 33, 33, 18, |
11103
|
|
|
|
|
|
|
18, 30, 31, 32, 33, 33, 30, 33, |
11104
|
|
|
|
|
|
|
33, 9, 9, 30, 11, 11, 30, 34, |
11105
|
|
|
|
|
|
|
35, 19, 19, 19, 19, 19, 19, 34, |
11106
|
|
|
|
|
|
|
19, 19, 19, 30, 35, 19, 19, 19, |
11107
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 19, 30, 34, |
11108
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 19, 34, 19, |
11109
|
|
|
|
|
|
|
19, 19, 30, 18, 18, 30, 0 |
11110
|
|
|
|
|
|
|
}; |
11111
|
|
|
|
|
|
|
|
11112
|
|
|
|
|
|
|
static const char _english_tokenizer_trans_targs[] = { |
11113
|
|
|
|
|
|
|
10, 1, 2, 10, 1, 3, 5, 6, |
11114
|
|
|
|
|
|
|
22, 23, 9, 24, 10, 11, 15, 19, |
11115
|
|
|
|
|
|
|
20, 0, 21, 25, 28, 10, 12, 14, |
11116
|
|
|
|
|
|
|
13, 16, 17, 10, 10, 18, 10, 4, |
11117
|
|
|
|
|
|
|
7, 8, 26, 27 |
11118
|
|
|
|
|
|
|
}; |
11119
|
|
|
|
|
|
|
|
11120
|
|
|
|
|
|
|
static const char _english_tokenizer_trans_actions[] = { |
11121
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 0, 0, |
11122
|
|
|
|
|
|
|
4, 4, 0, 0, 7, 0, 0, 0, |
11123
|
|
|
|
|
|
|
4, 0, 4, 0, 0, 8, 0, 0, |
11124
|
|
|
|
|
|
|
0, 0, 0, 9, 10, 0, 11, 0, |
11125
|
|
|
|
|
|
|
0, 0, 0, 0 |
11126
|
|
|
|
|
|
|
}; |
11127
|
|
|
|
|
|
|
|
11128
|
|
|
|
|
|
|
static const char _english_tokenizer_to_state_actions[] = { |
11129
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11130
|
|
|
|
|
|
|
0, 0, 5, 0, 0, 0, 0, 0, |
11131
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11132
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
11133
|
|
|
|
|
|
|
}; |
11134
|
|
|
|
|
|
|
|
11135
|
|
|
|
|
|
|
static const char _english_tokenizer_from_state_actions[] = { |
11136
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11137
|
|
|
|
|
|
|
0, 0, 6, 0, 0, 0, 0, 0, |
11138
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11139
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
11140
|
|
|
|
|
|
|
}; |
11141
|
|
|
|
|
|
|
|
11142
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_eof_trans[] = { |
11143
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 1, |
11144
|
|
|
|
|
|
|
1, 1, 0, 22, 22, 22, 22, 22, |
11145
|
|
|
|
|
|
|
28, 22, 28, 22, 31, 31, 31, 31, |
11146
|
|
|
|
|
|
|
31, 31, 31, 31, 31 |
11147
|
|
|
|
|
|
|
}; |
11148
|
|
|
|
|
|
|
|
11149
|
|
|
|
|
|
|
static const int english_tokenizer_start = 10; |
11150
|
|
|
|
|
|
|
|
11151
|
0
|
0
|
|
|
|
|
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
11152
|
|
|
|
|
|
|
|
11153
|
0
|
|
|
|
|
|
bool english_tokenizer::next_sentence(vector& tokens) { |
11154
|
|
|
|
|
|
|
using namespace unilib; |
11155
|
|
|
|
|
|
|
|
11156
|
|
|
|
|
|
|
int cs, act; |
11157
|
|
|
|
|
|
|
size_t ts, te; |
11158
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
11159
|
|
|
|
|
|
|
|
11160
|
0
|
0
|
|
|
|
|
while (tokenize_url_email(tokens)) |
11161
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
11162
|
|
|
|
|
|
|
return true; |
11163
|
|
|
|
|
|
|
|
11164
|
|
|
|
|
|
|
{ |
11165
|
|
|
|
|
|
|
cs = english_tokenizer_start; |
11166
|
0
|
|
|
|
|
|
ts = 0; |
11167
|
|
|
|
|
|
|
te = 0; |
11168
|
|
|
|
|
|
|
act = 0; |
11169
|
|
|
|
|
|
|
} |
11170
|
|
|
|
|
|
|
|
11171
|
|
|
|
|
|
|
{ |
11172
|
|
|
|
|
|
|
int _klen; |
11173
|
|
|
|
|
|
|
const short *_keys; |
11174
|
|
|
|
|
|
|
int _trans; |
11175
|
|
|
|
|
|
|
short _widec; |
11176
|
|
|
|
|
|
|
|
11177
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
11178
|
|
|
|
|
|
|
goto _test_eof; |
11179
|
|
|
|
|
|
|
if ( cs == 0 ) |
11180
|
|
|
|
|
|
|
goto _out; |
11181
|
|
|
|
|
|
|
_resume: |
11182
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_from_state_actions[cs] ) { |
11183
|
|
|
|
|
|
|
case 6: |
11184
|
0
|
|
|
|
|
|
{ts = ( current);} |
11185
|
0
|
|
|
|
|
|
break; |
11186
|
|
|
|
|
|
|
} |
11187
|
|
|
|
|
|
|
|
11188
|
0
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
11189
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_cond_lengths[cs]; |
11190
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_cond_keys + (_english_tokenizer_cond_offsets[cs]*2); |
11191
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
11192
|
|
|
|
|
|
|
const short *_lower = _keys; |
11193
|
|
|
|
|
|
|
const short *_mid; |
11194
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
11195
|
|
|
|
|
|
|
while (1) { |
11196
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
11197
|
|
|
|
|
|
|
break; |
11198
|
|
|
|
|
|
|
|
11199
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
11200
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
11201
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
11202
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
11203
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
11204
|
|
|
|
|
|
|
else { |
11205
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_cond_spaces[_english_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
11206
|
|
|
|
|
|
|
case 0: { |
11207
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
11208
|
0
|
0
|
|
|
|
|
if ( |
11209
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
0
|
|
|
|
|
|
11210
|
|
|
|
|
|
|
break; |
11211
|
|
|
|
|
|
|
} |
11212
|
|
|
|
|
|
|
case 1: { |
11213
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
11214
|
0
|
0
|
|
|
|
|
if ( |
11215
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
11216
|
|
|
|
|
|
|
break; |
11217
|
|
|
|
|
|
|
} |
11218
|
|
|
|
|
|
|
} |
11219
|
|
|
|
|
|
|
break; |
11220
|
|
|
|
|
|
|
} |
11221
|
|
|
|
|
|
|
} |
11222
|
|
|
|
|
|
|
} |
11223
|
|
|
|
|
|
|
|
11224
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_trans_keys + _english_tokenizer_key_offsets[cs]; |
11225
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_index_offsets[cs]; |
11226
|
|
|
|
|
|
|
|
11227
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_single_lengths[cs]; |
11228
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
11229
|
|
|
|
|
|
|
const short *_lower = _keys; |
11230
|
|
|
|
|
|
|
const short *_mid; |
11231
|
0
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
11232
|
|
|
|
|
|
|
while (1) { |
11233
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
11234
|
|
|
|
|
|
|
break; |
11235
|
|
|
|
|
|
|
|
11236
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
11237
|
0
|
0
|
|
|
|
|
if ( _widec < *_mid ) |
11238
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
11239
|
0
|
0
|
|
|
|
|
else if ( _widec > *_mid ) |
11240
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
11241
|
|
|
|
|
|
|
else { |
11242
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
11243
|
0
|
|
|
|
|
|
goto _match; |
11244
|
|
|
|
|
|
|
} |
11245
|
|
|
|
|
|
|
} |
11246
|
0
|
|
|
|
|
|
_keys += _klen; |
11247
|
0
|
|
|
|
|
|
_trans += _klen; |
11248
|
|
|
|
|
|
|
} |
11249
|
|
|
|
|
|
|
|
11250
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_range_lengths[cs]; |
11251
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
11252
|
|
|
|
|
|
|
const short *_lower = _keys; |
11253
|
|
|
|
|
|
|
const short *_mid; |
11254
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
11255
|
|
|
|
|
|
|
while (1) { |
11256
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
11257
|
|
|
|
|
|
|
break; |
11258
|
|
|
|
|
|
|
|
11259
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
11260
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
11261
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
11262
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
11263
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
11264
|
|
|
|
|
|
|
else { |
11265
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
11266
|
0
|
|
|
|
|
|
goto _match; |
11267
|
|
|
|
|
|
|
} |
11268
|
|
|
|
|
|
|
} |
11269
|
0
|
|
|
|
|
|
_trans += _klen; |
11270
|
|
|
|
|
|
|
} |
11271
|
|
|
|
|
|
|
|
11272
|
|
|
|
|
|
|
_match: |
11273
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_indicies[_trans]; |
11274
|
|
|
|
|
|
|
_eof_trans: |
11275
|
0
|
|
|
|
|
|
cs = _english_tokenizer_trans_targs[_trans]; |
11276
|
|
|
|
|
|
|
|
11277
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_trans_actions[_trans] == 0 ) |
11278
|
|
|
|
|
|
|
goto _again; |
11279
|
|
|
|
|
|
|
|
11280
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_trans_actions[_trans] ) { |
11281
|
|
|
|
|
|
|
case 3: |
11282
|
0
|
|
|
|
|
|
{ whitespace = current; } |
11283
|
0
|
|
|
|
|
|
break; |
11284
|
|
|
|
|
|
|
case 4: |
11285
|
0
|
|
|
|
|
|
{te = ( current)+1;} |
11286
|
0
|
|
|
|
|
|
break; |
11287
|
|
|
|
|
|
|
case 7: |
11288
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
11289
|
0
|
|
|
|
|
|
split_token(tokens); |
11290
|
0
|
|
|
|
|
|
current = te; |
11291
|
0
|
0
|
|
|
|
|
do |
11292
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11293
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11294
|
0
|
|
|
|
|
|
( current)--; |
11295
|
|
|
|
|
|
|
}} |
11296
|
0
|
|
|
|
|
|
break; |
11297
|
|
|
|
|
|
|
case 2: |
11298
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
11299
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, &abbreviations); |
11300
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
11301
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
11302
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
11303
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
11304
|
|
|
|
|
|
|
}} |
11305
|
|
|
|
|
|
|
break; |
11306
|
|
|
|
|
|
|
case 10: |
11307
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
11308
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
11309
|
0
|
|
|
|
|
|
current = te; |
11310
|
0
|
0
|
|
|
|
|
do |
11311
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11312
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11313
|
0
|
|
|
|
|
|
( current)--; |
11314
|
|
|
|
|
|
|
}} |
11315
|
0
|
|
|
|
|
|
break; |
11316
|
|
|
|
|
|
|
case 11: |
11317
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
11318
|
0
|
|
|
|
|
|
split_token(tokens); |
11319
|
0
|
|
|
|
|
|
current = te; |
11320
|
0
|
0
|
|
|
|
|
do |
11321
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11322
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11323
|
0
|
|
|
|
|
|
( current)--; |
11324
|
|
|
|
|
|
|
}} |
11325
|
0
|
|
|
|
|
|
break; |
11326
|
|
|
|
|
|
|
case 8: |
11327
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
11328
|
0
|
|
|
|
|
|
current = te; |
11329
|
0
|
0
|
|
|
|
|
do |
11330
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11331
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11332
|
0
|
|
|
|
|
|
( current)--; |
11333
|
|
|
|
|
|
|
}} |
11334
|
0
|
|
|
|
|
|
break; |
11335
|
|
|
|
|
|
|
case 9: |
11336
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
11337
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
11338
|
0
|
|
|
|
|
|
current = te; |
11339
|
0
|
0
|
|
|
|
|
do |
11340
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11341
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11342
|
0
|
|
|
|
|
|
( current)--; |
11343
|
|
|
|
|
|
|
}} |
11344
|
0
|
|
|
|
|
|
break; |
11345
|
|
|
|
|
|
|
case 1: |
11346
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
11347
|
0
|
|
|
|
|
|
split_token(tokens); |
11348
|
0
|
|
|
|
|
|
current = te; |
11349
|
0
|
0
|
|
|
|
|
do |
11350
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11351
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11352
|
0
|
|
|
|
|
|
( current)--; |
11353
|
|
|
|
|
|
|
}} |
11354
|
0
|
|
|
|
|
|
break; |
11355
|
|
|
|
|
|
|
} |
11356
|
|
|
|
|
|
|
|
11357
|
|
|
|
|
|
|
_again: |
11358
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_to_state_actions[cs] ) { |
11359
|
|
|
|
|
|
|
case 5: |
11360
|
0
|
|
|
|
|
|
{ts = 0;} |
11361
|
0
|
|
|
|
|
|
break; |
11362
|
|
|
|
|
|
|
} |
11363
|
|
|
|
|
|
|
|
11364
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
11365
|
|
|
|
|
|
|
goto _out; |
11366
|
0
|
0
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
11367
|
|
|
|
|
|
|
goto _resume; |
11368
|
|
|
|
|
|
|
_test_eof: {} |
11369
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
11370
|
|
|
|
|
|
|
{ |
11371
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_eof_trans[cs] > 0 ) { |
11372
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_eof_trans[cs] - 1; |
11373
|
0
|
|
|
|
|
|
goto _eof_trans; |
11374
|
|
|
|
|
|
|
} |
11375
|
|
|
|
|
|
|
} |
11376
|
|
|
|
|
|
|
|
11377
|
|
|
|
|
|
|
_out: {} |
11378
|
|
|
|
|
|
|
} |
11379
|
|
|
|
|
|
|
|
11380
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
11381
|
|
|
|
|
|
|
|
11382
|
0
|
|
|
|
|
|
return !tokens.empty(); |
11383
|
|
|
|
|
|
|
} |
11384
|
|
|
|
|
|
|
|
11385
|
|
|
|
|
|
|
} // namespace morphodita |
11386
|
|
|
|
|
|
|
|
11387
|
|
|
|
|
|
|
///////// |
11388
|
|
|
|
|
|
|
// File: morphodita/tokenizer/generic_tokenizer.cpp |
11389
|
|
|
|
|
|
|
///////// |
11390
|
|
|
|
|
|
|
|
11391
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
11392
|
|
|
|
|
|
|
// |
11393
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
11394
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
11395
|
|
|
|
|
|
|
// |
11396
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
11397
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
11398
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11399
|
|
|
|
|
|
|
|
11400
|
|
|
|
|
|
|
namespace morphodita { |
11401
|
|
|
|
|
|
|
|
11402
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_offsets[] = { |
11403
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11404
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
11405
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2 |
11406
|
|
|
|
|
|
|
}; |
11407
|
|
|
|
|
|
|
|
11408
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_lengths[] = { |
11409
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 2, |
11410
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11411
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
11412
|
|
|
|
|
|
|
}; |
11413
|
|
|
|
|
|
|
|
11414
|
|
|
|
|
|
|
static const short _generic_tokenizer_cond_keys[] = { |
11415
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
11416
|
|
|
|
|
|
|
}; |
11417
|
|
|
|
|
|
|
|
11418
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_spaces[] = { |
11419
|
|
|
|
|
|
|
1, 0, 0 |
11420
|
|
|
|
|
|
|
}; |
11421
|
|
|
|
|
|
|
|
11422
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_key_offsets[] = { |
11423
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 51, 54, |
11424
|
|
|
|
|
|
|
89, 94, 98, 101, 105, 110, 111, 116, |
11425
|
|
|
|
|
|
|
117, 122, 136, 142, 147, 150, 162 |
11426
|
|
|
|
|
|
|
}; |
11427
|
|
|
|
|
|
|
|
11428
|
|
|
|
|
|
|
static const short _generic_tokenizer_trans_keys[] = { |
11429
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
11430
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
11431
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
11432
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
11433
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
11434
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 43u, 45u, |
11435
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 9u, 10u, |
11436
|
|
|
|
|
|
|
13u, 32u, 33u, 44u, 46u, 47u, 63u, 129u, |
11437
|
|
|
|
|
|
|
131u, 135u, 142u, 147u, 157u, 159u, 160u, 301u, |
11438
|
|
|
|
|
|
|
557u, 811u, 1067u, 0u, 42u, 48u, 57u, 58u, |
11439
|
|
|
|
|
|
|
64u, 65u, 90u, 91u, 96u, 97u, 122u, 123u, |
11440
|
|
|
|
|
|
|
255u, 9u, 10u, 13u, 32u, 147u, 9u, 13u, |
11441
|
|
|
|
|
|
|
32u, 147u, 9u, 32u, 147u, 9u, 10u, 32u, |
11442
|
|
|
|
|
|
|
147u, 9u, 10u, 13u, 32u, 147u, 13u, 9u, |
11443
|
|
|
|
|
|
|
10u, 13u, 32u, 147u, 10u, 9u, 10u, 13u, |
11444
|
|
|
|
|
|
|
32u, 147u, 13u, 32u, 34u, 39u, 41u, 59u, |
11445
|
|
|
|
|
|
|
93u, 125u, 139u, 141u, 147u, 161u, 9u, 10u, |
11446
|
|
|
|
|
|
|
46u, 69u, 101u, 159u, 48u, 57u, 69u, 101u, |
11447
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 129u, 131u, |
11448
|
|
|
|
|
|
|
135u, 151u, 155u, 157u, 65u, 90u, 97u, 122u, |
11449
|
|
|
|
|
|
|
142u, 143u, 159u, 48u, 57u, 0 |
11450
|
|
|
|
|
|
|
}; |
11451
|
|
|
|
|
|
|
|
11452
|
|
|
|
|
|
|
static const char _generic_tokenizer_single_lengths[] = { |
11453
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 3, 1, 21, |
11454
|
|
|
|
|
|
|
5, 4, 3, 4, 5, 1, 5, 1, |
11455
|
|
|
|
|
|
|
5, 12, 4, 3, 1, 6, 1 |
11456
|
|
|
|
|
|
|
}; |
11457
|
|
|
|
|
|
|
|
11458
|
|
|
|
|
|
|
static const char _generic_tokenizer_range_lengths[] = { |
11459
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 7, |
11460
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11461
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 3, 1 |
11462
|
|
|
|
|
|
|
}; |
11463
|
|
|
|
|
|
|
|
11464
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_index_offsets[] = { |
11465
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 50, 53, |
11466
|
|
|
|
|
|
|
82, 88, 93, 97, 102, 108, 110, 116, |
11467
|
|
|
|
|
|
|
118, 124, 138, 144, 149, 152, 162 |
11468
|
|
|
|
|
|
|
}; |
11469
|
|
|
|
|
|
|
|
11470
|
|
|
|
|
|
|
static const char _generic_tokenizer_indicies[] = { |
11471
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
11472
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
11473
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
11474
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
11475
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
11476
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 8, |
11477
|
|
|
|
|
|
|
8, 0, 8, 8, 0, 10, 11, 12, |
11478
|
|
|
|
|
|
|
10, 13, 9, 13, 9, 13, 16, 16, |
11479
|
|
|
|
|
|
|
16, 16, 10, 16, 15, 13, 9, 17, |
11480
|
|
|
|
|
|
|
9, 17, 9, 15, 9, 16, 9, 16, |
11481
|
|
|
|
|
|
|
9, 14, 10, 19, 20, 10, 10, 18, |
11482
|
|
|
|
|
|
|
10, 21, 10, 10, 18, 10, 10, 10, |
11483
|
|
|
|
|
|
|
18, 10, 21, 10, 10, 18, 10, 22, |
11484
|
|
|
|
|
|
|
23, 10, 10, 18, 25, 24, 10, 22, |
11485
|
|
|
|
|
|
|
26, 10, 10, 18, 25, 24, 10, 23, |
11486
|
|
|
|
|
|
|
26, 10, 10, 18, 4, 4, 5, 5, |
11487
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
11488
|
|
|
|
|
|
|
4, 27, 28, 29, 29, 15, 15, 27, |
11489
|
|
|
|
|
|
|
29, 29, 6, 6, 27, 8, 8, 27, |
11490
|
|
|
|
|
|
|
16, 16, 16, 16, 16, 16, 16, 16, |
11491
|
|
|
|
|
|
|
16, 27, 15, 15, 27, 0 |
11492
|
|
|
|
|
|
|
}; |
11493
|
|
|
|
|
|
|
|
11494
|
|
|
|
|
|
|
static const char _generic_tokenizer_trans_targs[] = { |
11495
|
|
|
|
|
|
|
7, 1, 2, 7, 1, 3, 19, 6, |
11496
|
|
|
|
|
|
|
20, 7, 8, 12, 16, 17, 0, 18, |
11497
|
|
|
|
|
|
|
21, 22, 7, 9, 11, 10, 13, 14, |
11498
|
|
|
|
|
|
|
7, 7, 15, 7, 4, 5 |
11499
|
|
|
|
|
|
|
}; |
11500
|
|
|
|
|
|
|
|
11501
|
|
|
|
|
|
|
static const char _generic_tokenizer_trans_actions[] = { |
11502
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 4, 0, |
11503
|
|
|
|
|
|
|
0, 7, 0, 0, 0, 4, 0, 4, |
11504
|
|
|
|
|
|
|
0, 0, 8, 0, 0, 0, 0, 0, |
11505
|
|
|
|
|
|
|
9, 10, 0, 11, 0, 0 |
11506
|
|
|
|
|
|
|
}; |
11507
|
|
|
|
|
|
|
|
11508
|
|
|
|
|
|
|
static const char _generic_tokenizer_to_state_actions[] = { |
11509
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 5, |
11510
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11511
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
11512
|
|
|
|
|
|
|
}; |
11513
|
|
|
|
|
|
|
|
11514
|
|
|
|
|
|
|
static const char _generic_tokenizer_from_state_actions[] = { |
11515
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 6, |
11516
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11517
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
11518
|
|
|
|
|
|
|
}; |
11519
|
|
|
|
|
|
|
|
11520
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_eof_trans[] = { |
11521
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 0, |
11522
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 25, 19, 25, |
11523
|
|
|
|
|
|
|
19, 28, 28, 28, 28, 28, 28 |
11524
|
|
|
|
|
|
|
}; |
11525
|
|
|
|
|
|
|
|
11526
|
|
|
|
|
|
|
static const int generic_tokenizer_start = 7; |
11527
|
|
|
|
|
|
|
|
11528
|
3
|
50
|
|
|
|
|
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
11529
|
|
|
|
|
|
|
|
11530
|
4
|
|
|
|
|
|
bool generic_tokenizer::next_sentence(vector& tokens) { |
11531
|
|
|
|
|
|
|
using namespace unilib; |
11532
|
|
|
|
|
|
|
|
11533
|
|
|
|
|
|
|
int cs, act; |
11534
|
|
|
|
|
|
|
size_t ts, te; |
11535
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
11536
|
|
|
|
|
|
|
|
11537
|
2
|
50
|
|
|
|
|
while (tokenize_url_email(tokens)) |
11538
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
11539
|
|
|
|
|
|
|
return true; |
11540
|
|
|
|
|
|
|
|
11541
|
|
|
|
|
|
|
{ |
11542
|
|
|
|
|
|
|
cs = generic_tokenizer_start; |
11543
|
2
|
|
|
|
|
|
ts = 0; |
11544
|
|
|
|
|
|
|
te = 0; |
11545
|
|
|
|
|
|
|
act = 0; |
11546
|
|
|
|
|
|
|
} |
11547
|
|
|
|
|
|
|
|
11548
|
|
|
|
|
|
|
{ |
11549
|
|
|
|
|
|
|
int _klen; |
11550
|
|
|
|
|
|
|
const short *_keys; |
11551
|
|
|
|
|
|
|
int _trans; |
11552
|
|
|
|
|
|
|
short _widec; |
11553
|
|
|
|
|
|
|
|
11554
|
2
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
11555
|
|
|
|
|
|
|
goto _test_eof; |
11556
|
|
|
|
|
|
|
if ( cs == 0 ) |
11557
|
|
|
|
|
|
|
goto _out; |
11558
|
|
|
|
|
|
|
_resume: |
11559
|
37
|
100
|
|
|
|
|
switch ( _generic_tokenizer_from_state_actions[cs] ) { |
11560
|
|
|
|
|
|
|
case 6: |
11561
|
10
|
|
|
|
|
|
{ts = ( current);} |
11562
|
10
|
|
|
|
|
|
break; |
11563
|
|
|
|
|
|
|
} |
11564
|
|
|
|
|
|
|
|
11565
|
74
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
11566
|
37
|
|
|
|
|
|
_klen = _generic_tokenizer_cond_lengths[cs]; |
11567
|
37
|
|
|
|
|
|
_keys = _generic_tokenizer_cond_keys + (_generic_tokenizer_cond_offsets[cs]*2); |
11568
|
37
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
11569
|
|
|
|
|
|
|
const short *_lower = _keys; |
11570
|
|
|
|
|
|
|
const short *_mid; |
11571
|
27
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
11572
|
|
|
|
|
|
|
while (1) { |
11573
|
27
|
100
|
|
|
|
|
if ( _upper < _lower ) |
11574
|
|
|
|
|
|
|
break; |
11575
|
|
|
|
|
|
|
|
11576
|
17
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
11577
|
17
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
11578
|
3
|
|
|
|
|
|
_upper = _mid - 2; |
11579
|
14
|
50
|
|
|
|
|
else if ( _widec > _mid[1] ) |
11580
|
14
|
|
|
|
|
|
_lower = _mid + 2; |
11581
|
|
|
|
|
|
|
else { |
11582
|
0
|
|
|
|
|
|
switch ( _generic_tokenizer_cond_spaces[_generic_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
11583
|
|
|
|
|
|
|
case 0: { |
11584
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
11585
|
0
|
0
|
|
|
|
|
if ( |
11586
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
0
|
|
|
|
|
|
11587
|
|
|
|
|
|
|
break; |
11588
|
|
|
|
|
|
|
} |
11589
|
|
|
|
|
|
|
case 1: { |
11590
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
11591
|
0
|
0
|
|
|
|
|
if ( |
11592
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
11593
|
|
|
|
|
|
|
break; |
11594
|
|
|
|
|
|
|
} |
11595
|
|
|
|
|
|
|
} |
11596
|
|
|
|
|
|
|
break; |
11597
|
|
|
|
|
|
|
} |
11598
|
|
|
|
|
|
|
} |
11599
|
|
|
|
|
|
|
} |
11600
|
|
|
|
|
|
|
|
11601
|
37
|
|
|
|
|
|
_keys = _generic_tokenizer_trans_keys + _generic_tokenizer_key_offsets[cs]; |
11602
|
37
|
|
|
|
|
|
_trans = _generic_tokenizer_index_offsets[cs]; |
11603
|
|
|
|
|
|
|
|
11604
|
37
|
|
|
|
|
|
_klen = _generic_tokenizer_single_lengths[cs]; |
11605
|
37
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
11606
|
|
|
|
|
|
|
const short *_lower = _keys; |
11607
|
|
|
|
|
|
|
const short *_mid; |
11608
|
139
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
11609
|
|
|
|
|
|
|
while (1) { |
11610
|
139
|
100
|
|
|
|
|
if ( _upper < _lower ) |
11611
|
|
|
|
|
|
|
break; |
11612
|
|
|
|
|
|
|
|
11613
|
112
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
11614
|
112
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
11615
|
66
|
|
|
|
|
|
_upper = _mid - 1; |
11616
|
46
|
100
|
|
|
|
|
else if ( _widec > *_mid ) |
11617
|
36
|
|
|
|
|
|
_lower = _mid + 1; |
11618
|
|
|
|
|
|
|
else { |
11619
|
10
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
11620
|
10
|
|
|
|
|
|
goto _match; |
11621
|
|
|
|
|
|
|
} |
11622
|
|
|
|
|
|
|
} |
11623
|
27
|
|
|
|
|
|
_keys += _klen; |
11624
|
27
|
|
|
|
|
|
_trans += _klen; |
11625
|
|
|
|
|
|
|
} |
11626
|
|
|
|
|
|
|
|
11627
|
27
|
|
|
|
|
|
_klen = _generic_tokenizer_range_lengths[cs]; |
11628
|
27
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
11629
|
|
|
|
|
|
|
const short *_lower = _keys; |
11630
|
|
|
|
|
|
|
const short *_mid; |
11631
|
37
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
11632
|
|
|
|
|
|
|
while (1) { |
11633
|
37
|
100
|
|
|
|
|
if ( _upper < _lower ) |
11634
|
|
|
|
|
|
|
break; |
11635
|
|
|
|
|
|
|
|
11636
|
32
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
11637
|
32
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
11638
|
10
|
|
|
|
|
|
_upper = _mid - 2; |
11639
|
22
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
11640
|
3
|
|
|
|
|
|
_lower = _mid + 2; |
11641
|
|
|
|
|
|
|
else { |
11642
|
19
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
11643
|
19
|
|
|
|
|
|
goto _match; |
11644
|
|
|
|
|
|
|
} |
11645
|
|
|
|
|
|
|
} |
11646
|
5
|
|
|
|
|
|
_trans += _klen; |
11647
|
|
|
|
|
|
|
} |
11648
|
|
|
|
|
|
|
|
11649
|
|
|
|
|
|
|
_match: |
11650
|
37
|
|
|
|
|
|
_trans = _generic_tokenizer_indicies[_trans]; |
11651
|
|
|
|
|
|
|
_eof_trans: |
11652
|
39
|
|
|
|
|
|
cs = _generic_tokenizer_trans_targs[_trans]; |
11653
|
|
|
|
|
|
|
|
11654
|
39
|
100
|
|
|
|
|
if ( _generic_tokenizer_trans_actions[_trans] == 0 ) |
11655
|
|
|
|
|
|
|
goto _again; |
11656
|
|
|
|
|
|
|
|
11657
|
12
|
|
|
|
|
|
switch ( _generic_tokenizer_trans_actions[_trans] ) { |
11658
|
|
|
|
|
|
|
case 3: |
11659
|
0
|
|
|
|
|
|
{ whitespace = current; } |
11660
|
0
|
|
|
|
|
|
break; |
11661
|
|
|
|
|
|
|
case 4: |
11662
|
2
|
|
|
|
|
|
{te = ( current)+1;} |
11663
|
2
|
|
|
|
|
|
break; |
11664
|
|
|
|
|
|
|
case 7: |
11665
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
11666
|
0
|
|
|
|
|
|
current = te; |
11667
|
0
|
0
|
|
|
|
|
do |
11668
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11669
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11670
|
0
|
|
|
|
|
|
( current)--; |
11671
|
|
|
|
|
|
|
}} |
11672
|
0
|
|
|
|
|
|
break; |
11673
|
|
|
|
|
|
|
case 2: |
11674
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
11675
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, nullptr); |
11676
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
11677
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
11678
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
11679
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
11680
|
|
|
|
|
|
|
}} |
11681
|
|
|
|
|
|
|
break; |
11682
|
|
|
|
|
|
|
case 10: |
11683
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
11684
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
11685
|
0
|
|
|
|
|
|
current = te; |
11686
|
0
|
0
|
|
|
|
|
do |
11687
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11688
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11689
|
0
|
|
|
|
|
|
( current)--; |
11690
|
|
|
|
|
|
|
}} |
11691
|
0
|
|
|
|
|
|
break; |
11692
|
|
|
|
|
|
|
case 11: |
11693
|
7
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
11694
|
7
|
|
|
|
|
|
current = te; |
11695
|
7
|
50
|
|
|
|
|
do |
11696
|
7
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11697
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11698
|
7
|
|
|
|
|
|
( current)--; |
11699
|
|
|
|
|
|
|
}} |
11700
|
7
|
|
|
|
|
|
break; |
11701
|
|
|
|
|
|
|
case 8: |
11702
|
3
|
|
|
|
|
|
{te = ( current);( current)--;{ |
11703
|
3
|
|
|
|
|
|
current = te; |
11704
|
3
|
50
|
|
|
|
|
do |
11705
|
3
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11706
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11707
|
3
|
|
|
|
|
|
( current)--; |
11708
|
|
|
|
|
|
|
}} |
11709
|
3
|
|
|
|
|
|
break; |
11710
|
|
|
|
|
|
|
case 9: |
11711
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
11712
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
11713
|
0
|
|
|
|
|
|
current = te; |
11714
|
0
|
0
|
|
|
|
|
do |
11715
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11716
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11717
|
0
|
|
|
|
|
|
( current)--; |
11718
|
|
|
|
|
|
|
}} |
11719
|
0
|
|
|
|
|
|
break; |
11720
|
|
|
|
|
|
|
case 1: |
11721
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
11722
|
0
|
|
|
|
|
|
current = te; |
11723
|
0
|
0
|
|
|
|
|
do |
11724
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
11725
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
11726
|
0
|
|
|
|
|
|
( current)--; |
11727
|
|
|
|
|
|
|
}} |
11728
|
0
|
|
|
|
|
|
break; |
11729
|
|
|
|
|
|
|
} |
11730
|
|
|
|
|
|
|
|
11731
|
|
|
|
|
|
|
_again: |
11732
|
39
|
100
|
|
|
|
|
switch ( _generic_tokenizer_to_state_actions[cs] ) { |
11733
|
|
|
|
|
|
|
case 5: |
11734
|
10
|
|
|
|
|
|
{ts = 0;} |
11735
|
10
|
|
|
|
|
|
break; |
11736
|
|
|
|
|
|
|
} |
11737
|
|
|
|
|
|
|
|
11738
|
39
|
50
|
|
|
|
|
if ( cs == 0 ) |
11739
|
|
|
|
|
|
|
goto _out; |
11740
|
39
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
11741
|
|
|
|
|
|
|
goto _resume; |
11742
|
|
|
|
|
|
|
_test_eof: {} |
11743
|
4
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
11744
|
|
|
|
|
|
|
{ |
11745
|
4
|
100
|
|
|
|
|
if ( _generic_tokenizer_eof_trans[cs] > 0 ) { |
11746
|
2
|
|
|
|
|
|
_trans = _generic_tokenizer_eof_trans[cs] - 1; |
11747
|
2
|
|
|
|
|
|
goto _eof_trans; |
11748
|
|
|
|
|
|
|
} |
11749
|
|
|
|
|
|
|
} |
11750
|
|
|
|
|
|
|
|
11751
|
|
|
|
|
|
|
_out: {} |
11752
|
|
|
|
|
|
|
} |
11753
|
|
|
|
|
|
|
|
11754
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
11755
|
|
|
|
|
|
|
|
11756
|
2
|
|
|
|
|
|
return !tokens.empty(); |
11757
|
|
|
|
|
|
|
} |
11758
|
|
|
|
|
|
|
|
11759
|
|
|
|
|
|
|
} // namespace morphodita |
11760
|
|
|
|
|
|
|
|
11761
|
|
|
|
|
|
|
///////// |
11762
|
|
|
|
|
|
|
// File: morphodita/tokenizer/ragel_tokenizer.cpp |
11763
|
|
|
|
|
|
|
///////// |
11764
|
|
|
|
|
|
|
|
11765
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
11766
|
|
|
|
|
|
|
// |
11767
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
11768
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
11769
|
|
|
|
|
|
|
// |
11770
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
11771
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
11772
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11773
|
|
|
|
|
|
|
|
11774
|
|
|
|
|
|
|
namespace morphodita { |
11775
|
|
|
|
|
|
|
|
11776
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_offsets[] = { |
11777
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11778
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
11779
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
11780
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
11781
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
11782
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
11783
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
11784
|
|
|
|
|
|
|
1, 1, 1, 2, 3, 3, 4, 5, |
11785
|
|
|
|
|
|
|
6, 7, 8, 9, 10, 11, 12, 13, |
11786
|
|
|
|
|
|
|
14, 15, 16 |
11787
|
|
|
|
|
|
|
}; |
11788
|
|
|
|
|
|
|
|
11789
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_lengths[] = { |
11790
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11791
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 0, 0, |
11792
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11793
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11794
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11795
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11796
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11797
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 1, 1, 1, |
11798
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
11799
|
|
|
|
|
|
|
1, 1, 1 |
11800
|
|
|
|
|
|
|
}; |
11801
|
|
|
|
|
|
|
|
11802
|
|
|
|
|
|
|
static const short _ragel_url_email_cond_keys[] = { |
11803
|
|
|
|
|
|
|
41u, 41u, 47u, 47u, 47u, 47u, 41u, 41u, |
11804
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
11805
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
11806
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
11807
|
|
|
|
|
|
|
47u, 47u, 0 |
11808
|
|
|
|
|
|
|
}; |
11809
|
|
|
|
|
|
|
|
11810
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_spaces[] = { |
11811
|
|
|
|
|
|
|
1, 0, 0, 1, 0, 0, 0, 0, |
11812
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
11813
|
|
|
|
|
|
|
0, 0 |
11814
|
|
|
|
|
|
|
}; |
11815
|
|
|
|
|
|
|
|
11816
|
|
|
|
|
|
|
static const short _ragel_url_email_key_offsets[] = { |
11817
|
|
|
|
|
|
|
0, 0, 15, 29, 41, 54, 63, 71, |
11818
|
|
|
|
|
|
|
78, 86, 92, 100, 117, 145, 154, 162, |
11819
|
|
|
|
|
|
|
171, 179, 188, 196, 204, 215, 225, 233, |
11820
|
|
|
|
|
|
|
241, 252, 262, 270, 278, 289, 299, 315, |
11821
|
|
|
|
|
|
|
330, 346, 360, 376, 393, 409, 426, 442, |
11822
|
|
|
|
|
|
|
459, 475, 491, 510, 528, 544, 560, 579, |
11823
|
|
|
|
|
|
|
597, 613, 629, 648, 666, 682, 698, 714, |
11824
|
|
|
|
|
|
|
725, 726, 741, 752, 756, 773, 801, 812, |
11825
|
|
|
|
|
|
|
823, 834, 848, 861, 879, 893, 908, 926, |
11826
|
|
|
|
|
|
|
944, 962, 983 |
11827
|
|
|
|
|
|
|
}; |
11828
|
|
|
|
|
|
|
|
11829
|
|
|
|
|
|
|
static const short _ragel_url_email_trans_keys[] = { |
11830
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 95u, 36u, 37u, 39u, |
11831
|
|
|
|
|
|
|
46u, 51u, 57u, 65u, 90u, 97u, 122u, 33u, |
11832
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 46u, 48u, |
11833
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 95u, 36u, |
11834
|
|
|
|
|
|
|
37u, 39u, 46u, 48u, 57u, 65u, 90u, 97u, |
11835
|
|
|
|
|
|
|
122u, 33u, 64u, 95u, 36u, 37u, 39u, 46u, |
11836
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
11837
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
11838
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
11839
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
11840
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 57u, |
11841
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
11842
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 41u, 61u, |
11843
|
|
|
|
|
|
|
95u, 36u, 47u, 48u, 57u, 58u, 59u, 63u, |
11844
|
|
|
|
|
|
|
64u, 65u, 90u, 97u, 122u, 33u, 39u, 40u, |
11845
|
|
|
|
|
|
|
44u, 46u, 61u, 63u, 95u, 129u, 131u, 135u, |
11846
|
|
|
|
|
|
|
151u, 809u, 1065u, 36u, 38u, 42u, 57u, 58u, |
11847
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 142u, 143u, 155u, |
11848
|
|
|
|
|
|
|
159u, 48u, 49u, 50u, 51u, 57u, 65u, 90u, |
11849
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
11850
|
|
|
|
|
|
|
97u, 122u, 48u, 49u, 50u, 51u, 57u, 65u, |
11851
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 57u, 65u, |
11852
|
|
|
|
|
|
|
90u, 97u, 122u, 48u, 49u, 50u, 51u, 57u, |
11853
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
11854
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
11855
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 53u, 48u, |
11856
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 45u, |
11857
|
|
|
|
|
|
|
46u, 48u, 53u, 54u, 57u, 65u, 90u, 97u, |
11858
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 57u, 65u, 90u, 97u, |
11859
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 57u, 65u, 90u, 97u, |
11860
|
|
|
|
|
|
|
122u, 45u, 46u, 53u, 48u, 52u, 54u, 57u, |
11861
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 53u, |
11862
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
11863
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
11864
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
11865
|
|
|
|
|
|
|
53u, 48u, 52u, 54u, 57u, 65u, 90u, 97u, |
11866
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 53u, 54u, 57u, 65u, |
11867
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
11868
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
11869
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 58u, 64u, 95u, |
11870
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 48u, 57u, 65u, 90u, |
11871
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
11872
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
11873
|
|
|
|
|
|
|
97u, 122u, 33u, 58u, 64u, 95u, 36u, 37u, |
11874
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
11875
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
11876
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
11877
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
11878
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
11879
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
11880
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
11881
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
11882
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
11883
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
11884
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
11885
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
11886
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
11887
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
11888
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
11889
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
11890
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
11891
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
11892
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
11893
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
11894
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
11895
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
11896
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
11897
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
11898
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
11899
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
11900
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
11901
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
11902
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
11903
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
11904
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
11905
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
11906
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
11907
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
11908
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
11909
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
11910
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
11911
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
11912
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
11913
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
11914
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
11915
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
11916
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
11917
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
11918
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
11919
|
|
|
|
|
|
|
97u, 122u, 33u, 47u, 95u, 36u, 37u, 39u, |
11920
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 47u, 33u, 48u, |
11921
|
|
|
|
|
|
|
49u, 50u, 95u, 36u, 37u, 39u, 46u, 51u, |
11922
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 58u, |
11923
|
|
|
|
|
|
|
303u, 559u, 48u, 57u, 65u, 90u, 97u, 122u, |
11924
|
|
|
|
|
|
|
303u, 559u, 48u, 57u, 33u, 39u, 41u, 61u, |
11925
|
|
|
|
|
|
|
95u, 36u, 47u, 48u, 57u, 58u, 59u, 63u, |
11926
|
|
|
|
|
|
|
64u, 65u, 90u, 97u, 122u, 33u, 39u, 40u, |
11927
|
|
|
|
|
|
|
44u, 46u, 61u, 63u, 95u, 129u, 131u, 135u, |
11928
|
|
|
|
|
|
|
151u, 809u, 1065u, 36u, 38u, 42u, 57u, 58u, |
11929
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 142u, 143u, 155u, |
11930
|
|
|
|
|
|
|
159u, 45u, 46u, 58u, 303u, 559u, 48u, 57u, |
11931
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 58u, 303u, |
11932
|
|
|
|
|
|
|
559u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
11933
|
|
|
|
|
|
|
46u, 58u, 303u, 559u, 48u, 57u, 65u, 90u, |
11934
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 58u, 303u, 559u, |
11935
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
11936
|
|
|
|
|
|
|
45u, 46u, 58u, 303u, 559u, 48u, 53u, 54u, |
11937
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
11938
|
|
|
|
|
|
|
58u, 64u, 95u, 303u, 559u, 36u, 37u, 39u, |
11939
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
11940
|
|
|
|
|
|
|
95u, 303u, 559u, 36u, 37u, 39u, 46u, 48u, |
11941
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 64u, 95u, |
11942
|
|
|
|
|
|
|
303u, 559u, 36u, 37u, 39u, 46u, 48u, 57u, |
11943
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 58u, |
11944
|
|
|
|
|
|
|
64u, 95u, 303u, 559u, 36u, 37u, 39u, 44u, |
11945
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
11946
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 303u, 559u, 36u, 37u, |
11947
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
11948
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 303u, 559u, |
11949
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
11950
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 53u, 58u, 64u, |
11951
|
|
|
|
|
|
|
95u, 303u, 559u, 36u, 37u, 39u, 44u, 48u, |
11952
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 33u, |
11953
|
|
|
|
|
|
|
45u, 46u, 58u, 64u, 95u, 303u, 559u, 36u, |
11954
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 53u, 54u, 57u, 65u, |
11955
|
|
|
|
|
|
|
90u, 97u, 122u, 0 |
11956
|
|
|
|
|
|
|
}; |
11957
|
|
|
|
|
|
|
|
11958
|
|
|
|
|
|
|
static const char _ragel_url_email_single_lengths[] = { |
11959
|
|
|
|
|
|
|
0, 5, 4, 2, 3, 3, 2, 1, |
11960
|
|
|
|
|
|
|
2, 0, 2, 5, 14, 3, 2, 3, |
11961
|
|
|
|
|
|
|
2, 3, 2, 2, 3, 2, 2, 2, |
11962
|
|
|
|
|
|
|
3, 2, 2, 2, 3, 2, 6, 5, |
11963
|
|
|
|
|
|
|
6, 4, 6, 7, 6, 7, 6, 7, |
11964
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 7, 6, |
11965
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 6, 3, |
11966
|
|
|
|
|
|
|
1, 5, 5, 2, 5, 14, 5, 5, |
11967
|
|
|
|
|
|
|
5, 6, 5, 8, 4, 5, 8, 8, |
11968
|
|
|
|
|
|
|
8, 9, 8 |
11969
|
|
|
|
|
|
|
}; |
11970
|
|
|
|
|
|
|
|
11971
|
|
|
|
|
|
|
static const char _ragel_url_email_range_lengths[] = { |
11972
|
|
|
|
|
|
|
0, 5, 5, 5, 5, 3, 3, 3, |
11973
|
|
|
|
|
|
|
3, 3, 3, 6, 7, 3, 3, 3, |
11974
|
|
|
|
|
|
|
3, 3, 3, 3, 4, 4, 3, 3, |
11975
|
|
|
|
|
|
|
4, 4, 3, 3, 4, 4, 5, 5, |
11976
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
11977
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 6, 6, |
11978
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 5, 4, |
11979
|
|
|
|
|
|
|
0, 5, 3, 1, 6, 7, 3, 3, |
11980
|
|
|
|
|
|
|
3, 4, 4, 5, 5, 5, 5, 5, |
11981
|
|
|
|
|
|
|
5, 6, 6 |
11982
|
|
|
|
|
|
|
}; |
11983
|
|
|
|
|
|
|
|
11984
|
|
|
|
|
|
|
static const short _ragel_url_email_index_offsets[] = { |
11985
|
|
|
|
|
|
|
0, 0, 11, 21, 29, 38, 45, 51, |
11986
|
|
|
|
|
|
|
56, 62, 66, 72, 84, 106, 113, 119, |
11987
|
|
|
|
|
|
|
126, 132, 139, 145, 151, 159, 166, 172, |
11988
|
|
|
|
|
|
|
178, 186, 193, 199, 205, 213, 220, 232, |
11989
|
|
|
|
|
|
|
243, 255, 265, 277, 290, 302, 315, 327, |
11990
|
|
|
|
|
|
|
340, 352, 364, 378, 391, 403, 415, 429, |
11991
|
|
|
|
|
|
|
442, 454, 466, 480, 493, 505, 517, 529, |
11992
|
|
|
|
|
|
|
537, 539, 550, 559, 563, 575, 597, 606, |
11993
|
|
|
|
|
|
|
615, 624, 635, 645, 659, 669, 680, 694, |
11994
|
|
|
|
|
|
|
708, 722, 738 |
11995
|
|
|
|
|
|
|
}; |
11996
|
|
|
|
|
|
|
|
11997
|
|
|
|
|
|
|
static const char _ragel_url_email_indicies[] = { |
11998
|
|
|
|
|
|
|
0, 2, 3, 4, 0, 0, 0, 5, |
11999
|
|
|
|
|
|
|
6, 6, 1, 0, 7, 8, 0, 0, |
12000
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 9, 9, 9, |
12001
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 9, 8, 9, |
12002
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 1, 10, 11, |
12003
|
|
|
|
|
|
|
12, 13, 14, 14, 1, 15, 16, 14, |
12004
|
|
|
|
|
|
|
14, 14, 1, 15, 14, 14, 14, 1, |
12005
|
|
|
|
|
|
|
15, 17, 14, 14, 14, 1, 14, 18, |
12006
|
|
|
|
|
|
|
18, 1, 15, 17, 14, 19, 19, 1, |
12007
|
|
|
|
|
|
|
20, 21, 21, 20, 20, 20, 21, 20, |
12008
|
|
|
|
|
|
|
20, 21, 21, 1, 22, 22, 24, 22, |
12009
|
|
|
|
|
|
|
22, 23, 22, 23, 23, 23, 23, 23, |
12010
|
|
|
|
|
|
|
25, 26, 23, 23, 22, 23, 23, 23, |
12011
|
|
|
|
|
|
|
23, 1, 27, 28, 29, 30, 18, 18, |
12012
|
|
|
|
|
|
|
1, 15, 31, 14, 14, 14, 1, 32, |
12013
|
|
|
|
|
|
|
33, 34, 35, 18, 18, 1, 15, 36, |
12014
|
|
|
|
|
|
|
14, 14, 14, 1, 37, 38, 39, 40, |
12015
|
|
|
|
|
|
|
18, 18, 1, 15, 36, 35, 14, 14, |
12016
|
|
|
|
|
|
|
1, 15, 36, 32, 14, 14, 1, 15, |
12017
|
|
|
|
|
|
|
36, 41, 35, 32, 14, 14, 1, 15, |
12018
|
|
|
|
|
|
|
36, 32, 14, 14, 14, 1, 15, 31, |
12019
|
|
|
|
|
|
|
30, 14, 14, 1, 15, 31, 27, 14, |
12020
|
|
|
|
|
|
|
14, 1, 15, 31, 42, 30, 27, 14, |
12021
|
|
|
|
|
|
|
14, 1, 15, 31, 27, 14, 14, 14, |
12022
|
|
|
|
|
|
|
1, 15, 16, 13, 14, 14, 1, 15, |
12023
|
|
|
|
|
|
|
16, 10, 14, 14, 1, 15, 16, 43, |
12024
|
|
|
|
|
|
|
13, 10, 14, 14, 1, 15, 16, 10, |
12025
|
|
|
|
|
|
|
14, 14, 14, 1, 0, 44, 45, 7, |
12026
|
|
|
|
|
|
|
8, 0, 0, 0, 46, 46, 46, 1, |
12027
|
|
|
|
|
|
|
0, 44, 7, 8, 0, 0, 0, 46, |
12028
|
|
|
|
|
|
|
46, 46, 1, 0, 44, 47, 7, 8, |
12029
|
|
|
|
|
|
|
0, 0, 0, 46, 46, 46, 1, 0, |
12030
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 46, 48, 48, |
12031
|
|
|
|
|
|
|
1, 0, 44, 47, 7, 8, 0, 0, |
12032
|
|
|
|
|
|
|
0, 46, 49, 49, 1, 0, 50, 51, |
12033
|
|
|
|
|
|
|
52, 7, 8, 0, 0, 0, 53, 48, |
12034
|
|
|
|
|
|
|
48, 1, 0, 44, 54, 7, 8, 0, |
12035
|
|
|
|
|
|
|
0, 0, 46, 46, 46, 1, 0, 55, |
12036
|
|
|
|
|
|
|
56, 57, 7, 8, 0, 0, 0, 58, |
12037
|
|
|
|
|
|
|
48, 48, 1, 0, 44, 59, 7, 8, |
12038
|
|
|
|
|
|
|
0, 0, 0, 46, 46, 46, 1, 0, |
12039
|
|
|
|
|
|
|
60, 61, 62, 7, 8, 0, 0, 0, |
12040
|
|
|
|
|
|
|
63, 48, 48, 1, 0, 44, 59, 7, |
12041
|
|
|
|
|
|
|
8, 0, 0, 0, 58, 46, 46, 1, |
12042
|
|
|
|
|
|
|
0, 44, 59, 7, 8, 0, 0, 0, |
12043
|
|
|
|
|
|
|
55, 46, 46, 1, 0, 44, 59, 64, |
12044
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 58, 55, 46, |
12045
|
|
|
|
|
|
|
46, 1, 0, 44, 59, 7, 8, 0, |
12046
|
|
|
|
|
|
|
0, 0, 55, 46, 46, 46, 1, 0, |
12047
|
|
|
|
|
|
|
44, 54, 7, 8, 0, 0, 0, 53, |
12048
|
|
|
|
|
|
|
46, 46, 1, 0, 44, 54, 7, 8, |
12049
|
|
|
|
|
|
|
0, 0, 0, 50, 46, 46, 1, 0, |
12050
|
|
|
|
|
|
|
44, 54, 65, 7, 8, 0, 0, 0, |
12051
|
|
|
|
|
|
|
53, 50, 46, 46, 1, 0, 44, 54, |
12052
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 50, 46, 46, |
12053
|
|
|
|
|
|
|
46, 1, 0, 44, 45, 7, 8, 0, |
12054
|
|
|
|
|
|
|
0, 0, 5, 46, 46, 1, 0, 44, |
12055
|
|
|
|
|
|
|
45, 7, 8, 0, 0, 0, 2, 46, |
12056
|
|
|
|
|
|
|
46, 1, 0, 44, 45, 66, 7, 8, |
12057
|
|
|
|
|
|
|
0, 0, 0, 5, 2, 46, 46, 1, |
12058
|
|
|
|
|
|
|
0, 44, 45, 7, 8, 0, 0, 0, |
12059
|
|
|
|
|
|
|
2, 46, 46, 46, 1, 0, 44, 47, |
12060
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 46, 67, 67, |
12061
|
|
|
|
|
|
|
1, 0, 44, 47, 7, 8, 0, 0, |
12062
|
|
|
|
|
|
|
0, 46, 68, 68, 1, 0, 44, 47, |
12063
|
|
|
|
|
|
|
69, 8, 0, 0, 0, 46, 68, 68, |
12064
|
|
|
|
|
|
|
1, 9, 70, 9, 9, 9, 9, 9, |
12065
|
|
|
|
|
|
|
1, 71, 1, 0, 2, 3, 4, 0, |
12066
|
|
|
|
|
|
|
0, 0, 5, 46, 46, 1, 15, 17, |
12067
|
|
|
|
|
|
|
72, 21, 23, 14, 19, 19, 1, 21, |
12068
|
|
|
|
|
|
|
23, 72, 1, 20, 21, 21, 20, 20, |
12069
|
|
|
|
|
|
|
20, 21, 20, 20, 21, 21, 1, 22, |
12070
|
|
|
|
|
|
|
22, 24, 22, 22, 23, 22, 23, 23, |
12071
|
|
|
|
|
|
|
23, 23, 23, 25, 26, 23, 23, 22, |
12072
|
|
|
|
|
|
|
23, 23, 23, 23, 1, 15, 17, 72, |
12073
|
|
|
|
|
|
|
21, 23, 14, 14, 14, 1, 15, 17, |
12074
|
|
|
|
|
|
|
72, 21, 23, 40, 14, 14, 1, 15, |
12075
|
|
|
|
|
|
|
17, 72, 21, 23, 37, 14, 14, 1, |
12076
|
|
|
|
|
|
|
15, 17, 73, 72, 21, 23, 40, 37, |
12077
|
|
|
|
|
|
|
14, 14, 1, 15, 17, 72, 21, 23, |
12078
|
|
|
|
|
|
|
37, 14, 14, 14, 1, 0, 44, 47, |
12079
|
|
|
|
|
|
|
74, 8, 0, 21, 23, 0, 0, 46, |
12080
|
|
|
|
|
|
|
49, 49, 1, 9, 9, 21, 23, 9, |
12081
|
|
|
|
|
|
|
9, 75, 9, 9, 1, 9, 8, 9, |
12082
|
|
|
|
|
|
|
21, 23, 9, 9, 75, 9, 9, 1, |
12083
|
|
|
|
|
|
|
0, 44, 47, 74, 8, 0, 21, 23, |
12084
|
|
|
|
|
|
|
0, 0, 46, 46, 46, 1, 0, 44, |
12085
|
|
|
|
|
|
|
47, 74, 8, 0, 21, 23, 0, 0, |
12086
|
|
|
|
|
|
|
63, 46, 46, 1, 0, 44, 47, 74, |
12087
|
|
|
|
|
|
|
8, 0, 21, 23, 0, 0, 60, 46, |
12088
|
|
|
|
|
|
|
46, 1, 0, 44, 47, 76, 74, 8, |
12089
|
|
|
|
|
|
|
0, 21, 23, 0, 0, 63, 60, 46, |
12090
|
|
|
|
|
|
|
46, 1, 0, 44, 47, 74, 8, 0, |
12091
|
|
|
|
|
|
|
21, 23, 0, 0, 60, 46, 46, 46, |
12092
|
|
|
|
|
|
|
1, 0 |
12093
|
|
|
|
|
|
|
}; |
12094
|
|
|
|
|
|
|
|
12095
|
|
|
|
|
|
|
static const char _ragel_url_email_trans_targs[] = { |
12096
|
|
|
|
|
|
|
2, 0, 30, 48, 50, 49, 52, 3, |
12097
|
|
|
|
|
|
|
5, 4, 6, 26, 28, 27, 8, 7, |
12098
|
|
|
|
|
|
|
13, 9, 10, 58, 11, 60, 12, 61, |
12099
|
|
|
|
|
|
|
61, 12, 61, 14, 22, 24, 23, 15, |
12100
|
|
|
|
|
|
|
16, 18, 20, 19, 17, 62, 63, 65, |
12101
|
|
|
|
|
|
|
64, 21, 25, 29, 31, 35, 32, 33, |
12102
|
|
|
|
|
|
|
34, 67, 36, 44, 46, 45, 37, 38, |
12103
|
|
|
|
|
|
|
40, 42, 41, 39, 70, 71, 73, 72, |
12104
|
|
|
|
|
|
|
43, 47, 51, 53, 54, 55, 56, 57, |
12105
|
|
|
|
|
|
|
59, 66, 68, 69, 74 |
12106
|
|
|
|
|
|
|
}; |
12107
|
|
|
|
|
|
|
|
12108
|
|
|
|
|
|
|
static const char _ragel_url_email_trans_actions[] = { |
12109
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
12110
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
12111
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 1, 0, 1, |
12112
|
|
|
|
|
|
|
2, 3, 4, 0, 0, 0, 0, 0, |
12113
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
12114
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
12115
|
|
|
|
|
|
|
0, 1, 0, 0, 0, 0, 0, 0, |
12116
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 1, 1, |
12117
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
12118
|
|
|
|
|
|
|
1, 1, 1, 1, 1 |
12119
|
|
|
|
|
|
|
}; |
12120
|
|
|
|
|
|
|
|
12121
|
|
|
|
|
|
|
static const int ragel_url_email_start = 1; |
12122
|
|
|
|
|
|
|
|
12123
|
3
|
|
|
|
|
|
vector ragel_tokenizer::ragel_map; |
12124
|
|
|
|
|
|
|
atomic_flag ragel_tokenizer::ragel_map_flag = ATOMIC_FLAG_INIT; |
12125
|
|
|
|
|
|
|
|
12126
|
6
|
|
|
|
|
|
ragel_tokenizer::ragel_tokenizer(unsigned url_email_tokenizer) : unicode_tokenizer(url_email_tokenizer) { |
12127
|
3
|
50
|
|
|
|
|
initialize_ragel_map(); |
12128
|
3
|
|
|
|
|
|
} |
12129
|
|
|
|
|
|
|
|
12130
|
14
|
|
|
|
|
|
void ragel_tokenizer::initialize_ragel_map() { |
12131
|
7
|
50
|
|
|
|
|
while (ragel_map_flag.test_and_set()) {} |
12132
|
7
|
100
|
|
|
|
|
if (ragel_map.empty()) { |
12133
|
258
|
100
|
|
|
|
|
for (uint8_t ascii = 0; ascii < 128; ascii++) |
12134
|
256
|
|
|
|
|
|
ragel_map.push_back(ascii); |
12135
|
|
|
|
|
|
|
|
12136
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2026', 160); // horizontal ellipsis (TRIPLE DOT) |
12137
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2019', 161); // right single quotation mark |
12138
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2018', 162); // left single quotation mark |
12139
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2010', 163); // hyphen |
12140
|
|
|
|
|
|
|
} |
12141
|
|
|
|
|
|
|
ragel_map_flag.clear(); |
12142
|
7
|
|
|
|
|
|
} |
12143
|
|
|
|
|
|
|
|
12144
|
8
|
|
|
|
|
|
void ragel_tokenizer::ragel_map_add(char32_t chr, uint8_t mapping) { |
12145
|
8
|
100
|
|
|
|
|
if (chr >= ragel_map.size()) |
12146
|
2
|
|
|
|
|
|
ragel_map.resize(chr + 1, 128); |
12147
|
8
|
|
|
|
|
|
ragel_map[chr] = mapping; |
12148
|
8
|
|
|
|
|
|
} |
12149
|
|
|
|
|
|
|
|
12150
|
10
|
|
|
|
|
|
bool ragel_tokenizer::ragel_url_email(unsigned version, const vector& chars, size_t& current, vector& tokens) { |
12151
|
|
|
|
|
|
|
int cs; |
12152
|
|
|
|
|
|
|
|
12153
|
10
|
|
|
|
|
|
size_t start = current, end = current, parens = 0; |
12154
|
|
|
|
|
|
|
|
12155
|
|
|
|
|
|
|
{ |
12156
|
|
|
|
|
|
|
cs = ragel_url_email_start; |
12157
|
|
|
|
|
|
|
} |
12158
|
|
|
|
|
|
|
|
12159
|
|
|
|
|
|
|
{ |
12160
|
|
|
|
|
|
|
int _klen; |
12161
|
|
|
|
|
|
|
const short *_keys; |
12162
|
|
|
|
|
|
|
int _trans; |
12163
|
|
|
|
|
|
|
short _widec; |
12164
|
|
|
|
|
|
|
|
12165
|
10
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
12166
|
|
|
|
|
|
|
goto _test_eof; |
12167
|
|
|
|
|
|
|
if ( cs == 0 ) |
12168
|
|
|
|
|
|
|
goto _out; |
12169
|
|
|
|
|
|
|
_resume: |
12170
|
44
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
12171
|
22
|
|
|
|
|
|
_klen = _ragel_url_email_cond_lengths[cs]; |
12172
|
22
|
|
|
|
|
|
_keys = _ragel_url_email_cond_keys + (_ragel_url_email_cond_offsets[cs]*2); |
12173
|
22
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
12174
|
|
|
|
|
|
|
const short *_lower = _keys; |
12175
|
|
|
|
|
|
|
const short *_mid; |
12176
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
12177
|
|
|
|
|
|
|
while (1) { |
12178
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
12179
|
|
|
|
|
|
|
break; |
12180
|
|
|
|
|
|
|
|
12181
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
12182
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
12183
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
12184
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
12185
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
12186
|
|
|
|
|
|
|
else { |
12187
|
0
|
|
|
|
|
|
switch ( _ragel_url_email_cond_spaces[_ragel_url_email_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
12188
|
|
|
|
|
|
|
case 0: { |
12189
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
12190
|
0
|
0
|
|
|
|
|
if ( |
12191
|
0
|
|
|
|
|
|
version >= 2 ) _widec += 256; |
12192
|
|
|
|
|
|
|
break; |
12193
|
|
|
|
|
|
|
} |
12194
|
|
|
|
|
|
|
case 1: { |
12195
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
12196
|
0
|
0
|
|
|
|
|
if ( |
12197
|
0
|
|
|
|
|
|
parens ) _widec += 256; |
12198
|
|
|
|
|
|
|
break; |
12199
|
|
|
|
|
|
|
} |
12200
|
|
|
|
|
|
|
} |
12201
|
|
|
|
|
|
|
break; |
12202
|
|
|
|
|
|
|
} |
12203
|
|
|
|
|
|
|
} |
12204
|
|
|
|
|
|
|
} |
12205
|
|
|
|
|
|
|
|
12206
|
22
|
|
|
|
|
|
_keys = _ragel_url_email_trans_keys + _ragel_url_email_key_offsets[cs]; |
12207
|
22
|
|
|
|
|
|
_trans = _ragel_url_email_index_offsets[cs]; |
12208
|
|
|
|
|
|
|
|
12209
|
22
|
|
|
|
|
|
_klen = _ragel_url_email_single_lengths[cs]; |
12210
|
22
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
12211
|
|
|
|
|
|
|
const short *_lower = _keys; |
12212
|
|
|
|
|
|
|
const short *_mid; |
12213
|
85
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
12214
|
|
|
|
|
|
|
while (1) { |
12215
|
85
|
100
|
|
|
|
|
if ( _upper < _lower ) |
12216
|
|
|
|
|
|
|
break; |
12217
|
|
|
|
|
|
|
|
12218
|
63
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
12219
|
63
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
12220
|
12
|
|
|
|
|
|
_upper = _mid - 1; |
12221
|
51
|
50
|
|
|
|
|
else if ( _widec > *_mid ) |
12222
|
51
|
|
|
|
|
|
_lower = _mid + 1; |
12223
|
|
|
|
|
|
|
else { |
12224
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
12225
|
0
|
|
|
|
|
|
goto _match; |
12226
|
|
|
|
|
|
|
} |
12227
|
|
|
|
|
|
|
} |
12228
|
22
|
|
|
|
|
|
_keys += _klen; |
12229
|
22
|
|
|
|
|
|
_trans += _klen; |
12230
|
|
|
|
|
|
|
} |
12231
|
|
|
|
|
|
|
|
12232
|
22
|
|
|
|
|
|
_klen = _ragel_url_email_range_lengths[cs]; |
12233
|
22
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
12234
|
|
|
|
|
|
|
const short *_lower = _keys; |
12235
|
|
|
|
|
|
|
const short *_mid; |
12236
|
69
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
12237
|
|
|
|
|
|
|
while (1) { |
12238
|
69
|
100
|
|
|
|
|
if ( _upper < _lower ) |
12239
|
|
|
|
|
|
|
break; |
12240
|
|
|
|
|
|
|
|
12241
|
61
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
12242
|
61
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
12243
|
8
|
|
|
|
|
|
_upper = _mid - 2; |
12244
|
53
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
12245
|
39
|
|
|
|
|
|
_lower = _mid + 2; |
12246
|
|
|
|
|
|
|
else { |
12247
|
14
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
12248
|
14
|
|
|
|
|
|
goto _match; |
12249
|
|
|
|
|
|
|
} |
12250
|
|
|
|
|
|
|
} |
12251
|
8
|
|
|
|
|
|
_trans += _klen; |
12252
|
|
|
|
|
|
|
} |
12253
|
|
|
|
|
|
|
|
12254
|
|
|
|
|
|
|
_match: |
12255
|
22
|
|
|
|
|
|
_trans = _ragel_url_email_indicies[_trans]; |
12256
|
22
|
|
|
|
|
|
cs = _ragel_url_email_trans_targs[_trans]; |
12257
|
|
|
|
|
|
|
|
12258
|
22
|
50
|
|
|
|
|
if ( _ragel_url_email_trans_actions[_trans] == 0 ) |
12259
|
|
|
|
|
|
|
goto _again; |
12260
|
|
|
|
|
|
|
|
12261
|
0
|
|
|
|
|
|
switch ( _ragel_url_email_trans_actions[_trans] ) { |
12262
|
|
|
|
|
|
|
case 3: |
12263
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
12264
|
0
|
|
|
|
|
|
break; |
12265
|
|
|
|
|
|
|
case 1: |
12266
|
0
|
|
|
|
|
|
{ end = current + 1; } |
12267
|
0
|
|
|
|
|
|
break; |
12268
|
|
|
|
|
|
|
case 2: |
12269
|
0
|
|
|
|
|
|
{parens++;} |
12270
|
0
|
|
|
|
|
|
{ end = current + 1; } |
12271
|
0
|
|
|
|
|
|
break; |
12272
|
|
|
|
|
|
|
case 4: |
12273
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
12274
|
0
|
|
|
|
|
|
{ end = current + 1; } |
12275
|
0
|
|
|
|
|
|
break; |
12276
|
|
|
|
|
|
|
} |
12277
|
|
|
|
|
|
|
|
12278
|
|
|
|
|
|
|
_again: |
12279
|
22
|
100
|
|
|
|
|
if ( cs == 0 ) |
12280
|
|
|
|
|
|
|
goto _out; |
12281
|
14
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
12282
|
|
|
|
|
|
|
goto _resume; |
12283
|
|
|
|
|
|
|
_test_eof: {} |
12284
|
|
|
|
|
|
|
_out: {} |
12285
|
|
|
|
|
|
|
} |
12286
|
|
|
|
|
|
|
|
12287
|
10
|
50
|
|
|
|
|
if (end > start) { |
12288
|
0
|
|
|
|
|
|
tokens.emplace_back(start, end - start); |
12289
|
0
|
|
|
|
|
|
current = end; |
12290
|
0
|
|
|
|
|
|
return true; |
12291
|
|
|
|
|
|
|
} else { |
12292
|
10
|
|
|
|
|
|
current = start; |
12293
|
10
|
|
|
|
|
|
return false; |
12294
|
|
|
|
|
|
|
} |
12295
|
|
|
|
|
|
|
} |
12296
|
|
|
|
|
|
|
|
12297
|
|
|
|
|
|
|
} // namespace morphodita |
12298
|
|
|
|
|
|
|
|
12299
|
|
|
|
|
|
|
///////// |
12300
|
|
|
|
|
|
|
// File: morphodita/tokenizer/vertical_tokenizer.h |
12301
|
|
|
|
|
|
|
///////// |
12302
|
|
|
|
|
|
|
|
12303
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
12304
|
|
|
|
|
|
|
// |
12305
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
12306
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12307
|
|
|
|
|
|
|
// |
12308
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12309
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12310
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12311
|
|
|
|
|
|
|
|
12312
|
|
|
|
|
|
|
namespace morphodita { |
12313
|
|
|
|
|
|
|
|
12314
|
2
|
|
|
|
|
|
class vertical_tokenizer : public unicode_tokenizer { |
12315
|
|
|
|
|
|
|
public: |
12316
|
1
|
50
|
|
|
|
|
vertical_tokenizer() : unicode_tokenizer(0) {} |
12317
|
|
|
|
|
|
|
|
12318
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
12319
|
|
|
|
|
|
|
}; |
12320
|
|
|
|
|
|
|
|
12321
|
|
|
|
|
|
|
} // namespace morphodita |
12322
|
|
|
|
|
|
|
|
12323
|
|
|
|
|
|
|
///////// |
12324
|
|
|
|
|
|
|
// File: morphodita/tokenizer/tokenizer.cpp |
12325
|
|
|
|
|
|
|
///////// |
12326
|
|
|
|
|
|
|
|
12327
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
12328
|
|
|
|
|
|
|
// |
12329
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
12330
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12331
|
|
|
|
|
|
|
// |
12332
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12333
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12334
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12335
|
|
|
|
|
|
|
|
12336
|
|
|
|
|
|
|
namespace morphodita { |
12337
|
|
|
|
|
|
|
|
12338
|
1
|
|
|
|
|
|
tokenizer* tokenizer::new_vertical_tokenizer() { |
12339
|
2
|
|
|
|
|
|
return new vertical_tokenizer(); |
12340
|
|
|
|
|
|
|
} |
12341
|
|
|
|
|
|
|
|
12342
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_czech_tokenizer() { |
12343
|
0
|
|
|
|
|
|
return new czech_tokenizer(czech_tokenizer::CZECH, czech_tokenizer::LATEST); |
12344
|
|
|
|
|
|
|
} |
12345
|
|
|
|
|
|
|
|
12346
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_english_tokenizer() { |
12347
|
0
|
|
|
|
|
|
return new english_tokenizer(english_tokenizer::LATEST); |
12348
|
|
|
|
|
|
|
} |
12349
|
|
|
|
|
|
|
|
12350
|
3
|
|
|
|
|
|
tokenizer* tokenizer::new_generic_tokenizer() { |
12351
|
6
|
|
|
|
|
|
return new generic_tokenizer(generic_tokenizer::LATEST); |
12352
|
|
|
|
|
|
|
} |
12353
|
|
|
|
|
|
|
|
12354
|
|
|
|
|
|
|
} // namespace morphodita |
12355
|
|
|
|
|
|
|
|
12356
|
|
|
|
|
|
|
///////// |
12357
|
|
|
|
|
|
|
// File: morphodita/tokenizer/unicode_tokenizer.cpp |
12358
|
|
|
|
|
|
|
///////// |
12359
|
|
|
|
|
|
|
|
12360
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
12361
|
|
|
|
|
|
|
// |
12362
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
12363
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12364
|
|
|
|
|
|
|
// |
12365
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12366
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12367
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12368
|
|
|
|
|
|
|
|
12369
|
|
|
|
|
|
|
namespace morphodita { |
12370
|
|
|
|
|
|
|
|
12371
|
4
|
|
|
|
|
|
unicode_tokenizer::unicode_tokenizer(unsigned url_email_tokenizer) : url_email_tokenizer(url_email_tokenizer) { |
12372
|
4
|
50
|
|
|
|
|
ragel_tokenizer::initialize_ragel_map(); |
12373
|
|
|
|
|
|
|
|
12374
|
4
|
50
|
|
|
|
|
set_text(string_piece(nullptr, 0)); |
12375
|
4
|
|
|
|
|
|
} |
12376
|
|
|
|
|
|
|
|
12377
|
7
|
|
|
|
|
|
void unicode_tokenizer::set_text(string_piece text, bool make_copy /*= false*/) { |
12378
|
|
|
|
|
|
|
using namespace unilib; |
12379
|
|
|
|
|
|
|
|
12380
|
7
|
100
|
|
|
|
|
if (make_copy && text.str) { |
|
|
50
|
|
|
|
|
|
12381
|
3
|
|
|
|
|
|
text_buffer.assign(text.str, text.len); |
12382
|
3
|
|
|
|
|
|
text.str = text_buffer.c_str(); |
12383
|
|
|
|
|
|
|
} |
12384
|
7
|
|
|
|
|
|
current = 0; |
12385
|
|
|
|
|
|
|
|
12386
|
|
|
|
|
|
|
chars.clear(); |
12387
|
152
|
100
|
|
|
|
|
for (const char* curr_str = text.str; text.len; curr_str = text.str) |
12388
|
145
|
|
|
|
|
|
chars.emplace_back(utf8::decode(text.str, text.len), curr_str); |
12389
|
7
|
|
|
|
|
|
chars.emplace_back(0, text.str); |
12390
|
7
|
|
|
|
|
|
} |
12391
|
|
|
|
|
|
|
|
12392
|
7
|
|
|
|
|
|
bool unicode_tokenizer::next_sentence(vector* forms, vector* tokens_ptr) { |
12393
|
7
|
50
|
|
|
|
|
vector& tokens = tokens_ptr ? *tokens_ptr : tokens_buffer; |
12394
|
|
|
|
|
|
|
tokens.clear(); |
12395
|
7
|
50
|
|
|
|
|
if (forms) forms->clear(); |
12396
|
7
|
100
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
12397
|
|
|
|
|
|
|
|
12398
|
4
|
|
|
|
|
|
bool result = next_sentence(tokens); |
12399
|
4
|
50
|
|
|
|
|
if (forms) |
12400
|
37
|
100
|
|
|
|
|
for (auto&& token : tokens) |
12401
|
33
|
|
|
|
|
|
forms->emplace_back(chars[token.start].str, chars[token.start + token.length].str - chars[token.start].str); |
12402
|
|
|
|
|
|
|
|
12403
|
|
|
|
|
|
|
return result; |
12404
|
|
|
|
|
|
|
} |
12405
|
|
|
|
|
|
|
|
12406
|
12
|
|
|
|
|
|
bool unicode_tokenizer::tokenize_url_email(vector& tokens) { |
12407
|
12
|
100
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
12408
|
|
|
|
|
|
|
|
12409
|
10
|
50
|
|
|
|
|
return url_email_tokenizer ? ragel_tokenizer::ragel_url_email(url_email_tokenizer, chars, current, tokens) : false; |
12410
|
|
|
|
|
|
|
} |
12411
|
|
|
|
|
|
|
|
12412
|
10
|
|
|
|
|
|
bool unicode_tokenizer::emergency_sentence_split(const vector& tokens) { |
12413
|
|
|
|
|
|
|
using namespace unilib; |
12414
|
|
|
|
|
|
|
|
12415
|
|
|
|
|
|
|
// Implement emergency splitting for large sentences |
12416
|
10
|
50
|
|
|
|
|
return tokens.size() >= 500 || |
12417
|
20
|
50
|
|
|
|
|
(tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) || |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
12418
|
0
|
0
|
|
|
|
|
(tokens.size() >= 400 && chars[tokens.back().start].cat & unicode::Po); |
12419
|
|
|
|
|
|
|
} |
12420
|
|
|
|
|
|
|
|
12421
|
0
|
|
|
|
|
|
bool unicode_tokenizer::is_eos(const vector& tokens, char32_t eos_chr, const unordered_set* abbreviations) { |
12422
|
|
|
|
|
|
|
using namespace unilib; |
12423
|
|
|
|
|
|
|
|
12424
|
0
|
0
|
|
|
|
|
if (eos_chr == '.' && !tokens.empty()) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12425
|
|
|
|
|
|
|
// Ignore one-letter capitals before dot |
12426
|
0
|
0
|
|
|
|
|
if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12427
|
|
|
|
|
|
|
return false; |
12428
|
|
|
|
|
|
|
|
12429
|
|
|
|
|
|
|
// Ignore specified abbreviations |
12430
|
0
|
0
|
|
|
|
|
if (abbreviations) { |
12431
|
|
|
|
|
|
|
eos_buffer.clear(); |
12432
|
0
|
0
|
|
|
|
|
for (size_t i = 0; i < tokens.back().length; i++) |
12433
|
0
|
|
|
|
|
|
utf8::append(eos_buffer, unicode::lowercase(chars[tokens.back().start + i].chr)); |
12434
|
0
|
0
|
|
|
|
|
if (abbreviations->count(eos_buffer)) |
12435
|
|
|
|
|
|
|
return false; |
12436
|
|
|
|
|
|
|
} |
12437
|
|
|
|
|
|
|
} |
12438
|
|
|
|
|
|
|
return true; |
12439
|
|
|
|
|
|
|
} |
12440
|
|
|
|
|
|
|
|
12441
|
|
|
|
|
|
|
} // namespace morphodita |
12442
|
|
|
|
|
|
|
|
12443
|
|
|
|
|
|
|
///////// |
12444
|
|
|
|
|
|
|
// File: morphodita/tokenizer/vertical_tokenizer.cpp |
12445
|
|
|
|
|
|
|
///////// |
12446
|
|
|
|
|
|
|
|
12447
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
12448
|
|
|
|
|
|
|
// |
12449
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
12450
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12451
|
|
|
|
|
|
|
// |
12452
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12453
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12454
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12455
|
|
|
|
|
|
|
|
12456
|
|
|
|
|
|
|
namespace morphodita { |
12457
|
|
|
|
|
|
|
|
12458
|
2
|
|
|
|
|
|
bool vertical_tokenizer::next_sentence(vector& tokens) { |
12459
|
2
|
50
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
12460
|
|
|
|
|
|
|
|
12461
|
26
|
|
|
|
|
|
while (true) { |
12462
|
28
|
|
|
|
|
|
size_t line_start = current; |
12463
|
118
|
100
|
|
|
|
|
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
12464
|
|
|
|
|
|
|
|
12465
|
|
|
|
|
|
|
size_t line_end = current; |
12466
|
28
|
100
|
|
|
|
|
if (current < chars.size() - 1) { |
12467
|
26
|
|
|
|
|
|
current++; |
12468
|
52
|
50
|
|
|
|
|
if (current < chars.size() - 1 && |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
12469
|
26
|
0
|
|
|
|
|
((chars[current-1].chr == '\r' && chars[current].chr == '\n') || |
|
|
50
|
|
|
|
|
|
12470
|
26
|
50
|
|
|
|
|
(chars[current-1].chr == '\n' && chars[current].chr == '\r'))) |
12471
|
0
|
|
|
|
|
|
current++; |
12472
|
|
|
|
|
|
|
} |
12473
|
|
|
|
|
|
|
|
12474
|
28
|
100
|
|
|
|
|
if (line_start < line_end) |
12475
|
26
|
|
|
|
|
|
tokens.emplace_back(line_start, line_end - line_start); |
12476
|
|
|
|
|
|
|
else |
12477
|
|
|
|
|
|
|
break; |
12478
|
|
|
|
|
|
|
} |
12479
|
|
|
|
|
|
|
|
12480
|
2
|
|
|
|
|
|
return true; |
12481
|
|
|
|
|
|
|
} |
12482
|
|
|
|
|
|
|
|
12483
|
|
|
|
|
|
|
} // namespace morphodita |
12484
|
|
|
|
|
|
|
|
12485
|
|
|
|
|
|
|
///////// |
12486
|
|
|
|
|
|
|
// File: unilib/version.h |
12487
|
|
|
|
|
|
|
///////// |
12488
|
|
|
|
|
|
|
|
12489
|
|
|
|
|
|
|
// This file is part of UniLib . |
12490
|
|
|
|
|
|
|
// |
12491
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
12492
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12493
|
|
|
|
|
|
|
// |
12494
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12495
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12496
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12497
|
|
|
|
|
|
|
// |
12498
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
12499
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
12500
|
|
|
|
|
|
|
|
12501
|
|
|
|
|
|
|
namespace unilib { |
12502
|
|
|
|
|
|
|
|
12503
|
0
|
|
|
|
|
|
struct version { |
12504
|
|
|
|
|
|
|
unsigned major; |
12505
|
|
|
|
|
|
|
unsigned minor; |
12506
|
|
|
|
|
|
|
unsigned patch; |
12507
|
|
|
|
|
|
|
std::string prerelease; |
12508
|
|
|
|
|
|
|
|
12509
|
|
|
|
|
|
|
// Returns current version. |
12510
|
|
|
|
|
|
|
static version current(); |
12511
|
|
|
|
|
|
|
}; |
12512
|
|
|
|
|
|
|
|
12513
|
|
|
|
|
|
|
} // namespace unilib |
12514
|
|
|
|
|
|
|
|
12515
|
|
|
|
|
|
|
///////// |
12516
|
|
|
|
|
|
|
// File: morphodita/version/version.h |
12517
|
|
|
|
|
|
|
///////// |
12518
|
|
|
|
|
|
|
|
12519
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
12520
|
|
|
|
|
|
|
// |
12521
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
12522
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12523
|
|
|
|
|
|
|
// |
12524
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12525
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12526
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12527
|
|
|
|
|
|
|
|
12528
|
|
|
|
|
|
|
namespace morphodita { |
12529
|
|
|
|
|
|
|
|
12530
|
0
|
|
|
|
|
|
class version { |
12531
|
|
|
|
|
|
|
public: |
12532
|
|
|
|
|
|
|
unsigned major; |
12533
|
|
|
|
|
|
|
unsigned minor; |
12534
|
|
|
|
|
|
|
unsigned patch; |
12535
|
|
|
|
|
|
|
string prerelease; |
12536
|
|
|
|
|
|
|
|
12537
|
|
|
|
|
|
|
// Returns current MorphoDiTa version. |
12538
|
|
|
|
|
|
|
static version current(); |
12539
|
|
|
|
|
|
|
|
12540
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
12541
|
|
|
|
|
|
|
static string version_and_copyright(const string& other_libraries = string()); |
12542
|
|
|
|
|
|
|
}; |
12543
|
|
|
|
|
|
|
|
12544
|
|
|
|
|
|
|
} // namespace morphodita |
12545
|
|
|
|
|
|
|
|
12546
|
|
|
|
|
|
|
///////// |
12547
|
|
|
|
|
|
|
// File: morphodita/version/version.cpp |
12548
|
|
|
|
|
|
|
///////// |
12549
|
|
|
|
|
|
|
|
12550
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
12551
|
|
|
|
|
|
|
// |
12552
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
12553
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12554
|
|
|
|
|
|
|
// |
12555
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12556
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12557
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12558
|
|
|
|
|
|
|
|
12559
|
|
|
|
|
|
|
namespace morphodita { |
12560
|
|
|
|
|
|
|
|
12561
|
0
|
|
|
|
|
|
version version::current() { |
12562
|
0
|
0
|
|
|
|
|
return {1, 11, 1, ""}; |
|
|
0
|
|
|
|
|
|
12563
|
|
|
|
|
|
|
} |
12564
|
|
|
|
|
|
|
|
12565
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
12566
|
0
|
|
|
|
|
|
string version::version_and_copyright(const string& other_libraries) { |
12567
|
0
|
|
|
|
|
|
ostringstream info; |
12568
|
|
|
|
|
|
|
|
12569
|
|
|
|
|
|
|
auto morphodita = version::current(); |
12570
|
|
|
|
|
|
|
auto unilib = unilib::version::current(); |
12571
|
|
|
|
|
|
|
|
12572
|
0
|
|
|
|
|
|
info << "MorphoDiTa version " << morphodita.major << '.' << morphodita.minor << '.' << morphodita.patch |
12573
|
0
|
0
|
|
|
|
|
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
|
|
0
|
|
|
|
|
|
12574
|
0
|
|
|
|
|
|
<< " (using UniLib " << unilib.major << '.' << unilib.minor << '.' << unilib.patch |
12575
|
0
|
0
|
|
|
|
|
<< (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n" |
|
|
0
|
|
|
|
|
|
12576
|
|
|
|
|
|
|
"Copyright 2015 by Institute of Formal and Applied Linguistics, Faculty of\n" |
12577
|
0
|
0
|
|
|
|
|
"Mathematics and Physics, Charles University in Prague, Czech Republic."; |
12578
|
|
|
|
|
|
|
|
12579
|
0
|
|
|
|
|
|
return info.str(); |
12580
|
|
|
|
|
|
|
} |
12581
|
|
|
|
|
|
|
|
12582
|
|
|
|
|
|
|
} // namespace morphodita |
12583
|
|
|
|
|
|
|
|
12584
|
|
|
|
|
|
|
///////// |
12585
|
|
|
|
|
|
|
// File: bilou/bilou_entity.h |
12586
|
|
|
|
|
|
|
///////// |
12587
|
|
|
|
|
|
|
|
12588
|
|
|
|
|
|
|
// This file is part of NameTag . |
12589
|
|
|
|
|
|
|
// |
12590
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12591
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12592
|
|
|
|
|
|
|
// |
12593
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12594
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12595
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12596
|
|
|
|
|
|
|
|
12597
|
|
|
|
|
|
|
class bilou_entity { |
12598
|
|
|
|
|
|
|
public: |
12599
|
|
|
|
|
|
|
typedef entity_type value; |
12600
|
|
|
|
|
|
|
|
12601
|
|
|
|
|
|
|
enum :value { I, L, O, B_first, U_first, unknown = ~0U }; |
12602
|
|
|
|
|
|
|
static constexpr inline value B(entity_type entity) { return entity == entity_type_unknown ? unknown : B_first + 2 * entity; } |
12603
|
|
|
|
|
|
|
static constexpr inline value U(entity_type entity) { return entity == entity_type_unknown ? unknown : U_first + 2 * entity; } |
12604
|
|
|
|
|
|
|
static constexpr inline value total(entity_type entities) { return B_first + 2 * entities; } |
12605
|
|
|
|
|
|
|
|
12606
|
|
|
|
|
|
|
static inline bilou_type get_bilou(value bilou_entity) { |
12607
|
126
|
|
|
|
|
|
switch (bilou_entity) { |
12608
|
|
|
|
|
|
|
case I: return bilou_type_I; |
12609
|
|
|
|
|
|
|
case L: return bilou_type_L; |
12610
|
|
|
|
|
|
|
case O: return bilou_type_O; |
12611
|
84
|
100
|
|
|
|
|
default: return (bilou_entity - B_first) & 1 ? bilou_type_U : bilou_type_B; |
12612
|
|
|
|
|
|
|
} |
12613
|
|
|
|
|
|
|
} |
12614
|
|
|
|
|
|
|
static inline entity_type get_entity(value bilou_entity) { |
12615
|
70
|
100
|
|
|
|
|
switch (bilou_entity) { |
12616
|
|
|
|
|
|
|
case I: |
12617
|
|
|
|
|
|
|
case L: |
12618
|
|
|
|
|
|
|
case O: return entity_type_unknown; |
12619
|
28
|
|
|
|
|
|
default: return (bilou_entity - B_first) >> 1; |
12620
|
|
|
|
|
|
|
} |
12621
|
|
|
|
|
|
|
} |
12622
|
|
|
|
|
|
|
static inline value from_bilou_entity(bilou_type bilou, entity_type entity) { |
12623
|
|
|
|
|
|
|
switch (bilou) { |
12624
|
|
|
|
|
|
|
case bilou_type_I: return I; |
12625
|
|
|
|
|
|
|
case bilou_type_L: return L; |
12626
|
|
|
|
|
|
|
case bilou_type_O: return O; |
12627
|
|
|
|
|
|
|
case bilou_type_B: return B(entity); |
12628
|
|
|
|
|
|
|
default: return U(entity); |
12629
|
|
|
|
|
|
|
} |
12630
|
|
|
|
|
|
|
} |
12631
|
|
|
|
|
|
|
}; |
12632
|
|
|
|
|
|
|
|
12633
|
|
|
|
|
|
|
///////// |
12634
|
|
|
|
|
|
|
// File: ner/ner_ids.h |
12635
|
|
|
|
|
|
|
///////// |
12636
|
|
|
|
|
|
|
|
12637
|
|
|
|
|
|
|
// This file is part of NameTag . |
12638
|
|
|
|
|
|
|
// |
12639
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12640
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12641
|
|
|
|
|
|
|
// |
12642
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12643
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12644
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12645
|
|
|
|
|
|
|
|
12646
|
|
|
|
|
|
|
class ner_ids { |
12647
|
|
|
|
|
|
|
public: |
12648
|
|
|
|
|
|
|
enum ner_id { CZECH_NER = 0, ENGLISH_NER = 1, GENERIC_NER = 2 }; |
12649
|
|
|
|
|
|
|
|
12650
|
|
|
|
|
|
|
static bool parse(const string& str, ner_id& id) { |
12651
|
|
|
|
|
|
|
if (str == "czech") return id = CZECH_NER, true; |
12652
|
|
|
|
|
|
|
if (str == "english") return id = ENGLISH_NER, true; |
12653
|
|
|
|
|
|
|
if (str == "generic") return id = GENERIC_NER, true; |
12654
|
|
|
|
|
|
|
return false; |
12655
|
|
|
|
|
|
|
} |
12656
|
|
|
|
|
|
|
}; |
12657
|
|
|
|
|
|
|
|
12658
|
|
|
|
|
|
|
typedef ner_ids::ner_id ner_id; |
12659
|
|
|
|
|
|
|
|
12660
|
|
|
|
|
|
|
///////// |
12661
|
|
|
|
|
|
|
// File: ner/bilou_ner.h |
12662
|
|
|
|
|
|
|
///////// |
12663
|
|
|
|
|
|
|
|
12664
|
|
|
|
|
|
|
// This file is part of NameTag . |
12665
|
|
|
|
|
|
|
// |
12666
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12667
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12668
|
|
|
|
|
|
|
// |
12669
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12670
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12671
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12672
|
|
|
|
|
|
|
|
12673
|
0
|
|
|
|
|
|
class bilou_ner : public ner { |
12674
|
|
|
|
|
|
|
public: |
12675
|
|
|
|
|
|
|
bilou_ner(ner_id id); |
12676
|
|
|
|
|
|
|
|
12677
|
|
|
|
|
|
|
bool load(istream& is); |
12678
|
|
|
|
|
|
|
|
12679
|
|
|
|
|
|
|
virtual void recognize(const vector& forms, vector& entities) const override; |
12680
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
12681
|
|
|
|
|
|
|
|
12682
|
|
|
|
|
|
|
virtual void entity_types(vector& types) const override; |
12683
|
|
|
|
|
|
|
|
12684
|
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const override; |
12685
|
|
|
|
|
|
|
private: |
12686
|
|
|
|
|
|
|
friend class bilou_ner_trainer; |
12687
|
|
|
|
|
|
|
|
12688
|
|
|
|
|
|
|
// Methods used by bylou_ner_trainer |
12689
|
|
|
|
|
|
|
static void fill_bilou_probabilities(const vector& outcomes, bilou_probabilities& prob); |
12690
|
|
|
|
|
|
|
static tokenizer* new_tokenizer(ner_id id); |
12691
|
|
|
|
|
|
|
|
12692
|
|
|
|
|
|
|
// Internal members of bilou_ner |
12693
|
|
|
|
|
|
|
ner_id id; |
12694
|
|
|
|
|
|
|
unique_ptr tagger; |
12695
|
|
|
|
|
|
|
entity_map named_entities; |
12696
|
|
|
|
|
|
|
feature_templates templates; |
12697
|
|
|
|
|
|
|
vector networks; |
12698
|
|
|
|
|
|
|
|
12699
|
0
|
|
|
|
|
|
struct cache { |
12700
|
|
|
|
|
|
|
ner_sentence sentence; |
12701
|
|
|
|
|
|
|
vector outcomes, network_buffer; |
12702
|
|
|
|
|
|
|
string string_buffer; |
12703
|
|
|
|
|
|
|
vector entities_buffer; |
12704
|
|
|
|
|
|
|
}; |
12705
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
12706
|
|
|
|
|
|
|
}; |
12707
|
|
|
|
|
|
|
|
12708
|
|
|
|
|
|
|
///////// |
12709
|
|
|
|
|
|
|
// File: tokenizer/morphodita_tokenizer_wrapper.h |
12710
|
|
|
|
|
|
|
///////// |
12711
|
|
|
|
|
|
|
|
12712
|
|
|
|
|
|
|
// This file is part of NameTag . |
12713
|
|
|
|
|
|
|
// |
12714
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
12715
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12716
|
|
|
|
|
|
|
// |
12717
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12718
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12719
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12720
|
|
|
|
|
|
|
|
12721
|
|
|
|
|
|
|
class morphodita_tokenizer_wrapper : public tokenizer { |
12722
|
|
|
|
|
|
|
public: |
12723
|
|
|
|
|
|
|
morphodita_tokenizer_wrapper(morphodita::tokenizer* morphodita_tokenizer) |
12724
|
4
|
|
|
|
|
|
: morphodita_tokenizer(morphodita_tokenizer) {} |
12725
|
8
|
|
|
|
|
|
virtual ~morphodita_tokenizer_wrapper() override {} |
12726
|
|
|
|
|
|
|
|
12727
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) override; |
12728
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) override; |
12729
|
|
|
|
|
|
|
|
12730
|
|
|
|
|
|
|
private: |
12731
|
|
|
|
|
|
|
unique_ptr morphodita_tokenizer; |
12732
|
|
|
|
|
|
|
}; |
12733
|
|
|
|
|
|
|
|
12734
|
|
|
|
|
|
|
///////// |
12735
|
|
|
|
|
|
|
// File: ner/bilou_ner.cpp |
12736
|
|
|
|
|
|
|
///////// |
12737
|
|
|
|
|
|
|
|
12738
|
|
|
|
|
|
|
// This file is part of NameTag . |
12739
|
|
|
|
|
|
|
// |
12740
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12741
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12742
|
|
|
|
|
|
|
// |
12743
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12744
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12745
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12746
|
|
|
|
|
|
|
|
12747
|
0
|
|
|
|
|
|
bilou_ner::bilou_ner(ner_id id) : id(id) {} |
12748
|
|
|
|
|
|
|
|
12749
|
1
|
|
|
|
|
|
bool bilou_ner::load(istream& is) { |
12750
|
2
|
50
|
|
|
|
|
if (tagger.reset(tagger::load_instance(is)), !tagger) return false; |
12751
|
1
|
50
|
|
|
|
|
if (!named_entities.load(is)) return false; |
12752
|
|
|
|
|
|
|
|
12753
|
1
|
|
|
|
|
|
unique_ptr tokenizer(new_tokenizer()); |
12754
|
1
|
50
|
|
|
|
|
if (!templates.load(is, nlp_pipeline(tokenizer.get(), tagger.get()))) return false; |
|
|
50
|
|
|
|
|
|
12755
|
|
|
|
|
|
|
|
12756
|
1
|
50
|
|
|
|
|
int stages = is.get(); |
12757
|
1
|
50
|
|
|
|
|
if (stages == EOF) return false; |
12758
|
1
|
50
|
|
|
|
|
networks.resize(stages); |
12759
|
3
|
100
|
|
|
|
|
for (auto&& network : networks) |
12760
|
2
|
50
|
|
|
|
|
if (!network.load(is)) return false; |
|
|
50
|
|
|
|
|
|
12761
|
|
|
|
|
|
|
|
12762
|
|
|
|
|
|
|
return true; |
12763
|
|
|
|
|
|
|
} |
12764
|
|
|
|
|
|
|
|
12765
|
2
|
|
|
|
|
|
void bilou_ner::recognize(const vector& forms, vector& entities) const { |
12766
|
2
|
|
|
|
|
|
entities.clear(); |
12767
|
2
|
50
|
|
|
|
|
if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
12768
|
|
|
|
|
|
|
|
12769
|
|
|
|
|
|
|
// Acquire cache |
12770
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
12771
|
2
|
100
|
|
|
|
|
if (!c) c = new cache(); |
12772
|
2
|
|
|
|
|
|
auto& sentence = c->sentence; |
12773
|
|
|
|
|
|
|
|
12774
|
|
|
|
|
|
|
// Tag |
12775
|
2
|
|
|
|
|
|
tagger->tag(forms, sentence); |
12776
|
|
|
|
|
|
|
|
12777
|
2
|
50
|
|
|
|
|
if (sentence.size) { |
12778
|
|
|
|
|
|
|
sentence.clear_previous_stage(); |
12779
|
|
|
|
|
|
|
|
12780
|
|
|
|
|
|
|
// Perform required NER stages |
12781
|
6
|
100
|
|
|
|
|
for (auto&& network : networks) { |
12782
|
|
|
|
|
|
|
sentence.clear_features(); |
12783
|
|
|
|
|
|
|
sentence.clear_probabilities_local_filled(); |
12784
|
|
|
|
|
|
|
|
12785
|
|
|
|
|
|
|
// Compute per-sentence feature templates |
12786
|
4
|
|
|
|
|
|
templates.process_sentence(sentence, c->string_buffer); |
12787
|
|
|
|
|
|
|
|
12788
|
|
|
|
|
|
|
// Sequentially classify sentence words |
12789
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
12790
|
14
|
50
|
|
|
|
|
if (!sentence.probabilities[i].local_filled) { |
12791
|
14
|
|
|
|
|
|
network.classify(sentence.features[i], c->outcomes, c->network_buffer); |
12792
|
14
|
|
|
|
|
|
fill_bilou_probabilities(c->outcomes, sentence.probabilities[i].local); |
12793
|
14
|
|
|
|
|
|
sentence.probabilities[i].local_filled = true; |
12794
|
|
|
|
|
|
|
} |
12795
|
|
|
|
|
|
|
|
12796
|
14
|
100
|
|
|
|
|
if (i == 0) { |
12797
|
4
|
|
|
|
|
|
sentence.probabilities[i].global.init(sentence.probabilities[i].local); |
12798
|
|
|
|
|
|
|
} else { |
12799
|
10
|
|
|
|
|
|
sentence.probabilities[i].global.update(sentence.probabilities[i].local, sentence.probabilities[i - 1].global); |
12800
|
|
|
|
|
|
|
} |
12801
|
|
|
|
|
|
|
} |
12802
|
|
|
|
|
|
|
|
12803
|
4
|
|
|
|
|
|
sentence.compute_best_decoding(); |
12804
|
4
|
|
|
|
|
|
sentence.fill_previous_stage(); |
12805
|
|
|
|
|
|
|
} |
12806
|
|
|
|
|
|
|
|
12807
|
|
|
|
|
|
|
// Store entities in the output array |
12808
|
9
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
12809
|
7
|
100
|
|
|
|
|
if (sentence.probabilities[i].global.best == bilou_type_U) { |
12810
|
3
|
|
|
|
|
|
entities.emplace_back(i, 1, named_entities.name(sentence.probabilities[i].global.bilou[bilou_type_U].entity)); |
12811
|
4
|
50
|
|
|
|
|
} else if (sentence.probabilities[i].global.best == bilou_type_B) { |
12812
|
0
|
|
|
|
|
|
unsigned start = i++; |
12813
|
0
|
0
|
|
|
|
|
while (i < sentence.size && sentence.probabilities[i].global.best != bilou_type_L) i++; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12814
|
0
|
|
|
|
|
|
entities.emplace_back(start, i - start + (i < sentence.size), named_entities.name(sentence.probabilities[start].global.bilou[bilou_type_B].entity)); |
12815
|
|
|
|
|
|
|
} |
12816
|
|
|
|
|
|
|
|
12817
|
|
|
|
|
|
|
// Process the entities |
12818
|
2
|
|
|
|
|
|
templates.process_entities(sentence, entities, c->entities_buffer); |
12819
|
|
|
|
|
|
|
} |
12820
|
|
|
|
|
|
|
|
12821
|
2
|
|
|
|
|
|
caches.push(c); |
12822
|
|
|
|
|
|
|
} |
12823
|
|
|
|
|
|
|
|
12824
|
3
|
|
|
|
|
|
tokenizer* bilou_ner::new_tokenizer() const { |
12825
|
3
|
|
|
|
|
|
return new_tokenizer(id); |
12826
|
|
|
|
|
|
|
} |
12827
|
|
|
|
|
|
|
|
12828
|
0
|
|
|
|
|
|
void bilou_ner::entity_types(vector& types) const { |
12829
|
0
|
|
|
|
|
|
types.resize(named_entities.size()); |
12830
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < types.size(); i++) |
12831
|
|
|
|
|
|
|
types[i] = named_entities.name(i); |
12832
|
0
|
|
|
|
|
|
} |
12833
|
|
|
|
|
|
|
|
12834
|
0
|
|
|
|
|
|
void bilou_ner::gazetteers(vector& gazetteers, vector* gazetteer_types) const { |
12835
|
0
|
|
|
|
|
|
gazetteers.clear(); |
12836
|
0
|
0
|
|
|
|
|
if (gazetteer_types) gazetteer_types->clear(); |
12837
|
|
|
|
|
|
|
|
12838
|
|
|
|
|
|
|
templates.gazetteers(gazetteers, gazetteer_types); |
12839
|
0
|
|
|
|
|
|
} |
12840
|
|
|
|
|
|
|
|
12841
|
14
|
|
|
|
|
|
void bilou_ner::fill_bilou_probabilities(const vector& outcomes, bilou_probabilities& prob) { |
12842
|
84
|
100
|
|
|
|
|
for (auto&& prob_bilou : prob.bilou) |
12843
|
70
|
|
|
|
|
|
prob_bilou.probability = -1; |
12844
|
|
|
|
|
|
|
|
12845
|
140
|
100
|
|
|
|
|
for (bilou_entity::value i = 0; i < outcomes.size(); i++) { |
12846
|
|
|
|
|
|
|
auto bilou = bilou_entity::get_bilou(i); |
12847
|
126
|
100
|
|
|
|
|
if (outcomes[i] > prob.bilou[bilou].probability) { |
12848
|
70
|
|
|
|
|
|
prob.bilou[bilou].probability = outcomes[i]; |
12849
|
70
|
|
|
|
|
|
prob.bilou[bilou].entity = bilou_entity::get_entity(i); |
12850
|
|
|
|
|
|
|
} |
12851
|
|
|
|
|
|
|
} |
12852
|
14
|
|
|
|
|
|
} |
12853
|
|
|
|
|
|
|
|
12854
|
3
|
|
|
|
|
|
tokenizer* bilou_ner::new_tokenizer(ner_id id) { |
12855
|
3
|
|
|
|
|
|
switch (id) { |
12856
|
|
|
|
|
|
|
case ner_id::CZECH_NER: |
12857
|
0
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_czech_tokenizer()); |
12858
|
|
|
|
|
|
|
case ner_id::ENGLISH_NER: |
12859
|
0
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_english_tokenizer()); |
12860
|
|
|
|
|
|
|
case ner_id::GENERIC_NER: |
12861
|
3
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_generic_tokenizer()); |
12862
|
|
|
|
|
|
|
} |
12863
|
|
|
|
|
|
|
|
12864
|
|
|
|
|
|
|
return nullptr; |
12865
|
|
|
|
|
|
|
} |
12866
|
|
|
|
|
|
|
|
12867
|
|
|
|
|
|
|
///////// |
12868
|
|
|
|
|
|
|
// File: ner/entity_map.cpp |
12869
|
|
|
|
|
|
|
///////// |
12870
|
|
|
|
|
|
|
|
12871
|
|
|
|
|
|
|
// This file is part of NameTag . |
12872
|
|
|
|
|
|
|
// |
12873
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12874
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12875
|
|
|
|
|
|
|
// |
12876
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12877
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12878
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12879
|
|
|
|
|
|
|
|
12880
|
0
|
|
|
|
|
|
entity_type entity_map::parse(const char* str, bool add_entity) const { |
12881
|
0
|
|
|
|
|
|
auto it = str2id.find(str); |
12882
|
0
|
0
|
|
|
|
|
if (it == str2id.end() && add_entity) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12883
|
0
|
|
|
|
|
|
it = str2id.emplace(str, (int)id2str.size()).first; |
12884
|
0
|
|
|
|
|
|
id2str.emplace_back(str); |
12885
|
|
|
|
|
|
|
} |
12886
|
0
|
0
|
|
|
|
|
return it == str2id.end() ? entity_type_unknown : it->second; |
12887
|
|
|
|
|
|
|
} |
12888
|
|
|
|
|
|
|
|
12889
|
0
|
|
|
|
|
|
const string& entity_map::name(entity_type entity) const { |
12890
|
3
|
0
|
|
|
|
|
return entity < id2str.size() ? id2str[entity] : empty; |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12891
|
|
|
|
|
|
|
} |
12892
|
|
|
|
|
|
|
|
12893
|
1
|
|
|
|
|
|
bool entity_map::load(istream& is) { |
12894
|
|
|
|
|
|
|
binary_decoder data; |
12895
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
50
|
|
|
|
|
|
12896
|
|
|
|
|
|
|
|
12897
|
|
|
|
|
|
|
try { |
12898
|
|
|
|
|
|
|
str2id.clear(); |
12899
|
1
|
50
|
|
|
|
|
id2str.resize(data.next_4B()); |
|
|
50
|
|
|
|
|
|
12900
|
4
|
100
|
|
|
|
|
for (unsigned i = 0; i < id2str.size(); i++) { |
12901
|
3
|
50
|
|
|
|
|
data.next_str(id2str[i]); |
12902
|
3
|
|
|
|
|
|
str2id.emplace(id2str[i], i); |
12903
|
|
0
|
|
|
|
|
} |
12904
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
12905
|
|
|
|
|
|
|
return false; |
12906
|
|
|
|
|
|
|
} |
12907
|
|
|
|
|
|
|
|
12908
|
1
|
|
|
|
|
|
return data.is_end(); |
12909
|
|
|
|
|
|
|
} |
12910
|
|
|
|
|
|
|
|
12911
|
0
|
|
|
|
|
|
entity_type entity_map::size() const { |
12912
|
2
|
|
|
|
|
|
return id2str.size(); |
12913
|
|
|
|
|
|
|
} |
12914
|
|
|
|
|
|
|
|
12915
|
|
|
|
|
|
|
///////// |
12916
|
|
|
|
|
|
|
// File: ner/ner.cpp |
12917
|
|
|
|
|
|
|
///////// |
12918
|
|
|
|
|
|
|
|
12919
|
|
|
|
|
|
|
// This file is part of NameTag . |
12920
|
|
|
|
|
|
|
// |
12921
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12922
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12923
|
|
|
|
|
|
|
// |
12924
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12925
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12926
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12927
|
|
|
|
|
|
|
|
12928
|
1
|
|
|
|
|
|
ner* ner::load(istream& is) { |
12929
|
1
|
|
|
|
|
|
ner_id id = ner_id(is.get()); |
12930
|
1
|
50
|
|
|
|
|
switch (id) { |
12931
|
|
|
|
|
|
|
case ner_ids::CZECH_NER: |
12932
|
|
|
|
|
|
|
case ner_ids::ENGLISH_NER: |
12933
|
|
|
|
|
|
|
case ner_ids::GENERIC_NER: |
12934
|
|
|
|
|
|
|
{ |
12935
|
1
|
|
|
|
|
|
unique_ptr res(new bilou_ner(id)); |
12936
|
1
|
50
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
50
|
|
|
|
|
|
12937
|
|
|
|
|
|
|
break; |
12938
|
|
|
|
|
|
|
} |
12939
|
|
|
|
|
|
|
} |
12940
|
|
|
|
|
|
|
|
12941
|
|
|
|
|
|
|
return nullptr; |
12942
|
|
|
|
|
|
|
} |
12943
|
|
|
|
|
|
|
|
12944
|
1
|
|
|
|
|
|
ner* ner::load(const char* fname) { |
12945
|
2
|
50
|
|
|
|
|
ifstream in(path_from_utf8(fname).c_str(), ifstream::in | ifstream::binary); |
12946
|
1
|
50
|
|
|
|
|
if (!in.is_open()) return nullptr; |
12947
|
|
|
|
|
|
|
|
12948
|
1
|
50
|
|
|
|
|
return load(in); |
12949
|
|
|
|
|
|
|
} |
12950
|
|
|
|
|
|
|
|
12951
|
|
|
|
|
|
|
///////// |
12952
|
|
|
|
|
|
|
// File: tagger/external_tagger.h |
12953
|
|
|
|
|
|
|
///////// |
12954
|
|
|
|
|
|
|
|
12955
|
|
|
|
|
|
|
// This file is part of NameTag . |
12956
|
|
|
|
|
|
|
// |
12957
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12958
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12959
|
|
|
|
|
|
|
// |
12960
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12961
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12962
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12963
|
|
|
|
|
|
|
|
12964
|
0
|
|
|
|
|
|
class external_tagger : public tagger { |
12965
|
|
|
|
|
|
|
public: |
12966
|
|
|
|
|
|
|
virtual bool load(istream& is) override; |
12967
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) override; |
12968
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const override; |
12969
|
|
|
|
|
|
|
}; |
12970
|
|
|
|
|
|
|
|
12971
|
|
|
|
|
|
|
///////// |
12972
|
|
|
|
|
|
|
// File: tagger/external_tagger.cpp |
12973
|
|
|
|
|
|
|
///////// |
12974
|
|
|
|
|
|
|
|
12975
|
|
|
|
|
|
|
// This file is part of NameTag . |
12976
|
|
|
|
|
|
|
// |
12977
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
12978
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
12979
|
|
|
|
|
|
|
// |
12980
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
12981
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
12982
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
12983
|
|
|
|
|
|
|
|
12984
|
|
|
|
|
|
|
inline static size_t strnchrpos(const char* str, char c, size_t len) { |
12985
|
|
|
|
|
|
|
size_t pos = 0; |
12986
|
0
|
0
|
|
|
|
|
for (; len--; str++, pos++) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12987
|
0
|
0
|
|
|
|
|
if (*str == c) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
12988
|
|
|
|
|
|
|
return pos; |
12989
|
|
|
|
|
|
|
|
12990
|
|
|
|
|
|
|
return pos; |
12991
|
|
|
|
|
|
|
} |
12992
|
|
|
|
|
|
|
|
12993
|
0
|
|
|
|
|
|
bool external_tagger::load(istream& /*is*/) { |
12994
|
0
|
|
|
|
|
|
return true; |
12995
|
|
|
|
|
|
|
} |
12996
|
|
|
|
|
|
|
|
12997
|
0
|
|
|
|
|
|
bool external_tagger::create_and_encode(const string& /*params*/, ostream& /*os*/) { |
12998
|
0
|
|
|
|
|
|
return true; |
12999
|
|
|
|
|
|
|
} |
13000
|
|
|
|
|
|
|
|
13001
|
0
|
|
|
|
|
|
void external_tagger::tag(const vector& forms, ner_sentence& sentence) const { |
13002
|
0
|
|
|
|
|
|
sentence.resize(forms.size()); |
13003
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
13004
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
13005
|
|
|
|
|
|
|
|
13006
|
|
|
|
|
|
|
size_t space = strnchrpos(form.str, ' ', form.len); |
13007
|
0
|
0
|
|
|
|
|
if (space < form.len) { |
13008
|
0
|
|
|
|
|
|
sentence.words[i].form.assign(form.str, space); |
13009
|
0
|
|
|
|
|
|
form.len -= space + 1; |
13010
|
0
|
|
|
|
|
|
form.str += space + 1; |
13011
|
|
|
|
|
|
|
|
13012
|
|
|
|
|
|
|
space = strnchrpos(form.str, ' ', form.len); |
13013
|
0
|
0
|
|
|
|
|
if (space < form.len) { |
13014
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma.assign(form.str, space); |
13015
|
0
|
|
|
|
|
|
form.len -= space + 1; |
13016
|
0
|
|
|
|
|
|
form.str += space + 1; |
13017
|
|
|
|
|
|
|
|
13018
|
0
|
|
|
|
|
|
sentence.words[i].tag.assign(form.str, strnchrpos(form.str, ' ', form.len)); |
13019
|
|
|
|
|
|
|
} else { |
13020
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma.assign(form.str, form.len); |
13021
|
|
|
|
|
|
|
sentence.words[i].tag.clear(); |
13022
|
|
|
|
|
|
|
} |
13023
|
|
|
|
|
|
|
} else { |
13024
|
0
|
|
|
|
|
|
sentence.words[i].form.assign(form.str, form.len); |
13025
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma = sentence.words[i].form; |
13026
|
|
|
|
|
|
|
sentence.words[i].tag.clear(); |
13027
|
|
|
|
|
|
|
} |
13028
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.assign(1, sentence.words[i].raw_lemma); |
13029
|
0
|
|
|
|
|
|
sentence.words[i].lemma_id = sentence.words[i].raw_lemma; |
13030
|
|
|
|
|
|
|
sentence.words[i].lemma_comments.clear(); |
13031
|
|
|
|
|
|
|
} |
13032
|
0
|
|
|
|
|
|
} |
13033
|
|
|
|
|
|
|
|
13034
|
|
|
|
|
|
|
///////// |
13035
|
|
|
|
|
|
|
// File: tagger/morphodita_tagger.h |
13036
|
|
|
|
|
|
|
///////// |
13037
|
|
|
|
|
|
|
|
13038
|
|
|
|
|
|
|
// This file is part of NameTag . |
13039
|
|
|
|
|
|
|
// |
13040
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
13041
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13042
|
|
|
|
|
|
|
// |
13043
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13044
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13045
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13046
|
|
|
|
|
|
|
|
13047
|
0
|
|
|
|
|
|
class morphodita_tagger : public tagger { |
13048
|
|
|
|
|
|
|
public: |
13049
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const override; |
13050
|
|
|
|
|
|
|
|
13051
|
|
|
|
|
|
|
protected: |
13052
|
|
|
|
|
|
|
virtual bool load(istream& is) override; |
13053
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) override; |
13054
|
|
|
|
|
|
|
|
13055
|
|
|
|
|
|
|
private: |
13056
|
|
|
|
|
|
|
unique_ptr tagger; |
13057
|
|
|
|
|
|
|
const morphodita::morpho* morpho; |
13058
|
|
|
|
|
|
|
|
13059
|
0
|
|
|
|
|
|
struct cache { |
13060
|
|
|
|
|
|
|
vector tags, analyses; |
13061
|
|
|
|
|
|
|
string lemma_cased; |
13062
|
|
|
|
|
|
|
}; |
13063
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
13064
|
|
|
|
|
|
|
}; |
13065
|
|
|
|
|
|
|
|
13066
|
|
|
|
|
|
|
///////// |
13067
|
|
|
|
|
|
|
// File: tagger/morphodita_tagger.cpp |
13068
|
|
|
|
|
|
|
///////// |
13069
|
|
|
|
|
|
|
|
13070
|
|
|
|
|
|
|
// This file is part of NameTag . |
13071
|
|
|
|
|
|
|
// |
13072
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
13073
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13074
|
|
|
|
|
|
|
// |
13075
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13076
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13077
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13078
|
|
|
|
|
|
|
|
13079
|
1
|
|
|
|
|
|
bool morphodita_tagger::load(istream& is) { |
13080
|
1
|
|
|
|
|
|
tagger.reset(morphodita::tagger::load(is)); |
13081
|
1
|
50
|
|
|
|
|
morpho = tagger ? tagger->get_morpho() : nullptr; |
13082
|
1
|
50
|
|
|
|
|
return tagger && morpho; |
|
|
50
|
|
|
|
|
|
13083
|
|
|
|
|
|
|
} |
13084
|
|
|
|
|
|
|
|
13085
|
0
|
|
|
|
|
|
bool morphodita_tagger::create_and_encode(const string& params, ostream& os) { |
13086
|
0
|
0
|
|
|
|
|
if (params.empty()) return cerr << "Missing tagger_file argument to morphodita_tagger!" << endl, false; |
13087
|
|
|
|
|
|
|
|
13088
|
0
|
|
|
|
|
|
ifstream in(path_from_utf8(params).c_str(), ifstream::in | ifstream::binary); |
13089
|
0
|
0
|
|
|
|
|
if (!in.is_open()) return cerr << "Cannot open morphodita tagger file '" << params << "'!" << endl, false; |
13090
|
0
|
0
|
|
|
|
|
if (!load(in)) return cerr << "Cannot load morphodita tagger from file '" << params << "'!" << endl, false; |
|
|
0
|
|
|
|
|
|
13091
|
|
|
|
|
|
|
|
13092
|
0
|
0
|
|
|
|
|
if (!in.seekg(0, ifstream::beg)) return cerr << "Cannot seek in morphodita tagger file '" << params << "'!" << endl, false; |
|
|
0
|
|
|
|
|
|
13093
|
0
|
0
|
|
|
|
|
os << in.rdbuf(); |
13094
|
|
|
|
|
|
|
|
13095
|
0
|
|
|
|
|
|
return bool(os); |
13096
|
|
|
|
|
|
|
} |
13097
|
|
|
|
|
|
|
|
13098
|
2
|
|
|
|
|
|
void morphodita_tagger::tag(const vector& forms, ner_sentence& sentence) const { |
13099
|
2
|
|
|
|
|
|
sentence.resize(0); |
13100
|
2
|
50
|
|
|
|
|
if (!tagger || !morpho) return; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
13101
|
|
|
|
|
|
|
|
13102
|
|
|
|
|
|
|
// Obtain a cache |
13103
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
13104
|
2
|
100
|
|
|
|
|
if (!c) c = new cache(); |
13105
|
|
|
|
|
|
|
|
13106
|
|
|
|
|
|
|
// Tag |
13107
|
2
|
|
|
|
|
|
tagger->tag(forms, c->tags); |
13108
|
|
|
|
|
|
|
|
13109
|
|
|
|
|
|
|
// Fill sentence |
13110
|
2
|
50
|
|
|
|
|
if (c->tags.size() >= forms.size()) { |
13111
|
2
|
|
|
|
|
|
sentence.resize(forms.size()); |
13112
|
9
|
100
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
13113
|
7
|
|
|
|
|
|
sentence.words[i].form.assign(forms[i].str, morpho->raw_form_len(forms[i])); |
13114
|
|
|
|
|
|
|
|
13115
|
7
|
|
|
|
|
|
const string& lemma = c->tags[i].lemma; |
13116
|
|
|
|
|
|
|
|
13117
|
7
|
|
|
|
|
|
unsigned raw_lemma_len = morpho->raw_lemma_len(lemma); |
13118
|
7
|
|
|
|
|
|
sentence.words[i].raw_lemma.assign(lemma, 0, raw_lemma_len); |
13119
|
|
|
|
|
|
|
|
13120
|
7
|
|
|
|
|
|
morpho->analyze(forms[i], morphodita::morpho::GUESSER, c->analyses); |
13121
|
7
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.clear(); |
13122
|
18
|
100
|
|
|
|
|
for (auto&& analysis : c->analyses) |
13123
|
11
|
50
|
|
|
|
|
sentence.words[i].raw_lemmas_all.emplace_back(analysis.lemma, 0, morpho->raw_lemma_len(analysis.lemma)); |
|
|
50
|
|
|
|
|
|
13124
|
|
|
|
|
|
|
sort(sentence.words[i].raw_lemmas_all.begin(), sentence.words[i].raw_lemmas_all.end()); |
13125
|
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.erase(unique(sentence.words[i].raw_lemmas_all.begin(), sentence.words[i].raw_lemmas_all.end()), |
13126
|
7
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.end()); |
13127
|
|
|
|
|
|
|
|
13128
|
7
|
|
|
|
|
|
unsigned lemma_id_len = morpho->lemma_id_len(lemma); |
13129
|
7
|
|
|
|
|
|
sentence.words[i].lemma_id.assign(sentence.words[i].raw_lemma).append(lemma, raw_lemma_len, lemma_id_len - raw_lemma_len); |
13130
|
7
|
|
|
|
|
|
sentence.words[i].lemma_comments.assign(lemma, lemma_id_len, string::npos); |
13131
|
14
|
|
|
|
|
|
sentence.words[i].tag = c->tags[i].tag; |
13132
|
|
|
|
|
|
|
} |
13133
|
|
|
|
|
|
|
} |
13134
|
|
|
|
|
|
|
|
13135
|
2
|
|
|
|
|
|
caches.push(c); |
13136
|
|
|
|
|
|
|
} |
13137
|
|
|
|
|
|
|
|
13138
|
|
|
|
|
|
|
///////// |
13139
|
|
|
|
|
|
|
// File: tagger/trivial_tagger.h |
13140
|
|
|
|
|
|
|
///////// |
13141
|
|
|
|
|
|
|
|
13142
|
|
|
|
|
|
|
// This file is part of NameTag . |
13143
|
|
|
|
|
|
|
// |
13144
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
13145
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13146
|
|
|
|
|
|
|
// |
13147
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13148
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13149
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13150
|
|
|
|
|
|
|
|
13151
|
0
|
|
|
|
|
|
class trivial_tagger : public tagger { |
13152
|
|
|
|
|
|
|
public: |
13153
|
|
|
|
|
|
|
virtual bool load(istream& is) override; |
13154
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) override; |
13155
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const override; |
13156
|
|
|
|
|
|
|
}; |
13157
|
|
|
|
|
|
|
|
13158
|
|
|
|
|
|
|
///////// |
13159
|
|
|
|
|
|
|
// File: tagger/tagger.cpp |
13160
|
|
|
|
|
|
|
///////// |
13161
|
|
|
|
|
|
|
|
13162
|
|
|
|
|
|
|
// This file is part of NameTag . |
13163
|
|
|
|
|
|
|
// |
13164
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
13165
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13166
|
|
|
|
|
|
|
// |
13167
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13168
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13169
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13170
|
|
|
|
|
|
|
|
13171
|
1
|
|
|
|
|
|
tagger* tagger::load_instance(istream& is) { |
13172
|
1
|
|
|
|
|
|
unique_ptr res(create(tagger_id(is.get()))); |
13173
|
|
|
|
|
|
|
|
13174
|
1
|
50
|
|
|
|
|
if (!res) return nullptr; |
13175
|
1
|
50
|
|
|
|
|
if (!res->load(is)) return nullptr; |
|
|
50
|
|
|
|
|
|
13176
|
|
|
|
|
|
|
|
13177
|
1
|
|
|
|
|
|
return res.release(); |
13178
|
|
|
|
|
|
|
} |
13179
|
|
|
|
|
|
|
|
13180
|
0
|
|
|
|
|
|
tagger* tagger::create_and_encode_instance(const string& tagger_id_and_params, ostream& os) { |
13181
|
|
|
|
|
|
|
string tagger_id, params; |
13182
|
|
|
|
|
|
|
|
13183
|
|
|
|
|
|
|
// Split the id and params using optional : |
13184
|
0
|
|
|
|
|
|
auto colon = tagger_id_and_params.find(':'); |
13185
|
0
|
0
|
|
|
|
|
if (colon == string::npos) { |
13186
|
|
|
|
|
|
|
tagger_id = tagger_id_and_params; |
13187
|
|
|
|
|
|
|
} else { |
13188
|
0
|
0
|
|
|
|
|
tagger_id = tagger_id_and_params.substr(0, colon); |
13189
|
0
|
0
|
|
|
|
|
params = tagger_id_and_params.substr(colon + 1); |
13190
|
|
|
|
|
|
|
} |
13191
|
|
|
|
|
|
|
|
13192
|
|
|
|
|
|
|
// Parse tagger_id |
13193
|
|
|
|
|
|
|
tagger_ids::tagger_id id; |
13194
|
0
|
0
|
|
|
|
|
if (!tagger_ids::parse(tagger_id, id)) return cerr << "Unknown tagger_id '" << tagger_id << "'!" << endl, nullptr; |
|
|
0
|
|
|
|
|
|
13195
|
|
|
|
|
|
|
|
13196
|
|
|
|
|
|
|
// Create instance |
13197
|
0
|
0
|
|
|
|
|
unique_ptr res(create(id)); |
13198
|
0
|
0
|
|
|
|
|
if (!res) return cerr << "Cannot create instance for tagger_id '" << tagger_id << "'!" << endl, nullptr; |
13199
|
|
|
|
|
|
|
|
13200
|
|
|
|
|
|
|
// Load and encode the tagger |
13201
|
0
|
0
|
|
|
|
|
os.put(id); |
13202
|
0
|
0
|
|
|
|
|
if (!res->create_and_encode(params, os)) return cerr << "Cannot encode instance of tagger_id '" << tagger_id << "'!" << endl, nullptr; |
|
|
0
|
|
|
|
|
|
13203
|
|
|
|
|
|
|
|
13204
|
|
|
|
|
|
|
return res.release(); |
13205
|
|
|
|
|
|
|
} |
13206
|
|
|
|
|
|
|
|
13207
|
1
|
|
|
|
|
|
tagger* tagger::create(tagger_id id) { |
13208
|
1
|
|
|
|
|
|
switch (id) { |
13209
|
|
|
|
|
|
|
case tagger_ids::TRIVIAL: |
13210
|
0
|
|
|
|
|
|
return new trivial_tagger(); |
13211
|
|
|
|
|
|
|
case tagger_ids::EXTERNAL: |
13212
|
0
|
|
|
|
|
|
return new external_tagger(); |
13213
|
|
|
|
|
|
|
case tagger_ids::MORPHODITA: |
13214
|
1
|
|
|
|
|
|
return new morphodita_tagger(); |
13215
|
|
|
|
|
|
|
} |
13216
|
|
|
|
|
|
|
|
13217
|
|
|
|
|
|
|
return nullptr; |
13218
|
|
|
|
|
|
|
} |
13219
|
|
|
|
|
|
|
|
13220
|
|
|
|
|
|
|
///////// |
13221
|
|
|
|
|
|
|
// File: tagger/trivial_tagger.cpp |
13222
|
|
|
|
|
|
|
///////// |
13223
|
|
|
|
|
|
|
|
13224
|
|
|
|
|
|
|
// This file is part of NameTag . |
13225
|
|
|
|
|
|
|
// |
13226
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
13227
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13228
|
|
|
|
|
|
|
// |
13229
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13230
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13231
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13232
|
|
|
|
|
|
|
|
13233
|
0
|
|
|
|
|
|
bool trivial_tagger::load(istream& /*is*/) { |
13234
|
0
|
|
|
|
|
|
return true; |
13235
|
|
|
|
|
|
|
} |
13236
|
|
|
|
|
|
|
|
13237
|
0
|
|
|
|
|
|
bool trivial_tagger::create_and_encode(const string& /*params*/, ostream& /*os*/) { |
13238
|
0
|
|
|
|
|
|
return true; |
13239
|
|
|
|
|
|
|
} |
13240
|
|
|
|
|
|
|
|
13241
|
0
|
|
|
|
|
|
void trivial_tagger::tag(const vector& forms, ner_sentence& sentence) const { |
13242
|
0
|
|
|
|
|
|
sentence.resize(forms.size()); |
13243
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
13244
|
0
|
|
|
|
|
|
sentence.words[i].form.assign(forms[i].str, forms[i].len); |
13245
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma = sentence.words[i].form; |
13246
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.assign(1, sentence.words[i].raw_lemma); |
13247
|
0
|
|
|
|
|
|
sentence.words[i].lemma_id = sentence.words[i].form; |
13248
|
|
|
|
|
|
|
sentence.words[i].lemma_comments.clear(); |
13249
|
|
|
|
|
|
|
sentence.words[i].tag.clear(); |
13250
|
|
|
|
|
|
|
} |
13251
|
0
|
|
|
|
|
|
} |
13252
|
|
|
|
|
|
|
|
13253
|
|
|
|
|
|
|
///////// |
13254
|
|
|
|
|
|
|
// File: tokenizer/morphodita_tokenizer_wrapper.cpp |
13255
|
|
|
|
|
|
|
///////// |
13256
|
|
|
|
|
|
|
|
13257
|
|
|
|
|
|
|
// This file is part of NameTag . |
13258
|
|
|
|
|
|
|
// |
13259
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
13260
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13261
|
|
|
|
|
|
|
// |
13262
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13263
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13264
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13265
|
|
|
|
|
|
|
|
13266
|
3
|
|
|
|
|
|
void morphodita_tokenizer_wrapper::set_text(string_piece text, bool make_copy) { |
13267
|
3
|
|
|
|
|
|
morphodita_tokenizer->set_text(text, make_copy); |
13268
|
3
|
|
|
|
|
|
} |
13269
|
|
|
|
|
|
|
|
13270
|
7
|
|
|
|
|
|
bool morphodita_tokenizer_wrapper::next_sentence(vector* forms, vector* tokens) { |
13271
|
7
|
|
|
|
|
|
return morphodita_tokenizer->next_sentence(forms, (vector*) tokens); |
13272
|
|
|
|
|
|
|
} |
13273
|
|
|
|
|
|
|
|
13274
|
|
|
|
|
|
|
///////// |
13275
|
|
|
|
|
|
|
// File: tokenizer/tokenizer.cpp |
13276
|
|
|
|
|
|
|
///////// |
13277
|
|
|
|
|
|
|
|
13278
|
|
|
|
|
|
|
// This file is part of NameTag . |
13279
|
|
|
|
|
|
|
// |
13280
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
13281
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13282
|
|
|
|
|
|
|
// |
13283
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13284
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13285
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13286
|
|
|
|
|
|
|
|
13287
|
1
|
|
|
|
|
|
tokenizer* tokenizer::new_vertical_tokenizer() { |
13288
|
1
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_vertical_tokenizer()); |
13289
|
|
|
|
|
|
|
} |
13290
|
|
|
|
|
|
|
|
13291
|
|
|
|
|
|
|
///////// |
13292
|
|
|
|
|
|
|
// File: unilib/unicode.cpp |
13293
|
|
|
|
|
|
|
///////// |
13294
|
|
|
|
|
|
|
|
13295
|
|
|
|
|
|
|
// This file is part of UniLib . |
13296
|
|
|
|
|
|
|
// |
13297
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
13298
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13299
|
|
|
|
|
|
|
// |
13300
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13301
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13302
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13303
|
|
|
|
|
|
|
// |
13304
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
13305
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
13306
|
|
|
|
|
|
|
|
13307
|
|
|
|
|
|
|
namespace unilib { |
13308
|
|
|
|
|
|
|
|
13309
|
|
|
|
|
|
|
const char32_t unicode::CHARS; |
13310
|
|
|
|
|
|
|
|
13311
|
|
|
|
|
|
|
const int32_t unicode::DEFAULT_CAT; |
13312
|
|
|
|
|
|
|
|
13313
|
|
|
|
|
|
|
const uint8_t unicode::category_index[unicode::CHARS >> 8] = { |
13314
|
|
|
|
|
|
|
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,33,41,42,43,44,45,46,47,48,39,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,49,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,50,17,17,17,51,17,52,53,54,55,56,57,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,58,59,59,59,59,59,59,59,59,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,17,61,62,17,63,64,65,66,67,68,69,70,71,17,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,17,17,17,97,98,99,100,100,100,100,100,100,100,100,100,101,17,17,17,17,102,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,17,17,103,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,17,17,104,105,100,100,106,107,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,108,17,17,17,17,109,110,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,111,17,112,113,100,100,100,100,100,100,100,100,100,114,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,115,116,117,118,119,120,121,122,123,39,39,124,100,100,100,100,125,126,127,128,100,129,100,100,130,131,132,100,100,133,134,135,100,136,137,138,139,39,39,140,141,142,39,143,144,100,100,100,100,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, |
13315
|
|
|
|
|
|
|
17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,145,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,146,147,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,148,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,149,100,100,100,100,100,100,100,100,100,100,100,100,17,17,150,100,100,100,100,100,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,151,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,152,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100, 
|
13316
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
13317
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
13318
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
13319
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
13320
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,153,154,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
13321
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,155,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60, |
13322
|
|
|
|
|
|
|
60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,155 |
13323
|
|
|
|
|
|
|
}; |
13324
|
|
|
|
|
|
|
|
13325
|
|
|
|
|
|
|
const uint8_t unicode::category_block[][256] = { |
13326
|
|
|
|
|
|
|
{_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Zs,_Po,_Po,_Po,_Sc,_Po,_Po,_Po,_Ps,_Pe,_Po,_Sm,_Po,_Pd,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Sm,_Sm,_Sm,_Po,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ps,_Po,_Pe,_Sk,_Pc,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ps,_Sm,_Pe,_Sm,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Zs,_Po,_Sc,_Sc,_Sc,_Sc,_So,_Po,_Sk,_So,_Lo,_Pi,_Sm,_Cf,_So,_Sk,_So,_Sm,_No,_No,_Sk,_Ll,_Po,_Po,_Sk,_No,_Lo,_Pf,_No,_No,_No,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
13327
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lo,_Lu,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lu,_Lt,_Ll,_Lu,_Lt,_Ll,_Lu,_Lt,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Lt,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
13328
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Sk,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk}, |
13329
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lu,_Ll,_Lu,_Ll,_Lm,_Sk,_Lu,_Ll,_Cn,_Cn,_Lm,_Ll,_Ll,_Ll,_Po,_Lu,_Cn,_Cn,_Cn,_Cn,_Sk,_Sk,_Lu,_Po,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Sm,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu}, |
13330
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Me,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
13331
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Po,_Pd,_Cn,_Cn,_So,_So,_Sc,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Pd,_Mn,_Po,_Mn,_Mn,_Po,_Mn,_Mn,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13332
|
|
|
|
|
|
|
{_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Sm,_Sm,_Sm,_Po,_Po,_Sc,_Po,_Po,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Cf,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cf,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Mn,_Mn,_So,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_So,_So,_Lo}, |
13333
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cf,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_So,_Po,_Po,_Po,_Lm,_Cn,_Cn,_Mn,_Sc,_Sc}, |
13334
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Cn,_Cn,_Po,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sk,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cf,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
13335
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Sc,_Sc,_No,_No,_No,_No,_No,_No,_So,_Sc,_Lo,_Po,_Mn,_Cn}, |
13336
|
|
|
|
|
|
|
{_Cn,_Mn,_Mn,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Mn,_Cn,_Mc,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mn,_Mn,_Lo,_Lo,_Lo,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mc,_Cn,_Mc,_Mc,_Mn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Sc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
13337
|
|
|
|
|
|
|
{_Cn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_So,_Lo,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Mc,_Mn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_So,_So,_So,_So,_So,_So,_Sc,_So,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13338
|
|
|
|
|
|
|
{_Mn,_Mc,_Mc,_Mc,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_No,_No,_No,_No,_No,_No,_No,_So,_Lo,_Mn,_Mc,_Mc,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mc,_Mc,_Cn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Lo,_Lo,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13339
|
|
|
|
|
|
|
{_Mn,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Mc,_Mn,_Lo,_So,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Mc,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Cn,_Mn,_Cn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Mc,_Mc,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13340
|
|
|
|
|
|
|
{_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Sc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lm,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13341
|
|
|
|
|
|
|
{_Lo,_So,_So,_So,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_Po,_So,_So,_So,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_Mn,_So,_Mn,_So,_Mn,_Ps,_Pe,_Ps,_Pe,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13342
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Lo,_Mc,_Mc,_Mc,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Lo,_Mc,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mc,_Mc,_Mc,_Mn,_So,_So,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Po,_Lm,_Ll,_Ll,_Ll}, |
13343
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13344
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13345
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn}, |
13346
|
|
|
|
|
|
|
{_Pd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13347
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Zs,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Ps,_Pe,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Nl,_Nl,_Nl,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13348
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Sc,_Lo,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13349
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Po,_Po,_Po,_Po,_Mn,_Mn,_Mn,_Cf,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13350
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13351
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mc,_Mn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13352
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Po,_Po,_Cn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po}, |
13353
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Po,_Po,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Po,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mc,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13354
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
13355
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
13356
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Lu,_Cn,_Lu,_Cn,_Lu,_Cn,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Ll,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Cn,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Sk,_Sk,_Sk,_Cn,_Cn,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Sk,_Cn}, |
13357
|
|
|
|
|
|
|
{_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Cf,_Cf,_Cf,_Cf,_Cf,_Pd,_Pd,_Pd,_Pd,_Pd,_Pd,_Po,_Po,_Pi,_Pf,_Ps,_Pi,_Pi,_Pf,_Ps,_Pi,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Zl,_Zp,_Cf,_Cf,_Cf,_Cf,_Cf,_Zs,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pi,_Pf,_Po,_Po,_Po,_Po,_Pc,_Pc,_Po,_Po,_Po,_Sm,_Ps,_Pe,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Sm,_Po,_Pc,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Zs,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_No,_Lm,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_Sm,_Sm,_Sm,_Ps,_Pe,_Lm,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Sm,_Sm,_Sm,_Ps,_Pe,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Me,_Me,_Me,_Mn,_Me,_Me,_Me,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13358
|
|
|
|
|
|
|
{_So,_So,_Lu,_So,_So,_So,_So,_Lu,_So,_So,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Ll,_So,_Lu,_So,_So,_Sm,_Lu,_Lu,_Lu,_Lu,_Lu,_So,_So,_So,_So,_So,_So,_Lu,_So,_Lu,_So,_Lu,_So,_Lu,_Lu,_Lu,_Lu,_So,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lo,_Lo,_Lo,_Lo,_Ll,_So,_So,_Ll,_Ll,_Lu,_Lu,_Sm,_Sm,_Sm,_Sm,_Sm,_Lu,_Ll,_Ll,_Ll,_Ll,_So,_Sm,_So,_So,_Ll,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Lu,_Ll,_Nl,_Nl,_Nl,_Nl,_No,_So,_So,_Cn,_Cn,_Cn,_Cn,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_So,_So,_Sm,_So,_So,_Sm,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_Sm,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
13359
|
|
|
|
|
|
|
{_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
13360
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_Ps,_Pe,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13361
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No}, |
13362
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
13363
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13364
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
13365
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13366
|
|
|
|
|
|
|
{_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Sm,_Sm}, |
13367
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13368
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_So,_So,_So,_So,_So,_So,_Lu,_Ll,_Lu,_Ll,_Mn,_Mn,_Mn,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_No,_Po,_Po}, |
13369
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
13370
|
|
|
|
|
|
|
{_Po,_Po,_Pi,_Pf,_Pi,_Pf,_Po,_Po,_Po,_Pi,_Pf,_Po,_Pi,_Pf,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Po,_Po,_Pd,_Po,_Pi,_Pf,_Po,_Po,_Pi,_Pf,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Pd,_Po,_Po,_Po,_Po,_Pd,_Po,_Ps,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_So,_Po,_Po,_Po,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Pd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13371
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn}, |
13372
|
|
|
|
|
|
|
{_Zs,_Po,_Po,_Po,_So,_Lm,_Lo,_Nl,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_So,_So,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Pd,_Ps,_Pe,_Pe,_So,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Pd,_Lm,_Lm,_Lm,_Lm,_Lm,_So,_So,_Nl,_Nl,_Nl,_Lm,_Lo,_Po,_So,_So,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Sk,_Sk,_Lm,_Lm,_Lo,_Pd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Lm,_Lm,_Lm,_Lo}, |
13373
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_So,_So,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13374
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13375
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13376
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13377
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Po,_Po}, |
13378
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lo,_Mn,_Me,_Me,_Me,_Po,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Lm,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Lm,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13379
|
|
|
|
|
|
|
{_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Sk,_Sk,_Lu,_Ll,_Lu,_Ll,_Lo,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Ll,_Cn,_Ll,_Cn,_Ll,_Lu,_Ll,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lu,_Ll,_Lo,_Lm,_Lm,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13380
|
|
|
|
|
|
|
{_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mc,_So,_So,_So,_So,_Mn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_So,_So,_Sc,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Lo,_Po,_Lo,_Lo,_Mn}, |
13381
|
|
|
|
|
|
|
{_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Lm,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn}, |
13382
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_Lo,_Mc,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Mn,_Mn,_Mn,_Lo,_Lo,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lm,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mc,_Mc,_Po,_Po,_Lo,_Lm,_Lm,_Mc,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13383
|
|
|
|
|
|
|
{_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sk,_Lm,_Lm,_Lm,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Sk,_Sk,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mc,_Mc,_Mn,_Mc,_Mc,_Po,_Mc,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13384
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn}, |
13385
|
|
|
|
|
|
|
{_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs}, |
13386
|
|
|
|
|
|
|
{_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co}, |
13387
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13388
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13389
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Pe,_Ps,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sc,_So,_So,_So}, |
13390
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Ps,_Pe,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Pd,_Pd,_Pc,_Pc,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Ps,_Pe,_Po,_Po,_Po,_Po,_Pc,_Pc,_Pc,_Po,_Po,_Po,_Cn,_Po,_Po,_Po,_Po,_Pd,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Po,_Sm,_Pd,_Sm,_Sm,_Sm,_Cn,_Po,_Sc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cf}, |
13391
|
|
|
|
|
|
|
{_Cn,_Po,_Po,_Po,_Sc,_Po,_Po,_Po,_Ps,_Pe,_Po,_Sm,_Po,_Pd,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Sm,_Sm,_Sm,_Po,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ps,_Po,_Pe,_Sk,_Pc,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ps,_Sm,_Pe,_Sm,_Ps,_Pe,_Po,_Ps,_Pe,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Sc,_Sc,_Sm,_Sk,_So,_Sc,_Sc,_Cn,_So,_Sm,_Sm,_Sm,_Sm,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cf,_Cf,_So,_So,_Cn,_Cn}, |
13392
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13393
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Cn,_Cn}, |
13394
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn}, |
13395
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Nl,_Nl,_Nl,_Nl,_Nl,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13396
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn}, |
13397
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13398
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13399
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Po,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No}, |
13400
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_No,_No,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No}, |
13401
|
|
|
|
|
|
|
{_Lo,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13402
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13403
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No}, |
13404
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13405
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Pd,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn}, |
13406
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13407
|
|
|
|
|
|
|
{_Mc,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mn,_Lo,_Lo,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Po,_Po,_Cf,_Po,_Po,_Po,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13408
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Lo,_Mc,_Mc,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Po,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Mn,_Mn,_Mn,_Mn,_Po,_Mc,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Po,_Lo,_Po,_Po,_Po,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13409
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Mn,_Lo,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13410
|
|
|
|
|
|
|
{_Mn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Lo,_Mc,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mc,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13411
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Cn,_Po,_Mn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Lo,_Lo,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13412
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13413
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13414
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_Po,_Po,_Po,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13415
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo}, |
13416
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Cn,_Cn,_Mn,_Mn,_Mc,_Mn,_Lo,_Mc,_Lo,_Mc,_Mn,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Lo,_Po,_Lo,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13417
|
|
|
|
|
|
|
{_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Lo,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Lo,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13418
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13419
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13420
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Cn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13421
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13422
|
|
|
|
|
|
|
{_Mn,_Mn,_Lo,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_Sc,_Sc,_Sc,_Sc,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po}, |
13423
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13424
|
|
|
|
|
|
|
{_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Cn,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13425
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13426
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13427
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13428
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13429
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13430
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13431
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_Lm,_Lm,_Lm,_Lm,_Po,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_No,_No,_No,_No,_No,_No,_No,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13432
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13433
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Po,_Lm,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13434
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13435
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13436
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13437
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Cn}, |
13438
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13439
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn}, |
13440
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_So,_Mn,_Mn,_Po,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13441
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13442
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13443
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mc,_Mc,_Mn,_Mn,_Mn,_So,_So,_So,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13444
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13445
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13446
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Cn,_Lu,_Lu,_Cn,_Cn,_Lu,_Cn,_Cn,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
13447
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
13448
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll}, |
13449
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd}, |
13450
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13451
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13452
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13453
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Lo,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13454
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Sc}, |
13455
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13456
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn}, |
13457
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13458
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13459
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_Sc,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13460
|
|
|
|
|
|
|
{_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13461
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Sm,_Sm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13462
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13463
|
|
|
|
|
|
|
{_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
13464
|
|
|
|
|
|
|
{_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13465
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sk,_Sk,_Sk,_Sk,_Sk}, |
13466
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn}, |
13467
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13468
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13469
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13470
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13471
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13472
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13473
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13474
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13475
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13476
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13477
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
13478
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13479
|
|
|
|
|
|
|
{_Cn,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13480
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
13481
|
|
|
|
|
|
|
{_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Cn,_Cn} |
13482
|
|
|
|
|
|
|
}; |
13483
|
|
|
|
|
|
|
|
13484
|
|
|
|
|
|
|
const uint8_t unicode::othercase_index[unicode::CHARS >> 8] = { |
13485
|
|
|
|
|
|
|
0,1,2,3,4,5,6,6,6,6,6,6,6,6,6,6,7,6,6,8,6,6,6,6,6,6,6,6,9,10,11,12,6,13,6,6,14,6,6,6,6,6,6,6,15,16,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,17,18,6,6,6,19,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,20,6,6,6,6,21,22,6,6,6,6,6,6,23,6,6,6,6,6,6,6,6,6,6,6,24,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,25,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,26,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, |
13486
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
13487
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
13488
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
13489
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 |
13490
|
|
|
|
|
|
|
}; |
13491
|
|
|
|
|
|
|
|
13492
|
|
|
|
|
|
|
const char32_t unicode::othercase_block[][256] = { |
13493
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24833,25089,25345,25601,25857,26113,26369,26625,26881,27137,27393,27649,27905,28161,28417,28673,28929,29185,29441,29697,29953,30209,30465,30721,30977,31233,0,0,0,0,0,0,16642,16898,17154,17410,17666,17922,18178,18434,18690,18946,19202,19458,19714,19970,20226,20482,20738,20994,21250,21506,21762,22018,22274,22530,22786,23042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,236546,0,0,0,0,0,0,0,0,0,0,57345,57601,57857,58113,58369,58625,58881,59137,59393,59649,59905,60161,60417,60673,60929,61185,61441,61697,61953,62209,62465,62721,62977,0,63489,63745,64001,64257,64513,64769,65025,0,49154,49410,49666,49922,50178,50434,50690,50946,51202,51458,51714,51970,52226,52482,52738,52994,53250,53506,53762,54018,54274,54530,54786,0,55298,55554,55810,56066,56322,56578,56834,96258}, |
13494
|
|
|
|
|
|
|
{65793,65538,66305,66050,66817,66562,67329,67074,67841,67586,68353,68098,68865,68610,69377,69122,69889,69634,70401,70146,70913,70658,71425,71170,71937,71682,72449,72194,72961,72706,73473,73218,73985,73730,74497,74242,75009,74754,75521,75266,76033,75778,76545,76290,77057,76802,77569,77314,26881,18690,78593,78338,79105,78850,79617,79362,0,80385,80130,80897,80642,81409,81154,81921,81666,82433,82178,82945,82690,83457,83202,83969,83714,0,84737,84482,85249,84994,85761,85506,86273,86018,86785,86530,87297,87042,87809,87554,88321,88066,88833,88578,89345,89090,89857,89602,90369,90114,90881,90626,91393,91138,91905,91650,92417,92162,92929,92674,93441,93186,93953,93698,94465,94210,94977,94722,95489,95234,96001,95746,65281,96769,96514,97281,97026,97793,97538,21250,148226,152321,99073,98818,99585,99330,152577,100353,100098,153089,153345,101377,101122,0,122113,153857,154369,102913,102658,155649,156417,128514,157953,157697,104705,104450,146690,0,159489,160257,139266,161025,106753,106498,107265,107010,107777,107522,163841,108545,108290,164609,0,0,109825,109570,165889,110593,110338,166401,166657,111617,111362,112129,111874,168449,112897,112642,0,0,113921,113666,0,128770,0,0,0,0,115974,116228,115717,116742,116996,116485,117510,117764,117253,118273,118018,118785,118530,119297,119042,119809,119554,120321,120066,120833,120578,121345,121090,121857,121602,101890,122625,122370,123137,122882,123649,123394,124161,123906,124673,124418,125185,124930,125697,125442,126209,125954,126721,126466,0,127494,127748,127237,128257,128002,103681,114433,129281,129026,129793,129538,130305,130050,130817,130562}, |
13495
|
|
|
|
|
|
|
{131329,131074,131841,131586,132353,132098,132865,132610,133377,133122,133889,133634,134401,134146,134913,134658,135425,135170,135937,135682,136449,136194,136961,136706,137473,137218,137985,137730,138497,138242,139009,138754,105985,0,140033,139778,140545,140290,141057,140802,141569,141314,142081,141826,142593,142338,143105,142850,143617,143362,144129,143874,0,0,0,0,0,0,2909441,146433,146178,104961,2909697,2915842,2916098,147969,147714,98305,166145,166913,149249,148994,149761,149506,150273,150018,150785,150530,151297,151042,2912002,2911490,2912258,98562,99842,0,100610,100866,0,102146,0,102402,10988290,0,0,0,103170,10988546,0,103426,0,10980610,10988034,0,104194,103938,10989058,2908674,10988802,0,0,105474,0,2911746,105730,0,0,106242,0,0,0,0,0,0,0,2909186,0,0,108034,0,10994946,108802,0,0,0,10989826,110082,148482,110850,111106,148738,0,0,0,0,0,112386,0,0,0,0,0,0,0,0,0,0,10990082,10989570,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13496
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,235778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,225537,225282,226049,225794,0,0,227073,226818,0,0,0,261378,261634,261890,0,258817,0,0,0,0,0,0,240641,0,240897,241153,241409,0,248833,0,249089,249345,0,241921,242177,242433,242689,242945,243201,243457,243713,243969,244225,244481,244737,244993,245249,245505,245761,246017,0,246529,246785,247041,247297,247553,247809,248065,248321,248577,230914,231426,231682,231938,0,233730,233986,234242,234498,234754,235010,235266,235522,235778,236034,236290,236546,236802,237058,237314,237570,237826,238338,238338,238594,238850,239106,239362,239618,239874,240130,240386,232450,232962,233218,251649,233986,235522,0,0,0,239106,237570,249602,252161,251906,252673,252418,253185,252930,253697,253442,254209,253954,254721,254466,255233,254978,255745,255490,256257,256002,256769,256514,257281,257026,257793,257538,236034,237826,260354,229122,243713,234754,0,260097,259842,258561,260865,260610,0,228097,228353,228609}, |
13497
|
|
|
|
|
|
|
{282625,282881,283137,283393,283649,283905,284161,284417,284673,284929,285185,285441,285697,285953,286209,286465,274433,274689,274945,275201,275457,275713,275969,276225,276481,276737,276993,277249,277505,277761,278017,278273,278529,278785,279041,279297,279553,279809,280065,280321,280577,280833,281089,281345,281601,281857,282113,282369,266242,266498,266754,267010,267266,267522,267778,268034,268290,268546,268802,269058,269314,269570,269826,270082,270338,270594,270850,271106,271362,271618,271874,272130,272386,272642,272898,273154,273410,273666,273922,274178,262146,262402,262658,262914,263170,263426,263682,263938,264194,264450,264706,264962,265218,265474,265730,265986,286977,286722,287489,287234,288001,287746,288513,288258,289025,288770,289537,289282,290049,289794,290561,290306,291073,290818,291585,291330,292097,291842,292609,292354,293121,292866,293633,293378,294145,293890,294657,294402,295169,294914,0,0,0,0,0,0,0,0,297729,297474,298241,297986,298753,298498,299265,299010,299777,299522,300289,300034,300801,300546,301313,301058,301825,301570,302337,302082,302849,302594,303361,303106,303873,303618,304385,304130,304897,304642,305409,305154,305921,305666,306433,306178,306945,306690,307457,307202,307969,307714,308481,308226,308993,308738,309505,309250,310017,309762,310529,310274,311041,310786,315137,311809,311554,312321,312066,312833,312578,313345,313090,313857,313602,314369,314114,314881,314626,311298,315649,315394,316161,315906,316673,316418,317185,316930,317697,317442,318209,317954,318721,318466,319233,318978,319745,319490,320257,320002,320769,320514,321281,321026,321793,321538,322305,322050,322817,322562,323329,323074,323841,323586,324353,324098,324865,324610,325377,325122,325889,325634,326401,326146,326913,326658,327425,327170}, |
13498
|
|
|
|
|
|
|
{327937,327682,328449,328194,328961,328706,329473,329218,329985,329730,330497,330242,331009,330754,331521,331266,332033,331778,332545,332290,333057,332802,333569,333314,334081,333826,334593,334338,335105,334850,335617,335362,336129,335874,336641,336386,337153,336898,337665,337410,338177,337922,338689,338434,339201,338946,339713,339458,0,352513,352769,353025,353281,353537,353793,354049,354305,354561,354817,355073,355329,355585,355841,356097,356353,356609,356865,357121,357377,357633,357889,358145,358401,358657,358913,359169,359425,359681,359937,360193,360449,360705,360961,361217,361473,361729,361985,0,0,0,0,0,0,0,0,0,0,340226,340482,340738,340994,341250,341506,341762,342018,342274,342530,342786,343042,343298,343554,343810,344066,344322,344578,344834,345090,345346,345602,345858,346114,346370,346626,346882,347138,347394,347650,347906,348162,348418,348674,348930,349186,349442,349698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13499
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13500
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2949121,2949377,2949633,2949889,2950145,2950401,2950657,2950913,2951169,2951425,2951681,2951937,2952193,2952449,2952705,2952961,2953217,2953473,2953729,2953985,2954241,2954497,2954753,2955009,2955265,2955521,2955777,2956033,2956289,2956545,2956801,2957057,2957313,2957569,2957825,2958081,2958337,2958593,0,2959105,0,0,0,0,0,2960641,0,0,1871875,1872131,1872387,1872643,1872899,1873155,1873411,1873667,1873923,1874179,1874435,1874691,1874947,1875203,1875459,1875715,1875971,1876227,1876483,1876739,1876995,1877251,1877507,1877763,1878019,1878275,1878531,1878787,1879043,1879299,1879555,1879811,1880067,1880323,1880579,1880835,1881091,1881347,1881603,1881859,1882115,1882371,1882627,0,0,1883395,1883651,1883907}, |
13501
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11235329,11235585,11235841,11236097,11236353,11236609,11236865,11237121,11237377,11237633,11237889,11238145,11238401,11238657,11238913,11239169,11239425,11239681,11239937,11240193,11240449,11240705,11240961,11241217,11241473,11241729,11241985,11242241,11242497,11242753,11243009,11243265,11243521,11243777,11244033,11244289,11244545,11244801,11245057,11245313,11245569,11245825,11246081,11246337,11246593,11246849,11247105,11247361,11247617,11247873,11248129,11248385,11248641,11248897,11249153,11249409,11249665,11249921,11250177,11250433,11250689,11250945,11251201,11251457,11251713,11251969,11252225,11252481,11252737,11252993,11253249,11253505,11253761,11254017,11254273,11254529,11254785,11255041,11255297,11255553,1308673,1308929,1309185,1309441,1309697,1309953,0,0,1306626,1306882,1307138,1307394,1307650,1307906,0,0}, |
13502
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,266754,267266,269826,270594,270850,270850,272898,287234,10897922,0,0,0,0,0,0,0,1101825,1102081,1102337,1102593,1102849,1103105,1103361,1103617,1103873,1104129,1104385,1104641,1104897,1105153,1105409,1105665,1105921,1106177,1106433,1106689,1106945,1107201,1107457,1107713,1107969,1108225,1108481,1108737,1108993,1109249,1109505,1109761,1110017,1110273,1110529,1110785,1111041,1111297,1111553,1111809,1112065,1112321,1112577,0,0,1113345,1113601,1113857,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13503
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10976514,0,0,0,2908930,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10995202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13504
|
|
|
|
|
|
|
{1966337,1966082,1966849,1966594,1967361,1967106,1967873,1967618,1968385,1968130,1968897,1968642,1969409,1969154,1969921,1969666,1970433,1970178,1970945,1970690,1971457,1971202,1971969,1971714,1972481,1972226,1972993,1972738,1973505,1973250,1974017,1973762,1974529,1974274,1975041,1974786,1975553,1975298,1976065,1975810,1976577,1976322,1977089,1976834,1977601,1977346,1978113,1977858,1978625,1978370,1979137,1978882,1979649,1979394,1980161,1979906,1980673,1980418,1981185,1980930,1981697,1981442,1982209,1981954,1982721,1982466,1983233,1982978,1983745,1983490,1984257,1984002,1984769,1984514,1985281,1985026,1985793,1985538,1986305,1986050,1986817,1986562,1987329,1987074,1987841,1987586,1988353,1988098,1988865,1988610,1989377,1989122,1989889,1989634,1990401,1990146,1990913,1990658,1991425,1991170,1991937,1991682,1992449,1992194,1992961,1992706,1993473,1993218,1993985,1993730,1994497,1994242,1995009,1994754,1995521,1995266,1996033,1995778,1996545,1996290,1997057,1996802,1997569,1997314,1998081,1997826,1998593,1998338,1999105,1998850,1999617,1999362,2000129,1999874,2000641,2000386,2001153,2000898,2001665,2001410,2002177,2001922,2002689,2002434,2003201,2002946,2003713,2003458,2004225,2003970,0,0,0,0,0,1990658,0,0,57089,0,2007297,2007042,2007809,2007554,2008321,2008066,2008833,2008578,2009345,2009090,2009857,2009602,2010369,2010114,2010881,2010626,2011393,2011138,2011905,2011650,2012417,2012162,2012929,2012674,2013441,2013186,2013953,2013698,2014465,2014210,2014977,2014722,2015489,2015234,2016001,2015746,2016513,2016258,2017025,2016770,2017537,2017282,2018049,2017794,2018561,2018306,2019073,2018818,2019585,2019330,2020097,2019842,2020609,2020354,2021121,2020866,2021633,2021378,2022145,2021890,2022657,2022402,2023169,2022914,2023681,2023426,2024193,2023938,2024705,2024450,2025217,2024962,2025729,2025474,2026241,2025986,2026753,2026498,2027265,2027010,2027777,2027522,2028289,2028034,2028801,2028546,2029313,2029058,2029825,2029570,2030337,2030082,2030849,2030594,2031361, |
13505
|
|
|
|
|
|
|
2031106}, |
13506
|
|
|
|
|
|
|
{2033666,2033922,2034178,2034434,2034690,2034946,2035202,2035458,2031617,2031873,2032129,2032385,2032641,2032897,2033153,2033409,2037762,2038018,2038274,2038530,2038786,2039042,0,0,2035713,2035969,2036225,2036481,2036737,2036993,0,0,2041858,2042114,2042370,2042626,2042882,2043138,2043394,2043650,2039809,2040065,2040321,2040577,2040833,2041089,2041345,2041601,2045954,2046210,2046466,2046722,2046978,2047234,2047490,2047746,2043905,2044161,2044417,2044673,2044929,2045185,2045441,2045697,2050050,2050306,2050562,2050818,2051074,2051330,0,0,2048001,2048257,2048513,2048769,2049025,2049281,0,0,0,2054402,0,2054914,0,2055426,0,2055938,0,2052353,0,2052865,0,2053377,0,2053889,2058242,2058498,2058754,2059010,2059266,2059522,2059778,2060034,2056193,2056449,2056705,2056961,2057217,2057473,2057729,2057985,2079234,2079490,2082818,2083074,2083330,2083586,2087426,2087682,2095106,2095362,2091522,2091778,2095618,2095874,0,0,2066434,2066690,2066946,2067202,2067458,2067714,2067970,2068226,2064385,2064641,2064897,2065153,2065409,2065665,2065921,2066177,2070530,2070786,2071042,2071298,2071554,2071810,2072066,2072322,2068481,2068737,2068993,2069249,2069505,2069761,2070017,2070273,2074626,2074882,2075138,2075394,2075650,2075906,2076162,2076418,2072577,2072833,2073089,2073345,2073601,2073857,2074113,2074369,2078722,2078978,0,2079746,0,0,0,0,2076673,2076929,2060289,2060545,2077441,0,235778,0,0,0,0,2083842,0,0,0,0,2060801,2061057,2061313,2061569,2081537,0,0,0,2086914,2087170,0,0,0,0,0,0,2084865,2085121,2061825,2062081,0,0,0,0,2091010,2091266,0,0,0,2092034,0,0,2088961,2089217,2062849,2063105,2090241,0,0,0,0,0,0,2096130,0,0,0,0,2062337,2062593,2063361,2063617,2093825,0,0,0}, |
13507
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248065,0,0,0,27393,58625,0,0,0,0,0,0,2182657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2175490,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2191361,2191617,2191873,2192129,2192385,2192641,2192897,2193153,2193409,2193665,2193921,2194177,2194433,2194689,2194945,2195201,2187266,2187522,2187778,2188034,2188290,2188546,2188802,2189058,2189314,2189570,2189826,2190082,2190338,2190594,2190850,2191106,0,0,0,2196481,2196226,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13508
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2412545,2412801,2413057,2413313,2413569,2413825,2414081,2414337,2414593,2414849,2415105,2415361,2415617,2415873,2416129,2416385,2416641,2416897,2417153,2417409,2417665,2417921,2418177,2418433,2418689,2418945,2405890,2406146,2406402,2406658,2406914,2407170,2407426,2407682,2407938,2408194,2408450,2408706,2408962,2409218,2409474,2409730,2409986,2410242,2410498,2410754,2411010,2411266,2411522,2411778,2412034,2412290,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13509
|
|
|
|
|
|
|
{2895873,2896129,2896385,2896641,2896897,2897153,2897409,2897665,2897921,2898177,2898433,2898689,2898945,2899201,2899457,2899713,2899969,2900225,2900481,2900737,2900993,2901249,2901505,2901761,2902017,2902273,2902529,2902785,2903041,2903297,2903553,2903809,2904065,2904321,2904577,2904833,2905089,2905345,2905601,2905857,2906113,2906369,2906625,2906881,2907137,2907393,2907649,2907905,2883586,2883842,2884098,2884354,2884610,2884866,2885122,2885378,2885634,2885890,2886146,2886402,2886658,2886914,2887170,2887426,2887682,2887938,2888194,2888450,2888706,2888962,2889218,2889474,2889730,2889986,2890242,2890498,2890754,2891010,2891266,2891522,2891778,2892034,2892290,2892546,2892802,2893058,2893314,2893570,2893826,2894082,2894338,2894594,2894850,2895106,2895362,2895618,2908417,2908162,158465,1932545,163073,145922,146946,2910209,2909954,2910721,2910466,2911233,2910978,151809,160001,151553,152065,0,2913025,2912770,0,2913793,2913538,0,0,0,0,0,0,0,147201,147457,2916609,2916354,2917121,2916866,2917633,2917378,2918145,2917890,2918657,2918402,2919169,2918914,2919681,2919426,2920193,2919938,2920705,2920450,2921217,2920962,2921729,2921474,2922241,2921986,2922753,2922498,2923265,2923010,2923777,2923522,2924289,2924034,2924801,2924546,2925313,2925058,2925825,2925570,2926337,2926082,2926849,2926594,2927361,2927106,2927873,2927618,2928385,2928130,2928897,2928642,2929409,2929154,2929921,2929666,2930433,2930178,2930945,2930690,2931457,2931202,2931969,2931714,2932481,2932226,2932993,2932738,2933505,2933250,2934017,2933762,2934529,2934274,2935041,2934786,2935553,2935298,2936065,2935810,2936577,2936322,2937089,2936834,2937601,2937346,2938113,2937858,2938625,2938370,2939137,2938882,2939649,2939394,2940161,2939906,2940673,2940418,2941185,2940930,2941697,2941442,0,0,0,0,0,0,0,2944001,2943746,2944513,2944258,0,0,0,2945793,2945538,0,0,0,0,0,0,0,0,0,0,0,0}, |
13510
|
|
|
|
|
|
|
{1089538,1089794,1090050,1090306,1090562,1090818,1091074,1091330,1091586,1091842,1092098,1092354,1092610,1092866,1093122,1093378,1093634,1093890,1094146,1094402,1094658,1094914,1095170,1095426,1095682,1095938,1096194,1096450,1096706,1096962,1097218,1097474,1097730,1097986,1098242,1098498,1098754,1099010,0,1099522,0,0,0,0,0,1101058,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13511
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10895617,10895362,10896129,10895874,10896641,10896386,10897153,10896898,10897665,10897410,10898177,10897922,10898689,10898434,10899201,10898946,10899713,10899458,10900225,10899970,10900737,10900482,10901249,10900994,10901761,10901506,10902273,10902018,10902785,10902530,10903297,10903042,10903809,10903554,10904321,10904066,10904833,10904578,10905345,10905090,10905857,10905602,10906369,10906114,10906881,10906626,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10912001,10911746,10912513,10912258,10913025,10912770,10913537,10913282,10914049,10913794,10914561,10914306,10915073,10914818,10915585,10915330,10916097,10915842,10916609,10916354,10917121,10916866,10917633,10917378,10918145,10917890,10918657,10918402,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13512
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10953473,10953218,10953985,10953730,10954497,10954242,10955009,10954754,10955521,10955266,10956033,10955778,10956545,10956290,0,0,10957569,10957314,10958081,10957826,10958593,10958338,10959105,10958850,10959617,10959362,10960129,10959874,10960641,10960386,10961153,10960898,10961665,10961410,10962177,10961922,10962689,10962434,10963201,10962946,10963713,10963458,10964225,10963970,10964737,10964482,10965249,10964994,10965761,10965506,10966273,10966018,10966785,10966530,10967297,10967042,10967809,10967554,10968321,10968066,10968833,10968578,10969345,10969090,10969857,10969602,10970369,10970114,10970881,10970626,10971393,10971138,10971905,10971650,10972417,10972162,10972929,10972674,0,0,0,0,0,0,0,0,0,10975745,10975490,10976257,10976002,1931521,10977025,10976770,10977537,10977282,10978049,10977794,10978561,10978306,10979073,10978818,0,0,0,10980353,10980098,156929,0,0,10981633,10981378,10982145,10981890,10994690,0,10983169,10982914,10983681,10983426,10984193,10983938,10984705,10984450,10985217,10984962,10985729,10985474,10986241,10985986,10986753,10986498,10987265,10987010,10987777,10987522,157185,154625,155905,158721,158209,0,171521,165633,171265,11227905,10990849,10990594,10991361,10991106,10991873,10991618,10992385,10992130,10992897,10992642,10993409,10993154,10993921,10993666,10994433,10994178,10982401,164353,1936897,10995713,10995458,10996225,10995970,0,0,0,0,0,10998017,10997762,0,0,0,0,10999553,10999298,11000065,10999810,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11007489,11007234,0,0,0,0,0,0,0,0,0}, |
13513
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10990338,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1286146,1286402,1286658,1286914,1287170,1287426,1287682,1287938,1288194,1288450,1288706,1288962,1289218,1289474,1289730,1289986,1290242,1290498,1290754,1291010,1291266,1291522,1291778,1292034,1292290,1292546,1292802,1293058,1293314,1293570,1293826,1294082,1294338,1294594,1294850,1295106,1295362,1295618,1295874,1296130,1296386,1296642,1296898,1297154,1297410,1297666,1297922,1298178,1298434,1298690,1298946,1299202,1299458,1299714,1299970,1300226,1300482,1300738,1300994,1301250,1301506,1301762,1302018,1302274,1302530,1302786,1303042,1303298,1303554,1303810,1304066,1304322,1304578,1304834,1305090,1305346,1305602,1305858,1306114,1306370,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13514
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16728321,16728577,16728833,16729089,16729345,16729601,16729857,16730113,16730369,16730625,16730881,16731137,16731393,16731649,16731905,16732161,16732417,16732673,16732929,16733185,16733441,16733697,16733953,16734209,16734465,16734721,0,0,0,0,0,0,16720130,16720386,16720642,16720898,16721154,16721410,16721666,16721922,16722178,16722434,16722690,16722946,16723202,16723458,16723714,16723970,16724226,16724482,16724738,16724994,16725250,16725506,16725762,16726018,16726274,16726530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13515
|
|
|
|
|
|
|
{17049601,17049857,17050113,17050369,17050625,17050881,17051137,17051393,17051649,17051905,17052161,17052417,17052673,17052929,17053185,17053441,17053697,17053953,17054209,17054465,17054721,17054977,17055233,17055489,17055745,17056001,17056257,17056513,17056769,17057025,17057281,17057537,17057793,17058049,17058305,17058561,17058817,17059073,17059329,17059585,17039362,17039618,17039874,17040130,17040386,17040642,17040898,17041154,17041410,17041666,17041922,17042178,17042434,17042690,17042946,17043202,17043458,17043714,17043970,17044226,17044482,17044738,17044994,17045250,17045506,17045762,17046018,17046274,17046530,17046786,17047042,17047298,17047554,17047810,17048066,17048322,17048578,17048834,17049090,17049346,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17094657,17094913,17095169,17095425,17095681,17095937,17096193,17096449,17096705,17096961,17097217,17097473,17097729,17097985,17098241,17098497,17098753,17099009,17099265,17099521,17099777,17100033,17100289,17100545,17100801,17101057,17101313,17101569,17101825,17102081,17102337,17102593,17102849,17103105,17103361,17103617,0,0,0,0,17084418,17084674,17084930,17085186,17085442,17085698,17085954,17086210,17086466,17086722,17086978,17087234,17087490,17087746,17088002,17088258,17088514,17088770,17089026,17089282,17089538,17089794,17090050,17090306,17090562,17090818,17091074,17091330,17091586,17091842,17092098,17092354,17092610,17092866,17093122,17093378,0,0,0,0}, |
13516
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17143553,17143809,17144065,17144321,17144577,17144833,17145089,17145345,17145601,17145857,17146113,0,17146625,17146881,17147137,17147393,17147649,17147905,17148161,17148417,17148673,17148929,17149185,17149441,17149697,17149953,17150209,0,17150721,17150977,17151233,17151489,17151745,17152001,17152257,0,17152769,17153025,0,17133570,17133826,17134082,17134338,17134594,17134850,17135106,17135362,17135618,17135874,17136130,0,17136642,17136898,17137154,17137410,17137666,17137922,17138178,17138434,17138690,17138946,17139202,17139458,17139714,17139970,17140226,0,17140738,17140994,17141250,17141506,17141762,17142018,17142274,0,17142786,17143042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13517
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17612801,17613057,17613313,17613569,17613825,17614081,17614337,17614593,17614849,17615105,17615361,17615617,17615873,17616129,17616385,17616641,17616897,17617153,17617409,17617665,17617921,17618177,17618433,17618689,17618945,17619201,17619457,17619713,17619969,17620225,17620481,17620737,17620993,17621249,17621505,17621761,17622017,17622273,17622529,17622785,17623041,17623297,17623553,17623809,17624065,17624321,17624577,17624833,17625089,17625345,17625601,0,0,0,0,0,0,0,0,0,0,0,0,0,17596418,17596674,17596930,17597186,17597442,17597698,17597954,17598210,17598466,17598722,17598978,17599234,17599490,17599746,17600002,17600258,17600514,17600770,17601026,17601282,17601538,17601794,17602050,17602306,17602562,17602818,17603074,17603330,17603586,17603842,17604098,17604354,17604610,17604866,17605122,17605378,17605634,17605890,17606146,17606402,17606658,17606914,17607170,17607426,17607682,17607938,17608194,17608450,17608706,17608962,17609218,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13518
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18399233,18399489,18399745,18400001,18400257,18400513,18400769,18401025,18401281,18401537,18401793,18402049,18402305,18402561,18402817,18403073,18403329,18403585,18403841,18404097,18404353,18404609,18404865,18405121,18405377,18405633,18405889,18406145,18406401,18406657,18406913,18407169,18391042,18391298,18391554,18391810,18392066,18392322,18392578,18392834,18393090,18393346,18393602,18393858,18394114,18394370,18394626,18394882,18395138,18395394,18395650,18395906,18396162,18396418,18396674,18396930,18397186,18397442,18397698,18397954,18398210,18398466,18398722,18398978,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13519
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24010753,24011009,24011265,24011521,24011777,24012033,24012289,24012545,24012801,24013057,24013313,24013569,24013825,24014081,24014337,24014593,24014849,24015105,24015361,24015617,24015873,24016129,24016385,24016641,24016897,24017153,24017409,24017665,24017921,24018177,24018433,24018689,24002562,24002818,24003074,24003330,24003586,24003842,24004098,24004354,24004610,24004866,24005122,24005378,24005634,24005890,24006146,24006402,24006658,24006914,24007170,24007426,24007682,24007938,24008194,24008450,24008706,24008962,24009218,24009474,24009730,24009986,24010242,24010498,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
13520
|
|
|
|
|
|
|
{32055809,32056065,32056321,32056577,32056833,32057089,32057345,32057601,32057857,32058113,32058369,32058625,32058881,32059137,32059393,32059649,32059905,32060161,32060417,32060673,32060929,32061185,32061441,32061697,32061953,32062209,32062465,32062721,32062977,32063233,32063489,32063745,32064001,32064257,32047106,32047362,32047618,32047874,32048130,32048386,32048642,32048898,32049154,32049410,32049666,32049922,32050178,32050434,32050690,32050946,32051202,32051458,32051714,32051970,32052226,32052482,32052738,32052994,32053250,32053506,32053762,32054018,32054274,32054530,32054786,32055042,32055298,32055554,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
13521
|
|
|
|
|
|
|
}; |
13522
|
|
|
|
|
|
|
|
13523
|
|
|
|
|
|
|
} // namespace unilib |
13524
|
|
|
|
|
|
|
|
13525
|
|
|
|
|
|
|
///////// |
13526
|
|
|
|
|
|
|
// File: unilib/utf8.cpp |
13527
|
|
|
|
|
|
|
///////// |
13528
|
|
|
|
|
|
|
|
13529
|
|
|
|
|
|
|
// This file is part of UniLib . |
13530
|
|
|
|
|
|
|
// |
13531
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
13532
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13533
|
|
|
|
|
|
|
// |
13534
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13535
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13536
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13537
|
|
|
|
|
|
|
// |
13538
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
13539
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
13540
|
|
|
|
|
|
|
|
13541
|
|
|
|
|
|
|
namespace unilib { |
13542
|
|
|
|
|
|
|
|
13543
|
0
|
|
|
|
|
|
// Checks whether the NUL-terminated string `str` has the byte structure of UTF-8.
//
// Every byte >= 0x80 must be a lead byte followed by the right number of
// continuation bytes (each in 0x80..0xBF):
//   0xC0..0xDF -> 1 continuation byte
//   0xE0..0xEF -> 2 continuation bytes
//   0xF0..0xF7 -> 3 continuation bytes
// A stray continuation byte (0x80..0xBF) or a lead byte >= 0xF8 makes the
// string invalid. Only this structure is checked: overlong encodings,
// surrogates and values above U+10FFFF are not rejected here.
//
// Returns true when the whole string passes, false at the first violation.
bool utf8::valid(const char* str) {
  for (; *str; str++) {
    const unsigned char lead = static_cast<unsigned char>(*str);
    if (lead < 0x80) continue;  // plain ASCII byte

    int continuation;
    if (lead < 0xC0) return false;  // continuation byte without a lead byte
    else if (lead < 0xE0) continuation = 1;
    else if (lead < 0xF0) continuation = 2;
    else if (lead < 0xF8) continuation = 3;
    else return false;

    // The terminating NUL is < 0x80, so a truncated sequence is rejected
    // before the pointer can move past the end of the string.
    while (continuation--) {
      const unsigned char next = static_cast<unsigned char>(*++str);
      if (next < 0x80 || next >= 0xC0) return false;
    }
  }
  return true;
}
13560
|
|
|
|
|
|
|
|
13561
|
0
|
|
|
|
|
|
// Checks whether the `len` bytes starting at `str` have the byte structure of
// UTF-8. The buffer need not be NUL-terminated; a NUL byte is treated as an
// ordinary ASCII byte.
//
// Every byte >= 0x80 must be a lead byte followed by the right number of
// continuation bytes (each in 0x80..0xBF):
//   0xC0..0xDF -> 1, 0xE0..0xEF -> 2, 0xF0..0xF7 -> 3.
// A stray continuation byte, a lead byte >= 0xF8, or a sequence cut off by the
// end of the buffer makes the input invalid. Overlong encodings, surrogates
// and values above U+10FFFF are not rejected here.
//
// Returns true when the whole buffer passes (including len == 0).
bool utf8::valid(const char* str, size_t len) {
  for (; len > 0; str++, len--) {
    const unsigned char lead = static_cast<unsigned char>(*str);
    if (lead < 0x80) continue;  // plain ASCII byte

    int continuation;
    if (lead < 0xC0) return false;  // continuation byte without a lead byte
    else if (lead < 0xE0) continuation = 1;
    else if (lead < 0xF0) continuation = 2;
    else if (lead < 0xF8) continuation = 3;
    else return false;

    while (continuation--) {
      str++;
      // The length is checked before the byte is read, so nothing past the
      // buffer is ever dereferenced.
      if (!--len) return false;
      const unsigned char next = static_cast<unsigned char>(*str);
      if (next < 0x80 || next >= 0xC0) return false;
    }
  }
  return true;
}
13578
|
|
|
|
|
|
|
|
13579
|
0
|
|
|
|
|
|
// Decodes the NUL-terminated UTF-8 string `str` into `decoded`.
//
// `decoded` is cleared first; then code points returned by the single-argument
// decode(str) are appended until that call returns 0.
// NOTE(review): this relies on decode(str) advancing `str` on every call; that
// overload is declared elsewhere in the file -- confirm.
void utf8::decode(const char* str, std::u32string& decoded) {
  decoded.clear();

  for (;;) {
    const char32_t chr = decode(str);
    if (!chr) break;
    decoded.push_back(chr);
  }
}
13585
|
|
|
|
|
|
|
|
13586
|
0
|
|
|
|
|
|
void utf8::decode(const char* str, size_t len, std::u32string& decoded) { |
13587
|
|
|
|
|
|
|
decoded.clear(); |
13588
|
|
|
|
|
|
|
|
13589
|
0
|
0
|
|
|
|
|
while (len) |
13590
|
0
|
|
|
|
|
|
decoded.push_back(decode(str, len)); |
13591
|
0
|
|
|
|
|
|
} |
13592
|
|
|
|
|
|
|
|
13593
|
0
|
|
|
|
|
|
void utf8::encode(const std::u32string& str, std::string& encoded) { |
13594
|
|
|
|
|
|
|
encoded.clear(); |
13595
|
|
|
|
|
|
|
|
13596
|
0
|
0
|
|
|
|
|
for (auto&& chr : str) |
13597
|
0
|
|
|
|
|
|
append(encoded, chr); |
13598
|
0
|
|
|
|
|
|
} |
13599
|
|
|
|
|
|
|
|
13600
|
|
|
|
|
|
|
// Namespace-scope definition of the static member utf8::REPLACEMENT_CHAR.
// There is no initializer here, so the value presumably comes from the
// declaration inside class utf8 -- confirm against the class.
const char utf8::REPLACEMENT_CHAR;
13601
|
|
|
|
|
|
|
|
13602
|
|
|
|
|
|
|
} // namespace unilib |
13603
|
|
|
|
|
|
|
|
13604
|
|
|
|
|
|
|
///////// |
13605
|
|
|
|
|
|
|
// File: unilib/version.cpp |
13606
|
|
|
|
|
|
|
///////// |
13607
|
|
|
|
|
|
|
|
13608
|
|
|
|
|
|
|
// This file is part of UniLib . |
13609
|
|
|
|
|
|
|
// |
13610
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
13611
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13612
|
|
|
|
|
|
|
// |
13613
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13614
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13615
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13616
|
|
|
|
|
|
|
// |
13617
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
13618
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
13619
|
|
|
|
|
|
|
|
13620
|
|
|
|
|
|
|
namespace unilib { |
13621
|
|
|
|
|
|
|
|
13622
|
|
|
|
|
|
|
// Returns current version. |
13623
|
0
|
|
|
|
|
|
version version::current() { |
13624
|
0
|
0
|
|
|
|
|
return {3, 3, 0, ""}; |
|
|
0
|
|
|
|
|
|
13625
|
|
|
|
|
|
|
} |
13626
|
|
|
|
|
|
|
|
13627
|
|
|
|
|
|
|
} // namespace unilib |
13628
|
|
|
|
|
|
|
|
13629
|
|
|
|
|
|
|
///////// |
13630
|
|
|
|
|
|
|
// File: utils/compressor_load.cpp |
13631
|
|
|
|
|
|
|
///////// |
13632
|
|
|
|
|
|
|
|
13633
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
13634
|
|
|
|
|
|
|
// |
13635
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
13636
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
13637
|
|
|
|
|
|
|
// |
13638
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
13639
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
13640
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
13641
|
|
|
|
|
|
|
|
13642
|
|
|
|
|
|
|
namespace utils { |
13643
|
|
|
|
|
|
|
|
13644
|
|
|
|
|
|
|
// Start of LZMA compression library by Igor Pavlov |
13645
|
|
|
|
|
|
|
namespace lzma { |
13646
|
|
|
|
|
|
|
|
13647
|
|
|
|
|
|
|
// Types.h -- Basic types |
13648
|
|
|
|
|
|
|
// 2010-10-09 : Igor Pavlov : Public domain |
13649
|
|
|
|
|
|
|
#ifndef UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
13650
|
|
|
|
|
|
|
#define UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
13651
|
|
|
|
|
|
|
|
13652
|
|
|
|
|
|
|
/* Status codes used as SRes values. SZ_OK is zero; every error code is non-zero. */
#define SZ_OK 0

#define SZ_ERROR_DATA 1
#define SZ_ERROR_MEM 2
#define SZ_ERROR_CRC 3
#define SZ_ERROR_UNSUPPORTED 4
#define SZ_ERROR_PARAM 5
#define SZ_ERROR_INPUT_EOF 6
#define SZ_ERROR_OUTPUT_EOF 7
#define SZ_ERROR_READ 8
#define SZ_ERROR_WRITE 9
#define SZ_ERROR_PROGRESS 10
#define SZ_ERROR_FAIL 11
#define SZ_ERROR_THREAD 12

#define SZ_ERROR_ARCHIVE 16
#define SZ_ERROR_NO_ARCHIVE 17
13669
|
|
|
|
|
|
|
|
13670
|
|
|
|
|
|
|
/* Status type of the LZMA routines; holds one of the SZ_* codes. */
typedef int SRes;

/* RINOK(x): evaluates x exactly once and, when the result is non-zero, returns
   it from the enclosing function. The temporary avoids the double underscore
   of the former `__result__`, because such identifiers are reserved for the
   implementation. */
#ifndef RINOK
#define RINOK(x) { int rinok_result_ = (x); if (rinok_result_ != 0) return rinok_result_; }
#endif
13675
|
|
|
|
|
|
|
|
13676
|
|
|
|
|
|
|
/* The following interfaces use first parameter as pointer to structure */ |
13677
|
|
|
|
|
|
|
|
13678
|
|
|
|
|
|
|
/* Byte-at-a-time input interface. */
struct IByteIn
{
  /* Reads one byte; returns 0 in case of EOF or error. */
  uint8_t (*Read)(void *p);
};
13682
|
|
|
|
|
|
|
|
13683
|
|
|
|
|
|
|
/* Byte-at-a-time output interface. */
struct IByteOut
{
  /* Writes the single byte b. */
  void (*Write)(void *p, uint8_t b);
};
13687
|
|
|
|
|
|
|
|
13688
|
|
|
|
|
|
|
struct ISeqInStream |
13689
|
|
|
|
|
|
|
{ |
13690
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); |
13691
|
|
|
|
|
|
|
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. |
13692
|
|
|
|
|
|
|
(output(*size) < input(*size)) is allowed */ |
13693
|
|
|
|
|
|
|
}; |
13694
|
|
|
|
|
|
|
|
13695
|
|
|
|
|
|
|
/* it can return SZ_ERROR_INPUT_EOF */ |
13696
|
|
|
|
|
|
|
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); |
13697
|
|
|
|
|
|
|
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); |
13698
|
|
|
|
|
|
|
SRes SeqInStream_ReadByte(ISeqInStream *stream, uint8_t *buf); |
13699
|
|
|
|
|
|
|
|
13700
|
|
|
|
|
|
|
/* Sequential output stream interface. */
struct ISeqOutStream
{
  /* Returns: result - the number of actually written bytes.
     (result < size) means error */
  size_t (*Write)(void *p, const void *buf, size_t size);
};
13706
|
|
|
|
|
|
|
|
13707
|
|
|
|
|
|
|
/* Origin selector passed to the Seek callbacks of the stream interfaces. */
enum ESzSeek
{
  SZ_SEEK_SET = 0,
  SZ_SEEK_CUR = 1,
  SZ_SEEK_END = 2
};
13713
|
|
|
|
|
|
|
|
13714
|
|
|
|
|
|
|
struct ISeekInStream |
13715
|
|
|
|
|
|
|
{ |
13716
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ |
13717
|
|
|
|
|
|
|
SRes (*Seek)(void *p, int64_t *pos, ESzSeek origin); |
13718
|
|
|
|
|
|
|
}; |
13719
|
|
|
|
|
|
|
|
13720
|
|
|
|
|
|
|
struct ILookInStream |
13721
|
|
|
|
|
|
|
{ |
13722
|
|
|
|
|
|
|
SRes (*Look)(void *p, const void **buf, size_t *size); |
13723
|
|
|
|
|
|
|
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. |
13724
|
|
|
|
|
|
|
(output(*size) > input(*size)) is not allowed |
13725
|
|
|
|
|
|
|
(output(*size) < input(*size)) is allowed */ |
13726
|
|
|
|
|
|
|
SRes (*Skip)(void *p, size_t offset); |
13727
|
|
|
|
|
|
|
/* offset must be <= output(*size) of Look */ |
13728
|
|
|
|
|
|
|
|
13729
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); |
13730
|
|
|
|
|
|
|
/* reads directly (without buffer). It's same as ISeqInStream::Read */ |
13731
|
|
|
|
|
|
|
SRes (*Seek)(void *p, int64_t *pos, ESzSeek origin); |
13732
|
|
|
|
|
|
|
}; |
13733
|
|
|
|
|
|
|
|
13734
|
|
|
|
|
|
|
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); |
13735
|
|
|
|
|
|
|
SRes LookInStream_SeekTo(ILookInStream *stream, uint64_t offset); |
13736
|
|
|
|
|
|
|
|
13737
|
|
|
|
|
|
|
/* reads via ILookInStream::Read */ |
13738
|
|
|
|
|
|
|
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); |
13739
|
|
|
|
|
|
|
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); |
13740
|
|
|
|
|
|
|
|
13741
|
|
|
|
|
|
|
#define LookToRead_BUF_SIZE (1 << 14) |
13742
|
|
|
|
|
|
|
|
13743
|
|
|
|
|
|
|
struct CLookToRead |
13744
|
|
|
|
|
|
|
{ |
13745
|
|
|
|
|
|
|
ILookInStream s; |
13746
|
|
|
|
|
|
|
ISeekInStream *realStream; |
13747
|
|
|
|
|
|
|
size_t pos; |
13748
|
|
|
|
|
|
|
size_t size; |
13749
|
|
|
|
|
|
|
uint8_t buf[LookToRead_BUF_SIZE]; |
13750
|
|
|
|
|
|
|
}; |
13751
|
|
|
|
|
|
|
|
13752
|
|
|
|
|
|
|
void LookToRead_CreateVTable(CLookToRead *p, int lookahead); |
13753
|
|
|
|
|
|
|
void LookToRead_Init(CLookToRead *p); |
13754
|
|
|
|
|
|
|
|
13755
|
|
|
|
|
|
|
struct CSecToLook |
13756
|
|
|
|
|
|
|
{ |
13757
|
|
|
|
|
|
|
ISeqInStream s; |
13758
|
|
|
|
|
|
|
ILookInStream *realStream; |
13759
|
|
|
|
|
|
|
}; |
13760
|
|
|
|
|
|
|
|
13761
|
|
|
|
|
|
|
void SecToLook_CreateVTable(CSecToLook *p); |
13762
|
|
|
|
|
|
|
|
13763
|
|
|
|
|
|
|
struct CSecToRead |
13764
|
|
|
|
|
|
|
{ |
13765
|
|
|
|
|
|
|
ISeqInStream s; |
13766
|
|
|
|
|
|
|
ILookInStream *realStream; |
13767
|
|
|
|
|
|
|
}; |
13768
|
|
|
|
|
|
|
|
13769
|
|
|
|
|
|
|
void SecToRead_CreateVTable(CSecToRead *p); |
13770
|
|
|
|
|
|
|
|
13771
|
|
|
|
|
|
|
struct ICompressProgress |
13772
|
|
|
|
|
|
|
{ |
13773
|
|
|
|
|
|
|
SRes (*Progress)(void *p, uint64_t inSize, uint64_t outSize); |
13774
|
|
|
|
|
|
|
/* Returns: result. (result != SZ_OK) means break. |
13775
|
|
|
|
|
|
|
Value (uint64_t)(int64_t)-1 for size means unknown value. */ |
13776
|
|
|
|
|
|
|
}; |
13777
|
|
|
|
|
|
|
|
13778
|
|
|
|
|
|
|
/* Allocator interface: a pair of callbacks that receive the interface pointer
   as their first argument. */
struct ISzAlloc
{
  void *(*Alloc)(void *p, size_t size);
  void (*Free)(void *p, void *address); /* address can be 0 */
};

/* Convenience wrappers that pass the allocator itself as the first argument.
   Note that `p` is evaluated twice. */
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size)
#define IAlloc_Free(p, a) (p)->Free((p), a)
13786
|
|
|
|
|
|
|
|
13787
|
|
|
|
|
|
|
#endif // UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
13788
|
|
|
|
|
|
|
|
13789
|
|
|
|
|
|
|
// LzmaDec.h -- LZMA Decoder |
13790
|
|
|
|
|
|
|
// 2009-02-07 : Igor Pavlov : Public domain |
13791
|
|
|
|
|
|
|
|
13792
|
|
|
|
|
|
|
/* CLzmaProb is the element type of CLzmaDec::probs.
   _LZMA_PROB32 can increase the speed on some CPUs,
   but memory usage for CLzmaDec::probs will be doubled in that case */
/* #define _LZMA_PROB32 */

#ifdef _LZMA_PROB32
#define CLzmaProb uint32_t
#else
#define CLzmaProb uint16_t
#endif
13801
|
|
|
|
|
|
|
|
13802
|
|
|
|
|
|
|
/* ---------- LZMA Properties ---------- */ |
13803
|
|
|
|
|
|
|
|
13804
|
|
|
|
|
|
|
/* Size constant for the LZMA properties data handled by LzmaProps_Decode. */
#define LZMA_PROPS_SIZE 5

/* Decoded LZMA properties. */
struct CLzmaProps
{
  /* NOTE(review): presumably the literal-context, literal-position and
     position bit counts of the LZMA format -- confirm against LzmaProps_Decode. */
  unsigned lc, lp, pb;
  uint32_t dicSize;
};
13811
|
|
|
|
|
|
|
|
13812
|
|
|
|
|
|
|
/* LzmaProps_Decode - decodes properties |
13813
|
|
|
|
|
|
|
Returns: |
13814
|
|
|
|
|
|
|
SZ_OK |
13815
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
13816
|
|
|
|
|
|
|
*/ |
13817
|
|
|
|
|
|
|
|
13818
|
|
|
|
|
|
|
SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size); |
13819
|
|
|
|
|
|
|
|
13820
|
|
|
|
|
|
|
/* ---------- LZMA Decoder state ---------- */ |
13821
|
|
|
|
|
|
|
|
13822
|
|
|
|
|
|
|
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. |
13823
|
|
|
|
|
|
|
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ |
13824
|
|
|
|
|
|
|
|
13825
|
|
|
|
|
|
|
#define LZMA_REQUIRED_INPUT_MAX 20 |
13826
|
|
|
|
|
|
|
|
13827
|
|
|
|
|
|
|
struct CLzmaDec |
13828
|
|
|
|
|
|
|
{ |
13829
|
|
|
|
|
|
|
CLzmaProps prop; |
13830
|
|
|
|
|
|
|
CLzmaProb *probs; |
13831
|
|
|
|
|
|
|
uint8_t *dic; |
13832
|
|
|
|
|
|
|
const uint8_t *buf; |
13833
|
|
|
|
|
|
|
uint32_t range, code; |
13834
|
|
|
|
|
|
|
size_t dicPos; |
13835
|
|
|
|
|
|
|
size_t dicBufSize; |
13836
|
|
|
|
|
|
|
uint32_t processedPos; |
13837
|
|
|
|
|
|
|
uint32_t checkDicSize; |
13838
|
|
|
|
|
|
|
unsigned state; |
13839
|
|
|
|
|
|
|
uint32_t reps[4]; |
13840
|
|
|
|
|
|
|
unsigned remainLen; |
13841
|
|
|
|
|
|
|
int needFlush; |
13842
|
|
|
|
|
|
|
int needInitState; |
13843
|
|
|
|
|
|
|
uint32_t numProbs; |
13844
|
|
|
|
|
|
|
unsigned tempBufSize; |
13845
|
|
|
|
|
|
|
uint8_t tempBuf[LZMA_REQUIRED_INPUT_MAX]; |
13846
|
|
|
|
|
|
|
}; |
13847
|
|
|
|
|
|
|
|
13848
|
|
|
|
|
|
|
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } |
13849
|
|
|
|
|
|
|
|
13850
|
|
|
|
|
|
|
void LzmaDec_Init(CLzmaDec *p); |
13851
|
|
|
|
|
|
|
|
13852
|
|
|
|
|
|
|
/* There are two types of LZMA streams: |
13853
|
|
|
|
|
|
|
0) Stream with end mark. That end mark adds about 6 bytes to compressed size. |
13854
|
|
|
|
|
|
|
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ |
13855
|
|
|
|
|
|
|
|
13856
|
|
|
|
|
|
|
/* Tells the decoder what is expected once the output limit is reached. */
enum ELzmaFinishMode
{
  LZMA_FINISH_ANY, /* finish at any point */
  LZMA_FINISH_END  /* block must be finished at the end */
};
13861
|
|
|
|
|
|
|
|
13862
|
|
|
|
|
|
|
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! |
13863
|
|
|
|
|
|
|
|
13864
|
|
|
|
|
|
|
You must use LZMA_FINISH_END, when you know that current output buffer |
13865
|
|
|
|
|
|
|
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. |
13866
|
|
|
|
|
|
|
|
13867
|
|
|
|
|
|
|
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, |
13868
|
|
|
|
|
|
|
and output value of destLen will be less than output buffer size limit. |
13869
|
|
|
|
|
|
|
You can check status result also. |
13870
|
|
|
|
|
|
|
|
13871
|
|
|
|
|
|
|
You can use multiple checks to test data integrity after full decompression: |
13872
|
|
|
|
|
|
|
1) Check Result and "status" variable. |
13873
|
|
|
|
|
|
|
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. |
13874
|
|
|
|
|
|
|
3) Check that output(srcLen) = compressedSize, if you know real compressedSize. |
13875
|
|
|
|
|
|
|
You must use correct finish mode in that case. */ |
13876
|
|
|
|
|
|
|
|
13877
|
|
|
|
|
|
|
/* Decoder status reported through the `status` output parameter of the decode
   functions. */
enum ELzmaStatus
{
  LZMA_STATUS_NOT_SPECIFIED,              /* use main error code instead */
  LZMA_STATUS_FINISHED_WITH_MARK,         /* stream was finished with end mark. */
  LZMA_STATUS_NOT_FINISHED,               /* stream was not finished */
  LZMA_STATUS_NEEDS_MORE_INPUT,           /* you must provide more input bytes */
  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK /* there is probability that stream was finished without end mark */
};
13885
|
|
|
|
|
|
|
|
13886
|
|
|
|
|
|
|
/* ELzmaStatus is used only as output value for function call */ |
13887
|
|
|
|
|
|
|
|
13888
|
|
|
|
|
|
|
/* ---------- Interfaces ---------- */ |
13889
|
|
|
|
|
|
|
|
13890
|
|
|
|
|
|
|
/* There are 3 levels of interfaces: |
13891
|
|
|
|
|
|
|
1) Dictionary Interface |
13892
|
|
|
|
|
|
|
2) Buffer Interface |
13893
|
|
|
|
|
|
|
3) One Call Interface |
13894
|
|
|
|
|
|
|
You can select any of these interfaces, but don't mix functions from different |
13895
|
|
|
|
|
|
|
groups for same object. */ |
13896
|
|
|
|
|
|
|
|
13897
|
|
|
|
|
|
|
/* There are two variants to allocate state for Dictionary Interface: |
13898
|
|
|
|
|
|
|
1) LzmaDec_Allocate / LzmaDec_Free |
13899
|
|
|
|
|
|
|
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs |
13900
|
|
|
|
|
|
|
You can use variant 2, if you set dictionary buffer manually. |
13901
|
|
|
|
|
|
|
For Buffer Interface you must always use variant 1. |
13902
|
|
|
|
|
|
|
|
13903
|
|
|
|
|
|
|
LzmaDec_Allocate* can return: |
13904
|
|
|
|
|
|
|
SZ_OK |
13905
|
|
|
|
|
|
|
SZ_ERROR_MEM - Memory allocation error |
13906
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
13907
|
|
|
|
|
|
|
*/ |
13908
|
|
|
|
|
|
|
|
13909
|
|
|
|
|
|
|
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc); |
13910
|
|
|
|
|
|
|
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); |
13911
|
|
|
|
|
|
|
|
13912
|
|
|
|
|
|
|
SRes LzmaDec_Allocate(CLzmaDec *state, const uint8_t *prop, unsigned propsSize, ISzAlloc *alloc); |
13913
|
|
|
|
|
|
|
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); |
13914
|
|
|
|
|
|
|
|
13915
|
|
|
|
|
|
|
/* ---------- Dictionary Interface ---------- */ |
13916
|
|
|
|
|
|
|
|
13917
|
|
|
|
|
|
|
/* You can use it, if you want to eliminate the overhead for data copying from |
13918
|
|
|
|
|
|
|
dictionary to some other external buffer. |
13919
|
|
|
|
|
|
|
You must work with CLzmaDec variables directly in this interface. |
13920
|
|
|
|
|
|
|
|
13921
|
|
|
|
|
|
|
STEPS: |
13922
|
|
|
|
|
|
|
LzmaDec_Construct() |
13923
|
|
|
|
|
|
|
LzmaDec_Allocate() |
13924
|
|
|
|
|
|
|
for (each new stream) |
13925
|
|
|
|
|
|
|
{ |
13926
|
|
|
|
|
|
|
LzmaDec_Init() |
13927
|
|
|
|
|
|
|
while (it needs more decompression) |
13928
|
|
|
|
|
|
|
{ |
13929
|
|
|
|
|
|
|
LzmaDec_DecodeToDic() |
13930
|
|
|
|
|
|
|
use data from CLzmaDec::dic and update CLzmaDec::dicPos |
13931
|
|
|
|
|
|
|
} |
13932
|
|
|
|
|
|
|
} |
13933
|
|
|
|
|
|
|
LzmaDec_Free() |
13934
|
|
|
|
|
|
|
*/ |
13935
|
|
|
|
|
|
|
|
13936
|
|
|
|
|
|
|
/* LzmaDec_DecodeToDic |
13937
|
|
|
|
|
|
|
|
13938
|
|
|
|
|
|
|
The decoding to internal dictionary buffer (CLzmaDec::dic). |
13939
|
|
|
|
|
|
|
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! |
13940
|
|
|
|
|
|
|
|
13941
|
|
|
|
|
|
|
finishMode: |
13942
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (dicLimit). |
13943
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just dicLimit bytes. |
13944
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after dicLimit. |
13945
|
|
|
|
|
|
|
|
13946
|
|
|
|
|
|
|
Returns: |
13947
|
|
|
|
|
|
|
SZ_OK |
13948
|
|
|
|
|
|
|
status: |
13949
|
|
|
|
|
|
|
LZMA_STATUS_FINISHED_WITH_MARK |
13950
|
|
|
|
|
|
|
LZMA_STATUS_NOT_FINISHED |
13951
|
|
|
|
|
|
|
LZMA_STATUS_NEEDS_MORE_INPUT |
13952
|
|
|
|
|
|
|
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK |
13953
|
|
|
|
|
|
|
SZ_ERROR_DATA - Data error |
13954
|
|
|
|
|
|
|
*/ |
13955
|
|
|
|
|
|
|
|
13956
|
|
|
|
|
|
|
/* Dictionary-interface decode step: decodes into CLzmaDec::dic up to dicLimit.
   *srcLen and *status are passed by pointer; finishMode matters only when the
   output limit is reached. */
SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit,
    const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
13958
|
|
|
|
|
|
|
|
13959
|
|
|
|
|
|
|
/* ---------- Buffer Interface ---------- */ |
13960
|
|
|
|
|
|
|
|
13961
|
|
|
|
|
|
|
/* It's zlib-like interface. |
13962
|
|
|
|
|
|
|
See LzmaDec_DecodeToDic description for information about STEPS and return results, |
13963
|
|
|
|
|
|
|
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need |
13964
|
|
|
|
|
|
|
to work with CLzmaDec variables manually. |
13965
|
|
|
|
|
|
|
|
13966
|
|
|
|
|
|
|
finishMode: |
13967
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (*destLen). |
13968
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just destLen bytes. |
13969
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after (*destLen). |
13970
|
|
|
|
|
|
|
*/ |
13971
|
|
|
|
|
|
|
|
13972
|
|
|
|
|
|
|
/* Buffer-interface (zlib-like) decode step: decodes into dest, with *destLen
   as the output limit; use it instead of LzmaDec_DecodeToDic. */
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen,
    const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status);
13974
|
|
|
|
|
|
|
|
13975
|
|
|
|
|
|
|
/* ---------- One Call Interface ---------- */ |
13976
|
|
|
|
|
|
|
|
13977
|
|
|
|
|
|
|
/* LzmaDecode |
13978
|
|
|
|
|
|
|
|
13979
|
|
|
|
|
|
|
finishMode: |
13980
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (*destLen). |
13981
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just destLen bytes. |
13982
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after (*destLen). |
13983
|
|
|
|
|
|
|
|
13984
|
|
|
|
|
|
|
Returns: |
13985
|
|
|
|
|
|
|
SZ_OK |
13986
|
|
|
|
|
|
|
status: |
13987
|
|
|
|
|
|
|
LZMA_STATUS_FINISHED_WITH_MARK |
13988
|
|
|
|
|
|
|
LZMA_STATUS_NOT_FINISHED |
13989
|
|
|
|
|
|
|
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK |
13990
|
|
|
|
|
|
|
SZ_ERROR_DATA - Data error |
13991
|
|
|
|
|
|
|
SZ_ERROR_MEM - Memory allocation error |
13992
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
13993
|
|
|
|
|
|
|
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). |
13994
|
|
|
|
|
|
|
*/ |
13995
|
|
|
|
|
|
|
|
13996
|
|
|
|
|
|
|
/* One-call interface: decodes src into dest using the properties in propData
   and the allocator alloc; *destLen is the output limit. */
SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen,
    const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode,
    ELzmaStatus *status, ISzAlloc *alloc);
13999
|
|
|
|
|
|
|
|
14000
|
|
|
|
|
|
|
// LzmaDec.c -- LZMA Decoder |
14001
|
|
|
|
|
|
|
// 2009-09-20 : Igor Pavlov : Public domain |
14002
|
|
|
|
|
|
|
|
14003
|
|
|
|
|
|
|
#define kNumTopBits 24 |
14004
|
|
|
|
|
|
|
#define kTopValue ((uint32_t)1 << kNumTopBits) |
14005
|
|
|
|
|
|
|
|
14006
|
|
|
|
|
|
|
#define kNumBitModelTotalBits 11 |
14007
|
|
|
|
|
|
|
#define kBitModelTotal (1 << kNumBitModelTotalBits) |
14008
|
|
|
|
|
|
|
#define kNumMoveBits 5 |
14009
|
|
|
|
|
|
|
|
14010
|
|
|
|
|
|
|
#define RC_INIT_SIZE 5 |
14011
|
|
|
|
|
|
|
|
14012
|
|
|
|
|
|
|
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } |
14013
|
|
|
|
|
|
|
|
14014
|
|
|
|
|
|
|
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) |
14015
|
|
|
|
|
|
|
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); |
14016
|
|
|
|
|
|
|
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); |
14017
|
|
|
|
|
|
|
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ |
14018
|
|
|
|
|
|
|
{ UPDATE_0(p); i = (i + i); A0; } else \ |
14019
|
|
|
|
|
|
|
{ UPDATE_1(p); i = (i + i) + 1; A1; } |
14020
|
|
|
|
|
|
|
#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) |
14021
|
|
|
|
|
|
|
|
14022
|
|
|
|
|
|
|
#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } |
14023
|
|
|
|
|
|
|
#define TREE_DECODE(probs, limit, i) \ |
14024
|
|
|
|
|
|
|
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } |
14025
|
|
|
|
|
|
|
|
14026
|
|
|
|
|
|
|
/* #define _LZMA_SIZE_OPT */ |
14027
|
|
|
|
|
|
|
|
14028
|
|
|
|
|
|
|
#ifdef _LZMA_SIZE_OPT |
14029
|
|
|
|
|
|
|
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) |
14030
|
|
|
|
|
|
|
#else |
14031
|
|
|
|
|
|
|
#define TREE_6_DECODE(probs, i) \ |
14032
|
|
|
|
|
|
|
{ i = 1; \ |
14033
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
14034
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
14035
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
14036
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
14037
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
14038
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
14039
|
|
|
|
|
|
|
i -= 0x40; } |
14040
|
|
|
|
|
|
|
#endif |
14041
|
|
|
|
|
|
|
|
14042
|
|
|
|
|
|
|
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } |
14043
|
|
|
|
|
|
|
|
14044
|
|
|
|
|
|
|
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) |
14045
|
|
|
|
|
|
|
#define UPDATE_0_CHECK range = bound; |
14046
|
|
|
|
|
|
|
#define UPDATE_1_CHECK range -= bound; code -= bound; |
14047
|
|
|
|
|
|
|
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ |
14048
|
|
|
|
|
|
|
{ UPDATE_0_CHECK; i = (i + i); A0; } else \ |
14049
|
|
|
|
|
|
|
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; } |
14050
|
|
|
|
|
|
|
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) |
14051
|
|
|
|
|
|
|
#define TREE_DECODE_CHECK(probs, limit, i) \ |
14052
|
|
|
|
|
|
|
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } |
14053
|
|
|
|
|
|
|
|
14054
|
|
|
|
|
|
|
#define kNumPosBitsMax 4 |
14055
|
|
|
|
|
|
|
#define kNumPosStatesMax (1 << kNumPosBitsMax) |
14056
|
|
|
|
|
|
|
|
14057
|
|
|
|
|
|
|
#define kLenNumLowBits 3 |
14058
|
|
|
|
|
|
|
#define kLenNumLowSymbols (1 << kLenNumLowBits) |
14059
|
|
|
|
|
|
|
#define kLenNumMidBits 3 |
14060
|
|
|
|
|
|
|
#define kLenNumMidSymbols (1 << kLenNumMidBits) |
14061
|
|
|
|
|
|
|
#define kLenNumHighBits 8 |
14062
|
|
|
|
|
|
|
#define kLenNumHighSymbols (1 << kLenNumHighBits) |
14063
|
|
|
|
|
|
|
|
14064
|
|
|
|
|
|
|
#define LenChoice 0 |
14065
|
|
|
|
|
|
|
#define LenChoice2 (LenChoice + 1) |
14066
|
|
|
|
|
|
|
#define LenLow (LenChoice2 + 1) |
14067
|
|
|
|
|
|
|
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) |
14068
|
|
|
|
|
|
|
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) |
14069
|
|
|
|
|
|
|
#define kNumLenProbs (LenHigh + kLenNumHighSymbols) |
14070
|
|
|
|
|
|
|
|
14071
|
|
|
|
|
|
|
#define kNumStates 12 |
14072
|
|
|
|
|
|
|
#define kNumLitStates 7 |
14073
|
|
|
|
|
|
|
|
14074
|
|
|
|
|
|
|
#define kStartPosModelIndex 4 |
14075
|
|
|
|
|
|
|
#define kEndPosModelIndex 14 |
14076
|
|
|
|
|
|
|
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) |
14077
|
|
|
|
|
|
|
|
14078
|
|
|
|
|
|
|
#define kNumPosSlotBits 6 |
14079
|
|
|
|
|
|
|
#define kNumLenToPosStates 4 |
14080
|
|
|
|
|
|
|
|
14081
|
|
|
|
|
|
|
#define kNumAlignBits 4 |
14082
|
|
|
|
|
|
|
#define kAlignTableSize (1 << kNumAlignBits) |
14083
|
|
|
|
|
|
|
|
14084
|
|
|
|
|
|
|
#define kMatchMinLen 2 |
14085
|
|
|
|
|
|
|
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) |
14086
|
|
|
|
|
|
|
|
14087
|
|
|
|
|
|
|
#define IsMatch 0 |
14088
|
|
|
|
|
|
|
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) |
14089
|
|
|
|
|
|
|
#define IsRepG0 (IsRep + kNumStates) |
14090
|
|
|
|
|
|
|
#define IsRepG1 (IsRepG0 + kNumStates) |
14091
|
|
|
|
|
|
|
#define IsRepG2 (IsRepG1 + kNumStates) |
14092
|
|
|
|
|
|
|
#define IsRep0Long (IsRepG2 + kNumStates) |
14093
|
|
|
|
|
|
|
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) |
14094
|
|
|
|
|
|
|
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) |
14095
|
|
|
|
|
|
|
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) |
14096
|
|
|
|
|
|
|
#define LenCoder (Align + kAlignTableSize) |
14097
|
|
|
|
|
|
|
#define RepLenCoder (LenCoder + kNumLenProbs) |
14098
|
|
|
|
|
|
|
#define Literal (RepLenCoder + kNumLenProbs) |
14099
|
|
|
|
|
|
|
|
14100
|
|
|
|
|
|
|
#define LZMA_BASE_SIZE 1846 |
14101
|
|
|
|
|
|
|
#define LZMA_LIT_SIZE 768 |
14102
|
|
|
|
|
|
|
|
14103
|
|
|
|
|
|
|
#define LzmaProps_GetNumProbs(p) ((uint32_t)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) |
14104
|
|
|
|
|
|
|
|
14105
|
|
|
|
|
|
|
#if Literal != LZMA_BASE_SIZE |
14106
|
|
|
|
|
|
|
StopCompilingDueBUG |
14107
|
|
|
|
|
|
|
#endif |
14108
|
|
|
|
|
|
|
|
14109
|
|
|
|
|
|
|
#define LZMA_DIC_MIN (1 << 12) |
14110
|
|
|
|
|
|
|
|
14111
|
|
|
|
|
|
|
/* First LZMA-symbol is always decoded. |
14112
|
|
|
|
|
|
|
And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization |
14113
|
|
|
|
|
|
|
Out: |
14114
|
|
|
|
|
|
|
Result: |
14115
|
|
|
|
|
|
|
SZ_OK - OK |
14116
|
|
|
|
|
|
|
SZ_ERROR_DATA - Error |
14117
|
|
|
|
|
|
|
p->remainLen: |
14118
|
|
|
|
|
|
|
< kMatchSpecLenStart : normal remain |
14119
|
|
|
|
|
|
|
= kMatchSpecLenStart : finished |
14120
|
|
|
|
|
|
|
= kMatchSpecLenStart + 1 : Flush marker |
14121
|
|
|
|
|
|
|
= kMatchSpecLenStart + 2 : State Init Marker |
14122
|
|
|
|
|
|
|
*/ |
14123
|
|
|
|
|
|
|
|
14124
|
120
|
|
|
|
|
|
static int LzmaDec_DecodeReal(CLzmaDec *p, size_t limit, const uint8_t *bufLimit) |
14125
|
|
|
|
|
|
|
{ |
14126
|
120
|
|
|
|
|
|
CLzmaProb *probs = p->probs; |
14127
|
|
|
|
|
|
|
|
14128
|
120
|
|
|
|
|
|
unsigned state = p->state; |
14129
|
120
|
|
|
|
|
|
uint32_t rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; |
14130
|
120
|
|
|
|
|
|
unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; |
14131
|
120
|
|
|
|
|
|
unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; |
14132
|
120
|
|
|
|
|
|
unsigned lc = p->prop.lc; |
14133
|
|
|
|
|
|
|
|
14134
|
120
|
|
|
|
|
|
uint8_t *dic = p->dic; |
14135
|
120
|
|
|
|
|
|
size_t dicBufSize = p->dicBufSize; |
14136
|
120
|
|
|
|
|
|
size_t dicPos = p->dicPos; |
14137
|
|
|
|
|
|
|
|
14138
|
120
|
|
|
|
|
|
uint32_t processedPos = p->processedPos; |
14139
|
120
|
|
|
|
|
|
uint32_t checkDicSize = p->checkDicSize; |
14140
|
|
|
|
|
|
|
unsigned len = 0; |
14141
|
|
|
|
|
|
|
|
14142
|
120
|
|
|
|
|
|
const uint8_t *buf = p->buf; |
14143
|
120
|
|
|
|
|
|
uint32_t range = p->range; |
14144
|
120
|
|
|
|
|
|
uint32_t code = p->code; |
14145
|
|
|
|
|
|
|
|
14146
|
|
|
|
|
|
|
do |
14147
|
|
|
|
|
|
|
{ |
14148
|
|
|
|
|
|
|
CLzmaProb *prob; |
14149
|
|
|
|
|
|
|
uint32_t bound; |
14150
|
|
|
|
|
|
|
unsigned ttt; |
14151
|
32137
|
|
|
|
|
|
unsigned posState = processedPos & pbMask; |
14152
|
|
|
|
|
|
|
|
14153
|
32137
|
|
|
|
|
|
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; |
14154
|
32137
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
100
|
|
|
|
|
|
14155
|
|
|
|
|
|
|
{ |
14156
|
|
|
|
|
|
|
unsigned symbol; |
14157
|
784
|
|
|
|
|
|
UPDATE_0(prob); |
14158
|
784
|
|
|
|
|
|
prob = probs + Literal; |
14159
|
784
|
100
|
|
|
|
|
if (checkDicSize != 0 || processedPos != 0) |
14160
|
1554
|
|
|
|
|
|
prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + |
14161
|
777
|
50
|
|
|
|
|
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); |
14162
|
|
|
|
|
|
|
|
14163
|
784
|
100
|
|
|
|
|
if (state < kNumLitStates) |
14164
|
|
|
|
|
|
|
{ |
14165
|
440
|
|
|
|
|
|
state -= (state < 4) ? state : 3; |
14166
|
|
|
|
|
|
|
symbol = 1; |
14167
|
3520
|
100
|
|
|
|
|
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14168
|
|
|
|
|
|
|
} |
14169
|
|
|
|
|
|
|
else |
14170
|
|
|
|
|
|
|
{ |
14171
|
344
|
50
|
|
|
|
|
unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
14172
|
|
|
|
|
|
|
unsigned offs = 0x100; |
14173
|
344
|
100
|
|
|
|
|
state -= (state < 10) ? 3 : 6; |
14174
|
|
|
|
|
|
|
symbol = 1; |
14175
|
|
|
|
|
|
|
do |
14176
|
|
|
|
|
|
|
{ |
14177
|
|
|
|
|
|
|
unsigned bit; |
14178
|
|
|
|
|
|
|
CLzmaProb *probLit; |
14179
|
2752
|
|
|
|
|
|
matchByte <<= 1; |
14180
|
2752
|
|
|
|
|
|
bit = (matchByte & offs); |
14181
|
2752
|
|
|
|
|
|
probLit = prob + offs + bit + symbol; |
14182
|
2752
|
100
|
|
|
|
|
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) |
|
|
100
|
|
|
|
|
|
14183
|
|
|
|
|
|
|
} |
14184
|
2752
|
100
|
|
|
|
|
while (symbol < 0x100); |
14185
|
|
|
|
|
|
|
} |
14186
|
784
|
|
|
|
|
|
dic[dicPos++] = (uint8_t)symbol; |
14187
|
784
|
|
|
|
|
|
processedPos++; |
14188
|
784
|
|
|
|
|
|
continue; |
14189
|
|
|
|
|
|
|
} |
14190
|
|
|
|
|
|
|
else |
14191
|
|
|
|
|
|
|
{ |
14192
|
31353
|
|
|
|
|
|
UPDATE_1(prob); |
14193
|
31353
|
|
|
|
|
|
prob = probs + IsRep + state; |
14194
|
31353
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
100
|
|
|
|
|
|
14195
|
|
|
|
|
|
|
{ |
14196
|
267
|
|
|
|
|
|
UPDATE_0(prob); |
14197
|
267
|
|
|
|
|
|
state += kNumStates; |
14198
|
267
|
|
|
|
|
|
prob = probs + LenCoder; |
14199
|
|
|
|
|
|
|
} |
14200
|
|
|
|
|
|
|
else |
14201
|
|
|
|
|
|
|
{ |
14202
|
31086
|
|
|
|
|
|
UPDATE_1(prob); |
14203
|
31086
|
50
|
|
|
|
|
if (checkDicSize == 0 && processedPos == 0) |
14204
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14205
|
31086
|
|
|
|
|
|
prob = probs + IsRepG0 + state; |
14206
|
31086
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
100
|
|
|
|
|
|
14207
|
|
|
|
|
|
|
{ |
14208
|
30975
|
|
|
|
|
|
UPDATE_0(prob); |
14209
|
30975
|
|
|
|
|
|
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; |
14210
|
30975
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
100
|
|
|
|
|
|
14211
|
|
|
|
|
|
|
{ |
14212
|
54
|
|
|
|
|
|
UPDATE_0(prob); |
14213
|
54
|
50
|
|
|
|
|
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
14214
|
54
|
|
|
|
|
|
dicPos++; |
14215
|
54
|
|
|
|
|
|
processedPos++; |
14216
|
54
|
100
|
|
|
|
|
state = state < kNumLitStates ? 9 : 11; |
14217
|
|
|
|
|
|
|
continue; |
14218
|
|
|
|
|
|
|
} |
14219
|
30921
|
|
|
|
|
|
UPDATE_1(prob); |
14220
|
|
|
|
|
|
|
} |
14221
|
|
|
|
|
|
|
else |
14222
|
|
|
|
|
|
|
{ |
14223
|
|
|
|
|
|
|
uint32_t distance; |
14224
|
111
|
|
|
|
|
|
UPDATE_1(prob); |
14225
|
111
|
|
|
|
|
|
prob = probs + IsRepG1 + state; |
14226
|
111
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
100
|
|
|
|
|
|
14227
|
|
|
|
|
|
|
{ |
14228
|
61
|
|
|
|
|
|
UPDATE_0(prob); |
14229
|
|
|
|
|
|
|
distance = rep1; |
14230
|
|
|
|
|
|
|
} |
14231
|
|
|
|
|
|
|
else |
14232
|
|
|
|
|
|
|
{ |
14233
|
50
|
|
|
|
|
|
UPDATE_1(prob); |
14234
|
50
|
|
|
|
|
|
prob = probs + IsRepG2 + state; |
14235
|
50
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
100
|
|
|
|
|
|
14236
|
|
|
|
|
|
|
{ |
14237
|
31
|
|
|
|
|
|
UPDATE_0(prob); |
14238
|
|
|
|
|
|
|
distance = rep2; |
14239
|
|
|
|
|
|
|
} |
14240
|
|
|
|
|
|
|
else |
14241
|
|
|
|
|
|
|
{ |
14242
|
19
|
|
|
|
|
|
UPDATE_1(prob); |
14243
|
|
|
|
|
|
|
distance = rep3; |
14244
|
|
|
|
|
|
|
rep3 = rep2; |
14245
|
|
|
|
|
|
|
} |
14246
|
|
|
|
|
|
|
rep2 = rep1; |
14247
|
|
|
|
|
|
|
} |
14248
|
|
|
|
|
|
|
rep1 = rep0; |
14249
|
|
|
|
|
|
|
rep0 = distance; |
14250
|
|
|
|
|
|
|
} |
14251
|
31032
|
100
|
|
|
|
|
state = state < kNumLitStates ? 8 : 11; |
14252
|
31032
|
|
|
|
|
|
prob = probs + RepLenCoder; |
14253
|
|
|
|
|
|
|
} |
14254
|
|
|
|
|
|
|
{ |
14255
|
|
|
|
|
|
|
unsigned limit, offset; |
14256
|
|
|
|
|
|
|
CLzmaProb *probLen = prob + LenChoice; |
14257
|
31299
|
100
|
|
|
|
|
IF_BIT_0(probLen) |
|
|
100
|
|
|
|
|
|
14258
|
|
|
|
|
|
|
{ |
14259
|
276
|
|
|
|
|
|
UPDATE_0(probLen); |
14260
|
276
|
|
|
|
|
|
probLen = prob + LenLow + (posState << kLenNumLowBits); |
14261
|
|
|
|
|
|
|
offset = 0; |
14262
|
|
|
|
|
|
|
limit = (1 << kLenNumLowBits); |
14263
|
|
|
|
|
|
|
} |
14264
|
|
|
|
|
|
|
else |
14265
|
|
|
|
|
|
|
{ |
14266
|
31023
|
|
|
|
|
|
UPDATE_1(probLen); |
14267
|
|
|
|
|
|
|
probLen = prob + LenChoice2; |
14268
|
31023
|
100
|
|
|
|
|
IF_BIT_0(probLen) |
|
|
100
|
|
|
|
|
|
14269
|
|
|
|
|
|
|
{ |
14270
|
54
|
|
|
|
|
|
UPDATE_0(probLen); |
14271
|
54
|
|
|
|
|
|
probLen = prob + LenMid + (posState << kLenNumMidBits); |
14272
|
|
|
|
|
|
|
offset = kLenNumLowSymbols; |
14273
|
|
|
|
|
|
|
limit = (1 << kLenNumMidBits); |
14274
|
|
|
|
|
|
|
} |
14275
|
|
|
|
|
|
|
else |
14276
|
|
|
|
|
|
|
{ |
14277
|
30969
|
|
|
|
|
|
UPDATE_1(probLen); |
14278
|
31299
|
|
|
|
|
|
probLen = prob + LenHigh; |
14279
|
|
|
|
|
|
|
offset = kLenNumLowSymbols + kLenNumMidSymbols; |
14280
|
|
|
|
|
|
|
limit = (1 << kLenNumHighBits); |
14281
|
|
|
|
|
|
|
} |
14282
|
|
|
|
|
|
|
} |
14283
|
248742
|
100
|
|
|
|
|
TREE_DECODE(probLen, limit, len); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14284
|
31299
|
|
|
|
|
|
len += offset; |
14285
|
|
|
|
|
|
|
} |
14286
|
|
|
|
|
|
|
|
14287
|
31299
|
100
|
|
|
|
|
if (state >= kNumStates) |
14288
|
|
|
|
|
|
|
{ |
14289
|
|
|
|
|
|
|
uint32_t distance; |
14290
|
267
|
|
|
|
|
|
prob = probs + PosSlot + |
14291
|
534
|
|
|
|
|
|
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); |
14292
|
267
|
100
|
|
|
|
|
TREE_6_DECODE(prob, distance); |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14293
|
267
|
100
|
|
|
|
|
if (distance >= kStartPosModelIndex) |
14294
|
|
|
|
|
|
|
{ |
14295
|
|
|
|
|
|
|
unsigned posSlot = (unsigned)distance; |
14296
|
229
|
|
|
|
|
|
int numDirectBits = (int)(((distance >> 1) - 1)); |
14297
|
229
|
|
|
|
|
|
distance = (2 | (distance & 1)); |
14298
|
229
|
100
|
|
|
|
|
if (posSlot < kEndPosModelIndex) |
14299
|
|
|
|
|
|
|
{ |
14300
|
119
|
|
|
|
|
|
distance <<= numDirectBits; |
14301
|
119
|
|
|
|
|
|
prob = probs + SpecPos + distance - posSlot - 1; |
14302
|
|
|
|
|
|
|
{ |
14303
|
|
|
|
|
|
|
uint32_t mask = 1; |
14304
|
|
|
|
|
|
|
unsigned i = 1; |
14305
|
337
|
100
|
|
|
|
|
do |
14306
|
|
|
|
|
|
|
{ |
14307
|
337
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= mask); |
|
|
100
|
|
|
|
|
|
14308
|
337
|
|
|
|
|
|
mask <<= 1; |
14309
|
|
|
|
|
|
|
} |
14310
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
14311
|
|
|
|
|
|
|
} |
14312
|
|
|
|
|
|
|
} |
14313
|
|
|
|
|
|
|
else |
14314
|
|
|
|
|
|
|
{ |
14315
|
110
|
|
|
|
|
|
numDirectBits -= kNumAlignBits; |
14316
|
1135
|
100
|
|
|
|
|
do |
14317
|
|
|
|
|
|
|
{ |
14318
|
1135
|
100
|
|
|
|
|
NORMALIZE |
14319
|
1135
|
|
|
|
|
|
range >>= 1; |
14320
|
|
|
|
|
|
|
|
14321
|
|
|
|
|
|
|
{ |
14322
|
|
|
|
|
|
|
uint32_t t; |
14323
|
1135
|
|
|
|
|
|
code -= range; |
14324
|
1135
|
|
|
|
|
|
t = (0 - ((uint32_t)code >> 31)); /* (uint32_t)((int32_t)code >> 31) */ |
14325
|
1135
|
|
|
|
|
|
distance = (distance << 1) + (t + 1); |
14326
|
1135
|
|
|
|
|
|
code += range & t; |
14327
|
|
|
|
|
|
|
} |
14328
|
|
|
|
|
|
|
/* |
14329
|
|
|
|
|
|
|
distance <<= 1; |
14330
|
|
|
|
|
|
|
if (code >= range) |
14331
|
|
|
|
|
|
|
{ |
14332
|
|
|
|
|
|
|
code -= range; |
14333
|
|
|
|
|
|
|
distance |= 1; |
14334
|
|
|
|
|
|
|
} |
14335
|
|
|
|
|
|
|
*/ |
14336
|
|
|
|
|
|
|
} |
14337
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
14338
|
110
|
|
|
|
|
|
prob = probs + Align; |
14339
|
110
|
|
|
|
|
|
distance <<= kNumAlignBits; |
14340
|
|
|
|
|
|
|
{ |
14341
|
|
|
|
|
|
|
unsigned i = 1; |
14342
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 1); |
|
|
100
|
|
|
|
|
|
14343
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 2); |
|
|
100
|
|
|
|
|
|
14344
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 4); |
|
|
100
|
|
|
|
|
|
14345
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 8); |
|
|
100
|
|
|
|
|
|
14346
|
|
|
|
|
|
|
} |
14347
|
110
|
50
|
|
|
|
|
if (distance == (uint32_t)0xFFFFFFFF) |
14348
|
|
|
|
|
|
|
{ |
14349
|
0
|
|
|
|
|
|
len += kMatchSpecLenStart; |
14350
|
0
|
|
|
|
|
|
state -= kNumStates; |
14351
|
0
|
|
|
|
|
|
break; |
14352
|
|
|
|
|
|
|
} |
14353
|
|
|
|
|
|
|
} |
14354
|
|
|
|
|
|
|
} |
14355
|
|
|
|
|
|
|
rep3 = rep2; |
14356
|
|
|
|
|
|
|
rep2 = rep1; |
14357
|
|
|
|
|
|
|
rep1 = rep0; |
14358
|
267
|
|
|
|
|
|
rep0 = distance + 1; |
14359
|
267
|
50
|
|
|
|
|
if (checkDicSize == 0) |
14360
|
|
|
|
|
|
|
{ |
14361
|
267
|
50
|
|
|
|
|
if (distance >= processedPos) |
14362
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14363
|
|
|
|
|
|
|
} |
14364
|
0
|
0
|
|
|
|
|
else if (distance >= checkDicSize) |
14365
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14366
|
267
|
100
|
|
|
|
|
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; |
14367
|
|
|
|
|
|
|
} |
14368
|
|
|
|
|
|
|
|
14369
|
31299
|
|
|
|
|
|
len += kMatchMinLen; |
14370
|
|
|
|
|
|
|
|
14371
|
31299
|
50
|
|
|
|
|
if (limit == dicPos) |
14372
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14373
|
|
|
|
|
|
|
{ |
14374
|
31299
|
|
|
|
|
|
size_t rem = limit - dicPos; |
14375
|
31299
|
50
|
|
|
|
|
unsigned curLen = ((rem < len) ? (unsigned)rem : len); |
14376
|
31299
|
50
|
|
|
|
|
size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); |
14377
|
|
|
|
|
|
|
|
14378
|
31299
|
|
|
|
|
|
processedPos += curLen; |
14379
|
|
|
|
|
|
|
|
14380
|
31299
|
|
|
|
|
|
len -= curLen; |
14381
|
31299
|
50
|
|
|
|
|
if (pos + curLen <= dicBufSize) |
14382
|
|
|
|
|
|
|
{ |
14383
|
31299
|
|
|
|
|
|
uint8_t *dest = dic + dicPos; |
14384
|
31299
|
|
|
|
|
|
ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; |
14385
|
31299
|
|
|
|
|
|
const uint8_t *lim = dest + curLen; |
14386
|
31299
|
|
|
|
|
|
dicPos += curLen; |
14387
|
8428629
|
100
|
|
|
|
|
do |
14388
|
8428629
|
|
|
|
|
|
*(dest) = (uint8_t)*(dest + src); |
14389
|
|
|
|
|
|
|
while (++dest != lim); |
14390
|
|
|
|
|
|
|
} |
14391
|
|
|
|
|
|
|
else |
14392
|
|
|
|
|
|
|
{ |
14393
|
0
|
0
|
|
|
|
|
do |
14394
|
|
|
|
|
|
|
{ |
14395
|
0
|
|
|
|
|
|
dic[dicPos++] = dic[pos]; |
14396
|
0
|
0
|
|
|
|
|
if (++pos == dicBufSize) |
14397
|
|
|
|
|
|
|
pos = 0; |
14398
|
|
|
|
|
|
|
} |
14399
|
|
|
|
|
|
|
while (--curLen != 0); |
14400
|
|
|
|
|
|
|
} |
14401
|
|
|
|
|
|
|
} |
14402
|
|
|
|
|
|
|
} |
14403
|
|
|
|
|
|
|
} |
14404
|
32137
|
100
|
|
|
|
|
while (dicPos < limit && buf < bufLimit); |
14405
|
120
|
100
|
|
|
|
|
NORMALIZE; |
14406
|
120
|
|
|
|
|
|
p->buf = buf; |
14407
|
120
|
|
|
|
|
|
p->range = range; |
14408
|
120
|
|
|
|
|
|
p->code = code; |
14409
|
120
|
|
|
|
|
|
p->remainLen = len; |
14410
|
120
|
|
|
|
|
|
p->dicPos = dicPos; |
14411
|
120
|
|
|
|
|
|
p->processedPos = processedPos; |
14412
|
120
|
|
|
|
|
|
p->reps[0] = rep0; |
14413
|
120
|
|
|
|
|
|
p->reps[1] = rep1; |
14414
|
120
|
|
|
|
|
|
p->reps[2] = rep2; |
14415
|
120
|
|
|
|
|
|
p->reps[3] = rep3; |
14416
|
120
|
|
|
|
|
|
p->state = state; |
14417
|
|
|
|
|
|
|
|
14418
|
120
|
|
|
|
|
|
return SZ_OK; |
14419
|
|
|
|
|
|
|
} |
14420
|
|
|
|
|
|
|
|
14421
|
127
|
|
|
|
|
|
static void LzmaDec_WriteRem(CLzmaDec *p, size_t limit) |
14422
|
|
|
|
|
|
|
{ |
14423
|
127
|
50
|
|
|
|
|
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) |
14424
|
|
|
|
|
|
|
{ |
14425
|
0
|
|
|
|
|
|
uint8_t *dic = p->dic; |
14426
|
0
|
|
|
|
|
|
size_t dicPos = p->dicPos; |
14427
|
0
|
|
|
|
|
|
size_t dicBufSize = p->dicBufSize; |
14428
|
|
|
|
|
|
|
unsigned len = p->remainLen; |
14429
|
0
|
|
|
|
|
|
uint32_t rep0 = p->reps[0]; |
14430
|
0
|
0
|
|
|
|
|
if (limit - dicPos < len) |
14431
|
0
|
|
|
|
|
|
len = (unsigned)(limit - dicPos); |
14432
|
|
|
|
|
|
|
|
14433
|
0
|
0
|
|
|
|
|
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) |
|
|
0
|
|
|
|
|
|
14434
|
0
|
|
|
|
|
|
p->checkDicSize = p->prop.dicSize; |
14435
|
|
|
|
|
|
|
|
14436
|
0
|
|
|
|
|
|
p->processedPos += len; |
14437
|
0
|
|
|
|
|
|
p->remainLen -= len; |
14438
|
0
|
0
|
|
|
|
|
while (len-- != 0) |
14439
|
|
|
|
|
|
|
{ |
14440
|
0
|
0
|
|
|
|
|
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
14441
|
0
|
|
|
|
|
|
dicPos++; |
14442
|
|
|
|
|
|
|
} |
14443
|
0
|
|
|
|
|
|
p->dicPos = dicPos; |
14444
|
|
|
|
|
|
|
} |
14445
|
127
|
|
|
|
|
|
} |
14446
|
|
|
|
|
|
|
|
14447
|
240
|
|
|
|
|
|
static int LzmaDec_DecodeReal2(CLzmaDec *p, size_t limit, const uint8_t *bufLimit) |
14448
|
|
|
|
|
|
|
{ |
14449
|
|
|
|
|
|
|
do |
14450
|
|
|
|
|
|
|
{ |
14451
|
|
|
|
|
|
|
size_t limit2 = limit; |
14452
|
120
|
50
|
|
|
|
|
if (p->checkDicSize == 0) |
14453
|
|
|
|
|
|
|
{ |
14454
|
120
|
|
|
|
|
|
uint32_t rem = p->prop.dicSize - p->processedPos; |
14455
|
120
|
50
|
|
|
|
|
if (limit - p->dicPos > rem) |
14456
|
0
|
|
|
|
|
|
limit2 = p->dicPos + rem; |
14457
|
|
|
|
|
|
|
} |
14458
|
120
|
50
|
|
|
|
|
RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); |
14459
|
120
|
50
|
|
|
|
|
if (p->processedPos >= p->prop.dicSize) |
14460
|
0
|
|
|
|
|
|
p->checkDicSize = p->prop.dicSize; |
14461
|
120
|
|
|
|
|
|
LzmaDec_WriteRem(p, limit); |
14462
|
|
|
|
|
|
|
} |
14463
|
120
|
100
|
|
|
|
|
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
14464
|
|
|
|
|
|
|
|
14465
|
120
|
50
|
|
|
|
|
if (p->remainLen > kMatchSpecLenStart) |
14466
|
|
|
|
|
|
|
{ |
14467
|
0
|
|
|
|
|
|
p->remainLen = kMatchSpecLenStart; |
14468
|
|
|
|
|
|
|
} |
14469
|
|
|
|
|
|
|
return 0; |
14470
|
|
|
|
|
|
|
} |
14471
|
|
|
|
|
|
|
|
14472
|
|
|
|
|
|
|
enum ELzmaDummy |
14473
|
|
|
|
|
|
|
{ |
14474
|
|
|
|
|
|
|
DUMMY_ERROR, /* unexpected end of input stream */ |
14475
|
|
|
|
|
|
|
DUMMY_LIT, |
14476
|
|
|
|
|
|
|
DUMMY_MATCH, |
14477
|
|
|
|
|
|
|
DUMMY_REP |
14478
|
|
|
|
|
|
|
}; |
14479
|
|
|
|
|
|
|
|
14480
|
110
|
|
|
|
|
|
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const uint8_t *buf, size_t inSize) |
14481
|
|
|
|
|
|
|
{ |
14482
|
110
|
|
|
|
|
|
uint32_t range = p->range; |
14483
|
110
|
|
|
|
|
|
uint32_t code = p->code; |
14484
|
110
|
|
|
|
|
|
const uint8_t *bufLimit = buf + inSize; |
14485
|
110
|
|
|
|
|
|
CLzmaProb *probs = p->probs; |
14486
|
110
|
|
|
|
|
|
unsigned state = p->state; |
14487
|
|
|
|
|
|
|
ELzmaDummy res; |
14488
|
|
|
|
|
|
|
|
14489
|
|
|
|
|
|
|
{ |
14490
|
|
|
|
|
|
|
CLzmaProb *prob; |
14491
|
|
|
|
|
|
|
uint32_t bound; |
14492
|
|
|
|
|
|
|
unsigned ttt; |
14493
|
110
|
|
|
|
|
|
unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); |
14494
|
|
|
|
|
|
|
|
14495
|
110
|
|
|
|
|
|
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; |
14496
|
110
|
50
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14497
|
|
|
|
|
|
|
{ |
14498
|
|
|
|
|
|
|
UPDATE_0_CHECK |
14499
|
|
|
|
|
|
|
|
14500
|
|
|
|
|
|
|
/* if (bufLimit - buf >= 7) return DUMMY_LIT; */ |
14501
|
|
|
|
|
|
|
|
14502
|
69
|
|
|
|
|
|
prob = probs + Literal; |
14503
|
69
|
100
|
|
|
|
|
if (p->checkDicSize != 0 || p->processedPos != 0) |
14504
|
68
|
|
|
|
|
|
prob += (LZMA_LIT_SIZE * |
14505
|
136
|
|
|
|
|
|
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + |
14506
|
68
|
50
|
|
|
|
|
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); |
14507
|
|
|
|
|
|
|
|
14508
|
69
|
100
|
|
|
|
|
if (state < kNumLitStates) |
14509
|
|
|
|
|
|
|
{ |
14510
|
|
|
|
|
|
|
unsigned symbol = 1; |
14511
|
352
|
100
|
|
|
|
|
do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14512
|
|
|
|
|
|
|
} |
14513
|
|
|
|
|
|
|
else |
14514
|
|
|
|
|
|
|
{ |
14515
|
50
|
|
|
|
|
|
unsigned matchByte = p->dic[p->dicPos - p->reps[0] + |
14516
|
25
|
50
|
|
|
|
|
((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; |
14517
|
|
|
|
|
|
|
unsigned offs = 0x100; |
14518
|
|
|
|
|
|
|
unsigned symbol = 1; |
14519
|
|
|
|
|
|
|
do |
14520
|
|
|
|
|
|
|
{ |
14521
|
|
|
|
|
|
|
unsigned bit; |
14522
|
|
|
|
|
|
|
CLzmaProb *probLit; |
14523
|
200
|
|
|
|
|
|
matchByte <<= 1; |
14524
|
200
|
|
|
|
|
|
bit = (matchByte & offs); |
14525
|
200
|
|
|
|
|
|
probLit = prob + offs + bit + symbol; |
14526
|
200
|
100
|
|
|
|
|
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14527
|
|
|
|
|
|
|
} |
14528
|
200
|
100
|
|
|
|
|
while (symbol < 0x100); |
14529
|
|
|
|
|
|
|
} |
14530
|
|
|
|
|
|
|
res = DUMMY_LIT; |
14531
|
|
|
|
|
|
|
} |
14532
|
|
|
|
|
|
|
else |
14533
|
|
|
|
|
|
|
{ |
14534
|
|
|
|
|
|
|
unsigned len; |
14535
|
41
|
|
|
|
|
|
UPDATE_1_CHECK; |
14536
|
|
|
|
|
|
|
|
14537
|
41
|
|
|
|
|
|
prob = probs + IsRep + state; |
14538
|
41
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14539
|
|
|
|
|
|
|
{ |
14540
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14541
|
|
|
|
|
|
|
state = 0; |
14542
|
20
|
|
|
|
|
|
prob = probs + LenCoder; |
14543
|
|
|
|
|
|
|
res = DUMMY_MATCH; |
14544
|
|
|
|
|
|
|
} |
14545
|
|
|
|
|
|
|
else |
14546
|
|
|
|
|
|
|
{ |
14547
|
21
|
|
|
|
|
|
UPDATE_1_CHECK; |
14548
|
|
|
|
|
|
|
res = DUMMY_REP; |
14549
|
21
|
|
|
|
|
|
prob = probs + IsRepG0 + state; |
14550
|
21
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14551
|
|
|
|
|
|
|
{ |
14552
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14553
|
15
|
|
|
|
|
|
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; |
14554
|
15
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14555
|
|
|
|
|
|
|
{ |
14556
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14557
|
9
|
100
|
|
|
|
|
NORMALIZE_CHECK; |
|
|
50
|
|
|
|
|
|
14558
|
|
|
|
|
|
|
return DUMMY_REP; |
14559
|
|
|
|
|
|
|
} |
14560
|
|
|
|
|
|
|
else |
14561
|
|
|
|
|
|
|
{ |
14562
|
6
|
|
|
|
|
|
UPDATE_1_CHECK; |
14563
|
|
|
|
|
|
|
} |
14564
|
|
|
|
|
|
|
} |
14565
|
|
|
|
|
|
|
else |
14566
|
|
|
|
|
|
|
{ |
14567
|
6
|
|
|
|
|
|
UPDATE_1_CHECK; |
14568
|
6
|
|
|
|
|
|
prob = probs + IsRepG1 + state; |
14569
|
6
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14570
|
|
|
|
|
|
|
{ |
14571
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14572
|
|
|
|
|
|
|
} |
14573
|
|
|
|
|
|
|
else |
14574
|
|
|
|
|
|
|
{ |
14575
|
4
|
|
|
|
|
|
UPDATE_1_CHECK; |
14576
|
4
|
|
|
|
|
|
prob = probs + IsRepG2 + state; |
14577
|
4
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14578
|
|
|
|
|
|
|
{ |
14579
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14580
|
|
|
|
|
|
|
} |
14581
|
|
|
|
|
|
|
else |
14582
|
|
|
|
|
|
|
{ |
14583
|
1
|
|
|
|
|
|
UPDATE_1_CHECK; |
14584
|
|
|
|
|
|
|
} |
14585
|
|
|
|
|
|
|
} |
14586
|
|
|
|
|
|
|
} |
14587
|
|
|
|
|
|
|
state = kNumStates; |
14588
|
12
|
|
|
|
|
|
prob = probs + RepLenCoder; |
14589
|
|
|
|
|
|
|
} |
14590
|
|
|
|
|
|
|
{ |
14591
|
|
|
|
|
|
|
unsigned limit, offset; |
14592
|
|
|
|
|
|
|
CLzmaProb *probLen = prob + LenChoice; |
14593
|
32
|
100
|
|
|
|
|
IF_BIT_0_CHECK(probLen) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14594
|
|
|
|
|
|
|
{ |
14595
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14596
|
23
|
|
|
|
|
|
probLen = prob + LenLow + (posState << kLenNumLowBits); |
14597
|
|
|
|
|
|
|
offset = 0; |
14598
|
|
|
|
|
|
|
limit = 1 << kLenNumLowBits; |
14599
|
|
|
|
|
|
|
} |
14600
|
|
|
|
|
|
|
else |
14601
|
|
|
|
|
|
|
{ |
14602
|
9
|
|
|
|
|
|
UPDATE_1_CHECK; |
14603
|
|
|
|
|
|
|
probLen = prob + LenChoice2; |
14604
|
9
|
50
|
|
|
|
|
IF_BIT_0_CHECK(probLen) |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14605
|
|
|
|
|
|
|
{ |
14606
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
14607
|
4
|
|
|
|
|
|
probLen = prob + LenMid + (posState << kLenNumMidBits); |
14608
|
|
|
|
|
|
|
offset = kLenNumLowSymbols; |
14609
|
|
|
|
|
|
|
limit = 1 << kLenNumMidBits; |
14610
|
|
|
|
|
|
|
} |
14611
|
|
|
|
|
|
|
else |
14612
|
|
|
|
|
|
|
{ |
14613
|
5
|
|
|
|
|
|
UPDATE_1_CHECK; |
14614
|
32
|
|
|
|
|
|
probLen = prob + LenHigh; |
14615
|
|
|
|
|
|
|
offset = kLenNumLowSymbols + kLenNumMidSymbols; |
14616
|
|
|
|
|
|
|
limit = 1 << kLenNumHighBits; |
14617
|
|
|
|
|
|
|
} |
14618
|
|
|
|
|
|
|
} |
14619
|
121
|
100
|
|
|
|
|
TREE_DECODE_CHECK(probLen, limit, len); |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14620
|
32
|
|
|
|
|
|
len += offset; |
14621
|
|
|
|
|
|
|
} |
14622
|
|
|
|
|
|
|
|
14623
|
32
|
100
|
|
|
|
|
if (state < 4) |
14624
|
|
|
|
|
|
|
{ |
14625
|
|
|
|
|
|
|
unsigned posSlot; |
14626
|
20
|
|
|
|
|
|
prob = probs + PosSlot + |
14627
|
20
|
|
|
|
|
|
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << |
14628
|
20
|
|
|
|
|
|
kNumPosSlotBits); |
14629
|
120
|
100
|
|
|
|
|
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14630
|
20
|
100
|
|
|
|
|
if (posSlot >= kStartPosModelIndex) |
14631
|
|
|
|
|
|
|
{ |
14632
|
18
|
|
|
|
|
|
int numDirectBits = ((posSlot >> 1) - 1); |
14633
|
|
|
|
|
|
|
|
14634
|
|
|
|
|
|
|
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ |
14635
|
|
|
|
|
|
|
|
14636
|
18
|
100
|
|
|
|
|
if (posSlot < kEndPosModelIndex) |
14637
|
|
|
|
|
|
|
{ |
14638
|
10
|
|
|
|
|
|
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; |
14639
|
|
|
|
|
|
|
} |
14640
|
|
|
|
|
|
|
else |
14641
|
|
|
|
|
|
|
{ |
14642
|
8
|
|
|
|
|
|
numDirectBits -= kNumAlignBits; |
14643
|
55
|
100
|
|
|
|
|
do |
14644
|
|
|
|
|
|
|
{ |
14645
|
55
|
100
|
|
|
|
|
NORMALIZE_CHECK |
|
|
50
|
|
|
|
|
|
14646
|
55
|
|
|
|
|
|
range >>= 1; |
14647
|
55
|
|
|
|
|
|
code -= range & (((code - range) >> 31) - 1); |
14648
|
|
|
|
|
|
|
/* if (code >= range) code -= range; */ |
14649
|
|
|
|
|
|
|
} |
14650
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
14651
|
18
|
|
|
|
|
|
prob = probs + Align; |
14652
|
|
|
|
|
|
|
numDirectBits = kNumAlignBits; |
14653
|
|
|
|
|
|
|
} |
14654
|
|
|
|
|
|
|
{ |
14655
|
|
|
|
|
|
|
unsigned i = 1; |
14656
|
63
|
100
|
|
|
|
|
do |
14657
|
|
|
|
|
|
|
{ |
14658
|
63
|
100
|
|
|
|
|
GET_BIT_CHECK(prob + i, i); |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
14659
|
|
|
|
|
|
|
} |
14660
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
14661
|
|
|
|
|
|
|
} |
14662
|
|
|
|
|
|
|
} |
14663
|
|
|
|
|
|
|
} |
14664
|
|
|
|
|
|
|
} |
14665
|
|
|
|
|
|
|
} |
14666
|
101
|
100
|
|
|
|
|
NORMALIZE_CHECK; |
|
|
50
|
|
|
|
|
|
14667
|
|
|
|
|
|
|
return res; |
14668
|
|
|
|
|
|
|
} |
14669
|
|
|
|
|
|
|
|
14670
|
|
|
|
|
|
|
static void LzmaDec_InitRc(CLzmaDec *p, const uint8_t *data) |
14671
|
|
|
|
|
|
|
{ |
14672
|
7
|
|
|
|
|
|
p->code = ((uint32_t)data[1] << 24) | ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 8) | ((uint32_t)data[4]); |
14673
|
7
|
|
|
|
|
|
p->range = 0xFFFFFFFF; |
14674
|
7
|
|
|
|
|
|
p->needFlush = 0; |
14675
|
|
|
|
|
|
|
} |
14676
|
|
|
|
|
|
|
|
14677
|
0
|
|
|
|
|
|
void LzmaDec_InitDicAndState(CLzmaDec *p, bool initDic, bool initState) |
14678
|
|
|
|
|
|
|
{ |
14679
|
7
|
|
|
|
|
|
p->needFlush = 1; |
14680
|
7
|
|
|
|
|
|
p->remainLen = 0; |
14681
|
7
|
|
|
|
|
|
p->tempBufSize = 0; |
14682
|
|
|
|
|
|
|
|
14683
|
0
|
0
|
|
|
|
|
if (initDic) |
14684
|
|
|
|
|
|
|
{ |
14685
|
7
|
|
|
|
|
|
p->processedPos = 0; |
14686
|
7
|
|
|
|
|
|
p->checkDicSize = 0; |
14687
|
0
|
|
|
|
|
|
p->needInitState = 1; |
14688
|
|
|
|
|
|
|
} |
14689
|
0
|
0
|
|
|
|
|
if (initState) |
14690
|
0
|
|
|
|
|
|
p->needInitState = 1; |
14691
|
0
|
|
|
|
|
|
} |
14692
|
|
|
|
|
|
|
|
14693
|
0
|
|
|
|
|
|
void LzmaDec_Init(CLzmaDec *p) |
14694
|
|
|
|
|
|
|
{ |
14695
|
7
|
|
|
|
|
|
p->dicPos = 0; |
14696
|
|
|
|
|
|
|
LzmaDec_InitDicAndState(p, true, true); |
14697
|
0
|
|
|
|
|
|
} |
14698
|
|
|
|
|
|
|
|
14699
|
|
|
|
|
|
|
static void LzmaDec_InitStateReal(CLzmaDec *p) |
14700
|
|
|
|
|
|
|
{ |
14701
|
7
|
|
|
|
|
|
uint32_t numProbs = Literal + ((uint32_t)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); |
14702
|
|
|
|
|
|
|
uint32_t i; |
14703
|
7
|
|
|
|
|
|
CLzmaProb *probs = p->probs; |
14704
|
55937
|
100
|
|
|
|
|
for (i = 0; i < numProbs; i++) |
14705
|
55930
|
|
|
|
|
|
probs[i] = kBitModelTotal >> 1; |
14706
|
7
|
|
|
|
|
|
p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; |
14707
|
7
|
|
|
|
|
|
p->state = 0; |
14708
|
7
|
|
|
|
|
|
p->needInitState = 0; |
14709
|
|
|
|
|
|
|
} |
14710
|
|
|
|
|
|
|
|
14711
|
7
|
|
|
|
|
|
SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, const uint8_t *src, size_t *srcLen, |
14712
|
|
|
|
|
|
|
ELzmaFinishMode finishMode, ELzmaStatus *status) |
14713
|
|
|
|
|
|
|
{ |
14714
|
7
|
|
|
|
|
|
size_t inSize = *srcLen; |
14715
|
7
|
|
|
|
|
|
(*srcLen) = 0; |
14716
|
7
|
|
|
|
|
|
LzmaDec_WriteRem(p, dicLimit); |
14717
|
|
|
|
|
|
|
|
14718
|
127
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_SPECIFIED; |
14719
|
|
|
|
|
|
|
|
14720
|
127
|
50
|
|
|
|
|
while (p->remainLen != kMatchSpecLenStart) |
14721
|
|
|
|
|
|
|
{ |
14722
|
|
|
|
|
|
|
int checkEndMarkNow; |
14723
|
|
|
|
|
|
|
|
14724
|
127
|
100
|
|
|
|
|
if (p->needFlush != 0) |
14725
|
|
|
|
|
|
|
{ |
14726
|
42
|
50
|
|
|
|
|
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) |
|
|
100
|
|
|
|
|
|
14727
|
35
|
|
|
|
|
|
p->tempBuf[p->tempBufSize++] = *src++; |
14728
|
7
|
50
|
|
|
|
|
if (p->tempBufSize < RC_INIT_SIZE) |
14729
|
|
|
|
|
|
|
{ |
14730
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NEEDS_MORE_INPUT; |
14731
|
0
|
|
|
|
|
|
return SZ_OK; |
14732
|
|
|
|
|
|
|
} |
14733
|
7
|
50
|
|
|
|
|
if (p->tempBuf[0] != 0) |
14734
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14735
|
|
|
|
|
|
|
|
14736
|
|
|
|
|
|
|
LzmaDec_InitRc(p, p->tempBuf); |
14737
|
7
|
|
|
|
|
|
p->tempBufSize = 0; |
14738
|
|
|
|
|
|
|
} |
14739
|
|
|
|
|
|
|
|
14740
|
|
|
|
|
|
|
checkEndMarkNow = 0; |
14741
|
127
|
100
|
|
|
|
|
if (p->dicPos >= dicLimit) |
14742
|
|
|
|
|
|
|
{ |
14743
|
7
|
50
|
|
|
|
|
if (p->remainLen == 0 && p->code == 0) |
|
|
50
|
|
|
|
|
|
14744
|
|
|
|
|
|
|
{ |
14745
|
7
|
|
|
|
|
|
*status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; |
14746
|
7
|
|
|
|
|
|
return SZ_OK; |
14747
|
|
|
|
|
|
|
} |
14748
|
0
|
0
|
|
|
|
|
if (finishMode == LZMA_FINISH_ANY) |
14749
|
|
|
|
|
|
|
{ |
14750
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
14751
|
0
|
|
|
|
|
|
return SZ_OK; |
14752
|
|
|
|
|
|
|
} |
14753
|
0
|
0
|
|
|
|
|
if (p->remainLen != 0) |
14754
|
|
|
|
|
|
|
{ |
14755
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
14756
|
0
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14757
|
|
|
|
|
|
|
} |
14758
|
|
|
|
|
|
|
checkEndMarkNow = 1; |
14759
|
|
|
|
|
|
|
} |
14760
|
|
|
|
|
|
|
|
14761
|
120
|
100
|
|
|
|
|
if (p->needInitState) |
14762
|
|
|
|
|
|
|
LzmaDec_InitStateReal(p); |
14763
|
|
|
|
|
|
|
|
14764
|
120
|
50
|
|
|
|
|
if (p->tempBufSize == 0) |
14765
|
|
|
|
|
|
|
{ |
14766
|
|
|
|
|
|
|
size_t processed; |
14767
|
|
|
|
|
|
|
const uint8_t *bufLimit; |
14768
|
120
|
100
|
|
|
|
|
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) |
14769
|
|
|
|
|
|
|
{ |
14770
|
110
|
|
|
|
|
|
int dummyRes = LzmaDec_TryDummy(p, src, inSize); |
14771
|
110
|
50
|
|
|
|
|
if (dummyRes == DUMMY_ERROR) |
14772
|
|
|
|
|
|
|
{ |
14773
|
0
|
|
|
|
|
|
memcpy(p->tempBuf, src, inSize); |
14774
|
0
|
|
|
|
|
|
p->tempBufSize = (unsigned)inSize; |
14775
|
0
|
|
|
|
|
|
(*srcLen) += inSize; |
14776
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NEEDS_MORE_INPUT; |
14777
|
0
|
|
|
|
|
|
return SZ_OK; |
14778
|
|
|
|
|
|
|
} |
14779
|
110
|
50
|
|
|
|
|
if (checkEndMarkNow && dummyRes != DUMMY_MATCH) |
14780
|
|
|
|
|
|
|
{ |
14781
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
14782
|
0
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14783
|
|
|
|
|
|
|
} |
14784
|
|
|
|
|
|
|
bufLimit = src; |
14785
|
|
|
|
|
|
|
} |
14786
|
|
|
|
|
|
|
else |
14787
|
10
|
|
|
|
|
|
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; |
14788
|
120
|
|
|
|
|
|
p->buf = src; |
14789
|
120
|
50
|
|
|
|
|
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) |
14790
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14791
|
120
|
|
|
|
|
|
processed = (size_t)(p->buf - src); |
14792
|
120
|
|
|
|
|
|
(*srcLen) += processed; |
14793
|
|
|
|
|
|
|
src += processed; |
14794
|
120
|
|
|
|
|
|
inSize -= processed; |
14795
|
|
|
|
|
|
|
} |
14796
|
|
|
|
|
|
|
else |
14797
|
|
|
|
|
|
|
{ |
14798
|
|
|
|
|
|
|
unsigned rem = p->tempBufSize, lookAhead = 0; |
14799
|
0
|
0
|
|
|
|
|
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) |
|
|
0
|
|
|
|
|
|
14800
|
0
|
|
|
|
|
|
p->tempBuf[rem++] = src[lookAhead++]; |
14801
|
0
|
|
|
|
|
|
p->tempBufSize = rem; |
14802
|
0
|
0
|
|
|
|
|
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) |
14803
|
|
|
|
|
|
|
{ |
14804
|
0
|
|
|
|
|
|
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); |
14805
|
0
|
0
|
|
|
|
|
if (dummyRes == DUMMY_ERROR) |
14806
|
|
|
|
|
|
|
{ |
14807
|
0
|
|
|
|
|
|
(*srcLen) += lookAhead; |
14808
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NEEDS_MORE_INPUT; |
14809
|
0
|
|
|
|
|
|
return SZ_OK; |
14810
|
|
|
|
|
|
|
} |
14811
|
0
|
0
|
|
|
|
|
if (checkEndMarkNow && dummyRes != DUMMY_MATCH) |
14812
|
|
|
|
|
|
|
{ |
14813
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
14814
|
0
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14815
|
|
|
|
|
|
|
} |
14816
|
|
|
|
|
|
|
} |
14817
|
0
|
|
|
|
|
|
p->buf = p->tempBuf; |
14818
|
0
|
0
|
|
|
|
|
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) |
14819
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
14820
|
0
|
|
|
|
|
|
lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); |
14821
|
0
|
|
|
|
|
|
(*srcLen) += lookAhead; |
14822
|
0
|
|
|
|
|
|
src += lookAhead; |
14823
|
0
|
|
|
|
|
|
inSize -= lookAhead; |
14824
|
0
|
|
|
|
|
|
p->tempBufSize = 0; |
14825
|
|
|
|
|
|
|
} |
14826
|
|
|
|
|
|
|
} |
14827
|
0
|
0
|
|
|
|
|
if (p->code == 0) |
14828
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_FINISHED_WITH_MARK; |
14829
|
0
|
|
|
|
|
|
return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; |
14830
|
|
|
|
|
|
|
} |
14831
|
|
|
|
|
|
|
|
14832
|
0
|
|
|
|
|
|
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) |
14833
|
|
|
|
|
|
|
{ |
14834
|
0
|
|
|
|
|
|
size_t outSize = *destLen; |
14835
|
0
|
|
|
|
|
|
size_t inSize = *srcLen; |
14836
|
0
|
|
|
|
|
|
*srcLen = *destLen = 0; |
14837
|
0
|
|
|
|
|
|
for (;;) |
14838
|
|
|
|
|
|
|
{ |
14839
|
0
|
|
|
|
|
|
size_t inSizeCur = inSize, outSizeCur, dicPos; |
14840
|
|
|
|
|
|
|
ELzmaFinishMode curFinishMode; |
14841
|
|
|
|
|
|
|
SRes res; |
14842
|
0
|
0
|
|
|
|
|
if (p->dicPos == p->dicBufSize) |
14843
|
0
|
|
|
|
|
|
p->dicPos = 0; |
14844
|
0
|
|
|
|
|
|
dicPos = p->dicPos; |
14845
|
0
|
0
|
|
|
|
|
if (outSize > p->dicBufSize - dicPos) |
14846
|
|
|
|
|
|
|
{ |
14847
|
|
|
|
|
|
|
outSizeCur = p->dicBufSize; |
14848
|
|
|
|
|
|
|
curFinishMode = LZMA_FINISH_ANY; |
14849
|
|
|
|
|
|
|
} |
14850
|
|
|
|
|
|
|
else |
14851
|
|
|
|
|
|
|
{ |
14852
|
0
|
|
|
|
|
|
outSizeCur = dicPos + outSize; |
14853
|
|
|
|
|
|
|
curFinishMode = finishMode; |
14854
|
|
|
|
|
|
|
} |
14855
|
|
|
|
|
|
|
|
14856
|
0
|
|
|
|
|
|
res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); |
14857
|
0
|
|
|
|
|
|
src += inSizeCur; |
14858
|
0
|
|
|
|
|
|
inSize -= inSizeCur; |
14859
|
0
|
|
|
|
|
|
*srcLen += inSizeCur; |
14860
|
0
|
|
|
|
|
|
outSizeCur = p->dicPos - dicPos; |
14861
|
0
|
|
|
|
|
|
memcpy(dest, p->dic + dicPos, outSizeCur); |
14862
|
0
|
|
|
|
|
|
dest += outSizeCur; |
14863
|
0
|
|
|
|
|
|
outSize -= outSizeCur; |
14864
|
0
|
|
|
|
|
|
*destLen += outSizeCur; |
14865
|
0
|
0
|
|
|
|
|
if (res != 0) |
14866
|
0
|
|
|
|
|
|
return res; |
14867
|
0
|
0
|
|
|
|
|
if (outSizeCur == 0 || outSize == 0) |
14868
|
|
|
|
|
|
|
return SZ_OK; |
14869
|
|
|
|
|
|
|
} |
14870
|
|
|
|
|
|
|
} |
14871
|
|
|
|
|
|
|
|
14872
|
0
|
|
|
|
|
|
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) |
14873
|
|
|
|
|
|
|
{ |
14874
|
14
|
|
|
|
|
|
alloc->Free(alloc, p->probs); |
14875
|
7
|
|
|
|
|
|
p->probs = 0; |
14876
|
0
|
|
|
|
|
|
} |
14877
|
|
|
|
|
|
|
|
14878
|
|
|
|
|
|
|
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) |
14879
|
|
|
|
|
|
|
{ |
14880
|
0
|
|
|
|
|
|
alloc->Free(alloc, p->dic); |
14881
|
0
|
|
|
|
|
|
p->dic = 0; |
14882
|
|
|
|
|
|
|
} |
14883
|
|
|
|
|
|
|
|
14884
|
0
|
|
|
|
|
|
void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) |
14885
|
|
|
|
|
|
|
{ |
14886
|
|
|
|
|
|
|
LzmaDec_FreeProbs(p, alloc); |
14887
|
|
|
|
|
|
|
LzmaDec_FreeDict(p, alloc); |
14888
|
0
|
|
|
|
|
|
} |
14889
|
|
|
|
|
|
|
|
14890
|
7
|
|
|
|
|
|
SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size) |
14891
|
|
|
|
|
|
|
{ |
14892
|
|
|
|
|
|
|
uint32_t dicSize; |
14893
|
|
|
|
|
|
|
uint8_t d; |
14894
|
|
|
|
|
|
|
|
14895
|
7
|
50
|
|
|
|
|
if (size < LZMA_PROPS_SIZE) |
14896
|
|
|
|
|
|
|
return SZ_ERROR_UNSUPPORTED; |
14897
|
|
|
|
|
|
|
else |
14898
|
7
|
|
|
|
|
|
dicSize = data[1] | ((uint32_t)data[2] << 8) | ((uint32_t)data[3] << 16) | ((uint32_t)data[4] << 24); |
14899
|
|
|
|
|
|
|
|
14900
|
7
|
50
|
|
|
|
|
if (dicSize < LZMA_DIC_MIN) |
14901
|
|
|
|
|
|
|
dicSize = LZMA_DIC_MIN; |
14902
|
7
|
|
|
|
|
|
p->dicSize = dicSize; |
14903
|
|
|
|
|
|
|
|
14904
|
7
|
|
|
|
|
|
d = data[0]; |
14905
|
7
|
50
|
|
|
|
|
if (d >= (9 * 5 * 5)) |
14906
|
|
|
|
|
|
|
return SZ_ERROR_UNSUPPORTED; |
14907
|
|
|
|
|
|
|
|
14908
|
7
|
|
|
|
|
|
p->lc = d % 9; |
14909
|
7
|
|
|
|
|
|
d /= 9; |
14910
|
7
|
|
|
|
|
|
p->pb = d / 5; |
14911
|
7
|
|
|
|
|
|
p->lp = d % 5; |
14912
|
|
|
|
|
|
|
|
14913
|
7
|
|
|
|
|
|
return SZ_OK; |
14914
|
|
|
|
|
|
|
} |
14915
|
|
|
|
|
|
|
|
14916
|
14
|
|
|
|
|
|
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) |
14917
|
|
|
|
|
|
|
{ |
14918
|
7
|
|
|
|
|
|
uint32_t numProbs = LzmaProps_GetNumProbs(propNew); |
14919
|
7
|
50
|
|
|
|
|
if (p->probs == 0 || numProbs != p->numProbs) |
|
|
0
|
|
|
|
|
|
14920
|
|
|
|
|
|
|
{ |
14921
|
|
|
|
|
|
|
LzmaDec_FreeProbs(p, alloc); |
14922
|
7
|
|
|
|
|
|
p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); |
14923
|
7
|
|
|
|
|
|
p->numProbs = numProbs; |
14924
|
7
|
50
|
|
|
|
|
if (p->probs == 0) |
14925
|
|
|
|
|
|
|
return SZ_ERROR_MEM; |
14926
|
|
|
|
|
|
|
} |
14927
|
|
|
|
|
|
|
return SZ_OK; |
14928
|
|
|
|
|
|
|
} |
14929
|
|
|
|
|
|
|
|
14930
|
7
|
|
|
|
|
|
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc) |
14931
|
|
|
|
|
|
|
{ |
14932
|
|
|
|
|
|
|
CLzmaProps propNew; |
14933
|
7
|
50
|
|
|
|
|
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); |
14934
|
7
|
50
|
|
|
|
|
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); |
14935
|
7
|
|
|
|
|
|
p->prop = propNew; |
14936
|
7
|
|
|
|
|
|
return SZ_OK; |
14937
|
|
|
|
|
|
|
} |
14938
|
|
|
|
|
|
|
|
14939
|
0
|
|
|
|
|
|
SRes LzmaDec_Allocate(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc) |
14940
|
|
|
|
|
|
|
{ |
14941
|
|
|
|
|
|
|
CLzmaProps propNew; |
14942
|
|
|
|
|
|
|
size_t dicBufSize; |
14943
|
0
|
0
|
|
|
|
|
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); |
14944
|
0
|
0
|
|
|
|
|
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); |
14945
|
0
|
|
|
|
|
|
dicBufSize = propNew.dicSize; |
14946
|
0
|
0
|
|
|
|
|
if (p->dic == 0 || dicBufSize != p->dicBufSize) |
|
|
0
|
|
|
|
|
|
14947
|
|
|
|
|
|
|
{ |
14948
|
|
|
|
|
|
|
LzmaDec_FreeDict(p, alloc); |
14949
|
0
|
|
|
|
|
|
p->dic = (uint8_t *)alloc->Alloc(alloc, dicBufSize); |
14950
|
0
|
0
|
|
|
|
|
if (p->dic == 0) |
14951
|
|
|
|
|
|
|
{ |
14952
|
|
|
|
|
|
|
LzmaDec_FreeProbs(p, alloc); |
14953
|
0
|
|
|
|
|
|
return SZ_ERROR_MEM; |
14954
|
|
|
|
|
|
|
} |
14955
|
|
|
|
|
|
|
} |
14956
|
0
|
|
|
|
|
|
p->dicBufSize = dicBufSize; |
14957
|
0
|
|
|
|
|
|
p->prop = propNew; |
14958
|
0
|
|
|
|
|
|
return SZ_OK; |
14959
|
|
|
|
|
|
|
} |
14960
|
|
|
|
|
|
|
|
14961
|
7
|
|
|
|
|
|
SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, |
14962
|
|
|
|
|
|
|
const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode, |
14963
|
|
|
|
|
|
|
ELzmaStatus *status, ISzAlloc *alloc) |
14964
|
|
|
|
|
|
|
{ |
14965
|
|
|
|
|
|
|
CLzmaDec p; |
14966
|
|
|
|
|
|
|
SRes res; |
14967
|
7
|
|
|
|
|
|
size_t inSize = *srcLen; |
14968
|
7
|
|
|
|
|
|
size_t outSize = *destLen; |
14969
|
7
|
|
|
|
|
|
*srcLen = *destLen = 0; |
14970
|
7
|
50
|
|
|
|
|
if (inSize < RC_INIT_SIZE) |
14971
|
|
|
|
|
|
|
return SZ_ERROR_INPUT_EOF; |
14972
|
|
|
|
|
|
|
|
14973
|
7
|
|
|
|
|
|
LzmaDec_Construct(&p); |
14974
|
7
|
|
|
|
|
|
res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); |
14975
|
7
|
50
|
|
|
|
|
if (res != 0) |
14976
|
|
|
|
|
|
|
return res; |
14977
|
7
|
|
|
|
|
|
p.dic = dest; |
14978
|
7
|
|
|
|
|
|
p.dicBufSize = outSize; |
14979
|
|
|
|
|
|
|
|
14980
|
|
|
|
|
|
|
LzmaDec_Init(&p); |
14981
|
|
|
|
|
|
|
|
14982
|
7
|
|
|
|
|
|
*srcLen = inSize; |
14983
|
7
|
|
|
|
|
|
res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); |
14984
|
|
|
|
|
|
|
|
14985
|
7
|
50
|
|
|
|
|
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) |
|
|
50
|
|
|
|
|
|
14986
|
|
|
|
|
|
|
res = SZ_ERROR_INPUT_EOF; |
14987
|
|
|
|
|
|
|
|
14988
|
7
|
|
|
|
|
|
(*destLen) = p.dicPos; |
14989
|
|
|
|
|
|
|
LzmaDec_FreeProbs(&p, alloc); |
14990
|
|
|
|
|
|
|
return res; |
14991
|
|
|
|
|
|
|
} |
14992
|
|
|
|
|
|
|
|
14993
|
|
|
|
|
|
|
} // namespace lzma |
14994
|
|
|
|
|
|
|
// End of LZMA compression library by Igor Pavlov |
14995
|
|
|
|
|
|
|
|
14996
|
|
|
|
|
|
|
#ifndef UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
14997
|
|
|
|
|
|
|
#define UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
14998
|
14
|
|
|
|
|
|
static void *LzmaAlloc(void* /*p*/, size_t size) { return new char[size]; } |
14999
|
28
|
100
|
|
|
|
|
static void LzmaFree(void* /*p*/, void *address) { delete[] (char*) address; } |
15000
|
|
|
|
|
|
|
static lzma::ISzAlloc lzmaAllocator = { LzmaAlloc, LzmaFree }; |
15001
|
|
|
|
|
|
|
#endif // UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
15002
|
|
|
|
|
|
|
|
15003
|
7
|
|
|
|
|
|
bool compressor::load(istream& is, binary_decoder& data) { |
15004
|
|
|
|
|
|
|
uint32_t uncompressed_len, compressed_len, poor_crc; |
15005
|
|
|
|
|
|
|
unsigned char props_encoded[LZMA_PROPS_SIZE]; |
15006
|
|
|
|
|
|
|
|
15007
|
7
|
50
|
|
|
|
|
if (!is.read((char *) &uncompressed_len, sizeof(uncompressed_len))) return false; |
15008
|
7
|
50
|
|
|
|
|
if (!is.read((char *) &compressed_len, sizeof(compressed_len))) return false; |
15009
|
7
|
50
|
|
|
|
|
if (!is.read((char *) &poor_crc, sizeof(poor_crc))) return false; |
15010
|
7
|
50
|
|
|
|
|
if (poor_crc != uncompressed_len * 19991 + compressed_len * 199999991 + 1234567890) return false; |
15011
|
7
|
50
|
|
|
|
|
if (!is.read((char *) props_encoded, sizeof(props_encoded))) return false; |
15012
|
|
|
|
|
|
|
|
15013
|
7
|
|
|
|
|
|
vector compressed(compressed_len); |
15014
|
7
|
50
|
|
|
|
|
if (!is.read((char *) compressed.data(), compressed_len)) return false; |
|
|
50
|
|
|
|
|
|
15015
|
|
|
|
|
|
|
|
15016
|
|
|
|
|
|
|
lzma::ELzmaStatus status; |
15017
|
7
|
|
|
|
|
|
size_t uncompressed_size = uncompressed_len, compressed_size = compressed_len; |
15018
|
7
|
50
|
|
|
|
|
auto res = lzma::LzmaDecode(data.fill(uncompressed_len), &uncompressed_size, compressed.data(), &compressed_size, props_encoded, LZMA_PROPS_SIZE, lzma::LZMA_FINISH_ANY, &status, &lzmaAllocator); |
15019
|
7
|
50
|
|
|
|
|
if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
15020
|
|
|
|
|
|
|
|
15021
|
7
|
|
|
|
|
|
return true; |
15022
|
|
|
|
|
|
|
} |
15023
|
|
|
|
|
|
|
|
15024
|
|
|
|
|
|
|
} // namespace utils |
15025
|
|
|
|
|
|
|
|
15026
|
|
|
|
|
|
|
///////// |
15027
|
|
|
|
|
|
|
// File: utils/url_detector.cpp |
15028
|
|
|
|
|
|
|
///////// |
15029
|
|
|
|
|
|
|
|
15030
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
15031
|
|
|
|
|
|
|
// |
15032
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
15033
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
15034
|
|
|
|
|
|
|
// |
15035
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
15036
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
15037
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
15038
|
|
|
|
|
|
|
|
15039
|
|
|
|
|
|
|
namespace utils { |
15040
|
|
|
|
|
|
|
|
15041
|
|
|
|
|
|
|
static const char _url_detector_actions[] = { |
15042
|
|
|
|
|
|
|
0, 1, 0, 1, 3, 2, 0, 1, |
15043
|
|
|
|
|
|
|
2, 2, 0, 2, 3, 0 |
15044
|
|
|
|
|
|
|
}; |
15045
|
|
|
|
|
|
|
|
15046
|
|
|
|
|
|
|
static const char _url_detector_cond_offsets[] = { |
15047
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15048
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 1, 1, |
15049
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15050
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15051
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15052
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15053
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15054
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15055
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15056
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15057
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15058
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15059
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15060
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15061
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15062
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15063
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15064
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15065
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15066
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15067
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15068
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15069
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15070
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15071
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15072
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15073
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15074
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15075
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15076
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15077
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15078
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15079
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15080
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15081
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15082
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15083
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15084
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15085
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15086
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15087
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15088
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15089
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15090
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15091
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15092
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
15093
|
|
|
|
|
|
|
1, 2, 2, 2, 2, 2, 2, 2, |
15094
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
15095
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
15096
|
|
|
|
|
|
|
2, 2 |
15097
|
|
|
|
|
|
|
}; |
15098
|
|
|
|
|
|
|
|
15099
|
|
|
|
|
|
|
static const char _url_detector_cond_lengths[] = { |
15100
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15101
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 0, 0, 0, |
15102
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15103
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15104
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15105
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15106
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15107
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15108
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15109
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15110
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15111
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15112
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15113
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15114
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15115
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15116
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15117
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15118
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15119
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15120
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15121
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15122
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15123
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15124
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15125
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15126
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15127
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15128
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15129
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15130
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15131
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15132
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15133
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15134
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15135
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15136
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15137
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15138
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15139
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15140
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15141
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15142
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15143
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15144
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15145
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15146
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
15147
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15148
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15149
|
|
|
|
|
|
|
0, 0 |
15150
|
|
|
|
|
|
|
}; |
15151
|
|
|
|
|
|
|
|
15152
|
|
|
|
|
|
|
static const short _url_detector_cond_keys[] = { |
15153
|
|
|
|
|
|
|
41u, 41u, 41u, 41u, 0 |
15154
|
|
|
|
|
|
|
}; |
15155
|
|
|
|
|
|
|
|
15156
|
|
|
|
|
|
|
static const char _url_detector_cond_spaces[] = { |
15157
|
|
|
|
|
|
|
0, 0, 0 |
15158
|
|
|
|
|
|
|
}; |
15159
|
|
|
|
|
|
|
|
15160
|
|
|
|
|
|
|
static const short _url_detector_key_offsets[] = { |
15161
|
|
|
|
|
|
|
0, 0, 15, 29, 41, 54, 63, 71, |
15162
|
|
|
|
|
|
|
78, 86, 92, 100, 153, 161, 167, 169, |
15163
|
|
|
|
|
|
|
177, 184, 192, 196, 200, 204, 209, 214, |
15164
|
|
|
|
|
|
|
223, 227, 231, 240, 242, 246, 248, 251, |
15165
|
|
|
|
|
|
|
283, 285, 287, 289, 291, 297, 312, 327, |
15166
|
|
|
|
|
|
|
346, 358, 374, 386, 402, 418, 439, 449, |
15167
|
|
|
|
|
|
|
461, 477, 491, 506, 516, 529, 538, 550, |
15168
|
|
|
|
|
|
|
552, 556, 577, 586, 596, 602, 608, 609, |
15169
|
|
|
|
|
|
|
653, 657, 661, 669, 680, 690, 697, 703, |
15170
|
|
|
|
|
|
|
709, 713, 717, 719, 723, 727, 731, 737, |
15171
|
|
|
|
|
|
|
745, 754, 760, 762, 766, 768, 774, 778, |
15172
|
|
|
|
|
|
|
782, 786, 790, 795, 802, 808, 810, 814, |
15173
|
|
|
|
|
|
|
820, 824, 830, 841, 846, 860, 880, 885, |
15174
|
|
|
|
|
|
|
889, 891, 907, 912, 914, 916, 918, 920, |
15175
|
|
|
|
|
|
|
922, 926, 930, 935, 941, 945, 955, 965, |
15176
|
|
|
|
|
|
|
966, 979, 987, 995, 999, 1003, 1009, 1011, |
15177
|
|
|
|
|
|
|
1013, 1017, 1021, 1026, 1028, 1031, 1033, 1055, |
15178
|
|
|
|
|
|
|
1057, 1059, 1063, 1068, 1072, 1078, 1080, 1084, |
15179
|
|
|
|
|
|
|
1092, 1096, 1100, 1105, 1107, 1115, 1123, 1135, |
15180
|
|
|
|
|
|
|
1141, 1147, 1151, 1155, 1159, 1178, 1180, 1191, |
15181
|
|
|
|
|
|
|
1197, 1199, 1201, 1205, 1209, 1213, 1217, 1219, |
15182
|
|
|
|
|
|
|
1223, 1227, 1229, 1237, 1253, 1295, 1304, 1308, |
15183
|
|
|
|
|
|
|
1310, 1312, 1314, 1316, 1317, 1321, 1325, 1329, |
15184
|
|
|
|
|
|
|
1335, 1339, 1343, 1345, 1349, 1358, 1364, 1368, |
15185
|
|
|
|
|
|
|
1374, 1378, 1382, 1395, 1399, 1401, 1407, 1413, |
15186
|
|
|
|
|
|
|
1417, 1419, 1421, 1425, 1427, 1452, 1457, 1461, |
15187
|
|
|
|
|
|
|
1465, 1468, 1477, 1481, 1492, 1496, 1512, 1526, |
15188
|
|
|
|
|
|
|
1531, 1535, 1538, 1542, 1548, 1551, 1558, 1560, |
15189
|
|
|
|
|
|
|
1562, 1565, 1568, 1570, 1581, 1585, 1589, 1599, |
15190
|
|
|
|
|
|
|
1601, 1605, 1607, 1611, 1613, 1617, 1623, 1643, |
15191
|
|
|
|
|
|
|
1649, 1655, 1657, 1659, 1663, 1677, 1681, 1693, |
15192
|
|
|
|
|
|
|
1700, 1704, 1711, 1717, 1723, 1729, 1735, 1739, |
15193
|
|
|
|
|
|
|
1742, 1747, 1753, 1757, 1771, 1797, 1807, 1808, |
15194
|
|
|
|
|
|
|
1811, 1813, 1817, 1819, 1822, 1824, 1827, 1829, |
15195
|
|
|
|
|
|
|
1830, 1833, 1835, 1837, 1846, 1854, 1863, 1871, |
15196
|
|
|
|
|
|
|
1880, 1888, 1896, 1907, 1917, 1925, 1933, 1944, |
15197
|
|
|
|
|
|
|
1954, 1962, 1970, 1981, 1991, 2000, 2008, 2015, |
15198
|
|
|
|
|
|
|
2023, 2029, 2037, 2046, 2054, 2063, 2071, 2080, |
15199
|
|
|
|
|
|
|
2088, 2096, 2107, 2117, 2125, 2133, 2144, 2154, |
15200
|
|
|
|
|
|
|
2162, 2170, 2181, 2191, 2207, 2222, 2238, 2252, |
15201
|
|
|
|
|
|
|
2268, 2280, 2297, 2313, 2330, 2346, 2363, 2379, |
15202
|
|
|
|
|
|
|
2395, 2414, 2432, 2448, 2464, 2483, 2501, 2517, |
15203
|
|
|
|
|
|
|
2533, 2552, 2570, 2586, 2602, 2618, 2629, 2630, |
15204
|
|
|
|
|
|
|
2645, 2659, 2675, 2690, 2706, 2720, 2736, 2753, |
15205
|
|
|
|
|
|
|
2769, 2786, 2802, 2819, 2835, 2851, 2870, 2888, |
15206
|
|
|
|
|
|
|
2904, 2920, 2939, 2957, 2973, 2989, 3008, 3026, |
15207
|
|
|
|
|
|
|
3036, 3089, 3092, 3102, 3112, 3122, 3135, 3147, |
15208
|
|
|
|
|
|
|
3157, 3167, 3177, 3187, 3200, 3212, 3229, 3243, |
15209
|
|
|
|
|
|
|
3260, 3277, 3294, 3314, 3333, 3350, 3367, 3384, |
15210
|
|
|
|
|
|
|
3401, 3421 |
15211
|
|
|
|
|
|
|
}; |
15212
|
|
|
|
|
|
|
|
15213
|
|
|
|
|
|
|
static const short _url_detector_trans_keys[] = { |
15214
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 95u, 36u, 37u, 39u, |
15215
|
|
|
|
|
|
|
46u, 51u, 57u, 65u, 90u, 97u, 122u, 33u, |
15216
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 46u, 48u, |
15217
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 95u, 36u, |
15218
|
|
|
|
|
|
|
37u, 39u, 46u, 48u, 57u, 65u, 90u, 97u, |
15219
|
|
|
|
|
|
|
122u, 33u, 64u, 95u, 36u, 37u, 39u, 46u, |
15220
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
15221
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
15222
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
15223
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
15224
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 57u, |
15225
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
15226
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 40u, 44u, |
15227
|
|
|
|
|
|
|
46u, 61u, 63u, 95u, 194u, 195u, 203u, 205u, |
15228
|
|
|
|
|
|
|
206u, 207u, 210u, 212u, 213u, 214u, 215u, 216u, |
15229
|
|
|
|
|
|
|
217u, 219u, 220u, 221u, 222u, 223u, 224u, 225u, |
15230
|
|
|
|
|
|
|
226u, 227u, 228u, 233u, 234u, 237u, 239u, 240u, |
15231
|
|
|
|
|
|
|
243u, 297u, 553u, 36u, 38u, 42u, 57u, 58u, |
15232
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 196u, 218u, 229u, |
15233
|
|
|
|
|
|
|
236u, 170u, 181u, 178u, 179u, 185u, 186u, 188u, |
15234
|
|
|
|
|
|
|
190u, 128u, 150u, 152u, 182u, 184u, 191u, 128u, |
15235
|
|
|
|
|
|
|
191u, 172u, 174u, 128u, 129u, 134u, 145u, 160u, |
15236
|
|
|
|
|
|
|
164u, 191u, 128u, 180u, 182u, 183u, 186u, 189u, |
15237
|
|
|
|
|
|
|
134u, 140u, 136u, 138u, 142u, 161u, 163u, 191u, |
15238
|
|
|
|
|
|
|
128u, 181u, 183u, 191u, 128u, 129u, 131u, 191u, |
15239
|
|
|
|
|
|
|
128u, 175u, 177u, 191u, 153u, 128u, 150u, 161u, |
15240
|
|
|
|
|
|
|
191u, 191u, 128u, 135u, 145u, 189u, 135u, 129u, |
15241
|
|
|
|
|
|
|
130u, 132u, 133u, 144u, 170u, 176u, 178u, 144u, |
15242
|
|
|
|
|
|
|
154u, 160u, 191u, 128u, 169u, 174u, 191u, 191u, |
15243
|
|
|
|
|
|
|
128u, 147u, 149u, 156u, 159u, 168u, 170u, 188u, |
15244
|
|
|
|
|
|
|
144u, 191u, 128u, 138u, 141u, 191u, 128u, 177u, |
15245
|
|
|
|
|
|
|
186u, 128u, 181u, 160u, 161u, 162u, 163u, 164u, |
15246
|
|
|
|
|
|
|
165u, 166u, 167u, 168u, 169u, 170u, 171u, 172u, |
15247
|
|
|
|
|
|
|
173u, 174u, 175u, 176u, 177u, 178u, 179u, 180u, |
15248
|
|
|
|
|
|
|
181u, 182u, 183u, 184u, 185u, 186u, 187u, 188u, |
15249
|
|
|
|
|
|
|
189u, 190u, 191u, 128u, 173u, 128u, 155u, 160u, |
15250
|
|
|
|
|
|
|
180u, 163u, 191u, 128u, 163u, 166u, 175u, 177u, |
15251
|
|
|
|
|
|
|
191u, 178u, 128u, 131u, 133u, 140u, 143u, 144u, |
15252
|
|
|
|
|
|
|
147u, 168u, 170u, 176u, 182u, 185u, 188u, 191u, |
15253
|
|
|
|
|
|
|
151u, 128u, 132u, 135u, 136u, 139u, 142u, 156u, |
15254
|
|
|
|
|
|
|
157u, 159u, 163u, 166u, 177u, 180u, 185u, 188u, |
15255
|
|
|
|
|
|
|
129u, 131u, 133u, 138u, 143u, 144u, 147u, 168u, |
15256
|
|
|
|
|
|
|
170u, 176u, 178u, 179u, 181u, 182u, 184u, 185u, |
15257
|
|
|
|
|
|
|
190u, 191u, 145u, 158u, 128u, 130u, 135u, 136u, |
15258
|
|
|
|
|
|
|
139u, 141u, 153u, 156u, 166u, 181u, 129u, 131u, |
15259
|
|
|
|
|
|
|
133u, 141u, 143u, 145u, 147u, 168u, 170u, 176u, |
15260
|
|
|
|
|
|
|
178u, 179u, 181u, 185u, 188u, 191u, 144u, 185u, |
15261
|
|
|
|
|
|
|
128u, 133u, 135u, 137u, 139u, 141u, 160u, 163u, |
15262
|
|
|
|
|
|
|
166u, 175u, 129u, 131u, 133u, 140u, 143u, 144u, |
15263
|
|
|
|
|
|
|
147u, 168u, 170u, 176u, 178u, 179u, 181u, 185u, |
15264
|
|
|
|
|
|
|
188u, 191u, 128u, 132u, 135u, 136u, 139u, 141u, |
15265
|
|
|
|
|
|
|
150u, 151u, 156u, 157u, 159u, 163u, 166u, 175u, |
15266
|
|
|
|
|
|
|
177u, 183u, 156u, 130u, 131u, 133u, 138u, 142u, |
15267
|
|
|
|
|
|
|
144u, 146u, 149u, 153u, 154u, 158u, 159u, 163u, |
15268
|
|
|
|
|
|
|
164u, 168u, 170u, 174u, 185u, 190u, 191u, 144u, |
15269
|
|
|
|
|
|
|
151u, 128u, 130u, 134u, 136u, 138u, 141u, 166u, |
15270
|
|
|
|
|
|
|
178u, 128u, 131u, 133u, 140u, 142u, 144u, 146u, |
15271
|
|
|
|
|
|
|
168u, 170u, 185u, 189u, 191u, 128u, 132u, 134u, |
15272
|
|
|
|
|
|
|
136u, 138u, 141u, 149u, 150u, 152u, 154u, 160u, |
15273
|
|
|
|
|
|
|
163u, 166u, 175u, 184u, 190u, 129u, 131u, 133u, |
15274
|
|
|
|
|
|
|
140u, 142u, 144u, 146u, 168u, 170u, 179u, 181u, |
15275
|
|
|
|
|
|
|
185u, 188u, 191u, 158u, 128u, 132u, 134u, 136u, |
15276
|
|
|
|
|
|
|
138u, 141u, 149u, 150u, 160u, 163u, 166u, 175u, |
15277
|
|
|
|
|
|
|
177u, 178u, 129u, 131u, 133u, 140u, 142u, 144u, |
15278
|
|
|
|
|
|
|
146u, 186u, 189u, 191u, 151u, 128u, 132u, 134u, |
15279
|
|
|
|
|
|
|
136u, 138u, 142u, 159u, 163u, 166u, 181u, 186u, |
15280
|
|
|
|
|
|
|
191u, 189u, 130u, 131u, 133u, 150u, 154u, 177u, |
15281
|
|
|
|
|
|
|
179u, 187u, 138u, 150u, 128u, 134u, 143u, 148u, |
15282
|
|
|
|
|
|
|
152u, 159u, 166u, 175u, 178u, 179u, 129u, 186u, |
15283
|
|
|
|
|
|
|
128u, 142u, 144u, 153u, 132u, 138u, 141u, 165u, |
15284
|
|
|
|
|
|
|
167u, 129u, 130u, 135u, 136u, 148u, 151u, 153u, |
15285
|
|
|
|
|
|
|
159u, 161u, 163u, 170u, 171u, 173u, 185u, 187u, |
15286
|
|
|
|
|
|
|
189u, 134u, 128u, 132u, 136u, 141u, 144u, 153u, |
15287
|
|
|
|
|
|
|
156u, 159u, 128u, 181u, 183u, 185u, 152u, 153u, |
15288
|
|
|
|
|
|
|
160u, 179u, 190u, 191u, 128u, 135u, 137u, 172u, |
15289
|
|
|
|
|
|
|
177u, 191u, 128u, 132u, 134u, 151u, 153u, 188u, |
15290
|
|
|
|
|
|
|
134u, 129u, 130u, 131u, 137u, 138u, 139u, 140u, |
15291
|
|
|
|
|
|
|
141u, 142u, 143u, 144u, 153u, 154u, 155u, 156u, |
15292
|
|
|
|
|
|
|
157u, 159u, 160u, 161u, 162u, 163u, 164u, 165u, |
15293
|
|
|
|
|
|
|
166u, 167u, 168u, 169u, 170u, 173u, 175u, 176u, |
15294
|
|
|
|
|
|
|
177u, 179u, 183u, 188u, 189u, 190u, 191u, 128u, |
15295
|
|
|
|
|
|
|
158u, 172u, 174u, 180u, 187u, 128u, 137u, 144u, |
15296
|
|
|
|
|
|
|
191u, 128u, 157u, 160u, 191u, 135u, 141u, 128u, |
15297
|
|
|
|
|
|
|
133u, 144u, 186u, 188u, 191u, 152u, 128u, 136u, |
15298
|
|
|
|
|
|
|
138u, 141u, 144u, 150u, 154u, 157u, 160u, 191u, |
15299
|
|
|
|
|
|
|
128u, 136u, 138u, 141u, 144u, 176u, 178u, 181u, |
15300
|
|
|
|
|
|
|
184u, 190u, 128u, 130u, 133u, 136u, 150u, 152u, |
15301
|
|
|
|
|
|
|
191u, 128u, 144u, 146u, 149u, 152u, 191u, 128u, |
15302
|
|
|
|
|
|
|
154u, 157u, 159u, 169u, 188u, 128u, 143u, 160u, |
15303
|
|
|
|
|
|
|
191u, 128u, 181u, 184u, 189u, 129u, 191u, 128u, |
15304
|
|
|
|
|
|
|
172u, 175u, 191u, 129u, 154u, 160u, 191u, 128u, |
15305
|
|
|
|
|
|
|
170u, 174u, 184u, 128u, 140u, 142u, 148u, 160u, |
15306
|
|
|
|
|
|
|
180u, 128u, 147u, 160u, 172u, 174u, 176u, 178u, |
15307
|
|
|
|
|
|
|
179u, 151u, 128u, 147u, 156u, 157u, 160u, 169u, |
15308
|
|
|
|
|
|
|
176u, 185u, 139u, 141u, 144u, 153u, 160u, 191u, |
15309
|
|
|
|
|
|
|
128u, 183u, 128u, 170u, 176u, 191u, 128u, 181u, |
15310
|
|
|
|
|
|
|
128u, 158u, 160u, 171u, 176u, 187u, 134u, 173u, |
15311
|
|
|
|
|
|
|
176u, 180u, 128u, 171u, 176u, 191u, 128u, 137u, |
15312
|
|
|
|
|
|
|
144u, 154u, 128u, 155u, 160u, 191u, 191u, 128u, |
15313
|
|
|
|
|
|
|
158u, 160u, 188u, 167u, 128u, 137u, 144u, 153u, |
15314
|
|
|
|
|
|
|
176u, 190u, 128u, 139u, 144u, 153u, 171u, 179u, |
15315
|
|
|
|
|
|
|
128u, 179u, 128u, 137u, 141u, 189u, 144u, 146u, |
15316
|
|
|
|
|
|
|
148u, 182u, 184u, 185u, 128u, 181u, 188u, 191u, |
15317
|
|
|
|
|
|
|
128u, 149u, 152u, 157u, 160u, 191u, 153u, 155u, |
15318
|
|
|
|
|
|
|
157u, 128u, 133u, 136u, 141u, 144u, 151u, 159u, |
15319
|
|
|
|
|
|
|
189u, 190u, 128u, 180u, 182u, 188u, 130u, 132u, |
15320
|
|
|
|
|
|
|
134u, 140u, 144u, 147u, 150u, 155u, 160u, 172u, |
15321
|
|
|
|
|
|
|
178u, 180u, 182u, 188u, 129u, 130u, 131u, 132u, |
15322
|
|
|
|
|
|
|
133u, 134u, 145u, 146u, 147u, 157u, 158u, 176u, |
15323
|
|
|
|
|
|
|
177u, 178u, 179u, 180u, 181u, 182u, 183u, 184u, |
15324
|
|
|
|
|
|
|
191u, 176u, 177u, 180u, 185u, 128u, 137u, 144u, |
15325
|
|
|
|
|
|
|
156u, 144u, 176u, 130u, 135u, 149u, 164u, 166u, |
15326
|
|
|
|
|
|
|
168u, 138u, 147u, 153u, 157u, 170u, 173u, 175u, |
15327
|
|
|
|
|
|
|
185u, 188u, 191u, 142u, 133u, 137u, 144u, 191u, |
15328
|
|
|
|
|
|
|
128u, 137u, 160u, 191u, 170u, 191u, 182u, 191u, |
15329
|
|
|
|
|
|
|
128u, 147u, 128u, 174u, 176u, 191u, 128u, 158u, |
15330
|
|
|
|
|
|
|
160u, 191u, 189u, 128u, 164u, 171u, 179u, 167u, |
15331
|
|
|
|
|
|
|
173u, 128u, 165u, 176u, 191u, 175u, 191u, 128u, |
15332
|
|
|
|
|
|
|
167u, 128u, 150u, 160u, 166u, 168u, 174u, 176u, |
15333
|
|
|
|
|
|
|
182u, 184u, 190u, 128u, 134u, 136u, 142u, 144u, |
15334
|
|
|
|
|
|
|
150u, 152u, 158u, 160u, 191u, 175u, 128u, 129u, |
15335
|
|
|
|
|
|
|
130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u, |
15336
|
|
|
|
|
|
|
138u, 144u, 191u, 133u, 135u, 161u, 175u, 177u, |
15337
|
|
|
|
|
|
|
181u, 184u, 188u, 128u, 150u, 153u, 154u, 157u, |
15338
|
|
|
|
|
|
|
159u, 161u, 191u, 128u, 186u, 188u, 191u, 133u, |
15339
|
|
|
|
|
|
|
173u, 177u, 191u, 128u, 142u, 146u, 149u, 160u, |
15340
|
|
|
|
|
|
|
186u, 176u, 191u, 160u, 169u, 136u, 143u, 145u, |
15341
|
|
|
|
|
|
|
159u, 128u, 137u, 177u, 191u, 182u, 128u, 181u, |
15342
|
|
|
|
|
|
|
184u, 191u, 128u, 191u, 191u, 128u, 190u, 128u, |
15343
|
|
|
|
|
|
|
149u, 146u, 147u, 152u, 153u, 155u, 156u, 158u, |
15344
|
|
|
|
|
|
|
159u, 160u, 161u, 163u, 164u, 165u, 167u, 168u, |
15345
|
|
|
|
|
|
|
169u, 171u, 172u, 173u, 175u, 128u, 191u, 128u, |
15346
|
|
|
|
|
|
|
140u, 144u, 189u, 128u, 140u, 144u, 171u, 191u, |
15347
|
|
|
|
|
|
|
128u, 178u, 180u, 189u, 151u, 159u, 162u, 191u, |
15348
|
|
|
|
|
|
|
128u, 136u, 139u, 173u, 176u, 183u, 183u, 191u, |
15349
|
|
|
|
|
|
|
128u, 167u, 176u, 181u, 187u, 189u, 128u, 132u, |
15350
|
|
|
|
|
|
|
144u, 153u, 160u, 183u, 128u, 173u, 176u, 191u, |
15351
|
|
|
|
|
|
|
128u, 147u, 160u, 188u, 128u, 143u, 153u, 160u, |
15352
|
|
|
|
|
|
|
190u, 128u, 182u, 128u, 141u, 144u, 153u, 160u, |
15353
|
|
|
|
|
|
|
182u, 186u, 191u, 128u, 130u, 155u, 157u, 160u, |
15354
|
|
|
|
|
|
|
175u, 178u, 182u, 129u, 134u, 137u, 142u, 145u, |
15355
|
|
|
|
|
|
|
150u, 160u, 166u, 168u, 174u, 176u, 191u, 128u, |
15356
|
|
|
|
|
|
|
154u, 156u, 165u, 176u, 191u, 128u, 170u, 172u, |
15357
|
|
|
|
|
|
|
173u, 176u, 185u, 158u, 159u, 128u, 157u, 128u, |
15358
|
|
|
|
|
|
|
163u, 176u, 191u, 128u, 134u, 139u, 187u, 169u, |
15359
|
|
|
|
|
|
|
171u, 172u, 173u, 174u, 175u, 180u, 181u, 182u, |
15360
|
|
|
|
|
|
|
183u, 184u, 185u, 187u, 188u, 189u, 190u, 191u, |
15361
|
|
|
|
|
|
|
164u, 186u, 128u, 153u, 190u, 128u, 134u, 147u, |
15362
|
|
|
|
|
|
|
151u, 157u, 168u, 170u, 182u, 184u, 188u, 128u, |
15363
|
|
|
|
|
|
|
129u, 131u, 132u, 134u, 191u, 147u, 191u, 128u, |
15364
|
|
|
|
|
|
|
189u, 128u, 143u, 146u, 191u, 128u, 135u, 176u, |
15365
|
|
|
|
|
|
|
187u, 128u, 143u, 160u, 175u, 176u, 180u, 182u, |
15366
|
|
|
|
|
|
|
191u, 128u, 188u, 144u, 153u, 161u, 186u, 129u, |
15367
|
|
|
|
|
|
|
154u, 166u, 191u, 128u, 190u, 130u, 135u, 138u, |
15368
|
|
|
|
|
|
|
143u, 146u, 151u, 154u, 156u, 144u, 145u, 146u, |
15369
|
|
|
|
|
|
|
147u, 148u, 150u, 155u, 157u, 158u, 159u, 170u, |
15370
|
|
|
|
|
|
|
171u, 172u, 175u, 160u, 169u, 128u, 129u, 130u, |
15371
|
|
|
|
|
|
|
131u, 132u, 133u, 134u, 135u, 138u, 139u, 140u, |
15372
|
|
|
|
|
|
|
141u, 142u, 143u, 146u, 148u, 149u, 156u, 157u, |
15373
|
|
|
|
|
|
|
160u, 161u, 162u, 163u, 164u, 166u, 167u, 168u, |
15374
|
|
|
|
|
|
|
169u, 170u, 171u, 172u, 173u, 174u, 176u, 177u, |
15375
|
|
|
|
|
|
|
178u, 179u, 185u, 144u, 145u, 152u, 155u, 191u, |
15376
|
|
|
|
|
|
|
128u, 139u, 141u, 166u, 168u, 186u, 188u, 189u, |
15377
|
|
|
|
|
|
|
128u, 141u, 144u, 157u, 128u, 186u, 135u, 179u, |
15378
|
|
|
|
|
|
|
128u, 184u, 138u, 139u, 189u, 128u, 156u, 160u, |
15379
|
|
|
|
|
|
|
191u, 128u, 144u, 160u, 187u, 128u, 138u, 144u, |
15380
|
|
|
|
|
|
|
186u, 128u, 131u, 136u, 143u, 145u, 149u, 128u, |
15381
|
|
|
|
|
|
|
157u, 160u, 169u, 128u, 167u, 176u, 191u, 128u, |
15382
|
|
|
|
|
|
|
163u, 128u, 149u, 160u, 167u, 136u, 188u, 191u, |
15383
|
|
|
|
|
|
|
128u, 133u, 138u, 181u, 183u, 184u, 128u, 149u, |
15384
|
|
|
|
|
|
|
152u, 182u, 185u, 191u, 128u, 158u, 167u, 175u, |
15385
|
|
|
|
|
|
|
160u, 178u, 180u, 181u, 187u, 191u, 128u, 155u, |
15386
|
|
|
|
|
|
|
160u, 185u, 128u, 183u, 188u, 191u, 191u, 128u, |
15387
|
|
|
|
|
|
|
131u, 133u, 134u, 140u, 147u, 149u, 151u, 153u, |
15388
|
|
|
|
|
|
|
179u, 184u, 186u, 128u, 135u, 160u, 190u, 128u, |
15389
|
|
|
|
|
|
|
159u, 128u, 135u, 137u, 166u, 171u, 175u, 128u, |
15390
|
|
|
|
|
|
|
149u, 152u, 178u, 184u, 191u, 128u, 145u, 169u, |
15391
|
|
|
|
|
|
|
175u, 128u, 136u, 128u, 178u, 128u, 178u, 186u, |
15392
|
|
|
|
|
|
|
191u, 160u, 190u, 129u, 130u, 131u, 132u, 133u, |
15393
|
|
|
|
|
|
|
135u, 136u, 138u, 139u, 140u, 141u, 146u, 147u, |
15394
|
|
|
|
|
|
|
150u, 151u, 152u, 153u, 154u, 155u, 156u, 162u, |
15395
|
|
|
|
|
|
|
163u, 171u, 128u, 134u, 191u, 128u, 134u, 146u, |
15396
|
|
|
|
|
|
|
175u, 144u, 168u, 176u, 185u, 128u, 180u, 182u, |
15397
|
|
|
|
|
|
|
191u, 182u, 144u, 179u, 156u, 128u, 132u, 138u, |
15398
|
|
|
|
|
|
|
140u, 144u, 154u, 161u, 180u, 128u, 145u, 147u, |
15399
|
|
|
|
|
|
|
183u, 136u, 128u, 134u, 138u, 141u, 143u, 157u, |
15400
|
|
|
|
|
|
|
159u, 168u, 176u, 191u, 128u, 170u, 176u, 185u, |
15401
|
|
|
|
|
|
|
128u, 131u, 133u, 140u, 143u, 144u, 147u, 168u, |
15402
|
|
|
|
|
|
|
170u, 176u, 178u, 179u, 181u, 185u, 188u, 191u, |
15403
|
|
|
|
|
|
|
144u, 151u, 128u, 132u, 135u, 136u, 139u, 141u, |
15404
|
|
|
|
|
|
|
157u, 163u, 166u, 172u, 176u, 180u, 135u, 128u, |
15405
|
|
|
|
|
|
|
133u, 144u, 153u, 128u, 181u, 184u, 191u, 128u, |
15406
|
|
|
|
|
|
|
152u, 157u, 128u, 132u, 144u, 153u, 128u, 153u, |
15407
|
|
|
|
|
|
|
157u, 171u, 176u, 187u, 191u, 128u, 178u, 142u, |
15408
|
|
|
|
|
|
|
145u, 149u, 128u, 141u, 144u, 148u, 128u, 174u, |
15409
|
|
|
|
|
|
|
128u, 131u, 144u, 128u, 143u, 153u, 144u, 152u, |
15410
|
|
|
|
|
|
|
128u, 134u, 168u, 169u, 171u, 172u, 173u, 174u, |
15411
|
|
|
|
|
|
|
188u, 189u, 190u, 160u, 167u, 128u, 158u, 160u, |
15412
|
|
|
|
|
|
|
169u, 144u, 173u, 176u, 180u, 128u, 131u, 144u, |
15413
|
|
|
|
|
|
|
153u, 155u, 161u, 163u, 183u, 189u, 191u, 128u, |
15414
|
|
|
|
|
|
|
143u, 128u, 132u, 144u, 190u, 143u, 159u, 128u, |
15415
|
|
|
|
|
|
|
176u, 177u, 178u, 128u, 129u, 128u, 170u, 176u, |
15416
|
|
|
|
|
|
|
188u, 128u, 136u, 144u, 153u, 157u, 158u, 133u, |
15417
|
|
|
|
|
|
|
134u, 137u, 141u, 145u, 146u, 147u, 148u, 149u, |
15418
|
|
|
|
|
|
|
154u, 155u, 156u, 157u, 158u, 159u, 168u, 169u, |
15419
|
|
|
|
|
|
|
170u, 144u, 153u, 165u, 169u, 173u, 178u, 187u, |
15420
|
|
|
|
|
|
|
191u, 128u, 130u, 133u, 139u, 170u, 173u, 130u, |
15421
|
|
|
|
|
|
|
132u, 160u, 177u, 128u, 148u, 150u, 191u, 162u, |
15422
|
|
|
|
|
|
|
187u, 128u, 156u, 158u, 159u, 165u, 166u, 169u, |
15423
|
|
|
|
|
|
|
172u, 174u, 185u, 189u, 191u, 128u, 131u, 133u, |
15424
|
|
|
|
|
|
|
191u, 128u, 133u, 135u, 138u, 141u, 148u, 150u, |
15425
|
|
|
|
|
|
|
156u, 158u, 185u, 187u, 190u, 134u, 128u, 132u, |
15426
|
|
|
|
|
|
|
138u, 144u, 146u, 191u, 128u, 165u, 168u, 191u, |
15427
|
|
|
|
|
|
|
128u, 130u, 154u, 156u, 186u, 188u, 191u, 128u, |
15428
|
|
|
|
|
|
|
148u, 150u, 180u, 182u, 191u, 128u, 142u, 144u, |
15429
|
|
|
|
|
|
|
174u, 176u, 191u, 128u, 136u, 138u, 168u, 170u, |
15430
|
|
|
|
|
|
|
191u, 128u, 130u, 132u, 139u, 142u, 191u, 128u, |
15431
|
|
|
|
|
|
|
182u, 187u, 191u, 181u, 128u, 172u, 132u, 155u, |
15432
|
|
|
|
|
|
|
159u, 161u, 175u, 163u, 184u, 185u, 186u, 160u, |
15433
|
|
|
|
|
|
|
162u, 128u, 132u, 135u, 150u, 164u, 167u, 185u, |
15434
|
|
|
|
|
|
|
187u, 128u, 131u, 133u, 159u, 161u, 162u, 169u, |
15435
|
|
|
|
|
|
|
178u, 180u, 183u, 130u, 135u, 137u, 139u, 148u, |
15436
|
|
|
|
|
|
|
151u, 153u, 155u, 157u, 159u, 164u, 190u, 141u, |
15437
|
|
|
|
|
|
|
143u, 145u, 146u, 161u, 162u, 167u, 170u, 172u, |
15438
|
|
|
|
|
|
|
178u, 180u, 183u, 185u, 188u, 128u, 137u, 139u, |
15439
|
|
|
|
|
|
|
155u, 161u, 163u, 165u, 169u, 171u, 187u, 132u, |
15440
|
|
|
|
|
|
|
155u, 128u, 191u, 128u, 150u, 156u, 160u, 128u, |
15441
|
|
|
|
|
|
|
191u, 128u, 180u, 186u, 128u, 185u, 128u, 161u, |
15442
|
|
|
|
|
|
|
168u, 160u, 167u, 128u, 157u, 160u, 135u, 132u, |
15443
|
|
|
|
|
|
|
134u, 128u, 175u, 48u, 57u, 48u, 49u, 50u, |
15444
|
|
|
|
|
|
|
51u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
15445
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
15446
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
15447
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 48u, |
15448
|
|
|
|
|
|
|
49u, 50u, 51u, 57u, 65u, 90u, 97u, 122u, |
15449
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15450
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15451
|
|
|
|
|
|
|
45u, 46u, 53u, 48u, 52u, 54u, 57u, 65u, |
15452
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 53u, 54u, |
15453
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15454
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15455
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 53u, |
15456
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
15457
|
|
|
|
|
|
|
45u, 46u, 48u, 53u, 54u, 57u, 65u, 90u, |
15458
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
15459
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
15460
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 48u, 52u, 54u, |
15461
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15462
|
|
|
|
|
|
|
53u, 54u, 57u, 65u, 90u, 97u, 122u, 48u, |
15463
|
|
|
|
|
|
|
49u, 50u, 51u, 57u, 65u, 90u, 97u, 122u, |
15464
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15465
|
|
|
|
|
|
|
45u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
15466
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 48u, |
15467
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15468
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 48u, 49u, 50u, |
15469
|
|
|
|
|
|
|
51u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
15470
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
15471
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
15472
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 48u, |
15473
|
|
|
|
|
|
|
49u, 50u, 51u, 57u, 65u, 90u, 97u, 122u, |
15474
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15475
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15476
|
|
|
|
|
|
|
45u, 46u, 53u, 48u, 52u, 54u, 57u, 65u, |
15477
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 53u, 54u, |
15478
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15479
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15480
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 53u, |
15481
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
15482
|
|
|
|
|
|
|
45u, 46u, 48u, 53u, 54u, 57u, 65u, 90u, |
15483
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
15484
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
15485
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 48u, 52u, 54u, |
15486
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
15487
|
|
|
|
|
|
|
53u, 54u, 57u, 65u, 90u, 97u, 122u, 33u, |
15488
|
|
|
|
|
|
|
45u, 46u, 58u, 64u, 95u, 36u, 37u, 39u, |
15489
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
15490
|
|
|
|
|
|
|
45u, 58u, 64u, 95u, 36u, 37u, 39u, 46u, |
15491
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
15492
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15493
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 58u, |
15494
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 46u, 48u, 57u, |
15495
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 58u, |
15496
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 57u, |
15497
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 95u, 36u, 37u, |
15498
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15499
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
15500
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
15501
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
15502
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
15503
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
15504
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
15505
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
15506
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
15507
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
15508
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
15509
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15510
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
15511
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15512
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
15513
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
15514
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
15515
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
15516
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15517
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
15518
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15519
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
15520
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15521
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
15522
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
15523
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
15524
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15525
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
15526
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15527
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
15528
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15529
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
15530
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15531
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15532
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
15533
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15534
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
15535
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
15536
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
15537
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
15538
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
15539
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
15540
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
15541
|
|
|
|
|
|
|
97u, 122u, 33u, 47u, 95u, 36u, 37u, 39u, |
15542
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 47u, 33u, 48u, |
15543
|
|
|
|
|
|
|
49u, 50u, 95u, 36u, 37u, 39u, 46u, 51u, |
15544
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 58u, 64u, |
15545
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 48u, 57u, 65u, |
15546
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15547
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
15548
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 58u, 64u, 95u, |
15549
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 48u, 57u, 65u, 90u, |
15550
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
15551
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
15552
|
|
|
|
|
|
|
97u, 122u, 33u, 58u, 64u, 95u, 36u, 37u, |
15553
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
15554
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15555
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
15556
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
15557
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
15558
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
15559
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
15560
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
15561
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
15562
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
15563
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
15564
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
15565
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
15566
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15567
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
15568
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15569
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
15570
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
15571
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
15572
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
15573
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15574
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
15575
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15576
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
15577
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15578
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
15579
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
15580
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
15581
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
15582
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
15583
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15584
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
15585
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15586
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
15587
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15588
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15589
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
15590
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
15591
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
15592
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 47u, 58u, 48u, 57u, |
15593
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 40u, 44u, |
15594
|
|
|
|
|
|
|
46u, 61u, 63u, 95u, 194u, 195u, 203u, 205u, |
15595
|
|
|
|
|
|
|
206u, 207u, 210u, 212u, 213u, 214u, 215u, 216u, |
15596
|
|
|
|
|
|
|
217u, 219u, 220u, 221u, 222u, 223u, 224u, 225u, |
15597
|
|
|
|
|
|
|
226u, 227u, 228u, 233u, 234u, 237u, 239u, 240u, |
15598
|
|
|
|
|
|
|
243u, 297u, 553u, 36u, 38u, 42u, 57u, 58u, |
15599
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 196u, 218u, 229u, |
15600
|
|
|
|
|
|
|
236u, 47u, 48u, 57u, 45u, 46u, 47u, 58u, |
15601
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
15602
|
|
|
|
|
|
|
47u, 58u, 48u, 57u, 65u, 90u, 97u, 122u, |
15603
|
|
|
|
|
|
|
45u, 46u, 47u, 58u, 48u, 57u, 65u, 90u, |
15604
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 47u, 53u, 58u, 48u, |
15605
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 45u, |
15606
|
|
|
|
|
|
|
46u, 47u, 58u, 48u, 53u, 54u, 57u, 65u, |
15607
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 47u, 58u, 48u, |
15608
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 47u, |
15609
|
|
|
|
|
|
|
58u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
15610
|
|
|
|
|
|
|
46u, 47u, 58u, 48u, 57u, 65u, 90u, 97u, |
15611
|
|
|
|
|
|
|
122u, 45u, 46u, 47u, 58u, 48u, 57u, 65u, |
15612
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 47u, 53u, 58u, |
15613
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
15614
|
|
|
|
|
|
|
45u, 46u, 47u, 58u, 48u, 53u, 54u, 57u, |
15615
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 47u, |
15616
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
15617
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 47u, 64u, |
15618
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 48u, 57u, 65u, |
15619
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 47u, 58u, |
15620
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 57u, |
15621
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 47u, |
15622
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
15623
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15624
|
|
|
|
|
|
|
47u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15625
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
15626
|
|
|
|
|
|
|
46u, 47u, 53u, 58u, 64u, 95u, 36u, 37u, |
15627
|
|
|
|
|
|
|
39u, 44u, 48u, 52u, 54u, 57u, 65u, 90u, |
15628
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 47u, 58u, 64u, |
15629
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
15630
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15631
|
|
|
|
|
|
|
47u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15632
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
15633
|
|
|
|
|
|
|
46u, 47u, 58u, 64u, 95u, 36u, 37u, 39u, |
15634
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
15635
|
|
|
|
|
|
|
45u, 46u, 47u, 58u, 64u, 95u, 36u, 37u, |
15636
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
15637
|
|
|
|
|
|
|
33u, 45u, 46u, 47u, 58u, 64u, 95u, 36u, |
15638
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
15639
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 47u, 53u, 58u, 64u, |
15640
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 52u, 54u, |
15641
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
15642
|
|
|
|
|
|
|
47u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
15643
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
15644
|
|
|
|
|
|
|
0 |
15645
|
|
|
|
|
|
|
}; |
15646
|
|
|
|
|
|
|
|
15647
|
|
|
|
|
|
|
static const char _url_detector_single_lengths[] = { |
15648
|
|
|
|
|
|
|
0, 5, 4, 2, 3, 3, 2, 1, |
15649
|
|
|
|
|
|
|
2, 0, 2, 39, 2, 0, 0, 2, |
15650
|
|
|
|
|
|
|
1, 2, 0, 0, 0, 1, 1, 1, |
15651
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 1, 32, |
15652
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
15653
|
|
|
|
|
|
|
2, 0, 2, 0, 0, 1, 2, 0, |
15654
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 1, 2, 0, |
15655
|
|
|
|
|
|
|
0, 5, 1, 4, 0, 0, 1, 38, |
15656
|
|
|
|
|
|
|
0, 0, 2, 1, 0, 1, 0, 0, |
15657
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15658
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
15659
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 0, 0, 0, |
15660
|
|
|
|
|
|
|
0, 0, 3, 1, 0, 20, 1, 0, |
15661
|
|
|
|
|
|
|
0, 6, 1, 0, 0, 0, 0, 0, |
15662
|
|
|
|
|
|
|
0, 0, 1, 2, 2, 0, 0, 1, |
15663
|
|
|
|
|
|
|
11, 0, 0, 0, 0, 0, 0, 0, |
15664
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 0, 20, 0, |
15665
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 2, |
15666
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 0, |
15667
|
|
|
|
|
|
|
0, 2, 0, 0, 17, 0, 1, 0, |
15668
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
15669
|
|
|
|
|
|
|
0, 0, 0, 14, 38, 1, 0, 0, |
15670
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 0, 0, 0, |
15671
|
|
|
|
|
|
|
0, 0, 0, 0, 3, 0, 0, 0, |
15672
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 0, |
15673
|
|
|
|
|
|
|
0, 0, 0, 0, 23, 1, 0, 0, |
15674
|
|
|
|
|
|
|
1, 1, 0, 1, 0, 0, 2, 1, |
15675
|
|
|
|
|
|
|
0, 1, 2, 0, 1, 3, 0, 0, |
15676
|
|
|
|
|
|
|
1, 1, 0, 9, 0, 0, 0, 0, |
15677
|
|
|
|
|
|
|
0, 0, 4, 0, 0, 0, 18, 0, |
15678
|
|
|
|
|
|
|
0, 0, 0, 0, 2, 0, 0, 1, |
15679
|
|
|
|
|
|
|
0, 1, 0, 0, 0, 0, 0, 1, |
15680
|
|
|
|
|
|
|
1, 4, 0, 4, 12, 0, 1, 1, |
15681
|
|
|
|
|
|
|
0, 2, 0, 1, 0, 1, 0, 1, |
15682
|
|
|
|
|
|
|
1, 0, 0, 3, 2, 3, 2, 3, |
15683
|
|
|
|
|
|
|
2, 2, 3, 2, 2, 2, 3, 2, |
15684
|
|
|
|
|
|
|
2, 2, 3, 2, 3, 2, 1, 2, |
15685
|
|
|
|
|
|
|
0, 2, 3, 2, 3, 2, 3, 2, |
15686
|
|
|
|
|
|
|
2, 3, 2, 2, 2, 3, 2, 2, |
15687
|
|
|
|
|
|
|
2, 3, 2, 6, 5, 6, 4, 6, |
15688
|
|
|
|
|
|
|
2, 7, 6, 7, 6, 7, 6, 6, |
15689
|
|
|
|
|
|
|
7, 6, 6, 6, 7, 6, 6, 6, |
15690
|
|
|
|
|
|
|
7, 6, 6, 6, 6, 3, 1, 5, |
15691
|
|
|
|
|
|
|
4, 6, 5, 6, 4, 6, 7, 6, |
15692
|
|
|
|
|
|
|
7, 6, 7, 6, 6, 7, 6, 6, |
15693
|
|
|
|
|
|
|
6, 7, 6, 6, 6, 7, 6, 4, |
15694
|
|
|
|
|
|
|
39, 1, 4, 4, 4, 5, 4, 4, |
15695
|
|
|
|
|
|
|
4, 4, 4, 5, 4, 7, 4, 7, |
15696
|
|
|
|
|
|
|
7, 7, 8, 7, 7, 7, 7, 7, |
15697
|
|
|
|
|
|
|
8, 7 |
15698
|
|
|
|
|
|
|
}; |
15699
|
|
|
|
|
|
|
|
15700
|
|
|
|
|
|
|
static const char _url_detector_range_lengths[] = { |
15701
|
|
|
|
|
|
|
0, 5, 5, 5, 5, 3, 3, 3, |
15702
|
|
|
|
|
|
|
3, 3, 3, 7, 3, 3, 1, 3, |
15703
|
|
|
|
|
|
|
3, 3, 2, 2, 2, 2, 2, 4, |
15704
|
|
|
|
|
|
|
2, 2, 4, 1, 2, 1, 1, 0, |
15705
|
|
|
|
|
|
|
1, 1, 1, 1, 3, 7, 7, 9, |
15706
|
|
|
|
|
|
|
5, 8, 5, 8, 8, 10, 4, 6, |
15707
|
|
|
|
|
|
|
8, 7, 7, 5, 6, 4, 5, 1, |
15708
|
|
|
|
|
|
|
2, 8, 4, 3, 3, 3, 0, 3, |
15709
|
|
|
|
|
|
|
2, 2, 3, 5, 5, 3, 3, 3, |
15710
|
|
|
|
|
|
|
2, 2, 1, 2, 2, 2, 3, 4, |
15711
|
|
|
|
|
|
|
4, 3, 1, 2, 1, 3, 2, 2, |
15712
|
|
|
|
|
|
|
2, 2, 2, 3, 3, 1, 2, 3, |
15713
|
|
|
|
|
|
|
2, 3, 4, 2, 7, 0, 2, 2, |
15714
|
|
|
|
|
|
|
1, 5, 2, 1, 1, 1, 1, 1, |
15715
|
|
|
|
|
|
|
2, 2, 2, 2, 1, 5, 5, 0, |
15716
|
|
|
|
|
|
|
1, 4, 4, 2, 2, 3, 1, 1, |
15717
|
|
|
|
|
|
|
2, 2, 2, 1, 1, 1, 1, 1, |
15718
|
|
|
|
|
|
|
1, 2, 2, 2, 3, 1, 2, 3, |
15719
|
|
|
|
|
|
|
2, 2, 2, 1, 4, 4, 6, 3, |
15720
|
|
|
|
|
|
|
3, 1, 2, 2, 1, 1, 5, 3, |
15721
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 1, 2, |
15722
|
|
|
|
|
|
|
2, 1, 4, 1, 2, 4, 2, 1, |
15723
|
|
|
|
|
|
|
1, 1, 1, 0, 2, 2, 2, 3, |
15724
|
|
|
|
|
|
|
2, 2, 1, 2, 3, 3, 2, 3, |
15725
|
|
|
|
|
|
|
2, 2, 6, 2, 1, 3, 3, 2, |
15726
|
|
|
|
|
|
|
1, 1, 2, 1, 1, 2, 2, 2, |
15727
|
|
|
|
|
|
|
1, 4, 2, 5, 2, 8, 6, 2, |
15728
|
|
|
|
|
|
|
2, 1, 1, 3, 1, 2, 1, 1, |
15729
|
|
|
|
|
|
|
1, 1, 1, 1, 2, 2, 5, 1, |
15730
|
|
|
|
|
|
|
2, 1, 0, 1, 2, 3, 1, 3, |
15731
|
|
|
|
|
|
|
3, 1, 1, 2, 6, 2, 6, 3, |
15732
|
|
|
|
|
|
|
2, 3, 3, 3, 3, 3, 2, 1, |
15733
|
|
|
|
|
|
|
2, 1, 2, 5, 7, 5, 0, 1, |
15734
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 0, |
15735
|
|
|
|
|
|
|
1, 1, 1, 3, 3, 3, 3, 3, |
15736
|
|
|
|
|
|
|
3, 3, 4, 4, 3, 3, 4, 4, |
15737
|
|
|
|
|
|
|
3, 3, 4, 4, 3, 3, 3, 3, |
15738
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
15739
|
|
|
|
|
|
|
3, 4, 4, 3, 3, 4, 4, 3, |
15740
|
|
|
|
|
|
|
3, 4, 4, 5, 5, 5, 5, 5, |
15741
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
15742
|
|
|
|
|
|
|
6, 6, 5, 5, 6, 6, 5, 5, |
15743
|
|
|
|
|
|
|
6, 6, 5, 5, 5, 4, 0, 5, |
15744
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
15745
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 6, 6, 5, |
15746
|
|
|
|
|
|
|
5, 6, 6, 5, 5, 6, 6, 3, |
15747
|
|
|
|
|
|
|
7, 1, 3, 3, 3, 4, 4, 3, |
15748
|
|
|
|
|
|
|
3, 3, 3, 4, 4, 5, 5, 5, |
15749
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 5, 5, |
15750
|
|
|
|
|
|
|
6, 6 |
15751
|
|
|
|
|
|
|
}; |
15752
|
|
|
|
|
|
|
|
15753
|
|
|
|
|
|
|
static const short _url_detector_index_offsets[] = { |
15754
|
|
|
|
|
|
|
0, 0, 11, 21, 29, 38, 45, 51, |
15755
|
|
|
|
|
|
|
56, 62, 66, 72, 119, 125, 129, 131, |
15756
|
|
|
|
|
|
|
137, 142, 148, 151, 154, 157, 161, 165, |
15757
|
|
|
|
|
|
|
171, 174, 177, 183, 185, 188, 190, 193, |
15758
|
|
|
|
|
|
|
226, 228, 230, 232, 234, 238, 247, 256, |
15759
|
|
|
|
|
|
|
267, 275, 284, 292, 301, 310, 322, 329, |
15760
|
|
|
|
|
|
|
336, 345, 353, 362, 368, 376, 382, 390, |
15761
|
|
|
|
|
|
|
392, 395, 409, 415, 423, 427, 431, 433, |
15762
|
|
|
|
|
|
|
475, 478, 481, 487, 494, 500, 505, 509, |
15763
|
|
|
|
|
|
|
513, 516, 519, 521, 524, 527, 530, 534, |
15764
|
|
|
|
|
|
|
539, 545, 549, 551, 554, 556, 560, 563, |
15765
|
|
|
|
|
|
|
566, 569, 572, 576, 581, 585, 587, 590, |
15766
|
|
|
|
|
|
|
594, 597, 601, 609, 613, 621, 642, 646, |
15767
|
|
|
|
|
|
|
649, 651, 663, 667, 669, 671, 673, 675, |
15768
|
|
|
|
|
|
|
677, 680, 683, 687, 692, 696, 702, 708, |
15769
|
|
|
|
|
|
|
710, 723, 728, 733, 736, 739, 743, 745, |
15770
|
|
|
|
|
|
|
747, 750, 753, 757, 759, 762, 764, 786, |
15771
|
|
|
|
|
|
|
788, 790, 793, 797, 800, 804, 806, 809, |
15772
|
|
|
|
|
|
|
815, 818, 821, 825, 827, 832, 837, 844, |
15773
|
|
|
|
|
|
|
848, 852, 856, 859, 862, 881, 883, 890, |
15774
|
|
|
|
|
|
|
894, 896, 898, 901, 904, 907, 910, 912, |
15775
|
|
|
|
|
|
|
915, 918, 920, 925, 941, 982, 988, 991, |
15776
|
|
|
|
|
|
|
993, 995, 997, 999, 1001, 1004, 1007, 1010, |
15777
|
|
|
|
|
|
|
1014, 1017, 1020, 1022, 1025, 1032, 1036, 1039, |
15778
|
|
|
|
|
|
|
1043, 1046, 1049, 1057, 1060, 1062, 1066, 1070, |
15779
|
|
|
|
|
|
|
1073, 1075, 1077, 1080, 1082, 1107, 1111, 1114, |
15780
|
|
|
|
|
|
|
1117, 1120, 1126, 1129, 1136, 1139, 1148, 1157, |
15781
|
|
|
|
|
|
|
1161, 1164, 1167, 1171, 1175, 1178, 1184, 1186, |
15782
|
|
|
|
|
|
|
1188, 1191, 1194, 1196, 1207, 1210, 1213, 1219, |
15783
|
|
|
|
|
|
|
1221, 1224, 1226, 1231, 1233, 1236, 1240, 1260, |
15784
|
|
|
|
|
|
|
1264, 1268, 1270, 1272, 1275, 1284, 1287, 1294, |
15785
|
|
|
|
|
|
|
1299, 1302, 1307, 1311, 1315, 1319, 1323, 1326, |
15786
|
|
|
|
|
|
|
1329, 1333, 1339, 1342, 1352, 1372, 1378, 1380, |
15787
|
|
|
|
|
|
|
1383, 1385, 1389, 1391, 1394, 1396, 1399, 1401, |
15788
|
|
|
|
|
|
|
1403, 1406, 1408, 1410, 1417, 1423, 1430, 1436, |
15789
|
|
|
|
|
|
|
1443, 1449, 1455, 1463, 1470, 1476, 1482, 1490, |
15790
|
|
|
|
|
|
|
1497, 1503, 1509, 1517, 1524, 1531, 1537, 1542, |
15791
|
|
|
|
|
|
|
1548, 1552, 1558, 1565, 1571, 1578, 1584, 1591, |
15792
|
|
|
|
|
|
|
1597, 1603, 1611, 1618, 1624, 1630, 1638, 1645, |
15793
|
|
|
|
|
|
|
1651, 1657, 1665, 1672, 1684, 1695, 1707, 1717, |
15794
|
|
|
|
|
|
|
1729, 1737, 1750, 1762, 1775, 1787, 1800, 1812, |
15795
|
|
|
|
|
|
|
1824, 1838, 1851, 1863, 1875, 1889, 1902, 1914, |
15796
|
|
|
|
|
|
|
1926, 1940, 1953, 1965, 1977, 1989, 1997, 1999, |
15797
|
|
|
|
|
|
|
2010, 2020, 2032, 2043, 2055, 2065, 2077, 2090, |
15798
|
|
|
|
|
|
|
2102, 2115, 2127, 2140, 2152, 2164, 2178, 2191, |
15799
|
|
|
|
|
|
|
2203, 2215, 2229, 2242, 2254, 2266, 2280, 2293, |
15800
|
|
|
|
|
|
|
2301, 2348, 2351, 2359, 2367, 2375, 2385, 2394, |
15801
|
|
|
|
|
|
|
2402, 2410, 2418, 2426, 2436, 2445, 2458, 2468, |
15802
|
|
|
|
|
|
|
2481, 2494, 2507, 2522, 2536, 2549, 2562, 2575, |
15803
|
|
|
|
|
|
|
2588, 2603 |
15804
|
|
|
|
|
|
|
}; |
15805
|
|
|
|
|
|
|
|
15806
|
|
|
|
|
|
|
static const short _url_detector_indicies[] = { |
15807
|
|
|
|
|
|
|
0, 2, 3, 4, 0, 0, 0, 5, |
15808
|
|
|
|
|
|
|
6, 6, 1, 0, 7, 8, 0, 0, |
15809
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 9, 9, 9, |
15810
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 9, 10, 9, |
15811
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 1, 11, 12, |
15812
|
|
|
|
|
|
|
13, 14, 15, 15, 1, 16, 17, 15, |
15813
|
|
|
|
|
|
|
15, 15, 1, 16, 15, 15, 15, 1, |
15814
|
|
|
|
|
|
|
16, 18, 15, 15, 15, 1, 15, 19, |
15815
|
|
|
|
|
|
|
19, 1, 16, 18, 15, 20, 20, 1, |
15816
|
|
|
|
|
|
|
21, 21, 23, 21, 21, 22, 21, 22, |
15817
|
|
|
|
|
|
|
24, 25, 27, 28, 29, 30, 31, 32, |
15818
|
|
|
|
|
|
|
33, 34, 35, 36, 37, 38, 39, 40, |
15819
|
|
|
|
|
|
|
41, 42, 43, 44, 45, 46, 47, 49, |
15820
|
|
|
|
|
|
|
50, 51, 52, 53, 54, 55, 56, 22, |
15821
|
|
|
|
|
|
|
22, 21, 22, 22, 26, 48, 1, 22, |
15822
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
15823
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 22, |
15824
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 1, 22, 22, |
15825
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
15826
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
15827
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 22, |
15828
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
15829
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
15830
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 1, 22, 22, |
15831
|
|
|
|
|
|
|
1, 57, 58, 59, 60, 26, 61, 62, |
15832
|
|
|
|
|
|
|
63, 64, 65, 66, 67, 68, 69, 70, |
15833
|
|
|
|
|
|
|
71, 72, 73, 74, 75, 76, 77, 78, |
15834
|
|
|
|
|
|
|
79, 80, 81, 82, 83, 84, 85, 86, |
15835
|
|
|
|
|
|
|
87, 1, 22, 1, 22, 1, 22, 1, |
15836
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
15837
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 1, 22, |
15838
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 1, |
15839
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
15840
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
15841
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
15842
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
15843
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
15844
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
15845
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
15846
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
15847
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 22, 22, 22, |
15848
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 1, |
15849
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
15850
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
15851
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
15852
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 22, 22, 1, |
15853
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 1, |
15854
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
15855
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 1, |
15856
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
15857
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
15858
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
15859
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 1, 22, |
15860
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
15861
|
|
|
|
|
|
|
1, 88, 89, 90, 91, 92, 93, 94, |
15862
|
|
|
|
|
|
|
95, 96, 97, 98, 99, 100, 101, 102, |
15863
|
|
|
|
|
|
|
103, 104, 105, 106, 107, 108, 109, 110, |
15864
|
|
|
|
|
|
|
111, 112, 113, 114, 115, 116, 117, 106, |
15865
|
|
|
|
|
|
|
118, 119, 120, 121, 122, 123, 124, 26, |
15866
|
|
|
|
|
|
|
26, 26, 1, 22, 22, 1, 22, 22, |
15867
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
15868
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
15869
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
15870
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 22, |
15871
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
15872
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
15873
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
15874
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
15875
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 1, 22, |
15876
|
|
|
|
|
|
|
22, 1, 22, 1, 22, 22, 22, 1, |
15877
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
15878
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 22, 1, |
15879
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
15880
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 1, 22, 22, |
15881
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
15882
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
15883
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 22, |
15884
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 125, 126, 127, |
15885
|
|
|
|
|
|
|
128, 129, 130, 131, 58, 132, 133, 134, |
15886
|
|
|
|
|
|
|
135, 136, 26, 137, 138, 139, 140, 141, |
15887
|
|
|
|
|
|
|
142, 1, 22, 22, 22, 1, 22, 22, |
15888
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 22, |
15889
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 1, 22, |
15890
|
|
|
|
|
|
|
22, 22, 1, 22, 1, 22, 1, 22, |
15891
|
|
|
|
|
|
|
1, 22, 1, 22, 1, 22, 22, 1, |
15892
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
15893
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 1, |
15894
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
15895
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 1, 143, 98, |
15896
|
|
|
|
|
|
|
144, 145, 146, 26, 147, 148, 149, 150, |
15897
|
|
|
|
|
|
|
151, 26, 1, 22, 22, 22, 22, 1, |
15898
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 1, |
15899
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
15900
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 1, 22, 22, |
15901
|
|
|
|
|
|
|
1, 108, 26, 26, 1, 26, 1, 152, |
15902
|
|
|
|
|
|
|
26, 1, 22, 1, 153, 154, 155, 156, |
15903
|
|
|
|
|
|
|
41, 157, 158, 159, 160, 117, 161, 162, |
15904
|
|
|
|
|
|
|
163, 164, 165, 166, 167, 168, 169, 170, |
15905
|
|
|
|
|
|
|
26, 1, 22, 1, 22, 1, 22, 22, |
15906
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 1, |
15907
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 1, 22, 22, |
15908
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
15909
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
15910
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 1, |
15911
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
15912
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 1, |
15913
|
|
|
|
|
|
|
22, 22, 22, 1, 171, 172, 26, 1, |
15914
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 162, 173, |
15915
|
|
|
|
|
|
|
174, 175, 41, 176, 177, 39, 178, 179, |
15916
|
|
|
|
|
|
|
180, 181, 182, 183, 184, 185, 186, 26, |
15917
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 22, |
15918
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 1, |
15919
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 1, |
15920
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 1, |
15921
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 1, |
15922
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 187, 188, 189, |
15923
|
|
|
|
|
|
|
190, 191, 192, 193, 194, 195, 196, 197, |
15924
|
|
|
|
|
|
|
198, 199, 200, 48, 1, 201, 202, 26, |
15925
|
|
|
|
|
|
|
203, 204, 205, 206, 207, 208, 209, 171, |
15926
|
|
|
|
|
|
|
210, 89, 211, 212, 213, 214, 165, 215, |
15927
|
|
|
|
|
|
|
216, 217, 218, 219, 220, 221, 178, 222, |
15928
|
|
|
|
|
|
|
223, 224, 225, 108, 226, 227, 26, 228, |
15929
|
|
|
|
|
|
|
229, 230, 231, 26, 26, 1, 22, 22, |
15930
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
15931
|
|
|
|
|
|
|
1, 22, 1, 22, 1, 22, 1, 22, |
15932
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
15933
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
15934
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 1, 22, 22, |
15935
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 1, |
15936
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
15937
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
15938
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
15939
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 1, 22, 22, |
15940
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
15941
|
|
|
|
|
|
|
1, 22, 1, 22, 1, 22, 22, 1, |
15942
|
|
|
|
|
|
|
22, 1, 232, 203, 233, 234, 235, 236, |
15943
|
|
|
|
|
|
|
237, 238, 239, 240, 241, 26, 242, 243, |
15944
|
|
|
|
|
|
|
244, 26, 245, 106, 130, 246, 131, 247, |
15945
|
|
|
|
|
|
|
205, 26, 1, 22, 22, 22, 1, 22, |
15946
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 1, |
15947
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
15948
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 1, |
15949
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
15950
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
15951
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
15952
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
15953
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
15954
|
|
|
|
|
|
|
22, 1, 173, 248, 249, 26, 26, 1, |
15955
|
|
|
|
|
|
|
22, 1, 22, 1, 248, 26, 1, 250, |
15956
|
|
|
|
|
|
|
26, 1, 22, 1, 205, 251, 252, 165, |
15957
|
|
|
|
|
|
|
253, 254, 26, 255, 256, 26, 1, 22, |
15958
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
15959
|
|
|
|
|
|
|
22, 22, 1, 22, 1, 22, 22, 1, |
15960
|
|
|
|
|
|
|
22, 1, 257, 26, 258, 259, 1, 22, |
15961
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 22, 1, |
15962
|
|
|
|
|
|
|
260, 261, 262, 263, 264, 265, 266, 267, |
15963
|
|
|
|
|
|
|
268, 269, 270, 271, 272, 273, 274, 275, |
15964
|
|
|
|
|
|
|
276, 277, 26, 1, 22, 22, 22, 1, |
15965
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 1, 22, 1, |
15966
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
15967
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
15968
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
15969
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
15970
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
15971
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
15972
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
15973
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 278, 279, 280, |
15974
|
|
|
|
|
|
|
281, 26, 1, 22, 22, 1, 22, 22, |
15975
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 1, |
15976
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
15977
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
15978
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
15979
|
|
|
|
|
|
|
22, 1, 153, 1, 282, 26, 1, 22, |
15980
|
|
|
|
|
|
|
1, 283, 89, 26, 1, 22, 1, 284, |
15981
|
|
|
|
|
|
|
26, 1, 22, 1, 285, 26, 1, 22, |
15982
|
|
|
|
|
|
|
1, 286, 1, 287, 26, 1, 22, 1, |
15983
|
|
|
|
|
|
|
288, 1, 289, 290, 291, 292, 19, 19, |
15984
|
|
|
|
|
|
|
1, 16, 293, 15, 15, 15, 1, 294, |
15985
|
|
|
|
|
|
|
295, 296, 297, 19, 19, 1, 16, 298, |
15986
|
|
|
|
|
|
|
15, 15, 15, 1, 299, 300, 301, 302, |
15987
|
|
|
|
|
|
|
19, 19, 1, 16, 298, 297, 15, 15, |
15988
|
|
|
|
|
|
|
1, 16, 298, 294, 15, 15, 1, 16, |
15989
|
|
|
|
|
|
|
298, 303, 297, 294, 15, 15, 1, 16, |
15990
|
|
|
|
|
|
|
298, 294, 15, 15, 15, 1, 16, 293, |
15991
|
|
|
|
|
|
|
292, 15, 15, 1, 16, 293, 289, 15, |
15992
|
|
|
|
|
|
|
15, 1, 16, 293, 304, 292, 289, 15, |
15993
|
|
|
|
|
|
|
15, 1, 16, 293, 289, 15, 15, 15, |
15994
|
|
|
|
|
|
|
1, 16, 17, 14, 15, 15, 1, 16, |
15995
|
|
|
|
|
|
|
17, 11, 15, 15, 1, 16, 17, 305, |
15996
|
|
|
|
|
|
|
14, 11, 15, 15, 1, 16, 17, 11, |
15997
|
|
|
|
|
|
|
15, 15, 15, 1, 306, 307, 308, 309, |
15998
|
|
|
|
|
|
|
310, 310, 1, 311, 312, 310, 310, 310, |
15999
|
|
|
|
|
|
|
1, 311, 310, 310, 310, 1, 311, 313, |
16000
|
|
|
|
|
|
|
310, 310, 310, 1, 310, 314, 314, 1, |
16001
|
|
|
|
|
|
|
311, 313, 310, 315, 315, 1, 316, 317, |
16002
|
|
|
|
|
|
|
318, 319, 314, 314, 1, 311, 320, 310, |
16003
|
|
|
|
|
|
|
310, 310, 1, 321, 322, 323, 324, 314, |
16004
|
|
|
|
|
|
|
314, 1, 311, 325, 310, 310, 310, 1, |
16005
|
|
|
|
|
|
|
326, 327, 328, 329, 314, 314, 1, 311, |
16006
|
|
|
|
|
|
|
325, 324, 310, 310, 1, 311, 325, 321, |
16007
|
|
|
|
|
|
|
310, 310, 1, 311, 325, 330, 324, 321, |
16008
|
|
|
|
|
|
|
310, 310, 1, 311, 325, 321, 310, 310, |
16009
|
|
|
|
|
|
|
310, 1, 311, 320, 319, 310, 310, 1, |
16010
|
|
|
|
|
|
|
311, 320, 316, 310, 310, 1, 311, 320, |
16011
|
|
|
|
|
|
|
331, 319, 316, 310, 310, 1, 311, 320, |
16012
|
|
|
|
|
|
|
316, 310, 310, 310, 1, 311, 312, 309, |
16013
|
|
|
|
|
|
|
310, 310, 1, 311, 312, 306, 310, 310, |
16014
|
|
|
|
|
|
|
1, 311, 312, 332, 309, 306, 310, 310, |
16015
|
|
|
|
|
|
|
1, 311, 312, 306, 310, 310, 310, 1, |
16016
|
|
|
|
|
|
|
0, 333, 334, 7, 8, 0, 0, 0, |
16017
|
|
|
|
|
|
|
335, 335, 335, 1, 0, 333, 7, 8, |
16018
|
|
|
|
|
|
|
0, 0, 0, 335, 335, 335, 1, 0, |
16019
|
|
|
|
|
|
|
333, 336, 7, 8, 0, 0, 0, 335, |
16020
|
|
|
|
|
|
|
335, 335, 1, 0, 7, 8, 0, 0, |
16021
|
|
|
|
|
|
|
0, 335, 337, 337, 1, 0, 333, 336, |
16022
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 335, 338, 338, |
16023
|
|
|
|
|
|
|
1, 9, 9, 9, 9, 339, 9, 9, |
16024
|
|
|
|
|
|
|
1, 0, 340, 341, 342, 7, 8, 0, |
16025
|
|
|
|
|
|
|
0, 0, 343, 337, 337, 1, 0, 333, |
16026
|
|
|
|
|
|
|
344, 7, 8, 0, 0, 0, 335, 335, |
16027
|
|
|
|
|
|
|
335, 1, 0, 345, 346, 347, 7, 8, |
16028
|
|
|
|
|
|
|
0, 0, 0, 348, 337, 337, 1, 0, |
16029
|
|
|
|
|
|
|
333, 349, 7, 8, 0, 0, 0, 335, |
16030
|
|
|
|
|
|
|
335, 335, 1, 0, 350, 351, 352, 7, |
16031
|
|
|
|
|
|
|
8, 0, 0, 0, 353, 337, 337, 1, |
16032
|
|
|
|
|
|
|
0, 333, 349, 7, 8, 0, 0, 0, |
16033
|
|
|
|
|
|
|
348, 335, 335, 1, 0, 333, 349, 7, |
16034
|
|
|
|
|
|
|
8, 0, 0, 0, 345, 335, 335, 1, |
16035
|
|
|
|
|
|
|
0, 333, 349, 354, 7, 8, 0, 0, |
16036
|
|
|
|
|
|
|
0, 348, 345, 335, 335, 1, 0, 333, |
16037
|
|
|
|
|
|
|
349, 7, 8, 0, 0, 0, 345, 335, |
16038
|
|
|
|
|
|
|
335, 335, 1, 0, 333, 344, 7, 8, |
16039
|
|
|
|
|
|
|
0, 0, 0, 343, 335, 335, 1, 0, |
16040
|
|
|
|
|
|
|
333, 344, 7, 8, 0, 0, 0, 340, |
16041
|
|
|
|
|
|
|
335, 335, 1, 0, 333, 344, 355, 7, |
16042
|
|
|
|
|
|
|
8, 0, 0, 0, 343, 340, 335, 335, |
16043
|
|
|
|
|
|
|
1, 0, 333, 344, 7, 8, 0, 0, |
16044
|
|
|
|
|
|
|
0, 340, 335, 335, 335, 1, 0, 333, |
16045
|
|
|
|
|
|
|
334, 7, 8, 0, 0, 0, 5, 335, |
16046
|
|
|
|
|
|
|
335, 1, 0, 333, 334, 7, 8, 0, |
16047
|
|
|
|
|
|
|
0, 0, 2, 335, 335, 1, 0, 333, |
16048
|
|
|
|
|
|
|
334, 356, 7, 8, 0, 0, 0, 5, |
16049
|
|
|
|
|
|
|
2, 335, 335, 1, 0, 333, 334, 7, |
16050
|
|
|
|
|
|
|
8, 0, 0, 0, 2, 335, 335, 335, |
16051
|
|
|
|
|
|
|
1, 0, 333, 336, 7, 8, 0, 0, |
16052
|
|
|
|
|
|
|
0, 335, 357, 357, 1, 0, 333, 336, |
16053
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 335, 358, 358, |
16054
|
|
|
|
|
|
|
1, 0, 333, 336, 359, 8, 0, 0, |
16055
|
|
|
|
|
|
|
0, 335, 358, 358, 1, 9, 360, 9, |
16056
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 361, 1, 362, |
16057
|
|
|
|
|
|
|
363, 364, 365, 362, 362, 362, 366, 367, |
16058
|
|
|
|
|
|
|
367, 1, 362, 7, 10, 362, 362, 362, |
16059
|
|
|
|
|
|
|
362, 362, 362, 1, 362, 368, 369, 7, |
16060
|
|
|
|
|
|
|
10, 362, 362, 362, 367, 367, 367, 1, |
16061
|
|
|
|
|
|
|
362, 368, 7, 10, 362, 362, 362, 367, |
16062
|
|
|
|
|
|
|
367, 367, 1, 362, 368, 370, 7, 10, |
16063
|
|
|
|
|
|
|
362, 362, 362, 367, 367, 367, 1, 362, |
16064
|
|
|
|
|
|
|
7, 10, 362, 362, 362, 367, 371, 371, |
16065
|
|
|
|
|
|
|
1, 362, 368, 370, 7, 10, 362, 362, |
16066
|
|
|
|
|
|
|
362, 367, 372, 372, 1, 362, 373, 374, |
16067
|
|
|
|
|
|
|
375, 7, 10, 362, 362, 362, 376, 371, |
16068
|
|
|
|
|
|
|
371, 1, 362, 368, 377, 7, 10, 362, |
16069
|
|
|
|
|
|
|
362, 362, 367, 367, 367, 1, 362, 378, |
16070
|
|
|
|
|
|
|
379, 380, 7, 10, 362, 362, 362, 381, |
16071
|
|
|
|
|
|
|
371, 371, 1, 362, 368, 382, 7, 10, |
16072
|
|
|
|
|
|
|
362, 362, 362, 367, 367, 367, 1, 362, |
16073
|
|
|
|
|
|
|
383, 384, 385, 7, 10, 362, 362, 362, |
16074
|
|
|
|
|
|
|
386, 371, 371, 1, 362, 368, 382, 7, |
16075
|
|
|
|
|
|
|
10, 362, 362, 362, 381, 367, 367, 1, |
16076
|
|
|
|
|
|
|
362, 368, 382, 7, 10, 362, 362, 362, |
16077
|
|
|
|
|
|
|
378, 367, 367, 1, 362, 368, 382, 387, |
16078
|
|
|
|
|
|
|
7, 10, 362, 362, 362, 381, 378, 367, |
16079
|
|
|
|
|
|
|
367, 1, 362, 368, 382, 7, 10, 362, |
16080
|
|
|
|
|
|
|
362, 362, 378, 367, 367, 367, 1, 362, |
16081
|
|
|
|
|
|
|
368, 377, 7, 10, 362, 362, 362, 376, |
16082
|
|
|
|
|
|
|
367, 367, 1, 362, 368, 377, 7, 10, |
16083
|
|
|
|
|
|
|
362, 362, 362, 373, 367, 367, 1, 362, |
16084
|
|
|
|
|
|
|
368, 377, 388, 7, 10, 362, 362, 362, |
16085
|
|
|
|
|
|
|
376, 373, 367, 367, 1, 362, 368, 377, |
16086
|
|
|
|
|
|
|
7, 10, 362, 362, 362, 373, 367, 367, |
16087
|
|
|
|
|
|
|
367, 1, 362, 368, 369, 7, 10, 362, |
16088
|
|
|
|
|
|
|
362, 362, 366, 367, 367, 1, 362, 368, |
16089
|
|
|
|
|
|
|
369, 7, 10, 362, 362, 362, 363, 367, |
16090
|
|
|
|
|
|
|
367, 1, 362, 368, 369, 389, 7, 10, |
16091
|
|
|
|
|
|
|
362, 362, 362, 366, 363, 367, 367, 1, |
16092
|
|
|
|
|
|
|
362, 368, 369, 7, 10, 362, 362, 362, |
16093
|
|
|
|
|
|
|
363, 367, 367, 367, 1, 16, 18, 22, |
16094
|
|
|
|
|
|
|
390, 15, 20, 20, 1, 21, 21, 23, |
16095
|
|
|
|
|
|
|
21, 21, 22, 21, 22, 24, 25, 27, |
16096
|
|
|
|
|
|
|
28, 29, 30, 31, 32, 33, 34, 35, |
16097
|
|
|
|
|
|
|
36, 37, 38, 39, 40, 41, 42, 43, |
16098
|
|
|
|
|
|
|
44, 45, 46, 47, 49, 50, 51, 52, |
16099
|
|
|
|
|
|
|
53, 54, 55, 56, 22, 22, 21, 22, |
16100
|
|
|
|
|
|
|
22, 26, 48, 1, 22, 288, 1, 16, |
16101
|
|
|
|
|
|
|
18, 22, 390, 15, 15, 15, 1, 16, |
16102
|
|
|
|
|
|
|
18, 22, 390, 302, 15, 15, 1, 16, |
16103
|
|
|
|
|
|
|
18, 22, 390, 299, 15, 15, 1, 16, |
16104
|
|
|
|
|
|
|
18, 22, 391, 390, 302, 299, 15, 15, |
16105
|
|
|
|
|
|
|
1, 16, 18, 22, 390, 299, 15, 15, |
16106
|
|
|
|
|
|
|
15, 1, 311, 313, 22, 390, 310, 315, |
16107
|
|
|
|
|
|
|
315, 1, 311, 313, 22, 390, 310, 310, |
16108
|
|
|
|
|
|
|
310, 1, 311, 313, 22, 390, 329, 310, |
16109
|
|
|
|
|
|
|
310, 1, 311, 313, 22, 390, 326, 310, |
16110
|
|
|
|
|
|
|
310, 1, 311, 313, 22, 392, 390, 329, |
16111
|
|
|
|
|
|
|
326, 310, 310, 1, 311, 313, 22, 390, |
16112
|
|
|
|
|
|
|
326, 310, 310, 310, 1, 0, 333, 336, |
16113
|
|
|
|
|
|
|
22, 393, 8, 0, 0, 0, 335, 338, |
16114
|
|
|
|
|
|
|
338, 1, 9, 22, 10, 9, 9, 9, |
16115
|
|
|
|
|
|
|
339, 9, 9, 1, 0, 333, 336, 22, |
16116
|
|
|
|
|
|
|
393, 8, 0, 0, 0, 335, 335, 335, |
16117
|
|
|
|
|
|
|
1, 0, 333, 336, 22, 393, 8, 0, |
16118
|
|
|
|
|
|
|
0, 0, 353, 335, 335, 1, 0, 333, |
16119
|
|
|
|
|
|
|
336, 22, 393, 8, 0, 0, 0, 350, |
16120
|
|
|
|
|
|
|
335, 335, 1, 0, 333, 336, 22, 394, |
16121
|
|
|
|
|
|
|
393, 8, 0, 0, 0, 353, 350, 335, |
16122
|
|
|
|
|
|
|
335, 1, 0, 333, 336, 22, 393, 8, |
16123
|
|
|
|
|
|
|
0, 0, 0, 350, 335, 335, 335, 1, |
16124
|
|
|
|
|
|
|
362, 368, 370, 22, 393, 10, 362, 362, |
16125
|
|
|
|
|
|
|
362, 367, 372, 372, 1, 362, 368, 370, |
16126
|
|
|
|
|
|
|
22, 393, 10, 362, 362, 362, 367, 367, |
16127
|
|
|
|
|
|
|
367, 1, 362, 368, 370, 22, 393, 10, |
16128
|
|
|
|
|
|
|
362, 362, 362, 386, 367, 367, 1, 362, |
16129
|
|
|
|
|
|
|
368, 370, 22, 393, 10, 362, 362, 362, |
16130
|
|
|
|
|
|
|
383, 367, 367, 1, 362, 368, 370, 22, |
16131
|
|
|
|
|
|
|
395, 393, 10, 362, 362, 362, 386, 383, |
16132
|
|
|
|
|
|
|
367, 367, 1, 362, 368, 370, 22, 393, |
16133
|
|
|
|
|
|
|
10, 362, 362, 362, 383, 367, 367, 367, |
16134
|
|
|
|
|
|
|
1, 0 |
16135
|
|
|
|
|
|
|
}; |
16136
|
|
|
|
|
|
|
|
16137
|
|
|
|
|
|
|
static const short _url_detector_trans_targs[] = { |
16138
|
|
|
|
|
|
|
2, 0, 315, 334, 336, 335, 338, 3, |
16139
|
|
|
|
|
|
|
292, 4, 5, 6, 288, 290, 289, 8, |
16140
|
|
|
|
|
|
|
7, 275, 9, 10, 367, 11, 368, 368, |
16141
|
|
|
|
|
|
|
12, 13, 14, 15, 16, 17, 18, 19, |
16142
|
|
|
|
|
|
|
20, 21, 22, 23, 24, 25, 26, 27, |
16143
|
|
|
|
|
|
|
28, 29, 30, 31, 63, 101, 120, 130, |
16144
|
|
|
|
|
|
|
131, 132, 134, 153, 156, 171, 271, 11, |
16145
|
|
|
|
|
|
|
368, 32, 33, 34, 35, 36, 37, 38, |
16146
|
|
|
|
|
|
|
39, 40, 41, 42, 43, 44, 45, 46, |
16147
|
|
|
|
|
|
|
47, 48, 49, 50, 51, 52, 53, 54, |
16148
|
|
|
|
|
|
|
55, 56, 57, 58, 59, 60, 61, 62, |
16149
|
|
|
|
|
|
|
64, 65, 66, 67, 68, 69, 70, 71, |
16150
|
|
|
|
|
|
|
72, 73, 74, 75, 76, 77, 78, 79, |
16151
|
|
|
|
|
|
|
80, 81, 82, 83, 84, 85, 86, 87, |
16152
|
|
|
|
|
|
|
88, 89, 90, 91, 92, 93, 94, 95, |
16153
|
|
|
|
|
|
|
96, 97, 98, 99, 100, 102, 103, 104, |
16154
|
|
|
|
|
|
|
105, 106, 107, 108, 109, 110, 111, 112, |
16155
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 121, |
16156
|
|
|
|
|
|
|
122, 123, 124, 125, 126, 127, 128, 129, |
16157
|
|
|
|
|
|
|
133, 135, 136, 137, 138, 139, 140, 141, |
16158
|
|
|
|
|
|
|
142, 143, 144, 145, 146, 147, 148, 149, |
16159
|
|
|
|
|
|
|
150, 151, 152, 154, 155, 157, 158, 159, |
16160
|
|
|
|
|
|
|
160, 161, 162, 163, 164, 165, 166, 167, |
16161
|
|
|
|
|
|
|
168, 169, 170, 172, 204, 221, 224, 225, |
16162
|
|
|
|
|
|
|
227, 234, 238, 257, 262, 263, 265, 267, |
16163
|
|
|
|
|
|
|
269, 173, 174, 175, 176, 177, 178, 179, |
16164
|
|
|
|
|
|
|
180, 181, 182, 183, 184, 185, 186, 187, |
16165
|
|
|
|
|
|
|
188, 189, 190, 191, 192, 193, 194, 195, |
16166
|
|
|
|
|
|
|
196, 197, 198, 199, 200, 201, 202, 203, |
16167
|
|
|
|
|
|
|
205, 206, 207, 208, 209, 210, 211, 212, |
16168
|
|
|
|
|
|
|
213, 214, 215, 216, 217, 218, 219, 220, |
16169
|
|
|
|
|
|
|
222, 223, 226, 228, 229, 230, 231, 232, |
16170
|
|
|
|
|
|
|
233, 235, 236, 237, 239, 240, 241, 242, |
16171
|
|
|
|
|
|
|
243, 244, 245, 246, 247, 248, 249, 250, |
16172
|
|
|
|
|
|
|
251, 252, 253, 254, 255, 256, 258, 259, |
16173
|
|
|
|
|
|
|
260, 261, 264, 266, 268, 270, 272, 273, |
16174
|
|
|
|
|
|
|
369, 276, 284, 286, 285, 277, 278, 280, |
16175
|
|
|
|
|
|
|
282, 281, 279, 370, 371, 373, 372, 283, |
16176
|
|
|
|
|
|
|
287, 291, 293, 311, 313, 312, 295, 294, |
16177
|
|
|
|
|
|
|
298, 296, 297, 375, 299, 307, 309, 308, |
16178
|
|
|
|
|
|
|
300, 301, 303, 305, 304, 302, 376, 377, |
16179
|
|
|
|
|
|
|
379, 378, 306, 310, 314, 316, 321, 317, |
16180
|
|
|
|
|
|
|
318, 319, 381, 382, 322, 330, 332, 331, |
16181
|
|
|
|
|
|
|
323, 324, 326, 328, 327, 325, 383, 384, |
16182
|
|
|
|
|
|
|
386, 385, 329, 333, 337, 339, 340, 341, |
16183
|
|
|
|
|
|
|
342, 343, 344, 345, 363, 365, 364, 347, |
16184
|
|
|
|
|
|
|
346, 350, 348, 349, 388, 351, 359, 361, |
16185
|
|
|
|
|
|
|
360, 352, 353, 355, 357, 356, 354, 389, |
16186
|
|
|
|
|
|
|
390, 392, 391, 358, 362, 366, 274, 374, |
16187
|
|
|
|
|
|
|
380, 320, 387, 393 |
16188
|
|
|
|
|
|
|
}; |
16189
|
|
|
|
|
|
|
|
16190
|
|
|
|
|
|
|
static const char _url_detector_trans_actions[] = { |
16191
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16192
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16193
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 1, 8, |
16194
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16195
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16196
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16197
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 3, |
16198
|
|
|
|
|
|
|
11, 0, 0, 0, 0, 0, 0, 0, |
16199
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16200
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16201
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16202
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16203
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16204
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16205
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16206
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16207
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16208
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16209
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16210
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16211
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16212
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16213
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16214
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16215
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16216
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16217
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16218
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16219
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16220
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16221
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16222
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16223
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16224
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16225
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16226
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16227
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
16228
|
|
|
|
|
|
|
0, 0, 0, 1, 1, 1, 1, 0, |
16229
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16230
|
|
|
|
|
|
|
0, 0, 0, 5, 0, 0, 0, 0, |
16231
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 1, |
16232
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 0, 0, 0, |
16233
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 0, 0, 0, |
16234
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 1, |
16235
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 0, 0, 0, |
16236
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
16237
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 0, 0, |
16238
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 1, |
16239
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 0, 1, |
16240
|
|
|
|
|
|
|
1, 0, 1, 1 |
16241
|
|
|
|
|
|
|
}; |
16242
|
|
|
|
|
|
|
|
16243
|
|
|
|
|
|
|
static const int url_detector_start = 1; |
16244
|
|
|
|
|
|
|
|
16245
|
14
|
|
|
|
|
|
url_detector::url_type url_detector::detect(string_piece str, size_t* length) { |
16246
|
|
|
|
|
|
|
int cs; |
16247
|
|
|
|
|
|
|
const char* p = str.str; |
16248
|
|
|
|
|
|
|
|
16249
|
|
|
|
|
|
|
url_type result = NO_URL; |
16250
|
|
|
|
|
|
|
size_t result_length = 0, parens = 0; |
16251
|
|
|
|
|
|
|
|
16252
|
|
|
|
|
|
|
{ |
16253
|
|
|
|
|
|
|
cs = url_detector_start; |
16254
|
|
|
|
|
|
|
} |
16255
|
|
|
|
|
|
|
|
16256
|
|
|
|
|
|
|
{ |
16257
|
|
|
|
|
|
|
int _klen; |
16258
|
|
|
|
|
|
|
unsigned int _trans; |
16259
|
|
|
|
|
|
|
short _widec; |
16260
|
|
|
|
|
|
|
const char *_acts; |
16261
|
|
|
|
|
|
|
unsigned int _nacts; |
16262
|
|
|
|
|
|
|
const short *_keys; |
16263
|
|
|
|
|
|
|
|
16264
|
14
|
50
|
|
|
|
|
if ( p == ( (str.str + str.len)) ) |
16265
|
|
|
|
|
|
|
goto _test_eof; |
16266
|
|
|
|
|
|
|
if ( cs == 0 ) |
16267
|
|
|
|
|
|
|
goto _out; |
16268
|
|
|
|
|
|
|
_resume: |
16269
|
38
|
|
|
|
|
|
_widec = (*p); |
16270
|
38
|
|
|
|
|
|
_klen = _url_detector_cond_lengths[cs]; |
16271
|
38
|
|
|
|
|
|
_keys = _url_detector_cond_keys + (_url_detector_cond_offsets[cs]*2); |
16272
|
38
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
16273
|
|
|
|
|
|
|
const short *_lower = _keys; |
16274
|
|
|
|
|
|
|
const short *_mid; |
16275
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
16276
|
|
|
|
|
|
|
while (1) { |
16277
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
16278
|
|
|
|
|
|
|
break; |
16279
|
|
|
|
|
|
|
|
16280
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
16281
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
16282
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
16283
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
16284
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
16285
|
|
|
|
|
|
|
else { |
16286
|
0
|
0
|
|
|
|
|
switch ( _url_detector_cond_spaces[_url_detector_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
16287
|
|
|
|
|
|
|
case 0: { |
16288
|
0
|
|
|
|
|
|
_widec = (short)(256u + ((*p) - 0u)); |
16289
|
0
|
0
|
|
|
|
|
if ( |
16290
|
0
|
|
|
|
|
|
parens ) _widec += 256; |
16291
|
|
|
|
|
|
|
break; |
16292
|
|
|
|
|
|
|
} |
16293
|
|
|
|
|
|
|
} |
16294
|
|
|
|
|
|
|
break; |
16295
|
|
|
|
|
|
|
} |
16296
|
|
|
|
|
|
|
} |
16297
|
|
|
|
|
|
|
} |
16298
|
|
|
|
|
|
|
|
16299
|
38
|
|
|
|
|
|
_keys = _url_detector_trans_keys + _url_detector_key_offsets[cs]; |
16300
|
38
|
|
|
|
|
|
_trans = _url_detector_index_offsets[cs]; |
16301
|
|
|
|
|
|
|
|
16302
|
38
|
|
|
|
|
|
_klen = _url_detector_single_lengths[cs]; |
16303
|
38
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
16304
|
|
|
|
|
|
|
const short *_lower = _keys; |
16305
|
|
|
|
|
|
|
const short *_mid; |
16306
|
142
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
16307
|
|
|
|
|
|
|
while (1) { |
16308
|
142
|
100
|
|
|
|
|
if ( _upper < _lower ) |
16309
|
|
|
|
|
|
|
break; |
16310
|
|
|
|
|
|
|
|
16311
|
104
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
16312
|
104
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
16313
|
32
|
|
|
|
|
|
_upper = _mid - 1; |
16314
|
72
|
50
|
|
|
|
|
else if ( _widec > *_mid ) |
16315
|
72
|
|
|
|
|
|
_lower = _mid + 1; |
16316
|
|
|
|
|
|
|
else { |
16317
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
16318
|
0
|
|
|
|
|
|
goto _match; |
16319
|
|
|
|
|
|
|
} |
16320
|
|
|
|
|
|
|
} |
16321
|
38
|
|
|
|
|
|
_keys += _klen; |
16322
|
38
|
|
|
|
|
|
_trans += _klen; |
16323
|
|
|
|
|
|
|
} |
16324
|
|
|
|
|
|
|
|
16325
|
38
|
|
|
|
|
|
_klen = _url_detector_range_lengths[cs]; |
16326
|
38
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
16327
|
|
|
|
|
|
|
const short *_lower = _keys; |
16328
|
|
|
|
|
|
|
const short *_mid; |
16329
|
110
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
16330
|
|
|
|
|
|
|
while (1) { |
16331
|
110
|
100
|
|
|
|
|
if ( _upper < _lower ) |
16332
|
|
|
|
|
|
|
break; |
16333
|
|
|
|
|
|
|
|
16334
|
100
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
16335
|
100
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
16336
|
24
|
|
|
|
|
|
_upper = _mid - 2; |
16337
|
76
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
16338
|
48
|
|
|
|
|
|
_lower = _mid + 2; |
16339
|
|
|
|
|
|
|
else { |
16340
|
28
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
16341
|
28
|
|
|
|
|
|
goto _match; |
16342
|
|
|
|
|
|
|
} |
16343
|
|
|
|
|
|
|
} |
16344
|
10
|
|
|
|
|
|
_trans += _klen; |
16345
|
|
|
|
|
|
|
} |
16346
|
|
|
|
|
|
|
|
16347
|
|
|
|
|
|
|
_match: |
16348
|
38
|
|
|
|
|
|
_trans = _url_detector_indicies[_trans]; |
16349
|
38
|
|
|
|
|
|
cs = _url_detector_trans_targs[_trans]; |
16350
|
|
|
|
|
|
|
|
16351
|
38
|
50
|
|
|
|
|
if ( _url_detector_trans_actions[_trans] == 0 ) |
16352
|
|
|
|
|
|
|
goto _again; |
16353
|
|
|
|
|
|
|
|
16354
|
0
|
|
|
|
|
|
_acts = _url_detector_actions + _url_detector_trans_actions[_trans]; |
16355
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
16356
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
16357
|
|
|
|
|
|
|
{ |
16358
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
16359
|
|
|
|
|
|
|
{ |
16360
|
|
|
|
|
|
|
case 0: |
16361
|
0
|
|
|
|
|
|
{ result = URL; result_length = p - str.str + 1; } |
16362
|
0
|
|
|
|
|
|
break; |
16363
|
|
|
|
|
|
|
case 1: |
16364
|
0
|
|
|
|
|
|
{ result = EMAIL; result_length = p - str.str + 1; } |
16365
|
0
|
|
|
|
|
|
break; |
16366
|
|
|
|
|
|
|
case 2: |
16367
|
0
|
|
|
|
|
|
{parens++;} |
16368
|
0
|
|
|
|
|
|
break; |
16369
|
|
|
|
|
|
|
case 3: |
16370
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
16371
|
0
|
|
|
|
|
|
break; |
16372
|
|
|
|
|
|
|
} |
16373
|
|
|
|
|
|
|
} |
16374
|
|
|
|
|
|
|
|
16375
|
|
|
|
|
|
|
_again: |
16376
|
38
|
100
|
|
|
|
|
if ( cs == 0 ) |
16377
|
|
|
|
|
|
|
goto _out; |
16378
|
28
|
100
|
|
|
|
|
if ( ++p != ( (str.str + str.len)) ) |
16379
|
|
|
|
|
|
|
goto _resume; |
16380
|
|
|
|
|
|
|
_test_eof: {} |
16381
|
|
|
|
|
|
|
_out: {} |
16382
|
|
|
|
|
|
|
} |
16383
|
|
|
|
|
|
|
|
16384
|
|
|
|
|
|
|
// if (result == URL) { |
16385
|
|
|
|
|
|
|
// // Ignore the last character if it is one of [.!',;?:)], |
16386
|
|
|
|
|
|
|
// // and the ) only if it is unmatched. |
16387
|
|
|
|
|
|
|
// size_t unmatched_parens = 0; |
16388
|
|
|
|
|
|
|
// bool unmatched_parens_computed = false; |
16389
|
|
|
|
|
|
|
// do { |
16390
|
|
|
|
|
|
|
// if (str.str[result_length-1] == ')' && !unmatched_parens_computed) { |
16391
|
|
|
|
|
|
|
// for (size_t i = |
16392
|
|
|
|
|
|
|
// |
16393
|
|
|
|
|
|
|
// |
16394
|
|
|
|
|
|
|
// unmatched_parens_computed = true; |
16395
|
|
|
|
|
|
|
// } |
16396
|
|
|
|
|
|
|
// } while (); |
16397
|
|
|
|
|
|
|
// while (result_length) { |
16398
|
|
|
|
|
|
|
// |
16399
|
|
|
|
|
|
|
// |
16400
|
|
|
|
|
|
|
// } |
16401
|
|
|
|
|
|
|
// |
16402
|
|
|
|
|
|
|
// if (str.str[result_length-1] == '.' || |
16403
|
|
|
|
|
|
|
// |
16404
|
|
|
|
|
|
|
// // Also ignore ) is ignored only if there is not a matching left one. |
16405
|
|
|
|
|
|
|
// } |
16406
|
|
|
|
|
|
|
// |
16407
|
14
|
50
|
|
|
|
|
if (length) *length = result_length; |
16408
|
14
|
50
|
|
|
|
|
return length || result_length == str.len ? result : NO_URL; |
|
|
50
|
|
|
|
|
|
16409
|
|
|
|
|
|
|
} |
16410
|
|
|
|
|
|
|
|
16411
|
|
|
|
|
|
|
} // namespace utils |
16412
|
|
|
|
|
|
|
|
16413
|
|
|
|
|
|
|
///////// |
16414
|
|
|
|
|
|
|
// File: version/version.h |
16415
|
|
|
|
|
|
|
///////// |
16416
|
|
|
|
|
|
|
|
16417
|
|
|
|
|
|
|
// This file is part of NameTag. |
16418
|
|
|
|
|
|
|
// |
16419
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
16420
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
16421
|
|
|
|
|
|
|
// |
16422
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
16423
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
16424
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
16425
|
|
|
|
|
|
|
|
16426
|
0
|
|
|
|
|
|
// Version descriptor of the NameTag library: three numeric components plus
// an optional prerelease tag, with static accessors for the current version
// and for a printable version/copyright banner.
// NOTE: the data members are brace-initialized positionally by
// version::current(), so their declaration order must not change.
class version { |
16427
|
|
|
|
|
|
|
public: |
16428
|
|
|
|
|
|
|
// Numeric components, printed as major.minor.patch by version_and_copyright().
unsigned major; |
16429
|
|
|
|
|
|
|
unsigned minor; |
16430
|
|
|
|
|
|
|
unsigned patch; |
16431
|
|
|
|
|
|
|
// Prerelease tag; when non-empty it is printed after the numbers, preceded by '-'.
string prerelease; |
16432
|
|
|
|
|
|
|
|
16433
|
|
|
|
|
|
|
// Returns current NameTag version. |
16434
|
|
|
|
|
|
|
static version current(); |
16435
|
|
|
|
|
|
|
|
16436
|
|
|
|
|
|
|
// Returns multi-line formatted version and copyright string. |
// If other_libraries is non-empty, it is appended to the list of used
// libraries on a new line, prefixed by "and ".
16437
|
|
|
|
|
|
|
static string version_and_copyright(const string& other_libraries = string()); |
16438
|
|
|
|
|
|
|
}; |
16439
|
|
|
|
|
|
|
|
16440
|
|
|
|
|
|
|
///////// |
16441
|
|
|
|
|
|
|
// File: version/version.cpp |
16442
|
|
|
|
|
|
|
///////// |
16443
|
|
|
|
|
|
|
|
16444
|
|
|
|
|
|
|
// This file is part of NameTag. |
16445
|
|
|
|
|
|
|
// |
16446
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
16447
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
16448
|
|
|
|
|
|
|
// |
16449
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
16450
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
16451
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
16452
|
|
|
|
|
|
|
|
16453
|
0
|
|
|
|
|
|
// Returns the version of this NameTag build.
version version::current() {
  // Initializers follow the member declaration order of class version:
  // major, minor, patch, prerelease (empty string = no prerelease tag).
  return version{1, 2, 0, ""};
}
16456
|
|
|
|
|
|
|
|
16457
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
16458
|
0
|
|
|
|
|
|
string version::version_and_copyright(const string& other_libraries) { |
16459
|
0
|
|
|
|
|
|
ostringstream info; |
16460
|
|
|
|
|
|
|
|
16461
|
|
|
|
|
|
|
auto nametag = version::current(); |
16462
|
|
|
|
|
|
|
auto unilib = unilib::version::current(); |
16463
|
|
|
|
|
|
|
auto morphodita = morphodita::version::current(); |
16464
|
|
|
|
|
|
|
|
16465
|
0
|
|
|
|
|
|
info << "NameTag version " << nametag.major << '.' << nametag.minor << '.' << nametag.patch |
16466
|
0
|
0
|
|
|
|
|
<< (nametag.prerelease.empty() ? "" : "-") << nametag.prerelease |
|
|
0
|
|
|
|
|
|
16467
|
0
|
|
|
|
|
|
<< " (using UniLib " << unilib.major << '.' << unilib.minor << '.' << unilib.patch |
16468
|
0
|
0
|
|
|
|
|
<< (unilib.prerelease.empty() ? "" : "-") << unilib.prerelease |
|
|
0
|
|
|
|
|
|
16469
|
0
|
|
|
|
|
|
<< ", MorphoDiTa " << morphodita.major << '.' << morphodita.minor << '.' << unilib.patch |
16470
|
0
|
0
|
|
|
|
|
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
|
|
0
|
|
|
|
|
|
16471
|
0
|
0
|
|
|
|
|
<< (other_libraries.empty() ? "" : "\nand ") << other_libraries << ")\n" |
|
|
0
|
|
|
|
|
|
16472
|
|
|
|
|
|
|
"Copyright 2016 by Institute of Formal and Applied Linguistics, Faculty of\n" |
16473
|
0
|
0
|
|
|
|
|
"Mathematics and Physics, Charles University in Prague, Czech Republic."; |
16474
|
|
|
|
|
|
|
|
16475
|
0
|
|
|
|
|
|
return info.str(); |
16476
|
|
|
|
|
|
|
} |
16477
|
|
|
|
|
|
|
|
16478
|
|
|
|
|
|
|
} // namespace nametag |
16479
|
12
|
50
|
|
|
|
|
} // namespace ufal |
|
|
50
|
|
|
|
|
|