| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
2
|
|
|
|
|
|
|
// |
|
3
|
|
|
|
|
|
|
// This file is a bundle of all sources and headers of NameTag library. |
|
4
|
|
|
|
|
|
|
// Comments and copyrights of all individual files are kept. |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include |
|
7
|
|
|
|
|
|
|
#include |
|
8
|
|
|
|
|
|
|
#include |
|
9
|
|
|
|
|
|
|
#include |
|
10
|
|
|
|
|
|
|
#include |
|
11
|
|
|
|
|
|
|
#include |
|
12
|
|
|
|
|
|
|
#include |
|
13
|
|
|
|
|
|
|
#include |
|
14
|
|
|
|
|
|
|
#include |
|
15
|
|
|
|
|
|
|
#include |
|
16
|
|
|
|
|
|
|
#include |
|
17
|
|
|
|
|
|
|
#include |
|
18
|
|
|
|
|
|
|
#include |
|
19
|
|
|
|
|
|
|
#include |
|
20
|
|
|
|
|
|
|
#include |
|
21
|
|
|
|
|
|
|
#include |
|
22
|
|
|
|
|
|
|
#include |
|
23
|
|
|
|
|
|
|
#include |
|
24
|
|
|
|
|
|
|
#include |
|
25
|
|
|
|
|
|
|
#include |
|
26
|
|
|
|
|
|
|
#include |
|
27
|
|
|
|
|
|
|
#include |
|
28
|
|
|
|
|
|
|
#include |
|
29
|
|
|
|
|
|
|
#include |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
namespace ufal { |
|
32
|
|
|
|
|
|
|
namespace nametag { |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
///////// |
|
35
|
|
|
|
|
|
|
// File: utils/common.h |
|
36
|
|
|
|
|
|
|
///////// |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
39
|
|
|
|
|
|
|
// |
|
40
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
41
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
42
|
|
|
|
|
|
|
// |
|
43
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
44
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
45
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
// Headers available in all sources |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
namespace utils { |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
using namespace std; |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
// Assert that int is at least 4B |
|
54
|
|
|
|
|
|
|
static_assert(sizeof(int) >= sizeof(int32_t), "Int must be at least 4B wide!"); |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
// Assert that we are on a little endian system |
|
57
|
|
|
|
|
|
|
#ifdef __BYTE_ORDER__ |
|
58
|
|
|
|
|
|
|
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Only little endian systems are supported!"); |
|
59
|
|
|
|
|
|
|
#endif |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
#define runtime_failure(message) exit((cerr << message << endl, 1)) |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
} // namespace utils |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
///////// |
|
66
|
|
|
|
|
|
|
// File: common.h |
|
67
|
|
|
|
|
|
|
///////// |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
70
|
|
|
|
|
|
|
// |
|
71
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
72
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
73
|
|
|
|
|
|
|
// |
|
74
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
75
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
76
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
using namespace utils; |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
///////// |
|
81
|
|
|
|
|
|
|
// File: classifier/classifier_outcome.h |
|
82
|
|
|
|
|
|
|
///////// |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
85
|
|
|
|
|
|
|
// |
|
86
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
87
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
88
|
|
|
|
|
|
|
// |
|
89
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
90
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
91
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
typedef uint32_t classifier_outcome; |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
///////// |
|
96
|
|
|
|
|
|
|
// File: bilou/entity_type.h |
|
97
|
|
|
|
|
|
|
///////// |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
100
|
|
|
|
|
|
|
// |
|
101
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
102
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
103
|
|
|
|
|
|
|
// |
|
104
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
105
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
106
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
typedef classifier_outcome entity_type; |
|
109
|
|
|
|
|
|
|
enum :entity_type { entity_type_unknown = ~0U }; |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
///////// |
|
112
|
|
|
|
|
|
|
// File: bilou/bilou_type.h |
|
113
|
|
|
|
|
|
|
///////// |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
116
|
|
|
|
|
|
|
// |
|
117
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
118
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
119
|
|
|
|
|
|
|
// |
|
120
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
121
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
122
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
typedef unsigned bilou_type; |
|
125
|
|
|
|
|
|
|
enum :bilou_type { bilou_type_B, bilou_type_I, bilou_type_L, bilou_type_O, bilou_type_U, bilou_type_total, bilou_type_unknown = ~0U }; |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
///////// |
|
128
|
|
|
|
|
|
|
// File: bilou/bilou_probabilities.h |
|
129
|
|
|
|
|
|
|
///////// |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
132
|
|
|
|
|
|
|
// |
|
133
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
134
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
135
|
|
|
|
|
|
|
// |
|
136
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
137
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
138
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
class bilou_probabilities { |
|
141
|
|
|
|
|
|
|
public: |
|
142
|
|
|
|
|
|
|
struct probability_info { |
|
143
|
|
|
|
|
|
|
double probability; |
|
144
|
|
|
|
|
|
|
entity_type entity; |
|
145
|
|
|
|
|
|
|
}; |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
array bilou; |
|
148
|
|
|
|
|
|
|
}; |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
class bilou_probabilities_global : public bilou_probabilities { |
|
151
|
|
|
|
|
|
|
public: |
|
152
|
|
|
|
|
|
|
bilou_type best; |
|
153
|
|
|
|
|
|
|
array previous; |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
void init(const bilou_probabilities& local); |
|
156
|
|
|
|
|
|
|
void update(const bilou_probabilities& local, const bilou_probabilities_global& prev); |
|
157
|
|
|
|
|
|
|
}; |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
///////// |
|
160
|
|
|
|
|
|
|
// File: bilou/bilou_probabilities.cpp |
|
161
|
|
|
|
|
|
|
///////// |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
164
|
|
|
|
|
|
|
// |
|
165
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
166
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
167
|
|
|
|
|
|
|
// |
|
168
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
169
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
170
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
171
|
|
|
|
|
|
|
|
|
172
|
4
|
|
|
|
|
|
void bilou_probabilities_global::init(const bilou_probabilities& local) { |
|
173
|
4
|
|
|
|
|
|
bilou = local.bilou; |
|
174
|
|
|
|
|
|
|
previous.fill(bilou_type_unknown); |
|
175
|
|
|
|
|
|
|
|
|
176
|
4
|
|
|
|
|
|
best = bilou_type_B; |
|
177
|
4
|
|
|
|
|
|
bilou[bilou_type_I].probability = 0; |
|
178
|
4
|
|
|
|
|
|
bilou[bilou_type_L].probability = 0; |
|
179
|
4
|
50
|
|
|
|
|
if (bilou[bilou_type_O].probability > bilou[best].probability) best = bilou_type_O; |
|
180
|
4
|
100
|
|
|
|
|
if (bilou[bilou_type_U].probability > bilou[best].probability) best = bilou_type_U; |
|
181
|
4
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
10
|
|
|
|
|
|
void bilou_probabilities_global::update(const bilou_probabilities& local, const bilou_probabilities_global& prev) { |
|
184
|
|
|
|
|
|
|
// Find the best of previous LOU |
|
185
|
|
|
|
|
|
|
bilou_type best_LOU = bilou_type_L; |
|
186
|
10
|
|
|
|
|
|
double best_LOU_prob = prev.bilou[bilou_type_L].probability; |
|
187
|
10
|
50
|
|
|
|
|
if (prev.bilou[bilou_type_O].probability > best_LOU_prob) { |
|
188
|
|
|
|
|
|
|
best_LOU = bilou_type_O; |
|
189
|
|
|
|
|
|
|
best_LOU_prob = prev.bilou[bilou_type_O].probability; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
10
|
100
|
|
|
|
|
if (prev.bilou[bilou_type_U].probability > best_LOU_prob) { |
|
192
|
|
|
|
|
|
|
best_LOU = bilou_type_U; |
|
193
|
|
|
|
|
|
|
best_LOU_prob = prev.bilou[bilou_type_U].probability; |
|
194
|
|
|
|
|
|
|
} |
|
195
|
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
// Find the best of previous BI |
|
197
|
|
|
|
|
|
|
bilou_type best_BI = bilou_type_B; |
|
198
|
10
|
|
|
|
|
|
double best_BI_prob = prev.bilou[bilou_type_B].probability; |
|
199
|
10
|
50
|
|
|
|
|
if (prev.bilou[bilou_type_I].probability > best_BI_prob) { |
|
200
|
|
|
|
|
|
|
best_BI = bilou_type_I; |
|
201
|
|
|
|
|
|
|
best_BI_prob = prev.bilou[bilou_type_I].probability; |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
// Normalize the best_*_prob probabilities |
|
205
|
10
|
50
|
|
|
|
|
if (best_BI_prob > best_LOU_prob) { |
|
206
|
0
|
|
|
|
|
|
best_LOU_prob /= best_BI_prob; |
|
207
|
|
|
|
|
|
|
best_BI_prob = 1; |
|
208
|
|
|
|
|
|
|
} else { |
|
209
|
10
|
|
|
|
|
|
best_BI_prob /= best_LOU_prob; |
|
210
|
|
|
|
|
|
|
best_LOU_prob = 1; |
|
211
|
|
|
|
|
|
|
} |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
// Store update probabilites |
|
214
|
10
|
|
|
|
|
|
bilou[bilou_type_B].probability = best_LOU_prob * local.bilou[bilou_type_B].probability; |
|
215
|
10
|
|
|
|
|
|
bilou[bilou_type_B].entity = local.bilou[bilou_type_B].entity; |
|
216
|
10
|
|
|
|
|
|
previous[bilou_type_B] = best_LOU; |
|
217
|
10
|
|
|
|
|
|
best = bilou_type_B; |
|
218
|
|
|
|
|
|
|
|
|
219
|
10
|
|
|
|
|
|
bilou[bilou_type_I].probability = best_BI_prob * local.bilou[bilou_type_I].probability; |
|
220
|
10
|
|
|
|
|
|
bilou[bilou_type_I].entity = prev.bilou[best_BI].entity; |
|
221
|
10
|
|
|
|
|
|
previous[bilou_type_I] = best_BI; |
|
222
|
10
|
50
|
|
|
|
|
if (bilou[bilou_type_I].probability > bilou[best].probability) best = bilou_type_I; |
|
223
|
|
|
|
|
|
|
|
|
224
|
10
|
|
|
|
|
|
bilou[bilou_type_L].probability = best_BI_prob * local.bilou[bilou_type_L].probability; |
|
225
|
10
|
|
|
|
|
|
bilou[bilou_type_L].entity = prev.bilou[best_BI].entity; |
|
226
|
10
|
|
|
|
|
|
previous[bilou_type_L] = best_BI; |
|
227
|
10
|
50
|
|
|
|
|
if (bilou[bilou_type_L].probability > bilou[best].probability) best = bilou_type_L; |
|
228
|
|
|
|
|
|
|
|
|
229
|
10
|
|
|
|
|
|
bilou[bilou_type_O].probability = best_LOU_prob * local.bilou[bilou_type_O].probability; |
|
230
|
10
|
|
|
|
|
|
bilou[bilou_type_O].entity = local.bilou[bilou_type_O].entity; |
|
231
|
10
|
|
|
|
|
|
previous[bilou_type_O] = best_LOU; |
|
232
|
10
|
50
|
|
|
|
|
if (bilou[bilou_type_O].probability > bilou[best].probability) best = bilou_type_O; |
|
233
|
|
|
|
|
|
|
|
|
234
|
10
|
|
|
|
|
|
bilou[bilou_type_U].probability = best_LOU_prob * local.bilou[bilou_type_U].probability; |
|
235
|
10
|
|
|
|
|
|
bilou[bilou_type_U].entity = local.bilou[bilou_type_U].entity; |
|
236
|
10
|
|
|
|
|
|
previous[bilou_type_U] = best_LOU; |
|
237
|
10
|
100
|
|
|
|
|
if (bilou[bilou_type_U].probability > bilou[best].probability) best = bilou_type_U; |
|
238
|
10
|
|
|
|
|
|
} |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
///////// |
|
241
|
|
|
|
|
|
|
// File: classifier/classifier_feature.h |
|
242
|
|
|
|
|
|
|
///////// |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
245
|
|
|
|
|
|
|
// |
|
246
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
247
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
248
|
|
|
|
|
|
|
// |
|
249
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
250
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
251
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
typedef uint32_t classifier_feature; |
|
254
|
|
|
|
|
|
|
typedef vector classifier_features; |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
///////// |
|
257
|
|
|
|
|
|
|
// File: features/ner_feature.h |
|
258
|
|
|
|
|
|
|
///////// |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
261
|
|
|
|
|
|
|
// |
|
262
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
263
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
264
|
|
|
|
|
|
|
// |
|
265
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
266
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
267
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
typedef classifier_feature ner_feature; |
|
270
|
|
|
|
|
|
|
enum :ner_feature { ner_feature_unknown = ~0U }; |
|
271
|
|
|
|
|
|
|
typedef classifier_features ner_features; |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
///////// |
|
274
|
|
|
|
|
|
|
// File: bilou/ner_word.h |
|
275
|
|
|
|
|
|
|
///////// |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
278
|
|
|
|
|
|
|
// |
|
279
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
280
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
281
|
|
|
|
|
|
|
// |
|
282
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
283
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
284
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
285
|
|
|
|
|
|
|
|
|
286
|
9
|
|
|
|
|
|
struct ner_word { |
|
287
|
|
|
|
|
|
|
string form; |
|
288
|
|
|
|
|
|
|
string raw_lemma; |
|
289
|
|
|
|
|
|
|
vector raw_lemmas_all; |
|
290
|
|
|
|
|
|
|
string lemma_id; |
|
291
|
|
|
|
|
|
|
string lemma_comments; |
|
292
|
|
|
|
|
|
|
string tag; |
|
293
|
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
ner_word() {} |
|
295
|
|
|
|
|
|
|
ner_word(const string& form) : form(form) {} |
|
296
|
|
|
|
|
|
|
}; |
|
297
|
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
///////// |
|
299
|
|
|
|
|
|
|
// File: bilou/ner_sentence.h |
|
300
|
|
|
|
|
|
|
///////// |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
303
|
|
|
|
|
|
|
// |
|
304
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
305
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
306
|
|
|
|
|
|
|
// |
|
307
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
308
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
309
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
310
|
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
struct ner_sentence { |
|
312
|
|
|
|
|
|
|
unsigned size = 0; |
|
313
|
|
|
|
|
|
|
vector words; |
|
314
|
|
|
|
|
|
|
vector features; |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
struct probability_info { |
|
317
|
|
|
|
|
|
|
bilou_probabilities local; |
|
318
|
|
|
|
|
|
|
bool local_filled; |
|
319
|
|
|
|
|
|
|
bilou_probabilities_global global; |
|
320
|
|
|
|
|
|
|
}; |
|
321
|
|
|
|
|
|
|
vector probabilities; |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
struct previous_stage_info { |
|
324
|
|
|
|
|
|
|
bilou_type bilou; |
|
325
|
|
|
|
|
|
|
entity_type entity; |
|
326
|
|
|
|
|
|
|
}; |
|
327
|
|
|
|
|
|
|
vector previous_stage; |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
void resize(unsigned size); |
|
330
|
|
|
|
|
|
|
void clear_features(); |
|
331
|
|
|
|
|
|
|
void clear_probabilities_local_filled(); |
|
332
|
|
|
|
|
|
|
void clear_previous_stage(); |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
void compute_best_decoding(); |
|
335
|
|
|
|
|
|
|
void fill_previous_stage(); |
|
336
|
|
|
|
|
|
|
}; |
|
337
|
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
///////// |
|
339
|
|
|
|
|
|
|
// File: bilou/ner_sentence.cpp |
|
340
|
|
|
|
|
|
|
///////// |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
343
|
|
|
|
|
|
|
// |
|
344
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
345
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
346
|
|
|
|
|
|
|
// |
|
347
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
348
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
349
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
350
|
|
|
|
|
|
|
|
|
351
|
4
|
|
|
|
|
|
void ner_sentence::resize(unsigned size) { |
|
352
|
4
|
|
|
|
|
|
this->size = size; |
|
353
|
4
|
100
|
|
|
|
|
if (words.size() < size) words.resize(size); |
|
354
|
4
|
100
|
|
|
|
|
if (features.size() < size) features.resize(size); |
|
355
|
4
|
100
|
|
|
|
|
if (probabilities.size() < size) probabilities.resize(size); |
|
356
|
4
|
100
|
|
|
|
|
if (previous_stage.size() < size) previous_stage.resize(size); |
|
357
|
4
|
|
|
|
|
|
} |
|
358
|
|
|
|
|
|
|
|
|
359
|
0
|
|
|
|
|
|
void ner_sentence::clear_features() { |
|
360
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) |
|
|
|
0
|
|
|
|
|
|
|
361
|
14
|
|
|
|
|
|
features[i].clear(); |
|
362
|
0
|
|
|
|
|
|
} |
|
363
|
|
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
|
void ner_sentence::clear_probabilities_local_filled() { |
|
365
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) |
|
|
|
0
|
|
|
|
|
|
|
366
|
28
|
|
|
|
|
|
probabilities[i].local_filled = false; |
|
367
|
0
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
|
|
369
|
0
|
|
|
|
|
|
void ner_sentence::clear_previous_stage() { |
|
370
|
9
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) { |
|
|
|
0
|
|
|
|
|
|
|
371
|
14
|
|
|
|
|
|
previous_stage[i].bilou = bilou_type_unknown; |
|
372
|
7
|
|
|
|
|
|
previous_stage[i].entity = entity_type_unknown; |
|
373
|
|
|
|
|
|
|
} |
|
374
|
0
|
|
|
|
|
|
} |
|
375
|
|
|
|
|
|
|
|
|
376
|
4
|
|
|
|
|
|
void ner_sentence::compute_best_decoding() { |
|
377
|
|
|
|
|
|
|
// Find best L O U bilou_type for the last word |
|
378
|
|
|
|
|
|
|
bilou_type best = bilou_type_L; |
|
379
|
4
|
|
|
|
|
|
auto& last_bilou = probabilities[size - 1].global.bilou; |
|
380
|
4
|
50
|
|
|
|
|
if (last_bilou[bilou_type_O].probability > last_bilou[best].probability) best = bilou_type_O; |
|
381
|
4
|
50
|
|
|
|
|
if (last_bilou[bilou_type_U].probability > last_bilou[best].probability) best = bilou_type_U; |
|
382
|
4
|
|
|
|
|
|
probabilities[size - 1].global.best = best; |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
// Store the best bilou_type for all preceeding words |
|
385
|
14
|
100
|
|
|
|
|
for (unsigned i = size - 1; i; i--) { |
|
386
|
20
|
|
|
|
|
|
best = probabilities[i].global.previous[best]; |
|
387
|
20
|
|
|
|
|
|
probabilities[i - 1].global.best = best; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
4
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
|
|
391
|
8
|
|
|
|
|
|
void ner_sentence::fill_previous_stage() { |
|
392
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < size; i++) { |
|
393
|
28
|
|
|
|
|
|
previous_stage[i].bilou = probabilities[i].global.best; |
|
394
|
14
|
|
|
|
|
|
previous_stage[i].entity = probabilities[i].global.bilou[probabilities[i].global.best].entity; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
4
|
|
|
|
|
|
} |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
///////// |
|
399
|
|
|
|
|
|
|
// File: classifier/classifier_instance.h |
|
400
|
|
|
|
|
|
|
///////// |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
403
|
|
|
|
|
|
|
// |
|
404
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
405
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
406
|
|
|
|
|
|
|
// |
|
407
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
408
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
409
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
class classifier_instance { |
|
412
|
|
|
|
|
|
|
public: |
|
413
|
|
|
|
|
|
|
classifier_features features; |
|
414
|
|
|
|
|
|
|
classifier_outcome outcome; |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
classifier_instance(const classifier_features& features, const classifier_outcome& outcome) : features(features), outcome(outcome) {} |
|
417
|
|
|
|
|
|
|
}; |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
///////// |
|
420
|
|
|
|
|
|
|
// File: classifier/network_parameters.h |
|
421
|
|
|
|
|
|
|
///////// |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
424
|
|
|
|
|
|
|
// |
|
425
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
426
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
427
|
|
|
|
|
|
|
// |
|
428
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
429
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
430
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
struct network_parameters { |
|
433
|
|
|
|
|
|
|
int iterations; |
|
434
|
|
|
|
|
|
|
double missing_weight; |
|
435
|
|
|
|
|
|
|
double initial_learning_rate; |
|
436
|
|
|
|
|
|
|
double final_learning_rate; |
|
437
|
|
|
|
|
|
|
double gaussian_sigma; |
|
438
|
|
|
|
|
|
|
int hidden_layer; // Experimental use only. |
|
439
|
|
|
|
|
|
|
}; |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
///////// |
|
442
|
|
|
|
|
|
|
// File: utils/binary_decoder.h |
|
443
|
|
|
|
|
|
|
///////// |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
446
|
|
|
|
|
|
|
// |
|
447
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
448
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
449
|
|
|
|
|
|
|
// |
|
450
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
451
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
452
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
namespace utils { |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
// |
|
457
|
|
|
|
|
|
|
// Declarations |
|
458
|
|
|
|
|
|
|
// |
|
459
|
|
|
|
|
|
|
|
|
460
|
0
|
|
|
|
|
|
class binary_decoder_error : public runtime_error { |
|
461
|
|
|
|
|
|
|
public: |
|
462
|
0
|
0
|
|
|
|
|
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
}; |
|
464
|
|
|
|
|
|
|
|
|
465
|
7
|
0
|
|
|
|
|
class binary_decoder { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
public: |
|
467
|
|
|
|
|
|
|
inline unsigned char* fill(unsigned len); |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
inline unsigned next_1B(); |
|
470
|
|
|
|
|
|
|
inline unsigned next_2B(); |
|
471
|
|
|
|
|
|
|
inline unsigned next_4B(); |
|
472
|
|
|
|
|
|
|
inline void next_str(string& str); |
|
473
|
|
|
|
|
|
|
template inline const T* next(unsigned elements); |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
inline bool is_end(); |
|
476
|
|
|
|
|
|
|
inline unsigned tell(); |
|
477
|
|
|
|
|
|
|
inline void seek(unsigned pos); |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
private: |
|
480
|
|
|
|
|
|
|
vector buffer; |
|
481
|
|
|
|
|
|
|
const unsigned char* data; |
|
482
|
|
|
|
|
|
|
const unsigned char* data_end; |
|
483
|
|
|
|
|
|
|
}; |
|
484
|
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
// |
|
486
|
|
|
|
|
|
|
// Definitions |
|
487
|
|
|
|
|
|
|
// |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
unsigned char* binary_decoder::fill(unsigned len) { |
|
490
|
7
|
50
|
|
|
|
|
buffer.resize(len); |
|
491
|
7
|
|
|
|
|
|
data = buffer.data(); |
|
492
|
7
|
|
|
|
|
|
data_end = buffer.data() + len; |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
return buffer.data(); |
|
495
|
|
|
|
|
|
|
} |
|
496
|
|
|
|
|
|
|
|
|
497
|
626
|
|
|
|
|
|
unsigned binary_decoder::next_1B() { |
|
498
|
313
|
50
|
|
|
|
|
if (data + 1 > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
|
499
|
313
|
|
|
|
|
|
return *data++; |
|
500
|
|
|
|
|
|
|
} |
|
501
|
|
|
|
|
|
|
|
|
502
|
679
|
|
|
|
|
|
unsigned binary_decoder::next_2B() { |
|
503
|
679
|
50
|
|
|
|
|
if (data + sizeof(uint16_t) > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
|
504
|
|
|
|
|
|
|
uint16_t result; |
|
505
|
|
|
|
|
|
|
memcpy(&result, data, sizeof(uint16_t)); |
|
506
|
679
|
|
|
|
|
|
data += sizeof(uint16_t); |
|
507
|
679
|
|
|
|
|
|
return result; |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
|
|
510
|
516
|
|
|
|
|
|
unsigned binary_decoder::next_4B() { |
|
511
|
516
|
50
|
|
|
|
|
if (data + sizeof(uint32_t) > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
|
512
|
|
|
|
|
|
|
uint32_t result; |
|
513
|
|
|
|
|
|
|
memcpy(&result, data, sizeof(uint32_t)); |
|
514
|
516
|
|
|
|
|
|
data += sizeof(uint32_t); |
|
515
|
516
|
|
|
|
|
|
return result; |
|
516
|
|
|
|
|
|
|
} |
|
517
|
|
|
|
|
|
|
|
|
518
|
46
|
|
|
|
|
|
void binary_decoder::next_str(string& str) { |
|
519
|
46
|
|
|
|
|
|
unsigned len = next_1B(); |
|
520
|
46
|
50
|
|
|
|
|
if (len == 255) len = next_4B(); |
|
521
|
46
|
|
|
|
|
|
str.assign(next(len), len); |
|
522
|
46
|
|
|
|
|
|
} |
|
523
|
|
|
|
|
|
|
|
|
524
|
978
|
|
|
|
|
|
template const T* binary_decoder::next(unsigned elements) { |
|
525
|
489
|
50
|
|
|
|
|
if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
const T* result = (const T*) data; |
|
527
|
489
|
|
|
|
|
|
data += sizeof(T) * elements; |
|
528
|
489
|
|
|
|
|
|
return result; |
|
529
|
|
|
|
|
|
|
} |
|
530
|
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
bool binary_decoder::is_end() { |
|
532
|
7
|
|
|
|
|
|
return data >= data_end; |
|
533
|
|
|
|
|
|
|
} |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
unsigned binary_decoder::tell() { |
|
536
|
1
|
|
|
|
|
|
return data - buffer.data(); |
|
537
|
|
|
|
|
|
|
} |
|
538
|
|
|
|
|
|
|
|
|
539
|
1
|
|
|
|
|
|
void binary_decoder::seek(unsigned pos) { |
|
540
|
1
|
50
|
|
|
|
|
if (pos > buffer.size()) throw binary_decoder_error("Cannot seek past end of binary_decoder"); |
|
541
|
1
|
|
|
|
|
|
data = buffer.data() + pos; |
|
542
|
1
|
|
|
|
|
|
} |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
} // namespace utils |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
///////// |
|
547
|
|
|
|
|
|
|
// File: utils/string_piece.h |
|
548
|
|
|
|
|
|
|
///////// |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
551
|
|
|
|
|
|
|
// |
|
552
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
553
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
554
|
|
|
|
|
|
|
// |
|
555
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
556
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
557
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
namespace utils { |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
struct string_piece { |
|
562
|
|
|
|
|
|
|
const char* str; |
|
563
|
|
|
|
|
|
|
size_t len; |
|
564
|
|
|
|
|
|
|
|
|
565
|
4
|
|
|
|
|
|
string_piece() : str(nullptr), len(0) {} |
|
566
|
0
|
|
|
|
|
|
string_piece(const char* str) : str(str), len(strlen(str)) {} |
|
567
|
33
|
|
|
|
|
|
string_piece(const char* str, size_t len) : str(str), len(len) {} |
|
568
|
43
|
|
|
|
|
|
string_piece(const string& str) : str(str.c_str()), len(str.size()) {} |
|
569
|
|
|
|
|
|
|
}; |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
inline ostream& operator<<(ostream& os, const string_piece& str) { |
|
572
|
0
|
|
|
|
|
|
return os.write(str.str, str.len); |
|
573
|
|
|
|
|
|
|
} |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
inline bool operator==(const string_piece& a, const string_piece& b) { |
|
576
|
0
|
0
|
|
|
|
|
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
} |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
inline bool operator!=(const string_piece& a, const string_piece& b) { |
|
580
|
|
|
|
|
|
|
return a.len != b.len || memcmp(a.str, b.str, a.len) != 0; |
|
581
|
|
|
|
|
|
|
} |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
} // namespace utils |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
///////// |
|
586
|
|
|
|
|
|
|
// File: utils/binary_encoder.h |
|
587
|
|
|
|
|
|
|
///////// |
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
590
|
|
|
|
|
|
|
// |
|
591
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
592
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
593
|
|
|
|
|
|
|
// |
|
594
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
595
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
596
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
namespace utils { |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
// |
|
601
|
|
|
|
|
|
|
// Declarations |
|
602
|
|
|
|
|
|
|
// |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
class binary_encoder { |
|
605
|
|
|
|
|
|
|
public: |
|
606
|
|
|
|
|
|
|
inline binary_encoder(); |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
inline void add_1B(unsigned val); |
|
609
|
|
|
|
|
|
|
inline void add_2B(unsigned val); |
|
610
|
|
|
|
|
|
|
inline void add_4B(unsigned val); |
|
611
|
|
|
|
|
|
|
inline void add_float(double val); |
|
612
|
|
|
|
|
|
|
inline void add_double(double val); |
|
613
|
|
|
|
|
|
|
inline void add_str(string_piece str); |
|
614
|
|
|
|
|
|
|
inline void add_data(string_piece data); |
|
615
|
|
|
|
|
|
|
template inline void add_data(const vector& data); |
|
616
|
|
|
|
|
|
|
template inline void add_data(const T* data, size_t elements); |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
vector data; |
|
619
|
|
|
|
|
|
|
}; |
|
620
|
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
// |
|
622
|
|
|
|
|
|
|
// Definitions |
|
623
|
|
|
|
|
|
|
// |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
binary_encoder::binary_encoder() { |
|
626
|
|
|
|
|
|
|
data.reserve(16); |
|
627
|
|
|
|
|
|
|
} |
|
628
|
|
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
|
void binary_encoder::add_1B(unsigned val) { |
|
630
|
0
|
0
|
|
|
|
|
if (uint8_t(val) != val) runtime_failure("Should encode value " << val << " in one byte!"); |
|
631
|
0
|
|
|
|
|
|
data.push_back(val); |
|
632
|
0
|
|
|
|
|
|
} |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
void binary_encoder::add_2B(unsigned val) { |
|
635
|
|
|
|
|
|
|
if (uint16_t(val) != val) runtime_failure("Should encode value " << val << " in two bytes!"); |
|
636
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(uint16_t)); |
|
637
|
|
|
|
|
|
|
} |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
void binary_encoder::add_4B(unsigned val) { |
|
640
|
|
|
|
|
|
|
if (uint32_t(val) != val) runtime_failure("Should encode value " << val << " in four bytes!"); |
|
641
|
0
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(uint32_t)); |
|
642
|
|
|
|
|
|
|
} |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
void binary_encoder::add_float(double val) { |
|
645
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(float)); |
|
646
|
|
|
|
|
|
|
} |
|
647
|
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
void binary_encoder::add_double(double val) { |
|
649
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(double)); |
|
650
|
|
|
|
|
|
|
} |
|
651
|
|
|
|
|
|
|
|
|
652
|
0
|
|
|
|
|
|
void binary_encoder::add_str(string_piece str) { |
|
653
|
0
|
|
|
|
|
|
add_1B(str.len < 255 ? str.len : 255); |
|
654
|
0
|
0
|
|
|
|
|
if (!(str.len < 255)) add_4B(str.len); |
|
655
|
|
|
|
|
|
|
add_data(str); |
|
656
|
0
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
void binary_encoder::add_data(string_piece data) { |
|
659
|
0
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data.str, (const unsigned char*) (data.str + data.len)); |
|
660
|
|
|
|
|
|
|
} |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
template |
|
663
|
|
|
|
|
|
|
void binary_encoder::add_data(const vector& data) { |
|
664
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data.data(), (const unsigned char*) (data.data() + data.size())); |
|
665
|
|
|
|
|
|
|
} |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
template |
|
668
|
|
|
|
|
|
|
void binary_encoder::add_data(const T* data, size_t elements) { |
|
669
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data, (const unsigned char*) (data + elements)); |
|
670
|
|
|
|
|
|
|
} |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
} // namespace utils |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
///////// |
|
675
|
|
|
|
|
|
|
// File: classifier/network_classifier.h |
|
676
|
|
|
|
|
|
|
///////// |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
679
|
|
|
|
|
|
|
// |
|
680
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
681
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
682
|
|
|
|
|
|
|
// |
|
683
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
684
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
685
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
686
|
|
|
|
|
|
|
|
|
687
|
6
|
0
|
|
|
|
|
class network_classifier { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
public: |
|
689
|
|
|
|
|
|
|
bool load(istream& is); |
|
690
|
|
|
|
|
|
|
bool save(ostream& os); |
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
bool train(unsigned features, unsigned outcomes, const vector& train, |
|
693
|
|
|
|
|
|
|
const vector& heldout, const network_parameters& parameters, bool verbose); |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
void classify(const classifier_features& features, vector& outcomes, vector& buffer) const; |
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
private: |
|
698
|
|
|
|
|
|
|
// Direct connections |
|
699
|
|
|
|
|
|
|
vector> weights; |
|
700
|
|
|
|
|
|
|
vector> indices; |
|
701
|
|
|
|
|
|
|
double missing_weight; |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
// Hidden layer, experimental use only |
|
704
|
|
|
|
|
|
|
vector> hidden_weights[2]; |
|
705
|
|
|
|
|
|
|
vector hidden_layer, hidden_error; |
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
// Output layer |
|
708
|
|
|
|
|
|
|
vector output_layer, output_error; |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
inline void propagate(const classifier_features& features); |
|
711
|
|
|
|
|
|
|
inline void propagate(const classifier_features& features, vector& hidden_layer, vector& output_layer) const; |
|
712
|
|
|
|
|
|
|
inline void backpropagate(const classifier_instance& instance, double learning_rate, double gaussian_sigma); |
|
713
|
|
|
|
|
|
|
inline classifier_outcome best_outcome(); |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
template void load_matrix(binary_decoder& data, vector>& m); |
|
716
|
|
|
|
|
|
|
template void save_matrix(binary_encoder& enc, const vector>& m); |
|
717
|
|
|
|
|
|
|
}; |
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
///////// |
|
720
|
|
|
|
|
|
|
// File: utils/compressor.h |
|
721
|
|
|
|
|
|
|
///////// |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
724
|
|
|
|
|
|
|
// |
|
725
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
726
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
727
|
|
|
|
|
|
|
// |
|
728
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
729
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
730
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
namespace utils { |
|
733
|
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
class binary_decoder; |
|
735
|
|
|
|
|
|
|
class binary_encoder; |
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
class compressor { |
|
738
|
|
|
|
|
|
|
public: |
|
739
|
|
|
|
|
|
|
static bool load(istream& is, binary_decoder& data); |
|
740
|
|
|
|
|
|
|
static bool save(ostream& os, const binary_encoder& enc); |
|
741
|
|
|
|
|
|
|
}; |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
} // namespace utils |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
///////// |
|
746
|
|
|
|
|
|
|
// File: utils/unaligned_access.h |
|
747
|
|
|
|
|
|
|
///////// |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
750
|
|
|
|
|
|
|
// |
|
751
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
|
752
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
753
|
|
|
|
|
|
|
// |
|
754
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
755
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
756
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
757
|
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
namespace utils { |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
// |
|
761
|
|
|
|
|
|
|
// Declarations |
|
762
|
|
|
|
|
|
|
// |
|
763
|
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
template |
|
765
|
|
|
|
|
|
|
inline T unaligned_load(const P* ptr); |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
template |
|
768
|
|
|
|
|
|
|
inline T unaligned_load_inc(const P*& ptr); |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
template |
|
771
|
|
|
|
|
|
|
inline void unaligned_store(P* ptr, T value); |
|
772
|
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
template |
|
774
|
|
|
|
|
|
|
inline void unaligned_store_inc(P*& ptr, T value); |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
template |
|
777
|
|
|
|
|
|
|
T* unaligned_lower_bound(T* first, size_t size, T val); |
|
778
|
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
template |
|
780
|
|
|
|
|
|
|
T* unaligned_upper_bound(T* first, size_t size, T val); |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
// |
|
783
|
|
|
|
|
|
|
// Definitions |
|
784
|
|
|
|
|
|
|
// |
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
template |
|
787
|
|
|
|
|
|
|
inline T unaligned_load(const P* ptr) { |
|
788
|
|
|
|
|
|
|
T value; |
|
789
|
|
|
|
|
|
|
memcpy(&value, ptr, sizeof(T)); |
|
790
|
|
|
|
|
|
|
return value; |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
template |
|
794
|
|
|
|
|
|
|
inline T unaligned_load_inc(const P*& ptr) { |
|
795
|
|
|
|
|
|
|
T value; |
|
796
|
|
|
|
|
|
|
memcpy(&value, ptr, sizeof(T)); |
|
797
|
0
|
|
|
|
|
|
((const char*&)ptr) += sizeof(T); |
|
798
|
|
|
|
|
|
|
return value; |
|
799
|
|
|
|
|
|
|
} |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
template |
|
802
|
|
|
|
|
|
|
inline void unaligned_store(P* ptr, T value) { |
|
803
|
|
|
|
|
|
|
memcpy(ptr, &value, sizeof(T)); |
|
804
|
|
|
|
|
|
|
} |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
template |
|
807
|
|
|
|
|
|
|
inline void unaligned_store_inc(P*& ptr, T value) { |
|
808
|
|
|
|
|
|
|
memcpy(ptr, &value, sizeof(T)); |
|
809
|
35
|
|
|
|
|
|
((char*&)ptr) += sizeof(T); |
|
810
|
|
|
|
|
|
|
} |
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
template |
|
813
|
|
|
|
|
|
|
T* unaligned_lower_bound(T* first, size_t size, T val) { |
|
814
|
26
|
100
|
|
|
|
|
while (size) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
815
|
16
|
|
|
|
|
|
size_t step = size >> 1; |
|
816
|
16
|
100
|
|
|
|
|
if (unaligned_load(first + step) < val) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
817
|
6
|
|
|
|
|
|
first += step + 1; |
|
818
|
6
|
|
|
|
|
|
size -= step + 1; |
|
819
|
|
|
|
|
|
|
} else { |
|
820
|
|
|
|
|
|
|
size = step; |
|
821
|
|
|
|
|
|
|
} |
|
822
|
|
|
|
|
|
|
} |
|
823
|
|
|
|
|
|
|
return first; |
|
824
|
|
|
|
|
|
|
} |
|
825
|
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
template |
|
827
|
|
|
|
|
|
|
T* unaligned_upper_bound(T* first, size_t size, T val) { |
|
828
|
|
|
|
|
|
|
while (size) { |
|
829
|
|
|
|
|
|
|
size_t step = size >> 1; |
|
830
|
|
|
|
|
|
|
if (!(val < unaligned_load(first + step))) { |
|
831
|
|
|
|
|
|
|
first += step + 1; |
|
832
|
|
|
|
|
|
|
size -= step + 1; |
|
833
|
|
|
|
|
|
|
} else { |
|
834
|
|
|
|
|
|
|
size = step; |
|
835
|
|
|
|
|
|
|
} |
|
836
|
|
|
|
|
|
|
} |
|
837
|
|
|
|
|
|
|
return first; |
|
838
|
|
|
|
|
|
|
} |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
} // namespace utils |
|
841
|
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
///////// |
|
843
|
|
|
|
|
|
|
// File: classifier/network_classifier.cpp |
|
844
|
|
|
|
|
|
|
///////// |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
847
|
|
|
|
|
|
|
// |
|
848
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
849
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
850
|
|
|
|
|
|
|
// |
|
851
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
852
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
853
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
854
|
|
|
|
|
|
|
|
|
855
|
2
|
|
|
|
|
|
bool network_classifier::load(istream& is) { |
|
856
|
|
|
|
|
|
|
binary_decoder data; |
|
857
|
2
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
50
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
try { |
|
860
|
|
|
|
|
|
|
// Direct connections |
|
861
|
2
|
50
|
|
|
|
|
load_matrix(data, indices); |
|
862
|
2
|
50
|
|
|
|
|
missing_weight = unaligned_load(data.next(1)); |
|
863
|
2
|
50
|
|
|
|
|
load_matrix(data, weights); |
|
864
|
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
// Hidden layer |
|
866
|
2
|
|
|
|
|
|
hidden_weights[0].clear(); |
|
867
|
2
|
|
|
|
|
|
hidden_weights[1].clear(); |
|
868
|
2
|
50
|
|
|
|
|
hidden_layer.resize(data.next_2B()); |
|
|
|
50
|
|
|
|
|
|
|
869
|
2
|
50
|
|
|
|
|
if (!hidden_layer.empty()) { |
|
870
|
0
|
0
|
|
|
|
|
load_matrix(data, hidden_weights[0]); |
|
871
|
0
|
0
|
|
|
|
|
load_matrix(data, hidden_weights[1]); |
|
872
|
|
|
|
|
|
|
} |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
// Output layer |
|
875
|
2
|
50
|
|
|
|
|
unsigned outcomes = data.next_2B(); |
|
876
|
2
|
50
|
|
|
|
|
output_layer.resize(outcomes); |
|
877
|
2
|
50
|
|
|
|
|
output_error.resize(outcomes); |
|
|
|
0
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
879
|
|
|
|
|
|
|
return false; |
|
880
|
|
|
|
|
|
|
} |
|
881
|
|
|
|
|
|
|
|
|
882
|
2
|
|
|
|
|
|
return data.is_end(); |
|
883
|
|
|
|
|
|
|
} |
|
884
|
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
template |
|
886
|
4
|
|
|
|
|
|
void network_classifier::load_matrix(binary_decoder& data, vector>& m) { |
|
887
|
4
|
|
|
|
|
|
m.resize(data.next_4B()); |
|
888
|
664
|
100
|
|
|
|
|
for (auto&& row : m) { |
|
|
|
100
|
|
|
|
|
|
|
889
|
660
|
|
|
|
|
|
row.resize(data.next_2B()); |
|
890
|
660
|
|
|
|
|
|
if (!row.empty()) |
|
891
|
232
|
|
|
|
|
|
memcpy((unsigned char*) row.data(), data.next(row.size()), row.size() * sizeof(T)); |
|
892
|
|
|
|
|
|
|
} |
|
893
|
4
|
|
|
|
|
|
} |
|
894
|
|
|
|
|
|
|
|
|
895
|
0
|
|
|
|
|
|
bool network_classifier::train(unsigned features, unsigned outcomes, const vector& train, |
|
896
|
|
|
|
|
|
|
const vector& heldout, const network_parameters& parameters, bool verbose) { |
|
897
|
|
|
|
|
|
|
// Assertions |
|
898
|
0
|
0
|
|
|
|
|
if (features <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; } |
|
|
|
0
|
|
|
|
|
|
|
899
|
0
|
0
|
|
|
|
|
if (outcomes <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; } |
|
|
|
0
|
|
|
|
|
|
|
900
|
0
|
0
|
|
|
|
|
if (train.empty()) { if (verbose) cerr << "No training data!" << endl; return false; } |
|
|
|
0
|
|
|
|
|
|
|
901
|
0
|
0
|
|
|
|
|
for (auto&& instance : train) { |
|
902
|
0
|
0
|
|
|
|
|
if (instance.outcome >= outcomes) { if (verbose) cerr << "Training instances out of range!" << endl; return false; } |
|
|
|
0
|
|
|
|
|
|
|
903
|
0
|
0
|
|
|
|
|
for(auto& feature : instance.features) |
|
904
|
0
|
0
|
|
|
|
|
if (feature >= features) { if (verbose) cerr << "Training instances out of range!" << endl; return false; } |
|
|
|
0
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
} |
|
906
|
0
|
0
|
|
|
|
|
for (auto&& instance : heldout) |
|
907
|
0
|
0
|
|
|
|
|
for(auto& feature : instance.features) |
|
908
|
0
|
0
|
|
|
|
|
if (feature >= features) { if (verbose) cerr << "Heldout instances out of range!" << endl; return false; } |
|
|
|
0
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
mt19937 generator(42); |
|
911
|
|
|
|
|
|
|
uniform_real_distribution uniform(-0.1f, 0.1f); |
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
// Compute indices from existing feature-outcome pairs |
|
914
|
0
|
|
|
|
|
|
indices.clear(); |
|
915
|
0
|
|
|
|
|
|
indices.resize(features); |
|
916
|
0
|
0
|
|
|
|
|
for (auto&& instance : train) |
|
917
|
0
|
0
|
|
|
|
|
for (auto&& feature : instance.features) |
|
918
|
0
|
|
|
|
|
|
indices[feature].emplace_back(instance.outcome); |
|
919
|
|
|
|
|
|
|
|
|
920
|
0
|
0
|
|
|
|
|
for (auto&& row : indices) { |
|
921
|
|
|
|
|
|
|
sort(row.begin(), row.end()); |
|
922
|
0
|
|
|
|
|
|
row.resize(unique(row.begin(), row.end()) - row.begin()); |
|
923
|
|
|
|
|
|
|
} |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
// Initialize direct connections |
|
926
|
0
|
|
|
|
|
|
weights.clear(); |
|
927
|
0
|
0
|
|
|
|
|
for (auto&& row : indices) |
|
928
|
0
|
|
|
|
|
|
weights.emplace_back(row.size()); |
|
929
|
0
|
|
|
|
|
|
missing_weight = parameters.missing_weight; |
|
930
|
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
// Initialize hidden layer |
|
932
|
0
|
|
|
|
|
|
hidden_layer.resize(parameters.hidden_layer); |
|
933
|
0
|
0
|
|
|
|
|
if (!hidden_layer.empty()) { |
|
934
|
0
|
|
|
|
|
|
hidden_error.resize(hidden_layer.size()); |
|
935
|
|
|
|
|
|
|
|
|
936
|
0
|
|
|
|
|
|
hidden_weights[0].resize(features); |
|
937
|
0
|
0
|
|
|
|
|
for (auto&& row : hidden_weights[0]) |
|
938
|
0
|
0
|
|
|
|
|
for (auto&& weight : row.resize(hidden_layer.size()), row) |
|
939
|
0
|
|
|
|
|
|
weight = uniform(generator) + uniform(generator) + uniform(generator); |
|
940
|
|
|
|
|
|
|
|
|
941
|
0
|
|
|
|
|
|
hidden_weights[1].resize(hidden_layer.size()); |
|
942
|
0
|
0
|
|
|
|
|
for (auto&& row : hidden_weights[1]) |
|
943
|
0
|
0
|
|
|
|
|
for (auto&& weight : row.resize(outcomes), row) |
|
944
|
0
|
|
|
|
|
|
weight = uniform(generator) + uniform(generator) + uniform(generator); |
|
945
|
|
|
|
|
|
|
} |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
// Initialize output layer |
|
948
|
0
|
|
|
|
|
|
output_layer.resize(outcomes); |
|
949
|
0
|
|
|
|
|
|
output_error.resize(outcomes); |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
// Normalize gaussian_sigma |
|
952
|
0
|
|
|
|
|
|
double gaussian_sigma = parameters.gaussian_sigma / train.size(); |
|
953
|
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
// Train |
|
955
|
|
|
|
|
|
|
vector permutation; |
|
956
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < train.size(); i++) |
|
957
|
0
|
|
|
|
|
|
permutation.push_back(i); |
|
958
|
|
|
|
|
|
|
|
|
959
|
0
|
0
|
|
|
|
|
for (int iteration = 0; iteration < parameters.iterations; iteration++) { |
|
960
|
0
|
0
|
|
|
|
|
if (verbose) cerr << "Iteration " << iteration + 1 << ": "; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
|
|
962
|
0
|
0
|
|
|
|
|
double learning_rate = parameters.final_learning_rate && parameters.iterations > 1 ? |
|
963
|
0
|
|
|
|
|
|
exp(((parameters.iterations - 1 - iteration) * log(parameters.initial_learning_rate) + iteration * log(parameters.final_learning_rate)) / (parameters.iterations-1)) : |
|
964
|
0
|
0
|
|
|
|
|
parameters.initial_learning_rate; |
|
965
|
|
|
|
|
|
|
double logprob = 0; |
|
966
|
|
|
|
|
|
|
int training_correct = 0; |
|
967
|
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
// Process instances in random order |
|
969
|
0
|
|
|
|
|
|
shuffle(permutation.begin(), permutation.end(), generator); |
|
970
|
0
|
0
|
|
|
|
|
for (auto&& train_index : permutation) { |
|
971
|
0
|
|
|
|
|
|
auto& instance = train[train_index]; |
|
972
|
0
|
|
|
|
|
|
propagate(instance.features); |
|
973
|
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
// Update logprob and training_correct |
|
975
|
0
|
|
|
|
|
|
logprob += log(output_layer[instance.outcome]); |
|
976
|
0
|
|
|
|
|
|
training_correct += best_outcome() == instance.outcome; |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
// Improve network weights according to correct outcome |
|
979
|
0
|
|
|
|
|
|
backpropagate(instance, learning_rate, gaussian_sigma); |
|
980
|
|
|
|
|
|
|
} |
|
981
|
0
|
0
|
|
|
|
|
if (verbose) |
|
982
|
|
|
|
|
|
|
cerr << "a " << fixed << setprecision(3) << learning_rate |
|
983
|
|
|
|
|
|
|
<< ", logprob " << scientific << setprecision(4) << logprob |
|
984
|
0
|
|
|
|
|
|
<< ", training acc " << fixed << setprecision(2) << training_correct * 100. / train.size() |
|
985
|
0
|
0
|
|
|
|
|
<< "%, "; |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
// Evaluate heldout accuracy if heldout data are present |
|
988
|
0
|
0
|
|
|
|
|
if (!heldout.empty()) { |
|
989
|
|
|
|
|
|
|
int heldout_correct = 0; |
|
990
|
0
|
0
|
|
|
|
|
for (auto&& instance : heldout) { |
|
991
|
0
|
|
|
|
|
|
propagate(instance.features); |
|
992
|
0
|
|
|
|
|
|
heldout_correct += best_outcome() == instance.outcome; |
|
993
|
|
|
|
|
|
|
} |
|
994
|
0
|
0
|
|
|
|
|
if (verbose) cerr << "heldout acc " << heldout_correct * 100. / heldout.size() << ", "; |
|
|
|
0
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
} |
|
996
|
0
|
0
|
|
|
|
|
if (verbose) cerr << "done." << endl; |
|
997
|
|
|
|
|
|
|
} |
|
998
|
|
|
|
|
|
|
return true; |
|
999
|
|
|
|
|
|
|
} |
|
1000
|
|
|
|
|
|
|
|
|
1001
|
14
|
|
|
|
|
|
void network_classifier::classify(const classifier_features& features, vector& outcomes, vector& buffer) const { |
|
1002
|
14
|
100
|
|
|
|
|
if (outcomes.size() != output_layer.size()) outcomes.resize(output_layer.size()); |
|
1003
|
14
|
50
|
|
|
|
|
if (buffer.size() != hidden_layer.size()) buffer.resize(hidden_layer.size()); |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
// Propagation |
|
1006
|
14
|
|
|
|
|
|
propagate(features, buffer, outcomes); |
|
1007
|
14
|
|
|
|
|
|
} |
|
1008
|
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
void network_classifier::propagate(const classifier_features& features) { |
|
1010
|
0
|
0
|
|
|
|
|
propagate(features, hidden_layer, output_layer); |
|
|
|
0
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
} |
|
1012
|
|
|
|
|
|
|
|
|
1013
|
14
|
|
|
|
|
|
void network_classifier::propagate(const classifier_features& features, vector& hidden_layer, vector& output_layer) const { |
|
1014
|
14
|
|
|
|
|
|
output_layer.assign(output_layer.size(), features.size() * missing_weight); |
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
// Direct connections |
|
1017
|
305
|
100
|
|
|
|
|
for (auto&& feature : features) |
|
1018
|
291
|
50
|
|
|
|
|
if (feature < indices.size()) |
|
1019
|
705
|
100
|
|
|
|
|
for (unsigned i = 0; i < indices[feature].size(); i++) |
|
1020
|
1242
|
|
|
|
|
|
output_layer[indices[feature][i]] += weights[feature][i] - missing_weight; |
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
// Hidden layer |
|
1023
|
14
|
50
|
|
|
|
|
if (!hidden_layer.empty()) { |
|
1024
|
0
|
0
|
|
|
|
|
for (auto&& weight : hidden_layer) |
|
1025
|
0
|
|
|
|
|
|
weight = 0; |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
// Propagate to hidden layer |
|
1028
|
0
|
0
|
|
|
|
|
for (auto&& feature : features) |
|
1029
|
0
|
0
|
|
|
|
|
if (feature < hidden_weights[0].size()) |
|
1030
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < hidden_layer.size(); i++) { |
|
1031
|
0
|
|
|
|
|
|
hidden_layer[i] += hidden_weights[0][feature][i]; |
|
1032
|
|
|
|
|
|
|
} |
|
1033
|
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
// Apply logistic sigmoid to hidden layer |
|
1035
|
0
|
0
|
|
|
|
|
for (auto&& weight : hidden_layer) |
|
1036
|
0
|
|
|
|
|
|
weight = 1 / (1 + exp(-weight)); |
|
1037
|
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
// Propagate to output_layer |
|
1039
|
14
|
0
|
|
|
|
|
for (unsigned h = 0; h < hidden_layer.size(); h++) |
|
1040
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
|
1041
|
0
|
|
|
|
|
|
output_layer[i] += hidden_layer[h] * hidden_weights[1][h][i]; |
|
1042
|
|
|
|
|
|
|
} |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
// Apply softmax sigmoid to output_layer layer |
|
1045
|
|
|
|
|
|
|
double sum = 0; |
|
1046
|
140
|
100
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); sum += output_layer[i], i++) |
|
1047
|
126
|
|
|
|
|
|
output_layer[i] = exp(output_layer[i]); |
|
1048
|
14
|
|
|
|
|
|
sum = 1 / sum; |
|
1049
|
140
|
100
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
|
1050
|
126
|
|
|
|
|
|
output_layer[i] *= sum; |
|
1051
|
14
|
|
|
|
|
|
} |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
classifier_outcome network_classifier::best_outcome() { |
|
1054
|
|
|
|
|
|
|
classifier_outcome best = 0; |
|
1055
|
0
|
0
|
|
|
|
|
for (unsigned i = 1; i < output_layer.size(); i++) |
|
|
|
0
|
|
|
|
|
|
|
1056
|
0
|
0
|
|
|
|
|
if (output_layer[i] > output_layer[best]) |
|
|
|
0
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
best = i; |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
return best; |
|
1060
|
|
|
|
|
|
|
} |
|
1061
|
|
|
|
|
|
|
|
|
1062
|
0
|
|
|
|
|
|
void network_classifier::backpropagate(const classifier_instance& instance, double learning_rate, double gaussian_sigma) { |
|
1063
|
|
|
|
|
|
|
// Compute error vector |
|
1064
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_error.size(); i++) |
|
1065
|
0
|
0
|
|
|
|
|
output_error[i] = (i == instance.outcome) - output_layer[i]; |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
// Update direct connections |
|
1068
|
0
|
0
|
|
|
|
|
for (auto&& feature : instance.features) |
|
1069
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < indices[feature].size(); i++) |
|
1070
|
0
|
|
|
|
|
|
weights[feature][i] += learning_rate * output_error[indices[feature][i]] - weights[feature][i] * gaussian_sigma; |
|
1071
|
|
|
|
|
|
|
|
|
1072
|
|
|
|
|
|
|
// Update hidden layer |
|
1073
|
0
|
0
|
|
|
|
|
if (!hidden_layer.empty()) { |
|
1074
|
|
|
|
|
|
|
// Backpropagate output_error into hidden_error |
|
1075
|
0
|
0
|
|
|
|
|
for (unsigned h = 0; h < hidden_layer.size(); h++) { |
|
1076
|
0
|
|
|
|
|
|
hidden_error[h] = 0; |
|
1077
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
|
1078
|
0
|
|
|
|
|
|
hidden_error[h] += hidden_weights[1][h][i] * output_error[i]; |
|
1079
|
0
|
|
|
|
|
|
hidden_error[h] *= hidden_layer[h] * (1-hidden_layer[h]); |
|
1080
|
|
|
|
|
|
|
} |
|
1081
|
|
|
|
|
|
|
|
|
1082
|
|
|
|
|
|
|
// Update hidden_weights[1] |
|
1083
|
0
|
0
|
|
|
|
|
for (unsigned h = 0; h < hidden_layer.size(); h++) |
|
1084
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < output_layer.size(); i++) |
|
1085
|
0
|
|
|
|
|
|
hidden_weights[1][h][i] += learning_rate * hidden_layer[h] * output_error[i] - hidden_weights[1][h][i] * gaussian_sigma; |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
// Update hidden_weights[0] |
|
1088
|
0
|
0
|
|
|
|
|
for (auto&& feature : instance.features) |
|
1089
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < hidden_layer.size(); i++) |
|
1090
|
0
|
|
|
|
|
|
hidden_weights[0][feature][i] += learning_rate * hidden_error[i] - hidden_weights[0][feature][i] * gaussian_sigma; |
|
1091
|
|
|
|
|
|
|
} |
|
1092
|
0
|
|
|
|
|
|
} |
|
1093
|
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
///////// |
|
1095
|
|
|
|
|
|
|
// File: ner/entity_map.h |
|
1096
|
|
|
|
|
|
|
///////// |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1099
|
|
|
|
|
|
|
// |
|
1100
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1101
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1102
|
|
|
|
|
|
|
// |
|
1103
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1104
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1105
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1106
|
|
|
|
|
|
|
|
|
1107
|
0
|
|
|
|
|
|
class entity_map { |
|
1108
|
|
|
|
|
|
|
public: |
|
1109
|
|
|
|
|
|
|
entity_type parse(const char* str, bool add_entity = false) const; |
|
1110
|
|
|
|
|
|
|
const string& name(entity_type entity) const; |
|
1111
|
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
bool load(istream& is); |
|
1113
|
|
|
|
|
|
|
bool save(ostream& os) const; |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
|
|
|
|
|
|
entity_type size() const; |
|
1116
|
|
|
|
|
|
|
private: |
|
1117
|
|
|
|
|
|
|
mutable unordered_map str2id; |
|
1118
|
|
|
|
|
|
|
mutable vector id2str; |
|
1119
|
|
|
|
|
|
|
string empty; |
|
1120
|
|
|
|
|
|
|
}; |
|
1121
|
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
///////// |
|
1123
|
|
|
|
|
|
|
// File: tokenizer/tokenizer.h |
|
1124
|
|
|
|
|
|
|
///////// |
|
1125
|
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1127
|
|
|
|
|
|
|
// |
|
1128
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1129
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1130
|
|
|
|
|
|
|
// |
|
1131
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1132
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1133
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1134
|
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
// Range of a token, measured in Unicode characters, not UTF8 bytes. |
|
1136
|
|
|
|
|
|
|
// It must be in sync with morphodita::token_range. |
|
1137
|
|
|
|
|
|
|
struct token_range { |
|
1138
|
|
|
|
|
|
|
size_t start; |
|
1139
|
|
|
|
|
|
|
size_t length; |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
token_range() {} |
|
1142
|
|
|
|
|
|
|
token_range(size_t start, size_t length) : start(start), length(length) {} |
|
1143
|
|
|
|
|
|
|
}; |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
4
|
|
|
|
|
|
class tokenizer { |
|
1146
|
|
|
|
|
|
|
public: |
|
1147
|
4
|
|
|
|
|
|
virtual ~tokenizer() {} |
|
1148
|
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) = 0; |
|
1150
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) = 0; |
|
1151
|
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
// Static factory method |
|
1153
|
|
|
|
|
|
|
static tokenizer* new_vertical_tokenizer(); |
|
1154
|
|
|
|
|
|
|
}; |
|
1155
|
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
///////// |
|
1157
|
|
|
|
|
|
|
// File: ner/ner.h |
|
1158
|
|
|
|
|
|
|
///////// |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1161
|
|
|
|
|
|
|
// |
|
1162
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1163
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1164
|
|
|
|
|
|
|
// |
|
1165
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1166
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1167
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1168
|
|
|
|
|
|
|
|
|
1169
|
0
|
|
|
|
|
|
struct named_entity { |
|
1170
|
|
|
|
|
|
|
size_t start; |
|
1171
|
|
|
|
|
|
|
size_t length; |
|
1172
|
|
|
|
|
|
|
string type; |
|
1173
|
|
|
|
|
|
|
|
|
1174
|
|
|
|
|
|
|
named_entity() {} |
|
1175
|
3
|
|
|
|
|
|
named_entity(size_t start, size_t length, const string& type) : start(start), length(length), type(type) {} |
|
1176
|
|
|
|
|
|
|
}; |
|
1177
|
|
|
|
|
|
|
|
|
1178
|
1
|
|
|
|
|
|
class ner { |
|
1179
|
|
|
|
|
|
|
public: |
|
1180
|
0
|
|
|
|
|
|
virtual ~ner() {} |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
|
|
|
|
|
|
static ner* load(const char* fname); |
|
1183
|
|
|
|
|
|
|
static ner* load(istream& is); |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
// Perform named entity recognition on a tokenizes sentence and return found |
|
1186
|
|
|
|
|
|
|
// named entities in the given vector. |
|
1187
|
|
|
|
|
|
|
virtual void recognize(const vector& forms, vector& entities) const = 0; |
|
1188
|
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
// Return the possible entity types |
|
1190
|
|
|
|
|
|
|
virtual void entity_types(vector& types) const = 0; |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
// Return gazetteers used by the recognizer, if any, optionally with the index of entity type |
|
1193
|
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const = 0; |
|
1194
|
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this recognizer. |
|
1196
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
|
1197
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const = 0; |
|
1198
|
|
|
|
|
|
|
}; |
|
1199
|
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
///////// |
|
1201
|
|
|
|
|
|
|
// File: tagger/tagger_ids.h |
|
1202
|
|
|
|
|
|
|
///////// |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1205
|
|
|
|
|
|
|
// |
|
1206
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1207
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1208
|
|
|
|
|
|
|
// |
|
1209
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1210
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1211
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1212
|
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
class tagger_ids { |
|
1214
|
|
|
|
|
|
|
public: |
|
1215
|
|
|
|
|
|
|
enum tagger_id { TRIVIAL, EXTERNAL, MORPHODITA }; |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
0
|
|
|
|
|
|
static bool parse(const string& str, tagger_id& id) { |
|
1218
|
0
|
0
|
|
|
|
|
if (str == "trivial") return id = TRIVIAL, true; |
|
1219
|
0
|
0
|
|
|
|
|
if (str == "external") return id = EXTERNAL, true; |
|
1220
|
0
|
0
|
|
|
|
|
if (str == "morphodita") return id = MORPHODITA, true; |
|
1221
|
|
|
|
|
|
|
return false; |
|
1222
|
|
|
|
|
|
|
} |
|
1223
|
|
|
|
|
|
|
}; |
|
1224
|
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
typedef tagger_ids::tagger_id tagger_id; |
|
1226
|
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
///////// |
|
1228
|
|
|
|
|
|
|
// File: tagger/tagger.h |
|
1229
|
|
|
|
|
|
|
///////// |
|
1230
|
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1232
|
|
|
|
|
|
|
// |
|
1233
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1234
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1235
|
|
|
|
|
|
|
// |
|
1236
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1237
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1238
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1239
|
|
|
|
|
|
|
|
|
1240
|
1
|
|
|
|
|
|
class tagger { |
|
1241
|
|
|
|
|
|
|
public: |
|
1242
|
0
|
|
|
|
|
|
virtual ~tagger() {} |
|
1243
|
|
|
|
|
|
|
|
|
1244
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const = 0; |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
// Factory methods |
|
1247
|
|
|
|
|
|
|
static tagger* load_instance(istream& is); |
|
1248
|
|
|
|
|
|
|
static tagger* create_and_encode_instance(const string& tagger_id_and_params, ostream& os); |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
protected: |
|
1251
|
|
|
|
|
|
|
virtual bool load(istream& is) = 0; |
|
1252
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) = 0; |
|
1253
|
|
|
|
|
|
|
|
|
1254
|
|
|
|
|
|
|
private: |
|
1255
|
|
|
|
|
|
|
static tagger* create(tagger_id id); |
|
1256
|
|
|
|
|
|
|
}; |
|
1257
|
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
///////// |
|
1259
|
|
|
|
|
|
|
// File: features/nlp_pipeline.h |
|
1260
|
|
|
|
|
|
|
///////// |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1263
|
|
|
|
|
|
|
// |
|
1264
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1265
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1266
|
|
|
|
|
|
|
// |
|
1267
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1268
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1269
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
struct nlp_pipeline { |
|
1272
|
|
|
|
|
|
|
ufal::nametag::tokenizer* tokenizer; |
|
1273
|
|
|
|
|
|
|
const ufal::nametag::tagger* tagger; |
|
1274
|
|
|
|
|
|
|
|
|
1275
|
1
|
|
|
|
|
|
nlp_pipeline(ufal::nametag::tokenizer* tokenizer, const ufal::nametag::tagger* tagger) : tokenizer(tokenizer), tagger(tagger) {} |
|
1276
|
|
|
|
|
|
|
}; |
|
1277
|
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
///////// |
|
1279
|
|
|
|
|
|
|
// File: features/feature_processor.h |
|
1280
|
|
|
|
|
|
|
///////// |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1283
|
|
|
|
|
|
|
// |
|
1284
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1285
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1286
|
|
|
|
|
|
|
// |
|
1287
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1288
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1289
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1290
|
|
|
|
|
|
|
|
|
1291
|
8
|
|
|
|
|
|
class feature_processor { |
|
1292
|
|
|
|
|
|
|
public: |
|
1293
|
|
|
|
|
|
|
virtual ~feature_processor(); |
|
1294
|
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
|
1296
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline); |
|
1297
|
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline); |
|
1298
|
|
|
|
|
|
|
virtual void save(binary_encoder& enc); |
|
1299
|
|
|
|
|
|
|
|
|
1300
|
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const; |
|
1301
|
|
|
|
|
|
|
virtual void process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const; |
|
1302
|
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const; |
|
1304
|
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
protected: |
|
1306
|
|
|
|
|
|
|
int window; |
|
1307
|
|
|
|
|
|
|
|
|
1308
|
99
|
|
|
|
|
|
inline ner_feature lookup(const string& key, ner_feature* total_features) const { |
|
1309
|
|
|
|
|
|
|
auto it = map.find(key); |
|
1310
|
99
|
100
|
|
|
|
|
if (it == map.end() && total_features) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1311
|
0
|
|
|
|
|
|
it = map.emplace(key, window + *total_features).first; |
|
1312
|
0
|
|
|
|
|
|
*total_features += 2*window + 1; |
|
1313
|
|
|
|
|
|
|
} |
|
1314
|
99
|
100
|
|
|
|
|
return it != map.end() ? it->second : ner_feature_unknown; |
|
1315
|
|
|
|
|
|
|
} |
|
1316
|
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
mutable unordered_map map; |
|
1318
|
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
// Factory method |
|
1320
|
|
|
|
|
|
|
public: |
|
1321
|
|
|
|
|
|
|
static feature_processor* create(const string& name); |
|
1322
|
|
|
|
|
|
|
}; |
|
1323
|
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
///////// |
|
1325
|
|
|
|
|
|
|
// File: features/feature_processor.cpp |
|
1326
|
|
|
|
|
|
|
///////// |
|
1327
|
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1329
|
|
|
|
|
|
|
// |
|
1330
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1331
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1332
|
|
|
|
|
|
|
// |
|
1333
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1334
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1335
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
// Feature processor -- methods and virtual methods |
|
1338
|
0
|
|
|
|
|
|
feature_processor::~feature_processor() {} |
|
1339
|
|
|
|
|
|
|
|
|
1340
|
0
|
|
|
|
|
|
bool feature_processor::parse(int window, const vector& /*args*/, entity_map& /*entities*/, |
|
1341
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& /*pipeline*/) { |
|
1342
|
0
|
0
|
|
|
|
|
if (window < 0) return false; |
|
1343
|
0
|
0
|
|
|
|
|
if (!total_features) return false; |
|
1344
|
|
|
|
|
|
|
|
|
1345
|
0
|
|
|
|
|
|
this->window = window; |
|
1346
|
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
map.clear(); |
|
1348
|
0
|
0
|
|
|
|
|
lookup(string(), total_features); // Always add an empty string to the map |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
0
|
|
|
|
|
|
return true; |
|
1351
|
|
|
|
|
|
|
} |
|
1352
|
|
|
|
|
|
|
|
|
1353
|
8
|
|
|
|
|
|
void feature_processor::load(binary_decoder& data, const nlp_pipeline& /*pipeline*/) { |
|
1354
|
8
|
|
|
|
|
|
window = data.next_4B(); |
|
1355
|
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
map.clear(); |
|
1357
|
8
|
|
|
|
|
|
map.rehash(data.next_4B()); |
|
1358
|
43
|
100
|
|
|
|
|
for (unsigned i = data.next_4B(); i > 0; i--) { |
|
1359
|
|
|
|
|
|
|
string key; |
|
1360
|
35
|
50
|
|
|
|
|
data.next_str(key); |
|
1361
|
35
|
50
|
|
|
|
|
map.emplace(key, data.next_4B()); |
|
1362
|
|
|
|
|
|
|
} |
|
1363
|
8
|
|
|
|
|
|
} |
|
1364
|
|
|
|
|
|
|
|
|
1365
|
0
|
|
|
|
|
|
void feature_processor::save(binary_encoder& enc) { |
|
1366
|
0
|
|
|
|
|
|
enc.add_4B(window); |
|
1367
|
|
|
|
|
|
|
|
|
1368
|
0
|
|
|
|
|
|
enc.add_4B(map.bucket_count()); |
|
1369
|
0
|
|
|
|
|
|
enc.add_4B(map.size()); |
|
1370
|
|
|
|
|
|
|
|
|
1371
|
0
|
|
|
|
|
|
vector> map_elements(map.begin(), map.end()); |
|
1372
|
|
|
|
|
|
|
sort(map_elements.begin(), map_elements.end()); |
|
1373
|
0
|
0
|
|
|
|
|
for (auto&& element : map_elements) { |
|
1374
|
0
|
0
|
|
|
|
|
enc.add_str(element.first); |
|
1375
|
0
|
|
|
|
|
|
enc.add_4B(element.second); |
|
1376
|
|
|
|
|
|
|
} |
|
1377
|
0
|
|
|
|
|
|
} |
|
1378
|
|
|
|
|
|
|
|
|
1379
|
0
|
|
|
|
|
|
void feature_processor::process_sentence(ner_sentence& /*sentence*/, ner_feature* /*total_features*/, string& /*buffer*/) const {} |
|
1380
|
|
|
|
|
|
|
|
|
1381
|
16
|
|
|
|
|
|
void feature_processor::process_entities(ner_sentence& /*sentence*/, vector& /*entities*/, vector& /*buffer*/) const {} |
|
1382
|
|
|
|
|
|
|
|
|
1383
|
0
|
|
|
|
|
|
void feature_processor::gazetteers(vector& /*gazetteers*/, vector* /*gazetteer_types*/) const {} |
|
1384
|
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
///////// |
|
1386
|
|
|
|
|
|
|
// File: unilib/unicode.h |
|
1387
|
|
|
|
|
|
|
///////// |
|
1388
|
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
// This file is part of UniLib . |
|
1390
|
|
|
|
|
|
|
// |
|
1391
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
|
1392
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1393
|
|
|
|
|
|
|
// |
|
1394
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1395
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1396
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1397
|
|
|
|
|
|
|
// |
|
1398
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
|
1399
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
|
1400
|
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
namespace unilib { |
|
1402
|
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
class unicode { |
|
1404
|
|
|
|
|
|
|
enum : uint8_t { |
|
1405
|
|
|
|
|
|
|
_Lu = 1, _Ll = 2, _Lt = 3, _Lm = 4, _Lo = 5, |
|
1406
|
|
|
|
|
|
|
_Mn = 6, _Mc = 7, _Me = 8, |
|
1407
|
|
|
|
|
|
|
_Nd = 9, _Nl = 10, _No = 11, |
|
1408
|
|
|
|
|
|
|
_Pc = 12, _Pd = 13, _Ps = 14, _Pe = 15, _Pi = 16, _Pf = 17, _Po = 18, |
|
1409
|
|
|
|
|
|
|
_Sm = 19, _Sc = 20, _Sk = 21, _So = 22, |
|
1410
|
|
|
|
|
|
|
_Zs = 23, _Zl = 24, _Zp = 25, |
|
1411
|
|
|
|
|
|
|
_Cc = 26, _Cf = 27, _Cs = 28, _Co = 29, _Cn = 30 |
|
1412
|
|
|
|
|
|
|
}; |
|
1413
|
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
public: |
|
1415
|
|
|
|
|
|
|
typedef uint32_t category_t; |
|
1416
|
|
|
|
|
|
|
enum : category_t { |
|
1417
|
|
|
|
|
|
|
Lu = 1 << _Lu, Ll = 1 << _Ll, Lt = 1 << _Lt, Lut = Lu | Lt, LC = Lu | Ll | Lt, |
|
1418
|
|
|
|
|
|
|
Lm = 1 << _Lm, Lo = 1 << _Lo, L = Lu | Ll | Lt | Lm | Lo, |
|
1419
|
|
|
|
|
|
|
Mn = 1 << _Mn, Mc = 1 << _Mc, Me = 1 << _Me, M = Mn | Mc | Me, |
|
1420
|
|
|
|
|
|
|
Nd = 1 << _Nd, Nl = 1 << _Nl, No = 1 << _No, N = Nd | Nl | No, |
|
1421
|
|
|
|
|
|
|
Pc = 1 << _Pc, Pd = 1 << _Pd, Ps = 1 << _Ps, Pe = 1 << _Pe, Pi = 1 << _Pi, |
|
1422
|
|
|
|
|
|
|
Pf = 1 << _Pf, Po = 1 << _Po, P = Pc | Pd | Ps | Pe | Pi | Pf | Po, |
|
1423
|
|
|
|
|
|
|
Sm = 1 << _Sm, Sc = 1 << _Sc, Sk = 1 << _Sk, So = 1 << _So, S = Sm | Sc | Sk | So, |
|
1424
|
|
|
|
|
|
|
Zs = 1 << _Zs, Zl = 1 << _Zl, Zp = 1 << _Zp, Z = Zs | Zl | Zp, |
|
1425
|
|
|
|
|
|
|
Cc = 1 << _Cc, Cf = 1 << _Cf, Cs = 1 << _Cs, Co = 1 << _Co, Cn = 1 << _Cn, C = Cc | Cf | Cs | Co | Cn |
|
1426
|
|
|
|
|
|
|
}; |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
static inline category_t category(char32_t chr); |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
static inline char32_t lowercase(char32_t chr); |
|
1431
|
|
|
|
|
|
|
static inline char32_t uppercase(char32_t chr); |
|
1432
|
|
|
|
|
|
|
static inline char32_t titlecase(char32_t chr); |
|
1433
|
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
private: |
|
1435
|
|
|
|
|
|
|
static const char32_t CHARS = 0x110000; |
|
1436
|
|
|
|
|
|
|
static const int32_t DEFAULT_CAT = Cn; |
|
1437
|
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
static const uint8_t category_index[CHARS >> 8]; |
|
1439
|
|
|
|
|
|
|
static const uint8_t category_block[][256]; |
|
1440
|
|
|
|
|
|
|
static const uint8_t othercase_index[CHARS >> 8]; |
|
1441
|
|
|
|
|
|
|
static const char32_t othercase_block[][256]; |
|
1442
|
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
enum othercase_type { LOWER_ONLY = 1, UPPERTITLE_ONLY = 2, UPPER_ONLY = 3, LOWER_THEN_UPPER = 4, UPPER_THEN_TITLE = 5, TITLE_THEN_LOWER = 6 }; |
|
1444
|
|
|
|
|
|
|
}; |
|
1445
|
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
unicode::category_t unicode::category(char32_t chr) { |
|
1447
|
283
|
0
|
|
|
|
|
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
} |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
4
|
|
|
|
|
|
char32_t unicode::lowercase(char32_t chr) { |
|
1451
|
4
|
50
|
|
|
|
|
if (chr < CHARS) { |
|
1452
|
4
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
|
1453
|
4
|
50
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_ONLY) return othercase >> 8; |
|
1454
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase >> 8; |
|
1455
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
|
1456
|
|
|
|
|
|
|
} |
|
1457
|
|
|
|
|
|
|
return chr; |
|
1458
|
|
|
|
|
|
|
} |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
0
|
|
|
|
|
|
char32_t unicode::uppercase(char32_t chr) { |
|
1461
|
0
|
0
|
|
|
|
|
if (chr < CHARS) { |
|
1462
|
0
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
|
1463
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8; |
|
1464
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_ONLY) return othercase >> 8; |
|
1465
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase >> 8; |
|
1466
|
0
|
0
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
|
1467
|
|
|
|
|
|
|
} |
|
1468
|
|
|
|
|
|
|
return chr; |
|
1469
|
|
|
|
|
|
|
} |
|
1470
|
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
char32_t unicode::titlecase(char32_t chr) { |
|
1472
|
|
|
|
|
|
|
if (chr < CHARS) { |
|
1473
|
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
|
1474
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8; |
|
1475
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase >> 8; |
|
1476
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
|
1477
|
|
|
|
|
|
|
} |
|
1478
|
|
|
|
|
|
|
return chr; |
|
1479
|
|
|
|
|
|
|
} |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
} // namespace unilib |
|
1482
|
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
///////// |
|
1484
|
|
|
|
|
|
|
// File: unilib/utf8.h |
|
1485
|
|
|
|
|
|
|
///////// |
|
1486
|
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
// This file is part of UniLib . |
|
1488
|
|
|
|
|
|
|
// |
|
1489
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
|
1490
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1491
|
|
|
|
|
|
|
// |
|
1492
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1493
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1494
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1495
|
|
|
|
|
|
|
// |
|
1496
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
|
1497
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
|
1498
|
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
namespace unilib { |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
class utf8 { |
|
1502
|
|
|
|
|
|
|
public: |
|
1503
|
|
|
|
|
|
|
static bool valid(const char* str); |
|
1504
|
|
|
|
|
|
|
static bool valid(const char* str, size_t len); |
|
1505
|
|
|
|
|
|
|
static inline bool valid(const std::string& str); |
|
1506
|
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
static inline char32_t decode(const char*& str); |
|
1508
|
|
|
|
|
|
|
static inline char32_t decode(const char*& str, size_t& len); |
|
1509
|
|
|
|
|
|
|
static inline char32_t first(const char* str); |
|
1510
|
|
|
|
|
|
|
static inline char32_t first(const char* str, size_t len); |
|
1511
|
|
|
|
|
|
|
static inline char32_t first(const std::string& str); |
|
1512
|
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
static void decode(const char* str, std::u32string& decoded); |
|
1514
|
|
|
|
|
|
|
static void decode(const char* str, size_t len, std::u32string& decoded); |
|
1515
|
|
|
|
|
|
|
static inline void decode(const std::string& str, std::u32string& decoded); |
|
1516
|
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
class string_decoder { |
|
1518
|
|
|
|
|
|
|
public: |
|
1519
|
|
|
|
|
|
|
class iterator; |
|
1520
|
|
|
|
|
|
|
inline iterator begin(); |
|
1521
|
|
|
|
|
|
|
inline iterator end(); |
|
1522
|
|
|
|
|
|
|
private: |
|
1523
|
|
|
|
|
|
|
inline string_decoder(const char* str); |
|
1524
|
|
|
|
|
|
|
const char* str; |
|
1525
|
|
|
|
|
|
|
friend class utf8; |
|
1526
|
|
|
|
|
|
|
}; |
|
1527
|
|
|
|
|
|
|
static inline string_decoder decoder(const char* str); |
|
1528
|
|
|
|
|
|
|
static inline string_decoder decoder(const std::string& str); |
|
1529
|
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
class buffer_decoder { |
|
1531
|
|
|
|
|
|
|
public: |
|
1532
|
|
|
|
|
|
|
class iterator; |
|
1533
|
|
|
|
|
|
|
inline iterator begin(); |
|
1534
|
|
|
|
|
|
|
inline iterator end(); |
|
1535
|
|
|
|
|
|
|
private: |
|
1536
|
|
|
|
|
|
|
inline buffer_decoder(const char* str, size_t len); |
|
1537
|
|
|
|
|
|
|
const char* str; |
|
1538
|
|
|
|
|
|
|
size_t len; |
|
1539
|
|
|
|
|
|
|
friend class utf8; |
|
1540
|
|
|
|
|
|
|
}; |
|
1541
|
|
|
|
|
|
|
static inline buffer_decoder decoder(const char* str, size_t len); |
|
1542
|
|
|
|
|
|
|
|
|
1543
|
|
|
|
|
|
|
static inline void append(char*& str, char32_t chr); |
|
1544
|
|
|
|
|
|
|
static inline void append(std::string& str, char32_t chr); |
|
1545
|
|
|
|
|
|
|
static void encode(const std::u32string& str, std::string& encoded); |
|
1546
|
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
template static void map(F f, const char* str, std::string& result); |
|
1548
|
|
|
|
|
|
|
template static void map(F f, const char* str, size_t len, std::string& result); |
|
1549
|
|
|
|
|
|
|
template static void map(F f, const std::string& str, std::string& result); |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
private: |
|
1552
|
|
|
|
|
|
|
static const char REPLACEMENT_CHAR = '?'; |
|
1553
|
|
|
|
|
|
|
}; |
|
1554
|
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
bool utf8::valid(const std::string& str) { |
|
1556
|
|
|
|
|
|
|
return valid(str.c_str()); |
|
1557
|
|
|
|
|
|
|
} |
|
1558
|
|
|
|
|
|
|
|
|
1559
|
68
|
|
|
|
|
|
char32_t utf8::decode(const char*& str) { |
|
1560
|
68
|
100
|
|
|
|
|
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
|
1561
|
10
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
|
1562
|
10
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xE0) { |
|
1563
|
10
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x1F) << 6; |
|
1564
|
10
|
50
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
50
|
|
|
|
|
|
|
1565
|
10
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
|
1566
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF0) { |
|
1567
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x0F) << 12; |
|
1568
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
1569
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 6; |
|
1570
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
1571
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
|
1572
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF8) { |
|
1573
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x07) << 18; |
|
1574
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
1575
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 12; |
|
1576
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
1577
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 6; |
|
1578
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
1579
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
|
1580
|
0
|
|
|
|
|
|
} else return ++str, REPLACEMENT_CHAR; |
|
1581
|
|
|
|
|
|
|
} |
|
1582
|
|
|
|
|
|
|
|
|
1583
|
218
|
|
|
|
|
|
char32_t utf8::decode(const char*& str, size_t& len) { |
|
1584
|
218
|
50
|
|
|
|
|
if (!len) return 0; |
|
1585
|
218
|
|
|
|
|
|
--len; |
|
1586
|
218
|
100
|
|
|
|
|
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
|
1587
|
23
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
|
1588
|
23
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xE0) { |
|
1589
|
23
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x1F) << 6; |
|
1590
|
23
|
50
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1591
|
23
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
|
1592
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF0) { |
|
1593
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x0F) << 12; |
|
1594
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1595
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 6; |
|
1596
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1597
|
0
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
|
1598
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF8) { |
|
1599
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x07) << 18; |
|
1600
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1601
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 12; |
|
1602
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1603
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 6; |
|
1604
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1605
|
0
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
|
1606
|
0
|
|
|
|
|
|
} else return ++str, REPLACEMENT_CHAR; |
|
1607
|
|
|
|
|
|
|
} |
|
1608
|
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
char32_t utf8::first(const char* str) { |
|
1610
|
|
|
|
|
|
|
return decode(str); |
|
1611
|
|
|
|
|
|
|
} |
|
1612
|
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
char32_t utf8::first(const char* str, size_t len) { |
|
1614
|
|
|
|
|
|
|
return decode(str, len); |
|
1615
|
|
|
|
|
|
|
} |
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
char32_t utf8::first(const std::string& str) { |
|
1618
|
|
|
|
|
|
|
return first(str.c_str()); |
|
1619
|
|
|
|
|
|
|
} |
|
1620
|
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
void utf8::decode(const std::string& str, std::u32string& decoded) { |
|
1622
|
|
|
|
|
|
|
decode(str.c_str(), decoded); |
|
1623
|
|
|
|
|
|
|
} |
|
1624
|
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
class utf8::string_decoder::iterator : public std::iterator { |
|
1626
|
|
|
|
|
|
|
public: |
|
1627
|
0
|
|
|
|
|
|
iterator(const char* str) : codepoint(0), next(str) { operator++(); } |
|
1628
|
|
|
|
|
|
|
iterator(const iterator& it) : codepoint(it.codepoint), next(it.next) {} |
|
1629
|
0
|
0
|
|
|
|
|
iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; } |
|
1631
|
|
|
|
|
|
|
bool operator==(const iterator& other) const { return next == other.next; } |
|
1632
|
|
|
|
|
|
|
bool operator!=(const iterator& other) const { return next != other.next; } |
|
1633
|
|
|
|
|
|
|
const char32_t& operator*() { return codepoint; } |
|
1634
|
|
|
|
|
|
|
private: |
|
1635
|
|
|
|
|
|
|
char32_t codepoint; |
|
1636
|
|
|
|
|
|
|
const char* next; |
|
1637
|
|
|
|
|
|
|
}; |
|
1638
|
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
utf8::string_decoder::string_decoder(const char* str) : str(str) {} |
|
1640
|
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
utf8::string_decoder::iterator utf8::string_decoder::begin() { |
|
1642
|
|
|
|
|
|
|
return iterator(str); |
|
1643
|
|
|
|
|
|
|
} |
|
1644
|
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
utf8::string_decoder::iterator utf8::string_decoder::end() { |
|
1646
|
|
|
|
|
|
|
return iterator(nullptr); |
|
1647
|
|
|
|
|
|
|
} |
|
1648
|
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
utf8::string_decoder utf8::decoder(const char* str) { |
|
1650
|
|
|
|
|
|
|
return string_decoder(str); |
|
1651
|
|
|
|
|
|
|
} |
|
1652
|
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
utf8::string_decoder utf8::decoder(const std::string& str) { |
|
1654
|
|
|
|
|
|
|
return string_decoder(str.c_str()); |
|
1655
|
|
|
|
|
|
|
} |
|
1656
|
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
class utf8::buffer_decoder::iterator : public std::iterator { |
|
1658
|
|
|
|
|
|
|
public: |
|
1659
|
|
|
|
|
|
|
iterator(const char* str, size_t len) : codepoint(0), next(str), len(len) { operator++(); } |
|
1660
|
|
|
|
|
|
|
iterator(const iterator& it) : codepoint(it.codepoint), next(it.next), len(it.len) {} |
|
1661
|
|
|
|
|
|
|
iterator& operator++() { if (!len) next = nullptr; if (next) codepoint = decode(next, len); return *this; } |
|
1662
|
|
|
|
|
|
|
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; } |
|
1663
|
|
|
|
|
|
|
bool operator==(const iterator& other) const { return next == other.next; } |
|
1664
|
|
|
|
|
|
|
bool operator!=(const iterator& other) const { return next != other.next; } |
|
1665
|
|
|
|
|
|
|
const char32_t& operator*() { return codepoint; } |
|
1666
|
|
|
|
|
|
|
private: |
|
1667
|
|
|
|
|
|
|
char32_t codepoint; |
|
1668
|
|
|
|
|
|
|
const char* next; |
|
1669
|
|
|
|
|
|
|
size_t len; |
|
1670
|
|
|
|
|
|
|
}; |
|
1671
|
|
|
|
|
|
|
|
|
1672
|
|
|
|
|
|
|
utf8::buffer_decoder::buffer_decoder(const char* str, size_t len) : str(str), len(len) {} |
|
1673
|
|
|
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
utf8::buffer_decoder::iterator utf8::buffer_decoder::begin() { |
|
1675
|
|
|
|
|
|
|
return iterator(str, len); |
|
1676
|
|
|
|
|
|
|
} |
|
1677
|
|
|
|
|
|
|
|
|
1678
|
|
|
|
|
|
|
utf8::buffer_decoder::iterator utf8::buffer_decoder::end() { |
|
1679
|
|
|
|
|
|
|
return iterator(nullptr, 0); |
|
1680
|
|
|
|
|
|
|
} |
|
1681
|
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
utf8::buffer_decoder utf8::decoder(const char* str, size_t len) { |
|
1683
|
|
|
|
|
|
|
return buffer_decoder(str, len); |
|
1684
|
|
|
|
|
|
|
} |
|
1685
|
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
void utf8::append(char*& str, char32_t chr) { |
|
1687
|
|
|
|
|
|
|
if (chr < 0x80) *str++ = chr; |
|
1688
|
|
|
|
|
|
|
else if (chr < 0x800) { *str++ = 0xC0 + (chr >> 6); *str++ = 0x80 + (chr & 0x3F); } |
|
1689
|
|
|
|
|
|
|
else if (chr < 0x10000) { *str++ = 0xE0 + (chr >> 12); *str++ = 0x80 + ((chr >> 6) & 0x3F); *str++ = 0x80 + (chr & 0x3F); } |
|
1690
|
|
|
|
|
|
|
else if (chr < 0x200000) { *str++ = 0xF0 + (chr >> 18); *str++ = 0x80 + ((chr >> 12) & 0x3F); *str++ = 0x80 + ((chr >> 6) & 0x3F); *str++ = 0x80 + (chr & 0x3F); } |
|
1691
|
|
|
|
|
|
|
else *str++ = REPLACEMENT_CHAR; |
|
1692
|
|
|
|
|
|
|
} |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
4
|
|
|
|
|
|
void utf8::append(std::string& str, char32_t chr) { |
|
1695
|
4
|
50
|
|
|
|
|
if (chr < 0x80) str += chr; |
|
1696
|
0
|
0
|
|
|
|
|
else if (chr < 0x800) { str += 0xC0 + (chr >> 6); str += 0x80 + (chr & 0x3F); } |
|
1697
|
0
|
0
|
|
|
|
|
else if (chr < 0x10000) { str += 0xE0 + (chr >> 12); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
|
1698
|
0
|
0
|
|
|
|
|
else if (chr < 0x200000) { str += 0xF0 + (chr >> 18); str += 0x80 + ((chr >> 12) & 0x3F); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
|
1699
|
|
|
|
|
|
|
else str += REPLACEMENT_CHAR; |
|
1700
|
4
|
|
|
|
|
|
} |
|
1701
|
|
|
|
|
|
|
|
|
1702
|
0
|
|
|
|
|
|
template void utf8::map(F f, const char* str, std::string& result) { |
|
1703
|
|
|
|
|
|
|
result.clear(); |
|
1704
|
|
|
|
|
|
|
|
|
1705
|
0
|
0
|
|
|
|
|
for (char32_t chr; (chr = decode(str)); ) |
|
1706
|
0
|
|
|
|
|
|
append(result, f(chr)); |
|
1707
|
0
|
|
|
|
|
|
} |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
0
|
|
|
|
|
|
template void utf8::map(F f, const char* str, size_t len, std::string& result) { |
|
1710
|
|
|
|
|
|
|
result.clear(); |
|
1711
|
|
|
|
|
|
|
|
|
1712
|
0
|
0
|
|
|
|
|
while (len) |
|
1713
|
0
|
|
|
|
|
|
append(result, f(decode(str, len))); |
|
1714
|
0
|
|
|
|
|
|
} |
|
1715
|
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
template void utf8::map(F f, const std::string& str, std::string& result) { |
|
1717
|
0
|
|
|
|
|
|
map(f, str.c_str(), result); |
|
1718
|
|
|
|
|
|
|
} |
|
1719
|
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
} // namespace unilib |
|
1721
|
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
///////// |
|
1723
|
|
|
|
|
|
|
// File: utils/parse_int.h |
|
1724
|
|
|
|
|
|
|
///////// |
|
1725
|
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
1727
|
|
|
|
|
|
|
// |
|
1728
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
1729
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1730
|
|
|
|
|
|
|
// |
|
1731
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1732
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1733
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1734
|
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
namespace utils { |
|
1736
|
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
// |
|
1738
|
|
|
|
|
|
|
// Declarations |
|
1739
|
|
|
|
|
|
|
// |
|
1740
|
|
|
|
|
|
|
|
|
1741
|
|
|
|
|
|
|
// Try to parse an int from given string. If the int cannot be parsed or does |
|
1742
|
|
|
|
|
|
|
// not fit into int, false is returned and the error string is filled using the |
|
1743
|
|
|
|
|
|
|
// value_name argument. |
|
1744
|
|
|
|
|
|
|
inline bool parse_int(string_piece str, const char* value_name, int& value, string& error); |
|
1745
|
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
// Try to parse an int from given string. If the int cannot be parsed or does |
|
1747
|
|
|
|
|
|
|
// not fit into int, an error is displayed and program exits. |
|
1748
|
|
|
|
|
|
|
inline int parse_int(string_piece str, const char* value_name); |
|
1749
|
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
// |
|
1751
|
|
|
|
|
|
|
// Definitions |
|
1752
|
|
|
|
|
|
|
// |
|
1753
|
|
|
|
|
|
|
|
|
1754
|
0
|
|
|
|
|
|
bool parse_int(string_piece str, const char* value_name, int& value, string& error) { |
|
1755
|
|
|
|
|
|
|
string_piece original = str; |
|
1756
|
|
|
|
|
|
|
|
|
1757
|
|
|
|
|
|
|
// Skip spaces |
|
1758
|
0
|
0
|
|
|
|
|
while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v')) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1759
|
0
|
|
|
|
|
|
str.str++, str.len--; |
|
1760
|
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
// Allow minus |
|
1762
|
|
|
|
|
|
|
bool positive = true; |
|
1763
|
0
|
0
|
|
|
|
|
if (str.len && (str.str[0] == '+' || str.str[0] == '-')) { |
|
|
|
0
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
positive = str.str[0] == '+'; |
|
1765
|
0
|
|
|
|
|
|
str.str++, str.len--; |
|
1766
|
|
|
|
|
|
|
} |
|
1767
|
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
// Parse value, checking for overflow/underflow |
|
1769
|
0
|
0
|
|
|
|
|
if (!str.len) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': empty string."), false; |
|
1770
|
|
|
|
|
|
|
if (!(str.str[0] >= '0' || str.str[0] <= '9')) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': non-digit character found."), false; |
|
1771
|
|
|
|
|
|
|
|
|
1772
|
0
|
|
|
|
|
|
value = 0; |
|
1773
|
0
|
0
|
|
|
|
|
while (str.len && str.str[0] >= '0' && str.str[0] <= '9') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1774
|
0
|
0
|
|
|
|
|
if (positive) { |
|
1775
|
0
|
0
|
|
|
|
|
if (value > (numeric_limits::max() - (str.str[0] - '0')) / 10) |
|
1776
|
0
|
|
|
|
|
|
return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': overflow occured."), false; |
|
1777
|
0
|
|
|
|
|
|
value = 10 * value + (str.str[0] - '0'); |
|
1778
|
|
|
|
|
|
|
} else { |
|
1779
|
0
|
0
|
|
|
|
|
if (value < (numeric_limits::min() + (str.str[0] - '0')) / 10) |
|
1780
|
0
|
|
|
|
|
|
return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': underflow occured."), false; |
|
1781
|
0
|
|
|
|
|
|
value = 10 * value - (str.str[0] - '0'); |
|
1782
|
|
|
|
|
|
|
} |
|
1783
|
0
|
|
|
|
|
|
str.str++, str.len--; |
|
1784
|
|
|
|
|
|
|
} |
|
1785
|
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
// Skip spaces |
|
1787
|
0
|
0
|
|
|
|
|
while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v')) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
1788
|
0
|
|
|
|
|
|
str.str++, str.len--; |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
// Check for remaining characters |
|
1791
|
0
|
0
|
|
|
|
|
if (str.len) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': non-digit character found."), false; |
|
1792
|
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
return true; |
|
1794
|
|
|
|
|
|
|
} |
|
1795
|
|
|
|
|
|
|
|
|
1796
|
0
|
|
|
|
|
|
// Parse an int from the given string using the error-reporting overload;
// when parsing fails, the error message is passed to runtime_failure.
int parse_int(string_piece str, const char* value_name) {
  string message;
  int parsed;

  const bool ok = parse_int(str, value_name, parsed, message);
  if (!ok)
    runtime_failure(message);

  return parsed;
}
|
1805
|
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
} // namespace utils |
|
1807
|
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
///////// |
|
1809
|
|
|
|
|
|
|
// File: utils/path_from_utf8.h |
|
1810
|
|
|
|
|
|
|
///////// |
|
1811
|
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
1813
|
|
|
|
|
|
|
// |
|
1814
|
|
|
|
|
|
|
// Copyright 2022 Institute of Formal and Applied Linguistics, Faculty of |
|
1815
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1816
|
|
|
|
|
|
|
// |
|
1817
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1818
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1819
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1820
|
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
namespace utils { |
|
1822
|
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
// |
|
1824
|
|
|
|
|
|
|
// Declarations |
|
1825
|
|
|
|
|
|
|
// |
|
1826
|
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
1828
|
|
|
|
|
|
|
inline wstring path_from_utf8(const char* str); |
|
1829
|
|
|
|
|
|
|
inline wstring path_from_utf8(const string& str); |
|
1830
|
|
|
|
|
|
|
#else |
|
1831
|
|
|
|
|
|
|
inline string path_from_utf8(const char* str); |
|
1832
|
|
|
|
|
|
|
inline const string& path_from_utf8(const string& str); |
|
1833
|
|
|
|
|
|
|
#endif |
|
1834
|
|
|
|
|
|
|
|
|
1835
|
|
|
|
|
|
|
// |
|
1836
|
|
|
|
|
|
|
// Definitions |
|
1837
|
|
|
|
|
|
|
// |
|
1838
|
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
#ifdef _WIN32 |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
inline wstring path_from_utf8(const char* str) {
  // We could implement this using codecvt_utf8_utf16, but it is not available
  // in GCC 4.9, which we still use. We could also use MultiByteToWideChar,
  // but using it would require changing our build infrastructure -- hence
  // we implement the conversion manually.
  wstring wstr;

  // True if the byte at p is a UTF-8 continuation byte (10xxxxxx). The
  // terminating NUL is not one, so decoding never reads past the end.
  auto is_continuation = [](const char* p) {
    unsigned char byte = (unsigned char)*p;
    return byte >= 0x80 && byte < 0xC0;
  };

  while (*str) {
    // Consume the lead byte and determine how many continuation bytes follow.
    unsigned char lead = (unsigned char)*str++;
    char32_t chr;
    int expected = 0;

    if (lead < 0x80) chr = lead;
    else if (lead < 0xC0) chr = '?';  // stray continuation byte
    else if (lead < 0xE0) chr = lead & 0x1F, expected = 1;
    else if (lead < 0xF0) chr = lead & 0x0F, expected = 2;
    else if (lead < 0xF8) chr = lead & 0x07, expected = 3;
    else chr = '?';                   // invalid lead byte

    // Accumulate continuation bytes; an invalid one is left unconsumed and
    // the whole sequence read so far is replaced by '?'.
    for (; expected; expected--) {
      if (!is_continuation(str)) {
        chr = '?';
        break;
      }
      chr = (chr << 6) + (((unsigned char)*str++) & 0x3F);
    }

    // Emit the code point as UTF-16, using a surrogate pair when needed.
    if (chr <= 0xFFFF) {
      wstr.push_back(chr);
    } else if (chr <= 0x10FFFF) {
      char32_t offset = chr - 0x10000;
      wstr.push_back(0xD800 + (offset >> 10));
      wstr.push_back(0xDC00 + (offset & 0x3FF));
    } else {
      wstr.push_back('?');
    }
  }
  return wstr;
}
|
1887
|
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
// Convert the UTF-8 string to a wide-character path by delegating to the
// overload taking a NUL-terminated C string.
inline wstring path_from_utf8(const string& str) {
  const char* utf8_path = str.c_str();
  return path_from_utf8(utf8_path);
}
|
1891
|
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
#else |
|
1893
|
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
// In this (non-_WIN32) branch the UTF-8 bytes are used as the path
// unchanged; only a string copy of the given C string is made.
inline string path_from_utf8(const char* str) {
  return string(str);
}
|
1897
|
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
// In this (non-_WIN32) branch the given string is already usable as a path,
// so a reference to the very same object is returned and nothing is copied.
inline const string& path_from_utf8(const string& str) {
  const string& path = str;
  return path;
}
|
1901
|
|
|
|
|
|
|
|
|
1902
|
|
|
|
|
|
|
#endif |
|
1903
|
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
} // namespace utils |
|
1905
|
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
///////// |
|
1907
|
|
|
|
|
|
|
// File: utils/split.h |
|
1908
|
|
|
|
|
|
|
///////// |
|
1909
|
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
1911
|
|
|
|
|
|
|
// |
|
1912
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
1913
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1914
|
|
|
|
|
|
|
// |
|
1915
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1916
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1917
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1918
|
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
namespace utils { |
|
1920
|
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
// |
|
1922
|
|
|
|
|
|
|
// Declarations |
|
1923
|
|
|
|
|
|
|
// |
|
1924
|
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
// Split given text on the separator character. |
|
1926
|
|
|
|
|
|
|
inline void split(const string& text, char sep, vector& tokens); |
|
1927
|
|
|
|
|
|
|
inline void split(string_piece text, char sep, vector& tokens); |
|
1928
|
|
|
|
|
|
|
|
|
1929
|
|
|
|
|
|
|
// |
|
1930
|
|
|
|
|
|
|
// Definitions |
|
1931
|
|
|
|
|
|
|
// |
|
1932
|
|
|
|
|
|
|
|
|
1933
|
0
|
|
|
|
|
|
// Split text on every occurrence of sep and store the pieces in tokens
// (which is cleared first). An empty text yields no tokens; otherwise the
// number of tokens is one more than the number of separators, so empty
// tokens are produced for adjacent, leading and trailing separators.
void split(const string& text, char sep, vector<string>& tokens) {
  tokens.clear();
  if (text.empty()) return;

  string::size_type start = 0;
  while (true) {
    string::size_type found = text.find(sep, start);
    if (found == string::npos) break;

    tokens.emplace_back(text, start, found - start);
    start = found + 1;
  }

  // The rest of the text after the last separator.
  tokens.emplace_back(text, start);
}
|
1943
|
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
void split(string_piece text, char sep, vector& tokens) { |
|
1945
|
|
|
|
|
|
|
tokens.clear(); |
|
1946
|
|
|
|
|
|
|
if (!text.len) return; |
|
1947
|
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
const char* str = text.str; |
|
1949
|
|
|
|
|
|
|
for (const char* next; (next = (const char*) memchr(str, sep, text.str + text.len - str)); str = next + 1) |
|
1950
|
|
|
|
|
|
|
tokens.emplace_back(str, next - str); |
|
1951
|
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
tokens.emplace_back(str, text.str + text.len - str); |
|
1953
|
|
|
|
|
|
|
} |
|
1954
|
|
|
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
} // namespace utils |
|
1956
|
|
|
|
|
|
|
|
|
1957
|
|
|
|
|
|
|
///////// |
|
1958
|
|
|
|
|
|
|
// File: utils/url_detector.h |
|
1959
|
|
|
|
|
|
|
///////// |
|
1960
|
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
1962
|
|
|
|
|
|
|
// |
|
1963
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
1964
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1965
|
|
|
|
|
|
|
// |
|
1966
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1967
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1968
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1969
|
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
namespace utils { |
|
1971
|
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
// Detector of URLs and emails in text; offers only the static method detect.
class url_detector {
 public:
  // Result of the detection.
  enum url_type { NO_URL = 0, URL = 1, EMAIL = 2 };

  // Detect whether given string is an url or an email.
  // If length==nullptr, the whole str must match.
  // If length!=nullptr, length of longest matching prefix is returned.
  static url_type detect(string_piece str, size_t* length = nullptr);
};
|
1981
|
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
} // namespace utils |
|
1983
|
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
///////// |
|
1985
|
|
|
|
|
|
|
// File: features/feature_processor_instances.cpp |
|
1986
|
|
|
|
|
|
|
///////// |
|
1987
|
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
1989
|
|
|
|
|
|
|
// |
|
1990
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
1991
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
1992
|
|
|
|
|
|
|
// |
|
1993
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
1994
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
1995
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
1996
|
|
|
|
|
|
|
|
|
1997
|
|
|
|
|
|
|
// Helper functions defined as macros so that they can access arguments without passing them.
// The expansions refer to `sentence` and `window` of the scope they are used in.

// Apply Feature to the words at distance at most `window` from position I.
#define apply_in_window(I, Feature) apply_in_range(I, Feature, -window, window)

// Evaluate Feature once and, unless it is ner_feature_unknown, append the
// value `Feature + offset` to sentence.features[I + offset] for every offset
// in [Left, Right] for which I + offset lies in [0, sentence.size).
// I may lie outside of the sentence.
#define apply_in_range(I, Feature, Left, Right) { \
  ner_feature _feature = (Feature); \
  if (_feature != ner_feature_unknown) \
    for (int _w = int(I) + (Left) < 0 ? 0 : int(I) + (Left), \
           _end = int(I) + (Right) + 1 < int(sentence.size) ? int(I) + (Right) + 1 : sentence.size; \
         _w < _end; _w++) \
      sentence.features[_w].emplace_back(_feature + _w - int(I)); \
}

// Evaluate Feature once and, unless it is ner_feature_unknown, apply it in
// window from the positions -1, ..., -window before the sentence and from the
// positions sentence.size, ..., sentence.size - 1 + window after the sentence.
#define apply_outer_words_in_window(Feature) { \
  ner_feature _outer_feature = (Feature); \
  if (_outer_feature != ner_feature_unknown) \
    for (int _i = 1; _i <= window; _i++) { \
        apply_in_window(-_i, _outer_feature); \
        apply_in_window(sentence.size - 1 + _i, _outer_feature); \
    } \
}

// Feature of the empty string, without performing the lookup.
#define lookup_empty() /* lookup(string()) always returns */(window)
|
2019
|
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////// |
|
2021
|
|
|
|
|
|
|
// Feature processor instances (ordered lexicographically) // |
|
2022
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////// |
|
2023
|
|
|
|
|
|
|
namespace feature_processors { |
|
2024
|
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
// BrownClusters |
|
2026
|
0
|
|
|
|
|
|
class brown_clusters : public feature_processor { |
|
2027
|
|
|
|
|
|
|
public: |
|
2028
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
|
2029
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
|
2030
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
|
2031
|
0
|
0
|
|
|
|
|
if (args.size() < 1) return cerr << "BrownCluster requires a cluster file as the first argument!" << endl, false; |
|
2032
|
|
|
|
|
|
|
|
|
2033
|
0
|
|
|
|
|
|
ifstream in(path_from_utf8(args[0]).c_str()); |
|
2034
|
0
|
0
|
|
|
|
|
if (!in.is_open()) return cerr << "Cannot open Brown clusters file '" << args[0] << "'!" << endl, false; |
|
2035
|
|
|
|
|
|
|
|
|
2036
|
|
|
|
|
|
|
vector substrings; |
|
2037
|
0
|
0
|
|
|
|
|
substrings.emplace_back(string::npos); |
|
2038
|
0
|
0
|
|
|
|
|
for (unsigned i = 1; i < args.size(); i++) { |
|
2039
|
0
|
0
|
|
|
|
|
int len = parse_int(args[i].c_str(), "BrownCluster_prefix_length"); |
|
2040
|
0
|
0
|
|
|
|
|
if (len <= 0) |
|
2041
|
0
|
0
|
|
|
|
|
return cerr << "Wrong prefix length '" << len << "' in BrownCluster specification!" << endl, false; |
|
2042
|
|
|
|
|
|
|
else |
|
2043
|
0
|
0
|
|
|
|
|
substrings.emplace_back(len); |
|
2044
|
|
|
|
|
|
|
} |
|
2045
|
|
|
|
|
|
|
|
|
2046
|
0
|
|
|
|
|
|
clusters.clear(); |
|
2047
|
|
|
|
|
|
|
unordered_map cluster_map; |
|
2048
|
|
|
|
|
|
|
unordered_map prefixes_map; |
|
2049
|
|
|
|
|
|
|
string line; |
|
2050
|
0
|
|
|
|
|
|
vector tokens; |
|
2051
|
0
|
0
|
|
|
|
|
while (getline(in, line)) { |
|
|
|
0
|
|
|
|
|
|
|
2052
|
0
|
0
|
|
|
|
|
split(line, '\t', tokens); |
|
2053
|
0
|
0
|
|
|
|
|
if (tokens.size() != 2) return cerr << "Wrong line '" << line << "' in Brown cluster file '" << args[0] << "'!" << endl, false; |
|
2054
|
|
|
|
|
|
|
|
|
2055
|
|
|
|
|
|
|
string cluster = tokens[0], form = tokens[1]; |
|
2056
|
|
|
|
|
|
|
auto it = cluster_map.find(cluster); |
|
2057
|
0
|
0
|
|
|
|
|
if (it == cluster_map.end()) { |
|
2058
|
0
|
|
|
|
|
|
unsigned id = clusters.size(); |
|
2059
|
0
|
0
|
|
|
|
|
clusters.emplace_back(); |
|
2060
|
0
|
0
|
|
|
|
|
for (auto&& substring : substrings) |
|
2061
|
0
|
0
|
|
|
|
|
if (substring == string::npos || substring < cluster.size()) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2062
|
0
|
0
|
|
|
|
|
clusters.back().emplace_back(prefixes_map.emplace(cluster.substr(0, substring), *total_features + (2*window + 1) * (int)prefixes_map.size() + window).first->second); |
|
|
|
0
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
it = cluster_map.emplace(cluster, id).first; |
|
2064
|
|
|
|
|
|
|
} |
|
2065
|
0
|
0
|
|
|
|
|
if (!map.emplace(form, it->second).second) return cerr << "Form '" << form << "' is present twice in Brown cluster file '" << args[0] << "'!" << endl, false; |
|
2066
|
|
|
|
|
|
|
} |
|
2067
|
|
|
|
|
|
|
|
|
2068
|
0
|
|
|
|
|
|
*total_features += (2*window + 1) * prefixes_map.size(); |
|
2069
|
0
|
|
|
|
|
|
return true; |
|
2070
|
|
|
|
|
|
|
} |
|
2071
|
|
|
|
|
|
|
|
|
2072
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
|
2073
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
|
2074
|
|
|
|
|
|
|
|
|
2075
|
0
|
|
|
|
|
|
clusters.resize(data.next_4B()); |
|
2076
|
0
|
0
|
|
|
|
|
for (auto&& cluster : clusters) { |
|
2077
|
0
|
|
|
|
|
|
cluster.resize(data.next_4B()); |
|
2078
|
0
|
0
|
|
|
|
|
for (auto&& feature : cluster) |
|
2079
|
0
|
|
|
|
|
|
feature = data.next_4B(); |
|
2080
|
|
|
|
|
|
|
} |
|
2081
|
0
|
|
|
|
|
|
} |
|
2082
|
|
|
|
|
|
|
|
|
2083
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
|
2084
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
|
2085
|
|
|
|
|
|
|
|
|
2086
|
0
|
|
|
|
|
|
enc.add_4B(clusters.size()); |
|
2087
|
0
|
0
|
|
|
|
|
for (auto&& cluster : clusters) { |
|
2088
|
0
|
|
|
|
|
|
enc.add_4B(cluster.size()); |
|
2089
|
0
|
0
|
|
|
|
|
for (auto&& feature : cluster) |
|
2090
|
0
|
|
|
|
|
|
enc.add_4B(feature); |
|
2091
|
|
|
|
|
|
|
} |
|
2092
|
0
|
|
|
|
|
|
} |
|
2093
|
|
|
|
|
|
|
|
|
2094
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& /*buffer*/) const override { |
|
2095
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2096
|
0
|
|
|
|
|
|
auto it = map.find(sentence.words[i].raw_lemma); |
|
2097
|
0
|
0
|
|
|
|
|
if (it != map.end()) { |
|
2098
|
0
|
|
|
|
|
|
auto& cluster = clusters[it->second]; |
|
2099
|
0
|
0
|
|
|
|
|
for (auto&& feature : cluster) |
|
2100
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature); |
|
|
|
0
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
} |
|
2102
|
|
|
|
|
|
|
} |
|
2103
|
0
|
|
|
|
|
|
} |
|
2104
|
|
|
|
|
|
|
|
|
2105
|
|
|
|
|
|
|
private: |
|
2106
|
|
|
|
|
|
|
vector> clusters; |
|
2107
|
|
|
|
|
|
|
}; |
|
2108
|
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
// CzechAddContainers |
|
2110
|
0
|
|
|
|
|
|
class czech_add_containers : public feature_processor { |
|
2111
|
|
|
|
|
|
|
public: |
|
2112
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, ner_feature* total_features, const nlp_pipeline& pipeline) override { |
|
2113
|
0
|
0
|
|
|
|
|
if (window) return cerr << "CzechAddContainers cannot have non-zero window!" << endl, false; |
|
2114
|
|
|
|
|
|
|
|
|
2115
|
0
|
|
|
|
|
|
return feature_processor::parse(window, args, entities, total_features, pipeline); |
|
2116
|
|
|
|
|
|
|
} |
|
2117
|
|
|
|
|
|
|
|
|
2118
|
0
|
|
|
|
|
|
virtual void process_entities(ner_sentence& /*sentence*/, vector& entities, vector& buffer) const override { |
|
2119
|
0
|
|
|
|
|
|
buffer.clear(); |
|
2120
|
|
|
|
|
|
|
|
|
2121
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < entities.size(); i++) { |
|
2122
|
|
|
|
|
|
|
// P if ps+ pf+ |
|
2123
|
0
|
0
|
|
|
|
|
if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2124
|
0
|
|
|
|
|
|
unsigned j = i + 1; |
|
2125
|
0
|
0
|
|
|
|
|
while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("pf") == 0) j++; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2126
|
0
|
0
|
|
|
|
|
if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2127
|
0
|
|
|
|
|
|
j++; |
|
2128
|
0
|
0
|
|
|
|
|
while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) j++; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2129
|
0
|
|
|
|
|
|
buffer.emplace_back(entities[i].start, entities[j - 1].start + entities[j - 1].length - entities[i].start, "P"); |
|
2130
|
|
|
|
|
|
|
} |
|
2131
|
|
|
|
|
|
|
} |
|
2132
|
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
// T if td tm ty | td tm |
|
2134
|
0
|
0
|
|
|
|
|
if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2135
|
0
|
|
|
|
|
|
unsigned j = i + 2; |
|
2136
|
0
|
0
|
|
|
|
|
if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ty") == 0) j++; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2137
|
0
|
|
|
|
|
|
buffer.emplace_back(entities[i].start, entities[j - 1].start + entities[j - 1].length - entities[i].start, "T"); |
|
2138
|
|
|
|
|
|
|
} |
|
2139
|
|
|
|
|
|
|
// T if !td tm ty |
|
2140
|
0
|
0
|
|
|
|
|
if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0)) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2141
|
0
|
0
|
|
|
|
|
if (i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("ty") == 0) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2142
|
0
|
|
|
|
|
|
buffer.emplace_back(entities[i].start, entities[i + 1].start + entities[i + 1].length - entities[i].start, "T"); |
|
2143
|
|
|
|
|
|
|
|
|
2144
|
0
|
|
|
|
|
|
buffer.push_back(entities[i]); |
|
2145
|
|
|
|
|
|
|
} |
|
2146
|
|
|
|
|
|
|
|
|
2147
|
0
|
0
|
|
|
|
|
if (buffer.size() > entities.size()) entities = buffer; |
|
2148
|
0
|
|
|
|
|
|
} |
|
2149
|
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
// CzechAddContainers used to be entity_processor which had empty load and save methods. |
|
2151
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& /*data*/, const nlp_pipeline& /*pipeline*/) override {} |
|
2152
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& /*enc*/) override {} |
|
2153
|
|
|
|
|
|
|
}; |
|
2154
|
|
|
|
|
|
|
|
|
2155
|
|
|
|
|
|
|
// CzechLemmaTerm |
|
2156
|
0
|
|
|
|
|
|
class czech_lemma_term : public feature_processor { |
|
2157
|
|
|
|
|
|
|
public: |
|
2158
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2159
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2160
|
0
|
0
|
|
|
|
|
for (unsigned pos = 0; pos + 2 < sentence.words[i].lemma_comments.size(); pos++) |
|
2161
|
0
|
0
|
|
|
|
|
if (sentence.words[i].lemma_comments[pos] == '_' && sentence.words[i].lemma_comments[pos+1] == ';') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2162
|
0
|
|
|
|
|
|
buffer.assign(1, sentence.words[i].lemma_comments[pos+2]); |
|
2163
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
|
0
|
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
} |
|
2165
|
|
|
|
|
|
|
} |
|
2166
|
0
|
|
|
|
|
|
} |
|
2167
|
|
|
|
|
|
|
}; |
|
2168
|
|
|
|
|
|
|
|
|
2169
|
|
|
|
|
|
|
// Form |
|
2170
|
0
|
|
|
|
|
|
class form : public feature_processor { |
|
2171
|
|
|
|
|
|
|
public: |
|
2172
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
|
2173
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2174
|
54
|
100
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].form, total_features)); |
|
|
|
100
|
|
|
|
|
|
|
2175
|
|
|
|
|
|
|
|
|
2176
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2177
|
4
|
|
|
|
|
|
} |
|
2178
|
|
|
|
|
|
|
}; |
|
2179
|
|
|
|
|
|
|
|
|
2180
|
|
|
|
|
|
|
// FormCapitalization |
|
2181
|
0
|
|
|
|
|
|
class form_capitalization : public feature_processor { |
|
2182
|
|
|
|
|
|
|
public: |
|
2183
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2184
|
|
|
|
|
|
|
using namespace unilib; |
|
2185
|
|
|
|
|
|
|
|
|
2186
|
0
|
|
|
|
|
|
ner_feature fst_cap = lookup(buffer.assign("f"), total_features); |
|
2187
|
0
|
|
|
|
|
|
ner_feature all_cap = lookup(buffer.assign("a"), total_features); |
|
2188
|
0
|
|
|
|
|
|
ner_feature mixed_cap = lookup(buffer.assign("m"), total_features); |
|
2189
|
|
|
|
|
|
|
|
|
2190
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2191
|
|
|
|
|
|
|
bool was_upper = false, was_lower = false; |
|
2192
|
|
|
|
|
|
|
|
|
2193
|
0
|
|
|
|
|
|
auto* form = sentence.words[i].form.c_str(); |
|
2194
|
|
|
|
|
|
|
char32_t chr; |
|
2195
|
0
|
0
|
|
|
|
|
for (bool first = true; (chr = utf8::decode(form)); first = false) { |
|
2196
|
|
|
|
|
|
|
auto category = unicode::category(chr); |
|
2197
|
0
|
0
|
|
|
|
|
was_upper = was_upper || category & unicode::Lut; |
|
|
|
0
|
|
|
|
|
|
|
2198
|
0
|
0
|
|
|
|
|
was_lower = was_lower || category & unicode::Ll; |
|
|
|
0
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
|
|
2200
|
0
|
0
|
|
|
|
|
if (first && was_upper) apply_in_window(i, fst_cap); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
} |
|
2202
|
0
|
0
|
|
|
|
|
if (was_upper && !was_lower) apply_in_window(i, all_cap); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2203
|
0
|
0
|
|
|
|
|
if (was_upper && was_lower) apply_in_window(i, mixed_cap); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
} |
|
2205
|
0
|
|
|
|
|
|
} |
|
2206
|
|
|
|
|
|
|
}; |
|
2207
|
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
// FormCaseNormalized |
|
2209
|
0
|
|
|
|
|
|
class form_case_normalized : public feature_processor { |
|
2210
|
|
|
|
|
|
|
public: |
|
2211
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2212
|
|
|
|
|
|
|
using namespace unilib; |
|
2213
|
|
|
|
|
|
|
|
|
2214
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2215
|
|
|
|
|
|
|
buffer.clear(); |
|
2216
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(sentence.words[i].form)) |
|
2217
|
0
|
0
|
|
|
|
|
utf8::append(buffer, buffer.empty() ? chr : unicode::lowercase(chr)); |
|
2218
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
|
0
|
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
} |
|
2220
|
|
|
|
|
|
|
|
|
2221
|
0
|
0
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2222
|
0
|
|
|
|
|
|
} |
|
2223
|
|
|
|
|
|
|
}; |
|
2224
|
|
|
|
|
|
|
|
|
2225
|
|
|
|
|
|
|
// Gazetteers |
|
2226
|
0
|
|
|
|
|
|
class gazetteers : public feature_processor { |
|
2227
|
|
|
|
|
|
|
public: |
|
2228
|
|
|
|
|
|
|
enum { G = 0, U = 1, B = 2, L = 3, I = 4 }; |
|
2229
|
|
|
|
|
|
|
|
|
2230
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
|
2231
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
|
2232
|
|
|
|
|
|
|
cerr << "The 'Gazetteers' feature template is deprecated, use 'GazetteersEnhanced' !" << endl; |
|
2233
|
|
|
|
|
|
|
|
|
2234
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
|
2235
|
|
|
|
|
|
|
|
|
2236
|
0
|
|
|
|
|
|
gazetteers_info.clear(); |
|
2237
|
0
|
0
|
|
|
|
|
for (auto&& arg : args) { |
|
2238
|
0
|
0
|
|
|
|
|
ifstream in(path_from_utf8(arg).c_str()); |
|
2239
|
0
|
0
|
|
|
|
|
if (!in.is_open()) return cerr << "Cannot open gazetteers file '" << arg << "'!" << endl, false; |
|
2240
|
|
|
|
|
|
|
|
|
2241
|
|
|
|
|
|
|
unsigned longest = 0; |
|
2242
|
|
|
|
|
|
|
string gazetteer; |
|
2243
|
|
|
|
|
|
|
string line; |
|
2244
|
0
|
|
|
|
|
|
vector tokens; |
|
2245
|
0
|
0
|
|
|
|
|
while (getline(in, line)) { |
|
|
|
0
|
|
|
|
|
|
|
2246
|
0
|
0
|
|
|
|
|
split(line, ' ', tokens); |
|
2247
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < tokens.size(); i++) |
|
2248
|
0
|
0
|
|
|
|
|
if (!tokens[i][0]) |
|
2249
|
0
|
|
|
|
|
|
tokens.erase(tokens.begin() + i--); |
|
2250
|
0
|
0
|
|
|
|
|
if (tokens.size() > longest) longest = tokens.size(); |
|
2251
|
|
|
|
|
|
|
|
|
2252
|
|
|
|
|
|
|
gazetteer.clear(); |
|
2253
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < tokens.size(); i++) { |
|
2254
|
0
|
0
|
|
|
|
|
if (i) gazetteer += ' '; |
|
2255
|
|
|
|
|
|
|
gazetteer += tokens[i]; |
|
2256
|
0
|
|
|
|
|
|
auto it = map.emplace(gazetteer, (int)gazetteers_info.size()).first; |
|
2257
|
0
|
0
|
|
|
|
|
if (it->second == gazetteers_info.size()) gazetteers_info.emplace_back(); |
|
|
|
0
|
|
|
|
|
|
|
2258
|
0
|
|
|
|
|
|
auto& info = gazetteers_info[it->second]; |
|
2259
|
0
|
0
|
|
|
|
|
if (i + 1 < tokens.size()) |
|
2260
|
0
|
|
|
|
|
|
info.prefix_of_longer |= true; |
|
2261
|
|
|
|
|
|
|
else |
|
2262
|
0
|
0
|
|
|
|
|
if (find(info.features.begin(), info.features.end(), *total_features + window) == info.features.end()) |
|
2263
|
0
|
0
|
|
|
|
|
info.features.emplace_back(*total_features + window); |
|
2264
|
|
|
|
|
|
|
} |
|
2265
|
|
|
|
|
|
|
} |
|
2266
|
0
|
0
|
|
|
|
|
*total_features += (2*window + 1) * (longest == 0 ? 0 : longest == 1 ? U+1 : longest == 2 ? L+1 : I+1); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
} |
|
2268
|
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
return true; |
|
2270
|
|
|
|
|
|
|
} |
|
2271
|
|
|
|
|
|
|
|
|
2272
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
|
2273
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
|
2274
|
|
|
|
|
|
|
|
|
2275
|
0
|
|
|
|
|
|
gazetteers_info.resize(data.next_4B()); |
|
2276
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteers_info) { |
|
2277
|
0
|
|
|
|
|
|
gazetteer.prefix_of_longer = data.next_1B(); |
|
2278
|
0
|
|
|
|
|
|
gazetteer.features.resize(data.next_1B()); |
|
2279
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteer.features) |
|
2280
|
0
|
|
|
|
|
|
feature = data.next_4B(); |
|
2281
|
|
|
|
|
|
|
} |
|
2282
|
0
|
|
|
|
|
|
} |
|
2283
|
|
|
|
|
|
|
|
|
2284
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
|
2285
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
|
2286
|
|
|
|
|
|
|
|
|
2287
|
0
|
|
|
|
|
|
enc.add_4B(gazetteers_info.size()); |
|
2288
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteers_info) { |
|
2289
|
0
|
|
|
|
|
|
enc.add_1B(gazetteer.prefix_of_longer); |
|
2290
|
0
|
|
|
|
|
|
enc.add_1B(gazetteer.features.size()); |
|
2291
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteer.features) |
|
2292
|
0
|
|
|
|
|
|
enc.add_4B(feature); |
|
2293
|
|
|
|
|
|
|
} |
|
2294
|
0
|
|
|
|
|
|
} |
|
2295
|
|
|
|
|
|
|
|
|
2296
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& buffer) const override { |
|
2297
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2298
|
0
|
|
|
|
|
|
auto it = map.find(sentence.words[i].raw_lemma); |
|
2299
|
0
|
0
|
|
|
|
|
if (it == map.end()) continue; |
|
2300
|
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
// Apply regular gazetteer feature G + unigram gazetteer feature U |
|
2302
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteers_info[it->second].features) { |
|
2303
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature + G * (2*window + 1)); |
|
|
|
0
|
|
|
|
|
|
|
2304
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature + U * (2*window + 1)); |
|
|
|
0
|
|
|
|
|
|
|
2305
|
|
|
|
|
|
|
} |
|
2306
|
|
|
|
|
|
|
|
|
2307
|
0
|
0
|
|
|
|
|
for (unsigned j = i + 1; gazetteers_info[it->second].prefix_of_longer && j < sentence.size; j++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2308
|
0
|
0
|
|
|
|
|
if (j == i + 1) buffer.assign(sentence.words[i].raw_lemma); |
|
2309
|
|
|
|
|
|
|
buffer += ' '; |
|
2310
|
0
|
|
|
|
|
|
buffer += sentence.words[j].raw_lemma; |
|
2311
|
|
|
|
|
|
|
it = map.find(buffer); |
|
2312
|
0
|
0
|
|
|
|
|
if (it == map.end()) break; |
|
2313
|
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
// Apply regular gazetteer feature G + position specific gazetteers B, I, L |
|
2315
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteers_info[it->second].features) |
|
2316
|
0
|
0
|
|
|
|
|
for (unsigned g = i; g <= j; g++) { |
|
2317
|
0
|
0
|
|
|
|
|
apply_in_window(g, feature + G * (2*window + 1)); |
|
|
|
0
|
|
|
|
|
|
|
2318
|
0
|
0
|
|
|
|
|
apply_in_window(g, feature + (g == i ? B : g == j ? L : I) * (2*window + 1)); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2319
|
|
|
|
|
|
|
} |
|
2320
|
|
|
|
|
|
|
} |
|
2321
|
|
|
|
|
|
|
} |
|
2322
|
0
|
|
|
|
|
|
} |
|
2323
|
|
|
|
|
|
|
|
|
2324
|
|
|
|
|
|
|
private: |
|
2325
|
0
|
|
|
|
|
|
struct gazetteer_info { |
|
2326
|
|
|
|
|
|
|
vector features; |
|
2327
|
|
|
|
|
|
|
bool prefix_of_longer; |
|
2328
|
|
|
|
|
|
|
}; |
|
2329
|
|
|
|
|
|
|
vector gazetteers_info; |
|
2330
|
|
|
|
|
|
|
}; |
|
2331
|
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
// GazetteersEnhanced |
|
2333
|
0
|
|
|
|
|
|
class gazetteers_enhanced : public feature_processor { |
|
2334
|
|
|
|
|
|
|
public: |
|
2335
|
|
|
|
|
|
|
enum { G = 0, U = 1, B = 2, L = 3, I = 4, TOTAL = 5 }; |
|
2336
|
|
|
|
|
|
|
|
|
2337
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
|
2338
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
|
2339
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
|
2340
|
|
|
|
|
|
|
|
|
2341
|
0
|
|
|
|
|
|
gazetteer_metas.clear(); |
|
2342
|
|
|
|
|
|
|
gazetteer_lists.clear(); |
|
2343
|
|
|
|
|
|
|
|
|
2344
|
0
|
0
|
|
|
|
|
if (args.size() < 4) return cerr << "Not enough parameters to GazetteersEnhanced!" << endl, false; |
|
2345
|
0
|
0
|
|
|
|
|
if (args.size() & 1) return cerr << "Odd number of parameters to GazetteersEnhanced!" << endl, false; |
|
2346
|
|
|
|
|
|
|
|
|
2347
|
0
|
0
|
|
|
|
|
if (args[0] == "form") match = MATCH_FORM; |
|
2348
|
0
|
0
|
|
|
|
|
else if (args[0] == "rawlemma") match = MATCH_RAWLEMMA; |
|
2349
|
0
|
0
|
|
|
|
|
else if (args[0] == "rawlemmas") match = MATCH_RAWLEMMAS; |
|
2350
|
0
|
|
|
|
|
|
else return cerr << "First parameter of GazetteersEnhanced not one of form/rawlemma/rawlemmas!" << endl, false; |
|
2351
|
|
|
|
|
|
|
|
|
2352
|
0
|
0
|
|
|
|
|
if (args[1] == "embed_in_model") embed = EMBED_IN_MODEL; |
|
2353
|
0
|
0
|
|
|
|
|
else if (args[1] == "out_of_model") embed = OUT_OF_MODEL; |
|
2354
|
0
|
|
|
|
|
|
else return cerr << "Second parameter of GazetteersEnhanced not one of [embed_in|out_of]_model!" << endl, false; |
|
2355
|
|
|
|
|
|
|
|
|
2356
|
0
|
0
|
|
|
|
|
for (unsigned i = 2; i < args.size(); i += 2) { |
|
2357
|
0
|
|
|
|
|
|
gazetteer_metas.emplace_back(); |
|
2358
|
0
|
|
|
|
|
|
gazetteer_metas.back().basename = args[i]; |
|
2359
|
0
|
|
|
|
|
|
gazetteer_metas.back().feature = *total_features + window; *total_features += TOTAL * (2 * window + 1); |
|
2360
|
0
|
0
|
|
|
|
|
gazetteer_metas.back().entity = args[i + 1] == "NONE" ? -1 : entities.parse(args[i + 1].c_str(), true); |
|
2361
|
|
|
|
|
|
|
} |
|
2362
|
|
|
|
|
|
|
|
|
2363
|
0
|
|
|
|
|
|
entity_list.clear(); |
|
2364
|
0
|
0
|
|
|
|
|
for (entity_type i = 0; i < entities.size(); i++) |
|
2365
|
0
|
|
|
|
|
|
entity_list.push_back(entities.name(i)); |
|
2366
|
|
|
|
|
|
|
|
|
2367
|
0
|
0
|
|
|
|
|
if (!load_gazetteer_lists(pipeline, embed == EMBED_IN_MODEL)) return false; |
|
2368
|
|
|
|
|
|
|
|
|
2369
|
0
|
|
|
|
|
|
return true; |
|
2370
|
|
|
|
|
|
|
} |
|
2371
|
|
|
|
|
|
|
|
|
2372
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
|
2373
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
|
2374
|
|
|
|
|
|
|
|
|
2375
|
0
|
|
|
|
|
|
match = data.next_4B(); |
|
2376
|
0
|
|
|
|
|
|
embed = OUT_OF_MODEL; |
|
2377
|
|
|
|
|
|
|
|
|
2378
|
0
|
|
|
|
|
|
gazetteer_metas.resize(data.next_4B()); |
|
2379
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_meta : gazetteer_metas) { |
|
2380
|
0
|
|
|
|
|
|
data.next_str(gazetteer_meta.basename); |
|
2381
|
0
|
|
|
|
|
|
gazetteer_meta.feature = data.next_4B(); |
|
2382
|
0
|
|
|
|
|
|
gazetteer_meta.entity = data.next_4B(); |
|
2383
|
|
|
|
|
|
|
} |
|
2384
|
|
|
|
|
|
|
|
|
2385
|
0
|
|
|
|
|
|
gazetteer_lists.resize(data.next_4B()); |
|
2386
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) { |
|
2387
|
0
|
|
|
|
|
|
gazetteer_list.gazetteers.resize(data.next_4B()); |
|
2388
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) |
|
2389
|
0
|
|
|
|
|
|
data.next_str(gazetteer); |
|
2390
|
0
|
|
|
|
|
|
gazetteer_list.feature = data.next_4B(); |
|
2391
|
0
|
|
|
|
|
|
gazetteer_list.entity = data.next_4B(); |
|
2392
|
0
|
|
|
|
|
|
gazetteer_list.mode = data.next_4B(); |
|
2393
|
|
|
|
|
|
|
} |
|
2394
|
|
|
|
|
|
|
|
|
2395
|
0
|
|
|
|
|
|
entity_list.resize(data.next_4B()); |
|
2396
|
0
|
0
|
|
|
|
|
for (auto&& entity : entity_list) |
|
2397
|
0
|
|
|
|
|
|
data.next_str(entity); |
|
2398
|
|
|
|
|
|
|
|
|
2399
|
0
|
|
|
|
|
|
load_gazetteer_lists(pipeline, false); |
|
2400
|
0
|
|
|
|
|
|
} |
|
2401
|
|
|
|
|
|
|
|
|
2402
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
|
2403
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
|
2404
|
|
|
|
|
|
|
|
|
2405
|
0
|
|
|
|
|
|
enc.add_4B(match); |
|
2406
|
|
|
|
|
|
|
|
|
2407
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_metas.size()); |
|
2408
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_meta : gazetteer_metas) { |
|
2409
|
0
|
|
|
|
|
|
enc.add_str(gazetteer_meta.basename); |
|
2410
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_meta.feature); |
|
2411
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_meta.entity); |
|
2412
|
|
|
|
|
|
|
} |
|
2413
|
|
|
|
|
|
|
|
|
2414
|
0
|
0
|
|
|
|
|
if (embed == EMBED_IN_MODEL) { |
|
2415
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_lists.size()); |
|
2416
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) { |
|
2417
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.gazetteers.size()); |
|
2418
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) |
|
2419
|
0
|
|
|
|
|
|
enc.add_str(gazetteer); |
|
2420
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.feature); |
|
2421
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.entity); |
|
2422
|
0
|
|
|
|
|
|
enc.add_4B(gazetteer_list.mode); |
|
2423
|
|
|
|
|
|
|
} |
|
2424
|
|
|
|
|
|
|
} else { |
|
2425
|
|
|
|
|
|
|
enc.add_4B(0); |
|
2426
|
|
|
|
|
|
|
} |
|
2427
|
|
|
|
|
|
|
|
|
2428
|
0
|
|
|
|
|
|
enc.add_4B(entity_list.size()); |
|
2429
|
0
|
0
|
|
|
|
|
for (auto&& entity : entity_list) |
|
2430
|
0
|
|
|
|
|
|
enc.add_str(entity); |
|
2431
|
0
|
|
|
|
|
|
} |
|
2432
|
|
|
|
|
|
|
|
|
2433
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& /*buffer*/) const override { |
|
2434
|
|
|
|
|
|
|
vector nodes, new_nodes; |
|
2435
|
0
|
0
|
|
|
|
|
vector> features(sentence.size); |
|
2436
|
|
|
|
|
|
|
|
|
2437
|
0
|
0
|
|
|
|
|
vector> recased_match_sources(sentence.size); |
|
2438
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2439
|
0
|
0
|
|
|
|
|
recase_match_source(sentence.words[i], RECASE_ANY, recased_match_sources[i]); |
|
2440
|
|
|
|
|
|
|
|
|
2441
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2442
|
|
|
|
|
|
|
unsigned hard_pre_length = 0, hard_pre_node = -1; |
|
2443
|
|
|
|
|
|
|
bool hard_pre_possible = true; |
|
2444
|
0
|
|
|
|
|
|
nodes.assign(1, 0); |
|
2445
|
0
|
0
|
|
|
|
|
for (unsigned j = i; j < sentence.size && !nodes.empty(); j++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
new_nodes.clear(); |
|
2447
|
0
|
0
|
|
|
|
|
for (auto&& node : nodes) |
|
2448
|
0
|
0
|
|
|
|
|
if (!gazetteers_trie[node].children.empty()) |
|
2449
|
0
|
0
|
|
|
|
|
for (auto&& match_source : recased_match_sources[j]) { |
|
2450
|
0
|
|
|
|
|
|
auto range = gazetteers_trie[node].children.equal_range(match_source); |
|
2451
|
0
|
0
|
|
|
|
|
for (auto&& it = range.first; it != range.second; it++) |
|
2452
|
0
|
0
|
|
|
|
|
append_unless_exists(new_nodes, it->second); |
|
2453
|
|
|
|
|
|
|
} |
|
2454
|
|
|
|
|
|
|
|
|
2455
|
0
|
0
|
|
|
|
|
hard_pre_possible = hard_pre_possible && !sentence.probabilities[j].local_filled; |
|
|
|
0
|
|
|
|
|
|
|
2456
|
0
|
0
|
|
|
|
|
if (hard_pre_possible) |
|
2457
|
0
|
0
|
|
|
|
|
for (auto&& node : new_nodes) |
|
2458
|
0
|
0
|
|
|
|
|
if (gazetteers_trie[node].mode == HARD_PRE && |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2459
|
0
|
0
|
|
|
|
|
((j - i + 1) > hard_pre_length || node < hard_pre_node)) |
|
2460
|
0
|
|
|
|
|
|
hard_pre_length = j - i + 1, hard_pre_node = node; |
|
2461
|
|
|
|
|
|
|
|
|
2462
|
|
|
|
|
|
|
// Fill features |
|
2463
|
0
|
0
|
|
|
|
|
for (auto&& node : new_nodes) |
|
2464
|
0
|
0
|
|
|
|
|
for (auto&& feature : gazetteers_trie[node].features) |
|
2465
|
0
|
0
|
|
|
|
|
for (unsigned k = i; k <= j; k++) { |
|
2466
|
0
|
0
|
|
|
|
|
bilou_type type = j == i ? bilou_type_U : k == i ? bilou_type_B : k == j ? bilou_type_L : bilou_type_I; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2467
|
0
|
0
|
|
|
|
|
append_unless_exists(features[k], feature + G * (2 * window + 1)); |
|
2468
|
0
|
0
|
|
|
|
|
append_unless_exists(features[k], feature + type * (2 * window + 1)); |
|
2469
|
|
|
|
|
|
|
} |
|
2470
|
|
|
|
|
|
|
|
|
2471
|
|
|
|
|
|
|
nodes.swap(new_nodes); |
|
2472
|
|
|
|
|
|
|
} |
|
2473
|
|
|
|
|
|
|
|
|
2474
|
0
|
0
|
|
|
|
|
if (hard_pre_length) |
|
2475
|
0
|
0
|
|
|
|
|
for (unsigned j = i; j < i + hard_pre_length; j++) { |
|
2476
|
0
|
0
|
|
|
|
|
for (auto&& bilou : sentence.probabilities[j].local.bilou) { |
|
2477
|
0
|
|
|
|
|
|
bilou.probability = 0.; |
|
2478
|
0
|
|
|
|
|
|
bilou.entity = entity_type_unknown; |
|
2479
|
|
|
|
|
|
|
} |
|
2480
|
|
|
|
|
|
|
bilou_type type = hard_pre_length == 1 ? bilou_type_U : |
|
2481
|
0
|
0
|
|
|
|
|
j == i ? bilou_type_B : j + 1 == i + hard_pre_length ? bilou_type_L : bilou_type_I; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2482
|
0
|
|
|
|
|
|
sentence.probabilities[j].local.bilou[type].probability = 1.; |
|
2483
|
0
|
|
|
|
|
|
sentence.probabilities[j].local.bilou[type].entity = gazetteers_trie[hard_pre_node].entity; |
|
2484
|
0
|
|
|
|
|
|
sentence.probabilities[j].local_filled = true; |
|
2485
|
|
|
|
|
|
|
} |
|
2486
|
|
|
|
|
|
|
} |
|
2487
|
|
|
|
|
|
|
|
|
2488
|
|
|
|
|
|
|
// Apply generated features |
|
2489
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2490
|
0
|
0
|
|
|
|
|
for (auto&& feature : features[i]) |
|
2491
|
0
|
0
|
|
|
|
|
apply_in_window(i, feature); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2492
|
0
|
|
|
|
|
|
} |
|
2493
|
|
|
|
|
|
|
|
|
2494
|
0
|
|
|
|
|
|
virtual void process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const override { |
|
2495
|
|
|
|
|
|
|
vector nodes, new_nodes; |
|
2496
|
|
|
|
|
|
|
|
|
2497
|
0
|
0
|
|
|
|
|
vector> recased_match_sources(sentence.size); |
|
2498
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2499
|
0
|
0
|
|
|
|
|
recase_match_source(sentence.words[i], RECASE_ANY, recased_match_sources[i]); |
|
2500
|
|
|
|
|
|
|
|
|
2501
|
0
|
|
|
|
|
|
buffer.clear(); |
|
2502
|
|
|
|
|
|
|
unsigned entity_until = 0; |
|
2503
|
0
|
0
|
|
|
|
|
for (unsigned i = 0, e = 0; i < sentence.size; i++) { |
|
2504
|
0
|
0
|
|
|
|
|
while (e < entities.size() && entities[e].start == i) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2505
|
0
|
0
|
|
|
|
|
if (i + entities[e].length > entity_until) |
|
2506
|
0
|
|
|
|
|
|
entity_until = i + entities[e].length; |
|
2507
|
0
|
0
|
|
|
|
|
buffer.push_back(entities[e++]); |
|
2508
|
|
|
|
|
|
|
} |
|
2509
|
|
|
|
|
|
|
|
|
2510
|
0
|
0
|
|
|
|
|
if (entity_until <= i) { |
|
2511
|
|
|
|
|
|
|
// There is place for a possible POST gazetteer |
|
2512
|
0
|
0
|
|
|
|
|
unsigned free_until = e < entities.size() ? entities[e].start : sentence.size; |
|
2513
|
|
|
|
|
|
|
|
|
2514
|
0
|
|
|
|
|
|
unsigned hard_post_length = 0, hard_post_node = -1; |
|
2515
|
0
|
|
|
|
|
|
nodes.assign(1, 0); |
|
2516
|
0
|
0
|
|
|
|
|
for (unsigned j = i; j < free_until && !nodes.empty(); j++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
new_nodes.clear(); |
|
2518
|
0
|
0
|
|
|
|
|
for (auto&& node : nodes) |
|
2519
|
0
|
0
|
|
|
|
|
if (!gazetteers_trie[node].children.empty()) |
|
2520
|
0
|
0
|
|
|
|
|
for (auto&& match_source : recased_match_sources[j]) { |
|
2521
|
0
|
|
|
|
|
|
auto range = gazetteers_trie[node].children.equal_range(match_source); |
|
2522
|
0
|
0
|
|
|
|
|
for (auto&& it = range.first; it != range.second; it++) |
|
2523
|
0
|
0
|
|
|
|
|
append_unless_exists(new_nodes, it->second); |
|
2524
|
|
|
|
|
|
|
} |
|
2525
|
|
|
|
|
|
|
|
|
2526
|
0
|
0
|
|
|
|
|
for (auto&& node : new_nodes) |
|
2527
|
0
|
0
|
|
|
|
|
if (gazetteers_trie[node].mode == HARD_POST && |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2528
|
0
|
0
|
|
|
|
|
((j - i + 1) > hard_post_length || node < hard_post_node)) |
|
2529
|
0
|
|
|
|
|
|
hard_post_length = j - i + 1, hard_post_node = node; |
|
2530
|
|
|
|
|
|
|
|
|
2531
|
|
|
|
|
|
|
nodes.swap(new_nodes); |
|
2532
|
|
|
|
|
|
|
} |
|
2533
|
|
|
|
|
|
|
|
|
2534
|
0
|
0
|
|
|
|
|
if (hard_post_length) { |
|
2535
|
0
|
0
|
|
|
|
|
buffer.emplace_back(i, hard_post_length, entity_list[gazetteers_trie[hard_post_node].entity]); |
|
2536
|
0
|
|
|
|
|
|
entity_until = i + hard_post_length; |
|
2537
|
|
|
|
|
|
|
} |
|
2538
|
|
|
|
|
|
|
} |
|
2539
|
|
|
|
|
|
|
} |
|
2540
|
|
|
|
|
|
|
|
|
2541
|
0
|
0
|
|
|
|
|
if (buffer.size() != entities.size()) |
|
2542
|
|
|
|
|
|
|
entities.swap(buffer); |
|
2543
|
0
|
|
|
|
|
|
} |
|
2544
|
|
|
|
|
|
|
|
|
2545
|
0
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const override { |
|
2546
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) |
|
2547
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) { |
|
2548
|
0
|
0
|
|
|
|
|
gazetteers.push_back(gazetteer); |
|
2549
|
0
|
0
|
|
|
|
|
if (gazetteer_types) gazetteer_types->push_back(gazetteer_list.entity); |
|
|
|
0
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
} |
|
2551
|
0
|
|
|
|
|
|
} |
|
2552
|
|
|
|
|
|
|
|
|
2553
|
|
|
|
|
|
|
private: |
|
2554
|
|
|
|
|
|
|
enum { MATCH_FORM = 0, MATCH_RAWLEMMA = 1, MATCH_RAWLEMMAS = 2 }; |
|
2555
|
|
|
|
|
|
|
int match; |
|
2556
|
|
|
|
|
|
|
|
|
2557
|
|
|
|
|
|
|
enum { EMBED_IN_MODEL = 0, OUT_OF_MODEL = 1 }; |
|
2558
|
|
|
|
|
|
|
int embed; |
|
2559
|
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
enum { SOFT, HARD_PRE, HARD_POST, MODES_TOTAL }; |
|
2561
|
|
|
|
|
|
|
const static vector basename_suffixes; |
|
2562
|
|
|
|
|
|
|
|
|
2563
|
0
|
|
|
|
|
|
struct gazetteer_meta_info { |
|
2564
|
|
|
|
|
|
|
string basename; |
|
2565
|
|
|
|
|
|
|
ner_feature feature; |
|
2566
|
|
|
|
|
|
|
int entity; |
|
2567
|
|
|
|
|
|
|
}; |
|
2568
|
|
|
|
|
|
|
vector gazetteer_metas; |
|
2569
|
|
|
|
|
|
|
|
|
2570
|
0
|
|
|
|
|
|
struct gazetteer_list_info { |
|
2571
|
|
|
|
|
|
|
vector gazetteers; |
|
2572
|
|
|
|
|
|
|
ner_feature feature; |
|
2573
|
|
|
|
|
|
|
int entity; |
|
2574
|
|
|
|
|
|
|
int mode; |
|
2575
|
|
|
|
|
|
|
}; |
|
2576
|
|
|
|
|
|
|
vector gazetteer_lists; |
|
2577
|
|
|
|
|
|
|
|
|
2578
|
0
|
|
|
|
|
|
struct gazetteer_trie_node { |
|
2579
|
|
|
|
|
|
|
vector features; |
|
2580
|
|
|
|
|
|
|
unordered_multimap children; |
|
2581
|
|
|
|
|
|
|
int mode = SOFT, entity = -1; |
|
2582
|
|
|
|
|
|
|
}; |
|
2583
|
|
|
|
|
|
|
vector gazetteers_trie; |
|
2584
|
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
vector entity_list; |
|
2586
|
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
template |
|
2588
|
0
|
|
|
|
|
|
inline static void append_unless_exists(vector& array, T value) { |
|
2589
|
|
|
|
|
|
|
size_t i; |
|
2590
|
0
|
0
|
|
|
|
|
for (i = array.size(); i; i--) |
|
2591
|
0
|
0
|
|
|
|
|
if (array[i - 1] == value) |
|
2592
|
|
|
|
|
|
|
break; |
|
2593
|
|
|
|
|
|
|
|
|
2594
|
0
|
0
|
|
|
|
|
if (!i) |
|
2595
|
0
|
|
|
|
|
|
array.push_back(value); |
|
2596
|
0
|
|
|
|
|
|
} |
|
2597
|
|
|
|
|
|
|
|
|
2598
|
0
|
|
|
|
|
|
bool load_gazetteer_lists(const nlp_pipeline& pipeline, bool files_must_exist) { |
|
2599
|
|
|
|
|
|
|
string file_name, line; |
|
2600
|
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
// Load raw gazetteers (maybe additional during inference) |
|
2602
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_meta : gazetteer_metas) |
|
2603
|
0
|
0
|
|
|
|
|
for (int mode = 0; mode < MODES_TOTAL; mode++) { |
|
2604
|
0
|
|
|
|
|
|
file_name.assign(gazetteer_meta.basename).append(basename_suffixes[mode]); |
|
2605
|
|
|
|
|
|
|
|
|
2606
|
0
|
0
|
|
|
|
|
ifstream file(path_from_utf8(file_name).c_str()); |
|
2607
|
0
|
0
|
|
|
|
|
if (!file.is_open()) { |
|
2608
|
0
|
0
|
|
|
|
|
if (mode == SOFT && files_must_exist) |
|
2609
|
0
|
|
|
|
|
|
return cerr << "Cannot open gazetteers file '" << file_name << "'!" << endl, false; |
|
2610
|
0
|
|
|
|
|
|
continue; |
|
2611
|
|
|
|
|
|
|
} |
|
2612
|
|
|
|
|
|
|
|
|
2613
|
0
|
0
|
|
|
|
|
gazetteer_lists.emplace_back(); |
|
2614
|
0
|
|
|
|
|
|
gazetteer_lists.back().feature = gazetteer_meta.feature; |
|
2615
|
0
|
|
|
|
|
|
gazetteer_lists.back().entity = gazetteer_meta.entity; |
|
2616
|
0
|
|
|
|
|
|
gazetteer_lists.back().mode = mode; |
|
2617
|
|
|
|
|
|
|
|
|
2618
|
0
|
0
|
|
|
|
|
while (getline(file, line)) |
|
|
|
0
|
|
|
|
|
|
|
2619
|
0
|
0
|
|
|
|
|
if (!line.empty() && line[0] != '#') |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2620
|
0
|
0
|
|
|
|
|
gazetteer_lists.back().gazetteers.push_back(line); |
|
2621
|
|
|
|
|
|
|
} |
|
2622
|
|
|
|
|
|
|
|
|
2623
|
|
|
|
|
|
|
// Build the gazetteers_trie |
|
2624
|
|
|
|
|
|
|
unordered_map gazetteer_prefixes; |
|
2625
|
0
|
0
|
|
|
|
|
vector gazetteer_tokens, gazetteer_tokens_additional, gazetteer_token(1); |
|
2626
|
0
|
|
|
|
|
|
ner_sentence gazetteer_token_tagged; |
|
2627
|
0
|
|
|
|
|
|
vector gazetteer_recased_match_sources; |
|
2628
|
|
|
|
|
|
|
|
|
2629
|
|
|
|
|
|
|
gazetteers_trie.clear(); |
|
2630
|
0
|
0
|
|
|
|
|
gazetteers_trie.emplace_back(); |
|
2631
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer_list : gazetteer_lists) |
|
2632
|
0
|
0
|
|
|
|
|
for (auto&& gazetteer : gazetteer_list.gazetteers) { |
|
2633
|
0
|
0
|
|
|
|
|
pipeline.tokenizer->set_text(gazetteer); |
|
2634
|
0
|
0
|
|
|
|
|
if (!pipeline.tokenizer->next_sentence(&gazetteer_tokens, nullptr)) continue; |
|
|
|
0
|
|
|
|
|
|
|
2635
|
0
|
0
|
|
|
|
|
while (pipeline.tokenizer->next_sentence(&gazetteer_tokens_additional, nullptr)) |
|
|
|
0
|
|
|
|
|
|
|
2636
|
0
|
0
|
|
|
|
|
gazetteer_tokens.insert(gazetteer_tokens.end(), gazetteer_tokens_additional.begin(), gazetteer_tokens_additional.end()); |
|
2637
|
|
|
|
|
|
|
|
|
2638
|
|
|
|
|
|
|
unsigned node = 0; |
|
2639
|
|
|
|
|
|
|
string prefix; |
|
2640
|
0
|
0
|
|
|
|
|
for (unsigned token = 0; token < gazetteer_tokens.size(); token++) { |
|
2641
|
0
|
0
|
|
|
|
|
if (token) prefix.push_back('\t'); |
|
|
|
0
|
|
|
|
|
|
|
2642
|
0
|
0
|
|
|
|
|
prefix.append(gazetteer_tokens[token].str, gazetteer_tokens[token].len); |
|
2643
|
|
|
|
|
|
|
auto prefix_it = gazetteer_prefixes.find(prefix); |
|
2644
|
0
|
0
|
|
|
|
|
if (prefix_it == gazetteer_prefixes.end()) { |
|
2645
|
0
|
|
|
|
|
|
unsigned new_node = gazetteers_trie.size(); |
|
2646
|
0
|
0
|
|
|
|
|
gazetteers_trie.emplace_back(); |
|
2647
|
|
|
|
|
|
|
gazetteer_prefixes.emplace(prefix, new_node); |
|
2648
|
|
|
|
|
|
|
|
|
2649
|
0
|
|
|
|
|
|
gazetteer_token[0] = string_piece(gazetteer_tokens[token]); |
|
2650
|
0
|
0
|
|
|
|
|
pipeline.tagger->tag(gazetteer_token, gazetteer_token_tagged); |
|
2651
|
0
|
0
|
|
|
|
|
recase_match_source(gazetteer_token_tagged.words[0], RECASE_NATIVE, gazetteer_recased_match_sources); |
|
2652
|
0
|
0
|
|
|
|
|
for (auto&& match_source : gazetteer_recased_match_sources) |
|
2653
|
0
|
|
|
|
|
|
gazetteers_trie[node].children.emplace(match_source, new_node); |
|
2654
|
|
|
|
|
|
|
|
|
2655
|
0
|
|
|
|
|
|
node = new_node; |
|
2656
|
|
|
|
|
|
|
} else { |
|
2657
|
0
|
|
|
|
|
|
node = prefix_it->second; |
|
2658
|
|
|
|
|
|
|
} |
|
2659
|
|
|
|
|
|
|
} |
|
2660
|
|
|
|
|
|
|
|
|
2661
|
0
|
0
|
|
|
|
|
append_unless_exists(gazetteers_trie[node].features, gazetteer_list.feature); |
|
2662
|
0
|
0
|
|
|
|
|
if ((gazetteer_list.mode == HARD_PRE && gazetteers_trie[node].mode != HARD_PRE) || |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2663
|
0
|
0
|
|
|
|
|
(gazetteer_list.mode == HARD_POST && gazetteers_trie[node].mode == SOFT)) { |
|
2664
|
0
|
|
|
|
|
|
gazetteers_trie[node].mode = gazetteer_list.mode; |
|
2665
|
0
|
|
|
|
|
|
gazetteers_trie[node].entity = gazetteer_list.entity; |
|
2666
|
|
|
|
|
|
|
} |
|
2667
|
|
|
|
|
|
|
} |
|
2668
|
|
|
|
|
|
|
|
|
2669
|
|
|
|
|
|
|
return true; |
|
2670
|
|
|
|
|
|
|
} |
|
2671
|
|
|
|
|
|
|
|
|
2672
|
|
|
|
|
|
|
enum { TO_LOWER, TO_TITLE, TO_UPPER, TO_TOTAL }; |
|
2673
|
0
|
|
|
|
|
|
static void recase_text(const string& text, int mode, vector& recased) { |
|
2674
|
|
|
|
|
|
|
using namespace unilib; |
|
2675
|
|
|
|
|
|
|
|
|
2676
|
0
|
|
|
|
|
|
recased.emplace_back(); |
|
2677
|
|
|
|
|
|
|
|
|
2678
|
0
|
0
|
|
|
|
|
if (mode == TO_UPPER) |
|
2679
|
|
|
|
|
|
|
utf8::map(unicode::uppercase, text, recased.back()); |
|
2680
|
0
|
0
|
|
|
|
|
else if (mode == TO_LOWER) |
|
2681
|
|
|
|
|
|
|
utf8::map(unicode::lowercase, text, recased.back()); |
|
2682
|
0
|
0
|
|
|
|
|
else if (mode == TO_TITLE) |
|
2683
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(text)) |
|
2684
|
0
|
0
|
|
|
|
|
utf8::append(recased.back(), recased.back().empty() ? unicode::uppercase(chr) : unicode::lowercase(chr)); |
|
2685
|
0
|
|
|
|
|
|
} |
|
2686
|
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
enum { RECASE_NATIVE, RECASE_ANY }; |
|
2688
|
0
|
|
|
|
|
|
void recase_match_source(const ner_word& word, int mode, vector& recased) const { |
|
2689
|
|
|
|
|
|
|
using namespace unilib; |
|
2690
|
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
bool any_lower = false, first_uc = false, first = true; |
|
2692
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(word.form)) { |
|
2693
|
0
|
0
|
|
|
|
|
any_lower = any_lower || (unicode::category(chr) & unicode::Ll); |
|
|
|
0
|
|
|
|
|
|
|
2694
|
0
|
0
|
|
|
|
|
if (first) first_uc = unicode::category(chr) & unicode::Lut; |
|
2695
|
|
|
|
|
|
|
first = false; |
|
2696
|
|
|
|
|
|
|
} |
|
2697
|
|
|
|
|
|
|
|
|
2698
|
0
|
|
|
|
|
|
recased.clear(); |
|
2699
|
|
|
|
|
|
|
|
|
2700
|
0
|
0
|
|
|
|
|
for (int perform = 0; perform < TO_TOTAL; perform++) { |
|
2701
|
0
|
0
|
|
|
|
|
if (mode == RECASE_NATIVE) { |
|
2702
|
0
|
0
|
|
|
|
|
if (perform == TO_UPPER && !(first_uc && !any_lower)) continue; |
|
|
|
0
|
|
|
|
|
|
|
2703
|
0
|
0
|
|
|
|
|
if (perform == TO_TITLE && !(first_uc && any_lower)) continue; |
|
|
|
0
|
|
|
|
|
|
|
2704
|
0
|
0
|
|
|
|
|
if (perform == TO_LOWER && first_uc) continue; |
|
2705
|
|
|
|
|
|
|
} |
|
2706
|
0
|
0
|
|
|
|
|
if (mode == RECASE_ANY) { |
|
2707
|
0
|
0
|
|
|
|
|
if (perform == TO_UPPER && !(first_uc && !any_lower)) continue; |
|
|
|
0
|
|
|
|
|
|
|
2708
|
0
|
0
|
|
|
|
|
if (perform == TO_TITLE && !first_uc) continue; |
|
2709
|
|
|
|
|
|
|
} |
|
2710
|
|
|
|
|
|
|
|
|
2711
|
0
|
0
|
|
|
|
|
if (match == MATCH_FORM) |
|
2712
|
0
|
|
|
|
|
|
recase_text(word.form, perform, recased); |
|
2713
|
0
|
0
|
|
|
|
|
else if (match == MATCH_RAWLEMMA) |
|
2714
|
0
|
|
|
|
|
|
recase_text(word.raw_lemma, perform, recased); |
|
2715
|
0
|
0
|
|
|
|
|
else if (match == MATCH_RAWLEMMAS) |
|
2716
|
0
|
0
|
|
|
|
|
for (auto&& raw_lemma : word.raw_lemmas_all) |
|
2717
|
0
|
|
|
|
|
|
recase_text(raw_lemma, perform, recased); |
|
2718
|
|
|
|
|
|
|
} |
|
2719
|
0
|
|
|
|
|
|
} |
|
2720
|
|
|
|
|
|
|
}; |
|
2721
|
12
|
50
|
|
|
|
|
const vector gazetteers_enhanced::basename_suffixes = {".txt", ".hard_pre.txt", ".hard_post.txt"}; |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2722
|
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
// Lemma |
|
2724
|
0
|
|
|
|
|
|
class lemma : public feature_processor { |
|
2725
|
|
|
|
|
|
|
public: |
|
2726
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
|
2727
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2728
|
60
|
50
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].lemma_id, total_features)); |
|
|
|
100
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
|
|
2730
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2731
|
4
|
|
|
|
|
|
} |
|
2732
|
|
|
|
|
|
|
}; |
|
2733
|
|
|
|
|
|
|
|
|
2734
|
|
|
|
|
|
|
// NumericTimeValue |
|
2735
|
0
|
|
|
|
|
|
class number_time_value : public feature_processor { |
|
2736
|
|
|
|
|
|
|
public: |
|
2737
|
4
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2738
|
4
|
|
|
|
|
|
ner_feature hour = lookup(buffer.assign("H"), total_features); |
|
2739
|
4
|
|
|
|
|
|
ner_feature minute = lookup(buffer.assign("M"), total_features); |
|
2740
|
4
|
|
|
|
|
|
ner_feature time = lookup(buffer.assign("t"), total_features); |
|
2741
|
4
|
|
|
|
|
|
ner_feature day = lookup(buffer.assign("d"), total_features); |
|
2742
|
4
|
|
|
|
|
|
ner_feature month = lookup(buffer.assign("m"), total_features); |
|
2743
|
4
|
|
|
|
|
|
ner_feature year = lookup(buffer.assign("y"), total_features); |
|
2744
|
|
|
|
|
|
|
|
|
2745
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2746
|
14
|
|
|
|
|
|
const char* form = sentence.words[i].form.c_str(); |
|
2747
|
|
|
|
|
|
|
unsigned num; |
|
2748
|
|
|
|
|
|
|
bool digit; |
|
2749
|
|
|
|
|
|
|
|
|
2750
|
14
|
50
|
|
|
|
|
for (digit = false, num = 0; *form; form++) { |
|
2751
|
14
|
50
|
|
|
|
|
if (*form < '0' || *form > '9') break; |
|
2752
|
|
|
|
|
|
|
digit = true; |
|
2753
|
0
|
|
|
|
|
|
num = num * 10 + *form - '0'; |
|
2754
|
|
|
|
|
|
|
} |
|
2755
|
14
|
50
|
|
|
|
|
if (digit && !*form) { |
|
|
|
0
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
// We have a number |
|
2757
|
0
|
0
|
|
|
|
|
if (num < 24) apply_in_window(i, hour); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2758
|
0
|
0
|
|
|
|
|
if (num < 60) apply_in_window(i, minute); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2759
|
0
|
0
|
|
|
|
|
if (num >= 1 && num <= 31) apply_in_window(i, day); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2760
|
0
|
0
|
|
|
|
|
if (num >= 1 && num <= 12) apply_in_window(i, month); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2761
|
0
|
0
|
|
|
|
|
if (num >= 1000 && num <= 2200) apply_in_window(i, year);; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2762
|
|
|
|
|
|
|
} |
|
2763
|
14
|
50
|
|
|
|
|
if (digit && num < 24 && (*form == '.' || *form == ':')) { |
|
|
|
0
|
|
|
|
|
|
|
2764
|
|
|
|
|
|
|
// Maybe time |
|
2765
|
0
|
0
|
|
|
|
|
for (digit = false, num = 0, form++; *form; form++) { |
|
2766
|
0
|
0
|
|
|
|
|
if (*form < '0' || *form > '9') break; |
|
2767
|
|
|
|
|
|
|
digit = true; |
|
2768
|
0
|
|
|
|
|
|
num = num * 10 + *form - '0'; |
|
2769
|
|
|
|
|
|
|
} |
|
2770
|
0
|
0
|
|
|
|
|
if (digit && !*form && num < 60) apply_in_window(i, time); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
} |
|
2772
|
|
|
|
|
|
|
} |
|
2773
|
4
|
|
|
|
|
|
} |
|
2774
|
|
|
|
|
|
|
}; |
|
2775
|
|
|
|
|
|
|
|
|
2776
|
|
|
|
|
|
|
// PreviousStage |
|
2777
|
0
|
|
|
|
|
|
class previous_stage : public feature_processor { |
|
2778
|
|
|
|
|
|
|
public: |
|
2779
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2780
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2781
|
14
|
100
|
|
|
|
|
if (sentence.previous_stage[i].bilou != bilou_type_unknown) { |
|
2782
|
|
|
|
|
|
|
buffer.clear(); |
|
2783
|
7
|
|
|
|
|
|
append_encoded(buffer, sentence.previous_stage[i].bilou); |
|
2784
|
7
|
|
|
|
|
|
buffer.push_back(' '); |
|
2785
|
7
|
|
|
|
|
|
append_encoded(buffer, sentence.previous_stage[i].entity); |
|
2786
|
16
|
50
|
|
|
|
|
apply_in_range(i, lookup(buffer, total_features), 1, window); |
|
|
|
100
|
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
} |
|
2788
|
4
|
|
|
|
|
|
} |
|
2789
|
|
|
|
|
|
|
|
|
2790
|
|
|
|
|
|
|
private: |
|
2791
|
14
|
|
|
|
|
|
static void append_encoded(string& str, int value) { |
|
2792
|
14
|
100
|
|
|
|
|
if (value < 0) { |
|
2793
|
4
|
|
|
|
|
|
str.push_back('-'); |
|
2794
|
14
|
|
|
|
|
|
value = -value; |
|
2795
|
|
|
|
|
|
|
} |
|
2796
|
25
|
100
|
|
|
|
|
for (; value; value >>= 4) |
|
2797
|
11
|
|
|
|
|
|
str.push_back("0123456789abcdef"[value & 0xF]); |
|
2798
|
14
|
|
|
|
|
|
} |
|
2799
|
|
|
|
|
|
|
}; |
|
2800
|
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
// RawLemma |
|
2802
|
0
|
|
|
|
|
|
class raw_lemma : public feature_processor { |
|
2803
|
|
|
|
|
|
|
public: |
|
2804
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
|
2805
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2806
|
60
|
50
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].raw_lemma, total_features)); |
|
|
|
100
|
|
|
|
|
|
|
2807
|
|
|
|
|
|
|
|
|
2808
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2809
|
4
|
|
|
|
|
|
} |
|
2810
|
|
|
|
|
|
|
}; |
|
2811
|
|
|
|
|
|
|
|
|
2812
|
|
|
|
|
|
|
// RawLemmaCapitalization |
|
2813
|
0
|
|
|
|
|
|
class raw_lemma_capitalization : public feature_processor { |
|
2814
|
|
|
|
|
|
|
public: |
|
2815
|
4
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2816
|
|
|
|
|
|
|
using namespace unilib; |
|
2817
|
|
|
|
|
|
|
|
|
2818
|
4
|
|
|
|
|
|
ner_feature fst_cap = lookup(buffer.assign("f"), total_features); |
|
2819
|
4
|
|
|
|
|
|
ner_feature all_cap = lookup(buffer.assign("a"), total_features); |
|
2820
|
4
|
|
|
|
|
|
ner_feature mixed_cap = lookup(buffer.assign("m"), total_features); |
|
2821
|
|
|
|
|
|
|
|
|
2822
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2823
|
|
|
|
|
|
|
bool was_upper = false, was_lower = false; |
|
2824
|
|
|
|
|
|
|
|
|
2825
|
68
|
|
|
|
|
|
auto* raw_lemma = sentence.words[i].raw_lemma.c_str(); |
|
2826
|
|
|
|
|
|
|
char32_t chr; |
|
2827
|
68
|
100
|
|
|
|
|
for (bool first = true; (chr = utf8::decode(raw_lemma)); first = false) { |
|
2828
|
|
|
|
|
|
|
auto category = unicode::category(chr); |
|
2829
|
54
|
50
|
|
|
|
|
was_upper = was_upper || category & unicode::Lut; |
|
|
|
50
|
|
|
|
|
|
|
2830
|
54
|
100
|
|
|
|
|
was_lower = was_lower || category & unicode::Ll; |
|
|
|
100
|
|
|
|
|
|
|
2831
|
|
|
|
|
|
|
|
|
2832
|
54
|
50
|
|
|
|
|
if (first && was_upper) apply_in_window(i, fst_cap); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2833
|
|
|
|
|
|
|
} |
|
2834
|
14
|
50
|
|
|
|
|
if (was_upper && !was_lower) apply_in_window(i, all_cap); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2835
|
14
|
50
|
|
|
|
|
if (was_upper && was_lower) apply_in_window(i, mixed_cap); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2836
|
|
|
|
|
|
|
} |
|
2837
|
4
|
|
|
|
|
|
} |
|
2838
|
|
|
|
|
|
|
}; |
|
2839
|
|
|
|
|
|
|
|
|
2840
|
|
|
|
|
|
|
// RawLemmaCaseNormalized |
|
2841
|
0
|
|
|
|
|
|
class raw_lemma_case_normalized : public feature_processor { |
|
2842
|
|
|
|
|
|
|
public: |
|
2843
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2844
|
|
|
|
|
|
|
using namespace unilib; |
|
2845
|
|
|
|
|
|
|
|
|
2846
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2847
|
|
|
|
|
|
|
buffer.clear(); |
|
2848
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(sentence.words[i].raw_lemma)) |
|
2849
|
0
|
0
|
|
|
|
|
utf8::append(buffer, buffer.empty() ? chr : unicode::lowercase(chr)); |
|
2850
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
|
0
|
|
|
|
|
|
|
2851
|
|
|
|
|
|
|
} |
|
2852
|
|
|
|
|
|
|
|
|
2853
|
0
|
0
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2854
|
0
|
|
|
|
|
|
} |
|
2855
|
|
|
|
|
|
|
}; |
|
2856
|
|
|
|
|
|
|
|
|
2857
|
|
|
|
|
|
|
// *Suffix |
|
2858
|
|
|
|
|
|
|
enum { SUFFIX_SOURCE_FORM, SUFFIX_SOURCE_RAWLEMMA }; |
|
2859
|
|
|
|
|
|
|
enum { SUFFIX_CASE_ORIGINAL, SUFFIX_CASE_NORMALIZED }; |
|
2860
|
0
|
|
|
|
|
|
class suffix : public feature_processor { |
|
2861
|
|
|
|
|
|
|
public: |
|
2862
|
0
|
|
|
|
|
|
suffix(int source, int casing) : source(source), casing(casing) {} |
|
2863
|
|
|
|
|
|
|
|
|
2864
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
|
2865
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
|
2866
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
|
2867
|
0
|
0
|
|
|
|
|
if (args.size() != 2) return cerr << "*Suffix features require exactly two arguments -- shortest and longest suffix length!" << endl, false; |
|
2868
|
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
string error; |
|
2870
|
0
|
0
|
|
|
|
|
if (!parse_int(args[0], "*Suffix shortest length", shortest, error)) return cerr << error << endl, false; |
|
|
|
0
|
|
|
|
|
|
|
2871
|
0
|
0
|
|
|
|
|
if (!parse_int(args[1], "*Suffix longest length", longest, error)) return cerr << error << endl, false; |
|
|
|
0
|
|
|
|
|
|
|
2872
|
|
|
|
|
|
|
return true; |
|
2873
|
|
|
|
|
|
|
} |
|
2874
|
|
|
|
|
|
|
|
|
2875
|
0
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
|
2876
|
0
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
|
2877
|
|
|
|
|
|
|
|
|
2878
|
0
|
|
|
|
|
|
shortest = data.next_4B(); |
|
2879
|
0
|
|
|
|
|
|
longest = data.next_4B(); |
|
2880
|
0
|
|
|
|
|
|
} |
|
2881
|
|
|
|
|
|
|
|
|
2882
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
|
2883
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
|
2884
|
|
|
|
|
|
|
|
|
2885
|
0
|
|
|
|
|
|
enc.add_4B(shortest); |
|
2886
|
0
|
|
|
|
|
|
enc.add_4B(longest); |
|
2887
|
0
|
|
|
|
|
|
} |
|
2888
|
|
|
|
|
|
|
|
|
2889
|
0
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& buffer) const override { |
|
2890
|
|
|
|
|
|
|
using namespace unilib; |
|
2891
|
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
vector chrs; |
|
2893
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2894
|
|
|
|
|
|
|
chrs.clear(); |
|
2895
|
0
|
0
|
|
|
|
|
for (auto&& chr : utf8::decoder(source == SUFFIX_SOURCE_FORM ? sentence.words[i].form : sentence.words[i].raw_lemma)) |
|
|
|
0
|
|
|
|
|
|
|
2896
|
0
|
0
|
|
|
|
|
chrs.push_back((casing == SUFFIX_CASE_ORIGINAL || chrs.empty()) ? chr : unicode::lowercase(chr)); |
|
|
|
0
|
|
|
|
|
|
|
2897
|
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
buffer.clear(); |
|
2899
|
0
|
0
|
|
|
|
|
for (int s = 1; s <= longest && s <= int(chrs.size()); s++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2900
|
0
|
0
|
|
|
|
|
utf8::append(buffer, chrs[chrs.size() - s]); |
|
2901
|
0
|
0
|
|
|
|
|
if (s >= shortest) { |
|
2902
|
0
|
0
|
|
|
|
|
apply_in_window(i, lookup(buffer, total_features)); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2903
|
|
|
|
|
|
|
} |
|
2904
|
|
|
|
|
|
|
} |
|
2905
|
|
|
|
|
|
|
} |
|
2906
|
|
|
|
|
|
|
|
|
2907
|
0
|
0
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
2908
|
0
|
|
|
|
|
|
} |
|
2909
|
|
|
|
|
|
|
|
|
2910
|
|
|
|
|
|
|
private: |
|
2911
|
|
|
|
|
|
|
int shortest, longest; |
|
2912
|
|
|
|
|
|
|
int source, casing; |
|
2913
|
|
|
|
|
|
|
}; |
|
2914
|
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
// Tag |
|
2916
|
0
|
|
|
|
|
|
class tag : public feature_processor { |
|
2917
|
|
|
|
|
|
|
public: |
|
2918
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* total_features, string& /*buffer*/) const override { |
|
2919
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
2920
|
54
|
100
|
|
|
|
|
apply_in_window(i, lookup(sentence.words[i].tag, total_features)); |
|
|
|
100
|
|
|
|
|
|
|
2921
|
|
|
|
|
|
|
|
|
2922
|
36
|
50
|
|
|
|
|
apply_outer_words_in_window(lookup_empty()); |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
2923
|
4
|
|
|
|
|
|
} |
|
2924
|
|
|
|
|
|
|
}; |
|
2925
|
|
|
|
|
|
|
|
|
2926
|
|
|
|
|
|
|
// URLEmailDetector |
|
2927
|
0
|
|
|
|
|
|
class url_email_detector : public feature_processor { |
|
2928
|
|
|
|
|
|
|
public: |
|
2929
|
0
|
|
|
|
|
|
virtual bool parse(int window, const vector& args, entity_map& entities, |
|
2930
|
|
|
|
|
|
|
ner_feature* total_features, const nlp_pipeline& pipeline) override { |
|
2931
|
0
|
0
|
|
|
|
|
if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false; |
|
2932
|
0
|
0
|
|
|
|
|
if (args.size() != 2) return cerr << "URLEmailDetector requires exactly two arguments -- named entity types for URL and email!" << endl, false; |
|
2933
|
|
|
|
|
|
|
|
|
2934
|
0
|
|
|
|
|
|
url = entities.parse(args[0].c_str(), true); |
|
2935
|
0
|
|
|
|
|
|
email = entities.parse(args[1].c_str(), true); |
|
2936
|
|
|
|
|
|
|
|
|
2937
|
0
|
0
|
|
|
|
|
if (url == entity_type_unknown || email == entity_type_unknown) |
|
|
|
0
|
|
|
|
|
|
|
2938
|
0
|
|
|
|
|
|
return cerr << "Cannot create entities '" << args[0] << "' and '" << args[1] << "' in URLEmailDetector!" << endl, false; |
|
2939
|
|
|
|
|
|
|
return true; |
|
2940
|
|
|
|
|
|
|
} |
|
2941
|
|
|
|
|
|
|
|
|
2942
|
1
|
|
|
|
|
|
virtual void load(binary_decoder& data, const nlp_pipeline& pipeline) override { |
|
2943
|
1
|
|
|
|
|
|
feature_processor::load(data, pipeline); |
|
2944
|
|
|
|
|
|
|
|
|
2945
|
1
|
|
|
|
|
|
url = data.next_4B(); |
|
2946
|
1
|
|
|
|
|
|
email = data.next_4B(); |
|
2947
|
1
|
|
|
|
|
|
} |
|
2948
|
|
|
|
|
|
|
|
|
2949
|
0
|
|
|
|
|
|
virtual void save(binary_encoder& enc) override { |
|
2950
|
0
|
|
|
|
|
|
feature_processor::save(enc); |
|
2951
|
|
|
|
|
|
|
|
|
2952
|
0
|
|
|
|
|
|
enc.add_4B(url); |
|
2953
|
0
|
|
|
|
|
|
enc.add_4B(email); |
|
2954
|
0
|
|
|
|
|
|
} |
|
2955
|
|
|
|
|
|
|
|
|
2956
|
8
|
|
|
|
|
|
virtual void process_sentence(ner_sentence& sentence, ner_feature* /*total_features*/, string& /*buffer*/) const override { |
|
2957
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
2958
|
14
|
|
|
|
|
|
auto type = url_detector::detect(sentence.words[i].form); |
|
2959
|
14
|
50
|
|
|
|
|
if (type == url_detector::NO_URL || sentence.probabilities[i].local_filled) continue; |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
2960
|
|
|
|
|
|
|
|
|
2961
|
|
|
|
|
|
|
// We have found URL or email and the word has not yet been determined |
|
2962
|
0
|
0
|
|
|
|
|
for (auto&& bilou : sentence.probabilities[i].local.bilou) { |
|
2963
|
0
|
|
|
|
|
|
bilou.probability = 0.; |
|
2964
|
0
|
|
|
|
|
|
bilou.entity = entity_type_unknown; |
|
2965
|
|
|
|
|
|
|
} |
|
2966
|
0
|
|
|
|
|
|
sentence.probabilities[i].local.bilou[bilou_type_U].probability = 1.; |
|
2967
|
0
|
0
|
|
|
|
|
sentence.probabilities[i].local.bilou[bilou_type_U].entity = type == url_detector::EMAIL ? email : url; |
|
2968
|
0
|
|
|
|
|
|
sentence.probabilities[i].local_filled = true; |
|
2969
|
|
|
|
|
|
|
} |
|
2970
|
4
|
|
|
|
|
|
} |
|
2971
|
|
|
|
|
|
|
|
|
2972
|
|
|
|
|
|
|
private: |
|
2973
|
|
|
|
|
|
|
entity_type url, email; |
|
2974
|
|
|
|
|
|
|
}; |
|
2975
|
|
|
|
|
|
|
|
|
2976
|
|
|
|
|
|
|
} // namespace feature_processors |
|
2977
|
|
|
|
|
|
|
|
|
2978
|
|
|
|
|
|
|
// Feature processor factory method |
|
2979
|
8
|
|
|
|
|
|
feature_processor* feature_processor::create(const string& name) { |
|
2980
|
|
|
|
|
|
|
using namespace feature_processors; |
|
2981
|
|
|
|
|
|
|
|
|
2982
|
8
|
50
|
|
|
|
|
if (name.compare("BrownClusters") == 0) return new brown_clusters(); |
|
2983
|
8
|
50
|
|
|
|
|
if (name.compare("CzechAddContainers") == 0) return new czech_add_containers(); |
|
2984
|
8
|
50
|
|
|
|
|
if (name.compare("CzechLemmaTerm") == 0) return new czech_lemma_term(); |
|
2985
|
8
|
100
|
|
|
|
|
if (name.compare("Form") == 0) return new form(); |
|
2986
|
7
|
50
|
|
|
|
|
if (name.compare("FormCapitalization") == 0) return new form_capitalization(); |
|
2987
|
7
|
50
|
|
|
|
|
if (name.compare("FormCaseNormalized") == 0) return new form_case_normalized(); |
|
2988
|
7
|
50
|
|
|
|
|
if (name.compare("FormCaseNormalizedSuffix") == 0) return new suffix(SUFFIX_SOURCE_FORM, SUFFIX_CASE_NORMALIZED); |
|
2989
|
7
|
50
|
|
|
|
|
if (name.compare("FormSuffix") == 0) return new suffix(SUFFIX_SOURCE_FORM, SUFFIX_CASE_ORIGINAL); |
|
2990
|
7
|
50
|
|
|
|
|
if (name.compare("Gazetteers") == 0) return new feature_processors::gazetteers(); |
|
2991
|
7
|
50
|
|
|
|
|
if (name.compare("GazetteersEnhanced") == 0) return new gazetteers_enhanced(); |
|
2992
|
7
|
100
|
|
|
|
|
if (name.compare("Lemma") == 0) return new lemma(); |
|
2993
|
6
|
100
|
|
|
|
|
if (name.compare("NumericTimeValue") == 0) return new number_time_value(); |
|
2994
|
5
|
100
|
|
|
|
|
if (name.compare("PreviousStage") == 0) return new previous_stage(); |
|
2995
|
4
|
100
|
|
|
|
|
if (name.compare("RawLemma") == 0) return new raw_lemma(); |
|
2996
|
3
|
100
|
|
|
|
|
if (name.compare("RawLemmaCapitalization") == 0) return new raw_lemma_capitalization(); |
|
2997
|
2
|
50
|
|
|
|
|
if (name.compare("RawLemmaCaseNormalized") == 0) return new raw_lemma_case_normalized(); |
|
2998
|
2
|
50
|
|
|
|
|
if (name.compare("RawLemmaCaseNormalizedSuffix") == 0) return new suffix(SUFFIX_SOURCE_RAWLEMMA, SUFFIX_CASE_NORMALIZED); |
|
2999
|
2
|
50
|
|
|
|
|
if (name.compare("RawLemmaSuffix") == 0) return new suffix(SUFFIX_SOURCE_RAWLEMMA, SUFFIX_CASE_ORIGINAL); |
|
3000
|
2
|
100
|
|
|
|
|
if (name.compare("Tag") == 0) return new tag(); |
|
3001
|
1
|
50
|
|
|
|
|
if (name.compare("URLEmailDetector") == 0) return new url_email_detector(); |
|
3002
|
|
|
|
|
|
|
return nullptr; |
|
3003
|
|
|
|
|
|
|
} |
|
3004
|
|
|
|
|
|
|
|
|
3005
|
|
|
|
|
|
|
///////// |
|
3006
|
|
|
|
|
|
|
// File: features/feature_templates.h |
|
3007
|
|
|
|
|
|
|
///////// |
|
3008
|
|
|
|
|
|
|
|
|
3009
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
3010
|
|
|
|
|
|
|
// |
|
3011
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3012
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3013
|
|
|
|
|
|
|
// |
|
3014
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3015
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3016
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3017
|
|
|
|
|
|
|
|
|
3018
|
0
|
|
|
|
|
|
class feature_templates { |
|
3019
|
|
|
|
|
|
|
public: |
|
3020
|
|
|
|
|
|
|
void parse(istream& is, entity_map& entities, const nlp_pipeline& pipeline); |
|
3021
|
|
|
|
|
|
|
|
|
3022
|
|
|
|
|
|
|
bool load(istream& is, const nlp_pipeline& pipeline); |
|
3023
|
|
|
|
|
|
|
bool save(ostream& os); |
|
3024
|
|
|
|
|
|
|
|
|
3025
|
|
|
|
|
|
|
void process_sentence(ner_sentence& sentence, string& buffer, bool add_features = false) const; |
|
3026
|
|
|
|
|
|
|
void process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const; |
|
3027
|
|
|
|
|
|
|
ner_feature get_total_features() const; |
|
3028
|
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
void gazetteers(vector& gazetteers, vector* gazetteer_types) const; |
|
3030
|
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
private: |
|
3032
|
|
|
|
|
|
|
mutable ner_feature total_features; |
|
3033
|
|
|
|
|
|
|
|
|
3034
|
7
|
|
|
|
|
|
struct feature_processor_info { |
|
3035
|
|
|
|
|
|
|
string name; |
|
3036
|
|
|
|
|
|
|
unique_ptr processor; |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
8
|
|
|
|
|
|
feature_processor_info(const string& name, feature_processor* processor) : name(name), processor(processor) {} |
|
3039
|
|
|
|
|
|
|
}; |
|
3040
|
|
|
|
|
|
|
vector processors; |
|
3041
|
|
|
|
|
|
|
}; |
|
3042
|
|
|
|
|
|
|
|
|
3043
|
|
|
|
|
|
|
///////// |
|
3044
|
|
|
|
|
|
|
// File: features/feature_templates.cpp |
|
3045
|
|
|
|
|
|
|
///////// |
|
3046
|
|
|
|
|
|
|
|
|
3047
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
3048
|
|
|
|
|
|
|
// |
|
3049
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3050
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3051
|
|
|
|
|
|
|
// |
|
3052
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3053
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3054
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3055
|
|
|
|
|
|
|
|
|
3056
|
1
|
|
|
|
|
|
bool feature_templates::load(istream& is, const nlp_pipeline& pipeline) { |
|
3057
|
|
|
|
|
|
|
binary_decoder data; |
|
3058
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
50
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
|
|
3060
|
|
|
|
|
|
|
try { |
|
3061
|
1
|
50
|
|
|
|
|
total_features = data.next_4B(); |
|
3062
|
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
processors.clear(); |
|
3064
|
9
|
50
|
|
|
|
|
for (unsigned i = data.next_4B(); i; i--) { |
|
|
|
100
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
string name; |
|
3066
|
8
|
50
|
|
|
|
|
data.next_str(name); |
|
3067
|
|
|
|
|
|
|
|
|
3068
|
|
|
|
|
|
|
// Try creating the processor |
|
3069
|
8
|
50
|
|
|
|
|
auto* processor = feature_processor::create(name); |
|
3070
|
8
|
50
|
|
|
|
|
if (processor) { |
|
3071
|
8
|
50
|
|
|
|
|
processor->load(data, pipeline); |
|
3072
|
8
|
50
|
|
|
|
|
processors.emplace_back(name, processor); |
|
3073
|
|
|
|
|
|
|
continue; |
|
3074
|
|
|
|
|
|
|
} |
|
3075
|
|
|
|
|
|
|
|
|
3076
|
|
|
|
|
|
|
// Could not find processor with specified name |
|
3077
|
|
|
|
|
|
|
return false; |
|
3078
|
|
0
|
|
|
|
|
} |
|
3079
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
3080
|
|
|
|
|
|
|
return false; |
|
3081
|
|
|
|
|
|
|
} |
|
3082
|
|
|
|
|
|
|
|
|
3083
|
1
|
|
|
|
|
|
return data.is_end(); |
|
3084
|
|
|
|
|
|
|
} |
|
3085
|
|
|
|
|
|
|
|
|
3086
|
8
|
|
|
|
|
|
void feature_templates::process_sentence(ner_sentence& sentence, string& buffer, bool adding_features) const { |
|
3087
|
|
|
|
|
|
|
// Start with omnipresent feature |
|
3088
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
3089
|
14
|
|
|
|
|
|
sentence.features[i].clear(); |
|
3090
|
14
|
|
|
|
|
|
sentence.features[i].emplace_back(0); |
|
3091
|
|
|
|
|
|
|
} |
|
3092
|
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
// Add features from feature processors |
|
3094
|
36
|
100
|
|
|
|
|
for (auto&& processor : processors) |
|
3095
|
32
|
50
|
|
|
|
|
processor.processor->process_sentence(sentence, adding_features ? &total_features : nullptr, buffer); |
|
3096
|
4
|
|
|
|
|
|
} |
|
3097
|
|
|
|
|
|
|
|
|
3098
|
0
|
|
|
|
|
|
void feature_templates::process_entities(ner_sentence& sentence, vector& entities, vector& buffer) const { |
|
3099
|
18
|
100
|
|
|
|
|
for (auto&& processor : processors) |
|
|
|
0
|
|
|
|
|
|
|
3100
|
16
|
|
|
|
|
|
processor.processor->process_entities(sentence, entities, buffer); |
|
3101
|
0
|
|
|
|
|
|
} |
|
3102
|
|
|
|
|
|
|
|
|
3103
|
0
|
|
|
|
|
|
ner_feature feature_templates::get_total_features() const { |
|
3104
|
0
|
|
|
|
|
|
return total_features; |
|
3105
|
|
|
|
|
|
|
} |
|
3106
|
|
|
|
|
|
|
|
|
3107
|
0
|
|
|
|
|
|
void feature_templates::gazetteers(vector& gazetteers, vector* gazetteer_types) const { |
|
3108
|
0
|
0
|
|
|
|
|
for (auto&& processor : processors) |
|
|
|
0
|
|
|
|
|
|
|
3109
|
0
|
|
|
|
|
|
processor.processor->gazetteers(gazetteers, gazetteer_types); |
|
3110
|
0
|
|
|
|
|
|
} |
|
3111
|
|
|
|
|
|
|
|
|
3112
|
|
|
|
|
|
|
///////// |
|
3113
|
|
|
|
|
|
|
// File: morphodita/derivator/derivator.h |
|
3114
|
|
|
|
|
|
|
///////// |
|
3115
|
|
|
|
|
|
|
|
|
3116
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3117
|
|
|
|
|
|
|
// |
|
3118
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3119
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3120
|
|
|
|
|
|
|
// |
|
3121
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3122
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3123
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3124
|
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
namespace morphodita { |
|
3126
|
|
|
|
|
|
|
|
|
3127
|
0
|
|
|
|
|
|
struct derivated_lemma { |
|
3128
|
|
|
|
|
|
|
string lemma; |
|
3129
|
|
|
|
|
|
|
}; |
|
3130
|
|
|
|
|
|
|
|
|
3131
|
0
|
|
|
|
|
|
class derivator { |
|
3132
|
|
|
|
|
|
|
public: |
|
3133
|
0
|
|
|
|
|
|
virtual ~derivator() {} |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
// For given lemma, return the parent in the derivation graph. |
|
3136
|
|
|
|
|
|
|
// The lemma is assumed to be lemma id and any lemma comments are ignored. |
|
3137
|
|
|
|
|
|
|
virtual bool parent(string_piece lemma, derivated_lemma& parent) const = 0; |
|
3138
|
|
|
|
|
|
|
|
|
3139
|
|
|
|
|
|
|
// For given lemma, return the children in the derivation graph. |
|
3140
|
|
|
|
|
|
|
// The lemma is assumed to be lemma id and any lemma comments are ignored. |
|
3141
|
|
|
|
|
|
|
virtual bool children(string_piece lemma, vector& children) const = 0; |
|
3142
|
|
|
|
|
|
|
}; |
|
3143
|
|
|
|
|
|
|
|
|
3144
|
|
|
|
|
|
|
} // namespace morphodita |
|
3145
|
|
|
|
|
|
|
|
|
3146
|
|
|
|
|
|
|
///////// |
|
3147
|
|
|
|
|
|
|
// File: morphodita/tokenizer/tokenizer.h |
|
3148
|
|
|
|
|
|
|
///////// |
|
3149
|
|
|
|
|
|
|
|
|
3150
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3151
|
|
|
|
|
|
|
// |
|
3152
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
3153
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3154
|
|
|
|
|
|
|
// |
|
3155
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3156
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3157
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3158
|
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
namespace morphodita { |
|
3160
|
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
// Range of a token, measured in Unicode characters, not UTF8 bytes. |
|
3162
|
|
|
|
|
|
|
struct token_range { |
|
3163
|
|
|
|
|
|
|
size_t start; |
|
3164
|
|
|
|
|
|
|
size_t length; |
|
3165
|
|
|
|
|
|
|
|
|
3166
|
|
|
|
|
|
|
token_range() {} |
|
3167
|
33
|
|
|
|
|
|
token_range(size_t start, size_t length) : start(start), length(length) {} |
|
3168
|
|
|
|
|
|
|
}; |
|
3169
|
|
|
|
|
|
|
|
|
3170
|
4
|
|
|
|
|
|
class tokenizer { |
|
3171
|
|
|
|
|
|
|
public: |
|
3172
|
4
|
|
|
|
|
|
virtual ~tokenizer() {} |
|
3173
|
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) = 0; |
|
3175
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) = 0; |
|
3176
|
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
// Static factory methods |
|
3178
|
|
|
|
|
|
|
static tokenizer* new_vertical_tokenizer(); |
|
3179
|
|
|
|
|
|
|
|
|
3180
|
|
|
|
|
|
|
static tokenizer* new_czech_tokenizer(); |
|
3181
|
|
|
|
|
|
|
static tokenizer* new_english_tokenizer(); |
|
3182
|
|
|
|
|
|
|
static tokenizer* new_generic_tokenizer(); |
|
3183
|
|
|
|
|
|
|
}; |
|
3184
|
|
|
|
|
|
|
|
|
3185
|
|
|
|
|
|
|
} // namespace morphodita |
|
3186
|
|
|
|
|
|
|
|
|
3187
|
|
|
|
|
|
|
///////// |
|
3188
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho.h |
|
3189
|
|
|
|
|
|
|
///////// |
|
3190
|
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3192
|
|
|
|
|
|
|
// |
|
3193
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
3194
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3195
|
|
|
|
|
|
|
// |
|
3196
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3197
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3198
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3199
|
|
|
|
|
|
|
|
|
3200
|
|
|
|
|
|
|
namespace morphodita { |
|
3201
|
|
|
|
|
|
|
|
|
3202
|
0
|
|
|
|
|
|
struct tagged_form { |
|
3203
|
|
|
|
|
|
|
string form; |
|
3204
|
|
|
|
|
|
|
string tag; |
|
3205
|
|
|
|
|
|
|
|
|
3206
|
|
|
|
|
|
|
tagged_form() {} |
|
3207
|
0
|
|
|
|
|
|
tagged_form(const string& form, const string& tag) : form(form), tag(tag) {} |
|
3208
|
|
|
|
|
|
|
}; |
|
3209
|
|
|
|
|
|
|
|
|
3210
|
43
|
|
|
|
|
|
struct tagged_lemma { |
|
3211
|
|
|
|
|
|
|
string lemma; |
|
3212
|
|
|
|
|
|
|
string tag; |
|
3213
|
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
tagged_lemma() {} |
|
3215
|
22
|
|
|
|
|
|
tagged_lemma(const string& lemma, const string& tag) : lemma(lemma), tag(tag) {} |
|
3216
|
|
|
|
|
|
|
}; |
|
3217
|
|
|
|
|
|
|
|
|
3218
|
0
|
|
|
|
|
|
struct tagged_lemma_forms { |
|
3219
|
|
|
|
|
|
|
string lemma; |
|
3220
|
|
|
|
|
|
|
vector forms; |
|
3221
|
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
tagged_lemma_forms() {} |
|
3223
|
0
|
|
|
|
|
|
tagged_lemma_forms(const string& lemma) : lemma(lemma) {} |
|
3224
|
|
|
|
|
|
|
}; |
|
3225
|
|
|
|
|
|
|
|
|
3226
|
1
|
|
|
|
|
|
class morpho { |
|
3227
|
|
|
|
|
|
|
public: |
|
3228
|
0
|
|
|
|
|
|
virtual ~morpho() {} |
|
3229
|
|
|
|
|
|
|
|
|
3230
|
|
|
|
|
|
|
static morpho* load(istream& is); |
|
3231
|
|
|
|
|
|
|
static morpho* load(const char* fname); |
|
3232
|
|
|
|
|
|
|
|
|
3233
|
|
|
|
|
|
|
enum guesser_mode { NO_GUESSER = 0, GUESSER = 1, GUESSER_UNSPECIFIED = -1 }; |
|
3234
|
|
|
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
// Perform morphologic analysis of a form. The form is given by a pointer and |
|
3236
|
|
|
|
|
|
|
// length and therefore does not need to be '\0' terminated. The guesser |
|
3237
|
|
|
|
|
|
|
// parameter specifies whether a guesser can be used if the form is not found |
|
3238
|
|
|
|
|
|
|
// in the dictionary. Output is assigned to the lemmas vector. |
|
3239
|
|
|
|
|
|
|
// |
|
3240
|
|
|
|
|
|
|
// If the form is found in the dictionary, analyses are assigned to lemmas |
|
3241
|
|
|
|
|
|
|
// and NO_GUESSER returned. If guesser == GUESSER and the form analyses are |
|
3242
|
|
|
|
|
|
|
// found using a guesser, they are assigned to lemmas and GUESSER is |
|
3243
|
|
|
|
|
|
|
// returned. Otherwise <0 is returned and lemmas are filled with one |
|
3244
|
|
|
|
|
|
|
// analysis containing given form as lemma and a tag for unknown word. |
|
3245
|
|
|
|
|
|
|
virtual int analyze(string_piece form, guesser_mode guesser, vector& lemmas) const = 0; |
|
3246
|
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
// Perform morphologic generation of a lemma. The lemma is given by a pointer |
|
3248
|
|
|
|
|
|
|
// and length and therefore does not need to be '\0' terminated. Optionally |
|
3249
|
|
|
|
|
|
|
// a tag_wildcard can be specified (or be NULL) and if so, results are |
|
3250
|
|
|
|
|
|
|
// filtered using this wildcard. The guesser parameter speficies whether |
|
3251
|
|
|
|
|
|
|
// a guesser can be used if the lemma is not found in the dictionary. Output |
|
3252
|
|
|
|
|
|
|
// is assigned to the forms vector. |
|
3253
|
|
|
|
|
|
|
// |
|
3254
|
|
|
|
|
|
|
// Tag_wildcard can be either NULL or a wildcard applied to the results. |
|
3255
|
|
|
|
|
|
|
// A ? in the wildcard matches any character, [bytes] matches any of the |
|
3256
|
|
|
|
|
|
|
// bytes and [^bytes] matches any byte different from the specified ones. |
|
3257
|
|
|
|
|
|
|
// A - has no special meaning inside the bytes and if ] is first in bytes, it |
|
3258
|
|
|
|
|
|
|
// does not end the bytes group. |
|
3259
|
|
|
|
|
|
|
// |
|
3260
|
|
|
|
|
|
|
// If the given lemma is only a raw lemma, all lemma ids with this raw lemma |
|
3261
|
|
|
|
|
|
|
// are returned. Otherwise only matching lemma ids are returned, ignoring any |
|
3262
|
|
|
|
|
|
|
// lemma comments. For every found lemma, matching forms are filtered using |
|
3263
|
|
|
|
|
|
|
// the tag_wildcard. If at least one lemma is found in the dictionary, |
|
3264
|
|
|
|
|
|
|
// NO_GUESSER is returned. If guesser == GUESSER and the lemma is found by |
|
3265
|
|
|
|
|
|
|
// the guesser, GUESSER is returned. Otherwise, forms are cleared and <0 is |
|
3266
|
|
|
|
|
|
|
// returned. |
|
3267
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const = 0; |
|
3268
|
|
|
|
|
|
|
|
|
3269
|
|
|
|
|
|
|
// Rawlemma and lemma id identification |
|
3270
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const = 0; |
|
3271
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const = 0; |
|
3272
|
|
|
|
|
|
|
|
|
3273
|
|
|
|
|
|
|
// Rawform identification |
|
3274
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const = 0; |
|
3275
|
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this morphology. |
|
3277
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
|
3278
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const = 0; |
|
3279
|
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
// Return a derivator for this morphology, or NULL if it does not exist. |
|
3281
|
|
|
|
|
|
|
// The returned instance is owned by the morphology and should not be deleted. |
|
3282
|
|
|
|
|
|
|
virtual const derivator* get_derivator() const; |
|
3283
|
|
|
|
|
|
|
|
|
3284
|
|
|
|
|
|
|
protected: |
|
3285
|
|
|
|
|
|
|
unique_ptr derinet; |
|
3286
|
|
|
|
|
|
|
}; |
|
3287
|
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
} // namespace morphodita |
|
3289
|
|
|
|
|
|
|
|
|
3290
|
|
|
|
|
|
|
///////// |
|
3291
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/tagset_converter.h |
|
3292
|
|
|
|
|
|
|
///////// |
|
3293
|
|
|
|
|
|
|
|
|
3294
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3295
|
|
|
|
|
|
|
// |
|
3296
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
3297
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3298
|
|
|
|
|
|
|
// |
|
3299
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3300
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3301
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3302
|
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
namespace morphodita { |
|
3304
|
|
|
|
|
|
|
|
|
3305
|
0
|
|
|
|
|
|
class tagset_converter { |
|
3306
|
|
|
|
|
|
|
public: |
|
3307
|
0
|
|
|
|
|
|
virtual ~tagset_converter() {} |
|
3308
|
|
|
|
|
|
|
|
|
3309
|
|
|
|
|
|
|
// Convert a tag-lemma pair to a different tag set. |
|
3310
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const = 0; |
|
3311
|
|
|
|
|
|
|
// Convert a result of analysis to a different tag set. Apart from calling |
|
3312
|
|
|
|
|
|
|
// convert, any repeated entry is removed. |
|
3313
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const = 0; |
|
3314
|
|
|
|
|
|
|
// Convert a result of generation to a different tag set. Apart from calling |
|
3315
|
|
|
|
|
|
|
// convert, any repeated entry is removed. |
|
3316
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const = 0; |
|
3317
|
|
|
|
|
|
|
|
|
3318
|
|
|
|
|
|
|
// Static factory methods |
|
3319
|
|
|
|
|
|
|
static tagset_converter* new_identity_converter(); |
|
3320
|
|
|
|
|
|
|
|
|
3321
|
|
|
|
|
|
|
static tagset_converter* new_pdt_to_conll2009_converter(); |
|
3322
|
|
|
|
|
|
|
static tagset_converter* new_strip_lemma_comment_converter(const morpho& dictionary); |
|
3323
|
|
|
|
|
|
|
static tagset_converter* new_strip_lemma_id_converter(const morpho& dictionary); |
|
3324
|
|
|
|
|
|
|
}; |
|
3325
|
|
|
|
|
|
|
|
|
3326
|
|
|
|
|
|
|
// Helper method for creating tagset_converter from instance name. |
|
3327
|
|
|
|
|
|
|
tagset_converter* new_tagset_converter(const string& name, const morpho& dictionary); |
|
3328
|
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
// Helper methods making sure remapped results are unique. |
|
3330
|
|
|
|
|
|
|
void tagset_converter_unique_analyzed(vector& tagged_lemmas); |
|
3331
|
|
|
|
|
|
|
void tagset_converter_unique_generated(vector& forms); |
|
3332
|
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
} // namespace morphodita |
|
3334
|
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
///////// |
|
3336
|
|
|
|
|
|
|
// File: morphodita/derivator/derivation_formatter.h |
|
3337
|
|
|
|
|
|
|
///////// |
|
3338
|
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3340
|
|
|
|
|
|
|
// |
|
3341
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3342
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3343
|
|
|
|
|
|
|
// |
|
3344
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3345
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3346
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3347
|
|
|
|
|
|
|
|
|
3348
|
|
|
|
|
|
|
namespace morphodita { |
|
3349
|
|
|
|
|
|
|
|
|
3350
|
0
|
|
|
|
|
|
class derivation_formatter { |
|
3351
|
|
|
|
|
|
|
public: |
|
3352
|
0
|
|
|
|
|
|
virtual ~derivation_formatter() {} |
|
3353
|
|
|
|
|
|
|
|
|
3354
|
|
|
|
|
|
|
// Perform the required derivation and store it directly in the lemma. |
|
3355
|
|
|
|
|
|
|
virtual void format_derivation(string& lemma) const; |
|
3356
|
|
|
|
|
|
|
|
|
3357
|
|
|
|
|
|
|
// Perform the required derivation and store it directly in the tagged_lemma. |
|
3358
|
|
|
|
|
|
|
// If a tagset_converter is given, it is also applied. |
|
3359
|
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter = nullptr) const = 0; |
|
3360
|
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
// Perform the required derivation on a list of tagged_lemmas. |
|
3362
|
|
|
|
|
|
|
// If a tagset_converter is given, it is also applied. |
|
3363
|
|
|
|
|
|
|
// Either way, only unique entries are returned. |
|
3364
|
|
|
|
|
|
|
virtual void format_tagged_lemmas(vector& lemmas, const tagset_converter* converter = nullptr) const; |
|
3365
|
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
// Static factory methods. |
|
3367
|
|
|
|
|
|
|
static derivation_formatter* new_none_derivation_formatter(); |
|
3368
|
|
|
|
|
|
|
static derivation_formatter* new_root_derivation_formatter(const derivator* derinet); |
|
3369
|
|
|
|
|
|
|
static derivation_formatter* new_path_derivation_formatter(const derivator* derinet); |
|
3370
|
|
|
|
|
|
|
static derivation_formatter* new_tree_derivation_formatter(const derivator* derinet); |
|
3371
|
|
|
|
|
|
|
// String version of static factory method. |
|
3372
|
|
|
|
|
|
|
static derivation_formatter* new_derivation_formatter(string_piece name, const derivator* derinet); |
|
3373
|
|
|
|
|
|
|
}; |
|
3374
|
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
} // namespace morphodita |
|
3376
|
|
|
|
|
|
|
|
|
3377
|
|
|
|
|
|
|
///////// |
|
3378
|
|
|
|
|
|
|
// File: morphodita/derivator/derivation_formatter.cpp |
|
3379
|
|
|
|
|
|
|
///////// |
|
3380
|
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3382
|
|
|
|
|
|
|
// |
|
3383
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3384
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3385
|
|
|
|
|
|
|
// |
|
3386
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3387
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3388
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3389
|
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
namespace morphodita { |
|
3391
|
|
|
|
|
|
|
|
|
3392
|
0
|
|
|
|
|
|
void derivation_formatter::format_derivation(string& lemma) const { |
|
3393
|
0
|
|
|
|
|
|
tagged_lemma result; |
|
3394
|
0
|
|
|
|
|
|
result.lemma.swap(lemma); |
|
3395
|
0
|
0
|
|
|
|
|
format_tagged_lemma(result); |
|
3396
|
0
|
|
|
|
|
|
lemma.swap(result.lemma); |
|
3397
|
0
|
|
|
|
|
|
} |
|
3398
|
|
|
|
|
|
|
|
|
3399
|
0
|
|
|
|
|
|
void derivation_formatter::format_tagged_lemmas(vector& lemmas, const tagset_converter* converter) const { |
|
3400
|
0
|
0
|
|
|
|
|
for (auto&& lemma : lemmas) |
|
3401
|
0
|
|
|
|
|
|
format_tagged_lemma(lemma, converter); |
|
3402
|
|
|
|
|
|
|
|
|
3403
|
0
|
0
|
|
|
|
|
if (lemmas.size() > 1) |
|
3404
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(lemmas); |
|
3405
|
0
|
|
|
|
|
|
} |
|
3406
|
|
|
|
|
|
|
|
|
3407
|
0
|
|
|
|
|
|
class none_derivation_formatter : public derivation_formatter { |
|
3408
|
0
|
|
|
|
|
|
virtual void format_derivation(string& /*lemma*/) const override {} |
|
3409
|
|
|
|
|
|
|
|
|
3410
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
|
3411
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
3412
|
0
|
|
|
|
|
|
} |
|
3413
|
|
|
|
|
|
|
|
|
3414
|
0
|
|
|
|
|
|
virtual void format_tagged_lemmas(vector& lemmas, const tagset_converter* converter) const override { |
|
3415
|
0
|
0
|
|
|
|
|
if (converter) converter->convert_analyzed(lemmas); |
|
3416
|
0
|
|
|
|
|
|
} |
|
3417
|
|
|
|
|
|
|
}; |
|
3418
|
|
|
|
|
|
|
|
|
3419
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_none_derivation_formatter() { |
|
3420
|
0
|
|
|
|
|
|
return new none_derivation_formatter(); |
|
3421
|
|
|
|
|
|
|
} |
|
3422
|
|
|
|
|
|
|
|
|
3423
|
0
|
|
|
|
|
|
class root_derivation_formatter : public derivation_formatter { |
|
3424
|
|
|
|
|
|
|
public: |
|
3425
|
0
|
|
|
|
|
|
root_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
|
3426
|
|
|
|
|
|
|
|
|
3427
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
|
3428
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); ) |
|
|
|
0
|
|
|
|
|
|
|
3429
|
0
|
|
|
|
|
|
lemma.lemma.assign(parent.lemma); |
|
3430
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
3431
|
0
|
|
|
|
|
|
} |
|
3432
|
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
private: |
|
3434
|
|
|
|
|
|
|
const derivator* derinet; |
|
3435
|
|
|
|
|
|
|
}; |
|
3436
|
|
|
|
|
|
|
|
|
3437
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_root_derivation_formatter(const derivator* derinet) { |
|
3438
|
0
|
0
|
|
|
|
|
return derinet ? new root_derivation_formatter(derinet) : nullptr; |
|
|
|
0
|
|
|
|
|
|
|
3439
|
|
|
|
|
|
|
} |
|
3440
|
|
|
|
|
|
|
|
|
3441
|
0
|
|
|
|
|
|
class path_derivation_formatter : public derivation_formatter { |
|
3442
|
|
|
|
|
|
|
public: |
|
3443
|
0
|
|
|
|
|
|
path_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
|
3444
|
|
|
|
|
|
|
|
|
3445
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
|
3446
|
0
|
|
|
|
|
|
tagged_lemma current(lemma); |
|
3447
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
|
|
0
|
|
|
|
|
|
|
3448
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) { |
|
|
|
0
|
|
|
|
|
|
|
3449
|
0
|
0
|
|
|
|
|
tagged_lemma parrent_lemma(parent.lemma, current.tag); |
|
3450
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(parrent_lemma); |
|
|
|
0
|
|
|
|
|
|
|
3451
|
0
|
0
|
|
|
|
|
lemma.lemma.append(" ").append(parrent_lemma.lemma); |
|
3452
|
|
|
|
|
|
|
} |
|
3453
|
0
|
|
|
|
|
|
} |
|
3454
|
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
private: |
|
3456
|
|
|
|
|
|
|
const derivator* derinet; |
|
3457
|
|
|
|
|
|
|
}; |
|
3458
|
|
|
|
|
|
|
|
|
3459
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_path_derivation_formatter(const derivator* derinet) { |
|
3460
|
0
|
0
|
|
|
|
|
return derinet ? new path_derivation_formatter(derinet) : nullptr; |
|
|
|
0
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
} |
|
3462
|
|
|
|
|
|
|
|
|
3463
|
0
|
|
|
|
|
|
class tree_derivation_formatter : public derivation_formatter { |
|
3464
|
|
|
|
|
|
|
public: |
|
3465
|
0
|
|
|
|
|
|
tree_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
|
3466
|
|
|
|
|
|
|
|
|
3467
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
|
3468
|
|
|
|
|
|
|
string root(lemma.lemma), tag(lemma.tag); |
|
3469
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
|
|
0
|
|
|
|
|
|
|
3470
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {} |
|
|
|
0
|
|
|
|
|
|
|
3471
|
0
|
0
|
|
|
|
|
format_tree(root, tag, lemma, converter); |
|
3472
|
0
|
|
|
|
|
|
} |
|
3473
|
|
|
|
|
|
|
|
|
3474
|
0
|
|
|
|
|
|
void format_tree(const string& root, const string& tag, tagged_lemma& tree, const tagset_converter* converter) const { |
|
3475
|
0
|
|
|
|
|
|
vector children; |
|
3476
|
|
|
|
|
|
|
|
|
3477
|
0
|
0
|
|
|
|
|
if (converter) { |
|
3478
|
0
|
0
|
|
|
|
|
tagged_lemma current(root, tag); |
|
3479
|
0
|
0
|
|
|
|
|
converter->convert(current); |
|
3480
|
0
|
0
|
|
|
|
|
tree.lemma.append(" ").append(current.lemma); |
|
3481
|
|
|
|
|
|
|
} else { |
|
3482
|
0
|
0
|
|
|
|
|
tree.lemma.append(" ").append(root); |
|
3483
|
|
|
|
|
|
|
} |
|
3484
|
|
|
|
|
|
|
|
|
3485
|
0
|
0
|
|
|
|
|
if (derinet->children(root, children)) |
|
|
|
0
|
|
|
|
|
|
|
3486
|
0
|
0
|
|
|
|
|
for (auto&& child : children) |
|
3487
|
0
|
0
|
|
|
|
|
format_tree(child.lemma, tag, tree, converter); |
|
3488
|
0
|
0
|
|
|
|
|
tree.lemma.push_back(' '); |
|
3489
|
0
|
|
|
|
|
|
} |
|
3490
|
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
private: |
|
3492
|
|
|
|
|
|
|
const derivator* derinet; |
|
3493
|
|
|
|
|
|
|
}; |
|
3494
|
|
|
|
|
|
|
|
|
3495
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_tree_derivation_formatter(const derivator* derinet) { |
|
3496
|
0
|
0
|
|
|
|
|
return derinet ? new tree_derivation_formatter(derinet) : nullptr; |
|
|
|
0
|
|
|
|
|
|
|
3497
|
|
|
|
|
|
|
} |
|
3498
|
|
|
|
|
|
|
|
|
3499
|
0
|
|
|
|
|
|
derivation_formatter* derivation_formatter::new_derivation_formatter(string_piece name, const derivator* derinet) { |
|
3500
|
0
|
0
|
|
|
|
|
if (name == "none") return new_none_derivation_formatter(); |
|
3501
|
0
|
0
|
|
|
|
|
if (name == "root") return new_root_derivation_formatter(derinet); |
|
3502
|
0
|
0
|
|
|
|
|
if (name == "path") return new_path_derivation_formatter(derinet); |
|
3503
|
0
|
0
|
|
|
|
|
if (name == "tree") return new_tree_derivation_formatter(derinet); |
|
3504
|
|
|
|
|
|
|
return nullptr; |
|
3505
|
|
|
|
|
|
|
} |
|
3506
|
|
|
|
|
|
|
|
|
3507
|
|
|
|
|
|
|
} // namespace morphodita |
|
3508
|
|
|
|
|
|
|
|
|
3509
|
|
|
|
|
|
|
///////// |
|
3510
|
|
|
|
|
|
|
// File: morphodita/morpho/small_stringops.h |
|
3511
|
|
|
|
|
|
|
///////// |
|
3512
|
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3514
|
|
|
|
|
|
|
// |
|
3515
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
3516
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3517
|
|
|
|
|
|
|
// |
|
3518
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3519
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3520
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3521
|
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
namespace morphodita { |
|
3523
|
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
// Declarations |
|
3525
|
|
|
|
|
|
|
inline bool small_memeq(const void* a, const void* b, size_t len); |
|
3526
|
|
|
|
|
|
|
inline void small_memcpy(void* dest, const void* src, size_t len); |
|
3527
|
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
// Definitions |
|
3529
|
|
|
|
|
|
|
bool small_memeq(const void* a_void, const void* b_void, size_t len) { |
|
3530
|
|
|
|
|
|
|
const char* a = (const char*)a_void; |
|
3531
|
|
|
|
|
|
|
const char* b = (const char*)b_void; |
|
3532
|
|
|
|
|
|
|
|
|
3533
|
438
|
0
|
|
|
|
|
while (len--) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3534
|
393
|
0
|
|
|
|
|
if (*a++ != *b++) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3535
|
|
|
|
|
|
|
return false; |
|
3536
|
|
|
|
|
|
|
return true; |
|
3537
|
|
|
|
|
|
|
} |
|
3538
|
|
|
|
|
|
|
|
|
3539
|
|
|
|
|
|
|
void small_memcpy(void* dest_void, const void* src_void, size_t len) { |
|
3540
|
|
|
|
|
|
|
char* dest = (char*)dest_void; |
|
3541
|
|
|
|
|
|
|
const char* src = (const char*)src_void; |
|
3542
|
|
|
|
|
|
|
|
|
3543
|
437
|
0
|
|
|
|
|
while (len--) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
3544
|
316
|
|
|
|
|
|
*dest++ = *src++; |
|
3545
|
|
|
|
|
|
|
} |
|
3546
|
|
|
|
|
|
|
|
|
3547
|
|
|
|
|
|
|
} // namespace morphodita |
|
3548
|
|
|
|
|
|
|
|
|
3549
|
|
|
|
|
|
|
///////// |
|
3550
|
|
|
|
|
|
|
// File: utils/pointer_decoder.h |
|
3551
|
|
|
|
|
|
|
///////// |
|
3552
|
|
|
|
|
|
|
|
|
3553
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
3554
|
|
|
|
|
|
|
// |
|
3555
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
3556
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3557
|
|
|
|
|
|
|
// |
|
3558
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3559
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3560
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3561
|
|
|
|
|
|
|
|
|
3562
|
|
|
|
|
|
|
namespace utils { |
|
3563
|
|
|
|
|
|
|
|
|
3564
|
|
|
|
|
|
|
// |
|
3565
|
|
|
|
|
|
|
// Declarations |
|
3566
|
|
|
|
|
|
|
// |
|
3567
|
|
|
|
|
|
|
|
|
3568
|
|
|
|
|
|
|
class pointer_decoder { |
|
3569
|
|
|
|
|
|
|
public: |
|
3570
|
|
|
|
|
|
|
inline pointer_decoder(const unsigned char*& data); |
|
3571
|
|
|
|
|
|
|
inline unsigned next_1B(); |
|
3572
|
|
|
|
|
|
|
inline unsigned next_2B(); |
|
3573
|
|
|
|
|
|
|
inline unsigned next_4B(); |
|
3574
|
|
|
|
|
|
|
inline void next_str(string& str); |
|
3575
|
|
|
|
|
|
|
template inline const T* next(unsigned elements); |
|
3576
|
|
|
|
|
|
|
|
|
3577
|
|
|
|
|
|
|
private: |
|
3578
|
|
|
|
|
|
|
const unsigned char*& data; |
|
3579
|
|
|
|
|
|
|
}; |
|
3580
|
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
// |
|
3582
|
|
|
|
|
|
|
// Definitions |
|
3583
|
|
|
|
|
|
|
// |
|
3584
|
|
|
|
|
|
|
|
|
3585
|
43
|
|
|
|
|
|
pointer_decoder::pointer_decoder(const unsigned char*& data) : data(data) {} |
|
3586
|
|
|
|
|
|
|
|
|
3587
|
|
|
|
|
|
|
unsigned pointer_decoder::next_1B() { |
|
3588
|
0
|
|
|
|
|
|
return *data++; |
|
3589
|
|
|
|
|
|
|
} |
|
3590
|
|
|
|
|
|
|
|
|
3591
|
|
|
|
|
|
|
unsigned pointer_decoder::next_2B() { |
|
3592
|
|
|
|
|
|
|
uint16_t result; |
|
3593
|
56
|
|
|
|
|
|
memcpy(&result, data, sizeof(uint16_t)); |
|
3594
|
56
|
|
|
|
|
|
data += sizeof(uint16_t); |
|
3595
|
13
|
|
|
|
|
|
return result; |
|
3596
|
|
|
|
|
|
|
} |
|
3597
|
|
|
|
|
|
|
|
|
3598
|
|
|
|
|
|
|
unsigned pointer_decoder::next_4B() { |
|
3599
|
|
|
|
|
|
|
uint32_t result; |
|
3600
|
30
|
|
|
|
|
|
memcpy(&result, data, sizeof(uint32_t)); |
|
3601
|
30
|
|
|
|
|
|
data += sizeof(uint32_t); |
|
3602
|
|
|
|
|
|
|
return result; |
|
3603
|
|
|
|
|
|
|
} |
|
3604
|
|
|
|
|
|
|
|
|
3605
|
|
|
|
|
|
|
void pointer_decoder::next_str(string& str) { |
|
3606
|
|
|
|
|
|
|
unsigned len = next_1B(); |
|
3607
|
|
|
|
|
|
|
if (len == 255) len = next_4B(); |
|
3608
|
|
|
|
|
|
|
str.assign(next(len), len); |
|
3609
|
|
|
|
|
|
|
} |
|
3610
|
|
|
|
|
|
|
|
|
3611
|
|
|
|
|
|
|
template const T* pointer_decoder::next(unsigned elements) { |
|
3612
|
39
|
|
|
|
|
|
const T* result = (const T*) data; |
|
3613
|
0
|
|
|
|
|
|
data += sizeof(T) * elements; |
|
3614
|
|
|
|
|
|
|
return result; |
|
3615
|
|
|
|
|
|
|
} |
|
3616
|
|
|
|
|
|
|
|
|
3617
|
|
|
|
|
|
|
} // namespace utils |
|
3618
|
|
|
|
|
|
|
|
|
3619
|
|
|
|
|
|
|
///////// |
|
3620
|
|
|
|
|
|
|
// File: morphodita/morpho/persistent_unordered_map.h |
|
3621
|
|
|
|
|
|
|
///////// |
|
3622
|
|
|
|
|
|
|
|
|
3623
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
3624
|
|
|
|
|
|
|
// |
|
3625
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
3626
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3627
|
|
|
|
|
|
|
// |
|
3628
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3629
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3630
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3631
|
|
|
|
|
|
|
|
|
3632
|
|
|
|
|
|
|
namespace morphodita { |
|
3633
|
|
|
|
|
|
|
|
|
3634
|
|
|
|
|
|
|
// Declarations |
|
3635
|
0
|
|
|
|
|
|
class persistent_unordered_map { |
|
3636
|
|
|
|
|
|
|
public: |
|
3637
|
|
|
|
|
|
|
// Accessing function |
|
3638
|
|
|
|
|
|
|
template |
|
3639
|
|
|
|
|
|
|
inline const unsigned char* at(const char* str, int len, EntrySize entry_size) const; |
|
3640
|
|
|
|
|
|
|
|
|
3641
|
|
|
|
|
|
|
template |
|
3642
|
|
|
|
|
|
|
inline const T* at_typed(const char* str, int len) const; |
|
3643
|
|
|
|
|
|
|
|
|
3644
|
|
|
|
|
|
|
template |
|
3645
|
|
|
|
|
|
|
inline void iter(const char* str, int len, EntryProcess entry_process) const; |
|
3646
|
|
|
|
|
|
|
|
|
3647
|
|
|
|
|
|
|
template |
|
3648
|
|
|
|
|
|
|
inline void iter_all(EntryProcess entry_process) const; |
|
3649
|
|
|
|
|
|
|
|
|
3650
|
|
|
|
|
|
|
// Two helper functions accessing some internals |
|
3651
|
|
|
|
|
|
|
inline int max_length() const; |
|
3652
|
|
|
|
|
|
|
inline const unsigned char* data_start(int len) const; |
|
3653
|
|
|
|
|
|
|
|
|
3654
|
|
|
|
|
|
|
// Creation functions |
|
3655
|
|
|
|
|
|
|
persistent_unordered_map() {} |
|
3656
|
|
|
|
|
|
|
template |
|
3657
|
|
|
|
|
|
|
persistent_unordered_map(const unordered_map& map, double load_factor, EntryEncode entry_encode); |
|
3658
|
|
|
|
|
|
|
template |
|
3659
|
|
|
|
|
|
|
persistent_unordered_map(const unordered_map& map, double load_factor, bool add_prefixes, bool add_suffixes, EntryEncode entry_encode); |
|
3660
|
|
|
|
|
|
|
|
|
3661
|
|
|
|
|
|
|
// Manual creation functions |
|
3662
|
|
|
|
|
|
|
inline void resize(unsigned elems); |
|
3663
|
|
|
|
|
|
|
inline void add(const char* str, int str_len, int data_len); |
|
3664
|
|
|
|
|
|
|
inline void done_adding(); |
|
3665
|
|
|
|
|
|
|
inline unsigned char* fill(const char* str, int str_len, int data_len); |
|
3666
|
|
|
|
|
|
|
inline void done_filling(); |
|
3667
|
|
|
|
|
|
|
|
|
3668
|
|
|
|
|
|
|
// Serialization |
|
3669
|
|
|
|
|
|
|
inline void load(binary_decoder& data); |
|
3670
|
|
|
|
|
|
|
inline void save(binary_encoder& enc); |
|
3671
|
|
|
|
|
|
|
|
|
3672
|
|
|
|
|
|
|
private: |
|
3673
|
|
|
|
|
|
|
struct fnv_hash; |
|
3674
|
|
|
|
|
|
|
vector hashes; |
|
3675
|
|
|
|
|
|
|
|
|
3676
|
|
|
|
|
|
|
template |
|
3677
|
|
|
|
|
|
|
void construct(const map& map, double load_factor, EntryEncode entry_encode); |
|
3678
|
|
|
|
|
|
|
}; |
|
3679
|
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
// Definitions |
|
3681
|
147
|
|
|
|
|
|
struct persistent_unordered_map::fnv_hash { |
|
3682
|
16
|
|
|
|
|
|
fnv_hash(unsigned num) { |
|
3683
|
16
|
|
|
|
|
|
mask = 1; |
|
3684
|
68
|
100
|
|
|
|
|
while (mask < num) |
|
3685
|
52
|
|
|
|
|
|
mask <<= 1; |
|
3686
|
16
|
50
|
|
|
|
|
hash.resize(mask + 1); |
|
3687
|
16
|
|
|
|
|
|
mask--; |
|
3688
|
16
|
|
|
|
|
|
} |
|
3689
|
137
|
|
|
|
|
|
fnv_hash(binary_decoder& data) { |
|
3690
|
137
|
50
|
|
|
|
|
uint32_t size = data.next_4B(); |
|
3691
|
137
|
|
|
|
|
|
mask = size - 2; |
|
3692
|
137
|
50
|
|
|
|
|
hash.resize(size); |
|
3693
|
137
|
50
|
|
|
|
|
memcpy(hash.data(), data.next(size), size * sizeof(uint32_t)); |
|
3694
|
|
|
|
|
|
|
|
|
3695
|
137
|
50
|
|
|
|
|
size = data.next_4B(); |
|
3696
|
137
|
50
|
|
|
|
|
this->data.resize(size); |
|
3697
|
137
|
100
|
|
|
|
|
if (size) memcpy(this->data.data(), data.next(size), size); |
|
|
|
50
|
|
|
|
|
|
|
3698
|
137
|
|
|
|
|
|
} |
|
3699
|
|
|
|
|
|
|
|
|
3700
|
|
|
|
|
|
|
inline uint32_t index(const char* data, int len) const { |
|
3701
|
245
|
0
|
|
|
|
|
if (len <= 0) return 0; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
3702
|
227
|
0
|
|
|
|
|
if (len == 1) return unaligned_load(data); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
3703
|
198
|
0
|
|
|
|
|
if (len == 2) return unaligned_load(data); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
3704
|
|
|
|
|
|
|
|
|
3705
|
|
|
|
|
|
|
uint32_t hash = 2166136261U; |
|
3706
|
540
|
0
|
|
|
|
|
while (len--) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
3707
|
446
|
|
|
|
|
|
hash = (hash ^ unsigned((signed char)*data++)) * 16777619U; |
|
3708
|
94
|
|
|
|
|
|
return hash & mask; |
|
3709
|
|
|
|
|
|
|
} |
|
3710
|
|
|
|
|
|
|
|
|
3711
|
|
|
|
|
|
|
inline void save(binary_encoder& enc); |
|
3712
|
|
|
|
|
|
|
|
|
3713
|
|
|
|
|
|
|
unsigned mask; |
|
3714
|
|
|
|
|
|
|
vector hash; |
|
3715
|
|
|
|
|
|
|
vector data; |
|
3716
|
|
|
|
|
|
|
}; |
|
3717
|
|
|
|
|
|
|
|
|
3718
|
|
|
|
|
|
|
template |
|
3719
|
48
|
|
|
|
|
|
const unsigned char* persistent_unordered_map::at(const char* str, int len, EntrySize entry_size) const { |
|
3720
|
48
|
0
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return nullptr; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3721
|
|
|
|
|
|
|
|
|
3722
|
48
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
|
3723
|
96
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
|
3724
|
96
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
|
3725
|
|
|
|
|
|
|
|
|
3726
|
48
|
0
|
|
|
|
|
if (len <= 2) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3727
|
48
|
0
|
|
|
|
|
return data != end ? data + len : nullptr; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3728
|
|
|
|
|
|
|
|
|
3729
|
0
|
0
|
|
|
|
|
while (data < end) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3730
|
0
|
0
|
|
|
|
|
if (small_memeq(str, data, len)) return data + len; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3731
|
0
|
|
|
|
|
|
data += len; |
|
3732
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
|
3733
|
0
|
|
|
|
|
|
entry_size(decoder); |
|
3734
|
|
|
|
|
|
|
} |
|
3735
|
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
return nullptr; |
|
3737
|
|
|
|
|
|
|
} |
|
3738
|
|
|
|
|
|
|
|
|
3739
|
|
|
|
|
|
|
template |
|
3740
|
204
|
|
|
|
|
|
const T* persistent_unordered_map::at_typed(const char* str, int len) const { |
|
3741
|
204
|
50
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return nullptr; |
|
|
|
100
|
|
|
|
|
|
|
3742
|
|
|
|
|
|
|
|
|
3743
|
149
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
|
3744
|
298
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
|
3745
|
298
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
|
3746
|
|
|
|
|
|
|
|
|
3747
|
149
|
100
|
|
|
|
|
if (len <= 2) |
|
|
|
100
|
|
|
|
|
|
|
3748
|
99
|
100
|
|
|
|
|
return data != end ? (const T*)(data + len) : nullptr; |
|
|
|
50
|
|
|
|
|
|
|
3749
|
|
|
|
|
|
|
|
|
3750
|
79
|
100
|
|
|
|
|
while (data < end) { |
|
|
|
100
|
|
|
|
|
|
|
3751
|
58
|
100
|
|
|
|
|
if (small_memeq(str, data, len)) return (const T*)(data + len); |
|
|
|
100
|
|
|
|
|
|
|
3752
|
29
|
|
|
|
|
|
data += len + sizeof(T); |
|
3753
|
|
|
|
|
|
|
} |
|
3754
|
|
|
|
|
|
|
|
|
3755
|
|
|
|
|
|
|
return nullptr; |
|
3756
|
|
|
|
|
|
|
} |
|
3757
|
|
|
|
|
|
|
|
|
3758
|
|
|
|
|
|
|
template |
|
3759
|
30
|
|
|
|
|
|
void persistent_unordered_map::iter(const char* str, int len, EntryProcess entry_process) const { |
|
3760
|
30
|
0
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3761
|
|
|
|
|
|
|
|
|
3762
|
30
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
|
3763
|
60
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
|
3764
|
30
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
|
3765
|
|
|
|
|
|
|
|
|
3766
|
60
|
0
|
|
|
|
|
while (data < end) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
auto start = (const char*) data; |
|
3768
|
30
|
|
|
|
|
|
data += len; |
|
3769
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
|
3770
|
30
|
|
|
|
|
|
entry_process(start, decoder); |
|
3771
|
|
|
|
|
|
|
} |
|
3772
|
|
|
|
|
|
|
} |
|
3773
|
|
|
|
|
|
|
|
|
3774
|
|
|
|
|
|
|
template |
|
3775
|
2
|
|
|
|
|
|
void persistent_unordered_map::iter_all(EntryProcess entry_process) const { |
|
3776
|
4
|
100
|
|
|
|
|
for (unsigned len = 0; len < hashes.size(); len++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3777
|
3
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data(); |
|
3778
|
|
|
|
|
|
|
const unsigned char* end = data + hashes[len].data.size(); |
|
3779
|
|
|
|
|
|
|
|
|
3780
|
16
|
100
|
|
|
|
|
while (data < end) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
auto start = (const char*) data; |
|
3782
|
13
|
|
|
|
|
|
data += len; |
|
3783
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
|
3784
|
13
|
|
|
|
|
|
entry_process(start, len, decoder); |
|
3785
|
|
|
|
|
|
|
} |
|
3786
|
|
|
|
|
|
|
} |
|
3787
|
1
|
|
|
|
|
|
} |
|
3788
|
|
|
|
|
|
|
|
|
3789
|
|
|
|
|
|
|
int persistent_unordered_map::max_length() const { |
|
3790
|
52
|
|
|
|
|
|
return hashes.size(); |
|
3791
|
|
|
|
|
|
|
} |
|
3792
|
|
|
|
|
|
|
|
|
3793
|
|
|
|
|
|
|
const unsigned char* persistent_unordered_map::data_start(int len) const { |
|
3794
|
19
|
0
|
|
|
|
|
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3795
|
|
|
|
|
|
|
} |
|
3796
|
|
|
|
|
|
|
|
|
3797
|
16
|
|
|
|
|
|
void persistent_unordered_map::resize(unsigned elems) { |
|
3798
|
16
|
100
|
|
|
|
|
if (hashes.size() == 0) hashes.emplace_back(1); |
|
3799
|
14
|
100
|
|
|
|
|
else if (hashes.size() == 1) hashes.emplace_back(1<<8); |
|
3800
|
12
|
100
|
|
|
|
|
else if (hashes.size() == 2) hashes.emplace_back(1<<16); |
|
3801
|
10
|
|
|
|
|
|
else hashes.emplace_back(elems); |
|
3802
|
16
|
|
|
|
|
|
} |
|
3803
|
|
|
|
|
|
|
|
|
3804
|
9
|
|
|
|
|
|
void persistent_unordered_map::add(const char* str, int str_len, int data_len) { |
|
3805
|
9
|
50
|
|
|
|
|
if (unsigned(str_len) < hashes.size()) |
|
3806
|
9
|
|
|
|
|
|
hashes[str_len].hash[hashes[str_len].index(str, str_len)] += str_len + data_len; |
|
3807
|
9
|
|
|
|
|
|
} |
|
3808
|
|
|
|
|
|
|
|
|
3809
|
2
|
|
|
|
|
|
void persistent_unordered_map::done_adding() { |
|
3810
|
18
|
100
|
|
|
|
|
for (auto&& hash : hashes) { |
|
3811
|
|
|
|
|
|
|
int total = 0; |
|
3812
|
131632
|
100
|
|
|
|
|
for (auto&& len : hash.hash) total += len, len = total - len; |
|
3813
|
16
|
|
|
|
|
|
hash.data.resize(total); |
|
3814
|
|
|
|
|
|
|
} |
|
3815
|
2
|
|
|
|
|
|
} |
|
3816
|
|
|
|
|
|
|
|
|
3817
|
9
|
|
|
|
|
|
unsigned char* persistent_unordered_map::fill(const char* str, int str_len, int data_len) { |
|
3818
|
9
|
50
|
|
|
|
|
if (unsigned(str_len) < hashes.size()) { |
|
3819
|
9
|
|
|
|
|
|
unsigned index = hashes[str_len].index(str, str_len); |
|
3820
|
18
|
|
|
|
|
|
unsigned offset = hashes[str_len].hash[index]; |
|
3821
|
9
|
|
|
|
|
|
small_memcpy(hashes[str_len].data.data() + offset, str, str_len); |
|
3822
|
9
|
|
|
|
|
|
hashes[str_len].hash[index] += str_len + data_len; |
|
3823
|
9
|
|
|
|
|
|
return hashes[str_len].data.data() + offset + str_len; |
|
3824
|
|
|
|
|
|
|
} |
|
3825
|
|
|
|
|
|
|
return nullptr; |
|
3826
|
|
|
|
|
|
|
} |
|
3827
|
|
|
|
|
|
|
|
|
3828
|
2
|
|
|
|
|
|
void persistent_unordered_map::done_filling() { |
|
3829
|
18
|
100
|
|
|
|
|
for (auto&& hash : hashes) |
|
3830
|
131632
|
100
|
|
|
|
|
for (int i = hash.hash.size() - 1; i >= 0; i--) |
|
3831
|
131616
|
100
|
|
|
|
|
hash.hash[i] = i > 0 ? hash.hash[i-1] : 0; |
|
3832
|
2
|
|
|
|
|
|
} |
|
3833
|
|
|
|
|
|
|
|
|
3834
|
48
|
|
|
|
|
|
// Replaces the contents of the map with tables read from `data`: a one-byte
// table count followed by that many serialized tables, each decoded by the
// table constructor taking the decoder.
void persistent_unordered_map::load(binary_decoder& data) {
  // The count is read before the old tables are dropped, as in the
  // original statement order.
  unsigned remaining = data.next_1B();

  hashes.clear();
  while (remaining--)
    hashes.emplace_back(data);
}
|
3841
|
|
|
|
|
|
|
|
|
3842
|
|
|
|
|
|
|
} // namespace morphodita |
|
3843
|
|
|
|
|
|
|
|
|
3844
|
|
|
|
|
|
|
///////// |
|
3845
|
|
|
|
|
|
|
// File: morphodita/derivator/derivator_dictionary.h |
|
3846
|
|
|
|
|
|
|
///////// |
|
3847
|
|
|
|
|
|
|
|
|
3848
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
|
3849
|
|
|
|
|
|
|
// |
|
3850
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3851
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3852
|
|
|
|
|
|
|
// |
|
3853
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3854
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3855
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3856
|
|
|
|
|
|
|
|
|
3857
|
|
|
|
|
|
|
namespace morphodita { |
|
3858
|
|
|
|
|
|
|
|
|
3859
|
0
|
|
|
|
|
|
class derivator_dictionary : public derivator { |
|
3860
|
|
|
|
|
|
|
public: |
|
3861
|
|
|
|
|
|
|
virtual bool parent(string_piece lemma, derivated_lemma& parent) const override; |
|
3862
|
|
|
|
|
|
|
virtual bool children(string_piece lemma, vector& children) const override; |
|
3863
|
|
|
|
|
|
|
|
|
3864
|
|
|
|
|
|
|
bool load(istream& is); |
|
3865
|
|
|
|
|
|
|
|
|
3866
|
|
|
|
|
|
|
private: |
|
3867
|
|
|
|
|
|
|
friend class morpho; |
|
3868
|
|
|
|
|
|
|
const morpho* dictionary; |
|
3869
|
|
|
|
|
|
|
persistent_unordered_map derinet; |
|
3870
|
|
|
|
|
|
|
}; |
|
3871
|
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
} // namespace morphodita |
|
3873
|
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
///////// |
|
3875
|
|
|
|
|
|
|
// File: morphodita/derivator/derivator_dictionary.cpp |
|
3876
|
|
|
|
|
|
|
///////// |
|
3877
|
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
|
3879
|
|
|
|
|
|
|
// |
|
3880
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
3881
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
3882
|
|
|
|
|
|
|
// |
|
3883
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
3884
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
3885
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
3886
|
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
namespace morphodita { |
|
3888
|
|
|
|
|
|
|
|
|
3889
|
0
|
|
|
|
|
|
// Looks up the derivational parent of `lemma`.
//
// When a morphological dictionary is attached, the lemma is first shortened
// to its lemma id. The record stored for a lemma in `derinet` is
//   1B comment length, comment, 4B encoded parent, 2B children count,
//   4B encoded child for every child,
// where an encoded lemma keeps the lemma length in its low 8 bits and the
// offset of its record (relative to data_start(length)) in the high 24 bits.
// An encoded parent equal to zero means the lemma has no parent.
//
// On success the parent lemma, followed by its comment if it has one, is
// stored in parent.lemma and true is returned; otherwise parent.lemma is
// cleared and false is returned.
bool derivator_dictionary::parent(string_piece lemma, derivated_lemma& parent) const {
  if (dictionary) lemma.len = dictionary->lemma_id_len(lemma);

  auto lemma_data = derinet.at(lemma.str, lemma.len, [](pointer_decoder& data) {
    data.next<char>(data.next_1B());
    data.next_4B();
    data.next<uint32_t>(data.next_2B());
  });
  if (lemma_data) {
    // The field lies at an arbitrary byte offset, so it is read with
    // unaligned_load (as load() does) rather than through a casted pointer.
    auto parent_encoded = unaligned_load<uint32_t>(lemma_data + 1 + *lemma_data);
    if (parent_encoded) {
      unsigned parent_len = parent_encoded & 0xFF;
      auto parent_data = derinet.data_start(parent_len) + (parent_encoded >> 8);
      parent.lemma.assign((const char*) parent_data, parent_len);
      // The byte after the lemma is the length of its comment.
      if (parent_data[parent_len])
        parent.lemma.append((const char*) parent_data + parent_len + 1, parent_data[parent_len]);
      return true;
    }
  }

  parent.lemma.clear();
  return false;
}
|
3911
|
|
|
|
|
|
|
|
|
3912
|
0
|
|
|
|
|
|
bool derivator_dictionary::children(string_piece lemma, vector& children) const { |
|
3913
|
0
|
0
|
|
|
|
|
if (dictionary) lemma.len = dictionary->lemma_id_len(lemma); |
|
3914
|
|
|
|
|
|
|
|
|
3915
|
0
|
|
|
|
|
|
auto lemma_data = derinet.at(lemma.str, lemma.len, [](pointer_decoder& data) { |
|
3916
|
|
|
|
|
|
|
data.next(data.next_1B()); |
|
3917
|
|
|
|
|
|
|
data.next_4B(); |
|
3918
|
|
|
|
|
|
|
data.next(data.next_2B()); |
|
3919
|
0
|
|
|
|
|
|
}); |
|
3920
|
0
|
0
|
|
|
|
|
if (lemma_data) { |
|
3921
|
0
|
|
|
|
|
|
auto children_len = *(uint16_t*)(lemma_data + 1 + *lemma_data + 4); |
|
3922
|
0
|
|
|
|
|
|
auto children_encoded = (uint32_t*)(lemma_data + 1 + *lemma_data + 4 + 2); |
|
3923
|
0
|
0
|
|
|
|
|
if (children_len) { |
|
3924
|
0
|
|
|
|
|
|
children.resize(children_len); |
|
3925
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < children_len; i++) { |
|
3926
|
0
|
|
|
|
|
|
unsigned child_len = children_encoded[i] & 0xFF; |
|
3927
|
0
|
|
|
|
|
|
auto child_data = derinet.data_start(child_len) + (children_encoded[i] >> 8); |
|
3928
|
0
|
|
|
|
|
|
children[i].lemma.assign((const char*) child_data, child_len); |
|
3929
|
0
|
0
|
|
|
|
|
if (child_data[child_len]) |
|
3930
|
0
|
|
|
|
|
|
children[i].lemma.append((const char*) child_data + child_len + 1, child_data[child_len]); |
|
3931
|
|
|
|
|
|
|
} |
|
3932
|
|
|
|
|
|
|
return true; |
|
3933
|
|
|
|
|
|
|
} |
|
3934
|
|
|
|
|
|
|
} |
|
3935
|
0
|
|
|
|
|
|
children.clear(); |
|
3936
|
0
|
|
|
|
|
|
return false; |
|
3937
|
|
|
|
|
|
|
} |
|
3938
|
|
|
|
|
|
|
|
|
3939
|
0
|
|
|
|
|
|
// Loads the derivation dictionary from the compressed binary stream `is`.
//
// The model starts with the number of hash tables and the size of each one,
// followed by the lemma records. The records are decoded three times:
//   pass 1 - every lemma is announced to `derinet` (add) so that the space
//            needed by each bucket is known,
//   pass 2 - the lemmas and the fixed parts of their records are written
//            (fill); the parent field and the child slots are zeroed,
//   pass 3 - for every lemma with a parent, the encoded parent is written to
//            the lemma record and the encoded lemma is written to the next
//            free child slot of the parent record.
// Until a parent record is full, its last child slot holds the index of the
// next free slot.
//
// Returns false when the stream cannot be decompressed, when the decoder
// reports an error, or when the decoded data is inconsistent (such data used
// to cause out-of-range accesses). On failure `derinet` may be left
// partially filled.
bool derivator_dictionary::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  // Skips over one record; used by derinet.at to walk the entries of a bucket.
  auto skip_record = [](pointer_decoder& record) {
    record.next<char>(record.next_1B());
    record.next_4B();
    record.next<uint32_t>(record.next_2B());
  };

  try {
    for (int i = data.next_1B(); i > 0; i--)
      derinet.resize(data.next_4B());

    unsigned data_position = data.tell();
    vector<char> lemma, parent;
    for (int pass = 1; pass <= 3; pass++) {
      if (pass > 1) data.seek(data_position);

      lemma.clear();
      for (int i = data.next_4B(); i > 0; i--) {
        // Lemmas are stored incrementally: remove the given number of
        // trailing characters of the previous lemma and append new ones.
        size_t removed = data.next_1B();
        if (removed > lemma.size()) return false;
        lemma.resize(lemma.size() - removed);
        for (int j = data.next_1B(); j > 0; j--)
          lemma.push_back(data.next_1B());

        unsigned char lemma_comment_len = data.next_1B();
        const char* lemma_comment = lemma_comment_len ? data.next<char>(lemma_comment_len) : nullptr;

        unsigned children = data.next_2B();

        // The parent is described by edit operations applied to the lemma;
        // no operations mean that the lemma has no parent.
        if (pass == 3) parent.clear();
        enum { REMOVE_START = 1, REMOVE_END = 2, ADD_START = 4, ADD_END = 8 };
        int operations = data.next_1B();
        if (operations) {
          int remove_start = operations & REMOVE_START ? data.next_1B() : 0;
          int remove_end = operations & REMOVE_END ? data.next_1B() : 0;
          if (operations & ADD_START) {
            int add_start = data.next_1B();
            const char* str = data.next<char>(add_start);
            if (pass == 3) parent.assign(str, str + add_start);
          }
          if (pass == 3) {
            if (size_t(remove_start) + size_t(remove_end) > lemma.size()) return false;
            parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end);
          }
          if (operations & ADD_END) {
            int add_end = data.next_1B();
            const char* str = data.next<char>(add_end);
            if (pass == 3) parent.insert(parent.end(), str, str + add_end);
          }
        }

        // Record: 1B comment length, comment, 4B parent, 2B children count, 4B per child.
        unsigned record_len = 1 + lemma_comment_len + 4 + 2 + 4 * children;
        if (pass == 1) {
          derinet.add(lemma.data(), lemma.size(), record_len);
        } else if (pass == 2) {
          unsigned char* lemma_data = derinet.fill(lemma.data(), lemma.size(), record_len);
          if (!lemma_data) return false;  // no table for lemmas of this length
          *lemma_data++ = lemma_comment_len;
          while (lemma_comment_len--) *lemma_data++ = *lemma_comment++;
          unaligned_store_inc<uint32_t>(lemma_data, 0);
          unaligned_store_inc<uint16_t>(lemma_data, children);
          // The last child slot doubles as the index of the next free slot.
          if (children) unaligned_store<uint32_t>(lemma_data + (children - 1) * sizeof(uint32_t), 0);
        } else if (pass == 3 && !parent.empty()) {
          auto lemma_data = derinet.at(lemma.data(), lemma.size(), skip_record);
          auto parent_data = derinet.at(parent.data(), parent.size(), skip_record);
          if (!lemma_data || !parent_data) return false;

          // An encoded lemma has only 8 bits for the length and 24 bits for the offset.
          unsigned parent_offset = parent_data - parent.size() - derinet.data_start(parent.size());
          if (parent.size() >= (1u<<8) || parent_offset >= (1u<<24)) return false;
          unaligned_store<uint32_t>((void *)(lemma_data + 1 + *lemma_data), (parent_offset << 8) | parent.size());

          unsigned lemma_offset = lemma_data - lemma.size() - derinet.data_start(lemma.size());
          if (lemma.size() >= (1u<<8) || lemma_offset >= (1u<<24)) return false;

          auto children_len = unaligned_load<uint16_t>(parent_data + 1 + *parent_data + 4);
          if (!children_len) return false;  // the parent record has no child slots
          auto child_slots = (unsigned char*)(parent_data + 1 + *parent_data + 4 + 2);
          auto next_free_slot = child_slots + (children_len - 1) * sizeof(uint32_t);
          auto child_index = unaligned_load<uint32_t>(next_free_slot);
          if (child_index >= children_len) return false;  // more children than announced
          unaligned_store<uint32_t>(child_slots + child_index * sizeof(uint32_t), (lemma_offset << 8) | lemma.size());
          // The last child overwrites the counter itself, so advance it only before that.
          if (child_index+1 < children_len)
            unaligned_store<uint32_t>(next_free_slot, child_index + 1);
        }
      }

      if (pass == 1)
        derinet.done_adding();
      if (pass == 2)
        derinet.done_filling();
    }
  } catch (binary_decoder_error&) {
    return false;
  }
  return true;
}
|
4029
|
|
|
|
|
|
|
|
|
4030
|
|
|
|
|
|
|
} // namespace morphodita |
|
4031
|
|
|
|
|
|
|
|
|
4032
|
|
|
|
|
|
|
///////// |
|
4033
|
|
|
|
|
|
|
// File: morphodita/morpho/casing_variants.h |
|
4034
|
|
|
|
|
|
|
///////// |
|
4035
|
|
|
|
|
|
|
|
|
4036
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
|
4037
|
|
|
|
|
|
|
// |
|
4038
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4039
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4040
|
|
|
|
|
|
|
// |
|
4041
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4042
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4043
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4044
|
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
namespace morphodita { |
|
4046
|
|
|
|
|
|
|
|
|
4047
|
14
|
|
|
|
|
|
// Appends lowercased casing variants of `form` to the output strings, but
// only those that can differ from `form` itself:
// - form_lc:   the whole form lowercased,
// - form_uclc: the first character kept as is, the rest lowercased.
// "Cased" below means that the character has the unicode::Lut category
// (uppercase or titlecase). The outputs are appended to, not cleared.
//
//   first cased, rest not  ->  form_lc only (only the first character changes)
//   rest cased, first not  ->  form_lc only
//   first and rest cased   ->  form_lc and form_uclc
//   nothing cased          ->  nothing is generated
inline void generate_casing_variants(string_piece form, string& form_uclc, string& form_lc) {
  using namespace unilib;

  // Classify the first character and then scan the rest until a cased one is found.
  string_piece scan = form;
  const bool first_Lut = unicode::category(utf8::decode(scan.str, scan.len)) & unicode::Lut;
  bool rest_has_Lut = false;
  while (scan.len && !rest_has_Lut)
    rest_has_Lut = unicode::category(utf8::decode(scan.str, scan.len)) & unicode::Lut;

  if (!first_Lut && !rest_has_Lut) return;

  form_lc.reserve(form.len);
  if (!rest_has_Lut) {
    // Common case allowing fast execution: lowercase the first character and
    // copy the remaining bytes verbatim.
    string_piece tail = form;
    utf8::append(form_lc, unicode::lowercase(utf8::decode(tail.str, tail.len)));
    form_lc.append(tail.str, tail.len);
  } else if (!first_Lut) {
    utf8::map(unicode::lowercase, form.str, form.len, form_lc);
  } else {
    form_uclc.reserve(form.len);

    string_piece tail = form;
    const char32_t first = utf8::decode(tail.str, tail.len);
    utf8::append(form_lc, unicode::lowercase(first));
    utf8::append(form_uclc, first);

    while (tail.len) {
      const char32_t lowered = unicode::lowercase(utf8::decode(tail.str, tail.len));
      utf8::append(form_lc, lowered);
      utf8::append(form_uclc, lowered);
    }
  }
}
|
4087
|
|
|
|
|
|
|
|
|
4088
|
|
|
|
|
|
|
} // namespace morphodita |
|
4089
|
|
|
|
|
|
|
|
|
4090
|
|
|
|
|
|
|
///////// |
|
4091
|
|
|
|
|
|
|
// File: morphodita/morpho/czech_lemma_addinfo.h |
|
4092
|
|
|
|
|
|
|
///////// |
|
4093
|
|
|
|
|
|
|
|
|
4094
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
|
4095
|
|
|
|
|
|
|
// |
|
4096
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4097
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4098
|
|
|
|
|
|
|
// |
|
4099
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4100
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4101
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4102
|
|
|
|
|
|
|
|
|
4103
|
|
|
|
|
|
|
namespace morphodita { |
|
4104
|
|
|
|
|
|
|
|
|
4105
|
|
|
|
|
|
|
// Declarations |
|
4106
|
0
|
|
|
|
|
|
struct czech_lemma_addinfo { |
|
4107
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
|
4108
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
|
4109
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
|
4110
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
|
4111
|
|
|
|
|
|
|
|
|
4112
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
|
4113
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
|
4114
|
|
|
|
|
|
|
|
|
4115
|
|
|
|
|
|
|
vector data; |
|
4116
|
|
|
|
|
|
|
}; |
|
4117
|
|
|
|
|
|
|
|
|
4118
|
|
|
|
|
|
|
// Definitions |
|
4119
|
0
|
|
|
|
|
|
// Returns the length of the raw lemma, i.e. the position of the first
// character starting the additional info, or lemma.len if there is none.
int czech_lemma_addinfo::raw_lemma_len(string_piece lemma) {
  // The raw lemma ends before a '-' followed by a digit, a '`' or a '_',
  // but never at the very first character.
  for (unsigned pos = 1; pos < lemma.len; pos++) {
    const char current = lemma.str[pos];
    if (current == '`' || current == '_') return pos;
    if (current == '-' && pos+1 < lemma.len) {
      const char following = lemma.str[pos+1];
      if (following >= '0' && following <= '9') return pos;
    }
  }
  return lemma.len;
}
|
4127
|
|
|
|
|
|
|
|
|
4128
|
0
|
|
|
|
|
|
// Returns the length of the lemma id: the raw lemma together with its
// "-number" suffix (all the digits) when it has one. A lemma ending with a
// '`' or '_' part has that part excluded; a lemma with neither is returned
// whole.
int czech_lemma_addinfo::lemma_id_len(string_piece lemma) {
  const auto is_digit = [](char chr) { return chr >= '0' && chr <= '9'; };

  // Nothing can terminate the lemma on its very first character.
  for (unsigned pos = 1; pos < lemma.len; pos++) {
    const char current = lemma.str[pos];
    if (current == '`' || current == '_') return pos;

    if (current == '-' && pos+1 < lemma.len && is_digit(lemma.str[pos+1])) {
      // Include the dash and the whole run of digits.
      unsigned end = pos + 2;
      while (end < lemma.len && is_digit(lemma.str[end])) end++;
      return end;
    }
  }
  return lemma.len;
}
|
4141
|
|
|
|
|
|
|
|
|
4142
|
0
|
|
|
|
|
|
// Converts encoded additional info back to text: "-<number>" when the first
// byte is a lemma number (anything but 255), followed by the remaining bytes
// verbatim. Returns an empty string when there is no additional info
// (addinfo_len <= 0).
string czech_lemma_addinfo::format(const unsigned char* addinfo, int addinfo_len) {
  string res;

  if (addinfo_len > 0) {
    res.reserve(addinfo_len + 4);
    if (addinfo[0] != 255) {
      // At most "-254" plus the terminating zero. The byte would be promoted
      // to int, so convert it explicitly to the type %u expects.
      char num[5];
      snprintf(num, sizeof(num), "-%u", unsigned(addinfo[0]));
      res += num;
    }
    res.append((const char*) addinfo + 1, addinfo_len - 1);
  }

  return res;
}
|
4158
|
|
|
|
|
|
|
|
|
4159
|
|
|
|
|
|
|
// Returns false when the encoded additional info contains the three
// characters "_,x" anywhere after its first byte (the lemma number),
// true otherwise.
bool czech_lemma_addinfo::generatable(const unsigned char* addinfo, int addinfo_len) {
  const int last_start = addinfo_len - 3;  // last index at which three bytes still fit
  for (int start = 1; start <= last_start; start++) {
    const unsigned char* at = addinfo + start;
    if (at[0] == '_' && at[1] == ',' && at[2] == 'x')
      return false;
  }

  return true;
}
|
4166
|
|
|
|
|
|
|
|
|
4167
|
0
|
|
|
|
|
|
// Parses the additional info of `lemma` into `data` and returns the length
// of the raw lemma.
//
// `data` is left empty when the lemma has no additional info. Otherwise its
// first byte is the lemma number (255 when there is no number) and the
// following bytes are the text after the number.
//
// A lemma number is invalid when it has no digits, is followed by anything
// other than '`', '_' or the end of the lemma, or is not smaller than 255.
// Encoded info longer than 255 bytes is invalid too. With `die_on_failure`
// set, both cases are reported through runtime_failure; otherwise the number
// is replaced by 255 and the info is truncated to 255 bytes.
int czech_lemma_addinfo::parse(string_piece lemma, bool die_on_failure) {
  data.clear();

  const char* lemma_end = lemma.str + lemma.len;
  const char* lemma_info = lemma.str + raw_lemma_len(lemma);
  if (lemma_info < lemma_end) {
    int lemma_num = 255;
    const char* lemma_additional_info = lemma_info;

    if (*lemma_info == '-') {
      lemma_num = 0;
      for (lemma_additional_info = lemma_info + 1;
           lemma_additional_info < lemma_end && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9');
           lemma_additional_info++)
        // Stop accumulating once the value is hopelessly out of range, so
        // that a long run of digits cannot overflow the int. Numbers of up
        // to nine digits are still reported exactly.
        if (lemma_num < 100000000)
          lemma_num = 10 * lemma_num + (*lemma_additional_info - '0');

      if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma_end && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
        if (die_on_failure)
          runtime_failure("Lemma number " << lemma_num << " in lemma " << lemma << " out of range!");
        else
          lemma_num = 255;
      }
    }
    data.emplace_back(lemma_num);
    while (lemma_additional_info < lemma_end)
      data.push_back(*(unsigned char*)lemma_additional_info++);

    if (data.size() > 255) {
      if (die_on_failure)
        // The info is bounded by the string_piece, not by a terminating
        // zero, so print exactly the bytes belonging to it.
        runtime_failure("Too long lemma info " << string(lemma_info, lemma_end) << " in lemma " << lemma << '!');
      else
        data.resize(255);
    }
  }

  return lemma_info - lemma.str;
}
|
4203
|
|
|
|
|
|
|
|
|
4204
|
|
|
|
|
|
|
bool czech_lemma_addinfo::match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len) { |
|
4205
|
0
|
0
|
|
|
|
|
if (data.empty()) return true; |
|
4206
|
0
|
0
|
|
|
|
|
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4207
|
|
|
|
|
|
|
return true; |
|
4208
|
|
|
|
|
|
|
} |
|
4209
|
|
|
|
|
|
|
|
|
4210
|
|
|
|
|
|
|
} // namespace morphodita |
|
4211
|
|
|
|
|
|
|
|
|
4212
|
|
|
|
|
|
|
///////// |
|
4213
|
|
|
|
|
|
|
// File: morphodita/morpho/tag_filter.h |
|
4214
|
|
|
|
|
|
|
///////// |
|
4215
|
|
|
|
|
|
|
|
|
4216
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
|
4217
|
|
|
|
|
|
|
// |
|
4218
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4219
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4220
|
|
|
|
|
|
|
// |
|
4221
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4222
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4223
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4224
|
|
|
|
|
|
|
|
|
4225
|
|
|
|
|
|
|
namespace morphodita { |
|
4226
|
|
|
|
|
|
|
|
|
4227
|
|
|
|
|
|
|
// Declarations |
|
4228
|
0
|
|
|
|
|
|
class tag_filter { |
|
4229
|
|
|
|
|
|
|
public: |
|
4230
|
|
|
|
|
|
|
tag_filter(const char* filter = nullptr); |
|
4231
|
|
|
|
|
|
|
|
|
4232
|
|
|
|
|
|
|
inline bool matches(const char* tag) const; |
|
4233
|
|
|
|
|
|
|
|
|
4234
|
|
|
|
|
|
|
private: |
|
4235
|
|
|
|
|
|
|
struct char_filter { |
|
4236
|
|
|
|
|
|
|
char_filter(int pos, bool negate, int chars_offset, int chars_len) |
|
4237
|
0
|
|
|
|
|
|
: pos(pos), negate(negate), chars_offset(chars_offset), chars_len(chars_len) {} |
|
4238
|
|
|
|
|
|
|
|
|
4239
|
|
|
|
|
|
|
int pos; |
|
4240
|
|
|
|
|
|
|
bool negate; |
|
4241
|
|
|
|
|
|
|
int chars_offset, chars_len; |
|
4242
|
|
|
|
|
|
|
}; |
|
4243
|
|
|
|
|
|
|
|
|
4244
|
|
|
|
|
|
|
string wildcard; |
|
4245
|
|
|
|
|
|
|
std::vector filters; |
|
4246
|
|
|
|
|
|
|
}; |
|
4247
|
|
|
|
|
|
|
|
|
4248
|
|
|
|
|
|
|
// Definitions |
|
4249
|
0
|
|
|
|
|
|
inline bool tag_filter::matches(const char* tag) const { |
|
4250
|
0
|
0
|
|
|
|
|
if (filters.empty()) return true; |
|
4251
|
|
|
|
|
|
|
|
|
4252
|
|
|
|
|
|
|
int tag_pos = 0; |
|
4253
|
0
|
0
|
|
|
|
|
for (auto&& filter : filters) { |
|
4254
|
|
|
|
|
|
|
// Skip until next filter position. If the tag ends prematurely, accept. |
|
4255
|
0
|
0
|
|
|
|
|
while (tag_pos < filter.pos) |
|
4256
|
0
|
0
|
|
|
|
|
if (!tag[tag_pos++]) |
|
4257
|
|
|
|
|
|
|
return true; |
|
4258
|
0
|
0
|
|
|
|
|
if (!tag[tag_pos]) |
|
4259
|
|
|
|
|
|
|
return true; |
|
4260
|
|
|
|
|
|
|
|
|
4261
|
|
|
|
|
|
|
// We assume filter.chars_len >= 1. |
|
4262
|
0
|
|
|
|
|
|
bool matched = (wildcard[filter.chars_offset] == tag[tag_pos]) ^ filter.negate; |
|
4263
|
0
|
0
|
|
|
|
|
for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++) |
|
|
|
0
|
|
|
|
|
|
|
4264
|
0
|
|
|
|
|
|
matched = (wildcard[filter.chars_offset + i] == tag[tag_pos]) ^ filter.negate; |
|
4265
|
0
|
0
|
|
|
|
|
if (!matched) return false; |
|
4266
|
|
|
|
|
|
|
} |
|
4267
|
|
|
|
|
|
|
return true; |
|
4268
|
|
|
|
|
|
|
} |
|
4269
|
|
|
|
|
|
|
|
|
4270
|
|
|
|
|
|
|
} // namespace morphodita |
|
4271
|
|
|
|
|
|
|
|
|
4272
|
|
|
|
|
|
|
///////// |
|
4273
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_dictionary.h |
|
4274
|
|
|
|
|
|
|
///////// |
|
4275
|
|
|
|
|
|
|
|
|
4276
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
|
4277
|
|
|
|
|
|
|
// |
|
4278
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4279
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4280
|
|
|
|
|
|
|
// |
|
4281
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4282
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4283
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4284
|
|
|
|
|
|
|
|
|
4285
|
|
|
|
|
|
|
namespace morphodita { |
|
4286
|
|
|
|
|
|
|
|
|
4287
|
|
|
|
|
|
|
// Declarations |
|
4288
|
|
|
|
|
|
|
template |
|
4289
|
0
|
|
|
|
|
|
class morpho_dictionary { |
|
4290
|
|
|
|
|
|
|
public: |
|
4291
|
|
|
|
|
|
|
void load(binary_decoder& data); |
|
4292
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas) const; |
|
4293
|
|
|
|
|
|
|
bool generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms) const; |
|
4294
|
|
|
|
|
|
|
private: |
|
4295
|
|
|
|
|
|
|
persistent_unordered_map lemmas, roots, suffixes; |
|
4296
|
|
|
|
|
|
|
|
|
4297
|
|
|
|
|
|
|
vector tags; |
|
4298
|
|
|
|
|
|
|
vector>>> classes; |
|
4299
|
|
|
|
|
|
|
}; |
|
4300
|
|
|
|
|
|
|
|
|
4301
|
|
|
|
|
|
|
// Definitions |
|
4302
|
|
|
|
|
|
|
template |
|
4303
|
1
|
|
|
|
|
|
void morpho_dictionary::load(binary_decoder& data) { |
|
4304
|
|
|
|
|
|
|
// Prepare lemmas and roots hashes |
|
4305
|
8
|
100
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4306
|
7
|
|
|
|
|
|
lemmas.resize(data.next_4B()); |
|
4307
|
10
|
100
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4308
|
9
|
|
|
|
|
|
roots.resize(data.next_4B()); |
|
4309
|
|
|
|
|
|
|
|
|
4310
|
|
|
|
|
|
|
// Perform two pass over the lemmas and roots data, filling the hashes. |
|
4311
|
|
|
|
|
|
|
|
|
4312
|
1
|
|
|
|
|
|
vector lemma(max(lemmas.max_length(), roots.max_length())); |
|
4313
|
1
|
50
|
|
|
|
|
vector root(max(lemmas.max_length(), roots.max_length())); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4314
|
|
|
|
|
|
|
unsigned data_position = data.tell(); |
|
4315
|
3
|
100
|
|
|
|
|
for (int pass = 1; pass <= 2; pass++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4316
|
2
|
100
|
|
|
|
|
if (pass > 1) data.seek(data_position); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4317
|
|
|
|
|
|
|
|
|
4318
|
|
|
|
|
|
|
int lemma_len = 0; |
|
4319
|
|
|
|
|
|
|
int root_len = 0; |
|
4320
|
|
|
|
|
|
|
|
|
4321
|
6
|
50
|
|
|
|
|
for (int i = data.next_4B(); i > 0; i--) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4322
|
4
|
50
|
|
|
|
|
lemma_len -= data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4323
|
28
|
50
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4324
|
24
|
50
|
|
|
|
|
lemma[lemma_len++] = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4325
|
4
|
50
|
|
|
|
|
unsigned char lemma_info_len = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4326
|
4
|
50
|
|
|
|
|
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4327
|
4
|
50
|
|
|
|
|
unsigned lemma_roots = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4328
|
|
|
|
|
|
|
|
|
4329
|
|
|
|
|
|
|
unsigned char* lemma_data /* to keep compiler happy */ = nullptr; |
|
4330
|
|
|
|
|
|
|
unsigned lemma_offset /* to keep compiler happy */ = 0; |
|
4331
|
|
|
|
|
|
|
|
|
4332
|
4
|
100
|
|
|
|
|
if (pass == 1) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4333
|
2
|
|
|
|
|
|
lemmas.add(lemma.data(), lemma_len, 1 + lemma_info_len + 1 + lemma_roots * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
|
4334
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
|
4335
|
2
|
|
|
|
|
|
lemma_data = lemmas.fill(lemma.data(), lemma_len, 1 + lemma_info_len + 1 + lemma_roots * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
|
4336
|
4
|
|
|
|
|
|
lemma_offset = lemma_data - lemma_len - lemmas.data_start(lemma_len); |
|
4337
|
|
|
|
|
|
|
|
|
4338
|
2
|
|
|
|
|
|
*lemma_data++ = lemma_info_len; |
|
4339
|
2
|
50
|
|
|
|
|
if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4340
|
2
|
|
|
|
|
|
*lemma_data++ = lemma_roots; |
|
4341
|
|
|
|
|
|
|
} |
|
4342
|
|
|
|
|
|
|
|
|
4343
|
4
|
|
|
|
|
|
small_memcpy(root.data(), lemma.data(), lemma_len); root_len = lemma_len; |
|
4344
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < lemma_roots; i++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4345
|
|
|
|
|
|
|
enum { REMOVE_START = 1, REMOVE_END = 2, ADD_START = 4, ADD_END = 8 }; |
|
4346
|
14
|
50
|
|
|
|
|
int operations = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4347
|
14
|
50
|
|
|
|
|
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4348
|
14
|
100
|
|
|
|
|
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4349
|
14
|
50
|
|
|
|
|
if (operations & ADD_START) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4350
|
0
|
0
|
|
|
|
|
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4351
|
0
|
0
|
|
|
|
|
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4352
|
|
|
|
|
|
|
} |
|
4353
|
14
|
50
|
|
|
|
|
if (operations & ADD_END) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4354
|
42
|
50
|
|
|
|
|
for (int len = data.next_1B(); len > 0; len--) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4355
|
28
|
50
|
|
|
|
|
root[root_len++] = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4356
|
14
|
50
|
|
|
|
|
uint16_t clas = data.next_2B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4357
|
|
|
|
|
|
|
|
|
4358
|
14
|
100
|
|
|
|
|
if (pass == 1) { // for each root |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4359
|
7
|
|
|
|
|
|
roots.add(root.data(), root_len, sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t)); |
|
4360
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
|
4361
|
7
|
|
|
|
|
|
unsigned char* root_data = roots.fill(root.data(), root_len, sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t)); |
|
4362
|
14
|
|
|
|
|
|
unsigned root_offset = root_data - root_len - roots.data_start(root_len); |
|
4363
|
|
|
|
|
|
|
|
|
4364
|
|
|
|
|
|
|
unaligned_store_inc(root_data, clas); |
|
4365
|
|
|
|
|
|
|
unaligned_store_inc(root_data, lemma_offset); |
|
4366
|
|
|
|
|
|
|
unaligned_store_inc(root_data, lemma_len); |
|
4367
|
7
|
50
|
|
|
|
|
assert(uint8_t(lemma_len) == lemma_len); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4368
|
|
|
|
|
|
|
|
|
4369
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, root_offset); |
|
4370
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, root_len); |
|
4371
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, clas); |
|
4372
|
7
|
50
|
|
|
|
|
assert(uint8_t(root_len) == root_len); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4373
|
|
|
|
|
|
|
} |
|
4374
|
|
|
|
|
|
|
} |
|
4375
|
|
|
|
|
|
|
} |
|
4376
|
|
|
|
|
|
|
|
|
4377
|
2
|
100
|
|
|
|
|
if (pass == 1) { // after the whole pass |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4378
|
1
|
50
|
|
|
|
|
lemmas.done_adding(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4379
|
1
|
50
|
|
|
|
|
roots.done_adding(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4380
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
|
4381
|
1
|
|
|
|
|
|
lemmas.done_filling(); |
|
4382
|
1
|
|
|
|
|
|
roots.done_filling(); |
|
4383
|
|
|
|
|
|
|
} |
|
4384
|
|
|
|
|
|
|
} |
|
4385
|
|
|
|
|
|
|
|
|
4386
|
|
|
|
|
|
|
// Load tags |
|
4387
|
1
|
50
|
|
|
|
|
tags.resize(data.next_2B()); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4388
|
21
|
100
|
|
|
|
|
for (auto&& tag : tags) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4389
|
20
|
50
|
|
|
|
|
tag.resize(data.next_1B()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4390
|
80
|
100
|
|
|
|
|
for (unsigned i = 0; i < tag.size(); i++) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4391
|
60
|
50
|
|
|
|
|
tag[i] = data.next_1B(); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
} |
|
4393
|
|
|
|
|
|
|
|
|
4394
|
|
|
|
|
|
|
// Load suffixes |
|
4395
|
1
|
50
|
|
|
|
|
suffixes.load(data); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4396
|
|
|
|
|
|
|
|
|
4397
|
|
|
|
|
|
|
// Fill classes from suffixes |
|
4398
|
14
|
50
|
|
|
|
|
suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4399
|
|
|
|
|
|
|
unsigned classes_len = data.next_2B(); |
|
4400
|
|
|
|
|
|
|
const uint16_t* classes_ptr = data.next(classes_len); |
|
4401
|
|
|
|
|
|
|
const uint16_t* indices_ptr = data.next(classes_len); |
|
4402
|
|
|
|
|
|
|
const uint16_t* tags_ptr = data.next(data.next_2B()); |
|
4403
|
|
|
|
|
|
|
|
|
4404
|
13
|
|
|
|
|
|
string suffix_str(suffix, len); |
|
4405
|
28
|
100
|
|
|
|
|
for (unsigned i = 0; i < classes_len; i++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4406
|
15
|
|
|
|
|
|
auto classes_ptr_i = unaligned_load(classes_ptr + i); |
|
4407
|
15
|
100
|
|
|
|
|
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4408
|
15
|
50
|
|
|
|
|
classes[classes_ptr_i].emplace_back(suffix_str, vector()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4409
|
35
|
100
|
|
|
|
|
for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i), |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4410
|
15
|
|
|
|
|
|
* end = tags_ptr + unaligned_load(indices_ptr + i + 1); |
|
4411
|
|
|
|
|
|
|
ptr < end; ptr++) |
|
4412
|
20
|
50
|
|
|
|
|
classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr)); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4413
|
|
|
|
|
|
|
} |
|
4414
|
13
|
|
|
|
|
|
}); |
|
4415
|
1
|
|
|
|
|
|
} |
|
4416
|
|
|
|
|
|
|
|
|
4417
|
|
|
|
|
|
|
template |
|
4418
|
18
|
|
|
|
|
|
void morpho_dictionary::analyze(string_piece form, vector& lemmas) const { |
|
4419
|
|
|
|
|
|
|
int max_suffix_len = suffixes.max_length(); |
|
4420
|
|
|
|
|
|
|
|
|
4421
|
|
|
|
|
|
|
uint16_t* suff_stack[16]; vector suff_heap; |
|
4422
|
18
|
50
|
|
|
|
|
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4423
|
|
|
|
|
|
|
int suff_len = 0; |
|
4424
|
48
|
50
|
|
|
|
|
for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4425
|
48
|
|
|
|
|
|
suff[suff_len] = (uint16_t*) suffixes.at(form.str + i, suff_len, [](pointer_decoder& data) { |
|
4426
|
0
|
|
|
|
|
|
data.next(2 * data.next_2B()); |
|
4427
|
|
|
|
|
|
|
data.next(data.next_2B()); |
|
4428
|
0
|
|
|
|
|
|
}); |
|
4429
|
48
|
|
|
|
|
|
if (!suff[suff_len]) break; |
|
4430
|
|
|
|
|
|
|
} |
|
4431
|
|
|
|
|
|
|
|
|
4432
|
48
|
100
|
|
|
|
|
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4433
|
30
|
50
|
|
|
|
|
if (unaligned_load(suff[suff_len])) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4434
|
30
|
|
|
|
|
|
unsigned suff_classes = unaligned_load(suff[suff_len]); |
|
4435
|
30
|
|
|
|
|
|
uint16_t* suff_data = suff[suff_len] + 1; |
|
4436
|
|
|
|
|
|
|
|
|
4437
|
60
|
50
|
|
|
|
|
roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4438
|
|
|
|
|
|
|
uint16_t root_class = root_data.next_2B(); |
|
4439
|
|
|
|
|
|
|
unsigned lemma_offset = root_data.next_4B(); |
|
4440
|
|
|
|
|
|
|
unsigned lemma_len = root_data.next_1B(); |
|
4441
|
|
|
|
|
|
|
|
|
4442
|
60
|
100
|
|
|
|
|
if (small_memeq(form.str, root, root_len)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4443
|
16
|
|
|
|
|
|
uint16_t* suffix_class_ptr = unaligned_lower_bound(suff_data, suff_classes, root_class); |
|
4444
|
10
|
50
|
|
|
|
|
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4445
|
38
|
|
|
|
|
|
const unsigned char* lemma_data = this->lemmas.data_start(lemma_len) + lemma_offset; |
|
4446
|
|
|
|
|
|
|
string lemma((const char*)lemma_data, lemma_len); |
|
4447
|
10
|
50
|
|
|
|
|
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4448
|
|
|
|
|
|
|
|
|
4449
|
20
|
|
|
|
|
|
uint16_t* suff_tag_indices = suff_data + suff_classes; |
|
4450
|
10
|
|
|
|
|
|
uint16_t* suff_tags = suff_tag_indices + suff_classes + 1; |
|
4451
|
28
|
100
|
|
|
|
|
for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data)); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4452
|
28
|
|
|
|
|
|
i < unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data) + 1); i++) |
|
4453
|
18
|
50
|
|
|
|
|
lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4454
|
|
|
|
|
|
|
} |
|
4455
|
|
|
|
|
|
|
} |
|
4456
|
30
|
|
|
|
|
|
}); |
|
4457
|
|
|
|
|
|
|
} |
|
4458
|
18
|
|
|
|
|
|
} |
|
4459
|
|
|
|
|
|
|
|
|
4460
|
|
|
|
|
|
|
template |
|
4461
|
0
|
|
|
|
|
|
bool morpho_dictionary::generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms) const { |
|
4462
|
|
|
|
|
|
|
LemmaAddinfo addinfo; |
|
4463
|
0
|
0
|
|
|
|
|
int raw_lemma_len = addinfo.parse(lemma); |
|
|
|
0
|
|
|
|
|
|
|
4464
|
0
|
|
|
|
|
|
bool matched_lemma = false; |
|
4465
|
|
|
|
|
|
|
|
|
4466
|
0
|
0
|
|
|
|
|
lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4467
|
|
|
|
|
|
|
unsigned lemma_info_len = data.next_1B(); |
|
4468
|
|
|
|
|
|
|
const auto* lemma_info = data.next(lemma_info_len); |
|
4469
|
|
|
|
|
|
|
unsigned lemma_roots_len = data.next_1B(); |
|
4470
|
0
|
|
|
|
|
|
auto* lemma_roots_ptr = data.next(lemma_roots_len * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
|
4471
|
|
|
|
|
|
|
|
|
4472
|
0
|
0
|
|
|
|
|
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4473
|
0
|
|
|
|
|
|
matched_lemma = true; |
|
4474
|
|
|
|
|
|
|
|
|
4475
|
|
|
|
|
|
|
vector* forms = nullptr; |
|
4476
|
|
|
|
|
|
|
pointer_decoder lemma_roots(lemma_roots_ptr); |
|
4477
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < lemma_roots_len; i++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4478
|
|
|
|
|
|
|
unsigned root_offset = lemma_roots.next_4B(); |
|
4479
|
|
|
|
|
|
|
unsigned root_len = lemma_roots.next_1B(); |
|
4480
|
|
|
|
|
|
|
unsigned clas = lemma_roots.next_2B(); |
|
4481
|
|
|
|
|
|
|
|
|
4482
|
0
|
|
|
|
|
|
const unsigned char* root_data = roots.data_start(root_len) + root_offset; |
|
4483
|
0
|
0
|
|
|
|
|
for (auto&& suffix : classes[clas]) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4484
|
|
|
|
|
|
|
string root_with_suffix; |
|
4485
|
0
|
0
|
|
|
|
|
for (auto&& tag : suffix.second) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4486
|
0
|
0
|
|
|
|
|
if (filter.matches(tags[tag].c_str())) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4487
|
0
|
0
|
|
|
|
|
if (!forms) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4488
|
0
|
0
|
|
|
|
|
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4489
|
0
|
|
|
|
|
|
forms = &lemmas_forms.back().forms; |
|
4490
|
|
|
|
|
|
|
} |
|
4491
|
|
|
|
|
|
|
|
|
4492
|
0
|
0
|
|
|
|
|
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4493
|
0
|
0
|
|
|
|
|
root_with_suffix.reserve(root_len + suffix.first.size()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4494
|
|
|
|
|
|
|
root_with_suffix.assign((const char*)root_data, root_len); |
|
4495
|
|
|
|
|
|
|
root_with_suffix.append(suffix.first); |
|
4496
|
|
|
|
|
|
|
} |
|
4497
|
|
|
|
|
|
|
|
|
4498
|
0
|
0
|
|
|
|
|
forms->emplace_back(root_with_suffix, tags[tag]); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4499
|
|
|
|
|
|
|
} |
|
4500
|
|
|
|
|
|
|
} |
|
4501
|
|
|
|
|
|
|
} |
|
4502
|
|
|
|
|
|
|
} |
|
4503
|
0
|
|
|
|
|
|
}); |
|
4504
|
|
|
|
|
|
|
|
|
4505
|
0
|
|
|
|
|
|
return matched_lemma; |
|
4506
|
|
|
|
|
|
|
} |
|
4507
|
|
|
|
|
|
|
|
|
4508
|
|
|
|
|
|
|
} // namespace morphodita |
|
4509
|
|
|
|
|
|
|
|
|
4510
|
|
|
|
|
|
|
///////// |
|
4511
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_prefix_guesser.h |
|
4512
|
|
|
|
|
|
|
///////// |
|
4513
|
|
|
|
|
|
|
|
|
4514
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4515
|
|
|
|
|
|
|
// |
|
4516
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4517
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4518
|
|
|
|
|
|
|
// |
|
4519
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4520
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4521
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4522
|
|
|
|
|
|
|
|
|
4523
|
|
|
|
|
|
|
namespace morphodita { |
|
4524
|
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
// Declarations |
|
4526
|
|
|
|
|
|
|
template |
|
4527
|
0
|
|
|
|
|
|
class morpho_prefix_guesser { |
|
4528
|
|
|
|
|
|
|
public: |
|
4529
|
0
|
|
|
|
|
|
morpho_prefix_guesser(const MorphoDictionary& dictionary) : dictionary(dictionary) {} |
|
4530
|
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
void load(binary_decoder& data); |
|
4532
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas); |
|
4533
|
|
|
|
|
|
|
bool generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms); |
|
4534
|
|
|
|
|
|
|
|
|
4535
|
|
|
|
|
|
|
private: |
|
4536
|
|
|
|
|
|
|
const MorphoDictionary& dictionary; |
|
4537
|
|
|
|
|
|
|
vector tag_filters; |
|
4538
|
|
|
|
|
|
|
persistent_unordered_map prefixes_initial, prefixes_middle; |
|
4539
|
|
|
|
|
|
|
}; |
|
4540
|
|
|
|
|
|
|
|
|
4541
|
|
|
|
|
|
|
// Definitions |
|
4542
|
|
|
|
|
|
|
template |
|
4543
|
0
|
|
|
|
|
|
void morpho_prefix_guesser::load(binary_decoder& data) { |
|
4544
|
|
|
|
|
|
|
// Load and construct tag filters |
|
4545
|
0
|
0
|
|
|
|
|
for (unsigned tag_filters_len = data.next_1B(); tag_filters_len; tag_filters_len--) { |
|
4546
|
0
|
|
|
|
|
|
unsigned tag_filter_len = data.next_1B(); |
|
4547
|
0
|
|
|
|
|
|
string tag_filter(data.next(tag_filter_len), tag_filter_len); |
|
4548
|
|
|
|
|
|
|
|
|
4549
|
0
|
0
|
|
|
|
|
tag_filters.emplace_back(tag_filter.c_str()); |
|
4550
|
|
|
|
|
|
|
} |
|
4551
|
|
|
|
|
|
|
|
|
4552
|
|
|
|
|
|
|
// Load prefixes |
|
4553
|
0
|
|
|
|
|
|
prefixes_initial.load(data); |
|
4554
|
0
|
|
|
|
|
|
prefixes_middle.load(data); |
|
4555
|
0
|
|
|
|
|
|
} |
|
4556
|
|
|
|
|
|
|
|
|
4557
|
|
|
|
|
|
|
// Analyze can return non-unique lemma-tag pairs. |
|
4558
|
|
|
|
|
|
|
template |
|
4559
|
0
|
|
|
|
|
|
void morpho_prefix_guesser::analyze(string_piece form, vector& lemmas) { |
|
4560
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
|
4561
|
|
|
|
|
|
|
|
|
4562
|
|
|
|
|
|
|
vector form_tmp; |
|
4563
|
|
|
|
|
|
|
vector middle_masks; |
|
4564
|
0
|
0
|
|
|
|
|
middle_masks.reserve(form.len); |
|
4565
|
|
|
|
|
|
|
|
|
4566
|
0
|
0
|
|
|
|
|
for (unsigned initial = 0; initial < form.len; initial++) { |
|
4567
|
|
|
|
|
|
|
// Match the initial prefix. |
|
4568
|
0
|
|
|
|
|
|
unsigned initial_mask = (1<
|
|
4569
|
0
|
0
|
|
|
|
|
if (initial) { |
|
4570
|
0
|
|
|
|
|
|
auto found = prefixes_initial.at_typed(form.str, initial); |
|
4571
|
0
|
0
|
|
|
|
|
if (!found) break; |
|
4572
|
0
|
|
|
|
|
|
initial_mask = unaligned_load(found); |
|
4573
|
|
|
|
|
|
|
} |
|
4574
|
|
|
|
|
|
|
|
|
4575
|
|
|
|
|
|
|
// If we have found an initial prefix (including the empty one), match middle prefixes. |
|
4576
|
0
|
0
|
|
|
|
|
if (initial_mask) { |
|
4577
|
0
|
0
|
|
|
|
|
middle_masks.resize(initial); |
|
4578
|
0
|
0
|
|
|
|
|
middle_masks.emplace_back(initial_mask); |
|
4579
|
0
|
0
|
|
|
|
|
for (unsigned middle = initial; middle < middle_masks.size(); middle++) { |
|
4580
|
0
|
0
|
|
|
|
|
if (!middle_masks[middle]) continue; |
|
4581
|
|
|
|
|
|
|
// Try matching middle prefixes from current index. |
|
4582
|
0
|
0
|
|
|
|
|
for (unsigned i = middle + 1; i < form.len; i++) { |
|
4583
|
0
|
|
|
|
|
|
auto found = prefixes_middle.at_typed(form.str + middle, i - middle); |
|
4584
|
0
|
0
|
|
|
|
|
if (!found) break; |
|
4585
|
0
|
0
|
|
|
|
|
if (unaligned_load(found)) { |
|
4586
|
0
|
0
|
|
|
|
|
if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1); |
|
|
|
0
|
|
|
|
|
|
|
4587
|
0
|
|
|
|
|
|
middle_masks[i] |= middle_masks[middle] & unaligned_load(found); |
|
4588
|
|
|
|
|
|
|
} |
|
4589
|
|
|
|
|
|
|
} |
|
4590
|
|
|
|
|
|
|
|
|
4591
|
|
|
|
|
|
|
// Try matching word forms if at least one middle prefix was found. |
|
4592
|
0
|
0
|
|
|
|
|
if (middle > initial && middle < form.len ) { |
|
|
|
0
|
|
|
|
|
|
|
4593
|
0
|
0
|
|
|
|
|
if (initial) { |
|
4594
|
0
|
0
|
|
|
|
|
if (form_tmp.empty()) form_tmp.assign(form.str, form.str + form.len); |
|
4595
|
0
|
|
|
|
|
|
small_memcpy(form_tmp.data() + middle - initial, form.str, initial); |
|
4596
|
|
|
|
|
|
|
} |
|
4597
|
0
|
|
|
|
|
|
unsigned lemmas_ori_size = lemmas.size(); |
|
4598
|
0
|
0
|
|
|
|
|
dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas); |
|
|
|
0
|
|
|
|
|
|
|
4599
|
|
|
|
|
|
|
unsigned lemmas_new_size = lemmas_ori_size; |
|
4600
|
0
|
0
|
|
|
|
|
for (unsigned i = lemmas_ori_size; i < lemmas.size(); i++) { |
|
4601
|
0
|
0
|
|
|
|
|
for (unsigned filter = 0; filter < tag_filters.size(); filter++) |
|
4602
|
0
|
0
|
|
|
|
|
if ((middle_masks[middle] & (1<
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4603
|
0
|
0
|
|
|
|
|
if (i == lemmas_new_size) { |
|
4604
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.insert(0, form.str + initial, middle - initial); |
|
4605
|
|
|
|
|
|
|
} else { |
|
4606
|
0
|
0
|
|
|
|
|
lemmas[lemmas_new_size].lemma.reserve(lemmas[i].lemma.size() + middle - initial); |
|
4607
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.assign(form.str + initial, middle - initial); |
|
4608
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.append(lemmas[i].lemma); |
|
4609
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].tag = lemmas[i].tag; |
|
4610
|
|
|
|
|
|
|
} |
|
4611
|
0
|
|
|
|
|
|
lemmas_new_size++; |
|
4612
|
0
|
|
|
|
|
|
break; |
|
4613
|
|
|
|
|
|
|
} |
|
4614
|
|
|
|
|
|
|
} |
|
4615
|
0
|
0
|
|
|
|
|
if (lemmas_new_size < lemmas.size()) lemmas.erase(lemmas.begin() + lemmas_new_size, lemmas.end()); |
|
4616
|
|
|
|
|
|
|
} |
|
4617
|
|
|
|
|
|
|
} |
|
4618
|
|
|
|
|
|
|
} |
|
4619
|
|
|
|
|
|
|
} |
|
4620
|
|
|
|
|
|
|
} |
|
4621
|
|
|
|
|
|
|
|
|
4622
|
|
|
|
|
|
|
template |
|
4623
|
|
|
|
|
|
|
bool morpho_prefix_guesser::generate(string_piece /*lemma*/, const tag_filter& /*filter*/, vector& /*lemmas_forms*/) { |
|
4624
|
|
|
|
|
|
|
// Not implemented yet. Is it actually needed? |
|
4625
|
|
|
|
|
|
|
return false; |
|
4626
|
|
|
|
|
|
|
} |
|
4627
|
|
|
|
|
|
|
} // namespace morphodita |
|
4628
|
|
|
|
|
|
|
|
|
4629
|
|
|
|
|
|
|
///////// |
|
4630
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_statistical_guesser.h |
|
4631
|
|
|
|
|
|
|
///////// |
|
4632
|
|
|
|
|
|
|
|
|
4633
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4634
|
|
|
|
|
|
|
// |
|
4635
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4636
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4637
|
|
|
|
|
|
|
// |
|
4638
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4639
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4640
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4641
|
|
|
|
|
|
|
|
|
4642
|
|
|
|
|
|
|
namespace morphodita { |
|
4643
|
|
|
|
|
|
|
|
|
4644
|
0
|
|
|
|
|
|
class morpho_statistical_guesser { |
|
4645
|
|
|
|
|
|
|
public: |
|
4646
|
|
|
|
|
|
|
void load(binary_decoder& data); |
|
4647
|
|
|
|
|
|
|
typedef vector used_rules; |
|
4648
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas, used_rules* used); |
|
4649
|
|
|
|
|
|
|
|
|
4650
|
|
|
|
|
|
|
private: |
|
4651
|
|
|
|
|
|
|
vector tags; |
|
4652
|
|
|
|
|
|
|
unsigned default_tag; |
|
4653
|
|
|
|
|
|
|
persistent_unordered_map rules; |
|
4654
|
|
|
|
|
|
|
}; |
|
4655
|
|
|
|
|
|
|
|
|
4656
|
|
|
|
|
|
|
} // namespace morphodita |
|
4657
|
|
|
|
|
|
|
|
|
4658
|
|
|
|
|
|
|
///////// |
|
4659
|
|
|
|
|
|
|
// File: morphodita/tokenizer/unicode_tokenizer.h |
|
4660
|
|
|
|
|
|
|
///////// |
|
4661
|
|
|
|
|
|
|
|
|
4662
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4663
|
|
|
|
|
|
|
// |
|
4664
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4665
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4666
|
|
|
|
|
|
|
// |
|
4667
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4668
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4669
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4670
|
|
|
|
|
|
|
|
|
4671
|
|
|
|
|
|
|
namespace morphodita { |
|
4672
|
|
|
|
|
|
|
|
|
4673
|
4
|
|
|
|
|
|
class unicode_tokenizer : public tokenizer { |
|
4674
|
|
|
|
|
|
|
public: |
|
4675
|
|
|
|
|
|
|
enum { URL_EMAIL_LATEST = 2 }; |
|
4676
|
|
|
|
|
|
|
unicode_tokenizer(unsigned url_email_tokenizer); |
|
4677
|
|
|
|
|
|
|
|
|
4678
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) override; |
|
4679
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) override; |
|
4680
|
|
|
|
|
|
|
|
|
4681
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) = 0; |
|
4682
|
|
|
|
|
|
|
|
|
4683
|
|
|
|
|
|
|
protected: |
|
4684
|
|
|
|
|
|
|
struct char_info { |
|
4685
|
|
|
|
|
|
|
char32_t chr; |
|
4686
|
|
|
|
|
|
|
unilib::unicode::category_t cat; |
|
4687
|
|
|
|
|
|
|
const char* str; |
|
4688
|
|
|
|
|
|
|
|
|
4689
|
152
|
|
|
|
|
|
char_info(char32_t chr, const char* str) : chr(chr), cat(unilib::unicode::category(chr)), str(str) {} |
|
4690
|
|
|
|
|
|
|
}; |
|
4691
|
|
|
|
|
|
|
vector chars; |
|
4692
|
|
|
|
|
|
|
size_t current; |
|
4693
|
|
|
|
|
|
|
|
|
4694
|
|
|
|
|
|
|
bool tokenize_url_email(vector& tokens); |
|
4695
|
|
|
|
|
|
|
bool emergency_sentence_split(const vector& tokens); |
|
4696
|
|
|
|
|
|
|
bool is_eos(const vector& tokens, char32_t eos_chr, const unordered_set* abbreviations); |
|
4697
|
|
|
|
|
|
|
|
|
4698
|
|
|
|
|
|
|
private: |
|
4699
|
|
|
|
|
|
|
unsigned url_email_tokenizer; |
|
4700
|
|
|
|
|
|
|
string text_buffer; |
|
4701
|
|
|
|
|
|
|
vector tokens_buffer; |
|
4702
|
|
|
|
|
|
|
string eos_buffer; |
|
4703
|
|
|
|
|
|
|
}; |
|
4704
|
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
} // namespace morphodita |
|
4706
|
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
///////// |
|
4708
|
|
|
|
|
|
|
// File: morphodita/tokenizer/ragel_tokenizer.h |
|
4709
|
|
|
|
|
|
|
///////// |
|
4710
|
|
|
|
|
|
|
|
|
4711
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4712
|
|
|
|
|
|
|
// |
|
4713
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4714
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4715
|
|
|
|
|
|
|
// |
|
4716
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4717
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4718
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4719
|
|
|
|
|
|
|
|
|
4720
|
|
|
|
|
|
|
namespace morphodita { |
|
4721
|
|
|
|
|
|
|
|
|
4722
|
3
|
|
|
|
|
|
class ragel_tokenizer : public unicode_tokenizer { |
|
4723
|
|
|
|
|
|
|
public: |
|
4724
|
|
|
|
|
|
|
ragel_tokenizer(unsigned url_email_tokenizer); |
|
4725
|
|
|
|
|
|
|
|
|
4726
|
|
|
|
|
|
|
protected: |
|
4727
|
|
|
|
|
|
|
static inline uint8_t ragel_char(const char_info& chr); |
|
4728
|
|
|
|
|
|
|
|
|
4729
|
|
|
|
|
|
|
private: |
|
4730
|
|
|
|
|
|
|
static void initialize_ragel_map(); |
|
4731
|
|
|
|
|
|
|
static vector ragel_map; |
|
4732
|
|
|
|
|
|
|
static atomic_flag ragel_map_flag; |
|
4733
|
|
|
|
|
|
|
static void ragel_map_add(char32_t chr, uint8_t mapping); |
|
4734
|
|
|
|
|
|
|
|
|
4735
|
|
|
|
|
|
|
friend class unicode_tokenizer; |
|
4736
|
|
|
|
|
|
|
static bool ragel_url_email(unsigned version, const vector& chars, size_t& current_char, vector& tokens); |
|
4737
|
|
|
|
|
|
|
}; |
|
4738
|
|
|
|
|
|
|
|
|
4739
|
|
|
|
|
|
|
uint8_t ragel_tokenizer::ragel_char(const char_info& chr) { |
|
4740
|
59
|
50
|
|
|
|
|
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4741
|
|
|
|
|
|
|
} |
|
4742
|
|
|
|
|
|
|
|
|
4743
|
|
|
|
|
|
|
} // namespace morphodita |
|
4744
|
|
|
|
|
|
|
|
|
4745
|
|
|
|
|
|
|
///////// |
|
4746
|
|
|
|
|
|
|
// File: morphodita/tokenizer/czech_tokenizer.h |
|
4747
|
|
|
|
|
|
|
///////// |
|
4748
|
|
|
|
|
|
|
|
|
4749
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4750
|
|
|
|
|
|
|
// |
|
4751
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4752
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4753
|
|
|
|
|
|
|
// |
|
4754
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4755
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4756
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4757
|
|
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
namespace morphodita { |
|
4759
|
|
|
|
|
|
|
|
|
4760
|
0
|
|
|
|
|
|
class czech_tokenizer : public ragel_tokenizer { |
|
4761
|
|
|
|
|
|
|
public: |
|
4762
|
|
|
|
|
|
|
enum tokenizer_language { CZECH = 0, SLOVAK = 1 }; |
|
4763
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
|
4764
|
|
|
|
|
|
|
czech_tokenizer(tokenizer_language language, unsigned version, const morpho* m = nullptr); |
|
4765
|
|
|
|
|
|
|
|
|
4766
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
|
4767
|
|
|
|
|
|
|
|
|
4768
|
|
|
|
|
|
|
private: |
|
4769
|
|
|
|
|
|
|
const morpho* m; |
|
4770
|
|
|
|
|
|
|
const unordered_set* abbreviations; |
|
4771
|
|
|
|
|
|
|
vector lemmas; |
|
4772
|
|
|
|
|
|
|
|
|
4773
|
|
|
|
|
|
|
void merge_hyphenated(vector& tokens); |
|
4774
|
|
|
|
|
|
|
|
|
4775
|
|
|
|
|
|
|
static const unordered_set abbreviations_czech; |
|
4776
|
|
|
|
|
|
|
static const unordered_set abbreviations_slovak; |
|
4777
|
|
|
|
|
|
|
}; |
|
4778
|
|
|
|
|
|
|
|
|
4779
|
|
|
|
|
|
|
} // namespace morphodita |
|
4780
|
|
|
|
|
|
|
|
|
4781
|
|
|
|
|
|
|
///////// |
|
4782
|
|
|
|
|
|
|
// File: morphodita/morpho/czech_morpho.h |
|
4783
|
|
|
|
|
|
|
///////// |
|
4784
|
|
|
|
|
|
|
|
|
4785
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4786
|
|
|
|
|
|
|
// |
|
4787
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4788
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4789
|
|
|
|
|
|
|
// |
|
4790
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4791
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4792
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4793
|
|
|
|
|
|
|
|
|
4794
|
|
|
|
|
|
|
namespace morphodita { |
|
4795
|
|
|
|
|
|
|
|
|
4796
|
0
|
|
|
|
|
|
class czech_morpho : public morpho { |
|
4797
|
|
|
|
|
|
|
public: |
|
4798
|
|
|
|
|
|
|
using morpho_language = czech_tokenizer::tokenizer_language; |
|
4799
|
|
|
|
|
|
|
|
|
4800
|
0
|
0
|
|
|
|
|
czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {} |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4801
|
|
|
|
|
|
|
|
|
4802
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
|
4803
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
|
4804
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
|
4805
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
|
4806
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
|
4807
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
|
4808
|
|
|
|
|
|
|
|
|
4809
|
|
|
|
|
|
|
bool load(istream& is); |
|
4810
|
|
|
|
|
|
|
private: |
|
4811
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
|
4812
|
|
|
|
|
|
|
|
|
4813
|
|
|
|
|
|
|
morpho_language language; |
|
4814
|
|
|
|
|
|
|
unsigned version; |
|
4815
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
|
4816
|
|
|
|
|
|
|
unique_ptr> prefix_guesser; |
|
4817
|
|
|
|
|
|
|
unique_ptr statistical_guesser; |
|
4818
|
|
|
|
|
|
|
|
|
4819
|
|
|
|
|
|
|
string unknown_tag = "X@-------------"; |
|
4820
|
|
|
|
|
|
|
string number_tag = "C=-------------"; |
|
4821
|
|
|
|
|
|
|
string punctuation_tag = "Z:-------------"; |
|
4822
|
|
|
|
|
|
|
}; |
|
4823
|
|
|
|
|
|
|
|
|
4824
|
|
|
|
|
|
|
} // namespace morphodita |
|
4825
|
|
|
|
|
|
|
|
|
4826
|
|
|
|
|
|
|
///////// |
|
4827
|
|
|
|
|
|
|
// File: morphodita/morpho/czech_morpho.cpp |
|
4828
|
|
|
|
|
|
|
///////// |
|
4829
|
|
|
|
|
|
|
|
|
4830
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
4831
|
|
|
|
|
|
|
// |
|
4832
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
4833
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
4834
|
|
|
|
|
|
|
// |
|
4835
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
4836
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4837
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
4838
|
|
|
|
|
|
|
|
|
4839
|
|
|
|
|
|
|
namespace morphodita { |
|
4840
|
|
|
|
|
|
|
|
|
4841
|
0
|
|
|
|
|
|
bool czech_morpho::load(istream& is) { |
|
4842
|
|
|
|
|
|
|
binary_decoder data; |
|
4843
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
0
|
|
|
|
|
|
|
4844
|
|
|
|
|
|
|
|
|
4845
|
|
|
|
|
|
|
try { |
|
4846
|
|
|
|
|
|
|
// Load tag length |
|
4847
|
0
|
0
|
|
|
|
|
unsigned tag_length = data.next_1B(); |
|
4848
|
0
|
0
|
|
|
|
|
if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length); |
|
|
|
0
|
|
|
|
|
|
|
4849
|
0
|
0
|
|
|
|
|
if (tag_length < number_tag.size()) number_tag.erase(tag_length); |
|
|
|
0
|
|
|
|
|
|
|
4850
|
0
|
0
|
|
|
|
|
if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length); |
|
|
|
0
|
|
|
|
|
|
|
4851
|
|
|
|
|
|
|
|
|
4852
|
|
|
|
|
|
|
// Load dictionary |
|
4853
|
0
|
0
|
|
|
|
|
dictionary.load(data); |
|
4854
|
|
|
|
|
|
|
|
|
4855
|
|
|
|
|
|
|
// Optionally prefix guesser if present |
|
4856
|
0
|
|
|
|
|
|
prefix_guesser.reset(); |
|
4857
|
0
|
0
|
|
|
|
|
if (data.next_1B()) { |
|
|
|
0
|
|
|
|
|
|
|
4858
|
0
|
0
|
|
|
|
|
prefix_guesser.reset(new morpho_prefix_guesser(dictionary)); |
|
4859
|
0
|
0
|
|
|
|
|
prefix_guesser->load(data); |
|
4860
|
|
|
|
|
|
|
} |
|
4861
|
|
|
|
|
|
|
|
|
4862
|
|
|
|
|
|
|
// Optionally statistical guesser if present |
|
4863
|
|
|
|
|
|
|
statistical_guesser.reset(); |
|
4864
|
0
|
0
|
|
|
|
|
if (data.next_1B()) { |
|
|
|
0
|
|
|
|
|
|
|
4865
|
0
|
0
|
|
|
|
|
statistical_guesser.reset(new morpho_statistical_guesser()); |
|
4866
|
0
|
0
|
|
|
|
|
statistical_guesser->load(data); |
|
4867
|
|
0
|
|
|
|
|
} |
|
4868
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
4869
|
|
|
|
|
|
|
return false; |
|
4870
|
|
|
|
|
|
|
} |
|
4871
|
|
|
|
|
|
|
|
|
4872
|
0
|
|
|
|
|
|
return data.is_end(); |
|
4873
|
|
|
|
|
|
|
} |
|
4874
|
|
|
|
|
|
|
|
|
4875
|
0
|
|
|
|
|
|
int czech_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
|
4876
|
|
|
|
|
|
|
lemmas.clear(); |
|
4877
|
|
|
|
|
|
|
|
|
4878
|
0
|
0
|
|
|
|
|
if (form.len) { |
|
4879
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
|
4880
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
|
4881
|
|
|
|
|
|
|
string form_lc; // all lowercase |
|
4882
|
0
|
0
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
|
4883
|
|
|
|
|
|
|
|
|
4884
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
|
4885
|
0
|
0
|
|
|
|
|
dictionary.analyze(form, lemmas); |
|
4886
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
4887
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
4888
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
|
4889
|
|
|
|
|
|
|
|
|
4890
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers and punctuation. |
|
4891
|
0
|
0
|
|
|
|
|
analyze_special(form, lemmas); |
|
4892
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
|
4893
|
|
|
|
|
|
|
|
|
4894
|
|
|
|
|
|
|
// For the prefix guesser, use only form_lc. |
|
4895
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && prefix_guesser) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4896
|
0
|
0
|
|
|
|
|
prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
4897
|
|
|
|
|
|
|
bool prefix_guesser_guesses = !lemmas.empty(); |
|
4898
|
|
|
|
|
|
|
|
|
4899
|
|
|
|
|
|
|
// For the statistical guesser, use all casing variants. |
|
4900
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && statistical_guesser) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4901
|
0
|
0
|
|
|
|
|
if (form_uclc.empty() && form_lc.empty()) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
4902
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, nullptr); |
|
4903
|
|
|
|
|
|
|
else { |
|
4904
|
0
|
0
|
|
|
|
|
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
|
4905
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, &used_rules); |
|
4906
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
|
|
0
|
|
|
|
|
|
|
4907
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
|
|
0
|
|
|
|
|
|
|
4908
|
|
|
|
|
|
|
} |
|
4909
|
|
|
|
|
|
|
} |
|
4910
|
|
|
|
|
|
|
|
|
4911
|
|
|
|
|
|
|
// Make sure results are unique lemma-tag pairs. Statistical guesser produces |
|
4912
|
|
|
|
|
|
|
// unique lemma-tag pairs, but prefix guesser does not. |
|
4913
|
0
|
0
|
|
|
|
|
if (prefix_guesser_guesses) { |
|
4914
|
0
|
|
|
|
|
|
sort(lemmas.begin(), lemmas.end(), [](const tagged_lemma& a, const tagged_lemma& b) { |
|
4915
|
0
|
|
|
|
|
|
int lemma_compare = a.lemma.compare(b.lemma); |
|
4916
|
0
|
0
|
|
|
|
|
return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); |
|
4917
|
|
|
|
|
|
|
}); |
|
4918
|
0
|
|
|
|
|
|
auto lemmas_end = unique(lemmas.begin(), lemmas.end(), [](const tagged_lemma& a, const tagged_lemma& b) { |
|
4919
|
0
|
0
|
|
|
|
|
return a.lemma == b.lemma && a.tag == b.tag; |
|
|
|
0
|
|
|
|
|
|
|
4920
|
0
|
|
|
|
|
|
}); |
|
4921
|
0
|
0
|
|
|
|
|
if (lemmas_end != lemmas.end()) lemmas.erase(lemmas_end, lemmas.end()); |
|
4922
|
|
|
|
|
|
|
} |
|
4923
|
|
|
|
|
|
|
|
|
4924
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
|
4925
|
|
|
|
|
|
|
} |
|
4926
|
|
|
|
|
|
|
|
|
4927
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
|
4928
|
0
|
|
|
|
|
|
return -1; |
|
4929
|
|
|
|
|
|
|
} |
|
4930
|
|
|
|
|
|
|
|
|
4931
|
0
|
|
|
|
|
|
int czech_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode guesser, vector& forms) const { |
|
4932
|
|
|
|
|
|
|
forms.clear(); |
|
4933
|
|
|
|
|
|
|
|
|
4934
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
|
4935
|
|
|
|
|
|
|
|
|
4936
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
|
4937
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
|
0
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
return NO_GUESSER; |
|
4939
|
|
|
|
|
|
|
|
|
4940
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && prefix_guesser) |
|
|
|
0
|
|
|
|
|
|
|
4941
|
|
|
|
|
|
|
if (prefix_guesser->generate(lemma, filter, forms)) |
|
4942
|
|
|
|
|
|
|
return GUESSER; |
|
4943
|
|
|
|
|
|
|
} |
|
4944
|
|
|
|
|
|
|
|
|
4945
|
|
|
|
|
|
|
return -1; |
|
4946
|
|
|
|
|
|
|
} |
|
4947
|
|
|
|
|
|
|
|
|
4948
|
0
|
|
|
|
|
|
int czech_morpho::raw_lemma_len(string_piece lemma) const { |
|
4949
|
0
|
|
|
|
|
|
return czech_lemma_addinfo::raw_lemma_len(lemma); |
|
4950
|
|
|
|
|
|
|
} |
|
4951
|
|
|
|
|
|
|
|
|
4952
|
0
|
|
|
|
|
|
int czech_morpho::lemma_id_len(string_piece lemma) const { |
|
4953
|
0
|
|
|
|
|
|
return czech_lemma_addinfo::lemma_id_len(lemma); |
|
4954
|
|
|
|
|
|
|
} |
|
4955
|
|
|
|
|
|
|
|
|
4956
|
0
|
|
|
|
|
|
int czech_morpho::raw_form_len(string_piece form) const { |
|
4957
|
0
|
|
|
|
|
|
return form.len; |
|
4958
|
|
|
|
|
|
|
} |
|
4959
|
|
|
|
|
|
|
|
|
4960
|
0
|
|
|
|
|
|
tokenizer* czech_morpho::new_tokenizer() const { |
|
4961
|
0
|
0
|
|
|
|
|
return new czech_tokenizer(language, version, this); |
|
4962
|
|
|
|
|
|
|
} |
|
4963
|
|
|
|
|
|
|
|
|
4964
|
|
|
|
|
|
|
// What characters are considered punctuation except for the ones in unicode Punctuation category. |
|
4965
|
|
|
|
|
|
|
static bool punctuation_additional[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*$*/, |
|
4966
|
|
|
|
|
|
|
0,0,0,0,0,0,1/*+*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*<*/,1/*=*/,1/*>*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4967
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,1/*^*/,0,1/*`*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*|*/,0,1/*~*/,0,0,0,0,0,0,0,0, |
|
4968
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4969
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4970
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4971
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4972
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4973
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4974
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4975
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4976
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4977
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*caron*/}; |
|
4978
|
|
|
|
|
|
|
|
|
4979
|
|
|
|
|
|
|
// What characters of unicode Punctuation category are not considered punctuation. |
|
4980
|
|
|
|
|
|
|
static bool punctuation_exceptions[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4981
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4982
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
|
4983
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,1/*paragraph*/}; |
|
4984
|
|
|
|
|
|
|
|
|
4985
|
0
|
|
|
|
|
|
void czech_morpho::analyze_special(string_piece form, vector& lemmas) const { |
|
4986
|
|
|
|
|
|
|
using namespace unilib; |
|
4987
|
|
|
|
|
|
|
|
|
4988
|
|
|
|
|
|
|
// Analyzer for numbers and punctuation. |
|
4989
|
|
|
|
|
|
|
// Number is anything matching [+-]? is_Pn* ([.,] is_Pn*)? ([Ee] [+-]? is_Pn+)? for at least one is_Pn* nonempty. |
|
4990
|
|
|
|
|
|
|
// Punctuation is any form beginning with either unicode punctuation or punctuation_exceptions character. |
|
4991
|
|
|
|
|
|
|
// Beware that numbers takes precedence, so - is punctuation, -3 is number, -. is punctuation, -.3 is number. |
|
4992
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
|
4993
|
|
|
|
|
|
|
|
|
4994
|
0
|
|
|
|
|
|
string_piece form_ori = form; |
|
4995
|
0
|
|
|
|
|
|
char32_t first = utf8::decode(form.str, form.len); |
|
4996
|
|
|
|
|
|
|
|
|
4997
|
|
|
|
|
|
|
// Try matching a number. |
|
4998
|
|
|
|
|
|
|
char32_t codepoint = first; |
|
4999
|
|
|
|
|
|
|
bool any_digit = false; |
|
5000
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
|
5001
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
|
5002
|
0
|
0
|
|
|
|
|
if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5003
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
|
5004
|
0
|
0
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
|
0
|
|
|
|
|
|
|
5005
|
0
|
|
|
|
|
|
codepoint = utf8::decode(form.str, form.len); |
|
5006
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
|
5007
|
|
|
|
|
|
|
any_digit = false; |
|
5008
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
|
5009
|
|
|
|
|
|
|
} |
|
5010
|
|
|
|
|
|
|
|
|
5011
|
0
|
0
|
|
|
|
|
if (any_digit && !form.len && (!codepoint || codepoint == '.')) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5012
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), number_tag); |
|
5013
|
0
|
0
|
|
|
|
|
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5014
|
0
|
0
|
|
|
|
|
((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first]))) |
|
|
|
0
|
|
|
|
|
|
|
5015
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
|
5016
|
|
|
|
|
|
|
} |
|
5017
|
|
|
|
|
|
|
|
|
5018
|
|
|
|
|
|
|
} // namespace morphodita |
|
5019
|
|
|
|
|
|
|
|
|
5020
|
|
|
|
|
|
|
///////// |
|
5021
|
|
|
|
|
|
|
// File: morphodita/morpho/english_lemma_addinfo.h |
|
5022
|
|
|
|
|
|
|
///////// |
|
5023
|
|
|
|
|
|
|
|
|
5024
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
5025
|
|
|
|
|
|
|
// |
|
5026
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
5027
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
5028
|
|
|
|
|
|
|
// |
|
5029
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
5030
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5031
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5032
|
|
|
|
|
|
|
|
|
5033
|
|
|
|
|
|
|
namespace morphodita { |
|
5034
|
|
|
|
|
|
|
|
|
5035
|
|
|
|
|
|
|
// Declarations |
|
5036
|
0
|
|
|
|
|
|
struct english_lemma_addinfo { |
|
5037
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
|
5038
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
|
5039
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
|
5040
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
|
5041
|
|
|
|
|
|
|
|
|
5042
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
|
5043
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
|
5044
|
|
|
|
|
|
|
|
|
5045
|
|
|
|
|
|
|
vector data; |
|
5046
|
|
|
|
|
|
|
}; |
|
5047
|
|
|
|
|
|
|
|
|
5048
|
|
|
|
|
|
|
// Definitions |
|
5049
|
0
|
|
|
|
|
|
int english_lemma_addinfo::raw_lemma_len(string_piece lemma) { |
|
5050
|
|
|
|
|
|
|
// Lemma ends either by |
|
5051
|
|
|
|
|
|
|
// - '^' on non-first position followed by nothing or [A-Za-z][-A-Za-z]* |
|
5052
|
|
|
|
|
|
|
// - '+' on non-first position followed by nothing |
|
5053
|
0
|
0
|
|
|
|
|
for (unsigned len = 1; len < lemma.len; len++) { |
|
5054
|
0
|
0
|
|
|
|
|
if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+')) |
|
|
|
0
|
|
|
|
|
|
|
5055
|
0
|
|
|
|
|
|
return len; |
|
5056
|
0
|
0
|
|
|
|
|
if (len + 1 < lemma.len && lemma.str[len] == '^') { |
|
|
|
0
|
|
|
|
|
|
|
5057
|
|
|
|
|
|
|
bool ok = true; |
|
5058
|
0
|
0
|
|
|
|
|
for (unsigned i = len + 1; ok && i < lemma.len; i++) |
|
|
|
0
|
|
|
|
|
|
|
5059
|
0
|
0
|
|
|
|
|
ok &= (lemma.str[i] >= 'A' && lemma.str[i] <= 'Z') || |
|
5060
|
0
|
0
|
|
|
|
|
(lemma.str[i] >= 'a' && lemma.str[i] <= 'z') || |
|
|
|
0
|
|
|
|
|
|
|
5061
|
0
|
0
|
|
|
|
|
(i > len + 1 && lemma.str[i] == '-'); |
|
5062
|
0
|
0
|
|
|
|
|
if (ok) return len; |
|
5063
|
|
|
|
|
|
|
} |
|
5064
|
|
|
|
|
|
|
} |
|
5065
|
0
|
|
|
|
|
|
return lemma.len; |
|
5066
|
|
|
|
|
|
|
} |
|
5067
|
|
|
|
|
|
|
|
|
5068
|
|
|
|
|
|
|
int english_lemma_addinfo::lemma_id_len(string_piece lemma) { |
|
5069
|
|
|
|
|
|
|
// No lemma comments. |
|
5070
|
0
|
|
|
|
|
|
return lemma.len; |
|
5071
|
|
|
|
|
|
|
} |
|
5072
|
|
|
|
|
|
|
|
|
5073
|
|
|
|
|
|
|
string english_lemma_addinfo::format(const unsigned char* addinfo, int addinfo_len) { |
|
5074
|
0
|
|
|
|
|
|
return string((const char*) addinfo, addinfo_len); |
|
5075
|
|
|
|
|
|
|
} |
|
5076
|
|
|
|
|
|
|
|
|
5077
|
|
|
|
|
|
|
bool english_lemma_addinfo::generatable(const unsigned char* /*addinfo*/, int /*addinfo_len*/) { |
|
5078
|
|
|
|
|
|
|
return true; |
|
5079
|
|
|
|
|
|
|
} |
|
5080
|
|
|
|
|
|
|
|
|
5081
|
0
|
|
|
|
|
|
int english_lemma_addinfo::parse(string_piece lemma, bool /*die_on_failure*/) { |
|
5082
|
|
|
|
|
|
|
data.clear(); |
|
5083
|
|
|
|
|
|
|
|
|
5084
|
0
|
|
|
|
|
|
size_t len = raw_lemma_len(lemma); |
|
5085
|
0
|
0
|
|
|
|
|
for (size_t i = len; i < lemma.len; i++) |
|
5086
|
0
|
|
|
|
|
|
data.push_back(lemma.str[i]); |
|
5087
|
|
|
|
|
|
|
|
|
5088
|
0
|
|
|
|
|
|
return len; |
|
5089
|
|
|
|
|
|
|
} |
|
5090
|
|
|
|
|
|
|
|
|
5091
|
0
|
|
|
|
|
|
bool english_lemma_addinfo::match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len) { |
|
5092
|
0
|
0
|
|
|
|
|
if (data.empty()) return true; |
|
5093
|
0
|
0
|
|
|
|
|
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5094
|
0
|
0
|
|
|
|
|
if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5095
|
0
|
0
|
|
|
|
|
return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len); |
|
|
|
0
|
|
|
|
|
|
|
5096
|
|
|
|
|
|
|
} |
|
5097
|
|
|
|
|
|
|
|
|
5098
|
|
|
|
|
|
|
} // namespace morphodita |
|
5099
|
|
|
|
|
|
|
|
|
5100
|
|
|
|
|
|
|
///////// |
|
5101
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho_guesser.h |
|
5102
|
|
|
|
|
|
|
///////// |
|
5103
|
|
|
|
|
|
|
|
|
5104
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
5105
|
|
|
|
|
|
|
// |
|
5106
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
5107
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
5108
|
|
|
|
|
|
|
// |
|
5109
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
5110
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5111
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5112
|
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
namespace morphodita { |
|
5114
|
|
|
|
|
|
|
|
|
5115
|
0
|
0
|
|
|
|
|
class english_morpho_guesser { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5116
|
|
|
|
|
|
|
public: |
|
5117
|
|
|
|
|
|
|
void load(binary_decoder& data); |
|
5118
|
|
|
|
|
|
|
void analyze(string_piece form, string_piece form_lc, vector& lemmas) const; |
|
5119
|
|
|
|
|
|
|
bool analyze_proper_names(string_piece form, string_piece form_lc, vector& lemmas) const; |
|
5120
|
|
|
|
|
|
|
|
|
5121
|
|
|
|
|
|
|
private: |
|
5122
|
|
|
|
|
|
|
inline void add(const string& tag, const string& form, vector& lemmas) const; |
|
5123
|
|
|
|
|
|
|
inline void add(const string& tag, const string& tag2, const string& form, vector& lemmas) const; |
|
5124
|
|
|
|
|
|
|
inline void add(const string& tag, const string& form, unsigned negation_len, vector& lemmas) const; |
|
5125
|
|
|
|
|
|
|
inline void add(const string& tag, const string& tag2, const string& form, unsigned negation_len, vector& lemmas) const; |
|
5126
|
|
|
|
|
|
|
void add_NNS(const string& form, unsigned negation_len, vector& lemmas) const; |
|
5127
|
|
|
|
|
|
|
void add_NNPS(const string& form, vector& lemmas) const; |
|
5128
|
|
|
|
|
|
|
void add_VBG(const string& form, vector& lemmas) const; |
|
5129
|
|
|
|
|
|
|
void add_VBD_VBN(const string& form, vector& lemmas) const; |
|
5130
|
|
|
|
|
|
|
void add_VBZ(const string& form, vector& lemmas) const; |
|
5131
|
|
|
|
|
|
|
void add_JJR_RBR(const string& form, unsigned negation_len, vector& lemmas) const; |
|
5132
|
|
|
|
|
|
|
void add_JJS_RBS(const string& form, unsigned negation_len, vector& lemmas) const; |
|
5133
|
|
|
|
|
|
|
|
|
5134
|
|
|
|
|
|
|
enum { NEGATION_LEN = 0, TO_FOLLOW = 1, TOTAL = 2 }; |
|
5135
|
|
|
|
|
|
|
vector exceptions_tags; |
|
5136
|
|
|
|
|
|
|
persistent_unordered_map exceptions; |
|
5137
|
|
|
|
|
|
|
persistent_unordered_map negations; |
|
5138
|
|
|
|
|
|
|
string CD = "CD", FW = "FW", JJ = "JJ", JJR = "JJR", JJS = "JJS", |
|
5139
|
|
|
|
|
|
|
NN = "NN", NNP = "NNP", NNPS = "NNPS", NNS = "NNS", RB = "RB", |
|
5140
|
|
|
|
|
|
|
RBR = "RBR", RBS = "RBS", SYM = "SYM", VB = "VB", VBD = "VBD", |
|
5141
|
|
|
|
|
|
|
VBG = "VBG", VBN = "VBN", VBP = "VBP", VBZ = "VBZ"; |
|
5142
|
|
|
|
|
|
|
}; |
|
5143
|
|
|
|
|
|
|
|
|
5144
|
|
|
|
|
|
|
} // namespace morphodita |
|
5145
|
|
|
|
|
|
|
|
|
5146
|
|
|
|
|
|
|
///////// |
|
5147
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho.h |
|
5148
|
|
|
|
|
|
|
///////// |
|
5149
|
|
|
|
|
|
|
|
|
5150
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
5151
|
|
|
|
|
|
|
// |
|
5152
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
5153
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
5154
|
|
|
|
|
|
|
// |
|
5155
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
5156
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5157
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5158
|
|
|
|
|
|
|
|
|
5159
|
|
|
|
|
|
|
namespace morphodita { |
|
5160
|
|
|
|
|
|
|
|
|
5161
|
0
|
|
|
|
|
|
class english_morpho : public morpho { |
|
5162
|
|
|
|
|
|
|
public: |
|
5163
|
0
|
0
|
|
|
|
|
english_morpho(unsigned version) : version(version) {} |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
|
|
5165
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
|
5166
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
|
5167
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
|
5168
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
|
5169
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
|
5170
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
|
5171
|
|
|
|
|
|
|
|
|
5172
|
|
|
|
|
|
|
bool load(istream& is); |
|
5173
|
|
|
|
|
|
|
private: |
|
5174
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
|
5175
|
|
|
|
|
|
|
|
|
5176
|
|
|
|
|
|
|
unsigned version; |
|
5177
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
|
5178
|
|
|
|
|
|
|
english_morpho_guesser morpho_guesser; |
|
5179
|
|
|
|
|
|
|
|
|
5180
|
|
|
|
|
|
|
string unknown_tag = "UNK"; |
|
5181
|
|
|
|
|
|
|
string number_tag = "CD", nnp_tag = "NNP", ls_tag = "LS"; |
|
5182
|
|
|
|
|
|
|
string open_quotation_tag = "``", close_quotation_tag = "''"; |
|
5183
|
|
|
|
|
|
|
string open_parenthesis_tag = "(", close_parenthesis_tag = ")"; |
|
5184
|
|
|
|
|
|
|
string comma_tag = ",", dot_tag = ".", punctuation_tag = ":", hash_tag = "#", dollar_tag = "$"; |
|
5185
|
|
|
|
|
|
|
string sym_tag = "SYM", jj_tag = "JJ", nn_tag = "NN", nns_tag = "NNS", cc_tag = "CC", pos_tag = "POS", in_tag = "IN"; |
|
5186
|
|
|
|
|
|
|
}; |
|
5187
|
|
|
|
|
|
|
|
|
5188
|
|
|
|
|
|
|
} // namespace morphodita |
|
5189
|
|
|
|
|
|
|
|
|
5190
|
|
|
|
|
|
|
///////// |
|
5191
|
|
|
|
|
|
|
// File: morphodita/tokenizer/english_tokenizer.h |
|
5192
|
|
|
|
|
|
|
///////// |
|
5193
|
|
|
|
|
|
|
|
|
5194
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
5195
|
|
|
|
|
|
|
// |
|
5196
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
5197
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
5198
|
|
|
|
|
|
|
// |
|
5199
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
5200
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5201
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5202
|
|
|
|
|
|
|
|
|
5203
|
|
|
|
|
|
|
namespace morphodita { |
|
5204
|
|
|
|
|
|
|
|
|
5205
|
0
|
|
|
|
|
|
class english_tokenizer : public ragel_tokenizer { |
|
5206
|
|
|
|
|
|
|
public: |
|
5207
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
|
5208
|
|
|
|
|
|
|
english_tokenizer(unsigned version); |
|
5209
|
|
|
|
|
|
|
|
|
5210
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
|
5211
|
|
|
|
|
|
|
|
|
5212
|
|
|
|
|
|
|
private: |
|
5213
|
|
|
|
|
|
|
void split_token(vector& tokens); |
|
5214
|
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
static const unordered_set abbreviations; |
|
5216
|
|
|
|
|
|
|
}; |
|
5217
|
|
|
|
|
|
|
|
|
5218
|
|
|
|
|
|
|
} // namespace morphodita |
|
5219
|
|
|
|
|
|
|
|
|
5220
|
|
|
|
|
|
|
///////// |
|
5221
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho.cpp |
|
5222
|
|
|
|
|
|
|
///////// |
|
5223
|
|
|
|
|
|
|
|
|
5224
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
5225
|
|
|
|
|
|
|
// |
|
5226
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
5227
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
5228
|
|
|
|
|
|
|
// |
|
5229
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
5230
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5231
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5232
|
|
|
|
|
|
|
|
|
5233
|
|
|
|
|
|
|
namespace morphodita { |
|
5234
|
|
|
|
|
|
|
|
|
5235
|
0
|
|
|
|
|
|
bool english_morpho::load(istream& is) { |
|
5236
|
|
|
|
|
|
|
binary_decoder data; |
|
5237
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
0
|
|
|
|
|
|
|
5238
|
|
|
|
|
|
|
|
|
5239
|
|
|
|
|
|
|
try { |
|
5240
|
0
|
0
|
|
|
|
|
dictionary.load(data); |
|
5241
|
0
|
0
|
|
|
|
|
morpho_guesser.load(data); |
|
|
|
0
|
|
|
|
|
|
|
5242
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
5243
|
|
|
|
|
|
|
return false; |
|
5244
|
|
|
|
|
|
|
} |
|
5245
|
|
|
|
|
|
|
|
|
5246
|
0
|
|
|
|
|
|
return data.is_end(); |
|
5247
|
|
|
|
|
|
|
} |
|
5248
|
|
|
|
|
|
|
|
|
5249
|
0
|
|
|
|
|
|
int english_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
|
5250
|
|
|
|
|
|
|
lemmas.clear(); |
|
5251
|
|
|
|
|
|
|
|
|
5252
|
0
|
0
|
|
|
|
|
if (form.len) { |
|
5253
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
|
5254
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
|
5255
|
|
|
|
|
|
|
string form_lc; // all lowercase |
|
5256
|
0
|
0
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
|
5257
|
|
|
|
|
|
|
|
|
5258
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
|
5259
|
0
|
0
|
|
|
|
|
dictionary.analyze(form, lemmas); |
|
5260
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
5261
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
5262
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) |
|
5263
|
0
|
0
|
|
|
|
|
return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5264
|
|
|
|
|
|
|
|
|
5265
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers, punctuation and symbols. |
|
5266
|
0
|
0
|
|
|
|
|
analyze_special(form, lemmas); |
|
5267
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
|
5268
|
|
|
|
|
|
|
|
|
5269
|
|
|
|
|
|
|
// Use English guesser on form_lc if allowed. |
|
5270
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER) |
|
5271
|
0
|
0
|
|
|
|
|
morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
5272
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
|
5273
|
|
|
|
|
|
|
} |
|
5274
|
|
|
|
|
|
|
|
|
5275
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
|
5276
|
0
|
|
|
|
|
|
return -1; |
|
5277
|
|
|
|
|
|
|
} |
|
5278
|
|
|
|
|
|
|
|
|
5279
|
0
|
|
|
|
|
|
int english_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
|
5280
|
|
|
|
|
|
|
forms.clear(); |
|
5281
|
|
|
|
|
|
|
|
|
5282
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
|
5283
|
|
|
|
|
|
|
|
|
5284
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
|
5285
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
|
0
|
|
|
|
|
|
|
5286
|
|
|
|
|
|
|
return NO_GUESSER; |
|
5287
|
|
|
|
|
|
|
} |
|
5288
|
|
|
|
|
|
|
|
|
5289
|
|
|
|
|
|
|
return -1; |
|
5290
|
|
|
|
|
|
|
} |
|
5291
|
|
|
|
|
|
|
|
|
5292
|
0
|
|
|
|
|
|
int english_morpho::raw_lemma_len(string_piece lemma) const { |
|
5293
|
0
|
|
|
|
|
|
return english_lemma_addinfo::raw_lemma_len(lemma); |
|
5294
|
|
|
|
|
|
|
} |
|
5295
|
|
|
|
|
|
|
|
|
5296
|
0
|
|
|
|
|
|
int english_morpho::lemma_id_len(string_piece lemma) const { |
|
5297
|
0
|
|
|
|
|
|
return english_lemma_addinfo::lemma_id_len(lemma); |
|
5298
|
|
|
|
|
|
|
} |
|
5299
|
|
|
|
|
|
|
|
|
5300
|
0
|
|
|
|
|
|
int english_morpho::raw_form_len(string_piece form) const { |
|
5301
|
0
|
|
|
|
|
|
return form.len; |
|
5302
|
|
|
|
|
|
|
} |
|
5303
|
|
|
|
|
|
|
|
|
5304
|
0
|
|
|
|
|
|
tokenizer* english_morpho::new_tokenizer() const { |
|
5305
|
0
|
0
|
|
|
|
|
return new english_tokenizer(version <= 2 ? 1 : 2); |
|
5306
|
|
|
|
|
|
|
} |
|
5307
|
|
|
|
|
|
|
|
|
5308
|
0
|
|
|
|
|
|
void english_morpho::analyze_special(string_piece form, vector& lemmas) const { |
|
5309
|
|
|
|
|
|
|
using namespace unilib; |
|
5310
|
|
|
|
|
|
|
|
|
5311
|
|
|
|
|
|
|
// Analyzer for numbers and punctuation. |
|
5312
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
|
5313
|
|
|
|
|
|
|
|
|
5314
|
|
|
|
|
|
|
// One-letter punctuation exceptions. |
|
5315
|
0
|
0
|
|
|
|
|
if (form.len == 1) |
|
5316
|
0
|
|
|
|
|
|
switch(*form.str) { |
|
5317
|
|
|
|
|
|
|
case '.': |
|
5318
|
|
|
|
|
|
|
case '!': |
|
5319
|
0
|
0
|
|
|
|
|
case '?': lemmas.emplace_back(string(form.str, form.len), dot_tag); return; |
|
5320
|
0
|
0
|
|
|
|
|
case ',': lemmas.emplace_back(string(form.str, form.len), comma_tag); return; |
|
5321
|
0
|
0
|
|
|
|
|
case '#': lemmas.emplace_back(string(form.str, form.len), hash_tag); return; |
|
5322
|
0
|
0
|
|
|
|
|
case '$': lemmas.emplace_back(string(form.str, form.len), dollar_tag); return; |
|
5323
|
0
|
0
|
|
|
|
|
case '[': lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
|
5324
|
0
|
0
|
|
|
|
|
case ']': lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
|
5325
|
0
|
0
|
|
|
|
|
case '%': lemmas.emplace_back(string(form.str, form.len), jj_tag); |
|
5326
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), nn_tag); return; |
|
5327
|
0
|
0
|
|
|
|
|
case '&': lemmas.emplace_back(string(form.str, form.len), cc_tag); |
|
5328
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
|
5329
|
0
|
0
|
|
|
|
|
case '*': lemmas.emplace_back(string(form.str, form.len), sym_tag); |
|
5330
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), nn_tag); return; |
|
5331
|
0
|
0
|
|
|
|
|
case '@': lemmas.emplace_back(string(form.str, form.len), sym_tag); |
|
5332
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), in_tag); return; |
|
5333
|
0
|
0
|
|
|
|
|
case '\'': lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); |
|
5334
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), pos_tag); return; |
|
5335
|
|
|
|
|
|
|
} |
|
5336
|
|
|
|
|
|
|
|
|
5337
|
|
|
|
|
|
|
// Try matching a number: [+-]? is_Pn* (, is_Pn{3})? (. is_Pn*)? (s | [Ee] [+-]? is_Pn+)? with at least one digit |
|
5338
|
0
|
|
|
|
|
|
string_piece number = form; |
|
5339
|
0
|
|
|
|
|
|
char32_t codepoint = utf8::decode(number.str, number.len); |
|
5340
|
|
|
|
|
|
|
bool any_digit = false; |
|
5341
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
|
5342
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
|
5343
|
0
|
0
|
|
|
|
|
while (codepoint == ',') { |
|
5344
|
0
|
|
|
|
|
|
string_piece group = number; |
|
5345
|
0
|
0
|
|
|
|
|
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
|
5346
|
0
|
0
|
|
|
|
|
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
|
5347
|
0
|
0
|
|
|
|
|
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
|
5348
|
|
|
|
|
|
|
any_digit = true; |
|
5349
|
0
|
|
|
|
|
|
number = group; |
|
5350
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
|
5351
|
|
|
|
|
|
|
} |
|
5352
|
0
|
0
|
|
|
|
|
if (codepoint == '.' && number.len) { |
|
|
|
0
|
|
|
|
|
|
|
5353
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
|
5354
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
|
5355
|
|
|
|
|
|
|
} |
|
5356
|
0
|
0
|
|
|
|
|
if (version >= 2 && any_digit && codepoint == 's' && !number.len) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5357
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
|
5358
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len - 1), nns_tag); |
|
5359
|
0
|
|
|
|
|
|
return; |
|
5360
|
|
|
|
|
|
|
} |
|
5361
|
0
|
0
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
|
0
|
|
|
|
|
|
|
5362
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
|
5363
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
|
5364
|
|
|
|
|
|
|
any_digit = false; |
|
5365
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
|
5366
|
|
|
|
|
|
|
} |
|
5367
|
0
|
0
|
|
|
|
|
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5368
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
|
5369
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), nnp_tag); |
|
5370
|
0
|
0
|
|
|
|
|
if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9') |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5371
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), ls_tag); |
|
5372
|
|
|
|
|
|
|
return; |
|
5373
|
|
|
|
|
|
|
} |
|
5374
|
|
|
|
|
|
|
|
|
5375
|
|
|
|
|
|
|
// Open quotation, end quotation, open parentheses, end parentheses, symbol, or other |
|
5376
|
0
|
|
|
|
|
|
string_piece punctuation = form; |
|
5377
|
|
|
|
|
|
|
bool open_quotation = true, close_quotation = true, open_parenthesis = true, close_parenthesis = true, any_punctuation = true, symbol = true; |
|
5378
|
0
|
0
|
|
|
|
|
while ((symbol || any_punctuation) && punctuation.len) { |
|
|
|
0
|
|
|
|
|
|
|
5379
|
0
|
|
|
|
|
|
codepoint = utf8::decode(punctuation.str, punctuation.len); |
|
5380
|
0
|
0
|
|
|
|
|
if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5381
|
0
|
0
|
|
|
|
|
if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5382
|
0
|
0
|
|
|
|
|
if (open_parenthesis) open_parenthesis = unicode::category(codepoint) & unicode::Ps; |
|
5383
|
0
|
0
|
|
|
|
|
if (close_parenthesis) close_parenthesis = unicode::category(codepoint) & unicode::Pe; |
|
5384
|
0
|
0
|
|
|
|
|
if (any_punctuation) any_punctuation = unicode::category(codepoint) & unicode::P; |
|
5385
|
0
|
0
|
|
|
|
|
if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5386
|
|
|
|
|
|
|
} |
|
5387
|
0
|
0
|
|
|
|
|
if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5388
|
0
|
0
|
|
|
|
|
if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5389
|
0
|
0
|
|
|
|
|
if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5390
|
0
|
0
|
|
|
|
|
if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5391
|
0
|
0
|
|
|
|
|
if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5392
|
0
|
0
|
|
|
|
|
if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; } |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5393
|
|
|
|
|
|
|
} |
|
5394
|
|
|
|
|
|
|
|
|
5395
|
|
|
|
|
|
|
} // namespace morphodita |
|
5396
|
|
|
|
|
|
|
|
|
5397
|
|
|
|
|
|
|
///////// |
|
5398
|
|
|
|
|
|
|
// File: morphodita/morpho/english_morpho_guesser.cpp |
|
5399
|
|
|
|
|
|
|
///////// |
|
5400
|
|
|
|
|
|
|
|
|
5401
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
5402
|
|
|
|
|
|
|
// |
|
5403
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
5404
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
5405
|
|
|
|
|
|
|
// |
|
5406
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
5407
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
5408
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
5409
|
|
|
|
|
|
|
|
|
5410
|
|
|
|
|
|
|
// This code is a reimplementation of morphologic analyzer Morphium |
|
5411
|
|
|
|
|
|
|
// by Johanka Spoustova (Treex::Tool::EnglishMorpho::Analysis Perl module) |
|
5412
|
|
|
|
|
|
|
// and reimplementation of morphologic lemmatizer by Martin Popel |
|
5413
|
|
|
|
|
|
|
// (Treex::Tool::EnglishMorpho::Lemmatizer Perl module). The latter is based |
|
5414
|
|
|
|
|
|
|
// on morpha: |
|
5415
|
|
|
|
|
|
|
// Minnen, G., J. Carroll and D. Pearce (2001). Applied morphological |
|
5416
|
|
|
|
|
|
|
// processing of English, Natural Language Engineering, 7(3). 207-223. |
|
5417
|
|
|
|
|
|
|
// Morpha has been released under LGPL as a part of RASP system |
|
5418
|
|
|
|
|
|
|
// http://ilexir.co.uk/applications/rasp/. |
|
5419
|
|
|
|
|
|
|
|
|
5420
|
|
|
|
|
|
|
namespace morphodita { |
|
5421
|
|
|
|
|
|
|
|
|
5422
|
0
|
|
|
|
|
|
// Loads the guesser's data from `data`, in this order:
//   1. a 2-byte count of exception tags, followed by that many tags, each
//      stored as a 1-byte length and then the tag bytes;
//   2. the `exceptions` map;
//   3. the `negations` map.
// Any previously loaded exception tags are discarded first.
//
// NOTE(review): this listing appears to have lost angle-bracketed template
// arguments (e.g. on `data.next`); confirm against the original source.
void english_morpho_guesser::load(binary_decoder& data) {
  const unsigned tag_count = data.next_2B();

  exceptions_tags.clear();
  exceptions_tags.reserve(tag_count);
  for (unsigned i = 0; i < tag_count; i++) {
    const unsigned tag_len = data.next_1B();
    exceptions_tags.emplace_back(string(data.next(tag_len), tag_len));
  }

  // The two maps follow the tag table directly in the stream.
  exceptions.load(data);
  negations.load(data);
}
|
5434
|
|
|
|
|
|
|
|
|
5435
|
|
|
|
|
|
|
static const char _tag_guesser_actions[] = { |
|
5436
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
|
5437
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
|
5438
|
|
|
|
|
|
|
7, 2, 2, 6, 2, 2, 7, 2, |
|
5439
|
|
|
|
|
|
|
4, 6, 2, 4, 7, 2, 5, 6, |
|
5440
|
|
|
|
|
|
|
2, 5, 7, 2, 6, 7, 3, 2, |
|
5441
|
|
|
|
|
|
|
6, 7, 3, 4, 6, 7, 3, 5, |
|
5442
|
|
|
|
|
|
|
6, 7 |
|
5443
|
|
|
|
|
|
|
}; |
|
5444
|
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
static const unsigned char _tag_guesser_key_offsets[] = { |
|
5446
|
|
|
|
|
|
|
0, 19, 26, 34, 42, 50, 58, 66, |
|
5447
|
|
|
|
|
|
|
74, 82, 90, 100, 108, 116, 124, 132, |
|
5448
|
|
|
|
|
|
|
145, 153, 161, 168, 179, 195, 212, 220, |
|
5449
|
|
|
|
|
|
|
228, 236 |
|
5450
|
|
|
|
|
|
|
}; |
|
5451
|
|
|
|
|
|
|
|
|
5452
|
|
|
|
|
|
|
static const char _tag_guesser_trans_keys[] = { |
|
5453
|
|
|
|
|
|
|
45, 46, 99, 100, 103, 105, 109, 110, |
|
5454
|
|
|
|
|
|
|
114, 115, 116, 118, 120, 48, 57, 65, |
|
5455
|
|
|
|
|
|
|
90, 97, 122, 45, 48, 57, 65, 90, |
|
5456
|
|
|
|
|
|
|
97, 122, 45, 114, 48, 57, 65, 90, |
|
5457
|
|
|
|
|
|
|
97, 122, 45, 111, 48, 57, 65, 90, |
|
5458
|
|
|
|
|
|
|
97, 122, 45, 109, 48, 57, 65, 90, |
|
5459
|
|
|
|
|
|
|
97, 122, 45, 101, 48, 57, 65, 90, |
|
5460
|
|
|
|
|
|
|
97, 122, 45, 115, 48, 57, 65, 90, |
|
5461
|
|
|
|
|
|
|
97, 122, 45, 101, 48, 57, 65, 90, |
|
5462
|
|
|
|
|
|
|
97, 122, 45, 108, 48, 57, 65, 90, |
|
5463
|
|
|
|
|
|
|
97, 122, 45, 115, 48, 57, 65, 90, |
|
5464
|
|
|
|
|
|
|
97, 122, 45, 97, 101, 111, 48, 57, |
|
5465
|
|
|
|
|
|
|
65, 90, 98, 122, 45, 101, 48, 57, |
|
5466
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 108, 48, 57, |
|
5467
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 109, 48, 57, |
|
5468
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 105, 48, 57, |
|
5469
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 97, 101, 105, |
|
5470
|
|
|
|
|
|
|
111, 117, 121, 48, 57, 65, 90, 98, |
|
5471
|
|
|
|
|
|
|
122, 45, 115, 48, 57, 65, 90, 97, |
|
5472
|
|
|
|
|
|
|
122, 45, 101, 48, 57, 65, 90, 97, |
|
5473
|
|
|
|
|
|
|
122, 45, 48, 57, 65, 90, 97, 122, |
|
5474
|
|
|
|
|
|
|
45, 101, 114, 115, 116, 48, 57, 65, |
|
5475
|
|
|
|
|
|
|
90, 97, 122, 45, 46, 105, 109, 118, |
|
5476
|
|
|
|
|
|
|
120, 48, 57, 65, 90, 97, 98, 99, |
|
5477
|
|
|
|
|
|
|
100, 101, 122, 45, 46, 101, 105, 109, |
|
5478
|
|
|
|
|
|
|
118, 120, 48, 57, 65, 90, 97, 98, |
|
5479
|
|
|
|
|
|
|
99, 100, 102, 122, 45, 110, 48, 57, |
|
5480
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 105, 48, 57, |
|
5481
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 101, 48, 57, |
|
5482
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 115, 48, 57, |
|
5483
|
|
|
|
|
|
|
65, 90, 97, 122, 0 |
|
5484
|
|
|
|
|
|
|
}; |
|
5485
|
|
|
|
|
|
|
|
|
5486
|
|
|
|
|
|
|
static const char _tag_guesser_single_lengths[] = { |
|
5487
|
|
|
|
|
|
|
13, 1, 2, 2, 2, 2, 2, 2, |
|
5488
|
|
|
|
|
|
|
2, 2, 4, 2, 2, 2, 2, 7, |
|
5489
|
|
|
|
|
|
|
2, 2, 1, 5, 6, 7, 2, 2, |
|
5490
|
|
|
|
|
|
|
2, 2 |
|
5491
|
|
|
|
|
|
|
}; |
|
5492
|
|
|
|
|
|
|
|
|
5493
|
|
|
|
|
|
|
static const char _tag_guesser_range_lengths[] = { |
|
5494
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
|
5495
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
|
5496
|
|
|
|
|
|
|
3, 3, 3, 3, 5, 5, 3, 3, |
|
5497
|
|
|
|
|
|
|
3, 3 |
|
5498
|
|
|
|
|
|
|
}; |
|
5499
|
|
|
|
|
|
|
|
|
5500
|
|
|
|
|
|
|
static const unsigned char _tag_guesser_index_offsets[] = { |
|
5501
|
|
|
|
|
|
|
0, 17, 22, 28, 34, 40, 46, 52, |
|
5502
|
|
|
|
|
|
|
58, 64, 70, 78, 84, 90, 96, 102, |
|
5503
|
|
|
|
|
|
|
113, 119, 125, 130, 139, 151, 164, 170, |
|
5504
|
|
|
|
|
|
|
176, 182 |
|
5505
|
|
|
|
|
|
|
}; |
|
5506
|
|
|
|
|
|
|
|
|
5507
|
|
|
|
|
|
|
static const char _tag_guesser_indicies[] = { |
|
5508
|
|
|
|
|
|
|
1, 2, 5, 6, 7, 5, 5, 8, |
|
5509
|
|
|
|
|
|
|
9, 10, 11, 5, 5, 3, 4, 4, |
|
5510
|
|
|
|
|
|
|
0, 13, 14, 15, 15, 12, 13, 16, |
|
5511
|
|
|
|
|
|
|
14, 15, 15, 12, 13, 17, 14, 15, |
|
5512
|
|
|
|
|
|
|
15, 12, 13, 18, 14, 15, 15, 12, |
|
5513
|
|
|
|
|
|
|
13, 18, 14, 15, 15, 12, 13, 19, |
|
5514
|
|
|
|
|
|
|
14, 15, 15, 12, 13, 20, 14, 15, |
|
5515
|
|
|
|
|
|
|
15, 12, 13, 18, 14, 15, 15, 12, |
|
5516
|
|
|
|
|
|
|
13, 21, 14, 15, 15, 12, 13, 22, |
|
5517
|
|
|
|
|
|
|
23, 24, 14, 15, 15, 12, 13, 25, |
|
5518
|
|
|
|
|
|
|
14, 15, 15, 12, 13, 23, 14, 15, |
|
5519
|
|
|
|
|
|
|
15, 12, 13, 23, 14, 15, 15, 12, |
|
5520
|
|
|
|
|
|
|
13, 26, 14, 15, 15, 12, 28, 15, |
|
5521
|
|
|
|
|
|
|
15, 15, 15, 15, 15, 29, 26, 26, |
|
5522
|
|
|
|
|
|
|
27, 31, 4, 32, 33, 33, 30, 13, |
|
5523
|
|
|
|
|
|
|
23, 14, 15, 15, 12, 13, 14, 15, |
|
5524
|
|
|
|
|
|
|
15, 12, 13, 34, 35, 36, 37, 14, |
|
5525
|
|
|
|
|
|
|
15, 15, 12, 13, 38, 39, 39, 39, |
|
5526
|
|
|
|
|
|
|
39, 14, 15, 15, 39, 15, 12, 13, |
|
5527
|
|
|
|
|
|
|
38, 40, 39, 39, 39, 39, 14, 15, |
|
5528
|
|
|
|
|
|
|
15, 39, 15, 12, 13, 41, 14, 15, |
|
5529
|
|
|
|
|
|
|
15, 12, 13, 42, 14, 15, 15, 12, |
|
5530
|
|
|
|
|
|
|
13, 18, 14, 15, 15, 12, 13, 43, |
|
5531
|
|
|
|
|
|
|
14, 15, 15, 12, 0 |
|
5532
|
|
|
|
|
|
|
}; |
|
5533
|
|
|
|
|
|
|
|
|
5534
|
|
|
|
|
|
|
static const char _tag_guesser_trans_targs[] = { |
|
5535
|
|
|
|
|
|
|
18, 19, 20, 18, 18, 20, 21, 22, |
|
5536
|
|
|
|
|
|
|
23, 24, 16, 25, 18, 19, 18, 1, |
|
5537
|
|
|
|
|
|
|
3, 4, 18, 7, 8, 10, 11, 18, |
|
5538
|
|
|
|
|
|
|
13, 12, 18, 18, 19, 18, 18, 19, |
|
5539
|
|
|
|
|
|
|
18, 18, 2, 5, 6, 9, 20, 20, |
|
5540
|
|
|
|
|
|
|
18, 14, 15, 17 |
|
5541
|
|
|
|
|
|
|
}; |
|
5542
|
|
|
|
|
|
|
|
|
5543
|
|
|
|
|
|
|
static const char _tag_guesser_trans_actions[] = { |
|
5544
|
|
|
|
|
|
|
29, 46, 29, 32, 11, 11, 11, 11, |
|
5545
|
|
|
|
|
|
|
11, 11, 0, 11, 13, 35, 15, 0, |
|
5546
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 3, |
|
5547
|
|
|
|
|
|
|
0, 0, 5, 17, 38, 20, 23, 42, |
|
5548
|
|
|
|
|
|
|
26, 9, 0, 0, 0, 0, 13, 0, |
|
5549
|
|
|
|
|
|
|
7, 0, 0, 0 |
|
5550
|
|
|
|
|
|
|
}; |
|
5551
|
|
|
|
|
|
|
|
|
5552
|
|
|
|
|
|
|
static const char _tag_guesser_eof_actions[] = { |
|
5553
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
5554
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
5555
|
|
|
|
|
|
|
0, 0, 0, 0, 15, 15, 0, 0, |
|
5556
|
|
|
|
|
|
|
0, 0 |
|
5557
|
|
|
|
|
|
|
}; |
|
5558
|
|
|
|
|
|
|
|
|
5559
|
|
|
|
|
|
|
static const int tag_guesser_start = 0; |
|
5560
|
|
|
|
|
|
|
|
|
5561
|
0
|
|
|
|
|
|
// Appends to `lemmas` every (lemma, tag) analysis the guesser can produce for
// a word.
//
//   form     - the word as given; only forwarded to analyze_proper_names
//   form_lc  - the word used for the exceptions lookup, the negation-prefix
//              lookup and the suffix scanner (by its name, the lowercased form)
//   lemmas   - output; analyses are appended, existing entries are kept
//
// If `form_lc` is found in `exceptions`, only the analyses stored there are
// emitted. Otherwise a negative prefix is looked up in `negations`, a set of
// default tags is emitted, and a table-driven scanner (the _tag_guesser_*
// tables) adds further tags. In both cases analyze_proper_names runs last.
//
// NOTE(review): this listing appears to have lost angle-bracketed template
// arguments (the element type of `vector`, and the type argument of the
// `data.next(...)` calls); confirm against the original source.
void english_morpho_guesser::analyze(string_piece form, string_piece form_lc, vector& lemmas) const {
  // Try exceptions list. The callback walks over one stored value: a 1-byte
  // record count, then per record two runs, each preceded by a 1-byte count.
  auto* exception = exceptions.at(form_lc.str, form_lc.len, [](pointer_decoder& entry) {
    for (unsigned records = entry.next_1B(); records > 0; --records) {
      entry.next(entry.next_1B());
      entry.next(entry.next_1B());
    }
  });

  if (exception) {
    // Found in exceptions list: emit each stored lemma once per stored tag.
    pointer_decoder stored(exception);
    for (unsigned records = stored.next_1B(); records > 0; --records) {
      const unsigned lemma_len = stored.next_1B();
      const string lemma(stored.next(lemma_len), lemma_len);
      for (unsigned tag_count = stored.next_1B(); tag_count > 0; --tag_count) {
        // Tags are stored as 2-byte indices into exceptions_tags.
        lemmas.emplace_back(lemma, exceptions_tags[stored.next_2B()]);
      }
    }
  } else {
    // Try stripping negative prefix and use rule guesser
    string lemma_lc(form_lc.str, form_lc.len);

    // Try finding negative prefix: probe prefixes of growing length and stop
    // at the first length that is not present in `negations`. The last prefix
    // that is marked as a negation and leaves enough characters after it wins.
    unsigned negation_len = 0;
    for (unsigned prefix = 1; prefix <= form_lc.len; prefix++) {
      auto found = negations.at(form_lc.str, prefix, [](pointer_decoder& entry) { entry.next(TOTAL); });
      if (!found) break;
      if (found[NEGATION_LEN] && form_lc.len - prefix >= found[TO_FOLLOW])
        negation_len = found[NEGATION_LEN];
    }

    // Add default tags
    add(FW, lemma_lc, lemmas);
    add(JJ, lemma_lc, negation_len, lemmas);
    add(RB, lemma_lc, negation_len, lemmas);
    add(NN, lemma_lc, negation_len, lemmas);
    add_NNS(lemma_lc, negation_len, lemmas);

    // Add specialized tags using the table-driven scanner. `p` only counts
    // steps; the character examined at each step is taken from the END of
    // form_lc moving towards its start.
    const char* p = form_lc.str;
    const char* const pe = form_lc.str + form_lc.len;
    int cs = tag_guesser_start;
    bool added_JJR_RBR = false, added_JJS_RBS = false, added_SYM = false, added_CD = false;

    {
      int _klen;
      unsigned int _trans;
      const char *_acts;
      unsigned int _nacts;
      const char *_keys;
      char ch;

      if (p == pe)
        goto _test_eof;

_resume:
      ch = form_lc.str[form_lc.len - 1 - (p - form_lc.str)];
      _keys = _tag_guesser_trans_keys + _tag_guesser_key_offsets[cs];
      _trans = _tag_guesser_index_offsets[cs];

      // Binary search among the single-character keys of the current state.
      _klen = _tag_guesser_single_lengths[cs];
      if (_klen > 0) {
        const char *_lower = _keys;
        const char *_upper = _keys + _klen - 1;
        while (_lower <= _upper) {
          const char *_mid = _lower + ((_upper - _lower) >> 1);
          if (ch < *_mid) {
            _upper = _mid - 1;
          } else if (ch > *_mid) {
            _lower = _mid + 1;
          } else {
            _trans += (unsigned int)(_mid - _keys);
            goto _match;
          }
        }
        _keys += _klen;
        _trans += _klen;
      }

      // Binary search among the [low, high] range keys (stored as pairs).
      _klen = _tag_guesser_range_lengths[cs];
      if (_klen > 0) {
        const char *_lower = _keys;
        const char *_upper = _keys + (_klen << 1) - 2;
        while (_lower <= _upper) {
          const char *_mid = _lower + (((_upper - _lower) >> 1) & ~1);
          if (ch < _mid[0]) {
            _upper = _mid - 2;
          } else if (ch > _mid[1]) {
            _lower = _mid + 2;
          } else {
            _trans += (unsigned int)((_mid - _keys) >> 1);
            goto _match;
          }
        }
        // No key matched: fall through to the state's default transition.
        _trans += _klen;
      }

_match:
      _trans = _tag_guesser_indicies[_trans];
      cs = _tag_guesser_trans_targs[_trans];

      if (_tag_guesser_trans_actions[_trans] == 0)
        goto _again;

      // The first byte of an action list is its length.
      _acts = _tag_guesser_actions + _tag_guesser_trans_actions[_trans];
      _nacts = (unsigned int) *_acts++;
      while (_nacts-- > 0) {
        switch (*_acts++) {
          case 0:
            if (!added_JJR_RBR) {
              added_JJR_RBR = true;
              add_JJR_RBR(lemma_lc, negation_len, lemmas);
            }
            break;
          case 1:
            if (!added_JJS_RBS) {
              added_JJS_RBS = true;
              add_JJS_RBS(lemma_lc, negation_len, lemmas);
            }
            break;
          case 2:
            add_VBG(lemma_lc, lemmas);
            break;
          case 3:
            add_VBD_VBN(lemma_lc, lemmas);
            break;
          case 4:
            add_VBZ(lemma_lc, lemmas);
            break;
          case 5:
            add(VB, lemma_lc, lemmas);
            add(VBP, lemma_lc, lemmas);
            break;
          case 6:
            if (!added_SYM) {
              added_SYM = true;
              add(SYM, lemma_lc, lemmas);
            }
            break;
          case 7:
            if (!added_CD) {
              added_CD = true;
              add(CD, lemma_lc, lemmas);
            }
            break;
        }
      }

_again:
      if (++p != pe)
        goto _resume;

_test_eof: {}
      // End of input: run the end-of-input actions of the final state.
      if (p == pe) {
        const char *eof_acts = _tag_guesser_actions + _tag_guesser_eof_actions[cs];
        unsigned int eof_nacts = (unsigned int) *eof_acts++;
        while (eof_nacts-- > 0) {
          switch (*eof_acts++) {
            case 7:
              if (!added_CD) {
                added_CD = true;
                add(CD, lemma_lc, lemmas);
              }
              break;
          }
        }
      }
    }
  }

  // Add proper names
  analyze_proper_names(form, form_lc, lemmas);
}
|
5729
|
|
|
|
|
|
|
|
|
5730
|
0
|
|
|
|
|
|
// Adds proper-noun analyses (NNP / NNPS) for `form` to `lemmas` unless an
// entry with that tag is already present.
//
// A form qualifies for NNP when `form` and `form_lc` point to different
// buffers, or when it starts with an apostrophe or an ASCII digit; it
// qualifies for NNPS only in the former case. Note that the check compares
// the `str` pointers, not the characters.
// NOTE(review): presumably callers pass a separate buffer for form_lc only
// when lowercasing changed the word; verify against the callers.
//
// Returns true if at least one analysis was requested (add / add_NNPS was
// called), false otherwise.
//
// NOTE(review): the element type of `vector` appears to have been stripped
// from this listing; confirm against the original source.
bool english_morpho_guesser::analyze_proper_names(string_piece form, string_piece form_lc, vector& lemmas) const {
  const bool buffers_differ = form.str != form_lc.str;
  const bool is_NNP = buffers_differ || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
  const bool is_NNPS = buffers_differ;
  if (!(is_NNP || is_NNPS)) return false;

  // Find out which of the two tags have already been produced.
  bool was_NNP = false, was_NNPS = false;
  for (auto&& analysis : lemmas) {
    if (analysis.tag == NNP) was_NNP = true;
    if (analysis.tag == NNPS) was_NNPS = true;
  }

  const bool need_NNP = is_NNP && !was_NNP;
  const bool need_NNPS = is_NNPS && !was_NNPS;
  if (!need_NNP && !need_NNPS) return false;

  // Proper names keep the original form (not form_lc) as their lemma.
  string lemma(form.str, form.len);
  if (need_NNP) add(NNP, lemma, lemmas);
  if (need_NNPS) add_NNPS(lemma, lemmas);
  return true;
}
|
5748
|
|
|
|
|
|
|
|
|
5749
|
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& form, vector& lemmas) const { |
|
5750
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(form, tag); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5751
|
|
|
|
|
|
|
} |
|
5752
|
|
|
|
|
|
|
|
|
5753
|
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& tag2, const string& form, vector& lemmas) const { |
|
5754
|
|
|
|
|
|
|
add(tag, form, lemmas); |
|
5755
|
|
|
|
|
|
|
add(tag2, form, lemmas); |
|
5756
|
|
|
|
|
|
|
} |
|
5757
|
|
|
|
|
|
|
|
|
5758
|
0
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& form, unsigned negation_len, vector& lemmas) const { |
|
5759
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5760
|
0
|
|
|
|
|
|
} |
|
5761
|
|
|
|
|
|
|
|
|
5762
|
0
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& tag2, const string& form, unsigned negation_len, vector& lemmas) const { |
|
5763
|
0
|
|
|
|
|
|
add(tag, form, negation_len, lemmas); |
|
5764
|
0
|
|
|
|
|
|
add(tag2, form, negation_len, lemmas); |
|
5765
|
0
|
|
|
|
|
|
} |
|
5766
|
|
|
|
|
|
|
|
|
5767
|
|
|
|
|
|
|
// Common definitions (written backwards) |
|
5768
|
|
|
|
|
|
|
// REM(str, len): copy of `str` without its last `len` characters.
// REM_ADD(str, len, add): same, with `add` appended afterwards.
// Every macro argument is parenthesised so that expression arguments such as
// `1 + 2` keep their meaning after expansion. `len` must not exceed
// str.size(); `str` is expanded twice, so it must not have side effects.
#define REM(str, len) ((str).substr(0, (str).size() - (len)))
#define REM_ADD(str, len, add) ((str).substr(0, (str).size() - (len)).append(add))
|
5770
|
|
|
|
|
|
|
|
|
5771
|
|
|
|
|
|
|
static const char _NNS_actions[] = { |
|
5772
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
|
5773
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
|
5774
|
|
|
|
|
|
|
7, 1, 8, 1, 9, 1, 10, 1, |
|
5775
|
|
|
|
|
|
|
11, 1, 12, 1, 13 |
|
5776
|
|
|
|
|
|
|
}; |
|
5777
|
|
|
|
|
|
|
|
|
5778
|
|
|
|
|
|
|
static const char _NNS_key_offsets[] = { |
|
5779
|
|
|
|
|
|
|
0, 0, 2, 3, 4, 5, 7, 17, |
|
5780
|
|
|
|
|
|
|
17, 29, 30, 35, 35, 36, 37, 37, |
|
5781
|
|
|
|
|
|
|
37, 44, 45, 53, 63, 72 |
|
5782
|
|
|
|
|
|
|
}; |
|
5783
|
|
|
|
|
|
|
|
|
5784
|
|
|
|
|
|
|
static const char _NNS_trans_keys[] = { |
|
5785
|
|
|
|
|
|
|
110, 115, 101, 109, 101, 99, 115, 98, |
|
5786
|
|
|
|
|
|
|
100, 102, 104, 106, 110, 112, 116, 118, |
|
5787
|
|
|
|
|
|
|
122, 104, 122, 98, 100, 102, 103, 106, |
|
5788
|
|
|
|
|
|
|
110, 112, 116, 118, 120, 111, 97, 101, |
|
5789
|
|
|
|
|
|
|
105, 111, 117, 105, 119, 104, 105, 111, |
|
5790
|
|
|
|
|
|
|
115, 118, 120, 122, 115, 97, 101, 105, |
|
5791
|
|
|
|
|
|
|
110, 111, 114, 115, 117, 98, 100, 102, |
|
5792
|
|
|
|
|
|
|
104, 106, 110, 112, 116, 118, 122, 97, |
|
5793
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
|
5794
|
|
|
|
|
|
|
0 |
|
5795
|
|
|
|
|
|
|
}; |
|
5796
|
|
|
|
|
|
|
|
|
5797
|
|
|
|
|
|
|
static const char _NNS_single_lengths[] = { |
|
5798
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 2, 0, 0, |
|
5799
|
|
|
|
|
|
|
2, 1, 5, 0, 1, 1, 0, 0, |
|
5800
|
|
|
|
|
|
|
7, 1, 8, 0, 7, 0 |
|
5801
|
|
|
|
|
|
|
}; |
|
5802
|
|
|
|
|
|
|
|
|
5803
|
|
|
|
|
|
|
static const char _NNS_range_lengths[] = { |
|
5804
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 5, 0, |
|
5805
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 0, 0, |
|
5806
|
|
|
|
|
|
|
0, 0, 0, 5, 1, 0 |
|
5807
|
|
|
|
|
|
|
}; |
|
5808
|
|
|
|
|
|
|
|
|
5809
|
|
|
|
|
|
|
static const char _NNS_index_offsets[] = { |
|
5810
|
|
|
|
|
|
|
0, 0, 3, 5, 7, 9, 12, 18, |
|
5811
|
|
|
|
|
|
|
19, 27, 29, 35, 36, 38, 40, 41, |
|
5812
|
|
|
|
|
|
|
42, 50, 52, 61, 67, 76 |
|
5813
|
|
|
|
|
|
|
}; |
|
5814
|
|
|
|
|
|
|
|
|
5815
|
|
|
|
|
|
|
static const char _NNS_indicies[] = { |
|
5816
|
|
|
|
|
|
|
0, 2, 1, 3, 1, 4, 1, 6, |
|
5817
|
|
|
|
|
|
|
5, 7, 7, 1, 8, 8, 8, 8, |
|
5818
|
|
|
|
|
|
|
8, 1, 9, 11, 10, 10, 10, 10, |
|
5819
|
|
|
|
|
|
|
10, 10, 1, 12, 1, 13, 13, 13, |
|
5820
|
|
|
|
|
|
|
13, 13, 1, 14, 15, 1, 16, 1, |
|
5821
|
|
|
|
|
|
|
17, 1, 18, 19, 20, 21, 22, 7, |
|
5822
|
|
|
|
|
|
|
23, 1, 24, 1, 25, 25, 25, 26, |
|
5823
|
|
|
|
|
|
|
25, 27, 28, 29, 1, 30, 30, 30, |
|
5824
|
|
|
|
|
|
|
30, 30, 1, 31, 31, 31, 31, 31, |
|
5825
|
|
|
|
|
|
|
31, 33, 32, 1, 17, 0 |
|
5826
|
|
|
|
|
|
|
}; |
|
5827
|
|
|
|
|
|
|
|
|
5828
|
|
|
|
|
|
|
static const char _NNS_trans_targs[] = { |
|
5829
|
|
|
|
|
|
|
2, 0, 4, 3, 15, 15, 16, 15, |
|
5830
|
|
|
|
|
|
|
7, 15, 15, 17, 15, 11, 15, 13, |
|
5831
|
|
|
|
|
|
|
15, 15, 5, 6, 8, 18, 12, 20, |
|
5832
|
|
|
|
|
|
|
15, 15, 9, 10, 15, 19, 15, 15, |
|
5833
|
|
|
|
|
|
|
14, 21 |
|
5834
|
|
|
|
|
|
|
}; |
|
5835
|
|
|
|
|
|
|
|
|
5836
|
|
|
|
|
|
|
static const char _NNS_trans_actions[] = { |
|
5837
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 27, 27, 21, |
|
5838
|
|
|
|
|
|
|
0, 23, 25, 25, 19, 0, 17, 0, |
|
5839
|
|
|
|
|
|
|
5, 11, 0, 0, 0, 21, 0, 21, |
|
5840
|
|
|
|
|
|
|
3, 9, 0, 0, 15, 9, 7, 13, |
|
5841
|
|
|
|
|
|
|
0, 15 |
|
5842
|
|
|
|
|
|
|
}; |
|
5843
|
|
|
|
|
|
|
|
|
5844
|
|
|
|
|
|
|
static const int NNS_start = 1; |
|
5845
|
|
|
|
|
|
|
|
|
5846
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_NNS(const string& form, unsigned negation_len, vector& lemmas) const { |
|
5847
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
|
5848
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
5849
|
|
|
|
|
|
|
|
|
5850
|
|
|
|
|
|
|
{ |
|
5851
|
|
|
|
|
|
|
cs = NNS_start; |
|
5852
|
|
|
|
|
|
|
} |
|
5853
|
|
|
|
|
|
|
|
|
5854
|
|
|
|
|
|
|
{ |
|
5855
|
|
|
|
|
|
|
int _klen; |
|
5856
|
|
|
|
|
|
|
unsigned int _trans; |
|
5857
|
|
|
|
|
|
|
const char *_acts; |
|
5858
|
|
|
|
|
|
|
unsigned int _nacts; |
|
5859
|
|
|
|
|
|
|
const char *_keys; |
|
5860
|
|
|
|
|
|
|
|
|
5861
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
5862
|
|
|
|
|
|
|
goto _test_eof; |
|
5863
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
5864
|
|
|
|
|
|
|
goto _out; |
|
5865
|
|
|
|
|
|
|
_resume: |
|
5866
|
0
|
|
|
|
|
|
_keys = _NNS_trans_keys + _NNS_key_offsets[cs]; |
|
5867
|
0
|
|
|
|
|
|
_trans = _NNS_index_offsets[cs]; |
|
5868
|
|
|
|
|
|
|
|
|
5869
|
0
|
|
|
|
|
|
_klen = _NNS_single_lengths[cs]; |
|
5870
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
5871
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
5872
|
|
|
|
|
|
|
const char *_mid; |
|
5873
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
5874
|
|
|
|
|
|
|
while (1) { |
|
5875
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
5876
|
|
|
|
|
|
|
break; |
|
5877
|
|
|
|
|
|
|
|
|
5878
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
5879
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
|
5880
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
5881
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
|
5882
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
5883
|
|
|
|
|
|
|
else { |
|
5884
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
5885
|
0
|
|
|
|
|
|
goto _match; |
|
5886
|
|
|
|
|
|
|
} |
|
5887
|
|
|
|
|
|
|
} |
|
5888
|
0
|
|
|
|
|
|
_keys += _klen; |
|
5889
|
0
|
|
|
|
|
|
_trans += _klen; |
|
5890
|
|
|
|
|
|
|
} |
|
5891
|
|
|
|
|
|
|
|
|
5892
|
0
|
|
|
|
|
|
_klen = _NNS_range_lengths[cs]; |
|
5893
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
5894
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
5895
|
|
|
|
|
|
|
const char *_mid; |
|
5896
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
5897
|
|
|
|
|
|
|
while (1) { |
|
5898
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
5899
|
|
|
|
|
|
|
break; |
|
5900
|
|
|
|
|
|
|
|
|
5901
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
5902
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
|
5903
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
5904
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
|
5905
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
5906
|
|
|
|
|
|
|
else { |
|
5907
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
5908
|
0
|
|
|
|
|
|
goto _match; |
|
5909
|
|
|
|
|
|
|
} |
|
5910
|
|
|
|
|
|
|
} |
|
5911
|
0
|
|
|
|
|
|
_trans += _klen; |
|
5912
|
|
|
|
|
|
|
} |
|
5913
|
|
|
|
|
|
|
|
|
5914
|
|
|
|
|
|
|
_match: |
|
5915
|
0
|
|
|
|
|
|
_trans = _NNS_indicies[_trans]; |
|
5916
|
0
|
|
|
|
|
|
cs = _NNS_trans_targs[_trans]; |
|
5917
|
|
|
|
|
|
|
|
|
5918
|
0
|
0
|
|
|
|
|
if ( _NNS_trans_actions[_trans] == 0 ) |
|
5919
|
|
|
|
|
|
|
goto _again; |
|
5920
|
|
|
|
|
|
|
|
|
5921
|
0
|
|
|
|
|
|
_acts = _NNS_actions + _NNS_trans_actions[_trans]; |
|
5922
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
5923
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
5924
|
|
|
|
|
|
|
{ |
|
5925
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
5926
|
|
|
|
|
|
|
{ |
|
5927
|
|
|
|
|
|
|
case 0: |
|
5928
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = "an"; } |
|
5929
|
|
|
|
|
|
|
break; |
|
5930
|
|
|
|
|
|
|
case 1: |
|
5931
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 1, append = nullptr; } |
|
5932
|
|
|
|
|
|
|
break; |
|
5933
|
|
|
|
|
|
|
case 2: |
|
5934
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = "fe"; } |
|
5935
|
|
|
|
|
|
|
break; |
|
5936
|
|
|
|
|
|
|
case 3: |
|
5937
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
|
5938
|
|
|
|
|
|
|
break; |
|
5939
|
|
|
|
|
|
|
case 4: |
|
5940
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
|
5941
|
|
|
|
|
|
|
break; |
|
5942
|
|
|
|
|
|
|
case 5: |
|
5943
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
|
5944
|
|
|
|
|
|
|
break; |
|
5945
|
|
|
|
|
|
|
case 6: |
|
5946
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
|
5947
|
|
|
|
|
|
|
break; |
|
5948
|
|
|
|
|
|
|
case 7: |
|
5949
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
|
5950
|
|
|
|
|
|
|
break; |
|
5951
|
|
|
|
|
|
|
case 8: |
|
5952
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
|
5953
|
|
|
|
|
|
|
break; |
|
5954
|
|
|
|
|
|
|
case 9: |
|
5955
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
|
5956
|
|
|
|
|
|
|
break; |
|
5957
|
|
|
|
|
|
|
case 10: |
|
5958
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
|
5959
|
|
|
|
|
|
|
break; |
|
5960
|
|
|
|
|
|
|
case 11: |
|
5961
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 3, append = "y"; } |
|
5962
|
|
|
|
|
|
|
break; |
|
5963
|
|
|
|
|
|
|
case 12: |
|
5964
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
|
5965
|
|
|
|
|
|
|
break; |
|
5966
|
|
|
|
|
|
|
case 13: |
|
5967
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
|
5968
|
|
|
|
|
|
|
break; |
|
5969
|
|
|
|
|
|
|
} |
|
5970
|
|
|
|
|
|
|
} |
|
5971
|
|
|
|
|
|
|
|
|
5972
|
|
|
|
|
|
|
_again: |
|
5973
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
5974
|
|
|
|
|
|
|
goto _out; |
|
5975
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
5976
|
|
|
|
|
|
|
goto _resume; |
|
5977
|
|
|
|
|
|
|
_test_eof: {} |
|
5978
|
|
|
|
|
|
|
_out: {} |
|
5979
|
|
|
|
|
|
|
} |
|
5980
|
|
|
|
|
|
|
|
|
5981
|
0
|
0
|
|
|
|
|
add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5982
|
0
|
|
|
|
|
|
} |
|
5983
|
|
|
|
|
|
|
|
|
5984
|
|
|
|
|
|
|
static const char _NNPS_actions[] = { |
|
5985
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
|
5986
|
|
|
|
|
|
|
5, 1, 6, 1, 7, 1, 8, 1, |
|
5987
|
|
|
|
|
|
|
9, 1, 10, 1, 11, 1, 12, 1, |
|
5988
|
|
|
|
|
|
|
14, 1, 15, 1, 16, 2, 0, 1, |
|
5989
|
|
|
|
|
|
|
2, 3, 4, 2, 13, 14 |
|
5990
|
|
|
|
|
|
|
}; |
|
5991
|
|
|
|
|
|
|
|
|
5992
|
|
|
|
|
|
|
static const unsigned char _NNPS_key_offsets[] = { |
|
5993
|
|
|
|
|
|
|
0, 0, 4, 6, 8, 10, 12, 16, |
|
5994
|
|
|
|
|
|
|
36, 36, 60, 62, 72, 72, 74, 76, |
|
5995
|
|
|
|
|
|
|
78, 78, 98, 98, 100, 102, 104, 104, |
|
5996
|
|
|
|
|
|
|
118, 120, 136, 156, 174, 174 |
|
5997
|
|
|
|
|
|
|
}; |
|
5998
|
|
|
|
|
|
|
|
|
5999
|
|
|
|
|
|
|
static const char _NNPS_trans_keys[] = { |
|
6000
|
|
|
|
|
|
|
78, 83, 110, 115, 69, 101, 77, 109, |
|
6001
|
|
|
|
|
|
|
77, 109, 69, 101, 67, 83, 99, 115, |
|
6002
|
|
|
|
|
|
|
66, 68, 70, 72, 74, 78, 80, 84, |
|
6003
|
|
|
|
|
|
|
86, 90, 98, 100, 102, 104, 106, 110, |
|
6004
|
|
|
|
|
|
|
112, 116, 118, 122, 72, 90, 104, 122, |
|
6005
|
|
|
|
|
|
|
66, 68, 70, 71, 74, 78, 80, 84, |
|
6006
|
|
|
|
|
|
|
86, 88, 98, 100, 102, 103, 106, 110, |
|
6007
|
|
|
|
|
|
|
112, 116, 118, 120, 79, 111, 65, 69, |
|
6008
|
|
|
|
|
|
|
73, 79, 85, 97, 101, 105, 111, 117, |
|
6009
|
|
|
|
|
|
|
73, 105, 87, 119, 87, 119, 66, 68, |
|
6010
|
|
|
|
|
|
|
70, 72, 74, 78, 80, 84, 86, 90, |
|
6011
|
|
|
|
|
|
|
98, 100, 102, 104, 106, 110, 112, 116, |
|
6012
|
|
|
|
|
|
|
118, 122, 73, 105, 69, 101, 69, 101, |
|
6013
|
|
|
|
|
|
|
72, 73, 79, 83, 86, 88, 90, 104, |
|
6014
|
|
|
|
|
|
|
105, 111, 115, 118, 120, 122, 83, 115, |
|
6015
|
|
|
|
|
|
|
65, 69, 73, 78, 79, 82, 83, 85, |
|
6016
|
|
|
|
|
|
|
97, 101, 105, 110, 111, 114, 115, 117, |
|
6017
|
|
|
|
|
|
|
66, 68, 70, 72, 74, 78, 80, 84, |
|
6018
|
|
|
|
|
|
|
86, 90, 98, 100, 102, 104, 106, 110, |
|
6019
|
|
|
|
|
|
|
112, 116, 118, 122, 65, 69, 73, 79, |
|
6020
|
|
|
|
|
|
|
85, 89, 90, 97, 101, 105, 111, 117, |
|
6021
|
|
|
|
|
|
|
121, 122, 66, 88, 98, 120, 72, 73, |
|
6022
|
|
|
|
|
|
|
79, 83, 86, 88, 90, 104, 105, 111, |
|
6023
|
|
|
|
|
|
|
115, 118, 120, 122, 0 |
|
6024
|
|
|
|
|
|
|
}; |
|
6025
|
|
|
|
|
|
|
|
|
6026
|
|
|
|
|
|
|
static const char _NNPS_single_lengths[] = { |
|
6027
|
|
|
|
|
|
|
0, 4, 2, 2, 2, 2, 4, 0, |
|
6028
|
|
|
|
|
|
|
0, 4, 2, 10, 0, 2, 2, 2, |
|
6029
|
|
|
|
|
|
|
0, 0, 0, 2, 2, 2, 0, 14, |
|
6030
|
|
|
|
|
|
|
2, 16, 0, 14, 0, 14 |
|
6031
|
|
|
|
|
|
|
}; |
|
6032
|
|
|
|
|
|
|
|
|
6033
|
|
|
|
|
|
|
static const char _NNPS_range_lengths[] = { |
|
6034
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 10, |
|
6035
|
|
|
|
|
|
|
0, 10, 0, 0, 0, 0, 0, 0, |
|
6036
|
|
|
|
|
|
|
0, 10, 0, 0, 0, 0, 0, 0, |
|
6037
|
|
|
|
|
|
|
0, 0, 10, 2, 0, 0 |
|
6038
|
|
|
|
|
|
|
}; |
|
6039
|
|
|
|
|
|
|
|
|
6040
|
|
|
|
|
|
|
static const unsigned char _NNPS_index_offsets[] = { |
|
6041
|
|
|
|
|
|
|
0, 0, 5, 8, 11, 14, 17, 22, |
|
6042
|
|
|
|
|
|
|
33, 34, 49, 52, 63, 64, 67, 70, |
|
6043
|
|
|
|
|
|
|
73, 74, 85, 86, 89, 92, 95, 96, |
|
6044
|
|
|
|
|
|
|
111, 114, 131, 142, 159, 160 |
|
6045
|
|
|
|
|
|
|
}; |
|
6046
|
|
|
|
|
|
|
|
|
6047
|
|
|
|
|
|
|
static const char _NNPS_indicies[] = { |
|
6048
|
|
|
|
|
|
|
0, 2, 3, 4, 1, 5, 6, 1, |
|
6049
|
|
|
|
|
|
|
7, 8, 1, 8, 8, 1, 10, 11, |
|
6050
|
|
|
|
|
|
|
9, 12, 12, 12, 12, 1, 13, 13, |
|
6051
|
|
|
|
|
|
|
13, 13, 13, 13, 13, 13, 13, 13, |
|
6052
|
|
|
|
|
|
|
1, 14, 16, 15, 16, 15, 15, 15, |
|
6053
|
|
|
|
|
|
|
15, 15, 15, 15, 15, 15, 15, 15, |
|
6054
|
|
|
|
|
|
|
1, 17, 17, 1, 18, 18, 18, 18, |
|
6055
|
|
|
|
|
|
|
18, 18, 18, 18, 18, 18, 1, 19, |
|
6056
|
|
|
|
|
|
|
20, 21, 1, 22, 23, 1, 23, 23, |
|
6057
|
|
|
|
|
|
|
1, 24, 25, 25, 25, 25, 25, 25, |
|
6058
|
|
|
|
|
|
|
25, 25, 25, 25, 1, 26, 21, 21, |
|
6059
|
|
|
|
|
|
|
1, 6, 6, 1, 11, 11, 9, 1, |
|
6060
|
|
|
|
|
|
|
27, 28, 29, 30, 31, 12, 32, 27, |
|
6061
|
|
|
|
|
|
|
33, 29, 30, 34, 12, 32, 1, 35, |
|
6062
|
|
|
|
|
|
|
35, 1, 36, 36, 36, 37, 36, 38, |
|
6063
|
|
|
|
|
|
|
39, 40, 36, 36, 36, 37, 36, 38, |
|
6064
|
|
|
|
|
|
|
39, 40, 1, 41, 41, 41, 41, 41, |
|
6065
|
|
|
|
|
|
|
41, 41, 41, 41, 41, 1, 42, 42, |
|
6066
|
|
|
|
|
|
|
42, 42, 42, 42, 44, 42, 42, 42, |
|
6067
|
|
|
|
|
|
|
42, 42, 42, 44, 43, 43, 1, 24, |
|
6068
|
|
|
|
|
|
|
27, 33, 29, 30, 34, 12, 32, 27, |
|
6069
|
|
|
|
|
|
|
33, 29, 30, 34, 12, 32, 1, 0 |
|
6070
|
|
|
|
|
|
|
}; |
|
6071
|
|
|
|
|
|
|
|
|
6072
|
|
|
|
|
|
|
static const char _NNPS_trans_targs[] = { |
|
6073
|
|
|
|
|
|
|
2, 0, 5, 20, 21, 3, 4, 22, |
|
6074
|
|
|
|
|
|
|
22, 22, 23, 29, 22, 8, 22, 22, |
|
6075
|
|
|
|
|
|
|
24, 22, 12, 22, 14, 15, 22, 22, |
|
6076
|
|
|
|
|
|
|
22, 18, 22, 6, 7, 9, 25, 13, |
|
6077
|
|
|
|
|
|
|
27, 17, 19, 22, 22, 10, 11, 22, |
|
6078
|
|
|
|
|
|
|
26, 22, 22, 16, 28 |
|
6079
|
|
|
|
|
|
|
}; |
|
6080
|
|
|
|
|
|
|
|
|
6081
|
|
|
|
|
|
|
static const char _NNPS_trans_actions[] = { |
|
6082
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 29, |
|
6083
|
|
|
|
|
|
|
1, 27, 27, 27, 21, 0, 35, 25, |
|
6084
|
|
|
|
|
|
|
25, 19, 0, 17, 0, 0, 32, 5, |
|
6085
|
|
|
|
|
|
|
11, 0, 23, 0, 0, 0, 21, 0, |
|
6086
|
|
|
|
|
|
|
21, 0, 0, 3, 9, 0, 0, 15, |
|
6087
|
|
|
|
|
|
|
9, 7, 13, 0, 15 |
|
6088
|
|
|
|
|
|
|
}; |
|
6089
|
|
|
|
|
|
|
|
|
6090
|
|
|
|
|
|
|
static const int NNPS_start = 1; |
|
6091
|
|
|
|
|
|
|
|
|
6092
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_NNPS(const string& form, vector& lemmas) const { |
|
6093
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
|
6094
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
6095
|
|
|
|
|
|
|
|
|
6096
|
|
|
|
|
|
|
{ |
|
6097
|
|
|
|
|
|
|
cs = NNPS_start; |
|
6098
|
|
|
|
|
|
|
} |
|
6099
|
|
|
|
|
|
|
|
|
6100
|
|
|
|
|
|
|
{ |
|
6101
|
|
|
|
|
|
|
int _klen; |
|
6102
|
|
|
|
|
|
|
unsigned int _trans; |
|
6103
|
|
|
|
|
|
|
const char *_acts; |
|
6104
|
|
|
|
|
|
|
unsigned int _nacts; |
|
6105
|
|
|
|
|
|
|
const char *_keys; |
|
6106
|
|
|
|
|
|
|
|
|
6107
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
6108
|
|
|
|
|
|
|
goto _test_eof; |
|
6109
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
6110
|
|
|
|
|
|
|
goto _out; |
|
6111
|
|
|
|
|
|
|
_resume: |
|
6112
|
0
|
|
|
|
|
|
_keys = _NNPS_trans_keys + _NNPS_key_offsets[cs]; |
|
6113
|
0
|
|
|
|
|
|
_trans = _NNPS_index_offsets[cs]; |
|
6114
|
|
|
|
|
|
|
|
|
6115
|
0
|
|
|
|
|
|
_klen = _NNPS_single_lengths[cs]; |
|
6116
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
6117
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
6118
|
|
|
|
|
|
|
const char *_mid; |
|
6119
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
6120
|
|
|
|
|
|
|
while (1) { |
|
6121
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
6122
|
|
|
|
|
|
|
break; |
|
6123
|
|
|
|
|
|
|
|
|
6124
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
6125
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
|
6126
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
6127
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
|
6128
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
6129
|
|
|
|
|
|
|
else { |
|
6130
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
6131
|
0
|
|
|
|
|
|
goto _match; |
|
6132
|
|
|
|
|
|
|
} |
|
6133
|
|
|
|
|
|
|
} |
|
6134
|
0
|
|
|
|
|
|
_keys += _klen; |
|
6135
|
0
|
|
|
|
|
|
_trans += _klen; |
|
6136
|
|
|
|
|
|
|
} |
|
6137
|
|
|
|
|
|
|
|
|
6138
|
0
|
|
|
|
|
|
_klen = _NNPS_range_lengths[cs]; |
|
6139
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
6140
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
6141
|
|
|
|
|
|
|
const char *_mid; |
|
6142
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
6143
|
|
|
|
|
|
|
while (1) { |
|
6144
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
6145
|
|
|
|
|
|
|
break; |
|
6146
|
|
|
|
|
|
|
|
|
6147
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
6148
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
|
6149
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
6150
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
|
6151
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
6152
|
|
|
|
|
|
|
else { |
|
6153
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
6154
|
0
|
|
|
|
|
|
goto _match; |
|
6155
|
|
|
|
|
|
|
} |
|
6156
|
|
|
|
|
|
|
} |
|
6157
|
0
|
|
|
|
|
|
_trans += _klen; |
|
6158
|
|
|
|
|
|
|
} |
|
6159
|
|
|
|
|
|
|
|
|
6160
|
|
|
|
|
|
|
_match: |
|
6161
|
0
|
|
|
|
|
|
_trans = _NNPS_indicies[_trans]; |
|
6162
|
0
|
|
|
|
|
|
cs = _NNPS_trans_targs[_trans]; |
|
6163
|
|
|
|
|
|
|
|
|
6164
|
0
|
0
|
|
|
|
|
if ( _NNPS_trans_actions[_trans] == 0 ) |
|
6165
|
|
|
|
|
|
|
goto _again; |
|
6166
|
|
|
|
|
|
|
|
|
6167
|
0
|
|
|
|
|
|
_acts = _NNPS_actions + _NNPS_trans_actions[_trans]; |
|
6168
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
6169
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
6170
|
|
|
|
|
|
|
{ |
|
6171
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
6172
|
|
|
|
|
|
|
{ |
|
6173
|
|
|
|
|
|
|
case 0: |
|
6174
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = "AN"; } |
|
6175
|
|
|
|
|
|
|
break; |
|
6176
|
|
|
|
|
|
|
case 1: |
|
6177
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = "an"; } |
|
6178
|
|
|
|
|
|
|
break; |
|
6179
|
|
|
|
|
|
|
case 2: |
|
6180
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
|
6181
|
|
|
|
|
|
|
break; |
|
6182
|
|
|
|
|
|
|
case 3: |
|
6183
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = "FE"; } |
|
6184
|
|
|
|
|
|
|
break; |
|
6185
|
|
|
|
|
|
|
case 4: |
|
6186
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 3, append = "fe"; } |
|
6187
|
|
|
|
|
|
|
break; |
|
6188
|
|
|
|
|
|
|
case 5: |
|
6189
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
|
6190
|
|
|
|
|
|
|
break; |
|
6191
|
|
|
|
|
|
|
case 6: |
|
6192
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
|
6193
|
|
|
|
|
|
|
break; |
|
6194
|
|
|
|
|
|
|
case 7: |
|
6195
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
|
6196
|
|
|
|
|
|
|
break; |
|
6197
|
|
|
|
|
|
|
case 8: |
|
6198
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
|
6199
|
|
|
|
|
|
|
break; |
|
6200
|
|
|
|
|
|
|
case 9: |
|
6201
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 2, append = nullptr; } |
|
6202
|
|
|
|
|
|
|
break; |
|
6203
|
|
|
|
|
|
|
case 10: |
|
6204
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 1, append = nullptr; } |
|
6205
|
|
|
|
|
|
|
break; |
|
6206
|
|
|
|
|
|
|
case 11: |
|
6207
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
|
6208
|
|
|
|
|
|
|
break; |
|
6209
|
|
|
|
|
|
|
case 12: |
|
6210
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
|
6211
|
|
|
|
|
|
|
break; |
|
6212
|
|
|
|
|
|
|
case 13: |
|
6213
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 3, append = "Y"; } |
|
6214
|
|
|
|
|
|
|
break; |
|
6215
|
|
|
|
|
|
|
case 14: |
|
6216
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 3, append = "y"; } |
|
6217
|
|
|
|
|
|
|
break; |
|
6218
|
|
|
|
|
|
|
case 15: |
|
6219
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 2, append = nullptr; } |
|
6220
|
|
|
|
|
|
|
break; |
|
6221
|
|
|
|
|
|
|
case 16: |
|
6222
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 1, append = nullptr; } |
|
6223
|
|
|
|
|
|
|
break; |
|
6224
|
|
|
|
|
|
|
} |
|
6225
|
|
|
|
|
|
|
} |
|
6226
|
|
|
|
|
|
|
|
|
6227
|
|
|
|
|
|
|
_again: |
|
6228
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
6229
|
|
|
|
|
|
|
goto _out; |
|
6230
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
6231
|
|
|
|
|
|
|
goto _resume; |
|
6232
|
|
|
|
|
|
|
_test_eof: {} |
|
6233
|
|
|
|
|
|
|
_out: {} |
|
6234
|
|
|
|
|
|
|
} |
|
6235
|
|
|
|
|
|
|
|
|
6236
|
0
|
0
|
|
|
|
|
add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
|
0
|
|
|
|
|
|
|
6237
|
0
|
|
|
|
|
|
} |
|
6238
|
|
|
|
|
|
|
|
|
6239
|
|
|
|
|
|
|
static const char _VBG_actions[] = { |
|
6240
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
|
6241
|
|
|
|
|
|
|
5, 1, 6, 1, 7, 1, 9, 1, |
|
6242
|
|
|
|
|
|
|
10, 1, 11, 1, 12, 1, 13, 1, |
|
6243
|
|
|
|
|
|
|
14, 1, 15, 1, 16, 1, 17, 2, |
|
6244
|
|
|
|
|
|
|
0, 12, 2, 3, 4, 2, 5, 9, |
|
6245
|
|
|
|
|
|
|
2, 5, 10, 2, 8, 9, 2, 9, |
|
6246
|
|
|
|
|
|
|
10, 2, 11, 12, 3, 0, 2, 12, |
|
6247
|
|
|
|
|
|
|
3, 2, 11, 12 |
|
6248
|
|
|
|
|
|
|
}; |
|
6249
|
|
|
|
|
|
|
|
|
6250
|
|
|
|
|
|
|
static const short _VBG_key_offsets[] = { |
|
6251
|
|
|
|
|
|
|
0, 0, 1, 2, 3, 9, 14, 24, |
|
6252
|
|
|
|
|
|
|
29, 34, 44, 46, 47, 48, 49, 50, |
|
6253
|
|
|
|
|
|
|
51, 52, 59, 66, 68, 70, 71, 72, |
|
6254
|
|
|
|
|
|
|
73, 74, 75, 76, 81, 89, 90, 91, |
|
6255
|
|
|
|
|
|
|
92, 93, 94, 96, 97, 98, 99, 100, |
|
6256
|
|
|
|
|
|
|
101, 102, 127, 127, 136, 137, 142, 153, |
|
6257
|
|
|
|
|
|
|
162, 171, 181, 186, 191, 197, 207, 207, |
|
6258
|
|
|
|
|
|
|
216, 228, 229, 240, 240, 249, 258, 267, |
|
6259
|
|
|
|
|
|
|
276, 285, 290, 302, 313, 318, 324, 334, |
|
6260
|
|
|
|
|
|
|
344, 355, 362, 373, 382, 391, 391, 402, |
|
6261
|
|
|
|
|
|
|
413, 415, 416, 417, 417, 418, 426, 437, |
|
6262
|
|
|
|
|
|
|
442, 448, 458, 468, 479, 486, 497, 504, |
|
6263
|
|
|
|
|
|
|
510, 519, 528, 537, 543 |
|
6264
|
|
|
|
|
|
|
}; |
|
6265
|
|
|
|
|
|
|
|
|
6266
|
|
|
|
|
|
|
static const char _VBG_trans_keys[] = { |
|
6267
|
|
|
|
|
|
|
103, 110, 105, 97, 101, 105, 111, 117, |
|
6268
|
|
|
|
|
|
|
121, 97, 101, 105, 111, 117, 98, 100, |
|
6269
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 122, |
|
6270
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 97, 101, 105, |
|
6271
|
|
|
|
|
|
|
111, 117, 98, 100, 102, 104, 106, 110, |
|
6272
|
|
|
|
|
|
|
112, 116, 118, 122, 98, 114, 105, 114, |
|
6273
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
|
6274
|
|
|
|
|
|
|
117, 98, 122, 97, 101, 105, 111, 117, |
|
6275
|
|
|
|
|
|
|
98, 122, 97, 122, 98, 114, 105, 114, |
|
6276
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
|
6277
|
|
|
|
|
|
|
117, 97, 101, 105, 110, 111, 115, 117, |
|
6278
|
|
|
|
|
|
|
120, 105, 112, 105, 109, 101, 98, 114, |
|
6279
|
|
|
|
|
|
|
105, 114, 112, 105, 109, 101, 98, 99, |
|
6280
|
|
|
|
|
|
|
100, 102, 103, 104, 106, 107, 108, 109, |
|
6281
|
|
|
|
|
|
|
110, 111, 112, 113, 114, 115, 116, 117, |
|
6282
|
|
|
|
|
|
|
118, 119, 120, 121, 122, 97, 105, 97, |
|
6283
|
|
|
|
|
|
|
98, 101, 105, 111, 117, 122, 99, 120, |
|
6284
|
|
|
|
|
|
|
113, 97, 101, 105, 111, 117, 98, 99, |
|
6285
|
|
|
|
|
|
|
100, 105, 111, 117, 122, 97, 101, 102, |
|
6286
|
|
|
|
|
|
|
120, 97, 100, 101, 105, 111, 117, 122, |
|
6287
|
|
|
|
|
|
|
98, 120, 97, 101, 102, 105, 111, 117, |
|
6288
|
|
|
|
|
|
|
122, 98, 120, 97, 101, 103, 105, 110, |
|
6289
|
|
|
|
|
|
|
111, 117, 122, 98, 120, 97, 101, 105, |
|
6290
|
|
|
|
|
|
|
111, 117, 101, 110, 111, 115, 120, 101, |
|
6291
|
|
|
|
|
|
|
110, 111, 112, 115, 120, 97, 101, 104, |
|
6292
|
|
|
|
|
|
|
105, 111, 116, 117, 122, 98, 120, 97, |
|
6293
|
|
|
|
|
|
|
101, 105, 106, 111, 117, 122, 98, 120, |
|
6294
|
|
|
|
|
|
|
98, 99, 100, 105, 107, 111, 117, 122, |
|
6295
|
|
|
|
|
|
|
97, 101, 102, 120, 105, 97, 101, 105, |
|
6296
|
|
|
|
|
|
|
108, 111, 114, 117, 119, 122, 98, 120, |
|
6297
|
|
|
|
|
|
|
97, 101, 105, 109, 111, 117, 122, 98, |
|
6298
|
|
|
|
|
|
|
120, 97, 101, 105, 110, 111, 117, 122, |
|
6299
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 112, 117, |
|
6300
|
|
|
|
|
|
|
122, 98, 120, 97, 101, 105, 111, 113, |
|
6301
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 111, |
|
6302
|
|
|
|
|
|
|
114, 117, 122, 98, 120, 97, 101, 105, |
|
6303
|
|
|
|
|
|
|
111, 117, 98, 99, 100, 105, 108, 111, |
|
6304
|
|
|
|
|
|
|
116, 117, 97, 101, 102, 122, 101, 110, |
|
6305
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
|
6306
|
|
|
|
|
|
|
122, 101, 110, 111, 115, 120, 101, 110, |
|
6307
|
|
|
|
|
|
|
111, 112, 115, 120, 101, 105, 110, 111, |
|
6308
|
|
|
|
|
|
|
115, 120, 98, 116, 118, 122, 101, 105, |
|
6309
|
|
|
|
|
|
|
110, 111, 115, 120, 98, 116, 118, 122, |
|
6310
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
|
6311
|
|
|
|
|
|
|
116, 118, 122, 98, 101, 110, 111, 114, |
|
6312
|
|
|
|
|
|
|
115, 120, 101, 110, 111, 115, 120, 98, |
|
6313
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 97, 101, 105, |
|
6314
|
|
|
|
|
|
|
111, 115, 117, 122, 98, 120, 97, 101, |
|
6315
|
|
|
|
|
|
|
105, 111, 116, 117, 122, 98, 120, 122, |
|
6316
|
|
|
|
|
|
|
98, 100, 102, 104, 106, 110, 112, 116, |
|
6317
|
|
|
|
|
|
|
118, 120, 122, 98, 100, 102, 104, 106, |
|
6318
|
|
|
|
|
|
|
110, 112, 116, 118, 120, 98, 114, 112, |
|
6319
|
|
|
|
|
|
|
114, 113, 97, 101, 105, 108, 111, 117, |
|
6320
|
|
|
|
|
|
|
98, 122, 101, 110, 111, 115, 120, 98, |
|
6321
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 101, 110, 111, |
|
6322
|
|
|
|
|
|
|
115, 120, 101, 110, 111, 112, 115, 120, |
|
6323
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 120, 98, 116, |
|
6324
|
|
|
|
|
|
|
118, 122, 101, 105, 110, 111, 115, 120, |
|
6325
|
|
|
|
|
|
|
98, 116, 118, 122, 101, 110, 111, 115, |
|
6326
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 98, |
|
6327
|
|
|
|
|
|
|
101, 110, 111, 114, 115, 120, 101, 110, |
|
6328
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
|
6329
|
|
|
|
|
|
|
122, 97, 101, 105, 111, 117, 98, 122, |
|
6330
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 121, 97, 101, |
|
6331
|
|
|
|
|
|
|
105, 111, 117, 118, 122, 98, 120, 97, |
|
6332
|
|
|
|
|
|
|
101, 105, 111, 117, 119, 122, 98, 120, |
|
6333
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 120, 122, 98, |
|
6334
|
|
|
|
|
|
|
119, 97, 101, 105, 111, 117, 121, 97, |
|
6335
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
|
6336
|
|
|
|
|
|
|
0 |
|
6337
|
|
|
|
|
|
|
}; |
|
6338
|
|
|
|
|
|
|
|
|
6339
|
|
|
|
|
|
|
static const char _VBG_single_lengths[] = { |
|
6340
|
|
|
|
|
|
|
0, 1, 1, 1, 6, 5, 0, 5, |
|
6341
|
|
|
|
|
|
|
5, 0, 2, 1, 1, 1, 1, 1, |
|
6342
|
|
|
|
|
|
|
1, 5, 5, 0, 2, 1, 1, 1, |
|
6343
|
|
|
|
|
|
|
1, 1, 1, 5, 8, 1, 1, 1, |
|
6344
|
|
|
|
|
|
|
1, 1, 2, 1, 1, 1, 1, 1, |
|
6345
|
|
|
|
|
|
|
1, 23, 0, 7, 1, 5, 7, 7, |
|
6346
|
|
|
|
|
|
|
7, 8, 5, 5, 6, 8, 0, 7, |
|
6347
|
|
|
|
|
|
|
8, 1, 9, 0, 7, 7, 7, 7, |
|
6348
|
|
|
|
|
|
|
7, 5, 8, 5, 5, 6, 6, 6, |
|
6349
|
|
|
|
|
|
|
5, 7, 5, 7, 7, 0, 1, 1, |
|
6350
|
|
|
|
|
|
|
2, 1, 1, 0, 1, 6, 5, 5, |
|
6351
|
|
|
|
|
|
|
6, 6, 6, 5, 7, 5, 5, 6, |
|
6352
|
|
|
|
|
|
|
7, 7, 7, 6, 7 |
|
6353
|
|
|
|
|
|
|
}; |
|
6354
|
|
|
|
|
|
|
|
|
6355
|
|
|
|
|
|
|
static const char _VBG_range_lengths[] = { |
|
6356
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 5, 0, |
|
6357
|
|
|
|
|
|
|
0, 5, 0, 0, 0, 0, 0, 0, |
|
6358
|
|
|
|
|
|
|
0, 1, 1, 1, 0, 0, 0, 0, |
|
6359
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6360
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6361
|
|
|
|
|
|
|
0, 1, 0, 1, 0, 0, 2, 1, |
|
6362
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 1, 0, 1, |
|
6363
|
|
|
|
|
|
|
2, 0, 1, 0, 1, 1, 1, 1, |
|
6364
|
|
|
|
|
|
|
1, 0, 2, 3, 0, 0, 2, 2, |
|
6365
|
|
|
|
|
|
|
3, 0, 3, 1, 1, 0, 5, 5, |
|
6366
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 3, 0, |
|
6367
|
|
|
|
|
|
|
0, 2, 2, 3, 0, 3, 1, 0, |
|
6368
|
|
|
|
|
|
|
1, 1, 1, 0, 1 |
|
6369
|
|
|
|
|
|
|
}; |
|
6370
|
|
|
|
|
|
|
|
|
6371
|
|
|
|
|
|
|
static const short _VBG_index_offsets[] = { |
|
6372
|
|
|
|
|
|
|
0, 0, 2, 4, 6, 13, 19, 25, |
|
6373
|
|
|
|
|
|
|
31, 37, 43, 46, 48, 50, 52, 54, |
|
6374
|
|
|
|
|
|
|
56, 58, 65, 72, 74, 77, 79, 81, |
|
6375
|
|
|
|
|
|
|
83, 85, 87, 89, 95, 104, 106, 108, |
|
6376
|
|
|
|
|
|
|
110, 112, 114, 117, 119, 121, 123, 125, |
|
6377
|
|
|
|
|
|
|
127, 129, 154, 155, 164, 166, 172, 182, |
|
6378
|
|
|
|
|
|
|
191, 200, 210, 216, 222, 229, 239, 240, |
|
6379
|
|
|
|
|
|
|
249, 260, 262, 273, 274, 283, 292, 301, |
|
6380
|
|
|
|
|
|
|
310, 319, 325, 336, 345, 351, 358, 367, |
|
6381
|
|
|
|
|
|
|
376, 385, 393, 402, 411, 420, 421, 428, |
|
6382
|
|
|
|
|
|
|
435, 438, 440, 442, 443, 445, 453, 462, |
|
6383
|
|
|
|
|
|
|
468, 475, 484, 493, 502, 510, 519, 526, |
|
6384
|
|
|
|
|
|
|
533, 542, 551, 560, 567 |
|
6385
|
|
|
|
|
|
|
}; |
|
6386
|
|
|
|
|
|
|
|
|
6387
|
|
|
|
|
|
|
static const unsigned char _VBG_indicies[] = { |
|
6388
|
|
|
|
|
|
|
0, 1, 2, 1, 3, 1, 4, 4, |
|
6389
|
|
|
|
|
|
|
4, 4, 4, 4, 1, 5, 5, 5, |
|
6390
|
|
|
|
|
|
|
5, 6, 1, 7, 7, 7, 7, 7, |
|
6391
|
|
|
|
|
|
|
1, 8, 8, 8, 8, 9, 1, 5, |
|
6392
|
|
|
|
|
|
|
5, 5, 5, 10, 1, 11, 11, 11, |
|
6393
|
|
|
|
|
|
|
11, 11, 1, 11, 12, 1, 11, 1, |
|
6394
|
|
|
|
|
|
|
13, 1, 11, 1, 14, 1, 11, 1, |
|
6395
|
|
|
|
|
|
|
11, 1, 5, 5, 5, 5, 6, 15, |
|
6396
|
|
|
|
|
|
|
1, 5, 5, 5, 5, 6, 16, 1, |
|
6397
|
|
|
|
|
|
|
4, 1, 17, 18, 1, 17, 1, 19, |
|
6398
|
|
|
|
|
|
|
1, 17, 1, 20, 1, 17, 1, 17, |
|
6399
|
|
|
|
|
|
|
1, 21, 22, 21, 23, 24, 1, 25, |
|
6400
|
|
|
|
|
|
|
26, 25, 27, 28, 29, 25, 30, 1, |
|
6401
|
|
|
|
|
|
|
31, 1, 31, 1, 32, 1, 31, 1, |
|
6402
|
|
|
|
|
|
|
31, 1, 33, 34, 1, 33, 1, 35, |
|
6403
|
|
|
|
|
|
|
1, 33, 1, 36, 1, 33, 1, 33, |
|
6404
|
|
|
|
|
|
|
1, 38, 39, 40, 41, 42, 43, 44, |
|
6405
|
|
|
|
|
|
|
45, 46, 47, 48, 49, 50, 51, 52, |
|
6406
|
|
|
|
|
|
|
53, 54, 55, 56, 57, 58, 59, 60, |
|
6407
|
|
|
|
|
|
|
37, 1, 1, 61, 62, 61, 61, 61, |
|
6408
|
|
|
|
|
|
|
61, 63, 63, 1, 64, 1, 65, 65, |
|
6409
|
|
|
|
|
|
|
65, 65, 65, 1, 67, 68, 67, 66, |
|
6410
|
|
|
|
|
|
|
66, 66, 67, 66, 67, 1, 69, 62, |
|
6411
|
|
|
|
|
|
|
69, 69, 69, 69, 63, 63, 1, 61, |
|
6412
|
|
|
|
|
|
|
61, 62, 61, 61, 61, 63, 63, 1, |
|
6413
|
|
|
|
|
|
|
66, 66, 68, 66, 70, 66, 66, 67, |
|
6414
|
|
|
|
|
|
|
67, 1, 71, 71, 71, 71, 71, 1, |
|
6415
|
|
|
|
|
|
|
72, 73, 74, 75, 76, 1, 72, 73, |
|
6416
|
|
|
|
|
|
|
74, 11, 75, 76, 1, 61, 61, 62, |
|
6417
|
|
|
|
|
|
|
61, 61, 77, 61, 63, 63, 1, 78, |
|
6418
|
|
|
|
|
|
|
61, 61, 61, 62, 61, 61, 63, 63, |
|
6419
|
|
|
|
|
|
|
1, 63, 79, 63, 61, 62, 61, 61, |
|
6420
|
|
|
|
|
|
|
63, 61, 63, 1, 7, 1, 61, 61, |
|
6421
|
|
|
|
|
|
|
61, 68, 61, 80, 61, 80, 67, 67, |
|
6422
|
|
|
|
|
|
|
1, 5, 61, 61, 61, 62, 61, 61, |
|
6423
|
|
|
|
|
|
|
63, 63, 1, 81, 81, 82, 62, 81, |
|
6424
|
|
|
|
|
|
|
81, 63, 63, 1, 81, 81, 81, 81, |
|
6425
|
|
|
|
|
|
|
62, 81, 63, 63, 1, 61, 61, 61, |
|
6426
|
|
|
|
|
|
|
61, 62, 61, 63, 63, 1, 61, 83, |
|
6427
|
|
|
|
|
|
|
61, 84, 62, 61, 63, 63, 1, 5, |
|
6428
|
|
|
|
|
|
|
5, 5, 5, 6, 1, 85, 86, 85, |
|
6429
|
|
|
|
|
|
|
5, 86, 5, 86, 6, 5, 85, 1, |
|
6430
|
|
|
|
|
|
|
87, 88, 89, 90, 91, 85, 85, 85, |
|
6431
|
|
|
|
|
|
|
1, 87, 92, 89, 93, 94, 1, 87, |
|
6432
|
|
|
|
|
|
|
92, 89, 17, 93, 94, 1, 87, 17, |
|
6433
|
|
|
|
|
|
|
88, 89, 90, 91, 85, 85, 1, 87, |
|
6434
|
|
|
|
|
|
|
20, 88, 89, 90, 91, 85, 85, 1, |
|
6435
|
|
|
|
|
|
|
95, 88, 89, 90, 91, 85, 85, 85, |
|
6436
|
|
|
|
|
|
|
1, 17, 87, 92, 89, 18, 93, 94, |
|
6437
|
|
|
|
|
|
|
1, 87, 97, 89, 98, 99, 96, 96, |
|
6438
|
|
|
|
|
|
|
96, 1, 66, 66, 66, 66, 100, 66, |
|
6439
|
|
|
|
|
|
|
67, 67, 1, 101, 102, 103, 61, 62, |
|
6440
|
|
|
|
|
|
|
61, 63, 63, 1, 104, 106, 106, 106, |
|
6441
|
|
|
|
|
|
|
106, 106, 106, 105, 107, 107, 107, 107, |
|
6442
|
|
|
|
|
|
|
107, 107, 1, 31, 108, 1, 31, 1, |
|
6443
|
|
|
|
|
|
|
109, 1, 105, 110, 104, 5, 5, 5, |
|
6444
|
|
|
|
|
|
|
112, 5, 6, 111, 1, 113, 114, 115, |
|
6445
|
|
|
|
|
|
|
116, 117, 111, 111, 111, 1, 113, 118, |
|
6446
|
|
|
|
|
|
|
115, 119, 120, 1, 113, 118, 115, 33, |
|
6447
|
|
|
|
|
|
|
119, 120, 1, 113, 33, 114, 115, 116, |
|
6448
|
|
|
|
|
|
|
117, 111, 111, 1, 113, 36, 114, 115, |
|
6449
|
|
|
|
|
|
|
116, 117, 111, 111, 1, 121, 114, 115, |
|
6450
|
|
|
|
|
|
|
116, 117, 111, 111, 111, 1, 33, 113, |
|
6451
|
|
|
|
|
|
|
118, 115, 34, 119, 120, 1, 113, 123, |
|
6452
|
|
|
|
|
|
|
115, 124, 125, 122, 122, 122, 1, 5, |
|
6453
|
|
|
|
|
|
|
5, 5, 5, 6, 111, 1, 4, 4, |
|
6454
|
|
|
|
|
|
|
4, 4, 4, 4, 1, 66, 66, 66, |
|
6455
|
|
|
|
|
|
|
66, 66, 68, 67, 67, 1, 81, 81, |
|
6456
|
|
|
|
|
|
|
81, 81, 81, 62, 63, 63, 1, 81, |
|
6457
|
|
|
|
|
|
|
81, 81, 81, 81, 62, 63, 63, 1, |
|
6458
|
|
|
|
|
|
|
126, 126, 126, 126, 126, 4, 1, 127, |
|
6459
|
|
|
|
|
|
|
127, 127, 127, 127, 129, 130, 128, 1, |
|
6460
|
|
|
|
|
|
|
0 |
|
6461
|
|
|
|
|
|
|
}; |
|
6462
|
|
|
|
|
|
|
|
|
6463
|
|
|
|
|
|
|
static const char _VBG_trans_targs[] = { |
|
6464
|
|
|
|
|
|
|
2, 0, 3, 41, 42, 42, 44, 42, |
|
6465
|
|
|
|
|
|
|
42, 44, 44, 51, 52, 13, 15, 42, |
|
6466
|
|
|
|
|
|
|
42, 68, 69, 23, 25, 77, 78, 83, |
|
6467
|
|
|
|
|
|
|
84, 42, 80, 29, 82, 31, 33, 42, |
|
6468
|
|
|
|
|
|
|
32, 87, 88, 37, 39, 4, 43, 46, |
|
6469
|
|
|
|
|
|
|
47, 48, 49, 53, 55, 56, 58, 60, |
|
6470
|
|
|
|
|
|
|
61, 19, 62, 63, 64, 75, 76, 95, |
|
6471
|
|
|
|
|
|
|
96, 97, 98, 99, 100, 5, 45, 42, |
|
6472
|
|
|
|
|
|
|
42, 6, 7, 42, 45, 8, 50, 9, |
|
6473
|
|
|
|
|
|
|
10, 11, 12, 14, 16, 54, 42, 57, |
|
6474
|
|
|
|
|
|
|
59, 17, 18, 65, 66, 67, 74, 20, |
|
6475
|
|
|
|
|
|
|
70, 22, 71, 72, 21, 24, 26, 73, |
|
6476
|
|
|
|
|
|
|
67, 70, 71, 72, 45, 27, 85, 94, |
|
6477
|
|
|
|
|
|
|
42, 42, 79, 28, 81, 30, 42, 86, |
|
6478
|
|
|
|
|
|
|
93, 34, 89, 36, 90, 91, 35, 38, |
|
6479
|
|
|
|
|
|
|
40, 92, 86, 89, 90, 91, 65, 65, |
|
6480
|
|
|
|
|
|
|
42, 42, 45 |
|
6481
|
|
|
|
|
|
|
}; |
|
6482
|
|
|
|
|
|
|
|
|
6483
|
|
|
|
|
|
|
static const char _VBG_trans_actions[] = { |
|
6484
|
|
|
|
|
|
|
0, 0, 0, 29, 23, 15, 15, 3, |
|
6485
|
|
|
|
|
|
|
46, 46, 40, 0, 0, 0, 0, 5, |
|
6486
|
|
|
|
|
|
|
34, 0, 0, 0, 0, 15, 15, 15, |
|
6487
|
|
|
|
|
|
|
15, 11, 11, 0, 11, 0, 0, 9, |
|
6488
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6489
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6490
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 21, |
|
6491
|
|
|
|
|
|
|
0, 0, 0, 23, 0, 0, 19, 19, |
|
6492
|
|
|
|
|
|
|
7, 0, 0, 49, 49, 0, 49, 0, |
|
6493
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 19, 17, 19, |
|
6494
|
|
|
|
|
|
|
49, 0, 0, 27, 27, 0, 0, 0, |
|
6495
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6496
|
|
|
|
|
|
|
25, 25, 25, 25, 56, 0, 9, 9, |
|
6497
|
|
|
|
|
|
|
13, 43, 43, 0, 9, 0, 37, 0, |
|
6498
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6499
|
|
|
|
|
|
|
0, 0, 7, 7, 7, 7, 23, 1, |
|
6500
|
|
|
|
|
|
|
31, 1, 52 |
|
6501
|
|
|
|
|
|
|
}; |
|
6502
|
|
|
|
|
|
|
|
|
6503
|
|
|
|
|
|
|
static const char _VBG_eof_actions[] = { |
|
6504
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6505
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6506
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6507
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6508
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6509
|
|
|
|
|
|
|
0, 0, 0, 3, 0, 0, 3, 3, |
|
6510
|
|
|
|
|
|
|
3, 3, 0, 3, 3, 3, 0, 3, |
|
6511
|
|
|
|
|
|
|
3, 0, 3, 0, 3, 3, 3, 3, |
|
6512
|
|
|
|
|
|
|
3, 0, 0, 25, 25, 25, 25, 25, |
|
6513
|
|
|
|
|
|
|
25, 25, 25, 3, 3, 0, 0, 0, |
|
6514
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 7, 7, |
|
6515
|
|
|
|
|
|
|
7, 7, 7, 7, 7, 7, 0, 0, |
|
6516
|
|
|
|
|
|
|
3, 3, 3, 0, 3 |
|
6517
|
|
|
|
|
|
|
}; |
|
6518
|
|
|
|
|
|
|
|
|
6519
|
|
|
|
|
|
|
static const int VBG_start = 1; |
|
6520
|
|
|
|
|
|
|
|
|
6521
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBG(const string& form, vector& lemmas) const { |
|
6522
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
|
6523
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
6524
|
|
|
|
|
|
|
|
|
6525
|
|
|
|
|
|
|
{ |
|
6526
|
|
|
|
|
|
|
cs = VBG_start; |
|
6527
|
|
|
|
|
|
|
} |
|
6528
|
|
|
|
|
|
|
|
|
6529
|
|
|
|
|
|
|
{ |
|
6530
|
|
|
|
|
|
|
int _klen; |
|
6531
|
|
|
|
|
|
|
unsigned int _trans; |
|
6532
|
|
|
|
|
|
|
const char *_acts; |
|
6533
|
|
|
|
|
|
|
unsigned int _nacts; |
|
6534
|
|
|
|
|
|
|
const char *_keys; |
|
6535
|
|
|
|
|
|
|
|
|
6536
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
6537
|
|
|
|
|
|
|
goto _test_eof; |
|
6538
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
6539
|
|
|
|
|
|
|
goto _out; |
|
6540
|
|
|
|
|
|
|
_resume: |
|
6541
|
0
|
|
|
|
|
|
_keys = _VBG_trans_keys + _VBG_key_offsets[cs]; |
|
6542
|
0
|
|
|
|
|
|
_trans = _VBG_index_offsets[cs]; |
|
6543
|
|
|
|
|
|
|
|
|
6544
|
0
|
|
|
|
|
|
_klen = _VBG_single_lengths[cs]; |
|
6545
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
6546
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
6547
|
|
|
|
|
|
|
const char *_mid; |
|
6548
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
6549
|
|
|
|
|
|
|
while (1) { |
|
6550
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
6551
|
|
|
|
|
|
|
break; |
|
6552
|
|
|
|
|
|
|
|
|
6553
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
6554
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
|
6555
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
6556
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
|
6557
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
6558
|
|
|
|
|
|
|
else { |
|
6559
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
6560
|
0
|
|
|
|
|
|
goto _match; |
|
6561
|
|
|
|
|
|
|
} |
|
6562
|
|
|
|
|
|
|
} |
|
6563
|
0
|
|
|
|
|
|
_keys += _klen; |
|
6564
|
0
|
|
|
|
|
|
_trans += _klen; |
|
6565
|
|
|
|
|
|
|
} |
|
6566
|
|
|
|
|
|
|
|
|
6567
|
0
|
|
|
|
|
|
_klen = _VBG_range_lengths[cs]; |
|
6568
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
6569
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
6570
|
|
|
|
|
|
|
const char *_mid; |
|
6571
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
6572
|
|
|
|
|
|
|
while (1) { |
|
6573
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
6574
|
|
|
|
|
|
|
break; |
|
6575
|
|
|
|
|
|
|
|
|
6576
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
6577
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
|
6578
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
6579
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
|
6580
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
6581
|
|
|
|
|
|
|
else { |
|
6582
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
6583
|
0
|
|
|
|
|
|
goto _match; |
|
6584
|
|
|
|
|
|
|
} |
|
6585
|
|
|
|
|
|
|
} |
|
6586
|
0
|
|
|
|
|
|
_trans += _klen; |
|
6587
|
|
|
|
|
|
|
} |
|
6588
|
|
|
|
|
|
|
|
|
6589
|
|
|
|
|
|
|
_match: |
|
6590
|
0
|
|
|
|
|
|
_trans = _VBG_indicies[_trans]; |
|
6591
|
0
|
|
|
|
|
|
cs = _VBG_trans_targs[_trans]; |
|
6592
|
|
|
|
|
|
|
|
|
6593
|
0
|
0
|
|
|
|
|
if ( _VBG_trans_actions[_trans] == 0 ) |
|
6594
|
|
|
|
|
|
|
goto _again; |
|
6595
|
|
|
|
|
|
|
|
|
6596
|
0
|
|
|
|
|
|
_acts = _VBG_actions + _VBG_trans_actions[_trans]; |
|
6597
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
6598
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
6599
|
|
|
|
|
|
|
{ |
|
6600
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
6601
|
|
|
|
|
|
|
{ |
|
6602
|
|
|
|
|
|
|
case 0: |
|
6603
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
|
6604
|
|
|
|
|
|
|
break; |
|
6605
|
|
|
|
|
|
|
case 1: |
|
6606
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 3, append = "e"; } |
|
6607
|
|
|
|
|
|
|
break; |
|
6608
|
|
|
|
|
|
|
case 2: |
|
6609
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
|
6610
|
|
|
|
|
|
|
break; |
|
6611
|
|
|
|
|
|
|
case 3: |
|
6612
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = "e"; } |
|
6613
|
|
|
|
|
|
|
break; |
|
6614
|
|
|
|
|
|
|
case 4: |
|
6615
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 3, append = nullptr; } |
|
6616
|
|
|
|
|
|
|
break; |
|
6617
|
|
|
|
|
|
|
case 5: |
|
6618
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
|
6619
|
|
|
|
|
|
|
break; |
|
6620
|
|
|
|
|
|
|
case 6: |
|
6621
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 3, append = nullptr; } |
|
6622
|
|
|
|
|
|
|
break; |
|
6623
|
|
|
|
|
|
|
case 7: |
|
6624
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 3, append = "e"; } |
|
6625
|
|
|
|
|
|
|
break; |
|
6626
|
|
|
|
|
|
|
case 8: |
|
6627
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 3, append = nullptr; } |
|
6628
|
|
|
|
|
|
|
break; |
|
6629
|
|
|
|
|
|
|
case 9: |
|
6630
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 3, append = "e"; } |
|
6631
|
|
|
|
|
|
|
break; |
|
6632
|
|
|
|
|
|
|
case 10: |
|
6633
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 3, append = nullptr; } |
|
6634
|
|
|
|
|
|
|
break; |
|
6635
|
|
|
|
|
|
|
case 11: |
|
6636
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 3, append = "e"; } |
|
6637
|
|
|
|
|
|
|
break; |
|
6638
|
|
|
|
|
|
|
case 12: |
|
6639
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 3, append = nullptr; } |
|
6640
|
|
|
|
|
|
|
break; |
|
6641
|
|
|
|
|
|
|
case 13: |
|
6642
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 3, append = "e"; } |
|
6643
|
|
|
|
|
|
|
break; |
|
6644
|
|
|
|
|
|
|
case 14: |
|
6645
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 3, append = nullptr; } |
|
6646
|
|
|
|
|
|
|
break; |
|
6647
|
|
|
|
|
|
|
case 15: |
|
6648
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
|
6649
|
|
|
|
|
|
|
break; |
|
6650
|
|
|
|
|
|
|
case 16: |
|
6651
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 3, append = nullptr; } |
|
6652
|
|
|
|
|
|
|
break; |
|
6653
|
|
|
|
|
|
|
case 17: |
|
6654
|
0
|
0
|
|
|
|
|
{ if (best > 'r') best = 'r', remove = 3, append = "e"; } |
|
6655
|
|
|
|
|
|
|
break; |
|
6656
|
|
|
|
|
|
|
} |
|
6657
|
|
|
|
|
|
|
} |
|
6658
|
|
|
|
|
|
|
|
|
6659
|
|
|
|
|
|
|
_again: |
|
6660
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
6661
|
|
|
|
|
|
|
goto _out; |
|
6662
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
6663
|
|
|
|
|
|
|
goto _resume; |
|
6664
|
|
|
|
|
|
|
_test_eof: {} |
|
6665
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
6666
|
|
|
|
|
|
|
{ |
|
6667
|
0
|
|
|
|
|
|
const char *__acts = _VBG_actions + _VBG_eof_actions[cs]; |
|
6668
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
|
6669
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
|
6670
|
0
|
|
|
|
|
|
switch ( *__acts++ ) { |
|
6671
|
|
|
|
|
|
|
case 2: |
|
6672
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
|
6673
|
|
|
|
|
|
|
break; |
|
6674
|
|
|
|
|
|
|
case 5: |
|
6675
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
|
6676
|
|
|
|
|
|
|
break; |
|
6677
|
|
|
|
|
|
|
case 15: |
|
6678
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
|
6679
|
|
|
|
|
|
|
break; |
|
6680
|
|
|
|
|
|
|
} |
|
6681
|
|
|
|
|
|
|
} |
|
6682
|
|
|
|
|
|
|
} |
|
6683
|
|
|
|
|
|
|
|
|
6684
|
|
|
|
|
|
|
_out: {} |
|
6685
|
|
|
|
|
|
|
} |
|
6686
|
|
|
|
|
|
|
|
|
6687
|
0
|
0
|
|
|
|
|
add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
|
0
|
|
|
|
|
|
|
6688
|
0
|
|
|
|
|
|
} |
|
6689
|
|
|
|
|
|
|
|
|
6690
|
|
|
|
|
|
|
static const char _VBD_VBN_actions[] = { |
|
6691
|
|
|
|
|
|
|
0, 1, 0, 1, 2, 1, 3, 1, |
|
6692
|
|
|
|
|
|
|
4, 1, 5, 1, 6, 1, 7, 1, |
|
6693
|
|
|
|
|
|
|
8, 1, 9, 1, 10, 1, 11, 1, |
|
6694
|
|
|
|
|
|
|
13, 1, 14, 1, 15, 1, 16, 1, |
|
6695
|
|
|
|
|
|
|
17, 2, 1, 16, 2, 4, 5, 2, |
|
6696
|
|
|
|
|
|
|
8, 16, 2, 9, 13, 2, 9, 14, |
|
6697
|
|
|
|
|
|
|
2, 12, 13, 2, 13, 14, 2, 15, |
|
6698
|
|
|
|
|
|
|
16, 3, 1, 3, 16, 3, 3, 15, |
|
6699
|
|
|
|
|
|
|
16 |
|
6700
|
|
|
|
|
|
|
}; |
|
6701
|
|
|
|
|
|
|
|
|
6702
|
|
|
|
|
|
|
static const short _VBD_VBN_key_offsets[] = { |
|
6703
|
|
|
|
|
|
|
0, 0, 2, 3, 9, 14, 24, 29, |
|
6704
|
|
|
|
|
|
|
34, 44, 46, 47, 48, 49, 50, 51, |
|
6705
|
|
|
|
|
|
|
52, 60, 67, 74, 76, 77, 78, 79, |
|
6706
|
|
|
|
|
|
|
80, 81, 82, 87, 95, 96, 97, 98, |
|
6707
|
|
|
|
|
|
|
99, 100, 102, 103, 104, 105, 106, 107, |
|
6708
|
|
|
|
|
|
|
108, 114, 115, 140, 140, 149, 150, 155, |
|
6709
|
|
|
|
|
|
|
166, 175, 184, 194, 199, 204, 210, 220, |
|
6710
|
|
|
|
|
|
|
220, 229, 241, 242, 253, 253, 262, 271, |
|
6711
|
|
|
|
|
|
|
280, 289, 298, 303, 316, 327, 332, 338, |
|
6712
|
|
|
|
|
|
|
348, 358, 369, 376, 387, 396, 405, 405, |
|
6713
|
|
|
|
|
|
|
416, 427, 429, 430, 431, 431, 432, 440, |
|
6714
|
|
|
|
|
|
|
451, 456, 462, 472, 482, 493, 500, 511, |
|
6715
|
|
|
|
|
|
|
518, 524, 533, 542, 551 |
|
6716
|
|
|
|
|
|
|
}; |
|
6717
|
|
|
|
|
|
|
|
|
6718
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_keys[] = { |
|
6719
|
|
|
|
|
|
|
100, 110, 101, 97, 101, 105, 111, 117, |
|
6720
|
|
|
|
|
|
|
121, 97, 101, 105, 111, 117, 98, 100, |
|
6721
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 122, |
|
6722
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 97, 101, 105, |
|
6723
|
|
|
|
|
|
|
111, 117, 98, 100, 102, 104, 106, 110, |
|
6724
|
|
|
|
|
|
|
112, 116, 118, 122, 98, 114, 105, 114, |
|
6725
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
|
6726
|
|
|
|
|
|
|
117, 121, 98, 122, 97, 101, 105, 111, |
|
6727
|
|
|
|
|
|
|
117, 98, 122, 97, 101, 105, 111, 117, |
|
6728
|
|
|
|
|
|
|
98, 122, 98, 114, 105, 114, 112, 105, |
|
6729
|
|
|
|
|
|
|
109, 101, 97, 101, 105, 111, 117, 97, |
|
6730
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 117, 120, 105, |
|
6731
|
|
|
|
|
|
|
112, 105, 109, 101, 98, 114, 105, 114, |
|
6732
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
|
6733
|
|
|
|
|
|
|
117, 121, 101, 98, 99, 100, 102, 103, |
|
6734
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
|
6735
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 120, |
|
6736
|
|
|
|
|
|
|
121, 122, 97, 111, 97, 98, 101, 105, |
|
6737
|
|
|
|
|
|
|
111, 117, 122, 99, 120, 113, 97, 101, |
|
6738
|
|
|
|
|
|
|
105, 111, 117, 98, 99, 100, 105, 111, |
|
6739
|
|
|
|
|
|
|
117, 122, 97, 101, 102, 120, 97, 100, |
|
6740
|
|
|
|
|
|
|
101, 105, 111, 117, 122, 98, 120, 97, |
|
6741
|
|
|
|
|
|
|
101, 102, 105, 111, 117, 122, 98, 120, |
|
6742
|
|
|
|
|
|
|
97, 101, 103, 105, 110, 111, 117, 122, |
|
6743
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 117, 101, |
|
6744
|
|
|
|
|
|
|
110, 111, 115, 120, 101, 110, 111, 112, |
|
6745
|
|
|
|
|
|
|
115, 120, 97, 101, 104, 105, 111, 116, |
|
6746
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 106, |
|
6747
|
|
|
|
|
|
|
111, 117, 122, 98, 120, 98, 99, 100, |
|
6748
|
|
|
|
|
|
|
105, 107, 111, 117, 122, 97, 101, 102, |
|
6749
|
|
|
|
|
|
|
120, 105, 97, 101, 105, 108, 111, 114, |
|
6750
|
|
|
|
|
|
|
117, 119, 122, 98, 120, 97, 101, 105, |
|
6751
|
|
|
|
|
|
|
109, 111, 117, 122, 98, 120, 97, 101, |
|
6752
|
|
|
|
|
|
|
105, 110, 111, 117, 122, 98, 120, 97, |
|
6753
|
|
|
|
|
|
|
101, 105, 111, 112, 117, 122, 98, 120, |
|
6754
|
|
|
|
|
|
|
97, 101, 105, 111, 113, 117, 122, 98, |
|
6755
|
|
|
|
|
|
|
120, 97, 101, 105, 111, 114, 117, 122, |
|
6756
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 117, 98, |
|
6757
|
|
|
|
|
|
|
99, 100, 105, 108, 110, 111, 116, 117, |
|
6758
|
|
|
|
|
|
|
97, 101, 102, 122, 101, 110, 111, 115, |
|
6759
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 101, |
|
6760
|
|
|
|
|
|
|
110, 111, 115, 120, 101, 110, 111, 112, |
|
6761
|
|
|
|
|
|
|
115, 120, 101, 105, 110, 111, 115, 120, |
|
6762
|
|
|
|
|
|
|
98, 116, 118, 122, 101, 105, 110, 111, |
|
6763
|
|
|
|
|
|
|
115, 120, 98, 116, 118, 122, 101, 110, |
|
6764
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
|
6765
|
|
|
|
|
|
|
122, 98, 101, 110, 111, 114, 115, 120, |
|
6766
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
|
6767
|
|
|
|
|
|
|
116, 118, 122, 97, 101, 105, 111, 115, |
|
6768
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 111, |
|
6769
|
|
|
|
|
|
|
116, 117, 122, 98, 120, 122, 98, 100, |
|
6770
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 120, |
|
6771
|
|
|
|
|
|
|
122, 98, 100, 102, 104, 106, 110, 112, |
|
6772
|
|
|
|
|
|
|
116, 118, 120, 98, 114, 112, 114, 113, |
|
6773
|
|
|
|
|
|
|
97, 101, 105, 108, 111, 117, 98, 122, |
|
6774
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
|
6775
|
|
|
|
|
|
|
116, 118, 122, 101, 110, 111, 115, 120, |
|
6776
|
|
|
|
|
|
|
101, 110, 111, 112, 115, 120, 101, 105, |
|
6777
|
|
|
|
|
|
|
110, 111, 115, 120, 98, 116, 118, 122, |
|
6778
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 120, 98, 116, |
|
6779
|
|
|
|
|
|
|
118, 122, 101, 110, 111, 115, 120, 98, |
|
6780
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 98, 101, 110, |
|
6781
|
|
|
|
|
|
|
111, 114, 115, 120, 101, 110, 111, 115, |
|
6782
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 97, |
|
6783
|
|
|
|
|
|
|
101, 105, 111, 117, 98, 122, 97, 101, |
|
6784
|
|
|
|
|
|
|
105, 111, 117, 121, 97, 101, 105, 111, |
|
6785
|
|
|
|
|
|
|
117, 118, 122, 98, 120, 97, 101, 105, |
|
6786
|
|
|
|
|
|
|
111, 117, 119, 122, 98, 120, 97, 101, |
|
6787
|
|
|
|
|
|
|
105, 111, 117, 120, 122, 98, 119, 97, |
|
6788
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
|
6789
|
|
|
|
|
|
|
0 |
|
6790
|
|
|
|
|
|
|
}; |
|
6791
|
|
|
|
|
|
|
|
|
6792
|
|
|
|
|
|
|
static const char _VBD_VBN_single_lengths[] = { |
|
6793
|
|
|
|
|
|
|
0, 2, 1, 6, 5, 0, 5, 5, |
|
6794
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 1, |
|
6795
|
|
|
|
|
|
|
6, 5, 5, 2, 1, 1, 1, 1, |
|
6796
|
|
|
|
|
|
|
1, 1, 5, 8, 1, 1, 1, 1, |
|
6797
|
|
|
|
|
|
|
1, 2, 1, 1, 1, 1, 1, 1, |
|
6798
|
|
|
|
|
|
|
6, 1, 23, 0, 7, 1, 5, 7, |
|
6799
|
|
|
|
|
|
|
7, 7, 8, 5, 5, 6, 8, 0, |
|
6800
|
|
|
|
|
|
|
7, 8, 1, 9, 0, 7, 7, 7, |
|
6801
|
|
|
|
|
|
|
7, 7, 5, 9, 5, 5, 6, 6, |
|
6802
|
|
|
|
|
|
|
6, 5, 7, 5, 7, 7, 0, 1, |
|
6803
|
|
|
|
|
|
|
1, 2, 1, 1, 0, 1, 6, 5, |
|
6804
|
|
|
|
|
|
|
5, 6, 6, 6, 5, 7, 5, 5, |
|
6805
|
|
|
|
|
|
|
6, 7, 7, 7, 7 |
|
6806
|
|
|
|
|
|
|
}; |
|
6807
|
|
|
|
|
|
|
|
|
6808
|
|
|
|
|
|
|
static const char _VBD_VBN_range_lengths[] = { |
|
6809
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 5, 0, 0, |
|
6810
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 0, 0, |
|
6811
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 0, 0, |
|
6812
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6813
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6814
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 0, 0, 2, |
|
6815
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 1, 0, |
|
6816
|
|
|
|
|
|
|
1, 2, 0, 1, 0, 1, 1, 1, |
|
6817
|
|
|
|
|
|
|
1, 1, 0, 2, 3, 0, 0, 2, |
|
6818
|
|
|
|
|
|
|
2, 3, 0, 3, 1, 1, 0, 5, |
|
6819
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 1, 3, |
|
6820
|
|
|
|
|
|
|
0, 0, 2, 2, 3, 0, 3, 1, |
|
6821
|
|
|
|
|
|
|
0, 1, 1, 1, 1 |
|
6822
|
|
|
|
|
|
|
}; |
|
6823
|
|
|
|
|
|
|
|
|
6824
|
|
|
|
|
|
|
static const short _VBD_VBN_index_offsets[] = { |
|
6825
|
|
|
|
|
|
|
0, 0, 3, 5, 12, 18, 24, 30, |
|
6826
|
|
|
|
|
|
|
36, 42, 45, 47, 49, 51, 53, 55, |
|
6827
|
|
|
|
|
|
|
57, 65, 72, 79, 82, 84, 86, 88, |
|
6828
|
|
|
|
|
|
|
90, 92, 94, 100, 109, 111, 113, 115, |
|
6829
|
|
|
|
|
|
|
117, 119, 122, 124, 126, 128, 130, 132, |
|
6830
|
|
|
|
|
|
|
134, 141, 143, 168, 169, 178, 180, 186, |
|
6831
|
|
|
|
|
|
|
196, 205, 214, 224, 230, 236, 243, 253, |
|
6832
|
|
|
|
|
|
|
254, 263, 274, 276, 287, 288, 297, 306, |
|
6833
|
|
|
|
|
|
|
315, 324, 333, 339, 351, 360, 366, 373, |
|
6834
|
|
|
|
|
|
|
382, 391, 400, 408, 417, 426, 435, 436, |
|
6835
|
|
|
|
|
|
|
443, 450, 453, 455, 457, 458, 460, 468, |
|
6836
|
|
|
|
|
|
|
477, 483, 490, 499, 508, 517, 525, 534, |
|
6837
|
|
|
|
|
|
|
541, 548, 557, 566, 575 |
|
6838
|
|
|
|
|
|
|
}; |
|
6839
|
|
|
|
|
|
|
|
|
6840
|
|
|
|
|
|
|
static const unsigned char _VBD_VBN_indicies[] = { |
|
6841
|
|
|
|
|
|
|
0, 2, 1, 3, 1, 4, 4, 4, |
|
6842
|
|
|
|
|
|
|
4, 4, 4, 1, 5, 5, 5, 5, |
|
6843
|
|
|
|
|
|
|
6, 1, 7, 7, 7, 7, 7, 1, |
|
6844
|
|
|
|
|
|
|
8, 8, 8, 8, 9, 1, 5, 5, |
|
6845
|
|
|
|
|
|
|
5, 5, 10, 1, 11, 11, 11, 11, |
|
6846
|
|
|
|
|
|
|
11, 1, 11, 12, 1, 11, 1, 13, |
|
6847
|
|
|
|
|
|
|
1, 11, 1, 14, 1, 11, 1, 11, |
|
6848
|
|
|
|
|
|
|
1, 4, 4, 4, 4, 4, 16, 15, |
|
6849
|
|
|
|
|
|
|
1, 5, 5, 5, 5, 6, 17, 1, |
|
6850
|
|
|
|
|
|
|
5, 5, 5, 5, 6, 18, 1, 19, |
|
6851
|
|
|
|
|
|
|
20, 1, 19, 1, 21, 1, 19, 1, |
|
6852
|
|
|
|
|
|
|
22, 1, 19, 1, 19, 1, 23, 24, |
|
6853
|
|
|
|
|
|
|
23, 25, 26, 1, 27, 28, 27, 29, |
|
6854
|
|
|
|
|
|
|
30, 31, 27, 32, 1, 33, 1, 33, |
|
6855
|
|
|
|
|
|
|
1, 34, 1, 33, 1, 33, 1, 35, |
|
6856
|
|
|
|
|
|
|
36, 1, 35, 1, 37, 1, 35, 1, |
|
6857
|
|
|
|
|
|
|
38, 1, 35, 1, 35, 1, 39, 39, |
|
6858
|
|
|
|
|
|
|
39, 39, 39, 4, 1, 40, 1, 42, |
|
6859
|
|
|
|
|
|
|
43, 44, 45, 46, 47, 48, 49, 50, |
|
6860
|
|
|
|
|
|
|
51, 52, 53, 54, 55, 56, 57, 58, |
|
6861
|
|
|
|
|
|
|
59, 60, 61, 62, 63, 64, 41, 1, |
|
6862
|
|
|
|
|
|
|
1, 65, 66, 65, 65, 65, 65, 4, |
|
6863
|
|
|
|
|
|
|
4, 1, 67, 1, 68, 68, 68, 68, |
|
6864
|
|
|
|
|
|
|
68, 1, 70, 71, 70, 69, 69, 69, |
|
6865
|
|
|
|
|
|
|
70, 69, 70, 1, 72, 66, 72, 72, |
|
6866
|
|
|
|
|
|
|
72, 72, 4, 4, 1, 65, 65, 66, |
|
6867
|
|
|
|
|
|
|
65, 65, 65, 4, 4, 1, 69, 69, |
|
6868
|
|
|
|
|
|
|
71, 69, 73, 69, 69, 70, 70, 1, |
|
6869
|
|
|
|
|
|
|
74, 74, 74, 74, 74, 1, 75, 76, |
|
6870
|
|
|
|
|
|
|
77, 78, 79, 1, 75, 76, 77, 11, |
|
6871
|
|
|
|
|
|
|
78, 79, 1, 65, 65, 66, 65, 65, |
|
6872
|
|
|
|
|
|
|
80, 65, 4, 4, 1, 81, 65, 65, |
|
6873
|
|
|
|
|
|
|
65, 66, 65, 65, 4, 4, 1, 4, |
|
6874
|
|
|
|
|
|
|
82, 4, 65, 66, 65, 65, 4, 65, |
|
6875
|
|
|
|
|
|
|
4, 1, 7, 1, 65, 65, 65, 71, |
|
6876
|
|
|
|
|
|
|
65, 83, 65, 83, 70, 70, 1, 5, |
|
6877
|
|
|
|
|
|
|
65, 65, 65, 66, 65, 65, 4, 4, |
|
6878
|
|
|
|
|
|
|
1, 84, 84, 85, 66, 84, 84, 4, |
|
6879
|
|
|
|
|
|
|
4, 1, 84, 84, 84, 84, 66, 84, |
|
6880
|
|
|
|
|
|
|
4, 4, 1, 65, 65, 65, 65, 66, |
|
6881
|
|
|
|
|
|
|
65, 4, 4, 1, 65, 86, 65, 87, |
|
6882
|
|
|
|
|
|
|
66, 65, 4, 4, 1, 5, 5, 5, |
|
6883
|
|
|
|
|
|
|
5, 6, 1, 88, 89, 88, 5, 89, |
|
6884
|
|
|
|
|
|
|
89, 5, 89, 6, 5, 88, 1, 90, |
|
6885
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 88, 1, |
|
6886
|
|
|
|
|
|
|
90, 95, 92, 96, 97, 1, 90, 95, |
|
6887
|
|
|
|
|
|
|
92, 19, 96, 97, 1, 90, 19, 91, |
|
6888
|
|
|
|
|
|
|
92, 93, 94, 88, 88, 1, 90, 22, |
|
6889
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 1, 98, |
|
6890
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 88, 1, |
|
6891
|
|
|
|
|
|
|
19, 90, 95, 92, 20, 96, 97, 1, |
|
6892
|
|
|
|
|
|
|
90, 100, 92, 101, 102, 99, 99, 99, |
|
6893
|
|
|
|
|
|
|
1, 69, 69, 69, 69, 103, 69, 70, |
|
6894
|
|
|
|
|
|
|
70, 1, 104, 105, 106, 65, 66, 65, |
|
6895
|
|
|
|
|
|
|
4, 4, 1, 107, 109, 109, 109, 109, |
|
6896
|
|
|
|
|
|
|
109, 109, 108, 110, 110, 110, 110, 110, |
|
6897
|
|
|
|
|
|
|
110, 1, 33, 111, 1, 33, 1, 112, |
|
6898
|
|
|
|
|
|
|
1, 108, 113, 107, 5, 5, 5, 115, |
|
6899
|
|
|
|
|
|
|
5, 6, 114, 1, 116, 117, 118, 119, |
|
6900
|
|
|
|
|
|
|
120, 114, 114, 114, 1, 116, 121, 118, |
|
6901
|
|
|
|
|
|
|
122, 123, 1, 116, 121, 118, 35, 122, |
|
6902
|
|
|
|
|
|
|
123, 1, 116, 35, 117, 118, 119, 120, |
|
6903
|
|
|
|
|
|
|
114, 114, 1, 116, 38, 117, 118, 119, |
|
6904
|
|
|
|
|
|
|
120, 114, 114, 1, 124, 117, 118, 119, |
|
6905
|
|
|
|
|
|
|
120, 114, 114, 114, 1, 35, 116, 121, |
|
6906
|
|
|
|
|
|
|
118, 36, 122, 123, 1, 116, 126, 118, |
|
6907
|
|
|
|
|
|
|
127, 128, 125, 125, 125, 1, 5, 5, |
|
6908
|
|
|
|
|
|
|
5, 5, 6, 114, 1, 4, 4, 4, |
|
6909
|
|
|
|
|
|
|
4, 4, 4, 1, 69, 69, 69, 69, |
|
6910
|
|
|
|
|
|
|
69, 71, 70, 70, 1, 84, 84, 84, |
|
6911
|
|
|
|
|
|
|
84, 84, 66, 4, 4, 1, 84, 84, |
|
6912
|
|
|
|
|
|
|
84, 84, 84, 66, 4, 4, 1, 129, |
|
6913
|
|
|
|
|
|
|
129, 129, 129, 129, 131, 132, 130, 1, |
|
6914
|
|
|
|
|
|
|
0 |
|
6915
|
|
|
|
|
|
|
}; |
|
6916
|
|
|
|
|
|
|
|
|
6917
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_targs[] = { |
|
6918
|
|
|
|
|
|
|
2, 0, 41, 42, 43, 43, 45, 43, |
|
6919
|
|
|
|
|
|
|
43, 45, 45, 52, 53, 12, 14, 43, |
|
6920
|
|
|
|
|
|
|
43, 43, 43, 69, 70, 22, 24, 78, |
|
6921
|
|
|
|
|
|
|
79, 84, 85, 43, 81, 28, 83, 30, |
|
6922
|
|
|
|
|
|
|
32, 43, 31, 88, 89, 36, 38, 66, |
|
6923
|
|
|
|
|
|
|
43, 3, 44, 47, 48, 49, 50, 54, |
|
6924
|
|
|
|
|
|
|
16, 56, 57, 59, 61, 62, 63, 64, |
|
6925
|
|
|
|
|
|
|
65, 76, 77, 96, 97, 98, 99, 40, |
|
6926
|
|
|
|
|
|
|
100, 4, 46, 43, 5, 6, 43, 46, |
|
6927
|
|
|
|
|
|
|
7, 51, 8, 9, 10, 11, 13, 15, |
|
6928
|
|
|
|
|
|
|
55, 43, 58, 60, 17, 18, 66, 67, |
|
6929
|
|
|
|
|
|
|
68, 75, 19, 71, 21, 72, 73, 20, |
|
6930
|
|
|
|
|
|
|
23, 25, 74, 68, 71, 72, 73, 46, |
|
6931
|
|
|
|
|
|
|
26, 86, 95, 43, 43, 80, 27, 82, |
|
6932
|
|
|
|
|
|
|
29, 43, 87, 94, 33, 90, 35, 91, |
|
6933
|
|
|
|
|
|
|
92, 34, 37, 39, 93, 87, 90, 91, |
|
6934
|
|
|
|
|
|
|
92, 66, 43, 43, 46 |
|
6935
|
|
|
|
|
|
|
}; |
|
6936
|
|
|
|
|
|
|
|
|
6937
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_actions[] = { |
|
6938
|
|
|
|
|
|
|
0, 0, 0, 31, 29, 25, 25, 5, |
|
6939
|
|
|
|
|
|
|
51, 51, 45, 0, 0, 0, 0, 15, |
|
6940
|
|
|
|
|
|
|
39, 9, 36, 0, 0, 0, 0, 25, |
|
6941
|
|
|
|
|
|
|
25, 25, 25, 21, 21, 0, 21, 0, |
|
6942
|
|
|
|
|
|
|
0, 19, 0, 0, 0, 0, 0, 29, |
|
6943
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
|
6944
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6945
|
|
|
|
|
|
|
0, 0, 0, 27, 0, 0, 0, 0, |
|
6946
|
|
|
|
|
|
|
0, 0, 29, 17, 0, 0, 54, 54, |
|
6947
|
|
|
|
|
|
|
0, 54, 0, 0, 0, 0, 0, 0, |
|
6948
|
|
|
|
|
|
|
29, 27, 29, 54, 0, 0, 13, 13, |
|
6949
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6950
|
|
|
|
|
|
|
0, 0, 0, 7, 7, 7, 7, 61, |
|
6951
|
|
|
|
|
|
|
0, 19, 19, 23, 48, 48, 0, 19, |
|
6952
|
|
|
|
|
|
|
0, 42, 0, 0, 0, 0, 0, 0, |
|
6953
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 17, 17, 17, |
|
6954
|
|
|
|
|
|
|
17, 3, 33, 3, 57 |
|
6955
|
|
|
|
|
|
|
}; |
|
6956
|
|
|
|
|
|
|
|
|
6957
|
|
|
|
|
|
|
static const char _VBD_VBN_eof_actions[] = { |
|
6958
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6959
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6960
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6961
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6962
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
6963
|
|
|
|
|
|
|
0, 0, 0, 0, 5, 0, 0, 5, |
|
6964
|
|
|
|
|
|
|
5, 5, 5, 0, 5, 5, 5, 0, |
|
6965
|
|
|
|
|
|
|
5, 5, 0, 5, 0, 5, 5, 5, |
|
6966
|
|
|
|
|
|
|
5, 5, 0, 0, 11, 11, 11, 11, |
|
6967
|
|
|
|
|
|
|
11, 11, 11, 11, 5, 5, 0, 0, |
|
6968
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 17, |
|
6969
|
|
|
|
|
|
|
17, 17, 17, 17, 17, 17, 17, 0, |
|
6970
|
|
|
|
|
|
|
0, 5, 5, 5, 5 |
|
6971
|
|
|
|
|
|
|
}; |
|
6972
|
|
|
|
|
|
|
|
|
6973
|
|
|
|
|
|
|
static const int VBD_VBN_start = 1; |
|
6974
|
|
|
|
|
|
|
|
|
6975
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBD_VBN(const string& form, vector& lemmas) const { |
|
6976
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
|
6977
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
6978
|
|
|
|
|
|
|
|
|
6979
|
|
|
|
|
|
|
{ |
|
6980
|
|
|
|
|
|
|
cs = VBD_VBN_start; |
|
6981
|
|
|
|
|
|
|
} |
|
6982
|
|
|
|
|
|
|
|
|
6983
|
|
|
|
|
|
|
{ |
|
6984
|
|
|
|
|
|
|
int _klen; |
|
6985
|
|
|
|
|
|
|
unsigned int _trans; |
|
6986
|
|
|
|
|
|
|
const char *_acts; |
|
6987
|
|
|
|
|
|
|
unsigned int _nacts; |
|
6988
|
|
|
|
|
|
|
const char *_keys; |
|
6989
|
|
|
|
|
|
|
|
|
6990
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
6991
|
|
|
|
|
|
|
goto _test_eof; |
|
6992
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
6993
|
|
|
|
|
|
|
goto _out; |
|
6994
|
|
|
|
|
|
|
_resume: |
|
6995
|
0
|
|
|
|
|
|
_keys = _VBD_VBN_trans_keys + _VBD_VBN_key_offsets[cs]; |
|
6996
|
0
|
|
|
|
|
|
_trans = _VBD_VBN_index_offsets[cs]; |
|
6997
|
|
|
|
|
|
|
|
|
6998
|
0
|
|
|
|
|
|
_klen = _VBD_VBN_single_lengths[cs]; |
|
6999
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7000
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7001
|
|
|
|
|
|
|
const char *_mid; |
|
7002
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
7003
|
|
|
|
|
|
|
while (1) { |
|
7004
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7005
|
|
|
|
|
|
|
break; |
|
7006
|
|
|
|
|
|
|
|
|
7007
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
7008
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
|
7009
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
7010
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
|
7011
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
7012
|
|
|
|
|
|
|
else { |
|
7013
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
7014
|
0
|
|
|
|
|
|
goto _match; |
|
7015
|
|
|
|
|
|
|
} |
|
7016
|
|
|
|
|
|
|
} |
|
7017
|
0
|
|
|
|
|
|
_keys += _klen; |
|
7018
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7019
|
|
|
|
|
|
|
} |
|
7020
|
|
|
|
|
|
|
|
|
7021
|
0
|
|
|
|
|
|
_klen = _VBD_VBN_range_lengths[cs]; |
|
7022
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7023
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7024
|
|
|
|
|
|
|
const char *_mid; |
|
7025
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
7026
|
|
|
|
|
|
|
while (1) { |
|
7027
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7028
|
|
|
|
|
|
|
break; |
|
7029
|
|
|
|
|
|
|
|
|
7030
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
7031
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
|
7032
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
7033
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
|
7034
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
7035
|
|
|
|
|
|
|
else { |
|
7036
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
7037
|
0
|
|
|
|
|
|
goto _match; |
|
7038
|
|
|
|
|
|
|
} |
|
7039
|
|
|
|
|
|
|
} |
|
7040
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7041
|
|
|
|
|
|
|
} |
|
7042
|
|
|
|
|
|
|
|
|
7043
|
|
|
|
|
|
|
_match: |
|
7044
|
0
|
|
|
|
|
|
_trans = _VBD_VBN_indicies[_trans]; |
|
7045
|
0
|
|
|
|
|
|
cs = _VBD_VBN_trans_targs[_trans]; |
|
7046
|
|
|
|
|
|
|
|
|
7047
|
0
|
0
|
|
|
|
|
if ( _VBD_VBN_trans_actions[_trans] == 0 ) |
|
7048
|
|
|
|
|
|
|
goto _again; |
|
7049
|
|
|
|
|
|
|
|
|
7050
|
0
|
|
|
|
|
|
_acts = _VBD_VBN_actions + _VBD_VBN_trans_actions[_trans]; |
|
7051
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
7052
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
7053
|
|
|
|
|
|
|
{ |
|
7054
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
7055
|
|
|
|
|
|
|
{ |
|
7056
|
|
|
|
|
|
|
case 0: |
|
7057
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
|
7058
|
|
|
|
|
|
|
break; |
|
7059
|
|
|
|
|
|
|
case 1: |
|
7060
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
|
7061
|
|
|
|
|
|
|
break; |
|
7062
|
|
|
|
|
|
|
case 2: |
|
7063
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
|
7064
|
|
|
|
|
|
|
break; |
|
7065
|
|
|
|
|
|
|
case 3: |
|
7066
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
|
7067
|
|
|
|
|
|
|
break; |
|
7068
|
|
|
|
|
|
|
case 4: |
|
7069
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
|
7070
|
|
|
|
|
|
|
break; |
|
7071
|
|
|
|
|
|
|
case 5: |
|
7072
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
|
7073
|
|
|
|
|
|
|
break; |
|
7074
|
|
|
|
|
|
|
case 7: |
|
7075
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
|
7076
|
|
|
|
|
|
|
break; |
|
7077
|
|
|
|
|
|
|
case 8: |
|
7078
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 3, append = "y"; } |
|
7079
|
|
|
|
|
|
|
break; |
|
7080
|
|
|
|
|
|
|
case 9: |
|
7081
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
|
7082
|
|
|
|
|
|
|
break; |
|
7083
|
|
|
|
|
|
|
case 10: |
|
7084
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
|
7085
|
|
|
|
|
|
|
break; |
|
7086
|
|
|
|
|
|
|
case 11: |
|
7087
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
|
7088
|
|
|
|
|
|
|
break; |
|
7089
|
|
|
|
|
|
|
case 12: |
|
7090
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
|
7091
|
|
|
|
|
|
|
break; |
|
7092
|
|
|
|
|
|
|
case 13: |
|
7093
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
|
7094
|
|
|
|
|
|
|
break; |
|
7095
|
|
|
|
|
|
|
case 14: |
|
7096
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 2, append = nullptr; } |
|
7097
|
|
|
|
|
|
|
break; |
|
7098
|
|
|
|
|
|
|
case 15: |
|
7099
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 1, append = nullptr; } |
|
7100
|
|
|
|
|
|
|
break; |
|
7101
|
|
|
|
|
|
|
case 16: |
|
7102
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 2, append = nullptr; } |
|
7103
|
|
|
|
|
|
|
break; |
|
7104
|
|
|
|
|
|
|
case 17: |
|
7105
|
0
|
0
|
|
|
|
|
{ if (best > 'r') best = 'r', remove = 1, append = nullptr; } |
|
7106
|
|
|
|
|
|
|
break; |
|
7107
|
|
|
|
|
|
|
} |
|
7108
|
|
|
|
|
|
|
} |
|
7109
|
|
|
|
|
|
|
|
|
7110
|
|
|
|
|
|
|
_again: |
|
7111
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
7112
|
|
|
|
|
|
|
goto _out; |
|
7113
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
7114
|
|
|
|
|
|
|
goto _resume; |
|
7115
|
|
|
|
|
|
|
_test_eof: {} |
|
7116
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
7117
|
|
|
|
|
|
|
{ |
|
7118
|
0
|
|
|
|
|
|
const char *__acts = _VBD_VBN_actions + _VBD_VBN_eof_actions[cs]; |
|
7119
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
|
7120
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
|
7121
|
0
|
|
|
|
|
|
switch ( *__acts++ ) { |
|
7122
|
|
|
|
|
|
|
case 3: |
|
7123
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
|
7124
|
|
|
|
|
|
|
break; |
|
7125
|
|
|
|
|
|
|
case 6: |
|
7126
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
|
7127
|
|
|
|
|
|
|
break; |
|
7128
|
|
|
|
|
|
|
case 9: |
|
7129
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
|
7130
|
|
|
|
|
|
|
break; |
|
7131
|
|
|
|
|
|
|
} |
|
7132
|
|
|
|
|
|
|
} |
|
7133
|
|
|
|
|
|
|
} |
|
7134
|
|
|
|
|
|
|
|
|
7135
|
|
|
|
|
|
|
_out: {} |
|
7136
|
|
|
|
|
|
|
} |
|
7137
|
|
|
|
|
|
|
|
|
7138
|
0
|
0
|
|
|
|
|
add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
|
0
|
|
|
|
|
|
|
7139
|
0
|
|
|
|
|
|
} |
|
7140
|
|
|
|
|
|
|
|
|
7141
|
|
|
|
|
|
|
static const char _VBZ_actions[] = { |
|
7142
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
|
7143
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
|
7144
|
|
|
|
|
|
|
7, 1, 8 |
|
7145
|
|
|
|
|
|
|
}; |
|
7146
|
|
|
|
|
|
|
|
|
7147
|
|
|
|
|
|
|
static const char _VBZ_key_offsets[] = { |
|
7148
|
|
|
|
|
|
|
0, 0, 1, 2, 4, 14, 14, 25, |
|
7149
|
|
|
|
|
|
|
26, 31, 31, 31, 31, 37, 45, 54 |
|
7150
|
|
|
|
|
|
|
}; |
|
7151
|
|
|
|
|
|
|
|
|
7152
|
|
|
|
|
|
|
static const char _VBZ_trans_keys[] = { |
|
7153
|
|
|
|
|
|
|
115, 101, 99, 115, 98, 100, 102, 104, |
|
7154
|
|
|
|
|
|
|
106, 110, 112, 116, 118, 122, 122, 98, |
|
7155
|
|
|
|
|
|
|
100, 102, 104, 106, 110, 112, 116, 118, |
|
7156
|
|
|
|
|
|
|
120, 111, 97, 101, 105, 111, 117, 104, |
|
7157
|
|
|
|
|
|
|
105, 111, 115, 120, 122, 97, 101, 105, |
|
7158
|
|
|
|
|
|
|
110, 111, 114, 115, 117, 97, 101, 105, |
|
7159
|
|
|
|
|
|
|
111, 117, 121, 122, 98, 120, 0 |
|
7160
|
|
|
|
|
|
|
}; |
|
7161
|
|
|
|
|
|
|
|
|
7162
|
|
|
|
|
|
|
static const char _VBZ_single_lengths[] = { |
|
7163
|
|
|
|
|
|
|
0, 1, 1, 2, 0, 0, 1, 1, |
|
7164
|
|
|
|
|
|
|
5, 0, 0, 0, 6, 8, 7, 0 |
|
7165
|
|
|
|
|
|
|
}; |
|
7166
|
|
|
|
|
|
|
|
|
7167
|
|
|
|
|
|
|
static const char _VBZ_range_lengths[] = { |
|
7168
|
|
|
|
|
|
|
0, 0, 0, 0, 5, 0, 5, 0, |
|
7169
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 0 |
|
7170
|
|
|
|
|
|
|
}; |
|
7171
|
|
|
|
|
|
|
|
|
7172
|
|
|
|
|
|
|
static const char _VBZ_index_offsets[] = { |
|
7173
|
|
|
|
|
|
|
0, 0, 2, 4, 7, 13, 14, 21, |
|
7174
|
|
|
|
|
|
|
23, 29, 30, 31, 32, 39, 48, 57 |
|
7175
|
|
|
|
|
|
|
}; |
|
7176
|
|
|
|
|
|
|
|
|
7177
|
|
|
|
|
|
|
static const char _VBZ_indicies[] = { |
|
7178
|
|
|
|
|
|
|
0, 1, 3, 2, 4, 4, 1, 5, |
|
7179
|
|
|
|
|
|
|
5, 5, 5, 5, 1, 6, 7, 7, |
|
7180
|
|
|
|
|
|
|
7, 7, 7, 7, 1, 8, 1, 9, |
|
7181
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 8, 10, 1, |
|
7182
|
|
|
|
|
|
|
11, 12, 13, 14, 4, 15, 1, 16, |
|
7183
|
|
|
|
|
|
|
16, 16, 17, 16, 18, 19, 16, 1, |
|
7184
|
|
|
|
|
|
|
20, 20, 20, 20, 20, 20, 22, 21, |
|
7185
|
|
|
|
|
|
|
1, 10, 0 |
|
7186
|
|
|
|
|
|
|
}; |
|
7187
|
|
|
|
|
|
|
|
|
7188
|
|
|
|
|
|
|
static const char _VBZ_trans_targs[] = { |
|
7189
|
|
|
|
|
|
|
2, 0, 11, 12, 11, 5, 11, 11, |
|
7190
|
|
|
|
|
|
|
11, 9, 11, 3, 4, 6, 13, 14, |
|
7191
|
|
|
|
|
|
|
11, 7, 8, 11, 11, 10, 15 |
|
7192
|
|
|
|
|
|
|
}; |
|
7193
|
|
|
|
|
|
|
|
|
7194
|
|
|
|
|
|
|
static const char _VBZ_trans_actions[] = { |
|
7195
|
|
|
|
|
|
|
0, 0, 17, 17, 11, 0, 13, 15, |
|
7196
|
|
|
|
|
|
|
9, 0, 3, 0, 0, 0, 11, 11, |
|
7197
|
|
|
|
|
|
|
1, 0, 0, 7, 5, 0, 7 |
|
7198
|
|
|
|
|
|
|
}; |
|
7199
|
|
|
|
|
|
|
|
|
7200
|
|
|
|
|
|
|
static const int VBZ_start = 1; |
|
7201
|
|
|
|
|
|
|
|
|
7202
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBZ(const string& form, vector& lemmas) const { |
|
7203
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
|
7204
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
7205
|
|
|
|
|
|
|
|
|
7206
|
|
|
|
|
|
|
{ |
|
7207
|
|
|
|
|
|
|
cs = VBZ_start; |
|
7208
|
|
|
|
|
|
|
} |
|
7209
|
|
|
|
|
|
|
|
|
7210
|
|
|
|
|
|
|
{ |
|
7211
|
|
|
|
|
|
|
int _klen; |
|
7212
|
|
|
|
|
|
|
unsigned int _trans; |
|
7213
|
|
|
|
|
|
|
const char *_acts; |
|
7214
|
|
|
|
|
|
|
unsigned int _nacts; |
|
7215
|
|
|
|
|
|
|
const char *_keys; |
|
7216
|
|
|
|
|
|
|
|
|
7217
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
7218
|
|
|
|
|
|
|
goto _test_eof; |
|
7219
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
7220
|
|
|
|
|
|
|
goto _out; |
|
7221
|
|
|
|
|
|
|
_resume: |
|
7222
|
0
|
|
|
|
|
|
_keys = _VBZ_trans_keys + _VBZ_key_offsets[cs]; |
|
7223
|
0
|
|
|
|
|
|
_trans = _VBZ_index_offsets[cs]; |
|
7224
|
|
|
|
|
|
|
|
|
7225
|
0
|
|
|
|
|
|
_klen = _VBZ_single_lengths[cs]; |
|
7226
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7227
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7228
|
|
|
|
|
|
|
const char *_mid; |
|
7229
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
7230
|
|
|
|
|
|
|
while (1) { |
|
7231
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7232
|
|
|
|
|
|
|
break; |
|
7233
|
|
|
|
|
|
|
|
|
7234
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
7235
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
|
7236
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
7237
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
|
7238
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
7239
|
|
|
|
|
|
|
else { |
|
7240
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
7241
|
0
|
|
|
|
|
|
goto _match; |
|
7242
|
|
|
|
|
|
|
} |
|
7243
|
|
|
|
|
|
|
} |
|
7244
|
0
|
|
|
|
|
|
_keys += _klen; |
|
7245
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7246
|
|
|
|
|
|
|
} |
|
7247
|
|
|
|
|
|
|
|
|
7248
|
0
|
|
|
|
|
|
_klen = _VBZ_range_lengths[cs]; |
|
7249
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7250
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7251
|
|
|
|
|
|
|
const char *_mid; |
|
7252
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
7253
|
|
|
|
|
|
|
while (1) { |
|
7254
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7255
|
|
|
|
|
|
|
break; |
|
7256
|
|
|
|
|
|
|
|
|
7257
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
7258
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
|
7259
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
7260
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
|
7261
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
7262
|
|
|
|
|
|
|
else { |
|
7263
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
7264
|
0
|
|
|
|
|
|
goto _match; |
|
7265
|
|
|
|
|
|
|
} |
|
7266
|
|
|
|
|
|
|
} |
|
7267
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7268
|
|
|
|
|
|
|
} |
|
7269
|
|
|
|
|
|
|
|
|
7270
|
|
|
|
|
|
|
_match: |
|
7271
|
0
|
|
|
|
|
|
_trans = _VBZ_indicies[_trans]; |
|
7272
|
0
|
|
|
|
|
|
cs = _VBZ_trans_targs[_trans]; |
|
7273
|
|
|
|
|
|
|
|
|
7274
|
0
|
0
|
|
|
|
|
if ( _VBZ_trans_actions[_trans] == 0 ) |
|
7275
|
|
|
|
|
|
|
goto _again; |
|
7276
|
|
|
|
|
|
|
|
|
7277
|
0
|
|
|
|
|
|
_acts = _VBZ_actions + _VBZ_trans_actions[_trans]; |
|
7278
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
7279
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
7280
|
|
|
|
|
|
|
{ |
|
7281
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
7282
|
|
|
|
|
|
|
{ |
|
7283
|
|
|
|
|
|
|
case 0: |
|
7284
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
|
7285
|
|
|
|
|
|
|
break; |
|
7286
|
|
|
|
|
|
|
case 1: |
|
7287
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
|
7288
|
|
|
|
|
|
|
break; |
|
7289
|
|
|
|
|
|
|
case 2: |
|
7290
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
|
7291
|
|
|
|
|
|
|
break; |
|
7292
|
|
|
|
|
|
|
case 3: |
|
7293
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
|
7294
|
|
|
|
|
|
|
break; |
|
7295
|
|
|
|
|
|
|
case 4: |
|
7296
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
|
7297
|
|
|
|
|
|
|
break; |
|
7298
|
|
|
|
|
|
|
case 5: |
|
7299
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
|
7300
|
|
|
|
|
|
|
break; |
|
7301
|
|
|
|
|
|
|
case 6: |
|
7302
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 3, append = "y"; } |
|
7303
|
|
|
|
|
|
|
break; |
|
7304
|
|
|
|
|
|
|
case 7: |
|
7305
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
|
7306
|
|
|
|
|
|
|
break; |
|
7307
|
|
|
|
|
|
|
case 8: |
|
7308
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
|
7309
|
|
|
|
|
|
|
break; |
|
7310
|
|
|
|
|
|
|
} |
|
7311
|
|
|
|
|
|
|
} |
|
7312
|
|
|
|
|
|
|
|
|
7313
|
|
|
|
|
|
|
_again: |
|
7314
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
7315
|
|
|
|
|
|
|
goto _out; |
|
7316
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
7317
|
|
|
|
|
|
|
goto _resume; |
|
7318
|
|
|
|
|
|
|
_test_eof: {} |
|
7319
|
|
|
|
|
|
|
_out: {} |
|
7320
|
|
|
|
|
|
|
} |
|
7321
|
|
|
|
|
|
|
|
|
7322
|
0
|
0
|
|
|
|
|
add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
|
0
|
|
|
|
|
|
|
7323
|
0
|
|
|
|
|
|
} |
|
7324
|
|
|
|
|
|
|
|
|
7325
|
|
|
|
|
|
|
static const char _JJR_RBR_actions[] = { |
|
7326
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 3, 1, |
|
7327
|
|
|
|
|
|
|
4, 1, 5, 2, 1, 4, 2, 2, |
|
7328
|
|
|
|
|
|
|
5, 2, 4, 5 |
|
7329
|
|
|
|
|
|
|
}; |
|
7330
|
|
|
|
|
|
|
|
|
7331
|
|
|
|
|
|
|
static const unsigned char _JJR_RBR_key_offsets[] = { |
|
7332
|
|
|
|
|
|
|
0, 0, 1, 2, 26, 26, 32, 37, |
|
7333
|
|
|
|
|
|
|
50, 56, 62, 73, 79, 85, 91, 102, |
|
7334
|
|
|
|
|
|
|
103, 109, 115, 117, 123, 129, 135, 146, |
|
7335
|
|
|
|
|
|
|
152, 163, 169, 175, 181 |
|
7336
|
|
|
|
|
|
|
}; |
|
7337
|
|
|
|
|
|
|
|
|
7338
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_keys[] = { |
|
7339
|
|
|
|
|
|
|
114, 101, 98, 99, 100, 101, 102, 103, |
|
7340
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
|
7341
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 120, |
|
7342
|
|
|
|
|
|
|
121, 122, 97, 98, 101, 105, 111, 117, |
|
7343
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 98, 99, 100, |
|
7344
|
|
|
|
|
|
|
105, 111, 117, 122, 97, 101, 102, 109, |
|
7345
|
|
|
|
|
|
|
112, 120, 97, 100, 101, 105, 111, 117, |
|
7346
|
|
|
|
|
|
|
97, 101, 102, 105, 111, 117, 97, 101, |
|
7347
|
|
|
|
|
|
|
103, 105, 111, 117, 122, 98, 109, 112, |
|
7348
|
|
|
|
|
|
|
120, 97, 101, 104, 105, 111, 117, 97, |
|
7349
|
|
|
|
|
|
|
101, 105, 106, 111, 117, 97, 101, 105, |
|
7350
|
|
|
|
|
|
|
107, 111, 117, 97, 101, 105, 108, 111, |
|
7351
|
|
|
|
|
|
|
117, 122, 98, 109, 112, 120, 101, 97, |
|
7352
|
|
|
|
|
|
|
101, 105, 109, 111, 117, 97, 101, 105, |
|
7353
|
|
|
|
|
|
|
110, 111, 117, 97, 122, 97, 101, 105, |
|
7354
|
|
|
|
|
|
|
111, 112, 117, 97, 101, 105, 111, 113, |
|
7355
|
|
|
|
|
|
|
117, 97, 101, 105, 111, 114, 117, 97, |
|
7356
|
|
|
|
|
|
|
101, 105, 111, 115, 117, 122, 98, 109, |
|
7357
|
|
|
|
|
|
|
112, 120, 97, 101, 105, 111, 116, 117, |
|
7358
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 118, 122, 98, |
|
7359
|
|
|
|
|
|
|
109, 112, 120, 97, 101, 105, 111, 117, |
|
7360
|
|
|
|
|
|
|
119, 97, 101, 105, 111, 117, 120, 97, |
|
7361
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 97, 101, 105, |
|
7362
|
|
|
|
|
|
|
111, 117, 122, 0 |
|
7363
|
|
|
|
|
|
|
}; |
|
7364
|
|
|
|
|
|
|
|
|
7365
|
|
|
|
|
|
|
static const char _JJR_RBR_single_lengths[] = { |
|
7366
|
|
|
|
|
|
|
0, 1, 1, 24, 0, 6, 5, 7, |
|
7367
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 7, 1, |
|
7368
|
|
|
|
|
|
|
6, 6, 0, 6, 6, 6, 7, 6, |
|
7369
|
|
|
|
|
|
|
7, 6, 6, 6, 6 |
|
7370
|
|
|
|
|
|
|
}; |
|
7371
|
|
|
|
|
|
|
|
|
7372
|
|
|
|
|
|
|
static const char _JJR_RBR_range_lengths[] = { |
|
7373
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 3, |
|
7374
|
|
|
|
|
|
|
0, 0, 2, 0, 0, 0, 2, 0, |
|
7375
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 2, 0, |
|
7376
|
|
|
|
|
|
|
2, 0, 0, 0, 0 |
|
7377
|
|
|
|
|
|
|
}; |
|
7378
|
|
|
|
|
|
|
|
|
7379
|
|
|
|
|
|
|
static const unsigned char _JJR_RBR_index_offsets[] = { |
|
7380
|
|
|
|
|
|
|
0, 0, 2, 4, 29, 30, 37, 43, |
|
7381
|
|
|
|
|
|
|
54, 61, 68, 78, 85, 92, 99, 109, |
|
7382
|
|
|
|
|
|
|
111, 118, 125, 127, 134, 141, 148, 158, |
|
7383
|
|
|
|
|
|
|
165, 175, 182, 189, 196 |
|
7384
|
|
|
|
|
|
|
}; |
|
7385
|
|
|
|
|
|
|
|
|
7386
|
|
|
|
|
|
|
static const char _JJR_RBR_indicies[] = { |
|
7387
|
|
|
|
|
|
|
0, 1, 2, 1, 4, 5, 6, 7, |
|
7388
|
|
|
|
|
|
|
8, 9, 10, 11, 12, 13, 14, 15, |
|
7389
|
|
|
|
|
|
|
16, 17, 18, 19, 20, 21, 7, 22, |
|
7390
|
|
|
|
|
|
|
23, 24, 25, 26, 3, 1, 27, 28, |
|
7391
|
|
|
|
|
|
|
27, 27, 27, 27, 1, 29, 29, 29, |
|
7392
|
|
|
|
|
|
|
29, 29, 1, 30, 31, 30, 27, 27, |
|
7393
|
|
|
|
|
|
|
27, 30, 27, 30, 30, 1, 27, 28, |
|
7394
|
|
|
|
|
|
|
27, 27, 27, 27, 1, 27, 27, 28, |
|
7395
|
|
|
|
|
|
|
27, 27, 27, 1, 27, 27, 31, 27, |
|
7396
|
|
|
|
|
|
|
27, 27, 30, 30, 30, 1, 27, 27, |
|
7397
|
|
|
|
|
|
|
28, 27, 27, 27, 1, 27, 27, 27, |
|
7398
|
|
|
|
|
|
|
28, 27, 27, 1, 27, 27, 27, 28, |
|
7399
|
|
|
|
|
|
|
27, 27, 1, 27, 27, 27, 32, 27, |
|
7400
|
|
|
|
|
|
|
27, 30, 30, 30, 1, 1, 33, 27, |
|
7401
|
|
|
|
|
|
|
27, 27, 28, 27, 27, 1, 34, 34, |
|
7402
|
|
|
|
|
|
|
34, 28, 34, 34, 1, 29, 1, 34, |
|
7403
|
|
|
|
|
|
|
34, 34, 34, 28, 34, 1, 27, 27, |
|
7404
|
|
|
|
|
|
|
27, 27, 28, 27, 1, 27, 27, 27, |
|
7405
|
|
|
|
|
|
|
27, 28, 27, 1, 27, 27, 27, 27, |
|
7406
|
|
|
|
|
|
|
31, 27, 30, 30, 30, 1, 27, 27, |
|
7407
|
|
|
|
|
|
|
27, 27, 28, 27, 1, 27, 27, 27, |
|
7408
|
|
|
|
|
|
|
27, 27, 31, 30, 30, 30, 1, 34, |
|
7409
|
|
|
|
|
|
|
34, 34, 34, 34, 28, 1, 34, 34, |
|
7410
|
|
|
|
|
|
|
34, 34, 34, 28, 1, 27, 27, 27, |
|
7411
|
|
|
|
|
|
|
27, 27, 28, 1, 27, 27, 27, 27, |
|
7412
|
|
|
|
|
|
|
27, 28, 1, 0 |
|
7413
|
|
|
|
|
|
|
}; |
|
7414
|
|
|
|
|
|
|
|
|
7415
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_targs[] = { |
|
7416
|
|
|
|
|
|
|
2, 0, 3, 4, 5, 7, 8, 4, |
|
7417
|
|
|
|
|
|
|
9, 10, 11, 4, 12, 13, 14, 16, |
|
7418
|
|
|
|
|
|
|
17, 19, 20, 21, 22, 23, 24, 25, |
|
7419
|
|
|
|
|
|
|
26, 27, 28, 6, 4, 4, 4, 4, |
|
7420
|
|
|
|
|
|
|
15, 4, 18 |
|
7421
|
|
|
|
|
|
|
}; |
|
7422
|
|
|
|
|
|
|
|
|
7423
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_actions[] = { |
|
7424
|
|
|
|
|
|
|
0, 0, 0, 9, 9, 9, 9, 17, |
|
7425
|
|
|
|
|
|
|
9, 9, 9, 14, 9, 9, 9, 9, |
|
7426
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 9, 9, 9, |
|
7427
|
|
|
|
|
|
|
9, 9, 9, 7, 3, 5, 7, 11, |
|
7428
|
|
|
|
|
|
|
11, 1, 7 |
|
7429
|
|
|
|
|
|
|
}; |
|
7430
|
|
|
|
|
|
|
|
|
7431
|
|
|
|
|
|
|
static const int JJR_RBR_start = 1; |
|
7432
|
|
|
|
|
|
|
|
|
7433
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_JJR_RBR(const string& form, unsigned negation_len, vector& lemmas) const { |
|
7434
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
|
7435
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
7436
|
|
|
|
|
|
|
|
|
7437
|
|
|
|
|
|
|
{ |
|
7438
|
|
|
|
|
|
|
cs = JJR_RBR_start; |
|
7439
|
|
|
|
|
|
|
} |
|
7440
|
|
|
|
|
|
|
|
|
7441
|
|
|
|
|
|
|
{ |
|
7442
|
|
|
|
|
|
|
int _klen; |
|
7443
|
|
|
|
|
|
|
unsigned int _trans; |
|
7444
|
|
|
|
|
|
|
const char *_acts; |
|
7445
|
|
|
|
|
|
|
unsigned int _nacts; |
|
7446
|
|
|
|
|
|
|
const char *_keys; |
|
7447
|
|
|
|
|
|
|
|
|
7448
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
7449
|
|
|
|
|
|
|
goto _test_eof; |
|
7450
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
7451
|
|
|
|
|
|
|
goto _out; |
|
7452
|
|
|
|
|
|
|
_resume: |
|
7453
|
0
|
|
|
|
|
|
_keys = _JJR_RBR_trans_keys + _JJR_RBR_key_offsets[cs]; |
|
7454
|
0
|
|
|
|
|
|
_trans = _JJR_RBR_index_offsets[cs]; |
|
7455
|
|
|
|
|
|
|
|
|
7456
|
0
|
|
|
|
|
|
_klen = _JJR_RBR_single_lengths[cs]; |
|
7457
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7458
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7459
|
|
|
|
|
|
|
const char *_mid; |
|
7460
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
7461
|
|
|
|
|
|
|
while (1) { |
|
7462
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7463
|
|
|
|
|
|
|
break; |
|
7464
|
|
|
|
|
|
|
|
|
7465
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
7466
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
|
7467
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
7468
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
|
7469
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
7470
|
|
|
|
|
|
|
else { |
|
7471
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
7472
|
0
|
|
|
|
|
|
goto _match; |
|
7473
|
|
|
|
|
|
|
} |
|
7474
|
|
|
|
|
|
|
} |
|
7475
|
0
|
|
|
|
|
|
_keys += _klen; |
|
7476
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7477
|
|
|
|
|
|
|
} |
|
7478
|
|
|
|
|
|
|
|
|
7479
|
0
|
|
|
|
|
|
_klen = _JJR_RBR_range_lengths[cs]; |
|
7480
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7481
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7482
|
|
|
|
|
|
|
const char *_mid; |
|
7483
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
7484
|
|
|
|
|
|
|
while (1) { |
|
7485
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7486
|
|
|
|
|
|
|
break; |
|
7487
|
|
|
|
|
|
|
|
|
7488
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
7489
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
|
7490
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
7491
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
|
7492
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
7493
|
|
|
|
|
|
|
else { |
|
7494
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
7495
|
0
|
|
|
|
|
|
goto _match; |
|
7496
|
|
|
|
|
|
|
} |
|
7497
|
|
|
|
|
|
|
} |
|
7498
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7499
|
|
|
|
|
|
|
} |
|
7500
|
|
|
|
|
|
|
|
|
7501
|
|
|
|
|
|
|
_match: |
|
7502
|
0
|
|
|
|
|
|
_trans = _JJR_RBR_indicies[_trans]; |
|
7503
|
0
|
|
|
|
|
|
cs = _JJR_RBR_trans_targs[_trans]; |
|
7504
|
|
|
|
|
|
|
|
|
7505
|
0
|
0
|
|
|
|
|
if ( _JJR_RBR_trans_actions[_trans] == 0 ) |
|
7506
|
|
|
|
|
|
|
goto _again; |
|
7507
|
|
|
|
|
|
|
|
|
7508
|
0
|
|
|
|
|
|
_acts = _JJR_RBR_actions + _JJR_RBR_trans_actions[_trans]; |
|
7509
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
7510
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
7511
|
|
|
|
|
|
|
{ |
|
7512
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
7513
|
|
|
|
|
|
|
{ |
|
7514
|
|
|
|
|
|
|
case 0: |
|
7515
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = nullptr; } |
|
7516
|
|
|
|
|
|
|
break; |
|
7517
|
|
|
|
|
|
|
case 1: |
|
7518
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 3, append = nullptr; } |
|
7519
|
|
|
|
|
|
|
break; |
|
7520
|
|
|
|
|
|
|
case 2: |
|
7521
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = "y"; } |
|
7522
|
|
|
|
|
|
|
break; |
|
7523
|
|
|
|
|
|
|
case 3: |
|
7524
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
|
7525
|
|
|
|
|
|
|
break; |
|
7526
|
|
|
|
|
|
|
case 4: |
|
7527
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
|
7528
|
|
|
|
|
|
|
break; |
|
7529
|
|
|
|
|
|
|
case 5: |
|
7530
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
|
7531
|
|
|
|
|
|
|
break; |
|
7532
|
|
|
|
|
|
|
} |
|
7533
|
|
|
|
|
|
|
} |
|
7534
|
|
|
|
|
|
|
|
|
7535
|
|
|
|
|
|
|
_again: |
|
7536
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
7537
|
|
|
|
|
|
|
goto _out; |
|
7538
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
7539
|
|
|
|
|
|
|
goto _resume; |
|
7540
|
|
|
|
|
|
|
_test_eof: {} |
|
7541
|
|
|
|
|
|
|
_out: {} |
|
7542
|
|
|
|
|
|
|
} |
|
7543
|
|
|
|
|
|
|
|
|
7544
|
0
|
0
|
|
|
|
|
add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7545
|
0
|
|
|
|
|
|
} |
|
7546
|
|
|
|
|
|
|
|
|
7547
|
|
|
|
|
|
|
static const char _JJS_RBS_actions[] = { |
|
7548
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
|
7549
|
|
|
|
|
|
|
5, 2, 0, 5, 2, 1, 4, 2, |
|
7550
|
|
|
|
|
|
|
3, 5 |
|
7551
|
|
|
|
|
|
|
}; |
|
7552
|
|
|
|
|
|
|
|
|
7553
|
|
|
|
|
|
|
static const unsigned char _JJS_RBS_key_offsets[] = { |
|
7554
|
|
|
|
|
|
|
0, 0, 1, 2, 3, 25, 25, 25, |
|
7555
|
|
|
|
|
|
|
31, 44, 50, 56, 67, 73, 79, 85, |
|
7556
|
|
|
|
|
|
|
96, 102, 108, 114, 120, 126, 137, 143, |
|
7557
|
|
|
|
|
|
|
154, 160, 166, 172, 178, 178, 183, 183, |
|
7558
|
|
|
|
|
|
|
183, 184 |
|
7559
|
|
|
|
|
|
|
}; |
|
7560
|
|
|
|
|
|
|
|
|
7561
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_keys[] = { |
|
7562
|
|
|
|
|
|
|
116, 115, 101, 98, 99, 100, 102, 103, |
|
7563
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
|
7564
|
|
|
|
|
|
|
113, 114, 115, 116, 118, 119, 120, 121, |
|
7565
|
|
|
|
|
|
|
122, 97, 98, 101, 105, 111, 117, 98, |
|
7566
|
|
|
|
|
|
|
99, 100, 105, 111, 117, 122, 97, 101, |
|
7567
|
|
|
|
|
|
|
102, 109, 112, 120, 97, 100, 101, 105, |
|
7568
|
|
|
|
|
|
|
111, 117, 97, 101, 102, 105, 111, 117, |
|
7569
|
|
|
|
|
|
|
97, 101, 103, 105, 111, 117, 122, 98, |
|
7570
|
|
|
|
|
|
|
109, 112, 120, 97, 101, 104, 105, 111, |
|
7571
|
|
|
|
|
|
|
117, 97, 101, 105, 106, 111, 117, 97, |
|
7572
|
|
|
|
|
|
|
101, 105, 107, 111, 117, 97, 101, 105, |
|
7573
|
|
|
|
|
|
|
108, 111, 117, 122, 98, 109, 112, 120, |
|
7574
|
|
|
|
|
|
|
97, 101, 105, 109, 111, 117, 97, 101, |
|
7575
|
|
|
|
|
|
|
105, 110, 111, 117, 97, 101, 105, 111, |
|
7576
|
|
|
|
|
|
|
112, 117, 97, 101, 105, 111, 113, 117, |
|
7577
|
|
|
|
|
|
|
97, 101, 105, 111, 114, 117, 97, 101, |
|
7578
|
|
|
|
|
|
|
105, 111, 115, 117, 122, 98, 109, 112, |
|
7579
|
|
|
|
|
|
|
120, 97, 101, 105, 111, 116, 117, 97, |
|
7580
|
|
|
|
|
|
|
101, 105, 111, 117, 118, 122, 98, 109, |
|
7581
|
|
|
|
|
|
|
112, 120, 97, 101, 105, 111, 117, 119, |
|
7582
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 120, 97, 101, |
|
7583
|
|
|
|
|
|
|
105, 111, 117, 121, 97, 101, 105, 111, |
|
7584
|
|
|
|
|
|
|
117, 122, 97, 101, 105, 111, 117, 101, |
|
7585
|
|
|
|
|
|
|
97, 122, 0 |
|
7586
|
|
|
|
|
|
|
}; |
|
7587
|
|
|
|
|
|
|
|
|
7588
|
|
|
|
|
|
|
static const char _JJS_RBS_single_lengths[] = { |
|
7589
|
|
|
|
|
|
|
0, 1, 1, 1, 22, 0, 0, 6, |
|
7590
|
|
|
|
|
|
|
7, 6, 6, 7, 6, 6, 6, 7, |
|
7591
|
|
|
|
|
|
|
6, 6, 6, 6, 6, 7, 6, 7, |
|
7592
|
|
|
|
|
|
|
6, 6, 6, 6, 0, 5, 0, 0, |
|
7593
|
|
|
|
|
|
|
1, 0 |
|
7594
|
|
|
|
|
|
|
}; |
|
7595
|
|
|
|
|
|
|
|
|
7596
|
|
|
|
|
|
|
static const char _JJS_RBS_range_lengths[] = { |
|
7597
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
7598
|
|
|
|
|
|
|
3, 0, 0, 2, 0, 0, 0, 2, |
|
7599
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 2, 0, 2, |
|
7600
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
7601
|
|
|
|
|
|
|
0, 1 |
|
7602
|
|
|
|
|
|
|
}; |
|
7603
|
|
|
|
|
|
|
|
|
7604
|
|
|
|
|
|
|
static const unsigned char _JJS_RBS_index_offsets[] = { |
|
7605
|
|
|
|
|
|
|
0, 0, 2, 4, 6, 29, 30, 31, |
|
7606
|
|
|
|
|
|
|
38, 49, 56, 63, 73, 80, 87, 94, |
|
7607
|
|
|
|
|
|
|
104, 111, 118, 125, 132, 139, 149, 156, |
|
7608
|
|
|
|
|
|
|
166, 173, 180, 187, 194, 195, 201, 202, |
|
7609
|
|
|
|
|
|
|
203, 205 |
|
7610
|
|
|
|
|
|
|
}; |
|
7611
|
|
|
|
|
|
|
|
|
7612
|
|
|
|
|
|
|
static const char _JJS_RBS_indicies[] = { |
|
7613
|
|
|
|
|
|
|
0, 1, 2, 1, 3, 1, 5, 6, |
|
7614
|
|
|
|
|
|
|
7, 8, 9, 10, 11, 12, 13, 14, |
|
7615
|
|
|
|
|
|
|
15, 16, 17, 18, 19, 20, 21, 22, |
|
7616
|
|
|
|
|
|
|
23, 24, 25, 26, 4, 27, 28, 29, |
|
7617
|
|
|
|
|
|
|
30, 29, 29, 29, 29, 27, 31, 32, |
|
7618
|
|
|
|
|
|
|
31, 29, 29, 29, 31, 29, 31, 31, |
|
7619
|
|
|
|
|
|
|
27, 29, 30, 29, 29, 29, 29, 27, |
|
7620
|
|
|
|
|
|
|
29, 29, 30, 29, 29, 29, 27, 29, |
|
7621
|
|
|
|
|
|
|
29, 32, 29, 29, 29, 31, 31, 31, |
|
7622
|
|
|
|
|
|
|
27, 29, 29, 30, 29, 29, 29, 27, |
|
7623
|
|
|
|
|
|
|
29, 29, 29, 30, 29, 29, 27, 29, |
|
7624
|
|
|
|
|
|
|
29, 29, 30, 29, 29, 27, 29, 29, |
|
7625
|
|
|
|
|
|
|
29, 33, 29, 29, 31, 31, 31, 27, |
|
7626
|
|
|
|
|
|
|
29, 29, 29, 30, 29, 29, 27, 34, |
|
7627
|
|
|
|
|
|
|
34, 34, 30, 34, 34, 27, 34, 34, |
|
7628
|
|
|
|
|
|
|
34, 34, 30, 34, 27, 29, 29, 29, |
|
7629
|
|
|
|
|
|
|
29, 30, 29, 27, 29, 29, 29, 29, |
|
7630
|
|
|
|
|
|
|
30, 29, 27, 29, 29, 29, 29, 32, |
|
7631
|
|
|
|
|
|
|
29, 31, 31, 31, 27, 29, 29, 29, |
|
7632
|
|
|
|
|
|
|
29, 30, 29, 27, 29, 29, 29, 29, |
|
7633
|
|
|
|
|
|
|
29, 32, 31, 31, 31, 27, 34, 34, |
|
7634
|
|
|
|
|
|
|
34, 34, 34, 30, 27, 34, 34, 34, |
|
7635
|
|
|
|
|
|
|
34, 34, 30, 27, 29, 29, 29, 29, |
|
7636
|
|
|
|
|
|
|
29, 30, 27, 29, 29, 29, 29, 29, |
|
7637
|
|
|
|
|
|
|
30, 27, 1, 35, 35, 35, 35, 35, |
|
7638
|
|
|
|
|
|
|
28, 28, 27, 28, 36, 35, 28, 0 |
|
7639
|
|
|
|
|
|
|
}; |
|
7640
|
|
|
|
|
|
|
|
|
7641
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_targs[] = { |
|
7642
|
|
|
|
|
|
|
2, 0, 3, 4, 5, 7, 8, 9, |
|
7643
|
|
|
|
|
|
|
10, 11, 12, 31, 13, 14, 15, 16, |
|
7644
|
|
|
|
|
|
|
17, 18, 19, 20, 21, 22, 23, 24, |
|
7645
|
|
|
|
|
|
|
25, 26, 27, 6, 28, 29, 30, 30, |
|
7646
|
|
|
|
|
|
|
30, 32, 33, 28, 28 |
|
7647
|
|
|
|
|
|
|
}; |
|
7648
|
|
|
|
|
|
|
|
|
7649
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_actions[] = { |
|
7650
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
7651
|
|
|
|
|
|
|
0, 0, 0, 3, 0, 0, 0, 0, |
|
7652
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
7653
|
|
|
|
|
|
|
0, 0, 0, 0, 7, 5, 1, 5, |
|
7654
|
|
|
|
|
|
|
12, 12, 5, 15, 9 |
|
7655
|
|
|
|
|
|
|
}; |
|
7656
|
|
|
|
|
|
|
|
|
7657
|
|
|
|
|
|
|
static const int JJS_RBS_start = 1; |
|
7658
|
|
|
|
|
|
|
|
|
7659
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_JJS_RBS(const string& form, unsigned negation_len, vector& lemmas) const { |
|
7660
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
|
7661
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
|
7662
|
|
|
|
|
|
|
|
|
7663
|
|
|
|
|
|
|
{ |
|
7664
|
|
|
|
|
|
|
cs = JJS_RBS_start; |
|
7665
|
|
|
|
|
|
|
} |
|
7666
|
|
|
|
|
|
|
|
|
7667
|
|
|
|
|
|
|
{ |
|
7668
|
|
|
|
|
|
|
int _klen; |
|
7669
|
|
|
|
|
|
|
unsigned int _trans; |
|
7670
|
|
|
|
|
|
|
const char *_acts; |
|
7671
|
|
|
|
|
|
|
unsigned int _nacts; |
|
7672
|
|
|
|
|
|
|
const char *_keys; |
|
7673
|
|
|
|
|
|
|
|
|
7674
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
|
7675
|
|
|
|
|
|
|
goto _test_eof; |
|
7676
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
7677
|
|
|
|
|
|
|
goto _out; |
|
7678
|
|
|
|
|
|
|
_resume: |
|
7679
|
0
|
|
|
|
|
|
_keys = _JJS_RBS_trans_keys + _JJS_RBS_key_offsets[cs]; |
|
7680
|
0
|
|
|
|
|
|
_trans = _JJS_RBS_index_offsets[cs]; |
|
7681
|
|
|
|
|
|
|
|
|
7682
|
0
|
|
|
|
|
|
_klen = _JJS_RBS_single_lengths[cs]; |
|
7683
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7684
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7685
|
|
|
|
|
|
|
const char *_mid; |
|
7686
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
|
7687
|
|
|
|
|
|
|
while (1) { |
|
7688
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7689
|
|
|
|
|
|
|
break; |
|
7690
|
|
|
|
|
|
|
|
|
7691
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
7692
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
|
7693
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
7694
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
|
7695
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
7696
|
|
|
|
|
|
|
else { |
|
7697
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
7698
|
0
|
|
|
|
|
|
goto _match; |
|
7699
|
|
|
|
|
|
|
} |
|
7700
|
|
|
|
|
|
|
} |
|
7701
|
0
|
|
|
|
|
|
_keys += _klen; |
|
7702
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7703
|
|
|
|
|
|
|
} |
|
7704
|
|
|
|
|
|
|
|
|
7705
|
0
|
|
|
|
|
|
_klen = _JJS_RBS_range_lengths[cs]; |
|
7706
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
7707
|
|
|
|
|
|
|
const char *_lower = _keys; |
|
7708
|
|
|
|
|
|
|
const char *_mid; |
|
7709
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
|
7710
|
|
|
|
|
|
|
while (1) { |
|
7711
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
7712
|
|
|
|
|
|
|
break; |
|
7713
|
|
|
|
|
|
|
|
|
7714
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
7715
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
|
7716
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
7717
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
|
7718
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
7719
|
|
|
|
|
|
|
else { |
|
7720
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
7721
|
0
|
|
|
|
|
|
goto _match; |
|
7722
|
|
|
|
|
|
|
} |
|
7723
|
|
|
|
|
|
|
} |
|
7724
|
0
|
|
|
|
|
|
_trans += _klen; |
|
7725
|
|
|
|
|
|
|
} |
|
7726
|
|
|
|
|
|
|
|
|
7727
|
|
|
|
|
|
|
_match: |
|
7728
|
0
|
|
|
|
|
|
_trans = _JJS_RBS_indicies[_trans]; |
|
7729
|
0
|
|
|
|
|
|
cs = _JJS_RBS_trans_targs[_trans]; |
|
7730
|
|
|
|
|
|
|
|
|
7731
|
0
|
0
|
|
|
|
|
if ( _JJS_RBS_trans_actions[_trans] == 0 ) |
|
7732
|
|
|
|
|
|
|
goto _again; |
|
7733
|
|
|
|
|
|
|
|
|
7734
|
0
|
|
|
|
|
|
_acts = _JJS_RBS_actions + _JJS_RBS_trans_actions[_trans]; |
|
7735
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
|
7736
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
|
7737
|
|
|
|
|
|
|
{ |
|
7738
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
|
7739
|
|
|
|
|
|
|
{ |
|
7740
|
|
|
|
|
|
|
case 0: |
|
7741
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
|
7742
|
|
|
|
|
|
|
break; |
|
7743
|
|
|
|
|
|
|
case 1: |
|
7744
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 4, append = nullptr; } |
|
7745
|
|
|
|
|
|
|
break; |
|
7746
|
|
|
|
|
|
|
case 2: |
|
7747
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 4, append = "y"; } |
|
7748
|
|
|
|
|
|
|
break; |
|
7749
|
|
|
|
|
|
|
case 3: |
|
7750
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = nullptr; } |
|
7751
|
|
|
|
|
|
|
break; |
|
7752
|
|
|
|
|
|
|
case 4: |
|
7753
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 2, append = nullptr; } |
|
7754
|
|
|
|
|
|
|
break; |
|
7755
|
|
|
|
|
|
|
case 5: |
|
7756
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = nullptr; } |
|
7757
|
|
|
|
|
|
|
break; |
|
7758
|
|
|
|
|
|
|
} |
|
7759
|
|
|
|
|
|
|
} |
|
7760
|
|
|
|
|
|
|
|
|
7761
|
|
|
|
|
|
|
_again: |
|
7762
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
7763
|
|
|
|
|
|
|
goto _out; |
|
7764
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
|
7765
|
|
|
|
|
|
|
goto _resume; |
|
7766
|
|
|
|
|
|
|
_test_eof: {} |
|
7767
|
|
|
|
|
|
|
_out: {} |
|
7768
|
|
|
|
|
|
|
} |
|
7769
|
|
|
|
|
|
|
|
|
7770
|
0
|
0
|
|
|
|
|
add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7771
|
0
|
|
|
|
|
|
} |
|
7772
|
|
|
|
|
|
|
|
|
7773
|
|
|
|
|
|
|
} // namespace morphodita |
|
7774
|
|
|
|
|
|
|
|
|
7775
|
|
|
|
|
|
|
///////// |
|
7776
|
|
|
|
|
|
|
// File: morphodita/morpho/external_morpho.h |
|
7777
|
|
|
|
|
|
|
///////// |
|
7778
|
|
|
|
|
|
|
|
|
7779
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
7780
|
|
|
|
|
|
|
// |
|
7781
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
7782
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
7783
|
|
|
|
|
|
|
// |
|
7784
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
7785
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
7786
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
7787
|
|
|
|
|
|
|
|
|
7788
|
|
|
|
|
|
|
namespace morphodita { |
|
7789
|
|
|
|
|
|
|
|
|
7790
|
0
|
|
|
|
|
|
class external_morpho : public morpho { |
|
7791
|
|
|
|
|
|
|
public: |
|
7792
|
0
|
|
|
|
|
|
external_morpho(unsigned version) : version(version) {} |
|
7793
|
|
|
|
|
|
|
|
|
7794
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
|
7795
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
|
7796
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
|
7797
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
|
7798
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
|
7799
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
|
7800
|
|
|
|
|
|
|
|
|
7801
|
|
|
|
|
|
|
bool load(istream& is); |
|
7802
|
|
|
|
|
|
|
|
|
7803
|
|
|
|
|
|
|
private: |
|
7804
|
|
|
|
|
|
|
unsigned version; |
|
7805
|
|
|
|
|
|
|
|
|
7806
|
|
|
|
|
|
|
string unknown_tag; |
|
7807
|
|
|
|
|
|
|
}; |
|
7808
|
|
|
|
|
|
|
|
|
7809
|
|
|
|
|
|
|
} // namespace morphodita |
|
7810
|
|
|
|
|
|
|
|
|
7811
|
|
|
|
|
|
|
///////// |
|
7812
|
|
|
|
|
|
|
// File: morphodita/tokenizer/generic_tokenizer.h |
|
7813
|
|
|
|
|
|
|
///////// |
|
7814
|
|
|
|
|
|
|
|
|
7815
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
7816
|
|
|
|
|
|
|
// |
|
7817
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
7818
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
7819
|
|
|
|
|
|
|
// |
|
7820
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
7821
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
7822
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
7823
|
|
|
|
|
|
|
|
|
7824
|
|
|
|
|
|
|
namespace morphodita { |
|
7825
|
|
|
|
|
|
|
|
|
7826
|
6
|
|
|
|
|
|
class generic_tokenizer : public ragel_tokenizer { |
|
7827
|
|
|
|
|
|
|
public: |
|
7828
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
|
7829
|
|
|
|
|
|
|
generic_tokenizer(unsigned version); |
|
7830
|
|
|
|
|
|
|
|
|
7831
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
|
7832
|
|
|
|
|
|
|
}; |
|
7833
|
|
|
|
|
|
|
|
|
7834
|
|
|
|
|
|
|
} // namespace morphodita |
|
7835
|
|
|
|
|
|
|
|
|
7836
|
|
|
|
|
|
|
///////// |
|
7837
|
|
|
|
|
|
|
// File: morphodita/morpho/external_morpho.cpp |
|
7838
|
|
|
|
|
|
|
///////// |
|
7839
|
|
|
|
|
|
|
|
|
7840
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
7841
|
|
|
|
|
|
|
// |
|
7842
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
7843
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
7844
|
|
|
|
|
|
|
// |
|
7845
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
7846
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
7847
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
7848
|
|
|
|
|
|
|
|
|
7849
|
|
|
|
|
|
|
namespace morphodita { |
|
7850
|
|
|
|
|
|
|
|
|
7851
|
0
|
|
|
|
|
|
bool external_morpho::load(istream& is) { |
|
7852
|
|
|
|
|
|
|
binary_decoder data; |
|
7853
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
0
|
|
|
|
|
|
|
7854
|
|
|
|
|
|
|
|
|
7855
|
|
|
|
|
|
|
try { |
|
7856
|
|
|
|
|
|
|
// Load unknown_tag |
|
7857
|
0
|
0
|
|
|
|
|
unsigned length = data.next_1B(); |
|
7858
|
0
|
0
|
|
|
|
|
unknown_tag.assign(data.next(length), length); |
|
|
|
0
|
|
|
|
|
|
|
7859
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
7860
|
|
|
|
|
|
|
return false; |
|
7861
|
|
|
|
|
|
|
} |
|
7862
|
|
|
|
|
|
|
|
|
7863
|
0
|
|
|
|
|
|
return data.is_end(); |
|
7864
|
|
|
|
|
|
|
} |
|
7865
|
|
|
|
|
|
|
|
|
7866
|
0
|
|
|
|
|
|
int external_morpho::analyze(string_piece form, guesser_mode /*guesser*/, vector& lemmas) const { |
|
7867
|
|
|
|
|
|
|
lemmas.clear(); |
|
7868
|
|
|
|
|
|
|
|
|
7869
|
0
|
0
|
|
|
|
|
if (form.len) { |
|
7870
|
|
|
|
|
|
|
// Start by skipping the first form |
|
7871
|
|
|
|
|
|
|
string_piece lemmatags = form; |
|
7872
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
|
0
|
|
|
|
|
|
|
7873
|
0
|
0
|
|
|
|
|
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
|
7874
|
|
|
|
|
|
|
|
|
7875
|
|
|
|
|
|
|
// Split lemmatags using ' ' into lemma-tag pairs. |
|
7876
|
0
|
0
|
|
|
|
|
while (lemmatags.len) { |
|
7877
|
|
|
|
|
|
|
auto lemma_start = lemmatags.str; |
|
7878
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
|
0
|
|
|
|
|
|
|
7879
|
0
|
0
|
|
|
|
|
if (!lemmatags.len) break; |
|
7880
|
|
|
|
|
|
|
auto lemma_len = lemmatags.str - lemma_start; |
|
7881
|
0
|
|
|
|
|
|
lemmatags.len--, lemmatags.str++; |
|
7882
|
|
|
|
|
|
|
|
|
7883
|
|
|
|
|
|
|
auto tag_start = lemmatags.str; |
|
7884
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
|
0
|
|
|
|
|
|
|
7885
|
|
|
|
|
|
|
auto tag_len = lemmatags.str - tag_start; |
|
7886
|
0
|
0
|
|
|
|
|
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
|
7887
|
|
|
|
|
|
|
|
|
7888
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(lemma_start, lemma_len), string(tag_start, tag_len)); |
|
7889
|
|
|
|
|
|
|
} |
|
7890
|
|
|
|
|
|
|
|
|
7891
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
|
7892
|
|
|
|
|
|
|
} |
|
7893
|
|
|
|
|
|
|
|
|
7894
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
|
7895
|
0
|
|
|
|
|
|
return -1; |
|
7896
|
|
|
|
|
|
|
} |
|
7897
|
|
|
|
|
|
|
|
|
7898
|
0
|
|
|
|
|
|
int external_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
|
7899
|
|
|
|
|
|
|
forms.clear(); |
|
7900
|
|
|
|
|
|
|
|
|
7901
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
|
7902
|
|
|
|
|
|
|
|
|
7903
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
|
7904
|
|
|
|
|
|
|
// Start by locating the lemma |
|
7905
|
|
|
|
|
|
|
string_piece formtags = lemma; |
|
7906
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
|
0
|
|
|
|
|
|
|
7907
|
0
|
|
|
|
|
|
string_piece real_lemma(lemma.str, lemma.len - formtags.len); |
|
7908
|
0
|
0
|
|
|
|
|
if (formtags.len) formtags.len--, formtags.str++; |
|
7909
|
|
|
|
|
|
|
|
|
7910
|
|
|
|
|
|
|
// Split formtags using ' ' into form-tag pairs. |
|
7911
|
|
|
|
|
|
|
bool any_result = false; |
|
7912
|
0
|
0
|
|
|
|
|
while (formtags.len) { |
|
7913
|
|
|
|
|
|
|
auto form_start = formtags.str; |
|
7914
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
|
0
|
|
|
|
|
|
|
7915
|
0
|
0
|
|
|
|
|
if (!formtags.len) break; |
|
7916
|
|
|
|
|
|
|
auto form_len = formtags.str - form_start; |
|
7917
|
0
|
|
|
|
|
|
formtags.len--, formtags.str++; |
|
7918
|
|
|
|
|
|
|
|
|
7919
|
|
|
|
|
|
|
auto tag_start = formtags.str; |
|
7920
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
|
0
|
|
|
|
|
|
|
7921
|
|
|
|
|
|
|
auto tag_len = formtags.str - tag_start; |
|
7922
|
0
|
0
|
|
|
|
|
if (formtags.len) formtags.len--, formtags.str++; |
|
7923
|
|
|
|
|
|
|
|
|
7924
|
|
|
|
|
|
|
any_result = true; |
|
7925
|
|
|
|
|
|
|
string tag(tag_start, tag_len); |
|
7926
|
0
|
0
|
|
|
|
|
if (filter.matches(tag.c_str())) { |
|
7927
|
0
|
0
|
|
|
|
|
if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len)); |
|
|
|
0
|
|
|
|
|
|
|
7928
|
0
|
0
|
|
|
|
|
forms.back().forms.emplace_back(string(form_start, form_len), tag); |
|
7929
|
|
|
|
|
|
|
} |
|
7930
|
|
|
|
|
|
|
} |
|
7931
|
|
|
|
|
|
|
|
|
7932
|
0
|
0
|
|
|
|
|
if (any_result) return NO_GUESSER; |
|
7933
|
|
|
|
|
|
|
} |
|
7934
|
|
|
|
|
|
|
|
|
7935
|
|
|
|
|
|
|
return -1; |
|
7936
|
|
|
|
|
|
|
} |
|
7937
|
|
|
|
|
|
|
|
|
7938
|
0
|
|
|
|
|
|
int external_morpho::raw_lemma_len(string_piece lemma) const { |
|
7939
|
|
|
|
|
|
|
unsigned lemma_len = 0; |
|
7940
|
0
|
0
|
|
|
|
|
while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++; |
|
|
|
0
|
|
|
|
|
|
|
7941
|
0
|
|
|
|
|
|
return lemma_len; |
|
7942
|
|
|
|
|
|
|
} |
|
7943
|
|
|
|
|
|
|
|
|
7944
|
0
|
|
|
|
|
|
int external_morpho::lemma_id_len(string_piece lemma) const { |
|
7945
|
|
|
|
|
|
|
unsigned lemma_len = 0; |
|
7946
|
0
|
0
|
|
|
|
|
while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++; |
|
|
|
0
|
|
|
|
|
|
|
7947
|
0
|
|
|
|
|
|
return lemma_len; |
|
7948
|
|
|
|
|
|
|
} |
|
7949
|
|
|
|
|
|
|
|
|
7950
|
0
|
|
|
|
|
|
int external_morpho::raw_form_len(string_piece form) const { |
|
7951
|
|
|
|
|
|
|
unsigned form_len = 0; |
|
7952
|
0
|
0
|
|
|
|
|
while (form_len < form.len && form.str[form_len] != ' ') form_len++; |
|
|
|
0
|
|
|
|
|
|
|
7953
|
0
|
|
|
|
|
|
return form_len; |
|
7954
|
|
|
|
|
|
|
} |
|
7955
|
|
|
|
|
|
|
|
|
7956
|
0
|
|
|
|
|
|
tokenizer* external_morpho::new_tokenizer() const { |
|
7957
|
0
|
|
|
|
|
|
return new generic_tokenizer(version); |
|
7958
|
|
|
|
|
|
|
} |
|
7959
|
|
|
|
|
|
|
|
|
7960
|
|
|
|
|
|
|
} // namespace morphodita |
|
7961
|
|
|
|
|
|
|
|
|
7962
|
|
|
|
|
|
|
///////// |
|
7963
|
|
|
|
|
|
|
// File: morphodita/morpho/generic_lemma_addinfo.h |
|
7964
|
|
|
|
|
|
|
///////// |
|
7965
|
|
|
|
|
|
|
|
|
7966
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
7967
|
|
|
|
|
|
|
// |
|
7968
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
7969
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
7970
|
|
|
|
|
|
|
// |
|
7971
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
7972
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
7973
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
7974
|
|
|
|
|
|
|
|
|
7975
|
|
|
|
|
|
|
namespace morphodita { |
|
7976
|
|
|
|
|
|
|
|
|
7977
|
|
|
|
|
|
|
// Declarations |
|
7978
|
0
|
|
|
|
|
|
struct generic_lemma_addinfo { |
|
7979
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
|
7980
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
|
7981
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
|
7982
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
|
7983
|
|
|
|
|
|
|
|
|
7984
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
|
7985
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
|
7986
|
|
|
|
|
|
|
|
|
7987
|
|
|
|
|
|
|
vector data; |
|
7988
|
|
|
|
|
|
|
}; |
|
7989
|
|
|
|
|
|
|
|
|
7990
|
|
|
|
|
|
|
// Definitions |
|
7991
|
|
|
|
|
|
|
int generic_lemma_addinfo::raw_lemma_len(string_piece lemma) { |
|
7992
|
18
|
|
|
|
|
|
return lemma.len; |
|
7993
|
|
|
|
|
|
|
} |
|
7994
|
|
|
|
|
|
|
|
|
7995
|
|
|
|
|
|
|
int generic_lemma_addinfo::lemma_id_len(string_piece lemma) { |
|
7996
|
7
|
|
|
|
|
|
return lemma.len; |
|
7997
|
|
|
|
|
|
|
} |
|
7998
|
|
|
|
|
|
|
|
|
7999
|
|
|
|
|
|
|
string generic_lemma_addinfo::format(const unsigned char* /*addinfo*/, int /*addinfo_len*/) { |
|
8000
|
|
|
|
|
|
|
return string(); |
|
8001
|
|
|
|
|
|
|
} |
|
8002
|
|
|
|
|
|
|
|
|
8003
|
|
|
|
|
|
|
bool generic_lemma_addinfo::generatable(const unsigned char* /*addinfo*/, int /*addinfo_len*/) { |
|
8004
|
|
|
|
|
|
|
return true; |
|
8005
|
|
|
|
|
|
|
} |
|
8006
|
|
|
|
|
|
|
|
|
8007
|
|
|
|
|
|
|
int generic_lemma_addinfo::parse(string_piece lemma, bool /*die_on_failure*/) { |
|
8008
|
0
|
|
|
|
|
|
return lemma.len; |
|
8009
|
|
|
|
|
|
|
} |
|
8010
|
|
|
|
|
|
|
|
|
8011
|
|
|
|
|
|
|
bool generic_lemma_addinfo::match_lemma_id(const unsigned char* /*other_addinfo*/, int /*other_addinfo_len*/) { |
|
8012
|
|
|
|
|
|
|
return true; |
|
8013
|
|
|
|
|
|
|
} |
|
8014
|
|
|
|
|
|
|
|
|
8015
|
|
|
|
|
|
|
} // namespace morphodita |
|
8016
|
|
|
|
|
|
|
|
|
8017
|
|
|
|
|
|
|
///////// |
|
8018
|
|
|
|
|
|
|
// File: morphodita/morpho/generic_morpho.h |
|
8019
|
|
|
|
|
|
|
///////// |
|
8020
|
|
|
|
|
|
|
|
|
8021
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8022
|
|
|
|
|
|
|
// |
|
8023
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8024
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8025
|
|
|
|
|
|
|
// |
|
8026
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8027
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8028
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8029
|
|
|
|
|
|
|
|
|
8030
|
|
|
|
|
|
|
namespace morphodita { |
|
8031
|
|
|
|
|
|
|
|
|
8032
|
0
|
|
|
|
|
|
class generic_morpho : public morpho { |
|
8033
|
|
|
|
|
|
|
public: |
|
8034
|
1
|
|
|
|
|
|
generic_morpho(unsigned version) : version(version) {} |
|
8035
|
|
|
|
|
|
|
|
|
8036
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
|
8037
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
|
8038
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
|
8039
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
|
8040
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
|
8041
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
|
8042
|
|
|
|
|
|
|
|
|
8043
|
|
|
|
|
|
|
bool load(istream& is); |
|
8044
|
|
|
|
|
|
|
private: |
|
8045
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
|
8046
|
|
|
|
|
|
|
|
|
8047
|
|
|
|
|
|
|
unsigned version; |
|
8048
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
|
8049
|
|
|
|
|
|
|
unique_ptr statistical_guesser; |
|
8050
|
|
|
|
|
|
|
|
|
8051
|
|
|
|
|
|
|
string unknown_tag, number_tag, punctuation_tag, symbol_tag; |
|
8052
|
|
|
|
|
|
|
}; |
|
8053
|
|
|
|
|
|
|
|
|
8054
|
|
|
|
|
|
|
} // namespace morphodita |
|
8055
|
|
|
|
|
|
|
|
|
8056
|
|
|
|
|
|
|
///////// |
|
8057
|
|
|
|
|
|
|
// File: morphodita/morpho/generic_morpho.cpp |
|
8058
|
|
|
|
|
|
|
///////// |
|
8059
|
|
|
|
|
|
|
|
|
8060
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8061
|
|
|
|
|
|
|
// |
|
8062
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8063
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8064
|
|
|
|
|
|
|
// |
|
8065
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8066
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8067
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8068
|
|
|
|
|
|
|
|
|
8069
|
|
|
|
|
|
|
namespace morphodita { |
|
8070
|
|
|
|
|
|
|
|
|
8071
|
1
|
|
|
|
|
|
bool generic_morpho::load(istream& is) { |
|
8072
|
|
|
|
|
|
|
binary_decoder data; |
|
8073
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
50
|
|
|
|
|
|
|
8074
|
|
|
|
|
|
|
|
|
8075
|
|
|
|
|
|
|
try { |
|
8076
|
|
|
|
|
|
|
// Load tags |
|
8077
|
1
|
50
|
|
|
|
|
unsigned length = data.next_1B(); |
|
8078
|
1
|
50
|
|
|
|
|
unknown_tag.assign(data.next(length), length); |
|
8079
|
1
|
50
|
|
|
|
|
length = data.next_1B(); |
|
8080
|
1
|
50
|
|
|
|
|
number_tag.assign(data.next(length), length); |
|
8081
|
1
|
50
|
|
|
|
|
length = data.next_1B(); |
|
8082
|
1
|
50
|
|
|
|
|
punctuation_tag.assign(data.next(length), length); |
|
8083
|
1
|
50
|
|
|
|
|
length = data.next_1B(); |
|
8084
|
1
|
50
|
|
|
|
|
symbol_tag.assign(data.next(length), length); |
|
8085
|
|
|
|
|
|
|
|
|
8086
|
|
|
|
|
|
|
// Load dictionary |
|
8087
|
1
|
50
|
|
|
|
|
dictionary.load(data); |
|
8088
|
|
|
|
|
|
|
|
|
8089
|
|
|
|
|
|
|
// Optionally statistical guesser if present |
|
8090
|
|
|
|
|
|
|
statistical_guesser.reset(); |
|
8091
|
1
|
50
|
|
|
|
|
if (data.next_1B()) { |
|
|
|
50
|
|
|
|
|
|
|
8092
|
0
|
0
|
|
|
|
|
statistical_guesser.reset(new morpho_statistical_guesser()); |
|
8093
|
0
|
0
|
|
|
|
|
statistical_guesser->load(data); |
|
8094
|
|
0
|
|
|
|
|
} |
|
8095
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
8096
|
|
|
|
|
|
|
return false; |
|
8097
|
|
|
|
|
|
|
} |
|
8098
|
|
|
|
|
|
|
|
|
8099
|
1
|
|
|
|
|
|
return data.is_end(); |
|
8100
|
|
|
|
|
|
|
} |
|
8101
|
|
|
|
|
|
|
|
|
8102
|
14
|
|
|
|
|
|
int generic_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
|
8103
|
|
|
|
|
|
|
lemmas.clear(); |
|
8104
|
|
|
|
|
|
|
|
|
8105
|
14
|
50
|
|
|
|
|
if (form.len) { |
|
8106
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
|
8107
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
|
8108
|
|
|
|
|
|
|
string form_lc; // all lowercase |
|
8109
|
14
|
50
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
|
8110
|
|
|
|
|
|
|
|
|
8111
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
|
8112
|
14
|
50
|
|
|
|
|
dictionary.analyze(form, lemmas); |
|
8113
|
14
|
50
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
|
0
|
|
|
|
|
|
|
8114
|
14
|
100
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
|
50
|
|
|
|
|
|
|
8115
|
14
|
100
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
|
8116
|
|
|
|
|
|
|
|
|
8117
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers, punctuation and symbols. |
|
8118
|
4
|
50
|
|
|
|
|
analyze_special(form, lemmas); |
|
8119
|
4
|
50
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
|
8120
|
|
|
|
|
|
|
|
|
8121
|
|
|
|
|
|
|
// For the statistical guesser, use all casing variants. |
|
8122
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && statistical_guesser) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8123
|
0
|
0
|
|
|
|
|
if (form_uclc.empty() && form_lc.empty()) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8124
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, nullptr); |
|
8125
|
|
|
|
|
|
|
else { |
|
8126
|
0
|
0
|
|
|
|
|
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
|
8127
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, &used_rules); |
|
8128
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
|
|
0
|
|
|
|
|
|
|
8129
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
|
|
0
|
|
|
|
|
|
|
8130
|
|
|
|
|
|
|
} |
|
8131
|
|
|
|
|
|
|
} |
|
8132
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
|
8133
|
|
|
|
|
|
|
} |
|
8134
|
|
|
|
|
|
|
|
|
8135
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
|
8136
|
14
|
|
|
|
|
|
return -1; |
|
8137
|
|
|
|
|
|
|
} |
|
8138
|
|
|
|
|
|
|
|
|
8139
|
0
|
|
|
|
|
|
int generic_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
|
8140
|
|
|
|
|
|
|
forms.clear(); |
|
8141
|
|
|
|
|
|
|
|
|
8142
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
|
8143
|
|
|
|
|
|
|
|
|
8144
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
|
8145
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
|
0
|
|
|
|
|
|
|
8146
|
|
|
|
|
|
|
return NO_GUESSER; |
|
8147
|
|
|
|
|
|
|
} |
|
8148
|
|
|
|
|
|
|
|
|
8149
|
|
|
|
|
|
|
return -1; |
|
8150
|
|
|
|
|
|
|
} |
|
8151
|
|
|
|
|
|
|
|
|
8152
|
18
|
|
|
|
|
|
int generic_morpho::raw_lemma_len(string_piece lemma) const { |
|
8153
|
18
|
|
|
|
|
|
return generic_lemma_addinfo::raw_lemma_len(lemma); |
|
8154
|
|
|
|
|
|
|
} |
|
8155
|
|
|
|
|
|
|
|
|
8156
|
7
|
|
|
|
|
|
int generic_morpho::lemma_id_len(string_piece lemma) const { |
|
8157
|
7
|
|
|
|
|
|
return generic_lemma_addinfo::lemma_id_len(lemma); |
|
8158
|
|
|
|
|
|
|
} |
|
8159
|
|
|
|
|
|
|
|
|
8160
|
14
|
|
|
|
|
|
int generic_morpho::raw_form_len(string_piece form) const { |
|
8161
|
14
|
|
|
|
|
|
return form.len; |
|
8162
|
|
|
|
|
|
|
} |
|
8163
|
|
|
|
|
|
|
|
|
8164
|
0
|
|
|
|
|
|
tokenizer* generic_morpho::new_tokenizer() const { |
|
8165
|
0
|
|
|
|
|
|
return new generic_tokenizer(version); |
|
8166
|
|
|
|
|
|
|
} |
|
8167
|
|
|
|
|
|
|
|
|
8168
|
4
|
|
|
|
|
|
void generic_morpho::analyze_special(string_piece form, vector& lemmas) const { |
|
8169
|
|
|
|
|
|
|
using namespace unilib; |
|
8170
|
|
|
|
|
|
|
|
|
8171
|
|
|
|
|
|
|
// Analyzer for numbers, punctuation and symbols. |
|
8172
|
|
|
|
|
|
|
// Number is anything matching [+-]? is_Pn* ([.,] is_Pn*)? ([Ee] [+-]? is_Pn+)? for at least one is_Pn* nonempty. |
|
8173
|
|
|
|
|
|
|
// Punctuation is any form beginning with either unicode punctuation or punctuation_exceptions character. |
|
8174
|
|
|
|
|
|
|
// Beware that numbers takes precedence, so - is punctuation, -3 is number, -. is punctuation, -.3 is number. |
|
8175
|
8
|
50
|
|
|
|
|
if (!form.len) return; |
|
8176
|
|
|
|
|
|
|
|
|
8177
|
4
|
|
|
|
|
|
string_piece number = form; |
|
8178
|
4
|
|
|
|
|
|
char32_t first = utf8::decode(number.str, number.len); |
|
8179
|
|
|
|
|
|
|
|
|
8180
|
|
|
|
|
|
|
// Try matching a number. |
|
8181
|
|
|
|
|
|
|
char32_t codepoint = first; |
|
8182
|
|
|
|
|
|
|
bool any_digit = false; |
|
8183
|
4
|
50
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
|
8184
|
4
|
50
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
|
8185
|
4
|
50
|
|
|
|
|
if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len); |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
8186
|
4
|
50
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
|
8187
|
4
|
50
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
|
0
|
|
|
|
|
|
|
8188
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
|
8189
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
|
8190
|
|
|
|
|
|
|
any_digit = false; |
|
8191
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
|
8192
|
|
|
|
|
|
|
} |
|
8193
|
|
|
|
|
|
|
|
|
8194
|
4
|
50
|
|
|
|
|
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8195
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
|
8196
|
0
|
|
|
|
|
|
return; |
|
8197
|
|
|
|
|
|
|
} |
|
8198
|
|
|
|
|
|
|
|
|
8199
|
|
|
|
|
|
|
// Try matching punctuation or symbol. |
|
8200
|
|
|
|
|
|
|
bool punctuation = true, symbol = true; |
|
8201
|
8
|
|
|
|
|
|
string_piece form_ori = form; |
|
8202
|
8
|
100
|
|
|
|
|
while (form.len) { |
|
8203
|
4
|
|
|
|
|
|
codepoint = utf8::decode(form.str, form.len); |
|
8204
|
8
|
50
|
|
|
|
|
punctuation = punctuation && unicode::category(codepoint) & unicode::P; |
|
|
|
50
|
|
|
|
|
|
|
8205
|
8
|
50
|
|
|
|
|
symbol = symbol && unicode::category(codepoint) & unicode::S; |
|
|
|
50
|
|
|
|
|
|
|
8206
|
|
|
|
|
|
|
} |
|
8207
|
4
|
50
|
|
|
|
|
if (punctuation) |
|
8208
|
8
|
50
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
|
8209
|
0
|
0
|
|
|
|
|
else if (symbol) |
|
8210
|
4
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), symbol_tag); |
|
8211
|
|
|
|
|
|
|
} |
|
8212
|
|
|
|
|
|
|
|
|
8213
|
|
|
|
|
|
|
} // namespace morphodita |
|
8214
|
|
|
|
|
|
|
|
|
8215
|
|
|
|
|
|
|
///////// |
|
8216
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_ids.h |
|
8217
|
|
|
|
|
|
|
///////// |
|
8218
|
|
|
|
|
|
|
|
|
8219
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8220
|
|
|
|
|
|
|
// |
|
8221
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8222
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8223
|
|
|
|
|
|
|
// |
|
8224
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8225
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8226
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8227
|
|
|
|
|
|
|
|
|
8228
|
|
|
|
|
|
|
namespace morphodita { |
|
8229
|
|
|
|
|
|
|
|
|
8230
|
|
|
|
|
|
|
class morpho_ids { |
|
8231
|
|
|
|
|
|
|
public: |
|
8232
|
|
|
|
|
|
|
enum morpho_id { |
|
8233
|
|
|
|
|
|
|
CZECH = 0, |
|
8234
|
|
|
|
|
|
|
ENGLISH_V1 = 1, |
|
8235
|
|
|
|
|
|
|
GENERIC = 2, |
|
8236
|
|
|
|
|
|
|
EXTERNAL = 3, |
|
8237
|
|
|
|
|
|
|
ENGLISH_V2 = 4, |
|
8238
|
|
|
|
|
|
|
ENGLISH_V3 = 5, ENGLISH = ENGLISH_V3, |
|
8239
|
|
|
|
|
|
|
SLOVAK_PDT = 6, |
|
8240
|
|
|
|
|
|
|
DERIVATOR_DICTIONARY = 7, |
|
8241
|
|
|
|
|
|
|
}; |
|
8242
|
|
|
|
|
|
|
|
|
8243
|
|
|
|
|
|
|
static bool parse(const string& str, morpho_id& id) { |
|
8244
|
|
|
|
|
|
|
if (str == "czech") return id = CZECH, true; |
|
8245
|
|
|
|
|
|
|
if (str == "english") return id = ENGLISH, true; |
|
8246
|
|
|
|
|
|
|
if (str == "external") return id = EXTERNAL, true; |
|
8247
|
|
|
|
|
|
|
if (str == "generic") return id = GENERIC, true; |
|
8248
|
|
|
|
|
|
|
if (str == "slovak_pdt") return id = SLOVAK_PDT, true; |
|
8249
|
|
|
|
|
|
|
return false; |
|
8250
|
|
|
|
|
|
|
} |
|
8251
|
|
|
|
|
|
|
}; |
|
8252
|
|
|
|
|
|
|
|
|
8253
|
|
|
|
|
|
|
typedef morpho_ids::morpho_id morpho_id; |
|
8254
|
|
|
|
|
|
|
|
|
8255
|
|
|
|
|
|
|
} // namespace morphodita |
|
8256
|
|
|
|
|
|
|
|
|
8257
|
|
|
|
|
|
|
///////// |
|
8258
|
|
|
|
|
|
|
// File: utils/new_unique_ptr.h |
|
8259
|
|
|
|
|
|
|
///////// |
|
8260
|
|
|
|
|
|
|
|
|
8261
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
8262
|
|
|
|
|
|
|
// |
|
8263
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8264
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8265
|
|
|
|
|
|
|
// |
|
8266
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8267
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8268
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8269
|
|
|
|
|
|
|
|
|
8270
|
|
|
|
|
|
|
namespace utils { |
|
8271
|
|
|
|
|
|
|
|
|
8272
|
|
|
|
|
|
|
template |
|
8273
|
2
|
|
|
|
|
|
unique_ptr new_unique_ptr(Args&&... args) { |
|
8274
|
2
|
0
|
|
|
|
|
return unique_ptr(new T(std::forward(args)...)); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8275
|
|
|
|
|
|
|
} |
|
8276
|
|
|
|
|
|
|
|
|
8277
|
|
|
|
|
|
|
} // namespace utils |
|
8278
|
|
|
|
|
|
|
|
|
8279
|
|
|
|
|
|
|
///////// |
|
8280
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho.cpp |
|
8281
|
|
|
|
|
|
|
///////// |
|
8282
|
|
|
|
|
|
|
|
|
8283
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8284
|
|
|
|
|
|
|
// |
|
8285
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8286
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8287
|
|
|
|
|
|
|
// |
|
8288
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8289
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8290
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8291
|
|
|
|
|
|
|
|
|
8292
|
|
|
|
|
|
|
namespace morphodita { |
|
8293
|
|
|
|
|
|
|
|
|
8294
|
1
|
|
|
|
|
|
morpho* morpho::load(istream& is) { |
|
8295
|
1
|
|
|
|
|
|
morpho_id id = morpho_id(is.get()); |
|
8296
|
1
|
|
|
|
|
|
switch (id) { |
|
8297
|
|
|
|
|
|
|
case morpho_ids::CZECH: |
|
8298
|
|
|
|
|
|
|
{ |
|
8299
|
0
|
|
|
|
|
|
auto res = new_unique_ptr(czech_morpho::morpho_language::CZECH, 1); |
|
8300
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
0
|
|
|
|
|
|
|
8301
|
|
|
|
|
|
|
break; |
|
8302
|
|
|
|
|
|
|
} |
|
8303
|
|
|
|
|
|
|
case morpho_ids::ENGLISH_V1: |
|
8304
|
|
|
|
|
|
|
case morpho_ids::ENGLISH_V2: |
|
8305
|
|
|
|
|
|
|
case morpho_ids::ENGLISH_V3: |
|
8306
|
|
|
|
|
|
|
{ |
|
8307
|
|
|
|
|
|
|
auto res = new_unique_ptr(id == morpho_ids::ENGLISH_V1 ? 1 : |
|
8308
|
|
|
|
|
|
|
id == morpho_ids::ENGLISH_V2 ? 2 : |
|
8309
|
0
|
0
|
|
|
|
|
3); |
|
|
|
0
|
|
|
|
|
|
|
8310
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
0
|
|
|
|
|
|
|
8311
|
|
|
|
|
|
|
break; |
|
8312
|
|
|
|
|
|
|
} |
|
8313
|
|
|
|
|
|
|
case morpho_ids::EXTERNAL: |
|
8314
|
|
|
|
|
|
|
{ |
|
8315
|
0
|
|
|
|
|
|
auto res = new_unique_ptr(1); |
|
8316
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
0
|
|
|
|
|
|
|
8317
|
|
|
|
|
|
|
break; |
|
8318
|
|
|
|
|
|
|
} |
|
8319
|
|
|
|
|
|
|
case morpho_ids::GENERIC: |
|
8320
|
|
|
|
|
|
|
{ |
|
8321
|
1
|
|
|
|
|
|
auto res = new_unique_ptr(1); |
|
8322
|
1
|
50
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
50
|
|
|
|
|
|
|
8323
|
|
|
|
|
|
|
break; |
|
8324
|
|
|
|
|
|
|
} |
|
8325
|
|
|
|
|
|
|
case morpho_ids::SLOVAK_PDT: |
|
8326
|
|
|
|
|
|
|
{ |
|
8327
|
0
|
|
|
|
|
|
auto res = new_unique_ptr(czech_morpho::morpho_language::SLOVAK, 3); |
|
8328
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
0
|
|
|
|
|
|
|
8329
|
|
|
|
|
|
|
break; |
|
8330
|
|
|
|
|
|
|
} |
|
8331
|
|
|
|
|
|
|
case morpho_ids::DERIVATOR_DICTIONARY: |
|
8332
|
|
|
|
|
|
|
{ |
|
8333
|
0
|
|
|
|
|
|
auto derinet = new_unique_ptr(); |
|
8334
|
0
|
0
|
|
|
|
|
if (!derinet->load(is)) return nullptr; |
|
|
|
0
|
|
|
|
|
|
|
8335
|
|
|
|
|
|
|
|
|
8336
|
0
|
0
|
|
|
|
|
unique_ptr dictionary(load(is)); |
|
8337
|
0
|
0
|
|
|
|
|
if (!dictionary) return nullptr; |
|
8338
|
0
|
|
|
|
|
|
derinet->dictionary = dictionary.get(); |
|
8339
|
|
|
|
|
|
|
dictionary->derinet.reset(derinet.release()); |
|
8340
|
0
|
|
|
|
|
|
return dictionary.release(); |
|
8341
|
|
|
|
|
|
|
} |
|
8342
|
|
|
|
|
|
|
} |
|
8343
|
|
|
|
|
|
|
|
|
8344
|
|
|
|
|
|
|
return nullptr; |
|
8345
|
|
|
|
|
|
|
} |
|
8346
|
|
|
|
|
|
|
|
|
8347
|
0
|
|
|
|
|
|
morpho* morpho::load(const char* fname) { |
|
8348
|
0
|
0
|
|
|
|
|
ifstream f(path_from_utf8(fname).c_str(), ifstream::binary); |
|
8349
|
0
|
0
|
|
|
|
|
if (!f) return nullptr; |
|
8350
|
|
|
|
|
|
|
|
|
8351
|
0
|
0
|
|
|
|
|
return load(f); |
|
8352
|
|
|
|
|
|
|
} |
|
8353
|
|
|
|
|
|
|
|
|
8354
|
0
|
|
|
|
|
|
const derivator* morpho::get_derivator() const { |
|
8355
|
0
|
|
|
|
|
|
return derinet.get(); |
|
8356
|
|
|
|
|
|
|
} |
|
8357
|
|
|
|
|
|
|
|
|
8358
|
|
|
|
|
|
|
} // namespace morphodita |
|
8359
|
|
|
|
|
|
|
|
|
8360
|
|
|
|
|
|
|
///////// |
|
8361
|
|
|
|
|
|
|
// File: morphodita/morpho/morpho_statistical_guesser.cpp |
|
8362
|
|
|
|
|
|
|
///////// |
|
8363
|
|
|
|
|
|
|
|
|
8364
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8365
|
|
|
|
|
|
|
// |
|
8366
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8367
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8368
|
|
|
|
|
|
|
// |
|
8369
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8370
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8371
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8372
|
|
|
|
|
|
|
|
|
8373
|
|
|
|
|
|
|
namespace morphodita { |
|
8374
|
|
|
|
|
|
|
|
|
8375
|
0
|
|
|
|
|
|
void morpho_statistical_guesser::load(binary_decoder& data) { |
|
8376
|
|
|
|
|
|
|
// Load tags and default tag |
|
8377
|
0
|
|
|
|
|
|
tags.resize(data.next_2B()); |
|
8378
|
0
|
0
|
|
|
|
|
for (auto&& tag : tags) { |
|
8379
|
0
|
|
|
|
|
|
tag.resize(data.next_1B()); |
|
8380
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < tag.size(); i++) |
|
8381
|
0
|
|
|
|
|
|
tag[i] = data.next_1B(); |
|
8382
|
|
|
|
|
|
|
} |
|
8383
|
0
|
|
|
|
|
|
default_tag = data.next_2B(); |
|
8384
|
|
|
|
|
|
|
|
|
8385
|
|
|
|
|
|
|
// Load rules |
|
8386
|
0
|
|
|
|
|
|
rules.load(data); |
|
8387
|
0
|
|
|
|
|
|
} |
|
8388
|
|
|
|
|
|
|
|
|
8389
|
|
|
|
|
|
|
// Helper method for analyze. |
|
8390
|
0
|
|
|
|
|
|
static bool contains(morpho_statistical_guesser::used_rules* used, const string& rule) { |
|
8391
|
0
|
0
|
|
|
|
|
if (!used) return false; |
|
8392
|
|
|
|
|
|
|
|
|
8393
|
0
|
0
|
|
|
|
|
for (auto&& used_rule : *used) |
|
8394
|
0
|
0
|
|
|
|
|
if (used_rule == rule) |
|
8395
|
|
|
|
|
|
|
return true; |
|
8396
|
|
|
|
|
|
|
|
|
8397
|
|
|
|
|
|
|
return false; |
|
8398
|
|
|
|
|
|
|
} |
|
8399
|
|
|
|
|
|
|
|
|
8400
|
|
|
|
|
|
|
// Produces unique lemma-tag pairs. |
|
8401
|
0
|
|
|
|
|
|
void morpho_statistical_guesser::analyze(string_piece form, vector& lemmas, morpho_statistical_guesser::used_rules* used) { |
|
8402
|
|
|
|
|
|
|
unsigned lemmas_initial_size = lemmas.size(); |
|
8403
|
|
|
|
|
|
|
|
|
8404
|
|
|
|
|
|
|
// We have rules in format "suffix prefix" in rules. |
|
8405
|
|
|
|
|
|
|
// Find the matching rule with longest suffix and of those with longest prefix. |
|
8406
|
0
|
0
|
|
|
|
|
string rule_label; rule_label.reserve(12); |
|
8407
|
|
|
|
|
|
|
unsigned suffix_len = 0; |
|
8408
|
0
|
0
|
|
|
|
|
for (; suffix_len < form.len; suffix_len++) { |
|
8409
|
0
|
0
|
|
|
|
|
rule_label.push_back(form.str[form.len - (suffix_len + 1)]); |
|
8410
|
0
|
0
|
|
|
|
|
if (!rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); })) |
|
8411
|
|
|
|
|
|
|
break; |
|
8412
|
|
|
|
|
|
|
} |
|
8413
|
|
|
|
|
|
|
|
|
8414
|
0
|
0
|
|
|
|
|
for (suffix_len++; suffix_len--; ) { |
|
8415
|
0
|
|
|
|
|
|
rule_label.resize(suffix_len); |
|
8416
|
0
|
0
|
|
|
|
|
rule_label.push_back(' '); |
|
8417
|
|
|
|
|
|
|
|
|
8418
|
|
|
|
|
|
|
const unsigned char* rule = nullptr; |
|
8419
|
|
|
|
|
|
|
unsigned rule_prefix_len = 0; |
|
8420
|
0
|
0
|
|
|
|
|
for (unsigned prefix_len = 0; prefix_len + suffix_len <= form.len; prefix_len++) { |
|
8421
|
0
|
0
|
|
|
|
|
if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]); |
|
|
|
0
|
|
|
|
|
|
|
8422
|
0
|
|
|
|
|
|
const unsigned char* found = rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); }); |
|
8423
|
0
|
0
|
|
|
|
|
if (!found) break; |
|
8424
|
0
|
0
|
|
|
|
|
if (*(found += sizeof(uint16_t))) { |
|
8425
|
|
|
|
|
|
|
rule = found; |
|
8426
|
|
|
|
|
|
|
rule_prefix_len = prefix_len; |
|
8427
|
|
|
|
|
|
|
} |
|
8428
|
|
|
|
|
|
|
} |
|
8429
|
|
|
|
|
|
|
|
|
8430
|
0
|
0
|
|
|
|
|
if (rule) { |
|
8431
|
0
|
|
|
|
|
|
rule_label.resize(suffix_len + 1 + rule_prefix_len); |
|
8432
|
0
|
0
|
|
|
|
|
if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' ' |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8433
|
0
|
0
|
|
|
|
|
if (used) used->push_back(rule_label); |
|
|
|
0
|
|
|
|
|
|
|
8434
|
0
|
0
|
|
|
|
|
for (int rules_len = *rule++; rules_len; rules_len--) { |
|
8435
|
0
|
|
|
|
|
|
unsigned pref_del_len = *rule++; const char* pref_del = (const char*)rule; rule += pref_del_len; |
|
8436
|
0
|
|
|
|
|
|
unsigned pref_add_len = *rule++; const char* pref_add = (const char*)rule; rule += pref_add_len; |
|
8437
|
0
|
|
|
|
|
|
unsigned suff_del_len = *rule++; const char* suff_del = (const char*)rule; rule += suff_del_len; |
|
8438
|
0
|
|
|
|
|
|
unsigned suff_add_len = *rule++; const char* suff_add = (const char*)rule; rule += suff_add_len; |
|
8439
|
0
|
|
|
|
|
|
unsigned tags_len = *rule++; const uint16_t* tags = (const uint16_t*)rule; rule += tags_len * sizeof(uint16_t); |
|
8440
|
|
|
|
|
|
|
|
|
8441
|
0
|
0
|
|
|
|
|
if (pref_del_len + suff_del_len > form.len || |
|
|
|
0
|
|
|
|
|
|
|
8442
|
0
|
0
|
|
|
|
|
(pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) || |
|
|
|
0
|
|
|
|
|
|
|
8443
|
0
|
0
|
|
|
|
|
(suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) || |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8444
|
0
|
|
|
|
|
|
(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len == 0)) |
|
8445
|
0
|
|
|
|
|
|
continue; |
|
8446
|
|
|
|
|
|
|
|
|
8447
|
|
|
|
|
|
|
string lemma; |
|
8448
|
0
|
0
|
|
|
|
|
lemma.reserve(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len); |
|
8449
|
0
|
0
|
|
|
|
|
if (pref_add_len) lemma.append(pref_add, pref_add_len); |
|
|
|
0
|
|
|
|
|
|
|
8450
|
0
|
0
|
|
|
|
|
if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len); |
|
|
|
0
|
|
|
|
|
|
|
8451
|
0
|
0
|
|
|
|
|
if (suff_add_len) lemma.append(suff_add, suff_add_len); |
|
|
|
0
|
|
|
|
|
|
|
8452
|
0
|
0
|
|
|
|
|
while (tags_len--) |
|
8453
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(lemma, this->tags[unaligned_load_inc(tags)]); |
|
8454
|
|
|
|
|
|
|
} |
|
8455
|
|
|
|
|
|
|
} |
|
8456
|
|
|
|
|
|
|
break; |
|
8457
|
|
|
|
|
|
|
} |
|
8458
|
|
|
|
|
|
|
} |
|
8459
|
|
|
|
|
|
|
|
|
8460
|
|
|
|
|
|
|
// If nothing was found, use default tag. |
|
8461
|
0
|
0
|
|
|
|
|
if (lemmas.size() == lemmas_initial_size) |
|
8462
|
0
|
0
|
|
|
|
|
if (!contains(used, string())) { |
|
8463
|
0
|
0
|
|
|
|
|
if (used) used->push_back(string()); |
|
8464
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), tags[default_tag]); |
|
8465
|
|
|
|
|
|
|
} |
|
8466
|
0
|
|
|
|
|
|
} |
|
8467
|
|
|
|
|
|
|
|
|
8468
|
|
|
|
|
|
|
} // namespace morphodita |
|
8469
|
|
|
|
|
|
|
|
|
8470
|
|
|
|
|
|
|
///////// |
|
8471
|
|
|
|
|
|
|
// File: morphodita/morpho/tag_filter.cpp |
|
8472
|
|
|
|
|
|
|
///////// |
|
8473
|
|
|
|
|
|
|
|
|
8474
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8475
|
|
|
|
|
|
|
// |
|
8476
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8477
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8478
|
|
|
|
|
|
|
// |
|
8479
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8480
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8481
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8482
|
|
|
|
|
|
|
|
|
8483
|
|
|
|
|
|
|
namespace morphodita { |
|
8484
|
|
|
|
|
|
|
|
|
8485
|
0
|
|
|
|
|
|
tag_filter::tag_filter(const char* filter) { |
|
8486
|
0
|
0
|
|
|
|
|
if (!filter) return; |
|
8487
|
|
|
|
|
|
|
|
|
8488
|
0
|
0
|
|
|
|
|
wildcard.assign(filter); |
|
8489
|
|
|
|
|
|
|
filter = wildcard.c_str(); |
|
8490
|
|
|
|
|
|
|
|
|
8491
|
0
|
0
|
|
|
|
|
for (int tag_pos = 0, filter_pos = 0; filter[filter_pos]; tag_pos++, filter_pos++) { |
|
8492
|
0
|
0
|
|
|
|
|
if (filter[filter_pos] == '?') continue; |
|
8493
|
0
|
0
|
|
|
|
|
if (filter[filter_pos] == '[') { |
|
8494
|
0
|
|
|
|
|
|
filter_pos++; |
|
8495
|
|
|
|
|
|
|
|
|
8496
|
0
|
|
|
|
|
|
bool negate = false; |
|
8497
|
0
|
0
|
|
|
|
|
if (filter[filter_pos] == '^') negate = true, filter_pos++; |
|
8498
|
|
|
|
|
|
|
|
|
8499
|
0
|
|
|
|
|
|
int chars_start = filter_pos; |
|
8500
|
0
|
0
|
|
|
|
|
for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8501
|
0
|
|
|
|
|
|
filter_pos++; |
|
8502
|
|
|
|
|
|
|
|
|
8503
|
0
|
0
|
|
|
|
|
filters.emplace_back(tag_pos, negate, chars_start, filter_pos - chars_start); |
|
8504
|
0
|
0
|
|
|
|
|
if (!filter[filter_pos]) break; |
|
8505
|
|
|
|
|
|
|
} else { |
|
8506
|
0
|
0
|
|
|
|
|
filters.emplace_back(tag_pos, false, filter_pos, 1); |
|
8507
|
|
|
|
|
|
|
} |
|
8508
|
|
|
|
|
|
|
} |
|
8509
|
|
|
|
|
|
|
} |
|
8510
|
|
|
|
|
|
|
|
|
8511
|
|
|
|
|
|
|
} // namespace morphodita |
|
8512
|
|
|
|
|
|
|
|
|
8513
|
|
|
|
|
|
|
///////// |
|
8514
|
|
|
|
|
|
|
// File: morphodita/tagger/tagger.h |
|
8515
|
|
|
|
|
|
|
///////// |
|
8516
|
|
|
|
|
|
|
|
|
8517
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8518
|
|
|
|
|
|
|
// |
|
8519
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8520
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8521
|
|
|
|
|
|
|
// |
|
8522
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8523
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8524
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8525
|
|
|
|
|
|
|
|
|
8526
|
|
|
|
|
|
|
namespace morphodita { |
|
8527
|
|
|
|
|
|
|
|
|
8528
|
1
|
|
|
|
|
|
class tagger { |
|
8529
|
|
|
|
|
|
|
public: |
|
8530
|
0
|
|
|
|
|
|
virtual ~tagger() {} |
|
8531
|
|
|
|
|
|
|
|
|
8532
|
|
|
|
|
|
|
static tagger* load(const char* fname); |
|
8533
|
|
|
|
|
|
|
static tagger* load(istream& is); |
|
8534
|
|
|
|
|
|
|
|
|
8535
|
|
|
|
|
|
|
// Return morpho associated with the tagger. Do not delete the pointer, it is |
|
8536
|
|
|
|
|
|
|
// owned by the tagger instance and deleted in the tagger destructor. |
|
8537
|
|
|
|
|
|
|
virtual const morpho* get_morpho() const = 0; |
|
8538
|
|
|
|
|
|
|
|
|
8539
|
|
|
|
|
|
|
// Perform morphologic analysis and subsequent disambiguation. |
|
8540
|
|
|
|
|
|
|
virtual void tag(const vector& forms, vector& tags, morpho::guesser_mode guesser = morpho::GUESSER_UNSPECIFIED) const = 0; |
|
8541
|
|
|
|
|
|
|
|
|
8542
|
|
|
|
|
|
|
// Perform disambiguation only on given analyses. |
|
8543
|
|
|
|
|
|
|
virtual void tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const = 0; |
|
8544
|
|
|
|
|
|
|
|
|
8545
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this tagger. |
|
8546
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
|
8547
|
|
|
|
|
|
|
// Is equal to get_morpho()->new_tokenizer. |
|
8548
|
|
|
|
|
|
|
tokenizer* new_tokenizer() const; |
|
8549
|
|
|
|
|
|
|
}; |
|
8550
|
|
|
|
|
|
|
|
|
8551
|
|
|
|
|
|
|
} // namespace morphodita |
|
8552
|
|
|
|
|
|
|
|
|
8553
|
|
|
|
|
|
|
///////// |
|
8554
|
|
|
|
|
|
|
// File: morphodita/tagger/elementary_features.h |
|
8555
|
|
|
|
|
|
|
///////// |
|
8556
|
|
|
|
|
|
|
|
|
8557
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8558
|
|
|
|
|
|
|
// |
|
8559
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8560
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8561
|
|
|
|
|
|
|
// |
|
8562
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8563
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8564
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8565
|
|
|
|
|
|
|
|
|
8566
|
|
|
|
|
|
|
namespace morphodita { |
|
8567
|
|
|
|
|
|
|
|
|
8568
|
|
|
|
|
|
|
// Declarations |
|
8569
|
|
|
|
|
|
|
enum elementary_feature_type { PER_FORM, PER_TAG, DYNAMIC }; |
|
8570
|
|
|
|
|
|
|
enum elementary_feature_range { ONLY_CURRENT, ANY_OFFSET }; |
|
8571
|
|
|
|
|
|
|
|
|
8572
|
|
|
|
|
|
|
typedef uint32_t elementary_feature_value; |
|
8573
|
|
|
|
|
|
|
enum :elementary_feature_value { elementary_feature_unknown = 0, elementary_feature_empty = 1 }; |
|
8574
|
|
|
|
|
|
|
|
|
8575
|
|
|
|
|
|
|
struct elementary_feature_description { |
|
8576
|
|
|
|
|
|
|
string name; |
|
8577
|
|
|
|
|
|
|
elementary_feature_type type; |
|
8578
|
|
|
|
|
|
|
elementary_feature_range range; |
|
8579
|
|
|
|
|
|
|
int index; |
|
8580
|
|
|
|
|
|
|
int map_index; |
|
8581
|
|
|
|
|
|
|
}; |
|
8582
|
|
|
|
|
|
|
|
|
8583
|
|
|
|
|
|
|
template |
|
8584
|
0
|
|
|
|
|
|
class elementary_features { |
|
8585
|
|
|
|
|
|
|
public: |
|
8586
|
|
|
|
|
|
|
bool load(istream& is); |
|
8587
|
|
|
|
|
|
|
bool save(ostream& out); |
|
8588
|
|
|
|
|
|
|
|
|
8589
|
|
|
|
|
|
|
vector |
|
8590
|
|
|
|
|
|
|
}; |
|
8591
|
|
|
|
|
|
|
|
|
8592
|
0
|
|
|
|
|
|
class persistent_elementary_feature_map : public persistent_unordered_map { |
|
8593
|
|
|
|
|
|
|
public: |
|
8594
|
|
|
|
|
|
|
persistent_elementary_feature_map() : persistent_unordered_map() {} |
|
8595
|
|
|
|
|
|
|
persistent_elementary_feature_map(const persistent_unordered_map&& map) : persistent_unordered_map(map) {} |
|
8596
|
|
|
|
|
|
|
|
|
8597
|
|
|
|
|
|
|
elementary_feature_value value(const char* feature, int len) const { |
|
8598
|
96
|
|
|
|
|
|
auto* it = at_typed(feature, len); |
|
8599
|
96
|
0
|
|
|
|
|
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8600
|
|
|
|
|
|
|
} |
|
8601
|
|
|
|
|
|
|
}; |
|
8602
|
|
|
|
|
|
|
|
|
8603
|
|
|
|
|
|
|
// Definitions |
|
8604
|
|
|
|
|
|
|
template |
|
8605
|
1
|
|
|
|
|
|
inline bool elementary_features |
|
8606
|
|
|
|
|
|
|
binary_decoder data; |
|
8607
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
50
|
|
|
|
|
|
|
8608
|
|
|
|
|
|
|
|
|
8609
|
|
|
|
|
|
|
try { |
|
8610
|
1
|
50
|
|
|
|
|
maps.resize(data.next_1B()); |
|
|
|
50
|
|
|
|
|
|
|
8611
|
27
|
100
|
|
|
|
|
for (auto&& map : maps) |
|
8612
|
26
|
50
|
|
|
|
|
map.load(data); |
|
|
|
0
|
|
|
|
|
|
|
8613
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
8614
|
|
|
|
|
|
|
return false; |
|
8615
|
|
|
|
|
|
|
} |
|
8616
|
|
|
|
|
|
|
|
|
8617
|
1
|
|
|
|
|
|
return data.is_end(); |
|
8618
|
|
|
|
|
|
|
} |
|
8619
|
|
|
|
|
|
|
|
|
8620
|
|
|
|
|
|
|
} // namespace morphodita |
|
8621
|
|
|
|
|
|
|
|
|
8622
|
|
|
|
|
|
|
///////// |
|
8623
|
|
|
|
|
|
|
// File: morphodita/tagger/vli.h |
|
8624
|
|
|
|
|
|
|
///////// |
|
8625
|
|
|
|
|
|
|
|
|
8626
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8627
|
|
|
|
|
|
|
// |
|
8628
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8629
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8630
|
|
|
|
|
|
|
// |
|
8631
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8632
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8633
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8634
|
|
|
|
|
|
|
|
|
8635
|
|
|
|
|
|
|
namespace morphodita { |
|
8636
|
|
|
|
|
|
|
|
|
8637
|
|
|
|
|
|
|
// Declarations |
|
8638
|
|
|
|
|
|
|
template |
|
8639
|
|
|
|
|
|
|
class vli { |
|
8640
|
|
|
|
|
|
|
public: |
|
8641
|
|
|
|
|
|
|
static int max_length(); |
|
8642
|
|
|
|
|
|
|
static void encode(T value, char*& where); |
|
8643
|
|
|
|
|
|
|
static T decode(const char*& from); |
|
8644
|
|
|
|
|
|
|
}; |
|
8645
|
|
|
|
|
|
|
|
|
8646
|
|
|
|
|
|
|
// Definitions |
|
8647
|
|
|
|
|
|
|
template <> |
|
8648
|
|
|
|
|
|
|
inline int vli::max_length() { |
|
8649
|
|
|
|
|
|
|
return 5; |
|
8650
|
|
|
|
|
|
|
} |
|
8651
|
|
|
|
|
|
|
|
|
8652
|
|
|
|
|
|
|
template <> |
|
8653
|
267
|
|
|
|
|
|
inline void vli::encode(uint32_t value, char*& where) { |
|
8654
|
267
|
50
|
|
|
|
|
if (value < 0x80) *where++ = value; |
|
8655
|
0
|
0
|
|
|
|
|
else if (value < 0x4000) *where++ = (value >> 7) | 0x80u, *where++ = value & 0x7Fu; |
|
8656
|
0
|
0
|
|
|
|
|
else if (value < 0x200000) *where++ = (value >> 14) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
|
8657
|
0
|
0
|
|
|
|
|
else if (value < 0x10000000) *where++ = (value >> 21) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
|
8658
|
0
|
|
|
|
|
|
else *where++ = (value >> 28) | 0x80u, *where++ = ((value >> 21) & 0x7Fu) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
|
8659
|
267
|
|
|
|
|
|
} |
|
8660
|
|
|
|
|
|
|
|
|
8661
|
|
|
|
|
|
|
template <> |
|
8662
|
|
|
|
|
|
|
inline uint32_t vli::decode(const char*& from) { |
|
8663
|
|
|
|
|
|
|
uint32_t value = 0; |
|
8664
|
|
|
|
|
|
|
while (((unsigned char)(*from)) & 0x80u) value = (value << 7) | (((unsigned char)(*from++)) ^ 0x80u); |
|
8665
|
|
|
|
|
|
|
value = (value << 7) | ((unsigned char)(*from++)); |
|
8666
|
|
|
|
|
|
|
return value; |
|
8667
|
|
|
|
|
|
|
} |
|
8668
|
|
|
|
|
|
|
|
|
8669
|
|
|
|
|
|
|
} // namespace morphodita |
|
8670
|
|
|
|
|
|
|
|
|
8671
|
|
|
|
|
|
|
///////// |
|
8672
|
|
|
|
|
|
|
// File: morphodita/tagger/feature_sequences.h |
|
8673
|
|
|
|
|
|
|
///////// |
|
8674
|
|
|
|
|
|
|
|
|
8675
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8676
|
|
|
|
|
|
|
// |
|
8677
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8678
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8679
|
|
|
|
|
|
|
// |
|
8680
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8681
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8682
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8683
|
|
|
|
|
|
|
|
|
8684
|
|
|
|
|
|
|
namespace morphodita { |
|
8685
|
|
|
|
|
|
|
|
|
8686
|
|
|
|
|
|
|
// Declarations |
|
8687
|
|
|
|
|
|
|
typedef int32_t feature_sequence_score; |
|
8688
|
|
|
|
|
|
|
typedef int64_t feature_sequences_score; |
|
8689
|
|
|
|
|
|
|
|
|
8690
|
|
|
|
|
|
|
struct feature_sequence_element { |
|
8691
|
|
|
|
|
|
|
elementary_feature_type type; |
|
8692
|
|
|
|
|
|
|
int elementary_index; |
|
8693
|
|
|
|
|
|
|
int sequence_index; |
|
8694
|
|
|
|
|
|
|
|
|
8695
|
|
|
|
|
|
|
feature_sequence_element() {} |
|
8696
|
|
|
|
|
|
|
feature_sequence_element(elementary_feature_type type, int elementary_index, int sequence_index) : type(type), elementary_index(elementary_index), sequence_index(sequence_index) {} |
|
8697
|
|
|
|
|
|
|
}; |
|
8698
|
|
|
|
|
|
|
|
|
8699
|
21
|
|
|
|
|
|
struct feature_sequence { |
|
8700
|
|
|
|
|
|
|
vector elements; |
|
8701
|
|
|
|
|
|
|
int dependant_range = 1; |
|
8702
|
|
|
|
|
|
|
}; |
|
8703
|
|
|
|
|
|
|
|
|
8704
|
|
|
|
|
|
|
template |
|
8705
|
1
|
0
|
|
|
|
|
class feature_sequences { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8706
|
|
|
|
|
|
|
public: |
|
8707
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::per_form_features per_form_features; |
|
8708
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::per_tag_features per_tag_features; |
|
8709
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::dynamic_features dynamic_features; |
|
8710
|
|
|
|
|
|
|
|
|
8711
|
|
|
|
|
|
|
void parse(int window_size, istream& is); |
|
8712
|
|
|
|
|
|
|
bool load(istream& is); |
|
8713
|
|
|
|
|
|
|
bool save(ostream& os); |
|
8714
|
|
|
|
|
|
|
|
|
8715
|
|
|
|
|
|
|
struct cache; |
|
8716
|
|
|
|
|
|
|
|
|
8717
|
|
|
|
|
|
|
inline void initialize_sentence(const vector& forms, const vector>& analyses, cache& c) const; |
|
8718
|
|
|
|
|
|
|
inline void compute_dynamic_features(int form_index, int tag_index, const dynamic_features* prev_dynamic, dynamic_features& dynamic, cache& c) const; |
|
8719
|
|
|
|
|
|
|
inline feature_sequences_score score(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, cache& c) const; |
|
8720
|
|
|
|
|
|
|
void feature_keys(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, vector& keys, cache& c) const; |
|
8721
|
|
|
|
|
|
|
|
|
8722
|
|
|
|
|
|
|
ElementaryFeatures elementary; |
|
8723
|
|
|
|
|
|
|
vector |
|
8724
|
|
|
|
|
|
|
vector sequences; |
|
8725
|
|
|
|
|
|
|
}; |
|
8726
|
|
|
|
|
|
|
|
|
8727
|
0
|
|
|
|
|
|
class persistent_feature_sequence_map : public persistent_unordered_map { |
|
8728
|
|
|
|
|
|
|
public: |
|
8729
|
|
|
|
|
|
|
persistent_feature_sequence_map() : persistent_unordered_map() {} |
|
8730
|
|
|
|
|
|
|
persistent_feature_sequence_map(const persistent_unordered_map&& map) : persistent_unordered_map(map) {} |
|
8731
|
|
|
|
|
|
|
|
|
8732
|
|
|
|
|
|
|
feature_sequence_score score(const char* feature, int len) const { |
|
8733
|
108
|
|
|
|
|
|
auto* it = at_typed(feature, len); |
|
8734
|
108
|
0
|
|
|
|
|
return it ? unaligned_load(it) : 0; |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8735
|
|
|
|
|
|
|
} |
|
8736
|
|
|
|
|
|
|
}; |
|
8737
|
|
|
|
|
|
|
|
|
8738
|
|
|
|
|
|
|
template using persistent_feature_sequences = feature_sequences; |
|
8739
|
|
|
|
|
|
|
|
|
8740
|
|
|
|
|
|
|
// Definitions |
|
8741
|
|
|
|
|
|
|
template |
|
8742
|
1
|
|
|
|
|
|
inline bool feature_sequences::load(istream& is) { |
|
8743
|
1
|
0
|
|
|
|
|
if (!elementary.load(is)) return false; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8744
|
|
|
|
|
|
|
|
|
8745
|
|
|
|
|
|
|
binary_decoder data; |
|
8746
|
1
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8747
|
|
|
|
|
|
|
|
|
8748
|
|
|
|
|
|
|
try { |
|
8749
|
1
|
0
|
|
|
|
|
sequences.resize(data.next_1B()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8750
|
22
|
0
|
|
|
|
|
for (auto&& sequence : sequences) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8751
|
21
|
0
|
|
|
|
|
sequence.dependant_range = data.next_4B(); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8752
|
21
|
0
|
|
|
|
|
sequence.elements.resize(data.next_1B()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8753
|
66
|
0
|
|
|
|
|
for (auto&& element : sequence.elements) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8754
|
45
|
0
|
|
|
|
|
element.type = elementary_feature_type(data.next_4B()); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8755
|
45
|
0
|
|
|
|
|
element.elementary_index = data.next_4B(); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8756
|
45
|
0
|
|
|
|
|
element.sequence_index = data.next_4B(); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8757
|
|
|
|
|
|
|
} |
|
8758
|
|
|
|
|
|
|
} |
|
8759
|
|
|
|
|
|
|
|
|
8760
|
1
|
0
|
|
|
|
|
scores.resize(data.next_1B()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8761
|
22
|
0
|
|
|
|
|
for (auto&& score : scores) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8762
|
21
|
0
|
|
|
|
|
score.load(data); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8763
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
8764
|
|
|
|
|
|
|
return false; |
|
8765
|
|
|
|
|
|
|
} |
|
8766
|
|
|
|
|
|
|
|
|
8767
|
1
|
|
|
|
|
|
return data.is_end(); |
|
8768
|
|
|
|
|
|
|
} |
|
8769
|
|
|
|
|
|
|
|
|
8770
|
|
|
|
|
|
|
template |
|
8771
|
0
|
|
|
|
|
|
struct feature_sequences::cache { |
|
8772
|
|
|
|
|
|
|
const vector* forms; |
|
8773
|
|
|
|
|
|
|
const vector>* analyses; |
|
8774
|
|
|
|
|
|
|
vector elementary_per_form; |
|
8775
|
|
|
|
|
|
|
vector> elementary_per_tag; |
|
8776
|
|
|
|
|
|
|
|
|
8777
|
0
|
|
|
|
|
|
struct cache_element { |
|
8778
|
|
|
|
|
|
|
vector key; |
|
8779
|
|
|
|
|
|
|
int key_size; |
|
8780
|
|
|
|
|
|
|
feature_sequence_score score; |
|
8781
|
|
|
|
|
|
|
|
|
8782
|
21
|
0
|
|
|
|
|
cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {} |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8783
|
|
|
|
|
|
|
}; |
|
8784
|
|
|
|
|
|
|
vector caches; |
|
8785
|
|
|
|
|
|
|
vector window; |
|
8786
|
|
|
|
|
|
|
vector key; |
|
8787
|
|
|
|
|
|
|
feature_sequences_score score; |
|
8788
|
|
|
|
|
|
|
|
|
8789
|
1
|
|
|
|
|
|
cache(const feature_sequences& self) : score(0) { |
|
8790
|
1
|
0
|
|
|
|
|
caches.reserve(self.sequences.size()); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8791
|
|
|
|
|
|
|
int max_sequence_elements = 0, max_window_size = 1; |
|
8792
|
22
|
0
|
|
|
|
|
for (auto&& sequence : self.sequences) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8793
|
21
|
0
|
|
|
|
|
caches.emplace_back(int(sequence.elements.size())); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8794
|
21
|
0
|
|
|
|
|
if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size(); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8795
|
66
|
0
|
|
|
|
|
for (auto&& element : sequence.elements) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8796
|
45
|
0
|
|
|
|
|
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8797
|
|
|
|
|
|
|
max_window_size = 1 - element.sequence_index; |
|
8798
|
|
|
|
|
|
|
} |
|
8799
|
1
|
0
|
|
|
|
|
key.resize(max_sequence_elements * vli::max_length()); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8800
|
1
|
0
|
|
|
|
|
window.resize(max_window_size); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8801
|
1
|
|
|
|
|
|
} |
|
8802
|
|
|
|
|
|
|
}; |
|
8803
|
|
|
|
|
|
|
|
|
8804
|
|
|
|
|
|
|
template |
|
8805
|
2
|
|
|
|
|
|
void feature_sequences::initialize_sentence(const vector& forms, const vector>& analyses, cache& c) const { |
|
8806
|
|
|
|
|
|
|
// Store forms and forms_size |
|
8807
|
2
|
|
|
|
|
|
c.forms = &forms; |
|
8808
|
2
|
|
|
|
|
|
c.analyses = &analyses; |
|
8809
|
|
|
|
|
|
|
|
|
8810
|
|
|
|
|
|
|
// Enlarge elementary features vectors if needed |
|
8811
|
2
|
0
|
|
|
|
|
if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8812
|
2
|
0
|
|
|
|
|
if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8813
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8814
|
7
|
0
|
|
|
|
|
if (analyses[i].size() > c.elementary_per_tag[i].size()) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8815
|
5
|
|
|
|
|
|
c.elementary_per_tag[i].resize(analyses[i].size() * 2); |
|
8816
|
|
|
|
|
|
|
|
|
8817
|
|
|
|
|
|
|
// Compute elementary features |
|
8818
|
2
|
|
|
|
|
|
elementary.compute_features(forms, analyses, c.elementary_per_form, c.elementary_per_tag); |
|
8819
|
|
|
|
|
|
|
|
|
8820
|
|
|
|
|
|
|
// Clear score cache, because scores may have been modified |
|
8821
|
2
|
|
|
|
|
|
c.score = 0; |
|
8822
|
44
|
0
|
|
|
|
|
for (auto&& cache : c.caches) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8823
|
42
|
|
|
|
|
|
cache.key_size = cache.score = 0; |
|
8824
|
2
|
|
|
|
|
|
} |
|
8825
|
|
|
|
|
|
|
|
|
8826
|
|
|
|
|
|
|
template |
|
8827
|
44
|
|
|
|
|
|
void feature_sequences::compute_dynamic_features(int form_index, int tag_index, const dynamic_features* prev_dynamic, dynamic_features& dynamic, cache& c) const { |
|
8828
|
22
|
0
|
|
|
|
|
elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8829
|
22
|
|
|
|
|
|
} |
|
8830
|
|
|
|
|
|
|
|
|
8831
|
|
|
|
|
|
|
template |
|
8832
|
36
|
|
|
|
|
|
feature_sequences_score feature_sequences::score(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, cache& c) const { |
|
8833
|
|
|
|
|
|
|
// Start by creating a window of per_tag_features* |
|
8834
|
70
|
0
|
|
|
|
|
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8835
|
156
|
|
|
|
|
|
c.window[i] = &c.elementary_per_tag[form_index - i][tags_window[i]]; |
|
8836
|
|
|
|
|
|
|
|
|
8837
|
|
|
|
|
|
|
// Compute the score |
|
8838
|
18
|
|
|
|
|
|
feature_sequences_score result = c.score; |
|
8839
|
208
|
0
|
|
|
|
|
for (unsigned i = 0; i < sequences.size(); i++) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8840
|
190
|
0
|
|
|
|
|
if (tags_unchanged >= sequences[i].dependant_range) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8841
|
|
|
|
|
|
|
break; |
|
8842
|
|
|
|
|
|
|
|
|
8843
|
179
|
|
|
|
|
|
char* key = c.key.data(); |
|
8844
|
446
|
0
|
|
|
|
|
for (unsigned j = 0; j < sequences[i].elements.size(); j++) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8845
|
|
|
|
|
|
|
auto& element = sequences[i].elements[j]; |
|
8846
|
|
|
|
|
|
|
elementary_feature_value value; |
|
8847
|
|
|
|
|
|
|
|
|
8848
|
332
|
|
|
|
|
|
switch (element.type) { |
|
8849
|
|
|
|
|
|
|
case PER_FORM: |
|
8850
|
95
|
0
|
|
|
|
|
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8851
|
|
|
|
|
|
|
break; |
|
8852
|
|
|
|
|
|
|
case PER_TAG: |
|
8853
|
237
|
0
|
|
|
|
|
value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index]; |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8854
|
|
|
|
|
|
|
break; |
|
8855
|
|
|
|
|
|
|
case DYNAMIC: |
|
8856
|
|
|
|
|
|
|
default: |
|
8857
|
0
|
|
|
|
|
|
value = dynamic.values[element.elementary_index]; |
|
8858
|
|
|
|
|
|
|
} |
|
8859
|
|
|
|
|
|
|
|
|
8860
|
332
|
0
|
|
|
|
|
if (value == elementary_feature_unknown) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8861
|
65
|
|
|
|
|
|
key = c.key.data(); |
|
8862
|
65
|
|
|
|
|
|
break; |
|
8863
|
|
|
|
|
|
|
} |
|
8864
|
267
|
|
|
|
|
|
vli::encode(value, key); |
|
8865
|
|
|
|
|
|
|
} |
|
8866
|
|
|
|
|
|
|
|
|
8867
|
179
|
|
|
|
|
|
result -= c.caches[i].score; |
|
8868
|
179
|
|
|
|
|
|
int key_size = key - c.key.data(); |
|
8869
|
179
|
0
|
|
|
|
|
if (!key_size) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8870
|
65
|
|
|
|
|
|
c.caches[i].score = 0; |
|
8871
|
65
|
|
|
|
|
|
c.caches[i].key_size = 0; |
|
8872
|
198
|
0
|
|
|
|
|
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8873
|
108
|
|
|
|
|
|
c.caches[i].score = scores[i].score(c.key.data(), key_size); |
|
8874
|
108
|
|
|
|
|
|
c.caches[i].key_size = key_size; |
|
8875
|
108
|
|
|
|
|
|
small_memcpy(c.caches[i].key.data(), c.key.data(), key_size); |
|
8876
|
|
|
|
|
|
|
} |
|
8877
|
179
|
|
|
|
|
|
result += c.caches[i].score; |
|
8878
|
|
|
|
|
|
|
} |
|
8879
|
|
|
|
|
|
|
|
|
8880
|
18
|
|
|
|
|
|
c.score = result; |
|
8881
|
18
|
|
|
|
|
|
return result; |
|
8882
|
|
|
|
|
|
|
} |
|
8883
|
|
|
|
|
|
|
|
|
8884
|
|
|
|
|
|
|
template |
|
8885
|
|
|
|
|
|
|
void feature_sequences::feature_keys(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, vector& keys, cache& c) const { |
|
8886
|
|
|
|
|
|
|
score(form_index, tags_window, tags_unchanged, dynamic, c); |
|
8887
|
|
|
|
|
|
|
|
|
8888
|
|
|
|
|
|
|
keys.resize(c.caches.size()); |
|
8889
|
|
|
|
|
|
|
for (unsigned i = 0; i < c.caches.size(); i++) |
|
8890
|
|
|
|
|
|
|
keys[i].assign(c.caches[i].key.data(), c.caches[i].key_size); |
|
8891
|
|
|
|
|
|
|
} |
|
8892
|
|
|
|
|
|
|
|
|
8893
|
|
|
|
|
|
|
} // namespace morphodita |
|
8894
|
|
|
|
|
|
|
|
|
8895
|
|
|
|
|
|
|
///////// |
|
8896
|
|
|
|
|
|
|
// File: morphodita/tagger/viterbi.h |
|
8897
|
|
|
|
|
|
|
///////// |
|
8898
|
|
|
|
|
|
|
|
|
8899
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
8900
|
|
|
|
|
|
|
// |
|
8901
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
8902
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
8903
|
|
|
|
|
|
|
// |
|
8904
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
8905
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
8906
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
8907
|
|
|
|
|
|
|
|
|
8908
|
|
|
|
|
|
|
namespace morphodita { |
|
8909
|
|
|
|
|
|
|
|
|
8910
|
|
|
|
|
|
|
// Declarations |
|
8911
|
|
|
|
|
|
|
template |
|
8912
|
|
|
|
|
|
|
class viterbi { |
|
8913
|
|
|
|
|
|
|
public: |
|
8914
|
|
|
|
|
|
|
viterbi(const FeatureSequences& features, int decoding_order, int window_size) |
|
8915
|
1
|
|
|
|
|
|
: features(features), decoding_order(decoding_order), window_size(window_size) {} |
|
8916
|
|
|
|
|
|
|
|
|
8917
|
|
|
|
|
|
|
struct cache; |
|
8918
|
|
|
|
|
|
|
void tag(const vector& forms, const vector>& analyses, cache& c, vector& tags) const; |
|
8919
|
|
|
|
|
|
|
|
|
8920
|
|
|
|
|
|
|
private: |
|
8921
|
|
|
|
|
|
|
struct node; |
|
8922
|
|
|
|
|
|
|
|
|
8923
|
|
|
|
|
|
|
const FeatureSequences& features; |
|
8924
|
|
|
|
|
|
|
int decoding_order, window_size; |
|
8925
|
|
|
|
|
|
|
}; |
|
8926
|
|
|
|
|
|
|
|
|
8927
|
|
|
|
|
|
|
// Definitions |
|
8928
|
|
|
|
|
|
|
template |
|
8929
|
0
|
|
|
|
|
|
struct viterbi::cache { |
|
8930
|
|
|
|
|
|
|
vector nodes; |
|
8931
|
|
|
|
|
|
|
typename FeatureSequences::cache features_cache; |
|
8932
|
|
|
|
|
|
|
|
|
8933
|
1
|
0
|
|
|
|
|
cache(const viterbi& self) : features_cache(self.features) {} |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8934
|
|
|
|
|
|
|
}; |
|
8935
|
|
|
|
|
|
|
|
|
8936
|
|
|
|
|
|
|
template |
|
8937
|
|
|
|
|
|
|
struct viterbi::node { |
|
8938
|
|
|
|
|
|
|
int tag; |
|
8939
|
|
|
|
|
|
|
int prev; |
|
8940
|
|
|
|
|
|
|
feature_sequences_score score; |
|
8941
|
|
|
|
|
|
|
typename FeatureSequences::dynamic_features dynamic; |
|
8942
|
|
|
|
|
|
|
}; |
|
8943
|
|
|
|
|
|
|
|
|
8944
|
|
|
|
|
|
|
template |
|
8945
|
2
|
|
|
|
|
|
void viterbi::tag(const vector& forms, const vector>& analyses, cache& c, vector& tags) const { |
|
8946
|
4
|
0
|
|
|
|
|
if (!forms.size()) return; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8947
|
|
|
|
|
|
|
|
|
8948
|
|
|
|
|
|
|
// Count number of nodes and allocate |
|
8949
|
|
|
|
|
|
|
unsigned nodes = 0; |
|
8950
|
9
|
0
|
|
|
|
|
for (unsigned i = 0, states = 1; i < forms.size(); i++) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8951
|
7
|
0
|
|
|
|
|
if (analyses[i].empty()) return; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8952
|
7
|
0
|
|
|
|
|
states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size(); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8953
|
7
|
|
|
|
|
|
nodes += states; |
|
8954
|
|
|
|
|
|
|
} |
|
8955
|
2
|
0
|
|
|
|
|
if (nodes > c.nodes.size()) c.nodes.resize(nodes); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8956
|
|
|
|
|
|
|
|
|
8957
|
|
|
|
|
|
|
// Init feature sequences |
|
8958
|
2
|
|
|
|
|
|
features.initialize_sentence(forms, analyses, c.features_cache); |
|
8959
|
|
|
|
|
|
|
|
|
8960
|
|
|
|
|
|
|
int window_stack[16]; vector window_heap; |
|
8961
|
2
|
0
|
|
|
|
|
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8962
|
|
|
|
|
|
|
typename FeatureSequences::dynamic_features dynamic; |
|
8963
|
|
|
|
|
|
|
feature_sequences_score score; |
|
8964
|
|
|
|
|
|
|
|
|
8965
|
|
|
|
|
|
|
// Compute all nodes score |
|
8966
|
|
|
|
|
|
|
int nodes_prev = -1, nodes_now = 0; |
|
8967
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8968
|
|
|
|
|
|
|
int nodes_next = nodes_now; |
|
8969
|
|
|
|
|
|
|
|
|
8970
|
28
|
0
|
|
|
|
|
for (int j = 0; j < window_size; j++) window[j] = -1; |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8971
|
18
|
0
|
|
|
|
|
for (int tag = 0; tag < int(analyses[i].size()); tag++) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8972
|
33
|
0
|
|
|
|
|
for (int prev = nodes_prev; prev < nodes_now; prev++) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8973
|
|
|
|
|
|
|
// Compute predecessors and number of unchanges |
|
8974
|
22
|
|
|
|
|
|
int same_tags = window[0] == tag; |
|
8975
|
22
|
|
|
|
|
|
window[0] = tag; |
|
8976
|
59
|
0
|
|
|
|
|
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8977
|
37
|
0
|
|
|
|
|
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8978
|
74
|
|
|
|
|
|
window[n] = c.nodes[p].tag; |
|
8979
|
|
|
|
|
|
|
} |
|
8980
|
|
|
|
|
|
|
|
|
8981
|
|
|
|
|
|
|
// Compute dynamic elementary features and score |
|
8982
|
22
|
0
|
|
|
|
|
features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8983
|
22
|
0
|
|
|
|
|
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8984
|
20
|
|
|
|
|
|
(prev >= 0 ? c.nodes[prev].score : 0); |
|
8985
|
|
|
|
|
|
|
|
|
8986
|
|
|
|
|
|
|
// Update existing node or create a new one |
|
8987
|
22
|
0
|
|
|
|
|
if (same_tags >= decoding_order-1) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8988
|
4
|
0
|
|
|
|
|
if (score <= c.nodes[nodes_next-1].score) continue; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
8989
|
|
|
|
|
|
|
nodes_next--; |
|
8990
|
|
|
|
|
|
|
} |
|
8991
|
44
|
|
|
|
|
|
c.nodes[nodes_next].tag = tag; |
|
8992
|
22
|
|
|
|
|
|
c.nodes[nodes_next].prev = prev; |
|
8993
|
22
|
|
|
|
|
|
c.nodes[nodes_next].score = score; |
|
8994
|
22
|
|
|
|
|
|
c.nodes[nodes_next++].dynamic = dynamic; |
|
8995
|
|
|
|
|
|
|
} |
|
8996
|
|
|
|
|
|
|
|
|
8997
|
|
|
|
|
|
|
nodes_prev = nodes_now; |
|
8998
|
|
|
|
|
|
|
nodes_now = nodes_next; |
|
8999
|
|
|
|
|
|
|
} |
|
9000
|
|
|
|
|
|
|
|
|
9001
|
|
|
|
|
|
|
// Choose the best ending node |
|
9002
|
|
|
|
|
|
|
int best = nodes_prev; |
|
9003
|
5
|
0
|
|
|
|
|
for (int node = nodes_prev + 1; node < nodes_now; node++) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9004
|
3
|
0
|
|
|
|
|
if (c.nodes[node].score > c.nodes[best].score) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9005
|
|
|
|
|
|
|
best = node; |
|
9006
|
|
|
|
|
|
|
|
|
9007
|
9
|
0
|
|
|
|
|
for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9008
|
21
|
|
|
|
|
|
tags[i] = c.nodes[best].tag; |
|
9009
|
|
|
|
|
|
|
} |
|
9010
|
|
|
|
|
|
|
|
|
9011
|
|
|
|
|
|
|
} // namespace morphodita |
|
9012
|
|
|
|
|
|
|
|
|
9013
|
|
|
|
|
|
|
///////// |
|
9014
|
|
|
|
|
|
|
// File: morphodita/tagger/conllu_elementary_features.h |
|
9015
|
|
|
|
|
|
|
///////// |
|
9016
|
|
|
|
|
|
|
|
|
9017
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9018
|
|
|
|
|
|
|
// |
|
9019
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9020
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9021
|
|
|
|
|
|
|
// |
|
9022
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9023
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9024
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9025
|
|
|
|
|
|
|
|
|
9026
|
|
|
|
|
|
|
namespace morphodita { |
|
9027
|
|
|
|
|
|
|
|
|
9028
|
|
|
|
|
|
|
// Declarations |
|
9029
|
|
|
|
|
|
|
template |
|
9030
|
0
|
|
|
|
|
|
class conllu_elementary_features : public elementary_features |
|
9031
|
|
|
|
|
|
|
public: |
|
9032
|
|
|
|
|
|
|
conllu_elementary_features(); |
|
9033
|
|
|
|
|
|
|
|
|
9034
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_FORM, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, PREFIX5, PREFIX6, PREFIX7, PREFIX8, PREFIX9, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, SUFFIX5, SUFFIX6, SUFFIX7, SUFFIX8, SUFFIX9, PER_FORM_TOTAL }; |
|
9035
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG_UPOS, TAG_CASE, TAG_GENDER, TAG_NUMBER, TAG_NEGATIVE, TAG_PERSON, LEMMA, PER_TAG_TOTAL }; |
|
9036
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_FORM, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_FORM, DYNAMIC_TOTAL }; |
|
9037
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_PREFIX5, MAP_PREFIX6, MAP_PREFIX7, MAP_PREFIX8, MAP_PREFIX9, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_SUFFIX5, MAP_SUFFIX6, MAP_SUFFIX7, MAP_SUFFIX8, MAP_SUFFIX9, MAP_TAG, MAP_TAG_UPOS, MAP_TAG_CASE, MAP_TAG_GENDER, MAP_TAG_NUMBER, MAP_TAG_NEGATIVE, MAP_TAG_PERSON, MAP_LEMMA, MAP_TOTAL } ; |
|
9038
|
|
|
|
|
|
|
|
|
9039
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
|
9040
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
|
9041
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
|
9042
|
|
|
|
|
|
|
|
|
9043
|
|
|
|
|
|
|
static vector descriptions; |
|
9044
|
|
|
|
|
|
|
|
|
9045
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
|
9046
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
|
9047
|
|
|
|
|
|
|
|
|
9048
|
|
|
|
|
|
|
using elementary_features |
|
9049
|
|
|
|
|
|
|
}; |
|
9050
|
|
|
|
|
|
|
|
|
9051
|
|
|
|
|
|
|
typedef conllu_elementary_features persistent_conllu_elementary_features; |
|
9052
|
|
|
|
|
|
|
|
|
9053
|
|
|
|
|
|
|
// Definitions |
|
9054
|
|
|
|
|
|
|
template |
|
9055
|
0
|
|
|
|
|
|
conllu_elementary_features |
|
9056
|
0
|
0
|
|
|
|
|
maps.resize(MAP_TOTAL); |
|
9057
|
0
|
|
|
|
|
|
} |
|
9058
|
|
|
|
|
|
|
|
|
9059
|
|
|
|
|
|
|
template |
|
9060
|
|
|
|
|
|
|
vector conllu_elementary_features |
|
9061
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
|
9062
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
|
9063
|
|
|
|
|
|
|
{"FollowingVerbForm", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_FORM, MAP_FORM}, |
|
9064
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
|
9065
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
|
9066
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
|
9067
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
|
9068
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
|
9069
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
|
9070
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
|
9071
|
|
|
|
|
|
|
{"Prefix5", PER_FORM, ONLY_CURRENT, PREFIX5, MAP_PREFIX5}, |
|
9072
|
|
|
|
|
|
|
{"Prefix6", PER_FORM, ONLY_CURRENT, PREFIX6, MAP_PREFIX6}, |
|
9073
|
|
|
|
|
|
|
{"Prefix7", PER_FORM, ONLY_CURRENT, PREFIX7, MAP_PREFIX7}, |
|
9074
|
|
|
|
|
|
|
{"Prefix8", PER_FORM, ONLY_CURRENT, PREFIX8, MAP_PREFIX8}, |
|
9075
|
|
|
|
|
|
|
{"Prefix9", PER_FORM, ONLY_CURRENT, PREFIX9, MAP_PREFIX9}, |
|
9076
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
|
9077
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
|
9078
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
|
9079
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
|
9080
|
|
|
|
|
|
|
{"Suffix5", PER_FORM, ONLY_CURRENT, SUFFIX5, MAP_SUFFIX5}, |
|
9081
|
|
|
|
|
|
|
{"Suffix6", PER_FORM, ONLY_CURRENT, SUFFIX6, MAP_SUFFIX6}, |
|
9082
|
|
|
|
|
|
|
{"Suffix7", PER_FORM, ONLY_CURRENT, SUFFIX7, MAP_SUFFIX7}, |
|
9083
|
|
|
|
|
|
|
{"Suffix8", PER_FORM, ONLY_CURRENT, SUFFIX8, MAP_SUFFIX8}, |
|
9084
|
|
|
|
|
|
|
{"Suffix9", PER_FORM, ONLY_CURRENT, SUFFIX9, MAP_SUFFIX9}, |
|
9085
|
|
|
|
|
|
|
|
|
9086
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
|
9087
|
|
|
|
|
|
|
{"TagUPos", PER_TAG, ANY_OFFSET, TAG_UPOS, MAP_TAG_UPOS}, |
|
9088
|
|
|
|
|
|
|
{"TagCase", PER_TAG, ANY_OFFSET, TAG_CASE, MAP_TAG_CASE}, |
|
9089
|
|
|
|
|
|
|
{"TagGender", PER_TAG, ANY_OFFSET, TAG_GENDER, MAP_TAG_GENDER}, |
|
9090
|
|
|
|
|
|
|
{"TagNumber", PER_TAG, ANY_OFFSET, TAG_NUMBER, MAP_TAG_NUMBER}, |
|
9091
|
|
|
|
|
|
|
{"TagNegative", PER_TAG, ANY_OFFSET, TAG_NEGATIVE, MAP_TAG_NEGATIVE}, |
|
9092
|
|
|
|
|
|
|
{"TagPerson", PER_TAG, ANY_OFFSET, TAG_PERSON, MAP_TAG_PERSON}, |
|
9093
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
|
9094
|
|
|
|
|
|
|
|
|
9095
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
|
9096
|
|
|
|
|
|
|
{"PreviousVerbForm", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_FORM, MAP_FORM}, |
|
9097
|
|
|
|
|
|
|
}; |
|
9098
|
|
|
|
|
|
|
|
|
9099
|
|
|
|
|
|
|
template |
|
9100
|
0
|
|
|
|
|
|
void conllu_elementary_features |
|
9101
|
|
|
|
|
|
|
using namespace unilib; |
|
9102
|
|
|
|
|
|
|
|
|
9103
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
|
9104
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_form = elementary_feature_empty; |
|
9105
|
0
|
0
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
|
9106
|
|
|
|
|
|
|
int verb_candidate = -1; |
|
9107
|
|
|
|
|
|
|
|
|
9108
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
|
9109
|
0
|
0
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
|
9110
|
0
|
|
|
|
|
|
const string& tag = analyses[i][j].tag; |
|
9111
|
0
|
|
|
|
|
|
const string& lemma = analyses[i][j].lemma; |
|
9112
|
|
|
|
|
|
|
|
|
9113
|
|
|
|
|
|
|
// Tag consists of three parts separated by tag[0] character |
|
9114
|
|
|
|
|
|
|
// - first is TAG_UPOS, |
|
9115
|
|
|
|
|
|
|
// - second is TAG_LPOS, |
|
9116
|
|
|
|
|
|
|
// - then there is any number of | separated named fields in format Name=Value |
|
9117
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(tag.c_str(), tag.size()); |
|
9118
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG_UPOS] = per_tag[i][j].values[TAG_CASE] = per_tag[i][j].values[TAG_GENDER] = elementary_feature_empty; |
|
9119
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG_NUMBER] = per_tag[i][j].values[TAG_NEGATIVE] = per_tag[i][j].values[TAG_PERSON] = elementary_feature_empty; |
|
9120
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
|
0
|
|
|
|
|
|
|
9121
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(lemma.c_str(), lemma.size()); |
|
9122
|
|
|
|
|
|
|
|
|
9123
|
0
|
|
|
|
|
|
char separator = tag[0]; |
|
9124
|
0
|
|
|
|
|
|
size_t index = tag.find(separator, 1); |
|
9125
|
0
|
0
|
|
|
|
|
if (index == string::npos) index = tag.size(); |
|
9126
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0)); |
|
|
|
0
|
|
|
|
|
|
|
9127
|
|
|
|
|
|
|
|
|
9128
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index++; |
|
9129
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index = tag.find(separator, index); |
|
9130
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index++; |
|
9131
|
0
|
0
|
|
|
|
|
for (size_t length; index < tag.size(); index += length + 1) { |
|
9132
|
0
|
|
|
|
|
|
length = tag.find('|', index); |
|
9133
|
0
|
0
|
|
|
|
|
length = (length == string::npos ? tag.size() : length) - index; |
|
9134
|
|
|
|
|
|
|
|
|
9135
|
0
|
0
|
|
|
|
|
for (size_t equal_sign = 0; equal_sign + 1 < length; equal_sign++) |
|
9136
|
0
|
0
|
|
|
|
|
if (tag[index + equal_sign] == '=') { |
|
9137
|
|
|
|
|
|
|
int value = -1, map; |
|
9138
|
0
|
|
|
|
|
|
switch (equal_sign) { |
|
9139
|
|
|
|
|
|
|
case 4: |
|
9140
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Case") == 0) value = TAG_CASE, map = MAP_TAG_CASE; |
|
9141
|
|
|
|
|
|
|
break; |
|
9142
|
|
|
|
|
|
|
case 6: |
|
9143
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Gender") == 0) value = TAG_GENDER, map = MAP_TAG_GENDER; |
|
9144
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Number") == 0) value = TAG_NUMBER, map = MAP_TAG_NUMBER; |
|
9145
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Person") == 0) value = TAG_PERSON, map = MAP_TAG_PERSON; |
|
9146
|
|
|
|
|
|
|
break; |
|
9147
|
|
|
|
|
|
|
case 8: |
|
9148
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Negative") == 0) value = TAG_NEGATIVE, map = MAP_TAG_NEGATIVE; |
|
9149
|
|
|
|
|
|
|
break; |
|
9150
|
|
|
|
|
|
|
} |
|
9151
|
|
|
|
|
|
|
|
|
9152
|
0
|
0
|
|
|
|
|
if (value >= 0) |
|
9153
|
0
|
|
|
|
|
|
per_tag[i][j].values[value] = maps[map].value(tag.c_str() + index + equal_sign + 1, length - equal_sign - 1); |
|
9154
|
|
|
|
|
|
|
break; |
|
9155
|
|
|
|
|
|
|
} |
|
9156
|
|
|
|
|
|
|
} |
|
9157
|
|
|
|
|
|
|
|
|
9158
|
0
|
0
|
|
|
|
|
if (tag.size() >= 2 && tag[1] == 'V') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9159
|
|
|
|
|
|
|
int tag_compare; |
|
9160
|
0
|
0
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
|
0
|
|
|
|
|
|
|
9161
|
|
|
|
|
|
|
} |
|
9162
|
|
|
|
|
|
|
} |
|
9163
|
|
|
|
|
|
|
|
|
9164
|
|
|
|
|
|
|
// Per_form features |
|
9165
|
0
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
|
9166
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
|
9167
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_FORM] = following_verb_form; |
|
9168
|
|
|
|
|
|
|
|
|
9169
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
|
9170
|
0
|
0
|
|
|
|
|
if (verb_candidate >= 0) { |
|
9171
|
0
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
|
9172
|
0
|
|
|
|
|
|
following_verb_form = per_form[i].values[FORM]; |
|
9173
|
|
|
|
|
|
|
} |
|
9174
|
|
|
|
|
|
|
|
|
9175
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
|
9176
|
0
|
0
|
|
|
|
|
if (analyses[i].size() == 1) { |
|
9177
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
|
9178
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_unknown; |
|
9179
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_unknown; |
|
9180
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_unknown; |
|
9181
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_unknown; |
|
9182
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_unknown; |
|
9183
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_unknown; |
|
9184
|
0
|
0
|
|
|
|
|
} else if (forms[i].len <= 0) { |
|
9185
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
|
9186
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_empty; |
|
9187
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_empty; |
|
9188
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_empty; |
|
9189
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_empty; |
|
9190
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_empty; |
|
9191
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_empty; |
|
9192
|
|
|
|
|
|
|
} else { |
|
9193
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
|
9194
|
0
|
|
|
|
|
|
const char* form_start = form.str; |
|
9195
|
|
|
|
|
|
|
|
|
9196
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
|
9197
|
0
|
|
|
|
|
|
size_t indices[18] = {0, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, 0, 0, 0, 0, 0, 0, 0, 0}; // careful here regarding forms shorter than 9 characters |
|
9198
|
|
|
|
|
|
|
int index = 0; |
|
9199
|
0
|
0
|
|
|
|
|
while (form.len) { |
|
9200
|
0
|
|
|
|
|
|
indices[(index++) % 18] = form.str - form_start; |
|
9201
|
|
|
|
|
|
|
|
|
9202
|
0
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
|
9203
|
0
|
0
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
|
0
|
|
|
|
|
|
|
9204
|
0
|
0
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
|
0
|
|
|
|
|
|
|
9205
|
0
|
0
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
|
0
|
|
|
|
|
|
|
9206
|
|
|
|
|
|
|
|
|
9207
|
0
|
0
|
|
|
|
|
if (index == 10 || (!form.len && index < 10)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9208
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
|
9209
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
|
9210
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
|
9211
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
|
9212
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX5] = maps[MAP_PREFIX5].value(form_start, indices[5]); |
|
9213
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX6] = maps[MAP_PREFIX6].value(form_start, indices[6]); |
|
9214
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = maps[MAP_PREFIX7].value(form_start, indices[7]); |
|
9215
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX8] = maps[MAP_PREFIX8].value(form_start, indices[8]); |
|
9216
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX9] = maps[MAP_PREFIX9].value(form_start, indices[9]); |
|
9217
|
|
|
|
|
|
|
} |
|
9218
|
|
|
|
|
|
|
} |
|
9219
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index+18-1) % 18], form.str - form_start - indices[(index+18-1) % 18]); |
|
9220
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index+18-2) % 18], form.str - form_start - indices[(index+18-2) % 18]); |
|
9221
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index+18-3) % 18], form.str - form_start - indices[(index+18-3) % 18]); |
|
9222
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index+18-4) % 18], form.str - form_start - indices[(index+18-4) % 18]); |
|
9223
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX5] = maps[MAP_SUFFIX5].value(form_start + indices[(index+18-5) % 18], form.str - form_start - indices[(index+18-5) % 18]); |
|
9224
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX6] = maps[MAP_SUFFIX6].value(form_start + indices[(index+18-6) % 18], form.str - form_start - indices[(index+18-6) % 18]); |
|
9225
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = maps[MAP_SUFFIX7].value(form_start + indices[(index+18-7) % 18], form.str - form_start - indices[(index+18-7) % 18]); |
|
9226
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX8] = maps[MAP_SUFFIX8].value(form_start + indices[(index+18-8) % 18], form.str - form_start - indices[(index+18-8) % 18]); |
|
9227
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX9] = maps[MAP_SUFFIX9].value(form_start + indices[(index+18-9) % 18], form.str - form_start - indices[(index+18-9) % 18]); |
|
9228
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
|
9229
|
0
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
|
9230
|
0
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
|
9231
|
|
|
|
|
|
|
} |
|
9232
|
|
|
|
|
|
|
} |
|
9233
|
0
|
|
|
|
|
|
} |
|
9234
|
|
|
|
|
|
|
|
|
9235
|
|
|
|
|
|
|
template |
|
9236
|
|
|
|
|
|
|
void conllu_elementary_features |
|
9237
|
0
|
0
|
|
|
|
|
if (prev_dynamic) { |
|
9238
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
|
9239
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_FORM] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_FORM]; |
|
9240
|
|
|
|
|
|
|
} else { |
|
9241
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
|
9242
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_FORM] = elementary_feature_empty; |
|
9243
|
|
|
|
|
|
|
} |
|
9244
|
|
|
|
|
|
|
|
|
9245
|
0
|
0
|
|
|
|
|
if (tag.tag.size() >= 2 && tag.tag[1] == 'V') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9246
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
|
9247
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_FORM] = per_form.values[FORM]; |
|
9248
|
|
|
|
|
|
|
} else { |
|
9249
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
|
9250
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_FORM] = dynamic.values[PREVIOUS_VERB_FORM]; |
|
9251
|
|
|
|
|
|
|
} |
|
9252
|
|
|
|
|
|
|
} |
|
9253
|
|
|
|
|
|
|
|
|
9254
|
|
|
|
|
|
|
} // namespace morphodita |
|
9255
|
|
|
|
|
|
|
|
|
9256
|
|
|
|
|
|
|
///////// |
|
9257
|
|
|
|
|
|
|
// File: morphodita/tagger/czech_elementary_features.h |
|
9258
|
|
|
|
|
|
|
///////// |
|
9259
|
|
|
|
|
|
|
|
|
9260
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9261
|
|
|
|
|
|
|
// |
|
9262
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9263
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9264
|
|
|
|
|
|
|
// |
|
9265
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9266
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9267
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9268
|
|
|
|
|
|
|
|
|
9269
|
|
|
|
|
|
|
namespace morphodita { |
|
9270
|
|
|
|
|
|
|
|
|
9271
|
|
|
|
|
|
|
// Declarations |
|
9272
|
|
|
|
|
|
|
template |
|
9273
|
0
|
|
|
|
|
|
class czech_elementary_features : public elementary_features |
|
9274
|
|
|
|
|
|
|
public: |
|
9275
|
|
|
|
|
|
|
czech_elementary_features(); |
|
9276
|
|
|
|
|
|
|
|
|
9277
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_LEMMA, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, PER_FORM_TOTAL }; |
|
9278
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG3, TAG5, TAG25, LEMMA, PER_TAG_TOTAL }; |
|
9279
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_LEMMA, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_LEMMA, DYNAMIC_TOTAL }; |
|
9280
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_LEMMA, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_TAG, MAP_TAG3, MAP_TAG5, MAP_TAG25, MAP_TOTAL } ; |
|
9281
|
|
|
|
|
|
|
|
|
9282
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
|
9283
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
|
9284
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
|
9285
|
|
|
|
|
|
|
|
|
9286
|
|
|
|
|
|
|
static vector descriptions; |
|
9287
|
|
|
|
|
|
|
|
|
9288
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
|
9289
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
|
9290
|
|
|
|
|
|
|
|
|
9291
|
|
|
|
|
|
|
using elementary_features |
|
9292
|
|
|
|
|
|
|
}; |
|
9293
|
|
|
|
|
|
|
|
|
9294
|
|
|
|
|
|
|
typedef czech_elementary_features persistent_czech_elementary_features; |
|
9295
|
|
|
|
|
|
|
|
|
9296
|
|
|
|
|
|
|
// Definitions |
|
9297
|
|
|
|
|
|
|
template |
|
9298
|
0
|
|
|
|
|
|
czech_elementary_features |
|
9299
|
0
|
0
|
|
|
|
|
maps.resize(MAP_TOTAL); |
|
9300
|
0
|
|
|
|
|
|
} |
|
9301
|
|
|
|
|
|
|
|
|
9302
|
|
|
|
|
|
|
template |
|
9303
|
|
|
|
|
|
|
vector czech_elementary_features |
|
9304
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
|
9305
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
|
9306
|
|
|
|
|
|
|
{"FollowingVerbLemma", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_LEMMA, MAP_LEMMA }, |
|
9307
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
|
9308
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
|
9309
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
|
9310
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
|
9311
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
|
9312
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
|
9313
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
|
9314
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
|
9315
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
|
9316
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
|
9317
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
|
9318
|
|
|
|
|
|
|
|
|
9319
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
|
9320
|
|
|
|
|
|
|
{"Tag3", PER_TAG, ANY_OFFSET, TAG3, MAP_TAG3}, |
|
9321
|
|
|
|
|
|
|
{"Tag5", PER_TAG, ANY_OFFSET, TAG5, MAP_TAG5}, |
|
9322
|
|
|
|
|
|
|
{"Tag25", PER_TAG, ANY_OFFSET, TAG25, MAP_TAG25}, |
|
9323
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
|
9324
|
|
|
|
|
|
|
|
|
9325
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
|
9326
|
|
|
|
|
|
|
{"PreviousVerbLemma", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_LEMMA, MAP_LEMMA} |
|
9327
|
|
|
|
|
|
|
}; |
|
9328
|
|
|
|
|
|
|
|
|
9329
|
|
|
|
|
|
|
template |
|
9330
|
0
|
|
|
|
|
|
void czech_elementary_features |
|
9331
|
|
|
|
|
|
|
using namespace unilib; |
|
9332
|
|
|
|
|
|
|
|
|
9333
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
|
9334
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_lemma = elementary_feature_empty; |
|
9335
|
0
|
0
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
|
9336
|
|
|
|
|
|
|
int verb_candidate = -1; |
|
9337
|
|
|
|
|
|
|
|
|
9338
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
|
9339
|
0
|
0
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
|
9340
|
|
|
|
|
|
|
char tag25[2]; |
|
9341
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(analyses[i][j].tag.c_str(), analyses[i][j].tag.size()); |
|
9342
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
|
9343
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
|
9344
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG25] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG25].value((tag25[0] = analyses[i][j].tag[1], tag25[1] = analyses[i][j].tag[4], tag25), 2) : elementary_feature_empty; |
|
9345
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
|
0
|
|
|
|
|
|
|
9346
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(analyses[i][j].lemma.c_str(), analyses[i][j].lemma.size()); |
|
9347
|
|
|
|
|
|
|
|
|
9348
|
0
|
0
|
|
|
|
|
if (analyses[i][j].tag[0] == 'V') { |
|
9349
|
|
|
|
|
|
|
int tag_compare; |
|
9350
|
0
|
0
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
|
0
|
|
|
|
|
|
|
9351
|
|
|
|
|
|
|
} |
|
9352
|
|
|
|
|
|
|
} |
|
9353
|
|
|
|
|
|
|
|
|
9354
|
|
|
|
|
|
|
// Per_form features |
|
9355
|
0
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
|
9356
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
|
9357
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_LEMMA] = following_verb_lemma; |
|
9358
|
|
|
|
|
|
|
|
|
9359
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
|
9360
|
0
|
0
|
|
|
|
|
if (verb_candidate >= 0) { |
|
9361
|
0
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
|
9362
|
0
|
|
|
|
|
|
following_verb_lemma = per_tag[i][verb_candidate].values[LEMMA]; |
|
9363
|
|
|
|
|
|
|
} |
|
9364
|
|
|
|
|
|
|
|
|
9365
|
|
|
|
|
|
|
// Orthographic per_form features if needed |
|
9366
|
0
|
0
|
|
|
|
|
if (analyses[i].size() == 1) { |
|
9367
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
|
9368
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = per_form[i].values[PREFIX4] = elementary_feature_unknown; |
|
9369
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = per_form[i].values[SUFFIX4] = elementary_feature_unknown; |
|
9370
|
0
|
0
|
|
|
|
|
} else if (forms[i].len <= 0) { |
|
9371
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
|
9372
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = per_form[i].values[PREFIX4] = elementary_feature_empty; |
|
9373
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = per_form[i].values[SUFFIX4] = elementary_feature_empty; |
|
9374
|
|
|
|
|
|
|
} else { |
|
9375
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
|
9376
|
0
|
|
|
|
|
|
const char* form_start = form.str; |
|
9377
|
|
|
|
|
|
|
|
|
9378
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
|
9379
|
0
|
|
|
|
|
|
size_t indices[8] = {0, form.len, form.len, form.len, form.len, 0, 0, 0}; // careful here regarding forms shorter than 4 characters |
|
9380
|
|
|
|
|
|
|
int index = 0; |
|
9381
|
0
|
0
|
|
|
|
|
while (form.len) { |
|
9382
|
0
|
|
|
|
|
|
indices[(index++)&7] = form.str - form_start; |
|
9383
|
|
|
|
|
|
|
|
|
9384
|
0
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
|
9385
|
0
|
0
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
|
0
|
|
|
|
|
|
|
9386
|
0
|
0
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
|
0
|
|
|
|
|
|
|
9387
|
0
|
0
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
|
0
|
|
|
|
|
|
|
9388
|
|
|
|
|
|
|
|
|
9389
|
0
|
0
|
|
|
|
|
if (index == 5 || (!form.len && index < 5)) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9390
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
|
9391
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
|
9392
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
|
9393
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
|
9394
|
|
|
|
|
|
|
} |
|
9395
|
|
|
|
|
|
|
} |
|
9396
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index-1)&7], form.str - form_start - indices[(index-1)&7]); |
|
9397
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index-2)&7], form.str - form_start - indices[(index-2)&7]); |
|
9398
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index-3)&7], form.str - form_start - indices[(index-3)&7]); |
|
9399
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index-4)&7], form.str - form_start - indices[(index-4)&7]); |
|
9400
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
|
9401
|
0
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
|
9402
|
0
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
|
9403
|
|
|
|
|
|
|
} |
|
9404
|
|
|
|
|
|
|
} |
|
9405
|
0
|
|
|
|
|
|
} |
|
9406
|
|
|
|
|
|
|
|
|
9407
|
|
|
|
|
|
|
template |
|
9408
|
|
|
|
|
|
|
void czech_elementary_features |
|
9409
|
0
|
0
|
|
|
|
|
if (prev_dynamic) { |
|
9410
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
|
9411
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_LEMMA]; |
|
9412
|
|
|
|
|
|
|
} else { |
|
9413
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
|
9414
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = elementary_feature_empty; |
|
9415
|
|
|
|
|
|
|
} |
|
9416
|
|
|
|
|
|
|
|
|
9417
|
0
|
0
|
|
|
|
|
if (tag.tag[0] == 'V') { |
|
9418
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
|
9419
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = per_tag.values[LEMMA]; |
|
9420
|
|
|
|
|
|
|
} else { |
|
9421
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
|
9422
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = dynamic.values[PREVIOUS_VERB_LEMMA]; |
|
9423
|
|
|
|
|
|
|
} |
|
9424
|
|
|
|
|
|
|
} |
|
9425
|
|
|
|
|
|
|
|
|
9426
|
|
|
|
|
|
|
} // namespace morphodita |
|
9427
|
|
|
|
|
|
|
|
|
9428
|
|
|
|
|
|
|
///////// |
|
9429
|
|
|
|
|
|
|
// File: morphodita/tagger/generic_elementary_features.h |
|
9430
|
|
|
|
|
|
|
///////// |
|
9431
|
|
|
|
|
|
|
|
|
9432
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9433
|
|
|
|
|
|
|
// |
|
9434
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9435
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9436
|
|
|
|
|
|
|
// |
|
9437
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9438
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9439
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9440
|
|
|
|
|
|
|
|
|
9441
|
|
|
|
|
|
|
namespace morphodita { |
|
9442
|
|
|
|
|
|
|
|
|
9443
|
|
|
|
|
|
|
// Declarations |
|
9444
|
|
|
|
|
|
|
template |
|
9445
|
0
|
|
|
|
|
|
class generic_elementary_features : public elementary_features |
|
9446
|
|
|
|
|
|
|
public: |
|
9447
|
|
|
|
|
|
|
generic_elementary_features(); |
|
9448
|
|
|
|
|
|
|
|
|
9449
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_LEMMA, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, PREFIX5, PREFIX6, PREFIX7, PREFIX8, PREFIX9, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, SUFFIX5, SUFFIX6, SUFFIX7, SUFFIX8, SUFFIX9, PER_FORM_TOTAL }; |
|
9450
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG1, TAG2, TAG3, TAG4, TAG5, LEMMA, PER_TAG_TOTAL }; |
|
9451
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_LEMMA, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_LEMMA, DYNAMIC_TOTAL }; |
|
9452
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_PREFIX5, MAP_PREFIX6, MAP_PREFIX7, MAP_PREFIX8, MAP_PREFIX9, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_SUFFIX5, MAP_SUFFIX6, MAP_SUFFIX7, MAP_SUFFIX8, MAP_SUFFIX9, MAP_TAG, MAP_TAG1, MAP_TAG2, MAP_TAG3, MAP_TAG4, MAP_TAG5, MAP_LEMMA, MAP_TOTAL } ; |
|
9453
|
|
|
|
|
|
|
|
|
9454
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
|
9455
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
|
9456
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
|
9457
|
|
|
|
|
|
|
|
|
9458
|
|
|
|
|
|
|
static vector descriptions; |
|
9459
|
|
|
|
|
|
|
|
|
9460
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
|
9461
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
|
9462
|
|
|
|
|
|
|
|
|
9463
|
|
|
|
|
|
|
using elementary_features |
|
9464
|
|
|
|
|
|
|
}; |
|
9465
|
|
|
|
|
|
|
|
|
9466
|
|
|
|
|
|
|
// NOTE(review): the template argument was lost when this listing was
// extracted; persistent_elementary_feature_map is presumed — confirm against
// the elementary_features header.
typedef generic_elementary_features<persistent_elementary_feature_map> persistent_generic_elementary_features;
|
9467
|
|
|
|
|
|
|
|
|
9468
|
|
|
|
|
|
|
// Definitions |
|
9469
|
|
|
|
|
|
|
template |
|
9470
|
1
|
|
|
|
|
|
generic_elementary_features |
|
9471
|
1
|
50
|
|
|
|
|
maps.resize(MAP_TOTAL); |
|
9472
|
1
|
|
|
|
|
|
} |
|
9473
|
|
|
|
|
|
|
|
|
9474
|
|
|
|
|
|
|
template |
|
9475
|
|
|
|
|
|
|
vector generic_elementary_features |
|
9476
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
|
9477
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
|
9478
|
|
|
|
|
|
|
{"FollowingVerbLemma", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_LEMMA, MAP_LEMMA }, |
|
9479
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
|
9480
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
|
9481
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
|
9482
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
|
9483
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
|
9484
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
|
9485
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
|
9486
|
|
|
|
|
|
|
{"Prefix5", PER_FORM, ONLY_CURRENT, PREFIX5, MAP_PREFIX5}, |
|
9487
|
|
|
|
|
|
|
{"Prefix6", PER_FORM, ONLY_CURRENT, PREFIX6, MAP_PREFIX6}, |
|
9488
|
|
|
|
|
|
|
{"Prefix7", PER_FORM, ONLY_CURRENT, PREFIX7, MAP_PREFIX7}, |
|
9489
|
|
|
|
|
|
|
{"Prefix8", PER_FORM, ONLY_CURRENT, PREFIX8, MAP_PREFIX8}, |
|
9490
|
|
|
|
|
|
|
{"Prefix9", PER_FORM, ONLY_CURRENT, PREFIX9, MAP_PREFIX9}, |
|
9491
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
|
9492
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
|
9493
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
|
9494
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
|
9495
|
|
|
|
|
|
|
{"Suffix5", PER_FORM, ONLY_CURRENT, SUFFIX5, MAP_SUFFIX5}, |
|
9496
|
|
|
|
|
|
|
{"Suffix6", PER_FORM, ONLY_CURRENT, SUFFIX6, MAP_SUFFIX6}, |
|
9497
|
|
|
|
|
|
|
{"Suffix7", PER_FORM, ONLY_CURRENT, SUFFIX7, MAP_SUFFIX7}, |
|
9498
|
|
|
|
|
|
|
{"Suffix8", PER_FORM, ONLY_CURRENT, SUFFIX8, MAP_SUFFIX8}, |
|
9499
|
|
|
|
|
|
|
{"Suffix9", PER_FORM, ONLY_CURRENT, SUFFIX9, MAP_SUFFIX9}, |
|
9500
|
|
|
|
|
|
|
|
|
9501
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
|
9502
|
|
|
|
|
|
|
{"Tag1", PER_TAG, ANY_OFFSET, TAG1, MAP_TAG1}, |
|
9503
|
|
|
|
|
|
|
{"Tag2", PER_TAG, ANY_OFFSET, TAG2, MAP_TAG2}, |
|
9504
|
|
|
|
|
|
|
{"Tag3", PER_TAG, ANY_OFFSET, TAG3, MAP_TAG3}, |
|
9505
|
|
|
|
|
|
|
{"Tag4", PER_TAG, ANY_OFFSET, TAG4, MAP_TAG4}, |
|
9506
|
|
|
|
|
|
|
{"Tag5", PER_TAG, ANY_OFFSET, TAG5, MAP_TAG5}, |
|
9507
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
|
9508
|
|
|
|
|
|
|
|
|
9509
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
|
9510
|
|
|
|
|
|
|
{"PreviousVerbLemma", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_LEMMA, MAP_LEMMA} |
|
9511
|
|
|
|
|
|
|
}; |
|
9512
|
|
|
|
|
|
|
|
|
9513
|
|
|
|
|
|
|
template |
|
9514
|
2
|
|
|
|
|
|
void generic_elementary_features |
|
9515
|
|
|
|
|
|
|
using namespace unilib; |
|
9516
|
|
|
|
|
|
|
|
|
9517
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
|
9518
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_lemma = elementary_feature_empty; |
|
9519
|
9
|
100
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
|
9520
|
|
|
|
|
|
|
int verb_candidate = -1; |
|
9521
|
|
|
|
|
|
|
|
|
9522
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
|
9523
|
18
|
100
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
|
9524
|
22
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(analyses[i][j].tag.c_str(), analyses[i][j].tag.size()); |
|
9525
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG1] = analyses[i][j].tag.size() >= 1 ? maps[MAP_TAG1].value(analyses[i][j].tag.c_str() + 0, 1) : elementary_feature_empty; |
|
9526
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG2] = analyses[i][j].tag.size() >= 2 ? maps[MAP_TAG2].value(analyses[i][j].tag.c_str() + 1, 1) : elementary_feature_empty; |
|
9527
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
|
9528
|
11
|
100
|
|
|
|
|
per_tag[i][j].values[TAG4] = analyses[i][j].tag.size() >= 4 ? maps[MAP_TAG4].value(analyses[i][j].tag.c_str() + 3, 1) : elementary_feature_empty; |
|
9529
|
11
|
50
|
|
|
|
|
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
|
9530
|
11
|
100
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
|
50
|
|
|
|
|
|
|
9531
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(analyses[i][j].lemma.c_str(), analyses[i][j].lemma.size()); |
|
9532
|
|
|
|
|
|
|
|
|
9533
|
11
|
100
|
|
|
|
|
if (analyses[i][j].tag[0] == 'V') { |
|
9534
|
|
|
|
|
|
|
int tag_compare; |
|
9535
|
3
|
100
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
|
50
|
|
|
|
|
|
|
9536
|
|
|
|
|
|
|
} |
|
9537
|
|
|
|
|
|
|
} |
|
9538
|
|
|
|
|
|
|
|
|
9539
|
|
|
|
|
|
|
// Per_form features |
|
9540
|
14
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
|
9541
|
7
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
|
9542
|
7
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_LEMMA] = following_verb_lemma; |
|
9543
|
|
|
|
|
|
|
|
|
9544
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
|
9545
|
7
|
100
|
|
|
|
|
if (verb_candidate >= 0) { |
|
9546
|
4
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
|
9547
|
2
|
|
|
|
|
|
following_verb_lemma = per_tag[i][verb_candidate].values[LEMMA]; |
|
9548
|
|
|
|
|
|
|
} |
|
9549
|
|
|
|
|
|
|
|
|
9550
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
|
9551
|
7
|
100
|
|
|
|
|
if (analyses[i].size() == 1) { |
|
9552
|
5
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
|
9553
|
5
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_unknown; |
|
9554
|
5
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_unknown; |
|
9555
|
5
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_unknown; |
|
9556
|
5
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_unknown; |
|
9557
|
5
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_unknown; |
|
9558
|
5
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_unknown; |
|
9559
|
2
|
50
|
|
|
|
|
} else if (forms[i].len <= 0) { |
|
9560
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
|
9561
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_empty; |
|
9562
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_empty; |
|
9563
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_empty; |
|
9564
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_empty; |
|
9565
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_empty; |
|
9566
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_empty; |
|
9567
|
|
|
|
|
|
|
} else { |
|
9568
|
2
|
|
|
|
|
|
string_piece form = forms[i]; |
|
9569
|
2
|
|
|
|
|
|
const char* form_start = form.str; |
|
9570
|
|
|
|
|
|
|
|
|
9571
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
|
9572
|
11
|
|
|
|
|
|
size_t indices[18] = {0, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, 0, 0, 0, 0, 0, 0, 0, 0}; // careful here regarding forms shorter than 9 characters |
|
9573
|
|
|
|
|
|
|
int index = 0; |
|
9574
|
11
|
100
|
|
|
|
|
while (form.len) { |
|
9575
|
9
|
|
|
|
|
|
indices[(index++) % 18] = form.str - form_start; |
|
9576
|
|
|
|
|
|
|
|
|
9577
|
9
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
|
9578
|
9
|
50
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
|
50
|
|
|
|
|
|
|
9579
|
9
|
50
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
|
50
|
|
|
|
|
|
|
9580
|
9
|
50
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
|
50
|
|
|
|
|
|
|
9581
|
|
|
|
|
|
|
|
|
9582
|
9
|
50
|
|
|
|
|
if (index == 10 || (!form.len && index < 10)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
9583
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
|
9584
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
|
9585
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
|
9586
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
|
9587
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX5] = maps[MAP_PREFIX5].value(form_start, indices[5]); |
|
9588
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX6] = maps[MAP_PREFIX6].value(form_start, indices[6]); |
|
9589
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX7] = maps[MAP_PREFIX7].value(form_start, indices[7]); |
|
9590
|
4
|
|
|
|
|
|
per_form[i].values[PREFIX8] = maps[MAP_PREFIX8].value(form_start, indices[8]); |
|
9591
|
2
|
|
|
|
|
|
per_form[i].values[PREFIX9] = maps[MAP_PREFIX9].value(form_start, indices[9]); |
|
9592
|
|
|
|
|
|
|
} |
|
9593
|
|
|
|
|
|
|
} |
|
9594
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index+18-1) % 18], form.str - form_start - indices[(index+18-1) % 18]); |
|
9595
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index+18-2) % 18], form.str - form_start - indices[(index+18-2) % 18]); |
|
9596
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index+18-3) % 18], form.str - form_start - indices[(index+18-3) % 18]); |
|
9597
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index+18-4) % 18], form.str - form_start - indices[(index+18-4) % 18]); |
|
9598
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX5] = maps[MAP_SUFFIX5].value(form_start + indices[(index+18-5) % 18], form.str - form_start - indices[(index+18-5) % 18]); |
|
9599
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX6] = maps[MAP_SUFFIX6].value(form_start + indices[(index+18-6) % 18], form.str - form_start - indices[(index+18-6) % 18]); |
|
9600
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = maps[MAP_SUFFIX7].value(form_start + indices[(index+18-7) % 18], form.str - form_start - indices[(index+18-7) % 18]); |
|
9601
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX8] = maps[MAP_SUFFIX8].value(form_start + indices[(index+18-8) % 18], form.str - form_start - indices[(index+18-8) % 18]); |
|
9602
|
4
|
|
|
|
|
|
per_form[i].values[SUFFIX9] = maps[MAP_SUFFIX9].value(form_start + indices[(index+18-9) % 18], form.str - form_start - indices[(index+18-9) % 18]); |
|
9603
|
2
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
|
9604
|
2
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
|
9605
|
2
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
|
9606
|
|
|
|
|
|
|
} |
|
9607
|
|
|
|
|
|
|
} |
|
9608
|
2
|
|
|
|
|
|
} |
|
9609
|
|
|
|
|
|
|
|
|
9610
|
|
|
|
|
|
|
template |
|
9611
|
|
|
|
|
|
|
void generic_elementary_features |
|
9612
|
22
|
100
|
|
|
|
|
if (prev_dynamic) { |
|
9613
|
20
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
|
9614
|
20
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_LEMMA]; |
|
9615
|
|
|
|
|
|
|
} else { |
|
9616
|
2
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
|
9617
|
2
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = elementary_feature_empty; |
|
9618
|
|
|
|
|
|
|
} |
|
9619
|
|
|
|
|
|
|
|
|
9620
|
22
|
100
|
|
|
|
|
if (tag.tag[0] == 'V') { |
|
9621
|
3
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
|
9622
|
3
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = per_tag.values[LEMMA]; |
|
9623
|
|
|
|
|
|
|
} else { |
|
9624
|
19
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
|
9625
|
19
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = dynamic.values[PREVIOUS_VERB_LEMMA]; |
|
9626
|
|
|
|
|
|
|
} |
|
9627
|
|
|
|
|
|
|
} |
|
9628
|
|
|
|
|
|
|
|
|
9629
|
|
|
|
|
|
|
} // namespace morphodita |
|
9630
|
|
|
|
|
|
|
|
|
9631
|
|
|
|
|
|
|
///////// |
|
9632
|
|
|
|
|
|
|
// File: utils/threadsafe_stack.h |
|
9633
|
|
|
|
|
|
|
///////// |
|
9634
|
|
|
|
|
|
|
|
|
9635
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
9636
|
|
|
|
|
|
|
// |
|
9637
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9638
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9639
|
|
|
|
|
|
|
// |
|
9640
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9641
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9642
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9643
|
|
|
|
|
|
|
|
|
9644
|
|
|
|
|
|
|
namespace utils { |
|
9645
|
|
|
|
|
|
|
|
|
9646
|
|
|
|
|
|
|
// |
|
9647
|
|
|
|
|
|
|
// Declarations |
|
9648
|
|
|
|
|
|
|
// |
|
9649
|
|
|
|
|
|
|
|
|
9650
|
|
|
|
|
|
|
// A LIFO pool of heap-allocated T objects protected by a spin lock.
//
// push() takes ownership of the given pointer. pop() transfers ownership of
// the most recently pushed object back to the caller, or returns nullptr when
// the stack is empty. Objects still stored when the stack is destroyed are
// deleted with it.
template <class T>
class threadsafe_stack {
 public:
  inline void push(T* t);
  inline T* pop();

 private:
  // Spins until the flag is acquired and clears it on destruction, so the
  // lock is released on every exit path, including exceptions.
  struct spin_guard {
    explicit spin_guard(std::atomic_flag& f) : flag(f) {
      while (flag.test_and_set(std::memory_order_acquire)) {}
    }
    ~spin_guard() { flag.clear(std::memory_order_release); }
    spin_guard(const spin_guard&) = delete;
    spin_guard& operator=(const spin_guard&) = delete;

    std::atomic_flag& flag;
  };

  std::vector<std::unique_ptr<T>> stack;
  std::atomic_flag lock = ATOMIC_FLAG_INIT;
};

//
// Definitions
//

template <class T>
void threadsafe_stack<T>::push(T* t) {
  // Own the object before anything can throw, so it is not leaked if the
  // vector fails to grow.
  std::unique_ptr<T> owned(t);

  spin_guard guard(lock);
  stack.emplace_back(owned.get());
  owned.release();  // the stack entry is now the sole owner
}

template <class T>
T* threadsafe_stack<T>::pop() {
  spin_guard guard(lock);
  if (stack.empty()) return nullptr;

  T* res = stack.back().release();
  stack.pop_back();
  return res;
}
|
9685
|
|
|
|
|
|
|
|
|
9686
|
|
|
|
|
|
|
} // namespace utils |
|
9687
|
|
|
|
|
|
|
|
|
9688
|
|
|
|
|
|
|
///////// |
|
9689
|
|
|
|
|
|
|
// File: morphodita/tagger/perceptron_tagger.h |
|
9690
|
|
|
|
|
|
|
///////// |
|
9691
|
|
|
|
|
|
|
|
|
9692
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9693
|
|
|
|
|
|
|
// |
|
9694
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9695
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9696
|
|
|
|
|
|
|
// |
|
9697
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9698
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9699
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9700
|
|
|
|
|
|
|
|
|
9701
|
|
|
|
|
|
|
namespace morphodita { |
|
9702
|
|
|
|
|
|
|
|
|
9703
|
|
|
|
|
|
|
// Declarations |
|
9704
|
|
|
|
|
|
|
template |
|
9705
|
0
|
|
|
|
|
|
class perceptron_tagger : public tagger { |
|
9706
|
|
|
|
|
|
|
public: |
|
9707
|
|
|
|
|
|
|
perceptron_tagger(int decoding_order, int window_size); |
|
9708
|
|
|
|
|
|
|
|
|
9709
|
|
|
|
|
|
|
bool load(istream& is); |
|
9710
|
|
|
|
|
|
|
virtual const morpho* get_morpho() const override; |
|
9711
|
|
|
|
|
|
|
virtual void tag(const vector& forms, vector& tags, morpho::guesser_mode guesser = morpho::guesser_mode(-1)) const override; |
|
9712
|
|
|
|
|
|
|
virtual void tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const override; |
|
9713
|
|
|
|
|
|
|
|
|
9714
|
|
|
|
|
|
|
private: |
|
9715
|
|
|
|
|
|
|
int decoding_order, window_size; |
|
9716
|
|
|
|
|
|
|
|
|
9717
|
|
|
|
|
|
|
unique_ptr dict; |
|
9718
|
|
|
|
|
|
|
bool use_guesser; |
|
9719
|
|
|
|
|
|
|
FeatureSequences features; |
|
9720
|
|
|
|
|
|
|
typedef viterbi viterbi_decoder; |
|
9721
|
|
|
|
|
|
|
viterbi_decoder decoder; |
|
9722
|
0
|
|
|
|
|
|
struct cache { |
|
9723
|
|
|
|
|
|
|
vector forms; |
|
9724
|
|
|
|
|
|
|
vector> analyses; |
|
9725
|
|
|
|
|
|
|
vector tags; |
|
9726
|
|
|
|
|
|
|
typename viterbi_decoder::cache decoder_cache; |
|
9727
|
|
|
|
|
|
|
|
|
9728
|
1
|
0
|
|
|
|
|
cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {} |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9729
|
|
|
|
|
|
|
}; |
|
9730
|
|
|
|
|
|
|
|
|
9731
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
|
9732
|
|
|
|
|
|
|
}; |
|
9733
|
|
|
|
|
|
|
|
|
9734
|
|
|
|
|
|
|
// Definitions |
|
9735
|
|
|
|
|
|
|
|
|
9736
|
|
|
|
|
|
|
template |
|
9737
|
1
|
|
|
|
|
|
perceptron_tagger::perceptron_tagger(int decoding_order, int window_size) |
|
9738
|
1
|
|
|
|
|
|
: decoding_order(decoding_order), window_size(window_size), decoder(features, decoding_order, window_size) {} |
|
9739
|
|
|
|
|
|
|
|
|
9740
|
|
|
|
|
|
|
template |
|
9741
|
1
|
|
|
|
|
|
bool perceptron_tagger::load(istream& is) { |
|
9742
|
2
|
0
|
|
|
|
|
if (dict.reset(morpho::load(is)), !dict) return false; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9743
|
1
|
|
|
|
|
|
use_guesser = is.get(); |
|
9744
|
1
|
0
|
|
|
|
|
if (!features.load(is)) return false; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9745
|
1
|
|
|
|
|
|
return true; |
|
9746
|
|
|
|
|
|
|
} |
|
9747
|
|
|
|
|
|
|
|
|
9748
|
|
|
|
|
|
|
template |
|
9749
|
1
|
|
|
|
|
|
const morpho* perceptron_tagger::get_morpho() const { |
|
9750
|
1
|
|
|
|
|
|
return dict.get(); |
|
9751
|
|
|
|
|
|
|
} |
|
9752
|
|
|
|
|
|
|
|
|
9753
|
|
|
|
|
|
|
template |
|
9754
|
2
|
|
|
|
|
|
void perceptron_tagger::tag(const vector& forms, vector& tags, morpho::guesser_mode guesser) const { |
|
9755
|
|
|
|
|
|
|
tags.clear(); |
|
9756
|
2
|
0
|
|
|
|
|
if (!dict) return; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9757
|
|
|
|
|
|
|
|
|
9758
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
|
9759
|
2
|
0
|
|
|
|
|
if (!c) c = new cache(*this); |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9760
|
|
|
|
|
|
|
|
|
9761
|
2
|
|
|
|
|
|
c->forms.resize(forms.size()); |
|
9762
|
2
|
0
|
|
|
|
|
if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size()); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9763
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9764
|
7
|
|
|
|
|
|
c->forms[i] = forms[i]; |
|
9765
|
7
|
|
|
|
|
|
c->forms[i].len = dict->raw_form_len(forms[i]); |
|
9766
|
7
|
0
|
|
|
|
|
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9767
|
|
|
|
|
|
|
} |
|
9768
|
|
|
|
|
|
|
|
|
9769
|
2
|
0
|
|
|
|
|
if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2); |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9770
|
2
|
|
|
|
|
|
decoder.tag(c->forms, c->analyses, c->decoder_cache, c->tags); |
|
9771
|
|
|
|
|
|
|
|
|
9772
|
9
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) |
|
|
|
100
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9773
|
7
|
|
|
|
|
|
tags.emplace_back(c->analyses[i][c->tags[i]]); |
|
9774
|
|
|
|
|
|
|
|
|
9775
|
2
|
|
|
|
|
|
caches.push(c); |
|
9776
|
|
|
|
|
|
|
} |
|
9777
|
|
|
|
|
|
|
|
|
9778
|
|
|
|
|
|
|
template |
|
9779
|
0
|
|
|
|
|
|
void perceptron_tagger::tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const { |
|
9780
|
|
|
|
|
|
|
tags.clear(); |
|
9781
|
|
|
|
|
|
|
|
|
9782
|
0
|
|
|
|
|
|
cache* c = caches.pop(); |
|
9783
|
0
|
0
|
|
|
|
|
if (!c) c = new cache(*this); |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9784
|
|
|
|
|
|
|
|
|
9785
|
0
|
|
|
|
|
|
tags.resize(forms.size()); |
|
9786
|
0
|
|
|
|
|
|
decoder.tag(forms, analyses, c->decoder_cache, tags); |
|
9787
|
|
|
|
|
|
|
|
|
9788
|
0
|
|
|
|
|
|
caches.push(c); |
|
9789
|
0
|
|
|
|
|
|
} |
|
9790
|
|
|
|
|
|
|
|
|
9791
|
|
|
|
|
|
|
} // namespace morphodita |
|
9792
|
|
|
|
|
|
|
|
|
9793
|
|
|
|
|
|
|
///////// |
|
9794
|
|
|
|
|
|
|
// File: morphodita/tagger/tagger_ids.h |
|
9795
|
|
|
|
|
|
|
///////// |
|
9796
|
|
|
|
|
|
|
|
|
9797
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9798
|
|
|
|
|
|
|
// |
|
9799
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9800
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9801
|
|
|
|
|
|
|
// |
|
9802
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9803
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9804
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9805
|
|
|
|
|
|
|
|
|
9806
|
|
|
|
|
|
|
namespace morphodita { |
|
9807
|
|
|
|
|
|
|
|
|
9808
|
|
|
|
|
|
|
// Identifiers of the tagger variants, with their textual names and decoding
// parameters.
class tagger_ids {
 public:
  enum tagger_id {
    CZECH2 = 0, CZECH3 = 1, CZECH2_3 = 6,
    /* 2 was used internally for ENGLISH3, but never released publicly */
    GENERIC2 = 3, GENERIC3 = 4, GENERIC4 = 5, GENERIC2_3 = 7,
    CONLLU2 = 8, CONLLU2_3 = 9, CONLLU3 = 10,
  };

  // Converts a tagger name to its id. Returns true and sets `id` when the
  // name is known; returns false and leaves `id` untouched otherwise.
  static bool parse(const std::string& str, tagger_id& id) {
    static const struct { const char* name; tagger_id id; } known[] = {
      {"czech2", CZECH2},
      {"czech2_3", CZECH2_3},
      {"czech3", CZECH3},
      {"generic2", GENERIC2},
      {"generic2_3", GENERIC2_3},
      {"generic3", GENERIC3},
      {"generic4", GENERIC4},
      {"conllu2", CONLLU2},
      {"conllu2_3", CONLLU2_3},
      {"conllu3", CONLLU3},
    };

    for (const auto& entry : known)
      if (str == entry.name) return id = entry.id, true;
    return false;
  }

  // Decoding order of the given tagger, or 0 for a value that is not one of
  // the enumerators.
  static int decoding_order(tagger_id id) {
    switch (id) {
      case CZECH2: return 2;
      case CZECH2_3: return 2;
      case CZECH3: return 3;
      case GENERIC2: return 2;
      case GENERIC2_3: return 2;
      case GENERIC3: return 3;
      case GENERIC4: return 4;
      case CONLLU2: return 2;
      case CONLLU2_3: return 2;
      case CONLLU3: return 3;
    }
    return 0;
  }

  // Window size of the given tagger: 3 for the "*2_3" variants, otherwise
  // the same as the decoding order.
  static int window_size(tagger_id id) {
    switch (id) {
      case CZECH2_3: return 3;
      case GENERIC2_3: return 3;
      case CONLLU2_3: return 3;
      default: break;
    }
    return decoding_order(id);
  }
};
|
9857
|
|
|
|
|
|
|
|
|
9858
|
|
|
|
|
|
|
typedef tagger_ids::tagger_id tagger_id; |
|
9859
|
|
|
|
|
|
|
|
|
9860
|
|
|
|
|
|
|
} // namespace morphodita |
|
9861
|
|
|
|
|
|
|
|
|
9862
|
|
|
|
|
|
|
///////// |
|
9863
|
|
|
|
|
|
|
// File: morphodita/tagger/tagger.cpp |
|
9864
|
|
|
|
|
|
|
///////// |
|
9865
|
|
|
|
|
|
|
|
|
9866
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9867
|
|
|
|
|
|
|
// |
|
9868
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9869
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9870
|
|
|
|
|
|
|
// |
|
9871
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9872
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9873
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9874
|
|
|
|
|
|
|
|
|
9875
|
|
|
|
|
|
|
namespace morphodita { |
|
9876
|
|
|
|
|
|
|
|
|
9877
|
1
|
|
|
|
|
|
tagger* tagger::load(istream& is) { |
|
9878
|
1
|
50
|
|
|
|
|
tagger_id id = tagger_id(is.get()); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9879
|
|
|
|
|
|
|
switch (id) { |
|
9880
|
|
|
|
|
|
|
case tagger_ids::CZECH2: |
|
9881
|
|
|
|
|
|
|
case tagger_ids::CZECH2_3: |
|
9882
|
|
|
|
|
|
|
case tagger_ids::CZECH3: |
|
9883
|
|
|
|
|
|
|
{ |
|
9884
|
0
|
0
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
|
9885
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
0
|
|
|
|
|
|
|
9886
|
|
|
|
|
|
|
break; |
|
9887
|
|
|
|
|
|
|
} |
|
9888
|
|
|
|
|
|
|
case tagger_ids::GENERIC2: |
|
9889
|
|
|
|
|
|
|
case tagger_ids::GENERIC2_3: |
|
9890
|
|
|
|
|
|
|
case tagger_ids::GENERIC3: |
|
9891
|
|
|
|
|
|
|
case tagger_ids::GENERIC4: |
|
9892
|
|
|
|
|
|
|
{ |
|
9893
|
1
|
50
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
|
9894
|
1
|
50
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
50
|
|
|
|
|
|
|
9895
|
|
|
|
|
|
|
break; |
|
9896
|
|
|
|
|
|
|
} |
|
9897
|
|
|
|
|
|
|
case tagger_ids::CONLLU2: |
|
9898
|
|
|
|
|
|
|
case tagger_ids::CONLLU2_3: |
|
9899
|
|
|
|
|
|
|
case tagger_ids::CONLLU3: |
|
9900
|
|
|
|
|
|
|
{ |
|
9901
|
0
|
0
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
|
9902
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
0
|
|
|
|
|
|
|
9903
|
|
|
|
|
|
|
break; |
|
9904
|
|
|
|
|
|
|
} |
|
9905
|
|
|
|
|
|
|
} |
|
9906
|
|
|
|
|
|
|
|
|
9907
|
|
|
|
|
|
|
return nullptr; |
|
9908
|
|
|
|
|
|
|
} |
|
9909
|
|
|
|
|
|
|
|
|
9910
|
0
|
|
|
|
|
|
tagger* tagger::load(const char* fname) { |
|
9911
|
0
|
0
|
|
|
|
|
ifstream f(path_from_utf8(fname).c_str(), ifstream::binary); |
|
9912
|
0
|
0
|
|
|
|
|
if (!f) return nullptr; |
|
9913
|
|
|
|
|
|
|
|
|
9914
|
0
|
0
|
|
|
|
|
return load(f); |
|
9915
|
|
|
|
|
|
|
} |
|
9916
|
|
|
|
|
|
|
|
|
9917
|
0
|
|
|
|
|
|
tokenizer* tagger::new_tokenizer() const { |
|
9918
|
0
|
|
|
|
|
|
auto morpho = get_morpho(); |
|
9919
|
0
|
0
|
|
|
|
|
return morpho ? morpho->new_tokenizer() : nullptr; |
|
9920
|
|
|
|
|
|
|
} |
|
9921
|
|
|
|
|
|
|
|
|
9922
|
|
|
|
|
|
|
} // namespace morphodita |
|
9923
|
|
|
|
|
|
|
|
|
9924
|
|
|
|
|
|
|
///////// |
|
9925
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/identity_tagset_converter.h |
|
9926
|
|
|
|
|
|
|
///////// |
|
9927
|
|
|
|
|
|
|
|
|
9928
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9929
|
|
|
|
|
|
|
// |
|
9930
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9931
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9932
|
|
|
|
|
|
|
// |
|
9933
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9934
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9935
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9936
|
|
|
|
|
|
|
|
|
9937
|
|
|
|
|
|
|
namespace morphodita { |
|
9938
|
|
|
|
|
|
|
|
|
9939
|
0
|
|
|
|
|
|
class identity_tagset_converter : public tagset_converter { |
|
9940
|
|
|
|
|
|
|
public: |
|
9941
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
|
9942
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
|
9943
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
|
9944
|
|
|
|
|
|
|
}; |
|
9945
|
|
|
|
|
|
|
|
|
9946
|
|
|
|
|
|
|
} // namespace morphodita |
|
9947
|
|
|
|
|
|
|
|
|
9948
|
|
|
|
|
|
|
///////// |
|
9949
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/identity_tagset_converter.cpp |
|
9950
|
|
|
|
|
|
|
///////// |
|
9951
|
|
|
|
|
|
|
|
|
9952
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9953
|
|
|
|
|
|
|
// |
|
9954
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9955
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9956
|
|
|
|
|
|
|
// |
|
9957
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9958
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9959
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9960
|
|
|
|
|
|
|
|
|
9961
|
|
|
|
|
|
|
namespace morphodita { |
|
9962
|
|
|
|
|
|
|
|
|
9963
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert(tagged_lemma& /*tagged_lemma*/) const {} |
|
9964
|
|
|
|
|
|
|
|
|
9965
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert_analyzed(vector& /*tagged_lemmas*/) const {} |
|
9966
|
|
|
|
|
|
|
|
|
9967
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert_generated(vector& /*forms*/) const {} |
|
9968
|
|
|
|
|
|
|
|
|
9969
|
|
|
|
|
|
|
} // namespace morphodita |
|
9970
|
|
|
|
|
|
|
|
|
9971
|
|
|
|
|
|
|
///////// |
|
9972
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/pdt_to_conll2009_tagset_converter.h |
|
9973
|
|
|
|
|
|
|
///////// |
|
9974
|
|
|
|
|
|
|
|
|
9975
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
9976
|
|
|
|
|
|
|
// |
|
9977
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
9978
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
9979
|
|
|
|
|
|
|
// |
|
9980
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
9981
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
9982
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
9983
|
|
|
|
|
|
|
|
|
9984
|
|
|
|
|
|
|
namespace morphodita { |
|
9985
|
|
|
|
|
|
|
|
|
9986
|
0
|
|
|
|
|
|
class pdt_to_conll2009_tagset_converter : public tagset_converter { |
|
9987
|
|
|
|
|
|
|
public: |
|
9988
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
|
9989
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
|
9990
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
|
9991
|
|
|
|
|
|
|
|
|
9992
|
|
|
|
|
|
|
private: |
|
9993
|
|
|
|
|
|
|
inline void convert_tag(const string& lemma, string& tag) const; |
|
9994
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
|
9995
|
|
|
|
|
|
|
}; |
|
9996
|
|
|
|
|
|
|
|
|
9997
|
|
|
|
|
|
|
} // namespace morphodita |
|
9998
|
|
|
|
|
|
|
|
|
9999
|
|
|
|
|
|
|
///////// |
|
10000
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/pdt_to_conll2009_tagset_converter.cpp |
|
10001
|
|
|
|
|
|
|
///////// |
|
10002
|
|
|
|
|
|
|
|
|
10003
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10004
|
|
|
|
|
|
|
// |
|
10005
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10006
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10007
|
|
|
|
|
|
|
// |
|
10008
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10009
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10010
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10011
|
|
|
|
|
|
|
|
|
10012
|
|
|
|
|
|
|
namespace morphodita { |
|
10013
|
|
|
|
|
|
|
|
|
10014
|
|
|
|
|
|
|
static const char* names[15] = {"POS", "SubPOS", "Gen", "Num", "Cas", "PGe", "PNu", "Per", "Ten", "Gra", "Neg", "Voi", "", "", "Var"}; |
|
10015
|
|
|
|
|
|
|
|
|
10016
|
0
|
|
|
|
|
|
inline void pdt_to_conll2009_tagset_converter::convert_tag(const string& lemma, string& tag) const { |
|
10017
|
|
|
|
|
|
|
char pdt_tag[16]; |
|
10018
|
|
|
|
|
|
|
strncpy(pdt_tag, tag.c_str(), 15); |
|
10019
|
|
|
|
|
|
|
|
|
10020
|
|
|
|
|
|
|
// Clear the tag |
|
10021
|
|
|
|
|
|
|
tag.clear(); |
|
10022
|
|
|
|
|
|
|
|
|
10023
|
|
|
|
|
|
|
// Fill FEAT of filled tag characters |
|
10024
|
0
|
0
|
|
|
|
|
for (int i = 0; i < 15 && pdt_tag[i]; i++) |
|
|
|
0
|
|
|
|
|
|
|
10025
|
0
|
0
|
|
|
|
|
if (pdt_tag[i] != '-') { |
|
10026
|
0
|
0
|
|
|
|
|
if (!tag.empty()) tag.push_back('|'); |
|
10027
|
0
|
|
|
|
|
|
tag.append(names[i]); |
|
10028
|
0
|
|
|
|
|
|
tag.push_back('='); |
|
10029
|
0
|
|
|
|
|
|
tag.push_back(pdt_tag[i]); |
|
10030
|
|
|
|
|
|
|
} |
|
10031
|
|
|
|
|
|
|
|
|
10032
|
|
|
|
|
|
|
// Try adding Sem FEAT |
|
10033
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i + 2 < lemma.size(); i++) |
|
10034
|
0
|
0
|
|
|
|
|
if (lemma[i] == '_' && lemma[i + 1] == ';') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10035
|
0
|
0
|
|
|
|
|
if (!tag.empty()) tag.push_back('|'); |
|
10036
|
0
|
|
|
|
|
|
tag.append("Sem="); |
|
10037
|
0
|
|
|
|
|
|
tag.push_back(lemma[i + 2]); |
|
10038
|
|
|
|
|
|
|
break; |
|
10039
|
|
|
|
|
|
|
} |
|
10040
|
0
|
|
|
|
|
|
} |
|
10041
|
|
|
|
|
|
|
|
|
10042
|
0
|
|
|
|
|
|
inline bool pdt_to_conll2009_tagset_converter::convert_lemma(string& lemma) const { |
|
10043
|
0
|
|
|
|
|
|
unsigned raw_lemma = czech_lemma_addinfo::raw_lemma_len(lemma); |
|
10044
|
0
|
0
|
|
|
|
|
return raw_lemma < lemma.size() ? (lemma.resize(raw_lemma), true) : false; |
|
10045
|
|
|
|
|
|
|
} |
|
10046
|
|
|
|
|
|
|
|
|
10047
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
|
10048
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma.lemma, tagged_lemma.tag); |
|
10049
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
|
10050
|
0
|
|
|
|
|
|
} |
|
10051
|
|
|
|
|
|
|
|
|
10052
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
|
10053
|
|
|
|
|
|
|
bool lemma_changed = false; |
|
10054
|
|
|
|
|
|
|
|
|
10055
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) { |
|
10056
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma.lemma, tagged_lemma.tag); |
|
10057
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
|
10058
|
|
|
|
|
|
|
} |
|
10059
|
|
|
|
|
|
|
|
|
10060
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
|
10061
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10062
|
|
|
|
|
|
|
|
|
10063
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
|
10064
|
|
|
|
|
|
|
} |
|
10065
|
|
|
|
|
|
|
|
|
10066
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert_generated(vector& forms) const { |
|
10067
|
|
|
|
|
|
|
bool lemma_changed = false; |
|
10068
|
|
|
|
|
|
|
|
|
10069
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) { |
|
10070
|
0
|
0
|
|
|
|
|
for (auto&& tagged_form : tagged_lemma_forms.forms) |
|
10071
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma_forms.lemma, tagged_form.tag); |
|
10072
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
|
10073
|
|
|
|
|
|
|
} |
|
10074
|
|
|
|
|
|
|
|
|
10075
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
|
10076
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10077
|
|
|
|
|
|
|
|
|
10078
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
|
10079
|
|
|
|
|
|
|
} |
|
10080
|
|
|
|
|
|
|
|
|
10081
|
|
|
|
|
|
|
} // namespace morphodita |
|
10082
|
|
|
|
|
|
|
|
|
10083
|
|
|
|
|
|
|
///////// |
|
10084
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_comment_tagset_converter.h |
|
10085
|
|
|
|
|
|
|
///////// |
|
10086
|
|
|
|
|
|
|
|
|
10087
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10088
|
|
|
|
|
|
|
// |
|
10089
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10090
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10091
|
|
|
|
|
|
|
// |
|
10092
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10093
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10094
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10095
|
|
|
|
|
|
|
|
|
10096
|
|
|
|
|
|
|
namespace morphodita { |
|
10097
|
|
|
|
|
|
|
|
|
10098
|
0
|
|
|
|
|
|
class strip_lemma_comment_tagset_converter : public tagset_converter { |
|
10099
|
|
|
|
|
|
|
public: |
|
10100
|
0
|
|
|
|
|
|
strip_lemma_comment_tagset_converter(const morpho& dictionary) : dictionary(dictionary) {} |
|
10101
|
|
|
|
|
|
|
|
|
10102
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
|
10103
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
|
10104
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
|
10105
|
|
|
|
|
|
|
|
|
10106
|
|
|
|
|
|
|
private: |
|
10107
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
|
10108
|
|
|
|
|
|
|
const morpho& dictionary; |
|
10109
|
|
|
|
|
|
|
}; |
|
10110
|
|
|
|
|
|
|
|
|
10111
|
|
|
|
|
|
|
} // namespace morphodita |
|
10112
|
|
|
|
|
|
|
|
|
10113
|
|
|
|
|
|
|
///////// |
|
10114
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_comment_tagset_converter.cpp |
|
10115
|
|
|
|
|
|
|
///////// |
|
10116
|
|
|
|
|
|
|
|
|
10117
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10118
|
|
|
|
|
|
|
// |
|
10119
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10120
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10121
|
|
|
|
|
|
|
// |
|
10122
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10123
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10124
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10125
|
|
|
|
|
|
|
|
|
10126
|
|
|
|
|
|
|
namespace morphodita { |
|
10127
|
|
|
|
|
|
|
|
|
10128
|
0
|
|
|
|
|
|
inline bool strip_lemma_comment_tagset_converter::convert_lemma(string& lemma) const { |
|
10129
|
0
|
|
|
|
|
|
unsigned lemma_id_len = dictionary.lemma_id_len(lemma); |
|
10130
|
0
|
0
|
|
|
|
|
return lemma_id_len < lemma.size() ? (lemma.resize(lemma_id_len), true) : false; |
|
10131
|
|
|
|
|
|
|
} |
|
10132
|
|
|
|
|
|
|
|
|
10133
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
|
10134
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
|
10135
|
0
|
|
|
|
|
|
} |
|
10136
|
|
|
|
|
|
|
|
|
10137
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
|
10138
|
|
|
|
|
|
|
bool lemma_changed = false; |
|
10139
|
|
|
|
|
|
|
|
|
10140
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) |
|
10141
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
|
10142
|
|
|
|
|
|
|
|
|
10143
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
|
10144
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10145
|
|
|
|
|
|
|
|
|
10146
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
|
10147
|
|
|
|
|
|
|
} |
|
10148
|
|
|
|
|
|
|
|
|
10149
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert_generated(vector& forms) const { |
|
10150
|
|
|
|
|
|
|
bool lemma_changed = false; |
|
10151
|
|
|
|
|
|
|
|
|
10152
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) |
|
10153
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
|
10154
|
|
|
|
|
|
|
|
|
10155
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
|
10156
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10157
|
|
|
|
|
|
|
|
|
10158
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
|
10159
|
|
|
|
|
|
|
} |
|
10160
|
|
|
|
|
|
|
|
|
10161
|
|
|
|
|
|
|
} // namespace morphodita |
|
10162
|
|
|
|
|
|
|
|
|
10163
|
|
|
|
|
|
|
///////// |
|
10164
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_id_tagset_converter.h |
|
10165
|
|
|
|
|
|
|
///////// |
|
10166
|
|
|
|
|
|
|
|
|
10167
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10168
|
|
|
|
|
|
|
// |
|
10169
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10170
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10171
|
|
|
|
|
|
|
// |
|
10172
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10173
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10174
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10175
|
|
|
|
|
|
|
|
|
10176
|
|
|
|
|
|
|
namespace morphodita { |
|
10177
|
|
|
|
|
|
|
|
|
10178
|
0
|
|
|
|
|
|
class strip_lemma_id_tagset_converter : public tagset_converter { |
|
10179
|
|
|
|
|
|
|
public: |
|
10180
|
0
|
|
|
|
|
|
strip_lemma_id_tagset_converter(const morpho& dictionary) : dictionary(dictionary) {} |
|
10181
|
|
|
|
|
|
|
|
|
10182
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
|
10183
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
|
10184
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
|
10185
|
|
|
|
|
|
|
|
|
10186
|
|
|
|
|
|
|
private: |
|
10187
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
|
10188
|
|
|
|
|
|
|
const morpho& dictionary; |
|
10189
|
|
|
|
|
|
|
}; |
|
10190
|
|
|
|
|
|
|
|
|
10191
|
|
|
|
|
|
|
} // namespace morphodita |
|
10192
|
|
|
|
|
|
|
|
|
10193
|
|
|
|
|
|
|
///////// |
|
10194
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/strip_lemma_id_tagset_converter.cpp |
|
10195
|
|
|
|
|
|
|
///////// |
|
10196
|
|
|
|
|
|
|
|
|
10197
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10198
|
|
|
|
|
|
|
// |
|
10199
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10200
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10201
|
|
|
|
|
|
|
// |
|
10202
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10203
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10204
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10205
|
|
|
|
|
|
|
|
|
10206
|
|
|
|
|
|
|
namespace morphodita { |
|
10207
|
|
|
|
|
|
|
|
|
10208
|
0
|
|
|
|
|
|
inline bool strip_lemma_id_tagset_converter::convert_lemma(string& lemma) const { |
|
10209
|
0
|
|
|
|
|
|
unsigned raw_lemma_len = dictionary.raw_lemma_len(lemma); |
|
10210
|
0
|
0
|
|
|
|
|
return raw_lemma_len < lemma.size() ? (lemma.resize(raw_lemma_len), true) : false; |
|
10211
|
|
|
|
|
|
|
} |
|
10212
|
|
|
|
|
|
|
|
|
10213
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
|
10214
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
|
10215
|
0
|
|
|
|
|
|
} |
|
10216
|
|
|
|
|
|
|
|
|
10217
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
|
10218
|
|
|
|
|
|
|
bool lemma_changed = false; |
|
10219
|
|
|
|
|
|
|
|
|
10220
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) |
|
10221
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
|
10222
|
|
|
|
|
|
|
|
|
10223
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
|
10224
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10225
|
|
|
|
|
|
|
|
|
10226
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
|
10227
|
|
|
|
|
|
|
} |
|
10228
|
|
|
|
|
|
|
|
|
10229
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert_generated(vector& forms) const { |
|
10230
|
|
|
|
|
|
|
bool lemma_changed = false; |
|
10231
|
|
|
|
|
|
|
|
|
10232
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) |
|
10233
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
|
10234
|
|
|
|
|
|
|
|
|
10235
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
|
10236
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10237
|
|
|
|
|
|
|
|
|
10238
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
|
10239
|
|
|
|
|
|
|
} |
|
10240
|
|
|
|
|
|
|
|
|
10241
|
|
|
|
|
|
|
} // namespace morphodita |
|
10242
|
|
|
|
|
|
|
|
|
10243
|
|
|
|
|
|
|
///////// |
|
10244
|
|
|
|
|
|
|
// File: morphodita/tagset_converter/tagset_converter.cpp |
|
10245
|
|
|
|
|
|
|
///////// |
|
10246
|
|
|
|
|
|
|
|
|
10247
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10248
|
|
|
|
|
|
|
// |
|
10249
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10250
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10251
|
|
|
|
|
|
|
// |
|
10252
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10253
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10254
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10255
|
|
|
|
|
|
|
|
|
10256
|
|
|
|
|
|
|
namespace morphodita { |
|
10257
|
|
|
|
|
|
|
|
|
10258
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_identity_converter() { |
|
10259
|
0
|
|
|
|
|
|
return new identity_tagset_converter(); |
|
10260
|
|
|
|
|
|
|
} |
|
10261
|
|
|
|
|
|
|
|
|
10262
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_pdt_to_conll2009_converter() { |
|
10263
|
0
|
|
|
|
|
|
return new pdt_to_conll2009_tagset_converter(); |
|
10264
|
|
|
|
|
|
|
} |
|
10265
|
|
|
|
|
|
|
|
|
10266
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_strip_lemma_comment_converter(const morpho& dictionary) { |
|
10267
|
0
|
|
|
|
|
|
return new strip_lemma_comment_tagset_converter(dictionary); |
|
10268
|
|
|
|
|
|
|
} |
|
10269
|
|
|
|
|
|
|
|
|
10270
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_strip_lemma_id_converter(const morpho& dictionary) { |
|
10271
|
0
|
|
|
|
|
|
return new strip_lemma_id_tagset_converter(dictionary); |
|
10272
|
|
|
|
|
|
|
} |
|
10273
|
|
|
|
|
|
|
|
|
10274
|
0
|
|
|
|
|
|
tagset_converter* new_tagset_converter(const string& name, const morpho& dictionary) { |
|
10275
|
0
|
0
|
|
|
|
|
if (name == "pdt_to_conll2009") return tagset_converter::new_pdt_to_conll2009_converter(); |
|
10276
|
0
|
0
|
|
|
|
|
if (name == "strip_lemma_comment") return tagset_converter::new_strip_lemma_comment_converter(dictionary); |
|
10277
|
0
|
0
|
|
|
|
|
if (name == "strip_lemma_id") return tagset_converter::new_strip_lemma_id_converter(dictionary); |
|
10278
|
|
|
|
|
|
|
return nullptr; |
|
10279
|
|
|
|
|
|
|
} |
|
10280
|
|
|
|
|
|
|
|
|
10281
|
0
|
|
|
|
|
|
void tagset_converter_unique_analyzed(vector& tagged_lemmas) { |
|
10282
|
|
|
|
|
|
|
// Remove possible lemma-tag pair duplicates |
|
10283
|
|
|
|
|
|
|
struct tagged_lemma_comparator { |
|
10284
|
0
|
0
|
|
|
|
|
inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; } |
|
|
|
0
|
|
|
|
|
|
|
10285
|
0
|
0
|
|
|
|
|
inline static bool lt(const tagged_lemma& a, const tagged_lemma& b) { int lemma_compare = a.lemma.compare(b.lemma); return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); } |
|
10286
|
|
|
|
|
|
|
}; |
|
10287
|
|
|
|
|
|
|
|
|
10288
|
|
|
|
|
|
|
sort(tagged_lemmas.begin(), tagged_lemmas.end(), tagged_lemma_comparator::lt); |
|
10289
|
0
|
|
|
|
|
|
tagged_lemmas.resize(unique(tagged_lemmas.begin(), tagged_lemmas.end(), tagged_lemma_comparator::eq) - tagged_lemmas.begin()); |
|
10290
|
0
|
|
|
|
|
|
} |
|
10291
|
|
|
|
|
|
|
|
|
10292
|
0
|
|
|
|
|
|
void tagset_converter_unique_generated(vector& forms) { |
|
10293
|
|
|
|
|
|
|
// Regroup and if needed remove duplicate form-tag pairs for each lemma |
|
10294
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
10295
|
|
|
|
|
|
|
bool any_merged = false; |
|
10296
|
0
|
0
|
|
|
|
|
for (unsigned j = forms.size() - 1; j > i; j--) |
|
10297
|
0
|
0
|
|
|
|
|
if (forms[j].lemma == forms[i].lemma) { |
|
10298
|
|
|
|
|
|
|
// Same lemma was found. Merge form-tag pairs |
|
10299
|
0
|
0
|
|
|
|
|
for (auto&& tagged_form : forms[j].forms) |
|
10300
|
0
|
|
|
|
|
|
forms[i].forms.emplace_back(move(tagged_form)); |
|
10301
|
|
|
|
|
|
|
|
|
10302
|
|
|
|
|
|
|
// Remove lemma j by moving it to end and deleting |
|
10303
|
0
|
0
|
|
|
|
|
if (j < forms.size() - 1) { |
|
10304
|
0
|
|
|
|
|
|
forms[j].lemma.swap(forms[forms.size() - 1].lemma); |
|
10305
|
0
|
|
|
|
|
|
forms[j].forms.swap(forms[forms.size() - 1].forms); |
|
10306
|
|
|
|
|
|
|
} |
|
10307
|
|
|
|
|
|
|
forms.pop_back(); |
|
10308
|
|
|
|
|
|
|
any_merged = true; |
|
10309
|
|
|
|
|
|
|
} |
|
10310
|
|
|
|
|
|
|
|
|
10311
|
0
|
0
|
|
|
|
|
if (any_merged && forms[i].forms.size() > 1) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10312
|
|
|
|
|
|
|
// Remove duplicate form-tag pairs |
|
10313
|
|
|
|
|
|
|
struct tagged_form_comparator { |
|
10314
|
0
|
0
|
|
|
|
|
inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; } |
|
|
|
0
|
|
|
|
|
|
|
10315
|
0
|
0
|
|
|
|
|
inline static bool lt(const tagged_form& a, const tagged_form& b) { int tag_compare = a.tag.compare(b.tag); return tag_compare < 0 || (tag_compare == 0 && a.form < b.form); } |
|
10316
|
|
|
|
|
|
|
}; |
|
10317
|
|
|
|
|
|
|
|
|
10318
|
|
|
|
|
|
|
sort(forms[i].forms.begin(), forms[i].forms.end(), tagged_form_comparator::lt); |
|
10319
|
0
|
|
|
|
|
|
forms[i].forms.resize(unique(forms[i].forms.begin(), forms[i].forms.end(), tagged_form_comparator::eq) - forms[i].forms.begin()); |
|
10320
|
|
|
|
|
|
|
} |
|
10321
|
|
|
|
|
|
|
} |
|
10322
|
0
|
|
|
|
|
|
} |
|
10323
|
|
|
|
|
|
|
|
|
10324
|
|
|
|
|
|
|
} // namespace morphodita |
|
10325
|
|
|
|
|
|
|
|
|
10326
|
|
|
|
|
|
|
///////// |
|
10327
|
|
|
|
|
|
|
// File: morphodita/tokenizer/czech_tokenizer.cpp |
|
10328
|
|
|
|
|
|
|
///////// |
|
10329
|
|
|
|
|
|
|
|
|
10330
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10331
|
|
|
|
|
|
|
// |
|
10332
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10333
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10334
|
|
|
|
|
|
|
// |
|
10335
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10336
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10337
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10338
|
|
|
|
|
|
|
|
|
10339
|
|
|
|
|
|
|
namespace morphodita { |
|
10340
|
|
|
|
|
|
|
|
|
10341
|
|
|
|
|
|
|
static const char _czech_tokenizer_cond_offsets[] = { |
|
10342
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10343
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
|
10344
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2 |
|
10345
|
|
|
|
|
|
|
}; |
|
10346
|
|
|
|
|
|
|
|
|
10347
|
|
|
|
|
|
|
static const char _czech_tokenizer_cond_lengths[] = { |
|
10348
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 2, |
|
10349
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10350
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
|
10351
|
|
|
|
|
|
|
}; |
|
10352
|
|
|
|
|
|
|
|
|
10353
|
|
|
|
|
|
|
static const short _czech_tokenizer_cond_keys[] = { |
|
10354
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
|
10355
|
|
|
|
|
|
|
}; |
|
10356
|
|
|
|
|
|
|
|
|
10357
|
|
|
|
|
|
|
static const char _czech_tokenizer_cond_spaces[] = { |
|
10358
|
|
|
|
|
|
|
1, 0, 0 |
|
10359
|
|
|
|
|
|
|
}; |
|
10360
|
|
|
|
|
|
|
|
|
10361
|
|
|
|
|
|
|
static const unsigned char _czech_tokenizer_key_offsets[] = { |
|
10362
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 51, 54, |
|
10363
|
|
|
|
|
|
|
89, 94, 98, 101, 105, 110, 111, 116, |
|
10364
|
|
|
|
|
|
|
117, 122, 136, 143, 148, 151, 163 |
|
10365
|
|
|
|
|
|
|
}; |
|
10366
|
|
|
|
|
|
|
|
|
10367
|
|
|
|
|
|
|
static const short _czech_tokenizer_trans_keys[] = { |
|
10368
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
|
10369
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
|
10370
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
|
10371
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
|
10372
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
|
10373
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 43u, 45u, |
|
10374
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 9u, 10u, |
|
10375
|
|
|
|
|
|
|
13u, 32u, 33u, 44u, 46u, 47u, 63u, 129u, |
|
10376
|
|
|
|
|
|
|
131u, 135u, 142u, 147u, 157u, 159u, 160u, 301u, |
|
10377
|
|
|
|
|
|
|
557u, 811u, 1067u, 0u, 42u, 48u, 57u, 58u, |
|
10378
|
|
|
|
|
|
|
64u, 65u, 90u, 91u, 96u, 97u, 122u, 123u, |
|
10379
|
|
|
|
|
|
|
255u, 9u, 10u, 13u, 32u, 147u, 9u, 13u, |
|
10380
|
|
|
|
|
|
|
32u, 147u, 9u, 32u, 147u, 9u, 10u, 32u, |
|
10381
|
|
|
|
|
|
|
147u, 9u, 10u, 13u, 32u, 147u, 13u, 9u, |
|
10382
|
|
|
|
|
|
|
10u, 13u, 32u, 147u, 10u, 9u, 10u, 13u, |
|
10383
|
|
|
|
|
|
|
32u, 147u, 13u, 32u, 34u, 39u, 41u, 59u, |
|
10384
|
|
|
|
|
|
|
93u, 125u, 139u, 141u, 147u, 161u, 9u, 10u, |
|
10385
|
|
|
|
|
|
|
44u, 46u, 69u, 101u, 159u, 48u, 57u, 69u, |
|
10386
|
|
|
|
|
|
|
101u, 159u, 48u, 57u, 159u, 48u, 57u, 129u, |
|
10387
|
|
|
|
|
|
|
131u, 135u, 151u, 155u, 157u, 65u, 90u, 97u, |
|
10388
|
|
|
|
|
|
|
122u, 142u, 143u, 159u, 48u, 57u, 0 |
|
10389
|
|
|
|
|
|
|
}; |
|
10390
|
|
|
|
|
|
|
|
|
10391
|
|
|
|
|
|
|
static const char _czech_tokenizer_single_lengths[] = { |
|
10392
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 3, 1, 21, |
|
10393
|
|
|
|
|
|
|
5, 4, 3, 4, 5, 1, 5, 1, |
|
10394
|
|
|
|
|
|
|
5, 12, 5, 3, 1, 6, 1 |
|
10395
|
|
|
|
|
|
|
}; |
|
10396
|
|
|
|
|
|
|
|
|
10397
|
|
|
|
|
|
|
static const char _czech_tokenizer_range_lengths[] = { |
|
10398
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 7, |
|
10399
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10400
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 3, 1 |
|
10401
|
|
|
|
|
|
|
}; |
|
10402
|
|
|
|
|
|
|
|
|
10403
|
|
|
|
|
|
|
static const unsigned char _czech_tokenizer_index_offsets[] = { |
|
10404
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 50, 53, |
|
10405
|
|
|
|
|
|
|
82, 88, 93, 97, 102, 108, 110, 116, |
|
10406
|
|
|
|
|
|
|
118, 124, 138, 145, 150, 153, 163 |
|
10407
|
|
|
|
|
|
|
}; |
|
10408
|
|
|
|
|
|
|
|
|
10409
|
|
|
|
|
|
|
static const char _czech_tokenizer_indicies[] = { |
|
10410
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
|
10411
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
|
10412
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
|
10413
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
|
10414
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
|
10415
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 8, |
|
10416
|
|
|
|
|
|
|
8, 0, 8, 8, 0, 10, 11, 12, |
|
10417
|
|
|
|
|
|
|
10, 13, 9, 13, 9, 13, 16, 16, |
|
10418
|
|
|
|
|
|
|
16, 16, 10, 16, 15, 13, 9, 17, |
|
10419
|
|
|
|
|
|
|
9, 17, 9, 15, 9, 16, 9, 16, |
|
10420
|
|
|
|
|
|
|
9, 14, 10, 19, 20, 10, 10, 18, |
|
10421
|
|
|
|
|
|
|
10, 21, 10, 10, 18, 10, 10, 10, |
|
10422
|
|
|
|
|
|
|
18, 10, 21, 10, 10, 18, 10, 22, |
|
10423
|
|
|
|
|
|
|
23, 10, 10, 18, 25, 24, 10, 22, |
|
10424
|
|
|
|
|
|
|
26, 10, 10, 18, 25, 24, 10, 23, |
|
10425
|
|
|
|
|
|
|
26, 10, 10, 18, 4, 4, 5, 5, |
|
10426
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
|
10427
|
|
|
|
|
|
|
4, 27, 28, 28, 29, 29, 15, 15, |
|
10428
|
|
|
|
|
|
|
27, 29, 29, 6, 6, 27, 8, 8, |
|
10429
|
|
|
|
|
|
|
27, 16, 16, 16, 16, 16, 16, 16, |
|
10430
|
|
|
|
|
|
|
16, 16, 27, 15, 15, 27, 0 |
|
10431
|
|
|
|
|
|
|
}; |
|
10432
|
|
|
|
|
|
|
|
|
10433
|
|
|
|
|
|
|
static const char _czech_tokenizer_trans_targs[] = { |
|
10434
|
|
|
|
|
|
|
7, 1, 2, 7, 1, 3, 19, 6, |
|
10435
|
|
|
|
|
|
|
20, 7, 8, 12, 16, 17, 0, 18, |
|
10436
|
|
|
|
|
|
|
21, 22, 7, 9, 11, 10, 13, 14, |
|
10437
|
|
|
|
|
|
|
7, 7, 15, 7, 4, 5 |
|
10438
|
|
|
|
|
|
|
}; |
|
10439
|
|
|
|
|
|
|
|
|
10440
|
|
|
|
|
|
|
static const char _czech_tokenizer_trans_actions[] = { |
|
10441
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 4, 0, |
|
10442
|
|
|
|
|
|
|
0, 7, 0, 0, 0, 4, 0, 4, |
|
10443
|
|
|
|
|
|
|
0, 0, 8, 0, 0, 0, 0, 0, |
|
10444
|
|
|
|
|
|
|
9, 10, 0, 11, 0, 0 |
|
10445
|
|
|
|
|
|
|
}; |
|
10446
|
|
|
|
|
|
|
|
|
10447
|
|
|
|
|
|
|
static const char _czech_tokenizer_to_state_actions[] = { |
|
10448
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 5, |
|
10449
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10450
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
|
10451
|
|
|
|
|
|
|
}; |
|
10452
|
|
|
|
|
|
|
|
|
10453
|
|
|
|
|
|
|
static const char _czech_tokenizer_from_state_actions[] = { |
|
10454
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 6, |
|
10455
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10456
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
|
10457
|
|
|
|
|
|
|
}; |
|
10458
|
|
|
|
|
|
|
|
|
10459
|
|
|
|
|
|
|
static const unsigned char _czech_tokenizer_eof_trans[] = { |
|
10460
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 0, |
|
10461
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 25, 19, 25, |
|
10462
|
|
|
|
|
|
|
19, 28, 28, 28, 28, 28, 28 |
|
10463
|
|
|
|
|
|
|
}; |
|
10464
|
|
|
|
|
|
|
|
|
10465
|
|
|
|
|
|
|
static const int czech_tokenizer_start = 7; |
|
10466
|
|
|
|
|
|
|
|
|
10467
|
|
|
|
|
|
|
// The list of lower cased words that when preceding eos do not end sentence. |
|
10468
|
|
|
|
|
|
|
// Note: because of VS, we cannot list the abbreviations directly in UTF-8, |
|
10469
|
|
|
|
|
|
|
// because the compilation of utf-8 encoded sources fail on some locales |
|
10470
|
|
|
|
|
|
|
// (e.g., Japanese). |
|
10471
|
|
|
|
|
|
|
// perl -CS -ple 'use Encode;s/([^[:ascii:]])/join("", map {sprintf "\\%o", ord($_)} split(m@@, encode("utf-8", $1)))/ge' |
|
10472
|
|
|
|
|
|
|
// perl -CS -ple 'use Encode;s/\\([0-7]{3})\\([0-7]{3})/decode("utf-8", chr(oct($1)).chr(oct($2)))/ge' |
|
10473
|
327
|
100
|
|
|
|
|
const unordered_set czech_tokenizer::abbreviations_czech = { |
|
|
|
0
|
|
|
|
|
|
|
10474
|
|
|
|
|
|
|
// Titles |
|
10475
|
|
|
|
|
|
|
"prof", "csc", "drsc", "doc", "phd", "ph", "d", |
|
10476
|
|
|
|
|
|
|
"judr", "mddr", "mudr", "mvdr", "paeddr", "paedr", "phdr", "rndr", "rsdr", "dr", |
|
10477
|
|
|
|
|
|
|
"ing", "arch", "mgr", "bc", "mag", "mba", "bca", "mga", |
|
10478
|
|
|
|
|
|
|
"gen", "plk", "pplk", "npor", "por", "ppor", "kpt", "mjr", "sgt", "pls", "p", "s", |
|
10479
|
|
|
|
|
|
|
"p", "p\303\255", "fa", "fy", "mr", "mrs", "ms", "miss", "tr", "sv", |
|
10480
|
|
|
|
|
|
|
// Geographic names |
|
10481
|
|
|
|
|
|
|
"angl", "fr", "\304\215es", "ces", "\304\215s", "cs", "slov", "n\304\233m", "nem", "it", "pol", "ma\304\217", "mad", "rus", |
|
10482
|
|
|
|
|
|
|
"sev", "v\303\275ch", "vych", "ji\305\276", "jiz", "z\303\241p", "zap", |
|
10483
|
|
|
|
|
|
|
// Common abbrevs |
|
10484
|
|
|
|
|
|
|
"adr", "\304\215", "c", "eg", "ev", "g", "hod", "j", "kr", "m", "max", "min", "mj", "nap\305\231", "napr", |
|
10485
|
|
|
|
|
|
|
"okr", "pop\305\231", "popr", "pozn", "r", "\305\231", "red", "rep", "resp", "srov", "st", "st\305\231", "str", |
|
10486
|
|
|
|
|
|
|
"sv", "tel", "tj", "tzv", "\303\272", "u", "uh", "ul", "um", "zl", "zn", |
|
10487
|
|
|
|
|
|
|
}; |
|
10488
|
|
|
|
|
|
|
|
|
10489
|
315
|
100
|
|
|
|
|
const unordered_set czech_tokenizer::abbreviations_slovak = { |
|
|
|
0
|
|
|
|
|
|
|
10490
|
|
|
|
|
|
|
// Titles |
|
10491
|
|
|
|
|
|
|
"prof", "csc", "drsc", "doc", "phd", "ph", "d", |
|
10492
|
|
|
|
|
|
|
"judr", "mddr", "mudr", "mvdr", "paeddr", "paedr", "phdr", "rndr", "rsdr", "dr", |
|
10493
|
|
|
|
|
|
|
"ing", "arch", "mgr", "bc", "mag", "mba", "bca", "mga", |
|
10494
|
|
|
|
|
|
|
"gen", "plk", "pplk", "npor", "por", "ppor", "kpt", "mjr", "sgt", "pls", "p", "s", |
|
10495
|
|
|
|
|
|
|
"p", "p\303\255", "fa", "fy", "mr", "mrs", "ms", "miss", "tr", "sv", |
|
10496
|
|
|
|
|
|
|
// Geographic names |
|
10497
|
|
|
|
|
|
|
"angl", "fr", "\304\215es", "ces", "\304\215s", "cs", "slov", "nem", "it", "po\304\276", "pol", "ma\304\217", "mad", |
|
10498
|
|
|
|
|
|
|
"rus", "sev", "v\303\275ch", "vych", "ju\305\276", "juz", "z\303\241p", "zap", |
|
10499
|
|
|
|
|
|
|
// Common abbrevs |
|
10500
|
|
|
|
|
|
|
"adr", "\304\215", "c", "eg", "ev", "g", "hod", "j", "kr", "m", "max", "min", "mj", "napr", |
|
10501
|
|
|
|
|
|
|
"okr", "popr", "pozn", "r", "red", "rep", "resp", "srov", "st", "str", |
|
10502
|
|
|
|
|
|
|
"sv", "tel", "tj", "tzv", "\303\272", "u", "uh", "ul", "um", "zl", "zn", |
|
10503
|
|
|
|
|
|
|
}; |
|
10504
|
|
|
|
|
|
|
|
|
10505
|
0
|
|
|
|
|
|
czech_tokenizer::czech_tokenizer(tokenizer_language language, unsigned version, const morpho* m) |
|
10506
|
0
|
0
|
|
|
|
|
: ragel_tokenizer(version <= 1 ? 1 : 2), m(m) { |
|
|
|
0
|
|
|
|
|
|
|
10507
|
0
|
|
|
|
|
|
switch (language) { |
|
10508
|
|
|
|
|
|
|
case CZECH: |
|
10509
|
0
|
|
|
|
|
|
abbreviations = &abbreviations_czech; |
|
10510
|
0
|
|
|
|
|
|
break; |
|
10511
|
|
|
|
|
|
|
case SLOVAK: |
|
10512
|
0
|
|
|
|
|
|
abbreviations = &abbreviations_slovak; |
|
10513
|
0
|
|
|
|
|
|
break; |
|
10514
|
|
|
|
|
|
|
} |
|
10515
|
0
|
|
|
|
|
|
} |
|
10516
|
|
|
|
|
|
|
|
|
10517
|
0
|
|
|
|
|
|
void czech_tokenizer::merge_hyphenated(vector& tokens) { |
|
10518
|
|
|
|
|
|
|
using namespace unilib; |
|
10519
|
|
|
|
|
|
|
|
|
10520
|
0
|
0
|
|
|
|
|
if (!m) return; |
|
10521
|
0
|
0
|
|
|
|
|
if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10522
|
|
|
|
|
|
|
|
|
10523
|
|
|
|
|
|
|
unsigned matched_hyphens = 0; |
|
10524
|
0
|
0
|
|
|
|
|
for (unsigned hyphens = 1; hyphens <= 2; hyphens++) { |
|
10525
|
|
|
|
|
|
|
// Are the tokens a sequence of 'hyphens' hyphenated tokens? |
|
10526
|
0
|
0
|
|
|
|
|
if (tokens.size() < 2*hyphens + 1) break; |
|
10527
|
0
|
|
|
|
|
|
unsigned first_hyphen = tokens.size() - 2*hyphens; |
|
10528
|
0
|
0
|
|
|
|
|
if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P || |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10529
|
0
|
0
|
|
|
|
|
tokens[first_hyphen].start + tokens[first_hyphen].length != tokens[first_hyphen + 1].start || |
|
10530
|
0
|
0
|
|
|
|
|
tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start || |
|
|
|
0
|
|
|
|
|
|
|
10531
|
0
|
|
|
|
|
|
chars[tokens[first_hyphen-1].start].cat & ~unicode::L) |
|
10532
|
|
|
|
|
|
|
break; |
|
10533
|
|
|
|
|
|
|
|
|
10534
|
0
|
0
|
|
|
|
|
if (m->analyze(string_piece(chars[tokens[first_hyphen-1].start].str, chars[tokens.back().start + tokens.back().length].str - chars[tokens[first_hyphen-1].start].str), morpho::NO_GUESSER, lemmas) >= 0) |
|
10535
|
|
|
|
|
|
|
matched_hyphens = hyphens; |
|
10536
|
|
|
|
|
|
|
} |
|
10537
|
|
|
|
|
|
|
|
|
10538
|
0
|
0
|
|
|
|
|
if (matched_hyphens) { |
|
10539
|
0
|
|
|
|
|
|
unsigned first = tokens.size() - 2*matched_hyphens - 1; |
|
10540
|
0
|
|
|
|
|
|
tokens[first].length = tokens.back().start + tokens.back().length - tokens[first].start; |
|
10541
|
0
|
|
|
|
|
|
tokens.resize(first + 1); |
|
10542
|
|
|
|
|
|
|
} |
|
10543
|
|
|
|
|
|
|
} |
|
10544
|
|
|
|
|
|
|
|
|
10545
|
0
|
|
|
|
|
|
bool czech_tokenizer::next_sentence(vector& tokens) { |
|
10546
|
|
|
|
|
|
|
using namespace unilib; |
|
10547
|
|
|
|
|
|
|
|
|
10548
|
|
|
|
|
|
|
int cs, act; |
|
10549
|
|
|
|
|
|
|
size_t ts, te; |
|
10550
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
|
10551
|
|
|
|
|
|
|
|
|
10552
|
0
|
0
|
|
|
|
|
while (tokenize_url_email(tokens)) |
|
10553
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
|
10554
|
|
|
|
|
|
|
return true; |
|
10555
|
|
|
|
|
|
|
|
|
10556
|
|
|
|
|
|
|
{ |
|
10557
|
|
|
|
|
|
|
cs = czech_tokenizer_start; |
|
10558
|
0
|
|
|
|
|
|
ts = 0; |
|
10559
|
|
|
|
|
|
|
te = 0; |
|
10560
|
|
|
|
|
|
|
act = 0; |
|
10561
|
|
|
|
|
|
|
} |
|
10562
|
|
|
|
|
|
|
|
|
10563
|
|
|
|
|
|
|
{ |
|
10564
|
|
|
|
|
|
|
int _klen; |
|
10565
|
|
|
|
|
|
|
const short *_keys; |
|
10566
|
|
|
|
|
|
|
int _trans; |
|
10567
|
|
|
|
|
|
|
short _widec; |
|
10568
|
|
|
|
|
|
|
|
|
10569
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
10570
|
|
|
|
|
|
|
goto _test_eof; |
|
10571
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
10572
|
|
|
|
|
|
|
goto _out; |
|
10573
|
|
|
|
|
|
|
_resume: |
|
10574
|
0
|
0
|
|
|
|
|
switch ( _czech_tokenizer_from_state_actions[cs] ) { |
|
10575
|
|
|
|
|
|
|
case 6: |
|
10576
|
0
|
|
|
|
|
|
{ts = ( current);} |
|
10577
|
0
|
|
|
|
|
|
break; |
|
10578
|
|
|
|
|
|
|
} |
|
10579
|
|
|
|
|
|
|
|
|
10580
|
0
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
|
10581
|
0
|
|
|
|
|
|
_klen = _czech_tokenizer_cond_lengths[cs]; |
|
10582
|
0
|
|
|
|
|
|
_keys = _czech_tokenizer_cond_keys + (_czech_tokenizer_cond_offsets[cs]*2); |
|
10583
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
10584
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
10585
|
|
|
|
|
|
|
const short *_mid; |
|
10586
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
10587
|
|
|
|
|
|
|
while (1) { |
|
10588
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
10589
|
|
|
|
|
|
|
break; |
|
10590
|
|
|
|
|
|
|
|
|
10591
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
10592
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
10593
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
10594
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
10595
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
10596
|
|
|
|
|
|
|
else { |
|
10597
|
0
|
|
|
|
|
|
switch ( _czech_tokenizer_cond_spaces[_czech_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
|
10598
|
|
|
|
|
|
|
case 0: { |
|
10599
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
|
10600
|
0
|
0
|
|
|
|
|
if ( |
|
10601
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
|
0
|
|
|
|
|
|
|
10602
|
|
|
|
|
|
|
break; |
|
10603
|
|
|
|
|
|
|
} |
|
10604
|
|
|
|
|
|
|
case 1: { |
|
10605
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
|
10606
|
0
|
0
|
|
|
|
|
if ( |
|
10607
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10608
|
|
|
|
|
|
|
break; |
|
10609
|
|
|
|
|
|
|
} |
|
10610
|
|
|
|
|
|
|
} |
|
10611
|
|
|
|
|
|
|
break; |
|
10612
|
|
|
|
|
|
|
} |
|
10613
|
|
|
|
|
|
|
} |
|
10614
|
|
|
|
|
|
|
} |
|
10615
|
|
|
|
|
|
|
|
|
10616
|
0
|
|
|
|
|
|
_keys = _czech_tokenizer_trans_keys + _czech_tokenizer_key_offsets[cs]; |
|
10617
|
0
|
|
|
|
|
|
_trans = _czech_tokenizer_index_offsets[cs]; |
|
10618
|
|
|
|
|
|
|
|
|
10619
|
0
|
|
|
|
|
|
_klen = _czech_tokenizer_single_lengths[cs]; |
|
10620
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
10621
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
10622
|
|
|
|
|
|
|
const short *_mid; |
|
10623
|
0
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
|
10624
|
|
|
|
|
|
|
while (1) { |
|
10625
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
10626
|
|
|
|
|
|
|
break; |
|
10627
|
|
|
|
|
|
|
|
|
10628
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
10629
|
0
|
0
|
|
|
|
|
if ( _widec < *_mid ) |
|
10630
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
10631
|
0
|
0
|
|
|
|
|
else if ( _widec > *_mid ) |
|
10632
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
10633
|
|
|
|
|
|
|
else { |
|
10634
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
10635
|
0
|
|
|
|
|
|
goto _match; |
|
10636
|
|
|
|
|
|
|
} |
|
10637
|
|
|
|
|
|
|
} |
|
10638
|
0
|
|
|
|
|
|
_keys += _klen; |
|
10639
|
0
|
|
|
|
|
|
_trans += _klen; |
|
10640
|
|
|
|
|
|
|
} |
|
10641
|
|
|
|
|
|
|
|
|
10642
|
0
|
|
|
|
|
|
_klen = _czech_tokenizer_range_lengths[cs]; |
|
10643
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
10644
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
10645
|
|
|
|
|
|
|
const short *_mid; |
|
10646
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
10647
|
|
|
|
|
|
|
while (1) { |
|
10648
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
10649
|
|
|
|
|
|
|
break; |
|
10650
|
|
|
|
|
|
|
|
|
10651
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
10652
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
10653
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
10654
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
10655
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
10656
|
|
|
|
|
|
|
else { |
|
10657
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
10658
|
0
|
|
|
|
|
|
goto _match; |
|
10659
|
|
|
|
|
|
|
} |
|
10660
|
|
|
|
|
|
|
} |
|
10661
|
0
|
|
|
|
|
|
_trans += _klen; |
|
10662
|
|
|
|
|
|
|
} |
|
10663
|
|
|
|
|
|
|
|
|
10664
|
|
|
|
|
|
|
_match: |
|
10665
|
0
|
|
|
|
|
|
_trans = _czech_tokenizer_indicies[_trans]; |
|
10666
|
|
|
|
|
|
|
_eof_trans: |
|
10667
|
0
|
|
|
|
|
|
cs = _czech_tokenizer_trans_targs[_trans]; |
|
10668
|
|
|
|
|
|
|
|
|
10669
|
0
|
0
|
|
|
|
|
if ( _czech_tokenizer_trans_actions[_trans] == 0 ) |
|
10670
|
|
|
|
|
|
|
goto _again; |
|
10671
|
|
|
|
|
|
|
|
|
10672
|
0
|
|
|
|
|
|
switch ( _czech_tokenizer_trans_actions[_trans] ) { |
|
10673
|
|
|
|
|
|
|
case 3: |
|
10674
|
0
|
|
|
|
|
|
{ whitespace = current; } |
|
10675
|
0
|
|
|
|
|
|
break; |
|
10676
|
|
|
|
|
|
|
case 4: |
|
10677
|
0
|
|
|
|
|
|
{te = ( current)+1;} |
|
10678
|
0
|
|
|
|
|
|
break; |
|
10679
|
|
|
|
|
|
|
case 7: |
|
10680
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
|
10681
|
0
|
|
|
|
|
|
merge_hyphenated(tokens); |
|
10682
|
0
|
|
|
|
|
|
current = te; |
|
10683
|
0
|
0
|
|
|
|
|
do |
|
10684
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
10685
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
10686
|
0
|
|
|
|
|
|
( current)--; |
|
10687
|
|
|
|
|
|
|
}} |
|
10688
|
0
|
|
|
|
|
|
break; |
|
10689
|
|
|
|
|
|
|
case 2: |
|
10690
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
|
10691
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, abbreviations); |
|
10692
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
|
10693
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
|
10694
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
|
10695
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
|
10696
|
|
|
|
|
|
|
}} |
|
10697
|
|
|
|
|
|
|
break; |
|
10698
|
|
|
|
|
|
|
case 10: |
|
10699
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
|
10700
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
|
10701
|
0
|
|
|
|
|
|
current = te; |
|
10702
|
0
|
0
|
|
|
|
|
do |
|
10703
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
10704
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
10705
|
0
|
|
|
|
|
|
( current)--; |
|
10706
|
|
|
|
|
|
|
}} |
|
10707
|
0
|
|
|
|
|
|
break; |
|
10708
|
|
|
|
|
|
|
case 11: |
|
10709
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
|
10710
|
0
|
|
|
|
|
|
merge_hyphenated(tokens); |
|
10711
|
0
|
|
|
|
|
|
current = te; |
|
10712
|
0
|
0
|
|
|
|
|
do |
|
10713
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
10714
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
10715
|
0
|
|
|
|
|
|
( current)--; |
|
10716
|
|
|
|
|
|
|
}} |
|
10717
|
0
|
|
|
|
|
|
break; |
|
10718
|
|
|
|
|
|
|
case 8: |
|
10719
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
|
10720
|
0
|
|
|
|
|
|
current = te; |
|
10721
|
0
|
0
|
|
|
|
|
do |
|
10722
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
10723
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
10724
|
0
|
|
|
|
|
|
( current)--; |
|
10725
|
|
|
|
|
|
|
}} |
|
10726
|
0
|
|
|
|
|
|
break; |
|
10727
|
|
|
|
|
|
|
case 9: |
|
10728
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
|
10729
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
|
10730
|
0
|
|
|
|
|
|
current = te; |
|
10731
|
0
|
0
|
|
|
|
|
do |
|
10732
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
10733
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
10734
|
0
|
|
|
|
|
|
( current)--; |
|
10735
|
|
|
|
|
|
|
}} |
|
10736
|
0
|
|
|
|
|
|
break; |
|
10737
|
|
|
|
|
|
|
case 1: |
|
10738
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
|
10739
|
0
|
|
|
|
|
|
merge_hyphenated(tokens); |
|
10740
|
0
|
|
|
|
|
|
current = te; |
|
10741
|
0
|
0
|
|
|
|
|
do |
|
10742
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
10743
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
10744
|
0
|
|
|
|
|
|
( current)--; |
|
10745
|
|
|
|
|
|
|
}} |
|
10746
|
0
|
|
|
|
|
|
break; |
|
10747
|
|
|
|
|
|
|
} |
|
10748
|
|
|
|
|
|
|
|
|
10749
|
|
|
|
|
|
|
_again: |
|
10750
|
0
|
0
|
|
|
|
|
switch ( _czech_tokenizer_to_state_actions[cs] ) { |
|
10751
|
|
|
|
|
|
|
case 5: |
|
10752
|
0
|
|
|
|
|
|
{ts = 0;} |
|
10753
|
0
|
|
|
|
|
|
break; |
|
10754
|
|
|
|
|
|
|
} |
|
10755
|
|
|
|
|
|
|
|
|
10756
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
10757
|
|
|
|
|
|
|
goto _out; |
|
10758
|
0
|
0
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
|
10759
|
|
|
|
|
|
|
goto _resume; |
|
10760
|
|
|
|
|
|
|
_test_eof: {} |
|
10761
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
10762
|
|
|
|
|
|
|
{ |
|
10763
|
0
|
0
|
|
|
|
|
if ( _czech_tokenizer_eof_trans[cs] > 0 ) { |
|
10764
|
0
|
|
|
|
|
|
_trans = _czech_tokenizer_eof_trans[cs] - 1; |
|
10765
|
0
|
|
|
|
|
|
goto _eof_trans; |
|
10766
|
|
|
|
|
|
|
} |
|
10767
|
|
|
|
|
|
|
} |
|
10768
|
|
|
|
|
|
|
|
|
10769
|
|
|
|
|
|
|
_out: {} |
|
10770
|
|
|
|
|
|
|
} |
|
10771
|
|
|
|
|
|
|
|
|
10772
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
|
10773
|
|
|
|
|
|
|
|
|
10774
|
0
|
|
|
|
|
|
return !tokens.empty(); |
|
10775
|
|
|
|
|
|
|
} |
|
10776
|
|
|
|
|
|
|
|
|
10777
|
|
|
|
|
|
|
} // namespace morphodita |
|
10778
|
|
|
|
|
|
|
|
|
10779
|
|
|
|
|
|
|
///////// |
|
10780
|
|
|
|
|
|
|
// File: morphodita/tokenizer/english_tokenizer.cpp |
|
10781
|
|
|
|
|
|
|
///////// |
|
10782
|
|
|
|
|
|
|
|
|
10783
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
10784
|
|
|
|
|
|
|
// |
|
10785
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
10786
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
10787
|
|
|
|
|
|
|
// |
|
10788
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
10789
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
10790
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
10791
|
|
|
|
|
|
|
|
|
10792
|
|
|
|
|
|
|
namespace morphodita { |
|
10793
|
|
|
|
|
|
|
|
|
10794
|
|
|
|
|
|
|
// The list of lowercased words that when preceding eos do not end sentence. |
|
10795
|
348
|
100
|
|
|
|
|
const unordered_set english_tokenizer::abbreviations = { |
|
|
|
0
|
|
|
|
|
|
|
10796
|
|
|
|
|
|
|
// Titles |
|
10797
|
|
|
|
|
|
|
"adj", "adm", "adv", "assoc", "asst", "bart", "bldg", "brig", "bros", "capt", |
|
10798
|
|
|
|
|
|
|
"cmdr", "col", "comdr", "con", "corp", "cpl", "d", "dr", "dr", "drs", "ens", |
|
10799
|
|
|
|
|
|
|
"gen", "gov", "hon", "hosp", "hr", "insp", "lt", "mm", "mr", "mrs", "ms", |
|
10800
|
|
|
|
|
|
|
"maj", "messrs", "mlle", "mme", "mr", "mrs", "ms", "msgr", "op", "ord", |
|
10801
|
|
|
|
|
|
|
"pfc", "ph", "phd", "prof", "pvt", "rep", "reps", "res", "rev", "rt", "sen", |
|
10802
|
|
|
|
|
|
|
"sens", "sfc", "sgt", "sr", "st", "supt", "surg", "univ", |
|
10803
|
|
|
|
|
|
|
// Common abbrevs |
|
10804
|
|
|
|
|
|
|
"addr", "approx", "apr", "aug", "calif", "co", "corp", "dec", "def", "e", |
|
10805
|
|
|
|
|
|
|
"e.g", "eg", "feb", "fla", "ft", "gen", "gov", "hrs", "i.", "i.e", "ie", |
|
10806
|
|
|
|
|
|
|
"inc", "jan", "jr", "ltd", "mar", "max", "min", "mph", "mt", "n", "nov", |
|
10807
|
|
|
|
|
|
|
"oct", "ont", "pa", "pres", "rep", "rev", "s", "sec", "sen", "sep", "sept", |
|
10808
|
|
|
|
|
|
|
"sgt", "sr", "tel", "un", "univ", "v", "va", "vs", "w", "yrs", |
|
10809
|
|
|
|
|
|
|
}; |
|
10810
|
|
|
|
|
|
|
|
|
10811
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_key_offsets[] = { |
|
10812
|
|
|
|
|
|
|
0, 0, 16, 20, 22, 26, 28, 30, |
|
10813
|
|
|
|
|
|
|
32, 34, 36, 44, 46, 50, 52, 54, |
|
10814
|
|
|
|
|
|
|
56, 58, 60, 62, 64, 66, 68, 72, |
|
10815
|
|
|
|
|
|
|
74, 76, 78, 80, 82, 82 |
|
10816
|
|
|
|
|
|
|
}; |
|
10817
|
|
|
|
|
|
|
|
|
10818
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_split_token_trans_keys[] = { |
|
10819
|
|
|
|
|
|
|
65u, 68u, 69u, 76u, 77u, 78u, 83u, 84u, |
|
10820
|
|
|
|
|
|
|
97u, 100u, 101u, 108u, 109u, 110u, 115u, 116u, |
|
10821
|
|
|
|
|
|
|
78u, 84u, 110u, 116u, 78u, 110u, 65u, 79u, |
|
10822
|
|
|
|
|
|
|
97u, 111u, 87u, 119u, 71u, 103u, 84u, 116u, |
|
10823
|
|
|
|
|
|
|
79u, 111u, 39u, 161u, 77u, 82u, 86u, 89u, |
|
10824
|
|
|
|
|
|
|
109u, 114u, 118u, 121u, 77u, 109u, 69u, 73u, |
|
10825
|
|
|
|
|
|
|
101u, 105u, 76u, 108u, 39u, 161u, 68u, 100u, |
|
10826
|
|
|
|
|
|
|
76u, 108u, 39u, 161u, 69u, 101u, 82u, 114u, |
|
10827
|
|
|
|
|
|
|
79u, 111u, 77u, 109u, 39u, 79u, 111u, 161u, |
|
10828
|
|
|
|
|
|
|
78u, 110u, 78u, 110u, 78u, 110u, 65u, 97u, |
|
10829
|
|
|
|
|
|
|
67u, 99u, 0 |
|
10830
|
|
|
|
|
|
|
}; |
|
10831
|
|
|
|
|
|
|
|
|
10832
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_single_lengths[] = { |
|
10833
|
|
|
|
|
|
|
0, 16, 4, 2, 4, 2, 2, 2, |
|
10834
|
|
|
|
|
|
|
2, 2, 8, 2, 4, 2, 2, 2, |
|
10835
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 4, 2, |
|
10836
|
|
|
|
|
|
|
2, 2, 2, 2, 0, 0 |
|
10837
|
|
|
|
|
|
|
}; |
|
10838
|
|
|
|
|
|
|
|
|
10839
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_range_lengths[] = { |
|
10840
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10841
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10842
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10843
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0 |
|
10844
|
|
|
|
|
|
|
}; |
|
10845
|
|
|
|
|
|
|
|
|
10846
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_split_token_index_offsets[] = { |
|
10847
|
|
|
|
|
|
|
0, 0, 17, 22, 25, 30, 33, 36, |
|
10848
|
|
|
|
|
|
|
39, 42, 45, 54, 57, 62, 65, 68, |
|
10849
|
|
|
|
|
|
|
71, 74, 77, 80, 83, 86, 89, 94, |
|
10850
|
|
|
|
|
|
|
97, 100, 103, 106, 109, 110 |
|
10851
|
|
|
|
|
|
|
}; |
|
10852
|
|
|
|
|
|
|
|
|
10853
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_indicies[] = { |
|
10854
|
|
|
|
|
|
|
0, 2, 3, 4, 2, 5, 2, 6, |
|
10855
|
|
|
|
|
|
|
0, 2, 3, 4, 2, 5, 2, 6, |
|
10856
|
|
|
|
|
|
|
1, 7, 8, 7, 8, 1, 9, 9, |
|
10857
|
|
|
|
|
|
|
1, 10, 11, 10, 11, 1, 12, 12, |
|
10858
|
|
|
|
|
|
|
1, 12, 12, 1, 13, 13, 1, 11, |
|
10859
|
|
|
|
|
|
|
11, 1, 14, 14, 1, 15, 2, 2, |
|
10860
|
|
|
|
|
|
|
16, 15, 2, 2, 16, 1, 17, 17, |
|
10861
|
|
|
|
|
|
|
1, 18, 11, 18, 11, 1, 12, 12, |
|
10862
|
|
|
|
|
|
|
1, 19, 19, 1, 12, 12, 1, 2, |
|
10863
|
|
|
|
|
|
|
2, 1, 20, 20, 1, 21, 21, 1, |
|
10864
|
|
|
|
|
|
|
22, 22, 1, 23, 23, 1, 12, 12, |
|
10865
|
|
|
|
|
|
|
1, 24, 25, 25, 24, 1, 14, 14, |
|
10866
|
|
|
|
|
|
|
1, 26, 26, 1, 27, 27, 1, 28, |
|
10867
|
|
|
|
|
|
|
28, 1, 12, 12, 1, 1, 1, 0 |
|
10868
|
|
|
|
|
|
|
}; |
|
10869
|
|
|
|
|
|
|
|
|
10870
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_trans_targs[] = { |
|
10871
|
|
|
|
|
|
|
2, 0, 9, 10, 16, 17, 22, 3, |
|
10872
|
|
|
|
|
|
|
7, 4, 5, 6, 28, 8, 29, 11, |
|
10873
|
|
|
|
|
|
|
14, 12, 13, 15, 18, 19, 20, 21, |
|
10874
|
|
|
|
|
|
|
23, 24, 25, 26, 27 |
|
10875
|
|
|
|
|
|
|
}; |
|
10876
|
|
|
|
|
|
|
|
|
10877
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_trans_actions[] = { |
|
10878
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 1, |
|
10879
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 2, 1, |
|
10880
|
|
|
|
|
|
|
1, 0, 0, 0, 1, 0, 0, 0, |
|
10881
|
|
|
|
|
|
|
0, 0, 1, 0, 0 |
|
10882
|
|
|
|
|
|
|
}; |
|
10883
|
|
|
|
|
|
|
|
|
10884
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_eof_actions[] = { |
|
10885
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10886
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10887
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
10888
|
|
|
|
|
|
|
0, 0, 0, 0, 3, 0 |
|
10889
|
|
|
|
|
|
|
}; |
|
10890
|
|
|
|
|
|
|
|
|
10891
|
|
|
|
|
|
|
static const int english_tokenizer_split_token_start = 1; |
|
10892
|
|
|
|
|
|
|
|
|
10893
|
0
|
|
|
|
|
|
void english_tokenizer::split_token(vector& tokens) { |
|
10894
|
0
|
0
|
|
|
|
|
if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10895
|
|
|
|
|
|
|
|
|
10896
|
0
|
|
|
|
|
|
size_t index = tokens.back().start, end = index + tokens.back().length; |
|
10897
|
|
|
|
|
|
|
int cs; |
|
10898
|
0
|
|
|
|
|
|
size_t split_mark = 0, split_len = 0; |
|
10899
|
|
|
|
|
|
|
|
|
10900
|
|
|
|
|
|
|
{ |
|
10901
|
|
|
|
|
|
|
cs = english_tokenizer_split_token_start; |
|
10902
|
|
|
|
|
|
|
} |
|
10903
|
|
|
|
|
|
|
|
|
10904
|
|
|
|
|
|
|
{ |
|
10905
|
|
|
|
|
|
|
int _klen; |
|
10906
|
|
|
|
|
|
|
const unsigned char *_keys; |
|
10907
|
|
|
|
|
|
|
int _trans; |
|
10908
|
|
|
|
|
|
|
|
|
10909
|
0
|
0
|
|
|
|
|
if ( ( index) == ( end) ) |
|
10910
|
|
|
|
|
|
|
goto _test_eof; |
|
10911
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
10912
|
|
|
|
|
|
|
goto _out; |
|
10913
|
|
|
|
|
|
|
_resume: |
|
10914
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_split_token_trans_keys + _english_tokenizer_split_token_key_offsets[cs]; |
|
10915
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_split_token_index_offsets[cs]; |
|
10916
|
|
|
|
|
|
|
|
|
10917
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_split_token_single_lengths[cs]; |
|
10918
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
10919
|
|
|
|
|
|
|
const unsigned char *_lower = _keys; |
|
10920
|
|
|
|
|
|
|
const unsigned char *_mid; |
|
10921
|
0
|
|
|
|
|
|
const unsigned char *_upper = _keys + _klen - 1; |
|
10922
|
|
|
|
|
|
|
while (1) { |
|
10923
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
10924
|
|
|
|
|
|
|
break; |
|
10925
|
|
|
|
|
|
|
|
|
10926
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
10927
|
0
|
0
|
|
|
|
|
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < *_mid ) |
|
10928
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
10929
|
0
|
0
|
|
|
|
|
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > *_mid ) |
|
10930
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
10931
|
|
|
|
|
|
|
else { |
|
10932
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
10933
|
0
|
|
|
|
|
|
goto _match; |
|
10934
|
|
|
|
|
|
|
} |
|
10935
|
|
|
|
|
|
|
} |
|
10936
|
0
|
|
|
|
|
|
_keys += _klen; |
|
10937
|
0
|
|
|
|
|
|
_trans += _klen; |
|
10938
|
|
|
|
|
|
|
} |
|
10939
|
|
|
|
|
|
|
|
|
10940
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_split_token_range_lengths[cs]; |
|
10941
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
10942
|
|
|
|
|
|
|
const unsigned char *_lower = _keys; |
|
10943
|
|
|
|
|
|
|
const unsigned char *_mid; |
|
10944
|
0
|
|
|
|
|
|
const unsigned char *_upper = _keys + (_klen<<1) - 2; |
|
10945
|
|
|
|
|
|
|
while (1) { |
|
10946
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
10947
|
|
|
|
|
|
|
break; |
|
10948
|
|
|
|
|
|
|
|
|
10949
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
10950
|
0
|
0
|
|
|
|
|
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < _mid[0] ) |
|
10951
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
10952
|
0
|
0
|
|
|
|
|
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > _mid[1] ) |
|
10953
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
10954
|
|
|
|
|
|
|
else { |
|
10955
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
10956
|
0
|
|
|
|
|
|
goto _match; |
|
10957
|
|
|
|
|
|
|
} |
|
10958
|
|
|
|
|
|
|
} |
|
10959
|
0
|
|
|
|
|
|
_trans += _klen; |
|
10960
|
|
|
|
|
|
|
} |
|
10961
|
|
|
|
|
|
|
|
|
10962
|
|
|
|
|
|
|
_match: |
|
10963
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_split_token_indicies[_trans]; |
|
10964
|
0
|
|
|
|
|
|
cs = _english_tokenizer_split_token_trans_targs[_trans]; |
|
10965
|
|
|
|
|
|
|
|
|
10966
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_split_token_trans_actions[_trans] == 0 ) |
|
10967
|
|
|
|
|
|
|
goto _again; |
|
10968
|
|
|
|
|
|
|
|
|
10969
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_split_token_trans_actions[_trans] ) { |
|
10970
|
|
|
|
|
|
|
case 1: |
|
10971
|
0
|
|
|
|
|
|
{ split_mark = index - tokens.back().start + 1; } |
|
10972
|
0
|
|
|
|
|
|
break; |
|
10973
|
|
|
|
|
|
|
case 2: |
|
10974
|
0
|
|
|
|
|
|
{ split_mark = index - tokens.back().start + 1; } |
|
10975
|
0
|
|
|
|
|
|
{ split_len = split_mark; {( index)++; goto _out; } } |
|
10976
|
|
|
|
|
|
|
break; |
|
10977
|
|
|
|
|
|
|
} |
|
10978
|
|
|
|
|
|
|
|
|
10979
|
|
|
|
|
|
|
_again: |
|
10980
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
10981
|
|
|
|
|
|
|
goto _out; |
|
10982
|
0
|
0
|
|
|
|
|
if ( ++( index) != ( end) ) |
|
10983
|
|
|
|
|
|
|
goto _resume; |
|
10984
|
|
|
|
|
|
|
_test_eof: {} |
|
10985
|
0
|
0
|
|
|
|
|
if ( ( index) == ( end) ) |
|
10986
|
|
|
|
|
|
|
{ |
|
10987
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_split_token_eof_actions[cs] ) { |
|
10988
|
|
|
|
|
|
|
case 3: |
|
10989
|
0
|
|
|
|
|
|
{ split_len = split_mark; {( index)++; goto _out; } } |
|
10990
|
|
|
|
|
|
|
break; |
|
10991
|
|
|
|
|
|
|
} |
|
10992
|
|
|
|
|
|
|
} |
|
10993
|
|
|
|
|
|
|
|
|
10994
|
|
|
|
|
|
|
_out: {} |
|
10995
|
|
|
|
|
|
|
} |
|
10996
|
|
|
|
|
|
|
|
|
10997
|
0
|
0
|
|
|
|
|
if (split_len && split_len < end) { |
|
10998
|
0
|
|
|
|
|
|
tokens.back().length -= split_len; |
|
10999
|
0
|
|
|
|
|
|
tokens.emplace_back(end - split_len, split_len); |
|
11000
|
|
|
|
|
|
|
} |
|
11001
|
|
|
|
|
|
|
} |
|
11002
|
|
|
|
|
|
|
|
|
11003
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_offsets[] = { |
|
11004
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11005
|
|
|
|
|
|
|
0, 0, 0, 2, 2, 2, 2, 2, |
|
11006
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
|
11007
|
|
|
|
|
|
|
2, 2, 2, 2, 2 |
|
11008
|
|
|
|
|
|
|
}; |
|
11009
|
|
|
|
|
|
|
|
|
11010
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_lengths[] = { |
|
11011
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11012
|
|
|
|
|
|
|
0, 0, 2, 0, 0, 0, 0, 0, |
|
11013
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11014
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
|
11015
|
|
|
|
|
|
|
}; |
|
11016
|
|
|
|
|
|
|
|
|
11017
|
|
|
|
|
|
|
static const short _english_tokenizer_cond_keys[] = { |
|
11018
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
|
11019
|
|
|
|
|
|
|
}; |
|
11020
|
|
|
|
|
|
|
|
|
11021
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_spaces[] = { |
|
11022
|
|
|
|
|
|
|
1, 0, 0 |
|
11023
|
|
|
|
|
|
|
}; |
|
11024
|
|
|
|
|
|
|
|
|
11025
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_key_offsets[] = { |
|
11026
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 49, 52, |
|
11027
|
|
|
|
|
|
|
55, 60, 63, 98, 103, 107, 110, 114, |
|
11028
|
|
|
|
|
|
|
119, 120, 125, 126, 131, 145, 152, 156, |
|
11029
|
|
|
|
|
|
|
161, 164, 179, 192, 206 |
|
11030
|
|
|
|
|
|
|
}; |
|
11031
|
|
|
|
|
|
|
|
|
11032
|
|
|
|
|
|
|
static const short _english_tokenizer_trans_keys[] = { |
|
11033
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
|
11034
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
|
11035
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
|
11036
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
|
11037
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
|
11038
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 159u, 48u, |
|
11039
|
|
|
|
|
|
|
57u, 159u, 48u, 57u, 159u, 48u, 57u, 43u, |
|
11040
|
|
|
|
|
|
|
45u, 159u, 48u, 57u, 159u, 48u, 57u, 9u, |
|
11041
|
|
|
|
|
|
|
10u, 13u, 32u, 33u, 44u, 46u, 47u, 63u, |
|
11042
|
|
|
|
|
|
|
129u, 131u, 135u, 142u, 147u, 157u, 159u, 160u, |
|
11043
|
|
|
|
|
|
|
301u, 557u, 811u, 1067u, 0u, 42u, 48u, 57u, |
|
11044
|
|
|
|
|
|
|
58u, 64u, 65u, 90u, 91u, 96u, 97u, 122u, |
|
11045
|
|
|
|
|
|
|
123u, 255u, 9u, 10u, 13u, 32u, 147u, 9u, |
|
11046
|
|
|
|
|
|
|
13u, 32u, 147u, 9u, 32u, 147u, 9u, 10u, |
|
11047
|
|
|
|
|
|
|
32u, 147u, 9u, 10u, 13u, 32u, 147u, 13u, |
|
11048
|
|
|
|
|
|
|
9u, 10u, 13u, 32u, 147u, 10u, 9u, 10u, |
|
11049
|
|
|
|
|
|
|
13u, 32u, 147u, 13u, 32u, 34u, 39u, 41u, |
|
11050
|
|
|
|
|
|
|
59u, 93u, 125u, 139u, 141u, 147u, 161u, 9u, |
|
11051
|
|
|
|
|
|
|
10u, 44u, 46u, 69u, 101u, 159u, 48u, 57u, |
|
11052
|
|
|
|
|
|
|
44u, 46u, 69u, 101u, 69u, 101u, 159u, 48u, |
|
11053
|
|
|
|
|
|
|
57u, 159u, 48u, 57u, 39u, 45u, 129u, 131u, |
|
11054
|
|
|
|
|
|
|
135u, 151u, 155u, 157u, 161u, 65u, 90u, 97u, |
|
11055
|
|
|
|
|
|
|
122u, 142u, 143u, 45u, 129u, 131u, 135u, 151u, |
|
11056
|
|
|
|
|
|
|
155u, 157u, 65u, 90u, 97u, 122u, 142u, 143u, |
|
11057
|
|
|
|
|
|
|
39u, 129u, 131u, 135u, 151u, 155u, 157u, 161u, |
|
11058
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 142u, 143u, 159u, 48u, |
|
11059
|
|
|
|
|
|
|
57u, 0 |
|
11060
|
|
|
|
|
|
|
}; |
|
11061
|
|
|
|
|
|
|
|
|
11062
|
|
|
|
|
|
|
static const char _english_tokenizer_single_lengths[] = { |
|
11063
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 1, 1, 1, |
|
11064
|
|
|
|
|
|
|
3, 1, 21, 5, 4, 3, 4, 5, |
|
11065
|
|
|
|
|
|
|
1, 5, 1, 5, 12, 5, 4, 3, |
|
11066
|
|
|
|
|
|
|
1, 9, 7, 8, 1 |
|
11067
|
|
|
|
|
|
|
}; |
|
11068
|
|
|
|
|
|
|
|
|
11069
|
|
|
|
|
|
|
static const char _english_tokenizer_range_lengths[] = { |
|
11070
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 1, |
|
11071
|
|
|
|
|
|
|
1, 1, 7, 0, 0, 0, 0, 0, |
|
11072
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 0, 1, |
|
11073
|
|
|
|
|
|
|
1, 3, 3, 3, 1 |
|
11074
|
|
|
|
|
|
|
}; |
|
11075
|
|
|
|
|
|
|
|
|
11076
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_index_offsets[] = { |
|
11077
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 48, 51, |
|
11078
|
|
|
|
|
|
|
54, 59, 62, 91, 97, 102, 106, 111, |
|
11079
|
|
|
|
|
|
|
117, 119, 125, 127, 133, 147, 154, 159, |
|
11080
|
|
|
|
|
|
|
164, 167, 180, 191, 203 |
|
11081
|
|
|
|
|
|
|
}; |
|
11082
|
|
|
|
|
|
|
|
|
11083
|
|
|
|
|
|
|
static const char _english_tokenizer_indicies[] = { |
|
11084
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
|
11085
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
|
11086
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
|
11087
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
|
11088
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
|
11089
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 0, |
|
11090
|
|
|
|
|
|
|
8, 8, 0, 9, 9, 0, 10, 10, |
|
11091
|
|
|
|
|
|
|
11, 11, 0, 11, 11, 0, 13, 14, |
|
11092
|
|
|
|
|
|
|
15, 13, 16, 12, 16, 12, 16, 19, |
|
11093
|
|
|
|
|
|
|
19, 19, 19, 13, 19, 18, 16, 12, |
|
11094
|
|
|
|
|
|
|
20, 12, 20, 12, 18, 12, 19, 12, |
|
11095
|
|
|
|
|
|
|
19, 12, 17, 13, 22, 23, 13, 13, |
|
11096
|
|
|
|
|
|
|
21, 13, 24, 13, 13, 21, 13, 13, |
|
11097
|
|
|
|
|
|
|
13, 21, 13, 24, 13, 13, 21, 13, |
|
11098
|
|
|
|
|
|
|
25, 26, 13, 13, 21, 28, 27, 13, |
|
11099
|
|
|
|
|
|
|
25, 29, 13, 13, 21, 28, 27, 13, |
|
11100
|
|
|
|
|
|
|
26, 29, 13, 13, 21, 4, 4, 5, |
|
11101
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 4, |
|
11102
|
|
|
|
|
|
|
5, 4, 30, 31, 32, 33, 33, 18, |
|
11103
|
|
|
|
|
|
|
18, 30, 31, 32, 33, 33, 30, 33, |
|
11104
|
|
|
|
|
|
|
33, 9, 9, 30, 11, 11, 30, 34, |
|
11105
|
|
|
|
|
|
|
35, 19, 19, 19, 19, 19, 19, 34, |
|
11106
|
|
|
|
|
|
|
19, 19, 19, 30, 35, 19, 19, 19, |
|
11107
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 19, 30, 34, |
|
11108
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 19, 34, 19, |
|
11109
|
|
|
|
|
|
|
19, 19, 30, 18, 18, 30, 0 |
|
11110
|
|
|
|
|
|
|
}; |
|
11111
|
|
|
|
|
|
|
|
|
11112
|
|
|
|
|
|
|
static const char _english_tokenizer_trans_targs[] = { |
|
11113
|
|
|
|
|
|
|
10, 1, 2, 10, 1, 3, 5, 6, |
|
11114
|
|
|
|
|
|
|
22, 23, 9, 24, 10, 11, 15, 19, |
|
11115
|
|
|
|
|
|
|
20, 0, 21, 25, 28, 10, 12, 14, |
|
11116
|
|
|
|
|
|
|
13, 16, 17, 10, 10, 18, 10, 4, |
|
11117
|
|
|
|
|
|
|
7, 8, 26, 27 |
|
11118
|
|
|
|
|
|
|
}; |
|
11119
|
|
|
|
|
|
|
|
|
11120
|
|
|
|
|
|
|
static const char _english_tokenizer_trans_actions[] = { |
|
11121
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 0, 0, |
|
11122
|
|
|
|
|
|
|
4, 4, 0, 0, 7, 0, 0, 0, |
|
11123
|
|
|
|
|
|
|
4, 0, 4, 0, 0, 8, 0, 0, |
|
11124
|
|
|
|
|
|
|
0, 0, 0, 9, 10, 0, 11, 0, |
|
11125
|
|
|
|
|
|
|
0, 0, 0, 0 |
|
11126
|
|
|
|
|
|
|
}; |
|
11127
|
|
|
|
|
|
|
|
|
11128
|
|
|
|
|
|
|
static const char _english_tokenizer_to_state_actions[] = { |
|
11129
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11130
|
|
|
|
|
|
|
0, 0, 5, 0, 0, 0, 0, 0, |
|
11131
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11132
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
|
11133
|
|
|
|
|
|
|
}; |
|
11134
|
|
|
|
|
|
|
|
|
11135
|
|
|
|
|
|
|
static const char _english_tokenizer_from_state_actions[] = { |
|
11136
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11137
|
|
|
|
|
|
|
0, 0, 6, 0, 0, 0, 0, 0, |
|
11138
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11139
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
|
11140
|
|
|
|
|
|
|
}; |
|
11141
|
|
|
|
|
|
|
|
|
11142
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_eof_trans[] = { |
|
11143
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 1, |
|
11144
|
|
|
|
|
|
|
1, 1, 0, 22, 22, 22, 22, 22, |
|
11145
|
|
|
|
|
|
|
28, 22, 28, 22, 31, 31, 31, 31, |
|
11146
|
|
|
|
|
|
|
31, 31, 31, 31, 31 |
|
11147
|
|
|
|
|
|
|
}; |
|
11148
|
|
|
|
|
|
|
|
|
11149
|
|
|
|
|
|
|
static const int english_tokenizer_start = 10; |
|
11150
|
|
|
|
|
|
|
|
|
11151
|
0
|
0
|
|
|
|
|
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11152
|
|
|
|
|
|
|
|
|
11153
|
0
|
|
|
|
|
|
bool english_tokenizer::next_sentence(vector& tokens) { |
|
11154
|
|
|
|
|
|
|
using namespace unilib; |
|
11155
|
|
|
|
|
|
|
|
|
11156
|
|
|
|
|
|
|
int cs, act; |
|
11157
|
|
|
|
|
|
|
size_t ts, te; |
|
11158
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
|
11159
|
|
|
|
|
|
|
|
|
11160
|
0
|
0
|
|
|
|
|
while (tokenize_url_email(tokens)) |
|
11161
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
|
11162
|
|
|
|
|
|
|
return true; |
|
11163
|
|
|
|
|
|
|
|
|
11164
|
|
|
|
|
|
|
{ |
|
11165
|
|
|
|
|
|
|
cs = english_tokenizer_start; |
|
11166
|
0
|
|
|
|
|
|
ts = 0; |
|
11167
|
|
|
|
|
|
|
te = 0; |
|
11168
|
|
|
|
|
|
|
act = 0; |
|
11169
|
|
|
|
|
|
|
} |
|
11170
|
|
|
|
|
|
|
|
|
11171
|
|
|
|
|
|
|
{ |
|
11172
|
|
|
|
|
|
|
int _klen; |
|
11173
|
|
|
|
|
|
|
const short *_keys; |
|
11174
|
|
|
|
|
|
|
int _trans; |
|
11175
|
|
|
|
|
|
|
short _widec; |
|
11176
|
|
|
|
|
|
|
|
|
11177
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
11178
|
|
|
|
|
|
|
goto _test_eof; |
|
11179
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
11180
|
|
|
|
|
|
|
goto _out; |
|
11181
|
|
|
|
|
|
|
_resume: |
|
11182
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_from_state_actions[cs] ) { |
|
11183
|
|
|
|
|
|
|
case 6: |
|
11184
|
0
|
|
|
|
|
|
{ts = ( current);} |
|
11185
|
0
|
|
|
|
|
|
break; |
|
11186
|
|
|
|
|
|
|
} |
|
11187
|
|
|
|
|
|
|
|
|
11188
|
0
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
|
11189
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_cond_lengths[cs]; |
|
11190
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_cond_keys + (_english_tokenizer_cond_offsets[cs]*2); |
|
11191
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
11192
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
11193
|
|
|
|
|
|
|
const short *_mid; |
|
11194
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
11195
|
|
|
|
|
|
|
while (1) { |
|
11196
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
11197
|
|
|
|
|
|
|
break; |
|
11198
|
|
|
|
|
|
|
|
|
11199
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
11200
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
11201
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
11202
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
11203
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
11204
|
|
|
|
|
|
|
else { |
|
11205
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_cond_spaces[_english_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
|
11206
|
|
|
|
|
|
|
case 0: { |
|
11207
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
|
11208
|
0
|
0
|
|
|
|
|
if ( |
|
11209
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
|
0
|
|
|
|
|
|
|
11210
|
|
|
|
|
|
|
break; |
|
11211
|
|
|
|
|
|
|
} |
|
11212
|
|
|
|
|
|
|
case 1: { |
|
11213
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
|
11214
|
0
|
0
|
|
|
|
|
if ( |
|
11215
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11216
|
|
|
|
|
|
|
break; |
|
11217
|
|
|
|
|
|
|
} |
|
11218
|
|
|
|
|
|
|
} |
|
11219
|
|
|
|
|
|
|
break; |
|
11220
|
|
|
|
|
|
|
} |
|
11221
|
|
|
|
|
|
|
} |
|
11222
|
|
|
|
|
|
|
} |
|
11223
|
|
|
|
|
|
|
|
|
11224
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_trans_keys + _english_tokenizer_key_offsets[cs]; |
|
11225
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_index_offsets[cs]; |
|
11226
|
|
|
|
|
|
|
|
|
11227
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_single_lengths[cs]; |
|
11228
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
11229
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
11230
|
|
|
|
|
|
|
const short *_mid; |
|
11231
|
0
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
|
11232
|
|
|
|
|
|
|
while (1) { |
|
11233
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
11234
|
|
|
|
|
|
|
break; |
|
11235
|
|
|
|
|
|
|
|
|
11236
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
11237
|
0
|
0
|
|
|
|
|
if ( _widec < *_mid ) |
|
11238
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
|
11239
|
0
|
0
|
|
|
|
|
else if ( _widec > *_mid ) |
|
11240
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
|
11241
|
|
|
|
|
|
|
else { |
|
11242
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
11243
|
0
|
|
|
|
|
|
goto _match; |
|
11244
|
|
|
|
|
|
|
} |
|
11245
|
|
|
|
|
|
|
} |
|
11246
|
0
|
|
|
|
|
|
_keys += _klen; |
|
11247
|
0
|
|
|
|
|
|
_trans += _klen; |
|
11248
|
|
|
|
|
|
|
} |
|
11249
|
|
|
|
|
|
|
|
|
11250
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_range_lengths[cs]; |
|
11251
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
|
11252
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
11253
|
|
|
|
|
|
|
const short *_mid; |
|
11254
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
11255
|
|
|
|
|
|
|
while (1) { |
|
11256
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
11257
|
|
|
|
|
|
|
break; |
|
11258
|
|
|
|
|
|
|
|
|
11259
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
11260
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
11261
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
11262
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
11263
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
11264
|
|
|
|
|
|
|
else { |
|
11265
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
11266
|
0
|
|
|
|
|
|
goto _match; |
|
11267
|
|
|
|
|
|
|
} |
|
11268
|
|
|
|
|
|
|
} |
|
11269
|
0
|
|
|
|
|
|
_trans += _klen; |
|
11270
|
|
|
|
|
|
|
} |
|
11271
|
|
|
|
|
|
|
|
|
11272
|
|
|
|
|
|
|
_match: |
|
11273
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_indicies[_trans]; |
|
11274
|
|
|
|
|
|
|
_eof_trans: |
|
11275
|
0
|
|
|
|
|
|
cs = _english_tokenizer_trans_targs[_trans]; |
|
11276
|
|
|
|
|
|
|
|
|
11277
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_trans_actions[_trans] == 0 ) |
|
11278
|
|
|
|
|
|
|
goto _again; |
|
11279
|
|
|
|
|
|
|
|
|
11280
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_trans_actions[_trans] ) { |
|
11281
|
|
|
|
|
|
|
case 3: |
|
11282
|
0
|
|
|
|
|
|
{ whitespace = current; } |
|
11283
|
0
|
|
|
|
|
|
break; |
|
11284
|
|
|
|
|
|
|
case 4: |
|
11285
|
0
|
|
|
|
|
|
{te = ( current)+1;} |
|
11286
|
0
|
|
|
|
|
|
break; |
|
11287
|
|
|
|
|
|
|
case 7: |
|
11288
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
|
11289
|
0
|
|
|
|
|
|
split_token(tokens); |
|
11290
|
0
|
|
|
|
|
|
current = te; |
|
11291
|
0
|
0
|
|
|
|
|
do |
|
11292
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11293
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11294
|
0
|
|
|
|
|
|
( current)--; |
|
11295
|
|
|
|
|
|
|
}} |
|
11296
|
0
|
|
|
|
|
|
break; |
|
11297
|
|
|
|
|
|
|
case 2: |
|
11298
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
|
11299
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, &abbreviations); |
|
11300
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
|
11301
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
|
11302
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
|
11303
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
|
11304
|
|
|
|
|
|
|
}} |
|
11305
|
|
|
|
|
|
|
break; |
|
11306
|
|
|
|
|
|
|
case 10: |
|
11307
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
|
11308
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
|
11309
|
0
|
|
|
|
|
|
current = te; |
|
11310
|
0
|
0
|
|
|
|
|
do |
|
11311
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11312
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11313
|
0
|
|
|
|
|
|
( current)--; |
|
11314
|
|
|
|
|
|
|
}} |
|
11315
|
0
|
|
|
|
|
|
break; |
|
11316
|
|
|
|
|
|
|
case 11: |
|
11317
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
|
11318
|
0
|
|
|
|
|
|
split_token(tokens); |
|
11319
|
0
|
|
|
|
|
|
current = te; |
|
11320
|
0
|
0
|
|
|
|
|
do |
|
11321
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11322
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11323
|
0
|
|
|
|
|
|
( current)--; |
|
11324
|
|
|
|
|
|
|
}} |
|
11325
|
0
|
|
|
|
|
|
break; |
|
11326
|
|
|
|
|
|
|
case 8: |
|
11327
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
|
11328
|
0
|
|
|
|
|
|
current = te; |
|
11329
|
0
|
0
|
|
|
|
|
do |
|
11330
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11331
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11332
|
0
|
|
|
|
|
|
( current)--; |
|
11333
|
|
|
|
|
|
|
}} |
|
11334
|
0
|
|
|
|
|
|
break; |
|
11335
|
|
|
|
|
|
|
case 9: |
|
11336
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
|
11337
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
|
11338
|
0
|
|
|
|
|
|
current = te; |
|
11339
|
0
|
0
|
|
|
|
|
do |
|
11340
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11341
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11342
|
0
|
|
|
|
|
|
( current)--; |
|
11343
|
|
|
|
|
|
|
}} |
|
11344
|
0
|
|
|
|
|
|
break; |
|
11345
|
|
|
|
|
|
|
case 1: |
|
11346
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
|
11347
|
0
|
|
|
|
|
|
split_token(tokens); |
|
11348
|
0
|
|
|
|
|
|
current = te; |
|
11349
|
0
|
0
|
|
|
|
|
do |
|
11350
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11351
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11352
|
0
|
|
|
|
|
|
( current)--; |
|
11353
|
|
|
|
|
|
|
}} |
|
11354
|
0
|
|
|
|
|
|
break; |
|
11355
|
|
|
|
|
|
|
} |
|
11356
|
|
|
|
|
|
|
|
|
11357
|
|
|
|
|
|
|
_again: |
|
11358
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_to_state_actions[cs] ) { |
|
11359
|
|
|
|
|
|
|
case 5: |
|
11360
|
0
|
|
|
|
|
|
{ts = 0;} |
|
11361
|
0
|
|
|
|
|
|
break; |
|
11362
|
|
|
|
|
|
|
} |
|
11363
|
|
|
|
|
|
|
|
|
11364
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
|
11365
|
|
|
|
|
|
|
goto _out; |
|
11366
|
0
|
0
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
|
11367
|
|
|
|
|
|
|
goto _resume; |
|
11368
|
|
|
|
|
|
|
_test_eof: {} |
|
11369
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
11370
|
|
|
|
|
|
|
{ |
|
11371
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_eof_trans[cs] > 0 ) { |
|
11372
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_eof_trans[cs] - 1; |
|
11373
|
0
|
|
|
|
|
|
goto _eof_trans; |
|
11374
|
|
|
|
|
|
|
} |
|
11375
|
|
|
|
|
|
|
} |
|
11376
|
|
|
|
|
|
|
|
|
11377
|
|
|
|
|
|
|
_out: {} |
|
11378
|
|
|
|
|
|
|
} |
|
11379
|
|
|
|
|
|
|
|
|
11380
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
|
11381
|
|
|
|
|
|
|
|
|
11382
|
0
|
|
|
|
|
|
return !tokens.empty(); |
|
11383
|
|
|
|
|
|
|
} |
|
11384
|
|
|
|
|
|
|
|
|
11385
|
|
|
|
|
|
|
} // namespace morphodita |
|
11386
|
|
|
|
|
|
|
|
|
11387
|
|
|
|
|
|
|
///////// |
|
11388
|
|
|
|
|
|
|
// File: morphodita/tokenizer/generic_tokenizer.cpp |
|
11389
|
|
|
|
|
|
|
///////// |
|
11390
|
|
|
|
|
|
|
|
|
11391
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
11392
|
|
|
|
|
|
|
// |
|
11393
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
11394
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
11395
|
|
|
|
|
|
|
// |
|
11396
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
11397
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
11398
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
11399
|
|
|
|
|
|
|
|
|
11400
|
|
|
|
|
|
|
namespace morphodita { |
|
11401
|
|
|
|
|
|
|
|
|
11402
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_offsets[] = { |
|
11403
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11404
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
|
11405
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2 |
|
11406
|
|
|
|
|
|
|
}; |
|
11407
|
|
|
|
|
|
|
|
|
11408
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_lengths[] = { |
|
11409
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 2, |
|
11410
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11411
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
|
11412
|
|
|
|
|
|
|
}; |
|
11413
|
|
|
|
|
|
|
|
|
11414
|
|
|
|
|
|
|
static const short _generic_tokenizer_cond_keys[] = { |
|
11415
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
|
11416
|
|
|
|
|
|
|
}; |
|
11417
|
|
|
|
|
|
|
|
|
11418
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_spaces[] = { |
|
11419
|
|
|
|
|
|
|
1, 0, 0 |
|
11420
|
|
|
|
|
|
|
}; |
|
11421
|
|
|
|
|
|
|
|
|
11422
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_key_offsets[] = { |
|
11423
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 51, 54, |
|
11424
|
|
|
|
|
|
|
89, 94, 98, 101, 105, 110, 111, 116, |
|
11425
|
|
|
|
|
|
|
117, 122, 136, 142, 147, 150, 162 |
|
11426
|
|
|
|
|
|
|
}; |
|
11427
|
|
|
|
|
|
|
|
|
11428
|
|
|
|
|
|
|
static const short _generic_tokenizer_trans_keys[] = { |
|
11429
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
|
11430
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
|
11431
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
|
11432
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
|
11433
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
|
11434
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 43u, 45u, |
|
11435
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 9u, 10u, |
|
11436
|
|
|
|
|
|
|
13u, 32u, 33u, 44u, 46u, 47u, 63u, 129u, |
|
11437
|
|
|
|
|
|
|
131u, 135u, 142u, 147u, 157u, 159u, 160u, 301u, |
|
11438
|
|
|
|
|
|
|
557u, 811u, 1067u, 0u, 42u, 48u, 57u, 58u, |
|
11439
|
|
|
|
|
|
|
64u, 65u, 90u, 91u, 96u, 97u, 122u, 123u, |
|
11440
|
|
|
|
|
|
|
255u, 9u, 10u, 13u, 32u, 147u, 9u, 13u, |
|
11441
|
|
|
|
|
|
|
32u, 147u, 9u, 32u, 147u, 9u, 10u, 32u, |
|
11442
|
|
|
|
|
|
|
147u, 9u, 10u, 13u, 32u, 147u, 13u, 9u, |
|
11443
|
|
|
|
|
|
|
10u, 13u, 32u, 147u, 10u, 9u, 10u, 13u, |
|
11444
|
|
|
|
|
|
|
32u, 147u, 13u, 32u, 34u, 39u, 41u, 59u, |
|
11445
|
|
|
|
|
|
|
93u, 125u, 139u, 141u, 147u, 161u, 9u, 10u, |
|
11446
|
|
|
|
|
|
|
46u, 69u, 101u, 159u, 48u, 57u, 69u, 101u, |
|
11447
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 129u, 131u, |
|
11448
|
|
|
|
|
|
|
135u, 151u, 155u, 157u, 65u, 90u, 97u, 122u, |
|
11449
|
|
|
|
|
|
|
142u, 143u, 159u, 48u, 57u, 0 |
|
11450
|
|
|
|
|
|
|
}; |
|
11451
|
|
|
|
|
|
|
|
|
11452
|
|
|
|
|
|
|
static const char _generic_tokenizer_single_lengths[] = { |
|
11453
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 3, 1, 21, |
|
11454
|
|
|
|
|
|
|
5, 4, 3, 4, 5, 1, 5, 1, |
|
11455
|
|
|
|
|
|
|
5, 12, 4, 3, 1, 6, 1 |
|
11456
|
|
|
|
|
|
|
}; |
|
11457
|
|
|
|
|
|
|
|
|
11458
|
|
|
|
|
|
|
static const char _generic_tokenizer_range_lengths[] = { |
|
11459
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 7, |
|
11460
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11461
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 3, 1 |
|
11462
|
|
|
|
|
|
|
}; |
|
11463
|
|
|
|
|
|
|
|
|
11464
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_index_offsets[] = { |
|
11465
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 50, 53, |
|
11466
|
|
|
|
|
|
|
82, 88, 93, 97, 102, 108, 110, 116, |
|
11467
|
|
|
|
|
|
|
118, 124, 138, 144, 149, 152, 162 |
|
11468
|
|
|
|
|
|
|
}; |
|
11469
|
|
|
|
|
|
|
|
|
11470
|
|
|
|
|
|
|
static const char _generic_tokenizer_indicies[] = { |
|
11471
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
|
11472
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
|
11473
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
|
11474
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
|
11475
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
|
11476
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 8, |
|
11477
|
|
|
|
|
|
|
8, 0, 8, 8, 0, 10, 11, 12, |
|
11478
|
|
|
|
|
|
|
10, 13, 9, 13, 9, 13, 16, 16, |
|
11479
|
|
|
|
|
|
|
16, 16, 10, 16, 15, 13, 9, 17, |
|
11480
|
|
|
|
|
|
|
9, 17, 9, 15, 9, 16, 9, 16, |
|
11481
|
|
|
|
|
|
|
9, 14, 10, 19, 20, 10, 10, 18, |
|
11482
|
|
|
|
|
|
|
10, 21, 10, 10, 18, 10, 10, 10, |
|
11483
|
|
|
|
|
|
|
18, 10, 21, 10, 10, 18, 10, 22, |
|
11484
|
|
|
|
|
|
|
23, 10, 10, 18, 25, 24, 10, 22, |
|
11485
|
|
|
|
|
|
|
26, 10, 10, 18, 25, 24, 10, 23, |
|
11486
|
|
|
|
|
|
|
26, 10, 10, 18, 4, 4, 5, 5, |
|
11487
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
|
11488
|
|
|
|
|
|
|
4, 27, 28, 29, 29, 15, 15, 27, |
|
11489
|
|
|
|
|
|
|
29, 29, 6, 6, 27, 8, 8, 27, |
|
11490
|
|
|
|
|
|
|
16, 16, 16, 16, 16, 16, 16, 16, |
|
11491
|
|
|
|
|
|
|
16, 27, 15, 15, 27, 0 |
|
11492
|
|
|
|
|
|
|
}; |
|
11493
|
|
|
|
|
|
|
|
|
11494
|
|
|
|
|
|
|
static const char _generic_tokenizer_trans_targs[] = { |
|
11495
|
|
|
|
|
|
|
7, 1, 2, 7, 1, 3, 19, 6, |
|
11496
|
|
|
|
|
|
|
20, 7, 8, 12, 16, 17, 0, 18, |
|
11497
|
|
|
|
|
|
|
21, 22, 7, 9, 11, 10, 13, 14, |
|
11498
|
|
|
|
|
|
|
7, 7, 15, 7, 4, 5 |
|
11499
|
|
|
|
|
|
|
}; |
|
11500
|
|
|
|
|
|
|
|
|
11501
|
|
|
|
|
|
|
static const char _generic_tokenizer_trans_actions[] = { |
|
11502
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 4, 0, |
|
11503
|
|
|
|
|
|
|
0, 7, 0, 0, 0, 4, 0, 4, |
|
11504
|
|
|
|
|
|
|
0, 0, 8, 0, 0, 0, 0, 0, |
|
11505
|
|
|
|
|
|
|
9, 10, 0, 11, 0, 0 |
|
11506
|
|
|
|
|
|
|
}; |
|
11507
|
|
|
|
|
|
|
|
|
11508
|
|
|
|
|
|
|
static const char _generic_tokenizer_to_state_actions[] = { |
|
11509
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 5, |
|
11510
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11511
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
|
11512
|
|
|
|
|
|
|
}; |
|
11513
|
|
|
|
|
|
|
|
|
11514
|
|
|
|
|
|
|
static const char _generic_tokenizer_from_state_actions[] = { |
|
11515
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 6, |
|
11516
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11517
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
|
11518
|
|
|
|
|
|
|
}; |
|
11519
|
|
|
|
|
|
|
|
|
11520
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_eof_trans[] = { |
|
11521
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 0, |
|
11522
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 25, 19, 25, |
|
11523
|
|
|
|
|
|
|
19, 28, 28, 28, 28, 28, 28 |
|
11524
|
|
|
|
|
|
|
}; |
|
11525
|
|
|
|
|
|
|
|
|
11526
|
|
|
|
|
|
|
static const int generic_tokenizer_start = 7; |
|
11527
|
|
|
|
|
|
|
|
|
11528
|
3
|
50
|
|
|
|
|
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11529
|
|
|
|
|
|
|
|
|
11530
|
4
|
|
|
|
|
|
bool generic_tokenizer::next_sentence(vector& tokens) { |
|
11531
|
|
|
|
|
|
|
using namespace unilib; |
|
11532
|
|
|
|
|
|
|
|
|
11533
|
|
|
|
|
|
|
int cs, act; |
|
11534
|
|
|
|
|
|
|
size_t ts, te; |
|
11535
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
|
11536
|
|
|
|
|
|
|
|
|
11537
|
2
|
50
|
|
|
|
|
while (tokenize_url_email(tokens)) |
|
11538
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
|
11539
|
|
|
|
|
|
|
return true; |
|
11540
|
|
|
|
|
|
|
|
|
11541
|
|
|
|
|
|
|
{ |
|
11542
|
|
|
|
|
|
|
cs = generic_tokenizer_start; |
|
11543
|
2
|
|
|
|
|
|
ts = 0; |
|
11544
|
|
|
|
|
|
|
te = 0; |
|
11545
|
|
|
|
|
|
|
act = 0; |
|
11546
|
|
|
|
|
|
|
} |
|
11547
|
|
|
|
|
|
|
|
|
11548
|
|
|
|
|
|
|
{ |
|
11549
|
|
|
|
|
|
|
int _klen; |
|
11550
|
|
|
|
|
|
|
const short *_keys; |
|
11551
|
|
|
|
|
|
|
int _trans; |
|
11552
|
|
|
|
|
|
|
short _widec; |
|
11553
|
|
|
|
|
|
|
|
|
11554
|
2
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
11555
|
|
|
|
|
|
|
goto _test_eof; |
|
11556
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
11557
|
|
|
|
|
|
|
goto _out; |
|
11558
|
|
|
|
|
|
|
_resume: |
|
11559
|
37
|
100
|
|
|
|
|
switch ( _generic_tokenizer_from_state_actions[cs] ) { |
|
11560
|
|
|
|
|
|
|
case 6: |
|
11561
|
10
|
|
|
|
|
|
{ts = ( current);} |
|
11562
|
10
|
|
|
|
|
|
break; |
|
11563
|
|
|
|
|
|
|
} |
|
11564
|
|
|
|
|
|
|
|
|
11565
|
74
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
|
11566
|
37
|
|
|
|
|
|
_klen = _generic_tokenizer_cond_lengths[cs]; |
|
11567
|
37
|
|
|
|
|
|
_keys = _generic_tokenizer_cond_keys + (_generic_tokenizer_cond_offsets[cs]*2); |
|
11568
|
37
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
|
11569
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
11570
|
|
|
|
|
|
|
const short *_mid; |
|
11571
|
27
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
11572
|
|
|
|
|
|
|
while (1) { |
|
11573
|
27
|
100
|
|
|
|
|
if ( _upper < _lower ) |
|
11574
|
|
|
|
|
|
|
break; |
|
11575
|
|
|
|
|
|
|
|
|
11576
|
17
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
11577
|
17
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
11578
|
3
|
|
|
|
|
|
_upper = _mid - 2; |
|
11579
|
14
|
50
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
11580
|
14
|
|
|
|
|
|
_lower = _mid + 2; |
|
11581
|
|
|
|
|
|
|
else { |
|
11582
|
0
|
|
|
|
|
|
switch ( _generic_tokenizer_cond_spaces[_generic_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
|
11583
|
|
|
|
|
|
|
case 0: { |
|
11584
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
|
11585
|
0
|
0
|
|
|
|
|
if ( |
|
11586
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
|
0
|
|
|
|
|
|
|
11587
|
|
|
|
|
|
|
break; |
|
11588
|
|
|
|
|
|
|
} |
|
11589
|
|
|
|
|
|
|
case 1: { |
|
11590
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
|
11591
|
0
|
0
|
|
|
|
|
if ( |
|
11592
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11593
|
|
|
|
|
|
|
break; |
|
11594
|
|
|
|
|
|
|
} |
|
11595
|
|
|
|
|
|
|
} |
|
11596
|
|
|
|
|
|
|
break; |
|
11597
|
|
|
|
|
|
|
} |
|
11598
|
|
|
|
|
|
|
} |
|
11599
|
|
|
|
|
|
|
} |
|
11600
|
|
|
|
|
|
|
|
|
11601
|
37
|
|
|
|
|
|
_keys = _generic_tokenizer_trans_keys + _generic_tokenizer_key_offsets[cs]; |
|
11602
|
37
|
|
|
|
|
|
_trans = _generic_tokenizer_index_offsets[cs]; |
|
11603
|
|
|
|
|
|
|
|
|
11604
|
37
|
|
|
|
|
|
_klen = _generic_tokenizer_single_lengths[cs]; |
|
11605
|
37
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
|
11606
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
11607
|
|
|
|
|
|
|
const short *_mid; |
|
11608
|
139
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
|
11609
|
|
|
|
|
|
|
while (1) { |
|
11610
|
139
|
100
|
|
|
|
|
if ( _upper < _lower ) |
|
11611
|
|
|
|
|
|
|
break; |
|
11612
|
|
|
|
|
|
|
|
|
11613
|
112
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
11614
|
112
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
|
11615
|
66
|
|
|
|
|
|
_upper = _mid - 1; |
|
11616
|
46
|
100
|
|
|
|
|
else if ( _widec > *_mid ) |
|
11617
|
36
|
|
|
|
|
|
_lower = _mid + 1; |
|
11618
|
|
|
|
|
|
|
else { |
|
11619
|
10
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
11620
|
10
|
|
|
|
|
|
goto _match; |
|
11621
|
|
|
|
|
|
|
} |
|
11622
|
|
|
|
|
|
|
} |
|
11623
|
27
|
|
|
|
|
|
_keys += _klen; |
|
11624
|
27
|
|
|
|
|
|
_trans += _klen; |
|
11625
|
|
|
|
|
|
|
} |
|
11626
|
|
|
|
|
|
|
|
|
11627
|
27
|
|
|
|
|
|
_klen = _generic_tokenizer_range_lengths[cs]; |
|
11628
|
27
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
|
11629
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
11630
|
|
|
|
|
|
|
const short *_mid; |
|
11631
|
37
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
11632
|
|
|
|
|
|
|
while (1) { |
|
11633
|
37
|
100
|
|
|
|
|
if ( _upper < _lower ) |
|
11634
|
|
|
|
|
|
|
break; |
|
11635
|
|
|
|
|
|
|
|
|
11636
|
32
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
11637
|
32
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
11638
|
10
|
|
|
|
|
|
_upper = _mid - 2; |
|
11639
|
22
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
11640
|
3
|
|
|
|
|
|
_lower = _mid + 2; |
|
11641
|
|
|
|
|
|
|
else { |
|
11642
|
19
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
11643
|
19
|
|
|
|
|
|
goto _match; |
|
11644
|
|
|
|
|
|
|
} |
|
11645
|
|
|
|
|
|
|
} |
|
11646
|
5
|
|
|
|
|
|
_trans += _klen; |
|
11647
|
|
|
|
|
|
|
} |
|
11648
|
|
|
|
|
|
|
|
|
11649
|
|
|
|
|
|
|
_match: |
|
11650
|
37
|
|
|
|
|
|
_trans = _generic_tokenizer_indicies[_trans]; |
|
11651
|
|
|
|
|
|
|
_eof_trans: |
|
11652
|
39
|
|
|
|
|
|
cs = _generic_tokenizer_trans_targs[_trans]; |
|
11653
|
|
|
|
|
|
|
|
|
11654
|
39
|
100
|
|
|
|
|
if ( _generic_tokenizer_trans_actions[_trans] == 0 ) |
|
11655
|
|
|
|
|
|
|
goto _again; |
|
11656
|
|
|
|
|
|
|
|
|
11657
|
12
|
|
|
|
|
|
switch ( _generic_tokenizer_trans_actions[_trans] ) { |
|
11658
|
|
|
|
|
|
|
case 3: |
|
11659
|
0
|
|
|
|
|
|
{ whitespace = current; } |
|
11660
|
0
|
|
|
|
|
|
break; |
|
11661
|
|
|
|
|
|
|
case 4: |
|
11662
|
2
|
|
|
|
|
|
{te = ( current)+1;} |
|
11663
|
2
|
|
|
|
|
|
break; |
|
11664
|
|
|
|
|
|
|
case 7: |
|
11665
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
|
11666
|
0
|
|
|
|
|
|
current = te; |
|
11667
|
0
|
0
|
|
|
|
|
do |
|
11668
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11669
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11670
|
0
|
|
|
|
|
|
( current)--; |
|
11671
|
|
|
|
|
|
|
}} |
|
11672
|
0
|
|
|
|
|
|
break; |
|
11673
|
|
|
|
|
|
|
case 2: |
|
11674
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
|
11675
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, nullptr); |
|
11676
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
|
11677
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
|
11678
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
|
11679
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
|
11680
|
|
|
|
|
|
|
}} |
|
11681
|
|
|
|
|
|
|
break; |
|
11682
|
|
|
|
|
|
|
case 10: |
|
11683
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
|
11684
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
|
11685
|
0
|
|
|
|
|
|
current = te; |
|
11686
|
0
|
0
|
|
|
|
|
do |
|
11687
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11688
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11689
|
0
|
|
|
|
|
|
( current)--; |
|
11690
|
|
|
|
|
|
|
}} |
|
11691
|
0
|
|
|
|
|
|
break; |
|
11692
|
|
|
|
|
|
|
case 11: |
|
11693
|
7
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
|
11694
|
7
|
|
|
|
|
|
current = te; |
|
11695
|
7
|
50
|
|
|
|
|
do |
|
11696
|
7
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11697
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11698
|
7
|
|
|
|
|
|
( current)--; |
|
11699
|
|
|
|
|
|
|
}} |
|
11700
|
7
|
|
|
|
|
|
break; |
|
11701
|
|
|
|
|
|
|
case 8: |
|
11702
|
3
|
|
|
|
|
|
{te = ( current);( current)--;{ |
|
11703
|
3
|
|
|
|
|
|
current = te; |
|
11704
|
3
|
50
|
|
|
|
|
do |
|
11705
|
3
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11706
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11707
|
3
|
|
|
|
|
|
( current)--; |
|
11708
|
|
|
|
|
|
|
}} |
|
11709
|
3
|
|
|
|
|
|
break; |
|
11710
|
|
|
|
|
|
|
case 9: |
|
11711
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
|
11712
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
|
11713
|
0
|
|
|
|
|
|
current = te; |
|
11714
|
0
|
0
|
|
|
|
|
do |
|
11715
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11716
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11717
|
0
|
|
|
|
|
|
( current)--; |
|
11718
|
|
|
|
|
|
|
}} |
|
11719
|
0
|
|
|
|
|
|
break; |
|
11720
|
|
|
|
|
|
|
case 1: |
|
11721
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
|
11722
|
0
|
|
|
|
|
|
current = te; |
|
11723
|
0
|
0
|
|
|
|
|
do |
|
11724
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
|
11725
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
|
11726
|
0
|
|
|
|
|
|
( current)--; |
|
11727
|
|
|
|
|
|
|
}} |
|
11728
|
0
|
|
|
|
|
|
break; |
|
11729
|
|
|
|
|
|
|
} |
|
11730
|
|
|
|
|
|
|
|
|
11731
|
|
|
|
|
|
|
_again: |
|
11732
|
39
|
100
|
|
|
|
|
switch ( _generic_tokenizer_to_state_actions[cs] ) { |
|
11733
|
|
|
|
|
|
|
case 5: |
|
11734
|
10
|
|
|
|
|
|
{ts = 0;} |
|
11735
|
10
|
|
|
|
|
|
break; |
|
11736
|
|
|
|
|
|
|
} |
|
11737
|
|
|
|
|
|
|
|
|
11738
|
39
|
50
|
|
|
|
|
if ( cs == 0 ) |
|
11739
|
|
|
|
|
|
|
goto _out; |
|
11740
|
39
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
|
11741
|
|
|
|
|
|
|
goto _resume; |
|
11742
|
|
|
|
|
|
|
_test_eof: {} |
|
11743
|
4
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
11744
|
|
|
|
|
|
|
{ |
|
11745
|
4
|
100
|
|
|
|
|
if ( _generic_tokenizer_eof_trans[cs] > 0 ) { |
|
11746
|
2
|
|
|
|
|
|
_trans = _generic_tokenizer_eof_trans[cs] - 1; |
|
11747
|
2
|
|
|
|
|
|
goto _eof_trans; |
|
11748
|
|
|
|
|
|
|
} |
|
11749
|
|
|
|
|
|
|
} |
|
11750
|
|
|
|
|
|
|
|
|
11751
|
|
|
|
|
|
|
_out: {} |
|
11752
|
|
|
|
|
|
|
} |
|
11753
|
|
|
|
|
|
|
|
|
11754
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
|
11755
|
|
|
|
|
|
|
|
|
11756
|
2
|
|
|
|
|
|
return !tokens.empty(); |
|
11757
|
|
|
|
|
|
|
} |
|
11758
|
|
|
|
|
|
|
|
|
11759
|
|
|
|
|
|
|
} // namespace morphodita |
|
11760
|
|
|
|
|
|
|
|
|
11761
|
|
|
|
|
|
|
///////// |
|
11762
|
|
|
|
|
|
|
// File: morphodita/tokenizer/ragel_tokenizer.cpp |
|
11763
|
|
|
|
|
|
|
///////// |
|
11764
|
|
|
|
|
|
|
|
|
11765
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
11766
|
|
|
|
|
|
|
// |
|
11767
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
11768
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
11769
|
|
|
|
|
|
|
// |
|
11770
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
11771
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
11772
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
11773
|
|
|
|
|
|
|
|
|
11774
|
|
|
|
|
|
|
namespace morphodita { |
|
11775
|
|
|
|
|
|
|
|
|
11776
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_offsets[] = { |
|
11777
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11778
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
|
11779
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
11780
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
11781
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
11782
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
11783
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
11784
|
|
|
|
|
|
|
1, 1, 1, 2, 3, 3, 4, 5, |
|
11785
|
|
|
|
|
|
|
6, 7, 8, 9, 10, 11, 12, 13, |
|
11786
|
|
|
|
|
|
|
14, 15, 16 |
|
11787
|
|
|
|
|
|
|
}; |
|
11788
|
|
|
|
|
|
|
|
|
11789
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_lengths[] = { |
|
11790
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11791
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 0, 0, |
|
11792
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11793
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11794
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11795
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11796
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11797
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 1, 1, 1, |
|
11798
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
11799
|
|
|
|
|
|
|
1, 1, 1 |
|
11800
|
|
|
|
|
|
|
}; |
|
11801
|
|
|
|
|
|
|
|
|
11802
|
|
|
|
|
|
|
static const short _ragel_url_email_cond_keys[] = { |
|
11803
|
|
|
|
|
|
|
41u, 41u, 47u, 47u, 47u, 47u, 41u, 41u, |
|
11804
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
|
11805
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
|
11806
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
|
11807
|
|
|
|
|
|
|
47u, 47u, 0 |
|
11808
|
|
|
|
|
|
|
}; |
|
11809
|
|
|
|
|
|
|
|
|
11810
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_spaces[] = { |
|
11811
|
|
|
|
|
|
|
1, 0, 0, 1, 0, 0, 0, 0, |
|
11812
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
11813
|
|
|
|
|
|
|
0, 0 |
|
11814
|
|
|
|
|
|
|
}; |
|
11815
|
|
|
|
|
|
|
|
|
11816
|
|
|
|
|
|
|
static const short _ragel_url_email_key_offsets[] = { |
|
11817
|
|
|
|
|
|
|
0, 0, 15, 29, 41, 54, 63, 71, |
|
11818
|
|
|
|
|
|
|
78, 86, 92, 100, 117, 145, 154, 162, |
|
11819
|
|
|
|
|
|
|
171, 179, 188, 196, 204, 215, 225, 233, |
|
11820
|
|
|
|
|
|
|
241, 252, 262, 270, 278, 289, 299, 315, |
|
11821
|
|
|
|
|
|
|
330, 346, 360, 376, 393, 409, 426, 442, |
|
11822
|
|
|
|
|
|
|
459, 475, 491, 510, 528, 544, 560, 579, |
|
11823
|
|
|
|
|
|
|
597, 613, 629, 648, 666, 682, 698, 714, |
|
11824
|
|
|
|
|
|
|
725, 726, 741, 752, 756, 773, 801, 812, |
|
11825
|
|
|
|
|
|
|
823, 834, 848, 861, 879, 893, 908, 926, |
|
11826
|
|
|
|
|
|
|
944, 962, 983 |
|
11827
|
|
|
|
|
|
|
}; |
|
11828
|
|
|
|
|
|
|
|
|
11829
|
|
|
|
|
|
|
static const short _ragel_url_email_trans_keys[] = { |
|
11830
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 95u, 36u, 37u, 39u, |
|
11831
|
|
|
|
|
|
|
46u, 51u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
11832
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 46u, 48u, |
|
11833
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 95u, 36u, |
|
11834
|
|
|
|
|
|
|
37u, 39u, 46u, 48u, 57u, 65u, 90u, 97u, |
|
11835
|
|
|
|
|
|
|
122u, 33u, 64u, 95u, 36u, 37u, 39u, 46u, |
|
11836
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
|
11837
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
11838
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
11839
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
11840
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 57u, |
|
11841
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
|
11842
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 41u, 61u, |
|
11843
|
|
|
|
|
|
|
95u, 36u, 47u, 48u, 57u, 58u, 59u, 63u, |
|
11844
|
|
|
|
|
|
|
64u, 65u, 90u, 97u, 122u, 33u, 39u, 40u, |
|
11845
|
|
|
|
|
|
|
44u, 46u, 61u, 63u, 95u, 129u, 131u, 135u, |
|
11846
|
|
|
|
|
|
|
151u, 809u, 1065u, 36u, 38u, 42u, 57u, 58u, |
|
11847
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 142u, 143u, 155u, |
|
11848
|
|
|
|
|
|
|
159u, 48u, 49u, 50u, 51u, 57u, 65u, 90u, |
|
11849
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
|
11850
|
|
|
|
|
|
|
97u, 122u, 48u, 49u, 50u, 51u, 57u, 65u, |
|
11851
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 57u, 65u, |
|
11852
|
|
|
|
|
|
|
90u, 97u, 122u, 48u, 49u, 50u, 51u, 57u, |
|
11853
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
|
11854
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
|
11855
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 53u, 48u, |
|
11856
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
11857
|
|
|
|
|
|
|
46u, 48u, 53u, 54u, 57u, 65u, 90u, 97u, |
|
11858
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 57u, 65u, 90u, 97u, |
|
11859
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 57u, 65u, 90u, 97u, |
|
11860
|
|
|
|
|
|
|
122u, 45u, 46u, 53u, 48u, 52u, 54u, 57u, |
|
11861
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 53u, |
|
11862
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
11863
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
11864
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
11865
|
|
|
|
|
|
|
53u, 48u, 52u, 54u, 57u, 65u, 90u, 97u, |
|
11866
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 53u, 54u, 57u, 65u, |
|
11867
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
11868
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
11869
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 58u, 64u, 95u, |
|
11870
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 48u, 57u, 65u, 90u, |
|
11871
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
11872
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
11873
|
|
|
|
|
|
|
97u, 122u, 33u, 58u, 64u, 95u, 36u, 37u, |
|
11874
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
11875
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
11876
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
11877
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
|
11878
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
|
11879
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
|
11880
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
|
11881
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
|
11882
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
|
11883
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
11884
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
11885
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
|
11886
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
|
11887
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
11888
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
11889
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
11890
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
11891
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
|
11892
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
|
11893
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
11894
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
11895
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
11896
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
11897
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
11898
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
11899
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
11900
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
|
11901
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
|
11902
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
11903
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
|
11904
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
11905
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
11906
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
11907
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
11908
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
11909
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
11910
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
11911
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
11912
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
|
11913
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
11914
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
11915
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
11916
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
11917
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
11918
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
11919
|
|
|
|
|
|
|
97u, 122u, 33u, 47u, 95u, 36u, 37u, 39u, |
|
11920
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 47u, 33u, 48u, |
|
11921
|
|
|
|
|
|
|
49u, 50u, 95u, 36u, 37u, 39u, 46u, 51u, |
|
11922
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 58u, |
|
11923
|
|
|
|
|
|
|
303u, 559u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
11924
|
|
|
|
|
|
|
303u, 559u, 48u, 57u, 33u, 39u, 41u, 61u, |
|
11925
|
|
|
|
|
|
|
95u, 36u, 47u, 48u, 57u, 58u, 59u, 63u, |
|
11926
|
|
|
|
|
|
|
64u, 65u, 90u, 97u, 122u, 33u, 39u, 40u, |
|
11927
|
|
|
|
|
|
|
44u, 46u, 61u, 63u, 95u, 129u, 131u, 135u, |
|
11928
|
|
|
|
|
|
|
151u, 809u, 1065u, 36u, 38u, 42u, 57u, 58u, |
|
11929
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 142u, 143u, 155u, |
|
11930
|
|
|
|
|
|
|
159u, 45u, 46u, 58u, 303u, 559u, 48u, 57u, |
|
11931
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 58u, 303u, |
|
11932
|
|
|
|
|
|
|
559u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
11933
|
|
|
|
|
|
|
46u, 58u, 303u, 559u, 48u, 57u, 65u, 90u, |
|
11934
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 58u, 303u, 559u, |
|
11935
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
11936
|
|
|
|
|
|
|
45u, 46u, 58u, 303u, 559u, 48u, 53u, 54u, |
|
11937
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
11938
|
|
|
|
|
|
|
58u, 64u, 95u, 303u, 559u, 36u, 37u, 39u, |
|
11939
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
11940
|
|
|
|
|
|
|
95u, 303u, 559u, 36u, 37u, 39u, 46u, 48u, |
|
11941
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 64u, 95u, |
|
11942
|
|
|
|
|
|
|
303u, 559u, 36u, 37u, 39u, 46u, 48u, 57u, |
|
11943
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 58u, |
|
11944
|
|
|
|
|
|
|
64u, 95u, 303u, 559u, 36u, 37u, 39u, 44u, |
|
11945
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
11946
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 303u, 559u, 36u, 37u, |
|
11947
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
11948
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 303u, 559u, |
|
11949
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
11950
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 53u, 58u, 64u, |
|
11951
|
|
|
|
|
|
|
95u, 303u, 559u, 36u, 37u, 39u, 44u, 48u, |
|
11952
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
11953
|
|
|
|
|
|
|
45u, 46u, 58u, 64u, 95u, 303u, 559u, 36u, |
|
11954
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 53u, 54u, 57u, 65u, |
|
11955
|
|
|
|
|
|
|
90u, 97u, 122u, 0 |
|
11956
|
|
|
|
|
|
|
}; |
|
11957
|
|
|
|
|
|
|
|
|
11958
|
|
|
|
|
|
|
static const char _ragel_url_email_single_lengths[] = { |
|
11959
|
|
|
|
|
|
|
0, 5, 4, 2, 3, 3, 2, 1, |
|
11960
|
|
|
|
|
|
|
2, 0, 2, 5, 14, 3, 2, 3, |
|
11961
|
|
|
|
|
|
|
2, 3, 2, 2, 3, 2, 2, 2, |
|
11962
|
|
|
|
|
|
|
3, 2, 2, 2, 3, 2, 6, 5, |
|
11963
|
|
|
|
|
|
|
6, 4, 6, 7, 6, 7, 6, 7, |
|
11964
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 7, 6, |
|
11965
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 6, 3, |
|
11966
|
|
|
|
|
|
|
1, 5, 5, 2, 5, 14, 5, 5, |
|
11967
|
|
|
|
|
|
|
5, 6, 5, 8, 4, 5, 8, 8, |
|
11968
|
|
|
|
|
|
|
8, 9, 8 |
|
11969
|
|
|
|
|
|
|
}; |
|
11970
|
|
|
|
|
|
|
|
|
11971
|
|
|
|
|
|
|
static const char _ragel_url_email_range_lengths[] = { |
|
11972
|
|
|
|
|
|
|
0, 5, 5, 5, 5, 3, 3, 3, |
|
11973
|
|
|
|
|
|
|
3, 3, 3, 6, 7, 3, 3, 3, |
|
11974
|
|
|
|
|
|
|
3, 3, 3, 3, 4, 4, 3, 3, |
|
11975
|
|
|
|
|
|
|
4, 4, 3, 3, 4, 4, 5, 5, |
|
11976
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
|
11977
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 6, 6, |
|
11978
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 5, 4, |
|
11979
|
|
|
|
|
|
|
0, 5, 3, 1, 6, 7, 3, 3, |
|
11980
|
|
|
|
|
|
|
3, 4, 4, 5, 5, 5, 5, 5, |
|
11981
|
|
|
|
|
|
|
5, 6, 6 |
|
11982
|
|
|
|
|
|
|
}; |
|
11983
|
|
|
|
|
|
|
|
|
11984
|
|
|
|
|
|
|
static const short _ragel_url_email_index_offsets[] = { |
|
11985
|
|
|
|
|
|
|
0, 0, 11, 21, 29, 38, 45, 51, |
|
11986
|
|
|
|
|
|
|
56, 62, 66, 72, 84, 106, 113, 119, |
|
11987
|
|
|
|
|
|
|
126, 132, 139, 145, 151, 159, 166, 172, |
|
11988
|
|
|
|
|
|
|
178, 186, 193, 199, 205, 213, 220, 232, |
|
11989
|
|
|
|
|
|
|
243, 255, 265, 277, 290, 302, 315, 327, |
|
11990
|
|
|
|
|
|
|
340, 352, 364, 378, 391, 403, 415, 429, |
|
11991
|
|
|
|
|
|
|
442, 454, 466, 480, 493, 505, 517, 529, |
|
11992
|
|
|
|
|
|
|
537, 539, 550, 559, 563, 575, 597, 606, |
|
11993
|
|
|
|
|
|
|
615, 624, 635, 645, 659, 669, 680, 694, |
|
11994
|
|
|
|
|
|
|
708, 722, 738 |
|
11995
|
|
|
|
|
|
|
}; |
|
11996
|
|
|
|
|
|
|
|
|
11997
|
|
|
|
|
|
|
static const char _ragel_url_email_indicies[] = { |
|
11998
|
|
|
|
|
|
|
0, 2, 3, 4, 0, 0, 0, 5, |
|
11999
|
|
|
|
|
|
|
6, 6, 1, 0, 7, 8, 0, 0, |
|
12000
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 9, 9, 9, |
|
12001
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 9, 8, 9, |
|
12002
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 1, 10, 11, |
|
12003
|
|
|
|
|
|
|
12, 13, 14, 14, 1, 15, 16, 14, |
|
12004
|
|
|
|
|
|
|
14, 14, 1, 15, 14, 14, 14, 1, |
|
12005
|
|
|
|
|
|
|
15, 17, 14, 14, 14, 1, 14, 18, |
|
12006
|
|
|
|
|
|
|
18, 1, 15, 17, 14, 19, 19, 1, |
|
12007
|
|
|
|
|
|
|
20, 21, 21, 20, 20, 20, 21, 20, |
|
12008
|
|
|
|
|
|
|
20, 21, 21, 1, 22, 22, 24, 22, |
|
12009
|
|
|
|
|
|
|
22, 23, 22, 23, 23, 23, 23, 23, |
|
12010
|
|
|
|
|
|
|
25, 26, 23, 23, 22, 23, 23, 23, |
|
12011
|
|
|
|
|
|
|
23, 1, 27, 28, 29, 30, 18, 18, |
|
12012
|
|
|
|
|
|
|
1, 15, 31, 14, 14, 14, 1, 32, |
|
12013
|
|
|
|
|
|
|
33, 34, 35, 18, 18, 1, 15, 36, |
|
12014
|
|
|
|
|
|
|
14, 14, 14, 1, 37, 38, 39, 40, |
|
12015
|
|
|
|
|
|
|
18, 18, 1, 15, 36, 35, 14, 14, |
|
12016
|
|
|
|
|
|
|
1, 15, 36, 32, 14, 14, 1, 15, |
|
12017
|
|
|
|
|
|
|
36, 41, 35, 32, 14, 14, 1, 15, |
|
12018
|
|
|
|
|
|
|
36, 32, 14, 14, 14, 1, 15, 31, |
|
12019
|
|
|
|
|
|
|
30, 14, 14, 1, 15, 31, 27, 14, |
|
12020
|
|
|
|
|
|
|
14, 1, 15, 31, 42, 30, 27, 14, |
|
12021
|
|
|
|
|
|
|
14, 1, 15, 31, 27, 14, 14, 14, |
|
12022
|
|
|
|
|
|
|
1, 15, 16, 13, 14, 14, 1, 15, |
|
12023
|
|
|
|
|
|
|
16, 10, 14, 14, 1, 15, 16, 43, |
|
12024
|
|
|
|
|
|
|
13, 10, 14, 14, 1, 15, 16, 10, |
|
12025
|
|
|
|
|
|
|
14, 14, 14, 1, 0, 44, 45, 7, |
|
12026
|
|
|
|
|
|
|
8, 0, 0, 0, 46, 46, 46, 1, |
|
12027
|
|
|
|
|
|
|
0, 44, 7, 8, 0, 0, 0, 46, |
|
12028
|
|
|
|
|
|
|
46, 46, 1, 0, 44, 47, 7, 8, |
|
12029
|
|
|
|
|
|
|
0, 0, 0, 46, 46, 46, 1, 0, |
|
12030
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 46, 48, 48, |
|
12031
|
|
|
|
|
|
|
1, 0, 44, 47, 7, 8, 0, 0, |
|
12032
|
|
|
|
|
|
|
0, 46, 49, 49, 1, 0, 50, 51, |
|
12033
|
|
|
|
|
|
|
52, 7, 8, 0, 0, 0, 53, 48, |
|
12034
|
|
|
|
|
|
|
48, 1, 0, 44, 54, 7, 8, 0, |
|
12035
|
|
|
|
|
|
|
0, 0, 46, 46, 46, 1, 0, 55, |
|
12036
|
|
|
|
|
|
|
56, 57, 7, 8, 0, 0, 0, 58, |
|
12037
|
|
|
|
|
|
|
48, 48, 1, 0, 44, 59, 7, 8, |
|
12038
|
|
|
|
|
|
|
0, 0, 0, 46, 46, 46, 1, 0, |
|
12039
|
|
|
|
|
|
|
60, 61, 62, 7, 8, 0, 0, 0, |
|
12040
|
|
|
|
|
|
|
63, 48, 48, 1, 0, 44, 59, 7, |
|
12041
|
|
|
|
|
|
|
8, 0, 0, 0, 58, 46, 46, 1, |
|
12042
|
|
|
|
|
|
|
0, 44, 59, 7, 8, 0, 0, 0, |
|
12043
|
|
|
|
|
|
|
55, 46, 46, 1, 0, 44, 59, 64, |
|
12044
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 58, 55, 46, |
|
12045
|
|
|
|
|
|
|
46, 1, 0, 44, 59, 7, 8, 0, |
|
12046
|
|
|
|
|
|
|
0, 0, 55, 46, 46, 46, 1, 0, |
|
12047
|
|
|
|
|
|
|
44, 54, 7, 8, 0, 0, 0, 53, |
|
12048
|
|
|
|
|
|
|
46, 46, 1, 0, 44, 54, 7, 8, |
|
12049
|
|
|
|
|
|
|
0, 0, 0, 50, 46, 46, 1, 0, |
|
12050
|
|
|
|
|
|
|
44, 54, 65, 7, 8, 0, 0, 0, |
|
12051
|
|
|
|
|
|
|
53, 50, 46, 46, 1, 0, 44, 54, |
|
12052
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 50, 46, 46, |
|
12053
|
|
|
|
|
|
|
46, 1, 0, 44, 45, 7, 8, 0, |
|
12054
|
|
|
|
|
|
|
0, 0, 5, 46, 46, 1, 0, 44, |
|
12055
|
|
|
|
|
|
|
45, 7, 8, 0, 0, 0, 2, 46, |
|
12056
|
|
|
|
|
|
|
46, 1, 0, 44, 45, 66, 7, 8, |
|
12057
|
|
|
|
|
|
|
0, 0, 0, 5, 2, 46, 46, 1, |
|
12058
|
|
|
|
|
|
|
0, 44, 45, 7, 8, 0, 0, 0, |
|
12059
|
|
|
|
|
|
|
2, 46, 46, 46, 1, 0, 44, 47, |
|
12060
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 46, 67, 67, |
|
12061
|
|
|
|
|
|
|
1, 0, 44, 47, 7, 8, 0, 0, |
|
12062
|
|
|
|
|
|
|
0, 46, 68, 68, 1, 0, 44, 47, |
|
12063
|
|
|
|
|
|
|
69, 8, 0, 0, 0, 46, 68, 68, |
|
12064
|
|
|
|
|
|
|
1, 9, 70, 9, 9, 9, 9, 9, |
|
12065
|
|
|
|
|
|
|
1, 71, 1, 0, 2, 3, 4, 0, |
|
12066
|
|
|
|
|
|
|
0, 0, 5, 46, 46, 1, 15, 17, |
|
12067
|
|
|
|
|
|
|
72, 21, 23, 14, 19, 19, 1, 21, |
|
12068
|
|
|
|
|
|
|
23, 72, 1, 20, 21, 21, 20, 20, |
|
12069
|
|
|
|
|
|
|
20, 21, 20, 20, 21, 21, 1, 22, |
|
12070
|
|
|
|
|
|
|
22, 24, 22, 22, 23, 22, 23, 23, |
|
12071
|
|
|
|
|
|
|
23, 23, 23, 25, 26, 23, 23, 22, |
|
12072
|
|
|
|
|
|
|
23, 23, 23, 23, 1, 15, 17, 72, |
|
12073
|
|
|
|
|
|
|
21, 23, 14, 14, 14, 1, 15, 17, |
|
12074
|
|
|
|
|
|
|
72, 21, 23, 40, 14, 14, 1, 15, |
|
12075
|
|
|
|
|
|
|
17, 72, 21, 23, 37, 14, 14, 1, |
|
12076
|
|
|
|
|
|
|
15, 17, 73, 72, 21, 23, 40, 37, |
|
12077
|
|
|
|
|
|
|
14, 14, 1, 15, 17, 72, 21, 23, |
|
12078
|
|
|
|
|
|
|
37, 14, 14, 14, 1, 0, 44, 47, |
|
12079
|
|
|
|
|
|
|
74, 8, 0, 21, 23, 0, 0, 46, |
|
12080
|
|
|
|
|
|
|
49, 49, 1, 9, 9, 21, 23, 9, |
|
12081
|
|
|
|
|
|
|
9, 75, 9, 9, 1, 9, 8, 9, |
|
12082
|
|
|
|
|
|
|
21, 23, 9, 9, 75, 9, 9, 1, |
|
12083
|
|
|
|
|
|
|
0, 44, 47, 74, 8, 0, 21, 23, |
|
12084
|
|
|
|
|
|
|
0, 0, 46, 46, 46, 1, 0, 44, |
|
12085
|
|
|
|
|
|
|
47, 74, 8, 0, 21, 23, 0, 0, |
|
12086
|
|
|
|
|
|
|
63, 46, 46, 1, 0, 44, 47, 74, |
|
12087
|
|
|
|
|
|
|
8, 0, 21, 23, 0, 0, 60, 46, |
|
12088
|
|
|
|
|
|
|
46, 1, 0, 44, 47, 76, 74, 8, |
|
12089
|
|
|
|
|
|
|
0, 21, 23, 0, 0, 63, 60, 46, |
|
12090
|
|
|
|
|
|
|
46, 1, 0, 44, 47, 74, 8, 0, |
|
12091
|
|
|
|
|
|
|
21, 23, 0, 0, 60, 46, 46, 46, |
|
12092
|
|
|
|
|
|
|
1, 0 |
|
12093
|
|
|
|
|
|
|
}; |
|
12094
|
|
|
|
|
|
|
|
|
12095
|
|
|
|
|
|
|
static const char _ragel_url_email_trans_targs[] = { |
|
12096
|
|
|
|
|
|
|
2, 0, 30, 48, 50, 49, 52, 3, |
|
12097
|
|
|
|
|
|
|
5, 4, 6, 26, 28, 27, 8, 7, |
|
12098
|
|
|
|
|
|
|
13, 9, 10, 58, 11, 60, 12, 61, |
|
12099
|
|
|
|
|
|
|
61, 12, 61, 14, 22, 24, 23, 15, |
|
12100
|
|
|
|
|
|
|
16, 18, 20, 19, 17, 62, 63, 65, |
|
12101
|
|
|
|
|
|
|
64, 21, 25, 29, 31, 35, 32, 33, |
|
12102
|
|
|
|
|
|
|
34, 67, 36, 44, 46, 45, 37, 38, |
|
12103
|
|
|
|
|
|
|
40, 42, 41, 39, 70, 71, 73, 72, |
|
12104
|
|
|
|
|
|
|
43, 47, 51, 53, 54, 55, 56, 57, |
|
12105
|
|
|
|
|
|
|
59, 66, 68, 69, 74 |
|
12106
|
|
|
|
|
|
|
}; |
|
12107
|
|
|
|
|
|
|
|
|
12108
|
|
|
|
|
|
|
static const char _ragel_url_email_trans_actions[] = { |
|
12109
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
12110
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
12111
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 1, 0, 1, |
|
12112
|
|
|
|
|
|
|
2, 3, 4, 0, 0, 0, 0, 0, |
|
12113
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
|
12114
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
|
12115
|
|
|
|
|
|
|
0, 1, 0, 0, 0, 0, 0, 0, |
|
12116
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 1, 1, |
|
12117
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
12118
|
|
|
|
|
|
|
1, 1, 1, 1, 1 |
|
12119
|
|
|
|
|
|
|
}; |
|
12120
|
|
|
|
|
|
|
|
|
12121
|
|
|
|
|
|
|
static const int ragel_url_email_start = 1; |
|
12122
|
|
|
|
|
|
|
|
|
12123
|
3
|
|
|
|
|
|
vector ragel_tokenizer::ragel_map; |
|
12124
|
|
|
|
|
|
|
atomic_flag ragel_tokenizer::ragel_map_flag = ATOMIC_FLAG_INIT; |
|
12125
|
|
|
|
|
|
|
|
|
12126
|
6
|
|
|
|
|
|
ragel_tokenizer::ragel_tokenizer(unsigned url_email_tokenizer) : unicode_tokenizer(url_email_tokenizer) { |
|
12127
|
3
|
50
|
|
|
|
|
initialize_ragel_map(); |
|
12128
|
3
|
|
|
|
|
|
} |
|
12129
|
|
|
|
|
|
|
|
|
12130
|
14
|
|
|
|
|
|
void ragel_tokenizer::initialize_ragel_map() { |
|
12131
|
7
|
50
|
|
|
|
|
while (ragel_map_flag.test_and_set()) {} |
|
12132
|
7
|
100
|
|
|
|
|
if (ragel_map.empty()) { |
|
12133
|
258
|
100
|
|
|
|
|
for (uint8_t ascii = 0; ascii < 128; ascii++) |
|
12134
|
256
|
|
|
|
|
|
ragel_map.push_back(ascii); |
|
12135
|
|
|
|
|
|
|
|
|
12136
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2026', 160); // horizontal ellipsis (TRIPLE DOT) |
|
12137
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2019', 161); // right single quotation mark |
|
12138
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2018', 162); // left single quotation mark |
|
12139
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2010', 163); // hyphen |
|
12140
|
|
|
|
|
|
|
} |
|
12141
|
|
|
|
|
|
|
ragel_map_flag.clear(); |
|
12142
|
7
|
|
|
|
|
|
} |
|
12143
|
|
|
|
|
|
|
|
|
12144
|
8
|
|
|
|
|
|
void ragel_tokenizer::ragel_map_add(char32_t chr, uint8_t mapping) { |
|
12145
|
8
|
100
|
|
|
|
|
if (chr >= ragel_map.size()) |
|
12146
|
2
|
|
|
|
|
|
ragel_map.resize(chr + 1, 128); |
|
12147
|
8
|
|
|
|
|
|
ragel_map[chr] = mapping; |
|
12148
|
8
|
|
|
|
|
|
} |
|
12149
|
|
|
|
|
|
|
|
|
12150
|
10
|
|
|
|
|
|
bool ragel_tokenizer::ragel_url_email(unsigned version, const vector& chars, size_t& current, vector& tokens) { |
|
12151
|
|
|
|
|
|
|
int cs; |
|
12152
|
|
|
|
|
|
|
|
|
12153
|
10
|
|
|
|
|
|
size_t start = current, end = current, parens = 0; |
|
12154
|
|
|
|
|
|
|
|
|
12155
|
|
|
|
|
|
|
{ |
|
12156
|
|
|
|
|
|
|
cs = ragel_url_email_start; |
|
12157
|
|
|
|
|
|
|
} |
|
12158
|
|
|
|
|
|
|
|
|
12159
|
|
|
|
|
|
|
{ |
|
12160
|
|
|
|
|
|
|
int _klen; |
|
12161
|
|
|
|
|
|
|
const short *_keys; |
|
12162
|
|
|
|
|
|
|
int _trans; |
|
12163
|
|
|
|
|
|
|
short _widec; |
|
12164
|
|
|
|
|
|
|
|
|
12165
|
10
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
|
12166
|
|
|
|
|
|
|
goto _test_eof; |
|
12167
|
|
|
|
|
|
|
if ( cs == 0 ) |
|
12168
|
|
|
|
|
|
|
goto _out; |
|
12169
|
|
|
|
|
|
|
_resume: |
|
12170
|
44
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
|
12171
|
22
|
|
|
|
|
|
_klen = _ragel_url_email_cond_lengths[cs]; |
|
12172
|
22
|
|
|
|
|
|
_keys = _ragel_url_email_cond_keys + (_ragel_url_email_cond_offsets[cs]*2); |
|
12173
|
22
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
|
12174
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
12175
|
|
|
|
|
|
|
const short *_mid; |
|
12176
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
12177
|
|
|
|
|
|
|
while (1) { |
|
12178
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
|
12179
|
|
|
|
|
|
|
break; |
|
12180
|
|
|
|
|
|
|
|
|
12181
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
12182
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
12183
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
|
12184
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
12185
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
|
12186
|
|
|
|
|
|
|
else { |
|
12187
|
0
|
|
|
|
|
|
switch ( _ragel_url_email_cond_spaces[_ragel_url_email_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
|
12188
|
|
|
|
|
|
|
case 0: { |
|
12189
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
|
12190
|
0
|
0
|
|
|
|
|
if ( |
|
12191
|
0
|
|
|
|
|
|
version >= 2 ) _widec += 256; |
|
12192
|
|
|
|
|
|
|
break; |
|
12193
|
|
|
|
|
|
|
} |
|
12194
|
|
|
|
|
|
|
case 1: { |
|
12195
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
|
12196
|
0
|
0
|
|
|
|
|
if ( |
|
12197
|
0
|
|
|
|
|
|
parens ) _widec += 256; |
|
12198
|
|
|
|
|
|
|
break; |
|
12199
|
|
|
|
|
|
|
} |
|
12200
|
|
|
|
|
|
|
} |
|
12201
|
|
|
|
|
|
|
break; |
|
12202
|
|
|
|
|
|
|
} |
|
12203
|
|
|
|
|
|
|
} |
|
12204
|
|
|
|
|
|
|
} |
|
12205
|
|
|
|
|
|
|
|
|
12206
|
22
|
|
|
|
|
|
_keys = _ragel_url_email_trans_keys + _ragel_url_email_key_offsets[cs]; |
|
12207
|
22
|
|
|
|
|
|
_trans = _ragel_url_email_index_offsets[cs]; |
|
12208
|
|
|
|
|
|
|
|
|
12209
|
22
|
|
|
|
|
|
_klen = _ragel_url_email_single_lengths[cs]; |
|
12210
|
22
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
|
12211
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
12212
|
|
|
|
|
|
|
const short *_mid; |
|
12213
|
85
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
|
12214
|
|
|
|
|
|
|
while (1) { |
|
12215
|
85
|
100
|
|
|
|
|
if ( _upper < _lower ) |
|
12216
|
|
|
|
|
|
|
break; |
|
12217
|
|
|
|
|
|
|
|
|
12218
|
63
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
|
12219
|
63
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
|
12220
|
12
|
|
|
|
|
|
_upper = _mid - 1; |
|
12221
|
51
|
50
|
|
|
|
|
else if ( _widec > *_mid ) |
|
12222
|
51
|
|
|
|
|
|
_lower = _mid + 1; |
|
12223
|
|
|
|
|
|
|
else { |
|
12224
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
|
12225
|
0
|
|
|
|
|
|
goto _match; |
|
12226
|
|
|
|
|
|
|
} |
|
12227
|
|
|
|
|
|
|
} |
|
12228
|
22
|
|
|
|
|
|
_keys += _klen; |
|
12229
|
22
|
|
|
|
|
|
_trans += _klen; |
|
12230
|
|
|
|
|
|
|
} |
|
12231
|
|
|
|
|
|
|
|
|
12232
|
22
|
|
|
|
|
|
_klen = _ragel_url_email_range_lengths[cs]; |
|
12233
|
22
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
|
12234
|
|
|
|
|
|
|
const short *_lower = _keys; |
|
12235
|
|
|
|
|
|
|
const short *_mid; |
|
12236
|
69
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
|
12237
|
|
|
|
|
|
|
while (1) { |
|
12238
|
69
|
100
|
|
|
|
|
if ( _upper < _lower ) |
|
12239
|
|
|
|
|
|
|
break; |
|
12240
|
|
|
|
|
|
|
|
|
12241
|
61
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
|
12242
|
61
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
|
12243
|
8
|
|
|
|
|
|
_upper = _mid - 2; |
|
12244
|
53
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
|
12245
|
39
|
|
|
|
|
|
_lower = _mid + 2; |
|
12246
|
|
|
|
|
|
|
else { |
|
12247
|
14
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
|
12248
|
14
|
|
|
|
|
|
goto _match; |
|
12249
|
|
|
|
|
|
|
} |
|
12250
|
|
|
|
|
|
|
} |
|
12251
|
8
|
|
|
|
|
|
_trans += _klen; |
|
12252
|
|
|
|
|
|
|
} |
|
12253
|
|
|
|
|
|
|
|
|
12254
|
|
|
|
|
|
|
_match: |
|
12255
|
22
|
|
|
|
|
|
_trans = _ragel_url_email_indicies[_trans]; |
|
12256
|
22
|
|
|
|
|
|
cs = _ragel_url_email_trans_targs[_trans]; |
|
12257
|
|
|
|
|
|
|
|
|
12258
|
22
|
50
|
|
|
|
|
if ( _ragel_url_email_trans_actions[_trans] == 0 ) |
|
12259
|
|
|
|
|
|
|
goto _again; |
|
12260
|
|
|
|
|
|
|
|
|
12261
|
0
|
|
|
|
|
|
switch ( _ragel_url_email_trans_actions[_trans] ) { |
|
12262
|
|
|
|
|
|
|
case 3: |
|
12263
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
|
12264
|
0
|
|
|
|
|
|
break; |
|
12265
|
|
|
|
|
|
|
case 1: |
|
12266
|
0
|
|
|
|
|
|
{ end = current + 1; } |
|
12267
|
0
|
|
|
|
|
|
break; |
|
12268
|
|
|
|
|
|
|
case 2: |
|
12269
|
0
|
|
|
|
|
|
{parens++;} |
|
12270
|
0
|
|
|
|
|
|
{ end = current + 1; } |
|
12271
|
0
|
|
|
|
|
|
break; |
|
12272
|
|
|
|
|
|
|
case 4: |
|
12273
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
|
12274
|
0
|
|
|
|
|
|
{ end = current + 1; } |
|
12275
|
0
|
|
|
|
|
|
break; |
|
12276
|
|
|
|
|
|
|
} |
|
12277
|
|
|
|
|
|
|
|
|
12278
|
|
|
|
|
|
|
_again: |
|
12279
|
22
|
100
|
|
|
|
|
if ( cs == 0 ) |
|
12280
|
|
|
|
|
|
|
goto _out; |
|
12281
|
14
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
|
12282
|
|
|
|
|
|
|
goto _resume; |
|
12283
|
|
|
|
|
|
|
_test_eof: {} |
|
12284
|
|
|
|
|
|
|
_out: {} |
|
12285
|
|
|
|
|
|
|
} |
|
12286
|
|
|
|
|
|
|
|
|
12287
|
10
|
50
|
|
|
|
|
if (end > start) { |
|
12288
|
0
|
|
|
|
|
|
tokens.emplace_back(start, end - start); |
|
12289
|
0
|
|
|
|
|
|
current = end; |
|
12290
|
0
|
|
|
|
|
|
return true; |
|
12291
|
|
|
|
|
|
|
} else { |
|
12292
|
10
|
|
|
|
|
|
current = start; |
|
12293
|
10
|
|
|
|
|
|
return false; |
|
12294
|
|
|
|
|
|
|
} |
|
12295
|
|
|
|
|
|
|
} |
|
12296
|
|
|
|
|
|
|
|
|
12297
|
|
|
|
|
|
|
} // namespace morphodita |
|
12298
|
|
|
|
|
|
|
|
|
12299
|
|
|
|
|
|
|
///////// |
|
12300
|
|
|
|
|
|
|
// File: morphodita/tokenizer/vertical_tokenizer.h |
|
12301
|
|
|
|
|
|
|
///////// |
|
12302
|
|
|
|
|
|
|
|
|
12303
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
12304
|
|
|
|
|
|
|
// |
|
12305
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
12306
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12307
|
|
|
|
|
|
|
// |
|
12308
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12309
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12310
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12311
|
|
|
|
|
|
|
|
|
12312
|
|
|
|
|
|
|
namespace morphodita { |
|
12313
|
|
|
|
|
|
|
|
|
12314
|
2
|
|
|
|
|
|
class vertical_tokenizer : public unicode_tokenizer { |
|
12315
|
|
|
|
|
|
|
public: |
|
12316
|
1
|
50
|
|
|
|
|
vertical_tokenizer() : unicode_tokenizer(0) {} |
|
12317
|
|
|
|
|
|
|
|
|
12318
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
|
12319
|
|
|
|
|
|
|
}; |
|
12320
|
|
|
|
|
|
|
|
|
12321
|
|
|
|
|
|
|
} // namespace morphodita |
|
12322
|
|
|
|
|
|
|
|
|
12323
|
|
|
|
|
|
|
///////// |
|
12324
|
|
|
|
|
|
|
// File: morphodita/tokenizer/tokenizer.cpp |
|
12325
|
|
|
|
|
|
|
///////// |
|
12326
|
|
|
|
|
|
|
|
|
12327
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
12328
|
|
|
|
|
|
|
// |
|
12329
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
12330
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12331
|
|
|
|
|
|
|
// |
|
12332
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12333
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12334
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12335
|
|
|
|
|
|
|
|
|
12336
|
|
|
|
|
|
|
namespace morphodita { |
|
12337
|
|
|
|
|
|
|
|
|
12338
|
1
|
|
|
|
|
|
tokenizer* tokenizer::new_vertical_tokenizer() { |
|
12339
|
2
|
|
|
|
|
|
return new vertical_tokenizer(); |
|
12340
|
|
|
|
|
|
|
} |
|
12341
|
|
|
|
|
|
|
|
|
12342
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_czech_tokenizer() { |
|
12343
|
0
|
|
|
|
|
|
return new czech_tokenizer(czech_tokenizer::CZECH, czech_tokenizer::LATEST); |
|
12344
|
|
|
|
|
|
|
} |
|
12345
|
|
|
|
|
|
|
|
|
12346
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_english_tokenizer() { |
|
12347
|
0
|
|
|
|
|
|
return new english_tokenizer(english_tokenizer::LATEST); |
|
12348
|
|
|
|
|
|
|
} |
|
12349
|
|
|
|
|
|
|
|
|
12350
|
3
|
|
|
|
|
|
tokenizer* tokenizer::new_generic_tokenizer() { |
|
12351
|
6
|
|
|
|
|
|
return new generic_tokenizer(generic_tokenizer::LATEST); |
|
12352
|
|
|
|
|
|
|
} |
|
12353
|
|
|
|
|
|
|
|
|
12354
|
|
|
|
|
|
|
} // namespace morphodita |
|
12355
|
|
|
|
|
|
|
|
|
12356
|
|
|
|
|
|
|
///////// |
|
12357
|
|
|
|
|
|
|
// File: morphodita/tokenizer/unicode_tokenizer.cpp |
|
12358
|
|
|
|
|
|
|
///////// |
|
12359
|
|
|
|
|
|
|
|
|
12360
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
12361
|
|
|
|
|
|
|
// |
|
12362
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
12363
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12364
|
|
|
|
|
|
|
// |
|
12365
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12366
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12367
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12368
|
|
|
|
|
|
|
|
|
12369
|
|
|
|
|
|
|
namespace morphodita { |
|
12370
|
|
|
|
|
|
|
|
|
12371
|
4
|
|
|
|
|
|
unicode_tokenizer::unicode_tokenizer(unsigned url_email_tokenizer) : url_email_tokenizer(url_email_tokenizer) { |
|
12372
|
4
|
50
|
|
|
|
|
ragel_tokenizer::initialize_ragel_map(); |
|
12373
|
|
|
|
|
|
|
|
|
12374
|
4
|
50
|
|
|
|
|
set_text(string_piece(nullptr, 0)); |
|
12375
|
4
|
|
|
|
|
|
} |
|
12376
|
|
|
|
|
|
|
|
|
12377
|
7
|
|
|
|
|
|
void unicode_tokenizer::set_text(string_piece text, bool make_copy /*= false*/) { |
|
12378
|
|
|
|
|
|
|
using namespace unilib; |
|
12379
|
|
|
|
|
|
|
|
|
12380
|
7
|
100
|
|
|
|
|
if (make_copy && text.str) { |
|
|
|
50
|
|
|
|
|
|
|
12381
|
3
|
|
|
|
|
|
text_buffer.assign(text.str, text.len); |
|
12382
|
3
|
|
|
|
|
|
text.str = text_buffer.c_str(); |
|
12383
|
|
|
|
|
|
|
} |
|
12384
|
7
|
|
|
|
|
|
current = 0; |
|
12385
|
|
|
|
|
|
|
|
|
12386
|
|
|
|
|
|
|
chars.clear(); |
|
12387
|
152
|
100
|
|
|
|
|
for (const char* curr_str = text.str; text.len; curr_str = text.str) |
|
12388
|
145
|
|
|
|
|
|
chars.emplace_back(utf8::decode(text.str, text.len), curr_str); |
|
12389
|
7
|
|
|
|
|
|
chars.emplace_back(0, text.str); |
|
12390
|
7
|
|
|
|
|
|
} |
|
12391
|
|
|
|
|
|
|
|
|
12392
|
7
|
|
|
|
|
|
bool unicode_tokenizer::next_sentence(vector* forms, vector* tokens_ptr) { |
|
12393
|
7
|
50
|
|
|
|
|
vector& tokens = tokens_ptr ? *tokens_ptr : tokens_buffer; |
|
12394
|
|
|
|
|
|
|
tokens.clear(); |
|
12395
|
7
|
50
|
|
|
|
|
if (forms) forms->clear(); |
|
12396
|
7
|
100
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
|
12397
|
|
|
|
|
|
|
|
|
12398
|
4
|
|
|
|
|
|
bool result = next_sentence(tokens); |
|
12399
|
4
|
50
|
|
|
|
|
if (forms) |
|
12400
|
37
|
100
|
|
|
|
|
for (auto&& token : tokens) |
|
12401
|
33
|
|
|
|
|
|
forms->emplace_back(chars[token.start].str, chars[token.start + token.length].str - chars[token.start].str); |
|
12402
|
|
|
|
|
|
|
|
|
12403
|
|
|
|
|
|
|
return result; |
|
12404
|
|
|
|
|
|
|
} |
|
12405
|
|
|
|
|
|
|
|
|
12406
|
12
|
|
|
|
|
|
bool unicode_tokenizer::tokenize_url_email(vector& tokens) { |
|
12407
|
12
|
100
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
|
12408
|
|
|
|
|
|
|
|
|
12409
|
10
|
50
|
|
|
|
|
return url_email_tokenizer ? ragel_tokenizer::ragel_url_email(url_email_tokenizer, chars, current, tokens) : false; |
|
12410
|
|
|
|
|
|
|
} |
|
12411
|
|
|
|
|
|
|
|
|
12412
|
10
|
|
|
|
|
|
bool unicode_tokenizer::emergency_sentence_split(const vector& tokens) { |
|
12413
|
|
|
|
|
|
|
using namespace unilib; |
|
12414
|
|
|
|
|
|
|
|
|
12415
|
|
|
|
|
|
|
// Implement emergency splitting for large sentences |
|
12416
|
10
|
50
|
|
|
|
|
return tokens.size() >= 500 || |
|
12417
|
20
|
50
|
|
|
|
|
(tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) || |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
12418
|
0
|
0
|
|
|
|
|
(tokens.size() >= 400 && chars[tokens.back().start].cat & unicode::Po); |
|
12419
|
|
|
|
|
|
|
} |
|
12420
|
|
|
|
|
|
|
|
|
12421
|
0
|
|
|
|
|
|
bool unicode_tokenizer::is_eos(const vector& tokens, char32_t eos_chr, const unordered_set* abbreviations) { |
|
12422
|
|
|
|
|
|
|
using namespace unilib; |
|
12423
|
|
|
|
|
|
|
|
|
12424
|
0
|
0
|
|
|
|
|
if (eos_chr == '.' && !tokens.empty()) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12425
|
|
|
|
|
|
|
// Ignore one-letter capitals before dot |
|
12426
|
0
|
0
|
|
|
|
|
if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12427
|
|
|
|
|
|
|
return false; |
|
12428
|
|
|
|
|
|
|
|
|
12429
|
|
|
|
|
|
|
// Ignore specified abbreviations |
|
12430
|
0
|
0
|
|
|
|
|
if (abbreviations) { |
|
12431
|
|
|
|
|
|
|
eos_buffer.clear(); |
|
12432
|
0
|
0
|
|
|
|
|
for (size_t i = 0; i < tokens.back().length; i++) |
|
12433
|
0
|
|
|
|
|
|
utf8::append(eos_buffer, unicode::lowercase(chars[tokens.back().start + i].chr)); |
|
12434
|
0
|
0
|
|
|
|
|
if (abbreviations->count(eos_buffer)) |
|
12435
|
|
|
|
|
|
|
return false; |
|
12436
|
|
|
|
|
|
|
} |
|
12437
|
|
|
|
|
|
|
} |
|
12438
|
|
|
|
|
|
|
return true; |
|
12439
|
|
|
|
|
|
|
} |
|
12440
|
|
|
|
|
|
|
|
|
12441
|
|
|
|
|
|
|
} // namespace morphodita |
|
12442
|
|
|
|
|
|
|
|
|
12443
|
|
|
|
|
|
|
///////// |
|
12444
|
|
|
|
|
|
|
// File: morphodita/tokenizer/vertical_tokenizer.cpp |
|
12445
|
|
|
|
|
|
|
///////// |
|
12446
|
|
|
|
|
|
|
|
|
12447
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
12448
|
|
|
|
|
|
|
// |
|
12449
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
12450
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12451
|
|
|
|
|
|
|
// |
|
12452
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12453
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12454
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12455
|
|
|
|
|
|
|
|
|
12456
|
|
|
|
|
|
|
namespace morphodita { |
|
12457
|
|
|
|
|
|
|
|
|
12458
|
2
|
|
|
|
|
|
bool vertical_tokenizer::next_sentence(vector& tokens) { |
|
12459
|
2
|
50
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
|
12460
|
|
|
|
|
|
|
|
|
12461
|
26
|
|
|
|
|
|
while (true) { |
|
12462
|
28
|
|
|
|
|
|
size_t line_start = current; |
|
12463
|
118
|
100
|
|
|
|
|
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
12464
|
|
|
|
|
|
|
|
|
12465
|
|
|
|
|
|
|
size_t line_end = current; |
|
12466
|
28
|
100
|
|
|
|
|
if (current < chars.size() - 1) { |
|
12467
|
26
|
|
|
|
|
|
current++; |
|
12468
|
52
|
50
|
|
|
|
|
if (current < chars.size() - 1 && |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
12469
|
26
|
0
|
|
|
|
|
((chars[current-1].chr == '\r' && chars[current].chr == '\n') || |
|
|
|
50
|
|
|
|
|
|
|
12470
|
26
|
50
|
|
|
|
|
(chars[current-1].chr == '\n' && chars[current].chr == '\r'))) |
|
12471
|
0
|
|
|
|
|
|
current++; |
|
12472
|
|
|
|
|
|
|
} |
|
12473
|
|
|
|
|
|
|
|
|
12474
|
28
|
100
|
|
|
|
|
if (line_start < line_end) |
|
12475
|
26
|
|
|
|
|
|
tokens.emplace_back(line_start, line_end - line_start); |
|
12476
|
|
|
|
|
|
|
else |
|
12477
|
|
|
|
|
|
|
break; |
|
12478
|
|
|
|
|
|
|
} |
|
12479
|
|
|
|
|
|
|
|
|
12480
|
2
|
|
|
|
|
|
return true; |
|
12481
|
|
|
|
|
|
|
} |
|
12482
|
|
|
|
|
|
|
|
|
12483
|
|
|
|
|
|
|
} // namespace morphodita |
|
12484
|
|
|
|
|
|
|
|
|
12485
|
|
|
|
|
|
|
///////// |
|
12486
|
|
|
|
|
|
|
// File: unilib/version.h |
|
12487
|
|
|
|
|
|
|
///////// |
|
12488
|
|
|
|
|
|
|
|
|
12489
|
|
|
|
|
|
|
// This file is part of UniLib . |
|
12490
|
|
|
|
|
|
|
// |
|
12491
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
|
12492
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12493
|
|
|
|
|
|
|
// |
|
12494
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12495
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12496
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12497
|
|
|
|
|
|
|
// |
|
12498
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
|
12499
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
|
12500
|
|
|
|
|
|
|
|
|
12501
|
|
|
|
|
|
|
namespace unilib { |
|
12502
|
|
|
|
|
|
|
|
|
12503
|
0
|
|
|
|
|
|
struct version { |
|
12504
|
|
|
|
|
|
|
unsigned major; |
|
12505
|
|
|
|
|
|
|
unsigned minor; |
|
12506
|
|
|
|
|
|
|
unsigned patch; |
|
12507
|
|
|
|
|
|
|
std::string prerelease; |
|
12508
|
|
|
|
|
|
|
|
|
12509
|
|
|
|
|
|
|
// Returns current version. |
|
12510
|
|
|
|
|
|
|
static version current(); |
|
12511
|
|
|
|
|
|
|
}; |
|
12512
|
|
|
|
|
|
|
|
|
12513
|
|
|
|
|
|
|
} // namespace unilib |
|
12514
|
|
|
|
|
|
|
|
|
12515
|
|
|
|
|
|
|
///////// |
|
12516
|
|
|
|
|
|
|
// File: morphodita/version/version.h |
|
12517
|
|
|
|
|
|
|
///////// |
|
12518
|
|
|
|
|
|
|
|
|
12519
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
12520
|
|
|
|
|
|
|
// |
|
12521
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
12522
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12523
|
|
|
|
|
|
|
// |
|
12524
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12525
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12526
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12527
|
|
|
|
|
|
|
|
|
12528
|
|
|
|
|
|
|
namespace morphodita { |
|
12529
|
|
|
|
|
|
|
|
|
12530
|
0
|
|
|
|
|
|
class version { |
|
12531
|
|
|
|
|
|
|
public: |
|
12532
|
|
|
|
|
|
|
unsigned major; |
|
12533
|
|
|
|
|
|
|
unsigned minor; |
|
12534
|
|
|
|
|
|
|
unsigned patch; |
|
12535
|
|
|
|
|
|
|
string prerelease; |
|
12536
|
|
|
|
|
|
|
|
|
12537
|
|
|
|
|
|
|
// Returns current MorphoDiTa version. |
|
12538
|
|
|
|
|
|
|
static version current(); |
|
12539
|
|
|
|
|
|
|
|
|
12540
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
|
12541
|
|
|
|
|
|
|
static string version_and_copyright(const string& other_libraries = string()); |
|
12542
|
|
|
|
|
|
|
}; |
|
12543
|
|
|
|
|
|
|
|
|
12544
|
|
|
|
|
|
|
} // namespace morphodita |
|
12545
|
|
|
|
|
|
|
|
|
12546
|
|
|
|
|
|
|
///////// |
|
12547
|
|
|
|
|
|
|
// File: morphodita/version/version.cpp |
|
12548
|
|
|
|
|
|
|
///////// |
|
12549
|
|
|
|
|
|
|
|
|
12550
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
|
12551
|
|
|
|
|
|
|
// |
|
12552
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
12553
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12554
|
|
|
|
|
|
|
// |
|
12555
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12556
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12557
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12558
|
|
|
|
|
|
|
|
|
12559
|
|
|
|
|
|
|
namespace morphodita { |
|
12560
|
|
|
|
|
|
|
|
|
12561
|
0
|
|
|
|
|
|
version version::current() { |
|
12562
|
0
|
0
|
|
|
|
|
return {1, 11, 1, ""}; |
|
|
|
0
|
|
|
|
|
|
|
12563
|
|
|
|
|
|
|
} |
|
12564
|
|
|
|
|
|
|
|
|
12565
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
|
12566
|
0
|
|
|
|
|
|
string version::version_and_copyright(const string& other_libraries) { |
|
12567
|
0
|
|
|
|
|
|
ostringstream info; |
|
12568
|
|
|
|
|
|
|
|
|
12569
|
|
|
|
|
|
|
auto morphodita = version::current(); |
|
12570
|
|
|
|
|
|
|
auto unilib = unilib::version::current(); |
|
12571
|
|
|
|
|
|
|
|
|
12572
|
0
|
|
|
|
|
|
info << "MorphoDiTa version " << morphodita.major << '.' << morphodita.minor << '.' << morphodita.patch |
|
12573
|
0
|
0
|
|
|
|
|
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
|
|
|
0
|
|
|
|
|
|
|
12574
|
0
|
|
|
|
|
|
<< " (using UniLib " << unilib.major << '.' << unilib.minor << '.' << unilib.patch |
|
12575
|
0
|
0
|
|
|
|
|
<< (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n" |
|
|
|
0
|
|
|
|
|
|
|
12576
|
|
|
|
|
|
|
"Copyright 2015 by Institute of Formal and Applied Linguistics, Faculty of\n" |
|
12577
|
0
|
0
|
|
|
|
|
"Mathematics and Physics, Charles University in Prague, Czech Republic."; |
|
12578
|
|
|
|
|
|
|
|
|
12579
|
0
|
|
|
|
|
|
return info.str(); |
|
12580
|
|
|
|
|
|
|
} |
|
12581
|
|
|
|
|
|
|
|
|
12582
|
|
|
|
|
|
|
} // namespace morphodita |
|
12583
|
|
|
|
|
|
|
|
|
12584
|
|
|
|
|
|
|
///////// |
|
12585
|
|
|
|
|
|
|
// File: bilou/bilou_entity.h |
|
12586
|
|
|
|
|
|
|
///////// |
|
12587
|
|
|
|
|
|
|
|
|
12588
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12589
|
|
|
|
|
|
|
// |
|
12590
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12591
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12592
|
|
|
|
|
|
|
// |
|
12593
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12594
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12595
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12596
|
|
|
|
|
|
|
|
|
12597
|
|
|
|
|
|
|
class bilou_entity { |
|
12598
|
|
|
|
|
|
|
public: |
|
12599
|
|
|
|
|
|
|
typedef entity_type value; |
|
12600
|
|
|
|
|
|
|
|
|
12601
|
|
|
|
|
|
|
enum :value { I, L, O, B_first, U_first, unknown = ~0U }; |
|
12602
|
|
|
|
|
|
|
static constexpr inline value B(entity_type entity) { return entity == entity_type_unknown ? unknown : B_first + 2 * entity; } |
|
12603
|
|
|
|
|
|
|
static constexpr inline value U(entity_type entity) { return entity == entity_type_unknown ? unknown : U_first + 2 * entity; } |
|
12604
|
|
|
|
|
|
|
static constexpr inline value total(entity_type entities) { return B_first + 2 * entities; } |
|
12605
|
|
|
|
|
|
|
|
|
12606
|
|
|
|
|
|
|
static inline bilou_type get_bilou(value bilou_entity) { |
|
12607
|
126
|
|
|
|
|
|
switch (bilou_entity) { |
|
12608
|
|
|
|
|
|
|
case I: return bilou_type_I; |
|
12609
|
|
|
|
|
|
|
case L: return bilou_type_L; |
|
12610
|
|
|
|
|
|
|
case O: return bilou_type_O; |
|
12611
|
84
|
100
|
|
|
|
|
default: return (bilou_entity - B_first) & 1 ? bilou_type_U : bilou_type_B; |
|
12612
|
|
|
|
|
|
|
} |
|
12613
|
|
|
|
|
|
|
} |
|
12614
|
|
|
|
|
|
|
static inline entity_type get_entity(value bilou_entity) { |
|
12615
|
70
|
100
|
|
|
|
|
switch (bilou_entity) { |
|
12616
|
|
|
|
|
|
|
case I: |
|
12617
|
|
|
|
|
|
|
case L: |
|
12618
|
|
|
|
|
|
|
case O: return entity_type_unknown; |
|
12619
|
28
|
|
|
|
|
|
default: return (bilou_entity - B_first) >> 1; |
|
12620
|
|
|
|
|
|
|
} |
|
12621
|
|
|
|
|
|
|
} |
|
12622
|
|
|
|
|
|
|
static inline value from_bilou_entity(bilou_type bilou, entity_type entity) { |
|
12623
|
|
|
|
|
|
|
switch (bilou) { |
|
12624
|
|
|
|
|
|
|
case bilou_type_I: return I; |
|
12625
|
|
|
|
|
|
|
case bilou_type_L: return L; |
|
12626
|
|
|
|
|
|
|
case bilou_type_O: return O; |
|
12627
|
|
|
|
|
|
|
case bilou_type_B: return B(entity); |
|
12628
|
|
|
|
|
|
|
default: return U(entity); |
|
12629
|
|
|
|
|
|
|
} |
|
12630
|
|
|
|
|
|
|
} |
|
12631
|
|
|
|
|
|
|
}; |
|
12632
|
|
|
|
|
|
|
|
|
12633
|
|
|
|
|
|
|
///////// |
|
12634
|
|
|
|
|
|
|
// File: ner/ner_ids.h |
|
12635
|
|
|
|
|
|
|
///////// |
|
12636
|
|
|
|
|
|
|
|
|
12637
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12638
|
|
|
|
|
|
|
// |
|
12639
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12640
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12641
|
|
|
|
|
|
|
// |
|
12642
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12643
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12644
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12645
|
|
|
|
|
|
|
|
|
12646
|
|
|
|
|
|
|
class ner_ids { |
|
12647
|
|
|
|
|
|
|
public: |
|
12648
|
|
|
|
|
|
|
enum ner_id { CZECH_NER = 0, ENGLISH_NER = 1, GENERIC_NER = 2 }; |
|
12649
|
|
|
|
|
|
|
|
|
12650
|
|
|
|
|
|
|
static bool parse(const string& str, ner_id& id) { |
|
12651
|
|
|
|
|
|
|
if (str == "czech") return id = CZECH_NER, true; |
|
12652
|
|
|
|
|
|
|
if (str == "english") return id = ENGLISH_NER, true; |
|
12653
|
|
|
|
|
|
|
if (str == "generic") return id = GENERIC_NER, true; |
|
12654
|
|
|
|
|
|
|
return false; |
|
12655
|
|
|
|
|
|
|
} |
|
12656
|
|
|
|
|
|
|
}; |
|
12657
|
|
|
|
|
|
|
|
|
12658
|
|
|
|
|
|
|
typedef ner_ids::ner_id ner_id; |
|
12659
|
|
|
|
|
|
|
|
|
12660
|
|
|
|
|
|
|
///////// |
|
12661
|
|
|
|
|
|
|
// File: ner/bilou_ner.h |
|
12662
|
|
|
|
|
|
|
///////// |
|
12663
|
|
|
|
|
|
|
|
|
12664
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12665
|
|
|
|
|
|
|
// |
|
12666
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12667
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12668
|
|
|
|
|
|
|
// |
|
12669
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12670
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12671
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12672
|
|
|
|
|
|
|
|
|
12673
|
0
|
|
|
|
|
|
class bilou_ner : public ner { |
|
12674
|
|
|
|
|
|
|
public: |
|
12675
|
|
|
|
|
|
|
bilou_ner(ner_id id); |
|
12676
|
|
|
|
|
|
|
|
|
12677
|
|
|
|
|
|
|
bool load(istream& is); |
|
12678
|
|
|
|
|
|
|
|
|
12679
|
|
|
|
|
|
|
virtual void recognize(const vector& forms, vector& entities) const override; |
|
12680
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
|
12681
|
|
|
|
|
|
|
|
|
12682
|
|
|
|
|
|
|
virtual void entity_types(vector& types) const override; |
|
12683
|
|
|
|
|
|
|
|
|
12684
|
|
|
|
|
|
|
virtual void gazetteers(vector& gazetteers, vector* gazetteer_types) const override; |
|
12685
|
|
|
|
|
|
|
private: |
|
12686
|
|
|
|
|
|
|
friend class bilou_ner_trainer; |
|
12687
|
|
|
|
|
|
|
|
|
12688
|
|
|
|
|
|
|
// Methods used by bylou_ner_trainer |
|
12689
|
|
|
|
|
|
|
static void fill_bilou_probabilities(const vector& outcomes, bilou_probabilities& prob); |
|
12690
|
|
|
|
|
|
|
static tokenizer* new_tokenizer(ner_id id); |
|
12691
|
|
|
|
|
|
|
|
|
12692
|
|
|
|
|
|
|
// Internal members of bilou_ner |
|
12693
|
|
|
|
|
|
|
ner_id id; |
|
12694
|
|
|
|
|
|
|
unique_ptr tagger; |
|
12695
|
|
|
|
|
|
|
entity_map named_entities; |
|
12696
|
|
|
|
|
|
|
feature_templates templates; |
|
12697
|
|
|
|
|
|
|
vector networks; |
|
12698
|
|
|
|
|
|
|
|
|
12699
|
0
|
|
|
|
|
|
struct cache { |
|
12700
|
|
|
|
|
|
|
ner_sentence sentence; |
|
12701
|
|
|
|
|
|
|
vector outcomes, network_buffer; |
|
12702
|
|
|
|
|
|
|
string string_buffer; |
|
12703
|
|
|
|
|
|
|
vector entities_buffer; |
|
12704
|
|
|
|
|
|
|
}; |
|
12705
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
|
12706
|
|
|
|
|
|
|
}; |
|
12707
|
|
|
|
|
|
|
|
|
12708
|
|
|
|
|
|
|
///////// |
|
12709
|
|
|
|
|
|
|
// File: tokenizer/morphodita_tokenizer_wrapper.h |
|
12710
|
|
|
|
|
|
|
///////// |
|
12711
|
|
|
|
|
|
|
|
|
12712
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12713
|
|
|
|
|
|
|
// |
|
12714
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
|
12715
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12716
|
|
|
|
|
|
|
// |
|
12717
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12718
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12719
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12720
|
|
|
|
|
|
|
|
|
12721
|
|
|
|
|
|
|
class morphodita_tokenizer_wrapper : public tokenizer { |
|
12722
|
|
|
|
|
|
|
public: |
|
12723
|
|
|
|
|
|
|
morphodita_tokenizer_wrapper(morphodita::tokenizer* morphodita_tokenizer) |
|
12724
|
4
|
|
|
|
|
|
: morphodita_tokenizer(morphodita_tokenizer) {} |
|
12725
|
8
|
|
|
|
|
|
virtual ~morphodita_tokenizer_wrapper() override {} |
|
12726
|
|
|
|
|
|
|
|
|
12727
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) override; |
|
12728
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) override; |
|
12729
|
|
|
|
|
|
|
|
|
12730
|
|
|
|
|
|
|
private: |
|
12731
|
|
|
|
|
|
|
unique_ptr morphodita_tokenizer; |
|
12732
|
|
|
|
|
|
|
}; |
|
12733
|
|
|
|
|
|
|
|
|
12734
|
|
|
|
|
|
|
///////// |
|
12735
|
|
|
|
|
|
|
// File: ner/bilou_ner.cpp |
|
12736
|
|
|
|
|
|
|
///////// |
|
12737
|
|
|
|
|
|
|
|
|
12738
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12739
|
|
|
|
|
|
|
// |
|
12740
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12741
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12742
|
|
|
|
|
|
|
// |
|
12743
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12744
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12745
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12746
|
|
|
|
|
|
|
|
|
12747
|
0
|
|
|
|
|
|
bilou_ner::bilou_ner(ner_id id) : id(id) {} |
|
12748
|
|
|
|
|
|
|
|
|
12749
|
1
|
|
|
|
|
|
bool bilou_ner::load(istream& is) { |
|
12750
|
2
|
50
|
|
|
|
|
if (tagger.reset(tagger::load_instance(is)), !tagger) return false; |
|
12751
|
1
|
50
|
|
|
|
|
if (!named_entities.load(is)) return false; |
|
12752
|
|
|
|
|
|
|
|
|
12753
|
1
|
|
|
|
|
|
unique_ptr tokenizer(new_tokenizer()); |
|
12754
|
1
|
50
|
|
|
|
|
if (!templates.load(is, nlp_pipeline(tokenizer.get(), tagger.get()))) return false; |
|
|
|
50
|
|
|
|
|
|
|
12755
|
|
|
|
|
|
|
|
|
12756
|
1
|
50
|
|
|
|
|
int stages = is.get(); |
|
12757
|
1
|
50
|
|
|
|
|
if (stages == EOF) return false; |
|
12758
|
1
|
50
|
|
|
|
|
networks.resize(stages); |
|
12759
|
3
|
100
|
|
|
|
|
for (auto&& network : networks) |
|
12760
|
2
|
50
|
|
|
|
|
if (!network.load(is)) return false; |
|
|
|
50
|
|
|
|
|
|
|
12761
|
|
|
|
|
|
|
|
|
12762
|
|
|
|
|
|
|
return true; |
|
12763
|
|
|
|
|
|
|
} |
|
12764
|
|
|
|
|
|
|
|
|
12765
|
2
|
|
|
|
|
|
void bilou_ner::recognize(const vector& forms, vector& entities) const { |
|
12766
|
2
|
|
|
|
|
|
entities.clear(); |
|
12767
|
2
|
50
|
|
|
|
|
if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
12768
|
|
|
|
|
|
|
|
|
12769
|
|
|
|
|
|
|
// Acquire cache |
|
12770
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
|
12771
|
2
|
100
|
|
|
|
|
if (!c) c = new cache(); |
|
12772
|
2
|
|
|
|
|
|
auto& sentence = c->sentence; |
|
12773
|
|
|
|
|
|
|
|
|
12774
|
|
|
|
|
|
|
// Tag |
|
12775
|
2
|
|
|
|
|
|
tagger->tag(forms, sentence); |
|
12776
|
|
|
|
|
|
|
|
|
12777
|
2
|
50
|
|
|
|
|
if (sentence.size) { |
|
12778
|
|
|
|
|
|
|
sentence.clear_previous_stage(); |
|
12779
|
|
|
|
|
|
|
|
|
12780
|
|
|
|
|
|
|
// Perform required NER stages |
|
12781
|
6
|
100
|
|
|
|
|
for (auto&& network : networks) { |
|
12782
|
|
|
|
|
|
|
sentence.clear_features(); |
|
12783
|
|
|
|
|
|
|
sentence.clear_probabilities_local_filled(); |
|
12784
|
|
|
|
|
|
|
|
|
12785
|
|
|
|
|
|
|
// Compute per-sentence feature templates |
|
12786
|
4
|
|
|
|
|
|
templates.process_sentence(sentence, c->string_buffer); |
|
12787
|
|
|
|
|
|
|
|
|
12788
|
|
|
|
|
|
|
// Sequentially classify sentence words |
|
12789
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) { |
|
12790
|
14
|
50
|
|
|
|
|
if (!sentence.probabilities[i].local_filled) { |
|
12791
|
14
|
|
|
|
|
|
network.classify(sentence.features[i], c->outcomes, c->network_buffer); |
|
12792
|
14
|
|
|
|
|
|
fill_bilou_probabilities(c->outcomes, sentence.probabilities[i].local); |
|
12793
|
14
|
|
|
|
|
|
sentence.probabilities[i].local_filled = true; |
|
12794
|
|
|
|
|
|
|
} |
|
12795
|
|
|
|
|
|
|
|
|
12796
|
14
|
100
|
|
|
|
|
if (i == 0) { |
|
12797
|
4
|
|
|
|
|
|
sentence.probabilities[i].global.init(sentence.probabilities[i].local); |
|
12798
|
|
|
|
|
|
|
} else { |
|
12799
|
10
|
|
|
|
|
|
sentence.probabilities[i].global.update(sentence.probabilities[i].local, sentence.probabilities[i - 1].global); |
|
12800
|
|
|
|
|
|
|
} |
|
12801
|
|
|
|
|
|
|
} |
|
12802
|
|
|
|
|
|
|
|
|
12803
|
4
|
|
|
|
|
|
sentence.compute_best_decoding(); |
|
12804
|
4
|
|
|
|
|
|
sentence.fill_previous_stage(); |
|
12805
|
|
|
|
|
|
|
} |
|
12806
|
|
|
|
|
|
|
|
|
12807
|
|
|
|
|
|
|
// Store entities in the output array |
|
12808
|
9
|
100
|
|
|
|
|
for (unsigned i = 0; i < sentence.size; i++) |
|
12809
|
7
|
100
|
|
|
|
|
if (sentence.probabilities[i].global.best == bilou_type_U) { |
|
12810
|
3
|
|
|
|
|
|
entities.emplace_back(i, 1, named_entities.name(sentence.probabilities[i].global.bilou[bilou_type_U].entity)); |
|
12811
|
4
|
50
|
|
|
|
|
} else if (sentence.probabilities[i].global.best == bilou_type_B) { |
|
12812
|
0
|
|
|
|
|
|
unsigned start = i++; |
|
12813
|
0
|
0
|
|
|
|
|
while (i < sentence.size && sentence.probabilities[i].global.best != bilou_type_L) i++; |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12814
|
0
|
|
|
|
|
|
entities.emplace_back(start, i - start + (i < sentence.size), named_entities.name(sentence.probabilities[start].global.bilou[bilou_type_B].entity)); |
|
12815
|
|
|
|
|
|
|
} |
|
12816
|
|
|
|
|
|
|
|
|
12817
|
|
|
|
|
|
|
// Process the entities |
|
12818
|
2
|
|
|
|
|
|
templates.process_entities(sentence, entities, c->entities_buffer); |
|
12819
|
|
|
|
|
|
|
} |
|
12820
|
|
|
|
|
|
|
|
|
12821
|
2
|
|
|
|
|
|
caches.push(c); |
|
12822
|
|
|
|
|
|
|
} |
|
12823
|
|
|
|
|
|
|
|
|
12824
|
3
|
|
|
|
|
|
tokenizer* bilou_ner::new_tokenizer() const { |
|
12825
|
3
|
|
|
|
|
|
return new_tokenizer(id); |
|
12826
|
|
|
|
|
|
|
} |
|
12827
|
|
|
|
|
|
|
|
|
12828
|
0
|
|
|
|
|
|
void bilou_ner::entity_types(vector& types) const { |
|
12829
|
0
|
|
|
|
|
|
types.resize(named_entities.size()); |
|
12830
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < types.size(); i++) |
|
12831
|
|
|
|
|
|
|
types[i] = named_entities.name(i); |
|
12832
|
0
|
|
|
|
|
|
} |
|
12833
|
|
|
|
|
|
|
|
|
12834
|
0
|
|
|
|
|
|
void bilou_ner::gazetteers(vector& gazetteers, vector* gazetteer_types) const { |
|
12835
|
0
|
|
|
|
|
|
gazetteers.clear(); |
|
12836
|
0
|
0
|
|
|
|
|
if (gazetteer_types) gazetteer_types->clear(); |
|
12837
|
|
|
|
|
|
|
|
|
12838
|
|
|
|
|
|
|
templates.gazetteers(gazetteers, gazetteer_types); |
|
12839
|
0
|
|
|
|
|
|
} |
|
12840
|
|
|
|
|
|
|
|
|
12841
|
14
|
|
|
|
|
|
void bilou_ner::fill_bilou_probabilities(const vector& outcomes, bilou_probabilities& prob) { |
|
12842
|
84
|
100
|
|
|
|
|
for (auto&& prob_bilou : prob.bilou) |
|
12843
|
70
|
|
|
|
|
|
prob_bilou.probability = -1; |
|
12844
|
|
|
|
|
|
|
|
|
12845
|
140
|
100
|
|
|
|
|
for (bilou_entity::value i = 0; i < outcomes.size(); i++) { |
|
12846
|
|
|
|
|
|
|
auto bilou = bilou_entity::get_bilou(i); |
|
12847
|
126
|
100
|
|
|
|
|
if (outcomes[i] > prob.bilou[bilou].probability) { |
|
12848
|
70
|
|
|
|
|
|
prob.bilou[bilou].probability = outcomes[i]; |
|
12849
|
70
|
|
|
|
|
|
prob.bilou[bilou].entity = bilou_entity::get_entity(i); |
|
12850
|
|
|
|
|
|
|
} |
|
12851
|
|
|
|
|
|
|
} |
|
12852
|
14
|
|
|
|
|
|
} |
|
12853
|
|
|
|
|
|
|
|
|
12854
|
3
|
|
|
|
|
|
tokenizer* bilou_ner::new_tokenizer(ner_id id) { |
|
12855
|
3
|
|
|
|
|
|
switch (id) { |
|
12856
|
|
|
|
|
|
|
case ner_id::CZECH_NER: |
|
12857
|
0
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_czech_tokenizer()); |
|
12858
|
|
|
|
|
|
|
case ner_id::ENGLISH_NER: |
|
12859
|
0
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_english_tokenizer()); |
|
12860
|
|
|
|
|
|
|
case ner_id::GENERIC_NER: |
|
12861
|
3
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_generic_tokenizer()); |
|
12862
|
|
|
|
|
|
|
} |
|
12863
|
|
|
|
|
|
|
|
|
12864
|
|
|
|
|
|
|
return nullptr; |
|
12865
|
|
|
|
|
|
|
} |
|
12866
|
|
|
|
|
|
|
|
|
12867
|
|
|
|
|
|
|
///////// |
|
12868
|
|
|
|
|
|
|
// File: ner/entity_map.cpp |
|
12869
|
|
|
|
|
|
|
///////// |
|
12870
|
|
|
|
|
|
|
|
|
12871
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12872
|
|
|
|
|
|
|
// |
|
12873
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12874
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12875
|
|
|
|
|
|
|
// |
|
12876
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12877
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12878
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12879
|
|
|
|
|
|
|
|
|
12880
|
0
|
|
|
|
|
|
entity_type entity_map::parse(const char* str, bool add_entity) const { |
|
12881
|
0
|
|
|
|
|
|
auto it = str2id.find(str); |
|
12882
|
0
|
0
|
|
|
|
|
if (it == str2id.end() && add_entity) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12883
|
0
|
|
|
|
|
|
it = str2id.emplace(str, (int)id2str.size()).first; |
|
12884
|
0
|
|
|
|
|
|
id2str.emplace_back(str); |
|
12885
|
|
|
|
|
|
|
} |
|
12886
|
0
|
0
|
|
|
|
|
return it == str2id.end() ? entity_type_unknown : it->second; |
|
12887
|
|
|
|
|
|
|
} |
|
12888
|
|
|
|
|
|
|
|
|
12889
|
0
|
|
|
|
|
|
const string& entity_map::name(entity_type entity) const { |
|
12890
|
3
|
0
|
|
|
|
|
return entity < id2str.size() ? id2str[entity] : empty; |
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12891
|
|
|
|
|
|
|
} |
|
12892
|
|
|
|
|
|
|
|
|
12893
|
1
|
|
|
|
|
|
bool entity_map::load(istream& is) { |
|
12894
|
|
|
|
|
|
|
binary_decoder data; |
|
12895
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
|
50
|
|
|
|
|
|
|
12896
|
|
|
|
|
|
|
|
|
12897
|
|
|
|
|
|
|
try { |
|
12898
|
|
|
|
|
|
|
str2id.clear(); |
|
12899
|
1
|
50
|
|
|
|
|
id2str.resize(data.next_4B()); |
|
|
|
50
|
|
|
|
|
|
|
12900
|
4
|
100
|
|
|
|
|
for (unsigned i = 0; i < id2str.size(); i++) { |
|
12901
|
3
|
50
|
|
|
|
|
data.next_str(id2str[i]); |
|
12902
|
3
|
|
|
|
|
|
str2id.emplace(id2str[i], i); |
|
12903
|
|
0
|
|
|
|
|
} |
|
12904
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
|
12905
|
|
|
|
|
|
|
return false; |
|
12906
|
|
|
|
|
|
|
} |
|
12907
|
|
|
|
|
|
|
|
|
12908
|
1
|
|
|
|
|
|
return data.is_end(); |
|
12909
|
|
|
|
|
|
|
} |
|
12910
|
|
|
|
|
|
|
|
|
12911
|
0
|
|
|
|
|
|
entity_type entity_map::size() const { |
|
12912
|
2
|
|
|
|
|
|
return id2str.size(); |
|
12913
|
|
|
|
|
|
|
} |
|
12914
|
|
|
|
|
|
|
|
|
12915
|
|
|
|
|
|
|
///////// |
|
12916
|
|
|
|
|
|
|
// File: ner/ner.cpp |
|
12917
|
|
|
|
|
|
|
///////// |
|
12918
|
|
|
|
|
|
|
|
|
12919
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12920
|
|
|
|
|
|
|
// |
|
12921
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12922
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12923
|
|
|
|
|
|
|
// |
|
12924
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12925
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12926
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12927
|
|
|
|
|
|
|
|
|
12928
|
1
|
|
|
|
|
|
ner* ner::load(istream& is) { |
|
12929
|
1
|
|
|
|
|
|
ner_id id = ner_id(is.get()); |
|
12930
|
1
|
50
|
|
|
|
|
switch (id) { |
|
12931
|
|
|
|
|
|
|
case ner_ids::CZECH_NER: |
|
12932
|
|
|
|
|
|
|
case ner_ids::ENGLISH_NER: |
|
12933
|
|
|
|
|
|
|
case ner_ids::GENERIC_NER: |
|
12934
|
|
|
|
|
|
|
{ |
|
12935
|
1
|
|
|
|
|
|
unique_ptr res(new bilou_ner(id)); |
|
12936
|
1
|
50
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
|
50
|
|
|
|
|
|
|
12937
|
|
|
|
|
|
|
break; |
|
12938
|
|
|
|
|
|
|
} |
|
12939
|
|
|
|
|
|
|
} |
|
12940
|
|
|
|
|
|
|
|
|
12941
|
|
|
|
|
|
|
return nullptr; |
|
12942
|
|
|
|
|
|
|
} |
|
12943
|
|
|
|
|
|
|
|
|
12944
|
1
|
|
|
|
|
|
ner* ner::load(const char* fname) { |
|
12945
|
2
|
50
|
|
|
|
|
ifstream in(path_from_utf8(fname).c_str(), ifstream::in | ifstream::binary); |
|
12946
|
1
|
50
|
|
|
|
|
if (!in.is_open()) return nullptr; |
|
12947
|
|
|
|
|
|
|
|
|
12948
|
1
|
50
|
|
|
|
|
return load(in); |
|
12949
|
|
|
|
|
|
|
} |
|
12950
|
|
|
|
|
|
|
|
|
12951
|
|
|
|
|
|
|
///////// |
|
12952
|
|
|
|
|
|
|
// File: tagger/external_tagger.h |
|
12953
|
|
|
|
|
|
|
///////// |
|
12954
|
|
|
|
|
|
|
|
|
12955
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12956
|
|
|
|
|
|
|
// |
|
12957
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12958
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12959
|
|
|
|
|
|
|
// |
|
12960
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12961
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12962
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12963
|
|
|
|
|
|
|
|
|
12964
|
0
|
|
|
|
|
|
class external_tagger : public tagger { |
|
12965
|
|
|
|
|
|
|
public: |
|
12966
|
|
|
|
|
|
|
virtual bool load(istream& is) override; |
|
12967
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) override; |
|
12968
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const override; |
|
12969
|
|
|
|
|
|
|
}; |
|
12970
|
|
|
|
|
|
|
|
|
12971
|
|
|
|
|
|
|
///////// |
|
12972
|
|
|
|
|
|
|
// File: tagger/external_tagger.cpp |
|
12973
|
|
|
|
|
|
|
///////// |
|
12974
|
|
|
|
|
|
|
|
|
12975
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
12976
|
|
|
|
|
|
|
// |
|
12977
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
12978
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
12979
|
|
|
|
|
|
|
// |
|
12980
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
12981
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
12982
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
12983
|
|
|
|
|
|
|
|
|
12984
|
|
|
|
|
|
|
inline static size_t strnchrpos(const char* str, char c, size_t len) { |
|
12985
|
|
|
|
|
|
|
size_t pos = 0; |
|
12986
|
0
|
0
|
|
|
|
|
for (; len--; str++, pos++) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12987
|
0
|
0
|
|
|
|
|
if (*str == c) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12988
|
|
|
|
|
|
|
return pos; |
|
12989
|
|
|
|
|
|
|
|
|
12990
|
|
|
|
|
|
|
return pos; |
|
12991
|
|
|
|
|
|
|
} |
|
12992
|
|
|
|
|
|
|
|
|
12993
|
0
|
|
|
|
|
|
bool external_tagger::load(istream& /*is*/) { |
|
12994
|
0
|
|
|
|
|
|
return true; |
|
12995
|
|
|
|
|
|
|
} |
|
12996
|
|
|
|
|
|
|
|
|
12997
|
0
|
|
|
|
|
|
bool external_tagger::create_and_encode(const string& /*params*/, ostream& /*os*/) { |
|
12998
|
0
|
|
|
|
|
|
return true; |
|
12999
|
|
|
|
|
|
|
} |
|
13000
|
|
|
|
|
|
|
|
|
13001
|
0
|
|
|
|
|
|
void external_tagger::tag(const vector& forms, ner_sentence& sentence) const { |
|
13002
|
0
|
|
|
|
|
|
sentence.resize(forms.size()); |
|
13003
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
13004
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
|
13005
|
|
|
|
|
|
|
|
|
13006
|
|
|
|
|
|
|
size_t space = strnchrpos(form.str, ' ', form.len); |
|
13007
|
0
|
0
|
|
|
|
|
if (space < form.len) { |
|
13008
|
0
|
|
|
|
|
|
sentence.words[i].form.assign(form.str, space); |
|
13009
|
0
|
|
|
|
|
|
form.len -= space + 1; |
|
13010
|
0
|
|
|
|
|
|
form.str += space + 1; |
|
13011
|
|
|
|
|
|
|
|
|
13012
|
|
|
|
|
|
|
space = strnchrpos(form.str, ' ', form.len); |
|
13013
|
0
|
0
|
|
|
|
|
if (space < form.len) { |
|
13014
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma.assign(form.str, space); |
|
13015
|
0
|
|
|
|
|
|
form.len -= space + 1; |
|
13016
|
0
|
|
|
|
|
|
form.str += space + 1; |
|
13017
|
|
|
|
|
|
|
|
|
13018
|
0
|
|
|
|
|
|
sentence.words[i].tag.assign(form.str, strnchrpos(form.str, ' ', form.len)); |
|
13019
|
|
|
|
|
|
|
} else { |
|
13020
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma.assign(form.str, form.len); |
|
13021
|
|
|
|
|
|
|
sentence.words[i].tag.clear(); |
|
13022
|
|
|
|
|
|
|
} |
|
13023
|
|
|
|
|
|
|
} else { |
|
13024
|
0
|
|
|
|
|
|
sentence.words[i].form.assign(form.str, form.len); |
|
13025
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma = sentence.words[i].form; |
|
13026
|
|
|
|
|
|
|
sentence.words[i].tag.clear(); |
|
13027
|
|
|
|
|
|
|
} |
|
13028
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.assign(1, sentence.words[i].raw_lemma); |
|
13029
|
0
|
|
|
|
|
|
sentence.words[i].lemma_id = sentence.words[i].raw_lemma; |
|
13030
|
|
|
|
|
|
|
sentence.words[i].lemma_comments.clear(); |
|
13031
|
|
|
|
|
|
|
} |
|
13032
|
0
|
|
|
|
|
|
} |
|
13033
|
|
|
|
|
|
|
|
|
13034
|
|
|
|
|
|
|
///////// |
|
13035
|
|
|
|
|
|
|
// File: tagger/morphodita_tagger.h |
|
13036
|
|
|
|
|
|
|
///////// |
|
13037
|
|
|
|
|
|
|
|
|
13038
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13039
|
|
|
|
|
|
|
// |
|
13040
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
13041
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13042
|
|
|
|
|
|
|
// |
|
13043
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13044
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13045
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13046
|
|
|
|
|
|
|
|
|
13047
|
0
|
|
|
|
|
|
class morphodita_tagger : public tagger { |
|
13048
|
|
|
|
|
|
|
public: |
|
13049
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const override; |
|
13050
|
|
|
|
|
|
|
|
|
13051
|
|
|
|
|
|
|
protected: |
|
13052
|
|
|
|
|
|
|
virtual bool load(istream& is) override; |
|
13053
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) override; |
|
13054
|
|
|
|
|
|
|
|
|
13055
|
|
|
|
|
|
|
private: |
|
13056
|
|
|
|
|
|
|
unique_ptr tagger; |
|
13057
|
|
|
|
|
|
|
const morphodita::morpho* morpho; |
|
13058
|
|
|
|
|
|
|
|
|
13059
|
0
|
|
|
|
|
|
struct cache { |
|
13060
|
|
|
|
|
|
|
vector tags, analyses; |
|
13061
|
|
|
|
|
|
|
string lemma_cased; |
|
13062
|
|
|
|
|
|
|
}; |
|
13063
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
|
13064
|
|
|
|
|
|
|
}; |
|
13065
|
|
|
|
|
|
|
|
|
13066
|
|
|
|
|
|
|
///////// |
|
13067
|
|
|
|
|
|
|
// File: tagger/morphodita_tagger.cpp |
|
13068
|
|
|
|
|
|
|
///////// |
|
13069
|
|
|
|
|
|
|
|
|
13070
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13071
|
|
|
|
|
|
|
// |
|
13072
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
13073
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13074
|
|
|
|
|
|
|
// |
|
13075
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13076
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13077
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13078
|
|
|
|
|
|
|
|
|
13079
|
1
|
|
|
|
|
|
bool morphodita_tagger::load(istream& is) { |
|
13080
|
1
|
|
|
|
|
|
tagger.reset(morphodita::tagger::load(is)); |
|
13081
|
1
|
50
|
|
|
|
|
morpho = tagger ? tagger->get_morpho() : nullptr; |
|
13082
|
1
|
50
|
|
|
|
|
return tagger && morpho; |
|
|
|
50
|
|
|
|
|
|
|
13083
|
|
|
|
|
|
|
} |
|
13084
|
|
|
|
|
|
|
|
|
13085
|
0
|
|
|
|
|
|
bool morphodita_tagger::create_and_encode(const string& params, ostream& os) { |
|
13086
|
0
|
0
|
|
|
|
|
if (params.empty()) return cerr << "Missing tagger_file argument to morphodita_tagger!" << endl, false; |
|
13087
|
|
|
|
|
|
|
|
|
13088
|
0
|
|
|
|
|
|
ifstream in(path_from_utf8(params).c_str(), ifstream::in | ifstream::binary); |
|
13089
|
0
|
0
|
|
|
|
|
if (!in.is_open()) return cerr << "Cannot open morphodita tagger file '" << params << "'!" << endl, false; |
|
13090
|
0
|
0
|
|
|
|
|
if (!load(in)) return cerr << "Cannot load morphodita tagger from file '" << params << "'!" << endl, false; |
|
|
|
0
|
|
|
|
|
|
|
13091
|
|
|
|
|
|
|
|
|
13092
|
0
|
0
|
|
|
|
|
if (!in.seekg(0, ifstream::beg)) return cerr << "Cannot seek in morphodita tagger file '" << params << "'!" << endl, false; |
|
|
|
0
|
|
|
|
|
|
|
13093
|
0
|
0
|
|
|
|
|
os << in.rdbuf(); |
|
13094
|
|
|
|
|
|
|
|
|
13095
|
0
|
|
|
|
|
|
return bool(os); |
|
13096
|
|
|
|
|
|
|
} |
|
13097
|
|
|
|
|
|
|
|
|
13098
|
2
|
|
|
|
|
|
void morphodita_tagger::tag(const vector& forms, ner_sentence& sentence) const { |
|
13099
|
2
|
|
|
|
|
|
sentence.resize(0); |
|
13100
|
2
|
50
|
|
|
|
|
if (!tagger || !morpho) return; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
13101
|
|
|
|
|
|
|
|
|
13102
|
|
|
|
|
|
|
// Obtain a cache |
|
13103
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
|
13104
|
2
|
100
|
|
|
|
|
if (!c) c = new cache(); |
|
13105
|
|
|
|
|
|
|
|
|
13106
|
|
|
|
|
|
|
// Tag |
|
13107
|
2
|
|
|
|
|
|
tagger->tag(forms, c->tags); |
|
13108
|
|
|
|
|
|
|
|
|
13109
|
|
|
|
|
|
|
// Fill sentence |
|
13110
|
2
|
50
|
|
|
|
|
if (c->tags.size() >= forms.size()) { |
|
13111
|
2
|
|
|
|
|
|
sentence.resize(forms.size()); |
|
13112
|
9
|
100
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
13113
|
7
|
|
|
|
|
|
sentence.words[i].form.assign(forms[i].str, morpho->raw_form_len(forms[i])); |
|
13114
|
|
|
|
|
|
|
|
|
13115
|
7
|
|
|
|
|
|
const string& lemma = c->tags[i].lemma; |
|
13116
|
|
|
|
|
|
|
|
|
13117
|
7
|
|
|
|
|
|
unsigned raw_lemma_len = morpho->raw_lemma_len(lemma); |
|
13118
|
7
|
|
|
|
|
|
sentence.words[i].raw_lemma.assign(lemma, 0, raw_lemma_len); |
|
13119
|
|
|
|
|
|
|
|
|
13120
|
7
|
|
|
|
|
|
morpho->analyze(forms[i], morphodita::morpho::GUESSER, c->analyses); |
|
13121
|
7
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.clear(); |
|
13122
|
18
|
100
|
|
|
|
|
for (auto&& analysis : c->analyses) |
|
13123
|
11
|
50
|
|
|
|
|
sentence.words[i].raw_lemmas_all.emplace_back(analysis.lemma, 0, morpho->raw_lemma_len(analysis.lemma)); |
|
|
|
50
|
|
|
|
|
|
|
13124
|
|
|
|
|
|
|
sort(sentence.words[i].raw_lemmas_all.begin(), sentence.words[i].raw_lemmas_all.end()); |
|
13125
|
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.erase(unique(sentence.words[i].raw_lemmas_all.begin(), sentence.words[i].raw_lemmas_all.end()), |
|
13126
|
7
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.end()); |
|
13127
|
|
|
|
|
|
|
|
|
13128
|
7
|
|
|
|
|
|
unsigned lemma_id_len = morpho->lemma_id_len(lemma); |
|
13129
|
7
|
|
|
|
|
|
sentence.words[i].lemma_id.assign(sentence.words[i].raw_lemma).append(lemma, raw_lemma_len, lemma_id_len - raw_lemma_len); |
|
13130
|
7
|
|
|
|
|
|
sentence.words[i].lemma_comments.assign(lemma, lemma_id_len, string::npos); |
|
13131
|
14
|
|
|
|
|
|
sentence.words[i].tag = c->tags[i].tag; |
|
13132
|
|
|
|
|
|
|
} |
|
13133
|
|
|
|
|
|
|
} |
|
13134
|
|
|
|
|
|
|
|
|
13135
|
2
|
|
|
|
|
|
caches.push(c); |
|
13136
|
|
|
|
|
|
|
} |
|
13137
|
|
|
|
|
|
|
|
|
13138
|
|
|
|
|
|
|
///////// |
|
13139
|
|
|
|
|
|
|
// File: tagger/trivial_tagger.h |
|
13140
|
|
|
|
|
|
|
///////// |
|
13141
|
|
|
|
|
|
|
|
|
13142
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13143
|
|
|
|
|
|
|
// |
|
13144
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
13145
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13146
|
|
|
|
|
|
|
// |
|
13147
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13148
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13149
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13150
|
|
|
|
|
|
|
|
|
13151
|
0
|
|
|
|
|
|
class trivial_tagger : public tagger { |
|
13152
|
|
|
|
|
|
|
public: |
|
13153
|
|
|
|
|
|
|
virtual bool load(istream& is) override; |
|
13154
|
|
|
|
|
|
|
virtual bool create_and_encode(const string& params, ostream& os) override; |
|
13155
|
|
|
|
|
|
|
virtual void tag(const vector& forms, ner_sentence& sentence) const override; |
|
13156
|
|
|
|
|
|
|
}; |
|
13157
|
|
|
|
|
|
|
|
|
13158
|
|
|
|
|
|
|
///////// |
|
13159
|
|
|
|
|
|
|
// File: tagger/tagger.cpp |
|
13160
|
|
|
|
|
|
|
///////// |
|
13161
|
|
|
|
|
|
|
|
|
13162
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13163
|
|
|
|
|
|
|
// |
|
13164
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
13165
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13166
|
|
|
|
|
|
|
// |
|
13167
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13168
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13169
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13170
|
|
|
|
|
|
|
|
|
13171
|
1
|
|
|
|
|
|
tagger* tagger::load_instance(istream& is) { |
|
13172
|
1
|
|
|
|
|
|
unique_ptr res(create(tagger_id(is.get()))); |
|
13173
|
|
|
|
|
|
|
|
|
13174
|
1
|
50
|
|
|
|
|
if (!res) return nullptr; |
|
13175
|
1
|
50
|
|
|
|
|
if (!res->load(is)) return nullptr; |
|
|
|
50
|
|
|
|
|
|
|
13176
|
|
|
|
|
|
|
|
|
13177
|
1
|
|
|
|
|
|
return res.release(); |
|
13178
|
|
|
|
|
|
|
} |
|
13179
|
|
|
|
|
|
|
|
|
13180
|
0
|
|
|
|
|
|
tagger* tagger::create_and_encode_instance(const string& tagger_id_and_params, ostream& os) { |
|
13181
|
|
|
|
|
|
|
string tagger_id, params; |
|
13182
|
|
|
|
|
|
|
|
|
13183
|
|
|
|
|
|
|
// Split the id and params using optional : |
|
13184
|
0
|
|
|
|
|
|
auto colon = tagger_id_and_params.find(':'); |
|
13185
|
0
|
0
|
|
|
|
|
if (colon == string::npos) { |
|
13186
|
|
|
|
|
|
|
tagger_id = tagger_id_and_params; |
|
13187
|
|
|
|
|
|
|
} else { |
|
13188
|
0
|
0
|
|
|
|
|
tagger_id = tagger_id_and_params.substr(0, colon); |
|
13189
|
0
|
0
|
|
|
|
|
params = tagger_id_and_params.substr(colon + 1); |
|
13190
|
|
|
|
|
|
|
} |
|
13191
|
|
|
|
|
|
|
|
|
13192
|
|
|
|
|
|
|
// Parse tagger_id |
|
13193
|
|
|
|
|
|
|
tagger_ids::tagger_id id; |
|
13194
|
0
|
0
|
|
|
|
|
if (!tagger_ids::parse(tagger_id, id)) return cerr << "Unknown tagger_id '" << tagger_id << "'!" << endl, nullptr; |
|
|
|
0
|
|
|
|
|
|
|
13195
|
|
|
|
|
|
|
|
|
13196
|
|
|
|
|
|
|
// Create instance |
|
13197
|
0
|
0
|
|
|
|
|
unique_ptr res(create(id)); |
|
13198
|
0
|
0
|
|
|
|
|
if (!res) return cerr << "Cannot create instance for tagger_id '" << tagger_id << "'!" << endl, nullptr; |
|
13199
|
|
|
|
|
|
|
|
|
13200
|
|
|
|
|
|
|
// Load and encode the tagger |
|
13201
|
0
|
0
|
|
|
|
|
os.put(id); |
|
13202
|
0
|
0
|
|
|
|
|
if (!res->create_and_encode(params, os)) return cerr << "Cannot encode instance of tagger_id '" << tagger_id << "'!" << endl, nullptr; |
|
|
|
0
|
|
|
|
|
|
|
13203
|
|
|
|
|
|
|
|
|
13204
|
|
|
|
|
|
|
return res.release(); |
|
13205
|
|
|
|
|
|
|
} |
|
13206
|
|
|
|
|
|
|
|
|
13207
|
1
|
|
|
|
|
|
tagger* tagger::create(tagger_id id) { |
|
13208
|
1
|
|
|
|
|
|
switch (id) { |
|
13209
|
|
|
|
|
|
|
case tagger_ids::TRIVIAL: |
|
13210
|
0
|
|
|
|
|
|
return new trivial_tagger(); |
|
13211
|
|
|
|
|
|
|
case tagger_ids::EXTERNAL: |
|
13212
|
0
|
|
|
|
|
|
return new external_tagger(); |
|
13213
|
|
|
|
|
|
|
case tagger_ids::MORPHODITA: |
|
13214
|
1
|
|
|
|
|
|
return new morphodita_tagger(); |
|
13215
|
|
|
|
|
|
|
} |
|
13216
|
|
|
|
|
|
|
|
|
13217
|
|
|
|
|
|
|
return nullptr; |
|
13218
|
|
|
|
|
|
|
} |
|
13219
|
|
|
|
|
|
|
|
|
13220
|
|
|
|
|
|
|
///////// |
|
13221
|
|
|
|
|
|
|
// File: tagger/trivial_tagger.cpp |
|
13222
|
|
|
|
|
|
|
///////// |
|
13223
|
|
|
|
|
|
|
|
|
13224
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13225
|
|
|
|
|
|
|
// |
|
13226
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
13227
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13228
|
|
|
|
|
|
|
// |
|
13229
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13230
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13231
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13232
|
|
|
|
|
|
|
|
|
13233
|
0
|
|
|
|
|
|
bool trivial_tagger::load(istream& /*is*/) { |
|
13234
|
0
|
|
|
|
|
|
return true; |
|
13235
|
|
|
|
|
|
|
} |
|
13236
|
|
|
|
|
|
|
|
|
13237
|
0
|
|
|
|
|
|
bool trivial_tagger::create_and_encode(const string& /*params*/, ostream& /*os*/) { |
|
13238
|
0
|
|
|
|
|
|
return true; |
|
13239
|
|
|
|
|
|
|
} |
|
13240
|
|
|
|
|
|
|
|
|
13241
|
0
|
|
|
|
|
|
void trivial_tagger::tag(const vector& forms, ner_sentence& sentence) const { |
|
13242
|
0
|
|
|
|
|
|
sentence.resize(forms.size()); |
|
13243
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
13244
|
0
|
|
|
|
|
|
sentence.words[i].form.assign(forms[i].str, forms[i].len); |
|
13245
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemma = sentence.words[i].form; |
|
13246
|
0
|
|
|
|
|
|
sentence.words[i].raw_lemmas_all.assign(1, sentence.words[i].raw_lemma); |
|
13247
|
0
|
|
|
|
|
|
sentence.words[i].lemma_id = sentence.words[i].form; |
|
13248
|
|
|
|
|
|
|
sentence.words[i].lemma_comments.clear(); |
|
13249
|
|
|
|
|
|
|
sentence.words[i].tag.clear(); |
|
13250
|
|
|
|
|
|
|
} |
|
13251
|
0
|
|
|
|
|
|
} |
|
13252
|
|
|
|
|
|
|
|
|
13253
|
|
|
|
|
|
|
///////// |
|
13254
|
|
|
|
|
|
|
// File: tokenizer/morphodita_tokenizer_wrapper.cpp |
|
13255
|
|
|
|
|
|
|
///////// |
|
13256
|
|
|
|
|
|
|
|
|
13257
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13258
|
|
|
|
|
|
|
// |
|
13259
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
|
13260
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13261
|
|
|
|
|
|
|
// |
|
13262
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13263
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13264
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13265
|
|
|
|
|
|
|
|
|
13266
|
3
|
|
|
|
|
|
void morphodita_tokenizer_wrapper::set_text(string_piece text, bool make_copy) { |
|
13267
|
3
|
|
|
|
|
|
morphodita_tokenizer->set_text(text, make_copy); |
|
13268
|
3
|
|
|
|
|
|
} |
|
13269
|
|
|
|
|
|
|
|
|
13270
|
7
|
|
|
|
|
|
bool morphodita_tokenizer_wrapper::next_sentence(vector* forms, vector* tokens) { |
|
13271
|
7
|
|
|
|
|
|
return morphodita_tokenizer->next_sentence(forms, (vector*) tokens); |
|
13272
|
|
|
|
|
|
|
} |
|
13273
|
|
|
|
|
|
|
|
|
13274
|
|
|
|
|
|
|
///////// |
|
13275
|
|
|
|
|
|
|
// File: tokenizer/tokenizer.cpp |
|
13276
|
|
|
|
|
|
|
///////// |
|
13277
|
|
|
|
|
|
|
|
|
13278
|
|
|
|
|
|
|
// This file is part of NameTag . |
|
13279
|
|
|
|
|
|
|
// |
|
13280
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
13281
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13282
|
|
|
|
|
|
|
// |
|
13283
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13284
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13285
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13286
|
|
|
|
|
|
|
|
|
13287
|
1
|
|
|
|
|
|
tokenizer* tokenizer::new_vertical_tokenizer() { |
|
13288
|
1
|
|
|
|
|
|
return new morphodita_tokenizer_wrapper(morphodita::tokenizer::new_vertical_tokenizer()); |
|
13289
|
|
|
|
|
|
|
} |
|
13290
|
|
|
|
|
|
|
|
|
13291
|
|
|
|
|
|
|
///////// |
|
13292
|
|
|
|
|
|
|
// File: unilib/unicode.cpp |
|
13293
|
|
|
|
|
|
|
///////// |
|
13294
|
|
|
|
|
|
|
|
|
13295
|
|
|
|
|
|
|
// This file is part of UniLib . |
|
13296
|
|
|
|
|
|
|
// |
|
13297
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
|
13298
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13299
|
|
|
|
|
|
|
// |
|
13300
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13301
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13302
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13303
|
|
|
|
|
|
|
// |
|
13304
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
|
13305
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
|
13306
|
|
|
|
|
|
|
|
|
13307
|
|
|
|
|
|
|
namespace unilib { |
|
13308
|
|
|
|
|
|
|
|
|
13309
|
|
|
|
|
|
|
const char32_t unicode::CHARS; |
|
13310
|
|
|
|
|
|
|
|
|
13311
|
|
|
|
|
|
|
const int32_t unicode::DEFAULT_CAT; |
|
13312
|
|
|
|
|
|
|
|
|
13313
|
|
|
|
|
|
|
const uint8_t unicode::category_index[unicode::CHARS >> 8] = { |
|
13314
|
|
|
|
|
|
|
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,33,41,42,43,44,45,46,47,48,39,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,49,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,50,17,17,17,51,17,52,53,54,55,56,57,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,58,59,59,59,59,59,59,59,59,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,17,61,62,17,63,64,65,66,67,68,69,70,71,17,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,17,17,17,97,98,99,100,100,100,100,100,100,100,100,100,101,17,17,17,17,102,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,17,17,103,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,17,17,104,105,100,100,106,107,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,108,17,17,17,17,109,110,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,111,17,112,113,100,100,100,100,100,100,100,100,100,114,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,115,116,117,118,119,120,121,122,123,39,39,124,100,100,100,100,125,126,127,128,100,129,100,100,130,131,132,100,100,133,134,135,100,136,137,138,139,39,39,140,141,142,39,143,144,100,100,100,100,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, |
|
13315
|
|
|
|
|
|
|
17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,145,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,146,147,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,148,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,149,100,100,100,100,100,100,100,100,100,100,100,100,17,17,150,100,100,100,100,100,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,151,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,152,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100, 
|
|
13316
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
|
13317
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
|
13318
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
|
13319
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
|
13320
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,153,154,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,
 |
|
13321
|
|
|
|
|
|
|
100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,100,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,155,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60, |
|
13322
|
|
|
|
|
|
|
60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,60,155 |
|
13323
|
|
|
|
|
|
|
}; |
|
13324
|
|
|
|
|
|
|
|
|
13325
|
|
|
|
|
|
|
const uint8_t unicode::category_block[][256] = { |
|
13326
|
|
|
|
|
|
|
{_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Zs,_Po,_Po,_Po,_Sc,_Po,_Po,_Po,_Ps,_Pe,_Po,_Sm,_Po,_Pd,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Sm,_Sm,_Sm,_Po,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ps,_Po,_Pe,_Sk,_Pc,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ps,_Sm,_Pe,_Sm,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Zs,_Po,_Sc,_Sc,_Sc,_Sc,_So,_Po,_Sk,_So,_Lo,_Pi,_Sm,_Cf,_So,_Sk,_So,_Sm,_No,_No,_Sk,_Ll,_Po,_Po,_Sk,_No,_Lo,_Pf,_No,_No,_No,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
|
13327
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lo,_Lu,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lu,_Lt,_Ll,_Lu,_Lt,_Ll,_Lu,_Lt,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Lt,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
|
13328
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Sk,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk}, |
|
13329
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lu,_Ll,_Lu,_Ll,_Lm,_Sk,_Lu,_Ll,_Cn,_Cn,_Lm,_Ll,_Ll,_Ll,_Po,_Lu,_Cn,_Cn,_Cn,_Cn,_Sk,_Sk,_Lu,_Po,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Sm,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu}, |
|
13330
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Me,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
|
13331
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Po,_Pd,_Cn,_Cn,_So,_So,_Sc,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Pd,_Mn,_Po,_Mn,_Mn,_Po,_Mn,_Mn,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13332
|
|
|
|
|
|
|
{_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Sm,_Sm,_Sm,_Po,_Po,_Sc,_Po,_Po,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Cf,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cf,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Mn,_Mn,_So,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_So,_So,_Lo}, |
|
13333
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cf,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_So,_Po,_Po,_Po,_Lm,_Cn,_Cn,_Mn,_Sc,_Sc}, |
|
13334
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Cn,_Cn,_Po,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sk,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cf,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
|
13335
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Sc,_Sc,_No,_No,_No,_No,_No,_No,_So,_Sc,_Lo,_Po,_Mn,_Cn}, |
|
13336
|
|
|
|
|
|
|
{_Cn,_Mn,_Mn,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Mn,_Cn,_Mc,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mn,_Mn,_Lo,_Lo,_Lo,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mc,_Cn,_Mc,_Mc,_Mn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Sc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
|
13337
|
|
|
|
|
|
|
{_Cn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_So,_Lo,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Mc,_Mn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_So,_So,_So,_So,_So,_So,_Sc,_So,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13338
|
|
|
|
|
|
|
{_Mn,_Mc,_Mc,_Mc,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_No,_No,_No,_No,_No,_No,_No,_So,_Lo,_Mn,_Mc,_Mc,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mc,_Mc,_Cn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Lo,_Lo,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13339
|
|
|
|
|
|
|
{_Mn,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Mc,_Mn,_Lo,_So,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Mc,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Cn,_Mn,_Cn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Mc,_Mc,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13340
|
|
|
|
|
|
|
{_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Sc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lm,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13341
|
|
|
|
|
|
|
{_Lo,_So,_So,_So,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_Po,_So,_So,_So,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_Mn,_So,_Mn,_So,_Mn,_Ps,_Pe,_Ps,_Pe,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13342
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Lo,_Mc,_Mc,_Mc,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Lo,_Mc,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mc,_Mc,_Mc,_Mn,_So,_So,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Po,_Lm,_Ll,_Ll,_Ll}, |
|
13343
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13344
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13345
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn}, |
|
13346
|
|
|
|
|
|
|
{_Pd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13347
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Zs,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Ps,_Pe,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Nl,_Nl,_Nl,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13348
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Sc,_Lo,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13349
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Po,_Po,_Po,_Po,_Mn,_Mn,_Mn,_Cf,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13350
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13351
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mc,_Mn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13352
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Po,_Po,_Cn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po}, |
|
13353
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Po,_Po,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Po,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mc,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13354
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
|
13355
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
|
13356
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Lu,_Cn,_Lu,_Cn,_Lu,_Cn,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Ll,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Cn,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Sk,_Sk,_Sk,_Cn,_Cn,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Sk,_Cn}, |
|
13357
|
|
|
|
|
|
|
{_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Cf,_Cf,_Cf,_Cf,_Cf,_Pd,_Pd,_Pd,_Pd,_Pd,_Pd,_Po,_Po,_Pi,_Pf,_Ps,_Pi,_Pi,_Pf,_Ps,_Pi,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Zl,_Zp,_Cf,_Cf,_Cf,_Cf,_Cf,_Zs,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pi,_Pf,_Po,_Po,_Po,_Po,_Pc,_Pc,_Po,_Po,_Po,_Sm,_Ps,_Pe,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Sm,_Po,_Pc,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Zs,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_No,_Lm,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_Sm,_Sm,_Sm,_Ps,_Pe,_Lm,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Sm,_Sm,_Sm,_Ps,_Pe,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Me,_Me,_Me,_Mn,_Me,_Me,_Me,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13358
|
|
|
|
|
|
|
{_So,_So,_Lu,_So,_So,_So,_So,_Lu,_So,_So,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Ll,_So,_Lu,_So,_So,_Sm,_Lu,_Lu,_Lu,_Lu,_Lu,_So,_So,_So,_So,_So,_So,_Lu,_So,_Lu,_So,_Lu,_So,_Lu,_Lu,_Lu,_Lu,_So,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lo,_Lo,_Lo,_Lo,_Ll,_So,_So,_Ll,_Ll,_Lu,_Lu,_Sm,_Sm,_Sm,_Sm,_Sm,_Lu,_Ll,_Ll,_Ll,_Ll,_So,_Sm,_So,_So,_Ll,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Lu,_Ll,_Nl,_Nl,_Nl,_Nl,_No,_So,_So,_Cn,_Cn,_Cn,_Cn,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_So,_So,_Sm,_So,_So,_Sm,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_Sm,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
|
13359
|
|
|
|
|
|
|
{_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
|
13360
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_Ps,_Pe,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13361
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No}, |
|
13362
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
|
13363
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13364
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
|
13365
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13366
|
|
|
|
|
|
|
{_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Sm,_Sm}, |
|
13367
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13368
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_So,_So,_So,_So,_So,_So,_Lu,_Ll,_Lu,_Ll,_Mn,_Mn,_Mn,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_No,_Po,_Po}, |
|
13369
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
|
13370
|
|
|
|
|
|
|
{_Po,_Po,_Pi,_Pf,_Pi,_Pf,_Po,_Po,_Po,_Pi,_Pf,_Po,_Pi,_Pf,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Po,_Po,_Pd,_Po,_Pi,_Pf,_Po,_Po,_Pi,_Pf,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Pd,_Po,_Po,_Po,_Po,_Pd,_Po,_Ps,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_So,_Po,_Po,_Po,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Pd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13371
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn}, |
|
13372
|
|
|
|
|
|
|
{_Zs,_Po,_Po,_Po,_So,_Lm,_Lo,_Nl,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_So,_So,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Pd,_Ps,_Pe,_Pe,_So,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Pd,_Lm,_Lm,_Lm,_Lm,_Lm,_So,_So,_Nl,_Nl,_Nl,_Lm,_Lo,_Po,_So,_So,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Sk,_Sk,_Lm,_Lm,_Lo,_Pd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Lm,_Lm,_Lm,_Lo}, |
|
13373
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_So,_So,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13374
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13375
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13376
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13377
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Po,_Po}, |
|
13378
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lo,_Mn,_Me,_Me,_Me,_Po,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Lm,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Lm,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13379
|
|
|
|
|
|
|
{_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Sk,_Sk,_Lu,_Ll,_Lu,_Ll,_Lo,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Ll,_Cn,_Ll,_Cn,_Ll,_Lu,_Ll,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lu,_Ll,_Lo,_Lm,_Lm,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13380
|
|
|
|
|
|
|
{_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mc,_So,_So,_So,_So,_Mn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_So,_So,_Sc,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Lo,_Po,_Lo,_Lo,_Mn}, |
|
13381
|
|
|
|
|
|
|
{_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Lm,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn}, |
|
13382
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_Lo,_Mc,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Mn,_Mn,_Mn,_Lo,_Lo,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lm,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mc,_Mc,_Po,_Po,_Lo,_Lm,_Lm,_Mc,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13383
|
|
|
|
|
|
|
{_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sk,_Lm,_Lm,_Lm,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Sk,_Sk,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mc,_Mc,_Mn,_Mc,_Mc,_Po,_Mc,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13384
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn}, |
|
13385
|
|
|
|
|
|
|
{_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs}, |
|
13386
|
|
|
|
|
|
|
{_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co}, |
|
13387
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13388
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13389
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Pe,_Ps,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sc,_So,_So,_So}, |
|
13390
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Ps,_Pe,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Pd,_Pd,_Pc,_Pc,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Ps,_Pe,_Po,_Po,_Po,_Po,_Pc,_Pc,_Pc,_Po,_Po,_Po,_Cn,_Po,_Po,_Po,_Po,_Pd,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Po,_Sm,_Pd,_Sm,_Sm,_Sm,_Cn,_Po,_Sc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cf}, |
|
13391
|
|
|
|
|
|
|
{_Cn,_Po,_Po,_Po,_Sc,_Po,_Po,_Po,_Ps,_Pe,_Po,_Sm,_Po,_Pd,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Sm,_Sm,_Sm,_Po,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ps,_Po,_Pe,_Sk,_Pc,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ps,_Sm,_Pe,_Sm,_Ps,_Pe,_Po,_Ps,_Pe,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Sc,_Sc,_Sm,_Sk,_So,_Sc,_Sc,_Cn,_So,_Sm,_Sm,_Sm,_Sm,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cf,_Cf,_So,_So,_Cn,_Cn}, |
|
13392
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13393
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Cn,_Cn}, |
|
13394
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn}, |
|
13395
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Nl,_Nl,_Nl,_Nl,_Nl,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13396
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn}, |
|
13397
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13398
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13399
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Po,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No}, |
|
13400
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_No,_No,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No}, |
|
13401
|
|
|
|
|
|
|
{_Lo,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13402
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13403
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No}, |
|
13404
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13405
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Pd,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn}, |
|
13406
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13407
|
|
|
|
|
|
|
{_Mc,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mn,_Lo,_Lo,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Po,_Po,_Cf,_Po,_Po,_Po,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13408
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Lo,_Mc,_Mc,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Po,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Mn,_Mn,_Mn,_Mn,_Po,_Mc,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Po,_Lo,_Po,_Po,_Po,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13409
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Mn,_Lo,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13410
|
|
|
|
|
|
|
{_Mn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Lo,_Mc,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mc,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13411
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Cn,_Po,_Mn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Lo,_Lo,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13412
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13413
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13414
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_Po,_Po,_Po,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13415
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo}, |
|
13416
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Cn,_Cn,_Mn,_Mn,_Mc,_Mn,_Lo,_Mc,_Lo,_Mc,_Mn,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Lo,_Po,_Lo,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13417
|
|
|
|
|
|
|
{_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Lo,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Lo,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13418
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13419
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13420
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Cn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13421
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13422
|
|
|
|
|
|
|
{_Mn,_Mn,_Lo,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_Sc,_Sc,_Sc,_Sc,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po}, |
|
13423
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13424
|
|
|
|
|
|
|
{_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Cn,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13425
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13426
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13427
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13428
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13429
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13430
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13431
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_Lm,_Lm,_Lm,_Lm,_Po,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_No,_No,_No,_No,_No,_No,_No,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13432
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13433
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Po,_Lm,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13434
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13435
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13436
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13437
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Lm,_Lm,_Cn}, |
|
13438
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13439
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn}, |
|
13440
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_So,_Mn,_Mn,_Po,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13441
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13442
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13443
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mc,_Mc,_Mn,_Mn,_Mn,_So,_So,_So,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13444
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13445
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13446
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Cn,_Lu,_Lu,_Cn,_Cn,_Lu,_Cn,_Cn,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
|
13447
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
|
13448
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll}, |
|
13449
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd}, |
|
13450
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13451
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13452
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13453
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Lo,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13454
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Sc}, |
|
13455
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13456
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn}, |
|
13457
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13458
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13459
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_Sc,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13460
|
|
|
|
|
|
|
{_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13461
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Sm,_Sm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13462
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13463
|
|
|
|
|
|
|
{_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
|
13464
|
|
|
|
|
|
|
{_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13465
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sk,_Sk,_Sk,_Sk,_Sk}, |
|
13466
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn}, |
|
13467
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13468
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13469
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13470
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13471
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13472
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13473
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13474
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13475
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13476
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13477
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
|
13478
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13479
|
|
|
|
|
|
|
{_Cn,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13480
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
|
13481
|
|
|
|
|
|
|
{_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Cn,_Cn} |
|
13482
|
|
|
|
|
|
|
}; |
|
13483
|
|
|
|
|
|
|
|
|
13484
|
|
|
|
|
|
|
const uint8_t unicode::othercase_index[unicode::CHARS >> 8] = { |
|
13485
|
|
|
|
|
|
|
0,1,2,3,4,5,6,6,6,6,6,6,6,6,6,6,7,6,6,8,6,6,6,6,6,6,6,6,9,10,11,12,6,13,6,6,14,6,6,6,6,6,6,6,15,16,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,17,18,6,6,6,19,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,20,6,6,6,6,21,22,6,6,6,6,6,6,23,6,6,6,6,6,6,6,6,6,6,6,24,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,25,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,26,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, |
|
13486
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
|
13487
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
|
13488
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
|
13489
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 |
|
13490
|
|
|
|
|
|
|
}; |
|
13491
|
|
|
|
|
|
|
|
|
13492
|
|
|
|
|
|
|
const char32_t unicode::othercase_block[][256] = { |
|
13493
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24833,25089,25345,25601,25857,26113,26369,26625,26881,27137,27393,27649,27905,28161,28417,28673,28929,29185,29441,29697,29953,30209,30465,30721,30977,31233,0,0,0,0,0,0,16642,16898,17154,17410,17666,17922,18178,18434,18690,18946,19202,19458,19714,19970,20226,20482,20738,20994,21250,21506,21762,22018,22274,22530,22786,23042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,236546,0,0,0,0,0,0,0,0,0,0,57345,57601,57857,58113,58369,58625,58881,59137,59393,59649,59905,60161,60417,60673,60929,61185,61441,61697,61953,62209,62465,62721,62977,0,63489,63745,64001,64257,64513,64769,65025,0,49154,49410,49666,49922,50178,50434,50690,50946,51202,51458,51714,51970,52226,52482,52738,52994,53250,53506,53762,54018,54274,54530,54786,0,55298,55554,55810,56066,56322,56578,56834,96258}, |
|
13494
|
|
|
|
|
|
|
{65793,65538,66305,66050,66817,66562,67329,67074,67841,67586,68353,68098,68865,68610,69377,69122,69889,69634,70401,70146,70913,70658,71425,71170,71937,71682,72449,72194,72961,72706,73473,73218,73985,73730,74497,74242,75009,74754,75521,75266,76033,75778,76545,76290,77057,76802,77569,77314,26881,18690,78593,78338,79105,78850,79617,79362,0,80385,80130,80897,80642,81409,81154,81921,81666,82433,82178,82945,82690,83457,83202,83969,83714,0,84737,84482,85249,84994,85761,85506,86273,86018,86785,86530,87297,87042,87809,87554,88321,88066,88833,88578,89345,89090,89857,89602,90369,90114,90881,90626,91393,91138,91905,91650,92417,92162,92929,92674,93441,93186,93953,93698,94465,94210,94977,94722,95489,95234,96001,95746,65281,96769,96514,97281,97026,97793,97538,21250,148226,152321,99073,98818,99585,99330,152577,100353,100098,153089,153345,101377,101122,0,122113,153857,154369,102913,102658,155649,156417,128514,157953,157697,104705,104450,146690,0,159489,160257,139266,161025,106753,106498,107265,107010,107777,107522,163841,108545,108290,164609,0,0,109825,109570,165889,110593,110338,166401,166657,111617,111362,112129,111874,168449,112897,112642,0,0,113921,113666,0,128770,0,0,0,0,115974,116228,115717,116742,116996,116485,117510,117764,117253,118273,118018,118785,118530,119297,119042,119809,119554,120321,120066,120833,120578,121345,121090,121857,121602,101890,122625,122370,123137,122882,123649,123394,124161,123906,124673,124418,125185,124930,125697,125442,126209,125954,126721,126466,0,127494,127748,127237,128257,128002,103681,114433,129281,129026,129793,129538,130305,130050,130817,130562}, |
|
13495
|
|
|
|
|
|
|
{131329,131074,131841,131586,132353,132098,132865,132610,133377,133122,133889,133634,134401,134146,134913,134658,135425,135170,135937,135682,136449,136194,136961,136706,137473,137218,137985,137730,138497,138242,139009,138754,105985,0,140033,139778,140545,140290,141057,140802,141569,141314,142081,141826,142593,142338,143105,142850,143617,143362,144129,143874,0,0,0,0,0,0,2909441,146433,146178,104961,2909697,2915842,2916098,147969,147714,98305,166145,166913,149249,148994,149761,149506,150273,150018,150785,150530,151297,151042,2912002,2911490,2912258,98562,99842,0,100610,100866,0,102146,0,102402,10988290,0,0,0,103170,10988546,0,103426,0,10980610,10988034,0,104194,103938,10989058,2908674,10988802,0,0,105474,0,2911746,105730,0,0,106242,0,0,0,0,0,0,0,2909186,0,0,108034,0,10994946,108802,0,0,0,10989826,110082,148482,110850,111106,148738,0,0,0,0,0,112386,0,0,0,0,0,0,0,0,0,0,10990082,10989570,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13496
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,235778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,225537,225282,226049,225794,0,0,227073,226818,0,0,0,261378,261634,261890,0,258817,0,0,0,0,0,0,240641,0,240897,241153,241409,0,248833,0,249089,249345,0,241921,242177,242433,242689,242945,243201,243457,243713,243969,244225,244481,244737,244993,245249,245505,245761,246017,0,246529,246785,247041,247297,247553,247809,248065,248321,248577,230914,231426,231682,231938,0,233730,233986,234242,234498,234754,235010,235266,235522,235778,236034,236290,236546,236802,237058,237314,237570,237826,238338,238338,238594,238850,239106,239362,239618,239874,240130,240386,232450,232962,233218,251649,233986,235522,0,0,0,239106,237570,249602,252161,251906,252673,252418,253185,252930,253697,253442,254209,253954,254721,254466,255233,254978,255745,255490,256257,256002,256769,256514,257281,257026,257793,257538,236034,237826,260354,229122,243713,234754,0,260097,259842,258561,260865,260610,0,228097,228353,228609}, |
|
13497
|
|
|
|
|
|
|
{282625,282881,283137,283393,283649,283905,284161,284417,284673,284929,285185,285441,285697,285953,286209,286465,274433,274689,274945,275201,275457,275713,275969,276225,276481,276737,276993,277249,277505,277761,278017,278273,278529,278785,279041,279297,279553,279809,280065,280321,280577,280833,281089,281345,281601,281857,282113,282369,266242,266498,266754,267010,267266,267522,267778,268034,268290,268546,268802,269058,269314,269570,269826,270082,270338,270594,270850,271106,271362,271618,271874,272130,272386,272642,272898,273154,273410,273666,273922,274178,262146,262402,262658,262914,263170,263426,263682,263938,264194,264450,264706,264962,265218,265474,265730,265986,286977,286722,287489,287234,288001,287746,288513,288258,289025,288770,289537,289282,290049,289794,290561,290306,291073,290818,291585,291330,292097,291842,292609,292354,293121,292866,293633,293378,294145,293890,294657,294402,295169,294914,0,0,0,0,0,0,0,0,297729,297474,298241,297986,298753,298498,299265,299010,299777,299522,300289,300034,300801,300546,301313,301058,301825,301570,302337,302082,302849,302594,303361,303106,303873,303618,304385,304130,304897,304642,305409,305154,305921,305666,306433,306178,306945,306690,307457,307202,307969,307714,308481,308226,308993,308738,309505,309250,310017,309762,310529,310274,311041,310786,315137,311809,311554,312321,312066,312833,312578,313345,313090,313857,313602,314369,314114,314881,314626,311298,315649,315394,316161,315906,316673,316418,317185,316930,317697,317442,318209,317954,318721,318466,319233,318978,319745,319490,320257,320002,320769,320514,321281,321026,321793,321538,322305,322050,322817,322562,323329,323074,323841,323586,324353,324098,324865,324610,325377,325122,325889,325634,326401,326146,326913,326658,327425,327170}, |
|
13498
|
|
|
|
|
|
|
{327937,327682,328449,328194,328961,328706,329473,329218,329985,329730,330497,330242,331009,330754,331521,331266,332033,331778,332545,332290,333057,332802,333569,333314,334081,333826,334593,334338,335105,334850,335617,335362,336129,335874,336641,336386,337153,336898,337665,337410,338177,337922,338689,338434,339201,338946,339713,339458,0,352513,352769,353025,353281,353537,353793,354049,354305,354561,354817,355073,355329,355585,355841,356097,356353,356609,356865,357121,357377,357633,357889,358145,358401,358657,358913,359169,359425,359681,359937,360193,360449,360705,360961,361217,361473,361729,361985,0,0,0,0,0,0,0,0,0,0,340226,340482,340738,340994,341250,341506,341762,342018,342274,342530,342786,343042,343298,343554,343810,344066,344322,344578,344834,345090,345346,345602,345858,346114,346370,346626,346882,347138,347394,347650,347906,348162,348418,348674,348930,349186,349442,349698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13499
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13500
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2949121,2949377,2949633,2949889,2950145,2950401,2950657,2950913,2951169,2951425,2951681,2951937,2952193,2952449,2952705,2952961,2953217,2953473,2953729,2953985,2954241,2954497,2954753,2955009,2955265,2955521,2955777,2956033,2956289,2956545,2956801,2957057,2957313,2957569,2957825,2958081,2958337,2958593,0,2959105,0,0,0,0,0,2960641,0,0,1871875,1872131,1872387,1872643,1872899,1873155,1873411,1873667,1873923,1874179,1874435,1874691,1874947,1875203,1875459,1875715,1875971,1876227,1876483,1876739,1876995,1877251,1877507,1877763,1878019,1878275,1878531,1878787,1879043,1879299,1879555,1879811,1880067,1880323,1880579,1880835,1881091,1881347,1881603,1881859,1882115,1882371,1882627,0,0,1883395,1883651,1883907}, |
|
13501
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11235329,11235585,11235841,11236097,11236353,11236609,11236865,11237121,11237377,11237633,11237889,11238145,11238401,11238657,11238913,11239169,11239425,11239681,11239937,11240193,11240449,11240705,11240961,11241217,11241473,11241729,11241985,11242241,11242497,11242753,11243009,11243265,11243521,11243777,11244033,11244289,11244545,11244801,11245057,11245313,11245569,11245825,11246081,11246337,11246593,11246849,11247105,11247361,11247617,11247873,11248129,11248385,11248641,11248897,11249153,11249409,11249665,11249921,11250177,11250433,11250689,11250945,11251201,11251457,11251713,11251969,11252225,11252481,11252737,11252993,11253249,11253505,11253761,11254017,11254273,11254529,11254785,11255041,11255297,11255553,1308673,1308929,1309185,1309441,1309697,1309953,0,0,1306626,1306882,1307138,1307394,1307650,1307906,0,0}, |
|
13502
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,266754,267266,269826,270594,270850,270850,272898,287234,10897922,0,0,0,0,0,0,0,1101825,1102081,1102337,1102593,1102849,1103105,1103361,1103617,1103873,1104129,1104385,1104641,1104897,1105153,1105409,1105665,1105921,1106177,1106433,1106689,1106945,1107201,1107457,1107713,1107969,1108225,1108481,1108737,1108993,1109249,1109505,1109761,1110017,1110273,1110529,1110785,1111041,1111297,1111553,1111809,1112065,1112321,1112577,0,0,1113345,1113601,1113857,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13503
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10976514,0,0,0,2908930,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10995202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13504
|
|
|
|
|
|
|
{1966337,1966082,1966849,1966594,1967361,1967106,1967873,1967618,1968385,1968130,1968897,1968642,1969409,1969154,1969921,1969666,1970433,1970178,1970945,1970690,1971457,1971202,1971969,1971714,1972481,1972226,1972993,1972738,1973505,1973250,1974017,1973762,1974529,1974274,1975041,1974786,1975553,1975298,1976065,1975810,1976577,1976322,1977089,1976834,1977601,1977346,1978113,1977858,1978625,1978370,1979137,1978882,1979649,1979394,1980161,1979906,1980673,1980418,1981185,1980930,1981697,1981442,1982209,1981954,1982721,1982466,1983233,1982978,1983745,1983490,1984257,1984002,1984769,1984514,1985281,1985026,1985793,1985538,1986305,1986050,1986817,1986562,1987329,1987074,1987841,1987586,1988353,1988098,1988865,1988610,1989377,1989122,1989889,1989634,1990401,1990146,1990913,1990658,1991425,1991170,1991937,1991682,1992449,1992194,1992961,1992706,1993473,1993218,1993985,1993730,1994497,1994242,1995009,1994754,1995521,1995266,1996033,1995778,1996545,1996290,1997057,1996802,1997569,1997314,1998081,1997826,1998593,1998338,1999105,1998850,1999617,1999362,2000129,1999874,2000641,2000386,2001153,2000898,2001665,2001410,2002177,2001922,2002689,2002434,2003201,2002946,2003713,2003458,2004225,2003970,0,0,0,0,0,1990658,0,0,57089,0,2007297,2007042,2007809,2007554,2008321,2008066,2008833,2008578,2009345,2009090,2009857,2009602,2010369,2010114,2010881,2010626,2011393,2011138,2011905,2011650,2012417,2012162,2012929,2012674,2013441,2013186,2013953,2013698,2014465,2014210,2014977,2014722,2015489,2015234,2016001,2015746,2016513,2016258,2017025,2016770,2017537,2017282,2018049,2017794,2018561,2018306,2019073,2018818,2019585,2019330,2020097,2019842,2020609,2020354,2021121,2020866,2021633,2021378,2022145,2021890,2022657,2022402,2023169,2022914,2023681,2023426,2024193,2023938,2024705,2024450,2025217,2024962,2025729,2025474,2026241,2025986,2026753,2026498,2027265,2027010,2027777,2027522,2028289,2028034,2028801,2028546,2029313,2029058,2029825,2029570,2030337,2030082,2030849,2030594,2031361, |
|
13505
|
|
|
|
|
|
|
2031106}, |
|
13506
|
|
|
|
|
|
|
{2033666,2033922,2034178,2034434,2034690,2034946,2035202,2035458,2031617,2031873,2032129,2032385,2032641,2032897,2033153,2033409,2037762,2038018,2038274,2038530,2038786,2039042,0,0,2035713,2035969,2036225,2036481,2036737,2036993,0,0,2041858,2042114,2042370,2042626,2042882,2043138,2043394,2043650,2039809,2040065,2040321,2040577,2040833,2041089,2041345,2041601,2045954,2046210,2046466,2046722,2046978,2047234,2047490,2047746,2043905,2044161,2044417,2044673,2044929,2045185,2045441,2045697,2050050,2050306,2050562,2050818,2051074,2051330,0,0,2048001,2048257,2048513,2048769,2049025,2049281,0,0,0,2054402,0,2054914,0,2055426,0,2055938,0,2052353,0,2052865,0,2053377,0,2053889,2058242,2058498,2058754,2059010,2059266,2059522,2059778,2060034,2056193,2056449,2056705,2056961,2057217,2057473,2057729,2057985,2079234,2079490,2082818,2083074,2083330,2083586,2087426,2087682,2095106,2095362,2091522,2091778,2095618,2095874,0,0,2066434,2066690,2066946,2067202,2067458,2067714,2067970,2068226,2064385,2064641,2064897,2065153,2065409,2065665,2065921,2066177,2070530,2070786,2071042,2071298,2071554,2071810,2072066,2072322,2068481,2068737,2068993,2069249,2069505,2069761,2070017,2070273,2074626,2074882,2075138,2075394,2075650,2075906,2076162,2076418,2072577,2072833,2073089,2073345,2073601,2073857,2074113,2074369,2078722,2078978,0,2079746,0,0,0,0,2076673,2076929,2060289,2060545,2077441,0,235778,0,0,0,0,2083842,0,0,0,0,2060801,2061057,2061313,2061569,2081537,0,0,0,2086914,2087170,0,0,0,0,0,0,2084865,2085121,2061825,2062081,0,0,0,0,2091010,2091266,0,0,0,2092034,0,0,2088961,2089217,2062849,2063105,2090241,0,0,0,0,0,0,2096130,0,0,0,0,2062337,2062593,2063361,2063617,2093825,0,0,0}, |
|
13507
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248065,0,0,0,27393,58625,0,0,0,0,0,0,2182657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2175490,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2191361,2191617,2191873,2192129,2192385,2192641,2192897,2193153,2193409,2193665,2193921,2194177,2194433,2194689,2194945,2195201,2187266,2187522,2187778,2188034,2188290,2188546,2188802,2189058,2189314,2189570,2189826,2190082,2190338,2190594,2190850,2191106,0,0,0,2196481,2196226,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13508
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2412545,2412801,2413057,2413313,2413569,2413825,2414081,2414337,2414593,2414849,2415105,2415361,2415617,2415873,2416129,2416385,2416641,2416897,2417153,2417409,2417665,2417921,2418177,2418433,2418689,2418945,2405890,2406146,2406402,2406658,2406914,2407170,2407426,2407682,2407938,2408194,2408450,2408706,2408962,2409218,2409474,2409730,2409986,2410242,2410498,2410754,2411010,2411266,2411522,2411778,2412034,2412290,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13509
|
|
|
|
|
|
|
{2895873,2896129,2896385,2896641,2896897,2897153,2897409,2897665,2897921,2898177,2898433,2898689,2898945,2899201,2899457,2899713,2899969,2900225,2900481,2900737,2900993,2901249,2901505,2901761,2902017,2902273,2902529,2902785,2903041,2903297,2903553,2903809,2904065,2904321,2904577,2904833,2905089,2905345,2905601,2905857,2906113,2906369,2906625,2906881,2907137,2907393,2907649,2907905,2883586,2883842,2884098,2884354,2884610,2884866,2885122,2885378,2885634,2885890,2886146,2886402,2886658,2886914,2887170,2887426,2887682,2887938,2888194,2888450,2888706,2888962,2889218,2889474,2889730,2889986,2890242,2890498,2890754,2891010,2891266,2891522,2891778,2892034,2892290,2892546,2892802,2893058,2893314,2893570,2893826,2894082,2894338,2894594,2894850,2895106,2895362,2895618,2908417,2908162,158465,1932545,163073,145922,146946,2910209,2909954,2910721,2910466,2911233,2910978,151809,160001,151553,152065,0,2913025,2912770,0,2913793,2913538,0,0,0,0,0,0,0,147201,147457,2916609,2916354,2917121,2916866,2917633,2917378,2918145,2917890,2918657,2918402,2919169,2918914,2919681,2919426,2920193,2919938,2920705,2920450,2921217,2920962,2921729,2921474,2922241,2921986,2922753,2922498,2923265,2923010,2923777,2923522,2924289,2924034,2924801,2924546,2925313,2925058,2925825,2925570,2926337,2926082,2926849,2926594,2927361,2927106,2927873,2927618,2928385,2928130,2928897,2928642,2929409,2929154,2929921,2929666,2930433,2930178,2930945,2930690,2931457,2931202,2931969,2931714,2932481,2932226,2932993,2932738,2933505,2933250,2934017,2933762,2934529,2934274,2935041,2934786,2935553,2935298,2936065,2935810,2936577,2936322,2937089,2936834,2937601,2937346,2938113,2937858,2938625,2938370,2939137,2938882,2939649,2939394,2940161,2939906,2940673,2940418,2941185,2940930,2941697,2941442,0,0,0,0,0,0,0,2944001,2943746,2944513,2944258,0,0,0,2945793,2945538,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13510
|
|
|
|
|
|
|
{1089538,1089794,1090050,1090306,1090562,1090818,1091074,1091330,1091586,1091842,1092098,1092354,1092610,1092866,1093122,1093378,1093634,1093890,1094146,1094402,1094658,1094914,1095170,1095426,1095682,1095938,1096194,1096450,1096706,1096962,1097218,1097474,1097730,1097986,1098242,1098498,1098754,1099010,0,1099522,0,0,0,0,0,1101058,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13511
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10895617,10895362,10896129,10895874,10896641,10896386,10897153,10896898,10897665,10897410,10898177,10897922,10898689,10898434,10899201,10898946,10899713,10899458,10900225,10899970,10900737,10900482,10901249,10900994,10901761,10901506,10902273,10902018,10902785,10902530,10903297,10903042,10903809,10903554,10904321,10904066,10904833,10904578,10905345,10905090,10905857,10905602,10906369,10906114,10906881,10906626,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10912001,10911746,10912513,10912258,10913025,10912770,10913537,10913282,10914049,10913794,10914561,10914306,10915073,10914818,10915585,10915330,10916097,10915842,10916609,10916354,10917121,10916866,10917633,10917378,10918145,10917890,10918657,10918402,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13512
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10953473,10953218,10953985,10953730,10954497,10954242,10955009,10954754,10955521,10955266,10956033,10955778,10956545,10956290,0,0,10957569,10957314,10958081,10957826,10958593,10958338,10959105,10958850,10959617,10959362,10960129,10959874,10960641,10960386,10961153,10960898,10961665,10961410,10962177,10961922,10962689,10962434,10963201,10962946,10963713,10963458,10964225,10963970,10964737,10964482,10965249,10964994,10965761,10965506,10966273,10966018,10966785,10966530,10967297,10967042,10967809,10967554,10968321,10968066,10968833,10968578,10969345,10969090,10969857,10969602,10970369,10970114,10970881,10970626,10971393,10971138,10971905,10971650,10972417,10972162,10972929,10972674,0,0,0,0,0,0,0,0,0,10975745,10975490,10976257,10976002,1931521,10977025,10976770,10977537,10977282,10978049,10977794,10978561,10978306,10979073,10978818,0,0,0,10980353,10980098,156929,0,0,10981633,10981378,10982145,10981890,10994690,0,10983169,10982914,10983681,10983426,10984193,10983938,10984705,10984450,10985217,10984962,10985729,10985474,10986241,10985986,10986753,10986498,10987265,10987010,10987777,10987522,157185,154625,155905,158721,158209,0,171521,165633,171265,11227905,10990849,10990594,10991361,10991106,10991873,10991618,10992385,10992130,10992897,10992642,10993409,10993154,10993921,10993666,10994433,10994178,10982401,164353,1936897,10995713,10995458,10996225,10995970,0,0,0,0,0,10998017,10997762,0,0,0,0,10999553,10999298,11000065,10999810,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11007489,11007234,0,0,0,0,0,0,0,0,0}, |
|
13513
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10990338,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1286146,1286402,1286658,1286914,1287170,1287426,1287682,1287938,1288194,1288450,1288706,1288962,1289218,1289474,1289730,1289986,1290242,1290498,1290754,1291010,1291266,1291522,1291778,1292034,1292290,1292546,1292802,1293058,1293314,1293570,1293826,1294082,1294338,1294594,1294850,1295106,1295362,1295618,1295874,1296130,1296386,1296642,1296898,1297154,1297410,1297666,1297922,1298178,1298434,1298690,1298946,1299202,1299458,1299714,1299970,1300226,1300482,1300738,1300994,1301250,1301506,1301762,1302018,1302274,1302530,1302786,1303042,1303298,1303554,1303810,1304066,1304322,1304578,1304834,1305090,1305346,1305602,1305858,1306114,1306370,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13514
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16728321,16728577,16728833,16729089,16729345,16729601,16729857,16730113,16730369,16730625,16730881,16731137,16731393,16731649,16731905,16732161,16732417,16732673,16732929,16733185,16733441,16733697,16733953,16734209,16734465,16734721,0,0,0,0,0,0,16720130,16720386,16720642,16720898,16721154,16721410,16721666,16721922,16722178,16722434,16722690,16722946,16723202,16723458,16723714,16723970,16724226,16724482,16724738,16724994,16725250,16725506,16725762,16726018,16726274,16726530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13515
|
|
|
|
|
|
|
{17049601,17049857,17050113,17050369,17050625,17050881,17051137,17051393,17051649,17051905,17052161,17052417,17052673,17052929,17053185,17053441,17053697,17053953,17054209,17054465,17054721,17054977,17055233,17055489,17055745,17056001,17056257,17056513,17056769,17057025,17057281,17057537,17057793,17058049,17058305,17058561,17058817,17059073,17059329,17059585,17039362,17039618,17039874,17040130,17040386,17040642,17040898,17041154,17041410,17041666,17041922,17042178,17042434,17042690,17042946,17043202,17043458,17043714,17043970,17044226,17044482,17044738,17044994,17045250,17045506,17045762,17046018,17046274,17046530,17046786,17047042,17047298,17047554,17047810,17048066,17048322,17048578,17048834,17049090,17049346,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17094657,17094913,17095169,17095425,17095681,17095937,17096193,17096449,17096705,17096961,17097217,17097473,17097729,17097985,17098241,17098497,17098753,17099009,17099265,17099521,17099777,17100033,17100289,17100545,17100801,17101057,17101313,17101569,17101825,17102081,17102337,17102593,17102849,17103105,17103361,17103617,0,0,0,0,17084418,17084674,17084930,17085186,17085442,17085698,17085954,17086210,17086466,17086722,17086978,17087234,17087490,17087746,17088002,17088258,17088514,17088770,17089026,17089282,17089538,17089794,17090050,17090306,17090562,17090818,17091074,17091330,17091586,17091842,17092098,17092354,17092610,17092866,17093122,17093378,0,0,0,0}, |
|
13516
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17143553,17143809,17144065,17144321,17144577,17144833,17145089,17145345,17145601,17145857,17146113,0,17146625,17146881,17147137,17147393,17147649,17147905,17148161,17148417,17148673,17148929,17149185,17149441,17149697,17149953,17150209,0,17150721,17150977,17151233,17151489,17151745,17152001,17152257,0,17152769,17153025,0,17133570,17133826,17134082,17134338,17134594,17134850,17135106,17135362,17135618,17135874,17136130,0,17136642,17136898,17137154,17137410,17137666,17137922,17138178,17138434,17138690,17138946,17139202,17139458,17139714,17139970,17140226,0,17140738,17140994,17141250,17141506,17141762,17142018,17142274,0,17142786,17143042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13517
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17612801,17613057,17613313,17613569,17613825,17614081,17614337,17614593,17614849,17615105,17615361,17615617,17615873,17616129,17616385,17616641,17616897,17617153,17617409,17617665,17617921,17618177,17618433,17618689,17618945,17619201,17619457,17619713,17619969,17620225,17620481,17620737,17620993,17621249,17621505,17621761,17622017,17622273,17622529,17622785,17623041,17623297,17623553,17623809,17624065,17624321,17624577,17624833,17625089,17625345,17625601,0,0,0,0,0,0,0,0,0,0,0,0,0,17596418,17596674,17596930,17597186,17597442,17597698,17597954,17598210,17598466,17598722,17598978,17599234,17599490,17599746,17600002,17600258,17600514,17600770,17601026,17601282,17601538,17601794,17602050,17602306,17602562,17602818,17603074,17603330,17603586,17603842,17604098,17604354,17604610,17604866,17605122,17605378,17605634,17605890,17606146,17606402,17606658,17606914,17607170,17607426,17607682,17607938,17608194,17608450,17608706,17608962,17609218,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13518
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18399233,18399489,18399745,18400001,18400257,18400513,18400769,18401025,18401281,18401537,18401793,18402049,18402305,18402561,18402817,18403073,18403329,18403585,18403841,18404097,18404353,18404609,18404865,18405121,18405377,18405633,18405889,18406145,18406401,18406657,18406913,18407169,18391042,18391298,18391554,18391810,18392066,18392322,18392578,18392834,18393090,18393346,18393602,18393858,18394114,18394370,18394626,18394882,18395138,18395394,18395650,18395906,18396162,18396418,18396674,18396930,18397186,18397442,18397698,18397954,18398210,18398466,18398722,18398978,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13519
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24010753,24011009,24011265,24011521,24011777,24012033,24012289,24012545,24012801,24013057,24013313,24013569,24013825,24014081,24014337,24014593,24014849,24015105,24015361,24015617,24015873,24016129,24016385,24016641,24016897,24017153,24017409,24017665,24017921,24018177,24018433,24018689,24002562,24002818,24003074,24003330,24003586,24003842,24004098,24004354,24004610,24004866,24005122,24005378,24005634,24005890,24006146,24006402,24006658,24006914,24007170,24007426,24007682,24007938,24008194,24008450,24008706,24008962,24009218,24009474,24009730,24009986,24010242,24010498,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
|
13520
|
|
|
|
|
|
|
{32055809,32056065,32056321,32056577,32056833,32057089,32057345,32057601,32057857,32058113,32058369,32058625,32058881,32059137,32059393,32059649,32059905,32060161,32060417,32060673,32060929,32061185,32061441,32061697,32061953,32062209,32062465,32062721,32062977,32063233,32063489,32063745,32064001,32064257,32047106,32047362,32047618,32047874,32048130,32048386,32048642,32048898,32049154,32049410,32049666,32049922,32050178,32050434,32050690,32050946,32051202,32051458,32051714,32051970,32052226,32052482,32052738,32052994,32053250,32053506,32053762,32054018,32054274,32054530,32054786,32055042,32055298,32055554,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
|
13521
|
|
|
|
|
|
|
}; |
|
13522
|
|
|
|
|
|
|
|
|
13523
|
|
|
|
|
|
|
} // namespace unilib |
|
13524
|
|
|
|
|
|
|
|
|
13525
|
|
|
|
|
|
|
///////// |
|
13526
|
|
|
|
|
|
|
// File: unilib/utf8.cpp |
|
13527
|
|
|
|
|
|
|
///////// |
|
13528
|
|
|
|
|
|
|
|
|
13529
|
|
|
|
|
|
|
// This file is part of UniLib . |
|
13530
|
|
|
|
|
|
|
// |
|
13531
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
|
13532
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13533
|
|
|
|
|
|
|
// |
|
13534
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13535
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13536
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13537
|
|
|
|
|
|
|
// |
|
13538
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
|
13539
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
|
13540
|
|
|
|
|
|
|
|
|
13541
|
|
|
|
|
|
|
namespace unilib { |
|
13542
|
|
|
|
|
|
|
|
|
13543
|
0
|
|
|
|
|
|
// Checks that a NUL-terminated string is structurally well-formed UTF-8.
//
// Bytes below 0x80 are accepted as-is. A lead byte in 0xC0-0xDF, 0xE0-0xEF or
// 0xF0-0xF7 must be followed by exactly 1, 2 or 3 continuation bytes in the
// range 0x80-0xBF. A stray continuation byte (0x80-0xBF) or a lead byte of
// 0xF8 and above makes the string invalid. Only this byte structure is
// checked: overlong forms, surrogates and the upper code point limit are not
// examined here.
bool utf8::valid(const char* str) {
  while (*str) {
    const unsigned char lead = (unsigned char)*str;
    if (lead >= 0x80) {
      int continuation_bytes;
      if (lead < 0xC0) return false;  // continuation byte without a lead byte
      if (lead < 0xE0) continuation_bytes = 1;
      else if (lead < 0xF0) continuation_bytes = 2;
      else if (lead < 0xF8) continuation_bytes = 3;
      else return false;

      // The terminating NUL is < 0x80, so a truncated sequence is rejected
      // before the pointer can move past the end of the string.
      while (continuation_bytes--) {
        const unsigned char next = (unsigned char)*++str;
        if (next < 0x80 || next >= 0xC0) return false;
      }
    }
    str++;
  }
  return true;
}
|
13560
|
|
|
|
|
|
|
|
|
13561
|
0
|
|
|
|
|
|
// Checks that the first len bytes at str are structurally well-formed UTF-8.
//
// The rules match the NUL-terminated overload: a lead byte in 0xC0-0xDF,
// 0xE0-0xEF or 0xF0-0xF7 needs 1, 2 or 3 continuation bytes (0x80-0xBF);
// stray continuation bytes and lead bytes of 0xF8 and above are rejected.
// A sequence cut off by the end of the buffer is invalid. NUL bytes inside
// the buffer are ordinary bytes below 0x80 and are accepted.
bool utf8::valid(const char* str, size_t len) {
  while (len > 0) {
    const unsigned char lead = (unsigned char)*str;
    if (lead >= 0x80) {
      int continuation_bytes;
      if (lead < 0xC0) return false;  // continuation byte without a lead byte
      if (lead < 0xE0) continuation_bytes = 1;
      else if (lead < 0xF0) continuation_bytes = 2;
      else if (lead < 0xF8) continuation_bytes = 3;
      else return false;

      for (; continuation_bytes > 0; continuation_bytes--) {
        str++;
        if (--len == 0) return false;  // buffer ended inside the sequence
        const unsigned char next = (unsigned char)*str;
        if (next < 0x80 || next >= 0xC0) return false;
      }
    }
    str++;
    len--;
  }
  return true;
}
|
13578
|
|
|
|
|
|
|
|
|
13579
|
0
|
|
|
|
|
|
// Decodes a NUL-terminated UTF-8 string into decoded, replacing its previous
// contents. Code points are fetched one at a time with the single-character
// decode(str) overload until it yields 0; that terminating 0 is not stored.
// NOTE(review): decode(str) presumably advances str past the consumed bytes --
// it is defined outside this section.
void utf8::decode(const char* str, std::u32string& decoded) {
  decoded.clear();

  char32_t code_point = decode(str);
  while (code_point) {
    decoded.push_back(code_point);
    code_point = decode(str);
  }
}
|
13585
|
|
|
|
|
|
|
|
|
13586
|
0
|
|
|
|
|
|
void utf8::decode(const char* str, size_t len, std::u32string& decoded) { |
|
13587
|
|
|
|
|
|
|
decoded.clear(); |
|
13588
|
|
|
|
|
|
|
|
|
13589
|
0
|
0
|
|
|
|
|
while (len) |
|
13590
|
0
|
|
|
|
|
|
decoded.push_back(decode(str, len)); |
|
13591
|
0
|
|
|
|
|
|
} |
|
13592
|
|
|
|
|
|
|
|
|
13593
|
0
|
|
|
|
|
|
void utf8::encode(const std::u32string& str, std::string& encoded) { |
|
13594
|
|
|
|
|
|
|
encoded.clear(); |
|
13595
|
|
|
|
|
|
|
|
|
13596
|
0
|
0
|
|
|
|
|
for (auto&& chr : str) |
|
13597
|
0
|
|
|
|
|
|
append(encoded, chr); |
|
13598
|
0
|
|
|
|
|
|
} |
|
13599
|
|
|
|
|
|
|
|
|
13600
|
|
|
|
|
|
|
const char utf8::REPLACEMENT_CHAR; |
|
13601
|
|
|
|
|
|
|
|
|
13602
|
|
|
|
|
|
|
} // namespace unilib |
|
13603
|
|
|
|
|
|
|
|
|
13604
|
|
|
|
|
|
|
///////// |
|
13605
|
|
|
|
|
|
|
// File: unilib/version.cpp |
|
13606
|
|
|
|
|
|
|
///////// |
|
13607
|
|
|
|
|
|
|
|
|
13608
|
|
|
|
|
|
|
// This file is part of UniLib . |
|
13609
|
|
|
|
|
|
|
// |
|
13610
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
|
13611
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13612
|
|
|
|
|
|
|
// |
|
13613
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13614
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13615
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13616
|
|
|
|
|
|
|
// |
|
13617
|
|
|
|
|
|
|
// UniLib version: 3.3.0 |
|
13618
|
|
|
|
|
|
|
// Unicode version: 15.0.0 |
|
13619
|
|
|
|
|
|
|
|
|
13620
|
|
|
|
|
|
|
namespace unilib { |
|
13621
|
|
|
|
|
|
|
|
|
13622
|
|
|
|
|
|
|
// Returns current version. |
|
13623
|
0
|
|
|
|
|
|
version version::current() { |
|
13624
|
0
|
0
|
|
|
|
|
return {3, 3, 0, ""}; |
|
|
|
0
|
|
|
|
|
|
|
13625
|
|
|
|
|
|
|
} |
|
13626
|
|
|
|
|
|
|
|
|
13627
|
|
|
|
|
|
|
} // namespace unilib |
|
13628
|
|
|
|
|
|
|
|
|
13629
|
|
|
|
|
|
|
///////// |
|
13630
|
|
|
|
|
|
|
// File: utils/compressor_load.cpp |
|
13631
|
|
|
|
|
|
|
///////// |
|
13632
|
|
|
|
|
|
|
|
|
13633
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
13634
|
|
|
|
|
|
|
// |
|
13635
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
13636
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
13637
|
|
|
|
|
|
|
// |
|
13638
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
13639
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
13640
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
13641
|
|
|
|
|
|
|
|
|
13642
|
|
|
|
|
|
|
namespace utils { |
|
13643
|
|
|
|
|
|
|
|
|
13644
|
|
|
|
|
|
|
// Start of LZMA compression library by Igor Pavlov |
|
13645
|
|
|
|
|
|
|
namespace lzma { |
|
13646
|
|
|
|
|
|
|
|
|
13647
|
|
|
|
|
|
|
// Types.h -- Basic types |
|
13648
|
|
|
|
|
|
|
// 2010-10-09 : Igor Pavlov : Public domain |
|
13649
|
|
|
|
|
|
|
#ifndef UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
|
13650
|
|
|
|
|
|
|
#define UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
|
13651
|
|
|
|
|
|
|
|
|
13652
|
|
|
|
|
|
|
#define SZ_OK 0 |
|
13653
|
|
|
|
|
|
|
|
|
13654
|
|
|
|
|
|
|
#define SZ_ERROR_DATA 1 |
|
13655
|
|
|
|
|
|
|
#define SZ_ERROR_MEM 2 |
|
13656
|
|
|
|
|
|
|
#define SZ_ERROR_CRC 3 |
|
13657
|
|
|
|
|
|
|
#define SZ_ERROR_UNSUPPORTED 4 |
|
13658
|
|
|
|
|
|
|
#define SZ_ERROR_PARAM 5 |
|
13659
|
|
|
|
|
|
|
#define SZ_ERROR_INPUT_EOF 6 |
|
13660
|
|
|
|
|
|
|
#define SZ_ERROR_OUTPUT_EOF 7 |
|
13661
|
|
|
|
|
|
|
#define SZ_ERROR_READ 8 |
|
13662
|
|
|
|
|
|
|
#define SZ_ERROR_WRITE 9 |
|
13663
|
|
|
|
|
|
|
#define SZ_ERROR_PROGRESS 10 |
|
13664
|
|
|
|
|
|
|
#define SZ_ERROR_FAIL 11 |
|
13665
|
|
|
|
|
|
|
#define SZ_ERROR_THREAD 12 |
|
13666
|
|
|
|
|
|
|
|
|
13667
|
|
|
|
|
|
|
#define SZ_ERROR_ARCHIVE 16 |
|
13668
|
|
|
|
|
|
|
#define SZ_ERROR_NO_ARCHIVE 17 |
|
13669
|
|
|
|
|
|
|
|
|
13670
|
|
|
|
|
|
|
typedef int SRes; |
|
13671
|
|
|
|
|
|
|
|
|
13672
|
|
|
|
|
|
|
/* RINOK(x): evaluates x exactly once and, if the result is non-zero, returns
   it from the enclosing function; otherwise execution continues after the
   macro. The enclosing function must return a type an int converts to.
   The temporary deliberately avoids a double underscore, because identifiers
   containing "__" are reserved to the implementation in C++. */
#ifndef RINOK
#define RINOK(x) { const int rinok_result_ = (x); if (rinok_result_ != 0) return rinok_result_; }
#endif
|
13675
|
|
|
|
|
|
|
|
|
13676
|
|
|
|
|
|
|
/* The following interfaces use first parameter as pointer to structure */ |
|
13677
|
|
|
|
|
|
|
|
|
13678
|
|
|
|
|
|
|
/* Byte-at-a-time input callback table. */
struct IByteIn {
  /* Reads a single byte; a return value of 0 signals EOF or an error.
     p is the pointer to the structure. */
  uint8_t (*Read)(void *p);
};
|
13682
|
|
|
|
|
|
|
|
|
13683
|
|
|
|
|
|
|
/* Byte-at-a-time output callback table. */
struct IByteOut {
  /* Writes the single byte b; p is the pointer to the structure. */
  void (*Write)(void *p, uint8_t b);
};
|
13687
|
|
|
|
|
|
|
|
|
13688
|
|
|
|
|
|
|
struct ISeqInStream |
|
13689
|
|
|
|
|
|
|
{ |
|
13690
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); |
|
13691
|
|
|
|
|
|
|
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. |
|
13692
|
|
|
|
|
|
|
(output(*size) < input(*size)) is allowed */ |
|
13693
|
|
|
|
|
|
|
}; |
|
13694
|
|
|
|
|
|
|
|
|
13695
|
|
|
|
|
|
|
/* it can return SZ_ERROR_INPUT_EOF */ |
|
13696
|
|
|
|
|
|
|
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); |
|
13697
|
|
|
|
|
|
|
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); |
|
13698
|
|
|
|
|
|
|
SRes SeqInStream_ReadByte(ISeqInStream *stream, uint8_t *buf); |
|
13699
|
|
|
|
|
|
|
|
|
13700
|
|
|
|
|
|
|
/* Sequential output stream callback table. */
struct ISeqOutStream {
  /* Writes size bytes from buf and returns how many bytes were actually
     written; a result smaller than size means an error occurred. */
  size_t (*Write)(void *p, const void *buf, size_t size);
};
|
13706
|
|
|
|
|
|
|
|
|
13707
|
|
|
|
|
|
|
/* Seek origin passed as the last argument of the Seek callbacks. */
enum ESzSeek {
  SZ_SEEK_SET = 0,
  SZ_SEEK_CUR = 1,
  SZ_SEEK_END = 2
};
|
13713
|
|
|
|
|
|
|
|
|
13714
|
|
|
|
|
|
|
struct ISeekInStream |
|
13715
|
|
|
|
|
|
|
{ |
|
13716
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ |
|
13717
|
|
|
|
|
|
|
SRes (*Seek)(void *p, int64_t *pos, ESzSeek origin); |
|
13718
|
|
|
|
|
|
|
}; |
|
13719
|
|
|
|
|
|
|
|
|
13720
|
|
|
|
|
|
|
struct ILookInStream |
|
13721
|
|
|
|
|
|
|
{ |
|
13722
|
|
|
|
|
|
|
SRes (*Look)(void *p, const void **buf, size_t *size); |
|
13723
|
|
|
|
|
|
|
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. |
|
13724
|
|
|
|
|
|
|
(output(*size) > input(*size)) is not allowed |
|
13725
|
|
|
|
|
|
|
(output(*size) < input(*size)) is allowed */ |
|
13726
|
|
|
|
|
|
|
SRes (*Skip)(void *p, size_t offset); |
|
13727
|
|
|
|
|
|
|
/* offset must be <= output(*size) of Look */ |
|
13728
|
|
|
|
|
|
|
|
|
13729
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); |
|
13730
|
|
|
|
|
|
|
/* reads directly (without buffer). It's same as ISeqInStream::Read */ |
|
13731
|
|
|
|
|
|
|
SRes (*Seek)(void *p, int64_t *pos, ESzSeek origin); |
|
13732
|
|
|
|
|
|
|
}; |
|
13733
|
|
|
|
|
|
|
|
|
13734
|
|
|
|
|
|
|
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); |
|
13735
|
|
|
|
|
|
|
SRes LookInStream_SeekTo(ILookInStream *stream, uint64_t offset); |
|
13736
|
|
|
|
|
|
|
|
|
13737
|
|
|
|
|
|
|
/* reads via ILookInStream::Read */ |
|
13738
|
|
|
|
|
|
|
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); |
|
13739
|
|
|
|
|
|
|
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); |
|
13740
|
|
|
|
|
|
|
|
|
13741
|
|
|
|
|
|
|
#define LookToRead_BUF_SIZE (1 << 14) |
|
13742
|
|
|
|
|
|
|
|
|
13743
|
|
|
|
|
|
|
struct CLookToRead |
|
13744
|
|
|
|
|
|
|
{ |
|
13745
|
|
|
|
|
|
|
ILookInStream s; |
|
13746
|
|
|
|
|
|
|
ISeekInStream *realStream; |
|
13747
|
|
|
|
|
|
|
size_t pos; |
|
13748
|
|
|
|
|
|
|
size_t size; |
|
13749
|
|
|
|
|
|
|
uint8_t buf[LookToRead_BUF_SIZE]; |
|
13750
|
|
|
|
|
|
|
}; |
|
13751
|
|
|
|
|
|
|
|
|
13752
|
|
|
|
|
|
|
void LookToRead_CreateVTable(CLookToRead *p, int lookahead); |
|
13753
|
|
|
|
|
|
|
void LookToRead_Init(CLookToRead *p); |
|
13754
|
|
|
|
|
|
|
|
|
13755
|
|
|
|
|
|
|
struct CSecToLook |
|
13756
|
|
|
|
|
|
|
{ |
|
13757
|
|
|
|
|
|
|
ISeqInStream s; |
|
13758
|
|
|
|
|
|
|
ILookInStream *realStream; |
|
13759
|
|
|
|
|
|
|
}; |
|
13760
|
|
|
|
|
|
|
|
|
13761
|
|
|
|
|
|
|
void SecToLook_CreateVTable(CSecToLook *p); |
|
13762
|
|
|
|
|
|
|
|
|
13763
|
|
|
|
|
|
|
struct CSecToRead |
|
13764
|
|
|
|
|
|
|
{ |
|
13765
|
|
|
|
|
|
|
ISeqInStream s; |
|
13766
|
|
|
|
|
|
|
ILookInStream *realStream; |
|
13767
|
|
|
|
|
|
|
}; |
|
13768
|
|
|
|
|
|
|
|
|
13769
|
|
|
|
|
|
|
void SecToRead_CreateVTable(CSecToRead *p); |
|
13770
|
|
|
|
|
|
|
|
|
13771
|
|
|
|
|
|
|
struct ICompressProgress |
|
13772
|
|
|
|
|
|
|
{ |
|
13773
|
|
|
|
|
|
|
SRes (*Progress)(void *p, uint64_t inSize, uint64_t outSize); |
|
13774
|
|
|
|
|
|
|
/* Returns: result. (result != SZ_OK) means break. |
|
13775
|
|
|
|
|
|
|
Value (uint64_t)(int64_t)-1 for size means unknown value. */ |
|
13776
|
|
|
|
|
|
|
}; |
|
13777
|
|
|
|
|
|
|
|
|
13778
|
|
|
|
|
|
|
/* Memory allocation callback table; normally invoked through the
   IAlloc_Alloc / IAlloc_Free macros, which pass the table itself as p. */
struct ISzAlloc {
  /* Allocates size bytes. */
  void *(*Alloc)(void *p, size_t size);

  /* Releases address; address can be 0. */
  void (*Free)(void *p, void *address);
};
|
13783
|
|
|
|
|
|
|
|
|
13784
|
|
|
|
|
|
|
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) |
|
13785
|
|
|
|
|
|
|
#define IAlloc_Free(p, a) (p)->Free((p), a) |
|
13786
|
|
|
|
|
|
|
|
|
13787
|
|
|
|
|
|
|
#endif // UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
|
13788
|
|
|
|
|
|
|
|
|
13789
|
|
|
|
|
|
|
// LzmaDec.h -- LZMA Decoder |
|
13790
|
|
|
|
|
|
|
// 2009-02-07 : Igor Pavlov : Public domain |
|
13791
|
|
|
|
|
|
|
|
|
13792
|
|
|
|
|
|
|
/* #define _LZMA_PROB32 */ |
|
13793
|
|
|
|
|
|
|
/* _LZMA_PROB32 can increase the speed on some CPUs, |
|
13794
|
|
|
|
|
|
|
but memory usage for CLzmaDec::probs will be doubled in that case */ |
|
13795
|
|
|
|
|
|
|
|
|
13796
|
|
|
|
|
|
|
#ifdef _LZMA_PROB32 |
|
13797
|
|
|
|
|
|
|
#define CLzmaProb uint32_t |
|
13798
|
|
|
|
|
|
|
#else |
|
13799
|
|
|
|
|
|
|
#define CLzmaProb uint16_t |
|
13800
|
|
|
|
|
|
|
#endif |
|
13801
|
|
|
|
|
|
|
|
|
13802
|
|
|
|
|
|
|
/* ---------- LZMA Properties ---------- */ |
|
13803
|
|
|
|
|
|
|
|
|
13804
|
|
|
|
|
|
|
#define LZMA_PROPS_SIZE 5 |
|
13805
|
|
|
|
|
|
|
|
|
13806
|
|
|
|
|
|
|
/* LZMA stream properties, filled in by LzmaProps_Decode(). */
struct CLzmaProps {
  unsigned lc;
  unsigned lp;
  unsigned pb;
  uint32_t dicSize;
};
|
13811
|
|
|
|
|
|
|
|
|
13812
|
|
|
|
|
|
|
/* LzmaProps_Decode - decodes properties |
|
13813
|
|
|
|
|
|
|
Returns: |
|
13814
|
|
|
|
|
|
|
SZ_OK |
|
13815
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
|
13816
|
|
|
|
|
|
|
*/ |
|
13817
|
|
|
|
|
|
|
|
|
13818
|
|
|
|
|
|
|
SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size); |
|
13819
|
|
|
|
|
|
|
|
|
13820
|
|
|
|
|
|
|
/* ---------- LZMA Decoder state ---------- */ |
|
13821
|
|
|
|
|
|
|
|
|
13822
|
|
|
|
|
|
|
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. |
|
13823
|
|
|
|
|
|
|
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ |
|
13824
|
|
|
|
|
|
|
|
|
13825
|
|
|
|
|
|
|
#define LZMA_REQUIRED_INPUT_MAX 20 |
|
13826
|
|
|
|
|
|
|
|
|
13827
|
|
|
|
|
|
|
struct CLzmaDec |
|
13828
|
|
|
|
|
|
|
{ |
|
13829
|
|
|
|
|
|
|
CLzmaProps prop; |
|
13830
|
|
|
|
|
|
|
CLzmaProb *probs; |
|
13831
|
|
|
|
|
|
|
uint8_t *dic; |
|
13832
|
|
|
|
|
|
|
const uint8_t *buf; |
|
13833
|
|
|
|
|
|
|
uint32_t range, code; |
|
13834
|
|
|
|
|
|
|
size_t dicPos; |
|
13835
|
|
|
|
|
|
|
size_t dicBufSize; |
|
13836
|
|
|
|
|
|
|
uint32_t processedPos; |
|
13837
|
|
|
|
|
|
|
uint32_t checkDicSize; |
|
13838
|
|
|
|
|
|
|
unsigned state; |
|
13839
|
|
|
|
|
|
|
uint32_t reps[4]; |
|
13840
|
|
|
|
|
|
|
unsigned remainLen; |
|
13841
|
|
|
|
|
|
|
int needFlush; |
|
13842
|
|
|
|
|
|
|
int needInitState; |
|
13843
|
|
|
|
|
|
|
uint32_t numProbs; |
|
13844
|
|
|
|
|
|
|
unsigned tempBufSize; |
|
13845
|
|
|
|
|
|
|
uint8_t tempBuf[LZMA_REQUIRED_INPUT_MAX]; |
|
13846
|
|
|
|
|
|
|
}; |
|
13847
|
|
|
|
|
|
|
|
|
13848
|
|
|
|
|
|
|
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } |
|
13849
|
|
|
|
|
|
|
|
|
13850
|
|
|
|
|
|
|
void LzmaDec_Init(CLzmaDec *p); |
|
13851
|
|
|
|
|
|
|
|
|
13852
|
|
|
|
|
|
|
/* There are two types of LZMA streams: |
|
13853
|
|
|
|
|
|
|
0) Stream with end mark. That end mark adds about 6 bytes to compressed size. |
|
13854
|
|
|
|
|
|
|
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ |
|
13855
|
|
|
|
|
|
|
|
|
13856
|
|
|
|
|
|
|
/* Selects what the decoder does when it reaches the output limit. */
enum ELzmaFinishMode {
  LZMA_FINISH_ANY = 0,  /* finish at any point */
  LZMA_FINISH_END = 1   /* block must be finished at the end */
};
|
13861
|
|
|
|
|
|
|
|
|
13862
|
|
|
|
|
|
|
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! |
|
13863
|
|
|
|
|
|
|
|
|
13864
|
|
|
|
|
|
|
You must use LZMA_FINISH_END, when you know that current output buffer |
|
13865
|
|
|
|
|
|
|
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. |
|
13866
|
|
|
|
|
|
|
|
|
13867
|
|
|
|
|
|
|
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, |
|
13868
|
|
|
|
|
|
|
and output value of destLen will be less than output buffer size limit. |
|
13869
|
|
|
|
|
|
|
You can check status result also. |
|
13870
|
|
|
|
|
|
|
|
|
13871
|
|
|
|
|
|
|
You can use multiple checks to test data integrity after full decompression: |
|
13872
|
|
|
|
|
|
|
1) Check Result and "status" variable. |
|
13873
|
|
|
|
|
|
|
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. |
|
13874
|
|
|
|
|
|
|
3) Check that output(srcLen) = compressedSize, if you know real compressedSize. |
|
13875
|
|
|
|
|
|
|
You must use correct finish mode in that case. */ |
|
13876
|
|
|
|
|
|
|
|
|
13877
|
|
|
|
|
|
|
/* Decoder status reported through the status output parameter. */
enum ELzmaStatus {
  LZMA_STATUS_NOT_SPECIFIED = 0,               /* use main error code instead */
  LZMA_STATUS_FINISHED_WITH_MARK = 1,          /* stream was finished with end mark */
  LZMA_STATUS_NOT_FINISHED = 2,                /* stream was not finished */
  LZMA_STATUS_NEEDS_MORE_INPUT = 3,            /* you must provide more input bytes */
  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK = 4  /* the stream may have finished without an end mark */
};
|
13885
|
|
|
|
|
|
|
|
|
13886
|
|
|
|
|
|
|
/* ELzmaStatus is used only as output value for function call */ |
|
13887
|
|
|
|
|
|
|
|
|
13888
|
|
|
|
|
|
|
/* ---------- Interfaces ---------- */ |
|
13889
|
|
|
|
|
|
|
|
|
13890
|
|
|
|
|
|
|
/* There are 3 levels of interfaces: |
|
13891
|
|
|
|
|
|
|
1) Dictionary Interface |
|
13892
|
|
|
|
|
|
|
2) Buffer Interface |
|
13893
|
|
|
|
|
|
|
3) One Call Interface |
|
13894
|
|
|
|
|
|
|
You can select any of these interfaces, but don't mix functions from different |
|
13895
|
|
|
|
|
|
|
groups for same object. */ |
|
13896
|
|
|
|
|
|
|
|
|
13897
|
|
|
|
|
|
|
/* There are two variants to allocate state for Dictionary Interface: |
|
13898
|
|
|
|
|
|
|
1) LzmaDec_Allocate / LzmaDec_Free |
|
13899
|
|
|
|
|
|
|
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs |
|
13900
|
|
|
|
|
|
|
You can use variant 2, if you set dictionary buffer manually. |
|
13901
|
|
|
|
|
|
|
For Buffer Interface you must always use variant 1. |
|
13902
|
|
|
|
|
|
|
|
|
13903
|
|
|
|
|
|
|
LzmaDec_Allocate* can return: |
|
13904
|
|
|
|
|
|
|
SZ_OK |
|
13905
|
|
|
|
|
|
|
SZ_ERROR_MEM - Memory allocation error |
|
13906
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
|
13907
|
|
|
|
|
|
|
*/ |
|
13908
|
|
|
|
|
|
|
|
|
13909
|
|
|
|
|
|
|
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc); |
|
13910
|
|
|
|
|
|
|
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); |
|
13911
|
|
|
|
|
|
|
|
|
13912
|
|
|
|
|
|
|
SRes LzmaDec_Allocate(CLzmaDec *state, const uint8_t *prop, unsigned propsSize, ISzAlloc *alloc); |
|
13913
|
|
|
|
|
|
|
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); |
|
13914
|
|
|
|
|
|
|
|
|
13915
|
|
|
|
|
|
|
/* ---------- Dictionary Interface ---------- */ |
|
13916
|
|
|
|
|
|
|
|
|
13917
|
|
|
|
|
|
|
/* You can use it, if you want to eliminate the overhead for data copying from |
|
13918
|
|
|
|
|
|
|
dictionary to some other external buffer. |
|
13919
|
|
|
|
|
|
|
You must work with CLzmaDec variables directly in this interface. |
|
13920
|
|
|
|
|
|
|
|
|
13921
|
|
|
|
|
|
|
STEPS: |
|
13922
|
|
|
|
|
|
|
LzmaDec_Construct() |
|
13923
|
|
|
|
|
|
|
LzmaDec_Allocate() |
|
13924
|
|
|
|
|
|
|
for (each new stream) |
|
13925
|
|
|
|
|
|
|
{ |
|
13926
|
|
|
|
|
|
|
LzmaDec_Init() |
|
13927
|
|
|
|
|
|
|
while (it needs more decompression) |
|
13928
|
|
|
|
|
|
|
{ |
|
13929
|
|
|
|
|
|
|
LzmaDec_DecodeToDic() |
|
13930
|
|
|
|
|
|
|
use data from CLzmaDec::dic and update CLzmaDec::dicPos |
|
13931
|
|
|
|
|
|
|
} |
|
13932
|
|
|
|
|
|
|
} |
|
13933
|
|
|
|
|
|
|
LzmaDec_Free() |
|
13934
|
|
|
|
|
|
|
*/ |
|
13935
|
|
|
|
|
|
|
|
|
13936
|
|
|
|
|
|
|
/* LzmaDec_DecodeToDic |
|
13937
|
|
|
|
|
|
|
|
|
13938
|
|
|
|
|
|
|
The decoding to internal dictionary buffer (CLzmaDec::dic). |
|
13939
|
|
|
|
|
|
|
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! |
|
13940
|
|
|
|
|
|
|
|
|
13941
|
|
|
|
|
|
|
finishMode: |
|
13942
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (dicLimit). |
|
13943
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just dicLimit bytes. |
|
13944
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after dicLimit. |
|
13945
|
|
|
|
|
|
|
|
|
13946
|
|
|
|
|
|
|
Returns: |
|
13947
|
|
|
|
|
|
|
SZ_OK |
|
13948
|
|
|
|
|
|
|
status: |
|
13949
|
|
|
|
|
|
|
LZMA_STATUS_FINISHED_WITH_MARK |
|
13950
|
|
|
|
|
|
|
LZMA_STATUS_NOT_FINISHED |
|
13951
|
|
|
|
|
|
|
LZMA_STATUS_NEEDS_MORE_INPUT |
|
13952
|
|
|
|
|
|
|
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK |
|
13953
|
|
|
|
|
|
|
SZ_ERROR_DATA - Data error |
|
13954
|
|
|
|
|
|
|
*/ |
|
13955
|
|
|
|
|
|
|
|
|
13956
|
|
|
|
|
|
|
SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, |
|
13957
|
|
|
|
|
|
|
const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); |
|
13958
|
|
|
|
|
|
|
|
|
13959
|
|
|
|
|
|
|
/* ---------- Buffer Interface ---------- */ |
|
13960
|
|
|
|
|
|
|
|
|
13961
|
|
|
|
|
|
|
/* It's zlib-like interface. |
|
13962
|
|
|
|
|
|
|
See LzmaDec_DecodeToDic description for information about STEPS and return results, |
|
13963
|
|
|
|
|
|
|
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need |
|
13964
|
|
|
|
|
|
|
to work with CLzmaDec variables manually. |
|
13965
|
|
|
|
|
|
|
|
|
13966
|
|
|
|
|
|
|
finishMode: |
|
13967
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (*destLen). |
|
13968
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just destLen bytes. |
|
13969
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after (*destLen). |
|
13970
|
|
|
|
|
|
|
*/ |
|
13971
|
|
|
|
|
|
|
|
|
13972
|
|
|
|
|
|
|
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen, |
|
13973
|
|
|
|
|
|
|
const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); |
|
13974
|
|
|
|
|
|
|
|
|
13975
|
|
|
|
|
|
|
/* ---------- One Call Interface ---------- */ |
|
13976
|
|
|
|
|
|
|
|
|
13977
|
|
|
|
|
|
|
/* LzmaDecode |
|
13978
|
|
|
|
|
|
|
|
|
13979
|
|
|
|
|
|
|
finishMode: |
|
13980
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (*destLen). |
|
13981
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just destLen bytes. |
|
13982
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after (*destLen). |
|
13983
|
|
|
|
|
|
|
|
|
13984
|
|
|
|
|
|
|
Returns: |
|
13985
|
|
|
|
|
|
|
SZ_OK |
|
13986
|
|
|
|
|
|
|
status: |
|
13987
|
|
|
|
|
|
|
LZMA_STATUS_FINISHED_WITH_MARK |
|
13988
|
|
|
|
|
|
|
LZMA_STATUS_NOT_FINISHED |
|
13989
|
|
|
|
|
|
|
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK |
|
13990
|
|
|
|
|
|
|
SZ_ERROR_DATA - Data error |
|
13991
|
|
|
|
|
|
|
SZ_ERROR_MEM - Memory allocation error |
|
13992
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
|
13993
|
|
|
|
|
|
|
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). |
|
13994
|
|
|
|
|
|
|
*/ |
|
13995
|
|
|
|
|
|
|
|
|
13996
|
|
|
|
|
|
|
SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, |
|
13997
|
|
|
|
|
|
|
const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode, |
|
13998
|
|
|
|
|
|
|
ELzmaStatus *status, ISzAlloc *alloc); |
|
13999
|
|
|
|
|
|
|
|
|
14000
|
|
|
|
|
|
|
// LzmaDec.c -- LZMA Decoder |
|
14001
|
|
|
|
|
|
|
// 2009-09-20 : Igor Pavlov : Public domain |
|
14002
|
|
|
|
|
|
|
|
|
14003
|
|
|
|
|
|
|
#define kNumTopBits 24 |
|
14004
|
|
|
|
|
|
|
#define kTopValue ((uint32_t)1 << kNumTopBits) |
|
14005
|
|
|
|
|
|
|
|
|
14006
|
|
|
|
|
|
|
#define kNumBitModelTotalBits 11 |
|
14007
|
|
|
|
|
|
|
#define kBitModelTotal (1 << kNumBitModelTotalBits) |
|
14008
|
|
|
|
|
|
|
#define kNumMoveBits 5 |
|
14009
|
|
|
|
|
|
|
|
|
14010
|
|
|
|
|
|
|
#define RC_INIT_SIZE 5 |
|
14011
|
|
|
|
|
|
|
|
|
14012
|
|
|
|
|
|
|
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); } |
|
14013
|
|
|
|
|
|
|
|
|
14014
|
|
|
|
|
|
|
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) |
|
14015
|
|
|
|
|
|
|
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits)); |
|
14016
|
|
|
|
|
|
|
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits)); |
|
14017
|
|
|
|
|
|
|
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \ |
|
14018
|
|
|
|
|
|
|
{ UPDATE_0(p); i = (i + i); A0; } else \ |
|
14019
|
|
|
|
|
|
|
{ UPDATE_1(p); i = (i + i) + 1; A1; } |
|
14020
|
|
|
|
|
|
|
#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;) |
|
14021
|
|
|
|
|
|
|
|
|
14022
|
|
|
|
|
|
|
#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); } |
|
14023
|
|
|
|
|
|
|
#define TREE_DECODE(probs, limit, i) \ |
|
14024
|
|
|
|
|
|
|
{ i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; } |
|
14025
|
|
|
|
|
|
|
|
|
14026
|
|
|
|
|
|
|
/* #define _LZMA_SIZE_OPT */ |
|
14027
|
|
|
|
|
|
|
|
|
14028
|
|
|
|
|
|
|
#ifdef _LZMA_SIZE_OPT |
|
14029
|
|
|
|
|
|
|
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i) |
|
14030
|
|
|
|
|
|
|
#else |
|
14031
|
|
|
|
|
|
|
#define TREE_6_DECODE(probs, i) \ |
|
14032
|
|
|
|
|
|
|
{ i = 1; \ |
|
14033
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
|
14034
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
|
14035
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
|
14036
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
|
14037
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
|
14038
|
|
|
|
|
|
|
TREE_GET_BIT(probs, i); \ |
|
14039
|
|
|
|
|
|
|
i -= 0x40; } |
|
14040
|
|
|
|
|
|
|
#endif |
|
14041
|
|
|
|
|
|
|
|
|
14042
|
|
|
|
|
|
|
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); } |
|
14043
|
|
|
|
|
|
|
|
|
14044
|
|
|
|
|
|
|
#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound) |
|
14045
|
|
|
|
|
|
|
#define UPDATE_0_CHECK range = bound; |
|
14046
|
|
|
|
|
|
|
#define UPDATE_1_CHECK range -= bound; code -= bound; |
|
14047
|
|
|
|
|
|
|
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \ |
|
14048
|
|
|
|
|
|
|
{ UPDATE_0_CHECK; i = (i + i); A0; } else \ |
|
14049
|
|
|
|
|
|
|
{ UPDATE_1_CHECK; i = (i + i) + 1; A1; } |
|
14050
|
|
|
|
|
|
|
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;) |
|
14051
|
|
|
|
|
|
|
#define TREE_DECODE_CHECK(probs, limit, i) \ |
|
14052
|
|
|
|
|
|
|
{ i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; } |
|
14053
|
|
|
|
|
|
|
|
|
14054
|
|
|
|
|
|
|
#define kNumPosBitsMax 4 |
|
14055
|
|
|
|
|
|
|
#define kNumPosStatesMax (1 << kNumPosBitsMax) |
|
14056
|
|
|
|
|
|
|
|
|
14057
|
|
|
|
|
|
|
#define kLenNumLowBits 3 |
|
14058
|
|
|
|
|
|
|
#define kLenNumLowSymbols (1 << kLenNumLowBits) |
|
14059
|
|
|
|
|
|
|
#define kLenNumMidBits 3 |
|
14060
|
|
|
|
|
|
|
#define kLenNumMidSymbols (1 << kLenNumMidBits) |
|
14061
|
|
|
|
|
|
|
#define kLenNumHighBits 8 |
|
14062
|
|
|
|
|
|
|
#define kLenNumHighSymbols (1 << kLenNumHighBits) |
|
14063
|
|
|
|
|
|
|
|
|
14064
|
|
|
|
|
|
|
#define LenChoice 0 |
|
14065
|
|
|
|
|
|
|
#define LenChoice2 (LenChoice + 1) |
|
14066
|
|
|
|
|
|
|
#define LenLow (LenChoice2 + 1) |
|
14067
|
|
|
|
|
|
|
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits)) |
|
14068
|
|
|
|
|
|
|
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits)) |
|
14069
|
|
|
|
|
|
|
#define kNumLenProbs (LenHigh + kLenNumHighSymbols) |
|
14070
|
|
|
|
|
|
|
|
|
14071
|
|
|
|
|
|
|
#define kNumStates 12 |
|
14072
|
|
|
|
|
|
|
#define kNumLitStates 7 |
|
14073
|
|
|
|
|
|
|
|
|
14074
|
|
|
|
|
|
|
#define kStartPosModelIndex 4 |
|
14075
|
|
|
|
|
|
|
#define kEndPosModelIndex 14 |
|
14076
|
|
|
|
|
|
|
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) |
|
14077
|
|
|
|
|
|
|
|
|
14078
|
|
|
|
|
|
|
#define kNumPosSlotBits 6 |
|
14079
|
|
|
|
|
|
|
#define kNumLenToPosStates 4 |
|
14080
|
|
|
|
|
|
|
|
|
14081
|
|
|
|
|
|
|
#define kNumAlignBits 4 |
|
14082
|
|
|
|
|
|
|
#define kAlignTableSize (1 << kNumAlignBits) |
|
14083
|
|
|
|
|
|
|
|
|
14084
|
|
|
|
|
|
|
#define kMatchMinLen 2 |
|
14085
|
|
|
|
|
|
|
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols) |
|
14086
|
|
|
|
|
|
|
|
|
14087
|
|
|
|
|
|
|
#define IsMatch 0 |
|
14088
|
|
|
|
|
|
|
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax)) |
|
14089
|
|
|
|
|
|
|
#define IsRepG0 (IsRep + kNumStates) |
|
14090
|
|
|
|
|
|
|
#define IsRepG1 (IsRepG0 + kNumStates) |
|
14091
|
|
|
|
|
|
|
#define IsRepG2 (IsRepG1 + kNumStates) |
|
14092
|
|
|
|
|
|
|
#define IsRep0Long (IsRepG2 + kNumStates) |
|
14093
|
|
|
|
|
|
|
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax)) |
|
14094
|
|
|
|
|
|
|
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits)) |
|
14095
|
|
|
|
|
|
|
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex) |
|
14096
|
|
|
|
|
|
|
#define LenCoder (Align + kAlignTableSize) |
|
14097
|
|
|
|
|
|
|
#define RepLenCoder (LenCoder + kNumLenProbs) |
|
14098
|
|
|
|
|
|
|
#define Literal (RepLenCoder + kNumLenProbs) |
|
14099
|
|
|
|
|
|
|
|
|
14100
|
|
|
|
|
|
|
#define LZMA_BASE_SIZE 1846 |
|
14101
|
|
|
|
|
|
|
#define LZMA_LIT_SIZE 768 |
|
14102
|
|
|
|
|
|
|
|
|
14103
|
|
|
|
|
|
|
#define LzmaProps_GetNumProbs(p) ((uint32_t)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp))) |
|
14104
|
|
|
|
|
|
|
|
|
14105
|
|
|
|
|
|
|
#if Literal != LZMA_BASE_SIZE |
|
14106
|
|
|
|
|
|
|
StopCompilingDueBUG |
|
14107
|
|
|
|
|
|
|
#endif |
|
14108
|
|
|
|
|
|
|
|
|
14109
|
|
|
|
|
|
|
#define LZMA_DIC_MIN (1 << 12) |
|
14110
|
|
|
|
|
|
|
|
|
14111
|
|
|
|
|
|
|
/* First LZMA-symbol is always decoded. |
|
14112
|
|
|
|
|
|
|
And it decodes new LZMA-symbols while (buf < bufLimit), but "buf" is without last normalization |
|
14113
|
|
|
|
|
|
|
Out: |
|
14114
|
|
|
|
|
|
|
Result: |
|
14115
|
|
|
|
|
|
|
SZ_OK - OK |
|
14116
|
|
|
|
|
|
|
SZ_ERROR_DATA - Error |
|
14117
|
|
|
|
|
|
|
p->remainLen: |
|
14118
|
|
|
|
|
|
|
< kMatchSpecLenStart : normal remain |
|
14119
|
|
|
|
|
|
|
= kMatchSpecLenStart : finished |
|
14120
|
|
|
|
|
|
|
= kMatchSpecLenStart + 1 : Flush marker |
|
14121
|
|
|
|
|
|
|
= kMatchSpecLenStart + 2 : State Init Marker |
|
14122
|
|
|
|
|
|
|
*/ |
|
14123
|
|
|
|
|
|
|
|
|
14124
|
120
|
|
|
|
|
|
static int LzmaDec_DecodeReal(CLzmaDec *p, size_t limit, const uint8_t *bufLimit) |
|
14125
|
|
|
|
|
|
|
{ |
|
14126
|
120
|
|
|
|
|
|
CLzmaProb *probs = p->probs; |
|
14127
|
|
|
|
|
|
|
|
|
14128
|
120
|
|
|
|
|
|
unsigned state = p->state; |
|
14129
|
120
|
|
|
|
|
|
uint32_t rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3]; |
|
14130
|
120
|
|
|
|
|
|
unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1; |
|
14131
|
120
|
|
|
|
|
|
unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1; |
|
14132
|
120
|
|
|
|
|
|
unsigned lc = p->prop.lc; |
|
14133
|
|
|
|
|
|
|
|
|
14134
|
120
|
|
|
|
|
|
uint8_t *dic = p->dic; |
|
14135
|
120
|
|
|
|
|
|
size_t dicBufSize = p->dicBufSize; |
|
14136
|
120
|
|
|
|
|
|
size_t dicPos = p->dicPos; |
|
14137
|
|
|
|
|
|
|
|
|
14138
|
120
|
|
|
|
|
|
uint32_t processedPos = p->processedPos; |
|
14139
|
120
|
|
|
|
|
|
uint32_t checkDicSize = p->checkDicSize; |
|
14140
|
|
|
|
|
|
|
unsigned len = 0; |
|
14141
|
|
|
|
|
|
|
|
|
14142
|
120
|
|
|
|
|
|
const uint8_t *buf = p->buf; |
|
14143
|
120
|
|
|
|
|
|
uint32_t range = p->range; |
|
14144
|
120
|
|
|
|
|
|
uint32_t code = p->code; |
|
14145
|
|
|
|
|
|
|
|
|
14146
|
|
|
|
|
|
|
do |
|
14147
|
|
|
|
|
|
|
{ |
|
14148
|
|
|
|
|
|
|
CLzmaProb *prob; |
|
14149
|
|
|
|
|
|
|
uint32_t bound; |
|
14150
|
|
|
|
|
|
|
unsigned ttt; |
|
14151
|
32137
|
|
|
|
|
|
unsigned posState = processedPos & pbMask; |
|
14152
|
|
|
|
|
|
|
|
|
14153
|
32137
|
|
|
|
|
|
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; |
|
14154
|
32137
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
|
100
|
|
|
|
|
|
|
14155
|
|
|
|
|
|
|
{ |
|
14156
|
|
|
|
|
|
|
unsigned symbol; |
|
14157
|
784
|
|
|
|
|
|
UPDATE_0(prob); |
|
14158
|
784
|
|
|
|
|
|
prob = probs + Literal; |
|
14159
|
784
|
100
|
|
|
|
|
if (checkDicSize != 0 || processedPos != 0) |
|
14160
|
1554
|
|
|
|
|
|
prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) + |
|
14161
|
777
|
50
|
|
|
|
|
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); |
|
14162
|
|
|
|
|
|
|
|
|
14163
|
784
|
100
|
|
|
|
|
if (state < kNumLitStates) |
|
14164
|
|
|
|
|
|
|
{ |
|
14165
|
440
|
|
|
|
|
|
state -= (state < 4) ? state : 3; |
|
14166
|
|
|
|
|
|
|
symbol = 1; |
|
14167
|
3520
|
100
|
|
|
|
|
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14168
|
|
|
|
|
|
|
} |
|
14169
|
|
|
|
|
|
|
else |
|
14170
|
|
|
|
|
|
|
{ |
|
14171
|
344
|
50
|
|
|
|
|
unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
|
14172
|
|
|
|
|
|
|
unsigned offs = 0x100; |
|
14173
|
344
|
100
|
|
|
|
|
state -= (state < 10) ? 3 : 6; |
|
14174
|
|
|
|
|
|
|
symbol = 1; |
|
14175
|
|
|
|
|
|
|
do |
|
14176
|
|
|
|
|
|
|
{ |
|
14177
|
|
|
|
|
|
|
unsigned bit; |
|
14178
|
|
|
|
|
|
|
CLzmaProb *probLit; |
|
14179
|
2752
|
|
|
|
|
|
matchByte <<= 1; |
|
14180
|
2752
|
|
|
|
|
|
bit = (matchByte & offs); |
|
14181
|
2752
|
|
|
|
|
|
probLit = prob + offs + bit + symbol; |
|
14182
|
2752
|
100
|
|
|
|
|
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) |
|
|
|
100
|
|
|
|
|
|
|
14183
|
|
|
|
|
|
|
} |
|
14184
|
2752
|
100
|
|
|
|
|
while (symbol < 0x100); |
|
14185
|
|
|
|
|
|
|
} |
|
14186
|
784
|
|
|
|
|
|
dic[dicPos++] = (uint8_t)symbol; |
|
14187
|
784
|
|
|
|
|
|
processedPos++; |
|
14188
|
784
|
|
|
|
|
|
continue; |
|
14189
|
|
|
|
|
|
|
} |
|
14190
|
|
|
|
|
|
|
else |
|
14191
|
|
|
|
|
|
|
{ |
|
14192
|
31353
|
|
|
|
|
|
UPDATE_1(prob); |
|
14193
|
31353
|
|
|
|
|
|
prob = probs + IsRep + state; |
|
14194
|
31353
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
|
100
|
|
|
|
|
|
|
14195
|
|
|
|
|
|
|
{ |
|
14196
|
267
|
|
|
|
|
|
UPDATE_0(prob); |
|
14197
|
267
|
|
|
|
|
|
state += kNumStates; |
|
14198
|
267
|
|
|
|
|
|
prob = probs + LenCoder; |
|
14199
|
|
|
|
|
|
|
} |
|
14200
|
|
|
|
|
|
|
else |
|
14201
|
|
|
|
|
|
|
{ |
|
14202
|
31086
|
|
|
|
|
|
UPDATE_1(prob); |
|
14203
|
31086
|
50
|
|
|
|
|
if (checkDicSize == 0 && processedPos == 0) |
|
14204
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14205
|
31086
|
|
|
|
|
|
prob = probs + IsRepG0 + state; |
|
14206
|
31086
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
|
100
|
|
|
|
|
|
|
14207
|
|
|
|
|
|
|
{ |
|
14208
|
30975
|
|
|
|
|
|
UPDATE_0(prob); |
|
14209
|
30975
|
|
|
|
|
|
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; |
|
14210
|
30975
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
|
100
|
|
|
|
|
|
|
14211
|
|
|
|
|
|
|
{ |
|
14212
|
54
|
|
|
|
|
|
UPDATE_0(prob); |
|
14213
|
54
|
50
|
|
|
|
|
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
|
14214
|
54
|
|
|
|
|
|
dicPos++; |
|
14215
|
54
|
|
|
|
|
|
processedPos++; |
|
14216
|
54
|
100
|
|
|
|
|
state = state < kNumLitStates ? 9 : 11; |
|
14217
|
|
|
|
|
|
|
continue; |
|
14218
|
|
|
|
|
|
|
} |
|
14219
|
30921
|
|
|
|
|
|
UPDATE_1(prob); |
|
14220
|
|
|
|
|
|
|
} |
|
14221
|
|
|
|
|
|
|
else |
|
14222
|
|
|
|
|
|
|
{ |
|
14223
|
|
|
|
|
|
|
uint32_t distance; |
|
14224
|
111
|
|
|
|
|
|
UPDATE_1(prob); |
|
14225
|
111
|
|
|
|
|
|
prob = probs + IsRepG1 + state; |
|
14226
|
111
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
|
100
|
|
|
|
|
|
|
14227
|
|
|
|
|
|
|
{ |
|
14228
|
61
|
|
|
|
|
|
UPDATE_0(prob); |
|
14229
|
|
|
|
|
|
|
distance = rep1; |
|
14230
|
|
|
|
|
|
|
} |
|
14231
|
|
|
|
|
|
|
else |
|
14232
|
|
|
|
|
|
|
{ |
|
14233
|
50
|
|
|
|
|
|
UPDATE_1(prob); |
|
14234
|
50
|
|
|
|
|
|
prob = probs + IsRepG2 + state; |
|
14235
|
50
|
100
|
|
|
|
|
IF_BIT_0(prob) |
|
|
|
100
|
|
|
|
|
|
|
14236
|
|
|
|
|
|
|
{ |
|
14237
|
31
|
|
|
|
|
|
UPDATE_0(prob); |
|
14238
|
|
|
|
|
|
|
distance = rep2; |
|
14239
|
|
|
|
|
|
|
} |
|
14240
|
|
|
|
|
|
|
else |
|
14241
|
|
|
|
|
|
|
{ |
|
14242
|
19
|
|
|
|
|
|
UPDATE_1(prob); |
|
14243
|
|
|
|
|
|
|
distance = rep3; |
|
14244
|
|
|
|
|
|
|
rep3 = rep2; |
|
14245
|
|
|
|
|
|
|
} |
|
14246
|
|
|
|
|
|
|
rep2 = rep1; |
|
14247
|
|
|
|
|
|
|
} |
|
14248
|
|
|
|
|
|
|
rep1 = rep0; |
|
14249
|
|
|
|
|
|
|
rep0 = distance; |
|
14250
|
|
|
|
|
|
|
} |
|
14251
|
31032
|
100
|
|
|
|
|
state = state < kNumLitStates ? 8 : 11; |
|
14252
|
31032
|
|
|
|
|
|
prob = probs + RepLenCoder; |
|
14253
|
|
|
|
|
|
|
} |
|
14254
|
|
|
|
|
|
|
{ |
|
14255
|
|
|
|
|
|
|
unsigned limit, offset; |
|
14256
|
|
|
|
|
|
|
CLzmaProb *probLen = prob + LenChoice; |
|
14257
|
31299
|
100
|
|
|
|
|
IF_BIT_0(probLen) |
|
|
|
100
|
|
|
|
|
|
|
14258
|
|
|
|
|
|
|
{ |
|
14259
|
276
|
|
|
|
|
|
UPDATE_0(probLen); |
|
14260
|
276
|
|
|
|
|
|
probLen = prob + LenLow + (posState << kLenNumLowBits); |
|
14261
|
|
|
|
|
|
|
offset = 0; |
|
14262
|
|
|
|
|
|
|
limit = (1 << kLenNumLowBits); |
|
14263
|
|
|
|
|
|
|
} |
|
14264
|
|
|
|
|
|
|
else |
|
14265
|
|
|
|
|
|
|
{ |
|
14266
|
31023
|
|
|
|
|
|
UPDATE_1(probLen); |
|
14267
|
|
|
|
|
|
|
probLen = prob + LenChoice2; |
|
14268
|
31023
|
100
|
|
|
|
|
IF_BIT_0(probLen) |
|
|
|
100
|
|
|
|
|
|
|
14269
|
|
|
|
|
|
|
{ |
|
14270
|
54
|
|
|
|
|
|
UPDATE_0(probLen); |
|
14271
|
54
|
|
|
|
|
|
probLen = prob + LenMid + (posState << kLenNumMidBits); |
|
14272
|
|
|
|
|
|
|
offset = kLenNumLowSymbols; |
|
14273
|
|
|
|
|
|
|
limit = (1 << kLenNumMidBits); |
|
14274
|
|
|
|
|
|
|
} |
|
14275
|
|
|
|
|
|
|
else |
|
14276
|
|
|
|
|
|
|
{ |
|
14277
|
30969
|
|
|
|
|
|
UPDATE_1(probLen); |
|
14278
|
31299
|
|
|
|
|
|
probLen = prob + LenHigh; |
|
14279
|
|
|
|
|
|
|
offset = kLenNumLowSymbols + kLenNumMidSymbols; |
|
14280
|
|
|
|
|
|
|
limit = (1 << kLenNumHighBits); |
|
14281
|
|
|
|
|
|
|
} |
|
14282
|
|
|
|
|
|
|
} |
|
14283
|
248742
|
100
|
|
|
|
|
TREE_DECODE(probLen, limit, len); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14284
|
31299
|
|
|
|
|
|
len += offset; |
|
14285
|
|
|
|
|
|
|
} |
|
14286
|
|
|
|
|
|
|
|
|
14287
|
31299
|
100
|
|
|
|
|
if (state >= kNumStates) |
|
14288
|
|
|
|
|
|
|
{ |
|
14289
|
|
|
|
|
|
|
uint32_t distance; |
|
14290
|
267
|
|
|
|
|
|
prob = probs + PosSlot + |
|
14291
|
534
|
|
|
|
|
|
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits); |
|
14292
|
267
|
100
|
|
|
|
|
TREE_6_DECODE(prob, distance); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14293
|
267
|
100
|
|
|
|
|
if (distance >= kStartPosModelIndex) |
|
14294
|
|
|
|
|
|
|
{ |
|
14295
|
|
|
|
|
|
|
unsigned posSlot = (unsigned)distance; |
|
14296
|
229
|
|
|
|
|
|
int numDirectBits = (int)(((distance >> 1) - 1)); |
|
14297
|
229
|
|
|
|
|
|
distance = (2 | (distance & 1)); |
|
14298
|
229
|
100
|
|
|
|
|
if (posSlot < kEndPosModelIndex) |
|
14299
|
|
|
|
|
|
|
{ |
|
14300
|
119
|
|
|
|
|
|
distance <<= numDirectBits; |
|
14301
|
119
|
|
|
|
|
|
prob = probs + SpecPos + distance - posSlot - 1; |
|
14302
|
|
|
|
|
|
|
{ |
|
14303
|
|
|
|
|
|
|
uint32_t mask = 1; |
|
14304
|
|
|
|
|
|
|
unsigned i = 1; |
|
14305
|
337
|
100
|
|
|
|
|
do |
|
14306
|
|
|
|
|
|
|
{ |
|
14307
|
337
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= mask); |
|
|
|
100
|
|
|
|
|
|
|
14308
|
337
|
|
|
|
|
|
mask <<= 1; |
|
14309
|
|
|
|
|
|
|
} |
|
14310
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
|
14311
|
|
|
|
|
|
|
} |
|
14312
|
|
|
|
|
|
|
} |
|
14313
|
|
|
|
|
|
|
else |
|
14314
|
|
|
|
|
|
|
{ |
|
14315
|
110
|
|
|
|
|
|
numDirectBits -= kNumAlignBits; |
|
14316
|
1135
|
100
|
|
|
|
|
do |
|
14317
|
|
|
|
|
|
|
{ |
|
14318
|
1135
|
100
|
|
|
|
|
NORMALIZE |
|
14319
|
1135
|
|
|
|
|
|
range >>= 1; |
|
14320
|
|
|
|
|
|
|
|
|
14321
|
|
|
|
|
|
|
{ |
|
14322
|
|
|
|
|
|
|
uint32_t t; |
|
14323
|
1135
|
|
|
|
|
|
code -= range; |
|
14324
|
1135
|
|
|
|
|
|
t = (0 - ((uint32_t)code >> 31)); /* (uint32_t)((int32_t)code >> 31) */ |
|
14325
|
1135
|
|
|
|
|
|
distance = (distance << 1) + (t + 1); |
|
14326
|
1135
|
|
|
|
|
|
code += range & t; |
|
14327
|
|
|
|
|
|
|
} |
|
14328
|
|
|
|
|
|
|
/* |
|
14329
|
|
|
|
|
|
|
distance <<= 1; |
|
14330
|
|
|
|
|
|
|
if (code >= range) |
|
14331
|
|
|
|
|
|
|
{ |
|
14332
|
|
|
|
|
|
|
code -= range; |
|
14333
|
|
|
|
|
|
|
distance |= 1; |
|
14334
|
|
|
|
|
|
|
} |
|
14335
|
|
|
|
|
|
|
*/ |
|
14336
|
|
|
|
|
|
|
} |
|
14337
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
|
14338
|
110
|
|
|
|
|
|
prob = probs + Align; |
|
14339
|
110
|
|
|
|
|
|
distance <<= kNumAlignBits; |
|
14340
|
|
|
|
|
|
|
{ |
|
14341
|
|
|
|
|
|
|
unsigned i = 1; |
|
14342
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 1); |
|
|
|
100
|
|
|
|
|
|
|
14343
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 2); |
|
|
|
100
|
|
|
|
|
|
|
14344
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 4); |
|
|
|
100
|
|
|
|
|
|
|
14345
|
110
|
100
|
|
|
|
|
GET_BIT2(prob + i, i, ; , distance |= 8); |
|
|
|
100
|
|
|
|
|
|
|
14346
|
|
|
|
|
|
|
} |
|
14347
|
110
|
50
|
|
|
|
|
if (distance == (uint32_t)0xFFFFFFFF) |
|
14348
|
|
|
|
|
|
|
{ |
|
14349
|
0
|
|
|
|
|
|
len += kMatchSpecLenStart; |
|
14350
|
0
|
|
|
|
|
|
state -= kNumStates; |
|
14351
|
0
|
|
|
|
|
|
break; |
|
14352
|
|
|
|
|
|
|
} |
|
14353
|
|
|
|
|
|
|
} |
|
14354
|
|
|
|
|
|
|
} |
|
14355
|
|
|
|
|
|
|
rep3 = rep2; |
|
14356
|
|
|
|
|
|
|
rep2 = rep1; |
|
14357
|
|
|
|
|
|
|
rep1 = rep0; |
|
14358
|
267
|
|
|
|
|
|
rep0 = distance + 1; |
|
14359
|
267
|
50
|
|
|
|
|
if (checkDicSize == 0) |
|
14360
|
|
|
|
|
|
|
{ |
|
14361
|
267
|
50
|
|
|
|
|
if (distance >= processedPos) |
|
14362
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14363
|
|
|
|
|
|
|
} |
|
14364
|
0
|
0
|
|
|
|
|
else if (distance >= checkDicSize) |
|
14365
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14366
|
267
|
100
|
|
|
|
|
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; |
|
14367
|
|
|
|
|
|
|
} |
|
14368
|
|
|
|
|
|
|
|
|
14369
|
31299
|
|
|
|
|
|
len += kMatchMinLen; |
|
14370
|
|
|
|
|
|
|
|
|
14371
|
31299
|
50
|
|
|
|
|
if (limit == dicPos) |
|
14372
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14373
|
|
|
|
|
|
|
{ |
|
14374
|
31299
|
|
|
|
|
|
size_t rem = limit - dicPos; |
|
14375
|
31299
|
50
|
|
|
|
|
unsigned curLen = ((rem < len) ? (unsigned)rem : len); |
|
14376
|
31299
|
50
|
|
|
|
|
size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); |
|
14377
|
|
|
|
|
|
|
|
|
14378
|
31299
|
|
|
|
|
|
processedPos += curLen; |
|
14379
|
|
|
|
|
|
|
|
|
14380
|
31299
|
|
|
|
|
|
len -= curLen; |
|
14381
|
31299
|
50
|
|
|
|
|
if (pos + curLen <= dicBufSize) |
|
14382
|
|
|
|
|
|
|
{ |
|
14383
|
31299
|
|
|
|
|
|
uint8_t *dest = dic + dicPos; |
|
14384
|
31299
|
|
|
|
|
|
ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos; |
|
14385
|
31299
|
|
|
|
|
|
const uint8_t *lim = dest + curLen; |
|
14386
|
31299
|
|
|
|
|
|
dicPos += curLen; |
|
14387
|
8428629
|
100
|
|
|
|
|
do |
|
14388
|
8428629
|
|
|
|
|
|
*(dest) = (uint8_t)*(dest + src); |
|
14389
|
|
|
|
|
|
|
while (++dest != lim); |
|
14390
|
|
|
|
|
|
|
} |
|
14391
|
|
|
|
|
|
|
else |
|
14392
|
|
|
|
|
|
|
{ |
|
14393
|
0
|
0
|
|
|
|
|
do |
|
14394
|
|
|
|
|
|
|
{ |
|
14395
|
0
|
|
|
|
|
|
dic[dicPos++] = dic[pos]; |
|
14396
|
0
|
0
|
|
|
|
|
if (++pos == dicBufSize) |
|
14397
|
|
|
|
|
|
|
pos = 0; |
|
14398
|
|
|
|
|
|
|
} |
|
14399
|
|
|
|
|
|
|
while (--curLen != 0); |
|
14400
|
|
|
|
|
|
|
} |
|
14401
|
|
|
|
|
|
|
} |
|
14402
|
|
|
|
|
|
|
} |
|
14403
|
|
|
|
|
|
|
} |
|
14404
|
32137
|
100
|
|
|
|
|
while (dicPos < limit && buf < bufLimit); |
|
14405
|
120
|
100
|
|
|
|
|
NORMALIZE; |
|
14406
|
120
|
|
|
|
|
|
p->buf = buf; |
|
14407
|
120
|
|
|
|
|
|
p->range = range; |
|
14408
|
120
|
|
|
|
|
|
p->code = code; |
|
14409
|
120
|
|
|
|
|
|
p->remainLen = len; |
|
14410
|
120
|
|
|
|
|
|
p->dicPos = dicPos; |
|
14411
|
120
|
|
|
|
|
|
p->processedPos = processedPos; |
|
14412
|
120
|
|
|
|
|
|
p->reps[0] = rep0; |
|
14413
|
120
|
|
|
|
|
|
p->reps[1] = rep1; |
|
14414
|
120
|
|
|
|
|
|
p->reps[2] = rep2; |
|
14415
|
120
|
|
|
|
|
|
p->reps[3] = rep3; |
|
14416
|
120
|
|
|
|
|
|
p->state = state; |
|
14417
|
|
|
|
|
|
|
|
|
14418
|
120
|
|
|
|
|
|
return SZ_OK; |
|
14419
|
|
|
|
|
|
|
} |
|
14420
|
|
|
|
|
|
|
|
|
14421
|
127
|
|
|
|
|
|
static void LzmaDec_WriteRem(CLzmaDec *p, size_t limit) |
|
14422
|
|
|
|
|
|
|
{ |
|
14423
|
127
|
50
|
|
|
|
|
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) |
|
14424
|
|
|
|
|
|
|
{ |
|
14425
|
0
|
|
|
|
|
|
uint8_t *dic = p->dic; |
|
14426
|
0
|
|
|
|
|
|
size_t dicPos = p->dicPos; |
|
14427
|
0
|
|
|
|
|
|
size_t dicBufSize = p->dicBufSize; |
|
14428
|
|
|
|
|
|
|
unsigned len = p->remainLen; |
|
14429
|
0
|
|
|
|
|
|
uint32_t rep0 = p->reps[0]; |
|
14430
|
0
|
0
|
|
|
|
|
if (limit - dicPos < len) |
|
14431
|
0
|
|
|
|
|
|
len = (unsigned)(limit - dicPos); |
|
14432
|
|
|
|
|
|
|
|
|
14433
|
0
|
0
|
|
|
|
|
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) |
|
|
|
0
|
|
|
|
|
|
|
14434
|
0
|
|
|
|
|
|
p->checkDicSize = p->prop.dicSize; |
|
14435
|
|
|
|
|
|
|
|
|
14436
|
0
|
|
|
|
|
|
p->processedPos += len; |
|
14437
|
0
|
|
|
|
|
|
p->remainLen -= len; |
|
14438
|
0
|
0
|
|
|
|
|
while (len-- != 0) |
|
14439
|
|
|
|
|
|
|
{ |
|
14440
|
0
|
0
|
|
|
|
|
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
|
14441
|
0
|
|
|
|
|
|
dicPos++; |
|
14442
|
|
|
|
|
|
|
} |
|
14443
|
0
|
|
|
|
|
|
p->dicPos = dicPos; |
|
14444
|
|
|
|
|
|
|
} |
|
14445
|
127
|
|
|
|
|
|
} |
|
14446
|
|
|
|
|
|
|
|
|
14447
|
240
|
|
|
|
|
|
static int LzmaDec_DecodeReal2(CLzmaDec *p, size_t limit, const uint8_t *bufLimit) |
|
14448
|
|
|
|
|
|
|
{ |
|
14449
|
|
|
|
|
|
|
do |
|
14450
|
|
|
|
|
|
|
{ |
|
14451
|
|
|
|
|
|
|
size_t limit2 = limit; |
|
14452
|
120
|
50
|
|
|
|
|
if (p->checkDicSize == 0) |
|
14453
|
|
|
|
|
|
|
{ |
|
14454
|
120
|
|
|
|
|
|
uint32_t rem = p->prop.dicSize - p->processedPos; |
|
14455
|
120
|
50
|
|
|
|
|
if (limit - p->dicPos > rem) |
|
14456
|
0
|
|
|
|
|
|
limit2 = p->dicPos + rem; |
|
14457
|
|
|
|
|
|
|
} |
|
14458
|
120
|
50
|
|
|
|
|
RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); |
|
14459
|
120
|
50
|
|
|
|
|
if (p->processedPos >= p->prop.dicSize) |
|
14460
|
0
|
|
|
|
|
|
p->checkDicSize = p->prop.dicSize; |
|
14461
|
120
|
|
|
|
|
|
LzmaDec_WriteRem(p, limit); |
|
14462
|
|
|
|
|
|
|
} |
|
14463
|
120
|
100
|
|
|
|
|
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
14464
|
|
|
|
|
|
|
|
|
14465
|
120
|
50
|
|
|
|
|
if (p->remainLen > kMatchSpecLenStart) |
|
14466
|
|
|
|
|
|
|
{ |
|
14467
|
0
|
|
|
|
|
|
p->remainLen = kMatchSpecLenStart; |
|
14468
|
|
|
|
|
|
|
} |
|
14469
|
|
|
|
|
|
|
return 0; |
|
14470
|
|
|
|
|
|
|
} |
|
14471
|
|
|
|
|
|
|
|
|
14472
|
|
|
|
|
|
|
enum ELzmaDummy |
|
14473
|
|
|
|
|
|
|
{ |
|
14474
|
|
|
|
|
|
|
DUMMY_ERROR, /* unexpected end of input stream */ |
|
14475
|
|
|
|
|
|
|
DUMMY_LIT, |
|
14476
|
|
|
|
|
|
|
DUMMY_MATCH, |
|
14477
|
|
|
|
|
|
|
DUMMY_REP |
|
14478
|
|
|
|
|
|
|
}; |
|
14479
|
|
|
|
|
|
|
|
|
14480
|
110
|
|
|
|
|
|
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const uint8_t *buf, size_t inSize) |
|
14481
|
|
|
|
|
|
|
{ |
|
14482
|
110
|
|
|
|
|
|
uint32_t range = p->range; |
|
14483
|
110
|
|
|
|
|
|
uint32_t code = p->code; |
|
14484
|
110
|
|
|
|
|
|
const uint8_t *bufLimit = buf + inSize; |
|
14485
|
110
|
|
|
|
|
|
CLzmaProb *probs = p->probs; |
|
14486
|
110
|
|
|
|
|
|
unsigned state = p->state; |
|
14487
|
|
|
|
|
|
|
ELzmaDummy res; |
|
14488
|
|
|
|
|
|
|
|
|
14489
|
|
|
|
|
|
|
{ |
|
14490
|
|
|
|
|
|
|
CLzmaProb *prob; |
|
14491
|
|
|
|
|
|
|
uint32_t bound; |
|
14492
|
|
|
|
|
|
|
unsigned ttt; |
|
14493
|
110
|
|
|
|
|
|
unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1); |
|
14494
|
|
|
|
|
|
|
|
|
14495
|
110
|
|
|
|
|
|
prob = probs + IsMatch + (state << kNumPosBitsMax) + posState; |
|
14496
|
110
|
50
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14497
|
|
|
|
|
|
|
{ |
|
14498
|
|
|
|
|
|
|
UPDATE_0_CHECK |
|
14499
|
|
|
|
|
|
|
|
|
14500
|
|
|
|
|
|
|
/* if (bufLimit - buf >= 7) return DUMMY_LIT; */ |
|
14501
|
|
|
|
|
|
|
|
|
14502
|
69
|
|
|
|
|
|
prob = probs + Literal; |
|
14503
|
69
|
100
|
|
|
|
|
if (p->checkDicSize != 0 || p->processedPos != 0) |
|
14504
|
68
|
|
|
|
|
|
prob += (LZMA_LIT_SIZE * |
|
14505
|
136
|
|
|
|
|
|
((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) + |
|
14506
|
68
|
50
|
|
|
|
|
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); |
|
14507
|
|
|
|
|
|
|
|
|
14508
|
69
|
100
|
|
|
|
|
if (state < kNumLitStates) |
|
14509
|
|
|
|
|
|
|
{ |
|
14510
|
|
|
|
|
|
|
unsigned symbol = 1; |
|
14511
|
352
|
100
|
|
|
|
|
do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14512
|
|
|
|
|
|
|
} |
|
14513
|
|
|
|
|
|
|
else |
|
14514
|
|
|
|
|
|
|
{ |
|
14515
|
50
|
|
|
|
|
|
unsigned matchByte = p->dic[p->dicPos - p->reps[0] + |
|
14516
|
25
|
50
|
|
|
|
|
((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; |
|
14517
|
|
|
|
|
|
|
unsigned offs = 0x100; |
|
14518
|
|
|
|
|
|
|
unsigned symbol = 1; |
|
14519
|
|
|
|
|
|
|
do |
|
14520
|
|
|
|
|
|
|
{ |
|
14521
|
|
|
|
|
|
|
unsigned bit; |
|
14522
|
|
|
|
|
|
|
CLzmaProb *probLit; |
|
14523
|
200
|
|
|
|
|
|
matchByte <<= 1; |
|
14524
|
200
|
|
|
|
|
|
bit = (matchByte & offs); |
|
14525
|
200
|
|
|
|
|
|
probLit = prob + offs + bit + symbol; |
|
14526
|
200
|
100
|
|
|
|
|
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14527
|
|
|
|
|
|
|
} |
|
14528
|
200
|
100
|
|
|
|
|
while (symbol < 0x100); |
|
14529
|
|
|
|
|
|
|
} |
|
14530
|
|
|
|
|
|
|
res = DUMMY_LIT; |
|
14531
|
|
|
|
|
|
|
} |
|
14532
|
|
|
|
|
|
|
else |
|
14533
|
|
|
|
|
|
|
{ |
|
14534
|
|
|
|
|
|
|
unsigned len; |
|
14535
|
41
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14536
|
|
|
|
|
|
|
|
|
14537
|
41
|
|
|
|
|
|
prob = probs + IsRep + state; |
|
14538
|
41
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14539
|
|
|
|
|
|
|
{ |
|
14540
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14541
|
|
|
|
|
|
|
state = 0; |
|
14542
|
20
|
|
|
|
|
|
prob = probs + LenCoder; |
|
14543
|
|
|
|
|
|
|
res = DUMMY_MATCH; |
|
14544
|
|
|
|
|
|
|
} |
|
14545
|
|
|
|
|
|
|
else |
|
14546
|
|
|
|
|
|
|
{ |
|
14547
|
21
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14548
|
|
|
|
|
|
|
res = DUMMY_REP; |
|
14549
|
21
|
|
|
|
|
|
prob = probs + IsRepG0 + state; |
|
14550
|
21
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14551
|
|
|
|
|
|
|
{ |
|
14552
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14553
|
15
|
|
|
|
|
|
prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState; |
|
14554
|
15
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14555
|
|
|
|
|
|
|
{ |
|
14556
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14557
|
9
|
100
|
|
|
|
|
NORMALIZE_CHECK; |
|
|
|
50
|
|
|
|
|
|
|
14558
|
|
|
|
|
|
|
return DUMMY_REP; |
|
14559
|
|
|
|
|
|
|
} |
|
14560
|
|
|
|
|
|
|
else |
|
14561
|
|
|
|
|
|
|
{ |
|
14562
|
6
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14563
|
|
|
|
|
|
|
} |
|
14564
|
|
|
|
|
|
|
} |
|
14565
|
|
|
|
|
|
|
else |
|
14566
|
|
|
|
|
|
|
{ |
|
14567
|
6
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14568
|
6
|
|
|
|
|
|
prob = probs + IsRepG1 + state; |
|
14569
|
6
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14570
|
|
|
|
|
|
|
{ |
|
14571
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14572
|
|
|
|
|
|
|
} |
|
14573
|
|
|
|
|
|
|
else |
|
14574
|
|
|
|
|
|
|
{ |
|
14575
|
4
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14576
|
4
|
|
|
|
|
|
prob = probs + IsRepG2 + state; |
|
14577
|
4
|
100
|
|
|
|
|
IF_BIT_0_CHECK(prob) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14578
|
|
|
|
|
|
|
{ |
|
14579
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14580
|
|
|
|
|
|
|
} |
|
14581
|
|
|
|
|
|
|
else |
|
14582
|
|
|
|
|
|
|
{ |
|
14583
|
1
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14584
|
|
|
|
|
|
|
} |
|
14585
|
|
|
|
|
|
|
} |
|
14586
|
|
|
|
|
|
|
} |
|
14587
|
|
|
|
|
|
|
state = kNumStates; |
|
14588
|
12
|
|
|
|
|
|
prob = probs + RepLenCoder; |
|
14589
|
|
|
|
|
|
|
} |
|
14590
|
|
|
|
|
|
|
{ |
|
14591
|
|
|
|
|
|
|
unsigned limit, offset; |
|
14592
|
|
|
|
|
|
|
CLzmaProb *probLen = prob + LenChoice; |
|
14593
|
32
|
100
|
|
|
|
|
IF_BIT_0_CHECK(probLen) |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14594
|
|
|
|
|
|
|
{ |
|
14595
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14596
|
23
|
|
|
|
|
|
probLen = prob + LenLow + (posState << kLenNumLowBits); |
|
14597
|
|
|
|
|
|
|
offset = 0; |
|
14598
|
|
|
|
|
|
|
limit = 1 << kLenNumLowBits; |
|
14599
|
|
|
|
|
|
|
} |
|
14600
|
|
|
|
|
|
|
else |
|
14601
|
|
|
|
|
|
|
{ |
|
14602
|
9
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14603
|
|
|
|
|
|
|
probLen = prob + LenChoice2; |
|
14604
|
9
|
50
|
|
|
|
|
IF_BIT_0_CHECK(probLen) |
|
|
|
0
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14605
|
|
|
|
|
|
|
{ |
|
14606
|
|
|
|
|
|
|
UPDATE_0_CHECK; |
|
14607
|
4
|
|
|
|
|
|
probLen = prob + LenMid + (posState << kLenNumMidBits); |
|
14608
|
|
|
|
|
|
|
offset = kLenNumLowSymbols; |
|
14609
|
|
|
|
|
|
|
limit = 1 << kLenNumMidBits; |
|
14610
|
|
|
|
|
|
|
} |
|
14611
|
|
|
|
|
|
|
else |
|
14612
|
|
|
|
|
|
|
{ |
|
14613
|
5
|
|
|
|
|
|
UPDATE_1_CHECK; |
|
14614
|
32
|
|
|
|
|
|
probLen = prob + LenHigh; |
|
14615
|
|
|
|
|
|
|
offset = kLenNumLowSymbols + kLenNumMidSymbols; |
|
14616
|
|
|
|
|
|
|
limit = 1 << kLenNumHighBits; |
|
14617
|
|
|
|
|
|
|
} |
|
14618
|
|
|
|
|
|
|
} |
|
14619
|
121
|
100
|
|
|
|
|
TREE_DECODE_CHECK(probLen, limit, len); |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14620
|
32
|
|
|
|
|
|
len += offset; |
|
14621
|
|
|
|
|
|
|
} |
|
14622
|
|
|
|
|
|
|
|
|
14623
|
32
|
100
|
|
|
|
|
if (state < 4) |
|
14624
|
|
|
|
|
|
|
{ |
|
14625
|
|
|
|
|
|
|
unsigned posSlot; |
|
14626
|
20
|
|
|
|
|
|
prob = probs + PosSlot + |
|
14627
|
20
|
|
|
|
|
|
((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << |
|
14628
|
20
|
|
|
|
|
|
kNumPosSlotBits); |
|
14629
|
120
|
100
|
|
|
|
|
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14630
|
20
|
100
|
|
|
|
|
if (posSlot >= kStartPosModelIndex) |
|
14631
|
|
|
|
|
|
|
{ |
|
14632
|
18
|
|
|
|
|
|
int numDirectBits = ((posSlot >> 1) - 1); |
|
14633
|
|
|
|
|
|
|
|
|
14634
|
|
|
|
|
|
|
/* if (bufLimit - buf >= 8) return DUMMY_MATCH; */ |
|
14635
|
|
|
|
|
|
|
|
|
14636
|
18
|
100
|
|
|
|
|
if (posSlot < kEndPosModelIndex) |
|
14637
|
|
|
|
|
|
|
{ |
|
14638
|
10
|
|
|
|
|
|
prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1; |
|
14639
|
|
|
|
|
|
|
} |
|
14640
|
|
|
|
|
|
|
else |
|
14641
|
|
|
|
|
|
|
{ |
|
14642
|
8
|
|
|
|
|
|
numDirectBits -= kNumAlignBits; |
|
14643
|
55
|
100
|
|
|
|
|
do |
|
14644
|
|
|
|
|
|
|
{ |
|
14645
|
55
|
100
|
|
|
|
|
NORMALIZE_CHECK |
|
|
|
50
|
|
|
|
|
|
|
14646
|
55
|
|
|
|
|
|
range >>= 1; |
|
14647
|
55
|
|
|
|
|
|
code -= range & (((code - range) >> 31) - 1); |
|
14648
|
|
|
|
|
|
|
/* if (code >= range) code -= range; */ |
|
14649
|
|
|
|
|
|
|
} |
|
14650
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
|
14651
|
18
|
|
|
|
|
|
prob = probs + Align; |
|
14652
|
|
|
|
|
|
|
numDirectBits = kNumAlignBits; |
|
14653
|
|
|
|
|
|
|
} |
|
14654
|
|
|
|
|
|
|
{ |
|
14655
|
|
|
|
|
|
|
unsigned i = 1; |
|
14656
|
63
|
100
|
|
|
|
|
do |
|
14657
|
|
|
|
|
|
|
{ |
|
14658
|
63
|
100
|
|
|
|
|
GET_BIT_CHECK(prob + i, i); |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
14659
|
|
|
|
|
|
|
} |
|
14660
|
|
|
|
|
|
|
while (--numDirectBits != 0); |
|
14661
|
|
|
|
|
|
|
} |
|
14662
|
|
|
|
|
|
|
} |
|
14663
|
|
|
|
|
|
|
} |
|
14664
|
|
|
|
|
|
|
} |
|
14665
|
|
|
|
|
|
|
} |
|
14666
|
101
|
100
|
|
|
|
|
NORMALIZE_CHECK; |
|
|
|
50
|
|
|
|
|
|
|
14667
|
|
|
|
|
|
|
return res; |
|
14668
|
|
|
|
|
|
|
} |
|
14669
|
|
|
|
|
|
|
|
|
14670
|
|
|
|
|
|
|
static void LzmaDec_InitRc(CLzmaDec *p, const uint8_t *data) |
|
14671
|
|
|
|
|
|
|
{ |
|
14672
|
7
|
|
|
|
|
|
p->code = ((uint32_t)data[1] << 24) | ((uint32_t)data[2] << 16) | ((uint32_t)data[3] << 8) | ((uint32_t)data[4]); |
|
14673
|
7
|
|
|
|
|
|
p->range = 0xFFFFFFFF; |
|
14674
|
7
|
|
|
|
|
|
p->needFlush = 0; |
|
14675
|
|
|
|
|
|
|
} |
|
14676
|
|
|
|
|
|
|
|
|
14677
|
0
|
|
|
|
|
|
void LzmaDec_InitDicAndState(CLzmaDec *p, bool initDic, bool initState) |
|
14678
|
|
|
|
|
|
|
{ |
|
14679
|
7
|
|
|
|
|
|
p->needFlush = 1; |
|
14680
|
7
|
|
|
|
|
|
p->remainLen = 0; |
|
14681
|
7
|
|
|
|
|
|
p->tempBufSize = 0; |
|
14682
|
|
|
|
|
|
|
|
|
14683
|
0
|
0
|
|
|
|
|
if (initDic) |
|
14684
|
|
|
|
|
|
|
{ |
|
14685
|
7
|
|
|
|
|
|
p->processedPos = 0; |
|
14686
|
7
|
|
|
|
|
|
p->checkDicSize = 0; |
|
14687
|
0
|
|
|
|
|
|
p->needInitState = 1; |
|
14688
|
|
|
|
|
|
|
} |
|
14689
|
0
|
0
|
|
|
|
|
if (initState) |
|
14690
|
0
|
|
|
|
|
|
p->needInitState = 1; |
|
14691
|
0
|
|
|
|
|
|
} |
|
14692
|
|
|
|
|
|
|
|
|
14693
|
0
|
|
|
|
|
|
void LzmaDec_Init(CLzmaDec *p) |
|
14694
|
|
|
|
|
|
|
{ |
|
14695
|
7
|
|
|
|
|
|
p->dicPos = 0; |
|
14696
|
|
|
|
|
|
|
LzmaDec_InitDicAndState(p, true, true); |
|
14697
|
0
|
|
|
|
|
|
} |
|
14698
|
|
|
|
|
|
|
|
|
14699
|
|
|
|
|
|
|
static void LzmaDec_InitStateReal(CLzmaDec *p) |
|
14700
|
|
|
|
|
|
|
{ |
|
14701
|
7
|
|
|
|
|
|
uint32_t numProbs = Literal + ((uint32_t)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp)); |
|
14702
|
|
|
|
|
|
|
uint32_t i; |
|
14703
|
7
|
|
|
|
|
|
CLzmaProb *probs = p->probs; |
|
14704
|
55937
|
100
|
|
|
|
|
for (i = 0; i < numProbs; i++) |
|
14705
|
55930
|
|
|
|
|
|
probs[i] = kBitModelTotal >> 1; |
|
14706
|
7
|
|
|
|
|
|
p->reps[0] = p->reps[1] = p->reps[2] = p->reps[3] = 1; |
|
14707
|
7
|
|
|
|
|
|
p->state = 0; |
|
14708
|
7
|
|
|
|
|
|
p->needInitState = 0; |
|
14709
|
|
|
|
|
|
|
} |
|
14710
|
|
|
|
|
|
|
|
|
14711
|
7
|
|
|
|
|
|
SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, const uint8_t *src, size_t *srcLen, |
|
14712
|
|
|
|
|
|
|
ELzmaFinishMode finishMode, ELzmaStatus *status) |
|
14713
|
|
|
|
|
|
|
{ |
|
14714
|
7
|
|
|
|
|
|
size_t inSize = *srcLen; |
|
14715
|
7
|
|
|
|
|
|
(*srcLen) = 0; |
|
14716
|
7
|
|
|
|
|
|
LzmaDec_WriteRem(p, dicLimit); |
|
14717
|
|
|
|
|
|
|
|
|
14718
|
127
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_SPECIFIED; |
|
14719
|
|
|
|
|
|
|
|
|
14720
|
127
|
50
|
|
|
|
|
while (p->remainLen != kMatchSpecLenStart) |
|
14721
|
|
|
|
|
|
|
{ |
|
14722
|
|
|
|
|
|
|
int checkEndMarkNow; |
|
14723
|
|
|
|
|
|
|
|
|
14724
|
127
|
100
|
|
|
|
|
if (p->needFlush != 0) |
|
14725
|
|
|
|
|
|
|
{ |
|
14726
|
42
|
50
|
|
|
|
|
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) |
|
|
|
100
|
|
|
|
|
|
|
14727
|
35
|
|
|
|
|
|
p->tempBuf[p->tempBufSize++] = *src++; |
|
14728
|
7
|
50
|
|
|
|
|
if (p->tempBufSize < RC_INIT_SIZE) |
|
14729
|
|
|
|
|
|
|
{ |
|
14730
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NEEDS_MORE_INPUT; |
|
14731
|
0
|
|
|
|
|
|
return SZ_OK; |
|
14732
|
|
|
|
|
|
|
} |
|
14733
|
7
|
50
|
|
|
|
|
if (p->tempBuf[0] != 0) |
|
14734
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14735
|
|
|
|
|
|
|
|
|
14736
|
|
|
|
|
|
|
LzmaDec_InitRc(p, p->tempBuf); |
|
14737
|
7
|
|
|
|
|
|
p->tempBufSize = 0; |
|
14738
|
|
|
|
|
|
|
} |
|
14739
|
|
|
|
|
|
|
|
|
14740
|
|
|
|
|
|
|
checkEndMarkNow = 0; |
|
14741
|
127
|
100
|
|
|
|
|
if (p->dicPos >= dicLimit) |
|
14742
|
|
|
|
|
|
|
{ |
|
14743
|
7
|
50
|
|
|
|
|
if (p->remainLen == 0 && p->code == 0) |
|
|
|
50
|
|
|
|
|
|
|
14744
|
|
|
|
|
|
|
{ |
|
14745
|
7
|
|
|
|
|
|
*status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK; |
|
14746
|
7
|
|
|
|
|
|
return SZ_OK; |
|
14747
|
|
|
|
|
|
|
} |
|
14748
|
0
|
0
|
|
|
|
|
if (finishMode == LZMA_FINISH_ANY) |
|
14749
|
|
|
|
|
|
|
{ |
|
14750
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
|
14751
|
0
|
|
|
|
|
|
return SZ_OK; |
|
14752
|
|
|
|
|
|
|
} |
|
14753
|
0
|
0
|
|
|
|
|
if (p->remainLen != 0) |
|
14754
|
|
|
|
|
|
|
{ |
|
14755
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
|
14756
|
0
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14757
|
|
|
|
|
|
|
} |
|
14758
|
|
|
|
|
|
|
checkEndMarkNow = 1; |
|
14759
|
|
|
|
|
|
|
} |
|
14760
|
|
|
|
|
|
|
|
|
14761
|
120
|
100
|
|
|
|
|
if (p->needInitState) |
|
14762
|
|
|
|
|
|
|
LzmaDec_InitStateReal(p); |
|
14763
|
|
|
|
|
|
|
|
|
14764
|
120
|
50
|
|
|
|
|
if (p->tempBufSize == 0) |
|
14765
|
|
|
|
|
|
|
{ |
|
14766
|
|
|
|
|
|
|
size_t processed; |
|
14767
|
|
|
|
|
|
|
const uint8_t *bufLimit; |
|
14768
|
120
|
100
|
|
|
|
|
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) |
|
14769
|
|
|
|
|
|
|
{ |
|
14770
|
110
|
|
|
|
|
|
int dummyRes = LzmaDec_TryDummy(p, src, inSize); |
|
14771
|
110
|
50
|
|
|
|
|
if (dummyRes == DUMMY_ERROR) |
|
14772
|
|
|
|
|
|
|
{ |
|
14773
|
0
|
|
|
|
|
|
memcpy(p->tempBuf, src, inSize); |
|
14774
|
0
|
|
|
|
|
|
p->tempBufSize = (unsigned)inSize; |
|
14775
|
0
|
|
|
|
|
|
(*srcLen) += inSize; |
|
14776
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NEEDS_MORE_INPUT; |
|
14777
|
0
|
|
|
|
|
|
return SZ_OK; |
|
14778
|
|
|
|
|
|
|
} |
|
14779
|
110
|
50
|
|
|
|
|
if (checkEndMarkNow && dummyRes != DUMMY_MATCH) |
|
14780
|
|
|
|
|
|
|
{ |
|
14781
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
|
14782
|
0
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14783
|
|
|
|
|
|
|
} |
|
14784
|
|
|
|
|
|
|
bufLimit = src; |
|
14785
|
|
|
|
|
|
|
} |
|
14786
|
|
|
|
|
|
|
else |
|
14787
|
10
|
|
|
|
|
|
bufLimit = src + inSize - LZMA_REQUIRED_INPUT_MAX; |
|
14788
|
120
|
|
|
|
|
|
p->buf = src; |
|
14789
|
120
|
50
|
|
|
|
|
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) |
|
14790
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14791
|
120
|
|
|
|
|
|
processed = (size_t)(p->buf - src); |
|
14792
|
120
|
|
|
|
|
|
(*srcLen) += processed; |
|
14793
|
|
|
|
|
|
|
src += processed; |
|
14794
|
120
|
|
|
|
|
|
inSize -= processed; |
|
14795
|
|
|
|
|
|
|
} |
|
14796
|
|
|
|
|
|
|
else |
|
14797
|
|
|
|
|
|
|
{ |
|
14798
|
|
|
|
|
|
|
unsigned rem = p->tempBufSize, lookAhead = 0; |
|
14799
|
0
|
0
|
|
|
|
|
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) |
|
|
|
0
|
|
|
|
|
|
|
14800
|
0
|
|
|
|
|
|
p->tempBuf[rem++] = src[lookAhead++]; |
|
14801
|
0
|
|
|
|
|
|
p->tempBufSize = rem; |
|
14802
|
0
|
0
|
|
|
|
|
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) |
|
14803
|
|
|
|
|
|
|
{ |
|
14804
|
0
|
|
|
|
|
|
int dummyRes = LzmaDec_TryDummy(p, p->tempBuf, rem); |
|
14805
|
0
|
0
|
|
|
|
|
if (dummyRes == DUMMY_ERROR) |
|
14806
|
|
|
|
|
|
|
{ |
|
14807
|
0
|
|
|
|
|
|
(*srcLen) += lookAhead; |
|
14808
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NEEDS_MORE_INPUT; |
|
14809
|
0
|
|
|
|
|
|
return SZ_OK; |
|
14810
|
|
|
|
|
|
|
} |
|
14811
|
0
|
0
|
|
|
|
|
if (checkEndMarkNow && dummyRes != DUMMY_MATCH) |
|
14812
|
|
|
|
|
|
|
{ |
|
14813
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_NOT_FINISHED; |
|
14814
|
0
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14815
|
|
|
|
|
|
|
} |
|
14816
|
|
|
|
|
|
|
} |
|
14817
|
0
|
|
|
|
|
|
p->buf = p->tempBuf; |
|
14818
|
0
|
0
|
|
|
|
|
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) |
|
14819
|
|
|
|
|
|
|
return SZ_ERROR_DATA; |
|
14820
|
0
|
|
|
|
|
|
lookAhead -= (rem - (unsigned)(p->buf - p->tempBuf)); |
|
14821
|
0
|
|
|
|
|
|
(*srcLen) += lookAhead; |
|
14822
|
0
|
|
|
|
|
|
src += lookAhead; |
|
14823
|
0
|
|
|
|
|
|
inSize -= lookAhead; |
|
14824
|
0
|
|
|
|
|
|
p->tempBufSize = 0; |
|
14825
|
|
|
|
|
|
|
} |
|
14826
|
|
|
|
|
|
|
} |
|
14827
|
0
|
0
|
|
|
|
|
if (p->code == 0) |
|
14828
|
0
|
|
|
|
|
|
*status = LZMA_STATUS_FINISHED_WITH_MARK; |
|
14829
|
0
|
|
|
|
|
|
return (p->code == 0) ? SZ_OK : SZ_ERROR_DATA; |
|
14830
|
|
|
|
|
|
|
} |
|
14831
|
|
|
|
|
|
|
|
|
14832
|
0
|
|
|
|
|
|
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status) |
|
14833
|
|
|
|
|
|
|
{ |
|
14834
|
0
|
|
|
|
|
|
size_t outSize = *destLen; |
|
14835
|
0
|
|
|
|
|
|
size_t inSize = *srcLen; |
|
14836
|
0
|
|
|
|
|
|
*srcLen = *destLen = 0; |
|
14837
|
0
|
|
|
|
|
|
for (;;) |
|
14838
|
|
|
|
|
|
|
{ |
|
14839
|
0
|
|
|
|
|
|
size_t inSizeCur = inSize, outSizeCur, dicPos; |
|
14840
|
|
|
|
|
|
|
ELzmaFinishMode curFinishMode; |
|
14841
|
|
|
|
|
|
|
SRes res; |
|
14842
|
0
|
0
|
|
|
|
|
if (p->dicPos == p->dicBufSize) |
|
14843
|
0
|
|
|
|
|
|
p->dicPos = 0; |
|
14844
|
0
|
|
|
|
|
|
dicPos = p->dicPos; |
|
14845
|
0
|
0
|
|
|
|
|
if (outSize > p->dicBufSize - dicPos) |
|
14846
|
|
|
|
|
|
|
{ |
|
14847
|
|
|
|
|
|
|
outSizeCur = p->dicBufSize; |
|
14848
|
|
|
|
|
|
|
curFinishMode = LZMA_FINISH_ANY; |
|
14849
|
|
|
|
|
|
|
} |
|
14850
|
|
|
|
|
|
|
else |
|
14851
|
|
|
|
|
|
|
{ |
|
14852
|
0
|
|
|
|
|
|
outSizeCur = dicPos + outSize; |
|
14853
|
|
|
|
|
|
|
curFinishMode = finishMode; |
|
14854
|
|
|
|
|
|
|
} |
|
14855
|
|
|
|
|
|
|
|
|
14856
|
0
|
|
|
|
|
|
res = LzmaDec_DecodeToDic(p, outSizeCur, src, &inSizeCur, curFinishMode, status); |
|
14857
|
0
|
|
|
|
|
|
src += inSizeCur; |
|
14858
|
0
|
|
|
|
|
|
inSize -= inSizeCur; |
|
14859
|
0
|
|
|
|
|
|
*srcLen += inSizeCur; |
|
14860
|
0
|
|
|
|
|
|
outSizeCur = p->dicPos - dicPos; |
|
14861
|
0
|
|
|
|
|
|
memcpy(dest, p->dic + dicPos, outSizeCur); |
|
14862
|
0
|
|
|
|
|
|
dest += outSizeCur; |
|
14863
|
0
|
|
|
|
|
|
outSize -= outSizeCur; |
|
14864
|
0
|
|
|
|
|
|
*destLen += outSizeCur; |
|
14865
|
0
|
0
|
|
|
|
|
if (res != 0) |
|
14866
|
0
|
|
|
|
|
|
return res; |
|
14867
|
0
|
0
|
|
|
|
|
if (outSizeCur == 0 || outSize == 0) |
|
14868
|
|
|
|
|
|
|
return SZ_OK; |
|
14869
|
|
|
|
|
|
|
} |
|
14870
|
|
|
|
|
|
|
} |
|
14871
|
|
|
|
|
|
|
|
|
14872
|
0
|
|
|
|
|
|
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc) |
|
14873
|
|
|
|
|
|
|
{ |
|
14874
|
14
|
|
|
|
|
|
alloc->Free(alloc, p->probs); |
|
14875
|
7
|
|
|
|
|
|
p->probs = 0; |
|
14876
|
0
|
|
|
|
|
|
} |
|
14877
|
|
|
|
|
|
|
|
|
14878
|
|
|
|
|
|
|
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc) |
|
14879
|
|
|
|
|
|
|
{ |
|
14880
|
0
|
|
|
|
|
|
alloc->Free(alloc, p->dic); |
|
14881
|
0
|
|
|
|
|
|
p->dic = 0; |
|
14882
|
|
|
|
|
|
|
} |
|
14883
|
|
|
|
|
|
|
|
|
14884
|
0
|
|
|
|
|
|
void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc) |
|
14885
|
|
|
|
|
|
|
{ |
|
14886
|
|
|
|
|
|
|
LzmaDec_FreeProbs(p, alloc); |
|
14887
|
|
|
|
|
|
|
LzmaDec_FreeDict(p, alloc); |
|
14888
|
0
|
|
|
|
|
|
} |
|
14889
|
|
|
|
|
|
|
|
|
14890
|
7
|
|
|
|
|
|
SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size) |
|
14891
|
|
|
|
|
|
|
{ |
|
14892
|
|
|
|
|
|
|
uint32_t dicSize; |
|
14893
|
|
|
|
|
|
|
uint8_t d; |
|
14894
|
|
|
|
|
|
|
|
|
14895
|
7
|
50
|
|
|
|
|
if (size < LZMA_PROPS_SIZE) |
|
14896
|
|
|
|
|
|
|
return SZ_ERROR_UNSUPPORTED; |
|
14897
|
|
|
|
|
|
|
else |
|
14898
|
7
|
|
|
|
|
|
dicSize = data[1] | ((uint32_t)data[2] << 8) | ((uint32_t)data[3] << 16) | ((uint32_t)data[4] << 24); |
|
14899
|
|
|
|
|
|
|
|
|
14900
|
7
|
50
|
|
|
|
|
if (dicSize < LZMA_DIC_MIN) |
|
14901
|
|
|
|
|
|
|
dicSize = LZMA_DIC_MIN; |
|
14902
|
7
|
|
|
|
|
|
p->dicSize = dicSize; |
|
14903
|
|
|
|
|
|
|
|
|
14904
|
7
|
|
|
|
|
|
d = data[0]; |
|
14905
|
7
|
50
|
|
|
|
|
if (d >= (9 * 5 * 5)) |
|
14906
|
|
|
|
|
|
|
return SZ_ERROR_UNSUPPORTED; |
|
14907
|
|
|
|
|
|
|
|
|
14908
|
7
|
|
|
|
|
|
p->lc = d % 9; |
|
14909
|
7
|
|
|
|
|
|
d /= 9; |
|
14910
|
7
|
|
|
|
|
|
p->pb = d / 5; |
|
14911
|
7
|
|
|
|
|
|
p->lp = d % 5; |
|
14912
|
|
|
|
|
|
|
|
|
14913
|
7
|
|
|
|
|
|
return SZ_OK; |
|
14914
|
|
|
|
|
|
|
} |
|
14915
|
|
|
|
|
|
|
|
|
14916
|
14
|
|
|
|
|
|
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc) |
|
14917
|
|
|
|
|
|
|
{ |
|
14918
|
7
|
|
|
|
|
|
uint32_t numProbs = LzmaProps_GetNumProbs(propNew); |
|
14919
|
7
|
50
|
|
|
|
|
if (p->probs == 0 || numProbs != p->numProbs) |
|
|
|
0
|
|
|
|
|
|
|
14920
|
|
|
|
|
|
|
{ |
|
14921
|
|
|
|
|
|
|
LzmaDec_FreeProbs(p, alloc); |
|
14922
|
7
|
|
|
|
|
|
p->probs = (CLzmaProb *)alloc->Alloc(alloc, numProbs * sizeof(CLzmaProb)); |
|
14923
|
7
|
|
|
|
|
|
p->numProbs = numProbs; |
|
14924
|
7
|
50
|
|
|
|
|
if (p->probs == 0) |
|
14925
|
|
|
|
|
|
|
return SZ_ERROR_MEM; |
|
14926
|
|
|
|
|
|
|
} |
|
14927
|
|
|
|
|
|
|
return SZ_OK; |
|
14928
|
|
|
|
|
|
|
} |
|
14929
|
|
|
|
|
|
|
|
|
14930
|
7
|
|
|
|
|
|
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc) |
|
14931
|
|
|
|
|
|
|
{ |
|
14932
|
|
|
|
|
|
|
CLzmaProps propNew; |
|
14933
|
7
|
50
|
|
|
|
|
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); |
|
14934
|
7
|
50
|
|
|
|
|
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); |
|
14935
|
7
|
|
|
|
|
|
p->prop = propNew; |
|
14936
|
7
|
|
|
|
|
|
return SZ_OK; |
|
14937
|
|
|
|
|
|
|
} |
|
14938
|
|
|
|
|
|
|
|
|
14939
|
0
|
|
|
|
|
|
SRes LzmaDec_Allocate(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc) |
|
14940
|
|
|
|
|
|
|
{ |
|
14941
|
|
|
|
|
|
|
CLzmaProps propNew; |
|
14942
|
|
|
|
|
|
|
size_t dicBufSize; |
|
14943
|
0
|
0
|
|
|
|
|
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); |
|
14944
|
0
|
0
|
|
|
|
|
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); |
|
14945
|
0
|
|
|
|
|
|
dicBufSize = propNew.dicSize; |
|
14946
|
0
|
0
|
|
|
|
|
if (p->dic == 0 || dicBufSize != p->dicBufSize) |
|
|
|
0
|
|
|
|
|
|
|
14947
|
|
|
|
|
|
|
{ |
|
14948
|
|
|
|
|
|
|
LzmaDec_FreeDict(p, alloc); |
|
14949
|
0
|
|
|
|
|
|
p->dic = (uint8_t *)alloc->Alloc(alloc, dicBufSize); |
|
14950
|
0
|
0
|
|
|
|
|
if (p->dic == 0) |
|
14951
|
|
|
|
|
|
|
{ |
|
14952
|
|
|
|
|
|
|
LzmaDec_FreeProbs(p, alloc); |
|
14953
|
0
|
|
|
|
|
|
return SZ_ERROR_MEM; |
|
14954
|
|
|
|
|
|
|
} |
|
14955
|
|
|
|
|
|
|
} |
|
14956
|
0
|
|
|
|
|
|
p->dicBufSize = dicBufSize; |
|
14957
|
0
|
|
|
|
|
|
p->prop = propNew; |
|
14958
|
0
|
|
|
|
|
|
return SZ_OK; |
|
14959
|
|
|
|
|
|
|
} |
|
14960
|
|
|
|
|
|
|
|
|
14961
|
7
|
|
|
|
|
|
SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, |
|
14962
|
|
|
|
|
|
|
const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode, |
|
14963
|
|
|
|
|
|
|
ELzmaStatus *status, ISzAlloc *alloc) |
|
14964
|
|
|
|
|
|
|
{ |
|
14965
|
|
|
|
|
|
|
CLzmaDec p; |
|
14966
|
|
|
|
|
|
|
SRes res; |
|
14967
|
7
|
|
|
|
|
|
size_t inSize = *srcLen; |
|
14968
|
7
|
|
|
|
|
|
size_t outSize = *destLen; |
|
14969
|
7
|
|
|
|
|
|
*srcLen = *destLen = 0; |
|
14970
|
7
|
50
|
|
|
|
|
if (inSize < RC_INIT_SIZE) |
|
14971
|
|
|
|
|
|
|
return SZ_ERROR_INPUT_EOF; |
|
14972
|
|
|
|
|
|
|
|
|
14973
|
7
|
|
|
|
|
|
LzmaDec_Construct(&p); |
|
14974
|
7
|
|
|
|
|
|
res = LzmaDec_AllocateProbs(&p, propData, propSize, alloc); |
|
14975
|
7
|
50
|
|
|
|
|
if (res != 0) |
|
14976
|
|
|
|
|
|
|
return res; |
|
14977
|
7
|
|
|
|
|
|
p.dic = dest; |
|
14978
|
7
|
|
|
|
|
|
p.dicBufSize = outSize; |
|
14979
|
|
|
|
|
|
|
|
|
14980
|
|
|
|
|
|
|
LzmaDec_Init(&p); |
|
14981
|
|
|
|
|
|
|
|
|
14982
|
7
|
|
|
|
|
|
*srcLen = inSize; |
|
14983
|
7
|
|
|
|
|
|
res = LzmaDec_DecodeToDic(&p, outSize, src, srcLen, finishMode, status); |
|
14984
|
|
|
|
|
|
|
|
|
14985
|
7
|
50
|
|
|
|
|
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) |
|
|
|
50
|
|
|
|
|
|
|
14986
|
|
|
|
|
|
|
res = SZ_ERROR_INPUT_EOF; |
|
14987
|
|
|
|
|
|
|
|
|
14988
|
7
|
|
|
|
|
|
(*destLen) = p.dicPos; |
|
14989
|
|
|
|
|
|
|
LzmaDec_FreeProbs(&p, alloc); |
|
14990
|
|
|
|
|
|
|
return res; |
|
14991
|
|
|
|
|
|
|
} |
|
14992
|
|
|
|
|
|
|
|
|
14993
|
|
|
|
|
|
|
} // namespace lzma |
|
14994
|
|
|
|
|
|
|
// End of LZMA compression library by Igor Pavlov |
|
14995
|
|
|
|
|
|
|
|
|
14996
|
|
|
|
|
|
|
#ifndef UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
|
14997
|
|
|
|
|
|
|
#define UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
|
14998
|
14
|
|
|
|
|
|
static void *LzmaAlloc(void* /*p*/, size_t size) { return new char[size]; } |
|
14999
|
28
|
100
|
|
|
|
|
static void LzmaFree(void* /*p*/, void *address) { delete[] (char*) address; } |
|
15000
|
|
|
|
|
|
|
static lzma::ISzAlloc lzmaAllocator = { LzmaAlloc, LzmaFree }; |
|
15001
|
|
|
|
|
|
|
#endif // UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
|
15002
|
|
|
|
|
|
|
|
|
15003
|
7
|
|
|
|
|
|
bool compressor::load(istream& is, binary_decoder& data) { |
|
15004
|
|
|
|
|
|
|
uint32_t uncompressed_len, compressed_len, poor_crc; |
|
15005
|
|
|
|
|
|
|
unsigned char props_encoded[LZMA_PROPS_SIZE]; |
|
15006
|
|
|
|
|
|
|
|
|
15007
|
7
|
50
|
|
|
|
|
if (!is.read((char *) &uncompressed_len, sizeof(uncompressed_len))) return false; |
|
15008
|
7
|
50
|
|
|
|
|
if (!is.read((char *) &compressed_len, sizeof(compressed_len))) return false; |
|
15009
|
7
|
50
|
|
|
|
|
if (!is.read((char *) &poor_crc, sizeof(poor_crc))) return false; |
|
15010
|
7
|
50
|
|
|
|
|
if (poor_crc != uncompressed_len * 19991 + compressed_len * 199999991 + 1234567890) return false; |
|
15011
|
7
|
50
|
|
|
|
|
if (!is.read((char *) props_encoded, sizeof(props_encoded))) return false; |
|
15012
|
|
|
|
|
|
|
|
|
15013
|
7
|
|
|
|
|
|
vector compressed(compressed_len); |
|
15014
|
7
|
50
|
|
|
|
|
if (!is.read((char *) compressed.data(), compressed_len)) return false; |
|
|
|
50
|
|
|
|
|
|
|
15015
|
|
|
|
|
|
|
|
|
15016
|
|
|
|
|
|
|
lzma::ELzmaStatus status; |
|
15017
|
7
|
|
|
|
|
|
size_t uncompressed_size = uncompressed_len, compressed_size = compressed_len; |
|
15018
|
7
|
50
|
|
|
|
|
auto res = lzma::LzmaDecode(data.fill(uncompressed_len), &uncompressed_size, compressed.data(), &compressed_size, props_encoded, LZMA_PROPS_SIZE, lzma::LZMA_FINISH_ANY, &status, &lzmaAllocator); |
|
15019
|
7
|
50
|
|
|
|
|
if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
15020
|
|
|
|
|
|
|
|
|
15021
|
7
|
|
|
|
|
|
return true; |
|
15022
|
|
|
|
|
|
|
} |
|
15023
|
|
|
|
|
|
|
|
|
15024
|
|
|
|
|
|
|
} // namespace utils |
|
15025
|
|
|
|
|
|
|
|
|
15026
|
|
|
|
|
|
|
///////// |
|
15027
|
|
|
|
|
|
|
// File: utils/url_detector.cpp |
|
15028
|
|
|
|
|
|
|
///////// |
|
15029
|
|
|
|
|
|
|
|
|
15030
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
|
15031
|
|
|
|
|
|
|
// |
|
15032
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
|
15033
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
15034
|
|
|
|
|
|
|
// |
|
15035
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
15036
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
15037
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
15038
|
|
|
|
|
|
|
|
|
15039
|
|
|
|
|
|
|
namespace utils { |
|
15040
|
|
|
|
|
|
|
|
|
15041
|
|
|
|
|
|
|
static const char _url_detector_actions[] = { |
|
15042
|
|
|
|
|
|
|
0, 1, 0, 1, 3, 2, 0, 1, |
|
15043
|
|
|
|
|
|
|
2, 2, 0, 2, 3, 0 |
|
15044
|
|
|
|
|
|
|
}; |
|
15045
|
|
|
|
|
|
|
|
|
15046
|
|
|
|
|
|
|
static const char _url_detector_cond_offsets[] = { |
|
15047
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15048
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 1, 1, |
|
15049
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15050
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15051
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15052
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15053
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15054
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15055
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15056
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15057
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15058
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15059
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15060
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15061
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15062
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15063
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15064
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15065
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15066
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15067
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15068
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15069
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15070
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15071
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15072
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15073
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15074
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15075
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15076
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15077
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15078
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15079
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15080
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15081
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15082
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15083
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15084
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15085
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15086
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15087
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15088
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15089
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15090
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15091
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15092
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
|
15093
|
|
|
|
|
|
|
1, 2, 2, 2, 2, 2, 2, 2, |
|
15094
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
|
15095
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
|
15096
|
|
|
|
|
|
|
2, 2 |
|
15097
|
|
|
|
|
|
|
}; |
|
15098
|
|
|
|
|
|
|
|
|
15099
|
|
|
|
|
|
|
static const char _url_detector_cond_lengths[] = { |
|
15100
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15101
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 0, 0, 0, |
|
15102
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15103
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15104
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15105
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15106
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15107
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15108
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15109
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15110
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15111
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15112
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15113
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15114
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15115
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15116
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15117
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15118
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15119
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15120
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15121
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15122
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15123
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15124
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15125
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15126
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15127
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15128
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15129
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15130
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15131
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15132
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15133
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15134
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15135
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15136
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15137
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15138
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15139
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15140
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15141
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15142
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15143
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15144
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15145
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15146
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
|
15147
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15148
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15149
|
|
|
|
|
|
|
0, 0 |
|
15150
|
|
|
|
|
|
|
}; |
|
15151
|
|
|
|
|
|
|
|
|
15152
|
|
|
|
|
|
|
static const short _url_detector_cond_keys[] = { |
|
15153
|
|
|
|
|
|
|
41u, 41u, 41u, 41u, 0 |
|
15154
|
|
|
|
|
|
|
}; |
|
15155
|
|
|
|
|
|
|
|
|
15156
|
|
|
|
|
|
|
static const char _url_detector_cond_spaces[] = { |
|
15157
|
|
|
|
|
|
|
0, 0, 0 |
|
15158
|
|
|
|
|
|
|
}; |
|
15159
|
|
|
|
|
|
|
|
|
15160
|
|
|
|
|
|
|
static const short _url_detector_key_offsets[] = { |
|
15161
|
|
|
|
|
|
|
0, 0, 15, 29, 41, 54, 63, 71, |
|
15162
|
|
|
|
|
|
|
78, 86, 92, 100, 153, 161, 167, 169, |
|
15163
|
|
|
|
|
|
|
177, 184, 192, 196, 200, 204, 209, 214, |
|
15164
|
|
|
|
|
|
|
223, 227, 231, 240, 242, 246, 248, 251, |
|
15165
|
|
|
|
|
|
|
283, 285, 287, 289, 291, 297, 312, 327, |
|
15166
|
|
|
|
|
|
|
346, 358, 374, 386, 402, 418, 439, 449, |
|
15167
|
|
|
|
|
|
|
461, 477, 491, 506, 516, 529, 538, 550, |
|
15168
|
|
|
|
|
|
|
552, 556, 577, 586, 596, 602, 608, 609, |
|
15169
|
|
|
|
|
|
|
653, 657, 661, 669, 680, 690, 697, 703, |
|
15170
|
|
|
|
|
|
|
709, 713, 717, 719, 723, 727, 731, 737, |
|
15171
|
|
|
|
|
|
|
745, 754, 760, 762, 766, 768, 774, 778, |
|
15172
|
|
|
|
|
|
|
782, 786, 790, 795, 802, 808, 810, 814, |
|
15173
|
|
|
|
|
|
|
820, 824, 830, 841, 846, 860, 880, 885, |
|
15174
|
|
|
|
|
|
|
889, 891, 907, 912, 914, 916, 918, 920, |
|
15175
|
|
|
|
|
|
|
922, 926, 930, 935, 941, 945, 955, 965, |
|
15176
|
|
|
|
|
|
|
966, 979, 987, 995, 999, 1003, 1009, 1011, |
|
15177
|
|
|
|
|
|
|
1013, 1017, 1021, 1026, 1028, 1031, 1033, 1055, |
|
15178
|
|
|
|
|
|
|
1057, 1059, 1063, 1068, 1072, 1078, 1080, 1084, |
|
15179
|
|
|
|
|
|
|
1092, 1096, 1100, 1105, 1107, 1115, 1123, 1135, |
|
15180
|
|
|
|
|
|
|
1141, 1147, 1151, 1155, 1159, 1178, 1180, 1191, |
|
15181
|
|
|
|
|
|
|
1197, 1199, 1201, 1205, 1209, 1213, 1217, 1219, |
|
15182
|
|
|
|
|
|
|
1223, 1227, 1229, 1237, 1253, 1295, 1304, 1308, |
|
15183
|
|
|
|
|
|
|
1310, 1312, 1314, 1316, 1317, 1321, 1325, 1329, |
|
15184
|
|
|
|
|
|
|
1335, 1339, 1343, 1345, 1349, 1358, 1364, 1368, |
|
15185
|
|
|
|
|
|
|
1374, 1378, 1382, 1395, 1399, 1401, 1407, 1413, |
|
15186
|
|
|
|
|
|
|
1417, 1419, 1421, 1425, 1427, 1452, 1457, 1461, |
|
15187
|
|
|
|
|
|
|
1465, 1468, 1477, 1481, 1492, 1496, 1512, 1526, |
|
15188
|
|
|
|
|
|
|
1531, 1535, 1538, 1542, 1548, 1551, 1558, 1560, |
|
15189
|
|
|
|
|
|
|
1562, 1565, 1568, 1570, 1581, 1585, 1589, 1599, |
|
15190
|
|
|
|
|
|
|
1601, 1605, 1607, 1611, 1613, 1617, 1623, 1643, |
|
15191
|
|
|
|
|
|
|
1649, 1655, 1657, 1659, 1663, 1677, 1681, 1693, |
|
15192
|
|
|
|
|
|
|
1700, 1704, 1711, 1717, 1723, 1729, 1735, 1739, |
|
15193
|
|
|
|
|
|
|
1742, 1747, 1753, 1757, 1771, 1797, 1807, 1808, |
|
15194
|
|
|
|
|
|
|
1811, 1813, 1817, 1819, 1822, 1824, 1827, 1829, |
|
15195
|
|
|
|
|
|
|
1830, 1833, 1835, 1837, 1846, 1854, 1863, 1871, |
|
15196
|
|
|
|
|
|
|
1880, 1888, 1896, 1907, 1917, 1925, 1933, 1944, |
|
15197
|
|
|
|
|
|
|
1954, 1962, 1970, 1981, 1991, 2000, 2008, 2015, |
|
15198
|
|
|
|
|
|
|
2023, 2029, 2037, 2046, 2054, 2063, 2071, 2080, |
|
15199
|
|
|
|
|
|
|
2088, 2096, 2107, 2117, 2125, 2133, 2144, 2154, |
|
15200
|
|
|
|
|
|
|
2162, 2170, 2181, 2191, 2207, 2222, 2238, 2252, |
|
15201
|
|
|
|
|
|
|
2268, 2280, 2297, 2313, 2330, 2346, 2363, 2379, |
|
15202
|
|
|
|
|
|
|
2395, 2414, 2432, 2448, 2464, 2483, 2501, 2517, |
|
15203
|
|
|
|
|
|
|
2533, 2552, 2570, 2586, 2602, 2618, 2629, 2630, |
|
15204
|
|
|
|
|
|
|
2645, 2659, 2675, 2690, 2706, 2720, 2736, 2753, |
|
15205
|
|
|
|
|
|
|
2769, 2786, 2802, 2819, 2835, 2851, 2870, 2888, |
|
15206
|
|
|
|
|
|
|
2904, 2920, 2939, 2957, 2973, 2989, 3008, 3026, |
|
15207
|
|
|
|
|
|
|
3036, 3089, 3092, 3102, 3112, 3122, 3135, 3147, |
|
15208
|
|
|
|
|
|
|
3157, 3167, 3177, 3187, 3200, 3212, 3229, 3243, |
|
15209
|
|
|
|
|
|
|
3260, 3277, 3294, 3314, 3333, 3350, 3367, 3384, |
|
15210
|
|
|
|
|
|
|
3401, 3421 |
|
15211
|
|
|
|
|
|
|
}; |
|
15212
|
|
|
|
|
|
|
|
|
15213
|
|
|
|
|
|
|
static const short _url_detector_trans_keys[] = { |
|
15214
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 95u, 36u, 37u, 39u, |
|
15215
|
|
|
|
|
|
|
46u, 51u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
15216
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 46u, 48u, |
|
15217
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 95u, 36u, |
|
15218
|
|
|
|
|
|
|
37u, 39u, 46u, 48u, 57u, 65u, 90u, 97u, |
|
15219
|
|
|
|
|
|
|
122u, 33u, 64u, 95u, 36u, 37u, 39u, 46u, |
|
15220
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
|
15221
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15222
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15223
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
15224
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 57u, |
|
15225
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
|
15226
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 40u, 44u, |
|
15227
|
|
|
|
|
|
|
46u, 61u, 63u, 95u, 194u, 195u, 203u, 205u, |
|
15228
|
|
|
|
|
|
|
206u, 207u, 210u, 212u, 213u, 214u, 215u, 216u, |
|
15229
|
|
|
|
|
|
|
217u, 219u, 220u, 221u, 222u, 223u, 224u, 225u, |
|
15230
|
|
|
|
|
|
|
226u, 227u, 228u, 233u, 234u, 237u, 239u, 240u, |
|
15231
|
|
|
|
|
|
|
243u, 297u, 553u, 36u, 38u, 42u, 57u, 58u, |
|
15232
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 196u, 218u, 229u, |
|
15233
|
|
|
|
|
|
|
236u, 170u, 181u, 178u, 179u, 185u, 186u, 188u, |
|
15234
|
|
|
|
|
|
|
190u, 128u, 150u, 152u, 182u, 184u, 191u, 128u, |
|
15235
|
|
|
|
|
|
|
191u, 172u, 174u, 128u, 129u, 134u, 145u, 160u, |
|
15236
|
|
|
|
|
|
|
164u, 191u, 128u, 180u, 182u, 183u, 186u, 189u, |
|
15237
|
|
|
|
|
|
|
134u, 140u, 136u, 138u, 142u, 161u, 163u, 191u, |
|
15238
|
|
|
|
|
|
|
128u, 181u, 183u, 191u, 128u, 129u, 131u, 191u, |
|
15239
|
|
|
|
|
|
|
128u, 175u, 177u, 191u, 153u, 128u, 150u, 161u, |
|
15240
|
|
|
|
|
|
|
191u, 191u, 128u, 135u, 145u, 189u, 135u, 129u, |
|
15241
|
|
|
|
|
|
|
130u, 132u, 133u, 144u, 170u, 176u, 178u, 144u, |
|
15242
|
|
|
|
|
|
|
154u, 160u, 191u, 128u, 169u, 174u, 191u, 191u, |
|
15243
|
|
|
|
|
|
|
128u, 147u, 149u, 156u, 159u, 168u, 170u, 188u, |
|
15244
|
|
|
|
|
|
|
144u, 191u, 128u, 138u, 141u, 191u, 128u, 177u, |
|
15245
|
|
|
|
|
|
|
186u, 128u, 181u, 160u, 161u, 162u, 163u, 164u, |
|
15246
|
|
|
|
|
|
|
165u, 166u, 167u, 168u, 169u, 170u, 171u, 172u, |
|
15247
|
|
|
|
|
|
|
173u, 174u, 175u, 176u, 177u, 178u, 179u, 180u, |
|
15248
|
|
|
|
|
|
|
181u, 182u, 183u, 184u, 185u, 186u, 187u, 188u, |
|
15249
|
|
|
|
|
|
|
189u, 190u, 191u, 128u, 173u, 128u, 155u, 160u, |
|
15250
|
|
|
|
|
|
|
180u, 163u, 191u, 128u, 163u, 166u, 175u, 177u, |
|
15251
|
|
|
|
|
|
|
191u, 178u, 128u, 131u, 133u, 140u, 143u, 144u, |
|
15252
|
|
|
|
|
|
|
147u, 168u, 170u, 176u, 182u, 185u, 188u, 191u, |
|
15253
|
|
|
|
|
|
|
151u, 128u, 132u, 135u, 136u, 139u, 142u, 156u, |
|
15254
|
|
|
|
|
|
|
157u, 159u, 163u, 166u, 177u, 180u, 185u, 188u, |
|
15255
|
|
|
|
|
|
|
129u, 131u, 133u, 138u, 143u, 144u, 147u, 168u, |
|
15256
|
|
|
|
|
|
|
170u, 176u, 178u, 179u, 181u, 182u, 184u, 185u, |
|
15257
|
|
|
|
|
|
|
190u, 191u, 145u, 158u, 128u, 130u, 135u, 136u, |
|
15258
|
|
|
|
|
|
|
139u, 141u, 153u, 156u, 166u, 181u, 129u, 131u, |
|
15259
|
|
|
|
|
|
|
133u, 141u, 143u, 145u, 147u, 168u, 170u, 176u, |
|
15260
|
|
|
|
|
|
|
178u, 179u, 181u, 185u, 188u, 191u, 144u, 185u, |
|
15261
|
|
|
|
|
|
|
128u, 133u, 135u, 137u, 139u, 141u, 160u, 163u, |
|
15262
|
|
|
|
|
|
|
166u, 175u, 129u, 131u, 133u, 140u, 143u, 144u, |
|
15263
|
|
|
|
|
|
|
147u, 168u, 170u, 176u, 178u, 179u, 181u, 185u, |
|
15264
|
|
|
|
|
|
|
188u, 191u, 128u, 132u, 135u, 136u, 139u, 141u, |
|
15265
|
|
|
|
|
|
|
150u, 151u, 156u, 157u, 159u, 163u, 166u, 175u, |
|
15266
|
|
|
|
|
|
|
177u, 183u, 156u, 130u, 131u, 133u, 138u, 142u, |
|
15267
|
|
|
|
|
|
|
144u, 146u, 149u, 153u, 154u, 158u, 159u, 163u, |
|
15268
|
|
|
|
|
|
|
164u, 168u, 170u, 174u, 185u, 190u, 191u, 144u, |
|
15269
|
|
|
|
|
|
|
151u, 128u, 130u, 134u, 136u, 138u, 141u, 166u, |
|
15270
|
|
|
|
|
|
|
178u, 128u, 131u, 133u, 140u, 142u, 144u, 146u, |
|
15271
|
|
|
|
|
|
|
168u, 170u, 185u, 189u, 191u, 128u, 132u, 134u, |
|
15272
|
|
|
|
|
|
|
136u, 138u, 141u, 149u, 150u, 152u, 154u, 160u, |
|
15273
|
|
|
|
|
|
|
163u, 166u, 175u, 184u, 190u, 129u, 131u, 133u, |
|
15274
|
|
|
|
|
|
|
140u, 142u, 144u, 146u, 168u, 170u, 179u, 181u, |
|
15275
|
|
|
|
|
|
|
185u, 188u, 191u, 158u, 128u, 132u, 134u, 136u, |
|
15276
|
|
|
|
|
|
|
138u, 141u, 149u, 150u, 160u, 163u, 166u, 175u, |
|
15277
|
|
|
|
|
|
|
177u, 178u, 129u, 131u, 133u, 140u, 142u, 144u, |
|
15278
|
|
|
|
|
|
|
146u, 186u, 189u, 191u, 151u, 128u, 132u, 134u, |
|
15279
|
|
|
|
|
|
|
136u, 138u, 142u, 159u, 163u, 166u, 181u, 186u, |
|
15280
|
|
|
|
|
|
|
191u, 189u, 130u, 131u, 133u, 150u, 154u, 177u, |
|
15281
|
|
|
|
|
|
|
179u, 187u, 138u, 150u, 128u, 134u, 143u, 148u, |
|
15282
|
|
|
|
|
|
|
152u, 159u, 166u, 175u, 178u, 179u, 129u, 186u, |
|
15283
|
|
|
|
|
|
|
128u, 142u, 144u, 153u, 132u, 138u, 141u, 165u, |
|
15284
|
|
|
|
|
|
|
167u, 129u, 130u, 135u, 136u, 148u, 151u, 153u, |
|
15285
|
|
|
|
|
|
|
159u, 161u, 163u, 170u, 171u, 173u, 185u, 187u, |
|
15286
|
|
|
|
|
|
|
189u, 134u, 128u, 132u, 136u, 141u, 144u, 153u, |
|
15287
|
|
|
|
|
|
|
156u, 159u, 128u, 181u, 183u, 185u, 152u, 153u, |
|
15288
|
|
|
|
|
|
|
160u, 179u, 190u, 191u, 128u, 135u, 137u, 172u, |
|
15289
|
|
|
|
|
|
|
177u, 191u, 128u, 132u, 134u, 151u, 153u, 188u, |
|
15290
|
|
|
|
|
|
|
134u, 129u, 130u, 131u, 137u, 138u, 139u, 140u, |
|
15291
|
|
|
|
|
|
|
141u, 142u, 143u, 144u, 153u, 154u, 155u, 156u, |
|
15292
|
|
|
|
|
|
|
157u, 159u, 160u, 161u, 162u, 163u, 164u, 165u, |
|
15293
|
|
|
|
|
|
|
166u, 167u, 168u, 169u, 170u, 173u, 175u, 176u, |
|
15294
|
|
|
|
|
|
|
177u, 179u, 183u, 188u, 189u, 190u, 191u, 128u, |
|
15295
|
|
|
|
|
|
|
158u, 172u, 174u, 180u, 187u, 128u, 137u, 144u, |
|
15296
|
|
|
|
|
|
|
191u, 128u, 157u, 160u, 191u, 135u, 141u, 128u, |
|
15297
|
|
|
|
|
|
|
133u, 144u, 186u, 188u, 191u, 152u, 128u, 136u, |
|
15298
|
|
|
|
|
|
|
138u, 141u, 144u, 150u, 154u, 157u, 160u, 191u, |
|
15299
|
|
|
|
|
|
|
128u, 136u, 138u, 141u, 144u, 176u, 178u, 181u, |
|
15300
|
|
|
|
|
|
|
184u, 190u, 128u, 130u, 133u, 136u, 150u, 152u, |
|
15301
|
|
|
|
|
|
|
191u, 128u, 144u, 146u, 149u, 152u, 191u, 128u, |
|
15302
|
|
|
|
|
|
|
154u, 157u, 159u, 169u, 188u, 128u, 143u, 160u, |
|
15303
|
|
|
|
|
|
|
191u, 128u, 181u, 184u, 189u, 129u, 191u, 128u, |
|
15304
|
|
|
|
|
|
|
172u, 175u, 191u, 129u, 154u, 160u, 191u, 128u, |
|
15305
|
|
|
|
|
|
|
170u, 174u, 184u, 128u, 140u, 142u, 148u, 160u, |
|
15306
|
|
|
|
|
|
|
180u, 128u, 147u, 160u, 172u, 174u, 176u, 178u, |
|
15307
|
|
|
|
|
|
|
179u, 151u, 128u, 147u, 156u, 157u, 160u, 169u, |
|
15308
|
|
|
|
|
|
|
176u, 185u, 139u, 141u, 144u, 153u, 160u, 191u, |
|
15309
|
|
|
|
|
|
|
128u, 183u, 128u, 170u, 176u, 191u, 128u, 181u, |
|
15310
|
|
|
|
|
|
|
128u, 158u, 160u, 171u, 176u, 187u, 134u, 173u, |
|
15311
|
|
|
|
|
|
|
176u, 180u, 128u, 171u, 176u, 191u, 128u, 137u, |
|
15312
|
|
|
|
|
|
|
144u, 154u, 128u, 155u, 160u, 191u, 191u, 128u, |
|
15313
|
|
|
|
|
|
|
158u, 160u, 188u, 167u, 128u, 137u, 144u, 153u, |
|
15314
|
|
|
|
|
|
|
176u, 190u, 128u, 139u, 144u, 153u, 171u, 179u, |
|
15315
|
|
|
|
|
|
|
128u, 179u, 128u, 137u, 141u, 189u, 144u, 146u, |
|
15316
|
|
|
|
|
|
|
148u, 182u, 184u, 185u, 128u, 181u, 188u, 191u, |
|
15317
|
|
|
|
|
|
|
128u, 149u, 152u, 157u, 160u, 191u, 153u, 155u, |
|
15318
|
|
|
|
|
|
|
157u, 128u, 133u, 136u, 141u, 144u, 151u, 159u, |
|
15319
|
|
|
|
|
|
|
189u, 190u, 128u, 180u, 182u, 188u, 130u, 132u, |
|
15320
|
|
|
|
|
|
|
134u, 140u, 144u, 147u, 150u, 155u, 160u, 172u, |
|
15321
|
|
|
|
|
|
|
178u, 180u, 182u, 188u, 129u, 130u, 131u, 132u, |
|
15322
|
|
|
|
|
|
|
133u, 134u, 145u, 146u, 147u, 157u, 158u, 176u, |
|
15323
|
|
|
|
|
|
|
177u, 178u, 179u, 180u, 181u, 182u, 183u, 184u, |
|
15324
|
|
|
|
|
|
|
191u, 176u, 177u, 180u, 185u, 128u, 137u, 144u, |
|
15325
|
|
|
|
|
|
|
156u, 144u, 176u, 130u, 135u, 149u, 164u, 166u, |
|
15326
|
|
|
|
|
|
|
168u, 138u, 147u, 153u, 157u, 170u, 173u, 175u, |
|
15327
|
|
|
|
|
|
|
185u, 188u, 191u, 142u, 133u, 137u, 144u, 191u, |
|
15328
|
|
|
|
|
|
|
128u, 137u, 160u, 191u, 170u, 191u, 182u, 191u, |
|
15329
|
|
|
|
|
|
|
128u, 147u, 128u, 174u, 176u, 191u, 128u, 158u, |
|
15330
|
|
|
|
|
|
|
160u, 191u, 189u, 128u, 164u, 171u, 179u, 167u, |
|
15331
|
|
|
|
|
|
|
173u, 128u, 165u, 176u, 191u, 175u, 191u, 128u, |
|
15332
|
|
|
|
|
|
|
167u, 128u, 150u, 160u, 166u, 168u, 174u, 176u, |
|
15333
|
|
|
|
|
|
|
182u, 184u, 190u, 128u, 134u, 136u, 142u, 144u, |
|
15334
|
|
|
|
|
|
|
150u, 152u, 158u, 160u, 191u, 175u, 128u, 129u, |
|
15335
|
|
|
|
|
|
|
130u, 131u, 132u, 133u, 134u, 135u, 136u, 137u, |
|
15336
|
|
|
|
|
|
|
138u, 144u, 191u, 133u, 135u, 161u, 175u, 177u, |
|
15337
|
|
|
|
|
|
|
181u, 184u, 188u, 128u, 150u, 153u, 154u, 157u, |
|
15338
|
|
|
|
|
|
|
159u, 161u, 191u, 128u, 186u, 188u, 191u, 133u, |
|
15339
|
|
|
|
|
|
|
173u, 177u, 191u, 128u, 142u, 146u, 149u, 160u, |
|
15340
|
|
|
|
|
|
|
186u, 176u, 191u, 160u, 169u, 136u, 143u, 145u, |
|
15341
|
|
|
|
|
|
|
159u, 128u, 137u, 177u, 191u, 182u, 128u, 181u, |
|
15342
|
|
|
|
|
|
|
184u, 191u, 128u, 191u, 191u, 128u, 190u, 128u, |
|
15343
|
|
|
|
|
|
|
149u, 146u, 147u, 152u, 153u, 155u, 156u, 158u, |
|
15344
|
|
|
|
|
|
|
159u, 160u, 161u, 163u, 164u, 165u, 167u, 168u, |
|
15345
|
|
|
|
|
|
|
169u, 171u, 172u, 173u, 175u, 128u, 191u, 128u, |
|
15346
|
|
|
|
|
|
|
140u, 144u, 189u, 128u, 140u, 144u, 171u, 191u, |
|
15347
|
|
|
|
|
|
|
128u, 178u, 180u, 189u, 151u, 159u, 162u, 191u, |
|
15348
|
|
|
|
|
|
|
128u, 136u, 139u, 173u, 176u, 183u, 183u, 191u, |
|
15349
|
|
|
|
|
|
|
128u, 167u, 176u, 181u, 187u, 189u, 128u, 132u, |
|
15350
|
|
|
|
|
|
|
144u, 153u, 160u, 183u, 128u, 173u, 176u, 191u, |
|
15351
|
|
|
|
|
|
|
128u, 147u, 160u, 188u, 128u, 143u, 153u, 160u, |
|
15352
|
|
|
|
|
|
|
190u, 128u, 182u, 128u, 141u, 144u, 153u, 160u, |
|
15353
|
|
|
|
|
|
|
182u, 186u, 191u, 128u, 130u, 155u, 157u, 160u, |
|
15354
|
|
|
|
|
|
|
175u, 178u, 182u, 129u, 134u, 137u, 142u, 145u, |
|
15355
|
|
|
|
|
|
|
150u, 160u, 166u, 168u, 174u, 176u, 191u, 128u, |
|
15356
|
|
|
|
|
|
|
154u, 156u, 165u, 176u, 191u, 128u, 170u, 172u, |
|
15357
|
|
|
|
|
|
|
173u, 176u, 185u, 158u, 159u, 128u, 157u, 128u, |
|
15358
|
|
|
|
|
|
|
163u, 176u, 191u, 128u, 134u, 139u, 187u, 169u, |
|
15359
|
|
|
|
|
|
|
171u, 172u, 173u, 174u, 175u, 180u, 181u, 182u, |
|
15360
|
|
|
|
|
|
|
183u, 184u, 185u, 187u, 188u, 189u, 190u, 191u, |
|
15361
|
|
|
|
|
|
|
164u, 186u, 128u, 153u, 190u, 128u, 134u, 147u, |
|
15362
|
|
|
|
|
|
|
151u, 157u, 168u, 170u, 182u, 184u, 188u, 128u, |
|
15363
|
|
|
|
|
|
|
129u, 131u, 132u, 134u, 191u, 147u, 191u, 128u, |
|
15364
|
|
|
|
|
|
|
189u, 128u, 143u, 146u, 191u, 128u, 135u, 176u, |
|
15365
|
|
|
|
|
|
|
187u, 128u, 143u, 160u, 175u, 176u, 180u, 182u, |
|
15366
|
|
|
|
|
|
|
191u, 128u, 188u, 144u, 153u, 161u, 186u, 129u, |
|
15367
|
|
|
|
|
|
|
154u, 166u, 191u, 128u, 190u, 130u, 135u, 138u, |
|
15368
|
|
|
|
|
|
|
143u, 146u, 151u, 154u, 156u, 144u, 145u, 146u, |
|
15369
|
|
|
|
|
|
|
147u, 148u, 150u, 155u, 157u, 158u, 159u, 170u, |
|
15370
|
|
|
|
|
|
|
171u, 172u, 175u, 160u, 169u, 128u, 129u, 130u, |
|
15371
|
|
|
|
|
|
|
131u, 132u, 133u, 134u, 135u, 138u, 139u, 140u, |
|
15372
|
|
|
|
|
|
|
141u, 142u, 143u, 146u, 148u, 149u, 156u, 157u, |
|
15373
|
|
|
|
|
|
|
160u, 161u, 162u, 163u, 164u, 166u, 167u, 168u, |
|
15374
|
|
|
|
|
|
|
169u, 170u, 171u, 172u, 173u, 174u, 176u, 177u, |
|
15375
|
|
|
|
|
|
|
178u, 179u, 185u, 144u, 145u, 152u, 155u, 191u, |
|
15376
|
|
|
|
|
|
|
128u, 139u, 141u, 166u, 168u, 186u, 188u, 189u, |
|
15377
|
|
|
|
|
|
|
128u, 141u, 144u, 157u, 128u, 186u, 135u, 179u, |
|
15378
|
|
|
|
|
|
|
128u, 184u, 138u, 139u, 189u, 128u, 156u, 160u, |
|
15379
|
|
|
|
|
|
|
191u, 128u, 144u, 160u, 187u, 128u, 138u, 144u, |
|
15380
|
|
|
|
|
|
|
186u, 128u, 131u, 136u, 143u, 145u, 149u, 128u, |
|
15381
|
|
|
|
|
|
|
157u, 160u, 169u, 128u, 167u, 176u, 191u, 128u, |
|
15382
|
|
|
|
|
|
|
163u, 128u, 149u, 160u, 167u, 136u, 188u, 191u, |
|
15383
|
|
|
|
|
|
|
128u, 133u, 138u, 181u, 183u, 184u, 128u, 149u, |
|
15384
|
|
|
|
|
|
|
152u, 182u, 185u, 191u, 128u, 158u, 167u, 175u, |
|
15385
|
|
|
|
|
|
|
160u, 178u, 180u, 181u, 187u, 191u, 128u, 155u, |
|
15386
|
|
|
|
|
|
|
160u, 185u, 128u, 183u, 188u, 191u, 191u, 128u, |
|
15387
|
|
|
|
|
|
|
131u, 133u, 134u, 140u, 147u, 149u, 151u, 153u, |
|
15388
|
|
|
|
|
|
|
179u, 184u, 186u, 128u, 135u, 160u, 190u, 128u, |
|
15389
|
|
|
|
|
|
|
159u, 128u, 135u, 137u, 166u, 171u, 175u, 128u, |
|
15390
|
|
|
|
|
|
|
149u, 152u, 178u, 184u, 191u, 128u, 145u, 169u, |
|
15391
|
|
|
|
|
|
|
175u, 128u, 136u, 128u, 178u, 128u, 178u, 186u, |
|
15392
|
|
|
|
|
|
|
191u, 160u, 190u, 129u, 130u, 131u, 132u, 133u, |
|
15393
|
|
|
|
|
|
|
135u, 136u, 138u, 139u, 140u, 141u, 146u, 147u, |
|
15394
|
|
|
|
|
|
|
150u, 151u, 152u, 153u, 154u, 155u, 156u, 162u, |
|
15395
|
|
|
|
|
|
|
163u, 171u, 128u, 134u, 191u, 128u, 134u, 146u, |
|
15396
|
|
|
|
|
|
|
175u, 144u, 168u, 176u, 185u, 128u, 180u, 182u, |
|
15397
|
|
|
|
|
|
|
191u, 182u, 144u, 179u, 156u, 128u, 132u, 138u, |
|
15398
|
|
|
|
|
|
|
140u, 144u, 154u, 161u, 180u, 128u, 145u, 147u, |
|
15399
|
|
|
|
|
|
|
183u, 136u, 128u, 134u, 138u, 141u, 143u, 157u, |
|
15400
|
|
|
|
|
|
|
159u, 168u, 176u, 191u, 128u, 170u, 176u, 185u, |
|
15401
|
|
|
|
|
|
|
128u, 131u, 133u, 140u, 143u, 144u, 147u, 168u, |
|
15402
|
|
|
|
|
|
|
170u, 176u, 178u, 179u, 181u, 185u, 188u, 191u, |
|
15403
|
|
|
|
|
|
|
144u, 151u, 128u, 132u, 135u, 136u, 139u, 141u, |
|
15404
|
|
|
|
|
|
|
157u, 163u, 166u, 172u, 176u, 180u, 135u, 128u, |
|
15405
|
|
|
|
|
|
|
133u, 144u, 153u, 128u, 181u, 184u, 191u, 128u, |
|
15406
|
|
|
|
|
|
|
152u, 157u, 128u, 132u, 144u, 153u, 128u, 153u, |
|
15407
|
|
|
|
|
|
|
157u, 171u, 176u, 187u, 191u, 128u, 178u, 142u, |
|
15408
|
|
|
|
|
|
|
145u, 149u, 128u, 141u, 144u, 148u, 128u, 174u, |
|
15409
|
|
|
|
|
|
|
128u, 131u, 144u, 128u, 143u, 153u, 144u, 152u, |
|
15410
|
|
|
|
|
|
|
128u, 134u, 168u, 169u, 171u, 172u, 173u, 174u, |
|
15411
|
|
|
|
|
|
|
188u, 189u, 190u, 160u, 167u, 128u, 158u, 160u, |
|
15412
|
|
|
|
|
|
|
169u, 144u, 173u, 176u, 180u, 128u, 131u, 144u, |
|
15413
|
|
|
|
|
|
|
153u, 155u, 161u, 163u, 183u, 189u, 191u, 128u, |
|
15414
|
|
|
|
|
|
|
143u, 128u, 132u, 144u, 190u, 143u, 159u, 128u, |
|
15415
|
|
|
|
|
|
|
176u, 177u, 178u, 128u, 129u, 128u, 170u, 176u, |
|
15416
|
|
|
|
|
|
|
188u, 128u, 136u, 144u, 153u, 157u, 158u, 133u, |
|
15417
|
|
|
|
|
|
|
134u, 137u, 141u, 145u, 146u, 147u, 148u, 149u, |
|
15418
|
|
|
|
|
|
|
154u, 155u, 156u, 157u, 158u, 159u, 168u, 169u, |
|
15419
|
|
|
|
|
|
|
170u, 144u, 153u, 165u, 169u, 173u, 178u, 187u, |
|
15420
|
|
|
|
|
|
|
191u, 128u, 130u, 133u, 139u, 170u, 173u, 130u, |
|
15421
|
|
|
|
|
|
|
132u, 160u, 177u, 128u, 148u, 150u, 191u, 162u, |
|
15422
|
|
|
|
|
|
|
187u, 128u, 156u, 158u, 159u, 165u, 166u, 169u, |
|
15423
|
|
|
|
|
|
|
172u, 174u, 185u, 189u, 191u, 128u, 131u, 133u, |
|
15424
|
|
|
|
|
|
|
191u, 128u, 133u, 135u, 138u, 141u, 148u, 150u, |
|
15425
|
|
|
|
|
|
|
156u, 158u, 185u, 187u, 190u, 134u, 128u, 132u, |
|
15426
|
|
|
|
|
|
|
138u, 144u, 146u, 191u, 128u, 165u, 168u, 191u, |
|
15427
|
|
|
|
|
|
|
128u, 130u, 154u, 156u, 186u, 188u, 191u, 128u, |
|
15428
|
|
|
|
|
|
|
148u, 150u, 180u, 182u, 191u, 128u, 142u, 144u, |
|
15429
|
|
|
|
|
|
|
174u, 176u, 191u, 128u, 136u, 138u, 168u, 170u, |
|
15430
|
|
|
|
|
|
|
191u, 128u, 130u, 132u, 139u, 142u, 191u, 128u, |
|
15431
|
|
|
|
|
|
|
182u, 187u, 191u, 181u, 128u, 172u, 132u, 155u, |
|
15432
|
|
|
|
|
|
|
159u, 161u, 175u, 163u, 184u, 185u, 186u, 160u, |
|
15433
|
|
|
|
|
|
|
162u, 128u, 132u, 135u, 150u, 164u, 167u, 185u, |
|
15434
|
|
|
|
|
|
|
187u, 128u, 131u, 133u, 159u, 161u, 162u, 169u, |
|
15435
|
|
|
|
|
|
|
178u, 180u, 183u, 130u, 135u, 137u, 139u, 148u, |
|
15436
|
|
|
|
|
|
|
151u, 153u, 155u, 157u, 159u, 164u, 190u, 141u, |
|
15437
|
|
|
|
|
|
|
143u, 145u, 146u, 161u, 162u, 167u, 170u, 172u, |
|
15438
|
|
|
|
|
|
|
178u, 180u, 183u, 185u, 188u, 128u, 137u, 139u, |
|
15439
|
|
|
|
|
|
|
155u, 161u, 163u, 165u, 169u, 171u, 187u, 132u, |
|
15440
|
|
|
|
|
|
|
155u, 128u, 191u, 128u, 150u, 156u, 160u, 128u, |
|
15441
|
|
|
|
|
|
|
191u, 128u, 180u, 186u, 128u, 185u, 128u, 161u, |
|
15442
|
|
|
|
|
|
|
168u, 160u, 167u, 128u, 157u, 160u, 135u, 132u, |
|
15443
|
|
|
|
|
|
|
134u, 128u, 175u, 48u, 57u, 48u, 49u, 50u, |
|
15444
|
|
|
|
|
|
|
51u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
15445
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
|
15446
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15447
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 48u, |
|
15448
|
|
|
|
|
|
|
49u, 50u, 51u, 57u, 65u, 90u, 97u, 122u, |
|
15449
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15450
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15451
|
|
|
|
|
|
|
45u, 46u, 53u, 48u, 52u, 54u, 57u, 65u, |
|
15452
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 53u, 54u, |
|
15453
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15454
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15455
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 53u, |
|
15456
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15457
|
|
|
|
|
|
|
45u, 46u, 48u, 53u, 54u, 57u, 65u, 90u, |
|
15458
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
|
15459
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
|
15460
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 48u, 52u, 54u, |
|
15461
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15462
|
|
|
|
|
|
|
53u, 54u, 57u, 65u, 90u, 97u, 122u, 48u, |
|
15463
|
|
|
|
|
|
|
49u, 50u, 51u, 57u, 65u, 90u, 97u, 122u, |
|
15464
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15465
|
|
|
|
|
|
|
45u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15466
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 48u, |
|
15467
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15468
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 48u, 49u, 50u, |
|
15469
|
|
|
|
|
|
|
51u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
15470
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
|
15471
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15472
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 48u, |
|
15473
|
|
|
|
|
|
|
49u, 50u, 51u, 57u, 65u, 90u, 97u, 122u, |
|
15474
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15475
|
|
|
|
|
|
|
45u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15476
|
|
|
|
|
|
|
45u, 46u, 53u, 48u, 52u, 54u, 57u, 65u, |
|
15477
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 53u, 54u, |
|
15478
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15479
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15480
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 53u, |
|
15481
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15482
|
|
|
|
|
|
|
45u, 46u, 48u, 53u, 54u, 57u, 65u, 90u, |
|
15483
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
|
15484
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
|
15485
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 48u, 52u, 54u, |
|
15486
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 48u, |
|
15487
|
|
|
|
|
|
|
53u, 54u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
15488
|
|
|
|
|
|
|
45u, 46u, 58u, 64u, 95u, 36u, 37u, 39u, |
|
15489
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
15490
|
|
|
|
|
|
|
45u, 58u, 64u, 95u, 36u, 37u, 39u, 46u, |
|
15491
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
15492
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15493
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 58u, |
|
15494
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 46u, 48u, 57u, |
|
15495
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 58u, |
|
15496
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 57u, |
|
15497
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 95u, 36u, 37u, |
|
15498
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15499
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
|
15500
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
|
15501
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
|
15502
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
|
15503
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
|
15504
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
|
15505
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
15506
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
15507
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
|
15508
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
|
15509
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15510
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
15511
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15512
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
15513
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
|
15514
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
|
15515
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
15516
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15517
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15518
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15519
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15520
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15521
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15522
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
|
15523
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
|
15524
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15525
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
|
15526
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15527
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
15528
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15529
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
15530
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15531
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15532
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15533
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15534
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
|
15535
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
15536
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
15537
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
15538
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
15539
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
15540
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
15541
|
|
|
|
|
|
|
97u, 122u, 33u, 47u, 95u, 36u, 37u, 39u, |
|
15542
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 47u, 33u, 48u, |
|
15543
|
|
|
|
|
|
|
49u, 50u, 95u, 36u, 37u, 39u, 46u, 51u, |
|
15544
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 58u, 64u, |
|
15545
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 48u, 57u, 65u, |
|
15546
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15547
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
15548
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 58u, 64u, 95u, |
|
15549
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 48u, 57u, 65u, 90u, |
|
15550
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
15551
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
15552
|
|
|
|
|
|
|
97u, 122u, 33u, 58u, 64u, 95u, 36u, 37u, |
|
15553
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15554
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15555
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15556
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
|
15557
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
|
15558
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
|
15559
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
|
15560
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
|
15561
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
|
15562
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
|
15563
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
|
15564
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
|
15565
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
|
15566
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15567
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
15568
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15569
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
|
15570
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
|
15571
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
|
15572
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
15573
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15574
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15575
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15576
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15577
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15578
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15579
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
|
15580
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
|
15581
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
|
15582
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
|
15583
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15584
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
15585
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15586
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
15587
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15588
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15589
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15590
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
|
15591
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
|
15592
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 47u, 58u, 48u, 57u, |
|
15593
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 40u, 44u, |
|
15594
|
|
|
|
|
|
|
46u, 61u, 63u, 95u, 194u, 195u, 203u, 205u, |
|
15595
|
|
|
|
|
|
|
206u, 207u, 210u, 212u, 213u, 214u, 215u, 216u, |
|
15596
|
|
|
|
|
|
|
217u, 219u, 220u, 221u, 222u, 223u, 224u, 225u, |
|
15597
|
|
|
|
|
|
|
226u, 227u, 228u, 233u, 234u, 237u, 239u, 240u, |
|
15598
|
|
|
|
|
|
|
243u, 297u, 553u, 36u, 38u, 42u, 57u, 58u, |
|
15599
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 196u, 218u, 229u, |
|
15600
|
|
|
|
|
|
|
236u, 47u, 48u, 57u, 45u, 46u, 47u, 58u, |
|
15601
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
|
15602
|
|
|
|
|
|
|
47u, 58u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15603
|
|
|
|
|
|
|
45u, 46u, 47u, 58u, 48u, 57u, 65u, 90u, |
|
15604
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 47u, 53u, 58u, 48u, |
|
15605
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15606
|
|
|
|
|
|
|
46u, 47u, 58u, 48u, 53u, 54u, 57u, 65u, |
|
15607
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 47u, 58u, 48u, |
|
15608
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 47u, |
|
15609
|
|
|
|
|
|
|
58u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
|
15610
|
|
|
|
|
|
|
46u, 47u, 58u, 48u, 57u, 65u, 90u, 97u, |
|
15611
|
|
|
|
|
|
|
122u, 45u, 46u, 47u, 58u, 48u, 57u, 65u, |
|
15612
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 47u, 53u, 58u, |
|
15613
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15614
|
|
|
|
|
|
|
45u, 46u, 47u, 58u, 48u, 53u, 54u, 57u, |
|
15615
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 47u, |
|
15616
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
15617
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 47u, 64u, |
|
15618
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 48u, 57u, 65u, |
|
15619
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 47u, 58u, |
|
15620
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 57u, |
|
15621
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 47u, |
|
15622
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
|
15623
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15624
|
|
|
|
|
|
|
47u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15625
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
15626
|
|
|
|
|
|
|
46u, 47u, 53u, 58u, 64u, 95u, 36u, 37u, |
|
15627
|
|
|
|
|
|
|
39u, 44u, 48u, 52u, 54u, 57u, 65u, 90u, |
|
15628
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 47u, 58u, 64u, |
|
15629
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
|
15630
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15631
|
|
|
|
|
|
|
47u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15632
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
|
15633
|
|
|
|
|
|
|
46u, 47u, 58u, 64u, 95u, 36u, 37u, 39u, |
|
15634
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
|
15635
|
|
|
|
|
|
|
45u, 46u, 47u, 58u, 64u, 95u, 36u, 37u, |
|
15636
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
|
15637
|
|
|
|
|
|
|
33u, 45u, 46u, 47u, 58u, 64u, 95u, 36u, |
|
15638
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
|
15639
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 47u, 53u, 58u, 64u, |
|
15640
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 52u, 54u, |
|
15641
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
|
15642
|
|
|
|
|
|
|
47u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
|
15643
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
|
15644
|
|
|
|
|
|
|
0 |
|
15645
|
|
|
|
|
|
|
}; |
|
15646
|
|
|
|
|
|
|
|
|
15647
|
|
|
|
|
|
|
static const char _url_detector_single_lengths[] = { |
|
15648
|
|
|
|
|
|
|
0, 5, 4, 2, 3, 3, 2, 1, |
|
15649
|
|
|
|
|
|
|
2, 0, 2, 39, 2, 0, 0, 2, |
|
15650
|
|
|
|
|
|
|
1, 2, 0, 0, 0, 1, 1, 1, |
|
15651
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 1, 32, |
|
15652
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
|
15653
|
|
|
|
|
|
|
2, 0, 2, 0, 0, 1, 2, 0, |
|
15654
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 1, 2, 0, |
|
15655
|
|
|
|
|
|
|
0, 5, 1, 4, 0, 0, 1, 38, |
|
15656
|
|
|
|
|
|
|
0, 0, 2, 1, 0, 1, 0, 0, |
|
15657
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15658
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
|
15659
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 0, 0, 0, |
|
15660
|
|
|
|
|
|
|
0, 0, 3, 1, 0, 20, 1, 0, |
|
15661
|
|
|
|
|
|
|
0, 6, 1, 0, 0, 0, 0, 0, |
|
15662
|
|
|
|
|
|
|
0, 0, 1, 2, 2, 0, 0, 1, |
|
15663
|
|
|
|
|
|
|
11, 0, 0, 0, 0, 0, 0, 0, |
|
15664
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 0, 20, 0, |
|
15665
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 2, |
|
15666
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 0, |
|
15667
|
|
|
|
|
|
|
0, 2, 0, 0, 17, 0, 1, 0, |
|
15668
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
15669
|
|
|
|
|
|
|
0, 0, 0, 14, 38, 1, 0, 0, |
|
15670
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 0, 0, 0, |
|
15671
|
|
|
|
|
|
|
0, 0, 0, 0, 3, 0, 0, 0, |
|
15672
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 0, |
|
15673
|
|
|
|
|
|
|
0, 0, 0, 0, 23, 1, 0, 0, |
|
15674
|
|
|
|
|
|
|
1, 1, 0, 1, 0, 0, 2, 1, |
|
15675
|
|
|
|
|
|
|
0, 1, 2, 0, 1, 3, 0, 0, |
|
15676
|
|
|
|
|
|
|
1, 1, 0, 9, 0, 0, 0, 0, |
|
15677
|
|
|
|
|
|
|
0, 0, 4, 0, 0, 0, 18, 0, |
|
15678
|
|
|
|
|
|
|
0, 0, 0, 0, 2, 0, 0, 1, |
|
15679
|
|
|
|
|
|
|
0, 1, 0, 0, 0, 0, 0, 1, |
|
15680
|
|
|
|
|
|
|
1, 4, 0, 4, 12, 0, 1, 1, |
|
15681
|
|
|
|
|
|
|
0, 2, 0, 1, 0, 1, 0, 1, |
|
15682
|
|
|
|
|
|
|
1, 0, 0, 3, 2, 3, 2, 3, |
|
15683
|
|
|
|
|
|
|
2, 2, 3, 2, 2, 2, 3, 2, |
|
15684
|
|
|
|
|
|
|
2, 2, 3, 2, 3, 2, 1, 2, |
|
15685
|
|
|
|
|
|
|
0, 2, 3, 2, 3, 2, 3, 2, |
|
15686
|
|
|
|
|
|
|
2, 3, 2, 2, 2, 3, 2, 2, |
|
15687
|
|
|
|
|
|
|
2, 3, 2, 6, 5, 6, 4, 6, |
|
15688
|
|
|
|
|
|
|
2, 7, 6, 7, 6, 7, 6, 6, |
|
15689
|
|
|
|
|
|
|
7, 6, 6, 6, 7, 6, 6, 6, |
|
15690
|
|
|
|
|
|
|
7, 6, 6, 6, 6, 3, 1, 5, |
|
15691
|
|
|
|
|
|
|
4, 6, 5, 6, 4, 6, 7, 6, |
|
15692
|
|
|
|
|
|
|
7, 6, 7, 6, 6, 7, 6, 6, |
|
15693
|
|
|
|
|
|
|
6, 7, 6, 6, 6, 7, 6, 4, |
|
15694
|
|
|
|
|
|
|
39, 1, 4, 4, 4, 5, 4, 4, |
|
15695
|
|
|
|
|
|
|
4, 4, 4, 5, 4, 7, 4, 7, |
|
15696
|
|
|
|
|
|
|
7, 7, 8, 7, 7, 7, 7, 7, |
|
15697
|
|
|
|
|
|
|
8, 7 |
|
15698
|
|
|
|
|
|
|
}; |
|
15699
|
|
|
|
|
|
|
|
|
15700
|
|
|
|
|
|
|
static const char _url_detector_range_lengths[] = { |
|
15701
|
|
|
|
|
|
|
0, 5, 5, 5, 5, 3, 3, 3, |
|
15702
|
|
|
|
|
|
|
3, 3, 3, 7, 3, 3, 1, 3, |
|
15703
|
|
|
|
|
|
|
3, 3, 2, 2, 2, 2, 2, 4, |
|
15704
|
|
|
|
|
|
|
2, 2, 4, 1, 2, 1, 1, 0, |
|
15705
|
|
|
|
|
|
|
1, 1, 1, 1, 3, 7, 7, 9, |
|
15706
|
|
|
|
|
|
|
5, 8, 5, 8, 8, 10, 4, 6, |
|
15707
|
|
|
|
|
|
|
8, 7, 7, 5, 6, 4, 5, 1, |
|
15708
|
|
|
|
|
|
|
2, 8, 4, 3, 3, 3, 0, 3, |
|
15709
|
|
|
|
|
|
|
2, 2, 3, 5, 5, 3, 3, 3, |
|
15710
|
|
|
|
|
|
|
2, 2, 1, 2, 2, 2, 3, 4, |
|
15711
|
|
|
|
|
|
|
4, 3, 1, 2, 1, 3, 2, 2, |
|
15712
|
|
|
|
|
|
|
2, 2, 2, 3, 3, 1, 2, 3, |
|
15713
|
|
|
|
|
|
|
2, 3, 4, 2, 7, 0, 2, 2, |
|
15714
|
|
|
|
|
|
|
1, 5, 2, 1, 1, 1, 1, 1, |
|
15715
|
|
|
|
|
|
|
2, 2, 2, 2, 1, 5, 5, 0, |
|
15716
|
|
|
|
|
|
|
1, 4, 4, 2, 2, 3, 1, 1, |
|
15717
|
|
|
|
|
|
|
2, 2, 2, 1, 1, 1, 1, 1, |
|
15718
|
|
|
|
|
|
|
1, 2, 2, 2, 3, 1, 2, 3, |
|
15719
|
|
|
|
|
|
|
2, 2, 2, 1, 4, 4, 6, 3, |
|
15720
|
|
|
|
|
|
|
3, 1, 2, 2, 1, 1, 5, 3, |
|
15721
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 1, 2, |
|
15722
|
|
|
|
|
|
|
2, 1, 4, 1, 2, 4, 2, 1, |
|
15723
|
|
|
|
|
|
|
1, 1, 1, 0, 2, 2, 2, 3, |
|
15724
|
|
|
|
|
|
|
2, 2, 1, 2, 3, 3, 2, 3, |
|
15725
|
|
|
|
|
|
|
2, 2, 6, 2, 1, 3, 3, 2, |
|
15726
|
|
|
|
|
|
|
1, 1, 2, 1, 1, 2, 2, 2, |
|
15727
|
|
|
|
|
|
|
1, 4, 2, 5, 2, 8, 6, 2, |
|
15728
|
|
|
|
|
|
|
2, 1, 1, 3, 1, 2, 1, 1, |
|
15729
|
|
|
|
|
|
|
1, 1, 1, 1, 2, 2, 5, 1, |
|
15730
|
|
|
|
|
|
|
2, 1, 0, 1, 2, 3, 1, 3, |
|
15731
|
|
|
|
|
|
|
3, 1, 1, 2, 6, 2, 6, 3, |
|
15732
|
|
|
|
|
|
|
2, 3, 3, 3, 3, 3, 2, 1, |
|
15733
|
|
|
|
|
|
|
2, 1, 2, 5, 7, 5, 0, 1, |
|
15734
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 0, |
|
15735
|
|
|
|
|
|
|
1, 1, 1, 3, 3, 3, 3, 3, |
|
15736
|
|
|
|
|
|
|
3, 3, 4, 4, 3, 3, 4, 4, |
|
15737
|
|
|
|
|
|
|
3, 3, 4, 4, 3, 3, 3, 3, |
|
15738
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
|
15739
|
|
|
|
|
|
|
3, 4, 4, 3, 3, 4, 4, 3, |
|
15740
|
|
|
|
|
|
|
3, 4, 4, 5, 5, 5, 5, 5, |
|
15741
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
|
15742
|
|
|
|
|
|
|
6, 6, 5, 5, 6, 6, 5, 5, |
|
15743
|
|
|
|
|
|
|
6, 6, 5, 5, 5, 4, 0, 5, |
|
15744
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
|
15745
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 6, 6, 5, |
|
15746
|
|
|
|
|
|
|
5, 6, 6, 5, 5, 6, 6, 3, |
|
15747
|
|
|
|
|
|
|
7, 1, 3, 3, 3, 4, 4, 3, |
|
15748
|
|
|
|
|
|
|
3, 3, 3, 4, 4, 5, 5, 5, |
|
15749
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 5, 5, |
|
15750
|
|
|
|
|
|
|
6, 6 |
|
15751
|
|
|
|
|
|
|
}; |
|
15752
|
|
|
|
|
|
|
|
|
15753
|
|
|
|
|
|
|
static const short _url_detector_index_offsets[] = { |
|
15754
|
|
|
|
|
|
|
0, 0, 11, 21, 29, 38, 45, 51, |
|
15755
|
|
|
|
|
|
|
56, 62, 66, 72, 119, 125, 129, 131, |
|
15756
|
|
|
|
|
|
|
137, 142, 148, 151, 154, 157, 161, 165, |
|
15757
|
|
|
|
|
|
|
171, 174, 177, 183, 185, 188, 190, 193, |
|
15758
|
|
|
|
|
|
|
226, 228, 230, 232, 234, 238, 247, 256, |
|
15759
|
|
|
|
|
|
|
267, 275, 284, 292, 301, 310, 322, 329, |
|
15760
|
|
|
|
|
|
|
336, 345, 353, 362, 368, 376, 382, 390, |
|
15761
|
|
|
|
|
|
|
392, 395, 409, 415, 423, 427, 431, 433, |
|
15762
|
|
|
|
|
|
|
475, 478, 481, 487, 494, 500, 505, 509, |
|
15763
|
|
|
|
|
|
|
513, 516, 519, 521, 524, 527, 530, 534, |
|
15764
|
|
|
|
|
|
|
539, 545, 549, 551, 554, 556, 560, 563, |
|
15765
|
|
|
|
|
|
|
566, 569, 572, 576, 581, 585, 587, 590, |
|
15766
|
|
|
|
|
|
|
594, 597, 601, 609, 613, 621, 642, 646, |
|
15767
|
|
|
|
|
|
|
649, 651, 663, 667, 669, 671, 673, 675, |
|
15768
|
|
|
|
|
|
|
677, 680, 683, 687, 692, 696, 702, 708, |
|
15769
|
|
|
|
|
|
|
710, 723, 728, 733, 736, 739, 743, 745, |
|
15770
|
|
|
|
|
|
|
747, 750, 753, 757, 759, 762, 764, 786, |
|
15771
|
|
|
|
|
|
|
788, 790, 793, 797, 800, 804, 806, 809, |
|
15772
|
|
|
|
|
|
|
815, 818, 821, 825, 827, 832, 837, 844, |
|
15773
|
|
|
|
|
|
|
848, 852, 856, 859, 862, 881, 883, 890, |
|
15774
|
|
|
|
|
|
|
894, 896, 898, 901, 904, 907, 910, 912, |
|
15775
|
|
|
|
|
|
|
915, 918, 920, 925, 941, 982, 988, 991, |
|
15776
|
|
|
|
|
|
|
993, 995, 997, 999, 1001, 1004, 1007, 1010, |
|
15777
|
|
|
|
|
|
|
1014, 1017, 1020, 1022, 1025, 1032, 1036, 1039, |
|
15778
|
|
|
|
|
|
|
1043, 1046, 1049, 1057, 1060, 1062, 1066, 1070, |
|
15779
|
|
|
|
|
|
|
1073, 1075, 1077, 1080, 1082, 1107, 1111, 1114, |
|
15780
|
|
|
|
|
|
|
1117, 1120, 1126, 1129, 1136, 1139, 1148, 1157, |
|
15781
|
|
|
|
|
|
|
1161, 1164, 1167, 1171, 1175, 1178, 1184, 1186, |
|
15782
|
|
|
|
|
|
|
1188, 1191, 1194, 1196, 1207, 1210, 1213, 1219, |
|
15783
|
|
|
|
|
|
|
1221, 1224, 1226, 1231, 1233, 1236, 1240, 1260, |
|
15784
|
|
|
|
|
|
|
1264, 1268, 1270, 1272, 1275, 1284, 1287, 1294, |
|
15785
|
|
|
|
|
|
|
1299, 1302, 1307, 1311, 1315, 1319, 1323, 1326, |
|
15786
|
|
|
|
|
|
|
1329, 1333, 1339, 1342, 1352, 1372, 1378, 1380, |
|
15787
|
|
|
|
|
|
|
1383, 1385, 1389, 1391, 1394, 1396, 1399, 1401, |
|
15788
|
|
|
|
|
|
|
1403, 1406, 1408, 1410, 1417, 1423, 1430, 1436, |
|
15789
|
|
|
|
|
|
|
1443, 1449, 1455, 1463, 1470, 1476, 1482, 1490, |
|
15790
|
|
|
|
|
|
|
1497, 1503, 1509, 1517, 1524, 1531, 1537, 1542, |
|
15791
|
|
|
|
|
|
|
1548, 1552, 1558, 1565, 1571, 1578, 1584, 1591, |
|
15792
|
|
|
|
|
|
|
1597, 1603, 1611, 1618, 1624, 1630, 1638, 1645, |
|
15793
|
|
|
|
|
|
|
1651, 1657, 1665, 1672, 1684, 1695, 1707, 1717, |
|
15794
|
|
|
|
|
|
|
1729, 1737, 1750, 1762, 1775, 1787, 1800, 1812, |
|
15795
|
|
|
|
|
|
|
1824, 1838, 1851, 1863, 1875, 1889, 1902, 1914, |
|
15796
|
|
|
|
|
|
|
1926, 1940, 1953, 1965, 1977, 1989, 1997, 1999, |
|
15797
|
|
|
|
|
|
|
2010, 2020, 2032, 2043, 2055, 2065, 2077, 2090, |
|
15798
|
|
|
|
|
|
|
2102, 2115, 2127, 2140, 2152, 2164, 2178, 2191, |
|
15799
|
|
|
|
|
|
|
2203, 2215, 2229, 2242, 2254, 2266, 2280, 2293, |
|
15800
|
|
|
|
|
|
|
2301, 2348, 2351, 2359, 2367, 2375, 2385, 2394, |
|
15801
|
|
|
|
|
|
|
2402, 2410, 2418, 2426, 2436, 2445, 2458, 2468, |
|
15802
|
|
|
|
|
|
|
2481, 2494, 2507, 2522, 2536, 2549, 2562, 2575, |
|
15803
|
|
|
|
|
|
|
2588, 2603 |
|
15804
|
|
|
|
|
|
|
}; |
|
15805
|
|
|
|
|
|
|
|
|
15806
|
|
|
|
|
|
|
static const short _url_detector_indicies[] = { |
|
15807
|
|
|
|
|
|
|
0, 2, 3, 4, 0, 0, 0, 5, |
|
15808
|
|
|
|
|
|
|
6, 6, 1, 0, 7, 8, 0, 0, |
|
15809
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 9, 9, 9, |
|
15810
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 9, 10, 9, |
|
15811
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 1, 11, 12, |
|
15812
|
|
|
|
|
|
|
13, 14, 15, 15, 1, 16, 17, 15, |
|
15813
|
|
|
|
|
|
|
15, 15, 1, 16, 15, 15, 15, 1, |
|
15814
|
|
|
|
|
|
|
16, 18, 15, 15, 15, 1, 15, 19, |
|
15815
|
|
|
|
|
|
|
19, 1, 16, 18, 15, 20, 20, 1, |
|
15816
|
|
|
|
|
|
|
21, 21, 23, 21, 21, 22, 21, 22, |
|
15817
|
|
|
|
|
|
|
24, 25, 27, 28, 29, 30, 31, 32, |
|
15818
|
|
|
|
|
|
|
33, 34, 35, 36, 37, 38, 39, 40, |
|
15819
|
|
|
|
|
|
|
41, 42, 43, 44, 45, 46, 47, 49, |
|
15820
|
|
|
|
|
|
|
50, 51, 52, 53, 54, 55, 56, 22, |
|
15821
|
|
|
|
|
|
|
22, 21, 22, 22, 26, 48, 1, 22, |
|
15822
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
|
15823
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 22, |
|
15824
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 1, 22, 22, |
|
15825
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
|
15826
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
|
15827
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 22, |
|
15828
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
|
15829
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
|
15830
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 1, 22, 22, |
|
15831
|
|
|
|
|
|
|
1, 57, 58, 59, 60, 26, 61, 62, |
|
15832
|
|
|
|
|
|
|
63, 64, 65, 66, 67, 68, 69, 70, |
|
15833
|
|
|
|
|
|
|
71, 72, 73, 74, 75, 76, 77, 78, |
|
15834
|
|
|
|
|
|
|
79, 80, 81, 82, 83, 84, 85, 86, |
|
15835
|
|
|
|
|
|
|
87, 1, 22, 1, 22, 1, 22, 1, |
|
15836
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
|
15837
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 1, 22, |
|
15838
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 1, |
|
15839
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
|
15840
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
|
15841
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
|
15842
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
|
15843
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
|
15844
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
|
15845
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
|
15846
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
|
15847
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 22, 22, 22, |
|
15848
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 1, |
|
15849
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
|
15850
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
|
15851
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
|
15852
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 22, 22, 1, |
|
15853
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 1, |
|
15854
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
|
15855
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 1, |
|
15856
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
|
15857
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
|
15858
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
|
15859
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 1, 22, |
|
15860
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
|
15861
|
|
|
|
|
|
|
1, 88, 89, 90, 91, 92, 93, 94, |
|
15862
|
|
|
|
|
|
|
95, 96, 97, 98, 99, 100, 101, 102, |
|
15863
|
|
|
|
|
|
|
103, 104, 105, 106, 107, 108, 109, 110, |
|
15864
|
|
|
|
|
|
|
111, 112, 113, 114, 115, 116, 117, 106, |
|
15865
|
|
|
|
|
|
|
118, 119, 120, 121, 122, 123, 124, 26, |
|
15866
|
|
|
|
|
|
|
26, 26, 1, 22, 22, 1, 22, 22, |
|
15867
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
|
15868
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
|
15869
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
|
15870
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 22, |
|
15871
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
|
15872
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
|
15873
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
|
15874
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
|
15875
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 1, 22, |
|
15876
|
|
|
|
|
|
|
22, 1, 22, 1, 22, 22, 22, 1, |
|
15877
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
|
15878
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 22, 1, |
|
15879
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
|
15880
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 1, 22, 22, |
|
15881
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
|
15882
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
|
15883
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 22, |
|
15884
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 125, 126, 127, |
|
15885
|
|
|
|
|
|
|
128, 129, 130, 131, 58, 132, 133, 134, |
|
15886
|
|
|
|
|
|
|
135, 136, 26, 137, 138, 139, 140, 141, |
|
15887
|
|
|
|
|
|
|
142, 1, 22, 22, 22, 1, 22, 22, |
|
15888
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 22, |
|
15889
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 1, 22, |
|
15890
|
|
|
|
|
|
|
22, 22, 1, 22, 1, 22, 1, 22, |
|
15891
|
|
|
|
|
|
|
1, 22, 1, 22, 1, 22, 22, 1, |
|
15892
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
|
15893
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 1, |
|
15894
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
|
15895
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 1, 143, 98, |
|
15896
|
|
|
|
|
|
|
144, 145, 146, 26, 147, 148, 149, 150, |
|
15897
|
|
|
|
|
|
|
151, 26, 1, 22, 22, 22, 22, 1, |
|
15898
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 1, |
|
15899
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
|
15900
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 1, 22, 22, |
|
15901
|
|
|
|
|
|
|
1, 108, 26, 26, 1, 26, 1, 152, |
|
15902
|
|
|
|
|
|
|
26, 1, 22, 1, 153, 154, 155, 156, |
|
15903
|
|
|
|
|
|
|
41, 157, 158, 159, 160, 117, 161, 162, |
|
15904
|
|
|
|
|
|
|
163, 164, 165, 166, 167, 168, 169, 170, |
|
15905
|
|
|
|
|
|
|
26, 1, 22, 1, 22, 1, 22, 22, |
|
15906
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 22, 22, 1, |
|
15907
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 1, 22, 22, |
|
15908
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 1, 22, |
|
15909
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
|
15910
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 1, |
|
15911
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
|
15912
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 1, |
|
15913
|
|
|
|
|
|
|
22, 22, 22, 1, 171, 172, 26, 1, |
|
15914
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 162, 173, |
|
15915
|
|
|
|
|
|
|
174, 175, 41, 176, 177, 39, 178, 179, |
|
15916
|
|
|
|
|
|
|
180, 181, 182, 183, 184, 185, 186, 26, |
|
15917
|
|
|
|
|
|
|
1, 22, 1, 22, 22, 22, 22, 22, |
|
15918
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 1, |
|
15919
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 1, |
|
15920
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 1, |
|
15921
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 1, |
|
15922
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 187, 188, 189, |
|
15923
|
|
|
|
|
|
|
190, 191, 192, 193, 194, 195, 196, 197, |
|
15924
|
|
|
|
|
|
|
198, 199, 200, 48, 1, 201, 202, 26, |
|
15925
|
|
|
|
|
|
|
203, 204, 205, 206, 207, 208, 209, 171, |
|
15926
|
|
|
|
|
|
|
210, 89, 211, 212, 213, 214, 165, 215, |
|
15927
|
|
|
|
|
|
|
216, 217, 218, 219, 220, 221, 178, 222, |
|
15928
|
|
|
|
|
|
|
223, 224, 225, 108, 226, 227, 26, 228, |
|
15929
|
|
|
|
|
|
|
229, 230, 231, 26, 26, 1, 22, 22, |
|
15930
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
|
15931
|
|
|
|
|
|
|
1, 22, 1, 22, 1, 22, 1, 22, |
|
15932
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
|
15933
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
|
15934
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 1, 22, 22, |
|
15935
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 1, |
|
15936
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
|
15937
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
|
15938
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 22, |
|
15939
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 1, 22, 22, |
|
15940
|
|
|
|
|
|
|
22, 1, 22, 22, 22, 1, 22, 22, |
|
15941
|
|
|
|
|
|
|
1, 22, 1, 22, 1, 22, 22, 1, |
|
15942
|
|
|
|
|
|
|
22, 1, 232, 203, 233, 234, 235, 236, |
|
15943
|
|
|
|
|
|
|
237, 238, 239, 240, 241, 26, 242, 243, |
|
15944
|
|
|
|
|
|
|
244, 26, 245, 106, 130, 246, 131, 247, |
|
15945
|
|
|
|
|
|
|
205, 26, 1, 22, 22, 22, 1, 22, |
|
15946
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 1, |
|
15947
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
|
15948
|
|
|
|
|
|
|
1, 22, 22, 22, 22, 22, 22, 1, |
|
15949
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
|
15950
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
|
15951
|
|
|
|
|
|
|
22, 22, 22, 22, 1, 22, 22, 22, |
|
15952
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 1, 22, |
|
15953
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
|
15954
|
|
|
|
|
|
|
22, 1, 173, 248, 249, 26, 26, 1, |
|
15955
|
|
|
|
|
|
|
22, 1, 22, 1, 248, 26, 1, 250, |
|
15956
|
|
|
|
|
|
|
26, 1, 22, 1, 205, 251, 252, 165, |
|
15957
|
|
|
|
|
|
|
253, 254, 26, 255, 256, 26, 1, 22, |
|
15958
|
|
|
|
|
|
|
22, 1, 22, 22, 1, 22, 22, 22, |
|
15959
|
|
|
|
|
|
|
22, 22, 1, 22, 1, 22, 22, 1, |
|
15960
|
|
|
|
|
|
|
22, 1, 257, 26, 258, 259, 1, 22, |
|
15961
|
|
|
|
|
|
|
1, 22, 22, 1, 22, 22, 22, 1, |
|
15962
|
|
|
|
|
|
|
260, 261, 262, 263, 264, 265, 266, 267, |
|
15963
|
|
|
|
|
|
|
268, 269, 270, 271, 272, 273, 274, 275, |
|
15964
|
|
|
|
|
|
|
276, 277, 26, 1, 22, 22, 22, 1, |
|
15965
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 1, 22, 1, |
|
15966
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 22, 22, |
|
15967
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 1, 22, |
|
15968
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 1, 22, 22, |
|
15969
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
|
15970
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
|
15971
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 22, 1, 22, |
|
15972
|
|
|
|
|
|
|
22, 22, 1, 22, 22, 1, 22, 22, |
|
15973
|
|
|
|
|
|
|
1, 22, 22, 22, 1, 278, 279, 280, |
|
15974
|
|
|
|
|
|
|
281, 26, 1, 22, 22, 1, 22, 22, |
|
15975
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 1, |
|
15976
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
|
15977
|
|
|
|
|
|
|
22, 22, 22, 22, 22, 22, 22, 22, |
|
15978
|
|
|
|
|
|
|
22, 22, 22, 1, 22, 22, 22, 22, |
|
15979
|
|
|
|
|
|
|
22, 1, 153, 1, 282, 26, 1, 22, |
|
15980
|
|
|
|
|
|
|
1, 283, 89, 26, 1, 22, 1, 284, |
|
15981
|
|
|
|
|
|
|
26, 1, 22, 1, 285, 26, 1, 22, |
|
15982
|
|
|
|
|
|
|
1, 286, 1, 287, 26, 1, 22, 1, |
|
15983
|
|
|
|
|
|
|
288, 1, 289, 290, 291, 292, 19, 19, |
|
15984
|
|
|
|
|
|
|
1, 16, 293, 15, 15, 15, 1, 294, |
|
15985
|
|
|
|
|
|
|
295, 296, 297, 19, 19, 1, 16, 298, |
|
15986
|
|
|
|
|
|
|
15, 15, 15, 1, 299, 300, 301, 302, |
|
15987
|
|
|
|
|
|
|
19, 19, 1, 16, 298, 297, 15, 15, |
|
15988
|
|
|
|
|
|
|
1, 16, 298, 294, 15, 15, 1, 16, |
|
15989
|
|
|
|
|
|
|
298, 303, 297, 294, 15, 15, 1, 16, |
|
15990
|
|
|
|
|
|
|
298, 294, 15, 15, 15, 1, 16, 293, |
|
15991
|
|
|
|
|
|
|
292, 15, 15, 1, 16, 293, 289, 15, |
|
15992
|
|
|
|
|
|
|
15, 1, 16, 293, 304, 292, 289, 15, |
|
15993
|
|
|
|
|
|
|
15, 1, 16, 293, 289, 15, 15, 15, |
|
15994
|
|
|
|
|
|
|
1, 16, 17, 14, 15, 15, 1, 16, |
|
15995
|
|
|
|
|
|
|
17, 11, 15, 15, 1, 16, 17, 305, |
|
15996
|
|
|
|
|
|
|
14, 11, 15, 15, 1, 16, 17, 11, |
|
15997
|
|
|
|
|
|
|
15, 15, 15, 1, 306, 307, 308, 309, |
|
15998
|
|
|
|
|
|
|
310, 310, 1, 311, 312, 310, 310, 310, |
|
15999
|
|
|
|
|
|
|
1, 311, 310, 310, 310, 1, 311, 313, |
|
16000
|
|
|
|
|
|
|
310, 310, 310, 1, 310, 314, 314, 1, |
|
16001
|
|
|
|
|
|
|
311, 313, 310, 315, 315, 1, 316, 317, |
|
16002
|
|
|
|
|
|
|
318, 319, 314, 314, 1, 311, 320, 310, |
|
16003
|
|
|
|
|
|
|
310, 310, 1, 321, 322, 323, 324, 314, |
|
16004
|
|
|
|
|
|
|
314, 1, 311, 325, 310, 310, 310, 1, |
|
16005
|
|
|
|
|
|
|
326, 327, 328, 329, 314, 314, 1, 311, |
|
16006
|
|
|
|
|
|
|
325, 324, 310, 310, 1, 311, 325, 321, |
|
16007
|
|
|
|
|
|
|
310, 310, 1, 311, 325, 330, 324, 321, |
|
16008
|
|
|
|
|
|
|
310, 310, 1, 311, 325, 321, 310, 310, |
|
16009
|
|
|
|
|
|
|
310, 1, 311, 320, 319, 310, 310, 1, |
|
16010
|
|
|
|
|
|
|
311, 320, 316, 310, 310, 1, 311, 320, |
|
16011
|
|
|
|
|
|
|
331, 319, 316, 310, 310, 1, 311, 320, |
|
16012
|
|
|
|
|
|
|
316, 310, 310, 310, 1, 311, 312, 309, |
|
16013
|
|
|
|
|
|
|
310, 310, 1, 311, 312, 306, 310, 310, |
|
16014
|
|
|
|
|
|
|
1, 311, 312, 332, 309, 306, 310, 310, |
|
16015
|
|
|
|
|
|
|
1, 311, 312, 306, 310, 310, 310, 1, |
|
16016
|
|
|
|
|
|
|
0, 333, 334, 7, 8, 0, 0, 0, |
|
16017
|
|
|
|
|
|
|
335, 335, 335, 1, 0, 333, 7, 8, |
|
16018
|
|
|
|
|
|
|
0, 0, 0, 335, 335, 335, 1, 0, |
|
16019
|
|
|
|
|
|
|
333, 336, 7, 8, 0, 0, 0, 335, |
|
16020
|
|
|
|
|
|
|
335, 335, 1, 0, 7, 8, 0, 0, |
|
16021
|
|
|
|
|
|
|
0, 335, 337, 337, 1, 0, 333, 336, |
|
16022
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 335, 338, 338, |
|
16023
|
|
|
|
|
|
|
1, 9, 9, 9, 9, 339, 9, 9, |
|
16024
|
|
|
|
|
|
|
1, 0, 340, 341, 342, 7, 8, 0, |
|
16025
|
|
|
|
|
|
|
0, 0, 343, 337, 337, 1, 0, 333, |
|
16026
|
|
|
|
|
|
|
344, 7, 8, 0, 0, 0, 335, 335, |
|
16027
|
|
|
|
|
|
|
335, 1, 0, 345, 346, 347, 7, 8, |
|
16028
|
|
|
|
|
|
|
0, 0, 0, 348, 337, 337, 1, 0, |
|
16029
|
|
|
|
|
|
|
333, 349, 7, 8, 0, 0, 0, 335, |
|
16030
|
|
|
|
|
|
|
335, 335, 1, 0, 350, 351, 352, 7, |
|
16031
|
|
|
|
|
|
|
8, 0, 0, 0, 353, 337, 337, 1, |
|
16032
|
|
|
|
|
|
|
0, 333, 349, 7, 8, 0, 0, 0, |
|
16033
|
|
|
|
|
|
|
348, 335, 335, 1, 0, 333, 349, 7, |
|
16034
|
|
|
|
|
|
|
8, 0, 0, 0, 345, 335, 335, 1, |
|
16035
|
|
|
|
|
|
|
0, 333, 349, 354, 7, 8, 0, 0, |
|
16036
|
|
|
|
|
|
|
0, 348, 345, 335, 335, 1, 0, 333, |
|
16037
|
|
|
|
|
|
|
349, 7, 8, 0, 0, 0, 345, 335, |
|
16038
|
|
|
|
|
|
|
335, 335, 1, 0, 333, 344, 7, 8, |
|
16039
|
|
|
|
|
|
|
0, 0, 0, 343, 335, 335, 1, 0, |
|
16040
|
|
|
|
|
|
|
333, 344, 7, 8, 0, 0, 0, 340, |
|
16041
|
|
|
|
|
|
|
335, 335, 1, 0, 333, 344, 355, 7, |
|
16042
|
|
|
|
|
|
|
8, 0, 0, 0, 343, 340, 335, 335, |
|
16043
|
|
|
|
|
|
|
1, 0, 333, 344, 7, 8, 0, 0, |
|
16044
|
|
|
|
|
|
|
0, 340, 335, 335, 335, 1, 0, 333, |
|
16045
|
|
|
|
|
|
|
334, 7, 8, 0, 0, 0, 5, 335, |
|
16046
|
|
|
|
|
|
|
335, 1, 0, 333, 334, 7, 8, 0, |
|
16047
|
|
|
|
|
|
|
0, 0, 2, 335, 335, 1, 0, 333, |
|
16048
|
|
|
|
|
|
|
334, 356, 7, 8, 0, 0, 0, 5, |
|
16049
|
|
|
|
|
|
|
2, 335, 335, 1, 0, 333, 334, 7, |
|
16050
|
|
|
|
|
|
|
8, 0, 0, 0, 2, 335, 335, 335, |
|
16051
|
|
|
|
|
|
|
1, 0, 333, 336, 7, 8, 0, 0, |
|
16052
|
|
|
|
|
|
|
0, 335, 357, 357, 1, 0, 333, 336, |
|
16053
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 335, 358, 358, |
|
16054
|
|
|
|
|
|
|
1, 0, 333, 336, 359, 8, 0, 0, |
|
16055
|
|
|
|
|
|
|
0, 335, 358, 358, 1, 9, 360, 9, |
|
16056
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 361, 1, 362, |
|
16057
|
|
|
|
|
|
|
363, 364, 365, 362, 362, 362, 366, 367, |
|
16058
|
|
|
|
|
|
|
367, 1, 362, 7, 10, 362, 362, 362, |
|
16059
|
|
|
|
|
|
|
362, 362, 362, 1, 362, 368, 369, 7, |
|
16060
|
|
|
|
|
|
|
10, 362, 362, 362, 367, 367, 367, 1, |
|
16061
|
|
|
|
|
|
|
362, 368, 7, 10, 362, 362, 362, 367, |
|
16062
|
|
|
|
|
|
|
367, 367, 1, 362, 368, 370, 7, 10, |
|
16063
|
|
|
|
|
|
|
362, 362, 362, 367, 367, 367, 1, 362, |
|
16064
|
|
|
|
|
|
|
7, 10, 362, 362, 362, 367, 371, 371, |
|
16065
|
|
|
|
|
|
|
1, 362, 368, 370, 7, 10, 362, 362, |
|
16066
|
|
|
|
|
|
|
362, 367, 372, 372, 1, 362, 373, 374, |
|
16067
|
|
|
|
|
|
|
375, 7, 10, 362, 362, 362, 376, 371, |
|
16068
|
|
|
|
|
|
|
371, 1, 362, 368, 377, 7, 10, 362, |
|
16069
|
|
|
|
|
|
|
362, 362, 367, 367, 367, 1, 362, 378, |
|
16070
|
|
|
|
|
|
|
379, 380, 7, 10, 362, 362, 362, 381, |
|
16071
|
|
|
|
|
|
|
371, 371, 1, 362, 368, 382, 7, 10, |
|
16072
|
|
|
|
|
|
|
362, 362, 362, 367, 367, 367, 1, 362, |
|
16073
|
|
|
|
|
|
|
383, 384, 385, 7, 10, 362, 362, 362, |
|
16074
|
|
|
|
|
|
|
386, 371, 371, 1, 362, 368, 382, 7, |
|
16075
|
|
|
|
|
|
|
10, 362, 362, 362, 381, 367, 367, 1, |
|
16076
|
|
|
|
|
|
|
362, 368, 382, 7, 10, 362, 362, 362, |
|
16077
|
|
|
|
|
|
|
378, 367, 367, 1, 362, 368, 382, 387, |
|
16078
|
|
|
|
|
|
|
7, 10, 362, 362, 362, 381, 378, 367, |
|
16079
|
|
|
|
|
|
|
367, 1, 362, 368, 382, 7, 10, 362, |
|
16080
|
|
|
|
|
|
|
362, 362, 378, 367, 367, 367, 1, 362, |
|
16081
|
|
|
|
|
|
|
368, 377, 7, 10, 362, 362, 362, 376, |
|
16082
|
|
|
|
|
|
|
367, 367, 1, 362, 368, 377, 7, 10, |
|
16083
|
|
|
|
|
|
|
362, 362, 362, 373, 367, 367, 1, 362, |
|
16084
|
|
|
|
|
|
|
368, 377, 388, 7, 10, 362, 362, 362, |
|
16085
|
|
|
|
|
|
|
376, 373, 367, 367, 1, 362, 368, 377, |
|
16086
|
|
|
|
|
|
|
7, 10, 362, 362, 362, 373, 367, 367, |
|
16087
|
|
|
|
|
|
|
367, 1, 362, 368, 369, 7, 10, 362, |
|
16088
|
|
|
|
|
|
|
362, 362, 366, 367, 367, 1, 362, 368, |
|
16089
|
|
|
|
|
|
|
369, 7, 10, 362, 362, 362, 363, 367, |
|
16090
|
|
|
|
|
|
|
367, 1, 362, 368, 369, 389, 7, 10, |
|
16091
|
|
|
|
|
|
|
362, 362, 362, 366, 363, 367, 367, 1, |
|
16092
|
|
|
|
|
|
|
362, 368, 369, 7, 10, 362, 362, 362, |
|
16093
|
|
|
|
|
|
|
363, 367, 367, 367, 1, 16, 18, 22, |
|
16094
|
|
|
|
|
|
|
390, 15, 20, 20, 1, 21, 21, 23, |
|
16095
|
|
|
|
|
|
|
21, 21, 22, 21, 22, 24, 25, 27, |
|
16096
|
|
|
|
|
|
|
28, 29, 30, 31, 32, 33, 34, 35, |
|
16097
|
|
|
|
|
|
|
36, 37, 38, 39, 40, 41, 42, 43, |
|
16098
|
|
|
|
|
|
|
44, 45, 46, 47, 49, 50, 51, 52, |
|
16099
|
|
|
|
|
|
|
53, 54, 55, 56, 22, 22, 21, 22, |
|
16100
|
|
|
|
|
|
|
22, 26, 48, 1, 22, 288, 1, 16, |
|
16101
|
|
|
|
|
|
|
18, 22, 390, 15, 15, 15, 1, 16, |
|
16102
|
|
|
|
|
|
|
18, 22, 390, 302, 15, 15, 1, 16, |
|
16103
|
|
|
|
|
|
|
18, 22, 390, 299, 15, 15, 1, 16, |
|
16104
|
|
|
|
|
|
|
18, 22, 391, 390, 302, 299, 15, 15, |
|
16105
|
|
|
|
|
|
|
1, 16, 18, 22, 390, 299, 15, 15, |
|
16106
|
|
|
|
|
|
|
15, 1, 311, 313, 22, 390, 310, 315, |
|
16107
|
|
|
|
|
|
|
315, 1, 311, 313, 22, 390, 310, 310, |
|
16108
|
|
|
|
|
|
|
310, 1, 311, 313, 22, 390, 329, 310, |
|
16109
|
|
|
|
|
|
|
310, 1, 311, 313, 22, 390, 326, 310, |
|
16110
|
|
|
|
|
|
|
310, 1, 311, 313, 22, 392, 390, 329, |
|
16111
|
|
|
|
|
|
|
326, 310, 310, 1, 311, 313, 22, 390, |
|
16112
|
|
|
|
|
|
|
326, 310, 310, 310, 1, 0, 333, 336, |
|
16113
|
|
|
|
|
|
|
22, 393, 8, 0, 0, 0, 335, 338, |
|
16114
|
|
|
|
|
|
|
338, 1, 9, 22, 10, 9, 9, 9, |
|
16115
|
|
|
|
|
|
|
339, 9, 9, 1, 0, 333, 336, 22, |
|
16116
|
|
|
|
|
|
|
393, 8, 0, 0, 0, 335, 335, 335, |
|
16117
|
|
|
|
|
|
|
1, 0, 333, 336, 22, 393, 8, 0, |
|
16118
|
|
|
|
|
|
|
0, 0, 353, 335, 335, 1, 0, 333, |
|
16119
|
|
|
|
|
|
|
336, 22, 393, 8, 0, 0, 0, 350, |
|
16120
|
|
|
|
|
|
|
335, 335, 1, 0, 333, 336, 22, 394, |
|
16121
|
|
|
|
|
|
|
393, 8, 0, 0, 0, 353, 350, 335, |
|
16122
|
|
|
|
|
|
|
335, 1, 0, 333, 336, 22, 393, 8, |
|
16123
|
|
|
|
|
|
|
0, 0, 0, 350, 335, 335, 335, 1, |
|
16124
|
|
|
|
|
|
|
362, 368, 370, 22, 393, 10, 362, 362, |
|
16125
|
|
|
|
|
|
|
362, 367, 372, 372, 1, 362, 368, 370, |
|
16126
|
|
|
|
|
|
|
22, 393, 10, 362, 362, 362, 367, 367, |
|
16127
|
|
|
|
|
|
|
367, 1, 362, 368, 370, 22, 393, 10, |
|
16128
|
|
|
|
|
|
|
362, 362, 362, 386, 367, 367, 1, 362, |
|
16129
|
|
|
|
|
|
|
368, 370, 22, 393, 10, 362, 362, 362, |
|
16130
|
|
|
|
|
|
|
383, 367, 367, 1, 362, 368, 370, 22, |
|
16131
|
|
|
|
|
|
|
395, 393, 10, 362, 362, 362, 386, 383, |
|
16132
|
|
|
|
|
|
|
367, 367, 1, 362, 368, 370, 22, 393, |
|
16133
|
|
|
|
|
|
|
10, 362, 362, 362, 383, 367, 367, 367, |
|
16134
|
|
|
|
|
|
|
1, 0 |
|
16135
|
|
|
|
|
|
|
}; |
|
16136
|
|
|
|
|
|
|
|
|
16137
|
|
|
|
|
|
|
static const short _url_detector_trans_targs[] = { |
|
16138
|
|
|
|
|
|
|
2, 0, 315, 334, 336, 335, 338, 3, |
|
16139
|
|
|
|
|
|
|
292, 4, 5, 6, 288, 290, 289, 8, |
|
16140
|
|
|
|
|
|
|
7, 275, 9, 10, 367, 11, 368, 368, |
|
16141
|
|
|
|
|
|
|
12, 13, 14, 15, 16, 17, 18, 19, |
|
16142
|
|
|
|
|
|
|
20, 21, 22, 23, 24, 25, 26, 27, |
|
16143
|
|
|
|
|
|
|
28, 29, 30, 31, 63, 101, 120, 130, |
|
16144
|
|
|
|
|
|
|
131, 132, 134, 153, 156, 171, 271, 11, |
|
16145
|
|
|
|
|
|
|
368, 32, 33, 34, 35, 36, 37, 38, |
|
16146
|
|
|
|
|
|
|
39, 40, 41, 42, 43, 44, 45, 46, |
|
16147
|
|
|
|
|
|
|
47, 48, 49, 50, 51, 52, 53, 54, |
|
16148
|
|
|
|
|
|
|
55, 56, 57, 58, 59, 60, 61, 62, |
|
16149
|
|
|
|
|
|
|
64, 65, 66, 67, 68, 69, 70, 71, |
|
16150
|
|
|
|
|
|
|
72, 73, 74, 75, 76, 77, 78, 79, |
|
16151
|
|
|
|
|
|
|
80, 81, 82, 83, 84, 85, 86, 87, |
|
16152
|
|
|
|
|
|
|
88, 89, 90, 91, 92, 93, 94, 95, |
|
16153
|
|
|
|
|
|
|
96, 97, 98, 99, 100, 102, 103, 104, |
|
16154
|
|
|
|
|
|
|
105, 106, 107, 108, 109, 110, 111, 112, |
|
16155
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 121, |
|
16156
|
|
|
|
|
|
|
122, 123, 124, 125, 126, 127, 128, 129, |
|
16157
|
|
|
|
|
|
|
133, 135, 136, 137, 138, 139, 140, 141, |
|
16158
|
|
|
|
|
|
|
142, 143, 144, 145, 146, 147, 148, 149, |
|
16159
|
|
|
|
|
|
|
150, 151, 152, 154, 155, 157, 158, 159, |
|
16160
|
|
|
|
|
|
|
160, 161, 162, 163, 164, 165, 166, 167, |
|
16161
|
|
|
|
|
|
|
168, 169, 170, 172, 204, 221, 224, 225, |
|
16162
|
|
|
|
|
|
|
227, 234, 238, 257, 262, 263, 265, 267, |
|
16163
|
|
|
|
|
|
|
269, 173, 174, 175, 176, 177, 178, 179, |
|
16164
|
|
|
|
|
|
|
180, 181, 182, 183, 184, 185, 186, 187, |
|
16165
|
|
|
|
|
|
|
188, 189, 190, 191, 192, 193, 194, 195, |
|
16166
|
|
|
|
|
|
|
196, 197, 198, 199, 200, 201, 202, 203, |
|
16167
|
|
|
|
|
|
|
205, 206, 207, 208, 209, 210, 211, 212, |
|
16168
|
|
|
|
|
|
|
213, 214, 215, 216, 217, 218, 219, 220, |
|
16169
|
|
|
|
|
|
|
222, 223, 226, 228, 229, 230, 231, 232, |
|
16170
|
|
|
|
|
|
|
233, 235, 236, 237, 239, 240, 241, 242, |
|
16171
|
|
|
|
|
|
|
243, 244, 245, 246, 247, 248, 249, 250, |
|
16172
|
|
|
|
|
|
|
251, 252, 253, 254, 255, 256, 258, 259, |
|
16173
|
|
|
|
|
|
|
260, 261, 264, 266, 268, 270, 272, 273, |
|
16174
|
|
|
|
|
|
|
369, 276, 284, 286, 285, 277, 278, 280, |
|
16175
|
|
|
|
|
|
|
282, 281, 279, 370, 371, 373, 372, 283, |
|
16176
|
|
|
|
|
|
|
287, 291, 293, 311, 313, 312, 295, 294, |
|
16177
|
|
|
|
|
|
|
298, 296, 297, 375, 299, 307, 309, 308, |
|
16178
|
|
|
|
|
|
|
300, 301, 303, 305, 304, 302, 376, 377, |
|
16179
|
|
|
|
|
|
|
379, 378, 306, 310, 314, 316, 321, 317, |
|
16180
|
|
|
|
|
|
|
318, 319, 381, 382, 322, 330, 332, 331, |
|
16181
|
|
|
|
|
|
|
323, 324, 326, 328, 327, 325, 383, 384, |
|
16182
|
|
|
|
|
|
|
386, 385, 329, 333, 337, 339, 340, 341, |
|
16183
|
|
|
|
|
|
|
342, 343, 344, 345, 363, 365, 364, 347, |
|
16184
|
|
|
|
|
|
|
346, 350, 348, 349, 388, 351, 359, 361, |
|
16185
|
|
|
|
|
|
|
360, 352, 353, 355, 357, 356, 354, 389, |
|
16186
|
|
|
|
|
|
|
390, 392, 391, 358, 362, 366, 274, 374, |
|
16187
|
|
|
|
|
|
|
380, 320, 387, 393 |
|
16188
|
|
|
|
|
|
|
}; |
|
16189
|
|
|
|
|
|
|
|
|
16190
|
|
|
|
|
|
|
static const char _url_detector_trans_actions[] = { |
|
16191
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16192
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16193
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 1, 8, |
|
16194
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16195
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16196
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16197
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 3, |
|
16198
|
|
|
|
|
|
|
11, 0, 0, 0, 0, 0, 0, 0, |
|
16199
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16200
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16201
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16202
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16203
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16204
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16205
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16206
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16207
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16208
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16209
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16210
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16211
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16212
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16213
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16214
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16215
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16216
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16217
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16218
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16219
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16220
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16221
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16222
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16223
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16224
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16225
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16226
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16227
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
|
16228
|
|
|
|
|
|
|
0, 0, 0, 1, 1, 1, 1, 0, |
|
16229
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16230
|
|
|
|
|
|
|
0, 0, 0, 5, 0, 0, 0, 0, |
|
16231
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 1, |
|
16232
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 0, 0, 0, |
|
16233
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 0, 0, 0, |
|
16234
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 1, |
|
16235
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 0, 0, 0, |
|
16236
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
|
16237
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 0, 0, |
|
16238
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 1, |
|
16239
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 0, 1, |
|
16240
|
|
|
|
|
|
|
1, 0, 1, 1 |
|
16241
|
|
|
|
|
|
|
}; |
|
16242
|
|
|
|
|
|
|
|
|
16243
|
|
|
|
|
|
|
static const int url_detector_start = 1; |
|
16244
|
|
|
|
|
|
|
|
|
16245
|
14
|
|
|
|
|
|
// Runs the generated url_detector state machine over `str`, starting at its
// first character, and reports which kind of address was recognized.
//
// While scanning, the machine's actions record the most recent match (its
// kind, URL or EMAIL, and its length measured from the start of `str`) and
// maintain `parens`, a counter that one action increments and another one
// decrements (never below zero). Conditional transitions test whether the
// counter is non-zero.
//
// Parameters:
//   str    - the text to examine; at most str.len characters starting at
//            str.str are read.
//   length - optional output; when non-null it receives the recorded match
//            length (0 when nothing was recorded).
//
// Returns the recorded kind when `length` is non-null. When `length` is null,
// the recorded kind is returned only if the recorded length equals str.len;
// otherwise NO_URL is returned.
url_detector::url_type url_detector::detect(string_piece str, size_t* length) {
  const char* const text_end = str.str + str.len;
  const char* p = str.str;

  url_type result = NO_URL;
  size_t result_length = 0, parens = 0;

  int cs = url_detector_start;

  // Empty input is never scanned, and reaching state 0 stops the machine.
  if (p != text_end && cs != 0) {
    do {
      // The machine works on a widened copy of the current character, which
      // the conditional step below may replace by a value of 256 or more.
      short symbol = (*p);

      // Step 1: conditions. Binary search over the [low, high] key pairs of
      // the current state; a hit re-encodes the symbol.
      const int cond_count = _url_detector_cond_lengths[cs];
      const short* const cond_keys = _url_detector_cond_keys + (_url_detector_cond_offsets[cs]*2);
      if (cond_count > 0) {
        const short* low = cond_keys;
        const short* high = cond_keys + (cond_count<<1) - 2;
        while (low <= high) {
          const short* const probe = low + (((high-low) >> 1) & ~1);
          if (symbol < probe[0]) {
            high = probe - 2;
          } else if (symbol > probe[1]) {
            low = probe + 2;
          } else {
            // Condition space 0 is the only one handled: shift the character
            // by 256, and by another 256 while `parens` is non-zero.
            if (_url_detector_cond_spaces[_url_detector_cond_offsets[cs] + ((probe - cond_keys)>>1)] == 0) {
              symbol = (short)(256u + ((*p) - 0u));
              if (parens) symbol += 256;
            }
            break;
          }
        }
      }

      // Step 2: find the transition index for the (possibly re-encoded) symbol.
      const short* keys = _url_detector_trans_keys + _url_detector_key_offsets[cs];
      unsigned int trans = _url_detector_index_offsets[cs];
      bool matched = false;

      // 2a: single-value keys.
      const int single_count = _url_detector_single_lengths[cs];
      if (single_count > 0) {
        const short* low = keys;
        const short* high = keys + single_count - 1;
        while (low <= high) {
          const short* const probe = low + ((high-low) >> 1);
          if (symbol < *probe) {
            high = probe - 1;
          } else if (symbol > *probe) {
            low = probe + 1;
          } else {
            trans += (unsigned int)(probe - keys);
            matched = true;
            break;
          }
        }
        if (!matched) {
          // Skip past the single keys; range keys follow them.
          keys += single_count;
          trans += single_count;
        }
      }

      // 2b: [low, high] range keys, consulted only when no single key matched.
      if (!matched) {
        const int range_count = _url_detector_range_lengths[cs];
        if (range_count > 0) {
          const short* low = keys;
          const short* high = keys + (range_count<<1) - 2;
          while (low <= high) {
            const short* const probe = low + (((high-low) >> 1) & ~1);
            if (symbol < probe[0]) {
              high = probe - 2;
            } else if (symbol > probe[1]) {
              low = probe + 2;
            } else {
              trans += (unsigned int)((probe - keys)>>1);
              matched = true;
              break;
            }
          }
          // No range matched either: use the entry following all ranges.
          if (!matched) trans += range_count;
        }
      }

      // Step 3: take the transition.
      trans = _url_detector_indicies[trans];
      cs = _url_detector_trans_targs[trans];

      // Step 4: run the actions attached to the transition, if any. The
      // action list starts with its own length.
      if (_url_detector_trans_actions[trans] != 0) {
        const char* acts = _url_detector_actions + _url_detector_trans_actions[trans];
        for (unsigned int remaining = (unsigned int) *acts++; remaining > 0; --remaining) {
          switch ( *acts++ ) {
            case 0:
              result = URL;
              result_length = p - str.str + 1;
              break;
            case 1:
              result = EMAIL;
              result_length = p - str.str + 1;
              break;
            case 2:
              parens++;
              break;
            case 3:
              if (parens) parens--;
              break;
          }
        }
      }
    } while (cs != 0 && ++p != text_end);
  }

  // NOTE: a post-processing step for URL results, which would ignore the last
  // character if it is one of [.!',;?:)] (the ')' only if it is unmatched),
  // is disabled and therefore not applied here.

  if (length) *length = result_length;
  if (length || result_length == str.len) return result;
  return NO_URL;
}
|
16410
|
|
|
|
|
|
|
|
|
16411
|
|
|
|
|
|
|
} // namespace utils |
|
16412
|
|
|
|
|
|
|
|
|
16413
|
|
|
|
|
|
|
///////// |
|
16414
|
|
|
|
|
|
|
// File: version/version.h |
|
16415
|
|
|
|
|
|
|
///////// |
|
16416
|
|
|
|
|
|
|
|
|
16417
|
|
|
|
|
|
|
// This file is part of NameTag. |
|
16418
|
|
|
|
|
|
|
// |
|
16419
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
16420
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
16421
|
|
|
|
|
|
|
// |
|
16422
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
16423
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
16424
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
16425
|
|
|
|
|
|
|
|
|
16426
|
0
|
|
|
|
|
|
// Describes a library version as major.minor.patch plus an optional
// prerelease tag. The members are public and kept in this order so the
// class can be brace-initialized as {major, minor, patch, prerelease}.
class version {
 public:
  unsigned int major;  // Major version number.
  unsigned int minor;  // Minor version number.
  unsigned int patch;  // Patch version number.
  string prerelease;   // Prerelease tag; empty when there is none.

  // Returns current NameTag version.
  static version current();

  // Returns multi-line formatted version and copyright string.
  // The text in other_libraries, if non-empty, is included in the list
  // of used libraries.
  static string version_and_copyright(const string& other_libraries = string());
};
|
16439
|
|
|
|
|
|
|
|
|
16440
|
|
|
|
|
|
|
///////// |
|
16441
|
|
|
|
|
|
|
// File: version/version.cpp |
|
16442
|
|
|
|
|
|
|
///////// |
|
16443
|
|
|
|
|
|
|
|
|
16444
|
|
|
|
|
|
|
// This file is part of NameTag. |
|
16445
|
|
|
|
|
|
|
// |
|
16446
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
|
16447
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
|
16448
|
|
|
|
|
|
|
// |
|
16449
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
|
16450
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
|
16451
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
|
16452
|
|
|
|
|
|
|
|
|
16453
|
0
|
|
|
|
|
|
// Returns current NameTag version: 1.2.0 with an empty prerelease tag.
version version::current() {
  return version{1, 2, 0, ""};
}
|
16456
|
|
|
|
|
|
|
|
|
16457
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
|
16458
|
0
|
|
|
|
|
|
string version::version_and_copyright(const string& other_libraries) { |
|
16459
|
0
|
|
|
|
|
|
ostringstream info; |
|
16460
|
|
|
|
|
|
|
|
|
16461
|
|
|
|
|
|
|
auto nametag = version::current(); |
|
16462
|
|
|
|
|
|
|
auto unilib = unilib::version::current(); |
|
16463
|
|
|
|
|
|
|
auto morphodita = morphodita::version::current(); |
|
16464
|
|
|
|
|
|
|
|
|
16465
|
0
|
|
|
|
|
|
info << "NameTag version " << nametag.major << '.' << nametag.minor << '.' << nametag.patch |
|
16466
|
0
|
0
|
|
|
|
|
<< (nametag.prerelease.empty() ? "" : "-") << nametag.prerelease |
|
|
|
0
|
|
|
|
|
|
|
16467
|
0
|
|
|
|
|
|
<< " (using UniLib " << unilib.major << '.' << unilib.minor << '.' << unilib.patch |
|
16468
|
0
|
0
|
|
|
|
|
<< (unilib.prerelease.empty() ? "" : "-") << unilib.prerelease |
|
|
|
0
|
|
|
|
|
|
|
16469
|
0
|
|
|
|
|
|
<< ", MorphoDiTa " << morphodita.major << '.' << morphodita.minor << '.' << unilib.patch |
|
16470
|
0
|
0
|
|
|
|
|
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
|
|
|
0
|
|
|
|
|
|
|
16471
|
0
|
0
|
|
|
|
|
<< (other_libraries.empty() ? "" : "\nand ") << other_libraries << ")\n" |
|
|
|
0
|
|
|
|
|
|
|
16472
|
|
|
|
|
|
|
"Copyright 2016 by Institute of Formal and Applied Linguistics, Faculty of\n" |
|
16473
|
0
|
0
|
|
|
|
|
"Mathematics and Physics, Charles University in Prague, Czech Republic."; |
|
16474
|
|
|
|
|
|
|
|
|
16475
|
0
|
|
|
|
|
|
return info.str(); |
|
16476
|
|
|
|
|
|
|
} |
|
16477
|
|
|
|
|
|
|
|
|
16478
|
|
|
|
|
|
|
} // namespace nametag |
|
16479
|
12
|
50
|
|
|
|
|
} // namespace ufal |
|
|
|
50
|
|
|
|
|
|