line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2
|
|
|
|
|
|
|
// |
3
|
|
|
|
|
|
|
// This file is a bundle of all sources and headers of MorphoDiTa library. |
4
|
|
|
|
|
|
|
// Comments and copyrights of all individual files are kept. |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include |
7
|
|
|
|
|
|
|
#include |
8
|
|
|
|
|
|
|
#include |
9
|
|
|
|
|
|
|
#include |
10
|
|
|
|
|
|
|
#include |
11
|
|
|
|
|
|
|
#include |
12
|
|
|
|
|
|
|
#include |
13
|
|
|
|
|
|
|
#include |
14
|
|
|
|
|
|
|
#include |
15
|
|
|
|
|
|
|
#include |
16
|
|
|
|
|
|
|
#include |
17
|
|
|
|
|
|
|
#include |
18
|
|
|
|
|
|
|
#include |
19
|
|
|
|
|
|
|
#include |
20
|
|
|
|
|
|
|
#include |
21
|
|
|
|
|
|
|
#include |
22
|
|
|
|
|
|
|
#include |
23
|
|
|
|
|
|
|
#include |
24
|
|
|
|
|
|
|
#include |
25
|
|
|
|
|
|
|
#include |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
namespace ufal { |
28
|
|
|
|
|
|
|
namespace morphodita { |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
///////// |
31
|
|
|
|
|
|
|
// File: utils/common.h |
32
|
|
|
|
|
|
|
///////// |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
35
|
|
|
|
|
|
|
// |
36
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
37
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
38
|
|
|
|
|
|
|
// |
39
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
40
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
41
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
// Headers available in all sources |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
namespace utils {

// Bring the standard library names into namespace utils unqualified.
using namespace std;

// Assert that int is at least 4B
static_assert(sizeof(int) >= sizeof(int32_t), "Int must be at least 4B wide!");

// Assert that we are on a little endian system
// (only checked when the compiler predefines __BYTE_ORDER__).
#ifdef __BYTE_ORDER__
static_assert(__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__, "Only little endian systems are supported!");
#endif

// Write `message` to cerr followed by endl, then call exit(1).
// `message` is pasted directly after `cerr <<`, so it may itself be a chain
// of `<<` operands. Being a macro, it is not scoped by this namespace.
#define runtime_failure(message) exit((cerr << message << endl, 1))

} // namespace utils
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
///////// |
62
|
|
|
|
|
|
|
// File: common.h |
63
|
|
|
|
|
|
|
///////// |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
66
|
|
|
|
|
|
|
// |
67
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
68
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
69
|
|
|
|
|
|
|
// |
70
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
71
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
72
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
using namespace utils; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
///////// |
77
|
|
|
|
|
|
|
// File: utils/string_piece.h |
78
|
|
|
|
|
|
|
///////// |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
81
|
|
|
|
|
|
|
// |
82
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
83
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
84
|
|
|
|
|
|
|
// |
85
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
86
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
87
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
namespace utils {

// Non-owning view of a character sequence: a pointer plus a length in bytes.
// Nothing is copied, so the viewed memory must outlive the string_piece.
// The sequence does not have to be '\0' terminated.
struct string_piece {
  const char* str;
  size_t len;

  // Empty view.
  string_piece() : str(nullptr), len(0) {}
  // View of a '\0' terminated C string. A null pointer yields an empty view
  // instead of passing the null pointer to strlen.
  string_piece(const char* str) : str(str), len(str ? std::strlen(str) : 0) {}
  // View of exactly `len` bytes starting at `str`.
  string_piece(const char* str, size_t len) : str(str), len(len) {}
  // View of the contents of a std::string.
  string_piece(const std::string& str) : str(str.c_str()), len(str.size()) {}
};

// Write exactly str.len bytes of the view to the stream.
inline std::ostream& operator<<(std::ostream& os, const string_piece& str) {
  return os.write(str.str, str.len);
}

// Two views are equal when they have the same length and the same bytes.
// Empty views compare equal without touching the pointers, because memcmp
// requires valid pointers even for a zero length and an empty view may hold
// a null pointer.
inline bool operator==(const string_piece& a, const string_piece& b) {
  return a.len == b.len && (a.len == 0 || std::memcmp(a.str, b.str, a.len) == 0);
}

// Exact negation of operator==.
inline bool operator!=(const string_piece& a, const string_piece& b) {
  return !(a == b);
}

} // namespace utils
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
///////// |
116
|
|
|
|
|
|
|
// File: derivator/derivator.h |
117
|
|
|
|
|
|
|
///////// |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
120
|
|
|
|
|
|
|
// |
121
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
122
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
123
|
|
|
|
|
|
|
// |
124
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
125
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
126
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
127
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
|
// Result record of a derivation lookup: derivator::parent fills one of these.
struct derivated_lemma {
  string lemma; // text of the derived lemma
};
131
|
|
|
|
|
|
|
|
132
|
0
|
|
|
|
|
|
class derivator { |
133
|
|
|
|
|
|
|
public: |
134
|
0
|
|
|
|
|
|
virtual ~derivator() {} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
// For given lemma, return the parent in the derivation graph. |
137
|
|
|
|
|
|
|
// The lemma is assumed to be lemma id and any lemma comments are ignored. |
138
|
|
|
|
|
|
|
virtual bool parent(string_piece lemma, derivated_lemma& parent) const = 0; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
// For given lemma, return the children in the derivation graph. |
141
|
|
|
|
|
|
|
// The lemma is assumed to be lemma id and any lemma comments are ignored. |
142
|
|
|
|
|
|
|
virtual bool children(string_piece lemma, vector& children) const = 0; |
143
|
|
|
|
|
|
|
}; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
///////// |
146
|
|
|
|
|
|
|
// File: tokenizer/tokenizer.h |
147
|
|
|
|
|
|
|
///////// |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
150
|
|
|
|
|
|
|
// |
151
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
152
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
153
|
|
|
|
|
|
|
// |
154
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
155
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
156
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
// Range of a token, measured in Unicode characters, not UTF8 bytes.
struct token_range {
  size_t start;
  size_t length;

  // A default-constructed range is the empty range at offset 0, so reading
  // its members is always well defined.
  token_range() : start(0), length(0) {}
  token_range(size_t start, size_t length) : start(start), length(length) {}
};
166
|
|
|
|
|
|
|
|
167
|
3
|
|
|
|
|
|
class tokenizer { |
168
|
|
|
|
|
|
|
public: |
169
|
3
|
|
|
|
|
|
virtual ~tokenizer() {} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) = 0; |
172
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) = 0; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
// Static factory methods |
175
|
|
|
|
|
|
|
static tokenizer* new_vertical_tokenizer(); |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
static tokenizer* new_czech_tokenizer(); |
178
|
|
|
|
|
|
|
static tokenizer* new_english_tokenizer(); |
179
|
|
|
|
|
|
|
static tokenizer* new_generic_tokenizer(); |
180
|
|
|
|
|
|
|
}; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
///////// |
183
|
|
|
|
|
|
|
// File: morpho/morpho.h |
184
|
|
|
|
|
|
|
///////// |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
187
|
|
|
|
|
|
|
// |
188
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
189
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
190
|
|
|
|
|
|
|
// |
191
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
192
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
193
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
194
|
|
|
|
|
|
|
|
195
|
46
|
|
|
|
|
|
// A word form paired with its tag.
struct tagged_form {
  string form;
  string tag;

  // Both strings empty.
  tagged_form() {}
  // Copies the given form and tag.
  tagged_form(const string& form, const string& tag) : form(form), tag(tag) {}
};
202
|
|
|
|
|
|
|
|
203
|
44
|
|
|
|
|
|
// A lemma paired with its tag.
struct tagged_lemma {
  string lemma;
  string tag;

  // Both strings empty.
  tagged_lemma() {}
  // Copies the given lemma and tag.
  tagged_lemma(const string& lemma, const string& tag) : lemma(lemma), tag(tag) {}
};
210
|
|
|
|
|
|
|
|
211
|
0
|
|
|
|
|
|
struct tagged_lemma_forms { |
212
|
|
|
|
|
|
|
string lemma; |
213
|
|
|
|
|
|
|
vector forms; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
tagged_lemma_forms() {} |
216
|
4
|
|
|
|
|
|
tagged_lemma_forms(const string& lemma) : lemma(lemma) {} |
217
|
|
|
|
|
|
|
}; |
218
|
|
|
|
|
|
|
|
219
|
2
|
|
|
|
|
|
class morpho { |
220
|
|
|
|
|
|
|
public: |
221
|
0
|
|
|
|
|
|
virtual ~morpho() {} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
static morpho* load(istream& is); |
224
|
|
|
|
|
|
|
static morpho* load(const char* fname); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
enum guesser_mode { NO_GUESSER = 0, GUESSER = 1, GUESSER_UNSPECIFIED = -1 }; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
// Perform morphologic analysis of a form. The form is given by a pointer and |
229
|
|
|
|
|
|
|
// length and therefore does not need to be '\0' terminated. The guesser |
230
|
|
|
|
|
|
|
// parameter specifies whether a guesser can be used if the form is not found |
231
|
|
|
|
|
|
|
// in the dictionary. Output is assigned to the lemmas vector. |
232
|
|
|
|
|
|
|
// |
233
|
|
|
|
|
|
|
// If the form is found in the dictionary, analyses are assigned to lemmas |
234
|
|
|
|
|
|
|
// and NO_GUESSER returned. If guesser == GUESSER and the form analyses are |
235
|
|
|
|
|
|
|
// found using a guesser, they are assigned to lemmas and GUESSER is |
236
|
|
|
|
|
|
|
// returned. Otherwise <0 is returned and lemmas are filled with one |
237
|
|
|
|
|
|
|
// analysis containing given form as lemma and a tag for unknown word. |
238
|
|
|
|
|
|
|
virtual int analyze(string_piece form, guesser_mode guesser, vector& lemmas) const = 0; |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
// Perform morphologic generation of a lemma. The lemma is given by a pointer |
241
|
|
|
|
|
|
|
// and length and therefore does not need to be '\0' terminated. Optionally |
242
|
|
|
|
|
|
|
// a tag_wildcard can be specified (or be NULL) and if so, results are |
243
|
|
|
|
|
|
|
// filtered using this wildcard. The guesser parameter speficies whether |
244
|
|
|
|
|
|
|
// a guesser can be used if the lemma is not found in the dictionary. Output |
245
|
|
|
|
|
|
|
// is assigned to the forms vector. |
246
|
|
|
|
|
|
|
// |
247
|
|
|
|
|
|
|
// Tag_wildcard can be either NULL or a wildcard applied to the results. |
248
|
|
|
|
|
|
|
// A ? in the wildcard matches any character, [bytes] matches any of the |
249
|
|
|
|
|
|
|
// bytes and [^bytes] matches any byte different from the specified ones. |
250
|
|
|
|
|
|
|
// A - has no special meaning inside the bytes and if ] is first in bytes, it |
251
|
|
|
|
|
|
|
// does not end the bytes group. |
252
|
|
|
|
|
|
|
// |
253
|
|
|
|
|
|
|
// If the given lemma is only a raw lemma, all lemma ids with this raw lemma |
254
|
|
|
|
|
|
|
// are returned. Otherwise only matching lemma ids are returned, ignoring any |
255
|
|
|
|
|
|
|
// lemma comments. For every found lemma, matching forms are filtered using |
256
|
|
|
|
|
|
|
// the tag_wildcard. If at least one lemma is found in the dictionary, |
257
|
|
|
|
|
|
|
// NO_GUESSER is returned. If guesser == GUESSER and the lemma is found by |
258
|
|
|
|
|
|
|
// the guesser, GUESSER is returned. Otherwise, forms are cleared and <0 is |
259
|
|
|
|
|
|
|
// returned. |
260
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const = 0; |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
// Rawlemma and lemma id identification |
263
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const = 0; |
264
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const = 0; |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
// Rawform identification |
267
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const = 0; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this morphology. |
270
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
271
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const = 0; |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
// Return a derivator for this morphology, or NULL if it does not exist. |
274
|
|
|
|
|
|
|
// The returned instance is owned by the morphology and should not be deleted. |
275
|
|
|
|
|
|
|
virtual const derivator* get_derivator() const; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
protected: |
278
|
|
|
|
|
|
|
unique_ptr derinet; |
279
|
|
|
|
|
|
|
}; |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
///////// |
282
|
|
|
|
|
|
|
// File: tagset_converter/tagset_converter.h |
283
|
|
|
|
|
|
|
///////// |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
286
|
|
|
|
|
|
|
// |
287
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
288
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
289
|
|
|
|
|
|
|
// |
290
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
291
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
292
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
293
|
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
class tagset_converter { |
295
|
|
|
|
|
|
|
public: |
296
|
0
|
|
|
|
|
|
virtual ~tagset_converter() {} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
// Convert a tag-lemma pair to a different tag set. |
299
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const = 0; |
300
|
|
|
|
|
|
|
// Convert a result of analysis to a different tag set. Apart from calling |
301
|
|
|
|
|
|
|
// convert, any repeated entry is removed. |
302
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const = 0; |
303
|
|
|
|
|
|
|
// Convert a result of generation to a different tag set. Apart from calling |
304
|
|
|
|
|
|
|
// convert, any repeated entry is removed. |
305
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const = 0; |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
// Static factory methods |
308
|
|
|
|
|
|
|
static tagset_converter* new_identity_converter(); |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
static tagset_converter* new_pdt_to_conll2009_converter(); |
311
|
|
|
|
|
|
|
static tagset_converter* new_strip_lemma_comment_converter(const morpho& dictionary); |
312
|
|
|
|
|
|
|
static tagset_converter* new_strip_lemma_id_converter(const morpho& dictionary); |
313
|
|
|
|
|
|
|
}; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
// Helper method for creating tagset_converter from instance name. |
316
|
|
|
|
|
|
|
tagset_converter* new_tagset_converter(const string& name, const morpho& dictionary); |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
// Helper methods making sure remapped results are unique. |
319
|
|
|
|
|
|
|
void tagset_converter_unique_analyzed(vector& tagged_lemmas); |
320
|
|
|
|
|
|
|
void tagset_converter_unique_generated(vector& forms); |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
///////// |
323
|
|
|
|
|
|
|
// File: derivator/derivation_formatter.h |
324
|
|
|
|
|
|
|
///////// |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
327
|
|
|
|
|
|
|
// |
328
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
329
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
330
|
|
|
|
|
|
|
// |
331
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
332
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
333
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
334
|
|
|
|
|
|
|
|
335
|
0
|
|
|
|
|
|
class derivation_formatter { |
336
|
|
|
|
|
|
|
public: |
337
|
0
|
|
|
|
|
|
virtual ~derivation_formatter() {} |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
// Perform the required derivation and store it directly in the lemma. |
340
|
|
|
|
|
|
|
virtual void format_derivation(string& lemma) const; |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
// Perform the required derivation and store it directly in the tagged_lemma. |
343
|
|
|
|
|
|
|
// If a tagset_converter is given, it is also applied. |
344
|
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter = nullptr) const = 0; |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
// Perform the required derivation on a list of tagged_lemmas. |
347
|
|
|
|
|
|
|
// If a tagset_converter is given, it is also applied. |
348
|
|
|
|
|
|
|
// Either way, only unique entries are returned. |
349
|
|
|
|
|
|
|
virtual void format_tagged_lemmas(vector& lemmas, const tagset_converter* converter = nullptr) const; |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
// Static factory methods. |
352
|
|
|
|
|
|
|
static derivation_formatter* new_none_derivation_formatter(); |
353
|
|
|
|
|
|
|
static derivation_formatter* new_root_derivation_formatter(const derivator* derinet); |
354
|
|
|
|
|
|
|
static derivation_formatter* new_path_derivation_formatter(const derivator* derinet); |
355
|
|
|
|
|
|
|
static derivation_formatter* new_tree_derivation_formatter(const derivator* derinet); |
356
|
|
|
|
|
|
|
// String version of static factory method. |
357
|
|
|
|
|
|
|
static derivation_formatter* new_derivation_formatter(string_piece name, const derivator* derinet); |
358
|
|
|
|
|
|
|
}; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
///////// |
361
|
|
|
|
|
|
|
// File: derivator/derivation_formatter.cpp |
362
|
|
|
|
|
|
|
///////// |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
365
|
|
|
|
|
|
|
// |
366
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
367
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
368
|
|
|
|
|
|
|
// |
369
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
370
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
371
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
372
|
|
|
|
|
|
|
|
373
|
0
|
|
|
|
|
|
// Default implementation: wrap the bare lemma in a tagged_lemma with an empty
// tag, run format_tagged_lemma on it without a converter, and hand the
// formatted text back. The string is swapped in and out, not copied.
void derivation_formatter::format_derivation(string& lemma) const {
  tagged_lemma wrapper;
  lemma.swap(wrapper.lemma);

  format_tagged_lemma(wrapper);

  wrapper.lemma.swap(lemma);
}
379
|
|
|
|
|
|
|
|
380
|
0
|
|
|
|
|
|
void derivation_formatter::format_tagged_lemmas(vector& lemmas, const tagset_converter* converter) const { |
381
|
0
|
0
|
|
|
|
|
for (auto&& lemma : lemmas) |
382
|
0
|
|
|
|
|
|
format_tagged_lemma(lemma, converter); |
383
|
|
|
|
|
|
|
|
384
|
0
|
0
|
|
|
|
|
if (lemmas.size() > 1) |
385
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(lemmas); |
386
|
0
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
0
|
|
|
|
|
|
class none_derivation_formatter : public derivation_formatter { |
389
|
0
|
|
|
|
|
|
virtual void format_derivation(string& /*lemma*/) const override {} |
390
|
|
|
|
|
|
|
|
391
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
392
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
393
|
0
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
0
|
|
|
|
|
|
virtual void format_tagged_lemmas(vector& lemmas, const tagset_converter* converter) const override { |
396
|
0
|
0
|
|
|
|
|
if (converter) converter->convert_analyzed(lemmas); |
397
|
0
|
|
|
|
|
|
} |
398
|
|
|
|
|
|
|
}; |
399
|
|
|
|
|
|
|
|
400
|
0
|
|
|
|
|
|
// Factory for the formatter that performs no derivation.
// The instance is allocated with new and returned as a raw pointer.
derivation_formatter* derivation_formatter::new_none_derivation_formatter() {
  return new none_derivation_formatter();
}
403
|
|
|
|
|
|
|
|
404
|
0
|
|
|
|
|
|
class root_derivation_formatter : public derivation_formatter { |
405
|
|
|
|
|
|
|
public: |
406
|
0
|
|
|
|
|
|
root_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
407
|
|
|
|
|
|
|
|
408
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
409
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); ) |
|
|
0
|
|
|
|
|
|
410
|
0
|
|
|
|
|
|
lemma.lemma.assign(parent.lemma); |
411
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
412
|
0
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
private: |
415
|
|
|
|
|
|
|
const derivator* derinet; |
416
|
|
|
|
|
|
|
}; |
417
|
|
|
|
|
|
|
|
418
|
0
|
|
|
|
|
|
// Factory for the root formatter; without a derivator there is nothing to
// walk, so nullptr is returned.
derivation_formatter* derivation_formatter::new_root_derivation_formatter(const derivator* derinet) {
  if (!derinet) return nullptr;
  return new root_derivation_formatter(derinet);
}
421
|
|
|
|
|
|
|
|
422
|
0
|
|
|
|
|
|
class path_derivation_formatter : public derivation_formatter { |
423
|
|
|
|
|
|
|
public: |
424
|
0
|
|
|
|
|
|
path_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
425
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
427
|
0
|
|
|
|
|
|
tagged_lemma current(lemma); |
428
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
|
0
|
|
|
|
|
|
429
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) { |
|
|
0
|
|
|
|
|
|
430
|
0
|
0
|
|
|
|
|
tagged_lemma parrent_lemma(parent.lemma, current.tag); |
431
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(parrent_lemma); |
|
|
0
|
|
|
|
|
|
432
|
0
|
0
|
|
|
|
|
lemma.lemma.append(" ").append(parrent_lemma.lemma); |
433
|
|
|
|
|
|
|
} |
434
|
0
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
private: |
437
|
|
|
|
|
|
|
const derivator* derinet; |
438
|
|
|
|
|
|
|
}; |
439
|
|
|
|
|
|
|
|
440
|
0
|
|
|
|
|
|
// Factory for the path formatter; returns nullptr when no derivator is given.
derivation_formatter* derivation_formatter::new_path_derivation_formatter(const derivator* derinet) {
  if (!derinet) return nullptr;
  return new path_derivation_formatter(derinet);
}
443
|
|
|
|
|
|
|
|
444
|
0
|
|
|
|
|
|
class tree_derivation_formatter : public derivation_formatter { |
445
|
|
|
|
|
|
|
public: |
446
|
0
|
|
|
|
|
|
tree_derivation_formatter(const derivator* derinet) : derinet(derinet) {} |
447
|
|
|
|
|
|
|
|
448
|
0
|
|
|
|
|
|
virtual void format_tagged_lemma(tagged_lemma& lemma, const tagset_converter* converter) const override { |
449
|
|
|
|
|
|
|
string root(lemma.lemma), tag(lemma.tag); |
450
|
0
|
0
|
|
|
|
|
if (converter) converter->convert(lemma); |
|
|
0
|
|
|
|
|
|
451
|
0
|
0
|
|
|
|
|
for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {} |
|
|
0
|
|
|
|
|
|
452
|
0
|
0
|
|
|
|
|
format_tree(root, tag, lemma, converter); |
453
|
0
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
0
|
|
|
|
|
|
void format_tree(const string& root, const string& tag, tagged_lemma& tree, const tagset_converter* converter) const { |
456
|
0
|
|
|
|
|
|
vector children; |
457
|
|
|
|
|
|
|
|
458
|
0
|
0
|
|
|
|
|
if (converter) { |
459
|
0
|
0
|
|
|
|
|
tagged_lemma current(root, tag); |
460
|
0
|
0
|
|
|
|
|
converter->convert(current); |
461
|
0
|
0
|
|
|
|
|
tree.lemma.append(" ").append(current.lemma); |
462
|
|
|
|
|
|
|
} else { |
463
|
0
|
0
|
|
|
|
|
tree.lemma.append(" ").append(root); |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
0
|
0
|
|
|
|
|
if (derinet->children(root, children)) |
|
|
0
|
|
|
|
|
|
467
|
0
|
0
|
|
|
|
|
for (auto&& child : children) |
468
|
0
|
0
|
|
|
|
|
format_tree(child.lemma, tag, tree, converter); |
469
|
0
|
0
|
|
|
|
|
tree.lemma.push_back(' '); |
470
|
0
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
private: |
473
|
|
|
|
|
|
|
const derivator* derinet; |
474
|
|
|
|
|
|
|
}; |
475
|
|
|
|
|
|
|
|
476
|
0
|
|
|
|
|
|
// Factory for the tree formatter; returns nullptr when no derivator is given.
derivation_formatter* derivation_formatter::new_tree_derivation_formatter(const derivator* derinet) {
  if (!derinet) return nullptr;
  return new tree_derivation_formatter(derinet);
}
479
|
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
// Select a formatter by name: "none", "root", "path" or "tree".
// Any other name yields nullptr. The per-kind factories may themselves
// return nullptr (they do so when derinet is null).
derivation_formatter* derivation_formatter::new_derivation_formatter(string_piece name, const derivator* derinet) {
  derivation_formatter* formatter = nullptr;

  if (name == "none") {
    formatter = new_none_derivation_formatter();
  } else if (name == "root") {
    formatter = new_root_derivation_formatter(derinet);
  } else if (name == "path") {
    formatter = new_path_derivation_formatter(derinet);
  } else if (name == "tree") {
    formatter = new_tree_derivation_formatter(derinet);
  }

  return formatter;
}
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
///////// |
489
|
|
|
|
|
|
|
// File: morpho/small_stringops.h |
490
|
|
|
|
|
|
|
///////// |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
493
|
|
|
|
|
|
|
// |
494
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
495
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
496
|
|
|
|
|
|
|
// |
497
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
498
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
499
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
// Declarations
// Byte-by-byte equality test of two buffers of `len` bytes.
inline bool small_memeq(const void* a, const void* b, size_t len);
// Byte-by-byte copy of `len` bytes from `src` to `dest`.
inline void small_memcpy(void* dest, const void* src, size_t len);
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
// Definitions |
506
|
|
|
|
|
|
|
// Return true when the first `len` bytes of both buffers are identical.
// Bytes are compared one at a time from the front and the scan stops at the
// first difference; with len == 0 nothing is read and the result is true.
bool small_memeq(const void* a_void, const void* b_void, size_t len) {
  const char* lhs = static_cast<const char*>(a_void);
  const char* rhs = static_cast<const char*>(b_void);

  for (size_t i = 0; i < len; ++i) {
    if (lhs[i] != rhs[i])
      return false;
  }
  return true;
}
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
// Copy `len` bytes from src to dest, one byte at a time, front to back.
// The front-to-back order is observable when the two ranges overlap.
void small_memcpy(void* dest_void, const void* src_void, size_t len) {
  char* out = static_cast<char*>(dest_void);
  const char* in = static_cast<const char*>(src_void);

  for (size_t copied = 0; copied < len; ++copied)
    out[copied] = in[copied];
}
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
///////// |
525
|
|
|
|
|
|
|
// File: utils/binary_decoder.h |
526
|
|
|
|
|
|
|
///////// |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
529
|
|
|
|
|
|
|
// |
530
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
531
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
532
|
|
|
|
|
|
|
// |
533
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
534
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
535
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
namespace utils { |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
// |
540
|
|
|
|
|
|
|
// Declarations |
541
|
|
|
|
|
|
|
// |
542
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
|
// Exception thrown by binary_decoder when a read would run past the end of
// the buffered data.
class binary_decoder_error : public std::runtime_error {
 public:
  explicit binary_decoder_error(const char* description) : std::runtime_error(description) {}
};

// Sequential reader over an internally owned byte buffer.
// fill() sizes the buffer and returns a pointer the caller writes the raw
// bytes into; the next_* methods then consume it front to back and throw
// binary_decoder_error instead of reading past the end.
class binary_decoder {
 public:
  inline unsigned char* fill(unsigned len);

  inline unsigned next_1B();
  inline unsigned next_2B();
  inline unsigned next_4B();
  inline void next_str(std::string& str);
  template <class T> inline const T* next(unsigned elements);

  inline bool is_end();
  inline unsigned tell();
  inline void seek(unsigned pos);

 private:
  // Number of unread bytes. Bounds checks compare sizes against this value
  // instead of computing `data + n`, which would form an out-of-range pointer
  // (undefined behaviour, and able to wrap around on 32-bit targets).
  // A read position at or past data_end counts as zero bytes remaining.
  std::size_t remaining() const {
    return data < data_end ? static_cast<std::size_t>(data_end - data) : 0;
  }

  std::vector<unsigned char> buffer;
  // Null until fill() is called, so a fresh decoder reports is_end() and
  // rejects every read.
  const unsigned char* data = nullptr;
  const unsigned char* data_end = nullptr;
};

//
// Definitions
//

// Resize the buffer to `len` bytes, rewind the read position to its start and
// return a writable pointer to the buffer.
unsigned char* binary_decoder::fill(unsigned len) {
  buffer.resize(len);
  data = buffer.data();
  data_end = buffer.data() + len;

  return buffer.data();
}

// Read one byte.
unsigned binary_decoder::next_1B() {
  if (remaining() < 1) throw binary_decoder_error("No more data in binary_decoder");
  return *data++;
}

// Read a 2-byte unsigned value in host byte order (memcpy avoids unaligned access).
unsigned binary_decoder::next_2B() {
  if (remaining() < sizeof(uint16_t)) throw binary_decoder_error("No more data in binary_decoder");
  uint16_t result;
  std::memcpy(&result, data, sizeof(uint16_t));
  data += sizeof(uint16_t);
  return result;
}

// Read a 4-byte unsigned value in host byte order (memcpy avoids unaligned access).
unsigned binary_decoder::next_4B() {
  if (remaining() < sizeof(uint32_t)) throw binary_decoder_error("No more data in binary_decoder");
  uint32_t result;
  std::memcpy(&result, data, sizeof(uint32_t));
  data += sizeof(uint32_t);
  return result;
}

// Read a length-prefixed string: a 1-byte length, where the value 255 means
// the real length follows as a 4-byte value, and then that many bytes.
void binary_decoder::next_str(std::string& str) {
  unsigned len = next_1B();
  if (len == 255) len = next_4B();
  str.assign(next<char>(len), len);
}

// Return a pointer to `elements` consecutive values of type T inside the
// buffer and advance past them. The pointer refers to the decoder's own
// buffer. The check is written as a division so that a huge `elements`
// cannot overflow the size computation.
template <class T> const T* binary_decoder::next(unsigned elements) {
  if (elements > remaining() / sizeof(T)) throw binary_decoder_error("No more data in binary_decoder");
  const T* result = reinterpret_cast<const T*>(data);
  data += sizeof(T) * elements;
  return result;
}

// True when every buffered byte has been consumed.
bool binary_decoder::is_end() {
  return data >= data_end;
}
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
unsigned binary_decoder::tell() { |
619
|
2
|
|
|
|
|
|
return data - buffer.data(); |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
2
|
|
|
|
|
|
void binary_decoder::seek(unsigned pos) { |
623
|
2
|
50
|
|
|
|
|
if (pos > buffer.size()) throw binary_decoder_error("Cannot seek past end of binary_decoder"); |
624
|
2
|
|
|
|
|
|
data = buffer.data() + pos; |
625
|
2
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
} // namespace utils |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
///////// |
630
|
|
|
|
|
|
|
// File: utils/binary_encoder.h |
631
|
|
|
|
|
|
|
///////// |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
634
|
|
|
|
|
|
|
// |
635
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
636
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
637
|
|
|
|
|
|
|
// |
638
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
639
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
640
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
namespace utils { |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
// |
645
|
|
|
|
|
|
|
// Declarations |
646
|
|
|
|
|
|
|
// |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
class binary_encoder { |
649
|
|
|
|
|
|
|
public: |
650
|
|
|
|
|
|
|
inline binary_encoder(); |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
inline void add_1B(unsigned val); |
653
|
|
|
|
|
|
|
inline void add_2B(unsigned val); |
654
|
|
|
|
|
|
|
inline void add_4B(unsigned val); |
655
|
|
|
|
|
|
|
inline void add_float(double val); |
656
|
|
|
|
|
|
|
inline void add_double(double val); |
657
|
|
|
|
|
|
|
inline void add_str(string_piece str); |
658
|
|
|
|
|
|
|
inline void add_data(string_piece data); |
659
|
|
|
|
|
|
|
template inline void add_data(const vector& data); |
660
|
|
|
|
|
|
|
template inline void add_data(const T* data, size_t elements); |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
vector data; |
663
|
|
|
|
|
|
|
}; |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
// |
666
|
|
|
|
|
|
|
// Definitions |
667
|
|
|
|
|
|
|
// |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
binary_encoder::binary_encoder() { |
670
|
|
|
|
|
|
|
data.reserve(16); |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
void binary_encoder::add_1B(unsigned val) { |
674
|
|
|
|
|
|
|
if (uint8_t(val) != val) runtime_failure("Should encode value " << val << " in one byte!"); |
675
|
|
|
|
|
|
|
data.push_back(val); |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
void binary_encoder::add_2B(unsigned val) { |
679
|
|
|
|
|
|
|
if (uint16_t(val) != val) runtime_failure("Should encode value " << val << " in two bytes!"); |
680
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(uint16_t)); |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
void binary_encoder::add_4B(unsigned val) { |
684
|
|
|
|
|
|
|
if (uint32_t(val) != val) runtime_failure("Should encode value " << val << " in four bytes!"); |
685
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(uint32_t)); |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
void binary_encoder::add_float(double val) { |
689
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(float)); |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
void binary_encoder::add_double(double val) { |
693
|
|
|
|
|
|
|
data.insert(data.end(), (unsigned char*) &val, ((unsigned char*) &val) + sizeof(double)); |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
void binary_encoder::add_str(string_piece str) { |
697
|
|
|
|
|
|
|
add_1B(str.len < 255 ? str.len : 255); |
698
|
|
|
|
|
|
|
if (!(str.len < 255)) add_4B(str.len); |
699
|
|
|
|
|
|
|
add_data(str); |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
void binary_encoder::add_data(string_piece data) { |
703
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data.str, (const unsigned char*) (data.str + data.len)); |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
template |
707
|
|
|
|
|
|
|
void binary_encoder::add_data(const vector& data) { |
708
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data.data(), (const unsigned char*) (data.data() + data.size())); |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
template |
712
|
|
|
|
|
|
|
void binary_encoder::add_data(const T* data, size_t elements) { |
713
|
|
|
|
|
|
|
this->data.insert(this->data.end(), (const unsigned char*) data, (const unsigned char*) (data + elements)); |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
} // namespace utils |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
///////// |
719
|
|
|
|
|
|
|
// File: utils/pointer_decoder.h |
720
|
|
|
|
|
|
|
///////// |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
723
|
|
|
|
|
|
|
// |
724
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
725
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
726
|
|
|
|
|
|
|
// |
727
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
728
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
729
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
namespace utils { |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
// |
734
|
|
|
|
|
|
|
// Declarations |
735
|
|
|
|
|
|
|
// |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
class pointer_decoder { |
738
|
|
|
|
|
|
|
public: |
739
|
|
|
|
|
|
|
inline pointer_decoder(const unsigned char*& data); |
740
|
|
|
|
|
|
|
inline unsigned next_1B(); |
741
|
|
|
|
|
|
|
inline unsigned next_2B(); |
742
|
|
|
|
|
|
|
inline unsigned next_4B(); |
743
|
|
|
|
|
|
|
inline void next_str(string& str); |
744
|
|
|
|
|
|
|
template inline const T* next(unsigned elements); |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
private: |
747
|
|
|
|
|
|
|
const unsigned char*& data; |
748
|
|
|
|
|
|
|
}; |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
// |
751
|
|
|
|
|
|
|
// Definitions |
752
|
|
|
|
|
|
|
// |
753
|
|
|
|
|
|
|
|
754
|
50
|
|
|
|
|
|
pointer_decoder::pointer_decoder(const unsigned char*& data) : data(data) {} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
unsigned pointer_decoder::next_1B() { |
757
|
0
|
|
|
|
|
|
return *data++; |
758
|
|
|
|
|
|
|
} |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
unsigned pointer_decoder::next_2B() { |
761
|
|
|
|
|
|
|
uint16_t result; |
762
|
72
|
|
|
|
|
|
memcpy(&result, data, sizeof(uint16_t)); |
763
|
86
|
|
|
|
|
|
data += sizeof(uint16_t); |
764
|
26
|
|
|
|
|
|
return result; |
765
|
|
|
|
|
|
|
} |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
unsigned pointer_decoder::next_4B() { |
768
|
|
|
|
|
|
|
uint32_t result; |
769
|
34
|
|
|
|
|
|
memcpy(&result, data, sizeof(uint32_t)); |
770
|
34
|
|
|
|
|
|
data += sizeof(uint32_t); |
771
|
|
|
|
|
|
|
return result; |
772
|
|
|
|
|
|
|
} |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
void pointer_decoder::next_str(string& str) { |
775
|
|
|
|
|
|
|
unsigned len = next_1B(); |
776
|
|
|
|
|
|
|
if (len == 255) len = next_4B(); |
777
|
|
|
|
|
|
|
str.assign(next(len), len); |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
template const T* pointer_decoder::next(unsigned elements) { |
781
|
86
|
|
|
|
|
|
const T* result = (const T*) data; |
782
|
0
|
|
|
|
|
|
data += sizeof(T) * elements; |
783
|
|
|
|
|
|
|
return result; |
784
|
|
|
|
|
|
|
} |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
} // namespace utils |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
///////// |
789
|
|
|
|
|
|
|
// File: utils/unaligned_access.h |
790
|
|
|
|
|
|
|
///////// |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
793
|
|
|
|
|
|
|
// |
794
|
|
|
|
|
|
|
// Copyright 2017 Institute of Formal and Applied Linguistics, Faculty of |
795
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
796
|
|
|
|
|
|
|
// |
797
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
798
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
799
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
namespace utils { |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
// |
804
|
|
|
|
|
|
|
// Declarations |
805
|
|
|
|
|
|
|
// |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
template |
808
|
|
|
|
|
|
|
inline T unaligned_load(const P* ptr); |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
template |
811
|
|
|
|
|
|
|
inline T unaligned_load_inc(const P*& ptr); |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
template |
814
|
|
|
|
|
|
|
inline void unaligned_store(P* ptr, T value); |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
template |
817
|
|
|
|
|
|
|
inline void unaligned_store_inc(P*& ptr, T value); |
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
template |
820
|
|
|
|
|
|
|
T* unaligned_lower_bound(T* first, size_t size, T val); |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
template |
823
|
|
|
|
|
|
|
T* unaligned_upper_bound(T* first, size_t size, T val); |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
// |
826
|
|
|
|
|
|
|
// Definitions |
827
|
|
|
|
|
|
|
// |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
template |
830
|
|
|
|
|
|
|
inline T unaligned_load(const P* ptr) { |
831
|
|
|
|
|
|
|
T value; |
832
|
|
|
|
|
|
|
memcpy(&value, ptr, sizeof(T)); |
833
|
|
|
|
|
|
|
return value; |
834
|
|
|
|
|
|
|
} |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
template |
837
|
|
|
|
|
|
|
inline T unaligned_load_inc(const P*& ptr) { |
838
|
|
|
|
|
|
|
T value; |
839
|
|
|
|
|
|
|
memcpy(&value, ptr, sizeof(T)); |
840
|
0
|
|
|
|
|
|
((const char*&)ptr) += sizeof(T); |
841
|
|
|
|
|
|
|
return value; |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
template |
845
|
|
|
|
|
|
|
inline void unaligned_store(P* ptr, T value) { |
846
|
|
|
|
|
|
|
memcpy(ptr, &value, sizeof(T)); |
847
|
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
template |
850
|
|
|
|
|
|
|
inline void unaligned_store_inc(P*& ptr, T value) { |
851
|
|
|
|
|
|
|
memcpy(ptr, &value, sizeof(T)); |
852
|
70
|
|
|
|
|
|
((char*&)ptr) += sizeof(T); |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
template |
856
|
|
|
|
|
|
|
T* unaligned_lower_bound(T* first, size_t size, T val) { |
857
|
17
|
100
|
|
|
|
|
while (size) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
858
|
11
|
|
|
|
|
|
size_t step = size >> 1; |
859
|
11
|
100
|
|
|
|
|
if (unaligned_load(first + step) < val) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
860
|
5
|
|
|
|
|
|
first += step + 1; |
861
|
5
|
|
|
|
|
|
size -= step + 1; |
862
|
|
|
|
|
|
|
} else { |
863
|
|
|
|
|
|
|
size = step; |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
return first; |
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
template |
870
|
|
|
|
|
|
|
T* unaligned_upper_bound(T* first, size_t size, T val) { |
871
|
|
|
|
|
|
|
while (size) { |
872
|
|
|
|
|
|
|
size_t step = size >> 1; |
873
|
|
|
|
|
|
|
if (!(val < unaligned_load(first + step))) { |
874
|
|
|
|
|
|
|
first += step + 1; |
875
|
|
|
|
|
|
|
size -= step + 1; |
876
|
|
|
|
|
|
|
} else { |
877
|
|
|
|
|
|
|
size = step; |
878
|
|
|
|
|
|
|
} |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
return first; |
881
|
|
|
|
|
|
|
} |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
} // namespace utils |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
///////// |
886
|
|
|
|
|
|
|
// File: morpho/persistent_unordered_map.h |
887
|
|
|
|
|
|
|
///////// |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
890
|
|
|
|
|
|
|
// |
891
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
892
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
893
|
|
|
|
|
|
|
// |
894
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
895
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
896
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
897
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
// Declarations |
899
|
0
|
|
|
|
|
|
class persistent_unordered_map { |
900
|
|
|
|
|
|
|
public: |
901
|
|
|
|
|
|
|
// Accessing function |
902
|
|
|
|
|
|
|
template |
903
|
|
|
|
|
|
|
inline const unsigned char* at(const char* str, int len, EntrySize entry_size) const; |
904
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
template |
906
|
|
|
|
|
|
|
inline const T* at_typed(const char* str, int len) const; |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
template |
909
|
|
|
|
|
|
|
inline void iter(const char* str, int len, EntryProcess entry_process) const; |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
template |
912
|
|
|
|
|
|
|
inline void iter_all(EntryProcess entry_process) const; |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
// Two helper functions accessing some internals |
915
|
|
|
|
|
|
|
inline int max_length() const; |
916
|
|
|
|
|
|
|
inline const unsigned char* data_start(int len) const; |
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
// Creation functions |
919
|
|
|
|
|
|
|
persistent_unordered_map() {} |
920
|
|
|
|
|
|
|
template |
921
|
|
|
|
|
|
|
persistent_unordered_map(const unordered_map& map, double load_factor, EntryEncode entry_encode); |
922
|
|
|
|
|
|
|
template |
923
|
|
|
|
|
|
|
persistent_unordered_map(const unordered_map& map, double load_factor, bool add_prefixes, bool add_suffixes, EntryEncode entry_encode); |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
// Manual creation functions |
926
|
|
|
|
|
|
|
inline void resize(unsigned elems); |
927
|
|
|
|
|
|
|
inline void add(const char* str, int str_len, int data_len); |
928
|
|
|
|
|
|
|
inline void done_adding(); |
929
|
|
|
|
|
|
|
inline unsigned char* fill(const char* str, int str_len, int data_len); |
930
|
|
|
|
|
|
|
inline void done_filling(); |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
// Serialization |
933
|
|
|
|
|
|
|
inline void load(binary_decoder& data); |
934
|
|
|
|
|
|
|
inline void save(binary_encoder& enc); |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
private: |
937
|
|
|
|
|
|
|
struct fnv_hash; |
938
|
|
|
|
|
|
|
vector hashes; |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
template |
941
|
|
|
|
|
|
|
void construct(const map& map, double load_factor, EntryEncode entry_encode); |
942
|
|
|
|
|
|
|
}; |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
// Definitions |
945
|
172
|
|
|
|
|
|
struct persistent_unordered_map::fnv_hash { |
946
|
32
|
|
|
|
|
|
fnv_hash(unsigned num) { |
947
|
32
|
|
|
|
|
|
mask = 1; |
948
|
136
|
100
|
|
|
|
|
while (mask < num) |
949
|
104
|
|
|
|
|
|
mask <<= 1; |
950
|
32
|
50
|
|
|
|
|
hash.resize(mask + 1); |
951
|
32
|
|
|
|
|
|
mask--; |
952
|
32
|
|
|
|
|
|
} |
953
|
140
|
|
|
|
|
|
fnv_hash(binary_decoder& data) { |
954
|
140
|
50
|
|
|
|
|
uint32_t size = data.next_4B(); |
955
|
140
|
|
|
|
|
|
mask = size - 2; |
956
|
140
|
50
|
|
|
|
|
hash.resize(size); |
957
|
140
|
50
|
|
|
|
|
memcpy(hash.data(), data.next(size), size * sizeof(uint32_t)); |
958
|
|
|
|
|
|
|
|
959
|
140
|
50
|
|
|
|
|
size = data.next_4B(); |
960
|
140
|
50
|
|
|
|
|
this->data.resize(size); |
961
|
140
|
100
|
|
|
|
|
if (size) memcpy(this->data.data(), data.next(size), size); |
|
|
50
|
|
|
|
|
|
962
|
140
|
|
|
|
|
|
} |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
inline uint32_t index(const char* data, int len) const { |
965
|
304
|
0
|
|
|
|
|
if (len <= 0) return 0; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
966
|
289
|
0
|
|
|
|
|
if (len == 1) return unaligned_load(data); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
967
|
262
|
0
|
|
|
|
|
if (len == 2) return unaligned_load(data); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
uint32_t hash = 2166136261U; |
970
|
696
|
0
|
|
|
|
|
while (len--) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
971
|
575
|
|
|
|
|
|
hash = (hash ^ unsigned(*data++)) * 16777619U; |
972
|
121
|
|
|
|
|
|
return hash & mask; |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
inline void save(binary_encoder& enc); |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
unsigned mask; |
978
|
|
|
|
|
|
|
vector hash; |
979
|
|
|
|
|
|
|
vector data; |
980
|
|
|
|
|
|
|
}; |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
template |
983
|
36
|
|
|
|
|
|
const unsigned char* persistent_unordered_map::at(const char* str, int len, EntrySize entry_size) const { |
984
|
36
|
0
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
985
|
|
|
|
|
|
|
|
986
|
36
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
987
|
72
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
988
|
72
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
989
|
|
|
|
|
|
|
|
990
|
36
|
0
|
|
|
|
|
if (len <= 2) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
991
|
36
|
0
|
|
|
|
|
return data != end ? data + len : nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
992
|
|
|
|
|
|
|
|
993
|
0
|
0
|
|
|
|
|
while (data < end) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
994
|
0
|
0
|
|
|
|
|
if (small_memeq(str, data, len)) return data + len; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
995
|
0
|
|
|
|
|
|
data += len; |
996
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
997
|
0
|
|
|
|
|
|
entry_size(decoder); |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
|
1000
|
|
|
|
|
|
|
return nullptr; |
1001
|
|
|
|
|
|
|
} |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
template |
1004
|
278
|
|
|
|
|
|
const T* persistent_unordered_map::at_typed(const char* str, int len) const { |
1005
|
278
|
50
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return nullptr; |
|
|
100
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
|
1007
|
207
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
1008
|
414
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
1009
|
414
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
1010
|
|
|
|
|
|
|
|
1011
|
207
|
100
|
|
|
|
|
if (len <= 2) |
|
|
100
|
|
|
|
|
|
1012
|
143
|
100
|
|
|
|
|
return data != end ? (const T*)(data + len) : nullptr; |
|
|
50
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
|
1014
|
87
|
100
|
|
|
|
|
while (data < end) { |
|
|
100
|
|
|
|
|
|
1015
|
65
|
100
|
|
|
|
|
if (small_memeq(str, data, len)) return (const T*)(data + len); |
|
|
100
|
|
|
|
|
|
1016
|
23
|
|
|
|
|
|
data += len + sizeof(T); |
1017
|
|
|
|
|
|
|
} |
1018
|
|
|
|
|
|
|
|
1019
|
|
|
|
|
|
|
return nullptr; |
1020
|
|
|
|
|
|
|
} |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
template |
1023
|
25
|
|
|
|
|
|
void persistent_unordered_map::iter(const char* str, int len, EntryProcess entry_process) const { |
1024
|
25
|
50
|
|
|
|
|
if (unsigned(len) >= hashes.size()) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
|
1026
|
25
|
|
|
|
|
|
unsigned index = hashes[len].index(str, len); |
1027
|
50
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data() + hashes[len].hash[index]; |
1028
|
25
|
|
|
|
|
|
const unsigned char* end = hashes[len].data.data() + hashes[len].hash[index+1]; |
1029
|
|
|
|
|
|
|
|
1030
|
49
|
100
|
|
|
|
|
while (data < end) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
auto start = (const char*) data; |
1032
|
24
|
|
|
|
|
|
data += len; |
1033
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
1034
|
24
|
|
|
|
|
|
entry_process(start, decoder); |
1035
|
|
|
|
|
|
|
} |
1036
|
|
|
|
|
|
|
} |
1037
|
|
|
|
|
|
|
|
1038
|
|
|
|
|
|
|
template |
1039
|
4
|
|
|
|
|
|
void persistent_unordered_map::iter_all(EntryProcess entry_process) const { |
1040
|
8
|
100
|
|
|
|
|
for (unsigned len = 0; len < hashes.size(); len++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1041
|
6
|
|
|
|
|
|
const unsigned char* data = hashes[len].data.data(); |
1042
|
|
|
|
|
|
|
const unsigned char* end = data + hashes[len].data.size(); |
1043
|
|
|
|
|
|
|
|
1044
|
32
|
100
|
|
|
|
|
while (data < end) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
auto start = (const char*) data; |
1046
|
26
|
|
|
|
|
|
data += len; |
1047
|
|
|
|
|
|
|
pointer_decoder decoder(data); |
1048
|
26
|
|
|
|
|
|
entry_process(start, len, decoder); |
1049
|
|
|
|
|
|
|
} |
1050
|
|
|
|
|
|
|
} |
1051
|
2
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
int persistent_unordered_map::max_length() const { |
1054
|
44
|
|
|
|
|
|
return hashes.size(); |
1055
|
|
|
|
|
|
|
} |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
const unsigned char* persistent_unordered_map::data_start(int len) const { |
1058
|
38
|
50
|
|
|
|
|
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
|
1061
|
32
|
|
|
|
|
|
void persistent_unordered_map::resize(unsigned elems) { |
1062
|
32
|
100
|
|
|
|
|
if (hashes.size() == 0) hashes.emplace_back(1); |
1063
|
28
|
100
|
|
|
|
|
else if (hashes.size() == 1) hashes.emplace_back(1<<8); |
1064
|
24
|
100
|
|
|
|
|
else if (hashes.size() == 2) hashes.emplace_back(1<<16); |
1065
|
20
|
|
|
|
|
|
else hashes.emplace_back(elems); |
1066
|
32
|
|
|
|
|
|
} |
1067
|
|
|
|
|
|
|
|
1068
|
18
|
|
|
|
|
|
void persistent_unordered_map::add(const char* str, int str_len, int data_len) { |
1069
|
18
|
50
|
|
|
|
|
if (unsigned(str_len) < hashes.size()) |
1070
|
18
|
|
|
|
|
|
hashes[str_len].hash[hashes[str_len].index(str, str_len)] += str_len + data_len; |
1071
|
18
|
|
|
|
|
|
} |
1072
|
|
|
|
|
|
|
|
1073
|
4
|
|
|
|
|
|
void persistent_unordered_map::done_adding() { |
1074
|
36
|
100
|
|
|
|
|
for (auto&& hash : hashes) { |
1075
|
|
|
|
|
|
|
int total = 0; |
1076
|
263264
|
100
|
|
|
|
|
for (auto&& len : hash.hash) total += len, len = total - len; |
1077
|
32
|
|
|
|
|
|
hash.data.resize(total); |
1078
|
|
|
|
|
|
|
} |
1079
|
4
|
|
|
|
|
|
} |
1080
|
|
|
|
|
|
|
|
1081
|
18
|
|
|
|
|
|
unsigned char* persistent_unordered_map::fill(const char* str, int str_len, int data_len) { |
1082
|
18
|
50
|
|
|
|
|
if (unsigned(str_len) < hashes.size()) { |
1083
|
18
|
|
|
|
|
|
unsigned index = hashes[str_len].index(str, str_len); |
1084
|
36
|
|
|
|
|
|
unsigned offset = hashes[str_len].hash[index]; |
1085
|
18
|
|
|
|
|
|
small_memcpy(hashes[str_len].data.data() + offset, str, str_len); |
1086
|
18
|
|
|
|
|
|
hashes[str_len].hash[index] += str_len + data_len; |
1087
|
18
|
|
|
|
|
|
return hashes[str_len].data.data() + offset + str_len; |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
return nullptr; |
1090
|
|
|
|
|
|
|
} |
1091
|
|
|
|
|
|
|
|
1092
|
4
|
|
|
|
|
|
void persistent_unordered_map::done_filling() { |
1093
|
36
|
100
|
|
|
|
|
for (auto&& hash : hashes) |
1094
|
263264
|
100
|
|
|
|
|
for (int i = hash.hash.size() - 1; i >= 0; i--) |
1095
|
263232
|
100
|
|
|
|
|
hash.hash[i] = i > 0 ? hash.hash[i-1] : 0; |
1096
|
4
|
|
|
|
|
|
} |
1097
|
|
|
|
|
|
|
|
1098
|
49
|
|
|
|
|
|
void persistent_unordered_map::load(binary_decoder& data) { |
1099
|
49
|
|
|
|
|
|
unsigned sizes = data.next_1B(); |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
hashes.clear(); |
1102
|
189
|
100
|
|
|
|
|
for (unsigned i = 0; i < sizes; i++) |
1103
|
140
|
|
|
|
|
|
hashes.emplace_back(data); |
1104
|
49
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
///////// |
1107
|
|
|
|
|
|
|
// File: derivator/derivator_dictionary.h |
1108
|
|
|
|
|
|
|
///////// |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
1111
|
|
|
|
|
|
|
// |
1112
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1113
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1114
|
|
|
|
|
|
|
// |
1115
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1116
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1117
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1118
|
|
|
|
|
|
|
|
1119
|
0
|
|
|
|
|
|
class derivator_dictionary : public derivator { |
1120
|
|
|
|
|
|
|
public: |
1121
|
|
|
|
|
|
|
virtual bool parent(string_piece lemma, derivated_lemma& parent) const override; |
1122
|
|
|
|
|
|
|
virtual bool children(string_piece lemma, vector& children) const override; |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
bool load(istream& is); |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
private: |
1127
|
|
|
|
|
|
|
friend class morpho; |
1128
|
|
|
|
|
|
|
const morpho* dictionary; |
1129
|
|
|
|
|
|
|
persistent_unordered_map derinet; |
1130
|
|
|
|
|
|
|
}; |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
///////// |
1133
|
|
|
|
|
|
|
// File: utils/compressor.h |
1134
|
|
|
|
|
|
|
///////// |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
1137
|
|
|
|
|
|
|
// |
1138
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1139
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1140
|
|
|
|
|
|
|
// |
1141
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1142
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1143
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
namespace utils { |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
class binary_decoder; |
1148
|
|
|
|
|
|
|
class binary_encoder; |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
class compressor { |
1151
|
|
|
|
|
|
|
public: |
1152
|
|
|
|
|
|
|
static bool load(istream& is, binary_decoder& data); |
1153
|
|
|
|
|
|
|
static bool save(ostream& os, const binary_encoder& enc); |
1154
|
|
|
|
|
|
|
}; |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
} // namespace utils |
1157
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
///////// |
1159
|
|
|
|
|
|
|
// File: derivator/derivator_dictionary.cpp |
1160
|
|
|
|
|
|
|
///////// |
1161
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
1163
|
|
|
|
|
|
|
// |
1164
|
|
|
|
|
|
|
// Copyright 2016 Institute of Formal and Applied Linguistics, Faculty of |
1165
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1166
|
|
|
|
|
|
|
// |
1167
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1168
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1169
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1170
|
|
|
|
|
|
|
|
1171
|
0
|
|
|
|
|
|
bool derivator_dictionary::parent(string_piece lemma, derivated_lemma& parent) const { |
1172
|
0
|
0
|
|
|
|
|
if (dictionary) lemma.len = dictionary->lemma_id_len(lemma); |
1173
|
|
|
|
|
|
|
|
1174
|
0
|
|
|
|
|
|
auto lemma_data = derinet.at(lemma.str, lemma.len, [](pointer_decoder& data) { |
1175
|
|
|
|
|
|
|
data.next(data.next_1B()); |
1176
|
|
|
|
|
|
|
data.next_4B(); |
1177
|
|
|
|
|
|
|
data.next(data.next_2B()); |
1178
|
0
|
|
|
|
|
|
}); |
1179
|
0
|
0
|
|
|
|
|
if (lemma_data) { |
1180
|
0
|
|
|
|
|
|
auto parent_encoded = *(uint32_t*)(lemma_data + 1 + *lemma_data); |
1181
|
0
|
0
|
|
|
|
|
if (parent_encoded) { |
1182
|
0
|
|
|
|
|
|
unsigned parent_len = parent_encoded & 0xFF; |
1183
|
0
|
|
|
|
|
|
auto parent_data = derinet.data_start(parent_len) + (parent_encoded >> 8); |
1184
|
0
|
|
|
|
|
|
parent.lemma.assign((const char*) parent_data, parent_len); |
1185
|
0
|
0
|
|
|
|
|
if (parent_data[parent_len]) |
1186
|
0
|
|
|
|
|
|
parent.lemma.append((const char*) parent_data + parent_len + 1, parent_data[parent_len]); |
1187
|
|
|
|
|
|
|
return true; |
1188
|
|
|
|
|
|
|
} |
1189
|
|
|
|
|
|
|
} |
1190
|
|
|
|
|
|
|
parent.lemma.clear(); |
1191
|
0
|
|
|
|
|
|
return false; |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
|
1194
|
0
|
|
|
|
|
|
bool derivator_dictionary::children(string_piece lemma, vector& children) const { |
1195
|
0
|
0
|
|
|
|
|
if (dictionary) lemma.len = dictionary->lemma_id_len(lemma); |
1196
|
|
|
|
|
|
|
|
1197
|
0
|
|
|
|
|
|
auto lemma_data = derinet.at(lemma.str, lemma.len, [](pointer_decoder& data) { |
1198
|
|
|
|
|
|
|
data.next(data.next_1B()); |
1199
|
|
|
|
|
|
|
data.next_4B(); |
1200
|
|
|
|
|
|
|
data.next(data.next_2B()); |
1201
|
0
|
|
|
|
|
|
}); |
1202
|
0
|
0
|
|
|
|
|
if (lemma_data) { |
1203
|
0
|
|
|
|
|
|
auto children_len = *(uint16_t*)(lemma_data + 1 + *lemma_data + 4); |
1204
|
0
|
|
|
|
|
|
auto children_encoded = (uint32_t*)(lemma_data + 1 + *lemma_data + 4 + 2); |
1205
|
0
|
0
|
|
|
|
|
if (children_len) { |
1206
|
0
|
|
|
|
|
|
children.resize(children_len); |
1207
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < children_len; i++) { |
1208
|
0
|
|
|
|
|
|
unsigned child_len = children_encoded[i] & 0xFF; |
1209
|
0
|
|
|
|
|
|
auto child_data = derinet.data_start(child_len) + (children_encoded[i] >> 8); |
1210
|
0
|
|
|
|
|
|
children[i].lemma.assign((const char*) child_data, child_len); |
1211
|
0
|
0
|
|
|
|
|
if (child_data[child_len]) |
1212
|
0
|
|
|
|
|
|
children[i].lemma.append((const char*) child_data + child_len + 1, child_data[child_len]); |
1213
|
|
|
|
|
|
|
} |
1214
|
|
|
|
|
|
|
return true; |
1215
|
|
|
|
|
|
|
} |
1216
|
|
|
|
|
|
|
} |
1217
|
0
|
|
|
|
|
|
children.clear(); |
1218
|
0
|
|
|
|
|
|
return false; |
1219
|
|
|
|
|
|
|
} |
1220
|
|
|
|
|
|
|
|
1221
|
0
|
|
|
|
|
|
// Loads the derivation dictionary from a compressed binary stream.
//
// The decompressed data is walked three times:
//   pass 1 registers every lemma and the size of its entry in `derinet`,
//   pass 2 fills each entry (comment, zeroed parent, children count),
//   pass 3 reconstructs each lemma's parent from the stored edit operations
//          and links the child to the parent and the parent to the child.
//
// Returns false when decompression fails, when the decoder signals an error,
// or when the data is structurally inconsistent (the consistency checks are
// active in all build types, not only when assertions are enabled).
bool derivator_dictionary::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  // Measures one stored entry: 1B comment length + comment, 4B parent,
  // 2B children count + 4B per child.
  auto entry_size = [](pointer_decoder& entry) {
    entry.next<char>(entry.next_1B());
    entry.next_4B();
    entry.next<uint32_t>(entry.next_2B());
  };

  try {
    for (int i = data.next_1B(); i > 0; i--)
      derinet.resize(data.next_4B());

    unsigned data_position = data.tell();
    vector<char> lemma, parent;
    for (int pass = 1; pass <= 3; pass++) {
      if (pass > 1) data.seek(data_position);

      lemma.clear();
      for (int i = data.next_4B(); i > 0; i--) {
        // Lemmas are delta-coded against the previous one: drop a suffix,
        // then append new bytes. Dropping more than is present is malformed.
        unsigned dropped = data.next_1B();
        if (dropped > lemma.size()) return false;
        lemma.resize(lemma.size() - dropped);
        for (int j = data.next_1B(); j > 0; j--)
          lemma.push_back(data.next_1B());

        unsigned char lemma_comment_len = data.next_1B();
        const char* lemma_comment = lemma_comment_len ? data.next<char>(lemma_comment_len) : nullptr;

        unsigned children = data.next_2B();

        // The parent is described as edits of the current lemma.
        if (pass == 3) parent.clear();
        enum { REMOVE_START = 1, REMOVE_END = 2, ADD_START = 4, ADD_END = 8 };
        int operations = data.next_1B();
        if (operations) {
          int remove_start = operations & REMOVE_START ? data.next_1B() : 0;
          int remove_end = operations & REMOVE_END ? data.next_1B() : 0;
          // Removing more bytes than the lemma has would form an invalid range.
          if (size_t(remove_start) + size_t(remove_end) > lemma.size()) return false;
          if (operations & ADD_START) {
            int add_start = data.next_1B();
            const char* str = data.next<char>(add_start);
            if (pass == 3) parent.assign(str, str + add_start);
          }
          if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end);
          if (operations & ADD_END) {
            int add_end = data.next_1B();
            const char* str = data.next<char>(add_end);
            if (pass == 3) parent.insert(parent.end(), str, str + add_end);
          }
        }

        if (pass == 1) {
          derinet.add(lemma.data(), lemma.size(), 1 + lemma_comment_len + 4 + 2 + 4 * children);
        } else if (pass == 2) {
          unsigned char* lemma_data = derinet.fill(lemma.data(), lemma.size(), 1 + lemma_comment_len + 4 + 2 + 4 * children);
          *lemma_data++ = lemma_comment_len;
          while (lemma_comment_len--) *lemma_data++ = *lemma_comment++;
          unaligned_store_inc<uint32_t>(lemma_data, 0);
          unaligned_store_inc<uint16_t>(lemma_data, children);
          // The last child slot temporarily holds the index of the next free
          // slot; it starts at 0 and is consumed during pass 3.
          if (children) unaligned_store<uint32_t>(((uint32_t*)lemma_data) + children - 1, 0);
        } else if (pass == 3 && !parent.empty()) {
          auto lemma_data = derinet.at(lemma.data(), lemma.size(), entry_size);
          auto parent_data = derinet.at(parent.data(), parent.size(), entry_size);
          if (!lemma_data || !parent_data) return false;

          // References are packed as (offset << 8) | key length, so the key
          // length must fit in 8 bits and the offset in 24 bits.
          unsigned parent_offset = parent_data - parent.size() - derinet.data_start(parent.size());
          if (parent.size() >= (1<<8) || parent_offset >= (1<<24)) return false;
          unaligned_store<uint32_t>((void *)(lemma_data + 1 + *lemma_data), (parent_offset << 8) | parent.size());

          unsigned lemma_offset = lemma_data - lemma.size() - derinet.data_start(lemma.size());
          if (lemma.size() >= (1<<8) || lemma_offset >= (1<<24)) return false;
          auto children_len = unaligned_load<uint16_t>(parent_data + 1 + *parent_data + 4);
          // A parent that declared no children has no slot to write into.
          if (!children_len) return false;
          auto children = (uint32_t*)(parent_data + 1 + *parent_data + 4 + 2);
          auto child_index = unaligned_load<uint32_t>(children + children_len - 1);
          // More children than declared would index past the child array.
          if (child_index >= children_len) return false;
          unaligned_store<uint32_t>(children + child_index, (lemma_offset << 8) | lemma.size());
          if (child_index+1 < children_len)
            unaligned_store<uint32_t>(children + children_len - 1, unaligned_load<uint32_t>(children + children_len - 1) + 1);
        }
      }

      if (pass == 1)
        derinet.done_adding();
      if (pass == 2)
        derinet.done_filling();
    }
  } catch (binary_decoder_error&) {
    return false;
  }
  return true;
}
1311
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
///////// |
1313
|
|
|
|
|
|
|
// File: unilib/unicode.h |
1314
|
|
|
|
|
|
|
///////// |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
// This file is part of UniLib <http://github.com/ufal/unilib/>. |
1317
|
|
|
|
|
|
|
// |
1318
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
1319
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1320
|
|
|
|
|
|
|
// |
1321
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1322
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1323
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1324
|
|
|
|
|
|
|
// |
1325
|
|
|
|
|
|
|
// UniLib version: 3.2.0 |
1326
|
|
|
|
|
|
|
// Unicode version: 13.0.0 |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
namespace unilib { |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
// Unicode general-category and case-mapping lookups backed by two-level
// tables (one index byte per 256-code-point block, then a 256-entry block).
class unicode {
  // Ordinal of each general category; used as the bit position in category_t.
  enum : uint8_t {
    _Lu = 1, _Ll = 2, _Lt = 3, _Lm = 4, _Lo = 5,
    _Mn = 6, _Mc = 7, _Me = 8,
    _Nd = 9, _Nl = 10, _No = 11,
    _Pc = 12, _Pd = 13, _Ps = 14, _Pe = 15, _Pi = 16, _Pf = 17, _Po = 18,
    _Sm = 19, _Sc = 20, _Sk = 21, _So = 22,
    _Zs = 23, _Zl = 24, _Zp = 25,
    _Cc = 26, _Cf = 27, _Cs = 28, _Co = 29, _Cn = 30
  };

 public:
  // Bit mask of general categories: one bit per category, so groups can be
  // combined and tested with bitwise operators.
  using category_t = uint32_t;
  enum : category_t {
    // Letters
    Lu = 1 << _Lu, Ll = 1 << _Ll, Lt = 1 << _Lt,
    Lut = Lu | Lt,
    LC = Lu | Ll | Lt,
    Lm = 1 << _Lm, Lo = 1 << _Lo,
    L = Lu | Ll | Lt | Lm | Lo,
    // Marks
    Mn = 1 << _Mn, Mc = 1 << _Mc, Me = 1 << _Me,
    M = Mn | Mc | Me,
    // Numbers
    Nd = 1 << _Nd, Nl = 1 << _Nl, No = 1 << _No,
    N = Nd | Nl | No,
    // Punctuation
    Pc = 1 << _Pc, Pd = 1 << _Pd, Ps = 1 << _Ps, Pe = 1 << _Pe, Pi = 1 << _Pi,
    Pf = 1 << _Pf, Po = 1 << _Po,
    P = Pc | Pd | Ps | Pe | Pi | Pf | Po,
    // Symbols
    Sm = 1 << _Sm, Sc = 1 << _Sc, Sk = 1 << _Sk, So = 1 << _So,
    S = Sm | Sc | Sk | So,
    // Separators
    Zs = 1 << _Zs, Zl = 1 << _Zl, Zp = 1 << _Zp,
    Z = Zs | Zl | Zp,
    // Other
    Cc = 1 << _Cc, Cf = 1 << _Cf, Cs = 1 << _Cs, Co = 1 << _Co, Cn = 1 << _Cn,
    C = Cc | Cf | Cs | Co | Cn
  };

  // Category bit of the given code point.
  static inline category_t category(char32_t chr);

  // Case mappings of a single code point.
  static inline char32_t lowercase(char32_t chr);
  static inline char32_t uppercase(char32_t chr);
  static inline char32_t titlecase(char32_t chr);

 private:
  // Number of code points covered by the lookup tables.
  static const char32_t CHARS = 0x110000;
  // Category reported for code points outside the tables.
  static const int32_t DEFAULT_CAT = Cn;

  static const uint8_t category_index[CHARS >> 8];
  static const uint8_t category_block[][256];
  static const uint8_t othercase_index[CHARS >> 8];
  static const char32_t othercase_block[][256];

  // Tag kept in the low byte of an othercase_block entry; the remaining bits
  // hold the mapped code point.
  enum othercase_type {
    LOWER_ONLY = 1,
    UPPERTITLE_ONLY = 2,
    UPPER_ONLY = 3,
    LOWER_THEN_UPPER = 4,
    UPPER_THEN_TITLE = 5,
    TITLE_THEN_LOWER = 6
  };
};
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
// Returns the category bit of `chr`; code points at or beyond CHARS get
// DEFAULT_CAT.
unicode::category_t unicode::category(char32_t chr) {
  if (chr >= CHARS) return DEFAULT_CAT;

  // Two-level lookup: the high bits pick a 256-entry block, the low byte
  // picks the category ordinal inside it.
  const uint8_t block = category_index[chr >> 8];
  return 1 << category_block[block][chr & 0xFF];
}
1376
|
|
|
|
|
|
|
|
1377
|
13
|
|
|
|
|
|
char32_t unicode::lowercase(char32_t chr) { |
1378
|
13
|
50
|
|
|
|
|
if (chr < CHARS) { |
1379
|
13
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
1380
|
13
|
100
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_ONLY) return othercase >> 8; |
1381
|
10
|
50
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase >> 8; |
1382
|
10
|
50
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1383
|
|
|
|
|
|
|
} |
1384
|
|
|
|
|
|
|
return chr; |
1385
|
|
|
|
|
|
|
} |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
char32_t unicode::uppercase(char32_t chr) { |
1388
|
|
|
|
|
|
|
if (chr < CHARS) { |
1389
|
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
1390
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8; |
1391
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_ONLY) return othercase >> 8; |
1392
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase >> 8; |
1393
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1394
|
|
|
|
|
|
|
} |
1395
|
|
|
|
|
|
|
return chr; |
1396
|
|
|
|
|
|
|
} |
1397
|
|
|
|
|
|
|
|
1398
|
|
|
|
|
|
|
char32_t unicode::titlecase(char32_t chr) { |
1399
|
|
|
|
|
|
|
if (chr < CHARS) { |
1400
|
|
|
|
|
|
|
char32_t othercase = othercase_block[othercase_index[chr >> 8]][chr & 0xFF]; |
1401
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8; |
1402
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase >> 8; |
1403
|
|
|
|
|
|
|
if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1404
|
|
|
|
|
|
|
} |
1405
|
|
|
|
|
|
|
return chr; |
1406
|
|
|
|
|
|
|
} |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
} // namespace unilib |
1409
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
///////// |
1411
|
|
|
|
|
|
|
// File: unilib/utf8.h |
1412
|
|
|
|
|
|
|
///////// |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
// This file is part of UniLib <http://github.com/ufal/unilib/>. |
1415
|
|
|
|
|
|
|
// |
1416
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
1417
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1418
|
|
|
|
|
|
|
// |
1419
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1420
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1421
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1422
|
|
|
|
|
|
|
// |
1423
|
|
|
|
|
|
|
// UniLib version: 3.2.0 |
1424
|
|
|
|
|
|
|
// Unicode version: 13.0.0 |
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
namespace unilib { |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
// UTF-8 helpers: validation, decoding to code points, encoding and mapping.
//
// Overloads taking `const char*` alone treat the input as NUL-terminated;
// overloads taking `(str, len)` read at most `len` bytes. The std::string
// convenience overloads defined in this file forward `c_str()` to the
// NUL-terminated variants.
class utf8 {
 public:
  // Validation.
  static bool valid(const char* str);
  static bool valid(const char* str, size_t len);
  static inline bool valid(const std::string& str);

  // Decode one code point and advance `str` (and decrease `len`) past the
  // consumed bytes. Malformed input yields REPLACEMENT_CHAR.
  static inline char32_t decode(const char*& str);
  static inline char32_t decode(const char*& str, size_t& len);
  // Decode the first code point without modifying the caller's pointer.
  static inline char32_t first(const char* str);
  static inline char32_t first(const char* str, size_t len);
  static inline char32_t first(const std::string& str);

  // Decode a whole string into `decoded`.
  static void decode(const char* str, std::u32string& decoded);
  static void decode(const char* str, size_t len, std::u32string& decoded);
  static inline void decode(const std::string& str, std::u32string& decoded);

  // Range over the code points of a NUL-terminated string, usable in
  // range-based for loops. Created via decoder().
  class string_decoder {
   public:
    class iterator;
    inline iterator begin();
    inline iterator end();
   private:
    inline string_decoder(const char* str);
    const char* str;
    friend class utf8;
  };
  static inline string_decoder decoder(const char* str);
  static inline string_decoder decoder(const std::string& str);

  // Range over the code points of a length-delimited buffer. Created via
  // decoder(str, len).
  class buffer_decoder {
   public:
    class iterator;
    inline iterator begin();
    inline iterator end();
   private:
    inline buffer_decoder(const char* str, size_t len);
    const char* str;
    size_t len;
    friend class utf8;
  };
  static inline buffer_decoder decoder(const char* str, size_t len);

  // Encoding. append(char*&, ...) writes through the pointer and advances it;
  // the caller must provide enough room (up to 4 bytes per code point).
  static inline void append(char*& str, char32_t chr);
  static inline void append(std::string& str, char32_t chr);
  static void encode(const std::u32string& str, std::string& encoded);

  // Apply `f` to every decoded code point and store the re-encoded result
  // in `result` (which is cleared first).
  template<class F> static void map(F f, const char* str, std::string& result);
  template<class F> static void map(F f, const char* str, size_t len, std::string& result);
  template<class F> static void map(F f, const std::string& str, std::string& result);

 private:
  // Emitted in place of malformed or unencodable input.
  static const char REPLACEMENT_CHAR = '?';
};
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
// Validates `str` as a NUL-terminated string (bytes after an embedded NUL
// are not inspected).
bool utf8::valid(const std::string& str) {
  const char* bytes = str.c_str();
  return valid(bytes);
}
1485
|
|
|
|
|
|
|
|
1486
|
0
|
|
|
|
|
|
char32_t utf8::decode(const char*& str) { |
1487
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
1488
|
0
|
0
|
|
|
|
|
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
1489
|
0
|
0
|
|
|
|
|
else if (((unsigned char)*str) < 0xE0) { |
1490
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x1F) << 6; |
1491
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1492
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
1493
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF0) { |
1494
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x0F) << 12; |
1495
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1496
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 6; |
1497
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1498
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
1499
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF8) { |
1500
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x07) << 18; |
1501
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1502
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 12; |
1503
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1504
|
0
|
|
|
|
|
|
res += (((unsigned char)*str++) & 0x3F) << 6; |
1505
|
0
|
0
|
|
|
|
|
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
1506
|
0
|
|
|
|
|
|
return res + (((unsigned char)*str++) & 0x3F); |
1507
|
0
|
|
|
|
|
|
} else return ++str, REPLACEMENT_CHAR; |
1508
|
|
|
|
|
|
|
} |
1509
|
|
|
|
|
|
|
|
1510
|
219
|
|
|
|
|
|
char32_t utf8::decode(const char*& str, size_t& len) { |
1511
|
219
|
100
|
|
|
|
|
if (!len) return 0; |
1512
|
218
|
|
|
|
|
|
--len; |
1513
|
218
|
100
|
|
|
|
|
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
1514
|
25
|
50
|
|
|
|
|
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
1515
|
25
|
100
|
|
|
|
|
else if (((unsigned char)*str) < 0xE0) { |
1516
|
22
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x1F) << 6; |
1517
|
22
|
50
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1518
|
22
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
1519
|
3
|
50
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF0) { |
1520
|
3
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x0F) << 12; |
1521
|
3
|
50
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1522
|
3
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 6; |
1523
|
3
|
50
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1524
|
3
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
1525
|
0
|
0
|
|
|
|
|
} else if (((unsigned char)*str) < 0xF8) { |
1526
|
0
|
|
|
|
|
|
char32_t res = (((unsigned char)*str++) & 0x07) << 18; |
1527
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1528
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 12; |
1529
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1530
|
0
|
|
|
|
|
|
res += ((--len, ((unsigned char)*str++)) & 0x3F) << 6; |
1531
|
0
|
0
|
|
|
|
|
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1532
|
0
|
|
|
|
|
|
return res + ((--len, ((unsigned char)*str++)) & 0x3F); |
1533
|
0
|
|
|
|
|
|
} else return ++str, REPLACEMENT_CHAR; |
1534
|
|
|
|
|
|
|
} |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
char32_t utf8::first(const char* str) { |
1537
|
|
|
|
|
|
|
return decode(str); |
1538
|
|
|
|
|
|
|
} |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
char32_t utf8::first(const char* str, size_t len) { |
1541
|
|
|
|
|
|
|
return decode(str, len); |
1542
|
|
|
|
|
|
|
} |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
char32_t utf8::first(const std::string& str) { |
1545
|
|
|
|
|
|
|
return first(str.c_str()); |
1546
|
|
|
|
|
|
|
} |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
// Decodes `str`, read as a NUL-terminated string, into `decoded`.
void utf8::decode(const std::string& str, std::u32string& decoded) {
  const char* bytes = str.c_str();
  decode(bytes, decoded);
}
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
class utf8::string_decoder::iterator : public std::iterator { |
1553
|
|
|
|
|
|
|
public: |
1554
|
|
|
|
|
|
|
iterator(const char* str) : codepoint(0), next(str) { operator++(); } |
1555
|
|
|
|
|
|
|
iterator(const iterator& it) : codepoint(it.codepoint), next(it.next) {} |
1556
|
|
|
|
|
|
|
iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; } |
1557
|
|
|
|
|
|
|
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; } |
1558
|
|
|
|
|
|
|
bool operator==(const iterator& other) const { return next == other.next; } |
1559
|
|
|
|
|
|
|
bool operator!=(const iterator& other) const { return next != other.next; } |
1560
|
|
|
|
|
|
|
const char32_t& operator*() { return codepoint; } |
1561
|
|
|
|
|
|
|
private: |
1562
|
|
|
|
|
|
|
char32_t codepoint; |
1563
|
|
|
|
|
|
|
const char* next; |
1564
|
|
|
|
|
|
|
}; |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
// Remembers the start of the NUL-terminated string; no copy is made, so the
// string must outlive the decoder.
utf8::string_decoder::string_decoder(const char* str) : str(str) {}

// Iterator positioned on the first code point.
utf8::string_decoder::iterator utf8::string_decoder::begin() {
  iterator first(str);
  return first;
}

// Past-the-end iterator (null position).
utf8::string_decoder::iterator utf8::string_decoder::end() {
  iterator past_end(nullptr);
  return past_end;
}

// Code-point range over a NUL-terminated string.
utf8::string_decoder utf8::decoder(const char* str) {
  string_decoder range(str);
  return range;
}

// Code-point range over `str`, read as a NUL-terminated string. The range
// points into `str`, which must stay alive and unmodified while it is used.
utf8::string_decoder utf8::decoder(const std::string& str) {
  string_decoder range(str.c_str());
  return range;
}
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
class utf8::buffer_decoder::iterator : public std::iterator { |
1585
|
|
|
|
|
|
|
public: |
1586
|
|
|
|
|
|
|
iterator(const char* str, size_t len) : codepoint(0), next(str), len(len) { operator++(); } |
1587
|
|
|
|
|
|
|
iterator(const iterator& it) : codepoint(it.codepoint), next(it.next), len(it.len) {} |
1588
|
|
|
|
|
|
|
iterator& operator++() { if (!len) next = nullptr; if (next) codepoint = decode(next, len); return *this; } |
1589
|
|
|
|
|
|
|
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; } |
1590
|
|
|
|
|
|
|
bool operator==(const iterator& other) const { return next == other.next; } |
1591
|
|
|
|
|
|
|
bool operator!=(const iterator& other) const { return next != other.next; } |
1592
|
|
|
|
|
|
|
const char32_t& operator*() { return codepoint; } |
1593
|
|
|
|
|
|
|
private: |
1594
|
|
|
|
|
|
|
char32_t codepoint; |
1595
|
|
|
|
|
|
|
const char* next; |
1596
|
|
|
|
|
|
|
size_t len; |
1597
|
|
|
|
|
|
|
}; |
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
// Remembers the buffer bounds; no copy is made, so the buffer must outlive
// the decoder.
utf8::buffer_decoder::buffer_decoder(const char* str, size_t len) : str(str), len(len) {}

// Iterator positioned on the first code point.
utf8::buffer_decoder::iterator utf8::buffer_decoder::begin() {
  iterator first(str, len);
  return first;
}

// Past-the-end iterator (null position, zero length).
utf8::buffer_decoder::iterator utf8::buffer_decoder::end() {
  iterator past_end(nullptr, 0);
  return past_end;
}

// Code-point range over the first `len` bytes at `str`.
utf8::buffer_decoder utf8::decoder(const char* str, size_t len) {
  buffer_decoder range(str, len);
  return range;
}
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
void utf8::append(char*& str, char32_t chr) { |
1614
|
|
|
|
|
|
|
if (chr < 0x80) *str++ = chr; |
1615
|
|
|
|
|
|
|
else if (chr < 0x800) { *str++ = 0xC0 + (chr >> 6); *str++ = 0x80 + (chr & 0x3F); } |
1616
|
|
|
|
|
|
|
else if (chr < 0x10000) { *str++ = 0xE0 + (chr >> 12); *str++ = 0x80 + ((chr >> 6) & 0x3F); *str++ = 0x80 + (chr & 0x3F); } |
1617
|
|
|
|
|
|
|
else if (chr < 0x200000) { *str++ = 0xF0 + (chr >> 18); *str++ = 0x80 + ((chr >> 12) & 0x3F); *str++ = 0x80 + ((chr >> 6) & 0x3F); *str++ = 0x80 + (chr & 0x3F); } |
1618
|
|
|
|
|
|
|
else *str++ = REPLACEMENT_CHAR; |
1619
|
|
|
|
|
|
|
} |
1620
|
|
|
|
|
|
|
|
1621
|
13
|
|
|
|
|
|
void utf8::append(std::string& str, char32_t chr) { |
1622
|
13
|
50
|
|
|
|
|
if (chr < 0x80) str += chr; |
1623
|
0
|
0
|
|
|
|
|
else if (chr < 0x800) { str += 0xC0 + (chr >> 6); str += 0x80 + (chr & 0x3F); } |
1624
|
0
|
0
|
|
|
|
|
else if (chr < 0x10000) { str += 0xE0 + (chr >> 12); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
1625
|
0
|
0
|
|
|
|
|
else if (chr < 0x200000) { str += 0xF0 + (chr >> 18); str += 0x80 + ((chr >> 12) & 0x3F); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
1626
|
|
|
|
|
|
|
else str += REPLACEMENT_CHAR; |
1627
|
13
|
|
|
|
|
|
} |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
template void utf8::map(F f, const char* str, std::string& result) { |
1630
|
|
|
|
|
|
|
result.clear(); |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
for (char32_t chr; (chr = decode(str)); ) |
1633
|
|
|
|
|
|
|
append(result, f(chr)); |
1634
|
|
|
|
|
|
|
} |
1635
|
|
|
|
|
|
|
|
1636
|
0
|
|
|
|
|
|
template void utf8::map(F f, const char* str, size_t len, std::string& result) { |
1637
|
|
|
|
|
|
|
result.clear(); |
1638
|
|
|
|
|
|
|
|
1639
|
0
|
0
|
|
|
|
|
while (len) |
1640
|
0
|
|
|
|
|
|
append(result, f(decode(str, len))); |
1641
|
0
|
|
|
|
|
|
} |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
template void utf8::map(F f, const std::string& str, std::string& result) { |
1644
|
|
|
|
|
|
|
map(f, str.c_str(), result); |
1645
|
|
|
|
|
|
|
} |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
} // namespace unilib |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
///////// |
1650
|
|
|
|
|
|
|
// File: morpho/casing_variants.h |
1651
|
|
|
|
|
|
|
///////// |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
1654
|
|
|
|
|
|
|
// |
1655
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1656
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1657
|
|
|
|
|
|
|
// |
1658
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1659
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1660
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1661
|
|
|
|
|
|
|
|
1662
|
13
|
|
|
|
|
|
// Produces lowercased variants of `form` when they differ from `form`.
//
// Only uppercase/titlecase letters are replaced, by their lowercase mapping:
//   - form_lc:   every character lowercased;
//   - form_uclc: first character kept, the rest lowercased. It is produced
//     only when both the first and some later character are upper/titlecase.
// When `form` contains no upper/titlecase character, neither output is
// touched. Outputs are appended to, except in the "only later characters are
// uppercase" case, where form_lc is rebuilt via utf8::map.
inline void generate_casing_variants(string_piece form, string& form_uclc, string& form_lc) {
  using namespace unilib;

  // Classify the first character, then scan the rest until the first
  // upper/titlecase character is seen.
  string_piece scan = form;
  const bool first_Lut = unicode::category(utf8::decode(scan.str, scan.len)) & unicode::Lut;
  bool rest_has_Lut = false;
  while (scan.len && !rest_has_Lut)
    rest_has_Lut = unicode::category(utf8::decode(scan.str, scan.len)) & unicode::Lut;

  // Nothing to lowercase: the variants would equal the form itself.
  if (!first_Lut && !rest_has_Lut) return;

  form_lc.reserve(form.len);

  // Uppercase only after the first character: lowercase everything.
  if (!first_Lut) {
    utf8::map(unicode::lowercase, form.str, form.len, form_lc);
    return;
  }

  string_piece rest = form;
  if (!rest_has_Lut) {
    // Common case allowing fast execution: lowercase just the first
    // character and copy the remaining bytes verbatim.
    utf8::append(form_lc, unicode::lowercase(utf8::decode(rest.str, rest.len)));
    form_lc.append(rest.str, rest.len);
    return;
  }

  // Uppercase both at the start and later: build both variants in one pass.
  form_uclc.reserve(form.len);
  const char32_t first = utf8::decode(rest.str, rest.len);
  utf8::append(form_lc, unicode::lowercase(first));
  utf8::append(form_uclc, first);
  while (rest.len) {
    const char32_t lowered = unicode::lowercase(utf8::decode(rest.str, rest.len));
    utf8::append(form_lc, lowered);
    utf8::append(form_uclc, lowered);
  }
}
1702
|
|
|
|
|
|
|
|
1703
|
|
|
|
|
|
|
///////// |
1704
|
|
|
|
|
|
|
// File: morpho/czech_lemma_addinfo.h |
1705
|
|
|
|
|
|
|
///////// |
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
1708
|
|
|
|
|
|
|
// |
1709
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1710
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1711
|
|
|
|
|
|
|
// |
1712
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1713
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1714
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1715
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
// Declarations |
1717
|
0
|
|
|
|
|
|
struct czech_lemma_addinfo { |
1718
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
1719
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
1720
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
1721
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
1722
|
|
|
|
|
|
|
|
1723
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
1724
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
vector data; |
1727
|
|
|
|
|
|
|
}; |
1728
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
// Definitions |
1730
|
0
|
|
|
|
|
|
// Returns the length of the raw lemma. The raw lemma is terminated by the
// first '`', '_' or '-' immediately followed by a digit which appears on a
// non-first position; without such a terminator the whole lemma is raw.
int czech_lemma_addinfo::raw_lemma_len(string_piece lemma) {
  for (unsigned pos = 1; pos < lemma.len; pos++) {
    const char current = lemma.str[pos];
    if (current == '`' || current == '_') return pos;

    // A dash terminates the raw lemma only when a digit follows it.
    if (current != '-' || pos + 1 >= lemma.len) continue;
    const char following = lemma.str[pos + 1];
    if (following >= '0' && following <= '9') return pos;
  }
  return lemma.len;
}
1738
|
|
|
|
|
|
|
|
1739
|
0
|
|
|
|
|
|
// Returns the length of the lemma together with its lemma id. Scanning
// starts on the second character: a '`' or '_' ends the lemma right there,
// while a '-' followed by digits is the lemma id and is included in the
// returned length. Without any terminator the whole lemma length is returned.
int czech_lemma_addinfo::lemma_id_len(string_piece lemma) {
  for (unsigned pos = 1; pos < lemma.len; pos++) {
    const char current = lemma.str[pos];
    if (current == '`' || current == '_') return pos;

    const bool starts_id = current == '-' && pos + 1 < lemma.len &&
                           lemma.str[pos + 1] >= '0' && lemma.str[pos + 1] <= '9';
    if (!starts_id) continue;

    // Skip the dash and the first digit, then consume the remaining digits.
    unsigned end = pos + 2;
    while (end < lemma.len && lemma.str[end] >= '0' && lemma.str[end] <= '9') end++;
    return end;
  }
  return lemma.len;
}
1752
|
|
|
|
|
|
|
|
1753
|
0
|
|
|
|
|
|
// Formats encoded additional information as text. The first byte is the
// lemma number, printed as "-<number>" unless it equals 255; all following
// bytes are appended verbatim. Empty additional information gives an empty string.
string czech_lemma_addinfo::format(const unsigned char* addinfo, int addinfo_len) {
  string formatted;
  if (!addinfo_len) return formatted;

  formatted.reserve(addinfo_len + 4);
  if (addinfo[0] != 255) {
    // A dash, at most three digits and the terminating NUL fit into 5 chars.
    char number[5];
    snprintf(number, sizeof(number), "-%u", addinfo[0]);
    formatted += number;
  }
  for (const unsigned char* byte = addinfo + 1; byte < addinfo + addinfo_len; byte++)
    formatted += *byte;

  return formatted;
}
1769
|
|
|
|
|
|
|
|
1770
|
|
|
|
|
|
|
// Returns false when the additional information (searched from its second
// byte on) contains the three-byte sequence "_,x", and true otherwise.
bool czech_lemma_addinfo::generatable(const unsigned char* addinfo, int addinfo_len) {
  int pos = 1;
  while (pos + 2 < addinfo_len) {
    const bool marker_found = addinfo[pos] == '_' && addinfo[pos + 1] == ',' && addinfo[pos + 2] == 'x';
    if (marker_found) return false;
    pos++;
  }
  return true;
}
1777
|
|
|
|
|
|
|
|
1778
|
0
|
|
|
|
|
|
// Parses the additional information following the raw lemma into `data`.
//
// When additional information is present, `data` receives the lemma number
// (255 when missing or invalid) followed by the remaining bytes, truncated to
// 255 bytes in total. With `die_on_failure` set, an invalid lemma number or
// too long additional information is reported through `runtime_failure`
// instead of being silently repaired.
//
// Returns the length of the raw lemma.
int czech_lemma_addinfo::parse(string_piece lemma, bool die_on_failure) {
  data.clear();

  const char* lemma_end = lemma.str + lemma.len;
  const char* lemma_info = lemma.str + raw_lemma_len(lemma);
  if (lemma_info < lemma_end) {
    int lemma_num = 255;
    const char* lemma_additional_info = lemma_info;

    if (*lemma_info == '-') {
      lemma_num = 0;
      for (lemma_additional_info = lemma_info + 1;
           lemma_additional_info < lemma_end && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9');
           lemma_additional_info++)
        // Any value >= 255 is rejected below, so stop accumulating once the
        // number is far out of range; this keeps all digits consumed while
        // avoiding signed overflow on arbitrarily long digit sequences.
        if (lemma_num < 100000000)
          lemma_num = 10 * lemma_num + (*lemma_additional_info - '0');

      // Fail on a dash without digits, on an unexpected character after the
      // number, and on a number which does not fit below the 255 sentinel.
      if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma_end && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
        if (die_on_failure)
          runtime_failure("Lemma number " << lemma_num << " in lemma " << lemma << " out of range!");
        else
          lemma_num = 255;
      }
    }
    data.emplace_back(lemma_num);
    while (lemma_additional_info < lemma_end)
      data.push_back(static_cast<unsigned char>(*lemma_additional_info++));

    if (data.size() > 255) {
      if (die_on_failure)
        // Print the additional information with its explicit length, because
        // the string_piece data are delimited by `len`, not by a NUL byte.
        runtime_failure("Too long lemma info " << string_piece(lemma_info, lemma_end - lemma_info) << " in lemma " << lemma << '!');
      else
        data.resize(255);
    }
  }

  return lemma_info - lemma.str;
}
1814
|
|
|
|
|
|
|
|
1815
|
|
|
|
|
|
|
bool czech_lemma_addinfo::match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len) { |
1816
|
0
|
0
|
|
|
|
|
if (data.empty()) return true; |
1817
|
0
|
0
|
|
|
|
|
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
return true; |
1819
|
|
|
|
|
|
|
} |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
///////// |
1822
|
|
|
|
|
|
|
// File: morpho/tag_filter.h |
1823
|
|
|
|
|
|
|
///////// |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
1826
|
|
|
|
|
|
|
// |
1827
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1828
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1829
|
|
|
|
|
|
|
// |
1830
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1831
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1832
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
// Declarations |
1835
|
4
|
|
|
|
|
|
class tag_filter { |
1836
|
|
|
|
|
|
|
public: |
1837
|
|
|
|
|
|
|
tag_filter(const char* filter = nullptr); |
1838
|
|
|
|
|
|
|
|
1839
|
|
|
|
|
|
|
inline bool matches(const char* tag) const; |
1840
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
private: |
1842
|
|
|
|
|
|
|
struct char_filter { |
1843
|
|
|
|
|
|
|
char_filter(int pos, bool negate, int chars_offset, int chars_len) |
1844
|
5
|
|
|
|
|
|
: pos(pos), negate(negate), chars_offset(chars_offset), chars_len(chars_len) {} |
1845
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
int pos; |
1847
|
|
|
|
|
|
|
bool negate; |
1848
|
|
|
|
|
|
|
int chars_offset, chars_len; |
1849
|
|
|
|
|
|
|
}; |
1850
|
|
|
|
|
|
|
|
1851
|
|
|
|
|
|
|
string wildcard; |
1852
|
|
|
|
|
|
|
std::vector filters; |
1853
|
|
|
|
|
|
|
}; |
1854
|
|
|
|
|
|
|
|
1855
|
|
|
|
|
|
|
// Definitions |
1856
|
40
|
|
|
|
|
|
inline bool tag_filter::matches(const char* tag) const { |
1857
|
40
|
100
|
|
|
|
|
if (filters.empty()) return true; |
1858
|
|
|
|
|
|
|
|
1859
|
|
|
|
|
|
|
int tag_pos = 0; |
1860
|
39
|
100
|
|
|
|
|
for (auto&& filter : filters) { |
1861
|
|
|
|
|
|
|
// Skip until next filter position. If the tag ends prematurely, accept. |
1862
|
66
|
100
|
|
|
|
|
while (tag_pos < filter.pos) |
1863
|
30
|
50
|
|
|
|
|
if (!tag[tag_pos++]) |
1864
|
|
|
|
|
|
|
return true; |
1865
|
36
|
50
|
|
|
|
|
if (!tag[tag_pos]) |
1866
|
|
|
|
|
|
|
return true; |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
// We assume filter.chars_len >= 1. |
1869
|
36
|
|
|
|
|
|
bool matched = (wildcard[filter.chars_offset] == tag[tag_pos]) ^ filter.negate; |
1870
|
42
|
100
|
|
|
|
|
for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++) |
|
|
100
|
|
|
|
|
|
1871
|
12
|
|
|
|
|
|
matched = (wildcard[filter.chars_offset + i] == tag[tag_pos]) ^ filter.negate; |
1872
|
36
|
100
|
|
|
|
|
if (!matched) return false; |
1873
|
|
|
|
|
|
|
} |
1874
|
|
|
|
|
|
|
return true; |
1875
|
|
|
|
|
|
|
} |
1876
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
///////// |
1878
|
|
|
|
|
|
|
// File: morpho/morpho_dictionary.h |
1879
|
|
|
|
|
|
|
///////// |
1880
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
1882
|
|
|
|
|
|
|
// |
1883
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
1884
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
1885
|
|
|
|
|
|
|
// |
1886
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
1887
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
1888
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
// Declarations |
1891
|
|
|
|
|
|
|
template |
1892
|
0
|
|
|
|
|
|
class morpho_dictionary { |
1893
|
|
|
|
|
|
|
public: |
1894
|
|
|
|
|
|
|
void load(binary_decoder& data); |
1895
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas) const; |
1896
|
|
|
|
|
|
|
bool generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms) const; |
1897
|
|
|
|
|
|
|
private: |
1898
|
|
|
|
|
|
|
persistent_unordered_map lemmas, roots, suffixes; |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
vector tags; |
1901
|
|
|
|
|
|
|
vector>>> classes; |
1902
|
|
|
|
|
|
|
}; |
1903
|
|
|
|
|
|
|
|
1904
|
|
|
|
|
|
|
// Definitions |
1905
|
|
|
|
|
|
|
template |
1906
|
2
|
|
|
|
|
|
void morpho_dictionary::load(binary_decoder& data) { |
1907
|
|
|
|
|
|
|
// Prepare lemmas and roots hashes |
1908
|
16
|
100
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1909
|
14
|
|
|
|
|
|
lemmas.resize(data.next_4B()); |
1910
|
20
|
100
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1911
|
18
|
|
|
|
|
|
roots.resize(data.next_4B()); |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
// Perform two pass over the lemmas and roots data, filling the hashes. |
1914
|
|
|
|
|
|
|
|
1915
|
2
|
|
|
|
|
|
vector lemma(max(lemmas.max_length(), roots.max_length())); |
1916
|
2
|
50
|
|
|
|
|
vector root(max(lemmas.max_length(), roots.max_length())); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
unsigned data_position = data.tell(); |
1918
|
6
|
100
|
|
|
|
|
for (int pass = 1; pass <= 2; pass++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1919
|
4
|
100
|
|
|
|
|
if (pass > 1) data.seek(data_position); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
int lemma_len = 0; |
1922
|
|
|
|
|
|
|
int root_len = 0; |
1923
|
|
|
|
|
|
|
|
1924
|
12
|
50
|
|
|
|
|
for (int i = data.next_4B(); i > 0; i--) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1925
|
8
|
50
|
|
|
|
|
lemma_len -= data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1926
|
56
|
50
|
|
|
|
|
for (int i = data.next_1B(); i > 0; i--) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1927
|
48
|
50
|
|
|
|
|
lemma[lemma_len++] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1928
|
8
|
50
|
|
|
|
|
unsigned char lemma_info_len = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1929
|
8
|
50
|
|
|
|
|
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1930
|
8
|
50
|
|
|
|
|
unsigned lemma_roots = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1931
|
|
|
|
|
|
|
|
1932
|
|
|
|
|
|
|
unsigned char* lemma_data /* to keep compiler happy */ = nullptr; |
1933
|
|
|
|
|
|
|
unsigned lemma_offset /* to keep compiler happy */ = 0; |
1934
|
|
|
|
|
|
|
|
1935
|
8
|
100
|
|
|
|
|
if (pass == 1) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1936
|
4
|
|
|
|
|
|
lemmas.add(lemma.data(), lemma_len, 1 + lemma_info_len + 1 + lemma_roots * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
1937
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
1938
|
4
|
|
|
|
|
|
lemma_data = lemmas.fill(lemma.data(), lemma_len, 1 + lemma_info_len + 1 + lemma_roots * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
1939
|
8
|
|
|
|
|
|
lemma_offset = lemma_data - lemma_len - lemmas.data_start(lemma_len); |
1940
|
|
|
|
|
|
|
|
1941
|
4
|
|
|
|
|
|
*lemma_data++ = lemma_info_len; |
1942
|
4
|
50
|
|
|
|
|
if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1943
|
4
|
|
|
|
|
|
*lemma_data++ = lemma_roots; |
1944
|
|
|
|
|
|
|
} |
1945
|
|
|
|
|
|
|
|
1946
|
8
|
|
|
|
|
|
small_memcpy(root.data(), lemma.data(), lemma_len); root_len = lemma_len; |
1947
|
36
|
100
|
|
|
|
|
for (unsigned i = 0; i < lemma_roots; i++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
enum { REMOVE_START = 1, REMOVE_END = 2, ADD_START = 4, ADD_END = 8 }; |
1949
|
28
|
50
|
|
|
|
|
int operations = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1950
|
28
|
50
|
|
|
|
|
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1951
|
28
|
100
|
|
|
|
|
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1952
|
28
|
50
|
|
|
|
|
if (operations & ADD_START) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1953
|
0
|
0
|
|
|
|
|
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1954
|
0
|
0
|
|
|
|
|
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1955
|
|
|
|
|
|
|
} |
1956
|
28
|
50
|
|
|
|
|
if (operations & ADD_END) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1957
|
84
|
50
|
|
|
|
|
for (int len = data.next_1B(); len > 0; len--) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1958
|
56
|
50
|
|
|
|
|
root[root_len++] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1959
|
28
|
50
|
|
|
|
|
uint16_t clas = data.next_2B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
|
1961
|
28
|
100
|
|
|
|
|
if (pass == 1) { // for each root |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1962
|
14
|
|
|
|
|
|
roots.add(root.data(), root_len, sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t)); |
1963
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
1964
|
14
|
|
|
|
|
|
unsigned char* root_data = roots.fill(root.data(), root_len, sizeof(uint16_t) + sizeof(uint32_t) + sizeof(uint8_t)); |
1965
|
28
|
|
|
|
|
|
unsigned root_offset = root_data - root_len - roots.data_start(root_len); |
1966
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
unaligned_store_inc(root_data, clas); |
1968
|
|
|
|
|
|
|
unaligned_store_inc(root_data, lemma_offset); |
1969
|
|
|
|
|
|
|
unaligned_store_inc(root_data, lemma_len); |
1970
|
14
|
50
|
|
|
|
|
assert(uint8_t(lemma_len) == lemma_len); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, root_offset); |
1973
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, root_len); |
1974
|
|
|
|
|
|
|
unaligned_store_inc(lemma_data, clas); |
1975
|
14
|
50
|
|
|
|
|
assert(uint8_t(root_len) == root_len); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
} |
1977
|
|
|
|
|
|
|
} |
1978
|
|
|
|
|
|
|
} |
1979
|
|
|
|
|
|
|
|
1980
|
4
|
100
|
|
|
|
|
if (pass == 1) { // after the whole pass |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1981
|
2
|
50
|
|
|
|
|
lemmas.done_adding(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1982
|
2
|
50
|
|
|
|
|
roots.done_adding(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
} else /*if (pass == 2)*/ { |
1984
|
2
|
|
|
|
|
|
lemmas.done_filling(); |
1985
|
2
|
|
|
|
|
|
roots.done_filling(); |
1986
|
|
|
|
|
|
|
} |
1987
|
|
|
|
|
|
|
} |
1988
|
|
|
|
|
|
|
|
1989
|
|
|
|
|
|
|
// Load tags |
1990
|
2
|
50
|
|
|
|
|
tags.resize(data.next_2B()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1991
|
42
|
100
|
|
|
|
|
for (auto&& tag : tags) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1992
|
40
|
50
|
|
|
|
|
tag.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1993
|
160
|
100
|
|
|
|
|
for (unsigned i = 0; i < tag.size(); i++) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1994
|
120
|
50
|
|
|
|
|
tag[i] = data.next_1B(); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1995
|
|
|
|
|
|
|
} |
1996
|
|
|
|
|
|
|
|
1997
|
|
|
|
|
|
|
// Load suffixes |
1998
|
2
|
50
|
|
|
|
|
suffixes.load(data); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
// Fill classes from suffixes |
2001
|
28
|
50
|
|
|
|
|
suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
unsigned classes_len = data.next_2B(); |
2003
|
|
|
|
|
|
|
const uint16_t* classes_ptr = data.next(classes_len); |
2004
|
|
|
|
|
|
|
const uint16_t* indices_ptr = data.next(classes_len); |
2005
|
|
|
|
|
|
|
const uint16_t* tags_ptr = data.next(data.next_2B()); |
2006
|
|
|
|
|
|
|
|
2007
|
26
|
|
|
|
|
|
string suffix_str(suffix, len); |
2008
|
56
|
100
|
|
|
|
|
for (unsigned i = 0; i < classes_len; i++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2009
|
30
|
|
|
|
|
|
auto classes_ptr_i = unaligned_load(classes_ptr + i); |
2010
|
30
|
100
|
|
|
|
|
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2011
|
30
|
50
|
|
|
|
|
classes[classes_ptr_i].emplace_back(suffix_str, vector()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2012
|
70
|
100
|
|
|
|
|
for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i), |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2013
|
30
|
|
|
|
|
|
* end = tags_ptr + unaligned_load(indices_ptr + i + 1); |
2014
|
|
|
|
|
|
|
ptr < end; ptr++) |
2015
|
40
|
50
|
|
|
|
|
classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
} |
2017
|
26
|
|
|
|
|
|
}); |
2018
|
2
|
|
|
|
|
|
} |
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
template |
2021
|
15
|
|
|
|
|
|
void morpho_dictionary::analyze(string_piece form, vector& lemmas) const { |
2022
|
|
|
|
|
|
|
int max_suffix_len = suffixes.max_length(); |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
uint16_t* suff_stack[16]; vector suff_heap; |
2025
|
15
|
50
|
|
|
|
|
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
int suff_len = 0; |
2027
|
36
|
50
|
|
|
|
|
for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2028
|
36
|
|
|
|
|
|
suff[suff_len] = (uint16_t*) suffixes.at(form.str + i, suff_len, [](pointer_decoder& data) { |
2029
|
0
|
|
|
|
|
|
data.next(2 * data.next_2B()); |
2030
|
|
|
|
|
|
|
data.next(data.next_2B()); |
2031
|
0
|
|
|
|
|
|
}); |
2032
|
36
|
|
|
|
|
|
if (!suff[suff_len]) break; |
2033
|
|
|
|
|
|
|
} |
2034
|
|
|
|
|
|
|
|
2035
|
36
|
100
|
|
|
|
|
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2036
|
21
|
50
|
|
|
|
|
if (unaligned_load(suff[suff_len])) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2037
|
21
|
|
|
|
|
|
unsigned suff_classes = unaligned_load(suff[suff_len]); |
2038
|
21
|
|
|
|
|
|
uint16_t* suff_data = suff[suff_len] + 1; |
2039
|
|
|
|
|
|
|
|
2040
|
41
|
50
|
|
|
|
|
roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2041
|
|
|
|
|
|
|
uint16_t root_class = root_data.next_2B(); |
2042
|
|
|
|
|
|
|
unsigned lemma_offset = root_data.next_4B(); |
2043
|
|
|
|
|
|
|
unsigned lemma_len = root_data.next_1B(); |
2044
|
|
|
|
|
|
|
|
2045
|
40
|
100
|
|
|
|
|
if (small_memeq(form.str, root, root_len)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2046
|
11
|
|
|
|
|
|
uint16_t* suffix_class_ptr = unaligned_lower_bound(suff_data, suff_classes, root_class); |
2047
|
6
|
50
|
|
|
|
|
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2048
|
29
|
|
|
|
|
|
const unsigned char* lemma_data = this->lemmas.data_start(lemma_len) + lemma_offset; |
2049
|
|
|
|
|
|
|
string lemma((const char*)lemma_data, lemma_len); |
2050
|
6
|
50
|
|
|
|
|
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
|
2052
|
12
|
|
|
|
|
|
uint16_t* suff_tag_indices = suff_data + suff_classes; |
2053
|
6
|
|
|
|
|
|
uint16_t* suff_tags = suff_tag_indices + suff_classes + 1; |
2054
|
23
|
100
|
|
|
|
|
for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data)); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2055
|
23
|
|
|
|
|
|
i < unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data) + 1); i++) |
2056
|
17
|
50
|
|
|
|
|
lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
} |
2058
|
|
|
|
|
|
|
} |
2059
|
20
|
|
|
|
|
|
}); |
2060
|
|
|
|
|
|
|
} |
2061
|
15
|
|
|
|
|
|
} |
2062
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
template |
2064
|
4
|
|
|
|
|
|
bool morpho_dictionary::generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms) const { |
2065
|
|
|
|
|
|
|
LemmaAddinfo addinfo; |
2066
|
0
|
0
|
|
|
|
|
int raw_lemma_len = addinfo.parse(lemma); |
|
|
0
|
|
|
|
|
|
2067
|
4
|
|
|
|
|
|
bool matched_lemma = false; |
2068
|
|
|
|
|
|
|
|
2069
|
8
|
50
|
|
|
|
|
lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2070
|
|
|
|
|
|
|
unsigned lemma_info_len = data.next_1B(); |
2071
|
|
|
|
|
|
|
const auto* lemma_info = data.next(lemma_info_len); |
2072
|
|
|
|
|
|
|
unsigned lemma_roots_len = data.next_1B(); |
2073
|
8
|
|
|
|
|
|
auto* lemma_roots_ptr = data.next(lemma_roots_len * (sizeof(uint32_t) + sizeof(uint8_t) + sizeof(uint16_t))); |
2074
|
|
|
|
|
|
|
|
2075
|
8
|
50
|
|
|
|
|
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2076
|
4
|
|
|
|
|
|
matched_lemma = true; |
2077
|
|
|
|
|
|
|
|
2078
|
|
|
|
|
|
|
vector* forms = nullptr; |
2079
|
|
|
|
|
|
|
pointer_decoder lemma_roots(lemma_roots_ptr); |
2080
|
18
|
100
|
|
|
|
|
for (unsigned i = 0; i < lemma_roots_len; i++) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
unsigned root_offset = lemma_roots.next_4B(); |
2082
|
|
|
|
|
|
|
unsigned root_len = lemma_roots.next_1B(); |
2083
|
|
|
|
|
|
|
unsigned clas = lemma_roots.next_2B(); |
2084
|
|
|
|
|
|
|
|
2085
|
91
|
|
|
|
|
|
const unsigned char* root_data = roots.data_start(root_len) + root_offset; |
2086
|
44
|
100
|
|
|
|
|
for (auto&& suffix : classes[clas]) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
string root_with_suffix; |
2088
|
70
|
100
|
|
|
|
|
for (auto&& tag : suffix.second) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2089
|
40
|
100
|
|
|
|
|
if (filter.matches(tags[tag].c_str())) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2090
|
23
|
100
|
|
|
|
|
if (!forms) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2091
|
4
|
50
|
|
|
|
|
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2092
|
4
|
|
|
|
|
|
forms = &lemmas_forms.back().forms; |
2093
|
|
|
|
|
|
|
} |
2094
|
|
|
|
|
|
|
|
2095
|
23
|
100
|
|
|
|
|
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2096
|
18
|
50
|
|
|
|
|
root_with_suffix.reserve(root_len + suffix.first.size()); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
root_with_suffix.assign((const char*)root_data, root_len); |
2098
|
|
|
|
|
|
|
root_with_suffix.append(suffix.first); |
2099
|
|
|
|
|
|
|
} |
2100
|
|
|
|
|
|
|
|
2101
|
23
|
50
|
|
|
|
|
forms->emplace_back(root_with_suffix, tags[tag]); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
} |
2103
|
|
|
|
|
|
|
} |
2104
|
|
|
|
|
|
|
} |
2105
|
|
|
|
|
|
|
} |
2106
|
4
|
|
|
|
|
|
}); |
2107
|
|
|
|
|
|
|
|
2108
|
4
|
|
|
|
|
|
return matched_lemma; |
2109
|
|
|
|
|
|
|
} |
2110
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
///////// |
2112
|
|
|
|
|
|
|
// File: morpho/morpho_prefix_guesser.h |
2113
|
|
|
|
|
|
|
///////// |
2114
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
2116
|
|
|
|
|
|
|
// |
2117
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2118
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2119
|
|
|
|
|
|
|
// |
2120
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2121
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2122
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
// Declarations |
2125
|
|
|
|
|
|
|
template |
2126
|
0
|
|
|
|
|
|
class morpho_prefix_guesser { |
2127
|
|
|
|
|
|
|
public: |
2128
|
0
|
|
|
|
|
|
morpho_prefix_guesser(const MorphoDictionary& dictionary) : dictionary(dictionary) {} |
2129
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
void load(binary_decoder& data); |
2131
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas); |
2132
|
|
|
|
|
|
|
bool generate(string_piece lemma, const tag_filter& filter, vector& lemmas_forms); |
2133
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
private: |
2135
|
|
|
|
|
|
|
const MorphoDictionary& dictionary; |
2136
|
|
|
|
|
|
|
vector tag_filters; |
2137
|
|
|
|
|
|
|
persistent_unordered_map prefixes_initial, prefixes_middle; |
2138
|
|
|
|
|
|
|
}; |
2139
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
// Definitions |
2141
|
|
|
|
|
|
|
template |
2142
|
0
|
|
|
|
|
|
void morpho_prefix_guesser::load(binary_decoder& data) { |
2143
|
|
|
|
|
|
|
// Load and construct tag filters |
2144
|
0
|
0
|
|
|
|
|
for (unsigned tag_filters_len = data.next_1B(); tag_filters_len; tag_filters_len--) { |
2145
|
0
|
|
|
|
|
|
unsigned tag_filter_len = data.next_1B(); |
2146
|
0
|
|
|
|
|
|
string tag_filter(data.next(tag_filter_len), tag_filter_len); |
2147
|
|
|
|
|
|
|
|
2148
|
0
|
0
|
|
|
|
|
tag_filters.emplace_back(tag_filter.c_str()); |
2149
|
|
|
|
|
|
|
} |
2150
|
|
|
|
|
|
|
|
2151
|
|
|
|
|
|
|
// Load prefixes |
2152
|
0
|
|
|
|
|
|
prefixes_initial.load(data); |
2153
|
0
|
|
|
|
|
|
prefixes_middle.load(data); |
2154
|
0
|
|
|
|
|
|
} |
2155
|
|
|
|
|
|
|
|
2156
|
|
|
|
|
|
|
// Analyze can return non-unique lemma-tag pairs. |
2157
|
|
|
|
|
|
|
template |
2158
|
0
|
|
|
|
|
|
void morpho_prefix_guesser::analyze(string_piece form, vector& lemmas) { |
2159
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
2160
|
|
|
|
|
|
|
|
2161
|
|
|
|
|
|
|
vector form_tmp; |
2162
|
|
|
|
|
|
|
vector middle_masks; |
2163
|
0
|
0
|
|
|
|
|
middle_masks.reserve(form.len); |
2164
|
|
|
|
|
|
|
|
2165
|
0
|
0
|
|
|
|
|
for (unsigned initial = 0; initial < form.len; initial++) { |
2166
|
|
|
|
|
|
|
// Match the initial prefix. |
2167
|
0
|
|
|
|
|
|
unsigned initial_mask = (1<
|
2168
|
0
|
0
|
|
|
|
|
if (initial) { |
2169
|
0
|
|
|
|
|
|
auto found = prefixes_initial.at_typed(form.str, initial); |
2170
|
0
|
0
|
|
|
|
|
if (!found) break; |
2171
|
0
|
|
|
|
|
|
initial_mask = unaligned_load(found); |
2172
|
|
|
|
|
|
|
} |
2173
|
|
|
|
|
|
|
|
2174
|
|
|
|
|
|
|
// If we have found an initial prefix (including the empty one), match middle prefixes. |
2175
|
0
|
0
|
|
|
|
|
if (initial_mask) { |
2176
|
0
|
0
|
|
|
|
|
middle_masks.resize(initial); |
2177
|
0
|
0
|
|
|
|
|
middle_masks.emplace_back(initial_mask); |
2178
|
0
|
0
|
|
|
|
|
for (unsigned middle = initial; middle < middle_masks.size(); middle++) { |
2179
|
0
|
0
|
|
|
|
|
if (!middle_masks[middle]) continue; |
2180
|
|
|
|
|
|
|
// Try matching middle prefixes from current index. |
2181
|
0
|
0
|
|
|
|
|
for (unsigned i = middle + 1; i < form.len; i++) { |
2182
|
0
|
|
|
|
|
|
auto found = prefixes_middle.at_typed(form.str + middle, i - middle); |
2183
|
0
|
0
|
|
|
|
|
if (!found) break; |
2184
|
0
|
0
|
|
|
|
|
if (unaligned_load(found)) { |
2185
|
0
|
0
|
|
|
|
|
if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1); |
|
|
0
|
|
|
|
|
|
2186
|
0
|
|
|
|
|
|
middle_masks[i] |= middle_masks[middle] & unaligned_load(found); |
2187
|
|
|
|
|
|
|
} |
2188
|
|
|
|
|
|
|
} |
2189
|
|
|
|
|
|
|
|
2190
|
|
|
|
|
|
|
// Try matching word forms if at least one middle prefix was found. |
2191
|
0
|
0
|
|
|
|
|
if (middle > initial && middle < form.len ) { |
|
|
0
|
|
|
|
|
|
2192
|
0
|
0
|
|
|
|
|
if (initial) { |
2193
|
0
|
0
|
|
|
|
|
if (form_tmp.empty()) form_tmp.assign(form.str, form.str + form.len); |
2194
|
0
|
|
|
|
|
|
small_memcpy(form_tmp.data() + middle - initial, form.str, initial); |
2195
|
|
|
|
|
|
|
} |
2196
|
0
|
|
|
|
|
|
unsigned lemmas_ori_size = lemmas.size(); |
2197
|
0
|
0
|
|
|
|
|
dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas); |
|
|
0
|
|
|
|
|
|
2198
|
|
|
|
|
|
|
unsigned lemmas_new_size = lemmas_ori_size; |
2199
|
0
|
0
|
|
|
|
|
for (unsigned i = lemmas_ori_size; i < lemmas.size(); i++) { |
2200
|
0
|
0
|
|
|
|
|
for (unsigned filter = 0; filter < tag_filters.size(); filter++) |
2201
|
0
|
0
|
|
|
|
|
if ((middle_masks[middle] & (1<
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2202
|
0
|
0
|
|
|
|
|
if (i == lemmas_new_size) { |
2203
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.insert(0, form.str + initial, middle - initial); |
2204
|
|
|
|
|
|
|
} else { |
2205
|
0
|
0
|
|
|
|
|
lemmas[lemmas_new_size].lemma.reserve(lemmas[i].lemma.size() + middle - initial); |
2206
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.assign(form.str + initial, middle - initial); |
2207
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].lemma.append(lemmas[i].lemma); |
2208
|
0
|
|
|
|
|
|
lemmas[lemmas_new_size].tag = lemmas[i].tag; |
2209
|
|
|
|
|
|
|
} |
2210
|
0
|
|
|
|
|
|
lemmas_new_size++; |
2211
|
0
|
|
|
|
|
|
break; |
2212
|
|
|
|
|
|
|
} |
2213
|
|
|
|
|
|
|
} |
2214
|
0
|
0
|
|
|
|
|
if (lemmas_new_size < lemmas.size()) lemmas.erase(lemmas.begin() + lemmas_new_size, lemmas.end()); |
2215
|
|
|
|
|
|
|
} |
2216
|
|
|
|
|
|
|
} |
2217
|
|
|
|
|
|
|
} |
2218
|
|
|
|
|
|
|
} |
2219
|
|
|
|
|
|
|
} |
2220
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
template |
2222
|
|
|
|
|
|
|
bool morpho_prefix_guesser::generate(string_piece /*lemma*/, const tag_filter& /*filter*/, vector& /*lemmas_forms*/) { |
2223
|
|
|
|
|
|
|
// Not implemented yet. Is it actually needed? |
2224
|
|
|
|
|
|
|
return false; |
2225
|
|
|
|
|
|
|
} |
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
///////// |
2228
|
|
|
|
|
|
|
// File: morpho/morpho_statistical_guesser.h |
2229
|
|
|
|
|
|
|
///////// |
2230
|
|
|
|
|
|
|
|
2231
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2232
|
|
|
|
|
|
|
// |
2233
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2234
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2235
|
|
|
|
|
|
|
// |
2236
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2237
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2238
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2239
|
|
|
|
|
|
|
|
2240
|
0
|
|
|
|
|
|
class morpho_statistical_guesser { |
2241
|
|
|
|
|
|
|
public: |
2242
|
|
|
|
|
|
|
void load(binary_decoder& data); |
2243
|
|
|
|
|
|
|
typedef vector used_rules; |
2244
|
|
|
|
|
|
|
void analyze(string_piece form, vector& lemmas, used_rules* used); |
2245
|
|
|
|
|
|
|
|
2246
|
|
|
|
|
|
|
private: |
2247
|
|
|
|
|
|
|
vector tags; |
2248
|
|
|
|
|
|
|
unsigned default_tag; |
2249
|
|
|
|
|
|
|
persistent_unordered_map rules; |
2250
|
|
|
|
|
|
|
}; |
2251
|
|
|
|
|
|
|
|
2252
|
|
|
|
|
|
|
///////// |
2253
|
|
|
|
|
|
|
// File: tokenizer/unicode_tokenizer.h |
2254
|
|
|
|
|
|
|
///////// |
2255
|
|
|
|
|
|
|
|
2256
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2257
|
|
|
|
|
|
|
// |
2258
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2259
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2260
|
|
|
|
|
|
|
// |
2261
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2262
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2263
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2264
|
|
|
|
|
|
|
|
2265
|
3
|
|
|
|
|
|
class unicode_tokenizer : public tokenizer { |
2266
|
|
|
|
|
|
|
public: |
2267
|
|
|
|
|
|
|
enum { URL_EMAIL_LATEST = 2 }; |
2268
|
|
|
|
|
|
|
unicode_tokenizer(unsigned url_email_tokenizer); |
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
virtual void set_text(string_piece text, bool make_copy = false) override; |
2271
|
|
|
|
|
|
|
virtual bool next_sentence(vector* forms, vector* tokens) override; |
2272
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) = 0; |
2274
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
protected: |
2276
|
|
|
|
|
|
|
struct char_info { |
2277
|
|
|
|
|
|
|
char32_t chr; |
2278
|
|
|
|
|
|
|
unilib::unicode::category_t cat; |
2279
|
|
|
|
|
|
|
const char* str; |
2280
|
|
|
|
|
|
|
|
2281
|
136
|
|
|
|
|
|
char_info(char32_t chr, const char* str) : chr(chr), cat(unilib::unicode::category(chr)), str(str) {} |
2282
|
|
|
|
|
|
|
}; |
2283
|
|
|
|
|
|
|
vector chars; |
2284
|
|
|
|
|
|
|
size_t current; |
2285
|
|
|
|
|
|
|
|
2286
|
|
|
|
|
|
|
bool tokenize_url_email(vector& tokens); |
2287
|
|
|
|
|
|
|
bool emergency_sentence_split(const vector& tokens); |
2288
|
|
|
|
|
|
|
bool is_eos(const vector& tokens, char32_t eos_chr, const unordered_set* abbreviations); |
2289
|
|
|
|
|
|
|
|
2290
|
|
|
|
|
|
|
private: |
2291
|
|
|
|
|
|
|
unsigned url_email_tokenizer; |
2292
|
|
|
|
|
|
|
string text_buffer; |
2293
|
|
|
|
|
|
|
vector tokens_buffer; |
2294
|
|
|
|
|
|
|
string eos_buffer; |
2295
|
|
|
|
|
|
|
}; |
2296
|
|
|
|
|
|
|
|
2297
|
|
|
|
|
|
|
///////// |
2298
|
|
|
|
|
|
|
// File: tokenizer/ragel_tokenizer.h |
2299
|
|
|
|
|
|
|
///////// |
2300
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2302
|
|
|
|
|
|
|
// |
2303
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2304
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2305
|
|
|
|
|
|
|
// |
2306
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2307
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2308
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2309
|
|
|
|
|
|
|
|
2310
|
3
|
|
|
|
|
|
class ragel_tokenizer : public unicode_tokenizer { |
2311
|
|
|
|
|
|
|
public: |
2312
|
|
|
|
|
|
|
ragel_tokenizer(unsigned url_email_tokenizer); |
2313
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
protected: |
2315
|
|
|
|
|
|
|
static inline uint8_t ragel_char(const char_info& chr); |
2316
|
|
|
|
|
|
|
|
2317
|
|
|
|
|
|
|
private: |
2318
|
|
|
|
|
|
|
static void initialize_ragel_map(); |
2319
|
|
|
|
|
|
|
static vector ragel_map; |
2320
|
|
|
|
|
|
|
static atomic_flag ragel_map_flag; |
2321
|
|
|
|
|
|
|
static void ragel_map_add(char32_t chr, uint8_t mapping); |
2322
|
|
|
|
|
|
|
|
2323
|
|
|
|
|
|
|
friend class unicode_tokenizer; |
2324
|
|
|
|
|
|
|
static bool ragel_url_email(unsigned version, const vector& chars, size_t& current_char, vector& tokens); |
2325
|
|
|
|
|
|
|
}; |
2326
|
|
|
|
|
|
|
|
2327
|
|
|
|
|
|
|
uint8_t ragel_tokenizer::ragel_char(const char_info& chr) { |
2328
|
322
|
50
|
|
|
|
|
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2329
|
|
|
|
|
|
|
} |
2330
|
|
|
|
|
|
|
|
2331
|
|
|
|
|
|
|
///////// |
2332
|
|
|
|
|
|
|
// File: tokenizer/czech_tokenizer.h |
2333
|
|
|
|
|
|
|
///////// |
2334
|
|
|
|
|
|
|
|
2335
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2336
|
|
|
|
|
|
|
// |
2337
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2338
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2339
|
|
|
|
|
|
|
// |
2340
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2341
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2342
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2343
|
|
|
|
|
|
|
|
2344
|
3
|
|
|
|
|
|
class czech_tokenizer : public ragel_tokenizer { |
2345
|
|
|
|
|
|
|
public: |
2346
|
|
|
|
|
|
|
enum tokenizer_language { CZECH = 0, SLOVAK = 1 }; |
2347
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
2348
|
|
|
|
|
|
|
czech_tokenizer(tokenizer_language language, unsigned version, const morpho* m = nullptr); |
2349
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
2351
|
|
|
|
|
|
|
|
2352
|
|
|
|
|
|
|
private: |
2353
|
|
|
|
|
|
|
const morpho* m; |
2354
|
|
|
|
|
|
|
const unordered_set* abbreviations; |
2355
|
|
|
|
|
|
|
vector lemmas; |
2356
|
|
|
|
|
|
|
|
2357
|
|
|
|
|
|
|
void merge_hyphenated(vector& tokens); |
2358
|
|
|
|
|
|
|
|
2359
|
|
|
|
|
|
|
static const unordered_set abbreviations_czech; |
2360
|
|
|
|
|
|
|
static const unordered_set abbreviations_slovak; |
2361
|
|
|
|
|
|
|
}; |
2362
|
|
|
|
|
|
|
|
2363
|
|
|
|
|
|
|
///////// |
2364
|
|
|
|
|
|
|
// File: morpho/czech_morpho.h |
2365
|
|
|
|
|
|
|
///////// |
2366
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2368
|
|
|
|
|
|
|
// |
2369
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2370
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2371
|
|
|
|
|
|
|
// |
2372
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2373
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2374
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2375
|
|
|
|
|
|
|
|
2376
|
0
|
|
|
|
|
|
class czech_morpho : public morpho { |
2377
|
|
|
|
|
|
|
public: |
2378
|
|
|
|
|
|
|
using morpho_language = czech_tokenizer::tokenizer_language; |
2379
|
|
|
|
|
|
|
|
2380
|
0
|
0
|
|
|
|
|
czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2381
|
|
|
|
|
|
|
|
2382
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
2383
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
2384
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
2385
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
2386
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
2387
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
bool load(istream& is); |
2390
|
|
|
|
|
|
|
private: |
2391
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
2392
|
|
|
|
|
|
|
|
2393
|
|
|
|
|
|
|
morpho_language language; |
2394
|
|
|
|
|
|
|
unsigned version; |
2395
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
2396
|
|
|
|
|
|
|
unique_ptr> prefix_guesser; |
2397
|
|
|
|
|
|
|
unique_ptr statistical_guesser; |
2398
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
string unknown_tag = "X@-------------"; |
2400
|
|
|
|
|
|
|
string number_tag = "C=-------------"; |
2401
|
|
|
|
|
|
|
string punctuation_tag = "Z:-------------"; |
2402
|
|
|
|
|
|
|
}; |
2403
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
///////// |
2405
|
|
|
|
|
|
|
// File: morpho/czech_morpho.cpp |
2406
|
|
|
|
|
|
|
///////// |
2407
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2409
|
|
|
|
|
|
|
// |
2410
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2411
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2412
|
|
|
|
|
|
|
// |
2413
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2414
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2415
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2416
|
|
|
|
|
|
|
|
2417
|
0
|
|
|
|
|
|
bool czech_morpho::load(istream& is) { |
2418
|
|
|
|
|
|
|
binary_decoder data; |
2419
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
0
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
|
2421
|
|
|
|
|
|
|
try { |
2422
|
|
|
|
|
|
|
// Load tag length |
2423
|
0
|
0
|
|
|
|
|
unsigned tag_length = data.next_1B(); |
2424
|
0
|
0
|
|
|
|
|
if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length); |
|
|
0
|
|
|
|
|
|
2425
|
0
|
0
|
|
|
|
|
if (tag_length < number_tag.size()) number_tag.erase(tag_length); |
|
|
0
|
|
|
|
|
|
2426
|
0
|
0
|
|
|
|
|
if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length); |
|
|
0
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
|
2428
|
|
|
|
|
|
|
// Load dictionary |
2429
|
0
|
0
|
|
|
|
|
dictionary.load(data); |
2430
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
// Optionally prefix guesser if present |
2432
|
0
|
|
|
|
|
|
prefix_guesser.reset(); |
2433
|
0
|
0
|
|
|
|
|
if (data.next_1B()) { |
|
|
0
|
|
|
|
|
|
2434
|
0
|
0
|
|
|
|
|
prefix_guesser.reset(new morpho_prefix_guesser(dictionary)); |
2435
|
0
|
0
|
|
|
|
|
prefix_guesser->load(data); |
2436
|
|
|
|
|
|
|
} |
2437
|
|
|
|
|
|
|
|
2438
|
|
|
|
|
|
|
// Optionally statistical guesser if present |
2439
|
|
|
|
|
|
|
statistical_guesser.reset(); |
2440
|
0
|
0
|
|
|
|
|
if (data.next_1B()) { |
|
|
0
|
|
|
|
|
|
2441
|
0
|
0
|
|
|
|
|
statistical_guesser.reset(new morpho_statistical_guesser()); |
2442
|
0
|
0
|
|
|
|
|
statistical_guesser->load(data); |
2443
|
|
0
|
|
|
|
|
} |
2444
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
2445
|
|
|
|
|
|
|
return false; |
2446
|
|
|
|
|
|
|
} |
2447
|
|
|
|
|
|
|
|
2448
|
0
|
|
|
|
|
|
return data.is_end(); |
2449
|
|
|
|
|
|
|
} |
2450
|
|
|
|
|
|
|
|
2451
|
0
|
|
|
|
|
|
int czech_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
2452
|
|
|
|
|
|
|
lemmas.clear(); |
2453
|
|
|
|
|
|
|
|
2454
|
0
|
0
|
|
|
|
|
if (form.len) { |
2455
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
2456
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
2457
|
|
|
|
|
|
|
string form_lc; // all lowercase |
2458
|
0
|
0
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
2459
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
2461
|
0
|
0
|
|
|
|
|
dictionary.analyze(form, lemmas); |
2462
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
0
|
|
|
|
|
|
2463
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
0
|
|
|
|
|
|
2464
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
2465
|
|
|
|
|
|
|
|
2466
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers and punctuation. |
2467
|
0
|
0
|
|
|
|
|
analyze_special(form, lemmas); |
2468
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
2469
|
|
|
|
|
|
|
|
2470
|
|
|
|
|
|
|
// For the prefix guesser, use only form_lc. |
2471
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && prefix_guesser) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2472
|
0
|
0
|
|
|
|
|
prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas); |
|
|
0
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
bool prefix_guesser_guesses = !lemmas.empty(); |
2474
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
// For the statistical guesser, use all casing variants. |
2476
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && statistical_guesser) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2477
|
0
|
0
|
|
|
|
|
if (form_uclc.empty() && form_lc.empty()) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2478
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, nullptr); |
2479
|
|
|
|
|
|
|
else { |
2480
|
0
|
0
|
|
|
|
|
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
2481
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, &used_rules); |
2482
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
2483
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
} |
2485
|
|
|
|
|
|
|
} |
2486
|
|
|
|
|
|
|
|
2487
|
|
|
|
|
|
|
// Make sure results are unique lemma-tag pairs. Statistical guesser produces |
2488
|
|
|
|
|
|
|
// unique lemma-tag pairs, but prefix guesser does not. |
2489
|
0
|
0
|
|
|
|
|
if (prefix_guesser_guesses) { |
2490
|
0
|
|
|
|
|
|
sort(lemmas.begin(), lemmas.end(), [](const tagged_lemma& a, const tagged_lemma& b) { |
2491
|
0
|
|
|
|
|
|
int lemma_compare = a.lemma.compare(b.lemma); |
2492
|
0
|
0
|
|
|
|
|
return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); |
2493
|
|
|
|
|
|
|
}); |
2494
|
0
|
|
|
|
|
|
auto lemmas_end = unique(lemmas.begin(), lemmas.end(), [](const tagged_lemma& a, const tagged_lemma& b) { |
2495
|
0
|
0
|
|
|
|
|
return a.lemma == b.lemma && a.tag == b.tag; |
|
|
0
|
|
|
|
|
|
2496
|
0
|
|
|
|
|
|
}); |
2497
|
0
|
0
|
|
|
|
|
if (lemmas_end != lemmas.end()) lemmas.erase(lemmas_end, lemmas.end()); |
2498
|
|
|
|
|
|
|
} |
2499
|
|
|
|
|
|
|
|
2500
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
2501
|
|
|
|
|
|
|
} |
2502
|
|
|
|
|
|
|
|
2503
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
2504
|
0
|
|
|
|
|
|
return -1; |
2505
|
|
|
|
|
|
|
} |
2506
|
|
|
|
|
|
|
|
2507
|
0
|
|
|
|
|
|
int czech_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode guesser, vector& forms) const { |
2508
|
|
|
|
|
|
|
forms.clear(); |
2509
|
|
|
|
|
|
|
|
2510
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
2511
|
|
|
|
|
|
|
|
2512
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
2513
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
0
|
|
|
|
|
|
2514
|
|
|
|
|
|
|
return NO_GUESSER; |
2515
|
|
|
|
|
|
|
|
2516
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER && prefix_guesser) |
|
|
0
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
if (prefix_guesser->generate(lemma, filter, forms)) |
2518
|
|
|
|
|
|
|
return GUESSER; |
2519
|
|
|
|
|
|
|
} |
2520
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
return -1; |
2522
|
|
|
|
|
|
|
} |
2523
|
|
|
|
|
|
|
|
2524
|
0
|
|
|
|
|
|
int czech_morpho::raw_lemma_len(string_piece lemma) const { |
2525
|
0
|
|
|
|
|
|
return czech_lemma_addinfo::raw_lemma_len(lemma); |
2526
|
|
|
|
|
|
|
} |
2527
|
|
|
|
|
|
|
|
2528
|
0
|
|
|
|
|
|
int czech_morpho::lemma_id_len(string_piece lemma) const { |
2529
|
0
|
|
|
|
|
|
return czech_lemma_addinfo::lemma_id_len(lemma); |
2530
|
|
|
|
|
|
|
} |
2531
|
|
|
|
|
|
|
|
2532
|
0
|
|
|
|
|
|
int czech_morpho::raw_form_len(string_piece form) const { |
2533
|
0
|
|
|
|
|
|
return form.len; |
2534
|
|
|
|
|
|
|
} |
2535
|
|
|
|
|
|
|
|
2536
|
0
|
|
|
|
|
|
tokenizer* czech_morpho::new_tokenizer() const { |
2537
|
0
|
0
|
|
|
|
|
return new czech_tokenizer(language, version, this); |
2538
|
|
|
|
|
|
|
} |
2539
|
|
|
|
|
|
|
|
2540
|
|
|
|
|
|
|
// What characters are considered punctuation except for the ones in unicode Punctuation category. |
2541
|
|
|
|
|
|
|
static bool punctuation_additional[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*$*/, |
2542
|
|
|
|
|
|
|
0,0,0,0,0,0,1/*+*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*<*/,1/*=*/,1/*>*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2543
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,1/*^*/,0,1/*`*/,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*|*/,0,1/*~*/,0,0,0,0,0,0,0,0, |
2544
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2545
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2546
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2547
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2548
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2549
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2550
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2551
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2552
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2553
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1/*caron*/}; |
2554
|
|
|
|
|
|
|
|
2555
|
|
|
|
|
|
|
// What characters of unicode Punctuation category are not considered punctuation. |
2556
|
|
|
|
|
|
|
static bool punctuation_exceptions[] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2557
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2558
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, |
2559
|
|
|
|
|
|
|
0,0,0,0,0,0,0,0,0,1/*paragraph*/}; |
2560
|
|
|
|
|
|
|
|
2561
|
0
|
|
|
|
|
|
void czech_morpho::analyze_special(string_piece form, vector& lemmas) const { |
2562
|
|
|
|
|
|
|
using namespace unilib; |
2563
|
|
|
|
|
|
|
|
2564
|
|
|
|
|
|
|
// Analyzer for numbers and punctuation. |
2565
|
|
|
|
|
|
|
// Number is anything matching [+-]? is_Pn* ([.,] is_Pn*)? ([Ee] [+-]? is_Pn+)? for at least one is_Pn* nonempty. |
2566
|
|
|
|
|
|
|
// Punctuation is any form beginning with either unicode punctuation or punctuation_exceptions character. |
2567
|
|
|
|
|
|
|
// Beware that numbers takes precedence, so - is punctuation, -3 is number, -. is punctuation, -.3 is number. |
2568
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
2569
|
|
|
|
|
|
|
|
2570
|
0
|
|
|
|
|
|
string_piece form_ori = form; |
2571
|
0
|
|
|
|
|
|
char32_t first = utf8::decode(form.str, form.len); |
2572
|
|
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
// Try matching a number. |
2574
|
|
|
|
|
|
|
char32_t codepoint = first; |
2575
|
|
|
|
|
|
|
bool any_digit = false; |
2576
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
2577
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
2578
|
0
|
0
|
|
|
|
|
if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2579
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
2580
|
0
|
0
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
0
|
|
|
|
|
|
2581
|
0
|
|
|
|
|
|
codepoint = utf8::decode(form.str, form.len); |
2582
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
2583
|
|
|
|
|
|
|
any_digit = false; |
2584
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
2585
|
|
|
|
|
|
|
} |
2586
|
|
|
|
|
|
|
|
2587
|
0
|
0
|
|
|
|
|
if (any_digit && !form.len && (!codepoint || codepoint == '.')) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2588
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), number_tag); |
2589
|
0
|
0
|
|
|
|
|
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2590
|
0
|
0
|
|
|
|
|
((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first]))) |
|
|
0
|
|
|
|
|
|
2591
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
2592
|
|
|
|
|
|
|
} |
2593
|
|
|
|
|
|
|
|
2594
|
|
|
|
|
|
|
///////// |
2595
|
|
|
|
|
|
|
// File: morpho/english_lemma_addinfo.h |
2596
|
|
|
|
|
|
|
///////// |
2597
|
|
|
|
|
|
|
|
2598
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2599
|
|
|
|
|
|
|
// |
2600
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2601
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2602
|
|
|
|
|
|
|
// |
2603
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2604
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2605
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2606
|
|
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
// Declarations |
2608
|
0
|
|
|
|
|
|
struct english_lemma_addinfo { |
2609
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
2610
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
2611
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
2612
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
2613
|
|
|
|
|
|
|
|
2614
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
2615
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
2616
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
vector data; |
2618
|
|
|
|
|
|
|
}; |
2619
|
|
|
|
|
|
|
|
2620
|
|
|
|
|
|
|
// Definitions |
2621
|
0
|
|
|
|
|
|
int english_lemma_addinfo::raw_lemma_len(string_piece lemma) { |
2622
|
|
|
|
|
|
|
// Lemma ends either by |
2623
|
|
|
|
|
|
|
// - '^' on non-first position followed by nothing or [A-Za-z][-A-Za-z]* |
2624
|
|
|
|
|
|
|
// - '+' on non-first position followed by nothing |
2625
|
0
|
0
|
|
|
|
|
for (unsigned len = 1; len < lemma.len; len++) { |
2626
|
0
|
0
|
|
|
|
|
if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+')) |
|
|
0
|
|
|
|
|
|
2627
|
0
|
|
|
|
|
|
return len; |
2628
|
0
|
0
|
|
|
|
|
if (len + 1 < lemma.len && lemma.str[len] == '^') { |
|
|
0
|
|
|
|
|
|
2629
|
|
|
|
|
|
|
bool ok = true; |
2630
|
0
|
0
|
|
|
|
|
for (unsigned i = len + 1; ok && i < lemma.len; i++) |
|
|
0
|
|
|
|
|
|
2631
|
0
|
0
|
|
|
|
|
ok &= (lemma.str[i] >= 'A' && lemma.str[i] <= 'Z') || |
2632
|
0
|
0
|
|
|
|
|
(lemma.str[i] >= 'a' && lemma.str[i] <= 'z') || |
|
|
0
|
|
|
|
|
|
2633
|
0
|
0
|
|
|
|
|
(i > len + 1 && lemma.str[i] == '-'); |
2634
|
0
|
0
|
|
|
|
|
if (ok) return len; |
2635
|
|
|
|
|
|
|
} |
2636
|
|
|
|
|
|
|
} |
2637
|
0
|
|
|
|
|
|
return lemma.len; |
2638
|
|
|
|
|
|
|
} |
2639
|
|
|
|
|
|
|
|
2640
|
|
|
|
|
|
|
int english_lemma_addinfo::lemma_id_len(string_piece lemma) { |
2641
|
|
|
|
|
|
|
// No lemma comments. |
2642
|
0
|
|
|
|
|
|
return lemma.len; |
2643
|
|
|
|
|
|
|
} |
2644
|
|
|
|
|
|
|
|
2645
|
|
|
|
|
|
|
string english_lemma_addinfo::format(const unsigned char* addinfo, int addinfo_len) { |
2646
|
0
|
|
|
|
|
|
return string((const char*) addinfo, addinfo_len); |
2647
|
|
|
|
|
|
|
} |
2648
|
|
|
|
|
|
|
|
2649
|
|
|
|
|
|
|
bool english_lemma_addinfo::generatable(const unsigned char* /*addinfo*/, int /*addinfo_len*/) { |
2650
|
|
|
|
|
|
|
return true; |
2651
|
|
|
|
|
|
|
} |
2652
|
|
|
|
|
|
|
|
2653
|
0
|
|
|
|
|
|
int english_lemma_addinfo::parse(string_piece lemma, bool /*die_on_failure*/) { |
2654
|
|
|
|
|
|
|
data.clear(); |
2655
|
|
|
|
|
|
|
|
2656
|
0
|
|
|
|
|
|
size_t len = raw_lemma_len(lemma); |
2657
|
0
|
0
|
|
|
|
|
for (size_t i = len; i < lemma.len; i++) |
2658
|
0
|
|
|
|
|
|
data.push_back(lemma.str[i]); |
2659
|
|
|
|
|
|
|
|
2660
|
0
|
|
|
|
|
|
return len; |
2661
|
|
|
|
|
|
|
} |
2662
|
|
|
|
|
|
|
|
2663
|
0
|
|
|
|
|
|
bool english_lemma_addinfo::match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len) { |
2664
|
0
|
0
|
|
|
|
|
if (data.empty()) return true; |
2665
|
0
|
0
|
|
|
|
|
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2666
|
0
|
0
|
|
|
|
|
if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2667
|
0
|
0
|
|
|
|
|
return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len); |
|
|
0
|
|
|
|
|
|
2668
|
|
|
|
|
|
|
} |
2669
|
|
|
|
|
|
|
|
2670
|
|
|
|
|
|
|
///////// |
2671
|
|
|
|
|
|
|
// File: morpho/english_morpho_guesser.h |
2672
|
|
|
|
|
|
|
///////// |
2673
|
|
|
|
|
|
|
|
2674
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2675
|
|
|
|
|
|
|
// |
2676
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2677
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2678
|
|
|
|
|
|
|
// |
2679
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2680
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2681
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2682
|
|
|
|
|
|
|
|
2683
|
0
|
0
|
|
|
|
|
class english_morpho_guesser { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2684
|
|
|
|
|
|
|
public: |
2685
|
|
|
|
|
|
|
void load(binary_decoder& data); |
2686
|
|
|
|
|
|
|
void analyze(string_piece form, string_piece form_lc, vector& lemmas) const; |
2687
|
|
|
|
|
|
|
bool analyze_proper_names(string_piece form, string_piece form_lc, vector& lemmas) const; |
2688
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
private: |
2690
|
|
|
|
|
|
|
inline void add(const string& tag, const string& form, vector& lemmas) const; |
2691
|
|
|
|
|
|
|
inline void add(const string& tag, const string& tag2, const string& form, vector& lemmas) const; |
2692
|
|
|
|
|
|
|
inline void add(const string& tag, const string& form, unsigned negation_len, vector& lemmas) const; |
2693
|
|
|
|
|
|
|
inline void add(const string& tag, const string& tag2, const string& form, unsigned negation_len, vector& lemmas) const; |
2694
|
|
|
|
|
|
|
void add_NNS(const string& form, unsigned negation_len, vector& lemmas) const; |
2695
|
|
|
|
|
|
|
void add_NNPS(const string& form, vector& lemmas) const; |
2696
|
|
|
|
|
|
|
void add_VBG(const string& form, vector& lemmas) const; |
2697
|
|
|
|
|
|
|
void add_VBD_VBN(const string& form, vector& lemmas) const; |
2698
|
|
|
|
|
|
|
void add_VBZ(const string& form, vector& lemmas) const; |
2699
|
|
|
|
|
|
|
void add_JJR_RBR(const string& form, unsigned negation_len, vector& lemmas) const; |
2700
|
|
|
|
|
|
|
void add_JJS_RBS(const string& form, unsigned negation_len, vector& lemmas) const; |
2701
|
|
|
|
|
|
|
|
2702
|
|
|
|
|
|
|
enum { NEGATION_LEN = 0, TO_FOLLOW = 1, TOTAL = 2 }; |
2703
|
|
|
|
|
|
|
vector exceptions_tags; |
2704
|
|
|
|
|
|
|
persistent_unordered_map exceptions; |
2705
|
|
|
|
|
|
|
persistent_unordered_map negations; |
2706
|
|
|
|
|
|
|
string CD = "CD", FW = "FW", JJ = "JJ", JJR = "JJR", JJS = "JJS", |
2707
|
|
|
|
|
|
|
NN = "NN", NNP = "NNP", NNPS = "NNPS", NNS = "NNS", RB = "RB", |
2708
|
|
|
|
|
|
|
RBR = "RBR", RBS = "RBS", SYM = "SYM", VB = "VB", VBD = "VBD", |
2709
|
|
|
|
|
|
|
VBG = "VBG", VBN = "VBN", VBP = "VBP", VBZ = "VBZ"; |
2710
|
|
|
|
|
|
|
}; |
2711
|
|
|
|
|
|
|
|
2712
|
|
|
|
|
|
|
///////// |
2713
|
|
|
|
|
|
|
// File: morpho/english_morpho.h |
2714
|
|
|
|
|
|
|
///////// |
2715
|
|
|
|
|
|
|
|
2716
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2717
|
|
|
|
|
|
|
// |
2718
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2719
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2720
|
|
|
|
|
|
|
// |
2721
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2722
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2723
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2724
|
|
|
|
|
|
|
|
2725
|
0
|
|
|
|
|
|
class english_morpho : public morpho { |
2726
|
|
|
|
|
|
|
public: |
2727
|
0
|
0
|
|
|
|
|
english_morpho(unsigned version) : version(version) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2728
|
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
2730
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
2731
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
2732
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
2733
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
2734
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
2735
|
|
|
|
|
|
|
|
2736
|
|
|
|
|
|
|
bool load(istream& is); |
2737
|
|
|
|
|
|
|
private: |
2738
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
2739
|
|
|
|
|
|
|
|
2740
|
|
|
|
|
|
|
unsigned version; |
2741
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
2742
|
|
|
|
|
|
|
english_morpho_guesser morpho_guesser; |
2743
|
|
|
|
|
|
|
|
2744
|
|
|
|
|
|
|
string unknown_tag = "UNK"; |
2745
|
|
|
|
|
|
|
string number_tag = "CD", nnp_tag = "NNP", ls_tag = "LS"; |
2746
|
|
|
|
|
|
|
string open_quotation_tag = "``", close_quotation_tag = "''"; |
2747
|
|
|
|
|
|
|
string open_parenthesis_tag = "(", close_parenthesis_tag = ")"; |
2748
|
|
|
|
|
|
|
string comma_tag = ",", dot_tag = ".", punctuation_tag = ":", hash_tag = "#", dollar_tag = "$"; |
2749
|
|
|
|
|
|
|
string sym_tag = "SYM", jj_tag = "JJ", nn_tag = "NN", nns_tag = "NNS", cc_tag = "CC", pos_tag = "POS", in_tag = "IN"; |
2750
|
|
|
|
|
|
|
}; |
2751
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
///////// |
2753
|
|
|
|
|
|
|
// File: tokenizer/english_tokenizer.h |
2754
|
|
|
|
|
|
|
///////// |
2755
|
|
|
|
|
|
|
|
2756
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2757
|
|
|
|
|
|
|
// |
2758
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2759
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2760
|
|
|
|
|
|
|
// |
2761
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2762
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2763
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2764
|
|
|
|
|
|
|
|
2765
|
0
|
|
|
|
|
|
class english_tokenizer : public ragel_tokenizer { |
2766
|
|
|
|
|
|
|
public: |
2767
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
2768
|
|
|
|
|
|
|
english_tokenizer(unsigned version); |
2769
|
|
|
|
|
|
|
|
2770
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
2771
|
|
|
|
|
|
|
|
2772
|
|
|
|
|
|
|
private: |
2773
|
|
|
|
|
|
|
void split_token(vector& tokens); |
2774
|
|
|
|
|
|
|
|
2775
|
|
|
|
|
|
|
static const unordered_set abbreviations; |
2776
|
|
|
|
|
|
|
}; |
2777
|
|
|
|
|
|
|
|
2778
|
|
|
|
|
|
|
///////// |
2779
|
|
|
|
|
|
|
// File: morpho/english_morpho.cpp |
2780
|
|
|
|
|
|
|
///////// |
2781
|
|
|
|
|
|
|
|
2782
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2783
|
|
|
|
|
|
|
// |
2784
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2785
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2786
|
|
|
|
|
|
|
// |
2787
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2788
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2789
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2790
|
|
|
|
|
|
|
|
2791
|
0
|
|
|
|
|
|
bool english_morpho::load(istream& is) { |
2792
|
|
|
|
|
|
|
binary_decoder data; |
2793
|
0
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
0
|
|
|
|
|
|
2794
|
|
|
|
|
|
|
|
2795
|
|
|
|
|
|
|
try { |
2796
|
0
|
0
|
|
|
|
|
dictionary.load(data); |
2797
|
0
|
0
|
|
|
|
|
morpho_guesser.load(data); |
|
|
0
|
|
|
|
|
|
2798
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
2799
|
|
|
|
|
|
|
return false; |
2800
|
|
|
|
|
|
|
} |
2801
|
|
|
|
|
|
|
|
2802
|
0
|
|
|
|
|
|
return data.is_end(); |
2803
|
|
|
|
|
|
|
} |
2804
|
|
|
|
|
|
|
|
2805
|
0
|
|
|
|
|
|
int english_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
2806
|
|
|
|
|
|
|
lemmas.clear(); |
2807
|
|
|
|
|
|
|
|
2808
|
0
|
0
|
|
|
|
|
if (form.len) { |
2809
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
2810
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
2811
|
|
|
|
|
|
|
string form_lc; // all lowercase |
2812
|
0
|
0
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
2813
|
|
|
|
|
|
|
|
2814
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
2815
|
0
|
0
|
|
|
|
|
dictionary.analyze(form, lemmas); |
2816
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
0
|
|
|
|
|
|
2817
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
0
|
|
|
|
|
|
2818
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) |
2819
|
0
|
0
|
|
|
|
|
return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2820
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers, punctuation and symbols. |
2822
|
0
|
0
|
|
|
|
|
analyze_special(form, lemmas); |
2823
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
2824
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
// Use English guesser on form_lc if allowed. |
2826
|
0
|
0
|
|
|
|
|
if (guesser == GUESSER) |
2827
|
0
|
0
|
|
|
|
|
morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas); |
|
|
0
|
|
|
|
|
|
2828
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
2829
|
|
|
|
|
|
|
} |
2830
|
|
|
|
|
|
|
|
2831
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
2832
|
0
|
|
|
|
|
|
return -1; |
2833
|
|
|
|
|
|
|
} |
2834
|
|
|
|
|
|
|
|
2835
|
0
|
|
|
|
|
|
int english_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
2836
|
|
|
|
|
|
|
forms.clear(); |
2837
|
|
|
|
|
|
|
|
2838
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
2839
|
|
|
|
|
|
|
|
2840
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
2841
|
0
|
0
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
0
|
|
|
|
|
|
2842
|
|
|
|
|
|
|
return NO_GUESSER; |
2843
|
|
|
|
|
|
|
} |
2844
|
|
|
|
|
|
|
|
2845
|
|
|
|
|
|
|
return -1; |
2846
|
|
|
|
|
|
|
} |
2847
|
|
|
|
|
|
|
|
2848
|
0
|
|
|
|
|
|
int english_morpho::raw_lemma_len(string_piece lemma) const { |
2849
|
0
|
|
|
|
|
|
return english_lemma_addinfo::raw_lemma_len(lemma); |
2850
|
|
|
|
|
|
|
} |
2851
|
|
|
|
|
|
|
|
2852
|
0
|
|
|
|
|
|
int english_morpho::lemma_id_len(string_piece lemma) const { |
2853
|
0
|
|
|
|
|
|
return english_lemma_addinfo::lemma_id_len(lemma); |
2854
|
|
|
|
|
|
|
} |
2855
|
|
|
|
|
|
|
|
2856
|
0
|
|
|
|
|
|
int english_morpho::raw_form_len(string_piece form) const { |
2857
|
0
|
|
|
|
|
|
return form.len; |
2858
|
|
|
|
|
|
|
} |
2859
|
|
|
|
|
|
|
|
2860
|
0
|
|
|
|
|
|
tokenizer* english_morpho::new_tokenizer() const { |
2861
|
0
|
0
|
|
|
|
|
return new english_tokenizer(version <= 2 ? 1 : 2); |
2862
|
|
|
|
|
|
|
} |
2863
|
|
|
|
|
|
|
|
2864
|
0
|
|
|
|
|
|
void english_morpho::analyze_special(string_piece form, vector& lemmas) const { |
2865
|
|
|
|
|
|
|
using namespace unilib; |
2866
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
// Analyzer for numbers and punctuation. |
2868
|
0
|
0
|
|
|
|
|
if (!form.len) return; |
2869
|
|
|
|
|
|
|
|
2870
|
|
|
|
|
|
|
// One-letter punctuation exceptions. |
2871
|
0
|
0
|
|
|
|
|
if (form.len == 1) |
2872
|
0
|
|
|
|
|
|
switch(*form.str) { |
2873
|
|
|
|
|
|
|
case '.': |
2874
|
|
|
|
|
|
|
case '!': |
2875
|
0
|
0
|
|
|
|
|
case '?': lemmas.emplace_back(string(form.str, form.len), dot_tag); return; |
2876
|
0
|
0
|
|
|
|
|
case ',': lemmas.emplace_back(string(form.str, form.len), comma_tag); return; |
2877
|
0
|
0
|
|
|
|
|
case '#': lemmas.emplace_back(string(form.str, form.len), hash_tag); return; |
2878
|
0
|
0
|
|
|
|
|
case '$': lemmas.emplace_back(string(form.str, form.len), dollar_tag); return; |
2879
|
0
|
0
|
|
|
|
|
case '[': lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
2880
|
0
|
0
|
|
|
|
|
case ']': lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
2881
|
0
|
0
|
|
|
|
|
case '%': lemmas.emplace_back(string(form.str, form.len), jj_tag); |
2882
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), nn_tag); return; |
2883
|
0
|
0
|
|
|
|
|
case '&': lemmas.emplace_back(string(form.str, form.len), cc_tag); |
2884
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
2885
|
0
|
0
|
|
|
|
|
case '*': lemmas.emplace_back(string(form.str, form.len), sym_tag); |
2886
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), nn_tag); return; |
2887
|
0
|
0
|
|
|
|
|
case '@': lemmas.emplace_back(string(form.str, form.len), sym_tag); |
2888
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), in_tag); return; |
2889
|
0
|
0
|
|
|
|
|
case '\'': lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); |
2890
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), pos_tag); return; |
2891
|
|
|
|
|
|
|
} |
2892
|
|
|
|
|
|
|
|
2893
|
|
|
|
|
|
|
// Try matching a number: [+-]? is_Pn* (, is_Pn{3})? (. is_Pn*)? (s | [Ee] [+-]? is_Pn+)? with at least one digit |
2894
|
0
|
|
|
|
|
|
string_piece number = form; |
2895
|
0
|
|
|
|
|
|
char32_t codepoint = utf8::decode(number.str, number.len); |
2896
|
|
|
|
|
|
|
bool any_digit = false; |
2897
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
2898
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
2899
|
0
|
0
|
|
|
|
|
while (codepoint == ',') { |
2900
|
0
|
|
|
|
|
|
string_piece group = number; |
2901
|
0
|
0
|
|
|
|
|
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
2902
|
0
|
0
|
|
|
|
|
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
2903
|
0
|
0
|
|
|
|
|
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
2904
|
|
|
|
|
|
|
any_digit = true; |
2905
|
0
|
|
|
|
|
|
number = group; |
2906
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
2907
|
|
|
|
|
|
|
} |
2908
|
0
|
0
|
|
|
|
|
if (codepoint == '.' && number.len) { |
|
|
0
|
|
|
|
|
|
2909
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
2910
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
2911
|
|
|
|
|
|
|
} |
2912
|
0
|
0
|
|
|
|
|
if (version >= 2 && any_digit && codepoint == 's' && !number.len) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2913
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
2914
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len - 1), nns_tag); |
2915
|
0
|
|
|
|
|
|
return; |
2916
|
|
|
|
|
|
|
} |
2917
|
0
|
0
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
0
|
|
|
|
|
|
2918
|
0
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
2919
|
0
|
0
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
2920
|
|
|
|
|
|
|
any_digit = false; |
2921
|
0
|
0
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
2922
|
|
|
|
|
|
|
} |
2923
|
0
|
0
|
|
|
|
|
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2924
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
2925
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), nnp_tag); |
2926
|
0
|
0
|
|
|
|
|
if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9') |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2927
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), ls_tag); |
2928
|
|
|
|
|
|
|
return; |
2929
|
|
|
|
|
|
|
} |
2930
|
|
|
|
|
|
|
|
2931
|
|
|
|
|
|
|
// Open quotation, end quotation, open parentheses, end parentheses, symbol, or other |
2932
|
0
|
|
|
|
|
|
string_piece punctuation = form; |
2933
|
|
|
|
|
|
|
bool open_quotation = true, close_quotation = true, open_parenthesis = true, close_parenthesis = true, any_punctuation = true, symbol = true; |
2934
|
0
|
0
|
|
|
|
|
while ((symbol || any_punctuation) && punctuation.len) { |
|
|
0
|
|
|
|
|
|
2935
|
0
|
|
|
|
|
|
codepoint = utf8::decode(punctuation.str, punctuation.len); |
2936
|
0
|
0
|
|
|
|
|
if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2937
|
0
|
0
|
|
|
|
|
if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2938
|
0
|
0
|
|
|
|
|
if (open_parenthesis) open_parenthesis = unicode::category(codepoint) & unicode::Ps; |
2939
|
0
|
0
|
|
|
|
|
if (close_parenthesis) close_parenthesis = unicode::category(codepoint) & unicode::Pe; |
2940
|
0
|
0
|
|
|
|
|
if (any_punctuation) any_punctuation = unicode::category(codepoint) & unicode::P; |
2941
|
0
|
0
|
|
|
|
|
if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2942
|
|
|
|
|
|
|
} |
2943
|
0
|
0
|
|
|
|
|
if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2944
|
0
|
0
|
|
|
|
|
if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2945
|
0
|
0
|
|
|
|
|
if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2946
|
0
|
0
|
|
|
|
|
if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2947
|
0
|
0
|
|
|
|
|
if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2948
|
0
|
0
|
|
|
|
|
if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; } |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
2949
|
|
|
|
|
|
|
} |
2950
|
|
|
|
|
|
|
|
2951
|
|
|
|
|
|
|
///////// |
2952
|
|
|
|
|
|
|
// File: morpho/english_morpho_guesser.cpp |
2953
|
|
|
|
|
|
|
///////// |
2954
|
|
|
|
|
|
|
|
2955
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
2956
|
|
|
|
|
|
|
// |
2957
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
2958
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
2959
|
|
|
|
|
|
|
// |
2960
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
2961
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
2962
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
2963
|
|
|
|
|
|
|
|
2964
|
|
|
|
|
|
|
// This code is a reimplementation of morphologic analyzer Morphium |
2965
|
|
|
|
|
|
|
// by Johanka Spoustova (Treex::Tool::EnglishMorpho::Analysis Perl module) |
2966
|
|
|
|
|
|
|
// and reimplementation of morphologic lemmatizer by Martin Popel |
2967
|
|
|
|
|
|
|
// (Treex::Tool::EnglishMorpho::Lemmatizer Perl module). The latter is based |
2968
|
|
|
|
|
|
|
// on morpha: |
2969
|
|
|
|
|
|
|
// Minnen, G., J. Carroll and D. Pearce (2001). Applied morphological |
2970
|
|
|
|
|
|
|
// processing of English, Natural Language Engineering, 7(3). 207-223. |
2971
|
|
|
|
|
|
|
// Morpha has been released under LGPL as a part of RASP system |
2972
|
|
|
|
|
|
|
// http://ilexir.co.uk/applications/rasp/. |
2973
|
|
|
|
|
|
|
|
2974
|
0
|
|
|
|
|
|
void english_morpho_guesser::load(binary_decoder& data) { |
2975
|
0
|
|
|
|
|
|
unsigned tags = data.next_2B(); |
2976
|
0
|
|
|
|
|
|
exceptions_tags.clear(); |
2977
|
0
|
|
|
|
|
|
exceptions_tags.reserve(tags); |
2978
|
0
|
0
|
|
|
|
|
while (tags--) { |
2979
|
0
|
|
|
|
|
|
unsigned len = data.next_1B(); |
2980
|
0
|
0
|
|
|
|
|
exceptions_tags.emplace_back(string(data.next(len), len)); |
2981
|
|
|
|
|
|
|
} |
2982
|
|
|
|
|
|
|
|
2983
|
0
|
|
|
|
|
|
exceptions.load(data); |
2984
|
0
|
|
|
|
|
|
negations.load(data); |
2985
|
0
|
|
|
|
|
|
} |
2986
|
|
|
|
|
|
|
|
2987
|
|
|
|
|
|
|
static const char _tag_guesser_actions[] = { |
2988
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
2989
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
2990
|
|
|
|
|
|
|
7, 2, 2, 6, 2, 2, 7, 2, |
2991
|
|
|
|
|
|
|
4, 6, 2, 4, 7, 2, 5, 6, |
2992
|
|
|
|
|
|
|
2, 5, 7, 2, 6, 7, 3, 2, |
2993
|
|
|
|
|
|
|
6, 7, 3, 4, 6, 7, 3, 5, |
2994
|
|
|
|
|
|
|
6, 7 |
2995
|
|
|
|
|
|
|
}; |
2996
|
|
|
|
|
|
|
|
2997
|
|
|
|
|
|
|
static const unsigned char _tag_guesser_key_offsets[] = { |
2998
|
|
|
|
|
|
|
0, 19, 26, 34, 42, 50, 58, 66, |
2999
|
|
|
|
|
|
|
74, 82, 90, 100, 108, 116, 124, 132, |
3000
|
|
|
|
|
|
|
145, 153, 161, 168, 179, 195, 212, 220, |
3001
|
|
|
|
|
|
|
228, 236 |
3002
|
|
|
|
|
|
|
}; |
3003
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
static const char _tag_guesser_trans_keys[] = { |
3005
|
|
|
|
|
|
|
45, 46, 99, 100, 103, 105, 109, 110, |
3006
|
|
|
|
|
|
|
114, 115, 116, 118, 120, 48, 57, 65, |
3007
|
|
|
|
|
|
|
90, 97, 122, 45, 48, 57, 65, 90, |
3008
|
|
|
|
|
|
|
97, 122, 45, 114, 48, 57, 65, 90, |
3009
|
|
|
|
|
|
|
97, 122, 45, 111, 48, 57, 65, 90, |
3010
|
|
|
|
|
|
|
97, 122, 45, 109, 48, 57, 65, 90, |
3011
|
|
|
|
|
|
|
97, 122, 45, 101, 48, 57, 65, 90, |
3012
|
|
|
|
|
|
|
97, 122, 45, 115, 48, 57, 65, 90, |
3013
|
|
|
|
|
|
|
97, 122, 45, 101, 48, 57, 65, 90, |
3014
|
|
|
|
|
|
|
97, 122, 45, 108, 48, 57, 65, 90, |
3015
|
|
|
|
|
|
|
97, 122, 45, 115, 48, 57, 65, 90, |
3016
|
|
|
|
|
|
|
97, 122, 45, 97, 101, 111, 48, 57, |
3017
|
|
|
|
|
|
|
65, 90, 98, 122, 45, 101, 48, 57, |
3018
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 108, 48, 57, |
3019
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 109, 48, 57, |
3020
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 105, 48, 57, |
3021
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 97, 101, 105, |
3022
|
|
|
|
|
|
|
111, 117, 121, 48, 57, 65, 90, 98, |
3023
|
|
|
|
|
|
|
122, 45, 115, 48, 57, 65, 90, 97, |
3024
|
|
|
|
|
|
|
122, 45, 101, 48, 57, 65, 90, 97, |
3025
|
|
|
|
|
|
|
122, 45, 48, 57, 65, 90, 97, 122, |
3026
|
|
|
|
|
|
|
45, 101, 114, 115, 116, 48, 57, 65, |
3027
|
|
|
|
|
|
|
90, 97, 122, 45, 46, 105, 109, 118, |
3028
|
|
|
|
|
|
|
120, 48, 57, 65, 90, 97, 98, 99, |
3029
|
|
|
|
|
|
|
100, 101, 122, 45, 46, 101, 105, 109, |
3030
|
|
|
|
|
|
|
118, 120, 48, 57, 65, 90, 97, 98, |
3031
|
|
|
|
|
|
|
99, 100, 102, 122, 45, 110, 48, 57, |
3032
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 105, 48, 57, |
3033
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 101, 48, 57, |
3034
|
|
|
|
|
|
|
65, 90, 97, 122, 45, 115, 48, 57, |
3035
|
|
|
|
|
|
|
65, 90, 97, 122, 0 |
3036
|
|
|
|
|
|
|
}; |
3037
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
static const char _tag_guesser_single_lengths[] = { |
3039
|
|
|
|
|
|
|
13, 1, 2, 2, 2, 2, 2, 2, |
3040
|
|
|
|
|
|
|
2, 2, 4, 2, 2, 2, 2, 7, |
3041
|
|
|
|
|
|
|
2, 2, 1, 5, 6, 7, 2, 2, |
3042
|
|
|
|
|
|
|
2, 2 |
3043
|
|
|
|
|
|
|
}; |
3044
|
|
|
|
|
|
|
|
3045
|
|
|
|
|
|
|
static const char _tag_guesser_range_lengths[] = { |
3046
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
3047
|
|
|
|
|
|
|
3, 3, 3, 3, 3, 3, 3, 3, |
3048
|
|
|
|
|
|
|
3, 3, 3, 3, 5, 5, 3, 3, |
3049
|
|
|
|
|
|
|
3, 3 |
3050
|
|
|
|
|
|
|
}; |
3051
|
|
|
|
|
|
|
|
3052
|
|
|
|
|
|
|
static const unsigned char _tag_guesser_index_offsets[] = { |
3053
|
|
|
|
|
|
|
0, 17, 22, 28, 34, 40, 46, 52, |
3054
|
|
|
|
|
|
|
58, 64, 70, 78, 84, 90, 96, 102, |
3055
|
|
|
|
|
|
|
113, 119, 125, 130, 139, 151, 164, 170, |
3056
|
|
|
|
|
|
|
176, 182 |
3057
|
|
|
|
|
|
|
}; |
3058
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
static const char _tag_guesser_indicies[] = { |
3060
|
|
|
|
|
|
|
1, 2, 5, 6, 7, 5, 5, 8, |
3061
|
|
|
|
|
|
|
9, 10, 11, 5, 5, 3, 4, 4, |
3062
|
|
|
|
|
|
|
0, 13, 14, 15, 15, 12, 13, 16, |
3063
|
|
|
|
|
|
|
14, 15, 15, 12, 13, 17, 14, 15, |
3064
|
|
|
|
|
|
|
15, 12, 13, 18, 14, 15, 15, 12, |
3065
|
|
|
|
|
|
|
13, 18, 14, 15, 15, 12, 13, 19, |
3066
|
|
|
|
|
|
|
14, 15, 15, 12, 13, 20, 14, 15, |
3067
|
|
|
|
|
|
|
15, 12, 13, 18, 14, 15, 15, 12, |
3068
|
|
|
|
|
|
|
13, 21, 14, 15, 15, 12, 13, 22, |
3069
|
|
|
|
|
|
|
23, 24, 14, 15, 15, 12, 13, 25, |
3070
|
|
|
|
|
|
|
14, 15, 15, 12, 13, 23, 14, 15, |
3071
|
|
|
|
|
|
|
15, 12, 13, 23, 14, 15, 15, 12, |
3072
|
|
|
|
|
|
|
13, 26, 14, 15, 15, 12, 28, 15, |
3073
|
|
|
|
|
|
|
15, 15, 15, 15, 15, 29, 26, 26, |
3074
|
|
|
|
|
|
|
27, 31, 4, 32, 33, 33, 30, 13, |
3075
|
|
|
|
|
|
|
23, 14, 15, 15, 12, 13, 14, 15, |
3076
|
|
|
|
|
|
|
15, 12, 13, 34, 35, 36, 37, 14, |
3077
|
|
|
|
|
|
|
15, 15, 12, 13, 38, 39, 39, 39, |
3078
|
|
|
|
|
|
|
39, 14, 15, 15, 39, 15, 12, 13, |
3079
|
|
|
|
|
|
|
38, 40, 39, 39, 39, 39, 14, 15, |
3080
|
|
|
|
|
|
|
15, 39, 15, 12, 13, 41, 14, 15, |
3081
|
|
|
|
|
|
|
15, 12, 13, 42, 14, 15, 15, 12, |
3082
|
|
|
|
|
|
|
13, 18, 14, 15, 15, 12, 13, 43, |
3083
|
|
|
|
|
|
|
14, 15, 15, 12, 0 |
3084
|
|
|
|
|
|
|
}; |
3085
|
|
|
|
|
|
|
|
3086
|
|
|
|
|
|
|
static const char _tag_guesser_trans_targs[] = { |
3087
|
|
|
|
|
|
|
18, 19, 20, 18, 18, 20, 21, 22, |
3088
|
|
|
|
|
|
|
23, 24, 16, 25, 18, 19, 18, 1, |
3089
|
|
|
|
|
|
|
3, 4, 18, 7, 8, 10, 11, 18, |
3090
|
|
|
|
|
|
|
13, 12, 18, 18, 19, 18, 18, 19, |
3091
|
|
|
|
|
|
|
18, 18, 2, 5, 6, 9, 20, 20, |
3092
|
|
|
|
|
|
|
18, 14, 15, 17 |
3093
|
|
|
|
|
|
|
}; |
3094
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
static const char _tag_guesser_trans_actions[] = { |
3096
|
|
|
|
|
|
|
29, 46, 29, 32, 11, 11, 11, 11, |
3097
|
|
|
|
|
|
|
11, 11, 0, 11, 13, 35, 15, 0, |
3098
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 0, 3, |
3099
|
|
|
|
|
|
|
0, 0, 5, 17, 38, 20, 23, 42, |
3100
|
|
|
|
|
|
|
26, 9, 0, 0, 0, 0, 13, 0, |
3101
|
|
|
|
|
|
|
7, 0, 0, 0 |
3102
|
|
|
|
|
|
|
}; |
3103
|
|
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
static const char _tag_guesser_eof_actions[] = { |
3105
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
3106
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
3107
|
|
|
|
|
|
|
0, 0, 0, 0, 15, 15, 0, 0, |
3108
|
|
|
|
|
|
|
0, 0 |
3109
|
|
|
|
|
|
|
}; |
3110
|
|
|
|
|
|
|
|
3111
|
|
|
|
|
|
|
static const int tag_guesser_start = 0; |
3112
|
|
|
|
|
|
|
|
3113
|
0
|
|
|
|
|
|
void english_morpho_guesser::analyze(string_piece form, string_piece form_lc, vector& lemmas) const { |
3114
|
|
|
|
|
|
|
// Try exceptions list |
3115
|
0
|
|
|
|
|
|
auto* exception = exceptions.at(form_lc.str, form_lc.len, [](pointer_decoder& data){ |
3116
|
0
|
0
|
|
|
|
|
for (unsigned len = data.next_1B(); len; len--) { |
3117
|
|
|
|
|
|
|
data.next(data.next_1B()); |
3118
|
|
|
|
|
|
|
data.next(data.next_1B()); |
3119
|
|
|
|
|
|
|
} |
3120
|
0
|
|
|
|
|
|
}); |
3121
|
|
|
|
|
|
|
|
3122
|
0
|
0
|
|
|
|
|
if (exception) { |
3123
|
|
|
|
|
|
|
// Found in exceptions list |
3124
|
|
|
|
|
|
|
pointer_decoder data(exception); |
3125
|
0
|
0
|
|
|
|
|
for (unsigned len = data.next_1B(); len; len--) { |
3126
|
|
|
|
|
|
|
unsigned lemma_len = data.next_1B(); |
3127
|
0
|
|
|
|
|
|
string lemma(data.next(lemma_len), lemma_len); |
3128
|
0
|
0
|
|
|
|
|
for (unsigned tags = data.next_1B(); tags; tags--) |
3129
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(lemma, exceptions_tags[data.next_2B()]); |
3130
|
|
|
|
|
|
|
} |
3131
|
|
|
|
|
|
|
} else { |
3132
|
|
|
|
|
|
|
// Try stripping negative prefix and use rule guesser |
3133
|
|
|
|
|
|
|
string lemma_lc(form_lc.str, form_lc.len); |
3134
|
|
|
|
|
|
|
// Try finding negative prefix |
3135
|
|
|
|
|
|
|
unsigned negation_len = 0; |
3136
|
0
|
0
|
|
|
|
|
for (unsigned prefix = 1; prefix <= form_lc.len; prefix++) { |
3137
|
0
|
|
|
|
|
|
auto found = negations.at(form_lc.str, prefix, [](pointer_decoder& data){ data.next(TOTAL); }); |
3138
|
0
|
0
|
|
|
|
|
if (!found) break; |
3139
|
0
|
0
|
|
|
|
|
if (found[NEGATION_LEN]) { |
3140
|
0
|
0
|
|
|
|
|
if (form_lc.len - prefix >= found[TO_FOLLOW]) negation_len = found[NEGATION_LEN]; |
3141
|
|
|
|
|
|
|
} |
3142
|
|
|
|
|
|
|
} |
3143
|
|
|
|
|
|
|
|
3144
|
|
|
|
|
|
|
// Add default tags |
3145
|
0
|
|
|
|
|
|
add(FW, lemma_lc, lemmas); |
3146
|
0
|
0
|
|
|
|
|
add(JJ, lemma_lc, negation_len, lemmas); |
3147
|
0
|
0
|
|
|
|
|
add(RB, lemma_lc, negation_len, lemmas); |
3148
|
0
|
0
|
|
|
|
|
add(NN, lemma_lc, negation_len, lemmas); |
3149
|
0
|
0
|
|
|
|
|
add_NNS(lemma_lc, negation_len, lemmas); |
3150
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
// Add specialized tags |
3152
|
|
|
|
|
|
|
const char* p = form_lc.str; int cs; |
3153
|
|
|
|
|
|
|
bool added_JJR_RBR = false, added_JJS_RBS = false, added_SYM = false, added_CD = false; |
3154
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
{ |
3156
|
|
|
|
|
|
|
cs = tag_guesser_start; |
3157
|
|
|
|
|
|
|
} |
3158
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
{ |
3160
|
|
|
|
|
|
|
int _klen; |
3161
|
|
|
|
|
|
|
unsigned int _trans; |
3162
|
|
|
|
|
|
|
const char *_acts; |
3163
|
|
|
|
|
|
|
unsigned int _nacts; |
3164
|
|
|
|
|
|
|
const char *_keys; |
3165
|
|
|
|
|
|
|
|
3166
|
0
|
0
|
|
|
|
|
if ( p == ( (form_lc.str + form_lc.len)) ) |
3167
|
|
|
|
|
|
|
goto _test_eof; |
3168
|
|
|
|
|
|
|
_resume: |
3169
|
0
|
|
|
|
|
|
_keys = _tag_guesser_trans_keys + _tag_guesser_key_offsets[cs]; |
3170
|
0
|
|
|
|
|
|
_trans = _tag_guesser_index_offsets[cs]; |
3171
|
|
|
|
|
|
|
|
3172
|
0
|
|
|
|
|
|
_klen = _tag_guesser_single_lengths[cs]; |
3173
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
3174
|
|
|
|
|
|
|
const char *_lower = _keys; |
3175
|
|
|
|
|
|
|
const char *_mid; |
3176
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
3177
|
|
|
|
|
|
|
while (1) { |
3178
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
3179
|
|
|
|
|
|
|
break; |
3180
|
|
|
|
|
|
|
|
3181
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
3182
|
0
|
0
|
|
|
|
|
if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < *_mid ) |
3183
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
3184
|
0
|
0
|
|
|
|
|
else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > *_mid ) |
3185
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
3186
|
|
|
|
|
|
|
else { |
3187
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
3188
|
0
|
|
|
|
|
|
goto _match; |
3189
|
|
|
|
|
|
|
} |
3190
|
|
|
|
|
|
|
} |
3191
|
0
|
|
|
|
|
|
_keys += _klen; |
3192
|
0
|
|
|
|
|
|
_trans += _klen; |
3193
|
|
|
|
|
|
|
} |
3194
|
|
|
|
|
|
|
|
3195
|
0
|
|
|
|
|
|
_klen = _tag_guesser_range_lengths[cs]; |
3196
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
3197
|
|
|
|
|
|
|
const char *_lower = _keys; |
3198
|
|
|
|
|
|
|
const char *_mid; |
3199
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
3200
|
|
|
|
|
|
|
while (1) { |
3201
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
3202
|
|
|
|
|
|
|
break; |
3203
|
|
|
|
|
|
|
|
3204
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
3205
|
0
|
0
|
|
|
|
|
if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < _mid[0] ) |
3206
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
3207
|
0
|
0
|
|
|
|
|
else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > _mid[1] ) |
3208
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
3209
|
|
|
|
|
|
|
else { |
3210
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
3211
|
0
|
|
|
|
|
|
goto _match; |
3212
|
|
|
|
|
|
|
} |
3213
|
|
|
|
|
|
|
} |
3214
|
0
|
|
|
|
|
|
_trans += _klen; |
3215
|
|
|
|
|
|
|
} |
3216
|
|
|
|
|
|
|
|
3217
|
|
|
|
|
|
|
_match: |
3218
|
0
|
|
|
|
|
|
_trans = _tag_guesser_indicies[_trans]; |
3219
|
0
|
|
|
|
|
|
cs = _tag_guesser_trans_targs[_trans]; |
3220
|
|
|
|
|
|
|
|
3221
|
0
|
0
|
|
|
|
|
if ( _tag_guesser_trans_actions[_trans] == 0 ) |
3222
|
|
|
|
|
|
|
goto _again; |
3223
|
|
|
|
|
|
|
|
3224
|
0
|
|
|
|
|
|
_acts = _tag_guesser_actions + _tag_guesser_trans_actions[_trans]; |
3225
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
3226
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
3227
|
|
|
|
|
|
|
{ |
3228
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
3229
|
|
|
|
|
|
|
{ |
3230
|
|
|
|
|
|
|
case 0: |
3231
|
0
|
0
|
|
|
|
|
{ if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); } |
|
|
0
|
|
|
|
|
|
3232
|
|
|
|
|
|
|
break; |
3233
|
|
|
|
|
|
|
case 1: |
3234
|
0
|
0
|
|
|
|
|
{ if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); } |
|
|
0
|
|
|
|
|
|
3235
|
|
|
|
|
|
|
break; |
3236
|
|
|
|
|
|
|
case 2: |
3237
|
0
|
0
|
|
|
|
|
{ add_VBG(lemma_lc, lemmas); } |
3238
|
|
|
|
|
|
|
break; |
3239
|
|
|
|
|
|
|
case 3: |
3240
|
0
|
0
|
|
|
|
|
{ add_VBD_VBN(lemma_lc, lemmas); } |
3241
|
|
|
|
|
|
|
break; |
3242
|
|
|
|
|
|
|
case 4: |
3243
|
0
|
0
|
|
|
|
|
{ add_VBZ(lemma_lc, lemmas); } |
3244
|
|
|
|
|
|
|
break; |
3245
|
|
|
|
|
|
|
case 5: |
3246
|
0
|
|
|
|
|
|
{ add(VB, lemma_lc, lemmas); add(VBP, lemma_lc, lemmas); } |
3247
|
|
|
|
|
|
|
break; |
3248
|
|
|
|
|
|
|
case 6: |
3249
|
0
|
0
|
|
|
|
|
{ if (!added_SYM) added_SYM = true, add(SYM, lemma_lc, lemmas); } |
3250
|
|
|
|
|
|
|
break; |
3251
|
|
|
|
|
|
|
case 7: |
3252
|
0
|
0
|
|
|
|
|
{ if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); } |
3253
|
|
|
|
|
|
|
break; |
3254
|
|
|
|
|
|
|
} |
3255
|
|
|
|
|
|
|
} |
3256
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
_again: |
3258
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form_lc.str + form_lc.len)) ) |
3259
|
|
|
|
|
|
|
goto _resume; |
3260
|
|
|
|
|
|
|
_test_eof: {} |
3261
|
0
|
0
|
|
|
|
|
if ( p == ( (form_lc.str + form_lc.len)) ) |
3262
|
|
|
|
|
|
|
{ |
3263
|
0
|
|
|
|
|
|
const char *__acts = _tag_guesser_actions + _tag_guesser_eof_actions[cs]; |
3264
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
3265
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
3266
|
0
|
0
|
|
|
|
|
switch ( *__acts++ ) { |
3267
|
|
|
|
|
|
|
case 7: |
3268
|
0
|
0
|
|
|
|
|
{ if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); } |
3269
|
|
|
|
|
|
|
break; |
3270
|
|
|
|
|
|
|
} |
3271
|
|
|
|
|
|
|
} |
3272
|
|
|
|
|
|
|
} |
3273
|
|
|
|
|
|
|
|
3274
|
|
|
|
|
|
|
} |
3275
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
} |
3277
|
|
|
|
|
|
|
|
3278
|
|
|
|
|
|
|
// Add proper names |
3279
|
0
|
|
|
|
|
|
analyze_proper_names(form, form_lc, lemmas); |
3280
|
0
|
|
|
|
|
|
} |
3281
|
|
|
|
|
|
|
|
3282
|
0
|
|
|
|
|
|
bool english_morpho_guesser::analyze_proper_names(string_piece form, string_piece form_lc, vector& lemmas) const { |
3283
|
|
|
|
|
|
|
// NNP if form_lc != form or form.str[0] =~ /[0-9']/, NNPS if form_lc != form |
3284
|
0
|
0
|
|
|
|
|
bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9'))); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3285
|
0
|
|
|
|
|
|
bool is_NNPS = form.str != form_lc.str; |
3286
|
0
|
0
|
|
|
|
|
if (!is_NNP && !is_NNPS) return false; |
3287
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
bool was_NNP = false, was_NNPS = false; |
3289
|
0
|
0
|
|
|
|
|
for (auto&& lemma : lemmas) { |
3290
|
0
|
|
|
|
|
|
was_NNP |= lemma.tag == NNP; |
3291
|
0
|
|
|
|
|
|
was_NNPS |= lemma.tag == NNPS; |
3292
|
|
|
|
|
|
|
} |
3293
|
0
|
0
|
|
|
|
|
if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false; |
|
|
0
|
|
|
|
|
|
3294
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
string lemma(form.str, form.len); |
3296
|
0
|
0
|
|
|
|
|
if (is_NNP && !was_NNP) add(NNP, lemma, lemmas); |
3297
|
0
|
0
|
|
|
|
|
if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas); |
|
|
0
|
|
|
|
|
|
3298
|
|
|
|
|
|
|
return true; |
3299
|
|
|
|
|
|
|
} |
3300
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& form, vector& lemmas) const { |
3302
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(form, tag); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
} |
3304
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& tag2, const string& form, vector& lemmas) const { |
3306
|
|
|
|
|
|
|
add(tag, form, lemmas); |
3307
|
|
|
|
|
|
|
add(tag2, form, lemmas); |
3308
|
|
|
|
|
|
|
} |
3309
|
|
|
|
|
|
|
|
3310
|
0
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& form, unsigned negation_len, vector& lemmas) const { |
3311
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3312
|
0
|
|
|
|
|
|
} |
3313
|
|
|
|
|
|
|
|
3314
|
0
|
|
|
|
|
|
inline void english_morpho_guesser::add(const string& tag, const string& tag2, const string& form, unsigned negation_len, vector& lemmas) const { |
3315
|
0
|
|
|
|
|
|
add(tag, form, negation_len, lemmas); |
3316
|
0
|
|
|
|
|
|
add(tag2, form, negation_len, lemmas); |
3317
|
0
|
|
|
|
|
|
} |
3318
|
|
|
|
|
|
|
|
3319
|
|
|
|
|
|
|
// Common definitions (written backwards)
// NOTE(review): "written backwards" presumably refers to the suffix patterns
// of the guesser being matched from the last character of a form towards its
// start - confirm against the grammar these definitions belong to.
//
// REM(str, len)          - copy of `str` without its last `len` characters.
// REM_ADD(str, len, add) - the same copy with `add` appended.
//
// Every macro parameter is parenthesized so that expression arguments such as
// `REM(s, n + 1)` expand to `s.size() - (n + 1)` rather than
// `s.size() - n + 1`. `str` is evaluated twice; do not pass an expression
// with side effects.
#define REM(str, len) ((str).substr(0, (str).size() - (len)))
#define REM_ADD(str, len, add) ((str).substr(0, (str).size() - (len)).append(add))
3322
|
|
|
|
|
|
|
|
3323
|
|
|
|
|
|
|
// Transition tables of the state machine driven by
// english_morpho_guesser::add_NNS.
// NOTE(review): the layout looks like the table output of a state-machine
// generator (presumably Ragel) - confirm, and regenerate instead of editing
// these numbers by hand.
//
// How add_NNS reads them for the current state `cs` and input character:
//   _NNS_key_offsets[cs]     start of the state's keys inside _NNS_trans_keys
//   _NNS_single_lengths[cs]  number of single-character keys
//   _NNS_range_lengths[cs]   number of [low, high] key pairs stored right
//                            after the single keys
//   _NNS_index_offsets[cs]   first slot of the state inside _NNS_indicies;
//                            slots are ordered: one per single key, one per
//                            range, then one used when no key matches
//   _NNS_indicies[slot]      transition id
//   _NNS_trans_targs[id]     next state; state 0 stops the machine
//   _NNS_trans_actions[id]   offset into _NNS_actions, 0 meaning "no action"

// Action lists: at each referenced offset, a count followed by that many
// action ids (the ids are the `case` labels in add_NNS).
static const char _NNS_actions[] = {
  0, 1, 0, 1, 1, 1, 2, 1,
  3, 1, 4, 1, 5, 1, 6, 1,
  7, 1, 8, 1, 9, 1, 10, 1,
  11, 1, 12, 1, 13
};

// Per state: offset of its first key in _NNS_trans_keys.
static const char _NNS_key_offsets[] = {
  0, 0, 2, 3, 4, 5, 7, 17,
  17, 29, 30, 35, 35, 36, 37, 37,
  37, 44, 45, 53, 63, 72
};

// Character codes: for each state its single keys, then its range pairs.
static const char _NNS_trans_keys[] = {
  110, 115, 101, 109, 101, 99, 115, 98,
  100, 102, 104, 106, 110, 112, 116, 118,
  122, 104, 122, 98, 100, 102, 103, 106,
  110, 112, 116, 118, 120, 111, 97, 101,
  105, 111, 117, 105, 119, 104, 105, 111,
  115, 118, 120, 122, 115, 97, 101, 105,
  110, 111, 114, 115, 117, 98, 100, 102,
  104, 106, 110, 112, 116, 118, 122, 97,
  101, 105, 111, 117, 121, 122, 98, 120,
  0
};

// Per state: number of single-character keys.
static const char _NNS_single_lengths[] = {
  0, 2, 1, 1, 1, 2, 0, 0,
  2, 1, 5, 0, 1, 1, 0, 0,
  7, 1, 8, 0, 7, 0
};

// Per state: number of [low, high] range pairs.
static const char _NNS_range_lengths[] = {
  0, 0, 0, 0, 0, 0, 5, 0,
  5, 0, 0, 0, 0, 0, 0, 0,
  0, 0, 0, 5, 1, 0
};

// Per state: offset of its first slot in _NNS_indicies.
static const char _NNS_index_offsets[] = {
  0, 0, 3, 5, 7, 9, 12, 18,
  19, 27, 29, 35, 36, 38, 40, 41,
  42, 50, 52, 61, 67, 76
};

// Slot -> transition id.
static const char _NNS_indicies[] = {
  0, 2, 1, 3, 1, 4, 1, 6,
  5, 7, 7, 1, 8, 8, 8, 8,
  8, 1, 9, 11, 10, 10, 10, 10,
  10, 10, 1, 12, 1, 13, 13, 13,
  13, 13, 1, 14, 15, 1, 16, 1,
  17, 1, 18, 19, 20, 21, 22, 7,
  23, 1, 24, 1, 25, 25, 25, 26,
  25, 27, 28, 29, 1, 30, 30, 30,
  30, 30, 1, 31, 31, 31, 31, 31,
  31, 33, 32, 1, 17, 0
};

// Transition id -> target state.
static const char _NNS_trans_targs[] = {
  2, 0, 4, 3, 15, 15, 16, 15,
  7, 15, 15, 17, 15, 11, 15, 13,
  15, 15, 5, 6, 8, 18, 12, 20,
  15, 15, 9, 10, 15, 19, 15, 15,
  14, 21
};

// Transition id -> offset of its action list in _NNS_actions (0 = none).
static const char _NNS_trans_actions[] = {
  0, 0, 0, 0, 1, 27, 27, 21,
  0, 23, 25, 25, 19, 0, 17, 0,
  5, 11, 0, 0, 0, 21, 0, 21,
  3, 9, 0, 0, 15, 9, 7, 13,
  0, 15
};

// State in which add_NNS starts the machine.
static const int NNS_start = 1;
3397
|
|
|
|
|
|
|
|
3398
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_NNS(const string& form, unsigned negation_len, vector& lemmas) const { |
3399
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
3400
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
3401
|
|
|
|
|
|
|
|
3402
|
|
|
|
|
|
|
{ |
3403
|
|
|
|
|
|
|
cs = NNS_start; |
3404
|
|
|
|
|
|
|
} |
3405
|
|
|
|
|
|
|
|
3406
|
|
|
|
|
|
|
{ |
3407
|
|
|
|
|
|
|
int _klen; |
3408
|
|
|
|
|
|
|
unsigned int _trans; |
3409
|
|
|
|
|
|
|
const char *_acts; |
3410
|
|
|
|
|
|
|
unsigned int _nacts; |
3411
|
|
|
|
|
|
|
const char *_keys; |
3412
|
|
|
|
|
|
|
|
3413
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
3414
|
|
|
|
|
|
|
goto _test_eof; |
3415
|
|
|
|
|
|
|
if ( cs == 0 ) |
3416
|
|
|
|
|
|
|
goto _out; |
3417
|
|
|
|
|
|
|
_resume: |
3418
|
0
|
|
|
|
|
|
_keys = _NNS_trans_keys + _NNS_key_offsets[cs]; |
3419
|
0
|
|
|
|
|
|
_trans = _NNS_index_offsets[cs]; |
3420
|
|
|
|
|
|
|
|
3421
|
0
|
|
|
|
|
|
_klen = _NNS_single_lengths[cs]; |
3422
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
3423
|
|
|
|
|
|
|
const char *_lower = _keys; |
3424
|
|
|
|
|
|
|
const char *_mid; |
3425
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
3426
|
|
|
|
|
|
|
while (1) { |
3427
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
3428
|
|
|
|
|
|
|
break; |
3429
|
|
|
|
|
|
|
|
3430
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
3431
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
3432
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
3433
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
3434
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
3435
|
|
|
|
|
|
|
else { |
3436
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
3437
|
0
|
|
|
|
|
|
goto _match; |
3438
|
|
|
|
|
|
|
} |
3439
|
|
|
|
|
|
|
} |
3440
|
0
|
|
|
|
|
|
_keys += _klen; |
3441
|
0
|
|
|
|
|
|
_trans += _klen; |
3442
|
|
|
|
|
|
|
} |
3443
|
|
|
|
|
|
|
|
3444
|
0
|
|
|
|
|
|
_klen = _NNS_range_lengths[cs]; |
3445
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
3446
|
|
|
|
|
|
|
const char *_lower = _keys; |
3447
|
|
|
|
|
|
|
const char *_mid; |
3448
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
3449
|
|
|
|
|
|
|
while (1) { |
3450
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
3451
|
|
|
|
|
|
|
break; |
3452
|
|
|
|
|
|
|
|
3453
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
3454
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
3455
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
3456
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
3457
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
3458
|
|
|
|
|
|
|
else { |
3459
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
3460
|
0
|
|
|
|
|
|
goto _match; |
3461
|
|
|
|
|
|
|
} |
3462
|
|
|
|
|
|
|
} |
3463
|
0
|
|
|
|
|
|
_trans += _klen; |
3464
|
|
|
|
|
|
|
} |
3465
|
|
|
|
|
|
|
|
3466
|
|
|
|
|
|
|
_match: |
3467
|
0
|
|
|
|
|
|
_trans = _NNS_indicies[_trans]; |
3468
|
0
|
|
|
|
|
|
cs = _NNS_trans_targs[_trans]; |
3469
|
|
|
|
|
|
|
|
3470
|
0
|
0
|
|
|
|
|
if ( _NNS_trans_actions[_trans] == 0 ) |
3471
|
|
|
|
|
|
|
goto _again; |
3472
|
|
|
|
|
|
|
|
3473
|
0
|
|
|
|
|
|
_acts = _NNS_actions + _NNS_trans_actions[_trans]; |
3474
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
3475
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
3476
|
|
|
|
|
|
|
{ |
3477
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
3478
|
|
|
|
|
|
|
{ |
3479
|
|
|
|
|
|
|
case 0: |
3480
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = "an"; } |
3481
|
|
|
|
|
|
|
break; |
3482
|
|
|
|
|
|
|
case 1: |
3483
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 1, append = nullptr; } |
3484
|
|
|
|
|
|
|
break; |
3485
|
|
|
|
|
|
|
case 2: |
3486
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = "fe"; } |
3487
|
|
|
|
|
|
|
break; |
3488
|
|
|
|
|
|
|
case 3: |
3489
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
3490
|
|
|
|
|
|
|
break; |
3491
|
|
|
|
|
|
|
case 4: |
3492
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
3493
|
|
|
|
|
|
|
break; |
3494
|
|
|
|
|
|
|
case 5: |
3495
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
3496
|
|
|
|
|
|
|
break; |
3497
|
|
|
|
|
|
|
case 6: |
3498
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
3499
|
|
|
|
|
|
|
break; |
3500
|
|
|
|
|
|
|
case 7: |
3501
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
3502
|
|
|
|
|
|
|
break; |
3503
|
|
|
|
|
|
|
case 8: |
3504
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
3505
|
|
|
|
|
|
|
break; |
3506
|
|
|
|
|
|
|
case 9: |
3507
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
3508
|
|
|
|
|
|
|
break; |
3509
|
|
|
|
|
|
|
case 10: |
3510
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
3511
|
|
|
|
|
|
|
break; |
3512
|
|
|
|
|
|
|
case 11: |
3513
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 3, append = "y"; } |
3514
|
|
|
|
|
|
|
break; |
3515
|
|
|
|
|
|
|
case 12: |
3516
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
3517
|
|
|
|
|
|
|
break; |
3518
|
|
|
|
|
|
|
case 13: |
3519
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
3520
|
|
|
|
|
|
|
break; |
3521
|
|
|
|
|
|
|
} |
3522
|
|
|
|
|
|
|
} |
3523
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
_again: |
3525
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
3526
|
|
|
|
|
|
|
goto _out; |
3527
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
3528
|
|
|
|
|
|
|
goto _resume; |
3529
|
|
|
|
|
|
|
_test_eof: {} |
3530
|
|
|
|
|
|
|
_out: {} |
3531
|
|
|
|
|
|
|
} |
3532
|
|
|
|
|
|
|
|
3533
|
0
|
0
|
|
|
|
|
add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
3534
|
0
|
|
|
|
|
|
} |
3535
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
// Transition tables of the state machine driven by
// english_morpho_guesser::add_NNPS.
// NOTE(review): the layout looks like the table output of a state-machine
// generator (presumably Ragel) - confirm, and regenerate instead of editing
// these numbers by hand.
//
// How add_NNPS reads them for the current state `cs` and input character:
//   _NNPS_key_offsets[cs]     start of the state's keys inside _NNPS_trans_keys
//   _NNPS_single_lengths[cs]  number of single-character keys
//   _NNPS_range_lengths[cs]   number of [low, high] key pairs stored right
//                             after the single keys
//   _NNPS_index_offsets[cs]   first slot of the state inside _NNPS_indicies;
//                             slots are ordered: one per single key, one per
//                             range, then one used when no key matches
//   _NNPS_indicies[slot]      transition id
//   _NNPS_trans_targs[id]     next state; state 0 stops the machine
//   _NNPS_trans_actions[id]   offset into _NNPS_actions, 0 meaning "no action"

// Action lists: at each referenced offset, a count followed by that many
// action ids (the ids are the `case` labels in add_NNPS).
static const char _NNPS_actions[] = {
  0, 1, 1, 1, 2, 1, 4, 1,
  5, 1, 6, 1, 7, 1, 8, 1,
  9, 1, 10, 1, 11, 1, 12, 1,
  14, 1, 15, 1, 16, 2, 0, 1,
  2, 3, 4, 2, 13, 14
};

// Per state: offset of its first key in _NNPS_trans_keys.
static const unsigned char _NNPS_key_offsets[] = {
  0, 0, 4, 6, 8, 10, 12, 16,
  36, 36, 60, 62, 72, 72, 74, 76,
  78, 78, 98, 98, 100, 102, 104, 104,
  118, 120, 136, 156, 174, 174
};

// Character codes: for each state its single keys, then its range pairs.
static const char _NNPS_trans_keys[] = {
  78, 83, 110, 115, 69, 101, 77, 109,
  77, 109, 69, 101, 67, 83, 99, 115,
  66, 68, 70, 72, 74, 78, 80, 84,
  86, 90, 98, 100, 102, 104, 106, 110,
  112, 116, 118, 122, 72, 90, 104, 122,
  66, 68, 70, 71, 74, 78, 80, 84,
  86, 88, 98, 100, 102, 103, 106, 110,
  112, 116, 118, 120, 79, 111, 65, 69,
  73, 79, 85, 97, 101, 105, 111, 117,
  73, 105, 87, 119, 87, 119, 66, 68,
  70, 72, 74, 78, 80, 84, 86, 90,
  98, 100, 102, 104, 106, 110, 112, 116,
  118, 122, 73, 105, 69, 101, 69, 101,
  72, 73, 79, 83, 86, 88, 90, 104,
  105, 111, 115, 118, 120, 122, 83, 115,
  65, 69, 73, 78, 79, 82, 83, 85,
  97, 101, 105, 110, 111, 114, 115, 117,
  66, 68, 70, 72, 74, 78, 80, 84,
  86, 90, 98, 100, 102, 104, 106, 110,
  112, 116, 118, 122, 65, 69, 73, 79,
  85, 89, 90, 97, 101, 105, 111, 117,
  121, 122, 66, 88, 98, 120, 72, 73,
  79, 83, 86, 88, 90, 104, 105, 111,
  115, 118, 120, 122, 0
};

// Per state: number of single-character keys.
static const char _NNPS_single_lengths[] = {
  0, 4, 2, 2, 2, 2, 4, 0,
  0, 4, 2, 10, 0, 2, 2, 2,
  0, 0, 0, 2, 2, 2, 0, 14,
  2, 16, 0, 14, 0, 14
};

// Per state: number of [low, high] range pairs.
static const char _NNPS_range_lengths[] = {
  0, 0, 0, 0, 0, 0, 0, 10,
  0, 10, 0, 0, 0, 0, 0, 0,
  0, 10, 0, 0, 0, 0, 0, 0,
  0, 0, 10, 2, 0, 0
};

// Per state: offset of its first slot in _NNPS_indicies.
static const unsigned char _NNPS_index_offsets[] = {
  0, 0, 5, 8, 11, 14, 17, 22,
  33, 34, 49, 52, 63, 64, 67, 70,
  73, 74, 85, 86, 89, 92, 95, 96,
  111, 114, 131, 142, 159, 160
};

// Slot -> transition id.
static const char _NNPS_indicies[] = {
  0, 2, 3, 4, 1, 5, 6, 1,
  7, 8, 1, 8, 8, 1, 10, 11,
  9, 12, 12, 12, 12, 1, 13, 13,
  13, 13, 13, 13, 13, 13, 13, 13,
  1, 14, 16, 15, 16, 15, 15, 15,
  15, 15, 15, 15, 15, 15, 15, 15,
  1, 17, 17, 1, 18, 18, 18, 18,
  18, 18, 18, 18, 18, 18, 1, 19,
  20, 21, 1, 22, 23, 1, 23, 23,
  1, 24, 25, 25, 25, 25, 25, 25,
  25, 25, 25, 25, 1, 26, 21, 21,
  1, 6, 6, 1, 11, 11, 9, 1,
  27, 28, 29, 30, 31, 12, 32, 27,
  33, 29, 30, 34, 12, 32, 1, 35,
  35, 1, 36, 36, 36, 37, 36, 38,
  39, 40, 36, 36, 36, 37, 36, 38,
  39, 40, 1, 41, 41, 41, 41, 41,
  41, 41, 41, 41, 41, 1, 42, 42,
  42, 42, 42, 42, 44, 42, 42, 42,
  42, 42, 42, 44, 43, 43, 1, 24,
  27, 33, 29, 30, 34, 12, 32, 27,
  33, 29, 30, 34, 12, 32, 1, 0
};

// Transition id -> target state.
static const char _NNPS_trans_targs[] = {
  2, 0, 5, 20, 21, 3, 4, 22,
  22, 22, 23, 29, 22, 8, 22, 22,
  24, 22, 12, 22, 14, 15, 22, 22,
  22, 18, 22, 6, 7, 9, 25, 13,
  27, 17, 19, 22, 22, 10, 11, 22,
  26, 22, 22, 16, 28
};

// Transition id -> offset of its action list in _NNPS_actions (0 = none).
static const char _NNPS_trans_actions[] = {
  0, 0, 0, 0, 0, 0, 0, 29,
  1, 27, 27, 27, 21, 0, 35, 25,
  25, 19, 0, 17, 0, 0, 32, 5,
  11, 0, 23, 0, 0, 0, 21, 0,
  21, 0, 0, 3, 9, 0, 0, 15,
  9, 7, 13, 0, 15
};

// State in which add_NNPS starts the machine.
static const int NNPS_start = 1;
3643
|
|
|
|
|
|
|
|
3644
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_NNPS(const string& form, vector& lemmas) const { |
3645
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
3646
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
3647
|
|
|
|
|
|
|
|
3648
|
|
|
|
|
|
|
{ |
3649
|
|
|
|
|
|
|
cs = NNPS_start; |
3650
|
|
|
|
|
|
|
} |
3651
|
|
|
|
|
|
|
|
3652
|
|
|
|
|
|
|
{ |
3653
|
|
|
|
|
|
|
int _klen; |
3654
|
|
|
|
|
|
|
unsigned int _trans; |
3655
|
|
|
|
|
|
|
const char *_acts; |
3656
|
|
|
|
|
|
|
unsigned int _nacts; |
3657
|
|
|
|
|
|
|
const char *_keys; |
3658
|
|
|
|
|
|
|
|
3659
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
3660
|
|
|
|
|
|
|
goto _test_eof; |
3661
|
|
|
|
|
|
|
if ( cs == 0 ) |
3662
|
|
|
|
|
|
|
goto _out; |
3663
|
|
|
|
|
|
|
_resume: |
3664
|
0
|
|
|
|
|
|
_keys = _NNPS_trans_keys + _NNPS_key_offsets[cs]; |
3665
|
0
|
|
|
|
|
|
_trans = _NNPS_index_offsets[cs]; |
3666
|
|
|
|
|
|
|
|
3667
|
0
|
|
|
|
|
|
_klen = _NNPS_single_lengths[cs]; |
3668
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
3669
|
|
|
|
|
|
|
const char *_lower = _keys; |
3670
|
|
|
|
|
|
|
const char *_mid; |
3671
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
3672
|
|
|
|
|
|
|
while (1) { |
3673
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
3674
|
|
|
|
|
|
|
break; |
3675
|
|
|
|
|
|
|
|
3676
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
3677
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
3678
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
3679
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
3680
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
3681
|
|
|
|
|
|
|
else { |
3682
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
3683
|
0
|
|
|
|
|
|
goto _match; |
3684
|
|
|
|
|
|
|
} |
3685
|
|
|
|
|
|
|
} |
3686
|
0
|
|
|
|
|
|
_keys += _klen; |
3687
|
0
|
|
|
|
|
|
_trans += _klen; |
3688
|
|
|
|
|
|
|
} |
3689
|
|
|
|
|
|
|
|
3690
|
0
|
|
|
|
|
|
_klen = _NNPS_range_lengths[cs]; |
3691
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
3692
|
|
|
|
|
|
|
const char *_lower = _keys; |
3693
|
|
|
|
|
|
|
const char *_mid; |
3694
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
3695
|
|
|
|
|
|
|
while (1) { |
3696
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
3697
|
|
|
|
|
|
|
break; |
3698
|
|
|
|
|
|
|
|
3699
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
3700
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
3701
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
3702
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
3703
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
3704
|
|
|
|
|
|
|
else { |
3705
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
3706
|
0
|
|
|
|
|
|
goto _match; |
3707
|
|
|
|
|
|
|
} |
3708
|
|
|
|
|
|
|
} |
3709
|
0
|
|
|
|
|
|
_trans += _klen; |
3710
|
|
|
|
|
|
|
} |
3711
|
|
|
|
|
|
|
|
3712
|
|
|
|
|
|
|
_match: |
3713
|
0
|
|
|
|
|
|
_trans = _NNPS_indicies[_trans]; |
3714
|
0
|
|
|
|
|
|
cs = _NNPS_trans_targs[_trans]; |
3715
|
|
|
|
|
|
|
|
3716
|
0
|
0
|
|
|
|
|
if ( _NNPS_trans_actions[_trans] == 0 ) |
3717
|
|
|
|
|
|
|
goto _again; |
3718
|
|
|
|
|
|
|
|
3719
|
0
|
|
|
|
|
|
_acts = _NNPS_actions + _NNPS_trans_actions[_trans]; |
3720
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
3721
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
3722
|
|
|
|
|
|
|
{ |
3723
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
3724
|
|
|
|
|
|
|
{ |
3725
|
|
|
|
|
|
|
case 0: |
3726
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = "AN"; } |
3727
|
|
|
|
|
|
|
break; |
3728
|
|
|
|
|
|
|
case 1: |
3729
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = "an"; } |
3730
|
|
|
|
|
|
|
break; |
3731
|
|
|
|
|
|
|
case 2: |
3732
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
3733
|
|
|
|
|
|
|
break; |
3734
|
|
|
|
|
|
|
case 3: |
3735
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = "FE"; } |
3736
|
|
|
|
|
|
|
break; |
3737
|
|
|
|
|
|
|
case 4: |
3738
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 3, append = "fe"; } |
3739
|
|
|
|
|
|
|
break; |
3740
|
|
|
|
|
|
|
case 5: |
3741
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
3742
|
|
|
|
|
|
|
break; |
3743
|
|
|
|
|
|
|
case 6: |
3744
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
3745
|
|
|
|
|
|
|
break; |
3746
|
|
|
|
|
|
|
case 7: |
3747
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
3748
|
|
|
|
|
|
|
break; |
3749
|
|
|
|
|
|
|
case 8: |
3750
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
3751
|
|
|
|
|
|
|
break; |
3752
|
|
|
|
|
|
|
case 9: |
3753
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 2, append = nullptr; } |
3754
|
|
|
|
|
|
|
break; |
3755
|
|
|
|
|
|
|
case 10: |
3756
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 1, append = nullptr; } |
3757
|
|
|
|
|
|
|
break; |
3758
|
|
|
|
|
|
|
case 11: |
3759
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
3760
|
|
|
|
|
|
|
break; |
3761
|
|
|
|
|
|
|
case 12: |
3762
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
3763
|
|
|
|
|
|
|
break; |
3764
|
|
|
|
|
|
|
case 13: |
3765
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 3, append = "Y"; } |
3766
|
|
|
|
|
|
|
break; |
3767
|
|
|
|
|
|
|
case 14: |
3768
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 3, append = "y"; } |
3769
|
|
|
|
|
|
|
break; |
3770
|
|
|
|
|
|
|
case 15: |
3771
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 2, append = nullptr; } |
3772
|
|
|
|
|
|
|
break; |
3773
|
|
|
|
|
|
|
case 16: |
3774
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 1, append = nullptr; } |
3775
|
|
|
|
|
|
|
break; |
3776
|
|
|
|
|
|
|
} |
3777
|
|
|
|
|
|
|
} |
3778
|
|
|
|
|
|
|
|
3779
|
|
|
|
|
|
|
_again: |
3780
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
3781
|
|
|
|
|
|
|
goto _out; |
3782
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
3783
|
|
|
|
|
|
|
goto _resume; |
3784
|
|
|
|
|
|
|
_test_eof: {} |
3785
|
|
|
|
|
|
|
_out: {} |
3786
|
|
|
|
|
|
|
} |
3787
|
|
|
|
|
|
|
|
3788
|
0
|
0
|
|
|
|
|
add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
3789
|
0
|
|
|
|
|
|
} |
3790
|
|
|
|
|
|
|
|
3791
|
|
|
|
|
|
|
static const char _VBG_actions[] = { |
3792
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
3793
|
|
|
|
|
|
|
5, 1, 6, 1, 7, 1, 9, 1, |
3794
|
|
|
|
|
|
|
10, 1, 11, 1, 12, 1, 13, 1, |
3795
|
|
|
|
|
|
|
14, 1, 15, 1, 16, 1, 17, 2, |
3796
|
|
|
|
|
|
|
0, 12, 2, 3, 4, 2, 5, 9, |
3797
|
|
|
|
|
|
|
2, 5, 10, 2, 8, 9, 2, 9, |
3798
|
|
|
|
|
|
|
10, 2, 11, 12, 3, 0, 2, 12, |
3799
|
|
|
|
|
|
|
3, 2, 11, 12 |
3800
|
|
|
|
|
|
|
}; |
3801
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
static const short _VBG_key_offsets[] = { |
3803
|
|
|
|
|
|
|
0, 0, 1, 2, 3, 9, 14, 24, |
3804
|
|
|
|
|
|
|
29, 34, 44, 46, 47, 48, 49, 50, |
3805
|
|
|
|
|
|
|
51, 52, 59, 66, 68, 70, 71, 72, |
3806
|
|
|
|
|
|
|
73, 74, 75, 76, 81, 89, 90, 91, |
3807
|
|
|
|
|
|
|
92, 93, 94, 96, 97, 98, 99, 100, |
3808
|
|
|
|
|
|
|
101, 102, 127, 127, 136, 137, 142, 153, |
3809
|
|
|
|
|
|
|
162, 171, 181, 186, 191, 197, 207, 207, |
3810
|
|
|
|
|
|
|
216, 228, 229, 240, 240, 249, 258, 267, |
3811
|
|
|
|
|
|
|
276, 285, 290, 302, 313, 318, 324, 334, |
3812
|
|
|
|
|
|
|
344, 355, 362, 373, 382, 391, 391, 402, |
3813
|
|
|
|
|
|
|
413, 415, 416, 417, 417, 418, 426, 437, |
3814
|
|
|
|
|
|
|
442, 448, 458, 468, 479, 486, 497, 504, |
3815
|
|
|
|
|
|
|
510, 519, 528, 537, 543 |
3816
|
|
|
|
|
|
|
}; |
3817
|
|
|
|
|
|
|
|
3818
|
|
|
|
|
|
|
static const char _VBG_trans_keys[] = { |
3819
|
|
|
|
|
|
|
103, 110, 105, 97, 101, 105, 111, 117, |
3820
|
|
|
|
|
|
|
121, 97, 101, 105, 111, 117, 98, 100, |
3821
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 122, |
3822
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 97, 101, 105, |
3823
|
|
|
|
|
|
|
111, 117, 98, 100, 102, 104, 106, 110, |
3824
|
|
|
|
|
|
|
112, 116, 118, 122, 98, 114, 105, 114, |
3825
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
3826
|
|
|
|
|
|
|
117, 98, 122, 97, 101, 105, 111, 117, |
3827
|
|
|
|
|
|
|
98, 122, 97, 122, 98, 114, 105, 114, |
3828
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
3829
|
|
|
|
|
|
|
117, 97, 101, 105, 110, 111, 115, 117, |
3830
|
|
|
|
|
|
|
120, 105, 112, 105, 109, 101, 98, 114, |
3831
|
|
|
|
|
|
|
105, 114, 112, 105, 109, 101, 98, 99, |
3832
|
|
|
|
|
|
|
100, 102, 103, 104, 106, 107, 108, 109, |
3833
|
|
|
|
|
|
|
110, 111, 112, 113, 114, 115, 116, 117, |
3834
|
|
|
|
|
|
|
118, 119, 120, 121, 122, 97, 105, 97, |
3835
|
|
|
|
|
|
|
98, 101, 105, 111, 117, 122, 99, 120, |
3836
|
|
|
|
|
|
|
113, 97, 101, 105, 111, 117, 98, 99, |
3837
|
|
|
|
|
|
|
100, 105, 111, 117, 122, 97, 101, 102, |
3838
|
|
|
|
|
|
|
120, 97, 100, 101, 105, 111, 117, 122, |
3839
|
|
|
|
|
|
|
98, 120, 97, 101, 102, 105, 111, 117, |
3840
|
|
|
|
|
|
|
122, 98, 120, 97, 101, 103, 105, 110, |
3841
|
|
|
|
|
|
|
111, 117, 122, 98, 120, 97, 101, 105, |
3842
|
|
|
|
|
|
|
111, 117, 101, 110, 111, 115, 120, 101, |
3843
|
|
|
|
|
|
|
110, 111, 112, 115, 120, 97, 101, 104, |
3844
|
|
|
|
|
|
|
105, 111, 116, 117, 122, 98, 120, 97, |
3845
|
|
|
|
|
|
|
101, 105, 106, 111, 117, 122, 98, 120, |
3846
|
|
|
|
|
|
|
98, 99, 100, 105, 107, 111, 117, 122, |
3847
|
|
|
|
|
|
|
97, 101, 102, 120, 105, 97, 101, 105, |
3848
|
|
|
|
|
|
|
108, 111, 114, 117, 119, 122, 98, 120, |
3849
|
|
|
|
|
|
|
97, 101, 105, 109, 111, 117, 122, 98, |
3850
|
|
|
|
|
|
|
120, 97, 101, 105, 110, 111, 117, 122, |
3851
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 112, 117, |
3852
|
|
|
|
|
|
|
122, 98, 120, 97, 101, 105, 111, 113, |
3853
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 111, |
3854
|
|
|
|
|
|
|
114, 117, 122, 98, 120, 97, 101, 105, |
3855
|
|
|
|
|
|
|
111, 117, 98, 99, 100, 105, 108, 111, |
3856
|
|
|
|
|
|
|
116, 117, 97, 101, 102, 122, 101, 110, |
3857
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
3858
|
|
|
|
|
|
|
122, 101, 110, 111, 115, 120, 101, 110, |
3859
|
|
|
|
|
|
|
111, 112, 115, 120, 101, 105, 110, 111, |
3860
|
|
|
|
|
|
|
115, 120, 98, 116, 118, 122, 101, 105, |
3861
|
|
|
|
|
|
|
110, 111, 115, 120, 98, 116, 118, 122, |
3862
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
3863
|
|
|
|
|
|
|
116, 118, 122, 98, 101, 110, 111, 114, |
3864
|
|
|
|
|
|
|
115, 120, 101, 110, 111, 115, 120, 98, |
3865
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 97, 101, 105, |
3866
|
|
|
|
|
|
|
111, 115, 117, 122, 98, 120, 97, 101, |
3867
|
|
|
|
|
|
|
105, 111, 116, 117, 122, 98, 120, 122, |
3868
|
|
|
|
|
|
|
98, 100, 102, 104, 106, 110, 112, 116, |
3869
|
|
|
|
|
|
|
118, 120, 122, 98, 100, 102, 104, 106, |
3870
|
|
|
|
|
|
|
110, 112, 116, 118, 120, 98, 114, 112, |
3871
|
|
|
|
|
|
|
114, 113, 97, 101, 105, 108, 111, 117, |
3872
|
|
|
|
|
|
|
98, 122, 101, 110, 111, 115, 120, 98, |
3873
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 101, 110, 111, |
3874
|
|
|
|
|
|
|
115, 120, 101, 110, 111, 112, 115, 120, |
3875
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 120, 98, 116, |
3876
|
|
|
|
|
|
|
118, 122, 101, 105, 110, 111, 115, 120, |
3877
|
|
|
|
|
|
|
98, 116, 118, 122, 101, 110, 111, 115, |
3878
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 98, |
3879
|
|
|
|
|
|
|
101, 110, 111, 114, 115, 120, 101, 110, |
3880
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
3881
|
|
|
|
|
|
|
122, 97, 101, 105, 111, 117, 98, 122, |
3882
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 121, 97, 101, |
3883
|
|
|
|
|
|
|
105, 111, 117, 118, 122, 98, 120, 97, |
3884
|
|
|
|
|
|
|
101, 105, 111, 117, 119, 122, 98, 120, |
3885
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 120, 122, 98, |
3886
|
|
|
|
|
|
|
119, 97, 101, 105, 111, 117, 121, 97, |
3887
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
3888
|
|
|
|
|
|
|
0 |
3889
|
|
|
|
|
|
|
}; |
3890
|
|
|
|
|
|
|
|
3891
|
|
|
|
|
|
|
static const char _VBG_single_lengths[] = { |
3892
|
|
|
|
|
|
|
0, 1, 1, 1, 6, 5, 0, 5, |
3893
|
|
|
|
|
|
|
5, 0, 2, 1, 1, 1, 1, 1, |
3894
|
|
|
|
|
|
|
1, 5, 5, 0, 2, 1, 1, 1, |
3895
|
|
|
|
|
|
|
1, 1, 1, 5, 8, 1, 1, 1, |
3896
|
|
|
|
|
|
|
1, 1, 2, 1, 1, 1, 1, 1, |
3897
|
|
|
|
|
|
|
1, 23, 0, 7, 1, 5, 7, 7, |
3898
|
|
|
|
|
|
|
7, 8, 5, 5, 6, 8, 0, 7, |
3899
|
|
|
|
|
|
|
8, 1, 9, 0, 7, 7, 7, 7, |
3900
|
|
|
|
|
|
|
7, 5, 8, 5, 5, 6, 6, 6, |
3901
|
|
|
|
|
|
|
5, 7, 5, 7, 7, 0, 1, 1, |
3902
|
|
|
|
|
|
|
2, 1, 1, 0, 1, 6, 5, 5, |
3903
|
|
|
|
|
|
|
6, 6, 6, 5, 7, 5, 5, 6, |
3904
|
|
|
|
|
|
|
7, 7, 7, 6, 7 |
3905
|
|
|
|
|
|
|
}; |
3906
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
static const char _VBG_range_lengths[] = { |
3908
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 5, 0, |
3909
|
|
|
|
|
|
|
0, 5, 0, 0, 0, 0, 0, 0, |
3910
|
|
|
|
|
|
|
0, 1, 1, 1, 0, 0, 0, 0, |
3911
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
3912
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
3913
|
|
|
|
|
|
|
0, 1, 0, 1, 0, 0, 2, 1, |
3914
|
|
|
|
|
|
|
1, 1, 0, 0, 0, 1, 0, 1, |
3915
|
|
|
|
|
|
|
2, 0, 1, 0, 1, 1, 1, 1, |
3916
|
|
|
|
|
|
|
1, 0, 2, 3, 0, 0, 2, 2, |
3917
|
|
|
|
|
|
|
3, 0, 3, 1, 1, 0, 5, 5, |
3918
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 3, 0, |
3919
|
|
|
|
|
|
|
0, 2, 2, 3, 0, 3, 1, 0, |
3920
|
|
|
|
|
|
|
1, 1, 1, 0, 1 |
3921
|
|
|
|
|
|
|
}; |
3922
|
|
|
|
|
|
|
|
3923
|
|
|
|
|
|
|
static const short _VBG_index_offsets[] = { |
3924
|
|
|
|
|
|
|
0, 0, 2, 4, 6, 13, 19, 25, |
3925
|
|
|
|
|
|
|
31, 37, 43, 46, 48, 50, 52, 54, |
3926
|
|
|
|
|
|
|
56, 58, 65, 72, 74, 77, 79, 81, |
3927
|
|
|
|
|
|
|
83, 85, 87, 89, 95, 104, 106, 108, |
3928
|
|
|
|
|
|
|
110, 112, 114, 117, 119, 121, 123, 125, |
3929
|
|
|
|
|
|
|
127, 129, 154, 155, 164, 166, 172, 182, |
3930
|
|
|
|
|
|
|
191, 200, 210, 216, 222, 229, 239, 240, |
3931
|
|
|
|
|
|
|
249, 260, 262, 273, 274, 283, 292, 301, |
3932
|
|
|
|
|
|
|
310, 319, 325, 336, 345, 351, 358, 367, |
3933
|
|
|
|
|
|
|
376, 385, 393, 402, 411, 420, 421, 428, |
3934
|
|
|
|
|
|
|
435, 438, 440, 442, 443, 445, 453, 462, |
3935
|
|
|
|
|
|
|
468, 475, 484, 493, 502, 510, 519, 526, |
3936
|
|
|
|
|
|
|
533, 542, 551, 560, 567 |
3937
|
|
|
|
|
|
|
}; |
3938
|
|
|
|
|
|
|
|
3939
|
|
|
|
|
|
|
static const unsigned char _VBG_indicies[] = { |
3940
|
|
|
|
|
|
|
0, 1, 2, 1, 3, 1, 4, 4, |
3941
|
|
|
|
|
|
|
4, 4, 4, 4, 1, 5, 5, 5, |
3942
|
|
|
|
|
|
|
5, 6, 1, 7, 7, 7, 7, 7, |
3943
|
|
|
|
|
|
|
1, 8, 8, 8, 8, 9, 1, 5, |
3944
|
|
|
|
|
|
|
5, 5, 5, 10, 1, 11, 11, 11, |
3945
|
|
|
|
|
|
|
11, 11, 1, 11, 12, 1, 11, 1, |
3946
|
|
|
|
|
|
|
13, 1, 11, 1, 14, 1, 11, 1, |
3947
|
|
|
|
|
|
|
11, 1, 5, 5, 5, 5, 6, 15, |
3948
|
|
|
|
|
|
|
1, 5, 5, 5, 5, 6, 16, 1, |
3949
|
|
|
|
|
|
|
4, 1, 17, 18, 1, 17, 1, 19, |
3950
|
|
|
|
|
|
|
1, 17, 1, 20, 1, 17, 1, 17, |
3951
|
|
|
|
|
|
|
1, 21, 22, 21, 23, 24, 1, 25, |
3952
|
|
|
|
|
|
|
26, 25, 27, 28, 29, 25, 30, 1, |
3953
|
|
|
|
|
|
|
31, 1, 31, 1, 32, 1, 31, 1, |
3954
|
|
|
|
|
|
|
31, 1, 33, 34, 1, 33, 1, 35, |
3955
|
|
|
|
|
|
|
1, 33, 1, 36, 1, 33, 1, 33, |
3956
|
|
|
|
|
|
|
1, 38, 39, 40, 41, 42, 43, 44, |
3957
|
|
|
|
|
|
|
45, 46, 47, 48, 49, 50, 51, 52, |
3958
|
|
|
|
|
|
|
53, 54, 55, 56, 57, 58, 59, 60, |
3959
|
|
|
|
|
|
|
37, 1, 1, 61, 62, 61, 61, 61, |
3960
|
|
|
|
|
|
|
61, 63, 63, 1, 64, 1, 65, 65, |
3961
|
|
|
|
|
|
|
65, 65, 65, 1, 67, 68, 67, 66, |
3962
|
|
|
|
|
|
|
66, 66, 67, 66, 67, 1, 69, 62, |
3963
|
|
|
|
|
|
|
69, 69, 69, 69, 63, 63, 1, 61, |
3964
|
|
|
|
|
|
|
61, 62, 61, 61, 61, 63, 63, 1, |
3965
|
|
|
|
|
|
|
66, 66, 68, 66, 70, 66, 66, 67, |
3966
|
|
|
|
|
|
|
67, 1, 71, 71, 71, 71, 71, 1, |
3967
|
|
|
|
|
|
|
72, 73, 74, 75, 76, 1, 72, 73, |
3968
|
|
|
|
|
|
|
74, 11, 75, 76, 1, 61, 61, 62, |
3969
|
|
|
|
|
|
|
61, 61, 77, 61, 63, 63, 1, 78, |
3970
|
|
|
|
|
|
|
61, 61, 61, 62, 61, 61, 63, 63, |
3971
|
|
|
|
|
|
|
1, 63, 79, 63, 61, 62, 61, 61, |
3972
|
|
|
|
|
|
|
63, 61, 63, 1, 7, 1, 61, 61, |
3973
|
|
|
|
|
|
|
61, 68, 61, 80, 61, 80, 67, 67, |
3974
|
|
|
|
|
|
|
1, 5, 61, 61, 61, 62, 61, 61, |
3975
|
|
|
|
|
|
|
63, 63, 1, 81, 81, 82, 62, 81, |
3976
|
|
|
|
|
|
|
81, 63, 63, 1, 81, 81, 81, 81, |
3977
|
|
|
|
|
|
|
62, 81, 63, 63, 1, 61, 61, 61, |
3978
|
|
|
|
|
|
|
61, 62, 61, 63, 63, 1, 61, 83, |
3979
|
|
|
|
|
|
|
61, 84, 62, 61, 63, 63, 1, 5, |
3980
|
|
|
|
|
|
|
5, 5, 5, 6, 1, 85, 86, 85, |
3981
|
|
|
|
|
|
|
5, 86, 5, 86, 6, 5, 85, 1, |
3982
|
|
|
|
|
|
|
87, 88, 89, 90, 91, 85, 85, 85, |
3983
|
|
|
|
|
|
|
1, 87, 92, 89, 93, 94, 1, 87, |
3984
|
|
|
|
|
|
|
92, 89, 17, 93, 94, 1, 87, 17, |
3985
|
|
|
|
|
|
|
88, 89, 90, 91, 85, 85, 1, 87, |
3986
|
|
|
|
|
|
|
20, 88, 89, 90, 91, 85, 85, 1, |
3987
|
|
|
|
|
|
|
95, 88, 89, 90, 91, 85, 85, 85, |
3988
|
|
|
|
|
|
|
1, 17, 87, 92, 89, 18, 93, 94, |
3989
|
|
|
|
|
|
|
1, 87, 97, 89, 98, 99, 96, 96, |
3990
|
|
|
|
|
|
|
96, 1, 66, 66, 66, 66, 100, 66, |
3991
|
|
|
|
|
|
|
67, 67, 1, 101, 102, 103, 61, 62, |
3992
|
|
|
|
|
|
|
61, 63, 63, 1, 104, 106, 106, 106, |
3993
|
|
|
|
|
|
|
106, 106, 106, 105, 107, 107, 107, 107, |
3994
|
|
|
|
|
|
|
107, 107, 1, 31, 108, 1, 31, 1, |
3995
|
|
|
|
|
|
|
109, 1, 105, 110, 104, 5, 5, 5, |
3996
|
|
|
|
|
|
|
112, 5, 6, 111, 1, 113, 114, 115, |
3997
|
|
|
|
|
|
|
116, 117, 111, 111, 111, 1, 113, 118, |
3998
|
|
|
|
|
|
|
115, 119, 120, 1, 113, 118, 115, 33, |
3999
|
|
|
|
|
|
|
119, 120, 1, 113, 33, 114, 115, 116, |
4000
|
|
|
|
|
|
|
117, 111, 111, 1, 113, 36, 114, 115, |
4001
|
|
|
|
|
|
|
116, 117, 111, 111, 1, 121, 114, 115, |
4002
|
|
|
|
|
|
|
116, 117, 111, 111, 111, 1, 33, 113, |
4003
|
|
|
|
|
|
|
118, 115, 34, 119, 120, 1, 113, 123, |
4004
|
|
|
|
|
|
|
115, 124, 125, 122, 122, 122, 1, 5, |
4005
|
|
|
|
|
|
|
5, 5, 5, 6, 111, 1, 4, 4, |
4006
|
|
|
|
|
|
|
4, 4, 4, 4, 1, 66, 66, 66, |
4007
|
|
|
|
|
|
|
66, 66, 68, 67, 67, 1, 81, 81, |
4008
|
|
|
|
|
|
|
81, 81, 81, 62, 63, 63, 1, 81, |
4009
|
|
|
|
|
|
|
81, 81, 81, 81, 62, 63, 63, 1, |
4010
|
|
|
|
|
|
|
126, 126, 126, 126, 126, 4, 1, 127, |
4011
|
|
|
|
|
|
|
127, 127, 127, 127, 129, 130, 128, 1, |
4012
|
|
|
|
|
|
|
0 |
4013
|
|
|
|
|
|
|
}; |
4014
|
|
|
|
|
|
|
|
4015
|
|
|
|
|
|
|
static const char _VBG_trans_targs[] = { |
4016
|
|
|
|
|
|
|
2, 0, 3, 41, 42, 42, 44, 42, |
4017
|
|
|
|
|
|
|
42, 44, 44, 51, 52, 13, 15, 42, |
4018
|
|
|
|
|
|
|
42, 68, 69, 23, 25, 77, 78, 83, |
4019
|
|
|
|
|
|
|
84, 42, 80, 29, 82, 31, 33, 42, |
4020
|
|
|
|
|
|
|
32, 87, 88, 37, 39, 4, 43, 46, |
4021
|
|
|
|
|
|
|
47, 48, 49, 53, 55, 56, 58, 60, |
4022
|
|
|
|
|
|
|
61, 19, 62, 63, 64, 75, 76, 95, |
4023
|
|
|
|
|
|
|
96, 97, 98, 99, 100, 5, 45, 42, |
4024
|
|
|
|
|
|
|
42, 6, 7, 42, 45, 8, 50, 9, |
4025
|
|
|
|
|
|
|
10, 11, 12, 14, 16, 54, 42, 57, |
4026
|
|
|
|
|
|
|
59, 17, 18, 65, 66, 67, 74, 20, |
4027
|
|
|
|
|
|
|
70, 22, 71, 72, 21, 24, 26, 73, |
4028
|
|
|
|
|
|
|
67, 70, 71, 72, 45, 27, 85, 94, |
4029
|
|
|
|
|
|
|
42, 42, 79, 28, 81, 30, 42, 86, |
4030
|
|
|
|
|
|
|
93, 34, 89, 36, 90, 91, 35, 38, |
4031
|
|
|
|
|
|
|
40, 92, 86, 89, 90, 91, 65, 65, |
4032
|
|
|
|
|
|
|
42, 42, 45 |
4033
|
|
|
|
|
|
|
}; |
4034
|
|
|
|
|
|
|
|
4035
|
|
|
|
|
|
|
static const char _VBG_trans_actions[] = { |
4036
|
|
|
|
|
|
|
0, 0, 0, 29, 23, 15, 15, 3, |
4037
|
|
|
|
|
|
|
46, 46, 40, 0, 0, 0, 0, 5, |
4038
|
|
|
|
|
|
|
34, 0, 0, 0, 0, 15, 15, 15, |
4039
|
|
|
|
|
|
|
15, 11, 11, 0, 11, 0, 0, 9, |
4040
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4041
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4042
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 21, |
4043
|
|
|
|
|
|
|
0, 0, 0, 23, 0, 0, 19, 19, |
4044
|
|
|
|
|
|
|
7, 0, 0, 49, 49, 0, 49, 0, |
4045
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 19, 17, 19, |
4046
|
|
|
|
|
|
|
49, 0, 0, 27, 27, 0, 0, 0, |
4047
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4048
|
|
|
|
|
|
|
25, 25, 25, 25, 56, 0, 9, 9, |
4049
|
|
|
|
|
|
|
13, 43, 43, 0, 9, 0, 37, 0, |
4050
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4051
|
|
|
|
|
|
|
0, 0, 7, 7, 7, 7, 23, 1, |
4052
|
|
|
|
|
|
|
31, 1, 52 |
4053
|
|
|
|
|
|
|
}; |
4054
|
|
|
|
|
|
|
|
4055
|
|
|
|
|
|
|
static const char _VBG_eof_actions[] = { |
4056
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4057
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4058
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4059
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4060
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4061
|
|
|
|
|
|
|
0, 0, 0, 3, 0, 0, 3, 3, |
4062
|
|
|
|
|
|
|
3, 3, 0, 3, 3, 3, 0, 3, |
4063
|
|
|
|
|
|
|
3, 0, 3, 0, 3, 3, 3, 3, |
4064
|
|
|
|
|
|
|
3, 0, 0, 25, 25, 25, 25, 25, |
4065
|
|
|
|
|
|
|
25, 25, 25, 3, 3, 0, 0, 0, |
4066
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 7, 7, |
4067
|
|
|
|
|
|
|
7, 7, 7, 7, 7, 7, 0, 0, |
4068
|
|
|
|
|
|
|
3, 3, 3, 0, 3 |
4069
|
|
|
|
|
|
|
}; |
4070
|
|
|
|
|
|
|
|
4071
|
|
|
|
|
|
|
// State in which add_VBG starts running the _VBG automaton.
static const int VBG_start = 1; |
4072
|
|
|
|
|
|
|
|
4073
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBG(const string& form, vector& lemmas) const { |
4074
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
4075
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
4076
|
|
|
|
|
|
|
|
4077
|
|
|
|
|
|
|
{ |
4078
|
|
|
|
|
|
|
cs = VBG_start; |
4079
|
|
|
|
|
|
|
} |
4080
|
|
|
|
|
|
|
|
4081
|
|
|
|
|
|
|
{ |
4082
|
|
|
|
|
|
|
int _klen; |
4083
|
|
|
|
|
|
|
unsigned int _trans; |
4084
|
|
|
|
|
|
|
const char *_acts; |
4085
|
|
|
|
|
|
|
unsigned int _nacts; |
4086
|
|
|
|
|
|
|
const char *_keys; |
4087
|
|
|
|
|
|
|
|
4088
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
4089
|
|
|
|
|
|
|
goto _test_eof; |
4090
|
|
|
|
|
|
|
if ( cs == 0 ) |
4091
|
|
|
|
|
|
|
goto _out; |
4092
|
|
|
|
|
|
|
_resume: |
4093
|
0
|
|
|
|
|
|
_keys = _VBG_trans_keys + _VBG_key_offsets[cs]; |
4094
|
0
|
|
|
|
|
|
_trans = _VBG_index_offsets[cs]; |
4095
|
|
|
|
|
|
|
|
4096
|
0
|
|
|
|
|
|
_klen = _VBG_single_lengths[cs]; |
4097
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
4098
|
|
|
|
|
|
|
const char *_lower = _keys; |
4099
|
|
|
|
|
|
|
const char *_mid; |
4100
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
4101
|
|
|
|
|
|
|
while (1) { |
4102
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
4103
|
|
|
|
|
|
|
break; |
4104
|
|
|
|
|
|
|
|
4105
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
4106
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
4107
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
4108
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
4109
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
4110
|
|
|
|
|
|
|
else { |
4111
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
4112
|
0
|
|
|
|
|
|
goto _match; |
4113
|
|
|
|
|
|
|
} |
4114
|
|
|
|
|
|
|
} |
4115
|
0
|
|
|
|
|
|
_keys += _klen; |
4116
|
0
|
|
|
|
|
|
_trans += _klen; |
4117
|
|
|
|
|
|
|
} |
4118
|
|
|
|
|
|
|
|
4119
|
0
|
|
|
|
|
|
_klen = _VBG_range_lengths[cs]; |
4120
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
4121
|
|
|
|
|
|
|
const char *_lower = _keys; |
4122
|
|
|
|
|
|
|
const char *_mid; |
4123
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
4124
|
|
|
|
|
|
|
while (1) { |
4125
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
4126
|
|
|
|
|
|
|
break; |
4127
|
|
|
|
|
|
|
|
4128
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
4129
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
4130
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
4131
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
4132
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
4133
|
|
|
|
|
|
|
else { |
4134
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
4135
|
0
|
|
|
|
|
|
goto _match; |
4136
|
|
|
|
|
|
|
} |
4137
|
|
|
|
|
|
|
} |
4138
|
0
|
|
|
|
|
|
_trans += _klen; |
4139
|
|
|
|
|
|
|
} |
4140
|
|
|
|
|
|
|
|
4141
|
|
|
|
|
|
|
_match: |
4142
|
0
|
|
|
|
|
|
_trans = _VBG_indicies[_trans]; |
4143
|
0
|
|
|
|
|
|
cs = _VBG_trans_targs[_trans]; |
4144
|
|
|
|
|
|
|
|
4145
|
0
|
0
|
|
|
|
|
if ( _VBG_trans_actions[_trans] == 0 ) |
4146
|
|
|
|
|
|
|
goto _again; |
4147
|
|
|
|
|
|
|
|
4148
|
0
|
|
|
|
|
|
_acts = _VBG_actions + _VBG_trans_actions[_trans]; |
4149
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
4150
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
4151
|
|
|
|
|
|
|
{ |
4152
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
4153
|
|
|
|
|
|
|
{ |
4154
|
|
|
|
|
|
|
case 0: |
4155
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
4156
|
|
|
|
|
|
|
break; |
4157
|
|
|
|
|
|
|
case 1: |
4158
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 3, append = "e"; } |
4159
|
|
|
|
|
|
|
break; |
4160
|
|
|
|
|
|
|
case 2: |
4161
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
4162
|
|
|
|
|
|
|
break; |
4163
|
|
|
|
|
|
|
case 3: |
4164
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = "e"; } |
4165
|
|
|
|
|
|
|
break; |
4166
|
|
|
|
|
|
|
case 4: |
4167
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 3, append = nullptr; } |
4168
|
|
|
|
|
|
|
break; |
4169
|
|
|
|
|
|
|
case 5: |
4170
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
4171
|
|
|
|
|
|
|
break; |
4172
|
|
|
|
|
|
|
case 6: |
4173
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 3, append = nullptr; } |
4174
|
|
|
|
|
|
|
break; |
4175
|
|
|
|
|
|
|
case 7: |
4176
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 3, append = "e"; } |
4177
|
|
|
|
|
|
|
break; |
4178
|
|
|
|
|
|
|
case 8: |
4179
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 3, append = nullptr; } |
4180
|
|
|
|
|
|
|
break; |
4181
|
|
|
|
|
|
|
case 9: |
4182
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 3, append = "e"; } |
4183
|
|
|
|
|
|
|
break; |
4184
|
|
|
|
|
|
|
case 10: |
4185
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 3, append = nullptr; } |
4186
|
|
|
|
|
|
|
break; |
4187
|
|
|
|
|
|
|
case 11: |
4188
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 3, append = "e"; } |
4189
|
|
|
|
|
|
|
break; |
4190
|
|
|
|
|
|
|
case 12: |
4191
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 3, append = nullptr; } |
4192
|
|
|
|
|
|
|
break; |
4193
|
|
|
|
|
|
|
case 13: |
4194
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 3, append = "e"; } |
4195
|
|
|
|
|
|
|
break; |
4196
|
|
|
|
|
|
|
case 14: |
4197
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 3, append = nullptr; } |
4198
|
|
|
|
|
|
|
break; |
4199
|
|
|
|
|
|
|
case 15: |
4200
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
4201
|
|
|
|
|
|
|
break; |
4202
|
|
|
|
|
|
|
case 16: |
4203
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 3, append = nullptr; } |
4204
|
|
|
|
|
|
|
break; |
4205
|
|
|
|
|
|
|
case 17: |
4206
|
0
|
0
|
|
|
|
|
{ if (best > 'r') best = 'r', remove = 3, append = "e"; } |
4207
|
|
|
|
|
|
|
break; |
4208
|
|
|
|
|
|
|
} |
4209
|
|
|
|
|
|
|
} |
4210
|
|
|
|
|
|
|
|
4211
|
|
|
|
|
|
|
_again: |
4212
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
4213
|
|
|
|
|
|
|
goto _out; |
4214
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
4215
|
|
|
|
|
|
|
goto _resume; |
4216
|
|
|
|
|
|
|
_test_eof: {} |
4217
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
4218
|
|
|
|
|
|
|
{ |
4219
|
0
|
|
|
|
|
|
const char *__acts = _VBG_actions + _VBG_eof_actions[cs]; |
4220
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
4221
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
4222
|
0
|
|
|
|
|
|
switch ( *__acts++ ) { |
4223
|
|
|
|
|
|
|
case 2: |
4224
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
4225
|
|
|
|
|
|
|
break; |
4226
|
|
|
|
|
|
|
case 5: |
4227
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
4228
|
|
|
|
|
|
|
break; |
4229
|
|
|
|
|
|
|
case 15: |
4230
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
4231
|
|
|
|
|
|
|
break; |
4232
|
|
|
|
|
|
|
} |
4233
|
|
|
|
|
|
|
} |
4234
|
|
|
|
|
|
|
} |
4235
|
|
|
|
|
|
|
|
4236
|
|
|
|
|
|
|
_out: {} |
4237
|
|
|
|
|
|
|
} |
4238
|
|
|
|
|
|
|
|
4239
|
0
|
0
|
|
|
|
|
add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
4240
|
0
|
|
|
|
|
|
} |
4241
|
|
|
|
|
|
|
|
4242
|
|
|
|
|
|
|
static const char _VBD_VBN_actions[] = { |
4243
|
|
|
|
|
|
|
0, 1, 0, 1, 2, 1, 3, 1, |
4244
|
|
|
|
|
|
|
4, 1, 5, 1, 6, 1, 7, 1, |
4245
|
|
|
|
|
|
|
8, 1, 9, 1, 10, 1, 11, 1, |
4246
|
|
|
|
|
|
|
13, 1, 14, 1, 15, 1, 16, 1, |
4247
|
|
|
|
|
|
|
17, 2, 1, 16, 2, 4, 5, 2, |
4248
|
|
|
|
|
|
|
8, 16, 2, 9, 13, 2, 9, 14, |
4249
|
|
|
|
|
|
|
2, 12, 13, 2, 13, 14, 2, 15, |
4250
|
|
|
|
|
|
|
16, 3, 1, 3, 16, 3, 3, 15, |
4251
|
|
|
|
|
|
|
16 |
4252
|
|
|
|
|
|
|
}; |
4253
|
|
|
|
|
|
|
|
4254
|
|
|
|
|
|
|
static const short _VBD_VBN_key_offsets[] = { |
4255
|
|
|
|
|
|
|
0, 0, 2, 3, 9, 14, 24, 29, |
4256
|
|
|
|
|
|
|
34, 44, 46, 47, 48, 49, 50, 51, |
4257
|
|
|
|
|
|
|
52, 60, 67, 74, 76, 77, 78, 79, |
4258
|
|
|
|
|
|
|
80, 81, 82, 87, 95, 96, 97, 98, |
4259
|
|
|
|
|
|
|
99, 100, 102, 103, 104, 105, 106, 107, |
4260
|
|
|
|
|
|
|
108, 114, 115, 140, 140, 149, 150, 155, |
4261
|
|
|
|
|
|
|
166, 175, 184, 194, 199, 204, 210, 220, |
4262
|
|
|
|
|
|
|
220, 229, 241, 242, 253, 253, 262, 271, |
4263
|
|
|
|
|
|
|
280, 289, 298, 303, 316, 327, 332, 338, |
4264
|
|
|
|
|
|
|
348, 358, 369, 376, 387, 396, 405, 405, |
4265
|
|
|
|
|
|
|
416, 427, 429, 430, 431, 431, 432, 440, |
4266
|
|
|
|
|
|
|
451, 456, 462, 472, 482, 493, 500, 511, |
4267
|
|
|
|
|
|
|
518, 524, 533, 542, 551 |
4268
|
|
|
|
|
|
|
}; |
4269
|
|
|
|
|
|
|
|
4270
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_keys[] = { |
4271
|
|
|
|
|
|
|
100, 110, 101, 97, 101, 105, 111, 117, |
4272
|
|
|
|
|
|
|
121, 97, 101, 105, 111, 117, 98, 100, |
4273
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 122, |
4274
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 97, 101, 105, |
4275
|
|
|
|
|
|
|
111, 117, 98, 100, 102, 104, 106, 110, |
4276
|
|
|
|
|
|
|
112, 116, 118, 122, 98, 114, 105, 114, |
4277
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
4278
|
|
|
|
|
|
|
117, 121, 98, 122, 97, 101, 105, 111, |
4279
|
|
|
|
|
|
|
117, 98, 122, 97, 101, 105, 111, 117, |
4280
|
|
|
|
|
|
|
98, 122, 98, 114, 105, 114, 112, 105, |
4281
|
|
|
|
|
|
|
109, 101, 97, 101, 105, 111, 117, 97, |
4282
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 117, 120, 105, |
4283
|
|
|
|
|
|
|
112, 105, 109, 101, 98, 114, 105, 114, |
4284
|
|
|
|
|
|
|
112, 105, 109, 101, 97, 101, 105, 111, |
4285
|
|
|
|
|
|
|
117, 121, 101, 98, 99, 100, 102, 103, |
4286
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
4287
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 120, |
4288
|
|
|
|
|
|
|
121, 122, 97, 111, 97, 98, 101, 105, |
4289
|
|
|
|
|
|
|
111, 117, 122, 99, 120, 113, 97, 101, |
4290
|
|
|
|
|
|
|
105, 111, 117, 98, 99, 100, 105, 111, |
4291
|
|
|
|
|
|
|
117, 122, 97, 101, 102, 120, 97, 100, |
4292
|
|
|
|
|
|
|
101, 105, 111, 117, 122, 98, 120, 97, |
4293
|
|
|
|
|
|
|
101, 102, 105, 111, 117, 122, 98, 120, |
4294
|
|
|
|
|
|
|
97, 101, 103, 105, 110, 111, 117, 122, |
4295
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 117, 101, |
4296
|
|
|
|
|
|
|
110, 111, 115, 120, 101, 110, 111, 112, |
4297
|
|
|
|
|
|
|
115, 120, 97, 101, 104, 105, 111, 116, |
4298
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 106, |
4299
|
|
|
|
|
|
|
111, 117, 122, 98, 120, 98, 99, 100, |
4300
|
|
|
|
|
|
|
105, 107, 111, 117, 122, 97, 101, 102, |
4301
|
|
|
|
|
|
|
120, 105, 97, 101, 105, 108, 111, 114, |
4302
|
|
|
|
|
|
|
117, 119, 122, 98, 120, 97, 101, 105, |
4303
|
|
|
|
|
|
|
109, 111, 117, 122, 98, 120, 97, 101, |
4304
|
|
|
|
|
|
|
105, 110, 111, 117, 122, 98, 120, 97, |
4305
|
|
|
|
|
|
|
101, 105, 111, 112, 117, 122, 98, 120, |
4306
|
|
|
|
|
|
|
97, 101, 105, 111, 113, 117, 122, 98, |
4307
|
|
|
|
|
|
|
120, 97, 101, 105, 111, 114, 117, 122, |
4308
|
|
|
|
|
|
|
98, 120, 97, 101, 105, 111, 117, 98, |
4309
|
|
|
|
|
|
|
99, 100, 105, 108, 110, 111, 116, 117, |
4310
|
|
|
|
|
|
|
97, 101, 102, 122, 101, 110, 111, 115, |
4311
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 101, |
4312
|
|
|
|
|
|
|
110, 111, 115, 120, 101, 110, 111, 112, |
4313
|
|
|
|
|
|
|
115, 120, 101, 105, 110, 111, 115, 120, |
4314
|
|
|
|
|
|
|
98, 116, 118, 122, 101, 105, 110, 111, |
4315
|
|
|
|
|
|
|
115, 120, 98, 116, 118, 122, 101, 110, |
4316
|
|
|
|
|
|
|
111, 115, 120, 98, 104, 106, 116, 118, |
4317
|
|
|
|
|
|
|
122, 98, 101, 110, 111, 114, 115, 120, |
4318
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
4319
|
|
|
|
|
|
|
116, 118, 122, 97, 101, 105, 111, 115, |
4320
|
|
|
|
|
|
|
117, 122, 98, 120, 97, 101, 105, 111, |
4321
|
|
|
|
|
|
|
116, 117, 122, 98, 120, 122, 98, 100, |
4322
|
|
|
|
|
|
|
102, 104, 106, 110, 112, 116, 118, 120, |
4323
|
|
|
|
|
|
|
122, 98, 100, 102, 104, 106, 110, 112, |
4324
|
|
|
|
|
|
|
116, 118, 120, 98, 114, 112, 114, 113, |
4325
|
|
|
|
|
|
|
97, 101, 105, 108, 111, 117, 98, 122, |
4326
|
|
|
|
|
|
|
101, 110, 111, 115, 120, 98, 104, 106, |
4327
|
|
|
|
|
|
|
116, 118, 122, 101, 110, 111, 115, 120, |
4328
|
|
|
|
|
|
|
101, 110, 111, 112, 115, 120, 101, 105, |
4329
|
|
|
|
|
|
|
110, 111, 115, 120, 98, 116, 118, 122, |
4330
|
|
|
|
|
|
|
101, 105, 110, 111, 115, 120, 98, 116, |
4331
|
|
|
|
|
|
|
118, 122, 101, 110, 111, 115, 120, 98, |
4332
|
|
|
|
|
|
|
104, 106, 116, 118, 122, 98, 101, 110, |
4333
|
|
|
|
|
|
|
111, 114, 115, 120, 101, 110, 111, 115, |
4334
|
|
|
|
|
|
|
120, 98, 104, 106, 116, 118, 122, 97, |
4335
|
|
|
|
|
|
|
101, 105, 111, 117, 98, 122, 97, 101, |
4336
|
|
|
|
|
|
|
105, 111, 117, 121, 97, 101, 105, 111, |
4337
|
|
|
|
|
|
|
117, 118, 122, 98, 120, 97, 101, 105, |
4338
|
|
|
|
|
|
|
111, 117, 119, 122, 98, 120, 97, 101, |
4339
|
|
|
|
|
|
|
105, 111, 117, 120, 122, 98, 119, 97, |
4340
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 122, 98, 120, |
4341
|
|
|
|
|
|
|
0 |
4342
|
|
|
|
|
|
|
}; |
4343
|
|
|
|
|
|
|
|
4344
|
|
|
|
|
|
|
static const char _VBD_VBN_single_lengths[] = { |
4345
|
|
|
|
|
|
|
0, 2, 1, 6, 5, 0, 5, 5, |
4346
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 1, |
4347
|
|
|
|
|
|
|
6, 5, 5, 2, 1, 1, 1, 1, |
4348
|
|
|
|
|
|
|
1, 1, 5, 8, 1, 1, 1, 1, |
4349
|
|
|
|
|
|
|
1, 2, 1, 1, 1, 1, 1, 1, |
4350
|
|
|
|
|
|
|
6, 1, 23, 0, 7, 1, 5, 7, |
4351
|
|
|
|
|
|
|
7, 7, 8, 5, 5, 6, 8, 0, |
4352
|
|
|
|
|
|
|
7, 8, 1, 9, 0, 7, 7, 7, |
4353
|
|
|
|
|
|
|
7, 7, 5, 9, 5, 5, 6, 6, |
4354
|
|
|
|
|
|
|
6, 5, 7, 5, 7, 7, 0, 1, |
4355
|
|
|
|
|
|
|
1, 2, 1, 1, 0, 1, 6, 5, |
4356
|
|
|
|
|
|
|
5, 6, 6, 6, 5, 7, 5, 5, |
4357
|
|
|
|
|
|
|
6, 7, 7, 7, 7 |
4358
|
|
|
|
|
|
|
}; |
4359
|
|
|
|
|
|
|
|
4360
|
|
|
|
|
|
|
static const char _VBD_VBN_range_lengths[] = { |
4361
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 5, 0, 0, |
4362
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 0, 0, |
4363
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 0, 0, |
4364
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4365
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4366
|
|
|
|
|
|
|
0, 0, 1, 0, 1, 0, 0, 2, |
4367
|
|
|
|
|
|
|
1, 1, 1, 0, 0, 0, 1, 0, |
4368
|
|
|
|
|
|
|
1, 2, 0, 1, 0, 1, 1, 1, |
4369
|
|
|
|
|
|
|
1, 1, 0, 2, 3, 0, 0, 2, |
4370
|
|
|
|
|
|
|
2, 3, 0, 3, 1, 1, 0, 5, |
4371
|
|
|
|
|
|
|
5, 0, 0, 0, 0, 0, 1, 3, |
4372
|
|
|
|
|
|
|
0, 0, 2, 2, 3, 0, 3, 1, |
4373
|
|
|
|
|
|
|
0, 1, 1, 1, 1 |
4374
|
|
|
|
|
|
|
}; |
4375
|
|
|
|
|
|
|
|
4376
|
|
|
|
|
|
|
static const short _VBD_VBN_index_offsets[] = { |
4377
|
|
|
|
|
|
|
0, 0, 3, 5, 12, 18, 24, 30, |
4378
|
|
|
|
|
|
|
36, 42, 45, 47, 49, 51, 53, 55, |
4379
|
|
|
|
|
|
|
57, 65, 72, 79, 82, 84, 86, 88, |
4380
|
|
|
|
|
|
|
90, 92, 94, 100, 109, 111, 113, 115, |
4381
|
|
|
|
|
|
|
117, 119, 122, 124, 126, 128, 130, 132, |
4382
|
|
|
|
|
|
|
134, 141, 143, 168, 169, 178, 180, 186, |
4383
|
|
|
|
|
|
|
196, 205, 214, 224, 230, 236, 243, 253, |
4384
|
|
|
|
|
|
|
254, 263, 274, 276, 287, 288, 297, 306, |
4385
|
|
|
|
|
|
|
315, 324, 333, 339, 351, 360, 366, 373, |
4386
|
|
|
|
|
|
|
382, 391, 400, 408, 417, 426, 435, 436, |
4387
|
|
|
|
|
|
|
443, 450, 453, 455, 457, 458, 460, 468, |
4388
|
|
|
|
|
|
|
477, 483, 490, 499, 508, 517, 525, 534, |
4389
|
|
|
|
|
|
|
541, 548, 557, 566, 575 |
4390
|
|
|
|
|
|
|
}; |
4391
|
|
|
|
|
|
|
|
4392
|
|
|
|
|
|
|
static const unsigned char _VBD_VBN_indicies[] = { |
4393
|
|
|
|
|
|
|
0, 2, 1, 3, 1, 4, 4, 4, |
4394
|
|
|
|
|
|
|
4, 4, 4, 1, 5, 5, 5, 5, |
4395
|
|
|
|
|
|
|
6, 1, 7, 7, 7, 7, 7, 1, |
4396
|
|
|
|
|
|
|
8, 8, 8, 8, 9, 1, 5, 5, |
4397
|
|
|
|
|
|
|
5, 5, 10, 1, 11, 11, 11, 11, |
4398
|
|
|
|
|
|
|
11, 1, 11, 12, 1, 11, 1, 13, |
4399
|
|
|
|
|
|
|
1, 11, 1, 14, 1, 11, 1, 11, |
4400
|
|
|
|
|
|
|
1, 4, 4, 4, 4, 4, 16, 15, |
4401
|
|
|
|
|
|
|
1, 5, 5, 5, 5, 6, 17, 1, |
4402
|
|
|
|
|
|
|
5, 5, 5, 5, 6, 18, 1, 19, |
4403
|
|
|
|
|
|
|
20, 1, 19, 1, 21, 1, 19, 1, |
4404
|
|
|
|
|
|
|
22, 1, 19, 1, 19, 1, 23, 24, |
4405
|
|
|
|
|
|
|
23, 25, 26, 1, 27, 28, 27, 29, |
4406
|
|
|
|
|
|
|
30, 31, 27, 32, 1, 33, 1, 33, |
4407
|
|
|
|
|
|
|
1, 34, 1, 33, 1, 33, 1, 35, |
4408
|
|
|
|
|
|
|
36, 1, 35, 1, 37, 1, 35, 1, |
4409
|
|
|
|
|
|
|
38, 1, 35, 1, 35, 1, 39, 39, |
4410
|
|
|
|
|
|
|
39, 39, 39, 4, 1, 40, 1, 42, |
4411
|
|
|
|
|
|
|
43, 44, 45, 46, 47, 48, 49, 50, |
4412
|
|
|
|
|
|
|
51, 52, 53, 54, 55, 56, 57, 58, |
4413
|
|
|
|
|
|
|
59, 60, 61, 62, 63, 64, 41, 1, |
4414
|
|
|
|
|
|
|
1, 65, 66, 65, 65, 65, 65, 4, |
4415
|
|
|
|
|
|
|
4, 1, 67, 1, 68, 68, 68, 68, |
4416
|
|
|
|
|
|
|
68, 1, 70, 71, 70, 69, 69, 69, |
4417
|
|
|
|
|
|
|
70, 69, 70, 1, 72, 66, 72, 72, |
4418
|
|
|
|
|
|
|
72, 72, 4, 4, 1, 65, 65, 66, |
4419
|
|
|
|
|
|
|
65, 65, 65, 4, 4, 1, 69, 69, |
4420
|
|
|
|
|
|
|
71, 69, 73, 69, 69, 70, 70, 1, |
4421
|
|
|
|
|
|
|
74, 74, 74, 74, 74, 1, 75, 76, |
4422
|
|
|
|
|
|
|
77, 78, 79, 1, 75, 76, 77, 11, |
4423
|
|
|
|
|
|
|
78, 79, 1, 65, 65, 66, 65, 65, |
4424
|
|
|
|
|
|
|
80, 65, 4, 4, 1, 81, 65, 65, |
4425
|
|
|
|
|
|
|
65, 66, 65, 65, 4, 4, 1, 4, |
4426
|
|
|
|
|
|
|
82, 4, 65, 66, 65, 65, 4, 65, |
4427
|
|
|
|
|
|
|
4, 1, 7, 1, 65, 65, 65, 71, |
4428
|
|
|
|
|
|
|
65, 83, 65, 83, 70, 70, 1, 5, |
4429
|
|
|
|
|
|
|
65, 65, 65, 66, 65, 65, 4, 4, |
4430
|
|
|
|
|
|
|
1, 84, 84, 85, 66, 84, 84, 4, |
4431
|
|
|
|
|
|
|
4, 1, 84, 84, 84, 84, 66, 84, |
4432
|
|
|
|
|
|
|
4, 4, 1, 65, 65, 65, 65, 66, |
4433
|
|
|
|
|
|
|
65, 4, 4, 1, 65, 86, 65, 87, |
4434
|
|
|
|
|
|
|
66, 65, 4, 4, 1, 5, 5, 5, |
4435
|
|
|
|
|
|
|
5, 6, 1, 88, 89, 88, 5, 89, |
4436
|
|
|
|
|
|
|
89, 5, 89, 6, 5, 88, 1, 90, |
4437
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 88, 1, |
4438
|
|
|
|
|
|
|
90, 95, 92, 96, 97, 1, 90, 95, |
4439
|
|
|
|
|
|
|
92, 19, 96, 97, 1, 90, 19, 91, |
4440
|
|
|
|
|
|
|
92, 93, 94, 88, 88, 1, 90, 22, |
4441
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 1, 98, |
4442
|
|
|
|
|
|
|
91, 92, 93, 94, 88, 88, 88, 1, |
4443
|
|
|
|
|
|
|
19, 90, 95, 92, 20, 96, 97, 1, |
4444
|
|
|
|
|
|
|
90, 100, 92, 101, 102, 99, 99, 99, |
4445
|
|
|
|
|
|
|
1, 69, 69, 69, 69, 103, 69, 70, |
4446
|
|
|
|
|
|
|
70, 1, 104, 105, 106, 65, 66, 65, |
4447
|
|
|
|
|
|
|
4, 4, 1, 107, 109, 109, 109, 109, |
4448
|
|
|
|
|
|
|
109, 109, 108, 110, 110, 110, 110, 110, |
4449
|
|
|
|
|
|
|
110, 1, 33, 111, 1, 33, 1, 112, |
4450
|
|
|
|
|
|
|
1, 108, 113, 107, 5, 5, 5, 115, |
4451
|
|
|
|
|
|
|
5, 6, 114, 1, 116, 117, 118, 119, |
4452
|
|
|
|
|
|
|
120, 114, 114, 114, 1, 116, 121, 118, |
4453
|
|
|
|
|
|
|
122, 123, 1, 116, 121, 118, 35, 122, |
4454
|
|
|
|
|
|
|
123, 1, 116, 35, 117, 118, 119, 120, |
4455
|
|
|
|
|
|
|
114, 114, 1, 116, 38, 117, 118, 119, |
4456
|
|
|
|
|
|
|
120, 114, 114, 1, 124, 117, 118, 119, |
4457
|
|
|
|
|
|
|
120, 114, 114, 114, 1, 35, 116, 121, |
4458
|
|
|
|
|
|
|
118, 36, 122, 123, 1, 116, 126, 118, |
4459
|
|
|
|
|
|
|
127, 128, 125, 125, 125, 1, 5, 5, |
4460
|
|
|
|
|
|
|
5, 5, 6, 114, 1, 4, 4, 4, |
4461
|
|
|
|
|
|
|
4, 4, 4, 1, 69, 69, 69, 69, |
4462
|
|
|
|
|
|
|
69, 71, 70, 70, 1, 84, 84, 84, |
4463
|
|
|
|
|
|
|
84, 84, 66, 4, 4, 1, 84, 84, |
4464
|
|
|
|
|
|
|
84, 84, 84, 66, 4, 4, 1, 129, |
4465
|
|
|
|
|
|
|
129, 129, 129, 129, 131, 132, 130, 1, |
4466
|
|
|
|
|
|
|
0 |
4467
|
|
|
|
|
|
|
}; |
4468
|
|
|
|
|
|
|
|
4469
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_targs[] = { |
4470
|
|
|
|
|
|
|
2, 0, 41, 42, 43, 43, 45, 43, |
4471
|
|
|
|
|
|
|
43, 45, 45, 52, 53, 12, 14, 43, |
4472
|
|
|
|
|
|
|
43, 43, 43, 69, 70, 22, 24, 78, |
4473
|
|
|
|
|
|
|
79, 84, 85, 43, 81, 28, 83, 30, |
4474
|
|
|
|
|
|
|
32, 43, 31, 88, 89, 36, 38, 66, |
4475
|
|
|
|
|
|
|
43, 3, 44, 47, 48, 49, 50, 54, |
4476
|
|
|
|
|
|
|
16, 56, 57, 59, 61, 62, 63, 64, |
4477
|
|
|
|
|
|
|
65, 76, 77, 96, 97, 98, 99, 40, |
4478
|
|
|
|
|
|
|
100, 4, 46, 43, 5, 6, 43, 46, |
4479
|
|
|
|
|
|
|
7, 51, 8, 9, 10, 11, 13, 15, |
4480
|
|
|
|
|
|
|
55, 43, 58, 60, 17, 18, 66, 67, |
4481
|
|
|
|
|
|
|
68, 75, 19, 71, 21, 72, 73, 20, |
4482
|
|
|
|
|
|
|
23, 25, 74, 68, 71, 72, 73, 46, |
4483
|
|
|
|
|
|
|
26, 86, 95, 43, 43, 80, 27, 82, |
4484
|
|
|
|
|
|
|
29, 43, 87, 94, 33, 90, 35, 91, |
4485
|
|
|
|
|
|
|
92, 34, 37, 39, 93, 87, 90, 91, |
4486
|
|
|
|
|
|
|
92, 66, 43, 43, 46 |
4487
|
|
|
|
|
|
|
}; |
4488
|
|
|
|
|
|
|
|
4489
|
|
|
|
|
|
|
static const char _VBD_VBN_trans_actions[] = { |
4490
|
|
|
|
|
|
|
0, 0, 0, 31, 29, 25, 25, 5, |
4491
|
|
|
|
|
|
|
51, 51, 45, 0, 0, 0, 0, 15, |
4492
|
|
|
|
|
|
|
39, 9, 36, 0, 0, 0, 0, 25, |
4493
|
|
|
|
|
|
|
25, 25, 25, 21, 21, 0, 21, 0, |
4494
|
|
|
|
|
|
|
0, 19, 0, 0, 0, 0, 0, 29, |
4495
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
4496
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4497
|
|
|
|
|
|
|
0, 0, 0, 27, 0, 0, 0, 0, |
4498
|
|
|
|
|
|
|
0, 0, 29, 17, 0, 0, 54, 54, |
4499
|
|
|
|
|
|
|
0, 54, 0, 0, 0, 0, 0, 0, |
4500
|
|
|
|
|
|
|
29, 27, 29, 54, 0, 0, 13, 13, |
4501
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4502
|
|
|
|
|
|
|
0, 0, 0, 7, 7, 7, 7, 61, |
4503
|
|
|
|
|
|
|
0, 19, 19, 23, 48, 48, 0, 19, |
4504
|
|
|
|
|
|
|
0, 42, 0, 0, 0, 0, 0, 0, |
4505
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 17, 17, 17, |
4506
|
|
|
|
|
|
|
17, 3, 33, 3, 57 |
4507
|
|
|
|
|
|
|
}; |
4508
|
|
|
|
|
|
|
|
4509
|
|
|
|
|
|
|
static const char _VBD_VBN_eof_actions[] = { |
4510
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4511
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4512
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4513
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4514
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
4515
|
|
|
|
|
|
|
0, 0, 0, 0, 5, 0, 0, 5, |
4516
|
|
|
|
|
|
|
5, 5, 5, 0, 5, 5, 5, 0, |
4517
|
|
|
|
|
|
|
5, 5, 0, 5, 0, 5, 5, 5, |
4518
|
|
|
|
|
|
|
5, 5, 0, 0, 11, 11, 11, 11, |
4519
|
|
|
|
|
|
|
11, 11, 11, 11, 5, 5, 0, 0, |
4520
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 17, |
4521
|
|
|
|
|
|
|
17, 17, 17, 17, 17, 17, 17, 0, |
4522
|
|
|
|
|
|
|
0, 5, 5, 5, 5 |
4523
|
|
|
|
|
|
|
}; |
4524
|
|
|
|
|
|
|
|
4525
|
|
|
|
|
|
|
// State in which add_VBD_VBN starts running the _VBD_VBN automaton.
static const int VBD_VBN_start = 1; |
4526
|
|
|
|
|
|
|
|
4527
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBD_VBN(const string& form, vector& lemmas) const { |
4528
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
4529
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
4530
|
|
|
|
|
|
|
|
4531
|
|
|
|
|
|
|
{ |
4532
|
|
|
|
|
|
|
cs = VBD_VBN_start; |
4533
|
|
|
|
|
|
|
} |
4534
|
|
|
|
|
|
|
|
4535
|
|
|
|
|
|
|
{ |
4536
|
|
|
|
|
|
|
int _klen; |
4537
|
|
|
|
|
|
|
unsigned int _trans; |
4538
|
|
|
|
|
|
|
const char *_acts; |
4539
|
|
|
|
|
|
|
unsigned int _nacts; |
4540
|
|
|
|
|
|
|
const char *_keys; |
4541
|
|
|
|
|
|
|
|
4542
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
4543
|
|
|
|
|
|
|
goto _test_eof; |
4544
|
|
|
|
|
|
|
if ( cs == 0 ) |
4545
|
|
|
|
|
|
|
goto _out; |
4546
|
|
|
|
|
|
|
_resume: |
4547
|
0
|
|
|
|
|
|
_keys = _VBD_VBN_trans_keys + _VBD_VBN_key_offsets[cs]; |
4548
|
0
|
|
|
|
|
|
_trans = _VBD_VBN_index_offsets[cs]; |
4549
|
|
|
|
|
|
|
|
4550
|
0
|
|
|
|
|
|
_klen = _VBD_VBN_single_lengths[cs]; |
4551
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
4552
|
|
|
|
|
|
|
const char *_lower = _keys; |
4553
|
|
|
|
|
|
|
const char *_mid; |
4554
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
4555
|
|
|
|
|
|
|
while (1) { |
4556
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
4557
|
|
|
|
|
|
|
break; |
4558
|
|
|
|
|
|
|
|
4559
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
4560
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
4561
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
4562
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
4563
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
4564
|
|
|
|
|
|
|
else { |
4565
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
4566
|
0
|
|
|
|
|
|
goto _match; |
4567
|
|
|
|
|
|
|
} |
4568
|
|
|
|
|
|
|
} |
4569
|
0
|
|
|
|
|
|
_keys += _klen; |
4570
|
0
|
|
|
|
|
|
_trans += _klen; |
4571
|
|
|
|
|
|
|
} |
4572
|
|
|
|
|
|
|
|
4573
|
0
|
|
|
|
|
|
_klen = _VBD_VBN_range_lengths[cs]; |
4574
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
4575
|
|
|
|
|
|
|
const char *_lower = _keys; |
4576
|
|
|
|
|
|
|
const char *_mid; |
4577
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
4578
|
|
|
|
|
|
|
while (1) { |
4579
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
4580
|
|
|
|
|
|
|
break; |
4581
|
|
|
|
|
|
|
|
4582
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
4583
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
4584
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
4585
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
4586
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
4587
|
|
|
|
|
|
|
else { |
4588
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
4589
|
0
|
|
|
|
|
|
goto _match; |
4590
|
|
|
|
|
|
|
} |
4591
|
|
|
|
|
|
|
} |
4592
|
0
|
|
|
|
|
|
_trans += _klen; |
4593
|
|
|
|
|
|
|
} |
4594
|
|
|
|
|
|
|
|
4595
|
|
|
|
|
|
|
_match: |
4596
|
0
|
|
|
|
|
|
_trans = _VBD_VBN_indicies[_trans]; |
4597
|
0
|
|
|
|
|
|
cs = _VBD_VBN_trans_targs[_trans]; |
4598
|
|
|
|
|
|
|
|
4599
|
0
|
0
|
|
|
|
|
if ( _VBD_VBN_trans_actions[_trans] == 0 ) |
4600
|
|
|
|
|
|
|
goto _again; |
4601
|
|
|
|
|
|
|
|
4602
|
0
|
|
|
|
|
|
_acts = _VBD_VBN_actions + _VBD_VBN_trans_actions[_trans]; |
4603
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
4604
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
4605
|
|
|
|
|
|
|
{ |
4606
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
4607
|
|
|
|
|
|
|
{ |
4608
|
|
|
|
|
|
|
case 0: |
4609
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
4610
|
|
|
|
|
|
|
break; |
4611
|
|
|
|
|
|
|
case 1: |
4612
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
4613
|
|
|
|
|
|
|
break; |
4614
|
|
|
|
|
|
|
case 2: |
4615
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
4616
|
|
|
|
|
|
|
break; |
4617
|
|
|
|
|
|
|
case 3: |
4618
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
4619
|
|
|
|
|
|
|
break; |
4620
|
|
|
|
|
|
|
case 4: |
4621
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
4622
|
|
|
|
|
|
|
break; |
4623
|
|
|
|
|
|
|
case 5: |
4624
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
4625
|
|
|
|
|
|
|
break; |
4626
|
|
|
|
|
|
|
case 7: |
4627
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
4628
|
|
|
|
|
|
|
break; |
4629
|
|
|
|
|
|
|
case 8: |
4630
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 3, append = "y"; } |
4631
|
|
|
|
|
|
|
break; |
4632
|
|
|
|
|
|
|
case 9: |
4633
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
4634
|
|
|
|
|
|
|
break; |
4635
|
|
|
|
|
|
|
case 10: |
4636
|
0
|
0
|
|
|
|
|
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
4637
|
|
|
|
|
|
|
break; |
4638
|
|
|
|
|
|
|
case 11: |
4639
|
0
|
0
|
|
|
|
|
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
4640
|
|
|
|
|
|
|
break; |
4641
|
|
|
|
|
|
|
case 12: |
4642
|
0
|
0
|
|
|
|
|
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
4643
|
|
|
|
|
|
|
break; |
4644
|
|
|
|
|
|
|
case 13: |
4645
|
0
|
0
|
|
|
|
|
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
4646
|
|
|
|
|
|
|
break; |
4647
|
|
|
|
|
|
|
case 14: |
4648
|
0
|
0
|
|
|
|
|
{ if (best > 'o') best = 'o', remove = 2, append = nullptr; } |
4649
|
|
|
|
|
|
|
break; |
4650
|
|
|
|
|
|
|
case 15: |
4651
|
0
|
0
|
|
|
|
|
{ if (best > 'p') best = 'p', remove = 1, append = nullptr; } |
4652
|
|
|
|
|
|
|
break; |
4653
|
|
|
|
|
|
|
case 16: |
4654
|
0
|
0
|
|
|
|
|
{ if (best > 'q') best = 'q', remove = 2, append = nullptr; } |
4655
|
|
|
|
|
|
|
break; |
4656
|
|
|
|
|
|
|
case 17: |
4657
|
0
|
0
|
|
|
|
|
{ if (best > 'r') best = 'r', remove = 1, append = nullptr; } |
4658
|
|
|
|
|
|
|
break; |
4659
|
|
|
|
|
|
|
} |
4660
|
|
|
|
|
|
|
} |
4661
|
|
|
|
|
|
|
|
4662
|
|
|
|
|
|
|
_again: |
4663
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
4664
|
|
|
|
|
|
|
goto _out; |
4665
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
4666
|
|
|
|
|
|
|
goto _resume; |
4667
|
|
|
|
|
|
|
_test_eof: {} |
4668
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
4669
|
|
|
|
|
|
|
{ |
4670
|
0
|
|
|
|
|
|
const char *__acts = _VBD_VBN_actions + _VBD_VBN_eof_actions[cs]; |
4671
|
0
|
|
|
|
|
|
unsigned int __nacts = (unsigned int) *__acts++; |
4672
|
0
|
0
|
|
|
|
|
while ( __nacts-- > 0 ) { |
4673
|
0
|
|
|
|
|
|
switch ( *__acts++ ) { |
4674
|
|
|
|
|
|
|
case 3: |
4675
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
4676
|
|
|
|
|
|
|
break; |
4677
|
|
|
|
|
|
|
case 6: |
4678
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
4679
|
|
|
|
|
|
|
break; |
4680
|
|
|
|
|
|
|
case 9: |
4681
|
0
|
0
|
|
|
|
|
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
4682
|
|
|
|
|
|
|
break; |
4683
|
|
|
|
|
|
|
} |
4684
|
|
|
|
|
|
|
} |
4685
|
|
|
|
|
|
|
} |
4686
|
|
|
|
|
|
|
|
4687
|
|
|
|
|
|
|
_out: {} |
4688
|
|
|
|
|
|
|
} |
4689
|
|
|
|
|
|
|
|
4690
|
0
|
0
|
|
|
|
|
add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
4691
|
0
|
|
|
|
|
|
} |
4692
|
|
|
|
|
|
|
|
4693
|
|
|
|
|
|
|
static const char _VBZ_actions[] = { |
4694
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 2, 1, |
4695
|
|
|
|
|
|
|
3, 1, 4, 1, 5, 1, 6, 1, |
4696
|
|
|
|
|
|
|
7, 1, 8 |
4697
|
|
|
|
|
|
|
}; |
4698
|
|
|
|
|
|
|
|
4699
|
|
|
|
|
|
|
static const char _VBZ_key_offsets[] = { |
4700
|
|
|
|
|
|
|
0, 0, 1, 2, 4, 14, 14, 25, |
4701
|
|
|
|
|
|
|
26, 31, 31, 31, 31, 37, 45, 54 |
4702
|
|
|
|
|
|
|
}; |
4703
|
|
|
|
|
|
|
|
4704
|
|
|
|
|
|
|
static const char _VBZ_trans_keys[] = { |
4705
|
|
|
|
|
|
|
115, 101, 99, 115, 98, 100, 102, 104, |
4706
|
|
|
|
|
|
|
106, 110, 112, 116, 118, 122, 122, 98, |
4707
|
|
|
|
|
|
|
100, 102, 104, 106, 110, 112, 116, 118, |
4708
|
|
|
|
|
|
|
120, 111, 97, 101, 105, 111, 117, 104, |
4709
|
|
|
|
|
|
|
105, 111, 115, 120, 122, 97, 101, 105, |
4710
|
|
|
|
|
|
|
110, 111, 114, 115, 117, 97, 101, 105, |
4711
|
|
|
|
|
|
|
111, 117, 121, 122, 98, 120, 0 |
4712
|
|
|
|
|
|
|
}; |
4713
|
|
|
|
|
|
|
|
4714
|
|
|
|
|
|
|
static const char _VBZ_single_lengths[] = { |
4715
|
|
|
|
|
|
|
0, 1, 1, 2, 0, 0, 1, 1, |
4716
|
|
|
|
|
|
|
5, 0, 0, 0, 6, 8, 7, 0 |
4717
|
|
|
|
|
|
|
}; |
4718
|
|
|
|
|
|
|
|
4719
|
|
|
|
|
|
|
static const char _VBZ_range_lengths[] = { |
4720
|
|
|
|
|
|
|
0, 0, 0, 0, 5, 0, 5, 0, |
4721
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 1, 0 |
4722
|
|
|
|
|
|
|
}; |
4723
|
|
|
|
|
|
|
|
4724
|
|
|
|
|
|
|
static const char _VBZ_index_offsets[] = { |
4725
|
|
|
|
|
|
|
0, 0, 2, 4, 7, 13, 14, 21, |
4726
|
|
|
|
|
|
|
23, 29, 30, 31, 32, 39, 48, 57 |
4727
|
|
|
|
|
|
|
}; |
4728
|
|
|
|
|
|
|
|
4729
|
|
|
|
|
|
|
static const char _VBZ_indicies[] = { |
4730
|
|
|
|
|
|
|
0, 1, 3, 2, 4, 4, 1, 5, |
4731
|
|
|
|
|
|
|
5, 5, 5, 5, 1, 6, 7, 7, |
4732
|
|
|
|
|
|
|
7, 7, 7, 7, 1, 8, 1, 9, |
4733
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 8, 10, 1, |
4734
|
|
|
|
|
|
|
11, 12, 13, 14, 4, 15, 1, 16, |
4735
|
|
|
|
|
|
|
16, 16, 17, 16, 18, 19, 16, 1, |
4736
|
|
|
|
|
|
|
20, 20, 20, 20, 20, 20, 22, 21, |
4737
|
|
|
|
|
|
|
1, 10, 0 |
4738
|
|
|
|
|
|
|
}; |
4739
|
|
|
|
|
|
|
|
4740
|
|
|
|
|
|
|
static const char _VBZ_trans_targs[] = { |
4741
|
|
|
|
|
|
|
2, 0, 11, 12, 11, 5, 11, 11, |
4742
|
|
|
|
|
|
|
11, 9, 11, 3, 4, 6, 13, 14, |
4743
|
|
|
|
|
|
|
11, 7, 8, 11, 11, 10, 15 |
4744
|
|
|
|
|
|
|
}; |
4745
|
|
|
|
|
|
|
|
4746
|
|
|
|
|
|
|
static const char _VBZ_trans_actions[] = { |
4747
|
|
|
|
|
|
|
0, 0, 17, 17, 11, 0, 13, 15, |
4748
|
|
|
|
|
|
|
9, 0, 3, 0, 0, 0, 11, 11, |
4749
|
|
|
|
|
|
|
1, 0, 0, 7, 5, 0, 7 |
4750
|
|
|
|
|
|
|
}; |
4751
|
|
|
|
|
|
|
|
4752
|
|
|
|
|
|
|
static const int VBZ_start = 1; |
4753
|
|
|
|
|
|
|
|
4754
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_VBZ(const string& form, vector& lemmas) const { |
4755
|
|
|
|
|
|
|
const char* p = form.c_str(); int cs; |
4756
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
4757
|
|
|
|
|
|
|
|
4758
|
|
|
|
|
|
|
{ |
4759
|
|
|
|
|
|
|
cs = VBZ_start; |
4760
|
|
|
|
|
|
|
} |
4761
|
|
|
|
|
|
|
|
4762
|
|
|
|
|
|
|
{ |
4763
|
|
|
|
|
|
|
int _klen; |
4764
|
|
|
|
|
|
|
unsigned int _trans; |
4765
|
|
|
|
|
|
|
const char *_acts; |
4766
|
|
|
|
|
|
|
unsigned int _nacts; |
4767
|
|
|
|
|
|
|
const char *_keys; |
4768
|
|
|
|
|
|
|
|
4769
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
4770
|
|
|
|
|
|
|
goto _test_eof; |
4771
|
|
|
|
|
|
|
if ( cs == 0 ) |
4772
|
|
|
|
|
|
|
goto _out; |
4773
|
|
|
|
|
|
|
_resume: |
4774
|
0
|
|
|
|
|
|
_keys = _VBZ_trans_keys + _VBZ_key_offsets[cs]; |
4775
|
0
|
|
|
|
|
|
_trans = _VBZ_index_offsets[cs]; |
4776
|
|
|
|
|
|
|
|
4777
|
0
|
|
|
|
|
|
_klen = _VBZ_single_lengths[cs]; |
4778
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
4779
|
|
|
|
|
|
|
const char *_lower = _keys; |
4780
|
|
|
|
|
|
|
const char *_mid; |
4781
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
4782
|
|
|
|
|
|
|
while (1) { |
4783
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
4784
|
|
|
|
|
|
|
break; |
4785
|
|
|
|
|
|
|
|
4786
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
4787
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
4788
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
4789
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
4790
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
4791
|
|
|
|
|
|
|
else { |
4792
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
4793
|
0
|
|
|
|
|
|
goto _match; |
4794
|
|
|
|
|
|
|
} |
4795
|
|
|
|
|
|
|
} |
4796
|
0
|
|
|
|
|
|
_keys += _klen; |
4797
|
0
|
|
|
|
|
|
_trans += _klen; |
4798
|
|
|
|
|
|
|
} |
4799
|
|
|
|
|
|
|
|
4800
|
0
|
|
|
|
|
|
_klen = _VBZ_range_lengths[cs]; |
4801
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
4802
|
|
|
|
|
|
|
const char *_lower = _keys; |
4803
|
|
|
|
|
|
|
const char *_mid; |
4804
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
4805
|
|
|
|
|
|
|
while (1) { |
4806
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
4807
|
|
|
|
|
|
|
break; |
4808
|
|
|
|
|
|
|
|
4809
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
4810
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
4811
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
4812
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
4813
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
4814
|
|
|
|
|
|
|
else { |
4815
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
4816
|
0
|
|
|
|
|
|
goto _match; |
4817
|
|
|
|
|
|
|
} |
4818
|
|
|
|
|
|
|
} |
4819
|
0
|
|
|
|
|
|
_trans += _klen; |
4820
|
|
|
|
|
|
|
} |
4821
|
|
|
|
|
|
|
|
4822
|
|
|
|
|
|
|
_match: |
4823
|
0
|
|
|
|
|
|
_trans = _VBZ_indicies[_trans]; |
4824
|
0
|
|
|
|
|
|
cs = _VBZ_trans_targs[_trans]; |
4825
|
|
|
|
|
|
|
|
4826
|
0
|
0
|
|
|
|
|
if ( _VBZ_trans_actions[_trans] == 0 ) |
4827
|
|
|
|
|
|
|
goto _again; |
4828
|
|
|
|
|
|
|
|
4829
|
0
|
|
|
|
|
|
_acts = _VBZ_actions + _VBZ_trans_actions[_trans]; |
4830
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
4831
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
4832
|
|
|
|
|
|
|
{ |
4833
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
4834
|
|
|
|
|
|
|
{ |
4835
|
|
|
|
|
|
|
case 0: |
4836
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
4837
|
|
|
|
|
|
|
break; |
4838
|
|
|
|
|
|
|
case 1: |
4839
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
4840
|
|
|
|
|
|
|
break; |
4841
|
|
|
|
|
|
|
case 2: |
4842
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
4843
|
|
|
|
|
|
|
break; |
4844
|
|
|
|
|
|
|
case 3: |
4845
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
4846
|
|
|
|
|
|
|
break; |
4847
|
|
|
|
|
|
|
case 4: |
4848
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
4849
|
|
|
|
|
|
|
break; |
4850
|
|
|
|
|
|
|
case 5: |
4851
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
4852
|
|
|
|
|
|
|
break; |
4853
|
|
|
|
|
|
|
case 6: |
4854
|
0
|
0
|
|
|
|
|
{ if (best > 'g') best = 'g', remove = 3, append = "y"; } |
4855
|
|
|
|
|
|
|
break; |
4856
|
|
|
|
|
|
|
case 7: |
4857
|
0
|
0
|
|
|
|
|
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
4858
|
|
|
|
|
|
|
break; |
4859
|
|
|
|
|
|
|
case 8: |
4860
|
0
|
0
|
|
|
|
|
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
4861
|
|
|
|
|
|
|
break; |
4862
|
|
|
|
|
|
|
} |
4863
|
|
|
|
|
|
|
} |
4864
|
|
|
|
|
|
|
|
4865
|
|
|
|
|
|
|
_again: |
4866
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
4867
|
|
|
|
|
|
|
goto _out; |
4868
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
4869
|
|
|
|
|
|
|
goto _resume; |
4870
|
|
|
|
|
|
|
_test_eof: {} |
4871
|
|
|
|
|
|
|
_out: {} |
4872
|
|
|
|
|
|
|
} |
4873
|
|
|
|
|
|
|
|
4874
|
0
|
0
|
|
|
|
|
add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
|
0
|
|
|
|
|
|
4875
|
0
|
|
|
|
|
|
} |
4876
|
|
|
|
|
|
|
|
4877
|
|
|
|
|
|
|
static const char _JJR_RBR_actions[] = { |
4878
|
|
|
|
|
|
|
0, 1, 0, 1, 1, 1, 3, 1, |
4879
|
|
|
|
|
|
|
4, 1, 5, 2, 1, 4, 2, 2, |
4880
|
|
|
|
|
|
|
5, 2, 4, 5 |
4881
|
|
|
|
|
|
|
}; |
4882
|
|
|
|
|
|
|
|
4883
|
|
|
|
|
|
|
static const unsigned char _JJR_RBR_key_offsets[] = { |
4884
|
|
|
|
|
|
|
0, 0, 1, 2, 26, 26, 32, 37, |
4885
|
|
|
|
|
|
|
50, 56, 62, 73, 79, 85, 91, 102, |
4886
|
|
|
|
|
|
|
103, 109, 115, 117, 123, 129, 135, 146, |
4887
|
|
|
|
|
|
|
152, 163, 169, 175, 181 |
4888
|
|
|
|
|
|
|
}; |
4889
|
|
|
|
|
|
|
|
4890
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_keys[] = { |
4891
|
|
|
|
|
|
|
114, 101, 98, 99, 100, 101, 102, 103, |
4892
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
4893
|
|
|
|
|
|
|
113, 114, 115, 116, 117, 118, 119, 120, |
4894
|
|
|
|
|
|
|
121, 122, 97, 98, 101, 105, 111, 117, |
4895
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 98, 99, 100, |
4896
|
|
|
|
|
|
|
105, 111, 117, 122, 97, 101, 102, 109, |
4897
|
|
|
|
|
|
|
112, 120, 97, 100, 101, 105, 111, 117, |
4898
|
|
|
|
|
|
|
97, 101, 102, 105, 111, 117, 97, 101, |
4899
|
|
|
|
|
|
|
103, 105, 111, 117, 122, 98, 109, 112, |
4900
|
|
|
|
|
|
|
120, 97, 101, 104, 105, 111, 117, 97, |
4901
|
|
|
|
|
|
|
101, 105, 106, 111, 117, 97, 101, 105, |
4902
|
|
|
|
|
|
|
107, 111, 117, 97, 101, 105, 108, 111, |
4903
|
|
|
|
|
|
|
117, 122, 98, 109, 112, 120, 101, 97, |
4904
|
|
|
|
|
|
|
101, 105, 109, 111, 117, 97, 101, 105, |
4905
|
|
|
|
|
|
|
110, 111, 117, 97, 122, 97, 101, 105, |
4906
|
|
|
|
|
|
|
111, 112, 117, 97, 101, 105, 111, 113, |
4907
|
|
|
|
|
|
|
117, 97, 101, 105, 111, 114, 117, 97, |
4908
|
|
|
|
|
|
|
101, 105, 111, 115, 117, 122, 98, 109, |
4909
|
|
|
|
|
|
|
112, 120, 97, 101, 105, 111, 116, 117, |
4910
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 118, 122, 98, |
4911
|
|
|
|
|
|
|
109, 112, 120, 97, 101, 105, 111, 117, |
4912
|
|
|
|
|
|
|
119, 97, 101, 105, 111, 117, 120, 97, |
4913
|
|
|
|
|
|
|
101, 105, 111, 117, 121, 97, 101, 105, |
4914
|
|
|
|
|
|
|
111, 117, 122, 0 |
4915
|
|
|
|
|
|
|
}; |
4916
|
|
|
|
|
|
|
|
4917
|
|
|
|
|
|
|
static const char _JJR_RBR_single_lengths[] = { |
4918
|
|
|
|
|
|
|
0, 1, 1, 24, 0, 6, 5, 7, |
4919
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 7, 1, |
4920
|
|
|
|
|
|
|
6, 6, 0, 6, 6, 6, 7, 6, |
4921
|
|
|
|
|
|
|
7, 6, 6, 6, 6 |
4922
|
|
|
|
|
|
|
}; |
4923
|
|
|
|
|
|
|
|
4924
|
|
|
|
|
|
|
static const char _JJR_RBR_range_lengths[] = { |
4925
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 3, |
4926
|
|
|
|
|
|
|
0, 0, 2, 0, 0, 0, 2, 0, |
4927
|
|
|
|
|
|
|
0, 0, 1, 0, 0, 0, 2, 0, |
4928
|
|
|
|
|
|
|
2, 0, 0, 0, 0 |
4929
|
|
|
|
|
|
|
}; |
4930
|
|
|
|
|
|
|
|
4931
|
|
|
|
|
|
|
static const unsigned char _JJR_RBR_index_offsets[] = { |
4932
|
|
|
|
|
|
|
0, 0, 2, 4, 29, 30, 37, 43, |
4933
|
|
|
|
|
|
|
54, 61, 68, 78, 85, 92, 99, 109, |
4934
|
|
|
|
|
|
|
111, 118, 125, 127, 134, 141, 148, 158, |
4935
|
|
|
|
|
|
|
165, 175, 182, 189, 196 |
4936
|
|
|
|
|
|
|
}; |
4937
|
|
|
|
|
|
|
|
4938
|
|
|
|
|
|
|
static const char _JJR_RBR_indicies[] = { |
4939
|
|
|
|
|
|
|
0, 1, 2, 1, 4, 5, 6, 7, |
4940
|
|
|
|
|
|
|
8, 9, 10, 11, 12, 13, 14, 15, |
4941
|
|
|
|
|
|
|
16, 17, 18, 19, 20, 21, 7, 22, |
4942
|
|
|
|
|
|
|
23, 24, 25, 26, 3, 1, 27, 28, |
4943
|
|
|
|
|
|
|
27, 27, 27, 27, 1, 29, 29, 29, |
4944
|
|
|
|
|
|
|
29, 29, 1, 30, 31, 30, 27, 27, |
4945
|
|
|
|
|
|
|
27, 30, 27, 30, 30, 1, 27, 28, |
4946
|
|
|
|
|
|
|
27, 27, 27, 27, 1, 27, 27, 28, |
4947
|
|
|
|
|
|
|
27, 27, 27, 1, 27, 27, 31, 27, |
4948
|
|
|
|
|
|
|
27, 27, 30, 30, 30, 1, 27, 27, |
4949
|
|
|
|
|
|
|
28, 27, 27, 27, 1, 27, 27, 27, |
4950
|
|
|
|
|
|
|
28, 27, 27, 1, 27, 27, 27, 28, |
4951
|
|
|
|
|
|
|
27, 27, 1, 27, 27, 27, 32, 27, |
4952
|
|
|
|
|
|
|
27, 30, 30, 30, 1, 1, 33, 27, |
4953
|
|
|
|
|
|
|
27, 27, 28, 27, 27, 1, 34, 34, |
4954
|
|
|
|
|
|
|
34, 28, 34, 34, 1, 29, 1, 34, |
4955
|
|
|
|
|
|
|
34, 34, 34, 28, 34, 1, 27, 27, |
4956
|
|
|
|
|
|
|
27, 27, 28, 27, 1, 27, 27, 27, |
4957
|
|
|
|
|
|
|
27, 28, 27, 1, 27, 27, 27, 27, |
4958
|
|
|
|
|
|
|
31, 27, 30, 30, 30, 1, 27, 27, |
4959
|
|
|
|
|
|
|
27, 27, 28, 27, 1, 27, 27, 27, |
4960
|
|
|
|
|
|
|
27, 27, 31, 30, 30, 30, 1, 34, |
4961
|
|
|
|
|
|
|
34, 34, 34, 34, 28, 1, 34, 34, |
4962
|
|
|
|
|
|
|
34, 34, 34, 28, 1, 27, 27, 27, |
4963
|
|
|
|
|
|
|
27, 27, 28, 1, 27, 27, 27, 27, |
4964
|
|
|
|
|
|
|
27, 28, 1, 0 |
4965
|
|
|
|
|
|
|
}; |
4966
|
|
|
|
|
|
|
|
4967
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_targs[] = { |
4968
|
|
|
|
|
|
|
2, 0, 3, 4, 5, 7, 8, 4, |
4969
|
|
|
|
|
|
|
9, 10, 11, 4, 12, 13, 14, 16, |
4970
|
|
|
|
|
|
|
17, 19, 20, 21, 22, 23, 24, 25, |
4971
|
|
|
|
|
|
|
26, 27, 28, 6, 4, 4, 4, 4, |
4972
|
|
|
|
|
|
|
15, 4, 18 |
4973
|
|
|
|
|
|
|
}; |
4974
|
|
|
|
|
|
|
|
4975
|
|
|
|
|
|
|
static const char _JJR_RBR_trans_actions[] = { |
4976
|
|
|
|
|
|
|
0, 0, 0, 9, 9, 9, 9, 17, |
4977
|
|
|
|
|
|
|
9, 9, 9, 14, 9, 9, 9, 9, |
4978
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 9, 9, 9, |
4979
|
|
|
|
|
|
|
9, 9, 9, 7, 3, 5, 7, 11, |
4980
|
|
|
|
|
|
|
11, 1, 7 |
4981
|
|
|
|
|
|
|
}; |
4982
|
|
|
|
|
|
|
|
4983
|
|
|
|
|
|
|
static const int JJR_RBR_start = 1; |
4984
|
|
|
|
|
|
|
|
4985
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_JJR_RBR(const string& form, unsigned negation_len, vector& lemmas) const { |
4986
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
4987
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
4988
|
|
|
|
|
|
|
|
4989
|
|
|
|
|
|
|
{ |
4990
|
|
|
|
|
|
|
cs = JJR_RBR_start; |
4991
|
|
|
|
|
|
|
} |
4992
|
|
|
|
|
|
|
|
4993
|
|
|
|
|
|
|
{ |
4994
|
|
|
|
|
|
|
int _klen; |
4995
|
|
|
|
|
|
|
unsigned int _trans; |
4996
|
|
|
|
|
|
|
const char *_acts; |
4997
|
|
|
|
|
|
|
unsigned int _nacts; |
4998
|
|
|
|
|
|
|
const char *_keys; |
4999
|
|
|
|
|
|
|
|
5000
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
5001
|
|
|
|
|
|
|
goto _test_eof; |
5002
|
|
|
|
|
|
|
if ( cs == 0 ) |
5003
|
|
|
|
|
|
|
goto _out; |
5004
|
|
|
|
|
|
|
_resume: |
5005
|
0
|
|
|
|
|
|
_keys = _JJR_RBR_trans_keys + _JJR_RBR_key_offsets[cs]; |
5006
|
0
|
|
|
|
|
|
_trans = _JJR_RBR_index_offsets[cs]; |
5007
|
|
|
|
|
|
|
|
5008
|
0
|
|
|
|
|
|
_klen = _JJR_RBR_single_lengths[cs]; |
5009
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
5010
|
|
|
|
|
|
|
const char *_lower = _keys; |
5011
|
|
|
|
|
|
|
const char *_mid; |
5012
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
5013
|
|
|
|
|
|
|
while (1) { |
5014
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
5015
|
|
|
|
|
|
|
break; |
5016
|
|
|
|
|
|
|
|
5017
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
5018
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
5019
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
5020
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
5021
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
5022
|
|
|
|
|
|
|
else { |
5023
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
5024
|
0
|
|
|
|
|
|
goto _match; |
5025
|
|
|
|
|
|
|
} |
5026
|
|
|
|
|
|
|
} |
5027
|
0
|
|
|
|
|
|
_keys += _klen; |
5028
|
0
|
|
|
|
|
|
_trans += _klen; |
5029
|
|
|
|
|
|
|
} |
5030
|
|
|
|
|
|
|
|
5031
|
0
|
|
|
|
|
|
_klen = _JJR_RBR_range_lengths[cs]; |
5032
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
5033
|
|
|
|
|
|
|
const char *_lower = _keys; |
5034
|
|
|
|
|
|
|
const char *_mid; |
5035
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
5036
|
|
|
|
|
|
|
while (1) { |
5037
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
5038
|
|
|
|
|
|
|
break; |
5039
|
|
|
|
|
|
|
|
5040
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
5041
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
5042
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
5043
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
5044
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
5045
|
|
|
|
|
|
|
else { |
5046
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
5047
|
0
|
|
|
|
|
|
goto _match; |
5048
|
|
|
|
|
|
|
} |
5049
|
|
|
|
|
|
|
} |
5050
|
0
|
|
|
|
|
|
_trans += _klen; |
5051
|
|
|
|
|
|
|
} |
5052
|
|
|
|
|
|
|
|
5053
|
|
|
|
|
|
|
_match: |
5054
|
0
|
|
|
|
|
|
_trans = _JJR_RBR_indicies[_trans]; |
5055
|
0
|
|
|
|
|
|
cs = _JJR_RBR_trans_targs[_trans]; |
5056
|
|
|
|
|
|
|
|
5057
|
0
|
0
|
|
|
|
|
if ( _JJR_RBR_trans_actions[_trans] == 0 ) |
5058
|
|
|
|
|
|
|
goto _again; |
5059
|
|
|
|
|
|
|
|
5060
|
0
|
|
|
|
|
|
_acts = _JJR_RBR_actions + _JJR_RBR_trans_actions[_trans]; |
5061
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
5062
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
5063
|
|
|
|
|
|
|
{ |
5064
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
5065
|
|
|
|
|
|
|
{ |
5066
|
|
|
|
|
|
|
case 0: |
5067
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 2, append = nullptr; } |
5068
|
|
|
|
|
|
|
break; |
5069
|
|
|
|
|
|
|
case 1: |
5070
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 3, append = nullptr; } |
5071
|
|
|
|
|
|
|
break; |
5072
|
|
|
|
|
|
|
case 2: |
5073
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 3, append = "y"; } |
5074
|
|
|
|
|
|
|
break; |
5075
|
|
|
|
|
|
|
case 3: |
5076
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
5077
|
|
|
|
|
|
|
break; |
5078
|
|
|
|
|
|
|
case 4: |
5079
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
5080
|
|
|
|
|
|
|
break; |
5081
|
|
|
|
|
|
|
case 5: |
5082
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
5083
|
|
|
|
|
|
|
break; |
5084
|
|
|
|
|
|
|
} |
5085
|
|
|
|
|
|
|
} |
5086
|
|
|
|
|
|
|
|
5087
|
|
|
|
|
|
|
_again: |
5088
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
5089
|
|
|
|
|
|
|
goto _out; |
5090
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
5091
|
|
|
|
|
|
|
goto _resume; |
5092
|
|
|
|
|
|
|
_test_eof: {} |
5093
|
|
|
|
|
|
|
_out: {} |
5094
|
|
|
|
|
|
|
} |
5095
|
|
|
|
|
|
|
|
5096
|
0
|
0
|
|
|
|
|
add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5097
|
0
|
|
|
|
|
|
} |
5098
|
|
|
|
|
|
|
|
5099
|
|
|
|
|
|
|
static const char _JJS_RBS_actions[] = { |
5100
|
|
|
|
|
|
|
0, 1, 1, 1, 2, 1, 4, 1, |
5101
|
|
|
|
|
|
|
5, 2, 0, 5, 2, 1, 4, 2, |
5102
|
|
|
|
|
|
|
3, 5 |
5103
|
|
|
|
|
|
|
}; |
5104
|
|
|
|
|
|
|
|
5105
|
|
|
|
|
|
|
static const unsigned char _JJS_RBS_key_offsets[] = { |
5106
|
|
|
|
|
|
|
0, 0, 1, 2, 3, 25, 25, 25, |
5107
|
|
|
|
|
|
|
31, 44, 50, 56, 67, 73, 79, 85, |
5108
|
|
|
|
|
|
|
96, 102, 108, 114, 120, 126, 137, 143, |
5109
|
|
|
|
|
|
|
154, 160, 166, 172, 178, 178, 183, 183, |
5110
|
|
|
|
|
|
|
183, 184 |
5111
|
|
|
|
|
|
|
}; |
5112
|
|
|
|
|
|
|
|
5113
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_keys[] = { |
5114
|
|
|
|
|
|
|
116, 115, 101, 98, 99, 100, 102, 103, |
5115
|
|
|
|
|
|
|
104, 105, 106, 107, 108, 109, 110, 112, |
5116
|
|
|
|
|
|
|
113, 114, 115, 116, 118, 119, 120, 121, |
5117
|
|
|
|
|
|
|
122, 97, 98, 101, 105, 111, 117, 98, |
5118
|
|
|
|
|
|
|
99, 100, 105, 111, 117, 122, 97, 101, |
5119
|
|
|
|
|
|
|
102, 109, 112, 120, 97, 100, 101, 105, |
5120
|
|
|
|
|
|
|
111, 117, 97, 101, 102, 105, 111, 117, |
5121
|
|
|
|
|
|
|
97, 101, 103, 105, 111, 117, 122, 98, |
5122
|
|
|
|
|
|
|
109, 112, 120, 97, 101, 104, 105, 111, |
5123
|
|
|
|
|
|
|
117, 97, 101, 105, 106, 111, 117, 97, |
5124
|
|
|
|
|
|
|
101, 105, 107, 111, 117, 97, 101, 105, |
5125
|
|
|
|
|
|
|
108, 111, 117, 122, 98, 109, 112, 120, |
5126
|
|
|
|
|
|
|
97, 101, 105, 109, 111, 117, 97, 101, |
5127
|
|
|
|
|
|
|
105, 110, 111, 117, 97, 101, 105, 111, |
5128
|
|
|
|
|
|
|
112, 117, 97, 101, 105, 111, 113, 117, |
5129
|
|
|
|
|
|
|
97, 101, 105, 111, 114, 117, 97, 101, |
5130
|
|
|
|
|
|
|
105, 111, 115, 117, 122, 98, 109, 112, |
5131
|
|
|
|
|
|
|
120, 97, 101, 105, 111, 116, 117, 97, |
5132
|
|
|
|
|
|
|
101, 105, 111, 117, 118, 122, 98, 109, |
5133
|
|
|
|
|
|
|
112, 120, 97, 101, 105, 111, 117, 119, |
5134
|
|
|
|
|
|
|
97, 101, 105, 111, 117, 120, 97, 101, |
5135
|
|
|
|
|
|
|
105, 111, 117, 121, 97, 101, 105, 111, |
5136
|
|
|
|
|
|
|
117, 122, 97, 101, 105, 111, 117, 101, |
5137
|
|
|
|
|
|
|
97, 122, 0 |
5138
|
|
|
|
|
|
|
}; |
5139
|
|
|
|
|
|
|
|
5140
|
|
|
|
|
|
|
static const char _JJS_RBS_single_lengths[] = { |
5141
|
|
|
|
|
|
|
0, 1, 1, 1, 22, 0, 0, 6, |
5142
|
|
|
|
|
|
|
7, 6, 6, 7, 6, 6, 6, 7, |
5143
|
|
|
|
|
|
|
6, 6, 6, 6, 6, 7, 6, 7, |
5144
|
|
|
|
|
|
|
6, 6, 6, 6, 0, 5, 0, 0, |
5145
|
|
|
|
|
|
|
1, 0 |
5146
|
|
|
|
|
|
|
}; |
5147
|
|
|
|
|
|
|
|
5148
|
|
|
|
|
|
|
static const char _JJS_RBS_range_lengths[] = { |
5149
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
5150
|
|
|
|
|
|
|
3, 0, 0, 2, 0, 0, 0, 2, |
5151
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 2, 0, 2, |
5152
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
5153
|
|
|
|
|
|
|
0, 1 |
5154
|
|
|
|
|
|
|
}; |
5155
|
|
|
|
|
|
|
|
5156
|
|
|
|
|
|
|
static const unsigned char _JJS_RBS_index_offsets[] = { |
5157
|
|
|
|
|
|
|
0, 0, 2, 4, 6, 29, 30, 31, |
5158
|
|
|
|
|
|
|
38, 49, 56, 63, 73, 80, 87, 94, |
5159
|
|
|
|
|
|
|
104, 111, 118, 125, 132, 139, 149, 156, |
5160
|
|
|
|
|
|
|
166, 173, 180, 187, 194, 195, 201, 202, |
5161
|
|
|
|
|
|
|
203, 205 |
5162
|
|
|
|
|
|
|
}; |
5163
|
|
|
|
|
|
|
|
5164
|
|
|
|
|
|
|
static const char _JJS_RBS_indicies[] = { |
5165
|
|
|
|
|
|
|
0, 1, 2, 1, 3, 1, 5, 6, |
5166
|
|
|
|
|
|
|
7, 8, 9, 10, 11, 12, 13, 14, |
5167
|
|
|
|
|
|
|
15, 16, 17, 18, 19, 20, 21, 22, |
5168
|
|
|
|
|
|
|
23, 24, 25, 26, 4, 27, 28, 29, |
5169
|
|
|
|
|
|
|
30, 29, 29, 29, 29, 27, 31, 32, |
5170
|
|
|
|
|
|
|
31, 29, 29, 29, 31, 29, 31, 31, |
5171
|
|
|
|
|
|
|
27, 29, 30, 29, 29, 29, 29, 27, |
5172
|
|
|
|
|
|
|
29, 29, 30, 29, 29, 29, 27, 29, |
5173
|
|
|
|
|
|
|
29, 32, 29, 29, 29, 31, 31, 31, |
5174
|
|
|
|
|
|
|
27, 29, 29, 30, 29, 29, 29, 27, |
5175
|
|
|
|
|
|
|
29, 29, 29, 30, 29, 29, 27, 29, |
5176
|
|
|
|
|
|
|
29, 29, 30, 29, 29, 27, 29, 29, |
5177
|
|
|
|
|
|
|
29, 33, 29, 29, 31, 31, 31, 27, |
5178
|
|
|
|
|
|
|
29, 29, 29, 30, 29, 29, 27, 34, |
5179
|
|
|
|
|
|
|
34, 34, 30, 34, 34, 27, 34, 34, |
5180
|
|
|
|
|
|
|
34, 34, 30, 34, 27, 29, 29, 29, |
5181
|
|
|
|
|
|
|
29, 30, 29, 27, 29, 29, 29, 29, |
5182
|
|
|
|
|
|
|
30, 29, 27, 29, 29, 29, 29, 32, |
5183
|
|
|
|
|
|
|
29, 31, 31, 31, 27, 29, 29, 29, |
5184
|
|
|
|
|
|
|
29, 30, 29, 27, 29, 29, 29, 29, |
5185
|
|
|
|
|
|
|
29, 32, 31, 31, 31, 27, 34, 34, |
5186
|
|
|
|
|
|
|
34, 34, 34, 30, 27, 34, 34, 34, |
5187
|
|
|
|
|
|
|
34, 34, 30, 27, 29, 29, 29, 29, |
5188
|
|
|
|
|
|
|
29, 30, 27, 29, 29, 29, 29, 29, |
5189
|
|
|
|
|
|
|
30, 27, 1, 35, 35, 35, 35, 35, |
5190
|
|
|
|
|
|
|
28, 28, 27, 28, 36, 35, 28, 0 |
5191
|
|
|
|
|
|
|
}; |
5192
|
|
|
|
|
|
|
|
5193
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_targs[] = { |
5194
|
|
|
|
|
|
|
2, 0, 3, 4, 5, 7, 8, 9, |
5195
|
|
|
|
|
|
|
10, 11, 12, 31, 13, 14, 15, 16, |
5196
|
|
|
|
|
|
|
17, 18, 19, 20, 21, 22, 23, 24, |
5197
|
|
|
|
|
|
|
25, 26, 27, 6, 28, 29, 30, 30, |
5198
|
|
|
|
|
|
|
30, 32, 33, 28, 28 |
5199
|
|
|
|
|
|
|
}; |
5200
|
|
|
|
|
|
|
|
5201
|
|
|
|
|
|
|
static const char _JJS_RBS_trans_actions[] = { |
5202
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
5203
|
|
|
|
|
|
|
0, 0, 0, 3, 0, 0, 0, 0, |
5204
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
5205
|
|
|
|
|
|
|
0, 0, 0, 0, 7, 5, 1, 5, |
5206
|
|
|
|
|
|
|
12, 12, 5, 15, 9 |
5207
|
|
|
|
|
|
|
}; |
5208
|
|
|
|
|
|
|
|
5209
|
|
|
|
|
|
|
static const int JJS_RBS_start = 1; |
5210
|
|
|
|
|
|
|
|
5211
|
0
|
|
|
|
|
|
void english_morpho_guesser::add_JJS_RBS(const string& form, unsigned negation_len, vector& lemmas) const { |
5212
|
0
|
|
|
|
|
|
const char* p = form.c_str() + negation_len; int cs; |
5213
|
|
|
|
|
|
|
char best = 'z'; unsigned remove = 0; const char* append = nullptr; |
5214
|
|
|
|
|
|
|
|
5215
|
|
|
|
|
|
|
{ |
5216
|
|
|
|
|
|
|
cs = JJS_RBS_start; |
5217
|
|
|
|
|
|
|
} |
5218
|
|
|
|
|
|
|
|
5219
|
|
|
|
|
|
|
{ |
5220
|
|
|
|
|
|
|
int _klen; |
5221
|
|
|
|
|
|
|
unsigned int _trans; |
5222
|
|
|
|
|
|
|
const char *_acts; |
5223
|
|
|
|
|
|
|
unsigned int _nacts; |
5224
|
|
|
|
|
|
|
const char *_keys; |
5225
|
|
|
|
|
|
|
|
5226
|
0
|
0
|
|
|
|
|
if ( p == ( (form.c_str() + form.size())) ) |
5227
|
|
|
|
|
|
|
goto _test_eof; |
5228
|
|
|
|
|
|
|
if ( cs == 0 ) |
5229
|
|
|
|
|
|
|
goto _out; |
5230
|
|
|
|
|
|
|
_resume: |
5231
|
0
|
|
|
|
|
|
_keys = _JJS_RBS_trans_keys + _JJS_RBS_key_offsets[cs]; |
5232
|
0
|
|
|
|
|
|
_trans = _JJS_RBS_index_offsets[cs]; |
5233
|
|
|
|
|
|
|
|
5234
|
0
|
|
|
|
|
|
_klen = _JJS_RBS_single_lengths[cs]; |
5235
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
5236
|
|
|
|
|
|
|
const char *_lower = _keys; |
5237
|
|
|
|
|
|
|
const char *_mid; |
5238
|
0
|
|
|
|
|
|
const char *_upper = _keys + _klen - 1; |
5239
|
|
|
|
|
|
|
while (1) { |
5240
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
5241
|
|
|
|
|
|
|
break; |
5242
|
|
|
|
|
|
|
|
5243
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
5244
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
5245
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
5246
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
5247
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
5248
|
|
|
|
|
|
|
else { |
5249
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
5250
|
0
|
|
|
|
|
|
goto _match; |
5251
|
|
|
|
|
|
|
} |
5252
|
|
|
|
|
|
|
} |
5253
|
0
|
|
|
|
|
|
_keys += _klen; |
5254
|
0
|
|
|
|
|
|
_trans += _klen; |
5255
|
|
|
|
|
|
|
} |
5256
|
|
|
|
|
|
|
|
5257
|
0
|
|
|
|
|
|
_klen = _JJS_RBS_range_lengths[cs]; |
5258
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
5259
|
|
|
|
|
|
|
const char *_lower = _keys; |
5260
|
|
|
|
|
|
|
const char *_mid; |
5261
|
0
|
|
|
|
|
|
const char *_upper = _keys + (_klen<<1) - 2; |
5262
|
|
|
|
|
|
|
while (1) { |
5263
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
5264
|
|
|
|
|
|
|
break; |
5265
|
|
|
|
|
|
|
|
5266
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
5267
|
0
|
0
|
|
|
|
|
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
5268
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
5269
|
0
|
0
|
|
|
|
|
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
5270
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
5271
|
|
|
|
|
|
|
else { |
5272
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
5273
|
0
|
|
|
|
|
|
goto _match; |
5274
|
|
|
|
|
|
|
} |
5275
|
|
|
|
|
|
|
} |
5276
|
0
|
|
|
|
|
|
_trans += _klen; |
5277
|
|
|
|
|
|
|
} |
5278
|
|
|
|
|
|
|
|
5279
|
|
|
|
|
|
|
_match: |
5280
|
0
|
|
|
|
|
|
_trans = _JJS_RBS_indicies[_trans]; |
5281
|
0
|
|
|
|
|
|
cs = _JJS_RBS_trans_targs[_trans]; |
5282
|
|
|
|
|
|
|
|
5283
|
0
|
0
|
|
|
|
|
if ( _JJS_RBS_trans_actions[_trans] == 0 ) |
5284
|
|
|
|
|
|
|
goto _again; |
5285
|
|
|
|
|
|
|
|
5286
|
0
|
|
|
|
|
|
_acts = _JJS_RBS_actions + _JJS_RBS_trans_actions[_trans]; |
5287
|
0
|
|
|
|
|
|
_nacts = (unsigned int) *_acts++; |
5288
|
0
|
0
|
|
|
|
|
while ( _nacts-- > 0 ) |
5289
|
|
|
|
|
|
|
{ |
5290
|
0
|
|
|
|
|
|
switch ( *_acts++ ) |
5291
|
|
|
|
|
|
|
{ |
5292
|
|
|
|
|
|
|
case 0: |
5293
|
0
|
0
|
|
|
|
|
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
5294
|
|
|
|
|
|
|
break; |
5295
|
|
|
|
|
|
|
case 1: |
5296
|
0
|
0
|
|
|
|
|
{ if (best > 'b') best = 'b', remove = 4, append = nullptr; } |
5297
|
|
|
|
|
|
|
break; |
5298
|
|
|
|
|
|
|
case 2: |
5299
|
0
|
0
|
|
|
|
|
{ if (best > 'c') best = 'c', remove = 4, append = "y"; } |
5300
|
|
|
|
|
|
|
break; |
5301
|
|
|
|
|
|
|
case 3: |
5302
|
0
|
0
|
|
|
|
|
{ if (best > 'd') best = 'd', remove = 3, append = nullptr; } |
5303
|
|
|
|
|
|
|
break; |
5304
|
|
|
|
|
|
|
case 4: |
5305
|
0
|
0
|
|
|
|
|
{ if (best > 'e') best = 'e', remove = 2, append = nullptr; } |
5306
|
|
|
|
|
|
|
break; |
5307
|
|
|
|
|
|
|
case 5: |
5308
|
0
|
0
|
|
|
|
|
{ if (best > 'f') best = 'f', remove = 3, append = nullptr; } |
5309
|
|
|
|
|
|
|
break; |
5310
|
|
|
|
|
|
|
} |
5311
|
|
|
|
|
|
|
} |
5312
|
|
|
|
|
|
|
|
5313
|
|
|
|
|
|
|
_again: |
5314
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
5315
|
|
|
|
|
|
|
goto _out; |
5316
|
0
|
0
|
|
|
|
|
if ( ++p != ( (form.c_str() + form.size())) ) |
5317
|
|
|
|
|
|
|
goto _resume; |
5318
|
|
|
|
|
|
|
_test_eof: {} |
5319
|
|
|
|
|
|
|
_out: {} |
5320
|
|
|
|
|
|
|
} |
5321
|
|
|
|
|
|
|
|
5322
|
0
|
0
|
|
|
|
|
add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5323
|
0
|
|
|
|
|
|
} |
5324
|
|
|
|
|
|
|
|
5325
|
|
|
|
|
|
|
///////// |
5326
|
|
|
|
|
|
|
// File: morpho/external_morpho.h |
5327
|
|
|
|
|
|
|
///////// |
5328
|
|
|
|
|
|
|
|
5329
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
5330
|
|
|
|
|
|
|
// |
5331
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5332
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5333
|
|
|
|
|
|
|
// |
5334
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5335
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5336
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5337
|
|
|
|
|
|
|
|
5338
|
0
|
|
|
|
|
|
class external_morpho : public morpho { |
5339
|
|
|
|
|
|
|
public: |
5340
|
0
|
|
|
|
|
|
external_morpho(unsigned version) : version(version) {} |
5341
|
|
|
|
|
|
|
|
5342
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
5343
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
5344
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
5345
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
5346
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
5347
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
5348
|
|
|
|
|
|
|
|
5349
|
|
|
|
|
|
|
bool load(istream& is); |
5350
|
|
|
|
|
|
|
|
5351
|
|
|
|
|
|
|
private: |
5352
|
|
|
|
|
|
|
unsigned version; |
5353
|
|
|
|
|
|
|
|
5354
|
|
|
|
|
|
|
string unknown_tag; |
5355
|
|
|
|
|
|
|
}; |
5356
|
|
|
|
|
|
|
|
5357
|
|
|
|
|
|
|
///////// |
5358
|
|
|
|
|
|
|
// File: tokenizer/generic_tokenizer.h |
5359
|
|
|
|
|
|
|
///////// |
5360
|
|
|
|
|
|
|
|
5361
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
5362
|
|
|
|
|
|
|
// |
5363
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5364
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5365
|
|
|
|
|
|
|
// |
5366
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5367
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5368
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5369
|
|
|
|
|
|
|
|
5370
|
4
|
|
|
|
|
|
class generic_tokenizer : public ragel_tokenizer { |
5371
|
|
|
|
|
|
|
public: |
5372
|
|
|
|
|
|
|
enum { LATEST = 2 }; |
5373
|
|
|
|
|
|
|
generic_tokenizer(unsigned version); |
5374
|
|
|
|
|
|
|
|
5375
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
5376
|
|
|
|
|
|
|
}; |
5377
|
|
|
|
|
|
|
|
5378
|
|
|
|
|
|
|
///////// |
5379
|
|
|
|
|
|
|
// File: morpho/external_morpho.cpp |
5380
|
|
|
|
|
|
|
///////// |
5381
|
|
|
|
|
|
|
|
5382
|
|
|
|
|
|
|
// This file is part of MorphoDiTa <http://github.com/ufal/morphodita/>. |
5383
|
|
|
|
|
|
|
// |
5384
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5385
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5386
|
|
|
|
|
|
|
// |
5387
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5388
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5389
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5390
|
|
|
|
|
|
|
|
5391
|
0
|
|
|
|
|
|
// Loads the external morphology model from `is`.
//
// The stream is unpacked by compressor::load; the payload is a single
// length-prefixed string (1-byte length followed by that many characters)
// holding the tag assigned to forms without any analysis.
//
// Returns true only if unpacking succeeds, no binary_decoder_error is raised
// while reading the tag, and no unread data remains afterwards.
bool external_morpho::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  try {
    const unsigned tag_length = data.next_1B();
    unknown_tag.assign(data.next<char>(tag_length), tag_length);
  } catch (binary_decoder_error&) {
    return false;
  }

  // Leftover bytes mean the data does not have the expected layout.
  return data.is_end();
}
5405
|
|
|
|
|
|
|
|
5406
|
0
|
|
|
|
|
|
int external_morpho::analyze(string_piece form, guesser_mode /*guesser*/, vector& lemmas) const { |
5407
|
|
|
|
|
|
|
lemmas.clear(); |
5408
|
|
|
|
|
|
|
|
5409
|
0
|
0
|
|
|
|
|
if (form.len) { |
5410
|
|
|
|
|
|
|
// Start by skipping the first form |
5411
|
|
|
|
|
|
|
string_piece lemmatags = form; |
5412
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
0
|
|
|
|
|
|
5413
|
0
|
0
|
|
|
|
|
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
5414
|
|
|
|
|
|
|
|
5415
|
|
|
|
|
|
|
// Split lemmatags using ' ' into lemma-tag pairs. |
5416
|
0
|
0
|
|
|
|
|
while (lemmatags.len) { |
5417
|
|
|
|
|
|
|
auto lemma_start = lemmatags.str; |
5418
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
0
|
|
|
|
|
|
5419
|
0
|
0
|
|
|
|
|
if (!lemmatags.len) break; |
5420
|
|
|
|
|
|
|
auto lemma_len = lemmatags.str - lemma_start; |
5421
|
0
|
|
|
|
|
|
lemmatags.len--, lemmatags.str++; |
5422
|
|
|
|
|
|
|
|
5423
|
|
|
|
|
|
|
auto tag_start = lemmatags.str; |
5424
|
0
|
0
|
|
|
|
|
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
|
0
|
|
|
|
|
|
5425
|
|
|
|
|
|
|
auto tag_len = lemmatags.str - tag_start; |
5426
|
0
|
0
|
|
|
|
|
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
5427
|
|
|
|
|
|
|
|
5428
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(lemma_start, lemma_len), string(tag_start, tag_len)); |
5429
|
|
|
|
|
|
|
} |
5430
|
|
|
|
|
|
|
|
5431
|
0
|
0
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
5432
|
|
|
|
|
|
|
} |
5433
|
|
|
|
|
|
|
|
5434
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
5435
|
0
|
|
|
|
|
|
return -1; |
5436
|
|
|
|
|
|
|
} |
5437
|
|
|
|
|
|
|
|
5438
|
0
|
|
|
|
|
|
int external_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
5439
|
|
|
|
|
|
|
forms.clear(); |
5440
|
|
|
|
|
|
|
|
5441
|
0
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
5442
|
|
|
|
|
|
|
|
5443
|
0
|
0
|
|
|
|
|
if (lemma.len) { |
5444
|
|
|
|
|
|
|
// Start by locating the lemma |
5445
|
|
|
|
|
|
|
string_piece formtags = lemma; |
5446
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
0
|
|
|
|
|
|
5447
|
0
|
|
|
|
|
|
string_piece real_lemma(lemma.str, lemma.len - formtags.len); |
5448
|
0
|
0
|
|
|
|
|
if (formtags.len) formtags.len--, formtags.str++; |
5449
|
|
|
|
|
|
|
|
5450
|
|
|
|
|
|
|
// Split formtags using ' ' into form-tag pairs. |
5451
|
|
|
|
|
|
|
bool any_result = false; |
5452
|
0
|
0
|
|
|
|
|
while (formtags.len) { |
5453
|
|
|
|
|
|
|
auto form_start = formtags.str; |
5454
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
0
|
|
|
|
|
|
5455
|
0
|
0
|
|
|
|
|
if (!formtags.len) break; |
5456
|
|
|
|
|
|
|
auto form_len = formtags.str - form_start; |
5457
|
0
|
|
|
|
|
|
formtags.len--, formtags.str++; |
5458
|
|
|
|
|
|
|
|
5459
|
|
|
|
|
|
|
auto tag_start = formtags.str; |
5460
|
0
|
0
|
|
|
|
|
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
|
0
|
|
|
|
|
|
5461
|
|
|
|
|
|
|
auto tag_len = formtags.str - tag_start; |
5462
|
0
|
0
|
|
|
|
|
if (formtags.len) formtags.len--, formtags.str++; |
5463
|
|
|
|
|
|
|
|
5464
|
|
|
|
|
|
|
any_result = true; |
5465
|
|
|
|
|
|
|
string tag(tag_start, tag_len); |
5466
|
0
|
0
|
|
|
|
|
if (filter.matches(tag.c_str())) { |
5467
|
0
|
0
|
|
|
|
|
if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len)); |
|
|
0
|
|
|
|
|
|
5468
|
0
|
0
|
|
|
|
|
forms.back().forms.emplace_back(string(form_start, form_len), tag); |
5469
|
|
|
|
|
|
|
} |
5470
|
|
|
|
|
|
|
} |
5471
|
|
|
|
|
|
|
|
5472
|
0
|
0
|
|
|
|
|
if (any_result) return NO_GUESSER; |
5473
|
|
|
|
|
|
|
} |
5474
|
|
|
|
|
|
|
|
5475
|
|
|
|
|
|
|
return -1; |
5476
|
|
|
|
|
|
|
} |
5477
|
|
|
|
|
|
|
|
5478
|
0
|
|
|
|
|
|
// Returns the number of characters of `lemma` preceding the first space,
// or the whole length if it contains no space.
int external_morpho::raw_lemma_len(string_piece lemma) const {
  unsigned pos = 0;
  for (; pos < lemma.len; pos++)
    if (lemma.str[pos] == ' ') break;
  return pos;
}
5483
|
|
|
|
|
|
|
|
5484
|
0
|
|
|
|
|
|
// Returns the number of characters of `lemma` preceding the first space,
// or the whole length if it contains no space.
int external_morpho::lemma_id_len(string_piece lemma) const {
  unsigned pos = 0;
  for (; pos < lemma.len; pos++)
    if (lemma.str[pos] == ' ') break;
  return pos;
}
5489
|
|
|
|
|
|
|
|
5490
|
0
|
|
|
|
|
|
// Returns the number of characters of `form` preceding the first space,
// or the whole length if it contains no space.
int external_morpho::raw_form_len(string_piece form) const {
  unsigned pos = 0;
  for (; pos < form.len; pos++)
    if (form.str[pos] == ' ') break;
  return pos;
}
5495
|
|
|
|
|
|
|
|
5496
|
0
|
|
|
|
|
|
// Creates a generic_tokenizer for this model's version.
// The tokenizer is allocated with `new`; the caller receives the raw pointer.
tokenizer* external_morpho::new_tokenizer() const {
  tokenizer* const result = new generic_tokenizer(version);
  return result;
}
5499
|
|
|
|
|
|
|
|
5500
|
|
|
|
|
|
|
///////// |
5501
|
|
|
|
|
|
|
// File: morpho/generic_lemma_addinfo.h |
5502
|
|
|
|
|
|
|
///////// |
5503
|
|
|
|
|
|
|
|
5504
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
5505
|
|
|
|
|
|
|
// |
5506
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5507
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5508
|
|
|
|
|
|
|
// |
5509
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5510
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5511
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5512
|
|
|
|
|
|
|
|
5513
|
|
|
|
|
|
|
// Declarations |
5514
|
4
|
|
|
|
|
|
struct generic_lemma_addinfo { |
5515
|
|
|
|
|
|
|
inline static int raw_lemma_len(string_piece lemma); |
5516
|
|
|
|
|
|
|
inline static int lemma_id_len(string_piece lemma); |
5517
|
|
|
|
|
|
|
inline static string format(const unsigned char* addinfo, int addinfo_len); |
5518
|
|
|
|
|
|
|
inline static bool generatable(const unsigned char* addinfo, int addinfo_len); |
5519
|
|
|
|
|
|
|
|
5520
|
|
|
|
|
|
|
inline int parse(string_piece lemma, bool die_on_failure = false); |
5521
|
|
|
|
|
|
|
inline bool match_lemma_id(const unsigned char* other_addinfo, int other_addinfo_len); |
5522
|
|
|
|
|
|
|
|
5523
|
|
|
|
|
|
|
vector data; |
5524
|
|
|
|
|
|
|
}; |
5525
|
|
|
|
|
|
|
|
5526
|
|
|
|
|
|
|
// Definitions |
5527
|
|
|
|
|
|
|
// The raw lemma is the whole lemma: returns its full length.
int generic_lemma_addinfo::raw_lemma_len(string_piece lemma) {
  return int(lemma.len);
}
5530
|
|
|
|
|
|
|
|
5531
|
|
|
|
|
|
|
// The lemma id is the whole lemma: returns its full length.
int generic_lemma_addinfo::lemma_id_len(string_piece lemma) {
  return int(lemma.len);
}
5534
|
|
|
|
|
|
|
|
5535
|
|
|
|
|
|
|
// Always yields an empty string; both arguments are ignored.
string generic_lemma_addinfo::format(const unsigned char* /*addinfo*/, int /*addinfo_len*/) {
  string formatted;
  return formatted;
}
5538
|
|
|
|
|
|
|
|
5539
|
|
|
|
|
|
|
// Unconditionally reports true; both arguments are ignored.
bool generic_lemma_addinfo::generatable(const unsigned char* /*addinfo*/, int /*addinfo_len*/) {
  const bool always_generatable = true;
  return always_generatable;
}
5542
|
|
|
|
|
|
|
|
5543
|
|
|
|
|
|
|
// Nothing is parsed out of the lemma: returns its full length and leaves
// `data` untouched. `die_on_failure` is ignored.
int generic_lemma_addinfo::parse(string_piece lemma, bool /*die_on_failure*/) {
  return int(lemma.len);
}
5546
|
|
|
|
|
|
|
|
5547
|
|
|
|
|
|
|
// Unconditionally reports a match; both arguments are ignored.
bool generic_lemma_addinfo::match_lemma_id(const unsigned char* /*other_addinfo*/, int /*other_addinfo_len*/) {
  const bool always_matches = true;
  return always_matches;
}
5550
|
|
|
|
|
|
|
|
5551
|
|
|
|
|
|
|
///////// |
5552
|
|
|
|
|
|
|
// File: morpho/generic_morpho.h |
5553
|
|
|
|
|
|
|
///////// |
5554
|
|
|
|
|
|
|
|
5555
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
5556
|
|
|
|
|
|
|
// |
5557
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5558
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5559
|
|
|
|
|
|
|
// |
5560
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5561
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5562
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5563
|
|
|
|
|
|
|
|
5564
|
0
|
|
|
|
|
|
class generic_morpho : public morpho { |
5565
|
|
|
|
|
|
|
public: |
5566
|
2
|
|
|
|
|
|
generic_morpho(unsigned version) : version(version) {} |
5567
|
|
|
|
|
|
|
|
5568
|
|
|
|
|
|
|
virtual int analyze(string_piece form, morpho::guesser_mode guesser, vector& lemmas) const override; |
5569
|
|
|
|
|
|
|
virtual int generate(string_piece lemma, const char* tag_wildcard, guesser_mode guesser, vector& forms) const override; |
5570
|
|
|
|
|
|
|
virtual int raw_lemma_len(string_piece lemma) const override; |
5571
|
|
|
|
|
|
|
virtual int lemma_id_len(string_piece lemma) const override; |
5572
|
|
|
|
|
|
|
virtual int raw_form_len(string_piece form) const override; |
5573
|
|
|
|
|
|
|
virtual tokenizer* new_tokenizer() const override; |
5574
|
|
|
|
|
|
|
|
5575
|
|
|
|
|
|
|
bool load(istream& is); |
5576
|
|
|
|
|
|
|
private: |
5577
|
|
|
|
|
|
|
inline void analyze_special(string_piece form, vector& lemmas) const; |
5578
|
|
|
|
|
|
|
|
5579
|
|
|
|
|
|
|
unsigned version; |
5580
|
|
|
|
|
|
|
morpho_dictionary dictionary; |
5581
|
|
|
|
|
|
|
unique_ptr statistical_guesser; |
5582
|
|
|
|
|
|
|
|
5583
|
|
|
|
|
|
|
string unknown_tag, number_tag, punctuation_tag, symbol_tag; |
5584
|
|
|
|
|
|
|
}; |
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
///////// |
5587
|
|
|
|
|
|
|
// File: morpho/generic_morpho.cpp |
5588
|
|
|
|
|
|
|
///////// |
5589
|
|
|
|
|
|
|
|
5590
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
5591
|
|
|
|
|
|
|
// |
5592
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5593
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5594
|
|
|
|
|
|
|
// |
5595
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5596
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5597
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5598
|
|
|
|
|
|
|
|
5599
|
2
|
|
|
|
|
|
// Loads the generic morphology model from `is`.
//
// After unpacking by compressor::load, the payload is read in this order:
//   1. four length-prefixed strings (1-byte length + characters): the
//      unknown, number, punctuation and symbol tags;
//   2. the dictionary;
//   3. a 1-byte flag, followed by a statistical guesser if the flag is nonzero.
//
// Returns true only if unpacking succeeds, no binary_decoder_error is raised
// while decoding, and no unread data remains afterwards.
bool generic_morpho::load(istream& is) {
  binary_decoder data;
  if (!compressor::load(is, data)) return false;

  try {
    // The tags are stored back to back, all in the same layout.
    for (string* tag : {&unknown_tag, &number_tag, &punctuation_tag, &symbol_tag}) {
      const unsigned length = data.next_1B();
      tag->assign(data.next<char>(length), length);
    }

    dictionary.load(data);

    // Drop any previously loaded guesser before looking for a new one.
    statistical_guesser.reset();
    if (data.next_1B()) {
      statistical_guesser.reset(new morpho_statistical_guesser());
      statistical_guesser->load(data);
    }
  } catch (binary_decoder_error&) {
    return false;
  }

  // Leftover bytes mean the data does not have the expected layout.
  return data.is_end();
}
5629
|
|
|
|
|
|
|
|
5630
|
13
|
|
|
|
|
|
int generic_morpho::analyze(string_piece form, guesser_mode guesser, vector& lemmas) const { |
5631
|
|
|
|
|
|
|
lemmas.clear(); |
5632
|
|
|
|
|
|
|
|
5633
|
13
|
50
|
|
|
|
|
if (form.len) { |
5634
|
|
|
|
|
|
|
// Generate all casing variants if needed (they are different than given form). |
5635
|
|
|
|
|
|
|
string form_uclc; // first uppercase, rest lowercase |
5636
|
|
|
|
|
|
|
string form_lc; // all lowercase |
5637
|
13
|
50
|
|
|
|
|
generate_casing_variants(form, form_uclc, form_lc); |
5638
|
|
|
|
|
|
|
|
5639
|
|
|
|
|
|
|
// Start by analysing using the dictionary and all casing variants. |
5640
|
13
|
50
|
|
|
|
|
dictionary.analyze(form, lemmas); |
5641
|
13
|
50
|
|
|
|
|
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
|
0
|
|
|
|
|
|
5642
|
13
|
100
|
|
|
|
|
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
|
50
|
|
|
|
|
|
5643
|
13
|
100
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
5644
|
|
|
|
|
|
|
|
5645
|
|
|
|
|
|
|
// Then call analyze_special to handle numbers, punctuation and symbols. |
5646
|
7
|
50
|
|
|
|
|
analyze_special(form, lemmas); |
5647
|
7
|
100
|
|
|
|
|
if (!lemmas.empty()) return NO_GUESSER; |
5648
|
|
|
|
|
|
|
|
5649
|
|
|
|
|
|
|
// For the statistical guesser, use all casing variants. |
5650
|
1
|
50
|
|
|
|
|
if (guesser == GUESSER && statistical_guesser) { |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
5651
|
0
|
0
|
|
|
|
|
if (form_uclc.empty() && form_lc.empty()) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
5652
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, nullptr); |
5653
|
|
|
|
|
|
|
else { |
5654
|
0
|
0
|
|
|
|
|
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
5655
|
0
|
0
|
|
|
|
|
statistical_guesser->analyze(form, lemmas, &used_rules); |
5656
|
0
|
0
|
|
|
|
|
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
5657
|
0
|
0
|
|
|
|
|
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
|
0
|
|
|
|
|
|
5658
|
|
|
|
|
|
|
} |
5659
|
|
|
|
|
|
|
} |
5660
|
1
|
50
|
|
|
|
|
if (!lemmas.empty()) return GUESSER; |
5661
|
|
|
|
|
|
|
} |
5662
|
|
|
|
|
|
|
|
5663
|
1
|
50
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
5664
|
13
|
|
|
|
|
|
return -1; |
5665
|
|
|
|
|
|
|
} |
5666
|
|
|
|
|
|
|
|
5667
|
4
|
|
|
|
|
|
int generic_morpho::generate(string_piece lemma, const char* tag_wildcard, morpho::guesser_mode /*guesser*/, vector& forms) const { |
5668
|
|
|
|
|
|
|
forms.clear(); |
5669
|
|
|
|
|
|
|
|
5670
|
8
|
|
|
|
|
|
tag_filter filter(tag_wildcard); |
5671
|
|
|
|
|
|
|
|
5672
|
4
|
50
|
|
|
|
|
if (lemma.len) { |
5673
|
4
|
50
|
|
|
|
|
if (dictionary.generate(lemma, filter, forms)) |
|
|
50
|
|
|
|
|
|
5674
|
|
|
|
|
|
|
return NO_GUESSER; |
5675
|
|
|
|
|
|
|
} |
5676
|
|
|
|
|
|
|
|
5677
|
|
|
|
|
|
|
return -1; |
5678
|
|
|
|
|
|
|
} |
5679
|
|
|
|
|
|
|
|
5680
|
0
|
|
|
|
|
|
// Delegates to the generic lemma policy.
int generic_morpho::raw_lemma_len(string_piece lemma) const {
  const int length = generic_lemma_addinfo::raw_lemma_len(lemma);
  return length;
}
5683
|
|
|
|
|
|
|
|
5684
|
0
|
|
|
|
|
|
// Delegates to the generic lemma policy.
int generic_morpho::lemma_id_len(string_piece lemma) const {
  const int length = generic_lemma_addinfo::lemma_id_len(lemma);
  return length;
}
5687
|
|
|
|
|
|
|
|
5688
|
6
|
|
|
|
|
|
// The raw form is the whole form: returns its full length.
int generic_morpho::raw_form_len(string_piece form) const {
  return int(form.len);
}
5691
|
|
|
|
|
|
|
|
5692
|
2
|
|
|
|
|
|
// Creates a generic_tokenizer for this model's version.
// The tokenizer is allocated with `new`; the caller receives the raw pointer.
tokenizer* generic_morpho::new_tokenizer() const {
  tokenizer* const result = new generic_tokenizer(version);
  return result;
}
5695
|
|
|
|
|
|
|
|
5696
|
7
|
|
|
|
|
|
void generic_morpho::analyze_special(string_piece form, vector& lemmas) const { |
5697
|
|
|
|
|
|
|
using namespace unilib; |
5698
|
|
|
|
|
|
|
|
5699
|
|
|
|
|
|
|
// Analyzer for numbers, punctuation and symbols. |
5700
|
|
|
|
|
|
|
// Number is anything matching [+-]? is_Pn* ([.,] is_Pn*)? ([Ee] [+-]? is_Pn+)? for at least one is_Pn* nonempty. |
5701
|
|
|
|
|
|
|
// Punctuation is any form beginning with either unicode punctuation or punctuation_exceptions character. |
5702
|
|
|
|
|
|
|
// Beware that numbers takes precedence, so - is punctuation, -3 is number, -. is punctuation, -.3 is number. |
5703
|
14
|
50
|
|
|
|
|
if (!form.len) return; |
5704
|
|
|
|
|
|
|
|
5705
|
7
|
|
|
|
|
|
string_piece number = form; |
5706
|
7
|
|
|
|
|
|
char32_t first = utf8::decode(number.str, number.len); |
5707
|
|
|
|
|
|
|
|
5708
|
|
|
|
|
|
|
// Try matching a number. |
5709
|
|
|
|
|
|
|
char32_t codepoint = first; |
5710
|
|
|
|
|
|
|
bool any_digit = false; |
5711
|
7
|
50
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
5712
|
8
|
100
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
5713
|
7
|
100
|
|
|
|
|
if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len); |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
5714
|
8
|
100
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
5715
|
7
|
100
|
|
|
|
|
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
|
50
|
|
|
|
|
|
5716
|
1
|
|
|
|
|
|
codepoint = utf8::decode(number.str, number.len); |
5717
|
1
|
50
|
|
|
|
|
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
5718
|
|
|
|
|
|
|
any_digit = false; |
5719
|
3
|
100
|
|
|
|
|
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
5720
|
|
|
|
|
|
|
} |
5721
|
|
|
|
|
|
|
|
5722
|
7
|
100
|
|
|
|
|
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
5723
|
1
|
50
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), number_tag); |
5724
|
1
|
|
|
|
|
|
return; |
5725
|
|
|
|
|
|
|
} |
5726
|
|
|
|
|
|
|
|
5727
|
|
|
|
|
|
|
// Try matching punctuation or symbol. |
5728
|
|
|
|
|
|
|
bool punctuation = true, symbol = true; |
5729
|
18
|
|
|
|
|
|
string_piece form_ori = form; |
5730
|
18
|
100
|
|
|
|
|
while (form.len) { |
5731
|
12
|
|
|
|
|
|
codepoint = utf8::decode(form.str, form.len); |
5732
|
18
|
100
|
|
|
|
|
punctuation = punctuation && unicode::category(codepoint) & unicode::P; |
|
|
100
|
|
|
|
|
|
5733
|
18
|
100
|
|
|
|
|
symbol = symbol && unicode::category(codepoint) & unicode::S; |
|
|
100
|
|
|
|
|
|
5734
|
|
|
|
|
|
|
} |
5735
|
6
|
100
|
|
|
|
|
if (punctuation) |
5736
|
8
|
50
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
5737
|
2
|
100
|
|
|
|
|
else if (symbol) |
5738
|
7
|
50
|
|
|
|
|
lemmas.emplace_back(string(form_ori.str, form_ori.len), symbol_tag); |
5739
|
|
|
|
|
|
|
} |
5740
|
|
|
|
|
|
|
|
5741
|
|
|
|
|
|
|
///////// |
5742
|
|
|
|
|
|
|
// File: morpho/morpho_ids.h |
5743
|
|
|
|
|
|
|
///////// |
5744
|
|
|
|
|
|
|
|
5745
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
5746
|
|
|
|
|
|
|
// |
5747
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5748
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5749
|
|
|
|
|
|
|
// |
5750
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5751
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5752
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5753
|
|
|
|
|
|
|
|
5754
|
|
|
|
|
|
|
// Identifiers of the morphology implementations. morpho::load reads one of
// these values as the first byte of a model and dispatches on it.
class morpho_ids {
 public:
  enum morpho_id {
    CZECH = 0,
    ENGLISH_V1 = 1,
    GENERIC = 2,
    EXTERNAL = 3,
    ENGLISH_V2 = 4,
    ENGLISH_V3 = 5, ENGLISH = ENGLISH_V3,  // ENGLISH aliases the V3 id
    SLOVAK_PDT = 6,
    DERIVATOR_DICTIONARY = 7,
  };

  // Translates a textual name into its identifier.
  // On success stores the identifier in `id` and returns true; for an
  // unrecognized name returns false and leaves `id` untouched.
  static bool parse(const std::string& str, morpho_id& id) {
    static const struct { const char* name; morpho_id id; } known[] = {
      {"czech", CZECH},
      {"english", ENGLISH},
      {"external", EXTERNAL},
      {"generic", GENERIC},
      {"slovak_pdt", SLOVAK_PDT},
    };

    for (const auto& entry : known)
      if (str == entry.name) {
        id = entry.id;
        return true;
      }
    return false;
  }
};

using morpho_id = morpho_ids::morpho_id;
5778
|
|
|
|
|
|
|
|
5779
|
|
|
|
|
|
|
///////// |
5780
|
|
|
|
|
|
|
// File: utils/new_unique_ptr.h |
5781
|
|
|
|
|
|
|
///////// |
5782
|
|
|
|
|
|
|
|
5783
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
5784
|
|
|
|
|
|
|
// |
5785
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5786
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5787
|
|
|
|
|
|
|
// |
5788
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5789
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5790
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5791
|
|
|
|
|
|
|
|
5792
|
|
|
|
|
|
|
namespace utils {

// Constructs a T from `args` (perfectly forwarded to its constructor) and
// returns it wrapped in a unique_ptr. T must be given explicitly:
//   auto ptr = new_unique_ptr<T>(arg1, arg2);
template <typename T, typename... Args>
std::unique_ptr<T> new_unique_ptr(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

} // namespace utils
5800
|
|
|
|
|
|
|
|
5801
|
|
|
|
|
|
|
///////// |
5802
|
|
|
|
|
|
|
// File: utils/path_from_utf8.h |
5803
|
|
|
|
|
|
|
///////// |
5804
|
|
|
|
|
|
|
|
5805
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
5806
|
|
|
|
|
|
|
// |
5807
|
|
|
|
|
|
|
// Copyright 2022 Institute of Formal and Applied Linguistics, Faculty of |
5808
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5809
|
|
|
|
|
|
|
// |
5810
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5811
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5812
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5813
|
|
|
|
|
|
|
|
5814
|
|
|
|
|
|
|
namespace utils {

//
// Declarations
//

// Converts a UTF-8 encoded path into the string type handed to file stream
// constructors: a UTF-16 wstring on Windows, the unchanged string elsewhere.
#ifdef _WIN32
inline std::wstring path_from_utf8(const char* str);
inline std::wstring path_from_utf8(const std::string& str);
#else
inline std::string path_from_utf8(const char* str);
inline const std::string& path_from_utf8(const std::string& str);
#endif

//
// Definitions
//

#ifdef _WIN32

inline std::wstring path_from_utf8(const char* str) {
  // We could implement this using codecvt_utf8_utf16, but it is not available
  // in GCC 4.9, which we still use. We could also use MultiByteToWideChar,
  // but using it would require changing our build infrastructure -- hence
  // we implement the conversion manually.
  std::wstring wstr;
  while (*str) {
    // The lead byte determines the initial bits of the codepoint and the
    // number of continuation bytes expected after it.
    const unsigned char lead = static_cast<unsigned char>(*str++);
    char32_t chr;
    int continuation_bytes = 0;
    if (lead < 0x80) chr = lead;
    else if (lead < 0xC0) chr = '?';  // stray continuation byte
    else if (lead < 0xE0) chr = char32_t(lead & 0x1F) << 6, continuation_bytes = 1;
    else if (lead < 0xF0) chr = char32_t(lead & 0x0F) << 12, continuation_bytes = 2;
    else if (lead < 0xF8) chr = char32_t(lead & 0x07) << 18, continuation_bytes = 3;
    else chr = '?';  // invalid lead byte

    // Each continuation byte contributes six bits. An invalid one turns the
    // whole sequence into '?' and is left in place, to be decoded as the
    // start of the next sequence.
    for (int shift = 6 * (continuation_bytes - 1); shift >= 0; shift -= 6) {
      const unsigned char next = static_cast<unsigned char>(*str);
      if (next < 0x80 || next >= 0xC0) {
        chr = '?';
        break;
      }
      chr += char32_t(next & 0x3F) << shift;
      ++str;
    }

    // Emit as UTF-16: one unit up to U+FFFF, a surrogate pair up to U+10FFFF.
    if (chr <= 0xFFFF) {
      wstr.push_back(chr);
    } else if (chr <= 0x10FFFF) {
      const char32_t offset = chr - 0x10000;
      wstr.push_back(0xD800 + (offset >> 10));
      wstr.push_back(0xDC00 + (offset & 0x3FF));
    } else {
      wstr.push_back('?');
    }
  }
  return wstr;
}

inline std::wstring path_from_utf8(const std::string& str) {
  return path_from_utf8(str.c_str());
}

#else

// No conversion is needed: the path is returned as given.
inline std::string path_from_utf8(const char* str) {
  return std::string(str);
}

inline const std::string& path_from_utf8(const std::string& str) {
  return str;
}

#endif

} // namespace utils
5898
|
|
|
|
|
|
|
|
5899
|
|
|
|
|
|
|
///////// |
5900
|
|
|
|
|
|
|
// File: morpho/morpho.cpp |
5901
|
|
|
|
|
|
|
///////// |
5902
|
|
|
|
|
|
|
|
5903
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
5904
|
|
|
|
|
|
|
// |
5905
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5906
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5907
|
|
|
|
|
|
|
// |
5908
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5909
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5910
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5911
|
|
|
|
|
|
|
|
5912
|
2
|
|
|
|
|
|
// Loads a morphological dictionary from a binary stream.
//
// The first byte selects the implementation (see morpho_ids); the rest of
// the stream is passed to that implementation's load(). A
// DERIVATOR_DICTIONARY record holds a derivator followed by another complete
// dictionary, to which the derivator is then attached.
//
// Returns a newly allocated object that the caller owns, or nullptr when the
// identifier is not recognized (including end of stream) or loading fails.
morpho* morpho::load(istream& is) {
  // Dispatch on the raw value: istream::get() yields EOF on failure, which
  // is not a valid morpho_id and must not be converted to the enum.
  const int id = is.get();
  switch (id) {
    case morpho_ids::CZECH:
      {
        auto res = new_unique_ptr<czech_morpho>(czech_morpho::morpho_language::CZECH, 1);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::ENGLISH_V1:
    case morpho_ids::ENGLISH_V2:
    case morpho_ids::ENGLISH_V3:
      {
        // The three English ids differ only in the version passed on.
        const int version = id == morpho_ids::ENGLISH_V1 ? 1 :
                            id == morpho_ids::ENGLISH_V2 ? 2 :
                            3;
        auto res = new_unique_ptr<english_morpho>(version);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::EXTERNAL:
      {
        auto res = new_unique_ptr<external_morpho>(1);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::GENERIC:
      {
        auto res = new_unique_ptr<generic_morpho>(1);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::SLOVAK_PDT:
      {
        auto res = new_unique_ptr<czech_morpho>(czech_morpho::morpho_language::SLOVAK, 3);
        if (res->load(is)) return res.release();
        break;
      }
    case morpho_ids::DERIVATOR_DICTIONARY:
      {
        auto derinet = new_unique_ptr<derivator_dictionary>();
        if (!derinet->load(is)) return nullptr;

        // The dictionary proper follows the derivator in the same stream.
        unique_ptr<morpho> dictionary(load(is));
        if (!dictionary) return nullptr;

        // Link both ways; the dictionary takes ownership of the derivator.
        derinet->dictionary = dictionary.get();
        dictionary->derinet.reset(derinet.release());
        return dictionary.release();
      }
  }

  return nullptr;
}
5964
|
|
|
|
|
|
|
|
5965
|
1
|
|
|
|
|
|
// Loads a morphological dictionary from the file `fname` (a UTF-8 encoded
// path), opened in binary mode.
// Returns nullptr if the file cannot be opened; otherwise returns whatever
// the stream overload of load() returns.
morpho* morpho::load(const char* fname) {
  ifstream model(path_from_utf8(fname).c_str(), ifstream::binary);
  return model ? load(model) : nullptr;
}
5971
|
|
|
|
|
|
|
|
5972
|
0
|
|
|
|
|
|
// Returns the attached derivator without transferring ownership, or nullptr
// if none is attached.
const derivator* morpho::get_derivator() const {
  const derivator* const attached = derinet.get();
  return attached;
}
5975
|
|
|
|
|
|
|
|
5976
|
|
|
|
|
|
|
///////// |
5977
|
|
|
|
|
|
|
// File: morpho/morpho_statistical_guesser.cpp |
5978
|
|
|
|
|
|
|
///////// |
5979
|
|
|
|
|
|
|
|
5980
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
5981
|
|
|
|
|
|
|
// |
5982
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
5983
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
5984
|
|
|
|
|
|
|
// |
5985
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
5986
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
5987
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
5988
|
|
|
|
|
|
|
|
5989
|
0
|
|
|
|
|
|
void morpho_statistical_guesser::load(binary_decoder& data) { |
5990
|
|
|
|
|
|
|
// Load tags and default tag |
5991
|
0
|
|
|
|
|
|
tags.resize(data.next_2B()); |
5992
|
0
|
0
|
|
|
|
|
for (auto&& tag : tags) { |
5993
|
0
|
|
|
|
|
|
tag.resize(data.next_1B()); |
5994
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < tag.size(); i++) |
5995
|
0
|
|
|
|
|
|
tag[i] = data.next_1B(); |
5996
|
|
|
|
|
|
|
} |
5997
|
0
|
|
|
|
|
|
default_tag = data.next_2B(); |
5998
|
|
|
|
|
|
|
|
5999
|
|
|
|
|
|
|
// Load rules |
6000
|
0
|
|
|
|
|
|
rules.load(data); |
6001
|
0
|
|
|
|
|
|
} |
6002
|
|
|
|
|
|
|
|
6003
|
|
|
|
|
|
|
// Helper method for analyze. |
6004
|
0
|
|
|
|
|
|
static bool contains(morpho_statistical_guesser::used_rules* used, const string& rule) { |
6005
|
0
|
0
|
|
|
|
|
if (!used) return false; |
6006
|
|
|
|
|
|
|
|
6007
|
0
|
0
|
|
|
|
|
for (auto&& used_rule : *used) |
6008
|
0
|
0
|
|
|
|
|
if (used_rule == rule) |
6009
|
|
|
|
|
|
|
return true; |
6010
|
|
|
|
|
|
|
|
6011
|
|
|
|
|
|
|
return false; |
6012
|
|
|
|
|
|
|
} |
6013
|
|
|
|
|
|
|
|
6014
|
|
|
|
|
|
|
// Produces unique lemma-tag pairs. |
6015
|
0
|
|
|
|
|
|
void morpho_statistical_guesser::analyze(string_piece form, vector& lemmas, morpho_statistical_guesser::used_rules* used) { |
6016
|
|
|
|
|
|
|
unsigned lemmas_initial_size = lemmas.size(); |
6017
|
|
|
|
|
|
|
|
6018
|
|
|
|
|
|
|
// We have rules in format "suffix prefix" in rules. |
6019
|
|
|
|
|
|
|
// Find the matching rule with longest suffix and of those with longest prefix. |
6020
|
0
|
0
|
|
|
|
|
string rule_label; rule_label.reserve(12); |
6021
|
|
|
|
|
|
|
unsigned suffix_len = 0; |
6022
|
0
|
0
|
|
|
|
|
for (; suffix_len < form.len; suffix_len++) { |
6023
|
0
|
0
|
|
|
|
|
rule_label.push_back(form.str[form.len - (suffix_len + 1)]); |
6024
|
0
|
0
|
|
|
|
|
if (!rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); })) |
6025
|
|
|
|
|
|
|
break; |
6026
|
|
|
|
|
|
|
} |
6027
|
|
|
|
|
|
|
|
6028
|
0
|
0
|
|
|
|
|
for (suffix_len++; suffix_len--; ) { |
6029
|
0
|
|
|
|
|
|
rule_label.resize(suffix_len); |
6030
|
0
|
0
|
|
|
|
|
rule_label.push_back(' '); |
6031
|
|
|
|
|
|
|
|
6032
|
|
|
|
|
|
|
const unsigned char* rule = nullptr; |
6033
|
|
|
|
|
|
|
unsigned rule_prefix_len = 0; |
6034
|
0
|
0
|
|
|
|
|
for (unsigned prefix_len = 0; prefix_len + suffix_len <= form.len; prefix_len++) { |
6035
|
0
|
0
|
|
|
|
|
if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]); |
|
|
0
|
|
|
|
|
|
6036
|
0
|
|
|
|
|
|
const unsigned char* found = rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); }); |
6037
|
0
|
0
|
|
|
|
|
if (!found) break; |
6038
|
0
|
0
|
|
|
|
|
if (*(found += sizeof(uint16_t))) { |
6039
|
|
|
|
|
|
|
rule = found; |
6040
|
|
|
|
|
|
|
rule_prefix_len = prefix_len; |
6041
|
|
|
|
|
|
|
} |
6042
|
|
|
|
|
|
|
} |
6043
|
|
|
|
|
|
|
|
6044
|
0
|
0
|
|
|
|
|
if (rule) { |
6045
|
0
|
|
|
|
|
|
rule_label.resize(suffix_len + 1 + rule_prefix_len); |
6046
|
0
|
0
|
|
|
|
|
if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' ' |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6047
|
0
|
0
|
|
|
|
|
if (used) used->push_back(rule_label); |
|
|
0
|
|
|
|
|
|
6048
|
0
|
0
|
|
|
|
|
for (int rules_len = *rule++; rules_len; rules_len--) { |
6049
|
0
|
|
|
|
|
|
unsigned pref_del_len = *rule++; const char* pref_del = (const char*)rule; rule += pref_del_len; |
6050
|
0
|
|
|
|
|
|
unsigned pref_add_len = *rule++; const char* pref_add = (const char*)rule; rule += pref_add_len; |
6051
|
0
|
|
|
|
|
|
unsigned suff_del_len = *rule++; const char* suff_del = (const char*)rule; rule += suff_del_len; |
6052
|
0
|
|
|
|
|
|
unsigned suff_add_len = *rule++; const char* suff_add = (const char*)rule; rule += suff_add_len; |
6053
|
0
|
|
|
|
|
|
unsigned tags_len = *rule++; const uint16_t* tags = (const uint16_t*)rule; rule += tags_len * sizeof(uint16_t); |
6054
|
|
|
|
|
|
|
|
6055
|
0
|
0
|
|
|
|
|
if (pref_del_len + suff_del_len > form.len || |
|
|
0
|
|
|
|
|
|
6056
|
0
|
0
|
|
|
|
|
(pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) || |
|
|
0
|
|
|
|
|
|
6057
|
0
|
0
|
|
|
|
|
(suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6058
|
0
|
|
|
|
|
|
(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len == 0)) |
6059
|
0
|
|
|
|
|
|
continue; |
6060
|
|
|
|
|
|
|
|
6061
|
|
|
|
|
|
|
string lemma; |
6062
|
0
|
0
|
|
|
|
|
lemma.reserve(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len); |
6063
|
0
|
0
|
|
|
|
|
if (pref_add_len) lemma.append(pref_add, pref_add_len); |
|
|
0
|
|
|
|
|
|
6064
|
0
|
0
|
|
|
|
|
if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len); |
|
|
0
|
|
|
|
|
|
6065
|
0
|
0
|
|
|
|
|
if (suff_add_len) lemma.append(suff_add, suff_add_len); |
|
|
0
|
|
|
|
|
|
6066
|
0
|
0
|
|
|
|
|
while (tags_len--) |
6067
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(lemma, this->tags[unaligned_load_inc(tags)]); |
6068
|
|
|
|
|
|
|
} |
6069
|
|
|
|
|
|
|
} |
6070
|
|
|
|
|
|
|
break; |
6071
|
|
|
|
|
|
|
} |
6072
|
|
|
|
|
|
|
} |
6073
|
|
|
|
|
|
|
|
6074
|
|
|
|
|
|
|
// If nothing was found, use default tag. |
6075
|
0
|
0
|
|
|
|
|
if (lemmas.size() == lemmas_initial_size) |
6076
|
0
|
0
|
|
|
|
|
if (!contains(used, string())) { |
6077
|
0
|
0
|
|
|
|
|
if (used) used->push_back(string()); |
6078
|
0
|
0
|
|
|
|
|
lemmas.emplace_back(string(form.str, form.len), tags[default_tag]); |
6079
|
|
|
|
|
|
|
} |
6080
|
0
|
|
|
|
|
|
} |
6081
|
|
|
|
|
|
|
|
6082
|
|
|
|
|
|
|
///////// |
6083
|
|
|
|
|
|
|
// File: morpho/tag_filter.cpp |
6084
|
|
|
|
|
|
|
///////// |
6085
|
|
|
|
|
|
|
|
6086
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6087
|
|
|
|
|
|
|
// |
6088
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6089
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6090
|
|
|
|
|
|
|
// |
6091
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6092
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6093
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6094
|
|
|
|
|
|
|
|
6095
|
4
|
|
|
|
|
|
tag_filter::tag_filter(const char* filter) { |
6096
|
4
|
50
|
|
|
|
|
if (!filter) return; |
6097
|
|
|
|
|
|
|
|
6098
|
4
|
50
|
|
|
|
|
wildcard.assign(filter); |
6099
|
|
|
|
|
|
|
filter = wildcard.c_str(); |
6100
|
|
|
|
|
|
|
|
6101
|
10
|
100
|
|
|
|
|
for (int tag_pos = 0, filter_pos = 0; filter[filter_pos]; tag_pos++, filter_pos++) { |
6102
|
6
|
100
|
|
|
|
|
if (filter[filter_pos] == '?') continue; |
6103
|
5
|
100
|
|
|
|
|
if (filter[filter_pos] == '[') { |
6104
|
3
|
|
|
|
|
|
filter_pos++; |
6105
|
|
|
|
|
|
|
|
6106
|
3
|
|
|
|
|
|
bool negate = false; |
6107
|
3
|
100
|
|
|
|
|
if (filter[filter_pos] == '^') negate = true, filter_pos++; |
6108
|
|
|
|
|
|
|
|
6109
|
3
|
|
|
|
|
|
int chars_start = filter_pos; |
6110
|
7
|
50
|
|
|
|
|
for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false) |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
6111
|
4
|
|
|
|
|
|
filter_pos++; |
6112
|
|
|
|
|
|
|
|
6113
|
3
|
50
|
|
|
|
|
filters.emplace_back(tag_pos, negate, chars_start, filter_pos - chars_start); |
6114
|
3
|
50
|
|
|
|
|
if (!filter[filter_pos]) break; |
6115
|
|
|
|
|
|
|
} else { |
6116
|
2
|
50
|
|
|
|
|
filters.emplace_back(tag_pos, false, filter_pos, 1); |
6117
|
|
|
|
|
|
|
} |
6118
|
|
|
|
|
|
|
} |
6119
|
|
|
|
|
|
|
} |
6120
|
|
|
|
|
|
|
|
6121
|
|
|
|
|
|
|
///////// |
6122
|
|
|
|
|
|
|
// File: tagger/tagger.h |
6123
|
|
|
|
|
|
|
///////// |
6124
|
|
|
|
|
|
|
|
6125
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6126
|
|
|
|
|
|
|
// |
6127
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6128
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6129
|
|
|
|
|
|
|
// |
6130
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6131
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6132
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6133
|
|
|
|
|
|
|
|
6134
|
1
|
|
|
|
|
|
class tagger { |
6135
|
|
|
|
|
|
|
public: |
6136
|
0
|
|
|
|
|
|
virtual ~tagger() {} |
6137
|
|
|
|
|
|
|
|
6138
|
|
|
|
|
|
|
static tagger* load(const char* fname); |
6139
|
|
|
|
|
|
|
static tagger* load(istream& is); |
6140
|
|
|
|
|
|
|
|
6141
|
|
|
|
|
|
|
// Return morpho associated with the tagger. Do not delete the pointer, it is |
6142
|
|
|
|
|
|
|
// owned by the tagger instance and deleted in the tagger destructor. |
6143
|
|
|
|
|
|
|
virtual const morpho* get_morpho() const = 0; |
6144
|
|
|
|
|
|
|
|
6145
|
|
|
|
|
|
|
// Perform morphologic analysis and subsequent disambiguation. |
6146
|
|
|
|
|
|
|
virtual void tag(const vector& forms, vector& tags, morpho::guesser_mode guesser = morpho::GUESSER_UNSPECIFIED) const = 0; |
6147
|
|
|
|
|
|
|
|
6148
|
|
|
|
|
|
|
// Perform disambiguation only on given analyses. |
6149
|
|
|
|
|
|
|
virtual void tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const = 0; |
6150
|
|
|
|
|
|
|
|
6151
|
|
|
|
|
|
|
// Construct a new tokenizer instance appropriate for this tagger. |
6152
|
|
|
|
|
|
|
// Can return NULL if no such tokenizer exists. |
6153
|
|
|
|
|
|
|
// Is equal to get_morpho()->new_tokenizer. |
6154
|
|
|
|
|
|
|
tokenizer* new_tokenizer() const; |
6155
|
|
|
|
|
|
|
}; |
6156
|
|
|
|
|
|
|
|
6157
|
|
|
|
|
|
|
///////// |
6158
|
|
|
|
|
|
|
// File: tagger/elementary_features.h |
6159
|
|
|
|
|
|
|
///////// |
6160
|
|
|
|
|
|
|
|
6161
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6162
|
|
|
|
|
|
|
// |
6163
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6164
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6165
|
|
|
|
|
|
|
// |
6166
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6167
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6168
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6169
|
|
|
|
|
|
|
|
6170
|
|
|
|
|
|
|
// Declarations |
6171
|
|
|
|
|
|
|
enum elementary_feature_type { PER_FORM, PER_TAG, DYNAMIC }; |
6172
|
|
|
|
|
|
|
enum elementary_feature_range { ONLY_CURRENT, ANY_OFFSET }; |
6173
|
|
|
|
|
|
|
|
6174
|
|
|
|
|
|
|
typedef uint32_t elementary_feature_value; |
6175
|
|
|
|
|
|
|
enum :elementary_feature_value { elementary_feature_unknown = 0, elementary_feature_empty = 1 }; |
6176
|
|
|
|
|
|
|
|
6177
|
|
|
|
|
|
|
struct elementary_feature_description { |
6178
|
|
|
|
|
|
|
string name; |
6179
|
|
|
|
|
|
|
elementary_feature_type type; |
6180
|
|
|
|
|
|
|
elementary_feature_range range; |
6181
|
|
|
|
|
|
|
int index; |
6182
|
|
|
|
|
|
|
int map_index; |
6183
|
|
|
|
|
|
|
}; |
6184
|
|
|
|
|
|
|
|
6185
|
|
|
|
|
|
|
template |
6186
|
0
|
|
|
|
|
|
class elementary_features { |
6187
|
|
|
|
|
|
|
public: |
6188
|
|
|
|
|
|
|
bool load(istream& is); |
6189
|
|
|
|
|
|
|
bool save(ostream& out); |
6190
|
|
|
|
|
|
|
|
6191
|
|
|
|
|
|
|
vector |
6192
|
|
|
|
|
|
|
}; |
6193
|
|
|
|
|
|
|
|
6194
|
0
|
|
|
|
|
|
class persistent_elementary_feature_map : public persistent_unordered_map { |
6195
|
|
|
|
|
|
|
public: |
6196
|
|
|
|
|
|
|
persistent_elementary_feature_map() : persistent_unordered_map() {} |
6197
|
|
|
|
|
|
|
persistent_elementary_feature_map(const persistent_unordered_map&& map) : persistent_unordered_map(map) {} |
6198
|
|
|
|
|
|
|
|
6199
|
|
|
|
|
|
|
elementary_feature_value value(const char* feature, int len) const { |
6200
|
120
|
|
|
|
|
|
auto* it = at_typed(feature, len); |
6201
|
120
|
0
|
|
|
|
|
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6202
|
|
|
|
|
|
|
} |
6203
|
|
|
|
|
|
|
}; |
6204
|
|
|
|
|
|
|
|
6205
|
|
|
|
|
|
|
// Definitions |
6206
|
|
|
|
|
|
|
template |
6207
|
1
|
|
|
|
|
|
inline bool elementary_features |
6208
|
|
|
|
|
|
|
binary_decoder data; |
6209
|
1
|
50
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
50
|
|
|
|
|
|
6210
|
|
|
|
|
|
|
|
6211
|
|
|
|
|
|
|
try { |
6212
|
1
|
50
|
|
|
|
|
maps.resize(data.next_1B()); |
|
|
50
|
|
|
|
|
|
6213
|
27
|
100
|
|
|
|
|
for (auto&& map : maps) |
6214
|
26
|
50
|
|
|
|
|
map.load(data); |
|
|
0
|
|
|
|
|
|
6215
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
6216
|
|
|
|
|
|
|
return false; |
6217
|
|
|
|
|
|
|
} |
6218
|
|
|
|
|
|
|
|
6219
|
1
|
|
|
|
|
|
return data.is_end(); |
6220
|
|
|
|
|
|
|
} |
6221
|
|
|
|
|
|
|
|
6222
|
|
|
|
|
|
|
///////// |
6223
|
|
|
|
|
|
|
// File: tagger/vli.h |
6224
|
|
|
|
|
|
|
///////// |
6225
|
|
|
|
|
|
|
|
6226
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6227
|
|
|
|
|
|
|
// |
6228
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6229
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6230
|
|
|
|
|
|
|
// |
6231
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6232
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6233
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6234
|
|
|
|
|
|
|
|
6235
|
|
|
|
|
|
|
// Declarations |
6236
|
|
|
|
|
|
|
template |
6237
|
|
|
|
|
|
|
class vli { |
6238
|
|
|
|
|
|
|
public: |
6239
|
|
|
|
|
|
|
static int max_length(); |
6240
|
|
|
|
|
|
|
static void encode(T value, char*& where); |
6241
|
|
|
|
|
|
|
static T decode(const char*& from); |
6242
|
|
|
|
|
|
|
}; |
6243
|
|
|
|
|
|
|
|
6244
|
|
|
|
|
|
|
// Definitions |
6245
|
|
|
|
|
|
|
template <> |
6246
|
|
|
|
|
|
|
inline int vli::max_length() { |
6247
|
|
|
|
|
|
|
return 5; |
6248
|
|
|
|
|
|
|
} |
6249
|
|
|
|
|
|
|
|
6250
|
|
|
|
|
|
|
template <> |
6251
|
426
|
|
|
|
|
|
inline void vli::encode(uint32_t value, char*& where) { |
6252
|
426
|
50
|
|
|
|
|
if (value < 0x80) *where++ = value; |
6253
|
0
|
0
|
|
|
|
|
else if (value < 0x4000) *where++ = (value >> 7) | 0x80u, *where++ = value & 0x7Fu; |
6254
|
0
|
0
|
|
|
|
|
else if (value < 0x200000) *where++ = (value >> 14) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
6255
|
0
|
0
|
|
|
|
|
else if (value < 0x10000000) *where++ = (value >> 21) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
6256
|
0
|
|
|
|
|
|
else *where++ = (value >> 28) | 0x80u, *where++ = ((value >> 21) & 0x7Fu) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
6257
|
426
|
|
|
|
|
|
} |
6258
|
|
|
|
|
|
|
|
6259
|
|
|
|
|
|
|
template <> |
6260
|
|
|
|
|
|
|
inline uint32_t vli::decode(const char*& from) { |
6261
|
|
|
|
|
|
|
uint32_t value = 0; |
6262
|
|
|
|
|
|
|
while (((unsigned char)(*from)) & 0x80u) value = (value << 7) | (((unsigned char)(*from++)) ^ 0x80u); |
6263
|
|
|
|
|
|
|
value = (value << 7) | ((unsigned char)(*from++)); |
6264
|
|
|
|
|
|
|
return value; |
6265
|
|
|
|
|
|
|
} |
6266
|
|
|
|
|
|
|
|
6267
|
|
|
|
|
|
|
///////// |
6268
|
|
|
|
|
|
|
// File: tagger/feature_sequences.h |
6269
|
|
|
|
|
|
|
///////// |
6270
|
|
|
|
|
|
|
|
6271
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6272
|
|
|
|
|
|
|
// |
6273
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6274
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6275
|
|
|
|
|
|
|
// |
6276
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6277
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6278
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6279
|
|
|
|
|
|
|
|
6280
|
|
|
|
|
|
|
// Declarations |
6281
|
|
|
|
|
|
|
typedef int32_t feature_sequence_score; |
6282
|
|
|
|
|
|
|
typedef int64_t feature_sequences_score; |
6283
|
|
|
|
|
|
|
|
6284
|
|
|
|
|
|
|
struct feature_sequence_element { |
6285
|
|
|
|
|
|
|
elementary_feature_type type; |
6286
|
|
|
|
|
|
|
int elementary_index; |
6287
|
|
|
|
|
|
|
int sequence_index; |
6288
|
|
|
|
|
|
|
|
6289
|
|
|
|
|
|
|
feature_sequence_element() {} |
6290
|
|
|
|
|
|
|
feature_sequence_element(elementary_feature_type type, int elementary_index, int sequence_index) : type(type), elementary_index(elementary_index), sequence_index(sequence_index) {} |
6291
|
|
|
|
|
|
|
}; |
6292
|
|
|
|
|
|
|
|
6293
|
21
|
|
|
|
|
|
struct feature_sequence { |
6294
|
|
|
|
|
|
|
vector elements; |
6295
|
|
|
|
|
|
|
int dependant_range = 1; |
6296
|
|
|
|
|
|
|
}; |
6297
|
|
|
|
|
|
|
|
6298
|
|
|
|
|
|
|
template |
6299
|
1
|
0
|
|
|
|
|
class feature_sequences { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6300
|
|
|
|
|
|
|
public: |
6301
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::per_form_features per_form_features; |
6302
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::per_tag_features per_tag_features; |
6303
|
|
|
|
|
|
|
typedef typename ElementaryFeatures::dynamic_features dynamic_features; |
6304
|
|
|
|
|
|
|
|
6305
|
|
|
|
|
|
|
void parse(int window_size, istream& is); |
6306
|
|
|
|
|
|
|
bool load(istream& is); |
6307
|
|
|
|
|
|
|
bool save(ostream& os); |
6308
|
|
|
|
|
|
|
|
6309
|
|
|
|
|
|
|
struct cache; |
6310
|
|
|
|
|
|
|
|
6311
|
|
|
|
|
|
|
inline void initialize_sentence(const vector& forms, const vector>& analyses, cache& c) const; |
6312
|
|
|
|
|
|
|
inline void compute_dynamic_features(int form_index, int tag_index, const dynamic_features* prev_dynamic, dynamic_features& dynamic, cache& c) const; |
6313
|
|
|
|
|
|
|
inline feature_sequences_score score(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, cache& c) const; |
6314
|
|
|
|
|
|
|
void feature_keys(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, vector& keys, cache& c) const; |
6315
|
|
|
|
|
|
|
|
6316
|
|
|
|
|
|
|
ElementaryFeatures elementary; |
6317
|
|
|
|
|
|
|
vector |
6318
|
|
|
|
|
|
|
vector sequences; |
6319
|
|
|
|
|
|
|
}; |
6320
|
|
|
|
|
|
|
|
6321
|
0
|
|
|
|
|
|
class persistent_feature_sequence_map : public persistent_unordered_map { |
6322
|
|
|
|
|
|
|
public: |
6323
|
|
|
|
|
|
|
persistent_feature_sequence_map() : persistent_unordered_map() {} |
6324
|
|
|
|
|
|
|
persistent_feature_sequence_map(const persistent_unordered_map&& map) : persistent_unordered_map(map) {} |
6325
|
|
|
|
|
|
|
|
6326
|
|
|
|
|
|
|
feature_sequence_score score(const char* feature, int len) const { |
6327
|
158
|
|
|
|
|
|
auto* it = at_typed(feature, len); |
6328
|
158
|
0
|
|
|
|
|
return it ? unaligned_load(it) : 0; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6329
|
|
|
|
|
|
|
} |
6330
|
|
|
|
|
|
|
}; |
6331
|
|
|
|
|
|
|
|
6332
|
|
|
|
|
|
|
template using persistent_feature_sequences = feature_sequences; |
6333
|
|
|
|
|
|
|
|
6334
|
|
|
|
|
|
|
// Definitions |
6335
|
|
|
|
|
|
|
template |
6336
|
1
|
|
|
|
|
|
inline bool feature_sequences::load(istream& is) { |
6337
|
1
|
0
|
|
|
|
|
if (!elementary.load(is)) return false; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6338
|
|
|
|
|
|
|
|
6339
|
|
|
|
|
|
|
binary_decoder data; |
6340
|
1
|
0
|
|
|
|
|
if (!compressor::load(is, data)) return false; |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6341
|
|
|
|
|
|
|
|
6342
|
|
|
|
|
|
|
try { |
6343
|
1
|
0
|
|
|
|
|
sequences.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6344
|
22
|
0
|
|
|
|
|
for (auto&& sequence : sequences) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6345
|
21
|
0
|
|
|
|
|
sequence.dependant_range = data.next_4B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6346
|
21
|
0
|
|
|
|
|
sequence.elements.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6347
|
66
|
0
|
|
|
|
|
for (auto&& element : sequence.elements) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6348
|
45
|
0
|
|
|
|
|
element.type = elementary_feature_type(data.next_4B()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6349
|
45
|
0
|
|
|
|
|
element.elementary_index = data.next_4B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6350
|
45
|
0
|
|
|
|
|
element.sequence_index = data.next_4B(); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6351
|
|
|
|
|
|
|
} |
6352
|
|
|
|
|
|
|
} |
6353
|
|
|
|
|
|
|
|
6354
|
1
|
0
|
|
|
|
|
scores.resize(data.next_1B()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6355
|
22
|
0
|
|
|
|
|
for (auto&& score : scores) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6356
|
21
|
0
|
|
|
|
|
score.load(data); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6357
|
|
|
|
|
|
|
} catch (binary_decoder_error&) { |
6358
|
|
|
|
|
|
|
return false; |
6359
|
|
|
|
|
|
|
} |
6360
|
|
|
|
|
|
|
|
6361
|
1
|
|
|
|
|
|
return data.is_end(); |
6362
|
|
|
|
|
|
|
} |
6363
|
|
|
|
|
|
|
|
6364
|
|
|
|
|
|
|
template |
6365
|
0
|
|
|
|
|
|
struct feature_sequences::cache { |
6366
|
|
|
|
|
|
|
const vector* forms; |
6367
|
|
|
|
|
|
|
const vector>* analyses; |
6368
|
|
|
|
|
|
|
vector elementary_per_form; |
6369
|
|
|
|
|
|
|
vector> elementary_per_tag; |
6370
|
|
|
|
|
|
|
|
6371
|
0
|
|
|
|
|
|
struct cache_element { |
6372
|
|
|
|
|
|
|
vector key; |
6373
|
|
|
|
|
|
|
int key_size; |
6374
|
|
|
|
|
|
|
feature_sequence_score score; |
6375
|
|
|
|
|
|
|
|
6376
|
21
|
0
|
|
|
|
|
cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6377
|
|
|
|
|
|
|
}; |
6378
|
|
|
|
|
|
|
vector caches; |
6379
|
|
|
|
|
|
|
vector window; |
6380
|
|
|
|
|
|
|
vector key; |
6381
|
|
|
|
|
|
|
feature_sequences_score score; |
6382
|
|
|
|
|
|
|
|
6383
|
1
|
|
|
|
|
|
cache(const feature_sequences& self) : score(0) { |
6384
|
1
|
0
|
|
|
|
|
caches.reserve(self.sequences.size()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6385
|
|
|
|
|
|
|
int max_sequence_elements = 0, max_window_size = 1; |
6386
|
22
|
0
|
|
|
|
|
for (auto&& sequence : self.sequences) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6387
|
21
|
0
|
|
|
|
|
caches.emplace_back(int(sequence.elements.size())); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6388
|
21
|
0
|
|
|
|
|
if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size(); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6389
|
66
|
0
|
|
|
|
|
for (auto&& element : sequence.elements) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6390
|
45
|
0
|
|
|
|
|
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6391
|
|
|
|
|
|
|
max_window_size = 1 - element.sequence_index; |
6392
|
|
|
|
|
|
|
} |
6393
|
1
|
0
|
|
|
|
|
key.resize(max_sequence_elements * vli::max_length()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6394
|
1
|
0
|
|
|
|
|
window.resize(max_window_size); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6395
|
1
|
|
|
|
|
|
} |
6396
|
|
|
|
|
|
|
}; |
6397
|
|
|
|
|
|
|
|
6398
|
|
|
|
|
|
|
template |
6399
|
2
|
|
|
|
|
|
void feature_sequences::initialize_sentence(const vector& forms, const vector>& analyses, cache& c) const { |
6400
|
|
|
|
|
|
|
// Store forms and forms_size |
6401
|
2
|
|
|
|
|
|
c.forms = &forms; |
6402
|
2
|
|
|
|
|
|
c.analyses = &analyses; |
6403
|
|
|
|
|
|
|
|
6404
|
|
|
|
|
|
|
// Enlarge elementary features vectors if needed |
6405
|
2
|
0
|
|
|
|
|
if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6406
|
2
|
0
|
|
|
|
|
if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6407
|
8
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6408
|
6
|
0
|
|
|
|
|
if (analyses[i].size() > c.elementary_per_tag[i].size()) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6409
|
4
|
|
|
|
|
|
c.elementary_per_tag[i].resize(analyses[i].size() * 2); |
6410
|
|
|
|
|
|
|
|
6411
|
|
|
|
|
|
|
// Compute elementary features |
6412
|
2
|
|
|
|
|
|
elementary.compute_features(forms, analyses, c.elementary_per_form, c.elementary_per_tag); |
6413
|
|
|
|
|
|
|
|
6414
|
|
|
|
|
|
|
// Clear score cache, because scores may have been modified |
6415
|
2
|
|
|
|
|
|
c.score = 0; |
6416
|
44
|
0
|
|
|
|
|
for (auto&& cache : c.caches) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6417
|
42
|
|
|
|
|
|
cache.key_size = cache.score = 0; |
6418
|
2
|
|
|
|
|
|
} |
6419
|
|
|
|
|
|
|
|
6420
|
|
|
|
|
|
|
template |
6421
|
58
|
|
|
|
|
|
void feature_sequences::compute_dynamic_features(int form_index, int tag_index, const dynamic_features* prev_dynamic, dynamic_features& dynamic, cache& c) const { |
6422
|
29
|
0
|
|
|
|
|
elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6423
|
29
|
|
|
|
|
|
} |
6424
|
|
|
|
|
|
|
|
6425
|
|
|
|
|
|
|
template |
6426
|
56
|
|
|
|
|
|
feature_sequences_score feature_sequences::score(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, cache& c) const { |
6427
|
|
|
|
|
|
|
// Start by creating a window of per_tag_features* |
6428
|
92
|
0
|
|
|
|
|
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6429
|
192
|
|
|
|
|
|
c.window[i] = &c.elementary_per_tag[form_index - i][tags_window[i]]; |
6430
|
|
|
|
|
|
|
|
6431
|
|
|
|
|
|
|
// Compute the score |
6432
|
28
|
|
|
|
|
|
feature_sequences_score result = c.score; |
6433
|
342
|
0
|
|
|
|
|
for (unsigned i = 0; i < sequences.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6434
|
314
|
0
|
|
|
|
|
if (tags_unchanged >= sequences[i].dependant_range) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6435
|
|
|
|
|
|
|
break; |
6436
|
|
|
|
|
|
|
|
6437
|
298
|
|
|
|
|
|
char* key = c.key.data(); |
6438
|
724
|
0
|
|
|
|
|
for (unsigned j = 0; j < sequences[i].elements.size(); j++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6439
|
|
|
|
|
|
|
auto& element = sequences[i].elements[j]; |
6440
|
|
|
|
|
|
|
elementary_feature_value value; |
6441
|
|
|
|
|
|
|
|
6442
|
550
|
|
|
|
|
|
switch (element.type) { |
6443
|
|
|
|
|
|
|
case PER_FORM: |
6444
|
152
|
0
|
|
|
|
|
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6445
|
|
|
|
|
|
|
break; |
6446
|
|
|
|
|
|
|
case PER_TAG: |
6447
|
398
|
0
|
|
|
|
|
value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index]; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6448
|
|
|
|
|
|
|
break; |
6449
|
|
|
|
|
|
|
case DYNAMIC: |
6450
|
|
|
|
|
|
|
default: |
6451
|
0
|
|
|
|
|
|
value = dynamic.values[element.elementary_index]; |
6452
|
|
|
|
|
|
|
} |
6453
|
|
|
|
|
|
|
|
6454
|
550
|
0
|
|
|
|
|
if (value == elementary_feature_unknown) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6455
|
124
|
|
|
|
|
|
key = c.key.data(); |
6456
|
124
|
|
|
|
|
|
break; |
6457
|
|
|
|
|
|
|
} |
6458
|
426
|
|
|
|
|
|
vli::encode(value, key); |
6459
|
|
|
|
|
|
|
} |
6460
|
|
|
|
|
|
|
|
6461
|
298
|
|
|
|
|
|
result -= c.caches[i].score; |
6462
|
298
|
|
|
|
|
|
int key_size = key - c.key.data(); |
6463
|
298
|
0
|
|
|
|
|
if (!key_size) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6464
|
124
|
|
|
|
|
|
c.caches[i].score = 0; |
6465
|
124
|
|
|
|
|
|
c.caches[i].key_size = 0; |
6466
|
270
|
0
|
|
|
|
|
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6467
|
158
|
|
|
|
|
|
c.caches[i].score = scores[i].score(c.key.data(), key_size); |
6468
|
158
|
|
|
|
|
|
c.caches[i].key_size = key_size; |
6469
|
158
|
|
|
|
|
|
small_memcpy(c.caches[i].key.data(), c.key.data(), key_size); |
6470
|
|
|
|
|
|
|
} |
6471
|
298
|
|
|
|
|
|
result += c.caches[i].score; |
6472
|
|
|
|
|
|
|
} |
6473
|
|
|
|
|
|
|
|
6474
|
28
|
|
|
|
|
|
c.score = result; |
6475
|
28
|
|
|
|
|
|
return result; |
6476
|
|
|
|
|
|
|
} |
6477
|
|
|
|
|
|
|
|
6478
|
|
|
|
|
|
|
template |
6479
|
|
|
|
|
|
|
void feature_sequences::feature_keys(int form_index, int tags_window[], int tags_unchanged, dynamic_features& dynamic, vector& keys, cache& c) const { |
6480
|
|
|
|
|
|
|
score(form_index, tags_window, tags_unchanged, dynamic, c); |
6481
|
|
|
|
|
|
|
|
6482
|
|
|
|
|
|
|
keys.resize(c.caches.size()); |
6483
|
|
|
|
|
|
|
for (unsigned i = 0; i < c.caches.size(); i++) |
6484
|
|
|
|
|
|
|
keys[i].assign(c.caches[i].key.data(), c.caches[i].key_size); |
6485
|
|
|
|
|
|
|
} |
6486
|
|
|
|
|
|
|
|
6487
|
|
|
|
|
|
|
///////// |
6488
|
|
|
|
|
|
|
// File: tagger/viterbi.h |
6489
|
|
|
|
|
|
|
///////// |
6490
|
|
|
|
|
|
|
|
6491
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6492
|
|
|
|
|
|
|
// |
6493
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6494
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6495
|
|
|
|
|
|
|
// |
6496
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6497
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6498
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6499
|
|
|
|
|
|
|
|
6500
|
|
|
|
|
|
|
// Declarations |
6501
|
|
|
|
|
|
|
template |
6502
|
|
|
|
|
|
|
class viterbi { |
6503
|
|
|
|
|
|
|
public: |
6504
|
|
|
|
|
|
|
viterbi(const FeatureSequences& features, int decoding_order, int window_size) |
6505
|
1
|
|
|
|
|
|
: features(features), decoding_order(decoding_order), window_size(window_size) {} |
6506
|
|
|
|
|
|
|
|
6507
|
|
|
|
|
|
|
struct cache; |
6508
|
|
|
|
|
|
|
void tag(const vector& forms, const vector>& analyses, cache& c, vector& tags) const; |
6509
|
|
|
|
|
|
|
|
6510
|
|
|
|
|
|
|
private: |
6511
|
|
|
|
|
|
|
struct node; |
6512
|
|
|
|
|
|
|
|
6513
|
|
|
|
|
|
|
const FeatureSequences& features; |
6514
|
|
|
|
|
|
|
int decoding_order, window_size; |
6515
|
|
|
|
|
|
|
}; |
6516
|
|
|
|
|
|
|
|
6517
|
|
|
|
|
|
|
// Definitions |
6518
|
|
|
|
|
|
|
template |
6519
|
0
|
|
|
|
|
|
struct viterbi::cache { |
6520
|
|
|
|
|
|
|
vector nodes; |
6521
|
|
|
|
|
|
|
typename FeatureSequences::cache features_cache; |
6522
|
|
|
|
|
|
|
|
6523
|
1
|
0
|
|
|
|
|
cache(const viterbi& self) : features_cache(self.features) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6524
|
|
|
|
|
|
|
}; |
6525
|
|
|
|
|
|
|
|
6526
|
|
|
|
|
|
|
template |
6527
|
|
|
|
|
|
|
struct viterbi::node { |
6528
|
|
|
|
|
|
|
int tag; |
6529
|
|
|
|
|
|
|
int prev; |
6530
|
|
|
|
|
|
|
feature_sequences_score score; |
6531
|
|
|
|
|
|
|
typename FeatureSequences::dynamic_features dynamic; |
6532
|
|
|
|
|
|
|
}; |
6533
|
|
|
|
|
|
|
|
6534
|
|
|
|
|
|
|
template |
6535
|
2
|
|
|
|
|
|
void viterbi::tag(const vector& forms, const vector>& analyses, cache& c, vector& tags) const { |
6536
|
4
|
0
|
|
|
|
|
if (!forms.size()) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6537
|
|
|
|
|
|
|
|
6538
|
|
|
|
|
|
|
// Count number of nodes and allocate |
6539
|
|
|
|
|
|
|
unsigned nodes = 0; |
6540
|
8
|
0
|
|
|
|
|
for (unsigned i = 0, states = 1; i < forms.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6541
|
6
|
0
|
|
|
|
|
if (analyses[i].empty()) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6542
|
6
|
0
|
|
|
|
|
states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size(); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6543
|
6
|
|
|
|
|
|
nodes += states; |
6544
|
|
|
|
|
|
|
} |
6545
|
2
|
0
|
|
|
|
|
if (nodes > c.nodes.size()) c.nodes.resize(nodes); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6546
|
|
|
|
|
|
|
|
6547
|
|
|
|
|
|
|
// Init feature sequences |
6548
|
2
|
|
|
|
|
|
features.initialize_sentence(forms, analyses, c.features_cache); |
6549
|
|
|
|
|
|
|
|
6550
|
|
|
|
|
|
|
int window_stack[16]; vector window_heap; |
6551
|
2
|
0
|
|
|
|
|
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6552
|
|
|
|
|
|
|
typename FeatureSequences::dynamic_features dynamic; |
6553
|
|
|
|
|
|
|
feature_sequences_score score; |
6554
|
|
|
|
|
|
|
|
6555
|
|
|
|
|
|
|
// Compute all nodes score |
6556
|
|
|
|
|
|
|
int nodes_prev = -1, nodes_now = 0; |
6557
|
8
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6558
|
|
|
|
|
|
|
int nodes_next = nodes_now; |
6559
|
|
|
|
|
|
|
|
6560
|
24
|
0
|
|
|
|
|
for (int j = 0; j < window_size; j++) window[j] = -1; |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6561
|
19
|
0
|
|
|
|
|
for (int tag = 0; tag < int(analyses[i].size()); tag++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6562
|
42
|
0
|
|
|
|
|
for (int prev = nodes_prev; prev < nodes_now; prev++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6563
|
|
|
|
|
|
|
// Compute predecessors and number of unchanges |
6564
|
29
|
|
|
|
|
|
int same_tags = window[0] == tag; |
6565
|
29
|
|
|
|
|
|
window[0] = tag; |
6566
|
65
|
0
|
|
|
|
|
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6567
|
36
|
0
|
|
|
|
|
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6568
|
72
|
|
|
|
|
|
window[n] = c.nodes[p].tag; |
6569
|
|
|
|
|
|
|
} |
6570
|
|
|
|
|
|
|
|
6571
|
|
|
|
|
|
|
// Compute dynamic elementary features and score |
6572
|
29
|
0
|
|
|
|
|
features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6573
|
29
|
0
|
|
|
|
|
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6574
|
24
|
|
|
|
|
|
(prev >= 0 ? c.nodes[prev].score : 0); |
6575
|
|
|
|
|
|
|
|
6576
|
|
|
|
|
|
|
// Update existing node or create a new one |
6577
|
29
|
0
|
|
|
|
|
if (same_tags >= decoding_order-1) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6578
|
6
|
0
|
|
|
|
|
if (score <= c.nodes[nodes_next-1].score) continue; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6579
|
|
|
|
|
|
|
nodes_next--; |
6580
|
|
|
|
|
|
|
} |
6581
|
46
|
|
|
|
|
|
c.nodes[nodes_next].tag = tag; |
6582
|
23
|
|
|
|
|
|
c.nodes[nodes_next].prev = prev; |
6583
|
23
|
|
|
|
|
|
c.nodes[nodes_next].score = score; |
6584
|
23
|
|
|
|
|
|
c.nodes[nodes_next++].dynamic = dynamic; |
6585
|
|
|
|
|
|
|
} |
6586
|
|
|
|
|
|
|
|
6587
|
|
|
|
|
|
|
nodes_prev = nodes_now; |
6588
|
|
|
|
|
|
|
nodes_now = nodes_next; |
6589
|
|
|
|
|
|
|
} |
6590
|
|
|
|
|
|
|
|
6591
|
|
|
|
|
|
|
// Choose the best ending node |
6592
|
|
|
|
|
|
|
int best = nodes_prev; |
6593
|
6
|
0
|
|
|
|
|
for (int node = nodes_prev + 1; node < nodes_now; node++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6594
|
4
|
0
|
|
|
|
|
if (c.nodes[node].score > c.nodes[best].score) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6595
|
|
|
|
|
|
|
best = node; |
6596
|
|
|
|
|
|
|
|
6597
|
8
|
0
|
|
|
|
|
for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6598
|
18
|
|
|
|
|
|
tags[i] = c.nodes[best].tag; |
6599
|
|
|
|
|
|
|
} |
6600
|
|
|
|
|
|
|
|
6601
|
|
|
|
|
|
|
///////// |
6602
|
|
|
|
|
|
|
// File: tagger/conllu_elementary_features.h |
6603
|
|
|
|
|
|
|
///////// |
6604
|
|
|
|
|
|
|
|
6605
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6606
|
|
|
|
|
|
|
// |
6607
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6608
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6609
|
|
|
|
|
|
|
// |
6610
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6611
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6612
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6613
|
|
|
|
|
|
|
|
6614
|
|
|
|
|
|
|
// Declarations |
6615
|
|
|
|
|
|
|
template |
6616
|
0
|
|
|
|
|
|
class conllu_elementary_features : public elementary_features |
6617
|
|
|
|
|
|
|
public: |
6618
|
|
|
|
|
|
|
conllu_elementary_features(); |
6619
|
|
|
|
|
|
|
|
6620
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_FORM, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, PREFIX5, PREFIX6, PREFIX7, PREFIX8, PREFIX9, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, SUFFIX5, SUFFIX6, SUFFIX7, SUFFIX8, SUFFIX9, PER_FORM_TOTAL }; |
6621
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG_UPOS, TAG_CASE, TAG_GENDER, TAG_NUMBER, TAG_NEGATIVE, TAG_PERSON, LEMMA, PER_TAG_TOTAL }; |
6622
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_FORM, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_FORM, DYNAMIC_TOTAL }; |
6623
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_PREFIX5, MAP_PREFIX6, MAP_PREFIX7, MAP_PREFIX8, MAP_PREFIX9, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_SUFFIX5, MAP_SUFFIX6, MAP_SUFFIX7, MAP_SUFFIX8, MAP_SUFFIX9, MAP_TAG, MAP_TAG_UPOS, MAP_TAG_CASE, MAP_TAG_GENDER, MAP_TAG_NUMBER, MAP_TAG_NEGATIVE, MAP_TAG_PERSON, MAP_LEMMA, MAP_TOTAL } ; |
6624
|
|
|
|
|
|
|
|
6625
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
6626
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
6627
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
6628
|
|
|
|
|
|
|
|
6629
|
|
|
|
|
|
|
static vector descriptions; |
6630
|
|
|
|
|
|
|
|
6631
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
6632
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
6633
|
|
|
|
|
|
|
|
6634
|
|
|
|
|
|
|
using elementary_features |
6635
|
|
|
|
|
|
|
}; |
6636
|
|
|
|
|
|
|
|
6637
|
|
|
|
|
|
|
typedef conllu_elementary_features persistent_conllu_elementary_features; |
6638
|
|
|
|
|
|
|
|
6639
|
|
|
|
|
|
|
// Definitions |
6640
|
|
|
|
|
|
|
template |
6641
|
0
|
|
|
|
|
|
conllu_elementary_features |
6642
|
0
|
0
|
|
|
|
|
maps.resize(MAP_TOTAL); |
6643
|
0
|
|
|
|
|
|
} |
6644
|
|
|
|
|
|
|
|
6645
|
|
|
|
|
|
|
template |
6646
|
|
|
|
|
|
|
vector conllu_elementary_features |
6647
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
6648
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
6649
|
|
|
|
|
|
|
{"FollowingVerbForm", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_FORM, MAP_FORM}, |
6650
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
6651
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
6652
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
6653
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
6654
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
6655
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
6656
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
6657
|
|
|
|
|
|
|
{"Prefix5", PER_FORM, ONLY_CURRENT, PREFIX5, MAP_PREFIX5}, |
6658
|
|
|
|
|
|
|
{"Prefix6", PER_FORM, ONLY_CURRENT, PREFIX6, MAP_PREFIX6}, |
6659
|
|
|
|
|
|
|
{"Prefix7", PER_FORM, ONLY_CURRENT, PREFIX7, MAP_PREFIX7}, |
6660
|
|
|
|
|
|
|
{"Prefix8", PER_FORM, ONLY_CURRENT, PREFIX8, MAP_PREFIX8}, |
6661
|
|
|
|
|
|
|
{"Prefix9", PER_FORM, ONLY_CURRENT, PREFIX9, MAP_PREFIX9}, |
6662
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
6663
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
6664
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
6665
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
6666
|
|
|
|
|
|
|
{"Suffix5", PER_FORM, ONLY_CURRENT, SUFFIX5, MAP_SUFFIX5}, |
6667
|
|
|
|
|
|
|
{"Suffix6", PER_FORM, ONLY_CURRENT, SUFFIX6, MAP_SUFFIX6}, |
6668
|
|
|
|
|
|
|
{"Suffix7", PER_FORM, ONLY_CURRENT, SUFFIX7, MAP_SUFFIX7}, |
6669
|
|
|
|
|
|
|
{"Suffix8", PER_FORM, ONLY_CURRENT, SUFFIX8, MAP_SUFFIX8}, |
6670
|
|
|
|
|
|
|
{"Suffix9", PER_FORM, ONLY_CURRENT, SUFFIX9, MAP_SUFFIX9}, |
6671
|
|
|
|
|
|
|
|
6672
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
6673
|
|
|
|
|
|
|
{"TagUPos", PER_TAG, ANY_OFFSET, TAG_UPOS, MAP_TAG_UPOS}, |
6674
|
|
|
|
|
|
|
{"TagCase", PER_TAG, ANY_OFFSET, TAG_CASE, MAP_TAG_CASE}, |
6675
|
|
|
|
|
|
|
{"TagGender", PER_TAG, ANY_OFFSET, TAG_GENDER, MAP_TAG_GENDER}, |
6676
|
|
|
|
|
|
|
{"TagNumber", PER_TAG, ANY_OFFSET, TAG_NUMBER, MAP_TAG_NUMBER}, |
6677
|
|
|
|
|
|
|
{"TagNegative", PER_TAG, ANY_OFFSET, TAG_NEGATIVE, MAP_TAG_NEGATIVE}, |
6678
|
|
|
|
|
|
|
{"TagPerson", PER_TAG, ANY_OFFSET, TAG_PERSON, MAP_TAG_PERSON}, |
6679
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
6680
|
|
|
|
|
|
|
|
6681
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
6682
|
|
|
|
|
|
|
{"PreviousVerbForm", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_FORM, MAP_FORM}, |
6683
|
|
|
|
|
|
|
}; |
6684
|
|
|
|
|
|
|
|
6685
|
|
|
|
|
|
|
template |
6686
|
0
|
|
|
|
|
|
void conllu_elementary_features |
6687
|
|
|
|
|
|
|
using namespace unilib; |
6688
|
|
|
|
|
|
|
|
6689
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
6690
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_form = elementary_feature_empty; |
6691
|
0
|
0
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
6692
|
|
|
|
|
|
|
int verb_candidate = -1; |
6693
|
|
|
|
|
|
|
|
6694
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
6695
|
0
|
0
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
6696
|
0
|
|
|
|
|
|
const string& tag = analyses[i][j].tag; |
6697
|
0
|
|
|
|
|
|
const string& lemma = analyses[i][j].lemma; |
6698
|
|
|
|
|
|
|
|
6699
|
|
|
|
|
|
|
// Tag consists of three parts separated by tag[0] character |
6700
|
|
|
|
|
|
|
// - first is TAG_UPOS, |
6701
|
|
|
|
|
|
|
// - second is TAG_LPOS, |
6702
|
|
|
|
|
|
|
// - then there is any number of | separated named fields in format Name=Value |
6703
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(tag.c_str(), tag.size()); |
6704
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG_UPOS] = per_tag[i][j].values[TAG_CASE] = per_tag[i][j].values[TAG_GENDER] = elementary_feature_empty; |
6705
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG_NUMBER] = per_tag[i][j].values[TAG_NEGATIVE] = per_tag[i][j].values[TAG_PERSON] = elementary_feature_empty; |
6706
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
0
|
|
|
|
|
|
6707
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(lemma.c_str(), lemma.size()); |
6708
|
|
|
|
|
|
|
|
6709
|
0
|
|
|
|
|
|
char separator = tag[0]; |
6710
|
0
|
|
|
|
|
|
size_t index = tag.find(separator, 1); |
6711
|
0
|
0
|
|
|
|
|
if (index == string::npos) index = tag.size(); |
6712
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0)); |
|
|
0
|
|
|
|
|
|
6713
|
|
|
|
|
|
|
|
6714
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index++; |
6715
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index = tag.find(separator, index); |
6716
|
0
|
0
|
|
|
|
|
if (index < tag.size()) index++; |
6717
|
0
|
0
|
|
|
|
|
for (size_t length; index < tag.size(); index += length + 1) { |
6718
|
0
|
|
|
|
|
|
length = tag.find('|', index); |
6719
|
0
|
0
|
|
|
|
|
length = (length == string::npos ? tag.size() : length) - index; |
6720
|
|
|
|
|
|
|
|
6721
|
0
|
0
|
|
|
|
|
for (size_t equal_sign = 0; equal_sign + 1 < length; equal_sign++) |
6722
|
0
|
0
|
|
|
|
|
if (tag[index + equal_sign] == '=') { |
6723
|
|
|
|
|
|
|
int value = -1, map; |
6724
|
0
|
|
|
|
|
|
switch (equal_sign) { |
6725
|
|
|
|
|
|
|
case 4: |
6726
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Case") == 0) value = TAG_CASE, map = MAP_TAG_CASE; |
6727
|
|
|
|
|
|
|
break; |
6728
|
|
|
|
|
|
|
case 6: |
6729
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Gender") == 0) value = TAG_GENDER, map = MAP_TAG_GENDER; |
6730
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Number") == 0) value = TAG_NUMBER, map = MAP_TAG_NUMBER; |
6731
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Person") == 0) value = TAG_PERSON, map = MAP_TAG_PERSON; |
6732
|
|
|
|
|
|
|
break; |
6733
|
|
|
|
|
|
|
case 8: |
6734
|
0
|
0
|
|
|
|
|
if (tag.compare(index, equal_sign, "Negative") == 0) value = TAG_NEGATIVE, map = MAP_TAG_NEGATIVE; |
6735
|
|
|
|
|
|
|
break; |
6736
|
|
|
|
|
|
|
} |
6737
|
|
|
|
|
|
|
|
6738
|
0
|
0
|
|
|
|
|
if (value >= 0) |
6739
|
0
|
|
|
|
|
|
per_tag[i][j].values[value] = maps[map].value(tag.c_str() + index + equal_sign + 1, length - equal_sign - 1); |
6740
|
|
|
|
|
|
|
break; |
6741
|
|
|
|
|
|
|
} |
6742
|
|
|
|
|
|
|
} |
6743
|
|
|
|
|
|
|
|
6744
|
0
|
0
|
|
|
|
|
if (tag.size() >= 2 && tag[1] == 'V') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6745
|
|
|
|
|
|
|
int tag_compare; |
6746
|
0
|
0
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
0
|
|
|
|
|
|
6747
|
|
|
|
|
|
|
} |
6748
|
|
|
|
|
|
|
} |
6749
|
|
|
|
|
|
|
|
6750
|
|
|
|
|
|
|
// Per_form features |
6751
|
0
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
6752
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
6753
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_FORM] = following_verb_form; |
6754
|
|
|
|
|
|
|
|
6755
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
6756
|
0
|
0
|
|
|
|
|
if (verb_candidate >= 0) { |
6757
|
0
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
6758
|
0
|
|
|
|
|
|
following_verb_form = per_form[i].values[FORM]; |
6759
|
|
|
|
|
|
|
} |
6760
|
|
|
|
|
|
|
|
6761
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
6762
|
0
|
0
|
|
|
|
|
if (analyses[i].size() == 1) { |
6763
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
6764
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_unknown; |
6765
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_unknown; |
6766
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_unknown; |
6767
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_unknown; |
6768
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_unknown; |
6769
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_unknown; |
6770
|
0
|
0
|
|
|
|
|
} else if (forms[i].len <= 0) { |
6771
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
6772
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_empty; |
6773
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_empty; |
6774
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_empty; |
6775
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_empty; |
6776
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_empty; |
6777
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_empty; |
6778
|
|
|
|
|
|
|
} else { |
6779
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
6780
|
0
|
|
|
|
|
|
const char* form_start = form.str; |
6781
|
|
|
|
|
|
|
|
6782
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
6783
|
0
|
|
|
|
|
|
size_t indices[18] = {0, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, 0, 0, 0, 0, 0, 0, 0, 0}; // careful here regarding forms shorter than 9 characters |
6784
|
|
|
|
|
|
|
int index = 0; |
6785
|
0
|
0
|
|
|
|
|
while (form.len) { |
6786
|
0
|
|
|
|
|
|
indices[(index++) % 18] = form.str - form_start; |
6787
|
|
|
|
|
|
|
|
6788
|
0
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
6789
|
0
|
0
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
0
|
|
|
|
|
|
6790
|
0
|
0
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
0
|
|
|
|
|
|
6791
|
0
|
0
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
0
|
|
|
|
|
|
6792
|
|
|
|
|
|
|
|
6793
|
0
|
0
|
|
|
|
|
if (index == 10 || (!form.len && index < 10)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6794
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
6795
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
6796
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
6797
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
6798
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX5] = maps[MAP_PREFIX5].value(form_start, indices[5]); |
6799
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX6] = maps[MAP_PREFIX6].value(form_start, indices[6]); |
6800
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = maps[MAP_PREFIX7].value(form_start, indices[7]); |
6801
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX8] = maps[MAP_PREFIX8].value(form_start, indices[8]); |
6802
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX9] = maps[MAP_PREFIX9].value(form_start, indices[9]); |
6803
|
|
|
|
|
|
|
} |
6804
|
|
|
|
|
|
|
} |
6805
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index+18-1) % 18], form.str - form_start - indices[(index+18-1) % 18]); |
6806
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index+18-2) % 18], form.str - form_start - indices[(index+18-2) % 18]); |
6807
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index+18-3) % 18], form.str - form_start - indices[(index+18-3) % 18]); |
6808
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index+18-4) % 18], form.str - form_start - indices[(index+18-4) % 18]); |
6809
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX5] = maps[MAP_SUFFIX5].value(form_start + indices[(index+18-5) % 18], form.str - form_start - indices[(index+18-5) % 18]); |
6810
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX6] = maps[MAP_SUFFIX6].value(form_start + indices[(index+18-6) % 18], form.str - form_start - indices[(index+18-6) % 18]); |
6811
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = maps[MAP_SUFFIX7].value(form_start + indices[(index+18-7) % 18], form.str - form_start - indices[(index+18-7) % 18]); |
6812
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX8] = maps[MAP_SUFFIX8].value(form_start + indices[(index+18-8) % 18], form.str - form_start - indices[(index+18-8) % 18]); |
6813
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX9] = maps[MAP_SUFFIX9].value(form_start + indices[(index+18-9) % 18], form.str - form_start - indices[(index+18-9) % 18]); |
6814
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
6815
|
0
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
6816
|
0
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
6817
|
|
|
|
|
|
|
} |
6818
|
|
|
|
|
|
|
} |
6819
|
0
|
|
|
|
|
|
} |
6820
|
|
|
|
|
|
|
|
6821
|
|
|
|
|
|
|
template |
6822
|
|
|
|
|
|
|
void conllu_elementary_features |
6823
|
0
|
0
|
|
|
|
|
if (prev_dynamic) { |
6824
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
6825
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_FORM] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_FORM]; |
6826
|
|
|
|
|
|
|
} else { |
6827
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
6828
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_FORM] = elementary_feature_empty; |
6829
|
|
|
|
|
|
|
} |
6830
|
|
|
|
|
|
|
|
6831
|
0
|
0
|
|
|
|
|
if (tag.tag.size() >= 2 && tag.tag[1] == 'V') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6832
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
6833
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_FORM] = per_form.values[FORM]; |
6834
|
|
|
|
|
|
|
} else { |
6835
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
6836
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_FORM] = dynamic.values[PREVIOUS_VERB_FORM]; |
6837
|
|
|
|
|
|
|
} |
6838
|
|
|
|
|
|
|
} |
6839
|
|
|
|
|
|
|
|
6840
|
|
|
|
|
|
|
///////// |
6841
|
|
|
|
|
|
|
// File: tagger/czech_elementary_features.h |
6842
|
|
|
|
|
|
|
///////// |
6843
|
|
|
|
|
|
|
|
6844
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
6845
|
|
|
|
|
|
|
// |
6846
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
6847
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
6848
|
|
|
|
|
|
|
// |
6849
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
6850
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
6851
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
6852
|
|
|
|
|
|
|
|
6853
|
|
|
|
|
|
|
// Declarations |
6854
|
|
|
|
|
|
|
template |
6855
|
0
|
|
|
|
|
|
class czech_elementary_features : public elementary_features |
6856
|
|
|
|
|
|
|
public: |
6857
|
|
|
|
|
|
|
czech_elementary_features(); |
6858
|
|
|
|
|
|
|
|
6859
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_LEMMA, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, PER_FORM_TOTAL }; |
6860
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG3, TAG5, TAG25, LEMMA, PER_TAG_TOTAL }; |
6861
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_LEMMA, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_LEMMA, DYNAMIC_TOTAL }; |
6862
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_LEMMA, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_TAG, MAP_TAG3, MAP_TAG5, MAP_TAG25, MAP_TOTAL } ; |
6863
|
|
|
|
|
|
|
|
6864
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
6865
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
6866
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
6867
|
|
|
|
|
|
|
|
6868
|
|
|
|
|
|
|
static vector descriptions; |
6869
|
|
|
|
|
|
|
|
6870
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
6871
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
6872
|
|
|
|
|
|
|
|
6873
|
|
|
|
|
|
|
using elementary_features |
6874
|
|
|
|
|
|
|
}; |
6875
|
|
|
|
|
|
|
|
6876
|
|
|
|
|
|
|
typedef czech_elementary_features persistent_czech_elementary_features; |
6877
|
|
|
|
|
|
|
|
6878
|
|
|
|
|
|
|
// Definitions |
6879
|
|
|
|
|
|
|
template |
6880
|
0
|
|
|
|
|
|
czech_elementary_features |
6881
|
0
|
0
|
|
|
|
|
maps.resize(MAP_TOTAL); |
6882
|
0
|
|
|
|
|
|
} |
6883
|
|
|
|
|
|
|
|
6884
|
|
|
|
|
|
|
template |
6885
|
|
|
|
|
|
|
vector czech_elementary_features |
6886
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
6887
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
6888
|
|
|
|
|
|
|
{"FollowingVerbLemma", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_LEMMA, MAP_LEMMA }, |
6889
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
6890
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
6891
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
6892
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
6893
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
6894
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
6895
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
6896
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
6897
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
6898
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
6899
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
6900
|
|
|
|
|
|
|
|
6901
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
6902
|
|
|
|
|
|
|
{"Tag3", PER_TAG, ANY_OFFSET, TAG3, MAP_TAG3}, |
6903
|
|
|
|
|
|
|
{"Tag5", PER_TAG, ANY_OFFSET, TAG5, MAP_TAG5}, |
6904
|
|
|
|
|
|
|
{"Tag25", PER_TAG, ANY_OFFSET, TAG25, MAP_TAG25}, |
6905
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
6906
|
|
|
|
|
|
|
|
6907
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
6908
|
|
|
|
|
|
|
{"PreviousVerbLemma", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_LEMMA, MAP_LEMMA} |
6909
|
|
|
|
|
|
|
}; |
6910
|
|
|
|
|
|
|
|
6911
|
|
|
|
|
|
|
template |
6912
|
0
|
|
|
|
|
|
void czech_elementary_features |
6913
|
|
|
|
|
|
|
using namespace unilib; |
6914
|
|
|
|
|
|
|
|
6915
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
6916
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_lemma = elementary_feature_empty; |
6917
|
0
|
0
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
6918
|
|
|
|
|
|
|
int verb_candidate = -1; |
6919
|
|
|
|
|
|
|
|
6920
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
6921
|
0
|
0
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
6922
|
|
|
|
|
|
|
char tag25[2]; |
6923
|
0
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(analyses[i][j].tag.c_str(), analyses[i][j].tag.size()); |
6924
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
6925
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
6926
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[TAG25] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG25].value((tag25[0] = analyses[i][j].tag[1], tag25[1] = analyses[i][j].tag[4], tag25), 2) : elementary_feature_empty; |
6927
|
0
|
0
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
0
|
|
|
|
|
|
6928
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(analyses[i][j].lemma.c_str(), analyses[i][j].lemma.size()); |
6929
|
|
|
|
|
|
|
|
6930
|
0
|
0
|
|
|
|
|
if (analyses[i][j].tag[0] == 'V') { |
6931
|
|
|
|
|
|
|
int tag_compare; |
6932
|
0
|
0
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
0
|
|
|
|
|
|
6933
|
|
|
|
|
|
|
} |
6934
|
|
|
|
|
|
|
} |
6935
|
|
|
|
|
|
|
|
6936
|
|
|
|
|
|
|
// Per_form features |
6937
|
0
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
6938
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
6939
|
0
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_LEMMA] = following_verb_lemma; |
6940
|
|
|
|
|
|
|
|
6941
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
6942
|
0
|
0
|
|
|
|
|
if (verb_candidate >= 0) { |
6943
|
0
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
6944
|
0
|
|
|
|
|
|
following_verb_lemma = per_tag[i][verb_candidate].values[LEMMA]; |
6945
|
|
|
|
|
|
|
} |
6946
|
|
|
|
|
|
|
|
6947
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
6948
|
0
|
0
|
|
|
|
|
if (analyses[i].size() == 1) { |
6949
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
6950
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = per_form[i].values[PREFIX4] = elementary_feature_unknown; |
6951
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = per_form[i].values[SUFFIX4] = elementary_feature_unknown; |
6952
|
0
|
0
|
|
|
|
|
} else if (forms[i].len <= 0) { |
6953
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
6954
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = per_form[i].values[PREFIX4] = elementary_feature_empty; |
6955
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = per_form[i].values[SUFFIX4] = elementary_feature_empty; |
6956
|
|
|
|
|
|
|
} else { |
6957
|
0
|
|
|
|
|
|
string_piece form = forms[i]; |
6958
|
0
|
|
|
|
|
|
const char* form_start = form.str; |
6959
|
|
|
|
|
|
|
|
6960
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
6961
|
0
|
|
|
|
|
|
size_t indices[8] = {0, form.len, form.len, form.len, form.len, 0, 0, 0}; // careful here regarding forms shorter than 4 characters |
6962
|
|
|
|
|
|
|
int index = 0; |
6963
|
0
|
0
|
|
|
|
|
while (form.len) { |
6964
|
0
|
|
|
|
|
|
indices[(index++)&7] = form.str - form_start; |
6965
|
|
|
|
|
|
|
|
6966
|
0
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
6967
|
0
|
0
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
0
|
|
|
|
|
|
6968
|
0
|
0
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
0
|
|
|
|
|
|
6969
|
0
|
0
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
0
|
|
|
|
|
|
6970
|
|
|
|
|
|
|
|
6971
|
0
|
0
|
|
|
|
|
if (index == 5 || (!form.len && index < 5)) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
6972
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
6973
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
6974
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
6975
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
6976
|
|
|
|
|
|
|
} |
6977
|
|
|
|
|
|
|
} |
6978
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index-1)&7], form.str - form_start - indices[(index-1)&7]); |
6979
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index-2)&7], form.str - form_start - indices[(index-2)&7]); |
6980
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index-3)&7], form.str - form_start - indices[(index-3)&7]); |
6981
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index-4)&7], form.str - form_start - indices[(index-4)&7]); |
6982
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
6983
|
0
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
6984
|
0
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
6985
|
|
|
|
|
|
|
} |
6986
|
|
|
|
|
|
|
} |
6987
|
0
|
|
|
|
|
|
} |
6988
|
|
|
|
|
|
|
|
6989
|
|
|
|
|
|
|
template |
6990
|
|
|
|
|
|
|
void czech_elementary_features |
6991
|
0
|
0
|
|
|
|
|
if (prev_dynamic) { |
6992
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
6993
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_LEMMA]; |
6994
|
|
|
|
|
|
|
} else { |
6995
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
6996
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = elementary_feature_empty; |
6997
|
|
|
|
|
|
|
} |
6998
|
|
|
|
|
|
|
|
6999
|
0
|
0
|
|
|
|
|
if (tag.tag[0] == 'V') { |
7000
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
7001
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = per_tag.values[LEMMA]; |
7002
|
|
|
|
|
|
|
} else { |
7003
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
7004
|
0
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = dynamic.values[PREVIOUS_VERB_LEMMA]; |
7005
|
|
|
|
|
|
|
} |
7006
|
|
|
|
|
|
|
} |
7007
|
|
|
|
|
|
|
|
7008
|
|
|
|
|
|
|
///////// |
7009
|
|
|
|
|
|
|
// File: tagger/generic_elementary_features.h |
7010
|
|
|
|
|
|
|
///////// |
7011
|
|
|
|
|
|
|
|
7012
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7013
|
|
|
|
|
|
|
// |
7014
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7015
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7016
|
|
|
|
|
|
|
// |
7017
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7018
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7019
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7020
|
|
|
|
|
|
|
|
7021
|
|
|
|
|
|
|
// Declarations |
7022
|
|
|
|
|
|
|
template |
7023
|
0
|
|
|
|
|
|
class generic_elementary_features : public elementary_features |
7024
|
|
|
|
|
|
|
public: |
7025
|
|
|
|
|
|
|
generic_elementary_features(); |
7026
|
|
|
|
|
|
|
|
7027
|
|
|
|
|
|
|
enum features_per_form { FORM, FOLLOWING_VERB_TAG, FOLLOWING_VERB_LEMMA, NUM, CAP, DASH, PREFIX1, PREFIX2, PREFIX3, PREFIX4, PREFIX5, PREFIX6, PREFIX7, PREFIX8, PREFIX9, SUFFIX1, SUFFIX2, SUFFIX3, SUFFIX4, SUFFIX5, SUFFIX6, SUFFIX7, SUFFIX8, SUFFIX9, PER_FORM_TOTAL }; |
7028
|
|
|
|
|
|
|
enum features_per_tag { TAG, TAG1, TAG2, TAG3, TAG4, TAG5, LEMMA, PER_TAG_TOTAL }; |
7029
|
|
|
|
|
|
|
enum features_dynamic { PREVIOUS_VERB_TAG, PREVIOUS_VERB_LEMMA, PREVIOUS_OR_CURRENT_VERB_TAG, PREVIOUS_OR_CURRENT_VERB_LEMMA, DYNAMIC_TOTAL }; |
7030
|
|
|
|
|
|
|
enum features_map { MAP_NONE = -1, MAP_FORM, MAP_PREFIX1, MAP_PREFIX2, MAP_PREFIX3, MAP_PREFIX4, MAP_PREFIX5, MAP_PREFIX6, MAP_PREFIX7, MAP_PREFIX8, MAP_PREFIX9, MAP_SUFFIX1, MAP_SUFFIX2, MAP_SUFFIX3, MAP_SUFFIX4, MAP_SUFFIX5, MAP_SUFFIX6, MAP_SUFFIX7, MAP_SUFFIX8, MAP_SUFFIX9, MAP_TAG, MAP_TAG1, MAP_TAG2, MAP_TAG3, MAP_TAG4, MAP_TAG5, MAP_LEMMA, MAP_TOTAL } ; |
7031
|
|
|
|
|
|
|
|
7032
|
|
|
|
|
|
|
struct per_form_features { elementary_feature_value values[PER_FORM_TOTAL]; }; |
7033
|
|
|
|
|
|
|
struct per_tag_features { elementary_feature_value values[PER_TAG_TOTAL]; }; |
7034
|
|
|
|
|
|
|
struct dynamic_features { elementary_feature_value values[DYNAMIC_TOTAL]; }; |
7035
|
|
|
|
|
|
|
|
7036
|
|
|
|
|
|
|
static vector descriptions; |
7037
|
|
|
|
|
|
|
|
7038
|
|
|
|
|
|
|
void compute_features(const vector& forms, const vector>& analyses, vector& per_form, vector>& per_tag) const; |
7039
|
|
|
|
|
|
|
inline void compute_dynamic_features(const tagged_lemma& tag, const per_form_features& per_form, const per_tag_features& per_tag, const dynamic_features* prev_dynamic, dynamic_features& dynamic) const; |
7040
|
|
|
|
|
|
|
|
7041
|
|
|
|
|
|
|
using elementary_features |
7042
|
|
|
|
|
|
|
}; |
7043
|
|
|
|
|
|
|
|
7044
|
|
|
|
|
|
|
typedef generic_elementary_features persistent_generic_elementary_features; |
7045
|
|
|
|
|
|
|
|
7046
|
|
|
|
|
|
|
// Definitions |
7047
|
|
|
|
|
|
|
template |
7048
|
1
|
|
|
|
|
|
generic_elementary_features |
7049
|
1
|
50
|
|
|
|
|
maps.resize(MAP_TOTAL); |
7050
|
1
|
|
|
|
|
|
} |
7051
|
|
|
|
|
|
|
|
7052
|
|
|
|
|
|
|
template |
7053
|
|
|
|
|
|
|
vector generic_elementary_features |
7054
|
|
|
|
|
|
|
{"Form", PER_FORM, ANY_OFFSET, FORM, MAP_FORM}, |
7055
|
|
|
|
|
|
|
{"FollowingVerbTag", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_TAG, MAP_TAG}, |
7056
|
|
|
|
|
|
|
{"FollowingVerbLemma", PER_FORM, ANY_OFFSET, FOLLOWING_VERB_LEMMA, MAP_LEMMA }, |
7057
|
|
|
|
|
|
|
{"Num", PER_FORM, ONLY_CURRENT, NUM, MAP_NONE}, |
7058
|
|
|
|
|
|
|
{"Cap", PER_FORM, ONLY_CURRENT, CAP, MAP_NONE}, |
7059
|
|
|
|
|
|
|
{"Dash", PER_FORM, ONLY_CURRENT, DASH, MAP_NONE}, |
7060
|
|
|
|
|
|
|
{"Prefix1", PER_FORM, ONLY_CURRENT, PREFIX1, MAP_PREFIX1}, |
7061
|
|
|
|
|
|
|
{"Prefix2", PER_FORM, ONLY_CURRENT, PREFIX2, MAP_PREFIX2}, |
7062
|
|
|
|
|
|
|
{"Prefix3", PER_FORM, ONLY_CURRENT, PREFIX3, MAP_PREFIX3}, |
7063
|
|
|
|
|
|
|
{"Prefix4", PER_FORM, ONLY_CURRENT, PREFIX4, MAP_PREFIX4}, |
7064
|
|
|
|
|
|
|
{"Prefix5", PER_FORM, ONLY_CURRENT, PREFIX5, MAP_PREFIX5}, |
7065
|
|
|
|
|
|
|
{"Prefix6", PER_FORM, ONLY_CURRENT, PREFIX6, MAP_PREFIX6}, |
7066
|
|
|
|
|
|
|
{"Prefix7", PER_FORM, ONLY_CURRENT, PREFIX7, MAP_PREFIX7}, |
7067
|
|
|
|
|
|
|
{"Prefix8", PER_FORM, ONLY_CURRENT, PREFIX8, MAP_PREFIX8}, |
7068
|
|
|
|
|
|
|
{"Prefix9", PER_FORM, ONLY_CURRENT, PREFIX9, MAP_PREFIX9}, |
7069
|
|
|
|
|
|
|
{"Suffix1", PER_FORM, ONLY_CURRENT, SUFFIX1, MAP_SUFFIX1}, |
7070
|
|
|
|
|
|
|
{"Suffix2", PER_FORM, ONLY_CURRENT, SUFFIX2, MAP_SUFFIX2}, |
7071
|
|
|
|
|
|
|
{"Suffix3", PER_FORM, ONLY_CURRENT, SUFFIX3, MAP_SUFFIX3}, |
7072
|
|
|
|
|
|
|
{"Suffix4", PER_FORM, ONLY_CURRENT, SUFFIX4, MAP_SUFFIX4}, |
7073
|
|
|
|
|
|
|
{"Suffix5", PER_FORM, ONLY_CURRENT, SUFFIX5, MAP_SUFFIX5}, |
7074
|
|
|
|
|
|
|
{"Suffix6", PER_FORM, ONLY_CURRENT, SUFFIX6, MAP_SUFFIX6}, |
7075
|
|
|
|
|
|
|
{"Suffix7", PER_FORM, ONLY_CURRENT, SUFFIX7, MAP_SUFFIX7}, |
7076
|
|
|
|
|
|
|
{"Suffix8", PER_FORM, ONLY_CURRENT, SUFFIX8, MAP_SUFFIX8}, |
7077
|
|
|
|
|
|
|
{"Suffix9", PER_FORM, ONLY_CURRENT, SUFFIX9, MAP_SUFFIX9}, |
7078
|
|
|
|
|
|
|
|
7079
|
|
|
|
|
|
|
{"Tag", PER_TAG, ANY_OFFSET, TAG, MAP_TAG}, |
7080
|
|
|
|
|
|
|
{"Tag1", PER_TAG, ANY_OFFSET, TAG1, MAP_TAG1}, |
7081
|
|
|
|
|
|
|
{"Tag2", PER_TAG, ANY_OFFSET, TAG2, MAP_TAG2}, |
7082
|
|
|
|
|
|
|
{"Tag3", PER_TAG, ANY_OFFSET, TAG3, MAP_TAG3}, |
7083
|
|
|
|
|
|
|
{"Tag4", PER_TAG, ANY_OFFSET, TAG4, MAP_TAG4}, |
7084
|
|
|
|
|
|
|
{"Tag5", PER_TAG, ANY_OFFSET, TAG5, MAP_TAG5}, |
7085
|
|
|
|
|
|
|
{"Lemma", PER_TAG, ANY_OFFSET, LEMMA, MAP_LEMMA}, |
7086
|
|
|
|
|
|
|
|
7087
|
|
|
|
|
|
|
{"PreviousVerbTag", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_TAG, MAP_TAG}, |
7088
|
|
|
|
|
|
|
{"PreviousVerbLemma", DYNAMIC, ANY_OFFSET, PREVIOUS_VERB_LEMMA, MAP_LEMMA} |
7089
|
|
|
|
|
|
|
}; |
7090
|
|
|
|
|
|
|
|
7091
|
|
|
|
|
|
|
template |
7092
|
2
|
|
|
|
|
|
void generic_elementary_features |
7093
|
|
|
|
|
|
|
using namespace unilib; |
7094
|
|
|
|
|
|
|
|
7095
|
|
|
|
|
|
|
// We process the sentence in reverse order, so that we can compute FollowingVerbTag and FollowingVerbLemma directly. |
7096
|
|
|
|
|
|
|
elementary_feature_value following_verb_tag = elementary_feature_empty, following_verb_lemma = elementary_feature_empty; |
7097
|
8
|
100
|
|
|
|
|
for (unsigned i = forms.size(); i--;) { |
7098
|
|
|
|
|
|
|
int verb_candidate = -1; |
7099
|
|
|
|
|
|
|
|
7100
|
|
|
|
|
|
|
// Per_tag features and verb_candidate |
7101
|
19
|
100
|
|
|
|
|
for (unsigned j = 0; j < analyses[i].size(); j++) { |
7102
|
26
|
|
|
|
|
|
per_tag[i][j].values[TAG] = maps[MAP_TAG].value(analyses[i][j].tag.c_str(), analyses[i][j].tag.size()); |
7103
|
13
|
50
|
|
|
|
|
per_tag[i][j].values[TAG1] = analyses[i][j].tag.size() >= 1 ? maps[MAP_TAG1].value(analyses[i][j].tag.c_str() + 0, 1) : elementary_feature_empty; |
7104
|
13
|
50
|
|
|
|
|
per_tag[i][j].values[TAG2] = analyses[i][j].tag.size() >= 2 ? maps[MAP_TAG2].value(analyses[i][j].tag.c_str() + 1, 1) : elementary_feature_empty; |
7105
|
13
|
50
|
|
|
|
|
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
7106
|
13
|
100
|
|
|
|
|
per_tag[i][j].values[TAG4] = analyses[i][j].tag.size() >= 4 ? maps[MAP_TAG4].value(analyses[i][j].tag.c_str() + 3, 1) : elementary_feature_empty; |
7107
|
13
|
50
|
|
|
|
|
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
7108
|
13
|
100
|
|
|
|
|
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
|
50
|
|
|
|
|
|
7109
|
|
|
|
|
|
|
maps[MAP_LEMMA].value(analyses[i][j].lemma.c_str(), analyses[i][j].lemma.size()); |
7110
|
|
|
|
|
|
|
|
7111
|
13
|
100
|
|
|
|
|
if (analyses[i][j].tag[0] == 'V') { |
7112
|
|
|
|
|
|
|
int tag_compare; |
7113
|
3
|
100
|
|
|
|
|
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
|
50
|
|
|
|
|
|
7114
|
|
|
|
|
|
|
} |
7115
|
|
|
|
|
|
|
} |
7116
|
|
|
|
|
|
|
|
7117
|
|
|
|
|
|
|
// Per_form features |
7118
|
12
|
|
|
|
|
|
per_form[i].values[FORM] = maps[MAP_FORM].value(forms[i].str, forms[i].len); |
7119
|
6
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_TAG] = following_verb_tag; |
7120
|
6
|
|
|
|
|
|
per_form[i].values[FOLLOWING_VERB_LEMMA] = following_verb_lemma; |
7121
|
|
|
|
|
|
|
|
7122
|
|
|
|
|
|
|
// Update following_verb_{tag,lemma} _after_ filling FOLLOWING_VERB_{TAG,LEMMA}. |
7123
|
6
|
100
|
|
|
|
|
if (verb_candidate >= 0) { |
7124
|
4
|
|
|
|
|
|
following_verb_tag = per_tag[i][verb_candidate].values[TAG]; |
7125
|
2
|
|
|
|
|
|
following_verb_lemma = per_tag[i][verb_candidate].values[LEMMA]; |
7126
|
|
|
|
|
|
|
} |
7127
|
|
|
|
|
|
|
|
7128
|
|
|
|
|
|
|
// Ortographic per_form features if needed |
7129
|
6
|
100
|
|
|
|
|
if (analyses[i].size() == 1) { |
7130
|
3
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_unknown; |
7131
|
3
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_unknown; |
7132
|
3
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_unknown; |
7133
|
3
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_unknown; |
7134
|
3
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_unknown; |
7135
|
3
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_unknown; |
7136
|
3
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_unknown; |
7137
|
3
|
50
|
|
|
|
|
} else if (forms[i].len <= 0) { |
7138
|
0
|
|
|
|
|
|
per_form[i].values[NUM] = per_form[i].values[CAP] = per_form[i].values[DASH] = elementary_feature_empty + 1; |
7139
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX1] = per_form[i].values[PREFIX2] = per_form[i].values[PREFIX3] = elementary_feature_empty; |
7140
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX4] = per_form[i].values[PREFIX5] = per_form[i].values[PREFIX6] = elementary_feature_empty; |
7141
|
0
|
|
|
|
|
|
per_form[i].values[PREFIX7] = per_form[i].values[PREFIX8] = per_form[i].values[PREFIX9] = elementary_feature_empty; |
7142
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = per_form[i].values[SUFFIX2] = per_form[i].values[SUFFIX3] = elementary_feature_empty; |
7143
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = per_form[i].values[SUFFIX5] = per_form[i].values[SUFFIX6] = elementary_feature_empty; |
7144
|
0
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = per_form[i].values[SUFFIX8] = per_form[i].values[SUFFIX9] = elementary_feature_empty; |
7145
|
|
|
|
|
|
|
} else { |
7146
|
3
|
|
|
|
|
|
string_piece form = forms[i]; |
7147
|
3
|
|
|
|
|
|
const char* form_start = form.str; |
7148
|
|
|
|
|
|
|
|
7149
|
|
|
|
|
|
|
bool num = false, cap = false, dash = false; |
7150
|
17
|
|
|
|
|
|
size_t indices[18] = {0, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, form.len, 0, 0, 0, 0, 0, 0, 0, 0}; // careful here regarding forms shorter than 9 characters |
7151
|
|
|
|
|
|
|
int index = 0; |
7152
|
17
|
100
|
|
|
|
|
while (form.len) { |
7153
|
14
|
|
|
|
|
|
indices[(index++) % 18] = form.str - form_start; |
7154
|
|
|
|
|
|
|
|
7155
|
14
|
|
|
|
|
|
unicode::category_t cat = unicode::category(utf8::decode(form.str, form.len)); |
7156
|
14
|
50
|
|
|
|
|
num = num || cat & unicode::N; |
|
|
50
|
|
|
|
|
|
7157
|
14
|
100
|
|
|
|
|
cap = cap || cat & unicode::Lut; |
|
|
100
|
|
|
|
|
|
7158
|
14
|
50
|
|
|
|
|
dash = dash || cat & unicode::Pd; |
|
|
50
|
|
|
|
|
|
7159
|
|
|
|
|
|
|
|
7160
|
14
|
50
|
|
|
|
|
if (index == 10 || (!form.len && index < 10)) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
7161
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX1] = maps[MAP_PREFIX1].value(form_start, indices[1]); |
7162
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX2] = maps[MAP_PREFIX2].value(form_start, indices[2]); |
7163
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX3] = maps[MAP_PREFIX3].value(form_start, indices[3]); |
7164
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX4] = maps[MAP_PREFIX4].value(form_start, indices[4]); |
7165
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX5] = maps[MAP_PREFIX5].value(form_start, indices[5]); |
7166
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX6] = maps[MAP_PREFIX6].value(form_start, indices[6]); |
7167
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX7] = maps[MAP_PREFIX7].value(form_start, indices[7]); |
7168
|
6
|
|
|
|
|
|
per_form[i].values[PREFIX8] = maps[MAP_PREFIX8].value(form_start, indices[8]); |
7169
|
3
|
|
|
|
|
|
per_form[i].values[PREFIX9] = maps[MAP_PREFIX9].value(form_start, indices[9]); |
7170
|
|
|
|
|
|
|
} |
7171
|
|
|
|
|
|
|
} |
7172
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX1] = maps[MAP_SUFFIX1].value(form_start + indices[(index+18-1) % 18], form.str - form_start - indices[(index+18-1) % 18]); |
7173
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX2] = maps[MAP_SUFFIX2].value(form_start + indices[(index+18-2) % 18], form.str - form_start - indices[(index+18-2) % 18]); |
7174
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX3] = maps[MAP_SUFFIX3].value(form_start + indices[(index+18-3) % 18], form.str - form_start - indices[(index+18-3) % 18]); |
7175
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX4] = maps[MAP_SUFFIX4].value(form_start + indices[(index+18-4) % 18], form.str - form_start - indices[(index+18-4) % 18]); |
7176
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX5] = maps[MAP_SUFFIX5].value(form_start + indices[(index+18-5) % 18], form.str - form_start - indices[(index+18-5) % 18]); |
7177
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX6] = maps[MAP_SUFFIX6].value(form_start + indices[(index+18-6) % 18], form.str - form_start - indices[(index+18-6) % 18]); |
7178
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX7] = maps[MAP_SUFFIX7].value(form_start + indices[(index+18-7) % 18], form.str - form_start - indices[(index+18-7) % 18]); |
7179
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX8] = maps[MAP_SUFFIX8].value(form_start + indices[(index+18-8) % 18], form.str - form_start - indices[(index+18-8) % 18]); |
7180
|
6
|
|
|
|
|
|
per_form[i].values[SUFFIX9] = maps[MAP_SUFFIX9].value(form_start + indices[(index+18-9) % 18], form.str - form_start - indices[(index+18-9) % 18]); |
7181
|
3
|
|
|
|
|
|
per_form[i].values[NUM] = elementary_feature_empty + 1 + num; |
7182
|
3
|
|
|
|
|
|
per_form[i].values[CAP] = elementary_feature_empty + 1 + cap; |
7183
|
3
|
|
|
|
|
|
per_form[i].values[DASH] = elementary_feature_empty + 1 + dash; |
7184
|
|
|
|
|
|
|
} |
7185
|
|
|
|
|
|
|
} |
7186
|
2
|
|
|
|
|
|
} |
7187
|
|
|
|
|
|
|
|
7188
|
|
|
|
|
|
|
template |
7189
|
|
|
|
|
|
|
void generic_elementary_features |
7190
|
29
|
100
|
|
|
|
|
if (prev_dynamic) { |
7191
|
24
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_TAG]; |
7192
|
24
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = prev_dynamic->values[PREVIOUS_OR_CURRENT_VERB_LEMMA]; |
7193
|
|
|
|
|
|
|
} else { |
7194
|
5
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_TAG] = elementary_feature_empty; |
7195
|
5
|
|
|
|
|
|
dynamic.values[PREVIOUS_VERB_LEMMA] = elementary_feature_empty; |
7196
|
|
|
|
|
|
|
} |
7197
|
|
|
|
|
|
|
|
7198
|
29
|
100
|
|
|
|
|
if (tag.tag[0] == 'V') { |
7199
|
9
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = per_tag.values[TAG]; |
7200
|
9
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = per_tag.values[LEMMA]; |
7201
|
|
|
|
|
|
|
} else { |
7202
|
20
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_TAG] = dynamic.values[PREVIOUS_VERB_TAG]; |
7203
|
20
|
|
|
|
|
|
dynamic.values[PREVIOUS_OR_CURRENT_VERB_LEMMA] = dynamic.values[PREVIOUS_VERB_LEMMA]; |
7204
|
|
|
|
|
|
|
} |
7205
|
|
|
|
|
|
|
} |
7206
|
|
|
|
|
|
|
|
7207
|
|
|
|
|
|
|
///////// |
7208
|
|
|
|
|
|
|
// File: utils/threadsafe_stack.h |
7209
|
|
|
|
|
|
|
///////// |
7210
|
|
|
|
|
|
|
|
7211
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
7212
|
|
|
|
|
|
|
// |
7213
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7214
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7215
|
|
|
|
|
|
|
// |
7216
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7217
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7218
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7219
|
|
|
|
|
|
|
|
7220
|
|
|
|
|
|
|
namespace utils { |
7221
|
|
|
|
|
|
|
|
7222
|
|
|
|
|
|
|
// |
7223
|
|
|
|
|
|
|
// Declarations |
7224
|
|
|
|
|
|
|
// |
7225
|
|
|
|
|
|
|
|
7226
|
|
|
|
|
|
|
template |
7227
|
0
|
|
|
|
|
|
class threadsafe_stack { |
7228
|
|
|
|
|
|
|
public: |
7229
|
|
|
|
|
|
|
inline void push(T* t); |
7230
|
|
|
|
|
|
|
inline T* pop(); |
7231
|
|
|
|
|
|
|
|
7232
|
|
|
|
|
|
|
private: |
7233
|
|
|
|
|
|
|
vector> stack; |
7234
|
|
|
|
|
|
|
atomic_flag lock = ATOMIC_FLAG_INIT; |
7235
|
|
|
|
|
|
|
}; |
7236
|
|
|
|
|
|
|
|
7237
|
|
|
|
|
|
|
// |
7238
|
|
|
|
|
|
|
// Definitions |
7239
|
|
|
|
|
|
|
// |
7240
|
|
|
|
|
|
|
|
7241
|
|
|
|
|
|
|
template |
7242
|
4
|
|
|
|
|
|
void threadsafe_stack::push(T* t) { |
7243
|
2
|
0
|
|
|
|
|
while (lock.test_and_set(memory_order_acquire)) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7244
|
2
|
|
|
|
|
|
stack.emplace_back(t); |
7245
|
|
|
|
|
|
|
lock.clear(memory_order_release); |
7246
|
2
|
|
|
|
|
|
} |
7247
|
|
|
|
|
|
|
|
7248
|
|
|
|
|
|
|
template |
7249
|
4
|
|
|
|
|
|
T* threadsafe_stack::pop() { |
7250
|
|
|
|
|
|
|
T* res = nullptr; |
7251
|
|
|
|
|
|
|
|
7252
|
2
|
0
|
|
|
|
|
while (lock.test_and_set(memory_order_acquire)) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7253
|
2
|
0
|
|
|
|
|
if (!stack.empty()) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7254
|
|
|
|
|
|
|
res = stack.back().release(); |
7255
|
|
|
|
|
|
|
stack.pop_back(); |
7256
|
|
|
|
|
|
|
} |
7257
|
|
|
|
|
|
|
lock.clear(memory_order_release); |
7258
|
|
|
|
|
|
|
|
7259
|
2
|
|
|
|
|
|
return res; |
7260
|
|
|
|
|
|
|
} |
7261
|
|
|
|
|
|
|
|
7262
|
|
|
|
|
|
|
} // namespace utils |
7263
|
|
|
|
|
|
|
|
7264
|
|
|
|
|
|
|
///////// |
7265
|
|
|
|
|
|
|
// File: tagger/perceptron_tagger.h |
7266
|
|
|
|
|
|
|
///////// |
7267
|
|
|
|
|
|
|
|
7268
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7269
|
|
|
|
|
|
|
// |
7270
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7271
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7272
|
|
|
|
|
|
|
// |
7273
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7274
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7275
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7276
|
|
|
|
|
|
|
|
7277
|
|
|
|
|
|
|
// Declarations |
7278
|
|
|
|
|
|
|
template |
7279
|
0
|
|
|
|
|
|
class perceptron_tagger : public tagger { |
7280
|
|
|
|
|
|
|
public: |
7281
|
|
|
|
|
|
|
perceptron_tagger(int decoding_order, int window_size); |
7282
|
|
|
|
|
|
|
|
7283
|
|
|
|
|
|
|
bool load(istream& is); |
7284
|
|
|
|
|
|
|
virtual const morpho* get_morpho() const override; |
7285
|
|
|
|
|
|
|
virtual void tag(const vector& forms, vector& tags, morpho::guesser_mode guesser = morpho::guesser_mode(-1)) const override; |
7286
|
|
|
|
|
|
|
virtual void tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const override; |
7287
|
|
|
|
|
|
|
|
7288
|
|
|
|
|
|
|
private: |
7289
|
|
|
|
|
|
|
int decoding_order, window_size; |
7290
|
|
|
|
|
|
|
|
7291
|
|
|
|
|
|
|
unique_ptr dict; |
7292
|
|
|
|
|
|
|
bool use_guesser; |
7293
|
|
|
|
|
|
|
FeatureSequences features; |
7294
|
|
|
|
|
|
|
typedef viterbi viterbi_decoder; |
7295
|
|
|
|
|
|
|
viterbi_decoder decoder; |
7296
|
0
|
|
|
|
|
|
struct cache { |
7297
|
|
|
|
|
|
|
vector forms; |
7298
|
|
|
|
|
|
|
vector> analyses; |
7299
|
|
|
|
|
|
|
vector tags; |
7300
|
|
|
|
|
|
|
typename viterbi_decoder::cache decoder_cache; |
7301
|
|
|
|
|
|
|
|
7302
|
1
|
0
|
|
|
|
|
cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {} |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7303
|
|
|
|
|
|
|
}; |
7304
|
|
|
|
|
|
|
|
7305
|
|
|
|
|
|
|
mutable threadsafe_stack caches; |
7306
|
|
|
|
|
|
|
}; |
7307
|
|
|
|
|
|
|
|
7308
|
|
|
|
|
|
|
// Definitions |
7309
|
|
|
|
|
|
|
|
7310
|
|
|
|
|
|
|
template |
7311
|
1
|
|
|
|
|
|
perceptron_tagger::perceptron_tagger(int decoding_order, int window_size) |
7312
|
1
|
|
|
|
|
|
: decoding_order(decoding_order), window_size(window_size), decoder(features, decoding_order, window_size) {} |
7313
|
|
|
|
|
|
|
|
7314
|
|
|
|
|
|
|
template |
7315
|
1
|
|
|
|
|
|
bool perceptron_tagger::load(istream& is) { |
7316
|
2
|
0
|
|
|
|
|
if (dict.reset(morpho::load(is)), !dict) return false; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7317
|
1
|
|
|
|
|
|
use_guesser = is.get(); |
7318
|
1
|
0
|
|
|
|
|
if (!features.load(is)) return false; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7319
|
1
|
|
|
|
|
|
return true; |
7320
|
|
|
|
|
|
|
} |
7321
|
|
|
|
|
|
|
|
7322
|
|
|
|
|
|
|
template |
7323
|
2
|
|
|
|
|
|
const morpho* perceptron_tagger::get_morpho() const { |
7324
|
2
|
|
|
|
|
|
return dict.get(); |
7325
|
|
|
|
|
|
|
} |
7326
|
|
|
|
|
|
|
|
7327
|
|
|
|
|
|
|
template |
7328
|
2
|
|
|
|
|
|
void perceptron_tagger::tag(const vector& forms, vector& tags, morpho::guesser_mode guesser) const { |
7329
|
|
|
|
|
|
|
tags.clear(); |
7330
|
2
|
0
|
|
|
|
|
if (!dict) return; |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7331
|
|
|
|
|
|
|
|
7332
|
2
|
|
|
|
|
|
cache* c = caches.pop(); |
7333
|
2
|
0
|
|
|
|
|
if (!c) c = new cache(*this); |
|
|
0
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7334
|
|
|
|
|
|
|
|
7335
|
2
|
|
|
|
|
|
c->forms.resize(forms.size()); |
7336
|
2
|
0
|
|
|
|
|
if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size()); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7337
|
8
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7338
|
6
|
|
|
|
|
|
c->forms[i] = forms[i]; |
7339
|
6
|
|
|
|
|
|
c->forms[i].len = dict->raw_form_len(forms[i]); |
7340
|
6
|
0
|
|
|
|
|
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7341
|
|
|
|
|
|
|
} |
7342
|
|
|
|
|
|
|
|
7343
|
2
|
0
|
|
|
|
|
if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2); |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7344
|
2
|
|
|
|
|
|
decoder.tag(c->forms, c->analyses, c->decoder_cache, c->tags); |
7345
|
|
|
|
|
|
|
|
7346
|
8
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) |
|
|
100
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7347
|
6
|
|
|
|
|
|
tags.emplace_back(c->analyses[i][c->tags[i]]); |
7348
|
|
|
|
|
|
|
|
7349
|
2
|
|
|
|
|
|
caches.push(c); |
7350
|
|
|
|
|
|
|
} |
7351
|
|
|
|
|
|
|
|
7352
|
|
|
|
|
|
|
template |
7353
|
0
|
|
|
|
|
|
void perceptron_tagger::tag_analyzed(const vector& forms, const vector>& analyses, vector& tags) const { |
7354
|
|
|
|
|
|
|
tags.clear(); |
7355
|
|
|
|
|
|
|
|
7356
|
0
|
|
|
|
|
|
cache* c = caches.pop(); |
7357
|
0
|
0
|
|
|
|
|
if (!c) c = new cache(*this); |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7358
|
|
|
|
|
|
|
|
7359
|
0
|
|
|
|
|
|
tags.resize(forms.size()); |
7360
|
0
|
|
|
|
|
|
decoder.tag(forms, analyses, c->decoder_cache, tags); |
7361
|
|
|
|
|
|
|
|
7362
|
0
|
|
|
|
|
|
caches.push(c); |
7363
|
0
|
|
|
|
|
|
} |
7364
|
|
|
|
|
|
|
|
7365
|
|
|
|
|
|
|
///////// |
7366
|
|
|
|
|
|
|
// File: tagger/tagger_ids.h |
7367
|
|
|
|
|
|
|
///////// |
7368
|
|
|
|
|
|
|
|
7369
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7370
|
|
|
|
|
|
|
// |
7371
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7372
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7373
|
|
|
|
|
|
|
// |
7374
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7375
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7376
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7377
|
|
|
|
|
|
|
|
7378
|
|
|
|
|
|
|
class tagger_ids { |
7379
|
|
|
|
|
|
|
public: |
7380
|
|
|
|
|
|
|
enum tagger_id { |
7381
|
|
|
|
|
|
|
CZECH2 = 0, CZECH3 = 1, CZECH2_3 = 6, |
7382
|
|
|
|
|
|
|
/* 2 was used internally for ENGLISH3, but never released publicly */ |
7383
|
|
|
|
|
|
|
GENERIC2 = 3, GENERIC3 = 4, GENERIC4 = 5, GENERIC2_3 = 7, |
7384
|
|
|
|
|
|
|
CONLLU2 = 8, CONLLU2_3 = 9, CONLLU3 = 10, |
7385
|
|
|
|
|
|
|
}; |
7386
|
|
|
|
|
|
|
|
7387
|
|
|
|
|
|
|
static bool parse(const string& str, tagger_id& id) { |
7388
|
|
|
|
|
|
|
if (str == "czech2") return id = CZECH2, true; |
7389
|
|
|
|
|
|
|
if (str == "czech2_3") return id = CZECH2_3, true; |
7390
|
|
|
|
|
|
|
if (str == "czech3") return id = CZECH3, true; |
7391
|
|
|
|
|
|
|
if (str == "generic2") return id = GENERIC2, true; |
7392
|
|
|
|
|
|
|
if (str == "generic2_3") return id = GENERIC2_3, true; |
7393
|
|
|
|
|
|
|
if (str == "generic3") return id = GENERIC3, true; |
7394
|
|
|
|
|
|
|
if (str == "generic4") return id = GENERIC4, true; |
7395
|
|
|
|
|
|
|
if (str == "conllu2") return id = CONLLU2, true; |
7396
|
|
|
|
|
|
|
if (str == "conllu2_3") return id = CONLLU2_3, true; |
7397
|
|
|
|
|
|
|
if (str == "conllu3") return id = CONLLU3, true; |
7398
|
|
|
|
|
|
|
return false; |
7399
|
|
|
|
|
|
|
} |
7400
|
|
|
|
|
|
|
|
7401
|
|
|
|
|
|
|
static int decoding_order(tagger_id id) { |
7402
|
|
|
|
|
|
|
switch (id) { |
7403
|
|
|
|
|
|
|
case CZECH2: return 2; |
7404
|
|
|
|
|
|
|
case CZECH2_3: return 2; |
7405
|
|
|
|
|
|
|
case CZECH3: return 3; |
7406
|
|
|
|
|
|
|
case GENERIC2: return 2; |
7407
|
|
|
|
|
|
|
case GENERIC2_3: return 2; |
7408
|
|
|
|
|
|
|
case GENERIC3: return 3; |
7409
|
|
|
|
|
|
|
case GENERIC4: return 4; |
7410
|
|
|
|
|
|
|
case CONLLU2: return 2; |
7411
|
|
|
|
|
|
|
case CONLLU2_3: return 2; |
7412
|
|
|
|
|
|
|
case CONLLU3: return 3; |
7413
|
|
|
|
|
|
|
} |
7414
|
|
|
|
|
|
|
return 0; |
7415
|
|
|
|
|
|
|
} |
7416
|
|
|
|
|
|
|
|
7417
|
|
|
|
|
|
|
static int window_size(tagger_id id) { |
7418
|
|
|
|
|
|
|
switch (id) { |
7419
|
|
|
|
|
|
|
case CZECH2_3: return 3; |
7420
|
|
|
|
|
|
|
case GENERIC2_3: return 3; |
7421
|
|
|
|
|
|
|
case CONLLU2_3: return 3; |
7422
|
|
|
|
|
|
|
default: break; |
7423
|
|
|
|
|
|
|
} |
7424
|
|
|
|
|
|
|
return decoding_order(id); |
7425
|
|
|
|
|
|
|
} |
7426
|
|
|
|
|
|
|
}; |
7427
|
|
|
|
|
|
|
|
7428
|
|
|
|
|
|
|
typedef tagger_ids::tagger_id tagger_id; |
7429
|
|
|
|
|
|
|
|
7430
|
|
|
|
|
|
|
///////// |
7431
|
|
|
|
|
|
|
// File: tagger/tagger.cpp |
7432
|
|
|
|
|
|
|
///////// |
7433
|
|
|
|
|
|
|
|
7434
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7435
|
|
|
|
|
|
|
// |
7436
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7437
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7438
|
|
|
|
|
|
|
// |
7439
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7440
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7441
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7442
|
|
|
|
|
|
|
|
7443
|
1
|
|
|
|
|
|
tagger* tagger::load(istream& is) { |
7444
|
1
|
50
|
|
|
|
|
tagger_id id = tagger_id(is.get()); |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7445
|
|
|
|
|
|
|
switch (id) { |
7446
|
|
|
|
|
|
|
case tagger_ids::CZECH2: |
7447
|
|
|
|
|
|
|
case tagger_ids::CZECH2_3: |
7448
|
|
|
|
|
|
|
case tagger_ids::CZECH3: |
7449
|
|
|
|
|
|
|
{ |
7450
|
0
|
0
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
7451
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
0
|
|
|
|
|
|
7452
|
|
|
|
|
|
|
break; |
7453
|
|
|
|
|
|
|
} |
7454
|
|
|
|
|
|
|
case tagger_ids::GENERIC2: |
7455
|
|
|
|
|
|
|
case tagger_ids::GENERIC2_3: |
7456
|
|
|
|
|
|
|
case tagger_ids::GENERIC3: |
7457
|
|
|
|
|
|
|
case tagger_ids::GENERIC4: |
7458
|
|
|
|
|
|
|
{ |
7459
|
1
|
50
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
7460
|
1
|
50
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
50
|
|
|
|
|
|
7461
|
|
|
|
|
|
|
break; |
7462
|
|
|
|
|
|
|
} |
7463
|
|
|
|
|
|
|
case tagger_ids::CONLLU2: |
7464
|
|
|
|
|
|
|
case tagger_ids::CONLLU2_3: |
7465
|
|
|
|
|
|
|
case tagger_ids::CONLLU3: |
7466
|
|
|
|
|
|
|
{ |
7467
|
0
|
0
|
|
|
|
|
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
7468
|
0
|
0
|
|
|
|
|
if (res->load(is)) return res.release(); |
|
|
0
|
|
|
|
|
|
7469
|
|
|
|
|
|
|
break; |
7470
|
|
|
|
|
|
|
} |
7471
|
|
|
|
|
|
|
} |
7472
|
|
|
|
|
|
|
|
7473
|
|
|
|
|
|
|
return nullptr; |
7474
|
|
|
|
|
|
|
} |
7475
|
|
|
|
|
|
|
|
7476
|
1
|
|
|
|
|
|
tagger* tagger::load(const char* fname) { |
7477
|
2
|
50
|
|
|
|
|
ifstream f(path_from_utf8(fname).c_str(), ifstream::binary); |
7478
|
1
|
50
|
|
|
|
|
if (!f) return nullptr; |
7479
|
|
|
|
|
|
|
|
7480
|
1
|
50
|
|
|
|
|
return load(f); |
7481
|
|
|
|
|
|
|
} |
7482
|
|
|
|
|
|
|
|
7483
|
2
|
|
|
|
|
|
tokenizer* tagger::new_tokenizer() const { |
7484
|
2
|
|
|
|
|
|
auto morpho = get_morpho(); |
7485
|
2
|
50
|
|
|
|
|
return morpho ? morpho->new_tokenizer() : nullptr; |
7486
|
|
|
|
|
|
|
} |
7487
|
|
|
|
|
|
|
|
7488
|
|
|
|
|
|
|
///////// |
7489
|
|
|
|
|
|
|
// File: tagset_converter/identity_tagset_converter.h |
7490
|
|
|
|
|
|
|
///////// |
7491
|
|
|
|
|
|
|
|
7492
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7493
|
|
|
|
|
|
|
// |
7494
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7495
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7496
|
|
|
|
|
|
|
// |
7497
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7498
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7499
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7500
|
|
|
|
|
|
|
|
7501
|
0
|
|
|
|
|
|
class identity_tagset_converter : public tagset_converter { |
7502
|
|
|
|
|
|
|
public: |
7503
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
7504
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
7505
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
7506
|
|
|
|
|
|
|
}; |
7507
|
|
|
|
|
|
|
|
7508
|
|
|
|
|
|
|
///////// |
7509
|
|
|
|
|
|
|
// File: tagset_converter/identity_tagset_converter.cpp |
7510
|
|
|
|
|
|
|
///////// |
7511
|
|
|
|
|
|
|
|
7512
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7513
|
|
|
|
|
|
|
// |
7514
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7515
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7516
|
|
|
|
|
|
|
// |
7517
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7518
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7519
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7520
|
|
|
|
|
|
|
|
7521
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert(tagged_lemma& /*tagged_lemma*/) const {} |
7522
|
|
|
|
|
|
|
|
7523
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert_analyzed(vector& /*tagged_lemmas*/) const {} |
7524
|
|
|
|
|
|
|
|
7525
|
0
|
|
|
|
|
|
void identity_tagset_converter::convert_generated(vector& /*forms*/) const {} |
7526
|
|
|
|
|
|
|
|
7527
|
|
|
|
|
|
|
///////// |
7528
|
|
|
|
|
|
|
// File: tagset_converter/pdt_to_conll2009_tagset_converter.h |
7529
|
|
|
|
|
|
|
///////// |
7530
|
|
|
|
|
|
|
|
7531
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7532
|
|
|
|
|
|
|
// |
7533
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7534
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7535
|
|
|
|
|
|
|
// |
7536
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7537
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7538
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7539
|
|
|
|
|
|
|
|
7540
|
0
|
|
|
|
|
|
class pdt_to_conll2009_tagset_converter : public tagset_converter { |
7541
|
|
|
|
|
|
|
public: |
7542
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
7543
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
7544
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
7545
|
|
|
|
|
|
|
|
7546
|
|
|
|
|
|
|
private: |
7547
|
|
|
|
|
|
|
inline void convert_tag(const string& lemma, string& tag) const; |
7548
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
7549
|
|
|
|
|
|
|
}; |
7550
|
|
|
|
|
|
|
|
7551
|
|
|
|
|
|
|
///////// |
7552
|
|
|
|
|
|
|
// File: tagset_converter/pdt_to_conll2009_tagset_converter.cpp |
7553
|
|
|
|
|
|
|
///////// |
7554
|
|
|
|
|
|
|
|
7555
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7556
|
|
|
|
|
|
|
// |
7557
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7558
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7559
|
|
|
|
|
|
|
// |
7560
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7561
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7562
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7563
|
|
|
|
|
|
|
|
7564
|
|
|
|
|
|
|
static const char* names[15] = {"POS", "SubPOS", "Gen", "Num", "Cas", "PGe", "PNu", "Per", "Ten", "Gra", "Neg", "Voi", "", "", "Var"}; |
7565
|
|
|
|
|
|
|
|
7566
|
0
|
|
|
|
|
|
inline void pdt_to_conll2009_tagset_converter::convert_tag(const string& lemma, string& tag) const { |
7567
|
|
|
|
|
|
|
char pdt_tag[16]; |
7568
|
|
|
|
|
|
|
strncpy(pdt_tag, tag.c_str(), 15); |
7569
|
|
|
|
|
|
|
|
7570
|
|
|
|
|
|
|
// Clear the tag |
7571
|
|
|
|
|
|
|
tag.clear(); |
7572
|
|
|
|
|
|
|
|
7573
|
|
|
|
|
|
|
// Fill FEAT of filled tag characters |
7574
|
0
|
0
|
|
|
|
|
for (int i = 0; i < 15 && pdt_tag[i]; i++) |
|
|
0
|
|
|
|
|
|
7575
|
0
|
0
|
|
|
|
|
if (pdt_tag[i] != '-') { |
7576
|
0
|
0
|
|
|
|
|
if (!tag.empty()) tag.push_back('|'); |
7577
|
0
|
|
|
|
|
|
tag.append(names[i]); |
7578
|
0
|
|
|
|
|
|
tag.push_back('='); |
7579
|
0
|
|
|
|
|
|
tag.push_back(pdt_tag[i]); |
7580
|
|
|
|
|
|
|
} |
7581
|
|
|
|
|
|
|
|
7582
|
|
|
|
|
|
|
// Try adding Sem FEAT |
7583
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i + 2 < lemma.size(); i++) |
7584
|
0
|
0
|
|
|
|
|
if (lemma[i] == '_' && lemma[i + 1] == ';') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7585
|
0
|
0
|
|
|
|
|
if (!tag.empty()) tag.push_back('|'); |
7586
|
0
|
|
|
|
|
|
tag.append("Sem="); |
7587
|
0
|
|
|
|
|
|
tag.push_back(lemma[i + 2]); |
7588
|
|
|
|
|
|
|
break; |
7589
|
|
|
|
|
|
|
} |
7590
|
0
|
|
|
|
|
|
} |
7591
|
|
|
|
|
|
|
|
7592
|
0
|
|
|
|
|
|
inline bool pdt_to_conll2009_tagset_converter::convert_lemma(string& lemma) const { |
7593
|
0
|
|
|
|
|
|
unsigned raw_lemma = czech_lemma_addinfo::raw_lemma_len(lemma); |
7594
|
0
|
0
|
|
|
|
|
return raw_lemma < lemma.size() ? (lemma.resize(raw_lemma), true) : false; |
7595
|
|
|
|
|
|
|
} |
7596
|
|
|
|
|
|
|
|
7597
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
7598
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma.lemma, tagged_lemma.tag); |
7599
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
7600
|
0
|
|
|
|
|
|
} |
7601
|
|
|
|
|
|
|
|
7602
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
7603
|
|
|
|
|
|
|
bool lemma_changed = false; |
7604
|
|
|
|
|
|
|
|
7605
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) { |
7606
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma.lemma, tagged_lemma.tag); |
7607
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
7608
|
|
|
|
|
|
|
} |
7609
|
|
|
|
|
|
|
|
7610
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
7611
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7612
|
|
|
|
|
|
|
|
7613
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
7614
|
|
|
|
|
|
|
} |
7615
|
|
|
|
|
|
|
|
7616
|
0
|
|
|
|
|
|
void pdt_to_conll2009_tagset_converter::convert_generated(vector& forms) const { |
7617
|
|
|
|
|
|
|
bool lemma_changed = false; |
7618
|
|
|
|
|
|
|
|
7619
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) { |
7620
|
0
|
0
|
|
|
|
|
for (auto&& tagged_form : tagged_lemma_forms.forms) |
7621
|
0
|
|
|
|
|
|
convert_tag(tagged_lemma_forms.lemma, tagged_form.tag); |
7622
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
7623
|
|
|
|
|
|
|
} |
7624
|
|
|
|
|
|
|
|
7625
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
7626
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7627
|
|
|
|
|
|
|
|
7628
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
7629
|
|
|
|
|
|
|
} |
7630
|
|
|
|
|
|
|
|
7631
|
|
|
|
|
|
|
///////// |
7632
|
|
|
|
|
|
|
// File: tagset_converter/strip_lemma_comment_tagset_converter.h |
7633
|
|
|
|
|
|
|
///////// |
7634
|
|
|
|
|
|
|
|
7635
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7636
|
|
|
|
|
|
|
// |
7637
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7638
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7639
|
|
|
|
|
|
|
// |
7640
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7641
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7642
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7643
|
|
|
|
|
|
|
|
7644
|
0
|
|
|
|
|
|
class strip_lemma_comment_tagset_converter : public tagset_converter { |
7645
|
|
|
|
|
|
|
public: |
7646
|
0
|
|
|
|
|
|
strip_lemma_comment_tagset_converter(const morpho& dictionary) : dictionary(dictionary) {} |
7647
|
|
|
|
|
|
|
|
7648
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
7649
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
7650
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
7651
|
|
|
|
|
|
|
|
7652
|
|
|
|
|
|
|
private: |
7653
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
7654
|
|
|
|
|
|
|
const morpho& dictionary; |
7655
|
|
|
|
|
|
|
}; |
7656
|
|
|
|
|
|
|
|
7657
|
|
|
|
|
|
|
///////// |
7658
|
|
|
|
|
|
|
// File: tagset_converter/strip_lemma_comment_tagset_converter.cpp |
7659
|
|
|
|
|
|
|
///////// |
7660
|
|
|
|
|
|
|
|
7661
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7662
|
|
|
|
|
|
|
// |
7663
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7664
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7665
|
|
|
|
|
|
|
// |
7666
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7667
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7668
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7669
|
|
|
|
|
|
|
|
7670
|
0
|
|
|
|
|
|
inline bool strip_lemma_comment_tagset_converter::convert_lemma(string& lemma) const { |
7671
|
0
|
|
|
|
|
|
unsigned lemma_id_len = dictionary.lemma_id_len(lemma); |
7672
|
0
|
0
|
|
|
|
|
return lemma_id_len < lemma.size() ? (lemma.resize(lemma_id_len), true) : false; |
7673
|
|
|
|
|
|
|
} |
7674
|
|
|
|
|
|
|
|
7675
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
7676
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
7677
|
0
|
|
|
|
|
|
} |
7678
|
|
|
|
|
|
|
|
7679
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
7680
|
|
|
|
|
|
|
bool lemma_changed = false; |
7681
|
|
|
|
|
|
|
|
7682
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) |
7683
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
7684
|
|
|
|
|
|
|
|
7685
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
7686
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7687
|
|
|
|
|
|
|
|
7688
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
7689
|
|
|
|
|
|
|
} |
7690
|
|
|
|
|
|
|
|
7691
|
0
|
|
|
|
|
|
void strip_lemma_comment_tagset_converter::convert_generated(vector& forms) const { |
7692
|
|
|
|
|
|
|
bool lemma_changed = false; |
7693
|
|
|
|
|
|
|
|
7694
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) |
7695
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
7696
|
|
|
|
|
|
|
|
7697
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
7698
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7699
|
|
|
|
|
|
|
|
7700
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
7701
|
|
|
|
|
|
|
} |
7702
|
|
|
|
|
|
|
|
7703
|
|
|
|
|
|
|
///////// |
7704
|
|
|
|
|
|
|
// File: tagset_converter/strip_lemma_id_tagset_converter.h |
7705
|
|
|
|
|
|
|
///////// |
7706
|
|
|
|
|
|
|
|
7707
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7708
|
|
|
|
|
|
|
// |
7709
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7710
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7711
|
|
|
|
|
|
|
// |
7712
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7713
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7714
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7715
|
|
|
|
|
|
|
|
7716
|
0
|
|
|
|
|
|
class strip_lemma_id_tagset_converter : public tagset_converter { |
7717
|
|
|
|
|
|
|
public: |
7718
|
0
|
|
|
|
|
|
strip_lemma_id_tagset_converter(const morpho& dictionary) : dictionary(dictionary) {} |
7719
|
|
|
|
|
|
|
|
7720
|
|
|
|
|
|
|
virtual void convert(tagged_lemma& tagged_lemma) const override; |
7721
|
|
|
|
|
|
|
virtual void convert_analyzed(vector& tagged_lemmas) const override; |
7722
|
|
|
|
|
|
|
virtual void convert_generated(vector& forms) const override; |
7723
|
|
|
|
|
|
|
|
7724
|
|
|
|
|
|
|
private: |
7725
|
|
|
|
|
|
|
inline bool convert_lemma(string& lemma) const; |
7726
|
|
|
|
|
|
|
const morpho& dictionary; |
7727
|
|
|
|
|
|
|
}; |
7728
|
|
|
|
|
|
|
|
7729
|
|
|
|
|
|
|
///////// |
7730
|
|
|
|
|
|
|
// File: tagset_converter/strip_lemma_id_tagset_converter.cpp |
7731
|
|
|
|
|
|
|
///////// |
7732
|
|
|
|
|
|
|
|
7733
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7734
|
|
|
|
|
|
|
// |
7735
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7736
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7737
|
|
|
|
|
|
|
// |
7738
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7739
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7740
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7741
|
|
|
|
|
|
|
|
7742
|
0
|
|
|
|
|
|
inline bool strip_lemma_id_tagset_converter::convert_lemma(string& lemma) const { |
7743
|
0
|
|
|
|
|
|
unsigned raw_lemma_len = dictionary.raw_lemma_len(lemma); |
7744
|
0
|
0
|
|
|
|
|
return raw_lemma_len < lemma.size() ? (lemma.resize(raw_lemma_len), true) : false; |
7745
|
|
|
|
|
|
|
} |
7746
|
|
|
|
|
|
|
|
7747
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert(tagged_lemma& tagged_lemma) const { |
7748
|
0
|
|
|
|
|
|
convert_lemma(tagged_lemma.lemma); |
7749
|
0
|
|
|
|
|
|
} |
7750
|
|
|
|
|
|
|
|
7751
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert_analyzed(vector& tagged_lemmas) const { |
7752
|
|
|
|
|
|
|
bool lemma_changed = false; |
7753
|
|
|
|
|
|
|
|
7754
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma : tagged_lemmas) |
7755
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma.lemma); |
7756
|
|
|
|
|
|
|
|
7757
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
7758
|
0
|
0
|
|
|
|
|
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7759
|
|
|
|
|
|
|
|
7760
|
0
|
|
|
|
|
|
tagset_converter_unique_analyzed(tagged_lemmas); |
7761
|
|
|
|
|
|
|
} |
7762
|
|
|
|
|
|
|
|
7763
|
0
|
|
|
|
|
|
void strip_lemma_id_tagset_converter::convert_generated(vector& forms) const { |
7764
|
|
|
|
|
|
|
bool lemma_changed = false; |
7765
|
|
|
|
|
|
|
|
7766
|
0
|
0
|
|
|
|
|
for (auto&& tagged_lemma_forms : forms) |
7767
|
0
|
|
|
|
|
|
lemma_changed |= convert_lemma(tagged_lemma_forms.lemma); |
7768
|
|
|
|
|
|
|
|
7769
|
|
|
|
|
|
|
// If no lemma was changed or there is 1 analysis, no duplicates could be created. |
7770
|
0
|
0
|
|
|
|
|
if (!lemma_changed || forms.size() < 2) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7771
|
|
|
|
|
|
|
|
7772
|
0
|
|
|
|
|
|
tagset_converter_unique_generated(forms); |
7773
|
|
|
|
|
|
|
} |
7774
|
|
|
|
|
|
|
|
7775
|
|
|
|
|
|
|
///////// |
7776
|
|
|
|
|
|
|
// File: tagset_converter/tagset_converter.cpp |
7777
|
|
|
|
|
|
|
///////// |
7778
|
|
|
|
|
|
|
|
7779
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7780
|
|
|
|
|
|
|
// |
7781
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7782
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7783
|
|
|
|
|
|
|
// |
7784
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7785
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7786
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7787
|
|
|
|
|
|
|
|
7788
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_identity_converter() { |
7789
|
0
|
|
|
|
|
|
return new identity_tagset_converter(); |
7790
|
|
|
|
|
|
|
} |
7791
|
|
|
|
|
|
|
|
7792
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_pdt_to_conll2009_converter() { |
7793
|
0
|
|
|
|
|
|
return new pdt_to_conll2009_tagset_converter(); |
7794
|
|
|
|
|
|
|
} |
7795
|
|
|
|
|
|
|
|
7796
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_strip_lemma_comment_converter(const morpho& dictionary) { |
7797
|
0
|
|
|
|
|
|
return new strip_lemma_comment_tagset_converter(dictionary); |
7798
|
|
|
|
|
|
|
} |
7799
|
|
|
|
|
|
|
|
7800
|
0
|
|
|
|
|
|
tagset_converter* tagset_converter::new_strip_lemma_id_converter(const morpho& dictionary) { |
7801
|
0
|
|
|
|
|
|
return new strip_lemma_id_tagset_converter(dictionary); |
7802
|
|
|
|
|
|
|
} |
7803
|
|
|
|
|
|
|
|
7804
|
0
|
|
|
|
|
|
tagset_converter* new_tagset_converter(const string& name, const morpho& dictionary) { |
7805
|
0
|
0
|
|
|
|
|
if (name == "pdt_to_conll2009") return tagset_converter::new_pdt_to_conll2009_converter(); |
7806
|
0
|
0
|
|
|
|
|
if (name == "strip_lemma_comment") return tagset_converter::new_strip_lemma_comment_converter(dictionary); |
7807
|
0
|
0
|
|
|
|
|
if (name == "strip_lemma_id") return tagset_converter::new_strip_lemma_id_converter(dictionary); |
7808
|
|
|
|
|
|
|
return nullptr; |
7809
|
|
|
|
|
|
|
} |
7810
|
|
|
|
|
|
|
|
7811
|
0
|
|
|
|
|
|
void tagset_converter_unique_analyzed(vector& tagged_lemmas) { |
7812
|
|
|
|
|
|
|
// Remove possible lemma-tag pair duplicates |
7813
|
|
|
|
|
|
|
struct tagged_lemma_comparator { |
7814
|
0
|
0
|
|
|
|
|
inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; } |
|
|
0
|
|
|
|
|
|
7815
|
0
|
0
|
|
|
|
|
inline static bool lt(const tagged_lemma& a, const tagged_lemma& b) { int lemma_compare = a.lemma.compare(b.lemma); return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); } |
7816
|
|
|
|
|
|
|
}; |
7817
|
|
|
|
|
|
|
|
7818
|
|
|
|
|
|
|
sort(tagged_lemmas.begin(), tagged_lemmas.end(), tagged_lemma_comparator::lt); |
7819
|
0
|
|
|
|
|
|
tagged_lemmas.resize(unique(tagged_lemmas.begin(), tagged_lemmas.end(), tagged_lemma_comparator::eq) - tagged_lemmas.begin()); |
7820
|
0
|
|
|
|
|
|
} |
7821
|
|
|
|
|
|
|
|
7822
|
0
|
|
|
|
|
|
void tagset_converter_unique_generated(vector& forms) { |
7823
|
|
|
|
|
|
|
// Regroup and if needed remove duplicate form-tag pairs for each lemma |
7824
|
0
|
0
|
|
|
|
|
for (unsigned i = 0; i < forms.size(); i++) { |
7825
|
|
|
|
|
|
|
bool any_merged = false; |
7826
|
0
|
0
|
|
|
|
|
for (unsigned j = forms.size() - 1; j > i; j--) |
7827
|
0
|
0
|
|
|
|
|
if (forms[j].lemma == forms[i].lemma) { |
7828
|
|
|
|
|
|
|
// Same lemma was found. Merge form-tag pairs |
7829
|
0
|
0
|
|
|
|
|
for (auto&& tagged_form : forms[j].forms) |
7830
|
0
|
|
|
|
|
|
forms[i].forms.emplace_back(move(tagged_form)); |
7831
|
|
|
|
|
|
|
|
7832
|
|
|
|
|
|
|
// Remove lemma j by moving it to end and deleting |
7833
|
0
|
0
|
|
|
|
|
if (j < forms.size() - 1) { |
7834
|
0
|
|
|
|
|
|
forms[j].lemma.swap(forms[forms.size() - 1].lemma); |
7835
|
0
|
|
|
|
|
|
forms[j].forms.swap(forms[forms.size() - 1].forms); |
7836
|
|
|
|
|
|
|
} |
7837
|
|
|
|
|
|
|
forms.pop_back(); |
7838
|
|
|
|
|
|
|
any_merged = true; |
7839
|
|
|
|
|
|
|
} |
7840
|
|
|
|
|
|
|
|
7841
|
0
|
0
|
|
|
|
|
if (any_merged && forms[i].forms.size() > 1) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
7842
|
|
|
|
|
|
|
// Remove duplicate form-tag pairs |
7843
|
|
|
|
|
|
|
struct tagged_form_comparator { |
7844
|
0
|
0
|
|
|
|
|
inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; } |
|
|
0
|
|
|
|
|
|
7845
|
0
|
0
|
|
|
|
|
inline static bool lt(const tagged_form& a, const tagged_form& b) { int tag_compare = a.tag.compare(b.tag); return tag_compare < 0 || (tag_compare == 0 && a.form < b.form); } |
7846
|
|
|
|
|
|
|
}; |
7847
|
|
|
|
|
|
|
|
7848
|
|
|
|
|
|
|
sort(forms[i].forms.begin(), forms[i].forms.end(), tagged_form_comparator::lt); |
7849
|
0
|
|
|
|
|
|
forms[i].forms.resize(unique(forms[i].forms.begin(), forms[i].forms.end(), tagged_form_comparator::eq) - forms[i].forms.begin()); |
7850
|
|
|
|
|
|
|
} |
7851
|
|
|
|
|
|
|
} |
7852
|
0
|
|
|
|
|
|
} |
7853
|
|
|
|
|
|
|
|
7854
|
|
|
|
|
|
|
///////// |
7855
|
|
|
|
|
|
|
// File: tokenizer/czech_tokenizer.cpp |
7856
|
|
|
|
|
|
|
///////// |
7857
|
|
|
|
|
|
|
|
7858
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
7859
|
|
|
|
|
|
|
// |
7860
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
7861
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
7862
|
|
|
|
|
|
|
// |
7863
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
7864
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
7865
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
7866
|
|
|
|
|
|
|
|
7867
|
|
|
|
|
|
|
static const char _czech_tokenizer_cond_offsets[] = { |
7868
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7869
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
7870
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2 |
7871
|
|
|
|
|
|
|
}; |
7872
|
|
|
|
|
|
|
|
7873
|
|
|
|
|
|
|
static const char _czech_tokenizer_cond_lengths[] = { |
7874
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 2, |
7875
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7876
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
7877
|
|
|
|
|
|
|
}; |
7878
|
|
|
|
|
|
|
|
7879
|
|
|
|
|
|
|
static const short _czech_tokenizer_cond_keys[] = { |
7880
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
7881
|
|
|
|
|
|
|
}; |
7882
|
|
|
|
|
|
|
|
7883
|
|
|
|
|
|
|
static const char _czech_tokenizer_cond_spaces[] = { |
7884
|
|
|
|
|
|
|
1, 0, 0 |
7885
|
|
|
|
|
|
|
}; |
7886
|
|
|
|
|
|
|
|
7887
|
|
|
|
|
|
|
static const unsigned char _czech_tokenizer_key_offsets[] = { |
7888
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 51, 54, |
7889
|
|
|
|
|
|
|
89, 94, 98, 101, 105, 110, 111, 116, |
7890
|
|
|
|
|
|
|
117, 122, 136, 143, 148, 151, 163 |
7891
|
|
|
|
|
|
|
}; |
7892
|
|
|
|
|
|
|
|
7893
|
|
|
|
|
|
|
static const short _czech_tokenizer_trans_keys[] = { |
7894
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
7895
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
7896
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
7897
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
7898
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
7899
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 43u, 45u, |
7900
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 9u, 10u, |
7901
|
|
|
|
|
|
|
13u, 32u, 33u, 44u, 46u, 47u, 63u, 129u, |
7902
|
|
|
|
|
|
|
131u, 135u, 142u, 147u, 157u, 159u, 160u, 301u, |
7903
|
|
|
|
|
|
|
557u, 811u, 1067u, 0u, 42u, 48u, 57u, 58u, |
7904
|
|
|
|
|
|
|
64u, 65u, 90u, 91u, 96u, 97u, 122u, 123u, |
7905
|
|
|
|
|
|
|
255u, 9u, 10u, 13u, 32u, 147u, 9u, 13u, |
7906
|
|
|
|
|
|
|
32u, 147u, 9u, 32u, 147u, 9u, 10u, 32u, |
7907
|
|
|
|
|
|
|
147u, 9u, 10u, 13u, 32u, 147u, 13u, 9u, |
7908
|
|
|
|
|
|
|
10u, 13u, 32u, 147u, 10u, 9u, 10u, 13u, |
7909
|
|
|
|
|
|
|
32u, 147u, 13u, 32u, 34u, 39u, 41u, 59u, |
7910
|
|
|
|
|
|
|
93u, 125u, 139u, 141u, 147u, 161u, 9u, 10u, |
7911
|
|
|
|
|
|
|
44u, 46u, 69u, 101u, 159u, 48u, 57u, 69u, |
7912
|
|
|
|
|
|
|
101u, 159u, 48u, 57u, 159u, 48u, 57u, 129u, |
7913
|
|
|
|
|
|
|
131u, 135u, 151u, 155u, 157u, 65u, 90u, 97u, |
7914
|
|
|
|
|
|
|
122u, 142u, 143u, 159u, 48u, 57u, 0 |
7915
|
|
|
|
|
|
|
}; |
7916
|
|
|
|
|
|
|
|
7917
|
|
|
|
|
|
|
static const char _czech_tokenizer_single_lengths[] = { |
7918
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 3, 1, 21, |
7919
|
|
|
|
|
|
|
5, 4, 3, 4, 5, 1, 5, 1, |
7920
|
|
|
|
|
|
|
5, 12, 5, 3, 1, 6, 1 |
7921
|
|
|
|
|
|
|
}; |
7922
|
|
|
|
|
|
|
|
7923
|
|
|
|
|
|
|
static const char _czech_tokenizer_range_lengths[] = { |
7924
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 7, |
7925
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7926
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 3, 1 |
7927
|
|
|
|
|
|
|
}; |
7928
|
|
|
|
|
|
|
|
7929
|
|
|
|
|
|
|
static const unsigned char _czech_tokenizer_index_offsets[] = { |
7930
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 50, 53, |
7931
|
|
|
|
|
|
|
82, 88, 93, 97, 102, 108, 110, 116, |
7932
|
|
|
|
|
|
|
118, 124, 138, 145, 150, 153, 163 |
7933
|
|
|
|
|
|
|
}; |
7934
|
|
|
|
|
|
|
|
7935
|
|
|
|
|
|
|
static const char _czech_tokenizer_indicies[] = { |
7936
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
7937
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
7938
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
7939
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
7940
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
7941
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 8, |
7942
|
|
|
|
|
|
|
8, 0, 8, 8, 0, 10, 11, 12, |
7943
|
|
|
|
|
|
|
10, 13, 9, 13, 9, 13, 16, 16, |
7944
|
|
|
|
|
|
|
16, 16, 10, 16, 15, 13, 9, 17, |
7945
|
|
|
|
|
|
|
9, 17, 9, 15, 9, 16, 9, 16, |
7946
|
|
|
|
|
|
|
9, 14, 10, 19, 20, 10, 10, 18, |
7947
|
|
|
|
|
|
|
10, 21, 10, 10, 18, 10, 10, 10, |
7948
|
|
|
|
|
|
|
18, 10, 21, 10, 10, 18, 10, 22, |
7949
|
|
|
|
|
|
|
23, 10, 10, 18, 25, 24, 10, 22, |
7950
|
|
|
|
|
|
|
26, 10, 10, 18, 25, 24, 10, 23, |
7951
|
|
|
|
|
|
|
26, 10, 10, 18, 4, 4, 5, 5, |
7952
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
7953
|
|
|
|
|
|
|
4, 27, 28, 28, 29, 29, 15, 15, |
7954
|
|
|
|
|
|
|
27, 29, 29, 6, 6, 27, 8, 8, |
7955
|
|
|
|
|
|
|
27, 16, 16, 16, 16, 16, 16, 16, |
7956
|
|
|
|
|
|
|
16, 16, 27, 15, 15, 27, 0 |
7957
|
|
|
|
|
|
|
}; |
7958
|
|
|
|
|
|
|
|
7959
|
|
|
|
|
|
|
static const char _czech_tokenizer_trans_targs[] = { |
7960
|
|
|
|
|
|
|
7, 1, 2, 7, 1, 3, 19, 6, |
7961
|
|
|
|
|
|
|
20, 7, 8, 12, 16, 17, 0, 18, |
7962
|
|
|
|
|
|
|
21, 22, 7, 9, 11, 10, 13, 14, |
7963
|
|
|
|
|
|
|
7, 7, 15, 7, 4, 5 |
7964
|
|
|
|
|
|
|
}; |
7965
|
|
|
|
|
|
|
|
7966
|
|
|
|
|
|
|
static const char _czech_tokenizer_trans_actions[] = { |
7967
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 4, 0, |
7968
|
|
|
|
|
|
|
0, 7, 0, 0, 0, 4, 0, 4, |
7969
|
|
|
|
|
|
|
0, 0, 8, 0, 0, 0, 0, 0, |
7970
|
|
|
|
|
|
|
9, 10, 0, 11, 0, 0 |
7971
|
|
|
|
|
|
|
}; |
7972
|
|
|
|
|
|
|
|
7973
|
|
|
|
|
|
|
static const char _czech_tokenizer_to_state_actions[] = { |
7974
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 5, |
7975
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7976
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
7977
|
|
|
|
|
|
|
}; |
7978
|
|
|
|
|
|
|
|
7979
|
|
|
|
|
|
|
static const char _czech_tokenizer_from_state_actions[] = { |
7980
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 6, |
7981
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
7982
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
7983
|
|
|
|
|
|
|
}; |
7984
|
|
|
|
|
|
|
|
7985
|
|
|
|
|
|
|
static const unsigned char _czech_tokenizer_eof_trans[] = { |
7986
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 0, |
7987
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 25, 19, 25, |
7988
|
|
|
|
|
|
|
19, 28, 28, 28, 28, 28, 28 |
7989
|
|
|
|
|
|
|
}; |
7990
|
|
|
|
|
|
|
|
7991
|
|
|
|
|
|
|
static const int czech_tokenizer_start = 7; |
7992
|
|
|
|
|
|
|
|
7993
|
|
|
|
|
|
|
// The list of lower cased words that when preceding eos do not end sentence. |
7994
|
|
|
|
|
|
|
// Note: because of VS, we cannot list the abbreviations directly in UTF-8, |
7995
|
|
|
|
|
|
|
// because the compilation of utf-8 encoded sources fail on some locales |
7996
|
|
|
|
|
|
|
// (e.g., Japanese). |
7997
|
|
|
|
|
|
|
// perl -CS -ple 'use Encode;s/([^[:ascii:]])/join("", map {sprintf "\\%o", ord($_)} split(m@@, encode("utf-8", $1)))/ge' |
7998
|
|
|
|
|
|
|
// perl -CS -ple 'use Encode;s/\\([0-7]{3})\\([0-7]{3})/decode("utf-8", chr(oct($1)).chr(oct($2)))/ge' |
7999
|
436
|
100
|
|
|
|
|
const unordered_set czech_tokenizer::abbreviations_czech = { |
|
|
0
|
|
|
|
|
|
8000
|
|
|
|
|
|
|
// Titles |
8001
|
|
|
|
|
|
|
"prof", "csc", "drsc", "doc", "phd", "ph", "d", |
8002
|
|
|
|
|
|
|
"judr", "mddr", "mudr", "mvdr", "paeddr", "paedr", "phdr", "rndr", "rsdr", "dr", |
8003
|
|
|
|
|
|
|
"ing", "arch", "mgr", "bc", "mag", "mba", "bca", "mga", |
8004
|
|
|
|
|
|
|
"gen", "plk", "pplk", "npor", "por", "ppor", "kpt", "mjr", "sgt", "pls", "p", "s", |
8005
|
|
|
|
|
|
|
"p", "p\303\255", "fa", "fy", "mr", "mrs", "ms", "miss", "tr", "sv", |
8006
|
|
|
|
|
|
|
// Geographic names |
8007
|
|
|
|
|
|
|
"angl", "fr", "\304\215es", "ces", "\304\215s", "cs", "slov", "n\304\233m", "nem", "it", "pol", "ma\304\217", "mad", "rus", |
8008
|
|
|
|
|
|
|
"sev", "v\303\275ch", "vych", "ji\305\276", "jiz", "z\303\241p", "zap", |
8009
|
|
|
|
|
|
|
// Common abbrevs |
8010
|
|
|
|
|
|
|
"adr", "\304\215", "c", "eg", "ev", "g", "hod", "j", "kr", "m", "max", "min", "mj", "nap\305\231", "napr", |
8011
|
|
|
|
|
|
|
"okr", "pop\305\231", "popr", "pozn", "r", "\305\231", "red", "rep", "resp", "srov", "st", "st\305\231", "str", |
8012
|
|
|
|
|
|
|
"sv", "tel", "tj", "tzv", "\303\272", "u", "uh", "ul", "um", "zl", "zn", |
8013
|
|
|
|
|
|
|
}; |
8014
|
|
|
|
|
|
|
|
8015
|
420
|
100
|
|
|
|
|
const unordered_set czech_tokenizer::abbreviations_slovak = { |
|
|
0
|
|
|
|
|
|
8016
|
|
|
|
|
|
|
// Titles |
8017
|
|
|
|
|
|
|
"prof", "csc", "drsc", "doc", "phd", "ph", "d", |
8018
|
|
|
|
|
|
|
"judr", "mddr", "mudr", "mvdr", "paeddr", "paedr", "phdr", "rndr", "rsdr", "dr", |
8019
|
|
|
|
|
|
|
"ing", "arch", "mgr", "bc", "mag", "mba", "bca", "mga", |
8020
|
|
|
|
|
|
|
"gen", "plk", "pplk", "npor", "por", "ppor", "kpt", "mjr", "sgt", "pls", "p", "s", |
8021
|
|
|
|
|
|
|
"p", "p\303\255", "fa", "fy", "mr", "mrs", "ms", "miss", "tr", "sv", |
8022
|
|
|
|
|
|
|
// Geographic names |
8023
|
|
|
|
|
|
|
"angl", "fr", "\304\215es", "ces", "\304\215s", "cs", "slov", "nem", "it", "po\304\276", "pol", "ma\304\217", "mad", |
8024
|
|
|
|
|
|
|
"rus", "sev", "v\303\275ch", "vych", "ju\305\276", "juz", "z\303\241p", "zap", |
8025
|
|
|
|
|
|
|
// Common abbrevs |
8026
|
|
|
|
|
|
|
"adr", "\304\215", "c", "eg", "ev", "g", "hod", "j", "kr", "m", "max", "min", "mj", "napr", |
8027
|
|
|
|
|
|
|
"okr", "popr", "pozn", "r", "red", "rep", "resp", "srov", "st", "str", |
8028
|
|
|
|
|
|
|
"sv", "tel", "tj", "tzv", "\303\272", "u", "uh", "ul", "um", "zl", "zn", |
8029
|
|
|
|
|
|
|
}; |
8030
|
|
|
|
|
|
|
|
8031
|
0
|
|
|
|
|
|
czech_tokenizer::czech_tokenizer(tokenizer_language language, unsigned version, const morpho* m) |
8032
|
1
|
50
|
|
|
|
|
: ragel_tokenizer(version <= 1 ? 1 : 2), m(m) { |
|
|
0
|
|
|
|
|
|
8033
|
0
|
|
|
|
|
|
switch (language) { |
8034
|
|
|
|
|
|
|
case CZECH: |
8035
|
1
|
|
|
|
|
|
abbreviations = &abbreviations_czech; |
8036
|
0
|
|
|
|
|
|
break; |
8037
|
|
|
|
|
|
|
case SLOVAK: |
8038
|
0
|
|
|
|
|
|
abbreviations = &abbreviations_slovak; |
8039
|
0
|
|
|
|
|
|
break; |
8040
|
|
|
|
|
|
|
} |
8041
|
0
|
|
|
|
|
|
} |
8042
|
|
|
|
|
|
|
|
8043
|
22
|
|
|
|
|
|
void czech_tokenizer::merge_hyphenated(vector& tokens) { |
8044
|
|
|
|
|
|
|
using namespace unilib; |
8045
|
|
|
|
|
|
|
|
8046
|
22
|
50
|
|
|
|
|
if (!m) return; |
8047
|
0
|
0
|
|
|
|
|
if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8048
|
|
|
|
|
|
|
|
8049
|
|
|
|
|
|
|
unsigned matched_hyphens = 0; |
8050
|
0
|
0
|
|
|
|
|
for (unsigned hyphens = 1; hyphens <= 2; hyphens++) { |
8051
|
|
|
|
|
|
|
// Are the tokens a sequence of 'hyphens' hyphenated tokens? |
8052
|
0
|
0
|
|
|
|
|
if (tokens.size() < 2*hyphens + 1) break; |
8053
|
0
|
|
|
|
|
|
unsigned first_hyphen = tokens.size() - 2*hyphens; |
8054
|
0
|
0
|
|
|
|
|
if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P || |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8055
|
0
|
0
|
|
|
|
|
tokens[first_hyphen].start + tokens[first_hyphen].length != tokens[first_hyphen + 1].start || |
8056
|
0
|
0
|
|
|
|
|
tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start || |
|
|
0
|
|
|
|
|
|
8057
|
0
|
|
|
|
|
|
chars[tokens[first_hyphen-1].start].cat & ~unicode::L) |
8058
|
|
|
|
|
|
|
break; |
8059
|
|
|
|
|
|
|
|
8060
|
0
|
0
|
|
|
|
|
if (m->analyze(string_piece(chars[tokens[first_hyphen-1].start].str, chars[tokens.back().start + tokens.back().length].str - chars[tokens[first_hyphen-1].start].str), morpho::NO_GUESSER, lemmas) >= 0) |
8061
|
|
|
|
|
|
|
matched_hyphens = hyphens; |
8062
|
|
|
|
|
|
|
} |
8063
|
|
|
|
|
|
|
|
8064
|
0
|
0
|
|
|
|
|
if (matched_hyphens) { |
8065
|
0
|
|
|
|
|
|
unsigned first = tokens.size() - 2*matched_hyphens - 1; |
8066
|
0
|
|
|
|
|
|
tokens[first].length = tokens.back().start + tokens.back().length - tokens[first].start; |
8067
|
0
|
|
|
|
|
|
tokens.resize(first + 1); |
8068
|
|
|
|
|
|
|
} |
8069
|
|
|
|
|
|
|
} |
8070
|
|
|
|
|
|
|
|
8071
|
4
|
|
|
|
|
|
bool czech_tokenizer::next_sentence(vector& tokens) { |
8072
|
|
|
|
|
|
|
using namespace unilib; |
8073
|
|
|
|
|
|
|
|
8074
|
|
|
|
|
|
|
int cs, act; |
8075
|
|
|
|
|
|
|
size_t ts, te; |
8076
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
8077
|
|
|
|
|
|
|
|
8078
|
2
|
50
|
|
|
|
|
while (tokenize_url_email(tokens)) |
8079
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
8080
|
|
|
|
|
|
|
return true; |
8081
|
|
|
|
|
|
|
|
8082
|
|
|
|
|
|
|
{ |
8083
|
|
|
|
|
|
|
cs = czech_tokenizer_start; |
8084
|
2
|
|
|
|
|
|
ts = 0; |
8085
|
|
|
|
|
|
|
te = 0; |
8086
|
|
|
|
|
|
|
act = 0; |
8087
|
|
|
|
|
|
|
} |
8088
|
|
|
|
|
|
|
|
8089
|
|
|
|
|
|
|
{ |
8090
|
|
|
|
|
|
|
int _klen; |
8091
|
|
|
|
|
|
|
const short *_keys; |
8092
|
|
|
|
|
|
|
int _trans; |
8093
|
|
|
|
|
|
|
short _widec; |
8094
|
|
|
|
|
|
|
|
8095
|
2
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
8096
|
|
|
|
|
|
|
goto _test_eof; |
8097
|
|
|
|
|
|
|
if ( cs == 0 ) |
8098
|
|
|
|
|
|
|
goto _out; |
8099
|
|
|
|
|
|
|
_resume: |
8100
|
154
|
100
|
|
|
|
|
switch ( _czech_tokenizer_from_state_actions[cs] ) { |
8101
|
|
|
|
|
|
|
case 6: |
8102
|
43
|
|
|
|
|
|
{ts = ( current);} |
8103
|
43
|
|
|
|
|
|
break; |
8104
|
|
|
|
|
|
|
} |
8105
|
|
|
|
|
|
|
|
8106
|
308
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
8107
|
154
|
|
|
|
|
|
_klen = _czech_tokenizer_cond_lengths[cs]; |
8108
|
154
|
|
|
|
|
|
_keys = _czech_tokenizer_cond_keys + (_czech_tokenizer_cond_offsets[cs]*2); |
8109
|
154
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
8110
|
|
|
|
|
|
|
const short *_lower = _keys; |
8111
|
|
|
|
|
|
|
const short *_mid; |
8112
|
111
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
8113
|
|
|
|
|
|
|
while (1) { |
8114
|
111
|
100
|
|
|
|
|
if ( _upper < _lower ) |
8115
|
|
|
|
|
|
|
break; |
8116
|
|
|
|
|
|
|
|
8117
|
69
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
8118
|
69
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
8119
|
17
|
|
|
|
|
|
_upper = _mid - 2; |
8120
|
52
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
8121
|
51
|
|
|
|
|
|
_lower = _mid + 2; |
8122
|
|
|
|
|
|
|
else { |
8123
|
1
|
|
|
|
|
|
switch ( _czech_tokenizer_cond_spaces[_czech_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
8124
|
|
|
|
|
|
|
case 0: { |
8125
|
1
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
8126
|
1
|
50
|
|
|
|
|
if ( |
8127
|
1
|
50
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
50
|
|
|
|
|
|
8128
|
|
|
|
|
|
|
break; |
8129
|
|
|
|
|
|
|
} |
8130
|
|
|
|
|
|
|
case 1: { |
8131
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
8132
|
0
|
0
|
|
|
|
|
if ( |
8133
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8134
|
|
|
|
|
|
|
break; |
8135
|
|
|
|
|
|
|
} |
8136
|
|
|
|
|
|
|
} |
8137
|
|
|
|
|
|
|
break; |
8138
|
|
|
|
|
|
|
} |
8139
|
|
|
|
|
|
|
} |
8140
|
|
|
|
|
|
|
} |
8141
|
|
|
|
|
|
|
|
8142
|
154
|
|
|
|
|
|
_keys = _czech_tokenizer_trans_keys + _czech_tokenizer_key_offsets[cs]; |
8143
|
154
|
|
|
|
|
|
_trans = _czech_tokenizer_index_offsets[cs]; |
8144
|
|
|
|
|
|
|
|
8145
|
154
|
|
|
|
|
|
_klen = _czech_tokenizer_single_lengths[cs]; |
8146
|
154
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
8147
|
|
|
|
|
|
|
const short *_lower = _keys; |
8148
|
|
|
|
|
|
|
const short *_mid; |
8149
|
604
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
8150
|
|
|
|
|
|
|
while (1) { |
8151
|
604
|
100
|
|
|
|
|
if ( _upper < _lower ) |
8152
|
|
|
|
|
|
|
break; |
8153
|
|
|
|
|
|
|
|
8154
|
486
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
8155
|
486
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
8156
|
270
|
|
|
|
|
|
_upper = _mid - 1; |
8157
|
216
|
100
|
|
|
|
|
else if ( _widec > *_mid ) |
8158
|
180
|
|
|
|
|
|
_lower = _mid + 1; |
8159
|
|
|
|
|
|
|
else { |
8160
|
36
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
8161
|
36
|
|
|
|
|
|
goto _match; |
8162
|
|
|
|
|
|
|
} |
8163
|
|
|
|
|
|
|
} |
8164
|
118
|
|
|
|
|
|
_keys += _klen; |
8165
|
118
|
|
|
|
|
|
_trans += _klen; |
8166
|
|
|
|
|
|
|
} |
8167
|
|
|
|
|
|
|
|
8168
|
118
|
|
|
|
|
|
_klen = _czech_tokenizer_range_lengths[cs]; |
8169
|
118
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
8170
|
|
|
|
|
|
|
const short *_lower = _keys; |
8171
|
|
|
|
|
|
|
const short *_mid; |
8172
|
153
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
8173
|
|
|
|
|
|
|
while (1) { |
8174
|
153
|
100
|
|
|
|
|
if ( _upper < _lower ) |
8175
|
|
|
|
|
|
|
break; |
8176
|
|
|
|
|
|
|
|
8177
|
133
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
8178
|
133
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
8179
|
38
|
|
|
|
|
|
_upper = _mid - 2; |
8180
|
95
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
8181
|
14
|
|
|
|
|
|
_lower = _mid + 2; |
8182
|
|
|
|
|
|
|
else { |
8183
|
81
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
8184
|
81
|
|
|
|
|
|
goto _match; |
8185
|
|
|
|
|
|
|
} |
8186
|
|
|
|
|
|
|
} |
8187
|
20
|
|
|
|
|
|
_trans += _klen; |
8188
|
|
|
|
|
|
|
} |
8189
|
|
|
|
|
|
|
|
8190
|
|
|
|
|
|
|
_match: |
8191
|
154
|
|
|
|
|
|
_trans = _czech_tokenizer_indicies[_trans]; |
8192
|
|
|
|
|
|
|
_eof_trans: |
8193
|
155
|
|
|
|
|
|
cs = _czech_tokenizer_trans_targs[_trans]; |
8194
|
|
|
|
|
|
|
|
8195
|
155
|
100
|
|
|
|
|
if ( _czech_tokenizer_trans_actions[_trans] == 0 ) |
8196
|
|
|
|
|
|
|
goto _again; |
8197
|
|
|
|
|
|
|
|
8198
|
70
|
|
|
|
|
|
switch ( _czech_tokenizer_trans_actions[_trans] ) { |
8199
|
|
|
|
|
|
|
case 3: |
8200
|
5
|
|
|
|
|
|
{ whitespace = current; } |
8201
|
5
|
|
|
|
|
|
break; |
8202
|
|
|
|
|
|
|
case 4: |
8203
|
22
|
|
|
|
|
|
{te = ( current)+1;} |
8204
|
22
|
|
|
|
|
|
break; |
8205
|
|
|
|
|
|
|
case 7: |
8206
|
1
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
8207
|
1
|
|
|
|
|
|
merge_hyphenated(tokens); |
8208
|
1
|
|
|
|
|
|
current = te; |
8209
|
1
|
50
|
|
|
|
|
do |
8210
|
1
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8211
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8212
|
1
|
|
|
|
|
|
( current)--; |
8213
|
|
|
|
|
|
|
}} |
8214
|
1
|
|
|
|
|
|
break; |
8215
|
|
|
|
|
|
|
case 2: |
8216
|
4
|
|
|
|
|
|
{te = ( current)+1;{ |
8217
|
4
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, abbreviations); |
8218
|
8
|
100
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
8219
|
4
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
8220
|
4
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
8221
|
4
|
100
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
8222
|
|
|
|
|
|
|
}} |
8223
|
|
|
|
|
|
|
break; |
8224
|
|
|
|
|
|
|
case 10: |
8225
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
8226
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
8227
|
0
|
|
|
|
|
|
current = te; |
8228
|
0
|
0
|
|
|
|
|
do |
8229
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8230
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8231
|
0
|
|
|
|
|
|
( current)--; |
8232
|
|
|
|
|
|
|
}} |
8233
|
0
|
|
|
|
|
|
break; |
8234
|
|
|
|
|
|
|
case 11: |
8235
|
19
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
8236
|
19
|
|
|
|
|
|
merge_hyphenated(tokens); |
8237
|
19
|
|
|
|
|
|
current = te; |
8238
|
19
|
50
|
|
|
|
|
do |
8239
|
19
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8240
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8241
|
19
|
|
|
|
|
|
( current)--; |
8242
|
|
|
|
|
|
|
}} |
8243
|
19
|
|
|
|
|
|
break; |
8244
|
|
|
|
|
|
|
case 8: |
8245
|
17
|
|
|
|
|
|
{te = ( current);( current)--;{ |
8246
|
17
|
|
|
|
|
|
current = te; |
8247
|
17
|
50
|
|
|
|
|
do |
8248
|
17
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8249
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8250
|
17
|
|
|
|
|
|
( current)--; |
8251
|
|
|
|
|
|
|
}} |
8252
|
17
|
|
|
|
|
|
break; |
8253
|
|
|
|
|
|
|
case 9: |
8254
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
8255
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
8256
|
0
|
|
|
|
|
|
current = te; |
8257
|
0
|
0
|
|
|
|
|
do |
8258
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8259
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8260
|
0
|
|
|
|
|
|
( current)--; |
8261
|
|
|
|
|
|
|
}} |
8262
|
0
|
|
|
|
|
|
break; |
8263
|
|
|
|
|
|
|
case 1: |
8264
|
2
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
8265
|
2
|
|
|
|
|
|
merge_hyphenated(tokens); |
8266
|
2
|
|
|
|
|
|
current = te; |
8267
|
2
|
50
|
|
|
|
|
do |
8268
|
2
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8269
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8270
|
2
|
|
|
|
|
|
( current)--; |
8271
|
|
|
|
|
|
|
}} |
8272
|
2
|
|
|
|
|
|
break; |
8273
|
|
|
|
|
|
|
} |
8274
|
|
|
|
|
|
|
|
8275
|
|
|
|
|
|
|
_again: |
8276
|
154
|
100
|
|
|
|
|
switch ( _czech_tokenizer_to_state_actions[cs] ) { |
8277
|
|
|
|
|
|
|
case 5: |
8278
|
42
|
|
|
|
|
|
{ts = 0;} |
8279
|
42
|
|
|
|
|
|
break; |
8280
|
|
|
|
|
|
|
} |
8281
|
|
|
|
|
|
|
|
8282
|
154
|
50
|
|
|
|
|
if ( cs == 0 ) |
8283
|
|
|
|
|
|
|
goto _out; |
8284
|
154
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
8285
|
|
|
|
|
|
|
goto _resume; |
8286
|
|
|
|
|
|
|
_test_eof: {} |
8287
|
2
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
8288
|
|
|
|
|
|
|
{ |
8289
|
2
|
100
|
|
|
|
|
if ( _czech_tokenizer_eof_trans[cs] > 0 ) { |
8290
|
1
|
|
|
|
|
|
_trans = _czech_tokenizer_eof_trans[cs] - 1; |
8291
|
1
|
|
|
|
|
|
goto _eof_trans; |
8292
|
|
|
|
|
|
|
} |
8293
|
|
|
|
|
|
|
} |
8294
|
|
|
|
|
|
|
|
8295
|
|
|
|
|
|
|
_out: {} |
8296
|
|
|
|
|
|
|
} |
8297
|
|
|
|
|
|
|
|
8298
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
8299
|
|
|
|
|
|
|
|
8300
|
2
|
|
|
|
|
|
return !tokens.empty(); |
8301
|
|
|
|
|
|
|
} |
8302
|
|
|
|
|
|
|
|
8303
|
|
|
|
|
|
|
///////// |
8304
|
|
|
|
|
|
|
// File: tokenizer/english_tokenizer.cpp |
8305
|
|
|
|
|
|
|
///////// |
8306
|
|
|
|
|
|
|
|
8307
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
8308
|
|
|
|
|
|
|
// |
8309
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8310
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8311
|
|
|
|
|
|
|
// |
8312
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8313
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8314
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8315
|
|
|
|
|
|
|
|
8316
|
|
|
|
|
|
|
// The list of lowercased words that when preceding eos do not end sentence. |
8317
|
464
|
100
|
|
|
|
|
const unordered_set english_tokenizer::abbreviations = { |
|
|
0
|
|
|
|
|
|
8318
|
|
|
|
|
|
|
// Titles |
8319
|
|
|
|
|
|
|
"adj", "adm", "adv", "assoc", "asst", "bart", "bldg", "brig", "bros", "capt", |
8320
|
|
|
|
|
|
|
"cmdr", "col", "comdr", "con", "corp", "cpl", "d", "dr", "dr", "drs", "ens", |
8321
|
|
|
|
|
|
|
"gen", "gov", "hon", "hosp", "hr", "insp", "lt", "mm", "mr", "mrs", "ms", |
8322
|
|
|
|
|
|
|
"maj", "messrs", "mlle", "mme", "mr", "mrs", "ms", "msgr", "op", "ord", |
8323
|
|
|
|
|
|
|
"pfc", "ph", "phd", "prof", "pvt", "rep", "reps", "res", "rev", "rt", "sen", |
8324
|
|
|
|
|
|
|
"sens", "sfc", "sgt", "sr", "st", "supt", "surg", "univ", |
8325
|
|
|
|
|
|
|
// Common abbrevs |
8326
|
|
|
|
|
|
|
"addr", "approx", "apr", "aug", "calif", "co", "corp", "dec", "def", "e", |
8327
|
|
|
|
|
|
|
"e.g", "eg", "feb", "fla", "ft", "gen", "gov", "hrs", "i.", "i.e", "ie", |
8328
|
|
|
|
|
|
|
"inc", "jan", "jr", "ltd", "mar", "max", "min", "mph", "mt", "n", "nov", |
8329
|
|
|
|
|
|
|
"oct", "ont", "pa", "pres", "rep", "rev", "s", "sec", "sen", "sep", "sept", |
8330
|
|
|
|
|
|
|
"sgt", "sr", "tel", "un", "univ", "v", "va", "vs", "w", "yrs", |
8331
|
|
|
|
|
|
|
}; |
8332
|
|
|
|
|
|
|
|
8333
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_key_offsets[] = { |
8334
|
|
|
|
|
|
|
0, 0, 16, 20, 22, 26, 28, 30, |
8335
|
|
|
|
|
|
|
32, 34, 36, 44, 46, 50, 52, 54, |
8336
|
|
|
|
|
|
|
56, 58, 60, 62, 64, 66, 68, 72, |
8337
|
|
|
|
|
|
|
74, 76, 78, 80, 82, 82 |
8338
|
|
|
|
|
|
|
}; |
8339
|
|
|
|
|
|
|
|
8340
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_split_token_trans_keys[] = { |
8341
|
|
|
|
|
|
|
65u, 68u, 69u, 76u, 77u, 78u, 83u, 84u, |
8342
|
|
|
|
|
|
|
97u, 100u, 101u, 108u, 109u, 110u, 115u, 116u, |
8343
|
|
|
|
|
|
|
78u, 84u, 110u, 116u, 78u, 110u, 65u, 79u, |
8344
|
|
|
|
|
|
|
97u, 111u, 87u, 119u, 71u, 103u, 84u, 116u, |
8345
|
|
|
|
|
|
|
79u, 111u, 39u, 161u, 77u, 82u, 86u, 89u, |
8346
|
|
|
|
|
|
|
109u, 114u, 118u, 121u, 77u, 109u, 69u, 73u, |
8347
|
|
|
|
|
|
|
101u, 105u, 76u, 108u, 39u, 161u, 68u, 100u, |
8348
|
|
|
|
|
|
|
76u, 108u, 39u, 161u, 69u, 101u, 82u, 114u, |
8349
|
|
|
|
|
|
|
79u, 111u, 77u, 109u, 39u, 79u, 111u, 161u, |
8350
|
|
|
|
|
|
|
78u, 110u, 78u, 110u, 78u, 110u, 65u, 97u, |
8351
|
|
|
|
|
|
|
67u, 99u, 0 |
8352
|
|
|
|
|
|
|
}; |
8353
|
|
|
|
|
|
|
|
8354
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_single_lengths[] = { |
8355
|
|
|
|
|
|
|
0, 16, 4, 2, 4, 2, 2, 2, |
8356
|
|
|
|
|
|
|
2, 2, 8, 2, 4, 2, 2, 2, |
8357
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 4, 2, |
8358
|
|
|
|
|
|
|
2, 2, 2, 2, 0, 0 |
8359
|
|
|
|
|
|
|
}; |
8360
|
|
|
|
|
|
|
|
8361
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_range_lengths[] = { |
8362
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8363
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8364
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8365
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0 |
8366
|
|
|
|
|
|
|
}; |
8367
|
|
|
|
|
|
|
|
8368
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_split_token_index_offsets[] = { |
8369
|
|
|
|
|
|
|
0, 0, 17, 22, 25, 30, 33, 36, |
8370
|
|
|
|
|
|
|
39, 42, 45, 54, 57, 62, 65, 68, |
8371
|
|
|
|
|
|
|
71, 74, 77, 80, 83, 86, 89, 94, |
8372
|
|
|
|
|
|
|
97, 100, 103, 106, 109, 110 |
8373
|
|
|
|
|
|
|
}; |
8374
|
|
|
|
|
|
|
|
8375
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_indicies[] = { |
8376
|
|
|
|
|
|
|
0, 2, 3, 4, 2, 5, 2, 6, |
8377
|
|
|
|
|
|
|
0, 2, 3, 4, 2, 5, 2, 6, |
8378
|
|
|
|
|
|
|
1, 7, 8, 7, 8, 1, 9, 9, |
8379
|
|
|
|
|
|
|
1, 10, 11, 10, 11, 1, 12, 12, |
8380
|
|
|
|
|
|
|
1, 12, 12, 1, 13, 13, 1, 11, |
8381
|
|
|
|
|
|
|
11, 1, 14, 14, 1, 15, 2, 2, |
8382
|
|
|
|
|
|
|
16, 15, 2, 2, 16, 1, 17, 17, |
8383
|
|
|
|
|
|
|
1, 18, 11, 18, 11, 1, 12, 12, |
8384
|
|
|
|
|
|
|
1, 19, 19, 1, 12, 12, 1, 2, |
8385
|
|
|
|
|
|
|
2, 1, 20, 20, 1, 21, 21, 1, |
8386
|
|
|
|
|
|
|
22, 22, 1, 23, 23, 1, 12, 12, |
8387
|
|
|
|
|
|
|
1, 24, 25, 25, 24, 1, 14, 14, |
8388
|
|
|
|
|
|
|
1, 26, 26, 1, 27, 27, 1, 28, |
8389
|
|
|
|
|
|
|
28, 1, 12, 12, 1, 1, 1, 0 |
8390
|
|
|
|
|
|
|
}; |
8391
|
|
|
|
|
|
|
|
8392
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_trans_targs[] = { |
8393
|
|
|
|
|
|
|
2, 0, 9, 10, 16, 17, 22, 3, |
8394
|
|
|
|
|
|
|
7, 4, 5, 6, 28, 8, 29, 11, |
8395
|
|
|
|
|
|
|
14, 12, 13, 15, 18, 19, 20, 21, |
8396
|
|
|
|
|
|
|
23, 24, 25, 26, 27 |
8397
|
|
|
|
|
|
|
}; |
8398
|
|
|
|
|
|
|
|
8399
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_trans_actions[] = { |
8400
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 1, |
8401
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 2, 1, |
8402
|
|
|
|
|
|
|
1, 0, 0, 0, 1, 0, 0, 0, |
8403
|
|
|
|
|
|
|
0, 0, 1, 0, 0 |
8404
|
|
|
|
|
|
|
}; |
8405
|
|
|
|
|
|
|
|
8406
|
|
|
|
|
|
|
static const char _english_tokenizer_split_token_eof_actions[] = { |
8407
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8408
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8409
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8410
|
|
|
|
|
|
|
0, 0, 0, 0, 3, 0 |
8411
|
|
|
|
|
|
|
}; |
8412
|
|
|
|
|
|
|
|
8413
|
|
|
|
|
|
|
static const int english_tokenizer_split_token_start = 1; |
8414
|
|
|
|
|
|
|
|
8415
|
0
|
|
|
|
|
|
void english_tokenizer::split_token(vector& tokens) { |
8416
|
0
|
0
|
|
|
|
|
if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8417
|
|
|
|
|
|
|
|
8418
|
0
|
|
|
|
|
|
size_t index = tokens.back().start, end = index + tokens.back().length; |
8419
|
|
|
|
|
|
|
int cs; |
8420
|
0
|
|
|
|
|
|
size_t split_mark = 0, split_len = 0; |
8421
|
|
|
|
|
|
|
|
8422
|
|
|
|
|
|
|
{ |
8423
|
|
|
|
|
|
|
cs = english_tokenizer_split_token_start; |
8424
|
|
|
|
|
|
|
} |
8425
|
|
|
|
|
|
|
|
8426
|
|
|
|
|
|
|
{ |
8427
|
|
|
|
|
|
|
int _klen; |
8428
|
|
|
|
|
|
|
const unsigned char *_keys; |
8429
|
|
|
|
|
|
|
int _trans; |
8430
|
|
|
|
|
|
|
|
8431
|
0
|
0
|
|
|
|
|
if ( ( index) == ( end) ) |
8432
|
|
|
|
|
|
|
goto _test_eof; |
8433
|
|
|
|
|
|
|
if ( cs == 0 ) |
8434
|
|
|
|
|
|
|
goto _out; |
8435
|
|
|
|
|
|
|
_resume: |
8436
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_split_token_trans_keys + _english_tokenizer_split_token_key_offsets[cs]; |
8437
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_split_token_index_offsets[cs]; |
8438
|
|
|
|
|
|
|
|
8439
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_split_token_single_lengths[cs]; |
8440
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
8441
|
|
|
|
|
|
|
const unsigned char *_lower = _keys; |
8442
|
|
|
|
|
|
|
const unsigned char *_mid; |
8443
|
0
|
|
|
|
|
|
const unsigned char *_upper = _keys + _klen - 1; |
8444
|
|
|
|
|
|
|
while (1) { |
8445
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
8446
|
|
|
|
|
|
|
break; |
8447
|
|
|
|
|
|
|
|
8448
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
8449
|
0
|
0
|
|
|
|
|
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < *_mid ) |
8450
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
8451
|
0
|
0
|
|
|
|
|
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > *_mid ) |
8452
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
8453
|
|
|
|
|
|
|
else { |
8454
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
8455
|
0
|
|
|
|
|
|
goto _match; |
8456
|
|
|
|
|
|
|
} |
8457
|
|
|
|
|
|
|
} |
8458
|
0
|
|
|
|
|
|
_keys += _klen; |
8459
|
0
|
|
|
|
|
|
_trans += _klen; |
8460
|
|
|
|
|
|
|
} |
8461
|
|
|
|
|
|
|
|
8462
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_split_token_range_lengths[cs]; |
8463
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
8464
|
|
|
|
|
|
|
const unsigned char *_lower = _keys; |
8465
|
|
|
|
|
|
|
const unsigned char *_mid; |
8466
|
0
|
|
|
|
|
|
const unsigned char *_upper = _keys + (_klen<<1) - 2; |
8467
|
|
|
|
|
|
|
while (1) { |
8468
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
8469
|
|
|
|
|
|
|
break; |
8470
|
|
|
|
|
|
|
|
8471
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
8472
|
0
|
0
|
|
|
|
|
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < _mid[0] ) |
8473
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
8474
|
0
|
0
|
|
|
|
|
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > _mid[1] ) |
8475
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
8476
|
|
|
|
|
|
|
else { |
8477
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
8478
|
0
|
|
|
|
|
|
goto _match; |
8479
|
|
|
|
|
|
|
} |
8480
|
|
|
|
|
|
|
} |
8481
|
0
|
|
|
|
|
|
_trans += _klen; |
8482
|
|
|
|
|
|
|
} |
8483
|
|
|
|
|
|
|
|
8484
|
|
|
|
|
|
|
_match: |
8485
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_split_token_indicies[_trans]; |
8486
|
0
|
|
|
|
|
|
cs = _english_tokenizer_split_token_trans_targs[_trans]; |
8487
|
|
|
|
|
|
|
|
8488
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_split_token_trans_actions[_trans] == 0 ) |
8489
|
|
|
|
|
|
|
goto _again; |
8490
|
|
|
|
|
|
|
|
8491
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_split_token_trans_actions[_trans] ) { |
8492
|
|
|
|
|
|
|
case 1: |
8493
|
0
|
|
|
|
|
|
{ split_mark = index - tokens.back().start + 1; } |
8494
|
0
|
|
|
|
|
|
break; |
8495
|
|
|
|
|
|
|
case 2: |
8496
|
0
|
|
|
|
|
|
{ split_mark = index - tokens.back().start + 1; } |
8497
|
0
|
|
|
|
|
|
{ split_len = split_mark; {( index)++; goto _out; } } |
8498
|
|
|
|
|
|
|
break; |
8499
|
|
|
|
|
|
|
} |
8500
|
|
|
|
|
|
|
|
8501
|
|
|
|
|
|
|
_again: |
8502
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
8503
|
|
|
|
|
|
|
goto _out; |
8504
|
0
|
0
|
|
|
|
|
if ( ++( index) != ( end) ) |
8505
|
|
|
|
|
|
|
goto _resume; |
8506
|
|
|
|
|
|
|
_test_eof: {} |
8507
|
0
|
0
|
|
|
|
|
if ( ( index) == ( end) ) |
8508
|
|
|
|
|
|
|
{ |
8509
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_split_token_eof_actions[cs] ) { |
8510
|
|
|
|
|
|
|
case 3: |
8511
|
0
|
|
|
|
|
|
{ split_len = split_mark; {( index)++; goto _out; } } |
8512
|
|
|
|
|
|
|
break; |
8513
|
|
|
|
|
|
|
} |
8514
|
|
|
|
|
|
|
} |
8515
|
|
|
|
|
|
|
|
8516
|
|
|
|
|
|
|
_out: {} |
8517
|
|
|
|
|
|
|
} |
8518
|
|
|
|
|
|
|
|
8519
|
0
|
0
|
|
|
|
|
if (split_len && split_len < end) { |
8520
|
0
|
|
|
|
|
|
tokens.back().length -= split_len; |
8521
|
0
|
|
|
|
|
|
tokens.emplace_back(end - split_len, split_len); |
8522
|
|
|
|
|
|
|
} |
8523
|
|
|
|
|
|
|
} |
8524
|
|
|
|
|
|
|
|
8525
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_offsets[] = { |
8526
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8527
|
|
|
|
|
|
|
0, 0, 0, 2, 2, 2, 2, 2, |
8528
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
8529
|
|
|
|
|
|
|
2, 2, 2, 2, 2 |
8530
|
|
|
|
|
|
|
}; |
8531
|
|
|
|
|
|
|
|
8532
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_lengths[] = { |
8533
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8534
|
|
|
|
|
|
|
0, 0, 2, 0, 0, 0, 0, 0, |
8535
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8536
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
8537
|
|
|
|
|
|
|
}; |
8538
|
|
|
|
|
|
|
|
8539
|
|
|
|
|
|
|
static const short _english_tokenizer_cond_keys[] = { |
8540
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
8541
|
|
|
|
|
|
|
}; |
8542
|
|
|
|
|
|
|
|
8543
|
|
|
|
|
|
|
static const char _english_tokenizer_cond_spaces[] = { |
8544
|
|
|
|
|
|
|
1, 0, 0 |
8545
|
|
|
|
|
|
|
}; |
8546
|
|
|
|
|
|
|
|
8547
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_key_offsets[] = { |
8548
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 49, 52, |
8549
|
|
|
|
|
|
|
55, 60, 63, 98, 103, 107, 110, 114, |
8550
|
|
|
|
|
|
|
119, 120, 125, 126, 131, 145, 152, 156, |
8551
|
|
|
|
|
|
|
161, 164, 179, 192, 206 |
8552
|
|
|
|
|
|
|
}; |
8553
|
|
|
|
|
|
|
|
8554
|
|
|
|
|
|
|
static const short _english_tokenizer_trans_keys[] = { |
8555
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
8556
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
8557
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
8558
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
8559
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
8560
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 159u, 48u, |
8561
|
|
|
|
|
|
|
57u, 159u, 48u, 57u, 159u, 48u, 57u, 43u, |
8562
|
|
|
|
|
|
|
45u, 159u, 48u, 57u, 159u, 48u, 57u, 9u, |
8563
|
|
|
|
|
|
|
10u, 13u, 32u, 33u, 44u, 46u, 47u, 63u, |
8564
|
|
|
|
|
|
|
129u, 131u, 135u, 142u, 147u, 157u, 159u, 160u, |
8565
|
|
|
|
|
|
|
301u, 557u, 811u, 1067u, 0u, 42u, 48u, 57u, |
8566
|
|
|
|
|
|
|
58u, 64u, 65u, 90u, 91u, 96u, 97u, 122u, |
8567
|
|
|
|
|
|
|
123u, 255u, 9u, 10u, 13u, 32u, 147u, 9u, |
8568
|
|
|
|
|
|
|
13u, 32u, 147u, 9u, 32u, 147u, 9u, 10u, |
8569
|
|
|
|
|
|
|
32u, 147u, 9u, 10u, 13u, 32u, 147u, 13u, |
8570
|
|
|
|
|
|
|
9u, 10u, 13u, 32u, 147u, 10u, 9u, 10u, |
8571
|
|
|
|
|
|
|
13u, 32u, 147u, 13u, 32u, 34u, 39u, 41u, |
8572
|
|
|
|
|
|
|
59u, 93u, 125u, 139u, 141u, 147u, 161u, 9u, |
8573
|
|
|
|
|
|
|
10u, 44u, 46u, 69u, 101u, 159u, 48u, 57u, |
8574
|
|
|
|
|
|
|
44u, 46u, 69u, 101u, 69u, 101u, 159u, 48u, |
8575
|
|
|
|
|
|
|
57u, 159u, 48u, 57u, 39u, 45u, 129u, 131u, |
8576
|
|
|
|
|
|
|
135u, 151u, 155u, 157u, 161u, 65u, 90u, 97u, |
8577
|
|
|
|
|
|
|
122u, 142u, 143u, 45u, 129u, 131u, 135u, 151u, |
8578
|
|
|
|
|
|
|
155u, 157u, 65u, 90u, 97u, 122u, 142u, 143u, |
8579
|
|
|
|
|
|
|
39u, 129u, 131u, 135u, 151u, 155u, 157u, 161u, |
8580
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 142u, 143u, 159u, 48u, |
8581
|
|
|
|
|
|
|
57u, 0 |
8582
|
|
|
|
|
|
|
}; |
8583
|
|
|
|
|
|
|
|
8584
|
|
|
|
|
|
|
static const char _english_tokenizer_single_lengths[] = { |
8585
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 1, 1, 1, |
8586
|
|
|
|
|
|
|
3, 1, 21, 5, 4, 3, 4, 5, |
8587
|
|
|
|
|
|
|
1, 5, 1, 5, 12, 5, 4, 3, |
8588
|
|
|
|
|
|
|
1, 9, 7, 8, 1 |
8589
|
|
|
|
|
|
|
}; |
8590
|
|
|
|
|
|
|
|
8591
|
|
|
|
|
|
|
static const char _english_tokenizer_range_lengths[] = { |
8592
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 1, |
8593
|
|
|
|
|
|
|
1, 1, 7, 0, 0, 0, 0, 0, |
8594
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 0, 1, |
8595
|
|
|
|
|
|
|
1, 3, 3, 3, 1 |
8596
|
|
|
|
|
|
|
}; |
8597
|
|
|
|
|
|
|
|
8598
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_index_offsets[] = { |
8599
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 48, 51, |
8600
|
|
|
|
|
|
|
54, 59, 62, 91, 97, 102, 106, 111, |
8601
|
|
|
|
|
|
|
117, 119, 125, 127, 133, 147, 154, 159, |
8602
|
|
|
|
|
|
|
164, 167, 180, 191, 203 |
8603
|
|
|
|
|
|
|
}; |
8604
|
|
|
|
|
|
|
|
8605
|
|
|
|
|
|
|
static const char _english_tokenizer_indicies[] = { |
8606
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
8607
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
8608
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
8609
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
8610
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
8611
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 0, |
8612
|
|
|
|
|
|
|
8, 8, 0, 9, 9, 0, 10, 10, |
8613
|
|
|
|
|
|
|
11, 11, 0, 11, 11, 0, 13, 14, |
8614
|
|
|
|
|
|
|
15, 13, 16, 12, 16, 12, 16, 19, |
8615
|
|
|
|
|
|
|
19, 19, 19, 13, 19, 18, 16, 12, |
8616
|
|
|
|
|
|
|
20, 12, 20, 12, 18, 12, 19, 12, |
8617
|
|
|
|
|
|
|
19, 12, 17, 13, 22, 23, 13, 13, |
8618
|
|
|
|
|
|
|
21, 13, 24, 13, 13, 21, 13, 13, |
8619
|
|
|
|
|
|
|
13, 21, 13, 24, 13, 13, 21, 13, |
8620
|
|
|
|
|
|
|
25, 26, 13, 13, 21, 28, 27, 13, |
8621
|
|
|
|
|
|
|
25, 29, 13, 13, 21, 28, 27, 13, |
8622
|
|
|
|
|
|
|
26, 29, 13, 13, 21, 4, 4, 5, |
8623
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 4, |
8624
|
|
|
|
|
|
|
5, 4, 30, 31, 32, 33, 33, 18, |
8625
|
|
|
|
|
|
|
18, 30, 31, 32, 33, 33, 30, 33, |
8626
|
|
|
|
|
|
|
33, 9, 9, 30, 11, 11, 30, 34, |
8627
|
|
|
|
|
|
|
35, 19, 19, 19, 19, 19, 19, 34, |
8628
|
|
|
|
|
|
|
19, 19, 19, 30, 35, 19, 19, 19, |
8629
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 19, 30, 34, |
8630
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 19, 34, 19, |
8631
|
|
|
|
|
|
|
19, 19, 30, 18, 18, 30, 0 |
8632
|
|
|
|
|
|
|
}; |
8633
|
|
|
|
|
|
|
|
8634
|
|
|
|
|
|
|
static const char _english_tokenizer_trans_targs[] = { |
8635
|
|
|
|
|
|
|
10, 1, 2, 10, 1, 3, 5, 6, |
8636
|
|
|
|
|
|
|
22, 23, 9, 24, 10, 11, 15, 19, |
8637
|
|
|
|
|
|
|
20, 0, 21, 25, 28, 10, 12, 14, |
8638
|
|
|
|
|
|
|
13, 16, 17, 10, 10, 18, 10, 4, |
8639
|
|
|
|
|
|
|
7, 8, 26, 27 |
8640
|
|
|
|
|
|
|
}; |
8641
|
|
|
|
|
|
|
|
8642
|
|
|
|
|
|
|
static const char _english_tokenizer_trans_actions[] = { |
8643
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 0, 0, |
8644
|
|
|
|
|
|
|
4, 4, 0, 0, 7, 0, 0, 0, |
8645
|
|
|
|
|
|
|
4, 0, 4, 0, 0, 8, 0, 0, |
8646
|
|
|
|
|
|
|
0, 0, 0, 9, 10, 0, 11, 0, |
8647
|
|
|
|
|
|
|
0, 0, 0, 0 |
8648
|
|
|
|
|
|
|
}; |
8649
|
|
|
|
|
|
|
|
8650
|
|
|
|
|
|
|
static const char _english_tokenizer_to_state_actions[] = { |
8651
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8652
|
|
|
|
|
|
|
0, 0, 5, 0, 0, 0, 0, 0, |
8653
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8654
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
8655
|
|
|
|
|
|
|
}; |
8656
|
|
|
|
|
|
|
|
8657
|
|
|
|
|
|
|
static const char _english_tokenizer_from_state_actions[] = { |
8658
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8659
|
|
|
|
|
|
|
0, 0, 6, 0, 0, 0, 0, 0, |
8660
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8661
|
|
|
|
|
|
|
0, 0, 0, 0, 0 |
8662
|
|
|
|
|
|
|
}; |
8663
|
|
|
|
|
|
|
|
8664
|
|
|
|
|
|
|
static const unsigned char _english_tokenizer_eof_trans[] = { |
8665
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 1, |
8666
|
|
|
|
|
|
|
1, 1, 0, 22, 22, 22, 22, 22, |
8667
|
|
|
|
|
|
|
28, 22, 28, 22, 31, 31, 31, 31, |
8668
|
|
|
|
|
|
|
31, 31, 31, 31, 31 |
8669
|
|
|
|
|
|
|
}; |
8670
|
|
|
|
|
|
|
|
8671
|
|
|
|
|
|
|
static const int english_tokenizer_start = 10; |
8672
|
|
|
|
|
|
|
|
8673
|
0
|
0
|
|
|
|
|
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8674
|
|
|
|
|
|
|
|
8675
|
0
|
|
|
|
|
|
bool english_tokenizer::next_sentence(vector& tokens) { |
8676
|
|
|
|
|
|
|
using namespace unilib; |
8677
|
|
|
|
|
|
|
|
8678
|
|
|
|
|
|
|
int cs, act; |
8679
|
|
|
|
|
|
|
size_t ts, te; |
8680
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
8681
|
|
|
|
|
|
|
|
8682
|
0
|
0
|
|
|
|
|
while (tokenize_url_email(tokens)) |
8683
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
8684
|
|
|
|
|
|
|
return true; |
8685
|
|
|
|
|
|
|
|
8686
|
|
|
|
|
|
|
{ |
8687
|
|
|
|
|
|
|
cs = english_tokenizer_start; |
8688
|
0
|
|
|
|
|
|
ts = 0; |
8689
|
|
|
|
|
|
|
te = 0; |
8690
|
|
|
|
|
|
|
act = 0; |
8691
|
|
|
|
|
|
|
} |
8692
|
|
|
|
|
|
|
|
8693
|
|
|
|
|
|
|
{ |
8694
|
|
|
|
|
|
|
int _klen; |
8695
|
|
|
|
|
|
|
const short *_keys; |
8696
|
|
|
|
|
|
|
int _trans; |
8697
|
|
|
|
|
|
|
short _widec; |
8698
|
|
|
|
|
|
|
|
8699
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
8700
|
|
|
|
|
|
|
goto _test_eof; |
8701
|
|
|
|
|
|
|
if ( cs == 0 ) |
8702
|
|
|
|
|
|
|
goto _out; |
8703
|
|
|
|
|
|
|
_resume: |
8704
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_from_state_actions[cs] ) { |
8705
|
|
|
|
|
|
|
case 6: |
8706
|
0
|
|
|
|
|
|
{ts = ( current);} |
8707
|
0
|
|
|
|
|
|
break; |
8708
|
|
|
|
|
|
|
} |
8709
|
|
|
|
|
|
|
|
8710
|
0
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
8711
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_cond_lengths[cs]; |
8712
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_cond_keys + (_english_tokenizer_cond_offsets[cs]*2); |
8713
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
8714
|
|
|
|
|
|
|
const short *_lower = _keys; |
8715
|
|
|
|
|
|
|
const short *_mid; |
8716
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
8717
|
|
|
|
|
|
|
while (1) { |
8718
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
8719
|
|
|
|
|
|
|
break; |
8720
|
|
|
|
|
|
|
|
8721
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
8722
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
8723
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
8724
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
8725
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
8726
|
|
|
|
|
|
|
else { |
8727
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_cond_spaces[_english_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
8728
|
|
|
|
|
|
|
case 0: { |
8729
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
8730
|
0
|
0
|
|
|
|
|
if ( |
8731
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
0
|
|
|
|
|
|
8732
|
|
|
|
|
|
|
break; |
8733
|
|
|
|
|
|
|
} |
8734
|
|
|
|
|
|
|
case 1: { |
8735
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
8736
|
0
|
0
|
|
|
|
|
if ( |
8737
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
8738
|
|
|
|
|
|
|
break; |
8739
|
|
|
|
|
|
|
} |
8740
|
|
|
|
|
|
|
} |
8741
|
|
|
|
|
|
|
break; |
8742
|
|
|
|
|
|
|
} |
8743
|
|
|
|
|
|
|
} |
8744
|
|
|
|
|
|
|
} |
8745
|
|
|
|
|
|
|
|
8746
|
0
|
|
|
|
|
|
_keys = _english_tokenizer_trans_keys + _english_tokenizer_key_offsets[cs]; |
8747
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_index_offsets[cs]; |
8748
|
|
|
|
|
|
|
|
8749
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_single_lengths[cs]; |
8750
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
8751
|
|
|
|
|
|
|
const short *_lower = _keys; |
8752
|
|
|
|
|
|
|
const short *_mid; |
8753
|
0
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
8754
|
|
|
|
|
|
|
while (1) { |
8755
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
8756
|
|
|
|
|
|
|
break; |
8757
|
|
|
|
|
|
|
|
8758
|
0
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
8759
|
0
|
0
|
|
|
|
|
if ( _widec < *_mid ) |
8760
|
0
|
|
|
|
|
|
_upper = _mid - 1; |
8761
|
0
|
0
|
|
|
|
|
else if ( _widec > *_mid ) |
8762
|
0
|
|
|
|
|
|
_lower = _mid + 1; |
8763
|
|
|
|
|
|
|
else { |
8764
|
0
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
8765
|
0
|
|
|
|
|
|
goto _match; |
8766
|
|
|
|
|
|
|
} |
8767
|
|
|
|
|
|
|
} |
8768
|
0
|
|
|
|
|
|
_keys += _klen; |
8769
|
0
|
|
|
|
|
|
_trans += _klen; |
8770
|
|
|
|
|
|
|
} |
8771
|
|
|
|
|
|
|
|
8772
|
0
|
|
|
|
|
|
_klen = _english_tokenizer_range_lengths[cs]; |
8773
|
0
|
0
|
|
|
|
|
if ( _klen > 0 ) { |
8774
|
|
|
|
|
|
|
const short *_lower = _keys; |
8775
|
|
|
|
|
|
|
const short *_mid; |
8776
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
8777
|
|
|
|
|
|
|
while (1) { |
8778
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
8779
|
|
|
|
|
|
|
break; |
8780
|
|
|
|
|
|
|
|
8781
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
8782
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
8783
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
8784
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
8785
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
8786
|
|
|
|
|
|
|
else { |
8787
|
0
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
8788
|
0
|
|
|
|
|
|
goto _match; |
8789
|
|
|
|
|
|
|
} |
8790
|
|
|
|
|
|
|
} |
8791
|
0
|
|
|
|
|
|
_trans += _klen; |
8792
|
|
|
|
|
|
|
} |
8793
|
|
|
|
|
|
|
|
8794
|
|
|
|
|
|
|
_match: |
8795
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_indicies[_trans]; |
8796
|
|
|
|
|
|
|
_eof_trans: |
8797
|
0
|
|
|
|
|
|
cs = _english_tokenizer_trans_targs[_trans]; |
8798
|
|
|
|
|
|
|
|
8799
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_trans_actions[_trans] == 0 ) |
8800
|
|
|
|
|
|
|
goto _again; |
8801
|
|
|
|
|
|
|
|
8802
|
0
|
|
|
|
|
|
switch ( _english_tokenizer_trans_actions[_trans] ) { |
8803
|
|
|
|
|
|
|
case 3: |
8804
|
0
|
|
|
|
|
|
{ whitespace = current; } |
8805
|
0
|
|
|
|
|
|
break; |
8806
|
|
|
|
|
|
|
case 4: |
8807
|
0
|
|
|
|
|
|
{te = ( current)+1;} |
8808
|
0
|
|
|
|
|
|
break; |
8809
|
|
|
|
|
|
|
case 7: |
8810
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
8811
|
0
|
|
|
|
|
|
split_token(tokens); |
8812
|
0
|
|
|
|
|
|
current = te; |
8813
|
0
|
0
|
|
|
|
|
do |
8814
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8815
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8816
|
0
|
|
|
|
|
|
( current)--; |
8817
|
|
|
|
|
|
|
}} |
8818
|
0
|
|
|
|
|
|
break; |
8819
|
|
|
|
|
|
|
case 2: |
8820
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
8821
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, &abbreviations); |
8822
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
8823
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
8824
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
8825
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
8826
|
|
|
|
|
|
|
}} |
8827
|
|
|
|
|
|
|
break; |
8828
|
|
|
|
|
|
|
case 10: |
8829
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
8830
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
8831
|
0
|
|
|
|
|
|
current = te; |
8832
|
0
|
0
|
|
|
|
|
do |
8833
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8834
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8835
|
0
|
|
|
|
|
|
( current)--; |
8836
|
|
|
|
|
|
|
}} |
8837
|
0
|
|
|
|
|
|
break; |
8838
|
|
|
|
|
|
|
case 11: |
8839
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
8840
|
0
|
|
|
|
|
|
split_token(tokens); |
8841
|
0
|
|
|
|
|
|
current = te; |
8842
|
0
|
0
|
|
|
|
|
do |
8843
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8844
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8845
|
0
|
|
|
|
|
|
( current)--; |
8846
|
|
|
|
|
|
|
}} |
8847
|
0
|
|
|
|
|
|
break; |
8848
|
|
|
|
|
|
|
case 8: |
8849
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
8850
|
0
|
|
|
|
|
|
current = te; |
8851
|
0
|
0
|
|
|
|
|
do |
8852
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8853
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8854
|
0
|
|
|
|
|
|
( current)--; |
8855
|
|
|
|
|
|
|
}} |
8856
|
0
|
|
|
|
|
|
break; |
8857
|
|
|
|
|
|
|
case 9: |
8858
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
8859
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
8860
|
0
|
|
|
|
|
|
current = te; |
8861
|
0
|
0
|
|
|
|
|
do |
8862
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8863
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8864
|
0
|
|
|
|
|
|
( current)--; |
8865
|
|
|
|
|
|
|
}} |
8866
|
0
|
|
|
|
|
|
break; |
8867
|
|
|
|
|
|
|
case 1: |
8868
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
8869
|
0
|
|
|
|
|
|
split_token(tokens); |
8870
|
0
|
|
|
|
|
|
current = te; |
8871
|
0
|
0
|
|
|
|
|
do |
8872
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8873
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
8874
|
0
|
|
|
|
|
|
( current)--; |
8875
|
|
|
|
|
|
|
}} |
8876
|
0
|
|
|
|
|
|
break; |
8877
|
|
|
|
|
|
|
} |
8878
|
|
|
|
|
|
|
|
8879
|
|
|
|
|
|
|
_again: |
8880
|
0
|
0
|
|
|
|
|
switch ( _english_tokenizer_to_state_actions[cs] ) { |
8881
|
|
|
|
|
|
|
case 5: |
8882
|
0
|
|
|
|
|
|
{ts = 0;} |
8883
|
0
|
|
|
|
|
|
break; |
8884
|
|
|
|
|
|
|
} |
8885
|
|
|
|
|
|
|
|
8886
|
0
|
0
|
|
|
|
|
if ( cs == 0 ) |
8887
|
|
|
|
|
|
|
goto _out; |
8888
|
0
|
0
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
8889
|
|
|
|
|
|
|
goto _resume; |
8890
|
|
|
|
|
|
|
_test_eof: {} |
8891
|
0
|
0
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
8892
|
|
|
|
|
|
|
{ |
8893
|
0
|
0
|
|
|
|
|
if ( _english_tokenizer_eof_trans[cs] > 0 ) { |
8894
|
0
|
|
|
|
|
|
_trans = _english_tokenizer_eof_trans[cs] - 1; |
8895
|
0
|
|
|
|
|
|
goto _eof_trans; |
8896
|
|
|
|
|
|
|
} |
8897
|
|
|
|
|
|
|
} |
8898
|
|
|
|
|
|
|
|
8899
|
|
|
|
|
|
|
_out: {} |
8900
|
|
|
|
|
|
|
} |
8901
|
|
|
|
|
|
|
|
8902
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
8903
|
|
|
|
|
|
|
|
8904
|
0
|
|
|
|
|
|
return !tokens.empty(); |
8905
|
|
|
|
|
|
|
} |
8906
|
|
|
|
|
|
|
|
8907
|
|
|
|
|
|
|
///////// |
8908
|
|
|
|
|
|
|
// File: tokenizer/generic_tokenizer.cpp |
8909
|
|
|
|
|
|
|
///////// |
8910
|
|
|
|
|
|
|
|
8911
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
8912
|
|
|
|
|
|
|
// |
8913
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
8914
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
8915
|
|
|
|
|
|
|
// |
8916
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
8917
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
8918
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
8919
|
|
|
|
|
|
|
|
8920
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_offsets[] = { |
8921
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8922
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2, 2, |
8923
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 2, 2 |
8924
|
|
|
|
|
|
|
}; |
8925
|
|
|
|
|
|
|
|
8926
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_lengths[] = { |
8927
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 2, |
8928
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8929
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
8930
|
|
|
|
|
|
|
}; |
8931
|
|
|
|
|
|
|
|
8932
|
|
|
|
|
|
|
static const short _generic_tokenizer_cond_keys[] = { |
8933
|
|
|
|
|
|
|
43u, 43u, 45u, 45u, 0 |
8934
|
|
|
|
|
|
|
}; |
8935
|
|
|
|
|
|
|
|
8936
|
|
|
|
|
|
|
static const char _generic_tokenizer_cond_spaces[] = { |
8937
|
|
|
|
|
|
|
1, 0, 0 |
8938
|
|
|
|
|
|
|
}; |
8939
|
|
|
|
|
|
|
|
8940
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_key_offsets[] = { |
8941
|
|
|
|
|
|
|
0, 0, 17, 29, 43, 46, 51, 54, |
8942
|
|
|
|
|
|
|
89, 94, 98, 101, 105, 110, 111, 116, |
8943
|
|
|
|
|
|
|
117, 122, 136, 142, 147, 150, 162 |
8944
|
|
|
|
|
|
|
}; |
8945
|
|
|
|
|
|
|
|
8946
|
|
|
|
|
|
|
static const short _generic_tokenizer_trans_keys[] = { |
8947
|
|
|
|
|
|
|
13u, 32u, 34u, 40u, 91u, 96u, 123u, 129u, |
8948
|
|
|
|
|
|
|
133u, 135u, 147u, 150u, 162u, 9u, 10u, 65u, |
8949
|
|
|
|
|
|
|
90u, 34u, 40u, 91u, 96u, 123u, 129u, 133u, |
8950
|
|
|
|
|
|
|
135u, 150u, 162u, 65u, 90u, 13u, 32u, 34u, |
8951
|
|
|
|
|
|
|
39u, 41u, 59u, 93u, 125u, 139u, 141u, 147u, |
8952
|
|
|
|
|
|
|
161u, 9u, 10u, 159u, 48u, 57u, 43u, 45u, |
8953
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 9u, 10u, |
8954
|
|
|
|
|
|
|
13u, 32u, 33u, 44u, 46u, 47u, 63u, 129u, |
8955
|
|
|
|
|
|
|
131u, 135u, 142u, 147u, 157u, 159u, 160u, 301u, |
8956
|
|
|
|
|
|
|
557u, 811u, 1067u, 0u, 42u, 48u, 57u, 58u, |
8957
|
|
|
|
|
|
|
64u, 65u, 90u, 91u, 96u, 97u, 122u, 123u, |
8958
|
|
|
|
|
|
|
255u, 9u, 10u, 13u, 32u, 147u, 9u, 13u, |
8959
|
|
|
|
|
|
|
32u, 147u, 9u, 32u, 147u, 9u, 10u, 32u, |
8960
|
|
|
|
|
|
|
147u, 9u, 10u, 13u, 32u, 147u, 13u, 9u, |
8961
|
|
|
|
|
|
|
10u, 13u, 32u, 147u, 10u, 9u, 10u, 13u, |
8962
|
|
|
|
|
|
|
32u, 147u, 13u, 32u, 34u, 39u, 41u, 59u, |
8963
|
|
|
|
|
|
|
93u, 125u, 139u, 141u, 147u, 161u, 9u, 10u, |
8964
|
|
|
|
|
|
|
46u, 69u, 101u, 159u, 48u, 57u, 69u, 101u, |
8965
|
|
|
|
|
|
|
159u, 48u, 57u, 159u, 48u, 57u, 129u, 131u, |
8966
|
|
|
|
|
|
|
135u, 151u, 155u, 157u, 65u, 90u, 97u, 122u, |
8967
|
|
|
|
|
|
|
142u, 143u, 159u, 48u, 57u, 0 |
8968
|
|
|
|
|
|
|
}; |
8969
|
|
|
|
|
|
|
|
8970
|
|
|
|
|
|
|
static const char _generic_tokenizer_single_lengths[] = { |
8971
|
|
|
|
|
|
|
0, 13, 10, 12, 1, 3, 1, 21, |
8972
|
|
|
|
|
|
|
5, 4, 3, 4, 5, 1, 5, 1, |
8973
|
|
|
|
|
|
|
5, 12, 4, 3, 1, 6, 1 |
8974
|
|
|
|
|
|
|
}; |
8975
|
|
|
|
|
|
|
|
8976
|
|
|
|
|
|
|
static const char _generic_tokenizer_range_lengths[] = { |
8977
|
|
|
|
|
|
|
0, 2, 1, 1, 1, 1, 1, 7, |
8978
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
8979
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 3, 1 |
8980
|
|
|
|
|
|
|
}; |
8981
|
|
|
|
|
|
|
|
8982
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_index_offsets[] = { |
8983
|
|
|
|
|
|
|
0, 0, 16, 28, 42, 45, 50, 53, |
8984
|
|
|
|
|
|
|
82, 88, 93, 97, 102, 108, 110, 116, |
8985
|
|
|
|
|
|
|
118, 124, 138, 144, 149, 152, 162 |
8986
|
|
|
|
|
|
|
}; |
8987
|
|
|
|
|
|
|
|
8988
|
|
|
|
|
|
|
static const char _generic_tokenizer_indicies[] = { |
8989
|
|
|
|
|
|
|
1, 1, 2, 2, 2, 2, 2, 3, |
8990
|
|
|
|
|
|
|
2, 3, 1, 2, 2, 1, 3, 0, |
8991
|
|
|
|
|
|
|
2, 2, 2, 2, 2, 3, 2, 3, |
8992
|
|
|
|
|
|
|
2, 2, 3, 0, 4, 4, 5, 5, |
8993
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
8994
|
|
|
|
|
|
|
4, 0, 6, 6, 0, 7, 7, 8, |
8995
|
|
|
|
|
|
|
8, 0, 8, 8, 0, 10, 11, 12, |
8996
|
|
|
|
|
|
|
10, 13, 9, 13, 9, 13, 16, 16, |
8997
|
|
|
|
|
|
|
16, 16, 10, 16, 15, 13, 9, 17, |
8998
|
|
|
|
|
|
|
9, 17, 9, 15, 9, 16, 9, 16, |
8999
|
|
|
|
|
|
|
9, 14, 10, 19, 20, 10, 10, 18, |
9000
|
|
|
|
|
|
|
10, 21, 10, 10, 18, 10, 10, 10, |
9001
|
|
|
|
|
|
|
18, 10, 21, 10, 10, 18, 10, 22, |
9002
|
|
|
|
|
|
|
23, 10, 10, 18, 25, 24, 10, 22, |
9003
|
|
|
|
|
|
|
26, 10, 10, 18, 25, 24, 10, 23, |
9004
|
|
|
|
|
|
|
26, 10, 10, 18, 4, 4, 5, 5, |
9005
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 4, 5, |
9006
|
|
|
|
|
|
|
4, 27, 28, 29, 29, 15, 15, 27, |
9007
|
|
|
|
|
|
|
29, 29, 6, 6, 27, 8, 8, 27, |
9008
|
|
|
|
|
|
|
16, 16, 16, 16, 16, 16, 16, 16, |
9009
|
|
|
|
|
|
|
16, 27, 15, 15, 27, 0 |
9010
|
|
|
|
|
|
|
}; |
9011
|
|
|
|
|
|
|
|
9012
|
|
|
|
|
|
|
static const char _generic_tokenizer_trans_targs[] = { |
9013
|
|
|
|
|
|
|
7, 1, 2, 7, 1, 3, 19, 6, |
9014
|
|
|
|
|
|
|
20, 7, 8, 12, 16, 17, 0, 18, |
9015
|
|
|
|
|
|
|
21, 22, 7, 9, 11, 10, 13, 14, |
9016
|
|
|
|
|
|
|
7, 7, 15, 7, 4, 5 |
9017
|
|
|
|
|
|
|
}; |
9018
|
|
|
|
|
|
|
|
9019
|
|
|
|
|
|
|
static const char _generic_tokenizer_trans_actions[] = { |
9020
|
|
|
|
|
|
|
1, 0, 0, 2, 3, 0, 4, 0, |
9021
|
|
|
|
|
|
|
0, 7, 0, 0, 0, 4, 0, 4, |
9022
|
|
|
|
|
|
|
0, 0, 8, 0, 0, 0, 0, 0, |
9023
|
|
|
|
|
|
|
9, 10, 0, 11, 0, 0 |
9024
|
|
|
|
|
|
|
}; |
9025
|
|
|
|
|
|
|
|
9026
|
|
|
|
|
|
|
static const char _generic_tokenizer_to_state_actions[] = { |
9027
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 5, |
9028
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9029
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
9030
|
|
|
|
|
|
|
}; |
9031
|
|
|
|
|
|
|
|
9032
|
|
|
|
|
|
|
static const char _generic_tokenizer_from_state_actions[] = { |
9033
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 6, |
9034
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9035
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0 |
9036
|
|
|
|
|
|
|
}; |
9037
|
|
|
|
|
|
|
|
9038
|
|
|
|
|
|
|
static const unsigned char _generic_tokenizer_eof_trans[] = { |
9039
|
|
|
|
|
|
|
0, 1, 1, 1, 1, 1, 1, 0, |
9040
|
|
|
|
|
|
|
19, 19, 19, 19, 19, 25, 19, 25, |
9041
|
|
|
|
|
|
|
19, 28, 28, 28, 28, 28, 28 |
9042
|
|
|
|
|
|
|
}; |
9043
|
|
|
|
|
|
|
|
9044
|
|
|
|
|
|
|
static const int generic_tokenizer_start = 7; |
9045
|
|
|
|
|
|
|
|
9046
|
2
|
0
|
|
|
|
|
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9047
|
|
|
|
|
|
|
|
9048
|
4
|
|
|
|
|
|
bool generic_tokenizer::next_sentence(vector& tokens) { |
9049
|
|
|
|
|
|
|
using namespace unilib; |
9050
|
|
|
|
|
|
|
|
9051
|
|
|
|
|
|
|
int cs, act; |
9052
|
|
|
|
|
|
|
size_t ts, te; |
9053
|
|
|
|
|
|
|
size_t whitespace = 0; // Suppress "may be uninitialized" warning |
9054
|
|
|
|
|
|
|
|
9055
|
2
|
50
|
|
|
|
|
while (tokenize_url_email(tokens)) |
9056
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) |
9057
|
|
|
|
|
|
|
return true; |
9058
|
|
|
|
|
|
|
|
9059
|
|
|
|
|
|
|
{ |
9060
|
|
|
|
|
|
|
cs = generic_tokenizer_start; |
9061
|
2
|
|
|
|
|
|
ts = 0; |
9062
|
|
|
|
|
|
|
te = 0; |
9063
|
|
|
|
|
|
|
act = 0; |
9064
|
|
|
|
|
|
|
} |
9065
|
|
|
|
|
|
|
|
9066
|
|
|
|
|
|
|
{ |
9067
|
|
|
|
|
|
|
int _klen; |
9068
|
|
|
|
|
|
|
const short *_keys; |
9069
|
|
|
|
|
|
|
int _trans; |
9070
|
|
|
|
|
|
|
short _widec; |
9071
|
|
|
|
|
|
|
|
9072
|
2
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
9073
|
|
|
|
|
|
|
goto _test_eof; |
9074
|
|
|
|
|
|
|
if ( cs == 0 ) |
9075
|
|
|
|
|
|
|
goto _out; |
9076
|
|
|
|
|
|
|
_resume: |
9077
|
29
|
100
|
|
|
|
|
switch ( _generic_tokenizer_from_state_actions[cs] ) { |
9078
|
|
|
|
|
|
|
case 6: |
9079
|
8
|
|
|
|
|
|
{ts = ( current);} |
9080
|
8
|
|
|
|
|
|
break; |
9081
|
|
|
|
|
|
|
} |
9082
|
|
|
|
|
|
|
|
9083
|
58
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
9084
|
29
|
|
|
|
|
|
_klen = _generic_tokenizer_cond_lengths[cs]; |
9085
|
29
|
|
|
|
|
|
_keys = _generic_tokenizer_cond_keys + (_generic_tokenizer_cond_offsets[cs]*2); |
9086
|
29
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
9087
|
|
|
|
|
|
|
const short *_lower = _keys; |
9088
|
|
|
|
|
|
|
const short *_mid; |
9089
|
22
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
9090
|
|
|
|
|
|
|
while (1) { |
9091
|
22
|
100
|
|
|
|
|
if ( _upper < _lower ) |
9092
|
|
|
|
|
|
|
break; |
9093
|
|
|
|
|
|
|
|
9094
|
14
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
9095
|
14
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
9096
|
2
|
|
|
|
|
|
_upper = _mid - 2; |
9097
|
12
|
50
|
|
|
|
|
else if ( _widec > _mid[1] ) |
9098
|
12
|
|
|
|
|
|
_lower = _mid + 2; |
9099
|
|
|
|
|
|
|
else { |
9100
|
0
|
|
|
|
|
|
switch ( _generic_tokenizer_cond_spaces[_generic_tokenizer_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
9101
|
|
|
|
|
|
|
case 0: { |
9102
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
9103
|
0
|
0
|
|
|
|
|
if ( |
9104
|
0
|
0
|
|
|
|
|
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
|
0
|
|
|
|
|
|
9105
|
|
|
|
|
|
|
break; |
9106
|
|
|
|
|
|
|
} |
9107
|
|
|
|
|
|
|
case 1: { |
9108
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
9109
|
0
|
0
|
|
|
|
|
if ( |
9110
|
0
|
0
|
|
|
|
|
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9111
|
|
|
|
|
|
|
break; |
9112
|
|
|
|
|
|
|
} |
9113
|
|
|
|
|
|
|
} |
9114
|
|
|
|
|
|
|
break; |
9115
|
|
|
|
|
|
|
} |
9116
|
|
|
|
|
|
|
} |
9117
|
|
|
|
|
|
|
} |
9118
|
|
|
|
|
|
|
|
9119
|
29
|
|
|
|
|
|
_keys = _generic_tokenizer_trans_keys + _generic_tokenizer_key_offsets[cs]; |
9120
|
29
|
|
|
|
|
|
_trans = _generic_tokenizer_index_offsets[cs]; |
9121
|
|
|
|
|
|
|
|
9122
|
29
|
|
|
|
|
|
_klen = _generic_tokenizer_single_lengths[cs]; |
9123
|
29
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
9124
|
|
|
|
|
|
|
const short *_lower = _keys; |
9125
|
|
|
|
|
|
|
const short *_mid; |
9126
|
109
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
9127
|
|
|
|
|
|
|
while (1) { |
9128
|
109
|
100
|
|
|
|
|
if ( _upper < _lower ) |
9129
|
|
|
|
|
|
|
break; |
9130
|
|
|
|
|
|
|
|
9131
|
88
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
9132
|
88
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
9133
|
52
|
|
|
|
|
|
_upper = _mid - 1; |
9134
|
36
|
100
|
|
|
|
|
else if ( _widec > *_mid ) |
9135
|
28
|
|
|
|
|
|
_lower = _mid + 1; |
9136
|
|
|
|
|
|
|
else { |
9137
|
8
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
9138
|
8
|
|
|
|
|
|
goto _match; |
9139
|
|
|
|
|
|
|
} |
9140
|
|
|
|
|
|
|
} |
9141
|
21
|
|
|
|
|
|
_keys += _klen; |
9142
|
21
|
|
|
|
|
|
_trans += _klen; |
9143
|
|
|
|
|
|
|
} |
9144
|
|
|
|
|
|
|
|
9145
|
21
|
|
|
|
|
|
_klen = _generic_tokenizer_range_lengths[cs]; |
9146
|
21
|
100
|
|
|
|
|
if ( _klen > 0 ) { |
9147
|
|
|
|
|
|
|
const short *_lower = _keys; |
9148
|
|
|
|
|
|
|
const short *_mid; |
9149
|
29
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
9150
|
|
|
|
|
|
|
while (1) { |
9151
|
29
|
100
|
|
|
|
|
if ( _upper < _lower ) |
9152
|
|
|
|
|
|
|
break; |
9153
|
|
|
|
|
|
|
|
9154
|
25
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
9155
|
25
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
9156
|
8
|
|
|
|
|
|
_upper = _mid - 2; |
9157
|
17
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
9158
|
2
|
|
|
|
|
|
_lower = _mid + 2; |
9159
|
|
|
|
|
|
|
else { |
9160
|
15
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
9161
|
15
|
|
|
|
|
|
goto _match; |
9162
|
|
|
|
|
|
|
} |
9163
|
|
|
|
|
|
|
} |
9164
|
4
|
|
|
|
|
|
_trans += _klen; |
9165
|
|
|
|
|
|
|
} |
9166
|
|
|
|
|
|
|
|
9167
|
|
|
|
|
|
|
_match: |
9168
|
29
|
|
|
|
|
|
_trans = _generic_tokenizer_indicies[_trans]; |
9169
|
|
|
|
|
|
|
_eof_trans: |
9170
|
31
|
|
|
|
|
|
cs = _generic_tokenizer_trans_targs[_trans]; |
9171
|
|
|
|
|
|
|
|
9172
|
31
|
100
|
|
|
|
|
if ( _generic_tokenizer_trans_actions[_trans] == 0 ) |
9173
|
|
|
|
|
|
|
goto _again; |
9174
|
|
|
|
|
|
|
|
9175
|
10
|
|
|
|
|
|
switch ( _generic_tokenizer_trans_actions[_trans] ) { |
9176
|
|
|
|
|
|
|
case 3: |
9177
|
0
|
|
|
|
|
|
{ whitespace = current; } |
9178
|
0
|
|
|
|
|
|
break; |
9179
|
|
|
|
|
|
|
case 4: |
9180
|
2
|
|
|
|
|
|
{te = ( current)+1;} |
9181
|
2
|
|
|
|
|
|
break; |
9182
|
|
|
|
|
|
|
case 7: |
9183
|
0
|
|
|
|
|
|
{te = ( current)+1;{ tokens.emplace_back(ts, te - ts); |
9184
|
0
|
|
|
|
|
|
current = te; |
9185
|
0
|
0
|
|
|
|
|
do |
9186
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9187
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
9188
|
0
|
|
|
|
|
|
( current)--; |
9189
|
|
|
|
|
|
|
}} |
9190
|
0
|
|
|
|
|
|
break; |
9191
|
|
|
|
|
|
|
case 2: |
9192
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
9193
|
0
|
|
|
|
|
|
bool eos = is_eos(tokens, chars[ts].chr, nullptr); |
9194
|
0
|
0
|
|
|
|
|
for (current = ts; current < whitespace; current++) |
9195
|
0
|
|
|
|
|
|
tokens.emplace_back(current, 1); |
9196
|
0
|
|
|
|
|
|
{( current) = (( whitespace))-1;} |
9197
|
0
|
0
|
|
|
|
|
if (eos) {( current)++; goto _out; } |
9198
|
|
|
|
|
|
|
}} |
9199
|
|
|
|
|
|
|
break; |
9200
|
|
|
|
|
|
|
case 10: |
9201
|
0
|
|
|
|
|
|
{te = ( current)+1;{ |
9202
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
9203
|
0
|
|
|
|
|
|
current = te; |
9204
|
0
|
0
|
|
|
|
|
do |
9205
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9206
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
9207
|
0
|
|
|
|
|
|
( current)--; |
9208
|
|
|
|
|
|
|
}} |
9209
|
0
|
|
|
|
|
|
break; |
9210
|
|
|
|
|
|
|
case 11: |
9211
|
6
|
|
|
|
|
|
{te = ( current);( current)--;{ tokens.emplace_back(ts, te - ts); |
9212
|
6
|
|
|
|
|
|
current = te; |
9213
|
6
|
50
|
|
|
|
|
do |
9214
|
6
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9215
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
9216
|
6
|
|
|
|
|
|
( current)--; |
9217
|
|
|
|
|
|
|
}} |
9218
|
6
|
|
|
|
|
|
break; |
9219
|
|
|
|
|
|
|
case 8: |
9220
|
2
|
|
|
|
|
|
{te = ( current);( current)--;{ |
9221
|
2
|
|
|
|
|
|
current = te; |
9222
|
2
|
50
|
|
|
|
|
do |
9223
|
2
|
50
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9224
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
9225
|
2
|
|
|
|
|
|
( current)--; |
9226
|
|
|
|
|
|
|
}} |
9227
|
2
|
|
|
|
|
|
break; |
9228
|
|
|
|
|
|
|
case 9: |
9229
|
0
|
|
|
|
|
|
{te = ( current);( current)--;{ |
9230
|
0
|
0
|
|
|
|
|
if (!tokens.empty()) {( current)++; goto _out; } |
9231
|
0
|
|
|
|
|
|
current = te; |
9232
|
0
|
0
|
|
|
|
|
do |
9233
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9234
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
9235
|
0
|
|
|
|
|
|
( current)--; |
9236
|
|
|
|
|
|
|
}} |
9237
|
0
|
|
|
|
|
|
break; |
9238
|
|
|
|
|
|
|
case 1: |
9239
|
0
|
|
|
|
|
|
{{( current) = ((te))-1;}{ tokens.emplace_back(ts, te - ts); |
9240
|
0
|
|
|
|
|
|
current = te; |
9241
|
0
|
0
|
|
|
|
|
do |
9242
|
0
|
0
|
|
|
|
|
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9243
|
|
|
|
|
|
|
while (tokenize_url_email(tokens)); |
9244
|
0
|
|
|
|
|
|
( current)--; |
9245
|
|
|
|
|
|
|
}} |
9246
|
0
|
|
|
|
|
|
break; |
9247
|
|
|
|
|
|
|
} |
9248
|
|
|
|
|
|
|
|
9249
|
|
|
|
|
|
|
_again: |
9250
|
31
|
100
|
|
|
|
|
switch ( _generic_tokenizer_to_state_actions[cs] ) { |
9251
|
|
|
|
|
|
|
case 5: |
9252
|
8
|
|
|
|
|
|
{ts = 0;} |
9253
|
8
|
|
|
|
|
|
break; |
9254
|
|
|
|
|
|
|
} |
9255
|
|
|
|
|
|
|
|
9256
|
31
|
50
|
|
|
|
|
if ( cs == 0 ) |
9257
|
|
|
|
|
|
|
goto _out; |
9258
|
31
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
9259
|
|
|
|
|
|
|
goto _resume; |
9260
|
|
|
|
|
|
|
_test_eof: {} |
9261
|
4
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
9262
|
|
|
|
|
|
|
{ |
9263
|
4
|
100
|
|
|
|
|
if ( _generic_tokenizer_eof_trans[cs] > 0 ) { |
9264
|
2
|
|
|
|
|
|
_trans = _generic_tokenizer_eof_trans[cs] - 1; |
9265
|
2
|
|
|
|
|
|
goto _eof_trans; |
9266
|
|
|
|
|
|
|
} |
9267
|
|
|
|
|
|
|
} |
9268
|
|
|
|
|
|
|
|
9269
|
|
|
|
|
|
|
_out: {} |
9270
|
|
|
|
|
|
|
} |
9271
|
|
|
|
|
|
|
|
9272
|
|
|
|
|
|
|
(void)act; // Suppress unused variable warning |
9273
|
|
|
|
|
|
|
|
9274
|
2
|
|
|
|
|
|
return !tokens.empty(); |
9275
|
|
|
|
|
|
|
} |
9276
|
|
|
|
|
|
|
|
9277
|
|
|
|
|
|
|
///////// |
9278
|
|
|
|
|
|
|
// File: tokenizer/ragel_tokenizer.cpp |
9279
|
|
|
|
|
|
|
///////// |
9280
|
|
|
|
|
|
|
|
9281
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9282
|
|
|
|
|
|
|
// |
9283
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9284
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9285
|
|
|
|
|
|
|
// |
9286
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9287
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9288
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9289
|
|
|
|
|
|
|
|
9290
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_offsets[] = { |
9291
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9292
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
9293
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
9294
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
9295
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
9296
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
9297
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
9298
|
|
|
|
|
|
|
1, 1, 1, 2, 3, 3, 4, 5, |
9299
|
|
|
|
|
|
|
6, 7, 8, 9, 10, 11, 12, 13, |
9300
|
|
|
|
|
|
|
14, 15, 16 |
9301
|
|
|
|
|
|
|
}; |
9302
|
|
|
|
|
|
|
|
9303
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_lengths[] = { |
9304
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9305
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 0, 0, 0, |
9306
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9307
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9308
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9309
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9310
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9311
|
|
|
|
|
|
|
0, 0, 1, 1, 0, 1, 1, 1, |
9312
|
|
|
|
|
|
|
1, 1, 1, 1, 1, 1, 1, 1, |
9313
|
|
|
|
|
|
|
1, 1, 1 |
9314
|
|
|
|
|
|
|
}; |
9315
|
|
|
|
|
|
|
|
9316
|
|
|
|
|
|
|
static const short _ragel_url_email_cond_keys[] = { |
9317
|
|
|
|
|
|
|
41u, 41u, 47u, 47u, 47u, 47u, 41u, 41u, |
9318
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
9319
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
9320
|
|
|
|
|
|
|
47u, 47u, 47u, 47u, 47u, 47u, 47u, 47u, |
9321
|
|
|
|
|
|
|
47u, 47u, 0 |
9322
|
|
|
|
|
|
|
}; |
9323
|
|
|
|
|
|
|
|
9324
|
|
|
|
|
|
|
static const char _ragel_url_email_cond_spaces[] = { |
9325
|
|
|
|
|
|
|
1, 0, 0, 1, 0, 0, 0, 0, |
9326
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9327
|
|
|
|
|
|
|
0, 0 |
9328
|
|
|
|
|
|
|
}; |
9329
|
|
|
|
|
|
|
|
9330
|
|
|
|
|
|
|
static const short _ragel_url_email_key_offsets[] = { |
9331
|
|
|
|
|
|
|
0, 0, 15, 29, 41, 54, 63, 71, |
9332
|
|
|
|
|
|
|
78, 86, 92, 100, 117, 145, 154, 162, |
9333
|
|
|
|
|
|
|
171, 179, 188, 196, 204, 215, 225, 233, |
9334
|
|
|
|
|
|
|
241, 252, 262, 270, 278, 289, 299, 315, |
9335
|
|
|
|
|
|
|
330, 346, 360, 376, 393, 409, 426, 442, |
9336
|
|
|
|
|
|
|
459, 475, 491, 510, 528, 544, 560, 579, |
9337
|
|
|
|
|
|
|
597, 613, 629, 648, 666, 682, 698, 714, |
9338
|
|
|
|
|
|
|
725, 726, 741, 752, 756, 773, 801, 812, |
9339
|
|
|
|
|
|
|
823, 834, 848, 861, 879, 893, 908, 926, |
9340
|
|
|
|
|
|
|
944, 962, 983 |
9341
|
|
|
|
|
|
|
}; |
9342
|
|
|
|
|
|
|
|
9343
|
|
|
|
|
|
|
static const short _ragel_url_email_trans_keys[] = { |
9344
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 95u, 36u, 37u, 39u, |
9345
|
|
|
|
|
|
|
46u, 51u, 57u, 65u, 90u, 97u, 122u, 33u, |
9346
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 46u, 48u, |
9347
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 95u, 36u, |
9348
|
|
|
|
|
|
|
37u, 39u, 46u, 48u, 57u, 65u, 90u, 97u, |
9349
|
|
|
|
|
|
|
122u, 33u, 64u, 95u, 36u, 37u, 39u, 46u, |
9350
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 49u, |
9351
|
|
|
|
|
|
|
50u, 51u, 57u, 65u, 90u, 97u, 122u, 45u, |
9352
|
|
|
|
|
|
|
46u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
9353
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
9354
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 48u, 57u, |
9355
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
9356
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 39u, 41u, 61u, |
9357
|
|
|
|
|
|
|
95u, 36u, 47u, 48u, 57u, 58u, 59u, 63u, |
9358
|
|
|
|
|
|
|
64u, 65u, 90u, 97u, 122u, 33u, 39u, 40u, |
9359
|
|
|
|
|
|
|
44u, 46u, 61u, 63u, 95u, 129u, 131u, 135u, |
9360
|
|
|
|
|
|
|
151u, 809u, 1065u, 36u, 38u, 42u, 57u, 58u, |
9361
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 142u, 143u, 155u, |
9362
|
|
|
|
|
|
|
159u, 48u, 49u, 50u, 51u, 57u, 65u, 90u, |
9363
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 48u, 57u, 65u, 90u, |
9364
|
|
|
|
|
|
|
97u, 122u, 48u, 49u, 50u, 51u, 57u, 65u, |
9365
|
|
|
|
|
|
|
90u, 97u, 122u, 45u, 46u, 48u, 57u, 65u, |
9366
|
|
|
|
|
|
|
90u, 97u, 122u, 48u, 49u, 50u, 51u, 57u, |
9367
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
9368
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 57u, |
9369
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 53u, 48u, |
9370
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 45u, |
9371
|
|
|
|
|
|
|
46u, 48u, 53u, 54u, 57u, 65u, 90u, 97u, |
9372
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 57u, 65u, 90u, 97u, |
9373
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 57u, 65u, 90u, 97u, |
9374
|
|
|
|
|
|
|
122u, 45u, 46u, 53u, 48u, 52u, 54u, 57u, |
9375
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 48u, 53u, |
9376
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
9377
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
9378
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 45u, 46u, |
9379
|
|
|
|
|
|
|
53u, 48u, 52u, 54u, 57u, 65u, 90u, 97u, |
9380
|
|
|
|
|
|
|
122u, 45u, 46u, 48u, 53u, 54u, 57u, 65u, |
9381
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
9382
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
9383
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 58u, 64u, 95u, |
9384
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 48u, 57u, 65u, 90u, |
9385
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
9386
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
9387
|
|
|
|
|
|
|
97u, 122u, 33u, 58u, 64u, 95u, 36u, 37u, |
9388
|
|
|
|
|
|
|
39u, 46u, 48u, 57u, 65u, 90u, 97u, 122u, |
9389
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
9390
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
9391
|
|
|
|
|
|
|
33u, 48u, 49u, 50u, 58u, 64u, 95u, 36u, |
9392
|
|
|
|
|
|
|
37u, 39u, 46u, 51u, 57u, 65u, 90u, 97u, |
9393
|
|
|
|
|
|
|
122u, 33u, 45u, 46u, 58u, 64u, 95u, 36u, |
9394
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 57u, 65u, 90u, 97u, |
9395
|
|
|
|
|
|
|
122u, 33u, 48u, 49u, 50u, 58u, 64u, 95u, |
9396
|
|
|
|
|
|
|
36u, 37u, 39u, 46u, 51u, 57u, 65u, 90u, |
9397
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
9398
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
9399
|
|
|
|
|
|
|
97u, 122u, 33u, 48u, 49u, 50u, 58u, 64u, |
9400
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 46u, 51u, 57u, 65u, |
9401
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
9402
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
9403
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
9404
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 57u, 65u, |
9405
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 53u, 58u, |
9406
|
|
|
|
|
|
|
64u, 95u, 36u, 37u, 39u, 44u, 48u, 52u, |
9407
|
|
|
|
|
|
|
54u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
9408
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
9409
|
|
|
|
|
|
|
48u, 53u, 54u, 57u, 65u, 90u, 97u, 122u, |
9410
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
9411
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
9412
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
9413
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
9414
|
|
|
|
|
|
|
33u, 45u, 46u, 53u, 58u, 64u, 95u, 36u, |
9415
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 52u, 54u, 57u, 65u, |
9416
|
|
|
|
|
|
|
90u, 97u, 122u, 33u, 45u, 46u, 58u, 64u, |
9417
|
|
|
|
|
|
|
95u, 36u, 37u, 39u, 44u, 48u, 53u, 54u, |
9418
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
9419
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
9420
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
9421
|
|
|
|
|
|
|
58u, 64u, 95u, 36u, 37u, 39u, 44u, 48u, |
9422
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
9423
|
|
|
|
|
|
|
53u, 58u, 64u, 95u, 36u, 37u, 39u, 44u, |
9424
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
9425
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 36u, 37u, |
9426
|
|
|
|
|
|
|
39u, 44u, 48u, 53u, 54u, 57u, 65u, 90u, |
9427
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
9428
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
9429
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
9430
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
9431
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 58u, 64u, 95u, |
9432
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
9433
|
|
|
|
|
|
|
97u, 122u, 33u, 47u, 95u, 36u, 37u, 39u, |
9434
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 47u, 33u, 48u, |
9435
|
|
|
|
|
|
|
49u, 50u, 95u, 36u, 37u, 39u, 46u, 51u, |
9436
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 45u, 46u, 58u, |
9437
|
|
|
|
|
|
|
303u, 559u, 48u, 57u, 65u, 90u, 97u, 122u, |
9438
|
|
|
|
|
|
|
303u, 559u, 48u, 57u, 33u, 39u, 41u, 61u, |
9439
|
|
|
|
|
|
|
95u, 36u, 47u, 48u, 57u, 58u, 59u, 63u, |
9440
|
|
|
|
|
|
|
64u, 65u, 90u, 97u, 122u, 33u, 39u, 40u, |
9441
|
|
|
|
|
|
|
44u, 46u, 61u, 63u, 95u, 129u, 131u, 135u, |
9442
|
|
|
|
|
|
|
151u, 809u, 1065u, 36u, 38u, 42u, 57u, 58u, |
9443
|
|
|
|
|
|
|
59u, 64u, 90u, 97u, 122u, 142u, 143u, 155u, |
9444
|
|
|
|
|
|
|
159u, 45u, 46u, 58u, 303u, 559u, 48u, 57u, |
9445
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 45u, 46u, 58u, 303u, |
9446
|
|
|
|
|
|
|
559u, 48u, 57u, 65u, 90u, 97u, 122u, 45u, |
9447
|
|
|
|
|
|
|
46u, 58u, 303u, 559u, 48u, 57u, 65u, 90u, |
9448
|
|
|
|
|
|
|
97u, 122u, 45u, 46u, 53u, 58u, 303u, 559u, |
9449
|
|
|
|
|
|
|
48u, 52u, 54u, 57u, 65u, 90u, 97u, 122u, |
9450
|
|
|
|
|
|
|
45u, 46u, 58u, 303u, 559u, 48u, 53u, 54u, |
9451
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 45u, 46u, |
9452
|
|
|
|
|
|
|
58u, 64u, 95u, 303u, 559u, 36u, 37u, 39u, |
9453
|
|
|
|
|
|
|
44u, 48u, 57u, 65u, 90u, 97u, 122u, 33u, |
9454
|
|
|
|
|
|
|
95u, 303u, 559u, 36u, 37u, 39u, 46u, 48u, |
9455
|
|
|
|
|
|
|
57u, 65u, 90u, 97u, 122u, 33u, 64u, 95u, |
9456
|
|
|
|
|
|
|
303u, 559u, 36u, 37u, 39u, 46u, 48u, 57u, |
9457
|
|
|
|
|
|
|
65u, 90u, 97u, 122u, 33u, 45u, 46u, 58u, |
9458
|
|
|
|
|
|
|
64u, 95u, 303u, 559u, 36u, 37u, 39u, 44u, |
9459
|
|
|
|
|
|
|
48u, 57u, 65u, 90u, 97u, 122u, 33u, 45u, |
9460
|
|
|
|
|
|
|
46u, 58u, 64u, 95u, 303u, 559u, 36u, 37u, |
9461
|
|
|
|
|
|
|
39u, 44u, 48u, 57u, 65u, 90u, 97u, 122u, |
9462
|
|
|
|
|
|
|
33u, 45u, 46u, 58u, 64u, 95u, 303u, 559u, |
9463
|
|
|
|
|
|
|
36u, 37u, 39u, 44u, 48u, 57u, 65u, 90u, |
9464
|
|
|
|
|
|
|
97u, 122u, 33u, 45u, 46u, 53u, 58u, 64u, |
9465
|
|
|
|
|
|
|
95u, 303u, 559u, 36u, 37u, 39u, 44u, 48u, |
9466
|
|
|
|
|
|
|
52u, 54u, 57u, 65u, 90u, 97u, 122u, 33u, |
9467
|
|
|
|
|
|
|
45u, 46u, 58u, 64u, 95u, 303u, 559u, 36u, |
9468
|
|
|
|
|
|
|
37u, 39u, 44u, 48u, 53u, 54u, 57u, 65u, |
9469
|
|
|
|
|
|
|
90u, 97u, 122u, 0 |
9470
|
|
|
|
|
|
|
}; |
9471
|
|
|
|
|
|
|
|
9472
|
|
|
|
|
|
|
static const char _ragel_url_email_single_lengths[] = { |
9473
|
|
|
|
|
|
|
0, 5, 4, 2, 3, 3, 2, 1, |
9474
|
|
|
|
|
|
|
2, 0, 2, 5, 14, 3, 2, 3, |
9475
|
|
|
|
|
|
|
2, 3, 2, 2, 3, 2, 2, 2, |
9476
|
|
|
|
|
|
|
3, 2, 2, 2, 3, 2, 6, 5, |
9477
|
|
|
|
|
|
|
6, 4, 6, 7, 6, 7, 6, 7, |
9478
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 7, 6, |
9479
|
|
|
|
|
|
|
6, 6, 7, 6, 6, 6, 6, 3, |
9480
|
|
|
|
|
|
|
1, 5, 5, 2, 5, 14, 5, 5, |
9481
|
|
|
|
|
|
|
5, 6, 5, 8, 4, 5, 8, 8, |
9482
|
|
|
|
|
|
|
8, 9, 8 |
9483
|
|
|
|
|
|
|
}; |
9484
|
|
|
|
|
|
|
|
9485
|
|
|
|
|
|
|
static const char _ragel_url_email_range_lengths[] = { |
9486
|
|
|
|
|
|
|
0, 5, 5, 5, 5, 3, 3, 3, |
9487
|
|
|
|
|
|
|
3, 3, 3, 6, 7, 3, 3, 3, |
9488
|
|
|
|
|
|
|
3, 3, 3, 3, 4, 4, 3, 3, |
9489
|
|
|
|
|
|
|
4, 4, 3, 3, 4, 4, 5, 5, |
9490
|
|
|
|
|
|
|
5, 5, 5, 5, 5, 5, 5, 5, |
9491
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 6, 6, |
9492
|
|
|
|
|
|
|
5, 5, 6, 6, 5, 5, 5, 4, |
9493
|
|
|
|
|
|
|
0, 5, 3, 1, 6, 7, 3, 3, |
9494
|
|
|
|
|
|
|
3, 4, 4, 5, 5, 5, 5, 5, |
9495
|
|
|
|
|
|
|
5, 6, 6 |
9496
|
|
|
|
|
|
|
}; |
9497
|
|
|
|
|
|
|
|
9498
|
|
|
|
|
|
|
static const short _ragel_url_email_index_offsets[] = { |
9499
|
|
|
|
|
|
|
0, 0, 11, 21, 29, 38, 45, 51, |
9500
|
|
|
|
|
|
|
56, 62, 66, 72, 84, 106, 113, 119, |
9501
|
|
|
|
|
|
|
126, 132, 139, 145, 151, 159, 166, 172, |
9502
|
|
|
|
|
|
|
178, 186, 193, 199, 205, 213, 220, 232, |
9503
|
|
|
|
|
|
|
243, 255, 265, 277, 290, 302, 315, 327, |
9504
|
|
|
|
|
|
|
340, 352, 364, 378, 391, 403, 415, 429, |
9505
|
|
|
|
|
|
|
442, 454, 466, 480, 493, 505, 517, 529, |
9506
|
|
|
|
|
|
|
537, 539, 550, 559, 563, 575, 597, 606, |
9507
|
|
|
|
|
|
|
615, 624, 635, 645, 659, 669, 680, 694, |
9508
|
|
|
|
|
|
|
708, 722, 738 |
9509
|
|
|
|
|
|
|
}; |
9510
|
|
|
|
|
|
|
|
9511
|
|
|
|
|
|
|
static const char _ragel_url_email_indicies[] = { |
9512
|
|
|
|
|
|
|
0, 2, 3, 4, 0, 0, 0, 5, |
9513
|
|
|
|
|
|
|
6, 6, 1, 0, 7, 8, 0, 0, |
9514
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 9, 9, 9, |
9515
|
|
|
|
|
|
|
9, 9, 9, 9, 1, 9, 8, 9, |
9516
|
|
|
|
|
|
|
9, 9, 9, 9, 9, 1, 10, 11, |
9517
|
|
|
|
|
|
|
12, 13, 14, 14, 1, 15, 16, 14, |
9518
|
|
|
|
|
|
|
14, 14, 1, 15, 14, 14, 14, 1, |
9519
|
|
|
|
|
|
|
15, 17, 14, 14, 14, 1, 14, 18, |
9520
|
|
|
|
|
|
|
18, 1, 15, 17, 14, 19, 19, 1, |
9521
|
|
|
|
|
|
|
20, 21, 21, 20, 20, 20, 21, 20, |
9522
|
|
|
|
|
|
|
20, 21, 21, 1, 22, 22, 24, 22, |
9523
|
|
|
|
|
|
|
22, 23, 22, 23, 23, 23, 23, 23, |
9524
|
|
|
|
|
|
|
25, 26, 23, 23, 22, 23, 23, 23, |
9525
|
|
|
|
|
|
|
23, 1, 27, 28, 29, 30, 18, 18, |
9526
|
|
|
|
|
|
|
1, 15, 31, 14, 14, 14, 1, 32, |
9527
|
|
|
|
|
|
|
33, 34, 35, 18, 18, 1, 15, 36, |
9528
|
|
|
|
|
|
|
14, 14, 14, 1, 37, 38, 39, 40, |
9529
|
|
|
|
|
|
|
18, 18, 1, 15, 36, 35, 14, 14, |
9530
|
|
|
|
|
|
|
1, 15, 36, 32, 14, 14, 1, 15, |
9531
|
|
|
|
|
|
|
36, 41, 35, 32, 14, 14, 1, 15, |
9532
|
|
|
|
|
|
|
36, 32, 14, 14, 14, 1, 15, 31, |
9533
|
|
|
|
|
|
|
30, 14, 14, 1, 15, 31, 27, 14, |
9534
|
|
|
|
|
|
|
14, 1, 15, 31, 42, 30, 27, 14, |
9535
|
|
|
|
|
|
|
14, 1, 15, 31, 27, 14, 14, 14, |
9536
|
|
|
|
|
|
|
1, 15, 16, 13, 14, 14, 1, 15, |
9537
|
|
|
|
|
|
|
16, 10, 14, 14, 1, 15, 16, 43, |
9538
|
|
|
|
|
|
|
13, 10, 14, 14, 1, 15, 16, 10, |
9539
|
|
|
|
|
|
|
14, 14, 14, 1, 0, 44, 45, 7, |
9540
|
|
|
|
|
|
|
8, 0, 0, 0, 46, 46, 46, 1, |
9541
|
|
|
|
|
|
|
0, 44, 7, 8, 0, 0, 0, 46, |
9542
|
|
|
|
|
|
|
46, 46, 1, 0, 44, 47, 7, 8, |
9543
|
|
|
|
|
|
|
0, 0, 0, 46, 46, 46, 1, 0, |
9544
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 46, 48, 48, |
9545
|
|
|
|
|
|
|
1, 0, 44, 47, 7, 8, 0, 0, |
9546
|
|
|
|
|
|
|
0, 46, 49, 49, 1, 0, 50, 51, |
9547
|
|
|
|
|
|
|
52, 7, 8, 0, 0, 0, 53, 48, |
9548
|
|
|
|
|
|
|
48, 1, 0, 44, 54, 7, 8, 0, |
9549
|
|
|
|
|
|
|
0, 0, 46, 46, 46, 1, 0, 55, |
9550
|
|
|
|
|
|
|
56, 57, 7, 8, 0, 0, 0, 58, |
9551
|
|
|
|
|
|
|
48, 48, 1, 0, 44, 59, 7, 8, |
9552
|
|
|
|
|
|
|
0, 0, 0, 46, 46, 46, 1, 0, |
9553
|
|
|
|
|
|
|
60, 61, 62, 7, 8, 0, 0, 0, |
9554
|
|
|
|
|
|
|
63, 48, 48, 1, 0, 44, 59, 7, |
9555
|
|
|
|
|
|
|
8, 0, 0, 0, 58, 46, 46, 1, |
9556
|
|
|
|
|
|
|
0, 44, 59, 7, 8, 0, 0, 0, |
9557
|
|
|
|
|
|
|
55, 46, 46, 1, 0, 44, 59, 64, |
9558
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 58, 55, 46, |
9559
|
|
|
|
|
|
|
46, 1, 0, 44, 59, 7, 8, 0, |
9560
|
|
|
|
|
|
|
0, 0, 55, 46, 46, 46, 1, 0, |
9561
|
|
|
|
|
|
|
44, 54, 7, 8, 0, 0, 0, 53, |
9562
|
|
|
|
|
|
|
46, 46, 1, 0, 44, 54, 7, 8, |
9563
|
|
|
|
|
|
|
0, 0, 0, 50, 46, 46, 1, 0, |
9564
|
|
|
|
|
|
|
44, 54, 65, 7, 8, 0, 0, 0, |
9565
|
|
|
|
|
|
|
53, 50, 46, 46, 1, 0, 44, 54, |
9566
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 50, 46, 46, |
9567
|
|
|
|
|
|
|
46, 1, 0, 44, 45, 7, 8, 0, |
9568
|
|
|
|
|
|
|
0, 0, 5, 46, 46, 1, 0, 44, |
9569
|
|
|
|
|
|
|
45, 7, 8, 0, 0, 0, 2, 46, |
9570
|
|
|
|
|
|
|
46, 1, 0, 44, 45, 66, 7, 8, |
9571
|
|
|
|
|
|
|
0, 0, 0, 5, 2, 46, 46, 1, |
9572
|
|
|
|
|
|
|
0, 44, 45, 7, 8, 0, 0, 0, |
9573
|
|
|
|
|
|
|
2, 46, 46, 46, 1, 0, 44, 47, |
9574
|
|
|
|
|
|
|
7, 8, 0, 0, 0, 46, 67, 67, |
9575
|
|
|
|
|
|
|
1, 0, 44, 47, 7, 8, 0, 0, |
9576
|
|
|
|
|
|
|
0, 46, 68, 68, 1, 0, 44, 47, |
9577
|
|
|
|
|
|
|
69, 8, 0, 0, 0, 46, 68, 68, |
9578
|
|
|
|
|
|
|
1, 9, 70, 9, 9, 9, 9, 9, |
9579
|
|
|
|
|
|
|
1, 71, 1, 0, 2, 3, 4, 0, |
9580
|
|
|
|
|
|
|
0, 0, 5, 46, 46, 1, 15, 17, |
9581
|
|
|
|
|
|
|
72, 21, 23, 14, 19, 19, 1, 21, |
9582
|
|
|
|
|
|
|
23, 72, 1, 20, 21, 21, 20, 20, |
9583
|
|
|
|
|
|
|
20, 21, 20, 20, 21, 21, 1, 22, |
9584
|
|
|
|
|
|
|
22, 24, 22, 22, 23, 22, 23, 23, |
9585
|
|
|
|
|
|
|
23, 23, 23, 25, 26, 23, 23, 22, |
9586
|
|
|
|
|
|
|
23, 23, 23, 23, 1, 15, 17, 72, |
9587
|
|
|
|
|
|
|
21, 23, 14, 14, 14, 1, 15, 17, |
9588
|
|
|
|
|
|
|
72, 21, 23, 40, 14, 14, 1, 15, |
9589
|
|
|
|
|
|
|
17, 72, 21, 23, 37, 14, 14, 1, |
9590
|
|
|
|
|
|
|
15, 17, 73, 72, 21, 23, 40, 37, |
9591
|
|
|
|
|
|
|
14, 14, 1, 15, 17, 72, 21, 23, |
9592
|
|
|
|
|
|
|
37, 14, 14, 14, 1, 0, 44, 47, |
9593
|
|
|
|
|
|
|
74, 8, 0, 21, 23, 0, 0, 46, |
9594
|
|
|
|
|
|
|
49, 49, 1, 9, 9, 21, 23, 9, |
9595
|
|
|
|
|
|
|
9, 75, 9, 9, 1, 9, 8, 9, |
9596
|
|
|
|
|
|
|
21, 23, 9, 9, 75, 9, 9, 1, |
9597
|
|
|
|
|
|
|
0, 44, 47, 74, 8, 0, 21, 23, |
9598
|
|
|
|
|
|
|
0, 0, 46, 46, 46, 1, 0, 44, |
9599
|
|
|
|
|
|
|
47, 74, 8, 0, 21, 23, 0, 0, |
9600
|
|
|
|
|
|
|
63, 46, 46, 1, 0, 44, 47, 74, |
9601
|
|
|
|
|
|
|
8, 0, 21, 23, 0, 0, 60, 46, |
9602
|
|
|
|
|
|
|
46, 1, 0, 44, 47, 76, 74, 8, |
9603
|
|
|
|
|
|
|
0, 21, 23, 0, 0, 63, 60, 46, |
9604
|
|
|
|
|
|
|
46, 1, 0, 44, 47, 74, 8, 0, |
9605
|
|
|
|
|
|
|
21, 23, 0, 0, 60, 46, 46, 46, |
9606
|
|
|
|
|
|
|
1, 0 |
9607
|
|
|
|
|
|
|
}; |
9608
|
|
|
|
|
|
|
|
9609
|
|
|
|
|
|
|
static const char _ragel_url_email_trans_targs[] = { |
9610
|
|
|
|
|
|
|
2, 0, 30, 48, 50, 49, 52, 3, |
9611
|
|
|
|
|
|
|
5, 4, 6, 26, 28, 27, 8, 7, |
9612
|
|
|
|
|
|
|
13, 9, 10, 58, 11, 60, 12, 61, |
9613
|
|
|
|
|
|
|
61, 12, 61, 14, 22, 24, 23, 15, |
9614
|
|
|
|
|
|
|
16, 18, 20, 19, 17, 62, 63, 65, |
9615
|
|
|
|
|
|
|
64, 21, 25, 29, 31, 35, 32, 33, |
9616
|
|
|
|
|
|
|
34, 67, 36, 44, 46, 45, 37, 38, |
9617
|
|
|
|
|
|
|
40, 42, 41, 39, 70, 71, 73, 72, |
9618
|
|
|
|
|
|
|
43, 47, 51, 53, 54, 55, 56, 57, |
9619
|
|
|
|
|
|
|
59, 66, 68, 69, 74 |
9620
|
|
|
|
|
|
|
}; |
9621
|
|
|
|
|
|
|
|
9622
|
|
|
|
|
|
|
static const char _ragel_url_email_trans_actions[] = { |
9623
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9624
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9625
|
|
|
|
|
|
|
0, 0, 0, 1, 0, 1, 0, 1, |
9626
|
|
|
|
|
|
|
2, 3, 4, 0, 0, 0, 0, 0, |
9627
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 1, 1, 1, |
9628
|
|
|
|
|
|
|
1, 0, 0, 0, 0, 0, 0, 0, |
9629
|
|
|
|
|
|
|
0, 1, 0, 0, 0, 0, 0, 0, |
9630
|
|
|
|
|
|
|
0, 0, 0, 0, 1, 1, 1, 1, |
9631
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, |
9632
|
|
|
|
|
|
|
1, 1, 1, 1, 1 |
9633
|
|
|
|
|
|
|
}; |
9634
|
|
|
|
|
|
|
|
9635
|
|
|
|
|
|
|
static const int ragel_url_email_start = 1; |
9636
|
|
|
|
|
|
|
|
9637
|
4
|
|
|
|
|
|
vector ragel_tokenizer::ragel_map; |
9638
|
|
|
|
|
|
|
atomic_flag ragel_tokenizer::ragel_map_flag = ATOMIC_FLAG_INIT; |
9639
|
|
|
|
|
|
|
|
9640
|
6
|
|
|
|
|
|
ragel_tokenizer::ragel_tokenizer(unsigned url_email_tokenizer) : unicode_tokenizer(url_email_tokenizer) { |
9641
|
3
|
50
|
|
|
|
|
initialize_ragel_map(); |
9642
|
3
|
|
|
|
|
|
} |
9643
|
|
|
|
|
|
|
|
9644
|
12
|
|
|
|
|
|
void ragel_tokenizer::initialize_ragel_map() { |
9645
|
6
|
50
|
|
|
|
|
while (ragel_map_flag.test_and_set()) {} |
9646
|
6
|
100
|
|
|
|
|
if (ragel_map.empty()) { |
9647
|
258
|
100
|
|
|
|
|
for (uint8_t ascii = 0; ascii < 128; ascii++) |
9648
|
256
|
|
|
|
|
|
ragel_map.push_back(ascii); |
9649
|
|
|
|
|
|
|
|
9650
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2026', 160); // horizontal ellipsis (TRIPLE DOT) |
9651
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2019', 161); // right single quotation mark |
9652
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2018', 162); // left single quotation mark |
9653
|
2
|
|
|
|
|
|
ragel_map_add(U'\u2010', 163); // hyphen |
9654
|
|
|
|
|
|
|
} |
9655
|
|
|
|
|
|
|
ragel_map_flag.clear(); |
9656
|
6
|
|
|
|
|
|
} |
9657
|
|
|
|
|
|
|
|
9658
|
8
|
|
|
|
|
|
void ragel_tokenizer::ragel_map_add(char32_t chr, uint8_t mapping) { |
9659
|
8
|
100
|
|
|
|
|
if (chr >= ragel_map.size()) |
9660
|
2
|
|
|
|
|
|
ragel_map.resize(chr + 1, 128); |
9661
|
8
|
|
|
|
|
|
ragel_map[chr] = mapping; |
9662
|
8
|
|
|
|
|
|
} |
9663
|
|
|
|
|
|
|
|
9664
|
48
|
|
|
|
|
|
bool ragel_tokenizer::ragel_url_email(unsigned version, const vector& chars, size_t& current, vector& tokens) { |
9665
|
|
|
|
|
|
|
int cs; |
9666
|
|
|
|
|
|
|
|
9667
|
48
|
|
|
|
|
|
size_t start = current, end = current, parens = 0; |
9668
|
|
|
|
|
|
|
|
9669
|
|
|
|
|
|
|
{ |
9670
|
|
|
|
|
|
|
cs = ragel_url_email_start; |
9671
|
|
|
|
|
|
|
} |
9672
|
|
|
|
|
|
|
|
9673
|
|
|
|
|
|
|
{ |
9674
|
|
|
|
|
|
|
int _klen; |
9675
|
|
|
|
|
|
|
const short *_keys; |
9676
|
|
|
|
|
|
|
int _trans; |
9677
|
|
|
|
|
|
|
short _widec; |
9678
|
|
|
|
|
|
|
|
9679
|
48
|
50
|
|
|
|
|
if ( ( current) == ( (chars.size() - 1)) ) |
9680
|
|
|
|
|
|
|
goto _test_eof; |
9681
|
|
|
|
|
|
|
if ( cs == 0 ) |
9682
|
|
|
|
|
|
|
goto _out; |
9683
|
|
|
|
|
|
|
_resume: |
9684
|
276
|
|
|
|
|
|
_widec = ( ragel_char(chars[current])); |
9685
|
138
|
|
|
|
|
|
_klen = _ragel_url_email_cond_lengths[cs]; |
9686
|
138
|
|
|
|
|
|
_keys = _ragel_url_email_cond_keys + (_ragel_url_email_cond_offsets[cs]*2); |
9687
|
138
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
9688
|
|
|
|
|
|
|
const short *_lower = _keys; |
9689
|
|
|
|
|
|
|
const short *_mid; |
9690
|
0
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
9691
|
|
|
|
|
|
|
while (1) { |
9692
|
0
|
0
|
|
|
|
|
if ( _upper < _lower ) |
9693
|
|
|
|
|
|
|
break; |
9694
|
|
|
|
|
|
|
|
9695
|
0
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
9696
|
0
|
0
|
|
|
|
|
if ( _widec < _mid[0] ) |
9697
|
0
|
|
|
|
|
|
_upper = _mid - 2; |
9698
|
0
|
0
|
|
|
|
|
else if ( _widec > _mid[1] ) |
9699
|
0
|
|
|
|
|
|
_lower = _mid + 2; |
9700
|
|
|
|
|
|
|
else { |
9701
|
0
|
|
|
|
|
|
switch ( _ragel_url_email_cond_spaces[_ragel_url_email_cond_offsets[cs] + ((_mid - _keys)>>1)] ) { |
9702
|
|
|
|
|
|
|
case 0: { |
9703
|
0
|
|
|
|
|
|
_widec = (short)(256u + (( ragel_char(chars[current])) - 0u)); |
9704
|
0
|
0
|
|
|
|
|
if ( |
9705
|
0
|
|
|
|
|
|
version >= 2 ) _widec += 256; |
9706
|
|
|
|
|
|
|
break; |
9707
|
|
|
|
|
|
|
} |
9708
|
|
|
|
|
|
|
case 1: { |
9709
|
0
|
|
|
|
|
|
_widec = (short)(768u + (( ragel_char(chars[current])) - 0u)); |
9710
|
0
|
0
|
|
|
|
|
if ( |
9711
|
0
|
|
|
|
|
|
parens ) _widec += 256; |
9712
|
|
|
|
|
|
|
break; |
9713
|
|
|
|
|
|
|
} |
9714
|
|
|
|
|
|
|
} |
9715
|
|
|
|
|
|
|
break; |
9716
|
|
|
|
|
|
|
} |
9717
|
|
|
|
|
|
|
} |
9718
|
|
|
|
|
|
|
} |
9719
|
|
|
|
|
|
|
|
9720
|
138
|
|
|
|
|
|
_keys = _ragel_url_email_trans_keys + _ragel_url_email_key_offsets[cs]; |
9721
|
138
|
|
|
|
|
|
_trans = _ragel_url_email_index_offsets[cs]; |
9722
|
|
|
|
|
|
|
|
9723
|
138
|
|
|
|
|
|
_klen = _ragel_url_email_single_lengths[cs]; |
9724
|
138
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
9725
|
|
|
|
|
|
|
const short *_lower = _keys; |
9726
|
|
|
|
|
|
|
const short *_mid; |
9727
|
482
|
|
|
|
|
|
const short *_upper = _keys + _klen - 1; |
9728
|
|
|
|
|
|
|
while (1) { |
9729
|
482
|
100
|
|
|
|
|
if ( _upper < _lower ) |
9730
|
|
|
|
|
|
|
break; |
9731
|
|
|
|
|
|
|
|
9732
|
355
|
|
|
|
|
|
_mid = _lower + ((_upper-_lower) >> 1); |
9733
|
355
|
100
|
|
|
|
|
if ( _widec < *_mid ) |
9734
|
133
|
|
|
|
|
|
_upper = _mid - 1; |
9735
|
222
|
100
|
|
|
|
|
else if ( _widec > *_mid ) |
9736
|
211
|
|
|
|
|
|
_lower = _mid + 1; |
9737
|
|
|
|
|
|
|
else { |
9738
|
11
|
|
|
|
|
|
_trans += (unsigned int)(_mid - _keys); |
9739
|
11
|
|
|
|
|
|
goto _match; |
9740
|
|
|
|
|
|
|
} |
9741
|
|
|
|
|
|
|
} |
9742
|
127
|
|
|
|
|
|
_keys += _klen; |
9743
|
127
|
|
|
|
|
|
_trans += _klen; |
9744
|
|
|
|
|
|
|
} |
9745
|
|
|
|
|
|
|
|
9746
|
127
|
|
|
|
|
|
_klen = _ragel_url_email_range_lengths[cs]; |
9747
|
127
|
50
|
|
|
|
|
if ( _klen > 0 ) { |
9748
|
|
|
|
|
|
|
const short *_lower = _keys; |
9749
|
|
|
|
|
|
|
const short *_mid; |
9750
|
342
|
|
|
|
|
|
const short *_upper = _keys + (_klen<<1) - 2; |
9751
|
|
|
|
|
|
|
while (1) { |
9752
|
342
|
100
|
|
|
|
|
if ( _upper < _lower ) |
9753
|
|
|
|
|
|
|
break; |
9754
|
|
|
|
|
|
|
|
9755
|
298
|
|
|
|
|
|
_mid = _lower + (((_upper-_lower) >> 1) & ~1); |
9756
|
298
|
100
|
|
|
|
|
if ( _widec < _mid[0] ) |
9757
|
81
|
|
|
|
|
|
_upper = _mid - 2; |
9758
|
217
|
100
|
|
|
|
|
else if ( _widec > _mid[1] ) |
9759
|
134
|
|
|
|
|
|
_lower = _mid + 2; |
9760
|
|
|
|
|
|
|
else { |
9761
|
83
|
|
|
|
|
|
_trans += (unsigned int)((_mid - _keys)>>1); |
9762
|
83
|
|
|
|
|
|
goto _match; |
9763
|
|
|
|
|
|
|
} |
9764
|
|
|
|
|
|
|
} |
9765
|
44
|
|
|
|
|
|
_trans += _klen; |
9766
|
|
|
|
|
|
|
} |
9767
|
|
|
|
|
|
|
|
9768
|
|
|
|
|
|
|
_match: |
9769
|
138
|
|
|
|
|
|
_trans = _ragel_url_email_indicies[_trans]; |
9770
|
138
|
|
|
|
|
|
cs = _ragel_url_email_trans_targs[_trans]; |
9771
|
|
|
|
|
|
|
|
9772
|
138
|
50
|
|
|
|
|
if ( _ragel_url_email_trans_actions[_trans] == 0 ) |
9773
|
|
|
|
|
|
|
goto _again; |
9774
|
|
|
|
|
|
|
|
9775
|
0
|
|
|
|
|
|
switch ( _ragel_url_email_trans_actions[_trans] ) { |
9776
|
|
|
|
|
|
|
case 3: |
9777
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
9778
|
0
|
|
|
|
|
|
break; |
9779
|
|
|
|
|
|
|
case 1: |
9780
|
0
|
|
|
|
|
|
{ end = current + 1; } |
9781
|
0
|
|
|
|
|
|
break; |
9782
|
|
|
|
|
|
|
case 2: |
9783
|
0
|
|
|
|
|
|
{parens++;} |
9784
|
0
|
|
|
|
|
|
{ end = current + 1; } |
9785
|
0
|
|
|
|
|
|
break; |
9786
|
|
|
|
|
|
|
case 4: |
9787
|
0
|
|
|
|
|
|
{parens-=!!parens;} |
9788
|
0
|
|
|
|
|
|
{ end = current + 1; } |
9789
|
0
|
|
|
|
|
|
break; |
9790
|
|
|
|
|
|
|
} |
9791
|
|
|
|
|
|
|
|
9792
|
|
|
|
|
|
|
_again: |
9793
|
138
|
100
|
|
|
|
|
if ( cs == 0 ) |
9794
|
|
|
|
|
|
|
goto _out; |
9795
|
94
|
100
|
|
|
|
|
if ( ++( current) != ( (chars.size() - 1)) ) |
9796
|
|
|
|
|
|
|
goto _resume; |
9797
|
|
|
|
|
|
|
_test_eof: {} |
9798
|
|
|
|
|
|
|
_out: {} |
9799
|
|
|
|
|
|
|
} |
9800
|
|
|
|
|
|
|
|
9801
|
48
|
50
|
|
|
|
|
if (end > start) { |
9802
|
0
|
|
|
|
|
|
tokens.emplace_back(start, end - start); |
9803
|
0
|
|
|
|
|
|
current = end; |
9804
|
0
|
|
|
|
|
|
return true; |
9805
|
|
|
|
|
|
|
} else { |
9806
|
48
|
|
|
|
|
|
current = start; |
9807
|
48
|
|
|
|
|
|
return false; |
9808
|
|
|
|
|
|
|
} |
9809
|
|
|
|
|
|
|
} |
9810
|
|
|
|
|
|
|
|
9811
|
|
|
|
|
|
|
///////// |
9812
|
|
|
|
|
|
|
// File: tokenizer/vertical_tokenizer.h |
9813
|
|
|
|
|
|
|
///////// |
9814
|
|
|
|
|
|
|
|
9815
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9816
|
|
|
|
|
|
|
// |
9817
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9818
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9819
|
|
|
|
|
|
|
// |
9820
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9821
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9822
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9823
|
|
|
|
|
|
|
|
9824
|
0
|
|
|
|
|
|
class vertical_tokenizer : public unicode_tokenizer { |
9825
|
|
|
|
|
|
|
public: |
9826
|
0
|
0
|
|
|
|
|
vertical_tokenizer() : unicode_tokenizer(0) {} |
9827
|
|
|
|
|
|
|
|
9828
|
|
|
|
|
|
|
virtual bool next_sentence(vector& tokens) override; |
9829
|
|
|
|
|
|
|
}; |
9830
|
|
|
|
|
|
|
|
9831
|
|
|
|
|
|
|
///////// |
9832
|
|
|
|
|
|
|
// File: tokenizer/tokenizer.cpp |
9833
|
|
|
|
|
|
|
///////// |
9834
|
|
|
|
|
|
|
|
9835
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9836
|
|
|
|
|
|
|
// |
9837
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9838
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9839
|
|
|
|
|
|
|
// |
9840
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9841
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9842
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9843
|
|
|
|
|
|
|
|
9844
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_vertical_tokenizer() { |
9845
|
0
|
|
|
|
|
|
return new vertical_tokenizer(); |
9846
|
|
|
|
|
|
|
} |
9847
|
|
|
|
|
|
|
|
9848
|
1
|
|
|
|
|
|
tokenizer* tokenizer::new_czech_tokenizer() { |
9849
|
2
|
|
|
|
|
|
return new czech_tokenizer(czech_tokenizer::CZECH, czech_tokenizer::LATEST); |
9850
|
|
|
|
|
|
|
} |
9851
|
|
|
|
|
|
|
|
9852
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_english_tokenizer() { |
9853
|
0
|
|
|
|
|
|
return new english_tokenizer(english_tokenizer::LATEST); |
9854
|
|
|
|
|
|
|
} |
9855
|
|
|
|
|
|
|
|
9856
|
0
|
|
|
|
|
|
tokenizer* tokenizer::new_generic_tokenizer() { |
9857
|
0
|
|
|
|
|
|
return new generic_tokenizer(generic_tokenizer::LATEST); |
9858
|
|
|
|
|
|
|
} |
9859
|
|
|
|
|
|
|
|
9860
|
|
|
|
|
|
|
///////// |
9861
|
|
|
|
|
|
|
// File: tokenizer/unicode_tokenizer.cpp |
9862
|
|
|
|
|
|
|
///////// |
9863
|
|
|
|
|
|
|
|
9864
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9865
|
|
|
|
|
|
|
// |
9866
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9867
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9868
|
|
|
|
|
|
|
// |
9869
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9870
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9871
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9872
|
|
|
|
|
|
|
|
9873
|
3
|
|
|
|
|
|
unicode_tokenizer::unicode_tokenizer(unsigned url_email_tokenizer) : url_email_tokenizer(url_email_tokenizer) { |
9874
|
3
|
50
|
|
|
|
|
ragel_tokenizer::initialize_ragel_map(); |
9875
|
|
|
|
|
|
|
|
9876
|
3
|
50
|
|
|
|
|
set_text(string_piece(nullptr, 0)); |
9877
|
3
|
|
|
|
|
|
} |
9878
|
|
|
|
|
|
|
|
9879
|
6
|
|
|
|
|
|
void unicode_tokenizer::set_text(string_piece text, bool make_copy /*= false*/) { |
9880
|
|
|
|
|
|
|
using namespace unilib; |
9881
|
|
|
|
|
|
|
|
9882
|
6
|
100
|
|
|
|
|
if (make_copy && text.str) { |
|
|
50
|
|
|
|
|
|
9883
|
3
|
|
|
|
|
|
text_buffer.assign(text.str, text.len); |
9884
|
3
|
|
|
|
|
|
text.str = text_buffer.c_str(); |
9885
|
|
|
|
|
|
|
} |
9886
|
6
|
|
|
|
|
|
current = 0; |
9887
|
|
|
|
|
|
|
|
9888
|
|
|
|
|
|
|
chars.clear(); |
9889
|
136
|
100
|
|
|
|
|
for (const char* curr_str = text.str; text.len; curr_str = text.str) |
9890
|
130
|
|
|
|
|
|
chars.emplace_back(utf8::decode(text.str, text.len), curr_str); |
9891
|
6
|
|
|
|
|
|
chars.emplace_back(0, text.str); |
9892
|
6
|
|
|
|
|
|
} |
9893
|
|
|
|
|
|
|
|
9894
|
7
|
|
|
|
|
|
bool unicode_tokenizer::next_sentence(vector* forms, vector* tokens_ptr) { |
9895
|
7
|
50
|
|
|
|
|
vector& tokens = tokens_ptr ? *tokens_ptr : tokens_buffer; |
9896
|
|
|
|
|
|
|
tokens.clear(); |
9897
|
7
|
50
|
|
|
|
|
if (forms) forms->clear(); |
9898
|
7
|
100
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
9899
|
|
|
|
|
|
|
|
9900
|
4
|
|
|
|
|
|
bool result = next_sentence(tokens); |
9901
|
4
|
50
|
|
|
|
|
if (forms) |
9902
|
36
|
100
|
|
|
|
|
for (auto&& token : tokens) |
9903
|
32
|
|
|
|
|
|
forms->emplace_back(chars[token.start].str, chars[token.start + token.length].str - chars[token.start].str); |
9904
|
|
|
|
|
|
|
|
9905
|
|
|
|
|
|
|
return result; |
9906
|
|
|
|
|
|
|
} |
9907
|
|
|
|
|
|
|
|
9908
|
51
|
|
|
|
|
|
bool unicode_tokenizer::tokenize_url_email(vector& tokens) { |
9909
|
51
|
100
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
9910
|
|
|
|
|
|
|
|
9911
|
48
|
50
|
|
|
|
|
return url_email_tokenizer ? ragel_tokenizer::ragel_url_email(url_email_tokenizer, chars, current, tokens) : false; |
9912
|
|
|
|
|
|
|
} |
9913
|
|
|
|
|
|
|
|
9914
|
47
|
|
|
|
|
|
bool unicode_tokenizer::emergency_sentence_split(const vector& tokens) { |
9915
|
|
|
|
|
|
|
using namespace unilib; |
9916
|
|
|
|
|
|
|
|
9917
|
|
|
|
|
|
|
// Implement emergency splitting for large sentences |
9918
|
47
|
50
|
|
|
|
|
return tokens.size() >= 500 || |
9919
|
94
|
50
|
|
|
|
|
(tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) || |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9920
|
0
|
0
|
|
|
|
|
(tokens.size() >= 400 && chars[tokens.back().start].cat & unicode::Po); |
9921
|
|
|
|
|
|
|
} |
9922
|
|
|
|
|
|
|
|
9923
|
4
|
|
|
|
|
|
bool unicode_tokenizer::is_eos(const vector& tokens, char32_t eos_chr, const unordered_set* abbreviations) { |
9924
|
|
|
|
|
|
|
using namespace unilib; |
9925
|
|
|
|
|
|
|
|
9926
|
4
|
50
|
|
|
|
|
if (eos_chr == '.' && !tokens.empty()) { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
9927
|
|
|
|
|
|
|
// Ignore one-letter capitals before dot |
9928
|
4
|
100
|
|
|
|
|
if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
9929
|
|
|
|
|
|
|
return false; |
9930
|
|
|
|
|
|
|
|
9931
|
|
|
|
|
|
|
// Ignore specified abbreviations |
9932
|
2
|
50
|
|
|
|
|
if (abbreviations) { |
9933
|
|
|
|
|
|
|
eos_buffer.clear(); |
9934
|
13
|
100
|
|
|
|
|
for (size_t i = 0; i < tokens.back().length; i++) |
9935
|
11
|
|
|
|
|
|
utf8::append(eos_buffer, unicode::lowercase(chars[tokens.back().start + i].chr)); |
9936
|
2
|
100
|
|
|
|
|
if (abbreviations->count(eos_buffer)) |
9937
|
|
|
|
|
|
|
return false; |
9938
|
|
|
|
|
|
|
} |
9939
|
|
|
|
|
|
|
} |
9940
|
|
|
|
|
|
|
return true; |
9941
|
|
|
|
|
|
|
} |
9942
|
|
|
|
|
|
|
|
9943
|
|
|
|
|
|
|
///////// |
9944
|
|
|
|
|
|
|
// File: tokenizer/vertical_tokenizer.cpp |
9945
|
|
|
|
|
|
|
///////// |
9946
|
|
|
|
|
|
|
|
9947
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
9948
|
|
|
|
|
|
|
// |
9949
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
9950
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9951
|
|
|
|
|
|
|
// |
9952
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9953
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9954
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9955
|
|
|
|
|
|
|
|
9956
|
0
|
|
|
|
|
|
bool vertical_tokenizer::next_sentence(vector& tokens) { |
9957
|
0
|
0
|
|
|
|
|
if (current >= chars.size() - 1) return false; |
9958
|
|
|
|
|
|
|
|
9959
|
0
|
|
|
|
|
|
while (true) { |
9960
|
0
|
|
|
|
|
|
size_t line_start = current; |
9961
|
0
|
0
|
|
|
|
|
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9962
|
|
|
|
|
|
|
|
9963
|
|
|
|
|
|
|
size_t line_end = current; |
9964
|
0
|
0
|
|
|
|
|
if (current < chars.size() - 1) { |
9965
|
0
|
|
|
|
|
|
current++; |
9966
|
0
|
0
|
|
|
|
|
if (current < chars.size() - 1 && |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
9967
|
0
|
0
|
|
|
|
|
((chars[current-1].chr == '\r' && chars[current].chr == '\n') || |
|
|
0
|
|
|
|
|
|
9968
|
0
|
0
|
|
|
|
|
(chars[current-1].chr == '\n' && chars[current].chr == '\r'))) |
9969
|
0
|
|
|
|
|
|
current++; |
9970
|
|
|
|
|
|
|
} |
9971
|
|
|
|
|
|
|
|
9972
|
0
|
0
|
|
|
|
|
if (line_start < line_end) |
9973
|
0
|
|
|
|
|
|
tokens.emplace_back(line_start, line_end - line_start); |
9974
|
|
|
|
|
|
|
else |
9975
|
|
|
|
|
|
|
break; |
9976
|
|
|
|
|
|
|
} |
9977
|
|
|
|
|
|
|
|
9978
|
0
|
|
|
|
|
|
return true; |
9979
|
|
|
|
|
|
|
} |
9980
|
|
|
|
|
|
|
|
9981
|
|
|
|
|
|
|
///////// |
9982
|
|
|
|
|
|
|
// File: unilib/unicode.cpp |
9983
|
|
|
|
|
|
|
///////// |
9984
|
|
|
|
|
|
|
|
9985
|
|
|
|
|
|
|
// This file is part of UniLib . |
9986
|
|
|
|
|
|
|
// |
9987
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
9988
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
9989
|
|
|
|
|
|
|
// |
9990
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
9991
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
9992
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
9993
|
|
|
|
|
|
|
// |
9994
|
|
|
|
|
|
|
// UniLib version: 3.2.0 |
9995
|
|
|
|
|
|
|
// Unicode version: 13.0.0 |
9996
|
|
|
|
|
|
|
|
9997
|
|
|
|
|
|
|
namespace unilib { |
9998
|
|
|
|
|
|
|
|
9999
|
|
|
|
|
|
|
// Out-of-class definition of the constant unicode::CHARS. No initializer is
// given here, so the value has to come from the declaration of `unicode`,
// which is outside this part of the file. The constant is used below, as
// `unicode::CHARS >> 8`, for the length of unicode::category_index.
const char32_t unicode::CHARS; |
10000
|
|
|
|
|
|
|
|
10001
|
|
|
|
|
|
|
// Out-of-class definition of the constant unicode::DEFAULT_CAT. No initializer
// is given here, so the value has to come from the declaration of `unicode`,
// which is outside this part of the file.
const int32_t unicode::DEFAULT_CAT; |
10002
|
|
|
|
|
|
|
|
10003
|
|
|
|
|
|
|
const uint8_t unicode::category_index[unicode::CHARS >> 8] = { |
10004
|
|
|
|
|
|
|
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,17,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,33,41,42,43,44,45,46,47,48,39,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,49,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,50,51,17,17,17,52,17,53,54,55,56,57,58,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,59,60,60,60,60,60,60,60,60,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,17,62,63,17,64,65,66,67,68,69,70,71,72,17,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,17,17,17,98,99,100,93,93,93,93,93,93,93,93,93,93,17,17,17,17,101,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,17,17,102,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,17,17,103,104,93,93,105,106,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,107,17,17,17,17,108,109,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,17,110,111,93,93,93,93,93,93,93,93,93,112,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,113,114,115,116,117,118,119,120,39,39,121,93,93,93,93,93,122,123,124,93,93,93,93,93,125,126,93,93,127,128,129,93,130,131,132,133,39,39,134,135,136,137,138,139,93,93,93,93,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, |
10005
|
|
|
|
|
|
|
17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,140,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,141,142,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,143,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,144,93,93,93,93,93,93,93,93,93,93,93,93,17,17,145,93,93,93,93,93,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,146,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93, 
|
10006
|
|
|
|
|
|
|
93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93, |
10007
|
|
|
|
|
|
|
93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93, |
10008
|
|
|
|
|
|
|
93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93, |
10009
|
|
|
|
|
|
|
93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,147,148,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,93,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,
 |
10010
|
|
|
|
|
|
|
61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,149,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,61,149 |
10011
|
|
|
|
|
|
|
}; |
10012
|
|
|
|
|
|
|
|
10013
|
|
|
|
|
|
|
const uint8_t unicode::category_block[][256] = { |
10014
|
|
|
|
|
|
|
{_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Zs,_Po,_Po,_Po,_Sc,_Po,_Po,_Po,_Ps,_Pe,_Po,_Sm,_Po,_Pd,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Sm,_Sm,_Sm,_Po,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ps,_Po,_Pe,_Sk,_Pc,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ps,_Sm,_Pe,_Sm,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Cc,_Zs,_Po,_Sc,_Sc,_Sc,_Sc,_So,_Po,_Sk,_So,_Lo,_Pi,_Sm,_Cf,_So,_Sk,_So,_Sm,_No,_No,_Sk,_Ll,_Po,_Po,_Sk,_No,_Lo,_Pf,_No,_No,_No,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
10015
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lo,_Lu,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lu,_Lt,_Ll,_Lu,_Lt,_Ll,_Lu,_Lt,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Lt,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
10016
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Sk,_Lm,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk}, |
10017
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lu,_Ll,_Lu,_Ll,_Lm,_Sk,_Lu,_Ll,_Cn,_Cn,_Lm,_Ll,_Ll,_Ll,_Po,_Lu,_Cn,_Cn,_Cn,_Cn,_Sk,_Sk,_Lu,_Po,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Sm,_Lu,_Ll,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu}, |
10018
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Me,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
10019
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Po,_Pd,_Cn,_Cn,_So,_So,_Sc,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Pd,_Mn,_Po,_Mn,_Mn,_Po,_Mn,_Mn,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10020
|
|
|
|
|
|
|
{_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Sm,_Sm,_Sm,_Po,_Po,_Sc,_Po,_Po,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Cf,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cf,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Mn,_Mn,_So,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_So,_So,_Lo}, |
10021
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cf,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_So,_Po,_Po,_Po,_Lm,_Cn,_Cn,_Mn,_Sc,_Sc}, |
10022
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Cn,_Cn,_Po,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cf,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
10023
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Sc,_Sc,_No,_No,_No,_No,_No,_No,_So,_Sc,_Lo,_Po,_Mn,_Cn}, |
10024
|
|
|
|
|
|
|
{_Cn,_Mn,_Mn,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Mn,_Cn,_Mc,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mn,_Mn,_Lo,_Lo,_Lo,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mc,_Cn,_Mc,_Mc,_Mn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Sc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
10025
|
|
|
|
|
|
|
{_Cn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_So,_Lo,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Mc,_Mn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_So,_So,_So,_So,_So,_So,_Sc,_So,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10026
|
|
|
|
|
|
|
{_Mn,_Mc,_Mc,_Mc,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_No,_No,_No,_No,_No,_No,_No,_So,_Lo,_Mn,_Mc,_Mc,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Lo,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mc,_Mc,_Cn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10027
|
|
|
|
|
|
|
{_Mn,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Mc,_Mn,_Lo,_So,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Mc,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Cn,_Mn,_Cn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Mc,_Mc,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10028
|
|
|
|
|
|
|
{_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Sc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lm,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10029
|
|
|
|
|
|
|
{_Lo,_So,_So,_So,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_Po,_So,_So,_So,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_Mn,_So,_Mn,_So,_Mn,_Ps,_Pe,_Ps,_Pe,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10030
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Lo,_Mc,_Mc,_Mc,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Lo,_Mc,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Mc,_Mc,_Mc,_Mn,_So,_So,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Po,_Lm,_Ll,_Ll,_Ll}, |
10031
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10032
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10033
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn}, |
10034
|
|
|
|
|
|
|
{_Pd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10035
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Zs,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Ps,_Pe,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Nl,_Nl,_Nl,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10036
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Sc,_Lo,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10037
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Po,_Po,_Po,_Po,_Mn,_Mn,_Mn,_Cf,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10038
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10039
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mc,_Mn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10040
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po}, |
10041
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Po,_Po,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Po,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Mc,_Mn,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10042
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
10043
|
|
|
|
|
|
|
{_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll}, |
10044
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Lu,_Cn,_Lu,_Cn,_Lu,_Cn,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Lt,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Ll,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Cn,_Sk,_Sk,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Sk,_Sk,_Sk,_Cn,_Cn,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lt,_Sk,_Sk,_Cn}, |
10045
|
|
|
|
|
|
|
{_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Zs,_Cf,_Cf,_Cf,_Cf,_Cf,_Pd,_Pd,_Pd,_Pd,_Pd,_Pd,_Po,_Po,_Pi,_Pf,_Ps,_Pi,_Pi,_Pf,_Ps,_Pi,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Zl,_Zp,_Cf,_Cf,_Cf,_Cf,_Cf,_Zs,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pi,_Pf,_Po,_Po,_Po,_Po,_Pc,_Pc,_Po,_Po,_Po,_Sm,_Ps,_Pe,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Sm,_Po,_Pc,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Zs,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_No,_Lm,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_Sm,_Sm,_Sm,_Ps,_Pe,_Lm,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Sm,_Sm,_Sm,_Ps,_Pe,_Cn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Sc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Me,_Me,_Me,_Me,_Mn,_Me,_Me,_Me,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10046
|
|
|
|
|
|
|
{_So,_So,_Lu,_So,_So,_So,_So,_Lu,_So,_So,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Lu,_Lu,_Ll,_So,_Lu,_So,_So,_Sm,_Lu,_Lu,_Lu,_Lu,_Lu,_So,_So,_So,_So,_So,_So,_Lu,_So,_Lu,_So,_Lu,_So,_Lu,_Lu,_Lu,_Lu,_So,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lo,_Lo,_Lo,_Lo,_Ll,_So,_So,_Ll,_Ll,_Lu,_Lu,_Sm,_Sm,_Sm,_Sm,_Sm,_Lu,_Ll,_Ll,_Ll,_Ll,_So,_Sm,_So,_So,_Ll,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Lu,_Ll,_Nl,_Nl,_Nl,_Nl,_No,_So,_So,_Cn,_Cn,_Cn,_Cn,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_So,_So,_Sm,_So,_So,_Sm,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_Sm,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
10047
|
|
|
|
|
|
|
{_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
10048
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_Ps,_Pe,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10049
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No}, |
10050
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
10051
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10052
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm}, |
10053
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10054
|
|
|
|
|
|
|
{_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Ps,_Pe,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Ps,_Pe,_Sm,_Sm}, |
10055
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_Sm,_Sm,_Sm,_Sm,_Sm,_Sm,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10056
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Lu,_Ll,_Lu,_Lu,_Lu,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Ll,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Lm,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_So,_So,_So,_So,_So,_So,_Lu,_Ll,_Lu,_Ll,_Mn,_Mn,_Mn,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_No,_Po,_Po}, |
10057
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn}, |
10058
|
|
|
|
|
|
|
{_Po,_Po,_Pi,_Pf,_Pi,_Pf,_Po,_Po,_Po,_Pi,_Pf,_Po,_Pi,_Pf,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Po,_Po,_Pd,_Po,_Pi,_Pf,_Po,_Po,_Pi,_Pf,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Po,_Po,_Po,_Lm,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Pd,_Pd,_Po,_Po,_Po,_Po,_Pd,_Po,_Ps,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_So,_So,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10059
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn}, |
10060
|
|
|
|
|
|
|
{_Zs,_Po,_Po,_Po,_So,_Lm,_Lo,_Nl,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_So,_So,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Pd,_Ps,_Pe,_Pe,_So,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Pd,_Lm,_Lm,_Lm,_Lm,_Lm,_So,_So,_Nl,_Nl,_Nl,_Lm,_Lo,_Po,_So,_So,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Sk,_Sk,_Lm,_Lm,_Lo,_Pd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Lm,_Lm,_Lm,_Lo}, |
10061
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_So,_So,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10062
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10063
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10064
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn}, |
10065
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10066
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Po,_Po}, |
10067
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lo,_Mn,_Me,_Me,_Me,_Po,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Lm,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Lm,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10068
|
|
|
|
|
|
|
{_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Sk,_Sk,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lm,_Sk,_Sk,_Lu,_Ll,_Lu,_Ll,_Lo,_Lu,_Ll,_Lu,_Ll,_Ll,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Lu,_Ll,_Cn,_Cn,_Lu,_Ll,_Lu,_Lu,_Lu,_Lu,_Ll,_Lu,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Ll,_Lo,_Lm,_Lm,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10069
|
|
|
|
|
|
|
{_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mn,_Mc,_So,_So,_So,_So,_Mn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_So,_So,_Sc,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Lo,_Po,_Lo,_Lo,_Mn}, |
10070
|
|
|
|
|
|
|
{_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mc,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Lm,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn}, |
10071
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_So,_Lo,_Mc,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Lo,_Mn,_Mn,_Mn,_Lo,_Lo,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Lo,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lm,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mc,_Mc,_Po,_Po,_Lo,_Lm,_Lm,_Mc,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10072
|
|
|
|
|
|
|
{_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sk,_Lm,_Lm,_Lm,_Lm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lm,_Sk,_Sk,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mn,_Mc,_Mc,_Mn,_Mc,_Mc,_Po,_Mc,_Mn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10073
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn}, |
10074
|
|
|
|
|
|
|
{_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs,_Cs}, |
10075
|
|
|
|
|
|
|
{_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co}, |
10076
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10077
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Sk,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10078
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Pe,_Ps,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Sc,_So,_Cn,_Cn}, |
10079
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Ps,_Pe,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Pd,_Pd,_Pc,_Pc,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Ps,_Pe,_Po,_Po,_Po,_Po,_Pc,_Pc,_Pc,_Po,_Po,_Po,_Cn,_Po,_Po,_Po,_Po,_Pd,_Ps,_Pe,_Ps,_Pe,_Ps,_Pe,_Po,_Po,_Po,_Sm,_Pd,_Sm,_Sm,_Sm,_Cn,_Po,_Sc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cf}, |
10080
|
|
|
|
|
|
|
{_Cn,_Po,_Po,_Po,_Sc,_Po,_Po,_Po,_Ps,_Pe,_Po,_Sm,_Po,_Pd,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Sm,_Sm,_Sm,_Po,_Po,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ps,_Po,_Pe,_Sk,_Pc,_Sk,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ps,_Sm,_Pe,_Sm,_Ps,_Pe,_Po,_Ps,_Pe,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lm,_Lm,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Sc,_Sc,_Sm,_Sk,_So,_Sc,_Sc,_Cn,_So,_Sm,_Sm,_Sm,_Sm,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cf,_Cf,_So,_So,_Cn,_Cn}, |
10081
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10082
|
|
|
|
|
|
|
{_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_No,_No,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Cn,_Cn}, |
10083
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn}, |
10084
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Nl,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Nl,_Nl,_Nl,_Nl,_Nl,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10085
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn}, |
10086
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10087
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10088
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Po,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_So,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No}, |
10089
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_No,_No,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No}, |
10090
|
|
|
|
|
|
|
{_Lo,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Mn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_So,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10091
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10092
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No}, |
10093
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10094
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Pd,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10095
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10096
|
|
|
|
|
|
|
{_Mc,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Po,_Po,_Cf,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10097
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Po,_Po,_Lo,_Mc,_Mc,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Po,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mc,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Mn,_Mn,_Mn,_Mn,_Po,_Mc,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Lo,_Po,_Lo,_Po,_Po,_Po,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10098
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10099
|
|
|
|
|
|
|
{_Mn,_Mn,_Mc,_Mc,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Mn,_Mn,_Lo,_Mc,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Mc,_Mc,_Mc,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10100
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Lo,_Lo,_Lo,_Po,_Po,_Po,_Po,_Po,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Po,_Po,_Cn,_Po,_Mn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Lo,_Lo,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10101
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mc,_Mc,_Mc,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10102
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mc,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10103
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_Po,_Po,_Po,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10104
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo}, |
10105
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mc,_Mc,_Cn,_Cn,_Mn,_Mn,_Mc,_Mn,_Lo,_Mc,_Lo,_Mc,_Mn,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mc,_Mc,_Mc,_Mc,_Mn,_Lo,_Po,_Lo,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10106
|
|
|
|
|
|
|
{_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Lo,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Po,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mc,_Mn,_Mn,_Mn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Po,_Po,_Po,_Lo,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10107
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10108
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Lo,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Po,_Po,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mc,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mc,_Mn,_Mn,_Mc,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10109
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Mn,_Cn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lo,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Mn,_Mn,_Cn,_Mc,_Mc,_Mn,_Mc,_Mn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10110
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mc,_Mc,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10111
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_Sc,_Sc,_Sc,_Sc,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Po}, |
10112
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10113
|
|
|
|
|
|
|
{_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Nl,_Cn,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10114
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10115
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10116
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10117
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10118
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Po,_Po,_Po,_Po,_Po,_So,_So,_So,_So,_Lm,_Lm,_Lm,_Lm,_Po,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_No,_No,_No,_No,_No,_No,_No,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10119
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10120
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Mn,_Lo,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lm,_Lm,_Po,_Lm,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mc,_Mc,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10121
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10122
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10123
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10124
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10125
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn}, |
10126
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_So,_Mn,_Mn,_Po,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10127
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10128
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mc,_Mc,_Mn,_Mn,_Mn,_So,_So,_So,_Mc,_Mc,_Mc,_Mc,_Mc,_Mc,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10129
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_Mn,_Mn,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10130
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10131
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Cn,_Lu,_Lu,_Cn,_Cn,_Lu,_Cn,_Cn,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
10132
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Lu,_Cn,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Cn,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll}, |
10133
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Cn,_Cn,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll}, |
10134
|
|
|
|
|
|
|
{_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Sm,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Lu,_Ll,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd}, |
10135
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Mn,_So,_So,_Po,_Po,_Po,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10136
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Mn,_Mn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10137
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Lm,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Lo,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10138
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Mn,_Mn,_Mn,_Mn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Sc}, |
10139
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10140
|
|
|
|
|
|
|
{_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Lu,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Ll,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Lm,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Po,_Po,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10141
|
|
|
|
|
|
|
{_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_Sc,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10142
|
|
|
|
|
|
|
{_Cn,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10143
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Cn,_Cn,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Cn,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Sm,_Sm,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10144
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10145
|
|
|
|
|
|
|
{_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_No,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10146
|
|
|
|
|
|
|
{_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10147
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Sk,_Sk,_Sk,_Sk,_Sk}, |
10148
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn}, |
10149
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10150
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10151
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So}, |
10152
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10153
|
|
|
|
|
|
|
{_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_So,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Nd,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10154
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10155
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10156
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10157
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo}, |
10158
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10159
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10160
|
|
|
|
|
|
|
{_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Lo,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10161
|
|
|
|
|
|
|
{_Cn,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cf,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10162
|
|
|
|
|
|
|
{_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Mn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn,_Cn}, |
10163
|
|
|
|
|
|
|
{_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Co,_Cn,_Cn} |
10164
|
|
|
|
|
|
|
}; |
10165
|
|
|
|
|
|
|
|
10166
|
|
|
|
|
|
|
const uint8_t unicode::othercase_index[unicode::CHARS >> 8] = { |
10167
|
|
|
|
|
|
|
0,1,2,3,4,5,6,6,6,6,6,6,6,6,6,6,7,6,6,8,6,6,6,6,6,6,6,6,9,10,11,12,6,13,6,6,14,6,6,6,6,6,6,6,15,16,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,17,18,6,6,6,19,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,20,6,6,6,6,21,6,6,6,6,6,6,6,22,6,6,6,6,6,6,6,6,6,6,6,23,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,24,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,25,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, |
10168
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
10169
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
10170
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
 |
10171
|
|
|
|
|
|
|
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6 |
10172
|
|
|
|
|
|
|
}; |
10173
|
|
|
|
|
|
|
|
10174
|
|
|
|
|
|
|
const char32_t unicode::othercase_block[][256] = { |
10175
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24833,25089,25345,25601,25857,26113,26369,26625,26881,27137,27393,27649,27905,28161,28417,28673,28929,29185,29441,29697,29953,30209,30465,30721,30977,31233,0,0,0,0,0,0,16642,16898,17154,17410,17666,17922,18178,18434,18690,18946,19202,19458,19714,19970,20226,20482,20738,20994,21250,21506,21762,22018,22274,22530,22786,23042,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,236546,0,0,0,0,0,0,0,0,0,0,57345,57601,57857,58113,58369,58625,58881,59137,59393,59649,59905,60161,60417,60673,60929,61185,61441,61697,61953,62209,62465,62721,62977,0,63489,63745,64001,64257,64513,64769,65025,0,49154,49410,49666,49922,50178,50434,50690,50946,51202,51458,51714,51970,52226,52482,52738,52994,53250,53506,53762,54018,54274,54530,54786,0,55298,55554,55810,56066,56322,56578,56834,96258}, |
10176
|
|
|
|
|
|
|
{65793,65538,66305,66050,66817,66562,67329,67074,67841,67586,68353,68098,68865,68610,69377,69122,69889,69634,70401,70146,70913,70658,71425,71170,71937,71682,72449,72194,72961,72706,73473,73218,73985,73730,74497,74242,75009,74754,75521,75266,76033,75778,76545,76290,77057,76802,77569,77314,26881,18690,78593,78338,79105,78850,79617,79362,0,80385,80130,80897,80642,81409,81154,81921,81666,82433,82178,82945,82690,83457,83202,83969,83714,0,84737,84482,85249,84994,85761,85506,86273,86018,86785,86530,87297,87042,87809,87554,88321,88066,88833,88578,89345,89090,89857,89602,90369,90114,90881,90626,91393,91138,91905,91650,92417,92162,92929,92674,93441,93186,93953,93698,94465,94210,94977,94722,95489,95234,96001,95746,65281,96769,96514,97281,97026,97793,97538,21250,148226,152321,99073,98818,99585,99330,152577,100353,100098,153089,153345,101377,101122,0,122113,153857,154369,102913,102658,155649,156417,128514,157953,157697,104705,104450,146690,0,159489,160257,139266,161025,106753,106498,107265,107010,107777,107522,163841,108545,108290,164609,0,0,109825,109570,165889,110593,110338,166401,166657,111617,111362,112129,111874,168449,112897,112642,0,0,113921,113666,0,128770,0,0,0,0,115974,116228,115717,116742,116996,116485,117510,117764,117253,118273,118018,118785,118530,119297,119042,119809,119554,120321,120066,120833,120578,121345,121090,121857,121602,101890,122625,122370,123137,122882,123649,123394,124161,123906,124673,124418,125185,124930,125697,125442,126209,125954,126721,126466,0,127494,127748,127237,128257,128002,103681,114433,129281,129026,129793,129538,130305,130050,130817,130562}, |
10177
|
|
|
|
|
|
|
{131329,131074,131841,131586,132353,132098,132865,132610,133377,133122,133889,133634,134401,134146,134913,134658,135425,135170,135937,135682,136449,136194,136961,136706,137473,137218,137985,137730,138497,138242,139009,138754,105985,0,140033,139778,140545,140290,141057,140802,141569,141314,142081,141826,142593,142338,143105,142850,143617,143362,144129,143874,0,0,0,0,0,0,2909441,146433,146178,104961,2909697,2915842,2916098,147969,147714,98305,166145,166913,149249,148994,149761,149506,150273,150018,150785,150530,151297,151042,2912002,2911490,2912258,98562,99842,0,100610,100866,0,102146,0,102402,10988290,0,0,0,103170,10988546,0,103426,0,10980610,10988034,0,104194,103938,10989058,2908674,10988802,0,0,105474,0,2911746,105730,0,0,106242,0,0,0,0,0,0,0,2909186,0,0,108034,0,10994946,108802,0,0,0,10989826,110082,148482,110850,111106,148738,0,0,0,0,0,112386,0,0,0,0,0,0,0,0,0,0,10990082,10989570,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10178
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,235778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,225537,225282,226049,225794,0,0,227073,226818,0,0,0,261378,261634,261890,0,258817,0,0,0,0,0,0,240641,0,240897,241153,241409,0,248833,0,249089,249345,0,241921,242177,242433,242689,242945,243201,243457,243713,243969,244225,244481,244737,244993,245249,245505,245761,246017,0,246529,246785,247041,247297,247553,247809,248065,248321,248577,230914,231426,231682,231938,0,233730,233986,234242,234498,234754,235010,235266,235522,235778,236034,236290,236546,236802,237058,237314,237570,237826,238338,238338,238594,238850,239106,239362,239618,239874,240130,240386,232450,232962,233218,251649,233986,235522,0,0,0,239106,237570,249602,252161,251906,252673,252418,253185,252930,253697,253442,254209,253954,254721,254466,255233,254978,255745,255490,256257,256002,256769,256514,257281,257026,257793,257538,236034,237826,260354,229122,243713,234754,0,260097,259842,258561,260865,260610,0,228097,228353,228609}, |
10179
|
|
|
|
|
|
|
{282625,282881,283137,283393,283649,283905,284161,284417,284673,284929,285185,285441,285697,285953,286209,286465,274433,274689,274945,275201,275457,275713,275969,276225,276481,276737,276993,277249,277505,277761,278017,278273,278529,278785,279041,279297,279553,279809,280065,280321,280577,280833,281089,281345,281601,281857,282113,282369,266242,266498,266754,267010,267266,267522,267778,268034,268290,268546,268802,269058,269314,269570,269826,270082,270338,270594,270850,271106,271362,271618,271874,272130,272386,272642,272898,273154,273410,273666,273922,274178,262146,262402,262658,262914,263170,263426,263682,263938,264194,264450,264706,264962,265218,265474,265730,265986,286977,286722,287489,287234,288001,287746,288513,288258,289025,288770,289537,289282,290049,289794,290561,290306,291073,290818,291585,291330,292097,291842,292609,292354,293121,292866,293633,293378,294145,293890,294657,294402,295169,294914,0,0,0,0,0,0,0,0,297729,297474,298241,297986,298753,298498,299265,299010,299777,299522,300289,300034,300801,300546,301313,301058,301825,301570,302337,302082,302849,302594,303361,303106,303873,303618,304385,304130,304897,304642,305409,305154,305921,305666,306433,306178,306945,306690,307457,307202,307969,307714,308481,308226,308993,308738,309505,309250,310017,309762,310529,310274,311041,310786,315137,311809,311554,312321,312066,312833,312578,313345,313090,313857,313602,314369,314114,314881,314626,311298,315649,315394,316161,315906,316673,316418,317185,316930,317697,317442,318209,317954,318721,318466,319233,318978,319745,319490,320257,320002,320769,320514,321281,321026,321793,321538,322305,322050,322817,322562,323329,323074,323841,323586,324353,324098,324865,324610,325377,325122,325889,325634,326401,326146,326913,326658,327425,327170}, |
10180
|
|
|
|
|
|
|
{327937,327682,328449,328194,328961,328706,329473,329218,329985,329730,330497,330242,331009,330754,331521,331266,332033,331778,332545,332290,333057,332802,333569,333314,334081,333826,334593,334338,335105,334850,335617,335362,336129,335874,336641,336386,337153,336898,337665,337410,338177,337922,338689,338434,339201,338946,339713,339458,0,352513,352769,353025,353281,353537,353793,354049,354305,354561,354817,355073,355329,355585,355841,356097,356353,356609,356865,357121,357377,357633,357889,358145,358401,358657,358913,359169,359425,359681,359937,360193,360449,360705,360961,361217,361473,361729,361985,0,0,0,0,0,0,0,0,0,0,340226,340482,340738,340994,341250,341506,341762,342018,342274,342530,342786,343042,343298,343554,343810,344066,344322,344578,344834,345090,345346,345602,345858,346114,346370,346626,346882,347138,347394,347650,347906,348162,348418,348674,348930,349186,349442,349698,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10181
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10182
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2949121,2949377,2949633,2949889,2950145,2950401,2950657,2950913,2951169,2951425,2951681,2951937,2952193,2952449,2952705,2952961,2953217,2953473,2953729,2953985,2954241,2954497,2954753,2955009,2955265,2955521,2955777,2956033,2956289,2956545,2956801,2957057,2957313,2957569,2957825,2958081,2958337,2958593,0,2959105,0,0,0,0,0,2960641,0,0,1871875,1872131,1872387,1872643,1872899,1873155,1873411,1873667,1873923,1874179,1874435,1874691,1874947,1875203,1875459,1875715,1875971,1876227,1876483,1876739,1876995,1877251,1877507,1877763,1878019,1878275,1878531,1878787,1879043,1879299,1879555,1879811,1880067,1880323,1880579,1880835,1881091,1881347,1881603,1881859,1882115,1882371,1882627,0,0,1883395,1883651,1883907}, |
10183
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11235329,11235585,11235841,11236097,11236353,11236609,11236865,11237121,11237377,11237633,11237889,11238145,11238401,11238657,11238913,11239169,11239425,11239681,11239937,11240193,11240449,11240705,11240961,11241217,11241473,11241729,11241985,11242241,11242497,11242753,11243009,11243265,11243521,11243777,11244033,11244289,11244545,11244801,11245057,11245313,11245569,11245825,11246081,11246337,11246593,11246849,11247105,11247361,11247617,11247873,11248129,11248385,11248641,11248897,11249153,11249409,11249665,11249921,11250177,11250433,11250689,11250945,11251201,11251457,11251713,11251969,11252225,11252481,11252737,11252993,11253249,11253505,11253761,11254017,11254273,11254529,11254785,11255041,11255297,11255553,1308673,1308929,1309185,1309441,1309697,1309953,0,0,1306626,1306882,1307138,1307394,1307650,1307906,0,0}, |
10184
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,266754,267266,269826,270594,270850,270850,272898,287234,10897922,0,0,0,0,0,0,0,1101825,1102081,1102337,1102593,1102849,1103105,1103361,1103617,1103873,1104129,1104385,1104641,1104897,1105153,1105409,1105665,1105921,1106177,1106433,1106689,1106945,1107201,1107457,1107713,1107969,1108225,1108481,1108737,1108993,1109249,1109505,1109761,1110017,1110273,1110529,1110785,1111041,1111297,1111553,1111809,1112065,1112321,1112577,0,0,1113345,1113601,1113857,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10185
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10976514,0,0,0,2908930,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10995202,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10186
|
|
|
|
|
|
|
{1966337,1966082,1966849,1966594,1967361,1967106,1967873,1967618,1968385,1968130,1968897,1968642,1969409,1969154,1969921,1969666,1970433,1970178,1970945,1970690,1971457,1971202,1971969,1971714,1972481,1972226,1972993,1972738,1973505,1973250,1974017,1973762,1974529,1974274,1975041,1974786,1975553,1975298,1976065,1975810,1976577,1976322,1977089,1976834,1977601,1977346,1978113,1977858,1978625,1978370,1979137,1978882,1979649,1979394,1980161,1979906,1980673,1980418,1981185,1980930,1981697,1981442,1982209,1981954,1982721,1982466,1983233,1982978,1983745,1983490,1984257,1984002,1984769,1984514,1985281,1985026,1985793,1985538,1986305,1986050,1986817,1986562,1987329,1987074,1987841,1987586,1988353,1988098,1988865,1988610,1989377,1989122,1989889,1989634,1990401,1990146,1990913,1990658,1991425,1991170,1991937,1991682,1992449,1992194,1992961,1992706,1993473,1993218,1993985,1993730,1994497,1994242,1995009,1994754,1995521,1995266,1996033,1995778,1996545,1996290,1997057,1996802,1997569,1997314,1998081,1997826,1998593,1998338,1999105,1998850,1999617,1999362,2000129,1999874,2000641,2000386,2001153,2000898,2001665,2001410,2002177,2001922,2002689,2002434,2003201,2002946,2003713,2003458,2004225,2003970,0,0,0,0,0,1990658,0,0,57089,0,2007297,2007042,2007809,2007554,2008321,2008066,2008833,2008578,2009345,2009090,2009857,2009602,2010369,2010114,2010881,2010626,2011393,2011138,2011905,2011650,2012417,2012162,2012929,2012674,2013441,2013186,2013953,2013698,2014465,2014210,2014977,2014722,2015489,2015234,2016001,2015746,2016513,2016258,2017025,2016770,2017537,2017282,2018049,2017794,2018561,2018306,2019073,2018818,2019585,2019330,2020097,2019842,2020609,2020354,2021121,2020866,2021633,2021378,2022145,2021890,2022657,2022402,2023169,2022914,2023681,2023426,2024193,2023938,2024705,2024450,2025217,2024962,2025729,2025474,2026241,2025986,2026753,2026498,2027265,2027010,2027777,2027522,2028289,2028034,2028801,2028546,2029313,2029058,2029825,2029570,2030337,2030082,2030849,2030594,2031361, |
10187
|
|
|
|
|
|
|
2031106}, |
10188
|
|
|
|
|
|
|
{2033666,2033922,2034178,2034434,2034690,2034946,2035202,2035458,2031617,2031873,2032129,2032385,2032641,2032897,2033153,2033409,2037762,2038018,2038274,2038530,2038786,2039042,0,0,2035713,2035969,2036225,2036481,2036737,2036993,0,0,2041858,2042114,2042370,2042626,2042882,2043138,2043394,2043650,2039809,2040065,2040321,2040577,2040833,2041089,2041345,2041601,2045954,2046210,2046466,2046722,2046978,2047234,2047490,2047746,2043905,2044161,2044417,2044673,2044929,2045185,2045441,2045697,2050050,2050306,2050562,2050818,2051074,2051330,0,0,2048001,2048257,2048513,2048769,2049025,2049281,0,0,0,2054402,0,2054914,0,2055426,0,2055938,0,2052353,0,2052865,0,2053377,0,2053889,2058242,2058498,2058754,2059010,2059266,2059522,2059778,2060034,2056193,2056449,2056705,2056961,2057217,2057473,2057729,2057985,2079234,2079490,2082818,2083074,2083330,2083586,2087426,2087682,2095106,2095362,2091522,2091778,2095618,2095874,0,0,2066434,2066690,2066946,2067202,2067458,2067714,2067970,2068226,2064385,2064641,2064897,2065153,2065409,2065665,2065921,2066177,2070530,2070786,2071042,2071298,2071554,2071810,2072066,2072322,2068481,2068737,2068993,2069249,2069505,2069761,2070017,2070273,2074626,2074882,2075138,2075394,2075650,2075906,2076162,2076418,2072577,2072833,2073089,2073345,2073601,2073857,2074113,2074369,2078722,2078978,0,2079746,0,0,0,0,2076673,2076929,2060289,2060545,2077441,0,235778,0,0,0,0,2083842,0,0,0,0,2060801,2061057,2061313,2061569,2081537,0,0,0,2086914,2087170,0,0,0,0,0,0,2084865,2085121,2061825,2062081,0,0,0,0,2091010,2091266,0,0,0,2092034,0,0,2088961,2089217,2062849,2063105,2090241,0,0,0,0,0,0,2096130,0,0,0,0,2062337,2062593,2063361,2063617,2093825,0,0,0}, |
10189
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,248065,0,0,0,27393,58625,0,0,0,0,0,0,2182657,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2175490,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2191361,2191617,2191873,2192129,2192385,2192641,2192897,2193153,2193409,2193665,2193921,2194177,2194433,2194689,2194945,2195201,2187266,2187522,2187778,2188034,2188290,2188546,2188802,2189058,2189314,2189570,2189826,2190082,2190338,2190594,2190850,2191106,0,0,0,2196481,2196226,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10190
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2412545,2412801,2413057,2413313,2413569,2413825,2414081,2414337,2414593,2414849,2415105,2415361,2415617,2415873,2416129,2416385,2416641,2416897,2417153,2417409,2417665,2417921,2418177,2418433,2418689,2418945,2405890,2406146,2406402,2406658,2406914,2407170,2407426,2407682,2407938,2408194,2408450,2408706,2408962,2409218,2409474,2409730,2409986,2410242,2410498,2410754,2411010,2411266,2411522,2411778,2412034,2412290,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10191
|
|
|
|
|
|
|
{2895873,2896129,2896385,2896641,2896897,2897153,2897409,2897665,2897921,2898177,2898433,2898689,2898945,2899201,2899457,2899713,2899969,2900225,2900481,2900737,2900993,2901249,2901505,2901761,2902017,2902273,2902529,2902785,2903041,2903297,2903553,2903809,2904065,2904321,2904577,2904833,2905089,2905345,2905601,2905857,2906113,2906369,2906625,2906881,2907137,2907393,2907649,0,2883586,2883842,2884098,2884354,2884610,2884866,2885122,2885378,2885634,2885890,2886146,2886402,2886658,2886914,2887170,2887426,2887682,2887938,2888194,2888450,2888706,2888962,2889218,2889474,2889730,2889986,2890242,2890498,2890754,2891010,2891266,2891522,2891778,2892034,2892290,2892546,2892802,2893058,2893314,2893570,2893826,2894082,2894338,2894594,2894850,2895106,2895362,0,2908417,2908162,158465,1932545,163073,145922,146946,2910209,2909954,2910721,2910466,2911233,2910978,151809,160001,151553,152065,0,2913025,2912770,0,2913793,2913538,0,0,0,0,0,0,0,147201,147457,2916609,2916354,2917121,2916866,2917633,2917378,2918145,2917890,2918657,2918402,2919169,2918914,2919681,2919426,2920193,2919938,2920705,2920450,2921217,2920962,2921729,2921474,2922241,2921986,2922753,2922498,2923265,2923010,2923777,2923522,2924289,2924034,2924801,2924546,2925313,2925058,2925825,2925570,2926337,2926082,2926849,2926594,2927361,2927106,2927873,2927618,2928385,2928130,2928897,2928642,2929409,2929154,2929921,2929666,2930433,2930178,2930945,2930690,2931457,2931202,2931969,2931714,2932481,2932226,2932993,2932738,2933505,2933250,2934017,2933762,2934529,2934274,2935041,2934786,2935553,2935298,2936065,2935810,2936577,2936322,2937089,2936834,2937601,2937346,2938113,2937858,2938625,2938370,2939137,2938882,2939649,2939394,2940161,2939906,2940673,2940418,2941185,2940930,2941697,2941442,0,0,0,0,0,0,0,2944001,2943746,2944513,2944258,0,0,0,2945793,2945538,0,0,0,0,0,0,0,0,0,0,0,0}, |
10192
|
|
|
|
|
|
|
{1089538,1089794,1090050,1090306,1090562,1090818,1091074,1091330,1091586,1091842,1092098,1092354,1092610,1092866,1093122,1093378,1093634,1093890,1094146,1094402,1094658,1094914,1095170,1095426,1095682,1095938,1096194,1096450,1096706,1096962,1097218,1097474,1097730,1097986,1098242,1098498,1098754,1099010,0,1099522,0,0,0,0,0,1101058,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10193
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10895617,10895362,10896129,10895874,10896641,10896386,10897153,10896898,10897665,10897410,10898177,10897922,10898689,10898434,10899201,10898946,10899713,10899458,10900225,10899970,10900737,10900482,10901249,10900994,10901761,10901506,10902273,10902018,10902785,10902530,10903297,10903042,10903809,10903554,10904321,10904066,10904833,10904578,10905345,10905090,10905857,10905602,10906369,10906114,10906881,10906626,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10912001,10911746,10912513,10912258,10913025,10912770,10913537,10913282,10914049,10913794,10914561,10914306,10915073,10914818,10915585,10915330,10916097,10915842,10916609,10916354,10917121,10916866,10917633,10917378,10918145,10917890,10918657,10918402,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10194
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10953473,10953218,10953985,10953730,10954497,10954242,10955009,10954754,10955521,10955266,10956033,10955778,10956545,10956290,0,0,10957569,10957314,10958081,10957826,10958593,10958338,10959105,10958850,10959617,10959362,10960129,10959874,10960641,10960386,10961153,10960898,10961665,10961410,10962177,10961922,10962689,10962434,10963201,10962946,10963713,10963458,10964225,10963970,10964737,10964482,10965249,10964994,10965761,10965506,10966273,10966018,10966785,10966530,10967297,10967042,10967809,10967554,10968321,10968066,10968833,10968578,10969345,10969090,10969857,10969602,10970369,10970114,10970881,10970626,10971393,10971138,10971905,10971650,10972417,10972162,10972929,10972674,0,0,0,0,0,0,0,0,0,10975745,10975490,10976257,10976002,1931521,10977025,10976770,10977537,10977282,10978049,10977794,10978561,10978306,10979073,10978818,0,0,0,10980353,10980098,156929,0,0,10981633,10981378,10982145,10981890,10994690,0,10983169,10982914,10983681,10983426,10984193,10983938,10984705,10984450,10985217,10984962,10985729,10985474,10986241,10985986,10986753,10986498,10987265,10987010,10987777,10987522,157185,154625,155905,158721,158209,0,171521,165633,171265,11227905,10990849,10990594,10991361,10991106,10991873,10991618,10992385,10992130,10992897,10992642,10993409,10993154,0,0,10994433,10994178,10982401,164353,1936897,10995713,10995458,10996225,10995970,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,11007489,11007234,0,0,0,0,0,0,0,0,0}, |
10195
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10990338,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1286146,1286402,1286658,1286914,1287170,1287426,1287682,1287938,1288194,1288450,1288706,1288962,1289218,1289474,1289730,1289986,1290242,1290498,1290754,1291010,1291266,1291522,1291778,1292034,1292290,1292546,1292802,1293058,1293314,1293570,1293826,1294082,1294338,1294594,1294850,1295106,1295362,1295618,1295874,1296130,1296386,1296642,1296898,1297154,1297410,1297666,1297922,1298178,1298434,1298690,1298946,1299202,1299458,1299714,1299970,1300226,1300482,1300738,1300994,1301250,1301506,1301762,1302018,1302274,1302530,1302786,1303042,1303298,1303554,1303810,1304066,1304322,1304578,1304834,1305090,1305346,1305602,1305858,1306114,1306370,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10196
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16728321,16728577,16728833,16729089,16729345,16729601,16729857,16730113,16730369,16730625,16730881,16731137,16731393,16731649,16731905,16732161,16732417,16732673,16732929,16733185,16733441,16733697,16733953,16734209,16734465,16734721,0,0,0,0,0,0,16720130,16720386,16720642,16720898,16721154,16721410,16721666,16721922,16722178,16722434,16722690,16722946,16723202,16723458,16723714,16723970,16724226,16724482,16724738,16724994,16725250,16725506,16725762,16726018,16726274,16726530,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10197
|
|
|
|
|
|
|
{17049601,17049857,17050113,17050369,17050625,17050881,17051137,17051393,17051649,17051905,17052161,17052417,17052673,17052929,17053185,17053441,17053697,17053953,17054209,17054465,17054721,17054977,17055233,17055489,17055745,17056001,17056257,17056513,17056769,17057025,17057281,17057537,17057793,17058049,17058305,17058561,17058817,17059073,17059329,17059585,17039362,17039618,17039874,17040130,17040386,17040642,17040898,17041154,17041410,17041666,17041922,17042178,17042434,17042690,17042946,17043202,17043458,17043714,17043970,17044226,17044482,17044738,17044994,17045250,17045506,17045762,17046018,17046274,17046530,17046786,17047042,17047298,17047554,17047810,17048066,17048322,17048578,17048834,17049090,17049346,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17094657,17094913,17095169,17095425,17095681,17095937,17096193,17096449,17096705,17096961,17097217,17097473,17097729,17097985,17098241,17098497,17098753,17099009,17099265,17099521,17099777,17100033,17100289,17100545,17100801,17101057,17101313,17101569,17101825,17102081,17102337,17102593,17102849,17103105,17103361,17103617,0,0,0,0,17084418,17084674,17084930,17085186,17085442,17085698,17085954,17086210,17086466,17086722,17086978,17087234,17087490,17087746,17088002,17088258,17088514,17088770,17089026,17089282,17089538,17089794,17090050,17090306,17090562,17090818,17091074,17091330,17091586,17091842,17092098,17092354,17092610,17092866,17093122,17093378,0,0,0,0}, |
10198
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,17612801,17613057,17613313,17613569,17613825,17614081,17614337,17614593,17614849,17615105,17615361,17615617,17615873,17616129,17616385,17616641,17616897,17617153,17617409,17617665,17617921,17618177,17618433,17618689,17618945,17619201,17619457,17619713,17619969,17620225,17620481,17620737,17620993,17621249,17621505,17621761,17622017,17622273,17622529,17622785,17623041,17623297,17623553,17623809,17624065,17624321,17624577,17624833,17625089,17625345,17625601,0,0,0,0,0,0,0,0,0,0,0,0,0,17596418,17596674,17596930,17597186,17597442,17597698,17597954,17598210,17598466,17598722,17598978,17599234,17599490,17599746,17600002,17600258,17600514,17600770,17601026,17601282,17601538,17601794,17602050,17602306,17602562,17602818,17603074,17603330,17603586,17603842,17604098,17604354,17604610,17604866,17605122,17605378,17605634,17605890,17606146,17606402,17606658,17606914,17607170,17607426,17607682,17607938,17608194,17608450,17608706,17608962,17609218,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10199
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18399233,18399489,18399745,18400001,18400257,18400513,18400769,18401025,18401281,18401537,18401793,18402049,18402305,18402561,18402817,18403073,18403329,18403585,18403841,18404097,18404353,18404609,18404865,18405121,18405377,18405633,18405889,18406145,18406401,18406657,18406913,18407169,18391042,18391298,18391554,18391810,18392066,18392322,18392578,18392834,18393090,18393346,18393602,18393858,18394114,18394370,18394626,18394882,18395138,18395394,18395650,18395906,18396162,18396418,18396674,18396930,18397186,18397442,18397698,18397954,18398210,18398466,18398722,18398978,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10200
|
|
|
|
|
|
|
{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,24010753,24011009,24011265,24011521,24011777,24012033,24012289,24012545,24012801,24013057,24013313,24013569,24013825,24014081,24014337,24014593,24014849,24015105,24015361,24015617,24015873,24016129,24016385,24016641,24016897,24017153,24017409,24017665,24017921,24018177,24018433,24018689,24002562,24002818,24003074,24003330,24003586,24003842,24004098,24004354,24004610,24004866,24005122,24005378,24005634,24005890,24006146,24006402,24006658,24006914,24007170,24007426,24007682,24007938,24008194,24008450,24008706,24008962,24009218,24009474,24009730,24009986,24010242,24010498,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, |
10201
|
|
|
|
|
|
|
{32055809,32056065,32056321,32056577,32056833,32057089,32057345,32057601,32057857,32058113,32058369,32058625,32058881,32059137,32059393,32059649,32059905,32060161,32060417,32060673,32060929,32061185,32061441,32061697,32061953,32062209,32062465,32062721,32062977,32063233,32063489,32063745,32064001,32064257,32047106,32047362,32047618,32047874,32048130,32048386,32048642,32048898,32049154,32049410,32049666,32049922,32050178,32050434,32050690,32050946,32051202,32051458,32051714,32051970,32052226,32052482,32052738,32052994,32053250,32053506,32053762,32054018,32054274,32054530,32054786,32055042,32055298,32055554,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0} |
10202
|
|
|
|
|
|
|
}; |
10203
|
|
|
|
|
|
|
|
10204
|
|
|
|
|
|
|
} // namespace unilib |
10205
|
|
|
|
|
|
|
|
10206
|
|
|
|
|
|
|
///////// |
10207
|
|
|
|
|
|
|
// File: unilib/utf8.cpp |
10208
|
|
|
|
|
|
|
///////// |
10209
|
|
|
|
|
|
|
|
10210
|
|
|
|
|
|
|
// This file is part of UniLib . |
10211
|
|
|
|
|
|
|
// |
10212
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
10213
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10214
|
|
|
|
|
|
|
// |
10215
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10216
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10217
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10218
|
|
|
|
|
|
|
// |
10219
|
|
|
|
|
|
|
// UniLib version: 3.2.0 |
10220
|
|
|
|
|
|
|
// Unicode version: 13.0.0 |
10221
|
|
|
|
|
|
|
|
10222
|
|
|
|
|
|
|
namespace unilib { |
10223
|
|
|
|
|
|
|
|
10224
|
0
|
|
|
|
|
|
// Returns the length in bytes (1 to 4) of the well-formed UTF-8 sequence
// starting at `s`, or 0 if the sequence is ill-formed.
//
// At most `avail` bytes are considered; `avail` must be at least 1. Bytes are
// inspected strictly left to right and inspection stops at the first
// offending byte. A NUL byte is never a valid continuation byte, so a
// NUL-terminated buffer can be checked safely by passing the maximum size_t
// as `avail`.
//
// The accepted byte ranges follow the "Well-Formed UTF-8 Byte Sequences" table
// of the Unicode Standard, which excludes overlong encodings, UTF-16
// surrogates (U+D800..U+DFFF) and values above U+10FFFF.
static size_t utf8_well_formed_length(const unsigned char* s, size_t avail) {
  const unsigned char lead = s[0];
  if (lead < 0x80) return 1;

  size_t length;
  // Only the second byte has lead-dependent bounds.
  unsigned char second_min = 0x80, second_max = 0xBF;

  if (lead >= 0xC2 && lead <= 0xDF) {
    // 0xC0 and 0xC1 could only produce overlong forms of ASCII.
    length = 2;
  } else if (lead >= 0xE0 && lead <= 0xEF) {
    length = 3;
    if (lead == 0xE0) second_min = 0xA0;       // below that: overlong
    else if (lead == 0xED) second_max = 0x9F;  // above that: surrogates
  } else if (lead >= 0xF0 && lead <= 0xF4) {
    length = 4;
    if (lead == 0xF0) second_min = 0x90;       // below that: overlong
    else if (lead == 0xF4) second_max = 0x8F;  // above that: > U+10FFFF
  } else {
    // Stray continuation byte (0x80..0xBF) or an unusable lead byte.
    return 0;
  }

  if (avail < length) return 0;  // truncated sequence
  if (s[1] < second_min || s[1] > second_max) return 0;
  for (size_t i = 2; i < length; i++)
    if (s[i] < 0x80 || s[i] > 0xBF) return 0;

  return length;
}

// Returns true iff the NUL-terminated string `str` is well-formed UTF-8.
// The empty string is valid.
bool utf8::valid(const char* str) {
  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(str);

  while (*ptr) {
    const size_t length = utf8_well_formed_length(ptr, static_cast<size_t>(-1));
    if (!length) return false;
    ptr += length;
  }
  return true;
}

// Returns true iff the `len` bytes starting at `str` are well-formed UTF-8.
// Embedded NUL bytes are accepted as U+0000; a sequence cut off by the end of
// the buffer is invalid. `str` is not dereferenced when `len` is 0.
bool utf8::valid(const char* str, size_t len) {
  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(str);

  while (len) {
    const size_t length = utf8_well_formed_length(ptr, len);
    if (!length) return false;
    ptr += length;
    len -= length;
  }
  return true;
}
10259
|
|
|
|
|
|
|
|
10260
|
0
|
|
|
|
|
|
// Replaces the contents of `decoded` with the code points read from `str`.
// Code points are obtained one at a time from the single-character
// decode(str) overload (defined elsewhere; presumably it advances `str`) and
// appended until that overload returns 0.
void utf8::decode(const char* str, std::u32string& decoded) {
  decoded.clear();

  char32_t code_point = decode(str);
  while (code_point != 0) {
    decoded.push_back(code_point);
    code_point = decode(str);
  }
}
10266
|
|
|
|
|
|
|
|
10267
|
0
|
|
|
|
|
|
void utf8::decode(const char* str, size_t len, std::u32string& decoded) { |
10268
|
|
|
|
|
|
|
decoded.clear(); |
10269
|
|
|
|
|
|
|
|
10270
|
0
|
0
|
|
|
|
|
while (len) |
10271
|
0
|
|
|
|
|
|
decoded.push_back(decode(str, len)); |
10272
|
0
|
|
|
|
|
|
} |
10273
|
|
|
|
|
|
|
|
10274
|
0
|
|
|
|
|
|
void utf8::encode(const std::u32string& str, std::string& encoded) { |
10275
|
|
|
|
|
|
|
encoded.clear(); |
10276
|
|
|
|
|
|
|
|
10277
|
0
|
0
|
|
|
|
|
for (auto&& chr : str) |
10278
|
0
|
|
|
|
|
|
append(encoded, chr); |
10279
|
0
|
|
|
|
|
|
} |
10280
|
|
|
|
|
|
|
|
10281
|
|
|
|
|
|
|
const char utf8::REPLACEMENT_CHAR; |
10282
|
|
|
|
|
|
|
|
10283
|
|
|
|
|
|
|
} // namespace unilib |
10284
|
|
|
|
|
|
|
|
10285
|
|
|
|
|
|
|
///////// |
10286
|
|
|
|
|
|
|
// File: unilib/version.h |
10287
|
|
|
|
|
|
|
///////// |
10288
|
|
|
|
|
|
|
|
10289
|
|
|
|
|
|
|
// This file is part of UniLib . |
10290
|
|
|
|
|
|
|
// |
10291
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
10292
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10293
|
|
|
|
|
|
|
// |
10294
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10295
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10296
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10297
|
|
|
|
|
|
|
// |
10298
|
|
|
|
|
|
|
// UniLib version: 3.2.0 |
10299
|
|
|
|
|
|
|
// Unicode version: 13.0.0 |
10300
|
|
|
|
|
|
|
|
10301
|
|
|
|
|
|
|
namespace unilib { |
10302
|
|
|
|
|
|
|
|
10303
|
0
|
|
|
|
|
|
// Version descriptor: three numeric components plus a pre-release label.
struct version {
  // Returns current version.
  static version current();

  // Numeric components, in aggregate-initialization order.
  unsigned major;
  unsigned minor;
  unsigned patch;
  // Pre-release label.
  std::string prerelease;
};
10312
|
|
|
|
|
|
|
|
10313
|
|
|
|
|
|
|
} // namespace unilib |
10314
|
|
|
|
|
|
|
|
10315
|
|
|
|
|
|
|
///////// |
10316
|
|
|
|
|
|
|
// File: unilib/version.cpp |
10317
|
|
|
|
|
|
|
///////// |
10318
|
|
|
|
|
|
|
|
10319
|
|
|
|
|
|
|
// This file is part of UniLib . |
10320
|
|
|
|
|
|
|
// |
10321
|
|
|
|
|
|
|
// Copyright 2014 Institute of Formal and Applied Linguistics, Faculty of |
10322
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10323
|
|
|
|
|
|
|
// |
10324
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10325
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10326
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10327
|
|
|
|
|
|
|
// |
10328
|
|
|
|
|
|
|
// UniLib version: 3.2.0 |
10329
|
|
|
|
|
|
|
// Unicode version: 13.0.0 |
10330
|
|
|
|
|
|
|
|
10331
|
|
|
|
|
|
|
namespace unilib { |
10332
|
|
|
|
|
|
|
|
10333
|
|
|
|
|
|
|
// Returns current version. |
10334
|
0
|
|
|
|
|
|
version version::current() { |
10335
|
0
|
0
|
|
|
|
|
return {3, 2, 0, ""}; |
10336
|
|
|
|
|
|
|
} |
10337
|
|
|
|
|
|
|
|
10338
|
|
|
|
|
|
|
} // namespace unilib |
10339
|
|
|
|
|
|
|
|
10340
|
|
|
|
|
|
|
///////// |
10341
|
|
|
|
|
|
|
// File: utils/compressor_load.cpp |
10342
|
|
|
|
|
|
|
///////// |
10343
|
|
|
|
|
|
|
|
10344
|
|
|
|
|
|
|
// This file is part of UFAL C++ Utils . |
10345
|
|
|
|
|
|
|
// |
10346
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
10347
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
10348
|
|
|
|
|
|
|
// |
10349
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
10350
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
10351
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
10352
|
|
|
|
|
|
|
|
10353
|
|
|
|
|
|
|
namespace utils { |
10354
|
|
|
|
|
|
|
|
10355
|
|
|
|
|
|
|
// Start of LZMA compression library by Igor Pavlov |
10356
|
|
|
|
|
|
|
namespace lzma { |
10357
|
|
|
|
|
|
|
|
10358
|
|
|
|
|
|
|
// Types.h -- Basic types |
10359
|
|
|
|
|
|
|
// 2010-10-09 : Igor Pavlov : Public domain |
10360
|
|
|
|
|
|
|
#ifndef UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
10361
|
|
|
|
|
|
|
#define UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
10362
|
|
|
|
|
|
|
|
10363
|
|
|
|
|
|
|
#define SZ_OK 0 |
10364
|
|
|
|
|
|
|
|
10365
|
|
|
|
|
|
|
#define SZ_ERROR_DATA 1 |
10366
|
|
|
|
|
|
|
#define SZ_ERROR_MEM 2 |
10367
|
|
|
|
|
|
|
#define SZ_ERROR_CRC 3 |
10368
|
|
|
|
|
|
|
#define SZ_ERROR_UNSUPPORTED 4 |
10369
|
|
|
|
|
|
|
#define SZ_ERROR_PARAM 5 |
10370
|
|
|
|
|
|
|
#define SZ_ERROR_INPUT_EOF 6 |
10371
|
|
|
|
|
|
|
#define SZ_ERROR_OUTPUT_EOF 7 |
10372
|
|
|
|
|
|
|
#define SZ_ERROR_READ 8 |
10373
|
|
|
|
|
|
|
#define SZ_ERROR_WRITE 9 |
10374
|
|
|
|
|
|
|
#define SZ_ERROR_PROGRESS 10 |
10375
|
|
|
|
|
|
|
#define SZ_ERROR_FAIL 11 |
10376
|
|
|
|
|
|
|
#define SZ_ERROR_THREAD 12 |
10377
|
|
|
|
|
|
|
|
10378
|
|
|
|
|
|
|
#define SZ_ERROR_ARCHIVE 16 |
10379
|
|
|
|
|
|
|
#define SZ_ERROR_NO_ARCHIVE 17 |
10380
|
|
|
|
|
|
|
|
10381
|
|
|
|
|
|
|
typedef int SRes; |
10382
|
|
|
|
|
|
|
|
10383
|
|
|
|
|
|
|
#ifndef RINOK |
10384
|
|
|
|
|
|
|
#define RINOK(x) { int __result__ = (x); if (__result__ != 0) return __result__; } |
10385
|
|
|
|
|
|
|
#endif |
10386
|
|
|
|
|
|
|
|
10387
|
|
|
|
|
|
|
/* The following interfaces use first parameter as pointer to structure */ |
10388
|
|
|
|
|
|
|
|
10389
|
|
|
|
|
|
|
/* Callback table for reading input one byte at a time. */
struct IByteIn {
  /* reads one byte, returns 0 in case of EOF or error */
  uint8_t (*Read)(void* p);
};
10393
|
|
|
|
|
|
|
|
10394
|
|
|
|
|
|
|
/* Callback table for writing output one byte at a time. */
struct IByteOut {
  /* Receives the pointer `p` and the byte `b` to write. */
  void (*Write)(void* p, uint8_t b);
};
10398
|
|
|
|
|
|
|
|
10399
|
|
|
|
|
|
|
struct ISeqInStream |
10400
|
|
|
|
|
|
|
{ |
10401
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); |
10402
|
|
|
|
|
|
|
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. |
10403
|
|
|
|
|
|
|
(output(*size) < input(*size)) is allowed */ |
10404
|
|
|
|
|
|
|
}; |
10405
|
|
|
|
|
|
|
|
10406
|
|
|
|
|
|
|
/* it can return SZ_ERROR_INPUT_EOF */ |
10407
|
|
|
|
|
|
|
SRes SeqInStream_Read(ISeqInStream *stream, void *buf, size_t size); |
10408
|
|
|
|
|
|
|
SRes SeqInStream_Read2(ISeqInStream *stream, void *buf, size_t size, SRes errorType); |
10409
|
|
|
|
|
|
|
SRes SeqInStream_ReadByte(ISeqInStream *stream, uint8_t *buf); |
10410
|
|
|
|
|
|
|
|
10411
|
|
|
|
|
|
|
/* Callback table for a sequential output stream. */
struct ISeqOutStream {
  /* Returns: result - the number of actually written bytes.
     (result < size) means error */
  size_t (*Write)(void* p, const void* buf, size_t size);
};
10417
|
|
|
|
|
|
|
|
10418
|
|
|
|
|
|
|
/* Origin selector passed to the Seek callbacks of the stream interfaces. */
enum ESzSeek {
  SZ_SEEK_SET = 0,
  SZ_SEEK_CUR = 1,
  SZ_SEEK_END = 2
};
10424
|
|
|
|
|
|
|
|
10425
|
|
|
|
|
|
|
struct ISeekInStream |
10426
|
|
|
|
|
|
|
{ |
10427
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); /* same as ISeqInStream::Read */ |
10428
|
|
|
|
|
|
|
SRes (*Seek)(void *p, int64_t *pos, ESzSeek origin); |
10429
|
|
|
|
|
|
|
}; |
10430
|
|
|
|
|
|
|
|
10431
|
|
|
|
|
|
|
struct ILookInStream |
10432
|
|
|
|
|
|
|
{ |
10433
|
|
|
|
|
|
|
SRes (*Look)(void *p, const void **buf, size_t *size); |
10434
|
|
|
|
|
|
|
/* if (input(*size) != 0 && output(*size) == 0) means end_of_stream. |
10435
|
|
|
|
|
|
|
(output(*size) > input(*size)) is not allowed |
10436
|
|
|
|
|
|
|
(output(*size) < input(*size)) is allowed */ |
10437
|
|
|
|
|
|
|
SRes (*Skip)(void *p, size_t offset); |
10438
|
|
|
|
|
|
|
/* offset must be <= output(*size) of Look */ |
10439
|
|
|
|
|
|
|
|
10440
|
|
|
|
|
|
|
SRes (*Read)(void *p, void *buf, size_t *size); |
10441
|
|
|
|
|
|
|
/* reads directly (without buffer). It's same as ISeqInStream::Read */ |
10442
|
|
|
|
|
|
|
SRes (*Seek)(void *p, int64_t *pos, ESzSeek origin); |
10443
|
|
|
|
|
|
|
}; |
10444
|
|
|
|
|
|
|
|
10445
|
|
|
|
|
|
|
SRes LookInStream_LookRead(ILookInStream *stream, void *buf, size_t *size); |
10446
|
|
|
|
|
|
|
SRes LookInStream_SeekTo(ILookInStream *stream, uint64_t offset); |
10447
|
|
|
|
|
|
|
|
10448
|
|
|
|
|
|
|
/* reads via ILookInStream::Read */ |
10449
|
|
|
|
|
|
|
SRes LookInStream_Read2(ILookInStream *stream, void *buf, size_t size, SRes errorType); |
10450
|
|
|
|
|
|
|
SRes LookInStream_Read(ILookInStream *stream, void *buf, size_t size); |
10451
|
|
|
|
|
|
|
|
10452
|
|
|
|
|
|
|
#define LookToRead_BUF_SIZE (1 << 14) |
10453
|
|
|
|
|
|
|
|
10454
|
|
|
|
|
|
|
struct CLookToRead |
10455
|
|
|
|
|
|
|
{ |
10456
|
|
|
|
|
|
|
ILookInStream s; |
10457
|
|
|
|
|
|
|
ISeekInStream *realStream; |
10458
|
|
|
|
|
|
|
size_t pos; |
10459
|
|
|
|
|
|
|
size_t size; |
10460
|
|
|
|
|
|
|
uint8_t buf[LookToRead_BUF_SIZE]; |
10461
|
|
|
|
|
|
|
}; |
10462
|
|
|
|
|
|
|
|
10463
|
|
|
|
|
|
|
void LookToRead_CreateVTable(CLookToRead *p, int lookahead); |
10464
|
|
|
|
|
|
|
void LookToRead_Init(CLookToRead *p); |
10465
|
|
|
|
|
|
|
|
10466
|
|
|
|
|
|
|
struct CSecToLook |
10467
|
|
|
|
|
|
|
{ |
10468
|
|
|
|
|
|
|
ISeqInStream s; |
10469
|
|
|
|
|
|
|
ILookInStream *realStream; |
10470
|
|
|
|
|
|
|
}; |
10471
|
|
|
|
|
|
|
|
10472
|
|
|
|
|
|
|
void SecToLook_CreateVTable(CSecToLook *p); |
10473
|
|
|
|
|
|
|
|
10474
|
|
|
|
|
|
|
struct CSecToRead |
10475
|
|
|
|
|
|
|
{ |
10476
|
|
|
|
|
|
|
ISeqInStream s; |
10477
|
|
|
|
|
|
|
ILookInStream *realStream; |
10478
|
|
|
|
|
|
|
}; |
10479
|
|
|
|
|
|
|
|
10480
|
|
|
|
|
|
|
void SecToRead_CreateVTable(CSecToRead *p); |
10481
|
|
|
|
|
|
|
|
10482
|
|
|
|
|
|
|
struct ICompressProgress |
10483
|
|
|
|
|
|
|
{ |
10484
|
|
|
|
|
|
|
SRes (*Progress)(void *p, uint64_t inSize, uint64_t outSize); |
10485
|
|
|
|
|
|
|
/* Returns: result. (result != SZ_OK) means break. |
10486
|
|
|
|
|
|
|
Value (uint64_t)(int64_t)-1 for size means unknown value. */ |
10487
|
|
|
|
|
|
|
}; |
10488
|
|
|
|
|
|
|
|
10489
|
|
|
|
|
|
|
/* Allocator callback table; invoked through the IAlloc_Alloc and IAlloc_Free
   macros, which pass the table itself as the first argument. */
struct ISzAlloc {
  /* Requests `size` bytes. */
  void* (*Alloc)(void* p, size_t size);
  /* address can be 0 */
  void (*Free)(void* p, void* address);
};
10494
|
|
|
|
|
|
|
|
10495
|
|
|
|
|
|
|
#define IAlloc_Alloc(p, size) (p)->Alloc((p), size) |
10496
|
|
|
|
|
|
|
#define IAlloc_Free(p, a) (p)->Free((p), a) |
10497
|
|
|
|
|
|
|
|
10498
|
|
|
|
|
|
|
#endif // UFAL_CPPUTILS_COMPRESSOR_LZMA_TYPES_H |
10499
|
|
|
|
|
|
|
|
10500
|
|
|
|
|
|
|
// LzmaDec.h -- LZMA Decoder |
10501
|
|
|
|
|
|
|
// 2009-02-07 : Igor Pavlov : Public domain |
10502
|
|
|
|
|
|
|
|
10503
|
|
|
|
|
|
|
/* #define _LZMA_PROB32 */ |
10504
|
|
|
|
|
|
|
/* _LZMA_PROB32 can increase the speed on some CPUs, |
10505
|
|
|
|
|
|
|
but memory usage for CLzmaDec::probs will be doubled in that case */ |
10506
|
|
|
|
|
|
|
|
10507
|
|
|
|
|
|
|
#ifdef _LZMA_PROB32 |
10508
|
|
|
|
|
|
|
#define CLzmaProb uint32_t |
10509
|
|
|
|
|
|
|
#else |
10510
|
|
|
|
|
|
|
#define CLzmaProb uint16_t |
10511
|
|
|
|
|
|
|
#endif |
10512
|
|
|
|
|
|
|
|
10513
|
|
|
|
|
|
|
/* ---------- LZMA Properties ---------- */ |
10514
|
|
|
|
|
|
|
|
10515
|
|
|
|
|
|
|
#define LZMA_PROPS_SIZE 5 |
10516
|
|
|
|
|
|
|
|
10517
|
|
|
|
|
|
|
/* Decoded LZMA properties, filled in by LzmaProps_Decode(). */
struct CLzmaProps {
  /* NOTE(review): presumably the LZMA literal-context, literal-position and
     position bit counts -- confirm against LzmaProps_Decode. */
  unsigned lc;
  unsigned lp;
  unsigned pb;
  /* NOTE(review): presumably the dictionary size in bytes -- confirm. */
  uint32_t dicSize;
};
10522
|
|
|
|
|
|
|
|
10523
|
|
|
|
|
|
|
/* LzmaProps_Decode - decodes properties |
10524
|
|
|
|
|
|
|
Returns: |
10525
|
|
|
|
|
|
|
SZ_OK |
10526
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
10527
|
|
|
|
|
|
|
*/ |
10528
|
|
|
|
|
|
|
|
10529
|
|
|
|
|
|
|
SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size); |
10530
|
|
|
|
|
|
|
|
10531
|
|
|
|
|
|
|
/* ---------- LZMA Decoder state ---------- */ |
10532
|
|
|
|
|
|
|
|
10533
|
|
|
|
|
|
|
/* LZMA_REQUIRED_INPUT_MAX = number of required input bytes for worst case. |
10534
|
|
|
|
|
|
|
Num bits = log2((2^11 / 31) ^ 22) + 26 < 134 + 26 = 160; */ |
10535
|
|
|
|
|
|
|
|
10536
|
|
|
|
|
|
|
#define LZMA_REQUIRED_INPUT_MAX 20 |
10537
|
|
|
|
|
|
|
|
10538
|
|
|
|
|
|
|
struct CLzmaDec |
10539
|
|
|
|
|
|
|
{ |
10540
|
|
|
|
|
|
|
CLzmaProps prop; |
10541
|
|
|
|
|
|
|
CLzmaProb *probs; |
10542
|
|
|
|
|
|
|
uint8_t *dic; |
10543
|
|
|
|
|
|
|
const uint8_t *buf; |
10544
|
|
|
|
|
|
|
uint32_t range, code; |
10545
|
|
|
|
|
|
|
size_t dicPos; |
10546
|
|
|
|
|
|
|
size_t dicBufSize; |
10547
|
|
|
|
|
|
|
uint32_t processedPos; |
10548
|
|
|
|
|
|
|
uint32_t checkDicSize; |
10549
|
|
|
|
|
|
|
unsigned state; |
10550
|
|
|
|
|
|
|
uint32_t reps[4]; |
10551
|
|
|
|
|
|
|
unsigned remainLen; |
10552
|
|
|
|
|
|
|
int needFlush; |
10553
|
|
|
|
|
|
|
int needInitState; |
10554
|
|
|
|
|
|
|
uint32_t numProbs; |
10555
|
|
|
|
|
|
|
unsigned tempBufSize; |
10556
|
|
|
|
|
|
|
uint8_t tempBuf[LZMA_REQUIRED_INPUT_MAX]; |
10557
|
|
|
|
|
|
|
}; |
10558
|
|
|
|
|
|
|
|
10559
|
|
|
|
|
|
|
#define LzmaDec_Construct(p) { (p)->dic = 0; (p)->probs = 0; } |
10560
|
|
|
|
|
|
|
|
10561
|
|
|
|
|
|
|
void LzmaDec_Init(CLzmaDec *p); |
10562
|
|
|
|
|
|
|
|
10563
|
|
|
|
|
|
|
/* There are two types of LZMA streams: |
10564
|
|
|
|
|
|
|
0) Stream with end mark. That end mark adds about 6 bytes to compressed size. |
10565
|
|
|
|
|
|
|
1) Stream without end mark. You must know exact uncompressed size to decompress such stream. */ |
10566
|
|
|
|
|
|
|
|
10567
|
|
|
|
|
|
|
/* Tells the decoder how to treat the point where the output limit is reached. */
enum ELzmaFinishMode {
  /* finish at any point */
  LZMA_FINISH_ANY = 0,
  /* block must be finished at the end */
  LZMA_FINISH_END = 1
};
10572
|
|
|
|
|
|
|
|
10573
|
|
|
|
|
|
|
/* ELzmaFinishMode has meaning only if the decoding reaches output limit !!! |
10574
|
|
|
|
|
|
|
|
10575
|
|
|
|
|
|
|
You must use LZMA_FINISH_END, when you know that current output buffer |
10576
|
|
|
|
|
|
|
covers last bytes of block. In other cases you must use LZMA_FINISH_ANY. |
10577
|
|
|
|
|
|
|
|
10578
|
|
|
|
|
|
|
If LZMA decoder sees end marker before reaching output limit, it returns SZ_OK, |
10579
|
|
|
|
|
|
|
and output value of destLen will be less than output buffer size limit. |
10580
|
|
|
|
|
|
|
You can check status result also. |
10581
|
|
|
|
|
|
|
|
10582
|
|
|
|
|
|
|
You can use multiple checks to test data integrity after full decompression: |
10583
|
|
|
|
|
|
|
1) Check Result and "status" variable. |
10584
|
|
|
|
|
|
|
2) Check that output(destLen) = uncompressedSize, if you know real uncompressedSize. |
10585
|
|
|
|
|
|
|
3) Check that output(srcLen) = compressedSize, if you know real compressedSize. |
10586
|
|
|
|
|
|
|
You must use correct finish mode in that case. */ |
10587
|
|
|
|
|
|
|
|
10588
|
|
|
|
|
|
|
/* Decoder status reported through the `status` output parameter. */
enum ELzmaStatus {
  /* use main error code instead */
  LZMA_STATUS_NOT_SPECIFIED = 0,
  /* stream was finished with end mark. */
  LZMA_STATUS_FINISHED_WITH_MARK = 1,
  /* stream was not finished */
  LZMA_STATUS_NOT_FINISHED = 2,
  /* you must provide more input bytes */
  LZMA_STATUS_NEEDS_MORE_INPUT = 3,
  /* there is probability that stream was finished without end mark */
  LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK = 4
};
10596
|
|
|
|
|
|
|
|
10597
|
|
|
|
|
|
|
/* ELzmaStatus is used only as output value for function call */ |
10598
|
|
|
|
|
|
|
|
10599
|
|
|
|
|
|
|
/* ---------- Interfaces ---------- */ |
10600
|
|
|
|
|
|
|
|
10601
|
|
|
|
|
|
|
/* There are 3 levels of interfaces: |
10602
|
|
|
|
|
|
|
1) Dictionary Interface |
10603
|
|
|
|
|
|
|
2) Buffer Interface |
10604
|
|
|
|
|
|
|
3) One Call Interface |
10605
|
|
|
|
|
|
|
You can select any of these interfaces, but don't mix functions from different |
10606
|
|
|
|
|
|
|
groups for same object. */ |
10607
|
|
|
|
|
|
|
|
10608
|
|
|
|
|
|
|
/* There are two variants to allocate state for Dictionary Interface: |
10609
|
|
|
|
|
|
|
1) LzmaDec_Allocate / LzmaDec_Free |
10610
|
|
|
|
|
|
|
2) LzmaDec_AllocateProbs / LzmaDec_FreeProbs |
10611
|
|
|
|
|
|
|
You can use variant 2, if you set dictionary buffer manually. |
10612
|
|
|
|
|
|
|
For Buffer Interface you must always use variant 1. |
10613
|
|
|
|
|
|
|
|
10614
|
|
|
|
|
|
|
LzmaDec_Allocate* can return: |
10615
|
|
|
|
|
|
|
SZ_OK |
10616
|
|
|
|
|
|
|
SZ_ERROR_MEM - Memory allocation error |
10617
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
10618
|
|
|
|
|
|
|
*/ |
10619
|
|
|
|
|
|
|
|
10620
|
|
|
|
|
|
|
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc); |
10621
|
|
|
|
|
|
|
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc); |
10622
|
|
|
|
|
|
|
|
10623
|
|
|
|
|
|
|
SRes LzmaDec_Allocate(CLzmaDec *state, const uint8_t *prop, unsigned propsSize, ISzAlloc *alloc); |
10624
|
|
|
|
|
|
|
void LzmaDec_Free(CLzmaDec *state, ISzAlloc *alloc); |
10625
|
|
|
|
|
|
|
|
10626
|
|
|
|
|
|
|
/* ---------- Dictionary Interface ---------- */ |
10627
|
|
|
|
|
|
|
|
10628
|
|
|
|
|
|
|
/* You can use it, if you want to eliminate the overhead for data copying from |
10629
|
|
|
|
|
|
|
dictionary to some other external buffer. |
10630
|
|
|
|
|
|
|
You must work with CLzmaDec variables directly in this interface. |
10631
|
|
|
|
|
|
|
|
10632
|
|
|
|
|
|
|
STEPS: |
10633
|
|
|
|
|
|
|
LzmaDec_Construct() |
10634
|
|
|
|
|
|
|
LzmaDec_Allocate() |
10635
|
|
|
|
|
|
|
for (each new stream) |
10636
|
|
|
|
|
|
|
{ |
10637
|
|
|
|
|
|
|
LzmaDec_Init() |
10638
|
|
|
|
|
|
|
while (it needs more decompression) |
10639
|
|
|
|
|
|
|
{ |
10640
|
|
|
|
|
|
|
LzmaDec_DecodeToDic() |
10641
|
|
|
|
|
|
|
use data from CLzmaDec::dic and update CLzmaDec::dicPos |
10642
|
|
|
|
|
|
|
} |
10643
|
|
|
|
|
|
|
} |
10644
|
|
|
|
|
|
|
LzmaDec_Free() |
10645
|
|
|
|
|
|
|
*/ |
10646
|
|
|
|
|
|
|
|
10647
|
|
|
|
|
|
|
/* LzmaDec_DecodeToDic |
10648
|
|
|
|
|
|
|
|
10649
|
|
|
|
|
|
|
The decoding to internal dictionary buffer (CLzmaDec::dic). |
10650
|
|
|
|
|
|
|
You must manually update CLzmaDec::dicPos, if it reaches CLzmaDec::dicBufSize !!! |
10651
|
|
|
|
|
|
|
|
10652
|
|
|
|
|
|
|
finishMode: |
10653
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (dicLimit). |
10654
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just dicLimit bytes. |
10655
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after dicLimit. |
10656
|
|
|
|
|
|
|
|
10657
|
|
|
|
|
|
|
Returns: |
10658
|
|
|
|
|
|
|
SZ_OK |
10659
|
|
|
|
|
|
|
status: |
10660
|
|
|
|
|
|
|
LZMA_STATUS_FINISHED_WITH_MARK |
10661
|
|
|
|
|
|
|
LZMA_STATUS_NOT_FINISHED |
10662
|
|
|
|
|
|
|
LZMA_STATUS_NEEDS_MORE_INPUT |
10663
|
|
|
|
|
|
|
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK |
10664
|
|
|
|
|
|
|
SZ_ERROR_DATA - Data error |
10665
|
|
|
|
|
|
|
*/ |
10666
|
|
|
|
|
|
|
|
10667
|
|
|
|
|
|
|
SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, |
10668
|
|
|
|
|
|
|
const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); |
10669
|
|
|
|
|
|
|
|
10670
|
|
|
|
|
|
|
/* ---------- Buffer Interface ---------- */ |
10671
|
|
|
|
|
|
|
|
10672
|
|
|
|
|
|
|
/* It's zlib-like interface. |
10673
|
|
|
|
|
|
|
See LzmaDec_DecodeToDic description for information about STEPS and return results, |
10674
|
|
|
|
|
|
|
but you must use LzmaDec_DecodeToBuf instead of LzmaDec_DecodeToDic and you don't need |
10675
|
|
|
|
|
|
|
to work with CLzmaDec variables manually. |
10676
|
|
|
|
|
|
|
|
10677
|
|
|
|
|
|
|
finishMode: |
10678
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (*destLen). |
10679
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just destLen bytes. |
10680
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after (*destLen). |
10681
|
|
|
|
|
|
|
*/ |
10682
|
|
|
|
|
|
|
|
10683
|
|
|
|
|
|
|
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen, |
10684
|
|
|
|
|
|
|
const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status); |
10685
|
|
|
|
|
|
|
|
10686
|
|
|
|
|
|
|
/* ---------- One Call Interface ---------- */ |
10687
|
|
|
|
|
|
|
|
10688
|
|
|
|
|
|
|
/* LzmaDecode |
10689
|
|
|
|
|
|
|
|
10690
|
|
|
|
|
|
|
finishMode: |
10691
|
|
|
|
|
|
|
It has meaning only if the decoding reaches output limit (*destLen). |
10692
|
|
|
|
|
|
|
LZMA_FINISH_ANY - Decode just destLen bytes. |
10693
|
|
|
|
|
|
|
LZMA_FINISH_END - Stream must be finished after (*destLen). |
10694
|
|
|
|
|
|
|
|
10695
|
|
|
|
|
|
|
Returns: |
10696
|
|
|
|
|
|
|
SZ_OK |
10697
|
|
|
|
|
|
|
status: |
10698
|
|
|
|
|
|
|
LZMA_STATUS_FINISHED_WITH_MARK |
10699
|
|
|
|
|
|
|
LZMA_STATUS_NOT_FINISHED |
10700
|
|
|
|
|
|
|
LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK |
10701
|
|
|
|
|
|
|
SZ_ERROR_DATA - Data error |
10702
|
|
|
|
|
|
|
SZ_ERROR_MEM - Memory allocation error |
10703
|
|
|
|
|
|
|
SZ_ERROR_UNSUPPORTED - Unsupported properties |
10704
|
|
|
|
|
|
|
SZ_ERROR_INPUT_EOF - It needs more bytes in input buffer (src). |
10705
|
|
|
|
|
|
|
*/ |
10706
|
|
|
|
|
|
|
|
10707
|
|
|
|
|
|
|
SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, |
10708
|
|
|
|
|
|
|
const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode, |
10709
|
|
|
|
|
|
|
ELzmaStatus *status, ISzAlloc *alloc); |
10710
|
|
|
|
|
|
|
|
10711
|
|
|
|
|
|
|
// LzmaDec.c -- LZMA Decoder |
10712
|
|
|
|
|
|
|
// 2009-09-20 : Igor Pavlov : Public domain |
10713
|
|
|
|
|
|
|
|
10714
|
|
|
|
|
|
|
/* ---- Range decoder primitives -------------------------------------------
   All macros below expand inside a decoding function and operate directly on
   that function's local variables: `range`, `code`, `buf` (and `bufLimit` for
   the _CHECK variants) plus the scratch variables `ttt` and `bound`. */

/* When `range` drops below kTopValue (1 << 24), NORMALIZE shifts in one more input byte. */
#define kNumTopBits 24
#define kTopValue ((uint32_t)1 << kNumTopBits)

/* Probabilities are scaled to kNumBitModelTotalBits bits (see `bound` in IF_BIT_0);
   kNumMoveBits is the shift applied when a probability is adapted (UPDATE_0 / UPDATE_1). */
#define kNumBitModelTotalBits 11
#define kBitModelTotal (1 << kNumBitModelTotalBits)
#define kNumMoveBits 5

/* NOTE(review): presumably the number of input bytes that initialise the range coder
   (LzmaDec_InitRc reads data[1]..data[4]) - confirm against the caller. */
#define RC_INIT_SIZE 5

/* Refill: consumes one byte from `buf` WITHOUT any bounds check. */
#define NORMALIZE if (range < kTopValue) { range <<= 8; code = (code << 8) | (*buf++); }

/* IF_BIT_0 loads the probability *(p) into `ttt`, normalizes, computes `bound` and opens
   an `if (code < bound)` whose true branch is the "bit == 0" case. */
#define IF_BIT_0(p) ttt = *(p); NORMALIZE; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
/* Commit a decoded 0 bit: shrink the range and move the probability towards kBitModelTotal. */
#define UPDATE_0(p) range = bound; *(p) = (CLzmaProb)(ttt + ((kBitModelTotal - ttt) >> kNumMoveBits));
/* Commit a decoded 1 bit: drop the lower part of the range and move the probability towards 0. */
#define UPDATE_1(p) range -= bound; code -= bound; *(p) = (CLzmaProb)(ttt - (ttt >> kNumMoveBits));
/* Decode one bit with probability *(p) and append it to `i` (i = 2*i or 2*i + 1);
   A0 / A1 are extra statements executed for a 0 / 1 bit respectively. */
#define GET_BIT2(p, i, A0, A1) IF_BIT_0(p) \
  { UPDATE_0(p); i = (i + i); A0; } else \
  { UPDATE_1(p); i = (i + i) + 1; A1; }
#define GET_BIT(p, i) GET_BIT2(p, i, ; , ;)

/* Bit-tree decoding: starting from i = 1, each decoded bit selects the next
   probability probs[i]; the loop stops once i >= limit and `limit` is subtracted,
   leaving the decoded symbol in `i`. */
#define TREE_GET_BIT(probs, i) { GET_BIT((probs + i), i); }
#define TREE_DECODE(probs, limit, i) \
  { i = 1; do { TREE_GET_BIT(probs, i); } while (i < limit); i -= limit; }

/* #define _LZMA_SIZE_OPT */

/* 6-bit tree decode: either the generic loop (size-optimised build) or the
   same six steps unrolled by hand. */
#ifdef _LZMA_SIZE_OPT
#define TREE_6_DECODE(probs, i) TREE_DECODE(probs, (1 << 6), i)
#else
#define TREE_6_DECODE(probs, i) \
  { i = 1; \
  TREE_GET_BIT(probs, i); \
  TREE_GET_BIT(probs, i); \
  TREE_GET_BIT(probs, i); \
  TREE_GET_BIT(probs, i); \
  TREE_GET_BIT(probs, i); \
  TREE_GET_BIT(probs, i); \
  i -= 0x40; }
#endif

/* ---- Bounds-checked, non-adapting variants --------------------------------
   Same decoding steps as above, but NORMALIZE_CHECK makes the enclosing function
   return DUMMY_ERROR when `buf` has reached `bufLimit`, and the UPDATE_*_CHECK
   macros change only `range` / `code`, never the probability table. */
#define NORMALIZE_CHECK if (range < kTopValue) { if (buf >= bufLimit) return DUMMY_ERROR; range <<= 8; code = (code << 8) | (*buf++); }

#define IF_BIT_0_CHECK(p) ttt = *(p); NORMALIZE_CHECK; bound = (range >> kNumBitModelTotalBits) * ttt; if (code < bound)
#define UPDATE_0_CHECK range = bound;
#define UPDATE_1_CHECK range -= bound; code -= bound;
#define GET_BIT2_CHECK(p, i, A0, A1) IF_BIT_0_CHECK(p) \
  { UPDATE_0_CHECK; i = (i + i); A0; } else \
  { UPDATE_1_CHECK; i = (i + i) + 1; A1; }
#define GET_BIT_CHECK(p, i) GET_BIT2_CHECK(p, i, ; , ;)
#define TREE_DECODE_CHECK(probs, limit, i) \
  { i = 1; do { GET_BIT_CHECK(probs + i, i) } while (i < limit); i -= limit; }
10764
|
|
|
|
|
|
|
|
10765
|
|
|
|
|
|
|
/* ---- Layout of the probability table (CLzmaDec::probs) --------------------
   The constants below are offsets (in CLzmaProb units) of the individual
   probability groups inside one flat array. Each offset is the previous
   offset plus the size of the previous group, so the last one (Literal)
   equals the total size of the fixed part. */

/* Upper bound for the `pb` property: at most 1 << 4 position states. */
#define kNumPosBitsMax 4
#define kNumPosStatesMax (1 << kNumPosBitsMax)

/* Match lengths are coded in three ranges: low (3 bits), mid (3 bits), high (8 bits). */
#define kLenNumLowBits 3
#define kLenNumLowSymbols (1 << kLenNumLowBits)
#define kLenNumMidBits 3
#define kLenNumMidSymbols (1 << kLenNumMidBits)
#define kLenNumHighBits 8
#define kLenNumHighSymbols (1 << kLenNumHighBits)

/* Offsets inside one length coder (used relative to LenCoder / RepLenCoder). */
#define LenChoice 0
#define LenChoice2 (LenChoice + 1)
#define LenLow (LenChoice2 + 1)
#define LenMid (LenLow + (kNumPosStatesMax << kLenNumLowBits))
#define LenHigh (LenMid + (kNumPosStatesMax << kLenNumMidBits))
#define kNumLenProbs (LenHigh + kLenNumHighSymbols)

/* Decoder state machine: kNumStates states, of which those below kNumLitStates
   are the states tested with `state < kNumLitStates` in the decoder. */
#define kNumStates 12
#define kNumLitStates 7

/* Position slots in [kStartPosModelIndex, kEndPosModelIndex) use the SpecPos
   probabilities; slots >= kEndPosModelIndex use direct bits plus Align. */
#define kStartPosModelIndex 4
#define kEndPosModelIndex 14
#define kNumFullDistances (1 << (kEndPosModelIndex >> 1))

#define kNumPosSlotBits 6
#define kNumLenToPosStates 4

#define kNumAlignBits 4
#define kAlignTableSize (1 << kNumAlignBits)

/* Shortest match length; kMatchSpecLenStart is the first length value that can
   not be produced by the length coder and is therefore used for markers. */
#define kMatchMinLen 2
#define kMatchSpecLenStart (kMatchMinLen + kLenNumLowSymbols + kLenNumMidSymbols + kLenNumHighSymbols)

/* Offsets of the probability groups in the flat table. */
#define IsMatch 0
#define IsRep (IsMatch + (kNumStates << kNumPosBitsMax))
#define IsRepG0 (IsRep + kNumStates)
#define IsRepG1 (IsRepG0 + kNumStates)
#define IsRepG2 (IsRepG1 + kNumStates)
#define IsRep0Long (IsRepG2 + kNumStates)
#define PosSlot (IsRep0Long + (kNumStates << kNumPosBitsMax))
#define SpecPos (PosSlot + (kNumLenToPosStates << kNumPosSlotBits))
#define Align (SpecPos + kNumFullDistances - kEndPosModelIndex)
#define LenCoder (Align + kAlignTableSize)
#define RepLenCoder (LenCoder + kNumLenProbs)
#define Literal (RepLenCoder + kNumLenProbs)

/* Size of the fixed part of the table and of one literal sub-coder. */
#define LZMA_BASE_SIZE 1846
#define LZMA_LIT_SIZE 768

/* Total number of probabilities for the given properties (`p` points to a
   structure with `lc` and `lp` members). */
#define LzmaProps_GetNumProbs(p) ((uint32_t)LZMA_BASE_SIZE + (LZMA_LIT_SIZE << ((p)->lc + (p)->lp)))

/* The hard-coded LZMA_BASE_SIZE must match the computed layout; fail the build
   with an explicit diagnostic instead of relying on an invalid token. */
#if Literal != LZMA_BASE_SIZE
#error "LZMA probability layout is inconsistent: Literal must equal LZMA_BASE_SIZE"
#endif

#define LZMA_DIC_MIN (1 << 12)
10821
|
|
|
|
|
|
|
|
10822
|
|
|
|
|
|
|
/* LzmaDec_DecodeReal - core LZMA symbol decoding loop.

   Decodes symbols from p->buf into the dictionary p->dic. The first symbol is
   always decoded (do/while); further symbols are decoded while
   (dicPos < limit && buf < bufLimit). Input bytes are fetched with the
   unchecked NORMALIZE macro, and "buf" is compared against bufLimit before the
   final normalization of a symbol.
   NOTE(review): this implies the caller must guarantee that enough input is
   readable past bufLimit to finish one symbol - confirm against the callers.

   All decoder state is copied into locals on entry and written back to *p
   only on the normal exit path; when SZ_ERROR_DATA is returned from inside
   the loop, *p is left as it was on entry.

   Parameters:
     p        - decoder state (probabilities, dictionary, range coder state)
     limit    - dictionary position at which decoding must stop
     bufLimit - input position after which no new symbol is started
   Result:
     SZ_OK         - OK
     SZ_ERROR_DATA - Error
   Out:
     p->remainLen:
       < kMatchSpecLenStart     : normal remain (bytes of the last match not yet copied)
       = kMatchSpecLenStart     : finished
       = kMatchSpecLenStart + 1 : Flush marker
       = kMatchSpecLenStart + 2 : State Init Marker
*/

static int LzmaDec_DecodeReal(CLzmaDec *p, size_t limit, const uint8_t *bufLimit)
{
  CLzmaProb *probs = p->probs;

  unsigned state = p->state;
  uint32_t rep0 = p->reps[0], rep1 = p->reps[1], rep2 = p->reps[2], rep3 = p->reps[3];
  unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
  unsigned lpMask = ((unsigned)1 << (p->prop.lp)) - 1;
  unsigned lc = p->prop.lc;

  uint8_t *dic = p->dic;
  size_t dicBufSize = p->dicBufSize;
  size_t dicPos = p->dicPos;

  uint32_t processedPos = p->processedPos;
  uint32_t checkDicSize = p->checkDicSize;
  unsigned len = 0;

  /* Range coder state; the NORMALIZE / IF_BIT_0 / UPDATE_* macros use these locals by name. */
  const uint8_t *buf = p->buf;
  uint32_t range = p->range;
  uint32_t code = p->code;

  do
  {
    CLzmaProb *prob;
    uint32_t bound;   /* scratch for IF_BIT_0 */
    unsigned ttt;     /* scratch for IF_BIT_0 */
    unsigned posState = processedPos & pbMask;

    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
    IF_BIT_0(prob)
    {
      /* IsMatch bit == 0: the symbol is a literal byte. */
      unsigned symbol;
      UPDATE_0(prob);
      prob = probs + Literal;
      /* Select the literal sub-coder from the low lp bits of the position and
         the top lc bits of the previous byte (none exists at the very start). */
      if (checkDicSize != 0 || processedPos != 0)
        prob += (LZMA_LIT_SIZE * (((processedPos & lpMask) << lc) +
        (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));

      if (state < kNumLitStates)
      {
        /* Plain literal: 8 bits through a bit tree. */
        state -= (state < 4) ? state : 3;
        symbol = 1;
        do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
      }
      else
      {
        /* Literal in a state >= kNumLitStates: the byte at distance rep0
           (matchByte) selects the probabilities as long as the decoded bits
           agree with it; `offs` becomes 0 after the first mismatch. */
        unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
        unsigned offs = 0x100;
        state -= (state < 10) ? 3 : 6;
        symbol = 1;
        do
        {
          unsigned bit;
          CLzmaProb *probLit;
          matchByte <<= 1;
          bit = (matchByte & offs);
          probLit = prob + offs + bit + symbol;
          GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
        }
        while (symbol < 0x100);
      }
      dic[dicPos++] = (uint8_t)symbol;
      processedPos++;
      continue;
    }
    else
    {
      UPDATE_1(prob);
      prob = probs + IsRep + state;
      IF_BIT_0(prob)
      {
        /* IsRep bit == 0: match with an explicitly coded distance. Adding
           kNumStates flags that the distance still has to be decoded
           (tested by `state >= kNumStates` below). */
        UPDATE_0(prob);
        state += kNumStates;
        prob = probs + LenCoder;
      }
      else
      {
        /* Match that reuses one of the four most recent distances. */
        UPDATE_1(prob);
        /* A repeated match is invalid before any byte has been produced. */
        if (checkDicSize == 0 && processedPos == 0)
          return SZ_ERROR_DATA;
        prob = probs + IsRepG0 + state;
        IF_BIT_0(prob)
        {
          /* Distance is rep0. */
          UPDATE_0(prob);
          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
          IF_BIT_0(prob)
          {
            /* IsRep0Long bit == 0: copy exactly one byte from distance rep0. */
            UPDATE_0(prob);
            dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
            dicPos++;
            processedPos++;
            state = state < kNumLitStates ? 9 : 11;
            continue;
          }
          UPDATE_1(prob);
        }
        else
        {
          /* Distance is rep1, rep2 or rep3; the chosen one is moved to the
             front and the others shift down by one. */
          uint32_t distance;
          UPDATE_1(prob);
          prob = probs + IsRepG1 + state;
          IF_BIT_0(prob)
          {
            UPDATE_0(prob);
            distance = rep1;
          }
          else
          {
            UPDATE_1(prob);
            prob = probs + IsRepG2 + state;
            IF_BIT_0(prob)
            {
              UPDATE_0(prob);
              distance = rep2;
            }
            else
            {
              UPDATE_1(prob);
              distance = rep3;
              rep3 = rep2;
            }
            rep2 = rep1;
          }
          rep1 = rep0;
          rep0 = distance;
        }
        state = state < kNumLitStates ? 8 : 11;
        prob = probs + RepLenCoder;
      }
      {
        /* Decode the match length (minus kMatchMinLen) with the length coder
           selected above: low, mid or high range, each a bit tree. */
        unsigned limit, offset;
        CLzmaProb *probLen = prob + LenChoice;
        IF_BIT_0(probLen)
        {
          UPDATE_0(probLen);
          probLen = prob + LenLow + (posState << kLenNumLowBits);
          offset = 0;
          limit = (1 << kLenNumLowBits);
        }
        else
        {
          UPDATE_1(probLen);
          probLen = prob + LenChoice2;
          IF_BIT_0(probLen)
          {
            UPDATE_0(probLen);
            probLen = prob + LenMid + (posState << kLenNumMidBits);
            offset = kLenNumLowSymbols;
            limit = (1 << kLenNumMidBits);
          }
          else
          {
            UPDATE_1(probLen);
            probLen = prob + LenHigh;
            offset = kLenNumLowSymbols + kLenNumMidSymbols;
            limit = (1 << kLenNumHighBits);
          }
        }
        TREE_DECODE(probLen, limit, len);
        len += offset;
      }

      if (state >= kNumStates)
      {
        /* New match: decode the distance. First a 6-bit position slot ... */
        uint32_t distance;
        prob = probs + PosSlot +
            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
        TREE_6_DECODE(prob, distance);
        if (distance >= kStartPosModelIndex)
        {
          /* ... slots below kStartPosModelIndex are the distance itself; larger
             slots give the two top bits plus numDirectBits further bits. */
          unsigned posSlot = (unsigned)distance;
          int numDirectBits = (int)(((distance >> 1) - 1));
          distance = (2 | (distance & 1));
          if (posSlot < kEndPosModelIndex)
          {
            /* Low bits come from a bit tree decoded least-significant bit first. */
            distance <<= numDirectBits;
            prob = probs + SpecPos + distance - posSlot - 1;
            {
              uint32_t mask = 1;
              unsigned i = 1;
              do
              {
                GET_BIT2(prob + i, i, ; , distance |= mask);
                mask <<= 1;
              }
              while (--numDirectBits != 0);
            }
          }
          else
          {
            /* All but the last kNumAlignBits bits are decoded without a
               probability model (range is simply halved). */
            numDirectBits -= kNumAlignBits;
            do
            {
              NORMALIZE
              range >>= 1;

              {
                /* Branch-free form of the commented-out code below: t is all
                   ones when (code - range) wrapped, i.e. the bit is 0. */
                uint32_t t;
                code -= range;
                t = (0 - ((uint32_t)code >> 31)); /* (uint32_t)((int32_t)code >> 31) */
                distance = (distance << 1) + (t + 1);
                code += range & t;
              }
              /*
              distance <<= 1;
              if (code >= range)
              {
                code -= range;
                distance |= 1;
              }
              */
            }
            while (--numDirectBits != 0);
            /* The lowest kNumAlignBits bits use the Align bit tree, LSB first. */
            prob = probs + Align;
            distance <<= kNumAlignBits;
            {
              unsigned i = 1;
              GET_BIT2(prob + i, i, ; , distance |= 1);
              GET_BIT2(prob + i, i, ; , distance |= 2);
              GET_BIT2(prob + i, i, ; , distance |= 4);
              GET_BIT2(prob + i, i, ; , distance |= 8);
            }
            /* Distance 0xFFFFFFFF is a marker, not a real match: report it
               through remainLen >= kMatchSpecLenStart and leave the loop. */
            if (distance == (uint32_t)0xFFFFFFFF)
            {
              len += kMatchSpecLenStart;
              state -= kNumStates;
              break;
            }
          }
        }
        rep3 = rep2;
        rep2 = rep1;
        rep1 = rep0;
        rep0 = distance + 1;
        /* The distance must not reach beyond the data produced so far
           (or beyond checkDicSize once that has been set). */
        if (checkDicSize == 0)
        {
          if (distance >= processedPos)
            return SZ_ERROR_DATA;
        }
        else if (distance >= checkDicSize)
          return SZ_ERROR_DATA;
        state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
      }

      len += kMatchMinLen;

      if (limit == dicPos)
        return SZ_ERROR_DATA;
      {
        /* Copy as much of the match as fits before `limit`; whatever does not
           fit stays in `len` and is handed back through p->remainLen. */
        size_t rem = limit - dicPos;
        unsigned curLen = ((rem < len) ? (unsigned)rem : len);
        size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);

        processedPos += curLen;

        len -= curLen;
        if (pos + curLen <= dicBufSize)
        {
          /* Source does not wrap around the end of the buffer. The copy is
             done byte by byte because source and destination may overlap. */
          uint8_t *dest = dic + dicPos;
          ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
          const uint8_t *lim = dest + curLen;
          dicPos += curLen;
          do
            *(dest) = (uint8_t)*(dest + src);
          while (++dest != lim);
        }
        else
        {
          /* Source wraps: restart the read position at 0 when it hits dicBufSize. */
          do
          {
            dic[dicPos++] = dic[pos];
            if (++pos == dicBufSize)
              pos = 0;
          }
          while (--curLen != 0);
        }
      }
    }
  }
  while (dicPos < limit && buf < bufLimit);
  NORMALIZE;
  /* Write the working copies back to the decoder object. */
  p->buf = buf;
  p->range = range;
  p->code = code;
  p->remainLen = len;
  p->dicPos = dicPos;
  p->processedPos = processedPos;
  p->reps[0] = rep0;
  p->reps[1] = rep1;
  p->reps[2] = rep2;
  p->reps[3] = rep3;
  p->state = state;

  return SZ_OK;
}
11131
|
|
|
|
|
|
|
|
11132
|
80
|
|
|
|
|
|
/* Copies the not-yet-written tail of the last match (p->remainLen bytes at
   distance p->reps[0]) into the dictionary, stopping at `limit`.
   Does nothing when no bytes are pending or when remainLen holds a value
   >= kMatchSpecLenStart. */
static void LzmaDec_WriteRem(CLzmaDec *p, size_t limit)
{
  if (p->remainLen == 0 || p->remainLen >= kMatchSpecLenStart)
    return;

  uint8_t *const window = p->dic;
  const size_t windowSize = p->dicBufSize;
  const uint32_t matchDist = p->reps[0];
  size_t writePos = p->dicPos;

  /* Number of bytes to emit now: the pending length, capped by the room left. */
  unsigned count = p->remainLen;
  if (limit - writePos < count)
    count = (unsigned)(limit - writePos);

  /* Record that the dictionary size is reached by this copy. */
  if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= count)
    p->checkDicSize = p->prop.dicSize;

  p->processedPos += count;
  p->remainLen -= count;

  /* Byte-wise copy; the source position wraps around the circular buffer. */
  for (unsigned done = 0; done != count; ++done, ++writePos)
  {
    const size_t wrap = (writePos < matchDist) ? windowSize : 0;
    window[writePos] = window[(writePos - matchDist) + wrap];
  }
  p->dicPos = writePos;
}
11157
|
|
|
|
|
|
|
|
11158
|
152
|
|
|
|
|
|
/* Drives LzmaDec_DecodeReal in steps until the output limit is reached, the
   input is used up, or remainLen reports a marker (>= kMatchSpecLenStart).
   While checkDicSize is still 0, each step is capped so that processedPos
   does not run past prop.dicSize within a single call.
   Returns 0 on success or the non-zero result propagated by RINOK. */
static int LzmaDec_DecodeReal2(CLzmaDec *p, size_t limit, const uint8_t *bufLimit)
{
  for (;;)
  {
    size_t stepLimit = limit;
    if (p->checkDicSize == 0)
    {
      const uint32_t untilDicFull = p->prop.dicSize - p->processedPos;
      if (limit - p->dicPos > untilDicFull)
        stepLimit = p->dicPos + untilDicFull;
    }

    RINOK(LzmaDec_DecodeReal(p, stepLimit, bufLimit));

    if (p->processedPos >= p->prop.dicSize)
      p->checkDicSize = p->prop.dicSize;

    /* Flush the part of the last match that did not fit into this step. */
    LzmaDec_WriteRem(p, limit);

    if (!(p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart))
      break;
  }

  /* Collapse every marker value above kMatchSpecLenStart to kMatchSpecLenStart. */
  if (p->remainLen > kMatchSpecLenStart)
    p->remainLen = kMatchSpecLenStart;

  return 0;
}
11182
|
|
|
|
|
|
|
|
11183
|
|
|
|
|
|
|
/* Result of LzmaDec_TryDummy: either an error or the kind of symbol found. */
enum ELzmaDummy
{
  DUMMY_ERROR = 0, /* unexpected end of input stream */
  DUMMY_LIT   = 1, /* symbol is a literal */
  DUMMY_MATCH = 2, /* symbol is a match with a coded distance */
  DUMMY_REP   = 3  /* symbol is a match reusing a recent distance */
};
11190
|
|
|
|
|
|
|
|
11191
|
71
|
|
|
|
|
|
/* LzmaDec_TryDummy - dry-run decode of one symbol.

   Walks through the decoding of a single symbol from `buf` (at most `inSize`
   bytes) using local copies of the range coder state. Only the *_CHECK macros
   are used, so neither *p nor the probability table is modified, and every
   input byte fetch is bounds-checked against buf + inSize.

   Returns:
     DUMMY_ERROR - the input ended before the symbol was complete
     DUMMY_LIT / DUMMY_MATCH / DUMMY_REP - the kind of symbol that is fully
                   contained in the given input
*/
static ELzmaDummy LzmaDec_TryDummy(const CLzmaDec *p, const uint8_t *buf, size_t inSize)
{
  /* Local copies: the macros below modify range / code / buf by name. */
  uint32_t range = p->range;
  uint32_t code = p->code;
  const uint8_t *bufLimit = buf + inSize;
  CLzmaProb *probs = p->probs;
  unsigned state = p->state;
  ELzmaDummy res;

  {
    CLzmaProb *prob;
    uint32_t bound;   /* scratch for IF_BIT_0_CHECK */
    unsigned ttt;     /* scratch for IF_BIT_0_CHECK */
    unsigned posState = (p->processedPos) & ((1 << p->prop.pb) - 1);

    prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
    IF_BIT_0_CHECK(prob)
    {
      /* Literal. */
      UPDATE_0_CHECK

      /* if (bufLimit - buf >= 7) return DUMMY_LIT; */

      prob = probs + Literal;
      /* Same literal sub-coder selection as in LzmaDec_DecodeReal. */
      if (p->checkDicSize != 0 || p->processedPos != 0)
        prob += (LZMA_LIT_SIZE *
          ((((p->processedPos) & ((1 << (p->prop.lp)) - 1)) << p->prop.lc) +
          (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));

      if (state < kNumLitStates)
      {
        unsigned symbol = 1;
        do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
      }
      else
      {
        /* Literal whose probabilities depend on the byte at distance reps[0]. */
        unsigned matchByte = p->dic[p->dicPos - p->reps[0] +
            ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
        unsigned offs = 0x100;
        unsigned symbol = 1;
        do
        {
          unsigned bit;
          CLzmaProb *probLit;
          matchByte <<= 1;
          bit = (matchByte & offs);
          probLit = prob + offs + bit + symbol;
          GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
        }
        while (symbol < 0x100);
      }
      res = DUMMY_LIT;
    }
    else
    {
      unsigned len;
      UPDATE_1_CHECK;

      prob = probs + IsRep + state;
      IF_BIT_0_CHECK(prob)
      {
        /* Match with coded distance. `state` is reused from here on as a flag:
           0 means "a distance follows" (see `state < 4` below). */
        UPDATE_0_CHECK;
        state = 0;
        prob = probs + LenCoder;
        res = DUMMY_MATCH;
      }
      else
      {
        /* Match reusing a recent distance; only the selecting bits are consumed. */
        UPDATE_1_CHECK;
        res = DUMMY_REP;
        prob = probs + IsRepG0 + state;
        IF_BIT_0_CHECK(prob)
        {
          UPDATE_0_CHECK;
          prob = probs + IsRep0Long + (state << kNumPosBitsMax) + posState;
          IF_BIT_0_CHECK(prob)
          {
            /* Single-byte repeat: the symbol ends here, no length follows. */
            UPDATE_0_CHECK;
            NORMALIZE_CHECK;
            return DUMMY_REP;
          }
          else
          {
            UPDATE_1_CHECK;
          }
        }
        else
        {
          UPDATE_1_CHECK;
          prob = probs + IsRepG1 + state;
          IF_BIT_0_CHECK(prob)
          {
            UPDATE_0_CHECK;
          }
          else
          {
            UPDATE_1_CHECK;
            prob = probs + IsRepG2 + state;
            IF_BIT_0_CHECK(prob)
            {
              UPDATE_0_CHECK;
            }
            else
            {
              UPDATE_1_CHECK;
            }
          }
        }
        /* Flag value that skips the distance decoding below. */
        state = kNumStates;
        prob = probs + RepLenCoder;
      }
      {
        /* Match length: low / mid / high range, then a bit tree. */
        unsigned limit, offset;
        CLzmaProb *probLen = prob + LenChoice;
        IF_BIT_0_CHECK(probLen)
        {
          UPDATE_0_CHECK;
          probLen = prob + LenLow + (posState << kLenNumLowBits);
          offset = 0;
          limit = 1 << kLenNumLowBits;
        }
        else
        {
          UPDATE_1_CHECK;
          probLen = prob + LenChoice2;
          IF_BIT_0_CHECK(probLen)
          {
            UPDATE_0_CHECK;
            probLen = prob + LenMid + (posState << kLenNumMidBits);
            offset = kLenNumLowSymbols;
            limit = 1 << kLenNumMidBits;
          }
          else
          {
            UPDATE_1_CHECK;
            probLen = prob + LenHigh;
            offset = kLenNumLowSymbols + kLenNumMidSymbols;
            limit = 1 << kLenNumHighBits;
          }
        }
        TREE_DECODE_CHECK(probLen, limit, len);
        len += offset;
      }

      if (state < 4)
      {
        /* Only reached with state == 0 (coded distance): consume the position
           slot and the extra distance bits; the distance value itself is not
           needed here. */
        unsigned posSlot;
        prob = probs + PosSlot +
            ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) <<
            kNumPosSlotBits);
        TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
        if (posSlot >= kStartPosModelIndex)
        {
          int numDirectBits = ((posSlot >> 1) - 1);

          /* if (bufLimit - buf >= 8) return DUMMY_MATCH; */

          if (posSlot < kEndPosModelIndex)
          {
            prob = probs + SpecPos + ((2 | (posSlot & 1)) << numDirectBits) - posSlot - 1;
          }
          else
          {
            /* Direct bits (no probability model), then kNumAlignBits modelled bits. */
            numDirectBits -= kNumAlignBits;
            do
            {
              NORMALIZE_CHECK
              range >>= 1;
              /* Branch-free form of the commented-out statement below. */
              code -= range & (((code - range) >> 31) - 1);
              /* if (code >= range) code -= range; */
            }
            while (--numDirectBits != 0);
            prob = probs + Align;
            numDirectBits = kNumAlignBits;
          }
          {
            unsigned i = 1;
            do
            {
              GET_BIT_CHECK(prob + i, i);
            }
            while (--numDirectBits != 0);
          }
        }
      }
    }
  }
  /* The final normalization must be covered by the available input as well. */
  NORMALIZE_CHECK;
  return res;
}
11380
|
|
|
|
|
|
|
|
11381
|
|
|
|
|
|
|
// Primes the range coder from the buffered stream prefix pointed to by data.
// Bytes data[1]..data[4] are combined big-endian into the initial code value
// (data[0] is not read here), the range is reset to 0xFFFFFFFF and the
// needFlush flag is cleared.
static void LzmaDec_InitRc(CLzmaDec *p, const uint8_t *data)
{
  uint32_t initialCode = 0;
  for (unsigned idx = 1; idx <= 4; idx++)
    initialCode = (initialCode << 8) | (uint32_t)data[idx];

  p->code = initialCode;
  p->range = 0xFFFFFFFF;
  p->needFlush = 0;
}
11387
|
|
|
|
|
|
|
|
11388
|
0
|
|
|
|
|
|
// Resets the streaming bookkeeping of the decoder.
//   initDic   - also rewind processedPos and checkDicSize to zero.
//   initState - request re-initialisation of the decoder state.
// Either flag sets needInitState; needFlush is always raised and the
// pending-match length and temporary input buffer are always emptied.
void LzmaDec_InitDicAndState(CLzmaDec *p, bool initDic, bool initState)
{
  p->needFlush = 1;
  p->remainLen = 0;
  p->tempBufSize = 0;

  if (initDic)
  {
    p->processedPos = 0;
    p->checkDicSize = 0;
  }

  // Resetting the dictionary implies resetting the state as well.
  if (initDic || initState)
    p->needInitState = 1;
}
11403
|
|
|
|
|
|
|
|
11404
|
0
|
|
|
|
|
|
// Full reset of the decoder: rewinds the dictionary write position and
// requests re-initialisation of both the dictionary bookkeeping and the state.
void LzmaDec_Init(CLzmaDec *p)
{
  const bool resetDictionary = true;
  const bool resetState = true;

  p->dicPos = 0;
  LzmaDec_InitDicAndState(p, resetDictionary, resetState);
}
11409
|
|
|
|
|
|
|
|
11410
|
|
|
|
|
|
|
// Performs the state initialisation that needInitState asked for: every
// probability slot in use for the current lc/lp properties is set to
// kBitModelTotal / 2, the four repeat distances are set to 1, the state is
// set to 0 and the needInitState request is cleared.
static void LzmaDec_InitStateReal(CLzmaDec *p)
{
  const uint32_t probCount = Literal + ((uint32_t)LZMA_LIT_SIZE << (p->prop.lc + p->prop.lp));
  CLzmaProb *slots = p->probs;

  for (uint32_t idx = 0; idx != probCount; ++idx)
    slots[idx] = kBitModelTotal >> 1;

  for (unsigned r = 0; r < 4; ++r)
    p->reps[r] = 1;

  p->state = 0;
  p->needInitState = 0;
}
11421
|
|
|
|
|
|
|
|
11422
|
4
|
|
|
|
|
|
// Decodes input from src into the decoder's dictionary until the dictionary
// position reaches dicLimit, the input is exhausted, or the end marker has
// been decoded (remainLen == kMatchSpecLenStart).
//   srcLen     - in: number of bytes available at src; out: bytes consumed.
//   finishMode - with LZMA_FINISH_ANY the call may stop at dicLimit without
//                having seen an end marker; otherwise an end marker must
//                follow once dicLimit is reached.
//   status     - receives one of the LZMA_STATUS_* values describing why
//                decoding stopped.
// Returns SZ_OK or SZ_ERROR_DATA.
SRes LzmaDec_DecodeToDic(CLzmaDec *p, size_t dicLimit, const uint8_t *src, size_t *srcLen,
    ELzmaFinishMode finishMode, ELzmaStatus *status)
{
  size_t avail = *srcLen;
  *srcLen = 0;
  LzmaDec_WriteRem(p, dicLimit);

  *status = LZMA_STATUS_NOT_SPECIFIED;

  while (p->remainLen != kMatchSpecLenStart)
  {
    if (p->needFlush != 0)
    {
      // Collect the range-coder header in tempBuf; it may arrive in pieces.
      while (avail > 0 && p->tempBufSize < RC_INIT_SIZE)
      {
        p->tempBuf[p->tempBufSize++] = *src++;
        (*srcLen)++;
        avail--;
      }
      if (p->tempBufSize < RC_INIT_SIZE)
      {
        *status = LZMA_STATUS_NEEDS_MORE_INPUT;
        return SZ_OK;
      }
      // The first header byte has to be zero.
      if (p->tempBuf[0] != 0)
        return SZ_ERROR_DATA;

      LzmaDec_InitRc(p, p->tempBuf);
      p->tempBufSize = 0;
    }

    bool mustSeeEndMark = false;
    if (p->dicPos >= dicLimit)
    {
      if (p->remainLen == 0 && p->code == 0)
      {
        *status = LZMA_STATUS_MAYBE_FINISHED_WITHOUT_MARK;
        return SZ_OK;
      }
      if (finishMode == LZMA_FINISH_ANY)
      {
        *status = LZMA_STATUS_NOT_FINISHED;
        return SZ_OK;
      }
      if (p->remainLen != 0)
      {
        *status = LZMA_STATUS_NOT_FINISHED;
        return SZ_ERROR_DATA;
      }
      mustSeeEndMark = true;
    }

    if (p->needInitState)
      LzmaDec_InitStateReal(p);

    if (p->tempBufSize == 0)
    {
      // Decode straight from the caller's buffer.
      const uint8_t *bufLimit = src;
      if (avail < LZMA_REQUIRED_INPUT_MAX || mustSeeEndMark)
      {
        const int probe = LzmaDec_TryDummy(p, src, avail);
        if (probe == DUMMY_ERROR)
        {
          // Not enough input for one symbol: stash what there is and ask for more.
          memcpy(p->tempBuf, src, avail);
          p->tempBufSize = (unsigned)avail;
          (*srcLen) += avail;
          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
          return SZ_OK;
        }
        if (mustSeeEndMark && probe != DUMMY_MATCH)
        {
          *status = LZMA_STATUS_NOT_FINISHED;
          return SZ_ERROR_DATA;
        }
      }
      else
        bufLimit = src + avail - LZMA_REQUIRED_INPUT_MAX;

      p->buf = src;
      if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
        return SZ_ERROR_DATA;

      const size_t consumed = (size_t)(p->buf - src);
      (*srcLen) += consumed;
      src += consumed;
      avail -= consumed;
    }
    else
    {
      // Bytes stashed by an earlier call: top tempBuf up from src and decode from it.
      unsigned buffered = p->tempBufSize;
      unsigned borrowed = 0;
      while (buffered < LZMA_REQUIRED_INPUT_MAX && borrowed < avail)
        p->tempBuf[buffered++] = src[borrowed++];
      p->tempBufSize = buffered;

      if (buffered < LZMA_REQUIRED_INPUT_MAX || mustSeeEndMark)
      {
        const int probe = LzmaDec_TryDummy(p, p->tempBuf, buffered);
        if (probe == DUMMY_ERROR)
        {
          (*srcLen) += borrowed;
          *status = LZMA_STATUS_NEEDS_MORE_INPUT;
          return SZ_OK;
        }
        if (mustSeeEndMark && probe != DUMMY_MATCH)
        {
          *status = LZMA_STATUS_NOT_FINISHED;
          return SZ_ERROR_DATA;
        }
      }

      p->buf = p->tempBuf;
      if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
        return SZ_ERROR_DATA;

      // Hand back the copied bytes the decoder did not actually use.
      borrowed -= (buffered - (unsigned)(p->buf - p->tempBuf));
      (*srcLen) += borrowed;
      src += borrowed;
      avail -= borrowed;
      p->tempBufSize = 0;
    }
  }

  // The end marker was decoded; the stream is valid only if the code is zero.
  if (p->code == 0)
  {
    *status = LZMA_STATUS_FINISHED_WITH_MARK;
    return SZ_OK;
  }
  return SZ_ERROR_DATA;
}
11542
|
|
|
|
|
|
|
|
11543
|
0
|
|
|
|
|
|
// Decodes into the decoder's own dictionary buffer chunk by chunk and copies
// every decoded chunk to dest.
//   destLen - in: capacity of dest; out: number of bytes written.
//   srcLen  - in: bytes available at src; out: number of bytes consumed.
// Returns the first non-zero result of LzmaDec_DecodeToDic, or SZ_OK once
// dest is full or a round produced no output.
SRes LzmaDec_DecodeToBuf(CLzmaDec *p, uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen, ELzmaFinishMode finishMode, ELzmaStatus *status)
{
  size_t outLeft = *destLen;
  size_t inLeft = *srcLen;
  *srcLen = *destLen = 0;

  while (true)
  {
    // Wrap to the start of the dictionary buffer once it has been filled.
    if (p->dicPos == p->dicBufSize)
      p->dicPos = 0;
    const size_t startPos = p->dicPos;

    // When the request does not fit into the rest of the dictionary buffer,
    // decode up to the buffer end only and do not insist on finishing there.
    const bool fits = !(outLeft > p->dicBufSize - startPos);
    const size_t chunkLimit = fits ? startPos + outLeft : p->dicBufSize;
    const ELzmaFinishMode chunkMode = fits ? finishMode : LZMA_FINISH_ANY;

    size_t used = inLeft;
    const SRes res = LzmaDec_DecodeToDic(p, chunkLimit, src, &used, chunkMode, status);
    src += used;
    inLeft -= used;
    *srcLen += used;

    const size_t produced = p->dicPos - startPos;
    memcpy(dest, p->dic + startPos, produced);
    dest += produced;
    outLeft -= produced;
    *destLen += produced;

    if (res != 0)
      return res;
    if (produced == 0 || outLeft == 0)
      return SZ_OK;
  }
}
11582
|
|
|
|
|
|
|
|
11583
|
0
|
|
|
|
|
|
// Hands the probability table back to the allocator and clears the pointer.
void LzmaDec_FreeProbs(CLzmaDec *p, ISzAlloc *alloc)
{
  void *const table = p->probs;
  alloc->Free(alloc, table);
  p->probs = nullptr;
}
11588
|
|
|
|
|
|
|
|
11589
|
|
|
|
|
|
|
// Hands the dictionary buffer back to the allocator and clears the pointer.
static void LzmaDec_FreeDict(CLzmaDec *p, ISzAlloc *alloc)
{
  void *const window = p->dic;
  alloc->Free(alloc, window);
  p->dic = nullptr;
}
11594
|
|
|
|
|
|
|
|
11595
|
0
|
|
|
|
|
|
// Releases everything the decoder owns: first the probability table,
// then the dictionary buffer.
void LzmaDec_Free(CLzmaDec *p, ISzAlloc *alloc)
{
  LzmaDec_FreeProbs(p, alloc);
  LzmaDec_FreeDict(p, alloc);
}
11600
|
|
|
|
|
|
|
|
11601
|
4
|
|
|
|
|
|
// Parses an encoded LZMA properties header.
//   data[0]    - packed value (pb * 5 + lp) * 9 + lc, must be below 9 * 5 * 5.
//   data[1..4] - dictionary size, little-endian; raised to LZMA_DIC_MIN
//                when smaller.
// Returns SZ_ERROR_UNSUPPORTED when size < LZMA_PROPS_SIZE or the packed
// byte is out of range, SZ_OK otherwise. Note that p->dicSize is already
// written when the packed byte is rejected.
SRes LzmaProps_Decode(CLzmaProps *p, const uint8_t *data, unsigned size)
{
  if (size < LZMA_PROPS_SIZE)
    return SZ_ERROR_UNSUPPORTED;

  uint32_t dictBytes = 0;
  for (int idx = 4; idx >= 1; --idx)
    dictBytes = (dictBytes << 8) | (uint32_t)data[idx];
  if (dictBytes < LZMA_DIC_MIN)
    dictBytes = LZMA_DIC_MIN;
  p->dicSize = dictBytes;

  const unsigned packed = data[0];
  if (packed >= (9 * 5 * 5))
    return SZ_ERROR_UNSUPPORTED;

  const unsigned lpAndPb = packed / 9;
  p->lc = packed % 9;
  p->pb = lpAndPb / 5;
  p->lp = lpAndPb % 5;

  return SZ_OK;
}
11626
|
|
|
|
|
|
|
|
11627
|
8
|
|
|
|
|
|
// Makes sure the decoder owns a probability table sized for propNew.
// An existing table with the right number of entries is kept; otherwise it
// is freed and a new one is requested from alloc.
// Returns SZ_ERROR_MEM when the allocator returns a null pointer.
static SRes LzmaDec_AllocateProbs2(CLzmaDec *p, const CLzmaProps *propNew, ISzAlloc *alloc)
{
  const uint32_t wanted = LzmaProps_GetNumProbs(propNew);

  // Nothing to do when the current table already has the right size.
  if (p->probs != 0 && wanted == p->numProbs)
    return SZ_OK;

  LzmaDec_FreeProbs(p, alloc);
  p->probs = (CLzmaProb *)alloc->Alloc(alloc, wanted * sizeof(CLzmaProb));
  p->numProbs = wanted;

  if (p->probs == 0)
    return SZ_ERROR_MEM;
  return SZ_OK;
}
11640
|
|
|
|
|
|
|
|
11641
|
4
|
|
|
|
|
|
// Decodes the properties header in props and prepares a matching
// probability table in the decoder; the dictionary buffer is not touched.
// The decoded properties are stored in p->prop only after both steps
// succeeded.
// NOTE(review): RINOK is defined outside this section; presumably it returns
// the wrapped call's result from this function when that result is non-zero.
SRes LzmaDec_AllocateProbs(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc)
{
  CLzmaProps decoded;

  RINOK(LzmaProps_Decode(&decoded, props, propsSize));
  RINOK(LzmaDec_AllocateProbs2(p, &decoded, alloc));

  p->prop = decoded;
  return SZ_OK;
}
11649
|
|
|
|
|
|
|
|
11650
|
0
|
|
|
|
|
|
// Decodes the properties header in props and prepares both the probability
// table and a dictionary buffer of the decoded dictionary size.
// An existing dictionary buffer of the right size is reused. When the
// dictionary allocation fails, the probability table is released again and
// SZ_ERROR_MEM is returned.
// NOTE(review): RINOK is defined outside this section; presumably it returns
// the wrapped call's result from this function when that result is non-zero.
SRes LzmaDec_Allocate(CLzmaDec *p, const uint8_t *props, unsigned propsSize, ISzAlloc *alloc)
{
  CLzmaProps decoded;

  RINOK(LzmaProps_Decode(&decoded, props, propsSize));
  RINOK(LzmaDec_AllocateProbs2(p, &decoded, alloc));

  const size_t wantedSize = decoded.dicSize;
  const bool reusable = (p->dic != 0 && wantedSize == p->dicBufSize);
  if (!reusable)
  {
    LzmaDec_FreeDict(p, alloc);
    p->dic = (uint8_t *)alloc->Alloc(alloc, wantedSize);
    if (p->dic == 0)
    {
      LzmaDec_FreeProbs(p, alloc);
      return SZ_ERROR_MEM;
    }
  }

  p->dicBufSize = wantedSize;
  p->prop = decoded;
  return SZ_OK;
}
11671
|
|
|
|
|
|
|
|
11672
|
4
|
|
|
|
|
|
// One-call decoding of a complete LZMA stream into a caller-supplied buffer.
// dest itself serves as the dictionary, so only the probability table is
// allocated (and freed again) through alloc.
//   destLen - in: capacity of dest; out: number of bytes written.
//   srcLen  - in: size of src; out: number of bytes consumed.
// Returns SZ_ERROR_INPUT_EOF when the input is shorter than RC_INIT_SIZE or
// ends before decoding is complete; otherwise the result of the property
// decoding / allocation or of LzmaDec_DecodeToDic.
SRes LzmaDecode(uint8_t *dest, size_t *destLen, const uint8_t *src, size_t *srcLen,
    const uint8_t *propData, unsigned propSize, ELzmaFinishMode finishMode,
    ELzmaStatus *status, ISzAlloc *alloc)
{
  const size_t srcSize = *srcLen;
  const size_t destSize = *destLen;
  *srcLen = *destLen = 0;

  if (srcSize < RC_INIT_SIZE)
    return SZ_ERROR_INPUT_EOF;

  CLzmaDec decoder;
  LzmaDec_Construct(&decoder);

  SRes result = LzmaDec_AllocateProbs(&decoder, propData, propSize, alloc);
  if (result != 0)
    return result;

  // Decode directly into the output buffer.
  decoder.dic = dest;
  decoder.dicBufSize = destSize;
  LzmaDec_Init(&decoder);

  *srcLen = srcSize;
  result = LzmaDec_DecodeToDic(&decoder, destSize, src, srcLen, finishMode, status);

  // There is no further input to offer, so running out of it is an error.
  if (result == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
    result = SZ_ERROR_INPUT_EOF;

  *destLen = decoder.dicPos;
  LzmaDec_FreeProbs(&decoder, alloc);
  return result;
}
11703
|
|
|
|
|
|
|
|
11704
|
|
|
|
|
|
|
} // namespace lzma |
11705
|
|
|
|
|
|
|
// End of LZMA compression library by Igor Pavlov |
11706
|
|
|
|
|
|
|
|
11707
|
|
|
|
|
|
|
#ifndef UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
11708
|
|
|
|
|
|
|
#define UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
11709
|
8
|
|
|
|
|
|
// Allocation callback used for the LZMA decoder.
// Returns a block of `size` bytes obtained with new[], or a null pointer when
// the allocation fails. The decoder tests the callback's result against 0 and
// reports SZ_ERROR_MEM, so an allocation failure must not leave this function
// as an exception; any exception thrown by new[] is therefore translated into
// a null result.
static void *LzmaAlloc(void* /*p*/, size_t size) {
  try {
    return new char[size];
  } catch (...) {
    return nullptr;
  }
}
11710
|
16
|
100
|
|
|
|
|
// Deallocation callback matching LzmaAlloc: releases a block with delete[].
// A null address is accepted and ignored.
static void LzmaFree(void* /*p*/, void *address) {
  char *bytes = static_cast<char*>(address);
  delete[] bytes;
}
11711
|
|
|
|
|
|
|
// Allocator table passed to the LZMA decoder; its two callbacks are
// LzmaAlloc and LzmaFree, in that order.
static lzma::ISzAlloc lzmaAllocator = {LzmaAlloc, LzmaFree};
11712
|
|
|
|
|
|
|
#endif // UFAL_CPPUTILS_COMPRESSOR_LZMA_ALLOCATOR_H |
11713
|
|
|
|
|
|
|
|
11714
|
4
|
|
|
|
|
|
// Reads one LZMA-compressed block from `is` and decompresses it into `data`.
//
// The block starts with three uint32_t values read as raw bytes into host
// integers (uncompressed length, compressed length and a simple checksum of
// the two lengths), followed by LZMA_PROPS_SIZE bytes of encoded LZMA
// properties and then the compressed payload.
//
// Returns true only if every read succeeds, the checksum matches, and the
// decoder consumes exactly compressed_len bytes while producing exactly
// uncompressed_len bytes; returns false otherwise.
bool compressor::load(istream& is, binary_decoder& data) {
  uint32_t uncompressed_len, compressed_len, poor_crc;
  unsigned char props_encoded[LZMA_PROPS_SIZE];

  if (!is.read((char *) &uncompressed_len, sizeof(uncompressed_len))) return false;
  if (!is.read((char *) &compressed_len, sizeof(compressed_len))) return false;
  if (!is.read((char *) &poor_crc, sizeof(poor_crc))) return false;
  // Cheap sanity check of the header before any buffer is sized from it.
  if (poor_crc != uncompressed_len * 19991 + compressed_len * 199999991 + 1234567890) return false;
  if (!is.read((char *) props_encoded, sizeof(props_encoded))) return false;

  // The element type must be spelled out: it cannot be deduced from a size.
  vector<unsigned char> compressed(compressed_len);
  if (!is.read((char *) compressed.data(), compressed_len)) return false;

  lzma::ELzmaStatus status;
  size_t uncompressed_size = uncompressed_len, compressed_size = compressed_len;
  // NOTE(review): data.fill(uncompressed_len) presumably returns a writable
  // buffer of that many bytes inside `data` - confirm against binary_decoder.
  auto res = lzma::LzmaDecode(data.fill(uncompressed_len), &uncompressed_size, compressed.data(), &compressed_size, props_encoded, LZMA_PROPS_SIZE, lzma::LZMA_FINISH_ANY, &status, &lzmaAllocator);
  if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;

  return true;
}
11734
|
|
|
|
|
|
|
|
11735
|
|
|
|
|
|
|
} // namespace utils |
11736
|
|
|
|
|
|
|
|
11737
|
|
|
|
|
|
|
///////// |
11738
|
|
|
|
|
|
|
// File: version/version.h |
11739
|
|
|
|
|
|
|
///////// |
11740
|
|
|
|
|
|
|
|
11741
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
11742
|
|
|
|
|
|
|
// |
11743
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
11744
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
11745
|
|
|
|
|
|
|
// |
11746
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
11747
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
11748
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11749
|
|
|
|
|
|
|
|
11750
|
0
|
|
|
|
|
|
class version { |
11751
|
|
|
|
|
|
|
public: |
11752
|
|
|
|
|
|
|
unsigned major; |
11753
|
|
|
|
|
|
|
unsigned minor; |
11754
|
|
|
|
|
|
|
unsigned patch; |
11755
|
|
|
|
|
|
|
string prerelease; |
11756
|
|
|
|
|
|
|
|
11757
|
|
|
|
|
|
|
// Returns current MorphoDiTa version. |
11758
|
|
|
|
|
|
|
static version current(); |
11759
|
|
|
|
|
|
|
|
11760
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
11761
|
|
|
|
|
|
|
static string version_and_copyright(const string& other_libraries = string()); |
11762
|
|
|
|
|
|
|
}; |
11763
|
|
|
|
|
|
|
|
11764
|
|
|
|
|
|
|
///////// |
11765
|
|
|
|
|
|
|
// File: version/version.cpp |
11766
|
|
|
|
|
|
|
///////// |
11767
|
|
|
|
|
|
|
|
11768
|
|
|
|
|
|
|
// This file is part of MorphoDiTa . |
11769
|
|
|
|
|
|
|
// |
11770
|
|
|
|
|
|
|
// Copyright 2015 Institute of Formal and Applied Linguistics, Faculty of |
11771
|
|
|
|
|
|
|
// Mathematics and Physics, Charles University in Prague, Czech Republic. |
11772
|
|
|
|
|
|
|
// |
11773
|
|
|
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public |
11774
|
|
|
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this |
11775
|
|
|
|
|
|
|
// file, You can obtain one at http://mozilla.org/MPL/2.0/. |
11776
|
|
|
|
|
|
|
|
11777
|
0
|
|
|
|
|
|
// Returns the version of this MorphoDiTa build: 1.11.0 with an empty
// pre-release tag.
version version::current() {
  version result = {1, 11, 0, ""};
  return result;
}
11780
|
|
|
|
|
|
|
|
11781
|
|
|
|
|
|
|
// Returns multi-line formated version and copyright string. |
11782
|
0
|
|
|
|
|
|
string version::version_and_copyright(const string& other_libraries) { |
11783
|
0
|
|
|
|
|
|
ostringstream info; |
11784
|
|
|
|
|
|
|
|
11785
|
|
|
|
|
|
|
auto morphodita = version::current(); |
11786
|
|
|
|
|
|
|
auto unilib = unilib::version::current(); |
11787
|
|
|
|
|
|
|
|
11788
|
0
|
|
|
|
|
|
info << "MorphoDiTa version " << morphodita.major << '.' << morphodita.minor << '.' << morphodita.patch |
11789
|
0
|
0
|
|
|
|
|
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
|
|
0
|
|
|
|
|
|
11790
|
0
|
|
|
|
|
|
<< " (using UniLib " << unilib.major << '.' << unilib.minor << '.' << unilib.patch |
11791
|
0
|
0
|
|
|
|
|
<< (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n" |
|
|
0
|
|
|
|
|
|
11792
|
|
|
|
|
|
|
"Copyright 2015 by Institute of Formal and Applied Linguistics, Faculty of\n" |
11793
|
0
|
0
|
|
|
|
|
"Mathematics and Physics, Charles University in Prague, Czech Republic."; |
11794
|
|
|
|
|
|
|
|
11795
|
0
|
|
|
|
|
|
return info.str(); |
11796
|
|
|
|
|
|
|
} |
11797
|
|
|
|
|
|
|
|
11798
|
|
|
|
|
|
|
} // namespace morphodita |
11799
|
16
|
50
|
|
|
|
|
} // namespace ufal |
|
|
50
|
|
|
|
|
|