line |
true |
false |
branch |
106
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
|
0 |
0 |
return a.len == b.len && memcmp(a.str, b.str, a.len) == 0; |
376
|
0 |
0 |
format_tagged_lemma(result); |
381
|
0 |
0 |
for (auto&& lemma : lemmas) |
384
|
0 |
0 |
if (lemmas.size() > 1) |
392
|
0 |
0 |
if (converter) converter->convert(lemma); |
396
|
0 |
0 |
if (converter) converter->convert_analyzed(lemmas); |
409
|
0 |
0 |
for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); ) |
|
0 |
0 |
for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); ) |
411
|
0 |
0 |
if (converter) converter->convert(lemma); |
419
|
0 |
0 |
return derinet ? new root_derivation_formatter(derinet) : nullptr; |
|
0 |
0 |
return derinet ? new root_derivation_formatter(derinet) : nullptr; |
428
|
0 |
0 |
if (converter) converter->convert(lemma); |
|
0 |
0 |
if (converter) converter->convert(lemma); |
429
|
0 |
0 |
for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) { |
|
0 |
0 |
for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) { |
430
|
0 |
0 |
tagged_lemma parrent_lemma(parent.lemma, current.tag); |
431
|
0 |
0 |
if (converter) converter->convert(parrent_lemma); |
|
0 |
0 |
if (converter) converter->convert(parrent_lemma); |
432
|
0 |
0 |
lemma.lemma.append(" ").append(parrent_lemma.lemma); |
441
|
0 |
0 |
return derinet ? new path_derivation_formatter(derinet) : nullptr; |
|
0 |
0 |
return derinet ? new path_derivation_formatter(derinet) : nullptr; |
450
|
0 |
0 |
if (converter) converter->convert(lemma); |
|
0 |
0 |
if (converter) converter->convert(lemma); |
451
|
0 |
0 |
for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {} |
|
0 |
0 |
for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {} |
452
|
0 |
0 |
format_tree(root, tag, lemma, converter); |
458
|
0 |
0 |
if (converter) { |
459
|
0 |
0 |
tagged_lemma current(root, tag); |
460
|
0 |
0 |
converter->convert(current); |
461
|
0 |
0 |
tree.lemma.append(" ").append(current.lemma); |
463
|
0 |
0 |
tree.lemma.append(" ").append(root); |
466
|
0 |
0 |
if (derinet->children(root, children)) |
|
0 |
0 |
if (derinet->children(root, children)) |
467
|
0 |
0 |
for (auto&& child : children) |
468
|
0 |
0 |
format_tree(child.lemma, tag, tree, converter); |
469
|
0 |
0 |
tree.lemma.push_back(' '); |
477
|
0 |
0 |
return derinet ? new tree_derivation_formatter(derinet) : nullptr; |
|
0 |
0 |
return derinet ? new tree_derivation_formatter(derinet) : nullptr; |
481
|
0 |
0 |
if (name == "none") return new_none_derivation_formatter(); |
482
|
0 |
0 |
if (name == "root") return new_root_derivation_formatter(derinet); |
483
|
0 |
0 |
if (name == "path") return new_path_derivation_formatter(derinet); |
484
|
0 |
0 |
if (name == "tree") return new_tree_derivation_formatter(derinet); |
510
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
127 |
16 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
53 |
12 |
while (len--) |
|
146 |
30 |
while (len--) |
|
24 |
4 |
while (len--) |
|
68 |
6 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
511
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
47 |
80 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
39 |
14 |
if (*a++ != *b++) |
|
137 |
9 |
if (*a++ != *b++) |
|
24 |
0 |
if (*a++ != *b++) |
|
54 |
14 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
|
0 |
0 |
if (*a++ != *b++) |
520
|
0 |
0 |
while (len--) |
|
346 |
158 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
8 |
48 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
112 |
18 |
while (len--) |
545
|
0 |
0 |
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
|
0 |
0 |
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
|
0 |
0 |
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
|
0 |
0 |
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
|
0 |
0 |
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
|
0 |
0 |
explicit binary_decoder_error(const char* description) : runtime_error(description) {} |
548
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
|
0 |
0 |
class binary_decoder { |
573
|
4 |
0 |
buffer.resize(len); |
581
|
0 |
463 |
if (data + 1 > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
586
|
0 |
30 |
if (data + sizeof(uint16_t) > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
594
|
0 |
472 |
if (data + sizeof(uint32_t) > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
608
|
0 |
140 |
if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
|
0 |
79 |
if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder"); |
623
|
0 |
2 |
if (pos > buffer.size()) throw binary_decoder_error("Cannot seek past end of binary_decoder"); |
857
|
11 |
6 |
while (size) { |
|
0 |
0 |
while (size) { |
|
0 |
0 |
while (size) { |
859
|
6 |
5 |
if (unaligned_load(first + step) < val) { |
|
0 |
0 |
if (unaligned_load(first + step) < val) { |
|
0 |
0 |
if (unaligned_load(first + step) < val) { |
948
|
104 |
32 |
while (mask < num) |
950
|
32 |
0 |
hash.resize(mask + 1); |
954
|
140 |
0 |
uint32_t size = data.next_4B(); |
956
|
140 |
0 |
hash.resize(size); |
957
|
140 |
0 |
memcpy(hash.data(), data.next(size), size * sizeof(uint32_t)); |
959
|
140 |
0 |
size = data.next_4B(); |
960
|
140 |
0 |
this->data.resize(size); |
961
|
71 |
69 |
if (size) memcpy(this->data.data(), data.next(size), size); |
|
71 |
0 |
if (size) memcpy(this->data.data(), data.next(size), size); |
965
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
4 |
0 |
if (len <= 0) return 0; |
|
21 |
0 |
if (len <= 0) return 0; |
|
21 |
15 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
0 |
0 |
if (len <= 0) return 0; |
|
158 |
0 |
if (len <= 0) return 0; |
|
49 |
0 |
if (len <= 0) return 0; |
|
18 |
0 |
if (len <= 0) return 0; |
|
18 |
0 |
if (len <= 0) return 0; |
966
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
4 |
if (len == 1) return unaligned_load(data); |
|
3 |
18 |
if (len == 1) return unaligned_load(data); |
|
15 |
6 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
0 |
if (len == 1) return unaligned_load(data); |
|
0 |
158 |
if (len == 1) return unaligned_load(data); |
|
9 |
40 |
if (len == 1) return unaligned_load(data); |
|
0 |
18 |
if (len == 1) return unaligned_load(data); |
|
0 |
18 |
if (len == 1) return unaligned_load(data); |
967
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
4 |
0 |
if (len == 2) return unaligned_load(data); |
|
17 |
1 |
if (len == 2) return unaligned_load(data); |
|
0 |
6 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
0 |
0 |
if (len == 2) return unaligned_load(data); |
|
30 |
128 |
if (len == 2) return unaligned_load(data); |
|
34 |
6 |
if (len == 2) return unaligned_load(data); |
|
18 |
0 |
if (len == 2) return unaligned_load(data); |
|
18 |
0 |
if (len == 2) return unaligned_load(data); |
970
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
24 |
4 |
while (len--) |
|
93 |
17 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
0 |
0 |
while (len--) |
|
90 |
30 |
while (len--) |
|
144 |
34 |
while (len--) |
|
112 |
18 |
while (len--) |
|
112 |
18 |
while (len--) |
984
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
36 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
990
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
36 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
|
0 |
0 |
if (len <= 2) |
991
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
21 |
15 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
|
0 |
0 |
return data != end ? data + len : nullptr; |
993
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
994
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
|
0 |
0 |
if (small_memeq(str, data, len)) return data + len; |
1005
|
158 |
0 |
if (unsigned(len) >= hashes.size()) return nullptr; |
|
49 |
71 |
if (unsigned(len) >= hashes.size()) return nullptr; |
1011
|
30 |
128 |
if (len <= 2) |
|
34 |
15 |
if (len <= 2) |
1012
|
88 |
40 |
return data != end ? (const T*)(data + len) : nullptr; |
|
15 |
0 |
return data != end ? (const T*)(data + len) : nullptr; |
1014
|
26 |
18 |
while (data < end) { |
|
39 |
4 |
while (data < end) { |
1015
|
12 |
14 |
if (small_memeq(str, data, len)) return (const T*)(data + len); |
|
30 |
9 |
if (small_memeq(str, data, len)) return (const T*)(data + len); |
1024
|
4 |
0 |
if (unsigned(len) >= hashes.size()) return; |
|
21 |
0 |
if (unsigned(len) >= hashes.size()) return; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return; |
|
0 |
0 |
if (unsigned(len) >= hashes.size()) return; |
1030
|
4 |
4 |
while (data < end) { |
|
20 |
21 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
1040
|
6 |
2 |
for (unsigned len = 0; len < hashes.size(); len++) { |
|
0 |
0 |
for (unsigned len = 0; len < hashes.size(); len++) { |
|
0 |
0 |
for (unsigned len = 0; len < hashes.size(); len++) { |
1044
|
26 |
6 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
|
0 |
0 |
while (data < end) { |
1058
|
14 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
0 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
0 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
0 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
0 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
0 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
|
0 |
0 |
return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr; |
1062
|
4 |
28 |
if (hashes.size() == 0) hashes.emplace_back(1); |
1063
|
4 |
24 |
else if (hashes.size() == 1) hashes.emplace_back(1<<8); |
1064
|
4 |
20 |
else if (hashes.size() == 2) hashes.emplace_back(1<<16); |
1069
|
18 |
0 |
if (unsigned(str_len) < hashes.size()) |
1074
|
32 |
4 |
for (auto&& hash : hashes) { |
1076
|
263232 |
32 |
for (auto&& len : hash.hash) total += len, len = total - len; |
1082
|
18 |
0 |
if (unsigned(str_len) < hashes.size()) { |
1093
|
32 |
4 |
for (auto&& hash : hashes) |
1094
|
263232 |
32 |
for (int i = hash.hash.size() - 1; i >= 0; i--) |
1095
|
263200 |
32 |
hash.hash[i] = i > 0 ? hash.hash[i-1] : 0; |
1102
|
140 |
49 |
for (unsigned i = 0; i < sizes; i++) |
1172
|
0 |
0 |
if (dictionary) lemma.len = dictionary->lemma_id_len(lemma); |
1179
|
0 |
0 |
if (lemma_data) { |
1181
|
0 |
0 |
if (parent_encoded) { |
1185
|
0 |
0 |
if (parent_data[parent_len]) |
1195
|
0 |
0 |
if (dictionary) lemma.len = dictionary->lemma_id_len(lemma); |
1202
|
0 |
0 |
if (lemma_data) { |
1205
|
0 |
0 |
if (children_len) { |
1207
|
0 |
0 |
for (unsigned i = 0; i < children_len; i++) { |
1211
|
0 |
0 |
if (child_data[child_len]) |
1223
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
1226
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
1227
|
0 |
0 |
derinet.resize(data.next_4B()); |
|
0 |
0 |
derinet.resize(data.next_4B()); |
1231
|
0 |
0 |
for (int pass = 1; pass <= 3; pass++) { |
1232
|
0 |
0 |
if (pass > 1) data.seek(data_position); |
|
0 |
0 |
if (pass > 1) data.seek(data_position); |
1235
|
0 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
|
0 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
1236
|
0 |
0 |
lemma.resize(lemma.size() - data.next_1B()); |
|
0 |
0 |
lemma.resize(lemma.size() - data.next_1B()); |
1237
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
1238
|
0 |
0 |
lemma.push_back(data.next_1B()); |
1240
|
0 |
0 |
unsigned char lemma_comment_len = data.next_1B(); |
1241
|
0 |
0 |
const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr; |
|
0 |
0 |
const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr; |
1243
|
0 |
0 |
unsigned children = data.next_2B(); |
1245
|
0 |
0 |
if (pass == 3) parent.clear(); |
1247
|
0 |
0 |
int operations = data.next_1B(); |
1248
|
0 |
0 |
if (operations) { |
1249
|
0 |
0 |
int remove_start = operations & REMOVE_START ? data.next_1B() : 0; |
|
0 |
0 |
int remove_start = operations & REMOVE_START ? data.next_1B() : 0; |
1250
|
0 |
0 |
int remove_end = operations & REMOVE_END ? data.next_1B() : 0; |
|
0 |
0 |
int remove_end = operations & REMOVE_END ? data.next_1B() : 0; |
1251
|
0 |
0 |
if (operations & ADD_START) { |
1252
|
0 |
0 |
int add_start = data.next_1B(); |
1253
|
0 |
0 |
const char* str = data.next(add_start); |
1254
|
0 |
0 |
if (pass == 3) parent.assign(str, str + add_start); |
1256
|
0 |
0 |
if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end); |
|
0 |
0 |
if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end); |
1257
|
0 |
0 |
if (operations & ADD_END) { |
1258
|
0 |
0 |
int add_end = data.next_1B(); |
1259
|
0 |
0 |
const char* str = data.next(add_end); |
1260
|
0 |
0 |
if (pass == 3) parent.insert(parent.end(), str, str + add_end); |
1264
|
0 |
0 |
if (pass == 1) { |
1266
|
0 |
0 |
} else if (pass == 2) { |
1269
|
0 |
0 |
while (lemma_comment_len--) *lemma_data++ = *lemma_comment++; |
1272
|
0 |
0 |
if (children) unaligned_store(((uint32_t*)lemma_data) + children - 1, 0); |
1273
|
0 |
0 |
} else if (pass == 3 && !parent.empty()) { |
|
0 |
0 |
} else if (pass == 3 && !parent.empty()) { |
|
0 |
0 |
} else if (pass == 3 && !parent.empty()) { |
1284
|
0 |
0 |
assert(lemma_data && parent_data); |
1287
|
0 |
0 |
assert(parent.size() < (1<<8) && parent_offset < (1<<24)); |
|
0 |
0 |
assert(parent.size() < (1<<8) && parent_offset < (1<<24)); |
1291
|
0 |
0 |
assert(lemma.size() < (1<<8) && lemma_offset < (1<<24)); |
|
0 |
0 |
assert(lemma.size() < (1<<8) && lemma_offset < (1<<24)); |
1296
|
0 |
0 |
if (child_index+1 < children_len) |
1301
|
0 |
0 |
if (pass == 1) |
1302
|
0 |
0 |
derinet.done_adding(); |
1303
|
0 |
0 |
if (pass == 2) |
1305
|
0 |
0 |
} |
1374
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
14 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
3 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
15 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
3 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
115 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
8 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
8 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
3 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
6 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
6 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
0 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
13 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
|
34 |
0 |
return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT; |
1378
|
13 |
0 |
if (chr < CHARS) { |
1380
|
3 |
10 |
if ((othercase & 0xFF) == othercase_type::LOWER_ONLY) return othercase >> 8; |
1381
|
0 |
10 |
if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase >> 8; |
1382
|
0 |
10 |
if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8; |
1487
|
0 |
0 |
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
1488
|
0 |
0 |
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
1489
|
0 |
0 |
else if (((unsigned char)*str) < 0xE0) { |
1491
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1493
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF0) { |
1495
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1497
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1499
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF8) { |
1501
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1503
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1505
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1511
|
218 |
1 |
if (!len) return 0; |
1513
|
193 |
25 |
if (((unsigned char)*str) < 0x80) return (unsigned char)*str++; |
1514
|
0 |
25 |
else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR; |
1515
|
22 |
3 |
else if (((unsigned char)*str) < 0xE0) { |
1517
|
22 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
22 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
22 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1519
|
3 |
0 |
} else if (((unsigned char)*str) < 0xF0) { |
1521
|
3 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
3 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
3 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1523
|
3 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
3 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
3 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1525
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF8) { |
1527
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1529
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1531
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
|
0 |
0 |
if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR; |
1622
|
13 |
0 |
if (chr < 0x80) str += chr; |
1623
|
0 |
0 |
else if (chr < 0x800) { str += 0xC0 + (chr >> 6); str += 0x80 + (chr & 0x3F); } |
1624
|
0 |
0 |
else if (chr < 0x10000) { str += 0xE0 + (chr >> 12); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
1625
|
0 |
0 |
else if (chr < 0x200000) { str += 0xF0 + (chr >> 18); str += 0x80 + ((chr >> 12) & 0x3F); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); } |
1639
|
0 |
0 |
while (len) |
1671
|
34 |
13 |
while (form_tmp.len && !rest_has_Lut) |
|
34 |
0 |
while (form_tmp.len && !rest_has_Lut) |
1680
|
2 |
11 |
if (first_Lut && !rest_has_Lut) { // common case allowing fast execution |
1685
|
0 |
11 |
} else if (!first_Lut && rest_has_Lut) { |
1688
|
0 |
11 |
} else if (first_Lut && rest_has_Lut) { |
1695
|
0 |
0 |
while (form_tmp.len) { |
1732
|
0 |
0 |
for (unsigned len = 1; len < lemma.len; len++) |
1733
|
0 |
0 |
if (lemma.str[len] == '`' || lemma.str[len] == '_' || |
|
0 |
0 |
if (lemma.str[len] == '`' || lemma.str[len] == '_' || |
1734
|
0 |
0 |
(lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9')) |
|
0 |
0 |
(lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9')) |
|
0 |
0 |
(lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9')) |
1741
|
0 |
0 |
for (unsigned len = 1; len < lemma.len; len++) { |
1742
|
0 |
0 |
if (lemma.str[len] == '`' || lemma.str[len] == '_') |
1744
|
0 |
0 |
if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') { |
|
0 |
0 |
if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') { |
|
0 |
0 |
if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') { |
|
0 |
0 |
if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') { |
1746
|
0 |
0 |
while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++; |
|
0 |
0 |
while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++; |
|
0 |
0 |
while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++; |
1756
|
0 |
0 |
if (addinfo_len) { |
1757
|
0 |
0 |
res.reserve(addinfo_len + 4); |
1758
|
0 |
0 |
if (addinfo[0] != 255) { |
1763
|
0 |
0 |
for (int i = 1; i < addinfo_len; i++) |
1771
|
0 |
0 |
for (int i = 1; i + 2 < addinfo_len; i++) |
1772
|
0 |
0 |
if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x') |
|
0 |
0 |
if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x') |
|
0 |
0 |
if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x') |
1782
|
0 |
0 |
if (lemma_info < lemma.str + lemma.len) { |
1786
|
0 |
0 |
if (*lemma_info == '-') { |
1789
|
0 |
0 |
lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9'); |
|
0 |
0 |
lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9'); |
1793
|
0 |
0 |
if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) { |
|
0 |
0 |
if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) { |
|
0 |
0 |
if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) { |
|
0 |
0 |
if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) { |
|
0 |
0 |
if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) { |
1794
|
0 |
0 |
if (die_on_failure) |
1801
|
0 |
0 |
while (lemma_additional_info < lemma.str + lemma.len) |
1804
|
0 |
0 |
if (data.size() > 255) { |
1805
|
0 |
0 |
if (die_on_failure) |
1816
|
0 |
0 |
if (data.empty()) return true; |
1817
|
0 |
0 |
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
|
0 |
0 |
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
|
0 |
0 |
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
|
0 |
0 |
if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false; |
1857
|
20 |
20 |
if (filters.empty()) return true; |
1860
|
36 |
3 |
for (auto&& filter : filters) { |
1862
|
30 |
36 |
while (tag_pos < filter.pos) |
1863
|
30 |
0 |
if (!tag[tag_pos++]) |
1865
|
36 |
0 |
if (!tag[tag_pos]) |
1870
|
7 |
35 |
for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++) |
|
6 |
1 |
for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++) |
1872
|
19 |
17 |
if (!matched) return false; |
1908
|
14 |
2 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
1910
|
18 |
2 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
1916
|
2 |
0 |
vector root(max(lemmas.max_length(), roots.max_length())); |
|
0 |
0 |
vector root(max(lemmas.max_length(), roots.max_length())); |
|
0 |
0 |
vector root(max(lemmas.max_length(), roots.max_length())); |
1918
|
4 |
2 |
for (int pass = 1; pass <= 2; pass++) { |
|
0 |
0 |
for (int pass = 1; pass <= 2; pass++) { |
|
0 |
0 |
for (int pass = 1; pass <= 2; pass++) { |
1919
|
2 |
2 |
if (pass > 1) data.seek(data_position); |
|
2 |
0 |
if (pass > 1) data.seek(data_position); |
|
0 |
0 |
if (pass > 1) data.seek(data_position); |
|
0 |
0 |
if (pass > 1) data.seek(data_position); |
|
0 |
0 |
if (pass > 1) data.seek(data_position); |
|
0 |
0 |
if (pass > 1) data.seek(data_position); |
1924
|
4 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
|
8 |
4 |
for (int i = data.next_4B(); i > 0; i--) { |
|
0 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
|
0 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
|
0 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
|
0 |
0 |
for (int i = data.next_4B(); i > 0; i--) { |
1925
|
8 |
0 |
lemma_len -= data.next_1B(); |
|
0 |
0 |
lemma_len -= data.next_1B(); |
|
0 |
0 |
lemma_len -= data.next_1B(); |
1926
|
8 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
48 |
8 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
|
0 |
0 |
for (int i = data.next_1B(); i > 0; i--) |
1927
|
48 |
0 |
lemma[lemma_len++] = data.next_1B(); |
|
0 |
0 |
lemma[lemma_len++] = data.next_1B(); |
|
0 |
0 |
lemma[lemma_len++] = data.next_1B(); |
1928
|
8 |
0 |
unsigned char lemma_info_len = data.next_1B(); |
|
0 |
0 |
unsigned char lemma_info_len = data.next_1B(); |
|
0 |
0 |
unsigned char lemma_info_len = data.next_1B(); |
1929
|
0 |
8 |
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
0 |
0 |
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
0 |
0 |
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
0 |
0 |
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
0 |
0 |
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
|
0 |
0 |
const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr; |
1930
|
8 |
0 |
unsigned lemma_roots = data.next_1B(); |
|
0 |
0 |
unsigned lemma_roots = data.next_1B(); |
|
0 |
0 |
unsigned lemma_roots = data.next_1B(); |
1935
|
4 |
4 |
if (pass == 1) { |
|
0 |
0 |
if (pass == 1) { |
|
0 |
0 |
if (pass == 1) { |
1942
|
0 |
4 |
if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len; |
|
0 |
0 |
if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len; |
|
0 |
0 |
if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len; |
1947
|
28 |
8 |
for (unsigned i = 0; i < lemma_roots; i++) { |
|
0 |
0 |
for (unsigned i = 0; i < lemma_roots; i++) { |
|
0 |
0 |
for (unsigned i = 0; i < lemma_roots; i++) { |
1949
|
28 |
0 |
int operations = data.next_1B(); |
|
0 |
0 |
int operations = data.next_1B(); |
|
0 |
0 |
int operations = data.next_1B(); |
1950
|
0 |
28 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
|
0 |
0 |
if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; } |
1951
|
24 |
4 |
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
24 |
0 |
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
0 |
0 |
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
0 |
0 |
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
0 |
0 |
if (operations & REMOVE_END) root_len -= data.next_1B(); |
|
0 |
0 |
if (operations & REMOVE_END) root_len -= data.next_1B(); |
1952
|
0 |
28 |
if (operations & ADD_START) { |
|
0 |
0 |
if (operations & ADD_START) { |
|
0 |
0 |
if (operations & ADD_START) { |
1953
|
0 |
0 |
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
0 |
0 |
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
0 |
0 |
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
0 |
0 |
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
0 |
0 |
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
|
0 |
0 |
int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to; |
1954
|
0 |
0 |
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
0 |
0 |
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
0 |
0 |
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
0 |
0 |
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
0 |
0 |
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
|
0 |
0 |
for (int i = 0; i < to; i++) root[i] = data.next_1B(); |
1956
|
28 |
0 |
if (operations & ADD_END) |
|
0 |
0 |
if (operations & ADD_END) |
|
0 |
0 |
if (operations & ADD_END) |
1957
|
28 |
0 |
for (int len = data.next_1B(); len > 0; len--) |
|
56 |
28 |
for (int len = data.next_1B(); len > 0; len--) |
|
0 |
0 |
for (int len = data.next_1B(); len > 0; len--) |
|
0 |
0 |
for (int len = data.next_1B(); len > 0; len--) |
|
0 |
0 |
for (int len = data.next_1B(); len > 0; len--) |
|
0 |
0 |
for (int len = data.next_1B(); len > 0; len--) |
1958
|
56 |
0 |
root[root_len++] = data.next_1B(); |
|
0 |
0 |
root[root_len++] = data.next_1B(); |
|
0 |
0 |
root[root_len++] = data.next_1B(); |
1959
|
28 |
0 |
uint16_t clas = data.next_2B(); |
|
0 |
0 |
uint16_t clas = data.next_2B(); |
|
0 |
0 |
uint16_t clas = data.next_2B(); |
1961
|
14 |
14 |
if (pass == 1) { // for each root |
|
0 |
0 |
if (pass == 1) { // for each root |
|
0 |
0 |
if (pass == 1) { // for each root |
1970
|
0 |
14 |
assert(uint8_t(lemma_len) == lemma_len); |
|
0 |
0 |
assert(uint8_t(lemma_len) == lemma_len); |
|
0 |
0 |
assert(uint8_t(lemma_len) == lemma_len); |
1975
|
0 |
14 |
assert(uint8_t(root_len) == root_len); |
|
0 |
0 |
assert(uint8_t(root_len) == root_len); |
|
0 |
0 |
assert(uint8_t(root_len) == root_len); |
1980
|
2 |
2 |
if (pass == 1) { // after the whole pass |
|
0 |
0 |
if (pass == 1) { // after the whole pass |
|
0 |
0 |
if (pass == 1) { // after the whole pass |
1981
|
2 |
0 |
lemmas.done_adding(); |
|
0 |
0 |
lemmas.done_adding(); |
|
0 |
0 |
lemmas.done_adding(); |
1982
|
2 |
0 |
roots.done_adding(); |
|
0 |
0 |
roots.done_adding(); |
|
0 |
0 |
roots.done_adding(); |
1990
|
2 |
0 |
tags.resize(data.next_2B()); |
|
2 |
0 |
tags.resize(data.next_2B()); |
|
0 |
0 |
tags.resize(data.next_2B()); |
|
0 |
0 |
tags.resize(data.next_2B()); |
|
0 |
0 |
tags.resize(data.next_2B()); |
|
0 |
0 |
tags.resize(data.next_2B()); |
1991
|
40 |
2 |
for (auto&& tag : tags) { |
|
0 |
0 |
for (auto&& tag : tags) { |
|
0 |
0 |
for (auto&& tag : tags) { |
1992
|
40 |
0 |
tag.resize(data.next_1B()); |
|
0 |
0 |
tag.resize(data.next_1B()); |
|
0 |
0 |
tag.resize(data.next_1B()); |
1993
|
120 |
40 |
for (unsigned i = 0; i < tag.size(); i++) |
|
0 |
0 |
for (unsigned i = 0; i < tag.size(); i++) |
|
0 |
0 |
for (unsigned i = 0; i < tag.size(); i++) |
1994
|
120 |
0 |
tag[i] = data.next_1B(); |
|
0 |
0 |
tag[i] = data.next_1B(); |
|
0 |
0 |
tag[i] = data.next_1B(); |
1998
|
2 |
0 |
suffixes.load(data); |
|
0 |
0 |
suffixes.load(data); |
|
0 |
0 |
suffixes.load(data); |
2001
|
2 |
0 |
suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable { |
|
0 |
0 |
suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable { |
|
0 |
0 |
suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable { |
2008
|
30 |
26 |
for (unsigned i = 0; i < classes_len; i++) { |
|
0 |
0 |
for (unsigned i = 0; i < classes_len; i++) { |
|
0 |
0 |
for (unsigned i = 0; i < classes_len; i++) { |
2010
|
10 |
20 |
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
10 |
0 |
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
0 |
0 |
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
0 |
0 |
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
0 |
0 |
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
|
0 |
0 |
if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1); |
2011
|
30 |
0 |
classes[classes_ptr_i].emplace_back(suffix_str, vector()); |
|
0 |
0 |
classes[classes_ptr_i].emplace_back(suffix_str, vector()); |
|
0 |
0 |
classes[classes_ptr_i].emplace_back(suffix_str, vector()); |
2012
|
40 |
30 |
for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i), |
|
0 |
0 |
for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i), |
|
0 |
0 |
for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i), |
2015
|
40 |
0 |
classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr)); |
|
0 |
0 |
classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr)); |
|
0 |
0 |
classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr)); |
2025
|
0 |
15 |
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
0 |
0 |
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
0 |
0 |
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
0 |
0 |
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
0 |
0 |
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
|
0 |
0 |
uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data()); |
2027
|
36 |
0 |
for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) { |
|
0 |
0 |
for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) { |
|
0 |
0 |
for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) { |
2035
|
21 |
15 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
21 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
21 |
15 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
0 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
0 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
0 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
0 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
0 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
|
0 |
0 |
for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++) |
2036
|
21 |
0 |
if (unaligned_load(suff[suff_len])) { |
|
0 |
0 |
if (unaligned_load(suff[suff_len])) { |
|
0 |
0 |
if (unaligned_load(suff[suff_len])) { |
2040
|
21 |
0 |
roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) { |
|
0 |
0 |
roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) { |
|
0 |
0 |
roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) { |
2045
|
6 |
14 |
if (small_memeq(form.str, root, root_len)) { |
|
0 |
0 |
if (small_memeq(form.str, root, root_len)) { |
|
0 |
0 |
if (small_memeq(form.str, root, root_len)) { |
2047
|
6 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
6 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
6 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
|
0 |
0 |
if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) { |
2050
|
0 |
6 |
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
|
0 |
0 |
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
|
0 |
0 |
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
|
0 |
0 |
if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]); |
2054
|
17 |
6 |
for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data)); |
|
0 |
0 |
for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data)); |
|
0 |
0 |
for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data)); |
2056
|
17 |
0 |
lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]); |
|
0 |
0 |
lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]); |
|
0 |
0 |
lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]); |
2066
|
0 |
0 |
int raw_lemma_len = addinfo.parse(lemma); |
|
0 |
0 |
int raw_lemma_len = addinfo.parse(lemma); |
2069
|
4 |
0 |
lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) { |
|
0 |
0 |
lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) { |
|
0 |
0 |
lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) { |
2075
|
4 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
4 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
|
0 |
0 |
if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) { |
2080
|
14 |
4 |
for (unsigned i = 0; i < lemma_roots_len; i++) { |
|
0 |
0 |
for (unsigned i = 0; i < lemma_roots_len; i++) { |
|
0 |
0 |
for (unsigned i = 0; i < lemma_roots_len; i++) { |
2086
|
30 |
14 |
for (auto&& suffix : classes[clas]) { |
|
0 |
0 |
for (auto&& suffix : classes[clas]) { |
|
0 |
0 |
for (auto&& suffix : classes[clas]) { |
2088
|
40 |
30 |
for (auto&& tag : suffix.second) |
|
0 |
0 |
for (auto&& tag : suffix.second) |
|
0 |
0 |
for (auto&& tag : suffix.second) |
2089
|
23 |
17 |
if (filter.matches(tags[tag].c_str())) { |
|
0 |
0 |
if (filter.matches(tags[tag].c_str())) { |
|
0 |
0 |
if (filter.matches(tags[tag].c_str())) { |
2090
|
4 |
19 |
if (!forms) { |
|
0 |
0 |
if (!forms) { |
|
0 |
0 |
if (!forms) { |
2091
|
4 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
4 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
0 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
0 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
0 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
0 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
|
0 |
0 |
lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len)); |
2095
|
18 |
5 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
18 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
18 |
5 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
0 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
0 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
0 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
0 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
0 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
|
0 |
0 |
if (root_with_suffix.empty() && root_len + suffix.first.size()) { |
2096
|
18 |
0 |
root_with_suffix.reserve(root_len + suffix.first.size()); |
|
0 |
0 |
root_with_suffix.reserve(root_len + suffix.first.size()); |
|
0 |
0 |
root_with_suffix.reserve(root_len + suffix.first.size()); |
2101
|
23 |
0 |
forms->emplace_back(root_with_suffix, tags[tag]); |
|
0 |
0 |
forms->emplace_back(root_with_suffix, tags[tag]); |
|
0 |
0 |
forms->emplace_back(root_with_suffix, tags[tag]); |
2144
|
0 |
0 |
for (unsigned tag_filters_len = data.next_1B(); tag_filters_len; tag_filters_len--) { |
2148
|
0 |
0 |
tag_filters.emplace_back(tag_filter.c_str()); |
2159
|
0 |
0 |
if (!form.len) return; |
2163
|
0 |
0 |
middle_masks.reserve(form.len); |
2165
|
0 |
0 |
for (unsigned initial = 0; initial < form.len; initial++) { |
2168
|
0 |
0 |
if (initial) { |
2170
|
0 |
0 |
if (!found) break; |
2175
|
0 |
0 |
if (initial_mask) { |
2176
|
0 |
0 |
middle_masks.resize(initial); |
2177
|
0 |
0 |
middle_masks.emplace_back(initial_mask); |
2178
|
0 |
0 |
for (unsigned middle = initial; middle < middle_masks.size(); middle++) { |
2179
|
0 |
0 |
if (!middle_masks[middle]) continue; |
2181
|
0 |
0 |
for (unsigned i = middle + 1; i < form.len; i++) { |
2183
|
0 |
0 |
if (!found) break; |
2184
|
0 |
0 |
if (unaligned_load(found)) { |
2185
|
0 |
0 |
if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1); |
|
0 |
0 |
if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1); |
2191
|
0 |
0 |
if (middle > initial && middle < form.len ) { |
|
0 |
0 |
if (middle > initial && middle < form.len ) { |
2192
|
0 |
0 |
if (initial) { |
2193
|
0 |
0 |
if (form_tmp.empty()) form_tmp.assign(form.str, form.str + form.len); |
2197
|
0 |
0 |
dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas); |
|
0 |
0 |
dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas); |
2199
|
0 |
0 |
for (unsigned i = lemmas_ori_size; i < lemmas.size(); i++) { |
2200
|
0 |
0 |
for (unsigned filter = 0; filter < tag_filters.size(); filter++) |
2201
|
0 |
0 |
if ((middle_masks[middle] & (1<
|
|
0 |
0 |
if ((middle_masks[middle] & (1<
|
|
0 |
0 |
if ((middle_masks[middle] & (1<
|
2202
|
0 |
0 |
if (i == lemmas_new_size) { |
2205
|
0 |
0 |
lemmas[lemmas_new_size].lemma.reserve(lemmas[i].lemma.size() + middle - initial); |
2214
|
0 |
0 |
if (lemmas_new_size < lemmas.size()) lemmas.erase(lemmas.begin() + lemmas_new_size, lemmas.end()); |
2328
|
138 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
8 |
130 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
29 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
4 |
25 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
154 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
7 |
147 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
1 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
1 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
|
0 |
0 |
return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27); |
2380
|
0 |
0 |
czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {} |
|
0 |
0 |
czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {} |
|
0 |
0 |
czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {} |
2419
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
2423
|
0 |
0 |
unsigned tag_length = data.next_1B(); |
2424
|
0 |
0 |
if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length); |
|
0 |
0 |
if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length); |
2425
|
0 |
0 |
if (tag_length < number_tag.size()) number_tag.erase(tag_length); |
|
0 |
0 |
if (tag_length < number_tag.size()) number_tag.erase(tag_length); |
2426
|
0 |
0 |
if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length); |
|
0 |
0 |
if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length); |
2429
|
0 |
0 |
dictionary.load(data); |
2433
|
0 |
0 |
if (data.next_1B()) { |
|
0 |
0 |
if (data.next_1B()) { |
2434
|
0 |
0 |
prefix_guesser.reset(new morpho_prefix_guesser(dictionary)); |
2435
|
0 |
0 |
prefix_guesser->load(data); |
2440
|
0 |
0 |
if (data.next_1B()) { |
|
0 |
0 |
if (data.next_1B()) { |
2441
|
0 |
0 |
statistical_guesser.reset(new morpho_statistical_guesser()); |
2442
|
0 |
0 |
statistical_guesser->load(data); |
2443
|
0 |
0 |
} |
2454
|
0 |
0 |
if (form.len) { |
2458
|
0 |
0 |
generate_casing_variants(form, form_uclc, form_lc); |
2461
|
0 |
0 |
dictionary.analyze(form, lemmas); |
2462
|
0 |
0 |
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
0 |
0 |
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
2463
|
0 |
0 |
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
0 |
0 |
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
2464
|
0 |
0 |
if (!lemmas.empty()) return NO_GUESSER; |
2467
|
0 |
0 |
analyze_special(form, lemmas); |
2468
|
0 |
0 |
if (!lemmas.empty()) return NO_GUESSER; |
2471
|
0 |
0 |
if (guesser == GUESSER && prefix_guesser) |
|
0 |
0 |
if (guesser == GUESSER && prefix_guesser) |
|
0 |
0 |
if (guesser == GUESSER && prefix_guesser) |
2472
|
0 |
0 |
prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas); |
|
0 |
0 |
prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas); |
2476
|
0 |
0 |
if (guesser == GUESSER && statistical_guesser) { |
|
0 |
0 |
if (guesser == GUESSER && statistical_guesser) { |
|
0 |
0 |
if (guesser == GUESSER && statistical_guesser) { |
2477
|
0 |
0 |
if (form_uclc.empty() && form_lc.empty()) |
|
0 |
0 |
if (form_uclc.empty() && form_lc.empty()) |
|
0 |
0 |
if (form_uclc.empty() && form_lc.empty()) |
2478
|
0 |
0 |
statistical_guesser->analyze(form, lemmas, nullptr); |
2480
|
0 |
0 |
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
2481
|
0 |
0 |
statistical_guesser->analyze(form, lemmas, &used_rules); |
2482
|
0 |
0 |
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
0 |
0 |
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
2483
|
0 |
0 |
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
0 |
0 |
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
2489
|
0 |
0 |
if (prefix_guesser_guesses) { |
2492
|
0 |
0 |
return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); |
2495
|
0 |
0 |
return a.lemma == b.lemma && a.tag == b.tag; |
|
0 |
0 |
return a.lemma == b.lemma && a.tag == b.tag; |
2497
|
0 |
0 |
if (lemmas_end != lemmas.end()) lemmas.erase(lemmas_end, lemmas.end()); |
2500
|
0 |
0 |
if (!lemmas.empty()) return GUESSER; |
2503
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
2512
|
0 |
0 |
if (lemma.len) { |
2513
|
0 |
0 |
if (dictionary.generate(lemma, filter, forms)) |
|
0 |
0 |
if (dictionary.generate(lemma, filter, forms)) |
2516
|
0 |
0 |
if (guesser == GUESSER && prefix_guesser) |
|
0 |
0 |
if (guesser == GUESSER && prefix_guesser) |
2537
|
0 |
0 |
return new czech_tokenizer(language, version, this); |
2568
|
0 |
0 |
if (!form.len) return; |
2576
|
0 |
0 |
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
2577
|
0 |
0 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
2578
|
0 |
0 |
if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len); |
|
0 |
0 |
if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len); |
|
0 |
0 |
if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len); |
2579
|
0 |
0 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
2580
|
0 |
0 |
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
0 |
0 |
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
2582
|
0 |
0 |
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len); |
2584
|
0 |
0 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len); |
2587
|
0 |
0 |
if (any_digit && !form.len && (!codepoint || codepoint == '.')) { |
|
0 |
0 |
if (any_digit && !form.len && (!codepoint || codepoint == '.')) { |
|
0 |
0 |
if (any_digit && !form.len && (!codepoint || codepoint == '.')) { |
2588
|
0 |
0 |
lemmas.emplace_back(string(form_ori.str, form_ori.len), number_tag); |
2589
|
0 |
0 |
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
|
0 |
0 |
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
|
0 |
0 |
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
|
0 |
0 |
} else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) || |
2590
|
0 |
0 |
((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first]))) |
|
0 |
0 |
((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first]))) |
2591
|
0 |
0 |
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
2625
|
0 |
0 |
for (unsigned len = 1; len < lemma.len; len++) { |
2626
|
0 |
0 |
if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+')) |
|
0 |
0 |
if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+')) |
2628
|
0 |
0 |
if (len + 1 < lemma.len && lemma.str[len] == '^') { |
|
0 |
0 |
if (len + 1 < lemma.len && lemma.str[len] == '^') { |
2630
|
0 |
0 |
for (unsigned i = len + 1; ok && i < lemma.len; i++) |
|
0 |
0 |
for (unsigned i = len + 1; ok && i < lemma.len; i++) |
2631
|
0 |
0 |
ok &= (lemma.str[i] >= 'A' && lemma.str[i] <= 'Z') || |
2632
|
0 |
0 |
(lemma.str[i] >= 'a' && lemma.str[i] <= 'z') || |
|
0 |
0 |
(lemma.str[i] >= 'a' && lemma.str[i] <= 'z') || |
2633
|
0 |
0 |
(i > len + 1 && lemma.str[i] == '-'); |
2634
|
0 |
0 |
if (ok) return len; |
2657
|
0 |
0 |
for (size_t i = len; i < lemma.len; i++) |
2664
|
0 |
0 |
if (data.empty()) return true; |
2665
|
0 |
0 |
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
0 |
0 |
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
0 |
0 |
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
0 |
0 |
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
|
0 |
0 |
if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^'; |
2666
|
0 |
0 |
if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0; |
|
0 |
0 |
if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0; |
|
0 |
0 |
if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0; |
2667
|
0 |
0 |
return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len); |
|
0 |
0 |
return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len); |
2683
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
|
0 |
0 |
class english_morpho_guesser { |
2727
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
|
0 |
0 |
english_morpho(unsigned version) : version(version) {} |
2793
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
2796
|
0 |
0 |
dictionary.load(data); |
2797
|
0 |
0 |
morpho_guesser.load(data); |
|
0 |
0 |
morpho_guesser.load(data); |
2808
|
0 |
0 |
if (form.len) { |
2812
|
0 |
0 |
generate_casing_variants(form, form_uclc, form_lc); |
2815
|
0 |
0 |
dictionary.analyze(form, lemmas); |
2816
|
0 |
0 |
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
0 |
0 |
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
2817
|
0 |
0 |
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
0 |
0 |
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
2818
|
0 |
0 |
if (!lemmas.empty()) |
2819
|
0 |
0 |
return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER; |
|
0 |
0 |
return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER; |
|
0 |
0 |
return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER; |
|
0 |
0 |
return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER; |
2822
|
0 |
0 |
analyze_special(form, lemmas); |
2823
|
0 |
0 |
if (!lemmas.empty()) return NO_GUESSER; |
2826
|
0 |
0 |
if (guesser == GUESSER) |
2827
|
0 |
0 |
morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas); |
|
0 |
0 |
morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas); |
2828
|
0 |
0 |
if (!lemmas.empty()) return GUESSER; |
2831
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
2840
|
0 |
0 |
if (lemma.len) { |
2841
|
0 |
0 |
if (dictionary.generate(lemma, filter, forms)) |
|
0 |
0 |
if (dictionary.generate(lemma, filter, forms)) |
2861
|
0 |
0 |
return new english_tokenizer(version <= 2 ? 1 : 2); |
2868
|
0 |
0 |
if (!form.len) return; |
2871
|
0 |
0 |
if (form.len == 1) |
2875
|
0 |
0 |
case '?': lemmas.emplace_back(string(form.str, form.len), dot_tag); return; |
2876
|
0 |
0 |
case ',': lemmas.emplace_back(string(form.str, form.len), comma_tag); return; |
2877
|
0 |
0 |
case '#': lemmas.emplace_back(string(form.str, form.len), hash_tag); return; |
2878
|
0 |
0 |
case '$': lemmas.emplace_back(string(form.str, form.len), dollar_tag); return; |
2879
|
0 |
0 |
case '[': lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
2880
|
0 |
0 |
case ']': lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
2881
|
0 |
0 |
case '%': lemmas.emplace_back(string(form.str, form.len), jj_tag); |
2882
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), nn_tag); return; |
2883
|
0 |
0 |
case '&': lemmas.emplace_back(string(form.str, form.len), cc_tag); |
2884
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), sym_tag); return; |
2885
|
0 |
0 |
case '*': lemmas.emplace_back(string(form.str, form.len), sym_tag); |
2886
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), nn_tag); return; |
2887
|
0 |
0 |
case '@': lemmas.emplace_back(string(form.str, form.len), sym_tag); |
2888
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), in_tag); return; |
2889
|
0 |
0 |
case '\'': lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); |
2890
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), pos_tag); return; |
2897
|
0 |
0 |
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
2898
|
0 |
0 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
2899
|
0 |
0 |
while (codepoint == ',') { |
2901
|
0 |
0 |
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
2902
|
0 |
0 |
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
2903
|
0 |
0 |
if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break; |
2908
|
0 |
0 |
if (codepoint == '.' && number.len) { |
|
0 |
0 |
if (codepoint == '.' && number.len) { |
2910
|
0 |
0 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
2912
|
0 |
0 |
if (version >= 2 && any_digit && codepoint == 's' && !number.len) { |
|
0 |
0 |
if (version >= 2 && any_digit && codepoint == 's' && !number.len) { |
|
0 |
0 |
if (version >= 2 && any_digit && codepoint == 's' && !number.len) { |
2913
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), number_tag); |
2914
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len - 1), nns_tag); |
2917
|
0 |
0 |
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
0 |
0 |
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
2919
|
0 |
0 |
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
2921
|
0 |
0 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
2923
|
0 |
0 |
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
0 |
0 |
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
0 |
0 |
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
2924
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), number_tag); |
2925
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), nnp_tag); |
2926
|
0 |
0 |
if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9') |
|
0 |
0 |
if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9') |
|
0 |
0 |
if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9') |
|
0 |
0 |
if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9') |
2927
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), ls_tag); |
2934
|
0 |
0 |
while ((symbol || any_punctuation) && punctuation.len) { |
|
0 |
0 |
while ((symbol || any_punctuation) && punctuation.len) { |
2936
|
0 |
0 |
if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi; |
|
0 |
0 |
if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi; |
|
0 |
0 |
if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi; |
2937
|
0 |
0 |
if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf; |
|
0 |
0 |
if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf; |
|
0 |
0 |
if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf; |
2938
|
0 |
0 |
if (open_parenthesis) open_parenthesis = unicode::category(codepoint) & unicode::Ps; |
2939
|
0 |
0 |
if (close_parenthesis) close_parenthesis = unicode::category(codepoint) & unicode::Pe; |
2940
|
0 |
0 |
if (any_punctuation) any_punctuation = unicode::category(codepoint) & unicode::P; |
2941
|
0 |
0 |
if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S; |
|
0 |
0 |
if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S; |
|
0 |
0 |
if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S; |
2943
|
0 |
0 |
if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; } |
|
0 |
0 |
if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; } |
|
0 |
0 |
if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; } |
2944
|
0 |
0 |
if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; } |
|
0 |
0 |
if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; } |
|
0 |
0 |
if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; } |
2945
|
0 |
0 |
if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; } |
|
0 |
0 |
if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; } |
|
0 |
0 |
if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; } |
2946
|
0 |
0 |
if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; } |
|
0 |
0 |
if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; } |
|
0 |
0 |
if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; } |
2947
|
0 |
0 |
if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; } |
|
0 |
0 |
if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; } |
|
0 |
0 |
if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; } |
2948
|
0 |
0 |
if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; } |
|
0 |
0 |
if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; } |
|
0 |
0 |
if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; } |
2978
|
0 |
0 |
while (tags--) { |
2980
|
0 |
0 |
exceptions_tags.emplace_back(string(data.next(len), len)); |
3116
|
0 |
0 |
for (unsigned len = data.next_1B(); len; len--) { |
3122
|
0 |
0 |
if (exception) { |
3125
|
0 |
0 |
for (unsigned len = data.next_1B(); len; len--) { |
3128
|
0 |
0 |
for (unsigned tags = data.next_1B(); tags; tags--) |
3129
|
0 |
0 |
lemmas.emplace_back(lemma, exceptions_tags[data.next_2B()]); |
3136
|
0 |
0 |
for (unsigned prefix = 1; prefix <= form_lc.len; prefix++) { |
3138
|
0 |
0 |
if (!found) break; |
3139
|
0 |
0 |
if (found[NEGATION_LEN]) { |
3140
|
0 |
0 |
if (form_lc.len - prefix >= found[TO_FOLLOW]) negation_len = found[NEGATION_LEN]; |
3146
|
0 |
0 |
add(JJ, lemma_lc, negation_len, lemmas); |
3147
|
0 |
0 |
add(RB, lemma_lc, negation_len, lemmas); |
3148
|
0 |
0 |
add(NN, lemma_lc, negation_len, lemmas); |
3149
|
0 |
0 |
add_NNS(lemma_lc, negation_len, lemmas); |
3166
|
0 |
0 |
if ( p == ( (form_lc.str + form_lc.len)) ) |
3173
|
0 |
0 |
if ( _klen > 0 ) { |
3178
|
0 |
0 |
if ( _upper < _lower ) |
3182
|
0 |
0 |
if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < *_mid ) |
3184
|
0 |
0 |
else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > *_mid ) |
3196
|
0 |
0 |
if ( _klen > 0 ) { |
3201
|
0 |
0 |
if ( _upper < _lower ) |
3205
|
0 |
0 |
if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < _mid[0] ) |
3207
|
0 |
0 |
else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > _mid[1] ) |
3221
|
0 |
0 |
if ( _tag_guesser_trans_actions[_trans] == 0 ) |
3226
|
0 |
0 |
while ( _nacts-- > 0 ) |
3231
|
0 |
0 |
{ if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); } |
|
0 |
0 |
{ if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); } |
3234
|
0 |
0 |
{ if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); } |
|
0 |
0 |
{ if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); } |
3237
|
0 |
0 |
{ add_VBG(lemma_lc, lemmas); } |
3240
|
0 |
0 |
{ add_VBD_VBN(lemma_lc, lemmas); } |
3243
|
0 |
0 |
{ add_VBZ(lemma_lc, lemmas); } |
3249
|
0 |
0 |
{ if (!added_SYM) added_SYM = true, add(SYM, lemma_lc, lemmas); } |
3252
|
0 |
0 |
{ if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); } |
3258
|
0 |
0 |
if ( ++p != ( (form_lc.str + form_lc.len)) ) |
3261
|
0 |
0 |
if ( p == ( (form_lc.str + form_lc.len)) ) |
3265
|
0 |
0 |
while ( __nacts-- > 0 ) { |
3266
|
0 |
0 |
switch ( *__acts++ ) { |
3268
|
0 |
0 |
{ if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); } |
3284
|
0 |
0 |
bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9'))); |
|
0 |
0 |
bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9'))); |
|
0 |
0 |
bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9'))); |
|
0 |
0 |
bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9'))); |
3286
|
0 |
0 |
if (!is_NNP && !is_NNPS) return false; |
3289
|
0 |
0 |
for (auto&& lemma : lemmas) { |
3293
|
0 |
0 |
if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false; |
|
0 |
0 |
if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false; |
3296
|
0 |
0 |
if (is_NNP && !was_NNP) add(NNP, lemma, lemmas); |
3297
|
0 |
0 |
if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas); |
|
0 |
0 |
if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas); |
3302
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
|
0 |
0 |
lemmas.emplace_back(form, tag); |
3311
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
|
0 |
0 |
lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag); |
3413
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
3422
|
0 |
0 |
if ( _klen > 0 ) { |
3427
|
0 |
0 |
if ( _upper < _lower ) |
3431
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
3433
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
3445
|
0 |
0 |
if ( _klen > 0 ) { |
3450
|
0 |
0 |
if ( _upper < _lower ) |
3454
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
3456
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
3470
|
0 |
0 |
if ( _NNS_trans_actions[_trans] == 0 ) |
3475
|
0 |
0 |
while ( _nacts-- > 0 ) |
3480
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 2, append = "an"; } |
3483
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 1, append = nullptr; } |
3486
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 3, append = "fe"; } |
3489
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
3492
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
3495
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
3498
|
0 |
0 |
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
3501
|
0 |
0 |
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
3504
|
0 |
0 |
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
3507
|
0 |
0 |
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
3510
|
0 |
0 |
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
3513
|
0 |
0 |
{ if (best > 'l') best = 'l', remove = 3, append = "y"; } |
3516
|
0 |
0 |
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
3519
|
0 |
0 |
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
3525
|
0 |
0 |
if ( cs == 0 ) |
3527
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
3533
|
0 |
0 |
add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
0 |
0 |
add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
0 |
0 |
add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
3659
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
3668
|
0 |
0 |
if ( _klen > 0 ) { |
3673
|
0 |
0 |
if ( _upper < _lower ) |
3677
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
3679
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
3691
|
0 |
0 |
if ( _klen > 0 ) { |
3696
|
0 |
0 |
if ( _upper < _lower ) |
3700
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
3702
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
3716
|
0 |
0 |
if ( _NNPS_trans_actions[_trans] == 0 ) |
3721
|
0 |
0 |
while ( _nacts-- > 0 ) |
3726
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 2, append = "AN"; } |
3729
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 2, append = "an"; } |
3732
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
3735
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 3, append = "FE"; } |
3738
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 3, append = "fe"; } |
3741
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
3744
|
0 |
0 |
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
3747
|
0 |
0 |
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
3750
|
0 |
0 |
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
3753
|
0 |
0 |
{ if (best > 'j') best = 'j', remove = 2, append = nullptr; } |
3756
|
0 |
0 |
{ if (best > 'k') best = 'k', remove = 1, append = nullptr; } |
3759
|
0 |
0 |
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
3762
|
0 |
0 |
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
3765
|
0 |
0 |
{ if (best > 'n') best = 'n', remove = 3, append = "Y"; } |
3768
|
0 |
0 |
{ if (best > 'o') best = 'o', remove = 3, append = "y"; } |
3771
|
0 |
0 |
{ if (best > 'p') best = 'p', remove = 2, append = nullptr; } |
3774
|
0 |
0 |
{ if (best > 'q') best = 'q', remove = 1, append = nullptr; } |
3780
|
0 |
0 |
if ( cs == 0 ) |
3782
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
3788
|
0 |
0 |
add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
0 |
0 |
add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
4088
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
4097
|
0 |
0 |
if ( _klen > 0 ) { |
4102
|
0 |
0 |
if ( _upper < _lower ) |
4106
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
4108
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
4120
|
0 |
0 |
if ( _klen > 0 ) { |
4125
|
0 |
0 |
if ( _upper < _lower ) |
4129
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
4131
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
4145
|
0 |
0 |
if ( _VBG_trans_actions[_trans] == 0 ) |
4150
|
0 |
0 |
while ( _nacts-- > 0 ) |
4155
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
4158
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 3, append = "e"; } |
4161
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
4164
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 3, append = "e"; } |
4167
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 3, append = nullptr; } |
4170
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
4173
|
0 |
0 |
{ if (best > 'g') best = 'g', remove = 3, append = nullptr; } |
4176
|
0 |
0 |
{ if (best > 'h') best = 'h', remove = 3, append = "e"; } |
4179
|
0 |
0 |
{ if (best > 'i') best = 'i', remove = 3, append = nullptr; } |
4182
|
0 |
0 |
{ if (best > 'j') best = 'j', remove = 3, append = "e"; } |
4185
|
0 |
0 |
{ if (best > 'k') best = 'k', remove = 3, append = nullptr; } |
4188
|
0 |
0 |
{ if (best > 'l') best = 'l', remove = 3, append = "e"; } |
4191
|
0 |
0 |
{ if (best > 'm') best = 'm', remove = 3, append = nullptr; } |
4194
|
0 |
0 |
{ if (best > 'n') best = 'n', remove = 3, append = "e"; } |
4197
|
0 |
0 |
{ if (best > 'o') best = 'o', remove = 3, append = nullptr; } |
4200
|
0 |
0 |
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
4203
|
0 |
0 |
{ if (best > 'q') best = 'q', remove = 3, append = nullptr; } |
4206
|
0 |
0 |
{ if (best > 'r') best = 'r', remove = 3, append = "e"; } |
4212
|
0 |
0 |
if ( cs == 0 ) |
4214
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
4217
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
4221
|
0 |
0 |
while ( __nacts-- > 0 ) { |
4224
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 3, append = nullptr; } |
4227
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 3, append = "e"; } |
4230
|
0 |
0 |
{ if (best > 'p') best = 'p', remove = 3, append = "e"; } |
4239
|
0 |
0 |
add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
0 |
0 |
add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
4542
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
4551
|
0 |
0 |
if ( _klen > 0 ) { |
4556
|
0 |
0 |
if ( _upper < _lower ) |
4560
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
4562
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
4574
|
0 |
0 |
if ( _klen > 0 ) { |
4579
|
0 |
0 |
if ( _upper < _lower ) |
4583
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
4585
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
4599
|
0 |
0 |
if ( _VBD_VBN_trans_actions[_trans] == 0 ) |
4604
|
0 |
0 |
while ( _nacts-- > 0 ) |
4609
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
4612
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
4615
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
4618
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
4621
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
4624
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
4627
|
0 |
0 |
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
4630
|
0 |
0 |
{ if (best > 'i') best = 'i', remove = 3, append = "y"; } |
4633
|
0 |
0 |
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
4636
|
0 |
0 |
{ if (best > 'k') best = 'k', remove = 2, append = nullptr; } |
4639
|
0 |
0 |
{ if (best > 'l') best = 'l', remove = 1, append = nullptr; } |
4642
|
0 |
0 |
{ if (best > 'm') best = 'm', remove = 2, append = nullptr; } |
4645
|
0 |
0 |
{ if (best > 'n') best = 'n', remove = 1, append = nullptr; } |
4648
|
0 |
0 |
{ if (best > 'o') best = 'o', remove = 2, append = nullptr; } |
4651
|
0 |
0 |
{ if (best > 'p') best = 'p', remove = 1, append = nullptr; } |
4654
|
0 |
0 |
{ if (best > 'q') best = 'q', remove = 2, append = nullptr; } |
4657
|
0 |
0 |
{ if (best > 'r') best = 'r', remove = 1, append = nullptr; } |
4663
|
0 |
0 |
if ( cs == 0 ) |
4665
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
4668
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
4672
|
0 |
0 |
while ( __nacts-- > 0 ) { |
4675
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
4678
|
0 |
0 |
{ if (best > 'g') best = 'g', remove = 1, append = nullptr; } |
4681
|
0 |
0 |
{ if (best > 'j') best = 'j', remove = 1, append = nullptr; } |
4690
|
0 |
0 |
add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
0 |
0 |
add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
4769
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
4778
|
0 |
0 |
if ( _klen > 0 ) { |
4783
|
0 |
0 |
if ( _upper < _lower ) |
4787
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid ) |
4789
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid ) |
4801
|
0 |
0 |
if ( _klen > 0 ) { |
4806
|
0 |
0 |
if ( _upper < _lower ) |
4810
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] ) |
4812
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] ) |
4826
|
0 |
0 |
if ( _VBZ_trans_actions[_trans] == 0 ) |
4831
|
0 |
0 |
while ( _nacts-- > 0 ) |
4836
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 1, append = nullptr; } |
4839
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 2, append = nullptr; } |
4842
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 1, append = nullptr; } |
4845
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
4848
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
4851
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
4854
|
0 |
0 |
{ if (best > 'g') best = 'g', remove = 3, append = "y"; } |
4857
|
0 |
0 |
{ if (best > 'h') best = 'h', remove = 2, append = nullptr; } |
4860
|
0 |
0 |
{ if (best > 'i') best = 'i', remove = 1, append = nullptr; } |
4866
|
0 |
0 |
if ( cs == 0 ) |
4868
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
4874
|
0 |
0 |
add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
|
0 |
0 |
add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas); |
5000
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
5009
|
0 |
0 |
if ( _klen > 0 ) { |
5014
|
0 |
0 |
if ( _upper < _lower ) |
5018
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
5020
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
5032
|
0 |
0 |
if ( _klen > 0 ) { |
5037
|
0 |
0 |
if ( _upper < _lower ) |
5041
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
5043
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
5057
|
0 |
0 |
if ( _JJR_RBR_trans_actions[_trans] == 0 ) |
5062
|
0 |
0 |
while ( _nacts-- > 0 ) |
5067
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 2, append = nullptr; } |
5070
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 3, append = nullptr; } |
5073
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 3, append = "y"; } |
5076
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 2, append = nullptr; } |
5079
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 1, append = nullptr; } |
5082
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 2, append = nullptr; } |
5088
|
0 |
0 |
if ( cs == 0 ) |
5090
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
5096
|
0 |
0 |
add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
0 |
0 |
add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
0 |
0 |
add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
5226
|
0 |
0 |
if ( p == ( (form.c_str() + form.size())) ) |
5235
|
0 |
0 |
if ( _klen > 0 ) { |
5240
|
0 |
0 |
if ( _upper < _lower ) |
5244
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid ) |
5246
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid ) |
5258
|
0 |
0 |
if ( _klen > 0 ) { |
5263
|
0 |
0 |
if ( _upper < _lower ) |
5267
|
0 |
0 |
if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] ) |
5269
|
0 |
0 |
else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] ) |
5283
|
0 |
0 |
if ( _JJS_RBS_trans_actions[_trans] == 0 ) |
5288
|
0 |
0 |
while ( _nacts-- > 0 ) |
5293
|
0 |
0 |
{ if (best > 'a') best = 'a', remove = 3, append = nullptr; } |
5296
|
0 |
0 |
{ if (best > 'b') best = 'b', remove = 4, append = nullptr; } |
5299
|
0 |
0 |
{ if (best > 'c') best = 'c', remove = 4, append = "y"; } |
5302
|
0 |
0 |
{ if (best > 'd') best = 'd', remove = 3, append = nullptr; } |
5305
|
0 |
0 |
{ if (best > 'e') best = 'e', remove = 2, append = nullptr; } |
5308
|
0 |
0 |
{ if (best > 'f') best = 'f', remove = 3, append = nullptr; } |
5314
|
0 |
0 |
if ( cs == 0 ) |
5316
|
0 |
0 |
if ( ++p != ( (form.c_str() + form.size())) ) |
5322
|
0 |
0 |
add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
0 |
0 |
add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
|
0 |
0 |
add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas); |
5393
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
5397
|
0 |
0 |
unsigned length = data.next_1B(); |
5398
|
0 |
0 |
unknown_tag.assign(data.next(length), length); |
|
0 |
0 |
unknown_tag.assign(data.next(length), length); |
5409
|
0 |
0 |
if (form.len) { |
5412
|
0 |
0 |
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
0 |
0 |
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
5413
|
0 |
0 |
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
5416
|
0 |
0 |
while (lemmatags.len) { |
5418
|
0 |
0 |
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
0 |
0 |
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
5419
|
0 |
0 |
if (!lemmatags.len) break; |
5424
|
0 |
0 |
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
|
0 |
0 |
while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++; |
5426
|
0 |
0 |
if (lemmatags.len) lemmatags.len--, lemmatags.str++; |
5428
|
0 |
0 |
lemmas.emplace_back(string(lemma_start, lemma_len), string(tag_start, tag_len)); |
5431
|
0 |
0 |
if (!lemmas.empty()) return NO_GUESSER; |
5434
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
5443
|
0 |
0 |
if (lemma.len) { |
5446
|
0 |
0 |
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
0 |
0 |
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
5448
|
0 |
0 |
if (formtags.len) formtags.len--, formtags.str++; |
5452
|
0 |
0 |
while (formtags.len) { |
5454
|
0 |
0 |
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
0 |
0 |
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
5455
|
0 |
0 |
if (!formtags.len) break; |
5460
|
0 |
0 |
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
|
0 |
0 |
while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++; |
5462
|
0 |
0 |
if (formtags.len) formtags.len--, formtags.str++; |
5466
|
0 |
0 |
if (filter.matches(tag.c_str())) { |
5467
|
0 |
0 |
if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len)); |
|
0 |
0 |
if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len)); |
5468
|
0 |
0 |
forms.back().forms.emplace_back(string(form_start, form_len), tag); |
5472
|
0 |
0 |
if (any_result) return NO_GUESSER; |
5480
|
0 |
0 |
while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++; |
|
0 |
0 |
while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++; |
5486
|
0 |
0 |
while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++; |
|
0 |
0 |
while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++; |
5492
|
0 |
0 |
while (form_len < form.len && form.str[form_len] != ' ') form_len++; |
|
0 |
0 |
while (form_len < form.len && form.str[form_len] != ' ') form_len++; |
5601
|
2 |
0 |
if (!compressor::load(is, data)) return false; |
|
2 |
0 |
if (!compressor::load(is, data)) return false; |
5605
|
2 |
0 |
unsigned length = data.next_1B(); |
5606
|
2 |
0 |
unknown_tag.assign(data.next(length), length); |
5607
|
2 |
0 |
length = data.next_1B(); |
5608
|
2 |
0 |
number_tag.assign(data.next(length), length); |
5609
|
2 |
0 |
length = data.next_1B(); |
5610
|
2 |
0 |
punctuation_tag.assign(data.next(length), length); |
5611
|
2 |
0 |
length = data.next_1B(); |
5612
|
2 |
0 |
symbol_tag.assign(data.next(length), length); |
5615
|
2 |
0 |
dictionary.load(data); |
5619
|
2 |
0 |
if (data.next_1B()) { |
|
0 |
2 |
if (data.next_1B()) { |
5620
|
0 |
0 |
statistical_guesser.reset(new morpho_statistical_guesser()); |
5621
|
0 |
0 |
statistical_guesser->load(data); |
5622
|
0 |
0 |
} |
5633
|
13 |
0 |
if (form.len) { |
5637
|
13 |
0 |
generate_casing_variants(form, form_uclc, form_lc); |
5640
|
13 |
0 |
dictionary.analyze(form, lemmas); |
5641
|
0 |
13 |
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
|
0 |
0 |
if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas); |
5642
|
2 |
11 |
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
|
2 |
0 |
if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas); |
5643
|
7 |
6 |
if (!lemmas.empty()) return NO_GUESSER; |
5646
|
7 |
0 |
analyze_special(form, lemmas); |
5647
|
1 |
6 |
if (!lemmas.empty()) return NO_GUESSER; |
5650
|
0 |
1 |
if (guesser == GUESSER && statistical_guesser) { |
|
0 |
0 |
if (guesser == GUESSER && statistical_guesser) { |
|
0 |
1 |
if (guesser == GUESSER && statistical_guesser) { |
5651
|
0 |
0 |
if (form_uclc.empty() && form_lc.empty()) |
|
0 |
0 |
if (form_uclc.empty() && form_lc.empty()) |
|
0 |
0 |
if (form_uclc.empty() && form_lc.empty()) |
5652
|
0 |
0 |
statistical_guesser->analyze(form, lemmas, nullptr); |
5654
|
0 |
0 |
morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3); |
5655
|
0 |
0 |
statistical_guesser->analyze(form, lemmas, &used_rules); |
5656
|
0 |
0 |
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
|
0 |
0 |
if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules); |
5657
|
0 |
0 |
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
|
0 |
0 |
if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules); |
5660
|
1 |
0 |
if (!lemmas.empty()) return GUESSER; |
5663
|
1 |
0 |
lemmas.emplace_back(string(form.str, form.len), unknown_tag); |
5672
|
4 |
0 |
if (lemma.len) { |
5673
|
4 |
0 |
if (dictionary.generate(lemma, filter, forms)) |
|
0 |
4 |
if (dictionary.generate(lemma, filter, forms)) |
5703
|
7 |
0 |
if (!form.len) return; |
5711
|
0 |
7 |
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
5712
|
1 |
7 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
5713
|
3 |
4 |
if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len); |
|
2 |
1 |
if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len); |
|
0 |
6 |
if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len); |
5714
|
1 |
7 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
5715
|
1 |
6 |
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
|
1 |
0 |
if (any_digit && (codepoint == 'e' || codepoint == 'E')) { |
5717
|
1 |
0 |
if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len); |
5719
|
2 |
1 |
while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len); |
5722
|
1 |
6 |
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
1 |
0 |
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
|
1 |
0 |
if (any_digit && !number.len && (!codepoint || codepoint == '.')) { |
5723
|
1 |
0 |
lemmas.emplace_back(string(form.str, form.len), number_tag); |
5730
|
12 |
6 |
while (form.len) { |
5732
|
6 |
6 |
punctuation = punctuation && unicode::category(codepoint) & unicode::P; |
|
2 |
4 |
punctuation = punctuation && unicode::category(codepoint) & unicode::P; |
5733
|
6 |
6 |
symbol = symbol && unicode::category(codepoint) & unicode::S; |
|
1 |
5 |
symbol = symbol && unicode::category(codepoint) & unicode::S; |
5735
|
4 |
2 |
if (punctuation) |
5736
|
4 |
0 |
lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag); |
5737
|
1 |
1 |
else if (symbol) |
5738
|
1 |
0 |
lemmas.emplace_back(string(form_ori.str, form_ori.len), symbol_tag); |
5796
|
0 |
0 |
return unique_ptr(new T(std::forward(args)...)); |
|
1 |
0 |
return unique_ptr(new T(std::forward(args)...)); |
|
0 |
0 |
return unique_ptr(new T(std::forward(args)...)); |
5918
|
0 |
0 |
if (res->load(is)) return res.release(); |
|
0 |
0 |
if (res->load(is)) return res.release(); |
5927
|
0 |
0 |
3); |
|
0 |
0 |
3); |
5928
|
0 |
0 |
if (res->load(is)) return res.release(); |
|
0 |
0 |
if (res->load(is)) return res.release(); |
5934
|
0 |
0 |
if (res->load(is)) return res.release(); |
|
0 |
0 |
if (res->load(is)) return res.release(); |
5940
|
2 |
0 |
if (res->load(is)) return res.release(); |
|
2 |
0 |
if (res->load(is)) return res.release(); |
5946
|
0 |
0 |
if (res->load(is)) return res.release(); |
|
0 |
0 |
if (res->load(is)) return res.release(); |
5952
|
0 |
0 |
if (!derinet->load(is)) return nullptr; |
|
0 |
0 |
if (!derinet->load(is)) return nullptr; |
5954
|
0 |
0 |
unique_ptr dictionary(load(is)); |
5955
|
0 |
0 |
if (!dictionary) return nullptr; |
5966
|
1 |
0 |
ifstream f(path_from_utf8(fname).c_str(), ifstream::binary); |
5967
|
1 |
0 |
if (!f) return nullptr; |
5969
|
1 |
0 |
return load(f); |
5992
|
0 |
0 |
for (auto&& tag : tags) { |
5994
|
0 |
0 |
for (unsigned i = 0; i < tag.size(); i++) |
6005
|
0 |
0 |
if (!used) return false; |
6007
|
0 |
0 |
for (auto&& used_rule : *used) |
6008
|
0 |
0 |
if (used_rule == rule) |
6020
|
0 |
0 |
string rule_label; rule_label.reserve(12); |
6022
|
0 |
0 |
for (; suffix_len < form.len; suffix_len++) { |
6023
|
0 |
0 |
rule_label.push_back(form.str[form.len - (suffix_len + 1)]); |
6024
|
0 |
0 |
if (!rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); })) |
6028
|
0 |
0 |
for (suffix_len++; suffix_len--; ) { |
6030
|
0 |
0 |
rule_label.push_back(' '); |
6034
|
0 |
0 |
for (unsigned prefix_len = 0; prefix_len + suffix_len <= form.len; prefix_len++) { |
6035
|
0 |
0 |
if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]); |
|
0 |
0 |
if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]); |
6037
|
0 |
0 |
if (!found) break; |
6038
|
0 |
0 |
if (*(found += sizeof(uint16_t))) { |
6044
|
0 |
0 |
if (rule) { |
6046
|
0 |
0 |
if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' ' |
|
0 |
0 |
if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' ' |
|
0 |
0 |
if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' ' |
6047
|
0 |
0 |
if (used) used->push_back(rule_label); |
|
0 |
0 |
if (used) used->push_back(rule_label); |
6048
|
0 |
0 |
for (int rules_len = *rule++; rules_len; rules_len--) { |
6055
|
0 |
0 |
if (pref_del_len + suff_del_len > form.len || |
|
0 |
0 |
if (pref_del_len + suff_del_len > form.len || |
6056
|
0 |
0 |
(pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) || |
|
0 |
0 |
(pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) || |
6057
|
0 |
0 |
(suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) || |
|
0 |
0 |
(suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) || |
|
0 |
0 |
(suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) || |
6062
|
0 |
0 |
lemma.reserve(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len); |
6063
|
0 |
0 |
if (pref_add_len) lemma.append(pref_add, pref_add_len); |
|
0 |
0 |
if (pref_add_len) lemma.append(pref_add, pref_add_len); |
6064
|
0 |
0 |
if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len); |
|
0 |
0 |
if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len); |
6065
|
0 |
0 |
if (suff_add_len) lemma.append(suff_add, suff_add_len); |
|
0 |
0 |
if (suff_add_len) lemma.append(suff_add, suff_add_len); |
6066
|
0 |
0 |
while (tags_len--) |
6067
|
0 |
0 |
lemmas.emplace_back(lemma, this->tags[unaligned_load_inc(tags)]); |
6075
|
0 |
0 |
if (lemmas.size() == lemmas_initial_size) |
6076
|
0 |
0 |
if (!contains(used, string())) { |
6077
|
0 |
0 |
if (used) used->push_back(string()); |
6078
|
0 |
0 |
lemmas.emplace_back(string(form.str, form.len), tags[default_tag]); |
6096
|
4 |
0 |
if (!filter) return; |
6098
|
4 |
0 |
wildcard.assign(filter); |
6101
|
6 |
4 |
for (int tag_pos = 0, filter_pos = 0; filter[filter_pos]; tag_pos++, filter_pos++) { |
6102
|
5 |
1 |
if (filter[filter_pos] == '?') continue; |
6103
|
3 |
2 |
if (filter[filter_pos] == '[') { |
6107
|
1 |
2 |
if (filter[filter_pos] == '^') negate = true, filter_pos++; |
6110
|
7 |
0 |
for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false) |
|
4 |
3 |
for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false) |
|
1 |
3 |
for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false) |
6113
|
3 |
0 |
filters.emplace_back(tag_pos, negate, chars_start, filter_pos - chars_start); |
6114
|
3 |
0 |
if (!filter[filter_pos]) break; |
6116
|
2 |
0 |
filters.emplace_back(tag_pos, false, filter_pos, 1); |
6201
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
9 |
4 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
13 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
13 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
13 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
2 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
6 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
6 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
3 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
3 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
|
0 |
0 |
return it ? unaligned_load(it) : elementary_feature_unknown; |
6209
|
1 |
0 |
if (!compressor::load(is, data)) return false; |
|
1 |
0 |
if (!compressor::load(is, data)) return false; |
6212
|
1 |
0 |
maps.resize(data.next_1B()); |
|
1 |
0 |
maps.resize(data.next_1B()); |
6213
|
26 |
1 |
for (auto&& map : maps) |
6214
|
26 |
0 |
map.load(data); |
|
0 |
0 |
map.load(data); |
6252
|
426 |
0 |
if (value < 0x80) *where++ = value; |
6253
|
0 |
0 |
else if (value < 0x4000) *where++ = (value >> 7) | 0x80u, *where++ = value & 0x7Fu; |
6254
|
0 |
0 |
else if (value < 0x200000) *where++ = (value >> 14) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
6255
|
0 |
0 |
else if (value < 0x10000000) *where++ = (value >> 21) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu; |
6299
|
0 |
0 |
class feature_sequences { |
|
1 |
0 |
class feature_sequences { |
|
0 |
0 |
class feature_sequences { |
6328
|
0 |
0 |
return it ? unaligned_load(it) : 0; |
|
100 |
58 |
return it ? unaligned_load(it) : 0; |
|
0 |
0 |
return it ? unaligned_load(it) : 0; |
6337
|
0 |
0 |
if (!elementary.load(is)) return false; |
|
1 |
0 |
if (!elementary.load(is)) return false; |
|
0 |
0 |
if (!elementary.load(is)) return false; |
6340
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
1 |
0 |
if (!compressor::load(is, data)) return false; |
|
1 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
|
0 |
0 |
if (!compressor::load(is, data)) return false; |
6343
|
0 |
0 |
sequences.resize(data.next_1B()); |
|
0 |
0 |
sequences.resize(data.next_1B()); |
|
1 |
0 |
sequences.resize(data.next_1B()); |
|
1 |
0 |
sequences.resize(data.next_1B()); |
|
0 |
0 |
sequences.resize(data.next_1B()); |
|
0 |
0 |
sequences.resize(data.next_1B()); |
6344
|
0 |
0 |
for (auto&& sequence : sequences) { |
|
21 |
1 |
for (auto&& sequence : sequences) { |
|
0 |
0 |
for (auto&& sequence : sequences) { |
6345
|
0 |
0 |
sequence.dependant_range = data.next_4B(); |
|
21 |
0 |
sequence.dependant_range = data.next_4B(); |
|
0 |
0 |
sequence.dependant_range = data.next_4B(); |
6346
|
0 |
0 |
sequence.elements.resize(data.next_1B()); |
|
0 |
0 |
sequence.elements.resize(data.next_1B()); |
|
21 |
0 |
sequence.elements.resize(data.next_1B()); |
|
21 |
0 |
sequence.elements.resize(data.next_1B()); |
|
0 |
0 |
sequence.elements.resize(data.next_1B()); |
|
0 |
0 |
sequence.elements.resize(data.next_1B()); |
6347
|
0 |
0 |
for (auto&& element : sequence.elements) { |
|
45 |
21 |
for (auto&& element : sequence.elements) { |
|
0 |
0 |
for (auto&& element : sequence.elements) { |
6348
|
0 |
0 |
element.type = elementary_feature_type(data.next_4B()); |
|
45 |
0 |
element.type = elementary_feature_type(data.next_4B()); |
|
0 |
0 |
element.type = elementary_feature_type(data.next_4B()); |
6349
|
0 |
0 |
element.elementary_index = data.next_4B(); |
|
45 |
0 |
element.elementary_index = data.next_4B(); |
|
0 |
0 |
element.elementary_index = data.next_4B(); |
6350
|
0 |
0 |
element.sequence_index = data.next_4B(); |
|
45 |
0 |
element.sequence_index = data.next_4B(); |
|
0 |
0 |
element.sequence_index = data.next_4B(); |
6354
|
0 |
0 |
scores.resize(data.next_1B()); |
|
0 |
0 |
scores.resize(data.next_1B()); |
|
1 |
0 |
scores.resize(data.next_1B()); |
|
1 |
0 |
scores.resize(data.next_1B()); |
|
0 |
0 |
scores.resize(data.next_1B()); |
|
0 |
0 |
scores.resize(data.next_1B()); |
6355
|
0 |
0 |
for (auto&& score : scores) |
|
21 |
1 |
for (auto&& score : scores) |
|
0 |
0 |
for (auto&& score : scores) |
6356
|
0 |
0 |
score.load(data); |
|
21 |
0 |
score.load(data); |
|
0 |
0 |
score.load(data); |
|
0 |
0 |
score.load(data); |
|
0 |
0 |
score.load(data); |
|
0 |
0 |
score.load(data); |
6376
|
0 |
0 |
cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {} |
|
0 |
0 |
cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {} |
|
0 |
0 |
cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {} |
6384
|
0 |
0 |
caches.reserve(self.sequences.size()); |
|
1 |
0 |
caches.reserve(self.sequences.size()); |
|
0 |
0 |
caches.reserve(self.sequences.size()); |
6386
|
0 |
0 |
for (auto&& sequence : self.sequences) { |
|
21 |
1 |
for (auto&& sequence : self.sequences) { |
|
0 |
0 |
for (auto&& sequence : self.sequences) { |
6387
|
0 |
0 |
caches.emplace_back(int(sequence.elements.size())); |
|
21 |
0 |
caches.emplace_back(int(sequence.elements.size())); |
|
0 |
0 |
caches.emplace_back(int(sequence.elements.size())); |
6388
|
0 |
0 |
if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size(); |
|
1 |
20 |
if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size(); |
|
0 |
0 |
if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size(); |
6389
|
0 |
0 |
for (auto&& element : sequence.elements) |
|
45 |
21 |
for (auto&& element : sequence.elements) |
|
0 |
0 |
for (auto&& element : sequence.elements) |
6390
|
0 |
0 |
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
0 |
0 |
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
26 |
19 |
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
2 |
24 |
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
0 |
0 |
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
|
0 |
0 |
if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size) |
6393
|
0 |
0 |
key.resize(max_sequence_elements * vli::max_length()); |
|
1 |
0 |
key.resize(max_sequence_elements * vli::max_length()); |
|
0 |
0 |
key.resize(max_sequence_elements * vli::max_length()); |
6394
|
0 |
0 |
window.resize(max_window_size); |
|
1 |
0 |
window.resize(max_window_size); |
|
0 |
0 |
window.resize(max_window_size); |
6405
|
0 |
0 |
if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2); |
|
1 |
1 |
if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2); |
|
0 |
0 |
if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2); |
6406
|
0 |
0 |
if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2); |
|
1 |
1 |
if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2); |
|
0 |
0 |
if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2); |
6407
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) |
|
6 |
2 |
for (unsigned i = 0; i < forms.size(); i++) |
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) |
6408
|
0 |
0 |
if (analyses[i].size() > c.elementary_per_tag[i].size()) |
|
4 |
2 |
if (analyses[i].size() > c.elementary_per_tag[i].size()) |
|
0 |
0 |
if (analyses[i].size() > c.elementary_per_tag[i].size()) |
6416
|
0 |
0 |
for (auto&& cache : c.caches) |
|
42 |
2 |
for (auto&& cache : c.caches) |
|
0 |
0 |
for (auto&& cache : c.caches) |
6422
|
0 |
0 |
elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic); |
|
24 |
5 |
elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic); |
|
0 |
0 |
elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic); |
6428
|
0 |
0 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
0 |
0 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
0 |
0 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
80 |
12 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
16 |
64 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
64 |
28 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
0 |
0 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
0 |
0 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
|
0 |
0 |
for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++) |
6433
|
0 |
0 |
for (unsigned i = 0; i < sequences.size(); i++) { |
|
314 |
12 |
for (unsigned i = 0; i < sequences.size(); i++) { |
|
0 |
0 |
for (unsigned i = 0; i < sequences.size(); i++) { |
6434
|
0 |
0 |
if (tags_unchanged >= sequences[i].dependant_range) |
|
298 |
16 |
if (tags_unchanged >= sequences[i].dependant_range) |
|
0 |
0 |
if (tags_unchanged >= sequences[i].dependant_range) |
6438
|
0 |
0 |
for (unsigned j = 0; j < sequences[i].elements.size(); j++) { |
|
550 |
174 |
for (unsigned j = 0; j < sequences[i].elements.size(); j++) { |
|
0 |
0 |
for (unsigned j = 0; j < sequences[i].elements.size(); j++) { |
6444
|
0 |
0 |
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
0 |
0 |
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
136 |
16 |
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
126 |
10 |
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
0 |
0 |
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
|
0 |
0 |
value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index]; |
6447
|
0 |
0 |
value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index]; |
|
380 |
18 |
value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index]; |
|
0 |
0 |
value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index]; |
6454
|
0 |
0 |
if (value == elementary_feature_unknown) { |
|
124 |
426 |
if (value == elementary_feature_unknown) { |
|
0 |
0 |
if (value == elementary_feature_unknown) { |
6463
|
0 |
0 |
if (!key_size) { |
|
124 |
174 |
if (!key_size) { |
|
0 |
0 |
if (!key_size) { |
6466
|
0 |
0 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
0 |
0 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
0 |
0 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
96 |
78 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
80 |
16 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
158 |
16 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
0 |
0 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
0 |
0 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
|
0 |
0 |
} else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) { |
6523
|
0 |
0 |
cache(const viterbi& self) : features_cache(self.features) {} |
|
1 |
0 |
cache(const viterbi& self) : features_cache(self.features) {} |
|
0 |
0 |
cache(const viterbi& self) : features_cache(self.features) {} |
6536
|
0 |
0 |
if (!forms.size()) return; |
|
2 |
0 |
if (!forms.size()) return; |
|
0 |
0 |
if (!forms.size()) return; |
6540
|
0 |
0 |
for (unsigned i = 0, states = 1; i < forms.size(); i++) { |
|
6 |
2 |
for (unsigned i = 0, states = 1; i < forms.size(); i++) { |
|
0 |
0 |
for (unsigned i = 0, states = 1; i < forms.size(); i++) { |
6541
|
0 |
0 |
if (analyses[i].empty()) return; |
|
6 |
0 |
if (analyses[i].empty()) return; |
|
0 |
0 |
if (analyses[i].empty()) return; |
6542
|
0 |
0 |
states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size(); |
|
2 |
4 |
states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size(); |
|
0 |
0 |
states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size(); |
6545
|
0 |
0 |
if (nodes > c.nodes.size()) c.nodes.resize(nodes); |
|
2 |
0 |
if (nodes > c.nodes.size()) c.nodes.resize(nodes); |
|
0 |
0 |
if (nodes > c.nodes.size()) c.nodes.resize(nodes); |
6551
|
0 |
0 |
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
0 |
0 |
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
0 |
2 |
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
0 |
0 |
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
0 |
0 |
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
|
0 |
0 |
int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data()); |
6557
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) { |
|
6 |
2 |
for (unsigned i = 0; i < forms.size(); i++) { |
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) { |
6560
|
0 |
0 |
for (int j = 0; j < window_size; j++) window[j] = -1; |
|
6 |
18 |
for (int j = 0; j < window_size; j++) window[j] = -1; |
|
0 |
0 |
for (int j = 0; j < window_size; j++) window[j] = -1; |
6561
|
0 |
0 |
for (int tag = 0; tag < int(analyses[i].size()); tag++) |
|
13 |
6 |
for (int tag = 0; tag < int(analyses[i].size()); tag++) |
|
0 |
0 |
for (int tag = 0; tag < int(analyses[i].size()); tag++) |
6562
|
0 |
0 |
for (int prev = nodes_prev; prev < nodes_now; prev++) { |
|
29 |
13 |
for (int prev = nodes_prev; prev < nodes_now; prev++) { |
|
0 |
0 |
for (int prev = nodes_prev; prev < nodes_now; prev++) { |
6566
|
0 |
0 |
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
0 |
0 |
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
36 |
29 |
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
36 |
0 |
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
0 |
0 |
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
|
0 |
0 |
for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) { |
6567
|
0 |
0 |
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
0 |
0 |
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
22 |
14 |
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
16 |
6 |
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
0 |
0 |
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
|
0 |
0 |
same_tags += same_tags == n && window[n] == c.nodes[p].tag; |
6572
|
0 |
0 |
features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache); |
|
24 |
5 |
features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache); |
|
0 |
0 |
features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache); |
6573
|
0 |
0 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
0 |
0 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
0 |
0 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
9 |
20 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
8 |
1 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
24 |
5 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
0 |
0 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
0 |
0 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
|
0 |
0 |
score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) + |
6577
|
0 |
0 |
if (same_tags >= decoding_order-1) { |
|
6 |
23 |
if (same_tags >= decoding_order-1) { |
|
0 |
0 |
if (same_tags >= decoding_order-1) { |
6578
|
0 |
0 |
if (score <= c.nodes[nodes_next-1].score) continue; |
|
0 |
6 |
if (score <= c.nodes[nodes_next-1].score) continue; |
|
0 |
0 |
if (score <= c.nodes[nodes_next-1].score) continue; |
6593
|
0 |
0 |
for (int node = nodes_prev + 1; node < nodes_now; node++) |
|
4 |
2 |
for (int node = nodes_prev + 1; node < nodes_now; node++) |
|
0 |
0 |
for (int node = nodes_prev + 1; node < nodes_now; node++) |
6594
|
0 |
0 |
if (c.nodes[node].score > c.nodes[best].score) |
|
1 |
3 |
if (c.nodes[node].score > c.nodes[best].score) |
|
0 |
0 |
if (c.nodes[node].score > c.nodes[best].score) |
6597
|
0 |
0 |
for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev) |
|
6 |
2 |
for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev) |
|
0 |
0 |
for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev) |
6642
|
0 |
0 |
maps.resize(MAP_TOTAL); |
6691
|
0 |
0 |
for (unsigned i = forms.size(); i--;) { |
6695
|
0 |
0 |
for (unsigned j = 0; j < analyses[i].size(); j++) { |
6706
|
0 |
0 |
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] : |
|
0 |
0 |
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] : |
6711
|
0 |
0 |
if (index == string::npos) index = tag.size(); |
6712
|
0 |
0 |
per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0)); |
|
0 |
0 |
per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0)); |
6714
|
0 |
0 |
if (index < tag.size()) index++; |
6715
|
0 |
0 |
if (index < tag.size()) index = tag.find(separator, index); |
6716
|
0 |
0 |
if (index < tag.size()) index++; |
6717
|
0 |
0 |
for (size_t length; index < tag.size(); index += length + 1) { |
6719
|
0 |
0 |
length = (length == string::npos ? tag.size() : length) - index; |
6721
|
0 |
0 |
for (size_t equal_sign = 0; equal_sign + 1 < length; equal_sign++) |
6722
|
0 |
0 |
if (tag[index + equal_sign] == '=') { |
6726
|
0 |
0 |
if (tag.compare(index, equal_sign, "Case") == 0) value = TAG_CASE, map = MAP_TAG_CASE; |
6729
|
0 |
0 |
if (tag.compare(index, equal_sign, "Gender") == 0) value = TAG_GENDER, map = MAP_TAG_GENDER; |
6730
|
0 |
0 |
if (tag.compare(index, equal_sign, "Number") == 0) value = TAG_NUMBER, map = MAP_TAG_NUMBER; |
6731
|
0 |
0 |
if (tag.compare(index, equal_sign, "Person") == 0) value = TAG_PERSON, map = MAP_TAG_PERSON; |
6734
|
0 |
0 |
if (tag.compare(index, equal_sign, "Negative") == 0) value = TAG_NEGATIVE, map = MAP_TAG_NEGATIVE; |
6738
|
0 |
0 |
if (value >= 0) |
6744
|
0 |
0 |
if (tag.size() >= 2 && tag[1] == 'V') { |
|
0 |
0 |
if (tag.size() >= 2 && tag[1] == 'V') { |
|
0 |
0 |
if (tag.size() >= 2 && tag[1] == 'V') { |
6746
|
0 |
0 |
verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
0 |
0 |
verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
6756
|
0 |
0 |
if (verb_candidate >= 0) { |
6762
|
0 |
0 |
if (analyses[i].size() == 1) { |
6770
|
0 |
0 |
} else if (forms[i].len <= 0) { |
6785
|
0 |
0 |
while (form.len) { |
6789
|
0 |
0 |
num = num || cat & unicode::N; |
|
0 |
0 |
num = num || cat & unicode::N; |
6790
|
0 |
0 |
cap = cap || cat & unicode::Lut; |
|
0 |
0 |
cap = cap || cat & unicode::Lut; |
6791
|
0 |
0 |
dash = dash || cat & unicode::Pd; |
|
0 |
0 |
dash = dash || cat & unicode::Pd; |
6793
|
0 |
0 |
if (index == 10 || (!form.len && index < 10)) { |
|
0 |
0 |
if (index == 10 || (!form.len && index < 10)) { |
|
0 |
0 |
if (index == 10 || (!form.len && index < 10)) { |
6823
|
0 |
0 |
if (prev_dynamic) { |
6831
|
0 |
0 |
if (tag.tag.size() >= 2 && tag.tag[1] == 'V') { |
|
0 |
0 |
if (tag.tag.size() >= 2 && tag.tag[1] == 'V') { |
|
0 |
0 |
if (tag.tag.size() >= 2 && tag.tag[1] == 'V') { |
6881
|
0 |
0 |
maps.resize(MAP_TOTAL); |
6917
|
0 |
0 |
for (unsigned i = forms.size(); i--;) { |
6921
|
0 |
0 |
for (unsigned j = 0; j < analyses[i].size(); j++) { |
6924
|
0 |
0 |
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
6925
|
0 |
0 |
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
6926
|
0 |
0 |
per_tag[i][j].values[TAG25] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG25].value((tag25[0] = analyses[i][j].tag[1], tag25[1] = analyses[i][j].tag[4], tag25), 2) : elementary_feature_empty; |
6927
|
0 |
0 |
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
0 |
0 |
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
6930
|
0 |
0 |
if (analyses[i][j].tag[0] == 'V') { |
6932
|
0 |
0 |
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
0 |
0 |
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
6942
|
0 |
0 |
if (verb_candidate >= 0) { |
6948
|
0 |
0 |
if (analyses[i].size() == 1) { |
6952
|
0 |
0 |
} else if (forms[i].len <= 0) { |
6963
|
0 |
0 |
while (form.len) { |
6967
|
0 |
0 |
num = num || cat & unicode::N; |
|
0 |
0 |
num = num || cat & unicode::N; |
6968
|
0 |
0 |
cap = cap || cat & unicode::Lut; |
|
0 |
0 |
cap = cap || cat & unicode::Lut; |
6969
|
0 |
0 |
dash = dash || cat & unicode::Pd; |
|
0 |
0 |
dash = dash || cat & unicode::Pd; |
6971
|
0 |
0 |
if (index == 5 || (!form.len && index < 5)) { |
|
0 |
0 |
if (index == 5 || (!form.len && index < 5)) { |
|
0 |
0 |
if (index == 5 || (!form.len && index < 5)) { |
6991
|
0 |
0 |
if (prev_dynamic) { |
6999
|
0 |
0 |
if (tag.tag[0] == 'V') { |
7049
|
1 |
0 |
maps.resize(MAP_TOTAL); |
7097
|
6 |
2 |
for (unsigned i = forms.size(); i--;) { |
7101
|
13 |
6 |
for (unsigned j = 0; j < analyses[i].size(); j++) { |
7103
|
13 |
0 |
per_tag[i][j].values[TAG1] = analyses[i][j].tag.size() >= 1 ? maps[MAP_TAG1].value(analyses[i][j].tag.c_str() + 0, 1) : elementary_feature_empty; |
7104
|
13 |
0 |
per_tag[i][j].values[TAG2] = analyses[i][j].tag.size() >= 2 ? maps[MAP_TAG2].value(analyses[i][j].tag.c_str() + 1, 1) : elementary_feature_empty; |
7105
|
13 |
0 |
per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty; |
7106
|
2 |
11 |
per_tag[i][j].values[TAG4] = analyses[i][j].tag.size() >= 4 ? maps[MAP_TAG4].value(analyses[i][j].tag.c_str() + 3, 1) : elementary_feature_empty; |
7107
|
0 |
13 |
per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty; |
7108
|
7 |
6 |
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
|
7 |
0 |
per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] : |
7111
|
3 |
10 |
if (analyses[i][j].tag[0] == 'V') { |
7113
|
1 |
2 |
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
|
1 |
0 |
verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate; |
7123
|
2 |
4 |
if (verb_candidate >= 0) { |
7129
|
3 |
3 |
if (analyses[i].size() == 1) { |
7137
|
0 |
3 |
} else if (forms[i].len <= 0) { |
7152
|
14 |
3 |
while (form.len) { |
7156
|
14 |
0 |
num = num || cat & unicode::N; |
|
14 |
0 |
num = num || cat & unicode::N; |
7157
|
10 |
4 |
cap = cap || cat & unicode::Lut; |
|
9 |
1 |
cap = cap || cat & unicode::Lut; |
7158
|
14 |
0 |
dash = dash || cat & unicode::Pd; |
|
14 |
0 |
dash = dash || cat & unicode::Pd; |
7160
|
14 |
0 |
if (index == 10 || (!form.len && index < 10)) { |
|
11 |
3 |
if (index == 10 || (!form.len && index < 10)) { |
|
0 |
3 |
if (index == 10 || (!form.len && index < 10)) { |
7190
|
24 |
5 |
if (prev_dynamic) { |
7198
|
9 |
20 |
if (tag.tag[0] == 'V') { |
7243
|
0 |
0 |
while (lock.test_and_set(memory_order_acquire)) {} |
|
0 |
2 |
while (lock.test_and_set(memory_order_acquire)) {} |
|
0 |
0 |
while (lock.test_and_set(memory_order_acquire)) {} |
7252
|
0 |
0 |
while (lock.test_and_set(memory_order_acquire)) {} |
|
0 |
2 |
while (lock.test_and_set(memory_order_acquire)) {} |
|
0 |
0 |
while (lock.test_and_set(memory_order_acquire)) {} |
7253
|
0 |
0 |
if (!stack.empty()) { |
|
1 |
1 |
if (!stack.empty()) { |
|
0 |
0 |
if (!stack.empty()) { |
7302
|
0 |
0 |
cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {} |
|
1 |
0 |
cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {} |
|
0 |
0 |
cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {} |
7316
|
0 |
0 |
if (dict.reset(morpho::load(is)), !dict) return false; |
|
1 |
0 |
if (dict.reset(morpho::load(is)), !dict) return false; |
|
0 |
0 |
if (dict.reset(morpho::load(is)), !dict) return false; |
7318
|
0 |
0 |
if (!features.load(is)) return false; |
|
1 |
0 |
if (!features.load(is)) return false; |
|
0 |
0 |
if (!features.load(is)) return false; |
7330
|
0 |
0 |
if (!dict) return; |
|
2 |
0 |
if (!dict) return; |
|
0 |
0 |
if (!dict) return; |
7333
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
|
1 |
1 |
if (!c) c = new cache(*this); |
|
1 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
7336
|
0 |
0 |
if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size()); |
|
1 |
1 |
if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size()); |
|
0 |
0 |
if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size()); |
7337
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) { |
|
6 |
2 |
for (unsigned i = 0; i < forms.size(); i++) { |
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) { |
7340
|
0 |
0 |
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
0 |
0 |
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
6 |
0 |
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
6 |
0 |
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
0 |
0 |
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
|
0 |
0 |
dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]); |
7343
|
0 |
0 |
if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2); |
|
1 |
1 |
if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2); |
|
0 |
0 |
if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2); |
7346
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) |
|
6 |
2 |
for (unsigned i = 0; i < forms.size(); i++) |
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) |
7357
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
|
0 |
0 |
if (!c) c = new cache(*this); |
7444
|
1 |
0 |
tagger_id id = tagger_id(is.get()); |
|
0 |
1 |
tagger_id id = tagger_id(is.get()); |
|
0 |
0 |
tagger_id id = tagger_id(is.get()); |
|
0 |
0 |
tagger_id id = tagger_id(is.get()); |
|
0 |
0 |
tagger_id id = tagger_id(is.get()); |
|
0 |
0 |
tagger_id id = tagger_id(is.get()); |
|
0 |
0 |
tagger_id id = tagger_id(is.get()); |
7450
|
0 |
0 |
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
7451
|
0 |
0 |
if (res->load(is)) return res.release(); |
|
0 |
0 |
if (res->load(is)) return res.release(); |
7459
|
1 |
0 |
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
7460
|
1 |
0 |
if (res->load(is)) return res.release(); |
|
1 |
0 |
if (res->load(is)) return res.release(); |
7467
|
0 |
0 |
auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id)); |
7468
|
0 |
0 |
if (res->load(is)) return res.release(); |
|
0 |
0 |
if (res->load(is)) return res.release(); |
7477
|
1 |
0 |
ifstream f(path_from_utf8(fname).c_str(), ifstream::binary); |
7478
|
1 |
0 |
if (!f) return nullptr; |
7480
|
1 |
0 |
return load(f); |
7485
|
2 |
0 |
return morpho ? morpho->new_tokenizer() : nullptr; |
7574
|
0 |
0 |
for (int i = 0; i < 15 && pdt_tag[i]; i++) |
|
0 |
0 |
for (int i = 0; i < 15 && pdt_tag[i]; i++) |
7575
|
0 |
0 |
if (pdt_tag[i] != '-') { |
7576
|
0 |
0 |
if (!tag.empty()) tag.push_back('|'); |
7583
|
0 |
0 |
for (unsigned i = 0; i + 2 < lemma.size(); i++) |
7584
|
0 |
0 |
if (lemma[i] == '_' && lemma[i + 1] == ';') { |
|
0 |
0 |
if (lemma[i] == '_' && lemma[i + 1] == ';') { |
|
0 |
0 |
if (lemma[i] == '_' && lemma[i + 1] == ';') { |
7585
|
0 |
0 |
if (!tag.empty()) tag.push_back('|'); |
7594
|
0 |
0 |
return raw_lemma < lemma.size() ? (lemma.resize(raw_lemma), true) : false; |
7605
|
0 |
0 |
for (auto&& tagged_lemma : tagged_lemmas) { |
7611
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
7619
|
0 |
0 |
for (auto&& tagged_lemma_forms : forms) { |
7620
|
0 |
0 |
for (auto&& tagged_form : tagged_lemma_forms.forms) |
7626
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
7672
|
0 |
0 |
return lemma_id_len < lemma.size() ? (lemma.resize(lemma_id_len), true) : false; |
7682
|
0 |
0 |
for (auto&& tagged_lemma : tagged_lemmas) |
7686
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
7694
|
0 |
0 |
for (auto&& tagged_lemma_forms : forms) |
7698
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
7744
|
0 |
0 |
return raw_lemma_len < lemma.size() ? (lemma.resize(raw_lemma_len), true) : false; |
7754
|
0 |
0 |
for (auto&& tagged_lemma : tagged_lemmas) |
7758
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || tagged_lemmas.size() < 2) return; |
7766
|
0 |
0 |
for (auto&& tagged_lemma_forms : forms) |
7770
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
|
0 |
0 |
if (!lemma_changed || forms.size() < 2) return; |
7805
|
0 |
0 |
if (name == "pdt_to_conll2009") return tagset_converter::new_pdt_to_conll2009_converter(); |
7806
|
0 |
0 |
if (name == "strip_lemma_comment") return tagset_converter::new_strip_lemma_comment_converter(dictionary); |
7807
|
0 |
0 |
if (name == "strip_lemma_id") return tagset_converter::new_strip_lemma_id_converter(dictionary); |
7814
|
0 |
0 |
inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; } |
|
0 |
0 |
inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; } |
7815
|
0 |
0 |
inline static bool lt(const tagged_lemma& a, const tagged_lemma& b) { int lemma_compare = a.lemma.compare(b.lemma); return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); } |
7824
|
0 |
0 |
for (unsigned i = 0; i < forms.size(); i++) { |
7826
|
0 |
0 |
for (unsigned j = forms.size() - 1; j > i; j--) |
7827
|
0 |
0 |
if (forms[j].lemma == forms[i].lemma) { |
7829
|
0 |
0 |
for (auto&& tagged_form : forms[j].forms) |
7833
|
0 |
0 |
if (j < forms.size() - 1) { |
7841
|
0 |
0 |
if (any_merged && forms[i].forms.size() > 1) { |
|
0 |
0 |
if (any_merged && forms[i].forms.size() > 1) { |
|
0 |
0 |
if (any_merged && forms[i].forms.size() > 1) { |
7844
|
0 |
0 |
inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; } |
|
0 |
0 |
inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; } |
7845
|
0 |
0 |
inline static bool lt(const tagged_form& a, const tagged_form& b) { int tag_compare = a.tag.compare(b.tag); return tag_compare < 0 || (tag_compare == 0 && a.form < b.form); } |
7999
|
428 |
4 |
const unordered_set czech_tokenizer::abbreviations_czech = { |
|
0 |
0 |
const unordered_set czech_tokenizer::abbreviations_czech = { |
8015
|
412 |
4 |
const unordered_set czech_tokenizer::abbreviations_slovak = { |
|
0 |
0 |
const unordered_set czech_tokenizer::abbreviations_slovak = { |
8032
|
1 |
0 |
: ragel_tokenizer(version <= 1 ? 1 : 2), m(m) { |
|
0 |
0 |
: ragel_tokenizer(version <= 1 ? 1 : 2), m(m) { |
8046
|
0 |
22 |
if (!m) return; |
8047
|
0 |
0 |
if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return; |
|
0 |
0 |
if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return; |
|
0 |
0 |
if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return; |
8050
|
0 |
0 |
for (unsigned hyphens = 1; hyphens <= 2; hyphens++) { |
8052
|
0 |
0 |
if (tokens.size() < 2*hyphens + 1) break; |
8054
|
0 |
0 |
if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P || |
|
0 |
0 |
if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P || |
|
0 |
0 |
if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P || |
8055
|
0 |
0 |
tokens[first_hyphen].start + tokens[first_hyphen].length != tokens[first_hyphen + 1].start || |
8056
|
0 |
0 |
tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start || |
|
0 |
0 |
tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start || |
8060
|
0 |
0 |
if (m->analyze(string_piece(chars[tokens[first_hyphen-1].start].str, chars[tokens.back().start + tokens.back().length].str - chars[tokens[first_hyphen-1].start].str), morpho::NO_GUESSER, lemmas) >= 0) |
8064
|
0 |
0 |
if (matched_hyphens) { |
8078
|
0 |
2 |
while (tokenize_url_email(tokens)) |
8079
|
0 |
0 |
if (emergency_sentence_split(tokens)) |
8095
|
2 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
8100
|
43 |
111 |
switch ( _czech_tokenizer_from_state_actions[cs] ) { |
8109
|
43 |
111 |
if ( _klen > 0 ) { |
8114
|
69 |
42 |
if ( _upper < _lower ) |
8118
|
17 |
52 |
if ( _widec < _mid[0] ) |
8120
|
51 |
1 |
else if ( _widec > _mid[1] ) |
8126
|
0 |
1 |
if ( |
8127
|
1 |
0 |
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
0 |
1 |
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
8132
|
0 |
0 |
if ( |
8133
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
8146
|
154 |
0 |
if ( _klen > 0 ) { |
8151
|
486 |
118 |
if ( _upper < _lower ) |
8155
|
270 |
216 |
if ( _widec < *_mid ) |
8157
|
180 |
36 |
else if ( _widec > *_mid ) |
8169
|
101 |
17 |
if ( _klen > 0 ) { |
8174
|
133 |
20 |
if ( _upper < _lower ) |
8178
|
38 |
95 |
if ( _widec < _mid[0] ) |
8180
|
14 |
81 |
else if ( _widec > _mid[1] ) |
8195
|
70 |
85 |
if ( _czech_tokenizer_trans_actions[_trans] == 0 ) |
8209
|
0 |
1 |
do |
8210
|
0 |
1 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8218
|
4 |
4 |
for (current = ts; current < whitespace; current++) |
8221
|
1 |
3 |
if (eos) {( current)++; goto _out; } |
8226
|
0 |
0 |
if (!tokens.empty()) {( current)++; goto _out; } |
8228
|
0 |
0 |
do |
8229
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8238
|
0 |
19 |
do |
8239
|
0 |
19 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8247
|
0 |
17 |
do |
8248
|
0 |
17 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8255
|
0 |
0 |
if (!tokens.empty()) {( current)++; goto _out; } |
8257
|
0 |
0 |
do |
8258
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8267
|
0 |
2 |
do |
8268
|
0 |
2 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8276
|
42 |
112 |
switch ( _czech_tokenizer_to_state_actions[cs] ) { |
8282
|
154 |
0 |
if ( cs == 0 ) |
8284
|
152 |
2 |
if ( ++( current) != ( (chars.size() - 1)) ) |
8287
|
2 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
8289
|
1 |
1 |
if ( _czech_tokenizer_eof_trans[cs] > 0 ) { |
8317
|
456 |
4 |
const unordered_set english_tokenizer::abbreviations = { |
|
0 |
0 |
const unordered_set english_tokenizer::abbreviations = { |
8416
|
0 |
0 |
if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return; |
|
0 |
0 |
if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return; |
|
0 |
0 |
if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return; |
8431
|
0 |
0 |
if ( ( index) == ( end) ) |
8440
|
0 |
0 |
if ( _klen > 0 ) { |
8445
|
0 |
0 |
if ( _upper < _lower ) |
8449
|
0 |
0 |
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < *_mid ) |
8451
|
0 |
0 |
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > *_mid ) |
8463
|
0 |
0 |
if ( _klen > 0 ) { |
8468
|
0 |
0 |
if ( _upper < _lower ) |
8472
|
0 |
0 |
if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < _mid[0] ) |
8474
|
0 |
0 |
else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > _mid[1] ) |
8488
|
0 |
0 |
if ( _english_tokenizer_split_token_trans_actions[_trans] == 0 ) |
8502
|
0 |
0 |
if ( cs == 0 ) |
8504
|
0 |
0 |
if ( ++( index) != ( end) ) |
8507
|
0 |
0 |
if ( ( index) == ( end) ) |
8509
|
0 |
0 |
switch ( _english_tokenizer_split_token_eof_actions[cs] ) { |
8519
|
0 |
0 |
if (split_len && split_len < end) { |
8673
|
0 |
0 |
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
0 |
0 |
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
0 |
0 |
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
0 |
0 |
english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
8682
|
0 |
0 |
while (tokenize_url_email(tokens)) |
8683
|
0 |
0 |
if (emergency_sentence_split(tokens)) |
8699
|
0 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
8704
|
0 |
0 |
switch ( _english_tokenizer_from_state_actions[cs] ) { |
8713
|
0 |
0 |
if ( _klen > 0 ) { |
8718
|
0 |
0 |
if ( _upper < _lower ) |
8722
|
0 |
0 |
if ( _widec < _mid[0] ) |
8724
|
0 |
0 |
else if ( _widec > _mid[1] ) |
8730
|
0 |
0 |
if ( |
8731
|
0 |
0 |
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
0 |
0 |
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
8736
|
0 |
0 |
if ( |
8737
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
8750
|
0 |
0 |
if ( _klen > 0 ) { |
8755
|
0 |
0 |
if ( _upper < _lower ) |
8759
|
0 |
0 |
if ( _widec < *_mid ) |
8761
|
0 |
0 |
else if ( _widec > *_mid ) |
8773
|
0 |
0 |
if ( _klen > 0 ) { |
8778
|
0 |
0 |
if ( _upper < _lower ) |
8782
|
0 |
0 |
if ( _widec < _mid[0] ) |
8784
|
0 |
0 |
else if ( _widec > _mid[1] ) |
8799
|
0 |
0 |
if ( _english_tokenizer_trans_actions[_trans] == 0 ) |
8813
|
0 |
0 |
do |
8814
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8822
|
0 |
0 |
for (current = ts; current < whitespace; current++) |
8825
|
0 |
0 |
if (eos) {( current)++; goto _out; } |
8830
|
0 |
0 |
if (!tokens.empty()) {( current)++; goto _out; } |
8832
|
0 |
0 |
do |
8833
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8842
|
0 |
0 |
do |
8843
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8851
|
0 |
0 |
do |
8852
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8859
|
0 |
0 |
if (!tokens.empty()) {( current)++; goto _out; } |
8861
|
0 |
0 |
do |
8862
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8871
|
0 |
0 |
do |
8872
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
8880
|
0 |
0 |
switch ( _english_tokenizer_to_state_actions[cs] ) { |
8886
|
0 |
0 |
if ( cs == 0 ) |
8888
|
0 |
0 |
if ( ++( current) != ( (chars.size() - 1)) ) |
8891
|
0 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
8893
|
0 |
0 |
if ( _english_tokenizer_eof_trans[cs] > 0 ) { |
9046
|
0 |
0 |
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
0 |
0 |
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
0 |
2 |
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
|
0 |
0 |
generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {} |
9055
|
0 |
2 |
while (tokenize_url_email(tokens)) |
9056
|
0 |
0 |
if (emergency_sentence_split(tokens)) |
9072
|
2 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
9077
|
8 |
21 |
switch ( _generic_tokenizer_from_state_actions[cs] ) { |
9086
|
8 |
21 |
if ( _klen > 0 ) { |
9091
|
14 |
8 |
if ( _upper < _lower ) |
9095
|
2 |
12 |
if ( _widec < _mid[0] ) |
9097
|
12 |
0 |
else if ( _widec > _mid[1] ) |
9103
|
0 |
0 |
if ( |
9104
|
0 |
0 |
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
|
0 |
0 |
!current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256; |
9109
|
0 |
0 |
if ( |
9110
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
|
0 |
0 |
!current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256; |
9123
|
29 |
0 |
if ( _klen > 0 ) { |
9128
|
88 |
21 |
if ( _upper < _lower ) |
9132
|
52 |
36 |
if ( _widec < *_mid ) |
9134
|
28 |
8 |
else if ( _widec > *_mid ) |
9146
|
19 |
2 |
if ( _klen > 0 ) { |
9151
|
25 |
4 |
if ( _upper < _lower ) |
9155
|
8 |
17 |
if ( _widec < _mid[0] ) |
9157
|
2 |
15 |
else if ( _widec > _mid[1] ) |
9172
|
10 |
21 |
if ( _generic_tokenizer_trans_actions[_trans] == 0 ) |
9185
|
0 |
0 |
do |
9186
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9194
|
0 |
0 |
for (current = ts; current < whitespace; current++) |
9197
|
0 |
0 |
if (eos) {( current)++; goto _out; } |
9202
|
0 |
0 |
if (!tokens.empty()) {( current)++; goto _out; } |
9204
|
0 |
0 |
do |
9205
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9213
|
0 |
6 |
do |
9214
|
0 |
6 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9222
|
0 |
2 |
do |
9223
|
0 |
2 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9230
|
0 |
0 |
if (!tokens.empty()) {( current)++; goto _out; } |
9232
|
0 |
0 |
do |
9233
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9241
|
0 |
0 |
do |
9242
|
0 |
0 |
if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } } |
9250
|
8 |
23 |
switch ( _generic_tokenizer_to_state_actions[cs] ) { |
9256
|
31 |
0 |
if ( cs == 0 ) |
9258
|
27 |
4 |
if ( ++( current) != ( (chars.size() - 1)) ) |
9261
|
4 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
9263
|
2 |
2 |
if ( _generic_tokenizer_eof_trans[cs] > 0 ) { |
9641
|
3 |
0 |
initialize_ragel_map(); |
9645
|
0 |
6 |
while (ragel_map_flag.test_and_set()) {} |
9646
|
2 |
4 |
if (ragel_map.empty()) { |
9647
|
256 |
2 |
for (uint8_t ascii = 0; ascii < 128; ascii++) |
9659
|
2 |
6 |
if (chr >= ragel_map.size()) |
9679
|
48 |
0 |
if ( ( current) == ( (chars.size() - 1)) ) |
9687
|
0 |
138 |
if ( _klen > 0 ) { |
9692
|
0 |
0 |
if ( _upper < _lower ) |
9696
|
0 |
0 |
if ( _widec < _mid[0] ) |
9698
|
0 |
0 |
else if ( _widec > _mid[1] ) |
9704
|
0 |
0 |
if ( |
9710
|
0 |
0 |
if ( |
9724
|
138 |
0 |
if ( _klen > 0 ) { |
9729
|
355 |
127 |
if ( _upper < _lower ) |
9733
|
133 |
222 |
if ( _widec < *_mid ) |
9735
|
211 |
11 |
else if ( _widec > *_mid ) |
9747
|
127 |
0 |
if ( _klen > 0 ) { |
9752
|
298 |
44 |
if ( _upper < _lower ) |
9756
|
81 |
217 |
if ( _widec < _mid[0] ) |
9758
|
134 |
83 |
else if ( _widec > _mid[1] ) |
9772
|
0 |
138 |
if ( _ragel_url_email_trans_actions[_trans] == 0 ) |
9793
|
94 |
44 |
if ( cs == 0 ) |
9795
|
90 |
4 |
if ( ++( current) != ( (chars.size() - 1)) ) |
9801
|
0 |
48 |
if (end > start) { |
9826
|
0 |
0 |
vertical_tokenizer() : unicode_tokenizer(0) {} |
9874
|
3 |
0 |
ragel_tokenizer::initialize_ragel_map(); |
9876
|
3 |
0 |
set_text(string_piece(nullptr, 0)); |
9882
|
3 |
3 |
if (make_copy && text.str) { |
|
3 |
0 |
if (make_copy && text.str) { |
9889
|
130 |
6 |
for (const char* curr_str = text.str; text.len; curr_str = text.str) |
9895
|
7 |
0 |
vector& tokens = tokens_ptr ? *tokens_ptr : tokens_buffer; |
9897
|
7 |
0 |
if (forms) forms->clear(); |
9898
|
4 |
3 |
if (current >= chars.size() - 1) return false; |
9901
|
4 |
0 |
if (forms) |
9902
|
32 |
4 |
for (auto&& token : tokens) |
9909
|
48 |
3 |
if (current >= chars.size() - 1) return false; |
9911
|
48 |
0 |
return url_email_tokenizer ? ragel_tokenizer::ragel_url_email(url_email_tokenizer, chars, current, tokens) : false; |
9918
|
0 |
47 |
return tokens.size() >= 500 || |
9919
|
47 |
0 |
(tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) || |
|
0 |
0 |
(tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) || |
|
0 |
47 |
(tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) || |
9920
|
0 |
0 |
(tokens.size() >= 400 && chars[tokens.back().start].cat & unicode::Po); |
9926
|
4 |
0 |
if (eos_chr == '.' && !tokens.empty()) { |
|
0 |
4 |
if (eos_chr == '.' && !tokens.empty()) { |
|
4 |
0 |
if (eos_chr == '.' && !tokens.empty()) { |
9928
|
2 |
2 |
if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut) |
|
0 |
2 |
if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut) |
|
2 |
2 |
if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut) |
9932
|
2 |
0 |
if (abbreviations) { |
9934
|
11 |
2 |
for (size_t i = 0; i < tokens.back().length; i++) |
9936
|
1 |
1 |
if (abbreviations->count(eos_buffer)) |
9957
|
0 |
0 |
if (current >= chars.size() - 1) return false; |
9961
|
0 |
0 |
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
|
0 |
0 |
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
|
0 |
0 |
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
|
0 |
0 |
while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++; |
9964
|
0 |
0 |
if (current < chars.size() - 1) { |
9966
|
0 |
0 |
if (current < chars.size() - 1 && |
|
0 |
0 |
if (current < chars.size() - 1 && |
|
0 |
0 |
if (current < chars.size() - 1 && |
9967
|
0 |
0 |
((chars[current-1].chr == '\r' && chars[current].chr == '\n') || |
|
0 |
0 |
((chars[current-1].chr == '\r' && chars[current].chr == '\n') || |
9968
|
0 |
0 |
(chars[current-1].chr == '\n' && chars[current].chr == '\r'))) |
9972
|
0 |
0 |
if (line_start < line_end) |
10225
|
0 |
0 |
for (; *str; str++) |
10226
|
0 |
0 |
if (((unsigned char)*str) >= 0x80) { |
10227
|
0 |
0 |
if (((unsigned char)*str) < 0xC0) return false; |
10228
|
0 |
0 |
else if (((unsigned char)*str) < 0xE0) { |
10229
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10230
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF0) { |
10231
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10232
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10233
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF8) { |
10234
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10235
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10236
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10243
|
0 |
0 |
for (; len > 0; str++, len--) |
10244
|
0 |
0 |
if (((unsigned char)*str) >= 0x80) { |
10245
|
0 |
0 |
if (((unsigned char)*str) < 0xC0) return false; |
10246
|
0 |
0 |
else if (((unsigned char)*str) < 0xE0) { |
10247
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10248
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF0) { |
10249
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10250
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10251
|
0 |
0 |
} else if (((unsigned char)*str) < 0xF8) { |
10252
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10253
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10254
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
|
0 |
0 |
str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false; |
10263
|
0 |
0 |
for (char32_t chr; (chr = decode(str)); ) |
10270
|
0 |
0 |
while (len) |
10277
|
0 |
0 |
for (auto&& chr : str) |
10335
|
0 |
0 |
return {3, 2, 0, ""}; |
10865
|
214 |
32574 |
IF_BIT_0(prob) |
|
563 |
32225 |
IF_BIT_0(prob) |
10870
|
559 |
4 |
if (checkDicSize != 0 || processedPos != 0) |
10872
|
0 |
559 |
(dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc)))); |
10874
|
237 |
326 |
if (state < kNumLitStates) |
10878
|
203 |
1693 |
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); |
|
1297 |
599 |
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); |
|
1659 |
237 |
do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100); |
10882
|
0 |
326 |
unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
10884
|
88 |
238 |
state -= (state < 10) ? 3 : 6; |
10893
|
287 |
2321 |
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) |
|
1780 |
828 |
GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit) |
10895
|
2282 |
326 |
while (symbol < 0x100); |
10905
|
210 |
32015 |
IF_BIT_0(prob) |
|
206 |
32019 |
IF_BIT_0(prob) |
10914
|
32019 |
0 |
if (checkDicSize == 0 && processedPos == 0) |
10917
|
116 |
31903 |
IF_BIT_0(prob) |
|
31929 |
90 |
IF_BIT_0(prob) |
10921
|
132 |
31797 |
IF_BIT_0(prob) |
|
78 |
31851 |
IF_BIT_0(prob) |
10924
|
0 |
78 |
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
10927
|
3 |
75 |
state = state < kNumLitStates ? 9 : 11; |
10937
|
21 |
69 |
IF_BIT_0(prob) |
|
51 |
39 |
IF_BIT_0(prob) |
10946
|
5 |
34 |
IF_BIT_0(prob) |
|
25 |
14 |
IF_BIT_0(prob) |
10962
|
31809 |
132 |
state = state < kNumLitStates ? 8 : 11; |
10968
|
203 |
31944 |
IF_BIT_0(probLen) |
|
211 |
31936 |
IF_BIT_0(probLen) |
10979
|
110 |
31826 |
IF_BIT_0(probLen) |
|
29 |
31907 |
IF_BIT_0(probLen) |
10994
|
1131 |
254845 |
TREE_DECODE(probLen, limit, len); |
|
972 |
255004 |
TREE_DECODE(probLen, limit, len); |
|
223829 |
32147 |
TREE_DECODE(probLen, limit, len); |
10998
|
206 |
31941 |
if (state >= kNumStates) |
11003
|
18 |
188 |
TREE_6_DECODE(prob, distance); |
|
132 |
74 |
TREE_6_DECODE(prob, distance); |
|
39 |
167 |
TREE_6_DECODE(prob, distance); |
|
189 |
17 |
TREE_6_DECODE(prob, distance); |
|
22 |
184 |
TREE_6_DECODE(prob, distance); |
|
152 |
54 |
TREE_6_DECODE(prob, distance); |
|
20 |
186 |
TREE_6_DECODE(prob, distance); |
|
83 |
123 |
TREE_6_DECODE(prob, distance); |
|
12 |
194 |
TREE_6_DECODE(prob, distance); |
|
115 |
91 |
TREE_6_DECODE(prob, distance); |
|
29 |
177 |
TREE_6_DECODE(prob, distance); |
|
133 |
73 |
TREE_6_DECODE(prob, distance); |
11004
|
173 |
33 |
if (distance >= kStartPosModelIndex) |
11009
|
80 |
93 |
if (posSlot < kEndPosModelIndex) |
11016
|
112 |
80 |
do |
11018
|
24 |
168 |
GET_BIT2(prob + i, i, ; , distance |= mask); |
|
89 |
103 |
GET_BIT2(prob + i, i, ; , distance |= mask); |
11027
|
1017 |
93 |
do |
11029
|
134 |
976 |
NORMALIZE |
11053
|
12 |
81 |
GET_BIT2(prob + i, i, ; , distance |= 1); |
|
55 |
38 |
GET_BIT2(prob + i, i, ; , distance |= 1); |
11054
|
16 |
77 |
GET_BIT2(prob + i, i, ; , distance |= 2); |
|
52 |
41 |
GET_BIT2(prob + i, i, ; , distance |= 2); |
11055
|
9 |
84 |
GET_BIT2(prob + i, i, ; , distance |= 4); |
|
53 |
40 |
GET_BIT2(prob + i, i, ; , distance |= 4); |
11056
|
13 |
80 |
GET_BIT2(prob + i, i, ; , distance |= 8); |
|
58 |
35 |
GET_BIT2(prob + i, i, ; , distance |= 8); |
11058
|
0 |
93 |
if (distance == (uint32_t)0xFFFFFFFF) |
11070
|
206 |
0 |
if (checkDicSize == 0) |
11072
|
206 |
0 |
if (distance >= processedPos) |
11075
|
0 |
0 |
else if (distance >= checkDicSize) |
11077
|
85 |
121 |
state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3; |
11082
|
32147 |
0 |
if (limit == dicPos) |
11086
|
0 |
32147 |
unsigned curLen = ((rem < len) ? (unsigned)rem : len); |
11087
|
0 |
32147 |
size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0); |
11092
|
32147 |
0 |
if (pos + curLen <= dicBufSize) |
11098
|
8657310 |
32147 |
do |
11104
|
0 |
0 |
do |
11107
|
0 |
0 |
if (++pos == dicBufSize) |
11115
|
32712 |
76 |
while (dicPos < limit && buf < bufLimit); |
11116
|
18 |
58 |
NORMALIZE; |
11134
|
0 |
80 |
if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart) |
11141
|
0 |
0 |
if (limit - dicPos < len) |
11144
|
0 |
0 |
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) |
|
0 |
0 |
if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len) |
11149
|
0 |
0 |
while (len-- != 0) |
11151
|
0 |
0 |
dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)]; |
11163
|
76 |
0 |
if (p->checkDicSize == 0) |
11166
|
0 |
76 |
if (limit - p->dicPos > rem) |
11169
|
76 |
0 |
RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit)); |
11170
|
0 |
76 |
if (p->processedPos >= p->prop.dicSize) |
11174
|
72 |
4 |
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); |
|
0 |
72 |
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); |
|
0 |
0 |
while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart); |
11176
|
0 |
76 |
if (p->remainLen > kMatchSpecLenStart) |
11207
|
0 |
71 |
IF_BIT_0_CHECK(prob) |
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
|
42 |
29 |
IF_BIT_0_CHECK(prob) |
11214
|
42 |
0 |
if (p->checkDicSize != 0 || p->processedPos != 0) |
11217
|
0 |
42 |
(p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc)))); |
11219
|
15 |
27 |
if (state < kNumLitStates) |
11222
|
7 |
113 |
do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); |
|
7 |
0 |
do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); |
|
85 |
35 |
do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); |
|
105 |
15 |
do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100); |
11227
|
0 |
27 |
((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)]; |
11237
|
21 |
195 |
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) |
|
21 |
0 |
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) |
|
154 |
62 |
GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit) |
11239
|
189 |
27 |
while (symbol < 0x100); |
11249
|
1 |
28 |
IF_BIT_0_CHECK(prob) |
|
1 |
0 |
IF_BIT_0_CHECK(prob) |
|
11 |
18 |
IF_BIT_0_CHECK(prob) |
11261
|
0 |
18 |
IF_BIT_0_CHECK(prob) |
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
|
18 |
0 |
IF_BIT_0_CHECK(prob) |
11265
|
2 |
16 |
IF_BIT_0_CHECK(prob) |
|
2 |
0 |
IF_BIT_0_CHECK(prob) |
|
13 |
5 |
IF_BIT_0_CHECK(prob) |
11268
|
6 |
7 |
NORMALIZE_CHECK; |
|
6 |
0 |
NORMALIZE_CHECK; |
11280
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
11288
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
|
0 |
0 |
IF_BIT_0_CHECK(prob) |
11304
|
2 |
14 |
IF_BIT_0_CHECK(probLen) |
|
2 |
0 |
IF_BIT_0_CHECK(probLen) |
|
15 |
1 |
IF_BIT_0_CHECK(probLen) |
11315
|
0 |
1 |
IF_BIT_0_CHECK(probLen) |
|
0 |
0 |
IF_BIT_0_CHECK(probLen) |
|
1 |
0 |
IF_BIT_0_CHECK(probLen) |
11330
|
8 |
40 |
TREE_DECODE_CHECK(probLen, limit, len); |
|
8 |
0 |
TREE_DECODE_CHECK(probLen, limit, len); |
|
22 |
26 |
TREE_DECODE_CHECK(probLen, limit, len); |
|
32 |
16 |
TREE_DECODE_CHECK(probLen, limit, len); |
11334
|
11 |
5 |
if (state < 4) |
11340
|
7 |
59 |
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); |
|
7 |
0 |
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); |
|
42 |
24 |
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); |
|
55 |
11 |
TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot); |
11341
|
10 |
1 |
if (posSlot >= kStartPosModelIndex) |
11347
|
7 |
3 |
if (posSlot < kEndPosModelIndex) |
11354
|
40 |
3 |
do |
11356
|
5 |
38 |
NORMALIZE_CHECK |
|
5 |
0 |
NORMALIZE_CHECK |
11367
|
21 |
10 |
do |
11369
|
5 |
26 |
GET_BIT_CHECK(prob + i, i); |
|
5 |
0 |
GET_BIT_CHECK(prob + i, i); |
|
10 |
21 |
GET_BIT_CHECK(prob + i, i); |
11377
|
12 |
46 |
NORMALIZE_CHECK; |
|
12 |
0 |
NORMALIZE_CHECK; |
11394
|
0 |
0 |
if (initDic) |
11400
|
0 |
0 |
if (initState) |
11415
|
31960 |
4 |
for (i = 0; i < numProbs; i++) |
11431
|
80 |
0 |
while (p->remainLen != kMatchSpecLenStart) |
11435
|
4 |
76 |
if (p->needFlush != 0) |
11437
|
24 |
0 |
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) |
|
20 |
4 |
for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--) |
11439
|
0 |
4 |
if (p->tempBufSize < RC_INIT_SIZE) |
11444
|
4 |
0 |
if (p->tempBuf[0] != 0) |
11452
|
4 |
76 |
if (p->dicPos >= dicLimit) |
11454
|
4 |
0 |
if (p->remainLen == 0 && p->code == 0) |
|
4 |
0 |
if (p->remainLen == 0 && p->code == 0) |
11459
|
0 |
0 |
if (finishMode == LZMA_FINISH_ANY) |
11464
|
0 |
0 |
if (p->remainLen != 0) |
11472
|
4 |
72 |
if (p->needInitState) |
11475
|
0 |
76 |
if (p->tempBufSize == 0) |
11479
|
71 |
5 |
if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) |
11482
|
0 |
71 |
if (dummyRes == DUMMY_ERROR) |
11490
|
0 |
71 |
if (checkEndMarkNow && dummyRes != DUMMY_MATCH) |
11500
|
76 |
0 |
if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0) |
11510
|
0 |
0 |
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) |
|
0 |
0 |
while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize) |
11513
|
0 |
0 |
if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow) |
11516
|
0 |
0 |
if (dummyRes == DUMMY_ERROR) |
11522
|
0 |
0 |
if (checkEndMarkNow && dummyRes != DUMMY_MATCH) |
11529
|
0 |
0 |
if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0) |
11538
|
0 |
0 |
if (p->code == 0) |
11553
|
0 |
0 |
if (p->dicPos == p->dicBufSize) |
11556
|
0 |
0 |
if (outSize > p->dicBufSize - dicPos) |
11576
|
0 |
0 |
if (res != 0) |
11578
|
0 |
0 |
if (outSizeCur == 0 || outSize == 0) |
11606
|
4 |
0 |
if (size < LZMA_PROPS_SIZE) |
11611
|
0 |
4 |
if (dicSize < LZMA_DIC_MIN) |
11616
|
4 |
0 |
if (d >= (9 * 5 * 5)) |
11630
|
0 |
4 |
if (p->probs == 0 || numProbs != p->numProbs) |
|
0 |
0 |
if (p->probs == 0 || numProbs != p->numProbs) |
11635
|
4 |
0 |
if (p->probs == 0) |
11644
|
4 |
0 |
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); |
11645
|
4 |
0 |
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); |
11654
|
0 |
0 |
RINOK(LzmaProps_Decode(&propNew, props, propsSize)); |
11655
|
0 |
0 |
RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc)); |
11657
|
0 |
0 |
if (p->dic == 0 || dicBufSize != p->dicBufSize) |
|
0 |
0 |
if (p->dic == 0 || dicBufSize != p->dicBufSize) |
11661
|
0 |
0 |
if (p->dic == 0) |
11681
|
4 |
0 |
if (inSize < RC_INIT_SIZE) |
11686
|
4 |
0 |
if (res != 0) |
11696
|
4 |
0 |
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) |
|
0 |
4 |
if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT) |
11710
|
4 |
4 |
static void LzmaFree(void* /*p*/, void *address) { delete[] (char*) address; } |
11718
|
4 |
0 |
if (!is.read((char *) &uncompressed_len, sizeof(uncompressed_len))) return false; |
11719
|
4 |
0 |
if (!is.read((char *) &compressed_len, sizeof(compressed_len))) return false; |
11720
|
4 |
0 |
if (!is.read((char *) &poor_crc, sizeof(poor_crc))) return false; |
11721
|
4 |
0 |
if (poor_crc != uncompressed_len * 19991 + compressed_len * 199999991 + 1234567890) return false; |
11722
|
4 |
0 |
if (!is.read((char *) props_encoded, sizeof(props_encoded))) return false; |
11725
|
4 |
0 |
if (!is.read((char *) compressed.data(), compressed_len)) return false; |
|
4 |
0 |
if (!is.read((char *) compressed.data(), compressed_len)) return false; |
11729
|
4 |
0 |
auto res = lzma::LzmaDecode(data.fill(uncompressed_len), &uncompressed_size, compressed.data(), &compressed_size, props_encoded, LZMA_PROPS_SIZE, lzma::LZMA_FINISH_ANY, &status, &lzmaAllocator); |
11730
|
4 |
0 |
if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false; |
|
4 |
0 |
if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false; |
|
4 |
0 |
if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false; |
11778
|
0 |
0 |
return {1, 11, 0, ""}; |
11789
|
0 |
0 |
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
|
0 |
0 |
<< (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease |
11791
|
0 |
0 |
<< (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n" |
|
0 |
0 |
<< (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n" |
11793
|
0 |
0 |
"Mathematics and Physics, Charles University in Prague, Czech Republic."; |
11799
|
4 |
0 |
} // namespace ufal |
|
4 |
0 |
} // namespace ufal |