Branch Coverage

nametag/nametag.cpp
Criterion Covered Total %
branch 1235 6846 18.0


line true false branch
179 4 0 if (bilou[bilou_type_O].probability > bilou[best].probability) best = bilou_type_O;
180 2 2 if (bilou[bilou_type_U].probability > bilou[best].probability) best = bilou_type_U;
187 10 0 if (prev.bilou[bilou_type_O].probability > best_LOU_prob) {
191 6 4 if (prev.bilou[bilou_type_U].probability > best_LOU_prob) {
199 0 10 if (prev.bilou[bilou_type_I].probability > best_BI_prob) {
205 0 10 if (best_BI_prob > best_LOU_prob) {
222 0 10 if (bilou[bilou_type_I].probability > bilou[best].probability) best = bilou_type_I;
227 0 10 if (bilou[bilou_type_L].probability > bilou[best].probability) best = bilou_type_L;
232 10 0 if (bilou[bilou_type_O].probability > bilou[best].probability) best = bilou_type_O;
237 4 6 if (bilou[bilou_type_U].probability > bilou[best].probability) best = bilou_type_U;
353 2 2 if (words.size() < size) words.resize(size);
354 2 2 if (features.size() < size) features.resize(size);
355 2 2 if (probabilities.size() < size) probabilities.resize(size);
356 2 2 if (previous_stage.size() < size) previous_stage.resize(size);
360 4 14 for (unsigned i = 0; i < size; i++)
0 0 for (unsigned i = 0; i < size; i++)
365 14 4 for (unsigned i = 0; i < size; i++)
0 0 for (unsigned i = 0; i < size; i++)
370 7 2 for (unsigned i = 0; i < size; i++) {
0 0 for (unsigned i = 0; i < size; i++) {
380 4 0 if (last_bilou[bilou_type_O].probability > last_bilou[best].probability) best = bilou_type_O;
381 0 4 if (last_bilou[bilou_type_U].probability > last_bilou[best].probability) best = bilou_type_U;
385 10 4 for (unsigned i = size - 1; i; i--) {
392 14 4 for (unsigned i = 0; i < size; i++) {
462 0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
0 0 explicit binary_decoder_error(const char* description) : runtime_error(description) {}
465 0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
0 0 class binary_decoder {
490 7 0 buffer.resize(len);
498 0 313 if (data + 1 > data_end) throw binary_decoder_error("No more data in binary_decoder");
503 0 679 if (data + sizeof(uint16_t) > data_end) throw binary_decoder_error("No more data in binary_decoder");
511 0 516 if (data + sizeof(uint32_t) > data_end) throw binary_decoder_error("No more data in binary_decoder");
520 0 46 if (len == 255) len = next_4B();
525 0 116 if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder");
0 253 if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder");
0 2 if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder");
0 118 if (data + sizeof(T) * elements > data_end) throw binary_decoder_error("No more data in binary_decoder");
540 0 1 if (pos > buffer.size()) throw binary_decoder_error("Cannot seek past end of binary_decoder");
576 0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
0 0 return a.len == b.len && memcmp(a.str, b.str, a.len) == 0;
630 0 0 if (uint8_t(val) != val) runtime_failure("Should encode value " << val << " in one byte!");
654 0 0 if (!(str.len < 255)) add_4B(str.len);
687 0 0 class network_classifier {
4 2 class network_classifier {
0 0 class network_classifier {
0 0 class network_classifier {
814 16 10 while (size) {
0 0 while (size) {
0 0 while (size) {
816 10 6 if (unaligned_load(first + step) < val) {
0 0 if (unaligned_load(first + step) < val) {
0 0 if (unaligned_load(first + step) < val) {
857 2 0 if (!compressor::load(is, data)) return false;
2 0 if (!compressor::load(is, data)) return false;
861 2 0 load_matrix(data, indices);
862 2 0 missing_weight = unaligned_load(data.next(1));
863 2 0 load_matrix(data, weights);
868 2 0 hidden_layer.resize(data.next_2B());
2 0 hidden_layer.resize(data.next_2B());
869 0 2 if (!hidden_layer.empty()) {
870 0 0 load_matrix(data, hidden_weights[0]);
871 0 0 load_matrix(data, hidden_weights[1]);
875 2 0 unsigned outcomes = data.next_2B();
876 2 0 output_layer.resize(outcomes);
877 2 0 output_error.resize(outcomes);
0 0 output_error.resize(outcomes);
888 330 2 for (auto&& row : m) {
330 2 for (auto&& row : m) {
898 0 0 if (features <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; }
0 0 if (features <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; }
899 0 0 if (outcomes <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; }
0 0 if (outcomes <= 0) { if (verbose) cerr << "There must be more than zero features!" << endl; return false; }
900 0 0 if (train.empty()) { if (verbose) cerr << "No training data!" << endl; return false; }
0 0 if (train.empty()) { if (verbose) cerr << "No training data!" << endl; return false; }
901 0 0 for (auto&& instance : train) {
902 0 0 if (instance.outcome >= outcomes) { if (verbose) cerr << "Training instances out of range!" << endl; return false; }
0 0 if (instance.outcome >= outcomes) { if (verbose) cerr << "Training instances out of range!" << endl; return false; }
903 0 0 for(auto& feature : instance.features)
904 0 0 if (feature >= features) { if (verbose) cerr << "Training instances out of range!" << endl; return false; }
0 0 if (feature >= features) { if (verbose) cerr << "Training instances out of range!" << endl; return false; }
906 0 0 for (auto&& instance : heldout)
907 0 0 for(auto& feature : instance.features)
908 0 0 if (feature >= features) { if (verbose) cerr << "Heldout instances out of range!" << endl; return false; }
0 0 if (feature >= features) { if (verbose) cerr << "Heldout instances out of range!" << endl; return false; }
916 0 0 for (auto&& instance : train)
917 0 0 for (auto&& feature : instance.features)
920 0 0 for (auto&& row : indices) {
927 0 0 for (auto&& row : indices)
933 0 0 if (!hidden_layer.empty()) {
937 0 0 for (auto&& row : hidden_weights[0])
938 0 0 for (auto&& weight : row.resize(hidden_layer.size()), row)
942 0 0 for (auto&& row : hidden_weights[1])
943 0 0 for (auto&& weight : row.resize(outcomes), row)
956 0 0 for (unsigned i = 0; i < train.size(); i++)
959 0 0 for (int iteration = 0; iteration < parameters.iterations; iteration++) {
960 0 0 if (verbose) cerr << "Iteration " << iteration + 1 << ": ";
0 0 if (verbose) cerr << "Iteration " << iteration + 1 << ": ";
0 0 if (verbose) cerr << "Iteration " << iteration + 1 << ": ";
962 0 0 double learning_rate = parameters.final_learning_rate && parameters.iterations > 1 ?
964 0 0 parameters.initial_learning_rate;
970 0 0 for (auto&& train_index : permutation) {
981 0 0 if (verbose)
985 0 0 << "%, ";
988 0 0 if (!heldout.empty()) {
990 0 0 for (auto&& instance : heldout) {
994 0 0 if (verbose) cerr << "heldout acc " << heldout_correct * 100. / heldout.size() << ", ";
0 0 if (verbose) cerr << "heldout acc " << heldout_correct * 100. / heldout.size() << ", ";
996 0 0 if (verbose) cerr << "done." << endl;
1002 1 13 if (outcomes.size() != output_layer.size()) outcomes.resize(output_layer.size());
1003 0 14 if (buffer.size() != hidden_layer.size()) buffer.resize(hidden_layer.size());
1010 0 0 propagate(features, hidden_layer, output_layer);
0 0 propagate(features, hidden_layer, output_layer);
1017 291 14 for (auto&& feature : features)
1018 291 0 if (feature < indices.size())
1019 414 291 for (unsigned i = 0; i < indices[feature].size(); i++)
1023 0 14 if (!hidden_layer.empty()) {
1024 0 0 for (auto&& weight : hidden_layer)
1028 0 0 for (auto&& feature : features)
1029 0 0 if (feature < hidden_weights[0].size())
1030 0 0 for (unsigned i = 0; i < hidden_layer.size(); i++) {
1035 0 0 for (auto&& weight : hidden_layer)
1039 0 0 for (unsigned h = 0; h < hidden_layer.size(); h++)
1040 0 0 for (unsigned i = 0; i < output_layer.size(); i++)
1046 126 14 for (unsigned i = 0; i < output_layer.size(); sum += output_layer[i], i++)
1049 126 14 for (unsigned i = 0; i < output_layer.size(); i++)
1055 0 0 for (unsigned i = 1; i < output_layer.size(); i++)
0 0 for (unsigned i = 1; i < output_layer.size(); i++)
1056 0 0 if (output_layer[i] > output_layer[best])
0 0 if (output_layer[i] > output_layer[best])
1064 0 0 for (unsigned i = 0; i < output_error.size(); i++)
1065 0 0 output_error[i] = (i == instance.outcome) - output_layer[i];
1068 0 0 for (auto&& feature : instance.features)
1069 0 0 for (unsigned i = 0; i < indices[feature].size(); i++)
1073 0 0 if (!hidden_layer.empty()) {
1075 0 0 for (unsigned h = 0; h < hidden_layer.size(); h++) {
1077 0 0 for (unsigned i = 0; i < output_layer.size(); i++)
1083 0 0 for (unsigned h = 0; h < hidden_layer.size(); h++)
1084 0 0 for (unsigned i = 0; i < output_layer.size(); i++)
1088 0 0 for (auto&& feature : instance.features)
1089 0 0 for (unsigned i = 0; i < hidden_layer.size(); i++)
1218 0 0 if (str == "trivial") return id = TRIVIAL, true;
1219 0 0 if (str == "external") return id = EXTERNAL, true;
1220 0 0 if (str == "morphodita") return id = MORPHODITA, true;
1310 4 95 if (it == map.end() && total_features) {
4 0 if (it == map.end() && total_features) {
0 99 if (it == map.end() && total_features) {
1314 95 4 return it != map.end() ? it->second : ner_feature_unknown;
1342 0 0 if (window < 0) return false;
1343 0 0 if (!total_features) return false;
1348 0 0 lookup(string(), total_features); // Always add an empty string to the map
1358 35 8 for (unsigned i = data.next_4B(); i > 0; i--) {
1360 35 0 data.next_str(key);
1361 35 0 map.emplace(key, data.next_4B());
1373 0 0 for (auto&& element : map_elements) {
1374 0 0 enc.add_str(element.first);
1447 0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
9 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
4 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
16 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
3 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
129 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
4 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
4 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
4 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
4 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
14 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
38 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
54 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
0 0 return chr < CHARS ? 1 << category_block[category_index[chr >> 8]][chr & 0xFF] : DEFAULT_CAT;
1451 4 0 if (chr < CHARS) {
1453 4 0 if ((othercase & 0xFF) == othercase_type::LOWER_ONLY) return othercase >> 8;
1454 0 0 if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase >> 8;
1455 0 0 if ((othercase & 0xFF) == othercase_type::TITLE_THEN_LOWER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8;
1461 0 0 if (chr < CHARS) {
1463 0 0 if ((othercase & 0xFF) == othercase_type::UPPERTITLE_ONLY) return othercase >> 8;
1464 0 0 if ((othercase & 0xFF) == othercase_type::UPPER_ONLY) return othercase >> 8;
1465 0 0 if ((othercase & 0xFF) == othercase_type::UPPER_THEN_TITLE) return othercase >> 8;
1466 0 0 if ((othercase & 0xFF) == othercase_type::LOWER_THEN_UPPER) return othercase_block[othercase_index[(othercase >> 8) >> 8]][(othercase >> 8) & 0xFF] >> 8;
1560 58 10 if (((unsigned char)*str) < 0x80) return (unsigned char)*str++;
1561 0 10 else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR;
1562 10 0 else if (((unsigned char)*str) < 0xE0) {
1564 10 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
10 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1566 0 0 } else if (((unsigned char)*str) < 0xF0) {
1568 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1570 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1572 0 0 } else if (((unsigned char)*str) < 0xF8) {
1574 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1576 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1578 0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1584 218 0 if (!len) return 0;
1586 195 23 if (((unsigned char)*str) < 0x80) return (unsigned char)*str++;
1587 0 23 else if (((unsigned char)*str) < 0xC0) return ++str, REPLACEMENT_CHAR;
1588 23 0 else if (((unsigned char)*str) < 0xE0) {
1590 23 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
23 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
23 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1592 0 0 } else if (((unsigned char)*str) < 0xF0) {
1594 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1596 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1598 0 0 } else if (((unsigned char)*str) < 0xF8) {
1600 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1602 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1604 0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
0 0 if (len <= 0 || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return REPLACEMENT_CHAR;
1629 0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
0 0 iterator& operator++() { if (next) { codepoint = decode(next); if (!codepoint) next = nullptr; } return *this; }
1695 4 0 if (chr < 0x80) str += chr;
1696 0 0 else if (chr < 0x800) { str += 0xC0 + (chr >> 6); str += 0x80 + (chr & 0x3F); }
1697 0 0 else if (chr < 0x10000) { str += 0xE0 + (chr >> 12); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); }
1698 0 0 else if (chr < 0x200000) { str += 0xF0 + (chr >> 18); str += 0x80 + ((chr >> 12) & 0x3F); str += 0x80 + ((chr >> 6) & 0x3F); str += 0x80 + (chr & 0x3F); }
1705 0 0 for (char32_t chr; (chr = decode(str)); )
1712 0 0 while (len)
1758 0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
1763 0 0 if (str.len && (str.str[0] == '+' || str.str[0] == '-')) {
0 0 if (str.len && (str.str[0] == '+' || str.str[0] == '-')) {
1769 0 0 if (!str.len) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': empty string."), false;
1773 0 0 while (str.len && str.str[0] >= '0' && str.str[0] <= '9') {
0 0 while (str.len && str.str[0] >= '0' && str.str[0] <= '9') {
0 0 while (str.len && str.str[0] >= '0' && str.str[0] <= '9') {
1774 0 0 if (positive) {
1775 0 0 if (value > (numeric_limits::max() - (str.str[0] - '0')) / 10)
1779 0 0 if (value < (numeric_limits::min() + (str.str[0] - '0')) / 10)
1787 0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
0 0 while (str.len && (str.str[0] == ' ' || str.str[0] == '\f' || str.str[0] == '\n' || str.str[0] == '\r' || str.str[0] == '\t' || str.str[0] == '\v'))
1791 0 0 if (str.len) return error.assign("Cannot parse ").append(value_name).append(" int value '").append(original.str, original.len).append("': non-digit character found."), false;
1800 0 0 if (!parse_int(str, value_name, result, error))
0 0 if (!parse_int(str, value_name, result, error))
1935 0 0 if (text.empty()) return;
1938 0 0 for (string::size_type next; (next = text.find(sep, index)) != string::npos; index = next + 1)
2030 0 0 if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false;
2031 0 0 if (args.size() < 1) return cerr << "BrownCluster requires a cluster file as the first argument!" << endl, false;
2034 0 0 if (!in.is_open()) return cerr << "Cannot open Brown clusters file '" << args[0] << "'!" << endl, false;
2037 0 0 substrings.emplace_back(string::npos);
2038 0 0 for (unsigned i = 1; i < args.size(); i++) {
2039 0 0 int len = parse_int(args[i].c_str(), "BrownCluster_prefix_length");
2040 0 0 if (len <= 0)
2041 0 0 return cerr << "Wrong prefix length '" << len << "' in BrownCluster specification!" << endl, false;
2043 0 0 substrings.emplace_back(len);
2051 0 0 while (getline(in, line)) {
0 0 while (getline(in, line)) {
2052 0 0 split(line, '\t', tokens);
2053 0 0 if (tokens.size() != 2) return cerr << "Wrong line '" << line << "' in Brown cluster file '" << args[0] << "'!" << endl, false;
2057 0 0 if (it == cluster_map.end()) {
2059 0 0 clusters.emplace_back();
2060 0 0 for (auto&& substring : substrings)
2061 0 0 if (substring == string::npos || substring < cluster.size())
0 0 if (substring == string::npos || substring < cluster.size())
0 0 if (substring == string::npos || substring < cluster.size())
2062 0 0 clusters.back().emplace_back(prefixes_map.emplace(cluster.substr(0, substring), *total_features + (2*window + 1) * (int)prefixes_map.size() + window).first->second);
0 0 clusters.back().emplace_back(prefixes_map.emplace(cluster.substr(0, substring), *total_features + (2*window + 1) * (int)prefixes_map.size() + window).first->second);
2065 0 0 if (!map.emplace(form, it->second).second) return cerr << "Form '" << form << "' is present twice in Brown cluster file '" << args[0] << "'!" << endl, false;
2076 0 0 for (auto&& cluster : clusters) {
2078 0 0 for (auto&& feature : cluster)
2087 0 0 for (auto&& cluster : clusters) {
2089 0 0 for (auto&& feature : cluster)
2095 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2097 0 0 if (it != map.end()) {
2099 0 0 for (auto&& feature : cluster)
2100 0 0 apply_in_window(i, feature);
0 0 apply_in_window(i, feature);
2113 0 0 if (window) return cerr << "CzechAddContainers cannot have non-zero window!" << endl, false;
2121 0 0 for (unsigned i = 0; i < entities.size(); i++) {
2123 0 0 if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) {
0 0 if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) {
0 0 if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) {
0 0 if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) {
0 0 if (entities[i].type.compare("pf") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("pf") != 0)) {
2125 0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("pf") == 0) j++;
0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("pf") == 0) j++;
0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("pf") == 0) j++;
0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("pf") == 0) j++;
2126 0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) {
0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) {
0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) {
0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) {
2128 0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) j++;
0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) j++;
0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) j++;
0 0 while (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ps") == 0) j++;
2134 0 0 if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) {
0 0 if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) {
0 0 if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) {
0 0 if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) {
0 0 if (entities[i].type.compare("td") == 0 && i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("tm") == 0) {
2136 0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ty") == 0) j++;
0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ty") == 0) j++;
0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ty") == 0) j++;
0 0 if (j < entities.size() && entities[j].start == entities[j-1].start + entities[j-1].length && entities[j].type.compare("ty") == 0) j++;
2140 0 0 if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0))
0 0 if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0))
0 0 if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0))
0 0 if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0))
0 0 if (entities[i].type.compare("tm") == 0 && (!i || entities[i-1].start + entities[i-1].length < entities[i].start || entities[i-1].type.compare("td") != 0))
2141 0 0 if (i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("ty") == 0)
0 0 if (i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("ty") == 0)
0 0 if (i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("ty") == 0)
0 0 if (i+1 < entities.size() && entities[i+1].start == entities[i].start + entities[i].length && entities[i+1].type.compare("ty") == 0)
2147 0 0 if (buffer.size() > entities.size()) entities = buffer;
2159 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2160 0 0 for (unsigned pos = 0; pos + 2 < sentence.words[i].lemma_comments.size(); pos++)
2161 0 0 if (sentence.words[i].lemma_comments[pos] == '_' && sentence.words[i].lemma_comments[pos+1] == ';') {
0 0 if (sentence.words[i].lemma_comments[pos] == '_' && sentence.words[i].lemma_comments[pos+1] == ';') {
0 0 if (sentence.words[i].lemma_comments[pos] == '_' && sentence.words[i].lemma_comments[pos+1] == ';') {
2163 0 0 apply_in_window(i, lookup(buffer, total_features));
0 0 apply_in_window(i, lookup(buffer, total_features));
2173 14 4 for (unsigned i = 0; i < sentence.size; i++)
2174 12 2 apply_in_window(i, lookup(sentence.words[i].form, total_features));
40 12 apply_in_window(i, lookup(sentence.words[i].form, total_features));
2176 4 0 apply_outer_words_in_window(lookup_empty());
8 4 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
2190 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2195 0 0 for (bool first = true; (chr = utf8::decode(form)); first = false) {
2197 0 0 was_upper = was_upper || category & unicode::Lut;
0 0 was_upper = was_upper || category & unicode::Lut;
2198 0 0 was_lower = was_lower || category & unicode::Ll;
0 0 was_lower = was_lower || category & unicode::Ll;
2200 0 0 if (first && was_upper) apply_in_window(i, fst_cap);
0 0 if (first && was_upper) apply_in_window(i, fst_cap);
0 0 if (first && was_upper) apply_in_window(i, fst_cap);
2202 0 0 if (was_upper && !was_lower) apply_in_window(i, all_cap);
0 0 if (was_upper && !was_lower) apply_in_window(i, all_cap);
0 0 if (was_upper && !was_lower) apply_in_window(i, all_cap);
2203 0 0 if (was_upper && was_lower) apply_in_window(i, mixed_cap);
0 0 if (was_upper && was_lower) apply_in_window(i, mixed_cap);
0 0 if (was_upper && was_lower) apply_in_window(i, mixed_cap);
2214 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2216 0 0 for (auto&& chr : utf8::decoder(sentence.words[i].form))
2217 0 0 utf8::append(buffer, buffer.empty() ? chr : unicode::lowercase(chr));
2218 0 0 apply_in_window(i, lookup(buffer, total_features));
0 0 apply_in_window(i, lookup(buffer, total_features));
2221 0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
2234 0 0 if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false;
2237 0 0 for (auto&& arg : args) {
2238 0 0 ifstream in(path_from_utf8(arg).c_str());
2239 0 0 if (!in.is_open()) return cerr << "Cannot open gazetteers file '" << arg << "'!" << endl, false;
2245 0 0 while (getline(in, line)) {
0 0 while (getline(in, line)) {
2246 0 0 split(line, ' ', tokens);
2247 0 0 for (unsigned i = 0; i < tokens.size(); i++)
2248 0 0 if (!tokens[i][0])
2250 0 0 if (tokens.size() > longest) longest = tokens.size();
2253 0 0 for (unsigned i = 0; i < tokens.size(); i++) {
2254 0 0 if (i) gazetteer += ' ';
2257 0 0 if (it->second == gazetteers_info.size()) gazetteers_info.emplace_back();
0 0 if (it->second == gazetteers_info.size()) gazetteers_info.emplace_back();
2259 0 0 if (i + 1 < tokens.size())
2262 0 0 if (find(info.features.begin(), info.features.end(), *total_features + window) == info.features.end())
2263 0 0 info.features.emplace_back(*total_features + window);
2266 0 0 *total_features += (2*window + 1) * (longest == 0 ? 0 : longest == 1 ? U+1 : longest == 2 ? L+1 : I+1);
0 0 *total_features += (2*window + 1) * (longest == 0 ? 0 : longest == 1 ? U+1 : longest == 2 ? L+1 : I+1);
0 0 *total_features += (2*window + 1) * (longest == 0 ? 0 : longest == 1 ? U+1 : longest == 2 ? L+1 : I+1);
2276 0 0 for (auto&& gazetteer : gazetteers_info) {
2279 0 0 for (auto&& feature : gazetteer.features)
2288 0 0 for (auto&& gazetteer : gazetteers_info) {
2291 0 0 for (auto&& feature : gazetteer.features)
2297 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2299 0 0 if (it == map.end()) continue;
2302 0 0 for (auto&& feature : gazetteers_info[it->second].features) {
2303 0 0 apply_in_window(i, feature + G * (2*window + 1));
0 0 apply_in_window(i, feature + G * (2*window + 1));
2304 0 0 apply_in_window(i, feature + U * (2*window + 1));
0 0 apply_in_window(i, feature + U * (2*window + 1));
2307 0 0 for (unsigned j = i + 1; gazetteers_info[it->second].prefix_of_longer && j < sentence.size; j++) {
0 0 for (unsigned j = i + 1; gazetteers_info[it->second].prefix_of_longer && j < sentence.size; j++) {
0 0 for (unsigned j = i + 1; gazetteers_info[it->second].prefix_of_longer && j < sentence.size; j++) {
2308 0 0 if (j == i + 1) buffer.assign(sentence.words[i].raw_lemma);
2312 0 0 if (it == map.end()) break;
2315 0 0 for (auto&& feature : gazetteers_info[it->second].features)
2316 0 0 for (unsigned g = i; g <= j; g++) {
2317 0 0 apply_in_window(g, feature + G * (2*window + 1));
0 0 apply_in_window(g, feature + G * (2*window + 1));
2318 0 0 apply_in_window(g, feature + (g == i ? B : g == j ? L : I) * (2*window + 1));
0 0 apply_in_window(g, feature + (g == i ? B : g == j ? L : I) * (2*window + 1));
0 0 apply_in_window(g, feature + (g == i ? B : g == j ? L : I) * (2*window + 1));
0 0 apply_in_window(g, feature + (g == i ? B : g == j ? L : I) * (2*window + 1));
2339 0 0 if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false;
2344 0 0 if (args.size() < 4) return cerr << "Not enough parameters to GazetteersEnhanced!" << endl, false;
2345 0 0 if (args.size() & 1) return cerr << "Odd number of parameters to GazetteersEnhanced!" << endl, false;
2347 0 0 if (args[0] == "form") match = MATCH_FORM;
2348 0 0 else if (args[0] == "rawlemma") match = MATCH_RAWLEMMA;
2349 0 0 else if (args[0] == "rawlemmas") match = MATCH_RAWLEMMAS;
2352 0 0 if (args[1] == "embed_in_model") embed = EMBED_IN_MODEL;
2353 0 0 else if (args[1] == "out_of_model") embed = OUT_OF_MODEL;
2356 0 0 for (unsigned i = 2; i < args.size(); i += 2) {
2360 0 0 gazetteer_metas.back().entity = args[i + 1] == "NONE" ? -1 : entities.parse(args[i + 1].c_str(), true);
2364 0 0 for (entity_type i = 0; i < entities.size(); i++)
2367 0 0 if (!load_gazetteer_lists(pipeline, embed == EMBED_IN_MODEL)) return false;
2379 0 0 for (auto&& gazetteer_meta : gazetteer_metas) {
2386 0 0 for (auto&& gazetteer_list : gazetteer_lists) {
2388 0 0 for (auto&& gazetteer : gazetteer_list.gazetteers)
2396 0 0 for (auto&& entity : entity_list)
2408 0 0 for (auto&& gazetteer_meta : gazetteer_metas) {
2414 0 0 if (embed == EMBED_IN_MODEL) {
2416 0 0 for (auto&& gazetteer_list : gazetteer_lists) {
2418 0 0 for (auto&& gazetteer : gazetteer_list.gazetteers)
2429 0 0 for (auto&& entity : entity_list)
2435 0 0 vector> features(sentence.size);
2437 0 0 vector> recased_match_sources(sentence.size);
2438 0 0 for (unsigned i = 0; i < sentence.size; i++)
2439 0 0 recase_match_source(sentence.words[i], RECASE_ANY, recased_match_sources[i]);
2441 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2445 0 0 for (unsigned j = i; j < sentence.size && !nodes.empty(); j++) {
0 0 for (unsigned j = i; j < sentence.size && !nodes.empty(); j++) {
0 0 for (unsigned j = i; j < sentence.size && !nodes.empty(); j++) {
2447 0 0 for (auto&& node : nodes)
2448 0 0 if (!gazetteers_trie[node].children.empty())
2449 0 0 for (auto&& match_source : recased_match_sources[j]) {
2451 0 0 for (auto&& it = range.first; it != range.second; it++)
2452 0 0 append_unless_exists(new_nodes, it->second);
2455 0 0 hard_pre_possible = hard_pre_possible && !sentence.probabilities[j].local_filled;
0 0 hard_pre_possible = hard_pre_possible && !sentence.probabilities[j].local_filled;
2456 0 0 if (hard_pre_possible)
2457 0 0 for (auto&& node : new_nodes)
2458 0 0 if (gazetteers_trie[node].mode == HARD_PRE &&
0 0 if (gazetteers_trie[node].mode == HARD_PRE &&
0 0 if (gazetteers_trie[node].mode == HARD_PRE &&
2459 0 0 ((j - i + 1) > hard_pre_length || node < hard_pre_node))
2463 0 0 for (auto&& node : new_nodes)
2464 0 0 for (auto&& feature : gazetteers_trie[node].features)
2465 0 0 for (unsigned k = i; k <= j; k++) {
2466 0 0 bilou_type type = j == i ? bilou_type_U : k == i ? bilou_type_B : k == j ? bilou_type_L : bilou_type_I;
0 0 bilou_type type = j == i ? bilou_type_U : k == i ? bilou_type_B : k == j ? bilou_type_L : bilou_type_I;
0 0 bilou_type type = j == i ? bilou_type_U : k == i ? bilou_type_B : k == j ? bilou_type_L : bilou_type_I;
2467 0 0 append_unless_exists(features[k], feature + G * (2 * window + 1));
2468 0 0 append_unless_exists(features[k], feature + type * (2 * window + 1));
2474 0 0 if (hard_pre_length)
2475 0 0 for (unsigned j = i; j < i + hard_pre_length; j++) {
2476 0 0 for (auto&& bilou : sentence.probabilities[j].local.bilou) {
2481 0 0 j == i ? bilou_type_B : j + 1 == i + hard_pre_length ? bilou_type_L : bilou_type_I;
0 0 j == i ? bilou_type_B : j + 1 == i + hard_pre_length ? bilou_type_L : bilou_type_I;
0 0 j == i ? bilou_type_B : j + 1 == i + hard_pre_length ? bilou_type_L : bilou_type_I;
2489 0 0 for (unsigned i = 0; i < sentence.size; i++)
2490 0 0 for (auto&& feature : features[i])
2491 0 0 apply_in_window(i, feature);
0 0 apply_in_window(i, feature);
0 0 apply_in_window(i, feature);
2497 0 0 vector> recased_match_sources(sentence.size);
2498 0 0 for (unsigned i = 0; i < sentence.size; i++)
2499 0 0 recase_match_source(sentence.words[i], RECASE_ANY, recased_match_sources[i]);
2503 0 0 for (unsigned i = 0, e = 0; i < sentence.size; i++) {
2504 0 0 while (e < entities.size() && entities[e].start == i) {
0 0 while (e < entities.size() && entities[e].start == i) {
0 0 while (e < entities.size() && entities[e].start == i) {
2505 0 0 if (i + entities[e].length > entity_until)
2507 0 0 buffer.push_back(entities[e++]);
2510 0 0 if (entity_until <= i) {
2512 0 0 unsigned free_until = e < entities.size() ? entities[e].start : sentence.size;
2516 0 0 for (unsigned j = i; j < free_until && !nodes.empty(); j++) {
0 0 for (unsigned j = i; j < free_until && !nodes.empty(); j++) {
0 0 for (unsigned j = i; j < free_until && !nodes.empty(); j++) {
2518 0 0 for (auto&& node : nodes)
2519 0 0 if (!gazetteers_trie[node].children.empty())
2520 0 0 for (auto&& match_source : recased_match_sources[j]) {
2522 0 0 for (auto&& it = range.first; it != range.second; it++)
2523 0 0 append_unless_exists(new_nodes, it->second);
2526 0 0 for (auto&& node : new_nodes)
2527 0 0 if (gazetteers_trie[node].mode == HARD_POST &&
0 0 if (gazetteers_trie[node].mode == HARD_POST &&
0 0 if (gazetteers_trie[node].mode == HARD_POST &&
2528 0 0 ((j - i + 1) > hard_post_length || node < hard_post_node))
2534 0 0 if (hard_post_length) {
2535 0 0 buffer.emplace_back(i, hard_post_length, entity_list[gazetteers_trie[hard_post_node].entity]);
2541 0 0 if (buffer.size() != entities.size())
2546 0 0 for (auto&& gazetteer_list : gazetteer_lists)
2547 0 0 for (auto&& gazetteer : gazetteer_list.gazetteers) {
2548 0 0 gazetteers.push_back(gazetteer);
2549 0 0 if (gazetteer_types) gazetteer_types->push_back(gazetteer_list.entity);
0 0 if (gazetteer_types) gazetteer_types->push_back(gazetteer_list.entity);
2590 0 0 for (i = array.size(); i; i--)
2591 0 0 if (array[i - 1] == value)
2594 0 0 if (!i)
2602 0 0 for (auto&& gazetteer_meta : gazetteer_metas)
2603 0 0 for (int mode = 0; mode < MODES_TOTAL; mode++) {
2606 0 0 ifstream file(path_from_utf8(file_name).c_str());
2607 0 0 if (!file.is_open()) {
2608 0 0 if (mode == SOFT && files_must_exist)
2613 0 0 gazetteer_lists.emplace_back();
2618 0 0 while (getline(file, line))
0 0 while (getline(file, line))
2619 0 0 if (!line.empty() && line[0] != '#')
0 0 if (!line.empty() && line[0] != '#')
0 0 if (!line.empty() && line[0] != '#')
2620 0 0 gazetteer_lists.back().gazetteers.push_back(line);
2625 0 0 vector gazetteer_tokens, gazetteer_tokens_additional, gazetteer_token(1);
2630 0 0 gazetteers_trie.emplace_back();
2631 0 0 for (auto&& gazetteer_list : gazetteer_lists)
2632 0 0 for (auto&& gazetteer : gazetteer_list.gazetteers) {
2633 0 0 pipeline.tokenizer->set_text(gazetteer);
2634 0 0 if (!pipeline.tokenizer->next_sentence(&gazetteer_tokens, nullptr)) continue;
0 0 if (!pipeline.tokenizer->next_sentence(&gazetteer_tokens, nullptr)) continue;
2635 0 0 while (pipeline.tokenizer->next_sentence(&gazetteer_tokens_additional, nullptr))
0 0 while (pipeline.tokenizer->next_sentence(&gazetteer_tokens_additional, nullptr))
2636 0 0 gazetteer_tokens.insert(gazetteer_tokens.end(), gazetteer_tokens_additional.begin(), gazetteer_tokens_additional.end());
2640 0 0 for (unsigned token = 0; token < gazetteer_tokens.size(); token++) {
2641 0 0 if (token) prefix.push_back('\t');
0 0 if (token) prefix.push_back('\t');
2642 0 0 prefix.append(gazetteer_tokens[token].str, gazetteer_tokens[token].len);
2644 0 0 if (prefix_it == gazetteer_prefixes.end()) {
2646 0 0 gazetteers_trie.emplace_back();
2650 0 0 pipeline.tagger->tag(gazetteer_token, gazetteer_token_tagged);
2651 0 0 recase_match_source(gazetteer_token_tagged.words[0], RECASE_NATIVE, gazetteer_recased_match_sources);
2652 0 0 for (auto&& match_source : gazetteer_recased_match_sources)
2661 0 0 append_unless_exists(gazetteers_trie[node].features, gazetteer_list.feature);
2662 0 0 if ((gazetteer_list.mode == HARD_PRE && gazetteers_trie[node].mode != HARD_PRE) ||
0 0 if ((gazetteer_list.mode == HARD_PRE && gazetteers_trie[node].mode != HARD_PRE) ||
0 0 if ((gazetteer_list.mode == HARD_PRE && gazetteers_trie[node].mode != HARD_PRE) ||
0 0 if ((gazetteer_list.mode == HARD_PRE && gazetteers_trie[node].mode != HARD_PRE) ||
2663 0 0 (gazetteer_list.mode == HARD_POST && gazetteers_trie[node].mode == SOFT)) {
2678 0 0 if (mode == TO_UPPER)
2680 0 0 else if (mode == TO_LOWER)
2682 0 0 else if (mode == TO_TITLE)
2683 0 0 for (auto&& chr : utf8::decoder(text))
2684 0 0 utf8::append(recased.back(), recased.back().empty() ? unicode::uppercase(chr) : unicode::lowercase(chr));
2692 0 0 for (auto&& chr : utf8::decoder(word.form)) {
2693 0 0 any_lower = any_lower || (unicode::category(chr) & unicode::Ll);
0 0 any_lower = any_lower || (unicode::category(chr) & unicode::Ll);
2694 0 0 if (first) first_uc = unicode::category(chr) & unicode::Lut;
2700 0 0 for (int perform = 0; perform < TO_TOTAL; perform++) {
2701 0 0 if (mode == RECASE_NATIVE) {
2702 0 0 if (perform == TO_UPPER && !(first_uc && !any_lower)) continue;
0 0 if (perform == TO_UPPER && !(first_uc && !any_lower)) continue;
2703 0 0 if (perform == TO_TITLE && !(first_uc && any_lower)) continue;
0 0 if (perform == TO_TITLE && !(first_uc && any_lower)) continue;
2704 0 0 if (perform == TO_LOWER && first_uc) continue;
2706 0 0 if (mode == RECASE_ANY) {
2707 0 0 if (perform == TO_UPPER && !(first_uc && !any_lower)) continue;
0 0 if (perform == TO_UPPER && !(first_uc && !any_lower)) continue;
2708 0 0 if (perform == TO_TITLE && !first_uc) continue;
2711 0 0 if (match == MATCH_FORM)
2713 0 0 else if (match == MATCH_RAWLEMMA)
2715 0 0 else if (match == MATCH_RAWLEMMAS)
2716 0 0 for (auto&& raw_lemma : word.raw_lemmas_all)
2721 0 3 const vector gazetteers_enhanced::basename_suffixes = {".txt", ".hard_pre.txt", ".hard_post.txt"};
9 3 const vector gazetteers_enhanced::basename_suffixes = {".txt", ".hard_pre.txt", ".hard_post.txt"};
0 0 const vector gazetteers_enhanced::basename_suffixes = {".txt", ".hard_pre.txt", ".hard_post.txt"};
2727 14 4 for (unsigned i = 0; i < sentence.size; i++)
2728 14 0 apply_in_window(i, lookup(sentence.words[i].lemma_id, total_features));
46 14 apply_in_window(i, lookup(sentence.words[i].lemma_id, total_features));
2730 4 0 apply_outer_words_in_window(lookup_empty());
8 4 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
2745 14 4 for (unsigned i = 0; i < sentence.size; i++) {
2750 14 0 for (digit = false, num = 0; *form; form++) {
2751 0 14 if (*form < '0' || *form > '9') break;
2755 0 14 if (digit && !*form) {
0 0 if (digit && !*form) {
2757 0 0 if (num < 24) apply_in_window(i, hour);
0 0 if (num < 24) apply_in_window(i, hour);
0 0 if (num < 24) apply_in_window(i, hour);
2758 0 0 if (num < 60) apply_in_window(i, minute);
0 0 if (num < 60) apply_in_window(i, minute);
0 0 if (num < 60) apply_in_window(i, minute);
2759 0 0 if (num >= 1 && num <= 31) apply_in_window(i, day);
0 0 if (num >= 1 && num <= 31) apply_in_window(i, day);
0 0 if (num >= 1 && num <= 31) apply_in_window(i, day);
2760 0 0 if (num >= 1 && num <= 12) apply_in_window(i, month);
0 0 if (num >= 1 && num <= 12) apply_in_window(i, month);
0 0 if (num >= 1 && num <= 12) apply_in_window(i, month);
2761 0 0 if (num >= 1000 && num <= 2200) apply_in_window(i, year);;
0 0 if (num >= 1000 && num <= 2200) apply_in_window(i, year);;
0 0 if (num >= 1000 && num <= 2200) apply_in_window(i, year);;
2763 0 14 if (digit && num < 24 && (*form == '.' || *form == ':')) {
0 0 if (digit && num < 24 && (*form == '.' || *form == ':')) {
2765 0 0 for (digit = false, num = 0, form++; *form; form++) {
2766 0 0 if (*form < '0' || *form > '9') break;
2770 0 0 if (digit && !*form && num < 60) apply_in_window(i, time);
0 0 if (digit && !*form && num < 60) apply_in_window(i, time);
0 0 if (digit && !*form && num < 60) apply_in_window(i, time);
0 0 if (digit && !*form && num < 60) apply_in_window(i, time);
0 0 if (digit && !*form && num < 60) apply_in_window(i, time);
2780 14 4 for (unsigned i = 0; i < sentence.size; i++)
2781 7 7 if (sentence.previous_stage[i].bilou != bilou_type_unknown) {
2786 7 0 apply_in_range(i, lookup(buffer, total_features), 1, window);
9 7 apply_in_range(i, lookup(buffer, total_features), 1, window);
2792 4 10 if (value < 0) {
2796 11 14 for (; value; value >>= 4)
2805 14 4 for (unsigned i = 0; i < sentence.size; i++)
2806 14 0 apply_in_window(i, lookup(sentence.words[i].raw_lemma, total_features));
46 14 apply_in_window(i, lookup(sentence.words[i].raw_lemma, total_features));
2808 4 0 apply_outer_words_in_window(lookup_empty());
8 4 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
2822 14 4 for (unsigned i = 0; i < sentence.size; i++) {
2827 54 14 for (bool first = true; (chr = utf8::decode(raw_lemma)); first = false) {
2829 54 0 was_upper = was_upper || category & unicode::Lut;
54 0 was_upper = was_upper || category & unicode::Lut;
2830 14 40 was_lower = was_lower || category & unicode::Ll;
4 10 was_lower = was_lower || category & unicode::Ll;
2832 54 0 if (first && was_upper) apply_in_window(i, fst_cap);
0 0 if (first && was_upper) apply_in_window(i, fst_cap);
0 0 if (first && was_upper) apply_in_window(i, fst_cap);
2834 0 14 if (was_upper && !was_lower) apply_in_window(i, all_cap);
0 0 if (was_upper && !was_lower) apply_in_window(i, all_cap);
0 0 if (was_upper && !was_lower) apply_in_window(i, all_cap);
2835 0 14 if (was_upper && was_lower) apply_in_window(i, mixed_cap);
0 0 if (was_upper && was_lower) apply_in_window(i, mixed_cap);
0 0 if (was_upper && was_lower) apply_in_window(i, mixed_cap);
2846 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2848 0 0 for (auto&& chr : utf8::decoder(sentence.words[i].raw_lemma))
2849 0 0 utf8::append(buffer, buffer.empty() ? chr : unicode::lowercase(chr));
2850 0 0 apply_in_window(i, lookup(buffer, total_features));
0 0 apply_in_window(i, lookup(buffer, total_features));
2853 0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
2866 0 0 if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false;
2867 0 0 if (args.size() != 2) return cerr << "*Suffix features require exactly two arguments -- shortest and longest suffix length!" << endl, false;
2870 0 0 if (!parse_int(args[0], "*Suffix shortest length", shortest, error)) return cerr << error << endl, false;
0 0 if (!parse_int(args[0], "*Suffix shortest length", shortest, error)) return cerr << error << endl, false;
2871 0 0 if (!parse_int(args[1], "*Suffix longest length", longest, error)) return cerr << error << endl, false;
0 0 if (!parse_int(args[1], "*Suffix longest length", longest, error)) return cerr << error << endl, false;
2893 0 0 for (unsigned i = 0; i < sentence.size; i++) {
2895 0 0 for (auto&& chr : utf8::decoder(source == SUFFIX_SOURCE_FORM ? sentence.words[i].form : sentence.words[i].raw_lemma))
0 0 for (auto&& chr : utf8::decoder(source == SUFFIX_SOURCE_FORM ? sentence.words[i].form : sentence.words[i].raw_lemma))
2896 0 0 chrs.push_back((casing == SUFFIX_CASE_ORIGINAL || chrs.empty()) ? chr : unicode::lowercase(chr));
0 0 chrs.push_back((casing == SUFFIX_CASE_ORIGINAL || chrs.empty()) ? chr : unicode::lowercase(chr));
2899 0 0 for (int s = 1; s <= longest && s <= int(chrs.size()); s++) {
0 0 for (int s = 1; s <= longest && s <= int(chrs.size()); s++) {
0 0 for (int s = 1; s <= longest && s <= int(chrs.size()); s++) {
2900 0 0 utf8::append(buffer, chrs[chrs.size() - s]);
2901 0 0 if (s >= shortest) {
2902 0 0 apply_in_window(i, lookup(buffer, total_features));
0 0 apply_in_window(i, lookup(buffer, total_features));
0 0 apply_in_window(i, lookup(buffer, total_features));
0 0 apply_in_window(i, lookup(buffer, total_features));
2907 0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
0 0 apply_outer_words_in_window(lookup_empty());
2919 14 4 for (unsigned i = 0; i < sentence.size; i++)
2920 12 2 apply_in_window(i, lookup(sentence.words[i].tag, total_features));
40 12 apply_in_window(i, lookup(sentence.words[i].tag, total_features));
2922 4 0 apply_outer_words_in_window(lookup_empty());
8 4 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
8 0 apply_outer_words_in_window(lookup_empty());
12 8 apply_outer_words_in_window(lookup_empty());
2931 0 0 if (!feature_processor::parse(window, args, entities, total_features, pipeline)) return false;
2932 0 0 if (args.size() != 2) return cerr << "URLEmailDetector requires exactly two arguments -- named entity types for URL and email!" << endl, false;
2937 0 0 if (url == entity_type_unknown || email == entity_type_unknown)
0 0 if (url == entity_type_unknown || email == entity_type_unknown)
2957 14 4 for (unsigned i = 0; i < sentence.size; i++) {
2959 0 14 if (type == url_detector::NO_URL || sentence.probabilities[i].local_filled) continue;
0 0 if (type == url_detector::NO_URL || sentence.probabilities[i].local_filled) continue;
0 14 if (type == url_detector::NO_URL || sentence.probabilities[i].local_filled) continue;
2962 0 0 for (auto&& bilou : sentence.probabilities[i].local.bilou) {
2967 0 0 sentence.probabilities[i].local.bilou[bilou_type_U].entity = type == url_detector::EMAIL ? email : url;
2982 0 8 if (name.compare("BrownClusters") == 0) return new brown_clusters();
2983 0 8 if (name.compare("CzechAddContainers") == 0) return new czech_add_containers();
2984 0 8 if (name.compare("CzechLemmaTerm") == 0) return new czech_lemma_term();
2985 1 7 if (name.compare("Form") == 0) return new form();
2986 0 7 if (name.compare("FormCapitalization") == 0) return new form_capitalization();
2987 0 7 if (name.compare("FormCaseNormalized") == 0) return new form_case_normalized();
2988 0 7 if (name.compare("FormCaseNormalizedSuffix") == 0) return new suffix(SUFFIX_SOURCE_FORM, SUFFIX_CASE_NORMALIZED);
2989 0 7 if (name.compare("FormSuffix") == 0) return new suffix(SUFFIX_SOURCE_FORM, SUFFIX_CASE_ORIGINAL);
2990 0 7 if (name.compare("Gazetteers") == 0) return new feature_processors::gazetteers();
2991 0 7 if (name.compare("GazetteersEnhanced") == 0) return new gazetteers_enhanced();
2992 1 6 if (name.compare("Lemma") == 0) return new lemma();
2993 1 5 if (name.compare("NumericTimeValue") == 0) return new number_time_value();
2994 1 4 if (name.compare("PreviousStage") == 0) return new previous_stage();
2995 1 3 if (name.compare("RawLemma") == 0) return new raw_lemma();
2996 1 2 if (name.compare("RawLemmaCapitalization") == 0) return new raw_lemma_capitalization();
2997 0 2 if (name.compare("RawLemmaCaseNormalized") == 0) return new raw_lemma_case_normalized();
2998 0 2 if (name.compare("RawLemmaCaseNormalizedSuffix") == 0) return new suffix(SUFFIX_SOURCE_RAWLEMMA, SUFFIX_CASE_NORMALIZED);
2999 0 2 if (name.compare("RawLemmaSuffix") == 0) return new suffix(SUFFIX_SOURCE_RAWLEMMA, SUFFIX_CASE_ORIGINAL);
3000 1 1 if (name.compare("Tag") == 0) return new tag();
3001 1 0 if (name.compare("URLEmailDetector") == 0) return new url_email_detector();
3058 1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
3061 1 0 total_features = data.next_4B();
3064 1 0 for (unsigned i = data.next_4B(); i; i--) {
8 1 for (unsigned i = data.next_4B(); i; i--) {
3066 8 0 data.next_str(name);
3069 8 0 auto* processor = feature_processor::create(name);
3070 8 0 if (processor) {
3071 8 0 processor->load(data, pipeline);
3072 8 0 processors.emplace_back(name, processor);
3078 0 0 }
3088 14 4 for (unsigned i = 0; i < sentence.size; i++) {
3094 32 4 for (auto&& processor : processors)
3095 0 32 processor.processor->process_sentence(sentence, adding_features ? &total_features : nullptr, buffer);
3099 16 2 for (auto&& processor : processors)
0 0 for (auto&& processor : processors)
3108 0 0 for (auto&& processor : processors)
0 0 for (auto&& processor : processors)
3395 0 0 format_tagged_lemma(result);
3400 0 0 for (auto&& lemma : lemmas)
3403 0 0 if (lemmas.size() > 1)
3411 0 0 if (converter) converter->convert(lemma);
3415 0 0 if (converter) converter->convert_analyzed(lemmas);
3428 0 0 for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); )
0 0 for (derivated_lemma parent; derinet->parent(lemma.lemma, parent); )
3430 0 0 if (converter) converter->convert(lemma);
3438 0 0 return derinet ? new root_derivation_formatter(derinet) : nullptr;
0 0 return derinet ? new root_derivation_formatter(derinet) : nullptr;
3447 0 0 if (converter) converter->convert(lemma);
0 0 if (converter) converter->convert(lemma);
3448 0 0 for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) {
0 0 for (derivated_lemma parent; derinet->parent(current.lemma, parent); current.lemma.swap(parent.lemma)) {
3449 0 0 tagged_lemma parrent_lemma(parent.lemma, current.tag);
3450 0 0 if (converter) converter->convert(parrent_lemma);
0 0 if (converter) converter->convert(parrent_lemma);
3451 0 0 lemma.lemma.append(" ").append(parrent_lemma.lemma);
3460 0 0 return derinet ? new path_derivation_formatter(derinet) : nullptr;
0 0 return derinet ? new path_derivation_formatter(derinet) : nullptr;
3469 0 0 if (converter) converter->convert(lemma);
0 0 if (converter) converter->convert(lemma);
3470 0 0 for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {}
0 0 for (derivated_lemma parent; derinet->parent(root, parent); root.swap(parent.lemma)) {}
3471 0 0 format_tree(root, tag, lemma, converter);
3477 0 0 if (converter) {
3478 0 0 tagged_lemma current(root, tag);
3479 0 0 converter->convert(current);
3480 0 0 tree.lemma.append(" ").append(current.lemma);
3482 0 0 tree.lemma.append(" ").append(root);
3485 0 0 if (derinet->children(root, children))
0 0 if (derinet->children(root, children))
3486 0 0 for (auto&& child : children)
3487 0 0 format_tree(child.lemma, tag, tree, converter);
3488 0 0 tree.lemma.push_back(' ');
3496 0 0 return derinet ? new tree_derivation_formatter(derinet) : nullptr;
0 0 return derinet ? new tree_derivation_formatter(derinet) : nullptr;
3500 0 0 if (name == "none") return new_none_derivation_formatter();
3501 0 0 if (name == "root") return new_root_derivation_formatter(derinet);
3502 0 0 if (name == "path") return new_path_derivation_formatter(derinet);
3503 0 0 if (name == "tree") return new_tree_derivation_formatter(derinet);
3533 0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
108 6 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
31 3 while (len--)
136 26 while (len--)
0 0 while (len--)
118 10 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
3534 0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
30 78 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
14 17 if (*a++ != *b++)
124 12 if (*a++ != *b++)
0 0 if (*a++ != *b++)
98 20 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
0 0 if (*a++ != *b++)
3543 0 0 while (len--)
236 108 while (len--)
0 0 while (len--)
0 0 while (len--)
4 24 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
56 9 while (len--)
3684 52 16 while (mask < num)
3686 16 0 hash.resize(mask + 1);
3690 137 0 uint32_t size = data.next_4B();
3692 137 0 hash.resize(size);
3693 137 0 memcpy(hash.data(), data.next(size), size * sizeof(uint32_t));
3695 137 0 size = data.next_4B();
3696 137 0 this->data.resize(size);
3697 68 69 if (size) memcpy(this->data.data(), data.next(size), size);
68 0 if (size) memcpy(this->data.data(), data.next(size), size);
3701 0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
30 0 if (len <= 0) return 0;
30 18 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
0 0 if (len <= 0) return 0;
108 0 if (len <= 0) return 0;
41 0 if (len <= 0) return 0;
9 0 if (len <= 0) return 0;
9 0 if (len <= 0) return 0;
3702 0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
4 26 if (len == 1) return unaligned_load(data);
18 12 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 0 if (len == 1) return unaligned_load(data);
0 108 if (len == 1) return unaligned_load(data);
7 34 if (len == 1) return unaligned_load(data);
0 9 if (len == 1) return unaligned_load(data);
0 9 if (len == 1) return unaligned_load(data);
3703 0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
26 0 if (len == 2) return unaligned_load(data);
0 12 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
0 0 if (len == 2) return unaligned_load(data);
20 88 if (len == 2) return unaligned_load(data);
30 4 if (len == 2) return unaligned_load(data);
9 0 if (len == 2) return unaligned_load(data);
9 0 if (len == 2) return unaligned_load(data);
3706 0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
142 26 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
0 0 while (len--)
60 20 while (len--)
132 30 while (len--)
56 9 while (len--)
56 9 while (len--)
3720 0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
48 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
0 0 if (unsigned(len) >= hashes.size()) return nullptr;
3726 0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
0 48 if (len <= 2)
0 0 if (len <= 2)
0 0 if (len <= 2)
3727 0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
30 18 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
0 0 return data != end ? data + len : nullptr;
3729 0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
3730 0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
0 0 if (small_memeq(str, data, len)) return data + len;
3741 108 0 if (unsigned(len) >= hashes.size()) return nullptr;
41 55 if (unsigned(len) >= hashes.size()) return nullptr;
3747 20 88 if (len <= 2)
30 11 if (len <= 2)
3748 52 36 return data != end ? (const T*)(data + len) : nullptr;
11 0 return data != end ? (const T*)(data + len) : nullptr;
3750 20 17 while (data < end) {
38 4 while (data < end) {
3751 3 17 if (small_memeq(str, data, len)) return (const T*)(data + len);
26 12 if (small_memeq(str, data, len)) return (const T*)(data + len);
3760 0 0 if (unsigned(len) >= hashes.size()) return;
30 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
0 0 if (unsigned(len) >= hashes.size()) return;
3766 0 0 while (data < end) {
30 30 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
3776 3 1 for (unsigned len = 0; len < hashes.size(); len++) {
0 0 for (unsigned len = 0; len < hashes.size(); len++) {
0 0 for (unsigned len = 0; len < hashes.size(); len++) {
3780 13 3 while (data < end) {
0 0 while (data < end) {
0 0 while (data < end) {
3794 0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
0 0 return unsigned(len) < hashes.size() ? hashes[len].data.data() : nullptr;
3798 2 14 if (hashes.size() == 0) hashes.emplace_back(1);
3799 2 12 else if (hashes.size() == 1) hashes.emplace_back(1<<8);
3800 2 10 else if (hashes.size() == 2) hashes.emplace_back(1<<16);
3805 9 0 if (unsigned(str_len) < hashes.size())
3810 16 2 for (auto&& hash : hashes) {
3812 131616 16 for (auto&& len : hash.hash) total += len, len = total - len;
3818 9 0 if (unsigned(str_len) < hashes.size()) {
3829 16 2 for (auto&& hash : hashes)
3830 131616 16 for (int i = hash.hash.size() - 1; i >= 0; i--)
3831 131600 16 hash.hash[i] = i > 0 ? hash.hash[i-1] : 0;
3838 137 48 for (unsigned i = 0; i < sizes; i++)
3890 0 0 if (dictionary) lemma.len = dictionary->lemma_id_len(lemma);
3897 0 0 if (lemma_data) {
3899 0 0 if (parent_encoded) {
3903 0 0 if (parent_data[parent_len])
3913 0 0 if (dictionary) lemma.len = dictionary->lemma_id_len(lemma);
3920 0 0 if (lemma_data) {
3923 0 0 if (children_len) {
3925 0 0 for (unsigned i = 0; i < children_len; i++) {
3929 0 0 if (child_data[child_len])
3941 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
3944 0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
3945 0 0 derinet.resize(data.next_4B());
0 0 derinet.resize(data.next_4B());
3949 0 0 for (int pass = 1; pass <= 3; pass++) {
3950 0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
3953 0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
3954 0 0 lemma.resize(lemma.size() - data.next_1B());
0 0 lemma.resize(lemma.size() - data.next_1B());
3955 0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
3956 0 0 lemma.push_back(data.next_1B());
3958 0 0 unsigned char lemma_comment_len = data.next_1B();
3959 0 0 const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr;
0 0 const char* lemma_comment = lemma_comment_len ? data.next(lemma_comment_len) : nullptr;
3961 0 0 unsigned children = data.next_2B();
3963 0 0 if (pass == 3) parent.clear();
3965 0 0 int operations = data.next_1B();
3966 0 0 if (operations) {
3967 0 0 int remove_start = operations & REMOVE_START ? data.next_1B() : 0;
0 0 int remove_start = operations & REMOVE_START ? data.next_1B() : 0;
3968 0 0 int remove_end = operations & REMOVE_END ? data.next_1B() : 0;
0 0 int remove_end = operations & REMOVE_END ? data.next_1B() : 0;
3969 0 0 if (operations & ADD_START) {
3970 0 0 int add_start = data.next_1B();
3971 0 0 const char* str = data.next(add_start);
3972 0 0 if (pass == 3) parent.assign(str, str + add_start);
3974 0 0 if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end);
0 0 if (pass == 3) parent.insert(parent.end(), lemma.begin() + remove_start, lemma.end() - remove_end);
3975 0 0 if (operations & ADD_END) {
3976 0 0 int add_end = data.next_1B();
3977 0 0 const char* str = data.next(add_end);
3978 0 0 if (pass == 3) parent.insert(parent.end(), str, str + add_end);
3982 0 0 if (pass == 1) {
3984 0 0 } else if (pass == 2) {
3987 0 0 while (lemma_comment_len--) *lemma_data++ = *lemma_comment++;
3990 0 0 if (children) unaligned_store(((uint32_t*)lemma_data) + children - 1, 0);
3991 0 0 } else if (pass == 3 && !parent.empty()) {
0 0 } else if (pass == 3 && !parent.empty()) {
0 0 } else if (pass == 3 && !parent.empty()) {
4002 0 0 assert(lemma_data && parent_data);
4005 0 0 assert(parent.size() < (1<<8) && parent_offset < (1<<24));
0 0 assert(parent.size() < (1<<8) && parent_offset < (1<<24));
4009 0 0 assert(lemma.size() < (1<<8) && lemma_offset < (1<<24));
0 0 assert(lemma.size() < (1<<8) && lemma_offset < (1<<24));
4014 0 0 if (child_index+1 < children_len)
4019 0 0 if (pass == 1)
4020 0 0 derinet.done_adding();
4021 0 0 if (pass == 2)
4023 0 0 }
4056 38 14 while (form_tmp.len && !rest_has_Lut)
38 0 while (form_tmp.len && !rest_has_Lut)
4065 4 10 if (first_Lut && !rest_has_Lut) { // common case allowing fast execution
4070 0 10 } else if (!first_Lut && rest_has_Lut) {
4073 0 10 } else if (first_Lut && rest_has_Lut) {
4080 0 0 while (form_tmp.len) {
4121 0 0 for (unsigned len = 1; len < lemma.len; len++)
4122 0 0 if (lemma.str[len] == '`' || lemma.str[len] == '_' ||
0 0 if (lemma.str[len] == '`' || lemma.str[len] == '_' ||
4123 0 0 (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9'))
0 0 (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9'))
0 0 (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9'))
4130 0 0 for (unsigned len = 1; len < lemma.len; len++) {
4131 0 0 if (lemma.str[len] == '`' || lemma.str[len] == '_')
4133 0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
0 0 if (lemma.str[len] == '-' && len+1 < lemma.len && lemma.str[len+1] >= '0' && lemma.str[len+1] <= '9') {
4135 0 0 while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++;
0 0 while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++;
0 0 while (len < lemma.len && lemma.str[len] >= '0' && lemma.str[len] <= '9') len++;
4145 0 0 if (addinfo_len) {
4146 0 0 res.reserve(addinfo_len + 4);
4147 0 0 if (addinfo[0] != 255) {
4152 0 0 for (int i = 1; i < addinfo_len; i++)
4160 0 0 for (int i = 1; i + 2 < addinfo_len; i++)
4161 0 0 if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x')
0 0 if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x')
0 0 if (addinfo[i] == '_' && addinfo[i+1] == ',' && addinfo[i+2] == 'x')
4171 0 0 if (lemma_info < lemma.str + lemma.len) {
4175 0 0 if (*lemma_info == '-') {
4178 0 0 lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9');
0 0 lemma_additional_info < lemma.str + lemma.len && (*lemma_additional_info >= '0' && *lemma_additional_info <= '9');
4182 0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
0 0 if (lemma_additional_info == lemma_info + 1 || (lemma_additional_info < lemma.str + lemma.len && *lemma_additional_info != '`' && *lemma_additional_info != '_') || lemma_num >= 255) {
4183 0 0 if (die_on_failure)
4190 0 0 while (lemma_additional_info < lemma.str + lemma.len)
4193 0 0 if (data.size() > 255) {
4194 0 0 if (die_on_failure)
4205 0 0 if (data.empty()) return true;
4206 0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
0 0 if (data[0] != 255 && (!other_addinfo_len || other_addinfo[0] != data[0])) return false;
4250 0 0 if (filters.empty()) return true;
4253 0 0 for (auto&& filter : filters) {
4255 0 0 while (tag_pos < filter.pos)
4256 0 0 if (!tag[tag_pos++])
4258 0 0 if (!tag[tag_pos])
4263 0 0 for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++)
0 0 for (int i = 1; i < filter.chars_len && ((!matched) ^ filter.negate); i++)
4265 0 0 if (!matched) return false;
4305 7 1 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
4307 9 1 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
4313 1 0 vector root(max(lemmas.max_length(), roots.max_length()));
0 0 vector root(max(lemmas.max_length(), roots.max_length()));
0 0 vector root(max(lemmas.max_length(), roots.max_length()));
4315 2 1 for (int pass = 1; pass <= 2; pass++) {
0 0 for (int pass = 1; pass <= 2; pass++) {
0 0 for (int pass = 1; pass <= 2; pass++) {
4316 1 1 if (pass > 1) data.seek(data_position);
1 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
0 0 if (pass > 1) data.seek(data_position);
4321 2 0 for (int i = data.next_4B(); i > 0; i--) {
4 2 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
0 0 for (int i = data.next_4B(); i > 0; i--) {
4322 4 0 lemma_len -= data.next_1B();
0 0 lemma_len -= data.next_1B();
0 0 lemma_len -= data.next_1B();
4323 4 0 for (int i = data.next_1B(); i > 0; i--)
24 4 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
0 0 for (int i = data.next_1B(); i > 0; i--)
4324 24 0 lemma[lemma_len++] = data.next_1B();
0 0 lemma[lemma_len++] = data.next_1B();
0 0 lemma[lemma_len++] = data.next_1B();
4325 4 0 unsigned char lemma_info_len = data.next_1B();
0 0 unsigned char lemma_info_len = data.next_1B();
0 0 unsigned char lemma_info_len = data.next_1B();
4326 0 4 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
0 0 const char* lemma_info = lemma_info_len ? data.next(lemma_info_len) : nullptr;
4327 4 0 unsigned lemma_roots = data.next_1B();
0 0 unsigned lemma_roots = data.next_1B();
0 0 unsigned lemma_roots = data.next_1B();
4332 2 2 if (pass == 1) {
0 0 if (pass == 1) {
0 0 if (pass == 1) {
4339 0 2 if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len;
0 0 if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len;
0 0 if (lemma_info_len) small_memcpy(lemma_data, lemma_info, lemma_info_len), lemma_data += lemma_info_len;
4344 14 4 for (unsigned i = 0; i < lemma_roots; i++) {
0 0 for (unsigned i = 0; i < lemma_roots; i++) {
0 0 for (unsigned i = 0; i < lemma_roots; i++) {
4346 14 0 int operations = data.next_1B();
0 0 int operations = data.next_1B();
0 0 int operations = data.next_1B();
4347 0 14 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
0 0 if (operations & REMOVE_START) { int from = data.next_1B(), to = 0; while (from < root_len) root[to++] = root[from++]; root_len = to; }
4348 12 2 if (operations & REMOVE_END) root_len -= data.next_1B();
12 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
0 0 if (operations & REMOVE_END) root_len -= data.next_1B();
4349 0 14 if (operations & ADD_START) {
0 0 if (operations & ADD_START) {
0 0 if (operations & ADD_START) {
4350 0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
0 0 int from = root_len, to = from + data.next_1B(); while (from > 0) root[--to] = root[--from]; root_len += to;
4351 0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
0 0 for (int i = 0; i < to; i++) root[i] = data.next_1B();
4353 14 0 if (operations & ADD_END)
0 0 if (operations & ADD_END)
0 0 if (operations & ADD_END)
4354 14 0 for (int len = data.next_1B(); len > 0; len--)
28 14 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
0 0 for (int len = data.next_1B(); len > 0; len--)
4355 28 0 root[root_len++] = data.next_1B();
0 0 root[root_len++] = data.next_1B();
0 0 root[root_len++] = data.next_1B();
4356 14 0 uint16_t clas = data.next_2B();
0 0 uint16_t clas = data.next_2B();
0 0 uint16_t clas = data.next_2B();
4358 7 7 if (pass == 1) { // for each root
0 0 if (pass == 1) { // for each root
0 0 if (pass == 1) { // for each root
4367 0 7 assert(uint8_t(lemma_len) == lemma_len);
0 0 assert(uint8_t(lemma_len) == lemma_len);
0 0 assert(uint8_t(lemma_len) == lemma_len);
4372 0 7 assert(uint8_t(root_len) == root_len);
0 0 assert(uint8_t(root_len) == root_len);
0 0 assert(uint8_t(root_len) == root_len);
4377 1 1 if (pass == 1) { // after the whole pass
0 0 if (pass == 1) { // after the whole pass
0 0 if (pass == 1) { // after the whole pass
4378 1 0 lemmas.done_adding();
0 0 lemmas.done_adding();
0 0 lemmas.done_adding();
4379 1 0 roots.done_adding();
0 0 roots.done_adding();
0 0 roots.done_adding();
4387 1 0 tags.resize(data.next_2B());
1 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
0 0 tags.resize(data.next_2B());
4388 20 1 for (auto&& tag : tags) {
0 0 for (auto&& tag : tags) {
0 0 for (auto&& tag : tags) {
4389 20 0 tag.resize(data.next_1B());
0 0 tag.resize(data.next_1B());
0 0 tag.resize(data.next_1B());
4390 60 20 for (unsigned i = 0; i < tag.size(); i++)
0 0 for (unsigned i = 0; i < tag.size(); i++)
0 0 for (unsigned i = 0; i < tag.size(); i++)
4391 60 0 tag[i] = data.next_1B();
0 0 tag[i] = data.next_1B();
0 0 tag[i] = data.next_1B();
4395 1 0 suffixes.load(data);
0 0 suffixes.load(data);
0 0 suffixes.load(data);
4398 1 0 suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable {
0 0 suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable {
0 0 suffixes.iter_all([this](const char* suffix, int len, pointer_decoder& data) mutable {
4405 15 13 for (unsigned i = 0; i < classes_len; i++) {
0 0 for (unsigned i = 0; i < classes_len; i++) {
0 0 for (unsigned i = 0; i < classes_len; i++) {
4407 5 10 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
5 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
0 0 if (classes_ptr_i >= classes.size()) classes.resize(classes_ptr_i + 1);
4408 15 0 classes[classes_ptr_i].emplace_back(suffix_str, vector());
0 0 classes[classes_ptr_i].emplace_back(suffix_str, vector());
0 0 classes[classes_ptr_i].emplace_back(suffix_str, vector());
4409 20 15 for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i),
0 0 for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i),
0 0 for (const uint16_t* ptr = tags_ptr + unaligned_load(indices_ptr + i),
4412 20 0 classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr));
0 0 classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr));
0 0 classes[classes_ptr_i].back().second.emplace_back(unaligned_load(ptr));
4422 0 18 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
0 0 uint16_t** suff = max_suffix_len <= 16 ? suff_stack : (suff_heap.resize(max_suffix_len), suff_heap.data());
4424 48 0 for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) {
0 0 for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) {
0 0 for (int i = form.len; i >= 0 && suff_len < max_suffix_len; i--, suff_len++) {
4432 30 18 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 30 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
30 18 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
0 0 for (int root_len = int(form.len) - --suff_len; suff_len >= 0 && root_len < int(roots.max_length()); suff_len--, root_len++)
4433 30 0 if (unaligned_load(suff[suff_len])) {
0 0 if (unaligned_load(suff[suff_len])) {
0 0 if (unaligned_load(suff[suff_len])) {
4437 30 0 roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) {
0 0 roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) {
0 0 roots.iter(form.str, root_len, [&](const char* root, pointer_decoder& root_data) {
4442 10 20 if (small_memeq(form.str, root, root_len)) {
0 0 if (small_memeq(form.str, root, root_len)) {
0 0 if (small_memeq(form.str, root, root_len)) {
4444 10 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 10 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
10 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
0 0 if (suffix_class_ptr < suff_data + suff_classes && unaligned_load(suffix_class_ptr) == root_class) {
4447 0 10 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
0 0 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
0 0 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
0 0 if (lemma_data[lemma_len]) lemma += LemmaAddinfo::format(lemma_data + lemma_len + 1, lemma_data[lemma_len]);
4451 18 10 for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data));
0 0 for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data));
0 0 for (unsigned i = unaligned_load(suff_tag_indices + (suffix_class_ptr - suff_data));
4453 18 0 lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]);
0 0 lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]);
0 0 lemmas.emplace_back(lemma, tags[unaligned_load(suff_tags + i)]);
4463 0 0 int raw_lemma_len = addinfo.parse(lemma);
0 0 int raw_lemma_len = addinfo.parse(lemma);
4466 0 0 lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) {
0 0 lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) {
0 0 lemmas.iter(lemma.str, raw_lemma_len, [&](const char* lemma_str, pointer_decoder& data) {
4472 0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
0 0 if (small_memeq(lemma.str, lemma_str, raw_lemma_len) && addinfo.match_lemma_id(lemma_info, lemma_info_len) && LemmaAddinfo::generatable(lemma_info, lemma_info_len)) {
4477 0 0 for (unsigned i = 0; i < lemma_roots_len; i++) {
0 0 for (unsigned i = 0; i < lemma_roots_len; i++) {
0 0 for (unsigned i = 0; i < lemma_roots_len; i++) {
4483 0 0 for (auto&& suffix : classes[clas]) {
0 0 for (auto&& suffix : classes[clas]) {
0 0 for (auto&& suffix : classes[clas]) {
4485 0 0 for (auto&& tag : suffix.second)
0 0 for (auto&& tag : suffix.second)
0 0 for (auto&& tag : suffix.second)
4486 0 0 if (filter.matches(tags[tag].c_str())) {
0 0 if (filter.matches(tags[tag].c_str())) {
0 0 if (filter.matches(tags[tag].c_str())) {
4487 0 0 if (!forms) {
0 0 if (!forms) {
0 0 if (!forms) {
4488 0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
0 0 lemmas_forms.emplace_back(string(lemma.str, raw_lemma_len) + LemmaAddinfo::format(lemma_info, lemma_info_len));
4492 0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
0 0 if (root_with_suffix.empty() && root_len + suffix.first.size()) {
4493 0 0 root_with_suffix.reserve(root_len + suffix.first.size());
0 0 root_with_suffix.reserve(root_len + suffix.first.size());
0 0 root_with_suffix.reserve(root_len + suffix.first.size());
4498 0 0 forms->emplace_back(root_with_suffix, tags[tag]);
0 0 forms->emplace_back(root_with_suffix, tags[tag]);
0 0 forms->emplace_back(root_with_suffix, tags[tag]);
4545 0 0 for (unsigned tag_filters_len = data.next_1B(); tag_filters_len; tag_filters_len--) {
4549 0 0 tag_filters.emplace_back(tag_filter.c_str());
4560 0 0 if (!form.len) return;
4564 0 0 middle_masks.reserve(form.len);
4566 0 0 for (unsigned initial = 0; initial < form.len; initial++) {
4569 0 0 if (initial) {
4571 0 0 if (!found) break;
4576 0 0 if (initial_mask) {
4577 0 0 middle_masks.resize(initial);
4578 0 0 middle_masks.emplace_back(initial_mask);
4579 0 0 for (unsigned middle = initial; middle < middle_masks.size(); middle++) {
4580 0 0 if (!middle_masks[middle]) continue;
4582 0 0 for (unsigned i = middle + 1; i < form.len; i++) {
4584 0 0 if (!found) break;
4585 0 0 if (unaligned_load(found)) {
4586 0 0 if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1);
0 0 if (i + 1 > middle_masks.size()) middle_masks.resize(i + 1);
4592 0 0 if (middle > initial && middle < form.len ) {
0 0 if (middle > initial && middle < form.len ) {
4593 0 0 if (initial) {
4594 0 0 if (form_tmp.empty()) form_tmp.assign(form.str, form.str + form.len);
4598 0 0 dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas);
0 0 dictionary.analyze(string_piece((initial ? form_tmp.data() : form.str) + middle - initial, form.len - middle + initial), lemmas);
4600 0 0 for (unsigned i = lemmas_ori_size; i < lemmas.size(); i++) {
4601 0 0 for (unsigned filter = 0; filter < tag_filters.size(); filter++)
4602 0 0 if ((middle_masks[middle] & (1<
0 0 if ((middle_masks[middle] & (1<
0 0 if ((middle_masks[middle] & (1<
4603 0 0 if (i == lemmas_new_size) {
4606 0 0 lemmas[lemmas_new_size].lemma.reserve(lemmas[i].lemma.size() + middle - initial);
4615 0 0 if (lemmas_new_size < lemmas.size()) lemmas.erase(lemmas.begin() + lemmas_new_size, lemmas.end());
4740 22 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
5 17 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
37 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
5 32 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
0 0 return chr.chr < ragel_map.size() && ragel_map[chr.chr] != 128 ? ragel_map[chr.chr] : 128 + (uint32_t(chr.cat) * uint32_t(0x077CB531U) >> 27);
4800 0 0 czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {}
0 0 czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {}
0 0 czech_morpho(morpho_language language, unsigned version) : language(language), version(version) {}
4843 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
4847 0 0 unsigned tag_length = data.next_1B();
4848 0 0 if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length);
0 0 if (tag_length < unknown_tag.size()) unknown_tag.erase(tag_length);
4849 0 0 if (tag_length < number_tag.size()) number_tag.erase(tag_length);
0 0 if (tag_length < number_tag.size()) number_tag.erase(tag_length);
4850 0 0 if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length);
0 0 if (tag_length < punctuation_tag.size()) punctuation_tag.erase(tag_length);
4853 0 0 dictionary.load(data);
4857 0 0 if (data.next_1B()) {
0 0 if (data.next_1B()) {
4858 0 0 prefix_guesser.reset(new morpho_prefix_guesser(dictionary));
4859 0 0 prefix_guesser->load(data);
4864 0 0 if (data.next_1B()) {
0 0 if (data.next_1B()) {
4865 0 0 statistical_guesser.reset(new morpho_statistical_guesser());
4866 0 0 statistical_guesser->load(data);
4867 0 0 }
4878 0 0 if (form.len) {
4882 0 0 generate_casing_variants(form, form_uclc, form_lc);
4885 0 0 dictionary.analyze(form, lemmas);
4886 0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
4887 0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
4888 0 0 if (!lemmas.empty()) return NO_GUESSER;
4891 0 0 analyze_special(form, lemmas);
4892 0 0 if (!lemmas.empty()) return NO_GUESSER;
4895 0 0 if (guesser == GUESSER && prefix_guesser)
0 0 if (guesser == GUESSER && prefix_guesser)
0 0 if (guesser == GUESSER && prefix_guesser)
4896 0 0 prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas);
0 0 prefix_guesser->analyze(form_lc.empty() ? form : form_lc, lemmas);
4900 0 0 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
4901 0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
4902 0 0 statistical_guesser->analyze(form, lemmas, nullptr);
4904 0 0 morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3);
4905 0 0 statistical_guesser->analyze(form, lemmas, &used_rules);
4906 0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
4907 0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
4913 0 0 if (prefix_guesser_guesses) {
4916 0 0 return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag);
4919 0 0 return a.lemma == b.lemma && a.tag == b.tag;
0 0 return a.lemma == b.lemma && a.tag == b.tag;
4921 0 0 if (lemmas_end != lemmas.end()) lemmas.erase(lemmas_end, lemmas.end());
4924 0 0 if (!lemmas.empty()) return GUESSER;
4927 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
4936 0 0 if (lemma.len) {
4937 0 0 if (dictionary.generate(lemma, filter, forms))
0 0 if (dictionary.generate(lemma, filter, forms))
4940 0 0 if (guesser == GUESSER && prefix_guesser)
0 0 if (guesser == GUESSER && prefix_guesser)
4961 0 0 return new czech_tokenizer(language, version, this);
4992 0 0 if (!form.len) return;
5000 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len);
5001 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len);
5002 0 0 if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len);
0 0 if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len);
0 0 if ((codepoint == '.' && form.len) || codepoint == ',') codepoint = utf8::decode(form.str, form.len);
5003 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len);
5004 0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
5006 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(form.str, form.len);
5008 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(form.str, form.len);
5011 0 0 if (any_digit && !form.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !form.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !form.len && (!codepoint || codepoint == '.')) {
5012 0 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), number_tag);
5013 0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
0 0 } else if ((first < sizeof(punctuation_additional) && punctuation_additional[first]) ||
5014 0 0 ((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first])))
0 0 ((unicode::category(first) & unicode::P) && (first >= sizeof(punctuation_exceptions) || !punctuation_exceptions[first])))
5015 0 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag);
5053 0 0 for (unsigned len = 1; len < lemma.len; len++) {
5054 0 0 if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+'))
0 0 if (len + 1 == lemma.len && (lemma.str[len] == '^' || lemma.str[len] == '+'))
5056 0 0 if (len + 1 < lemma.len && lemma.str[len] == '^') {
0 0 if (len + 1 < lemma.len && lemma.str[len] == '^') {
5058 0 0 for (unsigned i = len + 1; ok && i < lemma.len; i++)
0 0 for (unsigned i = len + 1; ok && i < lemma.len; i++)
5059 0 0 ok &= (lemma.str[i] >= 'A' && lemma.str[i] <= 'Z') ||
5060 0 0 (lemma.str[i] >= 'a' && lemma.str[i] <= 'z') ||
0 0 (lemma.str[i] >= 'a' && lemma.str[i] <= 'z') ||
5061 0 0 (i > len + 1 && lemma.str[i] == '-');
5062 0 0 if (ok) return len;
5085 0 0 for (size_t i = len; i < lemma.len; i++)
5092 0 0 if (data.empty()) return true;
5093 0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
0 0 if (data.size() == 1 && data[0] == '^') return other_addinfo_len > 0 && other_addinfo[0] == '^';
5094 0 0 if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0;
0 0 if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0;
0 0 if (data.size() == 1 && data[0] == '+') return other_addinfo_len == 0;
5095 0 0 return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len);
0 0 return data.size() == size_t(other_addinfo_len) && small_memeq(data.data(), other_addinfo, other_addinfo_len);
5115 0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
0 0 class english_morpho_guesser {
5163 0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
0 0 english_morpho(unsigned version) : version(version) {}
5237 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
5240 0 0 dictionary.load(data);
5241 0 0 morpho_guesser.load(data);
0 0 morpho_guesser.load(data);
5252 0 0 if (form.len) {
5256 0 0 generate_casing_variants(form, form_uclc, form_lc);
5259 0 0 dictionary.analyze(form, lemmas);
5260 0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
5261 0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
0 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
5262 0 0 if (!lemmas.empty())
5263 0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
0 0 return guesser == NO_GUESSER || !morpho_guesser.analyze_proper_names(form, form_lc.empty() ? form : form_lc, lemmas) ? NO_GUESSER : GUESSER;
5266 0 0 analyze_special(form, lemmas);
5267 0 0 if (!lemmas.empty()) return NO_GUESSER;
5270 0 0 if (guesser == GUESSER)
5271 0 0 morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas);
0 0 morpho_guesser.analyze(form, form_lc.empty() ? form : form_lc, lemmas);
5272 0 0 if (!lemmas.empty()) return GUESSER;
5275 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
5284 0 0 if (lemma.len) {
5285 0 0 if (dictionary.generate(lemma, filter, forms))
0 0 if (dictionary.generate(lemma, filter, forms))
5305 0 0 return new english_tokenizer(version <= 2 ? 1 : 2);
5312 0 0 if (!form.len) return;
5315 0 0 if (form.len == 1)
5319 0 0 case '?': lemmas.emplace_back(string(form.str, form.len), dot_tag); return;
5320 0 0 case ',': lemmas.emplace_back(string(form.str, form.len), comma_tag); return;
5321 0 0 case '#': lemmas.emplace_back(string(form.str, form.len), hash_tag); return;
5322 0 0 case '$': lemmas.emplace_back(string(form.str, form.len), dollar_tag); return;
5323 0 0 case '[': lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
5324 0 0 case ']': lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
5325 0 0 case '%': lemmas.emplace_back(string(form.str, form.len), jj_tag);
5326 0 0 lemmas.emplace_back(string(form.str, form.len), nn_tag); return;
5327 0 0 case '&': lemmas.emplace_back(string(form.str, form.len), cc_tag);
5328 0 0 lemmas.emplace_back(string(form.str, form.len), sym_tag); return;
5329 0 0 case '*': lemmas.emplace_back(string(form.str, form.len), sym_tag);
5330 0 0 lemmas.emplace_back(string(form.str, form.len), nn_tag); return;
5331 0 0 case '@': lemmas.emplace_back(string(form.str, form.len), sym_tag);
5332 0 0 lemmas.emplace_back(string(form.str, form.len), in_tag); return;
5333 0 0 case '\'': lemmas.emplace_back(string(form.str, form.len), close_quotation_tag);
5334 0 0 lemmas.emplace_back(string(form.str, form.len), pos_tag); return;
5341 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
5342 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
5343 0 0 while (codepoint == ',') {
5345 0 0 if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break;
5346 0 0 if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break;
5347 0 0 if (unicode::category(utf8::decode(group.str, group.len) & ~unicode::N)) break;
5352 0 0 if (codepoint == '.' && number.len) {
0 0 if (codepoint == '.' && number.len) {
5354 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
5356 0 0 if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
0 0 if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
0 0 if (version >= 2 && any_digit && codepoint == 's' && !number.len) {
5357 0 0 lemmas.emplace_back(string(form.str, form.len), number_tag);
5358 0 0 lemmas.emplace_back(string(form.str, form.len - 1), nns_tag);
5361 0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
5363 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
5365 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
5367 0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
5368 0 0 lemmas.emplace_back(string(form.str, form.len), number_tag);
5369 0 0 lemmas.emplace_back(string(form.str, form.len), nnp_tag);
5370 0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
0 0 if (form.len == 1 + (codepoint == '.') && *form.str >= '1' && *form.str <= '9')
5371 0 0 lemmas.emplace_back(string(form.str, form.len), ls_tag);
5378 0 0 while ((symbol || any_punctuation) && punctuation.len) {
0 0 while ((symbol || any_punctuation) && punctuation.len) {
5380 0 0 if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
0 0 if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
0 0 if (open_quotation) open_quotation = codepoint == '`' || unicode::category(codepoint) & unicode::Pi;
5381 0 0 if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
0 0 if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
0 0 if (close_quotation) close_quotation = codepoint == '\'' || codepoint == '"' || unicode::category(codepoint) & unicode::Pf;
5382 0 0 if (open_parenthesis) open_parenthesis = unicode::category(codepoint) & unicode::Ps;
5383 0 0 if (close_parenthesis) close_parenthesis = unicode::category(codepoint) & unicode::Pe;
5384 0 0 if (any_punctuation) any_punctuation = unicode::category(codepoint) & unicode::P;
5385 0 0 if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
0 0 if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
0 0 if (symbol) symbol = codepoint == '*' || unicode::category(codepoint) & unicode::S;
5387 0 0 if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
0 0 if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
0 0 if (!punctuation.len && open_quotation) { lemmas.emplace_back(string(form.str, form.len), open_quotation_tag); return; }
5388 0 0 if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
0 0 if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
0 0 if (!punctuation.len && close_quotation) { lemmas.emplace_back(string(form.str, form.len), close_quotation_tag); return; }
5389 0 0 if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
0 0 if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
0 0 if (!punctuation.len && open_parenthesis) { lemmas.emplace_back(string(form.str, form.len), open_parenthesis_tag); return; }
5390 0 0 if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
0 0 if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
0 0 if (!punctuation.len && close_parenthesis) { lemmas.emplace_back(string(form.str, form.len), close_parenthesis_tag); return; }
5391 0 0 if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
0 0 if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
0 0 if (!punctuation.len && symbol) { lemmas.emplace_back(string(form.str, form.len), sym_tag); return; }
5392 0 0 if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
0 0 if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
0 0 if (!punctuation.len && any_punctuation) { lemmas.emplace_back(string(form.str, form.len), punctuation_tag); return; }
5426 0 0 while (tags--) {
5428 0 0 exceptions_tags.emplace_back(string(data.next(len), len));
5564 0 0 for (unsigned len = data.next_1B(); len; len--) {
5570 0 0 if (exception) {
5573 0 0 for (unsigned len = data.next_1B(); len; len--) {
5576 0 0 for (unsigned tags = data.next_1B(); tags; tags--)
5577 0 0 lemmas.emplace_back(lemma, exceptions_tags[data.next_2B()]);
5584 0 0 for (unsigned prefix = 1; prefix <= form_lc.len; prefix++) {
5586 0 0 if (!found) break;
5587 0 0 if (found[NEGATION_LEN]) {
5588 0 0 if (form_lc.len - prefix >= found[TO_FOLLOW]) negation_len = found[NEGATION_LEN];
5594 0 0 add(JJ, lemma_lc, negation_len, lemmas);
5595 0 0 add(RB, lemma_lc, negation_len, lemmas);
5596 0 0 add(NN, lemma_lc, negation_len, lemmas);
5597 0 0 add_NNS(lemma_lc, negation_len, lemmas);
5614 0 0 if ( p == ( (form_lc.str + form_lc.len)) )
5621 0 0 if ( _klen > 0 ) {
5626 0 0 if ( _upper < _lower )
5630 0 0 if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < *_mid )
5632 0 0 else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > *_mid )
5644 0 0 if ( _klen > 0 ) {
5649 0 0 if ( _upper < _lower )
5653 0 0 if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) < _mid[0] )
5655 0 0 else if ( ( form_lc.str[form_lc.len - 1 - (p - form_lc.str)]) > _mid[1] )
5669 0 0 if ( _tag_guesser_trans_actions[_trans] == 0 )
5674 0 0 while ( _nacts-- > 0 )
5679 0 0 { if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); }
0 0 { if (!added_JJR_RBR) added_JJR_RBR = true, add_JJR_RBR(lemma_lc, negation_len, lemmas); }
5682 0 0 { if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); }
0 0 { if (!added_JJS_RBS) added_JJS_RBS = true, add_JJS_RBS(lemma_lc, negation_len, lemmas); }
5685 0 0 { add_VBG(lemma_lc, lemmas); }
5688 0 0 { add_VBD_VBN(lemma_lc, lemmas); }
5691 0 0 { add_VBZ(lemma_lc, lemmas); }
5697 0 0 { if (!added_SYM) added_SYM = true, add(SYM, lemma_lc, lemmas); }
5700 0 0 { if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); }
5706 0 0 if ( ++p != ( (form_lc.str + form_lc.len)) )
5709 0 0 if ( p == ( (form_lc.str + form_lc.len)) )
5713 0 0 while ( __nacts-- > 0 ) {
5714 0 0 switch ( *__acts++ ) {
5716 0 0 { if (!added_CD) added_CD = true, add(CD, lemma_lc, lemmas); }
5732 0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
0 0 bool is_NNP = form.str != form_lc.str || (form.len && (*form.str == '\'' || (*form.str >= '0' && *form.str <= '9')));
5734 0 0 if (!is_NNP && !is_NNPS) return false;
5737 0 0 for (auto&& lemma : lemmas) {
5741 0 0 if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false;
0 0 if (!((is_NNP && !was_NNP) || (is_NNPS && !was_NNPS))) return false;
5744 0 0 if (is_NNP && !was_NNP) add(NNP, lemma, lemmas);
5745 0 0 if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas);
0 0 if (is_NNPS && !was_NNPS) add_NNPS(lemma, lemmas);
5750 0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
0 0 lemmas.emplace_back(form, tag);
5759 0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
0 0 lemmas.emplace_back(negation_len ? form.substr(negation_len) + "^" + form.substr(0, negation_len) : form, tag);
5861 0 0 if ( p == ( (form.c_str() + form.size())) )
5870 0 0 if ( _klen > 0 ) {
5875 0 0 if ( _upper < _lower )
5879 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
5881 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
5893 0 0 if ( _klen > 0 ) {
5898 0 0 if ( _upper < _lower )
5902 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
5904 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
5918 0 0 if ( _NNS_trans_actions[_trans] == 0 )
5923 0 0 while ( _nacts-- > 0 )
5928 0 0 { if (best > 'a') best = 'a', remove = 2, append = "an"; }
5931 0 0 { if (best > 'b') best = 'b', remove = 1, append = nullptr; }
5934 0 0 { if (best > 'c') best = 'c', remove = 3, append = "fe"; }
5937 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
5940 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
5943 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
5946 0 0 { if (best > 'g') best = 'g', remove = 1, append = nullptr; }
5949 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
5952 0 0 { if (best > 'i') best = 'i', remove = 1, append = nullptr; }
5955 0 0 { if (best > 'j') best = 'j', remove = 1, append = nullptr; }
5958 0 0 { if (best > 'k') best = 'k', remove = 2, append = nullptr; }
5961 0 0 { if (best > 'l') best = 'l', remove = 3, append = "y"; }
5964 0 0 { if (best > 'm') best = 'm', remove = 2, append = nullptr; }
5967 0 0 { if (best > 'n') best = 'n', remove = 1, append = nullptr; }
5973 0 0 if ( cs == 0 )
5975 0 0 if ( ++p != ( (form.c_str() + form.size())) )
5981 0 0 add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(NNS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
6107 0 0 if ( p == ( (form.c_str() + form.size())) )
6116 0 0 if ( _klen > 0 ) {
6121 0 0 if ( _upper < _lower )
6125 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
6127 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
6139 0 0 if ( _klen > 0 ) {
6144 0 0 if ( _upper < _lower )
6148 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
6150 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
6164 0 0 if ( _NNPS_trans_actions[_trans] == 0 )
6169 0 0 while ( _nacts-- > 0 )
6174 0 0 { if (best > 'a') best = 'a', remove = 2, append = "AN"; }
6177 0 0 { if (best > 'b') best = 'b', remove = 2, append = "an"; }
6180 0 0 { if (best > 'c') best = 'c', remove = 1, append = nullptr; }
6183 0 0 { if (best > 'd') best = 'd', remove = 3, append = "FE"; }
6186 0 0 { if (best > 'e') best = 'e', remove = 3, append = "fe"; }
6189 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
6192 0 0 { if (best > 'g') best = 'g', remove = 1, append = nullptr; }
6195 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
6198 0 0 { if (best > 'i') best = 'i', remove = 1, append = nullptr; }
6201 0 0 { if (best > 'j') best = 'j', remove = 2, append = nullptr; }
6204 0 0 { if (best > 'k') best = 'k', remove = 1, append = nullptr; }
6207 0 0 { if (best > 'l') best = 'l', remove = 1, append = nullptr; }
6210 0 0 { if (best > 'm') best = 'm', remove = 2, append = nullptr; }
6213 0 0 { if (best > 'n') best = 'n', remove = 3, append = "Y"; }
6216 0 0 { if (best > 'o') best = 'o', remove = 3, append = "y"; }
6219 0 0 { if (best > 'p') best = 'p', remove = 2, append = nullptr; }
6222 0 0 { if (best > 'q') best = 'q', remove = 1, append = nullptr; }
6228 0 0 if ( cs == 0 )
6230 0 0 if ( ++p != ( (form.c_str() + form.size())) )
6236 0 0 add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(NNPS, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
6536 0 0 if ( p == ( (form.c_str() + form.size())) )
6545 0 0 if ( _klen > 0 ) {
6550 0 0 if ( _upper < _lower )
6554 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
6556 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
6568 0 0 if ( _klen > 0 ) {
6573 0 0 if ( _upper < _lower )
6577 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
6579 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
6593 0 0 if ( _VBG_trans_actions[_trans] == 0 )
6598 0 0 while ( _nacts-- > 0 )
6603 0 0 { if (best > 'a') best = 'a', remove = 3, append = nullptr; }
6606 0 0 { if (best > 'b') best = 'b', remove = 3, append = "e"; }
6609 0 0 { if (best > 'c') best = 'c', remove = 3, append = nullptr; }
6612 0 0 { if (best > 'd') best = 'd', remove = 3, append = "e"; }
6615 0 0 { if (best > 'e') best = 'e', remove = 3, append = nullptr; }
6618 0 0 { if (best > 'f') best = 'f', remove = 3, append = "e"; }
6621 0 0 { if (best > 'g') best = 'g', remove = 3, append = nullptr; }
6624 0 0 { if (best > 'h') best = 'h', remove = 3, append = "e"; }
6627 0 0 { if (best > 'i') best = 'i', remove = 3, append = nullptr; }
6630 0 0 { if (best > 'j') best = 'j', remove = 3, append = "e"; }
6633 0 0 { if (best > 'k') best = 'k', remove = 3, append = nullptr; }
6636 0 0 { if (best > 'l') best = 'l', remove = 3, append = "e"; }
6639 0 0 { if (best > 'm') best = 'm', remove = 3, append = nullptr; }
6642 0 0 { if (best > 'n') best = 'n', remove = 3, append = "e"; }
6645 0 0 { if (best > 'o') best = 'o', remove = 3, append = nullptr; }
6648 0 0 { if (best > 'p') best = 'p', remove = 3, append = "e"; }
6651 0 0 { if (best > 'q') best = 'q', remove = 3, append = nullptr; }
6654 0 0 { if (best > 'r') best = 'r', remove = 3, append = "e"; }
6660 0 0 if ( cs == 0 )
6662 0 0 if ( ++p != ( (form.c_str() + form.size())) )
6665 0 0 if ( p == ( (form.c_str() + form.size())) )
6669 0 0 while ( __nacts-- > 0 ) {
6672 0 0 { if (best > 'c') best = 'c', remove = 3, append = nullptr; }
6675 0 0 { if (best > 'f') best = 'f', remove = 3, append = "e"; }
6678 0 0 { if (best > 'p') best = 'p', remove = 3, append = "e"; }
6687 0 0 add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(VBG, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
6990 0 0 if ( p == ( (form.c_str() + form.size())) )
6999 0 0 if ( _klen > 0 ) {
7004 0 0 if ( _upper < _lower )
7008 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
7010 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
7022 0 0 if ( _klen > 0 ) {
7027 0 0 if ( _upper < _lower )
7031 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
7033 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
7047 0 0 if ( _VBD_VBN_trans_actions[_trans] == 0 )
7052 0 0 while ( _nacts-- > 0 )
7057 0 0 { if (best > 'a') best = 'a', remove = 1, append = nullptr; }
7060 0 0 { if (best > 'b') best = 'b', remove = 2, append = nullptr; }
7063 0 0 { if (best > 'c') best = 'c', remove = 1, append = nullptr; }
7066 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
7069 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
7072 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
7075 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
7078 0 0 { if (best > 'i') best = 'i', remove = 3, append = "y"; }
7081 0 0 { if (best > 'j') best = 'j', remove = 1, append = nullptr; }
7084 0 0 { if (best > 'k') best = 'k', remove = 2, append = nullptr; }
7087 0 0 { if (best > 'l') best = 'l', remove = 1, append = nullptr; }
7090 0 0 { if (best > 'm') best = 'm', remove = 2, append = nullptr; }
7093 0 0 { if (best > 'n') best = 'n', remove = 1, append = nullptr; }
7096 0 0 { if (best > 'o') best = 'o', remove = 2, append = nullptr; }
7099 0 0 { if (best > 'p') best = 'p', remove = 1, append = nullptr; }
7102 0 0 { if (best > 'q') best = 'q', remove = 2, append = nullptr; }
7105 0 0 { if (best > 'r') best = 'r', remove = 1, append = nullptr; }
7111 0 0 if ( cs == 0 )
7113 0 0 if ( ++p != ( (form.c_str() + form.size())) )
7116 0 0 if ( p == ( (form.c_str() + form.size())) )
7120 0 0 while ( __nacts-- > 0 ) {
7123 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
7126 0 0 { if (best > 'g') best = 'g', remove = 1, append = nullptr; }
7129 0 0 { if (best > 'j') best = 'j', remove = 1, append = nullptr; }
7138 0 0 add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(VBD, VBN, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
7217 0 0 if ( p == ( (form.c_str() + form.size())) )
7226 0 0 if ( _klen > 0 ) {
7231 0 0 if ( _upper < _lower )
7235 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < *_mid )
7237 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > *_mid )
7249 0 0 if ( _klen > 0 ) {
7254 0 0 if ( _upper < _lower )
7258 0 0 if ( ( form[form.size() - 1 - (p - form.c_str())]) < _mid[0] )
7260 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str())]) > _mid[1] )
7274 0 0 if ( _VBZ_trans_actions[_trans] == 0 )
7279 0 0 while ( _nacts-- > 0 )
7284 0 0 { if (best > 'a') best = 'a', remove = 1, append = nullptr; }
7287 0 0 { if (best > 'b') best = 'b', remove = 2, append = nullptr; }
7290 0 0 { if (best > 'c') best = 'c', remove = 1, append = nullptr; }
7293 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
7296 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
7299 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
7302 0 0 { if (best > 'g') best = 'g', remove = 3, append = "y"; }
7305 0 0 { if (best > 'h') best = 'h', remove = 2, append = nullptr; }
7308 0 0 { if (best > 'i') best = 'i', remove = 1, append = nullptr; }
7314 0 0 if ( cs == 0 )
7316 0 0 if ( ++p != ( (form.c_str() + form.size())) )
7322 0 0 add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
0 0 add(VBZ, form.substr(0, form.size() - remove).append(append ? append : ""), lemmas);
7448 0 0 if ( p == ( (form.c_str() + form.size())) )
7457 0 0 if ( _klen > 0 ) {
7462 0 0 if ( _upper < _lower )
7466 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
7468 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
7480 0 0 if ( _klen > 0 ) {
7485 0 0 if ( _upper < _lower )
7489 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
7491 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
7505 0 0 if ( _JJR_RBR_trans_actions[_trans] == 0 )
7510 0 0 while ( _nacts-- > 0 )
7515 0 0 { if (best > 'a') best = 'a', remove = 2, append = nullptr; }
7518 0 0 { if (best > 'b') best = 'b', remove = 3, append = nullptr; }
7521 0 0 { if (best > 'c') best = 'c', remove = 3, append = "y"; }
7524 0 0 { if (best > 'd') best = 'd', remove = 2, append = nullptr; }
7527 0 0 { if (best > 'e') best = 'e', remove = 1, append = nullptr; }
7530 0 0 { if (best > 'f') best = 'f', remove = 2, append = nullptr; }
7536 0 0 if ( cs == 0 )
7538 0 0 if ( ++p != ( (form.c_str() + form.size())) )
7544 0 0 add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJR, RBR, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
7674 0 0 if ( p == ( (form.c_str() + form.size())) )
7683 0 0 if ( _klen > 0 ) {
7688 0 0 if ( _upper < _lower )
7692 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < *_mid )
7694 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > *_mid )
7706 0 0 if ( _klen > 0 ) {
7711 0 0 if ( _upper < _lower )
7715 0 0 if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) < _mid[0] )
7717 0 0 else if ( ( form[form.size() - 1 - (p - form.c_str() - negation_len)]) > _mid[1] )
7731 0 0 if ( _JJS_RBS_trans_actions[_trans] == 0 )
7736 0 0 while ( _nacts-- > 0 )
7741 0 0 { if (best > 'a') best = 'a', remove = 3, append = nullptr; }
7744 0 0 { if (best > 'b') best = 'b', remove = 4, append = nullptr; }
7747 0 0 { if (best > 'c') best = 'c', remove = 4, append = "y"; }
7750 0 0 { if (best > 'd') best = 'd', remove = 3, append = nullptr; }
7753 0 0 { if (best > 'e') best = 'e', remove = 2, append = nullptr; }
7756 0 0 { if (best > 'f') best = 'f', remove = 3, append = nullptr; }
7762 0 0 if ( cs == 0 )
7764 0 0 if ( ++p != ( (form.c_str() + form.size())) )
7770 0 0 add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
0 0 add(JJS, RBS, form.substr(0, form.size() - remove).append(append ? append : ""), negation_len, lemmas);
7853 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
7857 0 0 unsigned length = data.next_1B();
7858 0 0 unknown_tag.assign(data.next(length), length);
0 0 unknown_tag.assign(data.next(length), length);
7869 0 0 if (form.len) {
7872 0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
7873 0 0 if (lemmatags.len) lemmatags.len--, lemmatags.str++;
7876 0 0 while (lemmatags.len) {
7878 0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
7879 0 0 if (!lemmatags.len) break;
7884 0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
0 0 while (lemmatags.len && *lemmatags.str != ' ') lemmatags.len--, lemmatags.str++;
7886 0 0 if (lemmatags.len) lemmatags.len--, lemmatags.str++;
7888 0 0 lemmas.emplace_back(string(lemma_start, lemma_len), string(tag_start, tag_len));
7891 0 0 if (!lemmas.empty()) return NO_GUESSER;
7894 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
7903 0 0 if (lemma.len) {
7906 0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
7908 0 0 if (formtags.len) formtags.len--, formtags.str++;
7912 0 0 while (formtags.len) {
7914 0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
7915 0 0 if (!formtags.len) break;
7920 0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
0 0 while (formtags.len && *formtags.str != ' ') formtags.len--, formtags.str++;
7922 0 0 if (formtags.len) formtags.len--, formtags.str++;
7926 0 0 if (filter.matches(tag.c_str())) {
7927 0 0 if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len));
0 0 if (forms.empty()) forms.emplace_back(string(real_lemma.str, real_lemma.len));
7928 0 0 forms.back().forms.emplace_back(string(form_start, form_len), tag);
7932 0 0 if (any_result) return NO_GUESSER;
7940 0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
7946 0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
0 0 while (lemma_len < lemma.len && lemma.str[lemma_len] != ' ') lemma_len++;
7952 0 0 while (form_len < form.len && form.str[form_len] != ' ') form_len++;
0 0 while (form_len < form.len && form.str[form_len] != ' ') form_len++;
8073 1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
8077 1 0 unsigned length = data.next_1B();
8078 1 0 unknown_tag.assign(data.next(length), length);
8079 1 0 length = data.next_1B();
8080 1 0 number_tag.assign(data.next(length), length);
8081 1 0 length = data.next_1B();
8082 1 0 punctuation_tag.assign(data.next(length), length);
8083 1 0 length = data.next_1B();
8084 1 0 symbol_tag.assign(data.next(length), length);
8087 1 0 dictionary.load(data);
8091 1 0 if (data.next_1B()) {
0 1 if (data.next_1B()) {
8092 0 0 statistical_guesser.reset(new morpho_statistical_guesser());
8093 0 0 statistical_guesser->load(data);
8094 0 0 }
8105 14 0 if (form.len) {
8109 14 0 generate_casing_variants(form, form_uclc, form_lc);
8112 14 0 dictionary.analyze(form, lemmas);
8113 0 14 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
0 0 if (!form_uclc.empty()) dictionary.analyze(form_uclc, lemmas);
8114 4 10 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
4 0 if (!form_lc.empty()) dictionary.analyze(form_lc, lemmas);
8115 4 10 if (!lemmas.empty()) return NO_GUESSER;
8118 4 0 analyze_special(form, lemmas);
8119 0 4 if (!lemmas.empty()) return NO_GUESSER;
8122 0 0 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
0 0 if (guesser == GUESSER && statistical_guesser) {
8123 0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
0 0 if (form_uclc.empty() && form_lc.empty())
8124 0 0 statistical_guesser->analyze(form, lemmas, nullptr);
8126 0 0 morpho_statistical_guesser::used_rules used_rules; used_rules.reserve(3);
8127 0 0 statistical_guesser->analyze(form, lemmas, &used_rules);
8128 0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
0 0 if (!form_uclc.empty()) statistical_guesser->analyze(form_uclc, lemmas, &used_rules);
8129 0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
0 0 if (!form_lc.empty()) statistical_guesser->analyze(form_lc, lemmas, &used_rules);
8132 0 0 if (!lemmas.empty()) return GUESSER;
8135 0 0 lemmas.emplace_back(string(form.str, form.len), unknown_tag);
8144 0 0 if (lemma.len) {
8145 0 0 if (dictionary.generate(lemma, filter, forms))
0 0 if (dictionary.generate(lemma, filter, forms))
8175 4 0 if (!form.len) return;
8183 0 4 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
8184 0 4 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
8185 4 0 if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len);
4 0 if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len);
0 4 if ((codepoint == '.' && number.len) || codepoint == ',') codepoint = utf8::decode(number.str, number.len);
8186 0 4 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
8187 0 4 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
0 0 if (any_digit && (codepoint == 'e' || codepoint == 'E')) {
8189 0 0 if (codepoint == '+' || codepoint == '-') codepoint = utf8::decode(number.str, number.len);
8191 0 0 while (unicode::category(codepoint) & unicode::N) any_digit = true, codepoint = utf8::decode(number.str, number.len);
8194 0 4 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
0 0 if (any_digit && !number.len && (!codepoint || codepoint == '.')) {
8195 0 0 lemmas.emplace_back(string(form.str, form.len), number_tag);
8202 4 4 while (form.len) {
8204 4 0 punctuation = punctuation && unicode::category(codepoint) & unicode::P;
0 4 punctuation = punctuation && unicode::category(codepoint) & unicode::P;
8205 0 4 symbol = symbol && unicode::category(codepoint) & unicode::S;
0 4 symbol = symbol && unicode::category(codepoint) & unicode::S;
8207 4 0 if (punctuation)
8208 4 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), punctuation_tag);
8209 0 0 else if (symbol)
8210 0 0 lemmas.emplace_back(string(form_ori.str, form_ori.len), symbol_tag);
8274 0 0 return unique_ptr(new T(std::forward(args)...));
1 0 return unique_ptr(new T(std::forward(args)...));
0 0 return unique_ptr(new T(std::forward(args)...));
8300 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
8309 0 0 3);
0 0 3);
8310 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
8316 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
8322 1 0 if (res->load(is)) return res.release();
1 0 if (res->load(is)) return res.release();
8328 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
8334 0 0 if (!derinet->load(is)) return nullptr;
0 0 if (!derinet->load(is)) return nullptr;
8336 0 0 unique_ptr dictionary(load(is));
8337 0 0 if (!dictionary) return nullptr;
8348 0 0 ifstream f(path_from_utf8(fname).c_str(), ifstream::binary);
8349 0 0 if (!f) return nullptr;
8351 0 0 return load(f);
8378 0 0 for (auto&& tag : tags) {
8380 0 0 for (unsigned i = 0; i < tag.size(); i++)
8391 0 0 if (!used) return false;
8393 0 0 for (auto&& used_rule : *used)
8394 0 0 if (used_rule == rule)
8406 0 0 string rule_label; rule_label.reserve(12);
8408 0 0 for (; suffix_len < form.len; suffix_len++) {
8409 0 0 rule_label.push_back(form.str[form.len - (suffix_len + 1)]);
8410 0 0 if (!rules.at(rule_label.c_str(), rule_label.size(), [](pointer_decoder& data){ data.next(data.next_2B()); }))
8414 0 0 for (suffix_len++; suffix_len--; ) {
8416 0 0 rule_label.push_back(' ');
8420 0 0 for (unsigned prefix_len = 0; prefix_len + suffix_len <= form.len; prefix_len++) {
8421 0 0 if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]);
0 0 if (prefix_len) rule_label.push_back(form.str[prefix_len - 1]);
8423 0 0 if (!found) break;
8424 0 0 if (*(found += sizeof(uint16_t))) {
8430 0 0 if (rule) {
8432 0 0 if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' '
0 0 if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' '
0 0 if (rule_label.size() > 1 && !contains(used, rule_label)) { // ignore rule ' '
8433 0 0 if (used) used->push_back(rule_label);
0 0 if (used) used->push_back(rule_label);
8434 0 0 for (int rules_len = *rule++; rules_len; rules_len--) {
8441 0 0 if (pref_del_len + suff_del_len > form.len ||
0 0 if (pref_del_len + suff_del_len > form.len ||
8442 0 0 (pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) ||
0 0 (pref_del_len && !small_memeq(pref_del, form.str, pref_del_len)) ||
8443 0 0 (suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) ||
0 0 (suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) ||
0 0 (suff_del_len && !small_memeq(suff_del, form.str + form.len - suff_del_len, suff_del_len)) ||
8448 0 0 lemma.reserve(form.len + pref_add_len - pref_del_len + suff_add_len - suff_del_len);
8449 0 0 if (pref_add_len) lemma.append(pref_add, pref_add_len);
0 0 if (pref_add_len) lemma.append(pref_add, pref_add_len);
8450 0 0 if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len);
0 0 if (pref_del_len + suff_del_len < form.len) lemma.append(form.str + pref_del_len, form.len - pref_del_len - suff_del_len);
8451 0 0 if (suff_add_len) lemma.append(suff_add, suff_add_len);
0 0 if (suff_add_len) lemma.append(suff_add, suff_add_len);
8452 0 0 while (tags_len--)
8453 0 0 lemmas.emplace_back(lemma, this->tags[unaligned_load_inc(tags)]);
8461 0 0 if (lemmas.size() == lemmas_initial_size)
8462 0 0 if (!contains(used, string())) {
8463 0 0 if (used) used->push_back(string());
8464 0 0 lemmas.emplace_back(string(form.str, form.len), tags[default_tag]);
8486 0 0 if (!filter) return;
8488 0 0 wildcard.assign(filter);
8491 0 0 for (int tag_pos = 0, filter_pos = 0; filter[filter_pos]; tag_pos++, filter_pos++) {
8492 0 0 if (filter[filter_pos] == '?') continue;
8493 0 0 if (filter[filter_pos] == '[') {
8497 0 0 if (filter[filter_pos] == '^') negate = true, filter_pos++;
8500 0 0 for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
0 0 for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
0 0 for (bool first = true; filter[filter_pos] && (first || filter[filter_pos] != ']'); first = false)
8503 0 0 filters.emplace_back(tag_pos, negate, chars_start, filter_pos - chars_start);
8504 0 0 if (!filter[filter_pos]) break;
8506 0 0 filters.emplace_back(tag_pos, false, filter_pos, 1);
8599 0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
8 3 return it ? unaligned_load(it) : elementary_feature_unknown;
0 11 return it ? unaligned_load(it) : elementary_feature_unknown;
0 11 return it ? unaligned_load(it) : elementary_feature_unknown;
0 11 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
7 0 return it ? unaligned_load(it) : elementary_feature_unknown;
6 1 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
2 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 2 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
0 0 return it ? unaligned_load(it) : elementary_feature_unknown;
8607 1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
8610 1 0 maps.resize(data.next_1B());
1 0 maps.resize(data.next_1B());
8611 26 1 for (auto&& map : maps)
8612 26 0 map.load(data);
0 0 map.load(data);
8654 267 0 if (value < 0x80) *where++ = value;
8655 0 0 else if (value < 0x4000) *where++ = (value >> 7) | 0x80u, *where++ = value & 0x7Fu;
8656 0 0 else if (value < 0x200000) *where++ = (value >> 14) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu;
8657 0 0 else if (value < 0x10000000) *where++ = (value >> 21) | 0x80u, *where++ = ((value >> 14) & 0x7Fu) | 0x80u, *where++ = ((value >> 7) & 0x7Fu) | 0x80u, *where++ = value & 0x7Fu;
8705 0 0 class feature_sequences {
1 0 class feature_sequences {
0 0 class feature_sequences {
8734 0 0 return it ? unaligned_load(it) : 0;
55 53 return it ? unaligned_load(it) : 0;
0 0 return it ? unaligned_load(it) : 0;
8743 0 0 if (!elementary.load(is)) return false;
1 0 if (!elementary.load(is)) return false;
0 0 if (!elementary.load(is)) return false;
8746 0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
0 0 if (!compressor::load(is, data)) return false;
8749 0 0 sequences.resize(data.next_1B());
0 0 sequences.resize(data.next_1B());
1 0 sequences.resize(data.next_1B());
1 0 sequences.resize(data.next_1B());
0 0 sequences.resize(data.next_1B());
0 0 sequences.resize(data.next_1B());
8750 0 0 for (auto&& sequence : sequences) {
21 1 for (auto&& sequence : sequences) {
0 0 for (auto&& sequence : sequences) {
8751 0 0 sequence.dependant_range = data.next_4B();
21 0 sequence.dependant_range = data.next_4B();
0 0 sequence.dependant_range = data.next_4B();
8752 0 0 sequence.elements.resize(data.next_1B());
0 0 sequence.elements.resize(data.next_1B());
21 0 sequence.elements.resize(data.next_1B());
21 0 sequence.elements.resize(data.next_1B());
0 0 sequence.elements.resize(data.next_1B());
0 0 sequence.elements.resize(data.next_1B());
8753 0 0 for (auto&& element : sequence.elements) {
45 21 for (auto&& element : sequence.elements) {
0 0 for (auto&& element : sequence.elements) {
8754 0 0 element.type = elementary_feature_type(data.next_4B());
45 0 element.type = elementary_feature_type(data.next_4B());
0 0 element.type = elementary_feature_type(data.next_4B());
8755 0 0 element.elementary_index = data.next_4B();
45 0 element.elementary_index = data.next_4B();
0 0 element.elementary_index = data.next_4B();
8756 0 0 element.sequence_index = data.next_4B();
45 0 element.sequence_index = data.next_4B();
0 0 element.sequence_index = data.next_4B();
8760 0 0 scores.resize(data.next_1B());
0 0 scores.resize(data.next_1B());
1 0 scores.resize(data.next_1B());
1 0 scores.resize(data.next_1B());
0 0 scores.resize(data.next_1B());
0 0 scores.resize(data.next_1B());
8761 0 0 for (auto&& score : scores)
21 1 for (auto&& score : scores)
0 0 for (auto&& score : scores)
8762 0 0 score.load(data);
21 0 score.load(data);
0 0 score.load(data);
0 0 score.load(data);
0 0 score.load(data);
0 0 score.load(data);
8782 0 0 cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {}
0 0 cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {}
0 0 cache_element(int elements) : key(vli::max_length() * elements), key_size(0), score(0) {}
8790 0 0 caches.reserve(self.sequences.size());
1 0 caches.reserve(self.sequences.size());
0 0 caches.reserve(self.sequences.size());
8792 0 0 for (auto&& sequence : self.sequences) {
21 1 for (auto&& sequence : self.sequences) {
0 0 for (auto&& sequence : self.sequences) {
8793 0 0 caches.emplace_back(int(sequence.elements.size()));
21 0 caches.emplace_back(int(sequence.elements.size()));
0 0 caches.emplace_back(int(sequence.elements.size()));
8794 0 0 if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size();
1 20 if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size();
0 0 if (int(sequence.elements.size()) > max_sequence_elements) max_sequence_elements = sequence.elements.size();
8795 0 0 for (auto&& element : sequence.elements)
45 21 for (auto&& element : sequence.elements)
0 0 for (auto&& element : sequence.elements)
8796 0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
26 19 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
2 24 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
0 0 if (element.type == PER_TAG && 1 - element.sequence_index > max_window_size)
8799 0 0 key.resize(max_sequence_elements * vli::max_length());
1 0 key.resize(max_sequence_elements * vli::max_length());
0 0 key.resize(max_sequence_elements * vli::max_length());
8800 0 0 window.resize(max_window_size);
1 0 window.resize(max_window_size);
0 0 window.resize(max_window_size);
8811 0 0 if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2);
1 1 if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2);
0 0 if (forms.size() > c.elementary_per_form.size()) c.elementary_per_form.resize(forms.size() * 2);
8812 0 0 if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2);
1 1 if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2);
0 0 if (forms.size() > c.elementary_per_tag.size()) c.elementary_per_tag.resize(forms.size() * 2);
8813 0 0 for (unsigned i = 0; i < forms.size(); i++)
7 2 for (unsigned i = 0; i < forms.size(); i++)
0 0 for (unsigned i = 0; i < forms.size(); i++)
8814 0 0 if (analyses[i].size() > c.elementary_per_tag[i].size())
5 2 if (analyses[i].size() > c.elementary_per_tag[i].size())
0 0 if (analyses[i].size() > c.elementary_per_tag[i].size())
8822 0 0 for (auto&& cache : c.caches)
42 2 for (auto&& cache : c.caches)
0 0 for (auto&& cache : c.caches)
8828 0 0 elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic);
20 2 elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic);
0 0 elementary.compute_dynamic_features((*c.analyses)[form_index][tag_index], c.elementary_per_form[form_index], c.elementary_per_tag[form_index][tag_index], form_index > 0 ? prev_dynamic : nullptr, dynamic);
8834 0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
54 16 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
2 52 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
52 18 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
0 0 for (int i = 0; i < int(c.window.size()) && form_index - i >= 0; i++)
8839 0 0 for (unsigned i = 0; i < sequences.size(); i++) {
190 7 for (unsigned i = 0; i < sequences.size(); i++) {
0 0 for (unsigned i = 0; i < sequences.size(); i++) {
8840 0 0 if (tags_unchanged >= sequences[i].dependant_range)
179 11 if (tags_unchanged >= sequences[i].dependant_range)
0 0 if (tags_unchanged >= sequences[i].dependant_range)
8844 0 0 for (unsigned j = 0; j < sequences[i].elements.size(); j++) {
332 114 for (unsigned j = 0; j < sequences[i].elements.size(); j++) {
0 0 for (unsigned j = 0; j < sequences[i].elements.size(); j++) {
8850 0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
91 4 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
86 5 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 || unsigned(form_index + element.sequence_index) >= c.forms->size() ? elementary_feature_empty : c.elementary_per_form[form_index + element.sequence_index].values[element.elementary_index];
8853 0 0 value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index];
235 2 value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index];
0 0 value = form_index + element.sequence_index < 0 ? elementary_feature_empty : c.window[-element.sequence_index]->values[element.elementary_index];
8860 0 0 if (value == elementary_feature_unknown) {
65 267 if (value == elementary_feature_unknown) {
0 0 if (value == elementary_feature_unknown) {
8869 0 0 if (!key_size) {
65 114 if (!key_size) {
0 0 if (!key_size) {
8872 0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
84 30 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
78 6 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
108 6 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
0 0 } else if (key_size != c.caches[i].key_size || !small_memeq(c.key.data(), c.caches[i].key.data(), key_size)) {
8933 0 0 cache(const viterbi& self) : features_cache(self.features) {}
1 0 cache(const viterbi& self) : features_cache(self.features) {}
0 0 cache(const viterbi& self) : features_cache(self.features) {}
8946 0 0 if (!forms.size()) return;
2 0 if (!forms.size()) return;
0 0 if (!forms.size()) return;
8950 0 0 for (unsigned i = 0, states = 1; i < forms.size(); i++) {
7 2 for (unsigned i = 0, states = 1; i < forms.size(); i++) {
0 0 for (unsigned i = 0, states = 1; i < forms.size(); i++) {
8951 0 0 if (analyses[i].empty()) return;
7 0 if (analyses[i].empty()) return;
0 0 if (analyses[i].empty()) return;
8952 0 0 states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size();
3 4 states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size();
0 0 states = (i+1 >= unsigned(decoding_order) ? states / analyses[i-decoding_order+1].size() : states) * analyses[i].size();
8955 0 0 if (nodes > c.nodes.size()) c.nodes.resize(nodes);
2 0 if (nodes > c.nodes.size()) c.nodes.resize(nodes);
0 0 if (nodes > c.nodes.size()) c.nodes.resize(nodes);
8961 0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 2 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
0 0 int* window = window_size <= 16 ? window_stack : (window_heap.resize(window_size), window_heap.data());
8967 0 0 for (unsigned i = 0; i < forms.size(); i++) {
7 2 for (unsigned i = 0; i < forms.size(); i++) {
0 0 for (unsigned i = 0; i < forms.size(); i++) {
8970 0 0 for (int j = 0; j < window_size; j++) window[j] = -1;
7 21 for (int j = 0; j < window_size; j++) window[j] = -1;
0 0 for (int j = 0; j < window_size; j++) window[j] = -1;
8971 0 0 for (int tag = 0; tag < int(analyses[i].size()); tag++)
11 7 for (int tag = 0; tag < int(analyses[i].size()); tag++)
0 0 for (int tag = 0; tag < int(analyses[i].size()); tag++)
8972 0 0 for (int prev = nodes_prev; prev < nodes_now; prev++) {
22 11 for (int prev = nodes_prev; prev < nodes_now; prev++) {
0 0 for (int prev = nodes_prev; prev < nodes_now; prev++) {
8976 0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
45 14 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
37 8 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
0 0 for (int p = prev, n = 1; p >= 0 && n < window_size; p = c.nodes[p].prev, n++) {
8977 0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
15 22 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
11 4 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
0 0 same_tags += same_tags == n && window[n] == c.nodes[p].tag;
8982 0 0 features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache);
20 2 features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache);
0 0 features.compute_dynamic_features(i, tag, prev >= 0 ? &c.nodes[prev].dynamic : nullptr, dynamic, c.features_cache);
8983 0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
6 16 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
2 4 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
20 2 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
0 0 score = (nodes_prev + 1 == nodes_now && analyses[i].size() == 1 ? 0 : features.score(i, window, same_tags, dynamic, c.features_cache)) +
8987 0 0 if (same_tags >= decoding_order-1) {
4 18 if (same_tags >= decoding_order-1) {
0 0 if (same_tags >= decoding_order-1) {
8988 0 0 if (score <= c.nodes[nodes_next-1].score) continue;
4 0 if (score <= c.nodes[nodes_next-1].score) continue;
0 0 if (score <= c.nodes[nodes_next-1].score) continue;
9003 0 0 for (int node = nodes_prev + 1; node < nodes_now; node++)
3 2 for (int node = nodes_prev + 1; node < nodes_now; node++)
0 0 for (int node = nodes_prev + 1; node < nodes_now; node++)
9004 0 0 if (c.nodes[node].score > c.nodes[best].score)
1 2 if (c.nodes[node].score > c.nodes[best].score)
0 0 if (c.nodes[node].score > c.nodes[best].score)
9007 0 0 for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev)
7 2 for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev)
0 0 for (int i = forms.size() - 1; i >= 0; i--, best = c.nodes[best].prev)
9056 0 0 maps.resize(MAP_TOTAL);
9105 0 0 for (unsigned i = forms.size(); i--;) {
9109 0 0 for (unsigned j = 0; j < analyses[i].size(); j++) {
9120 0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] :
0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == lemma ? per_tag[i][j-1].values[LEMMA] :
9125 0 0 if (index == string::npos) index = tag.size();
9126 0 0 per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0));
0 0 per_tag[i][j].values[TAG_UPOS] = maps[MAP_TAG_UPOS].value(tag.c_str() + (index ? 1 : 0), index - (index ? 1 : 0));
9128 0 0 if (index < tag.size()) index++;
9129 0 0 if (index < tag.size()) index = tag.find(separator, index);
9130 0 0 if (index < tag.size()) index++;
9131 0 0 for (size_t length; index < tag.size(); index += length + 1) {
9133 0 0 length = (length == string::npos ? tag.size() : length) - index;
9135 0 0 for (size_t equal_sign = 0; equal_sign + 1 < length; equal_sign++)
9136 0 0 if (tag[index + equal_sign] == '=') {
9140 0 0 if (tag.compare(index, equal_sign, "Case") == 0) value = TAG_CASE, map = MAP_TAG_CASE;
9143 0 0 if (tag.compare(index, equal_sign, "Gender") == 0) value = TAG_GENDER, map = MAP_TAG_GENDER;
9144 0 0 if (tag.compare(index, equal_sign, "Number") == 0) value = TAG_NUMBER, map = MAP_TAG_NUMBER;
9145 0 0 if (tag.compare(index, equal_sign, "Person") == 0) value = TAG_PERSON, map = MAP_TAG_PERSON;
9148 0 0 if (tag.compare(index, equal_sign, "Negative") == 0) value = TAG_NEGATIVE, map = MAP_TAG_NEGATIVE;
9152 0 0 if (value >= 0)
9158 0 0 if (tag.size() >= 2 && tag[1] == 'V') {
0 0 if (tag.size() >= 2 && tag[1] == 'V') {
0 0 if (tag.size() >= 2 && tag[1] == 'V') {
9160 0 0 verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
0 0 verb_candidate = verb_candidate < 0 || (tag_compare = tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
9170 0 0 if (verb_candidate >= 0) {
9176 0 0 if (analyses[i].size() == 1) {
9184 0 0 } else if (forms[i].len <= 0) {
9199 0 0 while (form.len) {
9203 0 0 num = num || cat & unicode::N;
0 0 num = num || cat & unicode::N;
9204 0 0 cap = cap || cat & unicode::Lut;
0 0 cap = cap || cat & unicode::Lut;
9205 0 0 dash = dash || cat & unicode::Pd;
0 0 dash = dash || cat & unicode::Pd;
9207 0 0 if (index == 10 || (!form.len && index < 10)) {
0 0 if (index == 10 || (!form.len && index < 10)) {
0 0 if (index == 10 || (!form.len && index < 10)) {
9237 0 0 if (prev_dynamic) {
9245 0 0 if (tag.tag.size() >= 2 && tag.tag[1] == 'V') {
0 0 if (tag.tag.size() >= 2 && tag.tag[1] == 'V') {
0 0 if (tag.tag.size() >= 2 && tag.tag[1] == 'V') {
9299 0 0 maps.resize(MAP_TOTAL);
9335 0 0 for (unsigned i = forms.size(); i--;) {
9339 0 0 for (unsigned j = 0; j < analyses[i].size(); j++) {
9342 0 0 per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty;
9343 0 0 per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty;
9344 0 0 per_tag[i][j].values[TAG25] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG25].value((tag25[0] = analyses[i][j].tag[1], tag25[1] = analyses[i][j].tag[4], tag25), 2) : elementary_feature_empty;
9345 0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
0 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
9348 0 0 if (analyses[i][j].tag[0] == 'V') {
9350 0 0 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
0 0 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
9360 0 0 if (verb_candidate >= 0) {
9366 0 0 if (analyses[i].size() == 1) {
9370 0 0 } else if (forms[i].len <= 0) {
9381 0 0 while (form.len) {
9385 0 0 num = num || cat & unicode::N;
0 0 num = num || cat & unicode::N;
9386 0 0 cap = cap || cat & unicode::Lut;
0 0 cap = cap || cat & unicode::Lut;
9387 0 0 dash = dash || cat & unicode::Pd;
0 0 dash = dash || cat & unicode::Pd;
9389 0 0 if (index == 5 || (!form.len && index < 5)) {
0 0 if (index == 5 || (!form.len && index < 5)) {
0 0 if (index == 5 || (!form.len && index < 5)) {
9409 0 0 if (prev_dynamic) {
9417 0 0 if (tag.tag[0] == 'V') {
9471 1 0 maps.resize(MAP_TOTAL);
9519 7 2 for (unsigned i = forms.size(); i--;) {
9523 11 7 for (unsigned j = 0; j < analyses[i].size(); j++) {
9525 11 0 per_tag[i][j].values[TAG1] = analyses[i][j].tag.size() >= 1 ? maps[MAP_TAG1].value(analyses[i][j].tag.c_str() + 0, 1) : elementary_feature_empty;
9526 11 0 per_tag[i][j].values[TAG2] = analyses[i][j].tag.size() >= 2 ? maps[MAP_TAG2].value(analyses[i][j].tag.c_str() + 1, 1) : elementary_feature_empty;
9527 11 0 per_tag[i][j].values[TAG3] = analyses[i][j].tag.size() >= 3 ? maps[MAP_TAG3].value(analyses[i][j].tag.c_str() + 2, 1) : elementary_feature_empty;
9528 2 9 per_tag[i][j].values[TAG4] = analyses[i][j].tag.size() >= 4 ? maps[MAP_TAG4].value(analyses[i][j].tag.c_str() + 3, 1) : elementary_feature_empty;
9529 0 11 per_tag[i][j].values[TAG5] = analyses[i][j].tag.size() >= 5 ? maps[MAP_TAG5].value(analyses[i][j].tag.c_str() + 4, 1) : elementary_feature_empty;
9530 4 7 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
4 0 per_tag[i][j].values[LEMMA] = j && analyses[i][j-1].lemma == analyses[i][j].lemma ? per_tag[i][j-1].values[LEMMA] :
9533 3 8 if (analyses[i][j].tag[0] == 'V') {
9535 1 2 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
1 0 verb_candidate = verb_candidate < 0 || (tag_compare = analyses[i][j].tag.compare(analyses[i][verb_candidate].tag), tag_compare < 0) || (tag_compare == 0 && analyses[i][j].lemma < analyses[i][verb_candidate].lemma) ? j : verb_candidate;
9545 2 5 if (verb_candidate >= 0) {
9551 5 2 if (analyses[i].size() == 1) {
9559 0 2 } else if (forms[i].len <= 0) {
9574 9 2 while (form.len) {
9578 9 0 num = num || cat & unicode::N;
9 0 num = num || cat & unicode::N;
9579 9 0 cap = cap || cat & unicode::Lut;
9 0 cap = cap || cat & unicode::Lut;
9580 9 0 dash = dash || cat & unicode::Pd;
9 0 dash = dash || cat & unicode::Pd;
9582 9 0 if (index == 10 || (!form.len && index < 10)) {
7 2 if (index == 10 || (!form.len && index < 10)) {
0 2 if (index == 10 || (!form.len && index < 10)) {
9612 20 2 if (prev_dynamic) {
9620 3 19 if (tag.tag[0] == 'V') {
9667 0 0 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
0 0 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
9676 0 0 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
0 0 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
0 2 while (lock.test_and_set(memory_order_acquire)) {}
9677 0 0 if (!stack.empty()) {
1 1 if (!stack.empty()) {
0 0 if (!stack.empty()) {
1 1 if (!stack.empty()) {
1 1 if (!stack.empty()) {
9728 0 0 cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {}
1 0 cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {}
0 0 cache(const perceptron_tagger& self) : decoder_cache(self.decoder) {}
9742 0 0 if (dict.reset(morpho::load(is)), !dict) return false;
1 0 if (dict.reset(morpho::load(is)), !dict) return false;
0 0 if (dict.reset(morpho::load(is)), !dict) return false;
9744 0 0 if (!features.load(is)) return false;
1 0 if (!features.load(is)) return false;
0 0 if (!features.load(is)) return false;
9756 0 0 if (!dict) return;
2 0 if (!dict) return;
0 0 if (!dict) return;
9759 0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
1 1 if (!c) c = new cache(*this);
1 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
9762 0 0 if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size());
2 0 if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size());
0 0 if (c->analyses.size() < forms.size()) c->analyses.resize(forms.size());
9763 0 0 for (unsigned i = 0; i < forms.size(); i++) {
7 2 for (unsigned i = 0; i < forms.size(); i++) {
0 0 for (unsigned i = 0; i < forms.size(); i++) {
9766 0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
7 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
7 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
0 0 dict->analyze(forms[i], guesser >= 0 ? guesser : use_guesser ? morpho::GUESSER : morpho::NO_GUESSER, c->analyses[i]);
9769 0 0 if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2);
1 1 if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2);
0 0 if (c->tags.size() < forms.size()) c->tags.resize(forms.size() * 2);
9772 0 0 for (unsigned i = 0; i < forms.size(); i++)
7 2 for (unsigned i = 0; i < forms.size(); i++)
0 0 for (unsigned i = 0; i < forms.size(); i++)
9783 0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
0 0 if (!c) c = new cache(*this);
9878 1 0 tagger_id id = tagger_id(is.get());
0 1 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
0 0 tagger_id id = tagger_id(is.get());
9884 0 0 auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id));
9885 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
9893 1 0 auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id));
9894 1 0 if (res->load(is)) return res.release();
1 0 if (res->load(is)) return res.release();
9901 0 0 auto res = new_unique_ptr>>(tagger_ids::decoding_order(id), tagger_ids::window_size(id));
9902 0 0 if (res->load(is)) return res.release();
0 0 if (res->load(is)) return res.release();
9911 0 0 ifstream f(path_from_utf8(fname).c_str(), ifstream::binary);
9912 0 0 if (!f) return nullptr;
9914 0 0 return load(f);
9919 0 0 return morpho ? morpho->new_tokenizer() : nullptr;
10024 0 0 for (int i = 0; i < 15 && pdt_tag[i]; i++)
0 0 for (int i = 0; i < 15 && pdt_tag[i]; i++)
10025 0 0 if (pdt_tag[i] != '-') {
10026 0 0 if (!tag.empty()) tag.push_back('|');
10033 0 0 for (unsigned i = 0; i + 2 < lemma.size(); i++)
10034 0 0 if (lemma[i] == '_' && lemma[i + 1] == ';') {
0 0 if (lemma[i] == '_' && lemma[i + 1] == ';') {
0 0 if (lemma[i] == '_' && lemma[i + 1] == ';') {
10035 0 0 if (!tag.empty()) tag.push_back('|');
10044 0 0 return raw_lemma < lemma.size() ? (lemma.resize(raw_lemma), true) : false;
10055 0 0 for (auto&& tagged_lemma : tagged_lemmas) {
10061 0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
10069 0 0 for (auto&& tagged_lemma_forms : forms) {
10070 0 0 for (auto&& tagged_form : tagged_lemma_forms.forms)
10076 0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
10130 0 0 return lemma_id_len < lemma.size() ? (lemma.resize(lemma_id_len), true) : false;
10140 0 0 for (auto&& tagged_lemma : tagged_lemmas)
10144 0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
10152 0 0 for (auto&& tagged_lemma_forms : forms)
10156 0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
10210 0 0 return raw_lemma_len < lemma.size() ? (lemma.resize(raw_lemma_len), true) : false;
10220 0 0 for (auto&& tagged_lemma : tagged_lemmas)
10224 0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
0 0 if (!lemma_changed || tagged_lemmas.size() < 2) return;
10232 0 0 for (auto&& tagged_lemma_forms : forms)
10236 0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
0 0 if (!lemma_changed || forms.size() < 2) return;
10275 0 0 if (name == "pdt_to_conll2009") return tagset_converter::new_pdt_to_conll2009_converter();
10276 0 0 if (name == "strip_lemma_comment") return tagset_converter::new_strip_lemma_comment_converter(dictionary);
10277 0 0 if (name == "strip_lemma_id") return tagset_converter::new_strip_lemma_id_converter(dictionary);
10284 0 0 inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; }
0 0 inline static bool eq(const tagged_lemma& a, const tagged_lemma& b) { return a.lemma == b.lemma && a.tag == b.tag; }
10285 0 0 inline static bool lt(const tagged_lemma& a, const tagged_lemma& b) { int lemma_compare = a.lemma.compare(b.lemma); return lemma_compare < 0 || (lemma_compare == 0 && a.tag < b.tag); }
10294 0 0 for (unsigned i = 0; i < forms.size(); i++) {
10296 0 0 for (unsigned j = forms.size() - 1; j > i; j--)
10297 0 0 if (forms[j].lemma == forms[i].lemma) {
10299 0 0 for (auto&& tagged_form : forms[j].forms)
10303 0 0 if (j < forms.size() - 1) {
10311 0 0 if (any_merged && forms[i].forms.size() > 1) {
0 0 if (any_merged && forms[i].forms.size() > 1) {
0 0 if (any_merged && forms[i].forms.size() > 1) {
10314 0 0 inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; }
0 0 inline static bool eq(const tagged_form& a, const tagged_form& b) { return a.tag == b.tag && a.form == b.form; }
10315 0 0 inline static bool lt(const tagged_form& a, const tagged_form& b) { int tag_compare = a.tag.compare(b.tag); return tag_compare < 0 || (tag_compare == 0 && a.form < b.form); }
10473 321 3 const unordered_set czech_tokenizer::abbreviations_czech = {
0 0 const unordered_set czech_tokenizer::abbreviations_czech = {
10489 309 3 const unordered_set czech_tokenizer::abbreviations_slovak = {
0 0 const unordered_set czech_tokenizer::abbreviations_slovak = {
10506 0 0 : ragel_tokenizer(version <= 1 ? 1 : 2), m(m) {
0 0 : ragel_tokenizer(version <= 1 ? 1 : 2), m(m) {
10520 0 0 if (!m) return;
10521 0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unicode::L) return;
10524 0 0 for (unsigned hyphens = 1; hyphens <= 2; hyphens++) {
10526 0 0 if (tokens.size() < 2*hyphens + 1) break;
10528 0 0 if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P ||
0 0 if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P ||
0 0 if (tokens[first_hyphen].length != 1 || chars[tokens[first_hyphen].start].cat & ~unicode::P ||
10529 0 0 tokens[first_hyphen].start + tokens[first_hyphen].length != tokens[first_hyphen + 1].start ||
10530 0 0 tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start ||
0 0 tokens[first_hyphen-1].start + tokens[first_hyphen-1].length != tokens[first_hyphen].start ||
10534 0 0 if (m->analyze(string_piece(chars[tokens[first_hyphen-1].start].str, chars[tokens.back().start + tokens.back().length].str - chars[tokens[first_hyphen-1].start].str), morpho::NO_GUESSER, lemmas) >= 0)
10538 0 0 if (matched_hyphens) {
10552 0 0 while (tokenize_url_email(tokens))
10553 0 0 if (emergency_sentence_split(tokens))
10569 0 0 if ( ( current) == ( (chars.size() - 1)) )
10574 0 0 switch ( _czech_tokenizer_from_state_actions[cs] ) {
10583 0 0 if ( _klen > 0 ) {
10588 0 0 if ( _upper < _lower )
10592 0 0 if ( _widec < _mid[0] )
10594 0 0 else if ( _widec > _mid[1] )
10600 0 0 if (
10601 0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
10606 0 0 if (
10607 0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
10620 0 0 if ( _klen > 0 ) {
10625 0 0 if ( _upper < _lower )
10629 0 0 if ( _widec < *_mid )
10631 0 0 else if ( _widec > *_mid )
10643 0 0 if ( _klen > 0 ) {
10648 0 0 if ( _upper < _lower )
10652 0 0 if ( _widec < _mid[0] )
10654 0 0 else if ( _widec > _mid[1] )
10669 0 0 if ( _czech_tokenizer_trans_actions[_trans] == 0 )
10683 0 0 do
10684 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
10692 0 0 for (current = ts; current < whitespace; current++)
10695 0 0 if (eos) {( current)++; goto _out; }
10700 0 0 if (!tokens.empty()) {( current)++; goto _out; }
10702 0 0 do
10703 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
10712 0 0 do
10713 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
10721 0 0 do
10722 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
10729 0 0 if (!tokens.empty()) {( current)++; goto _out; }
10731 0 0 do
10732 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
10741 0 0 do
10742 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
10750 0 0 switch ( _czech_tokenizer_to_state_actions[cs] ) {
10756 0 0 if ( cs == 0 )
10758 0 0 if ( ++( current) != ( (chars.size() - 1)) )
10761 0 0 if ( ( current) == ( (chars.size() - 1)) )
10763 0 0 if ( _czech_tokenizer_eof_trans[cs] > 0 ) {
10795 342 3 const unordered_set english_tokenizer::abbreviations = {
0 0 const unordered_set english_tokenizer::abbreviations = {
10894 0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return;
0 0 if (tokens.empty() || chars[tokens.back().start].cat & ~unilib::unicode::L) return;
10909 0 0 if ( ( index) == ( end) )
10918 0 0 if ( _klen > 0 ) {
10923 0 0 if ( _upper < _lower )
10927 0 0 if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < *_mid )
10929 0 0 else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > *_mid )
10941 0 0 if ( _klen > 0 ) {
10946 0 0 if ( _upper < _lower )
10950 0 0 if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) < _mid[0] )
10952 0 0 else if ( ( ragel_char(chars[tokens.back().start + end - index - 1])) > _mid[1] )
10966 0 0 if ( _english_tokenizer_split_token_trans_actions[_trans] == 0 )
10980 0 0 if ( cs == 0 )
10982 0 0 if ( ++( index) != ( end) )
10985 0 0 if ( ( index) == ( end) )
10987 0 0 switch ( _english_tokenizer_split_token_eof_actions[cs] ) {
10997 0 0 if (split_len && split_len < end) {
11151 0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 english_tokenizer::english_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
11160 0 0 while (tokenize_url_email(tokens))
11161 0 0 if (emergency_sentence_split(tokens))
11177 0 0 if ( ( current) == ( (chars.size() - 1)) )
11182 0 0 switch ( _english_tokenizer_from_state_actions[cs] ) {
11191 0 0 if ( _klen > 0 ) {
11196 0 0 if ( _upper < _lower )
11200 0 0 if ( _widec < _mid[0] )
11202 0 0 else if ( _widec > _mid[1] )
11208 0 0 if (
11209 0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
11214 0 0 if (
11215 0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
11228 0 0 if ( _klen > 0 ) {
11233 0 0 if ( _upper < _lower )
11237 0 0 if ( _widec < *_mid )
11239 0 0 else if ( _widec > *_mid )
11251 0 0 if ( _klen > 0 ) {
11256 0 0 if ( _upper < _lower )
11260 0 0 if ( _widec < _mid[0] )
11262 0 0 else if ( _widec > _mid[1] )
11277 0 0 if ( _english_tokenizer_trans_actions[_trans] == 0 )
11291 0 0 do
11292 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11300 0 0 for (current = ts; current < whitespace; current++)
11303 0 0 if (eos) {( current)++; goto _out; }
11308 0 0 if (!tokens.empty()) {( current)++; goto _out; }
11310 0 0 do
11311 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11320 0 0 do
11321 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11329 0 0 do
11330 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11337 0 0 if (!tokens.empty()) {( current)++; goto _out; }
11339 0 0 do
11340 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11349 0 0 do
11350 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11358 0 0 switch ( _english_tokenizer_to_state_actions[cs] ) {
11364 0 0 if ( cs == 0 )
11366 0 0 if ( ++( current) != ( (chars.size() - 1)) )
11369 0 0 if ( ( current) == ( (chars.size() - 1)) )
11371 0 0 if ( _english_tokenizer_eof_trans[cs] > 0 ) {
11528 3 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
0 0 generic_tokenizer::generic_tokenizer(unsigned version) : ragel_tokenizer(version <= 1 ? 1 : 2) {}
11537 0 2 while (tokenize_url_email(tokens))
11538 0 0 if (emergency_sentence_split(tokens))
11554 2 0 if ( ( current) == ( (chars.size() - 1)) )
11559 10 27 switch ( _generic_tokenizer_from_state_actions[cs] ) {
11568 10 27 if ( _klen > 0 ) {
11573 17 10 if ( _upper < _lower )
11577 3 14 if ( _widec < _mid[0] )
11579 14 0 else if ( _widec > _mid[1] )
11585 0 0 if (
11586 0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
0 0 !current || (chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N | unicode::Pd)) ) _widec += 256;
11591 0 0 if (
11592 0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
0 0 !current || ((chars[current-1].cat & ~(unicode::L | unicode::M | unicode::N)) && chars[current-1].chr != '+') ) _widec += 256;
11605 37 0 if ( _klen > 0 ) {
11610 112 27 if ( _upper < _lower )
11614 66 46 if ( _widec < *_mid )
11616 36 10 else if ( _widec > *_mid )
11628 24 3 if ( _klen > 0 ) {
11633 32 5 if ( _upper < _lower )
11637 10 22 if ( _widec < _mid[0] )
11639 3 19 else if ( _widec > _mid[1] )
11654 12 27 if ( _generic_tokenizer_trans_actions[_trans] == 0 )
11667 0 0 do
11668 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11676 0 0 for (current = ts; current < whitespace; current++)
11679 0 0 if (eos) {( current)++; goto _out; }
11684 0 0 if (!tokens.empty()) {( current)++; goto _out; }
11686 0 0 do
11687 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11695 0 7 do
11696 0 7 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11704 0 3 do
11705 0 3 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11712 0 0 if (!tokens.empty()) {( current)++; goto _out; }
11714 0 0 do
11715 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11723 0 0 do
11724 0 0 if (emergency_sentence_split(tokens)) { ( current)--; {( current)++; goto _out; } }
11732 10 29 switch ( _generic_tokenizer_to_state_actions[cs] ) {
11738 39 0 if ( cs == 0 )
11740 35 4 if ( ++( current) != ( (chars.size() - 1)) )
11743 4 0 if ( ( current) == ( (chars.size() - 1)) )
11745 2 2 if ( _generic_tokenizer_eof_trans[cs] > 0 ) {
12127 3 0 initialize_ragel_map();
12131 0 7 while (ragel_map_flag.test_and_set()) {}
12132 2 5 if (ragel_map.empty()) {
12133 256 2 for (uint8_t ascii = 0; ascii < 128; ascii++)
12145 2 6 if (chr >= ragel_map.size())
12165 10 0 if ( ( current) == ( (chars.size() - 1)) )
12173 0 22 if ( _klen > 0 ) {
12178 0 0 if ( _upper < _lower )
12182 0 0 if ( _widec < _mid[0] )
12184 0 0 else if ( _widec > _mid[1] )
12190 0 0 if (
12196 0 0 if (
12210 22 0 if ( _klen > 0 ) {
12215 63 22 if ( _upper < _lower )
12219 12 51 if ( _widec < *_mid )
12221 51 0 else if ( _widec > *_mid )
12233 22 0 if ( _klen > 0 ) {
12238 61 8 if ( _upper < _lower )
12242 8 53 if ( _widec < _mid[0] )
12244 39 14 else if ( _widec > _mid[1] )
12258 0 22 if ( _ragel_url_email_trans_actions[_trans] == 0 )
12279 14 8 if ( cs == 0 )
12281 12 2 if ( ++( current) != ( (chars.size() - 1)) )
12287 0 10 if (end > start) {
12316 1 0 vertical_tokenizer() : unicode_tokenizer(0) {}
12372 4 0 ragel_tokenizer::initialize_ragel_map();
12374 4 0 set_text(string_piece(nullptr, 0));
12380 3 4 if (make_copy && text.str) {
3 0 if (make_copy && text.str) {
12387 145 7 for (const char* curr_str = text.str; text.len; curr_str = text.str)
12393 7 0 vector& tokens = tokens_ptr ? *tokens_ptr : tokens_buffer;
12395 7 0 if (forms) forms->clear();
12396 4 3 if (current >= chars.size() - 1) return false;
12399 4 0 if (forms)
12400 33 4 for (auto&& token : tokens)
12407 10 2 if (current >= chars.size() - 1) return false;
12409 10 0 return url_email_tokenizer ? ragel_tokenizer::ragel_url_email(url_email_tokenizer, chars, current, tokens) : false;
12416 0 10 return tokens.size() >= 500 ||
12417 10 0 (tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) ||
0 0 (tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) ||
0 10 (tokens.size() >= 450 && chars[tokens.back().start].cat & unicode::P) ||
12418 0 0 (tokens.size() >= 400 && chars[tokens.back().start].cat & unicode::Po);
12424 0 0 if (eos_chr == '.' && !tokens.empty()) {
0 0 if (eos_chr == '.' && !tokens.empty()) {
0 0 if (eos_chr == '.' && !tokens.empty()) {
12426 0 0 if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut)
0 0 if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut)
0 0 if (tokens.back().length == 1 && chars[tokens.back().start].cat & unicode::Lut)
12430 0 0 if (abbreviations) {
12432 0 0 for (size_t i = 0; i < tokens.back().length; i++)
12434 0 0 if (abbreviations->count(eos_buffer))
12459 2 0 if (current >= chars.size() - 1) return false;
12463 116 2 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
116 0 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
26 90 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
90 28 while (current < chars.size() - 1 && chars[current].chr != '\r' && chars[current].chr != '\n') current++;
12466 26 2 if (current < chars.size() - 1) {
12468 26 0 if (current < chars.size() - 1 &&
0 26 if (current < chars.size() - 1 &&
0 26 if (current < chars.size() - 1 &&
12469 0 0 ((chars[current-1].chr == '\r' && chars[current].chr == '\n') ||
26 0 ((chars[current-1].chr == '\r' && chars[current].chr == '\n') ||
12470 0 26 (chars[current-1].chr == '\n' && chars[current].chr == '\r')))
12474 26 2 if (line_start < line_end)
12562 0 0 return {1, 11, 1, ""};
0 0 return {1, 11, 1, ""};
12573 0 0 << (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease
0 0 << (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease
12575 0 0 << (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n"
0 0 << (other_libraries.empty() ? "" : " and ") << other_libraries << ")\n"
12577 0 0 "Mathematics and Physics, Charles University in Prague, Czech Republic.";
12611 42 42 default: return (bilou_entity - B_first) & 1 ? bilou_type_U : bilou_type_B;
12615 28 42 switch (bilou_entity) {
12750 1 0 if (tagger.reset(tagger::load_instance(is)), !tagger) return false;
12751 1 0 if (!named_entities.load(is)) return false;
12754 1 0 if (!templates.load(is, nlp_pipeline(tokenizer.get(), tagger.get()))) return false;
1 0 if (!templates.load(is, nlp_pipeline(tokenizer.get(), tagger.get()))) return false;
12756 1 0 int stages = is.get();
12757 1 0 if (stages == EOF) return false;
12758 1 0 networks.resize(stages);
12759 2 1 for (auto&& network : networks)
12760 2 0 if (!network.load(is)) return false;
2 0 if (!network.load(is)) return false;
12767 2 0 if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return;
2 0 if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return;
2 0 if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return;
0 2 if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return;
2 0 if (forms.empty() || !tagger || !named_entities.size() || !networks.size()) return;
12771 1 1 if (!c) c = new cache();
12777 2 0 if (sentence.size) {
12781 4 2 for (auto&& network : networks) {
12789 14 4 for (unsigned i = 0; i < sentence.size; i++) {
12790 14 0 if (!sentence.probabilities[i].local_filled) {
12796 4 10 if (i == 0) {
12808 7 2 for (unsigned i = 0; i < sentence.size; i++)
12809 3 4 if (sentence.probabilities[i].global.best == bilou_type_U) {
12811 0 4 } else if (sentence.probabilities[i].global.best == bilou_type_B) {
12813 0 0 while (i < sentence.size && sentence.probabilities[i].global.best != bilou_type_L) i++;
0 0 while (i < sentence.size && sentence.probabilities[i].global.best != bilou_type_L) i++;
0 0 while (i < sentence.size && sentence.probabilities[i].global.best != bilou_type_L) i++;
12830 0 0 for (unsigned i = 0; i < types.size(); i++)
12836 0 0 if (gazetteer_types) gazetteer_types->clear();
12842 14 70 for (auto&& prob_bilou : prob.bilou)
12845 126 14 for (bilou_entity::value i = 0; i < outcomes.size(); i++) {
12847 70 56 if (outcomes[i] > prob.bilou[bilou].probability) {
12882 0 0 if (it == str2id.end() && add_entity) {
0 0 if (it == str2id.end() && add_entity) {
0 0 if (it == str2id.end() && add_entity) {
12886 0 0 return it == str2id.end() ? entity_type_unknown : it->second;
12890 0 0 return entity < id2str.size() ? id2str[entity] : empty;
0 0 return entity < id2str.size() ? id2str[entity] : empty;
3 0 return entity < id2str.size() ? id2str[entity] : empty;
0 0 return entity < id2str.size() ? id2str[entity] : empty;
0 0 return entity < id2str.size() ? id2str[entity] : empty;
12895 1 0 if (!compressor::load(is, data)) return false;
1 0 if (!compressor::load(is, data)) return false;
12899 1 0 id2str.resize(data.next_4B());
1 0 id2str.resize(data.next_4B());
12900 3 1 for (unsigned i = 0; i < id2str.size(); i++) {
12901 3 0 data.next_str(id2str[i]);
12903 0 0 }
12930 1 0 switch (id) {
12936 1 0 if (res->load(is)) return res.release();
0 1 if (res->load(is)) return res.release();
12945 1 0 ifstream in(path_from_utf8(fname).c_str(), ifstream::in | ifstream::binary);
12946 1 0 if (!in.is_open()) return nullptr;
12948 1 0 return load(in);
12986 0 0 for (; len--; str++, pos++)
0 0 for (; len--; str++, pos++)
0 0 for (; len--; str++, pos++)
12987 0 0 if (*str == c)
0 0 if (*str == c)
0 0 if (*str == c)
13003 0 0 for (unsigned i = 0; i < forms.size(); i++) {
13007 0 0 if (space < form.len) {
13013 0 0 if (space < form.len) {
13081 1 0 morpho = tagger ? tagger->get_morpho() : nullptr;
13082 1 0 return tagger && morpho;
0 1 return tagger && morpho;
13086 0 0 if (params.empty()) return cerr << "Missing tagger_file argument to morphodita_tagger!" << endl, false;
13089 0 0 if (!in.is_open()) return cerr << "Cannot open morphodita tagger file '" << params << "'!" << endl, false;
13090 0 0 if (!load(in)) return cerr << "Cannot load morphodita tagger from file '" << params << "'!" << endl, false;
0 0 if (!load(in)) return cerr << "Cannot load morphodita tagger from file '" << params << "'!" << endl, false;
13092 0 0 if (!in.seekg(0, ifstream::beg)) return cerr << "Cannot seek in morphodita tagger file '" << params << "'!" << endl, false;
0 0 if (!in.seekg(0, ifstream::beg)) return cerr << "Cannot seek in morphodita tagger file '" << params << "'!" << endl, false;
13093 0 0 os << in.rdbuf();
13100 2 0 if (!tagger || !morpho) return;
2 0 if (!tagger || !morpho) return;
2 0 if (!tagger || !morpho) return;
13104 1 1 if (!c) c = new cache();
13110 2 0 if (c->tags.size() >= forms.size()) {
13112 7 2 for (unsigned i = 0; i < forms.size(); i++) {
13122 11 7 for (auto&& analysis : c->analyses)
13123 11 0 sentence.words[i].raw_lemmas_all.emplace_back(analysis.lemma, 0, morpho->raw_lemma_len(analysis.lemma));
11 0 sentence.words[i].raw_lemmas_all.emplace_back(analysis.lemma, 0, morpho->raw_lemma_len(analysis.lemma));
13174 1 0 if (!res) return nullptr;
13175 1 0 if (!res->load(is)) return nullptr;
1 0 if (!res->load(is)) return nullptr;
13185 0 0 if (colon == string::npos) {
13188 0 0 tagger_id = tagger_id_and_params.substr(0, colon);
13189 0 0 params = tagger_id_and_params.substr(colon + 1);
13194 0 0 if (!tagger_ids::parse(tagger_id, id)) return cerr << "Unknown tagger_id '" << tagger_id << "'!" << endl, nullptr;
0 0 if (!tagger_ids::parse(tagger_id, id)) return cerr << "Unknown tagger_id '" << tagger_id << "'!" << endl, nullptr;
13197 0 0 unique_ptr res(create(id));
13198 0 0 if (!res) return cerr << "Cannot create instance for tagger_id '" << tagger_id << "'!" << endl, nullptr;
13201 0 0 os.put(id);
13202 0 0 if (!res->create_and_encode(params, os)) return cerr << "Cannot encode instance of tagger_id '" << tagger_id << "'!" << endl, nullptr;
0 0 if (!res->create_and_encode(params, os)) return cerr << "Cannot encode instance of tagger_id '" << tagger_id << "'!" << endl, nullptr;
13243 0 0 for (unsigned i = 0; i < forms.size(); i++) {
13544 0 0 for (; *str; str++)
13545 0 0 if (((unsigned char)*str) >= 0x80) {
13546 0 0 if (((unsigned char)*str) < 0xC0) return false;
13547 0 0 else if (((unsigned char)*str) < 0xE0) {
13548 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13549 0 0 } else if (((unsigned char)*str) < 0xF0) {
13550 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13551 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13552 0 0 } else if (((unsigned char)*str) < 0xF8) {
13553 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13554 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13555 0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13562 0 0 for (; len > 0; str++, len--)
13563 0 0 if (((unsigned char)*str) >= 0x80) {
13564 0 0 if (((unsigned char)*str) < 0xC0) return false;
13565 0 0 else if (((unsigned char)*str) < 0xE0) {
13566 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13567 0 0 } else if (((unsigned char)*str) < 0xF0) {
13568 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13569 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13570 0 0 } else if (((unsigned char)*str) < 0xF8) {
13571 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13572 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13573 0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
0 0 str++; if (!--len || ((unsigned char)*str) < 0x80 || ((unsigned char)*str) >= 0xC0) return false;
13582 0 0 for (char32_t chr; (chr = decode(str)); )
13589 0 0 while (len)
13596 0 0 for (auto&& chr : str)
13624 0 0 return {3, 3, 0, ""};
0 0 return {3, 3, 0, ""};
14154 244 31893 IF_BIT_0(prob)
784 31353 IF_BIT_0(prob)
14159 777 7 if (checkDicSize != 0 || processedPos != 0)
14161 0 777 (dic[(dicPos == 0 ? dicBufSize : dicPos) - 1] >> (8 - lc))));
14163 440 344 if (state < kNumLitStates)
14167 385 3135 do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
2054 1466 do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
3080 440 do { GET_BIT(prob + symbol, symbol) } while (symbol < 0x100);
14171 0 344 unsigned matchByte = p->dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
14173 95 249 state -= (state < 10) ? 3 : 6;
14182 310 2442 GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
1825 927 GET_BIT2(probLit, symbol, offs &= ~bit, offs &= bit)
14184 2408 344 while (symbol < 0x100);
14194 198 31155 IF_BIT_0(prob)
267 31086 IF_BIT_0(prob)
14203 31086 0 if (checkDicSize == 0 && processedPos == 0)
14206 120 30966 IF_BIT_0(prob)
30975 111 IF_BIT_0(prob)
14210 123 30852 IF_BIT_0(prob)
54 30921 IF_BIT_0(prob)
14213 0 54 dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
14216 3 51 state = state < kNumLitStates ? 9 : 11;
14226 22 89 IF_BIT_0(prob)
61 50 IF_BIT_0(prob)
14235 4 46 IF_BIT_0(prob)
31 19 IF_BIT_0(prob)
14251 30873 159 state = state < kNumLitStates ? 8 : 11;
14257 206 31093 IF_BIT_0(probLen)
276 31023 IF_BIT_0(probLen)
14268 111 30912 IF_BIT_0(probLen)
54 30969 IF_BIT_0(probLen)
14283 1134 247608 TREE_DECODE(probLen, limit, len);
1269 247473 TREE_DECODE(probLen, limit, len);
217443 31299 TREE_DECODE(probLen, limit, len);
14287 267 31032 if (state >= kNumStates)
14292 34 233 TREE_6_DECODE(prob, distance);
194 73 TREE_6_DECODE(prob, distance);
39 228 TREE_6_DECODE(prob, distance);
244 23 TREE_6_DECODE(prob, distance);
22 245 TREE_6_DECODE(prob, distance);
169 98 TREE_6_DECODE(prob, distance);
29 238 TREE_6_DECODE(prob, distance);
112 155 TREE_6_DECODE(prob, distance);
25 242 TREE_6_DECODE(prob, distance);
154 113 TREE_6_DECODE(prob, distance);
30 237 TREE_6_DECODE(prob, distance);
178 89 TREE_6_DECODE(prob, distance);
14293 229 38 if (distance >= kStartPosModelIndex)
14298 119 110 if (posSlot < kEndPosModelIndex)
14305 218 119 do
14307 41 296 GET_BIT2(prob + i, i, ; , distance |= mask);
147 190 GET_BIT2(prob + i, i, ; , distance |= mask);
14316 1025 110 do
14318 137 998 NORMALIZE
14342 14 96 GET_BIT2(prob + i, i, ; , distance |= 1);
53 57 GET_BIT2(prob + i, i, ; , distance |= 1);
14343 20 90 GET_BIT2(prob + i, i, ; , distance |= 2);
51 59 GET_BIT2(prob + i, i, ; , distance |= 2);
14344 9 101 GET_BIT2(prob + i, i, ; , distance |= 4);
66 44 GET_BIT2(prob + i, i, ; , distance |= 4);
14345 18 92 GET_BIT2(prob + i, i, ; , distance |= 8);
70 40 GET_BIT2(prob + i, i, ; , distance |= 8);
14347 0 110 if (distance == (uint32_t)0xFFFFFFFF)
14359 267 0 if (checkDicSize == 0)
14361 267 0 if (distance >= processedPos)
14364 0 0 else if (distance >= checkDicSize)
14366 130 137 state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
14371 31299 0 if (limit == dicPos)
14375 0 31299 unsigned curLen = ((rem < len) ? (unsigned)rem : len);
14376 0 31299 size_t pos = (dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0);
14381 31299 0 if (pos + curLen <= dicBufSize)
14387 8397330 31299 do
14393 0 0 do
14396 0 0 if (++pos == dicBufSize)
14404 32017 120 while (dicPos < limit && buf < bufLimit);
14405 26 94 NORMALIZE;
14423 0 127 if (p->remainLen != 0 && p->remainLen < kMatchSpecLenStart)
14430 0 0 if (limit - dicPos < len)
14433 0 0 if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
0 0 if (p->checkDicSize == 0 && p->prop.dicSize - p->processedPos <= len)
14438 0 0 while (len-- != 0)
14440 0 0 dic[dicPos] = dic[(dicPos - rep0) + ((dicPos < rep0) ? dicBufSize : 0)];
14452 120 0 if (p->checkDicSize == 0)
14455 0 120 if (limit - p->dicPos > rem)
14458 120 0 RINOK(LzmaDec_DecodeReal(p, limit2, bufLimit));
14459 0 120 if (p->processedPos >= p->prop.dicSize)
14463 113 7 while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
0 113 while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
0 0 while (p->dicPos < limit && p->buf < bufLimit && p->remainLen < kMatchSpecLenStart);
14465 0 120 if (p->remainLen > kMatchSpecLenStart)
14496 0 110 IF_BIT_0_CHECK(prob)
0 0 IF_BIT_0_CHECK(prob)
69 41 IF_BIT_0_CHECK(prob)
14503 68 1 if (p->checkDicSize != 0 || p->processedPos != 0)
14506 0 68 (p->dic[(p->dicPos == 0 ? p->dicBufSize : p->dicPos) - 1] >> (8 - p->prop.lc))));
14508 44 25 if (state < kNumLitStates)
14511 28 324 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
28 0 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
218 134 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
308 44 do { GET_BIT_CHECK(prob + symbol, symbol) } while (symbol < 0x100);
14516 0 25 ((p->dicPos < p->reps[0]) ? p->dicBufSize : 0)];
14526 20 180 GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
20 0 GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
146 54 GET_BIT2_CHECK(probLit, symbol, offs &= ~bit, offs &= bit)
14528 175 25 while (symbol < 0x100);
14538 3 38 IF_BIT_0_CHECK(prob)
3 0 IF_BIT_0_CHECK(prob)
20 21 IF_BIT_0_CHECK(prob)
14550 1 20 IF_BIT_0_CHECK(prob)
1 0 IF_BIT_0_CHECK(prob)
15 6 IF_BIT_0_CHECK(prob)
14554 2 13 IF_BIT_0_CHECK(prob)
2 0 IF_BIT_0_CHECK(prob)
9 6 IF_BIT_0_CHECK(prob)
14557 3 6 NORMALIZE_CHECK;
3 0 NORMALIZE_CHECK;
14569 2 4 IF_BIT_0_CHECK(prob)
2 0 IF_BIT_0_CHECK(prob)
4 2 IF_BIT_0_CHECK(prob)
14577 1 3 IF_BIT_0_CHECK(prob)
1 0 IF_BIT_0_CHECK(prob)
1 3 IF_BIT_0_CHECK(prob)
14593 2 30 IF_BIT_0_CHECK(probLen)
2 0 IF_BIT_0_CHECK(probLen)
23 9 IF_BIT_0_CHECK(probLen)
14604 0 9 IF_BIT_0_CHECK(probLen)
0 0 IF_BIT_0_CHECK(probLen)
4 5 IF_BIT_0_CHECK(probLen)
14619 15 106 TREE_DECODE_CHECK(probLen, limit, len);
15 0 TREE_DECODE_CHECK(probLen, limit, len);
75 46 TREE_DECODE_CHECK(probLen, limit, len);
89 32 TREE_DECODE_CHECK(probLen, limit, len);
14623 20 12 if (state < 4)
14629 15 105 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
15 0 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
80 40 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
100 20 TREE_DECODE_CHECK(prob, 1 << kNumPosSlotBits, posSlot);
14630 18 2 if (posSlot >= kStartPosModelIndex)
14636 10 8 if (posSlot < kEndPosModelIndex)
14643 47 8 do
14645 6 49 NORMALIZE_CHECK
6 0 NORMALIZE_CHECK
14656 45 18 do
14658 8 55 GET_BIT_CHECK(prob + i, i);
8 0 GET_BIT_CHECK(prob + i, i);
18 45 GET_BIT_CHECK(prob + i, i);
14666 22 79 NORMALIZE_CHECK;
22 0 NORMALIZE_CHECK;
14683 0 0 if (initDic)
14689 0 0 if (initState)
14704 55930 7 for (i = 0; i < numProbs; i++)
14720 127 0 while (p->remainLen != kMatchSpecLenStart)
14724 7 120 if (p->needFlush != 0)
14726 42 0 for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
35 7 for (; inSize > 0 && p->tempBufSize < RC_INIT_SIZE; (*srcLen)++, inSize--)
14728 0 7 if (p->tempBufSize < RC_INIT_SIZE)
14733 7 0 if (p->tempBuf[0] != 0)
14741 7 120 if (p->dicPos >= dicLimit)
14743 7 0 if (p->remainLen == 0 && p->code == 0)
7 0 if (p->remainLen == 0 && p->code == 0)
14748 0 0 if (finishMode == LZMA_FINISH_ANY)
14753 0 0 if (p->remainLen != 0)
14761 7 113 if (p->needInitState)
14764 0 120 if (p->tempBufSize == 0)
14768 110 10 if (inSize < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
14771 0 110 if (dummyRes == DUMMY_ERROR)
14779 0 110 if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
14789 120 0 if (LzmaDec_DecodeReal2(p, dicLimit, bufLimit) != 0)
14799 0 0 while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
0 0 while (rem < LZMA_REQUIRED_INPUT_MAX && lookAhead < inSize)
14802 0 0 if (rem < LZMA_REQUIRED_INPUT_MAX || checkEndMarkNow)
14805 0 0 if (dummyRes == DUMMY_ERROR)
14811 0 0 if (checkEndMarkNow && dummyRes != DUMMY_MATCH)
14818 0 0 if (LzmaDec_DecodeReal2(p, dicLimit, p->buf) != 0)
14827 0 0 if (p->code == 0)
14842 0 0 if (p->dicPos == p->dicBufSize)
14845 0 0 if (outSize > p->dicBufSize - dicPos)
14865 0 0 if (res != 0)
14867 0 0 if (outSizeCur == 0 || outSize == 0)
14895 7 0 if (size < LZMA_PROPS_SIZE)
14900 0 7 if (dicSize < LZMA_DIC_MIN)
14905 7 0 if (d >= (9 * 5 * 5))
14919 0 7 if (p->probs == 0 || numProbs != p->numProbs)
0 0 if (p->probs == 0 || numProbs != p->numProbs)
14924 7 0 if (p->probs == 0)
14933 7 0 RINOK(LzmaProps_Decode(&propNew, props, propsSize));
14934 7 0 RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
14943 0 0 RINOK(LzmaProps_Decode(&propNew, props, propsSize));
14944 0 0 RINOK(LzmaDec_AllocateProbs2(p, &propNew, alloc));
14946 0 0 if (p->dic == 0 || dicBufSize != p->dicBufSize)
0 0 if (p->dic == 0 || dicBufSize != p->dicBufSize)
14950 0 0 if (p->dic == 0)
14970 7 0 if (inSize < RC_INIT_SIZE)
14975 7 0 if (res != 0)
14985 7 0 if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
0 7 if (res == SZ_OK && *status == LZMA_STATUS_NEEDS_MORE_INPUT)
14999 7 7 static void LzmaFree(void* /*p*/, void *address) { delete[] (char*) address; }
15007 7 0 if (!is.read((char *) &uncompressed_len, sizeof(uncompressed_len))) return false;
15008 7 0 if (!is.read((char *) &compressed_len, sizeof(compressed_len))) return false;
15009 7 0 if (!is.read((char *) &poor_crc, sizeof(poor_crc))) return false;
15010 7 0 if (poor_crc != uncompressed_len * 19991 + compressed_len * 199999991 + 1234567890) return false;
15011 7 0 if (!is.read((char *) props_encoded, sizeof(props_encoded))) return false;
15014 7 0 if (!is.read((char *) compressed.data(), compressed_len)) return false;
7 0 if (!is.read((char *) compressed.data(), compressed_len)) return false;
15018 7 0 auto res = lzma::LzmaDecode(data.fill(uncompressed_len), &uncompressed_size, compressed.data(), &compressed_size, props_encoded, LZMA_PROPS_SIZE, lzma::LZMA_FINISH_ANY, &status, &lzmaAllocator);
15019 7 0 if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;
7 0 if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;
7 0 if (res != SZ_OK || uncompressed_size != uncompressed_len || compressed_size != compressed_len) return false;
16264 14 0 if ( p == ( (str.str + str.len)) )
16272 0 38 if ( _klen > 0 ) {
16277 0 0 if ( _upper < _lower )
16281 0 0 if ( _widec < _mid[0] )
16283 0 0 else if ( _widec > _mid[1] )
16286 0 0 switch ( _url_detector_cond_spaces[_url_detector_cond_offsets[cs] + ((_mid - _keys)>>1)] ) {
16289 0 0 if (
16303 38 0 if ( _klen > 0 ) {
16308 104 38 if ( _upper < _lower )
16312 32 72 if ( _widec < *_mid )
16314 72 0 else if ( _widec > *_mid )
16326 38 0 if ( _klen > 0 ) {
16331 100 10 if ( _upper < _lower )
16335 24 76 if ( _widec < _mid[0] )
16337 48 28 else if ( _widec > _mid[1] )
16351 0 38 if ( _url_detector_trans_actions[_trans] == 0 )
16356 0 0 while ( _nacts-- > 0 )
16376 28 10 if ( cs == 0 )
16378 24 4 if ( ++p != ( (str.str + str.len)) )
16407 0 14 if (length) *length = result_length;
16408 14 0 return length || result_length == str.len ? result : NO_URL;
0 14 return length || result_length == str.len ? result : NO_URL;
16454 0 0 return {1, 2, 0, ""};
16466 0 0 << (nametag.prerelease.empty() ? "" : "-") << nametag.prerelease
0 0 << (nametag.prerelease.empty() ? "" : "-") << nametag.prerelease
16468 0 0 << (unilib.prerelease.empty() ? "" : "-") << unilib.prerelease
0 0 << (unilib.prerelease.empty() ? "" : "-") << unilib.prerelease
16470 0 0 << (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease
0 0 << (morphodita.prerelease.empty() ? "" : "-") << morphodita.prerelease
16471 0 0 << (other_libraries.empty() ? "" : "\nand ") << other_libraries << ")\n"
0 0 << (other_libraries.empty() ? "" : "\nand ") << other_libraries << ")\n"
16473 0 0 "Mathematics and Physics, Charles University in Prague, Czech Republic.";
16479 3 0 } // namespace ufal
3 0 } // namespace ufal