File Coverage

blib/lib/Treex/Core/Types.pm
Criterion Covered Total %
statement 17 19 89.4
branch 0 2 0.0
condition n/a
subroutine 7 8 87.5
pod 3 3 100.0
total 27 32 84.3


line stmt bran cond sub pod time code
1             package Treex::Core::Types;
2             $Treex::Core::Types::VERSION = '2.20210102';
3 27     27   192 use strict;
  27         71  
  27         863  
4 27     27   143 use warnings;
  27         55  
  27         661  
5 27     27   141 use utf8;
  27         49  
  27         209  
6 27     27   754 use Moose::Util::TypeConstraints;
  27         62  
  27         397  
7              
8             subtype 'Treex::Type::NonNegativeInt'
9             => as 'Int'
10             => where { $_ >= 0 }
11             => message {"$_ isn't non-negative"};
12              
13             subtype 'Treex::Type::Selector'
14             => as 'Str'
15             => where {m/^[a-z\d]*$/i}
16             => message {"Selector must =~ /^[a-z\\d]*\$/i. You've provided $_"}; #TODO: this message is not printed
17              
18             subtype 'Treex::Type::Layer'
19             => as 'Str'
20             => where {m/^[ptan]$/i}
21             => message {"Layer must be one of: [P]hrase structure, [T]ectogrammatical, [A]nalytical, [N]amed entities, you've provided $_"};
22              
23             sub layers {
24 1     1 1 5 return qw(A T P N);
25             }
26              
27             subtype 'Treex::Type::Message' #nonempty string
28             => as 'Str'
29             => where { $_ ne q{} }
30             => message {'Message must be nonempty'};
31              
32             #preparation for possible future constraints
33             subtype 'Treex::Type::Id'
34             => as 'Str';
35              
36             # TODO: Should this be named ZoneCode or ZoneLabel?
37             subtype 'Treex::Type::ZoneCode'
38             => as 'Str'
39             => where { my ( $l, $s ) = split /_/, $_; is_lang_code($l) && ( !defined $s || $s =~ /^[a-z\d]*$/i ) }
40             => message {'ZoneCode must be LangCode or LangCode_Selector, e.g. "en_src"'};
41              
42             # ISO 639-1 language code with some extensions from ISO 639-2, 639-3 and ISO 15924 (script names).
43             # Added code for Modern Greek which comes under ISO 639-3 (but normally it is encoded using ISO 639-1 'el').
44 27     27   85026 use Locale::Language;
  27         5699788  
  27         11393  
45             my %EXTRA_LANG_CODES = (
46             'abq' => "Abaza",
47             'aii' => "Assyrian",
48             'ajp' => "South Levantine Arabic",
49             'akk' => "Akkadian",
50             'apu' => "Apurina", # Apurinã
51             'aqz' => "Akuntsu",
52             'bho' => "Bhojpuri",
53             'bxr' => "Buryat",
54             'ckb' => "Sorani", # Central Kurdish
55             'ckt' => "Chukchi",
56             'cop' => "Coptic", # ISO 639-2
57             'dbl' => "Dyirbal",
58             'dsb' => "Lower Sorbian",
59             'ell' => "Modern Greek", # ISO 639-3
60             'fro' => "Old French",
61             'got' => "Gothic", # ISO 639-2
62             'grc' => "Ancient Greek", # ISO 639-2
63             'gsw' => "Swiss German",
64             'gun' => "Mbya Guarani",
65             'hit' => "Hittite", # ISO 639-2
66             'hsb' => "Upper Sorbian",
67             'hak' => "Hakka",
68             'kaa' => "Karakalpak",
69             'kfm' => "Khunsari",
70             'kmr' => "Kurmanji", # Northern Kurdish
71             'koi' => "Komi Permyak",
72             'kpv' => "Komi Zyrian",
73             'krl' => "Karelian",
74             'ku-latn' => "Kurdish in Latin script",
75             'ku-arab' => "Kurdish in Arabic script",
76             'ku-cyrl' => "Kurdish in Cyrillic script",
77             'lzh' => "Classical Chinese",
78             'mdf' => "Moksha",
79             'mga' => "Middle Irish",
80             'mul' => "multiple languages", # ISO 639-2 code
81             'myu' => "Munduruku", # Mundurukú
82             'myv' => "Erzya",
83             'nan' => "Taiwanese",
84             'ndg' => "Ndengeleko",
85             'nyq' => "Nayini",
86             'olo' => "Livvi", # Olonets Karelian
87             'orv' => "Old Russian",
88             'otk' => "Old Turkish",
89             'pcm' => "Nigerian Pidgin (Naija)",
90             'pgl' => "Archaic Irish",
91             'rmy' => "Romany",
92             'qhe' => "Hindi-English", # used in UD bilingual corpora
93             'qtd' => "Turkish-German", # used in UD bilingual corpora
94             'quz' => "Cusco Quechua",
95             'sah' => "Yakut",
96             'sga' => "Old Irish",
97             'sme' => "North Sami",
98             'sms' => "Skolt Sami",
99             'soj' => "Soi",
100             'swl' => "Swedish Sign Language",
101             'tpn' => "Tupinamba", # Tupinambá
102             'und' => "unknown", # ISO 639-2 code for undetermined/unknown language
103             'xal' => "Kalmyk",
104             'wbp' => "Warlpiri",
105             'yii' => "Yidiny",
106             'yue' => "Cantonese",
107             );
108              
109             my %IS_LANG_CODE = map { $_ => 1 } ( all_language_codes(), keys %EXTRA_LANG_CODES );
110              
111             subtype 'Treex::Type::LangCode'
112             => as 'Str'
113             => where { defined $IS_LANG_CODE{$_} }
114             => message {'LangCode must be valid ISO 639-1 code. E.g. en, de, cs'};
115 11     11 1 67 sub is_lang_code { return $IS_LANG_CODE{ $_[0] }; }
116              
117             sub get_lang_name {
118 0     0 1   my $code = shift;
119 0 0         return exists $EXTRA_LANG_CODES{$code} ? $EXTRA_LANG_CODES{$code} : code2language($code);
120             }
121             1;
122             __END__
123              
124             =encoding utf-8
125              
126             =head1 NAME
127              
128             Treex::Core::Types - types used in Treex framework
129              
130             =head1 VERSION
131              
132             version 2.20210102
133              
134             =head1 DESCRIPTION
135              
136             =head1 TYPES
137              
138             =over 4
139              
140             =item Treex::Type::NonNegativeInt
141              
142             0, 1, 2, ...
143              
144             =item Treex::Type::Layer
145              
146             one of: P, T, A, N
147             case insensitive
148              
149             =item Treex::Type::Selector
150              
151             Selector - only alphanumeric characters, may be empty
152              
153             =item Treex::Type::LangCode
154              
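155             ISO 639-1 code (e.g. en, de, cs), or one of the additional codes this module accepts from ISO 639-2, ISO 639-3 and script-tagged variants (e.g. grc, ell, ku-latn)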
156              
157             =item Treex::Type::ZoneCode
158              
159             Combination of LangCode and Selector, e.g. "en_src"
160              
161             =item Treex::Type::Message
162              
163             Any non-empty string; further constraints may be added in the future
164              
165             =item Treex::Type::Id
166              
167             Identifier; currently any string is accepted, the type exists so that constraints can be added later
168              
169             =back
170              
171             =head1 METHODS
172              
173             =over 4
174              
175             =item get_lang_name
176              
177             Returns the language name for the given LangCode
178              
179             =item is_lang_code
180              
181             Checks whether the given argument is a valid LangCode
182              
183             =item layers
184              
185             Returns the list of layers available in Treex, currently (A, T, P, N)
186              
187             =back
188              
189             =head1 AUTHOR
190              
191             Tomáš Kraut <kraut@ufal.mff.cuni.cz>
192              
193             =head1 COPYRIGHT AND LICENSE
194              
195             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
196              
197             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.