line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Core::Types; |
2
|
|
|
|
|
|
|
$Treex::Core::Types::VERSION = '2.20160630'; |
3
|
27
|
|
|
27
|
|
173
|
use strict; |
|
27
|
|
|
|
|
55
|
|
|
27
|
|
|
|
|
758
|
|
4
|
27
|
|
|
27
|
|
136
|
use warnings; |
|
27
|
|
|
|
|
57
|
|
|
27
|
|
|
|
|
630
|
|
5
|
27
|
|
|
27
|
|
135
|
use utf8; |
|
27
|
|
|
|
|
56
|
|
|
27
|
|
|
|
|
206
|
|
6
|
27
|
|
|
27
|
|
567
|
use Moose::Util::TypeConstraints; |
|
27
|
|
|
|
|
212
|
|
|
27
|
|
|
|
|
354
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# An Int that is zero or greater; anything below zero fails with the
# message given here.
subtype 'Treex::Type::NonNegativeInt',
    as 'Int',
    where { $_ >= 0 },
    message { "$_ isn't non-negative" };
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# A selector is a Str made up solely of characters matched by [a-z\d]
# (case-insensitively); the empty string is allowed.
subtype 'Treex::Type::Selector',
    as 'Str',
    where { $_ =~ m/^[a-z\d]*$/i },
    # TODO: this message is not printed
    message { "Selector must =~ /^[a-z\\d]*\$/i. You've provided $_" };
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# A layer is a single letter out of P, T, A, N, in either case.
subtype 'Treex::Type::Layer',
    as 'Str',
    where { $_ =~ m/^[ptan]$/i },
    message { "Layer must be one of: [P]hrase structure, [T]ectogrammatical, [A]nalytical, [N]amed entities, you've provided $_" };
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Returns the one-letter codes of the layers available in Treex,
# always in the order A, T, P, N. Takes no arguments.
sub layers {
    return ( 'A', 'T', 'P', 'N' );
}
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# A message is any Str other than the empty string.
subtype 'Treex::Type::Message',
    as 'Str',
    where { length($_) > 0 },
    message { 'Message must be nonempty' };
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
#preparation for possible future constraints |
33
|
|
|
|
|
|
|
# Currently a plain Str with no additional restriction.
subtype 'Treex::Type::Id',
    as 'Str';
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# TODO: Should this be named ZoneCode or ZoneLabel? |
37
|
|
|
|
|
|
|
# A zone code is either a bare language code ("en") or a language code and
# a selector joined by exactly one underscore ("en_src"). The selector part
# follows the same pattern as Treex::Type::Selector and may be empty.
subtype 'Treex::Type::ZoneCode'
    => as 'Str'
    => where {
        # Split on the first underscore only. Any further underscore then
        # stays inside the selector part and is rejected by the pattern
        # below; without the limit, surplus fields were silently discarded
        # and values such as "en_src_junk" or "en__src" were accepted.
        my ( $lang, $selector ) = split /_/, $_, 2;

        # Splitting an empty string yields no fields at all. Stop here so
        # that the lookup is never made with an undefined key (which would
        # emit an "uninitialized value" warning).
        return if !defined $lang;

        return is_lang_code($lang)
            && ( !defined $selector || $selector =~ /^[a-z\d]*$/i );
    }
    => message {'ZoneCode must be LangCode or LangCode_Selector, e.g. "en_src"'};
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# ISO 639-1 language code with some extensions from ISO 639-2, 639-3 and ISO 15924 (script names). |
43
|
|
|
|
|
|
|
# Added code for Modern Greek which comes under ISO 639-3 (but normally it is encoded using ISO 639-1 'el'). |
44
|
27
|
|
|
27
|
|
79167
|
use Locale::Language; |
|
27
|
|
|
|
|
4614767
|
|
|
27
|
|
|
|
|
7472
|
|
45
|
|
|
|
|
|
|
# Language codes recognised in addition to those supplied by
# Locale::Language, each mapped to a display name. Keys are kept in
# alphabetical order.
my %EXTRA_LANG_CODES = (
    'bxr'     => 'Buryat',
    'cop'     => 'Coptic',                        # ISO 639-2
    'dsb'     => 'Lower Sorbian',
    'ell'     => 'Modern Greek',                  # ISO 639-3
    'got'     => 'Gothic',                        # ISO 639-2
    'grc'     => 'Ancient Greek',                 # ISO 639-2
    'hak'     => 'Hakka',
    'hit'     => 'Hittite',                       # ISO 639-2
    'hsb'     => 'Upper Sorbian',
    'kaa'     => 'Karakalpak',
    'ku-arab' => 'Kurdish in Arabic script',
    'ku-cyrl' => 'Kurdish in Cyrillic script',
    'ku-latn' => 'Kurdish in Latin script',
    'mul'     => 'multiple languages',            # ISO 639-2 code
    'nan'     => 'Taiwanese',
    'rmy'     => 'Romany',
    'sah'     => 'Yakut',
    'und'     => 'unknown',                       # ISO 639-2 code for undetermined/unknown language
    'xal'     => 'Kalmyk',
    'yue'     => 'Cantonese',
);
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Lookup set of every accepted language code: whatever all_language_codes()
# returns plus the keys of %EXTRA_LANG_CODES. Every value is 1.
my %IS_LANG_CODE;
for my $code ( all_language_codes(), keys %EXTRA_LANG_CODES ) {
    $IS_LANG_CODE{$code} = 1;
}
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# A language code is a Str that is present in the %IS_LANG_CODE lookup set.
subtype 'Treex::Type::LangCode',
    as 'Str',
    where { defined $IS_LANG_CODE{$_} },
    message { 'LangCode must be valid ISO 639-1 code. E.g. en, de, cs' };
74
|
7
|
|
|
7
|
1
|
35
|
# Looks the given code up in %IS_LANG_CODE and returns the stored value:
# 1 for a known language code, undef for anything else.
sub is_lang_code {
    my ($code) = @_;
    return $IS_LANG_CODE{$code};
}
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Returns the language name for the given code. Codes listed in
# %EXTRA_LANG_CODES are answered from that table; every other code is
# passed on to code2language() and its result is returned as is.
sub get_lang_name {
    my ($code) = @_;

    if ( exists $EXTRA_LANG_CODES{$code} ) {
        return $EXTRA_LANG_CODES{$code};
    }
    return code2language($code);
}
80
|
|
|
|
|
|
|
1; |
81
|
|
|
|
|
|
|
__END__ |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=encoding utf-8 |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 NAME |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Treex::Core::Types - types used in Treex framework |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head1 VERSION |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
version 2.20160630 |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head1 DESCRIPTION |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=head1 TYPES |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=over 4 |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item Treex::Type::NonNegativeInt |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
0, 1, 2, ... |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item Treex::Type::Layer |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
one of: P, T, A, N |
106
|
|
|
|
|
|
|
case insensitive |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=item Treex::Type::Selector |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Selector - only alphanumeric characters, may be empty |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=item Treex::Type::LangCode |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
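ISO 639-1 code, or one of the extra codes defined in this module (selected ISO 639-2 and ISO 639-3 codes, and script-tagged codes such as C<ku-latn>) |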
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item Treex::Type::ZoneCode |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Combination of LangCode and Selector, e.g. "en_src" |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item Treex::Type::Message |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Any nonempty string; further constraints may be added in the future |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item Treex::Type::Id |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
identifier, prepared for future constraints, now it is any string |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=back |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 METHODS |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=over 4 |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item get_lang_name |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Returns language name for given LangCode |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=item is_lang_code |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Checks whether given argument is valid LangCode |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=item layers |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Returns the list of layers available in Treex, currently (A, T, P, N) |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=back |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head1 AUTHOR |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Tomáš Kraut <kraut@ufal.mff.cuni.cz> |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |