line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Core::Types; |
2
|
|
|
|
|
|
|
$Treex::Core::Types::VERSION = '2.20150928'; |
3
|
27
|
|
|
27
|
|
139
|
use strict; |
|
27
|
|
|
|
|
103
|
|
|
27
|
|
|
|
|
635
|
|
4
|
27
|
|
|
27
|
|
130
|
use warnings; |
|
27
|
|
|
|
|
47
|
|
|
27
|
|
|
|
|
681
|
|
5
|
27
|
|
|
27
|
|
128
|
use utf8; |
|
27
|
|
|
|
|
45
|
|
|
27
|
|
|
|
|
234
|
|
6
|
27
|
|
|
27
|
|
596
|
use Moose::Util::TypeConstraints; |
|
27
|
|
|
|
|
53
|
|
|
27
|
|
|
|
|
416
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Integer that is zero or greater (0, 1, 2, ...).
subtype(
    'Treex::Type::NonNegativeInt',
    as 'Int',
    where { 0 <= $_ },
    message {"$_ isn't non-negative"},
);
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Selector: a possibly empty string made only of ASCII letters a-z (any case)
# and digits.
# The pattern is anchored with \A and \z rather than ^ and $, because $ also
# matches just before a trailing newline and would accept e.g. "src\n".
subtype 'Treex::Type::Selector'
    => as 'Str'
    => where {m/\A[a-z\d]*\z/i}
    => message {"Selector must =~ /^[a-z\\d]*\$/i. You've provided $_"};    #TODO: this message is not printed
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Layer: exactly one of the letters P, T, A, N, in either case.
# The pattern is anchored with \A and \z rather than ^ and $, because $ also
# matches just before a trailing newline and would accept e.g. "a\n".
subtype 'Treex::Type::Layer'
    => as 'Str'
    => where {m/\A[ptan]\z/i}
    => message {"Layer must be one of: [P]hrase structure, [T]ectogrammatical, [A]nalytical, [N]amed entities, you've provided $_"};
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Returns the layer codes available in Treex: A, T, P, N (in this order).
sub layers {
    return ( 'A', 'T', 'P', 'N' );
}
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Message: any string except the empty one.
subtype(
    'Treex::Type::Message',
    as 'Str',
    where { length($_) > 0 },
    message {'Message must be nonempty'},
);
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Id: currently any string; declared as its own type so that constraints
# can be added later in one place.
subtype( 'Treex::Type::Id', as 'Str' );
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# TODO: Should this be named ZoneCode or ZoneLabel?
# ZoneCode: a language code alone ("en"), or a language code and a selector
# joined by one underscore ("en_src"). The selector may be empty.
subtype 'Treex::Type::ZoneCode'
    => as 'Str'
    => where {
        # The limit of 2 keeps everything after the first underscore in
        # $selector, so extra parts ("en_src_junk") are checked and rejected
        # instead of being silently dropped by split.
        my ( $lang, $selector ) = split /_/, $_, 2;

        # split returns nothing for an empty string; check definedness here
        # so that undef is never passed on as a language code.
        defined $lang
            && is_lang_code($lang)
            && ( !defined $selector || $selector =~ /\A[a-z\d]*\z/i );
    }
    => message {'ZoneCode must be LangCode or LangCode_Selector, e.g. "en_src"'};
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# ISO 639-1 language code with some extensions from ISO 639-2 |
43
|
|
|
|
|
|
|
# Added code for Modern Greek which comes under ISO 639-3 |
44
|
27
|
|
|
27
|
|
85038
|
use Locale::Language; |
|
27
|
|
|
|
|
6897488
|
|
|
27
|
|
|
|
|
8826
|
|
45
|
|
|
|
|
|
|
# Language codes accepted in addition to those known to Locale::Language,
# mapped to their language names. Keys are kept in alphabetical order.
my %EXTRA_LANG_CODES = (
    'bxr'     => 'Buryat',
    'dsb'     => 'Lower Sorbian',
    'ell'     => 'Modern Greek',                  # ISO 639-3
    'grc'     => 'Ancient Greek',                 # ISO 639-2 code
    'hak'     => 'Hakka',
    'hsb'     => 'Upper Sorbian',
    'kaa'     => 'Karakalpak',
    'ku-arab' => 'Kurdish in Arabic script',
    'ku-cyrl' => 'Kurdish in Cyrillic script',
    'ku-latn' => 'Kurdish in Latin script',
    'mul'     => 'multiple languages',            # ISO 639-2 code
    'nan'     => 'Taiwanese',
    'rmy'     => 'Romany',
    'sah'     => 'Yakut',
    'und'     => 'unknown',                       # ISO 639-2 code for undetermined/unknown language
    'xal'     => 'Kalmyk',
    'yue'     => 'Cantonese',
);
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Lookup table of every accepted language code (value 1 for each code):
# the codes reported by all_language_codes() plus the extra ones above.
my %IS_LANG_CODE;
for my $known_code ( all_language_codes(), keys %EXTRA_LANG_CODES ) {
    $IS_LANG_CODE{$known_code} = 1;
}
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# LangCode: a string that is present in the %IS_LANG_CODE lookup table.
subtype(
    'Treex::Type::LangCode',
    as 'Str',
    where { defined $IS_LANG_CODE{$_} },
    message {'LangCode must be valid ISO 639-1 code. E.g. en, de, cs'},
);
71
|
7
|
|
|
7
|
1
|
37
|
# Checks whether the given argument is a known language code.
# Returns the %IS_LANG_CODE entry for the code (1 for a known code) and
# undef otherwise. An undefined argument is treated as "not a code" rather
# than being used as a hash key, which would emit an "uninitialized value"
# warning.
sub is_lang_code {
    my ($code) = @_;
    return defined $code ? $IS_LANG_CODE{$code} : undef;
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Returns the language name for the given language code.
# Codes listed in %EXTRA_LANG_CODES are answered from that table; any other
# code is passed to code2language().
sub get_lang_name {
    my ($code) = @_;

    if ( exists $EXTRA_LANG_CODES{$code} ) {
        return $EXTRA_LANG_CODES{$code};
    }
    return code2language($code);
}
77
|
|
|
|
|
|
|
1; |
78
|
|
|
|
|
|
|
__END__ |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=encoding utf-8 |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 NAME |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Treex::Core::Types - types used in Treex framework |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 VERSION |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
version 2.20150928 |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 DESCRIPTION |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 TYPES |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=over 4 |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item Treex::Type::NonNegativeInt |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
0, 1, 2, ... |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item Treex::Type::Layer |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
one of: P, T, A, N |
103
|
|
|
|
|
|
|
case insensitive |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item Treex::Type::Selector |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Selector - only alphanumeric characters, may be empty |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item Treex::Type::LangCode |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
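ISO 639-1 code, or one of the additional codes accepted by this module (e.g. "grc", "ell", "und", "mul", "ku-latn") |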
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item Treex::Type::ZoneCode |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Combination of LangCode and Selector, e.g. "en_src" |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=item Treex::Type::Message |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
just nonempty string, future constraints may be set |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item Treex::Type::Id |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
identifier, prepared for future constraints, now it is any string |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=back |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head1 METHODS |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=over 4 |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=item get_lang_name |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Returns language name for given LangCode |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=item is_lang_code |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Checks whether given argument is valid LangCode |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=item layers |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Returns array of layers available in Treex, now (A, T, P, N) |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=back |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head1 AUTHOR |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Tomáš Kraut <kraut@ufal.mff.cuni.cz> |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |