line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/env perl |
2
|
|
|
|
|
|
|
#-*-perl-*- |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# convert between 2-letter and 3-letter language codes |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# USAGE: ./iso639 [-2|-3|-m] [langcode]* |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# convert to 3-letter-code if 2-letter code is given and vice versa |
9
|
|
|
|
|
|
|
# -2 ... print 2-letter code (even if the input is a 2-letter code) |
10
|
|
|
|
|
|
|
# -3 ... print 3-letter code (even if the input is a 3-letter code) |
11
|
|
|
|
|
|
|
# -m ... print macro language instead of local language variants |
12
|
|
|
|
|
|
|
# -n ... don't print a final new-line |
13
|
|
|
|
|
|
|
# -k ... keep original code if no mapping is found |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
## make this a module |
17
|
|
|
|
|
|
|
package ISO::639::3; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# use 5.006; |
20
|
1
|
|
|
1
|
|
68817
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
29
|
|
21
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
24
|
|
22
|
|
|
|
|
|
|
|
23
|
1
|
|
|
1
|
|
642
|
use utf8; |
|
1
|
|
|
|
|
14
|
|
|
1
|
|
|
|
|
5
|
|
24
|
1
|
|
|
1
|
|
548
|
use open ':locale'; |
|
1
|
|
|
|
|
1215
|
|
|
1
|
|
|
|
|
8
|
|
25
|
1
|
|
|
1
|
|
16382
|
use vars qw($opt_2 $opt_3 $opt_h $opt_m $opt_n $opt_k); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
73
|
|
26
|
1
|
|
|
1
|
|
2120
|
use Getopt::Std; |
|
1
|
|
|
|
|
49
|
|
|
1
|
|
|
|
|
70
|
|
27
|
1
|
|
|
1
|
|
551
|
use Pod::Usage; |
|
1
|
|
|
|
|
42002
|
|
|
1
|
|
|
|
|
172
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 NAME |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
ISO::639::3 - Language codes and names from ISO::639 |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=head1 VERSION |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Version 0.03 |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=cut |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
40
|
|
|
|
|
|
|
|
41
|
1
|
|
|
1
|
|
22
|
use Exporter 'import'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2525
|
|
42
|
|
|
|
|
|
|
our @EXPORT = qw( |
43
|
|
|
|
|
|
|
convert_iso639 |
44
|
|
|
|
|
|
|
get_iso639_1 |
45
|
|
|
|
|
|
|
get_iso639_3 |
46
|
|
|
|
|
|
|
get_macro_language |
47
|
|
|
|
|
|
|
get_language_name |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( all => \@EXPORT ); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 SYNOPSIS |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
The module provides simple functions for retrieving language names and codes from the ISO-639 standards. The main purpose is to convert between different variants of codes and to get the English names of languages from codes. The module contains basic functions. There is no object-oriented interface. All functions can be exported. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
use ISO::639::3 qw/:all/; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
print convert_iso639( 'iso639-1', 'fra' ); |
59
|
|
|
|
|
|
|
print convert_iso639( 'iso639-3', 'de' ); |
60
|
|
|
|
|
|
|
print convert_iso639( 'name', 'fa' ); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
print get_iso639_1( 'deu' ); |
63
|
|
|
|
|
|
|
print get_iso639_3( 'de' ); |
64
|
|
|
|
|
|
|
print get_language_name( 'de' ); |
65
|
|
|
|
|
|
|
print get_language_name( 'eng' ); |
66
|
|
|
|
|
|
|
print get_macro_language( 'yue' ); |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The module can be run as a script: |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
perl ISO/639/3.pm [OPTIONS] LANGCODE* |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
This converts all language codes given as LANGCODE to corresponding language names. OPTIONS can be set to convert between different variants of language codes or to convert from language names to codes. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 OPTIONS |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
-2: convert to two-letter code (ISO 639-1) |
77
|
|
|
|
|
|
|
-3: convert to three-letter code (ISO 639-3) |
78
|
|
|
|
|
|
|
-m: convert to three-letter code but return the macro-language if available (ISO 639-3) |
79
|
|
|
|
|
|
|
-n: don't print a final new line |
80
|
|
|
|
|
|
|
-k: keep original code if no mapping is found |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=cut |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
our %TwoToThree = (); |
85
|
|
|
|
|
|
|
our %ThreeToTwo = (); |
86
|
|
|
|
|
|
|
our %ThreeToThree = (); |
87
|
|
|
|
|
|
|
our %ThreeToMacro = (); |
88
|
|
|
|
|
|
|
our %NameToTwo = (); |
89
|
|
|
|
|
|
|
our %NameToThree = (); |
90
|
|
|
|
|
|
|
our %TwoToName = (); |
91
|
|
|
|
|
|
|
our %ThreeToName = (); |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
&_read_iso639_codes; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
## run the script if not called as a module |
96
|
|
|
|
|
|
|
__PACKAGE__->run() unless caller(); |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
## function to run if this is used as a script |
99
|
|
|
|
|
|
|
sub run{ |
100
|
0
|
|
|
0
|
0
|
0
|
&getopts('23hkmn'); |
101
|
|
|
|
|
|
|
# pod2usage( { -verbose => 2 } ) if ($opt_h); |
102
|
0
|
0
|
|
|
|
0
|
pod2usage if ($opt_h); |
103
|
|
|
|
|
|
|
|
104
|
0
|
0
|
|
|
|
0
|
my $type = $opt_2 ? 'iso639-1' : $opt_3 ? 'iso639-3' : $opt_m ? 'macro' : 'name'; |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
105
|
0
|
|
|
|
|
0
|
my @converted = map($_ = convert_iso639($type,$_,$opt_k), @ARGV); |
106
|
0
|
0
|
0
|
|
|
0
|
if ($type eq 'name' and @converted){ |
107
|
0
|
|
|
|
|
0
|
print '"',join('" "',@converted),'"'; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
else{ |
110
|
0
|
|
|
|
|
0
|
print join(' ',@converted); |
111
|
|
|
|
|
|
|
} |
112
|
0
|
0
|
|
|
|
0
|
print "\n" unless ($opt_n); |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head1 SUBROUTINES |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head2 $converted = convert_iso639( $type, $id ) |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Convert the language code or language name given in C<$id>. The C<$type> specifies the output type that is generated. Possible types are C (two-letter code), C (three-letter-code), C (three-letter code of the corresponding macro language) or C (language name). Default is to return the language name. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub convert_iso639{ |
125
|
0
|
|
|
0
|
1
|
0
|
my $code = $_[1]; |
126
|
|
|
|
|
|
|
# my $code = lc($_[1]); |
127
|
|
|
|
|
|
|
# $code=~s/[\-\_].*$//; |
128
|
0
|
0
|
|
|
|
0
|
return get_iso639_1($code,$_[2]) if ($_[0] eq 'iso639-1'); |
129
|
0
|
0
|
|
|
|
0
|
return get_iso639_3($code,$_[2]) if ($_[0] eq 'iso639-3'); |
130
|
0
|
0
|
|
|
|
0
|
return get_macro_language($code,$_[2]) if ($_[0] eq 'macro'); |
131
|
0
|
|
|
|
|
0
|
return get_language_name($code); |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 $iso639_1 = get_iso639_1( $id ) |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Return the ISO 639-1 code for a given language or three-letter code. Returns the same code if it is a ISO 639-1 code or 'xx' if it is not recognized. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub get_iso639_1{ |
142
|
0
|
0
|
|
0
|
1
|
0
|
return $_[0] if (exists $TwoToName{$_[0]}); |
143
|
0
|
0
|
|
|
|
0
|
return $ThreeToTwo{$_[0]} if (exists $ThreeToTwo{$_[0]}); |
144
|
0
|
0
|
|
|
|
0
|
return $NameToTwo{lc($_[0])} if (exists $NameToTwo{lc($_[0])}); |
145
|
0
|
0
|
|
|
|
0
|
return $_[0] if (exists $TwoToThree{$_[0]}); |
146
|
|
|
|
|
|
|
## TODO: is it OK to fallback to macro language in this conversion? |
147
|
|
|
|
|
|
|
## (should we add some regional code?) |
148
|
0
|
0
|
|
|
|
0
|
if (exists $ThreeToMacro{$_[0]}){ |
149
|
|
|
|
|
|
|
return $ThreeToTwo{$ThreeToMacro{$_[0]}} |
150
|
0
|
0
|
|
|
|
0
|
if (exists $ThreeToTwo{$ThreeToMacro{$_[0]}}); |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
## try without regional extension |
153
|
0
|
|
|
|
|
0
|
my $code = $_[0]; |
154
|
0
|
0
|
|
|
|
0
|
return &get_iso639_1($code) if ($code=~s/[\-\_].*$//); |
155
|
0
|
0
|
|
|
|
0
|
return &get_iso639_1(lc($code)) if ($code ne lc($code)); |
156
|
0
|
0
|
|
|
|
0
|
return $_[0] if ($_[1]); |
157
|
0
|
|
|
|
|
0
|
return 'xx'; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head2 $iso639_3 = get_iso639_3( $id ) |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Return the ISO 639-3 code for a given language or any ISO 639 code. Returns 'xxx' if the code is not recognized. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub get_iso639_3{ |
167
|
0
|
0
|
|
0
|
1
|
0
|
return $_[0] if (exists $ThreeToName{$_[0]}); |
168
|
0
|
0
|
|
|
|
0
|
return $TwoToThree{$_[0]} if (exists $TwoToThree{$_[0]}); |
169
|
0
|
0
|
|
|
|
0
|
return $NameToThree{lc($_[0])} if (exists $NameToThree{lc($_[0])}); |
170
|
0
|
0
|
|
|
|
0
|
return $ThreeToThree{$_[0]} if (exists $ThreeToThree{$_[0]}); |
171
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
0
|
my $code = $_[0]; |
173
|
0
|
0
|
|
|
|
0
|
return &get_iso639_3($code) if ($code=~s/[\-\_].*$//); |
174
|
0
|
0
|
|
|
|
0
|
return &get_iso639_3(lc($code)) if ($code ne lc($code)); |
175
|
0
|
0
|
|
|
|
0
|
return $_[0] if ($_[1]); |
176
|
0
|
|
|
|
|
0
|
return 'xxx'; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 $macro_language = get_macro_language( $id ) |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Return the ISO 639-3 code of the macro language for a given language or any ISO 639 code. Returns 'xxx' if the code is not recognized. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub get_macro_language{ |
188
|
0
|
|
|
0
|
1
|
0
|
my $code = get_iso639_3($_[0],$_[1]); |
189
|
0
|
0
|
|
|
|
0
|
return $ThreeToMacro{$code} if (exists $ThreeToMacro{$code}); |
190
|
0
|
|
|
|
|
0
|
return $code; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head2 $language = get_language_name( $id ) |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Return the name of the language that corresponds to the given language code (any ISO639 code) |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=cut |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
sub get_language_name{ |
200
|
0
|
0
|
|
0
|
1
|
0
|
return $TwoToName{$_[0]} if (exists $TwoToName{$_[0]}); |
201
|
0
|
0
|
|
|
|
0
|
return $ThreeToName{$_[0]} if (exists $ThreeToName{$_[0]}); |
202
|
0
|
0
|
|
|
|
0
|
return $_[0] if (exists $NameToThree{$_[0]}); |
203
|
0
|
0
|
|
|
|
0
|
return &get_language_name($_[0]) if ($_[0]=~s/[\-\_].*$//); |
204
|
0
|
0
|
|
|
|
0
|
return &get_language_name(lc($_[0])) if ($_[0] ne lc($_[0])); |
205
|
0
|
|
|
|
|
0
|
return 'unknown'; |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
#################################### |
216
|
|
|
|
|
|
|
# internal functions that |
217
|
|
|
|
|
|
|
# read all codes from the data part |
218
|
|
|
|
|
|
|
#################################### |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
## read all codes |
221
|
|
|
|
|
|
|
sub _read_iso639_codes{ |
222
|
1
|
|
|
1
|
|
8
|
while (){ |
223
|
9
|
|
|
|
|
15
|
chomp; |
224
|
9
|
100
|
|
|
|
24
|
next unless($_); |
225
|
6
|
|
|
|
|
24
|
my @f = split(/\t/); |
226
|
6
|
100
|
|
|
|
41
|
if ($f[1] eq 'Part2B'){ |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
227
|
1
|
|
|
|
|
3
|
&_read_main_code_table(); |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
elsif ($f[0] eq 'M_Id'){ |
230
|
1
|
|
|
|
|
5
|
&_read_macrolanguage_table(); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
elsif ($f[0] eq 'NS_Id'){ |
233
|
1
|
|
|
|
|
4
|
&_read_nonstandard_code_table(); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
elsif ($f[0] eq 'C_Id'){ |
236
|
1
|
|
|
|
|
4
|
&_read_collective_language_table(); |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
elsif ($f[0] eq 'URI'){ |
239
|
1
|
|
|
|
|
3
|
&_read_iso639_5(); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
elsif ($f[4] eq 'Ret_Remedy'){ |
242
|
1
|
|
|
|
|
3
|
&_read_retired_code_table(); |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub _read_retired_code_table{ |
249
|
|
|
|
|
|
|
## retired codes |
250
|
|
|
|
|
|
|
# print STDERR "read retired"; |
251
|
1
|
|
|
1
|
|
6
|
while (){ |
252
|
353
|
|
|
|
|
484
|
chomp; |
253
|
353
|
100
|
|
|
|
521
|
last unless ($_); |
254
|
352
|
|
|
|
|
1104
|
my @f = split(/\t/); |
255
|
352
|
50
|
|
|
|
569
|
next unless ($f[0]); |
256
|
352
|
100
|
|
|
|
832
|
unless (exists $ThreeToThree{$f[0]}){ |
257
|
335
|
|
|
|
|
1347
|
$ThreeToName{$f[0]} = $f[1]; |
258
|
335
|
100
|
|
|
|
1837
|
$ThreeToThree{$f[0]} = $f[3] ? $f[3] : $f[0]; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
sub _read_macrolanguage_table{ |
264
|
|
|
|
|
|
|
## macro-languages |
265
|
|
|
|
|
|
|
# print STDERR "read macrolanguages"; |
266
|
1
|
|
|
1
|
|
6
|
while (){ |
267
|
454
|
|
|
|
|
615
|
chomp; |
268
|
454
|
100
|
|
|
|
665
|
last unless ($_); |
269
|
453
|
|
|
|
|
941
|
my @f = split(/\t/); |
270
|
453
|
50
|
|
|
|
704
|
next unless ($f[0]); |
271
|
453
|
|
|
|
|
1111
|
$ThreeToThree{$f[1]} = $f[1]; |
272
|
453
|
|
|
|
|
1292
|
$ThreeToMacro{$f[1]} = $f[0]; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
sub _read_collective_language_table{ |
277
|
|
|
|
|
|
|
## collective languages from ISO639-2 |
278
|
|
|
|
|
|
|
# print STDERR "read collective language codes"; |
279
|
1
|
|
|
1
|
|
5
|
while (){ |
280
|
66
|
|
|
|
|
102
|
chomp; |
281
|
66
|
100
|
|
|
|
99
|
last unless ($_); |
282
|
65
|
|
|
|
|
164
|
my @f = split(/\t/); |
283
|
65
|
50
|
|
|
|
103
|
next unless ($f[0]); |
284
|
65
|
50
|
|
|
|
121
|
unless (exists $ThreeToThree{$f[0]}){ |
285
|
65
|
|
|
|
|
464
|
$ThreeToThree{$f[0]} = $f[0]; |
286
|
65
|
100
|
|
|
|
100
|
if ($f[1]){ |
287
|
1
|
|
|
|
|
7
|
$ThreeToTwo{$f[0]} = $f[1]; |
288
|
1
|
|
|
|
|
5
|
$TwoToThree{$f[1]} = $f[0]; |
289
|
1
|
50
|
|
|
|
3
|
if ($f[2]){ |
290
|
1
|
|
|
|
|
2
|
$TwoToName{$f[1]} = $f[2]; |
291
|
1
|
|
|
|
|
3
|
$NameToTwo{$f[2]} = $f[1]; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
} |
294
|
65
|
50
|
|
|
|
98
|
if ($f[2]){ |
295
|
65
|
|
|
|
|
101
|
$ThreeToName{$f[0]} = $f[2]; |
296
|
65
|
|
|
|
|
258
|
$NameToThree{lc($f[2])} = $f[0]; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
} |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub _read_nonstandard_code_table{ |
303
|
|
|
|
|
|
|
## non-standard codes |
304
|
|
|
|
|
|
|
# print STDERR "read non-standard codes"; |
305
|
1
|
|
|
1
|
|
10
|
while (){ |
306
|
16
|
|
|
|
|
25
|
chomp; |
307
|
16
|
100
|
|
|
|
28
|
last unless ($_); |
308
|
15
|
|
|
|
|
51
|
my @f = split(/\t/); |
309
|
15
|
50
|
|
|
|
34
|
next unless ($f[0]); |
310
|
15
|
100
|
|
|
|
50
|
unless (exists $ThreeToThree{$f[0]}){ |
311
|
11
|
50
|
|
|
|
29
|
$ThreeToThree{$f[0]} = $f[1] ? $f[1] : $f[0]; |
312
|
11
|
100
|
|
|
|
19
|
if ($f[2]){ |
313
|
9
|
|
|
|
|
15
|
$ThreeToTwo{$f[0]} = $f[2]; |
314
|
|
|
|
|
|
|
} |
315
|
11
|
100
|
|
|
|
27
|
$ThreeToMacro{$f[0]} = $f[3] if ($f[3]); |
316
|
11
|
50
|
|
|
|
16
|
if ($f[4]){ |
317
|
11
|
|
|
|
|
20
|
$ThreeToName{$f[0]} = $f[4]; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
} |
320
|
15
|
50
|
|
|
|
26
|
if ($f[4]){ |
321
|
15
|
50
|
|
|
|
69
|
$NameToThree{lc($f[4])} = $f[0] unless (exists $NameToThree{$f[4]}); |
322
|
15
|
100
|
|
|
|
49
|
if ($f[2]){ |
323
|
13
|
50
|
|
|
|
49
|
$NameToTwo{lc($f[4])} = $f[2] unless (exists $NameToTwo{$f[4]}); |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
} |
326
|
15
|
100
|
|
|
|
29
|
if ($f[2]){ |
327
|
13
|
100
|
|
|
|
37
|
$TwoToThree{$f[2]} = $f[0] unless (exists $TwoToThree{$f[2]}); |
328
|
13
|
50
|
|
|
|
21
|
if ($f[4]){ |
329
|
13
|
100
|
|
|
|
49
|
$TwoToName{$f[2]} = $f[4] unless (exists $TwoToName{$f[2]}); |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
sub _read_main_code_table{ |
337
|
1
|
|
|
1
|
|
5
|
while (){ |
338
|
7868
|
|
|
|
|
10776
|
chomp; |
339
|
7868
|
100
|
|
|
|
11768
|
return unless ($_); |
340
|
7867
|
|
|
|
|
24320
|
my @f = split(/\t/); |
341
|
7867
|
50
|
|
|
|
12441
|
next unless ($f[0]); |
342
|
7867
|
|
|
|
|
19592
|
$ThreeToName{$f[0]} = $f[6]; |
343
|
7867
|
|
|
|
|
13858
|
$ThreeToThree{$f[0]} = $f[0]; |
344
|
1
|
|
|
1
|
|
8
|
$NameToThree{lc($f[6])} = $f[0]; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
17
|
|
|
7867
|
|
|
|
|
21360
|
|
345
|
7867
|
100
|
|
|
|
39825
|
if ($f[3]){ |
346
|
184
|
|
|
|
|
375
|
$ThreeToTwo{$f[0]} = $f[3]; |
347
|
184
|
|
|
|
|
457
|
$TwoToThree{$f[3]} = $f[0]; |
348
|
184
|
|
|
|
|
359
|
$TwoToName{$f[3]} = $f[6]; |
349
|
184
|
|
|
|
|
404
|
$NameToTwo{lc($f[6])} = $f[3]; |
350
|
|
|
|
|
|
|
} |
351
|
7867
|
100
|
|
|
|
11583
|
if ($f[1]){ |
352
|
420
|
|
|
|
|
657
|
$ThreeToThree{$f[1]} = $f[0]; |
353
|
420
|
|
|
|
|
643
|
$ThreeToName{$f[1]} = $f[6]; |
354
|
420
|
100
|
|
|
|
631
|
if ($f[3]){ |
355
|
183
|
|
|
|
|
293
|
$ThreeToTwo{$f[1]} = $f[3]; |
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} |
358
|
7867
|
100
|
|
|
|
21493
|
if ($f[2]){ |
359
|
420
|
|
|
|
|
587
|
$ThreeToThree{$f[2]} = $f[0]; |
360
|
420
|
|
|
|
|
578
|
$ThreeToName{$f[2]} = $f[6]; |
361
|
420
|
100
|
|
|
|
947
|
if ($f[3]){ |
362
|
183
|
|
|
|
|
561
|
$ThreeToTwo{$f[2]} = $f[3]; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
sub _read_iso639_5{ |
369
|
|
|
|
|
|
|
## collective languages from ISO639-2 |
370
|
|
|
|
|
|
|
# print STDERR "read collective language codes"; |
371
|
1
|
|
|
1
|
|
4
|
while (){ |
372
|
115
|
|
|
|
|
168
|
chomp; |
373
|
115
|
50
|
|
|
|
173
|
return unless ($_); |
374
|
|
|
|
|
|
|
## URI code English-name French-name |
375
|
115
|
|
|
|
|
366
|
my @f = split(/\t/); |
376
|
115
|
50
|
|
|
|
181
|
next unless ($f[0]); |
377
|
115
|
|
|
|
|
203
|
$ThreeToName{$f[1]} = $f[2]; |
378
|
115
|
|
|
|
|
432
|
$NameToThree{lc($f[2])} = $f[1]; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head1 AUTHOR |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Joerg Tiedemann, C<< >> |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
=head1 BUGS |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
390
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
391
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=head1 SUPPORT |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
perldoc ISO::639::3 |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
You can also look for information at: |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=over 4 |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
L |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
L |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item * CPAN Ratings |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
L |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
=item * Search CPAN |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
L |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=back |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
The language codes are taken from SIL International L. Please, check the terms of use listed at L. The current version uses the UTF-8 tables distributed in C< iso-639-3_Code_Tables_20200130.zip> from that website. This module adds some non-standard codes that are not specified in the original tables to be compatible with some ad-hoc solutions in some resources and tools. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
--------------------------------------------------------------------------- |
432
|
|
|
|
|
|
|
Copyright (c) 2020 Joerg Tiedemann |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy |
435
|
|
|
|
|
|
|
of this software and associated documentation files (the "Software"), to deal |
436
|
|
|
|
|
|
|
in the Software without restriction, including without limitation the rights |
437
|
|
|
|
|
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
438
|
|
|
|
|
|
|
copies of the Software, and to permit persons to whom the Software is |
439
|
|
|
|
|
|
|
furnished to do so, subject to the following conditions: |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be included in all |
442
|
|
|
|
|
|
|
copies or substantial portions of the Software. |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
445
|
|
|
|
|
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
446
|
|
|
|
|
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
447
|
|
|
|
|
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
448
|
|
|
|
|
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
449
|
|
|
|
|
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
450
|
|
|
|
|
|
|
SOFTWARE. |
451
|
|
|
|
|
|
|
--------------------------------------------------------------------------- |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=cut |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
1; # End of ISO::639-3 |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
__DATA__ |