line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Translate::Apertium; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
32504
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
114
|
|
4
|
2
|
|
|
2
|
|
14
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
69
|
|
5
|
2
|
|
|
2
|
|
16
|
use Carp qw(carp); |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
151
|
|
6
|
2
|
|
|
2
|
|
6841
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
193398
|
|
|
2
|
|
|
|
|
297
|
|
7
|
2
|
|
|
2
|
|
24
|
use URI::Escape; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
174
|
|
8
|
2
|
|
|
2
|
|
2512
|
use HTML::Entities; |
|
2
|
|
|
|
|
19137
|
|
|
2
|
|
|
|
|
200
|
|
9
|
2
|
|
|
2
|
|
2084
|
use Encode; |
|
2
|
|
|
|
|
27904
|
|
|
2
|
|
|
|
|
198
|
|
10
|
2
|
|
|
2
|
|
2408
|
use utf8; |
|
2
|
|
|
|
|
25
|
|
|
2
|
|
|
|
|
11
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = '0.16'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my %lang_pairs = ( |
17
|
|
|
|
|
|
|
'es-ca' => 'Spanish -> Catalan', # Default |
18
|
|
|
|
|
|
|
'ca-es' => 'Catalan -> Spanish', |
19
|
|
|
|
|
|
|
'es-gl' => 'Spanish -> Galician', |
20
|
|
|
|
|
|
|
'gl-es' => 'Galician -> Spanish', |
21
|
|
|
|
|
|
|
'es-pt' => 'Spanish -> Portuguese', |
22
|
|
|
|
|
|
|
'pt-es' => 'Portuguese -> Spanish', |
23
|
|
|
|
|
|
|
'ca-pt' => 'Catalan -> Portuguese', |
24
|
|
|
|
|
|
|
'pt-ca' => 'Portuguese -> Catalan', |
25
|
|
|
|
|
|
|
'gl-pt' => 'Galician -> Portuguese', |
26
|
|
|
|
|
|
|
'pt-gl' => 'Portuguese -> Galician', |
27
|
|
|
|
|
|
|
'es-pt_BR' => 'Spanish -> Brazilian Portuguese', |
28
|
|
|
|
|
|
|
'oc-ca' => 'Occitan -> Catalan', |
29
|
|
|
|
|
|
|
'ca-oc' => 'Catalan -> Occitan', |
30
|
|
|
|
|
|
|
'oc-es' => 'Occitan -> Spanish', |
31
|
|
|
|
|
|
|
'es-oc' => 'Spanish -> Occitan', |
32
|
|
|
|
|
|
|
'oc_aran-ca' => 'Aranese -> Catalan', |
33
|
|
|
|
|
|
|
'ca-oc_aran' => 'Catalan -> Aranese', |
34
|
|
|
|
|
|
|
'en-ca' => 'English -> Catalan', |
35
|
|
|
|
|
|
|
'ca-en' => 'Catalan -> English', |
36
|
|
|
|
|
|
|
'fr-ca' => 'French -> Catalan', |
37
|
|
|
|
|
|
|
'ca-fr' => 'Catalan -> French', |
38
|
|
|
|
|
|
|
'fr-es' => 'French -> Spanish', |
39
|
|
|
|
|
|
|
'es-fr' => 'Spanish -> French', |
40
|
|
|
|
|
|
|
'ca-eo' => 'Catalan -> Esperanto', |
41
|
|
|
|
|
|
|
'es-eo' => 'Spanish -> Esperanto', |
42
|
|
|
|
|
|
|
'en-eo' => 'English -> Esperanto', |
43
|
|
|
|
|
|
|
'eo-en' => 'Esperanto -> English', |
44
|
|
|
|
|
|
|
'ro-es' => 'Romanian -> Spanish', |
45
|
|
|
|
|
|
|
'es-en' => 'Spanish -> English', |
46
|
|
|
|
|
|
|
'en-es' => 'English -> Spanish', |
47
|
|
|
|
|
|
|
'cy-en' => 'Welsh -> English', |
48
|
|
|
|
|
|
|
'eu-es' => 'Basque -> Spanish', |
49
|
|
|
|
|
|
|
'en-gl' => 'English -> Galician', |
50
|
|
|
|
|
|
|
'gl-en' => 'Galician -> English', |
51
|
|
|
|
|
|
|
'br-fr' => 'Breton -> French', |
52
|
|
|
|
|
|
|
'nb-nn' => 'Norwegian Bokmål -> Norwegian Nynorsk', |
53
|
|
|
|
|
|
|
'nn-nb' => 'Norwegian Nynorsk -> Norwegian Bokmål', |
54
|
|
|
|
|
|
|
'sv-da' => 'Swedish-Danish', |
55
|
|
|
|
|
|
|
'es-ast' => 'Spanish-Asturian', |
56
|
|
|
|
|
|
|
'is-en' => 'Icelandic-English', |
57
|
|
|
|
|
|
|
'bg-mk' => 'Bulgarian-Macedonian', |
58
|
|
|
|
|
|
|
'mk-bg' => 'Macedonian-Bulgarian', |
59
|
|
|
|
|
|
|
); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
my %output = ( |
62
|
|
|
|
|
|
|
plain_text => 'txtf', # default |
63
|
|
|
|
|
|
|
marked_text => 'txt', |
64
|
|
|
|
|
|
|
); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my %defaults = ( |
67
|
|
|
|
|
|
|
lang_pair => 'ca-es', |
68
|
|
|
|
|
|
|
output => 'plain_text', |
69
|
|
|
|
|
|
|
store_unknown => 0, |
70
|
|
|
|
|
|
|
); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub new { |
74
|
2
|
|
|
2
|
1
|
829
|
my $class = shift; |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# validate overrides |
77
|
2
|
|
|
|
|
20
|
my %overrides = @_; |
78
|
2
|
|
|
|
|
8
|
foreach (keys %overrides) { |
79
|
|
|
|
|
|
|
# check key; warn if illegal |
80
|
2
|
50
|
|
|
|
43
|
carp "Unknown parameter: $_\n" unless exists $defaults{$_}; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# check value; warn and delete if illegal |
83
|
2
|
50
|
66
|
|
|
92
|
if ($_ eq 'output' && !exists $output{$overrides{output}}) { |
84
|
0
|
|
|
|
|
0
|
carp _message('output', $overrides{output}); |
85
|
0
|
|
|
|
|
0
|
delete $overrides{output}; |
86
|
|
|
|
|
|
|
} |
87
|
2
|
50
|
66
|
|
|
15
|
if ($_ eq 'lang_pair' && !exists $lang_pairs{$overrides{lang_pair}}) { |
88
|
0
|
|
|
|
|
0
|
carp _message('lang_pair', $overrides{lang_pair}); |
89
|
0
|
|
|
|
|
0
|
delete $overrides{lang_pair}; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# replace defaults with overrides |
94
|
2
|
|
|
|
|
13
|
my %args = (%defaults, %overrides); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# remove invalid parameters |
97
|
2
|
|
|
|
|
9
|
my @fields = keys %defaults; |
98
|
2
|
|
|
|
|
4
|
my %this; |
99
|
2
|
|
|
|
|
9
|
@this{@fields} = @args{@fields}; |
100
|
|
|
|
|
|
|
|
101
|
2
|
50
|
|
|
|
11
|
if ($this{store_unknown}) { |
102
|
0
|
|
|
|
|
0
|
$this{unknown} = (); |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
2
|
|
|
|
|
34
|
$this{agent} = LWP::UserAgent->new( agent => 'apertium2perl' ); |
107
|
2
|
|
|
|
|
6654
|
$this{agent}->env_proxy(); |
108
|
2
|
|
|
|
|
6164
|
$this{url} = 'http://xixona.dlsi.ua.es/webservice/ws.php'; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
2
|
|
|
|
|
33
|
return bless(\%this, $class); |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub translate { |
116
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
0
|
my $string; |
119
|
0
|
0
|
|
|
|
0
|
if (@_ > 0) { |
120
|
0
|
|
|
|
|
0
|
$string = shift; |
121
|
|
|
|
|
|
|
} else { |
122
|
0
|
|
|
|
|
0
|
carp "Nothing to translate\n"; |
123
|
0
|
|
|
|
|
0
|
return ''; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
0
|
0
|
|
|
|
0
|
return '' if ($string eq ''); |
127
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
0
|
$string = _fix_source($string); |
129
|
0
|
|
|
|
|
0
|
$string = uri_escape_utf8($string); |
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
0
|
my $browser = $self->{agent}; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
0
|
my $source_lang = substr($self->{lang_pair}, 0, 2); |
135
|
0
|
|
|
|
|
0
|
my $target_lang = substr($self->{lang_pair}, 3, 2); |
136
|
|
|
|
|
|
|
|
137
|
0
|
|
|
|
|
0
|
my $url = "$self->{url}?mode=$self->{lang_pair}&format=txt&text=$string"; |
138
|
|
|
|
|
|
|
|
139
|
0
|
0
|
|
|
|
0
|
if ($self->{output} eq 'marked_text') { |
140
|
0
|
|
|
|
|
0
|
$url .= "&mark=1"; |
141
|
|
|
|
|
|
|
} else { |
142
|
0
|
|
|
|
|
0
|
$url .= "&mark=0"; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
0
|
my $response = $browser->get($url); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
0
|
0
|
|
|
|
0
|
unless ($response->is_success) { |
149
|
0
|
|
|
|
|
0
|
carp $response->status_line; |
150
|
0
|
|
|
|
|
0
|
return undef; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
0
|
0
|
|
|
|
0
|
if (!defined $response) { |
155
|
0
|
|
|
|
|
0
|
carp "Didn't receive a translation from the Apertium server.\n" . |
156
|
|
|
|
|
|
|
"Please check the length of the source text.\n"; |
157
|
0
|
|
|
|
|
0
|
return ''; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
0
|
my $translated = _fix_translated($response->{'_content'}); |
161
|
|
|
|
|
|
|
|
162
|
0
|
|
|
|
|
0
|
$translated = decode_utf8($translated); |
163
|
0
|
|
|
|
|
0
|
$translated = decode_entities($translated); |
164
|
|
|
|
|
|
|
|
165
|
0
|
0
|
|
|
|
0
|
if ($self->{output} eq 'marked_text') { |
166
|
|
|
|
|
|
|
|
167
|
0
|
0
|
|
|
|
0
|
if ($self->{store_unknown}) { |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# store unknown words |
170
|
0
|
0
|
|
|
|
0
|
if ($translated =~ /(?:^|\W)\*/) { |
171
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
0
|
while ($translated =~ /(?:^|\W)\*(\w+?)\b/g) { |
173
|
0
|
|
|
|
|
0
|
$self->{unknown}->{$source_lang}->{$1}++; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
0
|
|
|
|
|
0
|
return $translated; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub from_into { |
183
|
53
|
|
|
53
|
1
|
946
|
my $self = shift; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
186
|
53
|
100
|
|
|
|
128
|
if (@_) { |
187
|
26
|
|
|
|
|
35
|
my $pair = shift; |
188
|
26
|
50
|
|
|
|
208
|
if (!exists $lang_pairs{$pair}) { |
189
|
0
|
|
|
|
|
0
|
carp _message('lang_pair', $pair); |
190
|
0
|
|
|
|
|
0
|
$self->{lang_pair} = $defaults{'lang_pair'}; |
191
|
|
|
|
|
|
|
} else { |
192
|
26
|
|
|
|
|
77
|
$self->{lang_pair} = $pair; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} else { |
195
|
27
|
|
|
|
|
135
|
return $self->{lang_pair}; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
sub output_format { |
200
|
4
|
|
|
4
|
1
|
8
|
my $self = shift; |
201
|
|
|
|
|
|
|
|
202
|
4
|
100
|
|
|
|
14
|
if (@_) { |
203
|
1
|
|
|
|
|
5
|
my $format = shift; |
204
|
1
|
50
|
|
|
|
15
|
$self->{output} = $format if exists $output{$format}; |
205
|
|
|
|
|
|
|
} else { |
206
|
3
|
|
|
|
|
17
|
return $self->{output}; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub get_unknown { |
211
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
212
|
|
|
|
|
|
|
|
213
|
0
|
0
|
0
|
|
|
|
if (@_ && $self->{store_unknown}) { |
214
|
0
|
|
|
|
|
|
my $lang_code = shift; |
215
|
0
|
0
|
|
|
|
|
if ($lang_code =~ /^(?:br|ca|cy|en|eo|es|eu|fr|gl|is|nb|nn|oc|oc_aran|pt|ro|sv|bg|mk)$/) { |
216
|
0
|
|
|
|
|
|
return $self->{unknown}->{$lang_code}; |
217
|
|
|
|
|
|
|
} else { |
218
|
0
|
|
|
|
|
|
carp "Invalid language code\n"; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
} else { |
221
|
0
|
|
|
|
|
|
carp "I'm not configured to store unknown words\n"; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub get_pairs { |
226
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
227
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
|
return %lang_pairs; |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
sub _message { |
232
|
0
|
|
|
0
|
|
|
my ($key, $value) = @_; |
233
|
|
|
|
|
|
|
|
234
|
0
|
|
|
|
|
|
my $string = "Invalid value for parameter $key, $value.\n" . |
235
|
|
|
|
|
|
|
"Will use the default value instead.\n"; |
236
|
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
|
return $string; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
sub _fix_source { |
241
|
0
|
|
|
0
|
|
|
my ($string) = @_; |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# fix geminated l; replace . by chr(183) = hex B7 |
244
|
0
|
|
|
|
|
|
$string =~ s/l\.l/l\xB7l/g; |
245
|
|
|
|
|
|
|
|
246
|
0
|
|
|
|
|
|
return $string; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
sub _fix_translated { |
250
|
0
|
|
|
0
|
|
|
my ($string) = @_; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# remove double spaces |
253
|
0
|
|
|
|
|
|
$string =~ s/(?<=\S)\s{2}(?=\S)/ /g; |
254
|
|
|
|
|
|
|
|
255
|
0
|
|
|
|
|
|
return $string; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
1; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
__END__ |