line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# Copyright (c) 2002, Sam Vilain. All rights reserved. This program |
4
|
|
|
|
|
|
|
# is free software; you may use it under the same terms as Perl |
5
|
|
|
|
|
|
|
# itself. |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Portions taken from WWW::Babelfish, by Daniel J. Urist |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
package Lingua::Translate::Babelfish; |
11
|
|
|
|
|
|
|
|
12
|
3
|
|
|
3
|
|
916
|
use strict; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
111
|
|
13
|
3
|
|
|
3
|
|
15
|
use Carp; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
223
|
|
14
|
3
|
|
|
3
|
|
3152
|
use LWP::UserAgent; |
|
3
|
|
|
|
|
160777
|
|
|
3
|
|
|
|
|
110
|
|
15
|
3
|
|
|
3
|
|
2905
|
use HTTP::Request::Common qw(GET POST); |
|
3
|
|
|
|
|
7397
|
|
|
3
|
|
|
|
|
335
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# package globals: |
18
|
|
|
|
|
|
|
# %config is default values to use for new objects |
19
|
|
|
|
|
|
|
# %valid_langs is a hash from a babelfish URI to a hash of "XX_YY" |
20
|
|
|
|
|
|
|
# language pair tags to true values. |
21
|
3
|
|
|
3
|
|
26
|
use vars qw($VERSION %config %valid_langs); |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
264
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# WARNING: Some constants have their default values extracted from the |
24
|
|
|
|
|
|
|
# POD. See the bottom of the file for the code that does this. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 NAME |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
Lingua::Translate::Babelfish - Translation back-end for Altavista's |
29
|
|
|
|
|
|
|
Babelfish, version 0.01 |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 SYNOPSIS |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use Lingua::Translate; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Lingua::Translate::config |
36
|
|
|
|
|
|
|
( |
37
|
|
|
|
|
|
|
backend => "Babelfish", |
38
|
|
|
|
|
|
|
babelfish_uri => |
39
|
|
|
|
|
|
|
'http://babelfish.yahoo.com/translate_txt', |
40
|
|
|
|
|
|
|
ua => LWP::UserAgent->new(), |
41
|
|
|
|
|
|
|
); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $xl8r = Lingua::Translate->new(src => "de", dest => "en"); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# prints "My hovercraft is full of eels" |
46
|
|
|
|
|
|
|
print $xl8r->translate("Mein Luftkissenfahrzeug ist voll von den Aalen"); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Lingua::Translate::Babelfish is a translation back-end for |
51
|
|
|
|
|
|
|
Lingua::Translate that contacts babelfish.altavisa.com to do the real |
52
|
|
|
|
|
|
|
work. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
It is normally invoked by Lingua::Translate; there should be no need |
55
|
|
|
|
|
|
|
to call it directly. If you do call it directly, you will lose the |
56
|
|
|
|
|
|
|
ability to easily switch your programs over to alternate back-ends |
57
|
|
|
|
|
|
|
that are later produced. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head2 new(src => $lang, dest => lang, option => $value) |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Creates a new translation handle. This method contacts Babelfish to |
64
|
|
|
|
|
|
|
determine whether the requested language pair is available. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=over |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=item src |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Source language, in RFC-3066 form. See L for a |
71
|
|
|
|
|
|
|
discussion of RFC-3066 language tags. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item dest |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
Destination Language |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=item ua |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=back |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Other options that may be passed to the config() function (see below) |
82
|
|
|
|
|
|
|
may also be passed as arguments to this constructor. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=cut |
85
|
|
|
|
|
|
|
|
86
|
3
|
|
|
3
|
|
1272
|
use I18N::LangTags qw(is_language_tag); |
|
3
|
|
|
|
|
2834
|
|
|
3
|
|
|
|
|
1916
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub new { |
89
|
2
|
|
|
2
|
1
|
496
|
my ($class, %options) = (@_); |
90
|
|
|
|
|
|
|
|
91
|
2
|
|
|
|
|
17
|
my $self = bless { %config }, $class; |
92
|
|
|
|
|
|
|
|
93
|
2
|
50
|
33
|
|
|
21
|
croak "Must supply source and destination language" |
94
|
|
|
|
|
|
|
unless (defined $options{src} and defined $options{dest}); |
95
|
|
|
|
|
|
|
|
96
|
2
|
50
|
|
|
|
23
|
is_language_tag($self->{src} = delete $options{src}) |
97
|
|
|
|
|
|
|
or croak "$self->{src} is not a valid RFC3066 language tag"; |
98
|
|
|
|
|
|
|
|
99
|
2
|
50
|
|
|
|
35
|
is_language_tag($self->{dest} = delete $options{dest}) |
100
|
|
|
|
|
|
|
or croak "$self->{dest} is not a valid RFC3066 language tag"; |
101
|
|
|
|
|
|
|
|
102
|
2
|
|
|
|
|
28
|
$self->config(%options); |
103
|
|
|
|
|
|
|
|
104
|
2
|
|
|
|
|
7
|
return $self; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 METHODS |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
The following methods may be called on Lingua::Translate::Babelfish |
110
|
|
|
|
|
|
|
objects. |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head2 translate($text) : $translated |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Translates the given text. die's on any kind of error. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
If too large a block of text is given to translate, it is broken up to |
117
|
|
|
|
|
|
|
the nearest sentence, which may fail if you have extremely long |
118
|
|
|
|
|
|
|
sentences that wouldn't normally be found in normal language, unless |
119
|
|
|
|
|
|
|
you were either sending in some text that has no punctuation at all in |
120
|
|
|
|
|
|
|
it or for some reason some person was rambling on and on about totally |
121
|
|
|
|
|
|
|
irrelevant things such as cheese, one of the finest foods produced by |
122
|
|
|
|
|
|
|
mankind, or simply was the sort of person who don't like ending |
123
|
|
|
|
|
|
|
sentences but instead merely keep going with one big sentence with the |
124
|
|
|
|
|
|
|
hope that it keeps readers going, though its usual effect is merely to |
125
|
|
|
|
|
|
|
confuse. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
The previous paragraph gets translated by Babelfish OK. |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub translate { |
133
|
4
|
|
|
4
|
1
|
339
|
my $self = shift; |
134
|
4
|
50
|
|
|
|
27
|
UNIVERSAL::isa($self, __PACKAGE__) |
135
|
|
|
|
|
|
|
or croak __PACKAGE__."::translate() called as function"; |
136
|
|
|
|
|
|
|
|
137
|
4
|
|
|
|
|
8
|
my $text = shift; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# chunkify the text. Knowing how to do this properly is really |
140
|
|
|
|
|
|
|
# the job of an expert system, so I'll keep it simple and break on |
141
|
|
|
|
|
|
|
# English sentence terminators (which might be completely the |
142
|
|
|
|
|
|
|
# wrong thing to do for other languages. Oh well) |
143
|
4
|
|
|
|
|
129
|
my @chunks = ($text =~ m/\G\s* # strip excess white space |
144
|
|
|
|
|
|
|
( |
145
|
|
|
|
|
|
|
# some non-whitespace, then some data |
146
|
|
|
|
|
|
|
\S.{0,$self->{chunk_size}} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# either a full stop or the end of |
149
|
|
|
|
|
|
|
# string |
150
|
|
|
|
|
|
|
(?:[\.;:!\?]|$) |
151
|
|
|
|
|
|
|
) |
152
|
|
|
|
|
|
|
/xsg); |
153
|
4
|
50
|
33
|
|
|
25
|
die "Could not break up given text into chunks" |
154
|
|
|
|
|
|
|
if (pos($text) and pos($text) < length($text)); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# the translated text |
157
|
4
|
|
|
|
|
8
|
my ($translated, $error); |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
CHUNK: |
160
|
4
|
|
|
|
|
9
|
for my $chunk ( @chunks ) { |
161
|
|
|
|
|
|
|
# make a new request object |
162
|
4
|
|
|
|
|
48
|
my $req = POST ($self->{babelfish_uri}, |
163
|
|
|
|
|
|
|
[ |
164
|
|
|
|
|
|
|
'doit' => 'done', |
165
|
|
|
|
|
|
|
'intl' => '1', |
166
|
|
|
|
|
|
|
'tt' => 'urltext', |
167
|
|
|
|
|
|
|
'trtext' => $chunk, |
168
|
4
|
|
|
|
|
12
|
'lp' => join("_", @{$self}{qw(src dest)}), |
169
|
|
|
|
|
|
|
'Submit' => 'Translate', |
170
|
|
|
|
|
|
|
'ei' => 'UTF-8', |
171
|
|
|
|
|
|
|
'fr' => 'bf-res', |
172
|
|
|
|
|
|
|
]); |
173
|
|
|
|
|
|
|
|
174
|
4
|
|
|
|
|
20936
|
$req->header("Accept-Charset", "utf-8"); |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
RETRY: |
177
|
|
|
|
|
|
|
# try several times to reach babelfish |
178
|
4
|
|
|
|
|
186
|
for my $attempt ( 1 .. $self->{retries} + 1 ) { |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# go go gadget LWP::UserAgent |
181
|
12
|
|
|
|
|
220
|
my $res = $self->agent->request($req); |
182
|
|
|
|
|
|
|
|
183
|
12
|
50
|
|
|
|
2812324
|
if( $res->is_success ){ |
184
|
|
|
|
|
|
|
|
185
|
0
|
|
|
|
|
0
|
my $output = $self->_extract_text($res->as_string, $res->header("Content-Type")); |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Reject outputs that are too useless. |
188
|
0
|
0
|
|
|
|
0
|
next RETRY if $output =~ /^\*\*time-out\*\*|^ $/; |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# babelfish errors (will these always be reported in |
191
|
|
|
|
|
|
|
# English?) |
192
|
0
|
0
|
|
|
|
0
|
next RETRY if $output =~ /We were unable to process your? request within the available time/; |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Babelfish likes to append newlines |
195
|
0
|
|
|
|
|
0
|
$output =~ s/\n$//; |
196
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
0
|
$translated .= $output; |
198
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
0
|
next CHUNK; |
200
|
|
|
|
|
|
|
} else { |
201
|
12
|
|
|
|
|
210
|
$error .= "Request $attempt:".$res->status_line."; "; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
# give up |
206
|
4
|
|
|
|
|
373
|
die "Request timed out more than $self->{retries} times ($error)"; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
} # for my $chunk ... |
209
|
|
|
|
|
|
|
|
210
|
0
|
|
|
|
|
0
|
return $translated; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
3
|
|
|
3
|
|
1132
|
use Unicode::MapUTF8 qw(to_utf8); |
|
3
|
|
|
|
|
353944
|
|
|
3
|
|
|
|
|
3656
|
|
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Extract the text from the html we get back from babelfish and return |
217
|
|
|
|
|
|
|
# it. It seems that SysTrans are really trying to make this hard to |
218
|
|
|
|
|
|
|
# screen scrape, oh well. |
219
|
|
|
|
|
|
|
sub _extract_text { |
220
|
0
|
|
|
0
|
|
0
|
my($self, $html, $contenttype) = @_; |
221
|
|
|
|
|
|
|
|
222
|
0
|
0
|
|
|
|
0
|
my ($translated) = |
223
|
|
|
|
|
|
|
($html =~ m{ |
|
|
|
|
|
|
(?: )? |
225
|
|
|
|
|
|
|
([^<]*)}xs) |
226
|
|
|
|
|
|
|
or die "Babelfish response unparsable, brain needed"; |
227
|
|
|
|
|
|
|
|
228
|
0
|
|
|
|
|
0
|
my ($encoding) = ($contenttype =~ m/charset=(\S*)/); |
229
|
|
|
|
|
|
|
|
230
|
0
|
0
|
|
|
|
0
|
if ( $encoding =~ /^utf-?8$/i ) { |
231
|
0
|
|
|
|
|
0
|
return $translated |
232
|
|
|
|
|
|
|
} else { |
233
|
0
|
|
|
|
|
0
|
return to_utf8({ -string => $translated, -charset => $encoding }); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head2 available() : @list |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
Returns a list of available language pairs, in the form of "XX_YY", |
240
|
|
|
|
|
|
|
where XX is the source language and YY is the destination. If you |
241
|
|
|
|
|
|
|
want the english name of a language tag, call |
242
|
|
|
|
|
|
|
I18N::LangTags::List::name() on it. See L. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=cut |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub available { |
247
|
|
|
|
|
|
|
|
248
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
249
|
0
|
0
|
|
|
|
0
|
UNIVERSAL::isa($self, __PACKAGE__) |
250
|
|
|
|
|
|
|
or croak __PACKAGE__."::available() called as function"; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# return a cached result |
253
|
0
|
0
|
|
|
|
0
|
if ( my $ok_langs = $valid_langs{$self->{babelfish_uri}} ) { |
254
|
0
|
|
|
|
|
0
|
return keys %$ok_langs; |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# create a new request |
258
|
0
|
|
|
|
|
0
|
my $req = GET $self->{babelfish_uri}; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
# go get it |
261
|
0
|
|
|
|
|
0
|
my $res = $self->agent->request($req); |
262
|
0
|
0
|
|
|
|
0
|
die "Babelfish fetch failed; ".$res->status_line |
263
|
|
|
|
|
|
|
unless $res->is_success; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# extract out the languages |
266
|
0
|
|
|
|
|
0
|
my $page = $res->content; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# OK, so this only works for languages with two letter language |
269
|
|
|
|
|
|
|
# codes. But can YOU see babelfish supporting any language that |
270
|
|
|
|
|
|
|
# don't have a two letter code in the near future? |
271
|
0
|
|
|
|
|
0
|
my @list; |
272
|
0
|
|
|
|
|
0
|
for my $pair ($page =~ m/option value="([a-z][a-z]_[a-z][a-z])"/g) { |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# check that the pair is really a language |
275
|
0
|
|
|
|
|
0
|
my ($src, $dest) = split /_/, $pair; |
276
|
0
|
0
|
0
|
|
|
0
|
unless ( is_language_tag($src) and is_language_tag($dest) ) { |
277
|
0
|
|
|
|
|
0
|
warn "Don't recognise `$pair' as a valid language pair"; |
278
|
0
|
|
|
|
|
0
|
next; |
279
|
|
|
|
|
|
|
}; |
280
|
|
|
|
|
|
|
|
281
|
0
|
|
|
|
|
0
|
push @list, $pair; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# save the result |
285
|
0
|
|
|
|
|
0
|
$valid_langs{$self->{babelfish_uri}} = map { $_ => 1 } @list; |
|
0
|
|
|
|
|
0
|
|
286
|
|
|
|
|
|
|
|
287
|
0
|
|
|
|
|
0
|
return @list; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 agent() : LWP::UserAgent |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Returns the LWP::UserAgent object used to contact Babelfish. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=cut |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub agent { |
297
|
|
|
|
|
|
|
|
298
|
12
|
|
|
12
|
1
|
22
|
my $self; |
299
|
12
|
50
|
|
|
|
81
|
if ( UNIVERSAL::isa($_[0], __PACKAGE__) ) { |
300
|
12
|
|
|
|
|
22
|
$self = shift; |
301
|
|
|
|
|
|
|
} else { |
302
|
0
|
|
|
|
|
0
|
$self = \%config; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
12
|
100
|
|
|
|
59
|
unless ( $self->{ua} ) { |
306
|
2
|
|
|
|
|
25
|
$self->{ua} = LWP::UserAgent->new(); |
307
|
2
|
|
|
|
|
6758
|
$self->{ua}->agent($self->{agent}); |
308
|
2
|
|
|
|
|
161
|
$self->{ua}->env_proxy(); |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
12
|
|
|
|
|
11532
|
$self->{ua}; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=head1 CONFIGURATION FUNCTIONS |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
The following are functions, and not method calls. You will probably |
318
|
|
|
|
|
|
|
not need them with normal use of the module. |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 config(option => $value) |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
This function sets defaults for use when constructing objects. |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub config { |
327
|
|
|
|
|
|
|
|
328
|
14
|
|
|
14
|
1
|
16
|
my $self; |
329
|
14
|
100
|
|
|
|
94
|
if ( UNIVERSAL::isa($_[0], __PACKAGE__) ) { |
330
|
2
|
|
|
|
|
7
|
$self = shift; |
331
|
|
|
|
|
|
|
} else { |
332
|
12
|
|
|
|
|
22
|
$self = \%config; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
14
|
|
|
|
|
61
|
while ( my ($option, $value) = splice @_, 0, 2 ) { |
336
|
|
|
|
|
|
|
|
337
|
12
|
100
|
|
|
|
64
|
if ( $option eq "babelfish_uri" ) { |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# set the Babelfish URI |
340
|
3
|
50
|
|
|
|
39
|
($self->{babelfish_uri} = $value) =~ m/\?(.*&)?$/ |
341
|
|
|
|
|
|
|
or croak "Babelfish URI `$value' not a query URI"; |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
} elsif ( $option eq "ua" ) { |
344
|
0
|
|
|
|
|
0
|
$self->{ua} = $value; |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
} elsif ( $option eq "agent" ) { |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# set the user-agent |
349
|
3
|
|
|
|
|
8
|
$self->{agent} = $value; |
350
|
3
|
50
|
|
|
|
27
|
$self->{ua}->agent($value) if $self->{ua}; |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
} elsif ( $option eq "chunk_size" ) { |
353
|
|
|
|
|
|
|
|
354
|
3
|
|
|
|
|
19
|
$self->{chunk_size} = $value; |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
} elsif ( $option eq "retries" ) { |
357
|
|
|
|
|
|
|
|
358
|
3
|
|
|
|
|
23
|
$self->{retries} = $value; |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
} else { |
361
|
|
|
|
|
|
|
|
362
|
0
|
|
|
|
|
|
croak "Unknown configuration option $option"; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=over |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=item babelfish_uri |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
The uri to use when contacting Babelfish. |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
The default value is |
374
|
|
|
|
|
|
|
"http://babelfish.yahoo.com/translate_txt?" |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=item agent |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
The User-Agent to pretend to be when contacting Babelfish. |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
The default value is "Lingua::Translate::Babelfish/", plus the version |
381
|
|
|
|
|
|
|
number of the package. |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item chunk_size |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
The size to break chunks into before handing them off to Babelfish. |
386
|
|
|
|
|
|
|
The default value is "1000" (bytes). |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=item retries |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
The number of times to retry contacting Babelfish if the first attempt |
391
|
|
|
|
|
|
|
fails. The default value is "2". |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=back |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=cut |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
# extract configuration options from the POD |
398
|
|
|
|
|
|
|
use Pod::Constants |
399
|
3
|
|
|
|
|
2310
|
'NAME' => sub { ($VERSION) = (m/(\d+\.\d+)/); }, |
400
|
|
|
|
|
|
|
'CONFIGURATION FUNCTIONS' => sub { |
401
|
|
|
|
|
|
|
Pod::Constants::add_hook |
402
|
|
|
|
|
|
|
('*item' => sub { |
403
|
12
|
|
|
|
|
2998
|
my ($varname) = m/(\w+)/; |
404
|
|
|
|
|
|
|
#my ($default) = m/The default value is\s+"(.*)"\./s; |
405
|
12
|
|
|
|
|
56
|
my ($default) = m/The default value is\s+"(.*)"/s; |
406
|
12
|
|
|
|
|
34
|
config($varname => $default); |
407
|
|
|
|
|
|
|
} |
408
|
3
|
|
|
|
|
9557
|
); |
409
|
|
|
|
|
|
|
Pod::Constants::add_hook |
410
|
|
|
|
|
|
|
( |
411
|
|
|
|
|
|
|
'*back' => sub { |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# an ugly hack? |
414
|
3
|
|
|
|
|
729
|
$config{agent} .= $VERSION; |
415
|
|
|
|
|
|
|
|
416
|
3
|
|
|
|
|
14
|
Pod::Constants::delete_hook('*item'); |
417
|
3
|
|
|
|
|
54
|
Pod::Constants::delete_hook('*back'); |
418
|
|
|
|
|
|
|
} |
419
|
3
|
|
|
|
|
103
|
); |
420
|
3
|
|
|
3
|
|
1264
|
}; |
|
3
|
|
|
|
|
28780
|
|
|
3
|
|
|
|
|
51
|
|
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
1; |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
__END__ |