blib/lib/App/Greple/xlate.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 26 | 127 | 20.4 |
branch | 0 | 70 | 0.0 |
condition | 0 | 14 | 0.0 |
subroutine | 9 | 24 | 37.5 |
pod | 0 | 15 | 0.0 |
total | 35 | 250 | 14.0 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package App::Greple::xlate; | ||||||
2 | |||||||
3 | our $VERSION = "0.23"; | ||||||
4 | |||||||
5 | =encoding utf-8 | ||||||
6 | |||||||
7 | =head1 NAME | ||||||
8 | |||||||
9 | App::Greple::xlate - translation support module for greple | ||||||
10 | |||||||
11 | =head1 SYNOPSIS | ||||||
12 | |||||||
13 | greple -Mxlate::deepl --xlate pattern target-file | ||||||
14 | |||||||
15 | =head1 VERSION | ||||||
16 | |||||||
17 | Version 0.23 | ||||||
18 | |||||||
19 | =head1 DESCRIPTION | ||||||
20 | |||||||
21 | B |
||||||
22 | translated text. Currently only DeepL service is supported by the | ||||||
23 | B |
||||||
24 | |||||||
25 | If you want to translate normal text block in L |
||||||
26 | use B |
||||||
27 | this: | ||||||
28 | |||||||
29 | greple -Mxlate::deepl -Mperl --pod --re '^(\w.*\n)+' --all foo.pm | ||||||
30 | |||||||
31 | Pattern C<^(\w.*\n)+> means consecutive lines starting with | ||||||
32 | alpha-numeric letter. This command shows the area to be translated. | ||||||
33 | Option B<--all> is used to produce entire text. | ||||||
34 | |||||||
35 | =for html
|
||||||
36 | |||||||
37 | |||||||
38 | |||||||
39 | Then add C<--xlate> option to translate the selected area. It will | ||||||
40 | find and replace them by the B |
||||||
41 | |||||||
42 | By default, original and translated text is printed in the "conflict | ||||||
43 | marker" format compatible with L |
||||||
44 | can get desired part by L |
||||||
45 | specified by B<--xlate-format> option. | ||||||
46 | |||||||
47 | =for html
|
||||||
48 | |||||||
49 | |||||||
50 | |||||||
51 | If you want to translate entire text, use B<--match-all> option. | ||||||
52 | This is a short-cut to specify the pattern matches entire text | ||||||
53 | C<(?s).+>. | ||||||
54 | |||||||
55 | =head1 OPTIONS | ||||||
56 | |||||||
57 | =over 7 | ||||||
58 | |||||||
59 | =item B<--xlate> | ||||||
60 | |||||||
61 | =item B<--xlate-color> | ||||||
62 | |||||||
63 | =item B<--xlate-fold> | ||||||
64 | |||||||
65 | =item B<--xlate-fold-width>=I |
||||||
66 | |||||||
67 | Invoke the translation process for each matched area. | ||||||
68 | |||||||
69 | Without this option, B |
||||||
70 | you can check which part of the file will be subject of the | ||||||
71 | translation before invoking actual work. | ||||||
72 | |||||||
73 | Command result goes to standard out, so redirect to file if necessary, | ||||||
74 | or consider to use L |
||||||
75 | |||||||
76 | Option B<--xlate> calls B<--xlate-color> option with B<--color=never> | ||||||
77 | option. | ||||||
78 | |||||||
79 | With B<--xlate-fold> option, converted text is folded by the specified | ||||||
80 | width. Default width is 70 and can be set by B<--xlate-fold-width> | ||||||
81 | option. Four columns are reserved for run-in operation, so each line | ||||||
82 | could hold 74 characters at most. | ||||||
83 | |||||||
84 | =item B<--xlate-engine>=I |
||||||
85 | |||||||
86 | Specify the translation engine to be used. You don't have to use this | ||||||
87 | option because module C |
||||||
88 | C<--xlate-engine=deepl>. | ||||||
89 | |||||||
90 | =item B<--xlate-labor> | ||||||
91 | |||||||
92 | =item B<--xlabor> | ||||||
93 | |||||||
94 | Instead of calling the translation engine, you are expected to do the work yourself. | ||||||
95 | After preparing text to be translated, they are copied to the | ||||||
96 | clipboard. You are expected to paste them to the form, copy the | ||||||
97 | result to the clipboard, and hit return. | ||||||
98 | |||||||
99 | =item B<--xlate-to> (Default: C |
||||||
100 | |||||||
101 | Specify the target language. You can get available languages by | ||||||
102 | C |
||||||
103 | |||||||
104 | =item B<--xlate-format>=I |
||||||
105 | |||||||
106 | Specify the output format for original and translated text. | ||||||
107 | |||||||
108 | =over 4 | ||||||
109 | |||||||
110 | =item B |
||||||
111 | |||||||
112 | Print original and translated text in L |
||||||
113 | |||||||
114 | <<<<<<< ORIGINAL | ||||||
115 | original text | ||||||
116 | ======= | ||||||
117 | translated Japanese text | ||||||
118 | >>>>>>> JA | ||||||
119 | |||||||
120 | You can recover the original file by next L |
||||||
121 | |||||||
122 | sed -e '/^<<<<<<< /d' -e '/^=======$/,/^>>>>>>> /d' | ||||||
123 | |||||||
124 | =item B |
||||||
125 | |||||||
126 | Print original and translated text in L |
||||||
127 | |||||||
128 | #ifdef ORIGINAL | ||||||
129 | original text | ||||||
130 | #endif | ||||||
131 | #ifdef JA | ||||||
132 | translated Japanese text | ||||||
133 | #endif | ||||||
134 | |||||||
135 | You can retrieve only Japanese text by the B |
||||||
136 | |||||||
137 | unifdef -UORIGINAL -DJA foo.ja.pm | ||||||
138 | |||||||
139 | =item B |
||||||
140 | |||||||
141 | Print original and translated text separated by single blank line. | ||||||
142 | |||||||
143 | =item B |
||||||
144 | |||||||
145 | If the format is C |
||||||
146 | text is printed. | ||||||
147 | |||||||
148 | =back | ||||||
149 | |||||||
150 | =item B<--xlate-maxlen>=I |
||||||
151 | |||||||
152 | Specify the maximum length of text to be sent to the API at once. | ||||||
153 | Default value is set as for free account service: 128K for the API | ||||||
154 | (B<--xlate>) and 5000 for the clipboard interface (B<--xlate-labor>). | ||||||
155 | You may be able to change these values if you are using Pro service. | ||||||
156 | |||||||
157 | =item B<-->[B |
||||||
158 | |||||||
159 | See the translation result in real time in the STDERR output. | ||||||
160 | |||||||
161 | =item B<--match-all> | ||||||
162 | |||||||
163 | Set the whole text of the file as a target area. | ||||||
164 | |||||||
165 | =back | ||||||
166 | |||||||
167 | =head1 CACHE OPTIONS | ||||||
168 | |||||||
169 | B |
||||||
170 | read it before execution to eliminate the overhead of asking to | ||||||
171 | server. With the default cache strategy C |
||||||
172 | data only when the cache file exists for target file. | ||||||
173 | |||||||
174 | =over 7 | ||||||
175 | |||||||
176 | =item --cache-clear | ||||||
177 | |||||||
178 | The B<--cache-clear> option can be used to initiate cache management | ||||||
179 | or to refresh all existing cache data. Once executed with this option, | ||||||
180 | a new cache file will be created if one does not exist and then | ||||||
181 | automatically maintained afterward. | ||||||
182 | |||||||
183 | =item --xlate-cache=I |
||||||
184 | |||||||
185 | =over 4 | ||||||
186 | |||||||
187 | =item C |
||||||
188 | |||||||
189 | Maintain the cache file if it exists. | ||||||
190 | |||||||
191 | =item C |
||||||
192 | |||||||
193 | Create empty cache file and exit. | ||||||
194 | |||||||
195 | =item C |
||||||
196 | |||||||
197 | Maintain cache anyway as far as the target is normal file. | ||||||
198 | |||||||
199 | =item C |
||||||
200 | |||||||
201 | Clear the cache data first. | ||||||
202 | |||||||
203 | =item C |
||||||
204 | |||||||
205 | Never use cache file even if it exists. | ||||||
206 | |||||||
207 | =item C |
||||||
208 | |||||||
209 | By default behavior, unused data is removed from the cache file. If | ||||||
210 | you don't want to remove them and keep in the file, use C |
||||||
211 | |||||||
212 | =back | ||||||
213 | |||||||
214 | =back | ||||||
215 | |||||||
216 | =head1 COMMAND LINE INTERFACE | ||||||
217 | |||||||
218 | You can easily use this module from the command line by using the | ||||||
219 | C |
||||||
220 | information for usage. | ||||||
221 | |||||||
222 | =head1 EMACS | ||||||
223 | |||||||
224 | Load the F |
||||||
225 | command from Emacs editor. C |
||||||
226 | given region. Default language is C |
||||||
227 | language invoking it with prefix argument. | ||||||
228 | |||||||
229 | =head1 ENVIRONMENT | ||||||
230 | |||||||
231 | =over 7 | ||||||
232 | |||||||
233 | =item DEEPL_AUTH_KEY | ||||||
234 | |||||||
235 | Set your authentication key for DeepL service. | ||||||
236 | |||||||
237 | =back | ||||||
238 | |||||||
239 | =head1 INSTALL | ||||||
240 | |||||||
241 | =head2 CPANMINUS | ||||||
242 | |||||||
243 | $ cpanm App::Greple::xlate | ||||||
244 | |||||||
245 | =head1 SEE ALSO | ||||||
246 | |||||||
247 | L |
||||||
248 | |||||||
249 | =over 7 | ||||||
250 | |||||||
251 | =item L |
||||||
252 | |||||||
253 | DeepL Python library and CLI command. | ||||||
254 | |||||||
255 | =item L |
||||||
256 | |||||||
257 | See the B |
||||||
258 | Use B<--inside>, B<--outside>, B<--include>, B<--exclude> options to | ||||||
259 | limit the matching area. | ||||||
260 | |||||||
261 | =item L |
||||||
262 | |||||||
263 | You can use C<-Mupdate> module to modify files by the result of | ||||||
264 | B |
||||||
265 | |||||||
266 | =item L |
||||||
267 | |||||||
268 | Use B |
||||||
269 | option. | ||||||
270 | |||||||
271 | =back | ||||||
272 | |||||||
273 | =head1 AUTHOR | ||||||
274 | |||||||
275 | Kazumasa Utashiro | ||||||
276 | |||||||
277 | =head1 LICENSE | ||||||
278 | |||||||
279 | Copyright © 2023 Kazumasa Utashiro. | ||||||
280 | |||||||
281 | This library is free software; you can redistribute it and/or modify | ||||||
282 | it under the same terms as Perl itself. | ||||||
283 | |||||||
284 | =cut | ||||||
285 | |||||||
286 | 1 | 1 | 885 | use v5.14; | |||
1 | 4 | ||||||
287 | 1 | 1 | 6 | use warnings; | |||
1 | 1 | ||||||
1 | 45 | ||||||
288 | |||||||
289 | 1 | 1 | 649 | use Data::Dumper; | |||
1 | 6937 | ||||||
1 | 63 | ||||||
290 | |||||||
291 | 1 | 1 | 708 | use JSON; | |||
1 | 8444 | ||||||
1 | 7 | ||||||
292 | 1 | 1 | 666 | use Text::ANSI::Fold ':constants'; | |||
1 | 63459 | ||||||
1 | 166 | ||||||
293 | 1 | 1 | 428 | use App::cdif::Command; | |||
1 | 16939 | ||||||
1 | 44 | ||||||
294 | 1 | 1 | 542 | use Hash::Util qw(lock_keys); | |||
1 | 2757 | ||||||
1 | 7 | ||||||
295 | 1 | 1 | 86 | use Unicode::EastAsianWidth; | |||
1 | 3 | ||||||
1 | 662 | ||||||
296 | |||||||
297 | our %opt = ( | ||||||
298 | engine => \(our $xlate_engine), | ||||||
299 | progress => \(our $show_progress = 1), | ||||||
300 | format => \(our $output_format = 'conflict'), | ||||||
301 | collapse => \(our $collapse_spaces = 1), | ||||||
302 | from => \(our $lang_from = 'ORIGINAL'), | ||||||
303 | to => \(our $lang_to = 'EN-US'), | ||||||
304 | fold => \(our $fold_line = 0), | ||||||
305 | width => \(our $fold_width = 70), | ||||||
306 | auth_key => \(our $auth_key), | ||||||
307 | method => \(our $cache_method //= $ENV{GREPLE_XLATE_CACHE} || 'auto'), | ||||||
308 | dryrun => \(our $dryrun = 0), | ||||||
309 | maxlen => \(our $max_length = 0), | ||||||
310 | ); | ||||||
311 | lock_keys %opt; | ||||||
312 | 0 | 0 | 0 | sub opt :lvalue { ${$opt{+shift}} } | |||
0 | |||||||
313 | |||||||
314 | my $current_file; | ||||||
315 | |||||||
316 | our %formatter = ( | ||||||
317 | xtxt => undef, | ||||||
318 | none => undef, | ||||||
319 | conflict => sub { | ||||||
320 | join '', | ||||||
321 | "<<<<<<< $lang_from\n", | ||||||
322 | $_[0], | ||||||
323 | "=======\n", | ||||||
324 | $_[1], | ||||||
325 | ">>>>>>> $lang_to\n"; | ||||||
326 | }, | ||||||
327 | cm => 'conflict', | ||||||
328 | ifdef => sub { | ||||||
329 | join '', | ||||||
330 | "#ifdef $lang_from\n", | ||||||
331 | $_[0], | ||||||
332 | "#endif\n", | ||||||
333 | "#ifdef $lang_to\n", | ||||||
334 | $_[1], | ||||||
335 | "#endif\n"; | ||||||
336 | }, | ||||||
337 | space => sub { join "\n", @_ }, | ||||||
338 | discard => sub { '' }, | ||||||
339 | ); | ||||||
340 | |||||||
341 | # aliases | ||||||
342 | for (keys %formatter) { | ||||||
343 | next if ! $formatter{$_} or ref $formatter{$_}; | ||||||
344 | $formatter{$_} = $formatter{$formatter{$_}} // die; | ||||||
345 | } | ||||||
346 | |||||||
347 | my $old_cache = {}; | ||||||
348 | my $new_cache = {}; | ||||||
349 | my $xlate_cache_update; | ||||||
350 | |||||||
351 | sub setup { | ||||||
352 | 0 | 0 | 0 | 0 | return if state $once_called++; | ||
353 | 0 | 0 | if (defined $cache_method) { | ||||
354 | 0 | 0 | if ($cache_method eq '') { | ||||
355 | 0 | $cache_method = 'auto'; | |||||
356 | } | ||||||
357 | 0 | 0 | if (lc $cache_method eq 'accumulate') { | ||||
358 | 0 | $new_cache = $old_cache; | |||||
359 | } | ||||||
360 | 0 | 0 | if ($cache_method =~ /^(no|never)/i) { | ||||
361 | 0 | $cache_method = ''; | |||||
362 | } | ||||||
363 | } | ||||||
364 | 0 | 0 | if ($xlate_engine) { | ||||
365 | 0 | my $mod = __PACKAGE__ . "::$xlate_engine"; | |||||
366 | 0 | 0 | if (eval "require $mod") { | ||||
367 | 0 | $mod->import; | |||||
368 | } else { | ||||||
369 | 0 | die "Engine $xlate_engine is not available.\n"; | |||||
370 | } | ||||||
371 | 1 | 1 | 9 | no strict 'refs'; | |||
1 | 3 | ||||||
1 | 198 | ||||||
372 | 0 | ${"$mod\::lang_from"} = $lang_from; | |||||
0 | |||||||
373 | 0 | ${"$mod\::lang_to"} = $lang_to; | |||||
0 | |||||||
374 | 0 | *XLATE = \&{"$mod\::xlate"}; | |||||
0 | |||||||
375 | 0 | 0 | if (not defined &XLATE) { | ||||
376 | 0 | die "No \"xlate\" function in $mod.\n"; | |||||
377 | } | ||||||
378 | } | ||||||
379 | } | ||||||
380 | |||||||
381 | sub normalize { | ||||||
382 | 0 | 0 | 0 | $_[0] =~ s{^.+(?:\n.+)*}{ | |||
383 | 0 | ${^MATCH} | |||||
384 | =~ s/\A\s+|\s+\z//gr | ||||||
385 | =~ s/(?<=\p{InFullwidth})\n(?=\p{InFullwidth})//gr | ||||||
386 | =~ s/\s+/ /gr | ||||||
387 | }pmger; | ||||||
388 | } | ||||||
389 | |||||||
390 | sub postgrep { | ||||||
391 | 0 | 0 | 0 | my $grep = shift; | |||
392 | 0 | my @miss; | |||||
393 | 0 | for my $r ($grep->result) { | |||||
394 | 0 | my($b, @match) = @$r; | |||||
395 | 0 | for my $m (@match) { | |||||
396 | 0 | my $key = normalize $grep->cut(@$m); | |||||
397 | 0 | 0 | $new_cache->{$key} //= delete $old_cache->{$key} // do { | ||||
0 | |||||||
398 | 0 | push @miss, $key; | |||||
399 | 0 | "NOT TRANSLATED YET\n"; | |||||
400 | }; | ||||||
401 | } | ||||||
402 | } | ||||||
403 | 0 | 0 | cache_update(@miss) if @miss; | ||||
404 | } | ||||||
405 | |||||||
406 | sub cache_update { | ||||||
407 | 0 | 0 | 0 | binmode STDERR, ':encoding(utf8)'; | |||
408 | |||||||
409 | 0 | my @from = @_; | |||||
410 | 0 | 0 | print STDERR "From:\n", map s/^/\t< /mgr, @from if $show_progress; | ||||
411 | 0 | 0 | return @from if $dryrun; | ||||
412 | |||||||
413 | 0 | my @to = &XLATE(@from); | |||||
414 | |||||||
415 | 0 | 0 | print STDERR "To:\n", map s/^/\t> /mgr, @to if $show_progress; | ||||
416 | 0 | 0 | die "Unmatched response:\n@to" if @from != @to; | ||||
417 | 0 | $xlate_cache_update += @from; | |||||
418 | 0 | @{$new_cache}{@from} = @to; | |||||
0 | |||||||
419 | } | ||||||
420 | |||||||
421 | sub fold_lines { | ||||||
422 | 0 | 0 | 0 | state $fold = Text::ANSI::Fold->new( | |||
423 | width => $fold_width, | ||||||
424 | boundary => 'word', | ||||||
425 | linebreak => LINEBREAK_ALL, | ||||||
426 | runin => 4, | ||||||
427 | runout => 4, | ||||||
428 | ); | ||||||
429 | 0 | local $_ = shift; | |||||
430 | 0 | s/(.+)/join "\n", $fold->text($1)->chops/ge; | |||||
0 | |||||||
431 | 0 | $_; | |||||
432 | } | ||||||
433 | |||||||
434 | sub xlate { | ||||||
435 | 0 | 0 | 0 | my $text = shift; | |||
436 | 0 | my $key = normalize $text; | |||||
437 | 0 | 0 | my $s = $new_cache->{$key} // "!!! TRANSLATION ERROR !!!\n"; | ||||
438 | 0 | 0 | $s = fold_lines $s if $fold_line; | ||||
439 | 0 | 0 | if (state $formatter = $formatter{$output_format}) { | ||||
440 | 0 | return $formatter->($text, $s); | |||||
441 | } else { | ||||||
442 | 0 | return $s; | |||||
443 | } | ||||||
444 | } | ||||||
445 | 0 | 0 | 0 | sub colormap { xlate $_ } | |||
446 | 0 | 0 | 0 | sub callback { xlate { @_ }->{match} } | |||
447 | |||||||
448 | sub cache_file { | ||||||
449 | 0 | 0 | 0 | my $file = sprintf("%s.xlate-%s-%s.json", | |||
450 | $current_file, $xlate_engine, $lang_to); | ||||||
451 | 0 | 0 | if ($cache_method eq 'auto') { | ||||
452 | 0 | 0 | -f $file ? $file : undef; | ||||
453 | } else { | ||||||
454 | 0 | 0 | 0 | if ($cache_method and -f $current_file) { | |||
455 | 0 | $file; | |||||
456 | } else { | ||||||
457 | 0 | undef; | |||||
458 | } | ||||||
459 | } | ||||||
460 | } | ||||||
461 | |||||||
462 | my $json_obj = JSON->new->utf8->canonical->pretty; | ||||||
463 | |||||||
464 | sub read_cache { | ||||||
465 | 0 | 0 | 0 | my $file = shift; | |||
466 | 0 | %$new_cache = %$old_cache = (); | |||||
467 | 0 | 0 | if (open my $fh, $file) { | ||||
468 | 0 | my $json = do { local $/; <$fh> }; | |||||
0 | |||||||
0 | |||||||
469 | 0 | 0 | my $hash = $json eq '' ? {} : $json_obj->decode($json); | ||||
470 | 0 | %$old_cache = %$hash; | |||||
471 | 0 | warn "read cache from $file\n"; | |||||
472 | } | ||||||
473 | } | ||||||
474 | |||||||
475 | sub write_cache { | ||||||
476 | 0 | 0 | 0 | 0 | return if $dryrun; | ||
477 | 0 | my $file = shift; | |||||
478 | 0 | 0 | if (open my $fh, '>', $file) { | ||||
479 | 0 | my $json = $json_obj->encode($new_cache); | |||||
480 | 0 | print $fh $json; | |||||
481 | 0 | warn "write cache to $file\n"; | |||||
482 | } | ||||||
483 | } | ||||||
484 | |||||||
485 | sub begin { | ||||||
486 | 0 | 0 | 0 | 0 | setup if not (state $done++); | ||
487 | 0 | my %args = @_; | |||||
488 | 0 | 0 | $current_file = delete $args{&::FILELABEL} or die; | ||||
489 | 0 | 0 | s/\z/\n/ if /.\z/; | ||||
490 | 0 | $xlate_cache_update = 0; | |||||
491 | 0 | 0 | if (not defined $xlate_engine) { | ||||
492 | 0 | die "Select translation engine.\n"; | |||||
493 | } | ||||||
494 | 0 | 0 | if (my $cache = cache_file) { | ||||
495 | 0 | 0 | if ($cache_method =~ /^(create|clear)/) { | ||||
496 | 0 | 0 | warn "created $cache\n" unless -f $cache; | ||||
497 | 0 | 0 | open my $fh, '>', $cache or die "$cache: $!\n"; | ||||
498 | 0 | print $fh "{}\n"; | |||||
499 | 0 | 0 | die "skip $current_file" if $cache_method eq 'create'; | ||||
500 | } | ||||||
501 | 0 | read_cache $cache; | |||||
502 | } | ||||||
503 | } | ||||||
504 | |||||||
505 | sub end { | ||||||
506 | 0 | 0 | 0 | 0 | if (my $cache = cache_file) { | ||
507 | 0 | 0 | 0 | if ($xlate_cache_update or %$old_cache) { | |||
508 | 0 | write_cache $cache; | |||||
509 | } | ||||||
510 | } | ||||||
511 | } | ||||||
512 | |||||||
513 | sub setopt { | ||||||
514 | 0 | 0 | 0 | while (my($key, $val) = splice @_, 0, 2) { | |||
515 | 0 | 0 | next if $key eq &::FILELABEL; | ||||
516 | 0 | 0 | die "$key: Invalid option.\n" if not exists $opt{$key}; | ||||
517 | 0 | opt($key) = $val; | |||||
518 | } | ||||||
519 | } | ||||||
520 | |||||||
521 | 1; | ||||||
522 | |||||||
523 | __DATA__ |