File Coverage

blib/lib/App/Greple/tee.pm
Criterion Covered Total %
statement 97 109 88.9
branch 28 42 66.6
condition 15 21 71.4
subroutine 22 22 100.0
pod 0 10 0.0
total 162 204 79.4


line stmt bran cond sub pod time code
1             =encoding utf-8
2              
3             =head1 NAME
4              
5             App::Greple::tee - module to replace matched text by the external command result
6              
7             =head1 SYNOPSIS
8              
9             greple -Mtee command -- ...
10              
11             =head1 VERSION
12              
13             Version 1.05
14              
15             =head1 DESCRIPTION
16              
17             Greple's B<-Mtee> module sends matched text part to the given filter
18             command, and replace them by the command result. The idea is derived
19             from the command called B. It is like bypassing partial data to
20             the external filter command.
21              
22             Filter command follows module declaration (C<-Mtee>) and terminates by
23             two dashes (C<-->). For example, next command call command C
24             command with C arguments for the matched word in the data.
25              
26             greple -Mtee tr a-z A-Z -- '\w+' ...
27              
28             Above command convert all matched words from lower-case to upper-case.
29             Actually this example itself is not so useful because B can do
30             the same thing more effectively with B<--cm> option.
31              
32             By default, the command is executed as a single process, and all
33             matched data is sent to the process mixed together. If the matched
34             text does not end with newline, it is added before sending and removed
35             after receiving. Input and output data are mapped line by line, so
36             the number of lines of input and output must be identical.
37              
38             Using B<--discrete> option, individual command is called for each
39             matched text area. You can tell the difference by following commands.
40              
41             greple -Mtee cat -n -- copyright LICENSE
42             greple -Mtee cat -n -- copyright LICENSE --discrete
43              
44             Lines of input and output data do not have to be identical when used
45             with B<--discrete> option.
46              
47             =head1 OPTIONS
48              
49             =over 7
50              
51             =item B<--discrete>
52              
53             Invoke new command individually for every matched part.
54              
55             =item B<--bulkmode>
56              
57             With the <--discrete> option, each command is executed on demand. The
58             <--bulkmode> option causes all conversions to be performed at once.
59              
60             =item B<--crmode>
61              
62             This option replaces all newline characters in the middle of each
63             block with carriage return characters. Carriage returns contained in
64             the result of executing the command are reverted back to the newline
65             character. Thus, blocks consisting of multiple lines can be processed
66             in batches without using the B<--discrete> option.
67              
68             This works well with L command's B<--crmode> option, which
69             joins CR-separated text and outputs folded lines separated by CR.
70              
71             =item B<--fillup>
72              
73             Combine a sequence of non-blank lines into a single line before
74             passing them to the filter command. Newline characters between wide
75             width characters (Japanese, Chinese) are deleted, and other newline
76             characters are replaced with spaces. Korean (Hangul) is treated
77             like ASCII text and joined with space.
78              
79             =item B<--squeeze>
80              
81             Combines two or more consecutive newline characters into one.
82              
83             =item B<-ML> B<--offload> I
84              
85             L's B<--offload> option is implemented in the different
86             module L (B<-ML>).
87              
88             greple -Mtee cat -n -- -ML --offload 'seq 10 20'
89              
90             You can also use the B<-ML> module to process only even-numbered lines
91             as follows.
92              
93             greple -Mtee cat -n -- -ML 2::2
94              
95             =back
96              
97             =head1 CONFIGURATION
98              
99             Module parameters can be set with B module using
100             the following syntax:
101              
102             greple -Mtee::config(discrete) ...
103             greple -Mtee::config(fillup,crmode) ...
104              
105             This is useful when combined with shell aliases or module files.
106              
107             Available parameters are: B, B, B,
108             B, B, B, B.
109              
110             =head1 FUNCTION CALL
111              
112             Instead of an external command, you can call a Perl function by
113             prefixing the command name with C<&>.
114              
115             greple -Mtee '&App::ansifold::ansifold' -w40 -- ...
116              
117             The function is executed in a forked child process, so it must follow
118             these requirements:
119              
120             =over 4
121              
122             =item *
123              
124             Read matched text from B
125              
126             =item *
127              
128             Print converted result to B
129              
130             =item *
131              
132             Arguments are passed via both C<@ARGV> and C<@_>
133              
134             =back
135              
136             Any fully qualified function name can be used:
137              
138             greple -Mtee '&Your::Module::function' -- ...
139              
140             The module is automatically loaded if not already loaded.
141              
142             For convenience, the following short aliases are available:
143              
144             =over 4
145              
146             =item B<&ansicolumn>
147              
148             Calls C.
149              
150             =item B<&ansifold>
151              
152             Calls C.
153              
154             =item B<&cat-v>
155              
156             Calls Cnew-Erun(@_)>.
157              
158             =back
159              
160             Using a function call avoids the overhead of forking an external
161             process for each invocation, which can significantly improve
162             performance when used with the B<--discrete> option.
163              
164             =head1 LEGACIES
165              
166             The B<--blocks> option is no longer needed now that the B<--stretch>
167             (B<-S>) option has been implemented in B. You can simply
168             perform the following.
169              
170             greple -Mtee cat -n -- --all -SE foo
171              
172             It is not recommended to use B<--blocks> as it may be deprecated in
173             the future.
174              
175              
176             =over 7
177              
178             =item B<--blocks>
179              
180             Normally, the area matching the specified search pattern is sent to
181             the external command. If this option is specified, not the matched
182             area but the entire block containing it will be processed.
183              
184             For example, to send lines containing the pattern C to the
185             external command, you need to specify the pattern which matches to
186             entire line:
187              
188             greple -Mtee cat -n -- '^.*foo.*\n' --all
189              
190             But with the B<--blocks> option, it can be done as simply as follows:
191              
192             greple -Mtee cat -n -- foo --blocks
193              
194             With B<--blocks> option, this module behave more like L's
195             B<-g> option. Otherwise, the behavior is similar to L with
196             the B<-o> option.
197              
198             Do not use the B<--blocks> with the B<--all> option, since the block
199             will be the entire data.
200              
201             =back
202              
203             =head1 WHY DO NOT USE TEIP
204              
205             First of all, whenever you can do it with the B command, use
206             it. It is an excellent tool and much faster than B.
207              
208             Because B is designed to process document files, it has many
209             features that are appropriate for it, such as match area controls. It
210             might be worth using B to take advantage of those features.
211              
212             Also, B cannot handle multiple lines of data as a single unit,
213             while B can execute individual commands on a data chunk
214             consisting of multiple lines.
215              
216             =head1 EXAMPLE
217              
218             Next command will find text blocks inside L style document
219             included in Perl module file.
220              
221             greple --inside '^=(?s:.*?)(^=cut|\z)' --re '^([\w\pP].+\n)+' tee.pm
222              
223             You can translate them by DeepL service by executing the above command
224             convined with B<-Mtee> module which calls B command like this:
225              
226             greple -Mtee deepl text --to JA - -- --fillup ...
227              
228             The dedicated module L is more effective
229             for this purpose, though. In fact, the implementation hint of B
230             module came from B module.
231              
232             =head1 EXAMPLE 2
233              
234             Next command will find some indented part in LICENSE document.
235              
236             greple --re '^[ ]{2}[a-z][)] .+\n([ ]{5}.+\n)*' -C LICENSE
237              
238             a) distribute a Standard Version of the executables and library files,
239             together with instructions (in the manual page or equivalent) on where to
240             get the Standard Version.
241              
242             b) accompany the distribution with the machine-readable source of the Package
243             with your modifications.
244              
245             You can reformat this part by using B module with B
246             command. Using both B<--crmode> options together allows efficient
247             processing of multi-line blocks:
248              
249             greple -Mtee ansifold -sw40 --prefix ' ' --crmode -- --crmode --re ...
250              
251             a) distribute a Standard Version of
252             the executables and library files,
253             together with instructions (in the
254             manual page or equivalent) on where
255             to get the Standard Version.
256              
257             b) accompany the distribution with the
258             machine-readable source of the
259             Package with your modifications.
260              
261             The B<--discrete> option can also be used but will start multiple
262             processes, so it takes longer to execute.
263              
264             =head1 EXAMPLE 3
265              
266             Consider a situation where you want to grep for strings from
267             non-header lines. For example, you may want to search for Docker image
268             names from the C command, but leave the header line.
269             You can do it by following command.
270              
271             greple -Mtee grep perl -- -ML 2: --discrete --all
272              
273             Option C<-ML 2:> retrieves the second to last lines and sends
274             them to the C command. The option --discrete is required
275             because the number of lines of input and output changes, but since the
276             command is only executed once, there is no performance drawback.
277              
278             If you try to do the same thing with the B command,
279             C will give an error because the number of output
280             lines is less than the number of input lines. However, there is no
281             problem with the result obtained.
282              
283             =head1 INSTALL
284              
285             =head2 CPANMINUS
286              
287             $ cpanm App::Greple::tee
288              
289             =head1 SEE ALSO
290              
291             L, L
292              
293             L
294              
295             L, L
296              
297             L
298              
299             L
300              
301             L, L
302              
303             =head1 AUTHOR
304              
305             Kazumasa Utashiro
306              
307             =head1 LICENSE
308              
309             Copyright © 2023-2026 Kazumasa Utashiro.
310              
311             This library is free software; you can redistribute it and/or modify
312             it under the same terms as Perl itself.
313              
314             =cut
315              
316             package App::Greple::tee;
317              
318             our $VERSION = "1.05";
319              
320 18     18   334201 use v5.24;
  18         73  
321 18     18   125 use warnings;
  18         38  
  18         1156  
322 18     18   10988 use experimental 'refaliasing';
  18         32498  
  18         228  
323 18     18   4111 use Carp;
  18         42  
  18         1550  
324 18     18   161 use List::Util qw(sum first);
  18         79  
  18         2074  
325 18     18   499 use Text::ParseWords qw(shellwords);
  18         1342  
  18         1054  
326 18     18   11350 use Command::Run;
  18         224860  
  18         822  
327 18     18   670 use Data::Dumper;
  18         7842  
  18         1358  
328 18     18   10492 use Getopt::EX::Config;
  18         62179  
  18         179  
329 18     18   22638 use App::Greple::tee::Autoload qw(resolve);
  18         237  
  18         13481  
330              
331             my $config = Getopt::EX::Config->new(
332             debug => 0,
333             blocks => 0,
334             discrete => 0,
335             fillup => 0,
336             squeeze => 0,
337             bulkmode => 0,
338             crmode => 0,
339             nofork => 1,
340             use => '',
341             );
342              
343             our $command;
344             \our $debug = \$config->{debug};
345             \our $blocks = \$config->{blocks};
346             \our $discrete = \$config->{discrete};
347             \our $fillup = \$config->{fillup};
348             \our $squeeze = \$config->{squeeze};
349             \our $bulkmode = \$config->{bulkmode};
350             \our $crmode = \$config->{crmode};
351             \our $nofork = \$config->{nofork};
352              
353             my($mod, $argv);
354              
355             sub initialize {
356 17     17 0 46951 ($mod, $argv) = @_;
357 17 50   69   152 if (defined (my $i = first { $argv->[$_] eq '--' } keys @$argv)) {
  69         241  
358 17 50       147 if (my @command = splice @$argv, 0, $i) {
359 17         45 $command = \@command;
360             }
361 17 50       162 shift @$argv eq '--' or die;
362             }
363             }
364              
365             sub finalize {
366 17     17 0 1552 ($mod, $argv) = @_;
367 17         118 for my $mod (grep length, split /,/, $config->{use}) {
368 0         0 eval "require $mod; $mod->import()";
369 0 0       0 die $@ if $@;
370             }
371             }
372              
373 18     18   12933 use Unicode::EastAsianWidth;
  18         27426  
  18         5346  
374              
375             sub InConcatScript {
376 18     18 0 43171 return <<"END";
377             +App::Greple::tee::InFullwidth
378             -utf8::Hangul
379             END
380             }
381              
382             sub InFullwidthPunctuation {
383 18     18 0 452 return <<"END";
384             +App::Greple::tee::InFullwidth
385             &utf8::Punctuation
386             END
387             }
388              
389             sub fillup_block {
390 36 50   36 0 904 (my $s1, local $_, my $s2) = $_[0] =~ /\A(\s*)(.*?)(\s*)\z/s or die;
391 36         233 s/(?<=\p{InFullwidthPunctuation})\n//g;
392 36         248 s/(?<=\p{InConcatScript})\n(?=\p{InConcatScript})//g;
393 36         305 s/[ ]*\n[ ]*/ /g;
394 36         330 $s1 . $_ . $s2;
395             }
396              
397             sub fillup_paragraphs {
398 36 100   36 0 205 local *_ = @_ > 0 ? \$_[0] : \$_;
399 36         359 s{^.+(?:\n.+)*}{ fillup_block ${^MATCH} }pmge;
  36         184  
400             }
401              
402             sub call {
403 66     66 0 226 my $data = shift;
404 66   50     238 $command // return $data;
405 66         976 my $exec = Command::Run->new;
406 66 100 100     38444 if ($discrete and $fillup) {
407 22         3878 fillup_paragraphs $data;
408             }
409 66 50       386 if (ref $command ne 'ARRAY') {
410 0         0 $command = [ shellwords $command ];
411             }
412 66         408 my @command = @$command;
413             # Resolve &function to code reference
414 66 50 33     719 if (@command and $command[0] =~ /^&(.+)/) {
415 0         0 shift @command;
416 0         0 unshift @command, resolve($1);
417             }
418 66         307 my %run_opt = (stdin => $data);
419 66 50 66     617 if ($config->{nofork} and ref $command[0] eq 'CODE') {
420 0         0 $run_opt{nofork} = 1;
421 0         0 $run_opt{raw} = 1;
422             }
423 66   50     519 my $out = $exec->command(@command)->with(%run_opt)->update->data // '';
424 66 50       19011859 if ($squeeze) {
425 0         0 $out =~ s/\n\n+/\n/g;
426             }
427 66         49870 $out;
428             }
429              
430             sub bundle_call {
431 8 100   8 0 41 if ($fillup) {
432 2         8 fillup_paragraphs for @_;
433             }
434 8         36 my @chop = grep { $_[$_] =~ s/(?
  135         477  
435 8         34 my @lines = map { int tr/\n/\n/ } @_;
  135         312  
436 8         69 my $lines = sum @lines;
437 8         182 my $out = call join '', @_;
438 8         494 my @out = $out =~ /.*\n/g;
439 8 50       192 if (@out < $lines) {
    50          
440 0         0 die "Unexpected short response:\n\n$out\n";
441             } elsif (@out > $lines) {
442 0         0 warn "Unexpected long response:\n\n$out\n";
443             }
444 8         86 my @ret = map { join '', splice @out, 0, $_ } @lines;
  135         314  
445 8         72 chop for @ret[@chop];
446 8         145 return @ret;
447             }
448              
449             my @bundle;
450              
451             sub postgrep {
452 17     17 0 302 my $grep = shift;
453 17 50       87 if ($blocks) {
454             $grep->{RESULT} = [
455             [ [ 0, length ],
456             map {
457 0         0 [ $_->[0][0], $_->[0][1], 0, $grep->{callback}->[0] ]
  0         0  
458             } $grep->result
459             ] ];
460             }
461 17 100 100     116 return if $discrete and not $bulkmode;
462 11         35 @bundle = my @block = ();
463 11         70 for my $r ($grep->result) {
464 11         158 my($b, @match) = @$r;
465 11         30 for my $m (@match) {
466 155         1581 push @block, $grep->cut(@$m);
467             }
468             }
469              
470 11 100       123 if ($crmode) {
471 1         32 s/\n(?!\z)/\r/g for @block;
472             }
473              
474 11 50       47 @bundle = do {
475 11 100       45 if ($discrete) {
476 3         8 map { call $_ } @block;
  20         132  
477             } else {
478 8         46 bundle_call @block;
479             }
480             } if @block;
481              
482 11 100       408 if ($crmode) {
483 1         41 s/\r/\n/g for @bundle;
484             }
485             }
486              
487             sub callback {
488 193 100 100 193 0 3223 if ($discrete and not $bulkmode) {
489 38         453 call { @_ }->{match};
490             }
491             else {
492 155   50     504 shift @bundle // die;
493             }
494             }
495              
496             1;
497              
498             __DATA__