File Coverage

blib/lib/App/Greple/tee.pm
Criterion Covered Total %
statement 26 76 34.2
branch 0 20 0.0
condition 0 6 0.0
subroutine 9 16 56.2
pod 0 6 0.0
total 35 124 28.2


line stmt bran cond sub pod time code
1             =encoding utf-8
2              
3             =head1 NAME
4              
5             App::Greple::tee - module to replace matched text by the external command result
6              
7             =head1 SYNOPSIS
8              
9             greple -Mtee command -- ...
10              
11             =head1 DESCRIPTION
12              
13             Greple's B<-Mtee> module sends matched text part to the given filter
14             command, and replace them by the command result. The idea is derived
15             from the command called B. It is like bypassing partial data to
16             the external filter command.
17              
18             Filter command follows module declaration (C<-Mtee>) and terminates by
19             two dashes (C<-->). For example, next command call command C
20             command with C arguments for the matched word in the data.
21              
22             greple -Mtee tr a-z A-Z -- '\w+' ...
23              
24             Above command convert all matched words from lower-case to upper-case.
25             Actually this example itself is not so useful because B can do
26             the same thing more effectively with B<--cm> option.
27              
28             By default, the command is executed as a single process, and all
29             matched data is sent to it mixed together. If the matched text does
30             not end with newline, it is added before and removed after. Data are
31             mapped line by line, so the number of lines of input and output data
32             must be identical.
33              
34             Using B<--discrete> option, individual command is called for each
35             matched part. You can tell the difference by following commands.
36              
37             greple -Mtee cat -n -- copyright LICENSE
38             greple -Mtee cat -n -- copyright LICENSE --discrete
39              
40             Lines of input and output data do not have to be identical when used
41             with B<--discrete> option.
42              
43             =head1 OPTIONS
44              
45             =over 7
46              
47             =item B<--discrete>
48              
49             Invoke new command individually for every matched part.
50              
51             =item B<--fillup>
52              
53             Combine a sequence of non-blank lines into a single line before
54             passing them to the filter command. Newline characters between wide
55             characters are deleted, and other newline characters are replaced with
56             spaces.
57              
58             =back
59              
60             =head1 WHY DO NOT USE TEIP
61              
62             First of all, whenever you can do it with the B command, use
63             it. It is an excellent tool and much faster than B.
64              
65             Because B is designed to process document files, it has many
66             features that are appropriate for it, such as match area controls. It
67             might be worth using B to take advantage of those features.
68              
69             Also, B cannot handle multiple lines of data as a single unit,
70             while B can execute individual commands on a data chunk
71             consisting of multiple lines.
72              
73             =head1 EXAMPLE
74              
75             Next command will find text blocks inside L style document
76             included in Perl module file.
77              
78             greple --inside '^=(?s:.*?)(^=cut|\z)' --re '^(\w.+\n)+' tee.pm
79              
80             You can translate them by DeepL service by executing the above command
81             convined with B<-Mtee> module which calls B command like this:
82              
83             greple -Mtee deepl text --to JA - -- --fillup ...
84              
85             The dedicated module L is more effective
86             for this purpose, though. In fact, the implementation hint of B
87             module came from B module.
88              
89             =head1 EXAMPLE 2
90              
91             Next command will find some indented part in LICENSE document.
92              
93             greple --re '^[ ]{2}[a-z][)] .+\n([ ]{5}.+\n)*' -C LICENSE
94              
95             a) distribute a Standard Version of the executables and library files,
96             together with instructions (in the manual page or equivalent) on where to
97             get the Standard Version.
98            
99             b) accompany the distribution with the machine-readable source of the Package
100             with your modifications.
101            
102             You can reformat this part by using B module with B
103             command:
104              
105             greple -Mtee ansifold -rsw40 --prefix ' ' -- --discrete --re ...
106              
107             a) distribute a Standard Version of
108             the executables and library files,
109             together with instructions (in the
110             manual page or equivalent) on where
111             to get the Standard Version.
112            
113             b) accompany the distribution with the
114             machine-readable source of the
115             Package with your modifications.
116              
117             Using C<--discrete> option is time consuming. So you can use
118             C<--separate '\r'> option with C which produce single line
119             using CR character instead of NL.
120              
121             greple -Mtee ansifold -rsw40 --prefix ' ' --separate '\r' --
122              
123             Then convert CR char to NL after by L command or some.
124              
125             ... | tr '\r' '\n'
126              
127             =head1 EXAMPLE 3
128              
129             Consider a situation where you want to grep for strings from
130             non-header lines. For example, you may want to search for images from
131             the C command, but leave the header line. You can do
132             it by following command.
133              
134             greple -Mtee grep perl -- -Mline -L 2: --discrete --all
135              
136             Option C<-Mline -L 2:> retrieves the second to last lines and sends
137             them to the C command. Option C<--discrete> is required,
138             but this is called only once, so there is no performance drawback.
139              
140             In this case, C produces error because the number
141             of lines in the output is less than input. However, result is quite
142             satisfactory :)
143              
144             =head1 INSTALL
145              
146             =head2 CPANMINUS
147              
148             $ cpanm App::Greple::tee
149              
150             =head1 SEE ALSO
151              
152             L, L
153              
154             L
155              
156             L, L
157              
158             L
159              
160             L
161              
162             =head1 BUGS
163              
164             The C<--fillup> option may not work correctly for Korean text.
165              
166             =head1 AUTHOR
167              
168             Kazumasa Utashiro
169              
170             =head1 LICENSE
171              
172             Copyright © 2023 Kazumasa Utashiro.
173              
174             This library is free software; you can redistribute it and/or modify
175             it under the same terms as Perl itself.
176              
177             =cut
178              
179             package App::Greple::tee;
180              
181             our $VERSION = "0.99";
182              
183 1     1   836 use v5.14;
  1         3  
184 1     1   5 use warnings;
  1         7  
  1         29  
185 1     1   4 use Carp;
  1         2  
  1         58  
186 1     1   5 use List::Util qw(sum first);
  1         1  
  1         122  
187 1     1   554 use Text::ParseWords qw(shellwords);
  1         1343  
  1         55  
188 1     1   465 use App::cdif::Command;
  1         26962  
  1         43  
189 1     1   6 use Data::Dumper;
  1         2  
  1         304  
190              
191             our $command;
192             our $blockmatch;
193             our $discrete;
194             our $fillup;
195              
196             my($mod, $argv);
197              
198             sub initialize {
199 0     0 0   ($mod, $argv) = @_;
200 0 0   0     if (defined (my $i = first { $argv->[$_] eq '--' } 0 .. $#{$argv})) {
  0            
  0            
201 0 0         if (my @command = splice @$argv, 0, $i) {
202 0           $command = \@command;
203             }
204 0           shift @$argv;
205             }
206             }
207              
208             sub call {
209 0     0 0   my $data = shift;
210 0   0       $command // return $data;
211 0           state $exec = App::cdif::Command->new;
212 0 0         if (ref $command ne 'ARRAY') {
213 0           $command = [ shellwords $command ];
214             }
215 0   0       $exec->command($command)->setstdin($data)->update->data // '';
216             }
217              
218 1     1   586 use Unicode::EastAsianWidth;
  1         1200  
  1         148  
219              
220             sub fillup_paragraph {
221 0 0   0 0   (my $s1, local $_, my $s2) = $_[0] =~ /\A(\s*)(.*?)(\s*)\z/s or die;
222 1     1   7 s/(?<=\p{InFullwidth})\n(?=\p{InFullwidth})//g;
  1         2  
  1         13  
  0            
223 0           s/\s+/ /g;
224 0           $s1 . $_ . $s2;
225             }
226              
227             sub jammed_call {
228 0     0 0   my @need_nl = grep { $_[$_] !~ /\n\z/ } 0 .. $#_;
  0            
229 0           my @from = @_;
230 0 0         if ($fillup) {
231 0           for (@from) {
232 0           s{^.+(?:\n.+)*}{
233 0           fillup_paragraph ${^MATCH}
234             }pmge;
235             }
236             }
237 0           $from[$_] .= "\n" for @need_nl;
238 0           my @lines = map { int tr/\n/\n/ } @from;
  0            
239 0           my $from = join '', @from;
240 0           my $out = call $from;
241 0           my @out = $out =~ /.*\n/g;
242 0 0         if (@out < sum @lines) {
243 0           die "Unexpected response from command:\n\n$out\n";
244             }
245 0           my @to = map { join '', splice @out, 0, $_ } @lines;
  0            
246 0           $to[$_] =~ s/\n\z// for @need_nl;
247 0           return @to;
248             }
249              
250             my @jammed;
251              
252             sub postgrep {
253 0     0 0   my $grep = shift;
254 0 0         if ($blockmatch) {
255             $grep->{RESULT} = [
256             [ [ 0, length ],
257             map {
258 0           [ $_->[0][0], $_->[0][1], 0, $grep->{callback}->[0] ]
  0            
259             } $grep->result
260             ] ];
261             }
262 0 0         return if $discrete;
263 0           @jammed = my @block = ();
264 0           for my $r ($grep->result) {
265 0           my($b, @match) = @$r;
266 0           for my $m (@match) {
267 0           push @block, $grep->cut(@$m);
268             }
269             }
270 0 0         @jammed = jammed_call @block if @block;
271             }
272              
273             sub callback {
274 0 0   0 0   if ($discrete) {
275 0           call { @_ }->{match};
276             }
277             else {
278 0   0       shift @jammed // die;
279             }
280             }
281              
282             1;
283              
284             __DATA__