File Coverage

blib/lib/App/Greple/tee.pm

Criterion	Covered	Total	%
statement	26	76	34.2
branch	0	20	0.0
condition	0	6	0.0
subroutine	9	16	56.2
pod	0	6	0.0
total	35	124	28.2

line	stmt	bran	cond	sub	pod	time	code
1							=encoding utf-8
2
3							=head1 NAME
4
5							App::Greple::tee - module to replace matched text by the external command result
6
7							=head1 SYNOPSIS
8
9							greple -Mtee command -- ...
10
11							=head1 DESCRIPTION
12
13							Greple's B<-Mtee> module sends matched text part to the given filter
14							command, and replace them by the command result. The idea is derived
15							from the command called B. It is like bypassing partial data to
16							the external filter command.
17
18							Filter command follows module declaration (C<-Mtee>) and terminates by
19							two dashes (C<-->). For example, next command call command C
20							command with C arguments for the matched word in the data.
21
22							greple -Mtee tr a-z A-Z -- '\w+' ...
23
24							Above command convert all matched words from lower-case to upper-case.
25							Actually this example itself is not so useful because B can do
26							the same thing more effectively with B<--cm> option.
27
28							By default, the command is executed as a single process, and all
29							matched data is sent to it mixed together. If the matched text does
30							not end with newline, it is added before and removed after. Data are
31							mapped line by line, so the number of lines of input and output data
32							must be identical.
33
34							Using B<--discrete> option, individual command is called for each
35							matched part. You can tell the difference by following commands.
36
37							greple -Mtee cat -n -- copyright LICENSE
38							greple -Mtee cat -n -- copyright LICENSE --discrete
39
40							Lines of input and output data do not have to be identical when used
41							with B<--discrete> option.
42
43							=head1 OPTIONS
44
45							=over 7
46
47							=item B<--discrete>
48
49							Invoke new command individually for every matched part.
50
51							=item B<--fillup>
52
53							Combine a sequence of non-blank lines into a single line before
54							passing them to the filter command. Newline characters between wide
55							characters are deleted, and other newline characters are replaced with
56							spaces.
57
58							=back
59
60							=head1 WHY DO NOT USE TEIP
61
62							First of all, whenever you can do it with the B command, use
63							it. It is an excellent tool and much faster than B.
64
65							Because B is designed to process document files, it has many
66							features that are appropriate for it, such as match area controls. It
67							might be worth using B to take advantage of those features.
68
69							Also, B cannot handle multiple lines of data as a single unit,
70							while B can execute individual commands on a data chunk
71							consisting of multiple lines.
72
73							=head1 EXAMPLE
74
75							Next command will find text blocks inside L style document
76							included in Perl module file.
77
78							greple --inside '^=(?s:.*?)(^=cut\|\z)' --re '^(\w.+\n)+' tee.pm
79
80							You can translate them by DeepL service by executing the above command
81							convined with B<-Mtee> module which calls B command like this:
82
83							greple -Mtee deepl text --to JA - -- --fillup ...
84
85							The dedicated module L is more effective
86							for this purpose, though. In fact, the implementation hint of B
87							module came from B module.
88
89							=head1 EXAMPLE 2
90
91							Next command will find some indented part in LICENSE document.
92
93							greple --re '^[ ]{2}[a-z][)] .+\n([ ]{5}.+\n)*' -C LICENSE
94
95							a) distribute a Standard Version of the executables and library files,
96							together with instructions (in the manual page or equivalent) on where to
97							get the Standard Version.
98
99							b) accompany the distribution with the machine-readable source of the Package
100							with your modifications.
101
102							You can reformat this part by using B module with B
103							command:
104
105							greple -Mtee ansifold -rsw40 --prefix ' ' -- --discrete --re ...
106
107							a) distribute a Standard Version of
108							the executables and library files,
109							together with instructions (in the
110							manual page or equivalent) on where
111							to get the Standard Version.
112
113							b) accompany the distribution with the
114							machine-readable source of the
115							Package with your modifications.
116
117							Using C<--discrete> option is time consuming. So you can use
118							C<--separate '\r'> option with C which produce single line
119							using CR character instead of NL.
120
121							greple -Mtee ansifold -rsw40 --prefix ' ' --separate '\r' --
122
123							Then convert CR char to NL after by L command or some.
124
125							... \| tr '\r' '\n'
126
127							=head1 EXAMPLE 3
128
129							Consider a situation where you want to grep for strings from
130							non-header lines. For example, you may want to search for images from
131							the C command, but leave the header line. You can do
132							it by following command.
133
134							greple -Mtee grep perl -- -Mline -L 2: --discrete --all
135
136							Option C<-Mline -L 2:> retrieves the second to last lines and sends
137							them to the C command. Option C<--discrete> is required,
138							but this is called only once, so there is no performance drawback.
139
140							In this case, C produces error because the number
141							of lines in the output is less than input. However, result is quite
142							satisfactory :)
143
144							=head1 INSTALL
145
146							=head2 CPANMINUS
147
148							$ cpanm App::Greple::tee
149
150							=head1 SEE ALSO
151
152							L, L
153
154							L
155
156							L, L
157
158							L
159
160							L
161
162							=head1 BUGS
163
164							The C<--fillup> option may not work correctly for Korean text.
165
166							=head1 AUTHOR
167
168							Kazumasa Utashiro
169
170							=head1 LICENSE
171
172							Copyright © 2023 Kazumasa Utashiro.
173
174							This library is free software; you can redistribute it and/or modify
175							it under the same terms as Perl itself.
176
177							=cut
178
179							package App::Greple::tee;
180
181							our $VERSION = "0.99";
182
183	1			1		836	use v5.14;
	1					3
184	1			1		5	use warnings;
	1					7
	1					29
185	1			1		4	use Carp;
	1					2
	1					58
186	1			1		5	use List::Util qw(sum first);
	1					1
	1					122
187	1			1		554	use Text::ParseWords qw(shellwords);
	1					1343
	1					55
188	1			1		465	use App::cdif::Command;
	1					26962
	1					43
189	1			1		6	use Data::Dumper;
	1					2
	1					304
190
191							our $command;
192							our $blockmatch;
193							our $discrete;
194							our $fillup;
195
196							my($mod, $argv);
197
198							sub initialize {
199	0			0	0		($mod, $argv) = @_;
200	0	0		0			if (defined (my $i = first { $argv->[$_] eq '--' } 0 .. $#{$argv})) {
	0
	0
201	0	0					if (my @command = splice @$argv, 0, $i) {
202	0						$command = \@command;
203							}
204	0						shift @$argv;
205							}
206							}
207
208							sub call {
209	0			0	0		my $data = shift;
210	0		0				$command // return $data;
211	0						state $exec = App::cdif::Command->new;
212	0	0					if (ref $command ne 'ARRAY') {
213	0						$command = [ shellwords $command ];
214							}
215	0		0				$exec->command($command)->setstdin($data)->update->data // '';
216							}
217
218	1			1		586	use Unicode::EastAsianWidth;
	1					1200
	1					148
219
220							sub fillup_paragraph {
221	0	0		0	0		(my $s1, local $_, my $s2) = $_[0] =~ /\A(\s)(.?)(\s*)\z/s or die;
222	1			1		7	s/(?<=\p{InFullwidth})\n(?=\p{InFullwidth})//g;
	1					2
	1					13
	0
223	0						s/\s+/ /g;
224	0						$s1 . $_ . $s2;
225							}
226
227							sub jammed_call {
228	0			0	0		my @need_nl = grep { $_[$_] !~ /\n\z/ } 0 .. $#_;
	0
229	0						my @from = @_;
230	0	0					if ($fillup) {
231	0						for (@from) {
232	0						s{^.+(?:\n.+)*}{
233	0						fillup_paragraph ${^MATCH}
234							}pmge;
235							}
236							}
237	0						$from[$_] .= "\n" for @need_nl;
238	0						my @lines = map { int tr/\n/\n/ } @from;
	0
239	0						my $from = join '', @from;
240	0						my $out = call $from;
241	0						my @out = $out =~ /.*\n/g;
242	0	0					if (@out < sum @lines) {
243	0						die "Unexpected response from command:\n\n$out\n";
244							}
245	0						my @to = map { join '', splice @out, 0, $_ } @lines;
	0
246	0						$to[$_] =~ s/\n\z// for @need_nl;
247	0						return @to;
248							}
249
250							my @jammed;
251
252							sub postgrep {
253	0			0	0		my $grep = shift;
254	0	0					if ($blockmatch) {
255							$grep->{RESULT} = [
256							[ [ 0, length ],
257							map {
258	0						[ $_->[0][0], $_->[0][1], 0, $grep->{callback}->[0] ]
	0
259							} $grep->result
260							] ];
261							}
262	0	0					return if $discrete;
263	0						@jammed = my @block = ();
264	0						for my $r ($grep->result) {
265	0						my($b, @match) = @$r;
266	0						for my $m (@match) {
267	0						push @block, $grep->cut(@$m);
268							}
269							}
270	0	0					@jammed = jammed_call @block if @block;
271							}
272
273							sub callback {
274	0	0		0	0		if ($discrete) {
275	0						call { @_ }->{match};
276							}
277							else {
278	0		0				shift @jammed // die;
279							}
280							}
281
282							1;
283
284							__DATA__