File Coverage

blib/lib/Text/Filter.pm
Criterion Covered Total %
statement 128 171 74.8
branch 67 104 64.4
condition 10 24 41.6
subroutine 31 42 73.8
pod 7 24 29.1
total 243 365 66.5


line stmt bran cond sub pod time code
1             #! perl
2              
3             package Text::Filter;
4              
5             # Author : Johan Vromans
6             # Last Modified By: Johan Vromans
7             # Last Modified On: Thu Jan 17 13:26:07 2013
8             # Update Count : 169
9             # Status : Released
10              
11             =head1 NAME
12              
13             Text::Filter - base class for objects that can read and write text lines
14              
15             =head1 SYNOPSIS
16              
17             A plethora of tools exist that operate as filters: they get data from
18             a source, operate on this data, and write possibly modified data to a
19             destination. In the Unix world, these tools can be chained using a
20             technique called pipelining, where the output of one filter is
21             connected to the input of another filter. Some non-Unix worlds are
22             reported to have similar provisions.
23              
24             To create Perl modules for filter functionality seems trivial at
25             first. Just open the input file, read and process it, and write output
26             to a destination file. But for really reusable modules this approach
27             is too simple. A reusable module should not read and write files
28             itself, but rely on the calling program to provide input as well as to
29             handle the output.
30              
31             C is a base class for modules that have in common
32             that they process text lines by reading from some source (usually a
33             file), manipulating the contents and writing something back to some
34             destination (usually some other file).
35              
36             This module can be used on itself, but it is most powerfull when used
37             to derive modules from it. See section EXAMPLES for an extensive
38             example.
39              
40             =head1 DESCRIPTION
41              
42             The main purpose of the C class is to abstract out the
43             details out how input and output must be done. Although in most cases
44             input will come from a file, and output will be written to a file,
45             advanced modules require more detailed control over the input and
46             output. For example, the module could be called from another module,
47             in this case the callee could be allowed to process only a part of the
48             input. Or, a program could have prepared data in an array and wants to
49             call the module to process this data as if it were read from a file.
50             Also, the input stream provides a pushback functionality to make
51             peeking at the input easy.
52              
53             C can be used on its own as a convenient input/output
54             handler. For example:
55              
56             use Text::Filter;
57             my $filter = Text::Filter->(input => *STDIN, output => *STDOUT);
58             my $line;
59             while ( defined($line = $filter->readline) ) {
60             $filter->writeline($line);
61             }
62              
63             Or, even simpler:
64              
65             use Text::Filter;
66             Text::Filter->run(input => *STDIN, output => *STDOUT);
67              
68             Its real power shows when such a program is turned into a module for
69             optimal reuse.
70              
71             When creating a module that is to process lines of text, it can be
72             derived from C, for example:
73              
74             package MyFilter;
75             use base 'Text::Filter';
76              
77             The constructor method must then call the new() method of the
78             C class to set up the base class. This is conveniently
79             done by calling SUPER::new(). A hash containing attributes must be
80             passed to this method, some of these attributes will be used by the
81             base class setup.
82              
83             sub new {
84             my $class = shift;
85             # ... fetch non-attribute arguments from @_ ...
86             # Create the instance, using the attribute arguments.
87             my $self = $class->SUPER::new(@_);
88              
89             Finally, the newly created object must be re-blessed into the desired
90             class, and returned:
91              
92             # Rebless into the desired class.
93             bless($self, $class);
94             }
95              
96             When creating new instances for this class, attributes C and
97             C can be used to specify how input and output is to be
98             handled. Several possible values can be supplied for these attributes.
99              
100             For C:
101              
102             =over
103              
104             =item *
105              
106             A scalar, containing a file name.
107             The named file will be opened,
108             input lines will be read using C<<>>.
109              
110             =item *
111              
112             A file handle (glob).
113             Lines will be read using C<<>>.
114              
115             =item *
116              
117             An instance of class C.
118             Lines will be read using C<<>>.
119              
120             =item *
121              
122             A reference to an array.
123             Input lines will be shift()ed from the array.
124              
125             =item *
126              
127             A reference to a scalar.
128             Input lines will be taken from the contents of the scalar (which will
129             be modified). When exhausted, it will be set to undefined.
130              
131             =item *
132              
133             A reference to an anonymous subroutine.
134             This routine will be called to get the next line of data.
135              
136             =back
137              
138             The default is to read input using de C<< <> >> operator.
139              
140             For C:
141              
142             =over
143              
144             =item *
145              
146             A scalar, containing a file name.
147             The named file will be created automatically,
148             output lines will be written using print().
149              
150             =item *
151              
152             A file handle (glob).
153             Lines will be written using print().
154              
155             =item *
156              
157             An instance of class C.
158             Lines will be written using print().
159              
160             =item *
161              
162             A reference to an array.
163             Output lines will be push()ed into the array.
164             The array will be initialised to C<()> if necessary.
165              
166             =item *
167              
168             A reference to a scalar.
169             Output lines will be appended to the scalar.
170             The scalar will be initialised to C<""> if necessary.
171              
172             =item *
173              
174             A reference to an anonymous subroutine.
175             This routine will be called to append a line of text to the destination.
176              
177             =back
178              
179             The default is to write output to STDOUT.
180              
181             Additional attributes can be used to specify actions to be performed
182             after the data is fetched, or prior to being written. For example, to
183             strip line endings upon input, and add them upon output.
184              
185             =head1 CONSTRUCTOR
186              
187             The constructor is called new() and takes a hash with attributes as
188             its parameter.
189              
190             The following attributes are recognized and used by the constructor,
191             all others are ignored.
192              
193             The constructor will return a blessed hash containing all the original
194             attributes, plus some new attributes. The names of the new attributes
195             all start with C<_filter_>, the new attributes should I be touched.
196              
197             =over 4
198              
199             =item input
200              
201             This designates the input source. The value must be a scalar
202             (containing a file name), a file handle (either a glob or an instance
203             of class C), an array reference, or a reference to a
204             subroutine, as described above.
205              
206             If a subroutine is specified, it must return the next line to be
207             processed, and C at end.
208              
209             =item input_postread
210              
211             This attribute can be used to select an action to be performed after
212             the data has been read.
213             Its prime purpose is to handle line endings (e.g. remove a trailing newline).
214              
215             The value can be 'none' or 0 (no action), 'chomp' or 1 (standard
216             chomp() operation), or a reference to a subroutine. Default value is 0
217             (no chomping).
218              
219             If the value is a reference to a subroutine, this will be called with
220             the text line that was just read as its only argument, and it must
221             return the new contents of the text line..
222             If it returns undef, this line will be skipped.
223              
224             =item filter
225              
226             If specified, a reference to a subroutine that performs filtering. It
227             will be called after input_postread, with the text line that was just
228             read as its only argument, and it must return the new contents of the
229             text line.
230             If it returns undef, this line will be skipped.
231              
232             =item output
233              
234             This designates the output. The value must be a scalar
235             (containing a file name), a file handle (either a glob or an instance
236             of class C), or a reference to a subroutine, as described
237             above.
238              
239             Note: when a file name is passed, a C<< > >> will be prepended if necessary.
240              
241             =item output_prewrite
242              
243             This attribute can be used to select an action to be performed just
244             before the data is added to the output.
245             Its prime purpose is to handle line endings (e.g. add a trailing newline).
246             The value can be 'none' or 0 (no action) , 'newline' or 1 (append the
247             value of C<$/> to the line), or a reference to a subroutine. Default
248             value is 0 (no action).
249              
250             If the value is 'newline' or 1, and the value of C<$/> is C<"">
251             (paragraph mode), two newlines will be added.
252              
253             If the value is a reference to a subroutine, this will be called with
254             the text line as its only argument, and it must return the new
255             contents of the line to be output.
256             If it returns undef, no output occurs.
257              
258             =back
259              
260             =head1 CLASS METHODS
261              
262             =over 4
263              
264             =item Text::Filter->run([ I ])
265              
266             This creates a temporary filter object using the attibutes as in
267             C, and runs its C method.
268              
269             =back
270              
271             =head1 INSTANCE METHODS
272              
273             =over 4
274              
275             =item $filter->readline
276              
277             If there is anything in the pushback buffer, this is returned and the
278             pushback buffer is marked empty.
279              
280             Otherwise, returns the next line from the input stream, or C if
281             there is no more input.
282              
283             =item $filter->pushback($line)
284              
285             Pushes a line of text back to the input stream.
286             Returns the line.
287              
288             =item $filter->peek
289              
290             Peeks at the input.
291             Short for pushback(readline()).
292              
293             =item $filter->writeline ($line)
294              
295             Adds C<$line> to the output stream.
296              
297             =item $filter->set_input($input [ , $postread ])
298              
299             Sets the input method to C<$input>.
300             If the optional argument C<$postread> is defined, sets the input line
301             postprocessing strategy as well.
302              
303             =item $filter->set_output($output, [ $prewrite ])
304              
305             Sets the output method to C<$output>.
306             If the optional argument C<$prewrite> is defined, sets the output line
307             preprocessing strategy as well.
308              
309             =item $filter->run( [ I ])
310              
311             This will run the readline/writeline loop. Optionally a filter
312             argument (see CONSTRUCTOR, above) can be passed if filtering is
313             desired and not yet otherwise designated.
314              
315             =back
316              
317             =head1 EXAMPLE
318              
319             This example shows how to filter empty and whitespace lines.
320              
321             use Text::Filter;
322             Text::Filter->run(filter => sub { my $line = shift;
323             return unless $line =~ /\S/;
324             return $line;
325             });
326              
327             This is an example of how to use C as a base class.
328              
329             It implements a module that provides a single instance method: grep(),
330             that performs some kind of grep(1)-style function (how surprising!).
331              
332             A class method grepper() is also provided for easy access to do 'the
333             right thing' in the most common case.
334              
335             package Grepper;
336              
337             use strict;
338             use base qw(Exporter Text::Filter);
339             our @EXPORT;
340              
341             # Setup.
342             BEGIN {
343             @EXPORT = qw(grepper);
344             }
345              
346             # Constructor. Major part of the job is done by the superclass.
347             sub new {
348             my $class = shift;
349              
350             # Create a new instance by calling the superclass constructor.
351             my $self = $class->SUPER::new(@_);
352             # The superclass constructor will take care of handling
353             # the input and output attributes, and setup everything for
354             # handling the IO.
355              
356             # Bless the object into the desired class.
357             bless ($self, $class);
358              
359             # And return it.
360             $self;
361             }
362              
363             # Instance method, just an example. No magic.
364             sub grep {
365             my $self = shift;
366             my $pat = shift;
367             my $line;
368             while ( defined($line = $self->readline) ) {
369             $self->writeline($line) if $line =~ $pat;
370             }
371             }
372              
373             # Class method, for convenience.
374             # Usage: grepper (, , );
375             sub grepper {
376             my ($input, $output, $pat) = @_;
377              
378             # Create a Grepper object.
379             my $grepper = Grepper->new(input => $input, output => $output);
380              
381             # Call its grep method.
382             $grepper->grep ($pat);
383             }
384              
385             =head1 AUTHOR AND CREDITS
386              
387             Johan Vromans (jvromans@squirrel.nl) wrote this module.
388              
389             =head1 COPYRIGHT AND DISCLAIMER
390              
391             This program is Copyright 1998,2013 by Squirrel Consultancy. All
392             rights reserved.
393              
394             This program is free software; you can redistribute it and/or modify
395             it under the terms of either: a) the GNU General Public License as
396             published by the Free Software Foundation; either version 1, or (at
397             your option) any later version, or b) the "Artistic License" which
398             comes with Perl.
399              
400             This program is distributed in the hope that it will be useful, but
401             WITHOUT ANY WARRANTY; without even the implied warranty of
402             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See either the
403             GNU General Public License or the Artistic License for more details.
404              
405             =cut
406              
407 3     3   96599 use 5.8.8;
  3         9  
  3         165  
408 3     3   16 use strict;
  3         5  
  3         99  
409 3     3   13 use warnings;
  3         10  
  3         100  
410 3     3   14 use warnings::register;
  3         4  
  3         1334  
411              
412             our $VERSION = "1.10";
413              
414 3     3   3678 use IO::File;
  3         61960  
  3         416  
415 3     3   65 use Carp;
  3         6  
  3         6839  
416              
417             ################ Attribute Controls ################
418              
419             my %_attributes =
420             ( input => sub { scalar <> },
421             output => *STDOUT,
422             input_postread => 0,
423             output_prewrite => 0,
424             filter => undef,
425             );
426              
427             sub _standard_atts {
428 15     15   23 my $self = shift;
429 15         77 return keys %_attributes;
430             # In derived class:
431             # return ($self->SUPER::_standard_atts, keys %_attributes);
432             }
433              
434             sub _attr_default {
435 36     36   54 my ($self, $attr) = @_;
436 36 50       124 return $_attributes{$attr} if exists $_attributes{$attr};
437 0         0 return;
438             # In derived class:
439             # return $_attributes{$attr} if exists $_attributes{$attr};
440             # return $self->SUPER::_attr_default;
441             }
442              
443             ################ Constructor ################
444              
445             sub new {
446 15     15 0 2513 my ($class, %atts) = @_;
447 15 50       54 croak("Constructor 'new' cannot be called as an object method")
448             if ref $class;
449              
450             # Create the object out of the attributes.
451             # API: All original attributes are published in the hash.
452             # (Unfortunately)
453 15         73 my $self = bless { %atts }, $class;
454              
455             # Fill in initial attribute values.
456 15         59 foreach my $attr ( $self->_standard_atts ) {
457 99 100       282 my $op = $attr =~ /^_/ ? "_set$attr" : "set_$attr";
458 99         95 my $value;
459              
460 99 100       218 if ( exists $atts{$attr} ) {
461 39         75 $value = delete $atts{$attr};
462             }
463             else {
464 60         150 $value = $self->_attr_default($attr);
465             }
466 99         331 $self->$op($value);
467             }
468              
469 15 50       53 if ( %atts ) {
470 0         0 warnings::warnif("Unprocessed $class attributes: " .
471             join(" ", sort keys %atts));
472             }
473              
474             # Customization.
475 15         49 $self->init;
476              
477             # Return the object.
478 15         59 return $self;
479             }
480              
481             # Stub for customization. Subclasses can override this.
482 15     15 0 22 sub init { }
483              
484             ################ Attributes ################
485              
486             sub set_input {
487 15     15 1 26 my ($self, $handler, $postread) = @_;
488 15         16 my $input;
489              
490 15 100       43 if ( ref($handler) ) {
    100          
491 12 50 33     86 if ( $handler =~ /=/ && $handler->isa("IO::File") ) {
    100          
    100          
    50          
492 0     0   0 $input = sub { scalar <$handler> };
  0         0  
493 0         0 $self->{_filter_input_fd} = $handler;
494             }
495             elsif ( ref($handler) eq 'CODE' ) {
496 2         3 $input = $handler;
497             }
498             elsif ( ref($handler) eq 'SCALAR' ) {
499             $input = sub {
500 60 100   60   139 return unless defined $$handler;
501 54 100       130 if ( (my $i = index($$handler, "\n")) >= 0 ) {
502 46         192 return substr($$handler, 0, $i+1, "");
503             }
504             else {
505 8         16 my $res = $$handler;
506 8         13 $$handler = undef;
507 8         24 return $res;
508             }
509             }
510 8         49 }
511             elsif ( ref($handler) eq 'ARRAY' ) {
512 2     8   10 $input = sub { shift (@$handler) };
  8         30  
513 2         6 $self->{_filter_input_fd} = $handler;
514             }
515             }
516             elsif ( $handler =~ /^\*/ ) {
517 1     6   4 $input = sub { scalar <$handler> };
  6         55  
518             }
519             else {
520 2         3 my $fd;
521 2 50       12 $fd = IO::File->new($handler)
522             or croak("Error opening $handler: $!");
523 2     12   219 $input = sub { scalar <$fd> };
  12         128  
524 2         7 $self->{_filter_input_fd} = $fd;
525             }
526              
527 15 50       37 croak("Unrecognized value for 'input' attribute: ".
528             $handler) unless defined $input;
529              
530 15         31 $self->{_filter_input} = $input;
531              
532 15 50       38 $self->set_input_postread($postread)
533             if defined ($postread);
534              
535 15         27 $self->{_filter_pushback} = [];
536              
537 15         43 return $self;
538             }
539              
540             sub set_output {
541 15     15 1 22 my ($self, $handler, $prewrite) = @_;
542 15         17 my $output;
543              
544 15 50       32 if ( ref($handler) ) {
    0          
545 15 50 33     116 if ( $handler =~ /=/ && $handler->isa("IO::File") ) {
    100          
    100          
    50          
546 0     0   0 $output = sub { print { $handler } (shift) };
  0         0  
  0         0  
547 0         0 $self->{_filter_output_fd} = $handler;
548             }
549             elsif ( ref($handler) eq 'ARRAY' ) {
550 3     9   13 $output = sub { push (@$handler, shift) };
  9         31  
551 3 50       13 @$handler = () unless @$handler;
552             }
553             elsif ( ref($handler) eq 'SCALAR' ) {
554 8     26   41 $output = sub { $$handler .= shift };
  26         90  
555 8 100       27 $$handler = "" unless defined $$handler;
556             }
557             elsif ( ref($handler) eq 'CODE' ) {
558 4         6 $output = $handler;
559             }
560             }
561             elsif ( $handler =~ /^\*/ ) {
562 0     0   0 $output = sub { print { $handler } (shift) };
  0         0  
  0         0  
563 0         0 $self->{_filter_output_fd} = $handler;
564             }
565             else {
566 0 0       0 $handler = ">" . $handler unless $handler =~ /^>/;
567 0         0 my $fd;
568 0 0       0 $fd = IO::File->new($handler)
569             or croak("Error opening $handler: $!");
570 0     0   0 $output = sub { print { $fd } (shift) };
  0         0  
  0         0  
571 0         0 $self->{_filter_output_fd} = $fd;
572             }
573              
574 15 50       37 croak("Unrecognized value for 'output' attribute: " . $handler)
575             unless defined $output;
576              
577 15         31 $self->{_filter_output} = $output;
578              
579 15 50       32 $self->set_output_prewrite($prewrite)
580             if defined $prewrite;
581              
582 15         37 return $self;
583             }
584              
585             sub set_input_postread {
586 15     15 0 23 my ($self, $postread) = @_;
587              
588 15         17 my $posthandler;
589 15 50 33     138 if ( ref($postread) && ref($postread) eq 'CODE' ) {
    100 66        
    50 33        
590 0         0 $posthandler = $postread;
591             }
592             elsif ( $postread eq 'none' || $postread eq '0' ) {
593             }
594             elsif ( $postread eq 'chomp' || $postread eq '1' ) {
595 4         7 $posthandler = '';
596             }
597             else {
598 0         0 croak("Unrecognized value for 'input_postread' attribute: ".
599             $postread);
600             }
601              
602 15         40 $self->{_filter_postread} = $posthandler;
603              
604 15         39 return;
605             }
606              
607             sub set_output_prewrite {
608 15     15 0 24 my ($self, $prewrite) = @_;
609              
610 15         43 my $prehandler;
611 15 50 33     136 if ( ref($prewrite) && ref($prewrite) eq 'CODE' ) {
    100 66        
    50 33        
612 0         0 $prehandler = $prewrite;
613             }
614             elsif ( $prewrite eq 'none' || $prewrite eq '0' ) {
615             }
616             elsif ( $prewrite eq 'newline' || $prewrite eq '1' ) {
617 4         21 $prehandler = '';
618             }
619             else {
620 0         0 croak("Unrecognized value for 'output_prewrite' attribute: ".
621             $prewrite);
622             }
623              
624 15         42 $self->{_filter_prewrite} = $prehandler;
625              
626 15         30 return;
627             }
628              
629             sub set_filter_filter {
630 16     16 0 24 my ($self, $filter) = @_;
631 16 100       54 return unless defined($filter);
632 2 50       6 croak("filter must be a subroutine (CODE ref)")
633             unless ref($filter) eq "CODE";
634 2         8 $self->{_filter_filter} = $filter;
635             }
636              
637             sub get_filter_filter {
638 81     81 0 199 return shift->{_filter_filter};
639             }
640              
641             sub get_filter_pushback {
642 94     94 0 269 return shift->{_filter_pushback};
643             }
644              
645             sub get_filter_input {
646 94     94 0 153 return shift->{_filter_input};
647             }
648              
649             sub get_filter_postread {
650 81     81 0 225 return shift->{_filter_postread};
651             }
652              
653             sub get_filter_output {
654 47     47 0 109 return shift->{_filter_output};
655             }
656              
657             sub get_filter_prewrite {
658 47     47 0 83 return shift->{_filter_prewrite};
659             }
660              
661             sub get_filter_input_fd {
662 0     0 0 0 return shift->{_filter_input_fd};
663             }
664              
665             sub get_filter_output_fd {
666 0     0 0 0 return shift->{_filter_output_fd};
667             }
668              
669             ################ Methods ################
670              
671             sub readline {
672 90     90 1 154 my ($self) = shift;
673              
674 90         92 while ( 1 ) {
675              
676 0         0 return shift(@{$self->get_filter_pushback})
  94         174  
677 94 50       115 if @{$self->get_filter_pushback} > 0;
678              
679 94         104 my $line;
680 94         162 my $input = $self->get_filter_input;
681 94 100       154 return undef unless defined($line = $input->());
682              
683 81 100       183 if ( defined(my $postread = $self->get_filter_postread) ) {
684 33 50       53 if ( $postread ne '' ) {
685 0 0       0 next unless defined($line = $postread->($line));
686             }
687             else {
688 33         60 chomp $line;
689             }
690             }
691              
692 81 100       153 if ( defined(my $filter = $self->get_filter_filter) ) {
693 10 100       21 next unless defined($line = $filter->($line));
694             }
695              
696 77         432 return $line;
697             }
698             }
699              
700             sub pushback {
701 0     0 1 0 my ($self, $line) = @_;
702 0         0 push (@{$self->get_filter_pushback}, $line);
  0         0  
703              
704 0         0 return $line;
705             }
706              
707             sub peek {
708 0     0 1 0 my ($self) = @_;
709 0         0 return $self->get_filter_pushback->[0]
710 0 0       0 if @{$self->get_filter_pushback} > 0;
711              
712 0         0 return $self->pushback($self->readline);
713             }
714              
715             sub get_input {
716 0     0 0 0 my $self = shift;
717              
718 0         0 return $self->get_filter_input;
719             }
720              
721             sub writeline {
722 47     47 1 76 my ($self, $line) = @_;
723 47         96 my $prewrite = $self->get_filter_prewrite;
724 47 100       96 if ( defined $prewrite ) {
725 12 50       42 if ( $prewrite ne '' ) {
    50          
726 0         0 $line = $prewrite->($line);
727 0 0       0 return unless defined $line;
728             }
729             elsif ( defined $/ ) {
730             # Add the line terminator.
731             # In paragraph mode, just add two newlines.
732 12 50       42 $line .= ($/ eq '' ? "\n\n" : $/);
733             }
734             }
735 47         91 return $self->get_filter_output->($line);
736             }
737              
738             sub get_output {
739 0     0 0 0 my $self = shift;
740              
741 0         0 return $self->get_filter_output;
742             }
743              
744             sub run {
745 6     6 1 693 my $self = shift;
746              
747 6 100       20 if ( ref($self) ) {
748 4 50       12 croak("Usage: \$f->run( [ filterproc ] )") if @_ > 1;
749 4 100       13 $self->set_filter(shift) if @_;
750             }
751             else {
752 2         11 $self = $self->new(@_);
753             }
754              
755 6         8 my $line;
756 6         20 while ( $line = $self->readline ) {
757 20         44 $self->writeline($line);
758             }
759             }
760              
761             sub set_filter {
762 16     16 0 77 $_[0]->set_filter_filter($_[1]);
763             }
764              
765             sub get_filter {
766 0     0 0   return $_[0]->get_filter_filter($_[1]);
767             }
768              
769             1;