File Coverage

blib/lib/MARC/Batch.pm
Criterion Covered Total %
statement 55 61 90.1
branch 20 26 76.9
condition 3 3 100.0
subroutine 9 11 81.8
pod 8 8 100.0
total 95 109 87.1


line stmt bran cond sub pod time code
1             package MARC::Batch;
2              
3             =head1 NAME
4              
5             MARC::Batch - Perl module for handling files of MARC::Record objects
6              
7             =head1 SYNOPSIS
8              
9             MARC::Batch hides all the file handling of files of Cs.
10             C still does the file I/O, but C handles the
11             multiple-file aspects.
12              
13             use MARC::Batch;
14              
15             my $batch = new MARC::Batch->new( 'USMARC', @files );
16             while ( my $marc = $batch->next ) {
17             print $marc->subfield(245,"a"), "\n";
18             }
19              
20             =head1 EXPORT
21              
22             None. Everything is a class method.
23              
24             =cut
25              
26 12     12   122156 use strict;
  12         25  
  12         432  
27 12     12   2043 use integer;
  12         43  
  12         55  
28 12     12   291 use Carp qw( croak );
  12         15  
  12         6586  
29              
30             =head1 METHODS
31              
32             =head2 new( $type, @files )
33              
34             Create a C object that will process C<@files>.
35              
36             C<$type> must be either "USMARC" or "MicroLIF". If you want to specify
37             "MARC::File::USMARC" or "MARC::File::MicroLIF", that's OK, too. C returns a
38             new MARC::Batch object.
39              
40             C<@files> can be a list of filenames:
41              
42             my $batch = MARC::Batch->new( 'USMARC', 'file1.marc', 'file2.marc' );
43              
44             Your C<@files> may also contain filehandles. So if you've got a large
45             file that's gzipped you can open a pipe to F and pass it in:
46              
47             my $fh = IO::File->new( 'gunzip -c marc.dat.gz |' );
48             my $batch = MARC::Batch->new( 'USMARC', $fh );
49              
50             And you can mix and match if you really want to:
51              
52             my $batch = MARC::Batch->new( 'USMARC', $fh, 'file1.marc' );
53              
54             =cut
55              
56             sub new {
57 17     17 1 10716 my $class = shift;
58 17         38 my $type = shift;
59              
60 17 100       107 my $marcclass = ($type =~ /^MARC::File/) ? $type : "MARC::File::$type";
61              
62 17         1213 eval "require $marcclass";
63 17 50       84 croak $@ if $@;
64              
65 17         54 my @files = @_;
66              
67 17         145 my $self = {
68             filestack => \@files,
69             filename => undef,
70             marcclass => $marcclass,
71             file => undef,
72             warnings => [],
73             'warn' => 1,
74             strict => 1,
75             };
76              
77 17         48 bless $self, $class;
78              
79 17         72 return $self;
80             } # new()
81              
82              
83             =head2 next()
84              
85             Read the next record from that batch, and return it as a MARC::Record
86             object. If the current file is at EOF, close it and open the next
87             one. C will return C when there is no more data to be
88             read from any batch files.
89              
90             By default, C also will return C if an error is
91             encountered while reading from the batch. If not checked for this can
92             cause your iteration to terminate prematurely. To alter this behavior,
93             see C. You can retrieve warning messages using the
94             C method.
95              
96             Optionally you can pass in a filter function as a subroutine reference
97             if you are only interested in particular fields from the record. This
98             can boost performance.
99              
100             =cut
101              
102             sub next {
103 303     303 1 145667 my ( $self, $filter ) = @_;
104 303 100 100     1012 if ( $filter and ref($filter) ne 'CODE' ) {
105 1         155 croak( "filter function in next() must be a subroutine reference" );
106             }
107              
108 302 100       802 if ( $self->{file} ) {
109              
110             # get the next record
111 285         1016 my $rec = $self->{file}->next( $filter );
112              
113             # collect warnings from MARC::File::* object
114             # we use the warnings() method here since MARC::Batch
115             # hides access to MARC::File objects, and we don't
116             # need to preserve the warnings buffer.
117 285         1162 my @warnings = $self->{file}->warnings();
118 285 50       601 if ( @warnings ) {
119 0         0 $self->warnings( @warnings );
120 0 0       0 return if $self->{ strict };
121             }
122              
123 285 100       563 if ($rec) {
124              
125             # collect warnings from the MARC::Record object
126             # IMPORTANT: here we don't use warnings() but dig
127             # into the the object to get at the warnings without
128             # erasing the buffer. This is so a user can call
129             # warnings() on the MARC::Record object and get back
130             # warnings for that specific record.
131 271         252 my @warnings = @{ $rec->{_warnings} };
  271         469  
132              
133 271 100       523 if (@warnings) {
134 5         18 $self->warnings( @warnings );
135 5 100       20 return if $self->{ strict };
136             }
137              
138             # return the MARC::Record object
139 270         892 return($rec);
140              
141             }
142              
143             }
144              
145             # Get the next file off the stack, if there is one
146 31 100       50 $self->{filename} = shift @{$self->{filestack}} or return;
  31         196  
147              
148             # Instantiate a filename for it
149 24         57 my $marcclass = $self->{marcclass};
150 24 50       191 $self->{file} = $marcclass->in( $self->{filename} ) or return;
151              
152             # call this method again now that we've got a file open
153 24         161 return( $self->next( $filter ) );
154              
155             }
156              
157             =head2 strict_off()
158              
159             If you would like C to continue after it has encountered what
160             it believes to be bad MARC data then use this method to turn strict B.
161             A call to C always returns true (1).
162              
163             C can be handy when you don't care about the quality of your
164             MARC data, and just want to plow through it. For safety, C
165             strict is B by default.
166              
167             =cut
168              
169             sub strict_off {
170 3     3 1 14 my $self = shift;
171 3         10 $self->{ strict } = 0;
172 3         6 return(1);
173             }
174              
175             =head2 strict_on()
176              
177             The opposite of C, and the default state. You shouldn't
178             have to use this method unless you've previously used C, and
179             want it back on again. When strict is B calls to next() will return
180             undef when an error is encountered while reading MARC data. strict_on()
181             always returns true (1).
182              
183             =cut
184              
185             sub strict_on {
186 1     1 1 4 my $self = shift;
187 1         3 $self->{ strict } = 1;
188 1         2 return(1);
189             }
190              
191             =head2 warnings()
192              
193             Returns a list of warnings that have accumulated while processing a particular
194             batch file. As a side effect the warning buffer will be cleared.
195              
196             my @warnings = $batch->warnings();
197              
198             This method is also used internally to set warnings, so you probably don't
199             want to be passing in anything as this will set warnings on your batch object.
200              
201             C will return the empty list when there are no warnings.
202              
203             =cut
204              
205             sub warnings {
206 9     9 1 42 my ($self,@new) = @_;
207 9 100       22 if ( @new ) {
208 5         8 push( @{ $self->{warnings} }, @new );
  5         11  
209 5 50       20 print STDERR join( "\n", @new ) if $self->{'warn'};
210             } else {
211 4         6 my @old = @{ $self->{warnings} };
  4         10  
212 4         10 $self->{warnings} = [];
213 4         17 return(@old);
214             }
215             }
216              
217              
218             =head2 warnings_off()
219              
220             Turns off the default behavior of printing warnings to STDERR. However, even
221             with warnings off the messages can still be retrieved using the warnings()
222             method if you wish to check for them.
223              
224             C always returns true (1).
225              
226             =cut
227              
228             sub warnings_off {
229 4     4 1 1118 my $self = shift;
230 4         17 $self->{ 'warn' } = 0;
231              
232 4         6 return 1;
233             }
234              
235             =head2 warnings_on()
236              
237             Turns on warnings so that diagnostic information is printed to STDERR. This
238             is on by default so you shouldn't have to use it unless you've previously
239             turned off warnings using warnings_off().
240              
241             warnings_on() always returns true (1).
242              
243             =cut
244              
245             sub warnings_on {
246 0     0 1   my $self = shift;
247 0           $self->{ 'warn' } = 1;
248             }
249              
250             =head2 filename()
251              
252             Returns the currently open filename or C if there is not currently a file
253             open on this batch object.
254              
255             =cut
256              
257             sub filename {
258 0     0 1   my $self = shift;
259              
260 0           return $self->{filename};
261             }
262              
263              
264             1;
265              
266             __END__