line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MARC::Batch; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
MARC::Batch - Perl module for handling files of MARC::Record objects |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
MARC::Batch hides all the file handling of files of Cs. |
10
|
|
|
|
|
|
|
C still does the file I/O, but C handles the |
11
|
|
|
|
|
|
|
multiple-file aspects. |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use MARC::Batch; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# If you have weird control fields... |
16
|
|
|
|
|
|
|
use MARC::Field; |
17
|
|
|
|
|
|
|
MARC::Field->allow_controlfield_tags('FMT', 'LDX'); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $batch = MARC::Batch->new( 'USMARC', @files ); |
21
|
|
|
|
|
|
|
while ( my $marc = $batch->next ) { |
22
|
|
|
|
|
|
|
print $marc->subfield(245,"a"), "\n"; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 EXPORT |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
None. Everything is a class method. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=cut |
30
|
|
|
|
|
|
|
|
31
|
13
|
|
|
13
|
|
113129
|
use strict; |
|
13
|
|
|
|
|
34
|
|
|
13
|
|
|
|
|
417
|
|
32
|
13
|
|
|
13
|
|
1291
|
use integer; |
|
13
|
|
|
|
|
105
|
|
|
13
|
|
|
|
|
68
|
|
33
|
13
|
|
|
13
|
|
344
|
use Carp qw( croak ); |
|
13
|
|
|
|
|
29
|
|
|
13
|
|
|
|
|
7006
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 METHODS |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head2 new( $type, @files ) |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Create a C object that will process C<@files>. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
C<$type> must be either "USMARC" or "MicroLIF". If you want to specify |
42
|
|
|
|
|
|
|
"MARC::File::USMARC" or "MARC::File::MicroLIF", that's OK, too. C returns a |
43
|
|
|
|
|
|
|
new MARC::Batch object. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
C<@files> can be a list of filenames: |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $batch = MARC::Batch->new( 'USMARC', 'file1.marc', 'file2.marc' ); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Your C<@files> may also contain filehandles. So if you've got a large |
50
|
|
|
|
|
|
|
file that's gzipped you can open a pipe to F and pass it in: |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my $fh = IO::File->new( 'gunzip -c marc.dat.gz |' ); |
53
|
|
|
|
|
|
|
my $batch = MARC::Batch->new( 'USMARC', $fh ); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
And you can mix and match if you really want to: |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $batch = MARC::Batch->new( 'USMARC', $fh, 'file1.marc' ); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub new { |
62
|
17
|
|
|
17
|
1
|
11699
|
my $class = shift; |
63
|
17
|
|
|
|
|
51
|
my $type = shift; |
64
|
|
|
|
|
|
|
|
65
|
17
|
100
|
|
|
|
106
|
my $marcclass = ($type =~ /^MARC::File/) ? $type : "MARC::File::$type"; |
66
|
|
|
|
|
|
|
|
67
|
17
|
|
|
|
|
1034
|
eval "require $marcclass"; |
68
|
17
|
50
|
|
|
|
124
|
croak $@ if $@; |
69
|
|
|
|
|
|
|
|
70
|
17
|
|
|
|
|
75
|
my @files = @_; |
71
|
|
|
|
|
|
|
|
72
|
17
|
|
|
|
|
147
|
my $self = { |
73
|
|
|
|
|
|
|
filestack => \@files, |
74
|
|
|
|
|
|
|
filename => undef, |
75
|
|
|
|
|
|
|
marcclass => $marcclass, |
76
|
|
|
|
|
|
|
file => undef, |
77
|
|
|
|
|
|
|
warnings => [], |
78
|
|
|
|
|
|
|
'warn' => 1, |
79
|
|
|
|
|
|
|
strict => 1, |
80
|
|
|
|
|
|
|
}; |
81
|
|
|
|
|
|
|
|
82
|
17
|
|
|
|
|
66
|
bless $self, $class; |
83
|
|
|
|
|
|
|
|
84
|
17
|
|
|
|
|
80
|
return $self; |
85
|
|
|
|
|
|
|
} # new() |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head2 next() |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Read the next record from that batch, and return it as a MARC::Record |
91
|
|
|
|
|
|
|
object. If the current file is at EOF, close it and open the next |
92
|
|
|
|
|
|
|
one. C will return C when there is no more data to be |
93
|
|
|
|
|
|
|
read from any batch files. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
By default, C also will return C if an error is |
96
|
|
|
|
|
|
|
encountered while reading from the batch. If not checked for this can |
97
|
|
|
|
|
|
|
cause your iteration to terminate prematurely. To alter this behavior, |
98
|
|
|
|
|
|
|
see C. You can retrieve warning messages using the |
99
|
|
|
|
|
|
|
C method. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Optionally you can pass in a filter function as a subroutine reference |
102
|
|
|
|
|
|
|
if you are only interested in particular fields from the record. This |
103
|
|
|
|
|
|
|
can boost performance. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=cut |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub next { |
108
|
303
|
|
|
303
|
1
|
164030
|
my ( $self, $filter ) = @_; |
109
|
303
|
100
|
100
|
|
|
1338
|
if ( $filter and ref($filter) ne 'CODE' ) { |
110
|
1
|
|
|
|
|
132
|
croak( "filter function in next() must be a subroutine reference" ); |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
302
|
100
|
|
|
|
1149
|
if ( $self->{file} ) { |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
# get the next record |
116
|
285
|
|
|
|
|
1224
|
my $rec = $self->{file}->next( $filter ); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# collect warnings from MARC::File::* object |
119
|
|
|
|
|
|
|
# we use the warnings() method here since MARC::Batch |
120
|
|
|
|
|
|
|
# hides access to MARC::File objects, and we don't |
121
|
|
|
|
|
|
|
# need to preserve the warnings buffer. |
122
|
285
|
|
|
|
|
1564
|
my @warnings = $self->{file}->warnings(); |
123
|
285
|
50
|
|
|
|
941
|
if ( @warnings ) { |
124
|
0
|
|
|
|
|
0
|
$self->warnings( @warnings ); |
125
|
0
|
0
|
|
|
|
0
|
return if $self->{ strict }; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
285
|
100
|
|
|
|
890
|
if ($rec) { |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# collect warnings from the MARC::Record object |
131
|
|
|
|
|
|
|
# IMPORTANT: here we don't use warnings() but dig |
132
|
|
|
|
|
|
|
# into the the object to get at the warnings without |
133
|
|
|
|
|
|
|
# erasing the buffer. This is so a user can call |
134
|
|
|
|
|
|
|
# warnings() on the MARC::Record object and get back |
135
|
|
|
|
|
|
|
# warnings for that specific record. |
136
|
271
|
|
|
|
|
490
|
my @warnings = @{ $rec->{_warnings} }; |
|
271
|
|
|
|
|
659
|
|
137
|
|
|
|
|
|
|
|
138
|
271
|
100
|
|
|
|
812
|
if (@warnings) { |
139
|
5
|
|
|
|
|
15
|
$self->warnings( @warnings ); |
140
|
5
|
100
|
|
|
|
17
|
return if $self->{ strict }; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# return the MARC::Record object |
144
|
270
|
|
|
|
|
1188
|
return($rec); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Get the next file off the stack, if there is one |
151
|
31
|
100
|
|
|
|
72
|
$self->{filename} = shift @{$self->{filestack}} or return; |
|
31
|
|
|
|
|
191
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Instantiate a filename for it |
154
|
24
|
|
|
|
|
78
|
my $marcclass = $self->{marcclass}; |
155
|
24
|
50
|
|
|
|
172
|
$self->{file} = $marcclass->in( $self->{filename} ) or return; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# call this method again now that we've got a file open |
158
|
24
|
|
|
|
|
1448
|
return( $self->next( $filter ) ); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head2 strict_off() |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
If you would like C to continue after it has encountered what |
165
|
|
|
|
|
|
|
it believes to be bad MARC data then use this method to turn strict B. |
166
|
|
|
|
|
|
|
A call to C always returns true (1). |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
C can be handy when you don't care about the quality of your |
169
|
|
|
|
|
|
|
MARC data, and just want to plow through it. For safety, C |
170
|
|
|
|
|
|
|
strict is B by default. |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=cut |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
sub strict_off { |
175
|
3
|
|
|
3
|
1
|
17
|
my $self = shift; |
176
|
3
|
|
|
|
|
10
|
$self->{ strict } = 0; |
177
|
3
|
|
|
|
|
8
|
return(1); |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 strict_on() |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
The opposite of C, and the default state. You shouldn't |
183
|
|
|
|
|
|
|
have to use this method unless you've previously used C, and |
184
|
|
|
|
|
|
|
want it back on again. When strict is B calls to next() will return |
185
|
|
|
|
|
|
|
undef when an error is encountered while reading MARC data. strict_on() |
186
|
|
|
|
|
|
|
always returns true (1). |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=cut |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub strict_on { |
191
|
1
|
|
|
1
|
1
|
6
|
my $self = shift; |
192
|
1
|
|
|
|
|
2
|
$self->{ strict } = 1; |
193
|
1
|
|
|
|
|
3
|
return(1); |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head2 warnings() |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Returns a list of warnings that have accumulated while processing a particular |
199
|
|
|
|
|
|
|
batch file. As a side effect the warning buffer will be cleared. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
my @warnings = $batch->warnings(); |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
This method is also used internally to set warnings, so you probably don't |
204
|
|
|
|
|
|
|
want to be passing in anything as this will set warnings on your batch object. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
C will return the empty list when there are no warnings. |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=cut |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub warnings { |
211
|
9
|
|
|
9
|
1
|
46
|
my ($self,@new) = @_; |
212
|
9
|
100
|
|
|
|
23
|
if ( @new ) { |
213
|
5
|
|
|
|
|
10
|
push( @{ $self->{warnings} }, @new ); |
|
5
|
|
|
|
|
13
|
|
214
|
5
|
50
|
|
|
|
20
|
print STDERR join( "\n", @new ) . "\n" if $self->{'warn'}; |
215
|
|
|
|
|
|
|
} else { |
216
|
4
|
|
|
|
|
7
|
my @old = @{ $self->{warnings} }; |
|
4
|
|
|
|
|
12
|
|
217
|
4
|
|
|
|
|
9
|
$self->{warnings} = []; |
218
|
4
|
|
|
|
|
20
|
return(@old); |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head2 warnings_off() |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
Turns off the default behavior of printing warnings to STDERR. However, even |
226
|
|
|
|
|
|
|
with warnings off the messages can still be retrieved using the warnings() |
227
|
|
|
|
|
|
|
method if you wish to check for them. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
C always returns true (1). |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=cut |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
sub warnings_off { |
234
|
4
|
|
|
4
|
1
|
1279
|
my $self = shift; |
235
|
4
|
|
|
|
|
13
|
$self->{ 'warn' } = 0; |
236
|
|
|
|
|
|
|
|
237
|
4
|
|
|
|
|
12
|
return 1; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head2 warnings_on() |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
Turns on warnings so that diagnostic information is printed to STDERR. This |
243
|
|
|
|
|
|
|
is on by default so you shouldn't have to use it unless you've previously |
244
|
|
|
|
|
|
|
turned off warnings using warnings_off(). |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
warnings_on() always returns true (1). |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=cut |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub warnings_on { |
251
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
252
|
0
|
|
|
|
|
|
$self->{ 'warn' } = 1; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head2 filename() |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
Returns the currently open filename or C if there is not currently a file |
258
|
|
|
|
|
|
|
open on this batch object. |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=cut |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
sub filename { |
263
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
264
|
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
return $self->{filename}; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
1; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
__END__ |