File Coverage

blib/lib/MARC/File/USMARC.pm
Criterion Covered Total %
statement 111 120 92.5
branch 40 60 66.6
condition 7 12 58.3
subroutine 14 15 93.3
pod 3 3 100.0
total 175 210 83.3


line stmt bran cond sub pod time code
1             package MARC::File::USMARC;
2              
3             =head1 NAME
4              
5             MARC::File::USMARC - USMARC-specific file handling
6              
7             =cut
8              
9 26     26   133905 use strict;
  26         42  
  26         1339  
10 26     26   1695 use integer;
  26         53  
  26         154  
11              
12 26     26   616 use vars qw( $ERROR );
  26         39  
  26         1237  
13              
14 26     26   8948 use MARC::File;
  26         63  
  26         759  
15 26     26   136 use vars qw( @ISA ); @ISA = qw( MARC::File );
  26         34  
  26         1522  
16              
17             ## conditionally use this module for doing byte oriented
18             ## substr() and length() on utf8 data.
19             eval( "use MARC::File::Utils" ) if MARC::File::utf8_safe();
20              
21 26     26   7684 use MARC::Record qw( LEADER_LEN );
  26         46  
  26         1662  
22 26     26   152 use constant SUBFIELD_INDICATOR => "\x1F";
  26         36  
  26         1521  
23 26     26   151 use constant END_OF_FIELD => "\x1E";
  26         39  
  26         1234  
24 26     26   122 use constant END_OF_RECORD => "\x1D";
  26         74  
  26         1114  
25 26     26   126 use constant DIRECTORY_ENTRY_LEN => 12;
  26         38  
  26         37726  
26              
27             =head1 SYNOPSIS
28              
29             use MARC::File::USMARC;
30              
31             my $file = MARC::File::USMARC->in( $filename );
32              
33             while ( my $marc = $file->next() ) {
34             # Do something
35             }
36             $file->close();
37             undef $file;
38              
39             =head1 EXPORT
40              
41             None.
42              
43             =head1 METHODS
44              
45             =cut
46              
47             sub _next {
48 157     157   195 my $self = shift;
49 157         300 my $fh = $self->{fh};
50              
51 157         294 my $reclen;
52 157 100       1302 return if eof($fh);
53              
54 146         770 local $/ = END_OF_RECORD;
55 146         1429 my $usmarc = <$fh>;
56              
57             # remove illegal garbage that sometimes occurs between records
58 146         662 $usmarc =~ s/^[ \x00\x0a\x0d]+//;
59              
60 146         1117 return $usmarc;
61             }
62              
63             =head2 decode( $string [, \&filter_func ] )
64              
65             Constructor for handling data from a USMARC file. This function takes care of
66             all the tag directory parsing & mangling.
67              
68             Any warnings or coercions can be checked in the C function.
69              
70             The C<$filter_func> is an optional reference to a user-supplied function
71             that determines on a tag-by-tag basis if you want the tag passed to it
72             to be put into the MARC record. The function is passed the tag number
73             and the raw tag data, and must return a boolean. The return of a true
74             value tells MARC::File::USMARC::decode that the tag should get put into
75             the resulting MARC record.
76              
77             For example, if you only want title and subject tags in your MARC record,
78             try this:
79              
80             sub filter {
81             my ($tagno,$tagdata) = @_;
82              
83             return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699);
84             }
85              
86             my $marc = MARC::File::USMARC->decode( $string, \&filter );
87              
88             Why would you want to do such a thing? The big reason is that creating
89             fields is processor-intensive, and if your program is doing read-only
90             data analysis and needs to be as fast as possible, you can save time by
91             not creating fields that you'll be ignoring anyway.
92              
93             Another possible use is if you're only interested in printing certain
94             tags from the record, then you can filter them when you read from disc
95             and not have to delete unwanted tags yourself.
96              
97             =cut
98              
99             sub decode {
100              
101 152     152 1 3165 my $text;
102 152         226 my $location = '';
103              
104             ## decode can be called in a variety of ways
105             ## $object->decode( $string )
106             ## MARC::File::USMARC->decode( $string )
107             ## MARC::File::USMARC::decode( $string )
108             ## this bit of code covers all three
109              
110 152         232 my $self = shift;
111 152 100       707 if ( ref($self) =~ /^MARC::File/ ) {
112 146         396 $location = 'in record '.$self->{recnum};
113 146         394 $text = shift;
114             } else {
115 6         12 $location = 'in record 1';
116 6 100       35 $text = $self=~/MARC::File/ ? shift : $self;
117             }
118              
119 152         211 my $filter_func = shift;
120              
121 152         653 my $marc = MARC::Record->new();
122              
123             # Check for an all-numeric record length
124 152 100       827 ($text =~ /^(\d{5})/)
125             or return $marc->_warn( "Record length \"", substr( $text, 0, 5 ), "\" is not numeric $location" );
126              
127 149         451 my $reclen = $1;
128 149 50       465 my $realLength = MARC::File::utf8_safe()
129             ? MARC::File::Utils::byte_length($text)
130             : length( $text );
131 149 50       870 $marc->_warn( "Invalid record length $location: Leader says $reclen " .
132             "bytes but it's actually $realLength" ) unless $reclen == $realLength;
133              
134 149 50       682 (substr($text, -1, 1) eq END_OF_RECORD)
135             or $marc->_warn( "Invalid record terminator $location" );
136              
137 149         671 $marc->leader( substr( $text, 0, LEADER_LEN ) );
138              
139             # bytes 12 - 16 of leader give offset to the body of the record
140 149 50       404 my $data_start = 0 + MARC::File::utf8_safe()
141             ? MARC::File::Utils::byte_substr( $text, 12, 5 )
142             : substr( $text, 12, 5 );
143              
144             # immediately after the leader comes the directory (no separator)
145 149         634 my $dir = substr( $text, LEADER_LEN, $data_start - LEADER_LEN - 1 ); # -1 to allow for \x1e at end of directory
146              
147             # character after the directory must be \x1e
148 149 100       666 (substr($text, $data_start-1, 1) eq END_OF_FIELD)
149             or $marc->_warn( "No directory found $location" );
150              
151             # all directory entries 12 bytes long, so length % 12 must be 0
152 149 50       710 (length($dir) % DIRECTORY_ENTRY_LEN == 0)
153             or $marc->_warn( "Invalid directory length $location" );
154              
155              
156             # go through all the fields
157 149         409 my $nfields = length($dir)/DIRECTORY_ENTRY_LEN;
158 149         529 for ( my $n = 0; $n < $nfields; $n++ ) {
159 2490         13942 my ( $tagno, $len, $offset ) = unpack( "A3 A4 A5", substr($dir, $n*DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN) );
160              
161             # Check directory validity
162 2490 50       9644 ($tagno =~ /^[0-9A-Za-z]{3}$/)
163             or $marc->_warn( "Invalid tag in directory $location: \"$tagno\"" );
164              
165 2490 50       5972 ($len =~ /^\d{4}$/)
166             or $marc->_warn( "Invalid length in directory $location tag $tagno: \"$len\"" );
167              
168 2490 50       5405 ($offset =~ /^\d{5}$/)
169             or $marc->_warn( "Invalid offset in directory $location tag $tagno: \"$offset\"" );
170              
171 2490 50       6999 ($offset + $len <= $reclen)
172             or $marc->_warn( "Directory entry $location runs off the end of the record tag $tagno" );
173              
174 2490 50       5717 my $tagdata = MARC::File::utf8_safe()
175             ? MARC::File::Utils::byte_substr($text,$data_start+$offset,$len)
176             : substr( $text, $data_start+$offset, $len );
177              
178 2490 50       5132 $marc->_warn( "Invalid length in directory for tag $tagno $location" )
    50          
179             unless ( $len == MARC::File::utf8_safe()
180             ? MARC::File::Utils::byte_length($tagdata)
181             : length($tagdata) );
182              
183 2490 50       5920 if ( substr($tagdata, -1, 1) eq END_OF_FIELD ) {
184             # get rid of the end-of-tag character
185 2490         3603 chop $tagdata;
186 2490         3946 --$len;
187             } else {
188 0         0 $marc->_warn( "field does not end in end of field character in tag $tagno $location" );
189             }
190              
191 2490 50       4384 warn "Specs: ", join( "|", $tagno, $len, $offset, $tagdata ), "\n" if $MARC::Record::DEBUG;
192              
193              
194 2490 100       3852 if ( $filter_func ) {
195 167 100       232 next unless $filter_func->( $tagno, $tagdata );
196             }
197              
198              
199 2336 100 100     13862 if ( ($tagno =~ /^\d+$/) && ($tagno < 10) ) {
200 506         1726 $marc->append_fields( MARC::Field->new( $tagno, $tagdata ) );
201             } else {
202 1830         7920 my @subfields = split( SUBFIELD_INDICATOR, $tagdata );
203 1830         3947 my $indicators = shift @subfields;
204 1830         2174 my ($ind1, $ind2);
205              
206 1830 50 33     8486 if ( length( $indicators ) > 2 or length( $indicators ) == 0 ) {
207 0         0 $marc->_warn( "Invalid indicators \"$indicators\" forced to blanks $location for tag $tagno\n" );
208 0         0 ($ind1,$ind2) = (" ", " ");
209             } else {
210 1830         3611 $ind1 = substr( $indicators,0, 1 );
211 1830         3174 $ind2 = substr( $indicators,1, 1 );
212             }
213              
214             # Split the subfield data into subfield name and data pairs
215 1830         1755 my @subfield_data;
216 1830         2965 for ( @subfields ) {
217 3050 50       5559 if ( length > 0 ) {
218 3050         9838 push( @subfield_data, substr($_,0,1),substr($_,1) );
219             } else {
220 0         0 $marc->_warn( "Entirely empty subfield found in tag $tagno" );
221             }
222             }
223              
224 1830 50       3850 if ( !@subfield_data ) {
225 0         0 $marc->_warn( "no subfield data found $location for tag $tagno" );
226 0         0 next;
227             }
228              
229 1830         5495 my $field = MARC::Field->new($tagno, $ind1, $ind2, @subfield_data );
230 1830 100       4643 if ( $field->warnings() ) {
231 2         5 $marc->_warn( $field->warnings() );
232             }
233 1830         4259 $marc->append_fields( $field );
234             }
235             } # looping through all the fields
236              
237              
238 149         1014 return $marc;
239             }
240              
241             =head2 update_leader()
242              
243             If any changes get made to the MARC record, the first 5 bytes of the
244             leader (the length) will be invalid. This function updates the
245             leader with the correct length of the record as it would be if
246             written out to a file.
247              
248             =cut
249              
250             sub update_leader() {
251 0     0 1 0 my $self = shift;
252              
253 0         0 my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
254              
255 0         0 $self->_set_leader_lengths( $reclen, $baseaddress );
256             }
257              
258             =head2 _build_tag_directory()
259              
260             Function for internal use only: Builds the tag directory that gets
261             put in front of the data in a MARC record.
262              
263             Returns two array references, and two lengths: The tag directory, and the data fields themselves,
264             the length of all data (including the Leader that we expect will be added),
265             and the size of the Leader and tag directory.
266              
267             =cut
268              
269             sub _build_tag_directory {
270 8     8   12 my $marc = shift;
271 8 50 33     33 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
272 8 50       25 die "Wanted a MARC::Record but got a ", ref($marc) unless ref($marc) eq "MARC::Record";
273              
274 8         11 my @fields;
275             my @directory;
276              
277 8         9 my $dataend = 0;
278 8         32 for my $field ( $marc->fields() ) {
279             # Dump data into proper format
280 42         116 my $str = $field->as_usmarc;
281 42         58 push( @fields, $str );
282              
283             # Create directory entry
284 42 50       77 my $len = MARC::File::utf8_safe()
285             ? MARC::File::Utils::byte_length( $str )
286             : length( $str );
287              
288 42         83 my $direntry = sprintf( "%03s%04d%05d", $field->tag, $len, $dataend );
289 42         58 push( @directory, $direntry );
290 42         60 $dataend += $len;
291             }
292              
293 8         21 my $baseaddress =
294             LEADER_LEN + # better be 24
295             ( @directory * DIRECTORY_ENTRY_LEN ) +
296             # all the directory entries
297             1; # end-of-field marker
298              
299              
300 8         9 my $total =
301             $baseaddress + # stuff before first field
302             $dataend + # Length of the fields
303             1; # End-of-record marker
304              
305              
306              
307 8         24 return (\@fields, \@directory, $total, $baseaddress);
308             }
309              
310             =head2 encode()
311              
312             Returns a string of characters suitable for writing out to a USMARC file,
313             including the leader, directory and all the fields.
314              
315             =cut
316              
317             sub encode() {
318 8     8 1 546 my $marc = shift;
319 8 100 66     49 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
320              
321 8         43 my ($fields,$directory,$reclen,$baseaddress) = _build_tag_directory($marc);
322 8         29 $marc->set_leader_lengths( $reclen, $baseaddress );
323              
324             # Glomp it all together
325 8         25 return join("",$marc->leader, @$directory, END_OF_FIELD, @$fields, END_OF_RECORD);
326             }
327             1;
328              
329             __END__