File Coverage

blib/lib/MARC/File/MicroLIF.pm

Criterion	Covered	Total	%
statement	106	109	97.2
branch	35	42	83.3
condition	14	17	82.3
subroutine	15	15	100.0
pod	3	3	100.0
total	173	186	93.0

line	stmt	bran	cond	sub	pod	time	code
1							package MARC::File::MicroLIF;
2
3							=head1 NAME
4
5							MARC::File::MicroLIF - MicroLIF-specific file handling
6
7							=cut
8
9	7			7		23580	use strict;
	7					19
	7					192
10	7			7		36	use warnings;
	7					15
	7					198
11	7			7		40	use integer;
	7					46
	7					46
12	7			7		503	use vars qw( $ERROR );
	7					16
	7					282
13
14	7			7		881	use MARC::File;
	7					16
	7					208
15	7			7		38	use vars qw( @ISA ); @ISA = qw( MARC::File );
	7					14
	7					302
16
17	7			7		374	use MARC::Record qw( LEADER_LEN );
	7					14
	7					411
18
19							=head1 SYNOPSIS
20
21							use MARC::File::MicroLIF;
22
23							my $file = MARC::File::MicroLIF->in( $filename );
24
25							while ( my $marc = $file->next() ) {
26							# Do something
27							}
28							$file->close();
29							undef $file;
30
31							=head1 EXPORT
32
33							None.
34
35							=cut
36
37
38							=for internal
39
40							The buffer must be large enough to handle any valid record because
41							we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
42							trio being only partially in the buffer.
43
44							The max valid record is the max MARC record size (99999) plus one
45							or two characters per tag (CR, LF, or CR/LF). It's hard to say
46							what the max number of tags is, so here we use 6000. (6000 tags
47							can be squeezed into a MARC record only if every tag has only one
48							subfield containing a maximum of one character, or if data from
49							multiple tags overlaps in the MARC record body. We're pretty safe.)
50
51							=cut
52
53	7			7		41	use constant BUFFER_MIN => (99999 + 6000 * 2);
	7					14
	7					7404
54
55							=head1 METHODS
56
57							=head2 in()
58
59							Opens a MicroLIF file for reading.
60
61							=cut
62
63							sub in {
64	14			14	1	2092	my $class = shift;
65	14					103	my $self = $class->SUPER::in( @_ );
66
67	14	50				98	if ( $self ) {
68	14					36	bless $self, $class;
69
70	14					41	$self->{exhaustedfh} = 0;
71	14					40	$self->{inputbuf} = '';
72	14					37	$self->{header} = undef;
73
74							# get the MicroLIF header, but handle the case in
75							# which it's missing.
76	14					54	my $header = $self->_get_chunk( 1 );
77	14	50				57	if ( defined $header ) {
78	14	100				57	if ( $header =~ /^LDR/ ) {
79							# header missing, put this back
80	2					12	$self->_unget_chunk( $header . "\n" );
81
82							# XXX should we warn of a missing header?
83							}
84							else {
85	12					38	$self->{header} = $header;
86							}
87							}
88							else {
89							# can't read from the file
90	0					0	undef $self;
91							}
92							}
93
94	14					67	return $self;
95							} # new
96
97
98							# fill the buffer if we need to
99							sub _fill_buffer {
100	247			247		499	my $self = shift;
101	247					497	my $ok = 1;
102
103	247	100	66			1127	if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
104							# append the next chunk of bytes to the buffer
105	26					712	my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
106	26	50				187	if ( !defined $read ) {
		100
107							# error!
108	0					0	$ok = undef;
109	0					0	$MARC::File::ERROR = "error reading from file " . $self->{filename};
110							}
111							elsif ( $read < 1 ) {
112	12					49	$self->{exhaustedfh} = 1;
113							}
114							}
115
116	247					1944	return $ok;
117							}
118
119
120							=for internal
121
122							Gets the next chunk of data. If C<$want_line> is true then you get
123							the next chunk ending with any combination of \r and \n of any length.
124							If it is false or not passed then you get the next chunk ending with
125							\x60 followed by any combination of \r and \n of any length.
126
127							All trailing \r and \n are stripped.
128
129							=cut
130
131							sub _get_chunk {
132	247			247		517	my $self = shift;
133	247		100			1204	my $want_line = shift \|\| 0;
134
135	247					554	my $chunk = undef;
136
137	247	100	66			760	if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
138
139							# the buffer always has at least one full line in it, so we're
140							# guaranteed that if there are no line endings then we're
141							# on the last line.
142
143	239	100				750	if ( $want_line ) {
144	14	50				151	if ( $self->{inputbuf} =~ /^([^\x0d\x0a]*)([\x0d\x0a]+)/ ) {
145	14					55	$chunk = $1;
146	14					156	$self->{inputbuf} = substr( $self->{inputbuf}, length($1)+length($2) );
147							}
148							}
149							else {
150							# couldn't figure out how to make this work as a regex
151	225					476	my $pos = -1;
152	225					712	while ( !$chunk ) {
153	225					3209	$pos = index( $self->{inputbuf}, '`', $pos+1 );
154	225	100				1046	last if $pos < 0;
155	224	50	66			2018	if ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
156	224					1033	$chunk = substr( $self->{inputbuf}, 0, $pos+1 ); # include the '`' but not the newlines
157	224		100			1914	while ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
158	225					1684	++$pos;
159							}
160							# $pos now pointing at last newline char
161	224					1875	$self->{inputbuf} = substr( $self->{inputbuf}, $pos+1 );
162							}
163							}
164							}
165
166	239	100				962	if ( !$chunk ) {
167	1					5	$chunk = $self->{inputbuf};
168	1					4	$self->{inputbuf} = '';
169	1					3	$self->{exhaustedfh} = 1;
170							}
171							}
172
173	247					997	return $chunk;
174							}
175
176
177							# $chunk is put at the beginning of the buffer exactly as
178							# passed in. No line endings are added.
179							sub _unget_chunk {
180	2			2		7	my $self = shift;
181	2					6	my $chunk = shift;
182	2					11	$self->{inputbuf} = $chunk . $self->{inputbuf};
183	2					8	return;
184							}
185
186
187							sub _next {
188	233			233		455	my $self = shift;
189
190	233					654	my $lifrec = $self->_get_chunk();
191
192							# for ease, make the newlines match this platform
193	233	100				3542	$lifrec =~ s/[\x0a\x0d]+/\n/g if defined $lifrec;
194
195	233					1601	return $lifrec;
196							}
197
198
199							=head2 header()
200
201							If the MicroLIF file has a file header then the header is returned.
202							If the file has no header or the file has not yet been opened then
203							C is returned.
204
205							=cut
206
207							sub header {
208	2			2	1	1017	my $self = shift;
209	2					13	return $self->{header};
210							}
211
212							=head2 decode()
213
214							Decodes a MicroLIF record and returns a USMARC record.
215
216							Can be called in one of three different ways:
217
218							$object->decode( $lif )
219							MARC::File::MicroLIF->decode( $lif )
220							MARC::File::MicroLIF::decode( $lif )
221
222							=cut
223
224							sub decode {
225	232			232	1	4507	my $self = shift;
226	232					470	my $location = '';
227	232					452	my $text = '';
228
229							## decode can be called in a variety of ways
230							## this bit of code covers all three
231
232	232	100				1101	if ( ref($self) =~ /^MARC::File/ ) {
233	225					703	$location = 'in record '.$self->{recnum};
234	225					711	$text = shift;
235							} else {
236	7					13	$location = 'in record 1';
237	7	100				32	$text = $self=~/MARC::File/ ? shift : $self;
238							}
239
240	232					1077	my $marc = MARC::Record->new();
241
242							# for ease, make the newlines match this platform
243	232	50				2968	$text =~ s/[\x0a\x0d]+/\n/g if defined $text;
244
245	232					2223	my @lines = split( /\n/, $text );
246	232					735	for my $line ( @lines ) {
247
248	2335	50				13701	($line =~ s/^([0-9A-Za-z]{3})//) or
249							$marc->_warn( "Invalid tag number: ".substr( $line, 0, 3 )." $location" );
250	2335					7217	my $tagno = $1;
251
252	2335	100				12473	($line =~ s/\^`?$//)
253							or $marc->_warn( "Tag $tagno $location is missing a trailing caret." );
254
255	2335	100	100			16092	if ( $tagno eq "LDR" ) {
		100
256	232					1186	$marc->leader( substr( $line, 0, LEADER_LEN ) );
257							} elsif ( $tagno =~ /^\d+$/ and $tagno < 10 ) {
258	289					1172	$marc->add_fields( $tagno, $line );
259							} else {
260	1814					7322	$line =~ s/^(.)(.)//;
261	1814					6343	my ($ind1,$ind2) = ($1,$2);
262	1814					3620	my @subfields;
263	1814					10471	my @subfield_data_pairs = split( /_(?=[a-z0-9])/, $line );
264	1814	100				5635	if ( scalar @subfield_data_pairs < 2 ) {
265	8					36	$marc->_warn( "Tag $tagno $location has no subfields--discarded." );
266							}
267							else {
268	1806					3582	shift @subfield_data_pairs; # Leading _ makes an empty pair
269	1806					4715	for my $pair ( @subfield_data_pairs ) {
270	3020					13506	my ($subfield,$data) = (substr( $pair, 0, 1 ), substr( $pair, 1 ));
271	3020					12878	push( @subfields, $subfield, $data );
272							}
273	1806					6791	$marc->add_fields( $tagno, $ind1, $ind2, @subfields );
274							}
275							}
276							} # for
277
278	232					1413	return $marc;
279							}
280
281							1;
282
283							__END__