File Coverage

blib/lib/MARC/File/MicroLIF.pm

Criterion	Covered	Total	%
statement	102	105	97.1
branch	34	40	85.0
condition	14	17	82.3
subroutine	14	14	100.0
pod	3	3	100.0
total	167	179	93.3

line	stmt	bran	cond	sub	pod	time	code
1							package MARC::File::MicroLIF;
2
3							=head1 NAME
4
5							MARC::File::MicroLIF - MicroLIF-specific file handling
6
7							=cut
8
9	7			7		27702	use strict;
	7					12
	7					245
10	7			7		31	use integer;
	7					8
	7					38
11	7			7		137	use vars qw( $ERROR );
	7					10
	7					270
12
13	7			7		971	use MARC::File;
	7					10
	7					178
14	7			7		29	use vars qw( @ISA ); @ISA = qw( MARC::File );
	7					9
	7					309
15
16	7			7		401	use MARC::Record qw( LEADER_LEN );
	7					8
	7					469
17
18							=head1 SYNOPSIS
19
20							use MARC::File::MicroLIF;
21
22							my $file = MARC::File::MicroLIF->in( $filename );
23
24							while ( my $marc = $file->next() ) {
25							# Do something
26							}
27							$file->close();
28							undef $file;
29
30							=head1 EXPORT
31
32							None.
33
34							=cut
35
36
37							=for internal
38
39							The buffer must be large enough to handle any valid record because
40							we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
41							trio being only partially in the buffer.
42
43							The max valid record is the max MARC record size (99999) plus one
44							or two characters per tag (CR, LF, or CR/LF). It's hard to say
45							what the max number of tags is, so here we use 6000. (6000 tags
46							can be squeezed into a MARC record only if every tag has only one
47							subfield containing a maximum of one character, or if data from
48							multiple tags overlaps in the MARC record body. We're pretty safe.)
49
50							=cut
51
52	7			7		33	use constant BUFFER_MIN => (99999 + 6000 * 2);
	7					8
	7					7535
53
54							=head1 METHODS
55
56							=head2 in()
57
58							Opens a MicroLIF file for reading.
59
60							=cut
61
62							sub in {
63	14			14	1	2148	my $class = shift;
64	14					91	my $self = $class->SUPER::in( @_ );
65
66	14	50				68	if ( $self ) {
67	14					25	bless $self, $class;
68
69	14					37	$self->{exhaustedfh} = 0;
70	14					29	$self->{inputbuf} = '';
71	14					26	$self->{header} = undef;
72
73							# get the MicroLIF header, but handle the case in
74							# which it's missing.
75	14					48	my $header = $self->_get_chunk( 1 );
76	14	50				42	if ( defined $header ) {
77	14	100				79	if ( $header =~ /^LDR/ ) {
78							# header missing, put this back
79	2					8	$self->_unget_chunk( $header . "\n" );
80
81							# XXX should we warn of a missing header?
82							}
83							else {
84	12					32	$self->{header} = $header;
85							}
86							}
87							else {
88							# can't read from the file
89	0					0	undef $self;
90							}
91							}
92
93	14					55	return $self;
94							} # new
95
96
97							# fill the buffer if we need to
98							sub _fill_buffer {
99	247			247		266	my $self = shift;
100	247					321	my $ok = 1;
101
102	247	100	66			815	if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
103							# append the next chunk of bytes to the buffer
104	26					772	my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
105	26	50				159	if ( !defined $read ) {
		100
106							# error!
107	0					0	$ok = undef;
108	0					0	$MARC::File::ERROR = "error reading from file " . $self->{filename};
109							}
110							elsif ( $read < 1 ) {
111	12					41	$self->{exhaustedfh} = 1;
112							}
113							}
114
115	247					1669	return $ok;
116							}
117
118
119							=for internal
120
121							Gets the next chunk of data. If C<$want_line> is true then you get
122							the next chunk ending with any combination of \r and \n of any length.
123							If it is false or not passed then you get the next chunk ending with
124							\x60 followed by any combination of \r and \n of any length.
125
126							All trailing \r and \n are stripped.
127
128							=cut
129
130							sub _get_chunk {
131	247			247		275	my $self = shift;
132	247		100			862	my $want_line = shift \|\| 0;
133
134	247					294	my $chunk = undef;
135
136	247	100	66			411	if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
137
138							# the buffer always has at least one full line in it, so we're
139							# guaranteed that if there are no line endings then we're
140							# on the last line.
141
142	239	100				503	if ( $want_line ) {
143	14	50				133	if ( $self->{inputbuf} =~ /^([^\x0d\x0a]*)([\x0d\x0a]+)/ ) {
144	14					72	$chunk = $1;
145	14					136	$self->{inputbuf} = substr( $self->{inputbuf}, length($1)+length($2) );
146							}
147							}
148							else {
149							# couldn't figure out how to make this work as a regex
150	225					266	my $pos = -1;
151	225					402	while ( !$chunk ) {
152	225					2411	$pos = index( $self->{inputbuf}, '`', $pos+1 );
153	225	100				746	last if $pos < 0;
154	224	50	66			1669	if ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
155	224					741	$chunk = substr( $self->{inputbuf}, 0, $pos+1 ); # include the '`' but not the newlines
156	224		100			1412	while ( substr($self->{inputbuf}, $pos+1, 1) eq "\x0d" or substr($self->{inputbuf}, $pos+1, 1) eq "\x0a" ) {
157	225					1318	++$pos;
158							}
159							# $pos now pointing at last newline char
160	224					1326	$self->{inputbuf} = substr( $self->{inputbuf}, $pos+1 );
161							}
162							}
163							}
164
165	239	100				695	if ( !$chunk ) {
166	1					2	$chunk = $self->{inputbuf};
167	1					2	$self->{inputbuf} = '';
168	1					2	$self->{exhaustedfh} = 1;
169							}
170							}
171
172	247					779	return $chunk;
173							}
174
175
176							# $chunk is put at the beginning of the buffer exactly as
177							# passed in. No line endings are added.
178							sub _unget_chunk {
179	2			2		3	my $self = shift;
180	2					3	my $chunk = shift;
181	2					7	$self->{inputbuf} = $chunk . $self->{inputbuf};
182	2					6	return;
183							}
184
185
186							sub _next {
187	233			233		244	my $self = shift;
188
189	233					379	my $lifrec = $self->_get_chunk();
190
191							# for ease, make the newlines match this platform
192	233	100				3108	$lifrec =~ s/[\x0a\x0d]+/\n/g if defined $lifrec;
193
194	233					1149	return $lifrec;
195							}
196
197
198							=head2 header()
199
200							If the MicroLIF file has a file header then the header is returned.
201							If the file has no header or the file has not yet been opened then
202							C is returned.
203
204							=cut
205
206							sub header {
207	2			2	1	1006	my $self = shift;
208	2					10	return $self->{header};
209							}
210
211							=head2 decode()
212
213							Decodes a MicroLIF record and returns a USMARC record.
214
215							Can be called in one of three different ways:
216
217							$object->decode( $lif )
218							MARC::File::MicroLIF->decode( $lif )
219							MARC::File::MicroLIF::decode( $lif )
220
221							=cut
222
223							sub decode {
224	228			228	1	1879	my $self = shift;
225	228					295	my $location = '';
226	228					220	my $text = '';
227
228							## decode can be called in a variety of ways
229							## this bit of code covers all three
230
231	228	100				818	if ( ref($self) =~ /^MARC::File/ ) {
232	225					394	$location = 'in record '.$self->{recnum};
233	225					374	$text = shift;
234							} else {
235	3					7	$location = 'in record 1';
236	3	100				22	$text = $self=~/MARC::File/ ? shift : $self;
237							}
238
239	228					672	my $marc = MARC::Record->new();
240
241	228					2030	my @lines = split( /\n/, $text );
242	228					608	for my $line ( @lines ) {
243
244	2297	50				8823	($line =~ s/^([0-9A-Za-z]{3})//) or
245							$marc->_warn( "Invalid tag number: ".substr( $line, 0, 3 )." $location" );
246	2297					3940	my $tagno = $1;
247
248	2297	100				8015	($line =~ s/\^`?$//)
249							or $marc->_warn( "Tag $tagno $location is missing a trailing caret." );
250
251	2297	100	100			11221	if ( $tagno eq "LDR" ) {
		100
252	228					764	$marc->leader( substr( $line, 0, LEADER_LEN ) );
253							} elsif ( $tagno =~ /^\d+$/ and $tagno < 10 ) {
254	285					743	$marc->add_fields( $tagno, $line );
255							} else {
256	1784					3956	$line =~ s/^(.)(.)//;
257	1784					3136	my ($ind1,$ind2) = ($1,$2);
258	1784					1583	my @subfields;
259	1784					8505	my @subfield_data_pairs = split( /_(?=[a-z0-9])/, $line );
260	1784	100				3542	if ( scalar @subfield_data_pairs < 2 ) {
261	2					14	$marc->_warn( "Tag $tagno $location has no subfields--discarded." );
262							}
263							else {
264	1782					1771	shift @subfield_data_pairs; # Leading _ makes an empty pair
265	1782					2615	for my $pair ( @subfield_data_pairs ) {
266	2984					7026	my ($subfield,$data) = (substr( $pair, 0, 1 ), substr( $pair, 1 ));
267	2984					7899	push( @subfields, $subfield, $data );
268							}
269	1782					4419	$marc->add_fields( $tagno, $ind1, $ind2, @subfields );
270							}
271							}
272							} # for
273
274	228					1086	return $marc;
275							}
276
277							1;
278
279							__END__