File Coverage

blib/lib/BibTeX/Parser.pm

Criterion	Covered	Total	%
statement	125	136	91.9
branch	54	66	81.8
condition	16	24	66.6
subroutine	9	9	100.0
pod	2	2	100.0
total	206	237	86.9

line	stmt	bran	cond	sub	pod	time	code
1							package BibTeX::Parser;
2							{
3							$BibTeX::Parser::VERSION = '1.93';
4							}
5							# ABSTRACT: A pure perl BibTeX parser
6	19			19		2011812	use warnings;
	19					46
	19					1344
7	19			19		157	use strict;
	19					36
	19					46825
8
9							require BibTeX::Parser::Entry; # mutual dependency, so use instead of require
10
11
12							my $re_namechar = qr/[a-zA-Z0-9\!\$\&\*\+\-\.\/\:\;\<\>\?\[\]\^\_\`\\|\']/o;
13							my $re_name = qr/$re_namechar+/o;
14
15
16							sub new {
17	15			15	1	1893606	my ( $class, $fh, $opts ) = @_;
18
19	15		50			431	return bless {
20							fh => $fh,
21							opts => $opts \|\| {},
22							strings => {
23							jan => "January",
24							feb => "February",
25							mar => "March",
26							apr => "April",
27							may => "May",
28							jun => "June",
29							jul => "July",
30							aug => "August",
31							sep => "September",
32							oct => "October",
33							nov => "November",
34							dec => "December",
35
36							},
37							line => -1,
38							buffer => "",
39							}, $class;
40							}
41
42							sub _slurp_close_bracket;
43
44							sub _parse_next {
45	42			42		79	my $self = shift;
46
47	42					90	while (1) { # loop until regular entry is finished
48	46	100				334	return 0 if $self->{fh}->eof;
49	41					926	local $_ = $self->{buffer};
50
51	41					138	until (/@/m) {
52	111					521	my $line = $self->{fh}->getline;
53	111	100				386	return 0 unless defined $line;
54	107					191	$line =~ s/^%.*$//;
55	107					367	$_ .= $line;
56							}
57
58	37					242	my $current_entry = new BibTeX::Parser::Entry;
59	37	100				753	if (/@($re_name)/cgo) {
60	36					134	my $type = uc $1;
61	36					142	$current_entry->type( $type );
62	36					87	my $start_pos = pos($_) - length($type) - 1;
63
64							# read rest of entry (matches braces)
65	36					74	my $bracelevel = 0;
66	36					89	$bracelevel += tr/\{/\{/; #count braces
67	36					71	$bracelevel -= tr/\}/\}/;
68	36					131	while ( $bracelevel != 0 ) {
69	156					213	my $position = pos($_);
70	156					425	my $line = $self->{fh}->getline;
71	156	50				708	last unless defined $line;
72	156					317	$bracelevel =
73							$bracelevel + ( $line =~ tr/\{/\{/ ) - ( $line =~ tr/\}/\}/ );
74	156					327	$_ .= $line;
75	156					439	pos($_) = $position;
76							}
77
78							# Remember text before the entry
79	36					109	my $pre = substr($_, 0, $start_pos-1);
80	36	100				114	if ($start_pos == 0) {
81	10					19	$pre = '';
82							}
83	36					201	$current_entry->pre($pre);
84
85
86							# Remember raw bibtex code
87	36					104	my $raw = substr($_, $start_pos);
88	36					148	$raw =~ s/^\s+//;
89	36					350	$raw =~ s/\s+$//;
90	36					159	$current_entry->raw_bibtex($raw);
91
92	36					58	my $pos = pos $_;
93	36					89	tr/\n/ /;
94	36					94	pos($_) = $pos;
95
96	36	100	100			239	if ( $type eq "STRING" ) {
		100
97	2	50				118	if (/\G\{\s($re_name)\s=\s*/cgo) {
98	2					8	my $key = lc($1);
99							my $value = _parse_string( $self->{strings},
100	2					8	exists $self->{opts}->{"no-warn-ack"} );
101	2	50				7	if ( defined $self->{strings}->{$key} ) {
102	0					0	warn("Redefining string $key!");
103							}
104	2					5	$self->{strings}->{$key} = $value;
105	2					6	/\G[\s\n]*\}/cg;
106							} else {
107	0					0	$current_entry->error("Malformed string! ($raw)");
108	0					0	return $current_entry;
109							}
110							} elsif ( $type eq "COMMENT" or $type eq "PREAMBLE" ) {
111	2					6	/\G\{./cgo;
112	2					5	_slurp_close_bracket;
113							} else { # normal entry
114	32					118	$current_entry->parse_ok(1);
115
116							# parse key
117	32	100				1599	if (/\G\s\{(?:\s($re_name)\s,[\s\n]\|\s+\r?\s*)/cgo) {
118	31					164	$current_entry->key($1);
119
120							# fields
121	31					1111	while (/\G[\s\n]($re_name)[\s\n]=[\s\n]*/cgo) {
122							$current_entry->field(
123							$1 => _parse_string( $self->{strings},
124	135					378	exists $self->{opts}->{"no-warn-ack"} ) );
125	135					330	my $idx = index( $_, ',', pos($_) );
126	135	100				715	pos($_) = $idx + 1 if $idx > 0;
127							}
128
129	31					252	return $current_entry;
130
131							} else {
132
133	1		50			8	$current_entry->error("Malformed entry (key contains invalid characters) at " . substr($_, pos($_) \|\| 0, 20) . ", ignoring");
134	1					3	_slurp_close_bracket;
135	1					11	return $current_entry;
136							}
137							}
138
139	4					48	$self->{buffer} = substr $_, pos($_);
140
141							} else {
142	1		50			46	$current_entry->error("Did not find type at " . substr($_, pos($_) \|\| 0, 20));
143	1					7	return $current_entry;
144							}
145
146							}
147							}
148
149
150							sub next {
151	42			42	1	6557	my $self = shift;
152
153	42					130	return $self->_parse_next;
154							}
155
156							# slurp everything till the next closing brace. Handles
157							# nested brackets
158							sub _slurp_close_bracket {
159	3			3		6	my $bracelevel = 0;
160							BRACE: {
161	3	50				6	/\G[^\}]*\{/cg && do { $bracelevel++; redo BRACE };
	3					8
	0					0
	0					0
162							/\G[^\{]*\}/cg
163	3	100				62	&& do {
164	2	50				11	if ( $bracelevel > 0 ) {
165	0					0	$bracelevel--;
166	0					0	redo BRACE;
167							} else {
168	2					5	return;
169							}
170							}
171							}
172							}
173
174							# parse bibtex string in $_ and return. A BibTeX string is either enclosed
175							# in double quotes '""' or matching braces '{}'. The braced form may contain
176							# nested braces.
177							#
178							# Second argument NO_WARN_ACK says whether to emit the warning
179							# "Using undefined string" if the name of the undefined string starts
180							# with "ack-". The default is to warn. The TUGboat config file
181							# ltx2crossrefxml-tugboat.cfg sets this.
182							#
183							# It is an unfortunate fact that people routinely copy bib entries
184							# without copying the "ack-nhfb" or other "ack-..." @string definitions
185							# in Nelson Beebe's bibliography files, resulting in this warning. It is
186							# too irritating to have to define them (they are never used), and also
187							# too irritating to have to see the many warnings on every run.
188							#
189							sub _parse_string {
190	144			144		195768	my ($strings_ref, $no_warn_ack) = @_;
191	144		50			593	$no_warn_ack \|\|= 0;
192
193	144					196	my $value = "";
194
195							PART: {
196	144	100				213	if (/\G(\d+)/cg) {
	151	100				1204
		100
197	12					63	$value .= $1;
198							} elsif (/\G($re_name)/cgo) {
199	10	50				47	if (! defined $strings_ref->{lc($1)}) {
200	0	0	0			0	warn("Using undefined string $1 (", lc($1), ")")
201							unless $no_warn_ack && $1 =~ /^ack-/;
202							}
203	10		50			51	$value .= $strings_ref->{$1} \|\| "";
204							} elsif (/\G"(([^"\\](\\.)[^\\"]))"/cgs)
205							{ # quoted string with embedded escapes
206	81					244	$value .= $1;
207							} else {
208	48					133	my $part = _extract_bracketed( $_ );
209	48					117	$value .= substr $part, 1, length($part) - 2; # strip quotes
210							}
211
212	151	100				401	if (/\G\s#\s/cg) { # string concatenation by #
213	7					18	redo PART;
214							}
215							}
216	144					554	$value =~ s/[\s\n]+/ /g;
217	144					596	return $value;
218							}
219
220							sub _extract_bracketed
221							{
222	48			48		135	for($_[0]) # alias to $_
223							{
224	48					93	/\G\s+/cg;
225	48					93	my $start = pos($_);
226	48					61	my $depth = 0;
227	48					63	while(1)
228							{
229	175	100				398	/\G\\./cg && next;
230	173	100				372	/\G\{/cg && (++$depth, next);
231	118	100				539	/\G\}/cg && (--$depth > 0 ? next : last);
		100
232	63	50				205	/\G([^\\\{\}]+)/cg && next;
233	0					0	last; # end of string
234							}
235	48					179	return substr($_, $start, pos($_)-$start);
236							}
237							}
238
239							# Split the $string using $pattern as a delimiter with
240							# each part having balanced braces (so "{$pattern}"
241							# does NOT split).
242							# Return empty list if unmatched braces
243
244							sub _split_braced_string {
245	239			239		158369	my $string = shift;
246	239					414	my $pattern = shift;
247	239					353	my @tokens;
248	239	100				603	return () if $string eq '';
249	238					376	my $buffer;
250	238		100			696	while (!defined pos $string \|\| pos $string < length $string) {
251	406	100				12635	if ( $string =~ /\G(.*?)(\{\|$pattern)/cgi ) {
252	186					551	my $match = $1;
253	186	100				3164	if ( $2 =~ /$pattern/i ) {
		50
254	157					458	$buffer .= $match;
255	157					366	push @tokens, $buffer;
256	157					849	$buffer = "";
257							} elsif ( $2 =~ /\{/ ) {
258	29					81	$buffer .= $match . "{";
259	29					65	my $numbraces=1;
260	29		100			160	while ($numbraces !=0 && pos $string < length $string) {
261	345					513	my $symbol = substr($string, pos $string, 1);
262	345					464	$buffer .= $symbol;
263	345	50				725	if ($symbol eq '{') {
		100
264	0					0	$numbraces ++;
265							} elsif ($symbol eq '}') {
266	27					42	$numbraces --;
267							}
268	345					1177	pos($string) ++;
269							}
270	29	100				187	if ($numbraces != 0) {
271	2					13	return ();
272							}
273							} else {
274	0					0	$buffer .= $match;
275							}
276							} else {
277	220		100			1057	$buffer .= substr $string, (pos $string \|\| 0);
278	220					402	last;
279							}
280							}
281	236	50				772	push @tokens, $buffer if $buffer;
282	236					957	return @tokens;
283							}
284
285
286							1; # End of BibTeX::Parser
287
288
289							__END__