File Coverage

blib/lib/BibTeX/Parser.pm

Criterion	Covered	Total	%
statement	126	136	92.6
branch	54	64	84.3
condition	14	17	82.3
subroutine	10	10	100.0
pod	2	2	100.0
total	206	229	89.9

line	stmt	bran	cond	sub	pod	time	code
1							package BibTeX::Parser;
2							{
3							$BibTeX::Parser::VERSION = '1.03';
4							}
5							# ABSTRACT: A pure perl BibTeX parser
6	18			18		1043121	use warnings;
	18					156
	18					1056
7	18			18		98	use strict;
	18					50
	18					408
8
9	18			18		7535	use BibTeX::Parser::Entry;
	18					52
	18					30287
10
11
12							my $re_namechar = qr/[a-zA-Z0-9\!\$\&\*\+\-\.\/\:\;\<\>\?\[\]\^\_\`\\|]/o;
13							my $re_name = qr/$re_namechar+/o;
14
15
16							sub new {
17	15			15	1	5685	my ( $class, $fh ) = @_;
18
19	15					263	return bless {
20							fh => $fh,
21							strings => {
22							jan => "January",
23							feb => "February",
24							mar => "March",
25							apr => "April",
26							may => "May",
27							jun => "June",
28							jul => "July",
29							aug => "August",
30							sep => "September",
31							oct => "October",
32							nov => "November",
33							dec => "December",
34
35							},
36							line => -1,
37							buffer => "",
38							}, $class;
39							}
40
41							sub _slurp_close_bracket;
42
43							sub _parse_next {
44	42			42		70	my $self = shift;
45
46	42					67	while (1) { # loop until regular entry is finished
47	46	100				245	return 0 if $self->{fh}->eof;
48	41					550	local $_ = $self->{buffer};
49
50	41					119	until (/@/m) {
51	111					2104	my $line = $self->{fh}->getline;
52	111	100				2569	return 0 unless defined $line;
53	107					353	$_ .= $line;
54							}
55
56	37					179	my $current_entry = new BibTeX::Parser::Entry;
57	37	100				499	if (/@($re_name)/cgo) {
58	36					109	my $type = uc $1;
59	36					130	$current_entry->type( $type );
60	36					95	my $start_pos = pos($_) - length($type) - 1;
61
62							# read rest of entry (matches braces)
63	36					55	my $bracelevel = 0;
64	36					94	$bracelevel += tr/\{/\{/; #count braces
65	36					86	$bracelevel -= tr/\}/\}/;
66	36					123	while ( $bracelevel != 0 ) {
67	156					222	my $position = pos($_);
68	156					2400	my $line = $self->{fh}->getline;
69	156	50				3347	last unless defined $line;
70	156					297	$bracelevel =
71							$bracelevel + ( $line =~ tr/\{/\{/ ) - ( $line =~ tr/\}/\}/ );
72	156					326	$_ .= $line;
73	156					471	pos($_) = $position;
74							}
75
76							# Remember text before the entry
77	36					103	my $pre = substr($_, 0, $start_pos-1);
78	36	100				87	if ($start_pos == 0) {
79	9					33	$pre = '';
80							}
81	36					134	$current_entry->pre($pre);
82
83
84							# Remember raw bibtex code
85	36					83	my $raw = substr($_, $start_pos);
86	36					98	$raw =~ s/^\s+//;
87	36					267	$raw =~ s/\s+$//;
88	36					143	$current_entry->raw_bibtex($raw);
89
90	36					64	my $pos = pos $_;
91	36					92	tr/\n/ /;
92	36					98	pos($_) = $pos;
93
94	36	100	100			241	if ( $type eq "STRING" ) {
		100
95	2	50				52	if (/\G\{\s($re_name)\s=\s*/cgo) {
96	2					6	my $key = $1;
97	2					7	my $value = _parse_string( $self->{strings} );
98	2	50				25	if ( defined $self->{strings}->{$key} ) {
99	0					0	warn("Redefining string $key!");
100							}
101	2					7	$self->{strings}->{$key} = $value;
102	2					10	/\G[\s\n]*\}/cg;
103							} else {
104	0					0	$current_entry->error("Malformed string!");
105	0					0	return $current_entry;
106							}
107							} elsif ( $type eq "COMMENT" or $type eq "PREAMBLE" ) {
108	2					7	/\G\{./cgo;
109	2					6	_slurp_close_bracket;
110							} else { # normal entry
111	32					123	$current_entry->parse_ok(1);
112
113							# parse key
114	32	100				757	if (/\G\s\{(?:\s($re_name)\s,[\s\n]\|\s+\r?\s*)/cgo) {
115	31					185	$current_entry->key($1);
116
117							# fields
118	31					738	while (/\G[\s\n]($re_name)[\s\n]=[\s\n]*/cgo) {
119							$current_entry->field(
120	135					362	$1 => _parse_string( $self->{strings} ) );
121	135					305	my $idx = index( $_, ',', pos($_) );
122	135	100				719	pos($_) = $idx + 1 if $idx > 0;
123							}
124
125	31					181	return $current_entry;
126
127							} else {
128
129	1		50			8	$current_entry->error("Malformed entry (key contains illegal characters) at " . substr($_, pos($_) \|\| 0, 20) . ", ignoring");
130	1					3	_slurp_close_bracket;
131	1					12	return $current_entry;
132							}
133							}
134
135	4					27	$self->{buffer} = substr $_, pos($_);
136
137							} else {
138	1		50			24	$current_entry->error("Did not find type at " . substr($_, pos($_) \|\| 0, 20));
139	1					4	return $current_entry;
140							}
141
142							}
143							}
144
145
146							sub next {
147	42			42	1	5021	my $self = shift;
148
149	42					111	return $self->_parse_next;
150							}
151
152							# slurp everything till the next closing brace. Handles
153							# nested brackets
154							sub _slurp_close_bracket {
155	3			3		6	my $bracelevel = 0;
156							BRACE: {
157	3	50				6	/\G[^\}]*\{/cg && do { $bracelevel++; redo BRACE };
	3					23
	0					0
	0					0
158							/\G[^\{]*\}/cg
159	3	100				15	&& do {
160	2	50				7	if ( $bracelevel > 0 ) {
161	0					0	$bracelevel--;
162	0					0	redo BRACE;
163							} else {
164	2					4	return;
165							}
166							}
167							}
168							}
169
170							# parse bibtex string in $_ and return. A BibTeX string is either enclosed
171							# in double quotes '"' or matching braces '{}'. The braced form may contain
172							# nested braces.
173							sub _parse_string {
174	144			144		4135	my $strings_ref = shift;
175
176	144					209	my $value = "";
177
178							PART: {
179	144	100				189	if (/\G(\d+)/cg) {
	151	100				1085
		100
180	12					31	$value .= $1;
181							} elsif (/\G($re_name)/cgo) {
182	10	50				70	warn("Using undefined string $1") unless defined $strings_ref->{$1};
183	10		50			408	$value .= $strings_ref->{$1} \|\| "";
184							} elsif (/\G"(([^"\\](\\.)[^\\"]))"/cgs)
185							{ # quoted string with embeded escapes
186	81					216	$value .= $1;
187							} else {
188	48					120	my $part = _extract_bracketed( $_ );
189	48					129	$value .= substr $part, 1, length($part) - 2; # strip quotes
190							}
191
192	151	100				390	if (/\G\s#\s/cg) { # string concatenation by #
193	7					18	redo PART;
194							}
195							}
196	144					521	$value =~ s/[\s\n]+/ /g;
197	144					519	return $value;
198							}
199
200							sub _extract_bracketed
201							{
202	48			48		93	for($_[0]) # alias to $_
203							{
204	48					93	/\G\s+/cg;
205	48					77	my $start = pos($_);
206	48					64	my $depth = 0;
207	48					75	while(1)
208							{
209	175	100				328	/\G\\./cg && next;
210	173	100				358	/\G\{/cg && (++$depth, next);
211	118	100				286	/\G\}/cg && (--$depth > 0 ? next : last);
		100
212	63	50				176	/\G([^\\\{\}]+)/cg && next;
213	0					0	last; # end of string
214							}
215	48					165	return substr($_, $start, pos($_)-$start);
216							}
217							}
218
219							# Split the $string using $pattern as a delimiter with
220							# each part having balanced braces (so "{$pattern}"
221							# does NOT split).
222							# Return empty list if unmatched braces
223
224							sub _split_braced_string {
225	239			239		4455	my $string = shift;
226	239					335	my $pattern = shift;
227	239					326	my @tokens;
228	239	100				504	return () if $string eq '';
229	238					374	my $buffer;
230	238		100			586	while (!defined pos $string \|\| pos $string < length $string) {
231	406	100				5970	if ( $string =~ /\G(.*?)(\{\|$pattern)/cgi ) {
232	186					555	my $match = $1;
233	186	100				1252	if ( $2 =~ /$pattern/i ) {
		50
234	157					296	$buffer .= $match;
235	157					312	push @tokens, $buffer;
236	157					745	$buffer = "";
237							} elsif ( $2 =~ /\{/ ) {
238	29					65	$buffer .= $match . "{";
239	29					49	my $numbraces=1;
240	29		100			189	while ($numbraces !=0 && pos $string < length $string) {
241	345					585	my $symbol = substr($string, pos $string, 1);
242	345					459	$buffer .= $symbol;
243	345	50				669	if ($symbol eq '{') {
		100
244	0					0	$numbraces ++;
245							} elsif ($symbol eq '}') {
246	27					36	$numbraces --;
247							}
248	345					1120	pos($string) ++;
249							}
250	29	100				136	if ($numbraces != 0) {
251	2					11	return ();
252							}
253							} else {
254	0					0	$buffer .= $match;
255							}
256							} else {
257	220		100			840	$buffer .= substr $string, (pos $string \|\| 0);
258	220					428	last;
259							}
260							}
261	236	50				632	push @tokens, $buffer if $buffer;
262	236					844	return @tokens;
263							}
264
265
266							1; # End of BibTeX::Parser
267
268
269							__END__