File Coverage

blib/lib/Net/Z3950/PQF.pm

Criterion	Covered	Total	%
statement	59	69	85.5
branch	22	28	78.5
condition	5	6	83.3
subroutine	9	11	81.8
pod	3	3	100.0
total	98	117	83.7

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: PQF.pm,v 1.8 2007/10/05 12:12:34 mike Exp $
2
3							package Net::Z3950::PQF;
4
5	3			3		85255	use 5.006;
	3					10
	3					125
6	3			3		17	use strict;
	3					8
	3					143
7	3			3		16	use warnings;
	3					11
	3					93
8
9	3			3		1809	use Net::Z3950::PQF::Node;
	3					8
	3					2804
10
11							our $VERSION = '0.04';
12
13
14							=head1 NAME
15
16							Net::Z3950::PQF - Perl extension for parsing PQF (Prefix Query Format)
17
18							=head1 SYNOPSIS
19
20							use Net::Z3950::PQF;
21							$parser = new Net::Z3950::PQF();
22							$node = $parser->parse('@and @attr 1=1003 kernighan @attr 1=4 unix');
23							print $node->render(0);
24
25							=head1 DESCRIPTION
26
27							This library provides a parser for PQF (Prefix Query Format), an ugly
28							but precise string format for expressing Z39.50 Type-1 queries. This
29							format is widely used behind the scenes of Z39.50 applications, and is
30							also used extensively with test-harness programs such as the YAZ
31							command-line client, C. A few particularly misguided
32							souls have been known to type it by hand.
33
34							Unlike PQF itself, this module
35							is simple to use. Create a parser object, then pass PQF strings
36							into its C method to yield parse-trees. The trees are made
37							up of nodes whose types are subclasses of
38							C.
39							and have names of the form
40							C. You may find it helpful to use
41							C to visualise the structure of the returned
42							parse-trees.
43
44							What is a PQF parse-tree good for? Not much. You can render a
45							human-readable version by invoking the top node's C method,
46							which is probably useful only for debugging. Or you can turn it into
47							tree of nodes like those passed into SimpleServer search handlers
48							using C. If you want to do anything useful, such as
49							implementing an actual query server that understands PQF, you'll have
50							to walk the tree.
51
52							=head1 METHODS
53
54							=head2 new()
55
56							$parser = new Net::Z3950::PQF();
57
58							Creates a new parser object.
59
60							=cut
61
62							sub new {
63	2			2	1	27	my $class = shift();
64
65	2					13	return bless {
66							text => undef,
67							errmsg => undef,
68							}, $class;
69							}
70
71
72							=head2 parse()
73
74							$query = '@and @attr 1=1003 kernighan @attr 1=4 unix';
75							$node = $parser->parse($query);
76							if (!defined $node) {
77							die "parse($query) failed: " . $parser->errmsg();
78							}
79
80							Parses the PQF string provided as its argument. If an error occurs,
81							then an undefined value is returned, and the error message can be
82							obtained by calling the C method. Otherwise, the top node
83							of the parse tree is returned.
84
85							$node2 = $parser->parse($query, "zthes");
86							$node3 = $parser->parse($query, "1.2.840.10003.3.13");
87
88							A second argument may be provided after the query itself. If it is
89							provided, then it is taken to be either the name or the OID of a
90							default attribute set, which attributes specified in the query belong
91							to if no alternative attribute set is explicitly specified within the
92							query. When this second argument is absent, the default attribute set
93							is BIB-1.
94
95							=cut
96
97							sub parse {
98	21			21	1	12162	my $this = shift();
99	21					41	my($text, $attrset) = @_;
100	21	50				60	$attrset = "bib-1" if !defined $attrset;
101
102	21					344	$this->{text} = $text;
103	21					65	return $this->_parse($attrset, {});
104							}
105
106
107							# PRIVATE to parse();
108							#
109							# Underlying parse function. $attrset is the default attribute-set to
110							# use for attributes that are not specified with an explicit set, and
111							# $attrhash is hash of attributes (at most one per type per
112							# attribute-set) to be applied to all nodes below this point. The
113							# keys of this hash are of the form ":" and the values
114							# are the corresponding attribute values.
115							#
116							sub _parse {
117	67			67		96	my $this = shift();
118	67					90	my($attrset, $attrhash) = @_;
119
120	67					377	$this->{text} =~ s/^\s+//;
121
122							### This rather nasty hack for quoted terms doesn't recognised
123							# backslash-quoted embedded double quotes.
124	67	100				251	if ($this->{text} =~ s/^"(.*?)"//) {
125	3					10	return $this->_leaf('term', $1, $attrhash);
126							}
127
128							# Also recognise multi-word terms enclosed in {curly braces}
129	64	100				512	if ($this->{text} =~ s/^{(.*?)}//) {
130	1					3	return $this->_leaf('term', $1, $attrhash);
131							}
132
133	63					134	my $word = $this->_word();
134	63	100	100			671	if ($word eq '@attrset') {
		100	66
		100
		50
		100
135	2					7	$attrset = $this->_word();
136	2					6	return $this->_parse($attrset, $attrhash);
137
138							} elsif ($word eq '@attr') {
139	20					507	$word = $this->_word();
140	20	100				66	if ($word !~ /=/) {
141	4					8	$attrset = $word;
142	4					10	$word = $this->_word();
143							}
144	20					88	my($type, $val) = ($word =~ /(.)=(.)/);
145	20					56	my %h = %$attrhash;
146	20					54	$h{"$attrset:$type"} = $val;
147	20					160	return $this->_parse($attrset, \%h);
148
149							} elsif ($word eq '@and' \|\| $word eq '@or' \|\| $word eq '@not') {
150	12					148	my $sub1 = $this->_parse($attrset, $attrhash);
151	12					126	my $sub2 = $this->_parse($attrset, $attrhash);
152	12	100				38	if ($word eq '@and') {
		50
		0
153	4					38	return new Net::Z3950::PQF::AndNode($sub1, $sub2);
154							} elsif ($word eq '@or') {
155	8					46	return new Net::Z3950::PQF::OrNode($sub1, $sub2);
156							} elsif ($word eq '@not') {
157	0					0	return new Net::Z3950::PQF::NotNode($sub1, $sub2);
158							} else {
159	0					0	die "Houston, we have a problem";
160							}
161
162							} elsif ($word eq '@prox') {
163	0					0	return $this->_error("proximity not yet implemented");
164
165							} elsif ($word eq '@set') {
166	3					17	$word = $this->_word();
167	3					10	return $this->_leaf('rset', $word, $attrhash);
168							}
169
170							# It must be a bareword
171	26					60	return $this->_leaf('term', $word, $attrhash);
172							}
173
174
175							# PRIVATE to _parse();
176							sub _word {
177	92			92		172	my $this = shift();
178
179	92					227	$this->{text} =~ s/^\s+//;
180	92					352	$this->{text} =~ s/^(\S+)//;
181	92					267	return $1;
182							}
183
184
185							# PRIVATE to _parse();
186							sub _error {
187	0			0		0	my $this = shift();
188	0					0	my (@msg) = @_;
189
190	0					0	$this->{errmsg} = join("", @msg);
191	0					0	return undef;
192							}
193
194
195							# PRIVATE to _parse();
196							sub _leaf {
197	33			33		126	my $this = shift();
198	33					57	my($type, $word, $attrhash) = @_;
199
200	33					41	my @attrs;
201	33					136	foreach my $key (sort keys %$attrhash) {
202	24					144	my($attrset, $type) = split /:/, $key;
203	24					138	push @attrs, [ $attrset, $type, $attrhash->{$key} ];
204							}
205
206	33	100				95	if ($type eq 'term') {
		50
207	30					122	return new Net::Z3950::PQF::TermNode($word, @attrs);
208							} elsif ($type eq 'rset') {
209	3					39	return new Net::Z3950::PQF::RsetNode($word, @attrs);
210							} else {
211	0						die "_leaf() called with type='$type' (should be 'term' or 'rset')";
212							}
213							}
214
215
216							=head2 errmsg()
217
218							print $parser->errmsg();
219
220							Returns the last error-message generated by a failed attempt to parse
221							a query.
222
223							=cut
224
225							sub errmsg {
226	0			0	1		my $this = shift();
227	0						return $this->{errmsg};
228							}
229
230
231							=head1 SEE ALSO
232
233							The C module.
234
235							The definition of the Type-1 query in the Z39.50 standard, the
236							relevant section of which is on-line at
237							http://www.loc.gov/z3950/agency/markup/09.html#3.7
238
239							The documentation of Prefix Query Format in the YAZ Manual, the
240							relevant section of which is on-line at
241							http://indexdata.com/yaz/doc/tools.tkl#PQF
242
243							=head1 AUTHOR
244
245							Mike Taylor, Emike@indexdata.comE
246
247							=head1 COPYRIGHT AND LICENSE
248
249							Copyright 2004 by Index Data ApS.
250
251							This library is free software; you can redistribute it and/or modify
252							it under the same terms as Perl itself.
253
254							=cut
255
256
257							1;