File Coverage

blib/lib/Net/Z3950/PQF.pm

Criterion	Covered	Total	%
statement	66	76	86.8
branch	24	30	80.0
condition	5	6	83.3
subroutine	10	12	83.3
pod	3	3	100.0
total	108	127	85.0

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: PQF.pm,v 1.8 2007/10/05 12:12:34 mike Exp $
2
3							package Net::Z3950::PQF;
4
5	4			4		269068	use 5.006;
	4					40
6	4			4		21	use strict;
	4					6
	4					113
7	4			4		29	use warnings;
	4					7
	4					145
8
9	4			4		1716	use Net::Z3950::PQF::Node;
	4					9
	4					3909
10
11							our $VERSION = '1.0';
12
13
14							=head1 NAME
15
16							Net::Z3950::PQF - Perl extension for parsing PQF (Prefix Query Format)
17
18							=head1 SYNOPSIS
19
20							use Net::Z3950::PQF;
21							$parser = new Net::Z3950::PQF();
22							$node = $parser->parse('@and @attr 1=1003 kernighan @attr 1=4 unix');
23							print $node->render(0);
24
25							=head1 DESCRIPTION
26
27							This library provides a parser for PQF (Prefix Query Format), an ugly
28							but precise string format for expressing Z39.50 Type-1 queries. This
29							format is widely used behind the scenes of Z39.50 applications, and is
30							also used extensively with test-harness programs such as the YAZ
31							command-line client, C. A few particularly misguided
32							souls have been known to type it by hand.
33
34							Unlike PQF itself, this module
35							is simple to use. Create a parser object, then pass PQF strings
36							into its C method to yield parse-trees. The trees are made
37							up of nodes whose types are subclasses of
38							C.
39							and have names of the form
40							C. You may find it helpful to use
41							C to visualise the structure of the returned
42							parse-trees.
43
44							What is a PQF parse-tree good for? Not much. You can render a
45							human-readable version by invoking the top node's C method,
46							which is probably useful only for debugging. Or you can turn it into
47							tree of nodes like those passed into SimpleServer search handlers
48							using C. If you want to do anything useful, such as
49							implementing an actual query server that understands PQF, you'll have
50							to walk the tree.
51
52							=head1 METHODS
53
54							=head2 new()
55
56							$parser = new Net::Z3950::PQF();
57							$parser = new Net::Z3950::PQF({ 'Net::Z3950::PQF::AndNode' => 'MyApp::And' });
58
59							Creates a new parser object.
60
61							One optional argument may be provided. If present, it is interpreted
62							as a map of classnames. The keys are the names of classed generated by
63							this module (C, etc.) and the corresponding
64							values are the names of classes for the various nodes to be blessed
65							into instead of the standard classes.
66
67							B Doing this will make the C and
68							C unavailable, since they are implemented using
69							private methods of the standard node classes.
70
71							=cut
72
73							sub new {
74	3			3	1	278	my $class = shift();
75	3					10	my($classnameMap) = @_;
76
77	3					20	return bless {
78							text => undef,
79							errmsg => undef,
80							classnameMap => $classnameMap,
81							}, $class;
82							}
83
84
85							=head2 parse()
86
87							$query = '@and @attr 1=1003 kernighan @attr 1=4 unix';
88							$node = $parser->parse($query);
89							if (!defined $node) {
90							die "parse($query) failed: " . $parser->errmsg();
91							}
92
93							Parses the PQF string provided as its argument. If an error occurs,
94							then an undefined value is returned, and the error message can be
95							obtained by calling the C method. Otherwise, the top node
96							of the parse tree is returned.
97
98							$node2 = $parser->parse($query, "zthes");
99							$node3 = $parser->parse($query, "1.2.840.10003.3.13");
100
101							A second argument may be provided after the query itself. If it is
102							provided, then it is taken to be either the name or the OID of a
103							default attribute set, which attributes specified in the query belong
104							to if no alternative attribute set is explicitly specified within the
105							query. When this second argument is absent, the default attribute set
106							is BIB-1.
107
108							=cut
109
110							sub parse {
111	22			22	1	7176	my $this = shift();
112	22					44	my($text, $attrset) = @_;
113	22	50				60	$attrset = "bib-1" if !defined $attrset;
114
115	22					48	$this->{text} = $text;
116	22					58	return $this->_parse($attrset, {});
117							}
118
119
120							# PRIVATE to parse();
121							#
122							# Underlying parse function. $attrset is the default attribute-set to
123							# use for attributes that are not specified with an explicit set, and
124							# $attrhash is hash of attributes (at most one per type per
125							# attribute-set) to be applied to all nodes below this point. The
126							# keys of this hash are of the form ":" and the values
127							# are the corresponding attribute values.
128							#
129							sub _parse {
130	74			74		103	my $this = shift();
131	74					170	my($attrset, $attrhash) = @_;
132
133	74					239	$this->{text} =~ s/^\s+//;
134
135							### This rather nasty hack for quoted terms doesn't recognised
136							# backslash-quoted embedded double quotes.
137	74	100				221	if ($this->{text} =~ s/^"(.*?)"//) {
138	3					9	return $this->_leaf('term', $1, $attrhash);
139							}
140
141							# Also recognise multi-word terms enclosed in {curly braces}
142	71	100				141	if ($this->{text} =~ s/^{(.*?)}//) {
143	1					5	return $this->_leaf('term', $1, $attrhash);
144							}
145
146	70					120	my $word = $this->_word();
147	70	100	100			369	if ($word eq '@attrset') {
		100	66
		100
		50
		100
148	2					5	$attrset = $this->_word();
149	2					7	return $this->_parse($attrset, $attrhash);
150
151							} elsif ($word eq '@attr') {
152	22					41	$word = $this->_word();
153	22	100				67	if ($word !~ /=/) {
154	4					8	$attrset = $word;
155	4					9	$word = $this->_word();
156							}
157	22					92	my($type, $val) = ($word =~ /(.)=(.)/);
158	22					77	my %h = %$attrhash;
159	22					93	$h{"$attrset:$type"} = $val;
160	22					72	return $this->_parse($attrset, \%h);
161
162							} elsif ($word eq '@and' \|\| $word eq '@or' \|\| $word eq '@not') {
163	14					57	my $sub1 = $this->_parse($attrset, $attrhash);
164	14					32	my $sub2 = $this->_parse($attrset, $attrhash);
165	14	100				75	if ($word eq '@and') {
		50
		0
166	5					39	return $this->_maybe_rebless(new Net::Z3950::PQF::AndNode($sub1, $sub2));
167							} elsif ($word eq '@or') {
168	9					64	return $this->_maybe_rebless(new Net::Z3950::PQF::OrNode($sub1, $sub2));
169							} elsif ($word eq '@not') {
170	0					0	return $this->_maybe_rebless(new Net::Z3950::PQF::NotNode($sub1, $sub2));
171							} else {
172	0					0	die "Houston, we have a problem";
173							}
174
175							} elsif ($word eq '@prox') {
176	0					0	return $this->_error("proximity not yet implemented");
177
178							} elsif ($word eq '@set') {
179	4					11	$word = $this->_word();
180	4					14	return $this->_leaf('rset', $word, $attrhash);
181							}
182
183							# It must be a bareword
184	28					74	return $this->_leaf('term', $word, $attrhash);
185							}
186
187
188							# PRIVATE to _parse();
189							sub _word {
190	102			102		131	my $this = shift();
191
192	102					218	$this->{text} =~ s/^\s+//;
193	102					292	$this->{text} =~ s/^(\S+)//;
194	102					262	return $1;
195							}
196
197
198							# PRIVATE to _parse();
199							sub _error {
200	0			0		0	my $this = shift();
201	0					0	my (@msg) = @_;
202
203	0					0	$this->{errmsg} = join("", @msg);
204	0					0	return undef;
205							}
206
207
208							# PRIVATE to _parse();
209							sub _leaf {
210	36			36		53	my $this = shift();
211	36					79	my($type, $word, $attrhash) = @_;
212
213	36					53	my @attrs;
214	36					123	foreach my $key (sort keys %$attrhash) {
215	26					86	my($attrset, $type) = split /:/, $key;
216	26					121	push @attrs, [ $attrset, $type, $attrhash->{$key} ];
217							}
218
219	36	100				81	if ($type eq 'term') {
		50
220	32					126	return $this->_maybe_rebless(new Net::Z3950::PQF::TermNode($word, @attrs));
221							} elsif ($type eq 'rset') {
222	4					38	return $this->_maybe_rebless(new Net::Z3950::PQF::RsetNode($word, @attrs));
223							} else {
224	0					0	die "_leaf() called with type='$type' (should be 'term' or 'rset')";
225							}
226							}
227
228
229							# PRIVATE to _parse() and _leaf();
230							sub _maybe_rebless {
231	50			50		84	my $this = shift();
232	50					76	my($node) = @_;
233
234	50					80	my $oldClassname = ref $node;
235	50					83	my $newClassname = $this->{classnameMap}->{$oldClassname};
236	50	100				102	if ($newClassname) {
237	5					32	bless $node, $newClassname;
238							}
239
240	50					187	return $node;
241							}
242
243
244							=head2 errmsg()
245
246							print $parser->errmsg();
247
248							Returns the last error-message generated by a failed attempt to parse
249							a query.
250
251							=cut
252
253							sub errmsg {
254	0			0	1		my $this = shift();
255	0						return $this->{errmsg};
256							}
257
258
259							=head1 SEE ALSO
260
261							The C module.
262
263							The definition of the Type-1 query in the Z39.50 standard, the
264							relevant section of which is on-line at
265							http://www.loc.gov/z3950/agency/markup/09.html#3.7
266
267							The documentation of Prefix Query Format in the YAZ Manual, the
268							relevant section of which is on-line at
269							http://indexdata.com/yaz/doc/tools.tkl#PQF
270
271							=head1 AUTHOR
272
273							Mike Taylor, Emike@indexdata.comE
274
275							=head1 COPYRIGHT AND LICENSE
276
277							Copyright 2004 by Index Data ApS.
278
279							This library is free software; you can redistribute it and/or modify
280							it under the same terms as Perl itself.
281
282							=cut
283
284
285							1;