File Coverage

blib/lib/Net/Z3950/PQF.pm
Criterion Covered Total %
statement 66 76 86.8
branch 24 30 80.0
condition 5 6 83.3
subroutine 10 12 83.3
pod 3 3 100.0
total 108 127 85.0


line stmt bran cond sub pod time code
1             # $Id: PQF.pm,v 1.8 2007/10/05 12:12:34 mike Exp $
2              
3             package Net::Z3950::PQF;
4              
5 4     4   269068 use 5.006;
  4         40  
6 4     4   21 use strict;
  4         6  
  4         113  
7 4     4   29 use warnings;
  4         7  
  4         145  
8              
9 4     4   1716 use Net::Z3950::PQF::Node;
  4         9  
  4         3909  
10              
11             our $VERSION = '1.0';
12              
13              
14             =head1 NAME
15              
16             Net::Z3950::PQF - Perl extension for parsing PQF (Prefix Query Format)
17              
18             =head1 SYNOPSIS
19              
20             use Net::Z3950::PQF;
21             $parser = new Net::Z3950::PQF();
22             $node = $parser->parse('@and @attr 1=1003 kernighan @attr 1=4 unix');
23             print $node->render(0);
24              
25             =head1 DESCRIPTION
26              
27             This library provides a parser for PQF (Prefix Query Format), an ugly
28             but precise string format for expressing Z39.50 Type-1 queries. This
29             format is widely used behind the scenes of Z39.50 applications, and is
30             also used extensively with test-harness programs such as the YAZ
31             command-line client, C. A few particularly misguided
32             souls have been known to type it by hand.
33              
34             Unlike PQF itself, this module
35             is simple to use. Create a parser object, then pass PQF strings
36             into its C method to yield parse-trees. The trees are made
37             up of nodes whose types are subclasses of
38             C.
39             and have names of the form
40             C. You may find it helpful to use
41             C to visualise the structure of the returned
42             parse-trees.
43              
44             What is a PQF parse-tree good for? Not much. You can render a
45             human-readable version by invoking the top node's C method,
46             which is probably useful only for debugging. Or you can turn it into
47             tree of nodes like those passed into SimpleServer search handlers
48             using C. If you want to do anything useful, such as
49             implementing an actual query server that understands PQF, you'll have
50             to walk the tree.
51              
52             =head1 METHODS
53              
54             =head2 new()
55              
56             $parser = new Net::Z3950::PQF();
57             $parser = new Net::Z3950::PQF({ 'Net::Z3950::PQF::AndNode' => 'MyApp::And' });
58              
59             Creates a new parser object.
60              
61             One optional argument may be provided. If present, it is interpreted
62             as a map of classnames. The keys are the names of classed generated by
63             this module (C, etc.) and the corresponding
64             values are the names of classes for the various nodes to be blessed
65             into instead of the standard classes.
66              
67             B Doing this will make the C and
68             C unavailable, since they are implemented using
69             private methods of the standard node classes.
70              
71             =cut
72              
73             sub new {
74 3     3 1 278 my $class = shift();
75 3         10 my($classnameMap) = @_;
76              
77 3         20 return bless {
78             text => undef,
79             errmsg => undef,
80             classnameMap => $classnameMap,
81             }, $class;
82             }
83              
84              
85             =head2 parse()
86              
87             $query = '@and @attr 1=1003 kernighan @attr 1=4 unix';
88             $node = $parser->parse($query);
89             if (!defined $node) {
90             die "parse($query) failed: " . $parser->errmsg();
91             }
92              
93             Parses the PQF string provided as its argument. If an error occurs,
94             then an undefined value is returned, and the error message can be
95             obtained by calling the C method. Otherwise, the top node
96             of the parse tree is returned.
97              
98             $node2 = $parser->parse($query, "zthes");
99             $node3 = $parser->parse($query, "1.2.840.10003.3.13");
100              
101             A second argument may be provided after the query itself. If it is
102             provided, then it is taken to be either the name or the OID of a
103             default attribute set, which attributes specified in the query belong
104             to if no alternative attribute set is explicitly specified within the
105             query. When this second argument is absent, the default attribute set
106             is BIB-1.
107              
108             =cut
109              
110             sub parse {
111 22     22 1 7176 my $this = shift();
112 22         44 my($text, $attrset) = @_;
113 22 50       60 $attrset = "bib-1" if !defined $attrset;
114              
115 22         48 $this->{text} = $text;
116 22         58 return $this->_parse($attrset, {});
117             }
118              
119              
120             # PRIVATE to parse();
121             #
122             # Underlying parse function. $attrset is the default attribute-set to
123             # use for attributes that are not specified with an explicit set, and
124             # $attrhash is hash of attributes (at most one per type per
125             # attribute-set) to be applied to all nodes below this point. The
126             # keys of this hash are of the form ":" and the values
127             # are the corresponding attribute values.
128             #
129             sub _parse {
130 74     74   103 my $this = shift();
131 74         170 my($attrset, $attrhash) = @_;
132              
133 74         239 $this->{text} =~ s/^\s+//;
134              
135             ### This rather nasty hack for quoted terms doesn't recognised
136             # backslash-quoted embedded double quotes.
137 74 100       221 if ($this->{text} =~ s/^"(.*?)"//) {
138 3         9 return $this->_leaf('term', $1, $attrhash);
139             }
140              
141             # Also recognise multi-word terms enclosed in {curly braces}
142 71 100       141 if ($this->{text} =~ s/^{(.*?)}//) {
143 1         5 return $this->_leaf('term', $1, $attrhash);
144             }
145              
146 70         120 my $word = $this->_word();
147 70 100 100     369 if ($word eq '@attrset') {
    100 66        
    100          
    50          
    100          
148 2         5 $attrset = $this->_word();
149 2         7 return $this->_parse($attrset, $attrhash);
150              
151             } elsif ($word eq '@attr') {
152 22         41 $word = $this->_word();
153 22 100       67 if ($word !~ /=/) {
154 4         8 $attrset = $word;
155 4         9 $word = $this->_word();
156             }
157 22         92 my($type, $val) = ($word =~ /(.*)=(.*)/);
158 22         77 my %h = %$attrhash;
159 22         93 $h{"$attrset:$type"} = $val;
160 22         72 return $this->_parse($attrset, \%h);
161              
162             } elsif ($word eq '@and' || $word eq '@or' || $word eq '@not') {
163 14         57 my $sub1 = $this->_parse($attrset, $attrhash);
164 14         32 my $sub2 = $this->_parse($attrset, $attrhash);
165 14 100       75 if ($word eq '@and') {
    50          
    0          
166 5         39 return $this->_maybe_rebless(new Net::Z3950::PQF::AndNode($sub1, $sub2));
167             } elsif ($word eq '@or') {
168 9         64 return $this->_maybe_rebless(new Net::Z3950::PQF::OrNode($sub1, $sub2));
169             } elsif ($word eq '@not') {
170 0         0 return $this->_maybe_rebless(new Net::Z3950::PQF::NotNode($sub1, $sub2));
171             } else {
172 0         0 die "Houston, we have a problem";
173             }
174              
175             } elsif ($word eq '@prox') {
176 0         0 return $this->_error("proximity not yet implemented");
177              
178             } elsif ($word eq '@set') {
179 4         11 $word = $this->_word();
180 4         14 return $this->_leaf('rset', $word, $attrhash);
181             }
182              
183             # It must be a bareword
184 28         74 return $this->_leaf('term', $word, $attrhash);
185             }
186              
187              
188             # PRIVATE to _parse();
189             sub _word {
190 102     102   131 my $this = shift();
191              
192 102         218 $this->{text} =~ s/^\s+//;
193 102         292 $this->{text} =~ s/^(\S+)//;
194 102         262 return $1;
195             }
196              
197              
198             # PRIVATE to _parse();
199             sub _error {
200 0     0   0 my $this = shift();
201 0         0 my (@msg) = @_;
202              
203 0         0 $this->{errmsg} = join("", @msg);
204 0         0 return undef;
205             }
206              
207              
208             # PRIVATE to _parse();
209             sub _leaf {
210 36     36   53 my $this = shift();
211 36         79 my($type, $word, $attrhash) = @_;
212              
213 36         53 my @attrs;
214 36         123 foreach my $key (sort keys %$attrhash) {
215 26         86 my($attrset, $type) = split /:/, $key;
216 26         121 push @attrs, [ $attrset, $type, $attrhash->{$key} ];
217             }
218              
219 36 100       81 if ($type eq 'term') {
    50          
220 32         126 return $this->_maybe_rebless(new Net::Z3950::PQF::TermNode($word, @attrs));
221             } elsif ($type eq 'rset') {
222 4         38 return $this->_maybe_rebless(new Net::Z3950::PQF::RsetNode($word, @attrs));
223             } else {
224 0         0 die "_leaf() called with type='$type' (should be 'term' or 'rset')";
225             }
226             }
227              
228              
229             # PRIVATE to _parse() and _leaf();
230             sub _maybe_rebless {
231 50     50   84 my $this = shift();
232 50         76 my($node) = @_;
233              
234 50         80 my $oldClassname = ref $node;
235 50         83 my $newClassname = $this->{classnameMap}->{$oldClassname};
236 50 100       102 if ($newClassname) {
237 5         32 bless $node, $newClassname;
238             }
239              
240 50         187 return $node;
241             }
242              
243              
244             =head2 errmsg()
245              
246             print $parser->errmsg();
247              
248             Returns the last error-message generated by a failed attempt to parse
249             a query.
250              
251             =cut
252              
253             sub errmsg {
254 0     0 1   my $this = shift();
255 0           return $this->{errmsg};
256             }
257              
258              
259             =head1 SEE ALSO
260              
261             The C module.
262              
263             The definition of the Type-1 query in the Z39.50 standard, the
264             relevant section of which is on-line at
265             http://www.loc.gov/z3950/agency/markup/09.html#3.7
266              
267             The documentation of Prefix Query Format in the YAZ Manual, the
268             relevant section of which is on-line at
269             http://indexdata.com/yaz/doc/tools.tkl#PQF
270              
271             =head1 AUTHOR
272              
273             Mike Taylor, Emike@indexdata.comE
274              
275             =head1 COPYRIGHT AND LICENSE
276              
277             Copyright 2004 by Index Data ApS.
278              
279             This library is free software; you can redistribute it and/or modify
280             it under the same terms as Perl itself.
281              
282             =cut
283              
284              
285             1;