File Coverage

lib/XML/XPath.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package XML::XPath;
2              
3             =head1 NAME
4              
5             XML::XPath - Parse and evaluate XPath statements.
6              
7             =head1 VERSION
8              
9             Version 1.41
10              
11             =cut
12              
13 1     1   1801 use strict; use warnings;
  1     1   2  
  1         30  
  1         5  
  1         2  
  1         36  
14 1     1   4 use vars qw($VERSION $AUTOLOAD $revision);
  1         1  
  1         80  
15              
16             $VERSION = '1.41';
17             $XML::XPath::Namespaces = 1;
18             $XML::XPath::ParseParamEnt = 1;
19             $XML::XPath::Debug = 0;
20              
21 1     1   542 use Data::Dumper;
  1         6933  
  1         178  
22 1     1   500 use XML::XPath::XMLParser;
  0            
  0            
23             use XML::XPath::Parser;
24             use IO::File;
25              
26             # Parameters for new()
27             my @options = qw(
28             filename
29             parser
30             xml
31             ioref
32             context
33             );
34              
35             =head1 DESCRIPTION
36              
37             This module aims to comply exactly to the XPath specification at http://www.w3.org/TR/xpath
38             and yet allow extensions to be added in the form of functions.Modules such as XSLT
39             and XPointer may need to do this as they support functionality beyond XPath.
40              
41             =head1 SYNOPSIS
42              
43             use XML::XPath;
44             use XML::XPath::XMLParser;
45              
46             my $xp = XML::XPath->new(filename => 'test.xhtml');
47              
48             my $nodeset = $xp->find('/html/body/p'); # find all paragraphs
49              
50             foreach my $node ($nodeset->get_nodelist) {
51             print "FOUND\n\n",
52             XML::XPath::XMLParser::as_string($node),
53             "\n\n";
54             }
55              
56             =head1 DETAILS
57              
58             There is an awful lot to all of this, so bear with it - if you stick it out it
59             should be worth it. Please get a good understanding of XPath by reading the spec
60             before asking me questions. All of the classes and parts herein are named to be
61             synonymous with the names in the specification, so consult that if you don't
62             understand why I'm doing something in the code.
63              
64             =head1 METHODS
65              
66             The API of XML::XPath itself is extremely simple to allow you to get going almost
67             immediately. The deeper API's are more complex, but you shouldn't have to touch
68             most of that.
69              
70             =head2 new()
71              
72             This constructor follows the often seen named parameter method call. Parameters
73             you can use are: filename, parser, xml, ioref and context. The filename parameter
74             specifies an XML file to parse. The xml parameter specifies a string to parse,
75             and the ioref parameter specifies an ioref to parse. The context option allows
76             you to specify a context node. The context node has to be in the format of a node
77             as specified in L. The 4 parameters filename, xml, ioref
78             and context are mutually exclusive - you should only specify one (if you specify
79             anything other than context, the context node is the root of your document). The
80             parser option allows you to pass in an already prepared XML::Parser object, to
81             save you having to create more than one in your application (if, for example, you
82             are doing more than just XPath).
83              
84             my $xp = XML::XPath->new( context => $node );
85              
86             It is very much recommended that you use only 1 XPath object throughout the life
87             of your application. This is because the object (and it's sub-objects) maintain
88             certain bits of state information that will be useful (such as XPath variables)
89             to later calls to find(). It's also a good idea because you'll use less memory
90             this way.
91              
92             =cut
93              
94             sub new {
95             my $proto = shift;
96             my $class = ref($proto) || $proto;
97              
98             my(%args);
99             # Try to figure out what the user passed
100             if ($#_ == 0) { # passed a scalar
101             my $string = $_[0];
102             if ($string =~ m{<.*?>}s) { # it's an XML string
103             $args{'xml'} = $string;
104             } elsif (ref($string)) { # read XML from file handle
105             $args{'ioref'} = $string;
106             } elsif ($string eq '-') { # read XML from stdin
107             $args{'ioref'} = IO::File->new($string);
108             } else { # read XML from a file
109             $args{'filename'} = $string;
110             }
111             } else { # passed a hash or hash reference
112             # just pass the parameters on to the XPath constructor
113             %args = ((ref($_[0]) eq "HASH") ? %{$_[0]} : @_);
114             }
115              
116             if ($args{filename} && (!-e $args{filename} || !-r $args{filename})) {
117             die "Cannot open file '$args{filename}'";
118             }
119              
120             my %hash = map(( "_$_" => $args{$_} ), @options);
121             $hash{path_parser} = XML::XPath::Parser->new();
122             return bless \%hash, $class;
123             }
124              
125             =head2 find($path, [$context])
126              
127             The find function takes an XPath expression (a string) and returns either an XML::XPath::NodeSet
128             object containing the nodes it found (or empty if no nodes matched the path), or
129             one of L (a string), L or L.
130             It should always return something - and you can use ->isa() to find out what it
131             returned. If you need to check how many nodes it found you should check $nodeset->size.
132             See L. An optional second parameter of a context node allows
133             you to use this method repeatedly, for example XSLT needs to do this.
134              
135             =cut
136              
137             sub find {
138             my ($self, $path, $context) = @_;
139              
140             die "No path to find" unless $path;
141              
142             if (!defined $context) {
143             $context = $self->get_context;
144             }
145              
146             if (!defined $context) {
147             # Still no context? Need to parse.
148             my $parser = XML::XPath::XMLParser->new(
149             filename => $self->get_filename,
150             xml => $self->get_xml,
151             ioref => $self->get_ioref,
152             parser => $self->get_parser,
153             );
154             $context = $parser->parse;
155             $self->set_context($context);
156             print "CONTEXT:\n", Dumper([$context], ['context']) if $XML::XPath::Debug;
157             }
158              
159             my $parsed_path = $self->{path_parser}->parse($path);
160             print "\n\nPATH: ", $parsed_path->as_string, "\n\n" if $XML::XPath::Debug;
161              
162             #warn "evaluating path\n";
163             return $parsed_path->evaluate($context);
164             }
165              
166             =head2 findnodes($path, [$context])
167              
168             Returns a list of nodes found by $path, optionally in context $context. In scalar
169             context returns an XML::XPath::NodeSet object.
170              
171             =cut
172              
173             sub findnodes {
174             my ($self, $path, $context) = @_;
175              
176             my $results = $self->find($path, $context);
177              
178             if ($results->isa('XML::XPath::NodeSet')) {
179             return wantarray ? $results->get_nodelist : $results;
180             }
181              
182             # warn("findnodes returned a ", ref($results), " object\n") if $XML::XPath::Debug;
183             return wantarray ? () : XML::XPath::NodeSet->new();
184             }
185              
186             =head2 matches($node, $path, [$context])
187              
188             Returns true if the node matches the path (optionally in context $context).
189              
190             =cut
191              
192             sub matches {
193             my $self = shift;
194             my ($node, $path, $context) = @_;
195              
196             my @nodes = $self->findnodes($path, $context);
197              
198             if (grep { "$node" eq "$_" } @nodes) {
199             return 1;
200             }
201             return;
202             }
203              
204             =head2 findnodes_as_string($path, [$context])
205              
206             Returns the nodes found reproduced as XML.The result isn't guaranteed to be valid
207             XML though.
208              
209             =cut
210              
211             sub findnodes_as_string {
212             my ($self, $path, $context) = @_;
213              
214             my $results = $self->find($path, $context);
215              
216             if ($results->isa('XML::XPath::NodeSet')) {
217             return join('', map { $_->toString } $results->get_nodelist);
218             }
219             elsif ($results->isa('XML::XPath::Node')) {
220             return $results->toString;
221             }
222             else {
223             return XML::XPath::Node::XMLescape($results->value);
224             }
225             }
226              
227             =head2 findvalue($path, [$context])
228              
229             Returns either a C, a C or a C
230             object.If the path returns a NodeSet,$nodeset->to_literal is called automatically
231             for you (and thus a C is returned).Note that for each of the
232             objects stringification is overloaded, so you can just print the value found, or
233             manipulate it in the ways you would a normal perl value (e.g. using regular expressions).
234              
235             =cut
236              
237             sub findvalue {
238             my ($self, $path, $context) = @_;
239              
240             my $results = $self->find($path, $context);
241             if ($results->isa('XML::XPath::NodeSet')) {
242             return $results->to_literal;
243             }
244              
245             return $results;
246             }
247              
248             =head2 exists($path, [$context])
249              
250             Returns true if the given path exists.
251              
252             =cut
253              
254             sub exists {
255             my ($self, $path, $context) = @_;
256              
257             $path = '/' if (!defined $path);
258             my @nodeset = $self->findnodes($path, $context);
259             return 1 if (scalar( @nodeset ));
260             return 0;
261             }
262              
263             sub getNodeAsXML {
264             my ($self, $node_path) = @_;
265              
266             $node_path = '/' if (!defined $node_path);
267             if (ref($node_path)) {
268             return $node_path->as_string();
269             } else {
270             return $self->findnodes_as_string($node_path);
271             }
272             }
273              
274             =head2 getNodeText($path)
275              
276             Returns the L for a particular XML node. Returns a string if
277             exists or '' (empty string) if the node doesn't exist.
278              
279             =cut
280              
281             sub getNodeText {
282             my ($self, $node_path) = @_;
283              
284             if (ref($node_path)) {
285             return $node_path->string_value();
286             } else {
287             return $self->findvalue($node_path);
288             }
289             }
290              
291             =head2 setNodeText($path, $text)
292              
293             Sets the text string for a particular XML node. The node can be an element or an
294             attribute. If the node to be set is an attribute, and the attribute node does not
295             exist, it will be created automatically.
296              
297             =cut
298              
299             sub setNodeText {
300             my ($self, $node_path, $new_text) = @_;
301              
302             my $nodeset = $self->findnodes($node_path);
303             return undef if (!defined $nodeset);
304              
305             my @nodes = $nodeset->get_nodelist;
306             if ($#nodes < 0) {
307             if ($node_path =~ m{/(?:@|attribute::)([^/]+)$}) {
308             # attribute not found, so try to create it
309              
310             # Based upon the 'perlvar' documentation located at:
311             # http://perldoc.perl.org/perlvar.html
312             #
313             # The @LAST_MATCH_START section indicates that there's a more efficient
314             # version of $` that can be used.
315             #
316             # Specifically, after a match against some variable $var:
317             # * $` is the same as substr($var, 0, $-[0])
318             my $parent_path = substr($node_path, 0, $-[0]);
319             my $attr = $1;
320             $nodeset = $self->findnodes($parent_path);
321             return undef if (!defined $nodeset);
322             foreach my $node ($nodeset->get_nodelist) {
323             my $newnode = XML::XPath::Node::Attribute->new($attr, $new_text);
324             return undef if (!defined $newnode);
325             $node->appendAttribute($newnode);
326             }
327             } else {
328             return undef;
329             }
330             }
331              
332             foreach my $node (@nodes) {
333             if ($node->getNodeType == XML::XPath::Node::ATTRIBUTE_NODE) {
334             $node->setNodeValue($new_text);
335             } else {
336             foreach my $delnode ($node->getChildNodes()) {
337             $node->removeChild($delnode);
338             }
339             my $newnode = XML::XPath::Node::Text->new($new_text);
340             return undef if (!defined $newnode);
341             $node->appendChild($newnode);
342             }
343             }
344              
345             return 1;
346             }
347              
348             =head2 createNode($path)
349              
350             Creates the node matching the C<$path> given. If part of the path given or all of
351             the path do not exist, the necessary nodes will be created automatically.
352              
353             =cut
354              
355             sub createNode {
356             my ($self, $node_path) = @_;
357              
358             my $path_steps = $self->{path_parser}->parse($node_path);
359             my @path_steps = ();
360             my (undef, @path_steps_lhs) = @{$path_steps->get_lhs()};
361             foreach my $step (@path_steps_lhs) { # precompute paths as string
362             my $string = $step->as_string();
363             push(@path_steps, $string) if (defined $string && $string ne "");
364             }
365              
366             my $prev_node = undef;
367             my $nodeset = undef;
368             my $nodes = undef;
369             my $p = undef;
370             my $test_path = "";
371              
372             # Start with the deepest node, working up the path (right to left),
373             # trying to find a node that exists.
374             for ($p = $#path_steps_lhs; $p >= 0; $p--) {
375             my $path = $path_steps_lhs[$p];
376             $test_path = "(/" . join("/", @path_steps[0..$p]) . ")";
377              
378             $nodeset = $self->findnodes($test_path);
379             return undef if (!defined $nodeset); # error looking for node
380             $nodes = $nodeset->size;
381             return undef if ($nodes > 1); # too many paths - path not specific enough
382             if ($nodes == 1) { # found a node -- need to create nodes below it
383             $prev_node = $nodeset->get_node(1);
384             last;
385             }
386             }
387             if (!defined $prev_node) {
388             my @root_nodes = $self->findnodes('/')->get_nodelist();
389             $prev_node = $root_nodes[0];
390             }
391              
392             # We found a node that exists, or we'll start at the root.
393             # Create all lower nodes working left to right along the path.
394             for ($p++ ; $p <= $#path_steps_lhs; $p++) {
395             my $path = $path_steps_lhs[$p];
396             my $newnode = undef;
397              
398             my $axis = $path->{axis};
399             my $name = $path->{literal};
400              
401             do {
402             if ($axis =~ /^child$/i) {
403             if ($name =~ /(\S+):(\S+)/) {
404             $newnode = XML::XPath::Node::Element->new($name, $1);
405             } else {
406             $newnode = XML::XPath::Node::Element->new($name);
407             }
408             return undef if (!defined $newnode); # could not create new node
409             $prev_node->appendChild($newnode);
410             } elsif ($axis =~ /^attribute$/i) {
411             if ($name =~ /(\S+):(\S+)/) {
412             $newnode = XML::XPath::Node::Attribute->new($name, "", $1);
413             } else {
414             $newnode = XML::XPath::Node::Attribute->new($name, "");
415             }
416             return undef if (!defined $newnode); # could not create new node
417             $prev_node->appendAttribute($newnode);
418             }
419              
420             $test_path = "(/" . join("/", @path_steps[0..$p]) . ")";
421             $nodeset = $self->findnodes($test_path);
422             $nodes = $nodeset->size;
423             die "failed to find node '$test_path'" if (!defined $nodeset); # error looking for node
424             } while ($nodes < 1);
425              
426             $prev_node = $nodeset->get_node(1);
427             }
428              
429             return $prev_node;
430             }
431              
432             sub get_filename {
433             my $self = shift;
434             $self->{_filename};
435             }
436              
437             sub set_filename {
438             my $self = shift;
439             $self->{_filename} = shift;
440             }
441              
442             sub get_parser {
443             my $self = shift;
444             $self->{_parser};
445             }
446              
447             sub set_parser {
448             my $self = shift;
449             $self->{_parser} = shift;
450             }
451              
452             sub get_xml {
453             my $self = shift;
454             $self->{_xml};
455             }
456              
457             sub set_xml {
458             my $self = shift;
459             $self->{_xml} = shift;
460             }
461              
462             sub get_ioref {
463             my $self = shift;
464             $self->{_ioref};
465             }
466              
467             sub set_ioref {
468             my $self = shift;
469             $self->{_ioref} = shift;
470             }
471              
472             sub get_context {
473             my $self = shift;
474             $self->{_context};
475             }
476              
477             sub set_context {
478             my $self = shift;
479             $self->{_context} = shift;
480             }
481              
482             sub cleanup {
483             my $self = shift;
484             if ($XML::XPath::SafeMode) {
485             my $context = $self->get_context;
486             return unless $context;
487             $context->dispose;
488             }
489             }
490              
491             =head2 set_namespace($prefix, $uri)
492              
493             Sets the namespace prefix mapping to the uri.
494              
495             Normally in C the prefixes in XPath node test take their context from
496             the current node. This means that foo:bar will always match an element
497             regardless of the namespace that the prefix foo is mapped to (which might even
498             change within the document, resulting in unexpected results). In order to make
499             prefixes in XPath node tests actually map to a real URI, you need to enable that
500             via a call to the set_namespace method of your C object.
501              
502             =cut
503              
504             sub set_namespace {
505             my $self = shift;
506             my ($prefix, $expanded) = @_;
507             $self->{path_parser}->set_namespace($prefix, $expanded);
508             }
509              
510             =head2 clear_namespaces()
511              
512             Clears all previously set namespace mappings.
513              
514             =cut
515              
516             sub clear_namespaces {
517             my $self = shift;
518             $self->{path_parser}->clear_namespaces();
519             }
520              
521             =head2 $XML::XPath::Namespaces
522              
523             Set this to 0 if you I want namespace processing to occur. This will make
524             everything a little (tiny) bit faster, but you'll suffer for it, probably.
525              
526             =head1 Node Object Model
527              
528             See L, L,
529             L, L,
530             L, L,
531             and L.
532              
533             =head1 On Garbage Collection
534              
535             XPath nodes work in a special way that allows circular references, and yet still
536             lets Perl's reference counting garbage collector to clean up the nodes after use.
537             This should be totally transparent to the user, with one caveat: B
538             your tree before letting go of a sub-tree,consider that playing with fire and you
539             may get burned>. What does this mean to the average user? Not much. Provided you
540             don't free (or let go out of scope) either the tree you passed to XML::XPath->new,
541             or if you didn't pass a tree, and passed a filename or IO-ref, then provided you
542             don't let the XML::XPath object go out of scope before you let results of find()
543             and its friends go out of scope, then you'll be fine. Even if you B let the
544             tree go out of scope before results, you'll probably still be fine. The only case
545             where you may get stung is when the last part of your path/query is either an
546             ancestor or parent axis. In that case the worst that will happen is you'll end up
547             with a circular reference that won't get cleared until interpreter destruction
548             time.You can get around that by explicitly calling $node->DESTROY on each of your
549             result nodes, if you really need to do that.
550              
551             Mail me direct if that's not clear. Note that it's not doom and gloom. It's by no
552             means perfect,but the worst that will happen is a long running process could leak
553             memory. Most long running processes will therefore be able to explicitly be
554             careful not to free the tree (or XML::XPath object) before freeing results.AxKit,
555             an application that uses XML::XPath, does this and I didn't have to make any
556             changes to the code - it's already sensible programming.
557              
558             If you I don't want all this to happen, then set the variable $XML::XPath::SafeMode,
559             and call $xp->cleanup() on the XML::XPath object when you're finished, or $tree->dispose()
560             if you have a tree instead.
561              
562             =head1 Example
563              
564             Please see the test files in t/ for examples on how to use XPath.
565              
566             =head1 AUTHOR
567              
568             Original author Matt Sergeant, C<< >>
569              
570             Currently maintained by Mohammad S Anwar, C<< >>
571              
572             =head1 SEE ALSO
573              
574             L, L, L,
575             L, L, L,
576             L.
577              
578             =head1 LICENSE AND COPYRIGHT
579              
580             This module is copyright 2000 AxKit.com Ltd. This is free software, and as such
581             comes with NO WARRANTY. No dates are used in this module. You may distribute this
582             module under the terms of either the Gnu GPL, or the Artistic License (the same
583             terms as Perl itself).
584              
585             For support, please subscribe to the L
586             mailing list at the URL
587              
588             =cut
589              
590             1; # End of XML::XPath