File Coverage

blib/lib/XML/Easy/NodeBasics.pm
Criterion Covered Total %
statement 101 102 99.0
branch 33 34 97.0
condition 3 3 100.0
subroutine 26 26 100.0
pod 14 14 100.0
total 177 179 98.8


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             XML::Easy::NodeBasics - basic manipulation of XML data nodes
4              
5             =head1 SYNOPSIS
6              
7             use XML::Easy::NodeBasics qw(xml_content_object xml_element);
8              
9             $content = xml_content_object("this", "&", "that");
10             $content = xml_content_object(@sublems);
11              
12             $element = xml_element("a", { href => "there" }, "there");
13             $element = xml_element("div", @subelems);
14              
15             use XML::Easy::NodeBasics qw(
16             xml_c_content_object xml_c_content_twine);
17              
18             $content = xml_c_content_object($content);
19             $twine = xml_c_content_twine($content);
20              
21             use XML::Easy::NodeBasics qw(
22             xml_e_type_name
23             xml_e_attributes xml_e_attribute
24             xml_e_content_object);
25              
26             $type_name = xml_e_type_name($element);
27             $attributes = xml_e_attributes($element);
28             $href = xml_e_attribute($element, "href");
29             $content = xml_e_content_object($element);
30              
31             use XML::Easy::NodeBasics qw(
32             xml_c_equal xml_e_equal xml_c_unequal xml_e_unequal);
33              
34             if(xml_c_equal($content0, $content1)) { ...
35             if(xml_e_equal($element0, $element1)) { ...
36             if(xml_c_unequal($content0, $content1)) { ...
37             if(xml_e_unequal($element0, $element1)) { ...
38              
39             =head1 DESCRIPTION
40              
41             This module supplies functions concerned with the creation, examination,
42             and other manipulation of XML data nodes (content chunks and elements).
43             The nodes are dumb data objects, best manipulated using plain functions
44             such as the ones in this module.
45              
46             The nodes are objects of the classes L and
47             L. The data contained within an existing node
48             cannot be modified. This means that references to nodes can be copied
49             and passed around arbitrarily, without worrying about who might write to
50             them, or deep versus shallow copying. As a result, tasks that you might
51             think of as "modifying an XML node" actually involve creating a new node.
52              
53             The node classes do not have any interesting object-oriented behaviour,
54             and their minimalistic methods are not meant to be called directly.
55             Instead, node creation and examination should be performed using the
56             functions of this module.
57              
58             =head2 Twine
59              
60             For the purposes of examining what is contained within a chunk of
61             content, there is a standard representation of content known as "twine".
62             (It's stronger than a string, and has an alternating structure as will
63             be described.)
64              
65             A piece of twine is a reference to an array with an odd number of members.
66             The first and last members, and all members in between with an even index,
67             are strings giving the chunk's character data. Each member with an odd
68             index is a reference to an L object, representing
69             an XML element contained directly within the chunk. Any of the strings
70             may be empty, if the chunk has no character data between subelements or
71             at the start or end of the chunk.
72              
73             When not looking inside a content chunk, it is preferred to represent
74             it in encapsulated form as an L object.
75              
76             =cut
77              
78             package XML::Easy::NodeBasics;
79              
80 3     3   124603 { use 5.010001; }
  3         12  
81 3     3   16 use warnings;
  3         5  
  3         179  
82 3     3   22 use strict;
  3         3  
  3         91  
83              
84 3     3   11 use Params::Classify 0.000 qw(is_string is_ref);
  3         46  
  3         230  
85 3         228 use XML::Easy::Classify 0.001 qw(
86             is_xml_name check_xml_chardata check_xml_attributes
87             is_xml_content_object check_xml_content_object
88             is_xml_element check_xml_element
89 3     3   390 );
  3         48  
90 3     3   33 use XML::Easy::Content 0.007 ();
  3         38  
  3         61  
91 3     3   13 use XML::Easy::Element 0.007 ();
  3         38  
  3         251  
92              
93             BEGIN {
94 3 50   3   8 if(eval { local $SIG{__DIE__};
  3         13  
95 3         748 require Internals;
96 3         1019 exists &Internals::SetReadOnly;
97             }) {
98 3         142 *_set_readonly = \&Internals::SetReadOnly;
99             } else {
100 0         0 *_set_readonly = sub { };
101             }
102             }
103              
104             our $VERSION = "0.014";
105              
106 3     3   20 use parent "Exporter";
  3         10  
  3         23  
107             our @EXPORT_OK = qw(
108             xml_content_object xc xml_content_twine xct xml_content xml_element xe
109             xml_c_content_object xc_cont xml_c_content_twine xc_twine xml_c_content
110             xml_e_type_name xe_type
111             xml_e_attributes xe_attrs xml_e_attribute xe_attr
112             xml_e_content_object xe_cont xml_e_content_twine xe_twine xml_e_content
113             xml_c_equal xc_eq xml_e_equal xe_eq
114             xml_c_unequal xc_ne xml_e_unequal xe_ne
115             );
116              
117             sub _throw_data_error($) {
118 218     218   530 my($msg) = @_;
119 218         1790 die "invalid XML data: $msg\n";
120             }
121              
122             =head1 FUNCTIONS
123              
124             Each function has two names. There is a longer descriptive name, and
125             a shorter name to spare screen space and the programmer's fingers.
126              
127             =head2 Construction
128              
129             The construction functions each accept any number of items of XML content.
130             These items may be supplied in any of several forms. Content item
131             types may be mixed arbitrarily, in any sequence. The permitted forms
132             of content item are:
133              
134             =over
135              
136             =item character data
137              
138             A plain string of characters that are acceptable to XML.
139              
140             =item element
141              
142             A reference to an L object representing an XML
143             element.
144              
145             =item content object
146              
147             A reference to an L object representing a chunk of
148             XML content.
149              
150             =item twine array
151              
152             A reference to a L array listing a chunk of XML content.
153              
154             =back
155              
156             The construction functions are:
157              
158             =over
159              
160             =item xml_content_object(ITEM ...)
161              
162             =item xc(ITEM ...)
163              
164             Constructs and returns a XML content object based on a list of
165             constituents. Any number of Is (zero or more) may be supplied; each
166             one must be a content item of a permitted type. All the constituents
167             are checked for validity, against the XML 1.0 specification, and the
168             function Cs if any are invalid.
169              
170             All the supplied content items are concatenated to form a single chunk.
171             The function returns a reference to an L object.
172              
173             =cut
174              
175             sub xml_content_twine(@);
176              
177 11352     11352 1 6132512 sub xml_content_object(@) { XML::Easy::Content->new(&xml_content_twine) }
178              
179             *xc = \&xml_content_object;
180              
181             =item xml_content_twine(ITEM ...)
182              
183             =item xct(ITEM ...)
184              
185             Performs the same construction job as L, but returns
186             the resulting content chunk in the form of L rather than
187             a content object.
188              
189             The returned array must not be subsequently modified. If possible,
190             it will be marked as read-only in order to prevent modification.
191              
192             =cut
193              
194             sub xml_content_twine(@) {
195 16750     16750 1 3952375 my @content = ("");
196 16750         48010 foreach(@_) {
197 23522 100       80269 if(is_string($_)) {
    100          
    100          
    100          
198 11084         37712 check_xml_chardata($_);
199 8396         54818 $content[-1] .= $_;
200             } elsif(is_xml_element($_)) {
201 2138         6669 push @content, $_, "";
202             } elsif(is_xml_content_object($_)) {
203 18         63 my $twine = $_->twine;
204 18         43 $content[-1] .= $twine->[0];
205 18         42 push @content, @{$twine}[1 .. $#$twine];
  18         58  
206             } elsif(is_ref($_, "ARRAY")) {
207 10074         1004370 my $twine = XML::Easy::Content->new($_)->twine;
208 3210         33072 $content[-1] .= $twine->[0];
209 3210         14221 push @content, @{$twine}[1 .. $#$twine];
  3210         21182  
210             } else {
211 208         628 _throw_data_error("invalid content item");
212             }
213             }
214 6990         34391 _set_readonly(\$_) foreach @content;
215 6990         18661 _set_readonly(\@content);
216 6990         1029267 return \@content;
217             }
218              
219             *xct = \&xml_content_twine;
220              
221             =item xml_content(ITEM ...)
222              
223             Deprecated alias for L.
224              
225             =cut
226              
227             *xml_content = \&xml_content_twine;
228              
229             =item xml_element(TYPE_NAME, ITEM ...)
230              
231             =item xe(TYPE_NAME, ITEM ...)
232              
233             Constructs and returns an L object, representing an
234             XML element, based on a list of consitutents. I must be a
235             string, and gives the name of the element's type. Any number of Is
236             (zero or more) may be supplied; each one must be either a content item
237             of a permitted type or a reference to a hash of attributes. All the
238             constituents are checked for validity, against the XML 1.0 specification,
239             and the function Cs if any are invalid.
240              
241             All the attributes supplied are gathered together to form the element's
242             attribute set. It is an error if an attribute name has been used more
243             than once (even if the same value was given each time). All the supplied
244             content items are concatenated to form the element's content.
245             The function returns a reference to an L object.
246              
247             =cut
248              
249             sub xml_element($@) {
250 16212     16212 1 20370016 my $type_name = shift(@_);
251 16212 100       78141 XML::Easy::Element->new($type_name, {}, [""])
252             unless is_xml_name($type_name);
253 14372         33395 my %attrs;
254 14372         51877 for(my $i = 0; $i != @_; ) {
255 30638         61866 my $item = $_[$i];
256 30638 100       74641 if(is_ref($item, "HASH")) {
257 12380         51389 while(my($k, $v) = each(%$item)) {
258             _throw_data_error("duplicate attribute name")
259 294508 100       536724 if exists $attrs{$k};
260 294498         831418 $attrs{$k} = $v;
261             }
262 12370         44144 splice @_, $i, 1, ();
263             } else {
264 18258         47689 $i++;
265             }
266             }
267 14362         64405 check_xml_attributes(\%attrs);
268 5952         18485 return XML::Easy::Element->new($type_name, \%attrs,
269             &xml_content_object);
270             }
271              
272             *xe = \&xml_element;
273              
274             =back
275              
276             =head2 Examination of content chunks
277              
278             =over
279              
280             =item xml_c_content_object(CONTENT)
281              
282             =item xc_cont(CONTENT)
283              
284             I must be a reference to either an L
285             object or a L array.
286             Returns a reference to an L object encapsulating
287             the content.
288              
289             =cut
290              
291             sub xml_c_content_object($) {
292 13600 100   13600 1 699326 if(is_ref($_[0], "ARRAY")) {
293 7848         1070742 return XML::Easy::Content->new($_[0]);
294             } else {
295 5752         22659 &check_xml_content_object;
296 5584         24294 return $_[0];
297             }
298             }
299              
300             *xc_cont = \&xml_c_content_object;
301              
302             =item xml_c_content_twine(CONTENT)
303              
304             =item xc_twine(CONTENT)
305              
306             I must be a reference to either an L
307             object or a L array.
308             Returns a reference to a L array listing the content.
309              
310             The returned array must not be subsequently modified. If possible,
311             it will be marked as read-only in order to prevent modification.
312              
313             =cut
314              
315 12992     12992 1 555164 sub xml_c_content_twine($) { xml_c_content_object($_[0])->twine }
316              
317             *xc_twine = \&xml_c_content_twine;
318              
319             =item xml_c_content(CONTENT)
320              
321             Deprecated alias for L.
322              
323             =cut
324              
325             *xml_c_content = \&xml_c_content_twine;
326              
327             =back
328              
329             =head2 Examination of elements
330              
331             =over
332              
333             =item xml_e_type_name(ELEMENT)
334              
335             =item xe_type(ELEMENT)
336              
337             I must be a reference to an L object.
338             Returns the element's type's name, as a string.
339              
340             =cut
341              
342             sub xml_e_type_name($) {
343 36     36 1 20435 &check_xml_element;
344 4         44 return $_[0]->type_name;
345             }
346              
347             *xe_type = \&xml_e_type_name;
348              
349             =item xml_e_attributes(ELEMENT)
350              
351             =item xe_attrs(ELEMENT)
352              
353             I must be a reference to an L object.
354             Returns a reference to a hash encapsulating
355             the element's attributes. In the hash, each key is an attribute name,
356             and the corresponding value is the attribute's value as a string.
357              
358             The returned hash must not be subsequently modified. If possible, it
359             will be marked as read-only in order to prevent modification. As a side
360             effect, the read-only-ness may make lookup of any non-existent attribute
361             generate an exception rather than returning C.
362              
363             =cut
364              
365             sub xml_e_attributes($) {
366 36     36 1 22403 &check_xml_element;
367 4         30 return $_[0]->attributes;
368             }
369              
370             *xe_attrs = \&xml_e_attributes;
371              
372             =item xml_e_attribute(ELEMENT, NAME)
373              
374             =item xe_attr(ELEMENT, NAME)
375              
376             I must be a reference to an L object.
377             Looks up a specific attribute of the
378             element, by a name supplied as a string. If there is an attribute by
379             that name then its value is returned, as a string. If there is no such
380             attribute then C is returned.
381              
382             =cut
383              
384             sub xml_e_attribute($$) {
385 536     536 1 584548 check_xml_element($_[0]);
386 472         38360 return $_[0]->attribute($_[1]);
387             }
388              
389             *xe_attr = \&xml_e_attribute;
390              
391             =item xml_e_content_object(ELEMENT)
392              
393             =item xe_cont(ELEMENT)
394              
395             I must be a reference to an L object.
396             Returns a reference to an L object encapsulating
397             the element's content.
398              
399             =cut
400              
401             sub xml_e_content_object($) {
402 36     36 1 24979 &check_xml_element;
403 4         34 return $_[0]->content_object;
404             }
405              
406             *xe_cont = \&xml_e_content_object;
407              
408             =item xml_e_content_twine(ELEMENT)
409              
410             =item xe_twine(ELEMENT)
411              
412             I must be a reference to an L object.
413             Returns a reference to a L array listing the element's content.
414              
415             The returned array must not be subsequently modified. If possible,
416             it will be marked as read-only in order to prevent modification.
417              
418             =cut
419              
420             sub xml_e_content_twine($) {
421 36     36 1 26078 &check_xml_element;
422 4         36 return $_[0]->content_twine;
423             }
424              
425             *xe_twine = \&xml_e_content_twine;
426              
427             =item xml_e_content(ELEMENT)
428              
429             Deprecated alias for L.
430              
431             =cut
432              
433             *xml_e_content = \&xml_e_content_twine;
434              
435             =back
436              
437             =head2 Comparison
438              
439             =over
440              
441             =item xml_c_equal(A, B)
442              
443             =item xc_eq(A, B)
444              
445             I and I must each be a reference to either an L
446             object or a L array.
447             Returns true if they represent exactly the same content,
448             and false if they do not.
449              
450             =cut
451              
452             sub _xe_eq($$);
453              
454             sub _xct_eq($$) {
455 4424     4424   11285 my($a, $b) = @_;
456 4424 100       14460 return !!1 if $a == $b;
457 4408 100       17375 return !!0 unless @$a == @$b;
458 2960         9458 for(my $i = $#$a; $i >= 0; $i -= 2) {
459 3032 100       18663 return !!0 unless $a->[$i] eq $b->[$i];
460             }
461 104         342 for(my $i = $#$a-1; $i >= 0; $i -= 2) {
462 56 100       131 return !!0 unless _xe_eq($a->[$i], $b->[$i]);
463             }
464 96         478 return !!1;
465             }
466              
467             sub xml_c_equal($$) {
468 6792     6792 1 3226285 return _xct_eq(xml_c_content_twine($_[0]), xml_c_content_twine($_[1]));
469             }
470              
471             *xc_eq = \&xml_c_equal;
472              
473             =item xml_e_equal(A, B)
474              
475             =item xe_eq(A, B)
476              
477             I and I must each be a reference to an L object.
478             Returns true if they represent exactly the same element,
479             and false if they do not.
480              
481             =cut
482              
483             sub _xe_eq($$) {
484 192     192   351 my($a, $b) = @_;
485 192 100       611 return !!1 if $a == $b;
486 128 100       667 return !!0 unless $a->type_name eq $b->type_name;
487 88         240 my $aattr = $a->attributes;
488 88         157 my $battr = $b->attributes;
489 88         299 foreach(keys %$aattr) {
490             return !!0 unless exists($battr->{$_}) &&
491 104 100 100     590 $aattr->{$_} eq $battr->{$_};
492             }
493 52         120 foreach(keys %$battr) {
494 62 100       198 return !!0 unless exists $aattr->{$_};
495             }
496 32         157 return _xct_eq($a->content_twine, $b->content_twine);
497             }
498              
499             sub xml_e_equal($$) {
500 264     264 1 82083 check_xml_element($_[0]);
501 200         554 check_xml_element($_[1]);
502 136         282 return &_xe_eq;
503             }
504              
505             *xe_eq = \&xml_e_equal;
506              
507             =item xml_c_unequal(A, B)
508              
509             =item xc_ne(A, B)
510              
511             I and I must each be a reference to either an L
512             object or a L array.
513             Returns true if they do not represent exactly the same content,
514             and false if they do.
515              
516             =cut
517              
518 3396     3396 1 3210602 sub xml_c_unequal($$) { !&xml_c_equal }
519              
520             *xc_ne = \&xml_c_unequal;
521              
522             =item xml_e_unequal(A, B)
523              
524             =item xe_ne(A, B)
525              
526             I and I must each be a reference to an L object.
527             Returns true if they do not represent exactly the same element,
528             and false if they do.
529              
530             =cut
531              
532 132     132 1 54260 sub xml_e_unequal($$) { !&xml_e_equal }
533              
534             *xe_ne = \&xml_e_unequal;
535              
536             =back
537              
538             =head1 SEE ALSO
539              
540             L,
541             L,
542             L,
543             L,
544             L,
545             L
546              
547             =head1 AUTHOR
548              
549             Andrew Main (Zefram)
550              
551             =head1 COPYRIGHT
552              
553             Copyright (C) 2009, 2010, 2011, 2017
554             Andrew Main (Zefram)
555              
556             =head1 LICENSE
557              
558             This module is free software; you can redistribute it and/or modify it
559             under the same terms as Perl itself.
560              
561             =cut
562              
563             1;