File Coverage

blib/lib/Test/XML/Easy.pm

Criterion	Covered	Total	%
statement	208	221	94.1
branch	85	94	90.4
condition	25	28	89.2
subroutine	16	16	100.0
pod	4	4	100.0
total	338	363	93.1

line	stmt	bran	cond	sub	pod	time	code
1							package Test::XML::Easy;
2
3	11			11		699329	use strict;
	11					33
	11					542
4	11			11		69	use warnings;
	11					25
	11					514
5
6	11			11		63	use vars qw(@EXPORT @ISA);
	11					25
	11					933
7	11			11		75	use Exporter;
	11					36
	11					964
8							@ISA = qw(Exporter);
9
10							our $VERSION = '0.01';
11
12	11			11		68	use Carp qw(croak);
	11					21
	11					875
13
14	11			11		22998	use XML::Easy::Text qw(xml10_read_document xml10_write_document);
	11					119297
	11					1348
15	11			11		20470	use XML::Easy::Classify qw(is_xml_element);
	11					409732
	11					1859
16	11			11		138	use XML::Easy::Syntax qw($xml10_s_rx);
	11					25
	11					1216
17
18	11			11		70	use Test::Builder;
	11					29
	11					35164
19							my $tester = Test::Builder->new();
20
21							=head1 NAME
22
23							Test::XML::Easy - test XML with XML::Easy
24
25							=head1 SYNOPSIS
26
27							use Test::More tests => 2;
28							use Test::XML::Easy;
29
30							is_xml $some_xml, <<'ENDOFXML', "a test";
31
32
33
34							fuzz
35
36							ENDOFXML
37
38							is_xml $some_xml, <<'ENDOFXML', { ignore_whitespace => 1, description => "my test" };
39
40
41							fuzz
42
43							ENDOFXML
44
45							isnt_xml $some_xml, $some_xml_it_must_not_be;
46
47							is_well_formed_xml $some_xml;
48
49							=head1 DESCRIPTION
50
51							A simple testing tool, with only pure Perl dependancies, that checks if
52							two XML documents are "the same". In particular this module will check if
53							the documents schemantically equal as defined by the XML 1.0 specification
54							(i.e. that the two documents would construct the same DOM
55							model when parsed, so things like character sets and if you've used two tags
56							or a self closing tags aren't important.)
57
58							This modules is a strict superset of B's interface, meaning if you
59							were using that module to check if two identical documents were the same then
60							this module should function as a drop in replacement. Be warned, however,
61							that this module by default is a lot stricter about how the XML documents
62							are allowed to differ.
63
64							=head2 Functions
65
66							This module, by default, exports a number of functions into your namespace.
67
68							=over
69
70							=item is_xml($xml_to_test, $expected_xml[, $options_hashref])
71
72							Tests that the passed XML is "the same" as the expected XML.
73
74							XML can be passed into this function in one of two ways; Either you can
75							provide a string (which the function will parse for you) or you can pass in
76							B objects that you've constructed yourself somehow.
77
78							This funtion takes several options as the third argument. These can be
79							passed in as a hashref:
80
81							=over
82
83							=item description
84
85							The name of the test that will be used in constructing the C / C
86							test output.
87
88							=item ignore_whitespace
89
90							Ignore many whitespace differences in text nodes. Currently
91							this has the same effect as turning on C
92							and C.
93
94							=item ignore_surrounding_whitespace
95
96							Ignore differences in leading and trailing whitespace
97							between elements. This means that
98
99							foo bar baz
100
101							Is considered the same as
102
103
104							foo bar baz
105
106
107							And even
108
109
110							this is my cat:
111
112
113							Is considered the same as:
114
115
116							this is my cat:
117
118
119							Even though, to a web-browser, that extra space is significant whitespace
120							and the two documents would be renderd differently.
121
122							However, as comments are completely ignored (we treat them as if they were
123							never even in the document) the following:
124
125							foobar
126
127							would be considered different to
128
129
130							foo
131
132							bar
133
134
135							As it's the same as comparing the string
136
137							"foobar"
138
139							And:
140
141							"foo
142
143							bar"
144
145							The same is true for processing instructions and DTD declarations.
146
147							=item ignore_leading_whitespace
148
149							The same as C but only ignore
150							the whitespace immediately after an element start or end tag not
151							immedately before.
152
153							=item ignore_trailing_whitespace
154
155							The same as C but only ignore
156							the whitespace immediately before an element start or end tag not
157							immedately after.
158
159							=item ignore_different_whitespace
160
161							If set to a true value ignores differences in what characters
162							make up whitespace in text nodes. In other words, this option
163							makes the comparison only care that wherever there's whitespace
164							in the expected XML there's any whitespace in the actual XML
165							at all, not what that whitespace is made up of.
166
167							It means the following
168
169
170							foo bar baz
171
172
173							Is the same as
174
175
176							foo
177							bar
178							baz
179
180
181							But not the same as
182
183
184							foobarbaz
185
186
187							This setting has no effect on attribute comparisons.
188
189							=item verbose
190
191							If true, print obsessive amounts of debug info out while
192							checking things
193
194							=item show_xml
195
196							This prints out in the diagnostic messages the expected and
197							actual XML on failure.
198
199							=back
200
201							If a third argument is passed to this function and that argument
202							is not a hashref then it will be assumed that this argument is
203							the the description as passed above. i.e.
204
205							is_xml $xml, $expected, "my test";
206
207							is the same as
208
209							is_xml $xml, $expected, { description => "my test" };
210
211							=cut
212
213							sub is_xml($$;$) {
214	57			57	1	57678	my $got = shift;
215	57					102	my $expected = shift;
216
217	57	100				180	unless (defined $expected) {
218	2					535	croak("expected argument must be defined");
219							}
220
221							# munge the options
222
223	55					85	my $got_original = $got;
224	55					74	my $expected_original = $expected;
225
226	55					80	my $options = shift;
227	55	100				424	$options = { description => $options } unless ref $options eq "HASH";
228	55	100				210	$options = { %{$options}, description => "xml test" } unless defined $options->{description};
	42					195
229	55	100				244	unless (is_xml_element($expected)) {
230							# throws an exception if there isn't a problem.
231	54					298	$expected = eval { xml10_read_document($expected) };
	54					844
232	54	100				181	if ($@) {
233	2					528	croak "Couldn't parse expected XML document: $@";
234							}
235							}
236
237							# convert into something useful if needed
238	53	100				174	unless (is_xml_element($got)) {
239	51					270	my $parsed = eval { xml10_read_document($got) };
	51					582
240	51	100				126	if ($@) {
241	4					15	$tester->ok(0, $options->{description});
242	4					11	$tester->diag("Couldn't parse submitted XML document:");
243	4					14	$tester->diag(" $@");
244	4					28	return;
245							}
246
247	47					96	$got = $parsed;
248							}
249
250	49	100				164	if(_is_xml($got,$expected,$options,"", {})) {
251	23					109	$tester->ok(1,$options->{description});
252	23					6219	return 1;
253							}
254
255	26	100				272	if ($options->{show_xml}) {
256	3					10	$tester->diag("The XML that we expected was:");
257	3	100				173	if (is_xml_element($expected_original))
258	1					14	{ $tester->diag(xml10_write_document($expected_original)) }
259							else
260	2					15	{ $tester->diag($expected_original) }
261
262	3					216	$tester->diag("The XML that we received was:");
263	3	100				153	if (is_xml_element($got_original))
264	1					12	{ $tester->diag(xml10_write_document($got_original)) }
265							else
266	2					12	{ $tester->diag($got_original) }
267							}
268
269	26					421	return;
270							}
271							push @EXPORT, "is_xml";
272
273							sub _is_xml {
274	75			75		102	my $got = shift;
275	75					99	my $expected = shift;
276	75					87	my $options = shift;
277
278							# this is the path
279	75					295	my $path = shift;
280
281							# the index is used to keep track of how many of a particular
282							# typename of a particular element we've seen as previous siblings
283							# of the node that just got in. It's a hashref with type_name and
284							# the index.
285	75					119	my $index = shift;
286
287							# change where the errors are reported from
288	75					118	local $Test::Builder::Level = $Test::Builder::Level + 1;
289
290							# work out the details of the node we're looking at
291							# nb add one to the index because xpath is weirdly 1-index
292							# not 0-indexed like most other modern languages
293	75					249	my $got_name = $got->type_name();
294	75		100			359	my $got_index = ($index->{ $got_name } \|\| 0) + 1;
295
296							### check if we've got a node to compare to
297
298	75	50				199	unless ($expected) {
299	0					0	$tester->ok(0, $options->{description});
300	0					0	$tester->diag("Element '$path/$got_name\[$got_index]' was not expected");
301	0					0	return;
302							}
303
304							### check the node name
305
306							# work out the details of the node we're comparing with
307	75					195	my $expected_name = $expected->type_name();
308	75		100			279	my $expected_index = ($index->{ $expected_name } \|\| 0) + 1;
309
310							# alter the index hashref to record we've seen another node
311							# of this name
312	75					247	$index->{$got_name}++;
313
314	75	50				190	$tester->diag("comparing '$path/$got_name\[$expected_index]' to '$path/$expected_name\[$expected_index]'...") if $options->{verbose};
315
316	75	100				184	if ($got_name ne $expected_name) {
317	4					20	$tester->ok(0, $options->{description});
318	4					1180	$tester->diag("Element '$path/$got_name\[$got_index]' does not match '$path/$expected_name\[$expected_index]'");
319	4					197	return;
320							}
321	71	50				174	$tester->diag("...matched name") if $options->{verbose};
322
323							### check the attributes
324
325							# we're not looking at decendents, so burn the path of
326							# this node into the path we got passed in
327	71					265	$path .= "/$got_name\[$got_index]";
328
329							# XML::Easy returns read only data structures
330							# we want to modify these to keep track of what
331							# we've processed, so we need to copy them
332	71					130	my %got_attr = %{ $got->attributes };
	71					542
333	71					178	my $expected_attr = $expected->attributes;
334
335	71					89	foreach my $attr (keys %{ $expected_attr }) {
	71					244
336	6	50				16	$tester->diag("checking attribute '$path/\@$attr'...") if $options->{verbose};
337
338	6	100				19	if (!exists($got_attr{$attr})) {
339	1					8	$tester->ok(0, $options->{description});
340	1					676	$tester->diag("expected attribute '$path/\@$attr' not found");
341	1					113	return;
342							}
343	5	50				13	$tester->diag("...found attribute") if $options->{verbose};
344
345	5					12	my $expected_string = $expected_attr->{$attr};
346	5					10	my $got_string = delete $got_attr{$attr};
347
348	5	100				208	if ($expected_string ne $got_string) {
349	1					12	$tester->ok(0, $options->{description});
350	1					3189	$tester->diag("attribute value for '$path/\@$attr' didn't match");
351	1					258	$tester->diag("found value:\n");
352	1					230	$tester->diag(" '$got_string'\n");
353	1					67	$tester->diag("expected value:\n");
354	1					228	$tester->diag(" '$expected_string'\n");
355	1					241	return;
356							}
357	4	50				13	$tester->diag("...the attribute contents matched") if $options->{verbose};
358							}
359	69	100				669	if (keys %got_attr) {
360	2					9	$tester->ok(0, $options->{description});
361	2	100				1132	$tester->diag("found extra unexpected attribute".(keys %got_attr>1 ? "s":"").":");
362	2					383	$tester->diag(" '$path/\@$_'") foreach sort keys %got_attr;
363	2					264	return;
364							}
365	67	50				154	$tester->diag("the attributes all matched") if $options->{verbose};
366
367							### check the child nodes
368
369							# create a new index to pass to our children distint from
370							# the index that was passed in to us (as that one was created
371							# by our parent for me and my siblings)
372	67					102	my $child_index = {};
373
374							# grab the child text...element...text...element...text...
375	67					181	my $got_content = $got->content;
376	67					135	my $expected_content = $expected->content;
377
378							# step though the text/elements
379							# nb this loop works in steps of two; The other $i++
380							# is half way through the loop below
381	67					117	for (my $i = 0; $i < @{$got_content}; $i++) {
	86					209
382
383							### check the text node
384
385							# extract the text from the object
386	86					132	my $got_text = $got_content->[ $i ];
387	86					115	my $expected_text = $expected_content->[ $i ];
388	86					122	my $comp_got_text = $got_text;
389	86					97	my $comp_expected_text = $expected_text;
390
391	86	100	100			559	if ($options->{ignore_whitespace} \|\| $options->{ignore_leading_whitespace} \|\| $options->{ignore_surrounding_whitespace}) {
			66
392	10					133	$comp_got_text =~ s/ \A (?:$xml10_s_rx)* //x;
393	10					89	$comp_expected_text =~ s/ \A (?:$xml10_s_rx)* //x;
394							}
395
396	86	100	100			628	if ($options->{ignore_whitespace} \|\| $options->{ignore_trailing_whitespace} \|\| $options->{ignore_surrounding_whitespace}) {
			66
397	10					184	$comp_got_text =~ s/ (?:$xml10_s_rx)* \z//x;
398	10					173	$comp_expected_text =~ s/ (?:$xml10_s_rx)* \z//x;
399							}
400
401	86	100	100			570	if ($options->{ignore_whitespace} \|\| $options->{ignore_different_whitespace}) {
402	8					86	$comp_got_text =~ s/ (?:$xml10_s_rx)+ / /gx;
403	8					66	$comp_expected_text =~ s/ (?:$xml10_s_rx)+ / /gx;
404							}
405
406	86	100				277	if ($comp_got_text ne $comp_expected_text) {
407
408	18					71	$tester->ok(0, $options->{description});
409
410							# I don't like these error message not being specific with xpath but as
411							# far as I know there's no easy way to express in xpath the text immediatly following
412							# a particular element. The best I could come up with was this mouthful:
413							# "$path/following-sibling::text()[ previous-sibling::*[1] == $path ]"
414
415	18	100	100			14597	if ($i == 0) {
	5	100				18
416	13	100	100			34	if (@{ $got_content } == 1 && @{ $expected_content } == 1) {
	13					48
	11					43
417	10					52	$tester->diag("text inside '$path' didn't match");
418							} else {
419	3					11	$tester->diag("text immediately inside opening tag of '$path' didn't match");
420							}
421	2					8	} elsif ($i == @{ $got_content} - 1 && $i == @{ $expected_content } - 1 ) {
422	1					6	$tester->diag("text immediately before closing tag of '$path' didn't match");
423							} else {
424	4					14	my $name = $got_content->[ $i - 1 ]->type_name;
425	4					7	my $ind = $child_index->{ $name };
426	4					18	$tester->diag("text immediately after '$path/$name\[$ind]' didn't match");
427							}
428
429	18					1259	$tester->diag("found:\n");
430	18					1208	$tester->diag(" '$got_text'\n");
431	18					1255	$tester->diag("expected:\n");
432	18					1214	$tester->diag(" '$expected_text'\n");
433
434	18	50				1233	if ($options->{verbose}) {
435	0					0	$tester->diag("compared found text:\n");
436	0					0	$tester->diag(" '$comp_got_text'\n");
437	0					0	$tester->diag("against text:\n");
438	0					0	$tester->diag(" '$comp_expected_text'\n");
439							}
440
441	18					115	return;
442							}
443
444							# move onto the next (elemnent) node if we didn't reach the end
445	68					312	$i++;
446	68	100				83	last if $i >= @{$got_content};
	68					210
447
448							### check the element node
449
450							# simply recurse for that node
451							# (don't bother checking if the expected node is defined or not, the case
452							# where it isn't is handled at the start of _is_xml)
453	26	100				157	return unless _is_xml(
454							$got_content->[$i],
455							$expected_content->[$i],
456							$options,
457							$path,
458							$child_index
459							);
460							}
461
462							# check if we expected more nodes
463	42	50				58	if (@{ $expected_content } > @{ $got_content }) {
	42					71
	42					113
464	0					0	my $expected_nom = $expected_content->[ scalar @{ $got_content } ]->type_name;
	0					0
465	0					0	my $expected_ind = $child_index->{ $expected_nom } + 1;
466	0					0	$tester->diag("Couldn't find expected node '$path/$expected_nom\[$expected_ind]'");
467	0					0	$tester->ok(0, $options->{description});
468	0					0	return;
469							}
470
471	42					292	return 1;
472							}
473
474							=item isnt_xml($xml_to_test, $not_expected_xml[, $options_hashref])
475
476							Exactly the same as C (taking exactly the same options) but passes
477							if and only if what is passed is different to the not expected XML.
478
479							By different, of course, we mean schematically different according to the
480							XML 1.0 specification. For example, this will fail:
481
482							isnt_xml "", "";
483
484							as those are schematically the same XML documents.
485
486							However, it's worth noting that the first argument doesn't even have to be
487							valid XML for the test to pass. Both these pass as they're not schemantically
488							identical to the not expected XML:
489
490							isnt_xml undef, $not_expecteded_xml;
491							isnt_xml "", $not_expected_xml;
492
493							as invalid XML is not ever schemanitcally identical to a valid XML document.
494
495							If you want to insist what you pass in is valid XML, but just not the
496							same as the other xml document you pass in then you can use two tests:
497
498							is_well_formed_xml $xml;
499							isnt_xml $xml, $not_expected_xml;
500
501							This function accepts the C option (just as C does) but
502							turning it on doesn't actually output anything extra - there's not useful this
503							function can output that would help you diagnose the failure case.
504
505							=cut
506
507							sub isnt_xml($$;$) {
508	11			11	1	9510	my $got = shift;
509	11					21	my $expected = shift;
510	11					19	my $options = shift;
511
512	11	100				53	$options = { description => $options } unless ref $options eq "HASH";
513	11	100				35	$options = { %{$options}, description => "not xml test" }
	9					47
514							unless defined $options->{description};
515
516							# temporarly ignore test output and just get the result of running
517							# the is_xml function as normal
518	11					57	$tester = bless {}, "Test::XML::Easy::Ignore";
519	11	100				18	my $result = eval { is_xml($got, $expected, $options) ? 0 : 1 };
	11					42
520	11					61	$tester = Test::Builder->new();
521
522							# did we get an error? Note we don't check $@ directly incase
523							# it's been reset by a weird DESTROY() eval...
524	11	100	66			145	unless (defined($result) && length $result) { croak $@; }
	2					365
525
526	9	100				21	if ($result) {
527	6					35	$tester->ok(1, $options->{description});
528	6					2798	return 1;
529							}
530
531	3					15	$tester->ok(0, $options->{description});
532	3					1499	$tester->diag("Unexpectedly matched the XML we didn't expect");
533	3	100				211	if ($options->{show_xml}) {
534	2					8	$tester->diag("The XML that we received was:");
535	2	100				131	if (is_xml_element($got))
536	1					10	{ $tester->diag(xml10_write_document($got)) }
537							else
538	1					9	{ $tester->diag($got) }
539							}
540	3					143	return;
541							}
542							push @EXPORT, "isnt_xml";
543
544							=item is_well_formed_xml($string_containing_xml[, $description])
545
546							Passes if and only if the string passed contains well formed XML.
547
548							=cut
549
550							sub is_well_formed_xml($;$) {
551	4			4	1	1989	my $xml_string = shift;
552	4					7	my $options = shift;
553
554	4	100				19	$options = { description => $options } unless ref $options eq "HASH";
555	4	100				13	$options = { %{$options}, description => "xml well formed test" }
	2					11
556							unless defined $options->{description};
557
558	4	100				8	if(eval { xml10_read_document($xml_string); 1 }) {
	4					39
	3					22
559	3					14	$tester->ok(1, $options->{description});
560	3					941	return 1;
561							}
562
563	1					6	$tester->ok(0, $options->{description});
564	1					570	$tester->diag($@);
565	1					69	return;
566							}
567							push @EXPORT, "is_well_formed_xml";
568
569							=item isnt_well_formed_xml($string_not_containing_xml[, $description])
570
571							Passes if and only if the string passed does not contain well formed XML.
572
573							=cut
574
575							sub isnt_well_formed_xml($;$) {
576	4			4	1	2271	my $xml_string = shift;
577	4					6	my $options = shift;
578
579	4	100				19	$options = { description => $options } unless ref $options eq "HASH";
580	4	100				12	$options = { %{$options}, description => "xml not well formed test" }
	2					10
581							unless defined $options->{description};
582
583	4	100				8	unless (eval { xml10_read_document($xml_string); 1 }) {
	4					27
	1					30
584	3					12	$tester->ok(1, $options->{description});
585	3					723	return 1;
586							}
587
588	1					5	$tester->ok(0, $options->{description});
589	1					472	$tester->diag("Unexpectedly well formed XML");
590	1					242	return;
591							}
592							push @EXPORT, "isnt_well_formed_xml";
593
594							=back
595
596							=head2 A note on Character Handling
597
598							If you do not pass it an XML::Easy::Element object then these functions will happly parse
599							XML from the characters contained in whatever scalars you passed in. They will not
600							(and cannot) correctly parse data from a scalar that contains binary data (e.g. that
601							you've sucked in from a raw file handle) as they would have no idea what characters
602							those octlets would represent
603
604							As long as your XML document contains legal characters from the ASCII range (i.e.
605							chr(1) to chr(127)) this distintion will not matter to you.
606
607							However, if you use characters above codepoint 127 then you will probably need to
608							convert any bytes you have read in into characters. This is usually done by using
609							C, or by using a PerlIO layer on the filehandle as you read the data
610							in.
611
612							If you don't know what any of this means I suggest you read the Encode::encode manpage
613							very carefully. Tom Insam's slides at L
614							may or may not help you understand this more (they at the very least contain a
615							cheatsheet for conversion.)
616
617							The author highly recommends those of you using latin-1 characters from a utf-8 source
618							to use B to check the string for common mistakes before handing it C.
619
620							=head1 AUTHOR
621
622							Mark Fowler, C<< >>
623
624							Copyright 2009 PhotoBox, All Rights Reserved.
625
626							This program is free software; you can redistribute it and/or modify it
627							under the same terms as Perl itself.
628
629							=head1 BUGS
630
631							There's a few cavets when using this module:
632
633							=over
634
635							=item Not a validating parser
636
637							Infact, we don't process (or compare) DTDs at all. These nodes are completely
638							ignored (it's as if you didn't include them in the string at all.)
639
640							=item Comments and processing instructions are ignored
641
642							We totally ignore comments and processing instructions, and it's as
643							if you didn't include them in the string at all either.
644
645							=item Limited entity handling
646
647							We only support the five "core" named entities (i.e. C<&>,
648							C<<>, C<>>, C<'> and C<">) and numerical character references
649							(in decimal or hex form.) It is not possible to declare further named
650							entities and the precence of undeclared named entities will either cause
651							an exception to be thrown (in the case of the expected string) or the test to
652							fail (in the case of the string you are testing)
653
654							=item No namespace support
655
656							Currently this is only an XML 1.0 parser, and not XML Namespaces aware (further
657							options may be added to later version of this module to enable namespace support)
658
659							This means the following document:
660
661
662
663							Is considered to be different to
664
665
666
667							=item XML whitespace handling
668
669							This module considers "whitespace" to be what the XML specification considers
670							to be whitespace. This is subtily different to what Perl considers to be
671							whitespace.
672
673							=item No node reordering support
674
675							Unlike B this module considers the order of sibling nodes to be
676							significant, and you cannot tell it to ignore the differring order of nodes
677							when comparing the expected and actual output.
678
679							=back
680
681							Please see L for
682							details of how to submit bugs, access the source control for
683							this project, and contact the author.
684
685							=head1 SEE ALSO
686
687							L (for instructions on how to test), L (for info
688							on the underlying xml parser) and L (for a similar module that
689							tests using XML::SchemanticDiff)
690
691							=cut
692
693							1; # End of Test::XML::Easy
694
695							package Test::XML::Easy::Ignore;
696
697							# a handy class you can bless your tester into so we ignore all
698							# calls and don't actually produce any test output
699
700	9			9		16	sub ok { return }
701	14			14		20	sub diag { return }
702
703							1; # End of Test::XML::Easy::Ignore;