File Coverage

lib/YAX/Parser.pm

Criterion	Covered	Total	%
statement	97	153	63.4
branch	22	52	42.3
condition	0	18	0.0
subroutine	16	22	72.7
pod	5	10	50.0
total	140	255	54.9

line	stmt	bran	cond	sub	pod	time	code
1							package YAX::Parser;
2
3	3			3		41108	use strict;
	3					5
	3					98
4
5	3			3		796	use YAX::Node;
	3					8
	3					80
6	3			3		857	use YAX::Text;
	3					25
	3					96
7	3			3		888	use YAX::Element;
	3					7
	3					66
8	3			3		460	use YAX::Fragment;
	3					6
	3					59
9	3			3		1465	use YAX::Document;
	3					7
	3					107
10	3			3		17	use YAX::Constants qw/:all/;
	3					5
	3					7385
11
12							#========================================================================
13							# These regular expressions have been gratefully borrowed from:
14							#
15							# REX/Perl 1.0
16							# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
17							# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
18							# University, November, 1998.
19							# Copyright (c) 1998, Robert D. Cameron.
20							# The following code may be freely used and distributed provided that
21							# this copyright and citation notice remains intact and that modifications
22							# or additions are clearly identified.
23
24							our $TextSE = "[^<]+";
25							our $UntilHyphen = "[^-]*-";
26							our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
27							our $CommentCE = "$Until2Hyphens>?";
28							our $UntilRSBs = "[^\\]]](?:[^\\]]+])]+";
29							our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
30							our $S = "[ \\n\\t\\r]+";
31							our $NameStrt = "[A-Za-z_:]\|[^\\x00-\\x7F]";
32							our $NameChar = "[A-Za-z0-9_:.-]\|[^\\x00-\\x7F]";
33							our $Name = "(?:$NameStrt)(?:$NameChar)*";
34							our $QuoteSE = "\"[^\"]\"\|'[^']'";
35							our $DT_IdentSE = "$S$Name(?:$S(?:$Name\|$QuoteSE))*";
36							our $MarkupDeclCE = "(?:[^\\]\"'><]+\|$QuoteSE)*>";
37							our $S1 = "[\\n\\r\\t ]";
38							our $UntilQMs = "[^?]*\\?+";
39							our $PI_Tail = "\\?>\|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
40							our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>\|[^-]$MarkupDeclCE)\|\\?$Name(?:$PI_Tail))\|%$Name;\|$S";
41							our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
42							our $DeclCE = "--(?:$CommentCE)?\|\\[CDATA\\[(?:$CDATA_CE)?\|DOCTYPE(?:$DocTypeCE)?";
43							our $PI_CE = "$Name(?:$PI_Tail)?";
44							our $EndTagCE = "$Name(?:$S)?>?";
45							our $AttValSE = "\"[^<\"]\"\|'[^<']'";
46							our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
47							our $ElementCE = "/(?:$EndTagCE)?\|(?:$ElemTagCE)?";
48							our $MarkupSPE = "<(?:!(?:$DeclCE)?\|\\?(?:$PI_CE)?\|(?:$ElementCE)?)";
49							our $XML_SPE = "$TextSE\|$MarkupSPE";
50
51							#========================================================================
52
53							# these have captures for parsing attributes
54							our $AttValSE2 = "\"([^<\"])\"\|'([^<'])'";
55							our $ElemTagCE2 = "(?:($Name)(?:$S)?=(?:$S)?(?:$AttValSE2))+(?:$S)?/?>?";
56
57							sub new {
58	1			1	0	10	my ( $class ) = @_;
59	1					3	my $self = bless { }, $class;
60	1					3	return $self;
61							}
62
63							sub parse {
64	2			2	1	21	my ( $self, $xstr ) = ( shift, shift );
65	2	50				7	return unless $xstr;
66	2					11	my @nodes = $self->tokenize( $xstr );
67
68	2					34	my $xdoc = YAX::Document->new();
69	2					6	my @stack = ( $xdoc );
70	2					4	my ( $spec, $elmt );
71	2					6	foreach my $node ( $self->tokenize( $xstr ) ) {
72	102					165	$spec = substr( $node, 0, 2 );
73	102	100				219	if ( index( $spec, '<' ) != 0 ) {
74	48					111	$self->_mk_text( $node, $stack[-1] );
75	48					71	next;
76							}
77	54	100				110	if ( $spec eq '
78	22					24	pop @stack;
79	22					30	next;
80							}
81	32	100				57	if ( $spec eq '
82	3					11	$self->_mk_decl( $node, $stack[-1] );
83	3					8	next;
84							}
85	29	100				53	if ( $spec eq '
86	2					9	$self->_mk_proc( $node, $stack[-1] );
87	2					4	next;
88							}
89
90	27					63	$elmt = $self->_mk_elmt( $node, $stack[-1] );
91	27	100				85	push( @stack, $elmt ) unless ( $node =~ m{/>$} );
92	27	100				55	$xdoc->set( $elmt->{id} => $elmt ) if $elmt->{id}
93							}
94
95	2					28	return $xdoc;
96							}
97
98							sub stream {
99	0			0	1	0	my ( $self, $xstr, $state ) = ( shift, shift, shift );
100	0					0	my %subs;
101	0	0	0			0	if ( @_ == 1 and ref $_[0] eq 'HASH' ) {
102	0					0	%subs = %{$_[0]};
	0					0
103							} else {
104	0					0	%subs = @_;
105							}
106
107	0		0			0	my $text = delete $subs{text} \|\| $subs{pass};
108	0		0			0	my $decl = delete $subs{decl} \|\| $subs{pass};
109	0		0			0	my $proc = delete $subs{proc} \|\| $subs{pass};
110	0		0			0	my $elmt = delete $subs{elmt} \|\| $subs{pass};
111	0		0			0	my $elcl = delete $subs{elcl} \|\| $subs{pass};
112
113	0					0	my ( $spec, $name, $copy, $atts, %atts );
114	0					0	foreach my $node ( $self->tokenize( $xstr ) ) {
115	0					0	$spec = substr( $node, 0, 2 );
116	0	0				0	if ( index( $spec, '<' ) != 0 ) {
117	0	0				0	$text && $text->( $state, $node );
118	0					0	next;
119							}
120	0	0				0	if ( $spec eq '
121	0	0				0	$elcl && $elcl->( $state, substr( $node, 2, -1 ) );
122	0					0	next;
123							}
124	0	0				0	if ( $spec eq '
125	0	0				0	$decl && $decl->( $state, $node );
126	0					0	next;
127							}
128	0	0				0	if ( $spec eq '
129	0	0				0	$proc && $proc->( $state, $node );
130	0					0	next;
131							}
132
133	0	0				0	$elmt && do {
134	0					0	$copy = substr( $node, 1, -1 );
135	0					0	( $name, $atts ) = split( /\s+/, $copy, 2 );
136	0					0	$name =~ s{/$}{};
137	0	0				0	%atts = $atts ? $self->parse_attributes( $atts ) : ( );
138	0					0	$elmt->( $state, $name, %atts );
139							};
140
141	0	0				0	if ( substr( $node, -2 ) eq '/>' ) {
142	0	0				0	$elcl && $elcl->( $state, $name );
143							}
144							}
145							}
146
147							sub read_file {
148	0			0	0	0	my ( $self, $file ) = @_;
149	0					0	my $xstr;
150							{
151	0	0				0	open FH, $file or return;
	0					0
152	0					0	local $/ = undef;
153	0					0	$xstr = ;
154	0					0	close FH;
155							}
156	0					0	return $xstr;
157							}
158
159							sub parse_file {
160	0			0	1	0	my ( $self, $file ) = @_;
161	0					0	return $self->parse( $self->read_file( $file ) );
162							}
163
164							sub stream_file {
165	0			0	1	0	my ( $self, $file, $state, %subs ) = @_;
166	0					0	return $self->stream( $self->read_file( $file ), $state, %subs );
167							}
168
169							sub parse_as_fragment {
170	0			0	0	0	my ( $self, $xstr ) = @_;
171	0					0	my $xdoc = $self->parse( ''.$xstr.'' );
172	0					0	my $root = $xdoc->root;
173	0					0	my $frag = YAX::Fragment->new;
174	0					0	$frag->append( $root->[0] ) while @$root;
175	0					0	return $frag;
176							}
177
178							sub parse_file_as_fragment {
179	0			0	0	0	my ( $self, $file ) = @_;
180	0					0	my $xstr = $self->read_file( $file );
181	0					0	my $frag = $self->parse_as_fragment( $xstr );
182	0					0	return $frag;
183							}
184
185							sub tokenize {
186	4			4	1	9	my ( $self, $xstr ) = @_;
187	4					1199	return $xstr =~ /$XML_SPE/g;
188							}
189
190							sub _mk_decl {
191	3			3		7	my ( $self, $decl, $parent ) = @_;
192	3					4	my ( $type, $name );
193	3					4	my $offset = 1;
194	3					5	my $length = length( $decl );
195
196	3	100				8	substr( $decl, 0, 4 ) eq '