line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package YAX::Parser; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
41108
|
use strict; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
98
|
|
4
|
|
|
|
|
|
|
|
5
|
3
|
|
|
3
|
|
796
|
use YAX::Node; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
80
|
|
6
|
3
|
|
|
3
|
|
857
|
use YAX::Text; |
|
3
|
|
|
|
|
25
|
|
|
3
|
|
|
|
|
96
|
|
7
|
3
|
|
|
3
|
|
888
|
use YAX::Element; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
66
|
|
8
|
3
|
|
|
3
|
|
460
|
use YAX::Fragment; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
59
|
|
9
|
3
|
|
|
3
|
|
1465
|
use YAX::Document; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
107
|
|
10
|
3
|
|
|
3
|
|
17
|
use YAX::Constants qw/:all/; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
7385
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
#======================================================================== |
13
|
|
|
|
|
|
|
# These regular expressions have been gratefully borrowed from: |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# REX/Perl 1.0 |
16
|
|
|
|
|
|
|
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions", |
17
|
|
|
|
|
|
|
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser |
18
|
|
|
|
|
|
|
# University, November, 1998. |
19
|
|
|
|
|
|
|
# Copyright (c) 1998, Robert D. Cameron. |
20
|
|
|
|
|
|
|
# The following code may be freely used and distributed provided that |
21
|
|
|
|
|
|
|
# this copyright and citation notice remains intact and that modifications |
22
|
|
|
|
|
|
|
# or additions are clearly identified. |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
our $TextSE = "[^<]+"; |
25
|
|
|
|
|
|
|
our $UntilHyphen = "[^-]*-"; |
26
|
|
|
|
|
|
|
our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-"; |
27
|
|
|
|
|
|
|
our $CommentCE = "$Until2Hyphens>?"; |
28
|
|
|
|
|
|
|
our $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+"; |
29
|
|
|
|
|
|
|
our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>"; |
30
|
|
|
|
|
|
|
our $S = "[ \\n\\t\\r]+"; |
31
|
|
|
|
|
|
|
our $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"; |
32
|
|
|
|
|
|
|
our $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"; |
33
|
|
|
|
|
|
|
our $Name = "(?:$NameStrt)(?:$NameChar)*"; |
34
|
|
|
|
|
|
|
our $QuoteSE = "\"[^\"]*\"|'[^']*'"; |
35
|
|
|
|
|
|
|
our $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*"; |
36
|
|
|
|
|
|
|
our $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>"; |
37
|
|
|
|
|
|
|
our $S1 = "[\\n\\r\\t ]"; |
38
|
|
|
|
|
|
|
our $UntilQMs = "[^?]*\\?+"; |
39
|
|
|
|
|
|
|
our $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>"; |
40
|
|
|
|
|
|
|
our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S"; |
41
|
|
|
|
|
|
|
our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?"; |
42
|
|
|
|
|
|
|
our $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"; |
43
|
|
|
|
|
|
|
our $PI_CE = "$Name(?:$PI_Tail)?"; |
44
|
|
|
|
|
|
|
our $EndTagCE = "$Name(?:$S)?>?"; |
45
|
|
|
|
|
|
|
our $AttValSE = "\"[^<\"]*\"|'[^<']*'"; |
46
|
|
|
|
|
|
|
our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"; |
47
|
|
|
|
|
|
|
our $ElementCE = "/(?:$EndTagCE)?|(?:$ElemTagCE)?"; |
48
|
|
|
|
|
|
|
our $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|(?:$ElementCE)?)"; |
49
|
|
|
|
|
|
|
our $XML_SPE = "$TextSE|$MarkupSPE"; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
#======================================================================== |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# these have captures for parsing attributes |
54
|
|
|
|
|
|
|
our $AttValSE2 = "\"([^<\"]*)\"|'([^<']*)'"; |
55
|
|
|
|
|
|
|
our $ElemTagCE2 = "(?:($Name)(?:$S)?=(?:$S)?(?:$AttValSE2))+(?:$S)?/?>?"; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub new { |
58
|
1
|
|
|
1
|
0
|
10
|
my ( $class ) = @_; |
59
|
1
|
|
|
|
|
3
|
my $self = bless { }, $class; |
60
|
1
|
|
|
|
|
3
|
return $self; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub parse { |
64
|
2
|
|
|
2
|
1
|
21
|
my ( $self, $xstr ) = ( shift, shift ); |
65
|
2
|
50
|
|
|
|
7
|
return unless $xstr; |
66
|
2
|
|
|
|
|
11
|
my @nodes = $self->tokenize( $xstr ); |
67
|
|
|
|
|
|
|
|
68
|
2
|
|
|
|
|
34
|
my $xdoc = YAX::Document->new(); |
69
|
2
|
|
|
|
|
6
|
my @stack = ( $xdoc ); |
70
|
2
|
|
|
|
|
4
|
my ( $spec, $elmt ); |
71
|
2
|
|
|
|
|
6
|
foreach my $node ( $self->tokenize( $xstr ) ) { |
72
|
102
|
|
|
|
|
165
|
$spec = substr( $node, 0, 2 ); |
73
|
102
|
100
|
|
|
|
219
|
if ( index( $spec, '<' ) != 0 ) { |
74
|
48
|
|
|
|
|
111
|
$self->_mk_text( $node, $stack[-1] ); |
75
|
48
|
|
|
|
|
71
|
next; |
76
|
|
|
|
|
|
|
} |
77
|
54
|
100
|
|
|
|
110
|
if ( $spec eq '' ) { |
78
|
22
|
|
|
|
|
24
|
pop @stack; |
79
|
22
|
|
|
|
|
30
|
next; |
80
|
|
|
|
|
|
|
} |
81
|
32
|
100
|
|
|
|
57
|
if ( $spec eq '
|
82
|
3
|
|
|
|
|
11
|
$self->_mk_decl( $node, $stack[-1] ); |
83
|
3
|
|
|
|
|
8
|
next; |
84
|
|
|
|
|
|
|
} |
85
|
29
|
100
|
|
|
|
53
|
if ( $spec eq '' ) { |
86
|
2
|
|
|
|
|
9
|
$self->_mk_proc( $node, $stack[-1] ); |
87
|
2
|
|
|
|
|
4
|
next; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
27
|
|
|
|
|
63
|
$elmt = $self->_mk_elmt( $node, $stack[-1] ); |
91
|
27
|
100
|
|
|
|
85
|
push( @stack, $elmt ) unless ( $node =~ m{/>$} ); |
92
|
27
|
100
|
|
|
|
55
|
$xdoc->set( $elmt->{id} => $elmt ) if $elmt->{id} |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
2
|
|
|
|
|
28
|
return $xdoc; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub stream { |
99
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $xstr, $state ) = ( shift, shift, shift ); |
100
|
0
|
|
|
|
|
0
|
my %subs; |
101
|
0
|
0
|
0
|
|
|
0
|
if ( @_ == 1 and ref $_[0] eq 'HASH' ) { |
102
|
0
|
|
|
|
|
0
|
%subs = %{$_[0]}; |
|
0
|
|
|
|
|
0
|
|
103
|
|
|
|
|
|
|
} else { |
104
|
0
|
|
|
|
|
0
|
%subs = @_; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
0
|
|
0
|
|
|
0
|
my $text = delete $subs{text} || $subs{pass}; |
108
|
0
|
|
0
|
|
|
0
|
my $decl = delete $subs{decl} || $subs{pass}; |
109
|
0
|
|
0
|
|
|
0
|
my $proc = delete $subs{proc} || $subs{pass}; |
110
|
0
|
|
0
|
|
|
0
|
my $elmt = delete $subs{elmt} || $subs{pass}; |
111
|
0
|
|
0
|
|
|
0
|
my $elcl = delete $subs{elcl} || $subs{pass}; |
112
|
|
|
|
|
|
|
|
113
|
0
|
|
|
|
|
0
|
my ( $spec, $name, $copy, $atts, %atts ); |
114
|
0
|
|
|
|
|
0
|
foreach my $node ( $self->tokenize( $xstr ) ) { |
115
|
0
|
|
|
|
|
0
|
$spec = substr( $node, 0, 2 ); |
116
|
0
|
0
|
|
|
|
0
|
if ( index( $spec, '<' ) != 0 ) { |
117
|
0
|
0
|
|
|
|
0
|
$text && $text->( $state, $node ); |
118
|
0
|
|
|
|
|
0
|
next; |
119
|
|
|
|
|
|
|
} |
120
|
0
|
0
|
|
|
|
0
|
if ( $spec eq '' ) { |
121
|
0
|
0
|
|
|
|
0
|
$elcl && $elcl->( $state, substr( $node, 2, -1 ) ); |
122
|
0
|
|
|
|
|
0
|
next; |
123
|
|
|
|
|
|
|
} |
124
|
0
|
0
|
|
|
|
0
|
if ( $spec eq '
|
125
|
0
|
0
|
|
|
|
0
|
$decl && $decl->( $state, $node ); |
126
|
0
|
|
|
|
|
0
|
next; |
127
|
|
|
|
|
|
|
} |
128
|
0
|
0
|
|
|
|
0
|
if ( $spec eq '' ) { |
129
|
0
|
0
|
|
|
|
0
|
$proc && $proc->( $state, $node ); |
130
|
0
|
|
|
|
|
0
|
next; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
0
|
0
|
|
|
|
0
|
$elmt && do { |
134
|
0
|
|
|
|
|
0
|
$copy = substr( $node, 1, -1 ); |
135
|
0
|
|
|
|
|
0
|
( $name, $atts ) = split( /\s+/, $copy, 2 ); |
136
|
0
|
|
|
|
|
0
|
$name =~ s{/$}{}; |
137
|
0
|
0
|
|
|
|
0
|
%atts = $atts ? $self->parse_attributes( $atts ) : ( ); |
138
|
0
|
|
|
|
|
0
|
$elmt->( $state, $name, %atts ); |
139
|
|
|
|
|
|
|
}; |
140
|
|
|
|
|
|
|
|
141
|
0
|
0
|
|
|
|
0
|
if ( substr( $node, -2 ) eq '/>' ) { |
142
|
0
|
0
|
|
|
|
0
|
$elcl && $elcl->( $state, $name ); |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
sub read_file { |
148
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $file ) = @_; |
149
|
0
|
|
|
|
|
0
|
my $xstr; |
150
|
|
|
|
|
|
|
{ |
151
|
0
|
0
|
|
|
|
0
|
open FH, $file or return; |
|
0
|
|
|
|
|
0
|
|
152
|
0
|
|
|
|
|
0
|
local $/ = undef; |
153
|
0
|
|
|
|
|
0
|
$xstr = ; |
154
|
0
|
|
|
|
|
0
|
close FH; |
155
|
|
|
|
|
|
|
} |
156
|
0
|
|
|
|
|
0
|
return $xstr; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub parse_file { |
160
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $file ) = @_; |
161
|
0
|
|
|
|
|
0
|
return $self->parse( $self->read_file( $file ) ); |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
sub stream_file { |
165
|
0
|
|
|
0
|
1
|
0
|
my ( $self, $file, $state, %subs ) = @_; |
166
|
0
|
|
|
|
|
0
|
return $self->stream( $self->read_file( $file ), $state, %subs ); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub parse_as_fragment { |
170
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $xstr ) = @_; |
171
|
0
|
|
|
|
|
0
|
my $xdoc = $self->parse( ''.$xstr.'' ); |
172
|
0
|
|
|
|
|
0
|
my $root = $xdoc->root; |
173
|
0
|
|
|
|
|
0
|
my $frag = YAX::Fragment->new; |
174
|
0
|
|
|
|
|
0
|
$frag->append( $root->[0] ) while @$root; |
175
|
0
|
|
|
|
|
0
|
return $frag; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub parse_file_as_fragment { |
179
|
0
|
|
|
0
|
0
|
0
|
my ( $self, $file ) = @_; |
180
|
0
|
|
|
|
|
0
|
my $xstr = $self->read_file( $file ); |
181
|
0
|
|
|
|
|
0
|
my $frag = $self->parse_as_fragment( $xstr ); |
182
|
0
|
|
|
|
|
0
|
return $frag; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
sub tokenize { |
186
|
4
|
|
|
4
|
1
|
9
|
my ( $self, $xstr ) = @_; |
187
|
4
|
|
|
|
|
1199
|
return $xstr =~ /$XML_SPE/g; |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub _mk_decl { |
191
|
3
|
|
|
3
|
|
7
|
my ( $self, $decl, $parent ) = @_; |
192
|
3
|
|
|
|
|
4
|
my ( $type, $name ); |
193
|
3
|
|
|
|
|
4
|
my $offset = 1; |
194
|
3
|
|
|
|
|
5
|
my $length = length( $decl ); |
195
|
|
|
|
|
|
|
|
196
|
3
|
100
|
|
|
|
8
|
substr( $decl, 0, 4 ) eq ' |