| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package CFDI::Parser::XML; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
22
|
|
|
4
|
1
|
|
|
1
|
|
213
|
use CFDI::Constants::Class; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
45
|
|
|
5
|
1
|
|
|
1
|
|
207
|
use CFDI::Regex::XML; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1092
|
|
|
6
|
|
|
|
|
|
|
require Exporter; |
|
7
|
|
|
|
|
|
|
our @EXPORT = qw(parse); |
|
8
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
9
|
|
|
|
|
|
|
our $VERSION = 0.85; |
|
10
|
|
|
|
|
|
|
our $BUFLEN = 256; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=todo |
|
13
|
|
|
|
|
|
|
namespaces... |
|
14
|
|
|
|
|
|
|
#processing instructions |
|
15
|
|
|
|
|
|
|
#entities() < & & " something; |
|
16
|
|
|
|
|
|
|
$attr{'xml:space'} eq 'default'){ #remove space |
|
17
|
|
|
|
|
|
|
=cut |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub parse(_){ |
|
20
|
0
|
|
|
0
|
0
|
|
my $file = shift; |
|
21
|
0
|
0
|
|
|
|
|
die "file required$/" unless defined $file; |
|
22
|
0
|
|
|
|
|
|
local $_ = ''; |
|
23
|
0
|
0
|
0
|
|
|
|
die "cannot access file $file$/" unless -e $file && -r _; |
|
24
|
0
|
0
|
|
|
|
|
open(XML,'<:encoding(UTF-8)',$file) or die "cannot open file $file as UTF-8: $!$/"; |
|
25
|
0
|
|
|
|
|
|
my ($t,$squote,$dquote,$cmntOpen,$char,$buf,@tokns,$dec,$hasTags) = (0,0,0,0); |
|
26
|
0
|
0
|
|
0
|
|
|
local $SIG{__DIE__} = sub {close XML or warn "cannot close file $file: $!$/"}; |
|
|
0
|
|
|
|
|
|
|
|
27
|
0
|
|
|
|
|
|
my ($chars,$buffer,$BOM); |
|
28
|
0
|
0
|
|
|
|
|
die "file required$/" unless defined $file; |
|
29
|
0
|
0
|
0
|
|
|
|
die "cannot access file $file$/" unless -e $file && -r _; |
|
30
|
0
|
|
|
|
|
|
$chars = sysread XML,$buffer,1; |
|
31
|
0
|
0
|
|
|
|
|
die "error reading first char$/" unless defined $chars; |
|
32
|
0
|
0
|
|
|
|
|
die "file $file is empty$/" unless $chars; |
|
33
|
0
|
0
|
|
|
|
|
$BOM = 65279 == ord $buffer ? 1 : 0; |
|
34
|
0
|
|
|
|
|
|
local $_; |
|
35
|
|
|
|
|
|
|
# RD1: $chars = sysread XML,$buffer,1; |
|
36
|
|
|
|
|
|
|
# die "error reading file $file$/" unless defined $chars; |
|
37
|
|
|
|
|
|
|
# die "parsing error at: $_$/" unless $chars; |
|
38
|
|
|
|
|
|
|
# $_ .= $buffer; |
|
39
|
|
|
|
|
|
|
# goto RD1 if -1 == index $_,'>'; |
|
40
|
|
|
|
|
|
|
# die "declaration error: $_$/" unless s/^<\?xml($qr_at*)\?>//s; |
|
41
|
|
|
|
|
|
|
# $attr = $1; |
|
42
|
|
|
|
|
|
|
# push @attr,$1,substr$2,1,-1 while defined $attr && $attr=~s/\s*($qr_na)\s*=\s*($qr_va)\s*//; |
|
43
|
|
|
|
|
|
|
# exists $n{$_} ? die "attribute '$_' is not unique$/" : $n{$_}++ for grep ++$i%2, @attr; |
|
44
|
|
|
|
|
|
|
# %attr = @attr; |
|
45
|
|
|
|
|
|
|
# die "bad xml 1.0 declaration$/" if grep !/^(?:version|encoding|standalone)$/, keys %attr; |
|
46
|
|
|
|
|
|
|
# if(exists $attr{version}){ |
|
47
|
|
|
|
|
|
|
# if(!defined $attr{version} || $attr{version} ne '1.0'){ |
|
48
|
|
|
|
|
|
|
# die "xml version 1.0 only$/"}} |
|
49
|
|
|
|
|
|
|
# if(exists $attr{standalone}){ |
|
50
|
|
|
|
|
|
|
# if(!defined $attr{standalone} || $attr{standalone} !~ /^(?:yes|no)$/){ |
|
51
|
|
|
|
|
|
|
# die "standalone error declaration$/"}} |
|
52
|
|
|
|
|
|
|
# if(exists $attr{encoding}){ |
|
53
|
|
|
|
|
|
|
# die "encoding error declaration$/" if !defined $attr{encoding} || $attr{encoding} !~ m!^UTF[-_ /]?8$!i} |
|
54
|
|
|
|
|
|
|
# $dec = bless \@attr,DECLARATION; |
|
55
|
0
|
0
|
|
|
|
|
my ($buffer2,$buffer1) = ($BOM ? '' : $buffer); |
|
56
|
0
|
|
0
|
|
|
|
while(length($buffer2) || ($char = sysread XML,$buffer1,$BUFLEN) || length){ |
|
|
|
|
0
|
|
|
|
|
|
57
|
0
|
0
|
|
|
|
|
if(length $buffer2){ |
|
|
|
0
|
|
|
|
|
|
|
58
|
0
|
|
|
|
|
|
$char = 0; |
|
59
|
|
|
|
|
|
|
}elsif($char){ |
|
60
|
0
|
|
|
|
|
|
$buffer2 = $buffer1; |
|
61
|
0
|
|
|
|
|
|
undef $buffer1; |
|
62
|
|
|
|
|
|
|
}else{ |
|
63
|
0
|
|
|
|
|
|
s/^\s*|\s*$//; |
|
64
|
0
|
0
|
|
|
|
|
$_ = "<$_" if $t; |
|
65
|
0
|
0
|
|
|
|
|
die "parsing error: $_$/" if length; |
|
66
|
0
|
|
|
|
|
|
last; |
|
67
|
|
|
|
|
|
|
} |
|
68
|
0
|
|
|
|
|
|
$buf = substr $buffer2,0,1,''; |
|
69
|
0
|
0
|
0
|
|
|
|
if($buf eq '<' && !$cmntOpen){ |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
70
|
0
|
0
|
|
|
|
|
die "parsing error: <$_<$buffer2$/" if $t == 1; |
|
71
|
0
|
|
|
|
|
|
$t = 1; |
|
72
|
0
|
0
|
|
|
|
|
if(length){ |
|
73
|
0
|
0
|
0
|
|
|
|
die "parsing error: $_<$buffer2$/" if !$hasTags && /\S/; |
|
74
|
0
|
|
|
|
|
|
my $text = $_; |
|
75
|
0
|
|
|
|
|
|
$tokns[$#tokns+1] = bless \$text,TEXT; |
|
76
|
0
|
|
|
|
|
|
$_ = ''; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
}elsif($t && $buf eq '>' && !$squote && !$dquote && (!$cmntOpen || (5 <= length $_ && '--' eq substr $_,-2)) ){ |
|
79
|
0
|
0
|
|
|
|
|
die "parsing error: <$_>$buffer2$/" unless /$qr_ta/; |
|
80
|
0
|
|
|
|
|
|
$t = 0; |
|
81
|
0
|
0
|
0
|
|
|
|
if(defined $1 && length $1){ |
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
82
|
0
|
|
|
|
|
|
my ($name,$attr,$slsh,@attr,%n,$i) = ($1,$2,$3); |
|
83
|
0
|
|
0
|
|
|
|
push @attr,$1,substr$2,1,-1 while defined $attr && $attr=~s/\s*($qr_na)\s*=\s*($qr_va)\s*//; |
|
84
|
0
|
|
|
|
|
|
my $data = $_; |
|
85
|
0
|
0
|
|
|
|
|
exists $n{$_} ? die "parsing error: attribute '$_' is not unique at <$data>$buffer2$/" : $n{$_}++ for grep ++$i%2, @attr; |
|
86
|
|
|
|
|
|
|
#parse namespaces |
|
87
|
0
|
0
|
|
|
|
|
$attr = $#attr+1 ? bless \@attr,ATTRIBUTES : undef; |
|
88
|
0
|
|
|
|
|
|
my $Name = bless \$name,NAME; |
|
89
|
0
|
0
|
|
|
|
|
my $token = $attr ? [$Name,$attr] : [$Name]; |
|
90
|
0
|
0
|
0
|
|
|
|
bless $token,ELEMENT if defined $slsh && length $slsh; |
|
91
|
0
|
|
|
|
|
|
$hasTags = 1; |
|
92
|
0
|
|
|
|
|
|
$tokns[$#tokns+1] = $token; |
|
93
|
|
|
|
|
|
|
}elsif(defined $4 && length $4){#closing tag - check for content and former opening tag |
|
94
|
0
|
|
|
|
|
|
my $name = $4; |
|
95
|
0
|
|
|
|
|
|
my $i = $#tokns; |
|
96
|
0
|
|
|
|
|
|
my $found = 0; |
|
97
|
0
|
|
|
|
|
|
my @content; |
|
98
|
0
|
|
|
|
|
|
while($i >= 0){ |
|
99
|
0
|
|
|
|
|
|
my $token = $tokns[$i]; |
|
100
|
0
|
0
|
|
|
|
|
if(ref $token eq 'ARRAY'){ |
|
101
|
0
|
0
|
|
|
|
|
die "parsing error: <$_>$buffer2$/" unless ${$$token[0]} eq $name; |
|
|
0
|
|
|
|
|
|
|
|
102
|
0
|
|
|
|
|
|
$found = 1; |
|
103
|
0
|
|
|
|
|
|
if(0 && (my ($attr) = grep ref eq ATTRIBUTES,@$token)){ |
|
104
|
|
|
|
|
|
|
my %attr = @$attr; |
|
105
|
|
|
|
|
|
|
if(defined $attr{'xml:space'} && $attr{'xml:space'} eq 'default'){ |
|
106
|
|
|
|
|
|
|
#remove space |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
} |
|
109
|
0
|
|
|
|
|
|
$$token[$#$token+1] = bless \@content,CONTENT; |
|
110
|
0
|
|
|
|
|
|
bless $token,ELEMENT; |
|
111
|
0
|
|
|
|
|
|
last; |
|
112
|
|
|
|
|
|
|
}else{ |
|
113
|
0
|
|
|
|
|
|
unshift @content,splice @tokns,$i,1; |
|
114
|
|
|
|
|
|
|
} |
|
115
|
0
|
|
|
|
|
|
$i--; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
0
|
0
|
|
|
|
|
die "parsing error: <$_>$buffer2$/" unless $found; |
|
118
|
|
|
|
|
|
|
}elsif(defined $5 && length $5){#comment |
|
119
|
0
|
|
|
|
|
|
$cmntOpen = 0; |
|
120
|
|
|
|
|
|
|
#$tokns[$#tokns+1] = $_; #contains !-- -- |
|
121
|
0
|
|
|
|
|
|
my $comment = $5; |
|
122
|
0
|
|
|
|
|
|
$tokns[$#tokns+1] = bless \$comment,COMMENT; |
|
123
|
|
|
|
|
|
|
}elsif(defined $6 && length $6){#instruction |
|
124
|
0
|
|
|
|
|
|
my $instr = $6; |
|
125
|
0
|
|
|
|
|
|
$tokns[$#tokns+1] = bless \$instr,INSTRUCTION; |
|
126
|
|
|
|
|
|
|
}else{ |
|
127
|
0
|
|
|
|
|
|
die "parsing error: <$_>$buffer2$/"; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
0
|
|
|
|
|
|
$_ = ''; |
|
130
|
|
|
|
|
|
|
}else{ |
|
131
|
0
|
0
|
0
|
|
|
|
$cmntOpen = 1 if $_ eq '!-' && $buf eq '-' && $t; |
|
|
|
|
0
|
|
|
|
|
|
132
|
0
|
0
|
0
|
|
|
|
$squote = !$squote if $buf eq "'" && $t && !($dquote || $cmntOpen); |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
133
|
0
|
0
|
0
|
|
|
|
$dquote = !$dquote if $buf eq '"' && $t && !($squote || $cmntOpen); |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
$_ .= $buf; |
|
135
|
|
|
|
|
|
|
} |
|
136
|
|
|
|
|
|
|
} |
|
137
|
0
|
0
|
|
|
|
|
die "error reading file $file$/" unless defined $char; |
|
138
|
0
|
0
|
|
|
|
|
close XML or warn "cannot close file $file: $!$/"; |
|
139
|
0
|
0
|
0
|
|
|
|
die "uncommented text was found$/" if grep ref eq TEXT && $$_=~/S/,@tokns; |
|
140
|
0
|
|
|
|
|
|
my @elements = grep ref eq ELEMENT,@tokns; |
|
141
|
0
|
0
|
|
|
|
|
die "error identifying content$/" if $#elements == -1; |
|
142
|
0
|
0
|
|
|
|
|
die "error identifying root$/" if $#elements; |
|
143
|
0
|
|
|
|
|
|
my $cfdi = bless \@tokns,CONTENT; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
1; |