line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CFDI::Parser::XML; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
4
|
1
|
|
|
1
|
|
213
|
use CFDI::Constants::Class; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
45
|
|
5
|
1
|
|
|
1
|
|
207
|
use CFDI::Regex::XML; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1092
|
|
6
|
|
|
|
|
|
|
# Load Exporter so this package can export symbols to its users.
require Exporter; |
7
|
|
|
|
|
|
|
# parse() is exported into the caller's namespace by default.
our @EXPORT = qw(parse); |
8
|
|
|
|
|
|
|
# Inherit import() from Exporter.
our @ISA = qw(Exporter); |
9
|
|
|
|
|
|
|
# Module version.
our $VERSION = 0.85; |
10
|
|
|
|
|
|
|
# Number of characters requested per read of the input file inside parse().
our $BUFLEN = 256; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=todo |
13
|
|
|
|
|
|
|
namespaces... |
14
|
|
|
|
|
|
|
#processing instructions |
15
|
|
|
|
|
|
|
#entities() < & & " something; |
16
|
|
|
|
|
|
|
$attr{'xml:space'} eq 'default'){ #remove space |
17
|
|
|
|
|
|
|
=cut |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# parse(FILE)
#
# Reads FILE as UTF-8 and tokenizes it, one character at a time, into a tree.
#
# Parameter:
#   FILE - path of the file to parse; because of the (_) prototype, $_ is
#          used when the argument is omitted.
#
# Returns an array reference blessed into CONTENT holding the top-level
# tokens. Token kinds built here:
#   TEXT        - reference to a scalar with the raw text between tags
#   COMMENT     - reference to a scalar with capture 5 of $qr_ta
#   INSTRUCTION - reference to a scalar with capture 6 of $qr_ta
#   ELEMENT     - array reference: a NAME object (scalar ref), then an
#                 optional ATTRIBUTES object (flat name/value array; each
#                 value has its first and last character removed, presumably
#                 the quotes - confirm against $qr_va), then, for elements
#                 closed by an explicit closing tag, a CONTENT object with
#                 the child tokens.
# An opening tag that has not been closed yet is kept as an *unblessed*
# array reference; that is how the closing-tag branch finds it again.
#
# Dies (messages end in $/) when FILE is missing, unreadable or empty, on
# read errors, on malformed markup, on duplicate attribute names, on
# non-blank text outside the root element, and when the number of top-level
# elements is not exactly one.
#
# NOTE(review): the exact tag grammar lives in $qr_ta / $qr_na / $qr_va from
# CFDI::Regex::XML, which is not visible here.
sub parse(_){
    my $file = shift;
    die "file required$/" unless defined $file;

    # $_ is the accumulator for the tag/text currently being read; localize
    # it so the caller's $_ is left untouched.
    local $_ = '';
    die "cannot access file $file$/" unless -e $file && -r _;

    # Lexical handle: it is released automatically when a die unwinds this
    # sub, so no $SIG{__DIE__} hook is needed to avoid leaking it.
    open(my $xml, '<:encoding(UTF-8)', $file)
        or die "cannot open file $file as UTF-8: $!$/";

    my ($in_tag, $squote, $dquote, $in_comment) = (0, 0, 0, 0);
    my ($nread, $has_tags, @tokns);

    # read() rather than sysread(): sysread is not allowed on handles that
    # carry a :utf8/:encoding layer (fatal since Perl 5.30).
    my $first;
    my $got = read($xml, $first, 1);
    die "error reading first char$/" unless defined $got;
    die "file $file is empty$/" unless $got;

    # Skip a leading byte-order mark (U+FEFF); any other first character is
    # regular input and is queued for the main loop.
    my $pending = ord($first) == 65279 ? '' : $first;
    my $chunk;

    # Disabled draft of XML-declaration validation, kept for reference:
    # RD1: $chars = sysread XML,$buffer,1;
    # die "error reading file $file$/" unless defined $chars;
    # die "parsing error at: $_$/" unless $chars;
    # $_ .= $buffer;
    # goto RD1 if -1 == index $_,'>';
    # die "declaration error: $_$/" unless s/^<\?xml($qr_at*)\?>//s;
    # $attr = $1;
    # push @attr,$1,substr$2,1,-1 while defined $attr && $attr=~s/\s*($qr_na)\s*=\s*($qr_va)\s*//;
    # exists $n{$_} ? die "attribute '$_' is not unique$/" : $n{$_}++ for grep ++$i%2, @attr;
    # %attr = @attr;
    # die "bad xml 1.0 declaration$/" if grep !/^(?:version|encoding|standalone)$/, keys %attr;
    # if(exists $attr{version}){
    #   if(!defined $attr{version} || $attr{version} ne '1.0'){
    #     die "xml version 1.0 only$/"}}
    # if(exists $attr{standalone}){
    #   if(!defined $attr{standalone} || $attr{standalone} !~ /^(?:yes|no)$/){
    #     die "standalone error declaration$/"}}
    # if(exists $attr{encoding}){
    #   die "encoding error declaration$/" if !defined $attr{encoding} || $attr{encoding} !~ m!^UTF[-_ /]?8$!i}
    # $dec = bless \@attr,DECLARATION;

    # Keep going while queued characters remain, more input can be read, or
    # unprocessed text is still sitting in $_ at end of input.
    while (length($pending) || ($nread = read($xml, $chunk, $BUFLEN)) || length) {
        if (length $pending) {
            $nread = 0;
        } elsif ($nread) {
            $pending = $chunk;
            undef $chunk;
        } else {
            # No more input but $_ is not empty. Report a failed read as
            # such instead of as a parse error on the partial data.
            die "error reading file $file$/" unless defined $nread;
            s/^\s+//;
            s/\s+$//;
            $_ = "<$_" if $in_tag;
            die "parsing error: $_$/" if length;
            last;
        }

        my $ch = substr $pending, 0, 1, '';

        if ($ch eq '<' && !$in_comment) {
            # Start of a tag; whatever was accumulated so far is text.
            die "parsing error: <$_<$pending$/" if $in_tag == 1;
            $in_tag = 1;
            if (length) {
                die "parsing error: $_<$pending$/" if !$has_tags && /\S/;
                my $text = $_;
                push @tokns, bless(\$text, TEXT);
                $_ = '';
            }
        } elsif ($in_tag && $ch eq '>' && !$squote && !$dquote
                 && (!$in_comment || (length($_) >= 5 && substr($_, -2) eq '--'))) {
            # End of a tag. Inside a comment a '>' only counts after '--'.
            die "parsing error: <$_>$pending$/" unless /$qr_ta/;
            # Copy the captures right away: the attribute loop below runs
            # further matches that overwrite $1, $2, ...
            my ($open_name, $attr_src, $self_close, $close_name, $comment, $instr)
                = ($1, $2, $3, $4, $5, $6);
            $in_tag = 0;

            if (defined $open_name && length $open_name) {
                # Opening or self-closing tag.
                my @attr;
                push @attr, $1, substr($2, 1, -1)
                    while defined $attr_src
                       && $attr_src =~ s/\s*($qr_na)\s*=\s*($qr_va)\s*//;

                # @attr is a flat name/value list; names sit at even indexes.
                my %seen;
                for (my $i = 0; $i < @attr; $i += 2) {
                    my $attr_name = $attr[$i];
                    die "parsing error: attribute '$attr_name' is not unique at <$_>$pending$/"
                        if $seen{$attr_name}++;
                }

                # TODO: parse namespaces
                my $name_obj = bless \$open_name, NAME;
                my $token = @attr ? [$name_obj, bless(\@attr, ATTRIBUTES)]
                                  : [$name_obj];
                # Only self-closing tags are complete elements at this point.
                bless($token, ELEMENT) if defined $self_close && length $self_close;
                $has_tags = 1;
                push @tokns, $token;
            } elsif (defined $close_name && length $close_name) {
                # Closing tag: everything after the nearest still-open tag
                # (an unblessed array ref) becomes that element's content.
                my @content;
                unshift @content, pop @tokns
                    while @tokns && ref $tokns[-1] ne 'ARRAY';
                my $open = $tokns[-1];
                die "parsing error: <$_>$pending$/"
                    unless defined $open && ${ $open->[0] } eq $close_name;
                # TODO: honour xml:space="default" by removing space here.
                push @$open, bless(\@content, CONTENT);
                bless $open, ELEMENT;
            } elsif (defined $comment && length $comment) {
                $in_comment = 0;
                push @tokns, bless(\$comment, COMMENT);
            } elsif (defined $instr && length $instr) {
                push @tokns, bless(\$instr, INSTRUCTION);
            } else {
                die "parsing error: <$_>$pending$/";
            }
            $_ = '';
        } else {
            # Ordinary character: update comment/quote state, then keep it.
            $in_comment = 1 if $in_tag && $_ eq '!-' && $ch eq '-';
            $squote = !$squote if $ch eq "'" && $in_tag && !($dquote || $in_comment);
            $dquote = !$dquote if $ch eq '"' && $in_tag && !($squote || $in_comment);
            $_ .= $ch;
        }
    }

    die "error reading file $file$/" unless defined $nread;
    close $xml or warn "cannot close file $file: $!$/";

    # Only whitespace may appear as text outside the root element.
    die "uncommented text was found$/"
        if grep { ref($_) eq TEXT && $$_ =~ /\S/ } @tokns;

    my @elements = grep { ref($_) eq ELEMENT } @tokns;
    die "error identifying content$/" unless @elements;
    die "error identifying root$/" if @elements > 1;

    return bless \@tokns, CONTENT;
}
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
1; |