File Coverage

lib/XML/DOM/Lite/Parser.pm
Criterion Covered Total %
statement 116 143 81.1
branch 36 52 69.2
condition n/a
subroutine 17 19 89.4
pod 0 3 0.0
total 169 217 77.8


line stmt bran cond sub pod time code
1             package XML::DOM::Lite::Parser;
2 8     8   14150 use warnings;
  8         16  
  8         212  
3 8     8   36 use strict;
  8         12  
  8         131  
4              
5 8     8   1966 use XML::DOM::Lite::Document;
  8         29  
  8         203  
6 8     8   48 use XML::DOM::Lite::Node;
  8         10  
  8         148  
7 8     8   44 use XML::DOM::Lite::Constants qw(:all);
  8         15  
  8         14098  
8              
9             #========================================================================
10             # These regular expressions have been gratefully borrowed from:
11             #
12             # REX/Perl 1.0
13             # Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
14             # Technical Report TR 1998-17, School of Computing Science, Simon Fraser
15             # University, November, 1998.
16             # Copyright (c) 1998, Robert D. Cameron.
17             # The following code may be freely used and distributed provided that
18             # this copyright and citation notice remains intact and that modifications
19             # or additions are clearly identified.
20              
21             our $TextSE = "[^<]+";
22             our $UntilHyphen = "[^-]*-";
23             our $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";
24             our $CommentCE = "$Until2Hyphens>?";
25             our $UntilRSBs = "[^\\]]*](?:[^\\]]+])*]+";
26             our $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";
27             our $S = "[ \\n\\t\\r]+";
28             our $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";
29             our $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";
30             our $Name = "(?:$NameStrt)(?:$NameChar)*";
31             our $QuoteSE = "\"[^\"]*\"|'[^']*'";
32             our $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
33             our $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
34             our $S1 = "[\\n\\r\\t ]";
35             our $UntilQMs = "[^?]*\\?+";
36             our $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
37             our $DT_ItemSE = "<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
38             our $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*](?:$S)?)?>?";
39             our $DeclCE = "--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
40             our $PI_CE = "$Name(?:$PI_Tail)?";
41             our $EndTagCE = "$Name(?:$S)?>?";
42             our $AttValSE = "\"[^<\"]*\"|'[^<']*'";
43             our $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
44             our $ElementCE = "/(?:$EndTagCE)?|(?:$ElemTagCE)?";
45             our $MarkupSPE = "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|(?:$ElementCE)?)";
46             our $XML_SPE = "$TextSE|$MarkupSPE";
47              
48             #========================================================================
49              
50             # these have captures for parsing the attributes
51             our $AttValSE2 = "\"([^<\"]*)\"|'([^<']*)'";
52             our $ElemTagCE2 = "(?:($Name)(?:$S)?=(?:$S)?($AttValSE2))+(?:$S)?/?>?";
53              
54             sub new {
55 11     11 0 156 my ($class, %options) = @_;
56 11         53 my $self = bless {
57             stack => [ ],
58             options => \%options,
59             }, $class;
60 11         69 return $self;
61             }
62              
63             sub parse {
64 12     12 0 3975 my ($self, $XML) = (shift, shift);
65 12 50       45 unless (ref($self)) {
66 0         0 $self = __PACKAGE__->new(@_);
67             }
68 12         47 my @nodes = $self->_shallow_parse($XML);
69              
70 12         133 $self->{document} = XML::DOM::Lite::Document->new();
71 12         22 push @{$self->{stack}}, $self->{document};
  12         31  
72              
73 12         29 STEP : foreach my $n ( @nodes ) {
74 103 100       265 substr($n, 0, 1) eq '<' && do {
75 76 100       161 substr($n, 1, 1) eq '!' && do {
76 1         4 $self->_handle_decl_node($n);
77 1         2 next STEP;
78             };
79 75 100       168 substr($n, 1, 1) eq '?' && do {
80 1         5 $self->_handle_pi_node($n);
81 1         3 next STEP;
82             };
83 74         201 $self->_handle_element_node($n);
84 74         144 next STEP;
85             };
86 27         66 $self->_handle_text_node($n);
87             }
88              
89 12         44 return $self->{document};
90             }
91              
92             sub parseFile {
93 0     0 0 0 my ($self, $filename) = @_;
94 0 0       0 unless (ref $self) {
95 0         0 $self = __PACKAGE__->new;
96             }
97 0         0 my $stream;
98             {
99 0 0       0 open FH, '<', $filename or
  0         0  
100             die "can't open file $filename for reading: $!";
101 0         0 local $/ = undef;
102 0         0 $stream = ;
103 0         0 close FH;
104             }
105 0         0 return $self->parse($stream);
106             }
107              
108             sub _shallow_parse {
109 12     12   35 my ($self, $XML) = @_;
110              
111             # Check the options.
112 12         21 my %options = %{$self->{options}};
  12         57  
113 12 100       48 if (defined($options{'whitespace'})) {
114 11         27 my $mode = $options{'whitespace'};
115 11 100       107 if (index($mode, 'strip') >= 0) {
116 9         243 $XML =~ s/>$S/>/sg;
117 9         129 $XML =~ s/$S
118             }
119 11 100       51 if (index($mode, 'normalize') >= 0) {
120 2         42 $XML =~ s/$S/ /sg
121             }
122             }
123              
124 12         2685 return $XML =~ /$XML_SPE/go;
125             }
126              
127             sub _handle_decl_node {
128 1     1   3 my ($self, $decl) = @_;
129 1         2 my $kind;
130 1         2 my $length = length($decl);
131 1         1 my $start = 1;
132 1         5 my $parent = $self->{stack}->[$#{$self->{stack}}];
  1         4  
133 1 50       5 substr($decl, 0, 4) eq '