|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $Id: Nested.pm 4647 2010-03-09 18:10:10Z chris $  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HTML::PullParser::Nested - Wrapper around HTML::PullParser with awareness of tag nesting.  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use HTML::PullParser::Nested;  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $p = HTML::PullParser::Nested->new(  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         doc         => \ "......",  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         start       => "'S',tagname,attr,attrseq,text",  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         end         => "'E',tagname,text",  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         text        => "'T',text,is_cdata",  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     while (my $token = $p->get_token()) {  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($token->[0] eq "S" && $token->[1] eq "ul") {  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $p->push_nest($token);  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print "List:\n";  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             while (my $token = $p->get_token()) {  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 if ($token->[0] eq "S" && $token->[1] eq "li") {  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     print $p->get_token()->[1], "\n";  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print "\n";  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $p->pop_nest();  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This class is a wrapper around HTML::PullParser with awareness of the nesting  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of tags.  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There is a cursor, which points to the current position within the   | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 document.  It should be thought of as pointing to the start of the   | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 next token, or to 'EOL' (eof of level).  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tokens can be read sequentially, and the cursor will be advanced after   | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 each read.  They can also be unread, reversing any effects of their having  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 been read.  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 As noted, the class is aware of tag nesting, giving the concept of   | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nesting levels.  Level 1 encompasses the whole document.  As any point  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a new nesting level can be pushed on, specifying a tag type.  In effect,   | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the parser then behaves as if it had instead been opened on a document  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 only containing the content up the closing tag.  It is then possible to  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pop a nesting level, which then moves the cursor to the start of the   | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 closing tag and switches to the parent nesting level.  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package HTML::PullParser::Nested;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
60
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
823
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
    | 
| 
61
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.04';  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use Carp;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85
 | 
    | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
906
 | 
 use HTML::PullParser;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13856
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2712
 | 
    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new(file => $file, %options), new(doc => \$doc, %options)  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Constructor.  %options gets passed to the encapsulated HTML::PullParser   | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object and largely has the same restrictions.  As HTML::PullParser::Nested  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 needs to be able to process tokens returned by HTML::PullParser, there are  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 some restrictions on the argspecs for each token type.  Firstly, so that  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the token type can be identified, either event, or distinct literal strings  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 must be present at the same array index in the argspec for each returned  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 token type.  For start and end tags, tagname must also be present somewhere.  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 get_token()  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Read and return the next token and advance the cursor.  If the cursor   | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 points to eol, undef will be returned on the first read attempt, and   | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 an error raised thereafter.  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 unget_token(@tokens)  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Reverse the effects of get_token().  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 eol()  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 End of level flag.  Returns true after get_token() has returned undef to   | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 signify end of level.  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 push_nest($token)  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Push a new nesting level onto the stack.  $token should be on start   | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tag.  The current level will now correspond of all tags up to the   | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 corresponding close tag.  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The corresponding closing tag is determined by counting the start and   | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 end tags of the current nesting level.  This means that if   | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is encountered whilst the current nesting level is tracking  tags,   | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the parser will end either end up 3 tags deeper or at the same depth   | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 depending whether push_nest(), pop_nest() are called for the  tag.   | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It is safe to call push_nest() twice for the same tag type.  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 pop_nest()  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pop a nesting level from the stack.  Skips to the end of the current  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nesting level if necessary.  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
128
 | 
23
 | 
 
 | 
 
 | 
  
23
  
 | 
  
1
  
 | 
1400
 | 
     my $class = shift;  | 
| 
129
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90
 | 
     my %args = @_;  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     my $self = {};  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
144
 | 
     bless $self, $class;  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
135
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
     $self->_parse_argspecs(%args);  | 
| 
136
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
165
 | 
     $self->{'p'} = HTML::PullParser->new(%args);  | 
| 
137
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1455
 | 
     $self->{'nest'} = [{'tagname' => undef, 'depth' => 0}];  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
139
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
     return $self;  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub push_nest {  | 
| 
144
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
61
 | 
     my $self = shift;  | 
| 
145
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     my $token = shift;  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     my $tagname = $self->_canon_token($token)->[1];;  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     unshift @{$self->{'nest'}}, {'tagname' => $tagname, 'depth' => 0};  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub pop_nest {  | 
| 
154
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
  
1
  
 | 
33
 | 
     my $self = shift;  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $nest = $self->{'nest'}->[0];  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
158
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     if (scalar @{$self->{'nest'}} == 1) {  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
159
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
185
 | 
 	croak "nesting level underflow";  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     if ($nest->{'depth'} >= 0) {  | 
| 
163
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	while ($self->get_token()) { }  | 
| 
164
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	die "Assert failed" unless ($nest->{'depth'} == -1);  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
167
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     shift  @{$self->{'nest'}};  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub eol {  | 
| 
172
 | 
53
 | 
 
 | 
 
 | 
  
53
  
 | 
  
1
  
 | 
302
 | 
     my $self = shift;  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
191
 | 
     my $nest = $self->{'nest'}->[0];  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
     return $nest->{'depth'} == -1;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_token {  | 
| 
181
 | 
114
 | 
 
 | 
 
 | 
  
114
  
 | 
  
1
  
 | 
610
 | 
     my $self = shift;  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
183
 | 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177
 | 
     my $nest = $self->{'nest'}->[0];  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
114
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
266
 | 
     if ($nest->{'depth'} == -1) { croak "read past eol"; }  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4246
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
187
 | 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
288
 | 
     my $token = $self->{'p'}->get_token();  | 
| 
188
 | 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1230
 | 
     my $canon = $self->_canon_token($token);  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
102
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
151
 | 
     if (scalar @{$self->{'nest'}} == 1) {  | 
| 
 
 | 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
219
 | 
    | 
| 
191
 | 
63
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
130
 | 
 	if (!defined $canon) {  | 
| 
192
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 	    $nest->{'depth'}--;  | 
| 
193
 | 
14
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
 	    die "Assert failed" unless ($nest->{'depth'} == -1);  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
196
 | 
39
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
180
 | 
 	if (!defined $canon) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
197
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
111
 | 
 	    croak "tokens don't nest correctly";  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif ($canon->[0] =~ m/^(?:start|end)$/) {  | 
| 
199
 | 
38
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
250
 | 
 	    if ($canon->[1] eq $nest->{'tagname'}) {  | 
| 
200
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
 		if ($canon->[0] eq "start") {  | 
| 
201
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		    $nest->{'depth'}++;  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		} else {  | 
| 
203
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		    $nest->{'depth'}--;  | 
| 
204
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
36
 | 
 		    if ($nest->{'depth'} == -1) {  | 
| 
205
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 			$self->{'p'}->unget_token($token); # Leave token for parent level;  | 
| 
206
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
 			$token = undef; $canon = undef;  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
213
 | 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
307
 | 
     return $token;  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub unget_token {  | 
| 
218
 | 
14
 | 
 
 | 
 
 | 
  
14
  
 | 
  
1
  
 | 
92
 | 
     my $self = shift;  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my $nest = $self->{'nest'}->[0];  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     while (@_) { # Need to recognise undef items in arg list.  | 
| 
223
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	my $token = shift;  | 
| 
224
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 	my $canon = $self->_canon_token($token);  | 
| 
225
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 	if (scalar @{$self->{'nest'}} == 1) {  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
    | 
| 
226
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
158
 | 
 	    if (!defined $canon) {  | 
| 
227
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		$nest->{'depth'}++;  | 
| 
228
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 		die "Assert failed" unless ($nest->{'depth'} == 0);  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }	      | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
231
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
50
 | 
 	    if (!defined $canon) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		$nest->{'depth'}++;  | 
| 
233
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		die "Assert failed" unless ($nest->{'depth'} == 0);  | 
| 
234
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 		next; # Don't want to add token back onto stack, that was done in get_token()  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    } elsif ($canon->[0] =~ m/^(?:start|end)$/) {  | 
| 
237
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 		if ($canon->[1] eq $nest->{'tagname'}) {  | 
| 
238
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		    if ($canon->[0] eq "start") {  | 
| 
239
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
 			$nest->{'depth'}--;  | 
| 
240
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 			if ($nest->{'depth'} == -1) {  | 
| 
241
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
121
 | 
 			    croak "nesting tag underflow";  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    } else {  | 
| 
244
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 			$nest->{'depth'}++;  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
 	$self->{'p'}->unget_token($token);  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # HTML::PullParser allows the client to supply an argspec, specifying what data about a token should be  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returned by get_token().  We want to preserve this flexibility, but also need to be able to process  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # start and end tags returned by get_token().  We therefore parse the argspecs supplied by the client   | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # to try to find a way to turn this format into a canonical token with argspec "event,tagname" for  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # start / end tags (and "'other'" for other tokens).  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _parse_argspecs {  | 
| 
262
 | 
23
 | 
 
 | 
 
 | 
  
23
  
 | 
 
 | 
36
 | 
     my $self = shift;  | 
| 
263
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
     my %args = @_;  | 
| 
264
 | 
23
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
139
 | 
     if (!defined $args{'start'} || !defined $args{'end'}) { croak "need argspec for start and end"; }  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
    | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Firstly, for each token type, get the array index of (if present) event, tagname and literal string (plus the string content)  | 
| 
267
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     my $argspec_info = {};  | 
| 
268
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     foreach (qw(start end text process comment declaration)) {  | 
| 
269
 | 
132
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
333
 | 
 	if (defined $args{$_}) {  | 
| 
270
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
116
 | 
 	    $argspec_info->{$_} = $self->_parse_argspec($args{$_});  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Now try to find an array index corresponding to either event or a literal string for each token type.  | 
| 
275
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
     my $arg_idx = { map {$_ => -1} qw(event_idx string_idx)  };  | 
| 
 
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
118
 | 
    | 
| 
276
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85
 | 
     foreach my $event (keys %$argspec_info) {  | 
| 
277
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
 	foreach ( qw(event_idx string_idx) ) {  | 
| 
278
 | 
102
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
3087
 | 
 	    if (defined $arg_idx->{$_} && $arg_idx->{$_} == -1) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
132
 | 
 		$arg_idx->{$_} = $argspec_info->{$event}->{$_};  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    } elsif ( defined $arg_idx->{$_} && (!defined $argspec_info->{$event}->{$_} || $arg_idx->{$_} != $argspec_info->{$event}->{$_}) ) {  | 
| 
281
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		$arg_idx->{$_} = undef;  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Finally, store the info require to identify each token type (and tag name).  | 
| 
288
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
     $self->{'arg_info'} = {};  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # We can now identidy the token type either by event, or by the literal string.  | 
| 
291
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
70
 | 
     if (defined $arg_idx->{'event_idx'}) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	$self->{'arg_info'}->{'event_idx'} = $arg_idx->{'event_idx'};  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif (defined $arg_idx->{'string_idx'}) {  | 
| 
294
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
 	my %strs = map {$argspec_info->{$_}->{'string'} => 1} keys %$argspec_info;  | 
| 
 
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
140
 | 
    | 
| 
295
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
61
 | 
 	if (keys %strs != keys %$argspec_info) { croak "'string' must be unique across all argspecs"; }  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
    | 
| 
296
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
 	$self->{'arg_info'}->{'string_idx'} = $arg_idx->{'string_idx'};  | 
| 
297
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2471
 | 
 	$self->{'arg_info'}->{'start_string'} = $argspec_info->{'start'}->{'string'};  | 
| 
298
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
 	$self->{'arg_info'}->{'end_string'} = $argspec_info->{'end'}->{'string'};  | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
300
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
434
 | 
 	croak "need either event or 'string' at a consistent index across all argspecs"  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # For start and end tags, we also need the tagname.  | 
| 
304
 | 
19
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
94
 | 
     if (defined $argspec_info->{'start'}->{'tagname_idx'} && defined defined $argspec_info->{'end'}->{'tagname_idx'} ) {  | 
| 
305
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
 	$self->{'arg_info'}->{'start_tagname_idx'} = $argspec_info->{'start'}->{'tagname_idx'};  | 
| 
306
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
113
 | 
 	$self->{'arg_info'}->{'end_tagname_idx'} = $argspec_info->{'end'}->{'tagname_idx'};  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
308
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
 	croak "need tagname in argspec for start and end tags";  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Get the array index of (if present) event, tagname and literal string (plus the string content)  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _parse_argspec {  | 
| 
315
 | 
51
 | 
 
 | 
 
 | 
  
51
  
 | 
 
 | 
67
 | 
     my $self = shift;  | 
| 
316
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
299
 | 
     my @argspec = split(/,/, shift);  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
318
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
     my $i;  | 
| 
319
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
     my ($event_idx, $tagname_idx, $string_idx, $string);  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
321
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
     for ($i = 0; $i < @argspec; $i++) {  | 
| 
322
 | 
192
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2077
 | 
 	if ($argspec[$i] eq "event" && !defined $event_idx) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
323
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 	    $event_idx = $i;  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif ($argspec[$i] eq "tagname" && !defined $tagname_idx) {  | 
| 
325
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
 	    $tagname_idx = $i;  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif ((my ($str) = $argspec[$i] =~ m/^'(.+)'$/) && !defined $string_idx) {  | 
| 
327
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
 	    $string_idx = $i;  | 
| 
328
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
 	    $string = $str;  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
332
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
360
 | 
     return {'event_idx' => $event_idx, 'tagname_idx' => $tagname_idx, 'string_idx' => $string_idx, 'string' => $string};  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # For start + end tags, return result in the form "event,tagname".  For other tokens, uses the form "'other'"  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _canon_token {  | 
| 
338
 | 
128
 | 
 
 | 
 
 | 
  
128
  
 | 
 
 | 
137
 | 
     my $self = shift;  | 
| 
339
 | 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135
 | 
     my $token = shift;  | 
| 
340
 | 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
163
 | 
     my $canon = [];  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
342
 | 
128
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
492
 | 
     if (!defined $token) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
343
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
 	return undef;  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif (defined $self->{'arg_info'}->{'event_idx'}) {  | 
| 
345
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	$canon->[0] = $token->[$self->{'arg_info'}->{'event_idx'}];  | 
| 
346
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 	if ($canon->[0] !~ m/^(?:start|end)$/) { $canon->[0] = "other";	} # Flatten other token types to 'other' for consistency with detection based upon string.  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif (defined $self->{'arg_info'}->{'string_idx'}) {  | 
| 
348
 | 
103
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
283
 | 
 	if ($token->[$self->{'arg_info'}->{'string_idx'}] eq $self->{'arg_info'}->{'start_string'}) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
171
 | 
 	    $canon->[0] = "start";  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} elsif ($token->[$self->{'arg_info'}->{'string_idx'}] eq $self->{'arg_info'}->{'end_string'}) {  | 
| 
351
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
 	    $canon->[0] = "end";  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	} else {  | 
| 
353
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	    $canon->[0] = "other";  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
357
 | 
108
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
226
 | 
     if ($canon->[0] eq "start") {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
179
 | 
 	$canon->[1] = $token->[$self->{'arg_info'}->{'start_tagname_idx'}];  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } elsif ($canon->[0] eq "end") {  | 
| 
360
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 	$canon->[1] = $token->[$self->{'arg_info'}->{'end_tagname_idx'}];  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
363
 | 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
182
 | 
     return $canon;  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Christopher Key   | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENCE  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright (C) 2010 Christopher Key   | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This library is free software; you can redistribute it and/or modify  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it under the same terms as Perl itself, either Perl version 5.8.4 or,  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 at your option, any later version of Perl 5 you may have available.  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |