line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::TokeParser::Simple::Token; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
31
|
use strict; |
|
5
|
|
|
|
|
8
|
|
|
5
|
|
|
|
|
6239
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = '3.16'; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
sub new { |
8
|
137
|
|
|
137
|
0
|
210
|
my ($class, $token) = @_; |
9
|
137
|
50
|
|
|
|
314
|
$class->_croak("This class should not be instantiated") if __PACKAGE__ eq $class; |
10
|
137
|
|
|
|
|
924
|
return bless $token, $class; |
11
|
|
|
|
|
|
|
} |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub _croak { |
14
|
0
|
|
|
0
|
|
0
|
my ($proto, $message) = @_; |
15
|
0
|
|
|
|
|
0
|
require Carp; |
16
|
0
|
|
|
|
|
0
|
Carp::croak($message); |
17
|
|
|
|
|
|
|
} |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub _carp { |
20
|
1
|
|
|
1
|
|
2
|
my ($proto, $message) = @_; |
21
|
1
|
|
|
|
|
6
|
require Carp; |
22
|
1
|
|
|
|
|
114
|
Carp::carp($message); |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
0
|
|
|
0
|
1
|
0
|
sub is_tag {} |
26
|
9
|
|
|
9
|
1
|
18
|
sub is_start_tag {} |
27
|
32
|
|
|
32
|
1
|
75
|
sub is_end_tag {} |
28
|
0
|
|
|
0
|
1
|
0
|
sub is_text {} |
29
|
1
|
|
|
1
|
1
|
503
|
sub is_comment {} |
30
|
1
|
|
|
1
|
1
|
6649
|
sub is_declaration {} |
31
|
0
|
|
|
0
|
1
|
0
|
sub is_pi {} |
32
|
0
|
|
|
0
|
1
|
0
|
sub is_process_instruction {} |
33
|
|
|
|
|
|
|
|
34
|
18
|
|
|
18
|
1
|
97
|
sub rewrite_tag { shift } |
35
|
0
|
|
|
0
|
1
|
0
|
sub delete_attr {} |
36
|
0
|
|
|
0
|
1
|
0
|
sub set_attr {} |
37
|
0
|
|
|
0
|
1
|
0
|
sub get_tag {} |
38
|
0
|
|
|
0
|
1
|
0
|
sub return_tag {} # deprecated |
39
|
0
|
|
|
0
|
1
|
0
|
sub get_attr {} |
40
|
0
|
|
|
0
|
1
|
0
|
sub return_attr {} # deprecated |
41
|
0
|
|
|
0
|
1
|
0
|
sub get_attrseq {} |
42
|
0
|
|
|
0
|
1
|
0
|
sub return_attrseq {} # deprecated |
43
|
0
|
|
|
0
|
1
|
0
|
sub get_token0 {} |
44
|
0
|
|
|
0
|
1
|
0
|
sub return_token0 {} # deprecated |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# get_foo methods |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub return_text { |
49
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
50
|
1
|
|
|
|
|
9
|
$self->_carp('return_text() is deprecated. Use as_is() instead'); |
51
|
1
|
|
|
|
|
75
|
goto &as_is; |
52
|
|
|
|
|
|
|
} |
53
|
|
|
|
|
|
|
|
54
|
10
|
|
|
10
|
1
|
1052
|
sub as_is { return shift->[-1] } |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
1; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
__END__ |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 NAME |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
HTML::TokeParser::Simple::Token - Base class for C<HTML::TokeParser::Simple> |
63
|
|
|
|
|
|
|
tokens. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 SYNOPSIS |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
use HTML::TokeParser::Simple; |
68
|
|
|
|
|
|
|
my $p = HTML::TokeParser::Simple->new( $somefile ); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
while ( my $token = $p->get_token ) { |
71
|
|
|
|
|
|
|
# This prints all text in an HTML doc (i.e., it strips the HTML) |
72
|
|
|
|
|
|
|
next unless $token->is_text; |
73
|
|
|
|
|
|
|
print $token->as_is; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 DESCRIPTION |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
This is the base class for all returned tokens. It should never be |
79
|
|
|
|
|
|
|
instantiated directly. In fact, it will C<croak()> if it is. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 METHODS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
The following list of methods are provided by this class. Most of these are |
84
|
|
|
|
|
|
|
stub methods which must be overridden in a subclass. See |
85
|
|
|
|
|
|
|
L<HTML::TokeParser::Simple> for descriptions of these methods. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=over 4 |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=item * as_is |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item * delete_attr |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item * get_attr |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item * get_attrseq |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item * get_tag |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item * get_token0 |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item * is_comment |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=item * is_declaration |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item * is_end_tag |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=item * is_pi |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item * is_process_instruction |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item * is_start_tag |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item * is_tag |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item * is_text |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=item * return_attr |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=item * return_attrseq |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item * return_tag |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item * return_text |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=item * return_token0 |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=item * rewrite_tag |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=item * set_attr |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=back |