line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::DOM; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# If you are looking at the source code (which you are obviously doing |
4
|
|
|
|
|
|
|
# if you are reading this), note that '# ~~~' is my way of marking |
5
|
|
|
|
|
|
|
# something to be done still (except in this sentence). |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
24
|
|
|
24
|
|
179837
|
use 5.008003; |
|
24
|
|
|
|
|
55
|
|
9
|
|
|
|
|
|
|
|
10
|
24
|
|
|
24
|
|
85
|
use strict; |
|
24
|
|
|
|
|
26
|
|
|
24
|
|
|
|
|
395
|
|
11
|
24
|
|
|
24
|
|
67
|
use warnings; |
|
24
|
|
|
|
|
29
|
|
|
24
|
|
|
|
|
492
|
|
12
|
|
|
|
|
|
|
|
13
|
24
|
|
|
24
|
|
67
|
use Carp 'croak'; |
|
24
|
|
|
|
|
32
|
|
|
24
|
|
|
|
|
911
|
|
14
|
24
|
|
|
24
|
|
9991
|
use HTML::DOM::Element; |
|
24
|
|
|
|
|
48
|
|
|
24
|
|
|
|
|
1012
|
|
15
|
24
|
|
|
24
|
|
101
|
use HTML::DOM::Exception 'NOT_SUPPORTED_ERR'; |
|
24
|
|
|
|
|
26
|
|
|
24
|
|
|
|
|
802
|
|
16
|
24
|
|
|
24
|
|
77
|
use HTML::DOM::Node 'DOCUMENT_NODE'; |
|
24
|
|
|
|
|
24
|
|
|
24
|
|
|
|
|
651
|
|
17
|
24
|
|
|
24
|
|
73
|
use Scalar::Util 'weaken'; |
|
24
|
|
|
|
|
19
|
|
|
24
|
|
|
|
|
683
|
|
18
|
24
|
|
|
24
|
|
71
|
use URI; |
|
24
|
|
|
|
|
28
|
|
|
24
|
|
|
|
|
2579
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our $VERSION = '0.056'; |
21
|
|
|
|
|
|
|
our @ISA = 'HTML::DOM::Node'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
require HTML::DOM::Collection; |
24
|
|
|
|
|
|
|
require HTML::DOM::Comment; |
25
|
|
|
|
|
|
|
require HTML::DOM::DocumentFragment; |
26
|
|
|
|
|
|
|
require HTML::DOM::Implementation; |
27
|
|
|
|
|
|
|
require HTML::DOM::NodeList::Magic; |
28
|
|
|
|
|
|
|
require HTML::DOM::Text; |
29
|
|
|
|
|
|
|
require HTML::Tagset; |
30
|
|
|
|
|
|
|
require HTML::DOM::_TreeBuilder; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
use overload fallback => 1, |
33
|
|
|
|
|
|
|
'%{}' => sub { |
34
|
20284
|
|
|
20284
|
|
23495
|
my $self = shift; |
35
|
|
|
|
|
|
|
#return $self; # for debugging |
36
|
20284
|
100
|
100
|
|
|
80485
|
$self->isa(scalar caller) || caller->isa('HTML::DOM::_TreeBuilder') |
37
|
|
|
|
|
|
|
and return $self; |
38
|
3
|
|
|
|
|
11
|
$self->forms; |
39
|
24
|
|
|
24
|
|
85
|
}; |
|
24
|
|
|
|
|
25
|
|
|
24
|
|
|
|
|
125
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 NAME |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
HTML::DOM - A Perl implementation of the HTML Document Object Model |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 VERSION |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
Version 0.056 (alpha) |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
B This module is still at an experimental stage. The API is |
51
|
|
|
|
|
|
|
subject to change without |
52
|
|
|
|
|
|
|
notice. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 SYNOPSIS |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
use HTML::DOM; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
my $dom_tree = new HTML::DOM; # empty tree |
59
|
|
|
|
|
|
|
$dom_tree->write($source_code); |
60
|
|
|
|
|
|
|
$dom_tree->close; |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
my $other_dom_tree = new HTML::DOM; |
63
|
|
|
|
|
|
|
$other_dom_tree->parse_file($filename); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
$dom_tree->getElementsByTagName('body')->[0]->appendChild( |
66
|
|
|
|
|
|
|
$dom_tree->createElement('input') |
67
|
|
|
|
|
|
|
); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
print $dom_tree->innerHTML, "\n"; |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
my $text = $dom_tree->createTextNode('text'); |
72
|
|
|
|
|
|
|
$text->data; # get attribute |
73
|
|
|
|
|
|
|
$text->data('new value'); # set attribute |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 DESCRIPTION |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
This module implements the HTML Document Object Model by extending the |
78
|
|
|
|
|
|
|
HTML::Tree modules. The HTML::DOM class serves both as an HTML parser and |
79
|
|
|
|
|
|
|
as the document class. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
The following DOM modules are currently supported: |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Feature Version (aka level) |
84
|
|
|
|
|
|
|
------- ------------------- |
85
|
|
|
|
|
|
|
HTML 2.0 |
86
|
|
|
|
|
|
|
Core 2.0 |
87
|
|
|
|
|
|
|
Events 2.0 |
88
|
|
|
|
|
|
|
UIEvents 2.0 |
89
|
|
|
|
|
|
|
MouseEvents 2.0 |
90
|
|
|
|
|
|
|
MutationEvents 2.0 |
91
|
|
|
|
|
|
|
HTMLEvents 2.0 |
92
|
|
|
|
|
|
|
StyleSheets 2.0 |
93
|
|
|
|
|
|
|
CSS 2.0 (partially) |
94
|
|
|
|
|
|
|
CSS2 2.0 |
95
|
|
|
|
|
|
|
Views 2.0 |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
StyleSheets, CSS and CSS2 are actually provided by L. This list |
98
|
|
|
|
|
|
|
corresponds to CSS::DOM versions 0.02 to 0.14. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=for comment |
101
|
|
|
|
|
|
|
Level 2 interfaces not yet included: Range, Traversal |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head1 METHODS |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head2 Construction and Parsing |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=over 4 |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item $tree = new HTML::DOM %options; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
This class method constructs and returns a new HTML::DOM object. The |
112
|
|
|
|
|
|
|
C<%options>, which are all optional, are as follows: |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=over 4 |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item url |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The value that the C method will return. This value is also used by |
119
|
|
|
|
|
|
|
the C method. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item referrer |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
The value that the C method will return |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=item response |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
An HTTP::Response object. This will be used for information needed for |
128
|
|
|
|
|
|
|
writing cookies. It is expected to have a reference to a request object |
129
|
|
|
|
|
|
|
(accessible via its C method--see L). Passing a |
130
|
|
|
|
|
|
|
parameter to the 'cookie' method will be a no-op |
131
|
|
|
|
|
|
|
without this. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=item weaken_response |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
If this is passed a true value, then the HTML::DOM object will hold a weak |
136
|
|
|
|
|
|
|
reference to the response. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=item cookie_jar |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
An HTTP::Cookies object. As with C, if you omit this, arguments |
141
|
|
|
|
|
|
|
passed to the |
142
|
|
|
|
|
|
|
C method will be ignored. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item charset |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
The original character set of the document. This does not affect parsing |
147
|
|
|
|
|
|
|
via the C method (which always assumes Unicode). C will |
148
|
|
|
|
|
|
|
use this, if specified, or L otherwise. |
149
|
|
|
|
|
|
|
L's C method uses this to encode form data |
150
|
|
|
|
|
|
|
unless the form has a valid 'accept-charset' attribute. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=back |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
If C and C are omitted, they can be inferred from |
155
|
|
|
|
|
|
|
C. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
{ |
160
|
|
|
|
|
|
|
# This HTML::DOM::Element::HTML package represents the |
161
|
|
|
|
|
|
|
# documentElement. It inherits from |
162
|
|
|
|
|
|
|
# HTML::DOM::_TreeBuilder and acts |
163
|
|
|
|
|
|
|
# as the parser. It is also used as a parser for innerHTML. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Note for potential developers: You can’t refer to ->parent in |
166
|
|
|
|
|
|
|
# this package and expect it to provide the document, because |
167
|
|
|
|
|
|
|
# that’s not the case with innerHTML. Use ->ownerDocument. |
168
|
|
|
|
|
|
|
# Use ->parent only to distinguish between innerHTML and |
169
|
|
|
|
|
|
|
# the regular parser. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Concerning magic associations between forms and fields: To cope |
172
|
|
|
|
|
|
|
# with bad markup, an implicitly closed form (with no end tag) is |
173
|
|
|
|
|
|
|
# associated with any form fields that occur after that are not |
174
|
|
|
|
|
|
|
# inside any form. So when a start tag for a form is encountered, |
175
|
|
|
|
|
|
|
# we make that the ‘current form’, by pushing it on to |
176
|
|
|
|
|
|
|
# @{ $$self{_HTML_DOM_cf} }. When the element is closed, if it |
177
|
|
|
|
|
|
|
# is closed by an end tag, we simply pop it off the cf array. If |
178
|
|
|
|
|
|
|
# it is implicitly closed we pop it off and also make it the |
179
|
|
|
|
|
|
|
# ‘magic form’ (_HTML_DOM_mg_f). When we encounter a form field, |
180
|
|
|
|
|
|
|
# we give it a magic association with the form if the cf |
181
|
|
|
|
|
|
|
# stack is empty. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
package HTML::DOM::Element::HTML; |
185
|
|
|
|
|
|
|
our @ISA = qw' HTML::DOM::Element HTML::DOM::_TreeBuilder'; |
186
|
|
|
|
|
|
|
|
187
|
24
|
|
|
24
|
|
2342
|
use Scalar::Util qw 'weaken isweak'; |
|
24
|
|
|
|
|
26
|
|
|
24
|
|
|
|
|
31299
|
|
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# I have to override this so it doesn't delete _HTML_DOM_* attri- |
190
|
|
|
|
|
|
|
# butes and so that it doesn’t rebless the object. |
191
|
|
|
|
|
|
|
sub elementify { |
192
|
93
|
|
|
93
|
|
87
|
my $self = shift; |
193
|
93
|
100
|
|
|
|
2500
|
my %attrs = map /^[a-z_]*\z/ ? () : ($_ => $self->{$_}), |
194
|
|
|
|
|
|
|
keys %$self; |
195
|
93
|
|
|
|
|
1263
|
my @weak = grep isweak $self->{$_}, keys %$self; |
196
|
93
|
|
|
|
|
365
|
$self->SUPER::elementify; |
197
|
93
|
|
|
|
|
585
|
%$self = (%$self, %attrs); # this invigorates feeble refs |
198
|
93
|
|
|
|
|
500
|
weaken $self->{$_} for @weak; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub new { |
202
|
147
|
|
|
147
|
|
133
|
my $tb; # hafta declare it separately so the closures can |
203
|
|
|
|
|
|
|
# c it |
204
|
|
|
|
|
|
|
($tb = shift->HTML::DOM::_TreeBuilder::new( |
205
|
|
|
|
|
|
|
element_class => 'HTML::DOM::Element', |
206
|
|
|
|
|
|
|
'tweak_~text' => sub { |
207
|
547
|
|
|
547
|
|
569
|
my ($text, $parent) = @_; |
208
|
|
|
|
|
|
|
# $parent->ownerDocument will be undef if |
209
|
|
|
|
|
|
|
# $parent is the doc. |
210
|
547
|
|
33
|
|
|
1140
|
$parent->splice_content( -1,1, |
211
|
|
|
|
|
|
|
($parent->ownerDocument || $parent) |
212
|
|
|
|
|
|
|
->createTextNode($text) ); |
213
|
|
|
|
|
|
|
$parent->content_offset( |
214
|
|
|
|
|
|
|
$$tb{_HTML_DOM_tb_c_offset} |
215
|
547
|
|
|
|
|
1390
|
); |
216
|
|
|
|
|
|
|
}, |
217
|
|
|
|
|
|
|
'tweak_*' => sub { |
218
|
807
|
|
|
807
|
|
814
|
my($elem, $tag, $doc_elem) = @_; |
219
|
807
|
100
|
|
|
|
1218
|
$tag =~ /^~/ and return; |
220
|
|
|
|
|
|
|
|
221
|
800
|
100
|
|
|
|
1095
|
if( |
222
|
|
|
|
|
|
|
$tag eq 'link' |
223
|
|
|
|
|
|
|
) { |
224
|
16
|
|
|
|
|
31
|
HTML'DOM'Element'Link'_reset_style_sheet( |
225
|
|
|
|
|
|
|
$elem |
226
|
|
|
|
|
|
|
); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# If a form is being closed, determine |
230
|
|
|
|
|
|
|
# whether it is closed implicitly and set |
231
|
|
|
|
|
|
|
# the current form and magic form |
232
|
|
|
|
|
|
|
# accordingly. |
233
|
800
|
100
|
|
|
|
1022
|
if($tag eq 'form') { |
234
|
|
|
|
|
|
|
pop |
235
|
40
|
50
|
|
|
|
39
|
@{$$doc_elem{_HTML_DOM_cf}||[]}; |
|
40
|
|
|
|
|
85
|
|
236
|
|
|
|
|
|
|
delete $$doc_elem{_HTML_DOM_etif} |
237
|
|
|
|
|
|
|
or $$doc_elem{_HTML_DOM_mg_f} |
238
|
40
|
100
|
|
|
|
106
|
= $elem |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# If a formie is being closed, create a |
242
|
|
|
|
|
|
|
# magic association where appropriate. |
243
|
800
|
100
|
100
|
|
|
2749
|
if(!$$doc_elem{_HTML_DOM_no_mg} |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
244
|
|
|
|
|
|
|
and $tag =~ /^(?: |
245
|
|
|
|
|
|
|
button|(?: |
246
|
|
|
|
|
|
|
fieldse|inpu|(?:obj|sel)ec |
247
|
|
|
|
|
|
|
)t|label|textarea |
248
|
|
|
|
|
|
|
)\z/x |
249
|
|
|
|
|
|
|
and $$doc_elem{_HTML_DOM_mg_f} |
250
|
|
|
|
|
|
|
and !$$doc_elem{_HTML_DOM_cf} |
251
|
|
|
|
|
|
|
||!@{$$doc_elem{_HTML_DOM_cf}}) { |
252
|
|
|
|
|
|
|
$elem->form( |
253
|
|
|
|
|
|
|
$$doc_elem{_HTML_DOM_mg_f} |
254
|
9
|
|
|
|
|
27
|
); |
255
|
9
|
|
|
|
|
26
|
$doc_elem->ownerDocument-> |
256
|
|
|
|
|
|
|
magic_forms(1); |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
my $event_offsets = delete |
260
|
|
|
|
|
|
|
$elem->{_HTML_DOM_tb_event_offsets} |
261
|
800
|
100
|
|
|
|
1855
|
or return; |
262
|
4
|
|
|
|
|
7
|
_create_events( |
263
|
|
|
|
|
|
|
$doc_elem, $elem, $event_offsets |
264
|
|
|
|
|
|
|
); |
265
|
|
|
|
|
|
|
}, |
266
|
147
|
|
|
|
|
1041
|
)) |
267
|
|
|
|
|
|
|
->ignore_ignorable_whitespace(0); # stop eof()'s cleanup |
268
|
147
|
|
|
|
|
354
|
$tb->store_comments(1); # from changing an |
269
|
147
|
|
|
|
|
335
|
$tb->unbroken_text(1); # necessary, con- # elem_han- |
270
|
|
|
|
|
|
|
# sidering what # dler's view |
271
|
|
|
|
|
|
|
# _tweak_~text does # of the tree |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# Web browsers preserve whitespace, at least from the point |
274
|
|
|
|
|
|
|
# of view of the DOM; but the main reason we are using this |
275
|
|
|
|
|
|
|
# option is that a parser for innerHTML doesn’t know |
276
|
|
|
|
|
|
|
# whether the nodes will be inserted in a . |
277
|
147
|
|
|
|
|
305
|
no_space_compacting $tb 1; |
278
|
|
|
|
|
|
|
|
279
|
147
|
|
|
|
|
667
|
$tb->handler(text => "text", # so we can get line |
280
|
|
|
|
|
|
|
"self, text, is_cdata, offset"); # numbers for scripts |
281
|
147
|
|
|
|
|
472
|
$tb->handler(start => "start", |
282
|
|
|
|
|
|
|
"self, tagname, attr, attrseq, offset, tokenpos"); |
283
|
147
|
|
|
|
|
442
|
$tb->handler((declaration=>)x2,'self,tagname,tokens,text'); |
284
|
|
|
|
|
|
|
|
285
|
147
|
|
|
|
|
445
|
$tb->{_HTML_DOM_tweakall} = $tb->{'_tweak_*'}; |
286
|
|
|
|
|
|
|
|
287
|
147
|
|
|
|
|
199
|
my %opts = @_; |
288
|
147
|
|
|
|
|
178
|
$tb->{_HTML_DOM_no_mg} = delete $opts{no_magic_forms}; |
289
|
|
|
|
|
|
|
# used by an element’s innerHTML |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# We have to copy it like this, because our circular ref- |
292
|
|
|
|
|
|
|
# erence is thus: $tb -> object -> closure -> $tb |
293
|
|
|
|
|
|
|
# We can’t weaken $tb without a copy of it, because it is |
294
|
|
|
|
|
|
|
# the only reference to the object. |
295
|
147
|
|
|
|
|
130
|
my $life_raft = $tb; weaken $tb; $tb; |
|
147
|
|
|
|
|
202
|
|
|
147
|
|
|
|
|
344
|
|
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
sub start { |
299
|
594
|
100
|
|
594
|
|
1175
|
return shift->SUPER::start(@_) if @_ < 6; # shirt-çorcuit |
300
|
|
|
|
|
|
|
|
301
|
589
|
|
|
|
|
490
|
my $tokenpos = pop; |
302
|
589
|
|
|
|
|
428
|
my $offset = pop; |
303
|
589
|
|
|
|
|
467
|
my %event_offsets; |
304
|
589
|
|
|
|
|
423
|
my $attr_names = pop; |
305
|
589
|
|
|
|
|
1213
|
for(0..$#$attr_names) { |
306
|
|
|
|
|
|
|
$$attr_names[$_] =~ /^on(.*)/is |
307
|
535
|
100
|
|
|
|
1333
|
and $event_offsets{$1} = |
308
|
|
|
|
|
|
|
$$tokenpos[$_*4 + 4] + $offset; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
589
|
|
|
|
|
1453
|
my $elem = (my $self = shift)->SUPER::start(@_); |
312
|
|
|
|
|
|
|
|
313
|
589
|
100
|
100
|
|
|
936
|
$_[0] eq 'form' and push @{ $$self{_HTML_DOM_cf} ||= [] }, |
|
40
|
|
|
|
|
158
|
|
314
|
|
|
|
|
|
|
$elem; |
315
|
|
|
|
|
|
|
|
316
|
589
|
100
|
|
|
|
4212
|
return $elem unless %event_offsets; |
317
|
|
|
|
|
|
|
|
318
|
5
|
100
|
|
|
|
17
|
if(!$HTML::Tagset::emptyElement{$_[0]}) { # container |
319
|
|
|
|
|
|
|
$$elem{_HTML_DOM_tb_event_offsets} = |
320
|
4
|
|
|
|
|
7
|
\%event_offsets; |
321
|
|
|
|
|
|
|
} else { |
322
|
1
|
|
|
|
|
3
|
_create_events( |
323
|
|
|
|
|
|
|
$self, |
324
|
|
|
|
|
|
|
$elem, |
325
|
|
|
|
|
|
|
\%event_offsets, |
326
|
|
|
|
|
|
|
); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
5
|
|
|
|
|
31
|
return $elem; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
sub _create_events { |
333
|
5
|
|
|
5
|
|
7
|
my ($doc_elem,$elem,$event_offsets) = @_; |
334
|
5
|
100
|
|
|
|
14
|
defined(my $event_attr_handler = |
335
|
|
|
|
|
|
|
$doc_elem->ownerDocument->event_attr_handler) |
336
|
|
|
|
|
|
|
or return; |
337
|
3
|
|
|
|
|
7
|
for(keys %$event_offsets) { |
338
|
|
|
|
|
|
|
my $l = |
339
|
|
|
|
|
|
|
&$event_attr_handler( |
340
|
|
|
|
|
|
|
$elem, |
341
|
|
|
|
|
|
|
$_, |
342
|
|
|
|
|
|
|
$elem->attr("on$_"), |
343
|
3
|
|
|
|
|
19
|
$$event_offsets{$_} |
344
|
|
|
|
|
|
|
); |
345
|
3
|
50
|
|
|
|
280
|
defined $l and |
346
|
|
|
|
|
|
|
$elem->event_handler ( |
347
|
|
|
|
|
|
|
$_, $l |
348
|
|
|
|
|
|
|
); |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
sub text { |
353
|
547
|
|
|
547
|
|
702
|
$_[0]{_HTML_DOM_tb_c_offset} = pop; |
354
|
547
|
|
|
|
|
1128
|
shift->SUPER::text(@_) |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub insert_element { |
358
|
860
|
|
|
860
|
|
1071
|
my ($self, $tag) = (shift, @_); |
359
|
860
|
100
|
100
|
|
|
2482
|
if((ref $tag ? $tag->tag : $tag) eq 'tr' |
|
|
100
|
|
|
|
|
|
360
|
|
|
|
|
|
|
and $self->pos->tag eq 'table') { |
361
|
12
|
|
|
|
|
23
|
$self->insert_element('tbody', 1); |
362
|
|
|
|
|
|
|
} |
363
|
860
|
|
|
|
|
1772
|
$self->SUPER::insert_element(@_); |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub end { |
367
|
601
|
|
|
601
|
|
533
|
my $self = shift; |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# If this is a form, record that we’ve seen an end tag, so |
370
|
|
|
|
|
|
|
# that this does not become a ‘magic form’. |
371
|
|
|
|
|
|
|
++$$self{_HTML_DOM_etif} # end tag is 'form' |
372
|
601
|
100
|
|
|
|
1009
|
if $_[0] eq 'form'; |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
# Make sure cannot close a cell outside the cur- |
375
|
|
|
|
|
|
|
# rent table. |
376
|
601
|
100
|
|
|
|
1066
|
$_[0] =~ /^t[hd]\z/ and @_ = (\$_[0], 'table'); |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# HTML::TreeBuilder expects the element to be the |
379
|
|
|
|
|
|
|
# topmost element, and gets confused when it’s inside the |
380
|
|
|
|
|
|
|
# ~doc. It sets _pos to the doc when it encounters . |
381
|
|
|
|
|
|
|
# This works around that. |
382
|
601
|
|
|
|
|
564
|
my $pos = $self->{_pos}; |
383
|
601
|
|
|
|
|
1327
|
my @ret = $self->SUPER::end(@_); |
384
|
|
|
|
|
|
|
$self->{_pos} = $pos |
385
|
600
|
100
|
100
|
|
|
1497
|
if ($self->{_pos}||return @ret)->{_tag} eq '~doc'; |
386
|
561
|
|
|
|
|
1697
|
@ret; # TB relies on this retval |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
sub declaration { |
390
|
9
|
|
|
9
|
|
17
|
my($self,$tagname,$tokens,$source) = @_; |
391
|
|
|
|
|
|
|
return |
392
|
9
|
100
|
66
|
|
|
50
|
unless $tagname eq 'doctype' |
393
|
|
|
|
|
|
|
and my $parent = $self->parent; |
394
|
|
|
|
|
|
|
package HTML::DOM; # bypass overloading |
395
|
|
|
|
|
|
|
$parent->{_HTML_DOM_doctype} = $source |
396
|
8
|
50
|
|
|
|
11
|
unless defined $parent->{_HTML_DOM_doctype}; |
397
|
8
|
100
|
|
|
|
29
|
return unless @$tokens > 3; |
398
|
7
|
|
|
|
|
16
|
for ($self->{_HTML_DOM_version} = $tokens->[3]){ |
399
|
7
|
50
|
|
|
|
81
|
s/^['"]// and s/['"]\z//; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
} |
402
|
|
|
|
|
|
|
|
403
|
1147
|
|
|
1147
|
|
2939
|
sub element_class { 'HTML::DOM::Element' } |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
# HTMLHtmlElement interface |
406
|
5
|
|
|
5
|
|
362
|
sub version { shift->_attr('version' => @_) } |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
} # end of special TreeBuilder package |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
sub new { |
411
|
92
|
|
|
92
|
1
|
2033822
|
my $self = shift->SUPER::new('~doc'); |
412
|
|
|
|
|
|
|
|
413
|
92
|
|
|
|
|
153
|
my %opts = @_; |
414
|
92
|
|
|
|
|
1358
|
$self->{_HTML_DOM_url} = $opts{url}; # might be undef |
415
|
92
|
|
|
|
|
272
|
$self->{_HTML_DOM_referrer} = $opts{referrer}; # might be undef |
416
|
92
|
100
|
|
|
|
255
|
if($opts{response}) { |
417
|
9
|
|
|
|
|
12
|
$self->{_HTML_DOM_response} = $opts{response}; |
418
|
9
|
100
|
|
|
|
13
|
if(!defined $self->{_HTML_DOM_url}) {{ |
419
|
7
|
|
|
|
|
6
|
$self->{_HTML_DOM_url} = |
420
|
|
|
|
|
|
|
($opts{response}->request || last) |
421
|
7
|
|
100
|
|
|
18
|
->url; |
422
|
|
|
|
|
|
|
}} |
423
|
9
|
100
|
|
|
|
34
|
if(!defined $self->{_HTML_DOM_referrer}) {{ |
424
|
7
|
|
|
|
|
6
|
$self->{_HTML_DOM_referrer} = |
425
|
|
|
|
|
|
|
($opts{response}->request || last) |
426
|
7
|
|
100
|
|
|
14
|
->header('Referer') |
427
|
|
|
|
|
|
|
}} |
428
|
9
|
100
|
|
|
|
42
|
if($opts{weaken_response}) { |
429
|
|
|
|
|
|
|
weaken $self->{_HTML_DOM_response} |
430
|
1
|
|
|
|
|
2
|
} |
431
|
|
|
|
|
|
|
} |
432
|
92
|
|
|
|
|
116
|
$self->{_HTML_DOM_jar} = $opts{cookie_jar}; # might be undef |
433
|
92
|
|
|
|
|
113
|
$self->{_HTML_DOM_cs} = $opts{charset}; |
434
|
|
|
|
|
|
|
|
435
|
92
|
|
|
|
|
321
|
$self; |
436
|
|
|
|
|
|
|
} |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=item $tree->elem_handler($elem_name => sub { ... }) |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
If you call this method first, then, when the DOM tree is in the |
441
|
|
|
|
|
|
|
process of |
442
|
|
|
|
|
|
|
being built (as a result of a call to C or C), the |
443
|
|
|
|
|
|
|
subroutine will be called after each C<$elem_name> element |
444
|
|
|
|
|
|
|
is |
445
|
|
|
|
|
|
|
added to the tree. If you give '*' as the element name, the subroutine |
446
|
|
|
|
|
|
|
will be called for each element that does not have a handler. The |
447
|
|
|
|
|
|
|
subroutine's |
448
|
|
|
|
|
|
|
two arguments will be the tree itself |
449
|
|
|
|
|
|
|
and the element in question. The subroutine can call the DOM object's |
450
|
|
|
|
|
|
|
C |
451
|
|
|
|
|
|
|
method to insert HTML code into the source after the element. |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
Here is a lame example (which does not take Content-Script-Type headers |
454
|
|
|
|
|
|
|
or security into account): |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
$tree->elem_handler(script => sub { |
457
|
|
|
|
|
|
|
my($document,$elem) = @_; |
458
|
|
|
|
|
|
|
return unless $elem->attr('type') eq 'application/x-perl'; |
459
|
|
|
|
|
|
|
eval($elem->firstChild->data); |
460
|
|
|
|
|
|
|
}); |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
$tree->write( |
463
|
|
|
|
|
|
|
' The time is |
464
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
precisely. |
468
|
|
|
|
|
|
|
' |
469
|
|
|
|
|
|
|
); |
470
|
|
|
|
|
|
|
$tree->close; |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
print $tree->documentElement->as_text, "\n"; |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
(Note: L's |
475
|
|
|
|
|
|
|
L|HTML::DOM::Element/content_offset> method might come in |
476
|
|
|
|
|
|
|
handy for reporting line numbers for script errors.) |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub elem_handler { |
481
|
133
|
|
|
133
|
1
|
747
|
my ($self,$elem_name,$sub) = @_; |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# ~~~ temporary; for internal use only: |
484
|
133
|
100
|
|
|
|
303
|
@_ < 3 and return $$self{_HTML_DOM_nih}{$elem_name}; |
485
|
|
|
|
|
|
|
|
486
|
9
|
|
|
|
|
14
|
$$self{_HTML_DOM_nih}{$elem_name} = $sub; # nih = node inser- |
487
|
|
|
|
|
|
|
# tion handler |
488
|
|
|
|
|
|
|
my $h = $self->{_HTML_DOM_elem_handlers}{$elem_name} = sub { |
489
|
|
|
|
|
|
|
# I can’t put $doc_elem outside the closure, because |
490
|
|
|
|
|
|
|
# ->open replaces it with another object, and we’d be |
491
|
|
|
|
|
|
|
# referring to the wrong one. |
492
|
16
|
|
|
16
|
|
15
|
my $doc_elem = $_[2]; |
493
|
16
|
|
|
|
|
29
|
$doc_elem->{_HTML_DOM_tweakall}->(@_); |
494
|
16
|
|
|
|
|
29
|
$self->_modified; # in case there are node lists hanging |
495
|
|
|
|
|
|
|
# around that the handler references |
496
|
16
|
|
|
|
|
37
|
&$sub($self, $_[0]); |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
# See the comment in sub write. |
499
|
15
|
|
|
|
|
310
|
(my $level = $$self{_HTML_DOM_buffered}); |
500
|
15
|
50
|
66
|
|
|
72
|
if( $level |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
501
|
|
|
|
|
|
|
and ($level -= 1, 1) |
502
|
|
|
|
|
|
|
and $$self{_HTML_DOM_p} |
503
|
|
|
|
|
|
|
and $$self{_HTML_DOM_p}[$level] |
504
|
|
|
|
|
|
|
) { |
505
|
7
|
|
|
|
|
7
|
$$self{_HTML_DOM_p}[$level]->eof; |
506
|
|
|
|
|
|
|
$level |
507
|
1
|
|
|
|
|
1
|
? --$#{$$self{_HTML_DOM_p}} |
508
|
7
|
100
|
|
|
|
19
|
: delete $$self{_HTML_DOM_p}; |
509
|
|
|
|
|
|
|
} |
510
|
9
|
|
|
|
|
32
|
}; |
511
|
9
|
100
|
|
|
|
12
|
if(my $p = $$self{_HTML_DOM_parser}) { |
512
|
1
|
|
|
|
|
2
|
$$p{"_tweak_$elem_name"} = $h |
513
|
|
|
|
|
|
|
} |
514
|
9
|
|
|
|
|
24
|
weaken $self; |
515
|
9
|
|
|
|
|
13
|
return; |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=item css_url_fetcher( \&sub ) |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
With this method you can provide a subroutine that fetches URLs referenced |
522
|
|
|
|
|
|
|
by 'link' tags. Its sole argument is the URL, which is made absolute based |
523
|
|
|
|
|
|
|
on the HTML page's own base URL (it is assumed that this is absolute). It |
524
|
|
|
|
|
|
|
should return C or an empty list on failure. Upon |
525
|
|
|
|
|
|
|
success, it should return just the CSS code, if it has been decoded (and is |
526
|
|
|
|
|
|
|
in Unicode), or, if it has not been decoded, the CSS code followed by |
527
|
|
|
|
|
|
|
C<< decode => 1 >>. See L for details on |
528
|
|
|
|
|
|
|
when you should or should not decode it. (Note that HTML::DOM |
529
|
|
|
|
|
|
|
automatically |
530
|
|
|
|
|
|
|
provides an encoding hint based on the HTML document.) |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
HTML::DOM passes the result of the url fetcher to L and |
533
|
|
|
|
|
|
|
turns |
534
|
|
|
|
|
|
|
it into a style sheet object accessible via the link element's |
535
|
|
|
|
|
|
|
L|HTML::DOM::Element::Link/sheet> method. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=cut |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
sub css_url_fetcher { |
540
|
11
|
|
|
11
|
1
|
598
|
my $old = (my $self = shift)->{_HTML_DOM_cuf}; |
541
|
11
|
100
|
|
|
|
21
|
$self->{_HTML_DOM_cuf} = shift if @_; |
542
|
11
|
100
|
|
|
|
41
|
$old||(); |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=item $tree->write(...) (DOM method) |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
This parses the HTML code passed to it, adding it to the end of |
548
|
|
|
|
|
|
|
the |
549
|
|
|
|
|
|
|
document. It assumes that its input is a normal Perl Unicode string. Like |
550
|
|
|
|
|
|
|
L's |
551
|
|
|
|
|
|
|
C method, it can take a coderef. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
When it is called from an an element handler (see |
554
|
|
|
|
|
|
|
C, above), the value passed to it |
555
|
|
|
|
|
|
|
will be inserted into the HTML code after the current element when the |
556
|
|
|
|
|
|
|
element handler returns. (In this case a coderef won't do--maybe that will |
557
|
|
|
|
|
|
|
be added later.) |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
If the C method has been called, C will call C before |
560
|
|
|
|
|
|
|
parsing the HTML code passed to it. |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
=item $tree->writeln(...) (DOM method) |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
Just like C except that it appends "\n" to its argument and does |
565
|
|
|
|
|
|
|
not work with code refs. (Rather |
566
|
|
|
|
|
|
|
pointless, if you ask me. :-) |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
=item $tree->close() (DOM method) |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
Call this method to signal to the parser that the end of the HTML code has |
571
|
|
|
|
|
|
|
been reached. It will then parse any residual HTML that happens to be |
572
|
|
|
|
|
|
|
buffered. It also makes the next C call C. |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
=item $tree->open (DOM method) |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
Deletes the HTML tree, resetting it so that it has just an element, |
577
|
|
|
|
|
|
|
and a parser hungry for HTML code. |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=item $tree->parse_file($file) |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
This method takes a file name or handle and parses the content, |
582
|
|
|
|
|
|
|
(effectively) calling C afterwards. In the former case (a file |
583
|
|
|
|
|
|
|
name), L will be used to detect the encoding. In the |
584
|
|
|
|
|
|
|
latter (a file handle), you'll have to C it yourself. This could |
585
|
|
|
|
|
|
|
be considered a bug. If you have a solution to this (how to make |
586
|
|
|
|
|
|
|
HTML::Encoding detect an encoding from a file handle), please let me know. |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
As of version 0.12, this method returns true upon success, or undef/empty |
589
|
|
|
|
|
|
|
list on failure. |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=item $tree->charset |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
This method returns the name of the character |
594
|
|
|
|
|
|
|
set that was passed to C, or, if that was not given, that which |
595
|
|
|
|
|
|
|
C used. |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
It returns undef if C was not given a charset and if C was |
598
|
|
|
|
|
|
|
not |
599
|
|
|
|
|
|
|
used or was |
600
|
|
|
|
|
|
|
passed a file handle. |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
You can also set the charset by passing an argument, in which case the old |
603
|
|
|
|
|
|
|
value is returned. |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=cut |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
sub parse_file { |
609
|
5
|
|
|
5
|
1
|
9
|
my $file = $_[1]; |
610
|
|
|
|
|
|
|
|
611
|
5
|
|
|
|
|
11
|
$_[0]->open; |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
# This ‘if’ statement uses the same check that HTML::Parser uses. |
614
|
|
|
|
|
|
|
# We are not strictly checking to see whether it’s a handle, |
615
|
|
|
|
|
|
|
# but whether HTML::Parser would consider it one. |
616
|
5
|
50
|
33
|
|
|
32
|
if (ref($file) || ref(\$file) eq "GLOB") { |
617
|
|
|
|
|
|
|
(my $a = shift->{_HTML_DOM_parser}) |
618
|
0
|
0
|
|
|
|
0
|
->parse_file($file) || return; |
619
|
0
|
|
|
|
|
0
|
$a ->elementify; |
620
|
0
|
|
|
|
|
0
|
return 1; |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
|
623
|
24
|
|
|
24
|
|
116
|
no warnings 'parenthesis'; # 5.8.3 Grrr!! |
|
24
|
|
|
|
|
32
|
|
|
24
|
|
|
|
|
13037
|
|
624
|
5
|
100
|
|
|
|
8
|
if(my $charset = $_[0]{_HTML_DOM_cs}) { |
625
|
3
|
50
|
|
|
|
72
|
open my $fh, $file or return; |
626
|
3
|
|
|
|
|
15
|
$charset =~ s/^(?:x-?)?mac-?/mac/i; |
627
|
3
|
|
|
1
|
|
47
|
binmode $fh, ":encoding($charset)"; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4
|
|
628
|
|
|
|
|
|
|
$$_{_HTML_DOM_parser}->parse_file($fh) || return, |
629
|
|
|
|
|
|
|
$_->close |
630
|
3
|
|
50
|
|
|
2974
|
for shift; |
631
|
3
|
|
|
|
|
73
|
return 1; |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
|
634
|
2
|
100
|
|
|
|
80
|
open my $fh, $file or return; |
635
|
1
|
|
|
|
|
3
|
local $/; |
636
|
1
|
|
|
|
|
13
|
my $contents = <$fh>; |
637
|
1
|
|
|
|
|
457
|
require HTML::Encoding; |
638
|
1
|
|
50
|
|
|
9434
|
my $encoding = HTML::Encoding::encoding_from_html_document( |
639
|
|
|
|
|
|
|
$contents |
640
|
|
|
|
|
|
|
) || 'iso-8859-1'; |
641
|
|
|
|
|
|
|
# Since we’ve already slurped the file, we might as well |
642
|
|
|
|
|
|
|
# avoid having HTML::Parser read it again, even if we could |
643
|
|
|
|
|
|
|
# use binmode. |
644
|
1
|
|
|
|
|
4093
|
require Encode; |
645
|
|
|
|
|
|
|
$_->write(Encode::decode($encoding, $contents)), $_->close, |
646
|
|
|
|
|
|
|
$_->{_HTML_DOM_cs} = $encoding |
647
|
1
|
|
|
|
|
5
|
for shift; |
648
|
1
|
|
|
|
|
35
|
return 1; |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
sub charset { |
652
|
38
|
|
|
38
|
1
|
998
|
my $old = (my$ self = shift)->{_HTML_DOM_cs}; |
653
|
38
|
100
|
|
|
|
82
|
$self->{_HTML_DOM_cs} = shift if @_; |
654
|
38
|
|
|
|
|
122
|
$old; |
655
|
|
|
|
|
|
|
} |
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
sub write { |
658
|
110
|
|
|
110
|
1
|
11014
|
my $self = shift; |
659
|
110
|
100
|
|
|
|
151
|
if($$self{_HTML_DOM_buffered}) { |
660
|
|
|
|
|
|
|
# Although we call this buffered, it’s actually not. Before |
661
|
|
|
|
|
|
|
# version 0.040, a recursive call to ->write on the same |
662
|
|
|
|
|
|
|
# doc object would simply record the HTML code in a buffer |
663
|
|
|
|
|
|
|
# that was processed when the elem handler that made the |
664
|
|
|
|
|
|
|
# inner call to ->write finished. Every elem handler would |
665
|
|
|
|
|
|
|
# have a wrapper (created in the elem_handler sub above) |
666
|
|
|
|
|
|
|
# that took care of this after calling the handler, by cre- |
667
|
|
|
|
|
|
|
# ating a new, temporary, parser object that would call the |
668
|
|
|
|
|
|
|
# start/end, etc., methods of our tree builder. |
669
|
|
|
|
|
|
|
# |
670
|
|
|
|
|
|
|
# This approach stops JS code like this from working (yes, |
671
|
|
|
|
|
|
|
# there *are* websites with code like this!): |
672
|
|
|
|
|
|
|
# document.write("") |
673
|
|
|
|
|
|
|
# document.getElementById("img1").src="..." |
674
|
|
|
|
|
|
|
# |
675
|
|
|
|
|
|
|
# So, now we take care of creating a new parser immedi- |
676
|
|
|
|
|
|
|
# ately. This does mean, however that we end up with mul- |
677
|
|
|
|
|
|
|
# tiple parser objects floating around in the case of |
678
|
|
|
|
|
|
|
# nested . So we have to be careful to create and |
679
|
|
|
|
|
|
|
# delete them at the right time. |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
# $$self{_HTML_DOM_buffered} actually contains a number |
682
|
|
|
|
|
|
|
# indicating the number of nested calls to ->write. |
683
|
7
|
|
|
|
|
8
|
my $level = $$self{_HTML_DOM_buffered}; |
684
|
7
|
|
|
|
|
11
|
local $$self{_HTML_DOM_buffered} = $level + 1; |
685
|
|
|
|
|
|
|
|
686
|
7
|
|
|
|
|
10
|
my($doc_elem) = $$self{_HTML_DOM_parser}; |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
# These handlers delegate the handling to methods of |
689
|
|
|
|
|
|
|
# *another* HTML::Parser object. |
690
|
|
|
|
|
|
|
my $p = $$self{_HTML_DOM_p}[$level-1] ||= |
691
|
|
|
|
|
|
|
HTML::Parser->new( |
692
|
|
|
|
|
|
|
start_h => [ |
693
|
5
|
|
|
5
|
|
10
|
sub { $doc_elem->start(@_) }, |
694
|
|
|
|
|
|
|
'tagname, attr, attrseq' |
695
|
|
|
|
|
|
|
], |
696
|
|
|
|
|
|
|
end_h => [ |
697
|
1
|
|
|
1
|
|
3
|
sub { $doc_elem->end(@_) }, |
698
|
|
|
|
|
|
|
'tagname, text' |
699
|
|
|
|
|
|
|
], |
700
|
|
|
|
|
|
|
text_h => [ |
701
|
7
|
|
|
7
|
|
9
|
sub { $doc_elem->text(@_) }, |
702
|
7
|
|
33
|
|
|
9
|
'text, is_cdata' |
703
|
|
|
|
|
|
|
], |
704
|
|
|
|
|
|
|
); |
705
|
|
|
|
|
|
|
|
706
|
7
|
|
|
|
|
243
|
$p->unbroken_text(1); # push_content, which is called by |
707
|
|
|
|
|
|
|
# H:TB:text, won't concatenate two |
708
|
|
|
|
|
|
|
# text portions if the first one |
709
|
|
|
|
|
|
|
# is a node. |
710
|
|
|
|
|
|
|
|
711
|
7
|
|
|
|
|
27
|
$p->parse(shift); |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# We can’t get rid of our parser at this point, as a subse- |
714
|
|
|
|
|
|
|
# quent ->write call from the same nested level (e.g., from |
715
|
|
|
|
|
|
|
# the same ), then we need to remove it, so we have |
719
|
|
|
|
|
|
|
# elem_handler do that for us. |
720
|
|
|
|
|
|
|
} |
721
|
|
|
|
|
|
|
else { |
722
|
|
|
|
|
|
|
my $parser |
723
|
|
|
|
|
|
|
= $$self{_HTML_DOM_parser} |
724
|
103
|
|
66
|
|
|
117
|
|| ($self->open, $$self{_HTML_DOM_parser}); |
725
|
103
|
|
|
|
|
142
|
local $$self{_HTML_DOM_buffered} = 1; |
726
|
103
|
|
|
|
|
1055
|
$parser->parse($_) for @_; |
727
|
|
|
|
|
|
|
} |
728
|
110
|
|
|
|
|
237
|
$self->_modified; |
729
|
|
|
|
|
|
|
return # nothing; |
730
|
110
|
|
|
|
|
209
|
} |
731
|
|
|
|
|
|
|
|
732
|
4
|
|
|
4
|
1
|
7
|
sub writeln { shift->write(@_,"\n") } |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
sub close { |
735
|
97
|
|
|
97
|
1
|
587
|
my $a = (my $self = shift)->{_HTML_DOM_parser}; |
736
|
97
|
100
|
|
|
|
200
|
return unless $a; |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# We can’t use eval { $a->eof } because that would catch errors |
739
|
|
|
|
|
|
|
# that are meant to propagate (a nasty bug [the so-called |
740
|
|
|
|
|
|
|
# ‘content—offset’ bug] was hidden because of an eval in ver- |
741
|
|
|
|
|
|
|
# sion 0.010). |
742
|
|
|
|
|
|
|
# return unless $a->can('eof'); |
743
|
|
|
|
|
|
|
|
744
|
94
|
|
|
|
|
323
|
$a->eof(@_); |
745
|
93
|
|
|
|
|
109
|
delete $$self{_HTML_DOM_parser}; |
746
|
93
|
|
|
|
|
186
|
$a->elementify; |
747
|
|
|
|
|
|
|
return # nothing; |
748
|
93
|
|
|
|
|
176
|
} |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
sub open { |
751
|
116
|
|
|
116
|
1
|
1988
|
(my $self = shift)->detach_content; |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
# We have to use push_content instead of simply putting it there |
754
|
|
|
|
|
|
|
# ourselves, because push_content takes care of weakening the |
755
|
|
|
|
|
|
|
# parent (and that code doesn’t belong in this package). |
756
|
|
|
|
|
|
|
$self->push_content( |
757
|
116
|
|
|
|
|
1265
|
my $tb = $$self{_HTML_DOM_parser} = new HTML::DOM::Element::HTML |
758
|
|
|
|
|
|
|
); |
759
|
|
|
|
|
|
|
|
760
|
116
|
|
|
|
|
2295
|
delete @$self{<_HTML_DOM_sheets _HTML_DOM_doctype>}; |
761
|
|
|
|
|
|
|
|
762
|
116
|
100
|
|
|
|
269
|
return unless $self->{_HTML_DOM_elem_handlers}; |
763
|
14
|
|
|
|
|
18
|
for(keys %{$self->{_HTML_DOM_elem_handlers}}) { |
|
14
|
|
|
|
|
15
|
|
764
|
|
|
|
|
|
|
$$tb{"_tweak_$_"} = |
765
|
14
|
|
|
|
|
19
|
$self->{_HTML_DOM_elem_handlers}{$_} |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
return # nothing; |
769
|
14
|
|
|
|
|
25
|
} |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=back |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=head2 Other DOM Methods |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
=over 4 |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=cut |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
#-------------- DOM STUFF (CORE) ---------------- # |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
=item doctype |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
Returns nothing |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
=item implementation |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
Returns the L object. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
=item documentElement |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
Returns the element. |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
=item createElement ( $tag ) |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=item createDocumentFragment |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
=item createTextNode ( $text ) |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
=item createComment ( $text ) |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=item createAttribute ( $name ) |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
Each of these creates a node of the appropriate type. |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=item createProcessingInstruction |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
=item createEntityReference |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
These two throw an exception. |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
=for comment |
813
|
|
|
|
|
|
|
=item createCSSStyleSheet |
814
|
|
|
|
|
|
|
This creates a style sheet (L object). |
815
|
|
|
|
|
|
|
|
816
|
|
|
|
|
|
|
=item getElementsByTagName ( $name ) |
817
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
C<$name> can be the name of the tag, or '*', to match all tag names. This |
819
|
|
|
|
|
|
|
returns a node list object in scalar context, or a list in list context. |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
=item importNode ( $node, $deep ) |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
Clones the C<$node>, setting its C attribute to the document |
824
|
|
|
|
|
|
|
with which this method is called. If C<$deep> is true, the C<$node> will |
825
|
|
|
|
|
|
|
be |
826
|
|
|
|
|
|
|
cloned recursively. |
827
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
=cut |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
2
|
1
|
|
sub doctype {} # always null |
831
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
sub implementation { |
833
|
24
|
|
|
24
|
|
101
|
no warnings 'once'; |
|
24
|
|
|
|
|
28
|
|
|
24
|
|
|
|
|
14675
|
|
834
|
2
|
|
|
2
|
1
|
4
|
return $HTML::DOM::Implementation::it; |
835
|
|
|
|
|
|
|
} |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
sub documentElement { |
838
|
103
|
|
|
103
|
1
|
517
|
($_[0]->content_list)[0] |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
sub createElement { |
842
|
354
|
|
|
354
|
1
|
59433
|
my $elem = HTML::DOM::Element->new($_[1]); |
843
|
354
|
|
|
|
|
1237
|
$elem->_set_ownerDocument(shift); |
844
|
354
|
|
|
|
|
884
|
$elem; |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
sub createDocumentFragment { |
848
|
9
|
|
|
9
|
1
|
1236
|
my $thing = HTML::DOM::DocumentFragment->new; |
849
|
9
|
|
|
|
|
25
|
$thing->_set_ownerDocument(shift); |
850
|
9
|
|
|
|
|
16
|
$thing; |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
sub createTextNode { |
854
|
606
|
|
|
606
|
1
|
2543
|
my $thing = HTML::DOM::Text->new(@_[1..$#_]); |
855
|
606
|
|
|
|
|
1220
|
$thing->_set_ownerDocument(shift); |
856
|
606
|
|
|
|
|
1433
|
$thing; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
sub createComment { |
860
|
7
|
|
|
7
|
1
|
309
|
my $thing = HTML::DOM::Comment->new(@_[1..$#_]); |
861
|
7
|
|
|
|
|
24
|
$thing->_set_ownerDocument(shift); |
862
|
7
|
|
|
|
|
21
|
$thing; |
863
|
|
|
|
|
|
|
} |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
sub createCDATASection { |
866
|
1
|
|
|
1
|
0
|
261
|
die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR, |
867
|
|
|
|
|
|
|
'The HTML DOM does not support CDATA sections' ); |
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
sub createProcessingInstruction { |
871
|
1
|
|
|
1
|
1
|
189
|
die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR, |
872
|
|
|
|
|
|
|
'The HTML DOM does not support processing instructions' ); |
873
|
|
|
|
|
|
|
} |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
sub createAttribute { |
876
|
22
|
|
|
22
|
1
|
1697
|
my $thing = HTML::DOM::Attr->new(@_[1..$#_]); |
877
|
22
|
|
|
|
|
62
|
$thing->_set_ownerDocument(shift); |
878
|
22
|
|
|
|
|
49
|
$thing; |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
sub createEntityReference { |
882
|
1
|
|
|
1
|
1
|
248
|
die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR, |
883
|
|
|
|
|
|
|
'The HTML DOM does not support entity references' ); |
884
|
|
|
|
|
|
|
} |
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
#sub createCSSStyleSheet { |
887
|
|
|
|
|
|
|
# shift; |
888
|
|
|
|
|
|
|
# require CSS'DOM; |
889
|
|
|
|
|
|
|
# ~~~ |
890
|
|
|
|
|
|
|
#} |
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
sub getElementsByTagName { |
893
|
17
|
|
|
17
|
1
|
374
|
my($self,$tagname) = @_; |
894
|
|
|
|
|
|
|
#warn "You didn't give me a tag name." if !defined $tagname; |
895
|
17
|
100
|
|
|
|
38
|
if (wantarray) { |
896
|
4
|
100
|
|
|
|
24
|
return $tagname eq '*' |
897
|
|
|
|
|
|
|
? grep tag $_ !~ /^~/, $self->descendants |
898
|
|
|
|
|
|
|
: $self->find($tagname); |
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
else { |
901
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
902
|
|
|
|
|
|
|
$tagname eq '*' |
903
|
2
|
|
|
2
|
|
6
|
? sub { grep tag $_ !~ /^~/, $self->descendants } |
904
|
17
|
|
|
17
|
|
60
|
: sub { $self->find($tagname) } |
905
|
13
|
100
|
|
|
|
123
|
); |
906
|
13
|
|
|
|
|
33
|
$self-> _register_magic_node_list($list); |
907
|
13
|
|
|
|
|
122
|
$list; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
sub importNode { |
912
|
12
|
|
|
12
|
1
|
214
|
my ($self, $node, $deep) = @_; |
913
|
12
|
100
|
|
|
|
39
|
die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR, |
914
|
|
|
|
|
|
|
'Documents cannot be imported.' ) |
915
|
|
|
|
|
|
|
if $node->nodeType ==DOCUMENT_NODE; |
916
|
10
|
|
|
|
|
34
|
(my $clown = $node->cloneNode($deep)) |
917
|
|
|
|
|
|
|
->_set_ownerDocument($self); |
918
|
10
|
100
|
|
|
|
46
|
if($clown->can('descendants')) { # otherwise it’s an Attr, so this |
919
|
8
|
|
|
|
|
16
|
for($clown->descendants) { # isn’t necessary |
920
|
4
|
|
|
|
|
4
|
delete $_->{_HTML_DOM_owner}; |
921
|
|
|
|
|
|
|
}} |
922
|
10
|
|
|
|
|
18
|
$clown; |
923
|
|
|
|
|
|
|
} |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
#-------------- DOM STUFF (HTML) ---------------- # |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=item alinkColor |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=item background |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
=item bgColor |
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
=item fgColor |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
=item linkColor |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
=item vlinkColor |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
These six methods return (optionally set) the corresponding attributes of |
940
|
|
|
|
|
|
|
the body element. Note that most of the names do not map directly to the |
941
|
|
|
|
|
|
|
names of |
942
|
|
|
|
|
|
|
the attributes. C refers to the C attribute. Those that |
943
|
|
|
|
|
|
|
end |
944
|
|
|
|
|
|
|
with 'linkColor' refer to the attributes of the same name but without the |
945
|
|
|
|
|
|
|
'Color' on the end. |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
=cut |
948
|
|
|
|
|
|
|
|
949
|
6
|
|
100
|
6
|
1
|
10
|
sub alinkColor { (shift->body||return "")->aLink (@_) } |
950
|
6
|
|
100
|
6
|
1
|
731
|
sub background { (shift->body||return "")->background(@_) } |
951
|
6
|
|
100
|
6
|
1
|
712
|
sub bgColor { (shift->body||return "")->bgColor (@_) } |
952
|
6
|
|
100
|
6
|
1
|
766
|
sub fgColor { (shift->body||return "")->text (@_) } |
953
|
6
|
|
100
|
6
|
1
|
772
|
sub linkColor { (shift->body||return "")->link (@_) } |
954
|
6
|
|
100
|
6
|
1
|
766
|
sub vlinkColor { (shift->body||return "")->vLink (@_) } |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=item title |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
Returns (or optionally sets) the title of the page. |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
=item referrer |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
Returns the page's referrer. |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
=item domain |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
Returns the domain name portion of the document's URL. |
967
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
=item URL |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
Returns the document's URL. |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
=item body |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
Returns the body element, or the outermost frame set if the document has |
975
|
|
|
|
|
|
|
frames. You can set the body by passing an element as an argument, in |
976
|
|
|
|
|
|
|
which |
977
|
|
|
|
|
|
|
case the old body element is returned. |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
=item images |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
=item applets |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
=item links |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
=item forms |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
=item anchors |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
These five methods each return a list of the appropriate elements in list |
990
|
|
|
|
|
|
|
context, or an L object in scalar context. In this |
991
|
|
|
|
|
|
|
latter case, the object will update automatically when the document is |
992
|
|
|
|
|
|
|
modified. |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
In the case of C you can access those by using the HTML::DOM object |
995
|
|
|
|
|
|
|
itself as a hash. I.e., you can write C<< $doc->{f} >> instead of |
996
|
|
|
|
|
|
|
S<< C<< $doc->forms->{f} >> >>. |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
=for comment |
999
|
|
|
|
|
|
|
# ~~~ Why on earth did I ever put this in the docs?! |
1000
|
|
|
|
|
|
|
B I need to make these methods cache the HTML collection objects |
1001
|
|
|
|
|
|
|
that they create. Once I've done this, I can make list context use those |
1002
|
|
|
|
|
|
|
objects, as well as scalar context. |
1003
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
=item cookie |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
This returns a string containing the document's cookies (the format may |
1007
|
|
|
|
|
|
|
still change). If you pass an |
1008
|
|
|
|
|
|
|
argument, it |
1009
|
|
|
|
|
|
|
will set a cookie as well. Both Netscape-style and RFC2965-style cookie |
1010
|
|
|
|
|
|
|
headers are supported. |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
=cut |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
sub title { |
1015
|
12
|
|
|
12
|
1
|
290
|
my $doc = shift; |
1016
|
12
|
100
|
|
|
|
28
|
if(my $title_elem = $doc->find('title')) { |
1017
|
8
|
|
|
|
|
19
|
$title_elem->text(@_); |
1018
|
|
|
|
|
|
|
} |
1019
|
|
|
|
|
|
|
else { |
1020
|
4
|
100
|
|
|
|
10
|
return "" unless @_; |
1021
|
3
|
|
66
|
|
|
6
|
( $doc->find('head') |
1022
|
|
|
|
|
|
|
|| ( $doc->find('html') |
1023
|
|
|
|
|
|
|
|| $doc->appendChild($doc->createElement('html')) |
1024
|
|
|
|
|
|
|
)->appendChild($doc->createElement('head')) |
1025
|
|
|
|
|
|
|
)->appendChild( |
1026
|
|
|
|
|
|
|
my $t = $doc->createElement('title') |
1027
|
|
|
|
|
|
|
); |
1028
|
3
|
|
|
|
|
9
|
$t->text(@_); |
1029
|
3
|
|
|
|
|
9
|
return ""; |
1030
|
|
|
|
|
|
|
} |
1031
|
|
|
|
|
|
|
} |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
sub referrer { |
1034
|
5
|
|
|
5
|
1
|
740
|
my $referrer = shift->{_HTML_DOM_referrer}; |
1035
|
5
|
50
|
|
|
|
19
|
defined $referrer ? $referrer : (); |
1036
|
|
|
|
|
|
|
} |
1037
|
|
|
|
|
|
|
|
1038
|
24
|
|
|
24
|
|
103
|
sub domain { no strict; |
|
24
|
|
|
|
|
26
|
|
|
24
|
|
|
|
|
14253
|
|
1039
|
2
|
|
|
2
|
1
|
4
|
my $doc = shift; |
1040
|
2
|
|
|
|
|
1
|
host {ref $doc->{_HTML_DOM_url} ? $doc->{_HTML_DOM_url} |
1041
|
2
|
100
|
|
|
|
4
|
: ($doc->{_HTML_DOM_url} = URI->new($doc->{_HTML_DOM_url}))}; |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
sub URL { |
1045
|
151
|
|
|
151
|
1
|
1281
|
my $url = shift->{_HTML_DOM_url}; |
1046
|
151
|
100
|
|
|
|
529
|
defined $url ? "$url" : undef; |
1047
|
|
|
|
|
|
|
} |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
sub body { # ~~~ this needs to return the outermost frameset element if |
1050
|
|
|
|
|
|
|
# there is one (if the frameset is always the second child |
1051
|
|
|
|
|
|
|
# of , then it already does). |
1052
|
78
|
|
|
78
|
1
|
1505
|
my $body = ($_[0]->documentElement->content_list)[1]; |
1053
|
78
|
100
|
66
|
|
|
276
|
if (!$body || $body->tag !~ /^(?:body|frameset)\z/) { |
1054
|
24
|
|
|
|
|
40
|
$body = $_[0]->find('body','frameset'); |
1055
|
|
|
|
|
|
|
} |
1056
|
78
|
100
|
|
|
|
128
|
if(@_>1) { |
1057
|
2
|
|
|
|
|
3
|
my $doc_elem = $_[0]->documentElement; |
1058
|
|
|
|
|
|
|
# I'm using the replaceChild rather than replace_with, |
1059
|
|
|
|
|
|
|
# despite the former's convoluted syntax, since the former |
1060
|
|
|
|
|
|
|
# has the appropriate error-checking code (or will), and |
1061
|
|
|
|
|
|
|
# also because it triggers mutation events. |
1062
|
2
|
|
|
|
|
8
|
$doc_elem->replaceChild($_[1],$body) |
1063
|
|
|
|
|
|
|
} |
1064
|
|
|
|
|
|
|
else { |
1065
|
76
|
|
|
|
|
316
|
$body |
1066
|
|
|
|
|
|
|
} |
1067
|
|
|
|
|
|
|
} |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
sub images { |
1070
|
2
|
|
|
2
|
1
|
6
|
my $self = shift; |
1071
|
2
|
100
|
|
|
|
5
|
if (wantarray) { |
1072
|
1
|
|
|
|
|
4
|
return grep tag $_ eq 'img', $self->descendants; |
1073
|
|
|
|
|
|
|
} |
1074
|
|
|
|
|
|
|
else { |
1075
|
|
|
|
|
|
|
my $collection = HTML::DOM::Collection->new( |
1076
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
1077
|
1
|
|
|
1
|
|
6
|
sub { grep tag $_ eq 'img', $self->descendants } |
1078
|
1
|
|
|
|
|
9
|
)); |
1079
|
1
|
|
|
|
|
3
|
$self-> _register_magic_node_list($list); |
1080
|
1
|
|
|
|
|
20
|
$collection; |
1081
|
|
|
|
|
|
|
} |
1082
|
|
|
|
|
|
|
} |
1083
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
sub applets { |
1085
|
2
|
|
|
2
|
1
|
3
|
my $self = shift; |
1086
|
2
|
100
|
|
|
|
5
|
if (wantarray) { |
1087
|
1
|
|
|
|
|
3
|
return grep $_->tag =~ /^(?:objec|apple)t\z/, |
1088
|
|
|
|
|
|
|
$self->descendants; |
1089
|
|
|
|
|
|
|
} |
1090
|
|
|
|
|
|
|
else { |
1091
|
|
|
|
|
|
|
my $collection = HTML::DOM::Collection->new( |
1092
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
1093
|
1
|
|
|
1
|
|
3
|
sub { grep $_->tag =~ /^(?:objec|apple)t\z/, |
1094
|
|
|
|
|
|
|
$self->descendants } |
1095
|
1
|
|
|
|
|
10
|
)); |
1096
|
1
|
|
|
|
|
2
|
$self-> _register_magic_node_list($list); |
1097
|
1
|
|
|
|
|
3
|
$collection; |
1098
|
|
|
|
|
|
|
} |
1099
|
|
|
|
|
|
|
} |
1100
|
|
|
|
|
|
|
|
1101
|
|
|
|
|
|
|
sub links { |
1102
|
4
|
|
|
4
|
1
|
351
|
my $self = shift; |
1103
|
4
|
100
|
|
|
|
10
|
if (wantarray) { |
1104
|
|
|
|
|
|
|
return grep { |
1105
|
1
|
|
|
|
|
3
|
my $tag = tag $_; |
|
38
|
|
|
|
|
50
|
|
1106
|
38
|
100
|
100
|
|
|
98
|
$tag eq 'area' || $tag eq 'a' |
1107
|
|
|
|
|
|
|
&& defined $_->attr('href') |
1108
|
|
|
|
|
|
|
} $self->descendants; |
1109
|
|
|
|
|
|
|
} |
1110
|
|
|
|
|
|
|
else { |
1111
|
|
|
|
|
|
|
my $collection = HTML::DOM::Collection->new( |
1112
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
1113
|
|
|
|
|
|
|
sub { grep { |
1114
|
3
|
|
|
3
|
|
17
|
my $tag = tag $_; |
|
90
|
|
|
|
|
139
|
|
1115
|
90
|
100
|
100
|
|
|
228
|
$tag eq 'area' || $tag eq 'a' |
1116
|
|
|
|
|
|
|
&& defined $_->attr('href') |
1117
|
|
|
|
|
|
|
} $self->descendants } |
1118
|
3
|
|
|
|
|
24
|
)); |
1119
|
3
|
|
|
|
|
8
|
$self-> _register_magic_node_list($list); |
1120
|
3
|
|
|
|
|
44
|
$collection; |
1121
|
|
|
|
|
|
|
} |
1122
|
|
|
|
|
|
|
} |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
sub forms { |
1125
|
39
|
|
|
39
|
1
|
484
|
my $self = shift; |
1126
|
39
|
100
|
|
|
|
66
|
if (wantarray) { |
1127
|
17
|
|
|
|
|
46
|
return grep tag $_ eq 'form', $self->descendants; |
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
else { |
1130
|
|
|
|
|
|
|
my $collection = HTML::DOM::Collection->new( |
1131
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
1132
|
22
|
|
|
22
|
|
71
|
sub { grep tag $_ eq 'form', $self->descendants } |
1133
|
22
|
|
|
|
|
151
|
)); |
1134
|
22
|
|
|
|
|
45
|
$self-> _register_magic_node_list($list); |
1135
|
22
|
|
|
|
|
203
|
$collection; |
1136
|
|
|
|
|
|
|
} |
1137
|
|
|
|
|
|
|
} |
1138
|
|
|
|
|
|
|
|
1139
|
|
|
|
|
|
|
sub anchors { |
1140
|
2
|
|
|
2
|
1
|
2
|
my $self = shift; |
1141
|
2
|
100
|
|
|
|
5
|
if (wantarray) { |
1142
|
1
|
|
100
|
|
|
3
|
return grep tag $_ eq 'a' && defined $_->attr('name'), |
1143
|
|
|
|
|
|
|
$self->descendants; |
1144
|
|
|
|
|
|
|
} |
1145
|
|
|
|
|
|
|
else { |
1146
|
|
|
|
|
|
|
my $collection = HTML::DOM::Collection->new( |
1147
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
1148
|
1
|
|
100
|
1
|
|
3
|
sub { grep tag $_ eq 'a' && defined $_->attr('name'), |
1149
|
|
|
|
|
|
|
$self->descendants } |
1150
|
1
|
|
|
|
|
34
|
)); |
1151
|
1
|
|
|
|
|
2
|
$self-> _register_magic_node_list($list); |
1152
|
1
|
|
|
|
|
3
|
$collection; |
1153
|
|
|
|
|
|
|
} |
1154
|
|
|
|
|
|
|
} |
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
|
1157
|
|
|
|
|
|
|
sub cookie { |
1158
|
8
|
|
|
8
|
1
|
445
|
my $self = shift; |
1159
|
8
|
100
|
|
|
|
12
|
return '' unless defined (my $jar = $self->{_HTML_DOM_jar}); |
1160
|
6
|
|
|
|
|
7
|
my $return; |
1161
|
6
|
50
|
|
|
|
11
|
if (defined wantarray) { |
1162
|
|
|
|
|
|
|
# Yes, this is nuts (getting HTTP::Cookies to join the cookies, and |
1163
|
|
|
|
|
|
|
# splitting them, filtering them, and joining them again[!]), but |
1164
|
|
|
|
|
|
|
# &HTTP::Cookies::add_cookie_header is long and complicated, and I |
1165
|
|
|
|
|
|
|
# don't want to replicate it here. |
1166
|
24
|
|
|
24
|
|
100
|
no warnings 'uninitialized'; |
|
24
|
|
|
|
|
25
|
|
|
24
|
|
|
|
|
7547
|
|
1167
|
6
|
|
|
|
|
7
|
my $reqclone = $self->{_HTML_DOM_response}->request->clone; |
1168
|
|
|
|
|
|
|
# Yes this is a bit strange, but we don’t want to put |
1169
|
|
|
|
|
|
|
# ‘use HTTP::Header 1.59’ in this file, as it would mean loading the |
1170
|
|
|
|
|
|
|
# module even for people who are not using this feature or who are |
1171
|
|
|
|
|
|
|
# duck-typing. |
1172
|
6
|
50
|
66
|
|
|
587
|
if (!$reqclone->can('header_field_names') |
1173
|
0
|
|
|
|
|
0
|
&& $reqclone->isa("HTTP::Headers")) { VERSION HTTP::Headers:: 1.59 } |
1174
|
6
|
|
|
|
|
13
|
for($reqclone->header_field_names) { |
1175
|
2
|
50
|
|
|
|
80
|
/cookie/i and remove_header $reqclone $_; |
1176
|
|
|
|
|
|
|
} |
1177
|
6
|
|
|
|
|
91
|
$return = join ';', grep !/\$/, |
1178
|
|
|
|
|
|
|
$jar->add_cookie_header( |
1179
|
|
|
|
|
|
|
$reqclone |
1180
|
|
|
|
|
|
|
)-> header ('Cookie') |
1181
|
|
|
|
|
|
|
# Pieces of this regexp were stolen from HTTP::Headers::Util: |
1182
|
|
|
|
|
|
|
=~ /\G\s* # initial whitespace |
1183
|
|
|
|
|
|
|
( |
1184
|
|
|
|
|
|
|
[^\s=;,]+ # name |
1185
|
|
|
|
|
|
|
\s*=\s* # = |
1186
|
|
|
|
|
|
|
(?: |
1187
|
|
|
|
|
|
|
\"(?:[^\"\\]*(?:\\.[^\"\\]*)*)\" # quoted value |
1188
|
|
|
|
|
|
|
| |
1189
|
|
|
|
|
|
|
[^;,\s]* # unquoted value |
1190
|
|
|
|
|
|
|
) |
1191
|
|
|
|
|
|
|
) |
1192
|
|
|
|
|
|
|
\s*;? |
1193
|
|
|
|
|
|
|
/xg; |
1194
|
|
|
|
|
|
|
} |
1195
|
6
|
100
|
|
|
|
1409
|
if (@_) { |
1196
|
3
|
50
|
|
|
|
7
|
return unless defined $self->{_HTML_DOM_response}; |
1197
|
3
|
|
|
|
|
13
|
require HTTP::Headers::Util; |
1198
|
|
|
|
|
|
|
(undef,undef, my%split) = |
1199
|
3
|
|
|
|
|
3
|
@{(HTTP::Headers::Util::split_header_words($_[0]))[0]}; |
|
3
|
|
|
|
|
7
|
|
1200
|
3
|
|
|
|
|
224
|
my $rfc; |
1201
|
3
|
|
|
|
|
6
|
for(keys %split){ |
1202
|
|
|
|
|
|
|
# I *hope* this always works! (NS cookies should have no version.) |
1203
|
6
|
100
|
|
|
|
14
|
++ $rfc, last if lc $_ eq 'version'; |
1204
|
|
|
|
|
|
|
} |
1205
|
|
|
|
|
|
|
(my $clone = $self->{_HTML_DOM_response}->clone) |
1206
|
3
|
|
|
|
|
6
|
->remove_header(qw/ Set-Cookie Set-Cookie2 /); |
1207
|
3
|
|
|
|
|
556
|
$clone->header('Set-Cookie' . 2 x!! $rfc => $_[0]); |
1208
|
3
|
|
|
|
|
133
|
$jar->extract_cookies($clone); |
1209
|
|
|
|
|
|
|
} |
1210
|
6
|
100
|
|
|
|
894
|
$return||''; |
1211
|
|
|
|
|
|
|
} |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
=item getElementById |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
=item getElementsByName |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
=item getElementsByClassName |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
These three do what their names imply. The last two |
1220
|
|
|
|
|
|
|
will return a list in list context, or a node list |
1221
|
|
|
|
|
|
|
object in scalar context. Calling them in list |
1222
|
|
|
|
|
|
|
context is probably more efficient. |
1223
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
=cut |
1225
|
|
|
|
|
|
|
|
1226
|
|
|
|
|
|
|
sub getElementById { |
1227
|
163
|
|
|
163
|
1
|
5682
|
my(@pile) = grep ref($_), @{shift->{'_content'}}; |
|
163
|
|
|
|
|
239
|
|
1228
|
163
|
|
|
|
|
177
|
my $id = shift; |
1229
|
163
|
|
|
|
|
111
|
my $this; |
1230
|
163
|
|
|
|
|
260
|
while(@pile) { |
1231
|
24
|
|
|
24
|
|
104
|
no warnings 'uninitialized'; |
|
24
|
|
|
|
|
29
|
|
|
24
|
|
|
|
|
10978
|
|
1232
|
2501
|
|
|
|
|
1733
|
$this = shift @pile; |
1233
|
2501
|
100
|
|
|
|
3269
|
$this->id eq $id and return $this; |
1234
|
2339
|
|
|
|
|
3119
|
unshift @pile, grep ref($_), $this->content_list; |
1235
|
|
|
|
|
|
|
} |
1236
|
1
|
|
|
|
|
3
|
return; |
1237
|
|
|
|
|
|
|
} |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub getElementsByName { |
1240
|
3
|
|
|
3
|
1
|
9
|
my($self,$name) = @_; |
1241
|
3
|
100
|
|
|
|
7
|
if (wantarray) { |
1242
|
2
|
|
|
|
|
25
|
return $self->look_down(name => "$name"); |
1243
|
|
|
|
|
|
|
} |
1244
|
|
|
|
|
|
|
else { |
1245
|
|
|
|
|
|
|
my $list = HTML::DOM::NodeList::Magic->new( |
1246
|
1
|
|
|
1
|
|
6
|
sub { $self->look_down(name => "$name"); } |
1247
|
1
|
|
|
|
|
5
|
); |
1248
|
1
|
|
|
|
|
2
|
$self-> _register_magic_node_list($list); |
1249
|
1
|
|
|
|
|
3
|
$list; |
1250
|
|
|
|
|
|
|
} |
1251
|
|
|
|
|
|
|
} |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
sub getElementsByClassName { |
1254
|
9
|
|
|
9
|
1
|
398
|
splice @_, 2, @_, 1; # Remove extra elements; add a true third elem |
1255
|
9
|
|
|
|
|
25
|
goto &HTML'DOM'Element'_getElementsByClassName; |
1256
|
|
|
|
|
|
|
} |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
# ---------- DocumentEvent interface -------------- # |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=item createEvent ( $category ) |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
Creates a new event object, believe it or not. |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
The C<$category> is the DOM event category, which determines what type of |
1265
|
|
|
|
|
|
|
event object will be returned. The currently supported event categories |
1266
|
|
|
|
|
|
|
are MouseEvents, UIEvents, HTMLEvents and MutationEvents. |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
You can omit the C<$category> to create an instance of the event base class |
1269
|
|
|
|
|
|
|
(not officially part of the DOM). |
1270
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
=cut |
1272
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
sub createEvent { |
1274
|
267
|
|
|
267
|
1
|
2310
|
require HTML'DOM'Event; |
1275
|
267
|
|
100
|
|
|
761
|
HTML'DOM'Event'create_event($_[1]||''); |
1276
|
|
|
|
|
|
|
} |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
# ---------- DocumentView interface -------------- # |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
=item defaultView |
1281
|
|
|
|
|
|
|
|
1282
|
|
|
|
|
|
|
Returns the L object associated with the document. |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
There is no such object by default; you have to put one there yourself: |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
Although it is supposed to be read-only according to the DOM, you can set |
1287
|
|
|
|
|
|
|
this attribute by passing an argument to it. It I still marked as |
1288
|
|
|
|
|
|
|
read-only in |
1289
|
|
|
|
|
|
|
L|HTML::DOM::Interface>. |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
If you do set it, it is recommended that the object be a subclass of |
1292
|
|
|
|
|
|
|
L. |
1293
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
This attribute holds a weak reference to the object. |
1295
|
|
|
|
|
|
|
|
1296
|
|
|
|
|
|
|
=cut |
1297
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
sub defaultView { |
1299
|
1886
|
|
|
1886
|
1
|
1401
|
my $self = shift; |
1300
|
1886
|
|
|
|
|
2126
|
my $old = $self->{_HTML_DOM_view}; |
1301
|
1886
|
100
|
|
|
|
3075
|
if(@_) { |
1302
|
4
|
|
|
|
|
5
|
weaken($self->{_HTML_DOM_view} = shift); |
1303
|
|
|
|
|
|
|
} |
1304
|
1886
|
100
|
|
|
|
5551
|
return defined $old ? $old : (); |
1305
|
|
|
|
|
|
|
} |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
# ---------- DocumentStyle interface -------------- # |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
=item styleSheets |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
Returns a L of the document's style sheets, or a |
1312
|
|
|
|
|
|
|
simple list in list context. |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
=cut |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
sub styleSheets { |
1317
|
25
|
|
|
25
|
1
|
282
|
my $doc = shift; |
1318
|
|
|
|
|
|
|
my $ret = ( |
1319
|
|
|
|
|
|
|
$doc->{_HTML_DOM_sheets} or |
1320
|
|
|
|
|
|
|
$doc->{_HTML_DOM_sheets} = ( |
1321
|
|
|
|
|
|
|
require CSS::DOM::StyleSheetList, |
1322
|
|
|
|
|
|
|
new CSS::DOM::StyleSheetList |
1323
|
|
|
|
|
|
|
), |
1324
|
|
|
|
|
|
|
$doc->_populate_sheet_list, |
1325
|
|
|
|
|
|
|
$doc->{_HTML_DOM_sheets} |
1326
|
25
|
|
66
|
|
|
44
|
); |
1327
|
25
|
100
|
|
|
|
88
|
wantarray ? @$ret : $ret; |
1328
|
|
|
|
|
|
|
} |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
=item innerHTML |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
Serialises and returns the HTML document. If you pass an argument, it will |
1333
|
|
|
|
|
|
|
set the contents of the document via C, C and C, |
1334
|
|
|
|
|
|
|
returning a serialisation of the old contents. |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
=cut |
1337
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
sub innerHTML { |
1339
|
25
|
|
|
25
|
1
|
5910
|
my $self = shift; |
1340
|
25
|
|
|
|
|
22
|
my $old; |
1341
|
25
|
50
|
100
|
|
|
67
|
$old = join '' , $self->{_HTML_DOM_doctype}||'', |
|
|
100
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
map |
1343
|
|
|
|
|
|
|
HTML'DOM'Element'_html_element_adds_newline |
1344
|
|
|
|
|
|
|
? substr(( |
1345
|
|
|
|
|
|
|
as_HTML $_ (undef)x2,{} |
1346
|
|
|
|
|
|
|
), 0, -1) |
1347
|
|
|
|
|
|
|
: $_->as_HTML((undef)x2,{}), |
1348
|
|
|
|
|
|
|
$self->content_list |
1349
|
|
|
|
|
|
|
if defined wantarray; |
1350
|
25
|
100
|
|
|
|
59
|
if(@_){ |
1351
|
13
|
|
|
|
|
28
|
$self->open(); |
1352
|
13
|
|
|
|
|
34
|
$self->write(shift); |
1353
|
13
|
|
|
|
|
27
|
$self->close(); |
1354
|
|
|
|
|
|
|
} |
1355
|
|
|
|
|
|
|
$old |
1356
|
25
|
|
|
|
|
86
|
} |
1357
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=item location |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=item set_location_object (non-DOM) |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
C returns the location object, if you've put one there with |
1364
|
|
|
|
|
|
|
C. HTML::DOM doesn't actually implement such an object |
1365
|
|
|
|
|
|
|
itself, but provides the appropriate magic to make |
1366
|
|
|
|
|
|
|
C<< $doc->location($foo) >> translate into |
1367
|
|
|
|
|
|
|
C<< $doc->location->href($foo) >>. |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
BTW, the location object had better be true when used as a boolean, or |
1370
|
|
|
|
|
|
|
HTML::DOM will think it doesn't exist. |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
=cut |
1373
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
sub location { |
1375
|
3
|
|
|
3
|
1
|
7
|
my $self = shift; |
1376
|
3
|
100
|
50
|
|
|
8
|
@_ and ($$self{_HTML_DOM_loc}||die "Can't assign to location" |
1377
|
|
|
|
|
|
|
." without a location object")->href(@_); |
1378
|
|
|
|
|
|
|
$$self{_HTML_DOM_loc}||() |
1379
|
3
|
100
|
|
|
|
5
|
} |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
sub set_location_object { |
1382
|
1
|
|
|
1
|
1
|
3
|
$_[0]{_HTML_DOM_loc} = $_[1]; |
1383
|
|
|
|
|
|
|
} |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
=item lastModified |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
This method returns the document's modification date as gleaned from the |
1389
|
|
|
|
|
|
|
response object passed to the constructor, in MM/DD/YYYY HH:MM:SS format. |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
If there is no modification date, an empty string is returned, but this |
1392
|
|
|
|
|
|
|
may change in the future. |
1393
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
=begin comment |
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
When there is no modification date, the return value is different in every |
1397
|
|
|
|
|
|
|
browser. |
1398
|
|
|
|
|
|
|
NS 2-4 and Opera 9 have the epoch (in GMT format). |
1399
|
|
|
|
|
|
|
Firefox 3 has the time the page was loaded. |
1400
|
|
|
|
|
|
|
Safari 4 has an empty string (it uses GMT format when there is a mod time). |
1401
|
|
|
|
|
|
|
IE, 6-8 the only one to comply with HTML 5, has the current time; but HTML |
1402
|
|
|
|
|
|
|
5 is illogical, since it makes no sense for the modification time to keep |
1403
|
|
|
|
|
|
|
ticking away. |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
I’ve opted to use the empty string for now, since we can’t *really* find |
1406
|
|
|
|
|
|
|
out the modification time--only what the server *says* it is. And if the |
1407
|
|
|
|
|
|
|
server doesn’t say, it’s no use pretending that it did say it. |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
=end comment |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
=cut |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
sub lastModified { |
1414
|
4
|
100
|
100
|
4
|
1
|
517
|
my $time = ($_[0]{_HTML_DOM_response} || return '')->last_modified |
1415
|
|
|
|
|
|
|
or return ''; |
1416
|
2
|
|
|
|
|
553
|
require Date'Format; |
1417
|
2
|
|
|
|
|
3359
|
Date'Format'time2str("%d/%m/%Y %X", $time); |
1418
|
|
|
|
|
|
|
} |
1419
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
=back |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
=cut |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
|
1426
|
|
|
|
|
|
|
# ---------- OVERRIDDEN NODE & EVENT TARGET METHODS -------------- # |
1427
|
|
|
|
|
|
|
|
1428
|
|
|
|
44
|
1
|
|
sub ownerDocument {} # empty list |
1429
|
1
|
|
|
1
|
1
|
309
|
sub nodeName { '#document' } |
1430
|
24
|
|
|
24
|
|
101
|
{ no warnings 'once'; *nodeType = \& DOCUMENT_NODE; } |
|
24
|
|
|
|
|
24
|
|
|
24
|
|
|
|
|
1818
|
|
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
=head2 Other (Non-DOM) Methods |
1433
|
|
|
|
|
|
|
|
1434
|
|
|
|
|
|
|
(See also L, below.) |
1435
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
=over 4 |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
=item $tree->base |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
Returns the base URL of the page; either from a tag or the |
1441
|
|
|
|
|
|
|
URL passed to C. |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
=cut |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
sub base { |
1446
|
157
|
|
|
157
|
1
|
154
|
my $doc = shift; |
1447
|
157
|
100
|
|
|
|
587
|
if( |
1448
|
|
|
|
|
|
|
my $base_elem = $doc->look_down(_tag => 'base', href => qr)(?:\))) |
1449
|
|
|
|
|
|
|
){ |
1450
|
10
|
|
|
|
|
23
|
return ''.$base_elem->attr('href'); |
1451
|
|
|
|
|
|
|
} |
1452
|
|
|
|
|
|
|
else { |
1453
|
24
|
|
|
24
|
|
85
|
no warnings 'uninitialized'; |
|
24
|
|
|
|
|
26
|
|
|
24
|
|
|
|
|
11319
|
|
1454
|
147
|
100
|
|
|
|
114
|
''.base{$$doc{_HTML_DOM_response}||return$doc->URL} |
|
147
|
|
|
|
|
165
|
|
1455
|
|
|
|
|
|
|
} |
1456
|
|
|
|
|
|
|
} |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
=item $tree->magic_forms |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
This is mainly for internal use. This returns a boolean indicating whether |
1461
|
|
|
|
|
|
|
the parser needed to associate formies with a form that did not contain |
1462
|
|
|
|
|
|
|
them. This happens when a closing tag is missing and the form is |
1463
|
|
|
|
|
|
|
closed implicitly, but a formie is encountered later. |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
=cut |
1466
|
|
|
|
|
|
|
|
1467
|
1580
|
50
|
|
1580
|
1
|
3295
|
sub magic_forms { @_ and ++$_[0]{_HTML_DOM_mg_f}; $_[0]{_HTML_DOM_mg_f} } |
|
1580
|
|
|
|
|
1951
|
|
1468
|
|
|
|
|
|
|
|
1469
|
|
|
|
|
|
|
=back |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
=head1 HASH ACCESS |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
You can use an HTML::DOM object as a hash ref to access it's form elements |
1474
|
|
|
|
|
|
|
by name. So C<< $doc->{yayaya} >> is short for |
1475
|
|
|
|
|
|
|
S<< C<< $doc->forms->{yayaya} >> >>. |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
=head1 EVENT HANDLING |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
HTML::DOM supports both the DOM Level 2 event model and the HTML 4 event |
1480
|
|
|
|
|
|
|
model. |
1481
|
|
|
|
|
|
|
|
1482
|
|
|
|
|
|
|
Throughout this documentation, we make use of HTML 5's distinction between |
1483
|
|
|
|
|
|
|
handlers and listeners: An event handler is the result of an HTML element |
1484
|
|
|
|
|
|
|
beginning with 'on', e.g. onsubmit. These are also accessible via the DOM. |
1485
|
|
|
|
|
|
|
(We also use the word 'handler' in other contexts, such as the 'default |
1486
|
|
|
|
|
|
|
event handler'.) |
1487
|
|
|
|
|
|
|
Event listeners are registered solely with the C method |
1488
|
|
|
|
|
|
|
and can be removed with C. |
1489
|
|
|
|
|
|
|
|
1490
|
|
|
|
|
|
|
HTML::DOM accepts as an event handler a coderef, an object with a |
1491
|
|
|
|
|
|
|
C method, or an object with C<&{}> overloading. If the |
1492
|
|
|
|
|
|
|
C method is present, it is called with the current event |
1493
|
|
|
|
|
|
|
target as the first argument and the event object as the second. |
1494
|
|
|
|
|
|
|
This is to allow for objects that wrap JavaScript functions (which must be called with the event target as the B value). |
1495
|
|
|
|
|
|
|
|
1496
|
|
|
|
|
|
|
An event listener is a coderef, an object with a C |
1497
|
|
|
|
|
|
|
method or an object with C<&{}> overloading. HTML::DOM does not implement |
1498
|
|
|
|
|
|
|
any classes that provide a C method, but will support any |
1499
|
|
|
|
|
|
|
object that has one. |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
Listeners and handlers differ in one important aspect. A listener has to |
1502
|
|
|
|
|
|
|
call C on the event object to cancel the default action. A |
1503
|
|
|
|
|
|
|
handler simply returns a defined false value (except for mouseover events, |
1504
|
|
|
|
|
|
|
which must return a true value to cancel the default). |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
=head2 Default Actions |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
Default actions that HTML::DOM is capable of handling internally (such as |
1509
|
|
|
|
|
|
|
triggering a DOMActivate event when an element is clicked, and triggering a |
1510
|
|
|
|
|
|
|
form's submit event when the submit button is activated) are dealt with |
1511
|
|
|
|
|
|
|
automatically. You don't have to worry about those. For others, read |
1512
|
|
|
|
|
|
|
on.... |
1513
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
To specify the default actions associated with an event, provide a |
1515
|
|
|
|
|
|
|
subroutine (in this case, it not being part of the DOM, you can't use an |
1516
|
|
|
|
|
|
|
object with a C method) via the C |
1517
|
|
|
|
|
|
|
and |
1518
|
|
|
|
|
|
|
C methods. |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
With the former, you can specify the |
1521
|
|
|
|
|
|
|
default action to be taken when a particular type of event occurs. The |
1522
|
|
|
|
|
|
|
currently supported types are: |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
submit when a form is submitted |
1525
|
|
|
|
|
|
|
link called when a link is activated (DOMActivate event) |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
Pass the type of event as the first argument and a code ref as the second |
1528
|
|
|
|
|
|
|
argument. When the code ref is called, its sole argument will |
1529
|
|
|
|
|
|
|
be the event object. For instance: |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
$dom_tree->default_event_handler_for( link => sub { |
1532
|
|
|
|
|
|
|
my $event = shift; |
1533
|
|
|
|
|
|
|
go_to( $event->target->href ); |
1534
|
|
|
|
|
|
|
}); |
1535
|
|
|
|
|
|
|
sub go_to { ... } |
1536
|
|
|
|
|
|
|
|
1537
|
|
|
|
|
|
|
C with just one argument returns the |
1538
|
|
|
|
|
|
|
currently |
1539
|
|
|
|
|
|
|
assigned coderef. With two arguments it returns the old one after |
1540
|
|
|
|
|
|
|
assigning the new one. |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
Use C (without the C<_for>) to specify a fallback |
1543
|
|
|
|
|
|
|
subroutine that will be used for events not in the list above, and for |
1544
|
|
|
|
|
|
|
events in the list above that do not have subroutines assigned to them. |
1545
|
|
|
|
|
|
|
Without any arguments it will return the currently |
1546
|
|
|
|
|
|
|
assigned coderef. With an argument it will return the old one after |
1547
|
|
|
|
|
|
|
assigning the new one. |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=head2 Dispatching Events |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
HTML::DOM::Node's C method triggers the appropriate event |
1552
|
|
|
|
|
|
|
listeners, but does B call any default actions associated with it. |
1553
|
|
|
|
|
|
|
The return value is a boolean that indicates whether the default action |
1554
|
|
|
|
|
|
|
should be taken. |
1555
|
|
|
|
|
|
|
|
1556
|
|
|
|
|
|
|
H:D:Node's C method will trigger the event for real. It will |
1557
|
|
|
|
|
|
|
call C and, provided it returns true, will call the default |
1558
|
|
|
|
|
|
|
event handler. |
1559
|
|
|
|
|
|
|
|
1560
|
|
|
|
|
|
|
=head2 HTML Event Attributes |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
The C can be used to assign a coderef that will turn |
1563
|
|
|
|
|
|
|
text assigned to an event attribute (e.g., C) into an event |
1564
|
|
|
|
|
|
|
handler. The |
1565
|
|
|
|
|
|
|
arguments to the routine will be (0) the element, (1) the name (aka |
1566
|
|
|
|
|
|
|
type) of |
1567
|
|
|
|
|
|
|
the event (without the initial 'on'), (2) the value of the attribute and |
1568
|
|
|
|
|
|
|
(3) the offset within the source of the attribute's value. (Actually, if |
1569
|
|
|
|
|
|
|
the value is within quotes, it is the offset of the first quotation mark. |
1570
|
|
|
|
|
|
|
Also, it will be C for generated HTML [source code passed to the |
1571
|
|
|
|
|
|
|
C method by an element handler].) |
1572
|
|
|
|
|
|
|
As |
1573
|
|
|
|
|
|
|
with C, you |
1574
|
|
|
|
|
|
|
can replace an existing handler with a new one, in which case the old |
1575
|
|
|
|
|
|
|
handler is returned. If you call this method without arguments, it returns |
1576
|
|
|
|
|
|
|
the current handler. Here is an example of its use, that assumes that |
1577
|
|
|
|
|
|
|
handlers are Perl code: |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
$dom_tree->event_attr_handler(sub { |
1580
|
|
|
|
|
|
|
my($elem, $name, $code, $offset) = @_; |
1581
|
|
|
|
|
|
|
my $sub = eval "sub { $code }"; |
1582
|
|
|
|
|
|
|
return sub { |
1583
|
|
|
|
|
|
|
local *_ = \$elem; |
1584
|
|
|
|
|
|
|
&$sub; |
1585
|
|
|
|
|
|
|
}; |
1586
|
|
|
|
|
|
|
}); |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
The event attribute handler will be called whenever an element attribute |
1589
|
|
|
|
|
|
|
whose name |
1590
|
|
|
|
|
|
|
begins with 'on' (case-tolerant) is modified. (For efficiency's sake, I may |
1591
|
|
|
|
|
|
|
change it to call the event attribute handler only when the event is |
1592
|
|
|
|
|
|
|
triggered, so it is not called unnecessarily.) |
1593
|
|
|
|
|
|
|
|
1594
|
|
|
|
|
|
|
=head2 When an Event Handler Dies |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
Use C to assign a coderef that will be called whenever an |
1597
|
|
|
|
|
|
|
event listener (or handler) raises an error. The error will be contained in |
1598
|
|
|
|
|
|
|
C<$@>. |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
=head2 Other Event-Related Methods |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
=over |
1603
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
=item $tree->event_parent |
1605
|
|
|
|
|
|
|
|
1606
|
|
|
|
|
|
|
=item $tree->event_parent( $new_val ) |
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
This method lets you provide an object that is added to the top of the |
1609
|
|
|
|
|
|
|
event dispatch chain. E.g., if you want the view object (the value of |
1610
|
|
|
|
|
|
|
C, aka the window) to have event handlers called before the |
1611
|
|
|
|
|
|
|
document in the capture phase, and after it in the bubbling phase, you can |
1612
|
|
|
|
|
|
|
set it like this (see also L, above): |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
$tree->event_parent( $tree->defaultView ); |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
This holds a weak reference. |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
=item $tree->event_listeners_enabled |
1619
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
=item $tree->event_listeners_enabled( $new_val ) |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
This attribute, which is true by default, can be used to disable event |
1623
|
|
|
|
|
|
|
handlers and listeners. (Default event handlers [see above] still run, |
1624
|
|
|
|
|
|
|
though.) |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
=back |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
=cut |
1629
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
# ---------- NON-DOM EVENT METHODS -------------- # |
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
sub event_attr_handler { |
1634
|
21
|
|
|
21
|
0
|
278
|
my $old = $_[0]->{_HTML_DOM_event_attr_handler}; |
1635
|
21
|
100
|
|
|
|
44
|
$_[0]->{_HTML_DOM_event_attr_handler} = $_[1] if @_ > 1; |
1636
|
21
|
|
|
|
|
66
|
$old; |
1637
|
|
|
|
|
|
|
} |
1638
|
|
|
|
|
|
|
sub default_event_handler { |
1639
|
1886
|
|
|
1886
|
0
|
1966
|
my $old = $_[0]->{_HTML_DOM_default_event_handler}; |
1640
|
1886
|
100
|
|
|
|
3286
|
$_[0]->{_HTML_DOM_default_event_handler} = $_[1] if @_ > 1; |
1641
|
1886
|
|
|
|
|
3126
|
$old; |
1642
|
|
|
|
|
|
|
} |
1643
|
|
|
|
|
|
|
sub default_event_handler_for { |
1644
|
222
|
|
|
222
|
0
|
293
|
my $old = $_[0]->{_HTML_DOM_dehf}{$_[1]}; |
1645
|
222
|
100
|
|
|
|
435
|
$_[0]->{_HTML_DOM_dehf}{$_[1]} = $_[2] if @_ > 2; |
1646
|
222
|
|
|
|
|
709
|
$old; |
1647
|
|
|
|
|
|
|
} |
1648
|
|
|
|
|
|
|
sub error_handler { |
1649
|
1895
|
|
|
1895
|
1
|
2214
|
my $old = $_[0]->{_HTML_DOM_error_handler}; |
1650
|
1895
|
100
|
|
|
|
3381
|
$_[0]->{_HTML_DOM_error_handler} = $_[1] if @_ > 1; |
1651
|
1895
|
|
|
|
|
4588
|
$old; |
1652
|
|
|
|
|
|
|
} |
1653
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
sub event_parent { |
1655
|
677
|
|
|
677
|
1
|
769
|
my $old = (my $self = shift) ->{_HTML_DOM_event_parent}; |
1656
|
677
|
100
|
|
|
|
1106
|
weaken($self->{_HTML_DOM_event_parent} = shift) if @_; |
1657
|
677
|
|
|
|
|
1968
|
$old |
1658
|
|
|
|
|
|
|
} |
1659
|
|
|
|
|
|
|
|
1660
|
|
|
|
|
|
|
sub event_listeners_enabled { |
1661
|
1891
|
|
|
1891
|
1
|
2692
|
my $old = (my $Self = shift)->{_HTML_DOM_doevents}; |
1662
|
1891
|
100
|
|
|
|
3231
|
@_ and $$Self{_HTML_DOM_doevents} = !!shift; |
1663
|
1891
|
100
|
|
|
|
4985
|
defined $old ? $old : 1; # true by default |
1664
|
|
|
|
|
|
|
} |
1665
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
# ---------- NODE AND SHEET LIST HELPER METHODS -------------- # |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
sub _modified { # tells all it's magic nodelists that they're stale |
1670
|
|
|
|
|
|
|
# and also rewrites the style sheet list if present |
1671
|
376
|
|
|
376
|
|
576
|
my $list = $_[0]{_HTML_DOM_node_lists}; |
1672
|
376
|
|
|
|
|
380
|
my $list_is_stale; |
1673
|
376
|
|
|
|
|
541
|
for (@$list) { |
1674
|
276
|
100
|
|
|
|
591
|
defined() ? $_->_you_are_stale : ++$list_is_stale |
1675
|
|
|
|
|
|
|
} |
1676
|
376
|
100
|
|
|
|
533
|
if($list_is_stale) { |
1677
|
48
|
|
|
|
|
134
|
@$list = grep defined, @$list; |
1678
|
48
|
|
|
|
|
115
|
weaken $_ for @$list; |
1679
|
|
|
|
|
|
|
} |
1680
|
|
|
|
|
|
|
|
1681
|
376
|
|
|
|
|
585
|
$_[0]->_populate_sheet_list |
1682
|
|
|
|
|
|
|
} |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
sub _populate_sheet_list { # called both by styleSheets and _modified |
1685
|
413
|
|
100
|
413
|
|
414
|
for($_[0]->{_HTML_DOM_sheets}||return) { |
1686
|
12
|
|
|
|
|
59
|
@$_ = map sheet $_, |
1687
|
|
|
|
|
|
|
$_[0]->look_down(_tag => qr/^(?:link|style)\z/); |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
} |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
sub _register_magic_node_list { # adds the node list to the list of magic |
1692
|
|
|
|
|
|
|
# node lists that get notified automatic- |
1693
|
|
|
|
|
|
|
# ally whenever the doc structure changes |
1694
|
204
|
|
|
204
|
|
191
|
push @{$_[0]{_HTML_DOM_node_lists}}, $_[1]; |
|
204
|
|
|
|
|
313
|
|
1695
|
204
|
|
|
|
|
294
|
weaken $_[0]{_HTML_DOM_node_lists}[-1]; |
1696
|
|
|
|
|
|
|
} |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
1; |
1701
|
|
|
|
|
|
|
__END__ |