File Coverage

blib/lib/HTML/DOM.pm
Criterion Covered Total %
statement 417 441 94.5
branch 168 198 84.8
condition 86 109 78.9
subroutine 106 106 100.0
pod 55 59 93.2
total 832 913 91.1


line stmt bran cond sub pod time code
1             package HTML::DOM;
2              
3             # If you are looking at the source code (which you are obviously doing
4             # if you are reading this), note that '# ~~~' is my way of marking
5             # something to be done still (except in this sentence).
6              
7              
8 24     24   669720 use 5.008003;
  24         195  
9              
10 24     24   109 use strict;
  24         32  
  24         478  
11 24     24   92 use warnings;
  24         41  
  24         590  
12              
13 24     24   105 use Carp 'croak';
  24         45  
  24         959  
14 24     24   10440 use HTML::DOM::Element;
  24         77  
  24         1129  
15 24     24   136 use HTML::DOM::Exception 'NOT_SUPPORTED_ERR';
  24         37  
  24         790  
16 24     24   102 use HTML::DOM::Node 'DOCUMENT_NODE';
  24         38  
  24         711  
17 24     24   100 use Scalar::Util 'weaken';
  24         32  
  24         687  
18 24     24   98 use URI;
  24         37  
  24         3049  
19              
20             our $VERSION = '0.058';
21             our @ISA = 'HTML::DOM::Node';
22              
23             require HTML::DOM::Collection;
24             require HTML::DOM::Comment;
25             require HTML::DOM::DocumentFragment;
26             require HTML::DOM::Implementation;
27             require HTML::DOM::NodeList::Magic;
28             require HTML::DOM::Text;
29             require HTML::Tagset;
30             require HTML::DOM::_TreeBuilder;
31              
32             use overload fallback => 1,
33             '%{}' => sub {
34 20313     20313   31977 my $self = shift;
35             #return $self; # for debugging
36 20313 100 100     81560 $self->isa(scalar caller) || caller->isa('HTML::DOM::_TreeBuilder')
37             and return $self;
38 3         13 $self->forms;
39 24     24   130 };
  24         39  
  24         187  
40              
41              
42             =head1 NAME
43              
44             HTML::DOM - A Perl implementation of the HTML Document Object Model
45              
46             =head1 VERSION
47              
48             Version 0.058 (alpha)
49              
50             B This module is still at an experimental stage. The API is
51             subject to change without
52             notice.
53              
54             =head1 SYNOPSIS
55              
56             use HTML::DOM;
57            
58             my $dom_tree = new HTML::DOM; # empty tree
59             $dom_tree->write($source_code);
60             $dom_tree->close;
61            
62             my $other_dom_tree = new HTML::DOM;
63             $other_dom_tree->parse_file($filename);
64            
65             $dom_tree->getElementsByTagName('body')->[0]->appendChild(
66             $dom_tree->createElement('input')
67             );
68            
69             print $dom_tree->innerHTML, "\n";
70              
71             my $text = $dom_tree->createTextNode('text');
72             $text->data; # get attribute
73             $text->data('new value'); # set attribute
74            
75             =head1 DESCRIPTION
76              
77             This module implements the HTML Document Object Model by extending the
78             HTML::Tree modules. The HTML::DOM class serves both as an HTML parser and
79             as the document class.
80              
81             The following DOM modules are currently supported:
82              
83             Feature Version (aka level)
84             ------- -------------------
85             HTML 2.0
86             Core 2.0
87             Events 2.0
88             UIEvents 2.0
89             MouseEvents 2.0
90             MutationEvents 2.0
91             HTMLEvents 2.0
92             StyleSheets 2.0
93             CSS 2.0 (partially)
94             CSS2 2.0
95             Views 2.0
96              
97             StyleSheets, CSS and CSS2 are actually provided by L. This list
98             corresponds to CSS::DOM versions 0.02 to 0.14.
99              
100             =for comment
101             Level 2 interfaces not yet included: Range, Traversal
102              
103             =head1 METHODS
104              
105             =head2 Construction and Parsing
106              
107             =over 4
108              
109             =item $tree = new HTML::DOM %options;
110              
111             This class method constructs and returns a new HTML::DOM object. The
112             C<%options>, which are all optional, are as follows:
113              
114             =over 4
115              
116             =item url
117              
118             The value that the C method will return. This value is also used by
119             the C method.
120              
121             =item referrer
122              
123             The value that the C method will return
124              
125             =item response
126              
127             An HTTP::Response object. This will be used for information needed for
128             writing cookies. It is expected to have a reference to a request object
129             (accessible via its C method--see L). Passing a
130             parameter to the 'cookie' method will be a no-op
131             without this.
132              
133             =item weaken_response
134              
135             If this is passed a true value, then the HTML::DOM object will hold a weak
136             reference to the response.
137              
138             =item cookie_jar
139              
140             An HTTP::Cookies object. As with C, if you omit this, arguments
141             passed to the
142             C method will be ignored.
143              
144             =item charset
145              
146             The original character set of the document. This does not affect parsing
147             via the C method (which always assumes Unicode). C will
148             use this, if specified, or L otherwise.
149             L's C method uses this to encode form data
150             unless the form has a valid 'accept-charset' attribute.
151              
152             =back
153              
154             If C and C are omitted, they can be inferred from
155             C.
156              
157             =cut
158              
159             {
160             # This HTML::DOM::Element::HTML package represents the
161             # documentElement. It inherits from
162             # HTML::DOM::_TreeBuilder and acts
163             # as the parser. It is also used as a parser for innerHTML.
164              
165             # Note for potential developers: You can’t refer to ->parent in
166             # this package and expect it to provide the document, because
167             # that’s not the case with innerHTML. Use ->ownerDocument.
168             # Use ->parent only to distinguish between innerHTML and
169             # the regular parser.
170              
171             # Concerning magic associations between forms and fields: To cope
172             # with bad markup, an implicitly closed form (with no end tag) is
173             # associated with any form fields that occur after that are not
174             # inside any form. So when a start tag for a form is encountered,
175             # we make that the ‘current form’, by pushing it on to
176             # @{ $$self{_HTML_DOM_cf} }. When the element is closed, if it
177             # is closed by an end tag, we simply pop it off the cf array. If
178             # it is implicitly closed we pop it off and also make it the
179             # ‘magic form’ (_HTML_DOM_mg_f). When we encounter a form field,
180             # we give it a magic association with the form if the cf
181             # stack is empty.
182              
183              
184             package HTML::DOM::Element::HTML;
185             our @ISA = qw' HTML::DOM::Element HTML::DOM::_TreeBuilder';
186              
187 24     24   2304 use Scalar::Util qw 'weaken isweak';
  24         40  
  24         36779  
188              
189             # I have to override this so it doesn't delete _HTML_DOM_* attri-
190             # butes and so that it doesn’t rebless the object.
191             sub elementify {
192 93     93   127 my $self = shift;
193 93 100       2413 my %attrs = map /^[a-z_]*\z/ ? () : ($_ => $self->{$_}),
194             keys %$self;
195 93         1539 my @weak = grep isweak $self->{$_}, keys %$self;
196 93         432 $self->SUPER::elementify;
197 93         629 %$self = (%$self, %attrs); # this invigorates feeble refs
198 93         517 weaken $self->{$_} for @weak;
199             }
200              
201             sub new {
202 147     147   229 my $tb; # hafta declare it separately so the closures can
203             # c it
204             ($tb = shift->HTML::DOM::_TreeBuilder::new(
205             element_class => 'HTML::DOM::Element',
206             'tweak_~text' => sub {
207 548     548   950 my ($text, $parent) = @_;
208             # $parent->ownerDocument will be undef if
209             # $parent is the doc.
210 548   33     1282 $parent->splice_content( -1,1,
211             ($parent->ownerDocument || $parent)
212             ->createTextNode($text) );
213             $parent->content_offset(
214             $$tb{_HTML_DOM_tb_c_offset}
215 548         1572 );
216             },
217             'tweak_*' => sub {
218 808     808   1315 my($elem, $tag, $doc_elem) = @_;
219 808 100       1430 $tag =~ /^~/ and return;
220              
221 801 100       1309 if(
222             $tag eq 'link'
223             ) {
224 16         39 HTML'DOM'Element'Link'_reset_style_sheet(
225             $elem
226             );
227             }
228              
229             # If a form is being closed, determine
230             # whether it is closed implicitly and set
231             # the current form and magic form
232             # accordingly.
233 801 100       1256 if($tag eq 'form') {
234             pop
235 40 50       48 @{$$doc_elem{_HTML_DOM_cf}||[]};
  40         100  
236             delete $$doc_elem{_HTML_DOM_etif}
237             or $$doc_elem{_HTML_DOM_mg_f}
238 40 100       102 = $elem
239             }
240              
241             # If a formie is being closed, create a
242             # magic association where appropriate.
243 801 100 100     2668 if(!$$doc_elem{_HTML_DOM_no_mg}
      100        
      66        
      100        
244             and $tag =~ /^(?:
245             button|(?:
246             fieldse|inpu|(?:obj|sel)ec
247             )t|label|textarea
248             )\z/x
249             and $$doc_elem{_HTML_DOM_mg_f}
250             and !$$doc_elem{_HTML_DOM_cf}
251             ||!@{$$doc_elem{_HTML_DOM_cf}}) {
252             $elem->form(
253             $$doc_elem{_HTML_DOM_mg_f}
254 9         27 );
255 9         18 $doc_elem->ownerDocument->
256             magic_forms(1);
257             }
258              
259             my $event_offsets = delete
260             $elem->{_HTML_DOM_tb_event_offsets}
261 801 100       1893 or return;
262 4         12 _create_events(
263             $doc_elem, $elem, $event_offsets
264             );
265             },
266 147         1187 ))
267             ->ignore_ignorable_whitespace(0); # stop eof()'s cleanup
268 147         576 $tb->store_comments(1); # from changing an
269 147         542 $tb->unbroken_text(1); # necessary, con- # elem_han-
270             # sidering what # dler's view
271             # _tweak_~text does # of the tree
272              
273             # Web browsers preserve whitespace, at least from the point
274             # of view of the DOM; but the main reason we are using this
275             # option is that a parser for innerHTML doesn’t know
276             # whether the nodes will be inserted in a
. 
277 147         431 no_space_compacting $tb 1;
278              
279 147         793 $tb->handler(text => "text", # so we can get line
280             "self, text, is_cdata, offset"); # numbers for scripts
281 147         676 $tb->handler(start => "start",
282             "self, tagname, attr, attrseq, offset, tokenpos");
283 147         578 $tb->handler((declaration=>)x2,'self,tagname,tokens,text');
284              
285 147         605 $tb->{_HTML_DOM_tweakall} = $tb->{'_tweak_*'};
286              
287 147         280 my %opts = @_;
288 147         248 $tb->{_HTML_DOM_no_mg} = delete $opts{no_magic_forms};
289             # used by an element’s innerHTML
290              
291             # We have to copy it like this, because our circular ref-
292             # erence is thus: $tb -> object -> closure -> $tb
293             # We can’t weaken $tb without a copy of it, because it is
294             # the only reference to the object.
295 147         189 my $life_raft = $tb; weaken $tb; $tb;
  147         307  
  147         468  
296             }
297              
298             sub start {
299 595 100   595   1512 return shift->SUPER::start(@_) if @_ < 6; # shirt-çorcuit
300            
301 590         716 my $tokenpos = pop;
302 590         668 my $offset = pop;
303 590         695 my %event_offsets;
304 590         635 my $attr_names = pop;
305 590         1244 for(0..$#$attr_names) {
306             $$attr_names[$_] =~ /^on(.*)/is
307 536 100       1484 and $event_offsets{$1} =
308             $$tokenpos[$_*4 + 4] + $offset;
309             }
310              
311 590         1840 my $elem = (my $self = shift)->SUPER::start(@_);
312            
313 590 100 100     1150 $_[0] eq 'form' and push @{ $$self{_HTML_DOM_cf} ||= [] },
  40         163  
314             $elem;
315              
316 590 100       4402 return $elem unless %event_offsets;
317              
318 5 100       16 if(!$HTML::Tagset::emptyElement{$_[0]}) { # container
319             $$elem{_HTML_DOM_tb_event_offsets} =
320 4         8 \%event_offsets;
321             } else {
322 1         3 _create_events(
323             $self,
324             $elem,
325             \%event_offsets,
326             );
327             }
328              
329 5         35 return $elem;
330             }
331              
332             sub _create_events {
333 5     5   11 my ($doc_elem,$elem,$event_offsets) = @_;
334 5 100       21 defined(my $event_attr_handler =
335             $doc_elem->ownerDocument->event_attr_handler)
336             or return;
337 3         8 for(keys %$event_offsets) {
338             my $l =
339             &$event_attr_handler(
340             $elem,
341             $_,
342             $elem->attr("on$_"),
343 3         14 $$event_offsets{$_}
344             );
345 3 50       612 defined $l and
346             $elem->event_handler (
347             $_, $l
348             );
349             }
350             }
351              
352             sub text {
353 548     548   1068 $_[0]{_HTML_DOM_tb_c_offset} = pop;
354 548         1286 shift->SUPER::text(@_)
355             }
356              
357             sub insert_element {
358 861     861   1547 my ($self, $tag) = (shift, @_);
359 861 100 100     2768 if((ref $tag ? $tag->tag : $tag) eq 'tr'
    100          
360             and $self->pos->tag eq 'table') {
361 12         38 $self->insert_element('tbody', 1);
362             }
363 861         1995 $self->SUPER::insert_element(@_);
364             }
365              
366             sub end {
367 602     602   890 my $self = shift;
368              
369             # If this is a form, record that we’ve seen an end tag, so
370             # that this does not become a ‘magic form’.
371             ++$$self{_HTML_DOM_etif} # end tag is 'form'
372 602 100       1139 if $_[0] eq 'form';
373              
374             # Make sure cannot close a cell outside the cur-
375             # rent table.
376 602 100       1254 $_[0] =~ /^t[hd]\z/ and @_ = (\$_[0], 'table');
377              
378             # HTML::TreeBuilder expects the element to be the
379             # topmost element, and gets confused when it’s inside the
380             # ~doc. It sets _pos to the doc when it encounters .
381             # This works around that.
382 602         780 my $pos = $self->{_pos};
383 602         1413 my @ret = $self->SUPER::end(@_);
384             $self->{_pos} = $pos
385 601 100 100     1681 if ($self->{_pos}||return @ret)->{_tag} eq '~doc';
386 562         1908 @ret; # TB relies on this retval
387             }
388              
389             sub declaration {
390 9     9   30 my($self,$tagname,$tokens,$source) = @_;
391             return
392 9 100 66     64 unless $tagname eq 'doctype'
393             and my $parent = $self->parent;
394             package HTML::DOM; # bypass overloading
395             $parent->{_HTML_DOM_doctype} = $source
396 8 50       20 unless defined $parent->{_HTML_DOM_doctype};
397 8 100       34 return unless @$tokens > 3;
398 7         34 for ($self->{_HTML_DOM_version} = $tokens->[3]){
399 7 50       82 s/^['"]// and s/['"]\z//;
400             }
401             }
402              
403 1148     1148   3027 sub element_class { 'HTML::DOM::Element' }
404              
405             # HTMLHtmlElement interface
406 5     5   443 sub version { shift->_attr('version' => @_) }
407              
408             } # end of special TreeBuilder package
409              
410             sub new {
411 92     92 1 104137 my $self = shift->SUPER::new('~doc');
412              
413 92         238 my %opts = @_;
414 92         1623 $self->{_HTML_DOM_url} = $opts{url}; # might be undef
415 92         211 $self->{_HTML_DOM_referrer} = $opts{referrer}; # might be undef
416 92 100       290 if($opts{response}) {
417 8         17 $self->{_HTML_DOM_response} = $opts{response};
418 8 100       19 if(!defined $self->{_HTML_DOM_url}) {{
419 6         13 $self->{_HTML_DOM_url} =
420             ($opts{response}->request || last)
421 6   100     22 ->url;
422             }}
423 8 100       52 if(!defined $self->{_HTML_DOM_referrer}) {{
424 6         11 $self->{_HTML_DOM_referrer} =
425             ($opts{response}->request || last)
426 6   100     17 ->header('Referer')
427             }}
428 8 100       60 if($opts{weaken_response}) {
429             weaken $self->{_HTML_DOM_response}
430 1         3 }
431             }
432 92         165 $self->{_HTML_DOM_jar} = $opts{cookie_jar}; # might be undef
433 92         182 $self->{_HTML_DOM_cs} = $opts{charset};
434              
435 92         558 $self;
436             }
437              
438             =item $tree->elem_handler($elem_name => sub { ... })
439              
440             If you call this method first, then, when the DOM tree is in the
441             process of
442             being built (as a result of a call to C or C), the
443             subroutine will be called after each C<$elem_name> element
444             is
445             added to the tree. If you give '*' as the element name, the subroutine
446             will be called for each element that does not have a handler. The
447             subroutine's
448             two arguments will be the tree itself
449             and the element in question. The subroutine can call the DOM object's
450             C
451             method to insert HTML code into the source after the element.
452              
453             Here is a lame example (which does not take Content-Script-Type headers
454             or security into account):
455              
456             $tree->elem_handler(script => sub {
457             my($document,$elem) = @_;
458             return unless $elem->attr('type') eq 'application/x-perl';
459             eval($elem->firstChild->data);
460             });
461              
462             $tree->write(
463             '

The time is

464            
467             precisely.
468            

'
469             );
470             $tree->close;
471              
472             print $tree->documentElement->as_text, "\n";
473              
474             (Note: L's
475             L|HTML::DOM::Element/content_offset> method might come in
476             handy for reporting line numbers for script errors.)
477              
478             =cut
479              
480             sub elem_handler {
481 133     133 1 1625 my ($self,$elem_name,$sub) = @_;
482              
483             # ~~~ temporary; for internal use only:
484 133 100       368 @_ < 3 and return $$self{_HTML_DOM_nih}{$elem_name};
485              
486 9         19 $$self{_HTML_DOM_nih}{$elem_name} = $sub; # nih = node inser-
487             # tion handler
488             my $h = $self->{_HTML_DOM_elem_handlers}{$elem_name} = sub {
489             # I can’t put $doc_elem outside the closure, because
490             # ->open replaces it with another object, and we’d be
491             # referring to the wrong one.
492 16     16   24 my $doc_elem = $_[2];
493 16         45 $doc_elem->{_HTML_DOM_tweakall}->(@_);
494 16         68 $self->_modified; # in case there are node lists hanging
495             # around that the handler references
496 16         57 &$sub($self, $_[0]);
497              
498             # See the comment in sub write.
499 15         602 (my $level = $$self{_HTML_DOM_buffered});
500 15 50 66     88 if( $level
      100        
      66        
501             and ($level -= 1, 1)
502             and $$self{_HTML_DOM_p}
503             and $$self{_HTML_DOM_p}[$level]
504             ) {
505 7         25 $$self{_HTML_DOM_p}[$level]->eof;
506             $level
507 1         3 ? --$#{$$self{_HTML_DOM_p}}
508 7 100       22 : delete $$self{_HTML_DOM_p};
509             }
510 9         42 };
511 9 100       21 if(my $p = $$self{_HTML_DOM_parser}) {
512 1         8 $$p{"_tweak_$elem_name"} = $h
513             }
514 9         33 weaken $self;
515 9         20 return;
516             }
517              
518              
519             =item css_url_fetcher( \&sub )
520              
521             With this method you can provide a subroutine that fetches URLs referenced
522             by 'link' tags. Its sole argument is the URL, which is made absolute based
523             on the HTML page's own base URL (it is assumed that this is absolute). It
524             should return C or an empty list on failure. Upon
525             success, it should return just the CSS code, if it has been decoded (and is
526             in Unicode), or, if it has not been decoded, the CSS code followed by
527             C<< decode => 1 >>. See L for details on
528             when you should or should not decode it. (Note that HTML::DOM
529             automatically
530             provides an encoding hint based on the HTML document.)
531              
532             HTML::DOM passes the result of the url fetcher to L and
533             turns
534             it into a style sheet object accessible via the link element's
535             L|HTML::DOM::Element::Link/sheet> method.
536              
537             =cut
538              
539             sub css_url_fetcher {
540 11     11 1 900 my $old = (my $self = shift)->{_HTML_DOM_cuf};
541 11 100       26 $self->{_HTML_DOM_cuf} = shift if @_;
542 11 100       35 $old||();
543             }
544              
545             =item $tree->write(...) (DOM method)
546              
547             This parses the HTML code passed to it, adding it to the end of
548             the
549             document. It assumes that its input is a normal Perl Unicode string. Like
550             L's
551             C method, it can take a coderef.
552              
553             When it is called from an an element handler (see
554             C, above), the value passed to it
555             will be inserted into the HTML code after the current element when the
556             element handler returns. (In this case a coderef won't do--maybe that will
557             be added later.)
558              
559             If the C method has been called, C will call C before
560             parsing the HTML code passed to it.
561              
562             =item $tree->writeln(...) (DOM method)
563              
564             Just like C except that it appends "\n" to its argument and does
565             not work with code refs. (Rather
566             pointless, if you ask me. :-)
567              
568             =item $tree->close() (DOM method)
569              
570             Call this method to signal to the parser that the end of the HTML code has
571             been reached. It will then parse any residual HTML that happens to be
572             buffered. It also makes the next C call C.
573              
574             =item $tree->open (DOM method)
575              
576             Deletes the HTML tree, resetting it so that it has just an element,
577             and a parser hungry for HTML code.
578              
579             =item $tree->parse_file($file)
580              
581             This method takes a file name or handle and parses the content,
582             (effectively) calling C afterwards. In the former case (a file
583             name), L will be used to detect the encoding. In the
584             latter (a file handle), you'll have to C it yourself. This could
585             be considered a bug. If you have a solution to this (how to make
586             HTML::Encoding detect an encoding from a file handle), please let me know.
587              
588             As of version 0.12, this method returns true upon success, or undef/empty
589             list on failure.
590              
591             =item $tree->charset
592              
593             This method returns the name of the character
594             set that was passed to C, or, if that was not given, that which
595             C used.
596              
597             It returns undef if C was not given a charset and if C was
598             not
599             used or was
600             passed a file handle.
601              
602             You can also set the charset by passing an argument, in which case the old
603             value is returned.
604              
605              
606             =cut
607              
608             sub parse_file {
609 5     5 1 17 my $file = $_[1];
610              
611 5         20 $_[0]->open;
612              
613             # This ‘if’ statement uses the same check that HTML::Parser uses.
614             # We are not strictly checking to see whether it’s a handle,
615             # but whether HTML::Parser would consider it one.
616 5 50 33     38 if (ref($file) || ref(\$file) eq "GLOB") {
617             (my $a = shift->{_HTML_DOM_parser})
618 0 0       0 ->parse_file($file) || return;
619 0         0 $a ->elementify;
620 0         0 return 1;
621             }
622              
623 24     24   159 no warnings 'parenthesis'; # 5.8.3 Grrr!!
  24         45  
  24         14850  
624 5 100       19 if(my $charset = $_[0]{_HTML_DOM_cs}) {
625 3 50       114 open my $fh, $file or return;
626 3         22 $charset =~ s/^(?:x-?)?mac-?/mac/i;
627 3     1   59 binmode $fh, ":encoding($charset)";
  1         6  
  1         1  
  1         9  
628             $$_{_HTML_DOM_parser}->parse_file($fh) || return,
629             $_->close
630 3   50     4046 for shift;
631 3         47 return 1;
632             }
633              
634 2 100       96 open my $fh, $file or return;
635 1         6 local $/;
636 1         27 my $contents = <$fh>;
637 1         529 require HTML::Encoding;
638 1   50     12412 my $encoding = HTML::Encoding::encoding_from_html_document(
639             $contents
640             ) || 'iso-8859-1';
641             # Since we’ve already slurped the file, we might as well
642             # avoid having HTML::Parser read it again, even if we could
643             # use binmode.
644 1         5855 require Encode;
645             $_->write(Encode::decode($encoding, $contents)), $_->close,
646             $_->{_HTML_DOM_cs} = $encoding
647 1         5 for shift;
648 1         24 return 1;
649             }
650              
651             sub charset {
652 39     39 1 2288 my $old = (my$ self = shift)->{_HTML_DOM_cs};
653 39 100       102 $self->{_HTML_DOM_cs} = shift if @_;
654 39         123 $old;
655             }
656              
657             sub write {
658 110     110 1 19637 my $self = shift;
659 110 100       206 if($$self{_HTML_DOM_buffered}) {
660             # Although we call this buffered, it’s actually not. Before
661             # version 0.040, a recursive call to ->write on the same
662             # doc object would simply record the HTML code in a buffer
663             # that was processed when the elem handler that made the
664             # inner call to ->write finished. Every elem handler would
665             # have a wrapper (created in the elem_handler sub above)
666             # that took care of this after calling the handler, by cre-
667             # ating a new, temporary, parser object that would call the
668             # start/end, etc., methods of our tree builder.
669             #
670             # This approach stops JS code like this from working (yes,
671             # there *are* websites with code like this!):
672             # document.write("")
673             # document.getElementById("img1").src="..."
674             #
675             # So, now we take care of creating a new parser immedi-
676             # ately. This does mean, however that we end up with mul-
677             # tiple parser objects floating around in the case of
678             # nested . So we have to be careful to create and
679             # delete them at the right time.
680              
681             # $$self{_HTML_DOM_buffered} actually contains a number
682             # indicating the number of nested calls to ->write.
683 7         14 my $level = $$self{_HTML_DOM_buffered};
684 7         13 local $$self{_HTML_DOM_buffered} = $level + 1;
685              
686 7         17 my($doc_elem) = $$self{_HTML_DOM_parser};
687              
688             # These handlers delegate the handling to methods of
689             # *another* HTML::Parser object.
690             my $p = $$self{_HTML_DOM_p}[$level-1] ||=
691             HTML::Parser->new(
692             start_h => [
693 5     5   24 sub { $doc_elem->start(@_) },
694             'tagname, attr, attrseq'
695             ],
696             end_h => [
697 1     1   5 sub { $doc_elem->end(@_) },
698             'tagname, text'
699             ],
700             text_h => [
701 7     7   26 sub { $doc_elem->text(@_) },
702 7   33     13 'text, is_cdata'
703             ],
704             );
705              
706 7         383 $p->unbroken_text(1); # push_content, which is called by
707             # H:TB:text, won't concatenate two
708             # text portions if the first one
709             # is a node.
710              
711 7         37 $p->parse(shift);
712              
713             # We can’t get rid of our parser at this point, as a subse-
714             # quent ->write call from the same nested level (e.g., from
715             # the same ), then we need to remove it, so we have
719             # elem_handler do that for us.
720             }
721             else {
722             my $parser
723             = $$self{_HTML_DOM_parser}
724 103   66     171 || ($self->open, $$self{_HTML_DOM_parser});
725 103         209 local $$self{_HTML_DOM_buffered} = 1;
726 103         1470 $parser->parse($_) for @_;
727             }
728 110         376 $self->_modified;
729             return # nothing;
730 110         293 }
731              
732 4     4 1 12 sub writeln { shift->write(@_,"\n") }
733              
734             sub close {
735 97     97 1 1055 my $a = (my $self = shift)->{_HTML_DOM_parser};
736 97 100       233 return unless $a;
737              
738             # We can’t use eval { $a->eof } because that would catch errors
739             # that are meant to propagate (a nasty bug [the so-called
740             # ‘content—offset’ bug] was hidden because of an eval in ver-
741             # sion 0.010).
742             # return unless $a->can('eof');
743            
744 94         416 $a->eof(@_);
745 93         155 delete $$self{_HTML_DOM_parser};
746 93         280 $a->elementify;
747             return # nothing;
748 93         208 }
749              
750             sub open {
751 116     116 1 3901 (my $self = shift)->detach_content;
752              
753             # We have to use push_content instead of simply putting it there
754             # ourselves, because push_content takes care of weakening the
755             # parent (and that code doesn’t belong in this package).
756             $self->push_content(
757 116         1399 my $tb = $$self{_HTML_DOM_parser} = new HTML::DOM::Element::HTML
758             );
759              
760 116         2999 delete @$self{<_HTML_DOM_sheets _HTML_DOM_doctype>};
761              
762 116 100       339 return unless $self->{_HTML_DOM_elem_handlers};
763 14         27 for(keys %{$self->{_HTML_DOM_elem_handlers}}) {
  14         22  
764             $$tb{"_tweak_$_"} =
765 14         31 $self->{_HTML_DOM_elem_handlers}{$_}
766             }
767              
768             return # nothing;
769 14         35 }
770              
771             =back
772              
773             =head2 Other DOM Methods
774              
775             =over 4
776              
777             =cut
778              
779              
780             #-------------- DOM STUFF (CORE) ---------------- #
781              
782             =item doctype
783              
784             Returns nothing
785              
786             =item implementation
787              
788             Returns the L object.
789              
790             =item documentElement
791              
792             Returns the element.
793              
794             =item createElement ( $tag )
795              
796             =item createDocumentFragment
797              
798             =item createTextNode ( $text )
799              
800             =item createComment ( $text )
801              
802             =item createAttribute ( $name )
803              
804             Each of these creates a node of the appropriate type.
805              
806             =item createProcessingInstruction
807              
808             =item createEntityReference
809              
810             These two throw an exception.
811              
812             =for comment
813             =item createCSSStyleSheet
814             This creates a style sheet (L object).
815              
816             =item getElementsByTagName ( $name )
817              
818             C<$name> can be the name of the tag, or '*', to match all tag names. This
819             returns a node list object in scalar context, or a list in list context.
820              
821             =item importNode ( $node, $deep )
822              
823             Clones the C<$node>, setting its C attribute to the document
824             with which this method is called. If C<$deep> is true, the C<$node> will
825             be
826             cloned recursively.
827              
828             =cut
829              
830       2 1   sub doctype {} # always null
831              
832             sub implementation {
833 24     24   157 no warnings 'once';
  24         44  
  24         18065  
834 2     2 1 5 return $HTML::DOM::Implementation::it;
835             }
836              
837             sub documentElement {
838 103     103 1 771 ($_[0]->content_list)[0]
839             }
840              
841             sub createElement {
842 354     354 1 95706 my $elem = HTML::DOM::Element->new($_[1]);
843 354         1499 $elem->_set_ownerDocument(shift);
844 354         1049 $elem;
845             }
846              
847             sub createDocumentFragment {
848 9     9 1 2172 my $thing = HTML::DOM::DocumentFragment->new;
849 9         34 $thing->_set_ownerDocument(shift);
850 9         33 $thing;
851             }
852              
853             sub createTextNode {
854 607     607 1 3083 my $thing = HTML::DOM::Text->new(@_[1..$#_]);
855 607         1557 $thing->_set_ownerDocument(shift);
856 607         1830 $thing;
857             }
858              
859             sub createComment {
860 7     7 1 508 my $thing = HTML::DOM::Comment->new(@_[1..$#_]);
861 7         36 $thing->_set_ownerDocument(shift);
862 7         29 $thing;
863             }
864              
865             sub createCDATASection {
866 1     1 0 438 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
867             'The HTML DOM does not support CDATA sections' );
868             }
869              
870             sub createProcessingInstruction {
871 1     1 1 236 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
872             'The HTML DOM does not support processing instructions' );
873             }
874              
875             sub createAttribute {
876 22     22 1 2610 my $thing = HTML::DOM::Attr->new(@_[1..$#_]);
877 22         65 $thing->_set_ownerDocument(shift);
878 22         62 $thing;
879             }
880              
881             sub createEntityReference {
882 1     1 1 464 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
883             'The HTML DOM does not support entity references' );
884             }
885              
886             #sub createCSSStyleSheet {
887             # shift;
888             # require CSS'DOM;
889             # ~~~
890             #}
891              
892             sub getElementsByTagName {
893 18     18 1 1202 my($self,$tagname) = @_;
894             #warn "You didn't give me a tag name." if !defined $tagname;
895 18 100       75 if (wantarray) {
896 4 100       27 return $tagname eq '*'
897             ? grep tag $_ !~ /^~/, $self->descendants
898             : $self->find($tagname);
899             }
900             else {
901             my $list = HTML::DOM::NodeList::Magic->new(
902             $tagname eq '*'
903 2     2   8 ? sub { grep tag $_ !~ /^~/, $self->descendants }
904 18     18   89 : sub { $self->find($tagname) }
905 14 100       160 );
906 14         58 $self-> _register_magic_node_list($list);
907 14         534 $list;
908             }
909             }
910              
911             sub importNode {
912 12     12 1 264 my ($self, $node, $deep) = @_;
913 12 100       48 die HTML::DOM::Exception->new( NOT_SUPPORTED_ERR,
914             'Documents cannot be imported.' )
915             if $node->nodeType ==DOCUMENT_NODE;
916 10         43 (my $clown = $node->cloneNode($deep))
917             ->_set_ownerDocument($self);
918 10 100       57 if($clown->can('descendants')) { # otherwise it’s an Attr, so this
919 8         20 for($clown->descendants) { # isn’t necessary
920 4         9 delete $_->{_HTML_DOM_owner};
921             }}
922 10         53 $clown;
923             }
924              
925             #-------------- DOM STUFF (HTML) ---------------- #
926              
927             =item alinkColor
928              
929             =item background
930              
931             =item bgColor
932              
933             =item fgColor
934              
935             =item linkColor
936              
937             =item vlinkColor
938              
939             These six methods return (optionally set) the corresponding attributes of
940             the body element. Note that most of the names do not map directly to the
941             names of
942             the attributes. C refers to the C attribute. Those that
943             end
944             with 'linkColor' refer to the attributes of the same name but without the
945             'Color' on the end.
946              
947             =cut
948              
949 6   100 6 1 13 sub alinkColor { (shift->body||return "")->aLink (@_) }
950 6   100 6 1 1145 sub background { (shift->body||return "")->background(@_) }
951 6   100 6 1 1127 sub bgColor { (shift->body||return "")->bgColor (@_) }
952 6   100 6 1 1072 sub fgColor { (shift->body||return "")->text (@_) }
953 6   100 6 1 1132 sub linkColor { (shift->body||return "")->link (@_) }
954 6   100 6 1 1141 sub vlinkColor { (shift->body||return "")->vLink (@_) }
955              
956             =item title
957              
958             Returns (or optionally sets) the title of the page.
959              
960             =item referrer
961              
962             Returns the page's referrer.
963              
964             =item domain
965              
966             Returns the domain name portion of the document's URL.
967              
968             =item URL
969              
970             Returns the document's URL.
971              
972             =item body
973              
974             Returns the body element, or the outermost frame set if the document has
975             frames. You can set the body by passing an element as an argument, in
976             which
977             case the old body element is returned.
978              
979             =item images
980              
981             =item applets
982              
983             =item links
984              
985             =item forms
986              
987             =item anchors
988              
989             These five methods each return a list of the appropriate elements in list
990             context, or an L object in scalar context. In this
991             latter case, the object will update automatically when the document is
992             modified.
993              
994             In the case of C you can access those by using the HTML::DOM object
995             itself as a hash. I.e., you can write C<< $doc->{f} >> instead of
996             S<< C<< $doc->forms->{f} >> >>.
997              
998             =for comment
999             # ~~~ Why on earth did I ever put this in the docs?!
1000             B I need to make these methods cache the HTML collection objects
1001             that they create. Once I've done this, I can make list context use those
1002             objects, as well as scalar context.
1003              
1004             =item cookie
1005              
1006             This returns a string containing the document's cookies (the format may
1007             still change). If you pass an
1008             argument, it
1009             will set a cookie as well. Both Netscape-style and RFC2965-style cookie
1010             headers are supported.
1011              
1012             =cut
1013              
1014             sub title {
1015 12     12 1 443 my $doc = shift;
1016 12 100       36 if(my $title_elem = $doc->find('title')) {
1017 8         39 $title_elem->text(@_);
1018             }
1019             else {
1020 4 100       15 return "" unless @_;
1021 3   66     9 ( $doc->find('head')
1022             || ( $doc->find('html')
1023             || $doc->appendChild($doc->createElement('html'))
1024             )->appendChild($doc->createElement('head'))
1025             )->appendChild(
1026             my $t = $doc->createElement('title')
1027             );
1028 3         13 $t->text(@_);
1029 3         13 return "";
1030             }
1031             }
1032              
1033             sub referrer {
1034 5     5 1 1245 my $referrer = shift->{_HTML_DOM_referrer};
1035 5 50       30 defined $referrer ? $referrer : ();
1036             }
1037              
1038 24     24   158 sub domain { no strict;
  24         33  
  24         17338  
1039 2     2 1 4 my $doc = shift;
1040 2         3 host {ref $doc->{_HTML_DOM_url} ? $doc->{_HTML_DOM_url}
1041 2 100       5 : ($doc->{_HTML_DOM_url} = URI->new($doc->{_HTML_DOM_url}))};
1042             }
1043              
1044             sub URL {
1045 154     154 1 1987 my $url = shift->{_HTML_DOM_url};
1046 154 100       675 defined $url ? "$url" : undef;
1047             }
1048              
1049             sub body { # ~~~ this needs to return the outermost frameset element if
1050             # there is one (if the frameset is always the second child
1051             # of , then it already does).
1052 78     78 1 2127 my $body = ($_[0]->documentElement->content_list)[1];
1053 78 100 100     292 if (!$body || $body->tag !~ /^(?:body|frameset)\z/) {
1054 24         47 $body = $_[0]->find('body','frameset');
1055             }
1056 78 100       176 if(@_>1) {
1057 2         6 my $doc_elem = $_[0]->documentElement;
1058             # I'm using the replaceChild rather than replace_with,
1059             # despite the former's convoluted syntax, since the former
1060             # has the appropriate error-checking code (or will), and
1061             # also because it triggers mutation events.
1062 2         10 $doc_elem->replaceChild($_[1],$body)
1063             }
1064             else {
1065 76         326 $body
1066             }
1067             }
1068              
1069             sub images {
1070 2     2 1 8 my $self = shift;
1071 2 100       6 if (wantarray) {
1072 1         6 return grep tag $_ eq 'img', $self->descendants;
1073             }
1074             else {
1075             my $collection = HTML::DOM::Collection->new(
1076             my $list = HTML::DOM::NodeList::Magic->new(
1077 1     1   3 sub { grep tag $_ eq 'img', $self->descendants }
1078 1         14 ));
1079 1         5 $self-> _register_magic_node_list($list);
1080 1         29 $collection;
1081             }
1082             }
1083              
1084             sub applets {
1085 2     2 1 4 my $self = shift;
1086 2 100       5 if (wantarray) {
1087 1         4 return grep $_->tag =~ /^(?:objec|apple)t\z/,
1088             $self->descendants;
1089             }
1090             else {
1091             my $collection = HTML::DOM::Collection->new(
1092             my $list = HTML::DOM::NodeList::Magic->new(
1093 1     1   4 sub { grep $_->tag =~ /^(?:objec|apple)t\z/,
1094             $self->descendants }
1095 1         7 ));
1096 1         3 $self-> _register_magic_node_list($list);
1097 1         4 $collection;
1098             }
1099             }
1100              
1101             sub links {
1102 4     4 1 496 my $self = shift;
1103 4 100       12 if (wantarray) {
1104             return grep {
1105 1         4 my $tag = tag $_;
  38         70  
1106 38 100 100     106 $tag eq 'area' || $tag eq 'a'
1107             && defined $_->attr('href')
1108             } $self->descendants;
1109             }
1110             else {
1111             my $collection = HTML::DOM::Collection->new(
1112             my $list = HTML::DOM::NodeList::Magic->new(
1113             sub { grep {
1114 3     3   20 my $tag = tag $_;
  90         232  
1115 90 100 100     268 $tag eq 'area' || $tag eq 'a'
1116             && defined $_->attr('href')
1117             } $self->descendants }
1118 3         29 ));
1119 3         13 $self-> _register_magic_node_list($list);
1120 3         31 $collection;
1121             }
1122             }
1123              
1124             sub forms {
1125 39     39 1 809 my $self = shift;
1126 39 100       88 if (wantarray) {
1127 17         70 return grep tag $_ eq 'form', $self->descendants;
1128             }
1129             else {
1130             my $collection = HTML::DOM::Collection->new(
1131             my $list = HTML::DOM::NodeList::Magic->new(
1132 22     22   72 sub { grep tag $_ eq 'form', $self->descendants }
1133 22         144 ));
1134 22         68 $self-> _register_magic_node_list($list);
1135 22         216 $collection;
1136             }
1137             }
1138              
1139             sub anchors {
1140 2     2 1 5 my $self = shift;
1141 2 100       7 if (wantarray) {
1142 1   100     4 return grep tag $_ eq 'a' && defined $_->attr('name'),
1143             $self->descendants;
1144             }
1145             else {
1146             my $collection = HTML::DOM::Collection->new(
1147             my $list = HTML::DOM::NodeList::Magic->new(
1148 1   100 1   4 sub { grep tag $_ eq 'a' && defined $_->attr('name'),
1149             $self->descendants }
1150 1         8 ));
1151 1         4 $self-> _register_magic_node_list($list);
1152 1         3 $collection;
1153             }
1154             }
1155              
1156              
1157             sub cookie {
1158 2     2 1 680 my $self = shift;
1159 2 50       5 return '' unless defined (my $jar = $self->{_HTML_DOM_jar});
1160 0         0 my $return;
1161 0 0       0 if (defined wantarray) {
1162             # Yes, this is nuts (getting HTTP::Cookies to join the cookies, and
1163             # splitting them, filtering them, and joining them again[!]), but
1164             # &HTTP::Cookies::add_cookie_header is long and complicated, and I
1165             # don't want to replicate it here.
1166 24     24   160 no warnings 'uninitialized';
  24         43  
  24         9399  
1167 0         0 my $reqclone = $self->{_HTML_DOM_response}->request->clone;
1168             # Yes this is a bit strange, but we don’t want to put
1169             # ‘use HTTP::Header 1.59’ in this file, as it would mean loading the
1170             # module even for people who are not using this feature or who are
1171             # duck-typing.
1172 0 0 0     0 if (!$reqclone->can('header_field_names')
1173 0         0 && $reqclone->isa("HTTP::Headers")) { VERSION HTTP::Headers:: 1.59 }
1174 0         0 for($reqclone->header_field_names) {
1175 0 0       0 /cookie/i and remove_header $reqclone $_;
1176             }
1177 0         0 $return = join ';', grep !/\$/,
1178             $jar->add_cookie_header(
1179             $reqclone
1180             )-> header ('Cookie')
1181             # Pieces of this regexp were stolen from HTTP::Headers::Util:
1182             =~ /\G\s* # initial whitespace
1183             (
1184             [^\s=;,]+ # name
1185             \s*=\s* # =
1186             (?:
1187             \"(?:[^\"\\]*(?:\\.[^\"\\]*)*)\" # quoted value
1188             |
1189             [^;,\s]* # unquoted value
1190             )
1191             )
1192             \s*;?
1193             /xg;
1194             }
1195 0 0       0 if (@_) {
1196 0 0       0 return unless defined $self->{_HTML_DOM_response};
1197 0         0 require HTTP::Headers::Util;
1198             (undef,undef, my%split) =
1199 0         0 @{(HTTP::Headers::Util::split_header_words($_[0]))[0]};
  0         0  
1200 0         0 my $rfc;
1201 0         0 for(keys %split){
1202             # I *hope* this always works! (NS cookies should have no version.)
1203 0 0       0 ++ $rfc, last if lc $_ eq 'version';
1204             }
1205             (my $clone = $self->{_HTML_DOM_response}->clone)
1206 0         0 ->remove_header(qw/ Set-Cookie Set-Cookie2 /);
1207 0         0 $clone->header('Set-Cookie' . 2 x!! $rfc => $_[0]);
1208 0         0 $jar->extract_cookies($clone);
1209             }
1210 0 0       0 $return||'';
1211             }
1212              
1213             =item getElementById
1214              
1215             =item getElementsByName
1216              
1217             =item getElementsByClassName
1218              
1219             These three do what their names imply. The last two
1220             will return a list in list context, or a node list
1221             object in scalar context. Calling them in list
1222             context is probably more efficient.
1223              
1224             =cut
1225              
1226             sub getElementById {
1227 163     163 1 8555 my(@pile) = grep ref($_), @{shift->{'_content'}};
  163         309  
1228 163         267 my $id = shift;
1229 163         178 my $this;
1230 163         313 while(@pile) {
1231 24     24   162 no warnings 'uninitialized';
  24         43  
  24         13238  
1232 2501         3006 $this = shift @pile;
1233 2501 100       3773 $this->id eq $id and return $this;
1234 2339         3726 unshift @pile, grep ref($_), $this->content_list;
1235             }
1236 1         4 return;
1237             }
1238              
1239             sub getElementsByName {
1240 3     3 1 11 my($self,$name) = @_;
1241 3 100       7 if (wantarray) {
1242 2         29 return $self->look_down(name => "$name");
1243             }
1244             else {
1245             my $list = HTML::DOM::NodeList::Magic->new(
1246 1     1   3 sub { $self->look_down(name => "$name"); }
1247 1         43 );
1248 1         16 $self-> _register_magic_node_list($list);
1249 1         4 $list;
1250             }
1251             }
1252              
1253             sub getElementsByClassName {
1254 9     9 1 667 splice @_, 2, @_, 1; # Remove extra elements; add a true third elem
1255 9         28 goto &HTML'DOM'Element'_getElementsByClassName;
1256             }
1257              
1258             # ---------- DocumentEvent interface -------------- #
1259              
1260             =item createEvent ( $category )
1261              
1262             Creates a new event object, believe it or not.
1263              
1264             The C<$category> is the DOM event category, which determines what type of
1265             event object will be returned. The currently supported event categories
1266             are MouseEvents, UIEvents, HTMLEvents and MutationEvents.
1267              
1268             You can omit the C<$category> to create an instance of the event base class
1269             (not officially part of the DOM).
1270              
1271             =cut
1272              
1273             sub createEvent {
1274 267     267 1 3157 require HTML'DOM'Event;
1275 267   100     778 HTML'DOM'Event'create_event($_[1]||'');
1276             }
1277              
1278             # ---------- DocumentView interface -------------- #
1279              
1280             =item defaultView
1281              
1282             Returns the L object associated with the document.
1283              
1284             There is no such object by default; you have to put one there yourself:
1285              
1286             Although it is supposed to be read-only according to the DOM, you can set
1287             this attribute by passing an argument to it. It I still marked as
1288             read-only in
1289             L|HTML::DOM::Interface>.
1290              
1291             If you do set it, it is recommended that the object be a subclass of
1292             L.
1293              
1294             This attribute holds a weak reference to the object.
1295              
1296             =cut
1297              
1298             sub defaultView {
1299 1889     1889 1 2249 my $self = shift;
1300 1889         2487 my $old = $self->{_HTML_DOM_view};
1301 1889 100       3449 if(@_) {
1302 4         9 weaken($self->{_HTML_DOM_view} = shift);
1303             }
1304 1889 100       5748 return defined $old ? $old : ();
1305             }
1306              
1307             # ---------- DocumentStyle interface -------------- #
1308              
1309             =item styleSheets
1310              
1311             Returns a L of the document's style sheets, or a
1312             simple list in list context.
1313              
1314             =cut
1315              
1316             sub styleSheets {
1317 25     25 1 467 my $doc = shift;
1318             my $ret = (
1319             $doc->{_HTML_DOM_sheets} or
1320             $doc->{_HTML_DOM_sheets} = (
1321             require CSS::DOM::StyleSheetList,
1322             new CSS::DOM::StyleSheetList
1323             ),
1324             $doc->_populate_sheet_list,
1325             $doc->{_HTML_DOM_sheets}
1326 25   66     46 );
1327 25 100       104 wantarray ? @$ret : $ret;
1328             }
1329              
1330             =item innerHTML
1331              
1332             Serialises and returns the HTML document. If you pass an argument, it will
1333             set the contents of the document via C, C and C,
1334             returning a serialisation of the old contents.
1335              
1336             =cut
1337              
1338             sub innerHTML {
1339 25     25 1 1212 my $self = shift;
1340 25         38 my $old;
1341 25 50 100     62 $old = join '' , $self->{_HTML_DOM_doctype}||'',
    100          
1342             map
1343             HTML'DOM'Element'_html_element_adds_newline
1344             ? substr((
1345             as_HTML $_ (undef)x2,{}
1346             ), 0, -1)
1347             : $_->as_HTML((undef)x2,{}),
1348             $self->content_list
1349             if defined wantarray;
1350 25 100       67 if(@_){
1351 13         34 $self->open();
1352 13         44 $self->write(shift);
1353 13         32 $self->close();
1354             }
1355             $old
1356 25         101 }
1357              
1358              
1359             =item location
1360              
1361             =item set_location_object (non-DOM)
1362              
1363             C returns the location object, if you've put one there with
1364             C. HTML::DOM doesn't actually implement such an object
1365             itself, but provides the appropriate magic to make
1366             C<< $doc->location($foo) >> translate into
1367             C<< $doc->location->href($foo) >>.
1368              
1369             BTW, the location object had better be true when used as a boolean, or
1370             HTML::DOM will think it doesn't exist.
1371              
1372             =cut
1373              
1374             sub location {
1375 3     3 1 8 my $self = shift;
1376 3 100 50     9 @_ and ($$self{_HTML_DOM_loc}||die "Can't assign to location"
1377             ." without a location object")->href(@_);
1378             $$self{_HTML_DOM_loc}||()
1379 3 100       7 }
1380              
1381             sub set_location_object {
1382 1     1 1 3 $_[0]{_HTML_DOM_loc} = $_[1];
1383             }
1384              
1385              
1386             =item lastModified
1387              
1388             This method returns the document's modification date as gleaned from the
1389             response object passed to the constructor, in MM/DD/YYYY HH:MM:SS format.
1390              
1391             If there is no modification date, an empty string is returned, but this
1392             may change in the future.
1393              
1394             =begin comment
1395              
1396             When there is no modification date, the return value is different in every
1397             browser.
1398             NS 2-4 and Opera 9 have the epoch (in GMT format).
1399             Firefox 3 has the time the page was loaded.
1400             Safari 4 has an empty string (it uses GMT format when there is a mod time).
1401             IE, 6-8 the only one to comply with HTML 5, has the current time; but HTML
1402             5 is illogical, since it makes no sense for the modification time to keep
1403             ticking away.
1404              
1405             I’ve opted to use the empty string for now, since we can’t *really* find
1406             out the modification time--only what the server *says* it is. And if the
1407             server doesn’t say, it’s no use pretending that it did say it.
1408              
1409             =end comment
1410              
1411             =cut
1412              
1413             sub lastModified {
1414 4 100 100 4 1 771 my $time = ($_[0]{_HTML_DOM_response} || return '')->last_modified
1415             or return '';
1416 2         544 require Date'Format;
1417 2         4031 Date'Format'time2str("%d/%m/%Y %X", $time);
1418             }
1419              
1420              
1421             =back
1422              
1423             =cut
1424              
1425              
1426             # ---------- OVERRIDDEN NODE & EVENT TARGET METHODS -------------- #
1427              
1428       44 1   sub ownerDocument {} # empty list
1429 1     1 1 494 sub nodeName { '#document' }
1430 24     24   167 { no warnings 'once'; *nodeType = \& DOCUMENT_NODE; }
  24         48  
  24         4499  
1431              
1432             =head2 Other (Non-DOM) Methods
1433              
1434             (See also L, below.)
1435              
1436             =over 4
1437              
1438             =item $tree->base
1439              
1440             Returns the base URL of the page; either from a tag, from
1441             the response object passed to C, or the
1442             URL passed to C.
1443              
1444             =cut
1445              
1446             sub base {
1447 161     161 1 220 my $doc = shift;
1448 161 100       709 if(
    100          
1449             my $base_elem = $doc->look_down(_tag => 'base', href => qr)(?:\)))
1450             ){
1451 10         31 return ''.$base_elem->attr('href');
1452             }
1453             elsif (my $r = $$doc{_HTML_DOM_response}) {
1454 2         5 my $base;
1455 2 100 66     18 ($base) = $r->header('Content-Base')
1456             or ($base) = $r->header('Content-Location')
1457             or $base = $r->header('Base');
1458             # URI does not document $URI::scheme_re, but HTTP::Response
1459             # (which is in a separate distribution) uses it. It seems
1460             # unlikely that it will go away in future URI versions, as
1461             # that would break existing versions of HTTP::Response.
1462 2 100 66     227 if ($base && $base =~ /^$URI::scheme_re:/o) {
1463             # already absolute
1464 1         5 return $base;
1465             }
1466 1         4 my $req = request $r;
1467 1 50       13 my $uri = $req ? uri $req : $doc->URL;
1468 1 50       37 return undef unless $uri;
1469             # Work around URI bug.
1470 1 50 33     12 if (!defined $base && $uri =~ /^[Dd][Aa][Tt][Aa]:/) {
1471 1         12 return $uri;
1472             }
1473 24     24   149 no warnings 'uninitialized';
  24         51  
  24         13406  
1474 0         0 ''.new_abs URI $base,$uri;
1475             }
1476             else {
1477 149         303 $doc->URL
1478             }
1479             }
1480              
1481             =item $tree->magic_forms
1482              
1483             This is mainly for internal use. It returns a boolean indicating whether
1484             the parser needed to associate formies with a form that did not contain
1485             them. This happens when a closing tag is missing and the form is
1486             closed implicitly, but a formie is encountered later.
1487              
1488             =cut
1489              
1490 1583 50   1583 1 3753 sub magic_forms { @_ and ++$_[0]{_HTML_DOM_mg_f}; $_[0]{_HTML_DOM_mg_f} }
  1583         2592  
1491              
1492             =back
1493              
1494             =head1 HASH ACCESS
1495              
1496             You can use an HTML::DOM object as a hash ref to access it's form elements
1497             by name. So C<< $doc->{yayaya} >> is short for
1498             S<< C<< $doc->forms->{yayaya} >> >>.
1499              
1500             =head1 EVENT HANDLING
1501              
1502             HTML::DOM supports both the DOM Level 2 event model and the HTML 4 event
1503             model.
1504              
1505             Throughout this documentation, we make use of HTML 5's distinction between
1506             handlers and listeners: An event handler is the result of an HTML element
1507             beginning with 'on', e.g. onsubmit. These are also accessible via the DOM.
1508             (We also use the word 'handler' in other contexts, such as the 'default
1509             event handler'.)
1510             Event listeners are registered solely with the C method
1511             and can be removed with C.
1512              
1513             HTML::DOM accepts as an event handler a coderef, an object with a
1514             C method, or an object with C<&{}> overloading. If the
1515             C method is present, it is called with the current event
1516             target as the first argument and the event object as the second.
1517             This is to allow for objects that wrap JavaScript functions (which must be called with the event target as the B value).
1518              
1519             An event listener is a coderef, an object with a C
1520             method or an object with C<&{}> overloading. HTML::DOM does not implement
1521             any classes that provide a C method, but will support any
1522             object that has one.
1523              
1524             Listeners and handlers differ in one important aspect. A listener has to
1525             call C on the event object to cancel the default action. A
1526             handler simply returns a defined false value (except for mouseover events,
1527             which must return a true value to cancel the default).
1528              
1529             =head2 Default Actions
1530              
1531             Default actions that HTML::DOM is capable of handling internally (such as
1532             triggering a DOMActivate event when an element is clicked, and triggering a
1533             form's submit event when the submit button is activated) are dealt with
1534             automatically. You don't have to worry about those. For others, read
1535             on....
1536              
1537             To specify the default actions associated with an event, provide a
1538             subroutine (in this case, it not being part of the DOM, you can't use an
1539             object with a C method) via the C
1540             and
1541             C methods.
1542              
1543             With the former, you can specify the
1544             default action to be taken when a particular type of event occurs. The
1545             currently supported types are:
1546              
1547             submit when a form is submitted
1548             link called when a link is activated (DOMActivate event)
1549              
1550             Pass the type of event as the first argument and a code ref as the second
1551             argument. When the code ref is called, its sole argument will
1552             be the event object. For instance:
1553              
1554             $dom_tree->default_event_handler_for( link => sub {
1555             my $event = shift;
1556             go_to( $event->target->href );
1557             });
1558             sub go_to { ... }
1559              
1560             C with just one argument returns the
1561             currently
1562             assigned coderef. With two arguments it returns the old one after
1563             assigning the new one.
1564              
1565             Use C (without the C<_for>) to specify a fallback
1566             subroutine that will be used for events not in the list above, and for
1567             events in the list above that do not have subroutines assigned to them.
1568             Without any arguments it will return the currently
1569             assigned coderef. With an argument it will return the old one after
1570             assigning the new one.
1571              
1572             =head2 Dispatching Events
1573              
1574             HTML::DOM::Node's C method triggers the appropriate event
1575             listeners, but does B call any default actions associated with it.
1576             The return value is a boolean that indicates whether the default action
1577             should be taken.
1578              
1579             H:D:Node's C method will trigger the event for real. It will
1580             call C and, provided it returns true, will call the default
1581             event handler.
1582              
1583             =head2 HTML Event Attributes
1584              
1585             The C can be used to assign a coderef that will turn
1586             text assigned to an event attribute (e.g., C) into an event
1587             handler. The
1588             arguments to the routine will be (0) the element, (1) the name (aka
1589             type) of
1590             the event (without the initial 'on'), (2) the value of the attribute and
1591             (3) the offset within the source of the attribute's value. (Actually, if
1592             the value is within quotes, it is the offset of the first quotation mark.
1593             Also, it will be C for generated HTML [source code passed to the
1594             C method by an element handler].)
1595             As
1596             with C, you
1597             can replace an existing handler with a new one, in which case the old
1598             handler is returned. If you call this method without arguments, it returns
1599             the current handler. Here is an example of its use, that assumes that
1600             handlers are Perl code:
1601              
1602             $dom_tree->event_attr_handler(sub {
1603             my($elem, $name, $code, $offset) = @_;
1604             my $sub = eval "sub { $code }";
1605             return sub {
1606             local *_ = \$elem;
1607             &$sub;
1608             };
1609             });
1610              
1611             The event attribute handler will be called whenever an element attribute
1612             whose name
1613             begins with 'on' (case-tolerant) is modified. (For efficiency's sake, I may
1614             change it to call the event attribute handler only when the event is
1615             triggered, so it is not called unnecessarily.)
1616              
1617             =head2 When an Event Handler Dies
1618              
1619             Use C to assign a coderef that will be called whenever an
1620             event listener (or handler) raises an error. The error will be contained in
1621             C<$@>.
1622              
1623             =head2 Other Event-Related Methods
1624              
1625             =over
1626              
1627             =item $tree->event_parent
1628              
1629             =item $tree->event_parent( $new_val )
1630              
1631             This method lets you provide an object that is added to the top of the
1632             event dispatch chain. E.g., if you want the view object (the value of
1633             C, aka the window) to have event handlers called before the
1634             document in the capture phase, and after it in the bubbling phase, you can
1635             set it like this (see also L, above):
1636              
1637             $tree->event_parent( $tree->defaultView );
1638              
1639             This holds a weak reference.
1640              
1641             =item $tree->event_listeners_enabled
1642              
1643             =item $tree->event_listeners_enabled( $new_val )
1644              
1645             This attribute, which is true by default, can be used to disable event
1646             handlers and listeners. (Default event handlers [see above] still run,
1647             though.)
1648              
1649             =back
1650              
1651             =cut
1652              
1653              
1654             # ---------- NON-DOM EVENT METHODS -------------- #
1655              
1656             sub event_attr_handler {
1657 21     21 0 538 my $old = $_[0]->{_HTML_DOM_event_attr_handler};
1658 21 100       53 $_[0]->{_HTML_DOM_event_attr_handler} = $_[1] if @_ > 1;
1659 21         70 $old;
1660             }
1661             sub default_event_handler {
1662 1889     1889 0 2772 my $old = $_[0]->{_HTML_DOM_default_event_handler};
1663 1889 100       3796 $_[0]->{_HTML_DOM_default_event_handler} = $_[1] if @_ > 1;
1664 1889         3613 $old;
1665             }
1666             sub default_event_handler_for {
1667 223     223 0 430 my $old = $_[0]->{_HTML_DOM_dehf}{$_[1]};
1668 223 100       508 $_[0]->{_HTML_DOM_dehf}{$_[1]} = $_[2] if @_ > 2;
1669 223         837 $old;
1670             }
1671             sub error_handler {
1672 1898     1898 1 3226 my $old = $_[0]->{_HTML_DOM_error_handler};
1673 1898 100       3685 $_[0]->{_HTML_DOM_error_handler} = $_[1] if @_ > 1;
1674 1898         4479 $old;
1675             }
1676              
1677             sub event_parent {
1678 680     680 1 994 my $old = (my $self = shift) ->{_HTML_DOM_event_parent};
1679 680 100       1314 weaken($self->{_HTML_DOM_event_parent} = shift) if @_;
1680 680         2087 $old
1681             }
1682              
1683             sub event_listeners_enabled {
1684 1894     1894 1 3816 my $old = (my $Self = shift)->{_HTML_DOM_doevents};
1685 1894 100       3716 @_ and $$Self{_HTML_DOM_doevents} = !!shift;
1686 1894 100       5265 defined $old ? $old : 1; # true by default
1687             }
1688              
1689              
1690             # ---------- NODE AND SHEET LIST HELPER METHODS -------------- #
1691              
1692             sub _modified { # tells all it's magic nodelists that they're stale
1693             # and also rewrites the style sheet list if present
1694 376     376   696 my $list = $_[0]{_HTML_DOM_node_lists};
1695 376         580 my $list_is_stale;
1696 376         662 for (@$list) {
1697 276 100       645 defined() ? $_->_you_are_stale : ++$list_is_stale
1698             }
1699 376 100       650 if($list_is_stale) {
1700 48         146 @$list = grep defined, @$list;
1701 48         129 weaken $_ for @$list;
1702             }
1703            
1704 376         704 $_[0]->_populate_sheet_list
1705             }
1706              
1707             sub _populate_sheet_list { # called both by styleSheets and _modified
1708 413   100 413   586 for($_[0]->{_HTML_DOM_sheets}||return) {
1709 12         50 @$_ = map sheet $_,
1710             $_[0]->look_down(_tag => qr/^(?:link|style)\z/);
1711             }
1712             }
1713              
1714             sub _register_magic_node_list { # adds the node list to the list of magic
1715             # node lists that get notified automatic-
1716             # ally whenever the doc structure changes
1717 205     205   256 push @{$_[0]{_HTML_DOM_node_lists}}, $_[1];
  205         433  
1718 205         402 weaken $_[0]{_HTML_DOM_node_lists}[-1];
1719             }
1720              
1721              
1722              
1723             1;
1724             __END__