File Coverage

blib/lib/HTML/HTML5/Parser.pm

Criterion	Covered	Total	%
statement	16	18	88.8
branch			n/a
condition			n/a
subroutine	6	6	100.0
pod			n/a
total	22	24	91.6

line	stmt	sub	time	code
1				package HTML::HTML5::Parser;
2
3				## skip Test::Tabs
4	10	10	505889	use 5.008001;
	10		45
	10		537
5	10	10	68	use strict;
	10		172
	10		483
6	10	10	56	use warnings;
	10		27
	10		659
7
8				our $AUTOLOAD;
9				our $VERSION = '0.301';
10
11	10	10	58	use Carp;
	10		27
	10		1224
12	10	10	15550	use HTML::HTML5::Parser::Error;
	10		111
	10		317
13	10	10	20978	use HTML::HTML5::Parser::TagSoupParser;
	0
	0
14				use Scalar::Util qw(blessed);
15				use URI::file;
16				use XML::LibXML;
17
# Compile-time guard: refuse to load when the installed XML::LibXML reports
# a version string beginning with 1.91 or 1.92.
BEGIN {
	if (XML::LibXML->VERSION =~ /^1\.9[12]/)
	{
		croak "Please upgrade to XML::LibXML 1.94";
	}
}
22
# Constructor.
#
# Every key/value argument is forwarded unchanged to
# HTML::HTML5::Parser::TagSoupParser->new. The new object holds that
# tag soup parser under "parser" and an empty list under "errors".
sub new
{
	my ($class, %p) = @_;

	my %state = (
		errors => [],
		parser => HTML::HTML5::Parser::TagSoupParser->new(%p),
	);

	return bless \%state, $class;
}
33
# Retrieve a document and parse it.
#
# Arguments:
#   $file - a URI object, a string shaped like "scheme:rest" (treated as a
#           URL), or any other string (treated as a local file path).
#   $opts - optional hashref. Keys read here: user_agent,
#           ignore_http_response_code, force_html, xml_catalogue.
#           Keys written here: response, and parser_used (on the XML path;
#           the HTML path delegates to parse_string with the same hashref).
#
# Returns the document produced by XML::LibXML when the Content-Type header
# mentions "xml" and force_html is not set; otherwise the result of
# parse_string on the decoded content.
#
# Croaks when the response is not flagged successful, unless
# ignore_http_response_code is set.
sub parse_file
{
	require HTML::HTML5::Parser::UA;

	my $self = shift;
	my $file = shift;
	my $opts = shift || {};

	unless (blessed($file) and $file->isa('URI'))
	{
		if ($file =~ /^[a-z0-9_\.-]+:\S+$/i)
			{ $file = URI->new($file); }
		else
			{ $file = URI::file->new_abs($file); }
	}

	my $response = HTML::HTML5::Parser::UA->get($file, $opts->{user_agent});
	croak "HTTP response code was not 200 OK. (Set \$opts{ignore_http_response_code} to ignore this error.)"
		unless ($response->{success} || $opts->{ignore_http_response_code});

	my $content = $response->{decoded_content};
	my $c_type  = $response->{headers}{'content-type'};

	$opts->{'response'} = $response;

	# The Content-Type header may be absent; test definedness first so the
	# pattern match does not warn about an uninitialized value.
	if (defined $c_type and $c_type =~ /xml/i and not $opts->{'force_html'})
	{
		$opts->{'parser_used'} = 'XML::LibXML::Parser';
		my $xml_parser = XML::LibXML->new;
		$xml_parser->validation(0);
		$xml_parser->recover(2);
		# NOTE(review): $response is otherwise accessed as a plain hash
		# here; confirm that it also provides a base() method.
		$xml_parser->base_uri($response->base);

		# Only consult the catalogue when one was actually supplied.
		my $catalogue = $opts->{'xml_catalogue'};
		$xml_parser->load_catalog($catalogue)
			if defined $catalogue and -r $catalogue;

		return $xml_parser->parse_string($content);
	}

	return $self->parse_string($content, $opts);
}
*parse_html_file = \&parse_file;
74
# Read everything from a file handle and parse it.
#
# Arguments:
#   $handle - a readable file handle.
#   $opts   - optional hashref, passed through to parse_string.
#
# Returns whatever parse_string returns for the text that was read.
sub parse_fh
{
	my $self   = shift;
	my $handle = shift;
	my $opts   = shift || {};

	# Read in list context and join, rather than looping with an implicit
	# assignment to the global $_, so the caller's $_ is left untouched.
	# An exhausted or empty handle yields the empty string.
	my $string = join '', <$handle>;

	return $self->parse_string($string, $opts);
}
*parse_html_fh = \&parse_fh;
90
# Parse an HTML document held in a string.
#
# Arguments:
#   $text - the document source.
#   $opts - optional hashref. 'encoding' is read and handed to the tag soup
#           parser; 'parser_used' is set to 'HTML::HTML5::Parser'.
#
# Resets the object's error list, then collects one
# HTML::HTML5::Parser::Error per problem reported by the tag soup parser;
# each error is also passed to the error handler if one is set.
#
# Returns the XML::LibXML::Document that the parser populated.
sub parse_string
{
	my $self = shift;
	my $text = shift;
	my $opts = shift || {};

	$self->{'errors'} = [];
	$opts->{'parser_used'} = 'HTML::HTML5::Parser';
	my $dom = XML::LibXML::Document->createDocument;

	# The byte-string entry point is always used, whether or not an
	# encoding was supplied.
	#
	# The callback closes over copies of the handler and the error list
	# instead of $self; the original author flags this as necessary to
	# stop enormous memory leaks (presumably by keeping $self out of the
	# closure).
	my ($errh, $errors) = @{$self}{qw(error_handler errors)};
	$self->{parser}->parse_byte_string(
		$opts->{'encoding'}, $text, $dom,
		sub {
			my $err = HTML::HTML5::Parser::Error->new(@_);
			$errh->($err) if $errh;
			push @$errors, $err;
		});

	return $dom;
}
*parse_html_string = \&parse_string;
125
# Maps an element name to the chain of elements, outermost first, that
# parse_balanced_chunk opens in front of a chunk; the last name in each
# chain is the element whose contents are returned. Names mapped to undef
# cannot be used as a container (parse_balanced_chunk croaks for them).
#
# TODO: noembed, noframes, noscript
my %within = (
	html       => [qw/html/],
	frameset   => [qw/html frameset/],
	frame      => [qw/html frameset frame/],
	head       => [qw/html head/],
	title      => [qw/html head title/],
	style      => [qw/html head style/],
	(map { $_  => undef }
		qw/base link meta basefont bgsound/),
	body       => [qw/html body/],
	script     => [qw/html body script/],
	div        => [qw/html body div/],
	(map { $_  => [qw/html body div/, $_] }
		qw/a abbr acronym address applet area article aside big blockquote
		button center code details dir dl em fieldset figure font
		footer form h1 h2 h3 h4 h5 h6 header hgroup i iframe
		listing marquee menu nav nobr object ol p plaintext pre
		ruby s section small strike strong tt u ul xmp/),
	(map { $_  => undef }
		qw/br col command datagrid embed hr img input keygen
		param wbr/),
	dd         => [qw/html body dl dd/],
	dt         => [qw/html body dl dt/],
	figcaption => [qw/html body figure/],
	li         => [qw/html body ul li/],
	ul__li     => [qw/html body ul li/],
	ol__li     => [qw/html body ol li/],
	optgroup   => [qw/html body form div select/],
	option     => [qw/html body form div select/],
	rp         => [qw/html body div ruby/],
	rt         => [qw/html body div ruby/],
	select     => [qw/html body form div select/],
	summary    => [qw/html body div details/],
	table      => [qw/html body table/],
	(map { $_  => [qw/html body table/, $_] }
		qw/thead tfoot tbody tr caption colgroup/),
	(map { $_  => [qw/html body table tbody tr/, $_] }
		qw/td th/),
	textarea   => [qw/html body form div textarea/],
);
167
# Parse a fragment of HTML as though it appeared inside a given element.
#
# Arguments:
#   $chunk - the markup to parse.
#   $o     - optional hashref of options:
#              within        - container element name (default 'div').
#              force_within  - like within, but nodes that end up outside
#                              the container are not returned; takes
#                              precedence over within.
#              mark_outliers - flag returned nodes found outside the
#                              container with the attribute
#                              data-perl-html-html5-parser-outlier.
#              as            - 'list' to get an XML::LibXML::NodeList in
#                              scalar context.
#              debug         - emit the synthesised document and the
#                              resulting DOM via Test::More::diag when
#                              that function exists, else via warn.
#
# Returns the list of nodes in list context; in scalar context an
# XML::LibXML::DocumentFragment, or a NodeList when as => 'list'.
#
# Croaks when the requested container has no usable entry in %within.
sub parse_balanced_chunk
{
	my ($self, $chunk, $o) = @_;
	my %options = %{ $o || {} };

	$options{as} = 'default' unless defined $options{as};

	my $w = $options{force_within} || $options{within} || 'div';
	my $ancestors = $within{ lc $w };
	croak "Cannot parse chunk as if within $w."
		if !defined $ancestors;

	# The last name in the chain is the container; everything before it is
	# opened first. A one-element chain gives an empty slice (0 .. -1).
	my $parent = $ancestors->[-1];
	my @outer  = @$ancestors[0 .. $#$ancestors - 1];

	# The container is tagged with a random id so it can be located again
	# after parsing. Closing tags are deliberately not appended.
	my $uniq = sprintf('rand_id_%09d', int rand 1_000_000_000);
	my $document =
		"<!doctype html>\n".
		(join q{}, map { "<$_>" } @outer).
		"<$parent id='$uniq'>".
		$chunk;

	my $dom = $self->parse_html_string($document);
	$parent = $dom->findnodes("//*[\@id='$uniq']")->get_node(1);

	if ($options{debug})
	{
		if (exists &Test::More::diag)
		{
			Test::More::diag($document);
			Test::More::diag($dom->toString);
		}
		else
		{
			warn $document."\n";
			warn $dom->toString."\n";
		}
	}

	my @results = $parent->childNodes;

	unless ($options{force_within})
	{
		# Walk from the container up to the root. At each level, collect
		# the siblings of the current node: following siblings are appended
		# and preceding siblings are prepended. head and body elements are
		# skipped.
		for (my $level = $parent; $level; $level = $level->parentNode)
		{
			for (my $sib = $level->nextSibling; $sib; $sib = $sib->nextSibling)
			{
				next if $sib->nodeName =~ /^(head|body)$/;
				$sib->setAttribute('data-perl-html-html5-parser-outlier', 1)
					if $options{mark_outliers}
					&& $sib->can('setAttribute');
				push @results, $sib;
			}

			for (my $sib = $level->previousSibling; $sib; $sib = $sib->previousSibling)
			{
				next if $sib->nodeName =~ /^(head|body)$/;
				$sib->setAttribute('data-perl-html-html5-parser-outlier', 1)
					if $options{mark_outliers}
					&& $sib->can('setAttribute');
				unshift @results, $sib;
			}
		}
	}

	# The nodes are appended to a fragment regardless of which return form
	# is used.
	my $frag = XML::LibXML::DocumentFragment->new;
	$frag->appendChild($_) foreach @results;

	if (lc $options{as} eq 'list')
	{
		return wantarray ? @results : XML::LibXML::NodeList->new(@results);
	}

	return wantarray ? @results : $frag;
}
256
# XML::LibXML-style entry point: parse HTML from a location, a string or a
# file handle.
#
# May be called on the class (a parser is constructed via new) or on an
# existing parser object. Arguments are key/value pairs and/or hashrefs of
# such pairs. Exactly one source is used, checked in this order:
#   location - handed to parse_file (stringified)
#   string   - handed to parse_string
#   IO       - handed to parse_fh
#
# A URI argument is accepted but not used: the parse_* methods take an
# options hashref as their second argument, not a base URI, so it must not
# be forwarded in that position.
#
# Returns the parsed document. Croaks when no source is given.
sub load_html
{
	my $class_or_self = shift;

	my %args = map { ref($_) eq 'HASH' ? (%$_) : $_ } @_;
	delete $args{URI};

	my $parser = ref($class_or_self)
		? $class_or_self
		: $class_or_self->new;

	my $dom;
	if ( defined $args{location} )
		{ $dom = $parser->parse_file( "$args{location}" ) }
	elsif ( defined $args{string} )
		{ $dom = $parser->parse_string( $args{string} ) }
	elsif ( defined $args{IO} )
		{ $dom = $parser->parse_fh( $args{IO} ) }
	else
		{ croak("HTML::HTML5::Parser->load_html: specify location, string, or IO"); }

	return $dom;
}
280
# Try XML::LibXML->load_xml with the given arguments first. If that yields
# a blessed object it is returned; if it dies or yields anything else, the
# same arguments are handed to load_html instead.
sub load_xml
{
	my ($self, @args) = @_;

	my $dom = eval { XML::LibXML->load_xml(@args) };

	return blessed($dom) ? $dom : $self->load_html(@args);
}
291
# Catch-all for XML::LibXML parser methods that this class does not define.
#
# The unqualified method name is taken from $AUTOLOAD and matched
# case-insensitively against four groups:
#   * push-parser methods                 - croak
#   * other methods with no equivalent    - croak
#   * options that are always on          - return 1; carp if the caller
#                                           passes a defined false value
#   * options that are always off         - return 0; carp if the caller
#                                           passes a true value
# Any other non-empty name produces a carp.
sub AUTOLOAD
{
	my $self = shift;
	my $func = $AUTOLOAD;
	$func =~ s/.*://;

	# LibXML Push Parser.
	if ($func =~ /^( parse_chunk | start_push | push | finish_push )$/xi)
	{
		croak "Push parser ($func) not implemented by HTML::HTML5::Parser.";
	}

	# Misc LibXML functions with no compatible interface provided.
	if ($func =~ /^( parse_balanced_chunk | parse_xml_chunk |
		process_?xincludes | get_last_error )$/xi)
	{
		croak "$func not implemented by HTML::HTML5::Parser.";
	}

	# Fixed options which are true.
	if ($func =~ /^( recover | recover_silently | expand_entities |
		keep_blanks | no_network )$/xi)
	{
		my $set = shift;
		if ((!$set) && defined $set)
		{
			carp "Option $func cannot be switched off.";
		}
		return 1;
	}

	# Fixed options which are false. Under /x whitespace is ignored, so
	# every alternative needs its own "|" separator.
	if ($func =~ /^( validation | pedantic_parser | line_numbers |
		load_ext_dtd | complete_attributes | expand_xinclude |
		load_catalog | base_uri | gdome_dom | clean_namespaces )$/xi)
	{
		my $set = shift;
		if (($set) && defined $set)
		{
			carp "Option $func cannot be switched on.";
		}
		return 0;
	}

	carp "HTML::HTML5::Parser doesn't understand '$func'." if length $func;
}
338
# Combined getter/setter for the error handler.
#
# With an argument (including an explicit undef) the stored handler is
# replaced by it. Always returns the handler stored afterwards.
sub error_handler
{
	my ($self, @replacement) = @_;

	if (@replacement)
	{
		$self->{error_handler} = $replacement[0];
	}

	return $self->{error_handler};
}
345
# Returns the contents of the object's error list (in scalar context, the
# number of entries).
sub errors
{
	my ($self) = @_;
	my $list = $self->{errors};
	return @$list;
}
351
# Looks up the 'manakai_compat_mode' entry in the data the tag soup parser
# holds for $node and returns it.
sub compat_mode
{
	my ($self, $node) = @_;
	my $meta = $self->{parser}->_data($node);
	return $meta->{'manakai_compat_mode'};
}
359
# Looks up the 'charset' entry in the data the tag soup parser holds for
# $node and returns it.
sub charset
{
	my ($self, $node) = @_;
	my $meta = $self->{parser}->_data($node);
	return $meta->{'charset'};
}
367
# Looks up the 'DTD_PUBLIC_ID' entry in the data the tag soup parser holds
# for $node and returns it.
sub dtd_public_id
{
	my ($self, $node) = @_;
	my $meta = $self->{parser}->_data($node);
	return $meta->{'DTD_PUBLIC_ID'};
}
375
# Looks up the 'DTD_SYSTEM_ID' entry in the data the tag soup parser holds
# for $node and returns it.
sub dtd_system_id
{
	my ($self, $node) = @_;
	my $meta = $self->{parser}->_data($node);
	return $meta->{'DTD_SYSTEM_ID'};
}
383
# Looks up the 'DTD_ELEMENT' entry in the data the tag soup parser holds
# for $node and returns it.
sub dtd_element
{
	my ($self, $node) = @_;
	my $meta = $self->{parser}->_data($node);
	return $meta->{'DTD_ELEMENT'};
}
391
# Reports where in the source a node came from.
#
# When called on an object, the node's data comes from that object's tag
# soup parser; when called on the class, it comes from
# HTML::HTML5::Parser::TagSoupParser::DATA.
#
# Scalar context: returns the 'manakai_source_line' entry.
# List context:   returns (line, 'manakai_source_column' entry, implied
#                 flag), where the flag is 0 when the 'implied' entry is
#                 false or absent.
sub source_line
{
	my ($self, $node) = @_;

	my $data;
	if (ref $self)
	{
		$data = $self->{parser}->_data($node);
	}
	else
	{
		$data = HTML::HTML5::Parser::TagSoupParser::DATA($node);
	}

	my $line = $data->{'manakai_source_line'};
	return $line unless wantarray;

	my $implied = $data->{'implied'} || 0;
	return ($line, $data->{'manakai_source_column'}, $implied);
}
414
# Intentionally empty destructor. Defining it here means object
# destruction is handled by this sub rather than by the AUTOLOAD
# catch-all defined in this package.
sub DESTROY
{
	return;
}
416
417				__END__
418
419				=pod
420
421				=encoding utf8
422
423				=begin stopwords
424
425				XML::LibXML-like
426				XML::LibXML-Compatible
427				'utf-8')
428				foobar
429				doctype:
430				html
431				implictness
432
433				=end stopwords
434
435				=head1 NAME
436
437				HTML::HTML5::Parser - parse HTML reliably
438
439				=head1 SYNOPSIS
440
441				use HTML::HTML5::Parser;
442
443				my $parser = HTML::HTML5::Parser->new;
444				my $doc = $parser->parse_string(<<'EOT');
445				<!doctype html>
446				<title>Foo</title>
447				<p><b><i>Foo</b> bar</i>.
448				<p>Baz</br>Quux.
449				EOT
450
451				my $fdoc = $parser->parse_file( $html_file_name );
452				my $fhdoc = $parser->parse_fh( $html_file_handle );
453
454				=head1 DESCRIPTION
455
456				This library is substantially the same as the non-CPAN module Whatpm::HTML.
457				Changes include:
458
459				=over 8
460
461				=item * Provides an XML::LibXML-like DOM interface. If you usually use XML::LibXML's DOM parser, this should be a drop-in solution for tag soup HTML.
462
463				=item * Constructs an XML::LibXML::Document as the result of parsing.
464
465				=item * Via bundling and modifications, removed external dependencies on non-CPAN packages.
466
467				=back
468
469				=head2 Constructor
470
471				=over 8
472
473				=item C<new>
474
475				$parser = HTML::HTML5::Parser->new;
476				# or
477				$parser = HTML::HTML5::Parser->new(no_cache => 1);
478
479				The constructor does nothing interesting besides take one flag
480				argument, C<no_cache =E<gt> 1>, to disable the global element metadata
481				cache. Disabling the cache is handy for conserving memory if you parse
482				a large number of documents; however, class methods such as
483				L</source_line> will then not work, and must instead be called on an
484				instance of this parser.
485
486				=back
487
488				=head2 XML::LibXML-Compatible Methods
489
490				=over
491
492				=item C<parse_file>, C<parse_html_file>
493
494				$doc = $parser->parse_file( $html_file_name [,\%opts] );
495
496				This function parses an HTML document from a file or network;
497				C<$html_file_name> can be either a filename or an URL.
498
499				Options include 'encoding' to indicate file encoding (e.g.
500				'utf-8') and 'user_agent' which should be a blessed C<LWP::UserAgent>
501				(or L<HTTP::Tiny>) object to be used when retrieving URLs.
502
503				If requesting a URL and the response Content-Type header indicates
504				an XML-based media type (such as XHTML), XML::LibXML::Parser
505				will be used automatically (instead of the tag soup parser). The XML
506				parser can be told to use a DTD catalogue by setting the option
507				'xml_catalogue' to the filename of the catalogue.
508
509				HTML (tag soup) parsing can be forced using the option 'force_html', even
510				when an XML media type is returned. If an options hashref was passed,
511				parse_file will set $options->{'parser_used'} to the name of the class used
512				to parse the URL, to allow the calling code to double-check which parser
513				was used afterwards.
514
515				If an options hashref was passed, parse_file will set $options->{'response'}
516				to the HTTP::Response object obtained by retrieving the URI.
517
518				=item C<parse_fh>, C<parse_html_fh>
519
520				$doc = $parser->parse_fh( $io_fh [,\%opts] );
521
522				C<parse_fh()> parses an IOREF or a subclass of C<IO::Handle>.
523
524				Options include 'encoding' to indicate file encoding (e.g.
525				'utf-8').
526
527				=item C<parse_string>, C<parse_html_string>
528
529				$doc = $parser->parse_string( $html_string [,\%opts] );
530
531				This function is similar to C<parse_fh()>, but it parses an HTML
532				document that is available as a single string in memory.
533
534				Options include 'encoding' to indicate file encoding (e.g.
535				'utf-8').
536
537				=item C<load_xml>, C<load_html>
538
539				Wrappers for the parse_* functions. These should be roughly compatible with
540				the equivalently named functions in L<XML::LibXML>.
541
542				Note that C<load_xml> first attempts to parse as real XML, falling back to
543				HTML5 parsing; C<load_html> just goes straight for HTML5.
544
545				=item C<parse_balanced_chunk>
546
547				$fragment = $parser->parse_balanced_chunk( $string [,\%opts] );
548
549				This method is roughly equivalent to XML::LibXML's method of the same
550				name, but unlike XML::LibXML, and despite its name, it does not require
551				the chunk to be "balanced". This method is somewhat black magic, but
552				should work, and do the proper thing in most cases. Of course, the
553				proper thing might not be what you'd expect! I'll try to keep this
554				explanation as brief as possible...
555
556				Consider the following string:
557
558				<b>Hello</b></td></tr> <i>World</i>
559
560				What is the proper way to parse that? If it were found in a document like
561				this:
562
563				<html>
564				<head><title>X</title></head>
565				<body>
566				<div>
567				<b>Hello</b></td></tr> <i>World</i>
568				</div>
569				</body>
570				</html>
571
572				Then the document would end up equivalent to the following XHTML:
573
574				<html>
575				<head><title>X</title></head>
576				<body>
577				<div>
578				<b>Hello</b> <i>World</i>
579				</div>
580				</body>
581				</html>
582
583				The superfluous C<< </td></tr> >> is simply ignored. However, if it
584				were found in a document like this:
585
586				<html>
587				<head><title>X</title></head>
588				<body>
589				<table><tbody><tr><td>
590				<b>Hello</b></td></tr> <i>World</i>
591				</td></tr></tbody></table>
592				</body>
593				</html>
594
595				Then the result would be:
596
597				<html>
598				<head><title>X</title></head>
599				<body>
600				<i>World</i>
601				<table><tbody><tr><td>
602				<b>Hello</b></td></tr>
603				</tbody></table>
604				</body>
605				</html>
606
607				Yes, C<< <i>World</i> >> gets hoisted up before the C<< <table> >>. This
608				is weird, I know, but it's how browsers do it in real life.
609
610				So what should:
611
612				$string = q{<b>Hello</b></td></tr> <i>World</i>};
613				$fragment = $parser->parse_balanced_chunk($string);
614
615				actually return? Well, you can choose...
616
617				$string = q{<b>Hello</b></td></tr> <i>World</i>};
618
619				$frag1 = $parser->parse_balanced_chunk($string, {within=>'div'});
620				say $frag1->toString; # <b>Hello</b> <i>World</i>
621
622				$frag2 = $parser->parse_balanced_chunk($string, {within=>'td'});
623				say $frag2->toString; # <i>World</i><b>Hello</b>
624
625				If you don't pass a "within" option, then the chunk is parsed as if it
626				were within a C<< <div> >> element. This is often the most sensible
627				option. If you pass something like C<< { within => "foobar" } >>
628				where "foobar" is not a real HTML element name (as found in the HTML5
629				spec), then this method will croak; if you pass the name of a void
630				element (e.g. C<< "br" >> or C<< "meta" >>) then this method will
631				croak; there are a handful of other unsupported elements which will
632				croak (namely: C<< "noscript" >>, C<< "noembed" >>, C<< "noframes" >>).
633
634				Note that the second time around, although we parsed the string "as
635				if it were within a C<< <td> >> element", the C<< <i>World</i> >>
636				bit did not strictly end up within the C<< <td> >> element (not
637				even within the C<< <table> >> element!) yet it still gets returned.
638				We'll call things such as this "outliers". There is a "force_within"
639				option which tells parse_balanced_chunk to ignore outliers:
640
641				$frag3 = $parser->parse_balanced_chunk($string,
642				{force_within=>'td'});
643				say $frag3->toString; # <b>Hello</b>
644
645				There is a boolean option "mark_outliers" which marks each outlier
646				with an attribute (C<< data-perl-html-html5-parser-outlier >>) to
647				indicate its outlier status. Clearly, this is ignored when you use
648				"force_within" because no outliers are returned. Some outliers may
649				be XML::LibXML::Text elements; text nodes don't have attributes, so
650				these will not be marked with an attribute.
651
652				A last note is to mention what gets returned by this method. Normally
653				it's an L<XML::LibXML::DocumentFragment> object, but if you call the
654				method in list context, a list of the individual node elements is
655				returned. Alternatively you can request the data to be returned as an
656				L<XML::LibXML::NodeList> object:
657
658				# Get an XML::LibXML::NodeList
659				my $list = $parser->parse_balanced_chunk($str, {as=>'list'});
660
661				The exact implementation of this method may change from version to
662				version, but the long-term goal will be to approach how common
663				desktop browsers parse HTML fragments when implementing the setter
664				for DOM's C<innerHTML> attribute.
665
666				=back
667
668				The push parser and SAX-based parser are not supported. Trying
669				to change an option (such as recover_silently) will make
670				HTML::HTML5::Parser carp a warning. (But you can inspect the
671				options.)
672
673				=head2 Error Handling
674
675				Error handling is obviously different to XML::LibXML, as errors are
676				(bugs notwithstanding) non-fatal.
677
678				=over
679
680				=item C<error_handler>
681
682				Get/set an error handling function. Must be set to a coderef or undef.
683
684				The error handling function will be called with a single parameter, a
685				L<HTML::HTML5::Parser::Error> object.
686
687				=item C<errors>
688
689				Returns a list of errors that occurred during the last parse.
690
691				See L<HTML::HTML5::Parser::Error>.
692
693				=back
694
695				=head2 Additional Methods
696
697				The module provides a few methods to obtain additional, non-DOM data from
698				DOM nodes.
699
700				=over
701
702				=item C<dtd_public_id>
703
704				$pubid = $parser->dtd_public_id( $doc );
705
706				For an XML::LibXML::Document which has been returned by
707				HTML::HTML5::Parser, using this method will tell you the
708				Public Identifier of the DTD used (if any).
709
710				=item C<dtd_system_id>
711
712				$sysid = $parser->dtd_system_id( $doc );
713
714				For an XML::LibXML::Document which has been returned by
715				HTML::HTML5::Parser, using this method will tell you the
716				System Identifier of the DTD used (if any).
717
718				=item C<dtd_element>
719
720				$element = $parser->dtd_element( $doc );
721
722				For an XML::LibXML::Document which has been returned by
723				HTML::HTML5::Parser, using this method will tell you the
724				root element declared in the DTD used (if any). That is,
725				if the document has this doctype:
726
727				<!doctype html>
728
729				... it will return "html".
730
731				This may return the empty string if a DTD was present but
732				did not contain a root element; or undef if no DTD was
733				present.
734
735				=item C<compat_mode>
736
737				$mode = $parser->compat_mode( $doc );
738
739				Returns 'quirks', 'limited quirks' or undef (standards mode).
740
741				=item C<charset>
742
743				$charset = $parser->charset( $doc );
744
745				The character set apparently used by the document.
746
747				=item C<source_line>
748
749				($line, $col) = $parser->source_line( $node );
750				$line = $parser->source_line( $node );
751
752				In scalar context, C<source_line> returns the line number of the
753				source code that started a particular node (element, attribute or
754				comment).
755
756				In list context, returns a tuple: $line, $column, $implicitness.
757				Tab characters count as one column, not eight.
758
759				$implicitness indicates that the node was not explicitly marked
760				up in the source code, but its existence was inferred by the parser.
761				For example, in the following markup, the HTML, TITLE and P elements
762				are explicit, but the HEAD and BODY elements are implicit.
763
764				<html>
765				<title>I have an implicit head</title>
766				<p>And an implicit body too!</p>
767				</html>
768
769				(Note that implicit elements do still have a line number and column
770				number.) The implicitness indicator is a new feature, and I'd appreciate
771				any bug reports where it gets things wrong.
772
773				L<XML::LibXML::Node> has a C<line_number> method. In general this
774				will always return 0 and HTML::HTML5::Parser has no way of influencing
775				it. However, if you install L<XML::LibXML::Devel::SetLineNumber> on
776				your system, the C<line_number> method will start working (at least for
777				elements).
778
779				=back
780
781				=head1 SEE ALSO
782
783				L<http://suika.fam.cx/www/markup/html/whatpm/Whatpm/HTML.html>.
784
785				L<HTML::HTML5::Writer>,
786				L<HTML::HTML5::Builder>,
787				L<XML::LibXML>,
788				L<XML::LibXML::PrettyPrint>,
789				L<XML::LibXML::Devel::SetLineNumber>.
790
791				=head1 AUTHOR
792
793				Toby Inkster, E<lt>tobyink@cpan.orgE<gt>
794
795				=head1 COPYRIGHT AND LICENCE
796
797				Copyright (C) 2007-2011 by Wakaba
798
799				Copyright (C) 2009-2012 by Toby Inkster
800
801				This library is free software; you can redistribute it and/or modify
802				it under the same terms as Perl itself.
803
804				=head1 DISCLAIMER OF WARRANTIES
805
806				THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
807				WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
808				MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
809