File Coverage

blib/lib/HTML/Element/Convert.pm

Criterion	Covered	Total	%
statement	31	85	36.4
branch	5	32	15.6
condition	0	5	0.0
subroutine	6	18	33.3
pod	0	3	0.0
total	42	143	29.3

line	stmt	bran	cond	sub	pod	time	code
1							package HTML::Element::Convert;
2
3	1			1		20551	use warnings;
	1					3
	1					28
4	1			1		5	use strict;
	1					2
	1					49
5
6							=head1 NAME
7
8							HTML::Element::Convert - Monkeypatch content conversion methods into HTML::Element
9
10							=head1 VERSION
11
12							Version 0.10
13
14							=cut
15
16							our $VERSION = '0.10';
17
18							=head1 SYNOPSIS
19
20							use HTML::TreeBulder;
21							use HTML::Element::Convert;
22
23							my $tree = HTML::TreeBuilder->new_from_content($html);
24
25							# Search for some JSON-encoded meta-data embedded in the document and extract it:
26							my $element = $tree->look_down(...);
27							my $hash = $element->extract_content;
28
29							# This time extract the YAML data and delete the containing element from the tree:
30							$element = $tree->look_down(...);
31							$hash = $element->pull_content;
32
33							# Convert the content of any with a 'lang="markdown"' attribute into HTML:
34							$tree->convert_content;
35
36							=over 4
37
38							=item $element->convert_content
39
40							Look for every div below $element containing a C attribute. If it recognizes C, it
41							will convert and replace the div's content.
42
43							Currently, only markdown is supported.
44
45							=item $content = $element->extract_content([TYPE])
46
47							Extract and parse the content of $element. If C is given, then this method will assume the content is of the given type, and try to parse it accordingly. Otherwise it will use the C attribute of $element to detemine the type.
48
49							=item $content = $element->pull_content([TYPE])
50
51							Like C, extract and parse the content of $element. It will also delete $element from the tree.
52
53							=back
54
55							=cut
56
57	1			1		5	use Carp;
	1					6
	1					242
58
59							our %PARSE_FUNC;
60							our %EXTRACT_FUNC;
61							BEGIN {
62	1	50		1		3	1 and do { eval { require Text::Markdown }; $PARSE_FUNC{markdown} = \&Text::Markdown::markdown unless $@ };
	1					2
	1					413
	1					7
63	1	50				1	1 and do { eval { require JSON }; $PARSE_FUNC{JSON} = \&JSON::jsonToObj unless $@ };
	1					1
	1					312
	1					4
64	1	50				2	1 and do { eval { require YAML::Syck }; $PARSE_FUNC{YAML} = \&YAML::Syck::Load unless $@ };
	1					1
	1					308
	1					6
65	1	50				4	$PARSE_FUNC{YAML} or do { eval { require YAML }; $PARSE_FUNC{YAML} = \&YAML::Load unless $@ };
	1	50				2
	1					299
	1					387
66							}
67
68
69	0			0			sub _as_text($) { return shift->as_text }
70	0	0		0			sub _as_raw_HTML($) { return join '', map { if (ref $_) { $_ = $_->as_XML; chomp $_; $_ =~ s/"/'/g } $_ } shift->content_list }
	0
	0
	0
	0
	0
71
72	0			0			sub _extract_text { return _as_text shift }
73							$EXTRACT_FUNC{text} = \&_extract_text;
74
75	0			0			sub _extract_YAML { return $PARSE_FUNC{YAML}->(_as_raw_HTML(shift) . "\n") }
76							$EXTRACT_FUNC{YAML} = \&_extract_YAML;
77
78	0			0			sub _extract_JSON { return $PARSE_FUNC{JSON}->(_as_raw_HTML shift) }
79							$EXTRACT_FUNC{JSON} = \&_extract_JSON;
80
81	0			0			sub _extract_markdown { return HTML::TreeBuilder->new_from_content($PARSE_FUNC{markdown}->(_as_text shift)) }
82							$EXTRACT_FUNC{markdown} = \&_extract_markdown;
83
84							sub _extract {
85	0			0			my $element = shift;
86	0						my $type = shift;
87
88	0	0					$type = "plain" if $type eq "text";
89	0						my $func;
90	0						for (qw(plain JSON YAML markdown)) {
91	0	0					if ($type =~ m/^$_$/i) {
92	0						$func = $_;
93	0						last;
94							}
95							}
96
97	0	0					return unless $func;
98	0						return $EXTRACT_FUNC{$func}->($element);
99							}
100
101							package HTML::Element;
102
103	1			1		5	use Carp;
	1					1
	1					52
104	1			1		949	use UNIVERSAL;
	1					12
	1					3
105
106							sub extract_content {
107	0			0	0		my $self = shift;
108	0						my $type = shift;
109	0		0				$type \|\|= $self->attr("lang");
110	0		0				$type \|\|= "text";
111	0						my $content;
112	0	0					if (! $type) {}
113	0						else { $content = HTML::Element::Convert::_extract($self, $type) }
114	0						return $content;
115							}
116
117							sub convert_content {
118	0			0	0		my $self = shift;
119	0						for (qw(markdown)) {
120	0						my @elements = $self->look_down("_tag", "div", "lang", qr/$_/i);
121	0						for my $element (@elements) {
122	0						my $content = $element->extract_content;
123	0	0					if (UNIVERSAL::can($content, "guts")) {
124	0						my $new_element = $content->guts;
125	0	0					if ($element eq $self) {
126	0						$self->delete_content;
127	0						$self->push_content($new_element->content_list);
128	0						$self->attr("lang", undef);
129	0						$new_element->delete;
130							}
131							else {
132	0						$element->replace_with($new_element)->delete;
133							}
134							}
135							}
136							}
137							}
138
139							sub pull_content {
140	0			0	0		my $self = shift;
141	0						my $content = $self->extract_content(@_);
142	0						$self->delete;
143	0						return $content;
144							}
145
146							# TODO Check to see if we're using HTML::TreeBuilder::Select first...
147
148							# Alpha function
149							sub _extract_child_content {
150	0			0			my $self = shift;
151	0	0					if (UNIVERSAL::can($self, "select")) {
152	0	0					my $query = shift or croak "Need a query (a CSS selector or XPath)";
153	0						return $self->select($query => 'extract-content');
154							}
155							}
156
157							# Alpha function
158							sub _pull_child_content {
159	0			0			my $self = shift;
160	0	0					if (UNIVERSAL::can($self, "select")) {
161	0	0					my $query = shift or croak "Need a query (a CSS selector or XPath)";
162	0						return $self->select($query => 'pull-content');
163							}
164							}
165
166
167							=head1 AUTHOR
168
169							Robert Krimen, C<< >>
170
171							=head1 BUGS
172
173							Please report any bugs or feature requests to
174							C, or through the web interface at
175							L.
176							I will be notified, and then you'll automatically be notified of progress on
177							your bug as I make changes.
178
179							=head1 SUPPORT
180
181							You can find documentation for this module with the perldoc command.
182
183							perldoc HTML::Element::Convert
184
185							You can also look for information at:
186
187							=over 4
188
189							=item * AnnoCPAN: Annotated CPAN documentation
190
191							L
192
193							=item * CPAN Ratings
194
195							L
196
197							=item * RT: CPAN's request tracker
198
199							L
200
201							=item * Search CPAN
202
203							L
204
205							=back
206
207							=head1 ACKNOWLEDGEMENTS
208
209							=head1 COPYRIGHT & LICENSE
210
211							Copyright 2007 Robert Krimen, all rights reserved.
212
213							This program is free software; you can redistribute it and/or modify it
214							under the same terms as Perl itself.
215
216							=cut
217
218							1; # End of HTML::Element::Convert