line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTML::Element::Convert; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
20551
|
use warnings; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
28
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
49
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
HTML::Element::Convert - Monkeypatch content conversion methods into HTML::Element |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Version 0.10 |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=cut |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '0.10'; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use HTML::TreeBulder; |
21
|
|
|
|
|
|
|
use HTML::Element::Convert; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new_from_content($html); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Search for some JSON-encoded meta-data embedded in the document and extract it: |
26
|
|
|
|
|
|
|
my $element = $tree->look_down(...); |
27
|
|
|
|
|
|
|
my $hash = $element->extract_content; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# This time extract the YAML data and delete the containing element from the tree: |
30
|
|
|
|
|
|
|
$element = $tree->look_down(...); |
31
|
|
|
|
|
|
|
$hash = $element->pull_content; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Convert the content of any with a 'lang="markdown"' attribute into HTML: |
34
|
|
|
|
|
|
|
$tree->convert_content; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=over 4 |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=item $element->convert_content |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
Look for every div below $element containing a C attribute. If it recognizes C, it |
41
|
|
|
|
|
|
|
will convert and replace the div's content. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Currently, only markdown is supported. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=item $content = $element->extract_content([TYPE]) |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Extract and parse the content of $element. If C is given, then this method will assume the content is of the given type, and try to parse it accordingly. Otherwise it will use the C attribute of $element to detemine the type. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=item $content = $element->pull_content([TYPE]) |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
Like C, extract and parse the content of $element. It will also delete $element from the tree. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=back |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=cut |
56
|
|
|
|
|
|
|
|
57
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
242
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
our %PARSE_FUNC; |
60
|
|
|
|
|
|
|
our %EXTRACT_FUNC; |
61
|
|
|
|
|
|
|
BEGIN { |
62
|
1
|
50
|
|
1
|
|
3
|
1 and do { eval { require Text::Markdown }; $PARSE_FUNC{markdown} = \&Text::Markdown::markdown unless $@ }; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
413
|
|
|
1
|
|
|
|
|
7
|
|
63
|
1
|
50
|
|
|
|
1
|
1 and do { eval { require JSON }; $PARSE_FUNC{JSON} = \&JSON::jsonToObj unless $@ }; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
312
|
|
|
1
|
|
|
|
|
4
|
|
64
|
1
|
50
|
|
|
|
2
|
1 and do { eval { require YAML::Syck }; $PARSE_FUNC{YAML} = \&YAML::Syck::Load unless $@ }; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
308
|
|
|
1
|
|
|
|
|
6
|
|
65
|
1
|
50
|
|
|
|
4
|
$PARSE_FUNC{YAML} or do { eval { require YAML }; $PARSE_FUNC{YAML} = \&YAML::Load unless $@ }; |
|
1
|
50
|
|
|
|
2
|
|
|
1
|
|
|
|
|
299
|
|
|
1
|
|
|
|
|
387
|
|
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
69
|
0
|
|
|
0
|
|
|
sub _as_text($) { return shift->as_text } |
70
|
0
|
0
|
|
0
|
|
|
sub _as_raw_HTML($) { return join '', map { if (ref $_) { $_ = $_->as_XML; chomp $_; $_ =~ s/"/'/g } $_ } shift->content_list } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
|
72
|
0
|
|
|
0
|
|
|
sub _extract_text { return _as_text shift } |
73
|
|
|
|
|
|
|
$EXTRACT_FUNC{text} = \&_extract_text; |
74
|
|
|
|
|
|
|
|
75
|
0
|
|
|
0
|
|
|
sub _extract_YAML { return $PARSE_FUNC{YAML}->(_as_raw_HTML(shift) . "\n") } |
76
|
|
|
|
|
|
|
$EXTRACT_FUNC{YAML} = \&_extract_YAML; |
77
|
|
|
|
|
|
|
|
78
|
0
|
|
|
0
|
|
|
sub _extract_JSON { return $PARSE_FUNC{JSON}->(_as_raw_HTML shift) } |
79
|
|
|
|
|
|
|
$EXTRACT_FUNC{JSON} = \&_extract_JSON; |
80
|
|
|
|
|
|
|
|
81
|
0
|
|
|
0
|
|
|
sub _extract_markdown { return HTML::TreeBuilder->new_from_content($PARSE_FUNC{markdown}->(_as_text shift)) } |
82
|
|
|
|
|
|
|
$EXTRACT_FUNC{markdown} = \&_extract_markdown; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub _extract { |
85
|
0
|
|
|
0
|
|
|
my $element = shift; |
86
|
0
|
|
|
|
|
|
my $type = shift; |
87
|
|
|
|
|
|
|
|
88
|
0
|
0
|
|
|
|
|
$type = "plain" if $type eq "text"; |
89
|
0
|
|
|
|
|
|
my $func; |
90
|
0
|
|
|
|
|
|
for (qw(plain JSON YAML markdown)) { |
91
|
0
|
0
|
|
|
|
|
if ($type =~ m/^$_$/i) { |
92
|
0
|
|
|
|
|
|
$func = $_; |
93
|
0
|
|
|
|
|
|
last; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
0
|
0
|
|
|
|
|
return unless $func; |
98
|
0
|
|
|
|
|
|
return $EXTRACT_FUNC{$func}->($element); |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
package HTML::Element; |
102
|
|
|
|
|
|
|
|
103
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
52
|
|
104
|
1
|
|
|
1
|
|
949
|
use UNIVERSAL; |
|
1
|
|
|
|
|
12
|
|
|
1
|
|
|
|
|
3
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub extract_content { |
107
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
108
|
0
|
|
|
|
|
|
my $type = shift; |
109
|
0
|
|
0
|
|
|
|
$type ||= $self->attr("lang"); |
110
|
0
|
|
0
|
|
|
|
$type ||= "text"; |
111
|
0
|
|
|
|
|
|
my $content; |
112
|
0
|
0
|
|
|
|
|
if (! $type) {} |
113
|
0
|
|
|
|
|
|
else { $content = HTML::Element::Convert::_extract($self, $type) } |
114
|
0
|
|
|
|
|
|
return $content; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub convert_content { |
118
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
119
|
0
|
|
|
|
|
|
for (qw(markdown)) { |
120
|
0
|
|
|
|
|
|
my @elements = $self->look_down("_tag", "div", "lang", qr/$_/i); |
121
|
0
|
|
|
|
|
|
for my $element (@elements) { |
122
|
0
|
|
|
|
|
|
my $content = $element->extract_content; |
123
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::can($content, "guts")) { |
124
|
0
|
|
|
|
|
|
my $new_element = $content->guts; |
125
|
0
|
0
|
|
|
|
|
if ($element eq $self) { |
126
|
0
|
|
|
|
|
|
$self->delete_content; |
127
|
0
|
|
|
|
|
|
$self->push_content($new_element->content_list); |
128
|
0
|
|
|
|
|
|
$self->attr("lang", undef); |
129
|
0
|
|
|
|
|
|
$new_element->delete; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
else { |
132
|
0
|
|
|
|
|
|
$element->replace_with($new_element)->delete; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub pull_content { |
140
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
141
|
0
|
|
|
|
|
|
my $content = $self->extract_content(@_); |
142
|
0
|
|
|
|
|
|
$self->delete; |
143
|
0
|
|
|
|
|
|
return $content; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# TODO Check to see if we're using HTML::TreeBuilder::Select first... |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Alpha function |
149
|
|
|
|
|
|
|
sub _extract_child_content { |
150
|
0
|
|
|
0
|
|
|
my $self = shift; |
151
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::can($self, "select")) { |
152
|
0
|
0
|
|
|
|
|
my $query = shift or croak "Need a query (a CSS selector or XPath)"; |
153
|
0
|
|
|
|
|
|
return $self->select($query => 'extract-content'); |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# Alpha function |
158
|
|
|
|
|
|
|
sub _pull_child_content { |
159
|
0
|
|
|
0
|
|
|
my $self = shift; |
160
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::can($self, "select")) { |
161
|
0
|
0
|
|
|
|
|
my $query = shift or croak "Need a query (a CSS selector or XPath)"; |
162
|
0
|
|
|
|
|
|
return $self->select($query => 'pull-content'); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 AUTHOR |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Robert Krimen, C<< >> |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head1 BUGS |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Please report any bugs or feature requests to |
174
|
|
|
|
|
|
|
C, or through the web interface at |
175
|
|
|
|
|
|
|
L. |
176
|
|
|
|
|
|
|
I will be notified, and then you'll automatically be notified of progress on |
177
|
|
|
|
|
|
|
your bug as I make changes. |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 SUPPORT |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
perldoc HTML::Element::Convert |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
You can also look for information at: |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=over 4 |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
L |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=item * CPAN Ratings |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
L |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
L |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=item * Search CPAN |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
L |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=back |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Copyright 2007 Robert Krimen, all rights reserved. |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
214
|
|
|
|
|
|
|
under the same terms as Perl itself. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=cut |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
1; # End of HTML::Element::Convert |