blib/lib/DOM/Tiny.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 230 | 230 | 100.0 |
branch | 127 | 128 | 99.2 |
condition | 57 | 62 | 91.9 |
subroutine | 76 | 76 | 100.0 |
pod | 40 | 41 | 97.5 |
total | 530 | 537 | 98.7 |
line | stmt | bran | cond | sub | pod | time | code |
---|---|---|---|---|---|---|---|
1 | package DOM::Tiny; | ||||||
2 | |||||||
3 | 1 | 1 | 21879 | use strict; | |||
1 | 1 | ||||||
1 | 24 | ||||||
4 | 1 | 1 | 3 | use warnings; | |||
1 | 0 | ||||||
1 | 72 | ||||||
5 | |||||||
6 | use overload | ||||||
7 | 4 | 4 | 8 | '@{}' => sub { shift->child_nodes }, | |||
8 | 76 | 76 | 163 | '%{}' => sub { shift->attr }, | |||
9 | 103 | 103 | 142 | bool => sub {1}, | |||
10 | 105 | 105 | 3156 | '""' => sub { shift->to_string }, | |||
11 | 1 | 1 | 906 | fallback => 1; | |||
1 | 738 | ||||||
1 | 9 | ||||||
12 | |||||||
13 | 1 | 1 | 67 | use Carp 'croak'; | |||
1 | 1 | ||||||
1 | 54 | ||||||
14 | 1 | 1 | 295 | use DOM::Tiny::_Collection; | |||
1 | 1 | ||||||
1 | 20 | ||||||
15 | 1 | 1 | 337 | use DOM::Tiny::_CSS; | |||
1 | 2 | ||||||
1 | 25 | ||||||
16 | 1 | 1 | 346 | use DOM::Tiny::_HTML; | |||
1 | 2 | ||||||
1 | 37 | ||||||
17 | 1 | 1 | 5 | use Scalar::Util qw(blessed weaken); | |||
1 | 1 | ||||||
1 | 2079 | ||||||
18 | |||||||
19 | our $VERSION = '0.003'; | ||||||
20 | |||||||
21 | sub new { | ||||||
22 | 1884 | 1884 | 1 | 5193 | my $class = shift; | ||
23 | 1884 | 66 | 3994 | my $self = bless \DOM::Tiny::_HTML->new, ref $class || $class; | |||
24 | 1884 | 100 | 5258 | return @_ ? $self->parse(@_) : $self; | |||
25 | } | ||||||
26 | |||||||
27 | 1 | 1 | 0 | 77 | sub TO_JSON { shift->_delegate('render') } | ||
28 | |||||||
29 | 27 | 27 | 1 | 96 | sub all_text { shift->_all_text(1, @_) } | ||
30 | |||||||
31 | 9 | 9 | 1 | 24 | sub ancestors { _select($_[0]->_collect($_[0]->_ancestors), $_[1]) } | ||
32 | |||||||
33 | 7 | 7 | 1 | 26 | sub append { shift->_add(1, @_) } | ||
34 | 9 | 9 | 1 | 25 | sub append_content { shift->_content(1, 0, @_) } | ||
35 | |||||||
36 | sub at { | ||||||
37 | 453 | 453 | 1 | 1239 | my $self = shift; | ||
38 | 453 | 100 | 795 | return undef unless my $result = $self->_css->select_one(@_); | |||
39 | 418 | 1836 | return $self->_build($result, $self->xml); | ||||
40 | } | ||||||
41 | |||||||
42 | sub attr { | ||||||
43 | 149 | 149 | 1 | 178 | my $self = shift; | ||
44 | |||||||
45 | # Hash | ||||||
46 | 149 | 247 | my $tree = $self->tree; | ||||
47 | 149 | 100 | 389 | my $attrs = $tree->[0] ne 'tag' ? {} : $tree->[2]; | |||
48 | 149 | 100 | 723 | return $attrs unless @_; | |||
49 | |||||||
50 | # Get | ||||||
51 | 40 | 100 | 100 | 252 | return $attrs->{$_[0]} unless @_ > 1 || ref $_[0]; | ||
52 | |||||||
53 | # Set | ||||||
54 | 4 | 100 | 14 | my $values = ref $_[0] ? $_[0] : {@_}; | |||
55 | 4 | 14 | @$attrs{keys %$values} = values %$values; | ||||
56 | |||||||
57 | 4 | 18 | return $self; | ||||
58 | } | ||||||
59 | |||||||
60 | 58 | 58 | 1 | 121 | sub child_nodes { $_[0]->_collect(_nodes($_[0]->tree)) } | ||
61 | |||||||
62 | 13 | 13 | 1 | 39 | sub children { _select($_[0]->_collect(_nodes($_[0]->tree, 1)), $_[1]) } | ||
63 | |||||||
64 | sub content { | ||||||
65 | 53 | 53 | 1 | 58 | my $self = shift; | ||
66 | |||||||
67 | 53 | 70 | my $type = $self->type; | ||||
68 | 53 | 100 | 100 | 184 | if ($type eq 'root' || $type eq 'tag') { | ||
69 | 22 | 100 | 56 | return $self->_content(0, 1, @_) if @_; | |||
70 | 7 | 14 | my $html = DOM::Tiny::_HTML->new(xml => $self->xml); | ||||
71 | 7 | 14 | return join '', map { $html->tree($_)->render } _nodes($self->tree); | ||||
12 | 21 | ||||||
72 | } | ||||||
73 | |||||||
74 | 31 | 100 | 77 | return $self->tree->[1] unless @_; | |||
75 | 3 | 7 | $self->tree->[1] = shift; | ||||
76 | 3 | 7 | return $self; | ||||
77 | } | ||||||
78 | |||||||
79 | 13 | 13 | 1 | 36 | sub descendant_nodes { $_[0]->_collect(_all(_nodes($_[0]->tree))) } | ||
80 | |||||||
81 | 389 | 389 | 1 | 1936 | sub find { $_[0]->_collect(@{$_[0]->_css->select($_[1])}) } | ||
389 | 851 | ||||||
82 | |||||||
83 | 8 | 8 | 1 | 14 | sub following { _select($_[0]->_collect(@{$_[0]->_siblings(1)->[1]}), $_[1]) } | ||
8 | 17 | ||||||
84 | 7 | 7 | 1 | 6 | sub following_nodes { $_[0]->_collect(@{$_[0]->_siblings->[1]}) } | ||
7 | 13 | ||||||
85 | |||||||
86 | 29 | 29 | 1 | 44 | sub matches { shift->_css->matches(@_) } | ||
87 | |||||||
88 | sub namespace { | ||||||
89 | 18 | 18 | 1 | 23 | my $self = shift; | ||
90 | |||||||
91 | 18 | 100 | 24 | return undef if (my $tree = $self->tree)->[0] ne 'tag'; | |||
92 | |||||||
93 | # Extract namespace prefix and search parents | ||||||
94 | 16 | 100 | 63 | my $ns = $tree->[1] =~ /^(.*?):/ ? "xmlns:$1" : undef; | |||
95 | 16 | 29 | for my $node ($tree, $self->_ancestors) { | ||||
96 | |||||||
97 | # Namespace for prefix | ||||||
98 | 35 | 28 | my $attrs = $node->[2]; | ||||
99 | 35 | 100 | 100 | 58 | if ($ns) { $_ eq $ns and return $attrs->{$_} for keys %$attrs } | ||
13 | 100 | 53 | |||||
100 | |||||||
101 | # Namespace attribute | ||||||
102 | 10 | 41 | elsif (defined $attrs->{xmlns}) { return $attrs->{xmlns} } | ||||
103 | } | ||||||
104 | |||||||
105 | 1 | 5 | return undef; | ||||
106 | } | ||||||
107 | |||||||
108 | 13 | 13 | 1 | 26 | sub next { $_[0]->_maybe($_[0]->_siblings(1, 0)->[1]) } | ||
109 | 5 | 5 | 1 | 9 | sub next_node { $_[0]->_maybe($_[0]->_siblings(0, 0)->[1]) } | ||
110 | |||||||
111 | sub parent { | ||||||
112 | 101 | 101 | 1 | 86 | my $self = shift; | ||
113 | 101 | 100 | 119 | return undef if $self->tree->[0] eq 'root'; | |||
114 | 95 | 137 | return $self->_build($self->_parent, $self->xml); | ||||
115 | } | ||||||
116 | |||||||
117 | 133 | 133 | 1 | 302 | sub parse { shift->_delegate(parse => @_) } | ||
118 | |||||||
119 | 5 | 5 | 1 | 7 | sub preceding { _select($_[0]->_collect(@{$_[0]->_siblings(1)->[0]}), $_[1]) } | ||
5 | 8 | ||||||
120 | 7 | 7 | 1 | 7 | sub preceding_nodes { $_[0]->_collect(@{$_[0]->_siblings->[0]}) } | ||
7 | 13 | ||||||
121 | |||||||
122 | 8 | 8 | 1 | 17 | sub prepend { shift->_add(0, @_) } | ||
123 | 4 | 4 | 1 | 13 | sub prepend_content { shift->_content(0, 0, @_) } | ||
124 | |||||||
125 | 7 | 7 | 1 | 18 | sub previous { $_[0]->_maybe($_[0]->_siblings(1, -1)->[0]) } | ||
126 | 5 | 5 | 1 | 10 | sub previous_node { $_[0]->_maybe($_[0]->_siblings(0, -1)->[0]) } | ||
127 | |||||||
128 | 6 | 6 | 1 | 12 | sub remove { shift->replace('') } | ||
129 | |||||||
130 | sub replace { | ||||||
131 | 22 | 22 | 1 | 38 | my ($self, $new) = @_; | ||
132 | 22 | 100 | 27 | return $self->parse($new) if (my $tree = $self->tree)->[0] eq 'root'; | |||
133 | 14 | 20 | return $self->_replace($self->_parent, $tree, _nodes($self->_parse($new))); | ||||
134 | } | ||||||
135 | |||||||
136 | sub root { | ||||||
137 | 11 | 11 | 1 | 15 | my $self = shift; | ||
138 | 11 | 100 | 24 | return $self unless my $tree = $self->_ancestors(1); | |||
139 | 8 | 18 | return $self->_build($tree, $self->xml); | ||||
140 | } | ||||||
141 | |||||||
142 | sub strip { | ||||||
143 | 9 | 9 | 1 | 15 | my $self = shift; | ||
144 | 9 | 100 | 16 | return $self if (my $tree = $self->tree)->[0] ne 'tag'; | |||
145 | 7 | 16 | return $self->_replace($tree->[3], $tree, _nodes($tree)); | ||||
146 | } | ||||||
147 | |||||||
148 | sub tag { | ||||||
149 | 85 | 85 | 1 | 247 | my ($self, $tag) = @_; | ||
150 | 85 | 100 | 133 | return undef if (my $tree = $self->tree)->[0] ne 'tag'; | |||
151 | 83 | 100 | 381 | return $tree->[1] unless $tag; | |||
152 | 1 | 4 | $tree->[1] = $tag; | ||||
153 | 1 | 5 | return $self; | ||||
154 | } | ||||||
155 | |||||||
156 | 1 | 1 | 1 | 4 | sub tap { DOM::Tiny::_Collection::tap(@_) } | ||
157 | |||||||
158 | 630 | 630 | 1 | 1759 | sub text { shift->_all_text(0, @_) } | ||
159 | |||||||
160 | 112 | 112 | 1 | 202 | sub to_string { shift->_delegate('render') } | ||
161 | |||||||
162 | 5687 | 5687 | 1 | 7547 | sub tree { shift->_delegate(tree => @_) } | ||
163 | |||||||
164 | 878 | 878 | 1 | 1087 | sub type { shift->tree->[0] } | ||
165 | |||||||
166 | sub val { | ||||||
167 | 25 | 25 | 1 | 30 | my $self = shift; | ||
168 | |||||||
169 | # "option" | ||||||
170 | 25 | 100 | 48 | return defined($self->{value}) ? $self->{value} : $self->text | |||
100 | |||||||
171 | if (my $tag = $self->tag) eq 'option'; | ||||||
172 | |||||||
173 | # "input" ("type=checkbox" and "type=radio") | ||||||
174 | 16 | 100 | 43 | my $type = $self->{type} || ''; | |||
175 | 16 | 100 | 100 | 76 | return defined $self->{value} ? $self->{value} : 'on' | ||
100 | 66 | ||||||
176 | if $tag eq 'input' && ($type eq 'radio' || $type eq 'checkbox'); | ||||||
177 | |||||||
178 | # "textarea", "input" or "button" | ||||||
179 | 11 | 100 | 39 | return $tag eq 'textarea' ? $self->text : $self->{value} if $tag ne 'select'; | |||
100 | |||||||
180 | |||||||
181 | # "select" | ||||||
182 | 4 | 13 | my $v = $self->find('option:checked')->map('val'); | ||||
183 | 4 | 100 | 29 | return exists $self->{multiple} ? $v->size ? $v->to_array : undef : $v->last; | |||
100 | |||||||
184 | } | ||||||
185 | |||||||
186 | 7 | 7 | 1 | 24 | sub wrap { shift->_wrap(0, @_) } | ||
187 | 5 | 5 | 1 | 16 | sub wrap_content { shift->_wrap(1, @_) } | ||
188 | |||||||
189 | 2907 | 2907 | 1 | 4419 | sub xml { shift->_delegate(xml => @_) } | ||
190 | |||||||
191 | sub _add { | ||||||
192 | 15 | 15 | 22 | my ($self, $offset, $new) = @_; | |||
193 | |||||||
194 | 15 | 100 | 22 | return $self if (my $tree = $self->tree)->[0] eq 'root'; | |||
195 | |||||||
196 | 11 | 19 | my $parent = $self->_parent; | ||||
197 | 11 | 712 | splice @$parent, _offset($parent, $tree) + $offset, 0, | ||||
198 | _link($parent, _nodes($self->_parse($new))); | ||||||
199 | |||||||
200 | 11 | 49 | return $self; | ||||
201 | } | ||||||
202 | |||||||
203 | sub _all { | ||||||
204 | 21 | 100 | 21 | 28 | map { $_->[0] eq 'tag' ? ($_, _all(_nodes($_))) : ($_) } @_; | ||
60 | 125 | ||||||
205 | } | ||||||
206 | |||||||
207 | sub _all_text { | ||||||
208 | 657 | 657 | 812 | my ($self, $recurse, $trim) = @_; | |||
209 | |||||||
210 | # Detect "pre" tag | ||||||
211 | 657 | 918 | my $tree = $self->tree; | ||||
212 | 657 | 100 | 1312 | $trim = 1 unless defined $trim; | |||
213 | 657 | 100 | 100 | 3436 | map { $_->[1] eq 'pre' and $trim = 0 } $self->_ancestors, $tree | ||
1461 | 100 | 3300 | |||||
214 | if $trim && $tree->[0] ne 'root'; | ||||||
215 | |||||||
216 | 657 | 1268 | return _text([_nodes($tree)], $recurse, $trim); | ||||
217 | } | ||||||
218 | |||||||
219 | sub _ancestors { | ||||||
220 | 677 | 677 | 711 | my ($self, $root) = @_; | |||
221 | |||||||
222 | 677 | 100 | 1027 | return () unless my $tree = $self->_parent; | |||
223 | 674 | 714 | my @ancestors; | ||||
224 | 674 | 66 | 628 | do { push @ancestors, $tree } | |||
1546 | 4783 | ||||||
225 | while ($tree->[0] eq 'tag') && ($tree = $tree->[3]); | ||||||
226 | 674 | 100 | 2275 | return $root ? $ancestors[-1] : @ancestors[0 .. $#ancestors - 1]; | |||
227 | } | ||||||
228 | |||||||
229 | 1759 | 1759 | 2402 | sub _build { shift->new->tree(shift)->xml(shift) } | |||
230 | |||||||
231 | sub _collect { | ||||||
232 | 509 | 509 | 504 | my $self = shift; | |||
233 | 509 | 854 | my $xml = $self->xml; | ||||
234 | 509 | 835 | return DOM::Tiny::_Collection->new(map { $self->_build($_, $xml) } @_); | ||||
1218 | 1784 | ||||||
235 | } | ||||||
236 | |||||||
237 | sub _content { | ||||||
238 | 28 | 28 | 49 | my ($self, $start, $offset, $new) = @_; | |||
239 | |||||||
240 | 28 | 39 | my $tree = $self->tree; | ||||
241 | 28 | 100 | 100 | 114 | unless ($tree->[0] eq 'root' || $tree->[0] eq 'tag') { | ||
242 | 2 | 4 | my $old = $self->content; | ||||
243 | 2 | 100 | 9 | return $self->content($start ? $old . $new : $new . $old); | |||
244 | } | ||||||
245 | |||||||
246 | 26 | 100 | 52 | $start = $start ? ($#$tree + 1) : _start($tree); | |||
247 | 26 | 100 | 45 | $offset = $offset ? $#$tree : 0; | |||
248 | 26 | 54 | splice @$tree, $start, $offset, _link($tree, _nodes($self->_parse($new))); | ||||
249 | |||||||
250 | 26 | 108 | return $self; | ||||
251 | } | ||||||
252 | |||||||
253 | 871 | 871 | 1593 | sub _css { DOM::Tiny::_CSS->new(tree => shift->tree) } | |||
254 | |||||||
255 | sub _delegate { | ||||||
256 | 8840 | 8840 | 8456 | my ($self, $method) = (shift, shift); | |||
257 | 8840 | 100 | 22799 | return $$self->$method unless @_; | |||
258 | 3657 | 7728 | $$self->$method(@_); | ||||
259 | 3657 | 8735 | return $self; | ||||
260 | } | ||||||
261 | |||||||
262 | sub _link { | ||||||
263 | 74 | 74 | 86 | my ($parent, @children) = @_; | |||
264 | |||||||
265 | # Link parent to children | ||||||
266 | 74 | 107 | for my $node (@children) { | ||||
267 | 79 | 100 | 135 | my $offset = $node->[0] eq 'tag' ? 3 : 2; | |||
268 | 79 | 93 | $node->[$offset] = $parent; | ||||
269 | 79 | 167 | weaken $node->[$offset]; | ||||
270 | } | ||||||
271 | |||||||
272 | 74 | 210 | return @children; | ||||
273 | } | ||||||
274 | |||||||
275 | 30 | 100 | 30 | 82 | sub _maybe { $_[1] ? $_[0]->_build($_[1], $_[0]->xml) : undef } | ||
276 | |||||||
277 | sub _nodes { | ||||||
278 | 1029 | 50 | 1029 | 1882 | return () unless my $tree = shift; | ||
279 | 1029 | 1579 | my @nodes = @$tree[_start($tree) .. $#$tree]; | ||||
280 | 1029 | 100 | 3374 | return shift() ? grep { $_->[0] eq 'tag' } @nodes : @nodes; | |||
72 | 184 | ||||||
281 | } | ||||||
282 | |||||||
283 | sub _offset { | ||||||
284 | 37 | 37 | 41 | my ($parent, $child) = @_; | |||
285 | 37 | 41 | my $i = _start($parent); | ||||
286 | 37 | 100 | 171 | $_ eq $child ? last : $i++ for @$parent[$i .. $#$parent]; | |||
287 | 37 | 85 | return $i; | ||||
288 | } | ||||||
289 | |||||||
290 | 802 | 100 | 802 | 1184 | sub _parent { $_[0]->tree->[$_[0]->type eq 'tag' ? 3 : 2] } | ||
291 | |||||||
292 | 61 | 61 | 104 | sub _parse { DOM::Tiny::_HTML->new(xml => shift->xml)->parse(shift)->tree } | |||
293 | |||||||
294 | sub _replace { | ||||||
295 | 26 | 26 | 35 | my ($self, $parent, $child, @nodes) = @_; | |||
296 | 26 | 70 | splice @$parent, _offset($parent, $child), 1, _link($parent, @nodes); | ||||
297 | 26 | 63 | return $self->parent; | ||||
298 | } | ||||||
299 | |||||||
300 | sub _select { | ||||||
301 | 35 | 35 | 65 | my ($collection, $selector) = @_; | |||
302 | 35 | 100 | 112 | return $collection unless $selector; | |||
303 | 15 | 25 | return $collection->new(grep { $_->matches($selector) } @$collection); | ||||
26 | 56 | ||||||
304 | } | ||||||
305 | |||||||
306 | sub _siblings { | ||||||
307 | 57 | 57 | 57 | my ($self, $tags, $i) = @_; | |||
308 | |||||||
309 | 57 | 100 | 90 | return [] unless my $parent = $self->parent; | |||
310 | |||||||
311 | 51 | 74 | my $tree = $self->tree; | ||||
312 | 51 | 42 | my (@before, @after, $match); | ||||
313 | 51 | 62 | for my $node (_nodes($parent->tree)) { | ||||
314 | 284 | 100 | 50 | 668 | ++$match and next if !$match && $node eq $tree; | ||
100 | |||||||
315 | 233 | 100 | 100 | 534 | next if $tags && $node->[0] ne 'tag'; | ||
316 | 115 | 100 | 162 | $match ? push @after, $node : push @before, $node; | |||
317 | } | ||||||
318 | |||||||
319 | 51 | 100 | 205 | return defined $i ? [$before[$i], $after[$i]] : [\@before, \@after]; | |||
320 | } | ||||||
321 | |||||||
322 | sub _squish { | ||||||
323 | 675 | 675 | 647 | my $str = shift; | |||
324 | 675 | 2605 | $str =~ s/^\s+//; | ||||
325 | 675 | 1266 | $str =~ s/\s+$//; | ||||
326 | 675 | 713 | $str =~ s/\s+/ /g; | ||||
327 | 675 | 1142 | return $str; | ||||
328 | } | ||||||
329 | |||||||
330 | 1087 | 100 | 1087 | 3110 | sub _start { $_[0][0] eq 'root' ? 1 : 4 } | ||
331 | |||||||
332 | sub _text { | ||||||
333 | 790 | 790 | 939 | my ($nodes, $recurse, $trim) = @_; | |||
334 | |||||||
335 | # Merge successive text nodes | ||||||
336 | 790 | 655 | my $i = 0; | ||||
337 | 790 | 2134 | while (my $next = $nodes->[$i + 1]) { | ||||
338 | 167 | 100 | 50 | 2002 | ++$i and next unless $nodes->[$i][0] eq 'text' && $next->[0] eq 'text'; | ||
100 | |||||||
339 | 15 | 64 | splice @$nodes, $i, 2, ['text', $nodes->[$i][1] . $next->[1]]; | ||||
340 | } | ||||||
341 | |||||||
342 | 790 | 1093 | my $text = ''; | ||||
343 | 790 | 1038 | for my $node (@$nodes) { | ||||
344 | 932 | 956 | my $type = $node->[0]; | ||||
345 | |||||||
346 | # Text | ||||||
347 | 932 | 796 | my $chunk = ''; | ||||
348 | 932 | 100 | 100 | 2514 | if ($type eq 'text') { $chunk = $trim ? _squish $node->[1] : $node->[1] } | ||
703 | 100 | 100 | 1549 | ||||
100 | |||||||
100 | |||||||
349 | |||||||
350 | # CDATA or raw text | ||||||
351 | 33 | 52 | elsif ($type eq 'cdata' || $type eq 'raw') { $chunk = $node->[1] } | ||||
352 | |||||||
353 | # Nested tag | ||||||
354 | elsif ($type eq 'tag' && $recurse) { | ||||||
355 | 1 | 1 | 5 | no warnings 'recursion'; | |||
1 | 1 | ||||||
1 | 454 | ||||||
356 | 133 | 100 | 198 | $chunk = _text([_nodes($node)], 1, $node->[1] eq 'pre' ? 0 : $trim); | |||
357 | } | ||||||
358 | |||||||
359 | # Add leading whitespace if punctuation allows it | ||||||
360 | 932 | 100 | 100 | 2458 | $chunk = " $chunk" if $text =~ /\S\z/ && $chunk =~ /^[^.!?,;:\s]+/; | ||
361 | |||||||
362 | # Trim whitespace blocks | ||||||
363 | 932 | 100 | 100 | 3894 | $text .= $chunk if $chunk =~ /\S+/ || !$trim; | ||
364 | } | ||||||
365 | |||||||
366 | 790 | 3311 | return $text; | ||||
367 | } | ||||||
368 | |||||||
369 | sub _wrap { | ||||||
370 | 12 | 12 | 23 | my ($self, $content, $new) = @_; | |||
371 | |||||||
372 | 12 | 100 | 100 | 23 | return $self if (my $tree = $self->tree)->[0] eq 'root' && !$content; | ||
373 | 11 | 100 | 100 | 72 | return $self if $tree->[0] ne 'root' && $tree->[0] ne 'tag' && $content; | ||
100 | |||||||
374 | |||||||
375 | # Find innermost tag | ||||||
376 | 10 | 11 | my $current; | ||||
377 | 10 | 31 | my $first = $new = $self->_parse($new); | ||||
378 | 10 | 44 | $current = $first while $first = (_nodes($first, 1))[0]; | ||||
379 | 10 | 100 | 33 | return $self unless $current; | |||
380 | |||||||
381 | # Wrap content | ||||||
382 | 8 | 100 | 17 | if ($content) { | |||
383 | 3 | 8 | push @$current, _link($current, _nodes($tree)); | ||||
384 | 3 | 9 | splice @$tree, _start($tree), $#$tree, _link($tree, _nodes($new)); | ||||
385 | 3 | 20 | return $self; | ||||
386 | } | ||||||
387 | |||||||
388 | # Wrap element | ||||||
389 | 5 | 14 | $self->_replace($self->_parent, $tree, _nodes($new)); | ||||
390 | 5 | 19 | push @$current, _link($current, $tree); | ||||
391 | 5 | 27 | return $self; | ||||
392 | } | ||||||
393 | |||||||
394 | 1; | ||||||
395 | |||||||
396 | =encoding utf8 | ||||||
397 | |||||||
398 | =head1 NAME | ||||||
399 | |||||||
400 | DOM::Tiny - Minimalistic HTML/XML DOM parser with CSS selectors | ||||||
401 | |||||||
402 | =head1 SYNOPSIS | ||||||
403 | |||||||
404 | use DOM::Tiny; | ||||||
405 | |||||||
406 | # Parse | ||||||
407 | my $dom = DOM::Tiny->new('<div><p id="a">Test</p><p id="b">123</p></div>'); | ||||||
408 | |||||||
409 | # Find | ||||||
410 | say $dom->at('#b')->text; | ||||||
411 | say $dom->find('p')->map('text')->join("\n"); | ||||||
412 | say $dom->find('[id]')->map(attr => 'id')->join("\n"); | ||||||
413 | |||||||
414 | # Iterate | ||||||
415 | $dom->find('p[id]')->reverse->each(sub { say $_->{id} }); | ||||||
416 | |||||||
417 | # Loop | ||||||
418 | for my $e ($dom->find('p[id]')->each) { | ||||||
419 | say $e->{id}, ':', $e->text; | ||||||
420 | } | ||||||
421 | |||||||
422 | # Modify | ||||||
423 | $dom->find('div p')->last->append('<p id="c">456</p>'); | ||||||
424 | $dom->find(':not(p)')->map('strip'); | ||||||
425 | |||||||
426 | # Render | ||||||
427 | say "$dom"; | ||||||
428 | |||||||
429 | =head1 DESCRIPTION | ||||||
430 | |||||||
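431 | L<DOM::Tiny> is a minimalistic and relaxed pure-perl HTML/XML DOM parser based | ||||||
432 | on L<Mojo::DOM>. It supports the L<HTML Living Standard|https://html.spec.whatwg.org/> | ||||||
433 | and L<Extensible Markup Language (XML) 1.0|http://www.w3.org/TR/xml/>, and | ||||||
434 | matching based on L<CSS3 selectors|http://www.w3.org/TR/selectors/>. It will | ||||||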
435 | even try to interpret broken HTML and XML, so you should not use it for | ||||||
436 | validation. | ||||||
437 | |||||||
438 | =head1 NODES AND ELEMENTS | ||||||
439 | |||||||
440 | When we parse an HTML/XML fragment, it gets turned into a tree of nodes. | ||||||
441 | |||||||
442 | <!DOCTYPE html> | ||||||
443 | <html> | ||||||
444 | <head><title>Hello</title></head> | ||||||
445 | <body>World!</body> | ||||||
446 | </html> | ||||||
447 | |||||||
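448 | There are currently eight different kinds of nodes, C<cdata>, C<comment>, | ||||||
449 | C<doctype>, C<pi>, C<raw>, C<root>, C<tag> and C<text>. Elements are nodes of | ||||||
450 | the type C<tag>. | ||||||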
451 | |||||||
452 | root | ||||||
453 | |- doctype (html) | ||||||
454 | +- tag (html) | ||||||
455 | |- tag (head) | ||||||
456 | | +- tag (title) | ||||||
457 | | +- raw (Hello) | ||||||
458 | +- tag (body) | ||||||
459 | +- text (World!) | ||||||
460 | |||||||
461 | While all node types are represented as L<DOM::Tiny> objects, some methods like | ||||||
462 | L</"attr"> and L</"namespace"> only apply to elements. | ||||||
463 | |||||||
464 | =head1 CASE-SENSITIVITY | ||||||
465 | |||||||
466 | L<DOM::Tiny> defaults to HTML semantics, that means all tags and attribute | ||||||
467 | names are lowercased and selectors need to be lowercase as well. | ||||||
468 | |||||||
469 | # HTML semantics | ||||||
470 | my $dom = DOM::Tiny->new('<P ID="greeting">Hi!</P>'); | ||||||
471 | say $dom->at('p[id]')->text; | ||||||
472 | |||||||
473 | If an XML declaration is found, the parser will automatically switch into XML | ||||||
474 | mode and everything becomes case-sensitive. | ||||||
475 | |||||||
476 | # XML semantics | ||||||
477 | my $dom = DOM::Tiny->new('<?xml version="1.0"?><P ID="greeting">Hi!</P>'); | ||||||
478 | say $dom->at('P[ID]')->text; | ||||||
479 | |||||||
480 | XML detection can also be disabled with the L</"xml"> method. | ||||||
481 | |||||||
482 | # Force XML semantics | ||||||
483 | my $dom = DOM::Tiny->new->xml(1)->parse('<P ID="greeting">Hi!</P>'); | ||||||
484 | say $dom->at('P[ID]')->text; | ||||||
485 | |||||||
486 | # Force HTML semantics | ||||||
487 | my $dom = DOM::Tiny->new->xml(0)->parse('<P ID="greeting">Hi!</P>'); | ||||||
488 | say $dom->at('p[id]')->text; | ||||||
489 | |||||||
490 | =head1 SELECTORS | ||||||
491 | |||||||
492 | L<DOM::Tiny> uses a CSS selector engine based on L<Mojo::DOM::CSS>. All CSS | ||||||
493 | selectors that make sense for a standalone parser are supported. | ||||||
494 | |||||||
495 | =over | ||||||
496 | |||||||
497 | =item Z<>* | ||||||
498 | |||||||
499 | Any element. | ||||||
500 | |||||||
501 | my $all = $dom->find('*'); | ||||||
502 | |||||||
503 | =item E | ||||||
504 | |||||||
505 | An element of type C |
||||||
506 | |||||||
507 | my $title = $dom->at('title'); | ||||||
508 | |||||||
509 | =item E[foo] | ||||||
510 | |||||||
511 | An C |
||||||
512 | |||||||
513 | my $links = $dom->find('a[href]'); | ||||||
514 | |||||||
515 | =item E[foo="bar"] | ||||||
516 | |||||||
517 | An C |
||||||
518 | |||||||
519 | my $case_sensitive = $dom->find('input[type="hidden"]'); | ||||||
520 | my $case_sensitive = $dom->find('input[type=hidden]'); | ||||||
521 | |||||||
522 | =item E[foo="bar" i] | ||||||
523 | |||||||
524 | An C |
||||||
525 | (ASCII-range) case-permutation of C |
||||||
526 | EXPERIMENTAL and might change without warning! | ||||||
527 | |||||||
528 | my $case_insensitive = $dom->find('input[type="hidden" i]'); | ||||||
529 | my $case_insensitive = $dom->find('input[type=hidden i]'); | ||||||
530 | my $case_insensitive = $dom->find('input[class~="foo" i]'); | ||||||
531 | |||||||
532 | This selector is part of | ||||||
533 | L |
||||||
534 | in progress. | ||||||
535 | |||||||
536 | =item E[foo~="bar"] | ||||||
537 | |||||||
538 | An C |
||||||
539 | values, one of which is exactly equal to C |
||||||
540 | |||||||
541 | my $foo = $dom->find('input[class~="foo"]'); | ||||||
542 | my $foo = $dom->find('input[class~=foo]'); | ||||||
543 | |||||||
544 | =item E[foo^="bar"] | ||||||
545 | |||||||
546 | An C |
||||||
547 | C |
||||||
548 | |||||||
549 | my $begins_with = $dom->find('input[name^="f"]'); | ||||||
550 | my $begins_with = $dom->find('input[name^=f]'); | ||||||
551 | |||||||
552 | =item E[foo$="bar"] | ||||||
553 | |||||||
554 | An C |
||||||
555 | C |
||||||
556 | |||||||
557 | my $ends_with = $dom->find('input[name$="o"]'); | ||||||
558 | my $ends_with = $dom->find('input[name$=o]'); | ||||||
559 | |||||||
560 | =item E[foo*="bar"] | ||||||
561 | |||||||
562 | An C |
||||||
563 | |||||||
564 | my $contains = $dom->find('input[name*="fo"]'); | ||||||
565 | my $contains = $dom->find('input[name*=fo]'); | ||||||
566 | |||||||
567 | =item E:root | ||||||
568 | |||||||
569 | An C |
||||||
570 | |||||||
571 | my $root = $dom->at(':root'); | ||||||
572 | |||||||
573 | =item E:nth-child(n) | ||||||
574 | |||||||
575 | An C |
||||||
576 | |||||||
577 | my $third = $dom->find('div:nth-child(3)'); | ||||||
578 | my $odd = $dom->find('div:nth-child(odd)'); | ||||||
579 | my $even = $dom->find('div:nth-child(even)'); | ||||||
580 | my $top3 = $dom->find('div:nth-child(-n+3)'); | ||||||
581 | |||||||
582 | =item E:nth-last-child(n) | ||||||
583 | |||||||
584 | An C |
||||||
585 | |||||||
586 | my $third = $dom->find('div:nth-last-child(3)'); | ||||||
587 | my $odd = $dom->find('div:nth-last-child(odd)'); | ||||||
588 | my $even = $dom->find('div:nth-last-child(even)'); | ||||||
589 | my $bottom3 = $dom->find('div:nth-last-child(-n+3)'); | ||||||
590 | |||||||
591 | =item E:nth-of-type(n) | ||||||
592 | |||||||
593 | An C |
||||||
594 | |||||||
595 | my $third = $dom->find('div:nth-of-type(3)'); | ||||||
596 | my $odd = $dom->find('div:nth-of-type(odd)'); | ||||||
597 | my $even = $dom->find('div:nth-of-type(even)'); | ||||||
598 | my $top3 = $dom->find('div:nth-of-type(-n+3)'); | ||||||
599 | |||||||
600 | =item E:nth-last-of-type(n) | ||||||
601 | |||||||
602 | An C |
||||||
603 | |||||||
604 | my $third = $dom->find('div:nth-last-of-type(3)'); | ||||||
605 | my $odd = $dom->find('div:nth-last-of-type(odd)'); | ||||||
606 | my $even = $dom->find('div:nth-last-of-type(even)'); | ||||||
607 | my $bottom3 = $dom->find('div:nth-last-of-type(-n+3)'); | ||||||
608 | |||||||
609 | =item E:first-child | ||||||
610 | |||||||
611 | An C |
||||||
612 | |||||||
613 | my $first = $dom->find('div p:first-child'); | ||||||
614 | |||||||
615 | =item E:last-child | ||||||
616 | |||||||
617 | An C |
||||||
618 | |||||||
619 | my $last = $dom->find('div p:last-child'); | ||||||
620 | |||||||
621 | =item E:first-of-type | ||||||
622 | |||||||
623 | An C |
||||||
624 | |||||||
625 | my $first = $dom->find('div p:first-of-type'); | ||||||
626 | |||||||
627 | =item E:last-of-type | ||||||
628 | |||||||
629 | An C |
||||||
630 | |||||||
631 | my $last = $dom->find('div p:last-of-type'); | ||||||
632 | |||||||
633 | =item E:only-child | ||||||
634 | |||||||
635 | An C |
||||||
636 | |||||||
637 | my $lonely = $dom->find('div p:only-child'); | ||||||
638 | |||||||
639 | =item E:only-of-type | ||||||
640 | |||||||
641 | An C |
||||||
642 | |||||||
643 | my $lonely = $dom->find('div p:only-of-type'); | ||||||
644 | |||||||
645 | =item E:empty | ||||||
646 | |||||||
647 | An C |
||||||
648 | |||||||
649 | my $empty = $dom->find(':empty'); | ||||||
650 | |||||||
651 | =item E:checked | ||||||
652 | |||||||
653 | A user interface element C |
||||||
654 | checkbox). | ||||||
655 | |||||||
656 | my $input = $dom->find(':checked'); | ||||||
657 | |||||||
658 | =item E.warning | ||||||
659 | |||||||
660 | An C |
||||||
661 | |||||||
662 | my $warning = $dom->find('div.warning'); | ||||||
663 | |||||||
664 | =item E#myid | ||||||
665 | |||||||
666 | An C |
||||||
667 | |||||||
668 | my $foo = $dom->at('div#foo'); | ||||||
669 | |||||||
670 | =item E:not(s) | ||||||
671 | |||||||
672 | An C |
||||||
673 | |||||||
674 | my $others = $dom->find('div p:not(:first-child)'); | ||||||
675 | |||||||
676 | =item E F | ||||||
677 | |||||||
678 | An C |
||||||
679 | |||||||
680 | my $headlines = $dom->find('div h1'); | ||||||
681 | |||||||
682 | =item E E |
||||||
683 | |||||||
684 | An C |
||||||
685 | |||||||
686 | my $headlines = $dom->find('html > body > div > h1'); | ||||||
687 | |||||||
688 | =item E + F | ||||||
689 | |||||||
690 | An C |
||||||
691 | |||||||
692 | my $second = $dom->find('h1 + h2'); | ||||||
693 | |||||||
694 | =item E ~ F | ||||||
695 | |||||||
696 | An C |
||||||
697 | |||||||
698 | my $second = $dom->find('h1 ~ h2'); | ||||||
699 | |||||||
700 | =item E, F, G | ||||||
701 | |||||||
702 | Elements of type C |
||||||
703 | |||||||
704 | my $headlines = $dom->find('h1, h2, h3'); | ||||||
705 | |||||||
706 | =item E[foo=bar][bar=baz] | ||||||
707 | |||||||
708 | An C |
||||||
709 | |||||||
710 | my $links = $dom->find('a[foo^=b][foo$=ar]'); | ||||||
711 | |||||||
712 | =back | ||||||
713 | |||||||
714 | =head1 OPERATORS | ||||||
715 | |||||||
716 | L<DOM::Tiny> overloads the following operators. | ||||||
717 | |||||||
718 | =head2 array | ||||||
719 | |||||||
720 | my @nodes = @$dom; | ||||||
721 | |||||||
722 | Alias for L</"child_nodes">. | ||||||
723 | |||||||
724 | # "" | ||||||
725 | $dom->parse('123')->[0]; | ||||||
726 | |||||||
727 | =head2 bool | ||||||
728 | |||||||
729 | my $bool = !!$dom; | ||||||
730 | |||||||
731 | Always true. | ||||||
732 | |||||||
733 | =head2 hash | ||||||
734 | |||||||
735 | my %attrs = %$dom; | ||||||
736 | |||||||
737 | Alias for L</"attr">. | ||||||
738 | |||||||
739 | # "test" | ||||||
740 | $dom->parse('<div id="test">Test</div>')->at('div')->{id}; | ||||||
741 | |||||||
742 | =head2 stringify | ||||||
743 | |||||||
744 | my $str = "$dom"; | ||||||
745 | |||||||
746 | Alias for L</"to_string">. | ||||||
747 | |||||||
748 | =head1 METHODS | ||||||
749 | |||||||
750 | L<DOM::Tiny> implements the following methods. | ||||||
751 | |||||||
752 | =head2 new | ||||||
753 | |||||||
754 | my $dom = DOM::Tiny->new; | ||||||
755 | my $dom = DOM::Tiny->new(' |
||||||
756 | |||||||
757 | Construct a new scalar-based L |
||||||
758 | fragment if necessary. | ||||||
759 | |||||||
760 | =head2 all_text | ||||||
761 | |||||||
762 | my $trimmed = $dom->all_text; | ||||||
763 | my $untrimmed = $dom->all_text(0); | ||||||
764 | |||||||
765 | Extract text content from all descendant nodes of this element, smart | ||||||
766 | whitespace trimming is enabled by default. | ||||||
767 | |||||||
768 | # "foo bar baz" | ||||||
769 | $dom->parse(" foo\n ")->at('div')->all_text; bar baz\n |
||||||
770 | |||||||
771 | # "foo\nbarbaz\n" | ||||||
772 | $dom->parse(" foo\n ")->at('div')->all_text(0); bar baz\n |
||||||
773 | |||||||
774 | =head2 ancestors | ||||||
775 | |||||||
776 | my $collection = $dom->ancestors; | ||||||
777 | my $collection = $dom->ancestors('div ~ p'); | ||||||
778 | |||||||
779 | Find all ancestor elements of this node matching the CSS selector and return a | ||||||
780 | L |
||||||
781 | objects. All selectors listed in L"SELECTORS"> are supported. | ||||||
782 | |||||||
783 | # List tag names of ancestor elements | ||||||
784 | say $dom->ancestors->map('tag')->join("\n"); | ||||||
785 | |||||||
786 | =head2 append | ||||||
787 | |||||||
788 | $dom = $dom->append(' I ♥ DOM::Tiny! '); |
||||||
789 | |||||||
790 | Append HTML/XML fragment to this node (for all node types other than C |
||||||
791 | |||||||
792 | # "Test123 |
||||||
793 | $dom->parse('Test |
||||||
794 | ->at('h1')->append('123')->root; |
||||||
795 | |||||||
796 | # " Test 123 " |
||||||
797 | $dom->parse(' Test ')->at('p') |
||||||
798 | ->child_nodes->first->append(' 123')->root; | ||||||
799 | |||||||
800 | =head2 append_content | ||||||
801 | |||||||
802 | $dom = $dom->append_content(' I ♥ DOM::Tiny! '); |
||||||
803 | |||||||
804 | Append HTML/XML fragment (for C |
||||||
805 | node's content. | ||||||
806 | |||||||
807 | # "Test123 |
||||||
808 | $dom->parse('Test |
||||||
809 | ->at('h1')->append_content('123')->root; | ||||||
810 | |||||||
811 | # " " |
||||||
812 | $dom->parse(' ') |
||||||
813 | ->child_nodes->first->append_content('123 ')->root; | ||||||
814 | |||||||
815 | # " Test123 " |
||||||
816 | $dom->parse(' Test ')->at('p')->append_content('123')->root; |
||||||
817 | |||||||
818 | =head2 at | ||||||
819 | |||||||
820 | my $result = $dom->at('div ~ p'); | ||||||
821 | |||||||
822 | Find first descendant element of this element matching the CSS selector and | ||||||
823 | return it as a L |
||||||
824 | selectors listed in L"SELECTORS"> are supported. | ||||||
825 | |||||||
826 | # Find first element with "svg" namespace definition | ||||||
827 | my $namespace = $dom->at('[xmlns\:svg]')->{'xmlns:svg'}; | ||||||
828 | |||||||
829 | =head2 attr | ||||||
830 | |||||||
831 | my $hash = $dom->attr; | ||||||
832 | my $foo = $dom->attr('foo'); | ||||||
833 | $dom = $dom->attr({foo => 'bar'}); | ||||||
834 | $dom = $dom->attr(foo => 'bar'); | ||||||
835 | |||||||
836 | This element's attributes. | ||||||
837 | |||||||
838 | # Remove an attribute | ||||||
839 | delete $dom->attr->{id}; | ||||||
840 | |||||||
841 | # Attribute without value | ||||||
842 | $dom->attr(selected => undef); | ||||||
843 | |||||||
844 | # List id attributes | ||||||
845 | say $dom->find('*')->map(attr => 'id')->compact->join("\n"); | ||||||
846 | |||||||
847 | =head2 child_nodes | ||||||
848 | |||||||
849 | my $collection = $dom->child_nodes; | ||||||
850 | |||||||
851 | Return a L |
||||||
852 | element as L |
||||||
853 | |||||||
854 | # " 123 " |
||||||
855 | $dom->parse(' Test123 ')->at('p')->child_nodes->first->remove; |
||||||
856 | |||||||
857 | # "" | ||||||
858 | $dom->parse('123')->child_nodes->first; | ||||||
859 | |||||||
860 | # " Test " | ||||||
861 | $dom->parse('123')->child_nodes->last->content; | ||||||
862 | |||||||
863 | =head2 children | ||||||
864 | |||||||
865 | my $collection = $dom->children; | ||||||
866 | my $collection = $dom->children('div ~ p'); | ||||||
867 | |||||||
868 | Find all child elements of this element matching the CSS selector and return a | ||||||
869 | L |
||||||
870 | objects. All selectors listed in L</"SELECTORS"> are supported. | ||||||
871 | |||||||
872 | # Show tag name of random child element | ||||||
873 | say $dom->children->shuffle->first->tag; | ||||||
874 | |||||||
875 | =head2 content | ||||||
876 | |||||||
877 | my $str = $dom->content; | ||||||
878 | $dom = $dom->content(' I ♥ DOM::Tiny! '); |
||||||
879 | |||||||
880 | Return this node's content or replace it with HTML/XML fragment (for C<root> | ||||||
881 | and C<tag> nodes) or raw content. | ||||||
882 | |||||||
883 | # "Test" | ||||||
884 | $dom->parse(' Test ')->at('div')->content; |
||||||
885 | |||||||
886 | # "123 |
||||||
887 | $dom->parse('Test |
||||||
888 | |||||||
889 | # " 123 " |
||||||
890 | $dom->parse(' Test ')->at('p')->content('123')->root; |
||||||
891 | |||||||
892 | # " " |
||||||
893 | $dom->parse('Test |
||||||
894 | |||||||
895 | # " Test " | ||||||
896 | $dom->parse(' ')->child_nodes->first->content; |
||||||
897 | |||||||
898 | # " 456 " |
||||||
899 | $dom->parse(' 456 ') |
||||||
900 | ->at('div')->child_nodes->first->content(' 123 ')->root; | ||||||
901 | |||||||
902 | =head2 descendant_nodes | ||||||
903 | |||||||
904 | my $collection = $dom->descendant_nodes; | ||||||
905 | |||||||
906 | Return a L |
||||||
907 | this element as L |
||||||
908 | |||||||
909 | # " 123 " |
||||||
910 | $dom->parse(' 123 ') |
||||||
911 | ->descendant_nodes->grep(sub { $_->type eq 'comment' }) | ||||||
912 | ->map('remove')->first; | ||||||
913 | |||||||
914 | # " testtest " |
||||||
915 | $dom->parse(' 123456 ') |
||||||
916 | ->at('p')->descendant_nodes->grep(sub { $_->type eq 'text' }) | ||||||
917 | ->map(content => 'test')->first->root; | ||||||
918 | |||||||
919 | =head2 find | ||||||
920 | |||||||
921 | my $collection = $dom->find('div ~ p'); | ||||||
922 | |||||||
923 | Find all descendant elements of this element matching the CSS selector and | ||||||
924 | return a L |
||||||
925 | L |
||||||
926 | |||||||
927 | # Find a specific element and extract information | ||||||
928 | my $id = $dom->find('div')->[23]{id}; | ||||||
929 | |||||||
930 | # Extract information from multiple elements | ||||||
931 | my @headers = $dom->find('h1, h2, h3')->map('text')->each; | ||||||
932 | |||||||
933 | # Count all the different tags | ||||||
934 | my $hash = $dom->find('*')->reduce(sub { $a->{$b->tag}++; $a }, {}); | ||||||
935 | |||||||
936 | # Find elements with a class that contains dots | ||||||
937 | my @divs = $dom->find('div.foo\.bar')->each; | ||||||
938 | |||||||
939 | =head2 following | ||||||
940 | |||||||
941 | my $collection = $dom->following; | ||||||
942 | my $collection = $dom->following('div ~ p'); | ||||||
943 | |||||||
944 | Find all sibling elements after this node matching the CSS selector and return | ||||||
945 | a L |
||||||
946 | objects. All selectors listed in L</"SELECTORS"> are supported. | ||||||
947 | |||||||
948 | # List tags of sibling elements after this node | ||||||
949 | say $dom->following->map('tag')->join("\n"); | ||||||
950 | |||||||
951 | =head2 following_nodes | ||||||
952 | |||||||
953 | my $collection = $dom->following_nodes; | ||||||
954 | |||||||
955 | Return a L |
||||||
956 | this node as L |
||||||
957 | |||||||
958 | # "C" | ||||||
959 | $dom->parse(' A C')->at('p')->following_nodes->last->content; |
||||||
960 | |||||||
961 | =head2 matches | ||||||
962 | |||||||
963 | my $bool = $dom->matches('div ~ p'); | ||||||
964 | |||||||
965 | Check if this element matches the CSS selector. All selectors listed in | ||||||
966 | L</"SELECTORS"> are supported. | ||||||
967 | |||||||
968 | # True | ||||||
969 | $dom->parse(' A ')->at('p')->matches('.a'); |
||||||
970 | $dom->parse(' A ')->at('p')->matches('p[class]'); |
||||||
971 | |||||||
972 | # False | ||||||
973 | $dom->parse(' A ')->at('p')->matches('.b'); |
||||||
974 | $dom->parse(' A ')->at('p')->matches('p[id]'); |
||||||
975 | |||||||
976 | =head2 namespace | ||||||
977 | |||||||
978 | my $namespace = $dom->namespace; | ||||||
979 | |||||||
980 | Find this element's namespace, or return C<undef> if none could be found. | ||||||
981 | |||||||
982 | # Find namespace for an element with namespace prefix | ||||||
983 | my $namespace = $dom->at('svg > svg\:circle')->namespace; | ||||||
984 | |||||||
985 | # Find namespace for an element that may or may not have a namespace prefix | ||||||
986 | my $namespace = $dom->at('svg > circle')->namespace; | ||||||
987 | |||||||
988 | =head2 next | ||||||
989 | |||||||
990 | my $sibling = $dom->next; | ||||||
991 | |||||||
992 | Return L |
||||||
993 | no more siblings. | ||||||
994 | |||||||
995 | # "123" |
||||||
996 | $dom->parse('Test123 |
||||||
997 | |||||||
998 | =head2 next_node | ||||||
999 | |||||||
1000 | my $sibling = $dom->next_node; | ||||||
1001 | |||||||
1002 | Return L |
||||||
1003 | more siblings. | ||||||
1004 | |||||||
1005 | # "456" | ||||||
1006 | $dom->parse(' 123456 ') |
||||||
1007 | ->at('b')->next_node->next_node; | ||||||
1008 | |||||||
1009 | # " Test " | ||||||
1010 | $dom->parse(' 123456 ') |
||||||
1011 | ->at('b')->next_node->content; | ||||||
1012 | |||||||
1013 | =head2 parent | ||||||
1014 | |||||||
1015 | my $parent = $dom->parent; | ||||||
1016 | |||||||
1017 | Return L |
||||||
1018 | has no parent. | ||||||
1019 | |||||||
1020 | # "Test" | ||||||
1021 | $dom->parse(' Test ')->at('i')->parent; |
||||||
1022 | |||||||
1023 | =head2 parse | ||||||
1024 | |||||||
1025 | $dom = $dom->parse(' |
||||||
1026 | |||||||
1027 | Parse HTML/XML fragment. | ||||||
1028 | |||||||
1029 | # Parse XML | ||||||
1030 | my $dom = DOM::Tiny->new->xml(1)->parse(' |
||||||
1031 | |||||||
1032 | =head2 preceding | ||||||
1033 | |||||||
1034 | my $collection = $dom->preceding; | ||||||
1035 | my $collection = $dom->preceding('div ~ p'); | ||||||
1036 | |||||||
1037 | Find all sibling elements before this node matching the CSS selector and return | ||||||
1038 | a L |
||||||
1039 | objects. All selectors listed in L</"SELECTORS"> are supported. | ||||||
1040 | |||||||
1041 | # List tags of sibling elements before this node | ||||||
1042 | say $dom->preceding->map('tag')->join("\n"); | ||||||
1043 | |||||||
1044 | =head2 preceding_nodes | ||||||
1045 | |||||||
1046 | my $collection = $dom->preceding_nodes; | ||||||
1047 | |||||||
1048 | Return a L |
||||||
1049 | before this node as L |
||||||
1050 | |||||||
1051 | # "A" | ||||||
1052 | $dom->parse('A C ')->at('p')->preceding_nodes->first->content; |
||||||
1053 | |||||||
1054 | =head2 prepend | ||||||
1055 | |||||||
1056 | $dom = $dom->prepend(' I ♥ DOM::Tiny! '); |
||||||
1057 | |||||||
1058 | Prepend HTML/XML fragment to this node (for all node types other than C<root>). | ||||||
1059 | |||||||
1060 | # "Test123 |
||||||
1061 | $dom->parse('123 |
||||||
1062 | ->at('h2')->prepend('Test')->root; |
||||||
1063 | |||||||
1064 | # " Test 123 " |
||||||
1065 | $dom->parse(' 123 ') |
||||||
1066 | ->at('p')->child_nodes->first->prepend('Test ')->root; | ||||||
1067 | |||||||
1068 | =head2 prepend_content | ||||||
1069 | |||||||
1070 | $dom = $dom->prepend_content(' I ♥ DOM::Tiny! '); |
||||||
1071 | |||||||
1072 | Prepend HTML/XML fragment (for C<root> and C<tag> nodes) or raw content to this | ||||||
1073 | node's content. | ||||||
1074 | |||||||
1075 | # "Test123 |
||||||
1076 | $dom->parse('123 |
||||||
1077 | ->at('h2')->prepend_content('Test')->root; | ||||||
1078 | |||||||
1079 | # " " |
||||||
1080 | $dom->parse(' ') |
||||||
1081 | ->child_nodes->first->prepend_content(' Test')->root; | ||||||
1082 | |||||||
1083 | # " 123Test " |
||||||
1084 | $dom->parse(' Test ')->at('p')->prepend_content('123')->root; |
||||||
1085 | |||||||
1086 | =head2 previous | ||||||
1087 | |||||||
1088 | my $sibling = $dom->previous; | ||||||
1089 | |||||||
1090 | Return L |
||||||
1091 | are no more siblings. | ||||||
1092 | |||||||
1093 | # "Test" |
||||||
1094 | $dom->parse('Test123 |
||||||
1095 | |||||||
1096 | =head2 previous_node | ||||||
1097 | |||||||
1098 | my $sibling = $dom->previous_node; | ||||||
1099 | |||||||
1100 | Return L |
||||||
1101 | no more siblings. | ||||||
1102 | |||||||
1103 | # "123" | ||||||
1104 | $dom->parse(' 123456 ') |
||||||
1105 | ->at('b')->previous_node->previous_node; | ||||||
1106 | |||||||
1107 | # " Test " | ||||||
1108 | $dom->parse(' 123456 ') |
||||||
1109 | ->at('b')->previous_node->content; | ||||||
1110 | |||||||
1111 | =head2 remove | ||||||
1112 | |||||||
1113 | my $parent = $dom->remove; | ||||||
1114 | |||||||
1115 | Remove this node and return L</"root"> (for C<root> nodes) or L</"parent">. | ||||||
1116 | |||||||
1117 | # "" | ||||||
1118 | $dom->parse('Test |
||||||
1119 | |||||||
1120 | # " 456 " |
||||||
1121 | $dom->parse(' 123456 ') |
||||||
1122 | ->at('p')->child_nodes->first->remove->root; | ||||||
1123 | |||||||
1124 | =head2 replace | ||||||
1125 | |||||||
1126 | my $parent = $dom->replace(' I ♥ DOM::Tiny! '); |
||||||
1127 | |||||||
1128 | Replace this node with HTML/XML fragment and return L</"root"> (for C<root> | ||||||
1129 | nodes) or L</"parent">. | ||||||
1130 | |||||||
1131 | # "123 |
||||||
1132 | $dom->parse('Test123'); |
||||||
1133 | |||||||
1134 | # " 123 " |
||||||
1135 | $dom->parse(' Test ') |
||||||
1136 | ->at('p')->child_nodes->[0]->replace('123')->root; | ||||||
1137 | |||||||
1138 | =head2 root | ||||||
1139 | |||||||
1140 | my $root = $dom->root; | ||||||
1141 | |||||||
1142 | Return L |
||||||
1143 | |||||||
1144 | =head2 strip | ||||||
1145 | |||||||
1146 | my $parent = $dom->strip; | ||||||
1147 | |||||||
1148 | Remove this element while preserving its content and return L</"parent">. | ||||||
1149 | |||||||
1150 | # " Test " |
||||||
1151 | $dom->parse('Test |
||||||
1152 | |||||||
1153 | =head2 tag | ||||||
1154 | |||||||
1155 | my $tag = $dom->tag; | ||||||
1156 | $dom = $dom->tag('div'); | ||||||
1157 | |||||||
1158 | This element's tag name. | ||||||
1159 | |||||||
1160 | # List tag names of child elements | ||||||
1161 | say $dom->children->map('tag')->join("\n"); | ||||||
1162 | |||||||
1163 | =head2 tap | ||||||
1164 | |||||||
1165 | $dom = $dom->tap(sub {...}); | ||||||
1166 | |||||||
1167 | Equivalent to L |
||||||
1168 | |||||||
1169 | =head2 text | ||||||
1170 | |||||||
1171 | my $trimmed = $dom->text; | ||||||
1172 | my $untrimmed = $dom->text(0); | ||||||
1173 | |||||||
1174 | Extract text content from this element only (not including child elements), | ||||||
1175 | smart whitespace trimming is enabled by default. | ||||||
1176 | |||||||
1177 | # "foo baz" | ||||||
1178 | $dom->parse(" foo\n ")->at('div')->text; bar baz\n |
||||||
1179 | |||||||
1180 | # "foo\nbaz\n" | ||||||
1181 | $dom->parse(" foo\n ")->at('div')->text(0); bar baz\n |
||||||
1182 | |||||||
1183 | =head2 to_string | ||||||
1184 | |||||||
1185 | my $str = $dom->to_string; | ||||||
1186 | |||||||
1187 | Render this node and its content to HTML/XML. | ||||||
1188 | |||||||
1189 | # "Test" | ||||||
1190 | $dom->parse(' Test ')->at('div b')->to_string; |
||||||
1191 | |||||||
1192 | =head2 tree | ||||||
1193 | |||||||
1194 | my $tree = $dom->tree; | ||||||
1195 | $dom = $dom->tree(['root']); | ||||||
1196 | |||||||
1197 | Document Object Model. Note that this structure should only be used very | ||||||
1198 | carefully since it is very dynamic. | ||||||
1199 | |||||||
1200 | =head2 type | ||||||
1201 | |||||||
1202 | my $type = $dom->type; | ||||||
1203 | |||||||
1204 | This node's type, usually C<cdata>, C<comment>, C<doctype>, C<pi>, C<raw>, | ||||||
1205 | C<root>, C<tag> or C<text>. | ||||||
1206 | |||||||
1207 | # "cdata" | ||||||
1208 | $dom->parse('')->child_nodes->first->type; | ||||||
1209 | |||||||
1210 | # "comment" | ||||||
1211 | $dom->parse('')->child_nodes->first->type; | ||||||
1212 | |||||||
1213 | # "doctype" | ||||||
1214 | $dom->parse('')->child_nodes->first->type; | ||||||
1215 | |||||||
1216 | # "pi" | ||||||
1217 | $dom->parse('')->child_nodes->first->type; | ||||||
1218 | |||||||
1219 | # "raw" | ||||||
1220 | $dom->parse(' |
||||||
1221 | |||||||
1222 | # "root" | ||||||
1223 | $dom->parse(' Test ')->type; |
||||||
1224 | |||||||
1225 | # "tag" | ||||||
1226 | $dom->parse(' Test ')->at('p')->type; |
||||||
1227 | |||||||
1228 | # "text" | ||||||
1229 | $dom->parse(' Test ')->at('p')->child_nodes->first->type; |
||||||
1230 | |||||||
1231 | =head2 val | ||||||
1232 | |||||||
1233 | my $value = $dom->val; | ||||||
1234 | |||||||
1235 | Extract value from form element (such as C | ||||||
1236 | C | ||||||
1237 | the case of C | ||||||
1238 | C |
||||||
1239 | C |
||||||
1240 | |||||||
1241 | # "a" | ||||||
1242 | $dom->parse('')->at('input')->val; | ||||||
1243 | |||||||
1244 | # "b" | ||||||
1245 | $dom->parse('')->at('textarea')->val; | ||||||
1246 | |||||||
1247 | # "c" | ||||||
1248 | $dom->parse('')->at('option')->val; | ||||||
1249 | |||||||
1250 | # "d" | ||||||
1251 | $dom->parse('') | ||||||
1252 | ->at('select')->val; | ||||||
1253 | |||||||
1254 | # "e" | ||||||
1255 | $dom->parse('') | ||||||
1256 | ->at('select')->val->[0]; | ||||||
1257 | |||||||
1258 | # "on" | ||||||
1259 | $dom->parse('')->at('input')->val; | ||||||
1260 | |||||||
1261 | =head2 wrap | ||||||
1262 | |||||||
1263 | $dom = $dom->wrap(''); | ||||||
1264 | |||||||
1265 | Wrap HTML/XML fragment around this node (for all node types other than C<root>), | ||||||
1266 | placing it as the last child of the first innermost element. | ||||||
1267 | |||||||
1268 | # " 123Test " |
||||||
1269 | $dom->parse('Test')->at('b')->wrap(' 123 ')->root; |
||||||
1270 | |||||||
1271 | # " Test 123 |
||||||
1272 | $dom->parse('Test')->at('b')->wrap(' 123 ')->root; |
||||||
1273 | |||||||
1274 | # " Test 123 " |
||||||
1275 | $dom->parse('Test')->at('b')->wrap(' 123 ')->root; |
||||||
1276 | |||||||
1277 | # " Test " |
||||||
1278 | $dom->parse(' Test ')->at('p')->child_nodes->first->wrap('')->root; |
||||||
1279 | |||||||
1280 | =head2 wrap_content | ||||||
1281 | |||||||
1282 | $dom = $dom->wrap_content(''); | ||||||
1283 | |||||||
1284 | Wrap HTML/XML fragment around this node's content (for C<root> and C<tag> | ||||||
1285 | nodes), placing it as the last children of the first innermost element. | ||||||
1286 | |||||||
1287 | # " 123Test " |
||||||
1288 | $dom->parse(' Test ')->at('p')->wrap_content('123')->root; |
||||||
1289 | |||||||
1290 | # " Test 123 " |
||||||
1291 | $dom->parse('Test')->wrap_content(' 123 '); |
||||||
1292 | |||||||
1293 | =head2 xml | ||||||
1294 | |||||||
1295 | my $bool = $dom->xml; | ||||||
1296 | $dom = $dom->xml($bool); | ||||||
1297 | |||||||
1298 | Disable HTML semantics in parser and activate case-sensitivity, defaults to | ||||||
1299 | auto detection based on XML declarations. | ||||||
1300 | |||||||
1301 | =head1 COLLECTION METHODS | ||||||
1302 | |||||||
1303 | Some L |
||||||
1304 | L |
||||||
1305 | reference, or with the following methods. | ||||||
1306 | |||||||
1307 | # Chain methods | ||||||
1308 | $collection->map(sub { ucfirst })->shuffle->each(sub { | ||||||
1309 | my ($word, $num) = @_; | ||||||
1310 | say "$num: $word"; | ||||||
1311 | }); | ||||||
1312 | |||||||
1313 | # Access array directly to manipulate collection | ||||||
1314 | $collection->[23] += 100; | ||||||
1315 | say for @$collection; | ||||||
1316 | |||||||
1317 | =head2 compact | ||||||
1318 | |||||||
1319 | my $new = $collection->compact; | ||||||
1320 | |||||||
1321 | Create a new collection with all elements that are defined and not an empty | ||||||
1322 | string. | ||||||
1323 | |||||||
1324 | # $collection contains (0, 1, undef, 2, '', 3) | ||||||
1325 | $collection->compact->join(', '); # "0, 1, 2, 3" | ||||||
1326 | |||||||
1327 | =head2 each | ||||||
1328 | |||||||
1329 | my @elements = $collection->each; | ||||||
1330 | $collection = $collection->each(sub {...}); | ||||||
1331 | |||||||
1332 | Evaluate callback for each element in collection or return all elements as a | ||||||
1333 | list if none has been provided. The element will be the first argument passed | ||||||
1334 | to the callback and is also available as C<$_>. | ||||||
1335 | |||||||
1336 | # Make a numbered list | ||||||
1337 | $collection->each(sub { | ||||||
1338 | my ($e, $num) = @_; | ||||||
1339 | say "$num: $e"; | ||||||
1340 | }); | ||||||
1341 | |||||||
1342 | =head2 first | ||||||
1343 | |||||||
1344 | my $first = $collection->first; | ||||||
1345 | my $first = $collection->first(qr/foo/); | ||||||
1346 | my $first = $collection->first(sub {...}); | ||||||
1347 | my $first = $collection->first($method); | ||||||
1348 | my $first = $collection->first($method, @args); | ||||||
1349 | |||||||
1350 | Evaluate regular expression/callback for, or call method on, each element in | ||||||
1351 | collection and return the first one that matched the regular expression, or for | ||||||
1352 | which the callback/method returned true. The element will be the first argument | ||||||
1353 | passed to the callback and is also available as C<$_>. | ||||||
1354 | |||||||
1355 | # Longer version | ||||||
1356 | my $first = $collection->first(sub { $_->$method(@args) }); | ||||||
1357 | |||||||
1358 | # Find first value that contains the word "tiny" | ||||||
1359 | my $interesting = $collection->first(qr/tiny/i); | ||||||
1360 | |||||||
1361 | # Find first value that is greater than 5 | ||||||
1362 | my $greater = $collection->first(sub { $_ > 5 }); | ||||||
1363 | |||||||
1364 | =head2 flatten | ||||||
1365 | |||||||
1366 | my $new = $collection->flatten; | ||||||
1367 | |||||||
1368 | Flatten nested collections/arrays recursively and create a new collection with | ||||||
1369 | all elements. | ||||||
1370 | |||||||
1371 | # $collection contains (1, [2, [3, 4], 5, [6]], 7) | ||||||
1372 | $collection->flatten->join(', '); # "1, 2, 3, 4, 5, 6, 7" | ||||||
1373 | |||||||
1374 | =head2 grep | ||||||
1375 | |||||||
1376 | my $new = $collection->grep(qr/foo/); | ||||||
1377 | my $new = $collection->grep(sub {...}); | ||||||
1378 | my $new = $collection->grep($method); | ||||||
1379 | my $new = $collection->grep($method, @args); | ||||||
1380 | |||||||
1381 | Evaluate regular expression/callback for, or call method on, each element in | ||||||
1382 | collection and create a new collection with all elements that matched the | ||||||
1383 | regular expression, or for which the callback/method returned true. The element | ||||||
1384 | will be the first argument passed to the callback and is also available as | ||||||
1385 | C<$_>. | ||||||
1386 | |||||||
1387 | # Longer version | ||||||
1388 | my $new = $collection->grep(sub { $_->$method(@args) }); | ||||||
1389 | |||||||
1390 | # Find all values that contain the word "tiny" | ||||||
1391 | my $interesting = $collection->grep(qr/tiny/i); | ||||||
1392 | |||||||
1393 | # Find all values that are greater than 5 | ||||||
1394 | my $greater = $collection->grep(sub { $_ > 5 }); | ||||||
1395 | |||||||
1396 | =head2 join | ||||||
1397 | |||||||
1398 | my $stream = $collection->join; | ||||||
1399 | my $stream = $collection->join("\n"); | ||||||
1400 | |||||||
1401 | Turn collection into string. | ||||||
1402 | |||||||
1403 | # Join all values with commas | ||||||
1404 | $collection->join(', '); | ||||||
1405 | |||||||
1406 | =head2 last | ||||||
1407 | |||||||
1408 | my $last = $collection->last; | ||||||
1409 | |||||||
1410 | Return the last element in collection. | ||||||
1411 | |||||||
1412 | =head2 map | ||||||
1413 | |||||||
1414 | my $new = $collection->map(sub {...}); | ||||||
1415 | my $new = $collection->map($method); | ||||||
1416 | my $new = $collection->map($method, @args); | ||||||
1417 | |||||||
1418 | Evaluate callback for, or call method on, each element in collection and create | ||||||
1419 | a new collection from the results. The element will be the first argument | ||||||
1420 | passed to the callback and is also available as C<$_>. | ||||||
1421 | |||||||
1422 | # Longer version | ||||||
1423 | my $new = $collection->map(sub { $_->$method(@args) }); | ||||||
1424 | |||||||
1425 | # Append the word "tiny" to all values | ||||||
1426 | my $domified = $collection->map(sub { $_ . 'tiny' }); | ||||||
1427 | |||||||
1428 | =head2 reduce | ||||||
1429 | |||||||
1430 | my $result = $collection->reduce(sub {...}); | ||||||
1431 | my $result = $collection->reduce(sub {...}, $initial); | ||||||
1432 | |||||||
1433 | Reduce elements in collection with callback, the first element will be used as | ||||||
1434 | initial value if none has been provided. | ||||||
1435 | |||||||
1436 | # Calculate the sum of all values | ||||||
1437 | my $sum = $collection->reduce(sub { $a + $b }); | ||||||
1438 | |||||||
1439 | # Count how often each value occurs in collection | ||||||
1440 | my $hash = $collection->reduce(sub { $a->{$b}++; $a }, {}); | ||||||
1441 | |||||||
1442 | =head2 reverse | ||||||
1443 | |||||||
1444 | my $new = $collection->reverse; | ||||||
1445 | |||||||
1446 | Create a new collection with all elements in reverse order. | ||||||
1447 | |||||||
1448 | =head2 slice | ||||||
1449 | |||||||
1450 | my $new = $collection->slice(4 .. 7); | ||||||
1451 | |||||||
1452 | Create a new collection with all selected elements. | ||||||
1453 | |||||||
1454 | # $collection contains ('A', 'B', 'C', 'D', 'E') | ||||||
1455 | $collection->slice(1, 2, 4)->join(' '); # "B C E" | ||||||
1456 | |||||||
1457 | =head2 shuffle | ||||||
1458 | |||||||
1459 | my $new = $collection->shuffle; | ||||||
1460 | |||||||
1461 | Create a new collection with all elements in random order. | ||||||
1462 | |||||||
1463 | =head2 size | ||||||
1464 | |||||||
1465 | my $size = $collection->size; | ||||||
1466 | |||||||
1467 | Number of elements in collection. | ||||||
1468 | |||||||
1469 | =head2 sort | ||||||
1470 | |||||||
1471 | my $new = $collection->sort; | ||||||
1472 | my $new = $collection->sort(sub {...}); | ||||||
1473 | |||||||
1474 | Sort elements based on return value of callback and create a new collection | ||||||
1475 | from the results. | ||||||
1476 | |||||||
1477 | # Sort values case-insensitive | ||||||
1478 | my $case_insensitive = $collection->sort(sub { uc($a) cmp uc($b) }); | ||||||
1479 | |||||||
1480 | =head2 tap | ||||||
1481 | |||||||
1482 | $collection = $collection->tap(sub {...}); | ||||||
1483 | |||||||
1484 | Equivalent to L |
||||||
1485 | |||||||
1486 | =head2 to_array | ||||||
1487 | |||||||
1488 | my $array = $collection->to_array; | ||||||
1489 | |||||||
1490 | Turn collection into array reference. | ||||||
1491 | |||||||
1492 | =head2 uniq | ||||||
1493 | |||||||
1494 | my $new = $collection->uniq; | ||||||
1495 | my $new = $collection->uniq(sub {...}); | ||||||
1496 | my $new = $collection->uniq($method); | ||||||
1497 | my $new = $collection->uniq($method, @args); | ||||||
1498 | |||||||
1499 | Create a new collection without duplicate elements, using the string | ||||||
1500 | representation of either the elements or the return value of the | ||||||
1501 | callback/method. | ||||||
1502 | |||||||
1503 | # Longer version | ||||||
1504 | my $new = $collection->uniq(sub { $_->$method(@args) }); | ||||||
1505 | |||||||
1506 | # $collection contains ('foo', 'bar', 'bar', 'baz') | ||||||
1507 | $collection->uniq->join(' '); # "foo bar baz" | ||||||
1508 | |||||||
1509 | # $collection contains ([1, 2], [2, 1], [3, 2]) | ||||||
1510 | $collection->uniq(sub{ $_->[1] })->to_array; # "[[1, 2], [2, 1]]" | ||||||
1511 | |||||||
1512 | =head1 BUGS | ||||||
1513 | |||||||
1514 | Report any issues on the public bugtracker. | ||||||
1515 | |||||||
1516 | =head1 AUTHOR | ||||||
1517 | |||||||
1518 | Dan Book |
||||||
1519 | |||||||
1520 | Code and tests adapted from L |
||||||
1521 | |||||||
1522 | =head1 CONTRIBUTORS | ||||||
1523 | |||||||
1524 | =over | ||||||
1525 | |||||||
1526 | =item Matt S Trout (mst) | ||||||
1527 | |||||||
1528 | =back | ||||||
1529 | |||||||
1530 | =head1 COPYRIGHT AND LICENSE | ||||||
1531 | |||||||
1532 | Copyright (c) 2008-2015 Sebastian Riedel. | ||||||
1533 | |||||||
1534 | Copyright (c) 2015 L</"AUTHOR"> and L</"CONTRIBUTORS"> for adaptation to standalone format. | ||||||
1535 | |||||||
1536 | This is free software, licensed under: | ||||||
1537 | |||||||
1538 | The Artistic License 2.0 (GPL Compatible) | ||||||
1539 | |||||||
1540 | =head1 SEE ALSO | ||||||
1541 | |||||||
1542 | L |
||||||
1543 | |||||||
1544 | =for Pod::Coverage TO_JSON | ||||||
1545 | |||||||
1546 | =cut |