File Coverage

blib/lib/Markdown/Perl/InlineTree.pm
Criterion Covered Total %
statement 291 311 93.5
branch 135 164 82.3
condition 83 99 83.8
subroutine 46 47 97.8
pod 23 28 82.1
total 578 649 89.0


line stmt bran cond sub pod time code
1             # A tree data structure to represent the content of an inline text of a block
2             # element.
3              
4             package Markdown::Perl::InlineTree;
5              
6 32     32   5431 use strict;
  32         67  
  32         1529  
7 32     32   183 use warnings;
  32         90  
  32         2067  
8 32     32   198 use utf8;
  32         60  
  32         248  
9 32     32   1394 use feature ':5.24';
  32         70  
  32         7425  
10              
11 32     32   261 use Carp;
  32         63  
  32         10807  
12 32     32   1196 use English;
  32         2543  
  32         231  
13 32     32   31446 use Exporter 'import';
  32         77  
  32         1401  
14 32     32   760 use Hash::Util ();
  32         3162  
  32         1003  
15 32     32   168 use Scalar::Util 'blessed';
  32         62  
  32         21516  
16              
17             our $VERSION = 0.01;
18              
19             our @EXPORT_OK =
20             qw(new_text new_code new_link new_html new_style new_literal is_node is_tree UNESCAPE_LITERAL);
21             our %EXPORT_TAGS = (all => \@EXPORT_OK);
22              
23             =pod
24              
25             =encoding utf8
26              
27             =head1 NAME
28              
29             Markdown::Perl::InlineTree
30              
31             =head1 SYNOPSIS
32              
33             A tree structure meant to represent the inline elements of a Markdown paragraph.
34              
35             This package is internal to the implementation of L and its
36             documentation should be useful only if you are trying to modify the library.
37              
38             Otherwise please refer to the L and L documentation.
39              
40             =head1 DESCRIPTION
41              
42             =head2 new
43              
44             my $tree = Markdown::Perl::InlineTree->new();
45              
46             The constructor currently does not support any options.
47              
48             =cut
49              
50             sub new {
51 326274     326274 1 1124732 my ($class) = @_;
52              
53 326274         1400065 return bless {children => []}, $class;
54             }
55              
56             package Markdown::Perl::InlineNode { ## no critic (ProhibitMultiplePackages)
57 32     32   290 use Carp;
  32         110  
  32         2869  
58 32     32   875 use Markdown::Perl::HTML 'decode_entities', 'html_escape', 'http_escape';
  32         74  
  32         617565  
59              
60             sub hashpush (\%%) {
61 301596     301596   853787 my ($hash, %args) = @_;
62 301596         1087402 while (my ($k, $v) = each %args) {
63 301596         1214201 $hash->{$k} = $v;
64             }
65 301596         744053 return;
66             }
67              
68             sub new {
69 267966     267966   994230 my ($class, $type, $content, %options) = @_;
70              
71 267966         976883 my $this = {type => $type, escaped => 0};
72 267966 100       680328 $this->{debug} = delete $options{debug} if exists $options{debug};
73 267966         590765 my $content_ref = ref $content;
74 267966 100 66     922258 if (Scalar::Util::blessed($content)
    50          
75             && $content->isa('Markdown::Perl::InlineTree')) {
76 12445         21047 hashpush %{$this}, subtree => $content;
  12445         43816  
77             } elsif (!ref($content)) {
78 255521         399795 hashpush %{$this}, content => $content;
  255521         632527  
79             } else {
80 0         0 confess "Unexpected content for inline ${type} node: ".ref($content);
81             }
82             # There is one more node type, not created here, that looks like a text
83             # node but that is a 'delimiter' node. These nodes are created manually
84             # inside the Inlines module.
85 267966 100 100     1154471 if ($type eq 'text' || $type eq 'code' || $type eq 'literal' || $type eq 'html') {
    100 100        
    50 100        
86 246079 50       541955 confess "Unexpected content for inline ${type} node: ${content_ref}" if $content_ref;
87 246079 50       595496 confess "Unexpected parameters for inline ${type} node: ".join(', ', %options)
88             if %options;
89             } elsif ($type eq 'link') {
90 11650 50       39591 confess 'Missing required option "type" for inline link node' unless exists $options{type};
91 11650         20372 hashpush %{$this}, linktype => delete $options{type};
  11650         39441  
92             confess 'Missing required option "target" for inline link node'
93 11650 50       31489 unless exists $options{target};
94 11650         20450 hashpush %{$this}, target => delete $options{target};
  11650         42776  
95 11650 100       35431 hashpush %{$this}, title => delete $options{title} if exists $options{title};
  92         6460  
96 11650 100       37075 hashpush %{$this}, content => delete $options{content} if exists $options{content};
  1         5  
97 11650 50       34522 confess 'Unexpected parameters for inline link node: '.join(', ', %options) if keys %options;
98             } elsif ($type eq 'style') {
99             confess 'Unexpected parameters for inline style node: '.join(', ', %options)
100 10237 50 33     52284 if keys %options > 1 || !exists $options{tag};
101 10237 50       25481 confess 'The content of a style node must be an InlineTree' unless $content_ref;
102 10237         17391 hashpush %{$this}, tag => $options{tag};
  10237         28163  
103             } else {
104 0         0 confess "Unexpected type for an InlineNode: ${type}";
105             }
106 267966         516639 bless $this, $class;
107              
108 267966         480919 Hash::Util::lock_keys %{$this};
  267966         1064447  
109 267966         4011657 return $this;
110             }
111              
112             sub clone {
113 117719     117719   260391 my ($this) = @_;
114              
115 117719         233023 return bless {%{$this}}, ref($this);
  117719         961795  
116             }
117              
118             sub has_subtree {
119 421777     421777   790241 my ($this) = @_;
120              
121 421777         1310277 return exists $this->{subtree};
122             }
123              
124             sub escape_content {
125 180326     180326   399226 my ($this, $char_class_to_escape, $char_class_to_escape_in_code) = @_;
126              
127 180326 50       494596 confess 'Node should not already be escaped when calling to_text' if $this->{escaped};
128 180326         341289 $this->{escaped} = 1;
129              
130 180326 100 66     710744 if ($this->{type} eq 'text') {
    100          
    100          
    100          
    50          
131 108739         437151 decode_entities($this->{content});
132 108739         325931 html_escape($this->{content}, $char_class_to_escape);
133             } elsif ($this->{type} eq 'literal') {
134 21284         69459 html_escape($this->{content}, $char_class_to_escape);
135             } elsif ($this->{type} eq 'code') {
136             # New lines are treated like spaces in code.
137 3801         35551 $this->{content} =~ s/\n/ /g;
138             # If the content is not just whitespace and it has one space at the
139             # beginning and one at the end, then we remove them.
140 3801         18861 $this->{content} =~ s/^ (.*[^ ].*) $/$1/g;
141 3801         13935 html_escape($this->{content}, $char_class_to_escape_in_code);
142             } elsif ($this->{type} eq 'link') {
143 11650 100 66     53392 if ($this->{linktype} eq 'autolink') {
    50          
144             # For autolinks we don’t decode entities as these are treated like html
145             # construct.
146 9442         42592 html_escape($this->{content}, $char_class_to_escape);
147 9442         42912 http_escape($this->{target});
148 9442         27377 html_escape($this->{target}, $char_class_to_escape);
149             } elsif ($this->{linktype} eq 'link' || $this->{linktype} eq 'img') {
150             # This is a real MD link definition (or image). The target and title
151             # have been generated through the to_source_text() method, so they need
152             # to be decoded and html_escaped
153 2208 100       6756 if (exists $this->{title}) {
154 92         1240 decode_entities($this->{title});
155 92         373 html_escape($this->{title}, $char_class_to_escape);
156             }
157 2208         8606 decode_entities($this->{target});
158 2208         9014 http_escape($this->{target});
159 2208         10370 html_escape($this->{target}, $char_class_to_escape);
160             } else {
161 0         0 confess 'Unexpected link type in render_node_html: '.$this->{linktype};
162             }
163             } elsif ($this->{type} eq 'html' || $this->{type} eq 'style') {
164             # Nothing here on purpose
165             } else {
166 0         0 confess 'Unexpected node type in render_node_html: '.$this->{type};
167             }
168 180326         381248 return;
169             }
170             } # package Markdown::Perl::InlineNode
171              
172             =pod
173              
174             =head2 new_text, new_code, new_link, new_literal
175              
176             my $text_node = new_text('text content');
177             my $code_node = new_code('code content');
178             my $link_node = new_link('text content', type=> 'type', target => 'the target'[, title => 'the title']);
179             my $link_node = new_link($subtree_content, type=> 'type', target => 'the target'[, title => 'the title'][, content => 'override content']);
180             my $html_node = new_html('');
181             my $style_node = new_literal($subtree_content, 'html_tag');
182             my $literal_node = new_literal('literal content');
183              
184             These methods return a text node that can be inserted in an C.
185              
186             =cut
187              
188 217498     217498 1 739231 sub new_text { return Markdown::Perl::InlineNode->new(text => @_) }
189 3804     3804 1 14655 sub new_code { return Markdown::Perl::InlineNode->new(code => @_) }
190 11650     11650 1 47458 sub new_link { return Markdown::Perl::InlineNode->new(link => @_) }
191 24620     24620 0 86169 sub new_html { return Markdown::Perl::InlineNode->new(html => @_) }
192 10237     10237 0 38492 sub new_style { return Markdown::Perl::InlineNode->new(style => @_) }
193 157     157 1 443 sub new_literal { return Markdown::Perl::InlineNode->new(literal => @_) }
194              
195             =pod
196              
197             =head2 is_node, is_tree
198              
199             These two methods returns whether a given object is a node that can be inserted
200             in an C and whether it’s an C object.
201              
202             =cut
203              
204             sub is_node {
205 693625     693625 1 1321628 my ($obj) = @_;
206 693625   66     4305673 return blessed($obj) && $obj->isa('Markdown::Perl::InlineNode');
207             }
208              
209             sub is_tree {
210 66151     66151 1 131400 my ($obj) = @_;
211 66151   33     357743 return blessed($obj) && $obj->isa('Markdown::Perl::InlineTree');
212             }
213              
214             =pod
215              
216             =head2 push
217              
218             $tree->push(@nodes_or_trees);
219              
220             Push a list of nodes at the end of the top-level nodes of the current tree.
221              
222             If passed C objects, then the nodes of these trees are pushed (not
223             the tree itself).
224              
225             =cut
226              
227             sub push { ## no critic (ProhibitBuiltinHomonyms)
228 492346     492346 1 1170002 my ($this, @nodes_or_trees) = @_;
229              
230 492346         1055555 for my $node_or_tree (@nodes_or_trees) {
231 491177 100       974745 if (is_node($node_or_tree)) {
    50          
232 425026         743883 push @{$this->{children}}, $node_or_tree;
  425026         1416043  
233             } elsif (is_tree($node_or_tree)) {
234 66151         132510 push @{$this->{children}}, @{$node_or_tree->{children}};
  66151         155174  
  66151         240905  
235             } else {
236 0         0 confess 'Invalid argument type for InlineTree::push: '.ref($node_or_tree);
237             }
238             }
239              
240 492346         1595888 return;
241             }
242              
243             =pod
244              
245             =head2 replace
246              
247             $tree->replace($index, @nodes);
248              
249             Remove the existing node at the given index and replace it by the given list of
250             nodes (or, if passed C objects, their own nodes).
251              
252             =cut
253              
254             sub replace {
255 78224     78224 1 240340 my ($this, $child_index, @new_nodes) = @_;
256 78224         235707 splice @{$this->{children}}, $child_index, 1,
257 78224 50       121743 map { is_node($_) ? $_ : @{$_->{children}} } @new_nodes;
  139096         277699  
  0         0  
258 78224         184200 return;
259             }
260              
261             =pod
262              
263             =head2 insert
264              
265             $tree->insert($index, @new_nodes);
266              
267             Inserts the given nodes (or, if passed C objects, their own nodes)
268             at the given index. After the operation, the first inserted node will have that
269             index.
270              
271             =cut
272              
273             sub insert {
274 53219     53219 1 132148 my ($this, $index, @new_nodes) = @_;
275 53219 100       78574 splice @{$this->{children}}, $index, 0, map { is_node($_) ? $_ : @{$_->{children}} } @new_nodes;
  53219         140340  
  63352         138660  
  40877         155607  
276 53219         130420 return;
277             }
278              
279             =pod
280              
281             =head2 extract
282              
283             $tree->extract($start_child, $start_offset, $end_child, $end_offset);
284              
285             Extract the content of the given tree, starting at the child with the given
286             index (which must be a B node) and at the given offset in the child’s
287             text, and ending at the given node and offset (which must also be a B
288             node).
289              
290             That content is removed from the input tree and returned as a new C
291             object. Returns a pair with the new tree and the index of the first child after
292             the removed content in the input tree. Usually it will be C<$start_child + 1>,
293             but it can be C<$start_child> if C<$start_offset> was 0.
294              
295             In scalar context, returns only the extracted tree.
296              
297             =cut
298              
299             sub extract {
300 81699     81699 1 234788 my ($this, $child_start, $text_start, $child_end, $text_end) = @_;
301              
302             # In this method, we should not read $sn and $en when they are not split (that
303             # is if text_start or text_end are 0), so that the method works at the
304             # boundary of non-text nodes.
305              
306 81699         215673 my $sn = $this->{children}[$child_start];
307             confess 'Start node in an extract operation is not of type text: '.$sn->{type}
308 81699 50 66     259335 unless $sn->{type} eq 'text' || $text_start == 0;
309              
310             ## I don’t think that this block is useful (I should add tests for this case
311             ## to check if this is needed).
312             ## The code after this block will be invalid if we extract an empty span, but
313             ## I don’t think that this can happen in practice.
314             # if ($child_start == $child_end && $text_start == $text_end) {
315             # my $offset = 0;
316             # if ($text_start != 0) {
317             # if ($text_start != length($sn->{content})) {
318             # my $nn = new_text(substr $sn->{content}, $text_start, length($sn->{content}), '');
319             # $this->insert($child_start + 1, $nn);
320             # }
321             # $offset = 1;
322             # }
323             # return (Markdown::Perl::InlineTree->new(), $child_start + $offset) if wantarray;
324             # return Markdown::Perl::InlineTree->new();
325             # }
326              
327 81699         184520 my $en = $this->{children}[$child_end];
328             confess 'End node in an extract operation is not of type text: '.$en->{type}
329 81699 50 66     390116 unless $text_end == 0 || $en->{type} eq 'text';
330 81699 50       184639 confess 'Start offset is less than 0 in an extract operation' if $text_start < 0;
331             confess 'End offset is past the end of the text in an extract operation'
332 81699 50 66     336839 if $text_end != 0 && $text_end > length($en->{content});
333              
334 81699         132673 my $empty_last = 0;
335 81699 100       182981 if ($text_end == 0) {
336 250         838 $empty_last = 1;
337 250         554 $child_end--;
338             }
339              
340             # Clone will not recurse into sub-trees. But the start and end nodes can’t
341             # have sub-trees, and the middle ones don’t matter because they are not shared
342             # with the initial tree.
343             my @nodes =
344 81699         233724 map { $_->clone() } @{$this->{children}}[$child_start .. $child_end];
  87136         261554  
  81699         225836  
345             ## no critic (ProhibitLvalueSubstr)
346 81699 100       372233 substr($nodes[-1]{content}, $text_end) = '' unless $empty_last; ## We have already removed the empty last node.
347 81699         230105 substr($nodes[0]{content}, 0, $text_start) = ''; # We must do this after text_end in case they are the same node.
348 81699 100       225834 shift @nodes if length($nodes[0]{content}) == 0;
349 81699 50 100     472756 pop @nodes if !$empty_last && @nodes && length($nodes[-1]{content}) == 0;
      66        
350 81699         267627 my $new_tree = Markdown::Perl::InlineTree->new();
351 81699         268506 $new_tree->push(@nodes);
352              
353 81699 100       187359 if ($child_start != $child_end) {
354 3475 100       11033 if ($text_start == 0) {
355 486         1145 $child_start--;
356             } else {
357 2989         9939 substr($sn->{content}, $text_start) = '';
358             }
359 3475 100 100     18687 if ($empty_last || $text_end == length($en->{content})) {
360 2411         4954 $child_end++;
361             } else {
362 1064         2978 substr($en->{content}, 0, $text_end) = '';
363             }
364 3475         6271 splice @{$this->{children}}, $child_start + 1, $child_end - $child_start - 1;
  3475         13302  
365             } else {
366 78224         124960 my @new_nodes;
367 78224 100       201956 if ($text_start > 0) {
368 72603         267395 CORE::push @new_nodes, new_text(substr $sn->{content}, 0, $text_start);
369             }
370 78224 100 100     465982 if (!$empty_last && $text_end < length($sn->{content})) {
371 66493         204938 CORE::push @new_nodes, new_text(substr $sn->{content}, $text_end);
372             }
373 78224         304053 $this->replace($child_start, @new_nodes);
374 78224 100       223859 $child_start-- if $text_start == 0;
375             }
376             ## use critic (ProhibitLvalueSubstr)
377              
378 81699 100       443445 return ($new_tree, $child_start + 1) if wantarray;
379 38614         198621 return $new_tree;
380             }
381              
382             =pod
383              
384             =head2 map_shallow
385              
386             $tree->map_shallow($sub);
387              
388             Apply the given sub to each direct child of the tree. The sub can return a node
389             or a tree and that returned content is concatenated to form a new tree.
390              
391             Only the top-level nodes of the tree are visited.
392              
393             In void context, update the tree in-place. Otherwise, the new tree is returned.
394              
395             In all cases, C<$sub> must return new nodes or trees, it can’t modify the input
396             object. The argument to C<$sub> are passed in the usual way in C<@_>, not in
397             C<$_>.
398              
399             =cut
400              
401             sub map_shallow {
402 0     0 1 0 my ($this, $sub) = @_;
403              
404 0         0 my $new_tree = Markdown::Perl::InlineTree->new();
405              
406 0         0 for (@{$this->{children}}) {
  0         0  
407 0         0 $new_tree->push($sub->());
408             }
409              
410 0 0       0 return $new_tree if defined wantarray;
411 0         0 %{$this} = %{$new_tree};
  0         0  
  0         0  
412 0         0 return;
413             }
414              
415             =pod
416              
417             =head2 map
418              
419             $tree->map($sub);
420              
421             Same as C, but the tree is visited recursively. The subtree of
422             individual nodes are visited and their content replaced before the node itself
423             are visited.
424              
425             =cut
426              
427             sub map { ## no critic (ProhibitBuiltinHomonyms)
428 123619     123619 1 309726 my ($this, $sub, $start, $stop) = @_;
429             # $start and $stop are not documented for this function, they are used by
430             # clone().
431              
432 123619         449733 my $new_tree = Markdown::Perl::InlineTree->new();
433              
434 123619   100     569782 for (@{$this->{children}}[$start // 0 .. $stop // $#{$this->{children}}]) {
  123619   100     360756  
  95384         300128  
435 241451 100       615624 if ($_->has_subtree()) {
436 2203 100       5615 if (defined wantarray) {
437 39         126 my $new_node = $_->clone();
438 39         214 $new_node->{subtree}->map($sub);
439 39         94 local *_ = \$new_node;
440 39         156 $new_tree->push($sub->());
441             } else {
442             # Is there a risk that this modifies $_ before the call to $sub?
443 2164         7613 $_->{subtree}->map($sub);
444 2164         6869 $new_tree->push($sub->());
445             }
446             } else {
447 239248 100       517170 if (defined wantarray) {
448 30544         71310 my $new_node = $_->clone();
449 30544         101174 local *_ = \$new_node;
450 30544         70661 $new_tree->push($sub->());
451             } else {
452 208704         519744 $new_tree->push($sub->());
453             }
454             }
455             }
456              
457 123619 100       393048 return $new_tree if defined wantarray;
458 95384         143562 %{$this} = %{$new_tree};
  95384         325763  
  95384         282981  
459 95384         369594 return;
460             }
461              
462             =pod
463              
464             =head2 apply
465              
466             $tree->apply($sub);
467              
468             Apply the given C<$sub> to all nodes of the tree. The sub receives the current
469             node in C<$_> and can modify it. The return value of the sub is ignored.
470              
471             =cut
472              
473             sub apply {
474 57010     57010 1 143836 my ($this, $sub) = @_;
475              
476 57010         100707 for (@{$this->{children}}) {
  57010         215077  
477 180326         557825 $sub->();
478 180326 100       472975 $_->{subtree}->apply($sub) if $_->has_subtree();
479             }
480              
481 57010         141921 return;
482             }
483              
484             =pod
485              
486             =head2 clone
487              
488             my $new_tree = $tree->clone([$child_start, $child_end]);
489              
490             Clone (deep copy) the entire tree or a portion of it.
491              
492             =cut
493              
494             sub clone {
495 28235     28235 1 62515 my ($this, $start, $stop) = @_;
496 28235     30622   166806 return $this->map(sub { $_ }, $start, $stop);
  30622         89036  
497             }
498              
499             =head2 fold
500              
501             $tree->fold($sub, $init);
502              
503             Iterates over the top-level nodes of the tree, calling C<$sub> for each of them.
504             It receives two arguments, the current node and the result of the previous call.
505             The first call receives C<$init> as its second argument.
506              
507             Returns the result of the last call of C<$sub>.
508              
509             =cut
510              
511             # TODO: maybe have a "cat" methods that expects each node to return a string and
512             # concatenate them, so that we can concatenate them all together at once, which
513             # might be more efficient.
514             sub fold {
515 87282     87282 1 231193 my ($this, $sub, $init) = @_;
516              
517 87282         151209 my $out = $init;
518              
519 87282         161214 for (@{$this->{children}}) {
  87282         227198  
520 204150         426939 $out = $sub->($_, $out);
521             }
522              
523 87282         712184 return $out;
524             }
525              
526             =pod
527              
528             =head2 find_in_text
529              
530             $tree->find_in_text($regex, $start_child, $start_offset, [$end_child, $end_offset]);
531              
532             Find the first match of the given regex in the tree, starting at the given
533             offset in the node. This only considers top-level nodes of the tree and skip
534             over non B node (including the first one).
535              
536             If C<$end_child> and C<$end_offset> are given, then does not look for anything
537             starting at or after that bound.
538              
539             Does not match the regex across multiple nodes.
540              
541             Returns C<$child_number, $match_start_offset, $match_end_offset> (or just a
542             I value in scalar context) or C.
543              
544             =cut
545              
546             sub find_in_text {
547 92217     92217 1 292466 my ($this, $re, $child_start, $text_start, $child_bound, $text_bound) = @_;
548             # qr/^\b$/ is a regex that can’t match anything.
549 92217         420016 return $this->find_balanced_in_text(qr/^\b$/, $re, $child_start, $text_start, $child_bound,
550             $text_bound);
551             }
552              
553             =pod
554              
555             =head2 find_balanced_in_text
556              
557             $tree->find_balanced_in_text(
558             $open_re, $close_re, $start_child, $start_offset, $child_bound, $text_bound);
559              
560             Same as C except that this method searches for both C<$open_re> and
561             C<$close_re> and, each time C<$open_re> is found, it needs to find C<$close_re>
562             one more time before we it returns. The method assumes that C<$open_re> has
563             already been seen once before the given C<$start_child> and C<$start_offset>.
564              
565             =cut
566              
567             sub find_balanced_in_text {
568 92277     92277 1 285165 my ($this, $open_re, $close_re, $child_start, $text_start, $child_bound, $text_bound) = @_;
569              
570 92277         171977 my $open = 1;
571              
572 92277   100     288505 for my $i ($child_start .. ($child_bound // $#{$this->{children}})) {
  88368         481498  
573 159322 100       495382 next unless $this->{children}[$i]{type} eq 'text';
574 119043 100 100     413949 if ($i == $child_start && $text_start != 0) {
575 4625         17007 pos($this->{children}[$i]{content}) = $text_start;
576             } else {
577 114418         409157 pos($this->{children}[$i]{content}) = 0;
578             }
579              
580             # When the code in this regex is executed, we are sure that the engine
581             # won’t backtrack (as we are at the end of the regex).
582 119043         2772183 while (
583 26         189 $this->{children}[$i]{content} =~ m/ ${open_re}(?{$open++}) | ${close_re}(?{$open--}) /gx)
  41776         263406  
584             {
585 41802 100 100     224674 return if $i == ($child_bound // -1) && $LAST_MATCH_START[0] >= $text_bound;
      100        
586 41473 100       97135 if ($open == 0) {
587 41429 100       442888 return ($i, $LAST_MATCH_START[0], $LAST_MATCH_END[0]) if wantarray;
588 46         322 return 1;
589             }
590             }
591             }
592              
593 50519         308612 return;
594             }
595              
596             =pod
597              
598             =head2 find_in_text_with_balanced_content
599              
600             $tree->find_in_text_with_balanced_content(
601             $open_re, $close_re, $end_re, $start_child, $start_offset,
602             $child_bound, $text_bound);
603              
604             Similar to C except that this method ends when C<$end_re>
605             is seen, after the C<$open_re> and C<$close_re> regex have been seen a balanced
606             number of time. If the closing one is seen more than the opening one, the search
607             succeeds too. The method does B assumes that C<$open_re> has already been
608             seen before the given C<$start_child> and C<$start_offset> (as opposed to
609             C).
610              
611             =cut
612              
613             sub find_in_text_with_balanced_content {
614 3741     3741 1 13582 my ($this, $open_re, $close_re, $end_re, $child_start, $text_start, $child_bound, $text_bound) =
615             @_;
616              
617 3741         8620 my $open = 0;
618              
619 3741   66     14710 for my $i ($child_start .. ($child_bound // $#{$this->{children}})) {
  3741         19616  
620 4302 100       17353 next unless $this->{children}[$i]{type} eq 'text';
621 4012 100 100     22153 if ($i == $child_start && $text_start != 0) {
622 3739         22548 pos($this->{children}[$i]{content}) = $text_start;
623             } else {
624 273         983 pos($this->{children}[$i]{content}) = 0;
625             }
626              
627             # When the code in this regex is executed, we are sure that the engine
628             # won’t backtrack (as we are at the end of the regex).
629              
630 4012         9324 my $done = 0;
631 4012         99188 while ($this->{children}[$i]{content} =~
632 1697         8910 m/ ${end_re}(?{$done = 1}) | ${open_re}(?{$open++}) | ${close_re}(?{$open--}) /gx) {
  466         2487  
  2208         10988  
633 4371 50 50     22659 return if $i == ($child_bound // -1) && $LAST_MATCH_START[0] >= $text_bound;
      33        
634 4371 100 100     49843 return ($i, $LAST_MATCH_START[0], $LAST_MATCH_END[0]) if ($open == 0 && $done) || $open < 0;
      100        
635 1331         13355 $done = 0;
636             }
637             }
638              
639 701         5517 return;
640             }
641              
642             =pod
643              
644             =head2 render_html
645              
646             $tree->render_html();
647              
648             Returns the HTML representation of that C.
649              
650             =cut
651              
652             sub render_html {
653 56096     56096 1 135747 my ($tree) = @_;
654 56096         210513 return $tree->fold(\&render_node_html, '');
655             }
656              
657             sub render_node_html {
658 178498     178498 0 344369 my ($n, $acc) = @_;
659              
660 178498 50       428301 confess 'Node should already be escaped when calling render_html' unless $n->{escaped};
661              
662 178498 100 100     825982 if ($n->{type} eq 'text' || $n->{type} eq 'literal' || $n->{type} eq 'html') {
    100 100        
    100          
    50          
663 153015         536539 return $acc.$n->{content};
664             } elsif ($n->{type} eq 'code') {
665 3781         17139 return $acc.''.$n->{content}.'';
666             } elsif ($n->{type} eq 'link') {
667 11553         27159 my $title = '';
668 11553 100       39004 if (exists $n->{title}) {
669 92         279 $title = " title=\"$n->{title}\"";
670             }
671 11553 100 100     68131 if ($n->{linktype} eq 'link' || $n->{linktype} eq 'autolink') {
    50          
672             # $n->{content} can only be set in the case of autolink or through the
673             # resolve_link_ref hook (in which case it takes precedence over whatever
674             # was in the link definition).
675 10741 100       39332 my $content = exists $n->{content} ? $n->{content} : $n->{subtree}->render_html();
676 10741         59600 return $acc."{target}\"${title}>${content}";
677             } elsif ($n->{linktype} eq 'img') {
678 812         3766 my $content = $n->{subtree}->to_text();
679 812         7997 return $acc."{target}\" alt=\"${content}\"${title} />";
680             } else {
681 0         0 confess 'Unexpected link type in render_node_html: '.$n->{linktype};
682             }
683             } elsif ($n->{type} eq 'style') {
684 10149         42657 my $content = $n->{subtree}->render_html();
685 10149 100       38736 if ($n->{tag} =~ m/^\.(.*)$/) {
686 1         5 my $class = $1;
687 1         7 return $acc."${content}";
688             } else {
689 10148         25737 my $tag = $n->{tag};
690 10148         52206 return $acc."<${tag}>${content}";
691             }
692             } else {
693 0         0 confess 'Unexpected node type in render_node_html: '.$n->{type};
694             }
695             }
696              
697             =pod
698              
699             =head2 to_text
700              
701             $tree->to_text();
702              
703             Returns the text content of this C discarding all HTML formatting.
704             This is used mainly to produce the C text of image nodes (which can contain
705             any Markdown construct in the source).
706              
707             =cut
708              
709             sub to_text {
710 913     913 1 2472 my ($tree) = @_;
711 913         3721 return $tree->fold(\&node_to_text, '');
712             }
713              
714             sub node_to_text {
715 1827     1827 0 5145 my ($n, $acc) = @_;
716 1827 50       5329 confess 'Node should already be escaped when calling to_text' unless $n->{escaped};
717 1827 100 100     10975 if ($n->{type} eq 'text') {
    100          
    100          
    100          
    50          
718 1188         4720 return $acc.$n->{content};
719             } elsif ($n->{type} eq 'style') {
720 88         433 return $acc.$n->{subtree}->to_text();
721             } elsif ($n->{type} eq 'literal' || $n->{type} eq 'html') {
722             # This is not the exact same behavior as cmark because we will escape
723             # literals here, while cmark would not escape them. The cmark behavior is
724             # probably faulty here (and is not tested by the test suite).
725 434         1749 return $acc.$n->{content};
726             } elsif ($n->{type} eq 'link') {
727 97 100       462 if ($n->{linktype} ne 'autolink') {
728 13         67 return $acc.$n->{subtree}->to_text();
729             } else {
730 84         409 return $acc.$n->{content};
731             }
732             } elsif ($n->{type} eq 'code') {
733 20         122 return $acc.''.$n->{content}.'';
734             } else {
735 0         0 confess 'Unsupported node type for to_text: '.$n->{type};
736             }
737             }
738              
739             =pod
740              
741             =head2 to_source_text
742              
743             $tree->to_source_text($unescape_literal);
744              
745             Returns the original Markdown source corresponding to this C. This
746             is used to produce the reference label, target and title of link elements and so
747             can support only node types that have a higher priority than links (nodes that
748             may have been built already when this is called).
749              
750             The source is returned as-is, the HTML entities are neither decoded nor escaped.
751              
752             If C<$unescape_literal> is true, then literal values that were escaped in the
753             source are unescaped (e.g. C<\;> will appear again as C<\;>). Otherwise they
754             will just appear as their literal value (e.g. C<;>).
755              
756             As a readability facility, the C symbol can be used to pass
757             this option (with a value of C<1>).
758              
759             =cut
760              
761             # It’s a feature that this does not interpolate.
762 32     32   516 use constant UNESCAPE_LITERAL => 1; ## no critic (ProhibitConstantPragma)
  32         77  
  32         78493  
763              
764             sub to_source_text {
765 30269     30269 1 94639 my ($tree, $unescape_literal) = @_;
766 30269         96834 return $tree->fold(node_to_source_text($unescape_literal), '');
767             }
768              
769             sub node_to_source_text {
770 30269     30269 0 60938 my ($unescape_literal) = @_;
771             # TODO: ideally all this should be replaced by the fact that the nodes should
772             # store the span of text that they represent, to be able to extract the actual
773             # source text.
774             # This probably requires that the inlines processing should be rewritten to do
775             # the link at the same time as the auto-links and inline HTML so that this
776             # operates on text.
777             return sub {
778 23816     23816   72012 my ($n, $acc) = @_;
779 23816 50       88557 confess 'Node should not already be escaped when calling to_source_text' if $n->{escaped};
780 23816 100 100     72430 if ($n->{type} eq 'text') {
    100 100        
    100          
    100          
    50          
781 22592         124255 return $acc.$n->{content};
782             } elsif ($n->{type} eq 'literal' && $unescape_literal) {
783 8         38 return $acc.'\\'.$n->{content};
784             } elsif ($n->{type} eq 'literal' || $n->{type} eq 'html') {
785 721         2963 return $acc.$n->{content};
786             } elsif ($n->{type} eq 'code') {
787             # TODO: This also need to be the source string with the right delimiters.
788 121         561 return $acc.''.$n->{content}.'';
789             } elsif ($n->{type} eq 'link') {
790 374 100       957 if ($n->{linktype} eq 'autolink') {
791 335         1260 return $acc.'<'.$n->{content}.'>';
792             } else {
793             # 'img' can appear inside other links and links can appear inside images
794             # and, as-such, we may try to treat them as link reference label, so we
795             # need this case.
796             # Because their structure is complex, we return a dummy value.
797             # BUG: we can’t have a link reference using a label that looks like an
798             # image.
799 39         138 return $acc.'dummy_text_hopefully_this_does_not_collide_with_anything';
800             }
801             } else {
802 0         0 confess 'Unsupported node type for to_source_text: '.$n->{type};
803             }
804 30269         313912 };
805             }
806              
807             =head2 span_to_source_text
808              
809             $tree->span_to_source_text($child_start, $text_start, $child_end, $text_end[, $unescape_literal]);
810              
811             Same as C but only renders the specified span of the
812             C.
813              
814             =cut
815              
816             sub span_to_source_text {
817 28235     28235 1 112081 my ($tree, $child_start, $text_start, $child_end, $text_end, $unescape_literal) = @_;
818 28235         110307 my $copy = $tree->clone($child_start, $child_end);
819 28235         156122 my $extract = $copy->extract(0, $text_start, $child_end - $child_start, $text_end);
820 28235         106046 return $extract->to_source_text($unescape_literal);
821             }
822              
823             1;