line
stmt
bran
cond
sub
pod
time
code
1
package Markdown::Perl::BlockParser;
2
3
31
31
266
use strict;
31
76
31
2869
4
31
31
243
use warnings;
31
70
31
18109
5
31
31
244
use utf8;
31
66
31
268
6
31
31
2269
use feature ':5.24';
31
138
31
8318
7
8
31
31
389
use feature 'refaliasing';
31
104
31
1578
9
31
31
174
no warnings 'experimental::refaliasing';
31
129
31
1837
10
11
31
31
216
use Carp;
31
91
31
7571
12
31
31
212
use English;
31
78
31
236
13
31
31
43921
use Hash::Util 'lock_keys_plus';
31
69
31
278
14
31
31
2719
use List::MoreUtils 'first_index';
31
69
31
281
15
31
31
49279
use List::Util 'pairs', 'min';
31
69
31
6544
16
31
31
70086
use Markdown::Perl::HTML 'html_escape', 'decode_entities', 'remove_disallowed_tags';
31
124
31
3598
17
31
31
42914
use Markdown::Perl::Util ':all';
31
146
31
19902
18
31
31
47633
use YAML::Tiny;
31
638635
31
1053802
19
20
our $VERSION = '0.02';
21
22
=pod
23
24
=encoding utf8
25
26
=cut
27
28
sub new {
29
# $md must be a reference
30
41367
41367
0
114493
my ($class, $pmarkdown, $md) = @_;
31
32
41367
773168
my $this = bless {
33
pmarkdown => $pmarkdown,
34
blocks => [],
35
blocks_stack => [],
36
paragraph => [],
37
last_line_is_blank => 0,
38
last_line_was_blank => 0,
39
skip_next_block_matching => 0,
40
is_lazy_continuation => 0,
41
md => undef,
42
last_pos => 0,
43
line_ending => '',
44
continuation_re => qr//,
45
linkrefs => {},
46
matched_prefix_size => 0,
47
}, $class;
48
41367
108055
lock_keys_plus(%{$this}, qw(forced_line));
41367
234438
49
50
41367
1236489
\$this->{md} = $md; # aliasing to avoid copying the input, does this work? is it useful?
51
52
41367
119054
return $this;
53
}
54
55
# This autoload method allows to call option accessors from the parent object
56
# transparently.
57
my $pkg = __PACKAGE__;
58
59
sub AUTOLOAD { ## no critic (ProhibitAutoloading, RequireArgUnpacking)
60
692319
692319
1326370
our $AUTOLOAD; # Automatically populated when the method is called.
61
692319
3870336
$AUTOLOAD =~ s/${pkg}:://;
62
692319
100
2200752
return if $AUTOLOAD eq 'DESTROY';
63
655175
50
2053220
confess "Undefined method ${AUTOLOAD}" unless $AUTOLOAD =~ m/^get_/;
64
655175
1319015
my $this = shift @_;
65
655175
2968697
return $this->{pmarkdown}->$AUTOLOAD(@_);
66
}
67
68
my $eol_re = qr/ \r\n | \n | \r /x;
69
70
sub next_line {
71
181190
181190
0
398672
my ($this) = @_;
72
# When we are forcing a line, we don’t recompute the line_ending, but it
73
# should already be correct because the forced one is a substring of the last
74
# line.
75
181190
100
708862
return delete $this->{forced_line} if exists $this->{forced_line};
76
156692
100
655737
return if pos($this->{md}) == length($this->{md});
77
129887
343565
$this->{last_pos} = pos($this->{md});
78
129887
50
1226574
$this->{md} =~ m/\G([^\n\r]*)(${eol_re})?/g or confess 'Should not happen';
79
129887
560357
my ($t, $e) = ($1, $2);
80
129887
100
494863
if ($1 =~ /^[ \t]+$/) {
81
1528
100
100
10344
$this->{line_ending} = $t.($e // '') if $this->get_preserve_white_lines;
82
1528
6247
return '';
83
} else {
84
128359
100
100
509886
$this->{line_ending} = $e // ($this->get_force_final_new_line ? "\n" : '');
85
128359
550739
return $t;
86
}
87
}
88
89
sub line_ending {
90
29772
29772
0
58387
my ($this) = @_;
91
29772
157689
return $this->{line_ending};
92
}
93
94
# last_pos should be passed whenever set_pos can be followed by a "return;" in
95
# one of the _do_..._block method (so, if the method fails), to reset the parser
96
# to its previous state, when the pos was manipulated.
97
# TODO: add a better abstraction to save and restore parser state.
98
sub set_pos {
99
22124
22124
0
63946
my ($this, $pos, $last_pos) = @_;
100
22124
83989
pos($this->{md}) = $pos;
101
22124
100
84495
$this->{last_pos} = $last_pos if defined $last_pos;
102
22124
44050
return;
103
}
104
105
sub get_pos {
106
20963
20963
0
54553
my ($this) = @_;
107
20963
68217
return pos($this->{md});
108
}
109
110
sub redo_line {
111
9569
9569
0
21439
my ($this) = @_;
112
9569
50
36938
confess 'Cannot push back more than one line' unless exists $this->{last_pos};
113
9569
56836
$this->set_pos(delete $this->{last_pos});
114
9569
18137
return;
115
}
116
117
# Takes a string and converts it to HTML. Can be called as a free function or as
118
# class method. In the latter case, provided options override those set in the
119
# class constructor.
120
# Both the input and output are unicode strings.
121
sub process {
122
17563
17563
0
50853
my ($this) = @_;
123
17563
81956
pos($this->{md}) = 0;
124
125
# https://spec.commonmark.org/0.30/#characters-and-lines
126
# TODO: The spec asks for this, however we can’t apply it, because md is a
127
# reference to the value passed by the user and we don’t want to modify it (it
128
# may even be a read-only value). I’m too lazy to find another place to
129
# implement this behavior.
130
# $this->{md} =~ s/\000/\xfffd/g;
131
132
# https://spec.commonmark.org/0.30/#tabs
133
# TODO: nothing to do at this stage.
134
135
# https://spec.commonmark.org/0.30/#backslash-escapes
136
# https://spec.commonmark.org/0.30/#entity-and-numeric-character-references
137
# Done at a later stage, as escaped characters don’t have their Markdown
138
# meaning, we need a way to represent that.
139
140
17563
100
195773
$this->_parse_yaml_metadata() if $this->get_parse_file_metadata eq 'yaml';
141
142
17562
95223
while (defined (my $l = $this->next_line())) {
143
# This field might be set to true at the beginning of the processing, while
144
# we’re looking at the conditions of the currently open containers.
145
124890
259728
$this->{is_lazy_continuation} = 0;
146
124890
308238
$this->_parse_blocks($l);
147
}
148
17562
107728
$this->_finalize_paragraph();
149
17562
44976
while (@{$this->{blocks_stack}}) {
24671
87566
150
7109
30231
$this->_restore_parent_block();
151
}
152
17562
99781
return delete $this->{linkrefs}, delete $this->{blocks};
153
}
154
155
# The $force_loose_test parameter is used when we are actually starting a new
156
# block. In that case, or if we are actually after a paragraph, then we possibly
157
# convert the enclosing list to a loose list.
158
# TODO: the logic to decide if a list is loose is extremely complex and split in
159
# many different place. It would be great to rewrite it in a simpler way.
160
sub _finalize_paragraph {
161
125744
125744
297783
my ($this, $force_loose_test) = @_;
162
125744
100
100
198636
if (@{$this->{paragraph}} || $force_loose_test) {
125744
540094
163
87946
100
230659
if ($this->{last_line_was_blank}) {
164
5920
100
100
11670
if (@{$this->{blocks_stack}}
5920
28836
165
&& $this->{blocks_stack}[-1]{block}{type} eq 'list_item') {
166
141
471
$this->{blocks_stack}[-1]{block}{loose} = 1;
167
}
168
}
169
}
170
125744
100
220586
return unless @{$this->{paragraph}};
125744
342421
171
54516
99529
push @{$this->{blocks}}, {type => 'paragraph', content => $this->{paragraph}};
54516
323770
172
54516
138607
$this->{paragraph} = [];
173
54516
113148
return;
174
}
175
176
# Whether the list_item match the most recent list (should we add to the same
177
# list or create a new one).
178
sub _list_match {
179
35780
35780
88373
my ($this, $item) = @_;
180
35780
100
53710
return 0 unless @{$this->{blocks}};
35780
123057
181
25654
69360
my $list = $this->{blocks}[-1];
182
return
183
$list->{type} eq 'list'
184
&& $list->{style} eq $item->{style}
185
25654
100
198326
&& $list->{marker} eq $item->{marker};
186
}
187
188
sub _add_block {
189
37217
37217
86645
my ($this, $block) = @_;
190
37217
100
147591
if ($block->{type} eq 'list_item') {
191
16898
47871
$this->_finalize_paragraph(0);
192
# https://spec.commonmark.org/0.30/#lists
193
16898
100
48633
if ($this->_list_match($block)) {
194
2697
6115
push @{$this->{blocks}[-1]{items}}, $block;
2697
11020
195
2697
100
14256
$this->{blocks}[-1]{loose} ||= $block->{loose};
196
} else {
197
my $list = {
198
type => 'list',
199
style => $block->{style},
200
marker => $block->{marker},
201
start_num => $block->{num},
202
items => [$block],
203
loose => $block->{loose}
204
14201
153311
};
205
14201
28241
push @{$this->{blocks}}, $list;
14201
38130
206
}
207
} else {
208
20319
81524
$this->_finalize_paragraph(1);
209
20319
50802
push @{$this->{blocks}}, $block;
20319
51446
210
}
211
37217
88426
return;
212
}
213
214
# Anything passed to $prefix_re must necessary accept an empty string unless the
215
# block cannot accept lazy continuations. This is a best effort simulation of
216
# the block condition, to be used for some complex multi-line constructs that
217
# are parsed through a single regex.
218
sub _enter_child_block {
219
30946
30946
98172
my ($this, $new_block, $cond, $prefix_re, $forced_next_line) = @_;
220
30946
108332
$this->_finalize_paragraph(1);
221
30946
100
86725
if (defined $forced_next_line) {
222
28448
79015
$this->{forced_line} = $forced_next_line;
223
}
224
30946
207899
push @{$this->{blocks_stack}}, {
225
cond => $cond,
226
block => $new_block,
227
parent_blocks => $this->{blocks},
228
continuation_re => $this->{continuation_re}
229
30946
54201
};
230
30946
613438
$this->{continuation_re} = qr/$this->{continuation_re} $prefix_re/x;
231
30946
87728
$this->{blocks} = [];
232
30946
74593
return;
233
}
234
235
sub _restore_parent_block {
236
26715
26715
62098
my ($this) = @_;
237
# TODO: rename the variables here with something better.
238
26715
40924
my $last_block = pop @{$this->{blocks_stack}};
26715
82154
239
26715
63377
my $block = delete $last_block->{block};
240
# TODO: maybe rename content to blocks here.
241
26715
98511
$block->{content} = $this->{blocks};
242
26715
63657
$this->{blocks} = delete $last_block->{parent_blocks};
243
26715
90685
$this->{continuation_re} = delete $last_block->{continuation_re};
244
26715
83461
$this->_add_block($block);
245
26715
257540
return;
246
}
247
248
# Returns true if $l would be parsed as the continuation of a paragraph in the
249
# context of $this (which is not modified).
250
sub _test_lazy_continuation {
251
30491
30491
90196
my ($this, $l) = @_;
252
30491
100
50972
return unless @{$this->{paragraph}};
30491
126124
253
23804
94662
my $tester = new(ref($this), $this->{pmarkdown}, \'');
254
23804
86241
pos($tester->{md}) = 0;
255
# What is a paragraph depends on whether we already have a paragraph or not.
256
23804
50
52331
$tester->{paragraph} = [@{$this->{paragraph}} ? ('foo') : ()];
23804
135423
257
# We use this field both in the tester and in the actual object when we
258
# matched a lazy continuation.
259
23804
49261
$tester->{is_lazy_continuation} = 1;
260
# We’re ignoring the eol of the original line as it should not affect parsing.
261
23804
79814
$tester->_parse_blocks($l);
262
# BUG: there is a bug here which is that a construct like a fenced code block
263
# or a link ref definition, whose validity depends on more than one line,
264
# might be misclassified. The probability of that is low.
265
23804
100
58677
if (@{$tester->{paragraph}}) {
23804
101525
266
12837
28803
$this->{is_lazy_continuation} = 1;
267
12837
107505
return 1;
268
}
269
10967
82205
return 0;
270
}
271
272
sub _count_matching_blocks {
273
153691
153691
326149
my ($this, $lr) = @_; # $lr is a scalar *reference* to the current line text.
274
153691
327398
$this->{matched_prefix_size} = 0;
275
153691
299308
for my $i (0 .. $#{$this->{blocks_stack}}) {
153691
567745
276
39359
104076
local *::_ = $lr;
277
39359
137107
my $r = $this->{blocks_stack}[$i]{cond}();
278
39359
100
100
214537
$this->{matched_prefix_size} += $r if defined $r && $r > 0; # $r < 0 means match but no prefix.
279
39359
100
192370
return $i unless $r;
280
}
281
133680
242039
return @{$this->{blocks_stack}};
133680
432374
282
}
283
284
sub _all_blocks_match {
285
29495
29495
68517
my ($this, $lr) = @_;
286
29495
48006
return @{$this->{blocks_stack}} == $this->_count_matching_blocks($lr);
29495
92412
287
}
288
289
my $thematic_break_re = qr/^\ {0,3} (?: (?:-[ \t]*){3,} | (_[ \t]*){3,} | (\*[ \t]*){3,} ) $/x;
290
my $block_quotes_re = qr/^ {0,3}>/;
291
my $indented_code_re = qr/^(?: {0,3}\t| {4})/;
292
my $list_item_marker_re = qr/ [-+*] | (?\d{1,9}) (?[.)])/x;
293
my $list_item_re =
294
qr/^ (?\ {0,3}) (?${list_item_marker_re}) (?(?:[ \t].*)?) $/x;
295
my $supported_html_tags = join('|',
296
qw(address article aside base basefont blockquote body caption center col colgroup dd details dialog dir div dl dt fieldset figcaption figure footer form frame frameset h1 h2 h3 h4 h5 h6 head header hr html iframe legend li link main menu menuitem nav noframes ol optgroup option p param search section summary table tbody td tfoot th thead title tr track ul)
297
);
298
299
my $directive_name_re = qr/(? [-\w]+ )?/x;
300
my $directive_content_re = qr/(?: \s* \[ (? [^\]]+ ) \] )?/x;
301
my $directive_attribute_re = qr/(?: \s* \{ (? .* ) \} )?/x;
302
my $directive_data_re = qr/${directive_name_re} ${directive_content_re} ${directive_attribute_re}/x;
303
my $directive_block_re = qr/^\ {0,3} (? :{3,} ) \s* ${directive_data_re} \s* :* \s* $/x;
304
305
# TODO: Share these regex with the Inlines.pm file that has a copy of them.
306
my $html_tag_name_re = qr/[a-zA-Z][-a-zA-Z0-9]*/;
307
my $html_attribute_name_re = qr/[a-zA-Z_:][-a-zA-Z0-9_.:]*/;
308
# We include new lines in these regex as the spec mentions them, but we can’t
309
# match them for now as the regex will see lines one at a time.
310
my $html_space_re = qr/\n[ \t]*|[ \t][ \t]*\n?[ \t]*/; # Spaces, tabs, and up to one line ending.
311
my $opt_html_space_re = qr/[ \t]*\n?[ \t]*/; # Optional spaces.
312
my $html_attribute_value_re = qr/ [^ \t\n"'=<>`]+ | '[^']*' | "[^"]*" /x;
313
my $html_attribute_re =
314
qr/ ${html_space_re} ${html_attribute_name_re} (?: ${opt_html_space_re} = ${opt_html_space_re} ${html_attribute_value_re} )? /x;
315
my $html_open_tag_re =
316
qr/ < ${html_tag_name_re} ${html_attribute_re}* ${opt_html_space_re} \/? > /x;
317
my $html_close_tag_re = qr/ <\/ ${html_tag_name_re} ${opt_html_space_re} > /x;
318
319
# Parse at least one line of text to build a new block; and possibly several
320
# lines, depending on the block type.
321
# https://spec.commonmark.org/0.30/#blocks-and-inlines
322
our $l; # global variable, localized during the call to _parse_blocks.
323
324
sub _parse_blocks { ## no critic (RequireArgUnpacking)
325
148694
148694
280678
my $this = shift @_;
326
# TODO do the localization in process to avoid the copy (but this will need
327
# change in the continuation tester).
328
148694
329867
local $l = shift @_; ## no critic (ProhibitLocalVars)
329
330
148694
100
441377
if (!$this->{skip_next_block_matching}) {
331
124196
481566
my $matched_block = $this->_count_matching_blocks(\$l);
332
124196
100
261869
if (@{$this->{blocks_stack}} > $matched_block) {
124196
425425
333
18265
56651
$this->_finalize_paragraph();
334
18265
32707
while (@{$this->{blocks_stack}} > $matched_block) {
37871
110304
335
19606
53628
$this->_restore_parent_block();
336
}
337
}
338
} else {
339
24498
48928
$this->{skip_next_block_matching} = 0;
340
}
341
342
# There are two different cases. The first one, handled here, is when we have
343
# multiple blocks inside a list item separated by a blank line. The second
344
# case (when the list items themselves are separated by a blank line) is
345
# handled when parsing the list item itself (based on the last_line_was_blank
346
# setting).
347
148694
100
439775
if ($this->{last_line_is_blank}) {
348
14906
100
100
28540
if (@{$this->{blocks_stack}}
14906
67872
349
&& $this->{blocks_stack}[-1]{block}{type} eq 'list_item') {
350
# $this->{blocks_stack}[-1]{block}{loose} = 1;
351
}
352
}
353
148694
363038
$this->{last_line_was_blank} = $this->{last_line_is_blank};
354
148694
370880
$this->{last_line_is_blank} = 0;
355
356
148694
50
100
366229
_do_atx_heading($this)
100
100
100
100
100
100
100
100
100
100
100
100
66
357
|| ($this->get_use_setext_headings && _do_setext_heading($this))
358
# Thematic breaks are described first in the spec, but the setext headings has
359
# precedence in case of conflict, so we test for the break after the heading.
360
|| _do_thematic_break($this)
361
|| _do_indented_code_block($this)
362
|| ($this->get_use_fenced_code_blocks && _do_fenced_code_block($this))
363
|| _do_html_block($this)
364
|| _do_block_quotes($this)
365
|| _do_list_item($this)
366
|| _do_directive_block($this)
367
|| _do_link_reference_definition($this)
368
|| ($this->get_use_table_blocks && _do_table_block($this))
369
|| _do_paragraph($this)
370
|| croak "Current line could not be parsed as anything: $l";
371
148694
797446
return;
372
}
373
374
sub _load_yaml_module {
375
1
1
4
my ($module_name) = @_;
376
1
50
202
if (!eval "require $module_name; 1") { ## no critic (BuiltinFunctions::ProhibitStringyEval)
377
0
0
croak "Cannot load module $module_name: ${EVAL_ERROR}";
378
}
379
1
20
return;
380
}
381
382
sub _call_yaml_parser {
383
9
9
112
my ($this, $yaml) = @_;
384
9
59
my $parser = $this->get_yaml_parser;
385
9
50
my $metadata;
386
9
100
33
36
if ($parser eq 'YAML::Tiny') {
50
387
8
22
return eval { YAML::Tiny->read_string($yaml)->[0] };
8
71
388
} elsif ($parser eq 'YAML::PP' || $parser eq 'YAML::PP::LibYAML') {
389
1
6
_load_yaml_module($parser);
390
1
3
return eval { ($parser->new()->load_string($yaml))[0] };
1
10
391
}
392
0
0
croak "Unsupported YAML parser: $parser";
393
}
394
395
sub _parse_yaml_metadata {
396
4235
4235
11274
my ($this) = @_;
397
398
# At this point, pos(md) is guaranteed to be 0.
399
4235
100
25628
my $line_re = $this->get_yaml_file_metadata_allows_empty_lines ? qr/.*\n/ : qr/.+\n/;
400
4235
100
38970
if ($this->{md} =~ m/ ^ ---\n (? (?: $line_re )+? ) (?: --- | \.\.\. ) \n /gxc) { ## no critic (ProhibitUnusedCapture)
401
9
72
my $metadata = $this->_call_yaml_parser($+{YAML});
402
9
100
35971
if ($EVAL_ERROR) {
403
2
12
pos($this->{md}) = 0;
404
2
50
29
carp 'YAML Metadata (Markdown frontmatter) is invalid' if $this->get_warn_for_unused_input();
405
2
21
return;
406
}
407
7
100
56
if (exists($this->{pmarkdown}{hooks}{yaml_metadata})) {
408
5
39
$this->{pmarkdown}{hooks}{yaml_metadata}->($metadata);
409
}
410
}
411
4232
65166
return;
412
}
413
414
# https://spec.commonmark.org/0.30/#atx-headings
415
sub _do_atx_heading {
416
148694
148694
322437
my ($this) = @_;
417
148694
100
552271
if ($l =~ /^ \ {0,3} (\#{1,6}) (?:[ \t]+(.+?))?? (?:[ \t]+\#+)? [ \t]* $/x) {
418
# Note: heading breaks can interrupt a paragraph or a list
419
# TODO: the content of the header needs to be interpreted for inline content.
420
489
100
5747
$this->_add_block({
421
type => 'heading',
422
level => length($1),
423
content => $2 // '',
424
debug => 'atx'
425
});
426
489
1668
return 1;
427
}
428
148205
1160572
return;
429
}
430
431
# https://spec.commonmark.org/0.30/#setext-headings
432
sub _do_setext_heading {
433
103113
103113
204402
my ($this) = @_;
434
103113
100
957383
return unless $l =~ /^ {0,3}(-+|=+)[ \t]*$/;
435
1679
100
66
3662
if ( !@{$this->{paragraph}}
1679
100
10942
436
|| indent_size($this->{paragraph}[0]) >= 4
437
|| $this->{is_lazy_continuation}) {
438
1136
7453
return;
439
}
440
# TODO: this should not interrupt a list if the heading is just one -
441
543
10462
my $c = substr $1, 0, 1;
442
543
1230
my $p = $this->{paragraph};
443
543
5011
my $m = $this->get_multi_lines_setext_headings;
444
543
100
66
4616
if ($m eq 'single_line' && @{$p} > 1) {
1
100
66
3
100
445
1
3
my $last_line = pop @{$p};
1
2
446
1
18
$this->_finalize_paragraph();
447
1
3
$p = [$last_line];
448
} elsif ($m eq 'break' && $l =~ m/${thematic_break_re}/) {
449
1
8
$this->_finalize_paragraph();
450
1
6
$this->_add_block({type => 'break', debug => 'setext_as_break'});
451
1
4
return 1;
452
} elsif ($m eq 'ignore') {
453
# TODO: maybe we should just do nothing and return 0 here.
454
1
2
push @{$this->{paragraph}}, $l;
1
3
455
1
3
return 1;
456
}
457
541
1430
$this->{paragraph} = [];
458
541
100
5082
$this->_add_block({
459
type => 'heading',
460
level => ($c eq '=' ? 1 : 2),
461
content => $p,
462
debug => 'setext'
463
});
464
541
3067
return 1;
465
}
466
467
# https://spec.commonmark.org/0.30/#thematic-breaks
468
sub _do_thematic_break {
469
147662
147662
306287
my ($this) = @_;
470
147662
100
1307606
if ($l !~ /${thematic_break_re}/) {
471
147330
818480
return;
472
}
473
332
7929
$this->_add_block({type => 'break', debug => 'native_break'});
474
332
1773
return 1;
475
}
476
477
# https://spec.commonmark.org/0.30/#indented-code-blocks
478
sub _do_indented_code_block {
479
147330
147330
292202
my ($this) = @_;
480
# Indented code blocks cannot interrupt a paragraph.
481
147330
100
100
271895
if (@{$this->{paragraph}} || $l !~ m/${indented_code_re}/) {
147330
905691
482
146279
1189914
return;
483
}
484
1051
11702
my $convert_tabs = $this->get_code_blocks_convert_tabs_to_spaces;
485
1051
100
13219
tabs_to_space($l, $this->{matched_prefix_size}) if $convert_tabs;
486
1051
4434
my @code_lines = scalar(remove_prefix_spaces(4, $l.$this->line_ending()));
487
1051
2710
my $count = 1; # The number of lines we have read
488
1051
1954
my $valid_count = 1; # The number of lines we know are in the code block.
489
1051
3635
my $valid_pos = $this->get_pos();
490
1051
3189
while (defined (my $nl = $this->next_line())) {
491
1046
100
3975
if ($this->_all_blocks_match(\$nl)) {
492
808
1499
$count++;
493
808
100
6561
if ($nl =~ m/${indented_code_re}/) {
100
494
69
235
$valid_pos = $this->get_pos();
495
69
163
$valid_count = $count;
496
69
100
321
tabs_to_space($nl, $this->{matched_prefix_size}) if $convert_tabs;
497
69
235
push @code_lines, scalar(remove_prefix_spaces(4, $nl.$this->line_ending()));
498
} elsif ($nl eq '') {
499
151
533
push @code_lines, scalar(remove_prefix_spaces(4, $nl.$this->line_ending(), !$convert_tabs));
500
} else {
501
588
1680
last;
502
}
503
} else {
504
238
828
last;
505
}
506
}
507
1051
3335
splice @code_lines, $valid_count;
508
1051
4293
$this->set_pos($valid_pos);
509
1051
4240
my $code = join('', @code_lines);
510
1051
7165
$this->_add_block({type => 'code', content => $code, debug => 'indented'});
511
1051
5140
return 1;
512
}
513
514
# https://spec.commonmark.org/0.30/#fenced-code-blocks
515
sub _do_fenced_code_block {
516
109271
109271
211433
my ($this) = @_;
517
109271
100
943184
return unless $l =~ /^ (?\ {0,3}) (?`{3,}|~{3,}) [ \t]* (?.*?) [ \t]* $/x; ## no critic (ProhibitComplexRegexes)
518
10571
95043
my $f = substr $+{fence}, 0, 1;
519
10571
100
100
92654
if ($f eq '`' && index($+{info}, '`') != -1) {
520
1579
9637
return;
521
}
522
8992
36214
my $fl = length($+{fence});
523
8992
36817
my $info = $+{info};
524
8992
37783
my $indent = length($+{indent});
525
# This is one of the few case where we need to process character escaping
526
# outside of the full inlines rendering process.
527
# TODO: Consider if it would be cleaner to do it inside the render_html method.
528
8992
25544
$info =~ s/\\(\p{PosixPunct})/$1/g;
529
# The spec does not describe what we should do with fenced code blocks inside
530
# other containers if we don’t match them.
531
8992
15548
my @code_lines; # The first line is not part of the block.
532
8992
17959
my $end_fence_seen = 0;
533
8992
47392
my $start_pos = $this->get_pos();
534
8992
24854
while (defined (my $nl = $this->next_line())) {
535
23802
100
73715
if ($this->_all_blocks_match(\$nl)) {
536
22915
100
179387
if ($nl =~ m/^ {0,3}${f}{$fl,}[ \t]*$/) {
537
443
1029
$end_fence_seen = 1;
538
443
1316
last;
539
} else {
540
# We’re adding one line to the fenced code block
541
22472
71066
push @code_lines, scalar(remove_prefix_spaces($indent, $nl.$this->line_ending()));
542
}
543
} else {
544
# We’re out of our enclosing block and we haven’t seen the end of the
545
# fence. If we accept enclosed fence, then that last line must be tried
546
# again (and, otherwise, we will start again from start_pos).
547
887
100
6262
$this->redo_line() if !$this->get_fenced_code_blocks_must_be_closed;
548
887
2476
last;
549
}
550
}
551
552
8992
100
100
63726
if (!$end_fence_seen && $this->get_fenced_code_blocks_must_be_closed) {
553
3658
14435
$this->set_pos($start_pos);
554
3658
23626
return;
555
}
556
5334
21375
my $code = join('', @code_lines);
557
5334
41829
$this->_add_block({
558
type => 'code',
559
content => $code,
560
info => $info,
561
debug => 'fenced'
562
});
563
5334
49413
return 1;
564
}
565
566
# https://spec.commonmark.org/0.31.2/#html-blocks
567
sub _do_html_block {
568
140945
140945
314983
my ($this) = @_;
569
# HTML blocks can interrupt a paragraph.
570
# TODO: add an option so that they don’t interrupt a paragraph (make it be
571
# the default?).
572
# TODO: PERF: test that $l =~ m/^ {0,3} to short circuit all these regex.
573
140945
259276
my $html_end_condition;
574
140945
100
100
1596738
if ($l =~ m/ ^\ {0,3} < (?:pre|script|style|textarea) (?:\ |\t|>|$) /x) {
100
100
100
100
100
100
575
16
69
$html_end_condition = qr/ <\/ (?:pre|script|style|textarea) > /x;
576
} elsif ($l =~ m/^ {0,3}/;
578
} elsif ($l =~ m/^ {0,3}<\?/) {
579
2
13
$html_end_condition = qr/\?>/;
580
} elsif ($l =~ m/^ {0,3}
581
2
9
$html_end_condition = qr/=>/;
582
} elsif ($l =~ m/^ {0,3}
583
2
45
$html_end_condition = qr/]]>/;
584
} elsif ($l =~ m/^\ {0,3} < \/? (?:${supported_html_tags}) (?:\ |\t|\/?>|$) /x) {
585
2691
12011
$html_end_condition = qr/^$/; ## no critic (ProhibitFixedStringMatches)
586
138222
1037132
} elsif (!@{$this->{paragraph}}
587
&& $l =~ m/^\ {0,3} (?: ${html_open_tag_re} | ${html_close_tag_re} ) [ \t]* $ /x) {
588
# TODO: the spec seem to say that the tag can take more than one line, but
589
# this is not tested, so we don’t implement this for now.
590
23
101
$html_end_condition = qr/^$/; ## no critic (ProhibitFixedStringMatches)
591
}
592
# TODO: Implement rule 7 about any possible tag.
593
140945
100
441873
if (!$html_end_condition) {
594
138199
654745
return;
595
}
596
# TODO: see if some code could be shared with the code blocks
597
2746
10879
my @html_lines = $l.$this->line_ending();
598
# TODO: add an option to not parse a tag if it’s closing condition is never
599
# seen.
600
2746
100
29844
if ($l !~ m/${html_end_condition}/) {
601
# The end condition can occur on the opening line.
602
2736
13127
while (defined (my $nl = $this->next_line())) {
603
4647
100
14707
if ($this->_all_blocks_match(\$nl)) {
604
4026
100
32471
if ($nl !~ m/${html_end_condition}/) {
100
605
3267
9331
push @html_lines, $nl.$this->line_ending();
606
} elsif ($nl eq '') {
607
# This can only happen for rules 6 and 7 where the end condition
608
# line is not part of the HTML block.
609
743
3282
$this->redo_line();
610
743
2375
last;
611
} else {
612
16
54
push @html_lines, $nl.$this->line_ending();
613
16
49
last;
614
}
615
} else {
616
621
2737
$this->redo_line();
617
621
1923
last;
618
}
619
}
620
}
621
2746
11660
my $html = join('', @html_lines);
622
2746
14839
remove_disallowed_tags($html, $this->get_disallowed_html_tags);
623
2746
17950
$this->_add_block({type => 'html', content => $html});
624
2746
20281
return 1;
625
}
626
627
# https://spec.commonmark.org/0.30/#block-quotes
628
sub _do_block_quotes {
629
138199
138199
286422
my ($this) = @_;
630
138199
100
1022649
return unless $l =~ /${block_quotes_re}/;
631
# TODO: handle laziness (block quotes where the > prefix is missing)
632
my $cond = sub {
633
27129
100
27129
208170
if (s/(${block_quotes_re})/' ' x length($1)/e) {
14056
113645
634
# We remove the '>' character that we replaced by a space, and the
635
# optional space after it. We’re using this approach to correctly handle
636
# the case of a line like '>\t\tfoo' where we need to retain the 6
637
# spaces of indentation, to produce a code block starting with two
638
# spaces.
639
14056
27719
my $m;
640
14056
68613
($_, $m) = remove_prefix_spaces(length($1) + 1, $_);
641
# Returns the matched horizontal size.
642
14056
71064
return $m;
643
}
644
13073
41046
return $this->_test_lazy_continuation($_);
645
11986
91238
};
646
{
647
11986
22962
local *::_ = \$l;
11986
39229
648
11986
39106
$this->{matched_prefix_size} += $cond->();
649
}
650
11986
35504
$this->{skip_next_block_matching} = 1;
651
11986
103728
$this->_enter_child_block({type => 'quotes'}, $cond, qr/ {0,3}(?:> ?)?/, $l);
652
11986
77197
return 1;
653
}
654
655
# https://spec.commonmark.org/0.30/#list-items
656
sub _do_list_item {
657
126213
126213
322314
my ($this) = @_;
658
126213
100
1104425
return unless $l =~ m/${list_item_re}/;
659
# There is a note in the spec on thematic breaks that are not list items,
660
# it’s not exactly clear what is intended, and there are no examples.
661
24449
494140
my ($indent_outside, $marker, $text, $digits, $symbol) = @+{qw(indent marker text digits symbol)};
662
24449
124789
my $indent_marker = length($indent_outside) + length($marker);
663
24449
100
136230
my $type = $marker =~ m/[-+*]/ ? 'ul' : 'ol';
664
# The $indent_marker is passed in case the text starts with tabs, to properly
665
# compute the tab stops. This is better than nothing but won’t work inside
666
# other container blocks. In all cases, using tabs instead of space should not
667
# be encouraged.
668
24449
131073
my $text_indent = indent_size($text, $indent_marker + $this->{matched_prefix_size});
669
# When interrupting a paragraph, the rules are stricter.
670
24449
152260
my $mode = $this->get_lists_can_interrupt_paragraph;
671
24449
100
58587
if (@{$this->{paragraph}}) {
24449
83603
672
13016
100
40047
return if $mode eq 'never';
673
13010
100
100
47383
if ($mode eq 'within_list'
100
674
&& !(@{$this->{blocks_stack}} && $this->{blocks_stack}[-1]{block}{type} eq 'list_item')) {
675
5481
33280
return;
676
}
677
7529
100
100
75923
if ($mode eq 'strict' && ($text eq '' || ($type eq 'ol' && $digits != 1))) {
100
678
80
562
return;
679
}
680
}
681
18882
50
66
84927
return if $text ne '' && $text_indent == 0;
682
# in the current implementation, $text_indent is enough to know if $text
683
# is matching $indented_code_re, but let’s not depend on that.
684
18882
83497
my $first_line_blank = $text =~ m/^[ \t]*$/;
685
18882
100
106484
my $discard_text_indent = $first_line_blank || indented(4 + 1, $text); # 4 + 1 is an indented code block, plus the required space after marker.
686
18882
100
49336
my $indent_inside = $discard_text_indent ? 1 : $text_indent;
687
18882
37716
my $indent = $indent_inside + $indent_marker;
688
my $cond = sub {
689
23957
100
100
23957
96729
if ($first_line_blank && m/^[ \t]*$/) {
690
# A list item can start with at most one blank line
691
288
1006
return 0;
692
} else {
693
23669
44238
$first_line_blank = 0;
694
}
695
23669
100
159845
if (indent_size($_) >= $indent) {
696
538
1886
$_ = remove_prefix_spaces($indent, $_);
697
# Returns the matched horizontal size.
698
538
1876
return $indent;
699
}
700
# TODO: we probably don’t need to test the list_item_re case here, just
701
# the lazy continuation and the emptiness is enough.
702
23131
100
254879
return (!m/${list_item_re}/ && $this->_test_lazy_continuation($_))
703
|| $_ eq '';
704
18882
118856
};
705
18882
40356
my $forced_next_line = undef;
706
18882
100
50875
if (!$first_line_blank) {
707
# We are doing a weird compensation for the fact that we are not
708
# processing the condition and to correctly handle the case where the
709
# list marker was followed by tabs.
710
16462
96323
$forced_next_line = remove_prefix_spaces($indent, (' ' x $indent_marker).$text);
711
16462
77938
$this->{matched_prefix_size} = $indent;
712
16462
49773
$this->{skip_next_block_matching} = 1;
713
}
714
# Note that we are handling the creation of the lists themselves in the
715
# _add_block method. See https://spec.commonmark.org/0.30/#lists for
716
# reference.
717
# TODO: handle tight and loose lists.
718
18882
66
178911
my $item = {
100
719
type => 'list_item',
720
style => $type,
721
marker => $symbol // $marker,
722
num => int($digits // 1),
723
};
724
$item->{loose} =
725
18882
100
62191
$this->_list_match($item) && $this->{last_line_was_blank};
726
18882
354964
$this->_enter_child_block($item, $cond, qr/ {0,${indent}}/, $forced_next_line);
727
18882
187031
return 1;
728
}
729
730
# https://talk.commonmark.org/t/generic-directives-plugins-syntax/444
731
# See also https://github.com/mkende/pmarkdown/issues/5
732
sub _do_directive_block {
733
107331
107331
245553
my ($this) = @_;
734
107331
100
558021
return unless $this->get_use_directive_blocks();
735
# marker, name, content, attributes
736
32669
100
276365
return unless $l =~ /${directive_block_re}/; # TODO: add an option to allow this block type.
737
78
828
my $lm = length($+{marker});
738
my $cond = sub {
739
259
100
259
3781
if (m/^\ {0,3} :{$lm,} \s* $/x) {
740
12
30
$_ = '';
741
12
30
return 0;
742
}
743
247
617
return -1;
744
78
564
};
745
# At rendering time, a hook should be able to intercept the name and
746
# attributes of the directive to do fancy things with it.
747
$this->_enter_child_block({
748
type => 'directive',
749
name => $+{name},
750
inline => $+{content},
751
attributes => $+{attributes}
752
},
753
78
1629
$cond,
754
qr/ {0,3}/); # Unclear if we need the continuation prefix here.
755
78
593
return 1;
756
}
757
758
# https://spec.commonmark.org/0.31.2/#link-reference-definitions
759
sub _do_link_reference_definition {
760
107253
107253
262901
my ($this) = @_;
761
# Link reference definitions cannot interrupt paragraphs
762
#
763
# This construct needs to be parsed across multiple lines, so we are directly
764
# using the {md} string rather than our parsed $l line
765
# TODO: another maybe much simpler approach would be to parse the block as a
766
# normal paragraph but immediately try to parse the content as a link
767
# reference definition (and otherwise to keep it as a normal paragraph).
768
# That would allow to use the higher lever InlineTree parsing constructs.
769
107253
100
100
193560
return if @{$this->{paragraph}} || $l !~ m/^ {0,3}\[/;
107253
1007770
770
2803
8470
my $last_pos = $this->{last_pos};
771
2803
10338
my $init_pos = $this->get_pos();
772
2803
15944
$this->redo_line();
773
2803
7851
my $start_pos = $this->get_pos();
774
775
# We consume the continuation prefix of enclosing blocks. Note that in the big
776
# regex we allow any number of space after the continuation because it’s what
777
# cmark does.
778
2803
6232
my $cont = $this->{continuation_re};
779
2803
50
46796
confess 'Unexpected regex match failure' unless $this->{md} =~ m/\G${cont}/g;
780
781
# TODO:
782
# - Support for escaped or balanced parenthesis in naked destination
783
# - break this up in smaller pieces and test them independently.
784
# - The need to disable ProhibitUnusedCapture seems to be buggy...
785
# - most of the regex parses only \n and not other eol sequence. The regex
786
# should either be fixed or the entry be normalized.
787
## no critic (ProhibitComplexRegexes, ProhibitUnusedCapture)
788
2803
100
285688
if (
789
$this->{md} =~ m/\G
790
\ {0,3} \[
791
(?>(? # The link label (in square brackets), matched as an atomic group
792
(?:
793
[^\\\[\]]{0,100} (?:(?:\\\\)* \\ .)? # The label cannot contain unescaped ]
794
# With 5.38 this could be (?(*{ ...}) (*FAIL)) which will be more efficient.
795
1831
18151
(*COMMIT) (?(?{ pos() > $start_pos + 1004 }) (*FAIL) ) # As our block can be repeated, we prune the search when we are far enough.
796
)+
797
)) \]:
798
[ \t]* (?:\n ${cont})? [ \t]* # optional spaces and tabs with up to one line ending
799
(?>(? # the destination can be either:
800
< (?: [^\n>]* (? # - enclosed in <> and containing no unescaped >
801
| [^< [:cntrl:]] [^ [:cntrl:]]* # - not enclosed but cannot contains spaces, new lines, etc. and only balanced or escaped parenthesis
802
))
803
(?:
804
# Note that this is an atomic pattern so that we don’t backtrack in it
805
# (so the pattern must not erroneously accept one of its branch).
806
(?> [ \t]* (?:\n ${cont}) [ \t]* | [ \t]+ ) # The spec says that spaces must be present here, but it seems that a new line is fine too.
807
(? # The title can be between ", ' or (). The matching characters can’t appear unescaped in the title
808
" (:?[^\n"]* (?: (?
809
| ' (:?[^\n']* (?: (?
810
| \( (:?[^\n"()]* (?: (?
811
)
812
)?
813
[ \t]*(:?\r\n|\n|\r|$) # The spec says that no characters can occur after the title, but it seems that whitespace is tolerated.
814
/gx
815
## use critic
816
) {
817
202
3443
my ($ref, $target, $title) = @LAST_PAREN_MATCH{qw(LABEL TARGET TITLE)};
818
202
1541
$ref = normalize_label($ref);
819
202
100
839
if ($ref ne '') {
820
194
918
$this->_finalize_paragraph(1);
821
# TODO: option to keep the last appearance instead of the first one.
822
194
100
718
if (exists $this->{linkrefs}{$ref}) {
823
# We keep the first appearance of a label.
824
# TODO: mention the link label.
825
4
50
35
carp 'Only the first appearance of a link reference definition is kept'
826
if $this->get_warn_for_unused_input();
827
4
24
return 1;
828
}
829
190
100
563
if (defined $title) {
830
71
507
$title =~ s/^.(.*).$/$1/s;
831
71
257
_unescape_char($title);
832
}
833
190
8617
$target =~ s/^<(.*)>$/$1/;
834
190
820
_unescape_char($target);
835
190
100
6451
$this->{linkrefs}{$ref} = {
836
target => $target,
837
(defined $title ? ('title' => $title) : ())
838
};
839
190
1144
return 1;
840
}
841
#pass-through intended;
842
}
843
2609
14161
$this->set_pos($init_pos, $last_pos);
844
2609
29140
return;
845
}
846
847
# https://github.github.com/gfm/#tables-extension-
848
sub _do_table_block {
849
53886
53886
111222
my ($this) = @_;
850
851
# TODO: add an option to prevent interrupting a paragraph with a table (and
852
# make it be true for pmarkdown, but not for github where tables can interrupt
853
# a paragraph).
854
# TODO: github supports omitting the first | even on the first line when we
855
# are not interrupting a paragraph and when subsequent the delimiter line has
856
# more than one dash per cell.
857
53886
77698
my $i = !!@{$this->{paragraph}};
53886
156914
858
53886
100
100
262017
return if $i && !$this->get_table_blocks_can_interrupt_paragraph;
859
37053
166160
my $m = $this->get_table_blocks_pipes_requirements;
860
# The tests here are quite lenient and there are many ways in which parsing
861
# the table can fail even if these tests pass.
862
37053
100
66
193325
if ($m eq 'strict' || ($m eq 'loose' && $i)) {
100
863
24597
100
180070
return unless $l =~ m/^ {0,3}\|/;
864
} else {
865
12456
100
127590
return unless $l =~ m/ (?
866
}
867
5245
13572
my $last_pos = $this->{last_pos};
868
5245
33716
my $init_pos = $this->get_pos();
869
5245
21068
$this->redo_line();
870
5245
18816
my $table = $this->_parse_table_structure();
871
5245
100
16636
if (!$table) {
872
5237
19868
$this->set_pos($init_pos, $last_pos);
873
5237
37428
return;
874
}
875
876
8
79
$this->_add_block({type => 'table', content => $table});
877
878
8
48
return 1;
879
}
880
881
sub _parse_table_structure { ## no critic (ProhibitExcessComplexity)
882
5245
5245
12505
my ($this) = @_;
883
884
5245
26480
my $m = $this->get_table_blocks_pipes_requirements;
885
5245
15372
my $i = !!@{$this->{paragraph}};
5245
14174
886
887
# A regexp that matches no back-slashes or an even number of them, so that the
888
# next character cannot be escaped.
889
5245
26602
my $e = qr/(?
890
891
# We consume the continuation prefix of enclosing blocks. Note that,
892
# as opposed to what happens for links, subsequent lines can have at most
893
# 3 more spaces than the initial one with the GitHub implementation (but not
894
# some other GFM implementations).
895
5245
13295
my $cont = $this->{continuation_re};
896
5245
50
112896
confess 'Unexpected regex match failure' unless $this->{md} =~ m/\G${cont}/g;
897
# We want to allow successive 0 length matches. For more details on this
898
# behavior, see:
899
# https://perldoc.perl.org/perlre#Repeated-Patterns-Matching-a-Zero-length-Substring
900
5245
22061
pos($this->{md}) = pos($this->{md});
901
902
# Now we consume the initial | marking the beginning of the table that we know
903
# is here because of the initial match against $l in _do_table_block.
904
5245
50
31580
confess 'Unexpected missing table markers' unless $this->{md} =~ m/\G (\ {0,3}) (\|)?/gcx;
905
906
5245
17037
my $n = length($1) + 3; # Maximum amount of space allowed on subsequent line
907
5245
12476
my $has_pipe = defined $2;
908
909
# We parse the header row
910
5245
140714
my @headers = $this->{md} =~ m/\G [ \t]* (.*? [ \t]* $e) \| /gcx;
911
5245
100
19596
return unless @headers;
912
# We parse the last header if it is not followed by a pipe, and the newline.
913
# The only failure case here is if we have reached the end of the file.
914
4516
100
47596
return unless $this->{md} =~ m/\G [ \t]* (.+)? [ \t]* ${eol_re} /gcx;
915
3932
100
12973
if (defined $1) {
916
3486
11676
push @headers, $1;
917
3486
8354
$has_pipe = 0;
918
}
919
920
3932
100
100
41578
return if ($m eq 'strict' || ($m eq 'loose' && $i) || @headers == 1) && !$has_pipe;
100
921
922
# We consume the continuation marker at the beginning of the delimiter row.
923
3257
50
94015
return unless $this->{md} =~ m/\G ${cont} \ {0,$n} (\|)? /gx;
924
925
3257
100
14010
$has_pipe &&= defined $1;
926
927
3257
17227
my @separators = $this->{md} =~ m/\G [ \t]* ( :? -+ :? ) [ \t]* \| /gcx;
928
3257
100
46313
return unless $this->{md} =~ m/\G [ \t]* (:? -+ :?)? [ \t]* (:? ${eol_re} | $ ) /gcx;
929
616
100
2906
if (defined $1) {
930
73
227
push @separators, $1;
931
73
139
$has_pipe = 0;
932
}
933
616
100
4283
return unless @separators == @headers;
934
my @align =
935
10
100
44
map { s/^(:)?-+(:)?$/ $1 ? ($2 ? 'center' : 'left') : ($2 ? 'right' : '') /er } @separators;
18
100
140
18
100
278
936
937
10
100
66
250
return if ($m eq 'strict' || ($m eq 'loose' && $i) || @headers == 1) && !$has_pipe;
100
938
9
50
66
105
return if $m ne 'lax' && @headers == 1 && !$has_pipe;
66
939
9
100
66
147
return if $m ne 'lax' && !$has_pipe && min(map { length } @separators) < 2;
6
100
1034
940
941
# And now we try to read as many lines as possible
942
8
28
my @table;
943
8
17
while (1) {
944
17
100
75
last if pos($this->{md}) == length($this->{md});
945
12
50
210
last unless $this->{md} =~ m/\G ${cont} \ {0,$n} (\|)? /gcx;
946
# TODO: use a simulator to decide whether we are entering a new block-level
947
# structure, rather than using this half baked regex.
948
12
100
79
$has_pipe &&= defined $1;
949
12
100
100
301
last if !defined $1 && $this->{md} =~ m/\G (?: [ ] | > | ${list_item_marker_re} )/x;
950
10
169
my @cells = $this->{md} =~ m/\G [ \t]* (.*? [ \t]* $e) \| /gcx;
951
10
38
pos($this->{md}) = pos($this->{md});
952
confess 'Unexpected match failure'
953
10
50
160
unless $this->{md} =~ m/\G [ \t]* (.+)? [ \t]* (?: ${eol_re} | $ ) /gcx;
954
10
100
33
if (defined $1) {
955
2
6
push @cells, $1;
956
2
5
$has_pipe = 0;
957
}
958
# There is a small bug when we exit here which is that we have consumed a
959
# blank line. The only case where it would matter would be to decide whether
960
# a list is loose or not, which is a pretty "edge" case with tables.
961
# Otherwise, we will start a new paragraph in all cases.
962
10
100
33
last unless @cells;
963
9
100
24
if (@cells != @headers) {
964
3
11
$#cells = @headers - 1;
965
# TODO: mention the line number in the file (if possible to track
966
# correctly).
967
3
50
33
17
carp 'Excess cells in table row are ignored'
968
if @cells > @headers && $this->get_warn_for_unused_input();
969
}
970
# Pipes need to be escaped inside table, and we need to unescape them here
971
# in case one would appear in a code block for example (where the escaping
972
# would appear literally otherwise). Given that pipes don’t have other
973
# meaning by default, there is not a big risk to do that (and this is
974
# mandated) by the GitHub Spec anyway.
975
9
100
28
push @table, [map { defined ? s/($e)\\\|/${1}|/gr : undef } @cells];
16
150
976
}
977
978
8
111
return {headers => \@headers, align => \@align, table => \@table};
979
}
980
981
# https://spec.commonmark.org/0.30/#paragraphs
982
sub _do_paragraph {
983
107051
107051
230034
my ($this) = @_;
984
# We need to test for blank lines here (not just emptiness) because after we
985
# have removed the markers of container blocks our line can become empty. The
986
# fact that we need to do this, seems to imply that we don’t really need to
987
# check for emptiness when initially building $l.
988
# TODO: check if the blank-line detection in next_line() is needed or not.
989
107051
100
469356
if ($l !~ m/^[ \t]*$/) {
990
85493
162549
push @{$this->{paragraph}}, $l;
85493
303092
991
85493
301978
return 1;
992
}
993
994
# https://spec.commonmark.org/0.30/#blank-lines
995
# if ($l eq '')
996
21558
85843
$this->_finalize_paragraph();
997
# Needed to detect loose lists. But ignore blank lines when they are inside
998
# block quotes
999
$this->{last_line_is_blank} =
1000
21558
100
72029
!@{$this->{blocks_stack}} || $this->{blocks_stack}[-1]{block}{type} ne 'quotes';
1001
21558
81015
return 1;
1002
}
1003
1004
sub _unescape_char {
1005
# TODO: configure the set of escapable character. Note that this regex is
1006
# shared with Inlines.pm process_char_escaping.
1007
261
261
728
$_[0] =~ s/\\(\p{PosixPunct})/$1/g;
1008
261
592
return;
1009
}
1010
1011
1;