File Coverage

blib/lib/Markdent/Dialect/Theory/BlockParser.pm

Criterion	Covered	Total	%
statement	184	186	98.9
branch	57	66	86.3
condition	15	22	68.1
subroutine	27	27	100.0
pod			n/a
total	283	301	94.0

line	stmt	bran	cond	sub	time	code
1						package Markdent::Dialect::Theory::BlockParser;
2
3	4			4	251709	use strict;
	4				19
	4				152
4	4			4	23	use warnings;
	4				14
	4				107
5	4			4	528	use namespace::autoclean;
	4				19962
	4				27
6
7						our $VERSION = '0.40';
8
9	4			4	1113	use List::AllUtils qw( insert_after_string sum );
	4				11728
	4				274
10	4			4	2040	use Markdent::Event::StartTable;
	4				18
	4				165
11	4			4	3157	use Markdent::Event::EndTable;
	4				15
	4				154
12	4			4	2285	use Markdent::Event::StartTableHeader;
	4				17
	4				153
13	4			4	2482	use Markdent::Event::EndTableHeader;
	4				18
	4				170
14	4			4	2529	use Markdent::Event::StartTableBody;
	4				15
	4				157
15	4			4	2332	use Markdent::Event::EndTableBody;
	4				18
	4				184
16	4			4	2537	use Markdent::Event::StartTableRow;
	4				17
	4				174
17	4			4	2592	use Markdent::Event::EndTableRow;
	4				18
	4				161
18	4			4	2487	use Markdent::Event::StartTableCell;
	4				18
	4				188
19	4			4	2355	use Markdent::Event::EndTableCell;
	4				21
	4				206
20	4			4	639	use Markdent::Regexes qw( $HorizontalWS $EmptyLine $BlockStart $BlockEnd );
	4				11
	4				726
21	4			4	34	use Markdent::Types;
	4				9
	4				82
22
23	4			4	104424	use Moose::Role;
	4				13
	4				61
24
25						with 'Markdent::Role::Dialect::BlockParser';
26
27						has _in_table => (
28						traits => ['Bool'],
29						is => 'ro',
30						isa => t('Bool'),
31						default => 0,
32						init_arg => undef,
33						handles => {
34						_enter_table => 'set',
35						_leave_table => 'unset',
36						},
37						);
38
39						around _possible_block_matches => sub {
40						my $orig = shift;
41						my $self = shift;
42
43						my @look_for = $self->$orig();
44
45						return @look_for if $self->_list_level;
46
47						if ( $self->_in_table ) {
48						insert_after_string 'list', 'table_cell', @look_for;
49						}
50						else {
51						insert_after_string 'list', 'table', @look_for;
52						}
53
54						return @look_for;
55						};
56
57						my $TableCaption = qr{ ^
58						$HorizontalWS*
59						\[
60						(.*)
61						\]
62						$HorizontalWS*
63						\n
64						}xm;
65
66						# The use of (?> ... ) in the various regexes below forces the regex engine
67						# not to backtrack once it matches the relevant subsection. Using this where
68						# possible _hugely_ speeds up matching, and seems to be safe. At least, the
69						# tests pass.
70
71						my $PipeRow = qr{ ^
72						[\|]? # optional starting pipe
73						(?:
74						(?:
75						(?>[^\\|\\\n]*) # safe chars (not pipe or escape or newline)
76						\|
77						\\[\|] # an escaped newline
78						)+
79						[\|] # must have at least one pipe
80						)+
81						.* # can have a final cell after the last pipe
82						}xm;
83
84						my $ColonRow = qr{ ^
85						:?
86						(?:
87						(?:
88						(?>[^:\\\n]*)
89						\|
90						\\:
91						)+
92						:
93						)+
94						.*
95						}xm;
96
97						my $TableRow = qr{ (?>$PipeRow) # must have at least one starting row
98						\n
99						(?>
100						(?:
101						$ColonRow
102						\n
103						)* # ... can have 0+ continuation lines
104						)
105						}xm;
106
107						my $HeaderMarkerLine = qr/^[\-\+=]+\n/xm;
108
109						my $TableHeader = qr{ $TableRow
110						$HeaderMarkerLine
111						}xm;
112
113						## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
114						sub _match_table {
115	29			29	68	my $self = shift;
116	29				52	my $text = shift;
117
118	29	100			52	return unless ${$text} =~ / \G
	29				2847
119						$BlockStart
120						(
121						$TableCaption?
122						$HeaderMarkerLine?
123						($TableHeader+)?
124						(
125						$TableRow
126						(?:
127						$TableRow
128						\|
129						$EmptyLine
130						)*
131						)
132						$HeaderMarkerLine?
133						$TableCaption?
134						)
135						$BlockEnd
136						/xmgc;
137
138	21	50			2567	$self->_debug_parse_result(
139						$1,
140						'table',
141						) if $self->debug;
142
143	21	100			101	my $caption = defined $2 ? $2 : $5;
144
145	21	50	66		272	$self->_debug_parse_result(
146						$caption,
147						'table caption',
148						) if defined $caption && $self->debug;
149
150	21				58	my $header = $3;
151	21				61	my $body = $4;
152
153	21	50			585	$self->_debug_parse_result(
154						$header,
155						'table header',
156						) if $self->debug;
157
158	21	50			563	$self->_debug_parse_result(
159						$body,
160						'table body',
161						) if $self->debug;
162
163	21				44	my @header;
164
165	21	100			70	if ( defined $header ) {
166	17				110	@header = $self->_parse_rows( qr/$HeaderMarkerLine/m, $header );
167	17				54	$_->{is_header_cell} = 1 for map { @{$_} } @header;
	20				45
	20				103
168						}
169
170	21				104	my @body = $self->_parse_rows( qr/\n/, $body );
171
172	21				112	$self->_normalize_cell_count_and_alignments( @header, @body );
173
174	21	100			72	if (@header) {
175	17				42	my $first_header_cell_content = $header[0][0]{content};
176	17	100	66		139	unless ( defined $first_header_cell_content
177						&& $first_header_cell_content =~ /\S/ ) {
178	1				5	$_->[0]{is_header_cell} = 1 for @body;
179						}
180						}
181
182	21				1034	$self->_enter_table;
183
184	21	100			78	my %caption = defined $caption ? ( caption => $caption ) : ();
185	21				118	$self->_send_event( 'StartTable', %caption );
186
187	21	100			665	$self->_events_for_rows( \@header, 'Header' )
188						if @header;
189	21				555	$self->_events_for_rows( \@body, 'Body' );
190
191	21				644	$self->_send_event('EndTable');
192
193	21				632	$self->_leave_table;
194
195	21				554	return 1;
196						}
197						## use critic
198
199						sub _parse_rows {
200	38			38	80	my $self = shift;
201	38				64	my $split_re = shift;
202	38				73	my $rows = shift;
203
204	38				63	my @rows;
205
206	38				369	for my $chunk ( split $split_re, $rows ) {
207
208						# Splitting on an empty string returns nothing, so we need to
209						# special-case that, as we want to preserve empty lines.
210	168	100			659	for my $line ( length $chunk ? ( split /\n/, $chunk ) : $chunk ) {
211	168	100			893	if ( $line =~ /^$HorizontalWS*$/ ) {
		100
212	2				7	push @rows, undef;
213						}
214						elsif ( $self->_is_continuation_line($line) ) {
215
216						# If the $TableRow regex is correct, this shouldn't be
217						# possible.
218	7	50			22	die q{Continuation of a row before we've seen a row start?!}
219						unless @rows;
220
221	7				20	my $cells = $self->_cells_from_line( $line, ':' );
222
223	7				14	for my $i ( 0 .. $#{$cells} ) {
	7				20
224	15	100	66		74	if ( defined $cells->[$i]{content}
225						&& $cells->[$i]{content} =~ /\S/ ) {
226						$rows[-1][$i]{content}
227	9				29	.= "\n" . $cells->[$i]{content};
228	9		100		35	$rows[-1][$i]{colspan} \|\|= 1;
229						}
230						}
231						}
232						else {
233	159				369	push @rows, $self->_cells_from_line( $line, '\|' );
234						}
235						}
236						}
237
238	38				131	return @rows;
239						}
240
241						sub _is_continuation_line {
242	166			166	632	my $self = shift;
243	166				268	my $line = shift;
244
245	166	100			645	return 0
246						if $line =~ /(?<!\\)[\|]/x;
247
248	7	50			39	return 1
249						if $line =~ /(^\|\p{SpaceSeparator}+)(?<!\\):(\p{SpaceSeparator}\|$)/x;
250
251						# a blank line, presumably
252	0				0	return 0;
253						}
254
255						sub _cells_from_line {
256	166			166	261	my $self = shift;
257	166				255	my $line = shift;
258	166				233	my $div = shift;
259
260	166				237	my @row;
261
262	166				335	for my $cell ( $self->_split_cells( $line, $div ) ) {
263	657	100			1449	if ( length $cell ) {
		50
264	642				1204	push @row, $self->_cell_params($cell);
265						}
266
267						# If the first cell is empty, that means the line started with a
268						# divider, and we can ignore the "cell". If we already have cells in
269						# the row, that means we just saw a repeated divider, which means the
270						# most recent cell has a colspan+1.
271						elsif (@row) {
272	15				35	$row[-1]{colspan}++;
273						}
274						}
275
276	166				564	return \@row;
277						}
278
279						sub _split_cells {
280	166			166	238	my $self = shift;
281	166				236	my $line = shift;
282	166				231	my $div = shift;
283
284	166				816	$line =~ s/^\Q$div//;
285	166				1337	$line =~ s/\Q$div\E$HorizontalWS*$/$div/;
286
287						# We don't want to split on a backslash-escaped divider, thus the
288						# lookbehind. The -1 ensures that Perl gives us the trailing empty fields.
289	166				2481	my @cells = split /(?<!\\)\Q$div/, $line, -1;
290
291						# If the line has just one divider as the line-ending, it should not be
292						# treated as marking an empty cell.
293	166	100	66		1365	if ( $cells[-1] eq q{} && $line =~ /\Q$div\E$HorizontalWS*$/ ) {
294	156				1594	pop @cells;
295						}
296
297	166				625	return @cells;
298						}
299
300						sub _cell_params {
301	642			642	968	my $self = shift;
302	642				893	my $cell = shift;
303
304	642				946	my $alignment;
305						my $content;
306
307	642	100	66		2470	if ( defined $cell && $cell =~ /\S/ ) {
308	633				1316	$alignment = $self->_alignment_for_cell($cell);
309
310	633				4242	( $content = $cell ) =~ s/^$HorizontalWS+\|$HorizontalWS+$//g;
311						}
312
313	642				2611	my %p = (
314						colspan => 1,
315						content => $content,
316						);
317
318	642	100			1500	$p{alignment} = $alignment
319						if defined $alignment;
320
321	642				1673	return \%p;
322						}
323
324						sub _alignment_for_cell {
325	633			633	887	my $self = shift;
326	633				848	my $cell = shift;
327
328	633	100			1576	return 'center'
329						if $cell =~ /^\p{SpaceSeparator}{2,}.+?\p{SpaceSeparator}{2,}$/;
330
331	632	100			1949	return 'left'
332						if $cell =~ /\p{SpaceSeparator}{2,}$/;
333
334	151	100			339	return 'right'
335						if $cell =~ /^\p{SpaceSeparator}{2,}/;
336
337	148				248	return;
338						}
339
340						sub _normalize_cell_count_and_alignments {
341	21			21	44	my $self = shift;
342	21				83	my @rows = @_;
343
344						# We use the first header row as an indicator for how many cells we expect
345						# on each line.
346	21				49	my $default_cells = sum( map { $_->{colspan} } @{ $rows[0] } );
	48				171
	21				65
347
348						# Alignments are inherited from the cell above, or they default to
349						# "left". We loop through all the rules and set alignments accordingly.
350	21				42	my %alignments;
351
352	21				57	for my $row ( grep {defined} @rows ) {
	161				287
353
354						# If we have one extra column and the final cell has a colspan > 1 it
355						# means we misinterpreted a trailing divider as indicating that the
356						# prior cell had a colspan > 1. We adjust for that by comparing it to
357						# the number of columns in the first row.
358	159	50	33		227	if ( sum( map { $_->{colspan} } @{$row} ) == $default_cells + 1
	628				1179
	159				251
359						&& $row->[-1]{colspan} > 1 ) {
360	0				0	$row->[-1]{colspan}--;
361						}
362
363	159				277	my $i = 0;
364	159				230	for my $cell ( @{$row} ) {
	159				317
365	628	100			1016	if ( $cell->{alignment} ) {
366	480				735	$alignments{$i} = $cell->{alignment};
367						}
368						else {
369	148		100		440	$cell->{alignment} = $alignments{$i} \|\| 'left';
370						}
371
372	628				1021	$i += $cell->{colspan};
373						}
374						}
375						}
376
377						sub _events_for_rows {
378	38			38	83	my $self = shift;
379	38				63	my $rows = shift;
380	38				81	my $type = shift;
381
382	38				96	my $start = 'StartTable' . $type;
383	38				76	my $end = 'EndTable' . $type;
384
385	38				153	$self->_send_event($start);
386
387	38				1209	for my $row ( @{$rows} ) {
	38				104
388	161	100			424	if ( !defined $row ) {
389	2				9	$self->_send_event($end);
390	2				61	$self->_send_event($start);
391	2				62	next;
392						}
393
394	159				526	$self->_send_event('StartTableRow');
395
396	159				4877	for my $cell ( @{$row} ) {
	159				434
397	628				1687	my $content = delete $cell->{content};
398
399						$self->_send_event(
400						'StartTableCell',
401	628				1139	%{$cell}
	628				3224
402						);
403
404	628	100			19390	if ( defined $content ) {
405
406						# If the content has newlines, it should be matched as a
407						# block-level construct (blockquote, list, etc), but to make
408						# that work, it has to have a trailing newline.
409	626	100			1721	$content .= "\n"
410						if $content =~ /\n/;
411
412	626				2437	$self->_parse_text( \$content );
413						}
414
415						$self->_send_event(
416						'EndTableCell',
417						is_header_cell => $cell->{is_header_cell},
418	628				2894	);
419						}
420
421	159				560	$self->_send_event('EndTableRow');
422						}
423
424	38				127	$self->_send_event($end);
425						}
426
427						# A table cell's contents can be a single line _not_ terminated by a
428						# newline. If that's the case, it won't match as a paragraph.
429						#
430						## no critic (Subroutines::ProhibitUnusedPrivateSubroutines)
431						sub _match_table_cell {
432	625			625	1099	my $self = shift;
433	625				888	my $text = shift;
434
435	625	100			935	return unless ${$text} =~ / \G
	625				3335
436						(
437						^
438						\p{SpaceSeparator}*
439						\S
440						.*
441						)
442						\z
443						/xmgc;
444
445	620	50			18486	$self->_debug_parse_result(
446						$1,
447						'table cell',
448						) if $self->debug;
449
450	620				17607	$self->_span_parser->parse_block($1);
451						}
452						## use critic
453
454						1;
455
456						# ABSTRACT: Block parser for Theory's proposed Markdown extensions
457
458						__END__
459
460						=pod
461
462						=encoding UTF-8
463
464						=head1 NAME
465
466						Markdent::Dialect::Theory::BlockParser - Block parser for Theory's proposed Markdown extensions
467
468						=head1 VERSION
469
470						version 0.40
471
472						=head1 DESCRIPTION
473
474						This role adds parsing for Markdown extensions proposed by David Wheeler (aka
475						Theory). See L<http://justatheory.com/computers/markup/markdown-table-rfc.html>
476						and L<http://justatheory.com/computers/markup/modest-markdown-proposal.html>
477						for details.
478
479						For now, this role handles tables only.
480
481						This role should be applied to L<Markdent::Parser::BlockParser> class or a
482						subclass of that class.
483
484						=head1 ROLES
485
486						This role does the L<Markdent::Role::Dialect::BlockParser> role.
487
488						=head1 BUGS
489
490						See L<Markdent> for bug reporting details.
491
492						Bugs may be submitted at L<https://github.com/houseabsolute/Markdent/issues>.
493
494						I am also usually active on IRC as 'autarch' on C<irc://irc.perl.org>.
495
496						=head1 SOURCE
497
498						The source code repository for Markdent can be found at L<https://github.com/houseabsolute/Markdent>.
499
500						=head1 AUTHOR
501
502						Dave Rolsky <autarch@urth.org>
503
504						=head1 COPYRIGHT AND LICENSE
505
506						This software is copyright (c) 2021 by Dave Rolsky.
507
508						This is free software; you can redistribute it and/or modify it under
509						the same terms as the Perl 5 programming language system itself.
510
511						The full text of the license can be found in the
512						F<LICENSE> file included with this distribution.
513
514						=cut