File Coverage

blib/lib/TAP/Parser/Grammar.pm

Criterion	Covered	Total	%
statement	95	96	98.9
branch	21	22	95.4
condition	9	11	81.8
subroutine	22	22	100.0
pod	5	5	100.0
total	152	156	97.4

line	stmt	bran	cond	sub	pod	time	code
1							package TAP::Parser::Grammar;
2
3	31			31		1159	use strict;
	31					152
	31					1031
4	31			31		271	use warnings;
	31					81
	31					1016
5
6	31			31		14733	use TAP::Parser::ResultFactory ();
	31					122
	31					824
7	31			31		14879	use TAP::Parser::YAMLish::Reader ();
	31					119
	31					1031
8
9	31			31		284	use base 'TAP::Object';
	31					138
	31					78292
10
11							=head1 NAME
12
13							TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
14
15							=head1 VERSION
16
17							Version 3.40_01
18
19							=cut
20
21							our $VERSION = '3.40_01';
22
23							=head1 SYNOPSIS
24
25							use TAP::Parser::Grammar;
26							my $grammar = $self->make_grammar({
27							iterator => $tap_parser_iterator,
28							parser => $tap_parser,
29							version => 12,
30							});
31
32							my $result = $grammar->tokenize;
33
34							=head1 DESCRIPTION
35
36							C tokenizes lines from a L and
37							constructs L subclasses to represent the tokens.
38
39							Do not attempt to use this class directly. It won't make sense. It's mainly
40							here to ensure that we will be able to have pluggable grammars when TAP is
41							expanded at some future date (plus, this stuff was really cluttering the
42							parser).
43
44							=head1 METHODS
45
46							=head2 Class Methods
47
48							=head3 C
49
50							my $grammar = TAP::Parser::Grammar->new({
51							iterator => $iterator,
52							parser => $parser,
53							version => $version,
54							});
55
56							Returns L grammar object that will parse the TAP stream from the
57							specified iterator. Both C and C are required arguments.
58							If C is not set it defaults to C<12> (see L for more
59							details).
60
61							=cut
62
63							# new() implementation supplied by TAP::Object
64							sub _initialize {
65	285			285		1413	my ( $self, $args ) = @_;
66	285					1274	$self->{iterator} = $args->{iterator}; # TODO: accessor
67	285		66			1391	$self->{iterator} \|\|= $args->{stream}; # deprecated
68	285					1078	$self->{parser} = $args->{parser}; # TODO: accessor
69	285		100			2992	$self->set_version( $args->{version} \|\| 12 );
70	285					2299	return $self;
71							}
72
73							my %language_for;
74
75							{
76
77							# XXX the 'not' and 'ok' might be on separate lines in VMS ...
78							my $ok = qr/(?:not )?ok\b/;
79							my $num = qr/\d+/;
80
81							my %v12 = (
82							version => {
83							syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
84							handler => sub {
85							my ( $self, $line ) = @_;
86							my $version = $1;
87							return $self->_make_version_token( $line, $version, );
88							},
89							},
90							plan => {
91							syntax => qr/^1\.\.(\d+)\s(.)\z/,
92							handler => sub {
93							my ( $self, $line ) = @_;
94							my ( $tests_planned, $tail ) = ( $1, $2 );
95							my $explanation = undef;
96							my $skip = '';
97
98							if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
99							my @todo = split /\s+/, _trim($1);
100							return $self->_make_plan_token(
101							$line, $tests_planned, 'TODO',
102							'', \@todo
103							);
104							}
105							elsif ( 0 == $tests_planned ) {
106							$skip = 'SKIP';
107
108							# If we can't match # SKIP the directive should be undef.
109							($explanation) = $tail =~ /^#\sSKIP\S\s+(.*)/i;
110							}
111							elsif ( $tail !~ /^\s*$/ ) {
112							return $self->_make_unknown_token($line);
113							}
114
115							$explanation = '' unless defined $explanation;
116
117							return $self->_make_plan_token(
118							$line, $tests_planned, $skip,
119							$explanation, []
120							);
121
122							},
123							},
124
125							# An optimization to handle the most common test lines without
126							# directives.
127							simple_test => {
128							syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
129							handler => sub {
130							my ( $self, $line ) = @_;
131							my ( $ok, $num, $desc ) = ( $1, $2, $3 );
132
133							return $self->_make_test_token(
134							$line, $ok, $num,
135							$desc
136							);
137							},
138							},
139							test => {
140							syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
141							handler => sub {
142							my ( $self, $line ) = @_;
143							my ( $ok, $num, $desc ) = ( $1, $2, $3 );
144							my ( $dir, $explanation ) = ( '', '' );
145							if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
146							\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
147							)
148							{
149							( $desc, $dir, $explanation ) = ( $1, $2, $3 );
150							}
151							return $self->_make_test_token(
152							$line, $ok, $num, $desc,
153							$dir, $explanation
154							);
155							},
156							},
157							comment => {
158							syntax => qr/^#(.*)/,
159							handler => sub {
160							my ( $self, $line ) = @_;
161							my $comment = $1;
162							return $self->_make_comment_token( $line, $comment );
163							},
164							},
165							bailout => {
166							syntax => qr/^\sBail out!\s(.*)/,
167							handler => sub {
168							my ( $self, $line ) = @_;
169							my $explanation = $1;
170							return $self->_make_bailout_token(
171							$line,
172							$explanation
173							);
174							},
175							},
176							);
177
178							my %v13 = (
179							%v12,
180							plan => {
181							syntax => qr/^1\.\.(\d+)\s(?:\s#\sSKIP\b(.))?\z/i,
182							handler => sub {
183							my ( $self, $line ) = @_;
184							my ( $tests_planned, $explanation ) = ( $1, $2 );
185							my $skip
186							= ( 0 == $tests_planned \|\| defined $explanation )
187							? 'SKIP'
188							: '';
189							$explanation = '' unless defined $explanation;
190							return $self->_make_plan_token(
191							$line, $tests_planned, $skip,
192							$explanation, []
193							);
194							},
195							},
196							yaml => {
197							syntax => qr/^ (\s+) (---.*) $/x,
198							handler => sub {
199							my ( $self, $line ) = @_;
200							my ( $pad, $marker ) = ( $1, $2 );
201							return $self->_make_yaml_token( $pad, $marker );
202							},
203							},
204							pragma => {
205							syntax =>
206							qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
207							handler => sub {
208							my ( $self, $line ) = @_;
209							my $pragmas = $1;
210							return $self->_make_pragma_token( $line, $pragmas );
211							},
212							},
213							);
214
215							%language_for = (
216							'12' => {
217							tokens => \%v12,
218							},
219							'13' => {
220							tokens => \%v13,
221							setup => sub {
222							shift->{iterator}->handle_unicode;
223							},
224							},
225							);
226							}
227
228							##############################################################################
229
230							=head2 Instance Methods
231
232							=head3 C
233
234							$grammar->set_version(13);
235
236							Tell the grammar which TAP syntax version to support. The lowest
237							supported version is 12. Although 'TAP version' isn't valid version 12
238							syntax it is accepted so that higher version numbers may be parsed.
239
240							=cut
241
242							sub set_version {
243	314			314	1	993	my $self = shift;
244	314					918	my $version = shift;
245
246	314	100				2175	if ( my $language = $language_for{$version} ) {
247	313					1225	$self->{version} = $version;
248	313					1360	$self->{tokens} = $language->{tokens};
249
250	313	100				1383	if ( my $setup = $language->{setup} ) {
251	25					172	$self->$setup();
252							}
253
254	313					1774	$self->_order_tokens;
255							}
256							else {
257	1					12	require Carp;
258	1					197	Carp::croak("Unsupported syntax version: $version");
259							}
260							}
261
262							# Optimization to put the most frequent tokens first.
263							sub _order_tokens {
264	313			313		900	my $self = shift;
265
266	313					843	my %copy = %{ $self->{tokens} };
	313					4208
267	1252					4945	my @ordered_tokens = grep {defined}
268	313					1705	map { delete $copy{$_} } qw( simple_test test comment plan );
	1252					4025
269	313					1847	push @ordered_tokens, values %copy;
270
271	313					2220	$self->{ordered_tokens} = \@ordered_tokens;
272							}
273
274							##############################################################################
275
276							=head3 C
277
278							my $token = $grammar->tokenize;
279
280							This method will return a L object representing the
281							current line of TAP.
282
283							=cut
284
285							sub tokenize {
286	1609			1609	1	3960	my $self = shift;
287
288	1609					9973	my $line = $self->{iterator}->next;
289	1607	100				6349	unless ( defined $line ) {
290	269					914	delete $self->{parser}; # break circular ref
291	269					1582	return;
292							}
293
294	1338					2934	my $token;
295
296	1338					2934	for my $token_data ( @{ $self->{ordered_tokens} } ) {
	1338					7463
297	2966	100				29537	if ( $line =~ $token_data->{syntax} ) {
298	1306					4171	my $handler = $token_data->{handler};
299	1306					5371	$token = $self->$handler($line);
300	1306					4254	last;
301							}
302							}
303
304	1338	100				5488	$token = $self->_make_unknown_token($line) unless $token;
305
306	1338					7533	return $self->{parser}->make_result($token);
307							}
308
309							##############################################################################
310
311							=head3 C
312
313							my @types = $grammar->token_types;
314
315							Returns the different types of tokens which this grammar can parse.
316
317							=cut
318
319							sub token_types {
320	2			2	1	792	my $self = shift;
321	2					4	return keys %{ $self->{tokens} };
	2					24
322							}
323
324							##############################################################################
325
326							=head3 C
327
328							my $syntax = $grammar->syntax_for($token_type);
329
330							Returns a pre-compiled regular expression which will match a chunk of TAP
331							corresponding to the token type. For example (not that you should really pay
332							attention to this, C<< $grammar->syntax_for('comment') >> will return
333							C<< qr/^#(.*)/ >>.
334
335							=cut
336
337							sub syntax_for {
338	8			8	1	3401	my ( $self, $type ) = @_;
339	8					51	return $self->{tokens}->{$type}->{syntax};
340							}
341
342							##############################################################################
343
344							=head3 C
345
346							my $handler = $grammar->handler_for($token_type);
347
348							Returns a code reference which, when passed an appropriate line of TAP,
349							returns the lexed token corresponding to that line. As a result, the basic
350							TAP parsing loop looks similar to the following:
351
352							my @tokens;
353							my $grammar = TAP::Grammar->new;
354							LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
355							for my $type ( $grammar->token_types ) {
356							my $syntax = $grammar->syntax_for($type);
357							if ( $line =~ $syntax ) {
358							my $handler = $grammar->handler_for($type);
359							push @tokens => $grammar->$handler($line);
360							next LINE;
361							}
362							}
363							push @tokens => $grammar->_make_unknown_token($line);
364							}
365
366							=cut
367
368							sub handler_for {
369	8			8	1	23	my ( $self, $type ) = @_;
370	8					43	return $self->{tokens}->{$type}->{handler};
371							}
372
373							sub _make_version_token {
374	30			30		152	my ( $self, $line, $version ) = @_;
375							return {
376	30					332	type => 'version',
377							raw => $line,
378							version => $version,
379							};
380							}
381
382							sub _make_plan_token {
383	268			268		1373	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
384
385	268	100	100			1789	if ( $directive eq 'SKIP'
			66
386							&& 0 != $tests_planned
387							&& $self->{version} < 13 )
388							{
389	1					9	warn
390							"Specified SKIP directive in plan but more than 0 tests ($line)\n";
391							}
392
393							return {
394	268					1788	type => 'plan',
395							raw => $line,
396							tests_planned => $tests_planned,
397							directive => $directive,
398							explanation => _trim($explanation),
399							todo_list => $todo,
400							};
401							}
402
403							sub _make_test_token {
404	875			875		4072	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
405							return {
406	875	100				5806	ok => $ok,
		100
407
408							# forcing this to be an integer (and not a string) reduces memory
409							# consumption. RT #84939
410							test_num => ( defined $num ? 0 + $num : undef ),
411							description => _trim($desc),
412							directive => ( defined $dir ? uc $dir : '' ),
413							explanation => _trim($explanation),
414							raw => $line,
415							type => 'test',
416							};
417							}
418
419							sub _make_unknown_token {
420	32			32		137	my ( $self, $line ) = @_;
421							return {
422	32					187	raw => $line,
423							type => 'unknown',
424							};
425							}
426
427							sub _make_comment_token {
428	111			111		364	my ( $self, $line, $comment ) = @_;
429							return {
430	111					420	type => 'comment',
431							raw => $line,
432							comment => _trim($comment)
433							};
434							}
435
436							sub _make_bailout_token {
437	9			9		39	my ( $self, $line, $explanation ) = @_;
438							return {
439	9					42	type => 'bailout',
440							raw => $line,
441							bailout => _trim($explanation)
442							};
443							}
444
445							sub _make_yaml_token {
446	10			10		53	my ( $self, $pad, $marker ) = @_;
447
448	10					127	my $yaml = TAP::Parser::YAMLish::Reader->new;
449
450	10					40	my $iterator = $self->{iterator};
451
452							# Construct a reader that reads from our input stripping leading
453							# spaces from each line.
454	10					38	my $leader = length($pad);
455	10					196	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
456	10					50	my @extra = ($marker);
457							my $reader = sub {
458	50	100		50		194	return shift @extra if @extra;
459	40					158	my $line = $iterator->next;
460	40	50				579	return $2 if $line =~ $strip;
461	0					0	return;
462	10					92	};
463
464	10					91	my $data = $yaml->read($reader);
465
466							# Reconstitute input. This is convoluted. Maybe we should just
467							# record it on the way in...
468	10					79	chomp( my $raw = $yaml->get_raw );
469	10					125	$raw =~ s/^/$pad/mg;
470
471							return {
472	10					180	type => 'yaml',
473							raw => $raw,
474							data => $data
475							};
476							}
477
478							sub _make_pragma_token {
479	7			7		25	my ( $self, $line, $pragmas ) = @_;
480							return {
481	7					31	type => 'pragma',
482							raw => $line,
483							pragmas => [ split /\s,\s/, _trim($pragmas) ],
484							};
485							}
486
487							sub _trim {
488	2147			2147		5025	my $data = shift;
489
490	2147	100				13929	return '' unless defined $data;
491
492	954					3883	$data =~ s/^\s+//;
493	954					3665	$data =~ s/\s+$//;
494	954					9698	return $data;
495							}
496
497							1;
498
499							=head1 TAP GRAMMAR
500
501							B This grammar is slightly out of date. There's still some discussion
502							about it and a new one will be provided when we have things better defined.
503
504							The L does not use a formal grammar because TAP is essentially a
505							stream-based protocol. In fact, it's quite legal to have an infinite stream.
506							For the same reason that we don't apply regexes to streams, we're not using a
507							formal grammar here. Instead, we parse the TAP in lines.
508
509							For purposes for forward compatibility, any result which does not match the
510							following grammar is currently referred to as
511							L. It is I a parse error.
512
513							A formal grammar would look similar to the following:
514
515							(*
516							For the time being, I'm cheating on the EBNF by allowing
517							certain terms to be defined by POSIX character classes by
518							using the following syntax:
519
520							digit ::= [:digit:]
521
522							As far as I am aware, that's not valid EBNF. Sue me. I
523							didn't know how to write "char" otherwise (Unicode issues).
524							Suggestions welcome.
525							*)
526
527							tap ::= version? { comment \| unknown } leading_plan lines
528							\|
529							lines trailing_plan {comment}
530
531							version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
532
533							leading_plan ::= plan skip_directive? "\n"
534
535							trailing_plan ::= plan "\n"
536
537							plan ::= '1..' nonNegativeInteger
538
539							lines ::= line {line}
540
541							line ::= (comment \| test \| unknown \| bailout ) "\n"
542
543							test ::= status positiveInteger? description? directive?
544
545							status ::= 'not '? 'ok '
546
547							description ::= (character - (digit \| '#')) {character - '#'}
548
549							directive ::= todo_directive \| skip_directive
550
551							todo_directive ::= hash_mark 'TODO' ' ' {character}
552
553							skip_directive ::= hash_mark 'SKIP' ' ' {character}
554
555							comment ::= hash_mark {character}
556
557							hash_mark ::= '#' {' '}
558
559							bailout ::= 'Bail out!' {character}
560
561							unknown ::= { (character - "\n") }
562
563							(* POSIX character classes and other terminals *)
564
565							digit ::= [:digit:]
566							character ::= ([:print:] - "\n")
567							positiveInteger ::= ( digit - '0' ) {digit}
568							nonNegativeInteger ::= digit {digit}
569
570							=head1 SUBCLASSING
571
572							Please see L for a subclassing overview.
573
574							If you I want to subclass L's grammar the best thing to
575							do is read through the code. There's no easy way of summarizing it here.
576
577							=head1 SEE ALSO
578
579							L,
580							L,
581							L,
582							L,
583
584							=cut