File Coverage

blib/lib/Lingua/Awkwords/Parser.pm

Criterion	Covered	Total	%
statement	97	99	97.9
branch	29	30	96.6
condition			n/a
subroutine	21	21	100.0
pod	1	1	100.0
total	148	151	98.0

line	stmt	bran	sub	pod	time	code
1						# -- Perl --
2						#
3						# Parser::MGC subclass that parses awkword patterns into a data structure
4
5						package Lingua::Awkwords::Parser;
6
7	3		3		85425	use strict;
	3				15
	3				69
8	3		3		12	use warnings;
	3				4
	3				66
9
10	3		3		633	use parent qw( Parser::MGC );
	3				657
	3				12
11
12	3		3		37753	use Lingua::Awkwords::ListOf;
	3				8
	3				84
13	3		3		646	use Lingua::Awkwords::OneOf;
	3				11
	3				63
14	3		3		474	use Lingua::Awkwords::String;
	3				6
	3				64
15	3		3		497	use Lingua::Awkwords::Subpattern;
	3				7
	3				2300
16
17						our $VERSION = '0.07';
18
19						sub parse {
20	32		32	1	8168	my $self = shift;
21	32				65	my $unit = $self->_parse_unit;
22
23						my $filters =
24	32		16		207	$self->sequence_of( sub { $self->expect('^'); $self->_parse_filter } );
	16				554
	6				230
25	32	100			1420	$unit->add_filters(@$filters) if @$filters;
26
27	32				63	return $unit;
28						}
29
30						# filters exclude strings from previous units
31						sub _parse_filter {
32	10		10		16	my $self = shift;
33
34	10				13	my $filter = '';
35
36						1 while $self->any_of(
37						# NOTE code for these duplicted from the unit parse
38						sub {
39	20		20		330	$filter .= ( $self->expect(qr/"([^"]*)"/) )[-1];
40	0				0	1;
41						},
42						sub {
43	20		20		1213	$filter .= $self->generic_token( 'other', qr{[^ "A-Z\[\]/\*\^]+} );
44	10				379	1;
45						},
46						sub {
47	10		10		559	0;
48						}
49	10				51	);
50
51	10				68	return $filter;
52						}
53
54						# units, which might be a ::ListOf choices [VV] or ::OneOf [a/b] or both
55						# [VV/CV] or neither [asdf]. units can also contain other units
56						sub _parse_unit {
57	49		49		323	my $self = shift;
58
59	49				67	my ( $oneof, $weight );
60	49				88	my @terms = '';
61
62						1 while $self->any_of(
63						sub {
64	121		121		2856	$self->expect('*');
65
66						# NOTE original version instead treats [a1020/b] as a
67						# weight of 1020 then reduces that to 128 (with a warning)
68						# as of 0.06 upper limit removed from this implementation
69	3	100			113	$self->fail("weight already set") if $weight;
70
71	2				8	my $num = $self->token_int;
72	2	100			178	$self->fail("weight must be positive integer") if $num < 1;
73	1				1	$weight = $num;
74	1				3	1;
75						},
76						sub {
77	120		120		8865	$self->expect('/');
78
79	11	100			530	$oneof = Lingua::Awkwords::OneOf->new if !defined $oneof;
80
81	11				22	for my $term (@terms) {
82						# TODO cache these strings so only one obj instance per str?
83	12	100			167	$term = Lingua::Awkwords::String->new( string => $term ) if !ref $term;
84						}
85	11				257	$oneof->add_choice( Lingua::Awkwords::ListOf->new( terms => [@terms] ),
86						$weight );
87
88						# empty string here is so [a/] parses correctly as a choice
89						# between a and nothing instead of dropping out of the unit
90						# upon ]
91	11				20	undef $weight;
92	11				22	@terms = '';
93	11				19	1;
94						},
95						sub {
96						# recurse into sub-units [...] or (...)
97	109		109		7177	my $delim = $self->expect(qr/[ \[\( ]/x);
98	17				611	$delim =~ tr/[(/])/;
99
100	17				50	my $ret = $self->scope_of( undef, \&_parse_unit, $delim );
101	12	100			488	if ( $terms[-1] eq '' ) {
102	8				15	$terms[-1] = $ret;
103						} else {
104	4				7	push @terms, $ret;
105						}
106
107						# () needs additional code as (a) is equivalent to [a/] so
108						# we must add an empty string to what must become a oneof
109	12	100			24	if ( $delim eq ')' ) {
110	1				1	my $newof;
111	1	50			7	unless ( $terms[-1]->can('add_choice') ) {
112	1				16	$newof = Lingua::Awkwords::OneOf->new;
113	1				10	$newof->add_choice( $terms[-1] );
114	1				1	$terms[-1] = $newof;
115						} else {
116	0				0	$newof = $terms[-1];
117						}
118
119						# TODO cache this string in a hash so only one obj?
120	1				15	$newof->add_choice( Lingua::Awkwords::String->new( string => '' ) );
121						}
122
123						# filters in [VV]^aa form (as opposed to the top-level
124						# parse() VV^aa form which lack the trailing ] or ) of this
125						# code path
126						$self->maybe(
127						sub {
128						my $filters =
129	12				145	$self->sequence_of( sub { $self->expect('^'); $self->_parse_filter } );
	7				233
	4				146
130	12	100			490	$terms[-1]->add_filters(@$filters) if @$filters;
131						}
132	12				56	);
133	12				66	1;
134						},
135						sub {
136	97		97		6485	my $pat = $self->generic_token( 'subpattern', qr{[A-Z]} );
137	12	100			527	$self->fail("not a defined pattern")
138						if !Lingua::Awkwords::Subpattern->is_pattern($pat);
139
140	11				158	my $ret = Lingua::Awkwords::Subpattern->new( pattern => $pat );
141	11	100			49	if ( $terms[-1] eq '' ) {
142	5				9	$terms[-1] = $ret;
143						} else {
144	6				13	push @terms, $ret;
145						}
146	11				21	1;
147						},
148						# NOTE code from these two also used in _parse_filter
149						sub {
150	86		86		5706	my $ret = ( $self->expect(qr/"([^"]*)"/) )[-1];
151	5	100			277	if ( ref $terms[-1] ) {
152	1				3	push @terms, $ret;
153						} else {
154	4				8	$terms[-1] .= $ret;
155						}
156	5				10	1;
157						},
158						sub {
159	81		81		5364	my $ret = $self->generic_token( 'other', qr{[^ "A-Z\[\]/\*\^]+} );
160	32	100			1799	if ( ref $terms[-1] ) {
161	1				3	push @terms, $ret;
162						} else {
163	31				57	$terms[-1] .= $ret;
164						}
165	32				57	1;
166						},
167						sub {
168	49		49		3111	0;
169						}
170	49				461	);
171
172	49				663	for my $term (@terms) {
173						# TODO cache these strings so only one obj instance per str?
174	60	100			832	$term = Lingua::Awkwords::String->new( string => $term ) if !ref $term;
175						}
176
177	49	100			2196	if ( defined $oneof ) {
178	8				114	$oneof->add_choice( Lingua::Awkwords::ListOf->new( terms => [@terms] ),
179						$weight );
180	8				19	return $oneof;
181						} else {
182	41				599	return Lingua::Awkwords::ListOf->new( terms => \@terms );
183						}
184						}
185
186						1;
187						__END__