File Coverage

blib/lib/Lingua/Awkwords/Parser.pm

Criterion	Covered	Total	%
statement	97	99	97.9
branch	29	30	96.6
condition			n/a
subroutine	21	21	100.0
pod	1	1	100.0
total	148	151	98.0

line	stmt	bran	sub	pod	time	code
1						# -- Perl --
2						#
3						# Parser::MGC subclass that parses awkword patterns into a data structure
4
5						package Lingua::Awkwords::Parser;
6
7	3		3		110154	use strict;
	3				13
	3				72
8	3		3		14	use warnings;
	3				5
	3				73
9
10	3		3		1136	use parent qw( Parser::MGC );
	3				835
	3				12
11
12	3		3		46717	use Lingua::Awkwords::ListOf;
	3				11
	3				91
13	3		3		1046	use Lingua::Awkwords::OneOf;
	3				13
	3				65
14	3		3		723	use Lingua::Awkwords::String;
	3				7
	3				68
15	3		3		786	use Lingua::Awkwords::Subpattern;
	3				5
	3				2824
16
17						our $VERSION = '0.10';
18
19						sub parse {
20	31		31	1	9154	my $self = shift;
21	31				67	my $unit = $self->_parse_unit;
22
23						my $filters =
24	31		16		373	$self->sequence_of(sub { $self->expect('^'); $self->_parse_filter });
	16				629
	6				326
25	31	100			1682	$unit->add_filters(@$filters) if @$filters;
26
27	31				61	return $unit;
28						}
29
30						# filters exclude strings from previous units
31						sub _parse_filter {
32	10		10		15	my $self = shift;
33
34	10				14	my $filter = '';
35
36						1 while $self->any_of(
37						# NOTE code for these duplicted from the unit parse
38						sub {
39	20		20		329	$filter .= ($self->expect(qr/"([^"]*)"/))[-1];
40	0				0	1;
41						},
42						sub {
43	20		20		1343	$filter .= $self->generic_token('other', qr{[^ "A-Z\[\]/\*\^]+});
44	10				387	1;
45						},
46						sub {
47	10		10		794	0;
48						}
49	10				53	);
50
51	10				71	return $filter;
52						}
53
54						# units, which might be a ::ListOf choices [VV] or ::OneOf [a/b] or both
55						# [VV/CV] or neither [asdf]. units can also contain other units
56						sub _parse_unit {
57	46		46		317	my $self = shift;
58
59	46				58	my ($oneof, $weight);
60	46				92	my @terms = '';
61
62						1 while $self->any_of(
63						sub {
64	107		107		2913	$self->expect('*');
65
66						# NOTE original version instead treats [a1020/b] as a
67						# weight of 1020 then reduces that to 128 (with a warning)
68						# as of 0.06 upper limit removed from this implementation
69	3	100			120	$self->fail("weight already set") if $weight;
70
71	2				10	my $num = $self->token_int;
72	2	100			266	$self->fail("weight must be positive integer") if $num < 1;
73	1				2	$weight = $num;
74	1				3	1;
75						},
76						sub {
77	106		106		9466	$self->expect('/');
78
79	8	100			522	$oneof = Lingua::Awkwords::OneOf->new if !defined $oneof;
80
81	8				17	for my $term (@terms) {
82						# TODO cache these strings so only one obj instance per str?
83	8	100			126	$term = Lingua::Awkwords::String->new(string => $term) if !ref $term;
84						}
85	8				372	$oneof->add_choice(Lingua::Awkwords::ListOf->new(terms => [@terms]), $weight);
86
87						# empty string here is so [a/] parses correctly as a choice
88						# between a and nothing instead of dropping out of the unit
89						# upon ]
90	8				12	undef $weight;
91	8				16	@terms = '';
92	8				14	1;
93						},
94						sub {
95						# recurse into sub-units [...] or (...)
96	98		98		8106	my $delim = $self->expect(qr/[ \[\( ]/x);
97	15				658	$delim =~ tr/[(/])/;
98
99	15				49	my $ret = $self->scope_of(undef, \&_parse_unit, $delim);
100	10	100			587	if ($terms[-1] eq '') {
101	8				14	$terms[-1] = $ret;
102						} else {
103	2				4	push @terms, $ret;
104						}
105
106						# () needs additional code as (a) is equivalent to [a/] so
107						# we must add an empty string to what must become a oneof
108	10	100			21	if ($delim eq ')') {
109	1				2	my $newof;
110	1	50			9	unless ($terms[-1]->can('add_choice')) {
111	1				23	$newof = Lingua::Awkwords::OneOf->new;
112	1				12	$newof->add_choice($terms[-1]);
113	1				2	$terms[-1] = $newof;
114						} else {
115	0				0	$newof = $terms[-1];
116						}
117
118						# TODO cache this string in a hash so only one obj?
119	1				14	$newof->add_choice(Lingua::Awkwords::String->new(string => ''));
120						}
121
122						# filters in [VV]^aa form (as opposed to the top-level
123						# parse() VV^aa form which lack the trailing ] or ) of this
124						# code path
125						$self->maybe(
126						sub {
127						my $filters =
128	10				150	$self->sequence_of(sub { $self->expect('^'); $self->_parse_filter });
	6				189
	4				144
129	10	100			420	$terms[-1]->add_filters(@$filters) if @$filters;
130						}
131	10				56	);
132	10				56	1;
133						},
134						sub {
135	88		88		6160	my $pat = $self->generic_token('subpattern', qr{[A-Z]});
136	12	100			536	$self->fail("not a defined pattern")
137						if !Lingua::Awkwords::Subpattern->is_pattern($pat);
138
139	11				209	my $ret = Lingua::Awkwords::Subpattern->new(pattern => $pat);
140	11	100			52	if ($terms[-1] eq '') {
141	5				8	$terms[-1] = $ret;
142						} else {
143	6				11	push @terms, $ret;
144						}
145	11				21	1;
146						},
147						# NOTE code from these two also used in _parse_filter
148						sub {
149	77		77		5687	my $ret = ($self->expect(qr/"([^"]*)"/))[-1];
150	5	100			404	if (ref $terms[-1]) {
151	1				2	push @terms, $ret;
152						} else {
153	4				11	$terms[-1] .= $ret;
154						}
155	5				12	1;
156						},
157						sub {
158	72		72		6192	my $ret = $self->generic_token('other', qr{[^ "A-Z\[\]/\*\^]+});
159	26	100			1971	if (ref $terms[-1]) {
160	1				3	push @terms, $ret;
161						} else {
162	25				49	$terms[-1] .= $ret;
163						}
164	26				46	1;
165						},
166						sub {
167	46		46		3250	0;
168						}
169	46				532	);
170
171	46				666	for my $term (@terms) {
172						# TODO cache these strings so only one obj instance per str?
173	56	100			845	$term = Lingua::Awkwords::String->new(string => $term) if !ref $term;
174						}
175
176	46	100			2735	if (defined $oneof) {
177	5				75	$oneof->add_choice(Lingua::Awkwords::ListOf->new(terms => [@terms]), $weight);
178	5				13	return $oneof;
179						} else {
180	41				822	return Lingua::Awkwords::ListOf->new(terms => \@terms);
181						}
182						}
183
184						1;
185						__END__