File Coverage

blib/lib/Regexp/Whitespace/Parser.pm

Criterion	Covered	Total	%
statement	53	57	92.9
branch	11	16	68.7
condition	1	3	33.3
subroutine	14	16	87.5
pod			n/a
total	79	92	85.8

line	stmt	bran	cond	sub	time	code
1
2	2			2	10	use strict;
	2				2
	2				68
3	2			2	9	use warnings;
	2				4
	2				44
4
5	2			2	1607	use Clone;
	2				8248
	2				263
6
7						package Regexp::Whitespace::Parser;
8
9						our $VERSION = '0.001_0';
10
11	2			2	5812	use YAPE::Regex qw( Regexp::Whitespace::Parser );
	2				129102
	2				30
12
13						package Regexp::Whitespace::Parser::Element;
14
15						our @ISA;
16
17						BEGIN {
18	2			2	3549	push @ISA, qw( Clone );
19						}
20
21						sub convert {
22						# clone by default
23	0			0	0	return shift->clone;
24						}
25
26						package Regexp::Whitespace::Parser::container;
27
28						sub convert {
29	17			17	32	my $self = shift;
30	17				226	my $clone = $self->clone(1); # shallow copy
31	17				37	my @content = map { $_->convert } @{$self->{CONTENT}};
	17				94
	17				158
32	17				167	$clone->{CONTENT} = \@content;
33	17				189	return $clone;
34						}
35
36						# register this package into @ISA of container types
37						{
38						my @container_types = qw( cut lookahead lookbehind group capture );
39	2			2	27	no strict 'refs';
	2				5
	2				1022
40						for my $type (@container_types) {
41						unshift @{ 'Regexp::Whitespace::Parser::' . $type . '::ISA' }, __PACKAGE__;
42						}
43						}
44
45						package Regexp::Whitespace::Parser::exact;
46
47						sub convert {
48	16			16	26	my $self = shift;
49	16				140	my $exact = $self->exact_text;
50
51						# are there ocurrences of \s ?
52	16	50			74	if ( $exact =~ /\s/ ) {
53	16				24	my @pieces;
54
55	16	100			40	if ( length $exact > 1 ) {
56						# note: only 'text' types need this loop,
57						# being the only one whose exact text may have
58						# a length greater than 1
59
60						# assertions
61	4	50			29	die "panic: quantity modifier should not present for multi-character text" if $self->quant;
62	4	50			41	die "panic: non-greedy modifier should not present for multi-character text" if $self->ngreed;
63
64						LOOP : {
65	4	100			52	if ( $exact =~ / \G \z /xgc ) {
	22				68
66	4				9	last LOOP;
67						}
68	18	100			54	if ( $exact =~ / \G \s+ /xgc ) {
69						# replace matches of /\s+/ with a macro '\s+'
70	7				40	push @pieces, Regexp::Whitespace::Parser::macro->new( 's', '+', '' );
71	7				60	redo LOOP;
72						}
73	11	50			43	if ( $exact =~ / \G (\S+) /xgc ) {
74	11				39	push @pieces, Regexp::Whitespace::Parser::text->new( $1, '', '' );
75	11				112	redo LOOP;
76						}
77						}
78						} else {
79						# FIXME: these conversion rules needs checking
80						# s is any char that matches /\s/
81						# s becomes \s+
82						# s? becomes \s*
83						# s* becomes \s*
84						# s{0} becomes \s*
85						# s{0,N} becomes \s*
86						# s{M,N} becomes \s+
87						# s+ becomes \s+
88						#
89						# the non-greedy flag is kept (don't know if that's correct)
90	12				351	my ($q, $ng) = ($self->quant, $self->ngreed);
91	12	50			140	my $nq = ( $q =~ /\A ( [?] \| [{]0 ) /x ) ? '' : '+';
92	12				45	return Regexp::Whitespace::Parser::macro->new( 's', $nq, $ng );
93
94						# TODO: some tests would be nice
95						}
96
97	4				28	return @pieces;
98
99						} else {
100						# no needed conversion
101	0				0	return $self->clone;
102						}
103						}
104
105						# register this package into @ISA of exact types
106						{
107						my @exact_types = qw( text oct hex slash ctrl named ); # ?! utf8hex
108	2			2	15	no strict 'refs';
	2				4
	2				879
109						for my $type (@exact_types) {
110						unshift @{ 'Regexp::Whitespace::Parser::' . $type . '::ISA' }, __PACKAGE__;
111						}
112						}
113
114						package Regexp::Whitespace::Parser::text;
115
116						sub exact_text {
117	8			8	27	return shift->{TEXT};
118						}
119
120						package Regexp::Whitespace::Parser::oct;
121
122						sub exact_text {
123	1			1	5	return chr oct(shift->{TEXT});
124						}
125
126						package Regexp::Whitespace::Parser::hex;
127
128						sub exact_text {
129	2			2	11	return chr hex(shift->{TEXT});
130						}
131
132						#package ...::utf8hext; # FIXME wait for new release of YAPE::Regex
133
134						package Regexp::Whitespace::Parser::slash;
135
136						my %known_sequences = (
137						't' => "\t",
138						'n' => "\n",
139						'r' => "\r",
140						'a' => "\a",
141						'f' => "\f",
142						'b' => "\b",
143						'e' => "\e",
144						);
145
146						sub exact_text {
147	1			1	3	my $t = shift->{TEXT};
148	1		33		12	return $known_sequences{$t} \|\| $t;
149						}
150
151						package Regexp::Whitespace::Parser::ctrl;
152
153						sub exact_text {
154	4			4	9	my $t = shift->{TEXT};
155	4				19	return chr( ord(uc $t) ^ 0x40 );
156						}
157
158						package Regexp::Whitespace::Parser::named;
159
160						sub exact_text {
161	0			0		require charnames;
162	0					return charnames::vianame(shift->{TEXT});
163						}
164
165
166						1;
167