File Coverage

blib/lib/FieldParser.pm

Criterion	Covered	Total	%
statement	73	92	79.3
branch	8	22	36.3
condition	13	30	43.3
subroutine	15	17	88.2
pod	5	5	100.0
total	114	166	68.6

line	stmt	bran	cond	sub	pod	time	code
1							package FieldParser;
2
3	1			1		21032	use 5.006;
	1					3
	1					30
4	1			1		4	use strict;
	1					2
	1					34
5	1			1		3	use warnings;
	1					1
	1					25
6	1			1		9	use Exporter ();
	1					2
	1					18
7	1			1		3	use base qw(Exporter);
	1					1
	1					94
8	1			1		524	use List::MoreUtils qw(first_index last_index);
	1					10055
	1					10
9
10							our @EXPORT = qw(parser);
11							our @EXPORT_OK = qw(tokenizer weeder extractor);
12
13							our $VERSION = '0.01';
14
15	1			1	1	2	sub Iterator(&) { $_[0] };
16
17
18							sub tokenizer {
19	1		50	1	1	3	my $input = shift \|\| die "No input passed!Exiting...";
20	1					2	my $weedout = shift;
21	1		33			9	my $split_on = shift \|\| qr/"\s+?"/;
22	1		50			3	my $ignore = shift \|\| [];
23	1					2	my @tokens;
24
25	1					14	@tokens = split /$split_on/, $input;
26
27							return Iterator {
28	7			7		17	while( @tokens ) {
29	6					7	my $token = shift @tokens;
30
31	6					4	for my $to_ignore ( @{ $ignore } ) {
	6					9
32	0	0				0	return $weedout->($token) if $token ne $to_ignore;
33							}
34
35	6	50				4	return $weedout->($token) if ! @{ $ignore };
	6					12
36							}
37							}
38	1					7	}
39
40
41							sub weeder {
42	1		33	1	1	3	my $weed = shift \|\| qr/\<\/\|\>\|\<\|\"\|\'/;
43
44							return sub {
45	6			6		6	my $token = shift;
46
47	6	50				30	$token =~ s/$weed//g if $token;
48	6					19	return $token;
49							}
50
51	1					6	}
52
53
54							sub extractor {
55	1		50	1	1	3	my $tokens = shift \|\| die "No tokens passed!Exiting...";
56	1		50			2	my $all = shift \|\| [];
57	1		50			3	my $ignore = shift \|\| [];
58	1					15	my $what = shift;
59	1					2	my $how = shift;
60	1					1	my %request;
61
62	1	50				4	return $how->($tokens, $what) if ref $how eq 'CODE';
63
64	1	50				3	if (! $what) {
65	1					1	for $what ( @{ $all } ) {
	1					2
66	2					1	my ($s_idx, $e_idx);
67	2			5		5	$s_idx = first_index { $what eq $_ } @{ $tokens };
	5					5
	2					11
68	2	50				6	if ( $s_idx == -1 ) {
69	0					0	print "Search for $what failed!Error - $what doesn't exist";
70	0					0	next;
71							}
72
73	2			5		4	$e_idx = last_index { $what eq $_ } @{ $tokens };
	5					5
	2					5
74	2	50				4	if ( $s_idx == $e_idx ) {
75	0					0	print "Search for $what failed!Error - Only one tag Found";
76	0					0	next;
77							}
78
79	2					4	$request{$what} = [ @{ $tokens }[$s_idx+1..$e_idx-1] ];
	2					10
80
81							}
82	1					7	return \%request;
83							}
84							else {
85	0					0	my ($s_idx, $e_idx);
86	0			0		0	$s_idx = first_index { $what eq $_ } @{ $tokens };
	0					0
	0					0
87	0					0	print "Search for $what failed!Error -
88	0	0				0	$what doesn't exist"; return \%request if $s_idx == -1;
89
90	0			0		0	$e_idx = last_index { $what eq $_ } @{ $tokens };
	0					0
	0					0
91	0					0	print "Search for $what failed!Error -
92	0	0				0	Only one tag Found"; return \%request if $s_idx == $e_idx;
93
94	0					0	return [ @{ $tokens }[$s_idx+1..$e_idx-1] ];
	0					0
95							}
96							}
97
98
99							sub parser {
100	1		50	1	1	10	my $inp = shift \|\| die "No input passed!Exiting...";
101	1		50			3	my $extract = shift \|\| die "No interested tags passed!Exiting...";
102	1		33			8	my $sep = shift \|\| qr/"\s+?"/;
103							#my $wo = shift \|\| qr/\<\/\|\>\|\<\|\"\|\'/;
104	1		33			6	my $wo = shift \|\| qr/\<\/\|\>\|\<\|\"\|\'\|\s+$/;
105	1		50			5	my $it = shift \|\| [];
106	1		50			3	my $ig = shift \|\| [];
107	1					1	my ($inputs, $weed, @tokens, $token);
108
109	1					2	$inputs = $inp;
110	1	50				6	$inputs = $inp->[0] if ref $inp eq 'ARRAY';
111
112	1					3	$weed = weeder($wo);
113	1	50				2	if (! ref $inputs) {
114	1					3	my $iter = tokenizer($inputs, $weed, $sep, $it);
115
116	1					2	push @tokens, $token while ( $token = $iter->() );
117							}
118							else {
119							#push @tokens, $token for token @$inputs;
120	0					0	push @tokens, $weed->($_) for @$inputs;
121							}
122
123	1					3	return extractor(\@tokens, $extract, $ig);
124							}
125
126							1; # End of FieldParser
127
128							__END__
129
130							=head1 NAME
131
132							FieldParser - A generic parser.
133
134							=head1 VERSION
135
136							Version 0.01
137
138							=head1 SYNOPSIS
139
140							A generic parser made on the principles of Higher Order
141							Programming.The parser is meant to parse the input and
142							store the parsed text in a hashref.
143
144							use FieldParser;
145
146							my $interesting_tags = ['requestType', 'serviceUrl'];
147
148							my $input = '"<serviceUrl>" "http://d.com" "</serviceUrl>" "<requestType>" "AIS" "</requestType>"'
149
150							my $parsed = FieldParser::parser($input, $interesting_tags);
151
152							=head1 EXPORT
153
154							parser (default export)
155
156							tokenizer (ondemand export)
157
158							weeder (ondemand export)
159
160							extractor (ondemand export)
161
162							=head1 SUBROUTINES
163
164							=head2 Iterator
165
166							Syntactic sugar for iterator functionality. Not intended for direct use.
167
168							=head2 tokenizer
169
170							Convert raw input string into units of interest.Weedout and ignore
171							text not needed.
172
173							=head2 weeder
174
175							Sanitize input - remove weeds/unwanted text
176
177							=head2 extractor
178
179							Extract tokens embedded between specific tags.One can extract
180							tokens between a specific tag or ask for all tokens embedded
181							between all tags of interest.
182
183							=head2 parser
184
185							Intended interface to the outside unsuspecting world.Takes in the
186							raw input, interested tags, token separator(regexp), unwanted text
187							in tokens(regexp), unwanted tokens and all unwanted tokens between
188							specific tags.
189
190							=head1 AUTHOR
191
192							Varun Juyal, C<< <varunjuyal123 at yahoo.com> >>
193
194							=head1 BUGS
195
196							Please report any bugs or feature requests to C<bug-fieldparser at rt.cpan.org>, or through
197							the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=FieldParser>. I will be notified, and then you'll
198							automatically be notified of progress on your bug as I make changes.
199
200							=head1 SUPPORT
201
202							You can find documentation for this module with the perldoc command.
203
204							perldoc FieldParser
205
206
207							You can also look for information at:
208
209							=over 4
210
211							=item * RT: CPAN's request tracker (report bugs here)
212
213							L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=FieldParser>
214
215							=item * AnnoCPAN: Annotated CPAN documentation
216
217							L<http://annocpan.org/dist/FieldParser>
218
219							=item * CPAN Ratings
220
221							L<http://cpanratings.perl.org/d/FieldParser>
222
223							=item * Search CPAN
224
225							L<http://search.cpan.org/dist/FieldParser/>
226
227							=back
228
229
230							=head1 ACKNOWLEDGEMENTS
231
232
233							=head1 LICENSE AND COPYRIGHT
234
235							Copyright 2013 Varun Juyal.
236
237							This program is free software; you can redistribute it and/or modify it
238							under the terms of either: the GNU General Public License as published
239							by the Free Software Foundation; or the Artistic License.
240
241							See http://dev.perl.org/licenses/ for more information.
242
243							=cut
244