line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package FieldParser; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
21032
|
use 5.006; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
30
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
5
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
25
|
|
6
|
1
|
|
|
1
|
|
9
|
use Exporter (); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
18
|
|
7
|
1
|
|
|
1
|
|
3
|
use base qw(Exporter); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
94
|
|
8
|
1
|
|
|
1
|
|
524
|
use List::MoreUtils qw(first_index last_index); |
|
1
|
|
|
|
|
10055
|
|
|
1
|
|
|
|
|
10
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our @EXPORT = qw(parser); |
11
|
|
|
|
|
|
|
our @EXPORT_OK = qw(tokenizer weeder extractor); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = '0.01'; |
14
|
|
|
|
|
|
|
|
15
|
1
|
|
|
1
|
1
|
2
|
sub Iterator(&) { $_[0] }; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
sub tokenizer { |
19
|
1
|
|
50
|
1
|
1
|
3
|
my $input = shift || die "No input passed!Exiting..."; |
20
|
1
|
|
|
|
|
2
|
my $weedout = shift; |
21
|
1
|
|
33
|
|
|
9
|
my $split_on = shift || qr/"\s+?"/; |
22
|
1
|
|
50
|
|
|
3
|
my $ignore = shift || []; |
23
|
1
|
|
|
|
|
2
|
my @tokens; |
24
|
|
|
|
|
|
|
|
25
|
1
|
|
|
|
|
14
|
@tokens = split /$split_on/, $input; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
return Iterator { |
28
|
7
|
|
|
7
|
|
17
|
while( @tokens ) { |
29
|
6
|
|
|
|
|
7
|
my $token = shift @tokens; |
30
|
|
|
|
|
|
|
|
31
|
6
|
|
|
|
|
4
|
for my $to_ignore ( @{ $ignore } ) { |
|
6
|
|
|
|
|
9
|
|
32
|
0
|
0
|
|
|
|
0
|
return $weedout->($token) if $token ne $to_ignore; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
6
|
50
|
|
|
|
4
|
return $weedout->($token) if ! @{ $ignore }; |
|
6
|
|
|
|
|
12
|
|
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
} |
38
|
1
|
|
|
|
|
7
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub weeder { |
42
|
1
|
|
33
|
1
|
1
|
3
|
my $weed = shift || qr/\<\/|\>|\<|\"|\'/; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
return sub { |
45
|
6
|
|
|
6
|
|
6
|
my $token = shift; |
46
|
|
|
|
|
|
|
|
47
|
6
|
50
|
|
|
|
30
|
$token =~ s/$weed//g if $token; |
48
|
6
|
|
|
|
|
19
|
return $token; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
1
|
|
|
|
|
6
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub extractor { |
55
|
1
|
|
50
|
1
|
1
|
3
|
my $tokens = shift || die "No tokens passed!Exiting..."; |
56
|
1
|
|
50
|
|
|
2
|
my $all = shift || []; |
57
|
1
|
|
50
|
|
|
3
|
my $ignore = shift || []; |
58
|
1
|
|
|
|
|
15
|
my $what = shift; |
59
|
1
|
|
|
|
|
2
|
my $how = shift; |
60
|
1
|
|
|
|
|
1
|
my %request; |
61
|
|
|
|
|
|
|
|
62
|
1
|
50
|
|
|
|
4
|
return $how->($tokens, $what) if ref $how eq 'CODE'; |
63
|
|
|
|
|
|
|
|
64
|
1
|
50
|
|
|
|
3
|
if (! $what) { |
65
|
1
|
|
|
|
|
1
|
for $what ( @{ $all } ) { |
|
1
|
|
|
|
|
2
|
|
66
|
2
|
|
|
|
|
1
|
my ($s_idx, $e_idx); |
67
|
2
|
|
|
5
|
|
5
|
$s_idx = first_index { $what eq $_ } @{ $tokens }; |
|
5
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
11
|
|
68
|
2
|
50
|
|
|
|
6
|
if ( $s_idx == -1 ) { |
69
|
0
|
|
|
|
|
0
|
print "Search for $what failed!Error - $what doesn't exist"; |
70
|
0
|
|
|
|
|
0
|
next; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
|
73
|
2
|
|
|
5
|
|
4
|
$e_idx = last_index { $what eq $_ } @{ $tokens }; |
|
5
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
5
|
|
74
|
2
|
50
|
|
|
|
4
|
if ( $s_idx == $e_idx ) { |
75
|
0
|
|
|
|
|
0
|
print "Search for $what failed!Error - Only one tag Found"; |
76
|
0
|
|
|
|
|
0
|
next; |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
2
|
|
|
|
|
4
|
$request{$what} = [ @{ $tokens }[$s_idx+1..$e_idx-1] ]; |
|
2
|
|
|
|
|
10
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
} |
82
|
1
|
|
|
|
|
7
|
return \%request; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
else { |
85
|
0
|
|
|
|
|
0
|
my ($s_idx, $e_idx); |
86
|
0
|
|
|
0
|
|
0
|
$s_idx = first_index { $what eq $_ } @{ $tokens }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
87
|
0
|
|
|
|
|
0
|
print "Search for $what failed!Error - |
88
|
0
|
0
|
|
|
|
0
|
$what doesn't exist"; return \%request if $s_idx == -1; |
89
|
|
|
|
|
|
|
|
90
|
0
|
|
|
0
|
|
0
|
$e_idx = last_index { $what eq $_ } @{ $tokens }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
91
|
0
|
|
|
|
|
0
|
print "Search for $what failed!Error - |
92
|
0
|
0
|
|
|
|
0
|
Only one tag Found"; return \%request if $s_idx == $e_idx; |
93
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
0
|
return [ @{ $tokens }[$s_idx+1..$e_idx-1] ]; |
|
0
|
|
|
|
|
0
|
|
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub parser { |
100
|
1
|
|
50
|
1
|
1
|
10
|
my $inp = shift || die "No input passed!Exiting..."; |
101
|
1
|
|
50
|
|
|
3
|
my $extract = shift || die "No interested tags passed!Exiting..."; |
102
|
1
|
|
33
|
|
|
8
|
my $sep = shift || qr/"\s+?"/; |
103
|
|
|
|
|
|
|
#my $wo = shift || qr/\<\/|\>|\<|\"|\'/; |
104
|
1
|
|
33
|
|
|
6
|
my $wo = shift || qr/\<\/|\>|\<|\"|\'|\s+$/; |
105
|
1
|
|
50
|
|
|
5
|
my $it = shift || []; |
106
|
1
|
|
50
|
|
|
3
|
my $ig = shift || []; |
107
|
1
|
|
|
|
|
1
|
my ($inputs, $weed, @tokens, $token); |
108
|
|
|
|
|
|
|
|
109
|
1
|
|
|
|
|
2
|
$inputs = $inp; |
110
|
1
|
50
|
|
|
|
6
|
$inputs = $inp->[0] if ref $inp eq 'ARRAY'; |
111
|
|
|
|
|
|
|
|
112
|
1
|
|
|
|
|
3
|
$weed = weeder($wo); |
113
|
1
|
50
|
|
|
|
2
|
if (! ref $inputs) { |
114
|
1
|
|
|
|
|
3
|
my $iter = tokenizer($inputs, $weed, $sep, $it); |
115
|
|
|
|
|
|
|
|
116
|
1
|
|
|
|
|
2
|
push @tokens, $token while ( $token = $iter->() ); |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
else { |
119
|
|
|
|
|
|
|
#push @tokens, $token for token @$inputs; |
120
|
0
|
|
|
|
|
0
|
push @tokens, $weed->($_) for @$inputs; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
1
|
|
|
|
|
3
|
return extractor(\@tokens, $extract, $ig); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
1; # End of FieldParser |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
__END__ |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 NAME |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
FieldParser - A generic parser. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head1 VERSION |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Version 0.01 |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head1 SYNOPSIS |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
A generic parser made on the principles of Higher Order |
141
|
|
|
|
|
|
|
Programming.The parser is meant to parse the input and |
142
|
|
|
|
|
|
|
store the parsed text in a hashref. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
use FieldParser; |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
my $interesting_tags = ['requestType', 'serviceUrl']; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
my $input = '"<serviceUrl>" "http://d.com" "</serviceUrl>" "<requestType>" "AIS" "</requestType>"' |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
my $parsed = FieldParser::parser($input, $interesting_tags); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 EXPORT |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
parser (default export) |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
tokenizer (ondemand export) |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
weeder (ondemand export) |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
extractor (ondemand export) |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 SUBROUTINES |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 Iterator |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Syntactic sugar for iterator functionality. Not intended for direct use. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 tokenizer |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
Convert raw input string into units of interest.Weedout and ignore |
171
|
|
|
|
|
|
|
text not needed. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 weeder |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Sanitize input - remove weeds/unwanted text |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 extractor |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
Extract tokens embedded between specific tags.One can extract |
180
|
|
|
|
|
|
|
tokens between a specific tag or ask for all tokens embedded |
181
|
|
|
|
|
|
|
between all tags of interest. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head2 parser |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Intended interface to the outside unsuspecting world.Takes in the |
186
|
|
|
|
|
|
|
raw input, interested tags, token separator(regexp), unwanted text |
187
|
|
|
|
|
|
|
in tokens(regexp), unwanted tokens and all unwanted tokens between |
188
|
|
|
|
|
|
|
specific tags. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=head1 AUTHOR |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Varun Juyal, C<< <varunjuyal123 at yahoo.com> >> |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head1 BUGS |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Please report any bugs or feature requests to C<bug-fieldparser at rt.cpan.org>, or through |
197
|
|
|
|
|
|
|
the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=FieldParser>. I will be notified, and then you'll |
198
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 SUPPORT |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
perldoc FieldParser |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
You can also look for information at: |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=over 4 |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker (report bugs here) |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
L<http://rt.cpan.org/NoAuth/Bugs.html?Dist=FieldParser> |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
L<http://annocpan.org/dist/FieldParser> |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=item * CPAN Ratings |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
L<http://cpanratings.perl.org/d/FieldParser> |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=item * Search CPAN |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
L<http://search.cpan.org/dist/FieldParser/> |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=back |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Copyright 2013 Varun Juyal. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
238
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
239
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=cut |
244
|
|
|
|
|
|
|
|