| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
package String::Tokenizer; |
|
3
|
|
|
|
|
|
|
|
|
4
|
2
|
|
|
2
|
|
46277
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
79
|
|
|
5
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
191
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
13
|
use constant RETAIN_WHITESPACE => 1; |
|
|
2
|
|
|
|
|
18
|
|
|
|
2
|
|
|
|
|
176
|
|
|
10
|
2
|
|
|
2
|
|
11
|
use constant IGNORE_WHITESPACE => 0; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
1246
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
### constructor |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub new { |
|
15
|
10
|
|
|
10
|
1
|
7921
|
my ($_class, @args) = @_; |
|
16
|
10
|
|
33
|
|
|
61
|
my $class = ref($_class) || $_class; |
|
17
|
10
|
|
|
|
|
46
|
my $string_tokenizer = { |
|
18
|
|
|
|
|
|
|
tokens => [], |
|
19
|
|
|
|
|
|
|
delimiter => undef, |
|
20
|
|
|
|
|
|
|
handle_whitespace => IGNORE_WHITESPACE |
|
21
|
|
|
|
|
|
|
}; |
|
22
|
10
|
|
|
|
|
29
|
bless($string_tokenizer, $class); |
|
23
|
10
|
100
|
|
|
|
43
|
$string_tokenizer->tokenize(@args) if @args; |
|
24
|
10
|
|
|
|
|
31
|
return $string_tokenizer; |
|
25
|
|
|
|
|
|
|
} |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
### methods |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub setDelimiter { |
|
30
|
8
|
|
|
8
|
1
|
16
|
my ($self, $delimiter) = @_; |
|
31
|
8
|
|
|
|
|
31
|
my $delimiter_reg_exp = join "\|" => map { s/(\W)/\\$1/g; $_ } split // => $delimiter; |
|
|
26
|
|
|
|
|
149
|
|
|
|
26
|
|
|
|
|
73
|
|
|
32
|
8
|
|
|
|
|
202
|
$self->{delimiter} = qr/$delimiter_reg_exp/; |
|
33
|
|
|
|
|
|
|
} |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
sub handleWhitespace { |
|
36
|
3
|
|
|
3
|
1
|
7
|
my ($self, $value) = @_; |
|
37
|
3
|
|
|
|
|
9
|
$self->{handle_whitespace} = $value; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub tokenize { |
|
41
|
10
|
|
|
10
|
1
|
932
|
my ($self, $string, $delimiter, $handle_whitespace) = @_; |
|
42
|
|
|
|
|
|
|
# if we have a delimiter passed in then use it |
|
43
|
10
|
100
|
|
|
|
49
|
$self->setDelimiter($delimiter) if defined $delimiter; |
|
44
|
|
|
|
|
|
|
# if we are asking about whitespace then handle it |
|
45
|
10
|
100
|
|
|
|
41
|
$self->handleWhitespace($handle_whitespace) if defined $handle_whitespace; |
|
46
|
|
|
|
|
|
|
# if the two above are not handled, then the object will use |
|
47
|
|
|
|
|
|
|
# the values set already. |
|
48
|
|
|
|
|
|
|
# split everything by whitespace no matter what |
|
49
|
|
|
|
|
|
|
# (possible multiple occurances of white space too) |
|
50
|
10
|
|
|
|
|
13
|
my @tokens; |
|
51
|
10
|
100
|
|
|
|
34
|
if ($self->{handle_whitespace}) { |
|
52
|
2
|
|
|
|
|
35
|
@tokens = split /(\s+)/ => $string; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
else { |
|
55
|
8
|
|
|
|
|
55
|
@tokens = split /\s+/ => $string; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
10
|
100
|
|
|
|
38
|
if ($self->{delimiter}) { |
|
58
|
|
|
|
|
|
|
# create the delimiter reg-ex |
|
59
|
|
|
|
|
|
|
# escape all non-alpha-numeric |
|
60
|
|
|
|
|
|
|
# characters, just to be safe |
|
61
|
9
|
|
|
|
|
18
|
my $delimiter = $self->{delimiter}; |
|
62
|
|
|
|
|
|
|
# loop through the tokens |
|
63
|
|
|
|
|
|
|
@tokens = map { |
|
64
|
|
|
|
|
|
|
# if the token contains a delimiter then ... |
|
65
|
9
|
100
|
|
|
|
15
|
if (/$delimiter/) { |
|
|
148
|
|
|
|
|
592
|
|
|
66
|
60
|
|
|
|
|
64
|
my ($token, @_tokens); |
|
67
|
|
|
|
|
|
|
# split the token up into characters |
|
68
|
|
|
|
|
|
|
# and the loop through all the characters |
|
69
|
60
|
|
|
|
|
146
|
foreach my $char (split //) { |
|
70
|
|
|
|
|
|
|
# if the character is a delimiter |
|
71
|
196
|
100
|
|
|
|
839
|
if ($char =~ /^$delimiter$/) { |
|
72
|
|
|
|
|
|
|
# and we already have a token in the works |
|
73
|
75
|
100
|
66
|
|
|
289
|
if (defined($token) && $token =~ /^.*$/) { |
|
74
|
|
|
|
|
|
|
# add the token to the |
|
75
|
|
|
|
|
|
|
# temp tokens list |
|
76
|
31
|
|
|
|
|
47
|
push @_tokens => $token; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
# and then push our delimiter character |
|
79
|
|
|
|
|
|
|
# onto the temp tokens list |
|
80
|
75
|
|
|
|
|
96
|
push @_tokens => $char; |
|
81
|
|
|
|
|
|
|
# now we need to undefine our token |
|
82
|
75
|
|
|
|
|
163
|
$token = undef; |
|
83
|
|
|
|
|
|
|
} |
|
84
|
|
|
|
|
|
|
# if the character is not a delimiter then |
|
85
|
|
|
|
|
|
|
else { |
|
86
|
|
|
|
|
|
|
# check to make sure the token is defined |
|
87
|
121
|
100
|
|
|
|
236
|
$token = "" unless defined $token; |
|
88
|
|
|
|
|
|
|
# and then add the chracter to it |
|
89
|
121
|
|
|
|
|
222
|
$token .= $char; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
# now push any remaining token onto |
|
93
|
|
|
|
|
|
|
# the temp tokens list |
|
94
|
60
|
100
|
|
|
|
158
|
push @_tokens => $token if defined $token; |
|
95
|
|
|
|
|
|
|
# and return tokens |
|
96
|
60
|
|
|
|
|
193
|
@_tokens; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
# if our token does not have |
|
99
|
|
|
|
|
|
|
# the delimiter in it |
|
100
|
|
|
|
|
|
|
else { |
|
101
|
|
|
|
|
|
|
# just return it |
|
102
|
88
|
|
|
|
|
201
|
$_ |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
} @tokens; |
|
105
|
|
|
|
|
|
|
} |
|
106
|
10
|
|
|
|
|
53
|
$self->{tokens} = \@tokens; |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub getTokens { |
|
110
|
6
|
|
|
6
|
1
|
1544
|
my ($self) = @_; |
|
111
|
|
|
|
|
|
|
return wantarray ? |
|
112
|
6
|
100
|
|
|
|
34
|
@{$self->{tokens}} |
|
|
1
|
|
|
|
|
91
|
|
|
113
|
|
|
|
|
|
|
: |
|
114
|
|
|
|
|
|
|
$self->{tokens}; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub iterator { |
|
118
|
4
|
|
|
4
|
1
|
2440
|
my ($self) = @_; |
|
119
|
|
|
|
|
|
|
# returns a copy of the array |
|
120
|
4
|
|
|
|
|
19
|
return String::Tokenizer::Iterator->new($self->{tokens}); |
|
121
|
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
package String::Tokenizer::Iterator; |
|
124
|
|
|
|
|
|
|
|
|
125
|
2
|
|
|
2
|
|
16
|
use strict; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
71
|
|
|
126
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
1731
|
|
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub new { |
|
129
|
5
|
100
|
|
5
|
|
56
|
((caller())[0] eq "String::Tokenizer") |
|
130
|
|
|
|
|
|
|
|| die "Insufficient Access Priviledges : Only String::Tokenizer can create String::Tokenizer::Iterator instances"; |
|
131
|
4
|
|
|
|
|
36
|
my ($_class, $tokens) = @_; |
|
132
|
4
|
|
33
|
|
|
55
|
my $class = ref($_class) || $_class; |
|
133
|
4
|
|
|
|
|
14
|
my $iterator = { |
|
134
|
|
|
|
|
|
|
tokens => $tokens, |
|
135
|
|
|
|
|
|
|
index => 0 |
|
136
|
|
|
|
|
|
|
}; |
|
137
|
4
|
|
|
|
|
13
|
bless($iterator, $class); |
|
138
|
4
|
|
|
|
|
13
|
return $iterator; |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub reset { |
|
142
|
1
|
|
|
1
|
|
2
|
my ($self) = @_; |
|
143
|
1
|
|
|
|
|
4
|
$self->{index} = 0; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub hasNextToken { |
|
147
|
108
|
|
|
108
|
|
5207
|
my ($self) = @_; |
|
148
|
108
|
100
|
|
|
|
199
|
return ($self->{index} < scalar @{$self->{tokens}}) ? 1 : 0; |
|
|
108
|
|
|
|
|
359
|
|
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub hasPrevToken { |
|
152
|
26
|
|
|
26
|
|
35
|
my ($self) = @_; |
|
153
|
26
|
|
|
|
|
58
|
return ($self->{index} > 0); |
|
154
|
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub nextToken { |
|
157
|
118
|
|
|
118
|
|
1566
|
my ($self) = @_; |
|
158
|
118
|
100
|
|
|
|
166
|
return undef if ($self->{index} >= scalar @{$self->{tokens}}); |
|
|
118
|
|
|
|
|
296
|
|
|
159
|
117
|
|
|
|
|
469
|
return $self->{tokens}->[$self->{index}++]; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub prevToken { |
|
163
|
26
|
|
|
26
|
|
93
|
my ($self) = @_; |
|
164
|
26
|
100
|
|
|
|
59
|
return undef if ($self->{index} <= 0); |
|
165
|
25
|
|
|
|
|
103
|
return $self->{tokens}->[--$self->{index}]; |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
sub currentToken { |
|
169
|
25
|
|
|
25
|
|
74
|
my ($self) = @_; |
|
170
|
25
|
|
|
|
|
78
|
return $self->{tokens}->[$self->{index} - 1]; |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub lookAheadToken { |
|
174
|
41
|
|
|
41
|
|
26505
|
my ($self) = @_; |
|
175
|
40
|
|
|
|
|
175
|
return undef if ( $self->{index} <= 0 |
|
176
|
41
|
100
|
100
|
|
|
163
|
|| $self->{index} >= scalar @{$self->{tokens}}); |
|
177
|
38
|
|
|
|
|
196
|
return $self->{tokens}->[$self->{index}]; |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
sub collectTokensUntil { |
|
181
|
5
|
|
|
5
|
|
12
|
my ($self, $token_to_match) = @_; |
|
182
|
|
|
|
|
|
|
# if this matches our current token ... |
|
183
|
|
|
|
|
|
|
# then we just return nothing as there |
|
184
|
|
|
|
|
|
|
# is nothing to accumulate |
|
185
|
5
|
100
|
|
|
|
12
|
if ($self->lookAheadToken() eq $token_to_match) { |
|
186
|
|
|
|
|
|
|
# then just advance it one |
|
187
|
1
|
|
|
|
|
4
|
$self->nextToken(); |
|
188
|
|
|
|
|
|
|
# and return nothing |
|
189
|
1
|
|
|
|
|
5
|
return; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# if it doesnt match our current token then, ... |
|
193
|
4
|
|
|
|
|
4
|
my @collection; |
|
194
|
|
|
|
|
|
|
# store the index we start at |
|
195
|
4
|
|
|
|
|
8
|
my $old_index = $self->{index}; |
|
196
|
4
|
|
|
|
|
4
|
my $matched; |
|
197
|
|
|
|
|
|
|
# loop through the tokens |
|
198
|
4
|
|
|
|
|
9
|
while ($self->hasNextToken()) { |
|
199
|
23
|
|
|
|
|
42
|
my $token = $self->nextToken(); |
|
200
|
23
|
100
|
|
|
|
48
|
if ($token ne $token_to_match) { |
|
201
|
20
|
|
|
|
|
52
|
push @collection => $token; |
|
202
|
|
|
|
|
|
|
} |
|
203
|
|
|
|
|
|
|
else { |
|
204
|
3
|
|
|
|
|
4
|
$matched++; |
|
205
|
3
|
|
|
|
|
6
|
last; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
} |
|
208
|
4
|
100
|
|
|
|
10
|
unless ($matched) { |
|
209
|
|
|
|
|
|
|
# reset back to where we started, and ... |
|
210
|
1
|
|
|
|
|
3
|
$self->{index} = $old_index; |
|
211
|
|
|
|
|
|
|
# and return nothing |
|
212
|
1
|
|
|
|
|
7
|
return; |
|
213
|
|
|
|
|
|
|
} |
|
214
|
|
|
|
|
|
|
# and return our collection |
|
215
|
3
|
|
|
|
|
36
|
return @collection; |
|
216
|
|
|
|
|
|
|
} |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub skipTokensUntil { |
|
220
|
3
|
|
|
3
|
|
7
|
my ($self, $token_to_match) = @_; |
|
221
|
|
|
|
|
|
|
# if this matches our current token ... |
|
222
|
3
|
100
|
|
|
|
9
|
if ($self->lookAheadToken() eq $token_to_match) { |
|
223
|
|
|
|
|
|
|
# then just advance it one |
|
224
|
1
|
|
|
|
|
4
|
$self->nextToken(); |
|
225
|
|
|
|
|
|
|
# and return success |
|
226
|
1
|
|
|
|
|
6
|
return 1; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
# if it doesnt match our current token then, ... |
|
229
|
|
|
|
|
|
|
# store the index we start at |
|
230
|
2
|
|
|
|
|
3
|
my $old_index = $self->{index}; |
|
231
|
|
|
|
|
|
|
# and loop through the tokens |
|
232
|
2
|
|
|
|
|
7
|
while ($self->hasNextToken()) { |
|
233
|
|
|
|
|
|
|
# return success if we match our token |
|
234
|
18
|
100
|
|
|
|
31
|
return 1 if ($self->nextToken() eq $token_to_match); |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
# otherwise we didnt match, and should |
|
237
|
|
|
|
|
|
|
# reset back to where we started, and ... |
|
238
|
1
|
|
|
|
|
3
|
$self->{index} = $old_index; |
|
239
|
|
|
|
|
|
|
# return failure |
|
240
|
1
|
|
|
|
|
6
|
return 0; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub skipTokenIfWhitespace { |
|
244
|
4
|
|
|
4
|
|
6
|
my ($self) = @_; |
|
245
|
4
|
100
|
|
|
|
10
|
$self->{index}++ if $self->lookAheadToken() =~ /^\s+$/; |
|
246
|
|
|
|
|
|
|
} |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub skipTokens { |
|
249
|
13
|
|
|
13
|
|
41
|
my ($self, $num_token_to_skip) = @_; |
|
250
|
13
|
|
100
|
|
|
52
|
$num_token_to_skip ||= 1; |
|
251
|
13
|
|
|
|
|
35
|
$self->{index} += $num_token_to_skip; |
|
252
|
|
|
|
|
|
|
} |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
*skipToken = \&skipTokens; |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
1; |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
__END__ |