line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
package String::Tokenizer; |
3
|
|
|
|
|
|
|
|
4
|
2
|
|
|
2
|
|
46277
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
79
|
|
5
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
191
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
13
|
# Truthy handle_whitespace setting: tokenize() then splits on /(\s+)/,
# so each run of whitespace is kept as a token of its own.
use constant RETAIN_WHITESPACE => 1; |
|
2
|
|
|
|
|
18
|
|
|
2
|
|
|
|
|
176
|
|
10
|
2
|
|
|
2
|
|
11
|
# False handle_whitespace setting (the constructor's default): tokenize()
# then splits on /\s+/, so whitespace is discarded.
use constant IGNORE_WHITESPACE => 0; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1246
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
### constructor |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Constructor. Can be invoked on the class name or on an existing
# instance (the instance's class is then reused). The new object starts
# with no tokens, no delimiter and whitespace ignored; any extra
# arguments are forwarded unchanged to tokenize().
sub new {
    my ($invocant, @tokenize_args) = @_;
    my $class = ref($invocant) || $invocant;
    my $tokenizer = bless(
        {
            tokens            => [],
            delimiter         => undef,
            handle_whitespace => IGNORE_WHITESPACE,
        },
        $class
    );
    # only tokenize when the caller actually gave us something to work on
    if (@tokenize_args) {
        $tokenizer->tokenize(@tokenize_args);
    }
    return $tokenizer;
}
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
### methods |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Compiles the delimiter set used by tokenize().
#   $delimiter - string whose individual characters are each a delimiter
# Every character is regex-escaped when it is a non-word character, the
# characters are joined into an alternation, and the compiled pattern is
# stored in $self->{delimiter}. The compiled pattern is also the return
# value (result of the final assignment).
sub setDelimiter {
    my ($self, $delimiter) = @_;
    my @alternatives;
    foreach my $char (split //, $delimiter) {
        # work on a copy so the escaped form is what gets collected
        (my $escaped = $char) =~ s/(\W)/\\$1/g;
        push @alternatives, $escaped;
    }
    my $pattern = join "|", @alternatives;
    $self->{delimiter} = qr/$pattern/;
}
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Sets the whitespace mode consulted by tokenize(): a true value keeps
# whitespace runs as tokens, a false value discards them.
# Returns the value that was stored.
sub handleWhitespace {
    my $self = shift;
    my $mode = shift;
    return $self->{handle_whitespace} = $mode;
}
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Splits $string into tokens and stores them in $self->{tokens}.
#   $string            - the text to tokenize
#   $delimiter         - optional; when defined it is passed to
#                        setDelimiter() before tokenizing
#   $handle_whitespace - optional; when defined it is passed to
#                        handleWhitespace() before tokenizing
# When either optional argument is omitted the value already stored on
# the object is used. Returns the array reference that was stored.
sub tokenize {
    my ($self, $string, $delimiter, $handle_whitespace) = @_;
    # if we have a delimiter passed in then use it
    $self->setDelimiter($delimiter) if defined $delimiter;
    # if we are asking about whitespace then handle it
    $self->handleWhitespace($handle_whitespace) if defined $handle_whitespace;

    # split everything by whitespace no matter what (runs of multiple
    # whitespace characters count as one separator); the capturing
    # form keeps each whitespace run as a token of its own
    my @tokens = $self->{handle_whitespace}
        ? split(/(\s+)/, $string)
        : split(/\s+/, $string);

    if ($self->{delimiter}) {
        my $delimiter_re = $self->{delimiter};
        my @split_tokens;
        foreach my $piece (@tokens) {
            # a piece without any delimiter is passed through untouched
            unless ($piece =~ /$delimiter_re/) {
                push @split_tokens => $piece;
                next;
            }
            # otherwise walk the piece character by character
            my $token;
            foreach my $char (split //, $piece) {
                if ($char =~ /^$delimiter_re$/) {
                    # flush the token in progress, if any. A pending
                    # token is never empty, so "defined" is the whole
                    # test; the former extra /^.*$/ check silently
                    # dropped pending tokens containing a newline.
                    push @split_tokens => $token if defined $token;
                    # the delimiter character is a token in its own right
                    push @split_tokens => $char;
                    $token = undef;
                }
                else {
                    # start a new token if needed, then add the character
                    $token = "" unless defined $token;
                    $token .= $char;
                }
            }
            # flush whatever followed the last delimiter
            push @split_tokens => $token if defined $token;
        }
        @tokens = @split_tokens;
    }
    return $self->{tokens} = \@tokens;
}
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Returns the tokens from the most recent tokenize() call: as a list in
# list context, otherwise as the object's own array reference (not a
# copy).
sub getTokens {
    my $self = shift;
    my $tokens = $self->{tokens};
    return @{$tokens} if wantarray;
    return $tokens;
}
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Creates a String::Tokenizer::Iterator over this object's current
# tokens. The iterator is handed the very same array reference held in
# $self->{tokens}; no copy is made here.
sub iterator {
    my $self = shift;
    my $tokens = $self->{tokens};
    return String::Tokenizer::Iterator->new($tokens);
}
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
package String::Tokenizer::Iterator; |
124
|
|
|
|
|
|
|
|
125
|
2
|
|
|
2
|
|
16
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
71
|
|
126
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
1731
|
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# Constructor, restricted to code compiled in package String::Tokenizer;
# a call from any other package dies.
#   $tokens - array reference of tokens to iterate over (stored as-is)
# The iterator starts at index 0.
sub new {
    my ($calling_package) = caller();
    die "Insufficient Access Priviledges : Only String::Tokenizer can create String::Tokenizer::Iterator instances"
        unless $calling_package eq "String::Tokenizer";
    my ($invocant, $tokens) = @_;
    my $class = ref($invocant) || $invocant;
    return bless(
        {
            tokens => $tokens,
            index  => 0,
        },
        $class
    );
}
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Rewinds the iterator to the first token by setting the index to 0.
sub reset {
    my $self = shift;
    return $self->{index} = 0;
}
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# Returns 1 while the index is still inside the token list, 0 otherwise.
sub hasNextToken {
    my $self = shift;
    my $token_count = scalar @{ $self->{tokens} };
    return 1 if $self->{index} < $token_count;
    return 0;
}
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# True when at least one token has already been stepped over, i.e. the
# index is greater than 0.
sub hasPrevToken {
    my $self = shift;
    my $position = $self->{index};
    return $position > 0;
}
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Returns the token at the current index and then advances the index by
# one. Returns undef, leaving the index untouched, once the index has
# reached the end of the token list.
sub nextToken {
    my $self = shift;
    if ($self->{index} < scalar @{ $self->{tokens} }) {
        my $position = $self->{index}++;
        return $self->{tokens}->[$position];
    }
    return undef;
}
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# Moves the index back by one and returns the token found there.
# Returns undef, leaving the index untouched, when the index is already
# at (or below) 0.
sub prevToken {
    my $self = shift;
    if ($self->{index} > 0) {
        my $position = --$self->{index};
        return $self->{tokens}->[$position];
    }
    return undef;
}
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Returns the token just before the current index, i.e. the one most
# recently handed out by nextToken(). Returns undef when the index is at
# (or below) 0, because no token has been handed out yet.
sub currentToken {
    my ($self) = @_;
    # Without this guard index 0 would read element -1, which Perl
    # resolves to the LAST token of the list.
    return undef if ($self->{index} <= 0);
    return $self->{tokens}->[$self->{index} - 1];
}
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Returns the token at the current index without moving the index.
# Returns undef when the index is at/below 0 or at/after the end of the
# token list.
# NOTE(review): because of the "index > 0" requirement this also returns
# undef before the first nextToken() call even though a first token
# exists -- confirm this is intended.
sub lookAheadToken {
    my $self = shift;
    my $position = $self->{index};
    return undef
        unless $position > 0
            && $position < scalar @{ $self->{tokens} };
    return $self->{tokens}->[$position];
}
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Collects tokens up to, but not including, the next occurrence of
# $token_to_match and leaves the index just past the matched token.
# Returns the collected tokens. Returns nothing when the upcoming token
# is already the match (the index still advances past it), and also
# returns nothing -- restoring the index -- when no match is found.
sub collectTokensUntil {
    my ($self, $token_to_match) = @_;
    # lookAheadToken() can return undef; test for that first so the
    # string comparison does not raise an "uninitialized value" warning
    my $upcoming = $self->lookAheadToken();
    if (defined($upcoming) && $upcoming eq $token_to_match) {
        # nothing to accumulate: just step over the match
        $self->nextToken();
        return;
    }

    my @collection;
    # remember where we started so we can rewind on failure
    my $old_index = $self->{index};
    my $matched;
    while ($self->hasNextToken()) {
        my $token = $self->nextToken();
        if ($token ne $token_to_match) {
            push @collection => $token;
        }
        else {
            $matched++;
            last;
        }
    }
    unless ($matched) {
        # no match: go back to where we started and return nothing
        $self->{index} = $old_index;
        return;
    }
    return @collection;
}
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# Advances the index to just past the next occurrence of
# $token_to_match. Returns 1 on success. When no match is found the
# index is restored to where it started and 0 is returned.
sub skipTokensUntil {
    my ($self, $token_to_match) = @_;
    # lookAheadToken() can return undef; test for that first so the
    # string comparison does not raise an "uninitialized value" warning
    my $upcoming = $self->lookAheadToken();
    if (defined($upcoming) && $upcoming eq $token_to_match) {
        # the upcoming token is the match: step over it and succeed
        $self->nextToken();
        return 1;
    }
    # remember where we started so we can rewind on failure
    my $old_index = $self->{index};
    while ($self->hasNextToken()) {
        # return success as soon as we consume the matching token
        return 1 if ($self->nextToken() eq $token_to_match);
    }
    # no match: go back to where we started and report failure
    $self->{index} = $old_index;
    return 0;
}
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# Advances the index by one when the upcoming token, as reported by
# lookAheadToken(), consists solely of whitespace; otherwise does
# nothing.
sub skipTokenIfWhitespace {
    my ($self) = @_;
    # lookAheadToken() can return undef; matching undef against the
    # pattern would raise an "uninitialized value" warning
    my $upcoming = $self->lookAheadToken();
    $self->{index}++ if defined($upcoming) && $upcoming =~ /^\s+$/;
}
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
# Moves the index forward by $num_token_to_skip positions; a missing or
# false count (including 0) is treated as 1. No bounds check is applied.
# Returns the new index.
sub skipTokens {
    my $self = shift;
    my $count = shift || 1;
    return $self->{index} += $count;
}
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
# skipToken is an alias for skipTokens: both names refer to the same sub.
*skipToken = \&skipTokens; |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
1; |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
__END__ |