line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Pistachio::Tokenizer; |
2
|
|
|
|
|
|
|
# ABSTRACT: provides iterator(), which turns source code text into a Pistachio::Token iterator |
3
|
|
|
|
|
|
|
|
4
|
3
|
|
|
3
|
|
4026
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
114
|
|
5
|
3
|
|
|
3
|
|
15
|
use warnings; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
129
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.10'; # VERSION |
7
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
5659
|
use Module::Load; |
|
3
|
|
|
|
|
3574
|
|
|
3
|
|
|
|
|
17
|
|
9
|
3
|
|
|
3
|
|
169
|
use Carp 'croak'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
242
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use constant { |
12
|
3
|
|
|
|
|
3549
|
LNG => 0, |
13
|
|
|
|
|
|
|
IDX => 1, |
14
|
|
|
|
|
|
|
GOT => 2, |
15
|
|
|
|
|
|
|
MAX => 3, |
16
|
|
|
|
|
|
|
TOK => 4 |
17
|
3
|
|
|
3
|
|
15
|
}; |
|
3
|
|
|
|
|
5
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# @param string $type Object type. |
20
|
|
|
|
|
|
|
# @param Pistachio::Language $lang Language object. |
21
|
|
|
|
|
|
|
# @return Pistachio::Tokenizer |
22
|
|
|
|
|
|
|
sub new { |
23
|
4
|
|
|
4
|
0
|
14
|
my $type = shift; |
24
|
4
|
50
|
33
|
|
|
44
|
my $lang = ref $_[0] eq 'Pistachio::Language' && $_[0] |
25
|
|
|
|
|
|
|
or croak 'A Pistachio::Language is required'; |
26
|
4
|
|
|
|
|
29
|
bless [$lang], $type; |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
30
|
|
|
|
|
|
|
# @param scalarref $text reference to text |
31
|
|
|
|
|
|
|
# @return coderef Pistachio::Token iterator |
32
|
|
|
|
|
|
|
sub iterator { |
33
|
4
|
|
|
4
|
0
|
1613
|
my ($this, $text) = @_; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# initialize iterator data |
36
|
4
|
|
|
|
|
25
|
$this->[TOK] = $this->[LNG]->tokens($text); |
37
|
4
|
|
|
|
|
150
|
$this->[MAX] = scalar @{$this->[TOK]}; |
|
4
|
|
|
|
|
12
|
|
38
|
4
|
|
|
|
|
46
|
$this->[IDX] = 0; |
39
|
4
|
|
|
|
|
10
|
$this->[GOT] = 0; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# iterator closure |
42
|
|
|
|
|
|
|
sub { |
43
|
24
|
100
|
|
24
|
|
116
|
return undef if $this->_finished; |
44
|
23
|
|
|
|
|
53
|
my $token = $this->_transform($this->_curr); |
45
|
23
|
|
|
|
|
40
|
$this->[GOT]++; |
46
|
23
|
|
|
|
|
55
|
$this->_next; |
47
|
23
|
|
|
|
|
61
|
$token; |
48
|
4
|
|
|
|
|
26
|
}; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
52
|
|
|
|
|
|
|
# @return int 1 if we're finished iterating, or 0 |
53
|
|
|
|
|
|
|
sub _finished { |
54
|
24
|
|
|
24
|
|
34
|
my $this = shift; |
55
|
24
|
100
|
|
|
|
106
|
$this->[MAX] - $this->[GOT] < 1 ? 1 : 0; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
59
|
|
|
|
|
|
|
# @return Pistachio::Token |
60
|
|
|
|
|
|
|
sub _curr { |
61
|
86
|
|
|
86
|
|
106
|
my $this = shift; |
62
|
86
|
|
|
|
|
263
|
$this->[TOK]->[$this->[IDX]]; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer |
66
|
|
|
|
|
|
|
# @return int 1 if there is a previous element, or 0 |
67
|
60
|
100
|
|
60
|
|
219
|
sub _has_prev { shift->[IDX] > 0 ? 1 : 0 } |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
70
|
|
|
|
|
|
|
# @return int 1 if there is a next element, or 0 |
71
|
|
|
|
|
|
|
sub _has_next { |
72
|
41
|
|
|
41
|
|
52
|
my $this = shift; |
73
|
41
|
50
|
|
|
|
157
|
$this->[MAX] - $this->[IDX] > 0 ? 1 : 0; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
77
|
|
|
|
|
|
|
# @return Pistachio::Token, or undef |
78
|
|
|
|
|
|
|
sub _prev { |
79
|
28
|
|
|
28
|
|
43
|
my $this = shift; |
80
|
28
|
50
|
|
|
|
45
|
return undef unless $this->_has_prev; |
81
|
28
|
|
|
|
|
42
|
$this->[IDX]--; |
82
|
28
|
|
|
|
|
59
|
$this->_curr; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
86
|
|
|
|
|
|
|
# @return Pistachio::Token, or undef |
87
|
|
|
|
|
|
|
sub _next { |
88
|
35
|
|
|
35
|
|
49
|
my $this = shift; |
89
|
35
|
50
|
|
|
|
68
|
return undef unless $this->_has_next; |
90
|
35
|
|
|
|
|
52
|
$this->[IDX]++; |
91
|
35
|
|
|
|
|
71
|
$this->_curr; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
95
|
|
|
|
|
|
|
# @param string $meth '_prev' or '_next' |
96
|
|
|
|
|
|
|
# @return Pistachio::Token, or undef |
97
|
|
|
|
|
|
|
sub _skip_whitespace { |
98
|
20
|
|
|
20
|
|
29
|
my ($this, $meth) = @_; |
99
|
20
|
100
|
|
|
|
43
|
while ($_ = $this->$meth) { return $_ if !$_->whitespace } |
|
40
|
|
|
|
|
117
|
|
100
|
0
|
|
|
|
|
0
|
undef; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
104
|
|
|
|
|
|
|
# @param Pistachio::Token $token |
105
|
|
|
|
|
|
|
# @return Pistachio::Token |
106
|
|
|
|
|
|
|
sub _transform { |
107
|
23
|
|
|
23
|
|
35
|
my ($this, $token) = @_; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Some token types will get transformed into |
110
|
|
|
|
|
|
|
# more specific types by transformation rules. |
111
|
|
|
|
|
|
|
|
112
|
23
|
|
|
|
|
27
|
my $into; |
113
|
23
|
|
|
|
|
28
|
for my $rule (@{$this->[LNG]->transform_rules}) { |
|
23
|
|
|
|
|
76
|
|
114
|
391
|
100
|
|
|
|
1027
|
$token->match($rule->type, $rule->value) or next; |
115
|
|
|
|
|
|
|
|
116
|
39
|
100
|
|
|
|
117
|
$rule->prec and do { |
117
|
32
|
100
|
|
|
|
104
|
$this->_has_prev or next; |
118
|
12
|
100
|
|
|
|
35
|
$this->_juxtaposed($rule->prec, '_prev') or next; |
119
|
|
|
|
|
|
|
}; |
120
|
|
|
|
|
|
|
|
121
|
9
|
100
|
|
|
|
27
|
$rule->succ and do { |
122
|
6
|
50
|
|
|
|
14
|
$this->_has_next or next; |
123
|
6
|
100
|
|
|
|
18
|
$this->_juxtaposed($rule->succ, '_next') or next; |
124
|
|
|
|
|
|
|
}; |
125
|
|
|
|
|
|
|
|
126
|
5
|
|
|
|
|
16
|
$into = $rule->into; |
127
|
|
|
|
|
|
|
} |
128
|
23
|
100
|
|
|
|
530
|
$token->type($into) if $into; |
129
|
|
|
|
|
|
|
|
130
|
23
|
|
|
|
|
49
|
$token; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# @param Pistachio::Tokenizer $this |
134
|
|
|
|
|
|
|
# @param arrayref $neighbors (type, val) pairs that might either |
135
|
|
|
|
|
|
|
# precede or succeed the current |
136
|
|
|
|
|
|
|
# Pistachio::Token, depending on $meth |
137
|
|
|
|
|
|
|
# @param string $meth '_prev' or '_next' |
138
|
|
|
|
|
|
|
# @return int 1 if the current pair is juxtaposed |
139
|
|
|
|
|
|
|
# with the pairs from $neighbors, or 0 |
140
|
|
|
|
|
|
|
sub _juxtaposed { |
141
|
18
|
|
|
18
|
|
32
|
my ($this, $neighbors, $meth) = @_; |
142
|
|
|
|
|
|
|
|
143
|
18
|
|
|
|
|
29
|
my ($match, $idx) = (1, $this->[IDX]); |
144
|
|
|
|
|
|
|
|
145
|
18
|
|
|
|
|
32
|
for my $n (@$neighbors) { |
146
|
20
|
|
|
|
|
44
|
my $token = $this->_skip_whitespace($meth); |
147
|
20
|
|
|
8
|
|
84
|
my ($type, $val) = ($n->[0], sub {shift eq $n->[1]}); |
|
8
|
|
|
|
|
51
|
|
148
|
20
|
|
66
|
|
|
88
|
$match = $token && $token->match($type, $val); |
149
|
20
|
|
|
|
|
96
|
$this->[IDX] = $idx; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
18
|
|
|
|
|
72
|
$match; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
1; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
__END__ |