|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Pistachio::Tokenizer;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: provides iterator(), which turns source code text into a Pistachio::Token iterator  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
4
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
4026
 | 
 use strict;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
    | 
| 
5
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
15
 | 
 use warnings;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.10'; # VERSION  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
5659
 | 
 use Module::Load;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3574
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
9
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
169
 | 
 use Carp 'croak';  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
242
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use constant {  | 
| 
12
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3549
 | 
     LNG => 0,  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     IDX => 1,   | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     GOT => 2,   | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     MAX => 3,   | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     TOK => 4  | 
| 
17
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
15
 | 
 };  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param string $type Object type.  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Language $lang Language object.   | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return Pistachio::Tokenizer  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
23
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
14
 | 
     my $type = shift;  | 
| 
24
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
44
 | 
     my $lang = ref $_[0] eq 'Pistachio::Language' && $_[0]  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                or croak 'A Pistachio::Language is required';  | 
| 
26
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     bless [$lang], $type;  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param scalarref $text    reference to text  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return coderef    Pistachio::Token iterator  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub iterator {  | 
| 
33
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
0
  
 | 
1613
 | 
     my ($this, $text) = @_;  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # initialize iterator data  | 
| 
36
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $this->[TOK] = $this->[LNG]->tokens($text);  | 
| 
37
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
150
 | 
     $this->[MAX] = scalar @{$this->[TOK]};  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
38
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
     $this->[IDX] = 0;  | 
| 
39
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     $this->[GOT] = 0;  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # iterator closure  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub {  | 
| 
43
 | 
24
 | 
  
100
  
 | 
 
 | 
  
24
  
 | 
 
 | 
116
 | 
         return undef if $this->_finished;  | 
| 
44
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
         my $token = $this->_transform($this->_curr);  | 
| 
45
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
         $this->[GOT]++;  | 
| 
46
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
         $this->_next;  | 
| 
47
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
         $token;  | 
| 
48
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     };  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return int    1 if we're finished iterating, or 0  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _finished {   | 
| 
54
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
 
 | 
34
 | 
     my $this = shift;  | 
| 
55
 | 
24
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
106
 | 
     $this->[MAX] - $this->[GOT] < 1 ? 1 : 0;  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return Pistachio::Token  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _curr {   | 
| 
61
 | 
86
 | 
 
 | 
 
 | 
  
86
  
 | 
 
 | 
106
 | 
     my $this = shift;  | 
| 
62
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
263
 | 
     $this->[TOK]->[$this->[IDX]];  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return int    1 if there is a previous element, or 0  | 
| 
67
 | 
60
 | 
  
100
  
 | 
 
 | 
  
60
  
 | 
 
 | 
219
 | 
 sub _has_prev { shift->[IDX] > 0 ? 1 : 0 }  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return int    1 if there is a next element, or 0  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _has_next {  | 
| 
72
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
52
 | 
     my $this = shift;  | 
| 
73
 | 
41
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
157
 | 
     $this->[MAX] - $this->[IDX] > 0 ? 1 : 0;  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return Pistachio::Token, or undef  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _prev {  | 
| 
79
 | 
28
 | 
 
 | 
 
 | 
  
28
  
 | 
 
 | 
43
 | 
     my $this = shift;  | 
| 
80
 | 
28
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     return undef unless $this->_has_prev;  | 
| 
81
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
     $this->[IDX]--;  | 
| 
82
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
     $this->_curr;  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return Pistachio::Token, or undef  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _next {  | 
| 
88
 | 
35
 | 
 
 | 
 
 | 
  
35
  
 | 
 
 | 
49
 | 
     my $this = shift;  | 
| 
89
 | 
35
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
68
 | 
     return undef unless $this->_has_next;  | 
| 
90
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     $this->[IDX]++;  | 
| 
91
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
     $this->_curr;  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param string $meth    '_prev' or '_next'  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return Pistachio::Token, or undef  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _skip_whitespace {  | 
| 
98
 | 
20
 | 
 
 | 
 
 | 
  
20
  
 | 
 
 | 
29
 | 
     my ($this, $meth) = @_;  | 
| 
99
 | 
20
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     while ($_ = $this->$meth) { return $_ if !$_->whitespace }  | 
| 
 
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
117
 | 
    | 
| 
100
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     undef;  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Token $token  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return Pistachio::Token  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _transform {  | 
| 
107
 | 
23
 | 
 
 | 
 
 | 
  
23
  
 | 
 
 | 
35
 | 
     my ($this, $token) = @_;  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Some token types will get transformed into   | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # more specific types by transformation rules.  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     my $into;  | 
| 
113
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     for my $rule (@{$this->[LNG]->transform_rules}) {  | 
| 
 
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
    | 
| 
114
 | 
391
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1027
 | 
         $token->match($rule->type, $rule->value) or next;  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
39
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
117
 | 
         $rule->prec and do {  | 
| 
117
 | 
32
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
104
 | 
            $this->_has_prev or next;  | 
| 
118
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
            $this->_juxtaposed($rule->prec, '_prev') or next;  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
         $rule->succ and do {  | 
| 
122
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
            $this->_has_next or next;  | 
| 
123
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
            $this->_juxtaposed($rule->succ, '_next') or next;  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
         $into = $rule->into;  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
128
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
530
 | 
     $token->type($into) if $into;  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     $token;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param Pistachio::Tokenizer $this  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param arrayref $neighbors    (type, val) pairs that might either  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #                               precede or succeed the current  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #                               Pistachio::Token, depending on $meth  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @param string $meth    '_prev' or '_next'  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # @return int    1 if the current pair is juxtaposed  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #                with the pairs from $neighbors, or 0  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _juxtaposed {  | 
| 
141
 | 
18
 | 
 
 | 
 
 | 
  
18
  
 | 
 
 | 
32
 | 
     my ($this, $neighbors, $meth) = @_;  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     my ($match, $idx) = (1, $this->[IDX]);  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     for my $n (@$neighbors) {  | 
| 
146
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
         my $token = $this->_skip_whitespace($meth);  | 
| 
147
 | 
20
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
84
 | 
         my ($type, $val) = ($n->[0], sub {shift eq $n->[1]});  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
    | 
| 
148
 | 
20
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
88
 | 
         $match = $token && $token->match($type, $val);  | 
| 
149
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
96
 | 
         $this->[IDX] = $idx;  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
     $match;  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |