| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Parse::Highlife::Tokenizer; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
434
|
use Parse::Highlife::Utils qw(params offset_to_coordinate get_source_info extend_match); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
78
|
|
|
4
|
1
|
|
|
1
|
|
864
|
use Parse::Highlife::Token::Regex; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
38
|
|
|
5
|
1
|
|
|
1
|
|
492
|
use Parse::Highlife::Token::Delimited; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
25
|
|
|
6
|
1
|
|
|
1
|
|
465
|
use Parse::Highlife::Token::Characters; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
27
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
5
|
use Data::Dump qw(dump); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
769
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Constructor: creates an empty tokenizer object blessed into the given
# class and hands all remaining arguments to _init(), whose return value
# is returned to the caller.
sub new
{
    my $class     = shift;
    my @init_args = @_;

    my $object = bless( {}, $class );
    return $object -> _init( @init_args );
}
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
# Initialises a freshly blessed tokenizer and returns it.
#   tokens     - list of token objects
#   tokennames - list of token names, kept separately to preserve order
#   debug      - debug flag, set to 1
# Any further arguments are accepted but not used.
sub _init
{
    my $self = shift;

    @{$self}{ 'tokens', 'tokennames', 'debug' } = ( [], [], 1 );

    return $self;
}
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Returns the token object that was registered under the given name.
#
#   $tokenname - name to look up; an undefined name is treated like the
#                empty string and therefore reported as unknown
#
# The name is searched in 'tokennames'; the object at the same index of
# 'tokens' is returned (first hit wins).
# Dies with "ERR: I do not know about a token named '...'" when the name
# is not registered.
sub get_token
{
    my( $self, $tokenname ) = @_;

    # avoid "uninitialized value" warnings in the comparison and message
    $tokenname = '' unless defined $tokenname;

    my $names = $self->{'tokennames'};
    foreach my $pos ( 0 .. $#{$names} ) {
        return $self->{'tokens'}->[$pos]
            if $names->[$pos] eq $tokenname;
    }

    die "ERR: I do not know about a token named '$tokenname'\n";
}
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Defines a token and returns the token object.
#
# Named parameters (unpacked by params()):
#   -name        name of the token (required, must not be empty)
#   -regex       defines a Parse::Highlife::Token::Regex token
#   -start/-end  together define a Parse::Highlife::Token::Delimited token
#   -escape      defaults to a backslash; not inspected here
#   -characters  defines a Parse::Highlife::Token::Characters token
#
# If an already registered token has the same definition, that object is
# returned as is and nothing new is registered (in particular the new
# name is not recorded). Otherwise a new token object is built from the
# original arguments, gets its 'name' set and is appended to 'tokens',
# with the name appended to 'tokennames'.
#
# Dies when the name is empty or when none of the three kinds of
# definition is complete.
sub token
{
    my( $self, $name, $regex, $start, $end, $escape, $characters )
        = params( \@_,
            -name       => '',
            -regex      => '',
            -start      => '',
            -end        => '',
            -escape     => "\\",
            -characters => '',
        );

    # everything after the invocant goes unchanged to the token constructor
    my @args = splice( @_, 1 );

    die "ERR: token has no name.\n" unless length $name;

    # which kinds of definition were supplied
    my $has_regex      = length( $regex );
    my $has_delimiters = length( $start ) && length( $end );
    my $has_characters = length( $characters );

    # try to find a same token definition that can be reused
    foreach my $known ( @{$self->{'tokens'}} ) {
        my $kind = ref $known;

        my $is_same_definition =
               (    $has_regex
                 && $kind eq 'Parse::Highlife::Token::Regex'
                 && $known->{'regex'} eq $regex )
            || (    $has_delimiters
                 && $kind eq 'Parse::Highlife::Token::Delimited'
                 && $known->{'start'} eq $start
                 && $known->{'end'} eq $end )
            || (    $has_characters
                 && $kind eq 'Parse::Highlife::Token::Characters'
                 && $known->{'characters'} eq $characters );

        return $known if $is_same_definition;
    }

    # nothing to reuse: pick the token class, regex first, then
    # delimiters, then characters
    my $token_class =
          $has_regex      ? 'Parse::Highlife::Token::Regex'
        : $has_delimiters ? 'Parse::Highlife::Token::Delimited'
        : $has_characters ? 'Parse::Highlife::Token::Characters'
        :                   undef;

    die "ERR: incomplete token definition.\n" unless defined $token_class;

    my $token = $token_class -> new( @args );
    $token->{'name'} = $name;

    # both lists are kept in step: same index, same token
    push @{$self->{'tokens'}}, $token;
    push @{$self->{'tokennames'}}, $name;

    return $token;
}
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# Splits a string into a sequence of matches and returns them as an
# array reference.
#
#   $string - the text to tokenize
#
# At every offset the registered tokens are tried in definition order and
# the first one whose match() returns a true value wins. match() returns
# 0 or a hash with info; the hash is completed here with
#   'token-name' - name the token was registered under (only the
#                  Tokenizer knows this)
#   'is-ignored' - result of the token's is_ignored()
# and scanning continues at its 'offset-after-match'.
#
# Characters that no token matches are not an error: consecutive ones are
# collected and emitted as one record with the token class name
# 'Parse::Highlife::Token::Unknown', an empty 'token-name' and
# 'is-ignored' set to 1. That record is passed through extend_match()
# (NOTE(review): presumably this adds position details derived from
# 'first-offset' - confirm in Parse::Highlife::Utils).
#
# Compared to the earlier implementation:
#   * 'first-offset' of an unknown run is the offset of its first
#     character; it used to be computed after the offset had already
#     been moved past the following match
#   * an unknown run at the very end of the string is emitted too
#     instead of being dropped
#   * a match that does not advance the offset is skipped, as accepting
#     it would repeat the same match forever
sub tokenize
{
    my( $self, $string ) = @_;
    my $tokens = [];

    # builds the record for a run of characters that no token matched
    my $unknown_token = sub {
        my( $characters, $first_offset ) = @_;
        my $unknown =
            extend_match(
                $string,
                {
                    'token-classname'   => 'Parse::Highlife::Token::Unknown',
                    'matched-substring' => $characters,
                    'first-offset'      => $first_offset,
                    'token-name'        => '',
                }
            );
        $unknown->{'is-ignored'} = 1; # unknown tokens are ignored (good?)
        return $unknown;
    };

    my $string_length = length $string;
    $string_length = 0 unless defined $string_length; # undefined string

    my $i = 0;
    my $unknown_characters = '';
    while( $i < $string_length ) {

        # find the first matching token
        my $match;
        foreach my $t ( 0 .. $#{$self->{'tokens'}} ) {
            my $token     = $self->{'tokens'}->[$t];
            my $candidate = $token -> match( $string, $i ); # returns 0 or hash with info
            next unless $candidate;

            # a match that consumes nothing would never let $i advance
            my $offset_after = $candidate->{'offset-after-match'};
            next unless defined $offset_after && $offset_after > $i;

            $candidate->{'token-name'} = $self->{'tokennames'}->[$t];
            $candidate->{'is-ignored'} = $token -> is_ignored();
            $match = $candidate;
            last;
        }

        if( $match ) {
            # save unknown token; $i still is the offset where the match
            # starts, so the run begins length($unknown_characters) before
            if( length $unknown_characters ) {
                push @{$tokens},
                    $unknown_token->(
                        $unknown_characters,
                        $i - length( $unknown_characters )
                    );
                $unknown_characters = '';
            }
            push @{$tokens}, $match;
            $i = $match->{'offset-after-match'};
        }
        else {
            # no token matches here: remember the character and move on
            $unknown_characters .= substr $string, $i, 1;
            $i ++;
        }
    }

    # unknown characters at the end of the string
    if( length $unknown_characters ) {
        push @{$tokens},
            $unknown_token->(
                $unknown_characters,
                $string_length - length( $unknown_characters )
            );
    }

    return $tokens;
}
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
1; |