line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Lingua::LinkParser::MatchPath::Lex; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
15
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
4
|
1
|
|
|
1
|
|
1334
|
use Exporter::Lite; |
|
1
|
|
|
|
|
867
|
|
|
1
|
|
|
|
|
6
|
|
5
|
1
|
|
|
1
|
|
8919
|
use Lex; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our @tokens = ( |
8
|
|
|
|
|
|
|
# word class |
9
|
|
|
|
|
|
|
POS => '_[pavding]_', |
10
|
|
|
|
|
|
|
WORD_REGEXP => '(?:/.+?/)[i]?', |
11
|
|
|
|
|
|
|
WORD => '\w+', |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# label class |
14
|
|
|
|
|
|
|
LABEL_REGEXP => '<\/.+?\/>', |
15
|
|
|
|
|
|
|
LABEL => '<.+?>', |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
POUND_SIGN => '#(?=[<(])', |
18
|
|
|
|
|
|
|
EXCLM_SIGN => '!(?=[<\w\/(])', |
19
|
|
|
|
|
|
|
AT_SIGN => '@(?=[<(])', |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
NEWLINE => '\n', |
22
|
|
|
|
|
|
|
EOR => ';', |
23
|
|
|
|
|
|
|
LPAREN => '[(]', |
24
|
|
|
|
|
|
|
RPAREN => '[)]', |
25
|
|
|
|
|
|
|
# QM => '[?]', |
26
|
|
|
|
|
|
|
# AND => '[&,]', |
27
|
|
|
|
|
|
|
OR => '[|]', |
28
|
|
|
|
|
|
|
PERCENT => '[%]', |
29
|
|
|
|
|
|
|
COMMENT => '^\s*#.+?$', |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
ERROR => '.+', |
32
|
|
|
|
|
|
|
); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub new { |
35
|
|
|
|
|
|
|
my $class = shift; |
36
|
|
|
|
|
|
|
my %opt = @_; |
37
|
|
|
|
|
|
|
bless { |
38
|
|
|
|
|
|
|
lexer => Lex->new(@tokens), |
39
|
|
|
|
|
|
|
debug => $opt{debug}, |
40
|
|
|
|
|
|
|
}, $class; |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
sub _get_tokens { |
44
|
|
|
|
|
|
|
my $self = shift; |
45
|
|
|
|
|
|
|
my $token; |
46
|
|
|
|
|
|
|
my ($name, $content); |
47
|
|
|
|
|
|
|
while($token = $self->{lexer}->nextToken){ |
48
|
|
|
|
|
|
|
($name, $content) = ($token->name(), $token->get); |
49
|
|
|
|
|
|
|
$name =~ s/.+:://; |
50
|
|
|
|
|
|
|
$content =~ s/\n$// if $name =~ /EOR$/; |
51
|
|
|
|
|
|
|
die "Error occurred during tokenizing text: ( $content )" if $name =~ /ERROR/; |
52
|
|
|
|
|
|
|
last unless $token->name =~ /(?:NEWLINE|COMMENT)$/; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
if (not $self->{lexer}->eof) { |
55
|
|
|
|
|
|
|
[ $name, $content ]; |
56
|
|
|
|
|
|
|
} |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# post-processing |
60
|
|
|
|
|
|
|
sub _pp_tokens { |
61
|
|
|
|
|
|
|
my $self = shift; |
62
|
|
|
|
|
|
|
my $token = $self->{token}; |
63
|
|
|
|
|
|
|
my @token; |
64
|
|
|
|
|
|
|
for ( my $i = 0; $i<@$token; ){ |
65
|
|
|
|
|
|
|
# one-step matching |
66
|
|
|
|
|
|
|
if( |
67
|
|
|
|
|
|
|
$token->[$i][0] =~ /^(?:POUND|EXCML|AT)_SIGN$/o && |
68
|
|
|
|
|
|
|
$token->[$i+1][0] =~ /^LABEL/o |
69
|
|
|
|
|
|
|
){ |
70
|
|
|
|
|
|
|
push |
71
|
|
|
|
|
|
|
@token, |
72
|
|
|
|
|
|
|
$token->[$i], |
73
|
|
|
|
|
|
|
[ 'LPAREN' => '(' ], |
74
|
|
|
|
|
|
|
$token->[$i+1], |
75
|
|
|
|
|
|
|
$token->[$i+2], |
76
|
|
|
|
|
|
|
[ 'RPAREN' => ')' ]; |
77
|
|
|
|
|
|
|
$i+=3; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
# append '@' if there is none before '(' |
80
|
|
|
|
|
|
|
elsif( |
81
|
|
|
|
|
|
|
$token->[$i][0] eq 'LPAREN' && |
82
|
|
|
|
|
|
|
$token->[$i-1][0] =~ /^(?:WORD|POS)/ |
83
|
|
|
|
|
|
|
){ |
84
|
|
|
|
|
|
|
push |
85
|
|
|
|
|
|
|
@token, |
86
|
|
|
|
|
|
|
[ 'AT_SIGN' => '@' ], |
87
|
|
|
|
|
|
|
$token->[$i]; |
88
|
|
|
|
|
|
|
$i+=1; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
else { |
92
|
|
|
|
|
|
|
push @token, $token->[$i]; |
93
|
|
|
|
|
|
|
$i++; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
$self->{token} = \@token; |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub load { |
100
|
|
|
|
|
|
|
my $self = shift; |
101
|
|
|
|
|
|
|
$self->{lexer}->from(shift); |
102
|
|
|
|
|
|
|
while( my $t = $self->_get_tokens() ){ |
103
|
|
|
|
|
|
|
push @{$self->{token}}, $t; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
$self->_pp_tokens; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub lex { |
110
|
|
|
|
|
|
|
my $self = shift; |
111
|
|
|
|
|
|
|
my $t = shift @{$self->{token}}; |
112
|
|
|
|
|
|
|
if( $t->[0] ){ |
113
|
|
|
|
|
|
|
printf (" - %-15s ==> %s\n", @{$t}[1,0]) if $self->{debug}; |
114
|
|
|
|
|
|
|
return @$t; |
115
|
|
|
|
|
|
|
} |
116
|
|
|
|
|
|
|
('', undef); |
117
|
|
|
|
|
|
|
} |