line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# RDF::Trine::Parser::Turtle::Lexer |
2
|
|
|
|
|
|
|
# ----------------------------------------------------------------------------- |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
RDF::Trine::Parser::Turtle::Lexer - Tokenizer for parsing Turtle, TriG, and N-Triples |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes RDF::Trine::Parser::Turtle::Lexer version 1.018 |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use RDF::Trine::Parser::Lexer; |
15
|
|
|
|
|
|
|
my $l = RDF::Trine::Parser::Lexer->new( file => $fh ); |
16
|
|
|
|
|
|
|
while (my $t = $l->get_token) { |
17
|
|
|
|
|
|
|
... |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 METHODS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=over 4 |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
package RDF::Trine::Parser::Turtle::Lexer; |
27
|
|
|
|
|
|
|
|
28
|
68
|
|
|
68
|
|
440
|
use RDF::Trine::Parser::Turtle::Constants; |
|
68
|
|
|
|
|
156
|
|
|
68
|
|
|
|
|
6333
|
|
29
|
68
|
|
|
68
|
|
973
|
use 5.010; |
|
68
|
|
|
|
|
226
|
|
30
|
68
|
|
|
68
|
|
327
|
use strict; |
|
68
|
|
|
|
|
155
|
|
|
68
|
|
|
|
|
1217
|
|
31
|
68
|
|
|
68
|
|
319
|
use warnings; |
|
68
|
|
|
|
|
168
|
|
|
68
|
|
|
|
|
12477
|
|
32
|
68
|
|
|
68
|
|
32218
|
use Moose; |
|
68
|
|
|
|
|
28702906
|
|
|
68
|
|
|
|
|
482
|
|
33
|
68
|
|
|
68
|
|
470263
|
use Data::Dumper; |
|
68
|
|
|
|
|
405
|
|
|
68
|
|
|
|
|
3816
|
|
34
|
68
|
|
|
68
|
|
428
|
use RDF::Trine::Error; |
|
68
|
|
|
|
|
385
|
|
|
68
|
|
|
|
|
630
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
our $VERSION; |
37
|
|
|
|
|
|
|
BEGIN { |
38
|
68
|
|
|
68
|
|
120393
|
$VERSION = '1.018'; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $r_nameChar_extra = qr'[-0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]'o; |
42
|
|
|
|
|
|
|
my $r_nameStartChar_minus_underscore = qr'[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{00010000}-\x{000EFFFF}]'o; |
43
|
|
|
|
|
|
|
my $r_nameStartChar = qr/[A-Za-z_\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/; |
44
|
|
|
|
|
|
|
my $r_nameChar = qr/${r_nameStartChar}|[-0-9\x{b7}\x{0300}-\x{036f}\x{203F}-\x{2040}]/; |
45
|
|
|
|
|
|
|
my $r_prefixName = qr/(?:(?!_)${r_nameStartChar})(?:$r_nameChar)*/; |
46
|
|
|
|
|
|
|
my $r_nameChar_test = qr"(?:$r_nameStartChar|$r_nameChar_extra)"; |
47
|
|
|
|
|
|
|
my $r_double = qr'[+-]?([0-9]+\.[0-9]*[eE][+-]?[0-9]+|\.[0-9]+[eE][+-]?[0-9]+|[0-9]+[eE][+-]?[0-9]+)'; |
48
|
|
|
|
|
|
|
my $r_decimal = qr'[+-]?(([0-9]+\.[0-9]+)|\.([0-9])+)'; |
49
|
|
|
|
|
|
|
my $r_integer = qr'[+-]?[0-9]+'; |
50
|
|
|
|
|
|
|
my $r_PN_CHARS_U = qr/[_A-Za-z_\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/; |
51
|
|
|
|
|
|
|
my $r_PN_CHARS = qr"${r_PN_CHARS_U}|[-0-9\x{00B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]"; |
52
|
|
|
|
|
|
|
my $r_bnode_id = qr"(?:${r_PN_CHARS_U}|[0-9])((${r_PN_CHARS}|[.])*${r_PN_CHARS})?"; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
my $r_PN_CHARS_BASE = qr/([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])/; |
55
|
|
|
|
|
|
|
# my $r_PN_CHARS_U = qr/([_]|${r_PN_CHARS_BASE})/; |
56
|
|
|
|
|
|
|
# my $r_PN_CHARS = qr/${r_PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]/; |
57
|
|
|
|
|
|
|
my $r_PN_PREFIX = qr/(${r_PN_CHARS_BASE}((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/; |
58
|
|
|
|
|
|
|
my $r_PN_LOCAL_ESCAPED = qr{(\\([-~.!&'()*+,;=/?#@%_\$]))|%[0-9A-Fa-f]{2}}; |
59
|
|
|
|
|
|
|
my $r_PN_LOCAL = qr/((${r_PN_CHARS_U}|[:0-9]|${r_PN_LOCAL_ESCAPED})((${r_PN_CHARS}|${r_PN_LOCAL_ESCAPED}|[:.])*(${r_PN_CHARS}|[:]|${r_PN_LOCAL_ESCAPED}))?)/; |
60
|
|
|
|
|
|
|
my $r_PN_LOCAL_BNODE = qr/((${r_PN_CHARS_U}|[0-9])((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/; |
61
|
|
|
|
|
|
|
my $r_PNAME_NS = qr/((${r_PN_PREFIX})?:)/; |
62
|
|
|
|
|
|
|
my $r_PNAME_LN = qr/(${r_PNAME_NS}${r_PN_LOCAL})/; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
has file => ( |
65
|
|
|
|
|
|
|
is => 'ro', |
66
|
|
|
|
|
|
|
isa => 'FileHandle', |
67
|
|
|
|
|
|
|
required => 1, |
68
|
|
|
|
|
|
|
); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
has linebuffer => ( |
71
|
|
|
|
|
|
|
is => 'rw', |
72
|
|
|
|
|
|
|
isa => 'Str', |
73
|
|
|
|
|
|
|
default => '', |
74
|
|
|
|
|
|
|
); |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
has line => ( |
77
|
|
|
|
|
|
|
is => 'rw', |
78
|
|
|
|
|
|
|
isa => 'Int', |
79
|
|
|
|
|
|
|
default => 1, |
80
|
|
|
|
|
|
|
); |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
has column => ( |
83
|
|
|
|
|
|
|
is => 'rw', |
84
|
|
|
|
|
|
|
isa => 'Int', |
85
|
|
|
|
|
|
|
default => 1, |
86
|
|
|
|
|
|
|
); |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
has buffer => ( |
89
|
|
|
|
|
|
|
is => 'rw', |
90
|
|
|
|
|
|
|
isa => 'Str', |
91
|
|
|
|
|
|
|
default => '', |
92
|
|
|
|
|
|
|
); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
has start_column => ( |
95
|
|
|
|
|
|
|
is => 'rw', |
96
|
|
|
|
|
|
|
isa => 'Int', |
97
|
|
|
|
|
|
|
default => -1, |
98
|
|
|
|
|
|
|
); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
has start_line => ( |
101
|
|
|
|
|
|
|
is => 'rw', |
102
|
|
|
|
|
|
|
isa => 'Int', |
103
|
|
|
|
|
|
|
default => -1, |
104
|
|
|
|
|
|
|
); |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub BUILDARGS { |
107
|
412
|
|
|
412
|
1
|
869
|
my $class = shift; |
108
|
412
|
50
|
|
|
|
1323
|
if (scalar(@_) == 1) { |
109
|
412
|
|
|
|
|
11468
|
return { file => shift }; |
110
|
|
|
|
|
|
|
} else { |
111
|
0
|
|
|
|
|
0
|
return $class->SUPER::BUILDARGS(@_); |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item C<< new_token ( $type, @values ) >> |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Returns a new token with the given type and optional values, capturing the |
118
|
|
|
|
|
|
|
current line and column of the input data. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub new_token { |
123
|
9482
|
|
|
9482
|
1
|
16164
|
my $self = shift; |
124
|
9482
|
|
|
|
|
16215
|
my $type = shift; |
125
|
9482
|
|
|
|
|
257420
|
my $start_line = $self->start_line; |
126
|
9482
|
|
|
|
|
236045
|
my $start_col = $self->start_column; |
127
|
9482
|
|
|
|
|
221919
|
my $line = $self->line; |
128
|
9482
|
|
|
|
|
224318
|
my $col = $self->column; |
129
|
9482
|
|
|
|
|
41069
|
return RDF::Trine::Parser::Turtle::Token->fast_constructor( |
130
|
|
|
|
|
|
|
$type, |
131
|
|
|
|
|
|
|
$start_line, |
132
|
|
|
|
|
|
|
$start_col, |
133
|
|
|
|
|
|
|
$line, |
134
|
|
|
|
|
|
|
$col, |
135
|
|
|
|
|
|
|
\@_, |
136
|
|
|
|
|
|
|
); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
my %CHAR_TOKEN = ( |
140
|
|
|
|
|
|
|
'.' => DOT, |
141
|
|
|
|
|
|
|
';' => SEMICOLON, |
142
|
|
|
|
|
|
|
'[' => LBRACKET, |
143
|
|
|
|
|
|
|
']' => RBRACKET, |
144
|
|
|
|
|
|
|
'(' => LPAREN, |
145
|
|
|
|
|
|
|
')' => RPAREN, |
146
|
|
|
|
|
|
|
'{' => LBRACE, |
147
|
|
|
|
|
|
|
'}' => RBRACE, |
148
|
|
|
|
|
|
|
',' => COMMA, |
149
|
|
|
|
|
|
|
'=' => EQUALS, |
150
|
|
|
|
|
|
|
); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
my %METHOD_TOKEN = ( |
153
|
|
|
|
|
|
|
# q[#] => '_get_comment', |
154
|
|
|
|
|
|
|
q[@] => '_get_keyword', |
155
|
|
|
|
|
|
|
q[<] => '_get_iriref', |
156
|
|
|
|
|
|
|
q[_] => '_get_bnode', |
157
|
|
|
|
|
|
|
q['] => '_get_single_literal', |
158
|
|
|
|
|
|
|
q["] => '_get_double_literal', |
159
|
|
|
|
|
|
|
q[:] => '_get_pname', |
160
|
|
|
|
|
|
|
(map {$_ => '_get_number'} (0 .. 9, '-', '+')) |
161
|
|
|
|
|
|
|
); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=item C<< get_token >> |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
Returns the next token present in the input. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=cut |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub get_token { |
170
|
9620
|
|
|
9620
|
1
|
15357
|
my $self = shift; |
171
|
9620
|
|
|
|
|
14766
|
while (1) { |
172
|
18827
|
100
|
|
|
|
49559
|
unless (length($self->{buffer})) { |
173
|
516
|
|
|
|
|
1392
|
$self->fill_buffer; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
# warn "getting token with buffer: " . Dumper($self->{buffer}); |
176
|
18827
|
|
|
|
|
40430
|
my $c = $self->_peek_char(); |
177
|
18827
|
100
|
66
|
|
|
78803
|
return unless (defined($c) and length($c)); |
178
|
|
|
|
|
|
|
|
179
|
18526
|
|
|
|
|
500320
|
$self->start_column( $self->column ); |
180
|
18526
|
|
|
|
|
447738
|
$self->start_line( $self->line ); |
181
|
|
|
|
|
|
|
|
182
|
18526
|
100
|
100
|
|
|
57434
|
if ($c eq '.' and $self->{buffer} =~ $r_decimal) { |
183
|
1
|
|
|
|
|
5
|
return $self->_get_number(); |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
18525
|
100
|
|
|
|
87213
|
if (defined(my $name = $CHAR_TOKEN{$c})) { $self->_get_char; return $self->new_token($name); } |
|
2953
|
100
|
|
|
|
8499
|
|
|
2953
|
100
|
|
|
|
7613
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
187
|
3672
|
|
|
|
|
13827
|
elsif (defined(my $method = $METHOD_TOKEN{$c})) { return $self->$method() } |
188
|
|
|
|
|
|
|
elsif ($c eq '#') { |
189
|
|
|
|
|
|
|
# we're ignoring comment tokens, but we could return them here instead of falling through to the 'next': |
190
|
196
|
|
|
|
|
697
|
$self->_get_comment(); |
191
|
196
|
|
|
|
|
5323
|
next; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
elsif ($c =~ /[ \r\n\t]/) { |
194
|
9011
|
|
66
|
|
|
57176
|
while (defined($c) and length($c) and $c =~ /[\t\r\n ]/) { |
|
|
|
100
|
|
|
|
|
195
|
18407
|
|
|
|
|
46829
|
$self->_get_char; |
196
|
18407
|
|
|
|
|
33858
|
$c = $self->_peek_char; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# we're ignoring whitespace tokens, but we could return them here instead of falling through to the 'next': |
200
|
|
|
|
|
|
|
# return $self->new_token(WS); |
201
|
9011
|
|
|
|
|
18283
|
next; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
elsif ($c =~ /[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/) { |
204
|
2649
|
100
|
|
|
|
16937
|
if ($self->{buffer} =~ /^a(?!:)\s/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
205
|
60
|
|
|
|
|
203
|
$self->_get_char; |
206
|
60
|
|
|
|
|
187
|
return $self->new_token(A); |
207
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^(?:true|false)(?!:)\b/) { |
208
|
10
|
|
|
|
|
49
|
my $bool = $self->_read_length($+[0]); |
209
|
10
|
|
|
|
|
43
|
return $self->new_token(BOOLEAN, $bool); |
210
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^BASE(?!:)\b/i) { |
211
|
4
|
|
|
|
|
22
|
$self->_read_length(4); |
212
|
4
|
|
|
|
|
16
|
return $self->new_token(SPARQLBASE); |
213
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^PREFIX(?!:)\b/i) { |
214
|
3
|
|
|
|
|
16
|
$self->_read_length(6); |
215
|
3
|
|
|
|
|
15
|
return $self->new_token(SPARQLPREFIX); |
216
|
|
|
|
|
|
|
} else { |
217
|
2572
|
|
|
|
|
7579
|
return $self->_get_pname; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
38
|
|
|
|
|
146
|
elsif ($c eq '^') { $self->_read_word('^^'); return $self->new_token(HATHAT); } |
|
37
|
|
|
|
|
115
|
|
221
|
|
|
|
|
|
|
else { |
222
|
|
|
|
|
|
|
# Carp::cluck sprintf("Unexpected byte '$c' (0x%02x)", ord($c)); |
223
|
6
|
|
|
|
|
41
|
return $self->_throw_error(sprintf("Unexpected byte '%s' (0x%02x)", $c, ord($c))); |
224
|
|
|
|
|
|
|
} |
225
|
0
|
|
|
|
|
0
|
warn 'byte: ' . Dumper($c); |
226
|
|
|
|
|
|
|
} |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=begin private |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=cut |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=item C<< fill_buffer >> |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Fills the internal parse buffer with a new line from the input source. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=cut |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
sub fill_buffer { |
240
|
4590
|
|
|
4590
|
1
|
7026
|
my $self = shift; |
241
|
4590
|
50
|
|
|
|
122495
|
unless (length($self->buffer)) { |
242
|
4590
|
|
|
|
|
111133
|
my $line = $self->file->getline; |
243
|
4590
|
100
|
|
|
|
114401
|
if (defined($line)) { |
244
|
3698
|
|
|
|
|
10073
|
$self->{buffer} .= $line; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item C<< check_for_bom >> |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Checks the input buffer for a Unicode BOM, and consumes it if it is present. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub check_for_bom { |
256
|
387
|
|
|
387
|
1
|
746
|
my $self = shift; |
257
|
387
|
|
|
|
|
1207
|
my $c = $self->_peek_char(); |
258
|
387
|
100
|
100
|
|
|
2668
|
if (defined($c) and $c eq "\x{FEFF}") { |
259
|
1
|
|
|
|
|
4
|
$self->_get_char; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
sub _get_char_safe { |
264
|
5888
|
|
|
5888
|
|
9434
|
my $self = shift; |
265
|
5888
|
|
|
|
|
9209
|
my $char = shift; |
266
|
5888
|
|
|
|
|
11781
|
my $c = $self->_get_char; |
267
|
5888
|
50
|
|
|
|
14091
|
if ($c ne $char) { |
268
|
0
|
|
|
|
|
0
|
$self->_throw_error("Expected '$char' but got '$c'"); |
269
|
|
|
|
|
|
|
} |
270
|
5888
|
|
|
|
|
9712
|
return $c; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
sub _get_char_fill_buffer { |
274
|
26
|
|
|
26
|
|
44
|
my $self = shift; |
275
|
26
|
50
|
|
|
|
74
|
if (length($self->{buffer}) == 0) { |
276
|
0
|
|
|
|
|
0
|
$self->fill_buffer; |
277
|
0
|
0
|
|
|
|
0
|
if (length($self->{buffer}) == 0) { |
278
|
0
|
|
|
|
|
0
|
return; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
} |
281
|
26
|
|
|
|
|
72
|
my $c = substr($self->{buffer}, 0, 1, ''); |
282
|
26
|
50
|
|
|
|
62
|
if ($c eq "\n") { |
283
|
|
|
|
|
|
|
# $self->{linebuffer} = ''; |
284
|
0
|
|
|
|
|
0
|
$self->{line} = 1+$self->{line}; |
285
|
0
|
|
|
|
|
0
|
$self->{column} = 1; |
286
|
|
|
|
|
|
|
} else { |
287
|
|
|
|
|
|
|
# $self->{linebuffer} .= $c; |
288
|
26
|
|
|
|
|
49
|
$self->{column} = 1+$self->{column}; |
289
|
|
|
|
|
|
|
} |
290
|
26
|
|
|
|
|
49
|
return $c; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub _get_char { |
294
|
33052
|
|
|
33052
|
|
48907
|
my $self = shift; |
295
|
33052
|
|
|
|
|
83739
|
my $c = substr($self->{buffer}, 0, 1, ''); |
296
|
33052
|
100
|
|
|
|
67754
|
if ($c eq "\n") { |
297
|
|
|
|
|
|
|
# $self->{linebuffer} = ''; |
298
|
3555
|
|
|
|
|
6856
|
$self->{line} = 1+$self->{line}; |
299
|
3555
|
|
|
|
|
5589
|
$self->{column} = 1; |
300
|
|
|
|
|
|
|
} else { |
301
|
|
|
|
|
|
|
# $self->{linebuffer} .= $c; |
302
|
29497
|
|
|
|
|
49954
|
$self->{column} = 1+$self->{column}; |
303
|
|
|
|
|
|
|
} |
304
|
33052
|
|
|
|
|
56848
|
return $c; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
sub _peek_char { |
308
|
47760
|
|
|
47760
|
|
70117
|
my $self = shift; |
309
|
47760
|
100
|
|
|
|
110593
|
if (length($self->{buffer}) == 0) { |
310
|
4047
|
|
|
|
|
10886
|
$self->fill_buffer; |
311
|
4047
|
100
|
|
|
|
14168
|
if (length($self->{buffer}) == 0) { |
312
|
587
|
|
|
|
|
1839
|
return; |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
} |
315
|
47173
|
|
|
|
|
84402
|
my $c = substr($self->{buffer}, 0, 1); |
316
|
47173
|
|
|
|
|
185426
|
return $c; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub _read_word { |
320
|
507
|
|
|
507
|
|
855
|
my $self = shift; |
321
|
507
|
|
|
|
|
898
|
my $word = shift; |
322
|
507
|
|
|
|
|
1678
|
while (length($self->{buffer}) < length($word)) { |
323
|
0
|
|
|
|
|
0
|
$self->fill_buffer; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
507
|
100
|
|
|
|
1649
|
if (substr($self->{buffer}, 0, length($word)) ne $word) { |
327
|
1
|
|
|
|
|
6
|
$self->_throw_error("Expected '$word'"); |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
506
|
|
|
|
|
1186
|
my $lines = ($word =~ tr/\n//); |
331
|
506
|
|
|
|
|
1373
|
my $lastnl = rindex($word, "\n"); |
332
|
506
|
|
|
|
|
992
|
my $cols = length($word) - $lastnl - 1; |
333
|
506
|
|
|
|
|
1230
|
$self->{lines} += $lines; |
334
|
506
|
50
|
|
|
|
1175
|
if ($lines) { |
335
|
0
|
|
|
|
|
0
|
$self->{column} = $cols; |
336
|
|
|
|
|
|
|
} else { |
337
|
506
|
|
|
|
|
917
|
$self->{column} += $cols; |
338
|
|
|
|
|
|
|
} |
339
|
506
|
|
|
|
|
1519
|
substr($self->{buffer}, 0, length($word), ''); |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
sub _read_length { |
343
|
6029
|
|
|
6029
|
|
10249
|
my $self = shift; |
344
|
6029
|
|
|
|
|
11821
|
my $len = shift; |
345
|
6029
|
|
|
|
|
17341
|
while (length($self->{buffer}) < $len) { |
346
|
0
|
|
|
|
|
0
|
$self->fill_buffer; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
6029
|
|
|
|
|
18268
|
my $word = substr($self->{buffer}, 0, $len, ''); |
350
|
6029
|
|
|
|
|
13075
|
my $lines = ($word =~ tr/\n//); |
351
|
6029
|
|
|
|
|
11980
|
my $lastnl = rindex($word, "\n"); |
352
|
6029
|
|
|
|
|
11149
|
my $cols = length($word) - $lastnl - 1; |
353
|
6029
|
|
|
|
|
11498
|
$self->{lines} += $lines; |
354
|
6029
|
100
|
|
|
|
12455
|
if ($lines) { |
355
|
31
|
|
|
|
|
58
|
$self->{column} = $cols; |
356
|
|
|
|
|
|
|
} else { |
357
|
5998
|
|
|
|
|
10064
|
$self->{column} += $cols; |
358
|
|
|
|
|
|
|
} |
359
|
6029
|
|
|
|
|
17251
|
return $word; |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
sub _get_pname { |
363
|
3119
|
|
|
3119
|
|
4968
|
my $self = shift; |
364
|
3119
|
|
|
|
|
5215
|
my $prefix = ''; |
365
|
|
|
|
|
|
|
|
366
|
3119
|
100
|
|
|
|
39991
|
if ($self->{buffer} =~ /^$r_PNAME_LN/) { |
367
|
2839
|
|
|
|
|
10908
|
my $ln = $self->_read_length($+[0]); |
368
|
2839
|
|
|
|
|
13920
|
my ($ns,$local) = ($ln =~ /^([^:]*:)(.*)$/); |
369
|
68
|
|
|
68
|
|
640
|
no warnings 'uninitialized'; |
|
68
|
|
|
|
|
185
|
|
|
68
|
|
|
|
|
154519
|
|
370
|
2839
|
|
|
|
|
7221
|
$local =~ s{\\([-~.!&'()*+,;=:/?#@%_\$])}{$1}g; |
371
|
2839
|
|
|
|
|
7357
|
return $self->new_token(PREFIXNAME, $ns, $local); |
372
|
|
|
|
|
|
|
} else { |
373
|
280
|
|
|
|
|
2100
|
$self->{buffer} =~ $r_PNAME_NS; |
374
|
280
|
|
|
|
|
1150
|
my $ns = $self->_read_length($+[0]); |
375
|
280
|
|
|
|
|
952
|
return $self->new_token(PREFIXNAME, $ns); |
376
|
|
|
|
|
|
|
} |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
sub _get_iriref { |
380
|
1876
|
|
|
1876
|
|
3694
|
my $self = shift; |
381
|
1876
|
|
|
|
|
5482
|
$self->_get_char_safe(q[<]); |
382
|
1876
|
|
|
|
|
3300
|
my $iri = ''; |
383
|
1876
|
|
|
|
|
3066
|
while (1) { |
384
|
3755
|
|
|
|
|
7876
|
my $c = $self->_peek_char; |
385
|
3755
|
50
|
|
|
|
9391
|
last unless defined($c); |
386
|
3755
|
100
|
|
|
|
18247
|
if (substr($self->{buffer}, 0, 1) eq '\\') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
387
|
11
|
|
|
|
|
36
|
$self->_get_char_safe('\\'); |
388
|
11
|
|
|
|
|
25
|
my $esc = $self->_get_char; |
389
|
11
|
50
|
|
|
|
56
|
if ($esc eq '\\') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
390
|
0
|
|
|
|
|
0
|
$iri .= "\\"; |
391
|
|
|
|
|
|
|
} elsif ($esc eq 'U') { |
392
|
3
|
|
|
|
|
9
|
my $codepoint = $self->_read_length(8); |
393
|
3
|
100
|
|
|
|
20
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
394
|
1
|
|
|
|
|
4
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
395
|
|
|
|
|
|
|
} |
396
|
2
|
|
|
|
|
12
|
$iri .= chr(hex($codepoint)); |
397
|
|
|
|
|
|
|
} elsif ($esc eq 'u') { |
398
|
6
|
|
|
|
|
22
|
my $codepoint = $self->_read_length(4); |
399
|
6
|
100
|
|
|
|
40
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
400
|
1
|
|
|
|
|
6
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
401
|
|
|
|
|
|
|
} |
402
|
5
|
|
|
|
|
27
|
my $char = chr(hex($codepoint)); |
403
|
5
|
100
|
|
|
|
24
|
if ($char =~ /[<>" {}|\\^`]/) { |
404
|
3
|
|
|
|
|
30
|
$self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char))); |
405
|
|
|
|
|
|
|
} |
406
|
2
|
|
|
|
|
5
|
$iri .= $char; |
407
|
|
|
|
|
|
|
} else { |
408
|
2
|
|
|
|
|
12
|
$self->_throw_error("Unrecognized iri escape '$esc'"); |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^[^<>\x00-\x20\\"{}|^`]+/) { |
411
|
1875
|
|
|
|
|
6702
|
$iri .= $self->_read_length($+[0]); |
412
|
|
|
|
|
|
|
} elsif (substr($self->{buffer}, 0, 1) eq '>') { |
413
|
1865
|
|
|
|
|
3806
|
last; |
414
|
|
|
|
|
|
|
} else { |
415
|
4
|
|
|
|
|
25
|
$self->_throw_error("Got '$c' while expecting IRI character"); |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
} |
418
|
1865
|
|
|
|
|
4918
|
$self->_get_char_safe(q[>]); |
419
|
1865
|
|
|
|
|
4793
|
return $self->new_token(IRI, $iri); |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub _get_bnode { |
423
|
83
|
|
|
83
|
|
183
|
my $self = shift; |
424
|
83
|
|
|
|
|
275
|
$self->_read_word('_:'); |
425
|
83
|
50
|
|
|
|
1156
|
unless ($self->{buffer} =~ /^${r_bnode_id}/o) { |
426
|
0
|
|
|
|
|
0
|
$self->_throw_error("Expected: name"); |
427
|
|
|
|
|
|
|
} |
428
|
83
|
|
|
|
|
379
|
my $name = substr($self->{buffer}, 0, $+[0]); |
429
|
83
|
|
|
|
|
292
|
$self->_read_word($name); |
430
|
83
|
|
|
|
|
244
|
return $self->new_token(BNODE, $name); |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
sub _get_number { |
434
|
81
|
|
|
81
|
|
158
|
my $self = shift; |
435
|
81
|
100
|
|
|
|
1429
|
if ($self->{buffer} =~ /^${r_double}/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
436
|
9
|
|
|
|
|
46
|
return $self->new_token(DOUBLE, $self->_read_length($+[0])); |
437
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^${r_decimal}/) { |
438
|
11
|
|
|
|
|
56
|
return $self->new_token(DECIMAL, $self->_read_length($+[0])); |
439
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^${r_integer}/) { |
440
|
59
|
|
|
|
|
287
|
return $self->new_token(INTEGER, $self->_read_length($+[0])); |
441
|
|
|
|
|
|
|
} else { |
442
|
2
|
|
|
|
|
9
|
$self->_throw_error("Expected number"); |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
sub _get_comment { |
447
|
196
|
|
|
196
|
|
383
|
my $self = shift; |
448
|
196
|
|
|
|
|
558
|
$self->_get_char_safe('#'); |
449
|
196
|
|
|
|
|
335
|
my $comment = ''; |
450
|
196
|
|
|
|
|
460
|
my $c = $self->_peek_char; |
451
|
196
|
|
66
|
|
|
1044
|
while (length($c) and $c !~ /[\r\n]/) { |
452
|
5287
|
|
|
|
|
9683
|
$comment .= $self->_get_char; |
453
|
5287
|
|
|
|
|
9875
|
$c = $self->_peek_char; |
454
|
|
|
|
|
|
|
} |
455
|
196
|
50
|
33
|
|
|
999
|
if (length($c) and $c =~ /[\r\n]/) { |
456
|
196
|
|
|
|
|
460
|
$self->_get_char; |
457
|
|
|
|
|
|
|
} |
458
|
196
|
|
|
|
|
574
|
return $self->new_token(COMMENT, $comment); |
459
|
|
|
|
|
|
|
} |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
sub _get_double_literal { |
462
|
765
|
|
|
765
|
|
1447
|
my $self = shift; |
463
|
765
|
|
|
|
|
1918
|
my $c = $self->_peek_char(); |
464
|
765
|
|
|
|
|
2463
|
$self->_get_char_safe(q["]); |
465
|
765
|
100
|
|
|
|
2776
|
if (substr($self->{buffer}, 0, 2) eq q[""]) { |
466
|
|
|
|
|
|
|
# #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 |
467
|
29
|
|
|
|
|
91
|
$self->_read_word(q[""]); |
468
|
|
|
|
|
|
|
|
469
|
29
|
|
|
|
|
68
|
my $quote_count = 0; |
470
|
29
|
|
|
|
|
57
|
my $string = ''; |
471
|
29
|
|
|
|
|
52
|
while (1) { |
472
|
184
|
100
|
|
|
|
462
|
if (length($self->{buffer}) == 0) { |
473
|
23
|
|
|
|
|
58
|
$self->fill_buffer; |
474
|
23
|
100
|
|
|
|
78
|
if (length($self->{buffer}) == 0) { |
475
|
3
|
|
|
|
|
12
|
$self->_throw_error("Found EOF in string literal"); |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
} |
478
|
181
|
100
|
|
|
|
394
|
if (substr($self->{buffer}, 0, 1) eq '"') { |
479
|
88
|
|
|
|
|
166
|
my $c = $self->_get_char; |
480
|
88
|
|
|
|
|
133
|
$quote_count++; |
481
|
88
|
100
|
|
|
|
202
|
if ($quote_count == 3) { |
482
|
26
|
|
|
|
|
53
|
last; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
} else { |
485
|
93
|
100
|
|
|
|
204
|
if ($quote_count) { |
486
|
8
|
|
|
|
|
31
|
$string .= '"' foreach (1..$quote_count); |
487
|
8
|
|
|
|
|
16
|
$quote_count = 0; |
488
|
|
|
|
|
|
|
} |
489
|
93
|
100
|
|
|
|
232
|
if (substr($self->{buffer}, 0, 1) eq '\\') { |
490
|
26
|
|
|
|
|
54
|
my $c = $self->_get_char; |
491
|
|
|
|
|
|
|
# $self->_get_char_safe('\\'); |
492
|
26
|
|
|
|
|
54
|
my $esc = $self->_get_char_fill_buffer; |
493
|
26
|
100
|
|
|
|
147
|
if ($esc eq '\\'){ $string .= "\\" } |
|
1
|
100
|
|
|
|
4
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
494
|
5
|
|
|
|
|
12
|
elsif ($esc eq '"'){ $string .= '"' } |
495
|
0
|
|
|
|
|
0
|
elsif ($esc eq "'"){ $string .= "'" } |
496
|
4
|
|
|
|
|
10
|
elsif ($esc eq 'r'){ $string .= "\r" } |
497
|
4
|
|
|
|
|
10
|
elsif ($esc eq 't'){ $string .= "\t" } |
498
|
4
|
|
|
|
|
11
|
elsif ($esc eq 'n'){ $string .= "\n" } |
499
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'b'){ $string .= "\b" } |
500
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'f'){ $string .= "\f" } |
501
|
0
|
|
|
|
|
0
|
elsif ($esc eq '>'){ $string .= ">" } |
502
|
|
|
|
|
|
|
elsif ($esc eq 'U'){ |
503
|
4
|
|
|
|
|
11
|
my $codepoint = $self->_read_length(8); |
504
|
4
|
50
|
|
|
|
20
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
505
|
0
|
|
|
|
|
0
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
506
|
|
|
|
|
|
|
} |
507
|
4
|
|
|
|
|
15
|
$string .= chr(hex($codepoint)); |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
elsif ($esc eq 'u'){ |
510
|
4
|
|
|
|
|
11
|
my $codepoint = $self->_read_length(4); |
511
|
4
|
50
|
|
|
|
20
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
512
|
0
|
|
|
|
|
0
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
513
|
|
|
|
|
|
|
} |
514
|
4
|
|
|
|
|
13
|
$string .= chr(hex($codepoint)); |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
else { |
517
|
0
|
|
|
|
|
0
|
$self->_throw_error("Unrecognized string escape '$esc'"); |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
} else { |
520
|
67
|
|
|
|
|
195
|
$self->{buffer} =~ /^[^"\\]+/; |
521
|
67
|
|
|
|
|
191
|
$string .= $self->_read_length($+[0]); |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
} |
525
|
26
|
|
|
|
|
79
|
return $self->new_token(STRING3D, $string); |
526
|
|
|
|
|
|
|
} else { |
527
|
|
|
|
|
|
|
### #x22 scharacter* #x22 |
528
|
736
|
|
|
|
|
1666
|
my $string = ''; |
529
|
736
|
|
|
|
|
1188
|
while (1) { |
530
|
1623
|
100
|
|
|
|
7992
|
if (substr($self->{buffer}, 0, 1) eq '\\') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
531
|
96
|
|
|
|
|
205
|
my $c = $self->_peek_char; |
532
|
96
|
|
|
|
|
258
|
$self->_get_char_safe('\\'); |
533
|
96
|
|
|
|
|
196
|
my $esc = $self->_get_char; |
534
|
96
|
100
|
|
|
|
291
|
if ($esc eq '\\'){ $string .= "\\" } |
|
45
|
100
|
|
|
|
87
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
535
|
44
|
|
|
|
|
81
|
elsif ($esc eq '"'){ $string .= '"' } |
536
|
0
|
|
|
|
|
0
|
elsif ($esc eq "'"){ $string .= "'" } |
537
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'r'){ $string .= "\r" } |
538
|
0
|
|
|
|
|
0
|
elsif ($esc eq 't'){ $string .= "\t" } |
539
|
1
|
|
|
|
|
3
|
elsif ($esc eq 'n'){ $string .= "\n" } |
540
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'b'){ $string .= "\b" } |
541
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'f'){ $string .= "\f" } |
542
|
0
|
|
|
|
|
0
|
elsif ($esc eq '>'){ $string .= ">" } |
543
|
|
|
|
|
|
|
elsif ($esc eq 'U'){ |
544
|
3
|
|
|
|
|
9
|
my $codepoint = $self->_read_length(8); |
545
|
3
|
100
|
|
|
|
18
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
546
|
2
|
|
|
|
|
10
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
547
|
|
|
|
|
|
|
} |
548
|
1
|
|
|
|
|
7
|
$string .= chr(hex($codepoint)); |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
elsif ($esc eq 'u'){ |
551
|
2
|
|
|
|
|
8
|
my $codepoint = $self->_read_length(4); |
552
|
2
|
100
|
|
|
|
11
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
553
|
1
|
|
|
|
|
5
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
554
|
|
|
|
|
|
|
} |
555
|
1
|
|
|
|
|
7
|
$string .= chr(hex($codepoint)); |
556
|
|
|
|
|
|
|
} |
557
|
|
|
|
|
|
|
else { |
558
|
1
|
|
|
|
|
6
|
$self->_throw_error("Unrecognized string escape '$esc'"); |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^[^"\\]+/) { |
561
|
795
|
|
|
|
|
3150
|
$string .= $self->_read_length($+[0]); |
562
|
|
|
|
|
|
|
} elsif (substr($self->{buffer}, 0, 1) eq '"') { |
563
|
730
|
|
|
|
|
1570
|
last; |
564
|
|
|
|
|
|
|
} else { |
565
|
2
|
|
|
|
|
10
|
$self->_throw_error("Got '$c' while expecting string character"); |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
} |
568
|
730
|
|
|
|
|
2305
|
$self->_get_char_safe(q["]); |
569
|
730
|
|
|
|
|
1839
|
return $self->new_token(STRING1D, $string); |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
sub _get_single_literal { |
574
|
32
|
|
|
32
|
|
79
|
my $self = shift; |
575
|
32
|
|
|
|
|
84
|
my $c = $self->_peek_char(); |
576
|
32
|
|
|
|
|
104
|
$self->_get_char_safe("'"); |
577
|
32
|
100
|
|
|
|
120
|
if (substr($self->{buffer}, 0, 2) eq q['']) { |
578
|
|
|
|
|
|
|
# #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 |
579
|
10
|
|
|
|
|
37
|
$self->_read_word(q['']); |
580
|
|
|
|
|
|
|
|
581
|
10
|
|
|
|
|
24
|
my $quote_count = 0; |
582
|
10
|
|
|
|
|
25
|
my $string = ''; |
583
|
10
|
|
|
|
|
21
|
while (1) { |
584
|
47
|
100
|
|
|
|
127
|
if (length($self->{buffer}) == 0) { |
585
|
4
|
|
|
|
|
15
|
$self->fill_buffer; |
586
|
4
|
100
|
|
|
|
16
|
if (length($self->{buffer}) == 0) { |
587
|
1
|
|
|
|
|
4
|
$self->_throw_error("Found EOF in string literal"); |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
} |
590
|
46
|
100
|
|
|
|
121
|
if (substr($self->{buffer}, 0, 1) eq "'") { |
591
|
31
|
|
|
|
|
69
|
my $c = $self->_get_char; |
592
|
31
|
|
|
|
|
51
|
$quote_count++; |
593
|
31
|
100
|
|
|
|
84
|
if ($quote_count == 3) { |
594
|
9
|
|
|
|
|
20
|
last; |
595
|
|
|
|
|
|
|
} |
596
|
|
|
|
|
|
|
} else { |
597
|
15
|
100
|
|
|
|
46
|
if ($quote_count) { |
598
|
3
|
|
|
|
|
15
|
$string .= "'" foreach (1..$quote_count); |
599
|
3
|
|
|
|
|
8
|
$quote_count = 0; |
600
|
|
|
|
|
|
|
} |
601
|
15
|
50
|
|
|
|
51
|
if (substr($self->{buffer}, 0, 1) eq '\\') { |
602
|
0
|
|
|
|
|
0
|
my $c = $self->_get_char; |
603
|
|
|
|
|
|
|
# $self->_get_char_safe('\\'); |
604
|
0
|
|
|
|
|
0
|
my $esc = $self->_get_char_fill_buffer; |
605
|
0
|
0
|
|
|
|
0
|
if ($esc eq '\\'){ $string .= "\\" } |
|
0
|
0
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
606
|
0
|
|
|
|
|
0
|
elsif ($esc eq '"'){ $string .= '"' } |
607
|
0
|
|
|
|
|
0
|
elsif ($esc eq "'"){ $string .= "'" } |
608
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'r'){ $string .= "\r" } |
609
|
0
|
|
|
|
|
0
|
elsif ($esc eq 't'){ $string .= "\t" } |
610
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'n'){ $string .= "\n" } |
611
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'b'){ $string .= "\b" } |
612
|
0
|
|
|
|
|
0
|
elsif ($esc eq 'f'){ $string .= "\f" } |
613
|
0
|
|
|
|
|
0
|
elsif ($esc eq '>'){ $string .= ">" } |
614
|
|
|
|
|
|
|
elsif ($esc eq 'U'){ |
615
|
0
|
|
|
|
|
0
|
my $codepoint = $self->_read_length(8); |
616
|
0
|
0
|
|
|
|
0
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
617
|
0
|
|
|
|
|
0
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
618
|
|
|
|
|
|
|
} |
619
|
0
|
|
|
|
|
0
|
$string .= chr(hex($codepoint)); |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
elsif ($esc eq 'u'){ |
622
|
0
|
|
|
|
|
0
|
my $codepoint = $self->_read_length(4); |
623
|
0
|
0
|
|
|
|
0
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
624
|
0
|
|
|
|
|
0
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
625
|
|
|
|
|
|
|
} |
626
|
0
|
|
|
|
|
0
|
$string .= chr(hex($codepoint)); |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
else { |
629
|
0
|
|
|
|
|
0
|
$self->_throw_error("Unrecognized string escape '$esc'"); |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
} else { |
632
|
15
|
|
|
|
|
58
|
$self->{buffer} =~ /^[^'\\]+/; |
633
|
15
|
|
|
|
|
57
|
$string .= $self->_read_length($+[0]); |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
} |
636
|
|
|
|
|
|
|
} |
637
|
9
|
|
|
|
|
28
|
return $self->new_token(STRING3S, $string); |
638
|
|
|
|
|
|
|
} else { |
639
|
|
|
|
|
|
|
### #x22 scharacter* #x22 |
640
|
22
|
|
|
|
|
52
|
my $string = ''; |
641
|
22
|
|
|
|
|
46
|
while (1) { |
642
|
44
|
100
|
|
|
|
262
|
if (substr($self->{buffer}, 0, 1) eq '\\') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
643
|
8
|
|
|
|
|
19
|
my $c = $self->_peek_char; |
644
|
8
|
|
|
|
|
21
|
$self->_get_char_safe('\\'); |
645
|
8
|
|
|
|
|
18
|
my $esc = $self->_get_char; |
646
|
8
|
100
|
|
|
|
62
|
if ($esc eq '\\'){ $string .= "\\" } |
|
1
|
50
|
|
|
|
3
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
647
|
0
|
|
|
|
|
0
|
elsif ($esc eq '"'){ $string .= '"' } |
648
|
0
|
|
|
|
|
0
|
elsif ($esc eq "'"){ $string .= "'" } |
649
|
1
|
|
|
|
|
3
|
elsif ($esc eq 'r'){ $string .= "\r" } |
650
|
1
|
|
|
|
|
3
|
elsif ($esc eq 't'){ $string .= "\t" } |
651
|
1
|
|
|
|
|
3
|
elsif ($esc eq 'n'){ $string .= "\n" } |
652
|
1
|
|
|
|
|
4
|
elsif ($esc eq 'b'){ $string .= "\b" } |
653
|
1
|
|
|
|
|
3
|
elsif ($esc eq 'f'){ $string .= "\f" } |
654
|
0
|
|
|
|
|
0
|
elsif ($esc eq '>'){ $string .= ">" } |
655
|
|
|
|
|
|
|
elsif ($esc eq 'U'){ |
656
|
1
|
|
|
|
|
3
|
my $codepoint = $self->_read_length(8); |
657
|
1
|
50
|
|
|
|
6
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
658
|
0
|
|
|
|
|
0
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
659
|
|
|
|
|
|
|
} |
660
|
1
|
|
|
|
|
5
|
$string .= chr(hex($codepoint)); |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
elsif ($esc eq 'u'){ |
663
|
1
|
|
|
|
|
5
|
my $codepoint = $self->_read_length(4); |
664
|
1
|
50
|
|
|
|
6
|
unless ($codepoint =~ /^[0-9A-Fa-f]+$/) { |
665
|
0
|
|
|
|
|
0
|
$self->_throw_error("Bad unicode escape codepoint '$codepoint'"); |
666
|
|
|
|
|
|
|
} |
667
|
1
|
|
|
|
|
5
|
$string .= chr(hex($codepoint)); |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
else { |
670
|
0
|
|
|
|
|
0
|
$self->_throw_error("Unrecognized string escape '$esc'"); |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^[^'\\]+/) { |
673
|
14
|
|
|
|
|
66
|
$string .= $self->_read_length($+[0]); |
674
|
|
|
|
|
|
|
} elsif (substr($self->{buffer}, 0, 1) eq "'") { |
675
|
20
|
|
|
|
|
41
|
last; |
676
|
|
|
|
|
|
|
} else { |
677
|
2
|
|
|
|
|
10
|
$self->_throw_error("Got '$c' while expecting string character"); |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
} |
680
|
20
|
|
|
|
|
66
|
$self->_get_char_safe(q[']); |
681
|
20
|
|
|
|
|
60
|
return $self->new_token(STRING1S, $string); |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
sub _get_keyword { |
686
|
289
|
|
|
289
|
|
547
|
my $self = shift; |
687
|
289
|
|
|
|
|
868
|
$self->_get_char_safe('@'); |
688
|
289
|
100
|
|
|
|
1393
|
if ($self->{buffer} =~ /^base/) { |
|
|
100
|
|
|
|
|
|
689
|
7
|
|
|
|
|
27
|
$self->_read_word('base'); |
690
|
7
|
|
|
|
|
26
|
return $self->new_token(BASE); |
691
|
|
|
|
|
|
|
} elsif ($self->{buffer} =~ /^prefix/) { |
692
|
257
|
|
|
|
|
905
|
$self->_read_word('prefix'); |
693
|
257
|
|
|
|
|
849
|
return $self->new_token(PREFIX); |
694
|
|
|
|
|
|
|
} else { |
695
|
25
|
100
|
|
|
|
139
|
if ($self->{buffer} =~ /^[a-zA-Z]+(-[a-zA-Z0-9]+)*\b/) { |
696
|
24
|
|
|
|
|
98
|
my $lang = $self->_read_length($+[0]); |
697
|
24
|
|
|
|
|
83
|
return $self->new_token(LANG, $lang); |
698
|
|
|
|
|
|
|
} else { |
699
|
1
|
|
|
|
|
5
|
$self->_throw_error("Expected keyword or language tag"); |
700
|
|
|
|
|
|
|
} |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
} |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
sub _throw_error { |
705
|
37
|
|
|
37
|
|
78
|
my $self = shift; |
706
|
37
|
|
|
|
|
68
|
my $error = shift; |
707
|
37
|
|
|
|
|
1068
|
my $line = $self->line; |
708
|
37
|
|
|
|
|
925
|
my $col = $self->column; |
709
|
|
|
|
|
|
|
# Carp::cluck "$line:$col: $error: " . Dumper($self->{buffer}); |
710
|
37
|
|
|
|
|
452
|
RDF::Trine::Error::ParserError::Positioned->throw( |
711
|
|
|
|
|
|
|
-text => "$error at $line:$col", |
712
|
|
|
|
|
|
|
-value => [$line, $col], |
713
|
|
|
|
|
|
|
); |
714
|
|
|
|
|
|
|
} |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
1; |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
__END__ |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
=end private |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
=back |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
=head1 BUGS |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
Please report any bugs or feature requests to through the GitHub web interface |
729
|
|
|
|
|
|
|
at L<https://github.com/kasei/perlrdf/issues>. |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=head1 AUTHOR |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
Gregory Todd Williams C<< <gwilliams@cpan.org> >> |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
=head1 COPYRIGHT |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
Copyright (c) 2006-2012 Gregory Todd Williams. This |
738
|
|
|
|
|
|
|
program is free software; you can redistribute it and/or modify it under |
739
|
|
|
|
|
|
|
the same terms as Perl itself. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=cut |