line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
6
|
|
|
6
|
|
3043
|
use v5.14; |
|
6
|
|
|
|
|
20
|
|
2
|
6
|
|
|
6
|
|
33
|
use warnings; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
219
|
|
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
AtteanX::Parser::NTuples - Shared functionality for N-Triples and N-Quads parsers |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 VERSION |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
This document describes AtteanX::Parser::NTuples version 0.032 |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 SYNOPSIS |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
use Attean; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 DESCRIPTION |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
This module provides a base class for RDF formats N-Triples and N-Quads. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 METHODS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=over 4 |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=cut |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use utf8; |
27
|
6
|
|
|
6
|
|
35
|
use Moo; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
30
|
|
28
|
6
|
|
|
6
|
|
109
|
use Attean; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
28
|
|
29
|
6
|
|
|
6
|
|
1682
|
use Carp qw(carp); |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
33
|
|
30
|
6
|
|
|
6
|
|
32
|
use Encode qw(decode); |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
345
|
|
31
|
6
|
|
|
6
|
|
40
|
use namespace::clean; |
|
6
|
|
|
|
|
12
|
|
|
6
|
|
|
|
|
302
|
|
32
|
6
|
|
|
6
|
|
40
|
|
|
6
|
|
|
|
|
21
|
|
|
6
|
|
|
|
|
54
|
|
33
|
|
|
|
|
|
|
=item C<< parse_term_from_bytes( $bytes ) >> |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
Parses the given C<< $bytes >> and returns a corresponding L<Attean::API::Term> object. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=cut |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $self = shift; |
40
|
|
|
|
|
|
|
unless (ref($self)) { |
41
|
1
|
|
|
1
|
1
|
60
|
$self = $self->new(); |
42
|
1
|
50
|
|
|
|
4
|
} |
43
|
0
|
|
|
|
|
0
|
my $string = shift; |
44
|
|
|
|
|
|
|
my $n = $self->_eat_node( 0, $string ); |
45
|
1
|
|
|
|
|
2
|
return $n; |
46
|
1
|
|
|
|
|
6
|
} |
47
|
1
|
|
|
|
|
5
|
|
48
|
|
|
|
|
|
|
=item C<< parse_iter_from_bytes( $data ) >> |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Returns an iterator of L<Attean::API::Binding> objects that result from parsing |
51
|
|
|
|
|
|
|
the data read from the UTF-8 encoded byte string C<< $data >>. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=cut |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $self = shift; |
56
|
|
|
|
|
|
|
my $data = shift; |
57
|
|
|
|
|
|
|
|
58
|
13
|
|
|
13
|
1
|
183
|
$data = Encode::encode("utf-8", $data); |
59
|
13
|
|
|
|
|
27
|
open(my $fh, '<:encoding(UTF-8)', \$data); |
60
|
|
|
|
|
|
|
return $self->parse_iter_from_io($fh); |
61
|
13
|
|
|
|
|
109
|
} |
62
|
3
|
|
|
3
|
|
18
|
|
|
3
|
|
|
3
|
|
9
|
|
|
3
|
|
|
|
|
28
|
|
|
3
|
|
|
|
|
2350
|
|
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
13
|
|
|
13
|
|
|
|
|
1282
|
|
63
|
13
|
|
|
|
|
3210
|
=item C<< parse_iter_from_io( $fh ) >> |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Returns an iterator of L<Attean::API::Binding> objects that result from parsing |
66
|
|
|
|
|
|
|
the data read from the L<IO::Handle> object C<< $fh >>. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $self = shift; |
71
|
|
|
|
|
|
|
my $fh = shift; |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
my $lineno = 0; |
74
|
15
|
|
|
15
|
1
|
31
|
my $line; |
75
|
15
|
|
|
|
|
24
|
my $gen = sub { |
76
|
|
|
|
|
|
|
while (defined($line = <$fh>)) { |
77
|
15
|
|
|
|
|
26
|
($line, my @extra) = split(/\r\n|\r|\n/, $line, 2); |
78
|
15
|
|
|
|
|
25
|
$lineno++; |
79
|
|
|
|
|
|
|
|
80
|
51
|
|
|
51
|
|
454
|
next unless (defined($line) and length($line)); |
81
|
39
|
|
|
|
|
414
|
next unless ($line =~ /\S/); |
82
|
39
|
|
|
|
|
69
|
chomp($line); |
83
|
|
|
|
|
|
|
$line =~ s/^\s*//; |
84
|
39
|
100
|
66
|
|
|
190
|
$line =~ s/\s*$//; |
85
|
38
|
100
|
|
|
|
144
|
next if ($line =~ /^#/); |
86
|
37
|
|
|
|
|
84
|
|
87
|
37
|
|
|
|
|
175
|
my @nodes = (); |
88
|
37
|
|
|
|
|
322
|
while (my $n = $self->_eat_node( $lineno, $line )) { |
89
|
37
|
100
|
|
|
|
120
|
push(@nodes, $n); |
90
|
|
|
|
|
|
|
$line =~ s/^\s*//; |
91
|
36
|
|
|
|
|
52
|
} |
92
|
36
|
|
|
|
|
122
|
$line =~ s/^\s//g; |
93
|
122
|
|
|
|
|
1621
|
unless ($line eq '.') { |
94
|
122
|
|
|
|
|
490
|
die "Missing expected '.' at line $lineno"; |
95
|
|
|
|
|
|
|
} |
96
|
36
|
|
|
|
|
83
|
|
97
|
36
|
50
|
|
|
|
87
|
my $binding = $self->_binding( \@nodes, $lineno ); |
98
|
0
|
|
|
|
|
0
|
if (@extra and $extra[0] ne '') { |
99
|
|
|
|
|
|
|
$line = shift(@extra); |
100
|
|
|
|
|
|
|
goto LINE; |
101
|
36
|
|
|
|
|
134
|
} |
102
|
36
|
50
|
33
|
|
|
919
|
return $binding; |
103
|
0
|
|
|
|
|
0
|
} |
104
|
0
|
|
|
|
|
0
|
return; |
105
|
|
|
|
|
|
|
}; |
106
|
36
|
|
|
|
|
147
|
return Attean::CodeIterator->new( |
107
|
|
|
|
|
|
|
generator => $gen, |
108
|
15
|
|
|
|
|
35
|
item_type => $self->handled_type->role, |
109
|
15
|
|
|
|
|
73
|
); |
110
|
15
|
|
|
|
|
75
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
my $self = shift; |
113
|
|
|
|
|
|
|
my $lineno = shift; |
114
|
|
|
|
|
|
|
$_[0] =~ s/^\s*//; |
115
|
|
|
|
|
|
|
return unless length($_[0]); |
116
|
|
|
|
|
|
|
my $char = substr($_[0], 0, 1); |
117
|
159
|
|
|
159
|
|
232
|
return if ($char eq '.'); |
118
|
159
|
|
|
|
|
190
|
|
119
|
159
|
|
|
|
|
400
|
if ($char eq '<') { |
120
|
159
|
50
|
|
|
|
349
|
my ($uri) = $_[0] =~ m/^<([^>]*)>/; |
121
|
159
|
|
|
|
|
256
|
substr($_[0], 0, length($uri)+2) = ''; |
122
|
159
|
100
|
|
|
|
331
|
state %cache; |
123
|
|
|
|
|
|
|
if (my $i = $cache{$uri}) { |
124
|
123
|
100
|
|
|
|
274
|
return $i; |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
125
|
85
|
|
|
|
|
331
|
} else { |
126
|
85
|
|
|
|
|
248
|
if (rand() < 0.02) { |
127
|
85
|
|
|
|
|
108
|
# clear out the cache roughly every 50 IRIs |
128
|
85
|
100
|
|
|
|
206
|
%cache = (); |
129
|
53
|
|
|
|
|
136
|
} |
130
|
|
|
|
|
|
|
my $iri = $self->new_iri( value => _unescape($uri, $lineno) ); |
131
|
32
|
50
|
|
|
|
149
|
$cache{$uri} = $iri; |
132
|
|
|
|
|
|
|
return $iri; |
133
|
0
|
|
|
|
|
0
|
} |
134
|
|
|
|
|
|
|
} elsif ($char eq '_') { |
135
|
32
|
|
|
|
|
72
|
my ($name) = $_[0] =~ m/^_:([A-Za-z][A-Za-z0-9]*)/; |
136
|
32
|
|
|
|
|
5140
|
substr($_[0], 0, length($name)+2) = ''; |
137
|
32
|
|
|
|
|
110
|
return Attean::Blank->new( $name ); |
138
|
|
|
|
|
|
|
} elsif ($char eq '"') { |
139
|
|
|
|
|
|
|
substr($_[0], 0, 1) = ''; |
140
|
21
|
|
|
|
|
87
|
my $value = decode('utf8', ''); |
141
|
21
|
|
|
|
|
70
|
while (length($_[0]) and substr($_[0], 0, 1) ne '"') { |
142
|
21
|
|
|
|
|
415
|
while ($_[0] =~ m/^([^"\\]+)/) { |
143
|
|
|
|
|
|
|
$value .= $1; |
144
|
17
|
|
|
|
|
43
|
substr($_[0],0,length($1)) = ''; |
145
|
17
|
|
|
|
|
106
|
} |
146
|
17
|
|
66
|
|
|
446
|
if (substr($_[0],0,1) eq '\\') { |
147
|
21
|
|
|
|
|
75
|
while ($_[0] =~ m/^\\(.)/) { |
148
|
20
|
|
|
|
|
59
|
if ($1 eq 't') { |
149
|
20
|
|
|
|
|
81
|
$value .= "\t"; |
150
|
|
|
|
|
|
|
substr($_[0],0,2) = ''; |
151
|
21
|
100
|
|
|
|
119
|
} elsif ($1 eq 'r') { |
152
|
5
|
|
|
|
|
23
|
$value .= "\r"; |
153
|
6
|
100
|
|
|
|
39
|
substr($_[0],0,2) = ''; |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
154
|
1
|
|
|
|
|
3
|
} elsif ($1 eq 'n') { |
155
|
1
|
|
|
|
|
6
|
$value .= "\n"; |
156
|
|
|
|
|
|
|
substr($_[0],0,2) = ''; |
157
|
0
|
|
|
|
|
0
|
} elsif ($1 eq '"') { |
158
|
0
|
|
|
|
|
0
|
$value .= '"'; |
159
|
|
|
|
|
|
|
substr($_[0],0,2) = ''; |
160
|
1
|
|
|
|
|
2
|
} elsif ($1 eq '\\') { |
161
|
1
|
|
|
|
|
6
|
$value .= "\\"; |
162
|
|
|
|
|
|
|
substr($_[0],0,2) = ''; |
163
|
1
|
|
|
|
|
3
|
} elsif ($1 eq 'u') { |
164
|
1
|
|
|
|
|
5
|
$_[0] =~ m/^\\u([0-9A-Fa-f]{4})/ or die qq[Bad N-Triples \\u escape at line $lineno, near "$_[0]"]; |
165
|
|
|
|
|
|
|
$value .= chr(oct('0x' . $1)); |
166
|
1
|
|
|
|
|
2
|
substr($_[0],0,6) = ''; |
167
|
1
|
|
|
|
|
7
|
} elsif ($1 eq 'U') { |
168
|
|
|
|
|
|
|
$_[0] =~ m/^\\U([0-9A-Fa-f]{8})/ or die qq[Bad N-Triples \\U escape at line $lineno, near "$_[0]"]; |
169
|
1
|
50
|
|
|
|
6
|
$value .= chr(oct('0x' . $1)); |
170
|
1
|
|
|
|
|
8
|
substr($_[0],0,10) = ''; |
171
|
1
|
|
|
|
|
6
|
} else { |
172
|
|
|
|
|
|
|
die qq[Not valid N-Triples escape character '\\$1' at line $lineno, near "$_[0]"]; |
173
|
1
|
50
|
|
|
|
9
|
} |
174
|
1
|
|
|
|
|
8
|
} |
175
|
1
|
|
|
|
|
8
|
} |
176
|
|
|
|
|
|
|
} |
177
|
0
|
|
|
|
|
0
|
if (substr($_[0],0,1) eq '"') { |
178
|
|
|
|
|
|
|
substr($_[0],0,1) = ''; |
179
|
|
|
|
|
|
|
} else { |
180
|
|
|
|
|
|
|
die qq[Ending double quote not found at line $lineno]; |
181
|
|
|
|
|
|
|
} |
182
|
17
|
50
|
|
|
|
52
|
|
183
|
17
|
|
|
|
|
38
|
if ($_[0] =~ m/^@([a-z]+(-[a-zA-Z0-9]+)*)/) { |
184
|
|
|
|
|
|
|
my $lang = $1; |
185
|
0
|
|
|
|
|
0
|
substr($_[0],0,1+length($lang)) = ''; |
186
|
|
|
|
|
|
|
return Attean::Literal->new( value => $value, language => $lang ); |
187
|
|
|
|
|
|
|
} elsif (substr($_[0],0,3) eq '^^<') { |
188
|
17
|
100
|
|
|
|
87
|
substr($_[0],0,3) = ''; |
|
|
100
|
|
|
|
|
|
189
|
2
|
|
|
|
|
7
|
my ($uri) = $_[0] =~ m/^([^>]*)>/; |
190
|
2
|
|
|
|
|
7
|
substr($_[0], 0, length($uri)+1) = ''; |
191
|
2
|
|
|
|
|
48
|
my $dt = $self->new_iri(value => $uri); |
192
|
|
|
|
|
|
|
return Attean::Literal->new( value => $value, datatype => $dt); |
193
|
5
|
|
|
|
|
12
|
} else { |
194
|
5
|
|
|
|
|
21
|
return Attean::Literal->new($value); |
195
|
5
|
|
|
|
|
17
|
} |
196
|
5
|
|
|
|
|
17
|
} else { |
197
|
5
|
|
|
|
|
1151
|
Carp::cluck; |
198
|
|
|
|
|
|
|
die qq[Not valid N-Triples node start character '$char' at line $lineno, near "$_[0]"]; |
199
|
10
|
|
|
|
|
190
|
} |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
0
|
|
|
|
|
0
|
my $string = shift; |
203
|
0
|
|
|
|
|
0
|
my $lineno = shift; |
204
|
|
|
|
|
|
|
my $value = ''; |
205
|
|
|
|
|
|
|
while (length($string)) { |
206
|
|
|
|
|
|
|
while ($string =~ m/^([^\\]+)/) { |
207
|
|
|
|
|
|
|
$value .= $1; |
208
|
32
|
|
|
32
|
|
59
|
substr($string,0,length($1)) = ''; |
209
|
32
|
|
|
|
|
40
|
} |
210
|
32
|
|
|
|
|
50
|
if (length($string)) { |
211
|
32
|
|
|
|
|
75
|
if ($string eq '\\') { |
212
|
32
|
|
|
|
|
105
|
die qq[Backslash in N-Triples node without escaped character at line $lineno]; |
213
|
32
|
|
|
|
|
76
|
} |
214
|
32
|
|
|
|
|
114
|
if ($string =~ m/^\\([tbnrf"'uU])/) { |
215
|
|
|
|
|
|
|
while ($string =~ m/^\\([tbnrf"'uU])/) { |
216
|
32
|
100
|
|
|
|
103
|
if ($1 eq 't') { |
217
|
1
|
50
|
|
|
|
5
|
$value .= "\t"; |
218
|
0
|
|
|
|
|
0
|
substr($string,0,2) = ''; |
219
|
|
|
|
|
|
|
} elsif ($1 eq 'b') { |
220
|
1
|
50
|
|
|
|
8
|
$value .= "\b"; |
221
|
1
|
|
|
|
|
6
|
substr($string,0,2) = ''; |
222
|
1
|
50
|
|
|
|
13
|
} elsif ($1 eq 'n') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
223
|
0
|
|
|
|
|
0
|
$value .= "\n"; |
224
|
0
|
|
|
|
|
0
|
substr($string,0,2) = ''; |
225
|
|
|
|
|
|
|
} elsif ($1 eq 'r') { |
226
|
0
|
|
|
|
|
0
|
$value .= "\r"; |
227
|
0
|
|
|
|
|
0
|
substr($string,0,2) = ''; |
228
|
|
|
|
|
|
|
} elsif ($1 eq 'f') { |
229
|
0
|
|
|
|
|
0
|
$value .= "\f"; |
230
|
0
|
|
|
|
|
0
|
substr($string,0,2) = ''; |
231
|
|
|
|
|
|
|
} elsif ($1 eq '"') { |
232
|
0
|
|
|
|
|
0
|
$value .= '"'; |
233
|
0
|
|
|
|
|
0
|
substr($string,0,2) = ''; |
234
|
|
|
|
|
|
|
} elsif ($1 eq '\\') { |
235
|
0
|
|
|
|
|
0
|
$value .= "\\"; |
236
|
0
|
|
|
|
|
0
|
substr($string,0,2) = ''; |
237
|
|
|
|
|
|
|
} elsif ($1 eq 'u') { |
238
|
0
|
|
|
|
|
0
|
$string =~ m/^\\u([0-9A-F]{4})/ or die qq[Bad N-Triples \\u escape at line $lineno, near "$string"]; |
239
|
0
|
|
|
|
|
0
|
$value .= chr(oct('0x' . $1)); |
240
|
|
|
|
|
|
|
substr($string,0,6) = ''; |
241
|
0
|
|
|
|
|
0
|
} elsif ($1 eq 'U') { |
242
|
0
|
|
|
|
|
0
|
$string =~ m/^\\U([0-9A-F]{8})/ or die qq[Bad N-Triples \\U escape at line $lineno, near "$string"]; |
243
|
|
|
|
|
|
|
$value .= chr(oct('0x' . $1)); |
244
|
1
|
50
|
|
|
|
8
|
substr($string,0,10) = ''; |
245
|
1
|
|
|
|
|
7
|
} |
246
|
1
|
|
|
|
|
5
|
} |
247
|
|
|
|
|
|
|
} else { |
248
|
0
|
0
|
|
|
|
0
|
my $esc = substr($string, 0, 2); |
249
|
0
|
|
|
|
|
0
|
die qq[Not a valid N-Triples escape sequence '$esc' at line $lineno, near "$string"]; |
250
|
0
|
|
|
|
|
0
|
} |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
return $value; |
254
|
0
|
|
|
|
|
0
|
} |
255
|
0
|
|
|
|
|
0
|
} |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
1; |
258
|
|
|
|
|
|
|
|
259
|
32
|
|
|
|
|
137
|
|
260
|
|
|
|
|
|
|
=back |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head1 BUGS |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
Please report any bugs or feature requests to through the GitHub web interface |
265
|
|
|
|
|
|
|
at L<https://github.com/kasei/perlrdf/issues>. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=head1 AUTHOR |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Gregory Todd Williams C<< <gwilliams@cpan.org> >> |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head1 COPYRIGHT |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
Copyright (c) 2014--2022 Gregory Todd Williams. This |
274
|
|
|
|
|
|
|
program is free software; you can redistribute it and/or modify it under |
275
|
|
|
|
|
|
|
the same terms as Perl itself. |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=cut |