| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # AtteanX::Parser::Turtle::Lexer | 
| 2 |  |  |  |  |  |  | # ----------------------------------------------------------------------------- | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | =head1 NAME | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | AtteanX::Parser::Turtle::Lexer - Tokenizer for parsing Turtle, TriG, and N-Triples | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | =head1 VERSION | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | This document describes AtteanX::Parser::Turtle::Lexer version 0.032 | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | use AtteanX::Parser::Turtle::Lexer; | 
| 15 |  |  |  |  |  |  | my $l = AtteanX::Parser::Turtle::Lexer->new( file => $fh ); | 
| 16 |  |  |  |  |  |  | while (my $t = $l->get_token) { | 
| 17 |  |  |  |  |  |  | ... | 
| 18 |  |  |  |  |  |  | } | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | =head1 METHODS | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =over 4 | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =cut | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | use AtteanX::Parser::Turtle::Constants; | 
| 27 | 13 |  |  | 13 |  | 96 | use v5.14; | 
|  | 13 |  |  |  |  | 30 |  | 
|  | 13 |  |  |  |  | 1513 |  | 
| 28 | 13 |  |  | 13 |  | 141 | use strict; | 
|  | 13 |  |  |  |  | 41 |  | 
| 29 | 13 |  |  | 13 |  | 81 | use warnings; | 
|  | 13 |  |  |  |  | 38 |  | 
|  | 13 |  |  |  |  | 299 |  | 
| 30 | 13 |  |  | 13 |  | 75 | use Data::Dumper; | 
|  | 13 |  |  |  |  | 38 |  | 
|  | 13 |  |  |  |  | 385 |  | 
| 31 | 13 |  |  | 13 |  | 92 | use Moo; | 
|  | 13 |  |  |  |  | 36 |  | 
|  | 13 |  |  |  |  | 591 |  | 
| 32 | 13 |  |  | 13 |  | 82 | use Types::Standard qw(FileHandle Ref Str Int ArrayRef HashRef ConsumerOf InstanceOf); | 
|  | 13 |  |  |  |  | 30 |  | 
|  | 13 |  |  |  |  | 74 |  | 
| 33 | 13 |  |  | 13 |  | 5006 | use namespace::clean; | 
|  | 13 |  |  |  |  | 38 |  | 
|  | 13 |  |  |  |  | 98 |  | 
| 34 | 13 |  |  | 13 |  | 17340 |  | 
|  | 13 |  |  |  |  | 29 |  | 
|  | 13 |  |  |  |  | 117 |  | 
| 35 |  |  |  |  |  |  | my $r_nameChar_extra		= qr'[-0-9\x{B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]'o; | 
| 36 |  |  |  |  |  |  | my $r_nameStartChar_minus_underscore	= qr'[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{00010000}-\x{000EFFFF}]'o; | 
| 37 |  |  |  |  |  |  | my $r_nameStartChar			= qr/[A-Za-z_\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/o; | 
| 38 |  |  |  |  |  |  | my $r_nameChar				= qr/${r_nameStartChar}|[-0-9\x{b7}\x{0300}-\x{036f}\x{203F}-\x{2040}]/o; | 
| 39 |  |  |  |  |  |  | my $r_prefixName			= qr/(?:(?!_)${r_nameStartChar})(?:$r_nameChar)*/o; | 
| 40 |  |  |  |  |  |  | my $r_nameChar_test			= qr"(?:$r_nameStartChar|$r_nameChar_extra)"o; | 
| 41 |  |  |  |  |  |  | my $r_double				= qr'[+-]?([0-9]+\.[0-9]*[eE][+-]?[0-9]+|\.[0-9]+[eE][+-]?[0-9]+|[0-9]+[eE][+-]?[0-9]+)'o; | 
| 42 |  |  |  |  |  |  | my $r_decimal				= qr'[+-]?(([0-9]+\.[0-9]+)|\.([0-9])+)'o; | 
| 43 |  |  |  |  |  |  | my $r_integer				= qr'[+-]?[0-9]+'o; | 
| 44 |  |  |  |  |  |  | my $r_PN_CHARS_U			= qr/[_A-Za-z_\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/o; | 
| 45 |  |  |  |  |  |  | my $r_PN_CHARS				= qr"${r_PN_CHARS_U}|[-0-9\x{00B7}\x{0300}-\x{036F}\x{203F}-\x{2040}]"o; | 
| 46 |  |  |  |  |  |  | my $r_bnode_id				= qr"(?:${r_PN_CHARS_U}|[0-9])((${r_PN_CHARS}|[.])*${r_PN_CHARS})?"o; | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | my $r_PN_CHARS_BASE			= qr/([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])/o; | 
| 49 |  |  |  |  |  |  | my $r_PN_PREFIX				= qr/(?:${r_PN_CHARS_BASE}(?:(?:${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o; | 
| 50 |  |  |  |  |  |  | my $r_PN_LOCAL_ESCAPED		= qr{(?:\\(?:[-~.!&'()*+,;=/?#@%_\$]))|%[0-9A-Fa-f]{2}}o; | 
| 51 |  |  |  |  |  |  | our $r_PN_LOCAL				= qr/(?:(?:${r_PN_CHARS_U}|[:0-9]|${r_PN_LOCAL_ESCAPED})(?:(?:${r_PN_CHARS}|${r_PN_LOCAL_ESCAPED}|[:.])*(?:${r_PN_CHARS}|[:]|${r_PN_LOCAL_ESCAPED}))?)/o; | 
| 52 |  |  |  |  |  |  | my $r_PN_LOCAL_BNODE		= qr/(?:(?:${r_PN_CHARS_U}|[0-9])(?:(?:${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o; | 
| 53 |  |  |  |  |  |  | our $r_PNAME_NS				= qr/(?:(?:${r_PN_PREFIX})?:)/o; | 
| 54 |  |  |  |  |  |  | our $r_PNAME_LN				= qr/(?:${r_PNAME_NS}${r_PN_LOCAL})/o; | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | with 'AtteanX::API::Lexer'; | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | =item C<< new_token ( $type, $start_line, $start_column, @values ) >> | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | Returns a new token with the given type and optional values, capturing the | 
| 61 |  |  |  |  |  |  | current line and column of the input data. | 
| 62 |  |  |  |  |  |  |  | 
| 63 |  |  |  |  |  |  | =cut | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | my $self		= shift; | 
| 66 |  |  |  |  |  |  | my $type		= shift; | 
| 67 | 483 |  |  | 483 | 1 | 6935 | my $start_line	= shift; | 
| 68 | 483 |  |  |  |  | 714 | my $start_col	= shift; | 
| 69 | 483 |  |  |  |  | 623 | my $line		= $self->line; | 
| 70 | 483 |  |  |  |  | 595 | my $col			= $self->column; | 
| 71 | 483 |  |  |  |  | 6321 | return AtteanX::Parser::Turtle::Token->fast_constructor( $type, $start_line, $start_col, $line, $col, \@_ ); | 
| 72 | 483 |  |  |  |  | 7457 | } | 
| 73 | 483 |  |  |  |  | 3296 |  | 
| 74 |  |  |  |  |  |  | my %CHAR_TOKEN	= ( | 
| 75 |  |  |  |  |  |  | '.'	=> DOT, | 
| 76 |  |  |  |  |  |  | ';'	=> SEMICOLON, | 
| 77 |  |  |  |  |  |  | '['	=> LBRACKET, | 
| 78 |  |  |  |  |  |  | ']'	=> RBRACKET, | 
| 79 |  |  |  |  |  |  | '('	=> LPAREN, | 
| 80 |  |  |  |  |  |  | ')'	=> RPAREN, | 
| 81 |  |  |  |  |  |  | '}'	=> RBRACE, | 
| 82 |  |  |  |  |  |  | ','	=> COMMA, | 
| 83 |  |  |  |  |  |  | '='	=> EQUALS, | 
| 84 |  |  |  |  |  |  | ); | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | my %METHOD_TOKEN	= ( | 
| 87 |  |  |  |  |  |  | # 	q[#]	=> '_get_comment', | 
| 88 |  |  |  |  |  |  | q[@]	=> '_get_keyword', | 
| 89 |  |  |  |  |  |  | q[<]	=> '_get_iriref_or_ltlt', | 
| 90 |  |  |  |  |  |  | q[>]	=> '_get_gtgt', | 
| 91 |  |  |  |  |  |  | q[|]	=> '_get_rannot', | 
| 92 |  |  |  |  |  |  | q[{]	=> '_get_lbrace_or_lannot', | 
| 93 |  |  |  |  |  |  | q[_]	=> '_get_bnode', | 
| 94 |  |  |  |  |  |  | q[']	=> '_get_single_literal', | 
| 95 |  |  |  |  |  |  | q["]	=> '_get_double_literal', | 
| 96 |  |  |  |  |  |  | q[:]	=> '_get_pname', | 
| 97 |  |  |  |  |  |  | (map {$_ => '_get_number'} (0 .. 9, '-', '+')) | 
| 98 |  |  |  |  |  |  | ); | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | =item C<< get_token >> | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | Returns the next token present in the input. | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | =cut | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | my $self	= shift; | 
| 107 |  |  |  |  |  |  | while (1) { | 
| 108 |  |  |  |  |  |  | $self->fill_buffer unless (length($self->buffer)); | 
| 109 | 516 |  |  | 516 | 1 | 8876 |  | 
| 110 | 516 |  |  |  |  | 658 | if ($self->buffer =~ /^[ \r\n\t]+/o) { | 
| 111 | 1009 | 100 |  |  |  | 14835 | $self->read_length($+[0]); | 
| 112 |  |  |  |  |  |  | # we're ignoring whitespace tokens, but we could return them here instead of falling through to the 'next': | 
| 113 | 1009 | 100 |  |  |  | 16706 | # 			return $self->new_token(WS); | 
| 114 | 493 |  |  |  |  | 4023 | next; | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  |  | 
| 117 | 493 |  |  |  |  | 914 | my $c	= $self->peek_char(); | 
| 118 |  |  |  |  |  |  | return unless (defined($c)); | 
| 119 |  |  |  |  |  |  |  | 
| 120 | 516 |  |  |  |  | 3431 | if ($c eq '#') { | 
| 121 | 516 | 100 |  |  |  | 1046 | # we're ignoring comment tokens, but we could return them here instead of falling through to the 'next': | 
| 122 |  |  |  |  |  |  | $self->_get_comment(); | 
| 123 | 483 | 50 |  |  |  | 938 | next; | 
| 124 |  |  |  |  |  |  | } | 
| 125 | 0 |  |  |  |  | 0 |  | 
| 126 | 0 |  |  |  |  | 0 | my $start_column	= $self->column; | 
| 127 |  |  |  |  |  |  | my $start_line		= $self->line; | 
| 128 |  |  |  |  |  |  |  | 
| 129 | 483 |  |  |  |  | 6513 | $self->start_column( $start_column ); | 
| 130 | 483 |  |  |  |  | 7572 | $self->start_line( $start_line ); | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 483 |  |  |  |  | 7669 | if ($c eq '.' and $self->buffer =~ /^$r_decimal/) { | 
| 133 | 483 |  |  |  |  | 16738 | return $self->_get_number(); | 
| 134 |  |  |  |  |  |  | } | 
| 135 | 483 | 50 | 66 |  |  | 11593 |  | 
| 136 | 0 |  |  |  |  | 0 | if (defined(my $name = $CHAR_TOKEN{$c})) { $self->get_char; return $self->new_token($name, $start_line, $start_column, $c); } | 
| 137 |  |  |  |  |  |  | elsif (defined(my $method = $METHOD_TOKEN{$c})) { return $self->$method() } | 
| 138 |  |  |  |  |  |  | elsif ($c =~ /[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/o) { | 
| 139 | 483 | 100 |  |  |  | 2719 | if ($self->buffer =~ /^a(?!:)\s/o) { | 
|  | 129 | 100 |  |  |  | 431 |  | 
|  | 129 | 50 |  |  |  | 316 |  | 
|  |  | 0 |  |  |  |  |  | 
| 140 | 231 |  |  |  |  | 779 | $self->get_char; | 
| 141 |  |  |  |  |  |  | return $self->new_token(A, $start_line, $start_column, 'a'); | 
| 142 | 123 | 100 |  |  |  | 1694 | } elsif ($self->buffer =~ /^(?:true|false)(?!:)\b/o) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 143 | 22 |  |  |  |  | 609 | my $bool	= $self->read_length($+[0]); | 
| 144 | 22 |  |  |  |  | 78 | return $self->new_token(BOOLEAN, $start_line, $start_column, $bool); | 
| 145 |  |  |  |  |  |  | } elsif ($self->buffer =~ /^BASE(?!:)\b/oi) { | 
| 146 | 0 |  |  |  |  | 0 | $self->read_length(4); | 
| 147 | 0 |  |  |  |  | 0 | return $self->new_token(BASE, $start_line, $start_column); | 
| 148 |  |  |  |  |  |  | } elsif ($self->buffer =~ /^PREFIX(?!:)\b/io) { | 
| 149 | 1 |  |  |  |  | 47 | $self->read_length(6); | 
| 150 | 1 |  |  |  |  | 5 | return $self->new_token(PREFIX, $start_line, $start_column); | 
| 151 |  |  |  |  |  |  | } elsif ($self->buffer =~ /^GRAPH(?!:)\b/io) { | 
| 152 | 2 |  |  |  |  | 117 | $self->read_length(5); | 
| 153 | 2 |  |  |  |  | 9 | return $self->new_token(GRAPH, $start_line, $start_column); | 
| 154 |  |  |  |  |  |  | } else { | 
| 155 | 0 |  |  |  |  | 0 | return $self->_get_pname; | 
| 156 | 0 |  |  |  |  | 0 | } | 
| 157 |  |  |  |  |  |  | } | 
| 158 | 98 |  |  |  |  | 6583 | elsif ($c eq '^') { | 
| 159 |  |  |  |  |  |  | $self->read_word('^^'); return $self->new_token(HATHAT, $start_line, $start_column); } | 
| 160 |  |  |  |  |  |  | else { | 
| 161 |  |  |  |  |  |  | # 			Carp::cluck sprintf("Unexpected byte '$c' (0x%02x)", ord($c)); | 
| 162 | 0 |  |  |  |  | 0 | return $self->_throw_error(sprintf("Unexpected byte '%s' (0x%02x)", $c, ord($c))); | 
|  | 0 |  |  |  |  | 0 |  | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  | warn sprintf('byte: 0x%x', ord($c)); | 
| 165 | 0 |  |  |  |  | 0 | } | 
| 166 |  |  |  |  |  |  | } | 
| 167 | 0 |  |  |  |  | 0 |  | 
| 168 |  |  |  |  |  |  | =begin private | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | =cut | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | my $self	= shift; | 
| 174 |  |  |  |  |  |  | my $prefix	= ''; | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | if ($self->buffer =~ /^$r_PNAME_LN/o) { | 
| 177 | 137 |  |  | 137 |  | 220 | my $ln	= $self->read_length($+[0]); | 
| 178 | 137 |  |  |  |  | 195 | my ($ns, $local)	= ($ln =~ /^([^:]*:)(.*)$/); | 
| 179 |  |  |  |  |  |  | no warnings 'uninitialized'; | 
| 180 | 137 | 100 |  |  |  | 1797 | $local	=~ s{\\([-~.!&'()*+,;=:/?#@%_\$])}{$1}g; | 
| 181 | 104 |  |  |  |  | 1255 | return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns, $local); | 
| 182 | 104 |  |  |  |  | 501 | } else { | 
| 183 | 13 |  |  | 13 |  | 23280 | $self->buffer =~ $r_PNAME_NS; | 
|  | 13 |  |  |  |  | 39 |  | 
|  | 13 |  |  |  |  | 32148 |  | 
| 184 | 104 |  |  |  |  | 220 | my $ns	= $self->read_length($+[0]); | 
| 185 | 104 |  |  |  |  | 1502 | return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns); | 
| 186 |  |  |  |  |  |  | } | 
| 187 | 33 |  |  |  |  | 2667 | } | 
| 188 | 33 |  |  |  |  | 428 |  | 
| 189 | 33 |  |  |  |  | 500 | my $self	= shift; | 
| 190 |  |  |  |  |  |  | $self->read_word('>>'); | 
| 191 |  |  |  |  |  |  | return $self->new_token(GTGT, $self->start_line, $self->start_column, '>>'); | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  |  | 
| 194 | 1 |  |  | 1 |  | 3 | my $self	= shift; | 
| 195 | 1 |  |  |  |  | 4 | $self->get_char_safe(q[{]); | 
| 196 | 1 |  |  |  |  | 19 | if ($self->buffer =~ /^\|/o) { | 
| 197 |  |  |  |  |  |  | $self->get_char_safe(q[|]); | 
| 198 |  |  |  |  |  |  | return $self->new_token(LANNOT, $self->start_line, $self->start_column, '{|'); | 
| 199 |  |  |  |  |  |  | } | 
| 200 | 1 |  |  | 1 |  | 4 | return $self->new_token(LBRACE, $self->start_line, $self->start_column, '{'); | 
| 201 | 1 |  |  |  |  | 6 | } | 
| 202 | 1 | 50 |  |  |  | 23 |  | 
| 203 | 1 |  |  |  |  | 11 | my $self	= shift; | 
| 204 | 1 |  |  |  |  | 17 | $self->read_word('|}'); | 
| 205 |  |  |  |  |  |  | return $self->new_token(RANNOT, $self->start_line, $self->start_column, '|}'); | 
| 206 | 0 |  |  |  |  | 0 | } | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | my $self	= shift; | 
| 209 |  |  |  |  |  |  | $self->get_char_safe(q[<]); | 
| 210 | 1 |  |  | 1 |  | 3 | if ($self->buffer =~ /^</o) { | 
| 211 | 1 |  |  |  |  | 6 | $self->get_char_safe(q[<]); | 
| 212 | 1 |  |  |  |  | 18 | return $self->new_token(LTLT, $self->start_line, $self->start_column, '<<'); | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | if ($self->buffer =~ m/^[\x23-\x3d\x3f-\x5a\x5d-\x7e]*>/o) { | 
| 216 | 88 |  |  | 88 |  | 154 | my $iri	.= $self->read_length($+[0]); | 
| 217 | 88 |  |  |  |  | 269 | chop($iri); | 
| 218 | 88 | 100 |  |  |  | 1322 | return $self->new_token(IRI, $self->start_line, $self->start_column, $iri); | 
| 219 | 1 |  |  |  |  | 9 | } | 
| 220 | 1 |  |  |  |  | 14 |  | 
| 221 |  |  |  |  |  |  | my $iri	= ''; | 
| 222 |  |  |  |  |  |  | while (1) { | 
| 223 | 87 | 50 |  |  |  | 1918 | if (length($self->buffer) == 0) { | 
| 224 | 87 |  |  |  |  | 793 | my $c	= $self->peek_char; | 
| 225 | 87 |  |  |  |  | 199 | last unless defined($c); | 
| 226 | 87 |  |  |  |  | 1253 | } | 
| 227 |  |  |  |  |  |  | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 228 |  |  |  |  |  |  | $self->get_char_safe('\\'); | 
| 229 | 0 |  |  |  |  | 0 | my $esc	= $self->get_char; | 
| 230 | 0 |  |  |  |  | 0 | if ($esc eq '\\') { | 
| 231 | 0 | 0 |  |  |  | 0 | $iri	.= "\\"; | 
| 232 | 0 |  |  |  |  | 0 | } elsif ($esc eq 'U') { | 
| 233 | 0 | 0 |  |  |  | 0 | my $codepoint	= $self->read_length(8); | 
| 234 |  |  |  |  |  |  | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 235 | 0 | 0 |  |  |  | 0 | $iri .= chr(hex($codepoint)); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 236 | 0 |  |  |  |  | 0 | } elsif ($esc eq 'u') { | 
| 237 | 0 |  |  |  |  | 0 | my $codepoint	= $self->read_length(4); | 
| 238 | 0 | 0 |  |  |  | 0 | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 239 | 0 |  |  |  |  | 0 | my $char	= chr(hex($codepoint)); | 
| 240 |  |  |  |  |  |  | if ($char =~ /[<>" {}|\\^`]/o) { | 
| 241 | 0 |  |  |  |  | 0 | $self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char))); | 
| 242 | 0 | 0 |  |  |  | 0 | } | 
| 243 | 0 |  |  |  |  | 0 | $iri .= $char; | 
| 244 |  |  |  |  |  |  | } else { | 
| 245 | 0 |  |  |  |  | 0 | $self->_throw_error("Unrecognized iri escape '$esc'"); | 
| 246 | 0 | 0 |  |  |  | 0 | } | 
| 247 | 0 |  |  |  |  | 0 | } elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) { | 
| 248 | 0 | 0 |  |  |  | 0 | $iri	.= $self->read_length($+[0]); | 
| 249 | 0 |  |  |  |  | 0 | } elsif (substr($self->buffer, 0, 1) eq '>') { | 
| 250 |  |  |  |  |  |  | last; | 
| 251 | 0 |  |  |  |  | 0 | } else { | 
| 252 |  |  |  |  |  |  | my $c	= $self->peek_char; | 
| 253 | 0 |  |  |  |  | 0 | $self->_throw_error("Got '$c' while expecting IRI character"); | 
| 254 |  |  |  |  |  |  | } | 
| 255 |  |  |  |  |  |  | } | 
| 256 | 0 |  |  |  |  | 0 | $self->get_char_safe(q[>]); | 
| 257 |  |  |  |  |  |  | return $self->new_token(IRI, $self->start_line, $self->start_column, $iri); | 
| 258 | 0 |  |  |  |  | 0 | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 261 | 0 |  |  |  |  | 0 | $self->read_word('_:'); | 
| 262 |  |  |  |  |  |  | $self->_throw_error("Expected: name") unless ($self->buffer =~ /^${r_bnode_id}/o); | 
| 263 |  |  |  |  |  |  | my $name	= substr($self->buffer, 0, $+[0]); | 
| 264 | 0 |  |  |  |  | 0 | $self->read_word($name); | 
| 265 | 0 |  |  |  |  | 0 | return $self->new_token(BNODE, $self->start_line, $self->start_column, $name); | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | my $self	= shift; | 
| 269 | 26 |  |  | 26 |  | 51 | if ($self->buffer =~ /^${r_double}/o) { | 
| 270 | 26 |  |  |  |  | 84 | return $self->new_token(DOUBLE, $self->start_line, $self->start_column, $self->read_length($+[0])); | 
| 271 | 26 | 50 |  |  |  | 393 | } elsif ($self->buffer =~ /^${r_decimal}/o) { | 
| 272 | 26 |  |  |  |  | 938 | return $self->new_token(DECIMAL, $self->start_line, $self->start_column, $self->read_length($+[0])); | 
| 273 | 26 |  |  |  |  | 230 | } elsif ($self->buffer =~ /^${r_integer}/o) { | 
| 274 | 26 |  |  |  |  | 366 | return $self->new_token(INTEGER, $self->start_line, $self->start_column, $self->read_length($+[0])); | 
| 275 |  |  |  |  |  |  | } | 
| 276 |  |  |  |  |  |  | $self->_throw_error("Expected number"); | 
| 277 |  |  |  |  |  |  | } | 
| 278 | 10 |  |  | 10 |  | 18 |  | 
| 279 | 10 | 50 |  |  |  | 171 | my $self	= shift; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 280 | 0 |  |  |  |  | 0 | $self->get_char_safe('#'); | 
| 281 |  |  |  |  |  |  | my $comment	= ''; | 
| 282 | 0 |  |  |  |  | 0 | my $c		= $self->peek_char; | 
| 283 |  |  |  |  |  |  | while (length($c) and $c !~ /[\r\n]/o) { | 
| 284 | 10 |  |  |  |  | 975 | $comment	.= $self->get_char; | 
| 285 |  |  |  |  |  |  | $c			= $self->peek_char; | 
| 286 | 0 |  |  |  |  | 0 | } | 
| 287 |  |  |  |  |  |  | if (length($c) and $c =~ /[\r\n]/o) { | 
| 288 |  |  |  |  |  |  | $self->get_char; | 
| 289 |  |  |  |  |  |  | } | 
| 290 | 0 |  |  | 0 |  | 0 | return $self->new_token(COMMENT, $self->start_line, $self->start_column, $comment); | 
| 291 | 0 |  |  |  |  | 0 | } | 
| 292 | 0 |  |  |  |  | 0 |  | 
| 293 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 294 | 0 |  | 0 |  |  | 0 | # 	my $c		= $self->peek_char(); | 
| 295 | 0 |  |  |  |  | 0 | $self->get_char_safe(q["]); | 
| 296 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 2) eq q[""]) { | 
| 297 |  |  |  |  |  |  | # #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 | 
| 298 | 0 | 0 | 0 |  |  | 0 | $self->read_word(q[""]); | 
| 299 | 0 |  |  |  |  | 0 |  | 
| 300 |  |  |  |  |  |  | my $quote_count	= 0; | 
| 301 | 0 |  |  |  |  | 0 | my $string	= ''; | 
| 302 |  |  |  |  |  |  | while (1) { | 
| 303 |  |  |  |  |  |  | if (length($self->buffer) == 0) { | 
| 304 |  |  |  |  |  |  | $self->fill_buffer; | 
| 305 | 26 |  |  | 26 |  | 56 | $self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0); | 
| 306 |  |  |  |  |  |  | } | 
| 307 | 26 |  |  |  |  | 111 | if (substr($self->buffer, 0, 1) eq '"') { | 
| 308 | 26 | 100 |  |  |  | 418 | my $c	= $self->get_char; | 
| 309 |  |  |  |  |  |  | $quote_count++; | 
| 310 | 1 |  |  |  |  | 10 | last if ($quote_count == 3); | 
| 311 |  |  |  |  |  |  | } else { | 
| 312 | 1 |  |  |  |  | 3 | if ($quote_count) { | 
| 313 | 1 |  |  |  |  | 2 | $string	.= '"' foreach (1..$quote_count); | 
| 314 | 1 |  |  |  |  | 2 | $quote_count	= 0; | 
| 315 | 4 | 50 |  |  |  | 50 | } | 
| 316 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 317 | 0 | 0 |  |  |  | 0 | $string	.= $self->_get_escaped_char(); | 
| 318 |  |  |  |  |  |  | } else { | 
| 319 | 4 | 100 |  |  |  | 68 | $self->buffer	=~ /^[^"\\]+/; | 
| 320 | 3 |  |  |  |  | 20 | $string	.= $self->read_length($+[0]); | 
| 321 | 3 |  |  |  |  | 4 | } | 
| 322 | 3 | 100 |  |  |  | 8 | } | 
| 323 |  |  |  |  |  |  | } | 
| 324 | 1 | 50 |  |  |  | 10 | return $self->new_token(STRING3D, $self->start_line, $self->start_column, $string); | 
| 325 | 0 |  |  |  |  | 0 | } else { | 
| 326 | 0 |  |  |  |  | 0 | ### #x22 scharacter* #x22 | 
| 327 |  |  |  |  |  |  | my $string	= ''; | 
| 328 | 1 | 50 |  |  |  | 14 | while (1) { | 
| 329 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 330 |  |  |  |  |  |  | $string	.= $self->_get_escaped_char(); | 
| 331 | 1 |  |  |  |  | 19 | } elsif ($self->buffer =~ /^[^"\\]+/o) { | 
| 332 | 1 |  |  |  |  | 10 | $string	.= $self->read_length($+[0]); | 
| 333 |  |  |  |  |  |  | } elsif (substr($self->buffer, 0, 1) eq '"') { | 
| 334 |  |  |  |  |  |  | last; | 
| 335 |  |  |  |  |  |  | } else { | 
| 336 | 1 |  |  |  |  | 15 | my $c	= $self->peek_char; | 
| 337 |  |  |  |  |  |  | $self->_throw_error("Got '$c' while expecting string character"); | 
| 338 |  |  |  |  |  |  | } | 
| 339 | 25 |  |  |  |  | 200 | } | 
| 340 | 25 |  |  |  |  | 40 | $self->get_char_safe(q["]); | 
| 341 | 50 | 100 |  |  |  | 698 | return $self->new_token(STRING1D, $self->start_line, $self->start_column, $string); | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 342 | 1 |  |  |  |  | 19 | } | 
| 343 |  |  |  |  |  |  | } | 
| 344 | 24 |  |  |  |  | 707 |  | 
| 345 |  |  |  |  |  |  | my $self	= shift; | 
| 346 | 25 |  |  |  |  | 1248 | $self->get_char_safe("'"); | 
| 347 |  |  |  |  |  |  | if (substr($self->buffer, 0, 2) eq q['']) { | 
| 348 | 0 |  |  |  |  | 0 | # #x27 #x27 #x27 lcharacter* #x27 #x27 #x27 | 
| 349 | 0 |  |  |  |  | 0 | $self->read_word(q['']); | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | my $quote_count	= 0; | 
| 352 | 25 |  |  |  |  | 95 | my $string	= ''; | 
| 353 | 25 |  |  |  |  | 373 | while (1) { | 
| 354 |  |  |  |  |  |  | if (length($self->buffer) == 0) { | 
| 355 |  |  |  |  |  |  | $self->fill_buffer; | 
| 356 |  |  |  |  |  |  | $self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0); | 
| 357 |  |  |  |  |  |  | } | 
| 358 | 4 |  |  | 4 |  | 7 | if (substr($self->buffer, 0, 1) eq "'") { | 
| 359 | 4 |  |  |  |  | 10 | my $c	= $self->get_char; | 
| 360 | 4 | 50 |  |  |  | 56 | $quote_count++; | 
| 361 |  |  |  |  |  |  | last if ($quote_count == 3); | 
| 362 | 0 |  |  |  |  | 0 | } else { | 
| 363 |  |  |  |  |  |  | if ($quote_count) { | 
| 364 | 0 |  |  |  |  | 0 | $string	.= "'" foreach (1..$quote_count); | 
| 365 | 0 |  |  |  |  | 0 | $quote_count	= 0; | 
| 366 | 0 |  |  |  |  | 0 | } | 
| 367 | 0 | 0 |  |  |  | 0 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 368 | 0 |  |  |  |  | 0 | $string	.= $self->_get_escaped_char(); | 
| 369 | 0 | 0 |  |  |  | 0 | } else { | 
| 370 |  |  |  |  |  |  | $self->buffer	=~ /^[^'\\]+/; | 
| 371 | 0 | 0 |  |  |  | 0 | $string	.= $self->read_length($+[0]); | 
| 372 | 0 |  |  |  |  | 0 | } | 
| 373 | 0 |  |  |  |  | 0 | } | 
| 374 | 0 | 0 |  |  |  | 0 | } | 
| 375 |  |  |  |  |  |  | return $self->new_token(STRING3S, $self->start_line, $self->start_column, $string); | 
| 376 | 0 | 0 |  |  |  | 0 | } else { | 
| 377 | 0 |  |  |  |  | 0 | ### #x27 scharacter* #x27 | 
| 378 | 0 |  |  |  |  | 0 | my $string	= ''; | 
| 379 |  |  |  |  |  |  | while (1) { | 
| 380 | 0 | 0 |  |  |  | 0 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 381 | 0 |  |  |  |  | 0 | $string	.= $self->_get_escaped_char(); | 
| 382 |  |  |  |  |  |  | } elsif ($self->buffer =~ /^[^'\\]+/o) { | 
| 383 | 0 |  |  |  |  | 0 | $string	.= $self->read_length($+[0]); | 
| 384 | 0 |  |  |  |  | 0 | } elsif (substr($self->buffer, 0, 1) eq "'") { | 
| 385 |  |  |  |  |  |  | last; | 
| 386 |  |  |  |  |  |  | } else { | 
| 387 |  |  |  |  |  |  | my $c		= $self->peek_char(); | 
| 388 | 0 |  |  |  |  | 0 | $self->_throw_error("Got '$c' while expecting string character"); | 
| 389 |  |  |  |  |  |  | } | 
| 390 |  |  |  |  |  |  | } | 
| 391 | 4 |  |  |  |  | 28 | $self->get_char_safe(q[']); | 
| 392 | 4 |  |  |  |  | 6 | return $self->new_token(STRING1S, $self->start_line, $self->start_column, $string); | 
| 393 | 9 | 100 |  |  |  | 118 | } | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 394 | 5 |  |  |  |  | 31 | } | 
| 395 |  |  |  |  |  |  |  | 
| 396 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 397 |  |  |  |  |  |  | my $c	= $self->peek_char; | 
| 398 | 4 |  |  |  |  | 181 | $self->get_char_safe('\\'); | 
| 399 |  |  |  |  |  |  | my $esc	= $self->get_char; | 
| 400 | 0 |  |  |  |  | 0 | if ($esc eq '\\') { return "\\" } | 
| 401 | 0 |  |  |  |  | 0 | elsif ($esc =~ /^['">]$/) { return $esc } | 
| 402 |  |  |  |  |  |  | elsif ($esc eq 'r') { return "\r" } | 
| 403 |  |  |  |  |  |  | elsif ($esc eq 't') { return "\t" } | 
| 404 | 4 |  |  |  |  | 14 | elsif ($esc eq 'n') { return "\n" } | 
| 405 | 4 |  |  |  |  | 56 | elsif ($esc eq 'b') { return "\b" } | 
| 406 |  |  |  |  |  |  | elsif ($esc eq 'f') { return "\f" } | 
| 407 |  |  |  |  |  |  | elsif ($esc eq 'U') { | 
| 408 |  |  |  |  |  |  | my $codepoint	= $self->read_length(8); | 
| 409 |  |  |  |  |  |  | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 410 | 6 |  |  | 6 |  | 9 | return chr(hex($codepoint)); | 
| 411 | 6 |  |  |  |  | 14 | } elsif ($esc eq 'u'){ | 
| 412 | 6 |  |  |  |  | 14 | my $codepoint	= $self->read_length(4); | 
| 413 | 6 |  |  |  |  | 14 | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 414 | 6 | 50 |  |  |  | 46 | return chr(hex($codepoint)); | 
|  | 0 | 100 |  |  |  | 0 |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 415 | 2 |  |  |  |  | 7 | } | 
| 416 | 0 |  |  |  |  | 0 | $self->_throw_error("Unrecognized string escape '$esc'"); | 
| 417 | 0 |  |  |  |  | 0 | } | 
| 418 | 0 |  |  |  |  | 0 |  | 
| 419 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 420 | 0 |  |  |  |  | 0 | $self->get_char_safe('@'); | 
| 421 |  |  |  |  |  |  | if ($self->buffer =~ /^base/o) { | 
| 422 | 2 |  |  |  |  | 7 | $self->read_word('base'); | 
| 423 | 2 | 50 |  |  |  | 15 | return $self->new_token(TURTLEBASE, $self->start_line, $self->start_column); | 
| 424 | 2 |  |  |  |  | 9 | } elsif ($self->buffer =~ /^prefix/o) { | 
| 425 |  |  |  |  |  |  | $self->read_word('prefix'); | 
| 426 | 2 |  |  |  |  | 7 | return $self->new_token(TURTLEPREFIX, $self->start_line, $self->start_column); | 
| 427 | 2 | 50 |  |  |  | 8 | } else { | 
| 428 | 2 |  |  |  |  | 11 | if ($self->buffer =~ /^[a-zA-Z]+(-[a-zA-Z0-9]+)*\b/o) { | 
| 429 |  |  |  |  |  |  | my $lang	= $self->read_length($+[0]); | 
| 430 | 0 |  |  |  |  | 0 | return $self->new_token(LANG, $self->start_line, $self->start_column, $lang); | 
| 431 |  |  |  |  |  |  | } | 
| 432 |  |  |  |  |  |  | $self->_throw_error("Expected keyword or language tag"); | 
| 433 |  |  |  |  |  |  | } | 
| 434 | 35 |  |  | 35 |  | 77 | } | 
| 435 | 35 |  |  |  |  | 164 |  | 
| 436 | 35 | 50 |  |  |  | 542 | my $self	= shift; | 
|  |  | 100 |  |  |  |  |  | 
| 437 | 0 |  |  |  |  | 0 | my $error	= shift; | 
| 438 | 0 |  |  |  |  | 0 | my $line	= $self->line; | 
| 439 |  |  |  |  |  |  | my $col		= $self->column; | 
| 440 | 31 |  |  |  |  | 815 | Carp::confess "$error at $line:$col with buffer: " . Dumper($self->buffer); | 
| 441 | 31 |  |  |  |  | 485 | } | 
| 442 |  |  |  |  |  |  | } | 
| 443 | 4 | 50 |  |  |  | 154 |  | 
| 444 | 4 |  |  |  |  | 49 | 1; | 
| 445 | 4 |  |  |  |  | 71 |  | 
| 446 |  |  |  |  |  |  |  | 
| 447 | 0 |  |  |  |  |  | =end private | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | =back | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | =head1 BUGS | 
| 452 | 0 |  |  | 0 |  |  |  | 
| 453 | 0 |  |  |  |  |  | Please report any bugs or feature requests through the GitHub web interface | 
| 454 | 0 |  |  |  |  |  | at L<https://github.com/kasei/perlrdf/issues>. | 
| 455 | 0 |  |  |  |  |  |  | 
| 456 | 0 |  |  |  |  |  | =head1 AUTHOR | 
| 457 |  |  |  |  |  |  |  | 
| 458 |  |  |  |  |  |  | Gregory Todd Williams  C<< <gwilliams@cpan.org> >> | 
| 459 |  |  |  |  |  |  |  | 
| 460 |  |  |  |  |  |  | =head1 COPYRIGHT | 
| 461 |  |  |  |  |  |  |  | 
| 462 |  |  |  |  |  |  | Copyright (c) 2014--2022 Gregory Todd Williams. This | 
| 463 |  |  |  |  |  |  | program is free software; you can redistribute it and/or modify it under | 
| 464 |  |  |  |  |  |  | the same terms as Perl itself. | 
| 465 |  |  |  |  |  |  |  | 
| 466 |  |  |  |  |  |  | =cut |