| line | stmt | bran | cond | sub | pod | time | code | 
| 1 | 13 |  |  | 13 |  | 28634 | use v5.14; | 
|  | 13 |  |  |  |  | 85 |  | 
| 2 | 13 |  |  | 13 |  | 68 | use warnings; | 
|  | 13 |  |  |  |  | 31 |  | 
|  | 13 |  |  |  |  | 616 |  | 
| 3 |  |  |  |  |  |  |  | 
| 4 |  |  |  |  |  |  | =head1 NAME | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | AtteanX::Parser::SPARQLLex - SPARQL Lexer | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | =head1 VERSION | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | This document describes AtteanX::Parser::SPARQLLex version 0.032 | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | use Attean; | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | ... | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | =head1 ATTRIBUTES | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | =over 4 | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =item C<< canonical_media_type >> | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =item C<< media_types >> | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =item C<< file_extensions >> | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | =item C<< handled_type >> | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | =item C<< extend >> | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | =back | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | =head1 METHODS | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =over 4 | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | =cut | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | use utf8; | 
| 43 | 13 |  |  | 13 |  | 72 | use Moo; | 
|  | 13 |  |  |  |  | 29 |  | 
|  | 13 |  |  |  |  | 118 |  | 
| 44 | 13 |  |  | 13 |  | 291 | use Attean; | 
|  | 13 |  |  |  |  | 25 |  | 
|  | 13 |  |  |  |  | 97 |  | 
| 45 | 13 |  |  | 13 |  | 5131 | use Encode; | 
|  | 13 |  |  |  |  | 40 |  | 
|  | 13 |  |  |  |  | 85 |  | 
| 46 | 13 |  |  | 13 |  | 77 | use Encode qw(decode); | 
|  | 13 |  |  |  |  | 35 |  | 
|  | 13 |  |  |  |  | 1210 |  | 
| 47 | 13 |  |  | 13 |  | 95 | use List::MoreUtils qw(zip); | 
|  | 13 |  |  |  |  | 42 |  | 
|  | 13 |  |  |  |  | 547 |  | 
| 48 | 13 |  |  | 13 |  | 80 | use Types::Standard qw(ArrayRef); | 
|  | 13 |  |  |  |  | 25 |  | 
|  | 13 |  |  |  |  | 128 |  | 
| 49 | 13 |  |  | 13 |  | 8893 | use namespace::clean; | 
|  | 13 |  |  |  |  | 28 |  | 
|  | 13 |  |  |  |  | 147 |  | 
| 50 | 13 |  |  | 13 |  | 7272 |  | 
|  | 13 |  |  |  |  | 31 |  | 
|  | 13 |  |  |  |  | 121 |  | 
| 51 |  |  |  |  |  |  |  | 
| 52 | 1 |  |  | 1 | 1 | 904 | # these pass through to the lexer iterator | 
| 53 |  |  |  |  |  |  | has extend		=> ( is => 'ro', isa => ArrayRef, default => sub { [] } ); | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | return [qw(application/x-sparql-query-tokens)]; | 
| 56 |  |  |  |  |  |  | } | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 3 |  |  | 3 | 1 | 12 | state $ITEM_TYPE = Type::Tiny::Role->new(role => 'AtteanX::SPARQL::Token'); | 
| 59 |  |  |  |  |  |  | return $ITEM_TYPE; | 
| 60 |  |  |  |  |  |  | } | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 2 |  |  | 2 | 1 | 7 | =item C<< file_extensions >> | 
| 63 | 2 |  |  |  |  | 62 |  | 
| 64 |  |  |  |  |  |  | Returns a list of file extensions that may be parsed with the parser. | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | =cut | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | with 'Attean::API::PullParser', 'Attean::API::Parser'; | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | =item C<< parse_iter_from_bytes( $data ) >> | 
| 72 | 4 |  |  | 4 | 1 | 16 |  | 
| 73 |  |  |  |  |  |  | Returns an iterator of SPARQL tokens that result from parsing | 
| 74 |  |  |  |  |  |  | the SPARQL query/update read from the UTF-8 encoded byte string C<< $data >>. | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =cut | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | my $self	= shift; | 
| 79 |  |  |  |  |  |  | my $data	= shift; | 
| 80 |  |  |  |  |  |  | open(my $fh, '<:encoding(UTF-8)', \$data); | 
| 81 |  |  |  |  |  |  | return $self->parse_iter_from_io($fh); | 
| 82 |  |  |  |  |  |  | } | 
| 83 |  |  |  |  |  |  |  | 
| 84 | 56 |  |  | 56 | 1 | 162 | =item C<< parse_iter_from_io( $fh ) >> | 
| 85 | 56 |  |  |  |  | 124 |  | 
| 86 | 8 |  |  | 8 |  | 63 | Returns an iterator of SPARQL tokens that result from parsing | 
|  | 8 |  |  | 7 |  | 20 |  | 
|  | 8 |  |  |  |  | 72 |  | 
|  | 7 |  |  |  |  | 6228 |  | 
|  | 7 |  |  |  |  | 24 |  | 
|  | 7 |  |  |  |  | 33 |  | 
|  | 56 |  |  |  |  | 1606 |  | 
| 87 | 56 |  |  |  |  | 11371 | the SPARQL query/update read from the L<IO::Handle> object C<< $fh >>. | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | =cut | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | my $self	= shift; | 
| 92 |  |  |  |  |  |  | my $fh		= shift; | 
| 93 |  |  |  |  |  |  | return AtteanX::Parser::SPARQLLex::Iterator->new( | 
| 94 |  |  |  |  |  |  | extend => $self->extend, | 
| 95 |  |  |  |  |  |  | file => $fh, | 
| 96 |  |  |  |  |  |  | ); | 
| 97 |  |  |  |  |  |  | } | 
| 98 | 61 |  |  | 61 | 1 | 251 | } | 
| 99 | 61 |  |  |  |  | 120 |  | 
| 100 | 61 |  |  |  |  | 1481 | use utf8; | 
| 101 |  |  |  |  |  |  | use Moo; | 
| 102 |  |  |  |  |  |  | use Attean; | 
| 103 |  |  |  |  |  |  | use Encode; | 
| 104 |  |  |  |  |  |  | use Encode qw(decode); | 
| 105 |  |  |  |  |  |  | use List::MoreUtils qw(zip); | 
| 106 |  |  |  |  |  |  | use AtteanX::SPARQL::Token; | 
| 107 |  |  |  |  |  |  | use AtteanX::SPARQL::Constants; | 
| 108 | 13 |  |  | 13 |  | 16185 | use Types::Standard qw(FileHandle Ref Str Int ArrayRef HashRef ConsumerOf InstanceOf); | 
|  | 13 |  |  |  |  | 34 |  | 
|  | 13 |  |  |  |  | 72 |  | 
| 109 | 13 |  |  | 13 |  | 369 | use namespace::clean; | 
|  | 13 |  |  |  |  | 29 |  | 
|  | 13 |  |  |  |  | 68 |  | 
| 110 | 13 |  |  | 13 |  | 4733 |  | 
|  | 13 |  |  |  |  | 53 |  | 
|  | 13 |  |  |  |  | 79 |  | 
| 111 | 13 |  |  | 13 |  | 74 | has lookahead_methods		=> ( is => 'ro', isa => HashRef, default => sub { +{} } ); | 
|  | 13 |  |  |  |  | 34 |  | 
|  | 13 |  |  |  |  | 1022 |  | 
| 112 | 13 |  |  | 13 |  | 98 | has lookahead_tokens		=> ( is => 'ro', isa => HashRef, default => sub { +{} } ); | 
|  | 13 |  |  |  |  | 28 |  | 
|  | 13 |  |  |  |  | 603 |  | 
| 113 | 13 |  |  | 13 |  | 93 | has extend					=> ( is => 'ro', isa => ArrayRef, default => sub { [] } ); | 
|  | 13 |  |  |  |  | 25 |  | 
|  | 13 |  |  |  |  | 97 |  | 
| 114 | 13 |  |  | 13 |  | 12210 | has token_buffer			=> ( is => 'ro', isa => ArrayRef, default => sub { [] } ); | 
|  | 13 |  |  |  |  | 30 |  | 
|  | 13 |  |  |  |  | 348 |  | 
| 115 | 13 |  |  | 13 |  | 63 |  | 
|  | 13 |  |  |  |  | 27 |  | 
|  | 13 |  |  |  |  | 2781 |  | 
| 116 | 13 |  |  | 13 |  | 102 | with 'AtteanX::API::Lexer'; | 
|  | 13 |  |  |  |  | 26 |  | 
|  | 13 |  |  |  |  | 71 |  | 
| 117 | 13 |  |  | 13 |  | 15567 |  | 
|  | 13 |  |  |  |  | 28 |  | 
|  | 13 |  |  |  |  | 77 |  | 
| 118 |  |  |  |  |  |  | my $r_ECHAR					= qr/\\([tbnrf\\"'])/o; | 
| 119 |  |  |  |  |  |  | my $r_STRING_LITERAL1		= qr/'(([^\x{27}\x{5C}\x{0A}\x{0D}])|${r_ECHAR})*'/o; | 
| 120 |  |  |  |  |  |  | my $r_STRING_LITERAL2		= qr/"(([^\x{22}\x{5C}\x{0A}\x{0D}])|${r_ECHAR})*"/o; | 
| 121 |  |  |  |  |  |  | my $r_STRING_LITERAL_LONG1	= qr/'''(('|'')?([^'\\]|${r_ECHAR}))*'''/o; | 
| 122 |  |  |  |  |  |  | my $r_STRING_LITERAL_LONG2	= qr/"""(("|"")?([^"\\]|${r_ECHAR}))*"""/o; | 
| 123 |  |  |  |  |  |  | my $r_LANGTAG				= qr/@[a-zA-Z]+(-[a-zA-Z0-9]+)*/o; | 
| 124 |  |  |  |  |  |  | my $r_IRI_REF				= qr/<([^<>"{}|^`\\\x{00}-\x{20}])*>/o; | 
| 125 |  |  |  |  |  |  | my $r_PN_CHARS_BASE			= qr/([A-Z]|[a-z]|[\x{00C0}-\x{00D6}]|[\x{00D8}-\x{00F6}]|[\x{00F8}-\x{02FF}]|[\x{0370}-\x{037D}]|[\x{037F}-\x{1FFF}]|[\x{200C}-\x{200D}]|[\x{2070}-\x{218F}]|[\x{2C00}-\x{2FEF}]|[\x{3001}-\x{D7FF}]|[\x{F900}-\x{FDCF}]|[\x{FDF0}-\x{FFFD}]|[\x{10000}-\x{EFFFF}])/o; | 
| 126 |  |  |  |  |  |  | my $r_PN_CHARS_U			= qr/([_]|${r_PN_CHARS_BASE})/o; | 
| 127 |  |  |  |  |  |  | my $r_VARNAME				= qr/((${r_PN_CHARS_U}|[0-9])(${r_PN_CHARS_U}|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}])*)/o; | 
| 128 |  |  |  |  |  |  | my $r_VAR1					= qr/[?]${r_VARNAME}/o; | 
| 129 |  |  |  |  |  |  | my $r_VAR2					= qr/[\$]${r_VARNAME}/o; | 
| 130 |  |  |  |  |  |  | my $r_PN_CHARS				= qr/${r_PN_CHARS_U}|-|[0-9]|\x{00B7}|[\x{0300}-\x{036F}]|[\x{203F}-\x{2040}]/o; | 
| 131 |  |  |  |  |  |  | my $r_PN_PREFIX				= qr/(${r_PN_CHARS_BASE}((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o; | 
| 132 |  |  |  |  |  |  | my $r_PN_LOCAL_ESCAPED		= qr{(\\([-~.!&'()*+,;=/?#@%_\$]))|%[0-9A-Fa-f]{2}}o; | 
| 133 |  |  |  |  |  |  | my $r_PN_LOCAL				= qr/((${r_PN_CHARS_U}|[:0-9]|${r_PN_LOCAL_ESCAPED})((${r_PN_CHARS}|${r_PN_LOCAL_ESCAPED}|[:.])*(${r_PN_CHARS}|[:]|${r_PN_LOCAL_ESCAPED}))?)/o; | 
| 134 |  |  |  |  |  |  | my $r_PN_LOCAL_BNODE		= qr/((${r_PN_CHARS_U}|[0-9])((${r_PN_CHARS}|[.])*${r_PN_CHARS})?)/o; | 
| 135 |  |  |  |  |  |  | my $r_PNAME_NS				= qr/((${r_PN_PREFIX})?:)/o; | 
| 136 |  |  |  |  |  |  | my $r_PNAME_LN				= qr/(${r_PNAME_NS}${r_PN_LOCAL})/o; | 
| 137 |  |  |  |  |  |  | my $r_EXPONENT				= qr/[eE][-+]?\d+/o; | 
| 138 |  |  |  |  |  |  | my $r_DOUBLE				= qr/\d+[.]\d*${r_EXPONENT}|[.]\d+${r_EXPONENT}|\d+${r_EXPONENT}/o; | 
| 139 |  |  |  |  |  |  | my $r_DECIMAL				= qr/(\d+[.]\d*)|([.]\d+)/o; | 
| 140 |  |  |  |  |  |  | my $r_INTEGER				= qr/\d+/o; | 
| 141 |  |  |  |  |  |  | my $r_BLANK_NODE_LABEL		= qr/_:${r_PN_LOCAL_BNODE}/o; | 
| 142 |  |  |  |  |  |  | my $r_ANON					= qr/\[[\t\r\n ]*\]/o; | 
| 143 |  |  |  |  |  |  | my $r_NIL					= qr/\([\n\r\t ]*\)/o; | 
| 144 |  |  |  |  |  |  | my $r_KEYWORDS				= qr/(ABS|ADD|ALL|ASC|ASK|AS|AVG|BASE|BIND|BNODE|BOUND|BY|CEIL|CLEAR|COALESCE|CONCAT|CONSTRUCT|CONTAINS|COPY|COUNT|CREATE|DATATYPE|DAY|DEFAULT|DELETE|DELETE WHERE|DESCRIBE|DESC|DISTINCT|DISTINCT|DROP|ENCODE_FOR_URI|EXISTS|FILTER|FLOOR|FROM|GRAPH|GROUP_CONCAT|GROUP|HAVING|HOURS|IF|INSERT|INSERT|DATA|INTO|IN|IRI|ISBLANK|ISIRI|ISLITERAL|ISNUMERIC|ISURI|LANGMATCHES|LANG|LCASE|LIMIT|LOAD|MAX|MD5|MINUS|MINUTES|MIN|MONTH|MOVE|NAMED|NOT|NOW|OFFSET|OPTIONAL|ORDER|PREFIX|RAND|REDUCED|REGEX|REPLACE|ROUND|SAMETERM|SAMPLE|SECONDS|SELECT|SEPARATOR|SERVICE|SHA1|SHA256|SHA384|SHA512|SILENT|STRAFTER|STRBEFORE|STRDT|STRENDS|STRLANG|STRLEN|STRSTARTS|STRUUID|STR|SUBSTR|SUM|TIMEZONE|TO|TZ|UCASE|UNDEF|UNION|URI|USING|UUID|VALUES|WHERE|WITH|YEAR|TRIPLE|ISTRIPLE|SUBJECT|PREDICATE|OBJECT|HINT)(?!:)\b/io; | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | my $self	= shift; | 
| 147 |  |  |  |  |  |  | my %METHOD_TOKEN	= ( | 
| 148 |  |  |  |  |  |  | # 	q[#]	=> '_get_comment', | 
| 149 |  |  |  |  |  |  | q[@]	=> '_get_lang', | 
| 150 |  |  |  |  |  |  | q[<]	=> '_get_iriref_or_relational', | 
| 151 |  |  |  |  |  |  | q[{]	=> '_get_brace_or_annotation_or_or', | 
| 152 |  |  |  |  |  |  | q[}]	=> '_get_brace_or_annotation_or_or', | 
| 153 |  |  |  |  |  |  | q[|]	=> '_get_brace_or_annotation_or_or', | 
| 154 |  |  |  |  |  |  | q[_]	=> '_get_bnode', | 
| 155 | 61 |  |  | 61 | 0 | 1532 | q[']	=> '_get_single_literal', | 
| 156 |  |  |  |  |  |  | q["]	=> '_get_double_literal', | 
| 157 |  |  |  |  |  |  | q[:]	=> '_get_pname', | 
| 158 |  |  |  |  |  |  | q[?]	=> '_get_variable', | 
| 159 |  |  |  |  |  |  | q[$]	=> '_get_variable', | 
| 160 |  |  |  |  |  |  | q[!]	=> '_get_bang', | 
| 161 |  |  |  |  |  |  | q[>]	=> '_get_iriref_or_relational', | 
| 162 |  |  |  |  |  |  | q([)	=> '_get_lbracket_or_anon', | 
| 163 |  |  |  |  |  |  | q[(]	=> '_get_lparen_or_nil', | 
| 164 |  |  |  |  |  |  | (map {$_ => '_get_number'} (0 .. 9, '-', '+')) | 
| 165 |  |  |  |  |  |  | ); | 
| 166 |  |  |  |  |  |  | while (my ($k,$v) = each(%METHOD_TOKEN)) { | 
| 167 |  |  |  |  |  |  | if (length($k) != 1) { | 
| 168 |  |  |  |  |  |  | die "Cannot set a lookahead token handler method with lookahead > 1 char"; | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  | $self->lookahead_methods->{$k}	//= $v; | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 61 |  |  |  |  | 355 | my %CHAR_TOKEN	= ( | 
|  | 732 |  |  |  |  | 2446 |  | 
| 174 |  |  |  |  |  |  | ','	=> COMMA, | 
| 175 | 61 |  |  |  |  | 402 | '.'	=> DOT, | 
| 176 | 1647 | 50 |  |  |  | 2425 | '='	=> EQUALS, | 
| 177 | 0 |  |  |  |  | 0 | ']'	=> RBRACKET, | 
| 178 |  |  |  |  |  |  | ')'	=> RPAREN, | 
| 179 | 1647 |  | 33 |  |  | 5297 | '-'	=> MINUS, | 
| 180 |  |  |  |  |  |  | '+'	=> PLUS, | 
| 181 |  |  |  |  |  |  | ';'	=> SEMICOLON, | 
| 182 | 61 |  |  |  |  | 811 | '/'	=> SLASH, | 
| 183 |  |  |  |  |  |  | '*'	=> STAR, | 
| 184 |  |  |  |  |  |  | ); | 
| 185 |  |  |  |  |  |  | while (my ($k,$v) = each(%CHAR_TOKEN)) { | 
| 186 |  |  |  |  |  |  | if (length($k) != 1) { | 
| 187 |  |  |  |  |  |  | die "Cannot set a lookahead token with lookahead > 1 char"; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  | $self->lookahead_tokens->{$k}	//= $v; | 
| 190 |  |  |  |  |  |  | } | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | $self->add_regex_rule( $r_KEYWORDS, KEYWORD, sub { return uc(shift) } ); | 
| 193 |  |  |  |  |  |  | } | 
| 194 | 61 |  |  |  |  | 292 |  | 
| 195 | 610 | 50 |  |  |  | 974 | my $self	= shift; | 
| 196 | 0 |  |  |  |  | 0 | my $b		= $self->token_buffer; | 
| 197 |  |  |  |  |  |  | my $t		= $self->next; | 
| 198 | 610 |  | 33 |  |  | 2238 | return unless ($t); | 
| 199 |  |  |  |  |  |  | push(@$b, $t); | 
| 200 |  |  |  |  |  |  | return $t; | 
| 201 | 61 |  |  | 178 |  | 502 | } | 
|  | 178 |  |  |  |  | 520 |  | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | my $self	= shift; | 
| 204 |  |  |  |  |  |  | my $b		= $self->token_buffer; | 
| 205 | 5333 |  |  | 5333 | 0 | 6720 | if (scalar(@$b)) { | 
| 206 | 5333 |  |  |  |  | 8297 | return shift(@$b); | 
| 207 | 5333 |  |  |  |  | 7827 | } else { | 
| 208 | 5333 | 100 |  |  |  | 37435 | return $self->get_token(); | 
| 209 | 5065 |  |  |  |  | 7215 | } | 
| 210 | 5065 |  |  |  |  | 8068 | } | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | my $self	= shift; | 
| 213 |  |  |  |  |  |  | unless (length($self->buffer)) { | 
| 214 | 6090 |  |  | 6090 | 0 | 34578 | my $line	= $self->file->getline; | 
| 215 | 6090 |  |  |  |  | 7562 | if (defined($line)) { | 
| 216 | 6090 | 100 |  |  |  | 9585 | no warnings 'uninitialized'; | 
| 217 | 5064 |  |  |  |  | 7805 | $line		=~ s{\\(?:(?:u([0-9A-Fa-f]{4}))|(?:U([0-9A-Fa-f]{8})))}{ | 
| 218 |  |  |  |  |  |  | my $h			= $1 . $2; | 
| 219 | 1026 |  |  |  |  | 2171 | my $codepoint	= hex($h); | 
| 220 |  |  |  |  |  |  | if ($codepoint >= 0xD800 and $codepoint <= 0xDFFF) { | 
| 221 |  |  |  |  |  |  | die "Unicode surrogate U+$h is illegal in UTF-8"; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  | chr($codepoint); | 
| 224 | 630 |  |  | 630 | 0 | 3004 | }ge; | 
| 225 | 630 | 50 |  |  |  | 7651 | # 				$line		=~ s/\\u([0-9A-Fa-f]{4})/chr(hex($1))/ge; | 
| 226 | 630 |  |  |  |  | 12663 | # 				$line		=~ s/\\U([0-9A-Fa-f]{8})/chr(hex($1))/ge; | 
| 227 | 630 | 100 |  |  |  | 14547 | $self->{buffer}	.= $line; | 
| 228 | 13 |  |  | 13 |  | 33032 | } | 
|  | 13 |  |  |  |  | 56 |  | 
|  | 13 |  |  |  |  | 14097 |  | 
| 229 | 86 |  |  |  |  | 296 | } | 
| 230 | 3 |  |  |  |  | 11 | } | 
| 231 | 3 |  |  |  |  | 9 |  | 
| 232 | 3 | 50 | 33 |  |  | 9 | my $self		= shift; | 
| 233 | 0 |  |  |  |  | 0 | my $type		= shift; | 
| 234 |  |  |  |  |  |  | my $start_line	= shift; | 
| 235 | 3 |  |  |  |  | 14 | my $start_col	= shift; | 
| 236 |  |  |  |  |  |  | my $line		= $self->line; | 
| 237 |  |  |  |  |  |  | my $col			= $self->column; | 
| 238 |  |  |  |  |  |  | return AtteanX::SPARQL::Token->fast_constructor( $type, $start_line, $start_col, $line, $col, \@_ ); | 
| 239 | 86 |  |  |  |  | 306 | } | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | my $self	= shift; | 
| 242 |  |  |  |  |  |  | my $r		= shift; | 
| 243 |  |  |  |  |  |  | my $ttype	= shift; | 
| 244 |  |  |  |  |  |  | my $convert	= shift; | 
| 245 | 752 |  |  | 752 | 0 | 13737 | my $extend	= $self->extend; | 
| 246 | 752 |  |  |  |  | 1295 | push(@$extend, sub { | 
| 247 | 752 |  |  |  |  | 1111 | my $l	= shift; | 
| 248 | 752 |  |  |  |  | 995 |  | 
| 249 | 752 |  |  |  |  | 10274 | if ($l->buffer =~ /^$r\b/) { | 
| 250 | 752 |  |  |  |  | 12164 | my $value	= $self->read_length($+[0]); | 
| 251 | 752 |  |  |  |  | 6309 | my $c		= $convert ? $convert->($value) : $value; | 
| 252 |  |  |  |  |  |  | return $l->new_token($ttype, $l->start_line, $l->start_column, $c); | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  | }); | 
| 255 | 113 |  |  | 113 | 0 | 214 | } | 
| 256 | 113 |  |  |  |  | 181 |  | 
| 257 | 113 |  |  |  |  | 204 | my $self	= shift; | 
| 258 | 113 |  |  |  |  | 211 | while (1) { | 
| 259 | 113 |  |  |  |  | 264 | $self->fill_buffer unless (length($self->buffer)); | 
| 260 |  |  |  |  |  |  |  | 
| 261 | 1307 |  |  | 1307 |  | 2283 | if ($self->buffer =~ /^[ \r\n\t]+/o) { | 
| 262 |  |  |  |  |  |  | $self->read_length($+[0]); | 
| 263 | 1307 | 100 |  |  |  | 20694 | # we're ignoring whitespace tokens, but we could return them here instead of falling through to the 'next': | 
| 264 | 179 |  |  |  |  | 90071 | # 			return $self->new_token(WS); | 
| 265 | 179 | 50 |  |  |  | 724 | next; | 
| 266 | 179 |  |  |  |  | 3671 | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 | 113 |  |  |  |  | 1166 | my $c	= $self->peek_char(); | 
| 269 |  |  |  |  |  |  | return unless (defined($c)); | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | if ($c eq '#') { | 
| 272 | 1026 |  |  | 1026 | 0 | 1435 | # we're ignoring comment tokens, but we could return them here instead of falling through to the 'next': | 
| 273 | 1026 |  |  |  |  | 1231 | $self->_get_comment(); | 
| 274 | 1670 | 100 |  |  |  | 22832 | next; | 
| 275 |  |  |  |  |  |  | } | 
| 276 | 1670 | 100 |  |  |  | 27507 |  | 
| 277 | 644 |  |  |  |  | 5843 | my $start_column	= $self->column; | 
| 278 |  |  |  |  |  |  | my $start_line		= $self->line; | 
| 279 |  |  |  |  |  |  |  | 
| 280 | 644 |  |  |  |  | 1376 | $self->start_column( $start_column ); | 
| 281 |  |  |  |  |  |  | $self->start_line( $start_line ); | 
| 282 |  |  |  |  |  |  |  | 
| 283 | 1026 |  |  |  |  | 7191 | foreach my $e (@{ $self->extend }) { | 
| 284 | 1026 | 100 |  |  |  | 2213 | if (my $t = $e->( $self )) { | 
| 285 |  |  |  |  |  |  | return $t; | 
| 286 | 754 | 50 |  |  |  | 1541 | } | 
| 287 |  |  |  |  |  |  | } | 
| 288 | 0 |  |  |  |  | 0 |  | 
| 289 | 0 |  |  |  |  | 0 | if ($c eq '.' and $self->buffer =~ /^$r_DECIMAL/) { | 
| 290 |  |  |  |  |  |  | return $self->_get_number(); | 
| 291 |  |  |  |  |  |  | } | 
| 292 | 754 |  |  |  |  | 10160 |  | 
| 293 | 754 |  |  |  |  | 12234 | if (defined(my $name = $self->lookahead_tokens->{$c})) { $self->get_char; return $self->new_token($name, $start_line, $start_column, $c); } | 
| 294 |  |  |  |  |  |  | elsif (defined(my $method = $self->lookahead_methods->{$c})) { return $self->$method() } | 
| 295 | 754 |  |  |  |  | 12483 | elsif ($c =~ /[A-Za-z\x{00C0}-\x{00D6}\x{00D8}-\x{00F6}\x{00F8}-\x{02FF}\x{0370}-\x{037D}\x{037F}-\x{1FFF}\x{200C}-\x{200D}\x{2070}-\x{218F}\x{2C00}-\x{2FEF}\x{3001}-\x{D7FF}\x{F900}-\x{FDCF}\x{FDF0}-\x{FFFD}\x{10000}-\x{EFFFF}]/o) { | 
| 296 | 754 |  |  |  |  | 27725 | if ($self->buffer =~ /^a(?!:)\s/o) { | 
| 297 |  |  |  |  |  |  | $self->get_char; | 
| 298 | 754 |  |  |  |  | 16593 | return $self->new_token(A, $start_line, $start_column, 'a'); | 
|  | 754 |  |  |  |  | 2339 |  | 
| 299 | 1307 | 100 |  |  |  | 216409 | } elsif ($self->buffer =~ /^(?:true|false)(?!:)\b/o) { | 
| 300 | 179 |  |  |  |  | 12099 | my $bool	= $self->read_length($+[0]); | 
| 301 |  |  |  |  |  |  | return $self->new_token(BOOLEAN, $start_line, $start_column, $bool); | 
| 302 |  |  |  |  |  |  | # 				} elsif ($self->buffer =~ /^$r_KEYWORDS/) { | 
| 303 |  |  |  |  |  |  | # 					my $bool	= $self->read_length($+[0]); | 
| 304 | 575 | 50 | 66 |  |  | 24009 | # 					return $self->new_token(KEYWORD, $start_line, $start_column, $bool); | 
| 305 | 0 |  |  |  |  | 0 | } elsif ($self->buffer =~ /^BASE(?!:)\b/oi) { | 
| 306 |  |  |  |  |  |  | $self->read_length(4); | 
| 307 |  |  |  |  |  |  | return $self->new_token(BASE, $start_line, $start_column, 'BASE'); | 
| 308 | 575 | 100 |  |  |  | 4419 | } elsif ($self->buffer =~ /^PREFIX(?!:)\b/io) { | 
|  | 75 | 100 |  |  |  | 319 |  | 
|  | 75 | 100 |  |  |  | 253 |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 309 | 444 |  |  |  |  | 2394 | $self->read_length(6); | 
| 310 |  |  |  |  |  |  | return $self->new_token(PREFIX, $start_line, $start_column, 'PREFIX'); | 
| 311 | 55 | 100 |  |  |  | 1008 | } else { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 312 | 13 |  |  |  |  | 135 | return $self->_get_pname; | 
| 313 | 13 |  |  |  |  | 55 | } | 
| 314 |  |  |  |  |  |  | } elsif ($c eq '^') { | 
| 315 | 0 |  |  |  |  | 0 | if ($self->buffer =~ /^\^\^/) { | 
| 316 | 0 |  |  |  |  | 0 | $self->read_word('^^'); | 
| 317 |  |  |  |  |  |  | return $self->new_token(HATHAT, $start_line, $start_column, '^^'); | 
| 318 |  |  |  |  |  |  | } else { | 
| 319 |  |  |  |  |  |  | $self->read_word('^'); | 
| 320 |  |  |  |  |  |  | return $self->new_token(HAT, $start_line, $start_column, '^'); | 
| 321 | 0 |  |  |  |  | 0 | } | 
| 322 | 0 |  |  |  |  | 0 | } elsif ($c eq '&') { | 
| 323 |  |  |  |  |  |  | $self->read_word('&&'); | 
| 324 | 0 |  |  |  |  | 0 | return $self->new_token(ANDAND, $start_line, $start_column, '&&'); | 
| 325 | 0 |  |  |  |  | 0 | } else { | 
| 326 |  |  |  |  |  |  | # 			Carp::cluck sprintf("Unexpected byte '$c' (0x%02x)", ord($c)); | 
| 327 | 42 |  |  |  |  | 2369 | return $self->_throw_error(sprintf("Unexpected byte '%s' (0x%02x)", $c, ord($c))); | 
| 328 |  |  |  |  |  |  | } | 
| 329 |  |  |  |  |  |  | warn sprintf('byte: 0x%x', ord($c)); | 
| 330 | 0 | 0 |  |  |  | 0 | } | 
| 331 | 0 |  |  |  |  | 0 | } | 
| 332 | 0 |  |  |  |  | 0 |  | 
| 333 |  |  |  |  |  |  | my $self	= shift; | 
| 334 | 0 |  |  |  |  | 0 | my $prefix	= ''; | 
| 335 | 0 |  |  |  |  | 0 |  | 
| 336 |  |  |  |  |  |  | if ($self->buffer =~ /^$r_PNAME_LN/o) { | 
| 337 |  |  |  |  |  |  | my $ln	= $self->read_length($+[0]); | 
| 338 | 1 |  |  |  |  | 8 | my ($ns, $local)	= ($ln =~ /^([^:]*:)(.*)$/); | 
| 339 | 1 |  |  |  |  | 5 | no warnings 'uninitialized'; | 
| 340 |  |  |  |  |  |  | $local	=~ s{\\([-~.!&'()*+,;=:/?#@%_\$])}{$1}g; | 
| 341 |  |  |  |  |  |  | return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns, $local); | 
| 342 | 0 |  |  |  |  | 0 | } elsif ($self->buffer =~ $r_PNAME_NS) { | 
| 343 |  |  |  |  |  |  | my $ns	= $self->read_length($+[0]); | 
| 344 | 0 |  |  |  |  | 0 | return $self->new_token(PREFIXNAME, $self->start_line, $self->start_column, $ns); | 
| 345 |  |  |  |  |  |  | } else { | 
| 346 |  |  |  |  |  |  | $self->_throw_error("Expected PNAME"); | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  | } | 
| 349 | 53 |  |  | 53 |  | 111 |  | 
| 350 | 53 |  |  |  |  | 92 | my $self	= shift; | 
| 351 |  |  |  |  |  |  | if (substr($self->buffer, 0, 1) eq '$') { | 
| 352 | 53 | 100 |  |  |  | 721 | $self->get_char_safe('$'); | 
|  |  | 100 |  |  |  |  |  | 
| 353 | 35 |  |  |  |  | 1000 | if ($self->buffer =~ /^$r_VARNAME/) { | 
| 354 | 35 |  |  |  |  | 206 | my $name	= $self->read_length($+[0]); | 
| 355 | 13 |  |  | 13 |  | 110 | return $self->new_token(VAR, $self->start_line, $self->start_column, $name); | 
|  | 13 |  |  |  |  | 41 |  | 
|  | 13 |  |  |  |  | 40198 |  | 
| 356 | 35 |  |  |  |  | 87 | } else { | 
| 357 | 35 |  |  |  |  | 536 | $self->_throw_error("Invalid variable name"); | 
| 358 |  |  |  |  |  |  | } | 
| 359 | 16 |  |  |  |  | 1603 | } else { | 
| 360 | 16 |  |  |  |  | 254 | $self->get_char_safe('?'); | 
| 361 |  |  |  |  |  |  | if ($self->buffer =~ /^$r_VARNAME/) { | 
| 362 | 2 |  |  |  |  | 59 | my $name	= $self->read_length($+[0]); | 
| 363 |  |  |  |  |  |  | return $self->new_token(VAR, $self->start_line, $self->start_column, $name); | 
| 364 |  |  |  |  |  |  | } else { | 
| 365 |  |  |  |  |  |  | return $self->new_token(QUESTION, $self->start_line, $self->start_column, '?'); | 
| 366 |  |  |  |  |  |  | } | 
| 367 | 142 |  |  | 142 |  | 293 | } | 
| 368 | 142 | 50 |  |  |  | 2822 | } | 
| 369 | 0 |  |  |  |  | 0 |  | 
| 370 | 0 | 0 |  |  |  | 0 | my $self	= shift; | 
| 371 | 0 |  |  |  |  | 0 | my $buffer	= $self->buffer; | 
| 372 | 0 |  |  |  |  | 0 | if ($buffer =~ m/^<([^<>"{}|^`\x00-\x20])*>/) { | 
| 373 |  |  |  |  |  |  | $self->get_char_safe(q[<]); | 
| 374 | 0 |  |  |  |  | 0 | if ($self->buffer =~ m/^[\x23-\x3d\x3f-\x5a\x5d-\x7e]*>/o) { | 
| 375 |  |  |  |  |  |  | my $iri	.= $self->read_length($+[0]); | 
| 376 |  |  |  |  |  |  | chop($iri); | 
| 377 | 142 |  |  |  |  | 1441 | return $self->new_token(IRI, $self->start_line, $self->start_column, $iri); | 
| 378 | 142 | 50 |  |  |  | 2117 | } | 
| 379 | 142 |  |  |  |  | 2999 |  | 
| 380 | 142 |  |  |  |  | 2270 | my $iri	= ''; | 
| 381 |  |  |  |  |  |  | while (1) { | 
| 382 | 0 |  |  |  |  | 0 | if (length($self->buffer) == 0) { | 
| 383 |  |  |  |  |  |  | my $c	= $self->peek_char; | 
| 384 |  |  |  |  |  |  | last unless defined($c); | 
| 385 |  |  |  |  |  |  | } | 
| 386 |  |  |  |  |  |  | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 387 |  |  |  |  |  |  | $self->get_char_safe('\\'); | 
| 388 | 95 |  |  | 95 |  | 199 | my $esc	= $self->get_char; | 
| 389 | 95 |  |  |  |  | 1851 | if ($esc eq '\\') { | 
| 390 | 95 | 100 |  |  |  | 1105 | $iri	.= "\\"; | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 391 | 82 |  |  |  |  | 405 | } elsif ($esc eq 'U') { | 
| 392 | 82 | 50 |  |  |  | 1174 | my $codepoint	= $self->read_length(8); | 
| 393 | 82 |  |  |  |  | 916 | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 394 | 82 |  |  |  |  | 242 | $iri .= chr(hex($codepoint)); | 
| 395 | 82 |  |  |  |  | 1255 | } elsif ($esc eq 'u') { | 
| 396 |  |  |  |  |  |  | my $codepoint	= $self->read_length(4); | 
| 397 |  |  |  |  |  |  | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 398 | 0 |  |  |  |  | 0 | my $char	= chr(hex($codepoint)); | 
| 399 | 0 |  |  |  |  | 0 | if ($char =~ /[<>" {}|\\^`]/o) { | 
| 400 | 0 | 0 |  |  |  | 0 | $self->_throw_error(sprintf("Bad IRI character: '%s' (0x%x)", $char, ord($char))); | 
| 401 | 0 |  |  |  |  | 0 | } | 
| 402 | 0 | 0 |  |  |  | 0 | $iri .= $char; | 
| 403 |  |  |  |  |  |  | } else { | 
| 404 | 0 | 0 |  |  |  | 0 | $self->_throw_error("Unrecognized iri escape '$esc'"); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 405 | 0 |  |  |  |  | 0 | } | 
| 406 | 0 |  |  |  |  | 0 | } elsif ($self->buffer =~ /^[^<>\x00-\x20\\"{}|^`]+/o) { | 
| 407 | 0 | 0 |  |  |  | 0 | $iri	.= $self->read_length($+[0]); | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 408 | 0 |  |  |  |  | 0 | } elsif (substr($self->buffer, 0, 1) eq '>') { | 
| 409 |  |  |  |  |  |  | last; | 
| 410 | 0 |  |  |  |  | 0 | } else { | 
| 411 | 0 | 0 |  |  |  | 0 | my $c	= $self->peek_char; | 
| 412 | 0 |  |  |  |  | 0 | $self->_throw_error("Got '$c' while expecting IRI character"); | 
| 413 |  |  |  |  |  |  | } | 
| 414 | 0 |  |  |  |  | 0 | } | 
| 415 | 0 | 0 |  |  |  | 0 | $self->get_char_safe(q[>]); | 
| 416 | 0 |  |  |  |  | 0 | return $self->new_token(IRI, $self->start_line, $self->start_column, $iri); | 
| 417 | 0 | 0 |  |  |  | 0 | } elsif (substr($buffer, 0, 2) eq '<=') { | 
| 418 | 0 |  |  |  |  | 0 | $self->read_length(2); | 
| 419 |  |  |  |  |  |  | return $self->new_token(LE, $self->start_line, $self->start_column, '<='); | 
| 420 | 0 |  |  |  |  | 0 | } elsif (substr($buffer, 0, 2) eq '>=') { | 
| 421 |  |  |  |  |  |  | $self->read_length(2); | 
| 422 | 0 |  |  |  |  | 0 | return $self->new_token(GE, $self->start_line, $self->start_column, '>='); | 
| 423 |  |  |  |  |  |  | } elsif (substr($buffer, 0, 2) eq '<<') { | 
| 424 |  |  |  |  |  |  | $self->read_length(2); | 
| 425 | 0 |  |  |  |  | 0 | return $self->new_token(LTLT, $self->start_line, $self->start_column, '<<'); | 
| 426 |  |  |  |  |  |  | } elsif (substr($buffer, 0, 2) eq '>>') { | 
| 427 | 0 |  |  |  |  | 0 | $self->read_length(2); | 
| 428 |  |  |  |  |  |  | return $self->new_token(GTGT, $self->start_line, $self->start_column, '>>'); | 
| 429 | 0 |  |  |  |  | 0 | } elsif (substr($buffer, 0, 1) eq '>') { | 
| 430 | 0 |  |  |  |  | 0 | $self->get_char; | 
| 431 |  |  |  |  |  |  | return $self->new_token(GT, $self->start_line, $self->start_column, '>'); | 
| 432 |  |  |  |  |  |  | } elsif (substr($buffer, 0, 1) eq '<') { | 
| 433 | 0 |  |  |  |  | 0 | $self->get_char; | 
| 434 | 0 |  |  |  |  | 0 | return $self->new_token(LT, $self->start_line, $self->start_column, '<'); | 
| 435 |  |  |  |  |  |  | } else { | 
| 436 | 0 |  |  |  |  | 0 | die "Unrecognized relational op near '$buffer'"; | 
| 437 | 0 |  |  |  |  | 0 | } | 
| 438 |  |  |  |  |  |  | } | 
| 439 | 0 |  |  |  |  | 0 |  | 
| 440 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 441 |  |  |  |  |  |  | if ($self->buffer =~ /^!=/) { | 
| 442 | 5 |  |  |  |  | 23 | $self->read_length(2); | 
| 443 | 5 |  |  |  |  | 82 | return $self->new_token(NOTEQUALS, $self->start_line, $self->start_column, '!='); | 
| 444 |  |  |  |  |  |  | } else { | 
| 445 | 5 |  |  |  |  | 22 | $self->get_char; | 
| 446 | 5 |  |  |  |  | 77 | return $self->new_token(BANG, $self->start_line, $self->start_column, '!'); | 
| 447 |  |  |  |  |  |  | } | 
| 448 | 1 |  |  |  |  | 5 | } | 
| 449 | 1 |  |  |  |  | 15 |  | 
| 450 |  |  |  |  |  |  | my $self	= shift; | 
| 451 | 2 |  |  |  |  | 12 | unless ($self->buffer =~ /^$r_BLANK_NODE_LABEL/o) { | 
| 452 | 2 |  |  |  |  | 33 | $self->_throw_error("Expected: name"); | 
| 453 |  |  |  |  |  |  | } | 
| 454 | 0 |  |  |  |  | 0 | my $ln	= $self->read_length($+[0]); | 
| 455 |  |  |  |  |  |  | my $name	= substr($ln, 2); | 
| 456 |  |  |  |  |  |  | return $self->new_token(BNODE, $self->start_line, $self->start_column, $name); | 
| 457 |  |  |  |  |  |  | } | 
| 458 |  |  |  |  |  |  |  | 
| 459 | 1 |  |  | 1 |  | 3 | my $self	= shift; | 
| 460 | 1 | 50 |  |  |  | 20 | if ($self->buffer =~ /^${r_DOUBLE}/o) { | 
| 461 | 0 |  |  |  |  | 0 | return $self->new_token(DOUBLE, $self->start_line, $self->start_column, $self->read_length($+[0])); | 
| 462 | 0 |  |  |  |  | 0 | } elsif ($self->buffer =~ /^${r_DECIMAL}/o) { | 
| 463 |  |  |  |  |  |  | return $self->new_token(DECIMAL, $self->start_line, $self->start_column, $self->read_length($+[0])); | 
| 464 | 1 |  |  |  |  | 11 | } elsif ($self->buffer =~ /^${r_INTEGER}/o) { | 
| 465 | 1 |  |  |  |  | 16 | return $self->new_token(INTEGER, $self->start_line, $self->start_column, $self->read_length($+[0])); | 
| 466 |  |  |  |  |  |  | } | 
| 467 |  |  |  |  |  |  | $self->_throw_error("Expected number"); | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  |  | 
| 470 | 0 |  |  | 0 |  | 0 | my $self	= shift; | 
| 471 | 0 | 0 |  |  |  | 0 | if ($self->buffer =~ /^$r_NIL/) { | 
| 472 | 0 |  |  |  |  | 0 | $self->read_length($+[0]); | 
| 473 |  |  |  |  |  |  | return $self->new_token(NIL, $self->start_line, $self->start_column, '()'); | 
| 474 | 0 |  |  |  |  | 0 | } else { | 
| 475 | 0 |  |  |  |  | 0 | $self->get_char_safe('('); | 
| 476 | 0 |  |  |  |  | 0 | return $self->new_token(LPAREN, $self->start_line, $self->start_column, '('); | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  | } | 
| 479 |  |  |  |  |  |  |  | 
| 480 | 18 |  |  | 18 |  | 46 | my $self	= shift; | 
| 481 | 18 | 100 |  |  |  | 429 | if (substr($self->buffer, 0, 2) eq '{|') { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 482 | 1 |  |  |  |  | 22 | $self->read_length(2); | 
| 483 |  |  |  |  |  |  | return $self->new_token(LANNOT, $self->start_line, $self->start_column, '{|'); | 
| 484 | 1 |  |  |  |  | 39 | } elsif (substr($self->buffer, 0, 2) eq '|}') { | 
| 485 |  |  |  |  |  |  | $self->read_length(2); | 
| 486 | 16 |  |  |  |  | 1105 | return $self->new_token(RANNOT, $self->start_line, $self->start_column, '|}'); | 
| 487 |  |  |  |  |  |  | } elsif (substr($self->buffer, 0, 2) eq '||') { | 
| 488 | 0 |  |  |  |  | 0 | $self->read_length(2); | 
| 489 |  |  |  |  |  |  | return $self->new_token(OROR, $self->start_line, $self->start_column, '||'); | 
| 490 |  |  |  |  |  |  | } elsif (substr($self->buffer, 0, 1) eq '{') { | 
| 491 |  |  |  |  |  |  | $self->get_char_safe('{'); | 
| 492 | 20 |  |  | 20 |  | 48 | return $self->new_token(LBRACE, $self->start_line, $self->start_column, '{'); | 
| 493 | 20 | 100 |  |  |  | 388 | } elsif (substr($self->buffer, 0, 1) eq '}') { | 
| 494 | 1 |  |  |  |  | 62 | $self->get_char_safe('}'); | 
| 495 | 1 |  |  |  |  | 22 | return $self->new_token(RBRACE, $self->start_line, $self->start_column, '}'); | 
| 496 |  |  |  |  |  |  | } else { | 
| 497 | 19 |  |  |  |  | 341 | $self->get_char_safe('|'); | 
| 498 | 19 |  |  |  |  | 287 | return $self->new_token(OR, $self->start_line, $self->start_column, '|'); | 
| 499 |  |  |  |  |  |  | } | 
| 500 |  |  |  |  |  |  | } | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | my $self	= shift; | 
| 503 | 143 |  |  | 143 |  | 337 | if ($self->buffer =~ /^$r_ANON/) { | 
| 504 | 143 | 100 |  |  |  | 2807 | $self->read_length($+[0]); | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 505 | 2 |  |  |  |  | 22 | return $self->new_token(ANON, $self->start_line, $self->start_column, '[]'); | 
| 506 | 2 |  |  |  |  | 37 | } else { | 
| 507 |  |  |  |  |  |  | $self->get_char_safe('['); | 
| 508 | 2 |  |  |  |  | 50 | return $self->new_token(LBRACKET, $self->start_line, $self->start_column, '['); | 
| 509 | 2 |  |  |  |  | 30 | } | 
| 510 |  |  |  |  |  |  | } | 
| 511 | 0 |  |  |  |  | 0 |  | 
| 512 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 513 |  |  |  |  |  |  | $self->get_char_safe('#'); | 
| 514 | 70 |  |  |  |  | 4418 | my $comment	= ''; | 
| 515 | 70 |  |  |  |  | 1019 | my $c		= $self->peek_char; | 
| 516 |  |  |  |  |  |  | while (length($c) and $c !~ /[\r\n]/o) { | 
| 517 | 69 |  |  |  |  | 5393 | $comment	.= $self->get_char; | 
| 518 | 69 |  |  |  |  | 1152 | $c			= $self->peek_char; | 
| 519 |  |  |  |  |  |  | } | 
| 520 | 0 |  |  |  |  | 0 | if (length($c) and $c =~ /[\r\n]/o) { | 
| 521 | 0 |  |  |  |  | 0 | $self->get_char; | 
| 522 |  |  |  |  |  |  | } | 
| 523 |  |  |  |  |  |  | return $self->new_token(COMMENT, $self->start_line, $self->start_column, $comment); | 
| 524 |  |  |  |  |  |  | } | 
| 525 |  |  |  |  |  |  |  | 
| 526 | 0 |  |  | 0 |  | 0 | my $self	= shift; | 
| 527 | 0 | 0 |  |  |  | 0 | $self->get_char_safe('@'); | 
| 528 | 0 |  |  |  |  | 0 | if ($self->buffer =~ /^[a-zA-Z]+(-[a-zA-Z0-9]+)*\b/o) { | 
| 529 | 0 |  |  |  |  | 0 | my $lang	= $self->read_length($+[0]); | 
| 530 |  |  |  |  |  |  | return $self->new_token(LANG, $self->start_line, $self->start_column, $lang); | 
| 531 | 0 |  |  |  |  | 0 | } | 
| 532 | 0 |  |  |  |  | 0 | $self->_throw_error("Expected keyword or language tag"); | 
| 533 |  |  |  |  |  |  | } | 
| 534 |  |  |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | my $self	= shift; | 
| 536 |  |  |  |  |  |  | # 	my $c		= $self->peek_char(); | 
| 537 | 0 |  |  | 0 |  | 0 | $self->get_char_safe(q["]); | 
| 538 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 2) eq q[""]) { | 
| 539 | 0 |  |  |  |  | 0 | # #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 | 
| 540 | 0 |  |  |  |  | 0 | $self->read_word(q[""]); | 
| 541 | 0 |  | 0 |  |  | 0 |  | 
| 542 | 0 |  |  |  |  | 0 | my $quote_count	= 0; | 
| 543 | 0 |  |  |  |  | 0 | my $string	= ''; | 
| 544 |  |  |  |  |  |  | while (1) { | 
| 545 | 0 | 0 | 0 |  |  | 0 | if (length($self->buffer) == 0) { | 
| 546 | 0 |  |  |  |  | 0 | $self->fill_buffer; | 
| 547 |  |  |  |  |  |  | $self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0); | 
| 548 | 0 |  |  |  |  | 0 | } | 
| 549 |  |  |  |  |  |  | if (substr($self->buffer, 0, 1) eq '"') { | 
| 550 |  |  |  |  |  |  | my $c	= $self->get_char; | 
| 551 |  |  |  |  |  |  | $quote_count++; | 
| 552 | 2 |  |  | 2 |  | 9 | last if ($quote_count == 3); | 
| 553 | 2 |  |  |  |  | 11 | } else { | 
| 554 | 2 | 50 |  |  |  | 39 | if ($quote_count) { | 
| 555 | 2 |  |  |  |  | 28 | $string	.= '"' foreach (1..$quote_count); | 
| 556 | 2 |  |  |  |  | 33 | $quote_count	= 0; | 
| 557 |  |  |  |  |  |  | } | 
| 558 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 559 |  |  |  |  |  |  | $string	.= $self->_get_escaped_char(); | 
| 560 |  |  |  |  |  |  | } else { | 
| 561 |  |  |  |  |  |  | $self->buffer	=~ /^[^"\\]+/; | 
| 562 | 8 |  |  | 8 |  | 28 | $string	.= $self->read_length($+[0]); | 
| 563 |  |  |  |  |  |  | } | 
| 564 | 8 |  |  |  |  | 38 | } | 
| 565 | 8 | 50 |  |  |  | 152 | } | 
| 566 |  |  |  |  |  |  | return $self->new_token(STRING3D, $self->start_line, $self->start_column, $string); | 
| 567 | 0 |  |  |  |  | 0 | } else { | 
| 568 |  |  |  |  |  |  | ### #x22 scharacter* #x22 | 
| 569 | 0 |  |  |  |  | 0 | my $string	= ''; | 
| 570 | 0 |  |  |  |  | 0 | while (1) { | 
| 571 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 572 | 0 | 0 |  |  |  | 0 | $string	.= $self->_get_escaped_char(); | 
| 573 | 0 |  |  |  |  | 0 | } elsif ($self->buffer =~ /^[^"\\]+/o) { | 
| 574 | 0 | 0 |  |  |  | 0 | $string	.= $self->read_length($+[0]); | 
| 575 |  |  |  |  |  |  | } elsif (substr($self->buffer, 0, 1) eq '"') { | 
| 576 | 0 | 0 |  |  |  | 0 | last; | 
| 577 | 0 |  |  |  |  | 0 | } else { | 
| 578 | 0 |  |  |  |  | 0 | my $c	= $self->peek_char; | 
| 579 | 0 | 0 |  |  |  | 0 | $self->_throw_error("Got '$c' while expecting string character"); | 
| 580 |  |  |  |  |  |  | } | 
| 581 | 0 | 0 |  |  |  | 0 | } | 
| 582 | 0 |  |  |  |  | 0 | $self->get_char_safe(q["]); | 
| 583 | 0 |  |  |  |  | 0 | return $self->new_token(STRING1D, $self->start_line, $self->start_column, $string); | 
| 584 |  |  |  |  |  |  | } | 
| 585 | 0 | 0 |  |  |  | 0 | } | 
| 586 | 0 |  |  |  |  | 0 |  | 
| 587 |  |  |  |  |  |  | my $self	= shift; | 
| 588 | 0 |  |  |  |  | 0 | $self->get_char_safe("'"); | 
| 589 | 0 |  |  |  |  | 0 | if (substr($self->buffer, 0, 2) eq q['']) { | 
| 590 |  |  |  |  |  |  | # #x22 #x22 #x22 lcharacter* #x22 #x22 #x22 | 
| 591 |  |  |  |  |  |  | $self->read_word(q['']); | 
| 592 |  |  |  |  |  |  |  | 
| 593 | 0 |  |  |  |  | 0 | my $quote_count	= 0; | 
| 594 |  |  |  |  |  |  | my $string	= ''; | 
| 595 |  |  |  |  |  |  | while (1) { | 
| 596 | 8 |  |  |  |  | 97 | if (length($self->buffer) == 0) { | 
| 597 | 8 |  |  |  |  | 16 | $self->fill_buffer; | 
| 598 | 16 | 100 |  |  |  | 224 | $self->_throw_error("Found EOF in string literal") if (length($self->buffer) == 0); | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 599 | 1 |  |  |  |  | 12 | } | 
| 600 |  |  |  |  |  |  | if (substr($self->buffer, 0, 1) eq "'") { | 
| 601 | 7 |  |  |  |  | 269 | my $c	= $self->get_char; | 
| 602 |  |  |  |  |  |  | $quote_count++; | 
| 603 | 8 |  |  |  |  | 325 | last if ($quote_count == 3); | 
| 604 |  |  |  |  |  |  | } else { | 
| 605 | 0 |  |  |  |  | 0 | if ($quote_count) { | 
| 606 | 0 |  |  |  |  | 0 | $string	.= "'" foreach (1..$quote_count); | 
| 607 |  |  |  |  |  |  | $quote_count	= 0; | 
| 608 |  |  |  |  |  |  | } | 
| 609 | 8 |  |  |  |  | 551 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 610 | 8 |  |  |  |  | 139 | $string	.= $self->_get_escaped_char(); | 
| 611 |  |  |  |  |  |  | } else { | 
| 612 |  |  |  |  |  |  | $self->buffer	=~ /^[^'\\]+/; | 
| 613 |  |  |  |  |  |  | $string	.= $self->read_length($+[0]); | 
| 614 |  |  |  |  |  |  | } | 
| 615 | 4 |  |  | 4 |  | 16 | } | 
| 616 | 4 |  |  |  |  | 24 | } | 
| 617 | 4 | 100 |  |  |  | 69 | return $self->new_token(STRING3S, $self->start_line, $self->start_column, $string); | 
| 618 |  |  |  |  |  |  | } else { | 
| 619 | 1 |  |  |  |  | 22 | ### #x22 scharacter* #x22 | 
| 620 |  |  |  |  |  |  | my $string	= ''; | 
| 621 | 1 |  |  |  |  | 2 | while (1) { | 
| 622 | 1 |  |  |  |  | 2 | if (substr($self->buffer, 0, 1) eq '\\') { | 
| 623 | 1 |  |  |  |  | 2 | $string	.= $self->_get_escaped_char(); | 
| 624 | 4 | 50 |  |  |  | 54 | } elsif ($self->buffer =~ /^[^'\\]+/o) { | 
| 625 | 0 |  |  |  |  | 0 | $string	.= $self->read_length($+[0]); | 
| 626 | 0 | 0 |  |  |  | 0 | } elsif (substr($self->buffer, 0, 1) eq "'") { | 
| 627 |  |  |  |  |  |  | last; | 
| 628 | 4 | 100 |  |  |  | 70 | } else { | 
| 629 | 3 |  |  |  |  | 17 | my $c		= $self->peek_char(); | 
| 630 | 3 |  |  |  |  | 5 | $self->_throw_error("Got '$c' while expecting string character"); | 
| 631 | 3 | 100 |  |  |  | 7 | } | 
| 632 |  |  |  |  |  |  | } | 
| 633 | 1 | 50 |  |  |  | 11 | $self->get_char_safe(q[']); | 
| 634 | 0 |  |  |  |  | 0 | return $self->new_token(STRING1S, $self->start_line, $self->start_column, $string); | 
| 635 | 0 |  |  |  |  | 0 | } | 
| 636 |  |  |  |  |  |  | } | 
| 637 | 1 | 50 |  |  |  | 14 |  | 
| 638 | 0 |  |  |  |  | 0 | my $self	= shift; | 
| 639 |  |  |  |  |  |  | my $c	= $self->peek_char; | 
| 640 | 1 |  |  |  |  | 22 | $self->get_char_safe('\\'); | 
| 641 | 1 |  |  |  |  | 11 | my $esc	= $self->get_char; | 
| 642 |  |  |  |  |  |  | if ($esc eq '\\') { return "\\" } | 
| 643 |  |  |  |  |  |  | elsif ($esc =~ /^['">]$/) { return $esc } | 
| 644 |  |  |  |  |  |  | elsif ($esc eq 'r') { return "\r" } | 
| 645 | 1 |  |  |  |  | 33 | elsif ($esc eq 't') { return "\t" } | 
| 646 |  |  |  |  |  |  | elsif ($esc eq 'n') { return "\n" } | 
| 647 |  |  |  |  |  |  | elsif ($esc eq 'b') { return "\b" } | 
| 648 | 3 |  |  |  |  | 25 | elsif ($esc eq 'f') { return "\f" } | 
| 649 | 3 |  |  |  |  | 4 | elsif ($esc eq 'U') { | 
| 650 | 6 | 100 |  |  |  | 100 | my $codepoint	= $self->read_length(8); | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 651 | 1 |  |  |  |  | 9 | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 652 |  |  |  |  |  |  | return chr(hex($codepoint)); | 
| 653 | 2 |  |  |  |  | 50 | } elsif ($esc eq 'u'){ | 
| 654 |  |  |  |  |  |  | my $codepoint	= $self->read_length(4); | 
| 655 | 3 |  |  |  |  | 110 | $self->_throw_error("Bad unicode escape codepoint '$codepoint'") unless ($codepoint =~ /^[0-9A-Fa-f]+$/o); | 
| 656 |  |  |  |  |  |  | return chr(hex($codepoint)); | 
| 657 | 0 |  |  |  |  | 0 | } | 
| 658 | 0 |  |  |  |  | 0 | $self->_throw_error("Unrecognized string escape '$esc'"); | 
| 659 |  |  |  |  |  |  | } | 
| 660 |  |  |  |  |  |  |  | 
| 661 | 3 |  |  |  |  | 10 | my $self	= shift; | 
| 662 | 3 |  |  |  |  | 41 | my $error	= shift; | 
| 663 |  |  |  |  |  |  | my $line	= $self->line; | 
| 664 |  |  |  |  |  |  | my $col		= $self->column; | 
| 665 |  |  |  |  |  |  | use Data::Dumper; | 
| 666 |  |  |  |  |  |  | Carp::confess "$error at $line:$col with buffer: " . Dumper($self->buffer); | 
| 667 | 2 |  |  | 2 |  | 4 | } | 
| 668 | 2 |  |  |  |  | 6 | } | 
| 669 | 2 |  |  |  |  | 8 |  | 
| 670 | 2 |  |  |  |  | 5 | 1; | 
| 671 | 2 | 50 |  |  |  | 10 |  | 
|  | 0 | 50 |  |  |  | 0 |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 672 | 2 |  |  |  |  | 7 |  | 
| 673 | 0 |  |  |  |  | 0 | =back | 
| 674 | 0 |  |  |  |  | 0 |  | 
| 675 | 0 |  |  |  |  | 0 | =head1 BUGS | 
| 676 | 0 |  |  |  |  | 0 |  | 
| 677 | 0 |  |  |  |  | 0 | Please report any bugs or feature requests to through the GitHub web interface | 
| 678 |  |  |  |  |  |  | at L<https://github.com/kasei/perlrdf/issues>. | 
| 679 | 0 |  |  |  |  | 0 |  | 
| 680 | 0 | 0 |  |  |  | 0 | =head1 AUTHOR | 
| 681 | 0 |  |  |  |  | 0 |  | 
| 682 |  |  |  |  |  |  | Gregory Todd Williams  C<< <gwilliams@cpan.org> >> | 
| 683 | 0 |  |  |  |  | 0 |  | 
| 684 | 0 | 0 |  |  |  | 0 | =head1 COPYRIGHT | 
| 685 | 0 |  |  |  |  | 0 |  | 
| 686 |  |  |  |  |  |  | Copyright (c) 2014--2022 Gregory Todd Williams. This | 
| 687 | 0 |  |  |  |  | 0 | program is free software; you can redistribute it and/or modify it under | 
| 688 |  |  |  |  |  |  | the same terms as Perl itself. | 
| 689 |  |  |  |  |  |  |  | 
| 690 |  |  |  |  |  |  | =cut |