File Coverage

blib/lib/MarpaX/ESLIF/URI/_generic.pm
Criterion Covered Total %
statement 162 269 60.2
branch 18 64 28.1
condition 7 32 21.8
subroutine 39 52 75.0
pod 28 30 93.3
total 254 447 56.8


line stmt bran cond sub pod time code
1 5     5   37 use strict;
  5         13  
  5         166  
2 5     5   28 use warnings FATAL => 'all';
  5         10  
  5         352  
3              
4             package MarpaX::ESLIF::URI::_generic;
5              
6             # ABSTRACT: URI Generic syntax as per RFC3986/RFC6874
7              
8             our $AUTHORITY = 'cpan:JDDPAUSE'; # AUTHORITY
9              
10             our $VERSION = '0.007'; # VERSION
11              
12 5     5   36 use Carp qw/croak/;
  5         8  
  5         243  
13 5     5   2425 use Class::Method::Modifiers qw/fresh around/;
  5         7644  
  5         340  
14 5     5   2777 use Class::Tiny::Antlers;
  5         22285  
  5         37  
15 5     5   3328 use Log::Any qw/$log/;
  5         41960  
  5         26  
16 5     5   12836 use MarpaX::ESLIF 3.0.9;
  5         592095  
  5         196  
17 5     5   70 use MarpaX::ESLIF::URI; # Because of resolve()
  5         11  
  5         141  
18 5     5   2935 use MarpaX::ESLIF::URI::_generic::RecognizerInterface;
  5         16  
  5         165  
19 5     5   2129 use MarpaX::ESLIF::URI::_generic::ValueInterface;
  5         14  
  5         162  
20 5     5   2305 use Safe::Isa qw/$_isa/;
  5         2457  
  5         676  
21 5     5   38 use overload '""' => 'string', 'eq' => 'eq', fallback => 1;
  5         10  
  5         34  
22              
23             has '_origin' => ( is => 'ro' );
24             has '_string' => ( is => 'rwp' );
25             has '_scheme' => ( is => 'rwp' );
26             has '_authority' => ( is => 'rwp' );
27             has '_userinfo' => ( is => 'rwp' );
28             has '_host' => ( is => 'rwp' );
29             has '_ip' => ( is => 'rwp' );
30             has '_ipv4' => ( is => 'rwp' );
31             has '_ipv6' => ( is => 'rwp' );
32             has '_ipvx' => ( is => 'rwp' );
33             has '_zone' => ( is => 'rwp' );
34             has '_port' => ( is => 'rwp' );
35             has '_path' => ( is => 'rwp', default => sub { { origin => '', decoded => '', normalized => '' } }); # Default is empty path ./..
36             has '_segments' => ( is => 'rwp', default => sub { { origin => [], decoded => [], normalized => [] } }); # ../. i.e. no component
37             has '_query' => ( is => 'rwp' );
38             has '_fragment' => ( is => 'rwp' );
39              
40             #
41             # All attributes starting with an underscore are the result of parsing
42             #
43             __PACKAGE__->_generate_actions(qw/_string _scheme _authority _userinfo _host _ip _ipv4 _ipv6 _ipvx _zone _port _path _segments _query _fragment/);
44              
45             #
46             # Constants
47             #
48             my $BNF = do { local $/; <DATA> };
49             my $ESLIF = MarpaX::ESLIF->new($log);
50             my $GRAMMAR = MarpaX::ESLIF::Grammar->new(__PACKAGE__->eslif, __PACKAGE__->bnf);
51              
52              
53             #
54             # BUILDARGS is completely internal
55             #
56              
57              
58             sub BUILDARGS {
59 32     32 0 4742 my ($class, @args) = @_;
60              
61 32 50       145 croak "Usage: $class->new(\$uri)" unless $#args == 0;
62              
63 32         150 return { _origin => $args[0] }
64             }
65              
66             #
67             # BUILD is completely internal
68             #
69              
70              
71             sub BUILD {
72 32     32 0 802 my ($self) = @_;
73              
74 32         1035 my $_origin = $self->_origin;
75 32         490 $_origin = "$_origin";
76 32 50       123 if (length($_origin)) {
77 32         267 my $recognizerInterface = MarpaX::ESLIF::URI::_generic::RecognizerInterface->new($_origin);
78 32         174 my $valueInterface = MarpaX::ESLIF::URI::_generic::ValueInterface->new($self);
79              
80 32 50       152 $self->grammar->parse($recognizerInterface, $valueInterface) || croak 'Parse failure'
81             }
82             }
83              
84              
85             sub bnf {
86 11     11 1 32 my ($class) = @_;
87              
88 11         280 return $BNF
89             }
90              
91              
92             sub eslif {
93 11     11 1 55 my ($class) = @_;
94              
95 11         65 return $ESLIF
96             }
97              
98              
99             sub grammar {
100 0     0 1 0 my ($class) = @_;
101              
102 0         0 return $GRAMMAR;
103             }
104              
105             #
106             # Class::Tiny generated methods
107             #
108              
109              
110             sub string {
111 59     59 1 20287 my ($self, $type) = @_;
112              
113 59         186 return $self->_generic_getter('_string', $type)
114             }
115              
116              
117             sub scheme {
118 96     96 1 60472 my ($self, $type) = @_;
119             #
120             # scheme never have a percent encoded character
121             #
122 96         296 return $self->_generic_getter('_scheme', $type)
123             }
124              
125              
126             sub authority {
127 3     3 1 1749 my ($self, $type) = @_;
128              
129 3         11 return $self->_generic_getter('_authority', $type)
130             }
131              
132              
133             sub userinfo {
134 6     6 1 3390 my ($self, $type) = @_;
135              
136 6         19 return $self->_generic_getter('_userinfo', $type)
137             }
138              
139              
140             sub host {
141 39     39 1 20549 my ($self, $type) = @_;
142              
143 39         124 return $self->_generic_getter('_host', $type)
144             }
145              
146              
147             sub hostname {
148 9     9 1 5062 my ($self, $type) = @_;
149              
150 9         38 my $hostname = $self->_generic_getter('_host', $type);
151 9 50       70 $hostname =~ s/^\[(.*)\]$/$1/ if defined($hostname);
152 9         26 return $hostname
153             }
154              
155              
156             sub ip {
157 9     9 1 4997 my ($self, $type) = @_;
158              
159 9         28 return $self->_generic_getter('_ip', $type)
160             }
161              
162              
163             sub ipv4 {
164 0     0 1 0 my ($self, $type) = @_;
165              
166 0         0 return $self->_generic_getter('_ipv4', $type)
167             }
168              
169              
170             sub ipv6 {
171 9     9 1 5053 my ($self, $type) = @_;
172              
173 9         29 return $self->_generic_getter('_ipv6', $type)
174             }
175              
176              
177             sub ipvx {
178 0     0 1 0 my ($self, $type) = @_;
179              
180 0         0 return $self->_generic_getter('_ipvx', $type)
181             }
182              
183              
184             sub zone {
185 6     6 1 3315 my ($self, $type) = @_;
186              
187 6         20 return $self->_generic_getter('_zone', $type)
188             }
189              
190              
191             sub port {
192 7     7 1 11 my ($self) = @_;
193              
194 7         24 return $self->_generic_getter('_port')
195             }
196              
197              
198             sub path {
199 63     63 1 34737 my ($self, $type) = @_;
200              
201 63         204 return $self->_generic_getter('_path', $type)
202             }
203              
204              
205             sub segments {
206 27     27 1 26491 my ($self, $type) = @_;
207              
208 27         140 return $self->_generic_getter('_segments', $type)
209             }
210              
211              
212             sub query {
213 3     3 1 1637 my ($self, $type) = @_;
214              
215 3         10 return $self->_generic_getter('_query', $type)
216             }
217              
218              
219             sub fragment {
220 0     0 1 0 my ($self, $type) = @_;
221              
222 0         0 return $self->_generic_getter('_fragment', $type)
223             }
224              
225              
226             sub is_abs {
227 0     0 1 0 my ($self) = @_;
228              
229 0   0     0 return defined($self->scheme) && ! defined($self->fragment)
230             }
231              
232              
233             sub base {
234 0     0 1 0 my ($self) = @_;
235              
236 0 0       0 if ($self->is_abs) {
237 0         0 return $self
238             } else {
239             #
240             # We need the scheme
241             #
242 0 0       0 croak "Cannot derive a base URI without a scheme" unless defined $self->_scheme;
243 0         0 my $origin = $self->string('origin');
244 0         0 my $fragment = $self->fragment('origin');
245 0         0 my $quote_fragment = quotemeta($fragment);
246 0         0 $origin =~ s/#$quote_fragment$//;
247 0         0 return ref($self)->new($origin)
248             }
249             }
250              
251              
252             sub normalized {
253 0     0 1 0 my ($self) = @_;
254              
255 0         0 return $self->string('normalized')
256             }
257              
258              
259             sub decoded {
260 0     0 1 0 my ($self) = @_;
261              
262 0         0 return $self->string('decoded')
263             }
264              
265              
266             sub resolve {
267 0     0 1 0 my ($self, $base, $strict) = @_;
268              
269 0   0     0 $base //= $self->base;
270 0   0     0 $strict //= 1;
271              
272 0 0       0 croak "$base must be absolute" unless $base->is_abs;
273              
274             #
275             # 5.2.2. Transform References
276             #
277 0         0 my %R;
278 0         0 $R{scheme} = $self->scheme('origin');
279 0         0 $R{authority} = $self->authority('origin');
280 0         0 $R{path} = $self->path('origin');
281 0         0 $R{query} = $self->query('origin');
282 0         0 $R{fragment} = $self->fragment('origin');
283              
284 0         0 my %Base;
285 0         0 $Base{scheme} = $base->scheme('origin');
286 0         0 $Base{authority} = $base->authority('origin');
287 0         0 $Base{path} = $base->path('origin');
288 0         0 $Base{query} = $base->query('origin');
289 0         0 $Base{fragment} = $base->fragment('origin');
290              
291 0 0 0     0 if ((! $strict) && (($R{scheme} // '') == $Base{scheme})) {
      0        
292             $R{scheme} = undef
293 0         0 }
294              
295 0         0 my %T;
296 0 0       0 if (defined($R{scheme})) {
297 0         0 $T{scheme} = $R{scheme};
298 0         0 $T{authority} = $R{authority};
299 0         0 $T{path} = __PACKAGE__->remove_dot_segments($R{path});
300 0         0 $T{query} = $R{query};
301             } else {
302 0 0       0 if (defined($R{authority})) {
303 0         0 $T{authority} = $R{authority};
304 0         0 $T{path} = __PACKAGE__->remove_dot_segments($R{path});
305 0         0 $T{query} = $R{query};
306             } else {
307 0 0       0 if (! length($R{path})) {
308 0         0 $T{path} = $Base{path};
309 0 0       0 if (defined(R{query})) {
  0         0  
310 0         0 $T{query} = $R{query};
311             } else {
312 0         0 $T{query} = $Base{query};
313             }
314             } else {
315 0 0       0 if (substr($R{path}, 0, 1) eq '/') {
316 0         0 $T{path} = __PACKAGE__->remove_dot_segments($R{path});
317             } else {
318 0         0 $T{path} = $self->_merge_paths($base);
319 0         0 $T{path} = __PACKAGE__->remove_dot_segments($T{path});
320             }
321 0         0 $T{query} = $R{query};
322             }
323 0         0 $T{authority} = $Base{authority};
324             }
325 0         0 $T{scheme} = $Base{scheme};
326             }
327              
328 0         0 $T{fragment} = $R{fragment};
329              
330             #
331             # 5.3. Component Recomposition
332             #
333 0         0 my $str;
334              
335 0         0 my $scheme = $T{scheme};
336 0 0       0 $str .= "$scheme:" if defined($scheme);
337              
338 0         0 my $authority = $T{authority};
339 0 0       0 $str .= "//$authority" if defined($authority);
340              
341 0         0 $str .= $T{path}; # Always defined as per the algorithm
342              
343 0         0 my $query = $T{query};
344 0 0       0 $str .= "?$query" if defined($query);
345              
346 0         0 my $fragment = $T{fragment};
347 0 0       0 $str .= "#$fragment" if defined($fragment);
348              
349 0         0 return MarpaX::ESLIF::URI->new($str)
350             }
351              
352              
353             sub eq {
354 0     0 1 0 my ($self, $other) = @_;
355              
356 0         0 eval {
357 0 0       0 $other = MarpaX::ESLIF::URI->new($other) unless $other->$_isa(__PACKAGE__);
358             #
359             # Since we already do full normalization when valuating the parse tree, we use it
360             #
361 0         0 $self->string('normalized') eq $other->string('normalized')
362             }
363             }
364              
365              
366             sub clone {
367 0     0 1 0 my ($self) = @_;
368              
369 0         0 return ref($self)->new($self->_origin)
370             }
371              
372              
373             sub as_string {
374 0     0 1 0 goto &string
375             }
376              
377              
378             sub remove_dot_segments {
379 30     30 1 76 my ($class, $path) = @_;
380              
381             # 1. The input buffer is initialized with the now-appended path
382             # components and the output buffer is initialized to the empty
383             # string.
384 30         61 my $input = $path;
385 30         62 my $output = '';
386              
387             # printf "%s %-20s %-20s\n", 1, $output, $input;
388             # 2. While the input buffer is not empty, loop as follows:
389 30         108 while (length($input)) {
390              
391             # A. If the input buffer begins with a prefix of "../" or "./",
392             # then remove that prefix from the input buffer; otherwise,
393 67 50       258 if (substr($input, 0, 3) eq '../') {
    50          
394 0         0 substr($input, 0, 3, '');
395             # printf "%s %-20s %-20s\n", 'A', $output, $input;
396 0         0 next;
397             } elsif (substr($input, 0, 2) eq './') {
398 0         0 substr($input, 0, 2, '');
399             # printf "%s %-20s %-20s\n", 'A', $output, $input;
400 0         0 next;
401             }
402              
403             # B. if the input buffer begins with a prefix of "/./" or "/.",
404             # where "." is a complete path segment, then replace that
405             # prefix with "/" in the input buffer; otherwise,
406 67 50       218 if (substr($input, 0, 3) eq '/./') {
    50          
407 0         0 substr($input, 0, 3, '/');
408             # printf "%s %-20s %-20s\n", 'B', $output, $input;
409 0         0 next;
410             } elsif ($input =~ /^\/\.(?:\/|\z)/) {
411 0         0 substr($input, 0, 2, '/');
412             # printf "%s %-20s %-20s\n", 'B', $output, $input;
413 0         0 next;
414             }
415              
416             # C. if the input buffer begins with a prefix of "/../" or "/..",
417             # where ".." is a complete path segment, then replace that
418             # prefix with "/" in the input buffer and remove the last
419             # segment and its preceding "/" (if any) from the output
420             # buffer; otherwise,
421 67 50       213 if (substr($input, 0, 4) eq '/../') {
    50          
422 0         0 substr($input, 0, 4, '/');
423 0         0 $output =~ s/\/?[^\/]*\z//;
424             # printf "%s %-20s %-20s\n", 'C', $output, $input;
425 0         0 next;
426             } elsif ($input =~ /^\/\.\.(?:\/|\z)/) {
427 0         0 substr($input, 0, 3, '/');
428 0         0 $output =~ s/\/?[^\/]*\z//;
429             # printf "%s %-20s %-20s\n", 'C', $output, $input;
430 0         0 next;
431             }
432              
433             # D. if the input buffer consists only of "." or "..", then remove
434             # that from the input buffer; otherwise,
435 67 50 33     283 if (($input eq '.') || ($input eq '..')) {
436 0         0 $input = '';
437             # printf "%s %-20s %-20s\n", 'D', $output, $input;
438 0         0 next;
439             }
440              
441             # E. move the first path segment in the input buffer to the end of
442             # the output buffer, including the initial "/" character (if
443             # any and any subsequent characters up to, but not including,
444             # the next "/" character or the end of the input buffer.
445 67         270 $input =~ s/^(\/?[^\/]*)//;
446 67         212 $output .= $1;
447             # printf "%s %-20s %-20s\n", 'E', $output, $input;
448             }
449              
450             # 3. Finally, the output buffer is returned as the result of
451             # remove_dot_segments.
452 30         90 return $output
453             }
454              
455             # ----------------
456             # Internal helpers
457             # ----------------
458              
459             sub _generic_getter {
460 579     579   1197 my ($self, $_what, $type) = @_;
461              
462 579   100     1605 $type //= 'decoded';
463 579         15494 my $value = $self->$_what;
464              
465 579 100       3553 return unless defined($value);
466 502         1885 return $value->{$type}
467             }
468              
469             sub _generate_actions {
470 9     9   43 my ($class, @attributes) = @_;
471             #
472             # All the attributes have an associate explicit action called _action${attribute}
473             #
474 9         26 foreach my $attribute (@attributes) {
475 88         3594 my $method = "_action$attribute";
476 88 100       686 next if $class->can($method);
477 87   33 5   7894 my $stub = eval "sub { my (\$self, \@args) = \@_; \$self->_set_$attribute(\$self->__concat(\@args)) }" || croak "Failed to create action stub for attribute $attribute, $@"; ## no critic
  5         25  
  5         30  
  0         0  
  0         0  
  2         9  
  2         9  
  11         46  
  11         49  
  3         10  
  3         11  
  27         119  
  27         112  
  12         41  
  12         48  
  22         77  
  22         79  
  7         17  
  7         28  
  15         55  
  15         54  
  7         31  
  7         25  
  8         37  
  8         38  
  14         56  
  14         49  
  1         5  
  1         6  
  12         42  
  12         46  
  2         7  
  2         8  
478 87         347 fresh $method => $stub;
479             }
480             }
481              
482             sub _merge_paths {
483             #
484             # In theory, this method should never be called with type != 'origin'
485             #
486 0     0   0 my ($self, $base, $type) = @_;
487 0   0     0 $type //= 'origin';
488              
489             # If the base URI has a defined authority component and an empty
490             # path, then return a string consisting of "/" concatenated with the
491             # reference's path; otherwise,
492 5 0 0     22 return '/' . $self->path($type) if (defined($base->authority($type)) && ! length($base->path($type)));
493              
494             # return a string consisting of the reference's path component
495             # appended to all but the last segment of the base URI's path (i.e.,
496             # excluding any characters after the right-most "/" in the base URI
497             # path, or excluding the entire base URI path if it does not contain
498             # any "/" characters).}
499 5         22 my $base_path = $base->path($type);
500 0         0 my $rindex = rindex($base_path, '/');
501 0         0 my $new_path;
502 0 0       0 if ($rindex >= 0) {
503 0 0       0 if ($rindex < (length($base_path) - 1)) {
504 3         40 $new_path = substr($base_path, 0, $rindex + 1)
505             } else {
506 3         14 $new_path = $base_path
507             }
508             } else {
509 2         10 $new_path = '';
510             }
511 2         7 return $new_path . $self->path($type)
512             }
513              
514             # -------------
515             # Normalization
516             # -------------
517             around _set__scheme => sub {
518             my ($orig, $self, $value) = @_;
519              
520             #
521             # Normalized scheme is case insensitive and should be lowercased
522             #
523             $value->{normalized} = lc($value->{normalized});
524             $self->$orig($value)
525             };
526              
527             around _set__host => sub {
528             my ($orig, $self, $value) = @_;
529              
530             #
531             # Normalized host is case insensitive and should be lowercased
532             #
533             $value->{normalized} = lc($value->{normalized});
534             $self->$orig($value)
535             };
536              
537             around _set__ipv4 => sub {
538             my ($orig, $self, $value) = @_;
539              
540             #
541             # IP is a host, and normalized host is case insensitive and should be lowercased
542             #
543             $value->{normalized} = lc($value->{normalized});
544             $self->$orig($value)
545             };
546              
547             around _set__ipv6 => sub {
548             my ($orig, $self, $value) = @_;
549              
550             #
551             # IP is a host, and normalized host is case insensitive and should be lowercased
552             #
553             $value->{normalized} = lc($value->{normalized});
554             $self->$orig($value)
555             };
556              
557             around _set__ipvx => sub {
558             my ($orig, $self, $value) = @_;
559              
560             #
561             # IP is a host, and normalized host is case insensitive and should be lowercased
562             #
563             $value->{normalized} = lc($value->{normalized});
564             $self->$orig($value)
565             };
566              
567             around _set__zone => sub {
568             my ($orig, $self, $value) = @_;
569              
570             #
571             # Zone is part of host, so a normalized zone is case insensitive and should be lowercased
572             #
573             $value->{normalized} = lc($value->{normalized});
574             $self->$orig($value)
575             };
576              
577             around _set__path => sub {
578             my ($orig, $self, $value) = @_;
579             #
580             # Normalized path is done by removing dot segments
581             #
582             $value->{normalized} = __PACKAGE__->remove_dot_segments($value->{normalized});
583             $self->$orig($value)
584             };
585              
586             # ------------------------
587             # Grammar Internal helpers
588             # ------------------------
589             #
590             # This _pct_encoded method guarantees that the output is a sequence of ASCII characters
591             # even if the UTF-8 flag would be set. For instance sequence %ce%a3 will be
592             # seen as "\x{ce}\x{a3}" in the resulting string, and NOT "\x{cea3}".
593             #
594             sub __pct_encoded {
595 8     8   24 my ($self, undef, $hex1, $hex2) = @_;
596             #
597             # Note that here $hex are terminals, so in fact hex's origin == decoded == normalized
598             #
599 8         29 my $origin = join('', '%', $hex1->{origin}, $hex2->{origin});
600 9         38 my $decoded = chr(hex(join('', $hex1->{decoded}, $hex2->{decoded})));
601             #
602             # Normalization is decoding any percent-encoded octet that corresponds
603             # to an unreserved character, as described in Section 2.3:
604             # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
605             # else it should be normalized to uppercase.
606             #
607 9 50       39 my $normalized = ($decoded =~ /[A-Za-z0-9\-._~]/) ? $decoded : uc($origin);
608 8         114 return { origin => $origin, decoded => $decoded, normalized => $normalized }
609             }
610             #
611             # Special for zone
612             #
613             sub __encoded_percent_character {
614             #
615             # '%' decoded character is not an unreserved character, so the
616             # normalized form remains %25
617             #
618 1     1   15 return { origin => '%25', decoded => '%', normalized => '%25'}
619             }
620             sub __not_encoded_percent_character {
621             #
622             # Same as __encoded_percent_character(), except that origin is '%' character
623             #
624 1     1   16 return { origin => '%', decoded => '%', normalized => '%25'}
625             }
626             #
627             # Pushes segments in a _segment[] array
628             #
629             sub __segment {
630 42     42   117 my ($self, @args) = @_;
631              
632 42         104 my $concat = $self->__concat(@args);
633 42         77 push(@{$self->_segments->{origin}}, $concat->{origin});
  42         865  
634 42         303 push(@{$self->_segments->{decoded}}, $concat->{decoded});
  42         797  
635 42         273 push(@{$self->_segments->{normalized}}, $concat->{normalized});
  42         694  
636 42         616 return $concat
637             }
638             #
639             # Exactly the same as ESLIF's ::concat built-in, but revisited
640             # to work on original, decoded and normalized strings at the same time
641             #
642             sub __concat {
643 2014     2014   4036 my ($self, @args) = @_;
644              
645 2014         6645 my %rc = ( origin => '', decoded => '', normalized => '' );
646 2014         4030 foreach my $arg (@args) {
647 2824 100       6101 next unless ref($arg);
648 2821   50     7242 $rc{origin} .= $arg->{origin} // '';
649 2821   50     5908 $rc{decoded} .= $arg->{decoded} // '';
650 2821   50     6237 $rc{normalized} .= $arg->{normalized} // '';
651             }
652 2014         20919 return \%rc
653             }
654             #
655             # Exactly the same as ESLIF's ::transfer built-in, but revisited
656             # to work on original and decoded strings at the same time
657             #
658             sub __symbol {
659 829     829   1653 my ($self, $symbol) = @_;
660             #
661             # No normalization on symbol until we know the context
662             #
663 829         8423 return { origin => $symbol, decoded => $symbol, normalized => $symbol }
664             }
665              
666              
667             1;
668              
669             =pod
670              
671             =encoding UTF-8
672              
673             =head1 NAME
674              
675             MarpaX::ESLIF::URI::_generic - URI Generic syntax as per RFC3986/RFC6874
676              
677             =head1 VERSION
678              
679             version 0.007
680              
681             =head1 SUBROUTINES/METHODS
682              
683             =head2 $class->new($uri)
684              
685             Instantiate a new object, or croak on failure. Takes as parameter an URI that will be parsed. The object instance is noted C<$self> below.
686              
687             =for Pod::Coverage BUILDARGS
688              
689             =for Pod::Coverage BUILD
690              
691             =head2 $class->bnf
692              
693             Returns the BNF used to parse the input.
694              
695             =head2 $class->eslif
696              
697             Returns a MarpaX::ESLIF singleton.
698              
699             =head2 $class->grammar
700              
701             Returns the compiled BNF used to parse the input as MarpaX::ESLIF::Grammar singleton.
702              
703             =head2 $self->string($type)
704              
705             Returns the string version of the URI, C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
706              
707             =head2 $self->scheme($type)
708              
709             Returns the scheme, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
710              
711             =head2 $self->authority($type)
712              
713             Returns the authority, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
714              
715             =head2 $self->userinfo($type)
716              
717             Returns the userinfo, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
718              
719             =head2 $self->host($type)
720              
721             Returns the host (which may contain C<[]> delimiters in case of IPv6 literal), or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
722              
723             =head2 $self->hostname($type)
724              
725             Returns the hostname (without eventual C<[]> delimiters), or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
726              
727             =head2 $self->ip($type)
728              
729             Returns the IP when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
730              
731             Note that this is the full concatenation of what makes an IP, in particular you will get the eventual IPv6 Zone Id if there is one.
732              
733             =head2 $self->ipv4($type)
734              
735             Returns the IPv4 when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
736              
737             =head2 $self->ipv6($type)
738              
739             Returns the IPv6 when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
740              
741             =head2 $self->ipvx($type)
742              
743             Returns the decoded IPvI<future> (as per the spec) when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
744              
745             =head2 $self->zone($type)
746              
747             Returns the IPv6 Zone Id, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
748              
749             =head2 $self->port
750              
751             Returns the port, or undef.
752              
753             =head2 $self->path($type)
754              
755             Returns the path, or the empty string. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
756              
757             =head2 $self->segments($type)
758              
759             Returns the path segments as an array reference, which may be empty. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
760              
761             =head2 $self->query($type)
762              
763             Returns the query, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
764              
765             =head2 $self->fragment($type)
766              
767             Returns the fragment, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
768              
769             =head2 $self->is_abs
770              
771             Returns a true value if the URI is absolute.
772              
773             =head2 $self->base
774              
775             Returns a instance that is the absolute version of C<$self> if possible, or croak on failure.
776              
777             When C<$self> is absolute, C<$self> itself is returned, otherwise it must have a scheme and a new instance without the origin fragment is returned.
778              
779             =head2 $self->normalized
780              
781             Returns the normalized string of C<$self>.
782              
783             Equivalent to C<< $self->string('normalized') >>.
784              
785             =head2 $self->decoded
786              
787             Returns the decoded string of C<$self>.
788              
789             Equivalent to C<< $self->string('decoded') >>.
790              
791             =head2 $self->resolve($base, $strict)
792              
793             Returns a instance that converts C<$self> into C<$base> URI, or croak on failure.
794              
795             Default base is C<< $self->base >>.
796              
797             If C<$strict> is a true value, C<$self> is always considered relative to C<$base>, otherwise a new URI without C<$self>'s dot segments is returned when C<$self> has a scheme. Default is a true value.
798              
799             =head2 $self->eq($other)
800              
801             Returns a instance that is the absolute version of current instance if possible, or croak on failure.
802              
803             =head2 $self->clone
804              
805             Returns a clone of current instance.
806              
807             =head2 $self->as_string
808              
809             Alias to C<string> method.
810              
811             =head2 $class->remove_dot_segments($path)
812              
813             Implementation of L<RFC3896's remove_dot_segments|https://tools.ietf.org/html/rfc3986#section-5.2.4>.
814              
815             =head1 NOTES
816              
817             =over
818              
819             =item Logging
820              
821             This package is L<Log::Any> aware, and will use the later in case parsing fails to output error messages.
822              
823             =back
824              
825             =head1 SEE ALSO
826              
827             L<MarpaX::ESLIF::URI>, L<RFC3986|https://tools.ietf.org/html/rfc3986>, L<RFC6874|https://tools.ietf.org/html/rfc6874>
828              
829             =head1 AUTHOR
830              
831             Jean-Damien Durand <jeandamiendurand@free.fr>
832              
833             =head1 COPYRIGHT AND LICENSE
834              
835             This software is copyright (c) 2017 by Jean-Damien Durand.
836              
837             This is free software; you can redistribute it and/or modify it under
838             the same terms as the Perl 5 programming language system itself.
839              
840             =cut
841              
842             __DATA__
843             #
844             # We maintain two string version in parallel when valuating the parse tree:
845             # - original
846             # - decoded
847             :default ::= action => __concat
848             symbol-action => __symbol
849              
850             # :start ::= <URI reference>
851             <URI reference> ::= <URI> action => _action_string
852             | <relative ref> action => _action_string
853             #
854             # Reference: https://tools.ietf.org/html/rfc3986#appendix-A
855             # Reference: https://tools.ietf.org/html/rfc6874
856             #
857             <URI> ::= <scheme> ":" <hier part> <URI query> <URI fragment>
858             <URI query> ::= "?" <query>
859             <URI query> ::=
860             <URI fragment> ::= "#" <fragment>
861             <URI fragment> ::=
862              
863             <hier part> ::= "//" <authority> <path abempty>
864             | <path absolute>
865             | <path rootless>
866             | <path empty>
867              
868              
869             <absolute URI> ::= <scheme> ":" <hier part> <URI query>
870              
871             <relative ref> ::= <relative part> <URI query> <URI fragment>
872              
873             <relative part> ::= "//" <authority> <path abempty>
874             | <path absolute>
875             | <path noscheme>
876             | <path empty>
877              
878             <scheme> ::= <scheme value> action => _action_scheme
879             <scheme value> ::= <ALPHA> <scheme trailer>
880             <scheme trailer unit> ::= <ALPHA> | <DIGIT> | "+" | "-" | "."
881             <scheme trailer> ::= <scheme trailer unit>*
882              
883             <authority userinfo> ::= <userinfo> "@"
884             <authority userinfo> ::=
885             <authority port> ::= ":" <port>
886             <authority port> ::=
887             <authority> ::= <authority value> action => _action_authority
888             <authority value> ::= <authority userinfo> <host> <authority port>
889             <userinfo unit> ::= <unreserved> | <pct encoded> | <sub delims> | ":"
890             <userinfo> ::= <userinfo value> action => _action_userinfo
891             <userinfo value> ::= <userinfo unit>*
892             #
893             # The syntax rule for host is ambiguous because it does not completely
894             # distinguish between an IPv4address and a reg-name. In order to
895             # disambiguate the syntax, we apply the "first-match-wins" algorithm:
896             # If host matches the rule for IPv4address, then it should be
897             # considered an IPv4 address literal and not a reg-name.
898             #
899             <host> ::= <IP literal> rank => 0 action => _action_host
900             | <IPv4address> rank => -1 action => _action_host
901             | <reg name> rank => -2 action => _action_host
902             <port> ::= <port value> action => _action_port
903             <port value> ::= <DIGIT>*
904              
905             <IP literal interior> ::= <IPv6address> action => _action_ip
906             | <IPv6addrz> action => _action_ip
907             | <IPvFuture> action => _action_ip
908             <IP literal> ::= "[" <IP literal interior> "]"
909             <ZoneID interior> ::= <unreserved> | <pct encoded>
910             <ZoneID> ::= <ZoneID interior>+ action => _action_zone
911             <IPv6addrz percent char> ::= "%25" action => __encoded_percent_character
912             #
913             # From https://tools.ietf.org/html/rfc6874#section-3:
914             #
915             # "we also suggest that URI parsers accept bare "%" signs when possible"
916             #
917             <IPv6addrz percent char> ::= "%" action => __not_encoded_percent_character
918             <IPv6addrz> ::= <IPv6address> <IPv6addrz percent char> <ZoneID>
919              
920             <IPvFuture> ::= "v" <HEXDIG many> "." <IPvFuture trailer> action => _action_ipvx
921             <IPvFuture trailer unit> ::= <unreserved> | <sub delims> | ":"
922             <IPvFuture trailer> ::= <IPvFuture trailer unit>+
923              
924             <IPv6address> ::= <6 h16 colon> <ls32> action => _action_ipv6
925             | "::" <5 h16 colon> <ls32> action => _action_ipv6
926             | <h16> "::" <4 h16 colon> <ls32> action => _action_ipv6
927             | "::" <4 h16 colon> <ls32> action => _action_ipv6
928             | <0 to 1 h16 colon> <h16> "::" <3 h16 colon> <ls32> action => _action_ipv6
929             | "::" <3 h16 colon> <ls32> action => _action_ipv6
930             | <0 to 2 h16 colon> <h16> "::" <2 h16 colon> <ls32> action => _action_ipv6
931             | "::" <2 h16 colon> <ls32> action => _action_ipv6
932             | <0 to 3 h16 colon> <h16> "::" <1 h16 colon> <ls32> action => _action_ipv6
933             | "::" <1 h16 colon> <ls32> action => _action_ipv6
934             | <0 to 4 h16 colon> <h16> "::" <ls32> action => _action_ipv6
935             | "::" <ls32> action => _action_ipv6
936             | <0 to 5 h16 colon> <h16> "::" <h16> action => _action_ipv6
937             | "::" <h16> action => _action_ipv6
938             | <0 to 6 h16 colon> <h16> "::" action => _action_ipv6
939             | "::" action => _action_ipv6
940              
941             <1 h16 colon> ::= <h16> ":"
942             <2 h16 colon> ::= <h16> ":" <h16> ":"
943             <3 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":"
944             <4 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":" <h16> ":"
945             <5 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":" <h16> ":" <h16> ":"
946             <6 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":" <h16> ":" <h16> ":" <h16> ":"
947              
948             #
949             # These productions are ambiguous without ranking (rank is equivalent to make regexps greedy)
950             #
951             <0 to 1 h16 colon> ::=
952             <0 to 1 h16 colon> ::= <1 h16 colon> rank => 1
953             <0 to 2 h16 colon> ::= <0 to 1 h16 colon>
954             <0 to 2 h16 colon> ::= <0 to 1 h16 colon> <1 h16 colon> rank => 1
955             <0 to 3 h16 colon> ::= <0 to 2 h16 colon>
956             <0 to 3 h16 colon> ::= <0 to 2 h16 colon> <1 h16 colon> rank => 1
957             <0 to 4 h16 colon> ::= <0 to 3 h16 colon>
958             <0 to 4 h16 colon> ::= <0 to 3 h16 colon> <1 h16 colon> rank => 1
959             <0 to 5 h16 colon> ::= <0 to 4 h16 colon>
960             <0 to 5 h16 colon> ::= <0 to 4 h16 colon> <1 h16 colon> rank => 1
961             <0 to 6 h16 colon> ::= <0 to 5 h16 colon>
962             <0 to 6 h16 colon> ::= <0 to 5 h16 colon> <1 h16 colon> rank => 1
963              
964             <h16> ::= <HEXDIG>
965             | <HEXDIG> <HEXDIG>
966             | <HEXDIG> <HEXDIG> <HEXDIG>
967             | <HEXDIG> <HEXDIG> <HEXDIG> <HEXDIG>
968              
969             <ls32> ::= <h16> ":" <h16> | <IPv4address>
970             <IPv4address> ::= <dec octet> "." <dec octet> "." <dec octet> "." <dec octet> action => _action_ipv4
971              
972             <dec octet> ::= <DIGIT> # 0-9
973             | [\x{31}-\x{39}] <DIGIT> # 10-99
974             | "1" <DIGIT> <DIGIT> # 100-199
975             | "2" [\x{30}-\x{34}] <DIGIT> # 200-249
976             | "25" [\x{30}-\x{35}] # 250-255
977              
978             <reg name unit> ::= <unreserved> | <pct encoded> | <sub delims>
979             <reg name> ::= <reg name unit>*
980              
981             <path> ::= <path abempty> # begins with "/" or is empty
982             | <path absolute> # begins with "/" but not "//"
983             | <path noscheme> # begins with a non-colon segment
984             | <path rootless> # begins with a segment
985             | <path empty> # zero characters
986              
987             <path abempty unit> ::= "/" <segment>
988             <path abempty> ::= <path abempty value> action => _action_path
989             <path abempty value> ::= <path abempty unit>*
990             <path absolute> ::= <path absolute value> action => _action_path
991             <path absolute value> ::= "/"
992             | "/" <segment nz> <path abempty value>
993             <path noscheme> ::= <path noscheme value> action => _action_path
994             <path noscheme value> ::= <segment nz nc> <path abempty value>
995             <path rootless> ::= <path rootless value> action => _action_path
996             <path rootless value> ::= <segment nz> <path abempty value>
997             <path empty> ::= # Default value for path is ''
998              
999             <segment> ::= <pchar>* action => __segment
1000             <segment nz> ::= <pchar>+ action => __segment
1001             <segment nz nc unit> ::= <unreserved> | <pct encoded> | <sub delims> | "@" # non-zero-length segment without any colon ":"
1002             <segment nz nc> ::= <segment nz nc unit>+ action => __segment
1003              
1004             <pchar> ::= <unreserved> | <pct encoded> | <sub delims> | ":" | "@"
1005              
1006             <query unit> ::= <pchar> | "/" | "?"
1007             <query> ::= <query value> action => _action_query
1008             <query value> ::= <query unit>*
1009              
1010             <fragment unit> ::= <pchar> | "/" | "?"
1011             <fragment> ::= <fragment value> action => _action_fragment
1012             <fragment value> ::= <fragment unit>*
1013              
1014             <pct encoded> ::= "%" <HEXDIG> <HEXDIG> action => __pct_encoded
1015              
1016             <unreserved> ::= <ALPHA> | <DIGIT> | "-" | "." | "_" | "~"
1017             <reserved> ::= <gen delims> | <sub delims>
1018             <gen delims> ::= ":" | "/" | "?" | "#" | "[" | "]" | "@"
1019             <sub delims> ::= "!" | "$" | "&" | "'" | "(" | ")"
1020             | "*" | "+" | "," | ";" | "="
1021              
1022             <HEXDIG many> ::= <HEXDIG>+
1023             <ALPHA> ::= [A-Za-z]
1024             <DIGIT> ::= [0-9]
1025             <HEXDIG> ::= [0-9A-Fa-f] # case insensitive