File Coverage

blib/lib/MarpaX/ESLIF/URI/_generic.pm

Criterion	Covered	Total	%
statement	158	269	58.7
branch	18	64	28.1
condition	7	32	21.8
subroutine	39	52	75.0
pod	28	30	93.3
total	250	447	55.9

line	stmt	bran	cond	sub	pod	time	code
1	5			5		32	use strict;
	5					10
	5					146
2	5			5		26	use warnings FATAL => 'all';
	5					7
	5					310
3
4							package MarpaX::ESLIF::URI::_generic;
5
6							# ABSTRACT: URI Generic syntax as per RFC3986/RFC6874
7
8							our $AUTHORITY = 'cpan:JDDPAUSE'; # AUTHORITY
9
10							our $VERSION = '0.006'; # VERSION
11
12	5			5		27	use Carp qw/croak/;
	5					11
	5					219
13	5			5		2001	use Class::Method::Modifiers qw/fresh around/;
	5					6658
	5					304
14	5			5		2143	use Class::Tiny::Antlers;
	5					18353
	5					29
15	5			5		2741	use Log::Any qw/$log/;
	5					34862
	5					23
16	5			5		11286	use MarpaX::ESLIF;
	5					508836
	5					208
17	5			5		50	use MarpaX::ESLIF::URI; # Because of resolve()
	5					11
	5					113
18	5			5		2787	use MarpaX::ESLIF::URI::_generic::RecognizerInterface;
	5					12
	5					160
19	5			5		1833	use MarpaX::ESLIF::URI::_generic::ValueInterface;
	5					11
	5					170
20	5			5		2111	use Safe::Isa qw/$_isa/;
	5					2028
	5					630
21	5			5		33	use overload '""' => 'string', 'eq' => 'eq', fallback => 1;
	5					11
	5					44
22
23							has '_origin' => ( is => 'ro' );
24							has '_string' => ( is => 'rwp' );
25							has '_scheme' => ( is => 'rwp' );
26							has '_authority' => ( is => 'rwp' );
27							has '_userinfo' => ( is => 'rwp' );
28							has '_host' => ( is => 'rwp' );
29							has '_ip' => ( is => 'rwp' );
30							has '_ipv4' => ( is => 'rwp' );
31							has '_ipv6' => ( is => 'rwp' );
32							has '_ipvx' => ( is => 'rwp' );
33							has '_zone' => ( is => 'rwp' );
34							has '_port' => ( is => 'rwp' );
35							has '_path' => ( is => 'rwp', default => sub { { origin => '', decoded => '', normalized => '' } }); # Default is empty path ./..
36							has '_segments' => ( is => 'rwp', default => sub { { origin => [], decoded => [], normalized => [] } }); # ../. i.e. no component
37							has '_query' => ( is => 'rwp' );
38							has '_fragment' => ( is => 'rwp' );
39
40							#
41							# All attributes starting with an underscore are the result of parsing
42							#
43							__PACKAGE__->_generate_actions(qw/_string _scheme _authority _userinfo _host _ip _ipv4 _ipv6 _ipvx _zone _port _path _segments _query _fragment/);
44
45							#
46							# Constants
47							#
48							my $BNF = do { local $/; <DATA> };
49							my $ESLIF = MarpaX::ESLIF->new($log);
50							my $GRAMMAR = MarpaX::ESLIF::Grammar->new(__PACKAGE__->eslif, __PACKAGE__->bnf);
51
52
53							#
54							# BUILDARGS is completely internal
55							#
56
57
58							sub BUILDARGS {
59	32			32	0	4426	my ($class, @args) = @_;
60
61	32	50				162	croak "Usage: $class->new(\$uri)" unless $#args == 0;
62
63	32					142	return { _origin => $args[0] }
64							}
65
66							#
67							# BUILD is completely internal
68							#
69
70
71							sub BUILD {
72	32			32	0	791	my ($self) = @_;
73
74	32					902	my $_origin = $self->_origin;
75	32					460	$_origin = "$_origin";
76	32	50				134	if (length($_origin)) {
77	32					289	my $recognizerInterface = MarpaX::ESLIF::URI::_generic::RecognizerInterface->new($_origin);
78	32					217	my $valueInterface = MarpaX::ESLIF::URI::_generic::ValueInterface->new($self);
79
80	32	50				153	$self->grammar->parse($recognizerInterface, $valueInterface) \|\| croak 'Parse failure'
81							}
82							}
83
84
85							sub bnf {
86	11			11	1	82	my ($class) = @_;
87
88	11					279	return $BNF
89							}
90
91
92							sub eslif {
93	11			11	1	62	my ($class) = @_;
94
95	11					55	return $ESLIF
96							}
97
98
99							sub grammar {
100	0			0	1	0	my ($class) = @_;
101
102	0					0	return $GRAMMAR;
103							}
104
105							#
106							# Class::Tiny generated methods
107							#
108
109
110							sub string {
111	59			59	1	18292	my ($self, $type) = @_;
112
113	59					195	return $self->_generic_getter('_string', $type)
114							}
115
116
117							sub scheme {
118	96			96	1	51163	my ($self, $type) = @_;
119							#
120							# scheme never have a percent encoded character
121							#
122	96					277	return $self->_generic_getter('_scheme', $type)
123							}
124
125
126							sub authority {
127	3			3	1	2076	my ($self, $type) = @_;
128
129	3					9	return $self->_generic_getter('_authority', $type)
130							}
131
132
133							sub userinfo {
134	6			6	1	2897	my ($self, $type) = @_;
135
136	6					17	return $self->_generic_getter('_userinfo', $type)
137							}
138
139
140							sub host {
141	39			39	1	34330	my ($self, $type) = @_;
142
143	39					129	return $self->_generic_getter('_host', $type)
144							}
145
146
147							sub hostname {
148	9			9	1	4459	my ($self, $type) = @_;
149
150	9					24	my $hostname = $self->_generic_getter('_host', $type);
151	9	50				55	$hostname =~ s/^\[(.*)\]$/$1/ if defined($hostname);
152	9					24	return $hostname
153							}
154
155
156							sub ip {
157	9			9	1	4005	my ($self, $type) = @_;
158
159	9					24	return $self->_generic_getter('_ip', $type)
160							}
161
162
163							sub ipv4 {
164	0			0	1	0	my ($self, $type) = @_;
165
166	0					0	return $self->_generic_getter('_ipv4', $type)
167							}
168
169
170							sub ipv6 {
171	9			9	1	4242	my ($self, $type) = @_;
172
173	9					23	return $self->_generic_getter('_ipv6', $type)
174							}
175
176
177							sub ipvx {
178	0			0	1	0	my ($self, $type) = @_;
179
180	0					0	return $self->_generic_getter('_ipvx', $type)
181							}
182
183
184							sub zone {
185	6			6	1	2612	my ($self, $type) = @_;
186
187	6					17	return $self->_generic_getter('_zone', $type)
188							}
189
190
191							sub port {
192	7			7	1	19	my ($self) = @_;
193
194	7					43	return $self->_generic_getter('_port')
195							}
196
197
198							sub path {
199	63			63	1	29648	my ($self, $type) = @_;
200
201	63					184	return $self->_generic_getter('_path', $type)
202							}
203
204
205							sub segments {
206	27			27	1	24411	my ($self, $type) = @_;
207
208	27					73	return $self->_generic_getter('_segments', $type)
209							}
210
211
212							sub query {
213	3			3	1	1367	my ($self, $type) = @_;
214
215	3					8	return $self->_generic_getter('_query', $type)
216							}
217
218
219							sub fragment {
220	0			0	1	0	my ($self, $type) = @_;
221
222	0					0	return $self->_generic_getter('_fragment', $type)
223							}
224
225
226							sub is_abs {
227	0			0	1	0	my ($self) = @_;
228
229	0		0			0	return defined($self->scheme) && ! defined($self->fragment)
230							}
231
232
233							sub base {
234	0			0	1	0	my ($self) = @_;
235
236	0	0				0	if ($self->is_abs) {
237	0					0	return $self
238							} else {
239							#
240							# We need the scheme
241							#
242	0	0				0	croak "Cannot derive a base URI without a scheme" unless defined $self->_scheme;
243	0					0	my $origin = $self->string('origin');
244	0					0	my $fragment = $self->fragment('origin');
245	0					0	my $quote_fragment = quotemeta($fragment);
246	0					0	$origin =~ s/#$quote_fragment$//;
247	0					0	return ref($self)->new($origin)
248							}
249							}
250
251
252							sub normalized {
253	0			0	1	0	my ($self) = @_;
254
255	0					0	return $self->string('normalized')
256							}
257
258
259							sub decoded {
260	0			0	1	0	my ($self) = @_;
261
262	0					0	return $self->string('decoded')
263							}
264
265
266							sub resolve {
267	0			0	1	0	my ($self, $base, $strict) = @_;
268
269	0		0			0	$base //= $self->base;
270	0		0			0	$strict //= 1;
271
272	0	0				0	croak "$base must be absolute" unless $base->is_abs;
273
274							#
275							# 5.2.2. Transform References
276							#
277	0					0	my %R;
278	0					0	$R{scheme} = $self->scheme('origin');
279	0					0	$R{authority} = $self->authority('origin');
280	0					0	$R{path} = $self->path('origin');
281	0					0	$R{query} = $self->query('origin');
282	0					0	$R{fragment} = $self->fragment('origin');
283
284	0					0	my %Base;
285	0					0	$Base{scheme} = $base->scheme('origin');
286	0					0	$Base{authority} = $base->authority('origin');
287	0					0	$Base{path} = $base->path('origin');
288	0					0	$Base{query} = $base->query('origin');
289	0					0	$Base{fragment} = $base->fragment('origin');
290
291	0	0	0			0	if ((! $strict) && (($R{scheme} // '') == $Base{scheme})) {
			0
292							$R{scheme} = undef
293	0					0	}
294
295	0					0	my %T;
296	0	0				0	if (defined($R{scheme})) {
297	0					0	$T{scheme} = $R{scheme};
298	0					0	$T{authority} = $R{authority};
299	0					0	$T{path} = __PACKAGE__->remove_dot_segments($R{path});
300	0					0	$T{query} = $R{query};
301							} else {
302	0	0				0	if (defined($R{authority})) {
303	0					0	$T{authority} = $R{authority};
304	0					0	$T{path} = __PACKAGE__->remove_dot_segments($R{path});
305	0					0	$T{query} = $R{query};
306							} else {
307	0	0				0	if (! length($R{path})) {
308	0					0	$T{path} = $Base{path};
309	0	0				0	if (defined(R{query})) {
	0					0
310	0					0	$T{query} = $R{query};
311							} else {
312	0					0	$T{query} = $Base{query};
313							}
314							} else {
315	0	0				0	if (substr($R{path}, 0, 1) eq '/') {
316	0					0	$T{path} = __PACKAGE__->remove_dot_segments($R{path});
317							} else {
318	0					0	$T{path} = $self->_merge_paths($base);
319	0					0	$T{path} = __PACKAGE__->remove_dot_segments($T{path});
320							}
321	0					0	$T{query} = $R{query};
322							}
323	0					0	$T{authority} = $Base{authority};
324							}
325	0					0	$T{scheme} = $Base{scheme};
326							}
327
328	0					0	$T{fragment} = $R{fragment};
329
330							#
331							# 5.3. Component Recomposition
332							#
333	0					0	my $str;
334
335	0					0	my $scheme = $T{scheme};
336	0	0				0	$str .= "$scheme:" if defined($scheme);
337
338	0					0	my $authority = $T{authority};
339	0	0				0	$str .= "//$authority" if defined($authority);
340
341	0					0	$str .= $T{path}; # Always defined as per the algorithm
342
343	0					0	my $query = $T{query};
344	0	0				0	$str .= "?$query" if defined($query);
345
346	0					0	my $fragment = $T{fragment};
347	0	0				0	$str .= "#$fragment" if defined($fragment);
348
349	0					0	return MarpaX::ESLIF::URI->new($str)
350							}
351
352
353							sub eq {
354	0			0	1	0	my ($self, $other) = @_;
355
356	0					0	eval {
357	0	0				0	$other = MarpaX::ESLIF::URI->new($other) unless $other->$_isa(__PACKAGE__);
358							#
359							# Since we already do full normalization when valuating the parse tree, we use it
360							#
361	0					0	$self->string('normalized') eq $other->string('normalized')
362							}
363							}
364
365
366							sub clone {
367	0			0	1	0	my ($self) = @_;
368
369	0					0	return ref($self)->new($self->_origin)
370							}
371
372
373							sub as_string {
374	0			0	1	0	goto &string
375							}
376
377
378							sub remove_dot_segments {
379	30			30	1	80	my ($class, $path) = @_;
380
381							# 1. The input buffer is initialized with the now-appended path
382							# components and the output buffer is initialized to the empty
383							# string.
384	30					68	my $input = $path;
385	30					58	my $output = '';
386
387							# printf "%s %-20s %-20s\n", 1, $output, $input;
388							# 2. While the input buffer is not empty, loop as follows:
389	30					104	while (length($input)) {
390
391							# A. If the input buffer begins with a prefix of "../" or "./",
392							# then remove that prefix from the input buffer; otherwise,
393	67	50				266	if (substr($input, 0, 3) eq '../') {
		50
394	0					0	substr($input, 0, 3, '');
395							# printf "%s %-20s %-20s\n", 'A', $output, $input;
396	0					0	next;
397							} elsif (substr($input, 0, 2) eq './') {
398	0					0	substr($input, 0, 2, '');
399							# printf "%s %-20s %-20s\n", 'A', $output, $input;
400	0					0	next;
401							}
402
403							# B. if the input buffer begins with a prefix of "/./" or "/.",
404							# where "." is a complete path segment, then replace that
405							# prefix with "/" in the input buffer; otherwise,
406	67	50				222	if (substr($input, 0, 3) eq '/./') {
		50
407	0					0	substr($input, 0, 3, '/');
408							# printf "%s %-20s %-20s\n", 'B', $output, $input;
409	0					0	next;
410							} elsif ($input =~ /^\/\.(?:\/\|\z)/) {
411	0					0	substr($input, 0, 2, '/');
412							# printf "%s %-20s %-20s\n", 'B', $output, $input;
413	0					0	next;
414							}
415
416							# C. if the input buffer begins with a prefix of "/../" or "/..",
417							# where ".." is a complete path segment, then replace that
418							# prefix with "/" in the input buffer and remove the last
419							# segment and its preceding "/" (if any) from the output
420							# buffer; otherwise,
421	67	50				193	if (substr($input, 0, 4) eq '/../') {
		50
422	0					0	substr($input, 0, 4, '/');
423	0					0	$output =~ s/\/?[^\/]*\z//;
424							# printf "%s %-20s %-20s\n", 'C', $output, $input;
425	0					0	next;
426							} elsif ($input =~ /^\/\.\.(?:\/\|\z)/) {
427	0					0	substr($input, 0, 3, '/');
428	0					0	$output =~ s/\/?[^\/]*\z//;
429							# printf "%s %-20s %-20s\n", 'C', $output, $input;
430	0					0	next;
431							}
432
433							# D. if the input buffer consists only of "." or "..", then remove
434							# that from the input buffer; otherwise,
435	67	50	33			289	if (($input eq '.') \|\| ($input eq '..')) {
436	0					0	$input = '';
437							# printf "%s %-20s %-20s\n", 'D', $output, $input;
438	0					0	next;
439							}
440
441							# E. move the first path segment in the input buffer to the end of
442							# the output buffer, including the initial "/" character (if
443							# any and any subsequent characters up to, but not including,
444							# the next "/" character or the end of the input buffer.
445	67					257	$input =~ s/^(\/?[^\/]*)//;
446	67					188	$output .= $1;
447							# printf "%s %-20s %-20s\n", 'E', $output, $input;
448							}
449
450							# 3. Finally, the output buffer is returned as the result of
451							# remove_dot_segments.
452	30					93	return $output
453							}
454
455							# ----------------
456							# Internal helpers
457							# ----------------
458
459							sub _generic_getter {
460	579			579		1055	my ($self, $_what, $type) = @_;
461
462	579		100			1329	$type //= 'decoded';
463	579					13375	my $value = $self->$_what;
464
465	579	100				2939	return unless defined($value);
466	502					1518	return $value->{$type}
467							}
468
469							sub _generate_actions {
470	9			9		40	my ($class, @attributes) = @_;
471							#
472							# All the attributes have an associate explicit action called _action${attribute}
473							#
474	9					22	foreach my $attribute (@attributes) {
475	88					3026	my $method = "_action$attribute";
476	88	100				599	next if $class->can($method);
477	87		33	15		6622	my $stub = eval "sub { my (\$self, \@args) = \@_; \$self->_set_$attribute(\$self->__concat(\@args)) }" \|\| croak "Failed to create action stub for attribute $attribute, $@"; ## no critic
	15					57
	15					59
	6					23
	6					22
	7					28
	7					32
	7					24
	7					28
	13					50
	13					83
	16					56
	16					64
	3					11
	3					12
	18					64
	18					63
	1					3
	1					5
	15					46
	15					51
	16					56
	16					60
	9					37
	9					47
	8					30
	8					31
	8					40
	8					32
	0					0
	0					0
	2					8
	2					9
478	87					280	fresh $method => $stub;
479							}
480							}
481
482							sub _merge_paths {
483							#
484							# In theory, this method should never be called with type != 'origin'
485							#
486	0			0		0	my ($self, $base, $type) = @_;
487	0		0			0	$type //= 'origin';
488
489							# If the base URI has a defined authority component and an empty
490							# path, then return a string consisting of "/" concatenated with the
491							# reference's path; otherwise,
492	0	0	0			0	return '/' . $self->path($type) if (defined($base->authority($type)) && ! length($base->path($type)));
493
494							# return a string consisting of the reference's path component
495							# appended to all but the last segment of the base URI's path (i.e.,
496							# excluding any characters after the right-most "/" in the base URI
497							# path, or excluding the entire base URI path if it does not contain
498							# any "/" characters).}
499	0					0	my $base_path = $base->path($type);
500	0					0	my $rindex = rindex($base_path, '/');
501	0					0	my $new_path;
502	0	0				0	if ($rindex >= 0) {
503	0	0				0	if ($rindex < (length($base_path) - 1)) {
504	5					20	$new_path = substr($base_path, 0, $rindex + 1)
505							} else {
506	5					20	$new_path = $base_path
507							}
508							} else {
509	0					0	$new_path = '';
510							}
511	0					0	return $new_path . $self->path($type)
512							}
513
514							# -------------
515							# Normalization
516							# -------------
517							around _set__scheme => sub {
518							my ($orig, $self, $value) = @_;
519
520							#
521							# Normalized scheme is case insensitive and should be lowercased
522							#
523							$value->{normalized} = lc($value->{normalized});
524							$self->$orig($value)
525							};
526
527							around _set__host => sub {
528							my ($orig, $self, $value) = @_;
529
530							#
531							# Normalized host is case insensitive and should be lowercased
532							#
533							$value->{normalized} = lc($value->{normalized});
534							$self->$orig($value)
535							};
536
537							around _set__ipv4 => sub {
538							my ($orig, $self, $value) = @_;
539
540							#
541							# IP is a host, and normalized host is case insensitive and should be lowercased
542							#
543							$value->{normalized} = lc($value->{normalized});
544							$self->$orig($value)
545							};
546
547							around _set__ipv6 => sub {
548							my ($orig, $self, $value) = @_;
549
550							#
551							# IP is a host, and normalized host is case insensitive and should be lowercased
552							#
553							$value->{normalized} = lc($value->{normalized});
554							$self->$orig($value)
555							};
556
557							around _set__ipvx => sub {
558							my ($orig, $self, $value) = @_;
559
560							#
561							# IP is a host, and normalized host is case insensitive and should be lowercased
562							#
563							$value->{normalized} = lc($value->{normalized});
564							$self->$orig($value)
565							};
566
567							around _set__zone => sub {
568							my ($orig, $self, $value) = @_;
569
570							#
571							# Zone is part of host, so a normalized zone is case insensitive and should be lowercased
572							#
573							$value->{normalized} = lc($value->{normalized});
574							$self->$orig($value)
575							};
576
577							around _set__path => sub {
578							my ($orig, $self, $value) = @_;
579							#
580							# Normalized path is done by removing dot segments
581							#
582							$value->{normalized} = __PACKAGE__->remove_dot_segments($value->{normalized});
583							$self->$orig($value)
584							};
585
586							# ------------------------
587							# Grammar Internal helpers
588							# ------------------------
589							#
590							# This _pct_encoded method guarantees that the output is a sequence of ASCII characters
591							# even if the UTF-8 flag would be set. For instance sequence %ce%a3 will be
592							# seen as "\x{ce}\x{a3}" in the resulting string, and NOT "\x{cea3}".
593							#
594							sub __pct_encoded {
595	13			8		42	my ($self, undef, $hex1, $hex2) = @_;
596							#
597							# Note that here $hex are terminals, so in fact hex's origin == decoded == normalized
598							#
599	13					42	my $origin = join('', '%', $hex1->{origin}, $hex2->{origin});
600	13					48	my $decoded = chr(hex(join('', $hex1->{decoded}, $hex2->{decoded})));
601							#
602							# Normalization is decoding any percent-encoded octet that corresponds
603							# to an unreserved character, as described in Section 2.3:
604							# unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
605							# else it should be normalized to uppercase.
606							#
607	13	50				54	my $normalized = ($decoded =~ /[A-Za-z0-9\-._~]/) ? $decoded : uc($origin);
608	8					98	return { origin => $origin, decoded => $decoded, normalized => $normalized }
609							}
610							#
611							# Special for zone
612							#
613							sub __encoded_percent_character {
614							#
615							# '%' decoded character is not an unreserved character, so the
616							# normalized form remains %25
617							#
618	1			1		13	return { origin => '%25', decoded => '%', normalized => '%25'}
619							}
620							sub __not_encoded_percent_character {
621							#
622							# Same as __encoded_percent_character(), except that origin is '%' character
623							#
624	1			1		13	return { origin => '%', decoded => '%', normalized => '%25'}
625							}
626							#
627							# Pushes segments in a _segment[] array
628							#
629							sub __segment {
630	42			42		97	my ($self, @args) = @_;
631
632	42					89	my $concat = $self->__concat(@args);
633	42					72	push(@{$self->_segments->{origin}}, $concat->{origin});
	42					726
634	42					313	push(@{$self->_segments->{decoded}}, $concat->{decoded});
	42					653
635	42					243	push(@{$self->_segments->{normalized}}, $concat->{normalized});
	42					579
636	42					547	return $concat
637							}
638							#
639							# Exactly the same as ESLIF's ::concat built-in, but revisited
640							# to work on original, decoded and normalized strings at the same time
641							#
642							sub __concat {
643	2014			2014		3205	my ($self, @args) = @_;
644
645	2014					5257	my %rc = ( origin => '', decoded => '', normalized => '' );
646	2014					3391	foreach my $arg (@args) {
647	2824	100				4935	next unless ref($arg);
648	2821		50			5812	$rc{origin} .= $arg->{origin} // '';
649	2821		50			4803	$rc{decoded} .= $arg->{decoded} // '';
650	2821		50			5099	$rc{normalized} .= $arg->{normalized} // '';
651							}
652	2014					17348	return \%rc
653							}
654							#
655							# Exactly the same as ESLIF's ::transfer built-in, but revisited
656							# to work on original and decoded strings at the same time
657							#
658							sub __symbol {
659	829			829		1293	my ($self, $symbol) = @_;
660							#
661							# No normalization on symbol until we know the context
662							#
663	829					7188	return { origin => $symbol, decoded => $symbol, normalized => $symbol }
664							}
665
666
667							1;
668
669							=pod
670
671							=encoding UTF-8
672
673							=head1 NAME
674
675							MarpaX::ESLIF::URI::_generic - URI Generic syntax as per RFC3986/RFC6874
676
677							=head1 VERSION
678
679							version 0.006
680
681							=head1 SUBROUTINES/METHODS
682
683							=head2 $class->new($uri)
684
685							Instantiate a new object, or croak on failure. Takes as parameter an URI that will be parsed. The object instance is noted C<$self> below.
686
687							=for Pod::Coverage BUILDARGS
688
689							=for Pod::Coverage BUILD
690
691							=head2 $class->bnf
692
693							Returns the BNF used to parse the input.
694
695							=head2 $class->eslif
696
697							Returns a MarpaX::ESLIF singleton.
698
699							=head2 $class->grammar
700
701							Returns the compiled BNF used to parse the input as MarpaX::ESLIF::Grammar singleton.
702
703							=head2 $self->string($type)
704
705							Returns the string version of the URI, C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
706
707							=head2 $self->scheme($type)
708
709							Returns the scheme, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
710
711							=head2 $self->authority($type)
712
713							Returns the authority, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
714
715							=head2 $self->userinfo($type)
716
717							Returns the userinfo, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
718
719							=head2 $self->host($type)
720
721							Returns the host (which may contain C<[]> delimiters in case of IPv6 literal), or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
722
723							=head2 $self->hostname($type)
724
725							Returns the hostname (without eventual C<[]> delimiters), or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
726
727							=head2 $self->ip($type)
728
729							Returns the IP when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
730
731							Note that this is the full concatenation of what makes an IP, in particular you will get the eventual IPv6 Zone Id if there is one.
732
733							=head2 $self->ipv4($type)
734
735							Returns the IPv4 when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
736
737							=head2 $self->ipv6($type)
738
739							Returns the IPv6 when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
740
741							=head2 $self->ipvx($type)
742
743							Returns the decoded IPvI<future> (as per the spec) when host is such a literal, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
744
745							=head2 $self->zone($type)
746
747							Returns the IPv6 Zone Id, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
748
749							=head2 $self->port
750
751							Returns the port, or undef.
752
753							=head2 $self->path($type)
754
755							Returns the path, or the empty string. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
756
757							=head2 $self->segments($type)
758
759							Returns the path segments as an array reference, which may be empty. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
760
761							=head2 $self->query($type)
762
763							Returns the query, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
764
765							=head2 $self->fragment($type)
766
767							Returns the fragment, or undef. C<$type> is either 'decoded' (default value), 'origin' or 'normalized'.
768
769							=head2 $self->is_abs
770
771							Returns a true value if the URI is absolute.
772
773							=head2 $self->base
774
775							Returns a instance that is the absolute version of C<$self> if possible, or croak on failure.
776
777							When C<$self> is absolute, C<$self> itself is returned, otherwise it must have a scheme and a new instance without the origin fragment is returned.
778
779							=head2 $self->normalized
780
781							Returns the normalized string of C<$self>.
782
783							Equivalent to C<< $self->string('normalized') >>.
784
785							=head2 $self->decoded
786
787							Returns the decoded string of C<$self>.
788
789							Equivalent to C<< $self->string('decoded') >>.
790
791							=head2 $self->resolve($base, $strict)
792
793							Returns a instance that converts C<$self> into C<$base> URI, or croak on failure.
794
795							Default base is C<< $self->base >>.
796
797							If C<$strict> is a true value, C<$self> is always considered relative to C<$base>, otherwise a new URI without C<$self>'s dot segments is returned when C<$self> has a scheme. Default is a true value.
798
799							=head2 $self->eq($other)
800
801							Returns a instance that is the absolute version of current instance if possible, or croak on failure.
802
803							=head2 $self->clone
804
805							Returns a clone of current instance.
806
807							=head2 $self->as_string
808
809							Alias to C<string> method.
810
811							=head2 $class->remove_dot_segments($path)
812
813							Implementation of L<RFC3896's remove_dot_segments\|https://tools.ietf.org/html/rfc3986#section-5.2.4>.
814
815							=head1 NOTES
816
817							=over
818
819							=item Logging
820
821							This package is L<Log::Any> aware, and will use the later in case parsing fails to output error messages.
822
823							=back
824
825							=head1 SEE ALSO
826
827							L<MarpaX::ESLIF::URI>, L<RFC3986\|https://tools.ietf.org/html/rfc3986>, L<RFC6874\|https://tools.ietf.org/html/rfc6874>
828
829							=head1 AUTHOR
830
831							Jean-Damien Durand <jeandamiendurand@free.fr>
832
833							=head1 COPYRIGHT AND LICENSE
834
835							This software is copyright (c) 2017 by Jean-Damien Durand.
836
837							This is free software; you can redistribute it and/or modify it under
838							the same terms as the Perl 5 programming language system itself.
839
840							=cut
841
842							__DATA__
843							#
844							# We maintain two string version in parallel when valuating the parse tree:
845							# - original
846							# - decoded
847							:default ::= action => __concat
848							symbol-action => __symbol
849
850							# :start ::= <URI reference>
851							<URI reference> ::= <URI> action => _action_string
852							\| <relative ref> action => _action_string
853							#
854							# Reference: https://tools.ietf.org/html/rfc3986#appendix-A
855							# Reference: https://tools.ietf.org/html/rfc6874
856							#
857							<URI> ::= <scheme> ":" <hier part> <URI query> <URI fragment>
858							<URI query> ::= "?" <query>
859							<URI query> ::=
860							<URI fragment> ::= "#" <fragment>
861							<URI fragment> ::=
862
863							<hier part> ::= "//" <authority> <path abempty>
864							\| <path absolute>
865							\| <path rootless>
866							\| <path empty>
867
868
869							<absolute URI> ::= <scheme> ":" <hier part> <URI query>
870
871							<relative ref> ::= <relative part> <URI query> <URI fragment>
872
873							<relative part> ::= "//" <authority> <path abempty>
874							\| <path absolute>
875							\| <path noscheme>
876							\| <path empty>
877
878							<scheme> ::= <scheme value> action => _action_scheme
879							<scheme value> ::= <ALPHA> <scheme trailer>
880							<scheme trailer unit> ::= <ALPHA> \| <DIGIT> \| "+" \| "-" \| "."
881							<scheme trailer> ::= <scheme trailer unit>*
882
883							<authority userinfo> ::= <userinfo> "@"
884							<authority userinfo> ::=
885							<authority port> ::= ":" <port>
886							<authority port> ::=
887							<authority> ::= <authority value> action => _action_authority
888							<authority value> ::= <authority userinfo> <host> <authority port>
889							<userinfo unit> ::= <unreserved> \| <pct encoded> \| <sub delims> \| ":"
890							<userinfo> ::= <userinfo value> action => _action_userinfo
891							<userinfo value> ::= <userinfo unit>*
892							#
893							# The syntax rule for host is ambiguous because it does not completely
894							# distinguish between an IPv4address and a reg-name. In order to
895							# disambiguate the syntax, we apply the "first-match-wins" algorithm:
896							# If host matches the rule for IPv4address, then it should be
897							# considered an IPv4 address literal and not a reg-name.
898							#
899							<host> ::= <IP literal> rank => 0 action => _action_host
900							\| <IPv4address> rank => -1 action => _action_host
901							\| <reg name> rank => -2 action => _action_host
902							<port> ::= <port value> action => _action_port
903							<port value> ::= <DIGIT>*
904
905							<IP literal interior> ::= <IPv6address> action => _action_ip
906							\| <IPv6addrz> action => _action_ip
907							\| <IPvFuture> action => _action_ip
908							<IP literal> ::= "[" <IP literal interior> "]"
909							<ZoneID interior> ::= <unreserved> \| <pct encoded>
910							<ZoneID> ::= <ZoneID interior>+ action => _action_zone
911							<IPv6addrz percent char> ::= "%25" action => __encoded_percent_character
912							#
913							# From https://tools.ietf.org/html/rfc6874#section-3:
914							#
915							# "we also suggest that URI parsers accept bare "%" signs when possible"
916							#
917							<IPv6addrz percent char> ::= "%" action => __not_encoded_percent_character
918							<IPv6addrz> ::= <IPv6address> <IPv6addrz percent char> <ZoneID>
919
920							<IPvFuture> ::= "v" <HEXDIG many> "." <IPvFuture trailer> action => _action_ipvx
921							<IPvFuture trailer unit> ::= <unreserved> \| <sub delims> \| ":"
922							<IPvFuture trailer> ::= <IPvFuture trailer unit>+
923
924							<IPv6address> ::= <6 h16 colon> <ls32> action => _action_ipv6
925							\| "::" <5 h16 colon> <ls32> action => _action_ipv6
926							\| <h16> "::" <4 h16 colon> <ls32> action => _action_ipv6
927							\| "::" <4 h16 colon> <ls32> action => _action_ipv6
928							\| <0 to 1 h16 colon> <h16> "::" <3 h16 colon> <ls32> action => _action_ipv6
929							\| "::" <3 h16 colon> <ls32> action => _action_ipv6
930							\| <0 to 2 h16 colon> <h16> "::" <2 h16 colon> <ls32> action => _action_ipv6
931							\| "::" <2 h16 colon> <ls32> action => _action_ipv6
932							\| <0 to 3 h16 colon> <h16> "::" <1 h16 colon> <ls32> action => _action_ipv6
933							\| "::" <1 h16 colon> <ls32> action => _action_ipv6
934							\| <0 to 4 h16 colon> <h16> "::" <ls32> action => _action_ipv6
935							\| "::" <ls32> action => _action_ipv6
936							\| <0 to 5 h16 colon> <h16> "::" <h16> action => _action_ipv6
937							\| "::" <h16> action => _action_ipv6
938							\| <0 to 6 h16 colon> <h16> "::" action => _action_ipv6
939							\| "::" action => _action_ipv6
940
941							<1 h16 colon> ::= <h16> ":"
942							<2 h16 colon> ::= <h16> ":" <h16> ":"
943							<3 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":"
944							<4 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":" <h16> ":"
945							<5 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":" <h16> ":" <h16> ":"
946							<6 h16 colon> ::= <h16> ":" <h16> ":" <h16> ":" <h16> ":" <h16> ":" <h16> ":"
947
948							#
949							# These productions are ambiguous without ranking (rank is equivalent to make regexps greedy)
950							#
951							<0 to 1 h16 colon> ::=
952							<0 to 1 h16 colon> ::= <1 h16 colon> rank => 1
953							<0 to 2 h16 colon> ::= <0 to 1 h16 colon>
954							<0 to 2 h16 colon> ::= <0 to 1 h16 colon> <1 h16 colon> rank => 1
955							<0 to 3 h16 colon> ::= <0 to 2 h16 colon>
956							<0 to 3 h16 colon> ::= <0 to 2 h16 colon> <1 h16 colon> rank => 1
957							<0 to 4 h16 colon> ::= <0 to 3 h16 colon>
958							<0 to 4 h16 colon> ::= <0 to 3 h16 colon> <1 h16 colon> rank => 1
959							<0 to 5 h16 colon> ::= <0 to 4 h16 colon>
960							<0 to 5 h16 colon> ::= <0 to 4 h16 colon> <1 h16 colon> rank => 1
961							<0 to 6 h16 colon> ::= <0 to 5 h16 colon>
962							<0 to 6 h16 colon> ::= <0 to 5 h16 colon> <1 h16 colon> rank => 1
963
964							<h16> ::= <HEXDIG>
965							\| <HEXDIG> <HEXDIG>
966							\| <HEXDIG> <HEXDIG> <HEXDIG>
967							\| <HEXDIG> <HEXDIG> <HEXDIG> <HEXDIG>
968
969							<ls32> ::= <h16> ":" <h16> \| <IPv4address>
970							<IPv4address> ::= <dec octet> "." <dec octet> "." <dec octet> "." <dec octet> action => _action_ipv4
971
972							<dec octet> ::= <DIGIT> # 0-9
973							\| [\x{31}-\x{39}] <DIGIT> # 10-99
974							\| "1" <DIGIT> <DIGIT> # 100-199
975							\| "2" [\x{30}-\x{34}] <DIGIT> # 200-249
976							\| "25" [\x{30}-\x{35}] # 250-255
977
978							<reg name unit> ::= <unreserved> \| <pct encoded> \| <sub delims>
979							<reg name> ::= <reg name unit>*
980
981							<path> ::= <path abempty> # begins with "/" or is empty
982							\| <path absolute> # begins with "/" but not "//"
983							\| <path noscheme> # begins with a non-colon segment
984							\| <path rootless> # begins with a segment
985							\| <path empty> # zero characters
986
987							<path abempty unit> ::= "/" <segment>
988							<path abempty> ::= <path abempty value> action => _action_path
989							<path abempty value> ::= <path abempty unit>*
990							<path absolute> ::= <path absolute value> action => _action_path
991							<path absolute value> ::= "/"
992							\| "/" <segment nz> <path abempty value>
993							<path noscheme> ::= <path noscheme value> action => _action_path
994							<path noscheme value> ::= <segment nz nc> <path abempty value>
995							<path rootless> ::= <path rootless value> action => _action_path
996							<path rootless value> ::= <segment nz> <path abempty value>
997							<path empty> ::= # Default value for path is ''
998
999							<segment> ::= <pchar>* action => __segment
1000							<segment nz> ::= <pchar>+ action => __segment
1001							<segment nz nc unit> ::= <unreserved> \| <pct encoded> \| <sub delims> \| "@" # non-zero-length segment without any colon ":"
1002							<segment nz nc> ::= <segment nz nc unit>+ action => __segment
1003
1004							<pchar> ::= <unreserved> \| <pct encoded> \| <sub delims> \| ":" \| "@"
1005
1006							<query unit> ::= <pchar> \| "/" \| "?"
1007							<query> ::= <query value> action => _action_query
1008							<query value> ::= <query unit>*
1009
1010							<fragment unit> ::= <pchar> \| "/" \| "?"
1011							<fragment> ::= <fragment value> action => _action_fragment
1012							<fragment value> ::= <fragment unit>*
1013
1014							<pct encoded> ::= "%" <HEXDIG> <HEXDIG> action => __pct_encoded
1015
1016							<unreserved> ::= <ALPHA> \| <DIGIT> \| "-" \| "." \| "_" \| "~"
1017							<reserved> ::= <gen delims> \| <sub delims>
1018							<gen delims> ::= ":" \| "/" \| "?" \| "#" \| "[" \| "]" \| "@"
1019							<sub delims> ::= "!" \| "$" \| "&" \| "'" \| "(" \| ")"
1020							\| "*" \| "+" \| "," \| ";" \| "="
1021
1022							<HEXDIG many> ::= <HEXDIG>+
1023							<ALPHA> ::= [A-Za-z]
1024							<DIGIT> ::= [0-9]
1025							<HEXDIG> ::= [0-9A-Fa-f] # case insensitive