File Coverage

blib/lib/XRI/Parse.pm

Criterion	Covered	Total	%
statement	84	84	100.0
branch	31	32	96.8
condition	6	6	100.0
subroutine	9	9	100.0
pod	0	7	0.0
total	130	138	94.2

line	stmt	bran	cond	sub	pod	time	code
1							# Copyright (C) 2004 Identity Commons. All Rights Reserved.
2							# See LICENSE for licensing details
3
4							# Author: Fen Labalme ,
5							# with a tip-of-the-cap to parse.py written by Gabe Wachob
6
7							# TODO:
8							# fix FIXME sections
9							# add UNICODE support
10
11							package XRI::Parse;
12
13							our $VERSION = 0.1;
14
15	6			6		100127	use Text::Balanced qw( extract_bracketed );
	6					166321
	6					709
16	6			6		5686	use URI::Escape;
	6					9437
	6					7189
17
18							our @SEPARATORS = qw( / * : );
19							our @GCS_CHARS = qw( @ = + $ * );
20
21
22							sub new {
23	23			23	0	22568	my $self = shift;
24	23					35	my $xri = shift;
25	23					77	$xri =~ s/^xri://i;
26							# $xri = stripComments( $xri );
27	23					108	my $this = { token=>undef,
28							remainder=>undef,
29							authority=>undef,
30							xri=>$xri };
31	23					85	bless $this, $self;
32							}
33
34
35							# escapes an XRI (including relative XRIs) for inclusion in an HTTP request
36							# FIXME: currently handles xrefs identically to sub-segments
37							#
38							sub escapeURI {
39	6			6	0	19	my $this = shift;
40	6					7	my $result;
41	6					16	while (my $seg = $this->nextSegment) {
42	8					9	$result = shift @$seg; # always one of qw( @ // /. /: )
43	8					14	foreach my $subseg ( @$seg ) {
44	15	100				471	if ($subseg =~ m\|^\(\|) { # xref
45	1					4	$result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'");
46							}
47							else { # sub-segment
48	14					36	$result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'");
49							}
50							}
51							}
52	6					20	return $result;
53							}
54
55							# if an absolute-xri, emit the array ref [ [ firstSegment ], local-path ]
56							# if a relative-xri, emit the local-path or relative-path as a string
57							#
58							sub splitAuthLocal {
59	3			3	0	5	my $this = shift;
60	3					7	my $firstRef = $this->nextSegment;
61
62	3	100				8	if ( defined $this->{'authority'} ) {
63	2					3	my @auth = ();
64							#
65							# lowercase the authority segments
66							#
67	2					5	foreach my $seg (@$firstRef) {
68	5					13	push @auth, lc $seg;
69							}
70	2					11	return [ \@auth, $this->{remainder} ];
71							}
72							else {
73	1					6	return $this->{xri};
74							}
75							}
76
77							# Emits a series of segments, each of which is a
78							# list of (separator, part, separator...) tuples
79							# Segments are separated by forward slash '/'
80							# Emits (gcs-char, part, separator, part...)
81							# for the first segment if using a gcs-char
82							# Separator is one of "/.", "/:", ".", or ":"
83							#
84							sub nextSegment {
85	31			31	0	302	my $this = shift;
86	31					33	my ( $token, @segment );
87
88	31	100				90	if (defined $this->{token}) {
89	8					16	@segment = ( $this->{token} );
90	8					13	undef $this->{token};
91							}
92							else {
93	23	100				51	if ( $token = $this->nextToken ) {
94	13					36	@segment = ( $token );
95							}
96							else {
97	10					36	return undef;
98							}
99							}
100	21		100			48	while (( $token = $this->nextToken ) && $token !~ m\|^\/\| ) {
101	45					103	push @segment, $token;
102							}
103	21	100				48	$this->{token} = $token if $token;
104	21					57	return \@segment;
105							}
106
107							sub getCrossReference {
108	169			169	0	169	my $this = shift;
109	169					179	my $xri = shift;
110
111							# FIXME: what to do if: 'xri:(!comment1).(!comment2)' -- (is this legal?)
112							# FIXME: raise error if unbalanced parens
113	169					402	while (($this->{remainder} = $xri) =~ m\|^\(\|) { # cross-reference
114	8					12	my $xref;
115	8					37	($xref, $xri) = extract_bracketed($xri, '()');
116	8	100				1313	next if $xref =~ m\|^\(\!\|; # skip leading comments
117	6					13	$this->{remainder} = $xri;
118	6					30	return $xref;
119							}
120	163					353	return undef;
121							}
122
123							# return initial qw( @ = * // ) or undef
124							# created to better strip leading comments
125							# perhaps comment stripping should occur on object instantiation?
126							#
127							sub getAuthority {
128	23			23	0	32	my $this = shift;
129	23					32	my $xri = $this->{xri};
130	23					24	my $xref;
131
132	23	100				46	if ( $xref = $this->getCrossReference( $xri )) {
133	2					5	$this->{'authority'} = $xref;
134	2					10	return $xref;
135							}
136	21	100				126	if ($this->{remainder} =~ m\|^\/\/(.*)$\|) { # initial '//'
137	9					13	$this->{'authority'} = '//';
138	9					27	$this->{remainder} = $1;
139	9					45	return '//';
140							}
141	12	100				51	if ($this->{remainder} =~ m\|^([\@\=\])(.)$\|) { # gcs-char
142	4					11	my ($gcs, $rem) = ($1, $2);
143	4	50				17	$this->{remainder} = (($rem =~ m\|^[\/\\:]\|)?'':'') . $rem;
144	4					30	$this->{'authority'} = $gcs;
145	4					19	return $gcs;
146							}
147	8					15	$this->{remainder} = $xri;
148	8					34	return;
149							}
150
151
152							# Generates a list of (separator, string) pairs
153							# Ignores the leading xri:
154							# If the first two characters (ignoring the xri:) are //, returns this once as the
155							# first token, as the // is only legal at the very beginning
156							# Everything within () is treated as a single token
157							# Yields a series of strings, one of the characters in SEPARATORS, or
158							# a string of characters (a sub-segment)
159							# FIXME: fix handling of '*'
160							# FIXME: add handling of '&'
161							# FIXME: strip comments: including multiple, before or after GCS
162							#
163							sub nextToken {
164	161			161	0	371	my $this = shift;
165	161					144	my $auth;
166
167	161	100	100			415	if (!defined $this->{remainder} && ($auth = $this->getAuthority)) {
168	15					42	return $auth;
169							}
170	146	100				312	return $xref if $xref = $this->getCrossReference( $this->{remainder} );
171
172	142	100				452	if ($this->{remainder} =~ m\|^([\/\\:])(.)$\|) { # initial separators
173	57					131	my ($sep, $rem) = ($1, $2);
174	57	100				119	if ($sep eq '/') {
175	30	100				73	if ($rem =~ m\|^([\\:])(.)$\|) {
176	1					2	$sep .= $1; # '/.' or '/:'
177	1					3	$rem = $2;
178							}
179							else {
180	29					41	$sep = '/*';
181							}
182							}
183	57					82	$this->{remainder} = $rem;
184	57					257	return $sep;
185							}
186	85	100				785	if ($this->{remainder} =~ m\|^([^\/\\:]+)(.)$\|) { # sub-segment
187	55					103	$this->{remainder} = $2;
188	55					237	return $1;
189							}
190	30					76	return undef;
191							}
192
193							1;
194							__END__