line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Copyright (C) 2004 Identity Commons. All Rights Reserved. |
2
|
|
|
|
|
|
|
# See LICENSE for licensing details |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
# Author: Fen Labalme , |
5
|
|
|
|
|
|
|
# with a tip-of-the-cap to parse.py written by Gabe Wachob |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# TODO: |
8
|
|
|
|
|
|
|
# fix FIXME sections |
9
|
|
|
|
|
|
|
# add UNICODE support |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package XRI::Parse; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $VERSION = 0.1; |
14
|
|
|
|
|
|
|
|
15
|
6
|
|
|
6
|
|
100127
|
use Text::Balanced qw( extract_bracketed ); |
|
6
|
|
|
|
|
166321
|
|
|
6
|
|
|
|
|
709
|
|
16
|
6
|
|
|
6
|
|
5686
|
use URI::Escape; |
|
6
|
|
|
|
|
9437
|
|
|
6
|
|
|
|
|
7189
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our @SEPARATORS = qw( / * : ); |
19
|
|
|
|
|
|
|
our @GCS_CHARS = qw( @ = + $ * ); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub new { |
23
|
23
|
|
|
23
|
0
|
22568
|
my $self = shift; |
24
|
23
|
|
|
|
|
35
|
my $xri = shift; |
25
|
23
|
|
|
|
|
77
|
$xri =~ s/^xri://i; |
26
|
|
|
|
|
|
|
# $xri = stripComments( $xri ); |
27
|
23
|
|
|
|
|
108
|
my $this = { token=>undef, |
28
|
|
|
|
|
|
|
remainder=>undef, |
29
|
|
|
|
|
|
|
authority=>undef, |
30
|
|
|
|
|
|
|
xri=>$xri }; |
31
|
23
|
|
|
|
|
85
|
bless $this, $self; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# escapes an XRI (including relative XRIs) for inclusion in an HTTP request |
36
|
|
|
|
|
|
|
# FIXME: currently handles xrefs identically to sub-segments |
37
|
|
|
|
|
|
|
# |
38
|
|
|
|
|
|
|
sub escapeURI { |
39
|
6
|
|
|
6
|
0
|
19
|
my $this = shift; |
40
|
6
|
|
|
|
|
7
|
my $result; |
41
|
6
|
|
|
|
|
16
|
while (my $seg = $this->nextSegment) { |
42
|
8
|
|
|
|
|
9
|
$result = shift @$seg; # always one of qw( @ // /. /: ) |
43
|
8
|
|
|
|
|
14
|
foreach my $subseg ( @$seg ) { |
44
|
15
|
100
|
|
|
|
471
|
if ($subseg =~ m|^\(|) { # xref |
45
|
1
|
|
|
|
|
4
|
$result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'"); |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
else { # sub-segment |
48
|
14
|
|
|
|
|
36
|
$result .= uri_escape($subseg, "^A-Za-z0-9\\\-\_\.\!\~\*\'"); |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
} |
52
|
6
|
|
|
|
|
20
|
return $result; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# if an absolute-xri, emit the array ref [ [ firstSegment ], local-path ] |
56
|
|
|
|
|
|
|
# if a relative-xri, emit the local-path or relative-path as a string |
57
|
|
|
|
|
|
|
# |
58
|
|
|
|
|
|
|
sub splitAuthLocal { |
59
|
3
|
|
|
3
|
0
|
5
|
my $this = shift; |
60
|
3
|
|
|
|
|
7
|
my $firstRef = $this->nextSegment; |
61
|
|
|
|
|
|
|
|
62
|
3
|
100
|
|
|
|
8
|
if ( defined $this->{'authority'} ) { |
63
|
2
|
|
|
|
|
3
|
my @auth = (); |
64
|
|
|
|
|
|
|
# |
65
|
|
|
|
|
|
|
# lowercase the authority segments |
66
|
|
|
|
|
|
|
# |
67
|
2
|
|
|
|
|
5
|
foreach my $seg (@$firstRef) { |
68
|
5
|
|
|
|
|
13
|
push @auth, lc $seg; |
69
|
|
|
|
|
|
|
} |
70
|
2
|
|
|
|
|
11
|
return [ \@auth, $this->{remainder} ]; |
71
|
|
|
|
|
|
|
} |
72
|
|
|
|
|
|
|
else { |
73
|
1
|
|
|
|
|
6
|
return $this->{xri}; |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Emits a series of segments, each of which is a |
78
|
|
|
|
|
|
|
# list of (separator, part, separator...) tuples |
79
|
|
|
|
|
|
|
# Segments are separated by forward slash '/' |
80
|
|
|
|
|
|
|
# Emits (gcs-char, part, separator, part...) |
81
|
|
|
|
|
|
|
# for the first segment if using a gcs-char |
82
|
|
|
|
|
|
|
# Separator is one of "/.", "/:", ".", or ":" |
83
|
|
|
|
|
|
|
# |
84
|
|
|
|
|
|
|
sub nextSegment { |
85
|
31
|
|
|
31
|
0
|
302
|
my $this = shift; |
86
|
31
|
|
|
|
|
33
|
my ( $token, @segment ); |
87
|
|
|
|
|
|
|
|
88
|
31
|
100
|
|
|
|
90
|
if (defined $this->{token}) { |
89
|
8
|
|
|
|
|
16
|
@segment = ( $this->{token} ); |
90
|
8
|
|
|
|
|
13
|
undef $this->{token}; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
else { |
93
|
23
|
100
|
|
|
|
51
|
if ( $token = $this->nextToken ) { |
94
|
13
|
|
|
|
|
36
|
@segment = ( $token ); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
else { |
97
|
10
|
|
|
|
|
36
|
return undef; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
} |
100
|
21
|
|
100
|
|
|
48
|
while (( $token = $this->nextToken ) && $token !~ m|^\/| ) { |
101
|
45
|
|
|
|
|
103
|
push @segment, $token; |
102
|
|
|
|
|
|
|
} |
103
|
21
|
100
|
|
|
|
48
|
$this->{token} = $token if $token; |
104
|
21
|
|
|
|
|
57
|
return \@segment; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
sub getCrossReference { |
108
|
169
|
|
|
169
|
0
|
169
|
my $this = shift; |
109
|
169
|
|
|
|
|
179
|
my $xri = shift; |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# FIXME: what to do if: 'xri:(!comment1).(!comment2)' -- (is this legal?) |
112
|
|
|
|
|
|
|
# FIXME: raise error if unbalanced parens |
113
|
169
|
|
|
|
|
402
|
while (($this->{remainder} = $xri) =~ m|^\(|) { # cross-reference |
114
|
8
|
|
|
|
|
12
|
my $xref; |
115
|
8
|
|
|
|
|
37
|
($xref, $xri) = extract_bracketed($xri, '()'); |
116
|
8
|
100
|
|
|
|
1313
|
next if $xref =~ m|^\(\!|; # skip leading comments |
117
|
6
|
|
|
|
|
13
|
$this->{remainder} = $xri; |
118
|
6
|
|
|
|
|
30
|
return $xref; |
119
|
|
|
|
|
|
|
} |
120
|
163
|
|
|
|
|
353
|
return undef; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# return initial qw( @ = * // ) or undef |
124
|
|
|
|
|
|
|
# created to better strip leading comments |
125
|
|
|
|
|
|
|
# perhaps comment stripping should occur on object instantiation? |
126
|
|
|
|
|
|
|
# |
127
|
|
|
|
|
|
|
sub getAuthority { |
128
|
23
|
|
|
23
|
0
|
32
|
my $this = shift; |
129
|
23
|
|
|
|
|
32
|
my $xri = $this->{xri}; |
130
|
23
|
|
|
|
|
24
|
my $xref; |
131
|
|
|
|
|
|
|
|
132
|
23
|
100
|
|
|
|
46
|
if ( $xref = $this->getCrossReference( $xri )) { |
133
|
2
|
|
|
|
|
5
|
$this->{'authority'} = $xref; |
134
|
2
|
|
|
|
|
10
|
return $xref; |
135
|
|
|
|
|
|
|
} |
136
|
21
|
100
|
|
|
|
126
|
if ($this->{remainder} =~ m|^\/\/(.*)$|) { # initial '//' |
137
|
9
|
|
|
|
|
13
|
$this->{'authority'} = '//'; |
138
|
9
|
|
|
|
|
27
|
$this->{remainder} = $1; |
139
|
9
|
|
|
|
|
45
|
return '//'; |
140
|
|
|
|
|
|
|
} |
141
|
12
|
100
|
|
|
|
51
|
if ($this->{remainder} =~ m|^([\@\=\*])(.*)$|) { # gcs-char |
142
|
4
|
|
|
|
|
11
|
my ($gcs, $rem) = ($1, $2); |
143
|
4
|
50
|
|
|
|
17
|
$this->{remainder} = (($rem =~ m|^[\/\*\:]|)?'':'*') . $rem; |
144
|
4
|
|
|
|
|
30
|
$this->{'authority'} = $gcs; |
145
|
4
|
|
|
|
|
19
|
return $gcs; |
146
|
|
|
|
|
|
|
} |
147
|
8
|
|
|
|
|
15
|
$this->{remainder} = $xri; |
148
|
8
|
|
|
|
|
34
|
return; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# Generates a list of (separator, string) pairs |
153
|
|
|
|
|
|
|
# Ignores the leading xri: |
154
|
|
|
|
|
|
|
# If the first two characters (ignoring the xri:) are //, returns this *once* as the |
155
|
|
|
|
|
|
|
# first token, as the // is only legal at the very beginning |
156
|
|
|
|
|
|
|
# Everything within () is treated as a single token |
157
|
|
|
|
|
|
|
# Yields a series of strings, one of the characters in SEPARATORS, or |
158
|
|
|
|
|
|
|
# a string of characters (a sub-segment) |
159
|
|
|
|
|
|
|
# FIXME: fix handling of '*' |
160
|
|
|
|
|
|
|
# FIXME: add handling of '&' |
161
|
|
|
|
|
|
|
# FIXME: strip comments: including multiple, before or after GCS |
162
|
|
|
|
|
|
|
# |
163
|
|
|
|
|
|
|
sub nextToken { |
164
|
161
|
|
|
161
|
0
|
371
|
my $this = shift; |
165
|
161
|
|
|
|
|
144
|
my $auth; |
166
|
|
|
|
|
|
|
|
167
|
161
|
100
|
100
|
|
|
415
|
if (!defined $this->{remainder} && ($auth = $this->getAuthority)) { |
168
|
15
|
|
|
|
|
42
|
return $auth; |
169
|
|
|
|
|
|
|
} |
170
|
146
|
100
|
|
|
|
312
|
return $xref if $xref = $this->getCrossReference( $this->{remainder} ); |
171
|
|
|
|
|
|
|
|
172
|
142
|
100
|
|
|
|
452
|
if ($this->{remainder} =~ m|^([\/\*\:])(.*)$|) { # initial separators |
173
|
57
|
|
|
|
|
131
|
my ($sep, $rem) = ($1, $2); |
174
|
57
|
100
|
|
|
|
119
|
if ($sep eq '/') { |
175
|
30
|
100
|
|
|
|
73
|
if ($rem =~ m|^([\*\:])(.*)$|) { |
176
|
1
|
|
|
|
|
2
|
$sep .= $1; # '/.' or '/:' |
177
|
1
|
|
|
|
|
3
|
$rem = $2; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
else { |
180
|
29
|
|
|
|
|
41
|
$sep = '/*'; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
} |
183
|
57
|
|
|
|
|
82
|
$this->{remainder} = $rem; |
184
|
57
|
|
|
|
|
257
|
return $sep; |
185
|
|
|
|
|
|
|
} |
186
|
85
|
100
|
|
|
|
785
|
if ($this->{remainder} =~ m|^([^\/\*\:]+)(.*)$|) { # sub-segment |
187
|
55
|
|
|
|
|
103
|
$this->{remainder} = $2; |
188
|
55
|
|
|
|
|
237
|
return $1; |
189
|
|
|
|
|
|
|
} |
190
|
30
|
|
|
|
|
76
|
return undef; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
1; |
194
|
|
|
|
|
|
|
__END__ |