line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
2
|
|
|
2
|
|
2174
|
use strict; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
85
|
|
2
|
2
|
|
|
2
|
|
12
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
135
|
|
3
|
|
|
|
|
|
|
package Data::Beacon; |
4
|
|
|
|
|
|
|
#ABSTRACT: BEACON format validating parser and serializer |
5
|
|
|
|
|
|
|
our $VERSION = '0.3.1'; #VERSION |
6
|
|
|
|
|
|
|
|
7
|
2
|
|
|
2
|
|
79
|
use 5.008; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
81
|
|
8
|
2
|
|
|
2
|
|
2089
|
use Time::Piece; |
|
2
|
|
|
|
|
28224
|
|
|
2
|
|
|
|
|
13
|
|
9
|
2
|
|
|
2
|
|
170
|
use Scalar::Util qw(blessed); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
175
|
|
10
|
2
|
|
|
2
|
|
1880
|
use URI::Escape; |
|
2
|
|
|
|
|
2933
|
|
|
2
|
|
|
|
|
120
|
|
11
|
2
|
|
|
2
|
|
12
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
117
|
|
12
|
|
|
|
|
|
|
|
13
|
2
|
|
|
2
|
|
12
|
use base 'Exporter'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
8814
|
|
14
|
|
|
|
|
|
|
our @EXPORT = qw(plainbeaconlink beacon); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
sub new { |
18
|
7
|
|
|
7
|
1
|
12
|
my $class = shift; |
19
|
7
|
|
|
|
|
19
|
my $self = bless { }, $class; |
20
|
7
|
|
|
|
|
19
|
$self->_initparams( @_ ); |
21
|
6
|
|
|
|
|
11
|
$self->_startparsing; |
22
|
6
|
|
|
|
|
18
|
return $self; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
sub meta { # TODO: document meta fields |
27
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
28
|
0
|
0
|
|
|
|
0
|
return %{$self->{meta}} unless @_; |
|
0
|
|
|
|
|
0
|
|
29
|
|
|
|
|
|
|
|
30
|
0
|
0
|
|
|
|
0
|
if (@_ == 1) { |
31
|
0
|
|
|
|
|
0
|
my $key = uc(shift @_); |
32
|
0
|
|
|
|
|
0
|
$key =~ s/^\s+|\s+$//g; |
33
|
0
|
|
|
|
|
0
|
return $self->{meta}->{$key}; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
0
|
0
|
|
|
|
0
|
croak('Wrong number of arguments in SeeAlso::Beacon->meta') if @_ % 2; |
37
|
|
|
|
|
|
|
|
38
|
0
|
|
|
|
|
0
|
my %list = (@_); |
39
|
0
|
|
|
|
|
0
|
foreach my $key (keys %list) { |
40
|
0
|
0
|
|
|
|
0
|
croak('invalid meta name: "'.$key.'"') |
41
|
|
|
|
|
|
|
unless $key =~ /^\s*([a-zA-Z_-]+)\s*$/; |
42
|
0
|
|
|
|
|
0
|
my $value = $list{$key}; |
43
|
0
|
|
|
|
|
0
|
$key = uc($1); |
44
|
0
|
0
|
|
|
|
0
|
if ( defined $value ) { |
45
|
0
|
|
|
|
|
0
|
$value =~ s/^\s+|\s+$|\n//g; |
46
|
|
|
|
|
|
|
} else { |
47
|
0
|
|
|
|
|
0
|
$value = ''; |
48
|
|
|
|
|
|
|
} |
49
|
0
|
0
|
|
|
|
0
|
if ($value eq '') { # empty field: unset |
50
|
0
|
0
|
|
|
|
0
|
croak 'You cannot unset meta field #FORMAT' if $key eq 'FORMAT'; |
51
|
0
|
|
|
|
|
0
|
delete $self->{meta}->{$key}; |
52
|
|
|
|
|
|
|
} else { # check format of known meta fields |
53
|
0
|
0
|
|
|
|
0
|
if ($key eq 'TARGET') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
54
|
0
|
|
|
|
|
0
|
$value =~ s/{id}/{ID}/g; |
55
|
|
|
|
|
|
|
# TODO: document that {ID} in target is optional (will be appended) |
56
|
0
|
0
|
|
|
|
0
|
$value .= '{ID}' unless $value =~ /{ID}}/; |
57
|
0
|
|
|
|
|
0
|
my $uri = $value; |
58
|
0
|
|
|
|
|
0
|
$uri =~ s/{ID}//g; |
59
|
0
|
0
|
|
|
|
0
|
croak 'Invalid #TARGET field: must be an URI pattern' |
60
|
|
|
|
|
|
|
unless _is_uri($uri); |
61
|
|
|
|
|
|
|
} elsif ($key eq 'FEED') { |
62
|
0
|
0
|
|
|
|
0
|
croak 'FEED meta value must be a HTTP/HTTPS URL' |
63
|
|
|
|
|
|
|
unless $value =~ |
64
|
|
|
|
|
|
|
/^http(s)?:\/\/[a-z0-9-]+(.[a-z0-9-]+)*(:[0-9]+)?(\/[^#|]*)?(\?[^#|]*)?$/i; |
65
|
|
|
|
|
|
|
} elsif ($key eq 'PREFIX') { |
66
|
0
|
0
|
|
|
|
0
|
croak 'PREFIX meta value must be a URI' |
67
|
|
|
|
|
|
|
unless _is_uri($value); |
68
|
|
|
|
|
|
|
} elsif ( $key =~ /^(REVISIT|TIMESTAMP)$/) { |
69
|
0
|
0
|
|
|
|
0
|
if ($value =~ /^[0-9]+$/) { # seconds since epoch |
70
|
0
|
|
|
|
|
0
|
$value = gmtime($value)->datetime() . 'Z'; |
71
|
|
|
|
|
|
|
# Note that this conversion does not trigger an error |
72
|
|
|
|
|
|
|
# or warning, but may be dropped in a future version |
73
|
|
|
|
|
|
|
} else { |
74
|
|
|
|
|
|
|
# ISO 8601 combined date and time in UTC |
75
|
0
|
|
|
|
|
0
|
$value =~ s/Z$//; |
76
|
0
|
0
|
|
|
|
0
|
croak $key . ' meta value must be of form YYYY-MM-DDTHH:MM:SSZ' |
77
|
|
|
|
|
|
|
unless $value = Time::Piece->strptime( |
78
|
|
|
|
|
|
|
$value, '%Y-%m-%dT%T' ); |
79
|
0
|
|
|
|
|
0
|
$value = $value->datetime(); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
} elsif ( $key eq 'FORMAT' ) { |
82
|
0
|
0
|
|
|
|
0
|
croak 'Invalid FORMAT, must be BEACON or end with -BEACON' |
83
|
|
|
|
|
|
|
unless $value =~ /^([A-Z]+-)?BEACON$/; |
84
|
|
|
|
|
|
|
} elsif ( $key eq 'EXAMPLES' ) { |
85
|
0
|
|
|
|
|
0
|
my @examples = map { s/^\s+|\s+$//g; $_ } split '\|', $value; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
86
|
0
|
|
|
|
|
0
|
$self->{examples} = [ grep { $_ ne '' } @examples ]; |
|
0
|
|
|
|
|
0
|
|
87
|
0
|
|
|
|
|
0
|
%{$self->{expected_examples}} = |
|
0
|
|
|
|
|
0
|
|
88
|
0
|
|
|
|
|
0
|
map { $_ => 1 } @{$self->{examples}}; |
|
0
|
|
|
|
|
0
|
|
89
|
0
|
|
|
|
|
0
|
$value = join '|', @{$self->{examples}}; |
|
0
|
|
|
|
|
0
|
|
90
|
0
|
0
|
|
|
|
0
|
if ($value eq '') { # yet another edge case: "EXAMPLES: |" etc. |
91
|
0
|
|
|
|
|
0
|
delete $self->{meta}->{EXAMPLES}; |
92
|
0
|
|
|
|
|
0
|
$self->{expected_examples} = undef; |
93
|
0
|
|
|
|
|
0
|
next; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
# Note that examples are not checked for validity, |
96
|
|
|
|
|
|
|
# because PREFIX may not be set yet. |
97
|
|
|
|
|
|
|
} |
98
|
0
|
|
|
|
|
0
|
$self->{meta}->{$key} = $value; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub count { |
105
|
0
|
|
|
0
|
1
|
0
|
my $count = $_[0]->meta('COUNT'); |
106
|
0
|
0
|
|
|
|
0
|
return defined $count ? $count : 0; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub line { |
111
|
0
|
|
|
0
|
1
|
0
|
return $_[0]->{line}; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub lasterror { |
116
|
0
|
0
|
|
0
|
1
|
0
|
return wantarray ? @{$_[0]->{lasterror}} : $_[0]->{lasterror}->[0]; |
|
0
|
|
|
|
|
0
|
|
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
sub errors { |
121
|
5
|
|
|
5
|
1
|
379
|
return $_[0]->{errors}; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub metafields { |
126
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
127
|
0
|
|
|
|
|
0
|
my %meta = $self->meta(); |
128
|
0
|
|
|
|
|
0
|
my %fields = %meta; |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
# determine default order |
131
|
0
|
|
|
|
|
0
|
my @order = (qw(FORMAT PREFIX TARGET MESSAGE RELATION ANNOTATION), |
132
|
|
|
|
|
|
|
qw(DESCRIPTION CREATOR CONTACT HOMEPAGE FEED TIMESTAMP UPDATE), |
133
|
|
|
|
|
|
|
qw(SOURCESET TARGETSET NAME INSTITUTION)); |
134
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
0
|
my @lines = map { "#$_: " . $meta{$_} } grep { defined $meta{$_} } @order; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
136
|
0
|
0
|
|
|
|
0
|
return @lines ? join ("\n", @lines) . "\n" : ""; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub parse { |
141
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
142
|
|
|
|
|
|
|
|
143
|
0
|
|
|
|
|
0
|
$self->_initparams( @_ ); |
144
|
0
|
0
|
|
|
|
0
|
$self->_startparsing if defined $self->{from}; # start from new source |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
0
|
my $line = $self->{lookaheadline}; |
147
|
0
|
0
|
|
|
|
0
|
$line = $self->_readline() unless defined $line; |
148
|
|
|
|
|
|
|
|
149
|
0
|
|
|
|
|
0
|
while (defined $line) { |
150
|
0
|
|
|
|
|
0
|
$self->appendline( $line ); |
151
|
0
|
|
|
|
|
0
|
$line = $self->_readline(); |
152
|
|
|
|
|
|
|
} |
153
|
|
|
|
|
|
|
|
154
|
0
|
|
|
|
|
0
|
return $self->errors == 0; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub nextlink { |
159
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
160
|
|
|
|
|
|
|
|
161
|
0
|
|
|
|
|
0
|
my $line = $self->{lookaheadline}; |
162
|
0
|
0
|
|
|
|
0
|
if (defined $line) { |
163
|
0
|
|
|
|
|
0
|
$self->{lookaheadline} = undef; |
164
|
|
|
|
|
|
|
} else { |
165
|
0
|
|
|
|
|
0
|
$line = $self->_readline(); |
166
|
0
|
0
|
|
|
|
0
|
return unless defined $line; # undef => EOF |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
0
|
|
|
|
|
0
|
do { |
170
|
0
|
|
|
|
|
0
|
my @link = $self->appendline( $line ); |
171
|
0
|
0
|
|
|
|
0
|
return @link if @link; # proceed on empty lines or errors |
172
|
|
|
|
|
|
|
} while($line = $self->_readline()); |
173
|
|
|
|
|
|
|
|
174
|
0
|
|
|
|
|
0
|
return; # EOF |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub link { |
179
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
180
|
0
|
0
|
|
|
|
0
|
return @{$self->{link}} if $self->{link}; |
|
0
|
|
|
|
|
0
|
|
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub expanded { |
185
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
186
|
0
|
0
|
|
|
|
0
|
if ( $self->{link} ) { |
187
|
0
|
0
|
|
|
|
0
|
unless ( $self->{expanded} ) { |
188
|
0
|
|
|
|
|
0
|
@{$self->{expanded}} = @{$self->{link}}; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
189
|
0
|
|
|
|
|
0
|
$self->_expandlink( $self->{expanded} ) |
190
|
|
|
|
|
|
|
} |
191
|
0
|
|
|
|
|
0
|
return @{$self->{expanded}}; |
|
0
|
|
|
|
|
0
|
|
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
sub expand { |
197
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
198
|
|
|
|
|
|
|
|
199
|
0
|
0
|
|
|
|
0
|
my @fields = @_ > 0 ? @_ : ''; |
200
|
0
|
0
|
|
|
|
0
|
@fields = map { s/^\s+|\s+$//g; $_ } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
201
|
0
|
|
|
|
|
0
|
map { defined $_ ? $_ : '' } @fields; |
202
|
|
|
|
|
|
|
|
203
|
0
|
0
|
0
|
|
|
0
|
return if $fields[0] eq '' or (grep { $_ =~ /\||\n|\r/ } @fields); |
|
0
|
|
|
|
|
0
|
|
204
|
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
0
|
$self->_expandlink( \@fields ); |
206
|
|
|
|
|
|
|
|
207
|
0
|
0
|
0
|
|
|
0
|
return unless _is_uri($fields[0]) && _is_uri($fields[3]); |
208
|
|
|
|
|
|
|
|
209
|
0
|
|
|
|
|
0
|
return @fields; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub expandsource { |
214
|
0
|
|
|
0
|
1
|
0
|
my ($self, $source) = @_; |
215
|
0
|
0
|
|
|
|
0
|
return '' unless defined $source; |
216
|
0
|
|
|
|
|
0
|
$source =~ s/^\s+|\s+$//g; |
217
|
0
|
0
|
|
|
|
0
|
return '' if $source eq ''; |
218
|
|
|
|
|
|
|
|
219
|
0
|
0
|
|
|
|
0
|
$source = $self->{meta}->{PREFIX} . $source |
220
|
|
|
|
|
|
|
if defined $self->{meta}->{PREFIX}; |
221
|
|
|
|
|
|
|
|
222
|
0
|
0
|
|
|
|
0
|
return _is_uri($source) ? $source : ''; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub appendline { |
227
|
0
|
|
|
0
|
1
|
0
|
my ($self, $line) = @_; |
228
|
0
|
0
|
|
|
|
0
|
return unless defined $line; |
229
|
0
|
|
|
|
|
0
|
chomp $line; |
230
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
0
|
$self->{line}++; |
232
|
0
|
|
|
|
|
0
|
$self->{currentline} = $line; |
233
|
0
|
|
|
|
|
0
|
my @parts = split ('\|',$line); |
234
|
|
|
|
|
|
|
|
235
|
0
|
0
|
0
|
|
|
0
|
return if (@parts < 1 || $parts[0] =~ /^\s*$/ ); |
236
|
0
|
|
|
|
|
0
|
my $link = $self->_fields( @parts ); |
237
|
|
|
|
|
|
|
|
238
|
0
|
|
|
|
|
0
|
my $has_link = $self->appendlink( @$link ); |
239
|
|
|
|
|
|
|
|
240
|
0
|
|
|
|
|
0
|
$self->{currentline} = undef; |
241
|
|
|
|
|
|
|
|
242
|
0
|
0
|
|
|
|
0
|
if ( $has_link ) { |
243
|
0
|
0
|
|
|
|
0
|
return wantarray ? @{ $self->{link} } : 1; |
|
0
|
|
|
|
|
0
|
|
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
0
|
|
|
|
|
0
|
return; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub appendlink { |
251
|
5
|
|
|
5
|
1
|
33
|
my $self = shift; |
252
|
|
|
|
|
|
|
|
253
|
5
|
100
|
|
|
|
17
|
my @fields = map { defined $_ ? $_ : '' } @_[0..3]; |
|
20
|
|
|
|
|
83
|
|
254
|
5
|
|
|
|
|
10
|
@fields = map { s/^\s+|\s+$//g; $_ } @fields; |
|
20
|
|
|
|
|
32
|
|
|
20
|
|
|
|
|
42
|
|
255
|
|
|
|
|
|
|
|
256
|
5
|
50
|
|
|
|
14
|
if ( $fields[0] eq '' ) { |
|
20
|
50
|
|
|
|
51
|
|
|
|
0
|
|
|
|
|
|
257
|
0
|
|
|
|
|
0
|
$self->_handle_error( 'missing source' ); |
258
|
0
|
|
|
|
|
0
|
return; |
259
|
0
|
|
|
|
|
0
|
} elsif ( grep { $_ =~ /\|/ } @fields ) { |
260
|
5
|
|
|
|
|
12
|
$self->_handle_error( 'link fields must not contain \'|\'' ); |
261
|
4
|
|
|
|
|
40
|
return; |
262
|
|
|
|
|
|
|
} elsif ( grep { $_ =~ /[\n\r]/ } @fields ) { |
263
|
0
|
|
|
|
|
0
|
$self->_handle_error( 'link fields must not contain line breaks' ); |
264
|
0
|
|
|
|
|
0
|
return; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
0
|
my $msg = $self->_checklink( @fields ); |
268
|
0
|
0
|
|
|
|
0
|
if ( $msg ) { |
269
|
0
|
|
|
|
|
0
|
$self->_handle_error( $msg ); |
270
|
0
|
|
|
|
|
0
|
return; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# finally got a valid link |
274
|
0
|
|
|
|
|
0
|
$self->{link} = \@fields; |
275
|
0
|
|
|
|
|
0
|
$self->{expanded} = undef; |
276
|
0
|
|
|
|
|
0
|
$self->{meta}->{COUNT}++; |
277
|
|
|
|
|
|
|
|
278
|
0
|
0
|
|
|
|
0
|
if ( defined $self->{expected_examples} ) { # examples may contain prefix |
279
|
0
|
|
|
|
|
0
|
my @idforms = $fields[0]; |
280
|
0
|
|
|
|
|
0
|
my $prefix = $self->{meta}->{PREFIX}; |
281
|
0
|
0
|
|
|
|
0
|
push @idforms, $prefix . $fields[0] if defined $prefix; |
282
|
0
|
|
|
|
|
0
|
foreach my $source (@idforms) { |
283
|
0
|
0
|
|
|
|
0
|
if ( $self->{expected_examples}->{$source} ) { |
284
|
0
|
|
|
|
|
0
|
delete $self->{expected_examples}->{$source}; |
285
|
0
|
|
|
|
|
0
|
$self->{expected_examples} = undef |
286
|
0
|
0
|
|
|
|
0
|
unless keys %{ $self->{expected_examples} }; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
0
|
0
|
|
|
|
0
|
if ( $self->{link_handler} ) { |
292
|
0
|
0
|
|
|
|
0
|
if ( $self->{link_handler} eq 'print' ) { |
|
|
0
|
|
|
|
|
|
293
|
0
|
|
|
|
|
0
|
print plainbeaconlink( @fields ) . "\n"; |
294
|
|
|
|
|
|
|
} elsif ( $self->{link_handler} eq 'expand' ) { |
295
|
0
|
|
|
|
|
0
|
print join('|',$self->expanded) . "\n"; |
296
|
|
|
|
|
|
|
} else { |
297
|
|
|
|
|
|
|
# TODO: call with expanded link on request |
298
|
0
|
|
|
|
|
0
|
eval { $self->{link_handler}->( @fields ); }; |
|
0
|
|
|
|
|
0
|
|
299
|
0
|
0
|
|
|
|
0
|
if ( $@ ) { |
300
|
0
|
|
|
|
|
0
|
$self->_handle_error( "link handler died: $@" ); |
301
|
0
|
|
|
|
|
0
|
return; |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
0
|
|
|
|
|
0
|
return @fields; # TODO: return expanded on request |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
sub beacon { |
311
|
7
|
|
|
7
|
1
|
1771
|
return Data::Beacon->new( @_ ); |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub plainbeaconlink { |
316
|
0
|
0
|
0
|
0
|
1
|
0
|
shift if ref($_[0]) and UNIVERSAL::isa($_[0],'Data::Beacon'); |
317
|
0
|
0
|
|
|
|
0
|
return '' unless @_; |
318
|
0
|
0
|
|
|
|
0
|
my @link = map { defined $_ ? $_ : '' } @_[0..3]; |
|
0
|
|
|
|
|
0
|
|
319
|
0
|
|
|
|
|
0
|
@link = map { s/^\s+|\s+$//g; $_; } @link; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
320
|
0
|
0
|
|
|
|
0
|
return '' if $link[0] eq ''; |
321
|
|
|
|
|
|
|
|
322
|
0
|
0
|
|
|
|
0
|
if ( $link[3] eq '' ){ |
|
|
0
|
|
|
|
|
|
323
|
0
|
|
|
|
|
0
|
pop @link; |
324
|
0
|
0
|
|
|
|
0
|
if ($link[2] eq '') { |
325
|
0
|
|
|
|
|
0
|
pop @link; |
326
|
0
|
0
|
|
|
|
0
|
pop @link if ($link[1] eq ''); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
} elsif ( _is_uri($link[3]) ) { # only position of _is_uri where argument may be undefined |
329
|
0
|
|
|
|
|
0
|
my $uri = pop @link; |
330
|
0
|
0
|
|
|
|
0
|
if ($link[2] eq '') { |
331
|
0
|
|
|
|
|
0
|
pop @link; |
332
|
0
|
0
|
|
|
|
0
|
pop @link if ($link[1] eq ''); |
333
|
|
|
|
|
|
|
} |
334
|
0
|
|
|
|
|
0
|
push @link, $uri; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
0
|
|
|
|
|
0
|
return join('|', @link); |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub _initparams { |
342
|
7
|
|
|
7
|
|
10
|
my $self = shift; |
343
|
7
|
|
|
|
|
10
|
my %param; |
344
|
|
|
|
|
|
|
|
345
|
7
|
50
|
33
|
|
|
36
|
if ( @_ % 2 && !blessed($_[0]) && ref($_[0]) && ref($_[0]) eq 'HASH' ) { |
|
|
|
33
|
|
|
|
|
|
|
|
0
|
|
|
|
|
346
|
0
|
|
|
|
|
0
|
my $pre = shift; |
347
|
0
|
|
|
|
|
0
|
%param = @_; |
348
|
0
|
|
|
|
|
0
|
$param{pre} = $pre; |
349
|
|
|
|
|
|
|
} else { |
350
|
7
|
50
|
|
|
|
25
|
$self->{from} = (@_ % 2) ? shift(@_) : undef; |
351
|
7
|
|
|
|
|
18
|
%param = @_; |
352
|
|
|
|
|
|
|
} |
353
|
|
|
|
|
|
|
|
354
|
7
|
50
|
|
|
|
23
|
$self->{from} = $param{from} |
355
|
|
|
|
|
|
|
if exists $param{from}; |
356
|
|
|
|
|
|
|
|
357
|
7
|
100
|
|
|
|
16
|
if ( $param{errors} ) { |
358
|
3
|
|
|
|
|
7
|
my $handler = $param{errors}; |
359
|
3
|
50
|
|
|
|
13
|
$handler = $Data::Beacon::ERROR_HANDLERS{lc($handler)} |
360
|
|
|
|
|
|
|
unless ref($handler); |
361
|
3
|
100
|
66
|
|
|
17
|
unless ( ref($handler) and ref($handler) eq 'CODE' ) { |
362
|
1
|
50
|
|
|
|
8
|
my $msg = 'error handler must be code or ' |
363
|
|
|
|
|
|
|
. join('/',keys %Data::Beacon::ERROR_HANDLERS) |
364
|
|
|
|
|
|
|
. ', got ' |
365
|
|
|
|
|
|
|
. (defined $handler ? $handler : 'undef'); |
366
|
1
|
|
|
|
|
237
|
croak $msg; |
367
|
|
|
|
|
|
|
} |
368
|
2
|
|
|
|
|
5
|
$self->{error_handler} = $handler; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
6
|
50
|
|
|
|
15
|
if ( $param{links} ) { |
372
|
0
|
|
|
|
|
0
|
my $handler = $param{links}; |
373
|
0
|
0
|
0
|
|
|
0
|
croak 'link handler must be code or \'print\' or \'expand\'' |
|
|
|
0
|
|
|
|
|
374
|
|
|
|
|
|
|
unless $handler =~ /^(print|expand)$/ |
375
|
|
|
|
|
|
|
or (ref($handler) and ref($handler) eq 'CODE'); |
376
|
0
|
|
|
|
|
0
|
$self->{link_handler} = $handler; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
6
|
50
|
|
|
|
21
|
if ( defined $param{pre} ) { |
|
|
50
|
|
|
|
|
|
380
|
0
|
0
|
0
|
|
|
0
|
croak "pre option must be a hash reference" |
381
|
|
|
|
|
|
|
unless ref($param{pre}) and ref($param{pre}) eq 'HASH'; |
382
|
0
|
|
|
|
|
0
|
$self->{pre} = $param{pre}; |
383
|
|
|
|
|
|
|
} elsif ( exists $param{pre} ) { |
384
|
0
|
|
|
|
|
0
|
$self->{pre} = undef; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
6
|
|
|
|
|
19
|
$self->{mtime} = $param{mtime}; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub _startparsing { |
392
|
6
|
|
|
6
|
|
9
|
my $self = shift; |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
# we do not init $self->{meta} because it is set in initparams; |
395
|
6
|
|
|
|
|
17
|
$self->{meta} = { 'FORMAT' => 'BEACON' }; |
396
|
6
|
50
|
|
|
|
17
|
$self->meta( %{ $self->{pre} } ) if $self->{pre}; |
|
0
|
|
|
|
|
0
|
|
397
|
6
|
|
|
|
|
10
|
$self->{line} = 0; |
398
|
6
|
|
|
|
|
10
|
$self->{link} = undef; |
399
|
6
|
|
|
|
|
8
|
$self->{expanded} = undef; |
400
|
6
|
|
|
|
|
13
|
$self->{errors} = 0; |
401
|
6
|
|
|
|
|
13
|
$self->{lasterror} = []; |
402
|
6
|
|
|
|
|
10
|
$self->{lookaheadline} = undef; |
403
|
6
|
|
|
|
|
10
|
$self->{fh} = undef; |
404
|
6
|
|
|
|
|
9
|
$self->{inputlines} = []; |
405
|
6
|
|
|
|
|
12
|
$self->{examples} = []; |
406
|
|
|
|
|
|
|
|
407
|
6
|
50
|
|
|
|
37
|
return unless defined $self->{from}; |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# decide where to parse from |
410
|
0
|
|
|
|
|
0
|
my $type = ref($self->{from}); |
411
|
0
|
0
|
|
|
|
0
|
if ($type) { |
|
|
0
|
|
|
|
|
|
412
|
0
|
0
|
|
|
|
0
|
if ($type eq 'SCALAR') { |
|
|
0
|
|
|
|
|
|
413
|
0
|
|
|
|
|
0
|
$self->{inputlines} = [ split("\n",${$self->{from}}) ]; |
|
0
|
|
|
|
|
0
|
|
414
|
|
|
|
|
|
|
} elsif ($type ne 'CODE') { |
415
|
0
|
|
|
|
|
0
|
$self->_handle_error( "Unknown input $type" ); |
416
|
0
|
|
|
|
|
0
|
return; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
} elsif( $self->{from} eq '-' ) { |
419
|
0
|
|
|
|
|
0
|
$self->{fh} = \*STDIN; |
420
|
|
|
|
|
|
|
} else { |
421
|
0
|
0
|
|
|
|
0
|
if(!(open $self->{fh}, $self->{from})) { |
422
|
0
|
|
|
|
|
0
|
$self->_handle_error( 'Failed to open ' . $self->{from} ); |
423
|
0
|
|
|
|
|
0
|
return; |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
# initlialize TIMESTAMP |
428
|
0
|
0
|
|
|
|
0
|
if ($self->{mtime}) { |
429
|
0
|
|
|
|
|
0
|
my @stat = stat( $self->{from} ); |
430
|
0
|
|
|
|
|
0
|
$self->meta('TIMESTAMP', gmtime( $stat[9] )->datetime() . 'Z' ); |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# start parsing |
434
|
0
|
|
|
|
|
0
|
my $line = $self->_readline(); |
435
|
0
|
0
|
|
|
|
0
|
return unless defined $line; |
436
|
0
|
|
|
|
|
0
|
$line =~ s/^\xEF\xBB\xBF//; # UTF-8 BOM (optional) |
437
|
|
|
|
|
|
|
|
438
|
0
|
|
|
|
|
0
|
do { |
439
|
0
|
|
|
|
|
0
|
$line =~ s/^\s+|\s*\n?$//g; |
440
|
0
|
0
|
|
|
|
0
|
if ($line eq '') { |
|
|
0
|
|
|
|
|
|
441
|
0
|
|
|
|
|
0
|
$self->{line}++; |
442
|
|
|
|
|
|
|
} elsif ($line =~ /^#([^:=\s]+)(\s*[:=]?\s*|\s+)(.*)$/) { |
443
|
0
|
|
|
|
|
0
|
$self->{line}++; |
444
|
0
|
|
|
|
|
0
|
eval { $self->meta($1,$3); }; |
|
0
|
|
|
|
|
0
|
|
445
|
0
|
0
|
|
|
|
0
|
if ($@) { |
446
|
0
|
|
|
|
|
0
|
my $msg = $@; $msg =~ s/ at .*$//; |
|
0
|
|
|
|
|
0
|
|
447
|
0
|
|
|
|
|
0
|
$self->_handle_error( $msg, $line ); |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
} else { |
450
|
0
|
|
|
|
|
0
|
$self->{lookaheadline} = $line; |
451
|
0
|
|
|
|
|
0
|
return; |
452
|
|
|
|
|
|
|
} |
453
|
0
|
|
|
|
|
0
|
$line = $self->_readline(); |
454
|
|
|
|
|
|
|
} while (defined $line); |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
sub _handle_error { |
459
|
5
|
|
|
5
|
|
8
|
my $self = shift; |
460
|
5
|
|
|
|
|
8
|
my $msg = shift; |
461
|
5
|
|
50
|
|
|
36
|
my $line = shift || $self->{currentline} || ''; |
462
|
5
|
|
|
|
|
8
|
chomp $line; |
463
|
5
|
|
|
|
|
16
|
$self->{lasterror} = [ $msg, $self->{line}, $line ]; |
464
|
5
|
|
|
|
|
8
|
$self->{errors}++; |
465
|
5
|
100
|
|
|
|
23
|
$self->{error_handler}->( $msg, $self->{line}, $line ) if $self->{error_handler}; |
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
our %ERROR_HANDLERS = ( |
469
|
|
|
|
|
|
|
'print' => sub { |
470
|
|
|
|
|
|
|
my ($msg, $lineno) = @_; |
471
|
|
|
|
|
|
|
$msg .= " at line $lineno" if $lineno ; |
472
|
|
|
|
|
|
|
print STDERR "$msg\n"; |
473
|
|
|
|
|
|
|
}, |
474
|
|
|
|
|
|
|
'warn' => sub { |
475
|
|
|
|
|
|
|
my ($msg, $lineno) = @_; |
476
|
|
|
|
|
|
|
$msg .= " at line $lineno" if $lineno; |
477
|
|
|
|
|
|
|
carp $msg; |
478
|
|
|
|
|
|
|
}, |
479
|
|
|
|
|
|
|
'die' => sub { |
480
|
|
|
|
|
|
|
my ($msg, $lineno) = @_; |
481
|
|
|
|
|
|
|
$msg .= " at line $lineno" if $lineno; |
482
|
|
|
|
|
|
|
croak $msg; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
); |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
sub _readline { |
488
|
0
|
|
|
0
|
|
|
my $self = shift; |
489
|
0
|
0
|
0
|
|
|
|
if ($self->{fh}) { |
|
|
0
|
|
|
|
|
|
490
|
2
|
|
|
2
|
|
22
|
return eval { no warnings; readline $self->{fh} }; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
2832
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
} elsif (ref($self->{from}) && ref($self->{from}) eq 'CODE') { |
492
|
0
|
|
|
|
|
|
my $line = eval { $self->{from}->(); }; |
|
0
|
|
|
|
|
|
|
493
|
0
|
0
|
|
|
|
|
if ($@) { # input handler died |
494
|
0
|
|
|
|
|
|
$self->_handle_error( $@, '' ); |
495
|
0
|
|
|
|
|
|
$self->{from} = undef; |
496
|
|
|
|
|
|
|
} |
497
|
0
|
|
|
|
|
|
return $line; |
498
|
|
|
|
|
|
|
} else { |
499
|
0
|
0
|
|
|
|
|
return @{$self->{inputlines}} ? shift(@{$self->{inputlines}}) : undef; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
sub _fields { |
505
|
0
|
|
|
0
|
|
|
my $self = shift; |
506
|
0
|
|
|
|
|
|
my @parts = @_; |
507
|
|
|
|
|
|
|
|
508
|
0
|
|
|
|
|
|
my $n = scalar @parts; |
509
|
|
|
|
|
|
|
|
510
|
0
|
|
|
|
|
|
my $link = [shift @parts,"","",""]; |
511
|
|
|
|
|
|
|
|
512
|
0
|
|
|
|
|
|
my $target = $self->{meta}->{TARGET}; |
513
|
0
|
|
|
|
|
|
my $targetprefix = $self->{meta}->{TARGETPREFIX}; |
514
|
0
|
0
|
0
|
|
|
|
if ($target or $targetprefix) { |
515
|
0
|
0
|
|
|
|
|
$link->[1] = shift @parts if @parts; |
516
|
0
|
0
|
|
|
|
|
$link->[2] = shift @parts if @parts; |
517
|
|
|
|
|
|
|
# TODO: do we want both #TARGET links and explicit links in one file? |
518
|
0
|
0
|
|
|
|
|
$link->[3] = shift @parts if @parts; |
519
|
|
|
|
|
|
|
} else { |
520
|
0
|
0
|
0
|
|
|
|
$link->[3] = pop @parts |
521
|
|
|
|
|
|
|
if ($n > 1 && _is_uri($parts[$n-2])); |
522
|
0
|
0
|
|
|
|
|
$link->[1] = shift @parts if @parts; |
523
|
0
|
0
|
|
|
|
|
$link->[2] = shift @parts if @parts; |
524
|
|
|
|
|
|
|
} |
525
|
|
|
|
|
|
|
|
526
|
0
|
|
|
|
|
|
return $link |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
sub _checklink { |
530
|
0
|
|
|
0
|
|
|
my ($self, @fields) = @_; |
531
|
|
|
|
|
|
|
|
532
|
0
|
|
|
|
|
|
my @exp = @fields; |
533
|
|
|
|
|
|
|
# TODO: check only - we don't need full expansion |
534
|
0
|
|
|
|
|
|
$self->_expandlink( \@exp ); |
535
|
|
|
|
|
|
|
|
536
|
0
|
0
|
|
|
|
|
return "source is no URI: ".$exp[0] |
537
|
|
|
|
|
|
|
unless _is_uri($exp[0]); |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# TODO: we could encode bad characters etc. |
540
|
0
|
0
|
|
|
|
|
return "target is no URI: ".$exp[3] |
541
|
|
|
|
|
|
|
unless _is_uri($exp[3]); |
542
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
|
return undef; |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
sub _expandlink { |
548
|
0
|
|
|
0
|
|
|
my ($self, $link) = @_; |
549
|
|
|
|
|
|
|
|
550
|
0
|
|
|
|
|
|
my $prefix = $self->{meta}->{PREFIX}; |
551
|
|
|
|
|
|
|
|
552
|
0
|
|
|
|
|
|
my $source = $link->[0]; |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
# TODO: document this expansion |
555
|
0
|
0
|
|
|
|
|
if ( $link->[1] =~ /^[0-9]*$/ ) { # if label is number (of hits) or empty |
556
|
0
|
|
|
|
|
|
my $label = $link->[1]; |
557
|
0
|
|
|
|
|
|
my $descr = $link->[2]; |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
# TODO: handle zero hits |
560
|
0
|
|
0
|
|
|
|
my $msg = $self->{meta}->{$label eq '1' ? 'ONEMESSAGE' : 'SOMEMESSAGE'} |
561
|
|
|
|
|
|
|
|| $self->{meta}->{'MESSAGE'}; |
562
|
|
|
|
|
|
|
|
563
|
0
|
0
|
|
|
|
|
if ( defined $msg ) { |
564
|
0
|
|
|
|
|
|
_str_replace( $msg, '{id}', $link->[0] ); # unexpanded |
565
|
0
|
|
|
|
|
|
_str_replace( $msg, '{hits}', $link->[1] ); |
566
|
0
|
|
|
|
|
|
_str_replace( $msg, '{label}', $link->[1] ); |
567
|
0
|
|
|
|
|
|
_str_replace( $msg, '{description}', $link->[2] ); |
568
|
0
|
|
|
|
|
|
_str_replace( $msg, '{target}', $link->[3] ); # unexpanded |
569
|
|
|
|
|
|
|
} else { |
570
|
0
|
|
0
|
|
|
|
$msg = $self->{meta}->{'NAME'} || $self->{meta}->{'INSTITUTION'}; |
571
|
|
|
|
|
|
|
} |
572
|
0
|
0
|
0
|
|
|
|
if ( defined $msg && $msg ne '' ) { |
573
|
|
|
|
|
|
|
# if ( $link->[1] == "") $descr = $label; |
574
|
0
|
|
|
|
|
|
$link->[1] = $msg; |
575
|
0
|
|
|
|
|
|
$link->[1] =~ s/^\s+|\s+$//g; |
576
|
0
|
|
|
|
|
|
$link->[1] =~ s/\s+/ /g; |
577
|
|
|
|
|
|
|
} |
578
|
|
|
|
|
|
|
} else { |
579
|
0
|
|
|
|
|
|
_str_replace( $link->[1], '{id}', $link->[0] ); # unexpanded |
580
|
0
|
|
|
|
|
|
_str_replace( $link->[1], '{description}', $link->[2] ); |
581
|
0
|
|
|
|
|
|
_str_replace( $link->[1], '{target}', $link->[3] ); # unexpanded |
582
|
|
|
|
|
|
|
# trim label, because it may have changed |
583
|
0
|
|
|
|
|
|
$link->[1] =~ s/^\s+|\s+$//g; |
584
|
0
|
|
|
|
|
|
$link->[1] =~ s/\s+/ /g; |
585
|
|
|
|
|
|
|
} |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# expand source |
588
|
0
|
0
|
|
|
|
|
$link->[0] = $prefix . $link->[0] if defined $prefix; |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
# expand target |
591
|
0
|
|
|
|
|
|
my $target = $self->{meta}->{TARGET}; |
592
|
0
|
|
|
|
|
|
my $targetprefix = $self->{meta}->{TARGETPREFIX}; |
593
|
0
|
0
|
|
|
|
|
if (defined $target) { |
|
|
0
|
|
|
|
|
|
594
|
0
|
|
|
|
|
|
$link->[3] = $target; |
595
|
0
|
|
|
|
|
|
my $label = $link->[1]; |
596
|
0
|
|
|
|
|
|
$link->[3] =~ s/{ID}/$source/g; |
597
|
|
|
|
|
|
|
} elsif( defined $targetprefix ) { |
598
|
0
|
|
|
|
|
|
$link->[3] = $targetprefix . $link->[3]; |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
|
601
|
0
|
|
|
|
|
|
return @$link; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub _str_replace { |
605
|
0
|
|
|
0
|
|
|
$_[0] =~ s/\Q$_[1]\E/$_[2]/g; |
606
|
|
|
|
|
|
|
} |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
sub _is_uri { |
610
|
0
|
|
|
0
|
|
|
my $value = $_[0]; |
611
|
|
|
|
|
|
|
|
612
|
0
|
0
|
|
|
|
|
return unless defined($value); |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# check for illegal characters |
615
|
0
|
0
|
|
|
|
|
return if $value =~ /[^a-z0-9\:\/\?\#\[\]\@\!\$\&\'\(\)\*\+\,\;\=\.\-\_\~\%]/i; |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
# check for hex escapes that aren't complete |
618
|
0
|
0
|
|
|
|
|
return if $value =~ /%[^0-9a-f]/i; |
619
|
0
|
0
|
|
|
|
|
return if $value =~ /%[0-9a-f](:?[^0-9a-f]|$)/i; |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# split uri (from RFC 3986) |
622
|
0
|
|
|
|
|
|
my($scheme, $authority, $path, $query, $fragment) |
623
|
|
|
|
|
|
|
= $value =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
# scheme and path are required, though the path can be empty |
626
|
0
|
0
|
0
|
|
|
|
return unless (defined($scheme) && length($scheme) && defined($path)); |
|
|
|
0
|
|
|
|
|
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
# if authority is present, the path must be empty or begin with a / |
629
|
0
|
0
|
0
|
|
|
|
if(defined($authority) && length($authority)){ |
630
|
0
|
0
|
0
|
|
|
|
return unless(length($path) == 0 || $path =~ m!^/!); |
631
|
|
|
|
|
|
|
} else { |
632
|
|
|
|
|
|
|
# if authority is not present, the path must not start with // |
633
|
0
|
0
|
|
|
|
|
return if $path =~ m!^//!; |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
# scheme must begin with a letter, then consist of letters, digits, +, ., or - |
637
|
0
|
0
|
|
|
|
|
return unless lc($scheme) =~ m!^[a-z][a-z0-9\+\-\.]*$!; |
638
|
|
|
|
|
|
|
|
639
|
0
|
|
|
|
|
|
return 1; |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
1; |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
__END__ |