line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package URI::Find::Iterator; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
4063
|
use strict; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
199
|
|
4
|
|
|
|
|
|
|
require URI; |
5
|
5
|
|
|
5
|
|
4323
|
use URI::Find; |
|
5
|
|
|
|
|
41754
|
|
|
5
|
|
|
|
|
321
|
|
6
|
5
|
|
|
5
|
|
4195
|
use URI::URL; |
|
5
|
|
|
|
|
20729
|
|
|
5
|
|
|
|
|
271
|
|
7
|
5
|
|
|
5
|
|
4477
|
use UNIVERSAL::require; |
|
5
|
|
|
|
|
8248
|
|
|
5
|
|
|
|
|
57
|
|
8
|
|
|
|
|
|
|
|
9
|
5
|
|
|
5
|
|
188
|
use vars qw($VERSION); |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
3857
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$VERSION = "0.6"; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Identifying characters accidentally picked up with a URI. |
15
|
|
|
|
|
|
|
my($cruft_set) = q{),.'";}; #'# |
16
|
|
|
|
|
|
|
my($scheme_re) = $URI::scheme_re; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=pod |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 NAME |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
URI::Find::Iterator - provides an iterator interface to B |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
use URI::Find::Iterator; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
my $string = "foo http://thegestalt.org/simon/ bar\n"; |
29
|
|
|
|
|
|
|
my $it = URI::Find::Iterator->new($string); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
while (my ($uri, $orig_match) = $it->match()) { |
32
|
|
|
|
|
|
|
print "Matched $uri\n"; |
33
|
|
|
|
|
|
|
$it->replace("$uri"); |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# prints |
37
|
|
|
|
|
|
|
# foo http://thegestalt.org/simon bar |
38
|
|
|
|
|
|
|
print $it->result(); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 DESCRIPTION |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Inspired by Mark Jason Dominus' talk I |
45
|
|
|
|
|
|
|
(available from http://perl.plover.com/yak/iterators/) this is an iterative |
46
|
|
|
|
|
|
|
version of B that hopefully makes code a little easier to understand |
47
|
|
|
|
|
|
|
and works slightly better with people's brains than callbacks do. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 METHODS |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 new [%opts] |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
Takes a string checking as an argument. Optionally can also take a |
55
|
|
|
|
|
|
|
class name to extract regexes from (the class must have uri_re and |
56
|
|
|
|
|
|
|
schemeless_uri_re methods). |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
URI::Find::Iterator->new($string, class => "URI::Find::Schemeless"); |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
would be the canonical example. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Alterantively it could take a straight regexp of your own devising |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
URI::Find::Iterator->new($string, re => "http://[^ ]+"); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub new { |
70
|
5
|
|
|
5
|
1
|
3553
|
my ($class, $string, %opts) = @_; |
71
|
|
|
|
|
|
|
|
72
|
5
|
|
|
|
|
13
|
my $re; |
73
|
|
|
|
|
|
|
|
74
|
5
|
100
|
|
|
|
23
|
if (defined $opts{'re'}) { |
75
|
1
|
|
|
|
|
2
|
$re = $opts{'re'}; |
76
|
|
|
|
|
|
|
} else { |
77
|
|
|
|
|
|
|
|
78
|
4
|
|
100
|
|
|
31
|
my $re_class = $opts{'class'} || "URI::Find"; |
79
|
|
|
|
|
|
|
|
80
|
4
|
50
|
|
|
|
41
|
$re_class->require() || die "No such class $re_class\n"; |
81
|
4
|
50
|
|
|
|
35352
|
$re_class->can('uri_re') || die "$re_class has no method uri_re\n"; |
82
|
4
|
50
|
|
|
|
28
|
$re_class->can('schemeless_uri_re') || die "$re_class has no method schemeless_uri_re\n"; |
83
|
|
|
|
|
|
|
|
84
|
4
|
|
|
|
|
19
|
$re = sprintf '(?:%s|%s)', $re_class->uri_re, $re_class->schemeless_uri_re; |
85
|
4
|
|
|
|
|
4266
|
$re = "(<$re>|$re)"; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
89
|
5
|
|
|
|
|
11
|
my $self = {}; |
90
|
5
|
|
|
|
|
14
|
$self->{_re} = $re; |
91
|
5
|
|
|
|
|
12
|
$self->{_result} = ""; |
92
|
5
|
|
|
|
|
11
|
$self->{_remain} = $string; |
93
|
5
|
|
|
|
|
10
|
$self->{_match} = undef; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
|
96
|
5
|
|
|
|
|
41
|
return bless $self, $class; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head2 match |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Returns the current match as a tuple - the first element of which is |
103
|
|
|
|
|
|
|
a B object and the second is the original text of the URI found. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Just like B. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
It then advances to the next one. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub match { |
113
|
14
|
|
|
14
|
1
|
5746
|
my $self = shift; |
114
|
14
|
50
|
|
|
|
50
|
return undef unless defined $self->{_remain}; |
115
|
14
|
|
|
|
|
38
|
$self->_next(); |
116
|
|
|
|
|
|
|
|
117
|
14
|
|
|
|
|
26
|
my $re = $self->{_re}; |
118
|
|
|
|
|
|
|
|
119
|
3
|
|
|
3
|
|
3586
|
$self->{_remain} =~ /(<$re>|$re)/; |
|
3
|
|
|
|
|
31
|
|
|
3
|
|
|
|
|
38
|
|
|
14
|
|
|
|
|
1205
|
|
120
|
|
|
|
|
|
|
|
121
|
14
|
100
|
|
|
|
85793
|
return undef unless defined $1; |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# stolen from URI::Find |
124
|
9
|
|
|
|
|
25
|
my $orig = $1; |
125
|
9
|
|
100
|
|
|
55
|
my $pre = $` || ""; |
126
|
9
|
|
100
|
|
|
44
|
my $post = $' || ""; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# A heruristic. Often you'll see things like: |
130
|
|
|
|
|
|
|
# "I saw this site, http://www.foo.com, and its really neat!" |
131
|
|
|
|
|
|
|
# or "Foo Industries (at http://www.foo.com)" |
132
|
|
|
|
|
|
|
# We want to avoid picking up the trailing paren, period or comma. |
133
|
|
|
|
|
|
|
# Of course, this might wreck a perfectly valid URI, more often than |
134
|
|
|
|
|
|
|
# not it corrects a parse mistake. |
135
|
9
|
|
|
|
|
40
|
my $clean_match = $self->_decruft($orig); |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# Translate schemeless to schemed if necessary. |
138
|
9
|
100
|
|
|
|
213
|
my $uri = $self->_schemeless_to_schemed($clean_match) unless |
139
|
|
|
|
|
|
|
$clean_match =~ /^${scheme_re}:/; |
140
|
|
|
|
|
|
|
|
141
|
9
|
|
|
|
|
70
|
eval { |
142
|
9
|
|
|
|
|
79
|
$uri = URI::URL->new($uri); |
143
|
|
|
|
|
|
|
}; |
144
|
|
|
|
|
|
|
|
145
|
9
|
50
|
33
|
|
|
29546
|
if (!$@ && defined $uri) { |
146
|
9
|
|
|
|
|
28
|
$self->{_result} .= $pre; |
147
|
9
|
|
|
|
|
22
|
$self->{_remain} = $post; |
148
|
9
|
|
|
|
|
20
|
$self->{_match} = $orig; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
|
152
|
9
|
|
|
|
|
44
|
return ($uri, $clean_match); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub _schemeless_to_schemed { |
156
|
1
|
|
|
1
|
|
2
|
my($self, $uri_cand) = @_; |
157
|
|
|
|
|
|
|
|
158
|
1
|
50
|
|
|
|
13
|
$uri_cand =~ s|^()ftp\.|$1ftp://ftp\.| |
159
|
|
|
|
|
|
|
or $uri_cand =~ s|^()|${1}http://|; |
160
|
|
|
|
|
|
|
|
161
|
1
|
|
|
|
|
5
|
return $uri_cand; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub _decruft { |
167
|
9
|
|
|
9
|
|
20
|
my($self, $orig_match) = @_; |
168
|
|
|
|
|
|
|
|
169
|
9
|
|
|
|
|
38
|
$self->{start_cruft} = ''; |
170
|
9
|
|
|
|
|
21
|
$self->{end_cruft} = ''; |
171
|
|
|
|
|
|
|
|
172
|
9
|
50
|
|
|
|
122
|
if( $orig_match =~ s/([${cruft_set}]+)$// ) { |
173
|
0
|
|
|
|
|
0
|
$self->{end_cruft} = $1; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
9
|
|
|
|
|
28
|
return $orig_match; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=head2 replace |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Replaces the current match with I |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=cut |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
sub replace { |
191
|
3
|
|
|
3
|
1
|
1378
|
my ($self, $replace) = @_; |
192
|
3
|
50
|
|
|
|
14
|
return unless defined $self->{_match}; |
193
|
3
|
|
|
|
|
15
|
$self->{_match} = $replace; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head2 result |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Returns the string with all replacements. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=cut |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub result { |
204
|
5
|
|
|
5
|
1
|
48
|
my $self = shift; |
205
|
5
|
|
50
|
|
|
23
|
my $start = $self->{_result} || ""; |
206
|
5
|
|
50
|
|
|
33
|
my $match = $self->{_match} || ""; |
207
|
5
|
|
100
|
|
|
46
|
my $end = $self->{_remain} || ""; |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
|
210
|
5
|
|
|
|
|
28
|
return "${start}${match}${end}"; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub _next { |
215
|
14
|
|
|
14
|
|
19
|
my $self = shift; |
216
|
14
|
100
|
|
|
|
47
|
return undef unless defined $self->{_match}; |
217
|
|
|
|
|
|
|
|
218
|
9
|
|
|
|
|
25
|
$self->{_result} .= $self->{_match}; |
219
|
9
|
|
|
|
|
37
|
$self->{_match} = undef; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=pod |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head1 BUGS |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
None that I know of but there are probably loads. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
It could possibly be split out into a generic |
230
|
|
|
|
|
|
|
B module. |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=head1 COPYING |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
Distributed under the same terms as Perl itself. |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head1 AUTHOR |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Copyright (c) 2003, Simon Wistow |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=head1 SEE ALSO |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
L, http://perl.plover.com/yak/iterators/ |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=cut |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# keep perl happy |
247
|
|
|
|
|
|
|
1; |