line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package URI::Find::Delimited; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
518
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
4
|
use vars qw( $VERSION ); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
46
|
|
6
|
|
|
|
|
|
|
$VERSION = '0.03'; |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
15
|
use base qw(URI::Find); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
494
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# For 5.005_03 compatibility (copied from URI::Find::Schemeless) |
11
|
1
|
|
|
1
|
|
5821
|
use URI::Find (); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
12
|
|
12
|
1
|
|
|
1
|
|
390
|
use URI::URL; |
|
1
|
|
|
|
|
2589
|
|
|
1
|
|
|
|
|
353
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Works like L<URI::Find>, but is prepared for URIs in your text to be |
21
|
|
|
|
|
|
|
wrapped in a pair of delimiters and optionally have a title. This will |
22
|
|
|
|
|
|
|
be useful for processing text that already has some minimal markup in |
23
|
|
|
|
|
|
|
it, like bulletin board posts or wiki text. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $finder = URI::Find::Delimited->new; |
28
|
|
|
|
|
|
|
my $text = "This is a [http://the.earth.li/ titled link]."; |
29
|
|
|
|
|
|
|
$finder->find(\$text); |
30
|
|
|
|
|
|
|
print $text; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 METHODS |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=over 4 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=item B<new> |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
my $finder = URI::Find::Delimited->new( |
39
|
|
|
|
|
|
|
callback => \&callback, |
40
|
|
|
|
|
|
|
delimiter_re => [ '\[', '\]' ], |
41
|
|
|
|
|
|
|
ignore_quoted => 1 # defaults to 0 |
42
|
|
|
|
|
|
|
); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
All arguments are optional; defaults are provided (see below). |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Creates a new URI::Find::Delimited object. This object works similarly |
47
|
|
|
|
|
|
|
to a L<URI::Find> object, but as well as just looking for URIs it is also |
48
|
|
|
|
|
|
|
aware of the concept of a wrapped, titled URI. These look something like |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
[http://foo.com/ the foo website] |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
where: |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=over 4 |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
=item * C<[> is the opening delimiter |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=item * C<]> is the closing delimiter |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item * C<http://foo.com/> is the URI |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=item * C<the foo website> is the title |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=item * the URI and title are separated by spaces and/or tabs |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=back |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The URI::Find::Delimited object will extract each of these parts |
69
|
|
|
|
|
|
|
separately and pass them to your callback. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=over 4 |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=item B<callback> |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
C<callback> is a function which is called on each URI found. It is |
76
|
|
|
|
|
|
|
passed five arguments: the opening delimiter (if found), the closing |
77
|
|
|
|
|
|
|
delimiter (if found), the URI, the title (if found), and any |
78
|
|
|
|
|
|
|
whitespace found between the URI and title. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
The return value of the callback will replace the original URI in the |
81
|
|
|
|
|
|
|
text. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
If you do not supply your own callback, the object will create a |
84
|
|
|
|
|
|
|
default one which will put your URIs in 'a href' tags using the URI |
85
|
|
|
|
|
|
|
for the target and the title for the link text. If no title is |
86
|
|
|
|
|
|
|
provided for a URI then the URI itself will be used as the title. If |
87
|
|
|
|
|
|
|
the delimiters aren't balanced (eg if the opening one is present but |
88
|
|
|
|
|
|
|
no closing one is found) then the URI is treated as not being wrapped. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Note: the default callback will not remove the delimiters from the |
91
|
|
|
|
|
|
|
text. It should be simple enough to write your own callback to remove |
92
|
|
|
|
|
|
|
them, based on the one in the source, if that's what you want. In fact |
93
|
|
|
|
|
|
|
there's an example in this distribution, in C. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item B<delimiter_re> |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
The C<delimiter_re> parameter is optional. If you do supply it then it |
98
|
|
|
|
|
|
|
should be a ref to an array containing two regexes. It defaults to |
99
|
|
|
|
|
|
|
using single square brackets as the delimiters. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Don't use capturing groupings C<( )> in your delimiters or things |
102
|
|
|
|
|
|
|
will break. Use non-capturing C<(?: )> instead. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=item B<ignore_quoted> |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
If the C<ignore_quoted> parameter is supplied and set to a true value, |
107
|
|
|
|
|
|
|
then any URIs immediately preceded with a double-quote character will |
108
|
|
|
|
|
|
|
not be matched, ie your callback will not be executed for them and |
109
|
|
|
|
|
|
|
they'll be treated just as normal text. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
This is a bit of a hack but it's in here because I need to be able to |
112
|
|
|
|
|
|
|
ignore things like |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
    <a href="http://foo.com/"> |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
A better implementation may happen at some point. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=back |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Constructor.
#
# Arguments (all optional, passed as a key/value list):
#   callback      - code ref stored on the object; find() calls it as
#                   ($open, $close, $uri, $title, $whitespace) and uses its
#                   return value as the replacement text.
#   delimiter_re  - array ref holding two regex strings, the opening and the
#                   closing delimiter.  Defaults to single square brackets.
#   ignore_quoted - flag stored on the object for find() to consult.
#
# Returns a hash-based object blessed into $class.
sub new {
    my ($class, %args) = @_;

    my ( $callback, $delimiter_re, $ignore_quoted ) =
        @args{ qw( callback delimiter_re ignore_quoted ) };

    unless (defined $callback) {
        # Default callback: turn the URI into an 'a href' link, leaving the
        # delimiters themselves in the text (as the POD documents).
        $callback = sub {
            my ($open, $close, $uri, $title, $whitespace) = @_;

            # Tolerate callers that pass undef for the optional parts.
            foreach ( $open, $close, $title, $whitespace ) {
                $_ = '' unless defined $_;
            }

            if ( $open && $close ) {
                # Balanced delimiters: link text is the title, falling back
                # to the URI only when the title is really empty (a plain
                # truth test would also throw away a title of "0").
                $title = $uri unless length $title;
                return qq|$open<a href="$uri">$title</a>$close|;
            }

            # Not (fully) wrapped: link just the URI and put everything else
            # back around it unchanged.
            return qq|$open<a href="$uri">$uri</a>$whitespace$title$close|;
        };
    }

    $delimiter_re ||= [ '\[', '\]' ];

    my $self = bless { callback      => $callback,
                       delimiter_re  => $delimiter_re,
                       ignore_quoted => $ignore_quoted
                     }, $class;
    return $self;
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Search the text referenced by $r_text for URIs, either bare or wrapped in
# the configured delimiters with an optional title, and replace each match
# in place with the return value of the object's callback.
#
# Arguments:
#   $r_text - reference to the scalar holding the text; it is modified
#             in place by the substitution.
#
# Returns the number of matches that passed the _is_uri() check, i.e. the
# number of times the callback was invoked.
sub find {
    my ($self, $r_text) = @_;

    my $urlsfound = 0;

    URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme

    my $uri_re    = $self->uri_re;
    # NOTE(review): this literal was truncated in the source as received and
    # has been reconstructed from the documented ignore_quoted behaviour --
    # confirm against the upstream distribution.  Letters are excluded as
    # well as the double quote so that, after refusing to start at the first
    # character of "http://..., the regex cannot simply start one character
    # later in the middle of the scheme.
    my $prefix_re = $self->{ignore_quoted} ? '(?<!["a-zA-Z])' : '';
    my $open_re   = $self->{delimiter_re}[0];
    my $close_re  = $self->{delimiter_re}[1];

    # Note we only allow spaces and tabs, not all whitespace, between a URI
    # and its title.  Also we disallow newlines *in* the title.  These are
    # both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
    # as part of the title, and thus not wrapped.
    $$r_text =~ s{$prefix_re # maybe don't match things preceded by a "
                  (?:
                      ($open_re)   # opening delimiter
                      ($uri_re)    # the URI itself
                      ([ \t]*)     # optional whitespace between URI and title
                      ((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
                      ($close_re)  # closing delimiter
                    |
                      ($uri_re)    # just the URI itself
                  )
                 }{
        my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
            ($1, $2, $3, $4, $5, $6);
        $uri_match = $just_uri if defined $just_uri;

        # Captures from the alternative that did not match are undef; make
        # them empty strings.  A defined-ness test (rather than ||=) keeps a
        # legitimate captured "0", e.g. a title of "0".
        foreach ( $open, $whitespace, $title, $close ) {
            $_ = "" unless defined $_;
        }

        # Remember exactly what was matched so a false alarm can be put back
        # untouched.
        my $orig_text = qq|$open$uri_match$whitespace$title$close|;

        if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
            $urlsfound++;
            $self->{callback}->($open,$close,$uri_match,$title,$whitespace);
        } else {
            $orig_text;
        }
    }egx;

    return $urlsfound;
}
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head1 SEE ALSO |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
L<URI::Find>. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head1 AUTHOR |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Kake Pugh (kake@earth.li). |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 COPYRIGHT |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
Copyright (C) 2003 Kake Pugh. All Rights Reserved. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it |
207
|
|
|
|
|
|
|
under the same terms as Perl itself. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head1 CREDITS |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Tim Bagot helped me stop faffing over the name, by pointing out that |
212
|
|
|
|
|
|
|
RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the |
213
|
|
|
|
|
|
|
regex to make it work for delimited URIs with no title. Nick Cleaton |
214
|
|
|
|
|
|
|
helped me make C<ignore_quoted> work. Some of the code was taken from |
215
|
|
|
|
|
|
|
L<URI::Find>. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=cut |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
1; |