File Coverage

blib/lib/URI/Find/Delimited.pm

Criterion	Covered	Total	%
statement	22	42	52.3
branch	1	10	10.0
condition	1	10	10.0
subroutine	6	7	85.7
pod	2	2	100.0
total	32	71	45.0

line	stmt	bran	cond	sub	pod	time	code
1							package URI::Find::Delimited;
2
3	1			1		1787	use strict;
	1					2
	1					43
4
5	1			1		5	use vars qw( $VERSION );
	1					1
	1					58
6							$VERSION = '0.02';
7
8	1			1		22	use base qw(URI::Find);
	1					2
	1					939
9
10							# For 5.005_03 compatibility (copied from URI::Find::Schemeless)
11	1			1		17814	use URI::Find ();
	1					2
	1					592
12
13							=head1 NAME
14
15							URI::Find::Delimited - Find URIs which may be wrapped in enclosing delimiters.
16
17							=head1 DESCRIPTION
18
19							Works like L, but is prepared for URIs in your text to be
20							wrapped in a pair of delimiters and optionally have a title. This will
21							be useful for processing text that already has some minimal markup in
22							it, like bulletin board posts or wiki text.
23
24							=head1 SYNOPSIS
25
26							my $finder = URI::Find::Delimited->new;
27							my $text = "This is a [http://the.earth.li/ titled link].";
28							$finder->find(\$text);
29							print $text;
30
31							=head1 METHODS
32
33							=over 4
34
35							=item B
36
37							my $finder = URI::Find::Delimited->new(
38							callback => \&callback,
39							delimiter_re => [ '\[', '\]' ],
40							ignore_quoted => 1 # defaults to 0
41							);
42
43							All arguments are optional; defaults are provided (see below).
44
45							Creates a new URI::Find::Delimited object. This object works similarly
46							to a L object, but as well as just looking for URIs it is also
47							aware of the concept of a wrapped, titled URI. These look something like
48
49							[http://foo.com/ the foo website]
50
51							where:
52
53							=over 4
54
55							=item * C<[> is the opening delimiter
56
57							=item * C<]> is the closing delimiter
58
59							=item * C is the URI
60
61							=item * C is the title
62
63							=item * the URI and title are separated by spaces and/or tabs
64
65							=back
66
67							The URI::Find::Delimited object will extract each of these parts
68							separately and pass them to your callback.
69
70							=over 4
71
72							=item B
73
74							C is a function which is called on each URI found. It is
75							passed five arguments: the opening delimiter (if found), the closing
76							delimiter (if found), the URI, the title (if found), and any
77							whitespace found between the URI and title.
78
79							The return value of the callback will replace the original URI in the
80							text.
81
82							If you do not supply your own callback, the object will create a
83							default one which will put your URIs in 'a href' tags using the URI
84							for the target and the title for the link text. If no title is
85							provided for a URI then the URI itself will be used as the title. If
86							the delimiters aren't balanced (eg if the opening one is present but
87							no closing one is found) then the URI is treated as not being wrapped.
88
89							Note: the default callback will not remove the delimiters from the
90							text. It should be simple enough to write your own callback to remove
91							them, based on the one in the source, if that's what you want. In fact
92							there's an example in this distribution, in C.
93
94							=item B
95
96							The C parameter is optional. If you do supply it then it
97							should be a ref to an array containing two regexes. It defaults to
98							using single square brackets as the delimiters.
99
100							Don't use capturing groupings C<( )> in your delimiters or things
101							will break. Use non-capturing C<(?: )> instead.
102
103							=item B
104
105							If the C parameter is supplied and set to a true value,
106							then any URIs immediately preceded with a double-quote character will
107							not be matched, ie your callback will not be executed for them and
108							they'll be treated just as normal text.
109
110							This is kinda lame but it's in here because I need to be able to
111							ignore things like
112
113
114
115							A better implementation may happen at some point.
116
117							=back
118
119							=cut
120
121							sub new {
122	1			1	1	492	my ($class, %args) = @_;
123
124	1					3	my ( $callback, $delimiter_re, $ignore_quoted ) =
125							@args{ qw( callback delimiter_re ignore_quoted ) };
126
127	1	50				5	unless (defined $callback) {
128							$callback = sub {
129	0			0		0	my ($open, $close, $uri, $title, $whitespace) = @_;
130	0	0	0			0	if ( $open && $close ) {
131	0		0			0	$title \|\|= $uri;
132	0					0	qq\|$open$title$close\|;
133							} else {
134	0					0	qq\|$open$uri$whitespace$title$close\|;
135							}
136	1					7	};
137							}
138	1		50			10	$delimiter_re \|\|= [ '\[', '\]' ];
139
140	1					6	my $self = bless { callback => $callback,
141							delimiter_re => $delimiter_re,
142							ignore_quoted => $ignore_quoted
143							}, $class;
144	1					4	return $self;
145							}
146
147							sub find {
148	1			1	1	11	my($self, $r_text) = @_;
149
150	1					2	my $urlsfound = 0;
151
152	1					166	URI::URL::strict(1); # Don't assume any old thing followed by : is a scheme
153
154	0						my $uri_re = $self->uri_re;
155	0	0					my $prefix_re = $self->{ignore_quoted} ? '(?
156	0						my $open_re = $self->{delimiter_re}[0];
157	0						my $close_re = $self->{delimiter_re}[1];
158
159							# Note we only allow spaces and tabs, not all whitespace, between a URI
160							# and its title. Also we disallow newlines in the title. These are
161							# both to avoid the bug where $uri1\n$uri2 leads to $uri2 being considered
162							# as part of the title, and thus not wrapped.
163	0						$$r_text =~ s{$prefix_re # maybe don't match things preceded by a "
164							(?:
165							($open_re) # opening delimiter
166							($uri_re) # the URI itself
167							([ \t]*) # optional whitespace between URI and title
168							((?<=[ \t])[^\n$close_re]+)? #title if there was whitespace
169							($close_re) # closing delimiter
170							\|
171							($uri_re) # just the URI itself
172							)
173							}{
174	0						my ($open, $uri_match, $whitespace, $title, $close, $just_uri) =
175							($1, $2, $3, $4, $5, $6);
176	0	0					$uri_match = $just_uri if $just_uri;
177	0						foreach ( $open, $whitespace, $title, $close ) {
178	0		0				$_ \|\|= "";
179							}
180	0						my $orig_text = qq\|$open$uri_match$whitespace$title$close\|;
181
182	0	0					if( my $uri = $self->_is_uri( \$uri_match ) ) { # if not a false alarm
183	0						$urlsfound++;
184	0						$self->{callback}->($open,$close,$uri_match,$title,$whitespace);
185							} else {
186	0						$orig_text;
187							}
188							}egx;
189
190	0						return $urlsfound;
191							}
192
193							=head1 SEE ALSO
194
195							L.
196
197							=head1 AUTHOR
198
199							Kake Pugh (kake@earth.li).
200
201							=head1 COPYRIGHT
202
203							Copyright (C) 2003 Kake Pugh. All Rights Reserved.
204
205							This module is free software; you can redistribute it and/or modify it
206							under the same terms as Perl itself.
207
208							=head1 CREDITS
209
210							Tim Bagot helped me stop faffing over the name, by pointing out that
211							RFC 2396 Appendix E uses "delimited". Dave Hinton helped me fix the
212							regex to make it work for delimited URIs with no title. Nick Cleaton
213							helped me make C work. Some of the code was taken from
214							L.
215
216							=cut
217
218							1;