File Coverage

blib/lib/Text/FindLinks.pm

Criterion	Covered	Total	%
statement	31	31	100.0
branch	5	6	83.3
condition	2	2	100.0
subroutine	8	8	100.0
pod	3	3	100.0
total	49	50	98.0

line	stmt	bran	cond	sub	pod	time	code
1							package Text::FindLinks;
2
3							=encoding utf8
4
5							=head1 NAME
6
7							Text::FindLinks - Find and markup URLs in plain text
8
9							=cut
10
11	4			4		108116	use warnings;
	4					11
	4					119
12	4			4		21	use strict;
	4					6
	4					119
13	4			4		19	use Exporter;
	4					11
	4					180
14	4			4		4307	use Params::Validate qw/validate CODEREF/;
	4					42764
	4					1900
15
16							our @ISA = 'Exporter';
17							our @EXPORT_OK = qw/find_links markup_links/;
18							our $VERSION = '0.04';
19
20							=head1 SYNOPSIS
21
22							use Text::FindLinks 'markup_links';
23							my $text = "Have you seen www.foo.com yet?";
24							# Have you seen http://www.foo.com yet?
25							print markup_links(text => $text);
26
27							=head1 FUNCTIONS
28
29							=head2 markup_links(text => ..., [handler => sub { ... }])
30
31							Finds all URLs in the given text and replaces them using
32							the given handler. The handler gets passed three arguments:
33							the URL itself, all the text to the left from it and all the
34							text to the right. (The context is passed in case you would
35							like to keep some URLs untouched.) If no handler is given,
36							the default handler will be used that simply creates a link
37							to the URL and skips URLs already turned into links.
38
39							=cut
40
41							sub markup_links
42							{
43	12			12	1	4959	validate(@_,
44							{
45							text => 1,
46							handler =>
47							{
48							type => CODEREF,
49							optional => 1,
50							},
51							});
52
53	12					78	my %args = @_;
54	12					18	my $text = $args{'text'};
55	12		100			47	my $decorator = $args{'handler'} \|\| \&decorate_link;
56
57	12					135	$text =~ s{(
58							(
59							(((https?)\|(ftp))://) # either a schema...
60							\| (www\.) # or the ‘www’ token
61							)
62							[^\s<]+ # anything except whitespace and ‘<’
63							(?
64							)}
65	13					41	{&$decorator($1, $`, $')}gex;
66
67	12					118	return $text;
68							}
69
70							=head2 find_links(text => ...)
71
72							Returns an array with all the URLs found in given text.
73							Just a simple wrapper around C, see the
74							sources.
75
76							=cut
77
78							sub find_links
79							{
80	1			1	1	43	validate(@_, { text => 1 });
81	1					5	my %args = @_;
82	1					2	my @links;
83							markup_links(
84							text => $args{'text'},
85	1			2		9	handler => sub { push @links, shift });
	2					58
86	1					7	return @links;
87							}
88
89							=head2 decorate_link($url, $left, $right)
90
91							Default URL handler that will be used if you don’t pass your
92							own to the C sub using the C attribute.
93							It turns an URL into a HTML link and skips URLs that are
94							apparently already links. Not exported.
95
96							=cut
97
98							sub decorate_link
99							{
100	3			3	1	13	my ($url, $left, $right) = @_;
101
102							# Skip already marked links.
103	3	100				20	return $url if ($left =~ /href=["']$/);
104	2	100				25	return $url if ($right =~ qr\|^\|);
105
106	1					4	my $label = $url;
107	1	50				8	$url = "http://$url" if ($url =~ /^www/i);
108	1					7	return qq\|$label\|;
109							}
110
111							=head1 BUGS
112
113							The algorithm is extremely naive, a simple regex. It is almost
114							certain that some URLs will not be recognized and some things
115							that are not URLs will (to keep the balance). I’d be glad to
116							hear if there is some URL that misbehaves.
117
118							L
119
120							=head1 AUTHOR
121
122							Tomáš Znamenáček, zoul@fleuron.cz
123
124							L
125
126							=head1 COPYRIGHT & LICENSE
127
128							Copyright 2009 Tomáš Znamenáček
129
130							This program is free software; you can redistribute it and/or modify it
131							under the terms of either: the GNU General Public License as published
132							by the Free Software Foundation; or the Artistic License.
133
134							See L for more information.
135
136							=cut
137
138							1;