File Coverage

blib/lib/Text/FindLinks.pm
Criterion Covered Total %
statement 31 31 100.0
branch 5 6 83.3
condition 2 2 100.0
subroutine 8 8 100.0
pod 3 3 100.0
total 49 50 98.0


line stmt bran cond sub pod time code
1             package Text::FindLinks;
2              
3             =encoding utf8
4              
5             =head1 NAME
6              
7             Text::FindLinks - Find and markup URLs in plain text
8              
9             =cut
10              
11 4     4   108116 use warnings;
  4         11  
  4         119  
12 4     4   21 use strict;
  4         6  
  4         119  
13 4     4   19 use Exporter;
  4         11  
  4         180  
14 4     4   4307 use Params::Validate qw/validate CODEREF/;
  4         42764  
  4         1900  
15              
16             our @ISA = 'Exporter';
17             our @EXPORT_OK = qw/find_links markup_links/;
18             our $VERSION = '0.04';
19              
20             =head1 SYNOPSIS
21              
22             use Text::FindLinks 'markup_links';
23             my $text = "Have you seen www.foo.com yet?";
24             # Have you seen http://www.foo.com yet?
25             print markup_links(text => $text);
26              
27             =head1 FUNCTIONS
28              
29             =head2 markup_links(text => ..., [handler => sub { ... }])
30              
31             Finds all URLs in the given text and replaces them using
32             the given handler. The handler gets passed three arguments:
33             the URL itself, all the text to the left from it and all the
34             text to the right. (The context is passed in case you would
35             like to keep some URLs untouched.) If no handler is given,
36             the default handler will be used that simply creates a link
37             to the URL and skips URLs already turned into links.
38              
39             =cut
40              
41             sub markup_links
42             {
43 12     12 1 4959 validate(@_,
44             {
45             text => 1,
46             handler =>
47             {
48             type => CODEREF,
49             optional => 1,
50             },
51             });
52              
53 12         78 my %args = @_;
54 12         18 my $text = $args{'text'};
55 12   100     47 my $decorator = $args{'handler'} || \&decorate_link;
56              
57 12         135 $text =~ s{(
58             (
59             (((https?)|(ftp))://) # either a schema...
60             | (www\.) # or the ‘www’ token
61             )
62             [^\s<]+ # anything except whitespace and ‘<’
63             (?
64             )}
65 13         41 {&$decorator($1, $`, $')}gex;
66              
67 12         118 return $text;
68             }
69              
70             =head2 find_links(text => ...)
71              
72             Returns an array with all the URLs found in given text.
73             Just a simple wrapper around C, see the
74             sources.
75              
76             =cut
77              
78             sub find_links
79             {
80 1     1 1 43 validate(@_, { text => 1 });
81 1         5 my %args = @_;
82 1         2 my @links;
83             markup_links(
84             text => $args{'text'},
85 1     2   9 handler => sub { push @links, shift });
  2         58  
86 1         7 return @links;
87             }
88              
89             =head2 decorate_link($url, $left, $right)
90              
91             Default URL handler that will be used if you don’t pass your
92             own to the C sub using the C attribute.
93             It turns an URL into a HTML link and skips URLs that are
94             apparently already links. Not exported.
95              
96             =cut
97              
98             sub decorate_link
99             {
100 3     3 1 13 my ($url, $left, $right) = @_;
101              
102             # Skip already marked links.
103 3 100       20 return $url if ($left =~ /href=["']$/);
104 2 100       25 return $url if ($right =~ qr|^|);
105              
106 1         4 my $label = $url;
107 1 50       8 $url = "http://$url" if ($url =~ /^www/i);
108 1         7 return qq|$label|;
109             }
110              
111             =head1 BUGS
112              
113             The algorithm is extremely naive, a simple regex. It is almost
114             certain that some URLs will not be recognized and some things
115             that are not URLs will (to keep the balance). I’d be glad to
116             hear if there is some URL that misbehaves.
117              
118             L
119              
120             =head1 AUTHOR
121              
122             Tomáš Znamenáček, zoul@fleuron.cz
123              
124             L
125              
126             =head1 COPYRIGHT & LICENSE
127              
128             Copyright 2009 Tomáš Znamenáček
129              
130             This program is free software; you can redistribute it and/or modify it
131             under the terms of either: the GNU General Public License as published
132             by the Free Software Foundation; or the Artistic License.
133              
134             See L for more information.
135              
136             =cut
137              
138             1;