line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::FindLinks; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=encoding utf8 |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Text::FindLinks - Find and markup URLs in plain text |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=cut |
10
|
|
|
|
|
|
|
|
11
|
4
|
|
|
4
|
|
108116
|
use warnings; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
119
|
|
12
|
4
|
|
|
4
|
|
21
|
use strict; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
119
|
|
13
|
4
|
|
|
4
|
|
19
|
use Exporter; |
|
4
|
|
|
|
|
11
|
|
|
4
|
|
|
|
|
180
|
|
14
|
4
|
|
|
4
|
|
4307
|
use Params::Validate qw/validate CODEREF/; |
|
4
|
|
|
|
|
42764
|
|
|
4
|
|
|
|
|
1900
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our @ISA = 'Exporter'; |
17
|
|
|
|
|
|
|
our @EXPORT_OK = qw/find_links markup_links/; |
18
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 SYNOPSIS |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use Text::FindLinks 'markup_links'; |
23
|
|
|
|
|
|
|
my $text = "Have you seen www.foo.com yet?"; |
24
|
|
|
|
|
|
|
# Have you seen http://www.foo.com yet? |
25
|
|
|
|
|
|
|
print markup_links(text => $text); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 FUNCTIONS |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head2 markup_links(text => ..., [handler => sub { ... }]) |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Finds all URLs in the given text and replaces them using |
32
|
|
|
|
|
|
|
the given handler. The handler gets passed three arguments: |
33
|
|
|
|
|
|
|
the URL itself, all the text to the left from it and all the |
34
|
|
|
|
|
|
|
text to the right. (The context is passed in case you would |
35
|
|
|
|
|
|
|
like to keep some URLs untouched.) If no handler is given, |
36
|
|
|
|
|
|
|
the default handler will be used that simply creates a link |
37
|
|
|
|
|
|
|
to the URL and skips URLs already turned into links. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=cut |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub markup_links |
42
|
|
|
|
|
|
|
{ |
43
|
12
|
|
|
12
|
1
|
4959
|
validate(@_, |
44
|
|
|
|
|
|
|
{ |
45
|
|
|
|
|
|
|
text => 1, |
46
|
|
|
|
|
|
|
handler => |
47
|
|
|
|
|
|
|
{ |
48
|
|
|
|
|
|
|
type => CODEREF, |
49
|
|
|
|
|
|
|
optional => 1, |
50
|
|
|
|
|
|
|
}, |
51
|
|
|
|
|
|
|
}); |
52
|
|
|
|
|
|
|
|
53
|
12
|
|
|
|
|
78
|
my %args = @_; |
54
|
12
|
|
|
|
|
18
|
my $text = $args{'text'}; |
55
|
12
|
|
100
|
|
|
47
|
my $decorator = $args{'handler'} || \&decorate_link; |
56
|
|
|
|
|
|
|
|
57
|
12
|
|
|
|
|
135
|
$text =~ s{( |
58
|
|
|
|
|
|
|
( |
59
|
|
|
|
|
|
|
(((https?)|(ftp))://) # either a schema... |
60
|
|
|
|
|
|
|
| (www\.) # or the ‘www’ token |
61
|
|
|
|
|
|
|
) |
62
|
|
|
|
|
|
|
[^\s<]+ # anything except whitespace and ‘<’ |
63
|
|
|
|
|
|
|
(?
|
64
|
|
|
|
|
|
|
)} |
65
|
13
|
|
|
|
|
41
|
{&$decorator($1, $`, $')}gex; |
66
|
|
|
|
|
|
|
|
67
|
12
|
|
|
|
|
118
|
return $text; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head2 find_links(text => ...) |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Returns an array with all the URLs found in given text. |
73
|
|
|
|
|
|
|
Just a simple wrapper around C, see the |
74
|
|
|
|
|
|
|
sources. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub find_links |
79
|
|
|
|
|
|
|
{ |
80
|
1
|
|
|
1
|
1
|
43
|
validate(@_, { text => 1 }); |
81
|
1
|
|
|
|
|
5
|
my %args = @_; |
82
|
1
|
|
|
|
|
2
|
my @links; |
83
|
|
|
|
|
|
|
markup_links( |
84
|
|
|
|
|
|
|
text => $args{'text'}, |
85
|
1
|
|
|
2
|
|
9
|
handler => sub { push @links, shift }); |
|
2
|
|
|
|
|
58
|
|
86
|
1
|
|
|
|
|
7
|
return @links; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 decorate_link($url, $left, $right) |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Default URL handler that will be used if you don’t pass your |
92
|
|
|
|
|
|
|
own to the C sub using the C attribute. |
93
|
|
|
|
|
|
|
It turns an URL into a HTML link and skips URLs that are |
94
|
|
|
|
|
|
|
apparently already links. Not exported. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=cut |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
sub decorate_link |
99
|
|
|
|
|
|
|
{ |
100
|
3
|
|
|
3
|
1
|
13
|
my ($url, $left, $right) = @_; |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# Skip already marked links. |
103
|
3
|
100
|
|
|
|
20
|
return $url if ($left =~ /href=["']$/); |
104
|
2
|
100
|
|
|
|
25
|
return $url if ($right =~ qr|^|); |
105
|
|
|
|
|
|
|
|
106
|
1
|
|
|
|
|
4
|
my $label = $url; |
107
|
1
|
50
|
|
|
|
8
|
$url = "http://$url" if ($url =~ /^www/i); |
108
|
1
|
|
|
|
|
7
|
return qq|$label|; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 BUGS |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
The algorithm is extremely naive, a simple regex. It is almost |
114
|
|
|
|
|
|
|
certain that some URLs will not be recognized and some things |
115
|
|
|
|
|
|
|
that are not URLs will (to keep the balance). I’d be glad to |
116
|
|
|
|
|
|
|
hear if there is some URL that misbehaves. |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
L |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head1 AUTHOR |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Tomáš Znamenáček, zoul@fleuron.cz |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
L |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Copyright 2009 Tomáš Znamenáček |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
131
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
132
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
See L for more information. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
1; |