line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Dancer::Plugin::SiteMap; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
26844
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
36
|
|
4
|
1
|
|
|
1
|
|
468
|
use Dancer qw(:syntax); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
use Dancer::Plugin; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Scalar::Util; |
8
|
|
|
|
|
|
|
use XML::Simple; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.14';

# Route patterns that must never appear in a sitemap. This is the package
# variable $Dancer::Plugin::SiteMap::OMIT_ROUTES that the rest of the module
# pushes to and reads from by its fully-qualified name; it is declared with
# "our" (not "my") so that this declaration and those uses refer to the same
# array reference. "||=" keeps any value assigned before the module loaded.
our $OMIT_ROUTES ||= [];

# Cache of the sorted URL list; filled by _retreive_get_urls on first use.
my @sitemap_urls;
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Keyword "sitemap_ignore": every argument is appended to the list of route
# patterns that are left out of the generated sitemaps.
register 'sitemap_ignore' => sub {
    my @patterns = @_;

    # Dancer 2 hands keywords the DSL object as their first argument; it is
    # not a route pattern, so discard it before storing the rest.
    my $first = $patterns[0];
    if ( Scalar::Util::blessed($first) && $first->isa('Dancer::Core::DSL') ) {
        shift @patterns;
    }

    push @{$Dancer::Plugin::SiteMap::OMIT_ROUTES}, @patterns;
};
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# Register the plugin for both Dancer 1 and Dancer 2.
register_plugin( for_versions => [ '1', '2' ] );

# Plugin settings as returned by plugin_setting().
my $conf = plugin_setting();

# Sitemap flavours offered by the plugin: for each one, the default URL path
# and the handler that renders it.
my %routes;
$routes{html} = {
    urlpath => '/sitemap',
    coderef => \&_html_sitemap,
};
$routes{xml} = {
    urlpath => '/sitemap.xml',
    coderef => \&_xml_sitemap,
};
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Wire up each sitemap route. A "<type>_route" key that is present in the
# plugin settings overrides the default path; if its value is false the
# route is dropped altogether. When the key is absent, the default urlpath
# from %routes is kept.
foreach my $kind ( keys %routes ) {
    my $entry   = $routes{$kind};
    my $setting = "${kind}_route";

    $entry->{urlpath} = $conf->{$setting} || undef
        if exists $conf->{$setting};

    # No path left means this sitemap type has been switched off.
    next unless $entry->{urlpath};

    get( $entry->{urlpath} => $entry->{coderef} );
}
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Add omissions defined in a robots.txt file, if the "robots_disallow"
# setting names one. Every path found on a Disallow line is handed to
# sitemap_ignore(). Dies if the file cannot be opened.
if ( defined $conf->{'robots_disallow'} ) {

    my $robots_txt = $conf->{'robots_disallow'};
    my @disallowed_list;

    open my $robots_fh, '<', $robots_txt or die "Error reading $robots_txt $!";

    while ( my $line = <$robots_fh> ) {

        # The field name is matched case-insensitively and whitespace around
        # the colon is optional, so "Disallow:/x" and "disallow : /x" are
        # both recognised. The capture is the path up to the first
        # whitespace or "#" (start of a comment).
        if ( $line =~ m{^\s*Disallow\s*:\s*(/[^\s#]*)}i ) {
            push @disallowed_list, $1;
        }
    }

    close $robots_fh;

    # NOTE(review): the collected paths are later matched as regular
    # expressions by the URL gathering code, not as literal prefixes.
    sitemap_ignore(@disallowed_list);
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# The action handler for the automagic /sitemap route. Builds an HTML list
# from the URLs returned by _retreive_get_urls.
#
# Returns the rendered page: through the template named by the
# "html_template" setting when one is configured (the list is passed to it
# as "sitemap"), otherwise the list wrapped in the site layout.
sub _html_sitemap {
    my @urls = _retreive_get_urls();

    # NOTE(review): the markup inside these string literals was missing from
    # the listing this was taken from (the first literal was unterminated);
    # a plain heading plus unordered list of links has been restored.
    # Confirm the exact tags and class name against the upstream module.
    my $content = qq[<h2>Site Map</h2>\n<ul class="sitemap">\n];
    for my $url (@urls) {
        $content .= qq[  <li><a href="$url">$url</a></li>\n];
    }
    $content .= qq[</ul>\n];

    # A configured wrapper template also takes care of the layout; without
    # one, apply the site layout to the bare list ourselves.
    return $conf->{html_template}
        ? template( $conf->{html_template}, { sitemap => $content } )
        : engine('template')->apply_layout($content);
}
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# The action handler for the automagic /sitemap.xml route. Turns the URLs
# returned by _retreive_get_urls into a <urlset> document.
#
# Sets the response content type to text/xml and returns the XML string.
sub _xml_sitemap {
    my @urls = _retreive_get_urls();

    # One { loc => [...] } entry per URL so XML::Simple emits a <loc> child
    # element for each. uri_for() is forced to a plain string, as the
    # original code required.
    my @url_nodes = map { +{ loc => [ '' . uri_for($_) ] } } @urls;

    # Data structure for XML::Simple to serialise.
    my %urlset = (
        xmlns => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        url   => \@url_nodes,
    );

    # NOTE(review): XMLDecl was an empty string in the listing this was taken
    # from, which looks like a stripped "<?xml ...?>" literal (an empty value
    # is the same as not passing the option). The standard declaration has
    # been restored; confirm against the upstream module.
    my $xs = XML::Simple->new(
        KeepRoot   => 1,
        ForceArray => 0,
        KeyAttr    => { urlset => 'xmlns' },
        XMLDecl    => '<?xml version="1.0" encoding="UTF-8"?>',
    );
    my $xml = $xs->XMLout( { urlset => \%urlset } );

    content_type('text/xml');
    return $xml;
}
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# Obtains the list of URLs from Dancer's route registry.
#
# Walks the GET routes of every application, keeps the static ones, removes
# duplicates and anything matching a pattern registered via sitemap_ignore,
# and returns the result sorted. A non-empty result is cached in
# @sitemap_urls and returned directly on later calls.
sub _retreive_get_urls {
    return @sitemap_urls if @sitemap_urls;

    # dancer_version() only exists under newer Dancer; assume 1 without it.
    my $version = ( exists &dancer_version ) ? int( dancer_version() ) : 1;
    my $is_v2   = ( $version == 2 );

    my @apps = $is_v2
        ? @{ runner->server->apps }
        : Dancer::App->applications;

    # Compile the omit patterns once per call rather than once per route.
    # Each is anchored at the start of the route and matched without regard
    # to case. The "|| []" keeps this safe if nothing was ever registered.
    my @omit_res =
        map { qr/^$_/i } @{ $Dancer::Plugin::SiteMap::OMIT_ROUTES || [] };

    my ( @urls, %seen );
    for my $app (@apps) {
        my $routes = $is_v2
            ? $app->routes
            : $app->{registry}->{routes};

        # Collect the static GET routes.
        GET_ROUTE:
        for my $get_route ( @{ $routes->{get} } ) {
            my $pattern = $is_v2
                ? $get_route->spec_route
                : $get_route->{pattern};

            # Hash-based patterns are not handled here.
            next GET_ROUTE if ref($pattern) =~ m/HASH/i;

            # Skip true regexps and routes with a :variable element, as well
            # as Dancer 2's /** megasplat entries.
            next GET_ROUTE if $pattern =~ m/[()[\]|]|:\w/;
            next GET_ROUTE if $pattern =~ m{/\*\*};

            # Drop any "?" (optional modifier) so the full route is listed.
            $pattern =~ s/\?//g;

            # Only add any given route once.
            next GET_ROUTE if $seen{$pattern};

            # Leave out anything the application asked us to ignore.
            next GET_ROUTE if grep { $pattern =~ $_ } @omit_res;

            $seen{$pattern} = 1;
            push @urls, $pattern;
        }
    }

    @sitemap_urls = sort @urls;
    return @sitemap_urls;
}
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
1; # End of Dancer::Plugin::SiteMap |
170
|
|
|
|
|
|
|
__END__ |