| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
=head1 NAME |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
WWW::Link::Reporter::HTML - Report on status of links in HTML |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use WWW::Link; |
|
8
|
|
|
|
|
|
|
use WWW::Link::Reporter::HTML; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
$link=new WWW::Link; |
|
11
|
|
|
|
|
|
|
#over time do things to the link ...... |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$::reporter=new WWW::Link::Reporter::HTML; |
|
14
|
|
|
|
|
|
|
$::reporter->examine($link) |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
or see WWW::Link::Selector for a way to recurse through all of the links. |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
This class will output information about any link that it is given. |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
If it's constructor is given an index (CDB_File::BiIndex or BiIndex) then it |
|
23
|
|
|
|
|
|
|
can use that to generate lists of urls containing links being reported |
|
24
|
|
|
|
|
|
|
on. |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=cut |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
package WWW::Link::Reporter::HTML; |
|
29
|
|
|
|
|
|
|
$REVISION=q$Revision: 1.7 $ ; $VERSION = sprintf ( "%d.%02d", $REVISION =~ /(\d+).(\d+)/ ); |
|
30
|
|
|
|
|
|
|
|
|
31
|
1
|
|
|
1
|
|
647
|
use WWW::Link; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
27
|
|
|
32
|
1
|
|
|
1
|
|
1082
|
use HTML::Stream; |
|
|
1
|
|
|
|
|
3622
|
|
|
|
1
|
|
|
|
|
57
|
|
|
33
|
1
|
|
|
1
|
|
26
|
use WWW::Link::Reporter; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
32
|
|
|
34
|
|
|
|
|
|
|
@ISA=qw(WWW::Link::Reporter); |
|
35
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
34
|
|
|
36
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
1149
|
|
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 new |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
This |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=cut |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub new ($$;$){ |
|
45
|
0
|
|
|
0
|
1
|
|
my ($class,$stream, $index)=@_; |
|
46
|
0
|
|
|
|
|
|
my $self=WWW::Link::Reporter::new($class, $index); |
|
47
|
0
|
|
|
|
|
|
$self->{"hstr"} = new HTML::Stream::LinkReport $stream; |
|
48
|
0
|
|
|
|
|
|
$self->{"docurl"} = "http://localhost/linkcontroler/docs/"; |
|
49
|
0
|
|
|
|
|
|
return $self; |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub heading ($) { |
|
53
|
0
|
|
|
0
|
0
|
|
my $self=shift; |
|
54
|
0
|
|
|
|
|
|
$self->{"hstr"}->Heading; |
|
55
|
|
|
|
|
|
|
} |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub footer ($) { |
|
58
|
0
|
|
|
0
|
0
|
|
my $self=shift; |
|
59
|
0
|
|
|
|
|
|
$self->{"hstr"}->Footer; |
|
60
|
|
|
|
|
|
|
} |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub not_found { |
|
63
|
0
|
|
|
0
|
1
|
|
my $self=shift; |
|
64
|
0
|
|
|
|
|
|
my $url=shift; |
|
65
|
0
|
|
|
|
|
|
$self->{"hstr"}-> P |
|
66
|
|
|
|
|
|
|
-> t("Sorry, the link $url is not in the database.\n") |
|
67
|
|
|
|
|
|
|
-> _P; |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
sub broken { |
|
71
|
0
|
|
|
0
|
1
|
|
my ($self, $link, $redir)=@_; |
|
72
|
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
74
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
75
|
|
|
|
|
|
|
|
|
76
|
0
|
|
|
|
|
|
$hstr-> P; |
|
77
|
|
|
|
|
|
|
|
|
78
|
0
|
|
|
|
|
|
$hstr -> STRONG -> nl |
|
79
|
|
|
|
|
|
|
->Link_Heading($url, "Link found BROKEN") |
|
80
|
|
|
|
|
|
|
-> _STRONG -> nl; |
|
81
|
|
|
|
|
|
|
|
|
82
|
0
|
0
|
|
|
|
|
$redir && $self->redirections($link); |
|
83
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
84
|
|
|
|
|
|
|
|
|
85
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
86
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
|
|
89
|
0
|
|
|
|
|
|
$hstr-> _P; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub okay { |
|
93
|
0
|
|
|
0
|
1
|
|
my ($self, $link, $redir)=@_; |
|
94
|
|
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
96
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
97
|
|
|
|
|
|
|
|
|
98
|
0
|
|
|
|
|
|
$hstr -> P; |
|
99
|
|
|
|
|
|
|
|
|
100
|
0
|
|
|
|
|
|
$hstr->Link_Heading($url, "Link tested okay"); |
|
101
|
|
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
|
$redir && $self->redirections($link); |
|
103
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
104
|
|
|
|
|
|
|
|
|
105
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
106
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
0
|
|
|
|
|
|
$hstr -> _P; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub damaged { |
|
113
|
0
|
|
|
0
|
1
|
|
my ($self, $link, $redir)=@_; |
|
114
|
|
|
|
|
|
|
|
|
115
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
116
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
117
|
|
|
|
|
|
|
|
|
118
|
0
|
|
|
|
|
|
$hstr -> P; |
|
119
|
|
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
$hstr->Link_Heading($url, "Link may be broken"); |
|
121
|
|
|
|
|
|
|
|
|
122
|
0
|
0
|
|
|
|
|
$redir && $self->redirections($link); |
|
123
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
124
|
|
|
|
|
|
|
|
|
125
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
126
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
0
|
|
|
|
|
|
$hstr -> _P; |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
sub not_checked { |
|
133
|
0
|
|
|
0
|
1
|
|
my ($self, $link )=@_; |
|
134
|
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
136
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
137
|
|
|
|
|
|
|
|
|
138
|
0
|
|
|
|
|
|
$hstr -> P; |
|
139
|
|
|
|
|
|
|
|
|
140
|
0
|
|
|
|
|
|
$hstr->Link_Heading($url, "Link not yet checked"); |
|
141
|
|
|
|
|
|
|
|
|
142
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
143
|
|
|
|
|
|
|
|
|
144
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
145
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
$hstr -> _P; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub disallowed { |
|
152
|
0
|
|
|
0
|
1
|
|
my ($self, $link)=@_; |
|
153
|
|
|
|
|
|
|
|
|
154
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
155
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
156
|
|
|
|
|
|
|
|
|
157
|
0
|
|
|
|
|
|
$hstr -> P; |
|
158
|
|
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
$hstr->Link_Heading($url, "Link checking not allowed"); |
|
160
|
|
|
|
|
|
|
|
|
161
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
162
|
|
|
|
|
|
|
|
|
163
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
164
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
|
$hstr -> _P; |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub unsupported { |
|
171
|
0
|
|
|
0
|
1
|
|
my ($self, $link)=@_; |
|
172
|
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
174
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
175
|
|
|
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
$hstr -> P; |
|
177
|
|
|
|
|
|
|
|
|
178
|
0
|
|
|
|
|
|
$hstr->Link_Heading($url, "Link uses unsupported protocol"); |
|
179
|
|
|
|
|
|
|
|
|
180
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
181
|
|
|
|
|
|
|
|
|
182
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
183
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
184
|
|
|
|
|
|
|
} |
|
185
|
|
|
|
|
|
|
|
|
186
|
0
|
|
|
|
|
|
$hstr -> _P; |
|
187
|
|
|
|
|
|
|
} |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
sub unknown { |
|
190
|
0
|
|
|
0
|
1
|
|
my ($self, $link)=@_; |
|
191
|
|
|
|
|
|
|
|
|
192
|
0
|
|
|
|
|
|
my $url=$link->url(); |
|
193
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
194
|
|
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
$hstr -> P; |
|
196
|
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
|
$hstr->Link_Heading($url, "Link status unknown; error?"); |
|
198
|
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
$self->suggestions($link); |
|
200
|
|
|
|
|
|
|
|
|
201
|
0
|
0
|
|
|
|
|
if ($self->{"index"}) { |
|
202
|
0
|
|
|
|
|
|
$hstr->page_list( $self->{"index"}->lookup_second($url) ); |
|
203
|
|
|
|
|
|
|
} |
|
204
|
|
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
|
$hstr -> _P; |
|
206
|
|
|
|
|
|
|
} |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
#we should separately deal with temporary redirections (generally |
|
209
|
|
|
|
|
|
|
#ignored) and long term redirections (should generally be applied) |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub redirections { |
|
212
|
0
|
|
|
0
|
0
|
|
my ($self, $link)=@_; |
|
213
|
|
|
|
|
|
|
|
|
214
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
215
|
|
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
|
my @redirects=$link->redirect_urls(); |
|
217
|
0
|
0
|
|
|
|
|
if (@redirects) { |
|
218
|
0
|
|
|
|
|
|
$hstr -> DL; |
|
219
|
0
|
|
|
|
|
|
foreach my $redir ( @redirects ) { |
|
220
|
0
|
|
|
|
|
|
$hstr -> DT -> t("redirected to") -> _DT |
|
221
|
|
|
|
|
|
|
-> DD -> Link($redir) -> _DD -> nl; |
|
222
|
|
|
|
|
|
|
} |
|
223
|
0
|
|
|
|
|
|
$hstr -> _DL; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
} |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub suggestions { |
|
228
|
0
|
|
|
0
|
0
|
|
my ($self, $link)=@_; |
|
229
|
0
|
|
|
|
|
|
my $hstr=$self->{"hstr"}; |
|
230
|
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
|
$hstr -> DL; |
|
232
|
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
my $suggest; |
|
234
|
0
|
|
|
|
|
|
my $suggestions=$link->fix_suggestions(); |
|
235
|
0
|
0
|
|
|
|
|
if ($suggestions) { |
|
236
|
0
|
|
|
|
|
|
foreach $suggest ( @{$suggestions} ) { |
|
|
0
|
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
|
$hstr -> DT -> t("suggest:") -> _DT |
|
238
|
|
|
|
|
|
|
-> DD -> Link($suggest) -> _DD -> nl; |
|
239
|
|
|
|
|
|
|
} |
|
240
|
|
|
|
|
|
|
} |
|
241
|
0
|
|
|
|
|
|
$hstr -> _DL; |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
package HTML::Stream::LinkReport; |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
our @ISA; |
|
248
|
|
|
|
|
|
|
@ISA=qw(HTML::Stream); |
|
249
|
|
|
|
|
|
|
|
|
250
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
38
|
|
|
251
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
36
|
|
|
252
|
|
|
|
|
|
|
|
|
253
|
1
|
|
|
1
|
|
5
|
use HTML::Stream; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
376
|
|
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
sub Heading { |
|
256
|
0
|
|
|
0
|
|
|
my $self=shift; |
|
257
|
0
|
|
|
|
|
|
$self->HTML |
|
258
|
|
|
|
|
|
|
->HEAD |
|
259
|
|
|
|
|
|
|
->TITLE ->t("Link Controller Report.") -> _TITLE |
|
260
|
|
|
|
|
|
|
->_HEAD |
|
261
|
|
|
|
|
|
|
->BODY |
|
262
|
|
|
|
|
|
|
->H1 ->t("Report Contents") -> _H1; |
|
263
|
0
|
|
|
|
|
|
return $self; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub Footer { |
|
267
|
0
|
|
|
0
|
|
|
my $self=shift; |
|
268
|
0
|
|
|
|
|
|
$self->_BODY |
|
269
|
|
|
|
|
|
|
->_HTML; |
|
270
|
0
|
|
|
|
|
|
return $self; |
|
271
|
|
|
|
|
|
|
} |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=head2 Link_heading |
|
274
|
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
This function simply prints a heading for a link with the url and text |
|
276
|
|
|
|
|
|
|
given as arguments. |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub Link_Heading ($$$) { |
|
281
|
0
|
|
|
0
|
|
|
my ($self, $url, $text) = @_; |
|
282
|
0
|
|
|
|
|
|
$self -> nl->H2 -> t($text . " ") |
|
283
|
|
|
|
|
|
|
-> Link($url)-> _H2->nl; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=head2 page_list |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
This takes a list of urls as an argument and generates a unnumbered |
|
289
|
|
|
|
|
|
|
html list consisting of those urls inside links to those urls. It is |
|
290
|
|
|
|
|
|
|
for use for refering to pages on which urls occur. |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Obviously, if the URLs are file urls, then the machine they are being |
|
293
|
|
|
|
|
|
|
read on must be the same as the one the file urls refer to. |
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=cut |
|
296
|
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub page_list { |
|
298
|
0
|
|
|
0
|
|
|
my $self=shift; |
|
299
|
0
|
|
|
|
|
|
my $array=shift; |
|
300
|
0
|
|
|
|
|
|
$self->UL; |
|
301
|
0
|
|
|
|
|
|
foreach (@$array) { |
|
302
|
0
|
|
|
|
|
|
$self -> LI |
|
303
|
|
|
|
|
|
|
-> Link($_) |
|
304
|
|
|
|
|
|
|
-> _LI -> nl; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
0
|
|
|
|
|
|
$self->_UL; |
|
307
|
0
|
|
|
|
|
|
return $self; |
|
308
|
|
|
|
|
|
|
} |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 $LR->Link() |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
This method puts out a url inside a link refering to that url. I |
|
313
|
|
|
|
|
|
|
don't want to encourage this for general use: it's much better to use |
|
314
|
|
|
|
|
|
|
a description generally. This program, however, deals directly with |
|
315
|
|
|
|
|
|
|
links so it suits us here. |
|
316
|
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=cut |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
sub Link { |
|
320
|
0
|
|
|
0
|
|
|
my $self=shift; |
|
321
|
0
|
|
|
|
|
|
my $url=shift; |
|
322
|
0
|
|
|
|
|
|
$self -> A(HREF=>$url) |
|
323
|
|
|
|
|
|
|
-> t($url) |
|
324
|
|
|
|
|
|
|
-> _A ; |
|
325
|
0
|
|
|
|
|
|
return $self; |
|
326
|
|
|
|
|
|
|
} |