File Coverage

blib/lib/WWW/FetchStory/Fetcher/PotterPlace.pm

Criterion	Covered	Total	%
statement	9	88	10.2
branch	0	18	0.0
condition			n/a
subroutine	3	12	25.0
pod	9	9	100.0
total	21	127	16.5

line	stmt	bran	sub	pod	time	code
1						package WWW::FetchStory::Fetcher::PotterPlace;
2						$WWW::FetchStory::Fetcher::PotterPlace::VERSION = '0.2602';
3	1		1		217207	use strict;
	1				2
	1				42
4	1		1		6	use warnings;
	1				2
	1				81
5						=head1 NAME
6
7						WWW::FetchStory::Fetcher::PotterPlace - fetching module for WWW::FetchStory
8
9						=head1 VERSION
10
11						version 0.2602
12
13						=head1 DESCRIPTION
14
15						This is the PotterPlace story-fetching plugin for WWW::FetchStory.
16
17						=cut
18
19	1		1		6	use parent qw(WWW::FetchStory::Fetcher);
	1				2
	1				8
20
21						=head2 info
22
23						Information about the fetcher.
24
25						$info = $self->info();
26
27						=cut
28
29						sub info {
30	0		0	1		my $self = shift;
31
32	0					my $info = "(http://www.potterplacearchives.com) A Harry Potter fiction archive.";
33
34	0					return $info;
35						} # info
36
37						=head2 priority
38
39						The priority of this fetcher. Fetchers with higher priority
40						get tried first. This is useful where there may be a generic
41						fetcher for a particular site, and then a more specialized fetcher
42						for particular sections of a site. For example, there may be a
43						generic PotterPlace fetcher, and then refinements for particular
44						PotterPlace community, such as the sshg_exchange community.
45						This works as either a class function or a method.
46
47						This must be overridden by the specific fetcher class.
48
49						$priority = $self->priority();
50
51						$priority = WWW::FetchStory::Fetcher::priority($class);
52
53						=cut
54
55						sub priority {
56	0		0	1		my $class = shift;
57
58	0					return 1;
59						} # priority
60
61						=head2 allow
62
63						If this fetcher can be used for the given URL, then this returns
64						true.
65						This must be overridden by the specific fetcher class.
66
67						if ($obj->allow($url))
68						{
69						....
70						}
71
72						=cut
73
74						sub allow {
75	0		0	1		my $self = shift;
76	0					my $url = shift;
77
78	0					return ($url =~ /potterplacearchives\.com/);
79						} # allow
80
81						=head1 Private Methods
82
83						=head2 parse_toc
84
85						Parse the table-of-contents file.
86
87						%info = $self->parse_toc(content=>$content,
88						url=>$url,
89						urls=>\@urls);
90
91						This should return a hash containing:
92
93						=over
94
95						=item chapters
96
97						An array of URLs for the chapters of the story. In the case where the
98						story only takes one page, that will be the chapter.
99						In the case where multiple URLs have been passed in, it will be those URLs.
100
101						=item title
102
103						The title of the story.
104
105						=back
106
107						It may also return additional information, such as Summary.
108
109						=cut
110
111						sub parse_toc {
112	0		0	1		my $self = shift;
113	0					my %args = (
114						url=>'',
115						content=>'',
116						@_
117						);
118
119	0					my %info = ();
120	0					my $content = $args{content};
121
122	0					$info{url} = $args{url};
123	0					my $sid='';
124	0	0				if ($args{url} =~ m#sid=(\d+)#)
125						{
126	0					$sid = $1;
127						}
128						else
129						{
130	0					return $self->SUPER::parse_toc(%args);
131						}
132	0					$info{title} = $self->parse_title(%args);
133	0					$info{author} = $self->parse_author(%args);
134	0					$info{summary} = $self->parse_summary(%args);
135	0					$info{characters} = $self->parse_characters(%args);
136	0					$info{universe} = 'Harry Potter';
137	0					$info{chapters} = $self->parse_chapter_urls(%args, sid=>$sid);
138
139	0					return %info;
140						} # parse_toc
141
142						=head2 parse_chapter_urls
143
144						Figure out the URLs for the chapters of this story.
145
146						=cut
147						sub parse_chapter_urls {
148	0		0	1		my $self = shift;
149	0					my %args = (
150						url=>'',
151						content=>'',
152						@_
153						);
154	0					my $content = $args{content};
155	0					my $sid = $args{sid};
156	0					my @chapters = ();
157	0	0				if (defined $args{urls})
158						{
159	0					@chapters = @{$args{urls}};
	0
160						}
161	0	0				if (@chapters == 1)
162						{
163						# fortunately Potter Place has a sane chapter system
164	0					my $fmt = 'http://www.potterplacearchives.com/viewstory.php?action=printable&textsize=0&sid=%d&chapter=%d';
165	0	0				if ($content =~ m#<span class="label">Chapters:\s</span>\s(\d+)#s)
166						{
167	0					@chapters = ();
168	0					my $num_ch = $1;
169	0					for (my $i=1; $i <= $num_ch; $i++)
170						{
171	0					my $ch_url = sprintf($fmt, $sid, $i);
172	0	0				warn "chapter=$ch_url\n" if ($self->{verbose} > 1);
173	0					push @chapters, $ch_url;
174						}
175						}
176						}
177
178	0					return \@chapters;
179						} # parse_chapter_urls
180
181						=head2 parse_title
182
183						Get the title from the content
184
185						=cut
186						sub parse_title {
187	0		0	1		my $self = shift;
188	0					my %args = (
189						url=>'',
190						content=>'',
191						@_
192						);
193
194	0					my $content = $args{content};
195	0					my $title = '';
196	0	0				if ($content =~ m#<div id="pagetitle"><a href="viewstory.php\?sid=\d+">([^<]+)</a>#s)
197						{
198	0					$title = $1;
199						}
200						else
201						{
202	0					$title = $self->SUPER::parse_title(%args);
203						}
204	0					return $title;
205						} # parse_title
206
207						=head2 parse_author
208
209						Get the author from the content
210
211						=cut
212						sub parse_author {
213	0		0	1		my $self = shift;
214	0					my %args = (
215						url=>'',
216						content=>'',
217						@_
218						);
219
220	0					my $content = $args{content};
221	0					my $author = '';
222	0	0				if ($content =~ m#\sby\s<a href="viewuser.php\?uid=\d+">([^<]+)</a></div>#s)
223						{
224	0					$author = $1;
225						}
226						else
227						{
228	0					$author = $self->SUPER::parse_author(%args);
229						}
230	0					return $author;
231						} # parse_author
232
233						=head2 parse_summary
234
235						Get the summary from the content
236
237						=cut
238						sub parse_summary {
239	0		0	1		my $self = shift;
240	0					my %args = (
241						url=>'',
242						content=>'',
243						@_
244						);
245
246	0					my $content = $args{content};
247	0					my $summary = '';
248	0	0				if ($content =~ m#<span class="label">Summary:\s</span>\s(.?)\s<br#s)
249						{
250	0					$summary = $1;
251						}
252						else
253						{
254	0					$summary = $self->SUPER::parse_summary(%args);
255						}
256	0					return $summary;
257						} # parse_summary
258
259						=head2 parse_characters
260
261						Get the characters from the content
262
263						=cut
264						sub parse_characters {
265	0		0	1		my $self = shift;
266	0					my %args = (
267						url=>'',
268						content=>'',
269						@_
270						);
271
272	0					my $content = $args{content};
273	0					my $characters = '';
274	0	0				if ($content =~ m#<span class="label">Characters:\s</span>(.?)<br#s)
275						{
276	0					my $chars_str = $1;
277	0					my @chars = ();
278	0					while ($chars_str =~ m#charid=\d*'>([^<]+)</a>#sg)
279						{
280	0					my $character = $1;
281	0					push @chars, $character;
282						}
283	0					$characters = join(', ', @chars);
284						}
285						else
286						{
287	0					$characters = $self->SUPER::parse_characters(%args);
288						}
289	0					return $characters;
290						} # parse_characters
291
292						1; # End of WWW::FetchStory::Fetcher::PotterPlace
293						__END__