File Coverage

blib/lib/WWW/FetchStory/Fetcher/FictionAlley.pm

Criterion	Covered	Total	%
statement	9	104	8.6
branch	0	30	0.0
condition			n/a
subroutine	3	14	21.4
pod	11	11	100.0
total	23	159	14.4

line	stmt	bran	sub	pod	time	code
1						package WWW::FetchStory::Fetcher::FictionAlley;
2						$WWW::FetchStory::Fetcher::FictionAlley::VERSION = '0.2602';
3	1		1		132773	use strict;
	1				2
	1				35
4	1		1		3	use warnings;
	1				1
	1				57
5						=head1 NAME
6
7						WWW::FetchStory::Fetcher::FictionAlley - fetching module for WWW::FetchStory
8
9						=head1 VERSION
10
11						version 0.2602
12
13						=head1 DESCRIPTION
14
15						This is the FictionAlley story-fetching plugin for WWW::FetchStory.
16
17						=cut
18
19	1		1		4	use parent qw(WWW::FetchStory::Fetcher);
	1				4
	1				6
20
21						=head1 METHODS
22
23						=head2 new
24
25						$obj->WWW::FetchStory::Fetcher->new();
26
27						=cut
28
29						sub new {
30	0		0	1		my $class = shift;
31	0					my $self = $class->SUPER::new(@_);
32						# disable the User-Agent for FictionAlley
33						# because it blocks wget
34	0					$self->{wget} .= " --user-agent=''";
35
36	0					return ($self);
37						} # new
38
39						=head2 info
40
41						Information about the fetcher.
42
43						$info = $self->info();
44
45						=cut
46
47						sub info {
48	0		0	1		my $self = shift;
49
50	0					my $info = "(http://www.fictionalley.org/) A Harry Potter fiction archive.";
51
52	0					return $info;
53						} # info
54
55						=head2 priority
56
57						The priority of this fetcher. Fetchers with higher priority
58						get tried first. This is useful where there may be a generic
59						fetcher for a particular site, and then a more specialized fetcher
60						for particular sections of a site. For example, there may be a
61						generic FictionAlley fetcher, and then refinements for particular
62						FictionAlley community, such as the sshg_exchange community.
63						This works as either a class function or a method.
64
65						This must be overridden by the specific fetcher class.
66
67						$priority = $self->priority();
68
69						$priority = WWW::FetchStory::Fetcher::priority($class);
70
71						=cut
72
73						sub priority {
74	0		0	1		my $class = shift;
75
76	0					return 1;
77						} # priority
78
79						=head2 allow
80
81						If this fetcher can be used for the given URL, then this returns
82						true.
83						This must be overridden by the specific fetcher class.
84
85						if ($obj->allow($url))
86						{
87						....
88						}
89
90						=cut
91
92						sub allow {
93	0		0	1		my $self = shift;
94	0					my $url = shift;
95
96	0					return ($url =~ /(fictionalley\|schnoogle\.com\|astronomytower\.org\|riddikulus\.org\|thedarkarts\.org)/);
97						} # allow
98
99						=head1 Private Methods
100
101						=head2 extract_story
102
103						Extract the story-content from the fetched content.
104
105						($story, $title) = $self->extract_story(content=>$content,
106						title=>$title);
107
108						=cut
109
110						sub extract_story {
111	0		0	1		my $self = shift;
112	0					my %args = (
113						content=>'',
114						title=>'',
115						@_
116						);
117	0					my $content = $args{content};
118
119	0					my $story = '';
120	0					my $story_title = '';
121	0	0				if ($content =~ m#<title>Astronomy Tower -\s+(.*?)</title>#)
		0
		0
122						{
123	0					$story_title = $1;
124						}
125						elsif ($content =~ m#<title>\w+ -\s+(.*?)</title>#)
126						{
127	0					$story_title = $1;
128						}
129						elsif ($content =~ m#<title>(.*?)</title>#)
130						{
131	0					$story_title = $1;
132						}
133	0	0				if ($content =~ m#<!-- headerstart -->(.*?)<!-- footerstart -->#s)
134						{
135	0					$story = $1;
136						}
137	0	0				if ($story)
138						{
139	0					$story = $self->tidy_chars($story);
140						}
141						else
142						{
143	0					return $content;
144						}
145	0	0				warn "title=$story_title\n" if ($self->{verbose} > 1);
146
147	0					my $out = <<EOT;
148						<h1>$story_title</h1>
149						$story
150						EOT
151	0					return $out;
152						} # extract_story
153
154						=head2 parse_toc
155
156						Parse the table-of-contents file.
157
158						%info = $self->parse_toc(content=>$content,
159						url=>$url,
160						urls=>\@urls);
161
162						This should return a hash containing:
163
164						=over
165
166						=item chapters
167
168						An array of URLs for the chapters of the story. In the case where the
169						story only takes one page, that will be the chapter.
170						In the case where multiple URLs have been passed in, it will be those URLs.
171
172						=item title
173
174						The title of the story.
175
176						=back
177
178						It may also return additional information, such as Summary.
179
180						=cut
181
182						sub parse_toc {
183	0		0	1		my $self = shift;
184	0					my %args = (
185						url=>'',
186						content=>'',
187						@_
188						);
189
190	0					my %info = ();
191	0					my $content = $args{content};
192
193	0					my @chapters = ();
194	0					$info{url} = $args{url};
195	0					$info{title} = $self->parse_title(%args);
196	0					$info{author} = $self->parse_author(%args);
197	0					$info{summary} = $self->parse_summary(%args);
198	0					$info{characters} = $self->parse_characters(%args);
199	0					$info{universe} = 'Harry Potter';
200	0					$info{chapters} = $self->parse_chapter_urls(%args);
201
202	0					return %info;
203						} # parse_toc
204
205						=head2 parse_chapter_urls
206
207						Figure out the URLs for the chapters of this story.
208
209						=cut
210						sub parse_chapter_urls {
211	0		0	1		my $self = shift;
212	0					my %args = (
213						url=>'',
214						content=>'',
215						@_
216						);
217	0					my $content = $args{content};
218	0					my @chapters = ();
219	0	0				if (defined $args{urls})
220						{
221	0					@chapters = @{$args{urls}};
	0
222						}
223	0	0				if (@chapters == 1)
224						{
225	0					@chapters = ();
226	0					while ($content =~ m#<a href\s=\s"(http://www.fictionalley.org/authors/\w+/\w+\.html)"\sclass\s=\s*"chapterlink">#g)
227						{
228	0					my $ch_url = $1;
229	0	0				warn "chapter=$ch_url\n" if ($self->{verbose} > 1);
230	0					push @chapters, $ch_url;
231						}
232						}
233
234	0					return \@chapters;
235						} # parse_chapter_urls
236
237						=head2 parse_ch_title
238
239						Get the chapter title from the content
240
241						=cut
242						sub parse_ch_title {
243	0		0	1		my $self = shift;
244	0					my %args = (
245						content=>'',
246						@_
247						);
248
249	0					my $content = $args{content};
250	0					my $title = '';
251	0	0				if ($content =~ /$(Chapter \d+)$/si)
252						{
253	0					$title = $1;
254						}
255						else
256						{
257	0					$title = $self->parse_title(%args);
258						}
259	0					$title =~ s/<u>//ig;
260	0					$title =~ s/<\/u>//ig;
261	0					return $title;
262						} # parse_ch_title
263
264						=head2 parse_title
265
266						Get the title from the content
267
268						=cut
269						sub parse_title {
270	0		0	1		my $self = shift;
271	0					my %args = (
272						url=>'',
273						content=>'',
274						@_
275						);
276
277	0					my $content = $args{content};
278	0					my $title = '';
279	0	0				if ($content =~ m/<h1\sclass\s=\s"title"[^>]>([^<]+)\s+by\s+<a/s)
280						{
281	0					$title = $1;
282	0					$title =~ s/'/'/g;
283						}
284						else
285						{
286	0					$title = $self->SUPER::parse_title(%args);
287						}
288	0					return $title;
289						} # parse_title
290
291						=head2 parse_author
292
293						Get the author from the content
294
295						=cut
296						sub parse_author {
297	0		0	1		my $self = shift;
298	0					my %args = (
299						url=>'',
300						content=>'',
301						@_
302						);
303
304	0					my $content = $args{content};
305	0					my $author = '';
306	0	0				if ($content =~ m!"http://www.fictionalley.org/authors/\w+/">([^<]+)</a>!s)
		0
307						{
308	0					$author = $1;
309						}
310						elsif ($content =~ m/<h1\sclass\s=\s"title"[^>]>[^<]+\sby\s<a href="[^"]+">([^<]+)<\/a>/s)
311						{
312	0					$author = $1;
313						}
314						else
315						{
316	0					$author = $self->SUPER::parse_author(%args);
317						}
318	0					$author =~ s/\./ /g;
319	0					$author =~ s/\s\s+/ /g;
320	0					return $author;
321						} # parse_author
322
323						=head2 parse_summary
324
325						Get the summary from the content
326
327						=cut
328						sub parse_summary {
329	0		0	1		my $self = shift;
330	0					my %args = (
331						url=>'',
332						content=>'',
333						@_
334						);
335
336	0					my $content = $args{content};
337	0					my $summary = '';
338	0	0				if ($content =~ m{<div class="summary" align = "center">Rating: \w+<br />\s*<i>([^<]+)</i></div>}s)
		0
339						{
340	0					$summary = $1;
341						}
342						elsif ($content =~ /<div class="summary"[^>]>[\w\s]<br \/>\s<i>\s(.?)<\/i>\s<\/div>/s)
343						{
344	0					$summary = $1;
345						}
346						else
347						{
348	0					$summary = $self->SUPER::parse_summary(%args);
349						}
350	0					return $summary;
351						} # parse_summary
352
353						1; # End of WWW::FetchStory::Fetcher::FictionAlley
354						__END__