File Coverage

blib/lib/WWW/FetchStory/Fetcher/RestrictedSection.pm

Criterion	Covered	Total	%
statement	9	94	9.5
branch	0	30	0.0
condition			n/a
subroutine	3	12	25.0
pod	9	9	100.0
total	21	145	14.4

line	stmt	bran	sub	pod	time	code
1						package WWW::FetchStory::Fetcher::RestrictedSection;
2						$WWW::FetchStory::Fetcher::RestrictedSection::VERSION = '0.2602';
3	1		1		212244	use strict;
	1				2
	1				45
4	1		1		7	use warnings;
	1				3
	1				85
5						=head1 NAME
6
7						WWW::FetchStory::Fetcher::RestrictedSection - fetching module for WWW::FetchStory
8
9						=head1 VERSION
10
11						version 0.2602
12
13						=head1 DESCRIPTION
14
15						This is the RestrictedSection story-fetching plugin for WWW::FetchStory.
16
17						=cut
18
19	1		1		39	use parent qw(WWW::FetchStory::Fetcher);
	1				3
	1				8
20
21						=head1 METHODS
22
23						=head2 info
24
25						Information about the fetcher.
26
27						$info = $self->info();
28
29						=cut
30
31						sub info {
32	0		0	1		my $self = shift;
33
34	0					my $info = "(http://restrictedsection.org) An adult Harry Potter fiction archive.";
35
36	0					return $info;
37						} # info
38
39						=head2 priority
40
41						The priority of this fetcher. Fetchers with higher priority
42						get tried first. This is useful where there may be a generic
43						fetcher for a particular site, and then a more specialized fetcher
44						for particular sections of a site.
45
46						This must be overridden by the specific fetcher class.
47
48						$priority = $self->priority();
49
50						$priority = WWW::FetchStory::Fetcher::priority($class);
51
52						=cut
53
54						sub priority {
55	0		0	1		my $class = shift;
56
57	0					return 2;
58						} # priority
59
60						=head2 allow
61
62						If this fetcher can be used for the given URL, then this returns
63						true.
64						This must be overridden by the specific fetcher class.
65
66						if ($obj->allow($url))
67						{
68						....
69						}
70
71						=cut
72
73						sub allow {
74	0		0	1		my $self = shift;
75	0					my $url = shift;
76
77	0					return ($url =~ /restrictedsection\.org/);
78						} # allow
79
80						=head1 Private Methods
81
82						=head2 extract_story
83
84						Extract the story-content from the fetched content.
85
86						my ($story, $title) = $self->extract_story(content=>$content,
87						title=>$title);
88
89						=cut
90
91						sub extract_story {
92	0		0	1		my $self = shift;
93	0					my %args = (
94						content=>'',
95						title=>'',
96						@_
97						);
98	0					my $content = $args{content};
99
100	0					my $title = $args{title};
101
102	0					my $chapter = $self->parse_ch_title(%args);
103	0	0				warn "chapter=$chapter\n" if ($self->{verbose} > 1);
104
105	0					my $story = '';
106	0	0				if ($content =~ m#<td id="page_content">(.?)</td></tr>\s<tr class="inverse" id="page_footer">#s)
107						{
108	0					$story = $1;
109						}
110
111	0	0				if ($story)
112						{
113	0					$story = $self->tidy_chars($story);
114						}
115						else
116						{
117	0					die "Failed to extract story for $title";
118						}
119
120	0					my $story_title = "$title: $chapter";
121	0	0				$story_title = $title if ($title eq $chapter);
122	0	0				$story_title = $title if ($chapter eq '');
123
124	0					return ($story, $story_title);
125						} # extract_story
126
127						=head2 parse_toc
128
129						Parse the table-of-contents file.
130
131						%info = $self->parse_toc(content=>$content,
132						url=>$url,
133						urls=>\@urls);
134
135						This should return a hash containing:
136
137						=over
138
139						=item chapters
140
141						An array of URLs for the chapters of the story. In the case where the
142						story only takes one page, that will be the chapter.
143						In the case where multiple URLs have been passed in, it will be those URLs.
144
145						=item title
146
147						The title of the story.
148
149						=back
150
151						It may also return additional information, such as Summary.
152
153						=cut
154
155						sub parse_toc {
156	0		0	1		my $self = shift;
157	0					my %args = (
158						url=>'',
159						content=>'',
160						@_
161						);
162
163	0					my %info = ();
164	0					my $content = $args{content};
165
166	0					$info{url} = $args{url};
167	0					my $sid='';
168	0	0				if ($args{url} =~ m#file=(\d+)#)
		0
169						{
170	0					$sid = $1;
171						}
172						elsif ($args{url} =~ m#story=(\d+)#)
173						{
174	0					$sid = $1;
175						}
176						else
177						{
178	0					return $self->SUPER::parse_toc(%args);
179						}
180	0					$info{title} = $self->tidy_chars($self->parse_title(%args));
181	0					$info{author} = $self->parse_author(%args);
182	0					$info{summary} = $self->parse_summary(%args);
183	0					$info{characters} = $self->parse_characters(%args);
184	0					$info{universe} = 'Harry Potter';
185	0					$info{category} = $self->parse_category(%args);
186	0					$info{rating} = 'Adult';
187	0					$info{chapters} = $self->parse_chapter_urls(%args, sid=>$sid);
188
189	0					return %info;
190						} # parse_toc
191
192						=head2 parse_chapter_urls
193
194						Figure out the URLs for the chapters of this story.
195
196						=cut
197						sub parse_chapter_urls {
198	0		0	1		my $self = shift;
199	0					my %args = (
200						url=>'',
201						content=>'',
202						@_
203						);
204	0					my $content = $args{content};
205	0					my $sid = $args{sid};
206	0					my @chapters = ();
207	0	0				if (defined $args{urls})
208						{
209	0					@chapters = @{$args{urls}};
	0
210						}
211	0	0				if (@chapters == 1)
212						{
213	0	0				if ($args{url} =~ /file.php/) # a single file
214						{
215	0					@chapters = ($args{url});
216						}
217						else
218						{
219	0					@chapters = ();
220	0					my $fmt = 'http://www.restrictedsection.org/file.php?file=%d';
221	0					while ($content =~ m#file\.php\?file=(\d+)'#gs)
222						{
223	0					my $ch = $1;
224	0					my $ch_url = sprintf($fmt, $ch);
225	0	0				warn "chapter=$ch_url\n" if ($self->{verbose} > 1);
226	0					push @chapters, $ch_url;
227						}
228						}
229						}
230
231	0					return \@chapters;
232						} # parse_chapter_urls
233
234						=head2 parse_title
235
236						Get the title from the content
237
238						=cut
239						sub parse_title {
240	0		0	1		my $self = shift;
241	0					my %args = (
242						url=>'',
243						content=>'',
244						@_
245						);
246
247	0					my $content = $args{content};
248	0					my $title = '';
249	0	0				if ($content =~ m#<a href="story\.php\?story=\d+">([^<]+)</a>#m)
		0
		0
250						{
251	0					$title = $1;
252						}
253						elsif ($content =~ m#<title>\sRestrictedSection\.org\s-\sStory Info\s-\s([^<]+)\s</title>#is)
254						{
255	0					$title = $1;
256						}
257						elsif ($content =~ m#<title>\sRestrictedSection\.org\s-\s([^<]+)\s</title>#is)
258						{
259	0					$title = $1;
260						}
261						else
262						{
263	0					$title = $self->SUPER::parse_title(%args);
264						}
265	0					return $title;
266						} # parse_title
267
268						=head2 parse_author
269
270						Get the author from the content
271
272						=cut
273						sub parse_author {
274	0		0	1		my $self = shift;
275	0					my %args = (
276						url=>'',
277						content=>'',
278						@_
279						);
280
281	0					my $content = $args{content};
282	0					my $author = '';
283	0	0				if ($content =~ m#<a href="author\.php\?author=\d+">([^<]+)</a>#)
284						{
285	0					$author = $1;
286						}
287						else
288						{
289	0					$author = $self->SUPER::parse_author(%args);
290						}
291	0					return $author;
292						} # parse_author
293
294						=head2 parse_ch_title
295
296						Get the chapter title from the content
297
298						=cut
299						sub parse_ch_title {
300	0		0	1		my $self = shift;
301	0					my %args = (
302						url=>'',
303						content=>'',
304						@_
305						);
306
307	0					my $title = $self->SUPER::parse_ch_title(%args);
308	0					$title = $self->tidy_chars($title);
309	0					return $title;
310						} # parse_ch_title
311
312						1; # End of WWW::FetchStory::Fetcher::RestrictedSection
313						__END__