File Coverage

blib/lib/WWW/FetchStory/Fetcher/TardisBigBang3.pm

Criterion	Covered	Total	%
statement	9	87	10.3
branch	0	22	0.0
condition			n/a
subroutine	3	11	27.2
pod	8	8	100.0
total	20	128	15.6

line	stmt	bran	sub	pod	time	code
1						package WWW::FetchStory::Fetcher::TardisBigBang3;
2						$WWW::FetchStory::Fetcher::TardisBigBang3::VERSION = '0.2602';
3	1		1		148948	use strict;
	1				3
	1				41
4	1		1		6	use warnings;
	1				2
	1				72
5						=head1 NAME
6
7						WWW::FetchStory::Fetcher::TardisBigBang3 - fetching module for WWW::FetchStory
8
9						=head1 VERSION
10
11						version 0.2602
12
13						=head1 DESCRIPTION
14
15						This is the TardisBigBang3 story-fetching plugin for WWW::FetchStory.
16
17						=cut
18
19	1		1		10	use parent qw(WWW::FetchStory::Fetcher);
	1				2
	1				8
20
21						=head2 info
22
23						Information about the fetcher.
24
25						$info = $self->info();
26
27						=cut
28
29						sub info {
30	0		0	1		my $self = shift;
31
32	0					my $info = "(http://www.tardisbigbang.com/Round3/) Round 3 of the TARDIS BigBang challenge.";
33
34	0					return $info;
35						} # info
36
37						=head2 priority
38
39						The priority of this fetcher. Fetchers with higher priority
40						get tried first. This is useful where there may be a generic
41						fetcher for a particular site, and then a more specialized fetcher
42						for particular sections of a site.
43
44						This must be overridden by the specific fetcher class.
45
46						$priority = $self->priority();
47
48						$priority = WWW::FetchStory::Fetcher::priority($class);
49
50						=cut
51
52						sub priority {
53	0		0	1		my $class = shift;
54
55	0					return 1;
56						} # priority
57
58						=head2 allow
59
60						If this fetcher can be used for the given URL, then this returns
61						true.
62						This must be overridden by the specific fetcher class.
63
64						if ($obj->allow($url))
65						{
66						....
67						}
68
69						=cut
70
71						sub allow {
72	0		0	1		my $self = shift;
73	0					my $url = shift;
74
75	0					return ($url =~ /www\.tardisbigbang\.com\/Round3/);
76						} # allow
77
78						=head1 Private Methods
79
80						=head2 extract_story
81
82						Extract the story-content from the fetched content.
83
84						my ($story, $title) = $self->extract_story(content=>$content,
85						title=>$title);
86
87						=cut
88
89						sub extract_story {
90	0		0	1		my $self = shift;
91	0					my %args = (
92						content=>'',
93						title=>'',
94						@_
95						);
96
97	0					my $content = $args{content};
98	0					my $title = $args{title};
99	0					my $story = '';
100	0	0				if ($content =~ m#<div class="main">(.?)</div>\s<p class="bottomcomment">#s)
		0
101						{
102	0					$story = $1;
103						}
104						elsif ($content =~ m#<body[^>]>(.)</body>#is)
105						{
106	0					$story = $1;
107						}
108
109	0	0				if ($story)
110						{
111	0					$story = $self->tidy_chars($story);
112						}
113						else
114						{
115	0					$story = $content;
116						}
117
118	0					return ($story, $title);
119						} # extract_story
120
121						=head2 parse_toc
122
123						Parse the table-of-contents file.
124
125						%info = $self->parse_toc(content=>$content,
126						url=>$url,
127						urls=>\@urls);
128
129						This should return a hash containing:
130
131						=over
132
133						=item chapters
134
135						An array of URLs for the chapters of the story. In the case where the
136						story only takes one page, that will be the chapter.
137						In the case where multiple URLs have been passed in, it will be those URLs.
138
139						=item title
140
141						The title of the story.
142
143						=back
144
145						It may also return additional information, such as Summary.
146
147						=cut
148
149						sub parse_toc {
150	0		0	1		my $self = shift;
151	0					my %args = (
152						url=>'',
153						content=>'',
154						@_
155						);
156
157	0					my %info = ();
158	0					my $content = $args{content};
159
160	0					my @chapters = ();
161
162	0					$info{url} = $args{url};
163	0					my $sid='';
164	0	0				if ($args{url} =~ m#storyID=(S\d+)#)
165						{
166	0					$sid = $1;
167						}
168						else
169						{
170	0					return $self->SUPER::parse_toc(%args);
171						}
172	0					$info{author} = $self->parse_author(%args);
173	0					$info{title} = $self->parse_title(%args);
174	0					$info{summary} = $self->parse_summary(%args);
175	0	0				if ($content =~ m#<span class="storyinfo">([\w\s]+) \\| ([\w-]+) \\| (.*?) \\| ([\d,]+) words</span>#)
176						{
177	0					$info{universe} = $1;
178	0					$info{rating} = $2;
179	0					$info{summary2} = $3;
180	0					$info{size} = $4;
181
182	0					$info{size} =~ s/,//g;
183	0					$info{size} .= 'w';
184	0					$info{universe} =~ s/New Who/Doctor Who/;
185						}
186						else
187						{
188	0					$info{universe} = 'Doctor Who';
189						}
190	0					$info{chapters} = $self->parse_chapter_urls(%args, sid=>$sid);
191
192	0					return %info;
193						} # parse_toc
194
195						=head2 parse_chapter_urls
196
197						Figure out the URLs for the chapters of this story.
198
199						=cut
200						sub parse_chapter_urls {
201	0		0	1		my $self = shift;
202	0					my %args = (
203						url=>'',
204						content=>'',
205						@_
206						);
207	0					my $content = $args{content};
208	0					my $sid = $args{sid};
209	0					my @chapters = ();
210	0	0				if (defined $args{urls})
211						{
212	0					@chapters = @{$args{urls}};
	0
213						}
214	0	0				if (@chapters == 1)
215						{
216	0	0				if ($content =~ m#part=2#)
217						{
218	0					my $fmt = $args{url};
219	0					$fmt =~ s/part=\d+/part=\%d/;
220	0					while ($content =~ m#storyID=${sid}\&part=(\d+)">Part#sg)
221						{
222	0					my $ch_num = $1;
223	0					my $ch_url = sprintf($fmt, $ch_num);
224	0	0				warn "chapter=$ch_url\n" if ($self->{verbose} > 1);
225	0					push @chapters, $ch_url;
226						}
227						}
228						}
229
230	0					return \@chapters;
231						} # parse_chapter_urls
232
233						=head2 parse_author
234
235						Get the author from the content
236
237						=cut
238						sub parse_author {
239	0		0	1		my $self = shift;
240	0					my %args = (
241						url=>'',
242						content=>'',
243						@_
244						);
245
246	0					my $content = $args{content};
247	0					my $author = '';
248	0	0				if ($content =~ m#<p id="authorinfo">by <strong>([^<]+)</strong>#)
249						{
250	0					$author = $1;
251						}
252						else
253						{
254	0					$author = $self->SUPER::parse_author(%args);
255						}
256	0					return $author;
257						} # parse_author
258
259						=head2 parse_summary
260
261						Get the summary from the content
262
263						=cut
264						sub parse_summary {
265	0		0	1		my $self = shift;
266	0					my %args = (
267						url=>'',
268						content=>'',
269						@_
270						);
271
272	0					my $content = $args{content};
273	0					my $summary = '';
274	0	0				if ($content =~ m#<p class="summary">(.*?)</p>#)
275						{
276	0					$summary = $1;
277						}
278						else
279						{
280	0					$summary = $self->SUPER::parse_summary(%args);
281						}
282	0					return $summary;
283						} # parse_summary
284
285						1; # End of WWW::FetchStory::Fetcher::TardisBigBang3
286						__END__