File Coverage

blib/lib/WWW/FetchStory/Fetcher/Teaspoon.pm

Criterion	Covered	Total	%
statement	9	114	7.8
branch	0	60	0.0
condition			n/a
subroutine	3	10	30.0
pod	7	7	100.0
total	19	191	9.9

line	stmt	bran	sub	pod	time	code
1						package WWW::FetchStory::Fetcher::Teaspoon;
2						$WWW::FetchStory::Fetcher::Teaspoon::VERSION = '0.2602';
3	1		1		125673	use strict;
	1				1
	1				32
4	1		1		4	use warnings;
	1				1
	1				59
5						=head1 NAME
6
7						WWW::FetchStory::Fetcher::Teaspoon - fetching module for WWW::FetchStory
8
9						=head1 VERSION
10
11						version 0.2602
12
13						=head1 DESCRIPTION
14
15						This is the Teaspoon story-fetching plugin for WWW::FetchStory.
16
17						=cut
18
19	1		1		5	use parent qw(WWW::FetchStory::Fetcher);
	1				1
	1				6
20
21						=head2 info
22
23						Information about the fetcher.
24
25						$info = $self->info();
26
27						=cut
28
29						sub info {
30	0		0	1		my $self = shift;
31
32	0					my $info = "(http://www.whofic.com) A Teaspoon And An Open Mind; a Doctor Who fiction archive.";
33
34	0					return $info;
35						} # info
36
37						=head2 priority
38
39						The priority of this fetcher. Fetchers with higher priority
40						get tried first. This is useful where there may be a generic
41						fetcher for a particular site, and then a more specialized fetcher
42						for particular sections of a site. For example, there may be a
43						generic Teaspoon fetcher, and then refinements for particular
44						Teaspoon community, such as the sshg_exchange community.
45						This works as either a class function or a method.
46
47						This must be overridden by the specific fetcher class.
48
49						$priority = $self->priority();
50
51						$priority = WWW::FetchStory::Fetcher::priority($class);
52
53						=cut
54
55						sub priority {
56	0		0	1		my $class = shift;
57
58	0					return 1;
59						} # priority
60
61						=head2 allow
62
63						If this fetcher can be used for the given URL, then this returns
64						true.
65						This must be overridden by the specific fetcher class.
66
67						if ($obj->allow($url))
68						{
69						....
70						}
71
72						=cut
73
74						sub allow {
75	0		0	1		my $self = shift;
76	0					my $url = shift;
77
78	0					return ($url =~ /whofic\.com/);
79						} # allow
80
81						=head1 Private Methods
82
83						=head2 extract_story
84
85						Extract the story-content from the fetched content.
86
87						my ($story, $title) = $self->extract_story(content=>$content,
88						title=>$title);
89
90						=cut
91
92						sub extract_story {
93	0		0	1		my $self = shift;
94	0					my %args = (
95						content=>'',
96						title=>'',
97						@_
98						);
99	0					my $content = $args{content};
100
101	0					my $user= '';
102	0					my $title = '';
103	0					my $url = '';
104	0	0				if ($content =~ m#<u><a name="top"></a>(.?) by ([\w\s])</u>#s)
105						{
106	0					$title = $1;
107	0					$user= $2;
108						}
109	0	0				warn "user=$user, title=$title\n" if ($self->{verbose} > 1);
110
111	0					my $story = '';
112	0	0				if ($content =~ m#(<strong>Summary:.*)<u>Disclaimer:</u>#s)
		0
113						{
114	0					$story = $1;
115						}
116						elsif ($content =~ m#<body[^>]>(.)</body>#s)
117						{
118	0					$story = $1;
119						}
120	0	0				if ($story)
121						{
122	0					$story = $self->tidy_chars($story);
123						}
124						else
125						{
126	0					return $content;
127						}
128
129	0					my $out = '';
130	0					$out .= "<h1>$title</h1>\n";
131	0					$out .= "<p>by $user</p>\n";
132	0					$out .= "<p>Title: $title</p>\n";
133	0					$out .= "<p>$story\n";
134	0					return ($out, $title);
135						} # extract_story
136
137						=head2 make_css
138
139						Create site-specific CSS styling.
140
141						$css = $self->make_css();
142
143						=cut
144
145						sub make_css {
146	0		0	1		my $self = shift;
147
148	0					my $out = '';
149	0					$out .= <<EOT;
150						<style type="text/css">
151						.title {
152						font-weight: bold;
153						}
154						#notes {
155						border: solid black 1px;
156						padding: 4px;
157						}
158						</style>
159						EOT
160	0					return $out;
161						} # make_css
162
163						=head2 parse_toc
164
165						Parse the table-of-contents file.
166
167						%info = $self->parse_toc(content=>$content,
168						url=>$url,
169						urls=>\@urls);
170
171						This should return a hash containing:
172
173						=over
174
175						=item chapters
176
177						An array of URLs for the chapters of the story. In the case where the
178						story only takes one page, that will be the chapter.
179						In the case where multiple URLs have been passed in, it will be those URLs.
180
181						=item title
182
183						The title of the story.
184
185						=back
186
187						It may also return additional information, such as Summary.
188
189						=cut
190
191						sub parse_toc {
192	0		0	1		my $self = shift;
193	0					my %args = (
194						url=>'',
195						content=>'',
196						@_
197						);
198
199	0					my %info = ();
200	0					my $content = $args{content};
201
202	0					my $fmt = 'http://www.whofic.com/viewstory.php?action=printable&sid=%s&textsize=0&chapter=%d';
203
204	0					$info{url} = $args{url};
205	0					my $sid='';
206	0	0				if ($args{url} =~ m#sid=(\d+)#)
207						{
208	0					$sid = $1;
209						}
210						else
211						{
212	0					return $self->SUPER::parse_toc(%args);
213						}
214	0	0				if ($content =~ m#<b>([^<]+)</b> by <a href="viewuser.php\?uid=\d+">([^<]+)</a>#s)
215						{
216	0					$info{title} = $1;
217	0					$info{author} = $2;
218						}
219						else
220						{
221	0					$info{title} = $self->parse_title(%args);
222	0					$info{author} = $self->parse_author(%args);
223						}
224						# In order to get the summary and characters,
225						# look at the "print" version of chapter 1
226	0					my $ch1_url = sprintf($fmt, $sid, 1);
227	0					my $chapter1 = $self->get_page($ch1_url);
228	0					$info{summary} = $self->parse_summary(%args,content=>$chapter1);
229
230						# the "Categories" here is which Era it is, and we can get that from the Characters
231						# So let's look at the "Genres" instead.
232	0	0				if ($chapter1 =~ m#<strong>Genres:</strong>\s*([^<]+)<br>#s)
233						{
234	0					$info{category} = $1;
235						}
236
237	0					my $characters = $self->parse_characters(%args,content=>$chapter1);
238						# Rename the characters to match a different convention
239						# and filter out things like 'Other Characters'
240						# Do it in a hash because some characters get repeated in Multi-Era stories.
241	0					my @chars = split(/,\s*/, $characters);
242	0					my %char_hash = ();
243	0					foreach my $ch (@chars)
244						{
245	0	0				if ($ch =~ /The Doctor $(\d+\w+)$/)
		0
		0
		0
		0
246						{
247	0					my $numero = $1;
248	0	0				if ($numero eq '1st')
		0
		0
		0
		0
		0
		0
		0
		0
		0
		0
249						{
250	0					$char_hash{'First Doctor'} = 1;
251						}
252						elsif ($numero eq '2nd')
253						{
254	0					$char_hash{'Second Doctor'} = 1;
255						}
256						elsif ($numero eq '3rd')
257						{
258	0					$char_hash{'Third Doctor'} = 1;
259						}
260						elsif ($numero eq '4th')
261						{
262	0					$char_hash{'Fourth Doctor'} = 1;
263						}
264						elsif ($numero eq '5th')
265						{
266	0					$char_hash{'Fifth Doctor'} = 1;
267						}
268						elsif ($numero eq '6th')
269						{
270	0					$char_hash{'Sixth Doctor'} = 1;
271						}
272						elsif ($numero eq '7th')
273						{
274	0					$char_hash{'Seventh Doctor'} = 1;
275						}
276						elsif ($numero eq '8th')
277						{
278	0					$char_hash{'Eighth Doctor'} = 1;
279						}
280						elsif ($numero eq '9th')
281						{
282	0					$char_hash{'Ninth Doctor'} = 1;
283						}
284						elsif ($numero eq '10th')
285						{
286	0					$char_hash{'Tenth Doctor'} = 1;
287						}
288						elsif ($numero eq '11th')
289						{
290	0					$char_hash{'Eleventh Doctor'} = 1;
291						}
292						}
293						elsif ($ch =~ /The (Master\|Doctor)\s$(.)$/i)
294						{
295	0					my $who = $1;
296	0					my $when = $2;
297	0	0				if ($when =~ /(:?other\|unspecified\|author.created)/i)
298						{
299	0					$char_hash{$who} = 1;
300						}
301						else
302						{
303	0					$char_hash{"$when $who"} = 1;
304						}
305						}
306						elsif ($ch =~ /Romana.*author created/i)
307						{
308	0					$char_hash{'Romana'} = 1;
309						}
310						elsif ($ch =~ /(?:Other Character\|Original Companion\|Unspecified Companion\|None)/i)
311						{
312						# skip
313						}
314						elsif ($ch =~ /^The\s(.*)/)
315						{
316	0					$char_hash{$1} = 1;
317						}
318						else
319						{
320	0					$char_hash{$ch} = 1;
321						}
322						}
323	0	0				if (%char_hash)
324						{
325	0					$info{characters} = join(', ', sort keys %char_hash);
326						}
327	0					$info{universe} = 'Doctor Who';
328	0					$info{chapters} = $self->parse_chapter_urls(%args,
329						sid=>$sid, fmt=>$fmt);
330
331	0					return %info;
332						} # parse_toc
333
334						=head2 parse_chapter_urls
335
336						Figure out the URLs for the chapters of this story.
337
338						=cut
339						sub parse_chapter_urls {
340	0		0	1		my $self = shift;
341	0					my %args = (
342						url=>'',
343						content=>'',
344						@_
345						);
346	0					my $content = $args{content};
347	0					my $sid = $args{sid};
348	0					my $fmt = $args{fmt};
349	0					my @chapters = ();
350	0	0				if (defined $args{urls})
351						{
352	0					@chapters = @{$args{urls}};
	0
353						}
354	0	0				if (@chapters == 1)
355						{
356						# fortunately Teaspoon has a sane chapter system
357	0	0				if ($content =~ m#chapter=all#s)
358						{
359	0					@chapters = ();
360	0					while ($content =~ m#<a href="viewstory.php\?sid=${sid}&chapter=(\d+)">#sg)
361						{
362	0					my $ch_num = $1;
363	0					my $ch_url = sprintf($fmt, $sid, $ch_num);
364	0	0				warn "chapter=$ch_url\n" if ($self->{verbose} > 1);
365	0					push @chapters, $ch_url;
366						}
367						}
368						else
369						{
370	0					@chapters = (sprintf($fmt, $sid, 1));
371						}
372						}
373
374	0					return \@chapters;
375						} # parse_chapter_urls
376
377						1; # End of WWW::FetchStory::Fetcher::Teaspoon
378						__END__