File Coverage

blib/lib/WWW/FetchStory/Fetcher/Owl.pm

Criterion	Covered	Total	%
statement	9	88	10.2
branch	0	22	0.0
condition			n/a
subroutine	3	12	25.0
pod	9	9	100.0
total	21	131	16.0

line	stmt	bran	sub	pod	time	code
1						package WWW::FetchStory::Fetcher::Owl;
2						$WWW::FetchStory::Fetcher::Owl::VERSION = '0.2602';
3	1		1		142619	use strict;
	1				3
	1				47
4	1		1		4	use warnings;
	1				2
	1				77
5						=head1 NAME
6
7						WWW::FetchStory::Fetcher::Owl - fetching module for WWW::FetchStory
8
9						=head1 VERSION
10
11						version 0.2602
12
13						=head1 DESCRIPTION
14
15						This is the Owl story-fetching plugin for WWW::FetchStory.
16
17						=cut
18
19	1		1		5	use parent qw(WWW::FetchStory::Fetcher);
	1				1
	1				16
20
21						=head2 info
22
23						Information about the fetcher.
24
25						$info = $self->info();
26
27						=cut
28
29						sub info {
30	0		0	1		my $self = shift;
31
32	0					my $info = "(http://owl.tauri.org/) A Harry Potter fiction archive.";
33
34	0					return $info;
35						} # info
36
37						=head2 priority
38
39						The priority of this fetcher. Fetchers with higher priority
40						get tried first. This is useful where there may be a generic
41						fetcher for a particular site, and then a more specialized fetcher
42						for particular sections of a site. For example, there may be a
43						generic Owl fetcher, and then refinements for particular
44						Owl community, such as the sshg_exchange community.
45						This works as either a class function or a method.
46
47						This must be overridden by the specific fetcher class.
48
49						$priority = $self->priority();
50
51						$priority = WWW::FetchStory::Fetcher::priority($class);
52
53						=cut
54
55						sub priority {
56	0		0	1		my $class = shift;
57
58	0					return 1;
59						} # priority
60
61						=head2 allow
62
63						If this fetcher can be used for the given URL, then this returns
64						true.
65						This must be overridden by the specific fetcher class.
66
67						if ($obj->allow($url))
68						{
69						....
70						}
71
72						=cut
73
74						sub allow {
75	0		0	1		my $self = shift;
76	0					my $url = shift;
77
78	0					return ($url =~ /owl\.tauri\.org/);
79						} # allow
80
81						=head1 Private Methods
82
83						=head2 extract_story
84
85						Extract the story-content from the fetched content.
86
87						my ($story, $title) = $self->extract_story(content=>$content,
88						title=>$title);
89
90						=cut
91
92						sub extract_story {
93	0		0	1		my $self = shift;
94	0					my %args = (
95						content=>'',
96						title=>'',
97						@_
98						);
99
100	0					my $content = $args{content};
101	0					my $story = '';
102	0					my $title = '';
103	0	0				if ($content =~ m#<title>OWL\s::\s([^<]+)</title>#)
104						{
105	0					$title = $1;
106						}
107						else
108						{
109	0					$title = $args{title};
110						}
111
112	0	0				if ($content =~ m#<div class=pagehead></div>(.*?)<div align=left><span class=credits>#s)
		0
113						{
114	0					$story = $1;
115						}
116						elsif ($content =~ m#<body[^>]>(.)</body>#is)
117						{
118	0					$story = $1;
119						}
120
121	0	0				if ($story)
122						{
123	0					$story = $self->tidy_chars($story);
124						}
125						else
126						{
127	0					$story = $content;
128						}
129
130	0					return ($story, $title);
131						} # extract_story
132
133						=head2 parse_toc
134
135						Parse the table-of-contents file.
136
137						%info = $self->parse_toc(content=>$content,
138						url=>$url,
139						urls=>\@urls);
140
141						This should return a hash containing:
142
143						=over
144
145						=item chapters
146
147						An array of URLs for the chapters of the story. In the case where the
148						story only takes one page, that will be the chapter.
149						In the case where multiple URLs have been passed in, it will be those URLs.
150
151						=item title
152
153						The title of the story.
154
155						=back
156
157						It may also return additional information, such as Summary.
158
159						=cut
160
161						sub parse_toc {
162	0		0	1		my $self = shift;
163	0					my %args = (
164						url=>'',
165						content=>'',
166						@_
167						);
168
169	0					my %info = ();
170	0					my $content = $args{content};
171	0					$info{url} = $args{url};
172	0					my $sid='';
173	0	0				if ($args{url} =~ m#psid=(\d+)#)
174						{
175	0					$sid = $1;
176						}
177						else
178						{
179	0					return $self->SUPER::parse_toc(%args);
180						}
181
182	0					$info{title} = $self->parse_title(%args);
183	0					$info{author} = $self->parse_author(%args);
184	0					$info{summary} = $self->parse_summary(%args);
185	0					$info{characters} = $self->parse_characters(%args);
186	0					$info{universe} = 'Harry Potter';
187	0					$info{chapters} = $self->parse_chapter_urls(%args, sid=>$sid);
188
189	0					return %info;
190						} # parse_toc
191
192						=head2 parse_chapter_urls
193
194						Figure out the URLs for the chapters of this story.
195
196						=cut
197						sub parse_chapter_urls {
198	0		0	1		my $self = shift;
199	0					my %args = (
200						url=>'',
201						content=>'',
202						@_
203						);
204	0					my $content = $args{content};
205	0					my $sid = $args{sid};
206	0					my @chapters = ();
207	0	0				if (defined $args{urls})
208						{
209	0					@chapters = @{$args{urls}};
	0
210						}
211	0	0				if (@chapters == 1)
212						{
213	0					@chapters = ();
214						# Owl does not have a sane chapter system
215	0					my $fmt = 'http://owl.tauri.org/stories.php?sid=%d&action=print';
216	0					while ($content =~ m#stories.php\?sid=(\d+)#sg)
217						{
218	0					my $ch_sid = $1;
219	0					my $ch_url = sprintf($fmt, $ch_sid);
220	0	0				warn "chapter=$ch_url\n" if ($self->{verbose} > 1);
221	0					push @chapters, $ch_url;
222						}
223						}
224
225	0					return \@chapters;
226						} # parse_chapter_urls
227
228						=head2 parse_title
229
230						Get the title from the content
231
232						=cut
233						sub parse_title {
234	0		0	1		my $self = shift;
235	0					my %args = (
236						url=>'',
237						content=>'',
238						@_
239						);
240
241	0					my $content = $args{content};
242	0					my $title = $self->SUPER::parse_title(%args);
243	0	0				if ($title =~ m#OWL\s::\s([^<]+)#)
244						{
245	0					$title = $1;
246						}
247	0					return $title;
248						} # parse_title
249
250						=head2 parse_author
251
252						Get the author from the content
253
254						=cut
255						sub parse_author {
256	0		0	1		my $self = shift;
257	0					my %args = (
258						url=>'',
259						content=>'',
260						@_
261						);
262
263	0					my $content = $args{content};
264	0					my $author = '';
265	0	0				if ($content =~ m#by <a href="users.php\?uid=\d+">([^<]+)</a>#s)
266						{
267	0					$author = $1;
268						}
269						else
270						{
271	0					$author = $self->SUPER::parse_author(%args);
272						}
273	0					return $author;
274						} # parse_author
275
276						=head2 parse_summary
277
278						Get the summary from the content
279
280						=cut
281						sub parse_summary {
282	0		0	1		my $self = shift;
283	0					my %args = (
284						url=>'',
285						content=>'',
286						@_
287						);
288
289	0					my $content = $args{content};
290	0					my $summary = '';
291	0	0				if ($content =~ m#<span class=summary>([^<]+)</span>#s)
292						{
293	0					$summary = $1;
294						}
295						else
296						{
297	0					$summary = $self->SUPER::parse_summary(%args);
298						}
299	0					return $summary;
300						} # parse_summary
301
302						1; # End of WWW::FetchStory::Fetcher::Owl
303						__END__