blib/lib/WWW/Scraper/ISBN/Yahoo_Driver.pm | |||
---|---|---|---|
Criterion | Covered | Total | % |
statement | 37 | 68 | 54.4 |
branch | 1 | 16 | 6.2 |
condition | 6 | 30 | 20.0 |
subroutine | 9 | 9 | 100.0 |
pod | 1 | 1 | 100.0 |
total | 54 | 124 | 43.5 |
line | stmt | bran | cond | sub | pod | time | code | ||
---|---|---|---|---|---|---|---|---|---|
1 | package WWW::Scraper::ISBN::Yahoo_Driver; | ||||||||
2 | |||||||||
3 | 6 | 6 | 179167 | use strict; | |||||
6 | 23 | ||||||||
6 | 420 | ||||||||
4 | 6 | 6 | 47 | use warnings; | |||||
6 | 203 | ||||||||
6 | 468 | ||||||||
5 | |||||||||
6 | 6 | 6 | 36 | use vars qw($VERSION @ISA); | |||||
6 | 16 | ||||||||
6 | 721 | ||||||||
7 | $VERSION = '0.21'; | ||||||||
8 | |||||||||
9 | #-------------------------------------------------------------------------- | ||||||||
10 | |||||||||
11 | =head1 NAME | ||||||||
12 | |||||||||
13 | WWW::Scraper::ISBN::Yahoo_Driver - Search driver for Yahoo Books online catalog. | ||||||||
14 | |||||||||
15 | =head1 SYNOPSIS | ||||||||
16 | |||||||||
17 | See parent class documentation (L<WWW::Scraper::ISBN::Driver>) | ||||||||
18 | |||||||||
19 | =head1 DESCRIPTION | ||||||||
20 | |||||||||
21 | Searches for book information from the Yahoo Books online catalog. | ||||||||
22 | |||||||||
23 | =cut | ||||||||
24 | |||||||||
25 | #-------------------------------------------------------------------------- | ||||||||
26 | |||||||||
27 | ########################################################################### | ||||||||
28 | # Inheritance | ||||||||
29 | |||||||||
30 | 6 | 6 | 44 | use base qw(WWW::Scraper::ISBN::Driver); | |||||
6 | 17 | ||||||||
6 | 6697 | ||||||||
31 | |||||||||
32 | ########################################################################### | ||||||||
33 | # Modules | ||||||||
34 | |||||||||
35 | 6 | 6 | 6857 | use WWW::Scraper::ISBN::Driver; | |||||
6 | 14 | ||||||||
6 | 262 | ||||||||
36 | 6 | 6 | 12329 | use WWW::Mechanize; | |||||
6 | 1334264 | ||||||||
6 | 298 | ||||||||
37 | |||||||||
38 | ########################################################################### | ||||||||
39 | # Constants | ||||||||
40 | |||||||||
41 | 6 | 6 | 72 | use constant YAHOO => 'http://shopping.yahoo.com'; | |||||
6 | 9 | ||||||||
6 | 460 | ||||||||
42 | 6 | 6 | 35 | use constant SEARCH => 'http://shopping.yahoo.com/search?p='; | |||||
6 | 13 | ||||||||
6 | 5762 | ||||||||
43 | |||||||||
44 | #-------------------------------------------------------------------------- | ||||||||
45 | |||||||||
46 | ########################################################################### | ||||||||
47 | # Public Interface | ||||||||
48 | |||||||||
49 | =head1 METHODS | ||||||||
50 | |||||||||
51 | =over 4 | ||||||||
52 | |||||||||
53 | =item C<search()> | ||||||||
54 | |||||||||
55 | Creates a query string, then passes the appropriate form fields to the | ||||||||
56 | Yahoo Books server. | ||||||||
57 | |||||||||
58 | The returned page should be the correct catalog page for that ISBN. If not, the | ||||||||
59 | function returns zero and allows the next driver in the chain to have a go. If | ||||||||
60 | a valid page is returned, the following fields are returned via the book hash: | ||||||||
61 | |||||||||
62 | isbn (now returns isbn13) | ||||||||
63 | isbn10 (no longer provided by Yahoo on page) | ||||||||
64 | isbn13 | ||||||||
65 | ean13 (industry name) | ||||||||
66 | title | ||||||||
67 | author | ||||||||
68 | pubdate (no longer provided by Yahoo on page) | ||||||||
69 | publisher | ||||||||
70 | book_link | ||||||||
71 | image_link | ||||||||
72 | thumb_link (same as image_link) | ||||||||
73 | description | ||||||||
74 | binding (if known) | ||||||||
75 | pages (no longer provided by Yahoo on page) | ||||||||
76 | weight (no longer provided by Yahoo on page) | ||||||||
77 | width (no longer provided by Yahoo on page) | ||||||||
78 | height (no longer provided by Yahoo on page) | ||||||||
79 | |||||||||
80 | The book_link and image_link refer back to the Yahoo Books website. | ||||||||
81 | |||||||||
82 | =back | ||||||||
83 | |||||||||
84 | =cut | ||||||||
85 | |||||||||
86 | sub search { | ||||||||
87 | 1 | 1 | 1 | 421 | my $self = shift; | ||||
88 | 1 | 3 | my $isbn = shift; | ||||||
89 | 1 | 10 | $self->found(0); | ||||||
90 | 1 | 35 | $self->book(undef); | ||||||
91 | 1 | 17 | my $data = {}; | ||||||
92 | |||||||||
93 | # validate and convert into EAN13 format | ||||||||
94 | 1 | 10 | my $ean = $self->convert_to_ean13($isbn); | ||||||
95 | 1 | 50 | 33 | 76 | return $self->handler("Invalid ISBN specified [$isbn]") | ||||
33 | |||||||||
33 | |||||||||
33 | |||||||||
96 | if(!$ean || (length $isbn == 13 && $isbn ne $ean) | ||||||||
97 | || (length $isbn == 10 && $isbn ne $self->convert_to_isbn10($ean))); | ||||||||
98 | 1 | 47 | $isbn = $ean; | ||||||
99 | |||||||||
100 | 1 | 7 | my $mech = WWW::Mechanize->new; | ||||||
101 | 1 | 20284 | $mech->agent_alias( 'Linux Mozilla' ); | ||||||
102 | |||||||||
103 | 1 | 81 | eval { $mech->get( YAHOO . '/books' ) }; | ||||||
1 | 6 | ||||||||
104 | 1 | 0 | 33 | 352808 | return $self->handler("Yahoo! book website appears to be unavailable.") | ||||
33 | |||||||||
105 | if($@ || !$mech->success() || !$mech->content()); | ||||||||
106 | |||||||||
107 | 0 | eval { $mech->get( SEARCH . $isbn ) }; | |||||||
0 | |||||||||
108 | 0 | 0 | 0 | return $self->handler("Yahoo! book search website appears to be unavailable.") | |||||
0 | |||||||||
109 | if($@ || !$mech->success() || !$mech->content()); | ||||||||
110 | |||||||||
111 | |||||||||
112 | # The Results page | ||||||||
113 | 0 | my $content = $mech->content(); | |||||||
114 | #print STDERR "\n# results=[\n$content\n]\n"; | ||||||||
115 | |||||||||
116 | 0 | my ($list) = $content =~ m!
|
|||||||
117 | 0 | my ($link,$thumb) = $list =~ m! \s* \s* | |||||||
118 | |||||||||
119 | #print STDERR "\n# link=[$link], thumb=[$thumb], list=[$list]\n"; | ||||||||
120 | |||||||||
121 | 0 | 0 | return $self->handler("Failed to find that book on Yahoo! book website.") | ||||||
122 | unless(defined $link); | ||||||||
123 | |||||||||
124 | 0 | $data->{book_link} = YAHOO . $link; | |||||||
125 | |||||||||
126 | 0 | eval { $mech->get( $data->{book_link} ) }; | |||||||
0 | |||||||||
127 | 0 | 0 | 0 | return $self->handler("Yahoo! book search website appears to be unavailable.") | |||||
0 | |||||||||
128 | if($@ || !$mech->success() || !$mech->content()); | ||||||||
129 | |||||||||
130 | |||||||||
131 | # The Book page | ||||||||
132 | 0 | my $html = $mech->content(); | |||||||
133 | #print STDERR "\n# page=[\n$html\n]\n"; | ||||||||
134 | |||||||||
135 | 0 | 0 | return $self->handler("Could not extract data from Yahoo! result page.") | ||||||
136 | unless($html =~ m!\s*!); | ||||||||
137 | |||||||||
138 | 0 | $data->{thumb_link} = $thumb; | |||||||
139 | 0 | ($data->{image_link}) = $html =~ m!!is; | |||||||
140 | 0 | ($data->{title}) = $html =~ m!!is; | |||||||
141 | 0 | ($data->{description}) = $html =~ m! (.*?) !is; |
|||||||
142 | 0 | ($data->{publisher}) = $html =~ m! | Publisher | ([^<]+) | |||||
143 | 0 | ($data->{binding}) = $html =~ m! | Book Format | ([^<]+) | |||||
144 | 0 | ($data->{author}) = $html =~ m! | Author | ([^<]+) | |||||
145 | 0 | ($data->{isbn13}) = $ean; | |||||||
146 | 0 | ($data->{isbn10}) = $self->convert_to_isbn10($ean); | |||||||
147 | |||||||||
148 | 0 | 0 | return $self->handler("Could not extract data from Yahoo! result page.") | ||||||
149 | unless(defined $data); | ||||||||
150 | |||||||||
151 | # trim top and tail | ||||||||
152 | 0 | 0 | foreach (keys %$data) { next unless(defined $data->{$_});$data->{$_} =~ s/^\s+//;$data->{$_} =~ s/\s+$//; } | ||||||
0 | |||||||||
0 | |||||||||
0 | |||||||||
153 | |||||||||
154 | 0 | my $bk = { | |||||||
155 | 'isbn13' => $data->{isbn13}, | ||||||||
156 | 'isbn10' => $data->{isbn10}, | ||||||||
157 | 'isbn' => $data->{isbn13}, | ||||||||
158 | 'ean13' => $data->{isbn13}, | ||||||||
159 | 'author' => $data->{author}, | ||||||||
160 | 'title' => $data->{title}, | ||||||||
161 | 'book_link' => $data->{book_link}, | ||||||||
162 | 'image_link' => $data->{image_link}, | ||||||||
163 | 'thumb_link' => $data->{image_link}, | ||||||||
164 | 'description' => $data->{description}, | ||||||||
165 | 'publisher' => $data->{publisher}, | ||||||||
166 | 'pubdate' => $data->{pubdate}, | ||||||||
167 | 'binding' => $data->{binding}, | ||||||||
168 | 'pages' => $data->{pages}, | ||||||||
169 | 'weight' => $data->{weight}, | ||||||||
170 | 'width' => $data->{width}, | ||||||||
171 | 'height' => $data->{height}, | ||||||||
172 | 'depth' => $data->{depth}, | ||||||||
173 | 'html' => $html | ||||||||
174 | }; | ||||||||
175 | 0 | $self->book($bk); | |||||||
176 | 0 | $self->found(1); | |||||||
177 | 0 | return $self->book; | |||||||
178 | } | ||||||||
179 | |||||||||
180 | q{currently listening to: 'Drunk In Public' by The Levellers}; | ||||||||
181 | |||||||||
182 | __END__ |