| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package WWW::Scraper::ISBN::LibUniverIt_Driver; |
|
2
|
2
|
|
|
2
|
|
55664
|
use strict; |
|
|
2
|
|
|
|
|
6
|
|
|
|
2
|
|
|
|
|
113
|
|
|
3
|
2
|
|
|
2
|
|
14
|
use warnings; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
67
|
|
|
4
|
2
|
|
|
2
|
|
7628
|
use LWP::UserAgent; |
|
|
2
|
|
|
|
|
303184
|
|
|
|
2
|
|
|
|
|
86
|
|
|
5
|
2
|
|
|
2
|
|
2261
|
use WWW::Scraper::ISBN::Driver; |
|
|
2
|
|
|
|
|
2238
|
|
|
|
2
|
|
|
|
|
72
|
|
|
6
|
2
|
|
|
2
|
|
1788
|
use HTML::Entities qw(decode_entities); |
|
|
2
|
|
|
|
|
45653
|
|
|
|
2
|
|
|
|
|
2932
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
our @ISA = qw(WWW::Scraper::ISBN::Driver); |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.13'; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# search($isbn)
#
# Queries libreriauniversitaria.it for $isbn and scrapes the reply.
#
# Two page layouts are recognised:
#   * a search-result listing, identified by the text
#     "Ricerca - <isbn> - libreriauniversitaria.it";
#   * a book detail page, identified by the text "Dettagli del libro".
#
# On success, returns a hash reference with the keys isbn, author, title,
# publisher, date, price, pages, series, shelf and trans. The same
# reference is stored through $self->book and $self->found(1) is set.
# Fields that could not be extracted stay empty strings.
#
# On failure (HTTP error, "no product" page, or an unrecognised page),
# calls $self->error with a message, leaves found at 0 and returns 0.
#
# found, book and error are not defined in this package; they are resolved
# through @ISA.
#
# NOTE(review): several patterns below reached review with their HTML
# markup stripped (some were cut off mid-pattern). They are reconstructed
# from the surviving fragments and are marked individually - confirm each
# one against the released module before relying on it.
sub search {
    my $self = shift;
    my $isbn = shift;

    $self->found(0);
    $self->book(undef);

    my $post_url = 'http://www.libreriauniversitaria.it/c_power_search.php?shelf=BIT&q=' . $isbn . '&submit=Invia';
    my $ua       = LWP::UserAgent->new;
    my $res      = $ua->get($post_url);

    # A transport or HTTP failure is reported as such rather than being
    # mistaken for an unparseable page.
    unless ($res->is_success) {
        $self->error("libreriauniversitaria.it request failed: " . $res->status_line);
        return 0;
    }

    # as_string is kept on purpose: the patterns below (including the
    # literal euro sign) were written against this form of the response.
    my $doc = $res->as_string;

    my $title   = "";
    my $authors = "";
    my $editor  = "";
    my $date    = "";
    my $price   = "";
    my $pages   = "";
    my $series  = "";
    my $shelf   = "";
    my $trans   = "";

    if ($doc =~ /Nessun prodotto corrisponde ai criteri di ricerca/) {
        # The site explicitly says that nothing matches the query.
        $self->error("book not found.");
        $self->found(0);
        return 0;
    }
    elsif ($doc =~ m|Ricerca - \Q$isbn\E - libreriauniversitaria\.it|i) {
        # Search-result listing. \Q...\E keeps the caller-supplied ISBN
        # from being interpreted as regex syntax.

        # NOTE(review): reconstructed pattern - tags were lost in extraction.
        if ($doc =~ m|<td[^>]+><a[^>]+ class="product_heading_title_link" [^>]+>([^<]+)(?:</a>)?(.*?)</td>|) {
            $title = $1;
            my $info = $2;    # markup following the title link
            $authors = parse_authors($info);

            # Publisher link followed by " - <year>", e.g. "Einaudi - 2005".
            # NOTE(review): reconstructed pattern - leading tag was lost.
            if ($info =~ m|<a[^>]+ href="libri-editore[^"]+" [^>]+/>([^<]+)(?:</a>)? - (\d+)|) {
                $editor = $1;
                $date   = $2;
            }
        }

        # Only the integer part of the price is captured.
        $price = $1 if ($doc =~ /Prezzo: .*?€ (\d+)/);
    }
    elsif ($doc =~ /Dettagli del libro/) {
        # Book detail page: each field is a label followed by a newline,
        # tabs and whitespace, then the value.
        $price = $1 if ($doc =~ m|€ ([^<]+)|);

        # NOTE(review): [^>]+ is what survived extraction; [^<]+ may have
        # been intended - confirm.
        $title   = $1 if ($doc =~ m|Titolo: \n\t+\s+([^>]+)|);
        $authors = parse_authors($1) if ($doc =~ m|Autor[ei]: \n\t+\s+(.*?)\n\t+\s+|);

        # NOTE(review): the next three patterns were cut off right after
        # "\s+"; the link-and-text tail is a reconstruction - confirm.
        $editor = $1 if ($doc =~ m|Editore: \n\t+\s+<a [^>]+>([^<]+)|);
        $date   = $1 if ($doc =~ m|Data di Pubblicazione: \n\t+\s+(\d+)|);
        $pages  = $1 if ($doc =~ m|Pagine: \n\t+\s+(\d+)|);
        $series = $1 if ($doc =~ m|Collana: \n\t+\s+<a [^>]+>([^<]+)|);
        $shelf  = $1 if ($doc =~ m|Reparto: \n\t+\s+<a [^>]+>([^<]+)|);
        $trans  = $1 if ($doc =~ m|Traduttore: \n\t+\s+([^<]+)|);
    }
    else {
        # Unknown layout: report it and stop. Previously execution fell
        # through and an empty book was returned with found(1).
        $self->error("libreriauniversitaria.it answered in an unattended way, book information cannot be found.");
        $self->found(0);
        return 0;
    }

    # parse_authors may yield undef when no author markup is present.
    $authors = "" unless defined $authors;

    # decode_entities in void context rewrites its argument in place.
    decode_entities($title);
    decode_entities($authors);
    decode_entities($editor);

    my $bk = {
        'isbn'      => $isbn,
        'author'    => $authors,
        'title'     => $title,
        'publisher' => $editor,
        'date'      => $date,
        'price'     => $price,
        'pages'     => $pages,
        'series'    => $series,
        'shelf'     => $shelf,
        'trans'     => $trans,
    };
    $self->book($bk);
    $self->found(1);
    return $bk;
}
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
# parse_authors($info)
#
# Extracts author names from a fragment of HTML and returns them as a
# single string joined with ", ". Each name is taken from the text that
# follows an opening anchor tag; matched pieces are removed from a private
# copy of $info, so the caller's string is not modified.
#
# Returns the empty string (never undef) when $info is undef or contains
# no matching markup, so the result is always safe to decode or store.
#
# NOTE(review): the pattern reached review with its leading markup
# stripped ("]+>([^<]+)"); the "<a ...>" prefix is a reconstruction -
# confirm against the released module. As written it collects the text of
# every link in $info, not only author links.
sub parse_authors {
    my $info = shift;
    return "" unless defined $info;

    my @names;
    while ($info =~ s|<a\b[^>]*>([^<]+)||) {
        push @names, $1;
    }
    return join(", ", @names);
}
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
1; |
|
98
|
|
|
|
|
|
|
__END__ |