line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::CPANRatings; |
2
|
3
|
|
|
3
|
|
191463
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
116
|
|
3
|
3
|
|
|
3
|
|
14
|
use warnings; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
127
|
|
4
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
5
|
3
|
|
|
3
|
|
2995
|
use utf8; |
|
3
|
|
|
|
|
33
|
|
|
3
|
|
|
|
|
15
|
|
6
|
3
|
|
|
3
|
|
115
|
use List::Util qw(sum); |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
337
|
|
7
|
3
|
|
|
3
|
|
3143
|
use LWP::UserAgent; |
|
3
|
|
|
|
|
169246
|
|
|
3
|
|
|
|
|
94
|
|
8
|
3
|
|
|
3
|
|
2932
|
use DateTime::Format::DateParse; |
|
3
|
|
|
|
|
673298
|
|
|
3
|
|
|
|
|
96
|
|
9
|
3
|
|
|
3
|
|
3404
|
use HTML::TokeParser::Simple; |
|
3
|
|
|
|
|
73550
|
|
|
3
|
|
|
|
|
80
|
|
10
|
3
|
|
|
3
|
|
53
|
use URI; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
61
|
|
11
|
3
|
|
|
3
|
|
1522
|
use Web::Scraper; |
|
3
|
|
|
|
|
197002
|
|
|
3
|
|
|
|
|
29
|
|
12
|
3
|
|
|
3
|
|
4610
|
use JSON::XS; |
|
3
|
|
|
|
|
28287
|
|
|
3
|
|
|
|
|
302
|
|
13
|
3
|
|
|
3
|
|
4809
|
use Text::CSV_PP; |
|
3
|
|
|
|
|
35489
|
|
|
3
|
|
|
|
|
49
|
|
14
|
3
|
|
|
3
|
|
138
|
use feature 'say'; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
5092
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
17
|
1
|
|
|
1
|
0
|
15
|
my $class = shift; |
18
|
1
|
|
50
|
|
|
9
|
my $args = shift || {}; |
19
|
1
|
|
|
|
|
4
|
my $self = bless $args,$class; |
20
|
|
|
|
|
|
|
$self->setup_request(sub{ |
21
|
2
|
|
|
2
|
|
5
|
my $url = shift; |
22
|
2
|
|
|
|
|
26
|
my $ua = LWP::UserAgent->new; |
23
|
2
|
|
|
|
|
191143
|
my $response = $ua->get( $url ); |
24
|
2
|
|
|
|
|
1852650
|
return $response->decoded_content; |
25
|
1
|
|
|
|
|
11
|
}); |
26
|
1
|
|
|
|
|
3
|
return $self; |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub setup_request { |
30
|
1
|
|
|
1
|
0
|
3
|
my ($self,$cb) = @_; |
31
|
1
|
|
|
|
|
9
|
$self->{requester} = $cb; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub request { |
35
|
2
|
|
|
2
|
0
|
5
|
my ($self,$url) = @_; |
36
|
2
|
|
|
|
|
8
|
return $self->{requester}->( $url ); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub fetch_ratings { |
40
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
41
|
1
|
|
|
|
|
3
|
my $arg = shift; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# if it's file |
44
|
1
|
|
|
|
|
1
|
my $text; |
45
|
1
|
50
|
33
|
|
|
16
|
if( $arg && -e $arg ) { |
|
|
50
|
33
|
|
|
|
|
46
|
0
|
|
|
|
|
0
|
open my $fh , "<" , $arg; |
47
|
0
|
|
|
|
|
0
|
local $/; |
48
|
0
|
|
|
|
|
0
|
$text = <$fh>; |
49
|
0
|
|
|
|
|
0
|
close $fh; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
elsif( $arg && $arg =~ /^http/ ) { |
52
|
0
|
|
|
|
|
0
|
$text = get( $arg ); |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
1
|
50
|
|
|
|
5
|
unless ( $text ) { |
56
|
1
|
|
|
|
|
5
|
$text = $self->request('http://cpanratings.perl.org/csv/all_ratings.csv'); |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
|
59
|
1
|
|
|
|
|
37173
|
my @lines = split /\n/,$text; |
60
|
1
|
|
|
|
|
117
|
my $csv = Text::CSV_PP->new(); # create a new object |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# drop first 2 lines |
63
|
1
|
|
|
|
|
218
|
splice @lines,0,2; |
64
|
1
|
|
|
|
|
4
|
my %rating_data; |
65
|
|
|
|
|
|
|
|
66
|
1
|
|
|
|
|
3
|
for my $line ( @lines ) { |
67
|
3007
|
|
|
|
|
5681
|
chomp($line); |
68
|
3007
|
|
|
|
|
8449
|
my $status = $csv->parse($line); |
69
|
3007
|
50
|
|
|
|
641901
|
die 'csv file parse failed.' unless $status; |
70
|
3007
|
|
|
|
|
9052
|
my ($dist,$rating,$review_count) = $csv->fields(); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# say $dist, $rating, $review_count; |
73
|
3007
|
|
|
|
|
40378
|
$rating_data{ $dist } = { |
74
|
|
|
|
|
|
|
dist => $dist, |
75
|
|
|
|
|
|
|
rating => $rating, |
76
|
|
|
|
|
|
|
review_cnt => $review_count, |
77
|
|
|
|
|
|
|
}; |
78
|
|
|
|
|
|
|
} |
79
|
1
|
|
|
|
|
266
|
return $self->{rating_data} = \%rating_data; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub rating_data { |
83
|
2
|
|
|
2
|
1
|
918
|
my $self = shift; |
84
|
2
|
100
|
|
|
|
12
|
$self->fetch_ratings unless $self->{rating_data}; |
85
|
2
|
|
|
|
|
17
|
return $self->{rating_data}; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub get_ratings { |
89
|
1
|
|
|
1
|
1
|
4
|
my ($self,$distname) = @_; |
90
|
1
|
|
|
|
|
3
|
$distname =~ s/::/-/g; |
91
|
1
|
|
|
|
|
5
|
return $self->rating_data->{ $distname }; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# dist_name format |
95
|
|
|
|
|
|
|
sub get_reviews { |
96
|
1
|
|
|
1
|
1
|
2
|
my ($self,$modname) = @_; |
97
|
1
|
|
|
|
|
3
|
my $distname = $modname; |
98
|
1
|
|
|
|
|
3
|
$distname =~ s/::/-/g; |
99
|
1
|
|
|
|
|
3
|
my $base_url = "http://cpanratings.perl.org/dist/"; |
100
|
1
|
|
|
|
|
3
|
my $url = $base_url . $distname; |
101
|
1
|
|
|
|
|
6
|
my $content = $self->request($url); |
102
|
1
|
50
|
|
|
|
4659
|
return unless $content; |
103
|
1
|
50
|
|
|
|
26
|
return unless $content =~ /$modname reviews/; |
104
|
1
|
|
|
|
|
8
|
my $result = $self->parse_review_page($content); |
105
|
1
|
|
|
|
|
3
|
return @{ $result->{reviews} }; |
|
1
|
|
|
|
|
21
|
|
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# returned structure, |
110
|
|
|
|
|
|
|
# $VAR1 = { |
111
|
|
|
|
|
|
|
# 'reviews' => [ |
112
|
|
|
|
|
|
|
# { |
113
|
|
|
|
|
|
|
# 'body' => ' Moose got me laid. Could you ask anything more of a CPAN module? ', |
114
|
|
|
|
|
|
|
# 'user_link' => bless( do{\(my $o = 'http://cpanratings.perl.org/user/funguy')}, 'URI::http' ), |
115
|
|
|
|
|
|
|
# 'attrs' => 'Fun Guy - 2011-04-12T14:30:46 ', |
116
|
|
|
|
|
|
|
# 'user' => 'Fun Guy', |
117
|
|
|
|
|
|
|
# 'dist' => ' Moose', |
118
|
|
|
|
|
|
|
# 'dist_link' => bless( do{\(my $o = 'http://search.cpan.org/dist/Moose/')}, 'URI::http' ) |
119
|
|
|
|
|
|
|
# }, |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub rating_scraper { |
124
|
2
|
|
|
2
|
0
|
16
|
my $self = shift; |
125
|
|
|
|
|
|
|
return scraper { |
126
|
|
|
|
|
|
|
process '.review' => 'reviews[]' => scraper { |
127
|
44
|
|
|
|
|
1106030
|
process '.review_header a', |
128
|
|
|
|
|
|
|
dist_link => '@href', |
129
|
|
|
|
|
|
|
dist => 'TEXT'; |
130
|
|
|
|
|
|
|
|
131
|
44
|
|
|
|
|
513231
|
process '.review_header', |
132
|
|
|
|
|
|
|
header => 'TEXT'; |
133
|
|
|
|
|
|
|
|
134
|
44
|
|
|
|
|
496037
|
process '.review_header img', |
135
|
|
|
|
|
|
|
ratings => '@alt'; |
136
|
|
|
|
|
|
|
|
137
|
44
|
|
|
|
|
511264
|
process '.review_text', body => 'TEXT'; |
138
|
|
|
|
|
|
|
|
139
|
44
|
|
|
|
|
469658
|
process '.review_attribution' , |
140
|
|
|
|
|
|
|
'attrs' => 'TEXT'; |
141
|
44
|
|
|
|
|
496052
|
process '.review_attribution a' , |
142
|
|
|
|
|
|
|
'user' => 'TEXT', |
143
|
|
|
|
|
|
|
'user_link' => '@href'; |
144
|
2
|
|
|
2
|
|
1304115
|
}; |
145
|
2
|
|
|
|
|
25
|
}; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub parse_review_page { |
149
|
1
|
|
|
1
|
0
|
3
|
my ($self,$content) = @_; |
150
|
|
|
|
|
|
|
|
151
|
1
|
|
|
|
|
7
|
my $rating_scraper = $self->rating_scraper; |
152
|
1
|
|
|
|
|
18
|
my $res = $rating_scraper->scrape( $content ); |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# post process |
155
|
|
|
|
|
|
|
|
156
|
1
|
|
|
|
|
16548
|
for my $review ( @{ $res->{reviews} } ) { |
|
1
|
|
|
|
|
7
|
|
157
|
22
|
50
|
|
|
|
140
|
if( $review->{header} =~ m{^\s*([a-zA-Z:]+)\s+\(([0-9.]+)\)\s*$} ) { |
158
|
22
|
|
|
|
|
74
|
$review->{version} = $2; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
22
|
|
|
|
|
98
|
$review->{dist} =~ s{^\s*}{}; |
162
|
22
|
|
|
|
|
106
|
$review->{dist} =~ s{\s*$}{}; |
163
|
|
|
|
|
|
|
|
164
|
22
|
50
|
|
|
|
182
|
if( $review->{attrs} =~ m{([0-9-T:]+)\s*$} ) { |
165
|
0
|
|
|
|
|
0
|
$review->{created_on} = |
166
|
|
|
|
|
|
|
DateTime::Format::DateParse->parse_datetime( $1 ); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
22
|
|
|
|
|
52
|
delete $review->{attrs}; |
170
|
|
|
|
|
|
|
} |
171
|
1
|
|
|
|
|
16
|
return $res; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub get_all_reviews { |
176
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
177
|
0
|
|
|
|
|
|
my $all_ratings = $self->rating_data; |
178
|
0
|
|
|
|
|
|
while( my( $distname,$ratings) = each %$all_ratings ) { |
179
|
|
|
|
|
|
|
# $ratings->{review_cnt}; |
180
|
|
|
|
|
|
|
# $ratings->{dist}; |
181
|
|
|
|
|
|
|
# $ratings->{rating}; |
182
|
0
|
|
|
|
|
|
$ratings->{reviews} = [ $self->get_reviews( $ratings->{dist} ) ]; |
183
|
|
|
|
|
|
|
} |
184
|
0
|
|
|
|
|
|
return $all_ratings; |
185
|
|
|
|
|
|
|
} |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
1; |
188
|
|
|
|
|
|
|
__END__ |