| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package WWW::CPANRatings; |
|
2
|
3
|
|
|
3
|
|
191463
|
use strict; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
116
|
|
|
3
|
3
|
|
|
3
|
|
14
|
use warnings; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
127
|
|
|
4
|
|
|
|
|
|
|
our $VERSION = '0.03'; |
|
5
|
3
|
|
|
3
|
|
2995
|
use utf8; |
|
|
3
|
|
|
|
|
33
|
|
|
|
3
|
|
|
|
|
15
|
|
|
6
|
3
|
|
|
3
|
|
115
|
use List::Util qw(sum); |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
337
|
|
|
7
|
3
|
|
|
3
|
|
3143
|
use LWP::UserAgent; |
|
|
3
|
|
|
|
|
169246
|
|
|
|
3
|
|
|
|
|
94
|
|
|
8
|
3
|
|
|
3
|
|
2932
|
use DateTime::Format::DateParse; |
|
|
3
|
|
|
|
|
673298
|
|
|
|
3
|
|
|
|
|
96
|
|
|
9
|
3
|
|
|
3
|
|
3404
|
use HTML::TokeParser::Simple; |
|
|
3
|
|
|
|
|
73550
|
|
|
|
3
|
|
|
|
|
80
|
|
|
10
|
3
|
|
|
3
|
|
53
|
use URI; |
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
61
|
|
|
11
|
3
|
|
|
3
|
|
1522
|
use Web::Scraper; |
|
|
3
|
|
|
|
|
197002
|
|
|
|
3
|
|
|
|
|
29
|
|
|
12
|
3
|
|
|
3
|
|
4610
|
use JSON::XS; |
|
|
3
|
|
|
|
|
28287
|
|
|
|
3
|
|
|
|
|
302
|
|
|
13
|
3
|
|
|
3
|
|
4809
|
use Text::CSV_PP; |
|
|
3
|
|
|
|
|
35489
|
|
|
|
3
|
|
|
|
|
49
|
|
|
14
|
3
|
|
|
3
|
|
138
|
use feature 'say'; |
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
5092
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
sub new { |
|
17
|
1
|
|
|
1
|
0
|
15
|
my $class = shift; |
|
18
|
1
|
|
50
|
|
|
9
|
my $args = shift || {}; |
|
19
|
1
|
|
|
|
|
4
|
my $self = bless $args,$class; |
|
20
|
|
|
|
|
|
|
$self->setup_request(sub{ |
|
21
|
2
|
|
|
2
|
|
5
|
my $url = shift; |
|
22
|
2
|
|
|
|
|
26
|
my $ua = LWP::UserAgent->new; |
|
23
|
2
|
|
|
|
|
191143
|
my $response = $ua->get( $url ); |
|
24
|
2
|
|
|
|
|
1852650
|
return $response->decoded_content; |
|
25
|
1
|
|
|
|
|
11
|
}); |
|
26
|
1
|
|
|
|
|
3
|
return $self; |
|
27
|
|
|
|
|
|
|
} |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub setup_request { |
|
30
|
1
|
|
|
1
|
0
|
3
|
my ($self,$cb) = @_; |
|
31
|
1
|
|
|
|
|
9
|
$self->{requester} = $cb; |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub request { |
|
35
|
2
|
|
|
2
|
0
|
5
|
my ($self,$url) = @_; |
|
36
|
2
|
|
|
|
|
8
|
return $self->{requester}->( $url ); |
|
37
|
|
|
|
|
|
|
} |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub fetch_ratings { |
|
40
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
41
|
1
|
|
|
|
|
3
|
my $arg = shift; |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# if it's file |
|
44
|
1
|
|
|
|
|
1
|
my $text; |
|
45
|
1
|
50
|
33
|
|
|
16
|
if( $arg && -e $arg ) { |
|
|
|
50
|
33
|
|
|
|
|
|
46
|
0
|
|
|
|
|
0
|
open my $fh , "<" , $arg; |
|
47
|
0
|
|
|
|
|
0
|
local $/; |
|
48
|
0
|
|
|
|
|
0
|
$text = <$fh>; |
|
49
|
0
|
|
|
|
|
0
|
close $fh; |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
elsif( $arg && $arg =~ /^http/ ) { |
|
52
|
0
|
|
|
|
|
0
|
$text = get( $arg ); |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
1
|
50
|
|
|
|
5
|
unless ( $text ) { |
|
56
|
1
|
|
|
|
|
5
|
$text = $self->request('http://cpanratings.perl.org/csv/all_ratings.csv'); |
|
57
|
|
|
|
|
|
|
} |
|
58
|
|
|
|
|
|
|
|
|
59
|
1
|
|
|
|
|
37173
|
my @lines = split /\n/,$text; |
|
60
|
1
|
|
|
|
|
117
|
my $csv = Text::CSV_PP->new(); # create a new object |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# drop first 2 lines |
|
63
|
1
|
|
|
|
|
218
|
splice @lines,0,2; |
|
64
|
1
|
|
|
|
|
4
|
my %rating_data; |
|
65
|
|
|
|
|
|
|
|
|
66
|
1
|
|
|
|
|
3
|
for my $line ( @lines ) { |
|
67
|
3007
|
|
|
|
|
5681
|
chomp($line); |
|
68
|
3007
|
|
|
|
|
8449
|
my $status = $csv->parse($line); |
|
69
|
3007
|
50
|
|
|
|
641901
|
die 'csv file parse failed.' unless $status; |
|
70
|
3007
|
|
|
|
|
9052
|
my ($dist,$rating,$review_count) = $csv->fields(); |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# say $dist, $rating, $review_count; |
|
73
|
3007
|
|
|
|
|
40378
|
$rating_data{ $dist } = { |
|
74
|
|
|
|
|
|
|
dist => $dist, |
|
75
|
|
|
|
|
|
|
rating => $rating, |
|
76
|
|
|
|
|
|
|
review_cnt => $review_count, |
|
77
|
|
|
|
|
|
|
}; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
1
|
|
|
|
|
266
|
return $self->{rating_data} = \%rating_data; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub rating_data { |
|
83
|
2
|
|
|
2
|
1
|
918
|
my $self = shift; |
|
84
|
2
|
100
|
|
|
|
12
|
$self->fetch_ratings unless $self->{rating_data}; |
|
85
|
2
|
|
|
|
|
17
|
return $self->{rating_data}; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub get_ratings { |
|
89
|
1
|
|
|
1
|
1
|
4
|
my ($self,$distname) = @_; |
|
90
|
1
|
|
|
|
|
3
|
$distname =~ s/::/-/g; |
|
91
|
1
|
|
|
|
|
5
|
return $self->rating_data->{ $distname }; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# dist_name format |
|
95
|
|
|
|
|
|
|
sub get_reviews { |
|
96
|
1
|
|
|
1
|
1
|
2
|
my ($self,$modname) = @_; |
|
97
|
1
|
|
|
|
|
3
|
my $distname = $modname; |
|
98
|
1
|
|
|
|
|
3
|
$distname =~ s/::/-/g; |
|
99
|
1
|
|
|
|
|
3
|
my $base_url = "http://cpanratings.perl.org/dist/"; |
|
100
|
1
|
|
|
|
|
3
|
my $url = $base_url . $distname; |
|
101
|
1
|
|
|
|
|
6
|
my $content = $self->request($url); |
|
102
|
1
|
50
|
|
|
|
4659
|
return unless $content; |
|
103
|
1
|
50
|
|
|
|
26
|
return unless $content =~ /$modname reviews/; |
|
104
|
1
|
|
|
|
|
8
|
my $result = $self->parse_review_page($content); |
|
105
|
1
|
|
|
|
|
3
|
return @{ $result->{reviews} }; |
|
|
1
|
|
|
|
|
21
|
|
|
106
|
|
|
|
|
|
|
} |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# returned structure, |
|
110
|
|
|
|
|
|
|
# $VAR1 = { |
|
111
|
|
|
|
|
|
|
# 'reviews' => [ |
|
112
|
|
|
|
|
|
|
# { |
|
113
|
|
|
|
|
|
|
# 'body' => ' Moose got me laid. Could you ask anything more of a CPAN module? ', |
|
114
|
|
|
|
|
|
|
# 'user_link' => bless( do{\(my $o = 'http://cpanratings.perl.org/user/funguy')}, 'URI::http' ), |
|
115
|
|
|
|
|
|
|
# 'attrs' => 'Fun Guy - 2011-04-12T14:30:46 ', |
|
116
|
|
|
|
|
|
|
# 'user' => 'Fun Guy', |
|
117
|
|
|
|
|
|
|
# 'dist' => ' Moose', |
|
118
|
|
|
|
|
|
|
# 'dist_link' => bless( do{\(my $o = 'http://search.cpan.org/dist/Moose/')}, 'URI::http' ) |
|
119
|
|
|
|
|
|
|
# }, |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub rating_scraper { |
|
124
|
2
|
|
|
2
|
0
|
16
|
my $self = shift; |
|
125
|
|
|
|
|
|
|
return scraper { |
|
126
|
|
|
|
|
|
|
process '.review' => 'reviews[]' => scraper { |
|
127
|
44
|
|
|
|
|
1106030
|
process '.review_header a', |
|
128
|
|
|
|
|
|
|
dist_link => '@href', |
|
129
|
|
|
|
|
|
|
dist => 'TEXT'; |
|
130
|
|
|
|
|
|
|
|
|
131
|
44
|
|
|
|
|
513231
|
process '.review_header', |
|
132
|
|
|
|
|
|
|
header => 'TEXT'; |
|
133
|
|
|
|
|
|
|
|
|
134
|
44
|
|
|
|
|
496037
|
process '.review_header img', |
|
135
|
|
|
|
|
|
|
ratings => '@alt'; |
|
136
|
|
|
|
|
|
|
|
|
137
|
44
|
|
|
|
|
511264
|
process '.review_text', body => 'TEXT'; |
|
138
|
|
|
|
|
|
|
|
|
139
|
44
|
|
|
|
|
469658
|
process '.review_attribution' , |
|
140
|
|
|
|
|
|
|
'attrs' => 'TEXT'; |
|
141
|
44
|
|
|
|
|
496052
|
process '.review_attribution a' , |
|
142
|
|
|
|
|
|
|
'user' => 'TEXT', |
|
143
|
|
|
|
|
|
|
'user_link' => '@href'; |
|
144
|
2
|
|
|
2
|
|
1304115
|
}; |
|
145
|
2
|
|
|
|
|
25
|
}; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub parse_review_page { |
|
149
|
1
|
|
|
1
|
0
|
3
|
my ($self,$content) = @_; |
|
150
|
|
|
|
|
|
|
|
|
151
|
1
|
|
|
|
|
7
|
my $rating_scraper = $self->rating_scraper; |
|
152
|
1
|
|
|
|
|
18
|
my $res = $rating_scraper->scrape( $content ); |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# post process |
|
155
|
|
|
|
|
|
|
|
|
156
|
1
|
|
|
|
|
16548
|
for my $review ( @{ $res->{reviews} } ) { |
|
|
1
|
|
|
|
|
7
|
|
|
157
|
22
|
50
|
|
|
|
140
|
if( $review->{header} =~ m{^\s*([a-zA-Z:]+)\s+\(([0-9.]+)\)\s*$} ) { |
|
158
|
22
|
|
|
|
|
74
|
$review->{version} = $2; |
|
159
|
|
|
|
|
|
|
} |
|
160
|
|
|
|
|
|
|
|
|
161
|
22
|
|
|
|
|
98
|
$review->{dist} =~ s{^\s*}{}; |
|
162
|
22
|
|
|
|
|
106
|
$review->{dist} =~ s{\s*$}{}; |
|
163
|
|
|
|
|
|
|
|
|
164
|
22
|
50
|
|
|
|
182
|
if( $review->{attrs} =~ m{([0-9-T:]+)\s*$} ) { |
|
165
|
0
|
|
|
|
|
0
|
$review->{created_on} = |
|
166
|
|
|
|
|
|
|
DateTime::Format::DateParse->parse_datetime( $1 ); |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
22
|
|
|
|
|
52
|
delete $review->{attrs}; |
|
170
|
|
|
|
|
|
|
} |
|
171
|
1
|
|
|
|
|
16
|
return $res; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub get_all_reviews { |
|
176
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
177
|
0
|
|
|
|
|
|
my $all_ratings = $self->rating_data; |
|
178
|
0
|
|
|
|
|
|
while( my( $distname,$ratings) = each %$all_ratings ) { |
|
179
|
|
|
|
|
|
|
# $ratings->{review_cnt}; |
|
180
|
|
|
|
|
|
|
# $ratings->{dist}; |
|
181
|
|
|
|
|
|
|
# $ratings->{rating}; |
|
182
|
0
|
|
|
|
|
|
$ratings->{reviews} = [ $self->get_reviews( $ratings->{dist} ) ]; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
0
|
|
|
|
|
|
return $all_ratings; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
1; |
|
188
|
|
|
|
|
|
|
__END__ |