line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CPAN::Testers::Data::Generator; |
2
|
|
|
|
|
|
|
|
3
|
9
|
|
|
9
|
|
181400
|
use warnings; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
379
|
|
4
|
9
|
|
|
9
|
|
45
|
use strict; |
|
9
|
|
|
|
|
13
|
|
|
9
|
|
|
|
|
244
|
|
5
|
|
|
|
|
|
|
|
6
|
9
|
|
|
9
|
|
41
|
use vars qw($VERSION); |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
636
|
|
7
|
|
|
|
|
|
|
$VERSION = '1.21'; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
10
|
|
|
|
|
|
|
# Library Modules |
11
|
|
|
|
|
|
|
|
12
|
9
|
|
|
9
|
|
6162
|
use Config::IniFiles; |
|
9
|
|
|
|
|
269800
|
|
|
9
|
|
|
|
|
422
|
|
13
|
9
|
|
|
9
|
|
6368
|
use CPAN::Testers::Common::Article; |
|
9
|
|
|
|
|
162597
|
|
|
9
|
|
|
|
|
71
|
|
14
|
9
|
|
|
9
|
|
5462
|
use CPAN::Testers::Common::DBUtils; |
|
9
|
|
|
|
|
163793
|
|
|
9
|
|
|
|
|
68
|
|
15
|
|
|
|
|
|
|
#use Data::Dumper; |
16
|
9
|
|
|
9
|
|
6704
|
use Data::FlexSerializer; |
|
9
|
|
|
|
|
15808686
|
|
|
9
|
|
|
|
|
85
|
|
17
|
9
|
|
|
9
|
|
17707
|
use DateTime; |
|
9
|
|
|
|
|
1394971
|
|
|
9
|
|
|
|
|
439
|
|
18
|
9
|
|
|
9
|
|
90
|
use DateTime::Duration; |
|
9
|
|
|
|
|
15
|
|
|
9
|
|
|
|
|
216
|
|
19
|
9
|
|
|
9
|
|
126
|
use File::Basename; |
|
9
|
|
|
|
|
14
|
|
|
9
|
|
|
|
|
915
|
|
20
|
9
|
|
|
9
|
|
50
|
use File::Path; |
|
9
|
|
|
|
|
13
|
|
|
9
|
|
|
|
|
517
|
|
21
|
9
|
|
|
9
|
|
7714
|
use File::Slurp; |
|
9
|
|
|
|
|
41225
|
|
|
9
|
|
|
|
|
778
|
|
22
|
9
|
|
|
9
|
|
5964
|
use HTML::Entities; |
|
9
|
|
|
|
|
47225
|
|
|
9
|
|
|
|
|
773
|
|
23
|
9
|
|
|
9
|
|
81
|
use IO::File; |
|
9
|
|
|
|
|
17
|
|
|
9
|
|
|
|
|
1315
|
|
24
|
9
|
|
|
9
|
|
6079
|
use JSON; |
|
9
|
|
|
|
|
40736
|
|
|
9
|
|
|
|
|
44
|
|
25
|
9
|
|
|
9
|
|
1252
|
use Time::Local; |
|
9
|
|
|
|
|
13
|
|
|
9
|
|
|
|
|
560
|
|
26
|
|
|
|
|
|
|
|
27
|
9
|
|
|
9
|
|
4665
|
use Metabase 0.004; |
|
9
|
|
|
|
|
1481
|
|
|
9
|
|
|
|
|
242
|
|
28
|
9
|
|
|
9
|
|
28283
|
use Metabase::Fact; |
|
9
|
|
|
|
|
142104
|
|
|
9
|
|
|
|
|
337
|
|
29
|
9
|
|
|
9
|
|
81
|
use Metabase::Resource; |
|
9
|
|
|
|
|
14
|
|
|
9
|
|
|
|
|
164
|
|
30
|
9
|
|
|
9
|
|
5777
|
use CPAN::Testers::Fact::LegacyReport; |
|
9
|
|
|
|
|
11844
|
|
|
9
|
|
|
|
|
265
|
|
31
|
9
|
|
|
9
|
|
4556
|
use CPAN::Testers::Fact::TestSummary; |
|
9
|
|
|
|
|
2850
|
|
|
9
|
|
|
|
|
261
|
|
32
|
9
|
|
|
9
|
|
5027
|
use CPAN::Testers::Metabase::AWS; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
use CPAN::Testers::Report; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
36
|
|
|
|
|
|
|
# Variables |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
my $DIFF = 30; # max difference allowed in seconds |
39
|
|
|
|
|
|
|
my $MINS = 15; # split time in minutes |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my %testers; |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $FROM = 'CPAN Tester Report Server <do_not_reply@cpantesters.org>'; |
44
|
|
|
|
|
|
|
my $HOW = '/usr/sbin/sendmail -bm'; |
45
|
|
|
|
|
|
|
my $HEAD = 'To: EMAIL |
46
|
|
|
|
|
|
|
From: FROM |
47
|
|
|
|
|
|
|
Date: DATE |
48
|
|
|
|
|
|
|
Subject: CPAN Testers Generator Error Report |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
'; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my $BODY = ' |
53
|
|
|
|
|
|
|
The following reports failed to parse into the cpanstats database: |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
INVALID |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
Thanks, |
58
|
|
|
|
|
|
|
CPAN Testers Server. |
59
|
|
|
|
|
|
|
'; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
my @admins = ( |
62
|
|
|
|
|
|
|
'barbie@missbarbell.co.uk', |
63
|
|
|
|
|
|
|
#'david@dagolden.com' |
64
|
|
|
|
|
|
|
); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
my ($OSNAMES,%MAPPINGS); |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
69
|
|
|
|
|
|
|
# The Application Programming Interface |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub new { |
72
|
|
|
|
|
|
|
my $class = shift; |
73
|
|
|
|
|
|
|
my %hash = @_; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
my $self = { |
76
|
|
|
|
|
|
|
meta_count => 0, |
77
|
|
|
|
|
|
|
stat_count => 0, |
78
|
|
|
|
|
|
|
last => '', |
79
|
|
|
|
|
|
|
}; |
80
|
|
|
|
|
|
|
bless $self, $class; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# load configuration |
83
|
|
|
|
|
|
|
my $cfg = Config::IniFiles->new( -file => $hash{config} ); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# configure databases |
86
|
|
|
|
|
|
|
for my $db (qw(CPANSTATS METABASE)) { |
87
|
|
|
|
|
|
|
die "No configuration for $db database\n" unless($cfg->SectionExists($db)); |
88
|
|
|
|
|
|
|
my %opts = map {$_ => ($cfg->val($db,$_)||undef);} qw(driver database dbfile dbhost dbport dbuser dbpass); |
89
|
|
|
|
|
|
|
$opts{AutoCommit} = 0; |
90
|
|
|
|
|
|
|
$self->{$db} = CPAN::Testers::Common::DBUtils->new(%opts); |
91
|
|
|
|
|
|
|
die "Cannot configure $db database\n" unless($self->{$db}); |
92
|
|
|
|
|
|
|
$self->{$db}->{'mysql_enable_utf8'} = 1 if($opts{driver} =~ /mysql/i); |
93
|
|
|
|
|
|
|
$self->{$db}->{'mysql_auto_reconnect'} = 1 if($opts{driver} =~ /mysql/i); |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
if($cfg->SectionExists('ADMINISTRATION')) { |
97
|
|
|
|
|
|
|
my @admins = $cfg->val('ADMINISTRATION','admins'); |
98
|
|
|
|
|
|
|
$self->{admins} = \@admins; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# command line swtiches override configuration settings |
102
|
|
|
|
|
|
|
for my $key (qw(logfile poll_limit stopfile offset aws_bucket aws_namespace)) { |
103
|
|
|
|
|
|
|
$self->{$key} = $hash{$key} || $cfg->val('MAIN',$key); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
$self->{offset} ||= 1; |
107
|
|
|
|
|
|
|
$self->{poll_limit} ||= 1000; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my @rows = $self->{METABASE}->get_query('hash','SELECT * FROM testers_email'); |
110
|
|
|
|
|
|
|
for my $row (@rows) { |
111
|
|
|
|
|
|
|
$testers{$row->{resource}} = $row->{email}; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# build OS names map |
115
|
|
|
|
|
|
|
@rows = $self->{CPANSTATS}->get_query('array','SELECT osname,ostitle FROM osname'); |
116
|
|
|
|
|
|
|
for my $row (@rows) { |
117
|
|
|
|
|
|
|
$self->{OSNAMES}{lc $row->[0]} ||= $row->[1]; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
$OSNAMES = join('|',keys %{$self->{OSNAMES}}) if(keys %{$self->{OSNAMES}}); |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
$self->load_uploads(); |
122
|
|
|
|
|
|
|
$self->load_authors(); |
123
|
|
|
|
|
|
|
$self->load_perl_versions(); |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
if($cfg->SectionExists('DISABLE')) { |
126
|
|
|
|
|
|
|
my @values = $cfg->val('DISABLE','LIST'); |
127
|
|
|
|
|
|
|
$self->{DISABLE}{$_} = 1 for(@values); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
if($cfg->SectionExists('OSNAMES')) { |
131
|
|
|
|
|
|
|
for my $param ($cfg->Parameters('OSNAMES')) { |
132
|
|
|
|
|
|
|
$self->{OSNAMES}{lc $param} ||= lc $cfg->val('OSNAMES',$param); |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
if($cfg->SectionExists('MAPPINGS')) { |
137
|
|
|
|
|
|
|
for my $param ($cfg->Parameters('MAPPINGS')) { |
138
|
|
|
|
|
|
|
$MAPPINGS{$param} = [ split(',', $cfg->val('MAPPINGS',$param), 2) ]; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
eval { |
143
|
|
|
|
|
|
|
$self->{metabase} = CPAN::Testers::Metabase::AWS->new( |
144
|
|
|
|
|
|
|
bucket => $self->{aws_bucket}, |
145
|
|
|
|
|
|
|
namespace => $self->{aws_namespace}, |
146
|
|
|
|
|
|
|
); |
147
|
|
|
|
|
|
|
$self->{librarian} = $self->{metabase}->public_librarian; |
148
|
|
|
|
|
|
|
}; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# if we require remote access, we need the librarian |
151
|
|
|
|
|
|
|
unless($hash{localonly}) { |
152
|
|
|
|
|
|
|
return unless($self->{metabase} && $self->{librarian}); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# reports are now stored in a compressed format |
156
|
|
|
|
|
|
|
$self->{serializer} = Data::FlexSerializer->new( |
157
|
|
|
|
|
|
|
detect_compression => 1, |
158
|
|
|
|
|
|
|
detect_json => 1, |
159
|
|
|
|
|
|
|
output_format => 'json' |
160
|
|
|
|
|
|
|
); |
161
|
|
|
|
|
|
|
$self->{serializer2} = Data::FlexSerializer->new( |
162
|
|
|
|
|
|
|
detect_compression => 1, |
163
|
|
|
|
|
|
|
detect_sereal => 1, |
164
|
|
|
|
|
|
|
output_format => 'sereal' |
165
|
|
|
|
|
|
|
); |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
return $self; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub DESTROY { |
171
|
|
|
|
|
|
|
my $self = shift; |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
$self->save_perl_versions(); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
177
|
|
|
|
|
|
|
# Public Methods |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub generate { |
180
|
|
|
|
|
|
|
my $self = shift; |
181
|
|
|
|
|
|
|
my $nonstop = shift || 0; |
182
|
|
|
|
|
|
|
my $maxdate = shift; |
183
|
|
|
|
|
|
|
my ($to,@reports); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
$self->{reparse} = 0; |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
$self->_log("START GENERATE nonstop=$nonstop\n"); |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
do { |
190
|
|
|
|
|
|
|
my $start = localtime(time); |
191
|
|
|
|
|
|
|
($self->{processed},$self->{stored},$self->{cached}) = (0,0,0); |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
if($maxdate) { |
194
|
|
|
|
|
|
|
$to = $maxdate; |
195
|
|
|
|
|
|
|
} else { |
196
|
|
|
|
|
|
|
$to = sprintf "%sT%sZ", DateTime->now->ymd, DateTime->now->hms; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
$self->_log("DATES maxdate=$maxdate, to=$to \n"); |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
my $data = $self->get_next_dates($to); |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
$self->_consume_reports( $to, $data ); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
$nonstop = 0 if($self->{processed} == 0); |
206
|
|
|
|
|
|
|
$nonstop = 0 if($self->{stopfile} && -f $self->{stopfile}); |
207
|
|
|
|
|
|
|
$nonstop = 0 if($maxdate && $maxdate le $to); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
$self->load_uploads() if($nonstop); |
210
|
|
|
|
|
|
|
$self->load_authors() if($nonstop); |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
$self->_log("CHECK nonstop=$nonstop\n"); |
213
|
|
|
|
|
|
|
} while($nonstop); |
214
|
|
|
|
|
|
|
$self->_log("STOP GENERATE nonstop=$nonstop\n"); |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub regenerate { |
218
|
|
|
|
|
|
|
my ($self,$hash) = @_; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
$self->{reparse} = 0; |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
my $maxdate = sprintf "%sT%sZ", DateTime->now->ymd, DateTime->now->hms; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
$self->_log("START REGENERATE\n"); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
my @data; |
227
|
|
|
|
|
|
|
if($hash->{file}) { |
228
|
|
|
|
|
|
|
my $fh = IO::File->new($hash->{file},'r') or die "Cannot open file [$hash->{file}]: $!\n"; |
229
|
|
|
|
|
|
|
while(<$fh>) { |
230
|
|
|
|
|
|
|
s/\s+$//; |
231
|
|
|
|
|
|
|
my ($fval,$tval) = split(/,/,$_,2); |
232
|
|
|
|
|
|
|
my %data; |
233
|
|
|
|
|
|
|
$data{gstart} = $fval if($fval =~ /^\w+-\w+-\w+-\w+-\w+$/); |
234
|
|
|
|
|
|
|
$data{dstart} = $fval if($fval =~ /^\d+-\d+-\d+T\d+:\d+:\d+Z$/); |
235
|
|
|
|
|
|
|
$data{gend} = $tval if($tval =~ /^\w+-\w+-\w+-\w+-\w+$/); |
236
|
|
|
|
|
|
|
$data{dend} = $tval if($tval =~ /^\d+-\d+-\d+T\d+:\d+:\d+Z$/); |
237
|
|
|
|
|
|
|
push @data, \%data; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
$fh->close; |
240
|
|
|
|
|
|
|
} else { |
241
|
|
|
|
|
|
|
push @data, { gstart => $hash->{gstart}, gend => $hash->{gend}, |
242
|
|
|
|
|
|
|
dstart => $hash->{dstart}, dend => $hash->{dend} }; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
$self->_consume_reports( $maxdate, \@data ); |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
$self->_log("STOP REGENERATE\n"); |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub rebuild { |
251
|
|
|
|
|
|
|
my ($self,$hash) = @_; |
252
|
|
|
|
|
|
|
$self->_log("START REBUILD\n"); |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
my $start = localtime(time); |
255
|
|
|
|
|
|
|
($self->{processed},$self->{stored},$self->{cached}) = (0,0,0); |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
$self->{reparse} = 1; |
258
|
|
|
|
|
|
|
$self->{localonly} = $hash->{localonly} ? 1 : 0; |
259
|
|
|
|
|
|
|
$self->{check} = $hash->{check} ? 1 : 0; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# selection choices: |
263
|
|
|
|
|
|
|
# 1) from guid [to guid] |
264
|
|
|
|
|
|
|
# 2) from date [to date] |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
$hash->{dstart} = $self->_get_createdate( $hash->{gstart}, $hash->{dstart} ); |
267
|
|
|
|
|
|
|
$hash->{dend} = $self->_get_createdate( $hash->{gend}, $hash->{dend} ); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
my @where; |
270
|
|
|
|
|
|
|
push @where, "updated >= '$hash->{dstart}'" if($hash->{dstart}); |
271
|
|
|
|
|
|
|
push @where, "updated <= '$hash->{dend}'" if($hash->{dend}); |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
my $sql = 'SELECT * FROM metabase' . |
274
|
|
|
|
|
|
|
(@where ? ' WHERE ' . join(' AND ',@where) : '') . |
275
|
|
|
|
|
|
|
' ORDER BY updated ASC'; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
$self->_log("START sql=[$sql]\n"); |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# $self->{CPANSTATS}->do_query("DELETE FROM cpanstats WHERE id >= $start AND id <= $end"); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
my $iterator = $self->{METABASE}->iterator('hash',$sql); |
282
|
|
|
|
|
|
|
while(my $row = $iterator->()) { |
283
|
|
|
|
|
|
|
$self->_log("GUID [$row->{guid}]"); |
284
|
|
|
|
|
|
|
$self->{processed}++; |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
my $report = $self->load_fact(undef,0,$row); |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
unless($report) { |
289
|
|
|
|
|
|
|
$self->_log(" ... no report\n"); |
290
|
|
|
|
|
|
|
warn "No report returned [$row->{id},$row->{guid}]\n"; |
291
|
|
|
|
|
|
|
next; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
$self->{report}{id} = $row->{id}; |
295
|
|
|
|
|
|
|
$self->{report}{guid} = $row->{guid}; |
296
|
|
|
|
|
|
|
$self->{report}{metabase} = $self->{facts}; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# corrupt cached report? |
299
|
|
|
|
|
|
|
if($self->reparse_report()) { # true if invalid report |
300
|
|
|
|
|
|
|
$self->_log(".. cannot parse metabase cache report\n"); |
301
|
|
|
|
|
|
|
warn "Cannot parse cached report [$row->{id},$row->{guid}]\n"; |
302
|
|
|
|
|
|
|
next; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
if($self->store_report()) { $self->_log(".. cpanstats stored\n") } |
306
|
|
|
|
|
|
|
else { $self->_log(".. cpanstats not stored\n") } |
307
|
|
|
|
|
|
|
if($self->cache_update()) { $self->_log(".. metabase stored\n") } |
308
|
|
|
|
|
|
|
else { $self->_log(".. bad metabase cache data\n") } |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
$self->{stored}++; |
311
|
|
|
|
|
|
|
$self->{cached}++; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
my $invalid = $self->{invalid} ? scalar(@{$self->{invalid}}) : 0; |
315
|
|
|
|
|
|
|
my $stop = localtime(time); |
316
|
|
|
|
|
|
|
$self->_log("MARKER: processed=$self->{processed}, stored=$self->{stored}, cached=$self->{cached}, invalid=$invalid, start=$start, stop=$stop\n"); |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
$self->commit(); |
319
|
|
|
|
|
|
|
$self->_log("STOP REBUILD\n"); |
320
|
|
|
|
|
|
|
} |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
sub parse { |
323
|
|
|
|
|
|
|
my ($self,$hash) = @_; |
324
|
|
|
|
|
|
|
$self->_log("START PARSE\n"); |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
my @guids = $self->_get_guid_list($hash->{guid},$hash->{file}); |
327
|
|
|
|
|
|
|
return unless(@guids); |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
$self->{force} ||= 0; |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
for my $guid (@guids) { |
332
|
|
|
|
|
|
|
$self->_log("GUID [$guid]"); |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
my ($report,$stored); |
335
|
|
|
|
|
|
|
unless($hash->{force}) { |
336
|
|
|
|
|
|
|
$report = $self->load_fact($guid,1); |
337
|
|
|
|
|
|
|
$stored = $self->retrieve_report($guid); |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
if($report && $stored) { |
341
|
|
|
|
|
|
|
$self->_log(".. report already stored and cached\n"); |
342
|
|
|
|
|
|
|
next; |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
$report = $self->get_fact($guid); |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
unless($report) { |
348
|
|
|
|
|
|
|
$self->_log(".. report not found [$guid]\n"); |
349
|
|
|
|
|
|
|
next; |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
$self->{report}{guid} = $guid; |
353
|
|
|
|
|
|
|
$hash->{report} = $report; |
354
|
|
|
|
|
|
|
if($self->parse_report(%$hash)) { # true if invalid report |
355
|
|
|
|
|
|
|
$self->_log(".. cannot parse report [$guid]\n"); |
356
|
|
|
|
|
|
|
next; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
if($self->store_report()) { $self->_log(".. stored"); } |
360
|
|
|
|
|
|
|
else { $self->_log(".. already stored"); } |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
if($self->cache_report()) { $self->_log(".. cached\n"); } |
363
|
|
|
|
|
|
|
else { $self->_log(".. FAIL: bad cache data\n"); } |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
$self->commit(); |
367
|
|
|
|
|
|
|
$self->_log("STOP PARSE\n"); |
368
|
|
|
|
|
|
|
return 1; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub reparse { |
372
|
|
|
|
|
|
|
my ($self,$hash) = @_; |
373
|
|
|
|
|
|
|
$self->_log("START REPARSE\n"); |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
my @guids = $self->_get_guid_list($hash->{guid},$hash->{file}); |
376
|
|
|
|
|
|
|
return unless(@guids); |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
$self->{reparse} = $self->{force} ? 0 : 1; |
379
|
|
|
|
|
|
|
$self->{localonly} = $hash->{localonly} ? 1 : 0; |
380
|
|
|
|
|
|
|
$self->{check} = $hash->{check} ? 1 : 0; |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
for my $guid (@guids) { |
383
|
|
|
|
|
|
|
$self->_log("GUID [$guid]"); |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
my $report; |
386
|
|
|
|
|
|
|
$report = $self->load_fact($guid) unless($hash->{force}); |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
if($report) { |
389
|
|
|
|
|
|
|
$self->{report}{metabase} = $report; |
390
|
|
|
|
|
|
|
$self->{report}{guid} = $guid; |
391
|
|
|
|
|
|
|
$hash->{report} = $report; |
392
|
|
|
|
|
|
|
if($self->reparse_report(%$hash)) { # true if invalid report |
393
|
|
|
|
|
|
|
$self->_log(".. cannot parse report [$guid]\n"); |
394
|
|
|
|
|
|
|
return 0; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
} else { |
397
|
|
|
|
|
|
|
$report = $self->get_fact($guid) unless($report || $hash->{localonly}); |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
unless($report) { |
400
|
|
|
|
|
|
|
if($self->{localonly}) { |
401
|
|
|
|
|
|
|
$self->_log(".. report not available locally [$guid]\n"); |
402
|
|
|
|
|
|
|
return 0; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
$self->_log(".. report not found [$guid]\n"); |
405
|
|
|
|
|
|
|
return 0; |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
$self->{report}{guid} = $guid; |
409
|
|
|
|
|
|
|
$hash->{report} = $report; |
410
|
|
|
|
|
|
|
if($self->parse_report(%$hash)) { # true if invalid report |
411
|
|
|
|
|
|
|
$self->_log(".. cannot parse report [$guid]\n"); |
412
|
|
|
|
|
|
|
return 0; |
413
|
|
|
|
|
|
|
} |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
if($self->store_report()) { $self->_log(".. stored"); } |
417
|
|
|
|
|
|
|
else { |
418
|
|
|
|
|
|
|
if($self->{time} gt $self->{report}{updated}) { |
419
|
|
|
|
|
|
|
$self->_log(".. FAIL: older than requested [$self->{time}]\n"); |
420
|
|
|
|
|
|
|
return 0; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
$self->_log(".. already stored"); |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
if($self->cache_report()) { $self->_log(".. cached\n"); } |
426
|
|
|
|
|
|
|
else { $self->_log(".. FAIL: bad cache data\n"); } |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
$self->commit(); |
430
|
|
|
|
|
|
|
$self->_log("STOP REPARSE\n"); |
431
|
|
|
|
|
|
|
return 1; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
sub tail { |
435
|
|
|
|
|
|
|
my ($self,$hash) = @_; |
436
|
|
|
|
|
|
|
return unless($hash->{file}); |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
$self->_log("START TAIL\n"); |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
my $guids = $self->get_tail_guids(); |
441
|
|
|
|
|
|
|
my $fh = IO::File->new($hash->{file},'a+') or die "Cannot read file [$hash->{file}]: $!"; |
442
|
|
|
|
|
|
|
print $fh "$_\n" for(@$guids); |
443
|
|
|
|
|
|
|
$fh->close; |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
$self->_log("STOP TAIL\n"); |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
449
|
|
|
|
|
|
|
# Internal Methods |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
sub commit { |
452
|
|
|
|
|
|
|
my $self = shift; |
453
|
|
|
|
|
|
|
for(qw(CPANSTATS)) { |
454
|
|
|
|
|
|
|
next unless($self->{$_}); |
455
|
|
|
|
|
|
|
$self->{$_}->do_commit; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
sub get_tail_guids { |
460
|
|
|
|
|
|
|
my $self = shift; |
461
|
|
|
|
|
|
|
my $guids; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
eval { |
464
|
|
|
|
|
|
|
# $guids = $self->{librarian}->search( |
465
|
|
|
|
|
|
|
# 'core.type' => 'CPAN-Testers-Report', |
466
|
|
|
|
|
|
|
# 'core.update_time' => { ">", 0 }, |
467
|
|
|
|
|
|
|
# '-desc' => 'core.update_time', |
468
|
|
|
|
|
|
|
# '-limit' => $self->{poll_limit}, |
469
|
|
|
|
|
|
|
# ); |
470
|
|
|
|
|
|
|
$guids = $self->{librarian}->search( |
471
|
|
|
|
|
|
|
'-where' => [ |
472
|
|
|
|
|
|
|
'-and' => |
473
|
|
|
|
|
|
|
[ '-eq' => 'core.type' => 'CPAN-Testers-Report' ], |
474
|
|
|
|
|
|
|
[ '-ge' => 'core.update_time' => 0 ] |
475
|
|
|
|
|
|
|
], |
476
|
|
|
|
|
|
|
'-order' => [ '-desc' => 'core.update_time' ], |
477
|
|
|
|
|
|
|
'-limit' => $self->{poll_limit}, |
478
|
|
|
|
|
|
|
); |
479
|
|
|
|
|
|
|
}; |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
$self->_log(" ... Metabase Tail Failed [$@]\n") if($@); |
482
|
|
|
|
|
|
|
$self->_log("Retrieved ".($guids ? scalar(@$guids) : 0)." guids\n"); |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
return $guids; |
485
|
|
|
|
|
|
|
} |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
sub get_next_dates { |
488
|
|
|
|
|
|
|
my ($self,$to) = @_; |
489
|
|
|
|
|
|
|
my (@data,$from); |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
my $time = sprintf "%sT%sZ", DateTime->now->ymd, DateTime->now->hms; |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
$self->_log("DATES to=$to, time=$time\n"); |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
# note that because Amazon's SimpleDB can return odd entries out of sync, we have to look at previous entries |
496
|
|
|
|
|
|
|
# to ensure we are starting from the right point. Also ignore date/times in the future. |
497
|
|
|
|
|
|
|
my @rows = $self->{METABASE}->get_query('array','SELECT updated FROM metabase WHERE updated <= ? ORDER BY updated DESC LIMIT 10',$time); |
498
|
|
|
|
|
|
|
for my $row (@rows) { |
499
|
|
|
|
|
|
|
if($from) { |
500
|
|
|
|
|
|
|
my $diff = abs( _date_diff($from,$row->[0]) ); # just interested in the difference |
501
|
|
|
|
|
|
|
$self->_log("get_next_dates from=[$from], updated=[$row->[0]], diff=$diff, DIFF=$DIFF\n"); |
502
|
|
|
|
|
|
|
next if($diff < $DIFF); |
503
|
|
|
|
|
|
|
} |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
$from = $row->[0]; |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
$from ||= '1999-01-01T00:00:00Z'; |
509
|
|
|
|
|
|
|
if($from gt $to) { |
510
|
|
|
|
|
|
|
my $xx = $from; |
511
|
|
|
|
|
|
|
$from = $to; |
512
|
|
|
|
|
|
|
$to = $xx; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
$self->_log("NEXT from=[$from], to=[$to]\n"); |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
while($from lt $to) { |
518
|
|
|
|
|
|
|
my @from = $from =~ /(\d+)\-(\d+)\-(\d+)T(\d+):(\d+):(\d+)/; |
519
|
|
|
|
|
|
|
my $dt = DateTime->new( |
520
|
|
|
|
|
|
|
year => $from[0], month => $from[1], day => $from[2], |
521
|
|
|
|
|
|
|
hour => $from[3], minute => $from[4], second => $from[5], |
522
|
|
|
|
|
|
|
); |
523
|
|
|
|
|
|
|
$dt->add( DateTime::Duration->new( minutes => $MINS ) ); |
524
|
|
|
|
|
|
|
my $split = sprintf "%sT%sZ", $dt->ymd, $dt->hms; |
525
|
|
|
|
|
|
|
if($split lt $to) { |
526
|
|
|
|
|
|
|
push @data, { dstart => $from, dend => $split }; |
527
|
|
|
|
|
|
|
} else { |
528
|
|
|
|
|
|
|
push @data, { dstart => $from, dend => $to }; |
529
|
|
|
|
|
|
|
} |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
$from = $split; |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
return \@data; |
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub get_next_guids { |
538
|
|
|
|
|
|
|
my ($self,$start,$end) = @_; |
539
|
|
|
|
|
|
|
my ($guids); |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
$self->{time} ||= 0; |
542
|
|
|
|
|
|
|
$self->{last} ||= 0; |
543
|
|
|
|
|
|
|
$start ||= 0; |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
$self->_log("PRE time=[$self->{time}], last=[$self->{last}], start=[".($start||'')."], end=[".($end||'')."]\n"); |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
if($start) { |
548
|
|
|
|
|
|
|
$self->{time} = $start; |
549
|
|
|
|
|
|
|
$self->{time_to} = $end || ''; |
550
|
|
|
|
|
|
|
} else { |
551
|
|
|
|
|
|
|
my $time = sprintf "%sT%sZ", DateTime->now->ymd, DateTime->now->hms; |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
# note that because Amazon's SimpleDB can return odd entries out of sync, we have to look at previous entries |
554
|
|
|
|
|
|
|
# to ensure we are starting from the right point. Also ignore date/times in the future. |
555
|
|
|
|
|
|
|
my @rows = $self->{METABASE}->get_query('array','SELECT updated FROM metabase WHERE updated <= ? ORDER BY updated DESC LIMIT 10',$time); |
556
|
|
|
|
|
|
|
for my $row (@rows) { |
557
|
|
|
|
|
|
|
if($self->{time}) { |
558
|
|
|
|
|
|
|
my $diff = abs( _date_diff($self->{time},$row->[0]) ); # just interested in the difference |
559
|
|
|
|
|
|
|
next if($diff < $DIFF); |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
$self->{time} = $row->[0]; |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
$self->{time} ||= '1999-01-01T00:00:00Z'; |
566
|
|
|
|
|
|
|
if($self->{last} ge $self->{time}) { |
567
|
|
|
|
|
|
|
my @ts = $self->{last} =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z/; |
568
|
|
|
|
|
|
|
$ts[1]--; |
569
|
|
|
|
|
|
|
my $ts = timelocal(reverse @ts); |
570
|
|
|
|
|
|
|
@ts = localtime($ts + $self->{offset}); # increment the offset for next time |
571
|
|
|
|
|
|
|
$self->{time} = sprintf "%04d-%02d-%02dT%02d:%02d:%02dZ", $ts[5]+1900,$ts[4]+1,$ts[3], $ts[2],$ts[1],$ts[0]; |
572
|
|
|
|
|
|
|
} |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
$self->_log("START time=[$self->{time}], last=[$self->{last}]\n"); |
576
|
|
|
|
|
|
|
$self->{last} = $self->{time}; |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
eval { |
579
|
|
|
|
|
|
|
# if($self->{time_to}) { |
580
|
|
|
|
|
|
|
# $guids = $self->{librarian}->search( |
581
|
|
|
|
|
|
|
# 'core.type' => 'CPAN-Testers-Report', |
582
|
|
|
|
|
|
|
# 'core.update_time' => { -and => { ">=" => $self->{time}, "<=" => $self->{time_to} } }, |
583
|
|
|
|
|
|
|
# '-asc' => 'core.update_time', |
584
|
|
|
|
|
|
|
# '-limit' => $self->{poll_limit}, |
585
|
|
|
|
|
|
|
# ); |
586
|
|
|
|
|
|
|
# } else { |
587
|
|
|
|
|
|
|
$guids = $self->{librarian}->search( |
588
|
|
|
|
|
|
|
'-where' => [ |
589
|
|
|
|
|
|
|
'-and' => |
590
|
|
|
|
|
|
|
[ '-eq' => 'core.type' => 'CPAN-Testers-Report' ], |
591
|
|
|
|
|
|
|
[ '-ge' => 'core.update_time' => $self->{time} ] |
592
|
|
|
|
|
|
|
], |
593
|
|
|
|
|
|
|
'-order' => [ '-asc' => 'core.update_time' ], |
594
|
|
|
|
|
|
|
'-limit' => $self->{poll_limit}, |
595
|
|
|
|
|
|
|
); |
596
|
|
|
|
|
|
|
# } |
597
|
|
|
|
|
|
|
}; |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
$self->_log(" ... Metabase Search Failed [$@]\n") if($@); |
600
|
|
|
|
|
|
|
$self->_log("Retrieved ".($guids ? scalar(@$guids) : 0)." guids\n"); |
601
|
|
|
|
|
|
|
return $guids; |
602
|
|
|
|
|
|
|
} |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub retrieve_reports { |
605
|
|
|
|
|
|
|
my ($self,$guids,$start) = @_; |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
if($guids) { |
608
|
|
|
|
|
|
|
for my $guid (@$guids) { |
609
|
|
|
|
|
|
|
$self->_log("GUID [$guid]"); |
610
|
|
|
|
|
|
|
$self->{processed}++; |
611
|
|
|
|
|
|
|
$self->{msg} = ''; |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
if(my $report = $self->get_fact($guid)) { |
614
|
|
|
|
|
|
|
$self->{report}{guid} = $guid; |
615
|
|
|
|
|
|
|
next if($self->parse_report(report => $report)); # true if invalid report |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
if($self->store_report()) { |
618
|
|
|
|
|
|
|
$self->{msg} .= ".. stored"; |
619
|
|
|
|
|
|
|
$self->{stored}++; |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
} else { |
622
|
|
|
|
|
|
|
if($self->{time} gt $self->{report}{updated}) { |
623
|
|
|
|
|
|
|
$self->_log(".. FAIL: older than requested [$self->{time}]\n"); |
624
|
|
|
|
|
|
|
next; |
625
|
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
$self->{msg} .= ".. already stored"; |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
if($self->cache_report()) { $self->_log(".. cached\n"); $self->{cached}++; } |
629
|
|
|
|
|
|
|
else { $self->_log(".. bad cache data\n"); } |
630
|
|
|
|
|
|
|
} else { |
631
|
|
|
|
|
|
|
$self->_log(".. FAIL\n"); |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
} |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
$self->commit(); |
637
|
|
|
|
|
|
|
my $invalid = $self->{invalid} ? scalar(@{$self->{invalid}}) : 0; |
638
|
|
|
|
|
|
|
my $stop = localtime(time); |
639
|
|
|
|
|
|
|
$self->_log("MARKER: processed=$self->{processed}, stored=$self->{stored}, cached=$self->{cached}, invalid=$invalid, start=$start, stop=$stop\n"); |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
# only email invalid reports during the generate process |
642
|
|
|
|
|
|
|
$self->_send_email() if($self->{invalid}); |
643
|
|
|
|
|
|
|
} |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
sub already_saved { |
646
|
|
|
|
|
|
|
my ($self,$guid) = @_; |
647
|
|
|
|
|
|
|
my @rows = $self->{METABASE}->get_query('array','SELECT updated FROM metabase WHERE guid=?',$guid); |
648
|
|
|
|
|
|
|
return $rows[0]->[0] if(@rows); |
649
|
|
|
|
|
|
|
return 0; |
650
|
|
|
|
|
|
|
} |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub load_fact { |
653
|
|
|
|
|
|
|
my ($self,$guid,$check,$row) = @_; |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
if(!$row && $guid) { |
656
|
|
|
|
|
|
|
my @rows = $self->{METABASE}->get_query('hash','SELECT report,fact FROM metabase WHERE guid=?',$guid); |
657
|
|
|
|
|
|
|
$row = $rows[0] if(@rows); |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
if($row) { |
661
|
|
|
|
|
|
|
if($row->{fact}) { |
662
|
|
|
|
|
|
|
$self->{fact} = $self->{serializer2}->deserialize($row->{fact}); |
663
|
|
|
|
|
|
|
$self->{facts} = $self->dereference_report($self->{fact}); |
664
|
|
|
|
|
|
|
return $self->{facts}; |
665
|
|
|
|
|
|
|
} |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
if($row->{report}) { |
668
|
|
|
|
|
|
|
$self->{facts} = $self->{serializer}->deserialize($row->{report}); |
669
|
|
|
|
|
|
|
return $self->{facts}; |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
$self->_log(" ... no report [guid=$guid]\n") unless($check); |
674
|
|
|
|
|
|
|
return; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
# Extract a report fact from the Metabase librarian by GUID.
# Caches the fact on the object and returns it; returns nothing (and
# logs the extraction error) on failure.
sub get_fact {
    my ($self, $guid) = @_;

    #print STDERR "guid=$guid\n";
    my $extracted;
    eval { $extracted = $self->{librarian}->extract( $guid ) };

    unless ($extracted) {
        $self->_log(" ... no report [guid=$guid] [$@]\n");
        return;
    }

    $self->{fact} = $extracted;
    return $extracted;
}
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# Unpack a report object into a plain hash keyed by fact class name,
# with each fact's JSON 'content' decoded into a Perl structure.
sub dereference_report {
    my ($self, $report) = @_;

    my %unpacked;
    for my $item ($report->facts()) {
        my $class  = ref $item;
        my $struct = $item->as_struct;
        $struct->{content} = decode_json($struct->{content});
        $unpacked{$class}  = $struct;
    }

    return \%unpacked;
}
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
# Extract the interesting fields from a CPAN::Testers::Report object into
# $self->{report}, then validate each mandatory field.
#
# Named arguments: report => the Metabase report object,
# options => { exclude => {field => 1, ...} } to skip individual checks.
# Returns 1 when the report is invalid/incomplete, 0 on success.
sub parse_report {
    my ($self,%hash) = @_;
    my $options = $hash{options};
    my $report  = $hash{report};
    my $guid    = $self->{report}{guid};
    my $invalid;

    $self->{report}{created} = $report->{metadata}{core}{creation_time};
    $self->{report}{updated} = $report->{metadata}{core}{update_time};

    # anything other than a full report object cannot be parsed
    unless(ref($report) eq 'CPAN::Testers::Report') {
        $self->{msg} .= ".. ref [" . ref($report) . "]";
        return 1;
    }

    my @facts = $report->facts();
    for my $fact (@facts) {
        if(ref $fact eq 'CPAN::Testers::Fact::TestSummary') {
            $self->{report}{metabase}{'CPAN::Testers::Fact::TestSummary'} = $fact->as_struct;
            $self->{report}{metabase}{'CPAN::Testers::Fact::TestSummary'}{content} = decode_json($self->{report}{metabase}{'CPAN::Testers::Fact::TestSummary'}{content});

            $self->{report}{state}    = lc $fact->{content}{grade};
            $self->{report}{platform} = $fact->{content}{archname};
            $self->{report}{osname}   = $self->_osname($fact->{content}{osname});
            $self->{report}{osvers}   = $fact->{content}{osversion};
            $self->{report}{perl}     = $fact->{content}{perl_version};
            #$self->{report}{created} = $fact->{metadata}{core}{creation_time};
            #$self->{report}{updated} = $fact->{metadata}{core}{update_time};

            my $dist = Metabase::Resource->new( $fact->resource );
            $self->{report}{dist}     = $dist->metadata->{dist_name};
            $self->{report}{version}  = $dist->metadata->{dist_version};
            $self->{report}{resource} = $dist->metadata->{resource};

            # some distros are a pain!
            if($self->{report}{version} eq '' && $MAPPINGS{$self->{report}{dist}}) {
                $self->{report}{version} = $MAPPINGS{$self->{report}{dist}}->[1];
                $self->{report}{dist}    = $MAPPINGS{$self->{report}{dist}}->[0];
            } elsif($self->{report}{version} eq '') {
                $self->{report}{version} = 0;
            }

            $self->{report}{from} = $self->_get_tester( $fact->creator->resource );

            # alternative API
            #my $profile = $fact->creator->user;
            #$self->{report}{from} = $profile->{email};
            #$self->{report}{from} =~ s/'/''/g; #'
            #$self->{report}{dist}    = $fact->resource->dist_name;
            #$self->{report}{version} = $fact->resource->dist_version;

        } elsif(ref $fact eq 'CPAN::Testers::Fact::LegacyReport') {
            $self->{report}{metabase}{'CPAN::Testers::Fact::LegacyReport'} = $fact->as_struct;
            $self->{report}{metabase}{'CPAN::Testers::Fact::LegacyReport'}{content} = decode_json($self->{report}{metabase}{'CPAN::Testers::Fact::LegacyReport'}{content});
            $invalid = 'missing textreport' if(length $fact->{content}{textreport} < 10); # what is the smallest report?

            $self->{report}{perl} = $fact->{content}{perl_version};
        }
    }

    if($invalid) {
        push @{$self->{invalid}}, {msg => $invalid, guid => $guid};
        return 1;
    }

    # fixes from metabase formatting
    $self->{report}{perl} =~ s/^v//; # no leading 'v'
    $self->_check_arch_os();

    if($self->{report}{created}) {
        my @created = $self->{report}{created} =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z/; # 2010-02-23T20:33:52Z
        $self->{report}{postdate} = sprintf "%04d%02d", $created[0], $created[1];
        $self->{report}{fulldate} = sprintf "%04d%02d%02d%02d%02d", $created[0], $created[1], $created[2], $created[3], $created[4];
    } else {
        # no creation time in the metadata: fall back to "now"
        my @created = localtime(time);
        $self->{report}{postdate} = sprintf "%04d%02d", $created[5]+1900, $created[4]+1;
        $self->{report}{fulldate} = sprintf "%04d%02d%02d%02d%02d", $created[5]+1900, $created[4]+1, $created[3], $created[2], $created[1];
    }

    $self->{msg} .= ".. time [$self->{report}{created}][$self->{report}{updated}]";

    $self->{report}{type} = 2;
    if($self->{DISABLE} && $self->{DISABLE}{$self->{report}{from}}) {
        # tester has been disabled: mark the report invalid
        $self->{report}{state} .= ':invalid';
        $self->{report}{type} = 3;
    } elsif($self->{report}{response} && $self->{report}{response} =~ m!/perl6/!) {
        # $self->{report}{type} = 6;
        return 1;
    }

    #print STDERR "\n====\nreport=".Dumper($self->{report});

    return 1 unless($self->_valid_field($guid, 'dist'     => $self->{report}{dist})     || ($options && $options->{exclude}{dist}));
    return 1 unless($self->_valid_field($guid, 'version'  => $self->{report}{version})  || ($options && $options->{exclude}{version}));
    return 1 unless($self->_valid_field($guid, 'from'     => $self->{report}{from})     || ($options && $options->{exclude}{from}));
    return 1 unless($self->_valid_field($guid, 'perl'     => $self->{report}{perl})     || ($options && $options->{exclude}{perl}));
    return 1 unless($self->_valid_field($guid, 'platform' => $self->{report}{platform}) || ($options && $options->{exclude}{platform}));
    return 1 unless($self->_valid_field($guid, 'osname'   => $self->{report}{osname})   || ($options && $options->{exclude}{osname}));
    # bug fix: previously tested exclude{osname} (copy/paste) - must be osvers
    return 1 unless($self->_valid_field($guid, 'osvers'   => $self->{report}{osvers})   || ($options && $options->{exclude}{osvers}));

    return 0;
}
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
# Rebuild $self->{report} from a previously-cached TestSummary fact
# structure (re-serialising its content, then reconstructing the fact).
# Mirrors the validation in parse_report(). Returns 1 on an invalid
# report, 0 on success. options => { exclude => {...} } skips checks.
sub reparse_report {
    my ($self,%hash) = @_;
    my $fact    = 'CPAN::Testers::Fact::TestSummary';
    my $options = $hash{options};

    # from_struct() expects the content as a JSON string
    $self->{report}{metabase}{$fact}{content} = encode_json($self->{report}{metabase}{$fact}{content});
    my $report = CPAN::Testers::Fact::TestSummary->from_struct( $self->{report}{metabase}{$fact} );
    my $guid   = $self->{report}{guid};

    $self->{report}{state}    = lc $report->{content}{grade};
    $self->{report}{platform} = $report->{content}{archname};
    $self->{report}{osname}   = $self->_osname($report->{content}{osname});
    $self->{report}{osvers}   = $report->{content}{osversion};
    $self->{report}{perl}     = $report->{content}{perl_version};
    $self->{report}{created}  = $report->{metadata}{core}{creation_time};

    my $dist = Metabase::Resource->new( $report->{metadata}{core}{resource} );
    $self->{report}{dist}     = $dist->metadata->{dist_name};
    $self->{report}{version}  = $dist->metadata->{dist_version};
    $self->{report}{resource} = $dist->metadata->{resource};

    $self->{report}{from} = $self->_get_tester( $report->{metadata}{core}{creator}{resource} );

    if($self->{report}{created}) {
        my @created = $self->{report}{created} =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z/; # 2010-02-23T20:33:52Z
        $self->{report}{postdate} = sprintf "%04d%02d", $created[0], $created[1];
        $self->{report}{fulldate} = sprintf "%04d%02d%02d%02d%02d", $created[0], $created[1], $created[2], $created[3], $created[4];
    } else {
        # no creation time in the metadata: fall back to "now"
        my @created = localtime(time);
        $self->{report}{postdate} = sprintf "%04d%02d", $created[5]+1900, $created[4]+1;
        $self->{report}{fulldate} = sprintf "%04d%02d%02d%02d%02d", $created[5]+1900, $created[4]+1, $created[3], $created[2], $created[1];
    }

    $self->{report}{type} = 2;
    if($self->{DISABLE} && $self->{DISABLE}{$self->{report}{from}}) {
        # tester has been disabled: mark the report invalid
        $self->{report}{state} .= ':invalid';
        $self->{report}{type} = 3;
    } elsif($self->{report}{response} && $self->{report}{response} =~ m!/perl6/!) {
        # $self->{report}{type} = 6;
        return 1;
    }

    return 1 unless($self->_valid_field($guid, 'dist'     => $self->{report}{dist})     || ($options && $options->{exclude}{dist}));
    return 1 unless($self->_valid_field($guid, 'version'  => $self->{report}{version})  || ($options && $options->{exclude}{version}));
    return 1 unless($self->_valid_field($guid, 'from'     => $self->{report}{from})     || ($options && $options->{exclude}{from}));
    return 1 unless($self->_valid_field($guid, 'perl'     => $self->{report}{perl})     || ($options && $options->{exclude}{perl}));
    return 1 unless($self->_valid_field($guid, 'platform' => $self->{report}{platform}) || ($options && $options->{exclude}{platform}));
    return 1 unless($self->_valid_field($guid, 'osname'   => $self->{report}{osname})   || ($options && $options->{exclude}{osname}));
    # bug fix: previously tested exclude{osname} (copy/paste) - must be osvers
    return 1 unless($self->_valid_field($guid, 'osvers'   => $self->{report}{osvers})   || ($options && $options->{exclude}{osvers}));

    return 0;
}
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
# Fetch the full cpanstats row for a report GUID.
# Returns the row hashref, or nothing when the GUID is unknown/empty.
sub retrieve_report {
    my $self = shift;
    my $guid = shift or return;

    my ($row) = $self->{CPANSTATS}->get_query(
        'hash', 'SELECT * FROM cpanstats WHERE guid=?', $guid
    );

    return $row if $row;
    return;
}
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
# Write the currently parsed report ($self->{report}) into the cpanstats
# database: the cpanstats row itself, plus the release_data summary and
# (for PASS reports) the passreports table, and page_request entries to
# trigger page rebuilds. Returns 1 when a write (or check-mode log)
# happened, 0 when the report already existed and reparse mode is off.
sub store_report {
    my $self = shift;
    my @fields = qw(guid state postdate from dist version platform perl osname osvers fulldate type uploadid);

    # copy the report fields, defaulting missing values so the SQL never
    # receives undef
    my %fields = map {$_ => $self->{report}{$_}} @fields;
    $fields{$_} ||= 0 for(qw(type uploadid));
    $fields{$_} ||= '0' for(qw(perl));
    $fields{$_} ||= '' for(@fields);
    # resolve the uploadid from the dist/version cache when unknown
    $fields{uploadid} ||= $self->{upload}{$fields{dist}}{$fields{version}};

    # values in column order; $values[0] is the guid
    my @values = map {$fields{$_}} @fields;

    my %SQL = (
        'SELECT' => {
            CPANSTATS => 'SELECT id FROM cpanstats WHERE guid=?',
            RELEASE   => 'SELECT id FROM release_data WHERE guid=?',
        },
        'INSERT' => {
            CPANSTATS => 'INSERT INTO cpanstats (guid,state,postdate,tester,dist,version,platform,perl,osname,osvers,fulldate,type,uploadid) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)',
            RELEASE   => 'INSERT INTO release_data (id,guid,dist,version,oncpan,distmat,perlmat,patched,pass,fail,na,unknown,uploadid) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)',
            PASSES    => 'INSERT IGNORE passreports SET platform=?, osname=?, perl=?, dist=?, postdate=?',
        },
        'UPDATE' => {
            CPANSTATS => 'UPDATE cpanstats SET state=?,postdate=?,tester=?,dist=?,version=?,platform=?,perl=?,osname=?,osvers=?,fulldate=?,type=?,uploadid=? WHERE guid=?',
            RELEASE   => 'UPDATE release_data SET id=?,dist=?,version=?,oncpan=?,distmat=?,perlmat=?,patched=?,pass=?,fail=?,na=?,unknown=?,uploadid=? WHERE guid=?',
        },
    );

    # update the mysql database
    my @rows = $self->{CPANSTATS}->get_query('array',$SQL{SELECT}{CPANSTATS},$values[0]);
    if(@rows) {
        # report already stored: only touch it in reparse mode
        if($self->{reparse}) {
            my ($guid,@update) = @values;
            if($self->{check}) {
                $self->_log( "CHECK: $SQL{UPDATE}{CPANSTATS},[" . join(',',@update,$guid) . "]\n" );
            } else {
                $self->{CPANSTATS}->do_query($SQL{UPDATE}{CPANSTATS},@update,$guid);
            }
        } else {
            $self->{report}{id} = $rows[0]->[0];
            return 0;
        }
    } else {
        if($self->{check}) {
            $self->_log( "CHECK: $SQL{INSERT}{CPANSTATS},[" . join(',',@values) . "]\n" );
        } else {
            $self->{report}{id} = $self->{CPANSTATS}->id_query($SQL{INSERT}{CPANSTATS},@values);
        }
    }

    # in check mode, assume the rest happens
    return 1 if($self->{check});

    # perl version components
    my ($perl,$patch,$devel) = $self->_get_perl_version($fields{perl});

    # only valid perl5 reports
    if($self->{report}{type} == 2) {
        $fields{id} = $self->{report}{id};

        # push page requests
        # - note we only update the author if this is the *latest* version of the distribution
        my $author = $self->{report}{pauseid} || $self->_get_author($fields{uploadid},$fields{dist},$fields{version});
        $self->{CPANSTATS}->do_query("INSERT INTO page_requests (type,name,weight,id) VALUES ('author',?,1,?)",$author,$fields{id})  if($author);
        $self->{CPANSTATS}->do_query("INSERT INTO page_requests (type,name,weight,id) VALUES ('distro',?,1,?)",$fields{dist},$fields{id});

        # update or insert the per-release summary counters
        my @rows = $self->{CPANSTATS}->get_query('array',$SQL{SELECT}{RELEASE},$fields{guid});
        #print STDERR "# select release $SQL{SELECT}{RELEASE},$fields{guid}\n";
        if(@rows) {
            if($self->{reparse}) {
                $self->{CPANSTATS}->do_query($SQL{UPDATE}{RELEASE},
                    $fields{id},                        # id,
                    $fields{dist},$fields{version},     # dist, version

                    # oncpan: 1 = on CPAN, 2 = backpan only
                    $self->_oncpan($fields{uploadid},$fields{dist},$fields{version}) ? 1 : 2,

                    # distmat: 2 = developer release (underscore version)
                    $fields{version} =~ /_/ ? 2 : 1,
                    $devel  ? 2 : 1,
                    $patch  ? 2 : 1,

                    # one-hot grade counters
                    $fields{state} eq 'pass'    ? 1 : 0,
                    $fields{state} eq 'fail'    ? 1 : 0,
                    $fields{state} eq 'na'      ? 1 : 0,
                    $fields{state} eq 'unknown' ? 1 : 0,

                    $fields{uploadid},

                    $fields{guid});                     # guid
            }
        } else {
            #print STDERR "# insert release $SQL{INSERT}{RELEASE},$fields[0],$fields[1]\n";
            $self->{CPANSTATS}->do_query($SQL{INSERT}{RELEASE},
                $fields{id},$fields{guid},          # id, guid
                $fields{dist},$fields{version},     # dist, version

                # oncpan: 1 = on CPAN, 2 = backpan only
                $self->_oncpan($fields{uploadid},$fields{dist},$fields{version}) ? 1 : 2,

                # distmat: 2 = developer release (underscore version)
                $fields{version} =~ /_/ ? 2 : 1,
                $devel  ? 2 : 1,
                $patch  ? 2 : 1,

                # one-hot grade counters
                $fields{state} eq 'pass'    ? 1 : 0,
                $fields{state} eq 'fail'    ? 1 : 0,
                $fields{state} eq 'na'      ? 1 : 0,
                $fields{state} eq 'unknown' ? 1 : 0,

                $fields{uploadid});
        }
    }

    # record PASS reports for the pass matrix pages
    if($fields{state} eq 'pass') {
        $fields{perl} =~ s/\s.*//;  # only need to know the main release
        $self->{CPANSTATS}->do_query($SQL{INSERT}{PASSES},
            $fields{platform},
            $fields{osname},
            $fields{perl},
            $fields{dist},
            $fields{postdate});
    }

    # commit in batches of 500 to keep transactions manageable
    if((++$self->{stat_count} % 500) == 0) {
        $self->commit;
    }

    return 1;
}
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
# Cache the current report into the local metabase table, storing both
# the JSON-serialised metabase structure ('report') and the serialised
# fact object ('fact'). Returns 0 when there is nothing to cache, 1
# otherwise (including check/local-only modes, where nothing is written).
sub cache_report {
    my $self = shift;
    return 0 unless($self->{report}{guid} && $self->{report}{metabase});

    # in check mode, assume the rest happens
    return 1 if($self->{check});
    return 1 if($self->{localonly});

    my ($json,$data,$fact);

    # serialise the JSON string first; if that fails ($@ set), fall back
    # to serialising the raw structure. Errors are deliberately
    # swallowed - missing data is stored as an empty string below.
    eval { $json = encode_json($self->{report}{metabase}); };
    eval { $data = $self->{serializer}->serialize("$json"); };
    eval { $data = $self->{serializer}->serialize( $self->{report}{metabase} ); } if($@);
    eval { $fact = $self->{serializer2}->serialize($self->{fact}); };

    $data ||= '';
    $fact ||= '';

    $self->{METABASE}->do_query('INSERT IGNORE INTO metabase (guid,id,updated,report,fact) VALUES (?,?,?,?,?)',
        $self->{report}{guid},$self->{report}{id},$self->{report}{updated},$data,$fact);

    # commit in batches of 500 to keep transactions manageable
    if((++$self->{meta_count} % 500) == 0) {
        $self->{METABASE}->do_commit;
    }

    return 1;
}
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
# Record the cpanstats row id against an existing metabase cache entry.
# Returns 0 when guid/id are missing, 1 otherwise (check/local-only
# modes skip the write but still report success).
sub cache_update {
    my $self = shift;

    my $guid = $self->{report}{guid};
    my $id   = $self->{report}{id};
    return 0 unless $guid && $id;

    # in check mode, assume the rest happens
    return 1 if $self->{check};
    return 1 if $self->{localonly};

    $self->{METABASE}->do_query('UPDATE metabase SET id=? WHERE guid=?', $id, $guid);

    # commit in batches of 500 to keep transactions manageable
    $self->{METABASE}->do_commit unless ++$self->{meta_count} % 500;

    return 1;
}
1042
|
|
|
|
|
|
|
|
1043
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
1044
|
|
|
|
|
|
|
# Internal Cache Methods |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
# Prime the in-memory upload caches from the uploads table:
# oncpan (uploadid => release type) and upload (dist/version => uploadid).
sub load_uploads {
    my $self = shift;

    for my $upload ($self->{CPANSTATS}->get_query('hash','SELECT uploadid,dist,version,type FROM uploads')) {
        $self->{oncpan}{ $upload->{uploadid} } = $upload->{type};
        $self->{upload}{ $upload->{dist} }{ $upload->{version} } = $upload->{uploadid};
    }
}
1055
|
|
|
|
|
|
|
|
1056
|
|
|
|
|
|
|
# Prime the in-memory author caches from the ixlatest table:
# author (dist/version => pauseid) and author2 (uploadid => pauseid).
sub load_authors {
    my $self = shift;

    for my $latest ($self->{CPANSTATS}->get_query('hash','SELECT author,dist,version,uploadid FROM ixlatest')) {
        $self->{author}{ $latest->{dist} }{ $latest->{version} } = $latest->{author};
        $self->{author2}{ $latest->{uploadid} } = $latest->{author};
    }
}
1065
|
|
|
|
|
|
|
|
1066
|
|
|
|
|
|
|
# Load the known perl version decompositions from the perl_version
# table into $self->{perls}, marking each as already saved so
# save_perl_versions() will not re-insert them.
sub load_perl_versions {
    my $self = shift;

    for my $rec ($self->{CPANSTATS}->get_query('hash','SELECT * FROM perl_version')) {
        $self->{perls}{ $rec->{version} } = {
            perl  => $rec->{perl},
            patch => $rec->{patch},
            devel => $rec->{devel},
            saved => 1,
        };
    }
}
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
# Persist any perl version decompositions discovered during this run
# (entries without the 'saved' flag) into the perl_version table.
sub save_perl_versions {
    my $self = shift;

    while (my ($vers, $info) = each %{ $self->{perls} }) {
        next if $info->{saved};
        $self->{CPANSTATS}->do_query(
            "INSERT INTO perl_version (version,perl,patch,devel) VALUES (?,?,?,?)",
            $vers, $info->{perl}, $info->{patch}, $info->{devel});
    }
}
1089
|
|
|
|
|
|
|
|
1090
|
|
|
|
|
|
|
#---------------------------------------------------------------------------- |
1091
|
|
|
|
|
|
|
# Private Methods |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# Walk one or more date windows ($dataset entries with gstart/dstart and
# gend/dend), pulling new report GUIDs from the Metabase feed, parsing,
# storing and caching each one. Logs progress markers and finally mails
# any invalid reports found. $maxdate is currently unused (see the
# commented-out range check below).
sub _consume_reports {
    my ($self,$maxdate,$dataset) = @_;

    for my $data (@$dataset) {
        # resolve the window boundaries, preferring the GUID's cached
        # 'updated' timestamp over the raw date string
        my $start = $self->_get_createdate( $data->{gstart}, $data->{dstart} );
        my $end   = $self->_get_createdate( $data->{gend}, $data->{dend} );

        unless($start && $end) {
            $start ||= '';
            $end   ||= '';
            $self->_log("BAD DATES: start=$start, end=$end [missing dates]\n");
            next;
        }
        if($start ge $end) {
            $self->_log("BAD DATES: start=$start, end=$end [end before start]\n");
            next;
        }
#        if($end gt $maxdate) {
#            $self->_log("BAD DATES: start=$start, end=$end [exceeds $maxdate]\n");
#            next;
#        }

        $self->_log("LOOP: start=$start, end=$end\n");

        ($self->{processed},$self->{stored},$self->{cached}) = (0,0,0);

        # what guids do we already have?
        my $sql = 'SELECT guid FROM metabase WHERE updated >= ? AND updated <= ? ORDER BY updated asc';
        my @guids = $self->{METABASE}->get_query('hash',$sql,$data->{dstart},$data->{dend});
        my %guids = map {$_->{guid} => 1} @guids;

        # note that because Amazon's SimpleDB can return odd entries out of
        # sync, we have to look at previous entries to ensure we are starting
        # from the right point
        my ($update,$prev,$last) = ($start,$start,$start);
        my @times = ();

        my $prior = [ 0, 0 ];
        my $saved = 0;
        while($update lt $end) {
            $self->_log("UPDATE: update=$update, end=$end, saved=$saved, guids=".(scalar(@guids))."\n");

            # get list of guids from last update date
            my $guids = $self->get_next_guids($update,$end);
            last unless($guids);

            # drop anything already cached locally
            @guids = grep { !$guids{$_} } @$guids;
            last unless(@guids);
            last if($prior->[0] eq $guids[0] && $prior->[1] eq $guids[-1]); # prevent an endless loop
            $prior = [ $guids[0], $guids[-1] ];

            $self->_log("UPDATE: todo guids=".(scalar(@guids))."\n");

            my $current = $update;
            for my $guid (@guids) {
                # don't process too far: keep a sliding window of the last
                # 10 timestamps, flagging each as beyond $end (1) or not (0)
                shift @times if(@times > 9);    # one off
                push @times, [ $current, (_date_diff($end,$current) <= 0 ? 0 : 1) ]; # one on ... max 10

                my $times = 0;
                $times += $_->[1] for(@times);
                last if($times == 10);          # stop if all greater than end

                # okay process
                $self->_log("GUID [$guid]");

                $self->{processed}++;

                # skip (but track the timestamp of) reports already cached
                if(my $time = $self->already_saved($guid)) {
                    $self->_log(".. already saved [$time]\n");
                    $current = $time;
                    $saved++;
                    next;
                }

                if(my $report = $self->get_fact($guid)) {
                    $current = $report->{metadata}{core}{update_time};
                    $self->{report}{guid} = $guid;
                    next if($self->parse_report(report => $report)); # true if invalid report

                    if($self->store_report()) { $self->_log(".. stored");        $self->{stored}++; }
                    else                      { $self->_log(".. already stored"); }
                    if($self->cache_report()) { $self->_log(".. cached\n");      $self->{cached}++; }
                    else                      { $self->_log(".. bad cache data\n"); }
                } else {
                    $self->_log(".. FAIL\n");
                }
            }

            # restart from the oldest timestamp in the window, so out-of-sync
            # feed entries are not skipped
            $update = $times[0]->[0];

            $self->commit();
        }

        $self->commit();
        my $invalid = $self->{invalid} ? scalar(@{$self->{invalid}}) : 0;
        my $stop = localtime(time);
        $self->_log("MARKER: processed=$self->{processed}, stored=$self->{stored}, cached=$self->{cached}, invalid=$invalid, start=$start, stop=$stop\n");
    }

    # only email invalid reports during the generate process
    $self->_send_email()    if($self->{invalid});
}
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
# Decompose a perl version string into (perl, patch, devel) components,
# caching the result in $self->{perls} for save_perl_versions().
# Returns the three components as a list.
sub _get_perl_version {
    my $self = shift;
    my $vers = shift;

    unless($self->{perls}{$vers}) {
        # previously these three were joined with the comma operator into
        # a single statement; use separate statements so a stray edit
        # cannot silently change evaluation
        # NOTE(review): the names look swapped - 'patch' is set from the
        # odd-numbered (development) version check and 'devel' from the
        # RC/patch check - but the stored/consumed semantics match, so
        # behaviour is preserved here. Verify against release_data usage.
        my $patch = $vers =~ /^5.(7|9|[1-9][13579])/ ? 1 : 0;   # odd numbers now mark development releases
        my $devel = $vers =~ /(RC\d+|patch)/ ? 1 : 0;
        my ($perl) = $vers =~ /(5\.\d+(?:\.\d+)?)/;

        $self->{perls}{$vers} = {
            perl  => $perl,
            patch => $patch,
            devel => $devel,
            saved => 0
        };
    }

    return $self->{perls}{$vers}{perl}, $self->{perls}{$vers}{patch}, $self->{perls}{$vers}{devel};
}
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
# Build a de-duplicated list of report GUIDs from either a single
# id/GUID argument or a file of ids/GUIDs (one per line). Numeric ids
# are translated to GUIDs via the cpanstats table. Returns the list,
# or nothing when neither source is supplied.
sub _get_guid_list {
    my ($self,$guid,$file) = @_;
    my (@ids,@guids);

    # we're only parsing one id
    if($guid) {
        if($guid =~ /^\d+$/)    { push @ids,   $guid }
        else                    { push @guids, $guid }
    } elsif($file) {
        my $fh = IO::File->new($file,'r') or die "Cannot read file [$file]: $!";
        while(<$fh>) {
            chomp;
            my ($num) = (m/^([\da-z-]+)/i);
            # skip blank/comment lines - previously these produced an
            # uninitialised-value warning and pushed undef into the list
            next unless defined $num;
            if($num =~ /^\d+$/) { push @ids,   $num }
            else                { push @guids, $num }
        }
        $fh->close;
    } else {
        return;
    }

    # turn ids into guids (ids are validated as purely numeric above,
    # so interpolating them into the IN clause is safe)
    if(@ids) {
        my @rows = $self->{CPANSTATS}->get_query('array','SELECT guid FROM cpanstats WHERE id IN ('.join(',',@ids).')');
        push @guids, $_->[0] for(@rows);
    }

    my %guids = map {$_ => 1} @guids;
    my @list  = keys %guids;
    return @list;
}
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
# Resolve a report's 'updated' timestamp, either looked up from the
# metabase cache by GUID or taken from the supplied date string.
# Returns the validated ISO8601 'Z' timestamp, or nothing.
sub _get_createdate {
    my ($self, $guid, $date) = @_;

    return unless $guid || $date;

    if ($guid) {
        my ($row) = $self->{METABASE}->get_query(
            'hash', 'SELECT updated FROM metabase WHERE guid=?', $guid
        );
        $date = $row->{updated} if $row;
    }

    return unless $date && $date =~ /^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z$/;
    return $date;
}
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
# Resolve a Metabase creator resource to a tester email address,
# caching results in the package-level %testers hash and recording the
# resource/name/email triples in the testers_email table. Falls back to
# returning the raw creator resource when the profile cannot be fetched.
sub _get_tester {
    my ($self,$creator) = @_;
    return $testers{$creator}   if($testers{$creator});

    my $profile = Metabase::Resource->new( $creator );
    return $creator unless($profile);

    # pull the tester's user profile from the librarian; errors are
    # swallowed and treated as "no profile"
    my $user;
    eval { $user = $self->{librarian}->extract( $profile->guid ) };
    return $creator unless($user);

    my ($name,@emails);
    for my $fact ($user->facts()) {
        if(ref $fact eq 'Metabase::User::EmailAddress') {
            push @emails, $fact->{content};
        } elsif(ref $fact eq 'Metabase::User::FullName') {
            # HTML-encode the name, as it ends up in web pages
            $name = encode_entities($fact->{content});
        }
    }

    $name ||= 'NONAME'; # shouldn't happen, but allows for checks later

    for my $em (@emails) {
        $self->{METABASE}->do_query('INSERT INTO testers_email (resource,fullname,email) VALUES (?,?,?)',$creator,$name,$em);
    }

    # cache the first email (or the raw resource), SQL-escaping quotes
    $testers{$creator} = @emails ? $emails[0] : $creator;
    $testers{$creator} =~ s/\'/''/g if($testers{$creator});
    return $testers{$creator};
}
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
# Map an upload id (or dist/version pair) to its PAUSE author, using
# the caches primed by load_authors(). Returns '' when unknown.
sub _get_author {
    my ($self, $upid, $dist, $vers) = @_;

    my $pauseid = $self->{author2}{$upid} || '';
    $pauseid  ||= $self->{author}{$dist}{$vers} || '';

    return $pauseid;
}
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
# Confirm a parsed report field is defined; log the omission otherwise.
# Returns 1 when the value is defined, 0 (after logging) when not.
sub _valid_field {
    my ($self, $guid, $field, $value) = @_;

    return 1 if defined $value;

    $self->_log(" . [$guid] ... missing field: $field\n");
    return 0;
}
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
# Highest report id currently cached in the metabase table (0 when empty).
sub _get_lastid {
    my $self = shift;

    my ($row) = $self->{METABASE}->get_query('array',"SELECT MAX(id) FROM metabase");
    return 0 unless $row;
    return $row->[0] || 0;
}
1316
|
|
|
|
|
|
|
|
1317
|
|
|
|
|
|
|
# Decide whether a release is available on CPAN (1) or has been moved
# to BACKPAN only (0). Unknown uploads are assumed to be new releases.
sub _oncpan {
    my ($self, $upid, $dist, $vers) = @_;

    $upid ||= $self->{upload}{$dist}{$vers};
    my $release_type = $self->{oncpan}{$upid};

    return 1 unless $release_type;                  # assume it's a new release
    return $release_type eq 'backpan' ? 0 : 1;      # backpan only vs cpan/new upload
}
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
# Normalise an OS name to its canonical title, registering previously
# unseen names in the osname table (using the uppercased form as the
# provisional title). Returns '' for an empty name.
sub _osname {
    my $self = shift;
    my $name = shift || return '';

    my $lname = lc $name;
    my $uname = uc $name;
    $self->{OSNAMES}{$lname} ||= do {
        # bug fix: the name was previously interpolated directly into the
        # SQL, which broke (and was injectable) for names containing
        # quotes; use placeholders as every other query here does
        $self->{CPANSTATS}->do_query('INSERT INTO osname (osname,ostitle) VALUES (?,?)', $name, $uname);
        $uname;
    };

    return $self->{OSNAMES}{$lname};
}
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
# Sanity-check the report's platform/osname pairing. When the platform
# string does not imply the recorded osname, re-derive both by feeding
# the legacy text report through CPAN::Testers::Common::Article (which
# expects a mail-like message, so a fake header is prepended).
sub _check_arch_os {
    my $self = shift;

    my $text = $self->_platform_to_osname($self->{report}{platform});
    #print STDERR "_check: text=$text\n";
    #print STDERR "_check: platform=$self->{report}{platform}\n";
    #print STDERR "_check: osname=$self->{report}{osname}\n";
    # nothing to do if the platform-implied osname matches the recorded one
    return  if($text && $self->{report}{osname} && lc $text eq lc $self->{report}{osname});

    #print STDERR "_check: metabase=".Dumper($self->{report}{metabase})."\n";
    my $textreport = $self->{report}{metabase}{'CPAN::Testers::Fact::LegacyReport'}{content}{textreport};
    $textreport =~ s/\\n/\n/g;  # newlines may be escaped

    # create a fake mail, as CTC::Article parses a mail like text block
    my $mail = <<EMAIL;
From: fake\@example.com
To: fake\@example.com
Subject: PASS Fake-0.01
Date: 01-01-2010 01:01:01 Z

$textreport
EMAIL
    my $object = CPAN::Testers::Common::Article->new( $mail ) or return;
    $object->parse_report();

    # trust the values parsed out of the report body
    $self->{report}{osname}   = $object->osname;
    $self->{report}{platform} = $object->archname;
}
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
# Map a platform/archname string to a known OS name. First tries the
# alternation of all known OS names (cached in the package global
# $OSNAMES), then falls back to matching each known name individually.
# Returns '' when nothing matches.
# NOTE(review): the fast path returns the raw matched text ($1) while
# the fallback returns the canonical title value — confirm callers
# treat these case-insensitively.
sub _platform_to_osname {
    my $self = shift;
    my $platform = shift;
    return '' unless($platform);

    # Refresh the alternation pattern from the cached OS name set.
    $OSNAMES = join('|',keys %{$self->{OSNAMES}}) if(keys %{$self->{OSNAMES}});

    # Fast path: the platform string contains a known OS name directly.
    return $1 if($platform =~ /($OSNAMES)/i);

    # Slow path: try each known OS name as an individual pattern.
    for my $pattern (keys %{ $self->{OSNAMES} }) {
        return $self->{OSNAMES}{$pattern} if($platform =~ /$pattern/i);
    }

    return '';
}
1385
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
# Email the accumulated list of invalid reports to each configured
# administrator, piping the message through an external mailer.
# Relies on package-level settings declared elsewhere in this file:
# $HOW (mailer pipe command), $FROM (sender), $HEAD and $BODY (message
# templates containing FROM/EMAIL/DATE/INVALID placeholders).
# NOTE(review): assumes localtime() returns a Time::Piece-style object
# providing strftime — confirm the import at the top of the file.
sub _send_email {
    my $self = shift;
    my $t = localtime;
    my $DATE = $t->strftime("%a, %d %b %Y %H:%M:%S +0000");
    $DATE =~ s/\s+$//;
    # Collate the invalid report list once, for both the log and the email.
    my $INVALID = join("\n",@{$self->{invalid}});
    $self->_log("INVALID:\n$INVALID\n");

    for my $admin (@{$self->{admins}}) {
        # Pipe command for the external mailer, one message per admin.
        my $cmd = qq!| $HOW $admin!;

        # Fill in the template placeholders for this recipient.
        my $body = $HEAD . $BODY;
        $body =~ s/FROM/$FROM/g;
        $body =~ s/EMAIL/$admin/g;
        $body =~ s/DATE/$DATE/g;
        $body =~ s/INVALID/$INVALID/g;

        if(my $fh = IO::File->new($cmd)) {
            print $fh $body;
            $fh->close;
            $self->_log(".. MAIL SEND - SUCCESS - $admin\n");
        } else {
            $self->_log(".. MAIL SEND - FAILED - $admin\n");
        }
    }
}
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
# Return the difference in seconds between two ISO-8601 UTC timestamps
# of the form YYYY-MM-DDThh:mm:ssZ, computed as $date2 - $date1
# (negative when $date2 is earlier). Returns -1 when either string
# fails to parse.
#
# Uses Time::Local::timegm (core, already loaded by this module) instead
# of constructing two DateTime objects solely to obtain their epochs —
# identical UTC arithmetic, considerably cheaper per call.
sub _date_diff {
    my ($date1,$date2) = @_;

    my (@dt1) = $date1 =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z/;
    my (@dt2) = $date2 =~ /(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)Z/;

    return -1 unless(@dt1 && @dt2);

    # timegm takes (sec, min, hour, mday, month-1, year) and interprets
    # the values as UTC, matching the previous DateTime-based epochs.
    my $epoch1 = timegm($dt1[5],$dt1[4],$dt1[3],$dt1[2],$dt1[1]-1,$dt1[0]);
    my $epoch2 = timegm($dt2[5],$dt2[4],$dt2[3],$dt2[2],$dt2[1]-1,$dt2[0]);

    return $epoch2 - $epoch1;
}
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
# Append any pending buffered message ($self->{msg}) followed by the
# supplied arguments to the configured logfile, creating the log
# directory on first use. A silent no-op when no logfile is configured.
sub _log {
    my $self = shift;
    my $log = $self->{logfile} or return;

    # Ensure the directory for the logfile exists before appending.
    mkpath(dirname($log)) unless(-f $log);

    my $fh = IO::File->new($log,'a+') or die "Cannot append to log file [$log]: $!\n";
    print {$fh} $self->{msg} if($self->{msg});
    print {$fh} @_;
    $fh->close;

    # The pending message has been flushed; clear the buffer.
    $self->{msg} = '';
}
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
1; |
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
__END__ |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
=head1 NAME |
1443
|
|
|
|
|
|
|
|
1444
|
|
|
|
|
|
|
CPAN::Testers::Data::Generator - Download and summarize CPAN Testers data |
1445
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
=head1 SYNOPSIS |
1447
|
|
|
|
|
|
|
|
1448
|
|
|
|
|
|
|
% cpanstats |
1449
|
|
|
|
|
|
|
# ... wait patiently, very patiently |
1450
|
|
|
|
|
|
|
# ... then use the cpanstats MySQL database |
1451
|
|
|
|
|
|
|
|
1452
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
This distribution was originally written by Leon Brocard to download and |
1455
|
|
|
|
|
|
|
summarize CPAN Testers data. However, all of the original code has been |
1456
|
|
|
|
|
|
|
rewritten to use the CPAN Testers Statistics database generation code. This |
1457
|
|
|
|
|
|
|
now means that all the CPAN Testers sites including the Reports site, the |
1458
|
|
|
|
|
|
|
Statistics site and the CPAN Dependencies site, can use the same database. |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
This module retrieves and parses reports from the Metabase, generating or |
1461
|
|
|
|
|
|
|
updating entries in the cpanstats database, which extracts specific metadata |
1462
|
|
|
|
|
|
|
from the reports. The information in the cpanstats database is then presented |
1463
|
|
|
|
|
|
|
via CPAN::Testers::WWW::Reports on the CPAN Testers Reports website. |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
A good example query from the cpanstats database for Acme-Colour would be: |
1466
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
SELECT version, status, count(*) FROM cpanstats WHERE |
1468
|
|
|
|
|
|
|
dist = "Acme-Colour" group by version, state; |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
To create a database from scratch can take several days, as there are now over |
1471
|
|
|
|
|
|
|
24 million submitted reports. As such updating from a known copy of the |
1472
|
|
|
|
|
|
|
database is much more advisable. If you don't want to generate the database |
1473
|
|
|
|
|
|
|
yourself, you can obtain a feed using CPAN::Testers::WWW::Report::Query::Reports. |
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
With over 24 million reports in the database, if you do plan to run this |
1476
|
|
|
|
|
|
|
software to generate the databases it is recommended you utilise a high-end |
1477
|
|
|
|
|
|
|
processor machine. Even with a reasonable processor it can take over a week! |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
=head1 DATABASE SCHEMA |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
The cpanstats database schema is very straightforward, one main table with |
1482
|
|
|
|
|
|
|
several index tables to speed up searches. The main table is as below: |
1483
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
CREATE TABLE `cpanstats` ( |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
`id` int(10) unsigned NOT NULL AUTO_INCREMENT, |
1487
|
|
|
|
|
|
|
`guid` char(36) NOT NULL DEFAULT '', |
1488
|
|
|
|
|
|
|
`state` varchar(32) DEFAULT NULL, |
1489
|
|
|
|
|
|
|
`postdate` varchar(8) DEFAULT NULL, |
1490
|
|
|
|
|
|
|
`tester` varchar(255) DEFAULT NULL, |
1491
|
|
|
|
|
|
|
`dist` varchar(255) DEFAULT NULL, |
1492
|
|
|
|
|
|
|
`version` varchar(255) DEFAULT NULL, |
1493
|
|
|
|
|
|
|
`platform` varchar(255) DEFAULT NULL, |
1494
|
|
|
|
|
|
|
`perl` varchar(255) DEFAULT NULL, |
1495
|
|
|
|
|
|
|
`osname` varchar(255) DEFAULT NULL, |
1496
|
|
|
|
|
|
|
`osvers` varchar(255) DEFAULT NULL, |
1497
|
|
|
|
|
|
|
`fulldate` varchar(32) DEFAULT NULL, |
1498
|
|
|
|
|
|
|
`type` int(2) DEFAULT '0', |
1499
|
|
|
|
|
|
|
|
1500
|
|
|
|
|
|
|
PRIMARY KEY (`id`), |
1501
|
|
|
|
|
|
|
KEY `guid` (`guid`), |
1502
|
|
|
|
|
|
|
KEY `distvers` (`dist`,`version`), |
1503
|
|
|
|
|
|
|
KEY `tester` (`tester`), |
1504
|
|
|
|
|
|
|
KEY `state` (`state`), |
1505
|
|
|
|
|
|
|
KEY `postdate` (`postdate`) |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
) |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
It should be noted that 'postdate' refers to the YYYYMM formatted date, whereas |
1510
|
|
|
|
|
|
|
the 'fulldate' field refers to the YYYYMMDDhhmm formatted date and time. |
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
The metabase database schema is again very straightforward, and consists of one |
1513
|
|
|
|
|
|
|
main table, as below: |
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
CREATE TABLE `metabase` ( |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
`guid` char(36) NOT NULL, |
1518
|
|
|
|
|
|
|
`id` int(10) unsigned NOT NULL, |
1519
|
|
|
|
|
|
|
`updated` varchar(32) DEFAULT NULL, |
1520
|
|
|
|
|
|
|
`report` longblob NOT NULL, |
1521
|
|
|
|
|
|
|
`fact` longblob NOT NULL, |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
PRIMARY KEY (`guid`), |
1524
|
|
|
|
|
|
|
KEY `id` (`id`), |
1525
|
|
|
|
|
|
|
KEY `updated` (`updated`) |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
) |
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
The id field is a reference to the cpanstats.id field. |
1530
|
|
|
|
|
|
|
|
1531
|
|
|
|
|
|
|
The report field is JSON encoded, and is a cached version of the facts of a |
1532
|
|
|
|
|
|
|
report, while the fact field is the full report fact, and associated child |
1533
|
|
|
|
|
|
|
facts, Sereal encoded. Both are extracted from the returned fact from |
1534
|
|
|
|
|
|
|
Metabase::Librarian. |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
See F<examples/cpanstats-createdb> for the full list of tables used. |
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
=head1 SIGNIFICANT CHANGES |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=head2 v0.31 CHANGES |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
With the release of v0.31, a number of changes to the codebase were made as |
1543
|
|
|
|
|
|
|
a further move towards CPAN Testers 2.0. The first change is the name for this |
1544
|
|
|
|
|
|
|
distribution. Now titled 'CPAN-Testers-Data-Generator', this now fits more |
1545
|
|
|
|
|
|
|
appropriately within the CPAN-Testers namespace on CPAN. |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
The second significant change is to now reference a MySQL cpanstats database. |
1548
|
|
|
|
|
|
|
The SQLite version is still updated as before, as a number of other websites |
1549
|
|
|
|
|
|
|
and toolsets still rely on that database file format. However, in order to make |
1550
|
|
|
|
|
|
|
the CPAN Testers Reports website more dynamic, an SQLite database is not really |
1551
|
|
|
|
|
|
|
appropriate for a high demand website. |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
The database creation code is now available as a standalone program, in the |
1554
|
|
|
|
|
|
|
examples directory, and all the database communication is now handled by the |
1555
|
|
|
|
|
|
|
new distribution CPAN-Testers-Common-DBUtils. |
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
=head2 v0.41 CHANGES |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
In the next stage of development of CPAN Testers 2.0, the id field used within |
1560
|
|
|
|
|
|
|
the database schema above for the cpanstats table no longer matches the NNTP |
1561
|
|
|
|
|
|
|
ID value, although the id in the articles does still reference the NNTP ID, at |
1562
|
|
|
|
|
|
|
least for the reports submitted prior to the switch to the Metabase in 2010. |
1563
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
In order to correctly reference the id in the articles table, you will need to |
1565
|
|
|
|
|
|
|
use the function guid_to_nntp() with CPAN::Testers::Common::Utils, using the |
1566
|
|
|
|
|
|
|
new guid field in the cpanstats table. |
1567
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
As of this release the cpanstats id field is a unique auto incrementing field. |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
The next release of this distribution will be focused on generation of stats |
1571
|
|
|
|
|
|
|
using the Metabase storage API. |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
=head2 v1.00 CHANGES |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
Moved to Metabase API. The change to a definite major version number hopefully |
1576
|
|
|
|
|
|
|
indicates that this is a major interface change. All previous NNTP access has |
1577
|
|
|
|
|
|
|
been dropped and is no longer relevant. All report updates are now fed from
1578
|
|
|
|
|
|
|
the Metabase API. |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
=head1 INTERFACE |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=head2 The Constructor |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
=over |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
=item * new |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
Instantiates the object CPAN::Testers::Data::Generator. Accepts a hash containing
1589
|
|
|
|
|
|
|
values to prepare the object. These are described as: |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
my $obj = CPAN::Testers::Data::Generator->new( |
1592
|
|
|
|
|
|
|
logfile => './here/logfile', |
1593
|
|
|
|
|
|
|
config => './here/config.ini' |
1594
|
|
|
|
|
|
|
); |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
Where 'logfile' is the location to write log messages. Log messages are only |
1597
|
|
|
|
|
|
|
written if a logfile entry is specified, and will always append to any existing |
1598
|
|
|
|
|
|
|
file. The 'config' should contain the path to the configuration file, used |
1599
|
|
|
|
|
|
|
to define the database access and general operation settings. |
1600
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
=back |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=head2 Public Methods |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
=over |
1606
|
|
|
|
|
|
|
|
1607
|
|
|
|
|
|
|
=item * generate |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
Starting from the last cached report, retrieves all the more recent reports |
1610
|
|
|
|
|
|
|
from the Metabase Report Submission server, parsing each and recording each |
1611
|
|
|
|
|
|
|
report in both the cpanstats database and the metabase cache database. |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
=item * regenerate |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
For a given date range, retrieves all the reports from the Metabase Report |
1616
|
|
|
|
|
|
|
Submission server, parsing each and recording each report in both the cpanstats |
1617
|
|
|
|
|
|
|
database and the metabase cache database. |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
Note that as only 2500 can be returned at any one time due to Amazon SimpleDB |
1620
|
|
|
|
|
|
|
restrictions, this method will only process the guids returned from a given |
1621
|
|
|
|
|
|
|
start date, up to a maximum of 2500 guids.
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
This method will return the guid of the last report processed. |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
=item * rebuild |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
In the event that the cpanstats database needs regenerating, either in part or |
1628
|
|
|
|
|
|
|
for the whole database, this method allows you to do so. You may supply
1629
|
|
|
|
|
|
|
parameters as to the 'start' and 'end' values (inclusive), where all records |
1630
|
|
|
|
|
|
|
are assumed by default. Records are rebuilt using the local metabase cache |
1631
|
|
|
|
|
|
|
database. |
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
=item * reparse |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
Rather than a complete rebuild, the option to selectively reparse selected entries
1636
|
|
|
|
|
|
|
is useful if there are reports which were previously unable to correctly supply |
1637
|
|
|
|
|
|
|
a particular field, which now has supporting parsing code within the codebase. |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
In addition there is the option to exclude fields from parsing checks, where |
1640
|
|
|
|
|
|
|
they may be corrupted, and can be later amended using the 'cpanstats-update' |
1641
|
|
|
|
|
|
|
tool. |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
=item * parse |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
Unlike reparse, parse is used to parse just missing reports. As such if a |
1646
|
|
|
|
|
|
|
report has already been stored and cached, it won't be processed again, unless |
1647
|
|
|
|
|
|
|
the 'force' option is used. |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
In addition, as per reparse, there is the option to exclude fields from parsing |
1650
|
|
|
|
|
|
|
checks, where they may be corrupted, and can be later amended using the |
1651
|
|
|
|
|
|
|
'cpanstats-update' tool. |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
=item * tail |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
Write to a file, the list of GUIDs returned from a tail request. |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
=back |
1658
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
=head2 Private Methods |
1660
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
=over |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
=item * commit |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
To speed up the transaction process, a commit is performed every 500 inserts. |
1666
|
|
|
|
|
|
|
This method is used as part of the clean up process to ensure all transactions |
1667
|
|
|
|
|
|
|
are completed. |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
=item * get_tail_guids |
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
Get the list of GUIDs as would be seen for a tail log. |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
=item * get_next_dates |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
Get the list of dates to use in the next cycle of report retrieval. |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
=item * get_next_guids |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
Get the list of GUIDs for the reports that have been submitted since the last |
1680
|
|
|
|
|
|
|
cached report. |
1681
|
|
|
|
|
|
|
|
1682
|
|
|
|
|
|
|
=item * retrieve_reports |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
Abstracted loop of requesting GUIDs, then parsing, storing and caching each |
1685
|
|
|
|
|
|
|
report as appropriate. |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
=item * already_saved |
1688
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
Given a guid, determines whether it has already been saved in the local |
1690
|
|
|
|
|
|
|
metabase cache. |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
=item * load_fact |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
Get a specific report fact for a given GUID, from the local database. |
1695
|
|
|
|
|
|
|
|
1696
|
|
|
|
|
|
|
=item * get_fact |
1697
|
|
|
|
|
|
|
|
1698
|
|
|
|
|
|
|
Get a specific report fact for a given GUID, from the Metabase. |
1699
|
|
|
|
|
|
|
|
1700
|
|
|
|
|
|
|
=item * dereference_report |
1701
|
|
|
|
|
|
|
|
1702
|
|
|
|
|
|
|
When you retrieve the parent report fact from the database, you'll need to |
1703
|
|
|
|
|
|
|
dereference it to ensure the child elements contain the child facts in the |
1704
|
|
|
|
|
|
|
correct format for processing. |
1705
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
=item * parse_report |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
Parses a report extracting the metadata required for the cpanstats database. |
1709
|
|
|
|
|
|
|
|
1710
|
|
|
|
|
|
|
=item * reparse_report |
1711
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
Parses a report (from a local metabase cache) extracting the metadata required |
1713
|
|
|
|
|
|
|
for the stats database. |
1714
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
=item * retrieve_report |
1716
|
|
|
|
|
|
|
|
1717
|
|
|
|
|
|
|
Given a guid will attempt to return the report metadata from the cpanstats |
1718
|
|
|
|
|
|
|
database. |
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
=item * store_report |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
Inserts the components of a parsed report into the cpanstats database. |
1723
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
=item * cache_report |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
Inserts a serialised report into a local metabase cache database. |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
=item * cache_update |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
For the current report will update the local metabase cache with the id used |
1731
|
|
|
|
|
|
|
within the cpanstats database. |
1732
|
|
|
|
|
|
|
|
1733
|
|
|
|
|
|
|
=back |
1734
|
|
|
|
|
|
|
|
1735
|
|
|
|
|
|
|
=head2 Very Private methods |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
The following methods load information en masse to avoid DB connection hogging
1738
|
|
|
|
|
|
|
and IO blocking. Thus improving performance. |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
=over 4 |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
=item * load_uploads |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
Loads the upload information. |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
=item * load_authors |
1747
|
|
|
|
|
|
|
|
1748
|
|
|
|
|
|
|
Loads information regarding each author's distribution. |
1749
|
|
|
|
|
|
|
|
1750
|
|
|
|
|
|
|
=item * load_perl_versions |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
Loads all the known Perl versions. |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
=item * save_perl_versions |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
Saves any new Perl versions |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=back |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
=head1 HISTORY |
1761
|
|
|
|
|
|
|
|
1762
|
|
|
|
|
|
|
The CPAN Testers was conceived back in May 1998 by Graham Barr and Chris |
1763
|
|
|
|
|
|
|
Nandor as a way to provide multi-platform testing for modules. Today there |
1764
|
|
|
|
|
|
|
are over 40 million tester reports and more than 100 testers each month |
1765
|
|
|
|
|
|
|
giving valuable feedback for users and authors alike. |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
=head1 BECOME A TESTER |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
Whether you have a common platform or a very unusual one, you can help by |
1770
|
|
|
|
|
|
|
testing modules you install and submitting reports. There are plenty of |
1771
|
|
|
|
|
|
|
module authors who could use test reports and helpful feedback on their |
1772
|
|
|
|
|
|
|
modules and distributions. |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
If you'd like to get involved, please take a look at the CPAN Testers Wiki, |
1775
|
|
|
|
|
|
|
where you can learn how to install and configure one of the recommended |
1776
|
|
|
|
|
|
|
smoke tools. |
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
For further help and advice, please subscribe to the CPAN Testers
1779
|
|
|
|
|
|
|
discussion mailing list. |
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
CPAN Testers Wiki |
1782
|
|
|
|
|
|
|
- http://wiki.cpantesters.org |
1783
|
|
|
|
|
|
|
CPAN Testers Discuss mailing list |
1784
|
|
|
|
|
|
|
- http://lists.cpan.org/showlist.cgi?name=cpan-testers-discuss |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
=head1 BUCKETS |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
beta6 - 2014-01-21 |
1789
|
|
|
|
|
|
|
beta7 - 2014-11-12 |
1790
|
|
|
|
|
|
|
|
1791
|
|
|
|
|
|
|
=head1 BUGS, PATCHES & FIXES |
1792
|
|
|
|
|
|
|
|
1793
|
|
|
|
|
|
|
There are no known bugs at the time of this release. However, if you spot a |
1794
|
|
|
|
|
|
|
bug or are experiencing difficulties, that is not explained within the POD |
1795
|
|
|
|
|
|
|
documentation, please send bug reports and patches to the RT Queue (see below). |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
Fixes are dependent upon their severity and my availability. Should a fix not |
1798
|
|
|
|
|
|
|
be forthcoming, please feel free to (politely) remind me. |
1799
|
|
|
|
|
|
|
|
1800
|
|
|
|
|
|
|
RT Queue - |
1801
|
|
|
|
|
|
|
http://rt.cpan.org/Public/Dist/Display.html?Name=CPAN-Testers-Data-Generator |
1802
|
|
|
|
|
|
|
|
1803
|
|
|
|
|
|
|
=head1 SEE ALSO |
1804
|
|
|
|
|
|
|
|
1805
|
|
|
|
|
|
|
L<CPAN::Testers::Report>, |
1806
|
|
|
|
|
|
|
L<Metabase>, |
1807
|
|
|
|
|
|
|
L<Metabase::Fact>, |
1808
|
|
|
|
|
|
|
L<CPAN::Testers::Fact::LegacyReport>, |
1809
|
|
|
|
|
|
|
L<CPAN::Testers::Fact::TestSummary>, |
1810
|
|
|
|
|
|
|
L<CPAN::Testers::Metabase::AWS> |
1811
|
|
|
|
|
|
|
|
1812
|
|
|
|
|
|
|
L<CPAN::Testers::WWW::Statistics> |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
F<http://www.cpantesters.org/>, |
1815
|
|
|
|
|
|
|
F<http://stats.cpantesters.org/>, |
1816
|
|
|
|
|
|
|
F<http://wiki.cpantesters.org/> |
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
=head1 AUTHOR |
1819
|
|
|
|
|
|
|
|
1820
|
|
|
|
|
|
|
It should be noted that the original code for this distribution began life |
1821
|
|
|
|
|
|
|
under another name. The original distribution generated data for the original |
1822
|
|
|
|
|
|
|
CPAN Testers website. However, in 2008 the code was reworked to generate data |
1823
|
|
|
|
|
|
|
in the format for the statistics data analysis, which in turn was reworked to |
1824
|
|
|
|
|
|
|
drive the redesign of the all the CPAN Testers websites. To reflect the code |
1825
|
|
|
|
|
|
|
changes, a new name was given to the distribution. |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
=head2 CPAN-WWW-Testers-Generator |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
Original author: Leon Brocard <acme@astray.com> (C) 2002-2008 |
1830
|
|
|
|
|
|
|
Current maintainer: Barbie <barbie@cpan.org> (C) 2008-2010 |
1831
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
=head2 CPAN-Testers-Data-Generator |
1833
|
|
|
|
|
|
|
|
1834
|
|
|
|
|
|
|
Original author: Barbie <barbie@cpan.org> (C) 2008-2015 |
1835
|
|
|
|
|
|
|
|
1836
|
|
|
|
|
|
|
=head1 LICENSE |
1837
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or |
1839
|
|
|
|
|
|
|
modify it under the Artistic License 2.0. |