File Coverage

blib/lib/CPANTS/Weight.pm
Criterion Covered Total %
statement 66 160 41.2
branch 1 44 2.2
condition 1 21 4.7
subroutine 22 30 73.3
pod 1 8 12.5
total 91 263 34.6


line stmt bran cond sub pod time code
1             package CPANTS::Weight;
2              
3             =pod
4              
5             =head1 NAME
6              
7             CPANTS::Weight - Graph based weights for CPAN Distributions
8              
9             =head1 DESCRIPTION
10              
11             C is a module that consumes the CPANTS database, and
12             generates a variety of graph-based weighting values for the distributions,
13             producing a SQLite database of the weighting data, for use in higher-level
14             applications that work with the CPANTS data.
15              
16             =head1 METHODS
17              
18             =cut
19              
20 3     3   61651 use 5.008005;
  3         14  
  3         133  
21 3     3   20 use strict;
  3         6  
  3         181  
22 3     3   29 use warnings;
  3         6  
  3         119  
23 3     3   18 use File::Spec 3.2701 ();
  3         109  
  3         101  
24 3     3   555917 use File::HomeDir 0.82 ();
  3         23716  
  3         91  
25 3     3   2731 use File::ShareDir 1.00 ();
  3         45323  
  3         101  
26 3     3   8164 use Params::Util 0.38 ();
  3         25177  
  3         95  
27 3     3   12318 use DateTime 0.4501 ();
  3         1206599  
  3         139  
28 3     3   3124 use CPAN::Version 5.5 ();
  3         6954  
  3         87  
29 3     3   2917 use Algorithm::Dependency 1.108 ();
  3         11216  
  3         77  
30 3     3   3492 use Algorithm::Dependency::Weight ();
  3         2270  
  3         68  
31 3     3   3015 use Algorithm::Dependency::Source::DBI 0.05 ();
  3         2701  
  3         68  
32 3     3   3930 use Algorithm::Dependency::Source::Invert ();
  3         6350  
  3         75  
33 3     3   2750 use ORDB::CPANTS 0.05 ();
  3         624863  
  3         112  
34 3     3   3037 use ORDB::CPANUploads 0.04 ();
  3         2212  
  3         96  
35 3     3   2875 use ORDB::CPANTesters 0.09 ();
  3         1007  
  3         401  
36              
37             our $VERSION = '0.15';
38              
39             our $DEBUG;
40              
41             sub trace {
42 0 0   0 0 0 print STDERR "# $_[0]\n" if $DEBUG;
43             }
44              
45 3 50       31 use constant ORLITE_FILE => File::Spec->catfile(
46             File::HomeDir->my_data,
47             ($^O eq 'MSWin32' ? 'Perl' : '.perl'),
48             'CPANTS-Weight',
49             'CPANTS-Weight.sqlite',
50 3     3   23 );
  3         9  
51              
52 3         20 use constant ORLITE_TIMELINE => File::Spec->catdir(
53             File::ShareDir::dist_dir('CPANTS-Weight'),
54             'timeline',
55 3     3   592 );
  3         9  
56              
57 3     3   608 use ORLite 1.20 ();
  3         56  
  3         69  
58 3     3   18 use ORLite::Mirror 1.12 ();
  3         64  
  3         197  
59             use ORLite::Migrate 0.03 {
60 3         30 file => ORLITE_FILE,
61             create => 1,
62             timeline => ORLITE_TIMELINE,
63             user_version => 3,
64 3     3   2936 };
  3         5280  
65              
66             # Delay download/inflate for the ORDB:: modules until import,
67             # so we can pass them a common maxage param.
68             sub import {
69 3     3   77 my $class = shift;
70 3   50     42 my $params = Params::Util::_HASH(shift) || {};
71              
72             # Download/inflate the CPANTS database
73 3         63 ORDB::CPANTS->import( {
74             maxage => $params->{maxage},
75             } );
76              
77             # Download/inflate the CPAN PAUSE uploads database
78 0           ORDB::CPANTUploads->import( {
79             maxage => $params->{maxage},
80             } );
81              
82             # Download/inflate the (huge) CPAN Testers database
83 0           ORDB::CPANTesters->import( {
84             maxage => $params->{maxage},
85             } );
86              
87 0           return 1;
88             }
89              
90             # Common string fragments
91             my $SELECT_IDS = <<'END_SQL';
92             select
93             id
94             from
95             dist
96             where
97             id > 0
98             END_SQL
99              
100             my $SELECT_DEPENDS = <<'END_SQL';
101             select
102             dist,
103             in_dist
104             from
105             prereq
106             where
107             in_dist is not null
108             and
109             dist > 0
110             and
111             in_dist > 0
112             END_SQL
113              
114              
115              
116              
117              
118             #####################################################################
119             # Main Methods
120              
121             # Only used internally, for caching reasons
122             sub new {
123 0     0 0   my $class = shift;
124 0           my $self = bless { }, $class;
125 0           return $self;
126             }
127              
128             =pod
129              
130             =head2 run
131              
132             CPANTS::Weight->run;
133              
134             The main C method does a complete generation cycle for the CPANTS
135             weighting database. It will retrieve the CPANTS data (if needed) calculate
136             the weights, and then (re)populate the CPANTS-Weight.sqlite database.
137              
138             Once completed, the Csqlite> method can be used to
139             locate the completed SQLite database file.
140              
141             =cut
142              
143             sub run {
144 0 0   0 1   my $self = ref($_[0]) ? shift : shift->new;
145              
146             # Run import if we haven't already
147 0           ref($self)->import;
148              
149             # Skip if the output database is newer than the input database
150             # (but is not a new database)
151 0           my $input_t = (stat(ORDB::CPANTS->sqlite ))[9];
152 0           my $output_t = (stat(CPANTS::Weight->sqlite))[9];
153             # if ( $output_t > $input_t and CPANTS::Weight::AuthorWeight->count ) {
154             # return 1;
155             # }
156              
157             # Prefetch the author and dist lists
158 0           trace("Loading CPANTS Authors...");
159 0           my @authors = ORDB::CPANTS::Author->select(
160             'where pauseid is not null'
161             );
162              
163 0           trace("Loading CPANTS Distributions...");
164 0           my @dists = ORDB::CPANTS::Dist->select(
165             'where author not in ( select id from author where pauseid is null )'
166             );
167              
168 0           trace("Loading Kwalitee...");
169 0           my $kwalitee = ORDB::CPANTS->selectall_hashref(
170             'select * from kwalitee',
171             'dist',
172             );
173              
174             # Indexed table of weighting scores
175 0           trace("Precalculating weight...");
176 0           my $weight = $self->algorithm_weight->weight_all;
177 0           trace("Precalculating volatility...");
178 0           my $volatility = $self->algorithm_volatility->weight_all;
179              
180 0           trace("Generating FAIL counts");
181 0           my $fails = CPANTS::Weight->fail_report;
182              
183             # Populate the AuthorWeight objects
184 0           trace("Populating Author metrics...");
185 0           CPANTS::Weight->begin;
186 0           CPANTS::Weight::AuthorWeight->truncate;
187 0           foreach my $author ( @authors ) { ### Authors [===| ] % done
188             # Find the list of distros for this author
189 0           my $id = $author->id;
190             # my @ids = grep { $_->author } @dists;
191 0           CPANTS::Weight::AuthorWeight->create(
192             id => $author->id,
193             pauseid => $author->pauseid,
194             );
195             }
196 0           CPANTS::Weight->commit;
197              
198             # Populate the DistWeight objects
199 0           trace("Populating Distribution metrics...");
200 0           CPANTS::Weight->begin;
201 0           CPANTS::Weight::DistWeight->truncate;
202 0           foreach my $dist ( @dists ) { ### Distributions [===| ] % done
203 0           my $id = $dist->id;
204              
205             # Does this distribution make life difficult
206             # for downstream packagers.
207 0   0       my $k = $kwalitee->{$id} || {};
208 0 0         my $enemy_downstream = $k->{easily_repackagable} ? 0 : 1;
209              
210             # Is this distribution popular, but NOT provided in
211             # Debian, making it a good candidate for packaging.
212 0 0         my $debian_candidate = $k->{distributed_by_debian} ? 0 : 1;
213              
214             # Does this distribution supply useful metadata.
215             # Level 1 requires a parsable META.yml file
216             # Level 2 requires META.yml conforms to a known specification,
217             # and has a license declaration.
218             # Level 3 requires META.yml conform to the current specification,
219             # and declares the required minimum Perl version.
220 0 0 0       my $meta1 = ($k->{has_meta_yml} and $k->{metayml_parsable}) ? 0 : 1;
221 0 0 0       my $meta2 = ($k->{metayml_conforms_to_known_spec} and $k->{metayml_has_license}) ? 0 : 1;
222 0 0 0       my $meta3 = ($k->{metayml_conforms_current_spec} and $k->{metayml_declares_perl_version}) ? 0 : 1;
223 0 0         if ( $meta1 ) {
224 0           $meta2 = 0;
225             }
226 0 0 0       if ( $meta1 or $meta2 ) {
227 0           $meta3 = 0;
228             }
229             CPANTS::Weight::DistWeight->create(
230 0   0       id => $id,
231             dist => $dist->dist,
232             author => $dist->author,
233             weight => $weight->{$id},
234             volatility => $volatility->{$id} - 1,
235             enemy_downstream => $enemy_downstream,
236             debian_candidate => $debian_candidate,
237             meta1 => $meta1,
238             meta2 => $meta2,
239             meta3 => $meta3,
240             fails => $fails->{$dist->dist} || 0,
241             );
242             }
243 0           CPANTS::Weight->commit;
244              
245             # Manually remove bogus records
246 0           my $sth = CPANTS::Weight->prepare('delete from dist_weight where dist = ?');
247 0           $sth->execute('Msql-Mysql-modules');
248 0           $sth->execute('HTTP-BrowserDetect');
249 0           $sth->execute('HTML-Widgets-Index');
250 0           $sth->execute('Text-Tabs+Wrap');
251 0           $sth->execute('FreeWRL');
252 0           $sth->execute('Apache-LoggedAuthDBI');
253 0           $sth->execute('Win32-File-Summary'); #contains Archive::Tar, IO::Zlib
254 0           $sth->finish;
255              
256 0           return 1;
257             }
258              
259              
260              
261              
262              
263             #####################################################################
264             # Utility Methods
265              
266             sub algorithm_weight {
267 0     0 0   my $self = shift;
268 0 0         unless ( $self->{algorithm_weight} ) {
269 0           $self->{algorithm_weight} = Algorithm::Dependency::Weight->new(
270             source => $self->source_weight,
271             );
272             }
273 0           return $self->{algorithm_weight};
274             }
275              
276             sub algorithm_volatility {
277 0     0 0   my $self = shift;
278 0 0         unless ( $self->{algorithm_volatility} ) {
279 0           $self->{algorithm_volatility} = Algorithm::Dependency::Weight->new(
280             source => $self->source_volatility,
281             );
282             }
283 0           return $self->{algorithm_volatility};
284             }
285              
286             sub source_weight {
287 0     0 0   my $self = shift;
288 0 0         unless ( $self->{source_weight} ) {
289 0           $self->{source_weight} = Algorithm::Dependency::Source::DBI->new(
290             dbh => ORDB::CPANTS->dbh,
291             select_ids => "$SELECT_IDS",
292             select_depends => "$SELECT_DEPENDS and ( is_prereq = 1 or is_build_prereq = 1 )",
293             );
294             }
295 0           return $self->{source_weight};
296             }
297              
298             sub source_volatility {
299 0     0 0   my $self = shift;
300 0 0         unless ( $self->{source_volatility} ) {
301 0           $self->{source_volatility} = Algorithm::Dependency::Source::Invert->new(
302             $self->source_weight,
303             );
304             }
305 0           return $self->{source_volatility};
306             }
307              
308             # Generate a FAIL count report
309             sub fail_report {
310 0     0 0   my %fail = ();
311 0           my %version = ();
312              
313             # Build the statement
314 0           my $rows = 0;
315 0 0         my $sth = ORDB::CPANTesters->prepare(<<'END_SQL') or die("prepare: $DBI::errstr");
316             select dist, version, state, perl from cpanstats
317             where state = ? or (
318             state in ( ?, ? ) and
319             perl not like ? and
320             perl not like ? and
321             (
322             perl like ? or
323             perl like ? or
324             perl like ? or
325             perl like ? or
326             perl like ?
327             )
328             )
329             END_SQL
330 0 0         $sth->execute(
331             'cpan', 'fail', 'unknown', '%patch%', '%RC%',
332             '5.4%', '5.5%', '5.6%', '5.8%', '5.10%'
333             ) or die("execute: $DBI::errstr");
334 0           while ( my $row = $sth->fetchrow_arrayref ) {
335 0           my ($dist, $version, $state) = @$row;
336              
337             # If this is the first time we've seen the distribution,
338             # create the entry for it
339 0 0         unless ( exists $fail{$dist} ) {
340 0           $fail{$dist} = 0;
341 0           $version{$dist} = $version;
342             }
343              
344             # Ignore developer releases and weird versions
345 0 0         next unless defined $version;
346 0 0         next unless $version =~ /^[\d\.]+$/;
347              
348             # If the version is older than the current version,
349             # shortcut and go to the next row.
350 0           my $vcmp = CPAN::Version->vcmp($version, $version{$dist});
351 0 0         if ( $vcmp < 0 ) {
352 0           next;
353             }
354              
355             # If the version is newer than the current version,
356             # reset the current fail count back to zero.
357 0 0         if ( $vcmp > 0 ) {
358 0           $fail{$dist} = 0;
359 0           $version{$dist} = $version;
360             }
361              
362             # If the row is a FAIL or UNKNOWN record, increment the fail count
363 0 0 0       if ( $state eq 'fail' or $state eq 'unknown' ) {
364 0           $fail{$dist}++;
365             }
366             }
367              
368 0           return \%fail;
369             }
370              
371             1;
372              
373             =pod
374              
375             =head2 dsn
376              
377             my $string = Foo::Bar->dsn;
378              
379             The C accessor returns the dbi connection string used to connect
380             to the SQLite database as a string.
381              
382             =head2 dbh
383              
384             my $handle = Foo::Bar->dbh;
385              
386             To reliably prevent potential SQLite deadlocks resulting from multiple
387             connections in a single process, each ORLite package will only ever
388             maintain a single connection to the database.
389              
390             During a transaction, this will be the same (cached) database handle.
391              
392             Although in most situations you should not need a direct DBI connection
393             handle, the C method provides a method for getting a direct
394             connection in a way that is compatible with ORLite's connection
395             management.
396              
397             Please note that these connections should be short-lived, you should
398             never hold onto a connection beyond the immediate scope.
399              
400             The transaction system in ORLite is specifically designed so that code
401             using the database should never have to know whether or not it is in a
402             transation.
403              
404             Because of this, you should B call the -Edisconnect method
405             on the database handles yourself, as the handle may be that of a
406             currently running transaction.
407              
408             Further, you should do your own transaction management on a handle
409             provided by the method.
410              
411             In cases where there are extreme needs, and you B have to
412             violate these connection handling rules, you should create your own
413             completely manual DBI-Econnect call to the database, using the connect
414             string provided by the C method.
415              
416             The C method returns a L object, or throws an exception on
417             error.
418              
419             =head2 begin
420              
421             Foo::Bar->begin;
422              
423             The C method indicates the start of a transaction.
424              
425             In the same way that ORLite allows only a single connection, likewise
426             it allows only a single application-wide transaction.
427              
428             No indication is given as to whether you are currently in a transaction
429             or not, all code should be written neutrally so that it works either way
430             or doesn't need to care.
431              
432             Returns true or throws an exception on error.
433              
434             =head2 commit
435              
436             Foo::Bar->commit;
437              
438             The C method commits the current transaction. If called outside
439             of a current transaction, it is accepted and treated as a null operation.
440              
441             Once the commit has been completed, the database connection falls back
442             into auto-commit state. If you wish to immediately start another
443             transaction, you will need to issue a separate -Ebegin call.
444              
445             Returns true or throws an exception on error.
446              
447             =head2 rollback
448              
449             The C method rolls back the current transaction. If called outside
450             of a current transaction, it is accepted and treated as a null operation.
451              
452             Once the rollback has been completed, the database connection falls back
453             into auto-commit state. If you wish to immediately start another
454             transaction, you will need to issue a separate -Ebegin call.
455              
456             If a transaction exists at END-time as the process exits, it will be
457             automatically rolled back.
458              
459             Returns true or throws an exception on error.
460              
461             =head2 do
462              
463             Foo::Bar->do('insert into table (foo, bar) values (?, ?)', {},
464             $foo_value,
465             $bar_value,
466             );
467              
468             The C method is a direct wrapper around the equivalent L method,
469             but applied to the appropriate locally-provided connection or transaction.
470              
471             It takes the same parameters and has the same return values and error
472             behaviour.
473              
474             =head2 selectall_arrayref
475              
476             The C method is a direct wrapper around the equivalent
477             L method, but applied to the appropriate locally-provided connection
478             or transaction.
479              
480             It takes the same parameters and has the same return values and error
481             behaviour.
482              
483             =head2 selectall_hashref
484              
485             The C method is a direct wrapper around the equivalent
486             L method, but applied to the appropriate locally-provided connection
487             or transaction.
488              
489             It takes the same parameters and has the same return values and error
490             behaviour.
491              
492             =head2 selectcol_arrayref
493              
494             The C method is a direct wrapper around the equivalent
495             L method, but applied to the appropriate locally-provided connection
496             or transaction.
497              
498             It takes the same parameters and has the same return values and error
499             behaviour.
500              
501             =head2 selectrow_array
502              
503             The C method is a direct wrapper around the equivalent
504             L method, but applied to the appropriate locally-provided connection
505             or transaction.
506              
507             It takes the same parameters and has the same return values and error
508             behaviour.
509              
510             =head2 selectrow_arrayref
511              
512             The C method is a direct wrapper around the equivalent
513             L method, but applied to the appropriate locally-provided connection
514             or transaction.
515              
516             It takes the same parameters and has the same return values and error
517             behaviour.
518              
519             =head2 selectrow_hashref
520              
521             The C method is a direct wrapper around the equivalent
522             L method, but applied to the appropriate locally-provided connection
523             or transaction.
524              
525             It takes the same parameters and has the same return values and error
526             behaviour.
527              
528             =head2 prepare
529              
530             The C method is a direct wrapper around the equivalent
531             L method, but applied to the appropriate locally-provided connection
532             or transaction
533              
534             It takes the same parameters and has the same return values and error
535             behaviour.
536              
537             In general though, you should try to avoid the use of your own prepared
538             statements if possible, although this is only a recommendation and by
539             no means prohibited.
540              
541             =head2 pragma
542              
543             # Get the user_version for the schema
544             my $version = Foo::Bar->pragma('user_version');
545              
546             The C method provides a convenient method for fetching a pragma
547             for a datase. See the SQLite documentation for more details.
548              
549             =head1 SUPPORT
550              
551             Bugs should be reported via the CPAN bug tracker at
552              
553             L
554              
555             For other issues, contact the author.
556              
557             =head1 AUTHOR
558              
559             Adam Kennedy Eadamk@cpan.orgE
560              
561             =head1 COPYRIGHT
562              
563             Copyright 2009 Adam Kennedy.
564              
565             This program is free software; you can redistribute
566             it and/or modify it under the same terms as Perl itself.
567              
568             The full text of the license can be found in the
569             LICENSE file included with this module.
570              
571             =cut