File Coverage

blib/lib/CPAN/Digger.pm
Criterion Covered Total %
statement 109 171 63.7
branch 21 60 35.0
condition 2 15 13.3
subroutine 16 18 88.8
pod 0 5 0.0
total 148 269 55.0


line stmt bran cond sub pod time code
1             package CPAN::Digger;
2 1     1   7 use strict;
  1         2  
  1         34  
3 1     1   4 use warnings FATAL => 'all';
  1         3  
  1         48  
4              
5             our $VERSION = '1.03';
6              
7 1     1   7 use Capture::Tiny qw(capture);
  1         1  
  1         47  
8 1     1   5 use Cwd qw(getcwd);
  1         1  
  1         42  
9 1     1   601 use Data::Dumper qw(Dumper);
  1         6557  
  1         64  
10 1     1   7 use Exporter qw(import);
  1         2  
  1         25  
11 1     1   5 use File::Spec ();
  1         1  
  1         16  
12 1     1   5 use File::Temp qw(tempdir);
  1         2  
  1         49  
13 1     1   778 use Log::Log4perl ();
  1         44013  
  1         28  
14 1     1   667 use LWP::UserAgent ();
  1         44626  
  1         32  
15 1     1   10 use MetaCPAN::Client ();
  1         2  
  1         20  
16              
17              
18 1     1   484 use CPAN::Digger::DB qw(get_fields);
  1         4  
  1         1785  
19              
20             my $tempdir = tempdir( CLEANUP => ($ENV{KEEP_TEMPDIR} ? 0 : 1) );
21              
22             my %known_licenses = map {$_ => 1} qw(apache_2_0 artistic_2 bsd gpl_3 lgpl_2_1 lgpl_3_0 perl_5); # open_source, unknown
23              
24             sub new {
25 1     1 0 8 my ($class, %args) = @_;
26 1         3 my $self = bless {}, $class;
27 1         6 for my $key (keys %args) {
28 10         22 $self->{$key} = $args{$key};
29             }
30 1         4 $self->{log} = uc $self->{log};
31 1         4 $self->{check_github} = delete $self->{github};
32              
33 1         8 $self->{db} = CPAN::Digger::DB->new(db => $self->{db});
34              
35 1         6 return $self;
36             }
37              
38             sub get_vcs {
39 1     1 0 3 my ($repository) = @_;
40 1 50       5 if ($repository) {
41             # $html .= sprintf qq{<a href="%s">%s %s</a><br>\n}, $repository->{$k}, $k, $repository->{$k};
42             # Try to get the web link
43 1         3 my $url = $repository->{web};
44 1 50       4 if (not $url) {
45 0         0 $url = $repository->{url};
46 0         0 $url =~ s{^git://}{https://};
47 0         0 $url =~ s{\.git$}{};
48             }
49 1         2 my $name = "repository";
50 1 50       19 if ($url =~ m{^https?://github.com/}) {
51 1         4 $name = 'GitHub';
52             }
53 1 50       5 if ($url =~ m{^https?://gitlab.com/}) {
54 0         0 $name = 'GitLab';
55             }
56 1         5 return $url, $name;
57             }
58             }
59              
60             sub get_data {
61 2     2 0 4 my ($self, $item) = @_;
62              
63 2         8 my $logger = Log::Log4perl->get_logger();
64 2         109 my %data = (
65             distribution => $item->distribution,
66             version => $item->version,
67             author => $item->author,
68             date => $item->date,
69             );
70             #die Dumper $item;
71              
72 2         179 $logger->debug('dist: ', $item->distribution);
73 2         57 $logger->debug(' ', $item->author);
74 2         24 my @licenses = @{ $item->license };
  2         33  
75 2         21 $data{licenses} = join ' ', @licenses;
76 2         7 $logger->debug(' ', $data{licenses});
77 2         14 for my $license (@licenses) {
78 1 50       6 if ($license eq 'unknown') {
    50          
79 0         0 $logger->error("Unknown license '$license'");
80             } elsif (not exists $known_licenses{$license}) {
81 1         7 $logger->warn("Unknown license '$license'. Probably CPAN::Digger needs to be updated");
82             }
83             }
84             # if there are not licenses =>
85             # if there is a license called "unknonws"
86             # check against a known list of licenses (grow it later, or look it up somewhere?)
87 2         10 my %resources = %{ $item->resources };
  2         35  
88             #say ' ', join ' ', keys %resources;
89 2 100       23 if ($resources{repository}) {
90 1         20 my ($vcs_url, $vcs_name) = get_vcs($resources{repository});
91 1 50       3 if ($vcs_url) {
92 1         3 $data{vcs_url} = $vcs_url;
93 1         2 $data{vcs_name} = $vcs_name;
94 1         7 $logger->debug(" $vcs_name: $vcs_url");
95             }
96             } else {
97 1         17 $logger->error('No repository for ', $item->distribution);
98             }
99 2         36 return %data;
100             }
101              
102              
103             sub analyze_github {
104 0     0 0 0 my ($data) = @_;
105 0         0 my $logger = Log::Log4perl->get_logger();
106              
107 0         0 my $vcs_url = $data->{vcs_url};
108 0         0 my $repo_name = (split '\/', $vcs_url)[-1];
109 0         0 $logger->info("Analyze GitHub repo '$vcs_url' in directory $repo_name");
110              
111 0         0 my $ua = LWP::UserAgent->new(timeout => 5);
112 0         0 my $response = $ua->get($vcs_url);
113 0         0 my $status_line = $response->status_line;
114 0 0       0 if ($status_line eq '404 Not Found') {
115 0         0 $logger->error("Repository '$vcs_url' Received 404 Not Found. Please update the link in the META file");
116 0         0 return;
117             }
118 0 0       0 if ($response->code != 200) {
119 0         0 $logger->error("Repository '$vcs_url' got a response of '$status_line'. Please report this to the maintainer of CPAN::Digger.");
120 0         0 return;
121             }
122 0 0       0 if ($response->redirects) {
123 0         0 $logger->error("Repository '$vcs_url' is being redirected. Please update the link in the META file");
124             }
125              
126 0         0 my $git = 'git';
127              
128 0         0 my @cmd = ($git, "clone", "--depth", "1", $data->{vcs_url});
129 0         0 my $cwd = getcwd();
130 0         0 chdir($tempdir);
131             my ($out, $err, $exit_code) = capture {
132 0     0   0 system(@cmd);
133 0         0 };
134 0         0 chdir($cwd);
135 0         0 my $repo = "$tempdir/$repo_name";
136 0         0 $logger->debug("REPO path '$repo'");
137              
138 0 0       0 if ($exit_code != 0) {
139             # TODO capture stderr and include in the log
140 0         0 $logger->error("Failed to clone $vcs_url");
141 0         0 return;
142             }
143              
144 0         0 $data->{travis} = -e "$repo/.travis.yml";
145 0         0 my @ga = glob("$repo/.github/workflows/*");
146 0 0       0 $data->{github_actions} = (scalar(@ga) ? 1 : 0);
147 0         0 $data->{circleci} = -e "$repo/.circleci";
148 0   0     0 $data->{appveyor} = (-e "$repo/.appveyor.yml") || (-e "$repo/appveyor.yml");
149 0         0 $data->{azure_pipelines} = -e "$repo/azure-pipelines.yml";
150              
151 0         0 for my $ci (qw(travis github_actions circleci appveyor)) {
152 0         0 $logger->debug("Is CI '$ci'?");
153 0 0       0 if ($data->{$ci}) {
154 0         0 $logger->debug("CI '$ci' found!");
155 0         0 $data->{has_ci} = 1;
156             }
157             }
158             }
159              
160             sub collect {
161 1     1 0 3 my ($self) = @_;
162              
163 1         3 my @all_the_distributions;
164              
165 1         2 my $log_level = $self->{log}; # TODO: shall we validate?
166 1         25 Log::Log4perl->easy_init({
167             level => $log_level,
168             layout => '%d{yyyy-MM-dd HH:mm:ss} - %p - %m%n',
169             });
170              
171 1         2704 my $logger = Log::Log4perl->get_logger();
172 1         384 $logger->info('Starting');
173 1         14 $logger->info("Tempdir: $tempdir");
174 1 50       14 $logger->info("Recent: $self->{recent}") if $self->{recent};
175 1 50       8 $logger->info("Author: $self->{author}") if $self->{author};
176              
177 1         40 my $mcpan = MetaCPAN::Client->new();
178 1         19 my $rset;
179 1 50       4 if ($self->{author}) {
180 0         0 my $author = $mcpan->author($self->{author});
181             #print $author;
182 0         0 $rset = $author->releases;
183             } else {
184 1         5 $rset = $mcpan->recent($self->{recent});
185             }
186 1         167 $logger->info("MetaCPAN::Client::ResultSet received with a total of $rset->{total} items");
187 1         9 my %distros;
188 1         5 my @fields = get_fields();
189 1         5 while ( my $item = $rset->next ) {
190 2 50       4143 next if $distros{ $item->distribution }; # We have already deal with this in this session
191 2         47 $distros{ $item->distribution } = 1;
192              
193 2         58 my $row = $self->{db}->db_get_distro($item->distribution);
194 2 50 33     8 next if $row and $row->{version} eq $item->version; # we already have this in the database (shall we call last?)
195 2         9 my %data = $self->get_data($item);
196             #die Dumper \%data;
197 2         22 $self->{db}->db_insert_into(@data{@fields});
198 2         36 push @all_the_distributions, \%data;
199             }
200              
201 1 50       15 if ($self->{author}) {
202 0         0 @all_the_distributions = reverse sort {$a->{date} cmp $b->{date}} @all_the_distributions;
  0         0  
203 0 0 0     0 if ($self->{limit} and @all_the_distributions > $self->{limit}) {
204 0         0 @all_the_distributions = @all_the_distributions[0 .. $self->{limit}-1];
205             }
206             }
207              
208             # Check on the VCS
209 1 50       4 if ($self->{check_github}) {
210 0         0 $logger->info("Starting to check GitHub");
211 0         0 for my $data (@all_the_distributions) {
212 0         0 my $distribution = $data->{distribution};
213 0         0 my $data_ref = $self->{db}->db_get_distro($distribution);
214 0 0       0 next if not $data_ref->{vcs_name};
215              
216 0 0 0     0 if ($self->{check_github} and $data_ref->{vcs_name} eq 'GitHub') {
217 0         0 analyze_github($data_ref);
218             }
219 0         0 my %data = %$data_ref;
220 0         0 $self->{db}->db_update($distribution, @data{@fields});
221 0 0       0 sleep $self->{sleep} if $self->{sleep};
222             }
223             }
224              
225              
226 1 50       6 if ($self->{report}) {
227             #print "Text report\n";
228 1         2 my @distros = @{ $self->{db}->db_get_every_distro() };
  1         4  
229 1 50 33     4 if ($self->{limit} and @distros > $self->{limit}) {
230 0         0 @distros = @distros[0 .. $self->{limit}-1];
231             }
232 1         4 for my $distro (@distros) {
233             #die Dumper $distro;
234 2 100       75 printf "%s %-40s %-7s", $distro->{date}, $distro->{distribution}, ($distro->{vcs_url} ? '' : 'NO VCS');
235 2 50       11 if ($self->{check_github}) {
236 0 0       0 printf "%-7s", ($distro->{has_ci} ? '' : 'NO CI');
237             }
238 2         160 print "\n";
239             }
240             }
241             }
242              
243              
244             42;
245              
246              
247             =head1 NAME
248              
249             CPAN::Digger - To dig CPAN
250              
251             =head1 SYNOPSIS
252              
253             cpan-digger
254              
255             =head1 DESCRIPTION
256              
257             This is a command line program to collect some meta information about CPAN modules.
258              
259              
260             =head1 COPYRIGHT AND LICENSE
261              
262             Copyright (C) 2020 by L<Gabor Szabo|https://szabgab.com/>
263              
264             This library is free software; you can redistribute it and/or modify
265             it under the same terms as Perl itself, either Perl version 5.26.1 or,
266             at your option, any later version of Perl 5 you may have available.
267              
268             =cut
269