line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package CPAN::Digger; |
2
|
1
|
|
|
1
|
|
7
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
3
|
1
|
|
|
1
|
|
4
|
use warnings FATAL => 'all'; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
48
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
our $VERSION = '1.03'; |
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
7
|
use Capture::Tiny qw(capture); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
47
|
|
8
|
1
|
|
|
1
|
|
5
|
use Cwd qw(getcwd); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
42
|
|
9
|
1
|
|
|
1
|
|
601
|
use Data::Dumper qw(Dumper); |
|
1
|
|
|
|
|
6557
|
|
|
1
|
|
|
|
|
64
|
|
10
|
1
|
|
|
1
|
|
7
|
use Exporter qw(import); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
25
|
|
11
|
1
|
|
|
1
|
|
5
|
use File::Spec (); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
12
|
1
|
|
|
1
|
|
5
|
use File::Temp qw(tempdir); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
49
|
|
13
|
1
|
|
|
1
|
|
778
|
use Log::Log4perl (); |
|
1
|
|
|
|
|
44013
|
|
|
1
|
|
|
|
|
28
|
|
14
|
1
|
|
|
1
|
|
667
|
use LWP::UserAgent (); |
|
1
|
|
|
|
|
44626
|
|
|
1
|
|
|
|
|
32
|
|
15
|
1
|
|
|
1
|
|
10
|
use MetaCPAN::Client (); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
20
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
1
|
|
484
|
use CPAN::Digger::DB qw(get_fields); |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
1785
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $tempdir = tempdir( CLEANUP => ($ENV{KEEP_TEMPDIR} ? 0 : 1) ); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my %known_licenses = map {$_ => 1} qw(apache_2_0 artistic_2 bsd gpl_3 lgpl_2_1 lgpl_3_0 perl_5); # open_source, unknown |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub new { |
25
|
1
|
|
|
1
|
0
|
8
|
my ($class, %args) = @_; |
26
|
1
|
|
|
|
|
3
|
my $self = bless {}, $class; |
27
|
1
|
|
|
|
|
6
|
for my $key (keys %args) { |
28
|
10
|
|
|
|
|
22
|
$self->{$key} = $args{$key}; |
29
|
|
|
|
|
|
|
} |
30
|
1
|
|
|
|
|
4
|
$self->{log} = uc $self->{log}; |
31
|
1
|
|
|
|
|
4
|
$self->{check_github} = delete $self->{github}; |
32
|
|
|
|
|
|
|
|
33
|
1
|
|
|
|
|
8
|
$self->{db} = CPAN::Digger::DB->new(db => $self->{db}); |
34
|
|
|
|
|
|
|
|
35
|
1
|
|
|
|
|
6
|
return $self; |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub get_vcs { |
39
|
1
|
|
|
1
|
0
|
3
|
my ($repository) = @_; |
40
|
1
|
50
|
|
|
|
5
|
if ($repository) { |
41
|
|
|
|
|
|
|
# $html .= sprintf qq{<a href="%s">%s %s</a><br>\n}, $repository->{$k}, $k, $repository->{$k}; |
42
|
|
|
|
|
|
|
# Try to get the web link |
43
|
1
|
|
|
|
|
3
|
my $url = $repository->{web}; |
44
|
1
|
50
|
|
|
|
4
|
if (not $url) { |
45
|
0
|
|
|
|
|
0
|
$url = $repository->{url}; |
46
|
0
|
|
|
|
|
0
|
$url =~ s{^git://}{https://}; |
47
|
0
|
|
|
|
|
0
|
$url =~ s{\.git$}{}; |
48
|
|
|
|
|
|
|
} |
49
|
1
|
|
|
|
|
2
|
my $name = "repository"; |
50
|
1
|
50
|
|
|
|
19
|
if ($url =~ m{^https?://github.com/}) { |
51
|
1
|
|
|
|
|
4
|
$name = 'GitHub'; |
52
|
|
|
|
|
|
|
} |
53
|
1
|
50
|
|
|
|
5
|
if ($url =~ m{^https?://gitlab.com/}) { |
54
|
0
|
|
|
|
|
0
|
$name = 'GitLab'; |
55
|
|
|
|
|
|
|
} |
56
|
1
|
|
|
|
|
5
|
return $url, $name; |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
} |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
sub get_data { |
61
|
2
|
|
|
2
|
0
|
4
|
my ($self, $item) = @_; |
62
|
|
|
|
|
|
|
|
63
|
2
|
|
|
|
|
8
|
my $logger = Log::Log4perl->get_logger(); |
64
|
2
|
|
|
|
|
109
|
my %data = ( |
65
|
|
|
|
|
|
|
distribution => $item->distribution, |
66
|
|
|
|
|
|
|
version => $item->version, |
67
|
|
|
|
|
|
|
author => $item->author, |
68
|
|
|
|
|
|
|
date => $item->date, |
69
|
|
|
|
|
|
|
); |
70
|
|
|
|
|
|
|
#die Dumper $item; |
71
|
|
|
|
|
|
|
|
72
|
2
|
|
|
|
|
179
|
$logger->debug('dist: ', $item->distribution); |
73
|
2
|
|
|
|
|
57
|
$logger->debug(' ', $item->author); |
74
|
2
|
|
|
|
|
24
|
my @licenses = @{ $item->license }; |
|
2
|
|
|
|
|
33
|
|
75
|
2
|
|
|
|
|
21
|
$data{licenses} = join ' ', @licenses; |
76
|
2
|
|
|
|
|
7
|
$logger->debug(' ', $data{licenses}); |
77
|
2
|
|
|
|
|
14
|
for my $license (@licenses) { |
78
|
1
|
50
|
|
|
|
6
|
if ($license eq 'unknown') { |
|
|
50
|
|
|
|
|
|
79
|
0
|
|
|
|
|
0
|
$logger->error("Unknown license '$license'"); |
80
|
|
|
|
|
|
|
} elsif (not exists $known_licenses{$license}) { |
81
|
1
|
|
|
|
|
7
|
$logger->warn("Unknown license '$license'. Probably CPAN::Digger needs to be updated"); |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
# if there are not licenses => |
85
|
|
|
|
|
|
|
# if there is a license called "unknonws" |
86
|
|
|
|
|
|
|
# check against a known list of licenses (grow it later, or look it up somewhere?) |
87
|
2
|
|
|
|
|
10
|
my %resources = %{ $item->resources }; |
|
2
|
|
|
|
|
35
|
|
88
|
|
|
|
|
|
|
#say ' ', join ' ', keys %resources; |
89
|
2
|
100
|
|
|
|
23
|
if ($resources{repository}) { |
90
|
1
|
|
|
|
|
20
|
my ($vcs_url, $vcs_name) = get_vcs($resources{repository}); |
91
|
1
|
50
|
|
|
|
3
|
if ($vcs_url) { |
92
|
1
|
|
|
|
|
3
|
$data{vcs_url} = $vcs_url; |
93
|
1
|
|
|
|
|
2
|
$data{vcs_name} = $vcs_name; |
94
|
1
|
|
|
|
|
7
|
$logger->debug(" $vcs_name: $vcs_url"); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
} else { |
97
|
1
|
|
|
|
|
17
|
$logger->error('No repository for ', $item->distribution); |
98
|
|
|
|
|
|
|
} |
99
|
2
|
|
|
|
|
36
|
return %data; |
100
|
|
|
|
|
|
|
} |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
sub analyze_github { |
104
|
0
|
|
|
0
|
0
|
0
|
my ($data) = @_; |
105
|
0
|
|
|
|
|
0
|
my $logger = Log::Log4perl->get_logger(); |
106
|
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
0
|
my $vcs_url = $data->{vcs_url}; |
108
|
0
|
|
|
|
|
0
|
my $repo_name = (split '\/', $vcs_url)[-1]; |
109
|
0
|
|
|
|
|
0
|
$logger->info("Analyze GitHub repo '$vcs_url' in directory $repo_name"); |
110
|
|
|
|
|
|
|
|
111
|
0
|
|
|
|
|
0
|
my $ua = LWP::UserAgent->new(timeout => 5); |
112
|
0
|
|
|
|
|
0
|
my $response = $ua->get($vcs_url); |
113
|
0
|
|
|
|
|
0
|
my $status_line = $response->status_line; |
114
|
0
|
0
|
|
|
|
0
|
if ($status_line eq '404 Not Found') { |
115
|
0
|
|
|
|
|
0
|
$logger->error("Repository '$vcs_url' Received 404 Not Found. Please update the link in the META file"); |
116
|
0
|
|
|
|
|
0
|
return; |
117
|
|
|
|
|
|
|
} |
118
|
0
|
0
|
|
|
|
0
|
if ($response->code != 200) { |
119
|
0
|
|
|
|
|
0
|
$logger->error("Repository '$vcs_url' got a response of '$status_line'. Please report this to the maintainer of CPAN::Digger."); |
120
|
0
|
|
|
|
|
0
|
return; |
121
|
|
|
|
|
|
|
} |
122
|
0
|
0
|
|
|
|
0
|
if ($response->redirects) { |
123
|
0
|
|
|
|
|
0
|
$logger->error("Repository '$vcs_url' is being redirected. Please update the link in the META file"); |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
0
|
|
|
|
|
0
|
my $git = 'git'; |
127
|
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
0
|
my @cmd = ($git, "clone", "--depth", "1", $data->{vcs_url}); |
129
|
0
|
|
|
|
|
0
|
my $cwd = getcwd(); |
130
|
0
|
|
|
|
|
0
|
chdir($tempdir); |
131
|
|
|
|
|
|
|
my ($out, $err, $exit_code) = capture { |
132
|
0
|
|
|
0
|
|
0
|
system(@cmd); |
133
|
0
|
|
|
|
|
0
|
}; |
134
|
0
|
|
|
|
|
0
|
chdir($cwd); |
135
|
0
|
|
|
|
|
0
|
my $repo = "$tempdir/$repo_name"; |
136
|
0
|
|
|
|
|
0
|
$logger->debug("REPO path '$repo'"); |
137
|
|
|
|
|
|
|
|
138
|
0
|
0
|
|
|
|
0
|
if ($exit_code != 0) { |
139
|
|
|
|
|
|
|
# TODO capture stderr and include in the log |
140
|
0
|
|
|
|
|
0
|
$logger->error("Failed to clone $vcs_url"); |
141
|
0
|
|
|
|
|
0
|
return; |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
0
|
|
|
|
|
0
|
$data->{travis} = -e "$repo/.travis.yml"; |
145
|
0
|
|
|
|
|
0
|
my @ga = glob("$repo/.github/workflows/*"); |
146
|
0
|
0
|
|
|
|
0
|
$data->{github_actions} = (scalar(@ga) ? 1 : 0); |
147
|
0
|
|
|
|
|
0
|
$data->{circleci} = -e "$repo/.circleci"; |
148
|
0
|
|
0
|
|
|
0
|
$data->{appveyor} = (-e "$repo/.appveyor.yml") || (-e "$repo/appveyor.yml"); |
149
|
0
|
|
|
|
|
0
|
$data->{azure_pipelines} = -e "$repo/azure-pipelines.yml"; |
150
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
for my $ci (qw(travis github_actions circleci appveyor)) { |
152
|
0
|
|
|
|
|
0
|
$logger->debug("Is CI '$ci'?"); |
153
|
0
|
0
|
|
|
|
0
|
if ($data->{$ci}) { |
154
|
0
|
|
|
|
|
0
|
$logger->debug("CI '$ci' found!"); |
155
|
0
|
|
|
|
|
0
|
$data->{has_ci} = 1; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub collect { |
161
|
1
|
|
|
1
|
0
|
3
|
my ($self) = @_; |
162
|
|
|
|
|
|
|
|
163
|
1
|
|
|
|
|
3
|
my @all_the_distributions; |
164
|
|
|
|
|
|
|
|
165
|
1
|
|
|
|
|
2
|
my $log_level = $self->{log}; # TODO: shall we validate? |
166
|
1
|
|
|
|
|
25
|
Log::Log4perl->easy_init({ |
167
|
|
|
|
|
|
|
level => $log_level, |
168
|
|
|
|
|
|
|
layout => '%d{yyyy-MM-dd HH:mm:ss} - %p - %m%n', |
169
|
|
|
|
|
|
|
}); |
170
|
|
|
|
|
|
|
|
171
|
1
|
|
|
|
|
2704
|
my $logger = Log::Log4perl->get_logger(); |
172
|
1
|
|
|
|
|
384
|
$logger->info('Starting'); |
173
|
1
|
|
|
|
|
14
|
$logger->info("Tempdir: $tempdir"); |
174
|
1
|
50
|
|
|
|
14
|
$logger->info("Recent: $self->{recent}") if $self->{recent}; |
175
|
1
|
50
|
|
|
|
8
|
$logger->info("Author: $self->{author}") if $self->{author}; |
176
|
|
|
|
|
|
|
|
177
|
1
|
|
|
|
|
40
|
my $mcpan = MetaCPAN::Client->new(); |
178
|
1
|
|
|
|
|
19
|
my $rset; |
179
|
1
|
50
|
|
|
|
4
|
if ($self->{author}) { |
180
|
0
|
|
|
|
|
0
|
my $author = $mcpan->author($self->{author}); |
181
|
|
|
|
|
|
|
#print $author; |
182
|
0
|
|
|
|
|
0
|
$rset = $author->releases; |
183
|
|
|
|
|
|
|
} else { |
184
|
1
|
|
|
|
|
5
|
$rset = $mcpan->recent($self->{recent}); |
185
|
|
|
|
|
|
|
} |
186
|
1
|
|
|
|
|
167
|
$logger->info("MetaCPAN::Client::ResultSet received with a total of $rset->{total} items"); |
187
|
1
|
|
|
|
|
9
|
my %distros; |
188
|
1
|
|
|
|
|
5
|
my @fields = get_fields(); |
189
|
1
|
|
|
|
|
5
|
while ( my $item = $rset->next ) { |
190
|
2
|
50
|
|
|
|
4143
|
next if $distros{ $item->distribution }; # We have already deal with this in this session |
191
|
2
|
|
|
|
|
47
|
$distros{ $item->distribution } = 1; |
192
|
|
|
|
|
|
|
|
193
|
2
|
|
|
|
|
58
|
my $row = $self->{db}->db_get_distro($item->distribution); |
194
|
2
|
50
|
33
|
|
|
8
|
next if $row and $row->{version} eq $item->version; # we already have this in the database (shall we call last?) |
195
|
2
|
|
|
|
|
9
|
my %data = $self->get_data($item); |
196
|
|
|
|
|
|
|
#die Dumper \%data; |
197
|
2
|
|
|
|
|
22
|
$self->{db}->db_insert_into(@data{@fields}); |
198
|
2
|
|
|
|
|
36
|
push @all_the_distributions, \%data; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
1
|
50
|
|
|
|
15
|
if ($self->{author}) { |
202
|
0
|
|
|
|
|
0
|
@all_the_distributions = reverse sort {$a->{date} cmp $b->{date}} @all_the_distributions; |
|
0
|
|
|
|
|
0
|
|
203
|
0
|
0
|
0
|
|
|
0
|
if ($self->{limit} and @all_the_distributions > $self->{limit}) { |
204
|
0
|
|
|
|
|
0
|
@all_the_distributions = @all_the_distributions[0 .. $self->{limit}-1]; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
# Check on the VCS |
209
|
1
|
50
|
|
|
|
4
|
if ($self->{check_github}) { |
210
|
0
|
|
|
|
|
0
|
$logger->info("Starting to check GitHub"); |
211
|
0
|
|
|
|
|
0
|
for my $data (@all_the_distributions) { |
212
|
0
|
|
|
|
|
0
|
my $distribution = $data->{distribution}; |
213
|
0
|
|
|
|
|
0
|
my $data_ref = $self->{db}->db_get_distro($distribution); |
214
|
0
|
0
|
|
|
|
0
|
next if not $data_ref->{vcs_name}; |
215
|
|
|
|
|
|
|
|
216
|
0
|
0
|
0
|
|
|
0
|
if ($self->{check_github} and $data_ref->{vcs_name} eq 'GitHub') { |
217
|
0
|
|
|
|
|
0
|
analyze_github($data_ref); |
218
|
|
|
|
|
|
|
} |
219
|
0
|
|
|
|
|
0
|
my %data = %$data_ref; |
220
|
0
|
|
|
|
|
0
|
$self->{db}->db_update($distribution, @data{@fields}); |
221
|
0
|
0
|
|
|
|
0
|
sleep $self->{sleep} if $self->{sleep}; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
1
|
50
|
|
|
|
6
|
if ($self->{report}) { |
227
|
|
|
|
|
|
|
#print "Text report\n"; |
228
|
1
|
|
|
|
|
2
|
my @distros = @{ $self->{db}->db_get_every_distro() }; |
|
1
|
|
|
|
|
4
|
|
229
|
1
|
50
|
33
|
|
|
4
|
if ($self->{limit} and @distros > $self->{limit}) { |
230
|
0
|
|
|
|
|
0
|
@distros = @distros[0 .. $self->{limit}-1]; |
231
|
|
|
|
|
|
|
} |
232
|
1
|
|
|
|
|
4
|
for my $distro (@distros) { |
233
|
|
|
|
|
|
|
#die Dumper $distro; |
234
|
2
|
100
|
|
|
|
75
|
printf "%s %-40s %-7s", $distro->{date}, $distro->{distribution}, ($distro->{vcs_url} ? '' : 'NO VCS'); |
235
|
2
|
50
|
|
|
|
11
|
if ($self->{check_github}) { |
236
|
0
|
0
|
|
|
|
0
|
printf "%-7s", ($distro->{has_ci} ? '' : 'NO CI'); |
237
|
|
|
|
|
|
|
} |
238
|
2
|
|
|
|
|
160
|
print "\n"; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
42; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head1 NAME |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
CPAN::Digger - To dig CPAN |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=head1 SYNOPSIS |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
cpan-digger |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=head1 DESCRIPTION |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
This is a command line program to collect some meta information about CPAN modules. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
Copyright (C) 2020 by L<Gabor Szabo|https://szabgab.com/> |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
265
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.26.1 or, |
266
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
269
|
|
|
|
|
|
|
|