File Coverage

blib/lib/CPAN/SQLite/Info.pm
Criterion Covered Total %
statement 118 137 86.1
branch 26 44 59.0
condition 10 21 47.6
subroutine 17 18 94.4
pod 0 9 0.0
total 171 229 74.6


line stmt bran cond sub pod time code
1             # $Id: Info.pm 85 2022-10-29 05:44:36Z stro $
2              
3             package CPAN::SQLite::Info;
4 5     5   168827 use strict;
  5         31  
  5         151  
5 5     5   25 use warnings;
  5         12  
  5         221  
6              
7             our $VERSION = '0.220';
8              
9 5     5   1118 use English qw/-no_match_vars/;
  5         7415  
  5         32  
10              
11 5     5   2742 use CPAN::DistnameInfo;
  5         2195  
  5         174  
12 5     5   471 use File::Spec::Functions qw(catfile);
  5         810  
  5         422  
13 5     5   3339 use Compress::Zlib;
  5         321605  
  5         1234  
14 5     5   51 use File::Basename;
  5         12  
  5         368  
15 5     5   2425 use Safe;
  5         151130  
  5         354  
16 5     5   2379 use CPAN::SQLite::Util qw(vcmp print_debug);
  5         14  
  5         6239  
17              
18             my $ext = qr/\.(tar\.gz|tar\.Z|tgz|zip)$/;
19              
20             sub new {
21 1     1 0 1390 my ($class, %args) = @_;
22 1         7 my $self = { dists => {}, auths => {}, mods => {}, info => {}, %args };
23 1         5 return bless $self, $class;
24             }
25              
26             sub fetch_info {
27 1     1 0 404 my $self = shift;
28 1 50       4 $self->mailrc() or return;
29 1 50       4 $self->dists_and_mods() or return;
30 1         16 return 1;
31             }
32              
33             sub dists_and_mods {
34 1     1 0 2 my $self = shift;
35 1         7 my ($packages, $cpan_files) = $self->packages();
36              
37 1         3 my ($dists, $mods);
38 1         4 my $ignore = $self->{ignore};
39 1         3 my $pat;
40 1 50 33     5 if ($ignore and ref($ignore) eq 'ARRAY') {
41 0         0 $pat = join '|', @$ignore;
42             }
43 1         26 foreach my $cpan_file (keys %$cpan_files) {
44 105 50 33     206 if ($pat and ($cpan_file =~ /^($pat)/)) {
45 0         0 delete $cpan_files->{$cpan_file};
46 0         0 print_debug("Ignoring $cpan_file\n");
47 0         0 next;
48             }
49 105         222 my $d = CPAN::DistnameInfo->new($cpan_file);
50 105 50       6249 next unless ($d->maturity eq 'released');
51 105         469 my $dist_name = $d->dist;
52 105         368 my $dist_vers = $d->version;
53 105         383 my $cpanid = $d->cpanid;
54 105         414 my $dist_file = $d->filename;
55 105 50 33     593 unless ($dist_name and $dist_vers and $cpanid) {
      33        
56 0         0 print_debug("No dist_name/version/cpanid for $cpan_file: skipping\n");
57 0         0 delete $cpan_files->{$cpan_file};
58 0         0 next;
59             }
60              
61             # ignore specified dists
62 105 50 33     190 if ($pat and ($dist_name =~ /^($pat)$/)) {
63 0         0 delete $cpan_files->{$cpan_file};
64 0         0 print_debug("Ignoring $dist_name\n");
65 0         0 next;
66             }
67 105 100 100     250 if (not $dists->{$dist_name} or vcmp($dist_vers, $dists->{$dist_name}->{dist_vers}) > 0) {
68 96         271 $dists->{$dist_name}->{dist_vers} = $dist_vers;
69 96         165 $dists->{$dist_name}->{dist_file} = $dist_file;
70 96         320 $dists->{$dist_name}->{cpanid} = $cpanid;
71             }
72             }
73              
74 1         7 my $wanted;
75 1         16 foreach my $dist_name (keys %$dists) {
76 92         1778 $wanted->{ basename($dists->{$dist_name}->{dist_file}) } = $dist_name;
77             }
78 1         137 foreach my $mod_name (keys %$packages) {
79 606         11233 my $file = basename($packages->{$mod_name}->{dist_file});
80 606         1391 my $dist_name = $wanted->{$file};
81 606 100 66     1749 unless ($dist_name and $dists->{$dist_name}) {
82 62         121 delete $packages->{$mod_name};
83 62         128 next;
84             }
85 544         1487 $mods->{$mod_name}->{dist_name} = $dist_name;
86 544         1059 $dists->{$dist_name}->{modules}->{$mod_name}++;
87 544         1148 $mods->{$mod_name}->{mod_vers} = $packages->{$mod_name}->{mod_vers};
88             }
89 1         29 $self->{dists} = $dists;
90 1         162 return $self->{mods} = $mods;
91             }
92              
93             sub modlist {
94 0     0 0 0 my $self = shift;
95 0         0 warn 'Modlist does not contain any useful info anymore';
96 0         0 return;
97             }
98              
99             sub packages {
100 1     1 0 3 my $self = shift;
101 1         3 my $index = 'modules/02packages.details.txt.gz';
102             my $packages =
103             $self->{keep_source_where}
104             ? CPAN::FTP->localize($index, catfile($self->{keep_source_where}, $index))
105 1 50       10 : catfile($self->{CPAN}, $index);
106 1 50       5 return unless check_file('modules/02packages.details.txt.gz', $packages);
107 1         11 print_debug("Reading information from $packages\n");
108 1         5 my $lines = zcat($packages);
109 1         45 while (@$lines) {
110 9         15 my $shift = shift(@$lines);
111 9 100       35 last if $shift =~ /^\s*$/;
112             }
113 1         2 my ($mods, $cpan_files);
114 1         4 foreach (@$lines) {
115 606         1593 my ($mod_name, $mod_vers, $dist_file) = split(" ", $_, 4);
116 606 100       1158 $mod_vers = undef if $mod_vers eq 'undef';
117 606         1874 $mods->{$mod_name} = { mod_vers => $mod_vers, dist_file => $dist_file };
118 606         1159 $cpan_files->{$dist_file}++;
119             }
120 1         28 return ($mods, $cpan_files);
121             }
122              
123             sub mailrc {
124 1     1 0 2 my $self = shift;
125 1         3 my $index = 'authors/01mailrc.txt.gz';
126             my $mailrc =
127             $self->{keep_source_where}
128             ? CPAN::FTP->localize($index, catfile($self->{keep_source_where}, $index))
129 1 50       12 : catfile($self->{CPAN}, $index);
130 1 50       6 return unless check_file('authors/01mailrc.txt.gz', $mailrc);
131 1         21 print_debug("Reading information from $mailrc\n");
132 1         4 my $lines = zcat($mailrc);
133 1         57 my $auths;
134 1         4 foreach (@$lines) {
135              
136             #my($cpanid,$fullname,$email) =
137             #m/alias\s+(\S+)\s+\"([^\"\<]+)\s+\<([^\>]+)\>\"/;
138 4         26 my ($cpanid, $authinfo) = m/alias\s+(\S+)\s+\"([^\"]+)\"/;
139 4 50       12 next unless $cpanid;
140 4         5 my ($fullname, $email);
141 4 50       18 if ($authinfo =~ m/([^<]+)\<(.*)\>/) {
142 4         10 $fullname = $1;
143 4         7 $email = $2;
144             } else {
145 0         0 $fullname = '';
146 0         0 $email = lc($cpanid) . '@cpan.org';
147             }
148 4         10 $auths->{$cpanid} = {
149             fullname => trim($fullname),
150             email => trim($email) };
151             }
152 1         7 return $self->{auths} = $auths;
153             }
154              
155             sub check_file {
156 2     2 0 6 my ($index, $file) = @_;
157 2 50       8 unless ($file) {
158 0         0 warn qq{index file '$index' not defined};
159 0         0 return;
160             }
161 2 50       61 unless (-f $file) {
162 0         0 warn qq{index file '$file' not found};
163 0         0 return;
164             }
165 2         12 return 1;
166             }
167              
168             sub zcat {
169 2     2 0 4 my $file = shift;
170 2         4 my ($buffer, $lines);
171 2 50       9 my $gz = gzopen($file, 'rb')
172             or die "Cannot open $file: $gzerrno";
173 2         18740 while ($gz->gzreadline($buffer) > 0) {
174 619         64764 push @$lines, $buffer;
175             }
176 2 50       309 die "Error reading from $file: $gzerrno" . ($gzerrno + 0)
177             if $gzerrno != Z_STREAM_END;
178 2         17 $gz->gzclose();
179 2         254 return $lines;
180             }
181              
182             sub trim {
183 8     8 0 15 my $string = shift;
184 8 50       15 return '' unless $string;
185 8         15 $string =~ s/^\s+//;
186 8         22 $string =~ s/\s+$//;
187 8         20 $string =~ s/\s+/ /g;
188 8         26 return $string;
189             }
190              
191             1;
192              
193             =head1 NAME
194              
195             CPAN::SQLite::Info - extract information from CPAN indices
196              
197             =head1 VERSION
198              
199             version 0.220
200              
201             =head1 DESCRIPTION
202              
203             This module extracts information from the CPAN indices
204             F<$CPAN/modules/02packages.details.txt.gz> and
205             F<$CPAN/authors/01mailrc.txt.gz>.
206              
207             A C object is created with
208              
209             my $info = CPAN::SQLite::Info->new(CPAN => $cpan);
210              
211             where C<$cpan> specifies the top-level CPAN directory
212             underneath which the index files are found. Calling
213              
214             $info->fetch_info();
215              
216             will result in the object being populated with 3 hash references:
217              
218             =over 3
219              
220             =item * C<$info-E{dists}>
221              
222             This contains information on distributions. Keys of this hash
223             reference are the distribution names, with the associated value being a
224             hash reference with keys of
225              
226             =over 3
227              
228             =item C - the version of the CPAN file
229              
230             =item C - the CPAN filename
231              
232             =item C - the CPAN author id
233              
234             =item C - a description, if available
235              
236             =item C - specifies the modules present in the distribution:
237              
238             for my $module (keys %{$info->{$distname}->{modules}}) {
239             print "Module: $module\n";
240             }
241              
242             =back
243              
244             =item * C<$info-E{mods}>
245              
246             This contains information on modules. Keys of this hash
247             reference are the module names, with the associated values being a
248             hash reference with keys of
249              
250             =over 3
251              
252             =item C - the distribution name containing the module
253              
254             =item C - the version
255              
256             =item C - a description, if available
257              
258             =back
259              
260             =item * C<$info-E{auths}>
261              
262             This contains information on CPAN authors. Keys of this hash
263             reference are the CPAN ids, with the associated value being a
264             hash reference with keys of
265              
266             =over 3
267              
268             =item C - the author's full name
269              
270             =item C - the author's email address
271              
272             =back
273              
274             =back
275              
276             =head1 SEE ALSO
277              
278             L
279              
280             =cut