line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package License::Syntax; |
2
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
242254
|
use warnings; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
189
|
|
4
|
5
|
|
|
5
|
|
31
|
use strict; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
179
|
|
5
|
5
|
|
|
5
|
|
32
|
use Carp; |
|
5
|
|
|
|
|
15
|
|
|
5
|
|
|
|
|
407
|
|
6
|
5
|
|
|
5
|
|
19891
|
use DBI; |
|
5
|
|
|
|
|
158154
|
|
|
5
|
|
|
|
|
460
|
|
7
|
5
|
|
|
5
|
|
9367
|
use Text::CSV; |
|
5
|
|
|
|
|
84009
|
|
|
5
|
|
|
|
|
41
|
|
8
|
5
|
|
|
5
|
|
6324
|
use POSIX; |
|
5
|
|
|
|
|
47598
|
|
|
5
|
|
|
|
|
52
|
|
9
|
5
|
|
|
5
|
|
24016
|
use Data::Dumper; |
|
5
|
|
|
|
|
9613
|
|
|
5
|
|
|
|
|
19445
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
License::Syntax - Coding and Decoding of License strings using SPDX and SUSE syntax. |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 VERSION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Version 0.13 |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our $VERSION = '0.13'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 SYNOPSIS |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
This implements the SUSE License Syntax. |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
use License::Syntax; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
my $obj = new License::Syntax licensemap => 'licenselist.csv;as'; |
31
|
|
|
|
|
|
|
my $obj = new License::Syntax map => { 'GNU General Public License V2.0' => 'GPLv2' }; |
32
|
|
|
|
|
|
|
my $obj = new License::Syntax 'pathname.sqlite;table(alias,name)'; |
33
|
|
|
|
|
|
|
$obj->loadmap_csv($filename_csv); |
34
|
|
|
|
|
|
|
$obj->loadmap_sqlite($filename_sqlite, $table_name, $alias_col, $name_col); |
35
|
|
|
|
|
|
|
$obj->savemap_csv($filename_csv, scalar(localtime)); |
36
|
|
|
|
|
|
|
$obj->savemap_sqlite($filename_sqlite, $table_name, $alias_col, $name_col, 'TRUNCATE'); |
37
|
|
|
|
|
|
|
$obj->add_alias($alias, $canonical_name); |
38
|
|
|
|
|
|
|
$name = $obj->canonical_name($alias, $disambiguate); |
39
|
|
|
|
|
|
|
$tree = $obj->tokenize('GPLv2 & Apache 1.1; LGPLv2.1 | BSD4c<
|
40
|
|
|
|
|
|
|
$name = $obj->format_tokens($tree); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 FUNCTIONS |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head2 new |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
License::Syntax is an object oriented module. |
48
|
|
|
|
|
|
|
When constructing new License::Syntax objects, you can provide a mapping table for |
49
|
|
|
|
|
|
|
license names. The table is used for recognizing alternate alias names for the |
50
|
|
|
|
|
|
|
licenses (left hand side) and also defines the canonical short names of the licenses |
51
|
|
|
|
|
|
|
(right hand side). |
52
|
|
|
|
|
|
|
The mapping table is consulted twice, before and after decoding the syntax. |
53
|
|
|
|
|
|
|
(Thus non-terminal mappings may actually be followed.) |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
The mapping table can be provided either |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=over 2 |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=item * as a CSV files of two columns. Column seperator is a comma (,) |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item * as a hash, or |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=item * as table in an sqlite database using the given columns as left hand side and right hand side respectivly. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=back |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
As an alternative to specifying a mapping with new(), or additionally, mappings |
68
|
|
|
|
|
|
|
can also be provided via loadmap_sqlite(), loadmap_csv(), or add_alias() |
69
|
|
|
|
|
|
|
methods. Earlier mappings take precedence over later mappings. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=cut |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub new |
75
|
|
|
|
|
|
|
{ |
76
|
6
|
|
|
6
|
1
|
2181
|
my $self = shift; |
77
|
6
|
|
33
|
|
|
51
|
my $class = ref($self) || $self; |
78
|
6
|
100
|
100
|
|
|
47
|
if (1 == scalar @_ and !ref $_[0]) |
79
|
|
|
|
|
|
|
{ |
80
|
2
|
|
|
|
|
12
|
$self = { new => { licensemap => $_[0] } }; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
else |
83
|
|
|
|
|
|
|
{ |
84
|
4
|
100
|
|
|
|
28
|
$self = { new => { (ref $_[0] eq 'HASH') ? %{$_[0]} : @_ } }; |
|
2
|
|
|
|
|
11
|
|
85
|
|
|
|
|
|
|
} |
86
|
6
|
|
|
|
|
22
|
$self = bless $self, $class; |
87
|
|
|
|
|
|
|
|
88
|
6
|
|
|
|
|
33
|
$self->set_rejects('REJECT'); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# use Data::Dumper; |
91
|
|
|
|
|
|
|
# carp Dumper $self, $class, \@_; |
92
|
6
|
50
|
|
|
|
35
|
if ($self->{new}{map}) |
93
|
|
|
|
|
|
|
{ |
94
|
0
|
|
|
|
|
0
|
for my $k (%{$self->{new}{map}}) |
|
0
|
|
|
|
|
0
|
|
95
|
|
|
|
|
|
|
{ |
96
|
0
|
|
|
|
|
0
|
$self->add_alias($k, $self->{new}{map}{$k}); |
97
|
|
|
|
|
|
|
} |
98
|
0
|
|
|
|
|
0
|
delete $self->{new}{map}; |
99
|
|
|
|
|
|
|
} |
100
|
6
|
100
|
|
|
|
137
|
$self->_loadmap($self->{new}{licensemap}) |
101
|
|
|
|
|
|
|
if $self->{new}{licensemap}; |
102
|
6
|
|
|
|
|
41
|
return $self; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# dispatch into either loadmap_sqlite or loadmap_csv. |
106
|
|
|
|
|
|
|
sub _loadmap |
107
|
|
|
|
|
|
|
{ |
108
|
3
|
|
|
3
|
|
10
|
my ($s,$f) = @_; |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# "filename.csv" |
111
|
|
|
|
|
|
|
# "filename.csv;garbage" |
112
|
3
|
100
|
|
|
|
35
|
my $suf = $1 if $f =~ s{;([\w,;\#\(\)]+)$}{}; |
113
|
3
|
100
|
|
|
|
24
|
return $s->loadmap_csv($f, $suf) if $f =~ m{\.csv$}i; |
114
|
|
|
|
|
|
|
|
115
|
2
|
50
|
|
|
|
19
|
croak "$f: needs either .csv or .sqlite suffix\n" unless $f =~ m{\.sql(ite)?$}i; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# "filename.sqlite;table" |
118
|
|
|
|
|
|
|
# "filename.sqlite;table(alias,name)" |
119
|
2
|
50
|
|
|
|
19
|
my ($table,$left,$right) = ($1,'alias','name') if $suf =~ m{^(\w+)}; |
120
|
2
|
50
|
|
|
|
19
|
($left,$right) = ($1,$2) if $suf =~ m{\((\w+)\W(\w+)}; |
121
|
2
|
|
|
|
|
13
|
return $s->loadmap_sqlite($f, $table, $left, $right); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
## returns a two column array with the minimum representation of a license map. |
125
|
|
|
|
|
|
|
sub _saveable_map |
126
|
|
|
|
|
|
|
{ |
127
|
4
|
|
|
4
|
|
443
|
my ($s) = @_; |
128
|
4
|
|
|
|
|
11
|
my %identity; |
129
|
|
|
|
|
|
|
my %done; |
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
0
|
my @r; |
132
|
4
|
|
|
|
|
7
|
for my $k (sort keys %{$s->{licensemap}{ex}}) |
|
4
|
|
|
|
|
2030
|
|
133
|
|
|
|
|
|
|
{ |
134
|
2380
|
|
|
|
|
5034
|
my $v = $s->{licensemap}{ex}{$k}; |
135
|
2380
|
100
|
|
|
|
4021
|
if ($v eq $k) |
136
|
|
|
|
|
|
|
{ |
137
|
772
|
|
|
|
|
1560
|
$identity{$v}++; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
else |
140
|
|
|
|
|
|
|
{ |
141
|
1608
|
|
|
|
|
3733
|
push @r, [$k,$v]; |
142
|
1608
|
|
|
|
|
3239
|
$done{$v}++; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
4
|
|
|
|
|
250
|
for my $k (keys %identity) |
147
|
|
|
|
|
|
|
{ |
148
|
772
|
100
|
|
|
|
1601
|
next if $done{$k}; |
149
|
40
|
|
|
|
|
93
|
push @r, ['',$k]; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
4
|
|
|
|
|
174
|
return \@r; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head2 canonical_name |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
$name = $obj->canonical_name($alias); |
158
|
|
|
|
|
|
|
is equivalent to |
159
|
|
|
|
|
|
|
$name = $obj->format_tokens($obj->tokenize($alias)); |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub canonical_name |
164
|
|
|
|
|
|
|
{ |
165
|
2
|
|
|
2
|
1
|
11
|
my ($s, $name) = @_; |
166
|
2
|
|
|
|
|
8
|
return $s->format_tokens($s->tokenize($name)); |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 savemap_csv |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
$obj->savemap_csv('filename.csv', scalar(localtime)); |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Writes the current mapping table as a comma seperated file. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=cut |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub savemap_csv |
178
|
|
|
|
|
|
|
{ |
179
|
2
|
|
|
2
|
1
|
1521
|
my ($s, $f, $header_suffix) = @_; |
180
|
2
|
50
|
|
|
|
243
|
open O, ">", $f or croak "$f: write failed: $!"; |
181
|
2
|
|
|
|
|
96
|
print O qq{# "Alias name","Canonical Name" -- saved by License::Syntax $VERSION}; |
182
|
2
|
50
|
|
|
|
10
|
$header_suffix = '' unless defined $header_suffix; |
183
|
2
|
50
|
|
|
|
10
|
$header_suffix .= "\n" unless $header_suffix =~ m{\n$}s; |
184
|
2
|
|
|
|
|
4
|
print O $header_suffix; |
185
|
|
|
|
|
|
|
|
186
|
2
|
|
|
|
|
9
|
my $list = $s->_saveable_map(); |
187
|
2
|
|
|
|
|
9
|
for my $r (@$list) |
188
|
|
|
|
|
|
|
{ |
189
|
824
|
|
|
|
|
2046
|
print O qq{"$r->[0]","$r->[1]"\n}; |
190
|
|
|
|
|
|
|
} |
191
|
2
|
50
|
|
|
|
263
|
close O or croak "$f: write failed: $!"; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 set_rejects |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
$obj->set_rejects('REJECT', ...); |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
define the license names to be rejected. Per default, |
199
|
|
|
|
|
|
|
exactly one name 'REJECT' is rejected. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=cut |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub set_rejects |
204
|
|
|
|
|
|
|
{ |
205
|
6
|
|
|
6
|
1
|
19
|
my ($s, @r) = @_; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# store as a hash for faster test. |
208
|
6
|
|
|
|
|
16
|
$s->{REJECT} = { map { $_ => 1 } @r }; |
|
6
|
|
|
|
|
47
|
|
209
|
6
|
|
|
|
|
19
|
return $s; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head2 add_alias |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
$obj->add_alias($alias,$name); |
215
|
|
|
|
|
|
|
$obj->add_alias(undef,$name); |
216
|
|
|
|
|
|
|
$obj->add_alias('',$name); |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
adds $name (and optionally $alias) to the objects licensemap. |
219
|
|
|
|
|
|
|
Both, lower case and exact mappings are maintained. |
220
|
|
|
|
|
|
|
(add_url is used in loadmap_csv) |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
add_alias() takes care to extend to the right. That is, if it's right hand side |
223
|
|
|
|
|
|
|
parameter is already known to be an alias, the new alias is added pointing to the old alias's canonical name (rahter than to the old alias that the caller provided). |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
CAVEAT: |
226
|
|
|
|
|
|
|
add_alias() does not maintain full tranitivity, as it does not extend to the left. |
227
|
|
|
|
|
|
|
If its left hand side is already known to be a canonical name, a warning is |
228
|
|
|
|
|
|
|
issued, but the situation cannot be corrected, as this would require rewriting |
229
|
|
|
|
|
|
|
existing entries. This is non-obvious, as mappings are applied more than once |
230
|
|
|
|
|
|
|
during format_tokens(), so indirect mappings involving non-terminal names, may |
231
|
|
|
|
|
|
|
or may not work. A two step mapping currently works reliably, though. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
add_alias() does nothing, if it would directly redo an existing mapping. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
See also new() for more details about mappings. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=cut |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head2 add_url |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
$obj->add_url($urls, $name); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Add one or multiple URLs to the canonical license name. URLs can be seperated by comma or whitespace. |
244
|
|
|
|
|
|
|
May be called multiple times for the same name, and fills an array of urls. |
245
|
|
|
|
|
|
|
(add_url is used in loadmap_csv) |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=cut |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head2 set_compat_class |
250
|
|
|
|
|
|
|
$obj->set_compat_class($cc, $name); |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
Specify the compatibility class, for a canonical license name. |
253
|
|
|
|
|
|
|
compatibility classes are numerical. These classes allow to derive certain compatibility issues |
254
|
|
|
|
|
|
|
amongst liceses. Some classes are always incompatible (even amongst themselves), other |
255
|
|
|
|
|
|
|
classes are always comaptible, and for some other classses, compatibility is uncertain. |
256
|
|
|
|
|
|
|
The exact semantics are to be defined. (set_compat_class is used in loadmap_csv). |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=cut |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
sub set_compat_class |
261
|
|
|
|
|
|
|
{ |
262
|
0
|
|
|
0
|
1
|
0
|
my ($s, $cc, $canonical_name) = @_; |
263
|
0
|
|
|
|
|
0
|
$cc += 0; |
264
|
0
|
0
|
|
|
|
0
|
croak "compatibility class should be numeric and > 0\n" unless $cc; |
265
|
0
|
|
|
|
|
0
|
$s->{licensemap}{cc}{$canonical_name} = $cc; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
sub add_url |
269
|
|
|
|
|
|
|
{ |
270
|
149
|
|
|
149
|
1
|
267
|
my ($s, $url, $canonical_name) = @_; |
271
|
149
|
|
|
|
|
715
|
my @url = split(/[,\s]+/, $url); |
272
|
149
|
|
|
|
|
189
|
push @{$s->{licensemap}{url}{$canonical_name}}, @url; |
|
149
|
|
|
|
|
858
|
|
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
sub add_alias |
276
|
|
|
|
|
|
|
{ |
277
|
1483
|
|
|
1483
|
1
|
2913
|
my ($s, $from, $to) = @_; |
278
|
|
|
|
|
|
|
|
279
|
1483
|
100
|
100
|
|
|
6484
|
$from = '' if defined($from) and $from eq $to; # not an alias. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# normalize whitespace: |
282
|
1483
|
|
|
|
|
3466
|
$to =~ s{\s+}{ }g; |
283
|
1483
|
100
|
|
|
|
7154
|
$from =~ s{\s+}{ }g if defined $from; |
284
|
|
|
|
|
|
|
|
285
|
1483
|
100
|
|
|
|
5510
|
if (defined(my $nn = $s->{licensemap}{ex}{$to})) |
286
|
|
|
|
|
|
|
{ |
287
|
|
|
|
|
|
|
# do right extend |
288
|
|
|
|
|
|
|
# simple loopdetection first: |
289
|
856
|
50
|
33
|
|
|
4131
|
croak "cyclic alias '$from' -> '$to' with already known canonical name '$nn'\n" |
290
|
|
|
|
|
|
|
if defined($from) && $from eq $nn; |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# now extend: |
293
|
856
|
50
|
|
|
|
2273
|
carp "add_alias: '$from' -> '$to' extended to '$nn'\n" if $s->{debug}; |
294
|
856
|
|
|
|
|
1808
|
$to = $nn; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
1483
|
100
|
100
|
|
|
7120
|
if (defined $from and $from ne '') |
298
|
|
|
|
|
|
|
{ |
299
|
1322
|
|
|
|
|
3153
|
my $aa; |
300
|
1322
|
100
|
|
|
|
4517
|
if (defined($aa = $s->{licensemap}{ex}{$from})) |
301
|
|
|
|
|
|
|
{ |
302
|
44
|
100
|
|
|
|
97
|
if ($aa eq $from) |
303
|
|
|
|
|
|
|
{ |
304
|
|
|
|
|
|
|
# this alias is a right hand side. |
305
|
|
|
|
|
|
|
# We recognize this, because all right hand sides map to itself. |
306
|
1
|
|
|
|
|
6
|
my $msg = "mapping error: '$from' is now both alias and canonical name. Try to load '$from' -> '$to' earlier."; |
307
|
1
|
50
|
|
|
|
5
|
carp "$msg\n" if $s->{debug}; |
308
|
1
|
|
|
|
|
3
|
push @{$s->{diagnostics}}, $msg; |
|
1
|
|
|
|
|
4
|
|
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
else |
311
|
|
|
|
|
|
|
{ |
312
|
|
|
|
|
|
|
# this is a chane attempt to an existing mapping. |
313
|
|
|
|
|
|
|
# silently ignored. |
314
|
43
|
100
|
|
|
|
289
|
carp "mapping ignored: '$from' => '$to', it already maps to '$aa'\n" if $s->{debug}; |
315
|
43
|
|
|
|
|
271
|
return $s; |
316
|
|
|
|
|
|
|
} |
317
|
|
|
|
|
|
|
} |
318
|
1279
|
|
|
|
|
4583
|
$s->{licensemap}{ex}{$from} = $to; |
319
|
1279
|
|
|
|
|
4165
|
$s->{licensemap}{lc}{lc $from} = $to; |
320
|
1279
|
100
|
|
|
|
2362
|
if (scalar(my @a = _tokenize_linear($from)) > 1) |
321
|
|
|
|
|
|
|
{ |
322
|
138
|
|
|
|
|
1358
|
$s->{licensemap}{tok}{lc $a[0]}{$from} = [ @a ]; |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
} |
325
|
1440
|
|
|
|
|
4569
|
$s->{licensemap}{ex}{$to} = $to; |
326
|
1440
|
|
|
|
|
3950
|
$s->{licensemap}{lc}{lc $to} = $to; |
327
|
1440
|
100
|
|
|
|
2792
|
if (scalar(my @a = _tokenize_linear($to)) > 1) |
328
|
|
|
|
|
|
|
{ |
329
|
156
|
|
|
|
|
847
|
$s->{licensemap}{tok}{lc $a[0]}{$to} = [ @a ]; |
330
|
|
|
|
|
|
|
} |
331
|
1440
|
|
|
|
|
4080
|
return $s; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=head2 savemap_sqlite |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
$obj->savemap_sqlite('filename.sqlite', 'lic_map', 'alias', 'shortname', $trunc_flag); |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# sqlite3 filename.sqlite |
340
|
|
|
|
|
|
|
sqlite> select * from lic_map |
341
|
|
|
|
|
|
|
alias | shortname |
342
|
|
|
|
|
|
|
------|---------- |
343
|
|
|
|
|
|
|
... |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
If $trunc_flag is true and the table previously exists, the table is truncated before it is written to; |
346
|
|
|
|
|
|
|
otherwise new contents merges over old contents, if any. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
=cut |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
sub savemap_sqlite |
351
|
|
|
|
|
|
|
{ |
352
|
1
|
|
|
1
|
1
|
2312
|
my ($s, $f, $t, $a, $n, $trunc_flag) = @_; |
353
|
|
|
|
|
|
|
|
354
|
1
|
50
|
|
|
|
15
|
my $dbh = DBI->connect("dbi:SQLite:dbname=$f","","") or carp "DBI-connect($f) failed: $!"; |
355
|
|
|
|
|
|
|
|
356
|
1
|
50
|
|
|
|
37932
|
$dbh->do("PRAGMA default_synchronous = OFF") if $s->{new}{nofsync}; |
357
|
1
|
|
|
|
|
222
|
$dbh->do("CREATE TABLE IF NOT EXISTS $t ( $a TEXT, $n TEXT )"); |
358
|
1
|
50
|
|
|
|
499440
|
$dbh->do("DELETE FROM $t") if $trunc_flag; |
359
|
|
|
|
|
|
|
|
360
|
1
|
|
|
|
|
40002
|
my $list = $s->_saveable_map(); |
361
|
1
|
|
|
|
|
4
|
for my $r (@$list) |
362
|
|
|
|
|
|
|
{ |
363
|
412
|
|
|
|
|
114046412
|
$dbh->do("INSERT OR REPLACE INTO $t ($a,$n) VALUES(?,?)", {}, $r->[0], $r->[1]); |
364
|
|
|
|
|
|
|
} |
365
|
1
|
|
|
|
|
779793
|
$dbh->disconnect(); |
366
|
1
|
|
|
|
|
310
|
return $s; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=head2 loadmap_sqlite |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
See also new() for more details about mappings. |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
sub loadmap_sqlite |
376
|
|
|
|
|
|
|
{ |
377
|
2
|
|
|
2
|
1
|
6
|
my ($s, $f, $t, $a, $n) = @_; |
378
|
|
|
|
|
|
|
|
379
|
2
|
50
|
|
|
|
25
|
my $dbh = DBI->connect("dbi:SQLite:dbname=$f","","") or carp "DBI-connect($f) failed: $!"; |
380
|
2
|
|
|
|
|
38464
|
my $list = $dbh->selectall_arrayref("SELECT $a,$n FROM $t"); |
381
|
2
|
|
|
|
|
2684
|
for my $r (@$list) |
382
|
|
|
|
|
|
|
{ |
383
|
824
|
|
|
|
|
2725
|
$s->add_alias($r->[0], $r->[1]); |
384
|
|
|
|
|
|
|
} |
385
|
2
|
|
|
|
|
5132
|
$dbh->disconnect(); |
386
|
2
|
|
|
|
|
519
|
return $s; |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=head2 tokenize |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
$tree_arr = $obj->tokenize($complex_license_expr); |
392
|
|
|
|
|
|
|
$tree_arr = $obj->tokenize($complex_license_expr, 1); |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
Returns an array reference containing tokens and sub-arrays, |
395
|
|
|
|
|
|
|
describing how the $complex_license_expr is parsed. |
396
|
|
|
|
|
|
|
If a second parameter disambiguate is provided and is true, |
397
|
|
|
|
|
|
|
extra parenthesis are inserted to unambiguiusly show how the |
398
|
|
|
|
|
|
|
complex expression is interpreted. |
399
|
|
|
|
|
|
|
If names have been loaded with add_alias, before calling tokenize, |
400
|
|
|
|
|
|
|
all names and aliases are recognized as one token. E.g. "GPL 2.0 or later" |
401
|
|
|
|
|
|
|
would be split as ["GPL 2.0", "or", "later"] otherwise. |
402
|
|
|
|
|
|
|
No name mapping is performed here. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=cut |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
sub _tokenize_linear |
408
|
|
|
|
|
|
|
{ |
409
|
2725
|
|
|
2725
|
|
3905
|
my ($text) = @_; |
410
|
2725
|
|
|
|
|
8302
|
$text =~ s{\s+}{ }g; # normalize whitespace |
411
|
2725
|
|
|
|
|
1011728
|
my @a = ($text =~ m{\s*(.*?)?\s*(;|\||&|\bor\b|\band\b|<<|\(|\)|$)}gi); |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
## the above regexp often returns ['somthing', '', '', ''] |
414
|
|
|
|
|
|
|
## remove the empty trailers. |
415
|
2725
|
|
66
|
|
|
13931
|
while ((scalar @a) and ($a[-1] eq '')) |
416
|
|
|
|
|
|
|
{ |
417
|
7957
|
|
|
|
|
40906
|
pop @a; |
418
|
|
|
|
|
|
|
} |
419
|
2725
|
|
|
|
|
12087
|
return @a; |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
sub tokenize |
423
|
|
|
|
|
|
|
{ |
424
|
6
|
|
|
6
|
1
|
604
|
my ($s, $text, $disambiguate) = @_; |
425
|
|
|
|
|
|
|
|
426
|
6
|
50
|
|
|
|
35
|
$text = "REJECT(?undefined($text)?)" unless $text =~ m{\w\w}; |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
#### accept a comma instead of a semicolon, unless there are semicolons. |
429
|
|
|
|
|
|
|
## Not done, we have to digest this: "The PHP License, version 3.01" |
430
|
|
|
|
|
|
|
## $text =~ s{,}{;} unless $text =~ m{;}; |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
## tokenize the expression by cutting at all operators and parenthesis. |
433
|
|
|
|
|
|
|
## we cut before and after such operators and parenthesis, so that we |
434
|
|
|
|
|
|
|
## do not lose anything by cutting. |
435
|
|
|
|
|
|
|
|
436
|
6
|
|
|
|
|
20
|
my @a = _tokenize_linear($text); |
437
|
6
|
|
|
|
|
16
|
my $i = 0; |
438
|
6
|
|
|
|
|
24
|
for (; $i <= $#a; $i++) # this may shorten while we walk along. |
439
|
|
|
|
|
|
|
{ |
440
|
92
|
100
|
|
|
|
343
|
if (my $m = $s->{licensemap}{tok}{lc $a[$i]}) |
441
|
|
|
|
|
|
|
{ |
442
|
14
|
|
|
|
|
42
|
for my $k (keys %$m) |
443
|
|
|
|
|
|
|
{ |
444
|
62
|
|
|
|
|
77
|
my $match = 1; |
445
|
62
|
|
|
|
|
64
|
for my $j (1..$#{$m->{$k}}) |
|
62
|
|
|
|
|
144
|
|
446
|
|
|
|
|
|
|
{ |
447
|
84
|
100
|
|
|
|
219
|
if ($a[$i+$j] ne $m->{$k}[$j]) |
448
|
|
|
|
|
|
|
{ |
449
|
62
|
|
|
|
|
73
|
$match = 0; |
450
|
62
|
|
|
|
|
86
|
last; |
451
|
|
|
|
|
|
|
} |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
62
|
50
|
|
|
|
191
|
if ($match) |
455
|
|
|
|
|
|
|
{ |
456
|
|
|
|
|
|
|
# Undo tokenization: |
457
|
|
|
|
|
|
|
# Replace tokenized version with original license name |
458
|
0
|
|
|
|
|
0
|
splice @a, $i, (scalar @{$m->{$k}}), $k; |
|
0
|
|
|
|
|
0
|
|
459
|
0
|
|
|
|
|
0
|
last; |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
} |
463
|
92
|
100
|
|
|
|
202
|
$a[$i] = 'and' if $a[$i] eq '&'; |
464
|
92
|
100
|
|
|
|
319
|
$a[$i] = 'or' if $a[$i] eq '|'; |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
## before we group tokens, we pull back license names that contain or. |
468
|
|
|
|
|
|
|
|
469
|
6
|
100
|
|
|
|
23
|
$s->{disambiguate}++ if $disambiguate; |
470
|
6
|
|
|
|
|
30
|
my $r = [ $s->_group_tokens(0, @a) ]; |
471
|
6
|
100
|
|
|
|
21
|
$s->{disambiguate}-- if $disambiguate; |
472
|
6
|
|
|
|
|
33
|
return $r; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
sub _group_tokens |
476
|
|
|
|
|
|
|
{ |
477
|
10
|
|
|
10
|
|
128
|
my ($s, $l, @a) = @_; |
478
|
10
|
|
50
|
|
|
50
|
$s->{debug} ||= 0; # manually enable debugging here, in new(). |
479
|
|
|
|
|
|
|
|
480
|
10
|
|
|
|
|
18
|
push @a, ''; # helps flushing $arr |
481
|
|
|
|
|
|
|
|
482
|
10
|
|
|
|
|
17
|
my @r; |
483
|
10
|
|
|
|
|
18
|
my $arr = []; |
484
|
10
|
|
|
|
|
24
|
my $in_word = 0; |
485
|
10
|
|
|
|
|
13
|
my $in_parens = 0; |
486
|
|
|
|
|
|
|
|
487
|
10
|
|
|
|
|
24
|
for my $a (@a) |
488
|
|
|
|
|
|
|
{ |
489
|
132
|
100
|
|
|
|
347
|
$in_parens++ if ($a =~ m{\(}); |
490
|
132
|
100
|
66
|
|
|
327
|
$in_parens-- if ($a =~ m{\)}) and $in_parens; |
491
|
132
|
100
|
100
|
|
|
499
|
$in_word++ if ($a =~ m{\w}) and !$in_parens; |
492
|
|
|
|
|
|
|
|
493
|
132
|
50
|
|
|
|
271
|
carp "$l: a='$a' in_parens=$in_parens in_word=$in_word\n" if $s->{debug}; |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
## operators, but not parenthesis |
496
|
|
|
|
|
|
|
## must include the empty string here! |
497
|
132
|
100
|
100
|
|
|
640
|
if ($a =~ m{^(;|\||&|\bor\b|\band\b|<<|)$}i and !$in_parens) |
498
|
|
|
|
|
|
|
{ |
499
|
36
|
50
|
|
|
|
79
|
carp "$l: emit [@$arr]\n" if $s->{debug}; |
500
|
36
|
100
|
|
|
|
74
|
if (scalar @$arr) |
501
|
|
|
|
|
|
|
{ |
502
|
30
|
100
|
|
|
|
61
|
if ($in_word) |
503
|
|
|
|
|
|
|
{ |
504
|
|
|
|
|
|
|
# put whitespace around some operators, so that it looks nicer. |
505
|
|
|
|
|
|
|
# ; only has a trailing whitespace, << has no whitespaces. |
506
|
|
|
|
|
|
|
# KEEP IN SYNC with format_tokens() |
507
|
26
|
100
|
|
|
|
41
|
map { $_ = " $1 " if /^(;|\||&|\bor\b|\band\b)$/; s{^ ; $}{; } } @$arr; |
|
52
|
|
|
|
|
172
|
|
|
52
|
|
|
|
|
124
|
|
508
|
26
|
|
|
|
|
72
|
push @r, join '', @$arr; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
else |
511
|
|
|
|
|
|
|
{ |
512
|
|
|
|
|
|
|
## must be an expression in parenthesis |
513
|
4
|
50
|
33
|
|
|
22
|
unless ($arr->[0] eq '(' and $arr->[-1] eq ')') |
514
|
|
|
|
|
|
|
{ |
515
|
0
|
|
|
|
|
0
|
my $msg = "parse error: not in_word, and not in parens: a='$a' [@$arr]"; |
516
|
0
|
|
|
|
|
0
|
push @{$s->{diagnostics}}, $msg; |
|
0
|
|
|
|
|
0
|
|
517
|
0
|
0
|
|
|
|
0
|
carp "$msg\n" if $s->{debug}; |
518
|
|
|
|
|
|
|
} |
519
|
4
|
50
|
|
|
|
11
|
shift @$arr if $arr->[0] eq '('; |
520
|
4
|
50
|
|
|
|
11
|
pop @$arr if $arr->[-1] eq ')'; |
521
|
4
|
50
|
|
|
|
9
|
carp "$l: recursion into [@$arr]\n" if $s->{debug}; |
522
|
4
|
|
|
|
|
23
|
push @r, [ $s->_group_tokens($l+1, @$arr) ]; |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
} |
525
|
36
|
|
|
|
|
60
|
$arr = []; |
526
|
36
|
|
|
|
|
70
|
$in_word = 0 |
527
|
|
|
|
|
|
|
} |
528
|
132
|
100
|
100
|
|
|
563
|
if ($in_word or $in_parens or $a eq ')') |
|
|
|
100
|
|
|
|
|
529
|
|
|
|
|
|
|
{ |
530
|
96
|
50
|
|
|
|
196
|
carp "$l: add '$a' to [@$arr]\n" if $s->{debug}; |
531
|
96
|
50
|
100
|
|
|
254
|
if ($a eq ')' and !$in_parens and !@$arr) |
|
|
|
66
|
|
|
|
|
532
|
|
|
|
|
|
|
{ |
533
|
0
|
|
|
|
|
0
|
my $msg = "parse error: bogus '$a'"; |
534
|
0
|
|
|
|
|
0
|
push @{$s->{diagnostics}}, $msg; |
|
0
|
|
|
|
|
0
|
|
535
|
0
|
0
|
|
|
|
0
|
carp "$msg\n" if $s->{debug}; |
536
|
|
|
|
|
|
|
} |
537
|
96
|
100
|
|
|
|
296
|
push @$arr, $a if length $a; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
else |
540
|
|
|
|
|
|
|
{ |
541
|
36
|
50
|
|
|
|
89
|
carp "$l: emit '$a'\n" if $s->{debug}; |
542
|
36
|
100
|
|
|
|
117
|
push @r, $a if length $a; |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
} |
545
|
|
|
|
|
|
|
|
546
|
10
|
50
|
|
|
|
27
|
if ($in_parens) |
547
|
|
|
|
|
|
|
{ |
548
|
0
|
|
|
|
|
0
|
my $msg = "parse error: missing closing ')'"; |
549
|
0
|
|
|
|
|
0
|
push @{$s->{diagnostics}}, $msg; |
|
0
|
|
|
|
|
0
|
|
550
|
0
|
0
|
|
|
|
0
|
carp "$msg\n" if $s->{debug}; |
551
|
0
|
|
|
|
|
0
|
return $s->_group_tokens($l, @a, ')') |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
10
|
100
|
|
|
|
25
|
if ($s->{disambiguate}) |
555
|
|
|
|
|
|
|
{ |
556
|
|
|
|
|
|
|
## the ordering here defines operator precedence. |
557
|
4
|
|
|
|
|
9
|
for my $op ('<<', '&', 'and', '|', 'or', ';') |
558
|
|
|
|
|
|
|
{ |
559
|
24
|
|
|
|
|
50
|
@r = _disambiguate($op, @r); |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
## we remove extra parens unconditionally. |
564
|
10
|
|
100
|
|
|
54
|
while (scalar(@r) == 1 and ref $r[0] eq 'ARRAY') |
565
|
|
|
|
|
|
|
{ |
566
|
4
|
50
|
|
|
|
13
|
if ($s->{debug}) |
567
|
|
|
|
|
|
|
{ |
568
|
5
|
|
|
5
|
|
65
|
use Data::Dumper; |
|
5
|
|
|
|
|
13
|
|
|
5
|
|
|
|
|
23294
|
|
569
|
0
|
|
|
|
|
0
|
warn "removing extra parens from" . Dumper(\@r). Dumper $r[0]; |
570
|
|
|
|
|
|
|
} |
571
|
4
|
|
|
|
|
4
|
@r = @{$r[0]}; |
|
4
|
|
|
|
|
23
|
|
572
|
|
|
|
|
|
|
} |
573
|
10
|
|
|
|
|
58
|
return @r; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
## find stretches of indentical operators $tok |
577
|
|
|
|
|
|
|
## and replace them by one sub-array each, containing the same. |
578
|
|
|
|
|
|
|
sub _disambiguate |
579
|
|
|
|
|
|
|
{ |
580
|
24
|
|
|
24
|
|
53
|
my ($tok, @a) = @_; |
581
|
|
|
|
|
|
|
|
582
|
24
|
|
|
|
|
27
|
my $i; |
583
|
24
|
|
|
|
|
58
|
for ($i = 1; $i <= $#a; $i+= 2) |
584
|
|
|
|
|
|
|
{ |
585
|
36
|
100
|
66
|
|
|
225
|
if (defined($a[$i]) && $a[$i] eq $tok) |
586
|
|
|
|
|
|
|
{ |
587
|
8
|
|
|
|
|
10
|
my $e = $i; |
588
|
8
|
|
100
|
|
|
39
|
while (defined($a[$e+2]) && $a[$e+2] eq $tok) { $e += 2; } |
|
2
|
|
|
|
|
7
|
|
589
|
8
|
|
|
|
|
59
|
splice @a, $i-1, $e-$i+3, [ @a[$i-1 .. $e+1] ]; |
590
|
|
|
|
|
|
|
# assert: $a[$i+2] cannot be $tok now. |
591
|
|
|
|
|
|
|
} |
592
|
|
|
|
|
|
|
} |
593
|
24
|
|
|
|
|
126
|
return @a; |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=head2 format_tokens |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
reverse operation of tokenize() |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=cut |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub _map_license_name |
603
|
|
|
|
|
|
|
{ |
604
|
26
|
|
|
26
|
|
60
|
my ($s, $name, $prev_op) = @_; |
605
|
26
|
|
|
|
|
29
|
my $parens; |
606
|
|
|
|
|
|
|
|
607
|
26
|
|
|
|
|
32
|
my $origname = $name; |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# used as a flag, so that we know if we mapped the name at least once. |
610
|
26
|
|
|
|
|
29
|
my $mapped; |
611
|
|
|
|
|
|
|
# we try the mapping three times: |
612
|
|
|
|
|
|
|
# first including any parenthesis |
613
|
|
|
|
|
|
|
# second: after splitting parethetical description |
614
|
|
|
|
|
|
|
# third: after shaping into possibly conforming syntax |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
{ |
617
|
26
|
|
|
|
|
30
|
my $new = $s->{licensemap}{ex}{$name}; |
|
26
|
|
|
|
|
81
|
|
618
|
26
|
100
|
|
|
|
121
|
$new = $s->{licensemap}{lc}{lc $name} unless defined $new; |
619
|
26
|
100
|
|
|
|
76
|
$mapped = $name = $new if defined $new; |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
26
|
50
|
|
|
|
256
|
($name,$parens) = ($1,$3) if $name =~ m{^\s*(.*?)\s*(\((.*)\))?\s*$}; |
623
|
26
|
50
|
|
|
|
69
|
$origname = $name unless $name eq 'REJECT'; |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
## allow for underscores in these name, to make vim users happy. |
626
|
26
|
|
|
|
|
85
|
$name =~ s{(PERMISSIVE|NON|COPYLEFT)[-_]OSI[-_]COMPLIANT}{$1-OSI-COMPLIANT}g; |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
{ |
630
|
26
|
|
|
|
|
33
|
my $new = $s->{licensemap}{ex}{$name}; |
|
26
|
|
|
|
|
73
|
|
631
|
26
|
100
|
|
|
|
69
|
$new = $s->{licensemap}{lc}{lc $name} unless defined $new; |
632
|
26
|
100
|
|
|
|
76
|
$mapped = $name = $new if defined $new; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
26
|
|
|
|
|
29
|
if (1) |
636
|
|
|
|
|
|
|
{ |
637
|
|
|
|
|
|
|
## policy: version numbers are appended with '-', not with 'v', ' V' |
638
|
|
|
|
|
|
|
## version numbers do not end in '.0' |
639
|
|
|
|
|
|
|
# LGPLv2.1 -> LGPL-2.1 |
640
|
|
|
|
|
|
|
# Apache V2.0 -> Apache-2.0 |
641
|
|
|
|
|
|
|
### |
642
|
26
|
100
|
|
|
|
98
|
$name =~ s{(\w+)(-v|-V| V| v|v| )(\d[\.\d]*\+?|\d+\.\d[-~\w]*\+?)\s*$}{$1-$3} unless $s->{licensemap}{lc}{lc $name}; |
643
|
26
|
100
|
|
|
|
113
|
$name =~ s{(\d)\.0$}{$1} unless $s->{licensemap}{lc}{lc $name}; |
644
|
|
|
|
|
|
|
} |
645
|
|
|
|
|
|
|
else # this is old policy, we do it the other way round now. |
646
|
|
|
|
|
|
|
{ |
647
|
|
|
|
|
|
|
## policy: version numbers are appended with a lower case v, |
648
|
|
|
|
|
|
|
## if the name is all caps and without white space. |
649
|
|
|
|
|
|
|
## otherwise append with space and capital V. |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
## LGPL-2.1 -> LGPL v2.1 -> LGPLv2.1 |
652
|
|
|
|
|
|
|
## LGPL-V2.1 -> LGPL v2.1 -> LGPLv2.1 |
653
|
|
|
|
|
|
|
## GPL-2+ -> GPL v2+ -> GPLv2+ |
654
|
|
|
|
|
|
|
## Apache-2.0 -> Apache v2.0 -> Apache V2.0 |
655
|
|
|
|
|
|
|
## do this only, if it really looks like a version number. |
656
|
|
|
|
|
|
|
## e.g. XXX-3 XXX-3.0~alpha BUT NOT vision-3d or BSD-4clause |
657
|
|
|
|
|
|
|
$name =~ s{(\w+)-[vV]?(\d[\.\d]*\+?|\d+\.\d[-~\w]*\+?)\s*$}{$1 v$2}; |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
## LGPL v2.1 -> LGPLv2.1 |
661
|
|
|
|
|
|
|
## LGPL 2.1+ -> LGPLv2.1+ |
662
|
|
|
|
|
|
|
## PERMISSIVE -> PERMISSIVE |
663
|
|
|
|
|
|
|
unless ($name =~ s{^([A-Z_\d\.-]+)\s*[vV ](\d\S*?)$}{$1v$2}) |
664
|
|
|
|
|
|
|
{ |
665
|
|
|
|
|
|
|
## CC BY-SA v3.5 -> CC BY-SA V3.5 |
666
|
|
|
|
|
|
|
## Apache v2.0 -> Apache V2.0 |
667
|
|
|
|
|
|
|
$name =~ s{^(.*\S)\s*[vV ](\d\S*?)$}{$1 V$2}; |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
## |
672
|
|
|
|
|
|
|
## policy: modifiers are all lower case, licenses start upper case. |
673
|
26
|
100
|
100
|
|
|
111
|
if (($prev_op||'') eq '<<') |
674
|
|
|
|
|
|
|
{ |
675
|
4
|
|
|
|
|
9
|
$name = lc $name; |
676
|
|
|
|
|
|
|
} |
677
|
|
|
|
|
|
|
else |
678
|
|
|
|
|
|
|
{ |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
{ |
681
|
22
|
|
|
|
|
25
|
my $new = $s->{licensemap}{ex}{$name}; |
|
22
|
|
|
|
|
60
|
|
682
|
22
|
100
|
|
|
|
53
|
$new = $s->{licensemap}{lc}{lc $name} unless defined $new; |
683
|
22
|
100
|
|
|
|
67
|
$mapped = $name = $new if defined $new; |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
|
686
|
22
|
100
|
|
|
|
44
|
unless (defined $mapped) |
687
|
|
|
|
|
|
|
{ |
688
|
|
|
|
|
|
|
# names not in the mapping table get an questionmark! |
689
|
4
|
|
|
|
|
9
|
$name = ucfirst $origname; |
690
|
4
|
50
|
|
|
|
18
|
$name = '?' . $name unless $name =~ m{^\?}; |
691
|
4
|
50
|
|
|
|
13
|
$name = $name . '?' unless $name =~ m{\?$}; |
692
|
4
|
|
|
|
|
7
|
push @{$s->{diagnostics}}, "unknown name: '$origname'"; |
|
4
|
|
|
|
|
14
|
|
693
|
|
|
|
|
|
|
} |
694
|
|
|
|
|
|
|
else |
695
|
|
|
|
|
|
|
{ |
696
|
18
|
|
|
|
|
45
|
$name = ucfirst $name; |
697
|
|
|
|
|
|
|
} |
698
|
|
|
|
|
|
|
} |
699
|
26
|
50
|
33
|
|
|
68
|
if ($name eq 'REJECT' and $origname !~ m{REJECT}) |
700
|
|
|
|
|
|
|
{ |
701
|
0
|
|
|
|
|
0
|
$parens = "?$origname?"; |
702
|
0
|
|
|
|
|
0
|
push @{$s->{diagnostics}}, "rejected name: '$origname'"; |
|
0
|
|
|
|
|
0
|
|
703
|
|
|
|
|
|
|
} |
704
|
26
|
100
|
|
|
|
61
|
$name .= "($parens)" if $parens; |
705
|
26
|
|
|
|
|
81
|
return $name; |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
sub format_tokens |
709
|
|
|
|
|
|
|
{ |
710
|
14
|
|
|
14
|
1
|
9104
|
my ($s, $aa) = @_; |
711
|
14
|
|
|
|
|
19
|
my @a; |
712
|
14
|
|
|
|
|
29
|
for my $a (@$aa) |
713
|
|
|
|
|
|
|
{ |
714
|
54
|
100
|
|
|
|
118
|
if (ref $a) |
715
|
|
|
|
|
|
|
{ |
716
|
8
|
|
|
|
|
24
|
push @a, '(', $s->format_tokens($a), ')'; |
717
|
|
|
|
|
|
|
} |
718
|
|
|
|
|
|
|
else |
719
|
|
|
|
|
|
|
{ |
720
|
46
|
100
|
100
|
|
|
296
|
if ($a =~ m{\w\w} and $a !~ m{^(and|or)$}i) |
721
|
|
|
|
|
|
|
{ |
722
|
26
|
100
|
|
|
|
101
|
$a = $s->_map_license_name($a, @a?$a[-1]:undef); |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
else |
725
|
|
|
|
|
|
|
{ |
726
|
|
|
|
|
|
|
# put whitespace around some operators, so that it looks nicer. |
727
|
|
|
|
|
|
|
# ; only has a trailing whitespace, << has no whitespaces. |
728
|
|
|
|
|
|
|
# KEEP IN SYNC with _group_tokens() if in_word |
729
|
20
|
|
|
|
|
114
|
$a =~ s{^(;|\||&|or|and)$}{ $1 }i; $a =~ s{^ ; $}{; }; |
|
20
|
|
|
|
|
59
|
|
730
|
|
|
|
|
|
|
} |
731
|
46
|
|
|
|
|
117
|
push @a, $a; |
732
|
|
|
|
|
|
|
} |
733
|
|
|
|
|
|
|
} |
734
|
14
|
|
|
|
|
80
|
return join '', @a; |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=head2 loadmap_csv |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
$obj->loadmap_csv('license_map.csv', 'as'); |
740
|
|
|
|
|
|
|
$obj->loadmap_csv('synopsis.csv', 'lauaas#c'); |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
Object method to |
743
|
|
|
|
|
|
|
load (or merge) contents of a CVS table into the object. |
744
|
|
|
|
|
|
|
This uses a trivial csv parser. Field seperator must be ; |
745
|
|
|
|
|
|
|
linebreaks are record seperators, and the first line is ignored, |
746
|
|
|
|
|
|
|
if it starts with '#'. |
747
|
|
|
|
|
|
|
Fields can be surrounded by doublequotes, if a comma may be embedded. |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
The second parameter is a field template, defining what the meaning of the fields is. |
750
|
|
|
|
|
|
|
l Long name (none or once). This is a speaking name of the License: Example "Creative Commons Attribution 1.0" |
751
|
|
|
|
|
|
|
a Alias name (any number). Any other name by which the license is known: Example: "CC-BY 1.0" |
752
|
|
|
|
|
|
|
s Short name (once). The canonical (unique) short license identifiner: Example: "CC-BY-1" |
753
|
|
|
|
|
|
|
u URL (any). Multiple URLs can also be written in one filed, seperated by whitespace. |
754
|
|
|
|
|
|
|
# License classification number (none or once). (1..5) |
755
|
|
|
|
|
|
|
c Comment (none or once) |
756
|
|
|
|
|
|
|
The default template is "as", an alias, followed by the canonical short name. |
757
|
|
|
|
|
|
|
Empty fields are ignored, as well as fields that contian only one '?'. Thus you |
758
|
|
|
|
|
|
|
can use records like |
759
|
|
|
|
|
|
|
"","Name" |
760
|
|
|
|
|
|
|
to pass in a valid name without an alias. |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
See also new() for more details about mappings. |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=cut |
765
|
|
|
|
|
|
|
sub loadmap_csv |
766
|
|
|
|
|
|
|
{ |
767
|
3
|
|
|
3
|
1
|
20
|
my ($self,$file,$template) = @_; |
768
|
3
|
|
100
|
|
|
14
|
$template ||= "as"; |
769
|
|
|
|
|
|
|
|
770
|
3
|
|
|
|
|
15
|
my @colum_types = split(//, $template); |
771
|
3
|
|
|
|
|
8
|
my $canon_idx = undef; |
772
|
3
|
|
|
|
|
12
|
for my $i (0..$#colum_types) |
773
|
|
|
|
|
|
|
{ |
774
|
12
|
100
|
|
|
|
34
|
if ($colum_types[$i] eq 's') |
775
|
|
|
|
|
|
|
{ |
776
|
3
|
50
|
|
|
|
11
|
die "multiple canonical short name columns in template '$template'\n" if defined $canon_idx; |
777
|
3
|
|
|
|
|
7
|
$canon_idx = $i; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
} |
780
|
3
|
50
|
|
|
|
10
|
die "no canonical short name columns in template '$template'\n" unless defined $canon_idx; |
781
|
|
|
|
|
|
|
|
782
|
3
|
50
|
|
|
|
156
|
open my $in, "<", $file or croak "open($file) failed: $!\n"; |
783
|
3
|
|
|
|
|
23
|
my %opts = ( binary => 1, sep_char => ',', empty_is_undef => 1, eol => $/ ); |
784
|
3
|
|
|
|
|
31
|
my $csv = Text::CSV->new(\%opts); |
785
|
3
|
|
|
|
|
393
|
my $line_no = 0; |
786
|
3
|
|
|
|
|
6
|
for (;;) |
787
|
|
|
|
|
|
|
{ |
788
|
325
|
|
|
|
|
3359
|
my $row = $csv->getline ($in); |
789
|
325
|
100
|
|
|
|
221086
|
last if $csv->eof(); |
790
|
322
|
100
|
|
|
|
2025
|
if (!$line_no++) |
791
|
|
|
|
|
|
|
{ |
792
|
|
|
|
|
|
|
# be forgiving, if we have errors while parsing the first line. |
793
|
|
|
|
|
|
|
# it may be a comment, or the seperator chacracter may be wrong. |
794
|
4
|
50
|
|
|
|
18
|
if (my @err = $csv->error_diag()) |
795
|
|
|
|
|
|
|
{ |
796
|
4
|
|
|
|
|
178
|
my $line = $csv->error_input(); |
797
|
4
|
100
|
66
|
|
|
75
|
if ($err[0] == 2025) |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
798
|
|
|
|
|
|
|
{ |
799
|
|
|
|
|
|
|
# no field seperator seen in the line |
800
|
1
|
50
|
|
|
|
4
|
if ($opts{sep_char} eq ',') |
801
|
|
|
|
|
|
|
{ |
802
|
1
|
|
|
|
|
3
|
$opts{sep_char} = ';'; |
803
|
1
|
|
|
|
|
9
|
POSIX::rewind $in; |
804
|
1
|
|
|
|
|
240
|
$csv = Text::CSV->new(\%opts); |
805
|
1
|
|
|
|
|
130
|
$line_no = 0; |
806
|
1
|
|
|
|
|
4
|
next; |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
else |
809
|
|
|
|
|
|
|
{ |
810
|
0
|
|
|
|
|
0
|
die "neither comma nor semicolon work as a field seperator.\n"; |
811
|
|
|
|
|
|
|
} |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
elsif (defined($line) and $line =~ s{^#\s*}{}) |
814
|
|
|
|
|
|
|
{ |
815
|
|
|
|
|
|
|
# can we do something with this header line now? |
816
|
2
|
|
|
|
|
14
|
next; |
817
|
|
|
|
|
|
|
} |
818
|
|
|
|
|
|
|
elsif ($err[0]) |
819
|
|
|
|
|
|
|
{ |
820
|
|
|
|
|
|
|
# sometimes we come here, although there is no error. |
821
|
|
|
|
|
|
|
# e.g. after restarting with changed sep_char, |
822
|
0
|
|
|
|
|
0
|
print Dumper @err, $line; |
823
|
0
|
|
|
|
|
0
|
next; |
824
|
|
|
|
|
|
|
} |
825
|
|
|
|
|
|
|
else |
826
|
|
|
|
|
|
|
{ |
827
|
|
|
|
|
|
|
# "# heading","fields","...." |
828
|
|
|
|
|
|
|
# "\160SPDX Full name","..." |
829
|
|
|
|
|
|
|
# " SPDX Full name","..." |
830
|
1
|
50
|
|
|
|
10
|
next if $row->[0] =~ m{^(\#|\W*SPDX)}; # heading |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
} |
833
|
|
|
|
|
|
|
} |
834
|
|
|
|
|
|
|
|
835
|
318
|
100
|
|
|
|
760
|
next unless defined $row->[$canon_idx]; # a dummy entry? |
836
|
|
|
|
|
|
|
|
837
|
309
|
|
|
|
|
410
|
my $alias_count = 0; |
838
|
309
|
|
|
|
|
3780
|
for my $i (0..$#colum_types) |
839
|
|
|
|
|
|
|
{ |
840
|
1632
|
100
|
|
|
|
4530
|
next unless defined $row->[$i]; |
841
|
1431
|
100
|
|
|
|
5578
|
next if $row->[$i] =~ m{^[\?\s-]+$}; |
842
|
1269
|
100
|
100
|
|
|
7144
|
if ($colum_types[$i] eq 'l' or |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
843
|
|
|
|
|
|
|
$colum_types[$i] eq 'a') |
844
|
|
|
|
|
|
|
{ |
845
|
644
|
|
|
|
|
2415
|
$self->add_alias($row->[$i], $row->[$canon_idx]); |
846
|
644
|
|
|
|
|
1317
|
$alias_count++; |
847
|
|
|
|
|
|
|
} |
848
|
|
|
|
|
|
|
elsif($colum_types[$i] eq 'u') |
849
|
|
|
|
|
|
|
{ |
850
|
149
|
|
|
|
|
518
|
$self->add_url($row->[$i], $row->[$canon_idx]); |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
elsif($colum_types[$i] eq 'c') |
853
|
|
|
|
|
|
|
{ |
854
|
0
|
|
|
|
|
0
|
$self->set_compat_class($row->[$i], $row->[$canon_idx]); |
855
|
|
|
|
|
|
|
} |
856
|
|
|
|
|
|
|
} |
857
|
309
|
100
|
|
|
|
905
|
unless ($alias_count) |
858
|
|
|
|
|
|
|
{ |
859
|
11
|
|
|
|
|
37
|
$self->add_alias(undef, $row->[$canon_idx]); |
860
|
|
|
|
|
|
|
} |
861
|
309
|
50
|
|
|
|
1021
|
last if $csv->eof(); |
862
|
|
|
|
|
|
|
} |
863
|
3
|
|
|
|
|
207
|
return $self; |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
sub _loadmap_csv_old |
867
|
|
|
|
|
|
|
{ |
868
|
0
|
|
|
0
|
|
|
my ($self,$file) = @_; |
869
|
|
|
|
|
|
|
|
870
|
0
|
0
|
|
|
|
|
open IN, "<", $file or croak "open($file) failed: $!\n"; |
871
|
0
|
|
|
|
|
|
my $linecount = 0; |
872
|
0
|
|
|
|
|
|
while (defined (my $line = )) |
873
|
|
|
|
|
|
|
{ |
874
|
0
|
|
|
|
|
|
chomp $line; |
875
|
0
|
0
|
0
|
|
|
|
next if $line =~ m{^#} and !$linecount++; |
876
|
0
|
0
|
0
|
|
|
|
if (($line =~ m{^"([^"]*)",\s*"([^"]*)"}) or |
|
|
0
|
0
|
|
|
|
|
877
|
|
|
|
|
|
|
($line =~ m{^([^,"]*),\s*([^,]*)})) |
878
|
|
|
|
|
|
|
{ |
879
|
|
|
|
|
|
|
# actual mapping from old name to new name |
880
|
0
|
|
|
|
|
|
$self->add_alias($1,$2); |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
elsif ($line =~ m{^("",\s*)?"([^"]*)"\s*$} or |
884
|
|
|
|
|
|
|
$line =~ m{^(,)?([^,"]*)\s*$}) |
885
|
|
|
|
|
|
|
{ |
886
|
|
|
|
|
|
|
# simple mentioning of good ones, needs no mapping |
887
|
0
|
|
|
|
|
|
$self->add_alias(undef,$2); |
888
|
|
|
|
|
|
|
} |
889
|
|
|
|
|
|
|
else |
890
|
|
|
|
|
|
|
{ |
891
|
0
|
|
|
|
|
|
die "$file:$linecount:\n\t$line\n not my csv syntax."; |
892
|
|
|
|
|
|
|
} |
893
|
|
|
|
|
|
|
} |
894
|
0
|
|
|
|
|
|
return $self; |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
=head1 AUTHOR |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
Juergen Weigert, C<< >> |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=head1 BUGS |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
This module defines a different syntax than |
904
|
|
|
|
|
|
|
http://rpmlint.zarb.org/cgi-bin/trac.cgi/browser/trunk/TagsCheck.py |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through |
907
|
|
|
|
|
|
|
the web interface at L. I will be notified, and then you'll |
908
|
|
|
|
|
|
|
automatically be notified of progress on your bug as I make changes. |
909
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
=head1 SUPPORT |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
perldoc License::Syntax |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
You can also look for information at: |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
=over 4 |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
L |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
L |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
=item * CPAN Ratings |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
L |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
=item * Search CPAN |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
L |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
=back |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
Copyright 2009 Juergen Weigert. |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
951
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
952
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
=cut |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
1; # End of License::Syntax |