| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Data::Sah::Normalize; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY |
|
4
|
|
|
|
|
|
|
our $DATE = '2021-07-29'; # DATE |
|
5
|
|
|
|
|
|
|
our $DIST = 'Data-Sah-Normalize'; # DIST |
|
6
|
|
|
|
|
|
|
our $VERSION = '0.051'; # VERSION |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
75960
|
use 5.010001; |
|
|
1
|
|
|
|
|
15
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
41
|
|
|
10
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1451
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
require Exporter; |
|
13
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
|
14
|
|
|
|
|
|
|
our @EXPORT_OK = qw( |
|
15
|
|
|
|
|
|
|
normalize_clset |
|
16
|
|
|
|
|
|
|
normalize_schema |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
$type_re |
|
19
|
|
|
|
|
|
|
$clause_name_re |
|
20
|
|
|
|
|
|
|
$clause_re |
|
21
|
|
|
|
|
|
|
$attr_re |
|
22
|
|
|
|
|
|
|
$funcset_re |
|
23
|
|
|
|
|
|
|
$compiler_re |
|
24
|
|
|
|
|
|
|
); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
our $type_re = qr/\A(?:[A-Za-z_]\w*::)*[A-Za-z_]\w*\z/; |
|
27
|
|
|
|
|
|
|
our $clause_name_re = qr/\A[A-Za-z_]\w*\z/; |
|
28
|
|
|
|
|
|
|
our $clause_re = qr/\A[A-Za-z_]\w*(?:\.[A-Za-z_]\w*)*\z/; |
|
29
|
|
|
|
|
|
|
our $attr_re = $clause_re; |
|
30
|
|
|
|
|
|
|
our $funcset_re = qr/\A(?:[A-Za-z_]\w*::)*[A-Za-z_]\w*\z/; |
|
31
|
|
|
|
|
|
|
our $compiler_re = qr/\A[A-Za-z_]\w*\z/; |
|
32
|
|
|
|
|
|
|
our $clause_attr_on_empty_clause_re = qr/\A(?:\.[A-Za-z_]\w*)+\z/; |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub normalize_clset($;$) { |
|
35
|
3
|
|
|
3
|
1
|
110
|
my ($clset0, $opts) = @_; |
|
36
|
3
|
|
100
|
|
|
12
|
$opts //= {}; |
|
37
|
|
|
|
|
|
|
|
|
38
|
3
|
|
|
|
|
7
|
my $clset = {}; |
|
39
|
3
|
|
|
|
|
15
|
for my $c (sort keys %$clset0) { |
|
40
|
5
|
|
|
|
|
10
|
my $c0 = $c; |
|
41
|
|
|
|
|
|
|
|
|
42
|
5
|
|
|
|
|
7
|
my $v = $clset0->{$c}; |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# ignore expression |
|
45
|
5
|
|
|
|
|
10
|
my $expr; |
|
46
|
5
|
100
|
|
|
|
14
|
if ($c =~ s/=\z//) { |
|
47
|
1
|
|
|
|
|
3
|
$expr++; |
|
48
|
|
|
|
|
|
|
# XXX currently can't disregard merge prefix when checking |
|
49
|
|
|
|
|
|
|
# conflict |
|
50
|
1
|
50
|
|
|
|
3
|
die "Conflict between '$c=' and '$c'" if exists $clset0->{$c}; |
|
51
|
1
|
|
|
|
|
4
|
$clset->{"$c.is_expr"} = 1; |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
5
|
|
|
|
|
7
|
my $sc = ""; |
|
55
|
5
|
|
|
|
|
7
|
my $cn; |
|
56
|
|
|
|
|
|
|
{ |
|
57
|
5
|
|
|
|
|
8
|
my $errp = "Invalid clause name syntax '$c0'"; # error prefix |
|
|
5
|
|
|
|
|
9
|
|
|
58
|
5
|
100
|
100
|
|
|
57
|
if (!$expr && $c =~ s/\A!(?=.)//) { |
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
59
|
1
|
50
|
|
|
|
22
|
die "$errp, syntax should be !CLAUSE" |
|
60
|
|
|
|
|
|
|
unless $c =~ $clause_name_re; |
|
61
|
1
|
|
|
|
|
3
|
$sc = "!"; |
|
62
|
|
|
|
|
|
|
} elsif (!$expr && $c =~ s/(?<=.)\|\z//) { |
|
63
|
1
|
50
|
|
|
|
6
|
die "$errp, syntax should be CLAUSE|" |
|
64
|
|
|
|
|
|
|
unless $c =~ $clause_name_re; |
|
65
|
1
|
|
|
|
|
3
|
$sc = "|"; |
|
66
|
|
|
|
|
|
|
} elsif (!$expr && $c =~ s/(?<=.)\&\z//) { |
|
67
|
1
|
50
|
|
|
|
6
|
die "$errp, syntax should be CLAUSE&" |
|
68
|
|
|
|
|
|
|
unless $c =~ $clause_name_re; |
|
69
|
1
|
|
|
|
|
3
|
$sc = "&"; |
|
70
|
|
|
|
|
|
|
} elsif (!$expr && $c =~ /\A([^.]+)(?:\.(.+))?\((\w+)\)\z/) { |
|
71
|
1
|
|
|
|
|
7
|
my ($c2, $a, $lang) = ($1, $2, $3); |
|
72
|
1
|
50
|
33
|
|
|
10
|
die "$errp, syntax should be CLAUSE(LANG) or C.ATTR(LANG)" |
|
|
|
|
33
|
|
|
|
|
|
73
|
|
|
|
|
|
|
unless $c2 =~ $clause_name_re && |
|
74
|
|
|
|
|
|
|
(!defined($a) || $a =~ $attr_re); |
|
75
|
1
|
|
|
|
|
3
|
$sc = "(LANG)"; |
|
76
|
1
|
50
|
|
|
|
5
|
$cn = $c2 . (defined($a) ? ".$a" : "") . ".alt.lang.$lang"; |
|
77
|
|
|
|
|
|
|
} elsif ($c !~ $clause_re && |
|
78
|
|
|
|
|
|
|
$c !~ $clause_attr_on_empty_clause_re) { |
|
79
|
0
|
|
|
|
|
0
|
die "$errp, please use letter/digit/underscore only"; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
# XXX can't disregard merge prefix when checking conflict |
|
84
|
5
|
100
|
|
|
|
20
|
if ($sc eq '!') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
die "Conflict between clause shortcuts '!$c' and '$c'" |
|
86
|
1
|
50
|
|
|
|
4
|
if exists $clset0->{$c}; |
|
87
|
|
|
|
|
|
|
die "Conflict between clause shortcuts '!$c' and '$c|'" |
|
88
|
1
|
50
|
|
|
|
5
|
if exists $clset0->{"$c|"}; |
|
89
|
|
|
|
|
|
|
die "Conflict between clause shortcuts '!$c' and '$c&'" |
|
90
|
1
|
50
|
|
|
|
5
|
if exists $clset0->{"$c&"}; |
|
91
|
1
|
|
|
|
|
3
|
$clset->{$c} = $v; |
|
92
|
1
|
|
|
|
|
4
|
$clset->{"$c.op"} = "not"; |
|
93
|
|
|
|
|
|
|
} elsif ($sc eq '&') { |
|
94
|
|
|
|
|
|
|
die "Conflict between clause shortcuts '$c&' and '$c'" |
|
95
|
1
|
50
|
|
|
|
4
|
if exists $clset0->{$c}; |
|
96
|
|
|
|
|
|
|
die "Conflict between clause shortcuts '$c&' and '$c|'" |
|
97
|
1
|
50
|
|
|
|
4
|
if exists $clset0->{"$c|"}; |
|
98
|
1
|
50
|
|
|
|
4
|
die "Clause 'c&' value must be an array" |
|
99
|
|
|
|
|
|
|
unless ref($v) eq 'ARRAY'; |
|
100
|
1
|
|
|
|
|
3
|
$clset->{$c} = $v; |
|
101
|
1
|
|
|
|
|
4
|
$clset->{"$c.op"} = "and"; |
|
102
|
|
|
|
|
|
|
} elsif ($sc eq '|') { |
|
103
|
|
|
|
|
|
|
die "Conflict between clause shortcuts '$c|' and '$c'" |
|
104
|
1
|
50
|
|
|
|
3
|
if exists $clset0->{$c}; |
|
105
|
1
|
50
|
|
|
|
5
|
die "Clause 'c|' value must be an array" |
|
106
|
|
|
|
|
|
|
unless ref($v) eq 'ARRAY'; |
|
107
|
1
|
|
|
|
|
2
|
$clset->{$c} = $v; |
|
108
|
1
|
|
|
|
|
3
|
$clset->{"$c.op"} = "or"; |
|
109
|
|
|
|
|
|
|
} elsif ($sc eq '(LANG)') { |
|
110
|
|
|
|
|
|
|
die "Conflict between clause '$c' and '$cn'" |
|
111
|
1
|
50
|
|
|
|
4
|
if exists $clset0->{$cn}; |
|
112
|
1
|
|
|
|
|
3
|
$clset->{$cn} = $v; |
|
113
|
|
|
|
|
|
|
} else { |
|
114
|
1
|
|
|
|
|
3
|
$clset->{$c} = $v; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
} |
|
118
|
3
|
50
|
|
|
|
10
|
$clset->{req} = 1 if $opts->{has_req}; |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# XXX option to recursively normalize clset, any's of, all's of, ... |
|
121
|
|
|
|
|
|
|
#if ($clset->{clset}) { |
|
122
|
|
|
|
|
|
|
# local $opts->{has_req}; |
|
123
|
|
|
|
|
|
|
# if ($clset->{'clset.op'} && $clset->{'clset.op'} =~ /and|or/) { |
|
124
|
|
|
|
|
|
|
# # multiple clause sets |
|
125
|
|
|
|
|
|
|
# $clset->{clset} = map { $self->normalize_clset($_, $opts) } |
|
126
|
|
|
|
|
|
|
# @{ $clset->{clset} }; |
|
127
|
|
|
|
|
|
|
# } else { |
|
128
|
|
|
|
|
|
|
# $clset->{clset} = $self->normalize_clset($_, $opts); |
|
129
|
|
|
|
|
|
|
# } |
|
130
|
|
|
|
|
|
|
#} |
|
131
|
|
|
|
|
|
|
|
|
132
|
3
|
|
|
|
|
19
|
$clset; |
|
133
|
|
|
|
|
|
|
} |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub normalize_schema($) { |
|
136
|
4
|
|
|
4
|
1
|
992
|
my $s = shift; |
|
137
|
|
|
|
|
|
|
|
|
138
|
4
|
|
|
|
|
9
|
my $ref = ref($s); |
|
139
|
4
|
50
|
|
|
|
19
|
if (!defined($s)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
|
141
|
0
|
|
|
|
|
0
|
die "Schema is missing"; |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
} elsif (!$ref) { |
|
144
|
|
|
|
|
|
|
|
|
145
|
2
|
|
|
|
|
10
|
my $has_req = $s =~ s/\*\z//; |
|
146
|
2
|
50
|
|
|
|
18
|
$s =~ $type_re or die "Invalid type syntax $s, please use ". |
|
147
|
|
|
|
|
|
|
"letter/digit/underscore only"; |
|
148
|
2
|
100
|
|
|
|
16
|
return [$s, $has_req ? {req=>1} : {}]; |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
} elsif ($ref eq 'ARRAY') { |
|
151
|
|
|
|
|
|
|
|
|
152
|
2
|
|
|
|
|
4
|
my $t = $s->[0]; |
|
153
|
2
|
|
33
|
|
|
13
|
my $has_req = $t && $t =~ s/\*\z//; |
|
154
|
2
|
50
|
|
|
|
8
|
if (!defined($t)) { |
|
|
|
50
|
|
|
|
|
|
|
155
|
0
|
|
|
|
|
0
|
die "For array form, at least 1 element is needed for type"; |
|
156
|
|
|
|
|
|
|
} elsif (ref $t) { |
|
157
|
0
|
|
|
|
|
0
|
die "For array form, first element must be a string"; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
2
|
50
|
|
|
|
19
|
$t =~ $type_re or die "Invalid type syntax $s, please use ". |
|
160
|
|
|
|
|
|
|
"letter/digit/underscore only"; |
|
161
|
|
|
|
|
|
|
|
|
162
|
2
|
|
|
|
|
4
|
my $clset0; |
|
163
|
|
|
|
|
|
|
my $extras; |
|
164
|
2
|
50
|
|
|
|
5
|
if (defined($s->[1])) { |
|
165
|
2
|
50
|
|
|
|
6
|
if (ref($s->[1]) eq 'HASH') { |
|
166
|
2
|
|
|
|
|
5
|
$clset0 = $s->[1]; |
|
167
|
2
|
|
|
|
|
3
|
$extras = $s->[2]; |
|
168
|
2
|
50
|
|
|
|
6
|
die "For array form, there should not be more than 3 elements" |
|
169
|
|
|
|
|
|
|
if @$s > 3; |
|
170
|
|
|
|
|
|
|
} else { |
|
171
|
|
|
|
|
|
|
# flattened clause set [t, c=>1, c2=>2, ...] |
|
172
|
0
|
0
|
|
|
|
0
|
die "For array in the form of [t, c1=>1, ...], there must be ". |
|
173
|
|
|
|
|
|
|
"3 elements (or 5, 7, ...)" |
|
174
|
|
|
|
|
|
|
unless @$s % 2; |
|
175
|
0
|
|
|
|
|
0
|
$clset0 = { @{$s}[1..@$s-1] }; |
|
|
0
|
|
|
|
|
0
|
|
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
} else { |
|
178
|
0
|
|
|
|
|
0
|
$clset0 = {}; |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# check clauses and parse shortcuts (!c, c&, c|, c=) |
|
182
|
2
|
|
|
|
|
8
|
my $clset = normalize_clset($clset0, {has_req=>$has_req}); |
|
183
|
2
|
50
|
|
|
|
7
|
if (defined $extras) { |
|
184
|
2
|
50
|
|
|
|
6
|
die "For array form with 3 elements, extras must be hash" |
|
185
|
|
|
|
|
|
|
unless ref($extras) eq 'HASH'; |
|
186
|
2
|
100
|
|
|
|
14
|
die "Extras must be empty hashref (Sah 0.9.47)" if keys %$extras; |
|
187
|
|
|
|
|
|
|
# we remove extras to comply with Sah 0.9.47+ |
|
188
|
1
|
|
|
|
|
8
|
return [$t, $clset]; |
|
189
|
|
|
|
|
|
|
} else { |
|
190
|
0
|
|
|
|
|
|
return [$t, $clset]; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
0
|
|
|
|
|
|
die "Schema must be a string or arrayref (not $ref)"; |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
1; |
|
198
|
|
|
|
|
|
|
# ABSTRACT: Normalize Sah schema |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
__END__ |