| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Copyright (c) 2025 Philipp Schafft |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
# licensed under Artistic License 2.0 (see LICENSE file) |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
# ABSTRACT: Compactor for superstrings |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package String::Super; |
|
8
|
|
|
|
|
|
|
|
|
9
|
4
|
|
|
4
|
|
1470632
|
use v5.20; |
|
|
4
|
|
|
|
|
16
|
|
|
10
|
4
|
|
|
4
|
|
25
|
use strict; |
|
|
4
|
|
|
|
|
12
|
|
|
|
4
|
|
|
|
|
125
|
|
|
11
|
4
|
|
|
4
|
|
42
|
use warnings; |
|
|
4
|
|
|
|
|
7
|
|
|
|
4
|
|
|
|
|
276
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
4
|
|
|
4
|
|
39
|
use Carp; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
5623
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION = v0.02; |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub new { |
|
20
|
10
|
|
|
10
|
1
|
8277
|
my ($pkg, %opts) = @_; |
|
21
|
10
|
|
|
|
|
44
|
my $self = bless { |
|
22
|
|
|
|
|
|
|
strings => [], |
|
23
|
|
|
|
|
|
|
result => undef, |
|
24
|
|
|
|
|
|
|
keep_first => undef, |
|
25
|
|
|
|
|
|
|
}, $pkg; |
|
26
|
|
|
|
|
|
|
|
|
27
|
10
|
50
|
|
|
|
37
|
if (defined(my $prefix_blob = delete $opts{prefix_blob})) { |
|
28
|
0
|
|
|
|
|
0
|
$self->add_blob($prefix_blob); |
|
29
|
0
|
|
|
|
|
0
|
$self->{keep_first} = 1; |
|
30
|
|
|
|
|
|
|
} |
|
31
|
|
|
|
|
|
|
|
|
32
|
10
|
50
|
|
|
|
30
|
croak 'Stray options passed' if scalar keys %opts; |
|
33
|
|
|
|
|
|
|
|
|
34
|
10
|
|
|
|
|
30
|
return $self; |
|
35
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub add_blob { |
|
39
|
12
|
|
|
12
|
1
|
4116
|
my ($self, @blobs) = @_; |
|
40
|
12
|
|
|
|
|
20
|
my $res = scalar(@{$self->{strings}}); |
|
|
12
|
|
|
|
|
36
|
|
|
41
|
|
|
|
|
|
|
|
|
42
|
12
|
50
|
|
|
|
39
|
croak 'Already compacted' if defined $self->{result}; |
|
43
|
|
|
|
|
|
|
|
|
44
|
12
|
|
|
|
|
23
|
foreach my $blob (@blobs) { |
|
45
|
20
|
50
|
|
|
|
99
|
croak 'Provided blob is a reference' if ref $blob; |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
|
|
48
|
12
|
|
|
|
|
17
|
push(@{$self->{strings}}, @blobs); |
|
|
12
|
|
|
|
|
34
|
|
|
49
|
|
|
|
|
|
|
|
|
50
|
12
|
|
|
|
|
90
|
return $res; |
|
51
|
|
|
|
|
|
|
} |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub add_utf8 { |
|
55
|
2
|
|
|
2
|
1
|
468
|
require Encode; |
|
56
|
|
|
|
|
|
|
|
|
57
|
2
|
|
|
|
|
7
|
my ($self, @strings) = @_; |
|
58
|
2
|
|
|
|
|
7
|
state $UTF_8 = Encode::find_encoding('UTF-8'); |
|
59
|
|
|
|
|
|
|
|
|
60
|
2
|
|
|
|
|
52
|
foreach my $string (@strings) { |
|
61
|
2
|
50
|
|
|
|
7
|
croak 'Provided string is a reference' if ref $string; |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
|
|
64
|
2
|
|
|
|
|
6
|
return $self->add_blob(map {$UTF_8->encode($_)} @strings); |
|
|
2
|
|
|
|
|
17
|
|
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
sub compact { |
|
69
|
10
|
|
|
10
|
1
|
18
|
my ($self, %opts) = @_; |
|
70
|
|
|
|
|
|
|
|
|
71
|
10
|
50
|
|
|
|
26
|
croak 'Stray options passed' if scalar keys %opts; |
|
72
|
|
|
|
|
|
|
|
|
73
|
10
|
50
|
|
|
|
31
|
return if defined $self->{result}; |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
{ |
|
76
|
10
|
|
|
|
|
13
|
my @data = @{$self->{strings}}; |
|
|
10
|
|
|
|
|
19
|
|
|
|
10
|
|
|
|
|
30
|
|
|
77
|
10
|
50
|
|
|
|
28
|
my $j_start = $self->{keep_first} ? 1 : 0; |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# eliminate all strings first that are already part of other strings |
|
80
|
|
|
|
|
|
|
outer: |
|
81
|
10
|
|
|
|
|
31
|
for (my $i = 0; $i < scalar(@data); $i++) { |
|
82
|
20
|
|
|
|
|
80
|
for (my $j = $j_start; $j < scalar(@data); $j++) { |
|
83
|
47
|
100
|
|
|
|
157
|
next if $i == $j; |
|
84
|
|
|
|
|
|
|
|
|
85
|
27
|
100
|
|
|
|
96
|
if (index($data[$i], $data[$j]) >= 0) { |
|
86
|
1
|
|
|
|
|
2
|
splice(@data, $j, 1); |
|
87
|
1
|
|
|
|
|
33
|
$i--; |
|
88
|
1
|
|
|
|
|
4
|
next outer; |
|
89
|
|
|
|
|
|
|
} |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
10
|
|
|
|
|
28
|
for (my $n = 8; $n > 0; $n--) { |
|
94
|
80
|
|
|
|
|
169
|
$self->_compact_n(\@data, $n); |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
10
|
|
|
|
|
49
|
$self->{result} = join('', @data); |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
sub result { |
|
103
|
10
|
|
|
10
|
1
|
26
|
my ($self, %opts) = @_; |
|
104
|
|
|
|
|
|
|
|
|
105
|
10
|
50
|
|
|
|
33
|
croak 'Stray options passed' if scalar keys %opts; |
|
106
|
|
|
|
|
|
|
|
|
107
|
10
|
|
|
|
|
31
|
$self->compact; |
|
108
|
|
|
|
|
|
|
|
|
109
|
10
|
|
|
|
|
51
|
return $self->{result}; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub offset { |
|
114
|
8
|
|
|
8
|
1
|
28
|
my ($self, @args) = @_; |
|
115
|
8
|
|
|
|
|
15
|
my @res; |
|
116
|
|
|
|
|
|
|
|
|
117
|
8
|
|
|
|
|
23
|
while (scalar(@args) >= 2) { |
|
118
|
8
|
|
|
|
|
18
|
my ($key, $value) = (shift(@args), shift(@args)); |
|
119
|
8
|
|
|
|
|
14
|
my $d; |
|
120
|
|
|
|
|
|
|
|
|
121
|
8
|
50
|
|
|
|
19
|
if ($key eq 'index') { |
|
122
|
8
|
|
|
|
|
16
|
$d = $self->{strings}[$value]; |
|
123
|
|
|
|
|
|
|
} else { |
|
124
|
0
|
|
|
|
|
0
|
croak 'Invalid type: '.$key; |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
|
127
|
8
|
50
|
|
|
|
40
|
if (!defined($d)) { |
|
|
|
50
|
|
|
|
|
|
|
128
|
0
|
|
|
|
|
0
|
croak 'Undefined value'; |
|
129
|
|
|
|
|
|
|
} elsif (ref($d)) { |
|
130
|
0
|
|
|
|
|
0
|
croak 'Not a valid value (reference passed as blob?)'; |
|
131
|
|
|
|
|
|
|
} |
|
132
|
|
|
|
|
|
|
|
|
133
|
8
|
|
|
|
|
22
|
$d = index($self->{result}, $d); |
|
134
|
8
|
50
|
|
|
|
16
|
if ($d < 0) { |
|
135
|
0
|
|
|
|
|
0
|
croak 'Substring not found (this is most likely a bug in the callers code)'; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
|
|
138
|
8
|
|
|
|
|
23
|
push(@res, $d); |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
8
|
50
|
|
|
|
17
|
croak 'Stray options passed' if scalar @args; |
|
142
|
|
|
|
|
|
|
|
|
143
|
8
|
50
|
|
|
|
16
|
if (wantarray) { |
|
144
|
0
|
|
|
|
|
0
|
return @res; |
|
145
|
|
|
|
|
|
|
} else { |
|
146
|
8
|
50
|
|
|
|
23
|
croak 'Not exactly one result' unless scalar(@res) == 1; |
|
147
|
8
|
|
|
|
|
44
|
return $res[0]; |
|
148
|
|
|
|
|
|
|
} |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# ---- Private helpers ---- |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub _compact_n { |
|
154
|
80
|
|
|
80
|
|
134
|
my ($self, $data, $n) = @_; |
|
155
|
80
|
50
|
|
|
|
177
|
my $j_start = $self->{keep_first} ? 1 : 0; |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
outer: |
|
158
|
80
|
|
|
|
|
110
|
for (my $i = 0; $i < scalar(@{$data}); $i++) { |
|
|
222
|
|
|
|
|
525
|
|
|
159
|
142
|
|
|
|
|
229
|
my $suffix = substr($data->[$i], -$n); |
|
160
|
|
|
|
|
|
|
|
|
161
|
142
|
100
|
|
|
|
270
|
next if length($suffix) != $n; |
|
162
|
|
|
|
|
|
|
|
|
163
|
67
|
|
|
|
|
120
|
for (my $j = $j_start; $j < scalar(@{$data}); $j++) { |
|
|
199
|
|
|
|
|
392
|
|
|
164
|
138
|
100
|
|
|
|
330
|
next if $i == $j; |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
#warn sprintf('%u, %u, -> %s, %s', $i, $j, defined($data->[$i]) ? 't' : 'f', defined($data->[$j]) ? 't' : 'f') unless defined($data->[$i]) && defined($data->[$j]); |
|
167
|
72
|
100
|
|
|
|
198
|
if ($suffix eq substr($data->[$j], 0, $n)) { |
|
168
|
6
|
|
|
|
|
19
|
substr($data->[$i], -$n, $n, $data->[$j]); |
|
169
|
6
|
|
|
|
|
10
|
splice(@{$data}, $j, 1); |
|
|
6
|
|
|
|
|
14
|
|
|
170
|
6
|
|
|
|
|
10
|
$i--; |
|
171
|
6
|
|
|
|
|
16
|
next outer; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
} |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
1; |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
__END__ |