line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## File: DiaColloDB::Client::list.pm |
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
4
|
|
|
|
|
|
|
## Description: collocation db, client: list |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package DiaColloDB::Client::list; |
7
|
|
|
|
|
|
|
|
8
|
2
|
|
|
2
|
|
890
|
use DiaColloDB::threads; |
|
1
|
|
|
|
|
14
|
|
|
1
|
|
|
|
|
10
|
|
9
|
|
|
|
|
|
|
|
10
|
1
|
|
|
1
|
|
135
|
use DiaColloDB::Client; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
35
|
|
11
|
1
|
|
|
1
|
|
5
|
use DiaColloDB::Utils qw(:list :math :si); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
134
|
|
12
|
1
|
|
|
1
|
|
410
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
96
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
##-- OLD: try to use threads |
15
|
|
|
|
|
|
|
## + weird cpantesters errors for DiaColloDB v0.12.01[23], e.g. |
16
|
|
|
|
|
|
|
## - http://www.cpantesters.org/cpan/report/b8caf29a-4121-11ea-9d04-93d2cf6284ad |
17
|
|
|
|
|
|
|
## - http://www.cpantesters.org/cpan/report/acb1841c-41b5-11ea-81ed-d3b978f58c5e |
18
|
|
|
|
|
|
|
## + error: "Attempt to reload DiaColloDB.pm aborted." on perl v5.31.7 during make test |
19
|
|
|
|
|
|
|
## + perldiag says: |
20
|
|
|
|
|
|
|
## Attempt to reload %s aborted. |
21
|
|
|
|
|
|
|
## (F) You tried to load a file with "use" or "require" that failed to |
22
|
|
|
|
|
|
|
## compile once already. Perl will not try to compile this file again |
23
|
|
|
|
|
|
|
## unless you delete its entry from %INC. See "require" in perlfunc |
24
|
|
|
|
|
|
|
## and "%INC" in perlvar. |
25
|
|
|
|
|
|
|
## + DiaColloDB 0.12.013 - tried checking $INC{'threads.pm'} here -> no joy |
26
|
|
|
|
|
|
|
## + DiaColloDB 0.12.014 - always 'use threads' (added to PREREQ_PM), just set WANT_THREADS for debugging |
27
|
|
|
|
|
|
|
## + see also https://www.perlmonks.org/?node_id=1029344 for runtime workaround |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
our ($WANT_THREADS); |
30
|
|
|
|
|
|
|
BEGIN { |
31
|
1
|
50
|
|
1
|
|
8
|
$WANT_THREADS = ($^P |
32
|
|
|
|
|
|
|
? 0 ##-- disable threads if running under debugger |
33
|
|
|
|
|
|
|
: $DiaColloDB::threads::MODULE); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
##-- avoid heinous death with JSON::XS backend using threads |
36
|
1
|
|
|
|
|
3457
|
$DDC::Client::JSON_BACKEND = 'JSON::PP'; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
##============================================================================== |
41
|
|
|
|
|
|
|
## Globals & Constants |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Client); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
##============================================================================== |
46
|
|
|
|
|
|
|
## Constructors etc. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
## $cli = CLASS_OR_OBJECT->new(%args) |
49
|
|
|
|
|
|
|
## $cli = CLASS_OR_OBJECT->new(\@urls, %args) |
50
|
|
|
|
|
|
|
## + %args, object structure: |
51
|
|
|
|
|
|
|
## ( |
52
|
|
|
|
|
|
|
## ##-- DiaColloDB::Client: options |
53
|
|
|
|
|
|
|
## url => $url, ##-- list url (sub-urls separated by whitespace, "+SCHEME://" or "+://") |
54
|
|
|
|
|
|
|
## ## |
55
|
|
|
|
|
|
|
## ##-- DiaColloDB::Client::list |
56
|
|
|
|
|
|
|
## urls => \@urls, ##-- db urls |
57
|
|
|
|
|
|
|
## opts => \%opts, ##-- sub-client options (includes all list-client "log*" options and "sub.OPT" options) |
58
|
|
|
|
|
|
|
## fudge => $coef, ##-- get ($coef*$kbest) items from sub-clients (-1:all, 0|1:none, default=10) |
59
|
|
|
|
|
|
|
## fork => $bool, ##-- run each subclient query in its own fork? (default=if available) |
60
|
|
|
|
|
|
|
## lazy => $bool, ##-- use temporary on-demand sub-clients (true,default) or persistent sub-clients (false) |
61
|
|
|
|
|
|
|
## extend => $bool, ##-- use extend() queries to acquire correct f2 counts? (default=true) |
62
|
|
|
|
|
|
|
## logFudge => $level, ##-- log-level for fudge-factor debugging (default='debug') |
63
|
|
|
|
|
|
|
## logThread => $level, ##-- log-level for thread (fork) options (default='none') |
64
|
|
|
|
|
|
|
## ## |
65
|
|
|
|
|
|
|
## ##-- guts |
66
|
|
|
|
|
|
|
## #clis => \@clis, ##-- per-url clients for mode, v0.11.000 |
67
|
|
|
|
|
|
|
## ) |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
## %defaults = $CLASS_OR_OBJ->defaults() |
70
|
|
|
|
|
|
|
## + called by new() |
71
|
|
|
|
|
|
|
sub defaults { |
72
|
|
|
|
|
|
|
return ( |
73
|
|
|
|
|
|
|
#urls=>[], |
74
|
|
|
|
|
|
|
#clis=>[], |
75
|
0
|
|
|
0
|
1
|
|
opts=>{}, |
76
|
|
|
|
|
|
|
fudge=>10, |
77
|
|
|
|
|
|
|
logFudge => 'debug', |
78
|
|
|
|
|
|
|
logThread => 'none', |
79
|
|
|
|
|
|
|
fork => $WANT_THREADS, |
80
|
|
|
|
|
|
|
lazy => 1, |
81
|
|
|
|
|
|
|
extend => 1, |
82
|
|
|
|
|
|
|
); |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
##============================================================================== |
86
|
|
|
|
|
|
|
## I/O: open/close |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
## $cli_or_undef = $cli->open_list( \@urls, %opts) |
89
|
|
|
|
|
|
|
## $cli_or_undef = $cli->open_list($list_url, %opts) |
90
|
|
|
|
|
|
|
## $cli_or_undef = $cli->open_list() |
91
|
|
|
|
|
|
|
## + creates new client for each url, passing %opts to DiaColloDB::Client->new() |
92
|
|
|
|
|
|
|
## + component URLs beginning with '?' are treated as options to $cli itself |
93
|
|
|
|
|
|
|
sub open_list { |
94
|
0
|
|
|
0
|
1
|
|
my ($cli,$url) = (shift,shift); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
##-- parse URLs |
97
|
0
|
|
0
|
|
|
|
$url //= $cli->{url}; |
98
|
0
|
|
|
|
|
|
my ($urls); |
99
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::isa($url,'ARRAY')) { |
100
|
0
|
|
|
|
|
|
$urls = $url; |
101
|
0
|
|
|
|
|
|
$url = "list://".join(' ', @$urls); |
102
|
|
|
|
|
|
|
} else { |
103
|
0
|
|
|
|
|
|
($urls=$url) =~ s{^list://}{}; |
104
|
0
|
|
0
|
|
|
|
$urls = [map {s{^://}{}; $_} grep {($_//'') ne ''} split(m{\s+|\+(?=[a-zA-Z0-9\+\-\.]*://)},$urls)]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
##-- parse list-client options (query-only URLs) |
108
|
0
|
|
|
|
|
|
my $curls = []; |
109
|
0
|
|
|
|
|
|
foreach (@$urls) { |
110
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::isa($_,'HASH')) { |
|
|
0
|
|
|
|
|
|
111
|
|
|
|
|
|
|
##-- HASH-ref: clobber local options |
112
|
0
|
|
|
|
|
|
@$cli{keys %$_} = values %$_; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
elsif (m{^(?:://)?\?}) { |
115
|
|
|
|
|
|
|
##-- query-string only: clobber local options |
116
|
0
|
|
|
|
|
|
my %form = URI->new($_)->query_form; |
117
|
0
|
|
|
|
|
|
@$cli{keys %form} = values %form; |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
else { |
120
|
|
|
|
|
|
|
##-- sub-URL |
121
|
0
|
|
|
|
|
|
push(@$curls,$_); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
} |
124
|
0
|
|
|
|
|
|
@$cli{qw(url urls)} = ($url,$curls); |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
##-- sanity check(s) |
127
|
0
|
0
|
0
|
|
|
|
if ($cli->{fork} && !$WANT_THREADS) { |
128
|
0
|
|
|
|
|
|
$cli->warn("fork-mode requested, but 'threads' module unavailable"); |
129
|
0
|
|
|
|
|
|
$cli->{fork} = 0; |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
##-- save sub-client options in $cli->{opts} |
133
|
0
|
0
|
|
|
|
|
if (@_) { |
134
|
0
|
|
|
|
|
|
my %opts = @_; |
135
|
0
|
|
|
|
|
|
$cli->{opts}{keys %opts} = values %opts; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
##-- pass sub-client options "log*"=VAL |
139
|
0
|
|
|
|
|
|
foreach my $key (grep {/^sub\./} keys %$cli) { |
|
0
|
|
|
|
|
|
|
140
|
0
|
|
|
|
|
|
my $subkey = $key; |
141
|
0
|
|
|
|
|
|
$subkey =~ s/^sub\.//; |
142
|
0
|
|
|
|
|
|
$cli->{opts}{$subkey} = $cli->{$key}; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
##-- open sub-clients (non-lazy mode) |
146
|
0
|
0
|
|
|
|
|
$cli->{clis} = [map {$cli->client($_)} (0..$#$curls)] if (!$cli->{lazy}); |
|
0
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
return $cli; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
## $cli_or_undef = $cli->close() |
152
|
|
|
|
|
|
|
## + default just returns $cli |
153
|
|
|
|
|
|
|
sub close { |
154
|
0
|
|
|
0
|
1
|
|
my $cli = shift; |
155
|
0
|
|
0
|
|
|
|
$_->close() foreach (grep {defined($_)} @{$cli->{clis}//[]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
|
delete $cli->{clis}; |
157
|
0
|
|
|
|
|
|
return $cli; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
## $bool = $cli->opened() |
161
|
|
|
|
|
|
|
## + override checks for non-empty $cli->{urls} |
162
|
|
|
|
|
|
|
## + ensures all sub-clients are opened in non-lazy mode |
163
|
|
|
|
|
|
|
sub opened { |
164
|
|
|
|
|
|
|
return (ref($_[0]) |
165
|
|
|
|
|
|
|
&& $_[0]{urls} |
166
|
|
|
|
|
|
|
&& @{$_[0]{urls}} |
167
|
|
|
|
|
|
|
&& ($_[0]{lazy} || ( |
168
|
|
|
|
|
|
|
$_[0]{clis} |
169
|
|
|
|
|
|
|
&& @{$_[0]{clis}}==@{$_[0]{urls}} |
170
|
0
|
|
0
|
0
|
1
|
|
&& !grep {!defined($_) || !$_->opened} @{$_[0]{clis}} |
171
|
|
|
|
|
|
|
)) |
172
|
|
|
|
|
|
|
); |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
## %opts = $cli->dbOptions() |
176
|
|
|
|
|
|
|
## + options to be passed down to bottom-level DB |
177
|
|
|
|
|
|
|
## + override includes $cli->{opts} |
178
|
|
|
|
|
|
|
sub dbOptions { |
179
|
0
|
|
|
0
|
0
|
|
my $cli = shift; |
180
|
0
|
0
|
0
|
|
|
|
return ($cli->SUPER::dbOptions, (ref($cli) && $cli->{opts} ? %{$cli->{opts}} : qw())); |
|
0
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
## $cli = $cli->client($i, %opts) |
184
|
|
|
|
|
|
|
## + open (temporary) sub-client #$i |
185
|
|
|
|
|
|
|
sub client { |
186
|
0
|
|
|
0
|
0
|
|
my ($cli,$i,%opts) = @_; |
187
|
0
|
0
|
0
|
|
|
|
return $cli->{clis}[$i] if (!$cli->{lazy} && $cli->{clis} && $cli->{clis}[$i]); ##-- non-lazy mode |
|
|
|
0
|
|
|
|
|
188
|
0
|
0
|
|
|
|
|
my $url = $cli->{urls}[$i] |
189
|
|
|
|
|
|
|
or $cli->logconfess("client(): no URL for client #$i"); |
190
|
0
|
0
|
|
|
|
|
my $sub = DiaColloDB::Client->new($url,$cli->dbOptions,%opts) |
191
|
|
|
|
|
|
|
or $cli->logconfess("client(): failed to create client for URL '$url': $!"); |
192
|
0
|
|
|
|
|
|
return $sub; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
##============================================================================== |
196
|
|
|
|
|
|
|
## I/O: Persistent API: header |
197
|
|
|
|
|
|
|
## + largely INHERITED from DiaColloDB::Persistent |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
## @keys = $coldb->headerKeys() |
200
|
|
|
|
|
|
|
## + keys to save as header |
201
|
|
|
|
|
|
|
sub headerKeys { |
202
|
0
|
|
0
|
0
|
1
|
|
return (qw(url urls), grep {!ref($_[0]{$_}) && $_ !~ m{^log}} keys %{$_[0]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
##============================================================================== |
208
|
|
|
|
|
|
|
## utils: threaded sub-client calls |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
## @results = $cli->subcall(\&CODE, @args) |
211
|
|
|
|
|
|
|
## \@results = $cli->subcall(\&CODE, @args) |
212
|
|
|
|
|
|
|
## + calls CODE($cli, $i, @args) in scalar context foreach $i (0..$#{$cli->{urls}}) |
213
|
|
|
|
|
|
|
## + CODE is expected to return anything other than undef |
214
|
|
|
|
|
|
|
sub subcall { |
215
|
0
|
|
|
0
|
0
|
|
my ($cli,$code,@args) = @_; |
216
|
0
|
|
|
|
|
|
my ($i,@results); |
217
|
0
|
0
|
0
|
|
|
|
if ($WANT_THREADS && $cli->{fork}) { |
218
|
|
|
|
|
|
|
##-- threaded call |
219
|
0
|
0
|
|
|
|
|
PDL::no_clone_skip_warning() if (UNIVERSAL::can('PDL','no_clone_skip_warning')); ##-- ithreads warning |
220
|
|
|
|
|
|
|
|
221
|
0
|
|
|
|
|
|
my (@thrs); |
222
|
0
|
|
|
|
|
|
for ($i=0; $i <= $#{$cli->{urls}}; ++$i) { |
|
0
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
|
$cli->vlog($cli->{logThread}, "subcall(): spawning thread for subclient[$i]"); |
224
|
0
|
|
|
|
|
|
push(@thrs, threads->create({context=>'scalar'}, $code, $cli, $i, @args)); |
225
|
|
|
|
|
|
|
} |
226
|
0
|
|
|
|
|
|
for ($i=0; $i <= $#{$cli->{urls}}; ++$i) { |
|
0
|
|
|
|
|
|
|
227
|
0
|
|
|
|
|
|
$cli->vlog($cli->{logThread}, "subcall(): joining thread for subclient[$i]"); |
228
|
0
|
|
|
|
|
|
my $rv = $thrs[$i]->join(); ##-- perl 'threads' module (ithreads) segfaults here at 2nd encounter (client #0:ok, client #1:segfault) |
229
|
0
|
0
|
|
|
|
|
$cli->logconfess("subcall(): error processing subclient[$i] ($cli->{urls}[$i])") if ($thrs[$i]->error); |
230
|
0
|
|
|
|
|
|
push(@results, $rv); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
else { |
234
|
|
|
|
|
|
|
##-- non-threaded call |
235
|
0
|
|
|
|
|
|
$cli->vlog($cli->{logThread}, "subcall(): running in serial mode"); |
236
|
0
|
|
|
|
|
|
for ($i=0; $i <= $#{$cli->{urls}}; ++$i) { |
|
0
|
|
|
|
|
|
|
237
|
0
|
|
|
|
|
|
push(@results, scalar($code->($cli,$i,@args))); |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
} |
240
|
0
|
0
|
|
|
|
|
return wantarray ? @results : \@results; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
##============================================================================== |
244
|
|
|
|
|
|
|
## dbinfo |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
## \%info = $cli->dbinfo() |
247
|
|
|
|
|
|
|
## + returned info is {dtrs=>\@dtr_info, fudge=>$coef}, |
248
|
|
|
|
|
|
|
sub dbinfo { |
249
|
0
|
|
|
0
|
1
|
|
my $cli = shift; |
250
|
|
|
|
|
|
|
my @dtrs = $cli->subcall(sub { |
251
|
0
|
|
|
0
|
|
|
my $sub = $_[0]->client($_[1]); |
252
|
0
|
0
|
|
|
|
|
$sub->dbinfo() |
253
|
|
|
|
|
|
|
or $_[0]->logconfess("dbinfo() failed for client URL $sub->{url}: $sub->{error}"); |
254
|
0
|
|
|
|
|
|
}); |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
##-- collect & merge daughter info |
257
|
0
|
|
|
|
|
|
my $info = {dtrs=>\@dtrs, (map {($_=>$cli->{$_})} qw(fudge fork lazy)), urls=>join(' ',@{$cli->{urls}})}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
my %attrs = qw(); |
259
|
0
|
|
|
|
|
|
my %rels = qw(); |
260
|
0
|
|
|
|
|
|
my ($di,$d); |
261
|
0
|
|
|
|
|
|
foreach $di (0..$#dtrs) { |
262
|
0
|
|
|
|
|
|
$d = $dtrs[$di]; |
263
|
0
|
|
|
|
|
|
$d->{url} = $cli->{urls}[$di]; |
264
|
0
|
|
|
|
|
|
foreach (@{$d->{attrs}}) { |
|
0
|
|
|
|
|
|
|
265
|
0
|
|
|
|
|
|
$attrs{$_->{name}}[$di] = $_; |
266
|
|
|
|
|
|
|
} |
267
|
0
|
|
|
|
|
|
foreach (keys %{$d->{relations}}) { |
|
0
|
|
|
|
|
|
|
268
|
0
|
|
|
|
|
|
$rels{$_}[$di] = $d->{relations}{$_}; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} |
271
|
0
|
0
|
|
|
|
|
$info->{timestamp} = (sort map {$_->{timestamp}||''} @dtrs)[$#dtrs]; |
|
0
|
|
|
|
|
|
|
272
|
0
|
|
|
|
|
|
$info->{xdmax} = lmax(map {$_->{xdmax}} @dtrs); |
|
0
|
|
|
|
|
|
|
273
|
0
|
|
|
|
|
|
$info->{xdmin} = lmin(map {$_->{xdmin}} @dtrs); |
|
0
|
|
|
|
|
|
|
274
|
0
|
|
|
|
|
|
$info->{du_b} = lsum(map {$_->{du_b}} @dtrs); |
|
0
|
|
|
|
|
|
|
275
|
0
|
|
|
|
|
|
$info->{du_h} = si_str($info->{du_b}); |
276
|
0
|
|
|
|
|
|
$info->{version} = $DiaColloDB::VERSION; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
##-- extract common attributes |
279
|
0
|
|
|
|
|
|
my ($aname,$avals,$a,$counts); |
280
|
0
|
|
|
|
|
|
foreach $aname (keys %attrs) { |
281
|
0
|
|
|
|
|
|
$avals = $attrs{$aname}; |
282
|
0
|
0
|
|
|
|
|
next if ((grep {defined $_} @$avals) != @dtrs); |
|
0
|
|
|
|
|
|
|
283
|
0
|
|
|
|
|
|
$a = { name=>$aname, title=>$avals->[0]{title} }; |
284
|
0
|
|
|
|
|
|
$a->{size} = join('+', map {$_->{size}} @$avals); |
|
0
|
|
|
|
|
|
|
285
|
0
|
|
0
|
|
|
|
$a->{alias} = [sort grep {$counts->{$_} >= @dtrs} keys %{$counts = lcounts([map {@{$_->{alias}//[]}} @$avals])}]; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
286
|
0
|
|
|
|
|
|
push(@{$info->{attrs}}, $a); |
|
0
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
##-- extract common relations |
290
|
0
|
|
|
|
|
|
my ($rname,$rvals,$r); |
291
|
0
|
|
|
|
|
|
foreach $rname (keys %rels) { |
292
|
0
|
|
|
|
|
|
$rvals = $rels{$rname}; |
293
|
0
|
0
|
|
|
|
|
next if ((grep {defined $_} @$rvals) != @dtrs); |
|
0
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
$r = { }; |
295
|
0
|
|
|
|
|
|
$r->{class} = join(' ', @{luniq([map {$_->{class}} @$rvals])}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
296
|
0
|
|
|
|
|
|
$r->{du_b} = lsum(map {$_->{du_b}} @$rvals); |
|
0
|
|
|
|
|
|
|
297
|
0
|
|
|
|
|
|
$r->{du_h} = si_str($r->{du_b}); |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
|
300
|
0
|
|
0
|
|
|
|
$r->{attrs} = [sort grep {$counts->{$_} >= @dtrs} keys %{$counts = lcounts([map {@{$_->{attrs}//[]}} @$rvals])}] |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
301
|
0
|
0
|
|
|
|
|
if (grep {$_->{attrs}} @$rvals); |
|
0
|
|
|
|
|
|
|
302
|
0
|
|
0
|
|
|
|
$r->{meta} = [sort grep {$counts->{$_} >= @dtrs} keys %{$counts = lcounts([map {@{$_->{meta}//[]}} @$rvals])}] |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
303
|
0
|
0
|
|
|
|
|
if (grep {$_->{meta}} @$rvals); |
|
0
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
|
305
|
0
|
|
|
|
|
|
$info->{relations}{$rname} = $r; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
0
|
|
|
|
|
|
return $info; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
##============================================================================== |
313
|
|
|
|
|
|
|
## Profiling |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
316
|
|
|
|
|
|
|
## Profiling: Generic |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
## $mprf = $cli->profile($relation, %opts) |
319
|
|
|
|
|
|
|
## + get a relation profile for selected items as a DiaColloDB::Profile::Multi object |
320
|
|
|
|
|
|
|
## + %opts: as for DiaColloDB::profile() |
321
|
|
|
|
|
|
|
## + sets $cli->{error} on error |
322
|
|
|
|
|
|
|
sub profile { |
323
|
0
|
|
|
0
|
1
|
|
my ($cli,$rel,%opts) = @_; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
##-- kludge: ddc metaserver dispatch |
326
|
|
|
|
|
|
|
## + BUG 2020-03-13a: incorrect f2 values (too low) from %xkeys-like situations for metacorpora |
327
|
|
|
|
|
|
|
## - f2 values are queried with COUNT(KEYS(...)), so f2 gets overlooked for physical subcorpora whenever f12=0 but f2>0 |
328
|
|
|
|
|
|
|
## - "proper" workaround would be iterative f2-acquisition in Relation::DDC (beware of ddc query size limit = 4kB) |
329
|
|
|
|
|
|
|
## * maybe via dynamic "groupby" clause generation? |
330
|
|
|
|
|
|
|
## * maybe by passing literal groupby-tuples to DDC (e.g. COUNT( $(l,p)={[Haus,NN],[laufen,VVFIN],...} ) ? |
331
|
|
|
|
|
|
|
## * maybe by post-filtering DDC counts? |
332
|
|
|
|
|
|
|
## - "hacky" workaround might use lexdb (if present ... another infrastructure variable to worry about) |
333
|
|
|
|
|
|
|
## + BUG 2020-03-13b: disabling this to force default %xkeys strategy doesn't help |
334
|
|
|
|
|
|
|
## - b/c "ddcServer" option isn't set for list-client daughters --> no DDC relation for daughters |
335
|
|
|
|
|
|
|
## - even if we tweaked *that* in, we'd still have (f12=0,f2>0) cases in physical subcorpora, which would get mis-counted |
336
|
|
|
|
|
|
|
## - best overall workaround is probably to ditch KEYS() and do full iterative f2-acquisition in Relation::DDC, |
337
|
|
|
|
|
|
|
## then re-implement DDC::extend() as iterative profile() |
338
|
0
|
0
|
0
|
|
|
|
return $cli->ddcMeta('profile',$rel,%opts) if ($rel eq 'ddc' && $cli->{ddcServer}); |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
##-- defaults |
341
|
0
|
|
|
|
|
|
DiaColloDB->profileOptions(\%opts); |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
##-- fudge coefficient |
344
|
|
|
|
|
|
|
## + disabled for ddc relation always stringifies: fetch full f12 sub-results in 1st pass (b/c DDC::extend() only updates f2) |
345
|
0
|
0
|
0
|
|
|
|
my $fudge = ($rel eq 'ddc' ? -1 : $cli->{fudge}) // 0; |
346
|
0
|
|
0
|
|
|
|
my $kbest = $opts{kbest} // 0; |
347
|
0
|
0
|
|
|
|
|
my $kfudge = ($fudge < 0 ? -1 |
|
|
0
|
|
|
|
|
|
348
|
|
|
|
|
|
|
: ($fudge == 0 ? $kbest |
349
|
|
|
|
|
|
|
: ($fudge * $kbest))); |
350
|
0
|
|
|
|
|
|
$cli->vlog($cli->{logFudge}, "profile(): querying ", scalar(@{$cli->{urls}}), " client URL(s) with (fudge=$fudge) * (kbest=$kbest) = $kfudge"); |
|
0
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
##-- query clients |
353
|
|
|
|
|
|
|
my @mps = $cli->subcall(sub { |
354
|
0
|
|
|
0
|
|
|
my $sub = $_[0]->client($_[1]); |
355
|
0
|
0
|
|
|
|
|
$sub->profile($rel,%opts,strings=>1,kbest=>$kfudge,cutoff=>'',fill=>1) |
356
|
|
|
|
|
|
|
or $_[0]->logconfess("profile() failed for client URL $sub->{url}: $sub->{error}"); |
357
|
0
|
|
|
|
|
|
}); |
358
|
|
|
|
|
|
|
|
359
|
0
|
0
|
0
|
|
|
|
if ($cli->{extend} && @mps > 1) { |
360
|
0
|
|
|
|
|
|
$cli->vlog($cli->{logFudge}, "profile(): extending sub-profiles"); |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
##-- extend: delayed fudge-coefficient for DDC profiles |
363
|
0
|
0
|
0
|
|
|
|
if ($rel eq 'ddc' && ($cli->{fudge}//0) > 0) { |
|
|
|
0
|
|
|
|
|
364
|
0
|
|
|
|
|
|
$cli->vlog($cli->{logFudge}, "profile(): fudging DDC sub-profiles"); |
365
|
0
|
|
0
|
|
|
|
$fudge = $cli->{fudge}//0; |
366
|
0
|
0
|
|
|
|
|
$kfudge = ($fudge == 0 ? $kbest : ($fudge * $kbest)); |
367
|
0
|
|
|
|
|
|
foreach my $mp (@mps) { |
368
|
0
|
|
|
|
|
|
$mp->compile($opts{score}, eps=>$opts{eps})->trim(global=>$opts{global}, drop=>[''], kbest=>$kfudge, cutoff=>$opts{cutoff}, empty=>0); |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
} |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
##-- extend: fill-out multi-profiles (ensure compatible slice-partitioning & find "missing" keys) |
373
|
0
|
|
|
|
|
|
DiaColloDB::Profile::Multi->xfill(\@mps); |
374
|
0
|
|
|
|
|
|
my $xkeys = DiaColloDB::Profile::Multi->xkeys(\@mps); |
375
|
|
|
|
|
|
|
#$cli->trace("extend(): xkeys=", DiaColloDB::Utils::saveJsonString($xkeys, utf8=>0)); |
376
|
|
|
|
|
|
|
#$cli->trace("extend(): N.pre=", join('+',map {$_->{profiles}[0]{N}} @mps)); |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
##-- extend multi-profiles with "missing" keys |
379
|
|
|
|
|
|
|
my @mpx = $cli->subcall(sub { |
380
|
|
|
|
|
|
|
#return undef if (!$xkeys->[$_[1]] || !grep {@$_} values(%{$xkeys->[$_[1]]})); ##-- don't need extend here |
381
|
0
|
|
|
0
|
|
|
my $sub = $_[0]->client($_[1]); |
382
|
0
|
0
|
|
|
|
|
$sub->extend($rel,%opts,strings=>1,score=>'f',cutoff=>'',fill=>1,slice2keys=>JSON::to_json($xkeys->[$_[1]], {allow_nonref=>1})) |
383
|
|
|
|
|
|
|
or $_[0]->logconfess("extend() failed for client url $sub->{url}: $sub->{error}"); |
384
|
0
|
|
|
|
|
|
}); |
385
|
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
|
foreach (0..$#mpx) { |
387
|
0
|
0
|
|
|
|
|
$mps[$_]->_add($mpx[$_], N=>0,f1=>0) if (defined($mpx[$_])); |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
##-- create final profile |
392
|
0
|
0
|
|
|
|
|
my $mp = shift(@mps) or return undef; |
393
|
0
|
|
|
|
|
|
$mp->_add($_) foreach (@mps); |
394
|
|
|
|
|
|
|
$cli->vlog($cli->{logFudge}, "profile(): collected fudged profile of size ", $mp->size) |
395
|
0
|
0
|
0
|
|
|
|
if (($cli->{logFudge}//'off') !~ /^(?:off|none)$/); |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
##-- re-compile and -trim |
398
|
0
|
|
|
|
|
|
$mp->compile($opts{score}, eps=>$opts{eps})->trim(global=>$opts{global}, drop=>[''], kbest=>$kbest, cutoff=>$opts{cutoff}, empty=>!$opts{fill}); |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
$cli->vlog($cli->{logFudge}, "profile(): trimmed final profile to size ", $mp->size) |
401
|
0
|
0
|
0
|
|
|
|
if (($cli->{logFudge}//'off') !~ /^(?:off|none)$/); |
402
|
|
|
|
|
|
|
|
403
|
0
|
|
|
|
|
|
return $mp; |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
407
|
|
|
|
|
|
|
## Profiling: extend (pass-2 for multi-clients) |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
## $mprf = $cli->extend($relation, %opts) |
410
|
|
|
|
|
|
|
## + get an extension-profile for selected items as a DiaColloDB::Profile::Multi object |
411
|
|
|
|
|
|
|
## + %opts: as for DiaColloDB::extend() |
412
|
|
|
|
|
|
|
## + sets $cli->{error} on error |
413
|
|
|
|
|
|
|
sub extend { |
414
|
0
|
|
|
0
|
1
|
|
my ($cli,$rel,%opts) = @_; |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
##-- kludge: ddc metaserver dispatch |
417
|
0
|
0
|
0
|
|
|
|
return $cli->ddcMeta('extend',$rel,%opts) if ($rel eq 'ddc' && $cli->{ddcServer}); |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
##-- defaults |
420
|
0
|
|
|
|
|
|
DiaColloDB->profileOptions(\%opts); |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
##-- query clients |
423
|
|
|
|
|
|
|
my @mps = $cli->subcall(sub { |
424
|
0
|
|
|
0
|
|
|
my $sub = $_[0]->client($_[1]); |
425
|
0
|
0
|
|
|
|
|
$sub->extend($rel,%opts,strings=>1) |
426
|
|
|
|
|
|
|
or $_[0]->logconfess("extend() failed for client URL $sub->{url}: $sub->{error}"); |
427
|
0
|
|
|
|
|
|
}); |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
##-- create final profile |
430
|
0
|
0
|
|
|
|
|
my $mp = shift(@mps) or return undef; |
431
|
0
|
|
|
|
|
|
$mp->_add($_) foreach (@mps); |
432
|
|
|
|
|
|
|
|
433
|
0
|
|
|
|
|
|
return $mp; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
437
|
|
|
|
|
|
|
## Profiling: Comparison (diff) |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
## $mprf = $cli->compare($relation, %opts) |
440
|
|
|
|
|
|
|
## + get a relation comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object |
441
|
|
|
|
|
|
|
## + adpated from generic DiaColloDB::Relation::profile() |
442
|
|
|
|
|
|
|
## + %opts: as for DiaColloDB::compare() |
443
|
|
|
|
|
|
|
## + sets $cli->{error} on error |
444
|
|
|
|
|
|
|
sub compare { |
445
|
0
|
|
|
0
|
1
|
|
my ($cli,$rel,%opts) = @_; |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
##-- kludge: ddc metaserver dispatch |
448
|
0
|
0
|
0
|
|
|
|
return $cli->ddcMeta('compare',$rel,%opts) if ($rel eq 'ddc' && $cli->{ddcServer}); |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
##-- defaults |
451
|
0
|
|
|
|
|
|
DiaColloDB->compareOptions(\%opts); |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
##-- common variables |
454
|
0
|
0
|
0
|
|
|
|
my %aopts = map {exists($opts{"a$_"}) ? ($_=>$opts{"a$_"}) : qw()} (qw(query date slice), @{$opts{_abkeys}//[]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
455
|
0
|
0
|
0
|
|
|
|
my %bopts = map {exists($opts{"b$_"}) ? ($_=>$opts{"b$_"}) : qw()} (qw(query date slice), @{$opts{_abkeys}//[]}); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
456
|
0
|
|
|
|
|
|
my %popts = (kbest=>-1,cutoff=>'',global=>0,strings=>0,fill=>1); |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
##-- get profiles to compare |
459
|
0
|
0
|
|
|
|
|
my $mpa = $cli->profile($rel,%opts, %aopts,%popts) or return undef; |
460
|
0
|
0
|
|
|
|
|
my $mpb = $cli->profile($rel,%opts, %bopts,%popts) or return undef; |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
##-- alignment and trimming |
463
|
0
|
|
|
|
|
|
my $ppairs = DiaColloDB::Profile::MultiDiff->align($mpa,$mpb); |
464
|
0
|
|
|
|
|
|
DiaColloDB::Profile::MultiDiff->trimPairs($ppairs, %opts); |
465
|
0
|
|
|
|
|
|
my $diff = DiaColloDB::Profile::MultiDiff->new($mpa,$mpb, titles=>$mpa->{titles}, diff=>$opts{diff}); |
466
|
0
|
0
|
|
|
|
|
$diff->trim( DiaColloDB::Profile::Diff->diffkbest($opts{diff})=>$opts{kbest} ) if (!$opts{global}); |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
##-- return |
469
|
0
|
|
|
|
|
|
return $diff; |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
473
|
|
|
|
|
|
|
## Profiling: DDC (via metaserver in $list->{ddcServer}) |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
## $rc = $cli->ddcMeta($method_name, @args) |
476
|
|
|
|
|
|
|
## + calls $COLDB->can($method_name)->($COLDB,@args) on temporary ddc metaserver object |
477
|
|
|
|
|
|
|
sub ddcMeta { |
478
|
0
|
|
|
0
|
0
|
|
my $cli = shift; |
479
|
0
|
0
|
|
|
|
|
return undef if (!$cli->{ddcServer}); |
480
|
0
|
|
|
|
|
|
$cli->vlog('trace', "ddcMeta(): dispatching to $cli->{ddcServer}"); |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
##-- create temporary dummy DiaColloDB object |
483
|
|
|
|
|
|
|
## + force sort attributes, otherwise we get different default attribute orders for different clients |
484
|
0
|
|
|
|
|
|
my $dbinfo = $cli->dbinfo(); |
485
|
|
|
|
|
|
|
my $coldb = DiaColloDB->new(ddcServer=>$cli->{ddcServer}, |
486
|
0
|
0
|
|
|
|
|
attrs=>[sort map {$_->{name}} @{$dbinfo->{attrs}}], |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
) |
488
|
|
|
|
|
|
|
or $cli->logconfess("ddcMeta(): failed to create DiaColloDB wrapper object"); |
489
|
0
|
|
|
|
|
|
$coldb->{ddc} = DiaColloDB::Relation::DDC->create($coldb); |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
##-- dispatch |
492
|
0
|
|
|
|
|
|
my $method = shift; |
493
|
0
|
0
|
|
|
|
|
my $coderef = $coldb->can($method) |
494
|
|
|
|
|
|
|
or $cli->logconfess("ddcMeta(): failed to resolve method name '$method'"); |
495
|
0
|
|
|
|
|
|
return $coderef->($coldb,@_); |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
##============================================================================== |
500
|
|
|
|
|
|
|
## Footer |
501
|
|
|
|
|
|
|
1; |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
__END__ |