line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## |
3
|
|
|
|
|
|
|
## File: DiaColloDB::Profile::MultiDiff.pm |
4
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
5
|
|
|
|
|
|
|
## Description: collocation db, co-frequency profile diffs, by date |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
package DiaColloDB::Profile::MultiDiff; |
9
|
1
|
|
|
1
|
|
8
|
use DiaColloDB::Profile::Multi; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
10
|
1
|
|
|
1
|
|
6
|
use DiaColloDB::Profile::Diff; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
51
|
|
11
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::Utils qw(:html :list); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
50
|
|
12
|
1
|
|
|
1
|
|
315
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1308
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
##============================================================================== |
15
|
|
|
|
|
|
|
## Globals & Constants |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Profile::Multi); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
##============================================================================== |
20
|
|
|
|
|
|
|
## Constructors etc. |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
## $mpd = CLASS_OR_OBJECT->new(%args) |
23
|
|
|
|
|
|
|
## $mpd = CLASS_OR_OBJECT->new($mp1,$mp2,%args) |
24
|
|
|
|
|
|
|
## + %args, object structure: |
25
|
|
|
|
|
|
|
## ( |
26
|
|
|
|
|
|
|
## profiles => \@profiles, ##-- ($profile, ...) : sub-diffs, with {label} key |
27
|
|
|
|
|
|
|
## titles => \@titles, ##-- item group titles (default:undef: unknown) |
28
|
|
|
|
|
|
|
## qinfo => \%qinfo, ##-- query info (optional; keys prefixed with 'a' or 'b'): see DiaColloDB::Profile::Multi |
29
|
|
|
|
|
|
|
## ) |
30
|
|
|
|
|
|
|
## + additional %args: |
31
|
|
|
|
|
|
|
## ( |
32
|
|
|
|
|
|
|
## populate => $bool, ##-- auto-populate() if $mp1 and $mp2 are specified? (default=1) |
33
|
|
|
|
|
|
|
## diff => $diffop, ##-- low-level diff operation (see DiaColloDB::Profile::Diff) |
34
|
|
|
|
|
|
|
## ) |
35
|
|
|
|
|
|
|
sub new { |
36
|
0
|
|
|
0
|
1
|
|
my $that = shift; |
37
|
0
|
0
|
|
|
|
|
my $mp1 = UNIVERSAL::isa(ref($_[0]),'DiaColloDB::Profile::Multi') ? shift : undef; |
38
|
0
|
0
|
|
|
|
|
my $mp2 = UNIVERSAL::isa(ref($_[0]),'DiaColloDB::Profile::Multi') ? shift : undef; |
39
|
0
|
|
|
|
|
|
my %opts = @_; |
40
|
0
|
|
0
|
|
|
|
my $populate = $opts{populate}//1; |
41
|
0
|
|
|
|
|
|
delete($opts{populate}); |
42
|
0
|
|
|
|
|
|
my $mpd = $that->SUPER::new(%opts); |
43
|
0
|
0
|
0
|
|
|
|
return $mpd->populate($mp1,$mp2) if ($populate && $mp1 && $mp2); |
|
|
|
0
|
|
|
|
|
44
|
0
|
|
|
|
|
|
return $mpd; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
## $mp2 = $mp->clone() |
49
|
|
|
|
|
|
|
## $mp2 = $mp->clone($keep_compiled) |
50
|
|
|
|
|
|
|
## + clones %$mp |
51
|
|
|
|
|
|
|
## + if $keep_score is true, compiled data is cloned too |
52
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Profile::Multi |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
##============================================================================== |
55
|
|
|
|
|
|
|
## I/O |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
58
|
|
|
|
|
|
|
## I/O: JSON |
59
|
|
|
|
|
|
|
## + mostly INHERITED from DiaCollocDB::Persistent |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
## $obj = $CLASS_OR_OBJECT->loadJsonData( $data,%opts) |
62
|
|
|
|
|
|
|
## + guts for loadJsonString(), loadJsonFile() |
63
|
|
|
|
|
|
|
sub loadJsonData { |
64
|
0
|
|
|
0
|
1
|
|
my $that = shift; |
65
|
0
|
|
|
|
|
|
my $mp = $that->DiaColloDB::Persistent::loadJsonData(@_); |
66
|
0
|
|
0
|
|
|
|
foreach (@{$mp->{profiles}//[]}) { |
|
0
|
|
|
|
|
|
|
67
|
0
|
|
|
|
|
|
bless($_,'DiaColloDB::Profile::Diff'); |
68
|
0
|
0
|
|
|
|
|
bless($_->{prf1}, 'DiaColloDB::Profile') if ($_->{prf1}); |
69
|
0
|
0
|
|
|
|
|
bless($_->{prf2}, 'DiaColloDB::Profile') if ($_->{prf2}); |
70
|
|
|
|
|
|
|
} |
71
|
0
|
|
|
|
|
|
return $mp; |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
75
|
|
|
|
|
|
|
## I/O: Text |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
## undef = $CLASS_OR_OBJECT->saveTextHeader($fh, hlabel=>$hlabel, titles=>\@titles) |
78
|
|
|
|
|
|
|
sub saveTextHeader { |
79
|
0
|
|
|
0
|
1
|
|
my ($that,$fh,%opts) = @_; |
80
|
0
|
|
|
|
|
|
DiaColloDB::Profile::Diff::saveTextHeader($that,$fh,hlabel=>'label',@_); |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
## $bool = $obj->saveTextFile($filename_or_handle, %opts) |
84
|
|
|
|
|
|
|
## + wraps saveTextFh(); INHERITED from DiaCollocDB::Persistent |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
## $bool = $mp->saveTextFh($fh,%opts) |
87
|
|
|
|
|
|
|
## + save text representation to a filehandle (guts) |
88
|
|
|
|
|
|
|
## + INHERITED from DiaCollocDB::Profile::Multi |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
91
|
|
|
|
|
|
|
## I/O: HTML |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
## $bool = $mp->saveHtmlFile($filename_or_handle, %opts) |
94
|
|
|
|
|
|
|
## + %opts: |
95
|
|
|
|
|
|
|
## ( |
96
|
|
|
|
|
|
|
## table => $bool, ##-- include <table>..</table> ? (default=1) |
97
|
|
|
|
|
|
|
## body => $bool, ##-- include <html><body>..</html></body> ? (default=1) |
98
|
|
|
|
|
|
|
## verbose => $bool, ##-- include verbose output? (default=0) |
99
|
|
|
|
|
|
|
## qinfo => $varname, ##-- include <script> for qinfo data? (default='qinfo') |
100
|
|
|
|
|
|
|
## header => $bool, ##-- include header-row? (default=1) |
101
|
|
|
|
|
|
|
## format => $fmt, ##-- printf score formatting (default="%.4f") |
102
|
|
|
|
|
|
|
## ) |
103
|
|
|
|
|
|
|
sub saveHtmlFile { |
104
|
0
|
|
|
0
|
1
|
|
my ($mp,$file,%opts) = @_; |
105
|
0
|
0
|
|
|
|
|
my $fh = ref($file) ? $file : IO::File->new(">$file"); |
106
|
0
|
0
|
|
|
|
|
$mp->logconfess("saveHtmlFile(): failed to open '$file': $!") if (!ref($fh)); |
107
|
0
|
0
|
0
|
|
|
|
$fh->print("<html><body>\n") if ($opts{body}//1); |
108
|
|
|
|
|
|
|
$fh->print("<script type=\"text/javascript\">$opts{qinfo}=", DiaColloDB::Utils::saveJsonString($mp->{qinfo}, pretty=>0), ";</script>\n") |
109
|
0
|
0
|
0
|
|
|
|
if ($mp->{qinfo} && ($opts{qinfo} //= 'qinfo')); |
|
|
|
0
|
|
|
|
|
110
|
0
|
0
|
0
|
|
|
|
$fh->print("<table><tbody>\n") if ($opts{table}//1); |
111
|
|
|
|
|
|
|
$fh->print("<tr>",( |
112
|
0
|
|
|
|
|
|
map {"<th>".htmlesc($_)."</th>"} |
113
|
0
|
|
|
|
|
|
($opts{verbose} ? (map {("${_}a","${_}b")} qw(N f1 f2 f12)) : qw()), |
114
|
|
|
|
|
|
|
qw(ascore bscore diff label), |
115
|
0
|
|
0
|
|
|
|
@{$mp->{titles}//[qw(item2)]}, |
116
|
|
|
|
|
|
|
), |
117
|
|
|
|
|
|
|
"</tr>\n" |
118
|
0
|
0
|
0
|
|
|
|
) if ($opts{header}//1); |
|
|
0
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my $ps = $mp->{profiles}; |
120
|
0
|
|
|
|
|
|
foreach (@$ps) { |
121
|
0
|
0
|
|
|
|
|
$_->saveHtmlFile($file, %opts,table=>0,body=>0,header=>0) |
122
|
|
|
|
|
|
|
or $mp->logconfess("saveHtmlFile() saved for sub-profile with label '", $_->label, "': $!"); |
123
|
|
|
|
|
|
|
} |
124
|
0
|
0
|
0
|
|
|
|
$fh->print("</tbody><table>\n") if ($opts{table}//1); |
125
|
0
|
0
|
0
|
|
|
|
$fh->print("</body></html>\n") if ($opts{body}//1); |
126
|
0
|
0
|
|
|
|
|
$fh->close() if (!ref($file)); |
127
|
0
|
|
|
|
|
|
return $mp; |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
##============================================================================== |
131
|
|
|
|
|
|
|
## Compilation |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
## @ppairs = $CLASS_OR_OBJECT->align($mp1,$mp2) |
134
|
|
|
|
|
|
|
## \@ppairs = $CLASS_OR_OBJECT->align($mp1,$mp2) |
135
|
|
|
|
|
|
|
## + aligns subprofile-pairs from $mp1 and $mp2 |
136
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Profile::Multi |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
## $mpd = $mpd->populate($mp1,$mp2,%opts) |
139
|
|
|
|
|
|
|
## + populates multi-diff by subtracting $mp2 sub-profile scores from $mp1 |
140
|
|
|
|
|
|
|
## + uses $mpd->align() to align sub-profiles |
141
|
|
|
|
|
|
|
## + %opts: clobbers %$mpd |
142
|
|
|
|
|
|
|
sub populate { |
143
|
0
|
|
|
0
|
1
|
|
my ($mpd,$mpa,$mpb,%opts) = @_; |
144
|
0
|
|
|
|
|
|
@$mpd{keys %opts} = values %opts; |
145
|
0
|
|
|
|
|
|
@{$mpd->{profiles}} = map { |
146
|
|
|
|
|
|
|
DiaColloDB::Profile::Diff->new($_->[0],$_->[1], diff=>$mpd->{diff}) |
147
|
0
|
|
|
|
|
|
} @{$mpd->align($mpa,$mpb)}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
148
|
0
|
0
|
0
|
|
|
|
if ($mpa->{qinfo} || $mpb->{qinfo}) { |
149
|
|
|
|
|
|
|
$mpd->{qinfo} = { |
150
|
0
|
|
0
|
|
|
|
(map {("a$_"=>$mpa->{qinfo}{$_})} keys %{$mpa->{qinfo}//{}}), |
|
0
|
|
|
|
|
|
|
151
|
0
|
|
0
|
|
|
|
(map {("b$_"=>$mpb->{qinfo}{$_})} keys %{$mpb->{qinfo}//{}}), |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
}; |
153
|
|
|
|
|
|
|
} |
154
|
0
|
|
|
|
|
|
return $mpd; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
## $mp_or_undef = $mp->compile($func,%opts) |
158
|
|
|
|
|
|
|
## + compile all sub-profiles for score-function $func; default='f' |
159
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Profile::Multi |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
## $mp = $mp->uncompile() |
162
|
|
|
|
|
|
|
## + un-compiles all scores for $mp |
163
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Profile::Multi |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
## $class = $CLASS_OR_OBJECT->pclass() |
166
|
|
|
|
|
|
|
## + class for psum() |
167
|
|
|
|
|
|
|
sub pclass { |
168
|
0
|
|
|
0
|
1
|
|
return 'DiaColloDB::Profile::Diff'; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
## $prf = $mp->psum() |
172
|
|
|
|
|
|
|
## $prf = $CLASS_OR_OBJECT->psum(\@profiles) |
173
|
|
|
|
|
|
|
## + sum of sub-profiles, compiled as for $profiles[0] |
174
|
|
|
|
|
|
|
## + used for global trimming |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
## $mp_or_undef = $mp->trim(%opts) |
177
|
|
|
|
|
|
|
## + calls $prf->trim(%opts) for each sub-profile $prf |
178
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Profile::Multi |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
## $mp_or_undef = $CLASS_OR_OBJECT->trimPairs(\@pairs, %opts) |
181
|
|
|
|
|
|
|
## + %opts: as for DiaColloDB::Profile::Multi::trim(), including 'global' and 'diff' options |
182
|
|
|
|
|
|
|
sub trimPairs { |
183
|
0
|
|
|
0
|
1
|
|
my ($that,$ppairs,%opts) = @_; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
##-- defaults |
186
|
0
|
|
0
|
|
|
|
$opts{kbest} //= -1; |
187
|
0
|
|
0
|
|
|
|
$opts{cutoff} //= ''; |
188
|
0
|
|
0
|
|
|
|
$opts{global} //= 0; |
189
|
0
|
|
0
|
|
|
|
$opts{diff} //= 'adiff'; |
190
|
|
|
|
|
|
|
|
191
|
0
|
0
|
|
|
|
|
if ($opts{global}) { |
192
|
|
|
|
|
|
|
##-- (pre-)trim globally |
193
|
0
|
|
|
|
|
|
my $gpa = DiaColloDB::Profile::Multi->sumover(luniq([map {$_->[0]} @$ppairs]), eps=>$opts{eps}); |
|
0
|
|
|
|
|
|
|
194
|
0
|
|
|
|
|
|
my $gpb = DiaColloDB::Profile::Multi->sumover(luniq([map {$_->[1]} @$ppairs]), eps=>$opts{eps}); |
|
0
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
|
DiaColloDB::Profile::Diff->pretrim($gpa,$gpb,%opts); |
196
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
|
my $gdiff = DiaColloDB::Profile::Diff->new($gpa,$gpb, diff=>$opts{diff}); |
198
|
0
|
|
|
|
|
|
my %keep = map {($_=>undef)} @{$gdiff->which( DiaColloDB::Profile::Diff->diffkbest($opts{diff})=>$opts{kbest} )}; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
$_->trim(keep=>\%keep) foreach (grep {$_} map {@$_} @$ppairs); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
else { |
202
|
|
|
|
|
|
|
##-- (pre-)trim locally |
203
|
0
|
|
|
|
|
|
foreach (@$ppairs) { |
204
|
0
|
|
|
|
|
|
DiaColloDB::Profile::Diff->pretrim(@$_[0,1],%opts); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
} |
207
|
|
|
|
|
|
|
|
208
|
0
|
|
|
|
|
|
return $ppairs; |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
## $mp = $mp->stringify( $obj) |
212
|
|
|
|
|
|
|
## $mp = $mp->stringify(\@key2str) |
213
|
|
|
|
|
|
|
## $mp = $mp->stringify(\&key2str) |
214
|
|
|
|
|
|
|
## $mp = $mp->stringify(\%key2str) |
215
|
|
|
|
|
|
|
## + stringifies multi-profile (destructive) via $obj->i2s($key2), $key2str->($i2) or $key2str->{$i2} |
216
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Profile::Multi |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
##============================================================================== |
219
|
|
|
|
|
|
|
## Binary operations |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
## $mp = $mp->_add($mp2,%opts) |
222
|
|
|
|
|
|
|
## + adds $mp2 frequency data to $mp (destructive) |
223
|
|
|
|
|
|
|
## + implicitly un-compiles sub-profiles |
224
|
|
|
|
|
|
|
## + %opts: passed to Profile::_add() |
225
|
|
|
|
|
|
|
## + INHERITED but probably useless |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
## $mp3 = $mp1->add($mp2,%opts) |
228
|
|
|
|
|
|
|
## + returns sum of $mp1 and $mp2 frequency data (destructive) |
229
|
|
|
|
|
|
|
## + %opts: passed to Profile::_add() |
230
|
|
|
|
|
|
|
## + INHERITED but probably useless |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
## $diff = $mp1->diff($mp2) |
233
|
|
|
|
|
|
|
## + returns score-diff of $mp1 and $mp2 frequency data (destructive) |
234
|
|
|
|
|
|
|
## + INHERITED but probably useless |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
##============================================================================== |
237
|
|
|
|
|
|
|
## Package DiaColloDB::Profile::Multi::Diff : alias |
238
|
|
|
|
|
|
|
package DiaColloDB::Profile::Multi::Diff; |
239
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Profile::MultiDiff); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
##============================================================================== |
242
|
|
|
|
|
|
|
## Package DiaColloDB::Profile::Diff::Multi : alias |
243
|
|
|
|
|
|
|
package DiaColloDB::Profile::Diff::Multi; |
244
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Profile::MultiDiff); |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
##============================================================================== |
248
|
|
|
|
|
|
|
## Footer |
249
|
|
|
|
|
|
|
1; |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
__END__ |