| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
|
2
|
|
|
|
|
|
|
## File: DiaColloDB::Relation::Unigrams.pm |
|
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
|
4
|
|
|
|
|
|
|
## Description: collocation db, profiling relation: unigram database (using DiaColloDB::PackedFile) |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package DiaColloDB::Relation::Unigrams; |
|
7
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::Relation; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
27
|
|
|
8
|
1
|
|
|
1
|
|
4
|
use DiaColloDB::PackedFile; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
22
|
|
|
9
|
1
|
|
|
1
|
|
4
|
use DiaColloDB::Utils qw(:fcntl :sort :env :run :pack :file :jobs); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
43
|
|
|
10
|
1
|
|
|
1
|
|
343
|
use Fcntl qw(:DEFAULT :seek); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
27
|
|
|
11
|
1
|
|
|
1
|
|
420
|
use File::Basename qw(dirname); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
23
|
|
|
12
|
1
|
|
|
1
|
|
104
|
use version; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
6
|
|
|
13
|
1
|
|
|
1
|
|
61
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
2768
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##============================================================================== |
|
16
|
|
|
|
|
|
|
## Globals & Constants |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## @ISA : parent class
our @ISA = ('DiaColloDB::Relation');

## $PFCLASS : object class for nested PackedFile objects
##  + resolved through $ug->mmclass() by new() for the r1, r2, and rN members
our $PFCLASS = 'DiaColloDB::PackedFile::MMap';
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
##============================================================================== |
|
24
|
|
|
|
|
|
|
## Constructors etc. |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
## $ug = CLASS_OR_OBJECT->new(%args) |
|
27
|
|
|
|
|
|
|
## + %args, object structure: |
|
28
|
|
|
|
|
|
|
## ( |
|
29
|
|
|
|
|
|
|
## ##-- user options |
|
30
|
|
|
|
|
|
|
## base => $basename, ##-- file basename (default=undef:none); use files "${base}.dba1", "${base}.dba2", "${base}.dbaN", "${base}.hdr" |
|
31
|
|
|
|
|
|
|
## flags => $flags, ##-- fcntl flags or open-mode (default='r') |
|
32
|
|
|
|
|
|
|
## perms => $perms, ##-- creation permissions (default=(0666 &~umask)) |
|
33
|
|
|
|
|
|
|
## pack_i => $pack_i, ##-- pack-template for IDs (default='N') |
|
34
|
|
|
|
|
|
|
## pack_f => $pack_f, ##-- pack-template for frequencies (default='N') |
|
35
|
|
|
|
|
|
|
## pack_d => $pack_d, ##-- pack-template for dates (default='n') |
|
36
|
|
|
|
|
|
|
## keeptmp => $bool, ##-- keep temporary files? (default=false) |
|
37
|
|
|
|
|
|
|
## mmap => $bool, ##-- use mmap access? (default=true) |
|
38
|
|
|
|
|
|
|
## logCompat => $level, ##-- log-level for compatibility warnings (default='warn') |
|
39
|
|
|
|
|
|
|
## ## |
|
40
|
|
|
|
|
|
|
## ##-- size info (after open() or load()) |
|
41
|
|
|
|
|
|
|
## size1 => $size1, ##-- == $r1->size() |
|
42
|
|
|
|
|
|
|
## size2 => $size2, ##-- == $r2->size() |
|
43
|
|
|
|
|
|
|
## sizeN => $sizeN, ##-- == $rN->size() |
|
44
|
|
|
|
|
|
|
## ## |
|
45
|
|
|
|
|
|
|
## ##-- low-level data |
|
46
|
|
|
|
|
|
|
## r1 => $r1, ##-- pf: [$end2] @ $i1 : constant (logical index) |
|
47
|
|
|
|
|
|
|
## r2 => $r2, ##-- pf: [$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) : sorted by $d1 for each $i1 |
|
48
|
|
|
|
|
|
|
## rN => $rN, ##-- pf: [$fN] @ $date - $ymin : totals by date |
|
49
|
|
|
|
|
|
|
## ymin => $dmin, ##-- constant == $coldb->{xdmin} |
|
50
|
|
|
|
|
|
|
## N => $N, ##-- sum($f12) [always used for version <= 0.11; used here only for slice==0] |
|
51
|
|
|
|
|
|
|
## version => $version, ##-- file version, for compatibility checks |
|
52
|
|
|
|
|
|
|
## ) |
|
53
|
|
|
|
|
|
|
sub new {
  ##-- constructor: $ug = CLASS_OR_OBJECT->new(%args)
  ##  + may be called on a class name or on an existing object (whose class is re-used)
  ##  + opens the database immediately if a 'base' argument was given
  my ($that, @args) = @_;

  ##-- default attributes; caller-supplied @args come last and therefore override them
  my %attrs = (
    base      => undef,
    flags     => 'r',
    perms     => (0666 & ~umask),
    pack_i    => 'N',
    pack_f    => 'N',
    pack_d    => 'n',
    N         => 0,
    version   => $DiaColloDB::VERSION,
    logCompat => 'warn',
    #keeptmp  => 0,
    #mmap     => 1,
    @args,
  );
  my $ug = bless(\%attrs, (ref($that) || $that));

  ##-- instantiate whichever nested packed-file objects the caller did not supply
  foreach my $pfkey (qw(r1 r2 rN)) {
    next if (defined($ug->{$pfkey}));
    $ug->{$pfkey} = $ug->mmclass($PFCLASS)->new();
  }
  $ug->{class} = ref($ug);

  ##-- auto-open iff a basename is known
  return defined($ug->{base}) ? $ug->open() : $ug;
}
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
## undef = $ug->DESTROY()
##  + destructor: closes the database if it is still opened
sub DESTROY {
  my ($ug) = @_;
  $ug->opened and $ug->close();
}
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
##============================================================================== |
|
80
|
|
|
|
|
|
|
## Persistent API: disk usage |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
## @files = $obj->diskFiles()
##  + returns disk storage files, used by du() and timestamp()
##  + lists the relation header "$base.hdr" plus the data files of the nested
##    packed-files r1, r2, and rN ("$base.dba1", "$base.dba2", "$base.dbaN")
##    as opened by open(), each together with its own ".hdr" companion
##  + NOTE(review): "$base.dbaN.hdr" is assumed to exist by analogy with
##    "$base.dba1.hdr" and "$base.dba2.hdr" -- confirm against PackedFile
sub diskFiles {
  my $ug = shift;
  return map {"$ug->{base}$_"} (qw(.hdr .dba1 .dba1.hdr .dba2 .dba2.hdr .dbaN .dbaN.hdr));
}
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
##============================================================================== |
|
89
|
|
|
|
|
|
|
## I/O |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
92
|
|
|
|
|
|
|
## I/O: open/close |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
## $ug_or_undef = $ug->open($base,$flags)
## $ug_or_undef = $ug->open($base)
## $ug_or_undef = $ug->open()
##  + $base and $flags default to $ug->{base} and $ug->{flags}
##  + closes $ug first if it is already opened
##  + reads and loads the header unless $flags request truncation (or are not readable)
##  + if the header has no version or a version below 0.12.000, $ug is re-blessed into
##    DiaColloDB::Compat::v0_11::Relation::Unigrams and opened by that class instead
##  + opens the nested packed-files "$base.dba1", "$base.dba2", "$base.dbaN"
##    and caches their sizes in $ug->{size1}, $ug->{size2}, $ug->{sizeN}
sub open {
  my ($ug,$base,$flags) = @_;
  $base  //= $ug->{base};
  $flags //= $ug->{flags};
  $ug->close() if ($ug->opened);
  $ug->{base}  = $base;
  $ug->{flags} = $flags = fcflags($flags);

  ##-- read header; saved in $hdr for version-checking
  my ($hdr);
  if (fcread($flags) && !fctrunc($flags)) {
    $hdr = ($ug->readHeader() || $ug->readHeader("$ug->{base}.dba.hdr"))
      or $ug->logconfess("failed to read header data from '$ug->{base}.hdr': $!");
    $ug->loadHeaderData($hdr)
      or $ug->logconfess("failed to load header data from '$ug->{base}.hdr': $!");
  }

  ##-- check compatibility
  my $min_version = qv(0.12.000);
  if ($hdr && (!defined($hdr->{version}) || version->parse($hdr->{version}) < $min_version)) {
    $ug->vlog($ug->{logCompat}, "using v0.11 compatibility mode for $ug->{base}.*; consider running \`dcdb-upgrade.perl ", dirname($ug->{base}), "\'");
    DiaColloDB::Compat->usecompat('v0_11');
    bless($ug, 'DiaColloDB::Compat::v0_11::Relation::Unigrams');
    $ug->{version} = $hdr->{version};
    return $ug->open($base,$flags);   ##-- dispatches to the compatibility class
  }

  ##-- open low-level data structures
  $ug->{r1}->open("$base.dba1", $flags, perms=>$ug->{perms}, packas=>"$ug->{pack_i}")
    or $ug->logconfess("open failed for $base.dba1: $!");
  $ug->{r2}->open("$base.dba2", $flags, perms=>$ug->{perms}, packas=>"$ug->{pack_d}$ug->{pack_f}")
    or $ug->logconfess("open failed for $base.dba2: $!");
  $ug->{rN}->open("$base.dbaN", $flags, perms=>$ug->{perms}, packas=>"$ug->{pack_f}")
    or $ug->logconfess("open failed for $base.dbaN: $!");

  ##-- cache sizes
  $ug->{size1} = $ug->{r1}->size;
  $ug->{size2} = $ug->{r2}->size;
  $ug->{sizeN} = $ug->{rN}->size;

  return $ug;
}
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
## $ug_or_undef = $ug->close()
##  + saves the header first if $ug is opened with write-flags
##  + closes the nested packed-files r1, r2, rN (in that order), then forgets $ug->{base}
##  + returns undef as soon as any of these steps fails
sub close {
  my ($ug) = @_;

  ##-- save header for writable databases
  if ($ug->opened && fcwrite($ug->{flags})) {
    return undef if (!$ug->saveHeader());
  }

  ##-- close nested packed-files
  foreach my $pfkey (qw(r1 r2 rN)) {
    return undef if (!$ug->{$pfkey}->close());
  }

  $ug->{base} = undef;
  return $ug;
}
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
## $bool = $ug->opened()
##  + true iff $ug->{base} is defined and each of r1, r2, rN is defined and reports itself opened
##  + checks stop at the first failing condition
sub opened {
  my ($ug) = @_;
  my $rc = defined($ug->{base});
  foreach my $pfkey (qw(r1 r2 rN)) {
    last if (!$rc);
    $rc = (defined($ug->{$pfkey}) && $ug->{$pfkey}->opened);
  }
  return $rc;
}
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
161
|
|
|
|
|
|
|
## I/O: header |
|
162
|
|
|
|
|
|
|
## + largely INHERITED from DiaColloDB::Persistent |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## @keys = $ug->headerKeys()
##  + keys to save as header
##  + selects every key of %$ug whose value is not a reference, except for
##    base, flags, perms, packas, reclen, and any key beginning with "log"
sub headerKeys {
  my $ug      = shift;
  my $skip_re = qr{^(?:base|flags|perms|log.*|packas|reclen)$};
  return grep { !ref($ug->{$_}) && $_ !~ $skip_re } keys %$ug;
}
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
## $bool = $ug->loadHeaderData($hdr)
##  + instantiates header data from $hdr
##  + overrides DiaColloDB::Persistent implementation
##  + a defined $hdr is passed on to the inherited implementation
##  + an undefined $hdr is fatal unless $ug->{flags} include the create-flag,
##    in which case $ug is returned unchanged
sub loadHeaderData {
  my ($ug,$hdr) = @_;

  ##-- usual case: delegate to the inherited implementation
  return $ug->SUPER::loadHeaderData($hdr) if (defined($hdr));

  ##-- no header data: only acceptable for databases which may be created
  $ug->logconfess("loadHeaderData() failed to load header data from ", $ug->headerFile, ": $!")
    if (!fccreat($ug->{flags}));

  return $ug;
}
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
## $bool = $enum->saveHeader() |
|
185
|
|
|
|
|
|
|
## + inherited from DiaColloDB::Persistent |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
188
|
|
|
|
|
|
|
## I/O: text |
|
189
|
|
|
|
|
|
|
## + largely INHERITED from DiaColloDB::Persistent |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
## $bool = $obj->loadTextFile($filename_or_handle, %opts) |
|
192
|
|
|
|
|
|
|
## + wraps loadTextFh() |
|
193
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Persistent |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
## $ug = $ug->loadTextFh($fh,%opts)
##  + loads from text file as saved by saveTextFh()
##  + input fh must be sorted by $i1,$d1
##  + supports multiple lines for pairs ($i1,$d1) provided the above conditions hold
##  + supports loading of $ug->{N} from single-component lines
##  + lines consisting of a count followed by an empty ID field are ignored
##  + %opts: clobber %$ug
##  + truncates r1, r2, and rN before loading; flushes them and updates
##    $ug->{ymin}, $ug->{N}, $ug->{size1}, $ug->{size2}, $ug->{sizeN} afterwards
##  + input lines are read into a lexical, so the caller's $_ is left untouched
sub loadTextFh {
  my ($ug,$infh,%opts) = @_;
  if (!ref($ug)) {
    $ug = $ug->new(%opts);
  } else {
    @$ug{keys %opts} = values %opts;
  }
  $ug->logconfess("loadTextFh(): cannot load unopened database!") if (!$ug->opened);

  ##-- common variables
  ##   $r1 : [$end2]      @ $i1
  ##   $r2 : [$d1,$f1]*   @ end2($i1-1)..(end2($i1+1)-1)
  my ($r1,$r2,$rN) = @$ug{qw(r1 r2 rN)};
  my ($pack_r1,$pack_r2) = map {$_->{packas}} ($r1,$r2);
  $r1->truncate();
  $r2->truncate();
  $rN->truncate();
  my ($fh1,$fh2) = ($r1->{fh},$r2->{fh});

  ##-- iteration variables
  my ($pos1,$pos2) = (0,0);   ##-- number of records written so far to $r1, $r2
  my $i1_cur = -1;            ##-- ID whose records are currently buffered in %fd (-1: none yet)
  my ($f1,$i1,$d1);           ##-- fields of the current input line
  my $N  = 0;                 ##-- total marginal frequency as extracted from %fd
  my $N1 = 0;                 ##-- total N as extracted from single-element records
  my %fd = qw();              ##-- ($d=>$f1d, ...) for $i1_cur
  my %fN = qw();              ##-- ($d=>$fd, ...) global

  ##-- guts for inserting records from $i1_cur,%fd,$pos1,$pos2 : call on changed ($i1_cur)
  my $insert = sub {
    if ($i1_cur >= 0) {
      if ($i1_cur != $pos1) {
        ##-- we've skipped one or more $i1 because it had no data-lines:
        ##   pad $r1 with empty ranges (each ending at the current $pos2)
        $fh1->print( pack($pack_r1,$pos2) x ($i1_cur-$pos1) );
        $pos1 = $i1_cur;
      }

      ##-- dump r2-record(s) for ($i1_cur), sorted by date
      foreach my $d (sort {$a<=>$b} keys %fd) {
        $fh2->print(pack($pack_r2, $d,$fd{$d}));
        ++$pos2;
      }

      ##-- dump r1-record for $i1_cur
      $fh1->print(pack($pack_r1, $pos2));
      $pos1 = $i1_cur+1;
    }
    $i1_cur = $i1;
    %fd = qw();
  };

  ##-- ye olde loope
  binmode($infh,':raw');
  my $line;
  while (defined($line=<$infh>)) {
    chomp($line);
    ($f1,$i1,$d1) = split(' ',$line,3);
    if (!defined($i1)) {
      $N1 += $f1;             ##-- load N values
      next;
    }
    elsif ($i1 eq '') {
      next;                   ##-- ignore EOS counts from create()
    }
    elsif (!defined($d1)) {
      $ug->logconfess("loadTextFh(): failed to parse input line ", $infh->input_line_number);
    }
    $insert->()               ##-- insert record(s) for ($i1_cur)
      if ($i1 != $i1_cur);
    $fd{$d1} += $f1;          ##-- buffer frequencies for ($i1_cur,$d1_cur)
    $fN{$d1} += $f1;          ##-- track N by date
    $N       += $f1;          ##-- track marginal N
  }
  $i1 = -1;
  $insert->();                ##-- write record(s) for final ($i1_cur)

  ##-- create $rN by date: one record per date from the earliest to the latest date seen
  ##   NOTE(review): if no dated records were read, @dates is empty and $ymin is undef -- confirm callers tolerate this
  my @dates = sort {$a<=>$b} keys %fN;
  my $ymin  = $ug->{ymin} = $dates[0];
  $rN->{fh}->print(pack("($rN->{packas})*", map {$fN{$_}//0} ($ymin..$dates[$#dates])));

  ##-- adopt final $N and sizes
  $ug->{N} = $N1>$N ? $N1 : $N;
  foreach my $rkey (qw(1 2 N)) {
    my $r = $ug->{"r$rkey"};
    $r->flush();
    $ug->{"size$rkey"} = $r->size;
  }

  return $ug;
}
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
## $bool = $obj->saveTextFile($filename_or_handle, %opts) |
|
293
|
|
|
|
|
|
|
## + wraps saveTextFh() |
|
294
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Persistent |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
## $bool = $ug->saveTextFh($fh,%opts)
##  + writes the database to $fh as text, with lines of the form:
##      N                 ##-- 1 field : N
##      FREQ ID1 DATE     ##-- 3 fields: unigram frequency for (ID1,DATE)
##  + fields are TAB-separated; the N line is written first
##  + %opts:
##      i2s => \&CODE,    ##-- code-ref for formatting indices; called as $s=CODE($i)
sub saveTextFh {
  my ($ug,$outfh,%opts) = @_;
  $ug->logconfess("saveTextFile(): cannot save unopened DB") if (!$ug->opened);

  ##-- common variables
  ##   $r1 : [$end2]      @ $i1
  ##   $r2 : [$d1,$f1]*   @ end2($i1-1)..(end2($i1+1)-1)
  my $r1     = $ug->{r1};
  my $r2     = $ug->{r2};
  my $tpl1   = $r1->{packas};
  my $tpl2   = $r2->{packas};
  my $fmt_id = $opts{i2s};

  ##-- header line: total N
  binmode($outfh,':raw');
  $outfh->print($ug->{N}, "\n");

  ##-- walk $r1 sequentially; each record gives the end-offset in $r2 of the current ID's records
  my ($rec1,$rec2);
  my ($id,$off) = (0,0);
  $r1->seek($id);
  $r2->seek($off);
  while (!$r1->eof()) {
    $r1->read(\$rec1) or $ug->logconfess("saveTextFile(): failed to read record $id from $r1->{file}: $!");
    my $end   = unpack($tpl1,$rec1);
    my $label = $fmt_id ? $fmt_id->($id) : $id;

    ##-- dump all (date,frequency) records belonging to the current ID
    while ($off < $end && !$r2->eof()) {
      $r2->read(\$rec2) or $ug->logconfess("saveTextFile(): failed to read record $off from $r2->{file}: $!");
      my ($date,$freq) = unpack($tpl2,$rec2);

      $outfh->print(join("\t", $freq, $label, $date), "\n");
      ++$off;
    }
    ++$id;
  }

  return $ug;
}
|
335
|
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
##============================================================================== |
|
338
|
|
|
|
|
|
|
## Relation API: create |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
## $ug = $CLASS_OR_OBJECT->create($coldb,$tokdat_file,%opts)
##  + populates current database from $tokdat_file,
##    a tt-style text file containing lines of the form:
##      TID DATE   ##-- single token
##      "\n"       ##-- blank line --> EOS
##  + %opts: clobber %$ug
##  + $tokdat_file is piped through "sort | uniq -c" (under LC_ALL=C) and
##    the resulting counts are loaded by loadTextFh()
##  + saves the header when done
sub create {
  my ($ug,$coldb,$datfile,%opts) = @_;

  ##-- create/clobber
  unless (ref($ug)) {
    $ug = $ug->new();
  }
  $ug->{$_} = $opts{$_} foreach (keys %opts);

  ##-- ensure opened
  if (!$ug->opened) {
    $ug->open()
      or $ug->logconfess("create(): failed to open unigrams database: $!");
  }

  ##-- count (TID,DATE) pairs via external sort and load the counts
  env_push(LC_ALL=>'C');
  my $sortcmd = "sort -nk1 -nk2 ".sortJobs()." $datfile | uniq -c |";
  my $cmdfh   = opencmd($sortcmd)
    or $ug->logconfess("create(): failed to open pipe from sort: $!");
  $ug->loadTextFh($cmdfh)
    or $ug->logconfess("create(): failed to load unigram data: $!");
  $cmdfh->close()
    or $ug->logconfess("create(): failed to close pipe from sort: $!");
  env_pop();

  ##-- save header
  $ug->saveHeader()
    or $ug->logconfess("create(): failed to save header: $!");

  ##-- done
  return $ug;
}
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
##============================================================================== |
|
376
|
|
|
|
|
|
|
## Relation API: union |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
## $ug = CLASS_OR_OBJECT->union($coldb, \@pairs, %opts)
##  + merge multiple unigram indices into new object
##  + @pairs : array of pairs ([$ug,\@ti2u],...)
##    of unigram-objects $ug and tuple-id maps \@ti2u for $ug
##    - \@ti2u may also be a mapping object supporting a toArray() method
##  + %opts: clobber %$ug
##  + each argument is dumped by saveTextFh() (with IDs mapped through \@ti2u) to the
##    temporary file "$ug->{base}.udat", which is then sorted and loaded by loadTextFh()
##  + the temporary file is removed unless $ug->{keeptmp} is true
##  + implicitly flushes the new index
sub union {
  my ($ug,$coldb,$pairs,%opts) = @_;

  ##-- create/clobber
  unless (ref($ug)) {
    $ug = $ug->new();
  }
  $ug->{$_} = $opts{$_} foreach (keys %opts);

  ##-- tempfile (input for sort)
  my $tmpfile = "$ug->{base}.udat";
  my $tmpfh   = IO::File->new(">$tmpfile")
    or $ug->logconfess("union(): open failed for tempfile $tmpfile: $!");
  binmode($tmpfh,':raw');

  ##-- stage1: dump argument relations to text tempfile
  $ug->vlog('trace', "union(): stage1: collect items");
  foreach my $argi (0..$#$pairs) {
    my ($arg_ug,$idmap) = @{$pairs->[$argi]};
    if (UNIVERSAL::can($idmap,'toArray')) {
      $idmap = $idmap->toArray();
    }
    $arg_ug->saveTextFh($tmpfh, i2s=>sub { $idmap->[$_[0]] })
      or $ug->logconfess("union(): failed to extract data for argument $argi");
  }
  $tmpfh->close()
    or $ug->logconfess("union(): failed to close tempfile $tmpfile: $!");

  ##-- stage2: sort & load tempfile
  env_push(LC_ALL=>'C');
  $ug->vlog('trace', "union(): stage2: load unigram frequencies");
  my $sortcmd = "sort -n -k2 -k3 ".sortJobs()." $tmpfile |";
  my $sortfh  = opencmd($sortcmd)
    or $ug->logconfess("union(): open failed for pipe from sort: $!");
  binmode($sortfh,':raw');
  $ug->loadTextFh($sortfh)
    or $ug->logconfess("union(): failed to load unigram frequencies from $tmpfile: $!");
  $sortfh->close()
    or $ug->logconfess("union(): failed to close pipe from sort: $!");
  env_pop();

  ##-- stage3: header
  $ug->saveHeader()
    or $ug->logconfess("union(): failed to save header: $!");

  ##-- cleanup: unlink temp file(s)
  unless ($ug->{keeptmp}) {
    CORE::unlink($tmpfile);
  }

  return $ug;
}
|
433
|
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
##============================================================================== |
|
435
|
|
|
|
|
|
|
## Relation API: dbinfo |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
## \%info = $rel->dbinfo($coldb)
##  + embedded info-hash for $coldb->dbinfo()
##  + extends the inherited info-hash by the keys size1, size2, sizeN, and N
sub dbinfo {
  my ($ug) = @_;
  my $info = $ug->SUPER::dbinfo();
  $info->{$_} = $ug->{$_} foreach (qw(size1 size2 sizeN N));
  return $info;
}
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
##============================================================================== |
|
448
|
|
|
|
|
|
|
## Utils: lookup |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
## $N = $cof->sliceN($sliceBy, $dateLo) |
|
451
|
|
|
|
|
|
|
## + get total slice co-occurrence count, used by subprofile1() |
|
452
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Relation |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
##============================================================================== |
|
455
|
|
|
|
|
|
|
## Relation API: default |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
458
|
|
|
|
|
|
|
## Relation API: default: sliceN |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
## $N = $rel->sliceN($sliceBy, $dateLo) |
|
461
|
|
|
|
|
|
|
## + get total slice-wise co-occurrence count for a slice of size $sliceBy starting at $dateLo |
|
462
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Relation |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
465
|
|
|
|
|
|
|
## Relation API: default: profile |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
## \%slice2prf = $rel->subprofile1(\@tids,\%opts)
##  + get slice-wise unigram profile(s) for @tids (db must be opened)
##  + \@tids may also be a single ID
##  + each returned profile accumulates {f1}, {f12}{$key2}, and {f2}{$key2},
##    where $key2 is the group-key for an ID in @tids
##  + %opts: as for profile(), also:
##     coldb   => $coldb,    ##-- parent DiaColloDB object; provides {pack_id}
##     dreq    => \%dreq,    ##-- parsed date request; uses {dfilter}, {slo}, {shi}
##     slice   => $slice,    ##-- slice size; false for a single slice with key 0
##     groupby => \%groupby, ##-- uses CODE-ref {ti2g}: maps an ID to its group-key (undef: reject)
##     extend  => \%extend,  ##-- if true, only keys existing in $extend->{$slice}{$key2} are counted
sub subprofile1 {
  my ($ug,$tids,$opts) = @_;

  ##-- common variables
  $tids = [$tids] if (!UNIVERSAL::isa($tids,'ARRAY'));
  my $coldb   = $opts->{coldb};
  my $slice   = $opts->{slice};
  my $dreq    = $opts->{dreq};
  my $dfilter = $dreq->{dfilter};
  my $groupby = $opts->{groupby}{ti2g};
  my $extend  = $opts->{extend};
  my $pack_id = $coldb->{pack_id};

  ##-- vars: relation-wise
  ##   $r1 : [$end2]      @ $i1
  ##   $r2 : [$d1,$f1]*   @ end2($i1-1)..(end2($i1+1)-1)
  my ($r1,$r2) = @$ug{qw(r1 r2)};
  my ($pack1,$pack2) = map {$_->{packas}} ($r1,$r2);
  my $size1 = $ug->{size1} // ($ug->{size1}=$r1->size);
  my $size2 = $ug->{size2} // ($ug->{size2}=$r2->size);

  ##-- setup %slice2prf: one empty profile per requested slice
  my %slice2prf = map {
    ($_ => DiaColloDB::Profile->new(f1=>0, N=>$ug->sliceN($slice,$_)))
  } ($slice ? (map {$_*$slice} (($dreq->{slo}/$slice)..($dreq->{shi}/$slice))) : 0);

  ##-- ye olde loope
  my ($beg2,$end2, $pos2,$d1,$ds,$dprf,$f1, $key2,$buf);
  foreach my $i1 (@$tids) {
    next if ($i1 >= $size1);

    ##-- r2-range for $i1: from the end-offset of its predecessor up to its own end-offset
    $beg2 = ($i1==0 ? 0 : unpack($pack1,$r1->fetchraw($i1-1,\$buf)));
    $end2 = unpack($pack1, $r1->fetchraw($i1,\$buf));

    ##-- check groupby "having" filter
    $key2 = $groupby ? $groupby->($i1) : pack($pack_id,$i1);

    next if ($beg2 >= $size2);
    for ($pos2=$beg2; $pos2 < $end2; ++$pos2) {
      ($d1,$f1) = unpack($pack2, $r2->fetchraw($pos2,\$buf));

      ##-- check date-filter & get slice-local profile $dprf
      next if ($dfilter && !$dfilter->($d1));
      $ds   = $slice ? int($d1/$slice)*$slice : 0;
      $dprf = $slice2prf{$ds};
      $dprf->{f1} += $f1;

      next if (!defined($key2)                                 ##-- item2 selection via groupby CODE-ref
               || ($extend && !exists($extend->{$ds}{$key2}))  ##-- ... or via 'extend' parameter
              );
      $dprf->{f12}{$key2} += $f1;
      $dprf->{f2}{$key2}  += $f1;
    }
  }

  return \%slice2prf;
}
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
534
|
|
|
|
|
|
|
## Relation API: default: subprofile2 |
|
535
|
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
## \%slice2prf = $rel->subprofile2(\%slice2prf, \%opts) |
|
537
|
|
|
|
|
|
|
## + populate f2 frequencies for profiles in \%slice2prf |
|
538
|
|
|
|
|
|
|
## + %opts: as for subprofile1() |
|
539
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Relation : no-op |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
542
|
|
|
|
|
|
|
## Relation API: default: qinfo |
|
543
|
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
## \%qinfo = $rel->qinfo($coldb, %opts)
##  + get query-info hash for profile administrivia (ddc hit links)
##  + %opts: as for profile(), additionally:
##    (
##     qreqs => \@qreqs,      ##-- as returned by $coldb->parseRequest($opts{query})
##     gbreq => \%groupby,    ##-- as returned by $coldb->groupby($opts{groupby})
##    )
##  + returns a hash with keys fcoef (always 1), qtemplate, and qcanon
sub qinfo {
  my ($rel,$coldb,%opts) = @_;
  my ($q1strs,$q2strs,$qxstrs,$fstrs) = $rel->qinfoData($coldb,%opts);

  ##-- item conditions: fall back to '*' if there are none at all
  my @conds = map { @$_ } ($q1strs,$q2strs,$qxstrs);
  push(@conds,'*') if (!@conds);

  ##-- query template: conditions, then filter strings (if any)
  my $template = '(' . join(' WITH ', @conds) . ') =1' . ' #SEPARATE';
  if (@$fstrs) {
    $template .= ' ' . join(' ',@$fstrs);
  }

  return {
    fcoef     => 1,
    qtemplate => $template,
    qcanon    => $rel->qcanon($coldb,%opts),
  };
}
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
##============================================================================== |
|
569
|
|
|
|
|
|
|
## Package Alias(es) |
|
570
|
|
|
|
|
|
|
##-- alias package: DiaColloDB::Unigrams inherits everything from DiaColloDB::Relation::Unigrams
package DiaColloDB::Unigrams;
use strict;
our @ISA = ('DiaColloDB::Relation::Unigrams');

##==============================================================================
## Footer
1;
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
__END__ |