line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## File: DiaColloDB::Compat::v0_11::Relation::Cofreqs.pm |
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
4
|
|
|
|
|
|
|
## Description: collocation db, profiling relation: co-frequency database (using pair of DiaColloDB::PackedFile) |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package DiaColloDB::Compat::v0_11::Relation::Cofreqs; |
7
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::Compat; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
8
|
1
|
|
|
1
|
|
6
|
use DiaColloDB::Relation::Cofreqs; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
23
|
|
9
|
1
|
|
|
1
|
|
5
|
use DiaColloDB::PackedFile; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
18
|
|
10
|
1
|
|
|
1
|
|
5
|
use DiaColloDB::PackedFile::MMap; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
38
|
|
11
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::Utils qw(:fcntl :env :run :json :pack); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
12
|
1
|
|
|
1
|
|
372
|
use Fcntl qw(:DEFAULT :seek); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
13
|
1
|
|
|
1
|
|
476
|
use File::Basename qw(dirname); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
14
|
1
|
|
|
1
|
|
131
|
use version; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
15
|
1
|
|
|
1
|
|
78
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
803
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
##============================================================================== |
18
|
|
|
|
|
|
|
## Globals & Constants |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Relation::Cofreqs DiaColloDB::Compat); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
##============================================================================== |
23
|
|
|
|
|
|
|
## Constructors etc. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
## $cof = CLASS_OR_OBJECT->new(%args) |
26
|
|
|
|
|
|
|
## + %args, object structure: |
27
|
|
|
|
|
|
|
## ( |
28
|
|
|
|
|
|
|
## ##-- user options |
29
|
|
|
|
|
|
|
## class => $class, ##-- optional, useful for debugging from header file |
30
|
|
|
|
|
|
|
## base => $basename, ##-- file basename (default=undef:none); use files "${base}.dba1", "${base}.dba2", "${base}.dba3", "${base}.hdr" |
31
|
|
|
|
|
|
|
## flags => $flags, ##-- fcntl flags or open-mode (default='r') |
32
|
|
|
|
|
|
|
## perms => $perms, ##-- creation permissions (default=(0666 &~umask)) |
33
|
|
|
|
|
|
|
## dmax => $dmax, ##-- maximum distance for co-occurrences (default=5) |
34
|
|
|
|
|
|
|
## fmin => $fmin, ##-- minimum pair frequency (default=0) |
35
|
|
|
|
|
|
|
## pack_i => $pack_i, ##-- pack-template for IDs (default='N') |
36
|
|
|
|
|
|
|
## pack_f => $pack_f, ##-- pack-template for frequencies (default='N') |
37
|
|
|
|
|
|
|
## pack_d => $pack_d, ##-- pack-tempalte for dates (default='n') |
38
|
|
|
|
|
|
|
## keeptmp => $bool, ##-- keep temporary files? (default=false) |
39
|
|
|
|
|
|
|
## logCompat => $level, ##-- log-level for compatibility warnings (default='warn') |
40
|
|
|
|
|
|
|
## ## |
41
|
|
|
|
|
|
|
## ##-- size info (after open() or load()) |
42
|
|
|
|
|
|
|
## size1 => $size1, ##-- == $r1->size() |
43
|
|
|
|
|
|
|
## size2 => $size2, ##-- == $r2->size() |
44
|
|
|
|
|
|
|
## size3 => $size3, ##-- == $r3->size() |
45
|
|
|
|
|
|
|
## sizeN => $sizeN, ##-- == $rN->size() |
46
|
|
|
|
|
|
|
## ## |
47
|
|
|
|
|
|
|
## ##-- low-level data |
48
|
|
|
|
|
|
|
## r1 => $r1, ##-- pf: [$end2] @ $i1 : constant (logical index) |
49
|
|
|
|
|
|
|
## r2 => $r2, ##-- pf: [$end3,$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) : sorted by $d1 for each $i1 |
50
|
|
|
|
|
|
|
## r3 => $r3, ##-- pf: [$i2,$f12]* @ end3($d1-1)..(end3($d1+1)-1) : sorted by $i2 for each ($i1,$d1) |
51
|
|
|
|
|
|
|
## rN => $rN, ##-- pf: [$fN] @ $date - $coldb->{xdmin} : totals by date |
52
|
|
|
|
|
|
|
## N => $N, ##-- sum($f12) [only used for version <= 0.11; thereafter replaced by rN] |
53
|
|
|
|
|
|
|
## version => $version, ##-- file version, for compatibility checks |
54
|
|
|
|
|
|
|
## ) |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
#inherited |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
##============================================================================== |
59
|
|
|
|
|
|
|
## I/O |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
62
|
|
|
|
|
|
|
## I/O: open/close |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
## $cof_or_undef = $cof->open($base,$flags) |
65
|
|
|
|
|
|
|
## $cof_or_undef = $cof->open($base) |
66
|
|
|
|
|
|
|
## $cof_or_undef = $cof->open() |
67
|
|
|
|
|
|
|
sub open { |
68
|
0
|
|
|
0
|
1
|
|
my ($cof,$base,$flags) = @_; |
69
|
0
|
|
0
|
|
|
|
$base //= $cof->{base}; |
70
|
0
|
|
0
|
|
|
|
$flags //= $cof->{flags}; |
71
|
0
|
0
|
|
|
|
|
$cof->close() if ($cof->opened); |
72
|
0
|
|
|
|
|
|
$cof->{base} = $base; |
73
|
0
|
|
|
|
|
|
$cof->{flags} = $flags = fcflags($flags); |
74
|
0
|
|
|
|
|
|
my ($hdr); ##-- save header, for version-checking |
75
|
0
|
0
|
0
|
|
|
|
if (fcread($flags) && !fctrunc($flags)) { |
76
|
0
|
0
|
|
|
|
|
$hdr = $cof->readHeader() |
77
|
|
|
|
|
|
|
or $cof->logconess("failed to read header data from '$cof->{base}.hdr': $!"); |
78
|
0
|
0
|
|
|
|
|
$cof->loadHeaderData($hdr) |
79
|
|
|
|
|
|
|
or $cof->logconess("failed to load header data from '$cof->{base}.hdr': $!"); |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
##-- check compatibility |
83
|
0
|
|
|
|
|
|
my $min_version = qv(0.10.000); |
84
|
0
|
0
|
0
|
|
|
|
if ($hdr && (!defined($hdr->{version}) || version->parse($hdr->{version}) < $min_version)) { |
|
|
|
0
|
|
|
|
|
85
|
0
|
|
|
|
|
|
$cof->vlog($cof->{logCompat}, "using v0.09 compatibility mode for $cof->{base}.*; consider running \`dcdb-upgrade.perl ", dirname($cof->{base}), "\'"); |
86
|
0
|
|
|
|
|
|
DiaColloDB::Compat->usecompat('v0_09'); |
87
|
0
|
|
|
|
|
|
bless($cof, 'DiaColloDB::Compat::v0_09::Relation::Cofreqs'); |
88
|
0
|
|
|
|
|
|
$cof->{version} = $hdr->{version}; |
89
|
0
|
|
|
|
|
|
return $cof->open($base,$flags); |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
##-- open low-level data structures |
93
|
0
|
0
|
|
|
|
|
$cof->{r1}->open("$base.dba1", $flags, perms=>$cof->{perms}, packas=>"$cof->{pack_i}") |
94
|
|
|
|
|
|
|
or $cof->logconfess("open failed for $base.dba1: $!"); |
95
|
0
|
0
|
|
|
|
|
$cof->{r2}->open("$base.dba2", $flags, perms=>$cof->{perms}, packas=>"$cof->{pack_i}$cof->{pack_d}$cof->{pack_f}") |
96
|
|
|
|
|
|
|
or $cof->logconfess("open failed for $base.dba2: $!"); |
97
|
0
|
0
|
|
|
|
|
$cof->{r3}->open("$base.dba3", $flags, perms=>$cof->{perms}, packas=>"$cof->{pack_i}$cof->{pack_f}") |
98
|
|
|
|
|
|
|
or $cof->logconfess("open failed for $base.dba3: $!"); |
99
|
0
|
|
|
|
|
|
$cof->{size1} = $cof->{r1}->size; |
100
|
0
|
|
|
|
|
|
$cof->{size2} = $cof->{r2}->size; |
101
|
0
|
|
|
|
|
|
$cof->{size3} = $cof->{r3}->size; |
102
|
|
|
|
|
|
|
|
103
|
0
|
|
|
|
|
|
return $cof; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
## $cof_or_undef = $cof->close() |
107
|
|
|
|
|
|
|
sub close { |
108
|
0
|
|
|
0
|
1
|
|
my $cof = shift; |
109
|
0
|
0
|
0
|
|
|
|
if ($cof->opened && fcwrite($cof->{flags})) { |
110
|
0
|
0
|
|
|
|
|
$cof->saveHeader() or return undef; |
111
|
|
|
|
|
|
|
} |
112
|
0
|
0
|
|
|
|
|
$cof->{r1}->close() or return undef; |
113
|
0
|
0
|
|
|
|
|
$cof->{r2}->close() or return undef; |
114
|
0
|
0
|
|
|
|
|
$cof->{r3}->close() or return undef; |
115
|
0
|
|
|
|
|
|
undef $cof->{base}; |
116
|
0
|
|
|
|
|
|
return $cof; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
## $bool = $cof->opened() |
120
|
|
|
|
|
|
|
sub opened { |
121
|
0
|
|
|
0
|
1
|
|
my $cof = shift; |
122
|
|
|
|
|
|
|
return |
123
|
|
|
|
|
|
|
(defined($cof->{base}) |
124
|
|
|
|
|
|
|
&& defined($cof->{r1}) && $cof->{r1}->opened |
125
|
|
|
|
|
|
|
&& defined($cof->{r2}) && $cof->{r2}->opened |
126
|
|
|
|
|
|
|
&& defined($cof->{r3}) && $cof->{r3}->opened |
127
|
0
|
|
0
|
|
|
|
); |
128
|
|
|
|
|
|
|
} |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
131
|
|
|
|
|
|
|
## I/O: header |
132
|
|
|
|
|
|
|
## + inherited |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
135
|
|
|
|
|
|
|
## I/O: text |
136
|
|
|
|
|
|
|
## + mostly inherited |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
## $bool = $obj->loadTextFile($filename_or_handle, %opts) |
139
|
|
|
|
|
|
|
## + wraps loadTextFh() |
140
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Persistent |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
## $cof = $cof->loadTextFh($fh,%opts) |
143
|
|
|
|
|
|
|
## + loads from text file as saved by saveTextFh(): |
144
|
|
|
|
|
|
|
## N ##-- 1 field : N |
145
|
|
|
|
|
|
|
## FREQ ID1 DATE ##-- 3 fields: un-collocated portion of $f1 |
146
|
|
|
|
|
|
|
## FREQ ID1 DATE ID2 ##-- 4 fields: co-frequency pair (ID2 >= 0) |
147
|
|
|
|
|
|
|
## FREQ ID1 DATE ID2 DATE2 ##-- 5 fields: redundant date (used by create(); DATE2 is ignored) |
148
|
|
|
|
|
|
|
## + supports semi-sorted input: input fh must be sorted by $i1,$d1 |
149
|
|
|
|
|
|
|
## and all $i2 for each $i1,$d1 must be adjacent (i.e. no intervening ($j1,$e1) with $j1 != $i1 or $e1 != $d1) |
150
|
|
|
|
|
|
|
## + supports multiple lines for pairs ($i1,$d1,$i2) provided the above conditions hold |
151
|
|
|
|
|
|
|
## + supports loading of $cof->{N} from single-value lines |
152
|
|
|
|
|
|
|
## + %opts: clobber %$cof |
153
|
|
|
|
|
|
|
*loadTextFh = __PACKAGE__->nocompat('loadTextFh'); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
## $bool = $cof->saveTextFh($fh,%opts) |
156
|
|
|
|
|
|
|
## + save from text file with lines of the form: |
157
|
|
|
|
|
|
|
## N ##-- 1 field : N |
158
|
|
|
|
|
|
|
## FREQ ID1 DATE ##-- 3 fields: un-collocated portion of $f1 |
159
|
|
|
|
|
|
|
## FREQ ID1 DATE ID2 ##-- 4 fields: co-frequency pair (ID2 >= 0) |
160
|
|
|
|
|
|
|
## + %opts: |
161
|
|
|
|
|
|
|
## i2s => \&CODE, ##-- code-ref for formatting indices; called as $s=CODE($i) |
162
|
|
|
|
|
|
|
## i2s1 => \&CODE, ##-- code-ref for formatting item1 indices (overrides 'i2s') |
163
|
|
|
|
|
|
|
## i2s2 => \&CODE, ##-- code-ref for formatting item2 indices (overrides 'i2s') |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
##============================================================================== |
166
|
|
|
|
|
|
|
## Relation API: create, union |
167
|
|
|
|
|
|
|
## + disabled |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
*create = __PACKAGE__->nocompat('create'); |
170
|
|
|
|
|
|
|
*union = __PACKAGE__->nocompat('union'); |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
##============================================================================== |
173
|
|
|
|
|
|
|
## Relation API: dbinfo |
174
|
|
|
|
|
|
|
## + inherited |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
##============================================================================== |
177
|
|
|
|
|
|
|
## Utilities: lookup |
178
|
|
|
|
|
|
|
## + mostly BROKEN in v0.10.000 (x(+date)->t(-date) db tuples) |
179
|
|
|
|
|
|
|
## + inherited |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
## $N = $cof->sliceN($slice,$dateLo) |
182
|
|
|
|
|
|
|
## + get total slice co-occurrence count (compatible wrapper uses constant $cof->{N} for all slices) |
183
|
|
|
|
|
|
|
sub sliceN { |
184
|
|
|
|
|
|
|
#my ($cof,$slice,$dlo) = @_; |
185
|
0
|
|
|
0
|
1
|
|
return $_[0]{N}; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
##============================================================================== |
189
|
|
|
|
|
|
|
## Relation API: default |
190
|
|
|
|
|
|
|
## + inherited |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
##============================================================================== |
193
|
|
|
|
|
|
|
## Footer |
194
|
|
|
|
|
|
|
1; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
__END__ |