line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## File: DiaColloDB::Compat::v0_11::Relation::Unigrams.pm |
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
4
|
|
|
|
|
|
|
## Description: collocation db, profiling relation: unigram database (using DiaColloDB::PackedFile) |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
package DiaColloDB::Compat::v0_11::Relation::Unigrams; |
7
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::Relation; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
31
|
|
8
|
1
|
|
|
1
|
|
6
|
use DiaColloDB::PackedFile; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
23
|
|
9
|
1
|
|
|
1
|
|
5
|
use DiaColloDB::Utils qw(:fcntl :sort :env :run :pack :file); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
10
|
1
|
|
|
1
|
|
373
|
use Fcntl qw(:DEFAULT :seek); |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
33
|
|
11
|
1
|
|
|
1
|
|
451
|
use File::Basename qw(dirname); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
18
|
|
12
|
1
|
|
|
1
|
|
118
|
use version; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
13
|
1
|
|
|
1
|
|
64
|
use strict; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
687
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##============================================================================== |
16
|
|
|
|
|
|
|
## Globals & Constants |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Relation::Unigrams DiaColloDB::Compat); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
##============================================================================== |
21
|
|
|
|
|
|
|
## Constructors etc. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
## $ug = CLASS_OR_OBJECT->new(%args) |
24
|
|
|
|
|
|
|
## + %args, object structure: |
25
|
|
|
|
|
|
|
## ( |
26
|
|
|
|
|
|
|
## ##-- user options |
27
|
|
|
|
|
|
|
## base => $basename, ##-- file basename (default=undef:none); use files "${base}.dba1", "${base}.dba2", "${base}.hdr" |
28
|
|
|
|
|
|
|
## flags => $flags, ##-- fcntl flags or open-mode (default='r') |
29
|
|
|
|
|
|
|
## perms => $perms, ##-- creation permissions (default=(0666 &~umask)) |
30
|
|
|
|
|
|
|
## pack_i => $pack_i, ##-- pack-template for IDs (default='N') |
31
|
|
|
|
|
|
|
## pack_f => $pack_f, ##-- pack-template for frequencies (default='N') |
32
|
|
|
|
|
|
|
## pack_d => $pack_d, ##-- pack-tempalte for dates (default='n') |
33
|
|
|
|
|
|
|
## keeptmp => $bool, ##-- keep temporary files? (default=false) |
34
|
|
|
|
|
|
|
## mmap => $bool, ##-- use mmap access? (default=true) |
35
|
|
|
|
|
|
|
## logCompat => $level, ##-- log-level for compatibility warnings (default='warn') |
36
|
|
|
|
|
|
|
## ## |
37
|
|
|
|
|
|
|
## ##-- size info (after open() or load()) |
38
|
|
|
|
|
|
|
## size1 => $size1, ##-- == $r1->size() |
39
|
|
|
|
|
|
|
## size2 => $size2, ##-- == $r2->size() |
40
|
|
|
|
|
|
|
## ## |
41
|
|
|
|
|
|
|
## ##-- low-level data |
42
|
|
|
|
|
|
|
## r1 => $r1, ##-- pf: [$end2] @ $i1 : constant (logical index) |
43
|
|
|
|
|
|
|
## r2 => $r2, ##-- pf: [$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) : sorted by $d1 for each $i1 |
44
|
|
|
|
|
|
|
## N => $N, ##-- sum($f1) |
45
|
|
|
|
|
|
|
## version => $version, ##-- file version, for compatibility checks |
46
|
|
|
|
|
|
|
## ) |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
## inherited |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
##============================================================================== |
51
|
|
|
|
|
|
|
## Persistent API: disk usage |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# inherited |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
##============================================================================== |
56
|
|
|
|
|
|
|
## I/O |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
59
|
|
|
|
|
|
|
## I/O: open/close |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
## $ug_or_undef = $ug->open($base,$flags) |
62
|
|
|
|
|
|
|
## $ug_or_undef = $ug->open($base) |
63
|
|
|
|
|
|
|
## $ug_or_undef = $ug->open() |
64
|
|
|
|
|
|
|
sub open { |
65
|
0
|
|
|
0
|
1
|
|
my ($ug,$base,$flags) = @_; |
66
|
0
|
|
0
|
|
|
|
$base //= $ug->{base}; |
67
|
0
|
|
0
|
|
|
|
$flags //= $ug->{flags}; |
68
|
0
|
0
|
|
|
|
|
$ug->close() if ($ug->opened); |
69
|
0
|
|
|
|
|
|
$ug->{base} = $base; |
70
|
0
|
|
|
|
|
|
$ug->{flags} = $flags = fcflags($flags); |
71
|
0
|
|
|
|
|
|
my ($hdr); ##-- save header, for version-checking |
72
|
0
|
0
|
0
|
|
|
|
if (fcread($flags) && !fctrunc($flags)) { |
73
|
0
|
0
|
0
|
|
|
|
$hdr = ($ug->readHeader() || $ug->readHeader("$ug->{base}.dba.hdr")) |
74
|
|
|
|
|
|
|
or $ug->logconfess("failed to read header data from '$ug->{base}.hdr': $!"); |
75
|
0
|
0
|
|
|
|
|
$ug->loadHeaderData($hdr) |
76
|
|
|
|
|
|
|
or $ug->logconess("failed to load header data from '$ug->{base}.hdr': $!"); |
77
|
|
|
|
|
|
|
} |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
##-- check compatibility |
80
|
0
|
|
|
|
|
|
my $min_version = qv(0.10.000); |
81
|
0
|
0
|
0
|
|
|
|
if ($hdr && (!defined($hdr->{version}) || version->parse($hdr->{version}) < $min_version)) { |
|
|
|
0
|
|
|
|
|
82
|
0
|
|
|
|
|
|
$ug->vlog($ug->{logCompat}, "using v0.09 compatibility mode for $ug->{base}.*; consider running \`dcdb-upgrade.perl ", dirname($ug->{base}), "\'"); |
83
|
0
|
|
|
|
|
|
DiaColloDB::Compat->usecompat('v0_09'); |
84
|
0
|
|
|
|
|
|
bless($ug, 'DiaColloDB::Compat::v0_09::Relation::Unigrams'); |
85
|
0
|
|
|
|
|
|
$ug->{version} = $hdr->{version}; |
86
|
0
|
|
|
|
|
|
return $ug->open("$base.dba",$flags); |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
##-- open low-level data structures |
90
|
0
|
0
|
|
|
|
|
$ug->{r1}->open("$base.dba1", $flags, perms=>$ug->{perms}, packas=>"$ug->{pack_i}") |
91
|
|
|
|
|
|
|
or $ug->logconfess("open failed for $base.dba1: $!"); |
92
|
0
|
0
|
|
|
|
|
$ug->{r2}->open("$base.dba2", $flags, perms=>$ug->{perms}, packas=>"$ug->{pack_d}$ug->{pack_f}") |
93
|
|
|
|
|
|
|
or $ug->logconfess("open failed for $base.dba2: $!"); |
94
|
0
|
|
|
|
|
|
$ug->{size1} = $ug->{r1}->size; |
95
|
0
|
|
|
|
|
|
$ug->{size2} = $ug->{r2}->size; |
96
|
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
|
return $ug; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
## $ug_or_undef = $ug->close() |
101
|
|
|
|
|
|
|
sub close { |
102
|
0
|
|
|
0
|
1
|
|
my $ug = shift; |
103
|
0
|
0
|
0
|
|
|
|
if ($ug->opened && fcwrite($ug->{flags})) { |
104
|
0
|
0
|
|
|
|
|
$ug->saveHeader() or return undef; |
105
|
|
|
|
|
|
|
} |
106
|
0
|
0
|
|
|
|
|
$ug->{r1}->close() or return undef; |
107
|
0
|
0
|
|
|
|
|
$ug->{r2}->close() or return undef; |
108
|
0
|
|
|
|
|
|
undef $ug->{base}; |
109
|
0
|
|
|
|
|
|
return $ug; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
## $bool = $ug->opened() |
113
|
|
|
|
|
|
|
sub opened { |
114
|
0
|
|
|
0
|
1
|
|
my $ug = shift; |
115
|
|
|
|
|
|
|
return |
116
|
|
|
|
|
|
|
(defined($ug->{base}) |
117
|
|
|
|
|
|
|
&& defined($ug->{r1}) && $ug->{r1}->opened |
118
|
|
|
|
|
|
|
&& defined($ug->{r2}) && $ug->{r2}->opened |
119
|
0
|
|
0
|
|
|
|
); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
123
|
|
|
|
|
|
|
## I/O: header |
124
|
|
|
|
|
|
|
## + inherited |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
127
|
|
|
|
|
|
|
## I/O: text |
128
|
|
|
|
|
|
|
## + inherited |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
## $bool = $obj->loadTextFile($filename_or_handle, %opts) |
131
|
|
|
|
|
|
|
## + wraps loadTextFh() |
132
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Persistent |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
## $ug = $ug->loadTextFh($fh,%opts) |
135
|
|
|
|
|
|
|
## + loads from text file as saved by saveTextFh() |
136
|
|
|
|
|
|
|
## + input fh must be sorted by $i1,$d1 |
137
|
|
|
|
|
|
|
## + supports multiple lines for pairs ($i1,$d1) provided the above conditions hold |
138
|
|
|
|
|
|
|
## + supports loading of $ug->{N} from single-component lines |
139
|
|
|
|
|
|
|
## + %opts: clobber %$ug |
140
|
|
|
|
|
|
|
*loadTextFh = __PACKAGE__->nocompat('loadTextFh'); |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
## $bool = $obj->saveTextFile($filename_or_handle, %opts) |
143
|
|
|
|
|
|
|
## + wraps saveTextFh() |
144
|
|
|
|
|
|
|
## + INHERITED from DiaColloDB::Persistent |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
## $bool = $ug->saveTextFh($fh,%opts) |
147
|
|
|
|
|
|
|
## + save from text file with lines of the form: |
148
|
|
|
|
|
|
|
## N ##-- 1 field : N |
149
|
|
|
|
|
|
|
## FREQ ID1 DATE ##-- 3 fields: unigram frequency for (ID1,DATE) |
150
|
|
|
|
|
|
|
## + %opts: |
151
|
|
|
|
|
|
|
## i2s => \&CODE, ##-- code-ref for formatting indices; called as $s=CODE($i) |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
##============================================================================== |
154
|
|
|
|
|
|
|
## Relation API: create, union |
155
|
|
|
|
|
|
|
## + disabled |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
*create = __PACKAGE__->nocompat('create'); |
158
|
|
|
|
|
|
|
*union = __PACKAGE__->nocompat('union'); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
##============================================================================== |
161
|
|
|
|
|
|
|
## Relation API: dbinfo |
162
|
|
|
|
|
|
|
## + inherited |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
##============================================================================== |
165
|
|
|
|
|
|
|
## Utilities: lookup |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
## $N = $cof->sliceN($slice,$dateLo) |
168
|
|
|
|
|
|
|
## + get total slice co-occurrence count (compatible wrapper uses constant $cof->{N} for all slices) |
169
|
|
|
|
|
|
|
sub sliceN { |
170
|
|
|
|
|
|
|
#my ($cof,$slice,$dlo) = @_; |
171
|
0
|
|
|
0
|
1
|
|
return $_[0]{N}; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
##============================================================================== |
175
|
|
|
|
|
|
|
## Relation API: default |
176
|
|
|
|
|
|
|
## + inherited |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
##============================================================================== |
179
|
|
|
|
|
|
|
## Footer |
180
|
|
|
|
|
|
|
1; |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
__END__ |