line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## |
3
|
|
|
|
|
|
|
## File: DiaColloDB::Upgrade::v0_12_sliceN.pm |
4
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
5
|
|
|
|
|
|
|
## Description: DiaColloDB utilities: auto-magic upgrade: v0.11.x -> v0.12.x: allow slice-wise N |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package DiaColloDB::Upgrade::v0_12_sliceN; |
8
|
1
|
|
|
1
|
|
7
|
use DiaColloDB::Upgrade::Base; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
9
|
1
|
|
|
1
|
|
340
|
use DiaColloDB::Compat::v0_11; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
10
|
1
|
|
|
1
|
|
6
|
use DiaColloDB::Utils qw(:pack :env :run :file :pdl); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
11
|
1
|
|
|
1
|
|
275
|
use version; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
3
|
|
12
|
1
|
|
|
1
|
|
47
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1302
|
|
13
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Upgrade::Base); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##============================================================================== |
16
|
|
|
|
|
|
|
## API |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## $version = $CLASS_OR_OBJECT->toversion() |
19
|
|
|
|
|
|
|
## + returns default target version; default just returns $DiaColloDB::VERSION |
20
|
|
|
|
|
|
|
sub toversion { |
21
|
0
|
|
|
0
|
1
|
|
return '0.12.000'; |
22
|
|
|
|
|
|
|
} |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
## $bool = $CLASS_OR_OBJECT->upgrade() |
25
|
|
|
|
|
|
|
## + performs upgrade |
26
|
|
|
|
|
|
|
sub upgrade { |
27
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
##-- backup |
30
|
0
|
0
|
|
|
|
|
$up->backup() or return undef; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
##-- read header |
33
|
0
|
|
|
|
|
|
my $dbdir = $up->{dbdir}; |
34
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader(); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
##-- convert relations: unigrams |
37
|
|
|
|
|
|
|
{ |
38
|
0
|
0
|
|
|
|
|
my $ug = DiaColloDB::Relation::Unigrams->new(base=>"$dbdir/xf", logCompat=>'off') |
39
|
|
|
|
|
|
|
or $up->logconfess("failed to open unigram index $dbdir/xf.*: $!"); |
40
|
0
|
|
|
|
|
|
$up->info("upgrading unigram index $dbdir/xf.*"); |
41
|
0
|
0
|
|
|
|
|
$up->warn("unigram data in $dbdir/xf.* doesn't seem to be v0.11 format; trying to upgrade anyways") |
42
|
|
|
|
|
|
|
if (!$ug->isa('DiaColloDB::Compat::v0_11::Relation::Unigrams')); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
##-- xf: extract total counts by date |
45
|
0
|
|
|
|
|
|
my $r2 = $ug->{r2}; ##-- pf: [$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) |
46
|
0
|
|
|
|
|
|
my $packas = $r2->{packas}; |
47
|
0
|
|
|
|
|
|
my ($buf, $d,$f); |
48
|
0
|
|
|
|
|
|
my %fN = qw(); |
49
|
0
|
|
|
|
|
|
for (my $i=0; $i < $ug->{size2}; ++$i) { |
50
|
0
|
|
|
|
|
|
$r2->read(\$buf); |
51
|
0
|
|
|
|
|
|
($d,$f) = unpack($packas,$buf); |
52
|
0
|
|
|
|
|
|
$fN{$d} += $f; |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
##-- xf: create $rN by date |
56
|
0
|
|
|
|
|
|
my @dates = sort {$a<=>$b} keys %fN; |
|
0
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
my $ymin = $dates[0]; |
58
|
0
|
|
|
|
|
|
my $rN = $ug->{rN} = DiaColloDB::PackedFile->new(file=>"$dbdir/xf.dbaN", flags=>'rw', perms=>$ug->{perms}, packas=>"$ug->{pack_f}"); |
59
|
0
|
|
|
|
|
|
$rN->store(($_-$ymin)=>$fN{$_}) foreach (@dates); |
60
|
0
|
|
|
|
|
|
$rN->flush(); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
##-- xf: update header |
63
|
0
|
|
|
|
|
|
$ug->{ymin} = $ymin; |
64
|
0
|
|
|
|
|
|
$ug->{sizeN} = $rN->size; |
65
|
0
|
|
|
|
|
|
$ug->{version} = $up->toversion; |
66
|
0
|
0
|
|
|
|
|
$ug->saveHeader() |
67
|
|
|
|
|
|
|
or $up->logconfess("failed to save new unigram index header $dbdir/xf.hdr"); |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
##-- convert relations: cofreqs |
71
|
|
|
|
|
|
|
{ |
72
|
0
|
0
|
|
|
|
|
my $cof = DiaColloDB::Relation::Cofreqs->new(base=>"$dbdir/cof", logCompat=>'off') |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
or $up->logconfess("failed to open co-frequency index $dbdir/cof.*: $!"); |
74
|
0
|
|
|
|
|
|
$up->info("upgrading co-frequency index $dbdir/cof.*"); |
75
|
0
|
0
|
|
|
|
|
$up->warn("co-frequency data in $dbdir/cof.* doesn't seem to be v0.11 format; trying to upgrade anyways") |
76
|
|
|
|
|
|
|
if (!$cof->isa('DiaColloDB::Compat::v0_11::Relation::Cofreqs')); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
##-- cof: extract total counts by date |
79
|
0
|
|
|
|
|
|
my $r2 = $cof->{r2}; ##-- pf: [$end3,$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) |
80
|
0
|
|
|
|
|
|
my $packas = $r2->{packas}; |
81
|
0
|
|
|
|
|
|
my ($buf, $end3,$d,$f); |
82
|
0
|
|
|
|
|
|
my %fN = qw(); |
83
|
0
|
|
|
|
|
|
for (my $i=0; $i < $cof->{size2}; ++$i) { |
84
|
0
|
|
|
|
|
|
$r2->read(\$buf); |
85
|
0
|
|
|
|
|
|
($end3,$d,$f) = unpack($packas,$buf); |
86
|
0
|
|
|
|
|
|
$fN{$d} += $f; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
##-- cof: create $rN by date |
90
|
0
|
|
|
|
|
|
my @dates = sort {$a<=>$b} keys %fN; |
|
0
|
|
|
|
|
|
|
91
|
0
|
|
|
|
|
|
my $ymin = $dates[0]; |
92
|
0
|
|
|
|
|
|
my $rN = $cof->{rN} = DiaColloDB::PackedFile->new(file=>"$dbdir/cof.dbaN", flags=>'rw', perms=>$cof->{perms}, packas=>"$cof->{pack_f}"); |
93
|
0
|
|
|
|
|
|
$rN->store(($_-$ymin)=>$fN{$_}) foreach (@dates); |
94
|
0
|
|
|
|
|
|
$rN->flush(); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
##-- cof: update header |
97
|
0
|
|
|
|
|
|
$cof->{ymin} = $ymin; |
98
|
0
|
|
|
|
|
|
$cof->{sizeN} = $rN->size; |
99
|
0
|
|
|
|
|
|
$cof->{version} = $up->toversion; |
100
|
0
|
0
|
|
|
|
|
$cof->saveHeader() |
101
|
|
|
|
|
|
|
or $up->logconfess("failed to save new co-frequency index header $dbdir/cof.hdr"); |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
##-- convert relations: tdf |
105
|
0
|
0
|
0
|
|
|
|
if ($hdr->{index_tdf} && -r "$dbdir/tdf.hdr") { |
106
|
0
|
|
|
|
|
|
DiaColloDB::Compat->usecompat('v0_11::Relation::TDF'); |
107
|
0
|
0
|
|
|
|
|
my $vs = DiaColloDB::Relation::TDF->new(base=>"$dbdir/tdf", logCompat=>'off') |
108
|
|
|
|
|
|
|
or $up->logconfess("failed to open (term x document) index $dbdir/tdf.*: $!"); |
109
|
0
|
|
|
|
|
|
$up->info("upgrading (term x document) index $dbdir/tdf.*"); |
110
|
0
|
0
|
|
|
|
|
$up->warn("term-document data in $dbdir/tdf.* doesn't seem to be v0.11 format; trying to upgrade anyways") |
111
|
|
|
|
|
|
|
if (!$vs->isa('DiaColloDB::Compat::v0_11::Relation::TDF')); |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
##-- tdf: create {yf}, {ymin} |
114
|
0
|
|
|
|
|
|
my ($cf,$c2date) = @$vs{qw(cf c2date)}; |
115
|
0
|
|
|
|
|
|
my ($ymin,$ymax) = $c2date->minmax; |
116
|
0
|
|
|
|
|
|
my $NY = $ymax-$ymin+1; |
117
|
0
|
|
|
|
|
|
$cf->indadd( ($c2date-$ymin), my $yf=mmzeroes("$dbdir/tdf.d/yf.pdl",$vs->vtype,$NY) ); |
118
|
0
|
|
|
|
|
|
undef $yf; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
##-- tdf: update header |
121
|
0
|
|
|
|
|
|
$vs->{y0} = $ymin; |
122
|
0
|
|
|
|
|
|
$vs->{version} = $up->toversion; |
123
|
0
|
0
|
|
|
|
|
$vs->saveHeader() |
124
|
|
|
|
|
|
|
or $up->logconfess("failed to save new (term x document) index header $dbdir/tdf.hdr"); |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
##-- cleanup |
128
|
0
|
|
|
|
|
|
if (0 && !$up->{keep}) { |
129
|
|
|
|
|
|
|
$up->info("removing temporary file(s)"); |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
##-- update header |
133
|
0
|
|
|
|
|
|
return $up->updateHeader(); |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
##============================================================================== |
137
|
|
|
|
|
|
|
## Backup & Revert |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
## $bool = $up->backup() |
140
|
|
|
|
|
|
|
## + perform backup any files we expect to change to $up->backupdir() |
141
|
|
|
|
|
|
|
sub backup { |
142
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
143
|
0
|
0
|
|
|
|
|
$up->SUPER::backup() or return undef; |
144
|
0
|
0
|
|
|
|
|
return 1 if (!$up->{backup}); |
145
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
my $dbdir = $up->{dbdir}; |
147
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader; |
148
|
0
|
|
|
|
|
|
my $backd = $up->backupdir; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
##-- backup: top-level |
151
|
0
|
|
|
|
|
|
foreach my $base (map {"$dbdir/$_"} qw(xf cof tdf)) { |
|
0
|
|
|
|
|
|
|
152
|
0
|
|
|
|
|
|
$up->info("backing up $base.*"); |
153
|
0
|
0
|
|
|
|
|
copyto_a([grep {-e $_} map {($_,"$_.hdr")} ($base,"$base.dbaN")], $backd) |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
or $up->logconfess("backup failed for $base.*: $!"); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
##-- backup: tdf.d |
158
|
0
|
|
|
|
|
|
foreach my $base (map {"$dbdir/tdf.d/$_"} qw(yf.pdl)) { |
|
0
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
$up->info("backing up $base.*"); |
160
|
0
|
0
|
|
|
|
|
copyto_a([grep {-e $_} map {($_,"$_.hdr")} ($base)], "$backd/tdf.d") |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
or $up->logconfess("backup failed for $base*: $!"); |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
|
return 1; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
## @files = $up->revert_created() |
168
|
|
|
|
|
|
|
## + returns list of files created by this upgrade, for use with default revert() implementation |
169
|
|
|
|
|
|
|
sub revert_created { |
170
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
171
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader; |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
return ( |
174
|
|
|
|
|
|
|
##-- unigrams |
175
|
0
|
|
|
|
|
|
(grep {!-e $up->backupdir."/$_"} map {"xf.$_"} qw(dbaN dbaN.hdr)), |
|
0
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
##-- cofreqs |
178
|
0
|
|
|
|
|
|
(grep {!-e $up->backupdir."/$_"} map {"cof.$_"} qw(dbaN dbaN.hdr)), |
|
0
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
##-- tdf |
181
|
0
|
0
|
|
|
|
|
(grep {-e "$up->{dbdir}/$_" && !-e $up->backupdir."/$_"} map {"tdf.d/$_"} qw(yf.pdl yf.pdl.hdr)), |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
##-- header |
184
|
|
|
|
|
|
|
#'header.json', |
185
|
|
|
|
|
|
|
); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
## @files = $up->revert_updated() |
189
|
|
|
|
|
|
|
## + returns list of files updated by this upgrade, for use with default revert() implementation |
190
|
|
|
|
|
|
|
sub revert_updated { |
191
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
192
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader; |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
return ( |
195
|
|
|
|
|
|
|
##-- unigrams |
196
|
|
|
|
|
|
|
"xf.hdr", |
197
|
0
|
|
|
|
|
|
(grep {-e $up->backupdir."/$_"} map {"xf.$_"} qw(dbaN dbaN.hdr)), |
|
0
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
##-- cofreqs |
200
|
|
|
|
|
|
|
"cof.hdr", |
201
|
0
|
|
|
|
|
|
(grep {-e $up->backupdir."/$_"} map {"cof.$_"} qw(dbaN dbaN.hdr)), |
|
0
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
##-- tdf |
204
|
0
|
|
|
|
|
|
(grep {-e $up->backupdir."/$_"} map {"tdf.$_"} qw(hdr d/yf.pdl d/yf.pdl.hdr)), |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
##-- header |
207
|
|
|
|
|
|
|
'header.json', |
208
|
|
|
|
|
|
|
); |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
##============================================================================== |
213
|
|
|
|
|
|
|
## Footer |
214
|
|
|
|
|
|
|
1; ##-- be happy |