| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
|
2
|
|
|
|
|
|
|
## |
|
3
|
|
|
|
|
|
|
## File: DiaColloDB::Upgrade::v0_12_sliceN.pm |
|
4
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
|
5
|
|
|
|
|
|
|
## Description: DiaColloDB utilities: auto-magic upgrade: v0.11.x -> v0.12.x: allow slice-wise N |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
package DiaColloDB::Upgrade::v0_12_sliceN; |
|
8
|
1
|
|
|
1
|
|
10
|
use DiaColloDB::Upgrade::Base; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
35
|
|
|
9
|
1
|
|
|
1
|
|
453
|
use DiaColloDB::Compat::v0_11; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
40
|
|
|
10
|
1
|
|
|
1
|
|
8
|
use DiaColloDB::Utils qw(:pack :env :run :file :pdl); |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
48
|
|
|
11
|
1
|
|
|
1
|
|
390
|
use version; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
7
|
|
|
12
|
1
|
|
|
1
|
|
71
|
use strict; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
1594
|
|
|
13
|
|
|
|
|
|
|
our @ISA = qw(DiaColloDB::Upgrade::Base); |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##============================================================================== |
|
16
|
|
|
|
|
|
|
## API |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## $version = $CLASS_OR_OBJECT->toversion() |
|
19
|
|
|
|
|
|
|
## + returns default target version; default just returns $DiaColloDB::VERSION |
|
20
|
|
|
|
|
|
|
sub toversion { |
|
21
|
0
|
|
|
0
|
1
|
|
return '0.12.000'; |
|
22
|
|
|
|
|
|
|
} |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
## $bool = $CLASS_OR_OBJECT->upgrade() |
|
25
|
|
|
|
|
|
|
## + performs upgrade |
|
26
|
|
|
|
|
|
|
sub upgrade { |
|
27
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
##-- backup |
|
30
|
0
|
0
|
|
|
|
|
$up->backup() or return undef; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
##-- read header |
|
33
|
0
|
|
|
|
|
|
my $dbdir = $up->{dbdir}; |
|
34
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader(); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
##-- convert relations: unigrams |
|
37
|
|
|
|
|
|
|
{ |
|
38
|
0
|
0
|
|
|
|
|
my $ug = DiaColloDB::Relation::Unigrams->new(base=>"$dbdir/xf", logCompat=>'off') |
|
39
|
|
|
|
|
|
|
or $up->logconfess("failed to open unigram index $dbdir/xf.*: $!"); |
|
40
|
0
|
|
|
|
|
|
$up->info("upgrading unigram index $dbdir/xf.*"); |
|
41
|
0
|
0
|
|
|
|
|
$up->warn("unigram data in $dbdir/xf.* doesn't seem to be v0.11 format; trying to upgrade anyways") |
|
42
|
|
|
|
|
|
|
if (!$ug->isa('DiaColloDB::Compat::v0_11::Relation::Unigrams')); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
##-- xf: extract total counts by date |
|
45
|
0
|
|
|
|
|
|
my $r2 = $ug->{r2}; ##-- pf: [$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) |
|
46
|
0
|
|
|
|
|
|
my $packas = $r2->{packas}; |
|
47
|
0
|
|
|
|
|
|
my ($buf, $d,$f); |
|
48
|
0
|
|
|
|
|
|
my %fN = qw(); |
|
49
|
0
|
|
|
|
|
|
for (my $i=0; $i < $ug->{size2}; ++$i) { |
|
50
|
0
|
|
|
|
|
|
$r2->read(\$buf); |
|
51
|
0
|
|
|
|
|
|
($d,$f) = unpack($packas,$buf); |
|
52
|
0
|
|
|
|
|
|
$fN{$d} += $f; |
|
53
|
|
|
|
|
|
|
} |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
##-- xf: create $rN by date |
|
56
|
0
|
|
|
|
|
|
my @dates = sort {$a<=>$b} keys %fN; |
|
|
0
|
|
|
|
|
|
|
|
57
|
0
|
|
|
|
|
|
my $ymin = $dates[0]; |
|
58
|
0
|
|
|
|
|
|
my $rN = $ug->{rN} = DiaColloDB::PackedFile->new(file=>"$dbdir/xf.dbaN", flags=>'rw', perms=>$ug->{perms}, packas=>"$ug->{pack_f}"); |
|
59
|
0
|
|
|
|
|
|
$rN->store(($_-$ymin)=>$fN{$_}) foreach (@dates); |
|
60
|
0
|
|
|
|
|
|
$rN->flush(); |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
##-- xf: update header |
|
63
|
0
|
|
|
|
|
|
$ug->{ymin} = $ymin; |
|
64
|
0
|
|
|
|
|
|
$ug->{sizeN} = $rN->size; |
|
65
|
0
|
|
|
|
|
|
$ug->{version} = $up->toversion; |
|
66
|
0
|
0
|
|
|
|
|
$ug->saveHeader() |
|
67
|
|
|
|
|
|
|
or $up->logconfess("failed to save new unigram index header $dbdir/xf.hdr"); |
|
68
|
|
|
|
|
|
|
} |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
##-- convert relations: cofreqs |
|
71
|
|
|
|
|
|
|
{ |
|
72
|
0
|
0
|
|
|
|
|
my $cof = DiaColloDB::Relation::Cofreqs->new(base=>"$dbdir/cof", logCompat=>'off') |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
or $up->logconfess("failed to open co-frequency index $dbdir/cof.*: $!"); |
|
74
|
0
|
|
|
|
|
|
$up->info("upgrading co-frequency index $dbdir/cof.*"); |
|
75
|
0
|
0
|
|
|
|
|
$up->warn("co-frequency data in $dbdir/cof.* doesn't seem to be v0.11 format; trying to upgrade anyways") |
|
76
|
|
|
|
|
|
|
if (!$cof->isa('DiaColloDB::Compat::v0_11::Relation::Cofreqs')); |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
##-- cof: extract total counts by date |
|
79
|
0
|
|
|
|
|
|
my $r2 = $cof->{r2}; ##-- pf: [$end3,$d1,$f1]* @ end2($i1-1)..(end2($i1+1)-1) |
|
80
|
0
|
|
|
|
|
|
my $packas = $r2->{packas}; |
|
81
|
0
|
|
|
|
|
|
my ($buf, $end3,$d,$f); |
|
82
|
0
|
|
|
|
|
|
my %fN = qw(); |
|
83
|
0
|
|
|
|
|
|
for (my $i=0; $i < $cof->{size2}; ++$i) { |
|
84
|
0
|
|
|
|
|
|
$r2->read(\$buf); |
|
85
|
0
|
|
|
|
|
|
($end3,$d,$f) = unpack($packas,$buf); |
|
86
|
0
|
|
|
|
|
|
$fN{$d} += $f; |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
##-- cof: create $rN by date |
|
90
|
0
|
|
|
|
|
|
my @dates = sort {$a<=>$b} keys %fN; |
|
|
0
|
|
|
|
|
|
|
|
91
|
0
|
|
|
|
|
|
my $ymin = $dates[0]; |
|
92
|
0
|
|
|
|
|
|
my $rN = $cof->{rN} = DiaColloDB::PackedFile->new(file=>"$dbdir/cof.dbaN", flags=>'rw', perms=>$cof->{perms}, packas=>"$cof->{pack_f}"); |
|
93
|
0
|
|
|
|
|
|
$rN->store(($_-$ymin)=>$fN{$_}) foreach (@dates); |
|
94
|
0
|
|
|
|
|
|
$rN->flush(); |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
##-- cof: update header |
|
97
|
0
|
|
|
|
|
|
$cof->{ymin} = $ymin; |
|
98
|
0
|
|
|
|
|
|
$cof->{sizeN} = $rN->size; |
|
99
|
0
|
|
|
|
|
|
$cof->{version} = $up->toversion; |
|
100
|
0
|
0
|
|
|
|
|
$cof->saveHeader() |
|
101
|
|
|
|
|
|
|
or $up->logconfess("failed to save new co-frequency index header $dbdir/cof.hdr"); |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
##-- convert relations: tdf |
|
105
|
0
|
0
|
0
|
|
|
|
if ($hdr->{index_tdf} && -r "$dbdir/tdf.hdr") { |
|
106
|
0
|
|
|
|
|
|
DiaColloDB::Compat->usecompat('v0_11::Relation::TDF'); |
|
107
|
0
|
0
|
|
|
|
|
my $vs = DiaColloDB::Relation::TDF->new(base=>"$dbdir/tdf", logCompat=>'off') |
|
108
|
|
|
|
|
|
|
or $up->logconfess("failed to open (term x document) index $dbdir/tdf.*: $!"); |
|
109
|
0
|
|
|
|
|
|
$up->info("upgrading (term x document) index $dbdir/tdf.*"); |
|
110
|
0
|
0
|
|
|
|
|
$up->warn("term-document data in $dbdir/tdf.* doesn't seem to be v0.11 format; trying to upgrade anyways") |
|
111
|
|
|
|
|
|
|
if (!$vs->isa('DiaColloDB::Compat::v0_11::Relation::TDF')); |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
##-- tdf: create {yf}, {ymin} |
|
114
|
0
|
|
|
|
|
|
my ($cf,$c2date) = @$vs{qw(cf c2date)}; |
|
115
|
0
|
|
|
|
|
|
my ($ymin,$ymax) = $c2date->minmax; |
|
116
|
0
|
|
|
|
|
|
my $NY = $ymax-$ymin+1; |
|
117
|
0
|
|
|
|
|
|
$cf->indadd( ($c2date-$ymin), my $yf=mmzeroes("$dbdir/tdf.d/yf.pdl",$vs->vtype,$NY) ); |
|
118
|
0
|
|
|
|
|
|
undef $yf; |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
##-- tdf: update header |
|
121
|
0
|
|
|
|
|
|
$vs->{y0} = $ymin; |
|
122
|
0
|
|
|
|
|
|
$vs->{version} = $up->toversion; |
|
123
|
0
|
0
|
|
|
|
|
$vs->saveHeader() |
|
124
|
|
|
|
|
|
|
or $up->logconfess("failed to save new (term x document) index header $dbdir/tdf.hdr"); |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
##-- cleanup |
|
128
|
0
|
|
|
|
|
|
if (0 && !$up->{keep}) { |
|
129
|
|
|
|
|
|
|
$up->info("removing temporary file(s)"); |
|
130
|
|
|
|
|
|
|
} |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
##-- update header |
|
133
|
0
|
|
|
|
|
|
return $up->updateHeader(); |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
##============================================================================== |
|
137
|
|
|
|
|
|
|
## Backup & Revert |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
## $bool = $up->backup() |
|
140
|
|
|
|
|
|
|
## + perform backup any files we expect to change to $up->backupdir() |
|
141
|
|
|
|
|
|
|
sub backup { |
|
142
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
|
143
|
0
|
0
|
|
|
|
|
$up->SUPER::backup() or return undef; |
|
144
|
0
|
0
|
|
|
|
|
return 1 if (!$up->{backup}); |
|
145
|
|
|
|
|
|
|
|
|
146
|
0
|
|
|
|
|
|
my $dbdir = $up->{dbdir}; |
|
147
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader; |
|
148
|
0
|
|
|
|
|
|
my $backd = $up->backupdir; |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
##-- backup: top-level |
|
151
|
0
|
|
|
|
|
|
foreach my $base (map {"$dbdir/$_"} qw(xf cof tdf)) { |
|
|
0
|
|
|
|
|
|
|
|
152
|
0
|
|
|
|
|
|
$up->info("backing up $base.*"); |
|
153
|
0
|
0
|
|
|
|
|
copyto_a([grep {-e $_} map {($_,"$_.hdr")} ($base,"$base.dbaN")], $backd) |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
or $up->logconfess("backup failed for $base.*: $!"); |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
##-- backup: tdf.d |
|
158
|
0
|
|
|
|
|
|
foreach my $base (map {"$dbdir/tdf.d/$_"} qw(yf.pdl)) { |
|
|
0
|
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
$up->info("backing up $base.*"); |
|
160
|
0
|
0
|
|
|
|
|
copyto_a([grep {-e $_} map {($_,"$_.hdr")} ($base)], "$backd/tdf.d") |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
or $up->logconfess("backup failed for $base*: $!"); |
|
162
|
|
|
|
|
|
|
} |
|
163
|
|
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
|
return 1; |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
## @files = $up->revert_created() |
|
168
|
|
|
|
|
|
|
## + returns list of files created by this upgrade, for use with default revert() implementation |
|
169
|
|
|
|
|
|
|
sub revert_created { |
|
170
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
|
171
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader; |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
return ( |
|
174
|
|
|
|
|
|
|
##-- unigrams |
|
175
|
0
|
|
|
|
|
|
(grep {!-e $up->backupdir."/$_"} map {"xf.$_"} qw(dbaN dbaN.hdr)), |
|
|
0
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
##-- cofreqs |
|
178
|
0
|
|
|
|
|
|
(grep {!-e $up->backupdir."/$_"} map {"cof.$_"} qw(dbaN dbaN.hdr)), |
|
|
0
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
##-- tdf |
|
181
|
0
|
0
|
|
|
|
|
(grep {-e "$up->{dbdir}/$_" && !-e $up->backupdir."/$_"} map {"tdf.d/$_"} qw(yf.pdl yf.pdl.hdr)), |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
##-- header |
|
184
|
|
|
|
|
|
|
#'header.json', |
|
185
|
|
|
|
|
|
|
); |
|
186
|
|
|
|
|
|
|
} |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
## @files = $up->revert_updated() |
|
189
|
|
|
|
|
|
|
## + returns list of files updated by this upgrade, for use with default revert() implementation |
|
190
|
|
|
|
|
|
|
sub revert_updated { |
|
191
|
0
|
|
|
0
|
1
|
|
my $up = shift; |
|
192
|
0
|
|
|
|
|
|
my $hdr = $up->dbheader; |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
return ( |
|
195
|
|
|
|
|
|
|
##-- unigrams |
|
196
|
|
|
|
|
|
|
"xf.hdr", |
|
197
|
0
|
|
|
|
|
|
(grep {-e $up->backupdir."/$_"} map {"xf.$_"} qw(dbaN dbaN.hdr)), |
|
|
0
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
##-- cofreqs |
|
200
|
|
|
|
|
|
|
"cof.hdr", |
|
201
|
0
|
|
|
|
|
|
(grep {-e $up->backupdir."/$_"} map {"cof.$_"} qw(dbaN dbaN.hdr)), |
|
|
0
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
##-- tdf |
|
204
|
0
|
|
|
|
|
|
(grep {-e $up->backupdir."/$_"} map {"tdf.$_"} qw(hdr d/yf.pdl d/yf.pdl.hdr)), |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
##-- header |
|
207
|
|
|
|
|
|
|
'header.json', |
|
208
|
|
|
|
|
|
|
); |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
##============================================================================== |
|
213
|
|
|
|
|
|
|
## Footer |
|
214
|
|
|
|
|
|
|
1; ##-- be happy |