line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
## File: DiaColloDB::methods::export.pm |
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
4
|
|
|
|
|
|
|
## Description: collocation db, top-level import/export methods |
5
|
|
|
|
|
|
|
## + really just adds methods to top-level DiaColloDB package |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
##-- dummy package |
8
|
|
|
|
|
|
|
package DiaColloDB::methods::export; |
9
|
1
|
|
|
1
|
|
9
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
56
|
|
10
|
|
|
|
|
|
|
1; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
package DiaColloDB; |
13
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1236
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##============================================================================== |
16
|
|
|
|
|
|
|
## Export/Import |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## $coldb = $coldb->dbexport()
## $coldb = $coldb->dbexport($outdir,%opts)
##  + dumps an opened DB as text files below $outdir:
##    header.json, tenum.dat, ${attr}_enum.dat, ${attr}_2t.dat,
##    and (if the corresponding index is present) xf.dat, cof.dat, tdf.*
##  + $outdir defaults to "$coldb->{dbdir}/export"; a trailing slash is stripped
##  + returns $coldb (a true value) when the end of the export is reached;
##    all errors are reported via $coldb->logconfess() (presumably fatal -- confirm)
##  + %opts:
##     export_sdat => $bool,  ##-- whether to export *.sdat (stringified tuple files for debugging; default=0)
##     export_cof  => $bool,  ##-- do/don't export cof.* (default=do)
##     export_tdf  => $bool,  ##-- do/don't export tdf.* (default=do)
sub dbexport {
  my ($coldb,$outdir,%opts) = @_;
  $coldb->logconfess("cannot dbexport() an un-opened DB") if (!$coldb->opened);
  $outdir //= "$coldb->{dbdir}/export";
  $outdir =~ s{/$}{};
  $coldb->vlog('info', "export($outdir/)");

  ##-- options
  my $export_sdat = exists($opts{export_sdat}) ? $opts{export_sdat} : 0;
  my $export_cof  = exists($opts{export_cof})  ? $opts{export_cof}  : 1;
  my $export_tdf  = exists($opts{export_tdf})  ? $opts{export_tdf}  : 1;

  ##-- sanity check: the tuple-enum is dereferenced unconditionally below,
  ##   so complain clearly (and before touching the filesystem) if it is missing
  my $tenum = $coldb->{tenum};
  $coldb->logconfess("dbexport(): no tuple-enum (tenum) available for export") if (!$tenum);

  ##-- create export directory
  ##   NOTE(review): make_path() is presumably File::Path's, imported into
  ##   package DiaColloDB elsewhere; the import is not visible in this file
  -d $outdir
    or make_path($outdir)
    or $coldb->logconfess("dbexport(): could not create export directory $outdir: $!");

  ##-- dump: header
  $coldb->saveHeader("$outdir/header.json")
    or $coldb->logconfess("dbexport(): could not export header to $outdir/header.json: $!");

  ##-- dump: load enums
  ##   + loops use a lexical variable rather than the global $_: foreach aliases
  ##     its variable to the list element, so a callee assigning to $_ would
  ##     otherwise overwrite the attribute record itself
  my $adata = $coldb->attrData();
  $coldb->vlog($coldb->{logExport}, "dbexport(): loading enums to memory");
  $tenum->load() if (!$tenum->loaded);
  foreach my $attr (@$adata) {
    $attr->{enum}->load() if ($attr->{enum} && !$attr->{enum}->loaded);
  }

  ##-- dump: common: stringification
  ##   + $ts2txt : packed tuple string -> TAB-separated attribute strings
  ##   + $ti2txt : tuple-id            -> TAB-separated attribute strings
  ##   + both remain undef unless export_sdat is requested
  my $pack_t = $coldb->{pack_t};
  my ($ts2txt,$ti2txt);
  if ($export_sdat) {
    $coldb->vlog($coldb->{logExport}, "dbexport(): preparing tuple-stringification structures");

    foreach my $attr (@$adata) {
      ##-- cache id->string array and an id->text closure on the attribute record
      my $i2s = $attr->{i2s} = $attr->{enum}->toArray;
      $attr->{i2txt} = sub { return $i2s->[$_[0]//0]//''; };
    }

    my $ti2s = $tenum->toArray;
    my @ai2s = map {$_->{i2s}} @$adata;
    $ts2txt = sub {
      ##-- per-call lexical: no scratch state shared between the closures
      my @t = unpack($pack_t,$_[0]);
      return join("\t", (map {$ai2s[$_][$t[$_]//0]//''} (0..$#ai2s)));
    };
    $ti2txt = sub {
      ##-- unknown/undefined tuple-ids stringify like the empty packed tuple
      return $ts2txt->($ti2s->[$_[0]//0]//'');
    };
  }

  ##-- dump: tenum: raw
  $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-enum file $outdir/tenum.dat (raw)");
  $tenum->saveTextFile("$outdir/tenum.dat")
    or $coldb->logconfess("export failed for $outdir/tenum.dat");

  ##-- dump: tenum: stringified
  if ($export_sdat) {
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-enum file $outdir/tenum.sdat (strings)");
    $tenum->saveTextFile("$outdir/tenum.sdat", pack_s=>$ts2txt)
      or $coldb->logconfess("dbexport() failed for $outdir/tenum.sdat");
  }

  ##-- dump: by attribute: enum
  foreach my $attr (@$adata) {
    my $abase = "$outdir/$attr->{a}";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute enum file ${abase}_enum.dat");
    $attr->{enum}->saveTextFile("${abase}_enum.dat")
      or $coldb->logconfess("dbexport() failed for ${abase}_enum.dat");
  }

  ##-- dump: by attribute: a2t
  foreach my $attr (@$adata) {
    my $abase = "$outdir/$attr->{a}";

    ##-- dump: by attribute: a2t: raw
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap ${abase}_2t.dat (raw)");
    $attr->{a2t}->saveTextFile("${abase}_2t.dat")
      or $coldb->logconfess("dbexport() failed for ${abase}_2t.dat");

    ##-- dump: by attribute: a2t: stringified
    if ($export_sdat) {
      $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap ${abase}_2t.sdat (strings)");
      $attr->{a2t}->saveTextFile("${abase}_2t.sdat", a2s=>$attr->{i2txt}, b2s=>$ti2txt)
        or $coldb->logconfess("dbexport() failed for ${abase}_2t.sdat");
    }
  }

  ##-- dump: xf (only if the DB has such an index)
  if ($coldb->{xf}) {
    ##-- dump: xf: raw
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $outdir/xf.dat (raw)");
    $coldb->{xf}->saveTextFile("$outdir/xf.dat")
      or $coldb->logconfess("export failed for $outdir/xf.dat");

    ##-- dump: xf: stringified
    if ($export_sdat) {
      $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $outdir/xf.sdat (strings)");
      $coldb->{xf}->saveTextFile("$outdir/xf.sdat", i2s=>$ti2txt)
        or $coldb->logconfess("dbexport() failed for $outdir/xf.sdat");
    }
  }

  ##-- dump: cof (only if present and not disabled by export_cof=>0)
  if ($coldb->{cof} && $export_cof) {
    ##-- dump: cof: raw
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting co-frequency index $outdir/cof.dat (raw)");
    $coldb->{cof}->saveTextFile("$outdir/cof.dat")
      or $coldb->logconfess("export failed for $outdir/cof.dat");

    ##-- dump: cof: stringified
    if ($export_sdat) {
      $coldb->vlog($coldb->{logExport}, "dbexport(): exporting co-frequency index $outdir/cof.sdat (strings)");
      $coldb->{cof}->saveTextFile("$outdir/cof.sdat", i2s=>$ti2txt)
        or $coldb->logconfess("export failed for $outdir/cof.sdat");
    }
  }

  ##-- dump: tdf (only if present, enabled via $coldb->{index_tdf}, and not disabled by export_tdf=>0)
  if ($coldb->{tdf} && $coldb->{index_tdf} && $export_tdf) {
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting term-document index $outdir/tdf.*");
    $coldb->{tdf}->export("$outdir/tdf", $coldb)
      or $coldb->logconfess("export failed for $outdir/tdf.*");
  }

  ##-- all done
  $coldb->vlog($coldb->{logExport}, "dbexport(): export to $outdir complete.");
  return $coldb;
}
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
## $coldb = $coldb->dbimport()
## $coldb = $coldb->dbimport($txtdir,%opts)
##  + intended to import DB data from the text directory $txtdir
##  + may be called as a class method, in which case a new object is created
##  + TODO: not implemented; currently only reports that fact via logconfess(),
##    and neither $txtdir nor %opts is used
sub dbimport {
  my ($that,$txtdir,%opts) = @_;

  ##-- accept both class- and instance-method calls
  my $coldb = ref($that) ? $that : $that->new();

  return $coldb->logconfess("dbimport(): not yet implemented");
}
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
1; ##-- be happy |