| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
|
2
|
|
|
|
|
|
|
## File: DiaColloDB::methods::export.pm |
|
3
|
|
|
|
|
|
|
## Author: Bryan Jurish <moocow@cpan.org> |
|
4
|
|
|
|
|
|
|
## Description: collocation db, top-level import/export methods |
|
5
|
|
|
|
|
|
|
## + really just adds methods to top-level DiaColloDB package |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
##-- dummy package |
|
8
|
|
|
|
|
|
|
package DiaColloDB::methods::export; |
|
9
|
1
|
|
|
1
|
|
10
|
use strict; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
47
|
|
|
10
|
|
|
|
|
|
|
1; |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
package DiaColloDB; |
|
13
|
1
|
|
|
1
|
|
8
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1272
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
##============================================================================== |
|
16
|
|
|
|
|
|
|
## Export/Import |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
## $coldb = $coldb->dbexport() |
|
19
|
|
|
|
|
|
|
## $coldb = $coldb->dbexport($outdir,%opts) |
|
20
|
|
|
|
|
|
|
## + $outdir defaults to "$coldb->{dbdir}/export" |
|
21
|
|
|
|
|
|
|
## + %opts: |
|
22
|
|
|
|
|
|
|
## export_sdat => $bool, ##-- whether to export *.sdat (stringified tuple files for debugging; default=0) |
|
23
|
|
|
|
|
|
|
## export_cof => $bool, ##-- do/don't export cof.* (default=do) |
|
24
|
|
|
|
|
|
|
## export_tdf => $bool, ##-- do/don't export tdf.* (default=do) |
|
25
|
|
|
|
|
|
|
sub dbexport {
  ##-- Usage: $coldb = $coldb->dbexport($outdir, %opts)
  ##  + writes text dumps of an opened DB's components below $outdir and returns $coldb
  ##  + $outdir defaults to "$coldb->{dbdir}/export"; a single trailing "/" is stripped
  ##  + %opts (a key which is present wins even if its value is false or undef):
  ##      export_sdat => $bool,  ##-- also write stringified *.sdat files (default=0)
  ##      export_cof  => $bool,  ##-- write cof.* if $coldb->{cof} is set (default=1)
  ##      export_tdf  => $bool,  ##-- write tdf.* if $coldb->{tdf} and $coldb->{index_tdf} are set (default=1)
  ##  + every failure is reported via $coldb->logconfess()
  ##  + side effect: with export_sdat, each attribute record gets {i2s} and {i2txt} entries
  ##  + NOTE(review): make_path() is not imported in this file; presumably File::Path's
  ##    make_path() is already visible in package DiaColloDB -- confirm
  my ($coldb,$outdir,%opts) = @_;
  $coldb->opened()
    or $coldb->logconfess("cannot dbexport() an un-opened DB");

  ##-- target directory
  $outdir = "$coldb->{dbdir}/export" if (!defined($outdir));
  $outdir =~ s{/$}{};
  $coldb->vlog('info', "export($outdir/)");

  ##-- options: defaults first, caller-supplied keys override
  my %want = (export_sdat=>0, export_cof=>1, export_tdf=>1, %opts);
  my ($export_sdat,$export_cof,$export_tdf) = @want{qw(export_sdat export_cof export_tdf)};

  ##-- make sure the export directory exists
  if (!-d $outdir) {
    make_path($outdir)
      or $coldb->logconfess("dbexport(): could not create export directory $outdir: $!");
  }

  ##-- header
  my $hdrfile = "$outdir/header.json";
  $coldb->logconfess("dbexport(): could not export header to $hdrfile: $!")
    unless ($coldb->saveHeader($hdrfile));

  ##-- pull enums into memory (only those which exist and are not yet loaded)
  my $adata = $coldb->attrData();
  $coldb->vlog($coldb->{logExport}, "dbexport(): loading enums to memory");
  my $tenum = $coldb->{tenum};
  $tenum->load() if ($tenum && !$tenum->loaded);
  for my $attr (@$adata) {
    my $enum = $attr->{enum};
    next if (!$enum || $enum->loaded);
    $enum->load();
  }

  ##-- stringification closures (only needed for *.sdat output)
  ##  + $ts2txt : packed tuple string -> TAB-joined attribute strings
  ##  + $ti2txt : tuple id            -> TAB-joined attribute strings
  my $pack_t = $coldb->{pack_t};
  my ($ts2txt,$ti2txt);
  if ($export_sdat) {
    $coldb->vlog($coldb->{logExport}, "dbexport(): preparing tuple-stringification structures");

    ##-- per-attribute id->string tables, in attribute order
    my @fields;
    for my $attr (@$adata) {
      my $strings    = $attr->{i2s} = $attr->{enum}->toArray;
      $attr->{i2txt} = sub { return $strings->[ $_[0] // 0 ] // ''; };
      push(@fields, $strings);
    }

    my $ti2s = $tenum->toArray;
    $ts2txt = sub {
      my @ids = unpack($pack_t, $_[0]);
      ##-- missing ids fall back to index 0, missing strings to ''
      return join("\t", map { $fields[$_][ $ids[$_] // 0 ] // '' } (0..$#fields));
    };
    $ti2txt = sub {
      ##-- look up the packed tuple for this id, then stringify it as above
      return $ts2txt->($ti2s->[ $_[0] // 0 ] // '');
    };
  }

  ##-- tuple enum: raw
  my $tenum_raw = "$outdir/tenum.dat";
  $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-enum file $tenum_raw (raw)");
  $coldb->logconfess("export failed for $tenum_raw")
    unless ($tenum->saveTextFile($tenum_raw));

  ##-- tuple enum: stringified
  if ($export_sdat) {
    my $tenum_str = "$outdir/tenum.sdat";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-enum file $tenum_str (strings)");
    $coldb->logconfess("dbexport() failed for $tenum_str")
      unless ($tenum->saveTextFile($tenum_str, pack_s=>$ts2txt));
  }

  ##-- attribute enums (all of these are written before any expansion map)
  for my $attr (@$adata) {
    my $enumfile = "$outdir/$attr->{a}_enum.dat";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute enum file $enumfile");
    $coldb->logconfess("dbexport() failed for $enumfile")
      unless ($attr->{enum}->saveTextFile($enumfile));
  }

  ##-- attribute expansion multimaps
  for my $attr (@$adata) {
    my $base = "$outdir/$attr->{a}";

    ##-- raw
    my $rawfile = "${base}_2t.dat";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap $rawfile (raw)");
    $coldb->logconfess("dbexport() failed for $rawfile")
      unless ($attr->{a2t}->saveTextFile($rawfile));

    ##-- stringified
    next if (!$export_sdat);
    my $strfile = "${base}_2t.sdat";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap $strfile (strings)");
    $coldb->logconfess("dbexport() failed for $strfile")
      unless ($attr->{a2t}->saveTextFile($strfile, a2s=>$attr->{i2txt}, b2s=>$ti2txt));
  }

  ##-- tuple frequencies (only if present)
  if (my $xf = $coldb->{xf}) {
    my $rawfile = "$outdir/xf.dat";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $rawfile (raw)");
    $coldb->logconfess("export failed for $rawfile")
      unless ($xf->saveTextFile($rawfile));

    if ($export_sdat) {
      my $strfile = "$outdir/xf.sdat";
      $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $strfile (strings)");
      $coldb->logconfess("dbexport() failed for $strfile")
        unless ($xf->saveTextFile($strfile, i2s=>$ti2txt));
    }
  }

  ##-- co-frequencies (only if present and requested)
  if ($coldb->{cof} && $export_cof) {
    my $cof     = $coldb->{cof};
    my $rawfile = "$outdir/cof.dat";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting co-frequency index $rawfile (raw)");
    $coldb->logconfess("export failed for $rawfile")
      unless ($cof->saveTextFile($rawfile));

    if ($export_sdat) {
      my $strfile = "$outdir/cof.sdat";
      $coldb->vlog($coldb->{logExport}, "dbexport(): exporting co-frequency index $strfile (strings)");
      $coldb->logconfess("export failed for $strfile")
        unless ($cof->saveTextFile($strfile, i2s=>$ti2txt));
    }
  }

  ##-- term-document index (only if present, enabled, and requested)
  if ($coldb->{tdf} && $coldb->{index_tdf} && $export_tdf) {
    my $tdfbase = "$outdir/tdf";
    $coldb->vlog($coldb->{logExport}, "dbexport(): exporting term-document index $tdfbase.*");
    $coldb->logconfess("export failed for $tdfbase.*")
      unless ($coldb->{tdf}->export($tdfbase, $coldb));
  }

  ##-- done
  $coldb->vlog($coldb->{logExport}, "dbexport(): export to $outdir complete.");
  return $coldb;
}
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
## $coldb = $coldb->dbimport() |
|
154
|
|
|
|
|
|
|
## $coldb = $coldb->dbimport($txtdir,%opts) |
|
155
|
|
|
|
|
|
|
## + import ColocDB data from $txtdir |
|
156
|
|
|
|
|
|
|
## + TODO: not yet implemented; currently just calls logconfess() |
|
157
|
|
|
|
|
|
|
sub dbimport {
  ##-- Usage: $coldb = $coldb->dbimport($txtdir, %opts)
  ##  + placeholder: $txtdir and %opts are accepted but not used yet
  ##  + may be called as a class method, in which case a fresh object is built via new()
  ##  + always ends by calling logconfess() with a "not yet implemented" message
  my ($coldb,$txtdir,%opts) = @_;
  unless (ref($coldb)) {
    $coldb = $coldb->new();
  }
  return $coldb->logconfess("dbimport(): not yet implemented");
}
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
1; ##-- be happy |