| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ## -*- Mode: CPerl -*- | 
| 2 |  |  |  |  |  |  | ## File: DiaColloDB::methods::export.pm | 
| 3 |  |  |  |  |  |  | ## Author: Bryan Jurish <moocow@cpan.org> | 
| 4 |  |  |  |  |  |  | ## Description: collocation db, top-level import/export methods | 
| 5 |  |  |  |  |  |  | ##  + really just adds methods to top-level DiaColloDB package | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | ##-- dummy package | 
| 8 |  |  |  |  |  |  | package DiaColloDB::methods::export; | 
| 9 | 1 |  |  | 1 |  | 8 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 10 |  |  |  |  |  |  | 1; | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | package DiaColloDB; | 
| 13 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 999 |  | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | ##============================================================================== | 
| 16 |  |  |  |  |  |  | ## Export/Import | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | ## $coldb = $coldb->dbexport() | 
| 19 |  |  |  |  |  |  | ## $coldb = $coldb->dbexport($outdir,%opts) | 
| 20 |  |  |  |  |  |  | ##  + exports an opened DB as text files under $outdir; $outdir defaults to "$coldb->{dbdir}/export", and one trailing "/" is stripped | 
| 21 |  |  |  |  |  |  | ##  + returns $coldb itself on success; every failure is handed to $coldb->logconfess() (assumed to die -- confirm in the logging base class); %opts: | 
| 22 |  |  |  |  |  |  | ##     export_sdat => $bool,  ##-- whether to also export *.sdat (stringified tuple files for debugging; default=0) | 
| 23 |  |  |  |  |  |  | ##     export_cof  => $bool,  ##-- do/don't export cof.* (default=do; only applies if $coldb->{cof} is set) | 
| 24 |  |  |  |  |  |  | ##     export_tdf  => $bool,  ##-- do/don't export tdf.* (default=do; only applies if $coldb->{tdf} and $coldb->{index_tdf} are set) | 
| 25 |  |  |  |  |  |  | sub dbexport { | 
| 26 | 0 |  |  | 0 | 0 |  | my ($coldb,$outdir,%opts) = @_; | 
| 27 | 0 | 0 |  |  |  |  | $coldb->logconfess("cannot dbexport() an un-opened DB") if (!$coldb->opened); | 
| 28 | 0 |  | 0 |  |  |  | $outdir //= "$coldb->{dbdir}/export"; | 
| 29 | 0 |  |  |  |  |  | $outdir  =~ s{/$}{}; | 
| 30 | 0 |  |  |  |  |  | $coldb->vlog('info', "export($outdir/)"); | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | ##-- options: a key that exists in %opts overrides the default, even if its value is undef | 
| 33 | 0 | 0 |  |  |  |  | my $export_sdat = exists($opts{export_sdat}) ? $opts{export_sdat} : 0; | 
| 34 | 0 | 0 |  |  |  |  | my $export_cof  = exists($opts{export_cof}) ? $opts{export_cof} : 1; | 
| 35 | 0 | 0 |  |  |  |  | my $export_tdf  = exists($opts{export_tdf}) ? $opts{export_tdf} : 1; | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | ##-- create export directory unless it already exists (make_path() is not imported in this file; presumably File::Path's, imported elsewhere into package DiaColloDB -- confirm) | 
| 38 | 0 | 0 | 0 |  |  |  | -d $outdir | 
| 39 |  |  |  |  |  |  | or make_path($outdir) | 
| 40 |  |  |  |  |  |  | or $coldb->logconfess("dbexport(): could not create export directory $outdir: $!"); | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | ##-- dump: header, written to $outdir/header.json via saveHeader() | 
| 43 | 0 | 0 |  |  |  |  | $coldb->saveHeader("$outdir/header.json") | 
| 44 |  |  |  |  |  |  | or $coldb->logconfess("dbexport(): could not export header to $outdir/header.json: $!"); | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | ##-- dump: load enums: call load() on the tuple-enum and on each attribute enum that does not already report loaded() | 
| 47 | 0 |  |  |  |  |  | my $adata  = $coldb->attrData(); | 
| 48 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): loading enums to memory"); | 
| 49 | 0 | 0 | 0 |  |  |  | $coldb->{tenum}->load() if ($coldb->{tenum} && !$coldb->{tenum}->loaded); | 
| 50 | 0 |  |  |  |  |  | foreach (@$adata) { | 
| 51 | 0 | 0 | 0 |  |  |  | $_->{enum}->load() if ($_->{enum} && !$_->{enum}->loaded); | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | ##-- dump: common: stringification closures; $ts2txt (packed tuple -> text) and $ti2txt (tuple-id -> text) stay undef unless export_sdat is true; both share the scratch array @t | 
| 55 | 0 |  |  |  |  |  | my $pack_t = $coldb->{pack_t}; | 
| 56 | 0 |  |  |  |  |  | my ($ts2txt,$ti2txt); | 
| 57 | 0 | 0 |  |  |  |  | if ($export_sdat) { | 
| 58 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): preparing tuple-stringification structures"); | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 0 |  |  |  |  |  | foreach (@$adata) { | 
| 61 | 0 |  |  |  |  |  | my $i2s     = $_->{i2s} = $_->{enum}->toArray; | 
| 62 | 0 |  | 0 | 0 |  |  | $_->{i2txt} = sub { return $i2s->[$_[0]//0]//''; }; | 
|  | 0 |  | 0 |  |  |  |  | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 0 |  |  |  |  |  | my $ti2s = $coldb->{tenum}->toArray; | 
| 66 | 0 |  |  |  |  |  | my @ai2s = map {$_->{i2s}} @$adata; | 
|  | 0 |  |  |  |  |  |  | 
| 67 | 0 |  |  |  |  |  | my (@t); | 
| 68 |  |  |  |  |  |  | $ts2txt = sub { | 
| 69 | 0 |  |  | 0 |  |  | @t = unpack($pack_t,$_[0]); | 
| 70 | 0 |  | 0 |  |  |  | return join("\t", (map {$ai2s[$_][$t[$_]//0]//''} (0..$#ai2s))); | 
|  | 0 |  | 0 |  |  |  |  | 
| 71 | 0 |  |  |  |  |  | }; | 
| 72 |  |  |  |  |  |  | $ti2txt = sub { | 
| 73 | 0 |  | 0 | 0 |  |  | @t = unpack($pack_t, $ti2s->[$_[0]//0]//''); | 
|  |  |  | 0 |  |  |  |  | 
| 74 | 0 |  | 0 |  |  |  | return join("\t", (map {$ai2s[$_][$t[$_]//0]//''} (0..$#ai2s))); | 
|  | 0 |  | 0 |  |  |  |  | 
| 75 | 0 |  |  |  |  |  | }; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | ##-- dump: tenum: raw | 
| 79 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-enum file $outdir/tenum.dat (raw)"); | 
| 80 | 0 | 0 |  |  |  |  | $coldb->{tenum}->saveTextFile("$outdir/tenum.dat") | 
| 81 |  |  |  |  |  |  | or $coldb->logconfess("export failed for $outdir/tenum.dat"); | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | ##-- dump: tenum: stringified ($ts2txt is passed as the pack_s option) | 
| 84 | 0 | 0 |  |  |  |  | if ($export_sdat) { | 
| 85 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-enum file $outdir/tenum.sdat (strings)"); | 
| 86 | 0 | 0 |  |  |  |  | $coldb->{tenum}->saveTextFile("$outdir/tenum.sdat", pack_s=>$ts2txt) | 
| 87 |  |  |  |  |  |  | or $coldb->logconfess("dbexport() failed for $outdir/tenum.sdat"); | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | ##-- dump: by attribute: enum | 
| 91 | 0 |  |  |  |  |  | foreach (@$adata) { | 
| 92 |  |  |  |  |  |  | ##-- one enum file per attribute; the file name is built from the attribute's {a} field | 
| 93 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute enum file $outdir/$_->{a}_enum.dat"); | 
| 94 | 0 | 0 |  |  |  |  | $_->{enum}->saveTextFile("$outdir/$_->{a}_enum.dat") | 
| 95 |  |  |  |  |  |  | or $coldb->logconfess("dbexport() failed for $outdir/$_->{a}_enum.dat"); | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | ##-- dump: by attribute: a2t | 
| 99 | 0 |  |  |  |  |  | foreach (@$adata) { | 
| 100 |  |  |  |  |  |  | ##-- dump: by attribute: a2t: raw | 
| 101 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap $outdir/$_->{a}_2t.dat (raw)"); | 
| 102 | 0 | 0 |  |  |  |  | $_->{a2t}->saveTextFile("$outdir/$_->{a}_2t.dat") | 
| 103 |  |  |  |  |  |  | or $coldb->logconfess("dbexport() failed for $outdir/$_->{a}_2t.dat"); | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | ##-- dump: by attribute: a2t: stringified (attribute ids via the per-attribute {i2txt} closure, tuple ids via $ti2txt) | 
| 106 | 0 | 0 |  |  |  |  | if ($export_sdat) { | 
| 107 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap $outdir/$_->{a}_2t.sdat (strings)"); | 
| 108 | 0 | 0 |  |  |  |  | $_->{a2t}->saveTextFile("$outdir/$_->{a}_2t.sdat", a2s=>$_->{i2txt}, b2s=>$ti2txt) | 
| 109 |  |  |  |  |  |  | or $coldb->logconfess("dbexport() failed for $outdir/$_->{a}_2t.sdat"); | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | ##-- dump: xf (skipped entirely unless $coldb->{xf} is set) | 
| 114 | 0 | 0 |  |  |  |  | if ($coldb->{xf}) { | 
| 115 |  |  |  |  |  |  | ##-- dump: xf: raw | 
| 116 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $outdir/xf.dat (raw)"); | 
| 117 | 0 | 0 |  |  |  |  | $coldb->{xf}->saveTextFile("$outdir/xf.dat") | 
| 118 |  |  |  |  |  |  | or $coldb->logconfess("export failed for $outdir/xf.dat"); | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | ##-- dump: xf: stringified | 
| 121 | 0 | 0 |  |  |  |  | if ($export_sdat) { | 
| 122 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $outdir/xf.sdat (strings)"); | 
| 123 | 0 | 0 |  |  |  |  | $coldb->{xf}->saveTextFile("$outdir/xf.sdat", i2s=>$ti2txt) | 
| 124 |  |  |  |  |  |  | or $coldb->logconfess("dbexport() failed for $outdir/xf.sdat"); | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  | } | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | ##-- dump: cof (only if $coldb->{cof} is set and export_cof is true): raw file, plus stringified file if export_sdat | 
| 129 | 0 | 0 | 0 |  |  |  | if ($coldb->{cof} && $export_cof) { | 
| 130 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting co-frequency index $outdir/cof.dat (raw)"); | 
| 131 | 0 | 0 |  |  |  |  | $coldb->{cof}->saveTextFile("$outdir/cof.dat") | 
| 132 |  |  |  |  |  |  | or $coldb->logconfess("export failed for $outdir/cof.dat"); | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 0 | 0 |  |  |  |  | if ($export_sdat) { | 
| 135 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting co-frequency index $outdir/cof.sdat (strings)"); | 
| 136 | 0 | 0 |  |  |  |  | $coldb->{cof}->saveTextFile("$outdir/cof.sdat", i2s=>$ti2txt) | 
| 137 |  |  |  |  |  |  | or $coldb->logconfess("export failed for $outdir/cof.sdat"); | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | ##-- dump: tdf (only if $coldb->{tdf} and $coldb->{index_tdf} are set and export_tdf is true); delegated to the tdf object's own export() | 
| 142 | 0 | 0 | 0 |  |  |  | if ($coldb->{tdf} && $coldb->{index_tdf} && $export_tdf) { | 
|  |  |  | 0 |  |  |  |  | 
| 143 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): exporting term-document index $outdir/tdf.*"); | 
| 144 | 0 | 0 |  |  |  |  | $coldb->{tdf}->export("$outdir/tdf", $coldb) | 
| 145 |  |  |  |  |  |  | or $coldb->logconfess("export failed for $outdir/tdf.*"); | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | ##-- all done: log completion and return the object itself | 
| 149 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logExport}, "dbexport(): export to $outdir complete."); | 
| 150 | 0 |  |  |  |  |  | return $coldb; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | ## $coldb = $coldb->dbimport() | 
| 154 |  |  |  |  |  |  | ## $coldb = $coldb->dbimport($txtdir,%opts) | 
| 155 |  |  |  |  |  |  | ##  + intended to import DB data from text files in $txtdir (presumably those written by dbexport() -- TODO confirm) | 
| 156 |  |  |  |  |  |  | ##  + TODO: not yet implemented; may be called as a class method (a new object is then created via new()), but currently always ends in $coldb->logconfess() and ignores $txtdir and %opts | 
| 157 |  |  |  |  |  |  | sub dbimport { | 
| 158 | 0 |  |  | 0 | 0 |  | my ($coldb,$txtdir,%opts) = @_; | 
| 159 | 0 | 0 |  |  |  |  | $coldb = $coldb->new() if (!ref($coldb)); | 
| 160 | 0 |  |  |  |  |  | $coldb->logconfess("dbimport(): not yet implemented"); | 
| 161 |  |  |  |  |  |  | } | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | 1; ##-- be happy |