File Coverage

blib/lib/DiaColloDB/Compat/v0_09/DiaColloDB.pm
Criterion Covered Total %
statement 30 260 11.5
branch 0 138 0.0
condition 0 96 0.0
subroutine 12 24 50.0
pod 5 9 55.5
total 47 527 8.9


line stmt bran cond sub pod time code
1             ## -*- Mode: CPerl -*-
2             ##
3             ## File: Compat::v0_09::DiaColloDB.pm
4             ## Author: Bryan Jurish <moocow@cpan.org>
5             ## Description: collocation db, top-level: v0.09.x compatibility hack
6              
7             package DiaColloDB::Compat::v0_09::DiaColloDB;
8 1     1   8 use DiaColloDB;
  1         3  
  1         31  
9 1     1   5 use DiaColloDB::Compat::v0_09::Relation;
  1         3  
  1         19  
10 1     1   7 use DiaColloDB::Compat::v0_09::Relation::Unigrams;
  1         1  
  1         18  
11 1     1   4 use DiaColloDB::Compat::v0_09::Relation::Cofreqs;
  1         3  
  1         37  
12 1     1   6 use DiaColloDB::Utils qw(:math :fcntl :json :sort :pack :regex :file :si :run :env :temp);
  1         2  
  1         37  
13 1     1   541 use DDC::Any; ##-- for query parsing
  1         3  
  1         9  
14 1     1   63 use File::Path qw(make_path remove_tree);
  1         2  
  1         26  
15 1     1   131 use Fcntl;
  1         3  
  1         78  
16 1     1   247 use strict;
  1         3  
  1         1553  
17              
18             ##==============================================================================
19             ## Globals & Constants
20              
21             our @ISA = qw(DiaColloDB DiaColloDB::Compat);
22              
23             ## $ECLASS
24             ## + enum class
25             #our $ECLASS = 'DiaColloDB::EnumFile';
26             our $ECLASS = 'DiaColloDB::EnumFile::MMap';
27              
28             ## $XECLASS
29             ## + fixed-length enum class
30             #our $XECLASS = 'DiaColloDB::EnumFile::FixedLen';
31             our $XECLASS = 'DiaColloDB::EnumFile::FixedLen::MMap';
32              
33             ## $MMCLASS
34             ## + multimap class
35             #our $MMCLASS = 'DiaColloDB::MultiMapFile';
36             our $MMCLASS = 'DiaColloDB::MultiMapFile::MMap';
37              
38             ##==============================================================================
39             ## Constructors etc.
40              
41             ## $coldb = CLASS_OR_OBJECT->new(%args)
42             ## + %args, object structure:
43             ## (
44             ## ##-- options
45             ## dbdir => $dbdir, ##-- database directory; REQUIRED
46             ## flags => $fcflags, ##-- fcntl flags or open()-style mode string; default='r'
47             ## attrs => \@attrs, ##-- index attributes (input as space-separated or array; compiled to array); default=undef (==>['l'])
48             ## ## + each attribute can be token-attribute qw(w p l) or a document metadata attribute "doc.ATTR"
49             ## ## + document "date" attribute is always indexed
50             ## info => \%info, ##-- additional data to return in info() method (e.g. collection, maintainer)
51             ## #bos => $bos, ##-- special string to use for BOS, undef or empty for none (default=undef) DISABLED
52             ## #eos => $eos, ##-- special string to use for EOS, undef or empty for none (default=undef) DISABLED
53             ## pack_id => $fmt, ##-- pack-format for IDs (default='N')
54             ## pack_f => $fmt, ##-- pack-format for frequencies (default='N')
55             ## pack_date => $fmt, ##-- pack-format for dates (default='n')
56             ## pack_off => $fmt, ##-- pack-format for file offsets (default='N')
57             ## pack_len => $len, ##-- pack-format for string lengths (default='n')
58             ## dmax => $dmax, ##-- maximum distance for collocation-frequencies and implicit ddc near() queries (default=5)
59             ## cfmin => $cfmin, ##-- minimum co-occurrence frequency for Cofreqs and ddc queries (default=2)
60             ## tfmin => $tfmin, ##-- minimum global term-frequency WITHOUT date component (default=2)
61             ## fmin_${a} => $fmin, ##-- minimum independent frequency for value of attribute ${a} (default=undef:from $tfmin)
62             ## keeptmp => $bool, ##-- keep temporary files? (default=0)
63             ## index_tdf => $bool, ##-- tdf: create/use (term x document) frequency matrix index? (default=undef: if available)
64             ## index_cof => $bool, ##-- cof: create/use co-frequency index (default=1)
65             ## dbreak => $dbreak, ##-- tdf: use break-type $dbreak for tdf index (default=undef: files)
66             ## tdfopts=>\%tdfopts, ##-- tdf: options for DiaColloDB::Relation::TDF->new(); default=undef (all inherited from %TDF_OPTS)
67             ## ##
68             ## ##-- runtime ddc relation options
69             ## ddcServer => "$host:$port", ##-- server for ddc relation
70             ## ddcTimeout => $seconds, ##-- timeout for ddc relation
71             ## ##
72             ## ##-- source filtering (for create())
73             ## pgood => $regex, ##-- positive filter regex for part-of-speech tags
74             ## pbad => $regex, ##-- negative filter regex for part-of-speech tags
75             ## wgood => $regex, ##-- positive filter regex for word text
76             ## wbad => $regex, ##-- negative filter regex for word text
77             ## lgood => $regex, ##-- positive filter regex for lemma text
78             ## lbad => $regex, ##-- negative filter regex for lemma text
79             ## ##
80             ## ##-- logging
81             ## logOpen => $level, ##-- log-level for open/close (default='info')
82             ## logCreate => $level, ##-- log-level for create messages (default='info')
83             ## logCorpusFile => $level, ##-- log-level for corpus file-parsing (default='info')
84             ## logCorpusFileN => $N, ##-- log corpus file-parsing only for every N files (0 for none; default:undef ~ $corpus->size()/100)
85             ## logExport => $level, ##-- log-level for export messages (default='info')
86             ## logProfile => $level, ##-- log-level for verbose profiling messages (default='trace')
87             ## logRequest => $level, ##-- log-level for request-level profiling messages (default='debug')
88             ## ##
89             ## ##-- runtime limits
90             ## maxExpand => $size, ##-- maximum number of elements in query expansions (default=65535)
91             ## ##
92             ## ##-- administrivia
93             ## version=>$version, ##-- DiaColloDB version of stored db (==$DiaColloDB::VERSION)
94             ## upgraded=>\@upgraded, ##-- optional administrative information about auto-magic upgrades
95             ## ##
96             ## ##-- attribute data
97             ## ${a}enum => $aenum, ##-- attribute enum: $aenum : ($dbdir/${a}_enum.*) : $astr<=>$ai : A*<=>N
98             ## ## e.g. lemmata: $lenum : ($dbdir/l_enum.* ) : $lstr<=>$li : A*<=>N
99             ## ${a}2x => $a2x, ##-- attribute multimap: $a2x : ($dbdir/${a}_2x.*) : $ai=>@xis : N=>N*
100             ## pack_x$a => $fmt ##-- pack format: extract attribute-id $ai from a packed tuple-string $xs ; $ai=unpack($coldb->{"pack_x$a"},$xs)
101             ## ##
102             ## ##-- tuple data (+dates)
103             ## xenum => $xenum, ##-- enum: tuples ($dbdir/xenum.*) : [@ais,$di]<=>$xi : N*n<=>N
104             ## pack_x => $fmt, ##-- symbol pack-format for $xenum : "${pack_id}[Nattrs]${pack_date}"
105             ## xdmin => $xdmin, ##-- minimum date
106             ## xdmax => $xdmax, ##-- maximum date
107             ## ##
108             ## ##-- relation data
109             ## xf => $xf, ##-- ug: $xi => $f($xi) : N=>N
110             ## cof => $cof, ##-- cf: [$xi1,$xi2] => $f12
111             ## ddc => $ddc, ##-- ddc: ddc client relation
112             ## tdf => $tdf, ##-- tdf: (term x document) frequency matrix relation
113             ## )
114             sub new {
115 0     0 1   my $that = shift;
116 0   0       my $coldb = bless({
117             ##-- options
118             dbdir => undef,
119             flags => 'r',
120             attrs => undef,
121             #bos => undef,
122             #eos => undef,
123             pack_id => 'N',
124             pack_f => 'N',
125             pack_date => 'n',
126             pack_off => 'N',
127             pack_len =>'n',
128             dmax => 5,
129             cfmin => 2,
130             tfmin => 2,
131             #keeptmp => 0,
132             index_tdf => undef,
133             index_cof => 1,
134             dbreak => undef,
135             tdfopts => {},
136              
137             ##-- filters
138             pgood => $DiaColloDB::PGOOD_DEFAULT,
139             pbad => $DiaColloDB::PBAD_DEFAULT,
140             wgood => $DiaColloDB::WGOOD_DEFAULT,
141             wbad => $DiaColloDB::WBAD_DEFAULT,
142             lgood => $DiaColloDB::LGOOD_DEFAULT,
143             lbad => $DiaColloDB::LBAD_DEFAULT,
144             #vsmgood => $DiaColloDB::TDF_MGOOD_DEFAULT,
145             #vsmbad => $DiaColloDB::TDF_MBAD_DEFAULT,
146              
147             ##-- logging
148             logOpen => 'info',
149             logCreate => 'info',
150             logCorpusFile => 'info',
151             logCorpusFileN => undef,
152             logExport => 'info',
153             logProfile => 'trace',
154             logRequest => 'debug',
155              
156             ##-- limits
157             maxExpand => 65535,
158              
159             ##-- administrivia
160             version => "v0.09.000",
161             #upgraded=>[],
162              
163             ##-- attributes
164             #lenum => undef, #$ECLASS->new(pack_i=>$coldb->{pack_id}, pack_o=>$coldb->{pack_off}, pack_l=>$coldb->{pack_len}),
165             #l2x => undef, #$MMCLASS->new(pack_i=>$coldb->{pack_id}, pack_o=>$coldb->{pack_off}, pack_l=>$coldb->{pack_id}),
166             #pack_xl => 'N',
167              
168             ##-- tuples (+dates)
169             #xenum => undef, #$XECLASS::FixedLen->new(pack_i=>$coldb->{pack_id}, pack_s=>$coldb->{pack_x}),
170             #pack_x => 'Nn',
171              
172             ##-- relations
173             #xf => undef, #DiaColloDB::Relation::Unigrams->new(packas=>$coldb->{pack_f}),
174             #cof => undef, #DiaColloDB::Relation::Cofreqs->new(pack_f=>$pack_f, pack_i=>$pack_i, dmax=>$dmax, fmin=>$cfmin),
175             #ddc => undef, #DiaColloDB::Relation::DDC->new(),
176             #tdf => undef, #DiaColloDB::Relation::TDF->new(),
177              
178             @_, ##-- user arguments
179             },
180             ref($that)||$that);
181 0           $coldb->{class} = ref($coldb);
182 0           $coldb->{pack_w} = $coldb->{pack_id};
183 0           $coldb->{pack_x} = $coldb->{pack_w} . $coldb->{pack_date};
184 0 0         if (defined($coldb->{dbdir})) {
185             ##-- avoid initial close() if called with dbdir=>$dbdir argument
186 0           my $dbdir = $coldb->{dbdir};
187 0           delete $coldb->{dbdir};
188 0           return $coldb->open($dbdir);
189             }
190 0           return $coldb;
191             }
192              
193             ## undef = $obj->DESTROY
194             ## + destructor calls close() if necessary
195             ## + INHERITED from DiaColloDB::Client
196              
197             ## $cli_or_undef = $cli->promote($class,%opts)
198             ## + DiaColloDB::Client method override
199             sub promote {
200 0     0 1   $_[0]->logconfess("promote(): not supported");
201             }
202              
203             ##==============================================================================
204             ## I/O: open/close
205              
206             ## $coldb_or_undef = $coldb->open($dbdir,%opts)
207             ## $coldb_or_undef = $coldb->open()
208             sub open {
209 0     0 1   my ($coldb,$dbdir,%opts) = @_;
210 0           DiaColloDB::Logger->ensureLog();
211 0 0         $coldb = $coldb->new() if (!ref($coldb));
212             #@$coldb{keys %opts} = values %opts; ##-- clobber options after loadHeader()
213 0   0       $dbdir //= $coldb->{dbdir};
214 0           $dbdir =~ s{/$}{};
215 0 0         $coldb->close() if ($coldb->opened);
216 0           $coldb->{dbdir} = $dbdir;
217 0   0       my $flags = fcflags($opts{flags} // $coldb->{flags});
218 0           $coldb->vlog($coldb->{logOpen}, "open($dbdir)");
219              
220             ##-- open: truncate
221 0 0         if (fctrunc($flags)) {
222 0           $flags |= O_CREAT;
223 0 0 0       !-d $dbdir
224             or remove_tree($dbdir)
225             or $coldb->logconfess("open(): could not remove old $dbdir: $!");
226             }
227              
228             ##-- open: create
229 0 0         if (!-d $dbdir) {
230 0 0         $coldb->logconfess("open(): no such directory '$dbdir'") if (!fccreat($flags));
231 0 0         make_path($dbdir)
232             or $coldb->logconfess("open(): could not create DB directory '$dbdir': $!");
233             }
234              
235             ##-- open: header
236             $coldb->loadHeader()
237 0 0         or $coldb->logconfess("open(): failed to load header file", $coldb->headerFile, ": $!");
238 0           @$coldb{keys %opts} = values %opts; ##-- clobber header options with user-supplied values
239              
240             ##-- open: tdf: require
241 0 0         $coldb->{index_tdf} = 0 if (!-r "$dbdir/tdf.hdr");
242 0 0         if ($coldb->{index_tdf}) {
243 0 0         if (!require "DiaColloDB/Relation/TDF.pm") {
244 0 0         $coldb->logwarn("open(): require failed for DiaColloDB/Relation/TDF.pm ; (term x document) matrix modelling disabled", ($@ ? "\n: $@" : ''));
245 0           $coldb->{index_tdf} = 0;
246             }
247             }
248              
249             ##-- open: common options
250 0           my %efopts = (flags=>$flags, pack_i=>$coldb->{pack_id}, pack_o=>$coldb->{pack_off}, pack_l=>$coldb->{pack_len});
251 0           my %mmopts = (flags=>$flags, pack_i=>$coldb->{pack_id});
252              
253             ##-- open: attributes
254 0           my $attrs = $coldb->{attrs} = $coldb->attrs(undef,['l']);
255              
256             ##-- open: by attribute
257 0           my $axat = 0;
258 0           foreach my $attr (@$attrs) {
259             ##-- open: ${attr}*
260 0 0         my $abase = (-r "$dbdir/${attr}_enum.hdr" ? "$dbdir/${attr}_" : "$dbdir/${attr}"); ##-- v0.03-compatibility hack
261 0 0         $coldb->{"${attr}enum"} = $ECLASS->new(base=>"${abase}enum", %efopts)
262             or $coldb->logconfess("open(): failed to open enum ${abase}enum.*: $!");
263 0 0         $coldb->{"${attr}2x"} = $MMCLASS->new(base=>"${abase}2x", %mmopts)
264             or $coldb->logconfess("open(): failed to open expansion multimap ${abase}2x.*: $!");
265 0   0       $coldb->{"pack_x$attr"} //= "\@${axat}$coldb->{pack_id}";
266 0           $axat += packsize($coldb->{pack_id});
267             }
268              
269             ##-- open: xenum
270             $coldb->{xenum} = $XECLASS->new(base=>"$dbdir/xenum", %efopts, pack_s=>$coldb->{pack_x})
271 0 0         or $coldb->logconfess("open(): failed to open tuple-enum $dbdir/xenum.*: $!");
272 0 0 0       if (!defined($coldb->{xdmin}) || !defined($coldb->{xdmax})) {
273             ##-- hack: guess date-range if not specified
274 0           $coldb->vlog('warn', "Warning: extracting date-range from xenum: you should update $coldb->{dbdir}/header.json");
275 0           my $pack_xdate = '@'.(packsize($coldb->{pack_id}) * scalar(@{$coldb->attrs})).$coldb->{pack_date};
  0            
276 0           my ($dmin,$dmax,$d) = ('inf','-inf');
277 0           foreach (@{$coldb->{xenum}->toArray}) {
  0            
278 0 0         next if (!$_);
279 0 0         next if (!defined($d = unpack($pack_xdate,$_))); ##-- strangeness: getting only 9-bytes in $_ for 10-byte values in file and toArray(): why?!
280 0 0         $dmin = $d if ($d < $dmin);
281 0 0         $dmax = $d if ($d > $dmax);
282             }
283 0           $coldb->vlog('warn', "extracted date-range \"xdmin\":$dmin, \"xdmax\":$dmax");
284 0           @$coldb{qw(xdmin xdmax)} = ($dmin,$dmax);
285             }
286              
287             ##-- open: xf
288 0 0         $coldb->{xf} = DiaColloDB::Compat::v0_09::Relation::Unigrams->new(file=>"$dbdir/xf.dba", flags=>$flags, packas=>$coldb->{pack_f}, logCompat=>'off')
289             or $coldb->logconfess("open(): failed to open tuple-unigrams $dbdir/xf.dba: $!");
290 0 0 0       $coldb->{xf}{N} = $coldb->{xN} if ($coldb->{xN} && !$coldb->{xf}{N}); ##-- compat
291              
292             ##-- open: cof
293 0 0 0       if ($coldb->{index_cof}//1) {
294             $coldb->{cof} = DiaColloDB::Compat::v0_09::Relation::Cofreqs->new(base=>"$dbdir/cof", flags=>$flags,
295             pack_i=>$coldb->{pack_id}, pack_f=>$coldb->{pack_f},
296             dmax=>$coldb->{dmax}, fmin=>$coldb->{cfmin},
297 0 0         logCompat=>'off',
298             )
299             or $coldb->logconfess("open(): failed to open co-frequency file $dbdir/cof.*: $!");
300             }
301              
302             ##-- open: ddc (undef if ddcServer isn't set in ddc.hdr or $coldb)
303 0 0 0       $coldb->{ddc} = DiaColloDB::Relation::DDC->new(-r "$dbdir/ddc.hdr" ? (base=>"$dbdir/ddc") : qw())->fromDB($coldb)
304             // 'DiaColloDB::Relation::DDC';
305              
306             ##-- open: tdf (if available)
307 0 0         if ($coldb->{index_tdf}) {
308 0   0       $coldb->{tdfopts} //= {};
309 0   0       $coldb->{tdfopts}{$_} //= $DiaColloDB::TDF_OPTS{$_} foreach (keys %DiaColloDB::TDF_OPTS); ##-- tdf: default options
310             $coldb->{tdf} = DiaColloDB::Relation::TDF->new((-r "$dbdir/tdf.hdr" ? (base=>"$dbdir/tdf") : qw()),
311             dbreak => $coldb->{dbreak},
312 0 0         %{$coldb->{tdfopts}},
  0            
313             );
314             }
315              
316             ##-- all done
317 0           return $coldb;
318             }
319              
320              
321             ## @dbkeys = $coldb->dbkeys()
322             sub dbkeys {
323             return (
324 0 0   0 1   (ref($_[0]) ? (map {($_."enum",$_."2x")} @{$_[0]->attrs}) : qw()),
  0            
  0            
325             qw(xenum xf cof tdf),
326             );
327             }
328              
329             ## $coldb_or_undef = $coldb->close()
330             ## + INHERITED from DiaColloDB
331              
332             ## $bool = $coldb->opened()
333             ## + INHERITED from DiaColloDB
334              
335             ## @files = $obj->diskFiles()
336             ## + returns list of disk files for this db
337             ## + INHERITED from DiaColloDB
338              
339             ##==============================================================================
340             ## Create/compile
341              
342             ##--------------------------------------------------------------
343             ## create: utils
344              
345             ## $multimap = $coldb->create_xmap($base, \%xs2i, $packfmt, $label="multimap")
346 1     1   11 BEGIN { *create_xmap = DiaColloDB::Compat->nocompat('create_xmap'); }
347              
348             ## \@attrs = $coldb->attrs()
349             ## \@attrs = $coldb->attrs($attrs=$coldb->{attrs}, $default=[])
350             ## + parse attributes in $attrs as array
351             ## + INHERITED from DiaColloDB
352              
353             ## $aname = $CLASS_OR_OBJECT->attrName($attr)
354             ## + returns canonical (short) attribute name for $attr
355             ## + supports aliases in %ATTR_ALIAS = ($alias=>$name, ...)
356             ## + see also:
357             ## %ATTR_RALIAS = ($name=>\@aliases, ...)
358             ## %ATTR_CBEXPR = ($name=>$ddcCountByExpr, ...)
359             ## %ATTR_TITLE = ($name_or_alias=>$title, ...)
360             ## + INHERITED from DiaColloDB
361             our %ATTR_ALIAS = %DiaColloDB::ATTR_ALIAS;
362             our %ATTR_RALIAS = %DiaColloDB::ATTR_RALIAS;
363             our %ATTR_TITLE = %DiaColloDB::ATTR_TITLE;
364             our %ATTR_CBEXPR = %DiaColloDB::ATTR_CBEXPR;
365              
366             ## $atitle = $CLASS_OR_OBJECT->attrTitle($attr_or_alias)
367             ## + returns an attribute title for $attr_or_alias
368             ## + INHERITED from DiaColloDB
369              
370             ## $acbexpr = $CLASS_OR_OBJECT->attrCountBy($attr_or_alias,$matchid=0)
371             ## + INHERITED from DiaColloDB
372              
373             ## $aquery_or_filter_or_undef = $CLASS_OR_OBJECT->attrQuery($attr_or_alias,$cquery)
374             ## + returns a CQuery or CQFilter object for condition $cquery on $attr_or_alias
375             ## + INHERITED from DiaColloDB
376              
377             ## \@attrdata = $coldb->attrData()
378             ## \@attrdata = $coldb->attrData(\@attrs=$coldb->attrs)
379             ## + get attribute data for \@attrs
380             ## + return @attrdata = ({a=>$attr, i=>$i, enum=>$aenum, pack_x=>$pack_xa, a2x=>$a2x, ...})
381             sub attrData {
382 0     0 0   my ($coldb,$attrs) = @_;
383 0   0       $attrs //= $coldb->attrs;
384 0           my ($attr);
385             return [map {
386 0           $attr = $coldb->attrName($attrs->[$_]);
  0            
387 0           {i=>$_, a=>$attr, enum=>$coldb->{"${attr}enum"}, pack_x=>$coldb->{"pack_x$attr"}, a2x=>$coldb->{"${attr}2x"}}
388             } (0..$#$attrs)];
389             }
390              
391             ## $bool = $coldb->hasAttr($attr)
392             ## + INHERITED from DiaColloDB
393              
394              
395             ##--------------------------------------------------------------
396             ## create: from corpus
397              
398             ## $bool = $coldb->create($corpus,%opts)
399             ## + %opts:
400             ## $key => $val, ##-- clobbers $coldb->{$key}
401             ## + DISABLED
402 1     1   7 BEGIN { *create = DiaColloDB::Compat->nocompat('create'); }
403              
404             ##--------------------------------------------------------------
405             ## create: union (aka merge)
406              
407             ## $coldb = $CLASS_OR_OBJECT->union(\@coldbs_or_dbdirs,%opts)
408             ## + populates $coldb as union over @coldbs_or_dbdirs
409             ## + clobbers argument dbs {_union_${a}i2u}, {_union_xi2u}, {_union_argi}
410             ## + DISABLED
411 1     1   6 BEGIN { *merge = *union = DiaColloDB::Compat->nocompat('union'); }
412              
413             ##--------------------------------------------------------------
414             ## I/O: header
415             ## + largely INHERITED from DiaColloDB::Persistent
416              
417             ## @keys = $coldb->headerKeys()
418             ## + keys to save as header
419             ## + INHERITED from DiaColloDB
420              
421             ## $bool = $coldb->loadHeaderData()
422             ## $bool = $coldb->loadHeaderData($data)
423             ## + INHERITED from DiaColloDB
424              
425             ## $bool = $coldb->saveHeader()
426             ## $bool = $coldb->saveHeader($headerFile)
427             ## + INHERITED from DiaColloDB::Persistent
428              
429             ##==============================================================================
430             ## Export/Import
431              
432             ## $bool = $coldb->dbexport()
433             ## $bool = $coldb->dbexport($outdir,%opts)
434             ## + $outdir defaults to "$coldb->{dbdir}/export"
435             ## + %opts:
436             ## export_sdat => $bool, ##-- whether to export *.sdat (stringified tuple files for debugging; default=0)
437             ## export_cof => $bool, ##-- do/don't export cof.* (default=do)
438             ## export_tdf => $bool, ##-- do/don't export tdf.* (default=do)
439             sub dbexport {
440 0     0 0   my ($coldb,$outdir,%opts) = @_;
441 0 0         $coldb->logconfess("cannot dbexport() an un-opened DB") if (!$coldb->opened);
442 0   0       $outdir //= "$coldb->{dbdir}/export";
443 0           $outdir =~ s{/$}{};
444 0           $coldb->vlog('info', "export($outdir/)");
445              
446             ##-- options
447 0 0         my $export_sdat = exists($opts{export_sdat}) ? $opts{export_sdat} : 0;
448 0 0         my $export_cof = exists($opts{export_cof}) ? $opts{export_cof} : 1;
449 0 0         my $export_tdf = exists($opts{export_tdf}) ? $opts{export_tdf} : 1;
450              
451             ##-- create export directory
452 0 0 0       -d $outdir
453             or make_path($outdir)
454             or $coldb->logconfess("dbexport(): could not create export directory $outdir: $!");
455              
456             ##-- dump: header
457 0 0         $coldb->saveHeader("$outdir/header.json")
458             or $coldb->logconfess("dbexport(): could not export header to $outdir/header.json: $!");
459              
460             ##-- dump: load enums
461 0           my $adata = $coldb->attrData();
462 0           $coldb->vlog($coldb->{logExport}, "dbexport(): loading enums to memory");
463 0 0 0       $coldb->{xenum}->load() if ($coldb->{xenum} && !$coldb->{xenum}->loaded);
464 0           foreach (@$adata) {
465 0 0 0       $_->{enum}->load() if ($_->{enum} && !$_->{enum}->loaded);
466             }
467              
468             ##-- dump: common: stringification
469 0           my $pack_x = $coldb->{pack_x};
470 0           my ($xs2txt,$xi2txt);
471 0 0         if ($export_sdat) {
472 0           $coldb->vlog($coldb->{logExport}, "dbexport(): preparing tuple-stringification structures");
473              
474 0           foreach (@$adata) {
475 0           my $i2s = $_->{i2s} = $_->{enum}->toArray;
476 0   0 0     $_->{i2txt} = sub { return $i2s->[$_[0]//0]//''; };
  0   0        
477             }
478              
479 0           my $xi2s = $coldb->{xenum}->toArray;
480 0           my @ai2s = map {$_->{i2s}} @$adata;
  0            
481 0           my (@x);
482             $xs2txt = sub {
483 0     0     @x = unpack($pack_x,$_[0]);
484 0   0       return join("\t", (map {$ai2s[$_][$x[$_]//0]//''} (0..$#ai2s)), $x[$#x]//0);
  0   0        
      0        
485 0           };
486             $xi2txt = sub {
487 0   0 0     @x = unpack($pack_x, $xi2s->[$_[0]//0]//'');
      0        
488 0   0       return join("\t", (map {$ai2s[$_][$x[$_]//0]//''} (0..$#ai2s)), $x[$#x]//0);
  0   0        
      0        
489 0           };
490             }
491              
492             ##-- dump: xenum: raw
493 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting raw tuple-enum file $outdir/xenum.dat");
494 0 0         $coldb->{xenum}->saveTextFile("$outdir/xenum.dat", pack_s=>$pack_x)
495             or $coldb->logconfess("export failed for $outdir/xenum.dat");
496              
497             ##-- dump: xenum: stringified
498 0 0         if ($export_sdat) {
499 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting stringified tuple-enum file $outdir/xenum.sdat");
500 0 0         $coldb->{xenum}->saveTextFile("$outdir/xenum.sdat", pack_s=>$xs2txt)
501             or $coldb->logconfess("dbexport() failed for $outdir/xenum.sdat");
502             }
503              
504             ##-- dump: by attribute: enum
505 0           foreach (@$adata) {
506             ##-- dump: by attribute: enum
507 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute enum file $outdir/$_->{a}_enum.dat");
508 0 0         $_->{enum}->saveTextFile("$outdir/$_->{a}_enum.dat")
509             or $coldb->logconfess("dbexport() failed for $outdir/$_->{a}_enum.dat");
510             }
511              
512             ##-- dump: by attribute: a2x
513 0           foreach (@$adata) {
514             ##-- dump: by attribute: a2x: raw
515 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap $outdir/$_->{a}_2x.dat (raw)");
516 0 0         $_->{a2x}->saveTextFile("$outdir/$_->{a}_2x.dat")
517             or $coldb->logconfess("dbexport() failed for $outdir/$_->{a}_2x.dat");
518              
519             ##-- dump: by attribute: a2x: stringified
520 0 0         if ($export_sdat) {
521 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting attribute expansion multimap $outdir/$_->{a}_2x.sdat (strings)");
522 0 0         $_->{a2x}->saveTextFile("$outdir/$_->{a}_2x.sdat", a2s=>$_->{i2txt}, b2s=>$xi2txt)
523             or $coldb->logconfess("dbexport() failed for $outdir/$_->{a}_2x.sdat");
524             }
525             }
526              
527             ##-- dump: xf
528 0 0         if ($coldb->{xf}) {
529             ##-- dump: xf: raw
530 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting tuple-frequency index $outdir/xf.dat");
531 0           $coldb->{xf}->setFilters($coldb->{pack_f});
532 0 0         $coldb->{xf}->saveTextFile("$outdir/xf.dat", keys=>1)
533             or $coldb->logconfess("export failed for $outdir/xf.dat");
534 0           $coldb->{xf}->setFilters();
535              
536             ##-- dump: xf: stringified
537 0 0         if ($export_sdat) {
538 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting stringified tuple-frequency index $outdir/xf.sdat");
539 0 0         $coldb->{xf}->saveTextFile("$outdir/xf.sdat", key2s=>$xi2txt)
540             or $coldb->logconfess("dbexport() failed for $outdir/xf.sdat");
541             }
542             }
543              
544             ##-- dump: cof
545 0 0 0       if ($coldb->{cof} && $export_cof) {
546 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting raw co-frequency index $outdir/cof.dat");
547 0 0         $coldb->{cof}->saveTextFile("$outdir/cof.dat")
548             or $coldb->logconfess("export failed for $outdir/cof.dat");
549              
550 0 0         if ($export_sdat) {
551 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting stringified co-frequency index $outdir/cof.sdat");
552 0 0         $coldb->{cof}->saveTextFile("$outdir/cof.sdat", i2s=>$xi2txt)
553             or $coldb->logconfess("export failed for $outdir/cof.sdat");
554             }
555             }
556              
557             ##-- dump: tdf
558 0 0 0       if ($coldb->{tdf} && $coldb->{index_tdf} && $export_tdf) {
      0        
559 0           $coldb->vlog($coldb->{logExport}, "dbexport(): exporting term-document index $outdir/tdf.*");
560 0 0         $coldb->{tdf}->export("$outdir/tdf", $coldb)
561             or $coldb->logconfess("export failed for $outdir/tdf.*");
562             }
563              
564             ##-- all done
565 0           $coldb->vlog($coldb->{logExport}, "dbexport(): export to $outdir complete.");
566 0           return $coldb;
567             }
568              
569             ## $coldb = $coldb->dbimport()
570             ## $coldb = $coldb->dbimport($txtdir,%opts)
571             ## + import ColocDB data from $txtdir
572             ## + TODO
573             sub dbimport {
574 0     0 0   my ($coldb,$txtdir,%opts) = @_;
575 0 0         $coldb = $coldb->new() if (!ref($coldb));
576 0           $coldb->logconfess("dbimport(): not yet implemented");
577             }
578              
579             ##==============================================================================
580             ## Info
581              
582             ## \%info = $coldb->dbinfo()
583             ## + get db info
584             ## + INHERITED from DiaColloDB
585              
586              
587             ##==============================================================================
588             ## Profiling
589              
590             ##--------------------------------------------------------------
591             ## Profiling: Wrappers
592             ## + INHERITED from DiaColloDB::Client
593              
594             ## $mprf = $coldb->query($rel,%opts)
595             ## + get a generic DiaColloDB::Profile::Multi object for $rel
596             ## + calls $coldb->profile() or $coldb->compare() as appropriate
597             ## + INHERITED from DiaColloDB::Client
598              
599             ## $mprf = $coldb->profile1(%opts)
600             ## + get unigram frequency profile for selected items as a DiaColloDB::Profile::Multi object
601             ## + really just wraps $coldb->profile('xf', %opts)
602             ## + %opts: see profile() method
603             ## + INHERITED from DiaColloDB::Client
604              
605             ## $mprf = $coldb->profile2(%opts)
606             ## + get co-frequency profile for selected items as a DiaColloDB::Profile::Multi object
607             ## + really just wraps $coldb->profile('cof', %opts)
608             ## + %opts: see profile() method
609             ## + INHERITED from DiaColloDB::Client
610              
611             ## $mprf = $coldb->compare1(%opts)
612             ## + get unigram comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object
613             ## + really just wraps $coldb->compare('xf', %opts)
614             ## + %opts: see compare() method
615             ## + INHERITED from DiaColloDB::Client
616              
617             ## $mprf = $coldb->compare2(%opts)
618             ## + get co-frequency comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object
619             ## + really just wraps $coldb->compare('cof', %opts)
620             ## + %opts: see compare() method
621             ## + INHERITED from DiaColloDB::Client
622              
623              
624             ##--------------------------------------------------------------
625             ## Profiling: Utils
626              
627             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
628             ## $relname = $coldb->relname($rel)
629             ## + returns an appropriate relation name for profile() and friends
630             ## + returns $rel if $coldb->{$rel} supports a profile() method
631             ## + otherwise heuristically parses $relationName /xf|f?1|ug/ or /f1?2|c/
632             ## + INHERITED from DiaColloDB
633              
634             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
635             ## $obj_or_undef = $coldb->relation($rel)
636             ## + returns an appropriate relation-like object for profile() and friends
637             ## + wraps $coldb->{$coldb->relname($rel)}
638             ## + INHERITED from DiaColloDB
639              
640             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
641             ## @relnames = $coldb->relations()
642             ## + gets list of defined relations
643             ## + INHERITED from DiaColloDB
644              
645             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
646             ## \@ids = $coldb->enumIds($enum,$req,%opts)
647             ## + parses enum IDs for $req, which is one of:
648             ## - a DDC::Any::CQTokExact, ::CQTokInfl, ::CQTokSet, ::CQTokSetInfl, or ::CQTokRegex : interpreted
649             ## - an ARRAY-ref : list of literal symbol-values
650             ## - a Regexp ref : regexp for target strings, passed to $enum->re2i()
651             ## - a string /REGEX/ : regexp for target strings, passed to $enum->re2i()
652             ## - another string : space-, comma-, or |-separated list of literal values
653             ## + %opts:
654             ## logLevel => $logLevel, ##-- logging level (default=undef)
655             ## logPrefix => $prefix, ##-- logging prefix (default="enumIds(): fetch ids")
656             ## + INHERITED from DiaColloDB
657              
658             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
659             ## ($dfilter,$sliceLo,$sliceHi,$dateLo,$dateHi) = $coldb->parseDateRequest($dateRequest='', $sliceRequest=0, $fill=0, $ddcMode=0)
660             ## + parses date request and returns limit and filter information as a list (list context) or HASH-ref (scalar context);
661             ## + %dateRequest =
662             ## (
663             ## dfilter => $dfilter, ##-- filter-sub, called as: $wanted=$dfilter->($date); undef for none
664             ## slo => $sliceLo, ##-- minimum slice (inclusive)
665             ## shi => $sliceHi, ##-- maximum slice (inclusive)
666             ## dlo => $dateLo, ##-- minimum date (inclusive); undef for none, always defined if $fill is true
667             ## dhi => $dateHi, ##-- maximum date (inclusive); undef for none, always defined if $fill is true
668             ## )
669             ## + INHERITED from DiaColloDB
670              
671             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## \%slice2xids = $coldb->xidsByDate(\@xids, $dateRequest, $sliceRequest, $fill)
##  + filters \@xids by date and buckets the surviving ids by date-slice
##  + the date of each id is unpacked from the tuple string $coldb->{xenum}->i2s($xi),
##    at the byte offset following scalar(@{$coldb->{attrs}}) packed attribute ids
##  + an id is dropped if the filter-sub returned by parseDateRequest() rejects its date,
##    or if its date lies outside of [$coldb->{xdmin}, $coldb->{xdmax}]
##  + slice keys are int($date/$sliceRequest)*$sliceRequest, or 0 if $sliceRequest is false
##  + if both $fill and $sliceRequest are true, every slice between the slice bounds
##    returned by parseDateRequest() gets a (possibly empty) entry
##  + returns a HASH-ref ($sliceKey => \@xids_in_slice, ...)
##  + OBSOLETE in DiaColloDB
sub xidsByDate {
  my ($coldb,$xids,$date,$slice,$fill) = @_;

  ##-- only the filter-sub and the slice bounds of the parsed request are needed here
  my ($wanted,$slice_lo,$slice_hi) = $coldb->parseDateRequest($date,$slice,$fill);

  ##-- unpack-template for the date component: skip over the packed attribute ids
  my $xenum    = $coldb->{xenum};
  my $id_len   = packsize($coldb->{pack_id});
  my $date_off = $id_len * scalar(@{$coldb->{attrs}});
  my $date_tpl = '@' . $date_off . $coldb->{pack_date};

  ##-- filter & bucket ids: ($sliceKey => \@xids_in_slice, ...)
  my %by_slice;
  for my $xid (@$xids) {
    my $xdate = unpack($date_tpl, $xenum->i2s($xid));
    next if ($wanted && !$wanted->($xdate));                              ##-- rejected by request filter
    next if ($xdate < $coldb->{xdmin} || $xdate > $coldb->{xdmax});      ##-- outside of db date-range
    my $key = $slice ? int($xdate/$slice)*$slice : 0;
    push(@{$by_slice{$key}}, $xid);
  }

  ##-- force-fill: ensure that each slice in range has an entry
  if ($fill && $slice) {
    for (my $s=$slice_lo; $s <= $slice_hi; $s += $slice) {
      $by_slice{$s} //= [];
    }
  }

  return \%by_slice;
}
705              
706             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
707             ## $compiler = $coldb->qcompiler();
708             ## + get DDC::Any::CQueryCompiler for this object (cached in $coldb->{_qcompiler})
709             ## + INHERITED from DiaColloDB
710              
711             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
712             ## $cquery_or_undef = $coldb->qparse($ddc_query_string)
713             ## + wraps parse in an eval {...} block and sets $coldb->{error} on failure
714             ## + INHERITED from DiaColloDB
715              
716              
717             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
718             ## $cquery = $coldb->parseQuery([[$attr1,$val1],...], %opts) ##-- compat: ARRAY-of-ARRAYs
719             ## $cquery = $coldb->parseQuery(["$attr1:$val1",...], %opts) ##-- compat: ARRAY-of-requests
720             ## $cquery = $coldb->parseQuery({$attr1=>$val1, ...}, %opts) ##-- compat: HASH
721             ## $cquery = $coldb->parseQuery("$attr1=$val1, ...", %opts) ##-- compat: string
722             ## $cquery = $coldb->parseQuery($ddcQueryString, %opts) ##-- ddc string (with shorthand ","->WITH, "&&"->WITH)
723             ## + guts for parsing user target and groupby requests
724             ## + returns a DDC::Any::CQuery object representing the request
725             ## + index-only items "$l" are mapped to $l=@{}
726             ## + %opts:
727             ## warn => $level, ##-- log-level for unknown attributes (default: 'warn')
728             ## logas => $reqtype, ##-- request type for warnings
729             ## default => $attr, ##-- default attribute (for query requests)
730             ## mapand => $bool, ##-- map CQAnd to CQWith? (default=true unless '&&' occurs in query string)
731             ## ddcmode => $bool, ##-- force ddc query mode? (default=false)
732             ## + INHERITED from DiaColloDB
733              
734             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
735             ## \@aqs = $coldb->queryAttributes($cquery,%opts)
736             ## + utility for decomposing DDC queries into attribute-wise requests
737             ## + returns an ARRAY-ref [[$attr1,$val1], ...]
738             ## + each value $vali is empty or undef (all values), a CQTokSet, a CQTokExact, CQTokRegex, or CQTokAny
739             ## + chokes on unsupported query types or filters
740             ## + %opts:
741             ## warn => $level, ##-- log-level for unknown attributes (default: 'warn')
742             ## logas => $reqtype, ##-- request type for warnings
743             ## default => $attr, ##-- default attribute (for query requests)
744             ## allowExtra => \@attrs, ##-- allow extra attributes @attrs (may also be HASH-ref)
745             ## allowUnknown => $bool, ##-- allow unknown attributes? (default: 0)
746             ## + INHERITED from DiaColloDB
747              
748             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
749             ## \@aqs = $coldb->parseRequest($request, %opts)
750             ## + guts for parsing user target and groupby requests into attribute-wise ARRAY-ref [[$attr1,$val1], ...]
751             ## + see parseQuery() method for supported $request formats and %opts
752             ## + wraps $coldb->queryAttributes($coldb->parseQuery($request,%opts))
753             ## + INHERITED from DiaColloDB
754              
755             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## \%groupby = $coldb->groupby($groupby_request, %opts)
## \%groupby = $coldb->groupby(\%groupby,        %opts)
##  + $groupby_request : see parseRequest()
##  + a HASH-ref argument is assumed to be an already-compiled \%groupby and is returned unchanged
##  + returns a HASH-ref:
##    (
##     req    => $request,   ##-- the original request
##     areqs  => \@areqs,    ##-- parsed attribute requests ([$attr,$ahaving],...), from parseRequest()
##     attrs  => \@attrs,    ##-- attribute names (first element of each @areqs item)
##     titles => \@titles,   ##-- map {$coldb->attrTitle($_)} @attrs
##     xpack  => \@xpack,    ##-- group-attribute-wise pack-templates, given @xtuple ($coldb->{"pack_x$attr"})
##     gpack  => \@gpack,    ##-- group-attribute-wise pack-templates, given @gtuple
##     xs2g   => \&xs2g,     ##-- group-tuple extraction code ($xs => $gtuple), for packed x-tuple strings
##     xi2g   => \&xi2g,     ##-- group-tuple extraction code ($xi => $gtuple), for x-tuple ids (looked up via $xenum->i2s())
##     g2s    => \&g2s,      ##-- stringification sub ($gtuple => "\t"-joined attribute strings)
##     g2txt  => \&g2txt,    ##-- pseudo-stringification sub ($gtuple => "\t"-joined decimal ids)
##    )
##  + if any attribute has a "having" restriction other than a DDC::Any::CQTokAny,
##    xs2g and xi2g return undef for tuples which fail the restriction
##  + %opts:
##     xenum => $xenum,      ##-- enum to use for \&xi2g (default: $coldb->{xenum})
##     ...                   ##-- all %opts are also passed on to parseRequest()
##  + NOTE: the generated subs share a single scratch array (@gi)
##  + OVERRIDES DiaColloDB
sub groupby {
  my ($coldb,$gbreq,%opts) = @_;

  ##-- pass-through for pre-compiled groupby structures
  return $gbreq if (UNIVERSAL::isa($gbreq,'HASH'));

  ##-- attribute requests, names, and titles
  my $areqs  = $coldb->parseRequest($gbreq, %opts, logas=>'groupby');
  my @attrs  = map {$_->[0]} @$areqs;
  my @titles = map {$coldb->attrTitle($_)} @attrs;

  ##-- pack templates
  ##  + $xenum is referenced by name from the generated xi2g code below
  my $xenum   = $opts{xenum} // $coldb->{xenum};
  my $id_tpl  = $coldb->{pack_id};
  my $ids_tpl = "($id_tpl)*";
  my $id_len  = packsize($id_tpl);
  my @xpack   = map {$coldb->{"pack_x$_"}} @attrs;
  my $xtpl    = join('',@xpack);
  my @gpack   = map {'@'.($_*$id_len).$id_tpl} (0..$#attrs);

  ##-- having-filters: one id-set (HASH-ref) or undef per attribute
  ##  + @gbids is referenced by name from the generated aggregation code below
  my @gbids;
  foreach my $areq (@$areqs) {
    if ($areq->[1] && !UNIVERSAL::isa($areq->[1],'DDC::Any::CQTokAny')) {
      my %idset = (
		   map {($_=>undef)}
		   @{$coldb->enumIds($coldb->{"$areq->[0]enum"}, $areq->[1], logLevel=>$coldb->{logProfile}, logPrefix=>"groupby(): fetch filter ids: $areq->[0]")}
		  );
      push(@gbids, \%idset);
    }
    else {
      push(@gbids, undef);
    }
  }

  ##-- aggregation code: packed tuple-string -> packed group-tuple
  ##  + @gi is scratch space shared by the generated subs
  my @gi;
  my $xs2g_code;
  if (grep {$_} @gbids) {
    ##-- with having-filters: reject tuples whose restricted components are not in the id-set
    my @filtered = grep {defined($gbids[$_])} (0..$#gbids);
    my $reject   = join(' || ', map {sprintf('!exists($gbids[%d]{$gi[%d]})', $_, $_)} @filtered);
    $xs2g_code   = sprintf(q{ @gi=unpack('%s',$_[0]); return undef if (%s); return pack('%s',@gi); }, $xtpl, $reject, $ids_tpl);
  }
  else {
    ##-- no filters: just project the group attributes
    $xs2g_code = sprintf(q{ pack('%s', unpack('%s', $_[0])) }, $ids_tpl, $xtpl);
  }
  my $xs2g = eval "sub {$xs2g_code}";
  unless ($xs2g) {
    $coldb->logconfess($coldb->{error}="groupby(): could not compile tuple-based aggregation code sub {$xs2g_code}: $@");
  }
  $@ = '';

  ##-- aggregation code: tuple-id -> packed group-tuple (same code, argument looked up in $xenum first)
  my $xi2g_code = $xs2g_code;
  $xi2g_code =~ s/\$_\[0\]/\$xenum->i2s(\$_[0])/;
  my $xi2g = eval "sub {$xi2g_code}";
  unless ($xi2g) {
    $coldb->logconfess($coldb->{error}="groupby(): could not compile id-base aggregation code sub {$xi2g_code}: $@");
  }
  $@ = '';

  ##-- stringification code: packed group-tuple -> attribute strings
  ##  + $genum and @genums are referenced by name from the generated code
  my ($genum,@genums,$g2scode);
  if (@attrs == 1) {
    ##-- single attribute: direct enum lookup
    $genum   = $coldb->{$attrs[0]."enum"};
    $g2scode = sprintf(q{ $genum->i2s(unpack('%s',$_[0])) }, $id_tpl);
  }
  else {
    ##-- multiple attributes: tab-joined enum lookups
    @genums  = map {$coldb->{$_."enum"}} @attrs;
    my $lookups = join(', ', map {sprintf('$genums[%d]->i2s($gi[%d])', $_, $_)} (0..$#genums));
    $g2scode = sprintf(q{ @gi=unpack('%s', $_[0]);  join("\t",%s)}, $ids_tpl, $lookups);
  }
  my $g2s = eval "sub {$g2scode}";
  unless ($g2s) {
    $coldb->logconfess($coldb->{error}="groupby(): could not compile stringification code sub {$g2scode}: $@");
  }
  $@ = '';

  ##-- pseudo-stringification code: packed group-tuple -> "\t"-joined decimal integer ids
  my $g2txt_code = (@attrs == 1
		    ? sprintf(q{ unpack('%s',$_[0]) }, $id_tpl)
		    : sprintf(qq{ join("\t",unpack('%s', \$_[0])); }, $ids_tpl));
  my $g2txt = eval "sub {$g2txt_code}";
  unless ($g2txt) {
    $coldb->logconfess($coldb->{error}="groupby(): could not compile pseudo-stringification code sub {$g2txt_code}: $@");
  }
  $@ = '';

  ##-- assemble result
  return {
	  req    => $gbreq,
	  areqs  => $areqs,
	  attrs  => \@attrs,
	  titles => \@titles,
	  xpack  => \@xpack,
	  gpack  => \@gpack,
	  xs2g   => $xs2g,
	  xi2g   => $xi2g,
	  g2s    => $g2s,
	  g2txt  => $g2txt,
	 };
}
873              
874             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
875             ## $cqfilter = $coldb->query2filter($attr,$cquery,%opts)
876             ## + converts a CQToken to a CQFilter, for ddc parsing
877             ## + %opts:
878             ## logas => $logas, ##-- log-prefix for warnings
879             ## + INHERITED from DiaColloDB
880              
881             ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
882             ## ($CQCountKeyExprs,\$CQRestrict,\@CQFilters) = $coldb->parseGroupBy($groupby_string_or_request,%opts)
883             ## + for ddc-mode parsing
884             ## + %opts:
885             ## date => $date,
886             ## slice => $slice,
887             ## matchid => $matchid, ##-- default match-id
888             ## + INHERITED from DiaColloDB
889              
890             ##--------------------------------------------------------------
891             ## Profiling: Generic
892              
893             ## $mprf = $coldb->profile($relation, %opts)
894             ## + get a relation profile for selected items as a DiaColloDB::Profile::Multi object
895             ## + %opts:
896             ## (
897             ## ##-- selection parameters
898             ## query => $query, ##-- target request ATTR:REQ...
899             ## date => $date1, ##-- string or array or range "MIN-MAX" (inclusive) : default=all
900             ## ##
901             ## ##-- aggregation parameters
902             ## slice => $slice, ##-- date slice (default=1, 0 for global profile)
903             ## groupby => $groupby, ##-- string or array "ATTR1[:HAVING1] ...": default=$coldb->attrs; see groupby() method
904             ## ##
905             ## ##-- scoring and trimming parameters
906             ## eps => $eps, ##-- smoothing constant (default=0)
907             ## score => $func, ##-- scoring function (f|fm|lf|lfm|mi|ld) : default="f"
908             ## kbest => $k, ##-- return only $k best collocates per date (slice) : default=-1:all
909             ## cutoff => $cutoff, ##-- minimum score
910             ## global => $bool, ##-- trim profiles globally (vs. locally for each date-slice?) (default=0)
911             ## ##
912             ## ##-- profiling and debugging parameters
913             ## strings => $bool, ##-- do/don't stringify (default=do)
914             ## fill => $bool, ##-- if true, returned multi-profile will have null profiles inserted for missing slices
915             ## onepass => $bool, ##-- if true, use fast but incorrect 1-pass method (Cofreqs profiling only)
916             ## )
917             ## + sets default %opts and wraps $coldb->relation($rel)->profile($coldb, %opts)
918             ## + INHERITED from DiaColloDB
919              
920             ## \%opts = $CLASS_OR_OBJECT->profileOptions(\%opts)
921             ## + instantiates default options for profile() method
922             ## + INHERITED from DiaColloDB
923              
924             ##--------------------------------------------------------------
925             ## Profiling: Comparison (diff)
926              
927             ## $mprf = $coldb->compare($relation, %opts)
928             ## + get a relation comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object
929             ## + %opts:
930             ## (
931             ## ##-- selection parameters
932             ## (a|b)?query => $query, ##-- target query as for parseRequest()
933             ## (a|b)?date => $date1, ##-- string or array or range "MIN-MAX" (inclusive) : default=all
934             ## ##
935             ## ##-- aggregation parameters
936             ## groupby => $groupby, ##-- string or array "ATTR1[:HAVING1] ...": default=$coldb->attrs; see groupby() method
937             ## (a|b)?slice => $slice, ##-- date slice (default=1, 0 for global profile)
938             ## ##
939             ## ##-- scoring and trimming parameters
940             ## eps => $eps, ##-- smoothing constant (default=0)
941             ## score => $func, ##-- scoring function (f|fm|lf|lfm|mi|ld) : default="f"
942             ## kbest => $k, ##-- return only $k best collocates per date (slice) : default=-1:all
943             ## cutoff => $cutoff, ##-- minimum score (UNUSED for comparison profiles)
944             ## global => $bool, ##-- trim profiles globally (vs. locally for each date-slice?) (default=0)
945             ## diff => $diff, ##-- low-level score-diff operation (diff|adiff|sum|min|max|avg|havg); default='adiff'
946             ## ##
947             ## ##-- profiling and debugging parameters
948             ## strings => $bool, ##-- do/don't stringify (default=do)
949             ## )
950             ## + sets default %opts and wraps $coldb->relation($rel)->compare($coldb, %opts)
951             ## + INHERITED from DiaColloDB
952              
953             ## \%opts = $CLASS_OR_OBJECT->compareOptions(\%opts)
954             ## + instantiates default options for compare() method
955             ## + INHERITED from DiaColloDB
956              
957             ##==============================================================================
958             ## Footer
959             1;
960              
961             __END__