| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ## -*- Mode: CPerl -*- | 
| 2 |  |  |  |  |  |  | ## File: DiaColloDB.pm | 
| 3 |  |  |  |  |  |  | ## Author: Bryan Jurish <moocow@cpan.org> | 
| 4 |  |  |  |  |  |  | ## Description: collocation db, top-level | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | package DiaColloDB; | 
| 7 | 2 |  |  | 2 |  | 172160 | use 5.010; ##-- v5.10.0: for // operator | 
|  | 2 |  |  |  |  | 8 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 2 |  |  | 2 |  | 1310 | use DiaColloDB::Compat; | 
|  | 2 |  |  |  |  | 4 |  | 
|  | 2 |  |  |  |  | 68 |  | 
| 10 | 2 |  |  | 2 |  | 1964 | use DiaColloDB::Client; | 
|  | 1 |  |  |  |  | 12 |  | 
|  | 1 |  |  |  |  | 86 |  | 
| 11 | 1 |  |  | 1 |  | 9 | use DiaColloDB::Logger; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 12 | 1 |  |  | 1 |  | 657 | use DiaColloDB::EnumFile; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 13 |  |  |  |  |  |  | #use DiaColloDB::EnumFile::Identity; | 
| 14 | 1 |  |  | 1 |  | 501 | use DiaColloDB::EnumFile::FixedLen; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 41 |  | 
| 15 | 1 |  |  | 1 |  | 463 | use DiaColloDB::EnumFile::FixedMap; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 16 | 1 |  |  | 1 |  | 468 | use DiaColloDB::EnumFile::MMap; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 42 |  | 
| 17 | 1 |  |  | 1 |  | 471 | use DiaColloDB::EnumFile::Tied; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 18 | 1 |  |  | 1 |  | 469 | use DiaColloDB::MultiMapFile; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 38 |  | 
| 19 | 1 |  |  | 1 |  | 526 | use DiaColloDB::MultiMapFile::MMap; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 35 |  | 
| 20 | 1 |  |  | 1 |  | 499 | use DiaColloDB::PackedFile; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 37 |  | 
| 21 | 1 |  |  | 1 |  | 481 | use DiaColloDB::PackedFile::MMap; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 43 |  | 
| 22 | 1 |  |  | 1 |  | 480 | use DiaColloDB::Relation; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 40 |  | 
| 23 | 1 |  |  | 1 |  | 542 | use DiaColloDB::Relation::Unigrams; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 38 |  | 
| 24 | 1 |  |  | 1 |  | 543 | use DiaColloDB::Relation::Cofreqs; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 25 | 1 |  |  | 1 |  | 546 | use DiaColloDB::Relation::DDC; | 
|  | 1 |  |  |  |  | 5 |  | 
|  | 1 |  |  |  |  | 41 |  | 
| 26 |  |  |  |  |  |  | #use DiaColloDB::Relation::TDF; ##-- loaded on-demand | 
| 27 | 1 |  |  | 1 |  | 7 | use DiaColloDB::Profile; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 21 |  | 
| 28 | 1 |  |  | 1 |  | 6 | use DiaColloDB::Profile::Multi; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 29 | 1 |  |  | 1 |  | 531 | use DiaColloDB::Profile::MultiDiff; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 30 | 1 |  |  | 1 |  | 433 | use DiaColloDB::Corpus; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 33 |  | 
| 31 | 1 |  |  | 1 |  | 504 | use DiaColloDB::Corpus::Compiled; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 43 |  | 
| 32 | 1 |  |  | 1 |  | 8 | use DiaColloDB::Corpus::Filters qw(:defaults); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 39 |  | 
| 33 | 1 |  |  | 1 |  | 288 | use DiaColloDB::Persistent; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 27 |  | 
| 34 | 1 |  |  | 1 |  | 5 | use DiaColloDB::Utils qw(:math :fcntl :json :sort :pack :regex :file :si :run :env :temp :jobs); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 35 |  |  |  |  |  |  | #use DiaColloDB::Temp::Vec; | 
| 36 | 1 |  |  | 1 |  | 1267 | use DiaColloDB::Timer; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 37 | 1 |  |  | 1 |  | 986 | use DDC::Any; ##-- for query parsing | 
|  | 1 |  |  |  |  | 22482 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 38 | 1 |  |  | 1 |  | 77730 | use Fcntl; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 78 |  | 
| 39 | 1 |  |  | 1 |  | 350 | use File::Path qw(make_path remove_tree); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 40 | 1 |  |  | 1 |  | 116 | use version; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 11 |  | 
| 41 | 1 |  |  | 1 |  | 77 | use strict; | 
|  | 1 |  |  |  |  | 12 |  | 
|  | 1 |  |  |  |  | 540 |  | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | ##============================================================================== | 
| 45 |  |  |  |  |  |  | ## Globals & Constants | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | our $VERSION = "0.12.019"; | 
| 48 |  |  |  |  |  |  | our @ISA = qw(DiaColloDB::Client); | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | ## $TDF_MGOOD_DEFAULT | 
| 51 |  |  |  |  |  |  | ##  + default positive meta-field regex for document parsing (tdf only) | 
| 52 |  |  |  |  |  |  | ##  + don't use qr// here, since Storable doesn't like pre-compiled Regexps | 
| 53 |  |  |  |  |  |  | our $TDF_MGOOD_DEFAULT = q/^(?:author|pnd|title|basename|collection|flags|textClass|genre|country|region|party|role)$/; | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | ## $TDF_MBAD_DEFAULT | 
| 56 |  |  |  |  |  |  | ##  + default negative meta-field regex for document parsing (tdf only) | 
| 57 |  |  |  |  |  |  | ##  + don't use qr// here, since Storable doesn't like pre-compiled Regexps. | 
| 58 |  |  |  |  |  |  | our $TDF_MBAD_DEFAULT = q/_$/; | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | ## $ECLASS | 
| 61 |  |  |  |  |  |  | ##  + enum class | 
| 62 |  |  |  |  |  |  | #our $ECLASS = 'DiaColloDB::EnumFile'; | 
| 63 |  |  |  |  |  |  | our $ECLASS = 'DiaColloDB::EnumFile::MMap'; | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | ## $XECLASS | 
| 66 |  |  |  |  |  |  | ##  + fixed-length enum class | 
| 67 |  |  |  |  |  |  | #our $XECLASS = 'DiaColloDB::EnumFile::FixedLen'; | 
| 68 |  |  |  |  |  |  | our $XECLASS = 'DiaColloDB::EnumFile::FixedLen::MMap'; | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | ## $MMCLASS | 
| 71 |  |  |  |  |  |  | ##  + multimap class | 
| 72 |  |  |  |  |  |  | #our $MMCLASS = 'DiaColloDB::MultiMapFile'; | 
| 73 |  |  |  |  |  |  | our $MMCLASS = 'DiaColloDB::MultiMapFile::MMap'; | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | ## %TDF_OPTS : tdf: default options for DiaColloDB::Relation::TDF->new() | 
| 76 |  |  |  |  |  |  | our %TDF_OPTS = ( | 
| 77 |  |  |  |  |  |  | mgood => $TDF_MGOOD_DEFAULT, ##-- positive filter regex for metadata attributes | 
| 78 |  |  |  |  |  |  | mbad  => $TDF_MBAD_DEFAULT,  ##-- negative filter regex for metadata attributes | 
| 79 |  |  |  |  |  |  | ## | 
| 80 |  |  |  |  |  |  | minFreq=>undef,    ##-- minimum total term-frequency for model inclusion (default=from $coldb->{tfmin}) | 
| 81 |  |  |  |  |  |  | minDocFreq=>4,     ##-- minimim "doc-frequency" (#/docs per term) for model inclusion | 
| 82 |  |  |  |  |  |  | minDocSize=>4,     ##-- minimum doc size (#/tokens per doc) for model inclusion (default=8; formerly $coldb->{vbnmin}) | 
| 83 |  |  |  |  |  |  | ##   + for kern[page?] (n:%sigs,%toks): 1:0%,0%, 2:5.1%,0.5%, 4:18%,1.6%, 5:22%,2.3%, 8:34%,4.6%, 10:40%,6.5%, 16:54%,12.8% | 
| 84 |  |  |  |  |  |  | maxDocSize=>'inf', ##-- maximum doc size (#/tokens per doc) for model inclusion (default=inf; formerly $coldb->{vbnmax}) | 
| 85 |  |  |  |  |  |  | ## | 
| 86 |  |  |  |  |  |  | #smoothf=>1,       ##-- smoothing constant | 
| 87 |  |  |  |  |  |  | #saveMem=>1, 	    ##-- slower but memory-friendlier compilation | 
| 88 |  |  |  |  |  |  | ## | 
| 89 |  |  |  |  |  |  | vtype=>'float',    ##-- store compiled values as 32-bit floats | 
| 90 |  |  |  |  |  |  | itype=>'long',     ##-- store compiled indices as 32-bit integers | 
| 91 |  |  |  |  |  |  | ); | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | ## $NJOBS | 
| 94 |  |  |  |  |  |  | ##  + number of parallel jobs for various operations | 
| 95 |  |  |  |  |  |  | ##  + setting this to 0 (zero) will run in pure serial | 
| 96 |  |  |  |  |  |  | ##  + on unix/linux, setting this to "-1" will use the total number of cores on your system, | 
| 97 |  |  |  |  |  |  | ##    otherwise behaves like 0 | 
| 98 |  |  |  |  |  |  | our $NJOBS = -1; | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | ##============================================================================== | 
| 101 |  |  |  |  |  |  | ## Constructors etc. | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | ## $coldb = CLASS_OR_OBJECT->new(%args) | 
| 104 |  |  |  |  |  |  | ## + %args, object structure: | 
| 105 |  |  |  |  |  |  | ##   ( | 
| 106 |  |  |  |  |  |  | ##    ##-- options | 
| 107 |  |  |  |  |  |  | ##    dbdir => $dbdir,    ##-- database directory; REQUIRED | 
| 108 |  |  |  |  |  |  | ##    flags => $fcflags,  ##-- fcntl flags or open()-style mode string; default='r' | 
| 109 |  |  |  |  |  |  | ##    attrs => \@attrs,   ##-- index attributes (input as space-separated or array; compiled to array); default=undef (==>['l']) | 
| 110 |  |  |  |  |  |  | ##                        ##    + each attribute can be token-attribute qw(w p l) or a document metadata attribute "doc.ATTR" | 
| 111 |  |  |  |  |  |  | ##                        ##    + document "date" attribute is always indexed | 
| 112 |  |  |  |  |  |  | ##    info => \%info,     ##-- additional data to return in info() method (e.g. collection, maintainer) | 
| 113 |  |  |  |  |  |  | ##    #bos => $bos,        ##-- special string to use for BOS, undef or empty for none (default=undef) DISABLED | 
| 114 |  |  |  |  |  |  | ##    #eos => $eos,        ##-- special string to use for EOS, undef or empty for none (default=undef) DISABLED | 
| 115 |  |  |  |  |  |  | ##    pack_id => $fmt,    ##-- pack-format for IDs (default='N') | 
| 116 |  |  |  |  |  |  | ##    pack_f  => $fmt,    ##-- pack-format for frequencies (default='N') | 
| 117 |  |  |  |  |  |  | ##    pack_date => $fmt,  ##-- pack-format for dates (default='n') | 
| 118 |  |  |  |  |  |  | ##    pack_off => $fmt,   ##-- pack-format for file offsets (default='N') | 
| 119 |  |  |  |  |  |  | ##    pack_len => $len,   ##-- pack-format for string lengths (default='n') | 
| 120 |  |  |  |  |  |  | ##    dmax  => $dmax,     ##-- maximum distance for collocation-frequencies and implicit ddc near() queries (default=5) | 
| 121 |  |  |  |  |  |  | ##    cfmin => $cfmin,    ##-- minimum co-occurrence frequency for Cofreqs and ddc queries (default=2) | 
| 122 |  |  |  |  |  |  | ##    tfmin => $tfmin,    ##-- minimum global term-frequency WITHOUT date component (default=2) | 
| 123 |  |  |  |  |  |  | ##    fmin_${a} => $fmin, ##-- minimum independent frequency for value of attribute ${a} (default=undef:from $tfmin) | 
| 124 |  |  |  |  |  |  | ##    keeptmp => $bool,   ##-- keep temporary files? (default=0) | 
| 125 |  |  |  |  |  |  | ##    mmap => $bool,      ##-- use mmap() subclasses if available? (default: true) | 
| 126 |  |  |  |  |  |  | ##    debug => $bool,     ##-- enable painful debugging code? (default: false) | 
| 127 |  |  |  |  |  |  | ##    index_tdf => $bool, ##-- tdf: create/use (term x document) frequency matrix index? (default=undef: if available) | 
| 128 |  |  |  |  |  |  | ##    index_cof => $bool, ##-- cof: create/use co-frequency index (default=1) | 
| 129 |  |  |  |  |  |  | ##    index_xf => $bool,  ##-- xf: create/use unigram index (default=1) | 
| 130 |  |  |  |  |  |  | ##    dbreak => $dbreak,  ##-- tdf: use break-type $break for tdf index (default=undef: files) | 
| 131 |  |  |  |  |  |  | ##    tdfopts=>\%tdfopts, ##-- tdf: options for DiaColloDB::Relation::TDF->new(); default=undef (all inherited from %TDF_OPTS) | 
| 132 |  |  |  |  |  |  | ##    ## | 
| 133 |  |  |  |  |  |  | ##    ##-- runtime ddc relation options | 
| 134 |  |  |  |  |  |  | ##    ddcServer => "$host:$port", ##-- server for ddc relation | 
| 135 |  |  |  |  |  |  | ##    ddcTimeout => $seconds,     ##-- timeout for ddc relation | 
| 136 |  |  |  |  |  |  | ##    ## | 
| 137 |  |  |  |  |  |  | ##    ##-- source filtering (for create()) | 
| 138 |  |  |  |  |  |  | ##    pgood  => $regex,   ##-- positive filter regex for part-of-speech tags | 
| 139 |  |  |  |  |  |  | ##    pbad   => $regex,   ##-- negative filter regex for part-of-speech tags | 
| 140 |  |  |  |  |  |  | ##    wgood  => $regex,   ##-- positive filter regex for word text | 
| 141 |  |  |  |  |  |  | ##    wbad   => $regex,   ##-- negative filter regex for word text | 
| 142 |  |  |  |  |  |  | ##    lgood  => $regex,   ##-- positive filter regex for lemma text | 
| 143 |  |  |  |  |  |  | ##    lbad   => $regex,   ##-- negative filter regex for lemma text | 
| 144 |  |  |  |  |  |  | ##    ## | 
| 145 |  |  |  |  |  |  | ##    ##-- logging | 
| 146 |  |  |  |  |  |  | ##    logOpen => $level,        ##-- log-level for open/close (default='info') | 
| 147 |  |  |  |  |  |  | ##    logCreate => $level,      ##-- log-level for create messages (default='info') | 
| 148 |  |  |  |  |  |  | ##    logThread => $level,      ##-- log-level for multithreading operations (default='debug') | 
| 149 |  |  |  |  |  |  | ##    logCorpusFile => $level,  ##-- log-level for corpus file-parsing (default='info') | 
| 150 |  |  |  |  |  |  | ##    logCorpusFileN => $N,     ##-- log corpus file-parsing only for every N files (0 for none; default:undef ~ $corpus->size()/100) | 
| 151 |  |  |  |  |  |  | ##    logExport => $level,      ##-- log-level for export messages (default='info') | 
| 152 |  |  |  |  |  |  | ##    logProfile => $level,     ##-- log-level for verbose profiling messages (default='trace') | 
| 153 |  |  |  |  |  |  | ##    logRequest => $level,     ##-- log-level for request-level profiling messages (default='debug') | 
| 154 |  |  |  |  |  |  | ##    logCompat => $level,      ##-- log-level for compatibility warnings (default='warn') | 
| 155 |  |  |  |  |  |  | ##    ## | 
| 156 |  |  |  |  |  |  | ##    ##-- runtime limits | 
| 157 |  |  |  |  |  |  | ##    maxExpand => $size,   ##-- maximum number of elements in query expansions (default=65535) | 
| 158 |  |  |  |  |  |  | ##    ## | 
| 159 |  |  |  |  |  |  | ##    ##-- administrivia | 
| 160 |  |  |  |  |  |  | ##    version=>$version,    ##-- DiaColloDB version of stored db (==$DiaColloDB::VERSION) | 
| 161 |  |  |  |  |  |  | ##    upgraded=>\@upgraded, ##-- optional administrative information about auto-magic upgrades | 
| 162 |  |  |  |  |  |  | ##    ## | 
| 163 |  |  |  |  |  |  | ##    ##-- attribute data | 
| 164 |  |  |  |  |  |  | ##    ${a}enum => $aenum,   ##-- attribute enum: $aenum : ($dbdir/${a}_enum.*) : $astr<=>$ai : A*<=>N | 
| 165 |  |  |  |  |  |  | ##                          ##    e.g.  lemmata: $lenum : ($dbdir/l_enum.*   )  : $lstr<=>$li : A*<=>N | 
| 166 |  |  |  |  |  |  | ##    ${a}2t   => $a2t,     ##-- attribute multimap: $a2t : ($dbdir/${a}_2t.*) : $ai=>@tis  : N=>N* | 
| 167 |  |  |  |  |  |  | ##    pack_t$a => $fmt      ##-- pack format: extract attribute-id $ai from a packed tuple-string $ts ; $ai=unpack($coldb->{"pack_t$a"},$ts) | 
| 168 |  |  |  |  |  |  | ##    ## | 
| 169 |  |  |  |  |  |  | ##    ##-- tuple data (-dates) | 
| 170 |  |  |  |  |  |  | ##    ##   + as of v0.10.000, packed term tuples EXCLUDING dates ("t-tuples") are mapped by $coldb->{tenum} | 
| 171 |  |  |  |  |  |  | ##    ##   + prior to v0.10.000, term tuples INCLUDING dates ("x-tuples") were mapped by $coldb->{xenum}, now obsolete | 
| 172 |  |  |  |  |  |  | ##    tenum  => $tenum,     ##-- enum: tuples ($dbdir/tenum.*) : \@ais<=>$ti : N*<=>N | 
| 173 |  |  |  |  |  |  | ##    pack_t => $fmt,       ##-- symbol pack-format for $tenum : "${pack_id}[Nattrs]" | 
| 174 |  |  |  |  |  |  | ##    xdmin => $xdmin,      ##-- minimum date (>= v0.04) | 
| 175 |  |  |  |  |  |  | ##    xdmax => $xdmax,      ##-- maximum date (>= v0.04) | 
| 176 |  |  |  |  |  |  | ##    ## | 
| 177 |  |  |  |  |  |  | ##    ##-- relation data | 
| 178 |  |  |  |  |  |  | ##    #xf    => $xf,       ##-- ug: $xi => $f($xi) : N=>N | 
| 179 |  |  |  |  |  |  | ##    #cof   => $cof,      ##-- cf: [$xi1,$xi2] => $f12 | 
| 180 |  |  |  |  |  |  | ##    xf    => $xf,       ##-- ug: [$ti,$date]       => f($ti,$date) | 
| 181 |  |  |  |  |  |  | ##    cof   => $cof,      ##-- cf: [$ti1,$date,$ti2] => f($ti1,$date,$ti2) | 
| 182 |  |  |  |  |  |  | ##    ddc   => $ddc,      ##-- ddc: ddc client relation | 
| 183 |  |  |  |  |  |  | ##    tdf   => $tdf,      ##-- tdf: (term x document) frequency matrix relation | 
| 184 |  |  |  |  |  |  | ##   ) | 
| 185 |  |  |  |  |  |  | sub new { | 
| 186 | 0 |  |  | 0 | 1 |  | my $that = shift; | 
| 187 |  |  |  |  |  |  | my $coldb  = bless({ | 
| 188 |  |  |  |  |  |  | ##-- options | 
| 189 |  |  |  |  |  |  | dbdir => undef, | 
| 190 |  |  |  |  |  |  | flags => 'r', | 
| 191 |  |  |  |  |  |  | attrs => undef, | 
| 192 |  |  |  |  |  |  | #bos => undef, | 
| 193 |  |  |  |  |  |  | #eos => undef, | 
| 194 |  |  |  |  |  |  | pack_id => 'N', | 
| 195 |  |  |  |  |  |  | pack_f  => 'N', | 
| 196 |  |  |  |  |  |  | pack_date => 'n', | 
| 197 |  |  |  |  |  |  | pack_off => 'N', | 
| 198 |  |  |  |  |  |  | pack_len =>'n', | 
| 199 |  |  |  |  |  |  | dmax => 5, | 
| 200 |  |  |  |  |  |  | cfmin => 2, | 
| 201 |  |  |  |  |  |  | tfmin => 2, | 
| 202 |  |  |  |  |  |  | #keeptmp => 0, | 
| 203 |  |  |  |  |  |  | #mmap => 1, | 
| 204 |  |  |  |  |  |  | #debug => 0, | 
| 205 |  |  |  |  |  |  | index_tdf => undef, | 
| 206 |  |  |  |  |  |  | index_cof => 1, | 
| 207 |  |  |  |  |  |  | index_xf => 1, | 
| 208 |  |  |  |  |  |  | dbreak => undef, | 
| 209 |  |  |  |  |  |  | tdfopts => {}, | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | ##-- filters (pgood, pbad, etc. now in DiaColloDB::Corpus::Filters; default value see below) | 
| 212 | 0 |  | 0 |  |  |  | %{DiaColloDB::Corpus::Filters->new}, | 
|  | 0 |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | #vsmgood => $TDF_MGOOD_DEFAULT, | 
| 214 |  |  |  |  |  |  | #vsmbad  => $TDF_MBAD_DEFAULT, | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | ##-- logging | 
| 217 |  |  |  |  |  |  | logOpen => 'info', | 
| 218 |  |  |  |  |  |  | logCreate => 'info', | 
| 219 |  |  |  |  |  |  | logThread => 'debug', | 
| 220 |  |  |  |  |  |  | logCorpusFile => 'info', | 
| 221 |  |  |  |  |  |  | logCorpusFileN => undef, | 
| 222 |  |  |  |  |  |  | logExport => 'info', | 
| 223 |  |  |  |  |  |  | logProfile => 'trace', | 
| 224 |  |  |  |  |  |  | logRequest => 'debug', | 
| 225 |  |  |  |  |  |  | logCompat => 'warn', | 
| 226 |  |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | ##-- limits | 
| 228 |  |  |  |  |  |  | maxExpand => 65535, | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | ##-- administrivia | 
| 231 |  |  |  |  |  |  | version => "$VERSION", | 
| 232 |  |  |  |  |  |  | #upgraded=>[], | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  | ##-- attributes | 
| 235 |  |  |  |  |  |  | #lenum => undef, #$ECLASS->new(pack_i=>$coldb->{pack_id}, pack_o=>$coldb->{pack_off}, pack_l=>$coldb->{pack_len}), | 
| 236 |  |  |  |  |  |  | #l2t   => undef, #$MMCLASS->new(pack_i=>$coldb->{pack_id}, pack_o=>$coldb->{pack_off}, pack_l=>$coldb->{pack_id}), | 
| 237 |  |  |  |  |  |  | #pack_tl => 'N', | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | ##-- tuples (-dates) | 
| 240 |  |  |  |  |  |  | #tenum  => undef, #$XECLASS::FixedLen->new(pack_i=>$coldb->{pack_id}, pack_s=>$coldb->{pack_t}, pack_d=>$coldb->{pack_date}), | 
| 241 |  |  |  |  |  |  | #pack_t => 'N', | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | ##-- relations | 
| 244 |  |  |  |  |  |  | #xf   => undef, #DiaColloDB::Relation::Unigrams->new(pack_i=>$pack_i, pack_f=>$pack_f, pack_d=>$pack_date), | 
| 245 |  |  |  |  |  |  | #cof => undef, #DiaColloDB::Relation::Cofreqs->new(pack_f=>$pack_f, pack_i=>$pack_i, pack_d=>$pack_date, dmax=>$dmax, fmin=>$cfmin), | 
| 246 |  |  |  |  |  |  | #ddc  => undef, #DiaColloDB::Relation::DDC->new(), | 
| 247 |  |  |  |  |  |  | #tdf  => undef, #DiaColloDB::Relation::TDF->new(), | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | @_,	##-- user arguments | 
| 250 |  |  |  |  |  |  | }, | 
| 251 |  |  |  |  |  |  | ref($that)||$that); | 
| 252 | 0 |  |  |  |  |  | $coldb->{class}  = ref($coldb); | 
| 253 | 0 |  |  |  |  |  | $coldb->{pack_t} = $coldb->{pack_id}; | 
| 254 | 0 | 0 |  |  |  |  | if (defined($coldb->{dbdir})) { | 
| 255 |  |  |  |  |  |  | ##-- avoid initial close() if called with dbdir=>$dbdir argument | 
| 256 | 0 |  |  |  |  |  | my $dbdir = $coldb->{dbdir}; | 
| 257 | 0 |  |  |  |  |  | delete $coldb->{dbdir}; | 
| 258 | 0 |  |  |  |  |  | return $coldb->open($dbdir); | 
| 259 |  |  |  |  |  |  | } | 
| 260 | 0 |  |  |  |  |  | return $coldb; | 
| 261 |  |  |  |  |  |  | } | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | ## undef = $obj->DESTROY | 
| 264 |  |  |  |  |  |  | ##  + destructor calls close() if necessary | 
| 265 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | ## $cli_or_undef = $cli->promote($class,%opts) | 
| 268 |  |  |  |  |  |  | ##  + DiaColloDB::Client method override | 
| 269 |  |  |  |  |  |  | sub promote { | 
| 270 | 0 |  |  | 0 | 1 |  | $_[0]->logconfess("promote(): not supported"); | 
| 271 |  |  |  |  |  |  | } | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | ##======================================================================== | 
| 274 |  |  |  |  |  |  | ## Create/compile | 
| 275 |  |  |  |  |  |  | our (%ATTR_ALIAS,%ATTR_RALIAS,%ATTR_TITLE,%ATTR_CBEXPR); | 
| 276 | 1 |  |  | 1 |  | 729 | use DiaColloDB::methods::compile; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 1600 |  | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  | ##============================================================================== | 
| 279 |  |  |  |  |  |  | ## I/O: open/close | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | ## $coldb_or_undef = $coldb->open($dbdir,%opts) | 
| 282 |  |  |  |  |  |  | ## $coldb_or_undef = $coldb->open() | 
| 283 |  |  |  |  |  |  | sub open { | 
| 284 | 0 |  |  | 0 | 1 |  | my ($coldb,$dbdir,%opts) = @_; | 
| 285 | 0 |  |  |  |  |  | DiaColloDB::Logger->ensureLog(); | 
| 286 | 0 | 0 |  |  |  |  | $coldb = $coldb->new() if (!ref($coldb)); | 
| 287 |  |  |  |  |  |  | #@$coldb{keys %opts} = values %opts; ##-- clobber options after loadHeader() | 
| 288 | 0 |  | 0 |  |  |  | $dbdir //= $coldb->{dbdir}; | 
| 289 | 0 |  |  |  |  |  | $dbdir =~ s{/$}{}; | 
| 290 | 0 | 0 |  |  |  |  | $coldb->close() if ($coldb->opened); | 
| 291 | 0 |  |  |  |  |  | $coldb->{dbdir} = $dbdir; | 
| 292 | 0 |  | 0 |  |  |  | my $flags = fcflags($opts{flags} // $coldb->{flags}); | 
| 293 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logOpen}, "open($dbdir)"); | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | ##-- open: truncate | 
| 296 | 0 | 0 |  |  |  |  | if (fctrunc($flags)) { | 
| 297 | 0 |  |  |  |  |  | $flags |= O_CREAT; | 
| 298 | 0 | 0 | 0 |  |  |  | !-d $dbdir | 
| 299 |  |  |  |  |  |  | or remove_tree($dbdir) | 
| 300 |  |  |  |  |  |  | or $coldb->logconfess("open(): could not remove old $dbdir: $!"); | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | ##-- open: create | 
| 304 | 0 | 0 |  |  |  |  | if (!-d $dbdir) { | 
| 305 | 0 | 0 |  |  |  |  | $coldb->logconfess("open(): no such directory '$dbdir'") if (!fccreat($flags)); | 
| 306 | 0 | 0 |  |  |  |  | make_path($dbdir) | 
| 307 |  |  |  |  |  |  | or $coldb->logconfess("open(): could not create DB directory '$dbdir': $!"); | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | ##-- open: header | 
| 311 | 0 |  |  |  |  |  | my ($hdr); | 
| 312 | 0 | 0 | 0 |  |  |  | if (fcread($flags) && !fctrunc($flags)) { | 
| 313 | 0 | 0 |  |  |  |  | $hdr = $coldb->readHeader() | 
| 314 |  |  |  |  |  |  | or $coldb->logconfess("open(): failed to read header file '", $coldb->headerFile, "': $!"); | 
| 315 | 0 | 0 |  |  |  |  | $coldb->loadHeaderData($hdr) | 
| 316 |  |  |  |  |  |  | or $coldb->logconess("failed to instantiate header from '", $coldb->headerFile, "': $!"); | 
| 317 |  |  |  |  |  |  | } | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | ##-- clobber header options with user-supplied values | 
| 320 | 0 |  |  |  |  |  | @$coldb{keys %opts} = values %opts; | 
| 321 |  |  |  |  |  |  |  | 
| 322 |  |  |  |  |  |  | ##-- open: check compatiblity | 
| 323 | 0 |  |  |  |  |  | my $min_version_compat = '0.10.000'; | 
| 324 | 0 | 0 | 0 |  |  |  | if (!$coldb->{version} || version->parse($coldb->{version}) < version->parse($min_version_compat)) { | 
|  |  | 0 | 0 |  |  |  |  | 
| 325 | 0 |  |  |  |  |  | $coldb->vlog($coldb->{logCompat}, "using compatibility mode for DB directory '$dbdir'; consider running \`dcdb-upgrade.perl $dbdir\'"); | 
| 326 | 0 |  |  |  |  |  | DiaColloDB::Compat->usecompat('v0_09'); | 
| 327 | 0 |  |  |  |  |  | bless($coldb, 'DiaColloDB::Compat::v0_09::DiaColloDB'); | 
| 328 | 0 |  |  |  |  |  | $coldb->{version} = $hdr->{version}; | 
| 329 | 0 |  |  |  |  |  | delete $coldb->{dbdir}; | 
| 330 | 0 |  |  |  |  |  | return $coldb->open($dbdir,%opts); | 
| 331 |  |  |  |  |  |  | } | 
| 332 |  |  |  |  |  |  | elsif (!defined($coldb->{xdmin}) || !defined($coldb->{xdmax})) { | 
| 333 | 0 |  |  |  |  |  | $coldb->logconfess("open(): no date-range keys {xdmin,xdmax} found in header; try running \`dcdb-upgrade.perl $dbdir'"); | 
| 334 |  |  |  |  |  |  | } | 
| 335 |  |  |  |  |  |  |  | 
| 336 |  |  |  |  |  |  | ##-- open: tdf: require | 
| 337 | 0 | 0 |  |  |  |  | $coldb->{index_tdf} = 0 if (!-r "$dbdir/tdf.hdr"); | 
| 338 | 0 | 0 |  |  |  |  | if ($coldb->{index_tdf}) { | 
| 339 | 0 | 0 |  |  |  |  | if (!require "DiaColloDB/Relation/TDF.pm") { | 
| 340 | 0 | 0 |  |  |  |  | $coldb->logwarn("open(): require failed for DiaColloDB/Relation/TDF.pm ; (term x document) matrix modelling disabled", ($@ ? "\n: $@" : '')); | 
| 341 | 0 |  |  |  |  |  | $coldb->{index_tdf} = 0; | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  | } | 
| 344 |  |  |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | ##-- open: common options | 
| 346 | 0 |  |  |  |  |  | my %efopts = (flags=>$flags, pack_i=>$coldb->{pack_id}, pack_o=>$coldb->{pack_off}, pack_l=>$coldb->{pack_len}); | 
| 347 | 0 |  |  |  |  |  | my %mmopts = (flags=>$flags, pack_i=>$coldb->{pack_id}); | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | ##-- open: attributes | 
| 350 | 0 |  |  |  |  |  | my $attrs = $coldb->{attrs} = $coldb->attrs(undef,['l']); | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | ##-- open: by attribute | 
| 353 | 0 |  |  |  |  |  | my $atat = 0; | 
| 354 | 0 |  |  |  |  |  | foreach my $attr (@$attrs) { | 
| 355 |  |  |  |  |  |  | ##-- open: ${attr}* | 
| 356 | 0 | 0 |  |  |  |  | my $abase = (-r "$dbdir/${attr}_enum.hdr" ? "$dbdir/${attr}_" : "$dbdir/${attr}"); ##-- v0.03-compatibility hack | 
| 357 | 0 | 0 |  |  |  |  | $coldb->{"${attr}enum"} = $coldb->mmclass($ECLASS)->new(base=>"${abase}enum", %efopts) | 
| 358 |  |  |  |  |  |  | or $coldb->logconfess("open(): failed to open enum ${abase}enum.*: $!"); | 
| 359 | 0 | 0 |  |  |  |  | $coldb->{"${attr}2t"} = $coldb->mmclass($MMCLASS)->new(base=>"${abase}2t", %mmopts) | 
| 360 |  |  |  |  |  |  | or $coldb->logconfess("open(): failed to open expansion multimap ${abase}2x.*: $!"); | 
| 361 | 0 |  | 0 |  |  |  | $coldb->{"pack_t$attr"} //= "\@${atat}$coldb->{pack_id}"; | 
| 362 | 0 |  |  |  |  |  | $atat += packsize($coldb->{pack_id}); | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | ##-- open: tenum | 
| 366 |  |  |  |  |  |  | $coldb->{tenum} = $coldb->mmclass($XECLASS)->new(base=>"$dbdir/tenum", %efopts, pack_s=>$coldb->{pack_t}) | 
| 367 | 0 | 0 |  |  |  |  | or $coldb->logconfess("open(): failed to open tuple-enum $dbdir/tenum.*: $!"); | 
| 368 |  |  |  |  |  |  |  | 
| 369 |  |  |  |  |  |  | ##-- open: xf | 
| 370 | 0 | 0 | 0 |  |  |  | if ($coldb->{index_xf}//1) { | 
| 371 |  |  |  |  |  |  | $coldb->{xf} = DiaColloDB::Relation::Unigrams->new(base=>"$dbdir/xf", flags=>$flags, mmap=>$coldb->{mmap}, | 
| 372 |  |  |  |  |  |  | pack_i=>$coldb->{pack_id}, pack_f=>$coldb->{pack_f}, pack_d=>$coldb->{pack_date} | 
| 373 |  |  |  |  |  |  | ) | 
| 374 | 0 | 0 |  |  |  |  | or $coldb->logconfess("open(): failed to open tuple-unigrams $dbdir/xf.*: $!"); | 
| 375 | 0 | 0 | 0 |  |  |  | $coldb->{xf}{N} = $coldb->{xN} if ($coldb->{xN} && !$coldb->{xf}{N}); ##-- compat | 
| 376 |  |  |  |  |  |  | } | 
| 377 |  |  |  |  |  |  |  | 
| 378 |  |  |  |  |  |  | ##-- open: cof | 
| 379 | 0 | 0 | 0 |  |  |  | if ($coldb->{index_cof}//1) { | 
| 380 |  |  |  |  |  |  | $coldb->{cof} = DiaColloDB::Relation::Cofreqs->new(base=>"$dbdir/cof", flags=>$flags, mmap=>$coldb->{mmap}, | 
| 381 |  |  |  |  |  |  | pack_i=>$coldb->{pack_id}, pack_f=>$coldb->{pack_f}, pack_d=>$coldb->{pack_date}, | 
| 382 |  |  |  |  |  |  | dmax=>$coldb->{dmax}, fmin=>$coldb->{cfmin}, | 
| 383 |  |  |  |  |  |  | ) | 
| 384 | 0 | 0 |  |  |  |  | or $coldb->logconfess("open(): failed to open co-frequency file $dbdir/cof.*: $!"); | 
| 385 |  |  |  |  |  |  | } | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | ##-- open: ddc (undef if ddcServer isn't set in ddc.hdr or $coldb) | 
| 388 | 0 | 0 | 0 |  |  |  | $coldb->{ddc} = DiaColloDB::Relation::DDC->new(-r "$dbdir/ddc.hdr" ? (base=>"$dbdir/ddc") : qw())->fromDB($coldb) | 
| 389 |  |  |  |  |  |  | // 'DiaColloDB::Relation::DDC'; | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | ##-- open: tdf (if available) | 
| 392 | 0 | 0 |  |  |  |  | if ($coldb->{index_tdf}) { | 
| 393 | 0 |  | 0 |  |  |  | $coldb->{tdfopts}     //= {}; | 
| 394 | 0 |  | 0 |  |  |  | $coldb->{tdfopts}{$_} //= $TDF_OPTS{$_} foreach (keys %TDF_OPTS);                ##-- tdf: default options | 
| 395 |  |  |  |  |  |  | $coldb->{tdf} = DiaColloDB::Relation::TDF->new((-r "$dbdir/tdf.hdr" ? (base=>"$dbdir/tdf") : qw()), | 
| 396 |  |  |  |  |  |  | dbreak => $coldb->{dbreak}, | 
| 397 | 0 | 0 |  |  |  |  | %{$coldb->{tdfopts}}, | 
|  | 0 |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  | ); | 
| 399 |  |  |  |  |  |  | } | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | ##-- all done | 
| 402 | 0 |  |  |  |  |  | return $coldb; | 
| 403 |  |  |  |  |  |  | } | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | ## @dbkeys = $coldb->dbkeys() | 
| 407 |  |  |  |  |  |  | sub dbkeys { | 
| 408 |  |  |  |  |  |  | return ( | 
| 409 | 0 | 0 |  | 0 | 1 |  | (ref($_[0]) ? (map {($_."enum",$_."2t")} @{$_[0]->attrs}) : qw()), | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | qw(tenum xf cof tdf), | 
| 411 |  |  |  |  |  |  | ); | 
| 412 |  |  |  |  |  |  | } | 
| 413 |  |  |  |  |  |  |  | 
| 414 |  |  |  |  |  |  | ## $coldb_or_undef = $coldb->close() | 
| 415 |  |  |  |  |  |  | sub close { | 
| 416 | 0 |  |  | 0 | 1 |  | my $coldb = shift; | 
| 417 | 0 | 0 |  |  |  |  | return $coldb if (!ref($coldb)); | 
| 418 | 0 |  | 0 |  |  |  | $coldb->vlog($coldb->{logOpen}, "close(".($coldb->{dbdir}//'').")"); | 
| 419 | 0 |  |  |  |  |  | foreach ($coldb->dbkeys) { | 
| 420 | 0 | 0 |  |  |  |  | next if (!defined($coldb->{$_})); | 
| 421 | 0 | 0 |  |  |  |  | return undef if (!$coldb->{$_}->close()); | 
| 422 | 0 |  |  |  |  |  | delete $coldb->{$_}; | 
| 423 |  |  |  |  |  |  | } | 
| 424 | 0 |  |  |  |  |  | $coldb->{dbdir} = undef; | 
| 425 | 0 |  |  |  |  |  | return $coldb; | 
| 426 |  |  |  |  |  |  | } | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | ## $bool = $coldb->opened() | 
| 429 |  |  |  |  |  |  | sub opened { | 
| 430 | 0 |  |  | 0 | 1 |  | my $coldb = shift; | 
| 431 |  |  |  |  |  |  | return (defined($coldb->{dbdir}) | 
| 432 | 0 |  | 0 |  |  |  | && !grep {!$_->opened} grep {defined($_)} @$coldb{$coldb->dbkeys} | 
| 433 |  |  |  |  |  |  | ); | 
| 434 |  |  |  |  |  |  | } | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | ## @files = $obj->diskFiles() | 
| 437 |  |  |  |  |  |  | ##  + returns list of dist files for this db | 
| 438 |  |  |  |  |  |  | sub diskFiles { | 
| 439 | 0 |  |  | 0 | 1 |  | my $coldb = shift; | 
| 440 | 0 |  |  |  |  |  | return ("$coldb->{dbdir}/header.json", map {$_->diskFiles} grep {UNIVERSAL::can(ref($_),'diskFiles')} values %$coldb); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | ##============================================================================== | 
| 444 |  |  |  |  |  |  | ## I/O: header | 
| 445 |  |  |  |  |  |  | ##  + largely INHERITED from DiaColloDB::Persistent | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | ## @keys = $coldb->headerKeys() | 
| 448 |  |  |  |  |  |  | ##  + keys to save as header | 
| 449 |  |  |  |  |  |  | sub headerKeys { | 
| 450 | 0 |  | 0 | 0 | 1 |  | return (qw(attrs upgraded), grep {!ref($_[0]{$_}) && $_ !~ m{^(?:dbdir$|flags$|njobs$|perms$|info$|tdfopts$|log|debug)}} keys %{$_[0]}); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | } | 
| 452 |  |  |  |  |  |  |  | 
| 453 |  |  |  |  |  |  | ## $bool = $coldb->loadHeaderData() | 
| 454 |  |  |  |  |  |  | ## $bool = $coldb->loadHeaderData($data) | 
| 455 |  |  |  |  |  |  | sub loadHeaderData { | 
| 456 | 0 |  |  | 0 | 1 |  | my ($coldb,$hdr) = @_; | 
| 457 | 0 | 0 | 0 |  |  |  | if (!defined($hdr) && !fccreat($coldb->{flags})) { | 
|  |  | 0 |  |  |  |  |  | 
| 458 | 0 |  |  |  |  |  | $coldb->logconfess("loadHeader() failed to load header data from ", $coldb->headerFile, ": $!"); | 
| 459 |  |  |  |  |  |  | } | 
| 460 |  |  |  |  |  |  | elsif (defined($hdr)) { | 
| 461 | 0 |  |  |  |  |  | $coldb->{version} = undef; | 
| 462 | 0 |  |  |  |  |  | return $coldb->SUPER::loadHeaderData($hdr); | 
| 463 |  |  |  |  |  |  | } | 
| 464 | 0 |  |  |  |  |  | return $coldb; | 
| 465 |  |  |  |  |  |  | } | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | ## $bool = $coldb->saveHeader() | 
| 468 |  |  |  |  |  |  | ## $bool = $coldb->saveHeader($headerFile) | 
| 469 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Persistent | 
| 470 |  |  |  |  |  |  |  | 
| 471 |  |  |  |  |  |  | ##======================================================================== | 
| 472 |  |  |  |  |  |  | ## export/import | 
| 473 | 1 |  |  | 1 |  | 520 | use DiaColloDB::methods::export; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 7022 |  | 
| 474 |  |  |  |  |  |  |  | 
| 475 |  |  |  |  |  |  |  | 
| 476 |  |  |  |  |  |  | ##============================================================================== | 
| 477 |  |  |  |  |  |  | ## Info | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  | ## \%info = $coldb->dbinfo() | 
| 480 |  |  |  |  |  |  | ##  + get db info | 
| 481 |  |  |  |  |  |  | sub dbinfo { | 
| 482 | 0 |  |  | 0 | 1 |  | my $coldb = shift; | 
| 483 | 0 |  |  |  |  |  | my $adata = $coldb->attrData(); | 
| 484 | 0 |  |  |  |  |  | my $du    = $coldb->du(); | 
| 485 |  |  |  |  |  |  | my $info  = { | 
| 486 |  |  |  |  |  |  | ##-- literals | 
| 487 | 0 | 0 |  |  |  |  | (map {exists($coldb->{$_}) ? ($_=>$coldb->{$_}) : qw()} | 
| 488 |  |  |  |  |  |  | qw(dbdir bos eos dmax cfmin xdmin xdmax version upgraded label collection maintainer)), | 
| 489 |  |  |  |  |  |  |  | 
| 490 |  |  |  |  |  |  | ##-- disk usage | 
| 491 |  |  |  |  |  |  | du_b => $du, | 
| 492 |  |  |  |  |  |  | du_h => si_str($du), | 
| 493 |  |  |  |  |  |  |  | 
| 494 |  |  |  |  |  |  | ##-- timestamp | 
| 495 |  |  |  |  |  |  | timestamp => $coldb->timestamp, | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | ##-- attributes | 
| 498 |  |  |  |  |  |  | attrs => [map { | 
| 499 |  |  |  |  |  |  | {( | 
| 500 |  |  |  |  |  |  | name  => $_->{a}, | 
| 501 |  |  |  |  |  |  | title => $coldb->attrTitle($_->{a}), | 
| 502 |  |  |  |  |  |  | size  => $_->{enum}->size, | 
| 503 |  |  |  |  |  |  | alias => $ATTR_RALIAS{$_->{a}}, | 
| 504 | 0 |  |  |  |  |  | )} | 
| 505 |  |  |  |  |  |  | } @$adata], | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | ##-- relations | 
| 508 |  |  |  |  |  |  | #relations => [$coldb->relations], | 
| 509 | 0 |  |  |  |  |  | relations => { map {($_=>$coldb->{$_}->dbinfo($coldb))} $coldb->relations }, | 
| 510 |  |  |  |  |  |  |  | 
| 511 |  |  |  |  |  |  | ##-- overrides | 
| 512 | 0 |  | 0 |  |  |  | %{$coldb->{info}//{}}, | 
|  | 0 |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | }; | 
| 514 | 0 |  |  |  |  |  | return $info; | 
| 515 |  |  |  |  |  |  | } | 
| 516 |  |  |  |  |  |  |  | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | ##============================================================================== | 
| 519 |  |  |  |  |  |  | ## Profiling | 
| 520 |  |  |  |  |  |  |  | 
| 521 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 522 |  |  |  |  |  |  | ## Profiling: Wrappers | 
| 523 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 524 |  |  |  |  |  |  |  | 
| 525 |  |  |  |  |  |  | ## $mprf = $coldb->query($rel,%opts) | 
| 526 |  |  |  |  |  |  | ##  + get a generic DiaColloDB::Profile::Multi object for $rel | 
| 527 |  |  |  |  |  |  | ##  + calls $coldb->profile() or $coldb->compare() as appropriate | 
| 528 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  | ## $mprf = $coldb->profile1(%opts) | 
| 531 |  |  |  |  |  |  | ##  + get unigram frequency profile for selected items as a DiaColloDB::Profile::Multi object | 
| 532 |  |  |  |  |  |  | ##  + really just wraps $coldb->profile('xf', %opts) | 
| 533 |  |  |  |  |  |  | ##  + %opts: see profile() method | 
| 534 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 535 |  |  |  |  |  |  |  | 
| 536 |  |  |  |  |  |  | ## $mprf = $coldb->profile2(%opts) | 
| 537 |  |  |  |  |  |  | ##  + get co-frequency profile for selected items as a DiaColloDB::Profile::Multi object | 
| 538 |  |  |  |  |  |  | ##  + really just wraps $coldb->profile('cof', %opts) | 
| 539 |  |  |  |  |  |  | ##  + %opts: see profile() method | 
| 540 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 541 |  |  |  |  |  |  |  | 
| 542 |  |  |  |  |  |  | ## $mprf = $coldb->compare1(%opts) | 
| 543 |  |  |  |  |  |  | ##  + get unigram comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object | 
| 544 |  |  |  |  |  |  | ##  + really just wraps $coldb->compare('xf', %opts) | 
| 545 |  |  |  |  |  |  | ##  + %opts: see compare() method | 
| 546 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 547 |  |  |  |  |  |  |  | 
| 548 |  |  |  |  |  |  | ## $mprf = $coldb->compare2(%opts) | 
| 549 |  |  |  |  |  |  | ##  + get co-frequency comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object | 
| 550 |  |  |  |  |  |  | ##  + really just wraps $coldb->profile('cof', %opts) | 
| 551 |  |  |  |  |  |  | ##  + %opts: see compare() method | 
| 552 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Client | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  |  | 
| 555 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 556 |  |  |  |  |  |  | ## Profiling: Utils | 
| 557 |  |  |  |  |  |  |  | 
| 558 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 559 |  |  |  |  |  |  | ## $relname = $coldb->relname($rel) | 
| 560 |  |  |  |  |  |  | ##  + returns an appropriate relation name for profile() and friends | 
| 561 |  |  |  |  |  |  | ##  + returns $rel if $coldb->{$rel} supports a profile() method | 
| 562 |  |  |  |  |  |  | ##  + otherwise heuristically parses $relationName /xf|f?1|ug/ or /f1?2|c/ | 
| 563 |  |  |  |  |  |  | sub relname { | 
| 564 | 0 |  |  | 0 | 1 |  | my ($coldb,$rel) = @_; | 
| 565 | 0 | 0 |  |  |  |  | if (UNIVERSAL::can($coldb->{$rel},'profile')) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 566 | 0 |  |  |  |  |  | return $rel; | 
| 567 |  |  |  |  |  |  | } | 
| 568 |  |  |  |  |  |  | elsif ($rel =~ m/^(?:[ux]|f?1$)/) { | 
| 569 | 0 |  |  |  |  |  | return 'xf'; | 
| 570 |  |  |  |  |  |  | } | 
| 571 |  |  |  |  |  |  | elsif ($rel =~ m/^(?:c|f?1?2$)/) { | 
| 572 | 0 |  |  |  |  |  | return 'cof'; | 
| 573 |  |  |  |  |  |  | } | 
| 574 |  |  |  |  |  |  | elsif ($rel =~ m/^(?:v|vec|vs|vsem|sem|td[mf])$/) { | 
| 575 | 0 |  |  |  |  |  | return 'tdf'; | 
| 576 |  |  |  |  |  |  | } | 
| 577 | 0 |  |  |  |  |  | return $rel; | 
| 578 |  |  |  |  |  |  | } | 
| 579 |  |  |  |  |  |  |  | 
| 580 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 581 |  |  |  |  |  |  | ## $obj_or_undef = $coldb->relation($rel) | 
| 582 |  |  |  |  |  |  | ##  + returns an appropriate relation-like object for profile() and friends | 
| 583 |  |  |  |  |  |  | ##  + wraps $coldb->{$coldb->relname($rel)} | 
| 584 |  |  |  |  |  |  | sub relation { | 
| 585 | 0 |  |  | 0 | 1 |  | return $_[0]->{$_[0]->relname($_[1])}; | 
| 586 |  |  |  |  |  |  | } | 
| 587 |  |  |  |  |  |  |  | 
| 588 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 589 |  |  |  |  |  |  | ## @relnames = $coldb->relations() | 
| 590 |  |  |  |  |  |  | ##  + gets list of defined relations | 
| 591 |  |  |  |  |  |  | sub relations { | 
| 592 | 0 |  |  | 0 | 1 |  | return grep {UNIVERSAL::isa(ref($_[0]{$_}),'DiaColloDB::Relation')} keys %{$_[0]}; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  | } | 
| 594 |  |  |  |  |  |  |  | 
| 595 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 596 |  |  |  |  |  |  | ## \@ids = $coldb->enumIds($enum,$req,%opts) | 
| 597 |  |  |  |  |  |  | ##  + parses enum IDs for $req, which is one of: | 
| 598 |  |  |  |  |  |  | ##    - a DDC::Any::CQTokExact, ::CQTokInfl, ::CQTokSet, ::CQTokSetInfl, or ::CQTokRegex : interpreted | 
| 599 |  |  |  |  |  |  | ##    - an ARRAY-ref     : list of literal symbol-values | 
| 600 |  |  |  |  |  |  | ##    - a Regexp ref     : regexp for target strings, passed to $enum->re2i() | 
| 601 |  |  |  |  |  |  | ##    - a string /REGEX/ : regexp for target strings, passed to $enum->re2i() | 
| 602 |  |  |  |  |  |  | ##    - another string   : space-, comma-, or |-separated list of literal values | 
| 603 |  |  |  |  |  |  | ##  + %opts: | 
| 604 |  |  |  |  |  |  | ##     logLevel => $logLevel, ##-- logging level (default=undef) | 
| 605 |  |  |  |  |  |  | ##     logPrefix => $prefix,  ##-- logging prefix (default="enumIds(): fetch ids") | 
| 606 |  |  |  |  |  |  | sub enumIds { | 
| 607 | 0 |  |  | 0 | 1 |  | my ($coldb,$enum,$req,%opts) = @_; | 
| 608 | 0 |  | 0 |  |  |  | $opts{logPrefix} //= "enumIds(): fetch ids"; | 
| 609 | 0 | 0 | 0 |  |  |  | if (UNIVERSAL::isa($req,'DDC::Any::CQTokInfl') || UNIVERSAL::isa($req,'DDC::Any::CQTokExact')) { | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 610 |  |  |  |  |  |  | ##-- CQuery: CQTokExact | 
| 611 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (", ref($req), ")"); | 
| 612 | 0 |  |  |  |  |  | return [$enum->s2i($req->getValue)]; | 
| 613 |  |  |  |  |  |  | } | 
| 614 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($req,'DDC::Any::CQTokSet') || UNIVERSAL::isa($req,'DDC::Any::CQTokSetInfl')) { | 
| 615 |  |  |  |  |  |  | ##-- CQuery: CQTokSet | 
| 616 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (", ref($req), ")"); | 
| 617 | 0 |  |  |  |  |  | return [map {$enum->s2i($_)} @{$req->getValues}]; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 618 |  |  |  |  |  |  | } | 
| 619 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($req,'DDC::Any::CQTokRegex')) { | 
| 620 |  |  |  |  |  |  | ##-- CQuery: CQTokRegex | 
| 621 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (", ref($req), ")"); | 
| 622 | 0 |  |  |  |  |  | return $enum->re2i($req->getValue); | 
| 623 |  |  |  |  |  |  | } | 
| 624 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($req,'DDC::Any::CQTokAny')) { | 
| 625 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (", ref($req), ")"); | 
| 626 | 0 |  |  |  |  |  | return undef; | 
| 627 |  |  |  |  |  |  | } | 
| 628 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($req,'ARRAY')) { | 
| 629 |  |  |  |  |  |  | ##-- compat: array | 
| 630 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (ARRAY)"); | 
| 631 | 0 |  |  |  |  |  | return [map {$enum->s2i($_)} @$req]; | 
|  | 0 |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | } | 
| 633 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($req,'Regexp') || $req =~ m{^/}) { | 
| 634 |  |  |  |  |  |  | ##-- compat: regex | 
| 635 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (REGEX)"); | 
| 636 | 0 |  |  |  |  |  | return $enum->re2i($req); | 
| 637 |  |  |  |  |  |  | } | 
| 638 |  |  |  |  |  |  | elsif (!ref($req)) { | 
| 639 |  |  |  |  |  |  | ##-- compat: space-, comma-, or |-separated literals | 
| 640 | 0 |  |  |  |  |  | $coldb->vlog($opts{logLevel}, $opts{logPrefix}, " (STRINGS)"); | 
| 641 | 0 |  | 0 |  |  |  | return [grep {defined($_)} map {$enum->s2i($_)} grep {($_//'') ne ''} map {s{\\(.)}{$1}g; $_} split(/(?:(?<!\\)[\,\s\|])+/,$req)]; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 642 |  |  |  |  |  |  | } | 
| 643 |  |  |  |  |  |  | else { | 
| 644 |  |  |  |  |  |  | ##-- reference: unhandled | 
| 645 | 0 |  |  |  |  |  | $coldb->logconfess($coldb->{error}="$opts{logPrefix}: can't handle request of type ".ref($req)); | 
| 646 |  |  |  |  |  |  | } | 
| 647 | 0 |  |  |  |  |  | return []; | 
| 648 |  |  |  |  |  |  | } | 
| 649 |  |  |  |  |  |  |  | 
| 650 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 651 |  |  |  |  |  |  | ## ($dfilter,$sliceLo,$sliceHi,$dateLo,$dateHi) = $coldb->parseDateRequest($dateRequest='', $sliceRequest=0, $fill=0, $ddcMode=0) | 
| 652 |  |  |  |  |  |  | ## \%dateRequest                                = $coldb->parseDateRequest($dateRequest='', $sliceRequest=0, $fill=0, $ddcMode=0) | 
| 653 |  |  |  |  |  |  | ##   + parses date request and returns limit and filter information as a list (list context) or HASH-ref (scalar context); | 
| 654 |  |  |  |  |  |  | ##   + %dateRequest = | 
| 655 |  |  |  |  |  |  | ##     ( | 
| 656 |  |  |  |  |  |  | ##      dfilter => $dfilter,  ##-- filter-sub, called as: $wanted=$dfilter->($date); undef for none | 
| 657 |  |  |  |  |  |  | ##      slo  => $sliceLo,     ##-- minimum slice (inclusive) | 
| 658 |  |  |  |  |  |  | ##      shi  => $sliceHi,     ##-- maximum slice (inclusive) | 
| 659 |  |  |  |  |  |  | ##      dlo  => $dateLo,      ##-- minimum date (inclusive); undef for none, always defined if $fill is true | 
| 660 |  |  |  |  |  |  | ##      dhi  => $dateHi,      ##-- maximum date (inclusive); undef for none, always defined if $fill is true | 
| 661 |  |  |  |  |  |  | ##     ) | 
| 662 |  |  |  |  |  |  | sub parseDateRequest { | 
| 663 | 0 |  |  | 0 | 1 |  | my ($coldb,$date,$slice,$fill,$ddcmode) = @_; | 
| 664 | 0 |  |  |  |  |  | my ($dfilter,$slo,$shi,$dlo,$dhi); | 
| 665 | 0 |  | 0 |  |  |  | $date //= ''; | 
| 666 | 0 | 0 | 0 |  |  |  | if ($date =~ /^[\s\*]*$/) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 667 |  |  |  |  |  |  | ##-- empty date request or universal wildcard: ignore | 
| 668 | 0 |  |  |  |  |  | $dlo = $dhi = undef; | 
| 669 |  |  |  |  |  |  | } | 
| 670 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($date,'Regexp') || $date =~ /^\//) { | 
| 671 |  |  |  |  |  |  | ##-- date request: regex string | 
| 672 | 0 | 0 |  |  |  |  | $coldb->logconfess("parseDateRequest(): can't handle date regex '$date' in ddc mode") if ($ddcmode); | 
| 673 | 0 |  |  |  |  |  | my $dre  = regex($date); | 
| 674 | 0 |  |  | 0 |  |  | $dfilter = sub { $_[0] =~ $dre }; | 
|  | 0 |  |  |  |  |  |  | 
| 675 |  |  |  |  |  |  | } | 
| 676 |  |  |  |  |  |  | elsif ($date =~ /^\s*((?:[0-9]+|\*?))\s*[\-\:]+\s*((?:[0-9]+|\*?))\s*$/) { | 
| 677 |  |  |  |  |  |  | ##-- date request: range MIN:MAX (inclusive) | 
| 678 | 0 |  |  |  |  |  | ($dlo,$dhi) = ($1,$2); | 
| 679 | 0 | 0 | 0 |  |  |  | $dlo  = $coldb->{xdmin} if (($dlo//'') =~ /^\*?$/); | 
| 680 | 0 | 0 | 0 |  |  |  | $dhi  = $coldb->{xdmax} if (($dhi//'') =~ /^\*?$/); | 
| 681 | 0 |  |  |  |  |  | $dlo += 0; | 
| 682 | 0 |  |  |  |  |  | $dhi += 0; | 
| 683 | 0 | 0 |  | 0 |  |  | $dfilter = sub { $_[0]>=$dlo && $_[0]<=$dhi }; | 
|  | 0 |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  | } | 
| 685 |  |  |  |  |  |  | elsif ($date =~ /[\s\,\|]+/) { | 
| 686 |  |  |  |  |  |  | ##-- date request: list | 
| 687 | 0 | 0 |  |  |  |  | $coldb->logconfess("parseDateRequest(): can't handle date list '$date' in ddc mode") if ($ddcmode); | 
| 688 | 0 |  | 0 |  |  |  | my %dwant = map {($_=>undef)} grep {($_//'') ne ''} split(/[\s\,\|]+/,$date); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 689 | 0 |  |  | 0 |  |  | $dfilter  = sub { exists($dwant{$_[0]}) }; | 
|  | 0 |  |  |  |  |  |  | 
| 690 |  |  |  |  |  |  | } | 
| 691 |  |  |  |  |  |  | else { | 
| 692 |  |  |  |  |  |  | ##-- date request: single value | 
| 693 | 0 |  |  |  |  |  | $dlo = $dhi = $date; | 
| 694 | 0 |  |  | 0 |  |  | $dfilter = sub { $_[0] == $date }; | 
|  | 0 |  |  |  |  |  |  | 
| 695 |  |  |  |  |  |  | } | 
| 696 |  |  |  |  |  |  |  | 
| 697 |  |  |  |  |  |  | ##-- force-fill? | 
| 698 | 0 | 0 |  |  |  |  | if ($fill) { | 
| 699 | 0 | 0 | 0 |  |  |  | $dlo = $coldb->{xdmin} if (!$dlo || $dlo < $coldb->{xdmin}); | 
| 700 | 0 | 0 | 0 |  |  |  | $dhi = $coldb->{xdmax} if (!$dhi || $dhi > $coldb->{xdmax}); | 
| 701 |  |  |  |  |  |  | } | 
| 702 |  |  |  |  |  |  |  | 
| 703 |  |  |  |  |  |  | ##-- slice-range | 
| 704 | 0 | 0 | 0 |  |  |  | ($slo,$shi) = map {$slice ? ($slice*int($_/$slice)) : 0} (($dlo//$coldb->{xdmin}),($dhi//$coldb->{xdmax})); | 
|  | 0 |  | 0 |  |  |  |  | 
| 705 |  |  |  |  |  |  |  | 
| 706 |  |  |  |  |  |  | return wantarray | 
| 707 | 0 | 0 |  |  |  |  | ? ($dfilter,$slo,$shi,$dlo,$dhi) | 
| 708 |  |  |  |  |  |  | : { dfilter=>$dfilter, slo=>$slo, shi=>$shi, dlo=>$dlo, dhi=>$dhi }; | 
| 709 |  |  |  |  |  |  | } | 
| 710 |  |  |  |  |  |  |  | 
| 711 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 712 |  |  |  |  |  |  | ## $compiler = $coldb->qcompiler(); | 
| 713 |  |  |  |  |  |  | ##  + get DDC::Any::CQueryCompiler for this object (cached in $coldb->{_qcompiler}) | 
| 714 |  |  |  |  |  |  | sub qcompiler { | 
| 715 | 0 |  | 0 | 0 | 1 |  | return $_[0]{_qcompiler} ||= DDC::Any::CQueryCompiler->new(); | 
| 716 |  |  |  |  |  |  | } | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 719 |  |  |  |  |  |  | ## $cquery_or_undef = $coldb->qparse($ddc_query_string) | 
| 720 |  |  |  |  |  |  | ##  + wraps parse in an eval {...} block and sets $coldb->{error} on failure | 
| 721 |  |  |  |  |  |  | sub qparse { | 
| 722 | 0 |  |  | 0 | 1 |  | my ($coldb,$qstr) = @_; | 
| 723 | 0 |  |  |  |  |  | my ($q); | 
| 724 | 0 |  |  |  |  |  | eval { $q=$coldb->qcompiler->ParseQuery($qstr); }; | 
|  | 0 |  |  |  |  |  |  | 
| 725 | 0 | 0 | 0 |  |  |  | if ($@ || !defined($q)) { | 
| 726 | 0 |  |  |  |  |  | $coldb->{error}="$@"; | 
| 727 | 0 |  |  |  |  |  | return undef; | 
| 728 |  |  |  |  |  |  | } | 
| 729 | 0 |  |  |  |  |  | return $q; | 
| 730 |  |  |  |  |  |  | } | 
| 731 |  |  |  |  |  |  |  | 
| 732 |  |  |  |  |  |  |  | 
| 733 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 734 |  |  |  |  |  |  | ## $cquery = $coldb->parseQuery([[$attr1,$val1],...], %opts) ##-- compat: ARRAY-of-ARRAYs | 
| 735 |  |  |  |  |  |  | ## $cquery = $coldb->parseQuery(["$attr1:$val1",...], %opts) ##-- compat: ARRAY-of-requests | 
| 736 |  |  |  |  |  |  | ## $cquery = $coldb->parseQuery({$attr1=>$val1, ...}, %opts) ##-- compat: HASH | 
| 737 |  |  |  |  |  |  | ## $cquery = $coldb->parseQuery("$attr1=$val1, ...", %opts)  ##-- compat: string | 
| 738 |  |  |  |  |  |  | ## $cquery = $coldb->parseQuery($ddcQueryString, %opts)      ##-- ddc string (with shorthand ","->WITH, "&&"->WITH) | 
| 739 |  |  |  |  |  |  | ##  + guts for parsing user target and groupby requests | 
| 740 |  |  |  |  |  |  | ##  + returns a DDC::Any::CQuery object representing the request | 
| 741 |  |  |  |  |  |  | ##  + index-only items "$l" are mapped to $l=@{} | 
| 742 |  |  |  |  |  |  | ##  + if query request is wrapped in "(...)" or "[...]", native parsing is NOT attempted | 
| 743 |  |  |  |  |  |  | ##  + %opts: | 
| 744 |  |  |  |  |  |  | ##     warn  => $level,       ##-- log-level for unknown attributes (default: 'warn') | 
| 745 |  |  |  |  |  |  | ##     logas => $reqtype,     ##-- request type for warnings | 
| 746 |  |  |  |  |  |  | ##     parseas => $reqtype,   ##-- request type for parsing ('groupby' or 'query' (default)) | 
| 747 |  |  |  |  |  |  | ##     qref => \$qref,        ##-- store parsed query in SCALAR-ref $qref | 
| 748 |  |  |  |  |  |  | ##     default => $attr,      ##-- default attribute (for query requests) | 
| 749 |  |  |  |  |  |  | ##     mapand => $bool,       ##-- map CQAnd to CQWith? (default=true unless '&&' occurs in query string) | 
| 750 |  |  |  |  |  |  | ##     ddcmode => $bool,      ##-- force ddc query parsing? (0:no:default, >0:always, <0:fallback) | 
| 751 |  |  |  |  |  |  | sub parseQuery { | 
| 752 | 0 |  |  | 0 | 1 |  | my ($coldb,$req,%opts) = @_; | 
| 753 | 0 |  |  |  |  |  | my $req0   = $req; | 
| 754 | 0 |  | 0 |  |  |  | my $wlevel = $opts{warn} // 'warn'; | 
| 755 | 0 |  |  |  |  |  | my $defaultIndex = $opts{default}; | 
| 756 | 0 |  | 0 |  |  |  | my $logas = $opts{logas}//''; | 
| 757 | 0 |  | 0 |  |  |  | my $parseas = $opts{parseas} || $logas || 'query'; | 
| 758 | 0 |  | 0 |  |  |  | my $ddcmode = $opts{ddcmode} || 0; | 
| 759 |  |  |  |  |  |  |  | 
| 760 |  |  |  |  |  |  | ##-- compat: accept ARRAY or HASH requests | 
| 761 | 0 | 0 |  |  |  |  | my $areqs = (UNIVERSAL::isa($req,'ARRAY') ? [@$req] | 
|  |  | 0 |  |  |  |  |  | 
| 762 |  |  |  |  |  |  | : (UNIVERSAL::isa($req,'HASH') ? [%$req] | 
| 763 |  |  |  |  |  |  | : undef)); | 
| 764 |  |  |  |  |  |  |  | 
| 765 |  |  |  |  |  |  | ##-- ddcmode: detect "[...]" queries | 
| 766 | 0 | 0 |  |  |  |  | $ddcmode = 1 if ($req =~ s{^\s*\[(.*)\]\s*$}{$1}); | 
| 767 |  |  |  |  |  |  |  | 
| 768 |  |  |  |  |  |  | ##-- compat: parse into attribute-local requests $areqs=[[$attr1,$areq1],...] | 
| 769 | 0 |  |  |  |  |  | my $sepre  = qr{[\s\,]}; | 
| 770 | 0 |  |  |  |  |  | my $charre = qr{(?:\\[^ux0-9]|[\w\x{80}-\x{ffff}])}; | 
| 771 | 0 |  |  |  |  |  | my $attrre = qr{(?:\$?(?:doc\.)?${charre}+)}; | 
| 772 | 0 |  |  |  |  |  | my $orre   = qr{(?:\s*\|?\s*)}; | 
| 773 | 0 |  |  |  |  |  | my $setre  = qr{(?:(?:${charre}+)(?:${orre}${charre}+)*)};	##-- value: |-separated barewords | 
| 774 | 0 |  |  |  |  |  | my $regre  = qr{(?:/(?:\\/|[^/]*)/(?:[gimsadlux]*))};		##-- value regexes | 
| 775 | 0 |  |  |  |  |  | my $valre  = qr{(?:${setre}|${regre})}; | 
| 776 | 0 | 0 |  |  |  |  | my $reqre  = ($parseas eq 'groupby' | 
| 777 |  |  |  |  |  |  | ? qr{(?:(?:${attrre}(?:[:=]${valre})?)|${valre})}  ##-- groupby mode: require ATTRIBUTES | 
| 778 |  |  |  |  |  |  | : qr{(?:(?:${attrre}[:=])?${valre})}               ##-- query   mode: reqzure VALUES | 
| 779 |  |  |  |  |  |  | ); | 
| 780 | 0 | 0 | 0 |  |  |  | if (!$areqs | 
|  |  |  | 0 |  |  |  |  | 
| 781 |  |  |  |  |  |  | && ($ddcmode <= 0)			##-- allow native parsing? | 
| 782 |  |  |  |  |  |  | && $req =~ m/^${sepre}*			##-- initial separators (optional) | 
| 783 |  |  |  |  |  |  | (?:${reqre}${sepre}+)*	##-- separated components | 
| 784 |  |  |  |  |  |  | (?:${reqre})			##-- final component | 
| 785 |  |  |  |  |  |  | ${sepre}*			##-- final separators (optional) | 
| 786 |  |  |  |  |  |  | $/x) { | 
| 787 | 0 | 0 |  |  |  |  | $coldb->debug("parseQuery($logas): parsing native query request [ddcmode=$ddcmode]") if ($coldb->{debug}); | 
| 788 | 0 |  |  |  |  |  | $areqs = [grep {defined($_)} ($req =~ m/${sepre}*(${reqre})/g)]; | 
|  | 0 |  |  |  |  |  |  | 
| 789 |  |  |  |  |  |  | } | 
| 790 |  |  |  |  |  |  |  | 
| 791 |  |  |  |  |  |  | ##-- construct DDC query $q | 
| 792 | 0 |  |  |  |  |  | my ($q); | 
| 793 | 0 | 0 |  |  |  |  | if ($areqs) { | 
| 794 |  |  |  |  |  |  | ##-- compat: diacollo<=v0.06-style attribute-wise request in @$areqs; construct DDC query by hand | 
| 795 | 0 |  |  |  |  |  | my ($attr,$areq,$aq); | 
| 796 | 0 |  |  |  |  |  | foreach (@$areqs) { | 
| 797 | 0 | 0 |  |  |  |  | if (UNIVERSAL::isa($_,'ARRAY')) { | 
|  |  | 0 |  |  |  |  |  | 
| 798 |  |  |  |  |  |  | ##-- compat: attribute request: ARRAY | 
| 799 | 0 |  |  |  |  |  | ($attr,$areq) = @$_; | 
| 800 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($_,'HASH')) { | 
| 801 |  |  |  |  |  |  | ##-- compat: attribute request: HASH | 
| 802 | 0 |  |  |  |  |  | ($attr,$areq) = %$_; | 
| 803 |  |  |  |  |  |  | } else { | 
| 804 |  |  |  |  |  |  | ##-- compat: attribute request: STRING (native) | 
| 805 | 0 | 0 |  |  |  |  | next if (uc($_) eq 'WITH'); ##-- avoid ddc keyword | 
| 806 | 0 | 0 |  |  |  |  | ($attr,$areq) = m{^(${attrre})[:=](${valre})$} ? ($1,$2) : ($_,undef); | 
| 807 | 0 |  |  |  |  |  | $attr =~ s/\\(.)/$1/g; | 
| 808 | 0 | 0 |  |  |  |  | $areq =~ s/\\(.)/$1/g if (defined($areq)); | 
| 809 |  |  |  |  |  |  | } | 
| 810 |  |  |  |  |  |  |  | 
| 811 |  |  |  |  |  |  | ##-- compat: parse defaults | 
| 812 | 0 | 0 | 0 |  |  |  | ($attr,$areq) = ('',$attr)   if (defined($defaultIndex) && !defined($areq)); | 
| 813 | 0 | 0 | 0 |  |  |  | $attr = $defaultIndex//'' if (($attr//'') eq ''); | 
|  |  |  | 0 |  |  |  |  | 
| 814 | 0 |  |  |  |  |  | $attr =~ s/^\$//; | 
| 815 |  |  |  |  |  |  |  | 
| 816 | 0 | 0 | 0 |  |  |  | $coldb->debug("parseQuery($logas): parsing native request clause: (".($attr//'')." = ".($areq//'').")") if ($coldb->{debug}); | 
|  |  |  | 0 |  |  |  |  | 
| 817 |  |  |  |  |  |  |  | 
| 818 | 0 | 0 | 0 |  |  |  | if (UNIVERSAL::isa($areq,'DDC::Any::CQuery')) { | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 819 |  |  |  |  |  |  | ##-- compat: value: ddc query object | 
| 820 | 0 |  |  |  |  |  | $aq = $areq; | 
| 821 | 0 | 0 | 0 |  |  |  | $aq->setIndexName($attr) if ($aq->can('setIndexName') && $attr ne ''); | 
| 822 |  |  |  |  |  |  | } | 
| 823 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($areq,'ARRAY')) { | 
| 824 |  |  |  |  |  |  | ##-- compat: value: array --> CQTokSet @{VAL1,...,VALN} | 
| 825 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokSet->new($attr, '', $areq); | 
| 826 |  |  |  |  |  |  | } | 
| 827 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($areq,'RegExp') || (($opts{ddcmode}||0)<1 && $areq && $areq =~ m{^${regre}$})) { | 
| 828 |  |  |  |  |  |  | ##-- compat: value: regex --> CQTokRegex /REGEX/ | 
| 829 | 0 |  |  |  |  |  | my $re = regex($areq).""; | 
| 830 | 0 |  |  |  |  |  | $re =~ s{\G(.*?\(\?\^[^adlu:]*)[adlu]*}{$1}g; ##-- trim perl-5.14 character-set modifiers: they break KWIC-links, since DDC (PCRE) doesn't support them! | 
| 831 | 0 |  |  |  |  |  | $re =~ s{^\(\?\^[adlu]*\:(.*)\)$}{$1};        ##-- trim redundant top-level grouping inserted by qr{}-stringification | 
| 832 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($attr, $re); | 
| 833 |  |  |  |  |  |  | } | 
| 834 |  |  |  |  |  |  | elsif (!$areq || $areq =~ /^\s*${reqre}\s*$/) { | 
| 835 |  |  |  |  |  |  | ##-- compat: value: space- or |-separated literals --> CQTokExact $a=@VAL or CQTokSet $a=@{VAL1,...VALN} or CQTokAny $a=* | 
| 836 |  |  |  |  |  |  | ##   + also applies to empty $areq, e.g. in groupby clauses | 
| 837 | 0 |  | 0 |  |  |  | my $vals = [grep {($_//'') ne ''} map {s{\\(.)}{$1}g; $_} split(/(?:(?<!\\)[\,\s\|])+/,($areq//''))]; | 
|  | 0 |  | 0 |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 838 | 0 | 0 | 0 |  |  |  | $aq = (@$vals<=1 | 
|  |  | 0 | 0 |  |  |  |  | 
| 839 |  |  |  |  |  |  | ? (($vals->[0]//'*') eq '*' | 
| 840 |  |  |  |  |  |  | ? DDC::Any::CQTokAny->new($attr,'*') | 
| 841 |  |  |  |  |  |  | : DDC::Any::CQTokExact->new($attr,$vals->[0])) | 
| 842 |  |  |  |  |  |  | : DDC::Any::CQTokSet->new($attr,($areq//''),$vals)); | 
| 843 |  |  |  |  |  |  | } | 
| 844 |  |  |  |  |  |  | elsif ($ddcmode && ($areq//'') ne '') { | 
| 845 |  |  |  |  |  |  | ##-- compat: ddcmode: parse requests as ddc queries | 
| 846 |  |  |  |  |  |  | $aq = $coldb->qparse($areq) | 
| 847 | 0 | 0 |  |  |  |  | or $coldb->logconfess($coldb->{error}="parseQuery(): failed to parse request \`$areq': $coldb->{error}"); | 
| 848 |  |  |  |  |  |  | } | 
| 849 |  |  |  |  |  |  | ##-- push request to query | 
| 850 | 0 | 0 |  |  |  |  | $q = $q ? DDC::Any::CQWith->new($q,$aq) : $aq; | 
| 851 |  |  |  |  |  |  | } | 
| 852 |  |  |  |  |  |  | } | 
| 853 |  |  |  |  |  |  | else { | 
| 854 |  |  |  |  |  |  | ##-- ddc: diacollo>=v0.06: ddc request parsing: allow shorthands (',' --> 'WITH'), ('INDEX=VAL' --> '$INDEX=VAL'), and ($INDEX --> $INDEX=@{}) | 
| 855 | 0 |  |  |  |  |  | my $compiler = $coldb->qcompiler(); | 
| 856 | 0 |  |  |  |  |  | my ($err); | 
| 857 | 0 |  |  |  |  |  | while (!defined($q)) { | 
| 858 |  |  |  |  |  |  | #$coldb->trace("req=$req\n"); | 
| 859 | 0 |  |  |  |  |  | undef $@; | 
| 860 | 0 |  |  |  |  |  | eval { $q=$compiler->ParseQuery($req); }; | 
|  | 0 |  |  |  |  |  |  | 
| 861 | 0 | 0 | 0 |  |  |  | last if (!($err=$@) && defined($q)); | 
| 862 | 0 | 0 |  |  |  |  | if ($err =~ /syntax error/) { | 
| 863 | 0 | 0 |  |  |  |  | if ($err =~ /unexpected ','/) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 864 |  |  |  |  |  |  | ##-- (X Y) --> (X WITH Y) | 
| 865 | 0 |  |  |  |  |  | $req =~ s/(?!<\\)\s*,\s*/ WITH /; | 
| 866 | 0 |  |  |  |  |  | next; | 
| 867 |  |  |  |  |  |  | } | 
| 868 |  |  |  |  |  |  | elsif ($err =~ /expecting '='/) { | 
| 869 |  |  |  |  |  |  | ##-- ($INDEX) --> ($INDEX=*) (for group-by) | 
| 870 | 0 |  |  |  |  |  | $req =~ s/(\$\w+)(?!\s*\=)/$1=*/; | 
| 871 | 0 |  |  |  |  |  | next; | 
| 872 |  |  |  |  |  |  | } | 
| 873 |  |  |  |  |  |  | elsif ($err =~ /unexpected SYMBOL, expecting INTEGER at line \d+, near token \`([^\']*)\'/) { | 
| 874 |  |  |  |  |  |  | ##-- (INDEX=) --> ($INDEX=) | 
| 875 | 0 |  |  |  |  |  | my $tok = $1; | 
| 876 | 0 |  |  |  |  |  | $req =~ s/(?!<\$)(\S+)\s*=\s*\Q$tok\E/\$$1=$tok/; | 
| 877 | 0 |  |  |  |  |  | next; | 
| 878 |  |  |  |  |  |  | } | 
| 879 |  |  |  |  |  |  | } | 
| 880 | 0 |  | 0 |  |  |  | $coldb->logconfess("parseQuery(): could not parse request '$req0': ", ($err//'')); | 
| 881 |  |  |  |  |  |  | } | 
| 882 |  |  |  |  |  |  | } | 
| 883 |  |  |  |  |  |  |  | 
| 884 |  |  |  |  |  |  | ##-- tweak query: map CQAnd to CQWith | 
| 885 |  |  |  |  |  |  | $q = $q->mapTraverse(sub { | 
| 886 | 0 | 0 |  | 0 |  |  | return UNIVERSAL::isa($_[0],'DDC::Any::CQAnd') ? DDC::Any::CQWith->new($_[0]->getDtr1,$_[0]->getDtr2) : $_[0]; | 
| 887 |  |  |  |  |  |  | }) | 
| 888 | 0 | 0 | 0 |  |  |  | if ($opts{mapand} || (!defined($opts{mapand}) && $req0 !~ /\&\&/)); | 
|  |  |  | 0 |  |  |  |  | 
| 889 |  |  |  |  |  |  |  | 
| 890 |  |  |  |  |  |  | ##-- maybe store parsed query | 
| 891 | 0 | 0 |  |  |  |  | ${$opts{qref}} = $q if (ref($opts{qref})); | 
|  | 0 |  |  |  |  |  |  | 
| 892 |  |  |  |  |  |  |  | 
| 893 | 0 | 0 |  |  |  |  | $coldb->debug("parseQuery($logas): parsed query: ", $q->toString) if ($coldb->{debug}); | 
| 894 |  |  |  |  |  |  |  | 
| 895 | 0 |  |  |  |  |  | return $q; | 
| 896 |  |  |  |  |  |  | } | 
| 897 |  |  |  |  |  |  |  | 
| 898 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 899 |  |  |  |  |  |  | ## \@aqs = $coldb->queryAttributes($cquery,%opts) | 
| 900 |  |  |  |  |  |  | ##  + utility for decomposing DDC queries into attribute-wise requests | 
| 901 |  |  |  |  |  |  | ##   + returns an ARRAY-ref [[$attr1,$val1], ...] | 
| 902 |  |  |  |  |  |  | ##   + each value $vali is empty or undef (all values), a CQTokSet, a CQTokExact, CQTokRegex, or CQTokAny | 
| 903 |  |  |  |  |  |  | ##   + chokes on unsupported query types or filters | 
| 904 |  |  |  |  |  |  | ##   + %opts: | 
| 905 |  |  |  |  |  |  | ##     warn  => $level,       ##-- log-level for unknown attributes (default: 'warn') | 
| 906 |  |  |  |  |  |  | ##     logas => $reqtype,     ##-- request type for warnings | 
| 907 |  |  |  |  |  |  | ##     default => $attr,      ##-- default attribute (for query requests) | 
| 908 |  |  |  |  |  |  | ##     allowExtra => \@attrs, ##-- allow extra attributes @attrs (may also be HASH-ref) | 
| 909 |  |  |  |  |  |  | ##     allowUnknown => $bool, ##-- allow unknown attributes? (default: 0) | 
| 910 |  |  |  |  |  |  | sub queryAttributes { | 
| 911 | 0 |  |  | 0 | 1 |  | my ($coldb,$cquery,%opts) = @_; | 
| 912 | 0 |  | 0 |  |  |  | my $wlevel = $opts{warn} // 'warn'; | 
| 913 | 0 |  |  |  |  |  | my $default = $opts{default}; | 
| 914 | 0 |  | 0 |  |  |  | my $logas  = $opts{logas}//''; | 
| 915 |  |  |  |  |  |  |  | 
| 916 |  |  |  |  |  |  | my $warnsub = sub { | 
| 917 | 0 | 0 |  | 0 |  |  | $coldb->logconfess($coldb->{error}="queryAttributes(): can't handle ".join('',@_)) if (!$opts{relax}); | 
| 918 | 0 |  |  |  |  |  | $coldb->vlog($wlevel, "queryAttributes(): ignoring ", @_); | 
| 919 | 0 |  |  |  |  |  | }; | 
| 920 |  |  |  |  |  |  |  | 
| 921 | 0 |  |  |  |  |  | my $areqs = []; | 
| 922 | 0 |  |  |  |  |  | my ($q,$attr,$aq); | 
| 923 | 0 |  |  |  |  |  | foreach $q (@{$cquery->Descendants}) { | 
|  | 0 |  |  |  |  |  |  | 
| 924 | 0 | 0 |  |  |  |  | if (!defined($q)) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 925 |  |  |  |  |  |  | ##-- NULL: ignore | 
| 926 | 0 |  |  |  |  |  | next; | 
| 927 |  |  |  |  |  |  | } | 
| 928 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQWith')) { | 
| 929 |  |  |  |  |  |  | ##-- CQWith: ignore (just recurse) | 
| 930 | 0 |  |  |  |  |  | next; | 
| 931 |  |  |  |  |  |  | } | 
| 932 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQueryOptions')) { | 
| 933 |  |  |  |  |  |  | ##-- CQueryOptions: check for nontrivial user requests | 
| 934 | 0 | 0 |  |  |  |  | $warnsub->("#WITHIN clause") if (@{$q->getWithin}); | 
|  | 0 |  |  |  |  |  |  | 
| 935 | 0 | 0 |  |  |  |  | $warnsub->("#CNTXT clause") if ($q->getContextSentencesCount); | 
| 936 |  |  |  |  |  |  | } | 
| 937 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQToken')) { | 
| 938 |  |  |  |  |  |  | ##-- CQToken: create attribute clause | 
| 939 | 0 | 0 |  |  |  |  | $warnsub->("negated query clause in native $logas request (".$q->toString.")") if ($q->getNegated); | 
| 940 | 0 | 0 | 0 |  |  |  | $warnsub->("explicit term-expansion chain in native $logas request (".$q->toString.")") if ($q->can('getExpanders') && @{$q->getExpanders}); | 
|  | 0 |  |  |  |  |  |  | 
| 941 |  |  |  |  |  |  |  | 
| 942 | 0 |  | 0 |  |  |  | my $attr = $q->getIndexName || $default; | 
| 943 | 0 | 0 |  |  |  |  | if (ref($q) =~ /^DDC::\w+::CQTok(?:Exact|Set|Regex|Any)$/) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 944 | 0 |  |  |  |  |  | $aq = $q; | 
| 945 |  |  |  |  |  |  | } elsif (ref($q) =~ /^DDC::\w+::CQTokInfl$/) { | 
| 946 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokExact->new($q->getIndexName, $q->getValue); | 
| 947 |  |  |  |  |  |  | } elsif (ref($q) =~ /^DDC::\w+::CQTokSetInfl$/) { | 
| 948 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokSet->new($q->getIndexName, $q->getValue, $q->getValues); | 
| 949 |  |  |  |  |  |  | } elsif (ref($q) =~ /^DDC::\w+::CQTokPrefix$/) { | 
| 950 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($q->getIndexName, '^'.quotemeta($q->getValue)); | 
| 951 |  |  |  |  |  |  | } elsif (ref($q) =~ /^DDC::\w+::CQTokSuffix$/) { | 
| 952 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($q->getIndexName, quotemeta($q->getValue).'$'); | 
| 953 |  |  |  |  |  |  | } elsif (ref($q) =~ /^DDC::\w+::CQTokInfix$/) { | 
| 954 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($q->getIndexName, quotemeta($q->getValue)); | 
| 955 |  |  |  |  |  |  | } else { | 
| 956 | 0 |  |  |  |  |  | $warnsub->("token query clause of type ".ref($q)." in native $logas request (".$q->toString.")"); | 
| 957 |  |  |  |  |  |  | } | 
| 958 | 0 | 0 | 0 |  |  |  | $aq=undef if ($aq && $aq->isa('DDC::Any::CQTokAny')); ##-- empty value, e.g. for groupby | 
| 959 | 0 |  |  |  |  |  | push(@$areqs, [$attr,$aq]); | 
| 960 |  |  |  |  |  |  | } | 
| 961 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFilter')) { | 
| 962 |  |  |  |  |  |  | ##-- CQFilter | 
| 963 | 0 | 0 | 0 |  |  |  | if ($q->isa('DDC::Any::CQFHasField')) { | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 | 0 |  |  |  |  | 
| 964 |  |  |  |  |  |  | ##-- CQFilter: CQFHasField | 
| 965 | 0 |  |  |  |  |  | my $attr = $q->getArg0; | 
| 966 | 0 | 0 |  |  |  |  | if ($q->isa('DDC::Any::CQFHasFieldValue')) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 967 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokExact->new($attr, $q->getArg1); | 
| 968 |  |  |  |  |  |  | } | 
| 969 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFHasFieldSet')) { | 
| 970 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokSet->new($attr, $q->getArg1, $q->getValues); | 
| 971 |  |  |  |  |  |  | } | 
| 972 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFHasFieldRegex')) { | 
| 973 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($attr, $q->getArg1); | 
| 974 |  |  |  |  |  |  | } | 
| 975 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFHasFieldPrefix')) { | 
| 976 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($attr, '^'.quotemeta($q->getArg1)); | 
| 977 |  |  |  |  |  |  | } | 
| 978 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFHasFieldSuffix')) { | 
| 979 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($attr, quotemeta($q->getArg1).'$'); | 
| 980 |  |  |  |  |  |  | } | 
| 981 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFHasFieldInfix')) { | 
| 982 | 0 |  |  |  |  |  | $aq = DDC::Any::CQTokRegex->new($attr, quotemeta($q->getArg1)); | 
| 983 |  |  |  |  |  |  | } | 
| 984 |  |  |  |  |  |  | else { | 
| 985 | 0 |  |  |  |  |  | $warnsub->("filter of type ".ref($q)." unsupported in native $logas request (".$q->toString.")"); | 
| 986 |  |  |  |  |  |  | } | 
| 987 | 0 | 0 | 0 |  |  |  | $aq=undef if ($aq && $aq->isa('DDC::Any::CQTokAny')); ##-- empty value, e.g. for groupby | 
| 988 | 0 |  |  |  |  |  | push(@$areqs, [$attr,$aq]); | 
| 989 |  |  |  |  |  |  | } | 
| 990 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFRandomSort') || $q->isa('DDC::Any::CQFRankSort')) { | 
| 991 |  |  |  |  |  |  | ##-- CQFilter: CQFRandomSort, CQFRanksort: ignore | 
| 992 | 0 |  |  |  |  |  | next; | 
| 993 |  |  |  |  |  |  | } | 
| 994 |  |  |  |  |  |  | elsif ($q->isa('DDC::Any::CQFSort') && ($q->getArg1 ne '' || $q->getArg2 ne '')) { | 
| 995 |  |  |  |  |  |  | ##-- CQFilter: CQFSort: other | 
| 996 | 0 |  |  |  |  |  | $warnsub->("filter of type ".ref($q)." with nontrivial bounds in native $logas request (".$q->toString.")"); | 
| 997 |  |  |  |  |  |  | } | 
| 998 |  |  |  |  |  |  | } | 
| 999 |  |  |  |  |  |  | else { | 
| 1000 |  |  |  |  |  |  | ##-- something else | 
| 1001 | 0 |  |  |  |  |  | $warnsub->("query clause of type ".ref($q)." in native $logas request (".$q->toString.")"); | 
| 1002 |  |  |  |  |  |  | } | 
| 1003 |  |  |  |  |  |  | } | 
| 1004 |  |  |  |  |  |  |  | 
| 1005 |  |  |  |  |  |  | ##-- check for unsupported attributes & normalize attribute names | 
| 1006 | 0 |  |  |  |  |  | my $allowExtra = $opts{allowExtra}; | 
| 1007 | 0 | 0 |  |  |  |  | $allowExtra    = { map {($_=>undef)} @$allowExtra } if (!UNIVERSAL::isa($allowExtra,'HASH')); | 
|  | 0 |  |  |  |  |  |  | 
| 1008 |  |  |  |  |  |  | @$areqs = grep { | 
| 1009 | 0 |  |  |  |  |  | $attr = $coldb->attrName($_->[0]); | 
|  | 0 |  |  |  |  |  |  | 
| 1010 | 0 | 0 | 0 |  |  |  | if ( !$opts{allowUnknown} && !$coldb->hasAttr($attr) && !($allowExtra && exists($allowExtra->{$attr})) ) { | 
| 1011 | 0 |  | 0 |  |  |  | $warnsub->("unsupported attribute '".($_->[0]//'(undef)')."' in $logas request"); | 
| 1012 | 0 |  |  |  |  |  | 0 | 
| 1013 |  |  |  |  |  |  | } else { | 
| 1014 | 0 |  |  |  |  |  | $_->[0] = $attr; | 
| 1015 | 0 |  |  |  |  |  | 1 | 
| 1016 |  |  |  |  |  |  | } | 
| 1017 |  |  |  |  |  |  | } @$areqs; | 
| 1018 |  |  |  |  |  |  |  | 
| 1019 | 0 |  |  |  |  |  | return $areqs; | 
| 1020 |  |  |  |  |  |  | } | 
| 1021 |  |  |  |  |  |  |  | 
| 1022 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 1023 |  |  |  |  |  |  | ## \@aqs = $coldb->parseRequest($request, %opts) | 
| 1024 |  |  |  |  |  |  | ##  + guts for parsing user target and groupby requests into attribute-wise ARRAY-ref [[$attr1,$val1], ...] | 
| 1025 |  |  |  |  |  |  | ##  + see parseQuery() method for supported $request formats and %opts | 
| 1026 |  |  |  |  |  |  | ##  + wraps $coldb->queryAttributes($coldb->parseQuery($request,%opts)) | 
| 1027 |  |  |  |  |  |  | sub parseRequest { | 
| 1028 | 0 |  |  | 0 | 1 |  | my ($coldb,$req,%opts) = @_; | 
| 1029 | 0 |  |  |  |  |  | return $coldb->queryAttributes($coldb->parseQuery($req,%opts),%opts); | 
| 1030 |  |  |  |  |  |  | } | 
| 1031 |  |  |  |  |  |  |  | 
| 1032 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 1033 |  |  |  |  |  |  | ## \%groupby = $coldb->groupby($groupby_request, %opts) | 
| 1034 |  |  |  |  |  |  | ## \%groupby = $coldb->groupby(\%groupby,        %opts) | 
| 1035 |  |  |  |  |  |  | ##  + $grouby_request : see parseRequest() | 
| 1036 |  |  |  |  |  |  | ##  + returns a HASH-ref: | 
| 1037 |  |  |  |  |  |  | ##    ( | 
| 1038 |  |  |  |  |  |  | ##     req => $request,      ##-- save request | 
| 1039 |  |  |  |  |  |  | ##     ti2g => \&ti2g,       ##-- group-tuple extraction code ($ti => $gtuple) : $g_packed = $ti2g->($ti) | 
| 1040 |  |  |  |  |  |  | ##     ts2g => \&ts2g,       ##-- group-tuple extraction code ($ts => $gtuple) : $g_packed = $ts2g->($ts) | 
| 1041 |  |  |  |  |  |  | ##     g2s   => \&g2s,       ##-- stringification object suitable for DiaColloDB::Profile::stringify() [CODE,enum, or undef] : $g_str = $g2s->($g_packed) | 
| 1042 |  |  |  |  |  |  | ##     s2g   => \&s2g,       ##-- inverse-stringification object (for 2nd-pass processing) | 
| 1043 |  |  |  |  |  |  | ##     s2gx  => \&s2gx,      ##-- inverse-stringification object (for use with extend(), returns undef for unknown string components) | 
| 1044 |  |  |  |  |  |  | ##     g2txt => \&g2txt,     ##-- compatible join()-string stringifcation sub (decimal numeric strings) | 
| 1045 |  |  |  |  |  |  | ##     txt2g => \&txt2g,     ##-- compatible inverse-string stringifcation sub (decimal numeric strings) | 
| 1046 |  |  |  |  |  |  | ##     tpack => \@tpack,     ##-- group-attribute-wise pack-templates, given @ttuple | 
| 1047 |  |  |  |  |  |  | ##     gpack => \@gpack,     ##-- group-attribute-wise pack-templates, given @gtuple | 
| 1048 |  |  |  |  |  |  | ##     areqs => \@areqs,     ##-- parsed attribute requests ([$attr,$ahaving],...) | 
| 1049 |  |  |  |  |  |  | ##     attrs => \@attrs,     ##-- like $coldb->attrs($groupby_request), modulo "having" parts | 
| 1050 |  |  |  |  |  |  | ##     titles => \@titles,   ##-- like map {$coldb->attrTitle($_)} @attrs | 
| 1051 |  |  |  |  |  |  | ##    ) | 
| 1052 |  |  |  |  |  |  | ##  + %opts: | 
| 1053 |  |  |  |  |  |  | ##     warn  => $level,    ##-- log-level for unknown attributes (default: 'warn') | 
| 1054 |  |  |  |  |  |  | ##     relax => $bool,     ##-- allow unsupported attributes (default=0) | 
| 1055 |  |  |  |  |  |  | ##     tenum => $tenum,    ##-- enum to use for \&t2g and \&t2s (default: $coldb->{tenum}) | 
| 1056 |  |  |  |  |  |  | sub groupby { | 
| 1057 | 0 |  |  | 0 | 1 |  | my ($coldb,$gbreq,%opts) = @_; | 
| 1058 | 0 | 0 |  |  |  |  | return $gbreq if (UNIVERSAL::isa($gbreq,'HASH')); | 
| 1059 |  |  |  |  |  |  |  | 
| 1060 |  |  |  |  |  |  | ##-- get data | 
| 1061 | 0 |  | 0 |  |  |  | my $wlevel = $opts{warn} // 'warn'; | 
| 1062 | 0 |  |  |  |  |  | my $gb = { req=>$gbreq }; | 
| 1063 |  |  |  |  |  |  |  | 
| 1064 |  |  |  |  |  |  | ##-- get attribute requests | 
| 1065 | 0 |  |  |  |  |  | my $gbareqs = $gb->{areqs} = $coldb->parseRequest($gb->{req}, %opts,parseas=>'groupby',logas=>'groupby'); | 
| 1066 |  |  |  |  |  |  |  | 
| 1067 |  |  |  |  |  |  | ##-- get attribute names (compat) | 
| 1068 | 0 |  |  |  |  |  | my $gbattrs = $gb->{attrs} = [map {$_->[0]} @$gbareqs]; | 
|  | 0 |  |  |  |  |  |  | 
| 1069 |  |  |  |  |  |  |  | 
| 1070 |  |  |  |  |  |  | ##-- get attribute titles | 
| 1071 | 0 |  |  |  |  |  | $gb->{titles} = [map {$coldb->attrTitle($_)} @$gbattrs]; | 
|  | 0 |  |  |  |  |  |  | 
| 1072 |  |  |  |  |  |  |  | 
| 1073 |  |  |  |  |  |  | ##-- get groupby-sub | 
| 1074 | 0 |  | 0 |  |  |  | my $tenum  = $opts{tenum} // $coldb->{tenum}; | 
| 1075 | 0 |  |  |  |  |  | my $pack_id = $coldb->{pack_id}; | 
| 1076 | 0 |  |  |  |  |  | my $pack_ids = "($pack_id)*"; | 
| 1077 | 0 |  |  |  |  |  | my $len_id  = packsize($pack_id); | 
| 1078 | 0 |  |  |  |  |  | my @gbtpack = @{$gb->{tpack} = [map {$coldb->{"pack_t$_"}} @$gbattrs]}; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 1079 | 0 |  |  |  |  |  | my $gbtpack = join('',@gbtpack); | 
| 1080 | 0 |  |  |  |  |  | my @gbgpack = @{$gb->{gpack} = [map {'@'.($_*$len_id).$pack_id} (0..$#$gbattrs)]}; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 1081 | 0 |  |  |  |  |  | my ($ids); | 
| 1082 |  |  |  |  |  |  | my @gbids  = ( | 
| 1083 |  |  |  |  |  |  | map { | 
| 1084 | 0 |  |  |  |  |  | ($_->[1] && !UNIVERSAL::isa($_->[1],'DDC::Any::CQTokAny') | 
| 1085 |  |  |  |  |  |  | ? { | 
| 1086 | 0 |  |  |  |  |  | map {($_=>undef)} | 
| 1087 | 0 | 0 | 0 |  |  |  | @{$coldb->enumIds($coldb->{"$_->[0]enum"}, $_->[1], logLevel=>$coldb->{logProfile}, logPrefix=>"groupby(): fetch filter ids: $_->[0]")} | 
|  | 0 |  |  |  |  |  |  | 
| 1088 |  |  |  |  |  |  | } | 
| 1089 |  |  |  |  |  |  | : undef) | 
| 1090 |  |  |  |  |  |  | } @$gbareqs); | 
| 1091 |  |  |  |  |  |  |  | 
| 1092 | 0 |  |  |  |  |  | my (@gi,$gi,$ti2g_code,$ts2g_code); | 
| 1093 | 0 | 0 |  |  |  |  | if (grep {$_} @gbids) { | 
|  | 0 |  |  |  |  |  |  | 
| 1094 |  |  |  |  |  |  | ##-- group-by code: with having-filters | 
| 1095 |  |  |  |  |  |  | $ts2g_code = ('' | 
| 1096 |  |  |  |  |  |  | .qq{ \@gi=unpack('$gbtpack',\$_[0]);} | 
| 1097 | 0 |  |  |  |  |  | .qq{ return undef if (}.join(' || ', map {"!exists(\$gbids[$_]{\$gi[$_]})"} grep {defined($gbids[$_])} (0..$#gbids)).qq{);} | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 1098 |  |  |  |  |  |  | .qq{ return pack('$pack_ids',\@gi); } | 
| 1099 |  |  |  |  |  |  | ); | 
| 1100 |  |  |  |  |  |  | } | 
| 1101 |  |  |  |  |  |  | else { | 
| 1102 |  |  |  |  |  |  | ##-- group-by code: no filters | 
| 1103 | 0 |  |  |  |  |  | $ts2g_code = qq{ pack('$pack_ids', unpack('$gbtpack', \$_[0])) }; | 
| 1104 |  |  |  |  |  |  | } | 
| 1105 | 0 |  |  |  |  |  | my $ts2g_sub  = eval qq{sub {$ts2g_code}}; | 
| 1106 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile tuple-based aggregation code sub {$ts2g_code}: $@") if (!$ts2g_sub); | 
| 1107 | 0 |  |  |  |  |  | $@=''; | 
| 1108 | 0 |  |  |  |  |  | $gb->{ts2g} = $ts2g_sub; | 
| 1109 |  |  |  |  |  |  |  | 
| 1110 | 0 |  |  |  |  |  | ($ti2g_code = $ts2g_code) =~ s{\$_\[0\]}{\$tenum->i2s(\$_[0])}; | 
| 1111 | 0 |  |  |  |  |  | my $ti2g_sub  = eval qq{sub {$ti2g_code}}; | 
| 1112 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile id-based aggregation code sub {$ti2g_code}: $@") if (!$ti2g_sub); | 
| 1113 | 0 |  |  |  |  |  | $@=''; | 
| 1114 | 0 |  |  |  |  |  | $gb->{ti2g} = $ti2g_sub; | 
| 1115 |  |  |  |  |  |  |  | 
| 1116 |  |  |  |  |  |  | ##-- get stringification sub(s) | 
| 1117 | 0 |  |  |  |  |  | my ($genum,@genums,$g2scode,$s2gcode,$s2gxcode); | 
| 1118 | 0 | 0 |  |  |  |  | if (@$gbattrs == 1) { | 
| 1119 |  |  |  |  |  |  | ##-- stringify a single attribute | 
| 1120 | 0 |  |  |  |  |  | $genum   = $coldb->{$gbattrs->[0]."enum"}; | 
| 1121 | 0 |  |  |  |  |  | $g2scode = qq{ \$genum->i2s(unpack('$pack_id',\$_[0])) }; | 
| 1122 | 0 |  |  |  |  |  | $s2gcode = qq{ pack('$pack_id', \$genum->s2i(\$_[0]) // 0) }; | 
| 1123 | 0 |  |  |  |  |  | $s2gxcode = qq{ return undef if (!defined(\$gi=\$genum->s2i(\$_[0]))); return pack('$pack_id',\$gi); }; | 
| 1124 |  |  |  |  |  |  | } | 
| 1125 |  |  |  |  |  |  | else { | 
| 1126 | 0 |  |  |  |  |  | @genums = map {$coldb->{$_."enum"}} @$gbattrs; | 
|  | 0 |  |  |  |  |  |  | 
| 1127 |  |  |  |  |  |  | $g2scode = ('' | 
| 1128 |  |  |  |  |  |  | .qq{ \@gi=unpack('$pack_ids', \$_[0]); } | 
| 1129 | 0 |  |  |  |  |  | .q{ join("\t",}.join(', ', map {"\$genums[$_]->i2s(\$gi[$_])"} (0..$#genums)).q{)} | 
|  | 0 |  |  |  |  |  |  | 
| 1130 |  |  |  |  |  |  | ); | 
| 1131 |  |  |  |  |  |  | $s2gcode = ('' | 
| 1132 |  |  |  |  |  |  | .qq{ \@gi=split(/\\t/, \$_[0]); } | 
| 1133 | 0 |  |  |  |  |  | .qq{ pack('$pack_ids',}.join(', ', map {"\$genums[$_]->s2i(\$gi[$_]) // 0"} (0..$#genums)).q{)} | 
|  | 0 |  |  |  |  |  |  | 
| 1134 |  |  |  |  |  |  | ); | 
| 1135 |  |  |  |  |  |  | $s2gxcode = ('' | 
| 1136 |  |  |  |  |  |  | .qq{ \@gi=split(/\\t/, \$_[0]); } | 
| 1137 | 0 |  |  |  |  |  | .qq{ return undef if (}.join(' || ', map {"!defined(\$gi[$_]=\$genums[$_]->s2i(\$gi[$_]))"} (0..$#genums)).q{);} | 
|  | 0 |  |  |  |  |  |  | 
| 1138 |  |  |  |  |  |  | .qq{ return pack('$pack_ids',\@gi); } | 
| 1139 |  |  |  |  |  |  | ); | 
| 1140 |  |  |  |  |  |  | } | 
| 1141 | 0 |  |  |  |  |  | my $g2s = eval qq{sub {$g2scode}}; | 
| 1142 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile stringification code sub {$g2scode}: $@") if (!$g2s); | 
| 1143 | 0 |  |  |  |  |  | $@=''; | 
| 1144 | 0 |  |  |  |  |  | $gb->{g2s} = $g2s; | 
| 1145 |  |  |  |  |  |  |  | 
| 1146 | 0 |  |  |  |  |  | my $s2g = eval qq{sub {$s2gcode}}; | 
| 1147 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile inverse-stringification code sub {$s2gcode}: $@") if (!$s2g); | 
| 1148 | 0 |  |  |  |  |  | $@=''; | 
| 1149 | 0 |  |  |  |  |  | $gb->{s2g} = $s2g; | 
| 1150 |  |  |  |  |  |  |  | 
| 1151 | 0 |  |  |  |  |  | my $s2gx = eval qq{sub {$s2gxcode}}; | 
| 1152 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile inverse-stringification code sub for extend {$s2gxcode}: $@") if (!$s2gx); | 
| 1153 | 0 |  |  |  |  |  | $@=''; | 
| 1154 | 0 |  |  |  |  |  | $gb->{s2gx} = $s2gx; | 
| 1155 |  |  |  |  |  |  |  | 
| 1156 |  |  |  |  |  |  | ##-- get pseudo-stringification sub ("\t"-joined decimal integer ids) | 
| 1157 | 0 |  |  |  |  |  | my ($g2txt_code,$txt2g_code); | 
| 1158 | 0 | 0 |  |  |  |  | if (@$gbattrs == 1) { | 
| 1159 |  |  |  |  |  |  | ##-- stringify a single attribute | 
| 1160 | 0 |  |  |  |  |  | $g2txt_code = qq{ unpack('$pack_id',\$_[0]) }; | 
| 1161 | 0 |  |  |  |  |  | $txt2g_code = qq{ pack('$pack_id',\$_[0] // 0) }; | 
| 1162 |  |  |  |  |  |  | } | 
| 1163 |  |  |  |  |  |  | else { | 
| 1164 | 0 |  |  |  |  |  | $g2txt_code = qq{ join("\t",unpack('$pack_ids', \$_[0])); }; | 
| 1165 | 0 |  |  |  |  |  | $txt2g_code = qq{ pack('$pack_ids', split(/\t/, \$_[0] // 0)); }; | 
| 1166 |  |  |  |  |  |  | } | 
| 1167 | 0 |  |  |  |  |  | my $g2txt = eval qq{sub {$g2txt_code}}; | 
| 1168 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile pseudo-stringification code sub {$g2txt_code}: $@") if (!$g2txt); | 
| 1169 | 0 |  |  |  |  |  | $@=''; | 
| 1170 | 0 |  |  |  |  |  | $gb->{g2txt} = $g2txt; | 
| 1171 |  |  |  |  |  |  |  | 
| 1172 | 0 |  |  |  |  |  | my $txt2g = eval qq{sub {$txt2g_code}}; | 
| 1173 | 0 | 0 |  |  |  |  | $coldb->logconfess($coldb->{error}="groupby(): could not compile inverse pseudo-stringification code sub {$txt2g_code}: $@") if (!$txt2g); | 
| 1174 | 0 |  |  |  |  |  | $@=''; | 
| 1175 | 0 |  |  |  |  |  | $gb->{txt2g} = $txt2g; | 
| 1176 |  |  |  |  |  |  |  | 
| 1177 | 0 |  |  |  |  |  | return $gb; | 
| 1178 |  |  |  |  |  |  | } | 
| 1179 |  |  |  |  |  |  |  | 
| 1180 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 1181 |  |  |  |  |  |  | ## $cqfilter = $coldb->query2filter($attr,$cquery,%opts) | 
| 1182 |  |  |  |  |  |  | ##  + converts a CQToken to a CQFilter, for ddc parsing | 
| 1183 |  |  |  |  |  |  | ##  + %opts: | 
| 1184 |  |  |  |  |  |  | ##     logas => $logas,   ##-- log-prefix for warnings | 
| 1185 |  |  |  |  |  |  | sub query2filter { | 
| 1186 | 0 |  |  | 0 | 1 |  | my ($coldb,$attr,$q,%opts) = @_; | 
| 1187 | 0 | 0 |  |  |  |  | return undef if (!defined($q)); | 
| 1188 | 0 |  | 0 |  |  |  | my $logas = $opts{logas} || 'query2filter'; | 
| 1189 |  |  |  |  |  |  |  | 
| 1190 |  |  |  |  |  |  | ##-- document attribute ("doc.ATTR" convention) | 
| 1191 | 0 |  | 0 |  |  |  | my $field = $coldb->attrName( $attr // $q->getIndexName ); | 
| 1192 | 0 | 0 |  |  |  |  | $field = $1 if ($field =~ /^doc\.(.*)$/); | 
| 1193 | 0 | 0 | 0 |  |  |  | if (UNIVERSAL::isa($q, 'DDC::Any::CQTokAny')) { | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 1194 | 0 |  |  |  |  |  | return undef; | 
| 1195 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($q, 'DDC::Any::CQTokExact') || UNIVERSAL::isa($q, 'DDC::Any::CQTokInfl')) { | 
| 1196 | 0 |  |  |  |  |  | return DDC::Any::CQFHasField->new($field, $q->getValue, $q->getNegated); | 
| 1197 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($q, 'DDC::Any::CQTokSet') || UNIVERSAL::isa($q, 'DDC::Any::CQTokSetInfl')) { | 
| 1198 | 0 |  |  |  |  |  | return DDC::Any::CQFHasFieldSet->new($field, $q->getValues, $q->getNegated); | 
| 1199 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($q, 'DDC::Any::CQTokRegex')) { | 
| 1200 | 0 |  |  |  |  |  | return DDC::Any::CQFHasFieldRegex->new($field, $q->getValue, $q->getNegated); | 
| 1201 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($q, 'DDC::Any::CQTokPrefix')) { | 
| 1202 | 0 |  |  |  |  |  | return DDC::Any::CQFHasFieldPrefix->new($field, $q->getValue, $q->getNegated); | 
| 1203 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($q, 'DDC::Any::CQTokSuffix')) { | 
| 1204 | 0 |  |  |  |  |  | return DDC::Any::CQFHasFieldSuffix->new($field, $q->getValue, $q->getNegated); | 
| 1205 |  |  |  |  |  |  | } elsif (UNIVERSAL::isa($q, 'DDC::Any::CQTokInfix')) { | 
| 1206 | 0 |  |  |  |  |  | return DDC::Any::CQFHasFieldInfix->new($field, $q->getValue, $q->getNegated); | 
| 1207 |  |  |  |  |  |  | } else { | 
| 1208 | 0 |  |  |  |  |  | $coldb->logconfess("can't handle metadata restriction of type ", ref($q), " in $logas request: \`", $q->toString, "'"); | 
| 1209 |  |  |  |  |  |  | } | 
| 1210 |  |  |  |  |  |  | } | 
| 1211 |  |  |  |  |  |  |  | 
| 1212 |  |  |  |  |  |  | ##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | 
| 1213 |  |  |  |  |  |  | ## ($CQCountKeyExprs,\$CQRestrict,\@CQFilters) = $coldb->parseGroupBy($groupby_string_or_request,%opts) | 
| 1214 |  |  |  |  |  |  | ##  + for ddc-mode parsing | 
| 1215 |  |  |  |  |  |  | ##  + %opts: | 
| 1216 |  |  |  |  |  |  | ##     date => $date, | 
| 1217 |  |  |  |  |  |  | ##     slice => $slice, | 
| 1218 |  |  |  |  |  |  | ##     matchid => $matchid,    ##-- default match-id | 
| 1219 |  |  |  |  |  |  | sub parseGroupBy { | 
| 1220 | 0 |  |  | 0 | 1 |  | my ($coldb,$req,%opts) = @_; | 
| 1221 | 0 |  | 0 |  |  |  | $req //= $coldb->attrs; | 
| 1222 |  |  |  |  |  |  |  | 
| 1223 |  |  |  |  |  |  | ##-- groupby clause: date | 
| 1224 |  |  |  |  |  |  | my $gbdate = ($opts{slice}<=0 | 
| 1225 |  |  |  |  |  |  | ? DDC::Any::CQCountKeyExprConstant->new($opts{slice}||'0') | 
| 1226 | 0 | 0 | 0 |  |  |  | : DDC::Any::CQCountKeyExprDateSlice->new('date',$opts{slice})); | 
| 1227 |  |  |  |  |  |  |  | 
| 1228 |  |  |  |  |  |  | ##-- groupby clause: user request | 
| 1229 | 0 |  |  |  |  |  | my $gbexprs   = [$gbdate]; | 
| 1230 | 0 |  |  |  |  |  | my $gbfilters = []; | 
| 1231 | 0 |  |  |  |  |  | my ($gbrestr); | 
| 1232 | 0 | 0 | 0 |  |  |  | if (!ref($req) && $req =~ m{^\s*(?:\#by)?\s*\[(.*)\]\s*$}) { | 
| 1233 |  |  |  |  |  |  | ##-- ddc-style request; no restriction-clauses are allowed | 
| 1234 | 0 |  |  |  |  |  | my $cbstr = $1; | 
| 1235 |  |  |  |  |  |  | my $gbq = $coldb->qparse("count(*) #by[$cbstr]") | 
| 1236 | 0 | 0 |  |  |  |  | or $coldb->logconfess($coldb->{error}="failed to parse DDC groupby request \`$req': $coldb->{error}"); | 
| 1237 | 0 |  |  |  |  |  | push(@$gbexprs, @{$gbq->getKeys->getExprs}); | 
|  | 0 |  |  |  |  |  |  | 
| 1238 |  |  |  |  |  |  | $_->setMatchId($opts{matchid}//0) | 
| 1239 | 0 | 0 | 0 |  |  |  | foreach (grep {UNIVERSAL::isa($_,'DDC::Any::CQCountKeyExprToken') && !$_->HasMatchId} @$gbexprs); | 
|  | 0 |  |  |  |  |  |  | 
| 1240 |  |  |  |  |  |  | } | 
| 1241 |  |  |  |  |  |  | else { | 
| 1242 |  |  |  |  |  |  | ##-- native-style request with optional restrictions | 
| 1243 | 0 |  |  |  |  |  | my $gbreq  = $coldb->parseRequest($req, parseas=>'groupby', logas=>'groupby', default=>undef, relax=>1, allowUnknown=>1); | 
| 1244 | 0 |  |  |  |  |  | my ($filter); | 
| 1245 | 0 |  |  |  |  |  | foreach (@$gbreq) { | 
| 1246 | 0 |  |  |  |  |  | push(@$gbexprs, $coldb->attrCountBy($_->[0], 2)); | 
| 1247 | 0 | 0 |  |  |  |  | if ($_->[0] =~ /^doc\./) { | 
| 1248 |  |  |  |  |  |  | ##-- document attribute ("doc.ATTR" convention) | 
| 1249 | 0 | 0 |  |  |  |  | push(@$gbfilters, $filter) if (defined($filter=$coldb->query2filter($_->[0], $_->[1]))); | 
| 1250 |  |  |  |  |  |  | } | 
| 1251 |  |  |  |  |  |  | else { | 
| 1252 |  |  |  |  |  |  | ##-- token attribute | 
| 1253 | 0 | 0 | 0 |  |  |  | if (defined($_->[1]) && !UNIVERSAL::isa($_->[1], 'DDC::Any::CQTokAny')) { | 
| 1254 | 0 | 0 |  |  |  |  | $gbrestr = (defined($gbrestr) ? DDC::Any::CQWith->new($gbrestr,$_->[1]) : $_->[1]); | 
| 1255 |  |  |  |  |  |  | } | 
| 1256 |  |  |  |  |  |  | } | 
| 1257 |  |  |  |  |  |  | } | 
| 1258 |  |  |  |  |  |  | } | 
| 1259 |  |  |  |  |  |  |  | 
| 1260 |  |  |  |  |  |  | ##-- finalize: expression list | 
| 1261 | 0 |  |  |  |  |  | my $xlist = DDC::Any::CQCountKeyExprList->new; | 
| 1262 | 0 |  |  |  |  |  | $xlist->setExprs($gbexprs); | 
| 1263 |  |  |  |  |  |  |  | 
| 1264 | 0 |  |  |  |  |  | return ($xlist,$gbrestr,$gbfilters); | 
| 1265 |  |  |  |  |  |  | } | 
| 1266 |  |  |  |  |  |  |  | 
| 1267 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 1268 |  |  |  |  |  |  | ## Profiling: Generic | 
| 1269 |  |  |  |  |  |  |  | 
| 1270 |  |  |  |  |  |  | ## $mprf = $coldb->profile($relation, %opts) | 
| 1271 |  |  |  |  |  |  | ##  + get a relation profile for selected items as a DiaColloDB::Profile::Multi object | 
| 1272 |  |  |  |  |  |  | ##  + %opts: | 
| 1273 |  |  |  |  |  |  | ##    ( | 
| 1274 |  |  |  |  |  |  | ##     ##-- selection parameters | 
| 1275 |  |  |  |  |  |  | ##     query => $query,           ##-- target request ATTR:REQ... | 
| 1276 |  |  |  |  |  |  | ##     date  => $date1,           ##-- string or array or range "MIN-MAX" (inclusive) : default=all | 
| 1277 |  |  |  |  |  |  | ##     ## | 
| 1278 |  |  |  |  |  |  | ##     ##-- aggregation parameters | 
| 1279 |  |  |  |  |  |  | ##     slice   => $slice,         ##-- date slice (default=1, 0 for global profile) | 
| 1280 |  |  |  |  |  |  | ##     groupby => $groupby,       ##-- string or array "ATTR1[:HAVING1] ...": default=$coldb->attrs; see groupby() method | 
| 1281 |  |  |  |  |  |  | ##     ## | 
| 1282 |  |  |  |  |  |  | ##     ##-- scoring and trimming parameters | 
| 1283 |  |  |  |  |  |  | ##     eps     => $eps,           ##-- smoothing constant (default=0) | 
| 1284 |  |  |  |  |  |  | ##     score   => $func,          ##-- scoring function (f|fm|lf|lfm|mi|ld) : default="f" | 
| 1285 |  |  |  |  |  |  | ##     kbest   => $k,             ##-- return only $k best collocates per date (slice) : default=-1:all | 
| 1286 |  |  |  |  |  |  | ##     cutoff  => $cutoff,        ##-- minimum score | 
| 1287 |  |  |  |  |  |  | ##     global  => $bool,          ##-- trim profiles globally (vs. locally for each date-slice?) (default=0) | 
| 1288 |  |  |  |  |  |  | ##     ## | 
| 1289 |  |  |  |  |  |  | ##     ##-- profiling and debugging parameters | 
| 1290 |  |  |  |  |  |  | ##     strings => $bool,          ##-- do/don't stringify output profile(s) (default=do) | 
| 1291 |  |  |  |  |  |  | ##     fill    => $bool,          ##-- if true, returned multi-profile will have null profiles inserted for missing slices | 
| 1292 |  |  |  |  |  |  | ##     onepass => $bool,          ##-- if true, use fast but incorrect 1-pass method (Cofreqs profiling only, >= v0.09.001) | 
| 1293 |  |  |  |  |  |  | ##    ) | 
| 1294 |  |  |  |  |  |  | ##  + sets default %opts and wraps $coldb->relation($rel)->profile($coldb, %opts) | 
| 1295 |  |  |  |  |  |  | sub profile { | 
| 1296 | 0 |  |  | 0 | 1 |  | my ($coldb,$rel,%opts) = @_; | 
| 1297 |  |  |  |  |  |  |  | 
| 1298 |  |  |  |  |  |  | ##-- defaults | 
| 1299 | 0 |  |  |  |  |  | $coldb->profileOptions(\%opts); | 
| 1300 |  |  |  |  |  |  |  | 
| 1301 |  |  |  |  |  |  | ##-- debug | 
| 1302 |  |  |  |  |  |  | $coldb->vlog($coldb->{logRequest}, | 
| 1303 |  |  |  |  |  |  | "profile(" | 
| 1304 |  |  |  |  |  |  | .join(', ', | 
| 1305 | 0 |  | 0 |  |  |  | map {"$_->[0]='".quotemeta($_->[1]//'')."'"} | 
| 1306 |  |  |  |  |  |  | ([rel=>$rel], | 
| 1307 |  |  |  |  |  |  | [query=>$opts{query}], | 
| 1308 | 0 |  |  |  |  |  | [groupby=>UNIVERSAL::isa($opts{groupby},'ARRAY') ? join(',', @{$opts{groupby}}) : $opts{groupby}], | 
| 1309 | 0 | 0 |  |  |  |  | (map {[$_=>$opts{$_}]} qw(date slice score eps kbest cutoff global onepass)), | 
|  | 0 |  |  |  |  |  |  | 
| 1310 |  |  |  |  |  |  | )) | 
| 1311 |  |  |  |  |  |  | .")"); | 
| 1312 |  |  |  |  |  |  |  | 
| 1313 |  |  |  |  |  |  | ##-- relation | 
| 1314 | 0 |  |  |  |  |  | my ($reldb); | 
| 1315 | 0 | 0 | 0 |  |  |  | if (!defined($reldb=$coldb->relation($rel||'cof'))) { | 
| 1316 | 0 |  | 0 |  |  |  | $coldb->logwarn($coldb->{error}="profile(): unknown relation '".($rel//'-undef-')."'"); | 
| 1317 | 0 |  |  |  |  |  | return undef; | 
| 1318 |  |  |  |  |  |  | } | 
| 1319 |  |  |  |  |  |  |  | 
| 1320 |  |  |  |  |  |  | ##-- delegate | 
| 1321 | 0 |  |  |  |  |  | return $reldb->profile($coldb,%opts); | 
| 1322 |  |  |  |  |  |  | } | 
| 1323 |  |  |  |  |  |  |  | 
| 1324 |  |  |  |  |  |  | ## \%opts = $CLASS_OR_OBJECT->profileOptions(\%opts) | 
| 1325 |  |  |  |  |  |  | ##  + instantiates default options for profile() method | 
| 1326 |  |  |  |  |  |  | sub profileOptions { | 
| 1327 | 0 |  |  | 0 | 1 |  | my ($that,$opts) = @_; | 
| 1328 |  |  |  |  |  |  |  | 
| 1329 |  |  |  |  |  |  | ##-- defaults | 
| 1330 | 0 |  | 0 |  |  |  | $opts->{query}     = (grep {defined($_)} @$opts{qw(query q lemma lem l)})[0] // ''; | 
|  | 0 |  |  |  |  |  |  | 
| 1331 | 0 |  | 0 |  |  |  | $opts->{date}    //= ''; | 
| 1332 | 0 |  | 0 |  |  |  | $opts->{slice}   //= 1; | 
| 1333 | 0 | 0 | 0 |  |  |  | $opts->{groupby} ||= join(',', map {quotemeta($_)} @{$that->attrs}) if (ref($that)); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 1334 | 0 |  | 0 |  |  |  | $opts->{score}   //= 'f'; | 
| 1335 | 0 |  | 0 |  |  |  | $opts->{eps}     //= 0; #0.5; | 
| 1336 | 0 |  | 0 |  |  |  | $opts->{kbest}   //= -1; | 
| 1337 | 0 |  | 0 |  |  |  | $opts->{cutoff}  //= ''; | 
| 1338 | 0 |  | 0 |  |  |  | $opts->{global}  //= 0; | 
| 1339 | 0 |  | 0 |  |  |  | $opts->{strings} //= 1; | 
| 1340 | 0 |  | 0 |  |  |  | $opts->{fill}    //= 0; | 
| 1341 | 0 |  | 0 |  |  |  | $opts->{onepass} //= 0; | 
| 1342 |  |  |  |  |  |  |  | 
| 1343 | 0 |  |  |  |  |  | return $opts; | 
| 1344 |  |  |  |  |  |  | } | 
| 1345 |  |  |  |  |  |  |  | 
| 1346 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 1347 |  |  |  |  |  |  | ## Profiling: extend (pass-2 for multi-clients) | 
| 1348 |  |  |  |  |  |  |  | 
| 1349 |  |  |  |  |  |  | ## $mprf = $coldb->extend($relation, %opts) | 
| 1350 |  |  |  |  |  |  | ##  + get independent f2 frequencies for $opts{slice2keys}, which is EITHER | 
| 1351 |  |  |  |  |  |  | ##    - a HASH-ref {$slice1=>\@keys1, ...}, | 
| 1352 |  |  |  |  |  |  | ##      OR | 
| 1353 |  |  |  |  |  |  | ##    - a JSON-string encoding a such a HASH-ref | 
| 1354 |  |  |  |  |  |  | ##  + %opts, as for profile(), except: | 
| 1355 |  |  |  |  |  |  | ##    ( | 
| 1356 |  |  |  |  |  |  | ##     ##-- selection parameters | 
| 1357 |  |  |  |  |  |  | ##     query => $query,           ##-- target request ATTR:REQ... : mostly IGNORED (but used e.g. by ddc back-end) | 
| 1358 |  |  |  |  |  |  | ##     slice2keys => \%slice2keys, ##-- target f2-items or JSON-string | 
| 1359 |  |  |  |  |  |  | ##     ##-- scoring and trimming parameters : IGNORED | 
| 1360 |  |  |  |  |  |  | ##     ##-- profiling and debugging parameters: IGNORED | 
| 1361 |  |  |  |  |  |  | ##    ) | 
| 1362 |  |  |  |  |  |  | ##  + returns a DiaColloDB::Profile::Multi containing the appropriate f2 entries | 
| 1363 |  |  |  |  |  |  | sub extend { | 
| 1364 | 0 |  |  | 0 | 1 |  | my ($coldb,$rel,%opts) = @_; | 
| 1365 |  |  |  |  |  |  |  | 
| 1366 |  |  |  |  |  |  | ##-- defaults | 
| 1367 | 0 |  |  |  |  |  | $coldb->profileOptions(\%opts); | 
| 1368 |  |  |  |  |  |  |  | 
| 1369 |  |  |  |  |  |  | ##-- items | 
| 1370 | 0 |  | 0 |  |  |  | $opts{slice2keys} //= ''; | 
| 1371 |  |  |  |  |  |  | $opts{slice2keys}   = DiaColloDB::Utils::loadJsonString($opts{slice2keys}) | 
| 1372 | 0 | 0 | 0 |  |  |  | if ($opts{slice2keys} && !ref($opts{slice2keys})); | 
| 1373 |  |  |  |  |  |  |  | 
| 1374 |  |  |  |  |  |  | ##-- debug | 
| 1375 |  |  |  |  |  |  | $coldb->vlog($coldb->{logRequest}, | 
| 1376 |  |  |  |  |  |  | "extend(" | 
| 1377 |  |  |  |  |  |  | .join(', ', | 
| 1378 | 0 |  | 0 |  |  |  | map {"$_->[0]='".quotemeta($_->[1]//'')."'"} | 
| 1379 |  |  |  |  |  |  | ([rel=>$rel], | 
| 1380 |  |  |  |  |  |  | [query=>$opts{query}], | 
| 1381 | 0 |  |  |  |  |  | [groupby=>UNIVERSAL::isa($opts{groupby},'ARRAY') ? join(',', @{$opts{groupby}}) : $opts{groupby}], | 
| 1382 | 0 | 0 |  |  |  |  | (map {[$_=>$opts{$_}]} qw(date slice)), | 
|  | 0 |  |  |  |  |  |  | 
| 1383 |  |  |  |  |  |  | )) | 
| 1384 |  |  |  |  |  |  | .")"); | 
| 1385 |  |  |  |  |  |  |  | 
| 1386 |  |  |  |  |  |  | ##-- relation | 
| 1387 | 0 |  |  |  |  |  | my ($reldb); | 
| 1388 | 0 | 0 | 0 |  |  |  | if (!defined($reldb=$coldb->relation($rel||'cof'))) { | 
| 1389 | 0 |  | 0 |  |  |  | $coldb->logwarn($coldb->{error}="extend(): unknown relation '".($rel//'-undef-')."'"); | 
| 1390 | 0 |  |  |  |  |  | return undef; | 
| 1391 |  |  |  |  |  |  | } | 
| 1392 |  |  |  |  |  |  |  | 
| 1393 |  |  |  |  |  |  | ##-- delegate | 
| 1394 | 0 |  |  |  |  |  | return $reldb->extend($coldb,%opts); | 
| 1395 |  |  |  |  |  |  | } | 
| 1396 |  |  |  |  |  |  |  | 
| 1397 |  |  |  |  |  |  |  | 
| 1398 |  |  |  |  |  |  |  | 
| 1399 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 1400 |  |  |  |  |  |  | ## Profiling: Comparison (diff) | 
| 1401 |  |  |  |  |  |  |  | 
| 1402 |  |  |  |  |  |  | ## $mprf = $coldb->compare($relation, %opts) | 
| 1403 |  |  |  |  |  |  | ##  + get a relation comparison profile for selected items as a DiaColloDB::Profile::MultiDiff object | 
| 1404 |  |  |  |  |  |  | ##  + %opts: | 
| 1405 |  |  |  |  |  |  | ##    ( | 
| 1406 |  |  |  |  |  |  | ##     ##-- selection parameters | 
| 1407 |  |  |  |  |  |  | ##     (a|b)?query => $query,       ##-- target query as for parseRequest() | 
| 1408 |  |  |  |  |  |  | ##     (a|b)?date  => $date1,       ##-- string or array or range "MIN-MAX" (inclusive) : default=all | 
| 1409 |  |  |  |  |  |  | ##     ## | 
| 1410 |  |  |  |  |  |  | ##     ##-- aggregation parameters | 
| 1411 |  |  |  |  |  |  | ##     groupby     => $groupby,     ##-- string or array "ATTR1[:HAVING1] ...": default=$coldb->attrs; see groupby() method | 
| 1412 |  |  |  |  |  |  | ##     (a|b)?slice => $slice,       ##-- date slice (default=1, 0 for global profile) | 
| 1413 |  |  |  |  |  |  | ##     ## | 
| 1414 |  |  |  |  |  |  | ##     ##-- scoring and trimming parameters | 
| 1415 |  |  |  |  |  |  | ##     eps     => $eps,           ##-- smoothing constant (default=0) | 
| 1416 |  |  |  |  |  |  | ##     score   => $func,          ##-- scoring function (f|fm|lf|lfm|mi|ld) : default="f" | 
| 1417 |  |  |  |  |  |  | ##     kbest   => $k,             ##-- return only $k best collocates per date (slice) : default=-1:all | 
| 1418 |  |  |  |  |  |  | ##     cutoff  => $cutoff,        ##-- minimum score (UNUSED for comparison profiles) | 
| 1419 |  |  |  |  |  |  | ##     global  => $bool,          ##-- trim profiles globally (vs. locally for each date-slice?) (default=0) | 
| 1420 |  |  |  |  |  |  | ##     diff    => $diff,          ##-- low-level score-diff operation (diff|adiff|sum|min|max|avg|havg); default='adiff' | 
| 1421 |  |  |  |  |  |  | ##     ## | 
| 1422 |  |  |  |  |  |  | ##     ##-- profiling and debugging parameters | 
| 1423 |  |  |  |  |  |  | ##     strings => $bool,          ##-- do/don't stringify (default=do) | 
| 1424 |  |  |  |  |  |  | ##    ) | 
| 1425 |  |  |  |  |  |  | ##  + sets default %opts and wraps $coldb->relation($rel)->compare($coldb, %opts) | 
| 1426 | 1 |  |  | 1 |  | 639 | BEGIN { *diff = \&compare; } | 
| 1427 |  |  |  |  |  |  | sub compare { | 
| 1428 | 0 |  |  | 0 | 1 |  | my ($coldb,$rel,%opts) = @_; | 
| 1429 | 0 |  | 0 |  |  |  | $rel //= 'cof'; | 
| 1430 |  |  |  |  |  |  |  | 
| 1431 |  |  |  |  |  |  | ##-- defaults and '[ab]OPTION' parsing | 
| 1432 | 0 |  |  |  |  |  | $coldb->compareOptions(\%opts); | 
| 1433 |  |  |  |  |  |  |  | 
| 1434 |  |  |  |  |  |  | ##-- debug | 
| 1435 |  |  |  |  |  |  | $coldb->vlog($coldb->{logRequest}, | 
| 1436 |  |  |  |  |  |  | "compare(" | 
| 1437 |  |  |  |  |  |  | .join(', ', | 
| 1438 | 0 |  | 0 |  |  |  | map {"$_->[0]=".quotemeta($_->[1]//'')."'"} | 
| 1439 |  |  |  |  |  |  | ([rel=>$rel], | 
| 1440 | 0 |  |  |  |  |  | (map {["a$_"=>$opts{"a$_"}]} (qw(query date slice))), | 
| 1441 | 0 |  |  |  |  |  | (map {["b$_"=>$opts{"b$_"}]} (qw(query date slice))), | 
| 1442 | 0 |  |  |  |  |  | [groupby=>(UNIVERSAL::isa($opts{groupby},'ARRAY') ? join(',',@{$opts{groupby}}) : $opts{groupby})], | 
| 1443 | 0 | 0 |  |  |  |  | (map {[$_=>$opts{$_}]} qw(score eps kbest cutoff global diff)), | 
|  | 0 |  |  |  |  |  |  | 
| 1444 |  |  |  |  |  |  | )) | 
| 1445 |  |  |  |  |  |  | .")"); | 
| 1446 |  |  |  |  |  |  |  | 
| 1447 |  |  |  |  |  |  | ##-- relation | 
| 1448 | 0 |  |  |  |  |  | my ($reldb); | 
| 1449 | 0 | 0 | 0 |  |  |  | if (!defined($reldb=$coldb->relation($rel||'cof'))) { | 
| 1450 | 0 |  | 0 |  |  |  | $coldb->logwarn($coldb->{error}="profile(): unknown relation '".($rel//'-undef-')."'"); | 
| 1451 | 0 |  |  |  |  |  | return undef; | 
| 1452 |  |  |  |  |  |  | } | 
| 1453 |  |  |  |  |  |  |  | 
| 1454 |  |  |  |  |  |  | ##-- delegate | 
| 1455 | 0 |  |  |  |  |  | return $reldb->compare($coldb,%opts); | 
| 1456 |  |  |  |  |  |  | } | 
| 1457 |  |  |  |  |  |  |  | 
| 1458 |  |  |  |  |  |  | ## \%opts = $CLASS_OR_OBJECT->compareOptions(\%opts) | 
| 1459 |  |  |  |  |  |  | ##  + instantiates default options for compare() method | 
| 1460 |  |  |  |  |  |  | sub compareOptions { | 
| 1461 | 0 |  |  | 0 | 1 |  | my ($that,$opts) = @_; | 
| 1462 |  |  |  |  |  |  |  | 
| 1463 |  |  |  |  |  |  | ##-- defaults and '[ab]OPTION' parsing | 
| 1464 | 0 |  |  |  |  |  | foreach my $ab (qw(a b)) { | 
| 1465 | 0 |  |  |  |  |  | $opts->{"${ab}query"} = ((grep {defined($_)} @$opts{map {"${ab}$_"} qw(query q lemma lem l)}), | 
|  | 0 |  |  |  |  |  |  | 
| 1466 | 0 |  | 0 |  |  |  | (grep {defined($_)} @$opts{qw(query q lemma lem l)}), | 
|  | 0 |  |  |  |  |  |  | 
| 1467 |  |  |  |  |  |  | )[0]//''; | 
| 1468 |  |  |  |  |  |  | } | 
| 1469 | 0 |  |  |  |  |  | foreach my $attr (qw(date slice)) { | 
| 1470 | 0 | 0 |  |  |  |  | $opts->{"a$attr"} = ((map {defined($opts->{"a$_"}) ? $opts->{"a$_"} : qw()} @{$ATTR_RALIAS{$attr}}), | 
|  | 0 |  |  |  |  |  |  | 
| 1471 | 0 | 0 | 0 |  |  |  | (map {defined($opts->{$_})    ? $opts->{$_}    : qw()} @{$ATTR_RALIAS{$attr}}), | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 1472 |  |  |  |  |  |  | )[0]//''; | 
| 1473 | 0 | 0 |  |  |  |  | $opts->{"b$attr"} = ((map {defined($opts->{"b$_"}) ? $opts->{"b$_"} : qw()} @{$ATTR_RALIAS{$attr}}), | 
|  | 0 |  |  |  |  |  |  | 
| 1474 | 0 | 0 | 0 |  |  |  | (map {defined($opts->{$_})    ? $opts->{$_}    : qw()} @{$ATTR_RALIAS{$attr}}), | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 1475 |  |  |  |  |  |  | )[0]//''; | 
| 1476 |  |  |  |  |  |  | } | 
| 1477 | 0 |  |  |  |  |  | delete @$opts{keys %ATTR_ALIAS}; | 
| 1478 |  |  |  |  |  |  |  | 
| 1479 |  |  |  |  |  |  | ##-- diff defaults | 
| 1480 | 0 |  | 0 |  |  |  | $opts->{diff} //= 'adiff'; | 
| 1481 | 0 |  | 0 |  |  |  | $opts->{fill} //= 1; | 
| 1482 |  |  |  |  |  |  |  | 
| 1483 |  |  |  |  |  |  | ##-- common defaults | 
| 1484 | 0 |  |  |  |  |  | $that->profileOptions($opts); | 
| 1485 |  |  |  |  |  |  |  | 
| 1486 | 0 |  |  |  |  |  | return $opts; | 
| 1487 |  |  |  |  |  |  | } | 
| 1488 |  |  |  |  |  |  |  | 
| 1489 |  |  |  |  |  |  | ##============================================================================== | 
| 1490 |  |  |  |  |  |  | ## Footer | 
| 1491 |  |  |  |  |  |  | 1; | 
| 1492 |  |  |  |  |  |  |  | 
| 1493 |  |  |  |  |  |  | __END__ |