| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ## -*- Mode: CPerl -*- | 
| 2 |  |  |  |  |  |  | ## File: DiaColloDB::Compat::v0_09::Relation::Cofreqs.pm | 
| 3 |  |  |  |  |  |  | ## Author: Bryan Jurish <moocow@cpan.org> | 
| 4 |  |  |  |  |  |  | ## Description: collocation db, profiling relation: co-frequency database (v0.9x format) | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | package DiaColloDB::Compat::v0_09::Relation::Cofreqs; | 
| 7 | 1 |  |  | 1 |  | 8 | use DiaColloDB::Compat::v0_09::Relation; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 8 | 1 |  |  | 1 |  | 6 | use DiaColloDB::PackedFile; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 9 | 1 |  |  | 1 |  | 5 | use DiaColloDB::PackedFile::MMap; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 10 | 1 |  |  | 1 |  | 4 | use DiaColloDB::Utils qw(:fcntl :env :run :json :pack); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 39 |  | 
| 11 | 1 |  |  | 1 |  | 387 | use Fcntl qw(:DEFAULT :seek); | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 33 |  | 
| 12 | 1 |  |  | 1 |  | 452 | use strict; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 1508 |  | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | ##============================================================================== | 
| 15 |  |  |  |  |  |  | ## Globals & Constants | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | our @ISA = qw(DiaColloDB::Compat::v0_09::Relation); | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | ## $PFCLASS : object class for nested PackedFile objects | 
| 20 |  |  |  |  |  |  | our $PFCLASS = 'DiaColloDB::PackedFile::MMap'; | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | ##============================================================================== | 
| 23 |  |  |  |  |  |  | ## Constructors etc. | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | ## $cof = CLASS_OR_OBJECT->new(%args) | 
| 26 |  |  |  |  |  |  | ## + %args, object structure: | 
| 27 |  |  |  |  |  |  | ##   ( | 
| 28 |  |  |  |  |  |  | ##    ##-- user options | 
| 29 |  |  |  |  |  |  | ##    class    => $class,      ##-- optional, useful for debugging from header file | 
| 30 |  |  |  |  |  |  | ##    base     => $basename,   ##-- file basename (default=undef:none); use files "${base}.dba1", "${base}.dba2", "${base}.hdr" | 
| 31 |  |  |  |  |  |  | ##    flags    => $flags,      ##-- fcntl flags or open-mode (default='r') | 
| 32 |  |  |  |  |  |  | ##    perms    => $perms,      ##-- creation permissions (default=(0666 &~umask)) | 
| 33 |  |  |  |  |  |  | ##    dmax     => $dmax,       ##-- maximum distance for co-occurrences (default=5) | 
| 34 |  |  |  |  |  |  | ##    fmin     => $fmin,       ##-- minimum pair frequency (default=0) | 
| 35 |  |  |  |  |  |  | ##    pack_i   => $pack_i,     ##-- pack-template for IDs (default='N') | 
| 36 |  |  |  |  |  |  | ##    pack_f   => $pack_f,     ##-- pack-template for IDs (default='N') | 
| 37 |  |  |  |  |  |  | ##    keeptmp  => $bool,       ##-- keep temporary files? (default=false) | 
| 38 |  |  |  |  |  |  | ##    ## | 
| 39 |  |  |  |  |  |  | ##    ##-- size info (after open() or load()) | 
| 40 |  |  |  |  |  |  | ##    size1    => $size1,      ##-- == $r1->size() | 
| 41 |  |  |  |  |  |  | ##    size2    => $size2,      ##-- == $r2->size() | 
| 42 |  |  |  |  |  |  | ##    ## | 
| 43 |  |  |  |  |  |  | ##    ##-- low-level data | 
| 44 |  |  |  |  |  |  | ##    r1 => $r1,               ##-- pf: [$end2,$f1] @ $i1 | 
| 45 |  |  |  |  |  |  | ##    r2 => $r2,               ##-- pf: [$i2,$f12]  @ end2($i1-1)..(end2($i1)-1) | 
| 46 |  |  |  |  |  |  | ##    N  => $N,                ##-- sum($f12) | 
| 47 |  |  |  |  |  |  | ##   ) | 
| 48 |  |  |  |  |  |  | sub new { | 
| 49 | 0 |  |  | 0 | 1 |  | my $that = shift; | 
| 50 | 0 |  | 0 |  |  |  | my $cof  = bless({ | 
| 51 |  |  |  |  |  |  | base  =>undef, | 
| 52 |  |  |  |  |  |  | flags =>'r', | 
| 53 |  |  |  |  |  |  | perms =>(0666 & ~umask), | 
| 54 |  |  |  |  |  |  | dmax  =>5, | 
| 55 |  |  |  |  |  |  | fmin  =>0, | 
| 56 |  |  |  |  |  |  | pack_i=>'N', | 
| 57 |  |  |  |  |  |  | pack_f=>'N', | 
| 58 |  |  |  |  |  |  | r1 => $PFCLASS->new(), | 
| 59 |  |  |  |  |  |  | r2 => $PFCLASS->new(), | 
| 60 |  |  |  |  |  |  | N  => 0, | 
| 61 |  |  |  |  |  |  | @_ | 
| 62 |  |  |  |  |  |  | }, (ref($that)||$that)); | 
| 63 | 0 |  |  |  |  |  | $cof->{class} = ref($cof); | 
| 64 | 0 | 0 |  |  |  |  | return $cof->open() if (defined($cof->{base})); | 
| 65 | 0 |  |  |  |  |  | return $cof; | 
| 66 |  |  |  |  |  |  | } | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | sub DESTROY { | 
| 69 | 0 | 0 |  | 0 |  |  | $_[0]->close() if ($_[0]->opened); | 
| 70 |  |  |  |  |  |  | } | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | ##============================================================================== | 
| 73 |  |  |  |  |  |  | ## I/O | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 76 |  |  |  |  |  |  | ## I/O: open/close | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | ## $cof_or_undef = $cof->open($base,$flags) | 
| 79 |  |  |  |  |  |  | ## $cof_or_undef = $cof->open($base) | 
| 80 |  |  |  |  |  |  | ## $cof_or_undef = $cof->open() | 
| 81 |  |  |  |  |  |  | sub open { | 
| 82 | 0 |  |  | 0 | 0 |  | my ($cof,$base,$flags) = @_; | 
| 83 | 0 |  | 0 |  |  |  | $base  //= $cof->{base}; | 
| 84 | 0 |  | 0 |  |  |  | $flags //= $cof->{flags}; | 
| 85 | 0 | 0 |  |  |  |  | $cof->close() if ($cof->opened); | 
| 86 | 0 |  |  |  |  |  | $cof->{base}  = $base; | 
| 87 | 0 |  |  |  |  |  | $cof->{flags} = $flags = fcflags($flags); | 
| 88 | 0 | 0 | 0 |  |  |  | if (fcread($flags) && !fctrunc($flags)) { | 
| 89 | 0 | 0 |  |  |  |  | $cof->loadHeader() | 
| 90 |  |  |  |  |  |  | or $cof->logconess("failed to load header from '$cof->{base}.hdr': $!"); | 
| 91 |  |  |  |  |  |  | } | 
| 92 | 0 | 0 |  |  |  |  | $cof->{r1}->open("$base.dba1", $flags, perms=>$cof->{perms}, packas=>"$cof->{pack_i}$cof->{pack_f}") | 
| 93 |  |  |  |  |  |  | or $cof->logconfess("open failed for $base.dba1: $!"); | 
| 94 | 0 | 0 |  |  |  |  | $cof->{r2}->open("$base.dba2", $flags, perms=>$cof->{perms}, packas=>"$cof->{pack_i}$cof->{pack_f}") | 
| 95 |  |  |  |  |  |  | or $cof->logconfess("open failed for $base.dba2: $!"); | 
| 96 | 0 |  |  |  |  |  | $cof->{size1} = $cof->{r1}->size; | 
| 97 | 0 |  |  |  |  |  | $cof->{size2} = $cof->{r2}->size; | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | #$cof->debug("open(): opened level-1 relation $cof->{r1}{file}.* as ", ref($cof->{r1})); | 
| 100 |  |  |  |  |  |  | #$cof->debug("open(): opened level-2 relation $cof->{r2}{file}.* as ", ref($cof->{r2})); | 
| 101 | 0 |  |  |  |  |  | return $cof; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | ## $cof_or_undef = $cof->close() | 
| 105 |  |  |  |  |  |  | sub close { | 
| 106 | 0 |  |  | 0 | 0 |  | my $cof = shift; | 
| 107 | 0 | 0 | 0 |  |  |  | if ($cof->opened && fcwrite($cof->{flags})) { | 
| 108 | 0 | 0 |  |  |  |  | $cof->saveHeader() or return undef; | 
| 109 |  |  |  |  |  |  | } | 
| 110 | 0 | 0 |  |  |  |  | $cof->{r1}->close() or return undef; | 
| 111 | 0 | 0 |  |  |  |  | $cof->{r2}->close() or return undef; | 
| 112 | 0 |  |  |  |  |  | undef $cof->{base}; | 
| 113 | 0 |  |  |  |  |  | return $cof; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | ## $bool = $cof->opened() | 
| 117 |  |  |  |  |  |  | sub opened { | 
| 118 | 0 |  |  | 0 | 0 |  | my $cof = shift; | 
| 119 |  |  |  |  |  |  | return | 
| 120 |  |  |  |  |  |  | (defined($cof->{base}) | 
| 121 |  |  |  |  |  |  | && defined($cof->{r1}) && $cof->{r1}->opened | 
| 122 | 0 |  | 0 |  |  |  | && defined($cof->{r2}) && $cof->{r2}->opened); | 
| 123 |  |  |  |  |  |  | } | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 126 |  |  |  |  |  |  | ## I/O: header | 
| 127 |  |  |  |  |  |  | ##  + largely INHERITED from DiaColloDB::Persistent | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | ## @keys = $cof->headerKeys() | 
| 130 |  |  |  |  |  |  | ##  + keys to save as header | 
| 131 |  |  |  |  |  |  | sub headerKeys { | 
| 132 | 0 |  | 0 | 0 | 1 |  | return grep {!ref($_[0]{$_}) && $_ !~ m{^(?:base|flags|perms)$}} keys %{$_[0]}; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | ## $bool = $cof->loadHeaderData($hdr) | 
| 136 |  |  |  |  |  |  | ##  + instantiates header data from $hdr | 
| 137 |  |  |  |  |  |  | ##  + overrides DiaColloDB::Persistent implementation | 
| 138 |  |  |  |  |  |  | sub loadHeaderData { | 
| 139 | 0 |  |  | 0 | 1 |  | my ($cof,$hdr) = @_; | 
| 140 | 0 | 0 | 0 |  |  |  | if (!defined($hdr) && !fccreat($cof->{flags})) { | 
|  |  | 0 |  |  |  |  |  | 
| 141 | 0 |  |  |  |  |  | $cof->logconfess("loadHeaderData() failed to load header data from ", $cof->headerFile, ": $!"); | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  | elsif (defined($hdr)) { | 
| 144 | 0 |  |  |  |  |  | return $cof->SUPER::loadHeaderData($hdr); | 
| 145 |  |  |  |  |  |  | } | 
| 146 | 0 |  |  |  |  |  | return $cof; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | ## $bool = $enum->saveHeader() | 
| 150 |  |  |  |  |  |  | ##  + inherited from DiaColloDB::Persistent | 
| 151 |  |  |  |  |  |  |  | 
| 152 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 153 |  |  |  |  |  |  | ## I/O: text | 
| 154 |  |  |  |  |  |  | ##  + largely INHERITED from DiaColloDB::Persistent | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | ## $bool = $obj->loadTextFile($filename_or_handle, %opts) | 
| 157 |  |  |  |  |  |  | ##  + wraps loadTextFh() | 
| 158 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Persistent | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | ## $cof = $cof->loadTextFh($fh,%opts) | 
| 161 |  |  |  |  |  |  | ##  + loads from text file as saved by saveTextFh() | 
| 162 |  |  |  |  |  |  | ##  + supports semi-sorted input: input fh must be sorted by $i1, | 
| 163 |  |  |  |  |  |  | ##    and all $i2 for each $i1 must be adjacent (i.e. no intervening $j1 != $i1) | 
| 164 |  |  |  |  |  |  | ##  + supports multiple lines for pairs ($i1,$i2) provided the above conditions hold | 
| 165 |  |  |  |  |  |  | ##  + supports loading of $cof->{N} from single-value lines | 
| 166 |  |  |  |  |  |  | ##  + %opts: clobber %$cof | 
| 167 |  |  |  |  |  |  | sub loadTextFh { | 
| 168 | 0 |  |  | 0 | 1 |  | my ($cof,$infh,%opts) = @_; | 
| 169 | 0 | 0 |  |  |  |  | if (!ref($cof)) { | 
| 170 | 0 |  |  |  |  |  | $cof = $cof->new(%opts); | 
| 171 |  |  |  |  |  |  | } else { | 
| 172 | 0 |  |  |  |  |  | @$cof{keys %opts} = values %opts; | 
| 173 |  |  |  |  |  |  | } | 
| 174 | 0 | 0 |  |  |  |  | $cof->logconfess("loadTextFh(): cannot load unopened database!") if (!$cof->opened); | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | ##-- common variables | 
| 177 | 0 |  |  |  |  |  | my $pack_f   = $cof->{pack_f}; | 
| 178 | 0 |  |  |  |  |  | my $pack_i   = $cof->{pack_i}; | 
| 179 | 0 |  |  |  |  |  | my $pack_r1  = "${pack_i}${pack_f}"; ##-- $r1 : [$end2,$f1] @ $i1 | 
| 180 | 0 |  |  |  |  |  | my $pack_r2  = "${pack_i}${pack_f}"; ##-- $r2 : [$i2,$f12]  @ end2($i1-1)..(end2($i1)-1) | 
| 181 | 0 |  |  |  |  |  | my $len_r2   = packsize($pack_r2); | 
| 182 | 0 |  | 0 |  |  |  | my $fmin     = $cof->{fmin} // 0; | 
| 183 | 0 |  |  |  |  |  | my ($r1,$r2) = @$cof{qw(r1 r2)}; | 
| 184 | 0 |  |  |  |  |  | $r1->truncate(); | 
| 185 | 0 |  |  |  |  |  | $r2->truncate(); | 
| 186 | 0 |  |  |  |  |  | my ($fh1,$fh2) = ($r1->{fh},$r2->{fh}); | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | ##-- iteration variables | 
| 189 | 0 |  |  |  |  |  | my ($pos1,$pos2) = (0,0); | 
| 190 | 0 |  |  |  |  |  | my ($i1_cur,$f1) = (-1,0); | 
| 191 | 0 |  |  |  |  |  | my ($f12,$i1,$i2,$f); | 
| 192 | 0 |  |  |  |  |  | my $N  = 0;	  ##-- total marginal frequency as extracted from %f12 | 
| 193 | 0 |  |  |  |  |  | my $N1 = 0;     ##-- total N as extracted from single-element records | 
| 194 | 0 |  |  |  |  |  | my %f12 = qw(); ##-- ($i2=>$f12, ...) for $i1_cur; un-collocated f1 counts appear as $i2='-1' | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | ##-- guts for inserting records from $i1_cur,%f12,$pos1,$pos2 | 
| 197 |  |  |  |  |  |  | my $insert = sub { | 
| 198 | 0 | 0 |  | 0 |  |  | if ($i1_cur >= 0) { | 
| 199 | 0 | 0 |  |  |  |  | if ($i1_cur != $pos1) { | 
| 200 |  |  |  |  |  |  | ##-- we've skipped one or more $i1 because it had no collocates (e.g. kern01 i1=287123="Untier/1906") | 
| 201 | 0 |  |  |  |  |  | $fh1->print( pack($pack_r1,$pos2,0) x ($i1_cur-$pos1) ); | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  | ##-- dump r2-records for $i1_cur | 
| 204 | 0 |  |  |  |  |  | $f1 = 0; | 
| 205 | 0 |  |  |  |  |  | foreach (sort {$a<=>$b} keys %f12) { | 
|  | 0 |  |  |  |  |  |  | 
| 206 | 0 |  |  |  |  |  | $f    = $f12{$_}; | 
| 207 | 0 |  |  |  |  |  | $f1  += $f; | 
| 208 | 0 | 0 | 0 |  |  |  | next if ($f < $fmin || $_ < 0); ##-- skip here so we can track "real" marginal frequencies | 
| 209 | 0 |  |  |  |  |  | $fh2->print(pack($pack_r2, $_,$f)); | 
| 210 | 0 |  |  |  |  |  | ++$pos2; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  | ##-- dump r1-record for $i1_cur | 
| 213 | 0 |  |  |  |  |  | $fh1->print(pack($pack_r1, $pos2,$f1)); | 
| 214 | 0 |  |  |  |  |  | $pos1  = $i1_cur+1; | 
| 215 | 0 |  |  |  |  |  | $N    += $f1; | 
| 216 |  |  |  |  |  |  | } | 
| 217 | 0 |  |  |  |  |  | $i1_cur = $i1; | 
| 218 | 0 |  |  |  |  |  | %f12    = qw(); | 
| 219 | 0 |  |  |  |  |  | }; | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  | ##-- ye olde loope | 
| 222 | 0 |  |  |  |  |  | binmode($infh,':raw'); | 
| 223 | 0 |  |  |  |  |  | while (defined($_=<$infh>)) { | 
| 224 | 0 |  |  |  |  |  | chomp; | 
| 225 | 0 |  |  |  |  |  | ($f12,$i1,$i2) = split(' ',$_,3); | 
| 226 | 0 | 0 |  |  |  |  | if (!defined($i1)) { | 
| 227 |  |  |  |  |  |  | #$cof->debug("N1 += $f12"); | 
| 228 | 0 |  |  |  |  |  | $N1 += $f12;		      ##-- load N values | 
| 229 | 0 |  |  |  |  |  | next; | 
| 230 |  |  |  |  |  |  | } | 
| 231 | 0 | 0 |  |  |  |  | $insert->() if ($i1 != $i1_cur);  ##-- insert record(s) for $i1_cur | 
| 232 | 0 |  | 0 |  |  |  | $f12{($i2//-1)} += $f12;          ##-- buffer co-frequencies for $i1_cur; track un-collocated frequencies as $i2=-1 | 
| 233 |  |  |  |  |  |  | } | 
| 234 | 0 |  |  |  |  |  | $insert->();                        ##-- write record(s) for final $i1_cur | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | ##-- adopt final $N and sizes | 
| 237 |  |  |  |  |  |  | #$cof->debug("FINAL: N1=$N1, N=$N"); | 
| 238 | 0 | 0 |  |  |  |  | $cof->{N} = $N1>$N ? $N1 : $N; | 
| 239 | 0 |  |  |  |  |  | $_->remap() foreach (grep {$_->can('remap')} @$cof{qw(r1 r2)}); | 
|  | 0 |  |  |  |  |  |  | 
| 240 | 0 |  |  |  |  |  | $cof->{size1} = $r1->size; | 
| 241 | 0 |  |  |  |  |  | $cof->{size2} = $r2->size; | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 0 |  |  |  |  |  | return $cof; | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | ## $cof = $cof->loadTextFile_create($fh,%opts) | 
| 247 |  |  |  |  |  |  | ##  + old version of loadTextFile() which doesn't support N, semi-sorted input, or multiple ($i1,$i2) entries | 
| 248 |  |  |  |  |  |  | ##  + not useable by union() method | 
| 249 |  |  |  |  |  |  | BEGIN { | 
| 250 | 1 |  |  | 1 |  | 10 | *loadTextFile_create = DiaColloDB::Compat->nocompat("loadTextFile_create"); | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | ## $bool = $obj->saveTextFile($filename_or_handle, %opts) | 
| 254 |  |  |  |  |  |  | ##  + wraps saveTextFh() | 
| 255 |  |  |  |  |  |  | ##  + INHERITED from DiaColloDB::Persistent | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | ## $bool = $cof->saveTextFh($fh,%opts) | 
| 258 |  |  |  |  |  |  | ##  + save from text file with lines of the form: | 
| 259 |  |  |  |  |  |  | ##      N               ##-- 1 field : N | 
| 260 |  |  |  |  |  |  | ##      FREQ ID1        ##-- 2 fields: un-collocated portion of $f1 | 
| 261 |  |  |  |  |  |  | ##      FREQ ID1 ID2    ##-- 3 fields: co-frequency pair (ID2 >= 0) | 
| 262 |  |  |  |  |  |  | ##  + %opts: | 
| 263 |  |  |  |  |  |  | ##      i2s  => \&CODE,   ##-- code-ref for formatting indices; called as $s=CODE($i) | 
| 264 |  |  |  |  |  |  | ##      i2s1 => \&CODE,   ##-- code-ref for formatting item1 indices (overrides 'i2s') | 
| 265 |  |  |  |  |  |  | ##      i2s2 => \&CODE,   ##-- code-ref for formatting item2 indices (overrides 'i2s') | 
| 266 |  |  |  |  |  |  | sub saveTextFh { | 
| 267 | 0 |  |  | 0 | 1 |  | my ($cof,$outfh,%opts) = @_; | 
| 268 | 0 | 0 |  |  |  |  | $cof->logconfess("saveTextFile(): cannot save unopened DB") if (!$cof->opened); | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | ##-- common variables | 
| 271 | 0 |  |  |  |  |  | my ($r1,$r2)   = @$cof{qw(r1 r2)}; | 
| 272 | 0 |  |  |  |  |  | my $pack_r1    = $r1->{packas}; | 
| 273 | 0 |  |  |  |  |  | my $pack_r2    = $r2->{packas}; | 
| 274 | 0 |  |  |  |  |  | my $i2s        = $opts{i2s}; | 
| 275 | 0 | 0 |  |  |  |  | my $i2s1       = exists($opts{i2s1}) ? $opts{i2s1} : $i2s; | 
| 276 | 0 | 0 |  |  |  |  | my $i2s2       = exists($opts{i2s2}) ? $opts{i2s2} : $i2s; | 
| 277 | 0 |  |  |  |  |  | my ($fh1,$fh2) = ($r1->{fh},$r2->{fh}); | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | ##-- iteration variables | 
| 280 | 0 |  |  |  |  |  | my ($buf1,$i1,$s1,$f1,$end2); | 
| 281 | 0 |  |  |  |  |  | my ($buf2,$off2,$i2,$s2,$f12,$f12sum); | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | ##-- ye olde loope | 
| 284 | 0 |  |  |  |  |  | binmode($outfh,':raw'); | 
| 285 | 0 |  |  |  |  |  | $outfh->print($cof->{N}, "\n"); | 
| 286 | 0 |  |  |  |  |  | for ($r1->seek($i1=0), $r2->seek($off2=0); !$r1->eof(); ++$i1) { | 
| 287 | 0 | 0 |  |  |  |  | $r1->read(\$buf1) or $cof->logconfess("saveTextFile(): failed to read record $i1 from $r1->{file}: $!"); | 
| 288 | 0 |  |  |  |  |  | ($end2,$f1) = unpack($pack_r1,$buf1); | 
| 289 | 0 | 0 |  |  |  |  | $s1 = $i2s1 ? $i2s1->($i1) : $i1; | 
| 290 |  |  |  |  |  |  |  | 
| 291 | 0 |  | 0 |  |  |  | for ($f12sum=0; $off2 < $end2 && !$r2->eof(); ++$off2) { | 
| 292 | 0 | 0 |  |  |  |  | $r2->read(\$buf2) or $cof->logconfess("saveTextFile(): failed to read record $off2 from $r2->{file}: $!"); | 
| 293 | 0 |  |  |  |  |  | ($i2,$f12) = unpack($pack_r2,$buf2); | 
| 294 | 0 |  |  |  |  |  | $f12sum   += $f12; | 
| 295 | 0 | 0 |  |  |  |  | $s2        = $i2s2 ? $i2s2->($i2) : $i2; | 
| 296 | 0 |  |  |  |  |  | $outfh->print(join("\t", $f12, $s1,$s2), "\n"); | 
| 297 |  |  |  |  |  |  | } | 
| 298 |  |  |  |  |  |  |  | 
| 299 |  |  |  |  |  |  | ##-- track un-collocated portion of $f1, if any | 
| 300 | 0 | 0 |  |  |  |  | $outfh->print(join("\t", $f1-$f12sum, $s1), "\n") if ($f12sum != $f1); | 
| 301 |  |  |  |  |  |  | } | 
| 302 |  |  |  |  |  |  |  | 
| 303 | 0 |  |  |  |  |  | return $cof; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | ##============================================================================== | 
| 307 |  |  |  |  |  |  | ## Relation API: create | 
| 308 |  |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | ## $rel = $CLASS_OR_OBJECT->create($coldb,$tokdat_file,%opts) | 
| 310 |  |  |  |  |  |  | ##  + populates current database from $tokdat_file, | 
| 311 |  |  |  |  |  |  | ##    a tt-style text file containing 1 token-id perl line with optional blank lines | 
| 312 |  |  |  |  |  |  | ##  + %opts: clobber %$ug, also: | 
| 313 |  |  |  |  |  |  | ##    ( | 
| 314 |  |  |  |  |  |  | ##     size=>$size,  ##-- set initial size (number of types) | 
| 315 |  |  |  |  |  |  | ##    ) | 
| 316 |  |  |  |  |  |  | ##  + DISABLED | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | ##============================================================================== | 
| 319 |  |  |  |  |  |  | ## Relation API: union | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | ## $cof = CLASS_OR_OBJECT->union($coldb, \@pairs, %opts) | 
| 322 |  |  |  |  |  |  | ##  + merge multiple unigram unigram indices from \@pairs into new object | 
| 323 |  |  |  |  |  |  | ##  + @pairs : array of pairs ([$cof,\@xi2u],...) | 
| 324 |  |  |  |  |  |  | ##    of unigram-objects $cof and tuple-id maps \@xi2u for $cof | 
| 325 |  |  |  |  |  |  | ##    - \@xi2u may also be a mapping object supporting a toArray() method | 
| 326 |  |  |  |  |  |  | ##  + %opts: clobber %$cof | 
| 327 |  |  |  |  |  |  | ##  + implicitly flushes the new index | 
| 328 |  |  |  |  |  |  | ##  + DISABLED | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | ##============================================================================== | 
| 331 |  |  |  |  |  |  | ## Relation API: dbinfo | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | ## \%info = $rel->dbinfo($coldb) | 
| 334 |  |  |  |  |  |  | ##  + embedded info-hash for $coldb->dbinfo() | 
| 335 |  |  |  |  |  |  | sub dbinfo { | 
| 336 | 0 |  |  | 0 | 0 |  | my $cof = shift; | 
| 337 | 0 |  |  |  |  |  | my $info = $cof->SUPER::dbinfo(); | 
| 338 | 0 |  |  |  |  |  | @$info{qw(fmin dmax size1 size2 N)} = @$cof{qw(fmin dmax size1 size2 N)}; | 
| 339 | 0 |  |  |  |  |  | return $info; | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | ##============================================================================== | 
| 344 |  |  |  |  |  |  | ## Utilities: lookup | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  | ## $f = $cof->f1( @ids) | 
| 347 |  |  |  |  |  |  | ## $f = $cof->f1(\@ids) | 
| 348 |  |  |  |  |  |  | ##  + get total marginal unigram frequency (db must be opened) | 
| 349 |  |  |  |  |  |  | sub f1 { | 
| 350 | 0 |  |  | 0 | 0 |  | my $cof = shift; | 
| 351 | 0 | 0 |  |  |  |  | my $ids = UNIVERSAL::isa($_[0],'ARRAY') ? @{$_[0]} : \@_; | 
|  | 0 |  |  |  |  |  |  | 
| 352 | 0 |  |  |  |  |  | my $r1  = $cof->{r1}; | 
| 353 | 0 |  |  |  |  |  | my $f   = 0; | 
| 354 | 0 |  |  |  |  |  | foreach (@$ids) { | 
| 355 | 0 |  |  |  |  |  | $f += $r1->fetch($_)->[1]; | 
| 356 |  |  |  |  |  |  | } | 
| 357 | 0 |  |  |  |  |  | return $f; | 
| 358 |  |  |  |  |  |  | } | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  | ## $f12 = $cof->f12($id1,$id2) | 
| 361 |  |  |  |  |  |  | ##  + return joint frequency for pair ($id1,$id2) | 
| 362 |  |  |  |  |  |  | ##  + UNUSED | 
| 363 |  |  |  |  |  |  | sub f12 { | 
| 364 | 0 |  |  | 0 | 0 |  | my ($cof,$i1,$i2) = @_; | 
| 365 | 0 | 0 |  |  |  |  | my $beg2 = ($i1==0 ? 0 : $cof->{r1}->fetch($i1-1)->[0]); | 
| 366 | 0 |  |  |  |  |  | my $end2 = $cof->{r1}->fetch($i1)->[0]; | 
| 367 | 0 |  |  |  |  |  | my $pos2 = $cof->{r2}->bsearch($i2, lo=>$beg2, hi=>$end2, packas=>$cof->{pack_i}); | 
| 368 | 0 | 0 |  |  |  |  | return defined($pos2) ? $cof->{r2}->fetch($pos2)->[1] : 0; | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | ##============================================================================== | 
| 372 |  |  |  |  |  |  | ## Relation API: default: profiling | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | ## $prf = $cof->subprofile1(\@xids,%opts) | 
| 375 |  |  |  |  |  |  | ##  + get joint co-frequency profile for @xids (db must be opened; f1 and f12 only) | 
| 376 |  |  |  |  |  |  | ##  + %opts: | 
| 377 |  |  |  |  |  |  | ##     groupby => \&gbsub,  ##-- key-extractor $key2_or_undef = $gbsub->($i2) | 
| 378 |  |  |  |  |  |  | ##     coldb   => $coldb,   ##-- for debugging | 
| 379 |  |  |  |  |  |  | ##     onepass => $bool,    ##-- use fast but incorrect 1-pass method? | 
| 380 |  |  |  |  |  |  | sub subprofile1 { | 
| 381 | 0 |  |  | 0 | 1 |  | my ($cof,$ids,%opts) = @_; | 
| 382 |  |  |  |  |  |  |  | 
| 383 | 0 | 0 |  |  |  |  | $ids   = [$ids] if (!UNIVERSAL::isa($ids,'ARRAY')); | 
| 384 | 0 |  |  |  |  |  | my $r1 = $cof->{r1}; | 
| 385 | 0 |  |  |  |  |  | my $r2 = $cof->{r2}; | 
| 386 | 0 |  |  |  |  |  | my $pack1 = $r1->{packas}; | 
| 387 | 0 |  |  |  |  |  | my $pack2 = $r2->{packas}; | 
| 388 | 0 |  |  |  |  |  | my $pack1i = $cof->{pack_i}; | 
| 389 | 0 |  |  |  |  |  | my $pack1f = "@".packsize($cof->{pack_i}).$cof->{pack_f}; | 
| 390 | 0 |  | 0 |  |  |  | my $size1  = $cof->{size1} // ($cof->{size1}=$r1->size); | 
| 391 | 0 |  | 0 |  |  |  | my $size2  = $cof->{size2} // ($cof->{size2}=$r2->size); | 
| 392 | 0 |  |  |  |  |  | my $groupby = $opts{groupby}; | 
| 393 | 0 |  |  |  |  |  | my $pack_id = $opts{coldb}{pack_id}; | 
| 394 | 0 |  |  |  |  |  | my $onepass = $opts{onepass}; | 
| 395 | 0 |  |  |  |  |  | my $pf1 = 0; | 
| 396 | 0 |  |  |  |  |  | my $pf12 = {}; | 
| 397 | 0 |  |  |  |  |  | my $pf2  = {}; | 
| 398 | 0 |  |  |  |  |  | my ($i1,$i2,$key2, $beg2,$end2,$pos2, $f1,$f12, $buf, %i2); | 
| 399 |  |  |  |  |  |  |  | 
| 400 | 0 |  |  |  |  |  | foreach $i1 (@$ids) { | 
| 401 | 0 | 0 |  |  |  |  | next if ($i1 >= $size1); | 
| 402 | 0 | 0 |  |  |  |  | $beg2       = ($i1==0 ? 0 : unpack($pack1i,$r1->fetchraw($i1-1,\$buf))); | 
| 403 | 0 |  |  |  |  |  | ($end2,$f1) = unpack($pack1, $r1->fetchraw($i1,\$buf)); | 
| 404 |  |  |  |  |  |  |  | 
| 405 | 0 |  |  |  |  |  | $pf1       += $f1; | 
| 406 | 0 | 0 |  |  |  |  | next if ($beg2 >= $size2); | 
| 407 | 0 |  |  |  |  |  | for ($r2->seek($beg2), $pos2=$beg2; $pos2 < $end2; ++$pos2) { | 
| 408 | 0 | 0 |  |  |  |  | $r2->getraw(\$buf) or last; | 
| 409 | 0 |  |  |  |  |  | ($i2,$f12)    = unpack($pack2, $buf); | 
| 410 | 0 | 0 |  |  |  |  | $key2         = $groupby ? $groupby->($i2) : pack($pack_id,$i2); | 
| 411 | 0 | 0 |  |  |  |  | next if (!defined($key2)); ##-- item2 selection via groupby CODE-ref | 
| 412 | 0 |  |  |  |  |  | $pf12->{$key2} += $f12; | 
| 413 | 0 | 0 | 0 |  |  |  | if ($onepass && !exists($i2{$i2})) { | 
| 414 | 0 |  |  |  |  |  | $pf2->{$key2} += unpack($pack1f, $r1->fetchraw($i2,\$buf)); ##-- avoid double-counting f2 for shared collocates | 
| 415 | 0 |  |  |  |  |  | $i2{$i2}       = undef; | 
| 416 |  |  |  |  |  |  | } | 
| 417 |  |  |  |  |  |  | } | 
| 418 |  |  |  |  |  |  | } | 
| 419 |  |  |  |  |  |  | return DiaColloDB::Profile->new( | 
| 420 |  |  |  |  |  |  | N=>$cof->{N}, | 
| 421 | 0 |  |  |  |  |  | f1=>$pf1, | 
| 422 |  |  |  |  |  |  | f2=>$pf2, | 
| 423 |  |  |  |  |  |  | f12=>$pf12, | 
| 424 |  |  |  |  |  |  | ); | 
| 425 |  |  |  |  |  |  | } | 
| 426 |  |  |  |  |  |  |  | 
| 427 |  |  |  |  |  |  | ##  \%slice2prf = $rel->subprofile2(\%slice2prf, %opts) | 
| 428 |  |  |  |  |  |  | ##  + populate f2 frequencies for profiles in \%slice2prf | 
| 429 |  |  |  |  |  |  | ##  + %opts: | 
| 430 |  |  |  |  |  |  | ##     groupby => \%gbreq,  ##-- parsed groupby object | 
| 431 |  |  |  |  |  |  | ##     a2data  => \%a2data, ##-- maps indexed attributes to associated datastructures | 
| 432 |  |  |  |  |  |  | ##     coldb   => $coldb,   ##-- parent DiaColloDB object (for shared data, debugging) | 
| 433 |  |  |  |  |  |  | ##     ...                  ##-- other options as for profile(), esp. qw(slice) | 
| 434 |  |  |  |  |  |  | sub subprofile2 { | 
| 435 | 0 |  |  | 0 | 1 |  | my ($cof,$slice2prf,%opts) = @_; | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | ##-- vars: common | 
| 438 | 0 |  |  |  |  |  | my $coldb   = $opts{coldb}; | 
| 439 | 0 |  |  |  |  |  | my $groupby = $opts{groupby}; | 
| 440 | 0 |  |  |  |  |  | my $a2data  = $opts{a2data}; | 
| 441 | 0 |  |  |  |  |  | my $slice   = $opts{slice}; | 
| 442 |  |  |  |  |  |  | #my $slices  = $opts{slices} || [sort {$a<=>$b} keys %$slice2prf]; | 
| 443 | 0 |  |  |  |  |  | my ($dfilter,$slo,$shi,$dlo,$dhi) = $coldb->parseDateRequest(@opts{qw(date slice fill)}); | 
| 444 | 0 |  | 0 |  |  |  | my $filter_by_date = $slice || defined($dlo) || defined($dhi); | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | ##-- vars: relation-wise | 
| 447 | 0 |  |  |  |  |  | my $r1       = $cof->{r1}; | 
| 448 | 0 |  |  |  |  |  | my $pack_r1f = '@'.packsize($cof->{pack_i}).$cof->{pack_f}; | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | ##-- get "most specific projected attribute" ("MSPA"): that projected attribute with largest enum | 
| 451 |  |  |  |  |  |  | #my $gb1      = scalar(@{$groupby->{attrs}})==1; ##-- are we grouping by a single attribute? -->optimize! | 
| 452 | 0 |  |  |  |  |  | my $mspai    = (sort {$b->[1]<=>$a->[1]} map {[$_,$a2data->{$groupby->{attrs}[$_]}{enum}->size]} (0..$#{$groupby->{attrs}}))[0][0]; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 453 | 0 |  |  |  |  |  | my $mspa     = $groupby->{attrs}[$mspai]; | 
| 454 | 0 |  |  |  |  |  | my $mspgpack = $groupby->{gpack}[$mspai]; | 
| 455 | 0 |  |  |  |  |  | my $mspxpack = $groupby->{xpack}[$mspai]; | 
| 456 | 0 |  |  |  |  |  | my $msp2x = $a2data->{$mspa}{a2x}; | 
| 457 | 0 |  |  |  |  |  | my %mspv  = qw(); ##-- checked MSPA-values ($mspvi) | 
| 458 | 0 |  |  |  |  |  | my $xenum = $coldb->{xenum}; | 
| 459 | 0 |  |  |  |  |  | my $pack_xd = "@".(packsize($coldb->{pack_id}) * scalar(@{$coldb->{attrs}})).$coldb->{pack_date}; | 
|  | 0 |  |  |  |  |  |  | 
| 460 | 0 |  |  |  |  |  | my $xs2g    = $groupby->{xs2g}; | 
| 461 | 0 |  |  |  |  |  | my $debug_xp2g = join('',@{$groupby->{xpack}}); | 
|  | 0 |  |  |  |  |  |  | 
| 462 | 0 |  |  |  |  |  | my $debug_gpack= "($coldb->{pack_id})*"; | 
| 463 | 0 |  |  |  |  |  | my ($prf,$pf12, $mspvi,$i2,$x2,$d2,$ds2,$prf2,$key2, $buf,$f2); | 
| 464 | 0 | 0 |  |  |  |  | $prf2     = (values %$slice2prf)[0] if (!$filter_by_date); | 
| 465 | 0 |  |  |  |  |  | foreach $prf (values %$slice2prf) { | 
| 466 | 0 |  |  |  |  |  | $pf12 = $prf->{f12}; | 
| 467 | 0 |  |  |  |  |  | foreach (keys %$pf12) { | 
| 468 | 0 |  |  |  |  |  | $mspvi = unpack($mspgpack,$_); | 
| 469 | 0 | 0 |  |  |  |  | next if (exists $mspv{$mspvi}); | 
| 470 | 0 |  |  |  |  |  | $mspv{$mspvi} = undef; | 
| 471 | 0 |  |  |  |  |  | foreach $i2 (@{$msp2x->fetch($mspvi)}) { | 
|  | 0 |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | ##-- get item2 x-tuple | 
| 473 | 0 |  |  |  |  |  | $x2  = $xenum->i2s($i2); | 
| 474 |  |  |  |  |  |  |  | 
| 475 | 0 | 0 |  |  |  |  | if ($filter_by_date) { | 
| 476 |  |  |  |  |  |  | ##-- extract item2 date slice | 
| 477 | 0 |  |  |  |  |  | $d2  = unpack($pack_xd, $x2); | 
| 478 | 0 | 0 |  |  |  |  | $ds2 = $slice ? int($d2/$slice)*$slice : 0; | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | ##-- ignore if item2 slice isn't in our target range | 
| 481 | 0 | 0 | 0 |  |  |  | next if (!defined($prf2=$slice2prf->{$ds2}) | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 482 |  |  |  |  |  |  | || (defined($dlo) && $d2 < $dlo) | 
| 483 |  |  |  |  |  |  | || (defined($dhi) && $d2 > $dhi)); | 
| 484 |  |  |  |  |  |  | } | 
| 485 |  |  |  |  |  |  |  | 
| 486 |  |  |  |  |  |  | ##-- get groupby-key from x-tuple string & check for item2 membership in the appropriate slice-profile | 
| 487 | 0 | 0 |  |  |  |  | $key2 = $xs2g ? $xs2g->($x2) : pack($mspgpack, $i2); | 
| 488 |  |  |  |  |  |  | #$key2 = pack($debug_gpack, unpack($debug_xp2g, $x2)); ##-- ca. 6% faster than $xs2g, no having-checks | 
| 489 |  |  |  |  |  |  | #$key2 = pack($debug_gpack, $mspvi); ##-- ca. 12% faster than $xs2g, no having-checks, only valid for groupby single-attribute | 
| 490 | 0 | 0 | 0 |  |  |  | next if (!defined($key2) || !exists($prf2->{f12}{$key2})); ##-- having()-failure or no item2 in target slice | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | ##-- add item2 frequency | 
| 493 | 0 |  |  |  |  |  | $f2 = unpack($pack_r1f, $r1->fetchraw($i2,\$buf)); | 
| 494 | 0 |  |  |  |  |  | $prf2->{f2}{$key2} += $f2; | 
| 495 |  |  |  |  |  |  | } | 
| 496 |  |  |  |  |  |  | } | 
| 497 |  |  |  |  |  |  | } | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 0 |  |  |  |  |  | return $slice2prf; | 
| 500 |  |  |  |  |  |  | } | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | ##============================================================================== | 
| 504 |  |  |  |  |  |  | ## Relation API: default: query info | 
| 505 |  |  |  |  |  |  |  | 
| 506 |  |  |  |  |  |  | ## \%qinfo = $rel->qinfo($coldb, %opts) | 
| 507 |  |  |  |  |  |  | ##  + get query-info hash for profile administrivia (ddc hit links) | 
| 508 |  |  |  |  |  |  | ##  + %opts: as for profile(), additionally: | 
| 509 |  |  |  |  |  |  | ##    ( | 
| 510 |  |  |  |  |  |  | ##     qreqs => \@qreqs,      ##-- as returned by $coldb->parseRequest($opts{query}) | 
| 511 |  |  |  |  |  |  | ##     gbreq => \%groupby,    ##-- as returned by $coldb->groupby($opts{groupby}) | 
| 512 |  |  |  |  |  |  | ##    ) | 
| 513 |  |  |  |  |  |  | sub qinfo { | 
| 514 | 0 |  |  | 0 | 1 |  | my ($rel,$coldb,%opts) = @_; | 
| 515 | 0 |  |  |  |  |  | my ($q1strs,$q2strs,$qxstrs,$fstrs) = $rel->qinfoData($coldb,%opts); | 
| 516 |  |  |  |  |  |  |  | 
| 517 | 0 | 0 |  |  |  |  | my $q1str = '('.(@$q1strs ? join(' WITH ', @$q1strs,@$qxstrs) : '*').') =1'; | 
| 518 | 0 | 0 |  |  |  |  | my $q2str = '('.(@$q2strs ? join(' WITH ', @$q2strs,@$qxstrs) : '*').') =2'; | 
| 519 |  |  |  |  |  |  | my $qstr = ( | 
| 520 |  |  |  |  |  |  | #"$q1str && $q2str" ##-- approximate with &&-query (especially buggy since #sep doesn't work right here; see mantis bug #654) | 
| 521 | 0 | 0 |  |  |  |  | "NEAR( $q1str, $q2str, ".(2*($rel->{dmax}-1)).")" | 
| 522 |  |  |  |  |  |  | .' #SEPARATE' | 
| 523 |  |  |  |  |  |  | .(@$fstrs ? (' '.join(' ',@$fstrs)) : ''), | 
| 524 |  |  |  |  |  |  | ); | 
| 525 |  |  |  |  |  |  | return { | 
| 526 |  |  |  |  |  |  | fcoef => 2*$rel->{dmax}, | 
| 527 | 0 |  |  |  |  |  | qtemplate => $qstr, | 
| 528 |  |  |  |  |  |  | }; | 
| 529 |  |  |  |  |  |  | } | 
| 530 |  |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  | ##============================================================================== | 
| 532 |  |  |  |  |  |  | ## Pacakge Alias(es) | 
| 533 |  |  |  |  |  |  | package DiaColloDB::Compat::v0_09::Cofreqs; | 
| 534 | 1 |  |  | 1 |  | 10 | use strict; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 48 |  | 
| 535 |  |  |  |  |  |  | our @ISA = qw(DiaColloDB::Compat::v0_09::Relation::Cofreqs); | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  | ##============================================================================== | 
| 538 |  |  |  |  |  |  | ## Footer | 
| 539 |  |  |  |  |  |  | 1; | 
| 540 |  |  |  |  |  |  |  | 
| 541 |  |  |  |  |  |  | __END__ |