| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | ## -*- Mode: CPerl -*- | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | ## File: DDC::Client.pm | 
| 4 |  |  |  |  |  |  | ## Author: Bryan Jurish | 
| 5 |  |  |  |  |  |  | ## Description: | 
| 6 |  |  |  |  |  |  | ##  + DDC Query utilities: client sockets | 
| 7 |  |  |  |  |  |  | ##====================================================================== | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | package DDC::Client; | 
| 10 | 26 |  |  | 26 |  | 10700 | use DDC::Utils qw(:escape); | 
|  | 26 |  |  |  |  | 69 |  | 
|  | 26 |  |  |  |  | 3983 |  | 
| 11 | 26 |  |  | 26 |  | 185 | use DDC::HitList; | 
|  | 26 |  |  |  |  | 58 |  | 
|  | 26 |  |  |  |  | 480 |  | 
| 12 | 26 |  |  | 26 |  | 133 | use DDC::Hit; | 
|  | 26 |  |  |  |  | 46 |  | 
|  | 26 |  |  |  |  | 473 |  | 
| 13 | 26 |  |  | 26 |  | 15940 | use IO::Handle; | 
|  | 26 |  |  |  |  | 168433 |  | 
|  | 26 |  |  |  |  | 1211 |  | 
| 14 | 26 |  |  | 26 |  | 12859 | use IO::File; | 
|  | 26 |  |  |  |  | 51522 |  | 
|  | 26 |  |  |  |  | 2862 |  | 
| 15 | 26 |  |  | 26 |  | 13996 | use IO::Socket::INET; | 
|  | 26 |  |  |  |  | 391671 |  | 
|  | 26 |  |  |  |  | 176 |  | 
| 16 | 26 |  |  | 26 |  | 27112 | use Encode qw(encode decode); | 
|  | 26 |  |  |  |  | 277139 |  | 
|  | 26 |  |  |  |  | 2040 |  | 
| 17 | 26 |  |  | 26 |  | 223 | use Carp; | 
|  | 26 |  |  |  |  | 62 |  | 
|  | 26 |  |  |  |  | 1424 |  | 
| 18 | 26 |  |  | 26 |  | 169 | use strict; | 
|  | 26 |  |  |  |  | 55 |  | 
|  | 26 |  |  |  |  | 1665 |  | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | ##====================================================================== | 
| 21 |  |  |  |  |  |  | ## Globals | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | ## $ifmt | 
| 24 |  |  |  |  |  |  | ## + pack format to use for integer sizes passed to and from DDC | 
| 25 |  |  |  |  |  |  | ## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) | 
| 26 |  |  |  |  |  |  | ## + for ddc-1.x, use machine word size and endian-ness of server | 
| 27 |  |  |  |  |  |  | our $ifmt = 'V'; | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | ## $ilen | 
| 30 |  |  |  |  |  |  | ## + length in bytes of message size integer used for DDC protocol in bytes | 
| 31 |  |  |  |  |  |  | ## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) | 
| 32 |  |  |  |  |  |  | ## + for ddc-1.x, use machine word size and endian-ness of server | 
| 33 |  |  |  |  |  |  | our $ilen = 4; | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | ## $JSON_BACKEND | 
| 36 |  |  |  |  |  |  | ## + underlying JSON module (default='JSON') | 
| 37 |  |  |  |  |  |  | our ($JSON_BACKEND); | 
| 38 |  |  |  |  |  |  | BEGIN { | 
| 39 | 26 | 50 |  | 26 |  | 168386 | $JSON_BACKEND = 'JSON' if (!defined($JSON_BACKEND)); | 
| 40 |  |  |  |  |  |  | } | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | ##====================================================================== | 
| 43 |  |  |  |  |  |  | ## Constructors etc | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | ## $dc = $CLASS_OR_OBJ->new(%args) | 
| 46 |  |  |  |  |  |  | ##  + %args: | 
| 47 |  |  |  |  |  |  | ##    ( | 
| 48 |  |  |  |  |  |  | ##     ##-- connection options | 
| 49 |  |  |  |  |  |  | ##     connect=>\%connectArgs,  ##-- passed to IO::Socket::(INET|UNIX)->new(); also accepts connect=>$connectURL | 
| 50 |  |  |  |  |  |  | ##     mode   =>$queryMode,     ##-- one of 'table', 'html', 'text', 'json', or 'raw'; default='json' ('html' is not yet supported) | 
| 51 |  |  |  |  |  |  | ##     linger =>\@linger,       ##-- SO_LINGER socket option; default=[1,0]: immediate termination | 
| 52 |  |  |  |  |  |  | ##     ## | 
| 53 |  |  |  |  |  |  | ##     ##-- query options (formerly only in DDC::Client::Distributed) | 
| 54 |  |  |  |  |  |  | ##     start    =>$start,       ##-- index of first hit to fetch (default=0) | 
| 55 |  |  |  |  |  |  | ##     limit    =>$limit,       ##-- maximum number of hits to fetch (default=10) | 
| 56 |  |  |  |  |  |  | ##     timeout  =>$secs,        ##-- query timeout in seconds (lower bound, default=60) | 
| 57 |  |  |  |  |  |  | ##     hint     =>$hint,        ##-- navigation hint (optional; default=undef: none) | 
| 58 |  |  |  |  |  |  | ##     ## | 
| 59 |  |  |  |  |  |  | ##     ##-- hit parsing options (mostly obsolete) | 
| 60 |  |  |  |  |  |  | ##     parseMeta=>$bool,        ##-- if true, hit metadata will be parsed to %$hit (default=1) | 
| 61 |  |  |  |  |  |  | ##     parseContext=>$bool,     ##-- if true, hit context data will be parsed to $hit->{ctx_} (default=1) | 
| 62 |  |  |  |  |  |  | ##     metaNames =>\@names,     ##-- metadata field names (default=undef (none)) | 
| 63 |  |  |  |  |  |  | ##     expandFields => $bool,   ##-- whether to implicitly expand hit fields to HASH-refs (default=true; only valid for 'table' mode) | 
| 64 |  |  |  |  |  |  | ##     keepRaw=>$bool,          ##-- if false, raw context buffer will be deleted after parsing context data (default=false) | 
| 65 |  |  |  |  |  |  | ##     #defaultField => $name,   ##-- default field names (default='w') | 
| 66 |  |  |  |  |  |  | ## | 
| 67 |  |  |  |  |  |  | ##     fieldSeparator => $char, ##-- intra-token field separator (default="\x{1f}": ASCII unit separator) | 
| 68 |  |  |  |  |  |  | ##     tokenSeparator => $char, ##-- inter-token separator       (default="\x{1e}": ASCII record separator) | 
| 69 |  |  |  |  |  |  | ## | 
| 70 |  |  |  |  |  |  | ##     textHighlight => [$l0,$r0,$l1,$r1],  ##-- highlighting strings, text mode (default=[qw(&& && _& &_)]) | 
| 71 |  |  |  |  |  |  | ##     htmlHighlight => [$l0,$r0,$l1,$r1],  ##-- highlighting strings, html mode (default=[('','') x 2]) | 
| 72 |  |  |  |  |  |  | ##     tableHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, table mode (default=[qw(&& && _& &_)]) | 
| 73 |  |  |  |  |  |  | ##    ) | 
| 74 |  |  |  |  |  |  | ##  + default \%connectArgs: | 
| 75 |  |  |  |  |  |  | ##     Domain=>'INET',          ##-- also accepts 'UNIX' | 
| 76 |  |  |  |  |  |  | ##     PeerAddr=>'localhost', | 
| 77 |  |  |  |  |  |  | ##     PeerPort=>50000, | 
| 78 |  |  |  |  |  |  | ##     Proto=>'tcp', | 
| 79 |  |  |  |  |  |  | ##     Type=>SOCK_STREAM, | 
| 80 |  |  |  |  |  |  | ##     Blocking=>1, | 
| 81 |  |  |  |  |  |  | ##  + URL specification of \%connectArgs via connect=>{url=>$url} or connect=>$url (see parseAddr() method): | 
| 82 |  |  |  |  |  |  | ##     inet://ADDR:PORT?OPT=VAL...	# canonical INET socket URL | 
| 83 |  |  |  |  |  |  | ##     unix://UNIX_PATH?OPT=VAL...	# canonical UNIX socket URL | 
| 84 |  |  |  |  |  |  | ##     unix:UNIX_PATH?OPT=VAL...	# = unix://UNIX_PATH?OPT=val | 
| 85 |  |  |  |  |  |  | ##     ADDR?OPT=VAL...			# = inet://ADDR:5000?OPT=VAL... | 
| 86 |  |  |  |  |  |  | ##     :PORT?OPT=VAL...			# = inet://localhost:PORT?OPT=VAL... | 
| 87 |  |  |  |  |  |  | ##     ADDR:PORT?OPT=VAL...		# = inet://ADDR:PORT?OPT=VAL... | 
| 88 |  |  |  |  |  |  | ##     /UNIX_PATH?OPT=VAL...		# = unix:///UNIX_PATH?POT=VAL... | 
| 89 |  |  |  |  |  |  | sub new { | 
| 90 | 0 |  |  | 0 | 1 |  | my ($that,%args) = @_; | 
| 91 | 0 |  |  |  |  |  | my @connect_args = grep {exists $args{$_}} map {($_,lc($_),uc($_))} qw(Peer PeerAddr PeerPort Url); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | my $connect = $that->parseAddr | 
| 93 |  |  |  |  |  |  | ({ | 
| 94 |  |  |  |  |  |  | ##-- connect: default options | 
| 95 |  |  |  |  |  |  | Domain=>'INET', | 
| 96 |  |  |  |  |  |  | PeerAddr=>'localhost', | 
| 97 |  |  |  |  |  |  | PeerPort=>50000, | 
| 98 |  |  |  |  |  |  | Proto=>'tcp', | 
| 99 |  |  |  |  |  |  | Type=>SOCK_STREAM, | 
| 100 |  |  |  |  |  |  | Blocking=>1, | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | ##-- connect: user args | 
| 103 |  |  |  |  |  |  | (ref($args{'connect'}) | 
| 104 | 0 |  |  |  |  |  | ? %{$args{'connect'}} | 
| 105 |  |  |  |  |  |  | : ($args{connect} | 
| 106 | 0 |  |  |  |  |  | ? %{$that->parseAddr($args{connect})} | 
| 107 |  |  |  |  |  |  | : qw())), | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | ##-- connect: top-level args | 
| 110 | 0 | 0 |  |  |  |  | (map {($_=>$args{$_})} @connect_args), | 
|  | 0 | 0 |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | }); | 
| 112 | 0 |  |  |  |  |  | delete @args{'connect',@connect_args}; | 
| 113 |  |  |  |  |  |  |  | 
| 114 | 0 |  | 0 |  |  |  | my $dc =bless { | 
| 115 |  |  |  |  |  |  | ##-- connection options | 
| 116 |  |  |  |  |  |  | connect=> $connect, | 
| 117 |  |  |  |  |  |  | linger => [1,0], | 
| 118 |  |  |  |  |  |  | mode   =>'json', | 
| 119 |  |  |  |  |  |  | encoding => 'UTF-8', | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | ##-- query options (formerly in DDC::Client::Distributed) | 
| 122 |  |  |  |  |  |  | start=>0, | 
| 123 |  |  |  |  |  |  | limit=>10, | 
| 124 |  |  |  |  |  |  | timeout=>60, | 
| 125 |  |  |  |  |  |  | hint=>undef, | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | ##-- hit-parsing options | 
| 128 |  |  |  |  |  |  | parseMeta=>1, | 
| 129 |  |  |  |  |  |  | parseContext=>1, | 
| 130 |  |  |  |  |  |  | expandFields=>1, | 
| 131 |  |  |  |  |  |  | keepRaw=>0, | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | #fieldSeparator => "\x{1f}", | 
| 134 |  |  |  |  |  |  | #tokenSeparator => "\x{1e}", | 
| 135 |  |  |  |  |  |  | #defaultField => 'w', | 
| 136 |  |  |  |  |  |  | #metaNames => undef, | 
| 137 |  |  |  |  |  |  | #textHighlight=>undef, | 
| 138 |  |  |  |  |  |  | #tableHighlight=>undef, | 
| 139 |  |  |  |  |  |  | #htmlHighlight=>undef, | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | %args, | 
| 142 |  |  |  |  |  |  | }, ref($that)||$that; | 
| 143 |  |  |  |  |  |  |  | 
| 144 | 0 | 0 |  |  |  |  | if (defined($args{optFile})) { | 
| 145 |  |  |  |  |  |  | $dc->loadOptFile($args{optFile}) | 
| 146 | 0 | 0 |  |  |  |  | or confess(__PACKAGE__ . "::new(): could not load options file '$args{optFile}': $!"); | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 0 | 0 |  |  |  |  | $dc->{fieldSeparator} = "\x{1f}" if (!$dc->{fieldSeparator}); | 
| 150 | 0 | 0 |  |  |  |  | $dc->{tokenSeparator} = "\x{1e}" if (!$dc->{tokenSeparator}); | 
| 151 | 0 | 0 |  |  |  |  | $dc->{textHighlight} = [qw(&& && _& &_)] if (!$dc->{textHighlight}); | 
| 152 | 0 | 0 |  |  |  |  | $dc->{tableHighlight} = [qw(&& && _& &_)] if (!$dc->{tableHighlight}); | 
| 153 |  |  |  |  |  |  | $dc->{htmlHighlight} = [ | 
| 154 |  |  |  |  |  |  | '','', | 
| 155 |  |  |  |  |  |  | '','', | 
| 156 | 0 | 0 |  |  |  |  | ] if (!$dc->{htmlHighlight}); | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 0 |  |  |  |  |  | return $dc; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | ##====================================================================== | 
| 162 |  |  |  |  |  |  | ## DDC *.opt file | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | ## $dc = $dc->loadOptFile($filename, %opts); | 
| 165 |  |  |  |  |  |  | ## $dc = $dc->loadOptFile($fh,       %opts); | 
| 166 |  |  |  |  |  |  | ## $dc = $dc->loadOptFile(\$str,     %opts); | 
| 167 |  |  |  |  |  |  | ##  Sets client options from a DDC *.opt file: #fieldNames, metaNames, fieldSeparator. | 
| 168 |  |  |  |  |  |  | ##  %opts: | 
| 169 |  |  |  |  |  |  | ##  ( | 
| 170 |  |  |  |  |  |  | ##   clobber => $bool,  ##-- whether to clobber existing %$dc fields (default=false) | 
| 171 |  |  |  |  |  |  | ##  ) | 
| 172 |  |  |  |  |  |  | ## | 
| 173 |  |  |  |  |  |  | ##  WARNING: for whatever reason, DDC does not return metadata fields in the same | 
| 174 |  |  |  |  |  |  | ##   order in which they appeared in the *.opt file (nor in any lexicographic order | 
| 175 |  |  |  |  |  |  | ##   combination of the fields type, name, and xpath of the 'Bibl' directorive I | 
| 176 |  |  |  |  |  |  | ##   have tried), BUT this code assumes that the order in which the 'Bibl' directives | 
| 177 |  |  |  |  |  |  | ##   appear in the *.opt file are identical to the order in which DDC returns the | 
| 178 |  |  |  |  |  |  | ##   corresponding data in 'text' and 'html' modes.  The actual order used by the | 
| 179 |  |  |  |  |  |  | ##   server should appear in the server logs.  Change the *.opt file you pass to | 
| 180 |  |  |  |  |  |  | ##   this function accordingly. | 
| 181 |  |  |  |  |  |  | sub loadOptFile { | 
| 182 | 0 |  |  | 0 | 0 |  | my ($dc,$src,%opts) = @_; | 
| 183 | 0 |  |  |  |  |  | my ($fh); | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | ##-- get source fh | 
| 186 | 0 | 0 |  |  |  |  | if (!ref($src)) { | 
|  |  | 0 |  |  |  |  |  | 
| 187 | 0 | 0 |  |  |  |  | $fh = IO::File->new("<$src") | 
| 188 |  |  |  |  |  |  | or confess(__PACKAGE__ . "::loadOptFile(): open failed for '$src': $!"); | 
| 189 | 0 | 0 |  |  |  |  | binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding}); | 
| 190 |  |  |  |  |  |  | } | 
| 191 |  |  |  |  |  |  | elsif (ref($src) eq 'SCALAR') { | 
| 192 | 0 |  |  |  |  |  | $fh = IO::Handle->new; | 
| 193 | 0 | 0 |  |  |  |  | open($fh,'<',$src) | 
| 194 |  |  |  |  |  |  | or confess(__PACKAGE__ . "::loadOptFile(): open failed for buffer: $!"); | 
| 195 | 0 | 0 |  |  |  |  | binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding}); | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  | else { | 
| 198 | 0 |  |  |  |  |  | $fh = $src; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 |  |  |  |  |  |  | ##-- parse file | 
| 202 | 0 |  |  |  |  |  | my $clobber = $opts{clobber}; | 
| 203 | 0 |  |  |  |  |  | my (@indices,@show,@meta,$showMeta); | 
| 204 | 0 |  |  |  |  |  | while (defined($_=<$fh>)) { | 
| 205 | 0 |  |  |  |  |  | chomp; | 
| 206 | 0 | 0 |  |  |  |  | if (/^Indices\s(.*)$/) { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 207 | 0 |  |  |  |  |  | @indices = map {s/^\s*\[//; s/\]\s*$//; [split(' ',$_)]} split(/\;\s*/,$1); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  | elsif (/^Bibl\s+(\S+)\s+(\d)\s+(\S+)\s+(.*)$/) { | 
| 210 | 0 |  |  |  |  |  | my ($type,$visible,$name,$xpath) = ($1,$2,$3,$4); | 
| 211 | 0 | 0 |  |  |  |  | push(@meta,[$type,$visible,$name,$xpath]) if ($visible+0); | 
| 212 |  |  |  |  |  |  | } | 
| 213 |  |  |  |  |  |  | elsif (/^IndicesToShow\s+(.*)$/) { | 
| 214 | 0 |  |  |  |  |  | @show = map {$_-1} split(' ',$1); | 
|  | 0 |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | } | 
| 216 |  |  |  |  |  |  | elsif (/^OutputBibliographyOfHits\b/) { | 
| 217 | 0 |  |  |  |  |  | $showMeta = 1; | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  | elsif (/^InterpDelim[ie]ter\s(.*)$/) { | 
| 220 | 0 | 0 | 0 |  |  |  | $dc->{fieldSeparator} = unescape($1) if ($clobber || !$dc->{fieldSeparator}); | 
| 221 |  |  |  |  |  |  | } | 
| 222 |  |  |  |  |  |  | elsif (/^TokenDelim[ie]ter\s(.*)$/) { | 
| 223 | 0 | 0 | 0 |  |  |  | $dc->{tokenSeparator} = unescape($1) if ($clobber || !$dc->{tokenSeparator}); | 
| 224 |  |  |  |  |  |  | } | 
| 225 |  |  |  |  |  |  | elsif (/^Utf8\s*$/) { | 
| 226 | 0 | 0 | 0 |  |  |  | $dc->{encoding} = 'utf8' if ($clobber || !$dc->{encoding}); | 
| 227 |  |  |  |  |  |  | } | 
| 228 |  |  |  |  |  |  | elsif (/^HtmlHighlighting\s*(.*)$/) { | 
| 229 | 0 | 0 | 0 |  |  |  | $dc->{htmlHighlight} = [map {unescape($1)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{htmlHighlight}); | 
|  | 0 |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | } | 
| 231 |  |  |  |  |  |  | elsif (/^TextHighlighting\s*(.*)$/) { | 
| 232 | 0 | 0 | 0 |  |  |  | $dc->{textHighlight} = [map {unescape($1)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{textHighlight}); | 
|  | 0 |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  | elsif (/^TableHighlighting\s*(.*)$/) { | 
| 235 | 0 | 0 | 0 |  |  |  | $dc->{tableHighlight} = [map {unescape($_)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{tableHighlight}); | 
|  | 0 |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  | } | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | ##-- setup local options | 
| 240 | 0 | 0 |  |  |  |  | @show = (0) if (!@show); | 
| 241 | 0 | 0 | 0 |  |  |  | $dc->{fieldNames} = [map {$_->[1]} @indices[@show]] if ($clobber || !$dc->{fieldNames}); | 
|  | 0 |  |  |  |  |  |  | 
| 242 | 0 | 0 |  |  |  |  | if (!$dc->{metaNames}) { | 
| 243 | 0 | 0 |  |  |  |  | if (!$showMeta) { | 
|  |  | 0 |  |  |  |  |  | 
| 244 | 0 |  |  |  |  |  | $dc->{metaNames} = ['file_']; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  | elsif (@meta) { | 
| 247 | 0 | 0 | 0 |  |  |  | $dc->{metaNames} = [map {$_->[2]} @meta] if (@meta && ($clobber || !$dc->{metaNames})); | 
|  | 0 |  | 0 |  |  |  |  | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  | } | 
| 250 |  |  |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | ##-- cleanup | 
| 252 | 0 | 0 | 0 |  |  |  | $fh->close if (!ref($src) || ref($src) eq 'SCALAR'); | 
| 253 | 0 |  |  |  |  |  | return $dc; | 
| 254 |  |  |  |  |  |  | } | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | ##====================================================================== | 
| 257 |  |  |  |  |  |  | ## Query requests (formerly in DDC::Client::Distributed) | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | ## $buf = $dc->queryRaw($query_string) | 
| 260 |  |  |  |  |  |  | ## $buf = $dc->queryRaw(\@raw_strings) | 
| 261 |  |  |  |  |  |  | sub queryRaw { | 
| 262 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 263 | 0 |  |  |  |  |  | my $buf = $dc->queryRawNC(@_); | 
| 264 | 0 |  |  |  |  |  | $dc->close(); ##-- this apparently has to happen: bummer | 
| 265 | 0 |  |  |  |  |  | return $buf; | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | ## $buf = $dc->queryRawNC($query_string) | 
| 269 |  |  |  |  |  |  | ## $buf = $dc->queryRawNC(\@raw_strings) | 
| 270 |  |  |  |  |  |  | ##  + guts for queryRaw() without implicit close() | 
| 271 |  |  |  |  |  |  | sub queryRawNC { | 
| 272 | 0 |  |  | 0 | 1 |  | my ($dc,$query) = @_; | 
| 273 | 0 | 0 |  |  |  |  | if (UNIVERSAL::isa($query,'ARRAY')) { | 
|  |  | 0 |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | ##-- raw array: send raw data to DDC | 
| 275 | 0 |  |  |  |  |  | $dc->send(join("\001",@$query)); | 
| 276 |  |  |  |  |  |  | } | 
| 277 |  |  |  |  |  |  | elsif ($dc->{mode} =~ /^(?:raw|req)/i) { | 
| 278 |  |  |  |  |  |  | ##-- "raw" or "request" mode: send raw request to DDC | 
| 279 | 0 |  |  |  |  |  | $dc->send($query); | 
| 280 |  |  |  |  |  |  | } | 
| 281 |  |  |  |  |  |  | else { | 
| 282 |  |  |  |  |  |  | ##-- query string: send 'run-query Distributed' | 
| 283 |  |  |  |  |  |  | $dc->send(join("\001", | 
| 284 |  |  |  |  |  |  | "run_query Distributed", | 
| 285 |  |  |  |  |  |  | $query, | 
| 286 |  |  |  |  |  |  | $dc->{mode}, | 
| 287 | 0 | 0 |  |  |  |  | join(' ', @$dc{qw(start limit timeout)}, ($dc->{hint} ? $dc->{hint} : qw())))); | 
| 288 |  |  |  |  |  |  | } | 
| 289 |  |  |  |  |  |  | ##-- get output buffer | 
| 290 | 0 |  |  |  |  |  | return $dc->readData(); | 
| 291 |  |  |  |  |  |  | } | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  | ## @bufs = $dc->queryMulti($queryString1, $queryString2, ...) | 
| 294 |  |  |  |  |  |  | ## @bufs = $dc->queryMulti(\@queryStrings1, \@queryStrings2, ...) | 
| 295 |  |  |  |  |  |  | sub queryMulti { | 
| 296 | 0 |  |  | 0 | 1 |  | my $dc   = shift; | 
| 297 | 0 |  |  |  |  |  | my @bufs = map {$dc->queryRawNC($_)} @_; | 
|  | 0 |  |  |  |  |  |  | 
| 298 | 0 |  |  |  |  |  | $dc->close(); ##-- this apparently has to happen: bummer | 
| 299 | 0 |  |  |  |  |  | return @bufs; | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | ## $obj = $dc->queryJson($query_string) | 
| 303 |  |  |  |  |  |  | ## $obj = $dc->queryJson(\@raw_strings) | 
| 304 |  |  |  |  |  |  | sub queryJson { | 
| 305 | 0 |  |  | 0 | 0 |  | my ($dc,$query) = @_; | 
| 306 | 0 |  |  |  |  |  | return $dc->decodeJson($dc->queryRaw($query)); | 
| 307 |  |  |  |  |  |  | } | 
| 308 |  |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | ## $hits = $dc->query($query_string) | 
| 310 |  |  |  |  |  |  | sub query { | 
| 311 | 0 |  |  | 0 | 1 |  | my ($dc,$query) = @_; | 
| 312 | 0 |  |  |  |  |  | return $dc->parseData($dc->queryRaw($query)); | 
| 313 |  |  |  |  |  |  | } | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  | ##====================================================================== | 
| 317 |  |  |  |  |  |  | ## Common Requests | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | ## $rsp = $dc->request($request_string) | 
| 320 |  |  |  |  |  |  | sub request { | 
| 321 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 322 | 0 |  |  |  |  |  | my $buf = $dc->requestNC(@_); | 
| 323 | 0 |  |  |  |  |  | $dc->close(); | 
| 324 | 0 |  |  |  |  |  | return $buf; | 
| 325 |  |  |  |  |  |  | } | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | ## $rsp = $dc->requestNC($request_string) | 
| 328 |  |  |  |  |  |  | ##  + guts for request() which doesn't implicitly call close() | 
| 329 |  |  |  |  |  |  | sub requestNC { | 
| 330 | 0 |  |  | 0 | 0 |  | my $dc = shift; | 
| 331 | 0 |  |  |  |  |  | $dc->send($_[0]); | 
| 332 | 0 |  |  |  |  |  | return $dc->readData(); | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | ## $data = $dc->requestJson($request_string) | 
| 336 |  |  |  |  |  |  | sub requestJson { | 
| 337 | 0 |  |  | 0 | 1 |  | my $dc  = shift; | 
| 338 | 0 |  |  |  |  |  | return $dc->decodeJson($dc->request($_[0])); | 
| 339 |  |  |  |  |  |  | } | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | ## $server_version = $dc->version() | 
| 342 |  |  |  |  |  |  | sub version { | 
| 343 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 344 | 0 |  |  |  |  |  | return $dc->request("version"); | 
| 345 |  |  |  |  |  |  | } | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | ## $status = $dc->status() | 
| 348 |  |  |  |  |  |  | ## $status = $dc->status($timeout) | 
| 349 |  |  |  |  |  |  | sub status { | 
| 350 | 0 |  |  | 0 | 1 |  | my ($dc,$timeout) = @_; | 
| 351 | 0 | 0 |  |  |  |  | $timeout = $dc->{timeout} if (!defined($timeout)); | 
| 352 | 0 | 0 |  |  |  |  | return $dc->requestJson("status".(defined($timeout) ? " $timeout" : '')); | 
| 353 |  |  |  |  |  |  | } | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | ## $vstatus = $dc->vstatus() | 
| 356 |  |  |  |  |  |  | ## $vstatus = $dc->vstatus($timeout) | 
| 357 |  |  |  |  |  |  | sub vstatus { | 
| 358 | 0 |  |  | 0 | 1 |  | my ($dc,$timeout) = @_; | 
| 359 | 0 | 0 |  |  |  |  | $timeout = $dc->{timeout} if (!defined($timeout)); | 
| 360 | 0 | 0 |  |  |  |  | return $dc->requestJson("vstatus".(defined($timeout) ? " $timeout" : '')); | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | ## $info = $dc->info() | 
| 364 |  |  |  |  |  |  | ## $info = $dc->info($timeout) | 
| 365 |  |  |  |  |  |  | sub info { | 
| 366 | 0 |  |  | 0 | 1 |  | my ($dc,$timeout) = @_; | 
| 367 | 0 | 0 |  |  |  |  | $timeout = $dc->{timeout} if (!defined($timeout)); | 
| 368 | 0 | 0 |  |  |  |  | return $dc->requestJson("info".(defined($timeout) ? " $timeout" : '')); | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | ## $expandRaw = $dc->expand_terms($pipeline, $term) | 
| 372 |  |  |  |  |  |  | ## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout) | 
| 373 |  |  |  |  |  |  | ## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout, $subcorpus) | 
| 374 |  |  |  |  |  |  | ## $expandRaw = $dc->expand_terms(\@pipeline, \@terms) | 
| 375 |  |  |  |  |  |  | ## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout) | 
| 376 |  |  |  |  |  |  | ## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout, $subcorpus) | 
| 377 |  |  |  |  |  |  | sub expand_terms { | 
| 378 | 0 |  |  | 0 | 1 |  | my ($dc,$chain,$terms,$timeout,$subcorpus) = @_; | 
| 379 | 0 | 0 |  |  |  |  | $chain = join('|', @$chain)  if (UNIVERSAL::isa($chain,'ARRAY')); | 
| 380 | 0 | 0 |  |  |  |  | $terms = join("\t", @$terms) if (UNIVERSAL::isa($terms,'ARRAY')); | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | ##-- hack: detect swapping of $timeout and $subcorpus (old DDC::Client::Distributed-style) | 
| 383 | 0 | 0 |  |  |  |  | $timeout   = '' if (!defined($timeout)); | 
| 384 | 0 | 0 |  |  |  |  | $subcorpus = '' if (!defined($subcorpus)); | 
| 385 | 0 | 0 | 0 |  |  |  | ($timeout,$subcorpus) = ($subcorpus,$timeout) | 
|  |  |  | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 386 |  |  |  |  |  |  | if ($timeout ne '' && $subcorpus ne '' && $timeout =~ /[0-9]/ && $subcorpus !~ /[0-9]/); | 
| 387 |  |  |  |  |  |  |  | 
| 388 | 0 | 0 |  |  |  |  | $timeout   = $dc->{timeout} if ($timeout eq ''); | 
| 389 | 0 | 0 | 0 |  |  |  | $timeout   = 5 if (!defined($timeout) || $timeout eq ''); | 
| 390 | 0 |  |  |  |  |  | $dc->send(join("\x01", 'expand_terms ', $chain, $terms, $timeout, $subcorpus)); | 
| 391 |  |  |  |  |  |  | ##-- get output buffer | 
| 392 | 0 |  |  |  |  |  | my $buf = $dc->readData(); | 
| 393 | 0 |  |  |  |  |  | $dc->close(); ##-- this apparently has to happen: bummer | 
| 394 | 0 |  |  |  |  |  | return $buf; | 
| 395 |  |  |  |  |  |  | } | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | ## \@terms = $dc->expand($pipeline, $term) | 
| 398 |  |  |  |  |  |  | ## \@terms = $dc->expand($pipeline, $term, $timeout) | 
| 399 |  |  |  |  |  |  | ## \@terms = $dc->expand($pipeline, $term, $timeout, $subcorpus) | 
| 400 |  |  |  |  |  |  | ## \@terms = $dc->expand(\@pipeline, \@terms) | 
| 401 |  |  |  |  |  |  | ## \@terms = $dc->expand(\@pipeline, \@terms, $timeout) | 
| 402 |  |  |  |  |  |  | ## \@terms = $dc->expand(\@pipeline, \@terms, $timeout, $subcorpus) | 
| 403 |  |  |  |  |  |  | sub expand { | 
| 404 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 405 | 0 |  |  |  |  |  | return $dc->parseExpandTermsResponse($dc->expand_terms(@_)); | 
| 406 |  |  |  |  |  |  | } | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  | ## $buf = $dc->get_first_hits($query) | 
| 409 |  |  |  |  |  |  | ## $buf = $dc->get_first_hits($query,$timeout?,$limit?,$hint?) | 
| 410 |  |  |  |  |  |  | sub get_first_hits { | 
| 411 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 412 | 0 |  |  |  |  |  | my $query = shift; | 
| 413 | 0 | 0 |  |  |  |  | my $timeout = @_ ? shift : $dc->{timeout}; | 
| 414 | 0 | 0 |  |  |  |  | my $limit   = @_ ? shift : $dc->{limit}; | 
| 415 | 0 | 0 |  |  |  |  | my $hint    = @_ ? shift : $dc->{hint}; | 
| 416 | 0 | 0 |  |  |  |  | return $dc->request("get_first_hits $query\x{01}$timeout $limit".($hint ? " $hint" : '')); | 
| 417 |  |  |  |  |  |  | } | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | ## $buf = $dc->get_hit_strings($format?,$start?,$limit?) | 
| 420 |  |  |  |  |  |  | sub get_hit_strings { | 
| 421 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 422 | 0 | 0 |  |  |  |  | my $format  = @_ ? shift : ($dc->{mode} eq 'raw' ? 'json' : ''); | 
|  |  | 0 |  |  |  |  |  | 
| 423 | 0 | 0 |  |  |  |  | my $start   = @_ ? shift : $dc->{start}; | 
| 424 | 0 | 0 |  |  |  |  | my $limit   = @_ ? shift : $dc->{limit}; | 
| 425 | 0 |  |  |  |  |  | return $dc->request("get_hit_strings $format\x{01}$start $limit"); | 
| 426 |  |  |  |  |  |  | } | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  |  | 
| 429 |  |  |  |  |  |  | ## $buf = $dc->run_query($corpus,$query,$format?,$start?,$limit?,$timeout?,$hint?) | 
| 430 |  |  |  |  |  |  | sub run_query { | 
| 431 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 432 | 0 |  |  |  |  |  | my $corpus = shift; | 
| 433 | 0 |  |  |  |  |  | my $query  = shift; | 
| 434 | 0 | 0 |  |  |  |  | my $format = @_ ? shift : $dc->{mode}; | 
| 435 | 0 | 0 |  |  |  |  | my $start  = @_ ? shift : $dc->{start}; | 
| 436 | 0 | 0 |  |  |  |  | my $limit  = @_ ? shift : $dc->{limit}; | 
| 437 | 0 | 0 |  |  |  |  | my $timeout = @_ ? shift : $dc->{timeout}; | 
| 438 | 0 | 0 |  |  |  |  | my $hint    = @_ ? shift : $dc->{hint}; | 
| 439 | 0 | 0 |  |  |  |  | $corpus = 'Distributed' if (!defined($corpus)); | 
| 440 | 0 | 0 |  |  |  |  | return $dc->request("run_query $corpus\x{01}$query\x{01}$format\x{01}$start $limit $timeout".($hint ? " $hint" : '')); | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | ##====================================================================== | 
| 444 |  |  |  |  |  |  | ## Low-level communications | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | ## \%connect = $dc->parseAddr() | 
| 447 |  |  |  |  |  |  | ## \%connect = $CLASS_OR_OBJECT->parseAddr(\%connect, $PEER_OR_LOCAL='peer', %options) | 
| 448 |  |  |  |  |  |  | ## \%connect = $CLASS_OR_OBJECT->parserAddr({url=>$url}, $PEER_OR_LOCAL='peer', %options) | 
| 449 |  |  |  |  |  |  | ##  + parses connect URLs to option-hashes suitable for use as $dc->{connect} | 
| 450 |  |  |  |  |  |  | ##  + supported URLs formats: | 
| 451 |  |  |  |  |  |  | ##     inet://ADDR:PORT?OPT=VAL...	# canonical INET socket URL | 
| 452 |  |  |  |  |  |  | ##     unix://UNIX_PATH?OPT=VAL...	# canonical UNIX socket URL | 
| 453 |  |  |  |  |  |  | ##     unix:UNIX_PATH?OPT=VAL...	# = unix://UNIX_PATH?OPT=val | 
| 454 |  |  |  |  |  |  | ##     ADDR?OPT=VAL...			# = inet://ADDR:5000?OPT=VAL... | 
| 455 |  |  |  |  |  |  | ##     :PORT?OPT=VAL...			# = inet://localhost:PORT?OPT=VAL... | 
| 456 |  |  |  |  |  |  | ##     ADDR:PORT?OPT=VAL...		# = inet://ADDR:PORT?OPT=VAL... | 
| 457 |  |  |  |  |  |  | ##     /UNIX_PATH?OPT=VAL...		# = unix:///UNIX_PATH?POT=VAL... | 
| 458 |  |  |  |  |  |  | sub parseAddr { | 
| 459 | 0 |  |  | 0 | 1 |  | my ($that,$connect,$prefix,%opts) = @_; | 
| 460 | 0 |  |  |  |  |  | my ($override); | 
| 461 | 0 | 0 | 0 |  |  |  | if (!$connect && ref($that)) { | 
| 462 | 0 |  |  |  |  |  | $connect  = $that->{connect}; | 
| 463 | 0 |  |  |  |  |  | $override = 1; | 
| 464 |  |  |  |  |  |  | } | 
| 465 | 0 |  | 0 |  |  |  | $connect //= 'inet://localhost:50000'; | 
| 466 | 0 | 0 |  |  |  |  | $connect   = {url=>$connect} if (!UNIVERSAL::isa($connect,'HASH')); | 
| 467 |  |  |  |  |  |  |  | 
| 468 | 0 |  | 0 |  |  |  | $prefix ||= 'Peer'; | 
| 469 | 0 |  |  |  |  |  | $prefix   = ucfirst($prefix); | 
| 470 | 0 |  | 0 |  |  |  | my $url = $connect->{URL} || $connect->{Url} || $connect->{url}; | 
| 471 | 0 | 0 |  |  |  |  | if (defined($url)) { | 
| 472 | 0 |  |  |  |  |  | my ($base,$opts) = split(/\?/,$url,2); | 
| 473 | 0 | 0 |  |  |  |  | my $scheme = ($base =~ s{^([\w\+\-]+):(?://)?}{} ? $1 : ''); | 
| 474 | 0 | 0 | 0 |  |  |  | if (lc($scheme) eq 'unix' || (!$scheme && $base =~ m{^/})) { | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 475 | 0 |  |  |  |  |  | $connect->{Domain} = 'UNIX'; | 
| 476 | 0 |  |  |  |  |  | $connect->{$prefix} = $base; | 
| 477 |  |  |  |  |  |  | } | 
| 478 | 0 |  |  |  |  |  | elsif (!$scheme || grep {$_ eq lc($scheme)} qw(inet tcp)) { | 
| 479 | 0 |  |  |  |  |  | $connect->{Domain} = 'INET'; | 
| 480 | 0 |  |  |  |  |  | my ($host,$port) = split(':',$base,2); | 
| 481 | 0 |  | 0 |  |  |  | $host ||= 'localhost'; | 
| 482 | 0 |  | 0 |  |  |  | $port ||= 50000; | 
| 483 | 0 |  |  |  |  |  | @$connect{"${prefix}Addr","${prefix}Port"} = ($host,$port); | 
| 484 |  |  |  |  |  |  | } | 
| 485 |  |  |  |  |  |  | else { | 
| 486 | 0 |  |  |  |  |  | die(__PACKAGE__, "::parseAddr(): unsupported scheme '$scheme' for URL $url"); | 
| 487 |  |  |  |  |  |  | } | 
| 488 | 0 |  | 0 |  |  |  | my %urlopts = map {split(/=/,$_,2)} grep {$_} split(/[\&\;]/,($opts//'')); | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 489 | 0 |  |  |  |  |  | @$connect{keys %urlopts} = values %urlopts; | 
| 490 |  |  |  |  |  |  | } | 
| 491 | 0 |  |  |  |  |  | @$connect{keys %opts} = values %opts; | 
| 492 |  |  |  |  |  |  |  | 
| 493 | 0 | 0 |  |  |  |  | $that->{connect} = $connect if ($override); | 
| 494 | 0 |  |  |  |  |  | return $connect; | 
| 495 |  |  |  |  |  |  | } | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | ## $str = $dc->addrStr() | 
| 498 |  |  |  |  |  |  | ## $str = $CLASS_OR_OBJECT->addrStr(\%connect,$PEER_OR_LOCAL) | 
| 499 |  |  |  |  |  |  | ## $str = $CLASS_OR_OBJECT->addrStr($url,$PEER_OR_LOCAL) | 
| 500 |  |  |  |  |  |  | ## $str = $CLASS_OR_OBJECT->addrStr($sock,$PEER_OR_LOCAL) | 
| 501 |  |  |  |  |  |  | sub addrStr { | 
| 502 | 0 |  |  | 0 | 1 |  | my ($that,$addr,$prefix) = @_; | 
| 503 | 0 | 0 | 0 |  |  |  | $addr   = ($that->{sock} || $that->{connect}) if (ref($that) && !defined($addr)); | 
|  |  |  | 0 |  |  |  |  | 
| 504 | 0 |  | 0 |  |  |  | $prefix ||= 'Peer'; | 
| 505 | 0 |  |  |  |  |  | $prefix   = ucfirst($prefix); | 
| 506 |  |  |  |  |  |  |  | 
| 507 | 0 | 0 |  |  |  |  | if (UNIVERSAL::isa($addr,'IO::Socket::UNIX')) { | 
|  |  | 0 |  |  |  |  |  | 
| 508 | 0 |  |  |  |  |  | return "unix://$addr->{$prefix}"; | 
| 509 |  |  |  |  |  |  | } | 
| 510 |  |  |  |  |  |  | elsif (UNIVERSAL::isa($addr,'IO::Socket::INET')) { | 
| 511 | 0 | 0 |  |  |  |  | my $mprefix = (lc($prefix) eq 'peer' ? 'peer' : 'sock'); | 
| 512 | 0 |  |  |  |  |  | return "inet://".$addr->can($mprefix."host")->($addr).":".$addr->can($mprefix."port")->($addr); | 
| 513 |  |  |  |  |  |  | } | 
| 514 | 0 | 0 |  |  |  |  | $addr = $addr->{connect} if (UNIVERSAL::isa($addr,'DDC::Client')); | 
| 515 | 0 | 0 |  |  |  |  | $addr = $that->parseAddr($addr,$prefix) if (!ref($addr)); | 
| 516 | 0 |  |  |  |  |  | my ($url); | 
| 517 |  |  |  |  |  |  | #my %uopts = %$addr; | 
| 518 | 0 | 0 |  |  |  |  | if ($addr->{Domain} eq 'UNIX') { | 
| 519 | 0 |  |  |  |  |  | $url = "unix://$addr->{$prefix}"; | 
| 520 |  |  |  |  |  |  | #delete $uopts{$prefix}; | 
| 521 |  |  |  |  |  |  | } | 
| 522 |  |  |  |  |  |  | else { | 
| 523 |  |  |  |  |  |  | $url = "inet://".($addr->{"${prefix}Addr"} && $addr->{"${prefix}Port"} | 
| 524 |  |  |  |  |  |  | ? ($addr->{"${prefix}Addr"}.":".$addr->{"${prefix}Port"}) | 
| 525 | 0 | 0 | 0 |  |  |  | : $addr->{"${prefix}Addr"}); | 
| 526 |  |  |  |  |  |  | #delete @uopts{"${prefix}Addr","${prefix}Port"}; | 
| 527 |  |  |  |  |  |  | } | 
| 528 |  |  |  |  |  |  | #delete $opts{Domain}; | 
| 529 |  |  |  |  |  |  | #if (%uopts) { | 
| 530 |  |  |  |  |  |  | #  $url .= '?'.join('&',map {("$_=$uopts{$_}")} sort keys %uopts); | 
| 531 |  |  |  |  |  |  | #} | 
| 532 | 0 |  |  |  |  |  | return $url; | 
| 533 |  |  |  |  |  |  | } | 
| 534 |  |  |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | ## $io_socket = $dc->open() | 
| 536 |  |  |  |  |  |  | sub open { | 
| 537 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 538 | 0 |  |  |  |  |  | $dc->parseAddr(); | 
| 539 | 0 |  | 0 |  |  |  | my $domain = $dc->{connect}{Domain} // 'INET'; | 
| 540 | 0 | 0 |  |  |  |  | if (lc($domain) eq 'unix') { | 
| 541 |  |  |  |  |  |  | ##-- v0.43: use unix-domain socket connection | 
| 542 | 0 |  |  |  |  |  | $dc->{sock} = IO::Socket::UNIX->new(%{$dc->{'connect'}}); | 
|  | 0 |  |  |  |  |  |  | 
| 543 |  |  |  |  |  |  | } else { | 
| 544 |  |  |  |  |  |  | ##-- compatibility hack: use INET-domain sockets (TCP) | 
| 545 | 0 |  |  |  |  |  | $dc->{sock} = IO::Socket::INET->new(%{$dc->{'connect'}}); | 
|  | 0 |  |  |  |  |  |  | 
| 546 |  |  |  |  |  |  | } | 
| 547 | 0 | 0 |  |  |  |  | return undef if (!$dc->{sock}); | 
| 548 | 0 | 0 |  |  |  |  | $dc->{sock}->setsockopt(SOL_SOCKET, SO_LINGER, pack('II',@{$dc->{linger}})) if ($dc->{linger}); | 
|  | 0 |  |  |  |  |  |  | 
| 549 | 0 |  |  |  |  |  | $dc->{sock}->autoflush(1); | 
| 550 | 0 |  |  |  |  |  | return $dc->{sock}; | 
| 551 |  |  |  |  |  |  | } | 
| 552 |  |  |  |  |  |  |  | 
| 553 |  |  |  |  |  |  | ## undef = $dc->close() | 
| 554 |  |  |  |  |  |  | sub close { | 
| 555 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 556 | 0 | 0 |  |  |  |  | $dc->{sock}->close() if (defined($dc->{sock})); | 
| 557 | 0 |  |  |  |  |  | delete($dc->{sock}); | 
| 558 |  |  |  |  |  |  | } | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | ## $encoded = $dc->ddc_encode(@message_strings) | 
| 561 |  |  |  |  |  |  | sub ddc_encode { | 
| 562 | 0 |  |  | 0 | 0 |  | my $dc = shift; | 
| 563 | 0 |  |  |  |  |  | my $msg = join('',@_); | 
| 564 | 0 | 0 | 0 |  |  |  | $msg = encode($dc->{encoding},$msg) if ($dc->{encoding} && utf8::is_utf8($msg)); | 
| 565 | 0 |  |  |  |  |  | return pack($ifmt,length($msg)) . $msg; | 
| 566 |  |  |  |  |  |  | } | 
| 567 |  |  |  |  |  |  |  | 
| 568 |  |  |  |  |  |  | ## $decoded = $dc->ddc_decode($response_buf) | 
| 569 |  |  |  |  |  |  | sub ddc_decode { | 
| 570 | 0 |  |  | 0 | 0 |  | my $dc  = shift; | 
| 571 | 0 |  |  |  |  |  | my $buf = unpack("$ifmt/a*",$_[0]); | 
| 572 | 0 | 0 |  |  |  |  | $buf = decode($dc->{encoding},$buf) if ($dc->{encoding}); | 
| 573 | 0 |  |  |  |  |  | return $buf; | 
| 574 |  |  |  |  |  |  | } | 
| 575 |  |  |  |  |  |  |  | 
| 576 |  |  |  |  |  |  | ## undef = $dc->send(@message_strings) | 
| 577 |  |  |  |  |  |  | ##  + sends @message_strings | 
| 578 |  |  |  |  |  |  | sub send { | 
| 579 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 580 | 0 | 0 |  |  |  |  | $dc->open() if (!defined($dc->{sock})); | 
| 581 | 0 |  |  |  |  |  | return $dc->sendfh($dc->{sock}, @_); | 
| 582 |  |  |  |  |  |  | } | 
| 583 |  |  |  |  |  |  |  | 
| 584 |  |  |  |  |  |  | ## undef = $dc->sendfh($fh,@message_strings) | 
| 585 |  |  |  |  |  |  | ##  + sends @message_strings to $fh, prepending total length | 
| 586 |  |  |  |  |  |  | sub sendfh { | 
| 587 | 0 |  |  | 0 | 1 |  | my ($dc,$fh) = (shift,shift); | 
| 588 | 0 |  |  |  |  |  | $fh->print( $dc->ddc_encode(@_) ); | 
| 589 |  |  |  |  |  |  | } | 
| 590 |  |  |  |  |  |  |  | 
| 591 |  |  |  |  |  |  | ## $size = $dc->readSize() | 
| 592 |  |  |  |  |  |  | ## $size = $dc->readSize($fh) | 
| 593 |  |  |  |  |  |  | sub readSize { | 
| 594 | 0 |  |  | 0 | 1 |  | my ($dc,$fh) = @_; | 
| 595 | 0 |  |  |  |  |  | my ($size_packed); | 
| 596 | 0 | 0 |  |  |  |  | $fh = $dc->{sock} if (!$fh); | 
| 597 | 0 | 0 | 0 |  |  |  | confess(ref($dc), "::readSize(): could not read size from socket: $!") | 
| 598 |  |  |  |  |  |  | if (($fh->read($size_packed,$ilen)||0) != $ilen); | 
| 599 | 0 | 0 |  |  |  |  | return 0 if (!defined($size_packed)); | 
| 600 | 0 |  |  |  |  |  | return unpack($ifmt,$size_packed); | 
| 601 |  |  |  |  |  |  | } | 
| 602 |  |  |  |  |  |  |  | 
| 603 |  |  |  |  |  |  | ## $data = $dc->readBytes($nbytes) | 
| 604 |  |  |  |  |  |  | ## $data = $dc->readBytes($nbytes,$fh) | 
| 605 |  |  |  |  |  |  | sub readBytes { | 
| 606 | 0 |  |  | 0 | 1 |  | my ($dc,$nbytes,$fh) = @_; | 
| 607 | 0 |  |  |  |  |  | my ($buf); | 
| 608 | 0 | 0 |  |  |  |  | $fh = $dc->{sock} if (!$fh); | 
| 609 | 0 |  |  |  |  |  | my $nread = $fh->read($buf,$nbytes); | 
| 610 | 0 | 0 |  |  |  |  | confess(ref($dc), "::readBytes(): failed to read $nbytes bytes of data (only found $nread): $!") | 
| 611 |  |  |  |  |  |  | if ($nread != $nbytes); | 
| 612 | 0 |  |  |  |  |  | return $buf; | 
| 613 |  |  |  |  |  |  | } | 
| 614 |  |  |  |  |  |  |  | 
| 615 |  |  |  |  |  |  | ## $data = $dc->readData() | 
| 616 |  |  |  |  |  |  | ## $data = $dc->readData($fh) | 
| 617 | 0 |  |  | 0 | 1 |  | sub readData { return $_[0]->readBytes($_[0]->readSize($_[1]),$_[1]); } | 
| 618 |  |  |  |  |  |  |  | 
| 619 |  |  |  |  |  |  | ##====================================================================== | 
| 620 |  |  |  |  |  |  | ## Hit Parsing | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | ## $hitList = $dc->parseData($buf) | 
| 623 |  |  |  |  |  |  | sub parseData { | 
| 624 | 0 | 0 |  | 0 | 0 |  | return $_[0]->parseJsonData($_[1])  if ($_[0]{mode} eq 'json'); | 
| 625 | 0 | 0 |  |  |  |  | return $_[0]->parseTableData($_[1]) if ($_[0]{mode} eq 'table'); | 
| 626 | 0 | 0 |  |  |  |  | return $_[0]->parseTextData($_[1])  if ($_[0]{mode} eq 'text'); | 
| 627 | 0 | 0 |  |  |  |  | return $_[0]->parseHtmlData($_[1])  if ($_[0]{mode} eq 'html'); | 
| 628 | 0 |  |  |  |  |  | confess(__PACKAGE__ . "::parseData(): unknown query mode '$_[0]{mode}'"); | 
| 629 |  |  |  |  |  |  | } | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 632 |  |  |  |  |  |  | ## Hit Parsing: Text | 
| 633 |  |  |  |  |  |  |  | 
| 634 |  |  |  |  |  |  | ## $hitList = $dc->parseTextData($buf) | 
| 635 |  |  |  |  |  |  | ##  + returns a DDC::HitList | 
| 636 |  |  |  |  |  |  | sub parseTextData { | 
| 637 | 0 |  |  | 0 | 1 |  | my ($dc,$buf) = @_; | 
| 638 | 0 |  |  |  |  |  | my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); | 
| 639 |  |  |  |  |  |  |  | 
| 640 |  |  |  |  |  |  | ##-- parse response macro structure | 
| 641 | 0 | 0 | 0 |  |  |  | $buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); | 
| 642 | 0 |  |  |  |  |  | my ($buflines,$bufinfo) = split("\001", $buf, 2); | 
| 643 |  |  |  |  |  |  |  | 
| 644 |  |  |  |  |  |  | ##-- parse administrative data from response footer | 
| 645 | 0 |  |  |  |  |  | chomp($bufinfo); | 
| 646 | 0 |  |  |  |  |  | @$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); | 
| 647 |  |  |  |  |  |  |  | 
| 648 |  |  |  |  |  |  | ##-- successful response: parse hit data | 
| 649 | 0 |  |  |  |  |  | my @buflines = split(/\n/,$buflines); | 
| 650 | 0 |  | 0 |  |  |  | my $metaNames = $dc->{metaNames} || []; | 
| 651 | 0 |  |  |  |  |  | my ($bufline,$hit,@fields,$ctxbuf); | 
| 652 | 0 |  |  |  |  |  | foreach $bufline (@buflines) { | 
| 653 | 0 | 0 |  |  |  |  | if ($bufline =~ /^Corpora Distribution\:(.*)$/) { | 
|  |  | 0 |  |  |  |  |  | 
| 654 | 0 |  |  |  |  |  | $hits->{dhits_} = $1; | 
| 655 | 0 |  |  |  |  |  | next; | 
| 656 |  |  |  |  |  |  | } elsif ($bufline =~ /^Relevant Documents Distribution:(.*)$/) { | 
| 657 | 0 |  |  |  |  |  | $hits->{ddocs_} = $1; | 
| 658 | 0 |  |  |  |  |  | next; | 
| 659 |  |  |  |  |  |  | } | 
| 660 | 0 |  |  |  |  |  | push(@{$hits->{hits_}},$hit=DDC::Hit->new); | 
|  | 0 |  |  |  |  |  |  | 
| 661 | 0 | 0 |  |  |  |  | $hit->{raw_} = $bufline if ($dc->{keepRaw}); | 
| 662 |  |  |  |  |  |  |  | 
| 663 | 0 | 0 | 0 |  |  |  | if ($dc->{parseMeta} || $dc->{parseContext}) { | 
| 664 | 0 |  |  |  |  |  | @fields = split(/ ### /, $bufline); | 
| 665 | 0 |  |  |  |  |  | $ctxbuf = pop(@fields); | 
| 666 |  |  |  |  |  |  |  | 
| 667 |  |  |  |  |  |  | ##-- parse: metadata | 
| 668 | 0 | 0 |  |  |  |  | if ($dc->{parseMeta}) { | 
| 669 | 0 |  |  |  |  |  | $hit->{meta_}{file_} = shift(@fields); | 
| 670 | 0 |  |  |  |  |  | $hit->{meta_}{page_} = shift(@fields); | 
| 671 | 0 |  |  |  |  |  | $hit->{meta_}{indices_} = [split(' ', pop(@fields))]; | 
| 672 | 0 |  | 0 |  |  |  | $hit->{meta_}{$metaNames->[$_]||"${_}_"} = $fields[$_] foreach (0..$#fields); | 
| 673 |  |  |  |  |  |  | } | 
| 674 |  |  |  |  |  |  |  | 
| 675 |  |  |  |  |  |  | ##-- parse: context | 
| 676 | 0 | 0 |  |  |  |  | $hit->{ctx_} = $dc->parseTextContext($ctxbuf) if ($dc->{parseContext}); | 
| 677 |  |  |  |  |  |  | } | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  |  | 
| 680 | 0 | 0 |  |  |  |  | $hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); | 
| 681 | 0 |  |  |  |  |  | return $hits; | 
| 682 |  |  |  |  |  |  | } | 
| 683 |  |  |  |  |  |  |  | 
| 684 |  |  |  |  |  |  |  | 
| 685 |  |  |  |  |  |  | ## \@context_data = $dc->parseTextContext($context_buf) | 
| 686 |  |  |  |  |  |  | sub parseTextContext { | 
| 687 | 0 |  |  | 0 | 0 |  | my ($dc,$ctx) = @_; | 
| 688 |  |  |  |  |  |  |  | 
| 689 |  |  |  |  |  |  | ##-- defaults | 
| 690 | 0 |  |  |  |  |  | my $fieldNames = $dc->{fieldNames}; | 
| 691 | 0 |  |  |  |  |  | my $fs         = qr(\Q$dc->{fieldSeparator}\E); | 
| 692 | 0 |  |  |  |  |  | my $ts         = qr(\Q$dc->{tokenSeparator}\E\ *); | 
| 693 | 0 |  |  |  |  |  | my $hl         = $dc->{textHighlight}; | 
| 694 | 0 |  |  |  |  |  | my $hls        = qr(\Q$dc->{tokenSeparator}\E\ *\Q$hl->[0]\E); | 
| 695 | 0 |  |  |  |  |  | my $hlw0       = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); | 
| 696 | 0 |  |  |  |  |  | my $hlw1       = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); | 
| 697 |  |  |  |  |  |  |  | 
| 698 |  |  |  |  |  |  | ##-- split into sentences | 
| 699 | 0 |  |  |  |  |  | $ctx =~ s/^\s*//; | 
| 700 | 0 |  |  |  |  |  | my ($sbuf,@s,$w); | 
| 701 | 0 |  |  |  |  |  | my $sents = [[],[],[]]; | 
| 702 | 0 |  |  |  |  |  | foreach $sbuf (split(/ {4}/,$ctx)) { | 
| 703 |  |  |  |  |  |  |  | 
| 704 | 0 | 0 |  |  |  |  | if ($sbuf =~ $hls) { | 
| 705 |  |  |  |  |  |  | ##-- target sentence with index dump: parse it | 
| 706 | 0 |  |  |  |  |  | $sbuf =~ s/^$ts//; | 
| 707 | 0 |  |  |  |  |  | @s    = map {[0,split($fs,$_)]} split($ts,$sbuf); | 
|  | 0 |  |  |  |  |  |  | 
| 708 |  |  |  |  |  |  |  | 
| 709 |  |  |  |  |  |  | ##-- parse words | 
| 710 | 0 |  |  |  |  |  | foreach $w (@s) { | 
| 711 | 0 | 0 | 0 |  |  |  | if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { | 
| 712 |  |  |  |  |  |  | ##-- matched token | 
| 713 | 0 |  |  |  |  |  | $w->[1]    =~ s/$hlw0//; | 
| 714 | 0 |  |  |  |  |  | $w->[$#$w] =~ s/$hlw1//; | 
| 715 | 0 |  |  |  |  |  | $w->[0]    = 1; | 
| 716 |  |  |  |  |  |  | } | 
| 717 |  |  |  |  |  |  | } | 
| 718 | 0 |  |  |  |  |  | push(@{$sents->[1]},@s); | 
|  | 0 |  |  |  |  |  |  | 
| 719 |  |  |  |  |  |  | } | 
| 720 |  |  |  |  |  |  | else { | 
| 721 |  |  |  |  |  |  | ##-- context sentence: surface strings only | 
| 722 | 0 |  |  |  |  |  | $sbuf =~ s/^$ts//; | 
| 723 | 0 |  |  |  |  |  | @s = split($ts,$sbuf); | 
| 724 | 0 | 0 |  |  |  |  | if (!@{$sents->[1]}) { | 
|  | 0 |  |  |  |  |  |  | 
| 725 |  |  |  |  |  |  | ##-- left context | 
| 726 | 0 |  |  |  |  |  | push(@{$sents->[0]}, @s); | 
|  | 0 |  |  |  |  |  |  | 
| 727 |  |  |  |  |  |  | } else { | 
| 728 |  |  |  |  |  |  | ##-- right context | 
| 729 | 0 |  |  |  |  |  | push(@{$sents->[2]}, @s); | 
|  | 0 |  |  |  |  |  |  | 
| 730 |  |  |  |  |  |  | } | 
| 731 |  |  |  |  |  |  | } | 
| 732 |  |  |  |  |  |  | } | 
| 733 |  |  |  |  |  |  |  | 
| 734 | 0 |  |  |  |  |  | return $sents; | 
| 735 |  |  |  |  |  |  | } | 
| 736 |  |  |  |  |  |  |  | 
| 737 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 738 |  |  |  |  |  |  | ## Hit Parsing: Table | 
| 739 |  |  |  |  |  |  |  | 
| 740 |  |  |  |  |  |  | ## $hitList = $dc->parseTableData($buf) | 
| 741 |  |  |  |  |  |  | ##  + returns a DDC::HitList | 
| 742 |  |  |  |  |  |  | sub parseTableData { | 
| 743 | 0 |  |  | 0 | 1 |  | my ($dc,$buf) = @_; | 
| 744 | 0 |  |  |  |  |  | my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); | 
| 745 |  |  |  |  |  |  |  | 
| 746 |  |  |  |  |  |  | ##-- parse response macro structure | 
| 747 | 0 | 0 | 0 |  |  |  | $buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); | 
| 748 | 0 |  |  |  |  |  | my ($buflines,$bufinfo) = split("\001", $buf, 2); | 
| 749 |  |  |  |  |  |  |  | 
| 750 |  |  |  |  |  |  | ##-- parse administrative data from response footer | 
| 751 | 0 |  |  |  |  |  | chomp($bufinfo); | 
| 752 | 0 |  |  |  |  |  | @$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); | 
| 753 |  |  |  |  |  |  |  | 
| 754 |  |  |  |  |  |  | ##-- successful response: parse hit data | 
| 755 | 0 |  |  |  |  |  | my @buflines = split(/\n/,$buflines); | 
| 756 | 0 |  |  |  |  |  | my ($bufline,$hit,@fields,$field,$val); | 
| 757 | 0 |  |  |  |  |  | foreach $bufline (@buflines) { | 
| 758 | 0 |  |  |  |  |  | push(@{$hits->{hits_}},$hit=DDC::Hit->new); | 
|  | 0 |  |  |  |  |  |  | 
| 759 | 0 | 0 |  |  |  |  | $hit->{raw_} = $bufline if ($dc->{keepRaw}); | 
| 760 |  |  |  |  |  |  |  | 
| 761 | 0 | 0 | 0 |  |  |  | if ($dc->{parseMeta} || $dc->{parseContext}) { | 
| 762 | 0 |  |  |  |  |  | @fields = split("\002", $bufline); | 
| 763 | 0 |  |  |  |  |  | while (defined($field=shift(@fields))) { | 
| 764 |  |  |  |  |  |  |  | 
| 765 | 0 | 0 |  |  |  |  | if ($field eq 'keyword') { | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 766 |  |  |  |  |  |  | ##-- special handling for 'keyword' field | 
| 767 | 0 |  |  |  |  |  | $val = shift(@fields); | 
| 768 | 0 |  |  |  |  |  | while ($val =~ /\(.*?\S)\s*\<\/orth\>/g) { | 
| 769 | 0 |  |  |  |  |  | push(@{$hit->{orth_}}, $1); | 
|  | 0 |  |  |  |  |  |  | 
| 770 |  |  |  |  |  |  | } | 
| 771 |  |  |  |  |  |  | } | 
| 772 |  |  |  |  |  |  | elsif ($field eq 'indices') { | 
| 773 |  |  |  |  |  |  | ##-- special handling for 'indices' field | 
| 774 | 0 |  |  |  |  |  | $val = shift(@fields); | 
| 775 | 0 |  |  |  |  |  | $hit->{meta_}{indices_} = [split(' ',$val)]; | 
| 776 |  |  |  |  |  |  | } | 
| 777 |  |  |  |  |  |  | elsif ($field =~ /^\s*\ 
 | 
| 778 |  |  |  |  |  |  | ##-- special handling for context pseudo-field | 
| 779 | 0 | 0 |  |  |  |  | $hit->{ctx_} = $dc->parseTableContext($field) if ($dc->{parseContext}); | 
| 780 |  |  |  |  |  |  | } | 
| 781 |  |  |  |  |  |  | elsif ($dc->{parseMeta}) { | 
| 782 |  |  |  |  |  |  | ##-- normal bibliographic field | 
| 783 | 0 | 0 |  |  |  |  | $field .= '_' if ($field =~ /^(?:scan|orig|page|rank(?:_debug)?)$/); ##-- special handling for ddc-internal fields | 
| 784 | 0 |  |  |  |  |  | $val = shift(@fields); | 
| 785 | 0 |  |  |  |  |  | $hit->{meta_}{$field} = $val; | 
| 786 |  |  |  |  |  |  | } | 
| 787 |  |  |  |  |  |  | } | 
| 788 |  |  |  |  |  |  | } | 
| 789 |  |  |  |  |  |  | } | 
| 790 |  |  |  |  |  |  |  | 
| 791 | 0 | 0 |  |  |  |  | $hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); | 
| 792 | 0 |  |  |  |  |  | return $hits; | 
| 793 |  |  |  |  |  |  | } | 
| 794 |  |  |  |  |  |  |  | 
| 795 |  |  |  |  |  |  |  | 
| 796 |  |  |  |  |  |  | ## \@context_data = $dc->parseTableContext($context_buf) | 
| 797 |  |  |  |  |  |  | sub parseTableContext { | 
| 798 | 0 |  |  | 0 | 0 |  | my ($dc,$ctx) = @_; | 
| 799 |  |  |  |  |  |  |  | 
| 800 |  |  |  |  |  |  | ##-- defaults | 
| 801 | 0 |  |  |  |  |  | my $fieldNames = $dc->{fieldNames}; | 
| 802 | 0 |  |  |  |  |  | my $fs         = qr(\Q$dc->{fieldSeparator}\E); | 
| 803 | 0 |  |  |  |  |  | my $ts         = qr(\Q$dc->{tokenSeparator}\E\ *); | 
| 804 | 0 |  |  |  |  |  | my $hl         = $dc->{tableHighlight}; | 
| 805 | 0 |  |  |  |  |  | my $hlw0       = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); | 
| 806 | 0 |  |  |  |  |  | my $hlw1       = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); | 
| 807 |  |  |  |  |  |  |  | 
| 808 |  |  |  |  |  |  | ##-- split into sentences | 
| 809 | 0 |  |  |  |  |  | my $sents = [[],[],[]]; | 
| 810 | 0 |  |  |  |  |  | my ($sbuf,@s,$w); | 
| 811 |  |  |  |  |  |  |  | 
| 812 | 0 |  |  |  |  |  | foreach $sbuf (split(/\<\/s\>\s*/,$ctx)) { | 
| 813 |  |  |  |  |  |  |  | 
| 814 | 0 | 0 |  |  |  |  | if ($sbuf =~ /^\s* /) {  | 
| 815 |  |  |  |  |  |  | ##-- target sentence with index dump: parse it | 
| 816 | 0 |  |  |  |  |  | $sbuf =~ s|^\s*\ ]*)?\>\s*$ts||;  | 
| 817 | 0 |  |  |  |  |  | @s    = map {[0,split($fs,$_)]} split($ts,$sbuf); | 
|  | 0 |  |  |  |  |  |  | 
| 818 |  |  |  |  |  |  |  | 
| 819 |  |  |  |  |  |  | ##-- parse words | 
| 820 | 0 |  |  |  |  |  | foreach $w (@s) { | 
| 821 | 0 | 0 | 0 |  |  |  | if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { | 
| 822 |  |  |  |  |  |  | ##-- matched token | 
| 823 | 0 |  |  |  |  |  | $w->[1]    =~ s/$hlw0//; | 
| 824 | 0 |  |  |  |  |  | $w->[$#$w] =~ s/$hlw1//; | 
| 825 | 0 |  |  |  |  |  | $w->[0]    = 1; | 
| 826 |  |  |  |  |  |  | } | 
| 827 |  |  |  |  |  |  | } | 
| 828 | 0 |  |  |  |  |  | push(@{$sents->[1]}, @s); | 
|  | 0 |  |  |  |  |  |  | 
| 829 |  |  |  |  |  |  | } | 
| 830 |  |  |  |  |  |  | else { | 
| 831 |  |  |  |  |  |  | ##-- context sentence; surface strings only | 
| 832 | 0 |  |  |  |  |  | $sbuf =~ s|^\s*\ ]*)?\>$ts||;  | 
| 833 | 0 |  |  |  |  |  | @s = split($ts,$sbuf); | 
| 834 | 0 | 0 |  |  |  |  | if (!@{$sents->[1]}) { | 
|  | 0 |  |  |  |  |  |  | 
| 835 |  |  |  |  |  |  | ##-- left context | 
| 836 | 0 |  |  |  |  |  | push(@{$sents->[0]}, @s); | 
|  | 0 |  |  |  |  |  |  | 
| 837 |  |  |  |  |  |  | } else { | 
| 838 |  |  |  |  |  |  | ##-- right context | 
| 839 | 0 |  |  |  |  |  | push(@{$sents->[2]}, @s); | 
|  | 0 |  |  |  |  |  |  | 
| 840 |  |  |  |  |  |  | } | 
| 841 |  |  |  |  |  |  | } | 
| 842 |  |  |  |  |  |  | } | 
| 843 |  |  |  |  |  |  |  | 
| 844 | 0 |  |  |  |  |  | return $sents; | 
| 845 |  |  |  |  |  |  | } | 
| 846 |  |  |  |  |  |  |  | 
| 847 |  |  |  |  |  |  |  | 
| 848 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 849 |  |  |  |  |  |  | ## Hit Parsing: JSON | 
| 850 |  |  |  |  |  |  |  | 
| 851 |  |  |  |  |  |  | ## $obj = $dc->decodeJson($buf) | 
| 852 |  |  |  |  |  |  | sub decodeJson { | 
| 853 | 0 |  |  | 0 | 0 |  | my $dc = shift; | 
| 854 | 0 |  |  |  |  |  | my ($bufr) = \$_[0]; | 
| 855 | 0 | 0 | 0 |  |  |  | if ($dc->{encoding} && !utf8::is_utf8($$bufr)) { | 
| 856 | 0 |  |  |  |  |  | my $buf = decode($dc->{encoding},$$bufr); | 
| 857 | 0 |  |  |  |  |  | $bufr   = \$buf; | 
| 858 |  |  |  |  |  |  | } | 
| 859 |  |  |  |  |  |  |  | 
| 860 | 0 |  | 0 |  |  |  | my $module = $JSON_BACKEND // 'JSON'; | 
| 861 | 0 |  |  |  |  |  | $module =~ s{::}{/}g; | 
| 862 | 0 |  |  |  |  |  | require "$module.pm"; | 
| 863 |  |  |  |  |  |  |  | 
| 864 | 0 |  |  |  |  |  | my $jxs = $dc->{jxs}; | 
| 865 | 0 | 0 |  |  |  |  | $jxs    = $dc->{jxs} = $JSON_BACKEND->new->utf8(0)->relaxed(1)->canonical(0) if (!defined($jxs)); | 
| 866 | 0 |  |  |  |  |  | return $jxs->decode($$bufr); | 
| 867 |  |  |  |  |  |  | } | 
| 868 |  |  |  |  |  |  |  | 
| 869 |  |  |  |  |  |  | ## $hitList = $dc->parseJsonData($buf) | 
| 870 |  |  |  |  |  |  | ##  + returns a DDC::HitList | 
| 871 |  |  |  |  |  |  | sub parseJsonData { | 
| 872 | 0 |  |  | 0 | 1 |  | my $dc = shift; | 
| 873 | 0 |  |  |  |  |  | my $data = $dc->decodeJson($_[0]); | 
| 874 |  |  |  |  |  |  | my $hits = DDC::HitList->new(%$data, | 
| 875 |  |  |  |  |  |  | start=>$dc->{start}, | 
| 876 |  |  |  |  |  |  | limit=>$dc->{limit}, | 
| 877 | 0 |  |  |  |  |  | ); | 
| 878 |  |  |  |  |  |  |  | 
| 879 | 0 | 0 |  |  |  |  | $_ = bless($_,'DDC::Hit') foreach (@{$hits->{hits_}||[]}); | 
|  | 0 |  |  |  |  |  |  | 
| 880 | 0 | 0 |  |  |  |  | $hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); | 
| 881 | 0 |  |  |  |  |  | return $hits; | 
| 882 |  |  |  |  |  |  | } | 
| 883 |  |  |  |  |  |  |  | 
| 884 |  |  |  |  |  |  | ##-------------------------------------------------------------- | 
| 885 |  |  |  |  |  |  | ## Hit Parsing: expand_terms() | 
| 886 |  |  |  |  |  |  |  | 
| 887 |  |  |  |  |  |  | ## \@terms = $dc->parseExpandTermsResponse($buf) | 
| 888 |  |  |  |  |  |  | ##  @terms = $dc->parseExpandTermsResponse($buf) | 
| 889 |  |  |  |  |  |  | sub parseExpandTermsResponse { | 
| 890 | 0 |  |  | 0 | 1 |  | my $dc    = shift; | 
| 891 | 0 | 0 |  |  |  |  | my @items = grep {defined($_) && $_ ne ''} split(/[\t\r\n]+/,$_[0]); | 
|  | 0 |  |  |  |  |  |  | 
| 892 | 0 | 0 |  |  |  |  | die("error in expand_terms response") if ($items[0] !~ /^0 /); | 
| 893 | 0 |  |  |  |  |  | shift(@items); | 
| 894 | 0 | 0 |  |  |  |  | return wantarray ? @items : \@items; | 
| 895 |  |  |  |  |  |  | } | 
| 896 |  |  |  |  |  |  |  | 
| 897 |  |  |  |  |  |  | 1; ##-- be happy | 
| 898 |  |  |  |  |  |  |  | 
| 899 |  |  |  |  |  |  | __END__ |