| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## File: DDC::Client.pm |
|
4
|
|
|
|
|
|
|
## Author: Bryan Jurish |
|
5
|
|
|
|
|
|
|
## Description: |
|
6
|
|
|
|
|
|
|
## + DDC Query utilities: client sockets |
|
7
|
|
|
|
|
|
|
##====================================================================== |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package DDC::Client; |
|
10
|
26
|
|
|
26
|
|
9670
|
use DDC::Utils qw(:escape); |
|
|
26
|
|
|
|
|
71
|
|
|
|
26
|
|
|
|
|
3991
|
|
|
11
|
26
|
|
|
26
|
|
177
|
use DDC::HitList; |
|
|
26
|
|
|
|
|
61
|
|
|
|
26
|
|
|
|
|
450
|
|
|
12
|
26
|
|
|
26
|
|
116
|
use DDC::Hit; |
|
|
26
|
|
|
|
|
44
|
|
|
|
26
|
|
|
|
|
388
|
|
|
13
|
26
|
|
|
26
|
|
14525
|
use IO::Handle; |
|
|
26
|
|
|
|
|
156364
|
|
|
|
26
|
|
|
|
|
1137
|
|
|
14
|
26
|
|
|
26
|
|
11648
|
use IO::File; |
|
|
26
|
|
|
|
|
47796
|
|
|
|
26
|
|
|
|
|
2679
|
|
|
15
|
26
|
|
|
26
|
|
12390
|
use IO::Socket::INET; |
|
|
26
|
|
|
|
|
364435
|
|
|
|
26
|
|
|
|
|
169
|
|
|
16
|
26
|
|
|
26
|
|
25300
|
use Encode qw(encode decode); |
|
|
26
|
|
|
|
|
257621
|
|
|
|
26
|
|
|
|
|
1908
|
|
|
17
|
26
|
|
|
26
|
|
196
|
use Carp; |
|
|
26
|
|
|
|
|
58
|
|
|
|
26
|
|
|
|
|
1267
|
|
|
18
|
26
|
|
|
26
|
|
151
|
use strict; |
|
|
26
|
|
|
|
|
46
|
|
|
|
26
|
|
|
|
|
1516
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
##====================================================================== |
|
21
|
|
|
|
|
|
|
## Globals |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
## $ifmt |
|
24
|
|
|
|
|
|
|
## + pack format to use for integer sizes passed to and from DDC |
|
25
|
|
|
|
|
|
|
## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) |
|
26
|
|
|
|
|
|
|
## + for ddc-1.x, use machine word size and endian-ness of server |
|
27
|
|
|
|
|
|
|
our $ifmt = 'V';

## $ilen
##  + size in bytes of the message-length integer used by the DDC protocol
##  + default value should be right for ddc-2.x (always 32-bit unsigned little endian)
##  + for ddc-1.x, use machine word size and endian-ness of server
our $ilen = 4;

## $JSON_BACKEND
##  + underlying JSON module (default='JSON')
our $JSON_BACKEND;
BEGIN {
  ##-- only install the default if no value is defined yet
  $JSON_BACKEND //= 'JSON';
}
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
##====================================================================== |
|
43
|
|
|
|
|
|
|
## Constructors etc |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
## $dc = $CLASS_OR_OBJ->new(%args) |
|
46
|
|
|
|
|
|
|
## + %args: |
|
47
|
|
|
|
|
|
|
## ( |
|
48
|
|
|
|
|
|
|
## ##-- connection options |
|
49
|
|
|
|
|
|
|
## connect=>\%connectArgs, ##-- passed to IO::Socket::(INET|UNIX)->new(); also accepts connect=>$connectURL |
|
50
|
|
|
|
|
|
|
## mode =>$queryMode, ##-- one of 'table', 'html', 'text', 'json', or 'raw'; default='json' ('html' is not yet supported) |
|
51
|
|
|
|
|
|
|
## linger =>\@linger, ##-- SO_LINGER socket option; default=[1,0]: immediate termination |
|
52
|
|
|
|
|
|
|
## ## |
|
53
|
|
|
|
|
|
|
## ##-- query options (formerly only in DDC::Client::Distributed) |
|
54
|
|
|
|
|
|
|
## start =>$start, ##-- index of first hit to fetch (default=0) |
|
55
|
|
|
|
|
|
|
## limit =>$limit, ##-- maximum number of hits to fetch (default=10) |
|
56
|
|
|
|
|
|
|
## timeout =>$secs, ##-- query timeout in seconds (lower bound, default=60) |
|
57
|
|
|
|
|
|
|
## hint =>$hint, ##-- navigation hint (optional; default=undef: none) |
|
58
|
|
|
|
|
|
|
## ## |
|
59
|
|
|
|
|
|
|
## ##-- hit parsing options (mostly obsolete) |
|
60
|
|
|
|
|
|
|
## parseMeta=>$bool, ##-- if true, hit metadata will be parsed to %$hit (default=1) |
|
61
|
|
|
|
|
|
|
## parseContext=>$bool, ##-- if true, hit context data will be parsed to $hit->{ctx_} (default=1) |
|
62
|
|
|
|
|
|
|
## metaNames =>\@names, ##-- metadata field names (default=undef (none)) |
|
63
|
|
|
|
|
|
|
## expandFields => $bool, ##-- whether to implicitly expand hit fields to HASH-refs (default=true; only valid for 'table' mode) |
|
64
|
|
|
|
|
|
|
## keepRaw=>$bool, ##-- if false, raw context buffer will be deleted after parsing context data (default=false) |
|
65
|
|
|
|
|
|
|
## #defaultField => $name, ##-- default field names (default='w') |
|
66
|
|
|
|
|
|
|
## |
|
67
|
|
|
|
|
|
|
## fieldSeparator => $char, ##-- intra-token field separator (default="\x{1f}": ASCII unit separator) |
|
68
|
|
|
|
|
|
|
## tokenSeparator => $char, ##-- inter-token separator (default="\x{1e}": ASCII record separator) |
|
69
|
|
|
|
|
|
|
## |
|
70
|
|
|
|
|
|
|
## textHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, text mode (default=[qw(&& && _& &_)]) |
|
71
|
|
|
|
|
|
|
## htmlHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, html mode (default=[('','') x 2]) |
|
72
|
|
|
|
|
|
|
## tableHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, table mode (default=[qw(&& && _& &_)]) |
|
73
|
|
|
|
|
|
|
## ) |
|
74
|
|
|
|
|
|
|
## + default \%connectArgs: |
|
75
|
|
|
|
|
|
|
## Domain=>'INET', ##-- also accepts 'UNIX' |
|
76
|
|
|
|
|
|
|
## PeerAddr=>'localhost', |
|
77
|
|
|
|
|
|
|
## PeerPort=>50000, |
|
78
|
|
|
|
|
|
|
## Proto=>'tcp', |
|
79
|
|
|
|
|
|
|
## Type=>SOCK_STREAM, |
|
80
|
|
|
|
|
|
|
## Blocking=>1, |
|
81
|
|
|
|
|
|
|
## + URL specification of \%connectArgs via connect=>{url=>$url} or connect=>$url (see parseAddr() method): |
|
82
|
|
|
|
|
|
|
## inet://ADDR:PORT?OPT=VAL... # canonical INET socket URL |
|
83
|
|
|
|
|
|
|
## unix://UNIX_PATH?OPT=VAL... # canonical UNIX socket URL |
|
84
|
|
|
|
|
|
|
## unix:UNIX_PATH?OPT=VAL... # = unix://UNIX_PATH?OPT=val |
|
85
|
|
|
|
|
|
|
## ADDR?OPT=VAL... # = inet://ADDR:50000?OPT=VAL... |
|
86
|
|
|
|
|
|
|
## :PORT?OPT=VAL... # = inet://localhost:PORT?OPT=VAL... |
|
87
|
|
|
|
|
|
|
## ADDR:PORT?OPT=VAL... # = inet://ADDR:PORT?OPT=VAL... |
|
88
|
|
|
|
|
|
|
## /UNIX_PATH?OPT=VAL... # = unix:///UNIX_PATH?OPT=VAL... |
|
89
|
|
|
|
|
|
|
sub new {
  ##-- Constructor: builds the connection option hash (defaults < connect=>... < top-level
  ##   Peer*/Url arguments), blesses the client object with its default options overridden
  ##   by %args, optionally loads a DDC *.opt file, and finally fills in separator and
  ##   highlighting defaults for anything still unset.
  my $that = shift;
  my %args = @_;

  ##-- top-level connection keywords actually passed by the caller
  ##   (each keyword is accepted verbatim, lower-cased, or upper-cased)
  my @connect_args;
  foreach my $key (qw(Peer PeerAddr PeerPort Url)) {
    push(@connect_args, grep {exists $args{$_}} ($key, lc($key), uc($key)));
  }

  ##-- user 'connect' argument: a reference is dereferenced as a HASH,
  ##   any other true value is parsed as a connection URL
  my $user_connect = $args{'connect'};
  my @user_pairs   = (ref($user_connect)
		      ? %{$user_connect}
		      : ($user_connect
			 ? %{$that->parseAddr($user_connect)}
			 : qw()));

  my $connect = $that->parseAddr({
    ##-- connect: default options
    Domain   => 'INET',
    PeerAddr => 'localhost',
    PeerPort => 50000,
    Proto    => 'tcp',
    Type     => SOCK_STREAM,
    Blocking => 1,

    ##-- connect: user args
    @user_pairs,

    ##-- connect: top-level args
    (map {($_ => $args{$_})} @connect_args),
  });

  ##-- connection-related keys must not leak into the object itself
  delete @args{'connect', @connect_args};

  my %defaults = (
    ##-- connection options
    connect  => $connect,
    linger   => [1,0],
    mode     => 'json',
    encoding => 'UTF-8',

    ##-- query options (formerly in DDC::Client::Distributed)
    start   => 0,
    limit   => 10,
    timeout => 60,
    hint    => undef,

    ##-- hit-parsing options
    parseMeta    => 1,
    parseContext => 1,
    expandFields => 1,
    keepRaw      => 0,
  );
  my $dc = bless({ %defaults, %args }, ref($that) || $that);

  ##-- optional DDC *.opt file
  if (defined($args{optFile})) {
    $dc->loadOptFile($args{optFile})
      or confess(__PACKAGE__ . "::new(): could not load options file '$args{optFile}': $!");
  }

  ##-- late defaults: applied only where neither %args nor the *.opt file set a true value
  $dc->{fieldSeparator} ||= "\x{1f}";
  $dc->{tokenSeparator} ||= "\x{1e}";
  $dc->{textHighlight}  ||= [qw(&& && _& &_)];
  $dc->{tableHighlight} ||= [qw(&& && _& &_)];
  $dc->{htmlHighlight}  ||= ['','', '',''];

  return $dc;
}
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
##====================================================================== |
|
162
|
|
|
|
|
|
|
## DDC *.opt file |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile($filename, %opts); |
|
165
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile($fh, %opts); |
|
166
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile(\$str, %opts); |
|
167
|
|
|
|
|
|
|
## Sets client options from a DDC *.opt file: #fieldNames, metaNames, fieldSeparator. |
|
168
|
|
|
|
|
|
|
## %opts: |
|
169
|
|
|
|
|
|
|
## ( |
|
170
|
|
|
|
|
|
|
## clobber => $bool, ##-- whether to clobber existing %$dc fields (default=false) |
|
171
|
|
|
|
|
|
|
## ) |
|
172
|
|
|
|
|
|
|
## |
|
173
|
|
|
|
|
|
|
## WARNING: for whatever reason, DDC does not return metadata fields in the same |
|
174
|
|
|
|
|
|
|
## order in which they appeared in the *.opt file (nor in any lexicographic order |
|
175
|
|
|
|
|
|
|
## combination of the fields type, name, and xpath of the 'Bibl' directive I |
|
176
|
|
|
|
|
|
|
## have tried), BUT this code assumes that the order in which the 'Bibl' directives |
|
177
|
|
|
|
|
|
|
## appear in the *.opt file are identical to the order in which DDC returns the |
|
178
|
|
|
|
|
|
|
## corresponding data in 'text' and 'html' modes. The actual order used by the |
|
179
|
|
|
|
|
|
|
## server should appear in the server logs. Change the *.opt file you pass to |
|
180
|
|
|
|
|
|
|
## this function accordingly. |
|
181
|
|
|
|
|
|
|
sub loadOptFile {
  ##-- Reads a DDC *.opt file and copies selected settings into %$dc.
  ##   Arguments:
  ##    $src  : filename, open filehandle, or reference to a string buffer
  ##    %opts : clobber=>$bool : overwrite options already set in %$dc (default=false)
  ##   Returns $dc; dies via confess() if the source cannot be opened.
  ##   Sets: fieldNames, metaNames, fieldSeparator, tokenSeparator, encoding,
  ##         htmlHighlight, textHighlight, tableHighlight.
  my ($dc,$src,%opts) = @_;
  my ($fh);

  ##-- get source fh
  if (!ref($src)) {
    $fh = IO::File->new("<$src")
      or confess(__PACKAGE__ . "::loadOptFile(): open failed for '$src': $!");
    binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding});
  }
  elsif (ref($src) eq 'SCALAR') {
    $fh = IO::Handle->new;
    ##-- CORE::open: this package defines its own open() method, so be explicit
    CORE::open($fh,'<',$src)
      or confess(__PACKAGE__ . "::loadOptFile(): open failed for buffer: $!");
    binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding});
  }
  else {
    ##-- anything else is assumed to be a readable handle owned by the caller
    $fh = $src;
  }

  ##-- parse file
  my $clobber = $opts{clobber};
  my (@indices,@show,@meta,$showMeta);
  ##-- use a lexical line buffer so the caller's $_ is left untouched
  while (defined(my $line = <$fh>)) {
    chomp($line);
    if ($line =~ /^Indices\s(.*)$/) {
      ##-- one [long_name short_name ...] group per index, separated by ';'
      @indices = map {s/^\s*\[//; s/\]\s*$//; [split(' ',$_)]} split(/\;\s*/,$1);
    }
    elsif ($line =~ /^Bibl\s+(\S+)\s+(\d)\s+(\S+)\s+(.*)$/) {
      my ($type,$visible,$name,$xpath) = ($1,$2,$3,$4);
      ##-- only visible metadata fields are recorded
      push(@meta,[$type,$visible,$name,$xpath]) if ($visible+0);
    }
    elsif ($line =~ /^IndicesToShow\s+(.*)$/) {
      ##-- the file uses 1-based index numbers; convert to 0-based
      @show = map {$_-1} split(' ',$1);
    }
    elsif ($line =~ /^OutputBibliographyOfHits\b/) {
      $showMeta = 1;
    }
    elsif ($line =~ /^InterpDelim[ie]ter\s(.*)$/) {
      $dc->{fieldSeparator} = unescape($1) if ($clobber || !$dc->{fieldSeparator});
    }
    elsif ($line =~ /^TokenDelim[ie]ter\s(.*)$/) {
      $dc->{tokenSeparator} = unescape($1) if ($clobber || !$dc->{tokenSeparator});
    }
    elsif ($line =~ /^Utf8\s*$/) {
      $dc->{encoding} = 'utf8' if ($clobber || !$dc->{encoding});
    }
    elsif ($line =~ /^HtmlHighlighting\s*(.*)$/) {
      ##-- unescape each ';'-separated piece ($_), not the whole capture ($1)
      $dc->{htmlHighlight} = [map {unescape($_)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{htmlHighlight});
    }
    elsif ($line =~ /^TextHighlighting\s*(.*)$/) {
      $dc->{textHighlight} = [map {unescape($_)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{textHighlight});
    }
    elsif ($line =~ /^TableHighlighting\s*(.*)$/) {
      $dc->{tableHighlight} = [map {unescape($_)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{tableHighlight});
    }
  }

  ##-- setup local options
  @show = (0) if (!@show);
  ##-- field names are the 2nd element of each selected index group
  $dc->{fieldNames} = [map {$_->[1]} @indices[@show]] if ($clobber || !$dc->{fieldNames});
  ##-- metaNames is only ever set when it has no true value yet ('clobber' does not apply here)
  if (!$dc->{metaNames}) {
    if (!$showMeta) {
      $dc->{metaNames} = ['file_'];
    }
    elsif (@meta) {
      $dc->{metaNames} = [map {$_->[2]} @meta];
    }
  }

  ##-- cleanup: close only handles opened here
  $fh->close if (!ref($src) || ref($src) eq 'SCALAR');
  return $dc;
}
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
##====================================================================== |
|
257
|
|
|
|
|
|
|
## Query requests (formerly in DDC::Client::Distributed) |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
## $buf = $dc->queryRaw($query_string) |
|
260
|
|
|
|
|
|
|
## $buf = $dc->queryRaw(\@raw_strings) |
|
261
|
|
|
|
|
|
|
sub queryRaw {
  ##-- Sends a query via queryRawNC(), closes the connection, and returns the response buffer.
  my ($dc,@query) = @_;
  my $buf = $dc->queryRawNC(@query);
  $dc->close(); ##-- this apparently has to happen: bummer
  return $buf;
}
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
## $buf = $dc->queryRawNC($query_string) |
|
269
|
|
|
|
|
|
|
## $buf = $dc->queryRawNC(\@raw_strings) |
|
270
|
|
|
|
|
|
|
## + guts for queryRaw() without implicit close() |
|
271
|
|
|
|
|
|
|
sub queryRawNC {
  ##-- Builds the request for $query, sends it, and returns the response from readData()
  ##   without closing the connection.
  ##    + ARRAY-ref query : elements are joined with "\001" and sent as-is
  ##    + mode raw/req... : the query string is sent unchanged
  ##    + otherwise       : a 'run_query Distributed' request is built from $dc's options
  my $dc    = shift;
  my $query = shift;

  my $request;
  if (UNIVERSAL::isa($query,'ARRAY')) {
    ##-- raw array: send raw data to DDC
    $request = join("\001",@$query);
  }
  elsif ($dc->{mode} =~ /^(?:raw|req)/i) {
    ##-- "raw" or "request" mode: send raw request to DDC
    $request = $query;
  }
  else {
    ##-- query string: send 'run-query Distributed'
    my @params = @$dc{qw(start limit timeout)};
    push(@params, $dc->{hint}) if ($dc->{hint});
    $request = join("\001",
		    "run_query Distributed",
		    $query,
		    $dc->{mode},
		    join(' ', @params));
  }
  $dc->send($request);

  ##-- get output buffer
  return $dc->readData();
}
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
## @bufs = $dc->queryMulti($queryString1, $queryString2, ...) |
|
294
|
|
|
|
|
|
|
## @bufs = $dc->queryMulti(\@queryStrings1, \@queryStrings2, ...) |
|
295
|
|
|
|
|
|
|
sub queryMulti {
  ##-- Runs each argument through queryRawNC() over one connection, closes the
  ##   connection afterwards, and returns the list of response buffers.
  my ($dc,@queries) = @_;
  my @bufs;
  foreach my $query (@queries) {
    push(@bufs, $dc->queryRawNC($query));
  }
  $dc->close(); ##-- this apparently has to happen: bummer
  return @bufs;
}
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
## $obj = $dc->queryJson($query_string) |
|
303
|
|
|
|
|
|
|
## $obj = $dc->queryJson(\@raw_strings) |
|
304
|
|
|
|
|
|
|
sub queryJson {
  ##-- Runs queryRaw() for $query and passes the response through decodeJson().
  my $dc    = shift;
  my $query = shift;
  return $dc->decodeJson($dc->queryRaw($query));
}
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
## $hits = $dc->query($query_string) |
|
310
|
|
|
|
|
|
|
sub query {
  ##-- Runs queryRaw() for $query and passes the response through parseData().
  my $dc    = shift;
  my $query = shift;
  return $dc->parseData($dc->queryRaw($query));
}
|
314
|
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
##====================================================================== |
|
317
|
|
|
|
|
|
|
## Common Requests |
|
318
|
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
## $rsp = $dc->request($request_string) |
|
320
|
|
|
|
|
|
|
sub request {
  ##-- Sends a request via requestNC(), closes the connection, and returns the response.
  my ($dc,@args) = @_;
  my $rsp = $dc->requestNC(@args);
  $dc->close();
  return $rsp;
}
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
## $rsp = $dc->requestNC($request_string) |
|
328
|
|
|
|
|
|
|
## + guts for request() which doesn't implicitly call close() |
|
329
|
|
|
|
|
|
|
sub requestNC {
  ##-- Sends $request and returns the response from readData(); does not close the connection.
  my ($dc,$request) = @_;
  $dc->send($request);
  return $dc->readData();
}
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
## $data = $dc->requestJson($request_string) |
|
336
|
|
|
|
|
|
|
sub requestJson {
  ##-- Sends $request via request() and passes the response through decodeJson().
  my ($dc,$request) = @_;
  return $dc->decodeJson($dc->request($request));
}
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
## $server_version = $dc->version() |
|
342
|
|
|
|
|
|
|
sub version {
  ##-- Returns the response to a "version" request.
  my ($dc) = @_;
  return $dc->request("version");
}
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
## $status = $dc->status() |
|
348
|
|
|
|
|
|
|
## $status = $dc->status($timeout) |
|
349
|
|
|
|
|
|
|
sub status {
  ##-- Sends a "status" request and returns the decoded response from requestJson().
  ##   $timeout falls back to $dc->{timeout}; it is omitted from the request if still undefined.
  my ($dc,$timeout) = @_;
  $timeout //= $dc->{timeout};
  my $request = "status";
  $request .= " $timeout" if (defined($timeout));
  return $dc->requestJson($request);
}
|
354
|
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
## $vstatus = $dc->vstatus() |
|
356
|
|
|
|
|
|
|
## $vstatus = $dc->vstatus($timeout) |
|
357
|
|
|
|
|
|
|
sub vstatus {
  ##-- Sends a "vstatus" request and returns the decoded response from requestJson().
  ##   $timeout falls back to $dc->{timeout}; it is omitted from the request if still undefined.
  my ($dc,$timeout) = @_;
  $timeout //= $dc->{timeout};
  my $request = "vstatus";
  $request .= " $timeout" if (defined($timeout));
  return $dc->requestJson($request);
}
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
## $info = $dc->info() |
|
364
|
|
|
|
|
|
|
## $info = $dc->info($timeout) |
|
365
|
|
|
|
|
|
|
sub info {
  ##-- Sends an "info" request and returns the decoded response from requestJson().
  ##   $timeout falls back to $dc->{timeout}; it is omitted from the request if still undefined.
  my ($dc,$timeout) = @_;
  $timeout //= $dc->{timeout};
  my $request = "info";
  $request .= " $timeout" if (defined($timeout));
  return $dc->requestJson($request);
}
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term) |
|
372
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout) |
|
373
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout, $subcorpus) |
|
374
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms) |
|
375
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout) |
|
376
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout, $subcorpus) |
|
377
|
|
|
|
|
|
|
sub expand_terms {
  ##-- Sends an 'expand_terms' request and returns the raw response buffer.
  ##   Arguments:
  ##    $chain     : expansion pipeline; an ARRAY-ref is joined with '|'
  ##    $terms     : term(s) to expand; an ARRAY-ref is joined with TAB
  ##    $timeout   : optional; falls back to $dc->{timeout}, then to 5
  ##    $subcorpus : optional; defaults to the empty string
  ##   The connection is closed after the response has been read.
  my ($dc,$chain,$terms,$timeout,$subcorpus) = @_;
  $chain = join('|', @$chain) if (UNIVERSAL::isa($chain,'ARRAY'));
  $terms = join("\t", @$terms) if (UNIVERSAL::isa($terms,'ARRAY'));

  ##-- hack: detect swapping of $timeout and $subcorpus (old DDC::Client::Distributed-style):
  ##   swap only if the value in the $timeout slot is not a plain integer
  ##   while the value in the $subcorpus slot is one
  $timeout   //= '';
  $subcorpus //= '';
  ($timeout,$subcorpus) = ($subcorpus,$timeout)
    if ($timeout ne '' && $subcorpus ne '' && $timeout =~ /[^0-9]/ && $subcorpus !~ /[^0-9]/);

  $timeout = $dc->{timeout} if ($timeout eq '');
  $timeout = 5 if (!defined($timeout) || $timeout eq '');
  $dc->send(join("\x01", 'expand_terms ', $chain, $terms, $timeout, $subcorpus));

  ##-- get output buffer
  my $buf = $dc->readData();
  $dc->close(); ##-- this apparently has to happen: bummer
  return $buf;
}
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term) |
|
398
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term, $timeout) |
|
399
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term, $timeout, $subcorpus) |
|
400
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms) |
|
401
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms, $timeout) |
|
402
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms, $timeout, $subcorpus) |
|
403
|
|
|
|
|
|
|
sub expand {
  ##-- Calls expand_terms() with the given arguments and passes the raw response
  ##   through parseExpandTermsResponse().
  my ($dc,@args) = @_;
  return $dc->parseExpandTermsResponse($dc->expand_terms(@args));
}
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
## $buf = $dc->get_first_hits($query) |
|
409
|
|
|
|
|
|
|
## $buf = $dc->get_first_hits($query,$timeout?,$limit?,$hint?) |
|
410
|
|
|
|
|
|
|
sub get_first_hits {
  ##-- Sends a 'get_first_hits' request for $query and returns the response from request().
  ##   Optional positional arguments (timeout, limit, hint) default to the
  ##   corresponding $dc options when not passed at all.
  my ($dc,$query,@rest) = @_;
  my $timeout = @rest ? shift(@rest) : $dc->{timeout};
  my $limit   = @rest ? shift(@rest) : $dc->{limit};
  my $hint    = @rest ? shift(@rest) : $dc->{hint};

  my $request = "get_first_hits $query\x{01}$timeout $limit";
  $request .= " $hint" if ($hint); ##-- hint is only appended when true
  return $dc->request($request);
}
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
## $buf = $dc->get_hit_strings($format?,$start?,$limit?) |
|
420
|
|
|
|
|
|
|
sub get_hit_strings {
  ##-- Sends a 'get_hit_strings' request and returns the response from request().
  ##   Optional positional arguments: format, start, limit.
  ##   Without an explicit format, 'json' is used when $dc->{mode} is 'raw',
  ##   and the empty string otherwise.
  my ($dc,@rest) = @_;

  my $format;
  if (@rest) {
    $format = shift(@rest);
  }
  elsif ($dc->{mode} eq 'raw') {
    $format = 'json';
  }
  else {
    $format = '';
  }
  my $start = @rest ? shift(@rest) : $dc->{start};
  my $limit = @rest ? shift(@rest) : $dc->{limit};

  return $dc->request("get_hit_strings $format\x{01}$start $limit");
}
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
## $buf = $dc->run_query($corpus,$query,$format?,$start?,$limit?,$timeout?,$hint?) |
|
430
|
|
|
|
|
|
|
sub run_query {
  ##-- Sends a 'run_query' request and returns the response from request().
  ##   Positional arguments after $corpus and $query (format, start, limit, timeout, hint)
  ##   default to the $dc options mode, start, limit, timeout, and hint when not passed.
  ##   An undefined $corpus is replaced by 'Distributed'.
  my ($dc,$corpus,$query,@rest) = @_;
  my $format  = @rest ? shift(@rest) : $dc->{mode};
  my $start   = @rest ? shift(@rest) : $dc->{start};
  my $limit   = @rest ? shift(@rest) : $dc->{limit};
  my $timeout = @rest ? shift(@rest) : $dc->{timeout};
  my $hint    = @rest ? shift(@rest) : $dc->{hint};
  $corpus //= 'Distributed';

  my $request = "run_query $corpus\x{01}$query\x{01}$format\x{01}$start $limit $timeout";
  $request .= " $hint" if ($hint); ##-- hint is only appended when true
  return $dc->request($request);
}
|
442
|
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
##====================================================================== |
|
444
|
|
|
|
|
|
|
## Low-level communications |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
## \%connect = $dc->parseAddr() |
|
447
|
|
|
|
|
|
|
## \%connect = $CLASS_OR_OBJECT->parseAddr(\%connect, $PEER_OR_LOCAL='peer', %options) |
|
448
|
|
|
|
|
|
|
## \%connect = $CLASS_OR_OBJECT->parseAddr({url=>$url}, $PEER_OR_LOCAL='peer', %options) |
|
449
|
|
|
|
|
|
|
## + parses connect URLs to option-hashes suitable for use as $dc->{connect} |
|
450
|
|
|
|
|
|
|
## + supported URLs formats: |
|
451
|
|
|
|
|
|
|
## inet://ADDR:PORT?OPT=VAL... # canonical INET socket URL |
|
452
|
|
|
|
|
|
|
## unix://UNIX_PATH?OPT=VAL... # canonical UNIX socket URL |
|
453
|
|
|
|
|
|
|
## unix:UNIX_PATH?OPT=VAL... # = unix://UNIX_PATH?OPT=val |
|
454
|
|
|
|
|
|
|
## ADDR?OPT=VAL... # = inet://ADDR:50000?OPT=VAL... |
|
455
|
|
|
|
|
|
|
## :PORT?OPT=VAL... # = inet://localhost:PORT?OPT=VAL... |
|
456
|
|
|
|
|
|
|
## ADDR:PORT?OPT=VAL... # = inet://ADDR:PORT?OPT=VAL... |
|
457
|
|
|
|
|
|
|
## /UNIX_PATH?OPT=VAL... # = unix:///UNIX_PATH?OPT=VAL... |
|
458
|
|
|
|
|
|
|
sub parseAddr {
  ##-- Normalizes a connection specification to an option HASH-ref.
  ##   Arguments:
  ##    $connect : HASH-ref of options (modified in place), a URL string, or false
  ##               (false + object invocant: $that->{connect} is parsed and stored back;
  ##                undef otherwise: 'inet://localhost:50000')
  ##    $prefix  : key prefix for address options, 'Peer' (default) or 'Local'
  ##    %opts    : additional options copied into the result last
  ##   Returns the option HASH-ref; dies for an unsupported URL scheme.
  my ($that,$connect,$prefix,%opts) = @_;
  my ($override);
  if (!$connect && ref($that)) {
    $connect  = $that->{connect};
    $override = 1;
  }
  $connect //= 'inet://localhost:50000';
  $connect = {url=>$connect} if (!UNIVERSAL::isa($connect,'HASH'));

  $prefix ||= 'Peer';
  $prefix = ucfirst($prefix);
  my $url = $connect->{URL} || $connect->{Url} || $connect->{url};
  if (defined($url)) {
    my ($base,$opts) = split(/\?/,$url,2);
    $base //= ''; ##-- an empty URL yields no fields at all

    ##-- scheme detection: 'SCHEME://...' for any scheme, but the short form 'SCHEME:...'
    ##   only for known schemes, so that a bare 'HOST:PORT' is not mistaken for a scheme
    my $scheme = '';
    if ($base =~ s{^([\w\+\-]+)://}{} || $base =~ s{^(unix|inet|tcp):}{}i) {
      $scheme = $1;
    }

    if (lc($scheme) eq 'unix' || (!$scheme && $base =~ m{^/})) {
      $connect->{Domain}  = 'UNIX';
      $connect->{$prefix} = $base;
    }
    elsif (!$scheme || grep {$_ eq lc($scheme)} qw(inet tcp)) {
      $connect->{Domain} = 'INET';
      my ($host,$port) = split(':',$base,2);
      $host ||= 'localhost';
      $port ||= 50000;
      @$connect{"${prefix}Addr","${prefix}Port"} = ($host,$port);
    }
    else {
      die(__PACKAGE__, "::parseAddr(): unsupported scheme '$scheme' for URL $url");
    }

    ##-- URL options: always produce a (key,value) pair, even for an option without '='
    my %urlopts = map { my ($key,$val) = split(/=/,$_,2); ($key => $val) } grep {$_} split(/[\&\;]/,($opts//''));
    @$connect{keys %urlopts} = values %urlopts;
  }
  @$connect{keys %opts} = values %opts;

  $that->{connect} = $connect if ($override);
  return $connect;
}
|
496
|
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
## $str = $dc->addrStr() |
|
498
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr(\%connect,$PEER_OR_LOCAL) |
|
499
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr($url,$PEER_OR_LOCAL) |
|
500
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr($sock,$PEER_OR_LOCAL) |
|
501
|
|
|
|
|
|
|
sub addrStr {
  ##-- Returns a URL-like string ('unix://PATH' or 'inet://ADDR[:PORT]') for an address.
  ##   Arguments:
  ##    $addr   : socket object, DDC::Client object, option HASH-ref, or URL string;
  ##              for an object invocant it defaults to $that->{sock} || $that->{connect}
  ##    $prefix : 'Peer' (default) or 'Local'
  my ($that,$addr,$prefix) = @_;
  $addr = ($that->{sock} || $that->{connect}) if (ref($that) && !defined($addr));
  $prefix ||= 'Peer';
  $prefix = ucfirst($prefix);
  my $is_peer = (lc($prefix) eq 'peer');

  if (UNIVERSAL::isa($addr,'IO::Socket::UNIX')) {
    ##-- ask the socket itself via its accessor methods: it is not a HASH-ref of options;
    ##   eval guards against unbound or unconnected sockets
    my $path = eval { $is_peer ? $addr->peerpath : $addr->hostpath };
    return "unix://".($path // '');
  }
  elsif (UNIVERSAL::isa($addr,'IO::Socket::INET')) {
    my $mprefix = ($is_peer ? 'peer' : 'sock');
    return "inet://".$addr->can($mprefix."host")->($addr).":".$addr->can($mprefix."port")->($addr);
  }

  ##-- not a socket: reduce everything else to an option HASH-ref
  $addr = $addr->{connect} if (UNIVERSAL::isa($addr,'DDC::Client'));
  $addr = $that->parseAddr($addr,$prefix) if (!ref($addr));

  my ($url);
  ##-- compare the domain case-insensitively, defaulting to INET, as open() does
  if (lc($addr->{Domain} // 'INET') eq 'unix') {
    $url = "unix://$addr->{$prefix}";
  }
  else {
    ##-- the port is only appended when both address and port are true
    $url = "inet://".($addr->{"${prefix}Addr"} && $addr->{"${prefix}Port"}
		      ? ($addr->{"${prefix}Addr"}.":".$addr->{"${prefix}Port"})
		      : $addr->{"${prefix}Addr"});
  }
  return $url;
}
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
## $io_socket = $dc->open() |
|
536
|
|
|
|
|
|
|
## + parses the connection address via parseAddr(), then creates $dc->{sock}:
##   an IO::Socket::UNIX if $dc->{connect}{Domain} is 'unix' (case-insensitive),
##   otherwise an IO::Socket::INET (the default domain is 'INET')
## + returns undef if the socket could not be created
## + applies SO_LINGER from $dc->{linger} if set, and enables autoflush
sub open {
  my $dc = shift;
  $dc->parseAddr();

  ##-- select socket class by connection domain
  my $domain    = $dc->{connect}{Domain} // 'INET';
  my $sockclass = (lc($domain) eq 'unix'
		   ? 'IO::Socket::UNIX'  ##-- v0.43: use unix-domain socket connection
		   : 'IO::Socket::INET'  ##-- compatibility hack: use INET-domain sockets (TCP)
		  );
  my $sock = $dc->{sock} = $sockclass->new(%{$dc->{'connect'}});
  return undef if (!$sock);

  ##-- socket options
  if ($dc->{linger}) {
    $sock->setsockopt(SOL_SOCKET, SO_LINGER, pack('II',@{$dc->{linger}}));
  }
  $sock->autoflush(1);
  return $sock;
}
|
552
|
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
## undef = $dc->close() |
|
554
|
|
|
|
|
|
|
## + closes $dc->{sock} if it is defined, then removes the {sock} key from $dc
## + the value of the final delete() is what gets returned
sub close {
  my $dc   = shift;
  my $sock = $dc->{sock};
  $sock->close() if (defined($sock));
  return delete($dc->{sock});
}
|
559
|
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
## $encoded = $dc->ddc_encode(@message_strings) |
|
561
|
|
|
|
|
|
|
## + concatenates @message_strings into a single message
## + if $dc->{encoding} is set and the message carries perl's utf8 flag,
##   the message is first encoded to bytes in that encoding
## + returns the message prefixed with its length, packed using $ifmt
sub ddc_encode {
  my ($dc,@strings) = @_;
  my $payload = join('',@strings);
  if ($dc->{encoding} && utf8::is_utf8($payload)) {
    $payload = encode($dc->{encoding},$payload);
  }
  return pack($ifmt,length($payload)) . $payload;
}
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
## $decoded = $dc->ddc_decode($response_buf) |
|
569
|
|
|
|
|
|
|
## + unpacks a length-prefixed message ("$ifmt/a*") from $response_buf
## + if $dc->{encoding} is set, the payload is decoded from that encoding
## + returns the (possibly decoded) payload
sub ddc_decode {
  my ($dc,$packed) = @_;
  my $payload = unpack("$ifmt/a*",$packed);
  if ($dc->{encoding}) {
    $payload = decode($dc->{encoding},$payload);
  }
  return $payload;
}
|
575
|
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
## undef = $dc->send(@message_strings) |
|
577
|
|
|
|
|
|
|
## + sends @message_strings |
|
578
|
|
|
|
|
|
|
## + opens the connection via open() if $dc->{sock} is not yet defined
## + confess()es if no socket is available after the open() attempt, rather than
##   failing later with an opaque "Can't call method ... on an undefined value"
## + returns the result of sendfh() on $dc->{sock}
sub send {
  my $dc = shift;
  $dc->open() if (!defined($dc->{sock}));
  confess(ref($dc), "::send(): could not open connection to server: $!")
    if (!defined($dc->{sock}));
  return $dc->sendfh($dc->{sock}, @_);
}
|
583
|
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
## undef = $dc->sendfh($fh,@message_strings) |
|
585
|
|
|
|
|
|
|
## + sends @message_strings to $fh, prepending total length |
|
586
|
|
|
|
|
|
|
## + encodes @message_strings with ddc_encode() (which prepends the packed length)
##   and prints the result to $fh
## + returns the result of $fh->print()
sub sendfh {
  my ($dc,$fh,@strings) = @_;
  return $fh->print($dc->ddc_encode(@strings));
}
|
590
|
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
## $size = $dc->readSize() |
|
592
|
|
|
|
|
|
|
## $size = $dc->readSize($fh) |
|
593
|
|
|
|
|
|
|
## + reads $ilen bytes from $fh (default: $dc->{sock}) and unpacks them with $ifmt
## + confess()es if fewer than $ilen bytes could be read
sub readSize {
  my ($dc,$fh) = @_;
  $fh ||= $dc->{sock};

  my $size_packed;
  my $nread = ($fh->read($size_packed,$ilen) || 0);
  if ($nread != $ilen) {
    confess(ref($dc), "::readSize(): could not read size from socket: $!");
  }

  return 0 if (!defined($size_packed));
  return unpack($ifmt,$size_packed);
}
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
## $data = $dc->readBytes($nbytes) |
|
604
|
|
|
|
|
|
|
## $data = $dc->readBytes($nbytes,$fh) |
|
605
|
|
|
|
|
|
|
## + reads exactly $nbytes bytes from $fh (default: $dc->{sock}) and returns them
## + confess()es on a read error (read() returned undef) or on a short read
sub readBytes {
  my ($dc,$nbytes,$fh) = @_;
  my ($buf);
  $fh = $dc->{sock} if (!$fh);
  my $nread = $fh->read($buf,$nbytes);

  ##-- read() returns undef on error: report it explicitly instead of comparing undef numerically
  confess(ref($dc), "::readBytes(): error reading $nbytes bytes of data: $!")
    if (!defined($nread));
  confess(ref($dc), "::readBytes(): failed to read $nbytes bytes of data (only found $nread): $!")
    if ($nread != $nbytes);
  return $buf;
}
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
## $data = $dc->readData() |
|
616
|
|
|
|
|
|
|
## $data = $dc->readData($fh) |
|
617
|
0
|
|
|
0
|
1
|
|
## + reads one length-prefixed message: readSize() gives the byte count, readBytes() the payload
sub readData {
  my ($dc,$fh) = @_;
  return $dc->readBytes($dc->readSize($fh), $fh);
}
|
618
|
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
##====================================================================== |
|
620
|
|
|
|
|
|
|
## Hit Parsing |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
## $hitList = $dc->parseData($buf) |
|
623
|
|
|
|
|
|
|
## + dispatches $buf to the parser method matching $dc->{mode}
##   ('json', 'table', 'text' or 'html'); confess()es for any other mode
sub parseData {
  my %parser_for = (
		    json  => 'parseJsonData',
		    table => 'parseTableData',
		    text  => 'parseTextData',
		    html  => 'parseHtmlData',
		   );
  my $mode   = $_[0]{mode};
  my $method = (defined($mode) ? $parser_for{$mode} : undef);
  return $_[0]->$method($_[1]) if (defined($method));
  confess(__PACKAGE__ . "::parseData(): unknown query mode '$_[0]{mode}'");
}
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
632
|
|
|
|
|
|
|
## Hit Parsing: Text |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
## $hitList = $dc->parseTextData($buf) |
|
635
|
|
|
|
|
|
|
## + returns a DDC::HitList |
|
636
|
|
|
|
|
|
|
## + parses a text-mode response buffer into a new DDC::HitList
## + the buffer is split at the first "\001" into hit lines and a status footer;
##   the footer fills istatus_, nstatus_, end_, nhits_, ndocs_ and error_
## + "Corpora Distribution:" and "Relevant Documents Distribution:" lines are stored
##   as dhits_ and ddocs_; every other line becomes a DDC::Hit
## + honours $dc->{keepRaw}, {parseMeta}, {parseContext}, {metaNames}, {expandFields}
sub parseTextData {
  my ($dc,$buf) = @_;
  my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit});

  ##-- parse response macro structure
  if ($dc->{encoding} && !utf8::is_utf8($buf)) {
    $buf = decode($dc->{encoding},$buf);
  }
  my ($body,$footer) = split("\001", $buf, 2);

  ##-- parse administrative data from response footer
  chomp($footer);
  @$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $footer,6);

  ##-- successful response: parse hit data
  my $metaNames  = $dc->{metaNames} || [];
  my $wantFields = ($dc->{parseMeta} || $dc->{parseContext});
 LINE:
  foreach my $line (split(/\n/,$body)) {
    ##-- distribution summaries are not hits
    if ($line =~ /^Corpora Distribution\:(.*)$/) {
      $hits->{dhits_} = $1;
      next LINE;
    }
    if ($line =~ /^Relevant Documents Distribution:(.*)$/) {
      $hits->{ddocs_} = $1;
      next LINE;
    }

    my $hit = DDC::Hit->new;
    push(@{$hits->{hits_}},$hit);
    $hit->{raw_} = $line if ($dc->{keepRaw});
    next LINE if (!$wantFields);

    ##-- fields are separated by " ### "; the last one is the context buffer
    my @fields = split(/ ### /, $line);
    my $ctxbuf = pop(@fields);

    ##-- parse: metadata: file, page, ..., indices
    if ($dc->{parseMeta}) {
      $hit->{meta_}{file_}    = shift(@fields);
      $hit->{meta_}{page_}    = shift(@fields);
      $hit->{meta_}{indices_} = [split(' ', pop(@fields))];
      foreach my $i (0..$#fields) {
	$hit->{meta_}{$metaNames->[$i]||"${i}_"} = $fields[$i];
      }
    }

    ##-- parse: context
    if ($dc->{parseContext}) {
      $hit->{ctx_} = $dc->parseTextContext($ctxbuf);
    }
  }

  if ($dc->{expandFields}) {
    $hits->expandFields($dc->{fieldNames});
  }
  return $hits;
}
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
## \@context_data = $dc->parseTextContext($context_buf) |
|
686
|
|
|
|
|
|
|
## + parses a text-mode context buffer into [\@left, \@target, \@right]
## + sentences are separated by runs of 4 spaces; tokens by $dc->{tokenSeparator}
## + a sentence containing a token that starts with $dc->{textHighlight}[0] is a target
##   sentence: its tokens are split on $dc->{fieldSeparator} into [$is_match,@fields],
##   and highlight markers are stripped from matched tokens
## + other sentences contribute plain token strings to the left context (before any
##   target token has been seen) or to the right context (afterwards)
sub parseTextContext {
  my ($dc,$ctx) = @_;

  ##-- defaults: separators and highlight markers
  my $hl   = $dc->{textHighlight};
  my $fs   = qr(\Q$dc->{fieldSeparator}\E);
  my $ts   = qr(\Q$dc->{tokenSeparator}\E\ *);
  my $hls  = qr(\Q$dc->{tokenSeparator}\E\ *\Q$hl->[0]\E);
  my $hlw0 = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E)));
  my $hlw1 = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$);

  ##-- split into sentences
  $ctx =~ s/^\s*//;
  my ($left,$target,$right) = ([],[],[]);
  foreach my $sbuf (split(/ {4}/,$ctx)) {
    ##-- classify before the leading separator is stripped
    my $is_target = ($sbuf =~ $hls);
    $sbuf =~ s/^$ts//;

    if ($is_target) {
      ##-- target sentence with index dump: parse words
      foreach my $tok (split($ts,$sbuf)) {
	my $w = [0,split($fs,$tok)];
	if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) {
	  ##-- matched token
	  $w->[1]     =~ s/$hlw0//;
	  $w->[$#$w]  =~ s/$hlw1//;
	  $w->[0]     = 1;
	}
	push(@$target,$w);
      }
    }
    elsif (!@$target) {
      ##-- context sentence (surface strings only): left context
      push(@$left, split($ts,$sbuf));
    }
    else {
      ##-- context sentence (surface strings only): right context
      push(@$right, split($ts,$sbuf));
    }
  }

  return [$left,$target,$right];
}
|
736
|
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
738
|
|
|
|
|
|
|
## Hit Parsing: Table |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
## $hitList = $dc->parseTableData($buf) |
|
741
|
|
|
|
|
|
|
## + returns a DDC::HitList |
|
742
|
|
|
|
|
|
|
sub parseTableData { |
|
743
|
0
|
|
|
0
|
1
|
|
my ($dc,$buf) = @_; |
|
744
|
0
|
|
|
|
|
|
my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
##-- parse response macro structure |
|
747
|
0
|
0
|
0
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); |
|
748
|
0
|
|
|
|
|
|
my ($buflines,$bufinfo) = split("\001", $buf, 2); |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
##-- parse administrative data from response footer |
|
751
|
0
|
|
|
|
|
|
chomp($bufinfo); |
|
752
|
0
|
|
|
|
|
|
@$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
##-- successful response: parse hit data |
|
755
|
0
|
|
|
|
|
|
my @buflines = split(/\n/,$buflines); |
|
756
|
0
|
|
|
|
|
|
my ($bufline,$hit,@fields,$field,$val); |
|
757
|
0
|
|
|
|
|
|
foreach $bufline (@buflines) { |
|
758
|
0
|
|
|
|
|
|
push(@{$hits->{hits_}},$hit=DDC::Hit->new); |
|
|
0
|
|
|
|
|
|
|
|
759
|
0
|
0
|
|
|
|
|
$hit->{raw_} = $bufline if ($dc->{keepRaw}); |
|
760
|
|
|
|
|
|
|
|
|
761
|
0
|
0
|
0
|
|
|
|
if ($dc->{parseMeta} || $dc->{parseContext}) { |
|
762
|
0
|
|
|
|
|
|
@fields = split("\002", $bufline); |
|
763
|
0
|
|
|
|
|
|
while (defined($field=shift(@fields))) { |
|
764
|
|
|
|
|
|
|
|
|
765
|
0
|
0
|
|
|
|
|
if ($field eq 'keyword') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
##-- special handling for 'keyword' field |
|
767
|
0
|
|
|
|
|
|
$val = shift(@fields); |
|
768
|
0
|
|
|
|
|
|
while ($val =~ /\(.*?\S)\s*\<\/orth\>/g) { |
|
769
|
0
|
|
|
|
|
|
push(@{$hit->{orth_}}, $1); |
|
|
0
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
} |
|
771
|
|
|
|
|
|
|
} |
|
772
|
|
|
|
|
|
|
elsif ($field eq 'indices') { |
|
773
|
|
|
|
|
|
|
##-- special handling for 'indices' field |
|
774
|
0
|
|
|
|
|
|
$val = shift(@fields); |
|
775
|
0
|
|
|
|
|
|
$hit->{meta_}{indices_} = [split(' ',$val)]; |
|
776
|
|
|
|
|
|
|
} |
|
777
|
|
|
|
|
|
|
elsif ($field =~ /^\s*\
|
|
778
|
|
|
|
|
|
|
##-- special handling for context pseudo-field |
|
779
|
0
|
0
|
|
|
|
|
$hit->{ctx_} = $dc->parseTableContext($field) if ($dc->{parseContext}); |
|
780
|
|
|
|
|
|
|
} |
|
781
|
|
|
|
|
|
|
elsif ($dc->{parseMeta}) { |
|
782
|
|
|
|
|
|
|
##-- normal bibliographic field |
|
783
|
0
|
0
|
|
|
|
|
$field .= '_' if ($field =~ /^(?:scan|orig|page|rank(?:_debug)?)$/); ##-- special handling for ddc-internal fields |
|
784
|
0
|
|
|
|
|
|
$val = shift(@fields); |
|
785
|
0
|
|
|
|
|
|
$hit->{meta_}{$field} = $val; |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
} |
|
788
|
|
|
|
|
|
|
} |
|
789
|
|
|
|
|
|
|
} |
|
790
|
|
|
|
|
|
|
|
|
791
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
|
792
|
0
|
|
|
|
|
|
return $hits; |
|
793
|
|
|
|
|
|
|
} |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
## \@context_data = $dc->parseTableContext($context_buf) |
|
797
|
|
|
|
|
|
|
sub parseTableContext { |
|
798
|
0
|
|
|
0
|
0
|
|
my ($dc,$ctx) = @_; |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
##-- defaults |
|
801
|
0
|
|
|
|
|
|
my $fieldNames = $dc->{fieldNames}; |
|
802
|
0
|
|
|
|
|
|
my $fs = qr(\Q$dc->{fieldSeparator}\E); |
|
803
|
0
|
|
|
|
|
|
my $ts = qr(\Q$dc->{tokenSeparator}\E\ *); |
|
804
|
0
|
|
|
|
|
|
my $hl = $dc->{tableHighlight}; |
|
805
|
0
|
|
|
|
|
|
my $hlw0 = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); |
|
806
|
0
|
|
|
|
|
|
my $hlw1 = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
##-- split into sentences |
|
809
|
0
|
|
|
|
|
|
my $sents = [[],[],[]]; |
|
810
|
0
|
|
|
|
|
|
my ($sbuf,@s,$w); |
|
811
|
|
|
|
|
|
|
|
|
812
|
0
|
|
|
|
|
|
foreach $sbuf (split(/\<\/s\>\s*/,$ctx)) { |
|
813
|
|
|
|
|
|
|
|
|
814
|
0
|
0
|
|
|
|
|
if ($sbuf =~ /^\s*/) { |
|
815
|
|
|
|
|
|
|
##-- target sentence with index dump: parse it |
|
816
|
0
|
|
|
|
|
|
$sbuf =~ s|^\s*\]*)?\>\s*$ts||; |
|
817
|
0
|
|
|
|
|
|
@s = map {[0,split($fs,$_)]} split($ts,$sbuf); |
|
|
0
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
##-- parse words |
|
820
|
0
|
|
|
|
|
|
foreach $w (@s) { |
|
821
|
0
|
0
|
0
|
|
|
|
if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { |
|
822
|
|
|
|
|
|
|
##-- matched token |
|
823
|
0
|
|
|
|
|
|
$w->[1] =~ s/$hlw0//; |
|
824
|
0
|
|
|
|
|
|
$w->[$#$w] =~ s/$hlw1//; |
|
825
|
0
|
|
|
|
|
|
$w->[0] = 1; |
|
826
|
|
|
|
|
|
|
} |
|
827
|
|
|
|
|
|
|
} |
|
828
|
0
|
|
|
|
|
|
push(@{$sents->[1]}, @s); |
|
|
0
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
} |
|
830
|
|
|
|
|
|
|
else { |
|
831
|
|
|
|
|
|
|
##-- context sentence; surface strings only |
|
832
|
0
|
|
|
|
|
|
$sbuf =~ s|^\s*\]*)?\>$ts||; |
|
833
|
0
|
|
|
|
|
|
@s = split($ts,$sbuf); |
|
834
|
0
|
0
|
|
|
|
|
if (!@{$sents->[1]}) { |
|
|
0
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
##-- left context |
|
836
|
0
|
|
|
|
|
|
push(@{$sents->[0]}, @s); |
|
|
0
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
} else { |
|
838
|
|
|
|
|
|
|
##-- right context |
|
839
|
0
|
|
|
|
|
|
push(@{$sents->[2]}, @s); |
|
|
0
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
} |
|
841
|
|
|
|
|
|
|
} |
|
842
|
|
|
|
|
|
|
} |
|
843
|
|
|
|
|
|
|
|
|
844
|
0
|
|
|
|
|
|
return $sents; |
|
845
|
|
|
|
|
|
|
} |
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
849
|
|
|
|
|
|
|
## Hit Parsing: JSON |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
## $obj = $dc->decodeJson($buf) |
|
852
|
|
|
|
|
|
|
## + decodes the JSON text in $buf and returns the resulting perl data structure
## + if $dc->{encoding} is set and $buf does not carry perl's utf8 flag, a decoded
##   copy is parsed; the caller's buffer is never modified
## + the decoder object is cached in $dc->{jxs}; on first use it is created from the
##   class named by $JSON_BACKEND, falling back to 'JSON' if that is undefined
sub decodeJson {
  my $dc = shift;
  my ($bufr) = \$_[0];
  if ($dc->{encoding} && !utf8::is_utf8($$bufr)) {
    my $buf = decode($dc->{encoding},$$bufr);
    $bufr = \$buf;
  }

  my $jxs = $dc->{jxs};
  if (!defined($jxs)) {
    ##-- load and instantiate the SAME class: the 'JSON' default applies to both steps
    my $class = $JSON_BACKEND // 'JSON';
    (my $file = "$class.pm") =~ s{::}{/}g;
    require $file;
    $jxs = $dc->{jxs} = $class->new->utf8(0)->relaxed(1)->canonical(0);
  }
  return $jxs->decode($$bufr);
}
|
868
|
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
## $hitList = $dc->parseJsonData($buf) |
|
870
|
|
|
|
|
|
|
## + returns a DDC::HitList |
|
871
|
|
|
|
|
|
|
## + decodes a JSON response with decodeJson() and wraps it in a DDC::HitList;
##   $dc->{start} and $dc->{limit} are passed after the decoded keys
## + every element of $hits->{hits_} is blessed into DDC::Hit
## + fields are expanded via expandFields() if $dc->{expandFields} is set
sub parseJsonData {
  my $dc   = $_[0];
  my $data = $dc->decodeJson($_[1]);
  my $hits = DDC::HitList->new(%$data,
			       start=>$dc->{start},
			       limit=>$dc->{limit},
			      );

  foreach my $hit (@{$hits->{hits_}||[]}) {
    $hit = bless($hit,'DDC::Hit');
  }
  if ($dc->{expandFields}) {
    $hits->expandFields($dc->{fieldNames});
  }
  return $hits;
}
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
|
885
|
|
|
|
|
|
|
## Hit Parsing: expand_terms() |
|
886
|
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
## \@terms = $dc->parseExpandTermsResponse($buf) |
|
888
|
|
|
|
|
|
|
## @terms = $dc->parseExpandTermsResponse($buf) |
|
889
|
|
|
|
|
|
|
## + splits $buf on runs of TAB, CR and LF and discards empty items
## + the first item is the status: die()s unless it begins with "0 "
## + returns the remaining items as a list (list context) or as an ARRAY-ref
sub parseExpandTermsResponse {
  my (undef,$buf) = @_;
  my @items  = grep {defined($_) && $_ ne ''} split(/[\t\r\n]+/,$buf);
  my $status = shift(@items);
  die("error in expand_terms response") if ($status !~ /^0 /);
  return wantarray ? @items : \@items;
}
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
1; ##-- be happy |
|
898
|
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
__END__ |