line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## File: DDC::Client.pm |
4
|
|
|
|
|
|
|
## Author: Bryan Jurish |
5
|
|
|
|
|
|
|
## Description: |
6
|
|
|
|
|
|
|
## + DDC Query utilities: client sockets |
7
|
|
|
|
|
|
|
##====================================================================== |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package DDC::Client; |
10
|
26
|
|
|
26
|
|
10658
|
use DDC::Utils qw(:escape); |
|
26
|
|
|
|
|
68
|
|
|
26
|
|
|
|
|
3911
|
|
11
|
26
|
|
|
26
|
|
179
|
use DDC::HitList; |
|
26
|
|
|
|
|
64
|
|
|
26
|
|
|
|
|
474
|
|
12
|
26
|
|
|
26
|
|
120
|
use DDC::Hit; |
|
26
|
|
|
|
|
45
|
|
|
26
|
|
|
|
|
430
|
|
13
|
26
|
|
|
26
|
|
16170
|
use IO::Handle; |
|
26
|
|
|
|
|
168718
|
|
|
26
|
|
|
|
|
1213
|
|
14
|
26
|
|
|
26
|
|
13012
|
use IO::File; |
|
26
|
|
|
|
|
51376
|
|
|
26
|
|
|
|
|
2798
|
|
15
|
26
|
|
|
26
|
|
14327
|
use IO::Socket::INET; |
|
26
|
|
|
|
|
390312
|
|
|
26
|
|
|
|
|
184
|
|
16
|
26
|
|
|
26
|
|
27622
|
use Encode qw(encode decode); |
|
26
|
|
|
|
|
279189
|
|
|
26
|
|
|
|
|
2001
|
|
17
|
26
|
|
|
26
|
|
207
|
use Carp; |
|
26
|
|
|
|
|
58
|
|
|
26
|
|
|
|
|
1344
|
|
18
|
26
|
|
|
26
|
|
167
|
use strict; |
|
26
|
|
|
|
|
55
|
|
|
26
|
|
|
|
|
1649
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
##====================================================================== |
21
|
|
|
|
|
|
|
## Globals |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
## $ifmt |
24
|
|
|
|
|
|
|
## + pack format to use for integer sizes passed to and from DDC |
25
|
|
|
|
|
|
|
## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) |
26
|
|
|
|
|
|
|
## + for ddc-1.x, use machine word size and endian-ness of server |
27
|
|
|
|
|
|
|
our $ifmt = 'V'; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
## $ilen |
30
|
|
|
|
|
|
|
## + length in bytes of message size integer used for DDC protocol in bytes |
31
|
|
|
|
|
|
|
## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) |
32
|
|
|
|
|
|
|
## + for ddc-1.x, use machine word size and endian-ness of server |
33
|
|
|
|
|
|
|
our $ilen = 4; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
## $JSON_BACKEND |
36
|
|
|
|
|
|
|
## + underlying JSON module (default='JSON') |
37
|
|
|
|
|
|
|
our ($JSON_BACKEND); |
38
|
|
|
|
|
|
|
BEGIN { |
39
|
26
|
50
|
|
26
|
|
169625
|
$JSON_BACKEND = 'JSON' if (!defined($JSON_BACKEND)); |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
##====================================================================== |
43
|
|
|
|
|
|
|
## Constructors etc |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
## $dc = $CLASS_OR_OBJ->new(%args) |
46
|
|
|
|
|
|
|
## + %args: |
47
|
|
|
|
|
|
|
## ( |
48
|
|
|
|
|
|
|
## ##-- connection options |
49
|
|
|
|
|
|
|
## connect=>\%connectArgs, ##-- passed to IO::Socket::(INET|UNIX)->new(); also accepts connect=>$connectURL |
50
|
|
|
|
|
|
|
## mode =>$queryMode, ##-- one of 'table', 'html', 'text', 'json', or 'raw'; default='json' ('html' is not yet supported) |
51
|
|
|
|
|
|
|
## linger =>\@linger, ##-- SO_LINGER socket option; default=[1,0]: immediate termination |
52
|
|
|
|
|
|
|
## ## |
53
|
|
|
|
|
|
|
## ##-- query options (formerly only in DDC::Client::Distributed) |
54
|
|
|
|
|
|
|
## start =>$start, ##-- index of first hit to fetch (default=0) |
55
|
|
|
|
|
|
|
## limit =>$limit, ##-- maximum number of hits to fetch (default=10) |
56
|
|
|
|
|
|
|
## timeout =>$secs, ##-- query timeout in seconds (lower bound, default=60) |
57
|
|
|
|
|
|
|
## hint =>$hint, ##-- navigation hint (optional; default=undef: none) |
58
|
|
|
|
|
|
|
## ## |
59
|
|
|
|
|
|
|
## ##-- hit parsing options (mostly obsolete) |
60
|
|
|
|
|
|
|
## parseMeta=>$bool, ##-- if true, hit metadata will be parsed to %$hit (default=1) |
61
|
|
|
|
|
|
|
## parseContext=>$bool, ##-- if true, hit context data will be parsed to $hit->{ctx_} (default=1) |
62
|
|
|
|
|
|
|
## metaNames =>\@names, ##-- metadata field names (default=undef (none)) |
63
|
|
|
|
|
|
|
## expandFields => $bool, ##-- whether to implicitly expand hit fields to HASH-refs (default=true; only valid for 'table' mode) |
64
|
|
|
|
|
|
|
## keepRaw=>$bool, ##-- if false, raw context buffer will be deleted after parsing context data (default=false) |
65
|
|
|
|
|
|
|
## #defaultField => $name, ##-- default field names (default='w') |
66
|
|
|
|
|
|
|
## |
67
|
|
|
|
|
|
|
## fieldSeparator => $char, ##-- intra-token field separator (default="\x{1f}": ASCII unit separator) |
68
|
|
|
|
|
|
|
## tokenSeparator => $char, ##-- inter-token separator (default="\x{1e}": ASCII record separator) |
69
|
|
|
|
|
|
|
## |
70
|
|
|
|
|
|
|
## textHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, text mode (default=[qw(&& && _& &_)]) |
71
|
|
|
|
|
|
|
## htmlHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, html mode (default=[('','') x 2]) |
72
|
|
|
|
|
|
|
## tableHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, table mode (default=[qw(&& && _& &_)]) |
73
|
|
|
|
|
|
|
## ) |
74
|
|
|
|
|
|
|
## + default \%connectArgs: |
75
|
|
|
|
|
|
|
## Domain=>'INET', ##-- also accepts 'UNIX' |
76
|
|
|
|
|
|
|
## PeerAddr=>'localhost', |
77
|
|
|
|
|
|
|
## PeerPort=>50000, |
78
|
|
|
|
|
|
|
## Proto=>'tcp', |
79
|
|
|
|
|
|
|
## Type=>SOCK_STREAM, |
80
|
|
|
|
|
|
|
## Blocking=>1, |
81
|
|
|
|
|
|
|
## + URL specification of \%connectArgs via connect=>{url=>$url} or connect=>$url (see parseAddr() method): |
82
|
|
|
|
|
|
|
## inet://ADDR:PORT?OPT=VAL... # canonical INET socket URL |
83
|
|
|
|
|
|
|
## unix://UNIX_PATH?OPT=VAL... # canonical UNIX socket URL |
84
|
|
|
|
|
|
|
## unix:UNIX_PATH?OPT=VAL... # = unix://UNIX_PATH?OPT=val |
85
|
|
|
|
|
|
|
## ADDR?OPT=VAL... # = inet://ADDR:5000?OPT=VAL... |
86
|
|
|
|
|
|
|
## :PORT?OPT=VAL... # = inet://localhost:PORT?OPT=VAL... |
87
|
|
|
|
|
|
|
## ADDR:PORT?OPT=VAL... # = inet://ADDR:PORT?OPT=VAL... |
88
|
|
|
|
|
|
|
## /UNIX_PATH?OPT=VAL... # = unix:///UNIX_PATH?POT=VAL... |
89
|
|
|
|
|
|
|
sub new { |
90
|
0
|
|
|
0
|
1
|
|
my ($that,%args) = @_; |
91
|
0
|
|
|
|
|
|
my @connect_args = grep {exists $args{$_}} map {($_,lc($_),uc($_))} qw(Peer PeerAddr PeerPort Url); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
my $connect = $that->parseAddr |
93
|
|
|
|
|
|
|
({ |
94
|
|
|
|
|
|
|
##-- connect: default options |
95
|
|
|
|
|
|
|
Domain=>'INET', |
96
|
|
|
|
|
|
|
PeerAddr=>'localhost', |
97
|
|
|
|
|
|
|
PeerPort=>50000, |
98
|
|
|
|
|
|
|
Proto=>'tcp', |
99
|
|
|
|
|
|
|
Type=>SOCK_STREAM, |
100
|
|
|
|
|
|
|
Blocking=>1, |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
##-- connect: user args |
103
|
|
|
|
|
|
|
(ref($args{'connect'}) |
104
|
0
|
|
|
|
|
|
? %{$args{'connect'}} |
105
|
|
|
|
|
|
|
: ($args{connect} |
106
|
0
|
|
|
|
|
|
? %{$that->parseAddr($args{connect})} |
107
|
|
|
|
|
|
|
: qw())), |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
##-- connect: top-level args |
110
|
0
|
0
|
|
|
|
|
(map {($_=>$args{$_})} @connect_args), |
|
0
|
0
|
|
|
|
|
|
111
|
|
|
|
|
|
|
}); |
112
|
0
|
|
|
|
|
|
delete @args{'connect',@connect_args}; |
113
|
|
|
|
|
|
|
|
114
|
0
|
|
0
|
|
|
|
my $dc =bless { |
115
|
|
|
|
|
|
|
##-- connection options |
116
|
|
|
|
|
|
|
connect=> $connect, |
117
|
|
|
|
|
|
|
linger => [1,0], |
118
|
|
|
|
|
|
|
mode =>'json', |
119
|
|
|
|
|
|
|
encoding => 'UTF-8', |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
##-- query options (formerly in DDC::Client::Distributed) |
122
|
|
|
|
|
|
|
start=>0, |
123
|
|
|
|
|
|
|
limit=>10, |
124
|
|
|
|
|
|
|
timeout=>60, |
125
|
|
|
|
|
|
|
hint=>undef, |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
##-- hit-parsing options |
128
|
|
|
|
|
|
|
parseMeta=>1, |
129
|
|
|
|
|
|
|
parseContext=>1, |
130
|
|
|
|
|
|
|
expandFields=>1, |
131
|
|
|
|
|
|
|
keepRaw=>0, |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
#fieldSeparator => "\x{1f}", |
134
|
|
|
|
|
|
|
#tokenSeparator => "\x{1e}", |
135
|
|
|
|
|
|
|
#defaultField => 'w', |
136
|
|
|
|
|
|
|
#metaNames => undef, |
137
|
|
|
|
|
|
|
#textHighlight=>undef, |
138
|
|
|
|
|
|
|
#tableHighlight=>undef, |
139
|
|
|
|
|
|
|
#htmlHighlight=>undef, |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
%args, |
142
|
|
|
|
|
|
|
}, ref($that)||$that; |
143
|
|
|
|
|
|
|
|
144
|
0
|
0
|
|
|
|
|
if (defined($args{optFile})) { |
145
|
|
|
|
|
|
|
$dc->loadOptFile($args{optFile}) |
146
|
0
|
0
|
|
|
|
|
or confess(__PACKAGE__ . "::new(): could not load options file '$args{optFile}': $!"); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
0
|
0
|
|
|
|
|
$dc->{fieldSeparator} = "\x{1f}" if (!$dc->{fieldSeparator}); |
150
|
0
|
0
|
|
|
|
|
$dc->{tokenSeparator} = "\x{1e}" if (!$dc->{tokenSeparator}); |
151
|
0
|
0
|
|
|
|
|
$dc->{textHighlight} = [qw(&& && _& &_)] if (!$dc->{textHighlight}); |
152
|
0
|
0
|
|
|
|
|
$dc->{tableHighlight} = [qw(&& && _& &_)] if (!$dc->{tableHighlight}); |
153
|
|
|
|
|
|
|
$dc->{htmlHighlight} = [ |
154
|
|
|
|
|
|
|
'','', |
155
|
|
|
|
|
|
|
'','', |
156
|
0
|
0
|
|
|
|
|
] if (!$dc->{htmlHighlight}); |
157
|
|
|
|
|
|
|
|
158
|
0
|
|
|
|
|
|
return $dc; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
##====================================================================== |
162
|
|
|
|
|
|
|
## DDC *.opt file |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile($filename, %opts); |
165
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile($fh, %opts); |
166
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile(\$str, %opts); |
167
|
|
|
|
|
|
|
## Sets client options from a DDC *.opt file: #fieldNames, metaNames, fieldSeparator. |
168
|
|
|
|
|
|
|
## %opts: |
169
|
|
|
|
|
|
|
## ( |
170
|
|
|
|
|
|
|
## clobber => $bool, ##-- whether to clobber existing %$dc fields (default=false) |
171
|
|
|
|
|
|
|
## ) |
172
|
|
|
|
|
|
|
## |
173
|
|
|
|
|
|
|
## NOTE: this is for parsing legacy (v1.x) DDC server response formats (table,text); |
174
|
|
|
|
|
|
|
## you do NOT need to use this function if you're using DDC's JSON response |
175
|
|
|
|
|
|
|
## format. |
176
|
|
|
|
|
|
|
## |
177
|
|
|
|
|
|
|
## WARNING: for whatever reason, DDC does not return metadata fields in the same |
178
|
|
|
|
|
|
|
## order in which they appeared in the *.opt file (nor in any lexicographic order |
179
|
|
|
|
|
|
|
## combination of the fields type, name, and xpath of the 'Bibl' directorive I |
180
|
|
|
|
|
|
|
## have tried), BUT this code assumes that the order in which the 'Bibl' directives |
181
|
|
|
|
|
|
|
## appear in the *.opt file are identical to the order in which DDC returns the |
182
|
|
|
|
|
|
|
## corresponding data in 'text' and 'html' modes. The actual order used by the |
183
|
|
|
|
|
|
|
## server should appear in the server logs. Change the *.opt file you pass to |
184
|
|
|
|
|
|
|
## this function accordingly. |
185
|
|
|
|
|
|
|
sub loadOptFile { |
186
|
0
|
|
|
0
|
0
|
|
my ($dc,$src,%opts) = @_; |
187
|
0
|
|
|
|
|
|
my ($fh); |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
##-- get source fh |
190
|
0
|
0
|
|
|
|
|
if (!ref($src)) { |
|
|
0
|
|
|
|
|
|
191
|
0
|
0
|
|
|
|
|
$fh = IO::File->new("<$src") |
192
|
|
|
|
|
|
|
or confess(__PACKAGE__ . "::loadOptFile(): open failed for '$src': $!"); |
193
|
0
|
0
|
|
|
|
|
binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding}); |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
elsif (ref($src) eq 'SCALAR') { |
196
|
0
|
|
|
|
|
|
$fh = IO::Handle->new; |
197
|
0
|
0
|
|
|
|
|
open($fh,'<',$src) |
198
|
|
|
|
|
|
|
or confess(__PACKAGE__ . "::loadOptFile(): open failed for buffer: $!"); |
199
|
0
|
0
|
|
|
|
|
binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding}); |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
else { |
202
|
0
|
|
|
|
|
|
$fh = $src; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
##-- parse file |
206
|
0
|
|
|
|
|
|
my $clobber = $opts{clobber}; |
207
|
0
|
|
|
|
|
|
my (@indices,@show,@meta,$showMeta); |
208
|
0
|
|
|
|
|
|
while (defined($_=<$fh>)) { |
209
|
0
|
|
|
|
|
|
chomp; |
210
|
0
|
0
|
|
|
|
|
if (/^Indices\s(.*)$/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
211
|
0
|
|
|
|
|
|
@indices = map {s/^\s*\[//; s/\]\s*$//; [split(' ',$_)]} split(/\;\s*/,$1); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
elsif (/^Bibl\s+(\S+)\s+(\d)\s+(\S+)\s+(.*)$/) { |
214
|
0
|
|
|
|
|
|
my ($type,$visible,$name,$xpath) = ($1,$2,$3,$4); |
215
|
0
|
0
|
|
|
|
|
push(@meta,[$type,$visible,$name,$xpath]) if ($visible+0); |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
elsif (/^IndicesToShow\s+(.*)$/) { |
218
|
0
|
|
|
|
|
|
@show = map {$_-1} split(' ',$1); |
|
0
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
elsif (/^OutputBibliographyOfHits\b/) { |
221
|
0
|
|
|
|
|
|
$showMeta = 1; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
elsif (/^InterpDelim[ie]ter\s(.*)$/) { |
224
|
0
|
0
|
0
|
|
|
|
$dc->{fieldSeparator} = unescape($1) if ($clobber || !$dc->{fieldSeparator}); |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
elsif (/^TokenDelim[ie]ter\s(.*)$/) { |
227
|
0
|
0
|
0
|
|
|
|
$dc->{tokenSeparator} = unescape($1) if ($clobber || !$dc->{tokenSeparator}); |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
elsif (/^Utf8\s*$/) { |
230
|
0
|
0
|
0
|
|
|
|
$dc->{encoding} = 'utf8' if ($clobber || !$dc->{encoding}); |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
elsif (/^HtmlHighlighting\s*(.*)$/) { |
233
|
0
|
0
|
0
|
|
|
|
$dc->{htmlHighlight} = [map {unescape($1)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{htmlHighlight}); |
|
0
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
elsif (/^TextHighlighting\s*(.*)$/) { |
236
|
0
|
0
|
0
|
|
|
|
$dc->{textHighlight} = [map {unescape($1)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{textHighlight}); |
|
0
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
elsif (/^TableHighlighting\s*(.*)$/) { |
239
|
0
|
0
|
0
|
|
|
|
$dc->{tableHighlight} = [map {unescape($_)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{tableHighlight}); |
|
0
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
##-- setup local options |
244
|
0
|
0
|
|
|
|
|
@show = (0) if (!@show); |
245
|
0
|
0
|
0
|
|
|
|
$dc->{fieldNames} = [map {$_->[1]} @indices[@show]] if ($clobber || !$dc->{fieldNames}); |
|
0
|
|
|
|
|
|
|
246
|
0
|
0
|
|
|
|
|
if (!$dc->{metaNames}) { |
247
|
0
|
0
|
|
|
|
|
if (!$showMeta) { |
|
|
0
|
|
|
|
|
|
248
|
0
|
|
|
|
|
|
$dc->{metaNames} = ['file_']; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
elsif (@meta) { |
251
|
0
|
0
|
0
|
|
|
|
$dc->{metaNames} = [map {$_->[2]} @meta] if (@meta && ($clobber || !$dc->{metaNames})); |
|
0
|
|
0
|
|
|
|
|
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
##-- cleanup |
256
|
0
|
0
|
0
|
|
|
|
$fh->close if (!ref($src) || ref($src) eq 'SCALAR'); |
257
|
0
|
|
|
|
|
|
return $dc; |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
##====================================================================== |
261
|
|
|
|
|
|
|
## Query requests (formerly in DDC::Client::Distributed) |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
## $buf = $dc->queryRaw($query_string) |
264
|
|
|
|
|
|
|
## $buf = $dc->queryRaw(\@raw_strings) |
265
|
|
|
|
|
|
|
sub queryRaw { |
266
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
267
|
0
|
|
|
|
|
|
my $buf = $dc->queryRawNC(@_); |
268
|
0
|
|
|
|
|
|
$dc->close(); ##-- this apparently has to happen: bummer |
269
|
0
|
|
|
|
|
|
return $buf; |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
## $buf = $dc->queryRawNC($query_string) |
273
|
|
|
|
|
|
|
## $buf = $dc->queryRawNC(\@raw_strings) |
274
|
|
|
|
|
|
|
## + guts for queryRaw() without implicit close() |
275
|
|
|
|
|
|
|
sub queryRawNC { |
276
|
0
|
|
|
0
|
1
|
|
my ($dc,$query) = @_; |
277
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::isa($query,'ARRAY')) { |
|
|
0
|
|
|
|
|
|
278
|
|
|
|
|
|
|
##-- raw array: send raw data to DDC |
279
|
0
|
|
|
|
|
|
$dc->send(join("\001",@$query)); |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
elsif ($dc->{mode} =~ /^(?:raw|req)/i) { |
282
|
|
|
|
|
|
|
##-- "raw" or "request" mode: send raw request to DDC |
283
|
0
|
|
|
|
|
|
$dc->send($query); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
else { |
286
|
|
|
|
|
|
|
##-- query string: send 'run-query Distributed' |
287
|
|
|
|
|
|
|
$dc->send(join("\001", |
288
|
|
|
|
|
|
|
"run_query Distributed", |
289
|
|
|
|
|
|
|
$query, |
290
|
|
|
|
|
|
|
$dc->{mode}, |
291
|
0
|
0
|
|
|
|
|
join(' ', @$dc{qw(start limit timeout)}, ($dc->{hint} ? $dc->{hint} : qw())))); |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
##-- get output buffer |
294
|
0
|
|
|
|
|
|
return $dc->readData(); |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
## @bufs = $dc->queryMulti($queryString1, $queryString2, ...) |
298
|
|
|
|
|
|
|
## @bufs = $dc->queryMulti(\@queryStrings1, \@queryStrings2, ...) |
299
|
|
|
|
|
|
|
sub queryMulti { |
300
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
301
|
0
|
|
|
|
|
|
my @bufs = map {$dc->queryRawNC($_)} @_; |
|
0
|
|
|
|
|
|
|
302
|
0
|
|
|
|
|
|
$dc->close(); ##-- this apparently has to happen: bummer |
303
|
0
|
|
|
|
|
|
return @bufs; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
## $obj = $dc->queryJson($query_string) |
307
|
|
|
|
|
|
|
## $obj = $dc->queryJson(\@raw_strings) |
308
|
|
|
|
|
|
|
sub queryJson { |
309
|
0
|
|
|
0
|
0
|
|
my ($dc,$query) = @_; |
310
|
0
|
|
|
|
|
|
return $dc->decodeJson($dc->queryRaw($query)); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
## $hits = $dc->query($query_string) |
314
|
|
|
|
|
|
|
sub query { |
315
|
0
|
|
|
0
|
1
|
|
my ($dc,$query) = @_; |
316
|
0
|
|
|
|
|
|
return $dc->parseData($dc->queryRaw($query)); |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
##====================================================================== |
321
|
|
|
|
|
|
|
## Common Requests |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
## $rsp = $dc->request($request_string) |
324
|
|
|
|
|
|
|
sub request { |
325
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
326
|
0
|
|
|
|
|
|
my $buf = $dc->requestNC(@_); |
327
|
0
|
|
|
|
|
|
$dc->close(); |
328
|
0
|
|
|
|
|
|
return $buf; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
## $rsp = $dc->requestNC($request_string) |
332
|
|
|
|
|
|
|
## + guts for request() which doesn't implicitly call close() |
333
|
|
|
|
|
|
|
sub requestNC { |
334
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
335
|
0
|
|
|
|
|
|
$dc->send($_[0]); |
336
|
0
|
|
|
|
|
|
return $dc->readData(); |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
## $data = $dc->requestJson($request_string) |
340
|
|
|
|
|
|
|
sub requestJson { |
341
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
342
|
0
|
|
|
|
|
|
return $dc->decodeJson($dc->request($_[0])); |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
## $server_version = $dc->version() |
346
|
|
|
|
|
|
|
sub version { |
347
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
348
|
0
|
|
|
|
|
|
return $dc->request("version"); |
349
|
|
|
|
|
|
|
} |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
## $status = $dc->status() |
352
|
|
|
|
|
|
|
## $status = $dc->status($timeout) ##-- not really supported by ddc |
353
|
|
|
|
|
|
|
sub status { |
354
|
0
|
|
|
0
|
1
|
|
my ($dc,$timeout) = @_; |
355
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if (!defined($timeout)); |
356
|
0
|
0
|
|
|
|
|
return $dc->requestJson("status".(defined($timeout) ? " $timeout" : '')); |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
## $vstatus = $dc->vstatus() |
360
|
|
|
|
|
|
|
## $vstatus = $dc->vstatus($timeout) |
361
|
|
|
|
|
|
|
sub vstatus { |
362
|
0
|
|
|
0
|
1
|
|
my ($dc,$timeout) = @_; |
363
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if (!defined($timeout)); |
364
|
0
|
0
|
|
|
|
|
return $dc->requestJson("vstatus".(defined($timeout) ? " $timeout" : '')); |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
## $info = $dc->info() |
368
|
|
|
|
|
|
|
## $info = $dc->info($timeout) |
369
|
|
|
|
|
|
|
sub info { |
370
|
0
|
|
|
0
|
1
|
|
my ($dc,$timeout) = @_; |
371
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if (!defined($timeout)); |
372
|
0
|
0
|
|
|
|
|
return $dc->requestJson("info".(defined($timeout) ? " $timeout" : '')); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
## $nodes = $dc->nodes() |
376
|
|
|
|
|
|
|
## $nodes = $dc->nodes($depth) |
377
|
|
|
|
|
|
|
sub nodes { |
378
|
0
|
|
|
0
|
1
|
|
my ($dc,$depth) = @_; |
379
|
0
|
0
|
|
|
|
|
return $dc->requestJson("nodes".(defined($depth) ? " $depth" : '')); |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term) |
384
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout) |
385
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout, $subcorpus) |
386
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms) |
387
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout) |
388
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout, $subcorpus) |
389
|
|
|
|
|
|
|
sub expand_terms { |
390
|
0
|
|
|
0
|
1
|
|
my ($dc,$chain,$terms,$timeout,$subcorpus) = @_; |
391
|
0
|
0
|
|
|
|
|
$chain = join('|', @$chain) if (UNIVERSAL::isa($chain,'ARRAY')); |
392
|
0
|
0
|
|
|
|
|
$terms = join("\t", @$terms) if (UNIVERSAL::isa($terms,'ARRAY')); |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
##-- hack: detect swapping of $timeout and $subcorpus (old DDC::Client::Distributed-style) |
395
|
0
|
0
|
|
|
|
|
$timeout = '' if (!defined($timeout)); |
396
|
0
|
0
|
|
|
|
|
$subcorpus = '' if (!defined($subcorpus)); |
397
|
0
|
0
|
0
|
|
|
|
($timeout,$subcorpus) = ($subcorpus,$timeout) |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
398
|
|
|
|
|
|
|
if ($timeout ne '' && $subcorpus ne '' && $timeout =~ /[0-9]/ && $subcorpus !~ /[0-9]/); |
399
|
|
|
|
|
|
|
|
400
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if ($timeout eq ''); |
401
|
0
|
0
|
0
|
|
|
|
$timeout = 5 if (!defined($timeout) || $timeout eq ''); |
402
|
0
|
|
|
|
|
|
$dc->send(join("\x01", 'expand_terms ', $chain, $terms, $timeout, $subcorpus)); |
403
|
|
|
|
|
|
|
##-- get output buffer |
404
|
0
|
|
|
|
|
|
my $buf = $dc->readData(); |
405
|
0
|
|
|
|
|
|
$dc->close(); ##-- this apparently has to happen: bummer |
406
|
0
|
|
|
|
|
|
return $buf; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term) |
410
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term, $timeout) |
411
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term, $timeout, $subcorpus) |
412
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms) |
413
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms, $timeout) |
414
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms, $timeout, $subcorpus) |
415
|
|
|
|
|
|
|
sub expand { |
416
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
417
|
0
|
|
|
|
|
|
return $dc->parseExpandTermsResponse($dc->expand_terms(@_)); |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
## $buf = $dc->get_first_hits($query) |
421
|
|
|
|
|
|
|
## $buf = $dc->get_first_hits($query,$timeout?,$limit?,$hint?) |
422
|
|
|
|
|
|
|
sub get_first_hits { |
423
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
424
|
0
|
|
|
|
|
|
my $query = shift; |
425
|
0
|
0
|
|
|
|
|
my $timeout = @_ ? shift : $dc->{timeout}; |
426
|
0
|
0
|
|
|
|
|
my $limit = @_ ? shift : $dc->{limit}; |
427
|
0
|
0
|
|
|
|
|
my $hint = @_ ? shift : $dc->{hint}; |
428
|
0
|
0
|
|
|
|
|
return $dc->request("get_first_hits $query\x{01}$timeout $limit".($hint ? " $hint" : '')); |
429
|
|
|
|
|
|
|
} |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
## $buf = $dc->get_hit_strings($format?,$start?,$limit?) |
432
|
|
|
|
|
|
|
sub get_hit_strings { |
433
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
434
|
0
|
0
|
|
|
|
|
my $format = @_ ? shift : ($dc->{mode} eq 'raw' ? 'json' : ''); |
|
|
0
|
|
|
|
|
|
435
|
0
|
0
|
|
|
|
|
my $start = @_ ? shift : $dc->{start}; |
436
|
0
|
0
|
|
|
|
|
my $limit = @_ ? shift : $dc->{limit}; |
437
|
0
|
|
|
|
|
|
return $dc->request("get_hit_strings $format\x{01}$start $limit"); |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
## $buf = $dc->run_query($corpus,$query,$format?,$start?,$limit?,$timeout?,$hint?) |
442
|
|
|
|
|
|
|
sub run_query { |
443
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
444
|
0
|
|
|
|
|
|
my $corpus = shift; |
445
|
0
|
|
|
|
|
|
my $query = shift; |
446
|
0
|
0
|
|
|
|
|
my $format = @_ ? shift : $dc->{mode}; |
447
|
0
|
0
|
|
|
|
|
my $start = @_ ? shift : $dc->{start}; |
448
|
0
|
0
|
|
|
|
|
my $limit = @_ ? shift : $dc->{limit}; |
449
|
0
|
0
|
|
|
|
|
my $timeout = @_ ? shift : $dc->{timeout}; |
450
|
0
|
0
|
|
|
|
|
my $hint = @_ ? shift : $dc->{hint}; |
451
|
0
|
0
|
|
|
|
|
$corpus = 'Distributed' if (!defined($corpus)); |
452
|
0
|
0
|
|
|
|
|
return $dc->request("run_query $corpus\x{01}$query\x{01}$format\x{01}$start $limit $timeout".($hint ? " $hint" : '')); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
##====================================================================== |
456
|
|
|
|
|
|
|
## Low-level communications |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
## \%connect = $dc->parseAddr() |
459
|
|
|
|
|
|
|
## \%connect = $CLASS_OR_OBJECT->parseAddr(\%connect, $PEER_OR_LOCAL='peer', %options) |
460
|
|
|
|
|
|
|
## \%connect = $CLASS_OR_OBJECT->parserAddr({url=>$url}, $PEER_OR_LOCAL='peer', %options) |
461
|
|
|
|
|
|
|
## + parses connect URLs to option-hashes suitable for use as $dc->{connect} |
462
|
|
|
|
|
|
|
## + supported URLs formats: |
463
|
|
|
|
|
|
|
## inet://ADDR:PORT?OPT=VAL... # canonical INET socket URL |
464
|
|
|
|
|
|
|
## unix://UNIX_PATH?OPT=VAL... # canonical UNIX socket URL |
465
|
|
|
|
|
|
|
## unix:UNIX_PATH?OPT=VAL... # = unix://UNIX_PATH?OPT=val |
466
|
|
|
|
|
|
|
## ADDR?OPT=VAL... # = inet://ADDR:5000?OPT=VAL... |
467
|
|
|
|
|
|
|
## :PORT?OPT=VAL... # = inet://localhost:PORT?OPT=VAL... |
468
|
|
|
|
|
|
|
## ADDR:PORT?OPT=VAL... # = inet://ADDR:PORT?OPT=VAL... |
469
|
|
|
|
|
|
|
## /UNIX_PATH?OPT=VAL... # = unix:///UNIX_PATH?POT=VAL... |
470
|
|
|
|
|
|
|
sub parseAddr { |
471
|
0
|
|
|
0
|
1
|
|
my ($that,$connect,$prefix,%opts) = @_; |
472
|
0
|
|
|
|
|
|
my ($override); |
473
|
0
|
0
|
0
|
|
|
|
if (!$connect && ref($that)) { |
474
|
0
|
|
|
|
|
|
$connect = $that->{connect}; |
475
|
0
|
|
|
|
|
|
$override = 1; |
476
|
|
|
|
|
|
|
} |
477
|
0
|
|
0
|
|
|
|
$connect //= 'inet://localhost:50000'; |
478
|
0
|
0
|
|
|
|
|
$connect = {url=>$connect} if (!UNIVERSAL::isa($connect,'HASH')); |
479
|
|
|
|
|
|
|
|
480
|
0
|
|
0
|
|
|
|
$prefix ||= 'Peer'; |
481
|
0
|
|
|
|
|
|
$prefix = ucfirst($prefix); |
482
|
0
|
|
0
|
|
|
|
my $url = $connect->{URL} || $connect->{Url} || $connect->{url}; |
483
|
0
|
0
|
|
|
|
|
if (defined($url)) { |
484
|
0
|
|
|
|
|
|
my ($base,$opts) = split(/\?/,$url,2); |
485
|
0
|
0
|
|
|
|
|
my $scheme = ($base =~ s{^([\w\+\-]+):(?://)?}{} ? $1 : ''); |
486
|
0
|
0
|
0
|
|
|
|
if (lc($scheme) eq 'unix' || (!$scheme && $base =~ m{^/})) { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
487
|
0
|
|
|
|
|
|
$connect->{Domain} = 'UNIX'; |
488
|
0
|
|
|
|
|
|
$connect->{$prefix} = $base; |
489
|
|
|
|
|
|
|
} |
490
|
0
|
|
|
|
|
|
elsif (!$scheme || grep {$_ eq lc($scheme)} qw(inet tcp)) { |
491
|
0
|
|
|
|
|
|
$connect->{Domain} = 'INET'; |
492
|
0
|
|
|
|
|
|
my ($host,$port) = split(':',$base,2); |
493
|
0
|
|
0
|
|
|
|
$host ||= 'localhost'; |
494
|
0
|
|
0
|
|
|
|
$port ||= 50000; |
495
|
0
|
|
|
|
|
|
@$connect{"${prefix}Addr","${prefix}Port"} = ($host,$port); |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
else { |
498
|
0
|
|
|
|
|
|
die(__PACKAGE__, "::parseAddr(): unsupported scheme '$scheme' for URL $url"); |
499
|
|
|
|
|
|
|
} |
500
|
0
|
|
0
|
|
|
|
my %urlopts = map {split(/=/,$_,2)} grep {$_} split(/[\&\;]/,($opts//'')); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
501
|
0
|
|
|
|
|
|
@$connect{keys %urlopts} = values %urlopts; |
502
|
|
|
|
|
|
|
} |
503
|
0
|
|
|
|
|
|
@$connect{keys %opts} = values %opts; |
504
|
|
|
|
|
|
|
|
505
|
0
|
0
|
|
|
|
|
$that->{connect} = $connect if ($override); |
506
|
0
|
|
|
|
|
|
return $connect; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
## $str = $dc->addrStr() |
510
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr(\%connect,$PEER_OR_LOCAL) |
511
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr($url,$PEER_OR_LOCAL) |
512
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr($sock,$PEER_OR_LOCAL) |
513
|
|
|
|
|
|
|
sub addrStr { |
514
|
0
|
|
|
0
|
1
|
|
my ($that,$addr,$prefix) = @_; |
515
|
0
|
0
|
0
|
|
|
|
$addr = ($that->{sock} || $that->{connect}) if (ref($that) && !defined($addr)); |
|
|
|
0
|
|
|
|
|
516
|
0
|
|
0
|
|
|
|
$prefix ||= 'Peer'; |
517
|
0
|
|
|
|
|
|
$prefix = ucfirst($prefix); |
518
|
|
|
|
|
|
|
|
519
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::isa($addr,'IO::Socket::UNIX')) { |
|
|
0
|
|
|
|
|
|
520
|
0
|
|
|
|
|
|
return "unix://$addr->{$prefix}"; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
elsif (UNIVERSAL::isa($addr,'IO::Socket::INET')) { |
523
|
0
|
0
|
|
|
|
|
my $mprefix = (lc($prefix) eq 'peer' ? 'peer' : 'sock'); |
524
|
0
|
|
|
|
|
|
return "inet://".$addr->can($mprefix."host")->($addr).":".$addr->can($mprefix."port")->($addr); |
525
|
|
|
|
|
|
|
} |
526
|
0
|
0
|
|
|
|
|
$addr = $addr->{connect} if (UNIVERSAL::isa($addr,'DDC::Client')); |
527
|
0
|
0
|
|
|
|
|
$addr = $that->parseAddr($addr,$prefix) if (!ref($addr)); |
528
|
0
|
|
|
|
|
|
my ($url); |
529
|
|
|
|
|
|
|
#my %uopts = %$addr; |
530
|
0
|
0
|
|
|
|
|
if ($addr->{Domain} eq 'UNIX') { |
531
|
0
|
|
|
|
|
|
$url = "unix://$addr->{$prefix}"; |
532
|
|
|
|
|
|
|
#delete $uopts{$prefix}; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
else { |
535
|
|
|
|
|
|
|
$url = "inet://".($addr->{"${prefix}Addr"} && $addr->{"${prefix}Port"} |
536
|
|
|
|
|
|
|
? ($addr->{"${prefix}Addr"}.":".$addr->{"${prefix}Port"}) |
537
|
0
|
0
|
0
|
|
|
|
: $addr->{"${prefix}Addr"}); |
538
|
|
|
|
|
|
|
#delete @uopts{"${prefix}Addr","${prefix}Port"}; |
539
|
|
|
|
|
|
|
} |
540
|
|
|
|
|
|
|
#delete $opts{Domain}; |
541
|
|
|
|
|
|
|
#if (%uopts) { |
542
|
|
|
|
|
|
|
# $url .= '?'.join('&',map {("$_=$uopts{$_}")} sort keys %uopts); |
543
|
|
|
|
|
|
|
#} |
544
|
0
|
|
|
|
|
|
return $url; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
## $io_socket = $dc->open() |
548
|
|
|
|
|
|
|
sub open { |
549
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
550
|
0
|
|
|
|
|
|
$dc->parseAddr(); |
551
|
0
|
|
0
|
|
|
|
my $domain = $dc->{connect}{Domain} // 'INET'; |
552
|
0
|
0
|
|
|
|
|
if (lc($domain) eq 'unix') { |
553
|
|
|
|
|
|
|
##-- v0.43: use unix-domain socket connection |
554
|
0
|
|
|
|
|
|
$dc->{sock} = IO::Socket::UNIX->new(%{$dc->{'connect'}}); |
|
0
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
} else { |
556
|
|
|
|
|
|
|
##-- compatibility hack: use INET-domain sockets (TCP) |
557
|
0
|
|
|
|
|
|
$dc->{sock} = IO::Socket::INET->new(%{$dc->{'connect'}}); |
|
0
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
} |
559
|
0
|
0
|
|
|
|
|
return undef if (!$dc->{sock}); |
560
|
0
|
0
|
|
|
|
|
$dc->{sock}->setsockopt(SOL_SOCKET, SO_LINGER, pack('II',@{$dc->{linger}})) if ($dc->{linger}); |
|
0
|
|
|
|
|
|
|
561
|
0
|
|
|
|
|
|
$dc->{sock}->autoflush(1); |
562
|
0
|
|
|
|
|
|
return $dc->{sock}; |
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
## undef = $dc->close() |
566
|
|
|
|
|
|
|
sub close { |
567
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
568
|
0
|
0
|
|
|
|
|
$dc->{sock}->close() if (defined($dc->{sock})); |
569
|
0
|
|
|
|
|
|
delete($dc->{sock}); |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
## $encoded = $dc->ddc_encode(@message_strings) |
573
|
|
|
|
|
|
|
sub ddc_encode { |
574
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
575
|
0
|
|
|
|
|
|
my $msg = join('',@_); |
576
|
0
|
0
|
0
|
|
|
|
$msg = encode($dc->{encoding},$msg) if ($dc->{encoding} && utf8::is_utf8($msg)); |
577
|
0
|
|
|
|
|
|
return pack($ifmt,length($msg)) . $msg; |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
## $decoded = $dc->ddc_decode($response_buf) |
581
|
|
|
|
|
|
|
sub ddc_decode { |
582
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
583
|
0
|
|
|
|
|
|
my $buf = unpack("$ifmt/a*",$_[0]); |
584
|
0
|
0
|
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding}); |
585
|
0
|
|
|
|
|
|
return $buf; |
586
|
|
|
|
|
|
|
} |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
## undef = $dc->send(@message_strings) |
589
|
|
|
|
|
|
|
## + sends @message_strings |
590
|
|
|
|
|
|
|
sub send { |
591
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
592
|
0
|
0
|
|
|
|
|
$dc->open() if (!defined($dc->{sock})); |
593
|
0
|
|
|
|
|
|
return $dc->sendfh($dc->{sock}, @_); |
594
|
|
|
|
|
|
|
} |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
## undef = $dc->sendfh($fh,@message_strings) |
597
|
|
|
|
|
|
|
## + sends @message_strings to $fh, prepending total length |
598
|
|
|
|
|
|
|
sub sendfh { |
599
|
0
|
|
|
0
|
1
|
|
my ($dc,$fh) = (shift,shift); |
600
|
0
|
|
|
|
|
|
$fh->print( $dc->ddc_encode(@_) ); |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
## $size = $dc->readSize() |
604
|
|
|
|
|
|
|
## $size = $dc->readSize($fh) |
605
|
|
|
|
|
|
|
sub readSize { |
606
|
0
|
|
|
0
|
1
|
|
my ($dc,$fh) = @_; |
607
|
0
|
|
|
|
|
|
my ($size_packed); |
608
|
0
|
0
|
|
|
|
|
$fh = $dc->{sock} if (!$fh); |
609
|
0
|
0
|
0
|
|
|
|
confess(ref($dc), "::readSize(): could not read size from socket: $!") |
610
|
|
|
|
|
|
|
if (($fh->read($size_packed,$ilen)||0) != $ilen); |
611
|
0
|
0
|
|
|
|
|
return 0 if (!defined($size_packed)); |
612
|
0
|
|
|
|
|
|
return unpack($ifmt,$size_packed); |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
## $data = $dc->readBytes($nbytes) |
616
|
|
|
|
|
|
|
## $data = $dc->readBytes($nbytes,$fh) |
617
|
|
|
|
|
|
|
sub readBytes { |
618
|
0
|
|
|
0
|
1
|
|
my ($dc,$nbytes,$fh) = @_; |
619
|
0
|
|
|
|
|
|
my ($buf); |
620
|
0
|
0
|
|
|
|
|
$fh = $dc->{sock} if (!$fh); |
621
|
0
|
|
|
|
|
|
my $nread = $fh->read($buf,$nbytes); |
622
|
0
|
0
|
|
|
|
|
confess(ref($dc), "::readBytes(): failed to read $nbytes bytes of data (only found $nread): $!") |
623
|
|
|
|
|
|
|
if ($nread != $nbytes); |
624
|
0
|
|
|
|
|
|
return $buf; |
625
|
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
## $data = $dc->readData() |
628
|
|
|
|
|
|
|
## $data = $dc->readData($fh) |
629
|
0
|
|
|
0
|
1
|
|
sub readData { return $_[0]->readBytes($_[0]->readSize($_[1]),$_[1]); } |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
##====================================================================== |
632
|
|
|
|
|
|
|
## Hit Parsing |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
## $hitList = $dc->parseData($buf) |
635
|
|
|
|
|
|
|
sub parseData { |
636
|
0
|
0
|
|
0
|
0
|
|
return $_[0]->parseJsonData($_[1]) if ($_[0]{mode} eq 'json'); |
637
|
0
|
0
|
|
|
|
|
return $_[0]->parseTableData($_[1]) if ($_[0]{mode} eq 'table'); |
638
|
0
|
0
|
|
|
|
|
return $_[0]->parseTextData($_[1]) if ($_[0]{mode} eq 'text'); |
639
|
0
|
0
|
|
|
|
|
return $_[0]->parseHtmlData($_[1]) if ($_[0]{mode} eq 'html'); |
640
|
0
|
|
|
|
|
|
confess(__PACKAGE__ . "::parseData(): unknown query mode '$_[0]{mode}'"); |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
644
|
|
|
|
|
|
|
## Hit Parsing: Text |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
## $hitList = $dc->parseTextData($buf) |
647
|
|
|
|
|
|
|
## + returns a DDC::HitList |
648
|
|
|
|
|
|
|
sub parseTextData { |
649
|
0
|
|
|
0
|
1
|
|
my ($dc,$buf) = @_; |
650
|
0
|
|
|
|
|
|
my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
##-- parse response macro structure |
653
|
0
|
0
|
0
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); |
654
|
0
|
|
|
|
|
|
my ($buflines,$bufinfo) = split("\001", $buf, 2); |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
##-- parse administrative data from response footer |
657
|
0
|
|
|
|
|
|
chomp($bufinfo); |
658
|
0
|
|
|
|
|
|
@$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
##-- successful response: parse hit data |
661
|
0
|
|
|
|
|
|
my @buflines = split(/\n/,$buflines); |
662
|
0
|
|
0
|
|
|
|
my $metaNames = $dc->{metaNames} || []; |
663
|
0
|
|
|
|
|
|
my ($bufline,$hit,@fields,$ctxbuf); |
664
|
0
|
|
|
|
|
|
foreach $bufline (@buflines) { |
665
|
0
|
0
|
|
|
|
|
if ($bufline =~ /^Corpora Distribution\:(.*)$/) { |
|
|
0
|
|
|
|
|
|
666
|
0
|
|
|
|
|
|
$hits->{dhits_} = $1; |
667
|
0
|
|
|
|
|
|
next; |
668
|
|
|
|
|
|
|
} elsif ($bufline =~ /^Relevant Documents Distribution:(.*)$/) { |
669
|
0
|
|
|
|
|
|
$hits->{ddocs_} = $1; |
670
|
0
|
|
|
|
|
|
next; |
671
|
|
|
|
|
|
|
} |
672
|
0
|
|
|
|
|
|
push(@{$hits->{hits_}},$hit=DDC::Hit->new); |
|
0
|
|
|
|
|
|
|
673
|
0
|
0
|
|
|
|
|
$hit->{raw_} = $bufline if ($dc->{keepRaw}); |
674
|
|
|
|
|
|
|
|
675
|
0
|
0
|
0
|
|
|
|
if ($dc->{parseMeta} || $dc->{parseContext}) { |
676
|
0
|
|
|
|
|
|
@fields = split(/ ### /, $bufline); |
677
|
0
|
|
|
|
|
|
$ctxbuf = pop(@fields); |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
##-- parse: metadata |
680
|
0
|
0
|
|
|
|
|
if ($dc->{parseMeta}) { |
681
|
0
|
|
|
|
|
|
$hit->{meta_}{file_} = shift(@fields); |
682
|
0
|
|
|
|
|
|
$hit->{meta_}{page_} = shift(@fields); |
683
|
0
|
|
|
|
|
|
$hit->{meta_}{indices_} = [split(' ', pop(@fields))]; |
684
|
0
|
|
0
|
|
|
|
$hit->{meta_}{$metaNames->[$_]||"${_}_"} = $fields[$_] foreach (0..$#fields); |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
##-- parse: context |
688
|
0
|
0
|
|
|
|
|
$hit->{ctx_} = $dc->parseTextContext($ctxbuf) if ($dc->{parseContext}); |
689
|
|
|
|
|
|
|
} |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
693
|
0
|
|
|
|
|
|
return $hits; |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
## \@context_data = $dc->parseTextContext($context_buf) |
698
|
|
|
|
|
|
|
sub parseTextContext { |
699
|
0
|
|
|
0
|
0
|
|
my ($dc,$ctx) = @_; |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
##-- defaults |
702
|
0
|
|
|
|
|
|
my $fieldNames = $dc->{fieldNames}; |
703
|
0
|
|
|
|
|
|
my $fs = qr(\Q$dc->{fieldSeparator}\E); |
704
|
0
|
|
|
|
|
|
my $ts = qr(\Q$dc->{tokenSeparator}\E\ *); |
705
|
0
|
|
|
|
|
|
my $hl = $dc->{textHighlight}; |
706
|
0
|
|
|
|
|
|
my $hls = qr(\Q$dc->{tokenSeparator}\E\ *\Q$hl->[0]\E); |
707
|
0
|
|
|
|
|
|
my $hlw0 = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); |
708
|
0
|
|
|
|
|
|
my $hlw1 = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
##-- split into sentences |
711
|
0
|
|
|
|
|
|
$ctx =~ s/^\s*//; |
712
|
0
|
|
|
|
|
|
my ($sbuf,@s,$w); |
713
|
0
|
|
|
|
|
|
my $sents = [[],[],[]]; |
714
|
0
|
|
|
|
|
|
foreach $sbuf (split(/ {4}/,$ctx)) { |
715
|
|
|
|
|
|
|
|
716
|
0
|
0
|
|
|
|
|
if ($sbuf =~ $hls) { |
717
|
|
|
|
|
|
|
##-- target sentence with index dump: parse it |
718
|
0
|
|
|
|
|
|
$sbuf =~ s/^$ts//; |
719
|
0
|
|
|
|
|
|
@s = map {[0,split($fs,$_)]} split($ts,$sbuf); |
|
0
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
##-- parse words |
722
|
0
|
|
|
|
|
|
foreach $w (@s) { |
723
|
0
|
0
|
0
|
|
|
|
if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { |
724
|
|
|
|
|
|
|
##-- matched token |
725
|
0
|
|
|
|
|
|
$w->[1] =~ s/$hlw0//; |
726
|
0
|
|
|
|
|
|
$w->[$#$w] =~ s/$hlw1//; |
727
|
0
|
|
|
|
|
|
$w->[0] = 1; |
728
|
|
|
|
|
|
|
} |
729
|
|
|
|
|
|
|
} |
730
|
0
|
|
|
|
|
|
push(@{$sents->[1]},@s); |
|
0
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
else { |
733
|
|
|
|
|
|
|
##-- context sentence: surface strings only |
734
|
0
|
|
|
|
|
|
$sbuf =~ s/^$ts//; |
735
|
0
|
|
|
|
|
|
@s = split($ts,$sbuf); |
736
|
0
|
0
|
|
|
|
|
if (!@{$sents->[1]}) { |
|
0
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
##-- left context |
738
|
0
|
|
|
|
|
|
push(@{$sents->[0]}, @s); |
|
0
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
} else { |
740
|
|
|
|
|
|
|
##-- right context |
741
|
0
|
|
|
|
|
|
push(@{$sents->[2]}, @s); |
|
0
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
} |
743
|
|
|
|
|
|
|
} |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
|
746
|
0
|
|
|
|
|
|
return $sents; |
747
|
|
|
|
|
|
|
} |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
750
|
|
|
|
|
|
|
## Hit Parsing: Table |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
## $hitList = $dc->parseTableData($buf) |
753
|
|
|
|
|
|
|
## + returns a DDC::HitList |
754
|
|
|
|
|
|
|
sub parseTableData { |
755
|
0
|
|
|
0
|
1
|
|
my ($dc,$buf) = @_; |
756
|
0
|
|
|
|
|
|
my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
##-- parse response macro structure |
759
|
0
|
0
|
0
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); |
760
|
0
|
|
|
|
|
|
my ($buflines,$bufinfo) = split("\001", $buf, 2); |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
##-- parse administrative data from response footer |
763
|
0
|
|
|
|
|
|
chomp($bufinfo); |
764
|
0
|
|
|
|
|
|
@$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
##-- successful response: parse hit data |
767
|
0
|
|
|
|
|
|
my @buflines = split(/\n/,$buflines); |
768
|
0
|
|
|
|
|
|
my ($bufline,$hit,@fields,$field,$val); |
769
|
0
|
|
|
|
|
|
foreach $bufline (@buflines) { |
770
|
0
|
|
|
|
|
|
push(@{$hits->{hits_}},$hit=DDC::Hit->new); |
|
0
|
|
|
|
|
|
|
771
|
0
|
0
|
|
|
|
|
$hit->{raw_} = $bufline if ($dc->{keepRaw}); |
772
|
|
|
|
|
|
|
|
773
|
0
|
0
|
0
|
|
|
|
if ($dc->{parseMeta} || $dc->{parseContext}) { |
774
|
0
|
|
|
|
|
|
@fields = split("\002", $bufline); |
775
|
0
|
|
|
|
|
|
while (defined($field=shift(@fields))) { |
776
|
|
|
|
|
|
|
|
777
|
0
|
0
|
|
|
|
|
if ($field eq 'keyword') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
778
|
|
|
|
|
|
|
##-- special handling for 'keyword' field |
779
|
0
|
|
|
|
|
|
$val = shift(@fields); |
780
|
0
|
|
|
|
|
|
while ($val =~ /\(.*?\S)\s*\<\/orth\>/g) { |
781
|
0
|
|
|
|
|
|
push(@{$hit->{orth_}}, $1); |
|
0
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
} |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
elsif ($field eq 'indices') { |
785
|
|
|
|
|
|
|
##-- special handling for 'indices' field |
786
|
0
|
|
|
|
|
|
$val = shift(@fields); |
787
|
0
|
|
|
|
|
|
$hit->{meta_}{indices_} = [split(' ',$val)]; |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
elsif ($field =~ /^\s*\
|
790
|
|
|
|
|
|
|
##-- special handling for context pseudo-field |
791
|
0
|
0
|
|
|
|
|
$hit->{ctx_} = $dc->parseTableContext($field) if ($dc->{parseContext}); |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
elsif ($dc->{parseMeta}) { |
794
|
|
|
|
|
|
|
##-- normal bibliographic field |
795
|
0
|
0
|
|
|
|
|
$field .= '_' if ($field =~ /^(?:scan|orig|page|rank(?:_debug)?)$/); ##-- special handling for ddc-internal fields |
796
|
0
|
|
|
|
|
|
$val = shift(@fields); |
797
|
0
|
|
|
|
|
|
$hit->{meta_}{$field} = $val; |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
} |
801
|
|
|
|
|
|
|
} |
802
|
|
|
|
|
|
|
|
803
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
804
|
0
|
|
|
|
|
|
return $hits; |
805
|
|
|
|
|
|
|
} |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
## \@context_data = $dc->parseTableContext($context_buf) |
809
|
|
|
|
|
|
|
sub parseTableContext { |
810
|
0
|
|
|
0
|
0
|
|
my ($dc,$ctx) = @_; |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
##-- defaults |
813
|
0
|
|
|
|
|
|
my $fieldNames = $dc->{fieldNames}; |
814
|
0
|
|
|
|
|
|
my $fs = qr(\Q$dc->{fieldSeparator}\E); |
815
|
0
|
|
|
|
|
|
my $ts = qr(\Q$dc->{tokenSeparator}\E\ *); |
816
|
0
|
|
|
|
|
|
my $hl = $dc->{tableHighlight}; |
817
|
0
|
|
|
|
|
|
my $hlw0 = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); |
818
|
0
|
|
|
|
|
|
my $hlw1 = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
##-- split into sentences |
821
|
0
|
|
|
|
|
|
my $sents = [[],[],[]]; |
822
|
0
|
|
|
|
|
|
my ($sbuf,@s,$w); |
823
|
|
|
|
|
|
|
|
824
|
0
|
|
|
|
|
|
foreach $sbuf (split(/\<\/s\>\s*/,$ctx)) { |
825
|
|
|
|
|
|
|
|
826
|
0
|
0
|
|
|
|
|
if ($sbuf =~ /^\s*/) { |
827
|
|
|
|
|
|
|
##-- target sentence with index dump: parse it |
828
|
0
|
|
|
|
|
|
$sbuf =~ s|^\s*\]*)?\>\s*$ts||; |
829
|
0
|
|
|
|
|
|
@s = map {[0,split($fs,$_)]} split($ts,$sbuf); |
|
0
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
|
831
|
|
|
|
|
|
|
##-- parse words |
832
|
0
|
|
|
|
|
|
foreach $w (@s) { |
833
|
0
|
0
|
0
|
|
|
|
if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { |
834
|
|
|
|
|
|
|
##-- matched token |
835
|
0
|
|
|
|
|
|
$w->[1] =~ s/$hlw0//; |
836
|
0
|
|
|
|
|
|
$w->[$#$w] =~ s/$hlw1//; |
837
|
0
|
|
|
|
|
|
$w->[0] = 1; |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
} |
840
|
0
|
|
|
|
|
|
push(@{$sents->[1]}, @s); |
|
0
|
|
|
|
|
|
|
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
else { |
843
|
|
|
|
|
|
|
##-- context sentence; surface strings only |
844
|
0
|
|
|
|
|
|
$sbuf =~ s|^\s*\]*)?\>$ts||; |
845
|
0
|
|
|
|
|
|
@s = split($ts,$sbuf); |
846
|
0
|
0
|
|
|
|
|
if (!@{$sents->[1]}) { |
|
0
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
##-- left context |
848
|
0
|
|
|
|
|
|
push(@{$sents->[0]}, @s); |
|
0
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
} else { |
850
|
|
|
|
|
|
|
##-- right context |
851
|
0
|
|
|
|
|
|
push(@{$sents->[2]}, @s); |
|
0
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
} |
854
|
|
|
|
|
|
|
} |
855
|
|
|
|
|
|
|
|
856
|
0
|
|
|
|
|
|
return $sents; |
857
|
|
|
|
|
|
|
} |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
861
|
|
|
|
|
|
|
## Hit Parsing: JSON |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
## $obj = $dc->decodeJson($buf) |
864
|
|
|
|
|
|
|
sub decodeJson { |
865
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
866
|
0
|
|
|
|
|
|
my ($bufr) = \$_[0]; |
867
|
0
|
0
|
0
|
|
|
|
if ($dc->{encoding} && !utf8::is_utf8($$bufr)) { |
868
|
0
|
|
|
|
|
|
my $buf = decode($dc->{encoding},$$bufr); |
869
|
0
|
|
|
|
|
|
$bufr = \$buf; |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
|
872
|
0
|
|
0
|
|
|
|
my $module = $JSON_BACKEND // 'JSON'; |
873
|
0
|
|
|
|
|
|
$module =~ s{::}{/}g; |
874
|
0
|
|
|
|
|
|
require "$module.pm"; |
875
|
|
|
|
|
|
|
|
876
|
0
|
|
|
|
|
|
my $jxs = $dc->{jxs}; |
877
|
0
|
0
|
|
|
|
|
$jxs = $dc->{jxs} = $JSON_BACKEND->new->utf8(0)->relaxed(1)->canonical(0) if (!defined($jxs)); |
878
|
0
|
|
|
|
|
|
return $jxs->decode($$bufr); |
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
## $hitList = $dc->parseJsonData($buf) |
882
|
|
|
|
|
|
|
## + returns a DDC::HitList |
883
|
|
|
|
|
|
|
sub parseJsonData { |
884
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
885
|
0
|
|
|
|
|
|
my $data = $dc->decodeJson($_[0]); |
886
|
|
|
|
|
|
|
my $hits = DDC::HitList->new(%$data, |
887
|
|
|
|
|
|
|
start=>$dc->{start}, |
888
|
|
|
|
|
|
|
limit=>$dc->{limit}, |
889
|
0
|
|
|
|
|
|
); |
890
|
|
|
|
|
|
|
|
891
|
0
|
0
|
|
|
|
|
$_ = bless($_,'DDC::Hit') foreach (@{$hits->{hits_}||[]}); |
|
0
|
|
|
|
|
|
|
892
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
893
|
0
|
|
|
|
|
|
return $hits; |
894
|
|
|
|
|
|
|
} |
895
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
897
|
|
|
|
|
|
|
## Hit Parsing: expand_terms() |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
## \@terms = $dc->parseExpandTermsResponse($buf) |
900
|
|
|
|
|
|
|
## @terms = $dc->parseExpandTermsResponse($buf) |
901
|
|
|
|
|
|
|
sub parseExpandTermsResponse { |
902
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
903
|
0
|
0
|
|
|
|
|
my @items = grep {defined($_) && $_ ne ''} split(/[\t\r\n]+/,$_[0]); |
|
0
|
|
|
|
|
|
|
904
|
0
|
0
|
|
|
|
|
die("error in expand_terms response") if ($items[0] !~ /^0 /); |
905
|
0
|
|
|
|
|
|
shift(@items); |
906
|
0
|
0
|
|
|
|
|
return wantarray ? @items : \@items; |
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
1; ##-- be happy |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
__END__ |