line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
## -*- Mode: CPerl -*- |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
## File: DDC::Client.pm |
4
|
|
|
|
|
|
|
## Author: Bryan Jurish |
5
|
|
|
|
|
|
|
## Description: |
6
|
|
|
|
|
|
|
## + DDC Query utilities: client sockets |
7
|
|
|
|
|
|
|
##====================================================================== |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
package DDC::Client; |
10
|
26
|
|
|
26
|
|
9670
|
use DDC::Utils qw(:escape); |
|
26
|
|
|
|
|
71
|
|
|
26
|
|
|
|
|
3991
|
|
11
|
26
|
|
|
26
|
|
177
|
use DDC::HitList; |
|
26
|
|
|
|
|
61
|
|
|
26
|
|
|
|
|
450
|
|
12
|
26
|
|
|
26
|
|
116
|
use DDC::Hit; |
|
26
|
|
|
|
|
44
|
|
|
26
|
|
|
|
|
388
|
|
13
|
26
|
|
|
26
|
|
14525
|
use IO::Handle; |
|
26
|
|
|
|
|
156364
|
|
|
26
|
|
|
|
|
1137
|
|
14
|
26
|
|
|
26
|
|
11648
|
use IO::File; |
|
26
|
|
|
|
|
47796
|
|
|
26
|
|
|
|
|
2679
|
|
15
|
26
|
|
|
26
|
|
12390
|
use IO::Socket::INET; |
|
26
|
|
|
|
|
364435
|
|
|
26
|
|
|
|
|
169
|
|
16
|
26
|
|
|
26
|
|
25300
|
use Encode qw(encode decode); |
|
26
|
|
|
|
|
257621
|
|
|
26
|
|
|
|
|
1908
|
|
17
|
26
|
|
|
26
|
|
196
|
use Carp; |
|
26
|
|
|
|
|
58
|
|
|
26
|
|
|
|
|
1267
|
|
18
|
26
|
|
|
26
|
|
151
|
use strict; |
|
26
|
|
|
|
|
46
|
|
|
26
|
|
|
|
|
1516
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
##====================================================================== |
21
|
|
|
|
|
|
|
## Globals |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
## $ifmt |
24
|
|
|
|
|
|
|
## + pack format to use for integer sizes passed to and from DDC |
25
|
|
|
|
|
|
|
## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) |
26
|
|
|
|
|
|
|
## + for ddc-1.x, use machine word size and endian-ness of server |
27
|
|
|
|
|
|
|
our $ifmt = 'V'; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
## $ilen |
30
|
|
|
|
|
|
|
## + length in bytes of message size integer used for DDC protocol in bytes |
31
|
|
|
|
|
|
|
## + default value should be right for ddc-2.x (always 32-bit unsigned little endian) |
32
|
|
|
|
|
|
|
## + for ddc-1.x, use machine word size and endian-ness of server |
33
|
|
|
|
|
|
|
our $ilen = 4; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
## $JSON_BACKEND |
36
|
|
|
|
|
|
|
## + underlying JSON module (default='JSON') |
37
|
|
|
|
|
|
|
our ($JSON_BACKEND); |
38
|
|
|
|
|
|
|
BEGIN { |
39
|
26
|
50
|
|
26
|
|
156911
|
$JSON_BACKEND = 'JSON' if (!defined($JSON_BACKEND)); |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
##====================================================================== |
43
|
|
|
|
|
|
|
## Constructors etc |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
## $dc = $CLASS_OR_OBJ->new(%args) |
46
|
|
|
|
|
|
|
## + %args: |
47
|
|
|
|
|
|
|
## ( |
48
|
|
|
|
|
|
|
## ##-- connection options |
49
|
|
|
|
|
|
|
## connect=>\%connectArgs, ##-- passed to IO::Socket::(INET|UNIX)->new(); also accepts connect=>$connectURL |
50
|
|
|
|
|
|
|
## mode =>$queryMode, ##-- one of 'table', 'html', 'text', 'json', or 'raw'; default='json' ('html' is not yet supported) |
51
|
|
|
|
|
|
|
## linger =>\@linger, ##-- SO_LINGER socket option; default=[1,0]: immediate termination |
52
|
|
|
|
|
|
|
## ## |
53
|
|
|
|
|
|
|
## ##-- query options (formerly only in DDC::Client::Distributed) |
54
|
|
|
|
|
|
|
## start =>$start, ##-- index of first hit to fetch (default=0) |
55
|
|
|
|
|
|
|
## limit =>$limit, ##-- maximum number of hits to fetch (default=10) |
56
|
|
|
|
|
|
|
## timeout =>$secs, ##-- query timeout in seconds (lower bound, default=60) |
57
|
|
|
|
|
|
|
## hint =>$hint, ##-- navigation hint (optional; default=undef: none) |
58
|
|
|
|
|
|
|
## ## |
59
|
|
|
|
|
|
|
## ##-- hit parsing options (mostly obsolete) |
60
|
|
|
|
|
|
|
## parseMeta=>$bool, ##-- if true, hit metadata will be parsed to %$hit (default=1) |
61
|
|
|
|
|
|
|
## parseContext=>$bool, ##-- if true, hit context data will be parsed to $hit->{ctx_} (default=1) |
62
|
|
|
|
|
|
|
## metaNames =>\@names, ##-- metadata field names (default=undef (none)) |
63
|
|
|
|
|
|
|
## expandFields => $bool, ##-- whether to implicitly expand hit fields to HASH-refs (default=true; only valid for 'table' mode) |
64
|
|
|
|
|
|
|
## keepRaw=>$bool, ##-- if false, raw context buffer will be deleted after parsing context data (default=false) |
65
|
|
|
|
|
|
|
## #defaultField => $name, ##-- default field names (default='w') |
66
|
|
|
|
|
|
|
## |
67
|
|
|
|
|
|
|
## fieldSeparator => $char, ##-- intra-token field separator (default="\x{1f}": ASCII unit separator) |
68
|
|
|
|
|
|
|
## tokenSeparator => $char, ##-- inter-token separator (default="\x{1e}": ASCII record separator) |
69
|
|
|
|
|
|
|
## |
70
|
|
|
|
|
|
|
## textHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, text mode (default=[qw(&& && _& &_)]) |
71
|
|
|
|
|
|
|
## htmlHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, html mode (default=[('','') x 2]) |
72
|
|
|
|
|
|
|
## tableHighlight => [$l0,$r0,$l1,$r1], ##-- highlighting strings, table mode (default=[qw(&& && _& &_)]) |
73
|
|
|
|
|
|
|
## ) |
74
|
|
|
|
|
|
|
## + default \%connectArgs: |
75
|
|
|
|
|
|
|
## Domain=>'INET', ##-- also accepts 'UNIX' |
76
|
|
|
|
|
|
|
## PeerAddr=>'localhost', |
77
|
|
|
|
|
|
|
## PeerPort=>50000, |
78
|
|
|
|
|
|
|
## Proto=>'tcp', |
79
|
|
|
|
|
|
|
## Type=>SOCK_STREAM, |
80
|
|
|
|
|
|
|
## Blocking=>1, |
81
|
|
|
|
|
|
|
## + URL specification of \%connectArgs via connect=>{url=>$url} or connect=>$url (see parseAddr() method): |
82
|
|
|
|
|
|
|
## inet://ADDR:PORT?OPT=VAL... # canonical INET socket URL |
83
|
|
|
|
|
|
|
## unix://UNIX_PATH?OPT=VAL... # canonical UNIX socket URL |
84
|
|
|
|
|
|
|
## unix:UNIX_PATH?OPT=VAL... # = unix://UNIX_PATH?OPT=val |
85
|
|
|
|
|
|
|
## ADDR?OPT=VAL... # = inet://ADDR:5000?OPT=VAL... |
86
|
|
|
|
|
|
|
## :PORT?OPT=VAL... # = inet://localhost:PORT?OPT=VAL... |
87
|
|
|
|
|
|
|
## ADDR:PORT?OPT=VAL... # = inet://ADDR:PORT?OPT=VAL... |
88
|
|
|
|
|
|
|
## /UNIX_PATH?OPT=VAL... # = unix:///UNIX_PATH?POT=VAL... |
89
|
|
|
|
|
|
|
sub new { |
90
|
0
|
|
|
0
|
1
|
|
my ($that,%args) = @_; |
91
|
0
|
|
|
|
|
|
my @connect_args = grep {exists $args{$_}} map {($_,lc($_),uc($_))} qw(Peer PeerAddr PeerPort Url); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
my $connect = $that->parseAddr |
93
|
|
|
|
|
|
|
({ |
94
|
|
|
|
|
|
|
##-- connect: default options |
95
|
|
|
|
|
|
|
Domain=>'INET', |
96
|
|
|
|
|
|
|
PeerAddr=>'localhost', |
97
|
|
|
|
|
|
|
PeerPort=>50000, |
98
|
|
|
|
|
|
|
Proto=>'tcp', |
99
|
|
|
|
|
|
|
Type=>SOCK_STREAM, |
100
|
|
|
|
|
|
|
Blocking=>1, |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
##-- connect: user args |
103
|
|
|
|
|
|
|
(ref($args{'connect'}) |
104
|
0
|
|
|
|
|
|
? %{$args{'connect'}} |
105
|
|
|
|
|
|
|
: ($args{connect} |
106
|
0
|
|
|
|
|
|
? %{$that->parseAddr($args{connect})} |
107
|
|
|
|
|
|
|
: qw())), |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
##-- connect: top-level args |
110
|
0
|
0
|
|
|
|
|
(map {($_=>$args{$_})} @connect_args), |
|
0
|
0
|
|
|
|
|
|
111
|
|
|
|
|
|
|
}); |
112
|
0
|
|
|
|
|
|
delete @args{'connect',@connect_args}; |
113
|
|
|
|
|
|
|
|
114
|
0
|
|
0
|
|
|
|
my $dc =bless { |
115
|
|
|
|
|
|
|
##-- connection options |
116
|
|
|
|
|
|
|
connect=> $connect, |
117
|
|
|
|
|
|
|
linger => [1,0], |
118
|
|
|
|
|
|
|
mode =>'json', |
119
|
|
|
|
|
|
|
encoding => 'UTF-8', |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
##-- query options (formerly in DDC::Client::Distributed) |
122
|
|
|
|
|
|
|
start=>0, |
123
|
|
|
|
|
|
|
limit=>10, |
124
|
|
|
|
|
|
|
timeout=>60, |
125
|
|
|
|
|
|
|
hint=>undef, |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
##-- hit-parsing options |
128
|
|
|
|
|
|
|
parseMeta=>1, |
129
|
|
|
|
|
|
|
parseContext=>1, |
130
|
|
|
|
|
|
|
expandFields=>1, |
131
|
|
|
|
|
|
|
keepRaw=>0, |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
#fieldSeparator => "\x{1f}", |
134
|
|
|
|
|
|
|
#tokenSeparator => "\x{1e}", |
135
|
|
|
|
|
|
|
#defaultField => 'w', |
136
|
|
|
|
|
|
|
#metaNames => undef, |
137
|
|
|
|
|
|
|
#textHighlight=>undef, |
138
|
|
|
|
|
|
|
#tableHighlight=>undef, |
139
|
|
|
|
|
|
|
#htmlHighlight=>undef, |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
%args, |
142
|
|
|
|
|
|
|
}, ref($that)||$that; |
143
|
|
|
|
|
|
|
|
144
|
0
|
0
|
|
|
|
|
if (defined($args{optFile})) { |
145
|
|
|
|
|
|
|
$dc->loadOptFile($args{optFile}) |
146
|
0
|
0
|
|
|
|
|
or confess(__PACKAGE__ . "::new(): could not load options file '$args{optFile}': $!"); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
0
|
0
|
|
|
|
|
$dc->{fieldSeparator} = "\x{1f}" if (!$dc->{fieldSeparator}); |
150
|
0
|
0
|
|
|
|
|
$dc->{tokenSeparator} = "\x{1e}" if (!$dc->{tokenSeparator}); |
151
|
0
|
0
|
|
|
|
|
$dc->{textHighlight} = [qw(&& && _& &_)] if (!$dc->{textHighlight}); |
152
|
0
|
0
|
|
|
|
|
$dc->{tableHighlight} = [qw(&& && _& &_)] if (!$dc->{tableHighlight}); |
153
|
|
|
|
|
|
|
$dc->{htmlHighlight} = [ |
154
|
|
|
|
|
|
|
'','', |
155
|
|
|
|
|
|
|
'','', |
156
|
0
|
0
|
|
|
|
|
] if (!$dc->{htmlHighlight}); |
157
|
|
|
|
|
|
|
|
158
|
0
|
|
|
|
|
|
return $dc; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
##====================================================================== |
162
|
|
|
|
|
|
|
## DDC *.opt file |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile($filename, %opts); |
165
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile($fh, %opts); |
166
|
|
|
|
|
|
|
## $dc = $dc->loadOptFile(\$str, %opts); |
167
|
|
|
|
|
|
|
## Sets client options from a DDC *.opt file: #fieldNames, metaNames, fieldSeparator. |
168
|
|
|
|
|
|
|
## %opts: |
169
|
|
|
|
|
|
|
## ( |
170
|
|
|
|
|
|
|
## clobber => $bool, ##-- whether to clobber existing %$dc fields (default=false) |
171
|
|
|
|
|
|
|
## ) |
172
|
|
|
|
|
|
|
## |
173
|
|
|
|
|
|
|
## WARNING: for whatever reason, DDC does not return metadata fields in the same |
174
|
|
|
|
|
|
|
## order in which they appeared in the *.opt file (nor in any lexicographic order |
175
|
|
|
|
|
|
|
## combination of the fields type, name, and xpath of the 'Bibl' directorive I |
176
|
|
|
|
|
|
|
## have tried), BUT this code assumes that the order in which the 'Bibl' directives |
177
|
|
|
|
|
|
|
## appear in the *.opt file are identical to the order in which DDC returns the |
178
|
|
|
|
|
|
|
## corresponding data in 'text' and 'html' modes. The actual order used by the |
179
|
|
|
|
|
|
|
## server should appear in the server logs. Change the *.opt file you pass to |
180
|
|
|
|
|
|
|
## this function accordingly. |
181
|
|
|
|
|
|
|
sub loadOptFile { |
182
|
0
|
|
|
0
|
0
|
|
my ($dc,$src,%opts) = @_; |
183
|
0
|
|
|
|
|
|
my ($fh); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
##-- get source fh |
186
|
0
|
0
|
|
|
|
|
if (!ref($src)) { |
|
|
0
|
|
|
|
|
|
187
|
0
|
0
|
|
|
|
|
$fh = IO::File->new("<$src") |
188
|
|
|
|
|
|
|
or confess(__PACKAGE__ . "::loadOptFile(): open failed for '$src': $!"); |
189
|
0
|
0
|
|
|
|
|
binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding}); |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
elsif (ref($src) eq 'SCALAR') { |
192
|
0
|
|
|
|
|
|
$fh = IO::Handle->new; |
193
|
0
|
0
|
|
|
|
|
open($fh,'<',$src) |
194
|
|
|
|
|
|
|
or confess(__PACKAGE__ . "::loadOptFile(): open failed for buffer: $!"); |
195
|
0
|
0
|
|
|
|
|
binmode($fh,":encoding($dc->{encoding})") if ($dc->{encoding}); |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
else { |
198
|
0
|
|
|
|
|
|
$fh = $src; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
##-- parse file |
202
|
0
|
|
|
|
|
|
my $clobber = $opts{clobber}; |
203
|
0
|
|
|
|
|
|
my (@indices,@show,@meta,$showMeta); |
204
|
0
|
|
|
|
|
|
while (defined($_=<$fh>)) { |
205
|
0
|
|
|
|
|
|
chomp; |
206
|
0
|
0
|
|
|
|
|
if (/^Indices\s(.*)$/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
207
|
0
|
|
|
|
|
|
@indices = map {s/^\s*\[//; s/\]\s*$//; [split(' ',$_)]} split(/\;\s*/,$1); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
elsif (/^Bibl\s+(\S+)\s+(\d)\s+(\S+)\s+(.*)$/) { |
210
|
0
|
|
|
|
|
|
my ($type,$visible,$name,$xpath) = ($1,$2,$3,$4); |
211
|
0
|
0
|
|
|
|
|
push(@meta,[$type,$visible,$name,$xpath]) if ($visible+0); |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
elsif (/^IndicesToShow\s+(.*)$/) { |
214
|
0
|
|
|
|
|
|
@show = map {$_-1} split(' ',$1); |
|
0
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
elsif (/^OutputBibliographyOfHits\b/) { |
217
|
0
|
|
|
|
|
|
$showMeta = 1; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
elsif (/^InterpDelim[ie]ter\s(.*)$/) { |
220
|
0
|
0
|
0
|
|
|
|
$dc->{fieldSeparator} = unescape($1) if ($clobber || !$dc->{fieldSeparator}); |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
elsif (/^TokenDelim[ie]ter\s(.*)$/) { |
223
|
0
|
0
|
0
|
|
|
|
$dc->{tokenSeparator} = unescape($1) if ($clobber || !$dc->{tokenSeparator}); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
elsif (/^Utf8\s*$/) { |
226
|
0
|
0
|
0
|
|
|
|
$dc->{encoding} = 'utf8' if ($clobber || !$dc->{encoding}); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
elsif (/^HtmlHighlighting\s*(.*)$/) { |
229
|
0
|
0
|
0
|
|
|
|
$dc->{htmlHighlight} = [map {unescape($1)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{htmlHighlight}); |
|
0
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
elsif (/^TextHighlighting\s*(.*)$/) { |
232
|
0
|
0
|
0
|
|
|
|
$dc->{textHighlight} = [map {unescape($1)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{textHighlight}); |
|
0
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
elsif (/^TableHighlighting\s*(.*)$/) { |
235
|
0
|
0
|
0
|
|
|
|
$dc->{tableHighlight} = [map {unescape($_)} split(/\s*\;\s*/,$1,4)] if ($clobber || !$dc->{tableHighlight}); |
|
0
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
##-- setup local options |
240
|
0
|
0
|
|
|
|
|
@show = (0) if (!@show); |
241
|
0
|
0
|
0
|
|
|
|
$dc->{fieldNames} = [map {$_->[1]} @indices[@show]] if ($clobber || !$dc->{fieldNames}); |
|
0
|
|
|
|
|
|
|
242
|
0
|
0
|
|
|
|
|
if (!$dc->{metaNames}) { |
243
|
0
|
0
|
|
|
|
|
if (!$showMeta) { |
|
|
0
|
|
|
|
|
|
244
|
0
|
|
|
|
|
|
$dc->{metaNames} = ['file_']; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
elsif (@meta) { |
247
|
0
|
0
|
0
|
|
|
|
$dc->{metaNames} = [map {$_->[2]} @meta] if (@meta && ($clobber || !$dc->{metaNames})); |
|
0
|
|
0
|
|
|
|
|
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
##-- cleanup |
252
|
0
|
0
|
0
|
|
|
|
$fh->close if (!ref($src) || ref($src) eq 'SCALAR'); |
253
|
0
|
|
|
|
|
|
return $dc; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
##====================================================================== |
257
|
|
|
|
|
|
|
## Query requests (formerly in DDC::Client::Distributed) |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
## $buf = $dc->queryRaw($query_string) |
260
|
|
|
|
|
|
|
## $buf = $dc->queryRaw(\@raw_strings) |
261
|
|
|
|
|
|
|
sub queryRaw { |
262
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
263
|
0
|
|
|
|
|
|
my $buf = $dc->queryRawNC(@_); |
264
|
0
|
|
|
|
|
|
$dc->close(); ##-- this apparently has to happen: bummer |
265
|
0
|
|
|
|
|
|
return $buf; |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
## $buf = $dc->queryRawNC($query_string) |
269
|
|
|
|
|
|
|
## $buf = $dc->queryRawNC(\@raw_strings) |
270
|
|
|
|
|
|
|
## + guts for queryRaw() without implicit close() |
271
|
|
|
|
|
|
|
sub queryRawNC { |
272
|
0
|
|
|
0
|
1
|
|
my ($dc,$query) = @_; |
273
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::isa($query,'ARRAY')) { |
|
|
0
|
|
|
|
|
|
274
|
|
|
|
|
|
|
##-- raw array: send raw data to DDC |
275
|
0
|
|
|
|
|
|
$dc->send(join("\001",@$query)); |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
elsif ($dc->{mode} =~ /^(?:raw|req)/i) { |
278
|
|
|
|
|
|
|
##-- "raw" or "request" mode: send raw request to DDC |
279
|
0
|
|
|
|
|
|
$dc->send($query); |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
else { |
282
|
|
|
|
|
|
|
##-- query string: send 'run-query Distributed' |
283
|
|
|
|
|
|
|
$dc->send(join("\001", |
284
|
|
|
|
|
|
|
"run_query Distributed", |
285
|
|
|
|
|
|
|
$query, |
286
|
|
|
|
|
|
|
$dc->{mode}, |
287
|
0
|
0
|
|
|
|
|
join(' ', @$dc{qw(start limit timeout)}, ($dc->{hint} ? $dc->{hint} : qw())))); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
##-- get output buffer |
290
|
0
|
|
|
|
|
|
return $dc->readData(); |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
## @bufs = $dc->queryMulti($queryString1, $queryString2, ...) |
294
|
|
|
|
|
|
|
## @bufs = $dc->queryMulti(\@queryStrings1, \@queryStrings2, ...) |
295
|
|
|
|
|
|
|
sub queryMulti { |
296
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
297
|
0
|
|
|
|
|
|
my @bufs = map {$dc->queryRawNC($_)} @_; |
|
0
|
|
|
|
|
|
|
298
|
0
|
|
|
|
|
|
$dc->close(); ##-- this apparently has to happen: bummer |
299
|
0
|
|
|
|
|
|
return @bufs; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
## $obj = $dc->queryJson($query_string) |
303
|
|
|
|
|
|
|
## $obj = $dc->queryJson(\@raw_strings) |
304
|
|
|
|
|
|
|
sub queryJson { |
305
|
0
|
|
|
0
|
0
|
|
my ($dc,$query) = @_; |
306
|
0
|
|
|
|
|
|
return $dc->decodeJson($dc->queryRaw($query)); |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
## $hits = $dc->query($query_string) |
310
|
|
|
|
|
|
|
sub query { |
311
|
0
|
|
|
0
|
1
|
|
my ($dc,$query) = @_; |
312
|
0
|
|
|
|
|
|
return $dc->parseData($dc->queryRaw($query)); |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
##====================================================================== |
317
|
|
|
|
|
|
|
## Common Requests |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
## $rsp = $dc->request($request_string) |
320
|
|
|
|
|
|
|
sub request { |
321
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
322
|
0
|
|
|
|
|
|
my $buf = $dc->requestNC(@_); |
323
|
0
|
|
|
|
|
|
$dc->close(); |
324
|
0
|
|
|
|
|
|
return $buf; |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
## $rsp = $dc->requestNC($request_string) |
328
|
|
|
|
|
|
|
## + guts for request() which doesn't implicitly call close() |
329
|
|
|
|
|
|
|
sub requestNC { |
330
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
331
|
0
|
|
|
|
|
|
$dc->send($_[0]); |
332
|
0
|
|
|
|
|
|
return $dc->readData(); |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
## $data = $dc->requestJson($request_string) |
336
|
|
|
|
|
|
|
sub requestJson { |
337
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
338
|
0
|
|
|
|
|
|
return $dc->decodeJson($dc->request($_[0])); |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
## $server_version = $dc->version() |
342
|
|
|
|
|
|
|
sub version { |
343
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
344
|
0
|
|
|
|
|
|
return $dc->request("version"); |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
## $status = $dc->status() |
348
|
|
|
|
|
|
|
## $status = $dc->status($timeout) |
349
|
|
|
|
|
|
|
sub status { |
350
|
0
|
|
|
0
|
1
|
|
my ($dc,$timeout) = @_; |
351
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if (!defined($timeout)); |
352
|
0
|
0
|
|
|
|
|
return $dc->requestJson("status".(defined($timeout) ? " $timeout" : '')); |
353
|
|
|
|
|
|
|
} |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
## $vstatus = $dc->vstatus() |
356
|
|
|
|
|
|
|
## $vstatus = $dc->vstatus($timeout) |
357
|
|
|
|
|
|
|
sub vstatus { |
358
|
0
|
|
|
0
|
1
|
|
my ($dc,$timeout) = @_; |
359
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if (!defined($timeout)); |
360
|
0
|
0
|
|
|
|
|
return $dc->requestJson("vstatus".(defined($timeout) ? " $timeout" : '')); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
## $info = $dc->info() |
364
|
|
|
|
|
|
|
## $info = $dc->info($timeout) |
365
|
|
|
|
|
|
|
sub info { |
366
|
0
|
|
|
0
|
1
|
|
my ($dc,$timeout) = @_; |
367
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if (!defined($timeout)); |
368
|
0
|
0
|
|
|
|
|
return $dc->requestJson("info".(defined($timeout) ? " $timeout" : '')); |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term) |
372
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout) |
373
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms($pipeline, $term, $timeout, $subcorpus) |
374
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms) |
375
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout) |
376
|
|
|
|
|
|
|
## $expandRaw = $dc->expand_terms(\@pipeline, \@terms, $timeout, $subcorpus) |
377
|
|
|
|
|
|
|
sub expand_terms { |
378
|
0
|
|
|
0
|
1
|
|
my ($dc,$chain,$terms,$timeout,$subcorpus) = @_; |
379
|
0
|
0
|
|
|
|
|
$chain = join('|', @$chain) if (UNIVERSAL::isa($chain,'ARRAY')); |
380
|
0
|
0
|
|
|
|
|
$terms = join("\t", @$terms) if (UNIVERSAL::isa($terms,'ARRAY')); |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
##-- hack: detect swapping of $timeout and $subcorpus (old DDC::Client::Distributed-style) |
383
|
0
|
0
|
|
|
|
|
$timeout = '' if (!defined($timeout)); |
384
|
0
|
0
|
|
|
|
|
$subcorpus = '' if (!defined($subcorpus)); |
385
|
0
|
0
|
0
|
|
|
|
($timeout,$subcorpus) = ($subcorpus,$timeout) |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
386
|
|
|
|
|
|
|
if ($timeout ne '' && $subcorpus ne '' && $timeout =~ /[0-9]/ && $subcorpus !~ /[0-9]/); |
387
|
|
|
|
|
|
|
|
388
|
0
|
0
|
|
|
|
|
$timeout = $dc->{timeout} if ($timeout eq ''); |
389
|
0
|
0
|
0
|
|
|
|
$timeout = 5 if (!defined($timeout) || $timeout eq ''); |
390
|
0
|
|
|
|
|
|
$dc->send(join("\x01", 'expand_terms ', $chain, $terms, $timeout, $subcorpus)); |
391
|
|
|
|
|
|
|
##-- get output buffer |
392
|
0
|
|
|
|
|
|
my $buf = $dc->readData(); |
393
|
0
|
|
|
|
|
|
$dc->close(); ##-- this apparently has to happen: bummer |
394
|
0
|
|
|
|
|
|
return $buf; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term) |
398
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term, $timeout) |
399
|
|
|
|
|
|
|
## \@terms = $dc->expand($pipeline, $term, $timeout, $subcorpus) |
400
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms) |
401
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms, $timeout) |
402
|
|
|
|
|
|
|
## \@terms = $dc->expand(\@pipeline, \@terms, $timeout, $subcorpus) |
403
|
|
|
|
|
|
|
sub expand { |
404
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
405
|
0
|
|
|
|
|
|
return $dc->parseExpandTermsResponse($dc->expand_terms(@_)); |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
## $buf = $dc->get_first_hits($query) |
409
|
|
|
|
|
|
|
## $buf = $dc->get_first_hits($query,$timeout?,$limit?,$hint?) |
410
|
|
|
|
|
|
|
sub get_first_hits { |
411
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
412
|
0
|
|
|
|
|
|
my $query = shift; |
413
|
0
|
0
|
|
|
|
|
my $timeout = @_ ? shift : $dc->{timeout}; |
414
|
0
|
0
|
|
|
|
|
my $limit = @_ ? shift : $dc->{limit}; |
415
|
0
|
0
|
|
|
|
|
my $hint = @_ ? shift : $dc->{hint}; |
416
|
0
|
0
|
|
|
|
|
return $dc->request("get_first_hits $query\x{01}$timeout $limit".($hint ? " $hint" : '')); |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
## $buf = $dc->get_hit_strings($format?,$start?,$limit?) |
420
|
|
|
|
|
|
|
sub get_hit_strings { |
421
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
422
|
0
|
0
|
|
|
|
|
my $format = @_ ? shift : ($dc->{mode} eq 'raw' ? 'json' : ''); |
|
|
0
|
|
|
|
|
|
423
|
0
|
0
|
|
|
|
|
my $start = @_ ? shift : $dc->{start}; |
424
|
0
|
0
|
|
|
|
|
my $limit = @_ ? shift : $dc->{limit}; |
425
|
0
|
|
|
|
|
|
return $dc->request("get_hit_strings $format\x{01}$start $limit"); |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
## $buf = $dc->run_query($corpus,$query,$format?,$start?,$limit?,$timeout?,$hint?) |
430
|
|
|
|
|
|
|
sub run_query { |
431
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
432
|
0
|
|
|
|
|
|
my $corpus = shift; |
433
|
0
|
|
|
|
|
|
my $query = shift; |
434
|
0
|
0
|
|
|
|
|
my $format = @_ ? shift : $dc->{mode}; |
435
|
0
|
0
|
|
|
|
|
my $start = @_ ? shift : $dc->{start}; |
436
|
0
|
0
|
|
|
|
|
my $limit = @_ ? shift : $dc->{limit}; |
437
|
0
|
0
|
|
|
|
|
my $timeout = @_ ? shift : $dc->{timeout}; |
438
|
0
|
0
|
|
|
|
|
my $hint = @_ ? shift : $dc->{hint}; |
439
|
0
|
0
|
|
|
|
|
$corpus = 'Distributed' if (!defined($corpus)); |
440
|
0
|
0
|
|
|
|
|
return $dc->request("run_query $corpus\x{01}$query\x{01}$format\x{01}$start $limit $timeout".($hint ? " $hint" : '')); |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
##====================================================================== |
444
|
|
|
|
|
|
|
## Low-level communications |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
## \%connect = $dc->parseAddr() |
447
|
|
|
|
|
|
|
## \%connect = $CLASS_OR_OBJECT->parseAddr(\%connect, $PEER_OR_LOCAL='peer', %options) |
448
|
|
|
|
|
|
|
## \%connect = $CLASS_OR_OBJECT->parserAddr({url=>$url}, $PEER_OR_LOCAL='peer', %options) |
449
|
|
|
|
|
|
|
## + parses connect URLs to option-hashes suitable for use as $dc->{connect} |
450
|
|
|
|
|
|
|
## + supported URLs formats: |
451
|
|
|
|
|
|
|
## inet://ADDR:PORT?OPT=VAL... # canonical INET socket URL |
452
|
|
|
|
|
|
|
## unix://UNIX_PATH?OPT=VAL... # canonical UNIX socket URL |
453
|
|
|
|
|
|
|
## unix:UNIX_PATH?OPT=VAL... # = unix://UNIX_PATH?OPT=val |
454
|
|
|
|
|
|
|
## ADDR?OPT=VAL... # = inet://ADDR:5000?OPT=VAL... |
455
|
|
|
|
|
|
|
## :PORT?OPT=VAL... # = inet://localhost:PORT?OPT=VAL... |
456
|
|
|
|
|
|
|
## ADDR:PORT?OPT=VAL... # = inet://ADDR:PORT?OPT=VAL... |
457
|
|
|
|
|
|
|
## /UNIX_PATH?OPT=VAL... # = unix:///UNIX_PATH?POT=VAL... |
458
|
|
|
|
|
|
|
sub parseAddr { |
459
|
0
|
|
|
0
|
1
|
|
my ($that,$connect,$prefix,%opts) = @_; |
460
|
0
|
|
|
|
|
|
my ($override); |
461
|
0
|
0
|
0
|
|
|
|
if (!$connect && ref($that)) { |
462
|
0
|
|
|
|
|
|
$connect = $that->{connect}; |
463
|
0
|
|
|
|
|
|
$override = 1; |
464
|
|
|
|
|
|
|
} |
465
|
0
|
|
0
|
|
|
|
$connect //= 'inet://localhost:50000'; |
466
|
0
|
0
|
|
|
|
|
$connect = {url=>$connect} if (!UNIVERSAL::isa($connect,'HASH')); |
467
|
|
|
|
|
|
|
|
468
|
0
|
|
0
|
|
|
|
$prefix ||= 'Peer'; |
469
|
0
|
|
|
|
|
|
$prefix = ucfirst($prefix); |
470
|
0
|
|
0
|
|
|
|
my $url = $connect->{URL} || $connect->{Url} || $connect->{url}; |
471
|
0
|
0
|
|
|
|
|
if (defined($url)) { |
472
|
0
|
|
|
|
|
|
my ($base,$opts) = split(/\?/,$url,2); |
473
|
0
|
0
|
|
|
|
|
my $scheme = ($base =~ s{^([\w\+\-]+):(?://)?}{} ? $1 : ''); |
474
|
0
|
0
|
0
|
|
|
|
if (lc($scheme) eq 'unix' || (!$scheme && $base =~ m{^/})) { |
|
|
0
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
475
|
0
|
|
|
|
|
|
$connect->{Domain} = 'UNIX'; |
476
|
0
|
|
|
|
|
|
$connect->{$prefix} = $base; |
477
|
|
|
|
|
|
|
} |
478
|
0
|
|
|
|
|
|
elsif (!$scheme || grep {$_ eq lc($scheme)} qw(inet tcp)) { |
479
|
0
|
|
|
|
|
|
$connect->{Domain} = 'INET'; |
480
|
0
|
|
|
|
|
|
my ($host,$port) = split(':',$base,2); |
481
|
0
|
|
0
|
|
|
|
$host ||= 'localhost'; |
482
|
0
|
|
0
|
|
|
|
$port ||= 50000; |
483
|
0
|
|
|
|
|
|
@$connect{"${prefix}Addr","${prefix}Port"} = ($host,$port); |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
else { |
486
|
0
|
|
|
|
|
|
die(__PACKAGE__, "::parseAddr(): unsupported scheme '$scheme' for URL $url"); |
487
|
|
|
|
|
|
|
} |
488
|
0
|
|
0
|
|
|
|
my %urlopts = map {split(/=/,$_,2)} grep {$_} split(/[\&\;]/,($opts//'')); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
489
|
0
|
|
|
|
|
|
@$connect{keys %urlopts} = values %urlopts; |
490
|
|
|
|
|
|
|
} |
491
|
0
|
|
|
|
|
|
@$connect{keys %opts} = values %opts; |
492
|
|
|
|
|
|
|
|
493
|
0
|
0
|
|
|
|
|
$that->{connect} = $connect if ($override); |
494
|
0
|
|
|
|
|
|
return $connect; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
## $str = $dc->addrStr() |
498
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr(\%connect,$PEER_OR_LOCAL) |
499
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr($url,$PEER_OR_LOCAL) |
500
|
|
|
|
|
|
|
## $str = $CLASS_OR_OBJECT->addrStr($sock,$PEER_OR_LOCAL) |
501
|
|
|
|
|
|
|
sub addrStr { |
502
|
0
|
|
|
0
|
1
|
|
my ($that,$addr,$prefix) = @_; |
503
|
0
|
0
|
0
|
|
|
|
$addr = ($that->{sock} || $that->{connect}) if (ref($that) && !defined($addr)); |
|
|
|
0
|
|
|
|
|
504
|
0
|
|
0
|
|
|
|
$prefix ||= 'Peer'; |
505
|
0
|
|
|
|
|
|
$prefix = ucfirst($prefix); |
506
|
|
|
|
|
|
|
|
507
|
0
|
0
|
|
|
|
|
if (UNIVERSAL::isa($addr,'IO::Socket::UNIX')) { |
|
|
0
|
|
|
|
|
|
508
|
0
|
|
|
|
|
|
return "unix://$addr->{$prefix}"; |
509
|
|
|
|
|
|
|
} |
510
|
|
|
|
|
|
|
elsif (UNIVERSAL::isa($addr,'IO::Socket::INET')) { |
511
|
0
|
0
|
|
|
|
|
my $mprefix = (lc($prefix) eq 'peer' ? 'peer' : 'sock'); |
512
|
0
|
|
|
|
|
|
return "inet://".$addr->can($mprefix."host")->($addr).":".$addr->can($mprefix."port")->($addr); |
513
|
|
|
|
|
|
|
} |
514
|
0
|
0
|
|
|
|
|
$addr = $addr->{connect} if (UNIVERSAL::isa($addr,'DDC::Client')); |
515
|
0
|
0
|
|
|
|
|
$addr = $that->parseAddr($addr,$prefix) if (!ref($addr)); |
516
|
0
|
|
|
|
|
|
my ($url); |
517
|
|
|
|
|
|
|
#my %uopts = %$addr; |
518
|
0
|
0
|
|
|
|
|
if ($addr->{Domain} eq 'UNIX') { |
519
|
0
|
|
|
|
|
|
$url = "unix://$addr->{$prefix}"; |
520
|
|
|
|
|
|
|
#delete $uopts{$prefix}; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
else { |
523
|
|
|
|
|
|
|
$url = "inet://".($addr->{"${prefix}Addr"} && $addr->{"${prefix}Port"} |
524
|
|
|
|
|
|
|
? ($addr->{"${prefix}Addr"}.":".$addr->{"${prefix}Port"}) |
525
|
0
|
0
|
0
|
|
|
|
: $addr->{"${prefix}Addr"}); |
526
|
|
|
|
|
|
|
#delete @uopts{"${prefix}Addr","${prefix}Port"}; |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
#delete $opts{Domain}; |
529
|
|
|
|
|
|
|
#if (%uopts) { |
530
|
|
|
|
|
|
|
# $url .= '?'.join('&',map {("$_=$uopts{$_}")} sort keys %uopts); |
531
|
|
|
|
|
|
|
#} |
532
|
0
|
|
|
|
|
|
return $url; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
## $io_socket = $dc->open() |
536
|
|
|
|
|
|
|
sub open { |
537
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
538
|
0
|
|
|
|
|
|
$dc->parseAddr(); |
539
|
0
|
|
0
|
|
|
|
my $domain = $dc->{connect}{Domain} // 'INET'; |
540
|
0
|
0
|
|
|
|
|
if (lc($domain) eq 'unix') { |
541
|
|
|
|
|
|
|
##-- v0.43: use unix-domain socket connection |
542
|
0
|
|
|
|
|
|
$dc->{sock} = IO::Socket::UNIX->new(%{$dc->{'connect'}}); |
|
0
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
} else { |
544
|
|
|
|
|
|
|
##-- compatibility hack: use INET-domain sockets (TCP) |
545
|
0
|
|
|
|
|
|
$dc->{sock} = IO::Socket::INET->new(%{$dc->{'connect'}}); |
|
0
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
} |
547
|
0
|
0
|
|
|
|
|
return undef if (!$dc->{sock}); |
548
|
0
|
0
|
|
|
|
|
$dc->{sock}->setsockopt(SOL_SOCKET, SO_LINGER, pack('II',@{$dc->{linger}})) if ($dc->{linger}); |
|
0
|
|
|
|
|
|
|
549
|
0
|
|
|
|
|
|
$dc->{sock}->autoflush(1); |
550
|
0
|
|
|
|
|
|
return $dc->{sock}; |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
## undef = $dc->close() |
554
|
|
|
|
|
|
|
sub close { |
555
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
556
|
0
|
0
|
|
|
|
|
$dc->{sock}->close() if (defined($dc->{sock})); |
557
|
0
|
|
|
|
|
|
delete($dc->{sock}); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
## $encoded = $dc->ddc_encode(@message_strings) |
561
|
|
|
|
|
|
|
sub ddc_encode { |
562
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
563
|
0
|
|
|
|
|
|
my $msg = join('',@_); |
564
|
0
|
0
|
0
|
|
|
|
$msg = encode($dc->{encoding},$msg) if ($dc->{encoding} && utf8::is_utf8($msg)); |
565
|
0
|
|
|
|
|
|
return pack($ifmt,length($msg)) . $msg; |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
## $decoded = $dc->ddc_decode($response_buf) |
569
|
|
|
|
|
|
|
sub ddc_decode { |
570
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
571
|
0
|
|
|
|
|
|
my $buf = unpack("$ifmt/a*",$_[0]); |
572
|
0
|
0
|
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding}); |
573
|
0
|
|
|
|
|
|
return $buf; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
## undef = $dc->send(@message_strings) |
577
|
|
|
|
|
|
|
## + sends @message_strings |
578
|
|
|
|
|
|
|
sub send { |
579
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
580
|
0
|
0
|
|
|
|
|
$dc->open() if (!defined($dc->{sock})); |
581
|
0
|
|
|
|
|
|
return $dc->sendfh($dc->{sock}, @_); |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
## undef = $dc->sendfh($fh,@message_strings) |
585
|
|
|
|
|
|
|
## + sends @message_strings to $fh, prepending total length |
586
|
|
|
|
|
|
|
sub sendfh { |
587
|
0
|
|
|
0
|
1
|
|
my ($dc,$fh) = (shift,shift); |
588
|
0
|
|
|
|
|
|
$fh->print( $dc->ddc_encode(@_) ); |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
## $size = $dc->readSize() |
592
|
|
|
|
|
|
|
## $size = $dc->readSize($fh) |
593
|
|
|
|
|
|
|
sub readSize { |
594
|
0
|
|
|
0
|
1
|
|
my ($dc,$fh) = @_; |
595
|
0
|
|
|
|
|
|
my ($size_packed); |
596
|
0
|
0
|
|
|
|
|
$fh = $dc->{sock} if (!$fh); |
597
|
0
|
0
|
0
|
|
|
|
confess(ref($dc), "::readSize(): could not read size from socket: $!") |
598
|
|
|
|
|
|
|
if (($fh->read($size_packed,$ilen)||0) != $ilen); |
599
|
0
|
0
|
|
|
|
|
return 0 if (!defined($size_packed)); |
600
|
0
|
|
|
|
|
|
return unpack($ifmt,$size_packed); |
601
|
|
|
|
|
|
|
} |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
## $data = $dc->readBytes($nbytes) |
604
|
|
|
|
|
|
|
## $data = $dc->readBytes($nbytes,$fh) |
605
|
|
|
|
|
|
|
sub readBytes { |
606
|
0
|
|
|
0
|
1
|
|
my ($dc,$nbytes,$fh) = @_; |
607
|
0
|
|
|
|
|
|
my ($buf); |
608
|
0
|
0
|
|
|
|
|
$fh = $dc->{sock} if (!$fh); |
609
|
0
|
|
|
|
|
|
my $nread = $fh->read($buf,$nbytes); |
610
|
0
|
0
|
|
|
|
|
confess(ref($dc), "::readBytes(): failed to read $nbytes bytes of data (only found $nread): $!") |
611
|
|
|
|
|
|
|
if ($nread != $nbytes); |
612
|
0
|
|
|
|
|
|
return $buf; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
## $data = $dc->readData() |
616
|
|
|
|
|
|
|
## $data = $dc->readData($fh) |
617
|
0
|
|
|
0
|
1
|
|
sub readData { return $_[0]->readBytes($_[0]->readSize($_[1]),$_[1]); } |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
##====================================================================== |
620
|
|
|
|
|
|
|
## Hit Parsing |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
## $hitList = $dc->parseData($buf) |
623
|
|
|
|
|
|
|
sub parseData { |
624
|
0
|
0
|
|
0
|
0
|
|
return $_[0]->parseJsonData($_[1]) if ($_[0]{mode} eq 'json'); |
625
|
0
|
0
|
|
|
|
|
return $_[0]->parseTableData($_[1]) if ($_[0]{mode} eq 'table'); |
626
|
0
|
0
|
|
|
|
|
return $_[0]->parseTextData($_[1]) if ($_[0]{mode} eq 'text'); |
627
|
0
|
0
|
|
|
|
|
return $_[0]->parseHtmlData($_[1]) if ($_[0]{mode} eq 'html'); |
628
|
0
|
|
|
|
|
|
confess(__PACKAGE__ . "::parseData(): unknown query mode '$_[0]{mode}'"); |
629
|
|
|
|
|
|
|
} |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
632
|
|
|
|
|
|
|
## Hit Parsing: Text |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
## $hitList = $dc->parseTextData($buf) |
635
|
|
|
|
|
|
|
## + returns a DDC::HitList |
636
|
|
|
|
|
|
|
sub parseTextData { |
637
|
0
|
|
|
0
|
1
|
|
my ($dc,$buf) = @_; |
638
|
0
|
|
|
|
|
|
my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
##-- parse response macro structure |
641
|
0
|
0
|
0
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); |
642
|
0
|
|
|
|
|
|
my ($buflines,$bufinfo) = split("\001", $buf, 2); |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
##-- parse administrative data from response footer |
645
|
0
|
|
|
|
|
|
chomp($bufinfo); |
646
|
0
|
|
|
|
|
|
@$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
##-- successful response: parse hit data |
649
|
0
|
|
|
|
|
|
my @buflines = split(/\n/,$buflines); |
650
|
0
|
|
0
|
|
|
|
my $metaNames = $dc->{metaNames} || []; |
651
|
0
|
|
|
|
|
|
my ($bufline,$hit,@fields,$ctxbuf); |
652
|
0
|
|
|
|
|
|
foreach $bufline (@buflines) { |
653
|
0
|
0
|
|
|
|
|
if ($bufline =~ /^Corpora Distribution\:(.*)$/) { |
|
|
0
|
|
|
|
|
|
654
|
0
|
|
|
|
|
|
$hits->{dhits_} = $1; |
655
|
0
|
|
|
|
|
|
next; |
656
|
|
|
|
|
|
|
} elsif ($bufline =~ /^Relevant Documents Distribution:(.*)$/) { |
657
|
0
|
|
|
|
|
|
$hits->{ddocs_} = $1; |
658
|
0
|
|
|
|
|
|
next; |
659
|
|
|
|
|
|
|
} |
660
|
0
|
|
|
|
|
|
push(@{$hits->{hits_}},$hit=DDC::Hit->new); |
|
0
|
|
|
|
|
|
|
661
|
0
|
0
|
|
|
|
|
$hit->{raw_} = $bufline if ($dc->{keepRaw}); |
662
|
|
|
|
|
|
|
|
663
|
0
|
0
|
0
|
|
|
|
if ($dc->{parseMeta} || $dc->{parseContext}) { |
664
|
0
|
|
|
|
|
|
@fields = split(/ ### /, $bufline); |
665
|
0
|
|
|
|
|
|
$ctxbuf = pop(@fields); |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
##-- parse: metadata |
668
|
0
|
0
|
|
|
|
|
if ($dc->{parseMeta}) { |
669
|
0
|
|
|
|
|
|
$hit->{meta_}{file_} = shift(@fields); |
670
|
0
|
|
|
|
|
|
$hit->{meta_}{page_} = shift(@fields); |
671
|
0
|
|
|
|
|
|
$hit->{meta_}{indices_} = [split(' ', pop(@fields))]; |
672
|
0
|
|
0
|
|
|
|
$hit->{meta_}{$metaNames->[$_]||"${_}_"} = $fields[$_] foreach (0..$#fields); |
673
|
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
##-- parse: context |
676
|
0
|
0
|
|
|
|
|
$hit->{ctx_} = $dc->parseTextContext($ctxbuf) if ($dc->{parseContext}); |
677
|
|
|
|
|
|
|
} |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
681
|
0
|
|
|
|
|
|
return $hits; |
682
|
|
|
|
|
|
|
} |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
## \@context_data = $dc->parseTextContext($context_buf) |
686
|
|
|
|
|
|
|
sub parseTextContext { |
687
|
0
|
|
|
0
|
0
|
|
my ($dc,$ctx) = @_; |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
##-- defaults |
690
|
0
|
|
|
|
|
|
my $fieldNames = $dc->{fieldNames}; |
691
|
0
|
|
|
|
|
|
my $fs = qr(\Q$dc->{fieldSeparator}\E); |
692
|
0
|
|
|
|
|
|
my $ts = qr(\Q$dc->{tokenSeparator}\E\ *); |
693
|
0
|
|
|
|
|
|
my $hl = $dc->{textHighlight}; |
694
|
0
|
|
|
|
|
|
my $hls = qr(\Q$dc->{tokenSeparator}\E\ *\Q$hl->[0]\E); |
695
|
0
|
|
|
|
|
|
my $hlw0 = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); |
696
|
0
|
|
|
|
|
|
my $hlw1 = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
##-- split into sentences |
699
|
0
|
|
|
|
|
|
$ctx =~ s/^\s*//; |
700
|
0
|
|
|
|
|
|
my ($sbuf,@s,$w); |
701
|
0
|
|
|
|
|
|
my $sents = [[],[],[]]; |
702
|
0
|
|
|
|
|
|
foreach $sbuf (split(/ {4}/,$ctx)) { |
703
|
|
|
|
|
|
|
|
704
|
0
|
0
|
|
|
|
|
if ($sbuf =~ $hls) { |
705
|
|
|
|
|
|
|
##-- target sentence with index dump: parse it |
706
|
0
|
|
|
|
|
|
$sbuf =~ s/^$ts//; |
707
|
0
|
|
|
|
|
|
@s = map {[0,split($fs,$_)]} split($ts,$sbuf); |
|
0
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
##-- parse words |
710
|
0
|
|
|
|
|
|
foreach $w (@s) { |
711
|
0
|
0
|
0
|
|
|
|
if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { |
712
|
|
|
|
|
|
|
##-- matched token |
713
|
0
|
|
|
|
|
|
$w->[1] =~ s/$hlw0//; |
714
|
0
|
|
|
|
|
|
$w->[$#$w] =~ s/$hlw1//; |
715
|
0
|
|
|
|
|
|
$w->[0] = 1; |
716
|
|
|
|
|
|
|
} |
717
|
|
|
|
|
|
|
} |
718
|
0
|
|
|
|
|
|
push(@{$sents->[1]},@s); |
|
0
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
} |
720
|
|
|
|
|
|
|
else { |
721
|
|
|
|
|
|
|
##-- context sentence: surface strings only |
722
|
0
|
|
|
|
|
|
$sbuf =~ s/^$ts//; |
723
|
0
|
|
|
|
|
|
@s = split($ts,$sbuf); |
724
|
0
|
0
|
|
|
|
|
if (!@{$sents->[1]}) { |
|
0
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
##-- left context |
726
|
0
|
|
|
|
|
|
push(@{$sents->[0]}, @s); |
|
0
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
} else { |
728
|
|
|
|
|
|
|
##-- right context |
729
|
0
|
|
|
|
|
|
push(@{$sents->[2]}, @s); |
|
0
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
} |
732
|
|
|
|
|
|
|
} |
733
|
|
|
|
|
|
|
|
734
|
0
|
|
|
|
|
|
return $sents; |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
738
|
|
|
|
|
|
|
## Hit Parsing: Table |
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
## $hitList = $dc->parseTableData($buf) |
741
|
|
|
|
|
|
|
## + returns a DDC::HitList |
742
|
|
|
|
|
|
|
sub parseTableData { |
743
|
0
|
|
|
0
|
1
|
|
my ($dc,$buf) = @_; |
744
|
0
|
|
|
|
|
|
my $hits = DDC::HitList->new(start=>$dc->{start},limit=>$dc->{limit}); |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
##-- parse response macro structure |
747
|
0
|
0
|
0
|
|
|
|
$buf = decode($dc->{encoding},$buf) if ($dc->{encoding} && !utf8::is_utf8($buf)); |
748
|
0
|
|
|
|
|
|
my ($buflines,$bufinfo) = split("\001", $buf, 2); |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
##-- parse administrative data from response footer |
751
|
0
|
|
|
|
|
|
chomp($bufinfo); |
752
|
0
|
|
|
|
|
|
@$hits{qw(istatus_ nstatus_ end_ nhits_ ndocs_ error_)} = split(' ', $bufinfo,6); |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
##-- successful response: parse hit data |
755
|
0
|
|
|
|
|
|
my @buflines = split(/\n/,$buflines); |
756
|
0
|
|
|
|
|
|
my ($bufline,$hit,@fields,$field,$val); |
757
|
0
|
|
|
|
|
|
foreach $bufline (@buflines) { |
758
|
0
|
|
|
|
|
|
push(@{$hits->{hits_}},$hit=DDC::Hit->new); |
|
0
|
|
|
|
|
|
|
759
|
0
|
0
|
|
|
|
|
$hit->{raw_} = $bufline if ($dc->{keepRaw}); |
760
|
|
|
|
|
|
|
|
761
|
0
|
0
|
0
|
|
|
|
if ($dc->{parseMeta} || $dc->{parseContext}) { |
762
|
0
|
|
|
|
|
|
@fields = split("\002", $bufline); |
763
|
0
|
|
|
|
|
|
while (defined($field=shift(@fields))) { |
764
|
|
|
|
|
|
|
|
765
|
0
|
0
|
|
|
|
|
if ($field eq 'keyword') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
766
|
|
|
|
|
|
|
##-- special handling for 'keyword' field |
767
|
0
|
|
|
|
|
|
$val = shift(@fields); |
768
|
0
|
|
|
|
|
|
while ($val =~ /\(.*?\S)\s*\<\/orth\>/g) { |
769
|
0
|
|
|
|
|
|
push(@{$hit->{orth_}}, $1); |
|
0
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
} |
771
|
|
|
|
|
|
|
} |
772
|
|
|
|
|
|
|
elsif ($field eq 'indices') { |
773
|
|
|
|
|
|
|
##-- special handling for 'indices' field |
774
|
0
|
|
|
|
|
|
$val = shift(@fields); |
775
|
0
|
|
|
|
|
|
$hit->{meta_}{indices_} = [split(' ',$val)]; |
776
|
|
|
|
|
|
|
} |
777
|
|
|
|
|
|
|
elsif ($field =~ /^\s*\
|
778
|
|
|
|
|
|
|
##-- special handling for context pseudo-field |
779
|
0
|
0
|
|
|
|
|
$hit->{ctx_} = $dc->parseTableContext($field) if ($dc->{parseContext}); |
780
|
|
|
|
|
|
|
} |
781
|
|
|
|
|
|
|
elsif ($dc->{parseMeta}) { |
782
|
|
|
|
|
|
|
##-- normal bibliographic field |
783
|
0
|
0
|
|
|
|
|
$field .= '_' if ($field =~ /^(?:scan|orig|page|rank(?:_debug)?)$/); ##-- special handling for ddc-internal fields |
784
|
0
|
|
|
|
|
|
$val = shift(@fields); |
785
|
0
|
|
|
|
|
|
$hit->{meta_}{$field} = $val; |
786
|
|
|
|
|
|
|
} |
787
|
|
|
|
|
|
|
} |
788
|
|
|
|
|
|
|
} |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
|
791
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
792
|
0
|
|
|
|
|
|
return $hits; |
793
|
|
|
|
|
|
|
} |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
## \@context_data = $dc->parseTableContext($context_buf) |
797
|
|
|
|
|
|
|
sub parseTableContext { |
798
|
0
|
|
|
0
|
0
|
|
my ($dc,$ctx) = @_; |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
##-- defaults |
801
|
0
|
|
|
|
|
|
my $fieldNames = $dc->{fieldNames}; |
802
|
0
|
|
|
|
|
|
my $fs = qr(\Q$dc->{fieldSeparator}\E); |
803
|
0
|
|
|
|
|
|
my $ts = qr(\Q$dc->{tokenSeparator}\E\ *); |
804
|
0
|
|
|
|
|
|
my $hl = $dc->{tableHighlight}; |
805
|
0
|
|
|
|
|
|
my $hlw0 = qr(^(?:(?:\Q$hl->[0]\E)|(?:\Q$hl->[2]\E))); |
806
|
0
|
|
|
|
|
|
my $hlw1 = qr((?:(?:\Q$hl->[1]\E)|(?:\Q$hl->[3]\E))$); |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
##-- split into sentences |
809
|
0
|
|
|
|
|
|
my $sents = [[],[],[]]; |
810
|
0
|
|
|
|
|
|
my ($sbuf,@s,$w); |
811
|
|
|
|
|
|
|
|
812
|
0
|
|
|
|
|
|
foreach $sbuf (split(/\<\/s\>\s*/,$ctx)) { |
813
|
|
|
|
|
|
|
|
814
|
0
|
0
|
|
|
|
|
if ($sbuf =~ /^\s*/) { |
815
|
|
|
|
|
|
|
##-- target sentence with index dump: parse it |
816
|
0
|
|
|
|
|
|
$sbuf =~ s|^\s*\]*)?\>\s*$ts||; |
817
|
0
|
|
|
|
|
|
@s = map {[0,split($fs,$_)]} split($ts,$sbuf); |
|
0
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
##-- parse words |
820
|
0
|
|
|
|
|
|
foreach $w (@s) { |
821
|
0
|
0
|
0
|
|
|
|
if ($w->[1] =~ $hlw0 && $w->[$#$w] =~ $hlw1) { |
822
|
|
|
|
|
|
|
##-- matched token |
823
|
0
|
|
|
|
|
|
$w->[1] =~ s/$hlw0//; |
824
|
0
|
|
|
|
|
|
$w->[$#$w] =~ s/$hlw1//; |
825
|
0
|
|
|
|
|
|
$w->[0] = 1; |
826
|
|
|
|
|
|
|
} |
827
|
|
|
|
|
|
|
} |
828
|
0
|
|
|
|
|
|
push(@{$sents->[1]}, @s); |
|
0
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
} |
830
|
|
|
|
|
|
|
else { |
831
|
|
|
|
|
|
|
##-- context sentence; surface strings only |
832
|
0
|
|
|
|
|
|
$sbuf =~ s|^\s*\]*)?\>$ts||; |
833
|
0
|
|
|
|
|
|
@s = split($ts,$sbuf); |
834
|
0
|
0
|
|
|
|
|
if (!@{$sents->[1]}) { |
|
0
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
##-- left context |
836
|
0
|
|
|
|
|
|
push(@{$sents->[0]}, @s); |
|
0
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
} else { |
838
|
|
|
|
|
|
|
##-- right context |
839
|
0
|
|
|
|
|
|
push(@{$sents->[2]}, @s); |
|
0
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
} |
843
|
|
|
|
|
|
|
|
844
|
0
|
|
|
|
|
|
return $sents; |
845
|
|
|
|
|
|
|
} |
846
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
849
|
|
|
|
|
|
|
## Hit Parsing: JSON |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
## $obj = $dc->decodeJson($buf) |
852
|
|
|
|
|
|
|
sub decodeJson { |
853
|
0
|
|
|
0
|
0
|
|
my $dc = shift; |
854
|
0
|
|
|
|
|
|
my ($bufr) = \$_[0]; |
855
|
0
|
0
|
0
|
|
|
|
if ($dc->{encoding} && !utf8::is_utf8($$bufr)) { |
856
|
0
|
|
|
|
|
|
my $buf = decode($dc->{encoding},$$bufr); |
857
|
0
|
|
|
|
|
|
$bufr = \$buf; |
858
|
|
|
|
|
|
|
} |
859
|
|
|
|
|
|
|
|
860
|
0
|
|
0
|
|
|
|
my $module = $JSON_BACKEND // 'JSON'; |
861
|
0
|
|
|
|
|
|
$module =~ s{::}{/}g; |
862
|
0
|
|
|
|
|
|
require "$module.pm"; |
863
|
|
|
|
|
|
|
|
864
|
0
|
|
|
|
|
|
my $jxs = $dc->{jxs}; |
865
|
0
|
0
|
|
|
|
|
$jxs = $dc->{jxs} = $JSON_BACKEND->new->utf8(0)->relaxed(1)->canonical(0) if (!defined($jxs)); |
866
|
0
|
|
|
|
|
|
return $jxs->decode($$bufr); |
867
|
|
|
|
|
|
|
} |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
## $hitList = $dc->parseJsonData($buf) |
870
|
|
|
|
|
|
|
## + returns a DDC::HitList |
871
|
|
|
|
|
|
|
sub parseJsonData { |
872
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
873
|
0
|
|
|
|
|
|
my $data = $dc->decodeJson($_[0]); |
874
|
|
|
|
|
|
|
my $hits = DDC::HitList->new(%$data, |
875
|
|
|
|
|
|
|
start=>$dc->{start}, |
876
|
|
|
|
|
|
|
limit=>$dc->{limit}, |
877
|
0
|
|
|
|
|
|
); |
878
|
|
|
|
|
|
|
|
879
|
0
|
0
|
|
|
|
|
$_ = bless($_,'DDC::Hit') foreach (@{$hits->{hits_}||[]}); |
|
0
|
|
|
|
|
|
|
880
|
0
|
0
|
|
|
|
|
$hits->expandFields($dc->{fieldNames}) if ($dc->{expandFields}); |
881
|
0
|
|
|
|
|
|
return $hits; |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
##-------------------------------------------------------------- |
885
|
|
|
|
|
|
|
## Hit Parsing: expand_terms() |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
## \@terms = $dc->parseExpandTermsResponse($buf) |
888
|
|
|
|
|
|
|
## @terms = $dc->parseExpandTermsResponse($buf) |
889
|
|
|
|
|
|
|
sub parseExpandTermsResponse { |
890
|
0
|
|
|
0
|
1
|
|
my $dc = shift; |
891
|
0
|
0
|
|
|
|
|
my @items = grep {defined($_) && $_ ne ''} split(/[\t\r\n]+/,$_[0]); |
|
0
|
|
|
|
|
|
|
892
|
0
|
0
|
|
|
|
|
die("error in expand_terms response") if ($items[0] !~ /^0 /); |
893
|
0
|
|
|
|
|
|
shift(@items); |
894
|
0
|
0
|
|
|
|
|
return wantarray ? @items : \@items; |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
1; ##-- be happy |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
__END__ |