File Coverage

blib/lib/Sphinx/Search.pm
Criterion Covered Total %
statement 301 1138 26.4
branch 23 410 5.6
condition 1 213 0.4
subroutine 87 144 60.4
pod 46 46 100.0
total 458 1951 23.4


line stmt bran cond sub pod time code
1             package Sphinx::Search;
2              
3 6     6   353375 use warnings;
  6         58  
  6         259  
4 6     6   44 use strict;
  6         15  
  6         184  
5              
6 6     6   39 use base 'Exporter';
  6         15  
  6         879  
7              
8 6     6   47 use Carp;
  6         17  
  6         488  
9 6     6   1686 use Socket;
  6         15284  
  6         3055  
10 6     6   62 use Config;
  6         16  
  6         262  
11 6     6   5308 use Math::BigInt;
  6         164154  
  6         41  
12 6     6   140188 use IO::Socket::INET;
  6         91176  
  6         56  
13 6     6   5336 use IO::Socket::UNIX;
  6         22  
  6         50  
14 6     6   6315 use Encode qw/encode_utf8 decode_utf8/;
  6         60273  
  6         588  
15 6     6   2264 use List::MoreUtils qw/any/;
  6         68219  
  6         46  
16              
17             my $is_native64 = $Config{longsize} == 8 || defined $Config{use64bitint} || defined $Config{use64bitall};
18            
19              
20             =head1 NAME
21              
22             Sphinx::Search - Sphinx search engine API Perl client
23              
24             =head1 VERSION
25              
26             Please note that you *MUST* install a version which is compatible with your version of Sphinx.
27              
28             Use version 0.30 for Sphinx-2.2.8-release or later (or use DBI instead)
29              
30             Use version 0.28 for Sphinx-2.0.8-release or later
31              
32             Use version 0.27.2 for Sphinx-2.0.3-release (svn-r3043)
33              
34             Use version 0.26.1 for Sphinx-2.0.1-beta (svn-r2792)
35              
36             Use version 0.25_03 for Sphinx svn-r2575
37              
38             Use version 0.24.1 for Sphinx-1.10-beta (svn-r2420)
39              
40             Use version 0.23_02 for Sphinx svn-r2269 (experimental)
41              
42             Use version 0.22 for Sphinx 0.9.9-rc2 and later (Please read the Compatibility Note under L regarding encoding changes)
43              
44             Use version 0.15 for Sphinx 0.9.9-svn-r1674
45              
46             Use version 0.12 for Sphinx 0.9.8
47              
48             Use version 0.11 for Sphinx 0.9.8-rc1
49              
50             Use version 0.10 for Sphinx 0.9.8-svn-r1112
51              
52             Use version 0.09 for Sphinx 0.9.8-svn-r985
53              
54             Use version 0.08 for Sphinx 0.9.8-svn-r871
55              
56             Use version 0.06 for Sphinx 0.9.8-svn-r820
57              
58             Use version 0.05 for Sphinx 0.9.8-cvs-20070907
59              
60             Use version 0.02 for Sphinx 0.9.8-cvs-20070818
61              
62             =cut
63              
64             our $VERSION = '0.30';
65              
66             =head1 SYNOPSIS
67              
68             use Sphinx::Search;
69              
70             $sph = Sphinx::Search->new();
71              
72             # Standard API query
73             $results = $sph->SetSortMode(SPH_SORT_RELEVANCE)
74             ->Query("search terms");
75              
76             # SphinxQL query
77             $results = $sph->SphinxQL("SELECT * FROM myindex WHERE MATCH('search terms')");
78              
79             =head1 DESCRIPTION
80              
81             This is the Perl API client for the Sphinx open-source SQL full-text indexing
82             search engine, L.
83              
84             Since 0.9.9, Sphinx supports a native MySQL-protocol client, i.e. DBI with DBD::mysql. That is, you can configure the server to have a mysql41 listening port and then simply do
85              
86             my $dbh = DBI->connect('dbi:mysql:host=127.0.0.1;port=9306;mysql_enable_utf8=1') or die "Failed to connect via DBI";
87             my $sth = $dbh->prepare_cached("SELECT * FROM myindex WHERE MATCH('search terms')");
88             $sth->execute();
89             while (my $row = $sth->fetchrow_arrayref) {
90             ... # Collect results
91             }
92              
93             The DBI client turns out to be significantly (about 5x) faster than this pure-Perl API. You should probably be using that instead.
94              
95             This module also supports SphinxQL queries, with the small advantage that you can use either the standard API or the SphinxQL API over the one port (i.e. the searchd server does not need to be configured with a mysql41 listening port).
96              
97             Given that the DBI client has several advantages over this API, future updates of this module are unlikely.
98              
99            
100             =cut
101              
102             # Constants to export.
103             our @EXPORT = qw(
104             SPH_MATCH_ALL SPH_MATCH_ANY SPH_MATCH_PHRASE SPH_MATCH_BOOLEAN SPH_MATCH_EXTENDED
105             SPH_MATCH_FULLSCAN SPH_MATCH_EXTENDED2
106             SPH_RANK_PROXIMITY_BM25 SPH_RANK_BM25 SPH_RANK_NONE SPH_RANK_WORDCOUNT
107             SPH_RANK_PROXIMITY SPH_RANK_MATCHANY SPH_RANK_FIELDMASK SPH_RANK_SPH04 SPH_RANK_EXPR
108             SPH_RANK_TOTAL
109             SPH_SORT_RELEVANCE SPH_SORT_ATTR_DESC SPH_SORT_ATTR_ASC SPH_SORT_TIME_SEGMENTS
110             SPH_SORT_EXTENDED SPH_SORT_EXPR
111             SPH_GROUPBY_DAY SPH_GROUPBY_WEEK SPH_GROUPBY_MONTH SPH_GROUPBY_YEAR SPH_GROUPBY_ATTR
112             SPH_GROUPBY_ATTRPAIR
113             SPH_ATTR_INTEGER SPH_ATTR_TIMESTAMP SPH_ATTR_ORDINAL SPH_ATTR_BOOL
114             SPH_ATTR_FLOAT SPH_ATTR_BIGINT SPH_ATTR_STRING SPH_ATTR_MULTI SPH_ATTR_MULTI64
115             SPH_QF_REVERSE_SCAN SPH_QF_SORT_METHOD SPH_QF_MAX_PREDICTED_TIME
116             SPH_QF_BOOLEAN_SIMPLIFY SPH_QF_IDF SPH_QF_GLOBAL_IDF
117             );
118              
119             # known searchd commands
120 6     6   7062 use constant SEARCHD_COMMAND_SEARCH => 0;
  6         29  
  6         510  
121 6     6   39 use constant SEARCHD_COMMAND_EXCERPT => 1;
  6         26  
  6         340  
122 6     6   46 use constant SEARCHD_COMMAND_UPDATE => 2;
  6         15  
  6         365  
123 6     6   50 use constant SEARCHD_COMMAND_KEYWORDS => 3;
  6         14  
  6         355  
124 6     6   40 use constant SEARCHD_COMMAND_PERSIST => 4;
  6         20  
  6         320  
125 6     6   44 use constant SEARCHD_COMMAND_STATUS => 5;
  6         13  
  6         314  
126 6     6   39 use constant SEARCHD_COMMAND_FLUSHATTRS => 7;
  6         15  
  6         314  
127 6     6   41 use constant SEARCHD_COMMAND_SPHINXQL => 8;
  6         14  
  6         359  
128              
129             # current client-side command implementation versions
130 6     6   43 use constant VER_COMMAND_SEARCH => 0x11E;
  6         16  
  6         333  
131 6     6   40 use constant VER_COMMAND_EXCERPT => 0x104;
  6         15  
  6         393  
132 6     6   44 use constant VER_COMMAND_UPDATE => 0x103;
  6         18  
  6         391  
133 6     6   42 use constant VER_COMMAND_KEYWORDS => 0x100;
  6         14  
  6         308  
134 6     6   38 use constant VER_COMMAND_STATUS => 0x101;
  6         14  
  6         310  
135 6     6   43 use constant VER_COMMAND_FLUSHATTRS => 0x100;
  6         14  
  6         323  
136 6     6   41 use constant VER_COMMAND_SPHINXQL => 0x100;
  6         16  
  6         320  
137              
138             # known searchd status codes
139 6     6   60 use constant SEARCHD_OK => 0;
  6         18  
  6         332  
140 6     6   43 use constant SEARCHD_ERROR => 1;
  6         13  
  6         344  
141 6     6   45 use constant SEARCHD_RETRY => 2;
  6         12  
  6         350  
142 6     6   45 use constant SEARCHD_WARNING => 3;
  6         27  
  6         348  
143              
144             # known match modes
145 6     6   48 use constant SPH_MATCH_ALL => 0;
  6         23  
  6         358  
146 6     6   46 use constant SPH_MATCH_ANY => 1;
  6         19  
  6         385  
147 6     6   60 use constant SPH_MATCH_PHRASE => 2;
  6         14  
  6         347  
148 6     6   45 use constant SPH_MATCH_BOOLEAN => 3;
  6         15  
  6         346  
149 6     6   48 use constant SPH_MATCH_EXTENDED => 4;
  6         16  
  6         349  
150 6     6   49 use constant SPH_MATCH_FULLSCAN => 5;
  6         14  
  6         364  
151 6     6   48 use constant SPH_MATCH_EXTENDED2 => 6; # extended engine V2 (TEMPORARY, WILL BE REMOVED
  6         15  
  6         360  
152              
153             # known ranking modes (ext2 only)
154 6     6   53 use constant SPH_RANK_PROXIMITY_BM25 => 0; # default mode, phrase proximity major factor and BM25 minor one
  6         14  
  6         358  
155 6     6   44 use constant SPH_RANK_BM25 => 1; # statistical mode, BM25 ranking only (faster but worse quality)
  6         13  
  6         356  
156 6     6   47 use constant SPH_RANK_NONE => 2; # no ranking, all matches get a weight of 1
  6         21  
  6         377  
157 6     6   46 use constant SPH_RANK_WORDCOUNT => 3; # simple word-count weighting, rank is a weighted sum of per-field keyword occurence counts
  6         21  
  6         322  
158 6     6   45 use constant SPH_RANK_PROXIMITY => 4;
  6         14  
  6         375  
159 6     6   50 use constant SPH_RANK_MATCHANY => 5;
  6         15  
  6         371  
160 6     6   50 use constant SPH_RANK_FIELDMASK => 6;
  6         21  
  6         346  
161 6     6   45 use constant SPH_RANK_SPH04 => 7;
  6         16  
  6         349  
162 6     6   41 use constant SPH_RANK_EXPR => 8;
  6         16  
  6         368  
163 6     6   44 use constant SPH_RANK_TOTAL => 9;
  6         16  
  6         361  
164              
165             # known sort modes
166 6     6   50 use constant SPH_SORT_RELEVANCE => 0;
  6         11  
  6         306  
167 6     6   37 use constant SPH_SORT_ATTR_DESC => 1;
  6         12  
  6         296  
168 6     6   43 use constant SPH_SORT_ATTR_ASC => 2;
  6         17  
  6         299  
169 6     6   40 use constant SPH_SORT_TIME_SEGMENTS => 3;
  6         11  
  6         270  
170 6     6   34 use constant SPH_SORT_EXTENDED => 4;
  6         13  
  6         269  
171 6     6   35 use constant SPH_SORT_EXPR => 5;
  6         17  
  6         303  
172              
173             # known filter types
174 6     6   52 use constant SPH_FILTER_VALUES => 0;
  6         16  
  6         297  
175 6     6   46 use constant SPH_FILTER_RANGE => 1;
  6         19  
  6         304  
176 6     6   58 use constant SPH_FILTER_FLOATRANGE => 2;
  6         15  
  6         291  
177 6     6   37 use constant SPH_FILTER_STRING => 3;
  6         12  
  6         302  
178              
179             # known attribute types
180 6     6   39 use constant SPH_ATTR_INTEGER => 1;
  6         12  
  6         297  
181 6     6   38 use constant SPH_ATTR_TIMESTAMP => 2;
  6         11  
  6         267  
182 6     6   38 use constant SPH_ATTR_ORDINAL => 3;
  6         13  
  6         374  
183 6     6   44 use constant SPH_ATTR_BOOL => 4;
  6         16  
  6         323  
184 6     6   46 use constant SPH_ATTR_FLOAT => 5;
  6         18  
  6         294  
185 6     6   42 use constant SPH_ATTR_BIGINT => 6;
  6         11  
  6         289  
186 6     6   45 use constant SPH_ATTR_STRING => 7;
  6         128  
  6         348  
187 6     6   42 use constant SPH_ATTR_FACTORS => 1001;
  6         74  
  6         342  
188 6     6   42 use constant SPH_ATTR_MULTI => 0x40000001;
  6         14  
  6         339  
189 6     6   41 use constant SPH_ATTR_MULTI64 => 0x40000002;
  6         13  
  6         329  
190              
191             # known grouping functions
192 6     6   40 use constant SPH_GROUPBY_DAY => 0;
  6         12  
  6         291  
193 6     6   40 use constant SPH_GROUPBY_WEEK => 1;
  6         13  
  6         288  
194 6     6   40 use constant SPH_GROUPBY_MONTH => 2;
  6         24  
  6         282  
195 6     6   37 use constant SPH_GROUPBY_YEAR => 3;
  6         12  
  6         291  
196 6     6   38 use constant SPH_GROUPBY_ATTR => 4;
  6         17  
  6         302  
197 6     6   58 use constant SPH_GROUPBY_ATTRPAIR => 5;
  6         14  
  6         441  
198              
199             use constant {
200 6         1031 SPH_QF_REVERSE_SCAN => 'reverse_scan',
201             SPH_QF_SORT_METHOD => 'sort_method',
202             SPH_QF_MAX_PREDICTED_TIME => 'max_predicted_time',
203             SPH_QF_BOOLEAN_SIMPLIFY => 'boolean_simplify',
204             SPH_QF_IDF => 'idf',
205             SPH_QF_GLOBAL_IDF => 'global_idf',
206 6     6   57 };
  6         19  
207              
208             my %query_flags = ( SPH_QF_REVERSE_SCAN() => [ 0, 1 ],
209             SPH_QF_SORT_METHOD() => [ qw/pq kbuffer/ ],
210             SPH_QF_MAX_PREDICTED_TIME() => [ 0 ],
211             SPH_QF_BOOLEAN_SIMPLIFY() => [1, 0],
212             SPH_QF_IDF() => [ qw/normalized plain tfidf_normalized tfidf_unnormalized/ ],
213             SPH_QF_GLOBAL_IDF() => [1, 0],
214             );
215              
216 6     6   44 use constant MYSQL_COL_STRING => 254;
  6         19  
  6         62841  
217              
218             # Floating point number matching expression
219             my $num_re = qr/^-?\d*\.?\d*(?:[eE][+-]?\d+)?$/;
220              
221             # portably pack numeric to 64 signed bits, network order
222             sub _sphPackI64 {
223 22     22   2875 my $self = shift;
224 22         47 my $v = shift;
225              
226             # x64 route
227 22 50       38 my $i = $is_native64 ? int($v) : Math::BigInt->new("$v");
228 22         67 return pack ( "NN", $i>>32, $i & 4294967295 );
229             }
230              
231             # portably pack numeric to 64 unsigned bits, network order
232             sub _sphPackU64 {
233 9     9   2557 my $self = shift;
234 9         15 my $v = shift;
235              
236 9 50       23 my $i = $is_native64 ? int($v) : Math::BigInt->new("$v");
237 9         43 return pack ( "NN", $i>>32, $i & 4294967295 );
238             }
239              
240             sub _sphPackI64array {
241 0     0   0 my $self = shift;
242 0   0     0 my $values = shift || [];
243              
244 0         0 my $s = pack("N", scalar @$values);
245 0         0 $s .= $self->_sphPackI64($_) for @$values;
246 0         0 return $s;
247             }
248              
249             # portably unpack 64 unsigned bits, network order to numeric
250             sub _sphUnpackU64
251             {
252 9     9   14 my $self = shift;
253 9         13 my $v = shift;
254              
255 9         28 my ($h,$l) = unpack ( "N*N*", $v );
256              
257             # x64 route
258 9 50       49 return ($h<<32) + $l if $is_native64;
259              
260             # x32 route, BigInt
261 0         0 $h = Math::BigInt->new($h);
262 0         0 $h->blsft(32)->badd($l);
263            
264 0         0 return $h->bstr;
265             }
266              
267             # portably unpack 64 signed bits, network order to numeric
268             sub _sphUnpackI64
269             {
270 11     11   15 my $self = shift;
271 11         14 my $v = shift;
272              
273 11         29 my ($h,$l) = unpack ( "N*N*", $v );
274              
275 11 100       21 my $neg = ($h & 0x80000000) ? 1 : 0;
276              
277             # x64 route
278 11 50       24 if ( $is_native64 ) {
279 11 100       44 return -(~(($h<<32) + $l) + 1) if $neg;
280 5         24 return ($h<<32) + $l;
281             }
282              
283             # x32 route, BigInt
284 0 0       0 if ($neg) {
285 0         0 $h = ~$h;
286 0         0 $l = ~$l;
287             }
288              
289 0         0 my $x = Math::BigInt->new($h);
290 0         0 $x->blsft(32)->badd($l);
291 0 0       0 $x->binc()->bneg() if $neg;
292              
293 0         0 return $x->bstr;
294             }
295              
296             sub _sphSetBit {
297 3     3   11 my ($self, $flag, $bit, $on) = @_;
298              
299 3 50       11 if ($on) {
300 3         10 $flag |= (1 << $bit);
301             }
302             else {
303 0         0 $flag &= ~(1 << $bit);
304             }
305 3         24 return $flag;
306             }
307              
308              
309              
310              
311              
312             =head1 CONSTRUCTOR
313              
314             =head2 new
315              
316             $sph = Sphinx::Search->new;
317             $sph = Sphinx::Search->new(\%options);
318              
319             Create a new Sphinx::Search instance.
320              
321             OPTIONS
322              
323             =over 4
324              
325             =item log
326              
327             Specify an optional logger instance. This can be any class that provides error,
328             warn, info, and debug methods (e.g. see L). Logging is disabled
329             if no logger instance is provided.
330              
331             =item debug
332              
333             Debug flag. If set (and a logger instance is specified), debugging messages
334             will be generated.
335              
336             =back
337              
338             =cut
339              
340             # create a new client object and fill defaults
341             sub new {
342 3     3 1 1145 my ($class, $options) = @_;
343 3         84 my $self = {
344             # per=client-object settings
345             _host => 'localhost',
346             _port => 9312,
347             _path => undef,
348             _socket => undef,
349              
350             _persistent => undef,
351             _connectretries => 1,
352              
353             # per-query settings
354             _offset => 0,
355             _limit => 20,
356             _mode => SPH_MATCH_EXTENDED2,
357             _weights => [],
358             _sort => SPH_SORT_RELEVANCE,
359             _sortby => "",
360             _min_id => 0,
361             _max_id => 0,
362             _filters => [],
363             _groupby => "",
364             _groupdistinct => "",
365             _groupfunc => SPH_GROUPBY_DAY,
366             _groupsort => '@group desc',
367             _maxmatches => 1000,
368             _cutoff => 0,
369             _retrycount => 0,
370             _retrydelay => 0,
371             _anchor => undef,
372             _indexweights => undef,
373             _ranker => SPH_RANK_PROXIMITY_BM25,
374             _rankexpr => "",
375             _maxquerytime => 0,
376             _fieldweights => {},
377             _overrides => {},
378             _select => q{*},
379              
380             # per-reply fields (for single-query case)
381             _error => '',
382             _warning => '',
383             _connerror => '',
384            
385             # request storage (for multi-query case)
386             _reqs => [],
387             _timeout => 0,
388              
389             _string_encoder => \&encode_utf8,
390             _string_decoder => \&decode_utf8,
391             };
392 3   33     25 bless $self, ref($class) || $class;
393              
394 3         15 $self->ResetQueryFlag;
395 3         13 $self->ResetOuterSelect;
396              
397             # These options are supported in the constructor, but not recommended
398             # since there is no validation. Use the Set* methods instead.
399 3         14 my %legal_opts = map { $_ => 1 } qw/host port offset limit mode weights sort sortby groupby groupbyfunc maxmatches cutoff retrycount retrydelay log debug string_encoder string_decoder/;
  54         126  
400 3         17 for my $opt (keys %$options) {
401 1 50       6 $self->{'_' . $opt} = $options->{$opt} if $legal_opts{$opt};
402             }
403             # Disable debug unless we have something to log to
404 3 50       11 $self->{_debug} = 0 unless $self->{_log};
405              
406 3         16 return $self;
407             }
408              
409              
410             =head1 METHODS
411              
412             =cut
413              
414             sub _Error {
415 2     2   5 my ($self, $msg) = @_;
416              
417 2         5 $self->{_error} = $msg;
418 2 50       8 $self->{_log}->error($msg) if $self->{_log};
419 2         5 return;
420             }
421              
422             sub _Throw {
423 0     0   0 my ($self, $msg) = @_;
424              
425 0         0 die $msg;
426             }
427              
428             =head2 GetLastError
429              
430             $error = $sph->GetLastError;
431              
432             Get last error message (string)
433              
434             =cut
435              
436             sub GetLastError {
437 0     0 1 0 my $self = shift;
438 0         0 return $self->{_error};
439             }
440              
441             sub _Warning {
442 0     0   0 my ($self, $msg) = @_;
443              
444 0         0 $self->{_warning} = $msg;
445 0 0       0 $self->{_log}->warn($msg) if $self->{_log};
446 0         0 return;
447             }
448              
449             =head2 GetLastWarning
450              
451             $warning = $sph->GetLastWarning;
452              
453             Get last warning message (string)
454              
455             =cut
456              
457             sub GetLastWarning {
458 0     0 1 0 my $self = shift;
459 0         0 return $self->{_warning};
460             }
461              
462              
463             =head2 IsConnectError
464              
465             Check connection error flag (to differentiate between network connection errors
466             and bad responses). Returns true value on connection error.
467              
468             =cut
469              
470             sub IsConnectError {
471 0     0 1 0 return shift->{_connerror};
472             }
473              
474             =head2 SetEncoders
475              
476             $sph->SetEncoders(\&encode_function, \&decode_function)
477              
478             COMPATIBILITY NOTE: SetEncoders() was introduced in version 0.17.
479             Prior to that, all strings were considered to be sequences of bytes
480             which may have led to issues with multi-byte characters. If you were
481             previously encoding/decoding strings external to Sphinx::Search, you
482             will need to disable encoding/decoding by setting Sphinx::Search to
483             use raw values as explained below (or modify your code and let
484             Sphinx::Search do the recoding).
485              
486             Set the string encoder/decoder functions for transferring strings
487             between perl and Sphinx. The encoder should take the perl internal
488             representation and convert to the bytestream that searchd expects, and
489             the decoder should take the bytestream returned by searchd and convert to
490             perl format.
491              
492             The searchd format will depend on the 'charset_type' index setting in
493             the Sphinx configuration file.
494              
495             The coders default to encode_utf8 and decode_utf8 respectively, which
496             are compatible with the 'utf8' charset_type.
497              
498             If either the encoder or decoder functions are left undefined in the
499             call to SetEncoders, they return to their default values.
500              
501             If you wish to send raw values (no encoding/decoding), supply a
502             function that simply returns its argument, e.g.
503              
504             $sph->SetEncoders( sub { shift }, sub { shift });
505              
506             Returns $sph.
507              
508             =cut
509              
510             sub SetEncoders {
511 0     0 1 0 my $self = shift;
512 0         0 my $encoder = shift;
513 0         0 my $decoder = shift;
514              
515 0 0       0 $self->{_string_encoder} = $encoder ? $encoder : \&encode_utf8;
516 0 0       0 $self->{_string_decoder} = $decoder ? $decoder : \&decode_utf8;
517            
518 0         0 return $self;
519             }
520              
521             =head2 SetServer
522              
523             $sph->SetServer($host, $port);
524             $sph->SetServer($path, $port);
525              
526             In the first form, sets the host (string) and port (integer) details for the
527             searchd server using a network (INET) socket (default is localhost:9312).
528              
529             In the second form, where $path is a local filesystem path (optionally prefixed
530             by 'unix://'), sets the client to access the searchd server via a local (UNIX
531             domain) socket at the specified path.
532              
533             Returns $sph.
534              
535             =cut
536              
537             sub SetServer {
538 0     0 1 0 my $self = shift;
539 0         0 my $host = shift;
540 0         0 my $port = shift;
541              
542 0 0       0 croak("host is not defined") unless defined($host);
543 0 0       0 if (substr($host, 0, 1) eq '/') {
    0          
544 0         0 $self->{_path} = $host;
545 0         0 return;
546             }
547             elsif (substr($host, 0, 7) eq 'unix://') {
548 0         0 $self->{_path} = substr($host, 7);
549 0         0 return;
550             }
551            
552 0   0     0 $port ||= 0;
553 0 0       0 croak("port is not an number") unless $port =~ m/^\d+/o;
554 0         0 $port = int($port);
555 0 0 0     0 croak("port $port out of range 0 to 65536") if $port <0 || $port >= 65536;
556              
557 0         0 $self->{_host} = $host;
558 0 0       0 $self->{_port} = $port == 0 ? 9312 : $port;
559 0         0 $self->{_path} = undef;
560              
561 0         0 return $self;
562             }
563              
564             =head2 SetConnectTimeout
565              
566             $sph->SetConnectTimeout($timeout)
567              
568             Set server connection timeout (in seconds).
569              
570             Returns $sph.
571              
572             =cut
573              
574             sub SetConnectTimeout {
575 1     1 1 312 my $self = shift;
576 1         2 my $timeout = shift;
577              
578 1 50       9 croak("timeout is not numeric") unless ($timeout =~ m/$num_re/);
579 1         2 $self->{_timeout} = $timeout;
580 1         2 return $self;
581             }
582              
583             =head2 SetConnectRetries
584              
585             $sph->SetConnectRetries($retries)
586              
587             Set server connection retries (in case of connection fail).
588              
589             Returns $sph.
590              
591             =cut
592              
593             sub SetConnectRetries {
594 0     0 1 0 my $self = shift;
595 0         0 my $retries = shift;
596 0 0       0 croak("connect retries is not numeric") unless ($retries =~ m/$num_re/);
597 0         0 $self->{_connectretries} = $retries;
598 0         0 return $self;
599             }
600              
601              
602             sub _Send {
603 0     0   0 my $self = shift;
604 0         0 my $fp = shift;
605 0         0 my $data = shift;
606              
607 0 0       0 $self->{_log}->debug("Writing to socket") if $self->{_debug};
608 0 0       0 unless ( send($fp,$data,0)){
609 0         0 $self->_Error("connection unexpectedly closed (timed out?): $!");
610 0         0 $self->{_connerror} = 1;
611 0 0       0 if ($self->{_socket}) {
612 0         0 close($self->{_socket});
613 0         0 undef $self->{_socket};
614             }
615 0         0 return 0;
616             }
617 0         0 return 1;
618             }
619              
620             # connect to searchd server
621              
622             sub _Connect {
623 1     1   3 my $self = shift;
624            
625 1         4 $self->_Error(); #reset old errors in new connection
626            
627 1 50       3 if ($self->{_socket}) {
628             # persistent connection, check it
629 0 0       0 return $self->{_socket} if $self->{_socket}->connected;
630             # force reopen
631 0         0 undef $self->{_socket};
632             }
633              
634 1         1 my $debug = $self->{_debug};
635 1 50       9 my $str_dest = $self->{_path} ? 'unix://' . $self->{_path} : "$self->{_host}:$self->{_port}";
636 1 50       4 $self->{_log}->debug("Connecting to $str_dest") if $debug;
637              
638             # connect socket
639 1         1 $self->{_connerror} = q{};
640              
641 1         2 my $fp;
642 1         2 my %params = (); # ( Blocking => 0 );
643 1 50       4 $params{Timeout} = $self->{_timeout} if $self->{_timeout};
644 1 50       2 if ($self->{_path}) {
645             $fp = IO::Socket::UNIX->new( Peer => $self->{_path},
646 0         0 %params,
647             );
648             }
649             else {
650             $fp = IO::Socket::INET->new( PeerPort => $self->{_port},
651             PeerAddr => $self->{_host},
652 1         12 Proto => 'tcp',
653             %params,
654             );
655             }
656 1 50       502 if (! $fp) {
657 1         9 $self->_Error("Failed to open connection to $str_dest: $!");
658 1         4 $self->{_connerror} = 1;
659 1         8 return 0;
660             }
661 0         0 binmode($fp, ':bytes');
662              
663             # check version
664 0         0 my $buf = '';
665 0 0       0 $fp->read($buf, 4) or do {
666 0         0 $self->_Error("Failed on initial read from $str_dest: $!");
667 0         0 $self->{_connerror} = 1;
668 0         0 return 0;
669             };
670 0         0 my $v = unpack("N*", $buf);
671 0         0 $v = int($v);
672 0 0       0 $self->{_log}->debug("Got version $v from searchd") if $debug;
673 0 0       0 if ($v < 1) {
674 0         0 close($fp);
675 0         0 $self->_Error("expected searchd protocol version 1+, got version '$v'");
676 0         0 return 0;
677             }
678              
679 0 0       0 $self->{_log}->debug("Sending version") if $debug;
680              
681             # All ok, send my version
682 0 0       0 unless ($self->_Send($fp, pack("N", 1))) {
683 0         0 $self->{_connerror} = 1;
684 0         0 $self->_Error("error on sending version");
685 0         0 return 0;
686             }
687 0 0       0 $self->{_log}->debug("Connection complete") if $debug;
688            
689 0 0       0 if ($self->{_persistent}) {
690 0         0 my $req = pack("nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1);
691 0 0       0 unless ($self->_Send($fp, $req)) {
692 0         0 $self->{_connerror} = 1;
693 0         0 $self->_Error("error on setting persistent connection");
694 0         0 return 0;
695             }
696 0         0 $self->{_socket} = $fp;
697             }
698              
699 0         0 return $fp;
700              
701             }
702              
703             #-------------------------------------------------------------
704              
705             # get and check response packet from searchd server
706             sub _GetResponse {
707 0     0   0 my $self = shift;
708 0         0 my $fp = shift;
709 0         0 my $client_ver = shift;
710              
711 0         0 my $header;
712 0         0 my $resp = $fp->read($header, 8, 0);
713              
714 0 0 0     0 if (!defined($resp) || $resp==0) {
715 0         0 close $self->{_socket};
716 0         0 undef $self->{_socket};
717 0         0 $self->_Error("read failed: $!");
718 0         0 return 0;
719             }
720              
721 0         0 my ($status, $ver, $len ) = unpack("n2N", $header);
722 0 0       0 if ( ! defined($len) ) {
723 0         0 $self->_Error("read failed: $!");
724 0         0 return 0;
725             }
726              
727 0         0 my $response = q{};
728 0         0 my $lasterror = q{};
729 0         0 my $lentotal = 0;
730 0         0 while (my $rlen = $fp->read(my $chunk, $len)) {
731 0 0       0 if ($rlen < 0) {
732 0         0 $lasterror = $!;
733 0         0 last;
734             }
735 0         0 $response .= $chunk;
736 0         0 $lentotal += $rlen;
737 0 0       0 last if $lentotal >= $len;
738             }
739 0 0       0 close($fp) unless $self->{_socket};
740              
741             # check response
742 0 0       0 if ( length($response) != $len ) {
743 0 0       0 $self->_Error( $len
744             ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=". length($response) . ", last error=$lasterror)"
745             : "received zero-sized searchd response");
746 0         0 return 0;
747             }
748              
749             # check status
750 0 0       0 if ( $status==SEARCHD_WARNING ) {
751 0         0 my ($wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
752 0         0 $self->_Warning(substr ( $response, 4, $wlen ));
753 0         0 return substr ( $response, 4+$wlen );
754             }
755 0 0       0 if ( $status==SEARCHD_ERROR ) {
756 0         0 $self->_Error("searchd error: " . substr ( $response, 4 ));
757 0         0 return 0;
758             }
759 0 0       0 if ( $status==SEARCHD_RETRY ) {
760 0         0 $self->_Error("temporary searchd error: " . substr ( $response, 4 ));
761 0         0 return 0;
762             }
763 0 0       0 if ( $status!=SEARCHD_OK ) {
764 0         0 $self->_Error("unknown status code '$status'");
765 0         0 return 0;
766             }
767              
768             # check version
769 0 0       0 if ( $ver<$client_ver ) {
770 0         0 $self->_Warning(sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
771             $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff ));
772             }
773 0         0 return $response;
774             }
775              
776             #-----------------------------------------------
777             # connect to searchd, send request and get data
778              
779             sub _ProcessRequest {
780 0     0   0 my ($self, $req, $response_command_version) = @_;
781 0 0       0 return unless $req;
782 0         0 my $tries = $self->{_connectretries} + 1;
783 0         0 while( $tries-- ) {
784 0         0 my $fp = $self->_Connect;
785 0 0       0 if (! $fp) {
786 0 0       0 next if $self->IsConnectError;
787 0         0 last;
788             }
789 0 0       0 $self->_Send($fp, $req) or next;
790 0         0 my $response = $self->_GetResponse ($fp, $response_command_version);
791 0 0       0 return $response if $response;
792             }
793 0 0       0 $self->_Error($self->GetLastError . "... ConnectRetries exceed...") if $self->IsConnectError;
794 0         0 return 0;
795             }
796              
797              
798             =head2 SetLimits
799              
800             $sph->SetLimits($offset, $limit);
801             $sph->SetLimits($offset, $limit, $max);
802              
803             Set match offset/limits, and optionally the max number of matches to return.
804              
805             Returns $sph.
806              
807             =cut
808              
809             sub SetLimits {
810 0     0 1 0 my $self = shift;
811 0         0 my $offset = shift;
812 0         0 my $limit = shift;
813 0   0     0 my $max = shift || 0;
814 0 0 0     0 croak("offset should be an integer >= 0") unless ($offset =~ /^\d+$/ && $offset >= 0) ;
815 0 0 0     0 croak("limit should be an integer >= 0") unless ($limit =~ /^\d+$/ && $limit >= 0);
816 0         0 $self->{_offset} = $offset;
817 0         0 $self->{_limit} = $limit;
818 0 0       0 if($max > 0) {
819 0         0 $self->{_maxmatches} = $max;
820             }
821 0         0 return $self;
822             }
823              
824             =head2 SetMaxQueryTime
825              
826             $sph->SetMaxQueryTime($millisec);
827              
828             Set maximum query time, in milliseconds, per index.
829              
830             The value may not be negative; 0 means "do not limit".
831              
832             Returns $sph.
833              
834             =cut
835              
836             sub SetMaxQueryTime {
837 0     0 1 0 my $self = shift;
838 0         0 my $max = shift;
839              
840 0 0 0     0 croak("max value should be an integer >= 0") unless ($max =~ /^\d+$/ && $max >= 0) ;
841 0         0 $self->{_maxquerytime} = $max;
842 0         0 return $self;
843             }
844              
845              
846             =head2 SetMatchMode
847              
848             ** DEPRECATED **
849              
850             $sph->SetMatchMode($mode);
851              
852             Set match mode, which may be one of:
853              
854             =over 4
855              
856             =item * SPH_MATCH_ALL
857              
858             Match all words
859              
860             =item * SPH_MATCH_ANY
861              
862             Match any words
863              
864             =item * SPH_MATCH_PHRASE
865              
866             Exact phrase match
867              
868             =item * SPH_MATCH_BOOLEAN
869              
870             Boolean match, using AND (&), OR (|), NOT (!,-) and parenthetic grouping.
871              
872             =item * SPH_MATCH_EXTENDED
873              
874             Extended match, which includes the Boolean syntax plus field, phrase and
875             proximity operators.
876              
877             =back
878              
879             Returns $sph.
880              
881             =cut
882              
883             sub SetMatchMode {
884 0     0 1 0 my $self = shift;
885 0         0 my $mode = shift;
886 0         0 warn "SetMatchMode is DEPRECATED. Do not call this method - use extended query syntax instead.";
887              
888 0 0       0 croak("Match mode not defined") unless defined($mode);
889 0 0 0     0 croak("Unknown matchmode: $mode") unless ( $mode==SPH_MATCH_ALL
      0        
      0        
      0        
      0        
      0        
890             || $mode==SPH_MATCH_ANY
891             || $mode==SPH_MATCH_PHRASE
892             || $mode==SPH_MATCH_BOOLEAN
893             || $mode==SPH_MATCH_EXTENDED
894             || $mode==SPH_MATCH_FULLSCAN
895             || $mode==SPH_MATCH_EXTENDED2 );
896 0         0 $self->{_mode} = $mode;
897 0         0 return $self;
898             }
899              
900              
901             =head2 SetRankingMode
902              
903             $sph->SetRankingMode(SPH_RANK_BM25, $rank_exp);
904              
905             Set ranking mode, which may be one of:
906              
907             =over 4
908              
909             =item * SPH_RANK_PROXIMITY_BM25
910              
911             Default mode, phrase proximity major factor and BM25 minor one
912              
913             =item * SPH_RANK_BM25
914              
915             Statistical mode, BM25 ranking only (faster but worse quality)
916              
917             =item * SPH_RANK_NONE
918              
919             No ranking, all matches get a weight of 1
920              
921             =item * SPH_RANK_WORDCOUNT
922              
923             Simple word-count weighting, rank is a weighted sum of per-field keyword
924             occurence counts
925              
926             =item * SPH_RANK_MATCHANY
927              
928             Returns rank as it was computed in SPH_MATCH_ANY mode earlier, and is internally used to emulate SPH_MATCH_ANY queries.
929              
930             =item * SPH_RANK_FIELDMASK
931              
932             Returns a 32-bit mask with N-th bit corresponding to N-th fulltext field, numbering from 0. The bit will only be set when the respective field has any keyword occurences satisfiying the query.
933              
934             =item * SPH_RANK_SPH04
935              
936             SPH_RANK_SPH04 is generally based on the default SPH_RANK_PROXIMITY_BM25 ranker, but additionally boosts the matches when they occur in the very beginning or the very end of a text field.
937              
938             =item * SPH_RANK_EXPR
939              
940             Allows the ranking formula to be specified at run time. It exposes a number of internal text factors and lets you define how the final weight should be computed from those factors. $rank_exp should be set to the ranking expression string, e.g. to emulate SPH_RANK_PROXIMITY_BM25, use "sum(lcs*user_weight)*1000+bm25".
941              
942             =back
943              
944             Returns $sph.
945              
946             =cut
947              
948             sub SetRankingMode {
949 0     0 1 0 my $self = shift;
950 0         0 my $ranker = shift;
951 0         0 my $rankexp = shift;
952              
953 0 0 0     0 croak("Unknown ranking mode: $ranker") unless ( $ranker == 0
      0        
954             || ( $ranker >= 1
955             && $ranker < SPH_RANK_TOTAL ));
956              
957 0         0 $self->{_ranker} = $ranker;
958 0   0     0 $self->{_rankexpr} = $rankexp || "";
959              
960 0         0 return $self;
961             }
962              
963              
964             =head2 SetSortMode
965              
966             $sph->SetSortMode(SPH_SORT_RELEVANCE);
967             $sph->SetSortMode($mode, $sortby);
968              
969             Set sort mode, which may be any of:
970              
971             =over 4
972              
973             =item SPH_SORT_RELEVANCE - sort by relevance
974              
975             =item SPH_SORT_ATTR_DESC, SPH_SORT_ATTR_ASC
976              
977             Sort by attribute descending/ascending. $sortby specifies the sorting attribute.
978              
979             =item SPH_SORT_TIME_SEGMENTS
980              
981             Sort by time segments (last hour/day/week/month) in descending order, and then
982             by relevance in descending order. $sortby specifies the time attribute.
983              
984             =item SPH_SORT_EXTENDED
985              
986             Sort by SQL-like syntax. $sortby is the sorting specification.
987              
988             =item SPH_SORT_EXPR
989              
990              
991             =back
992              
993             Returns $sph.
994              
995             =cut
996              
997             sub SetSortMode {
998 0     0 1 0 my $self = shift;
999 0         0 my $mode = shift;
1000 0   0     0 my $sortby = shift || "";
1001 0 0       0 croak("Sort mode not defined") unless defined($mode);
1002 0 0 0     0 croak("Unknown sort mode: $mode") unless ( $mode == SPH_SORT_RELEVANCE
      0        
      0        
      0        
      0        
1003             || $mode == SPH_SORT_ATTR_DESC
1004             || $mode == SPH_SORT_ATTR_ASC
1005             || $mode == SPH_SORT_TIME_SEGMENTS
1006             || $mode == SPH_SORT_EXTENDED
1007             || $mode == SPH_SORT_EXPR
1008             );
1009 0 0 0     0 croak("Sortby must be defined") unless ($mode==SPH_SORT_RELEVANCE || length($sortby));
1010 0         0 $self->{_sort} = $mode;
1011 0         0 $self->{_sortby} = $sortby;
1012 0         0 return $self;
1013             }
1014              
1015             =head2 SetWeights
1016            
1017             ** DEPRECATED **
1018              
1019             $sph->SetWeights([ 1, 2, 3, 4]);
1020              
1021             This method is deprecated. Use L instead.
1022              
1023             Set per-field (integer) weights. The ordering of the weights correspond to the
1024             ordering of fields as indexed.
1025              
1026             Returns $sph.
1027              
1028             =cut
1029              
1030             sub SetWeights {
1031 0     0 1 0 my $self = shift;
1032 0         0 my $weights = shift;
1033 0         0 warn "SetWeights is DEPRECATED, Do not call this method; use SetFieldWeights instead";
1034              
1035 0 0       0 croak("Weights is not an array reference") unless (ref($weights) eq 'ARRAY');
1036 0         0 foreach my $weight (@$weights) {
1037 0 0       0 croak("Weight: $weight is not an integer") unless ($weight =~ /^\d+$/);
1038             }
1039 0         0 $self->{_weights} = $weights;
1040 0         0 return $self;
1041             }
1042              
1043             =head2 SetFieldWeights
1044            
1045             $sph->SetFieldWeights(\%weights);
1046              
1047             Set per-field (integer) weights by field name. The weights hash provides field
1048             name to weight mappings.
1049              
1050             Takes precedence over L.
1051              
1052             Unknown names will be silently ignored. Missing fields will be given a weight of 1.
1053              
1054             Returns $sph.
1055              
1056             =cut
1057              
1058             sub SetFieldWeights {
1059 0     0 1 0 my $self = shift;
1060 0         0 my $weights = shift;
1061 0 0       0 croak("Weights is not a hash reference") unless (ref($weights) eq 'HASH');
1062 0         0 foreach my $field (keys %$weights) {
1063 0 0       0 croak("Weight: $weights->{$field} is not an integer >= 0") unless ($weights->{$field} =~ /^\d+$/);
1064             }
1065 0         0 $self->{_fieldweights} = $weights;
1066 0         0 return $self;
1067             }
1068              
1069             =head2 SetIndexWeights
1070            
1071             $sph->SetIndexWeights(\%weights);
1072              
1073             Set per-index (integer) weights. The weights hash is a mapping of index name to integer weight.
1074              
1075             Returns $sph.
1076              
1077             =cut
1078              
1079             sub SetIndexWeights {
1080 0     0 1 0 my $self = shift;
1081 0         0 my $weights = shift;
1082 0 0       0 croak("Weights is not a hash reference") unless (ref($weights) eq 'HASH');
1083 0         0 foreach (keys %$weights) {
1084 0 0       0 croak("IndexWeight $_: $weights->{$_} is not an integer") unless ($weights->{$_} =~ /^\d+$/);
1085             }
1086 0         0 $self->{_indexweights} = $weights;
1087 0         0 return $self;
1088             }
1089              
1090              
1091              
1092             =head2 SetIDRange
1093              
1094             $sph->SetIDRange($min, $max);
1095              
1096             Set IDs range only match those records where document ID
1097             is between $min and $max (including $min and $max)
1098              
1099             Returns $sph.
1100              
1101             =cut
1102              
1103             sub SetIDRange {
1104 0     0 1 0 my $self = shift;
1105 0         0 my $min = shift;
1106 0         0 my $max = shift;
1107 0 0       0 croak("min_id is not numeric") unless ($min =~ m/$num_re/);
1108 0 0       0 croak("max_id is not numeric") unless ($max =~ m/$num_re/);
1109 0 0       0 croak("min_id is larger than or equal to max_id") unless ($min < $max);
1110 0         0 $self->{_min_id} = $min;
1111 0         0 $self->{_max_id} = $max;
1112 0         0 return $self;
1113             }
1114              
1115             =head2 SetFilter
1116              
1117             $sph->SetFilter($attr, \@values);
1118             $sph->SetFilter($attr, \@values, $exclude);
1119              
1120             Sets the results to be filtered on the given attribute. Only results which have
1121             attributes matching the given values will be returned. (Attribute values must be integers).
1122              
1123             This may be called multiple times with different attributes to select on
1124             multiple attributes.
1125              
1126             If 'exclude' is set, excludes results that match the filter.
1127              
1128             Returns $sph.
1129              
1130             =cut
1131              
1132             sub SetFilter {
1133 1     1 1 11 my ($self, $attribute, $values, $exclude) = @_;
1134              
1135 1 50       3 croak("attribute is not defined") unless (defined $attribute);
1136 1 50       4 croak("values is not an array reference") unless (ref($values) eq 'ARRAY');
1137 1 50       4 croak("values reference is empty") unless (scalar(@$values));
1138              
1139 1 50       2 push(@{$self->{_filters}}, {
  1         8  
1140             type => SPH_FILTER_VALUES,
1141             attr => $attribute,
1142             values => $values,
1143             exclude => $exclude ? 1 : 0,
1144             });
1145              
1146 1         3 return $self;
1147             }
1148              
1149             =head2 SetFilterString
1150              
1151             $sph->SetFilterString($attr, $value)
1152             $sph->SetFilterString($attr, $value, $exclude)
1153              
1154             Adds new string value filter. Only those documents where $attr column value matches the string value from $value will be matched (or rejected, if $exclude is true).
1155              
1156             =cut
1157              
1158             sub SetFilterString {
1159 0     0 1 0 my ($self, $attribute, $value, $exclude) = @_;
1160              
1161 0 0       0 croak("attribute is not defined") unless (defined $attribute);
1162 0 0 0     0 croak("value is not a string") unless ($value && ! ref($value));
1163              
1164 0 0       0 push(@{$self->{_filters}}, {
  0         0  
1165             type => SPH_FILTER_STRING,
1166             attr => $attribute,
1167             value => $value,
1168             exclude => $exclude ? 1 : 0,
1169             });
1170              
1171 0         0 return $self;
1172             }
1173              
1174             =head2 SetFilterRange
1175              
1176             $sph->SetFilterRange($attr, $min, $max);
1177             $sph->SetFilterRange($attr, $min, $max, $exclude);
1178              
1179             Sets the results to be filtered on a range of values for the given
1180             attribute. Only those records where $attr column value is between $min and $max
1181             (including $min and $max) will be returned.
1182              
1183             If 'exclude' is set, excludes results that fall within the given range.
1184              
1185             Returns $sph.
1186              
1187             =cut
1188              
1189             sub SetFilterRange {
1190 0     0 1 0 my ($self, $attribute, $min, $max, $exclude) = @_;
1191 0 0       0 croak("attribute is not defined") unless (defined $attribute);
1192 0 0       0 croak("min: $min is not an integer") unless ($min =~ m/$num_re/);
1193 0 0       0 croak("max: $max is not an integer") unless ($max =~ m/$num_re/);
1194 0 0       0 croak("min value should be <= max") unless ($min <= $max);
1195              
1196 0 0       0 push(@{$self->{_filters}}, {
  0         0  
1197             type => SPH_FILTER_RANGE,
1198             attr => $attribute,
1199             min => $min,
1200             max => $max,
1201             exclude => $exclude ? 1 : 0,
1202             });
1203              
1204 0         0 return $self;
1205             }
1206              
1207             =head2 SetFilterFloatRange
1208              
1209             $sph->SetFilterFloatRange($attr, $min, $max, $exclude);
1210              
1211             Same as L, but allows floating point values.
1212              
1213             Returns $sph.
1214              
1215             =cut
1216              
1217             sub SetFilterFloatRange {
1218 0     0 1 0 my ($self, $attribute, $min, $max, $exclude) = @_;
1219 0 0       0 croak("attribute is not defined") unless (defined $attribute);
1220 0 0       0 croak("min: $min is not numeric") unless ($min =~ m/$num_re/);
1221 0 0       0 croak("max: $max is not numeric") unless ($max =~ m/$num_re/);
1222 0 0       0 croak("min value should be <= max") unless ($min <= $max);
1223              
1224 0 0       0 push(@{$self->{_filters}}, {
  0         0  
1225             type => SPH_FILTER_FLOATRANGE,
1226             attr => $attribute,
1227             min => $min,
1228             max => $max,
1229             exclude => $exclude ? 1 : 0,
1230             });
1231              
1232 0         0 return $self;
1233              
1234             }
1235              
1236             =head2 SetGeoAnchor
1237              
1238             $sph->SetGeoAnchor($attrlat, $attrlong, $lat, $long);
1239              
1240             Setup anchor point for using geosphere distance calculations in filters and sorting.
1241             Distance will be computed with respect to this point
1242              
1243             =over 4
1244              
1245             =item $attrlat is the name of latitude attribute
1246              
1247             =item $attrlong is the name of longitude attribute
1248              
1249             =item $lat is anchor point latitude, in radians
1250              
1251             =item $long is anchor point longitude, in radians
1252              
1253             =back
1254              
1255             Returns $sph.
1256              
1257             =cut
1258              
1259             sub SetGeoAnchor {
1260 0     0 1 0 my ($self, $attrlat, $attrlong, $lat, $long) = @_;
1261              
1262 0 0       0 croak("attrlat is not defined") unless defined $attrlat;
1263 0 0       0 croak("attrlong is not defined") unless defined $attrlong;
1264 0 0       0 croak("lat: $lat is not numeric") unless ($lat =~ m/$num_re/);
1265 0 0       0 croak("long: $long is not numeric") unless ($long =~ m/$num_re/);
1266              
1267             $self->{_anchor} = {
1268 0         0 attrlat => $attrlat,
1269             attrlong => $attrlong,
1270             lat => $lat,
1271             long => $long,
1272             };
1273 0         0 return $self;
1274             }
1275              
1276             =head2 SetGroupBy
1277              
1278             $sph->SetGroupBy($attr, $func);
1279             $sph->SetGroupBy($attr, $func, $groupsort);
1280              
1281             Sets attribute and function of results grouping.
1282              
1283             In grouping mode, all matches are assigned to different groups based on grouping
1284             function value. Each group keeps track of the total match count, and the best
1285             match (in this group) according to current sorting function. The final result
1286             set contains one best match per group, with grouping function value and matches
1287             count attached.
1288              
1289             $attr is any valid attribute. Use L to disable grouping.
1290              
1291             $func is one of:
1292              
1293             =over 4
1294              
1295             =item * SPH_GROUPBY_DAY
1296              
1297             Group by day (assumes timestamp type attribute of form YYYYMMDD)
1298              
1299             =item * SPH_GROUPBY_WEEK
1300              
1301             Group by week (assumes timestamp type attribute of form YYYYNNN)
1302              
1303             =item * SPH_GROUPBY_MONTH
1304              
1305             Group by month (assumes timestamp type attribute of form YYYYMM)
1306              
1307             =item * SPH_GROUPBY_YEAR
1308              
1309             Group by year (assumes timestamp type attribute of form YYYY)
1310              
1311             =item * SPH_GROUPBY_ATTR
1312              
1313             Group by attribute value
1314              
1315             =item * SPH_GROUPBY_ATTRPAIR
1316              
1317             Group by two attributes, being the given attribute and the attribute that
1318             immediately follows it in the sequence of indexed attributes. The specified
1319             attribute may therefore not be the last of the indexed attributes.
1320              
1321             =back
1322              
1323             Groups in the set of results can be sorted by any SQL-like sorting clause,
1324             including both document attributes and the following special internal Sphinx
1325             attributes:
1326              
1327             =over 4
1328              
1329             =item @id - document ID;
1330              
1331             =item @weight, @rank, @relevance - match weight;
1332              
1333             =item @group - group by function value;
1334              
1335             =item @count - number of matches in group.
1336              
1337             =back
1338              
1339             The default mode is to sort by groupby value in descending order,
1340             ie. by "@group desc".
1341              
1342             In the results set, "total_found" contains the total amount of matching groups
1343             over the whole index.
1344              
1345             WARNING: grouping is done in fixed memory and thus its results
1346             are only approximate; so there might be more groups reported
1347             in total_found than actually present. @count might also
1348             be underestimated.
1349              
1350             For example, if sorting by relevance and grouping by a "published"
1351             attribute with SPH_GROUPBY_DAY function, then the result set will
1352             contain only the most relevant match for each day when there were any
1353             matches published, with day number and per-day match count attached,
1354             and sorted by day number in descending order (ie. recent days first).
1355              
1356             =cut
1357              
1358             sub SetGroupBy {
1359 0     0 1 0 my $self = shift;
1360 0         0 my $attribute = shift;
1361 0         0 my $func = shift;
1362 0   0     0 my $groupsort = shift || '@group desc';
1363 0 0       0 croak("attribute is not defined") unless (defined $attribute);
1364 0 0 0     0 croak("Unknown grouping function: $func") unless ($func==SPH_GROUPBY_DAY
      0        
      0        
      0        
      0        
1365             || $func==SPH_GROUPBY_WEEK
1366             || $func==SPH_GROUPBY_MONTH
1367             || $func==SPH_GROUPBY_YEAR
1368             || $func==SPH_GROUPBY_ATTR
1369             || $func==SPH_GROUPBY_ATTRPAIR
1370             );
1371              
1372 0         0 $self->{_groupby} = $attribute;
1373 0         0 $self->{_groupfunc} = $func;
1374 0         0 $self->{_groupsort} = $groupsort;
1375 0         0 return $self;
1376             }
1377              
1378             =head2 SetGroupDistinct
1379              
1380             $sph->SetGroupDistinct($attr);
1381              
1382             Set count-distinct attribute for group-by queries
1383              
1384             =cut
1385              
1386             sub SetGroupDistinct {
1387 0     0 1 0 my $self = shift;
1388 0         0 my $attribute = shift;
1389 0 0       0 croak("attribute is not defined") unless (defined $attribute);
1390 0         0 $self->{_groupdistinct} = $attribute;
1391 0         0 return $self;
1392             }
1393              
1394             =head2 SetRetries
1395              
1396             $sph->SetRetries($count, $delay);
1397              
1398             Set distributed retries count and delay
1399              
1400             =cut
1401              
1402             sub SetRetries {
1403 0     0 1 0 my $self = shift;
1404 0         0 my $count = shift;
1405 0   0     0 my $delay = shift || 0;
1406              
1407 0 0 0     0 croak("count: $count is not an integer >= 0") unless ($count =~ /^\d+$/o && $count >= 0);
1408 0 0 0     0 croak("delay: $delay is not an integer >= 0") unless ($delay =~ /^\d+$/o && $delay >= 0);
1409 0         0 $self->{_retrycount} = $count;
1410 0         0 $self->{_retrydelay} = $delay;
1411 0         0 return $self;
1412             }
1413              
1414             =head2 SetOverride
1415              
1416             ** DEPRECATED **
1417              
1418             $sph->SetOverride($attrname, $attrtype, $values);
1419              
1420             Set attribute values override. There can be only one override per attribute.
1421             $values must be a hash that maps document IDs to attribute values
1422              
1423             =cut
1424              
1425             sub SetOverride {
1426 0     0 1 0 my $self = shift;
1427 0         0 my $attrname = shift;
1428 0         0 my $attrtype = shift;
1429 0         0 my $values = shift;
1430              
1431 0         0 die "SetOverride is DEPRECATED. Do not call this method.";
1432              
1433 0 0       0 croak("attribute name is not defined") unless defined $attrname;
1434 0 0 0     0 croak("Uknown attribute type: $attrtype") unless ($attrtype == SPH_ATTR_INTEGER
      0        
      0        
      0        
1435             || $attrtype == SPH_ATTR_TIMESTAMP
1436             || $attrtype == SPH_ATTR_BOOL
1437             || $attrtype == SPH_ATTR_FLOAT
1438             || $attrtype == SPH_ATTR_BIGINT);
1439 0         0 $self->{_overrides}->{$attrname} = { attr => $attrname,
1440             type => $attrtype,
1441             values => $values,
1442             };
1443            
1444 0         0 return $self;
1445             }
1446              
1447              
1448             =head2 SetSelect
1449              
1450             $sph->SetSelect($select)
1451              
1452             Set select list (attributes or expressions). SQL-like syntax.
1453              
1454             =cut
1455              
1456             sub SetSelect {
1457 0     0 1 0 my $self = shift;
1458 0         0 $self->{_select} = shift;
1459 0         0 return $self;
1460             }
1461              
1462             =head2 SetQueryFlag
1463              
1464             $sph->SetQueryFlag($flag_name, $flag_value);
1465              
1466              
1467             =cut
1468              
1469             sub SetQueryFlag {
1470 0     0 1 0 my ($self, $flag_name, $flag_value) = @_;
1471              
1472 0 0       0 croak("Unknown flag $flag_name") unless exists $query_flags{$flag_name};
1473             croak("Unknown or illegal flag value ($flag_value) for '$flag_name'") unless
1474 0 0 0 0   0 (any { $_ eq $flag_value } @{$query_flags{$flag_name}})
  0   0     0  
  0         0  
1475             || ($flag_name eq 'max_predicted_time' && $flag_value =~ m/^\d+$/);
1476              
1477 0 0       0 if ($flag_name eq SPH_QF_REVERSE_SCAN) {
    0          
    0          
    0          
    0          
    0          
1478 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 0, $flag_value == 1);
1479             }
1480             elsif ($flag_name eq SPH_QF_SORT_METHOD) {
1481 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 1, $flag_value == "kbuffer");
1482             }
1483             elsif ($flag_name eq SPH_QF_MAX_PREDICTED_TIME) {
1484 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 2, $flag_value > 0);
1485 0         0 $self->{_predictedtime} = $flag_value;
1486             }
1487             elsif ($flag_name eq SPH_QF_BOOLEAN_SIMPLIFY) {
1488 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 3, $flag_value);
1489             }
1490             elsif ($flag_name eq SPH_QF_IDF) {
1491 0 0 0     0 if ($flag_value eq 'normalized' || $flag_value eq 'plain') {
1492 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 4, $flag_value eq 'normalized');
1493             }
1494             else { # must be tfidf_normalized or tfidf_unnormalized
1495 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 6, $flag_value eq 'tfidf_normalized');
1496             }
1497             }
1498             elsif ($flag_name eq SPH_QF_GLOBAL_IDF) {
1499 0         0 $self->{_query_flags} = $self->_sphSetBit( $self->{_query_flags}, 5, $flag_value);
1500             }
1501              
1502 0         0 return $self;
1503             }
1504              
1505             =head2 SetOuterSelect
1506              
1507             $sph->SetOuterSelect($orderby, $offset, $limit)
1508              
1509             =cut
1510              
1511             sub SetOuterSelect {
1512 0     0 1 0 my ($self, $orderby, $offset, $limit) = @_;
1513              
1514 0 0 0     0 croak("orderby must be a string") unless $orderby && ! ref($orderby);
1515 0 0 0     0 croak("offset and limit must be integers > 0") unless $offset =~ m/^\d+$/ && $limit =~ m/^\d+$/;
1516              
1517 0         0 $self->{_outerorderby} = $orderby;
1518 0         0 $self->{_outeroffsetlimit} = $offset;
1519 0         0 $self->{_outerlimit} = $limit;
1520 0         0 $self->{_hasouter} = 1;
1521              
1522 0         0 return $self;
1523             }
1524              
1525             =head2 ResetFilters
1526              
1527             $sph->ResetFilters;
1528              
1529             Clear all filters.
1530              
1531             =cut
1532              
1533             sub ResetFilters {
1534 0     0 1 0 my $self = shift;
1535              
1536 0         0 $self->{_filters} = [];
1537 0         0 $self->{_anchor} = undef;
1538              
1539 0         0 return $self;
1540             }
1541              
1542             =head2 ResetGroupBy
1543              
1544             $sph->ResetGroupBy;
1545              
1546             Clear all group-by settings (for multi-queries)
1547              
1548             =cut
1549              
1550             sub ResetGroupBy {
1551 0     0 1 0 my $self = shift;
1552              
1553 0         0 $self->{_groupby} = "";
1554 0         0 $self->{_groupfunc} = SPH_GROUPBY_DAY;
1555 0         0 $self->{_groupsort} = '@group desc';
1556 0         0 $self->{_groupdistinct} = "";
1557              
1558 0         0 return $self;
1559             }
1560              
1561             =head2 ResetOverrides
1562              
1563             Clear all attribute value overrides (for multi-queries)
1564              
1565             =cut
1566              
1567             sub ResetOverrides {
1568 0     0 1 0 my $self = shift;
1569              
1570 0         0 $self->{_select} = undef;
1571 0         0 return $self;
1572             }
1573              
1574             =head2 ResetQueryFlag
1575              
1576             Clear all query flags.
1577              
1578             =cut
1579              
1580             sub ResetQueryFlag {
1581 3     3 1 8 my $self = shift;
1582              
1583 3         14 $self->{_query_flags} = $self->_sphSetBit(0, 6, 1);
1584 3         9 $self->{_predictedtime} = 0;
1585 3         7 return $self;
1586             }
1587              
1588             =head2 ResetOuterSelect
1589              
1590             Clear all outer select settings.
1591              
1592             =cut
1593              
1594             sub ResetOuterSelect {
1595 3     3 1 7 my $self = shift;
1596              
1597 3         8 $self->{_outerorderby} = '';
1598 3         6 $self->{_outeroffset} = 0;
1599 3         7 $self->{_outerlimit} = 0;
1600 3         7 $self->{_hasouter} = 0;
1601              
1602 3         6 return $self;
1603             }
1604              
1605              
1606             =head2 Query
1607              
1608             $results = $sph->Query($query, $index);
1609              
1610             Connect to searchd server and run given search query.
1611              
1612             =over 4
1613              
1614             =item query is query string
1615              
1616             =item index is index name to query, default is "*" which means to query all indexes. Use a space or comma separated list to search multiple indexes.
1617              
1618             =back
1619              
1620             Returns undef on failure
1621              
1622             Returns hash which has the following keys on success:
1623              
1624             =over 4
1625              
1626             =item matches
1627            
1628             Array containing hashes with found documents ( "doc", "weight", "group", "stamp" )
1629            
1630             =item total
1631              
1632             Total amount of matches retrieved (upto SPH_MAX_MATCHES, see sphinx.h)
1633              
1634             =item total_found
1635            
1636             Total amount of matching documents in index
1637            
1638             =item time
1639            
1640             Search time
1641              
1642             =item words
1643            
1644             Hash which maps query terms (stemmed!) to ( "docs", "hits" ) hash
1645              
1646             =back
1647              
1648             Returns the results array on success, undef on error.
1649              
1650             =cut
1651              
1652             sub Query {
1653 0     0 1   my $self = shift;
1654 0           my $query = shift;
1655 0   0       my $index = shift || '*';
1656 0   0       my $comment = shift || '';
1657              
1658 0 0         croak("_reqs is not empty") unless @{$self->{_reqs}} == 0;
  0            
1659              
1660 0           $self->AddQuery($query, $index, $comment);
1661 0 0         my $results = $self->RunQueries or return;
1662 0 0         $self->_Error($results->[0]->{error}) if $results->[0]->{error};
1663 0 0         $self->_Warning($results->[0]->{warning}) if $results->[0]->{warning};
1664 0 0 0       return if $results->[0]->{status} && $results->[0]->{status} == SEARCHD_ERROR;
1665              
1666 0           return $results->[0];
1667             }
1668              
1669             # helper to pack floats in network byte order
1670             sub _PackFloat {
1671 0     0     my $f = shift;
1672 0           my $t1 = pack ( "f", $f ); # machine order
1673 0           my $t2 = unpack ( "L*", $t1 ); # int in machine order
1674 0           return pack ( "N", $t2 );
1675             }
1676              
1677              
1678             =head2 AddQuery
1679              
1680             $sph->AddQuery($query, $index);
1681              
1682             Add a query to a batch request.
1683              
1684             Batch queries enable searchd to perform internal optimizations,
1685             if possible; and reduce network connection overheads in all cases.
1686              
1687             For instance, running exactly the same query with different
1688             groupby settings will enable searched to perform expensive
1689             full-text search and ranking operation only once, but compute
1690             multiple groupby results from its output.
1691              
1692             Parameters are exactly the same as in Query() call.
1693              
1694             Returns corresponding index to the results array returned by RunQueries() call.
1695              
1696             =cut
1697              
1698             sub AddQuery {
1699 0     0 1   my $self = shift;
1700 0           my $query = shift;
1701 0   0       my $index = shift || '*';
1702 0   0       my $comment = shift || '';
1703              
1704             ##################
1705             # build request
1706             ##################
1707              
1708 0           my $req;
1709 0           $req = pack ( "NNNNN", $self->{_query_flags}, $self->{_offset}, $self->{_limit}, $self->{_mode}, $self->{_ranker}); # mode and limits
1710              
1711 0 0         if ($self->{_ranker} == SPH_RANK_EXPR) {
1712 0           $req .= pack ( "N/a*", $self->{_rankexpr});
1713             }
1714 0           $req .= pack ( "N", $self->{_sort} ); # (deprecated) sort mode
1715 0           $req .= pack ( "N/a*", $self->{_sortby});
1716 0           $req .= pack ( "N/a*", $self->{_string_encoder}->($query) ); # query itself
1717 0           $req .= pack ( "N*", scalar(@{$self->{_weights}}), @{$self->{_weights}});
  0            
  0            
1718 0           $req .= pack ( "N/a*", $index); # indexes
1719             $req .= pack ( "N", 1)
1720             . $self->_sphPackU64($self->{_min_id})
1721 0           . $self->_sphPackU64($self->{_max_id}); # id64 range
1722              
1723             # filters
1724 0           $req .= pack ( "N", scalar @{$self->{_filters}} );
  0            
1725 0           foreach my $filter (@{$self->{_filters}}) {
  0            
1726 0           $req .= pack ( "N/a*", $filter->{attr});
1727 0           $req .= pack ( "N", $filter->{type});
1728              
1729 0           my $t = $filter->{type};
1730 0 0         if ($t == SPH_FILTER_VALUES) {
    0          
    0          
    0          
1731 0           $req .= $self->_sphPackI64array($filter->{values});
1732             }
1733             elsif ($t == SPH_FILTER_RANGE) {
1734 0           $req .= $self->_sphPackI64($filter->{min}) . $self->_sphPackI64($filter->{max});
1735             }
1736             elsif ($t == SPH_FILTER_FLOATRANGE) {
1737 0           $req .= _PackFloat ( $filter->{"min"} ) . _PackFloat ( $filter->{"max"} );
1738             }
1739             elsif ($t == SPH_FILTER_STRING) {
1740 0           $req .= pack ( "N/a*", $filter->{value});
1741             }
1742             else {
1743 0           croak("Unhandled filter type $t");
1744             }
1745 0           $req .= pack ( "N", $filter->{exclude});
1746             }
1747              
1748             # group-by clause, max-matches count, group-sort clause, cutoff count
1749 0           $req .= pack ( "NN/a*", $self->{_groupfunc}, $self->{_groupby} );
1750 0           $req .= pack ( "N", $self->{_maxmatches} );
1751 0           $req .= pack ( "N/a*", $self->{_groupsort});
1752 0           $req .= pack ( "NNN", $self->{_cutoff}, $self->{_retrycount}, $self->{_retrydelay} );
1753 0           $req .= pack ( "N/a*", $self->{_groupdistinct});
1754              
1755 0 0         if (!defined $self->{_anchor}) {
1756 0           $req .= pack ( "N", 0);
1757             }
1758             else {
1759 0           my $a = $self->{_anchor};
1760 0           $req .= pack ( "N", 1);
1761 0           $req .= pack ( "N/a*", $a->{attrlat});
1762 0           $req .= pack ( "N/a*", $a->{attrlong});
1763 0           $req .= _PackFloat($a->{lat}) . _PackFloat($a->{long});
1764             }
1765              
1766             # per-index weights
1767 0           $req .= pack( "N", scalar keys %{$self->{_indexweights}});
  0            
1768 0           $req .= pack ( "N/a*N", $_, $self->{_indexweights}->{$_} ) for keys %{$self->{_indexweights}};
  0            
1769              
1770             # max query time
1771 0           $req .= pack ( "N", $self->{_maxquerytime} );
1772              
1773             # per-field weights
1774 0           $req .= pack ( "N", scalar keys %{$self->{_fieldweights}} );
  0            
1775 0           $req .= pack ( "N/a*N", $_, $self->{_fieldweights}->{$_}) for keys %{$self->{_fieldweights}};
  0            
1776             # comment
1777 0           $req .= pack ( "N/a*", $comment);
1778              
1779             # attribute overrides
1780 0           $req .= pack ( "N", scalar keys %{$self->{_overrides}} );
  0            
1781 0           for my $entry (values %{$self->{_overrides}}) {
  0            
1782             $req .= pack ("N/a*", $entry->{attr})
1783 0           . pack ("NN", $entry->{type}, scalar keys %{$entry->{values}});
  0            
1784 0           for my $id (keys %{$entry->{values}}) {
  0            
1785 0 0         croak "Attribute value key is not numeric" unless $id =~ m/$num_re/;
1786 0           my $v = $entry->{values}->{$id};
1787 0 0         croak "Attribute value key is not numeric" unless $v =~ m/$num_re/;
1788 0           $req .= $self->_sphPackU64($id);
1789 0 0         if ($entry->{type} == SPH_ATTR_FLOAT) {
    0          
1790 0           $req .= $self->_packfloat($v);
1791             }
1792             elsif ($entry->{type} == SPH_ATTR_BIGINT) {
1793 0           $req .= $self->_sphPackI64($v);
1794             }
1795             else {
1796 0           $req .= pack("N", $v);
1797             }
1798             }
1799             }
1800            
1801             # select list
1802 0   0       $req .= pack("N/a*", $self->{_select} || '');
1803              
1804             # max_predicted_time
1805 0 0         if ($self->{_predictedtime} > 0) {
1806 0           $req .= pack ( "N", $self->{_predictedtime} );
1807             }
1808              
1809 0           $req .= pack ( "N/a*", $self->{_outerorderby});
1810 0           $req .= pack ( "NN", $self->{_outeroffset}, $self->{_outerlimit} );
1811 0 0         $req .= pack ("N", $self->{_hasouter} ? 1 : 0 );
1812              
1813 0           push(@{$self->{_reqs}}, $req);
  0            
1814              
1815 0           return scalar $#{$self->{_reqs}};
  0            
1816             }
1817              
1818             =head2 RunQueries
1819              
1820             $sph->RunQueries
1821              
1822             Run batch of queries, as added by AddQuery.
1823              
1824             Returns undef on network IO failure.
1825              
1826             Returns an array of result sets on success.
1827              
1828             Each result set in the returned array is a hash which contains
1829             the same keys as the hash returned by L, plus:
1830              
1831             =over 4
1832              
1833             =item * error
1834              
1835             Errors, if any, for this query.
1836              
1837             =item * warning
1838            
1839             Any warnings associated with the query.
1840              
1841             =back
1842              
1843             =cut
1844              
1845             sub RunQueries {
1846 0     0 1   my $self = shift;
1847              
1848 0 0         unless (@{$self->{_reqs}}) {
  0            
1849 0           $self->_Error("no queries defined, issue AddQuery() first");
1850 0           return;
1851             }
1852              
1853             ##################
1854             # send query, get response
1855             ##################
1856 0           my $nreqs = @{$self->{_reqs}};
  0            
1857 0           my $req = pack("NNa*", 0, $nreqs, join("", @{$self->{_reqs}}));
  0            
1858 0           $req = pack ( "nnN/a*", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $req); # add header
1859 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_SEARCH);
1860 0           $self->{_reqs} = [];
1861 0 0         return unless $response;
1862              
1863             ##################
1864             # parse response
1865             ##################
1866              
1867 0           my $p = 0;
1868 0           my $max = length($response); # Protection from broken response
1869              
1870 0           my @results;
1871 0           for (my $ires = 0; $ires < $nreqs; $ires++) {
1872 0           my $result = {}; # Empty hash ref
1873 0           push(@results, $result);
1874 0           $result->{matches} = []; # Empty array ref
1875 0           $result->{error} = "";
1876 0           $result->{warning} = "";
1877              
1878             # extract status
1879 0           my $status = unpack("N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1880 0 0         if ($status != SEARCHD_OK) {
1881 0           my $len = unpack("N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1882 0           my $message = substr ( $response, $p, $len ); $p += $len;
  0            
1883 0 0         if ($status == SEARCHD_WARNING) {
1884 0           $result->{warning} = $message;
1885             }
1886             else {
1887 0           $result->{error} = $message;
1888 0           next;
1889             }
1890             }
1891              
1892             # read schema
1893 0           my @fields;
1894 0           my (%attrs, @attr_list);
1895              
1896 0           my $nfields = unpack ( "N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1897 0   0       while ( $nfields-->0 && $p<$max ) {
1898 0           my $len = unpack ( "N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1899 0           push(@fields, substr ( $response, $p, $len )); $p += $len;
  0            
1900             }
1901 0           $result->{"fields"} = \@fields;
1902              
1903 0           my $nattrs = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1904 0   0       while ( $nattrs-->0 && $p<$max ) {
1905 0           my $len = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1906 0           my $attr = substr ( $response, $p, $len ); $p += $len;
  0            
1907 0           my $type = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1908 0           $attrs{$attr} = $type;
1909 0           push(@attr_list, $attr);
1910             }
1911 0           $result->{"attrs"} = \%attrs;
1912              
1913             # read match count
1914 0           my $count = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1915 0           my $id64 = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1916              
1917             # read matches
1918 0   0       while ( $count-->0 && $p<$max ) {
1919 0           my $data = {};
1920 0 0         if ($id64) {
1921 0           $data->{doc} = $self->_sphUnpackU64(substr($response, $p, 8)); $p += 8;
  0            
1922 0           $data->{weight} = unpack("N*", substr($response, $p, 4)); $p += 4;
  0            
1923             }
1924             else {
1925 0           ( $data->{doc}, $data->{weight} ) = unpack("N*N*", substr($response,$p,8));
1926 0           $p += 8;
1927             }
1928 0           foreach my $attr (@attr_list) {
1929 0 0         if ($attrs{$attr} == SPH_ATTR_BIGINT) {
1930 0           $data->{$attr} = $self->_sphUnpackI64(substr($response, $p, 8)); $p += 8;
  0            
1931 0           next;
1932             }
1933 0 0         if ($attrs{$attr} == SPH_ATTR_FLOAT) {
1934 0           my $uval = unpack( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1935 0           $data->{$attr} = [ unpack("f*", pack("L", $uval)) ];
1936 0           next;
1937             }
1938 0           my $val = unpack ( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1939 0 0         if ($attrs{$attr} == SPH_ATTR_MULTI) {
    0          
    0          
    0          
1940 0           my $nvalues = $val;
1941 0           $data->{$attr} = [];
1942 0   0       while ($nvalues-- > 0 && $p < $max) {
1943 0           $val = unpack( "N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
1944 0           push(@{$data->{$attr}}, $val);
  0            
1945             }
1946             }
1947             elsif ($attrs{$attr} == SPH_ATTR_MULTI64) {
1948 0           my $nvalues = $val;
1949 0           $data->{$attr} = [];
1950 0   0       while ($nvalues > 0 && $p < $max) {
1951 0           $val = unpack( "q*", substr ( $response, $p, 8 ) ); $p += 8;
  0            
1952 0           push(@{$data->{$attr}}, $val);
  0            
1953 0           $nvalues -= 2;
1954             }
1955             }
1956             elsif ($attrs{$attr} == SPH_ATTR_STRING) {
1957 0           $data->{$attr} = $self->{_string_decoder}->(substr ($response, $p, $val));
1958 0           $p += $val;
1959             }
1960             elsif ($attrs{$attr} == SPH_ATTR_FACTORS) {
1961 0           $data->{$attr} = $self->{_string_decoder}->(substr ($response, $p, $val - 4));
1962 0           $p += $val - 4;
1963             }
1964             else {
1965 0           $data->{$attr} = $val;
1966             }
1967             }
1968 0           push(@{$result->{matches}}, $data);
  0            
1969             }
1970 0           my $words;
1971 0           ($result->{total}, $result->{total_found}, $result->{time}, $words) = unpack("N*N*N*N*", substr($response, $p, 16));
1972 0           $result->{time} = sprintf ( "%.3f", $result->{"time"}/1000 );
1973 0           $p += 16;
1974              
1975 0   0       while ( $words-->0 && $p < $max) {
1976 0           my $len = unpack ( "N*", substr ( $response, $p, 4 ) );
1977 0           $p += 4;
1978 0           my $word = $self->{_string_decoder}->( substr ( $response, $p, $len ) );
1979 0           $p += $len;
1980 0           my ($docs, $hits) = unpack ("N*N*", substr($response, $p, 8));
1981 0           $p += 8;
1982 0           $result->{words}{$word} = {
1983             "docs" => $docs,
1984             "hits" => $hits
1985             };
1986             }
1987             }
1988              
1989 0           return \@results;
1990             }
1991              
1992             =head2 SphinxQL
1993              
1994             my $results = $sph->SphinxQL($sphinxql_query);
1995              
1996             This is an alternative implementation of the SphinxQL API to the DBI option. Frankly, it was an experiment, and the DBI driver proved to have much better performance. Whilst this may be useful to some, in general if you are considering using this method then you should probably look at connecting directly via DBI instead.
1997              
1998             Results are return in a hash containing an array of 'columns' and 'rows' and possibly a warning count. If a server-side error occurs, the hash contains the 'error' field. If a communication error occurs, the return value will be undefined. In either error case, GetLastError will return the error.
1999              
2000             =cut
2001              
2002             sub SphinxQL {
2003 0     0 1   my ($self, $ql) = @_;
2004              
2005 0           my $req = pack ("N/a*", $self->{_string_encoder}->($ql));
2006 0           $req = pack ( "nnN/a*", SEARCHD_COMMAND_SPHINXQL, VER_COMMAND_SPHINXQL, $req); # add header
2007 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_SPHINXQL);
2008              
2009 0 0         return if ! $response;
2010              
2011 0           my ($result) = $self->_mysql_unpack_table($response, 0, length($response));
2012              
2013 0           return $result;
2014             }
2015              
2016             sub _mysql_unpack_header {
2017 0     0     my ($self, $response, $p) = @_;
2018              
2019 0           my $max = length($response);
2020 0 0         return (undef, undef, $p, 1, undef, undef) if $p == $max;
2021 0 0         $self->_Throw("Decode pointer ($p) beyond end of buffer ($max)") if $p > $max;
2022              
2023 0           my $header = unpack ( "L<", substr( $response, $p, 4 ) ); $p += 4; # packet ID << 24 | packet length
  0            
2024 0           my $packet_id = $header >> 24;
2025 0           my $packet_len = $header & 0x00FFFFFF;
2026 0 0         return ($packet_id, $packet_len, $p, 1, undef, undef) if $packet_len == 0;
2027              
2028             # Peek for EOF or error
2029 0           my ($is_eof, $warns, $more_results);
2030 0           my $peek_byte = unpack("C", substr($response, $p, 1));
2031 0 0 0       if ($packet_len == 5 && $peek_byte == 0xfe) { # EOF
    0          
2032 0           $is_eof = 1;
2033 0           $p += 1; # 0xFE
2034 0           my $warns = unpack ( "L<", substr( $response, $p, 4 ) ); $p += 4;
  0            
2035 0           my $more_results = (($warns >> 16) & 8) != 0; # SPH_MYSQL_FLAG_MORE_RESULTS = 8
2036 0           $warns &= 0xFFFF;
2037             }
2038             elsif ($peek_byte == 0xff) {
2039 0           $p += 1; # 0xFF
2040 0           my $error_code = unpack ( "S<", substr( $response, $p, 2 ) ); $p += 2;
  0            
2041 0           my $error_code2 = unpack("a*", substr($response, $p, 6)); $p += 6;
  0            
2042 0           my $len = $packet_len - 9;
2043 0           my $err_msg = $self->{_string_decoder}->(unpack("a*", substr($response, $p, $len))); $p += $len;
  0            
2044 0           $self->_Throw("Error Code $error_code: $error_code2 $err_msg");
2045             }
2046 0           return ($packet_id, $packet_len, $p, $is_eof, $warns, $more_results);
2047             }
2048              
2049             sub _mysql_unpack_table {
2050 0     0     my ($self, $response, $p, $max) = @_;
2051              
2052 0           my ($columns, $warns, $more_results, $rows);
2053              
2054 0           eval {
2055 0           ($columns, $warns, $more_results, $p) = $self->_mysql_unpack_table_header($response, $p);
2056 0           ($rows, $p, $warns, $more_results) = $self->_mysql_unpack_table_rows($response, $p, $max, $columns);
2057             };
2058 0 0         if (my $e = $@) {
2059 0           $self->_Error($e);
2060 0           return ({ error => $e }, $p);
2061             }
2062              
2063 0           my %result = (columns => $columns,
2064             rows => $rows,
2065             warnings => $warns,
2066             );
2067 0           return (\%result, $p);
2068             }
2069              
2070             sub _mysql_unpack_table_header {
2071 0     0     my ($self, $response, $p) = @_;
2072              
2073 0           my @columns;
2074              
2075             # table header begin
2076 0           my ($packet_id, $packet_len, $is_eof, $warns, $more_results);
2077 0           ($packet_id, $packet_len, $p, $is_eof, $warns, $more_results) = $self->_mysql_unpack_header($response, $p);
2078              
2079 0 0         if (! $is_eof) {
2080 0           my $ncols;
2081 0           ($ncols, $p) = $self->_mysql_unpack_varint($response, $p);
2082              
2083             # column info
2084 0           for my $i (0 .. $ncols - 1) {
2085 0           ($columns[$i], $p) = $self->_mysql_unpack_field_packet($response, $p);
2086             }
2087              
2088             # table header end
2089 0           ($packet_id, $packet_len, $p, $is_eof, $warns, $more_results) = $self->_mysql_unpack_header($response, $p);
2090             }
2091 0           return (\@columns, $warns, $more_results, $p);
2092             }
2093              
2094             sub _mysql_unpack_table_rows {
2095 0     0     my ($self, $response, $p, $max, $columns) = @_;
2096              
2097 0           my @rows;
2098 0           my ($warns, $more_results);
2099 0           while ($p < $max) {
2100 0           my $row;
2101 0           ($row, $p, $warns, $more_results) = $self->_mysql_unpack_table_row($response, $p, $columns);
2102 0 0         if ($row) {
2103 0           push(@rows, $row);
2104             }
2105             else {
2106 0           last;
2107             }
2108             }
2109 0           return (\@rows, $p, $warns, $more_results);
2110             }
2111              
2112             sub _mysql_unpack_table_row {
2113 0     0     my ($self, $response, $p, $columns) = @_;
2114              
2115 0           my ($packet_id, $packet_len, $is_eof, $warns, $more_results);
2116 0           ($packet_id, $packet_len, $p, $is_eof, $warns, $more_results) = $self->_mysql_unpack_header($response, $p);
2117 0 0         if ($is_eof) {
2118 0           return(undef, $p, $warns, $more_results);
2119             }
2120              
2121 0           my @row;
2122 0           for my $col (@$columns) {
2123 0           my $val;
2124 0 0         if ($col->{column_type} == MYSQL_COL_STRING) {
2125 0           ($val, $p) = $self->_mysql_unpack_string($response, $p, 1);
2126             }
2127             else {
2128 0           ($val, $p) = $self->_mysql_unpack_string($response, $p);
2129             }
2130 0           push(@row, $val);
2131             }
2132 0           return (\@row, $p);
2133             }
2134              
2135              
2136             sub _mysql_unpack_varint {
2137 0     0     my ($self, $response, $p) = @_;
2138              
2139 0           my $prefix = unpack("C", substr($response, $p, 1)); $p += 1;
  0            
2140 0 0         if ($prefix < 251) {
2141 0           return ($prefix, $p);
2142             }
2143 0 0         if ($prefix == 0xFC) {
2144 0           $prefix = unpack("S<", substr($response, $p, 2)); $p += 2;
  0            
2145 0           return ($prefix, $p);
2146             }
2147 0 0         if ($prefix == 0xFC) {
2148 0           $prefix = unpack("S<", substr($response, $p, 2)); $p += 2;
  0            
2149 0           $prefix += unpack("C", substr($response, $p, 1)) << 16; $p += 1;
  0            
2150 0           return ($prefix, $p);
2151             }
2152 0 0         if ($prefix == 0xFE) {
2153 0           $prefix = unpack("L<", substr($response, $p, 4)); $p += 4;
  0            
2154             }
2155 0           $p += 4; # discard 4 null bytes
2156 0           return ($prefix, $p);
2157             }
2158              
2159             sub _mysql_unpack_string {
2160 0     0     my ($self, $response, $p, $decode) = @_;
2161              
2162 0           my $len;
2163 0           ($len, $p) = $self->_mysql_unpack_varint($response, $p);
2164 0           my $s = substr($response, $p, $len); $p += $len;
  0            
2165 0 0         $s = $self->{_string_decoder}->($s) if $decode;
2166              
2167 0           return ($s, $p);
2168             }
2169              
2170             sub _mysql_unpack_field_packet {
2171 0     0     my ($self, $response, $p) = @_;
2172              
2173 0           my ($packet_id, $packet_len, $is_eof, $warns, $more_results);
2174 0           ($packet_id, $packet_len, $p, $is_eof, $warns, $more_results) = $self->_mysql_unpack_header($response, $p);
2175              
2176 0           my %field;
2177 0           ($field{catalog}, $p) = $self->_mysql_unpack_string($response, $p);
2178 0           ($field{db}, $p) = $self->_mysql_unpack_string($response, $p);
2179 0           ($field{table}, $p) = $self->_mysql_unpack_string($response, $p);
2180 0           ($field{org_table}, $p) = $self->_mysql_unpack_string($response, $p);
2181 0           ($field{name}, $p) = $self->_mysql_unpack_string($response, $p);
2182 0           ($field{org_name}, $p) = $self->_mysql_unpack_string($response, $p);
2183              
2184 0           $p += 3; # filler=12, charset_nr=0x21 (utf8)
2185 0           $field{column_length} = unpack ( "L<", substr( $response, $p, 4 ) ); $p += 4;
  0            
2186 0           $field{column_type} = unpack("C", substr($response, $p, 1)); $p += 1;
  0            
2187 0           $p += 5; # flags, decimals, filler
2188              
2189 0           return (\%field, $p);
2190             }
2191              
2192             =head2 BuildExcerpts
2193              
2194             $excerpts = $sph->BuildExcerpts($docs, $index, $words, $opts)
2195              
2196             Generate document excerpts for the specified documents.
2197              
2198             =over 4
2199              
2200             =item docs
2201              
2202             An array reference of strings which represent the document
2203             contents
2204              
2205             =item index
2206              
2207             A string specifiying the index whose settings will be used
2208             for stemming, lexing and case folding
2209              
2210             =item words
2211              
2212             A string which contains the words to highlight
2213              
2214             =item opts
2215              
2216             A hash which contains additional optional highlighting parameters:
2217              
2218             =over 4
2219              
2220             =item before_match - a string to insert before a set of matching words, default is ""
2221              
2222             =item after_match - a string to insert after a set of matching words, default is ""
2223              
2224             =item chunk_separator - a string to insert between excerpts chunks, default is " ... "
2225              
2226             =item limit - max excerpt size in symbols (codepoints), default is 256
2227              
2228             =item limit_passages - Limits the maximum number of passages that can be included into the snippet. Integer, default is 0 (no limit).
2229              
2230             =item limit_words - Limits the maximum number of keywords that can be included into the snippet. Integer, default is 0 (no limit).
2231              
2232             =item around - how many words to highlight around each match, default is 5
2233              
2234             =item exact_phrase - whether to highlight exact phrase matches only, default is false
2235              
2236             =item single_passage - whether to extract single best passage only, default is false
2237              
2238             =item use_boundaries
2239              
2240             =item weight_order - Whether to sort the extracted passages in order of relevance (decreasing weight), or in order of appearance in the document (increasing position). Boolean, default is false.
2241              
2242             =item query_mode - Whether to handle $words as a query in extended syntax, or as a bag of words (default behavior). For instance, in query mode ("one two" | "three four") will only highlight and include those occurrences "one two" or "three four" when the two words from each pair are adjacent to each other. In default mode, any single occurrence of "one", "two", "three", or "four" would be highlighted. Boolean, default is false.
2243              
2244             =item force_all_words - Ignores the snippet length limit until it includes all the keywords. Boolean, default is false.
2245              
2246             =item start_passage_id - Specifies the starting value of %PASSAGE_ID% macro (that gets detected and expanded in before_match, after_match strings). Integer, default is 1.
2247              
2248             =item load_files - Whether to handle $docs as data to extract snippets from (default behavior), or to treat it as file names, and load data from specified files on the server side. Boolean, default is false.
2249              
2250             =item html_strip_mode - HTML stripping mode setting. Defaults to "index", which means that index settings will be used. The other values are "none" and "strip", that forcibly skip or apply stripping irregardless of index settings; and "retain", that retains HTML markup and protects it from highlighting. The "retain" mode can only be used when highlighting full documents and thus requires that no snippet size limits are set. String, allowed values are "none", "strip", "index", and "retain".
2251              
2252             =item allow_empty - Allows empty string to be returned as highlighting result when a snippet could not be generated (no keywords match, or no passages fit the limit). By default, the beginning of original text would be returned instead of an empty string. Boolean, default is false.
2253              
2254             =item passage_boundary
2255              
2256             =item emit_zones
2257              
2258             =item load_files_scattered
2259              
2260             =back
2261              
2262             =back
2263              
2264             Returns undef on failure.
2265              
2266             Returns an array ref of string excerpts on success.
2267              
2268             =cut
2269              
2270             sub BuildExcerpts {
2271 0     0 1   my ($self, $docs, $index, $words, $opts) = @_;
2272 0   0       $opts ||= {};
2273 0 0 0       croak("BuildExcepts() called with incorrect parameters")
      0        
      0        
2274             unless (ref($docs) eq 'ARRAY'
2275             && defined($index)
2276             && defined($words)
2277             && ref($opts) eq 'HASH');
2278              
2279             ##################
2280             # fixup options
2281             ##################
2282 0   0       $opts->{"before_match"} ||= "";
2283 0   0       $opts->{"after_match"} ||= "";
2284 0   0       $opts->{"chunk_separator"} ||= " ... ";
2285 0   0       $opts->{"limit"} ||= 256;
2286 0   0       $opts->{"limit_passages"} ||= 0;
2287 0   0       $opts->{"limit_words"} ||= 0;
2288 0   0       $opts->{"around"} ||= 5;
2289 0   0       $opts->{"exact_phrase"} ||= 0;
2290 0   0       $opts->{"single_passage"} ||= 0;
2291 0   0       $opts->{"use_boundaries"} ||= 0;
2292 0   0       $opts->{"weight_order"} ||= 0;
2293 0   0       $opts->{"query_mode"} ||= 0;
2294 0   0       $opts->{"force_all_words"} ||= 0;
2295 0   0       $opts->{"start_passage_id"} ||= 1;
2296 0   0       $opts->{"load_files"} ||= 0;
2297 0   0       $opts->{"html_strip_mode"} ||= "index";
2298 0   0       $opts->{"allow_empty"} ||= 0;
2299 0   0       $opts->{"passage_boundary"} ||= "none";
2300 0   0       $opts->{"emit_zones"} ||= 0;
2301 0   0       $opts->{"load_files_scattered"} ||= 0;
2302              
2303             ##################
2304             # build request
2305             ##################
2306              
2307             # v.1.2 req
2308 0           my $req;
2309 0           my $flags = 1; # remove spaces
2310 0 0         $flags |= 2 if ( $opts->{"exact_phrase"} );
2311 0 0         $flags |= 4 if ( $opts->{"single_passage"} );
2312 0 0         $flags |= 8 if ( $opts->{"use_boundaries"} );
2313 0 0         $flags |= 16 if ( $opts->{"weight_order"} );
2314 0 0         $flags |= 32 if ( $opts->{"query_mode"} );
2315 0 0         $flags |= 64 if ( $opts->{"force_all_words"} );
2316 0 0         $flags |= 128 if ( $opts->{"load_files"} );
2317 0 0         $flags |= 256 if ( $opts->{"allow_empty"} );
2318 0 0         $flags |= 512 if ( $opts->{"emit_zones"} );
2319 0 0         $flags |= 1024 if ( $opts->{"load_files_scattered"} );
2320 0           $req = pack ( "NN", 0, $flags ); # mode=0, flags=$flags
2321              
2322 0           $req .= pack ( "N/a*", $index ); # req index
2323 0           $req .= pack ( "N/a*", $self->{_string_encoder}->($words)); # req words
2324              
2325             # options
2326 0           $req .= pack ( "N/a*", $opts->{"before_match"});
2327 0           $req .= pack ( "N/a*", $opts->{"after_match"});
2328 0           $req .= pack ( "N/a*", $opts->{"chunk_separator"});
2329 0           $req .= pack ( "NN", int($opts->{"limit"}), int($opts->{"around"}) );
2330             $req .= pack ( "NNN", int($opts->{"limit_passages"}),
2331             int($opts->{"limit_words"}),
2332 0           int($opts->{"start_passage_id"}) ); # v1.2
2333 0           $req .= pack ( "N/a*", $opts->{"html_strip_mode"});
2334 0           $req .= pack ( "N/a*", $opts->{"passage_boundary"});
2335              
2336             # documents
2337 0           $req .= pack ( "N", scalar(@$docs) );
2338 0           foreach my $doc (@$docs) {
2339 0 0         croak('BuildExcerpts: Found empty document in $docs') unless ($doc);
2340 0           $req .= pack("N/a*", $self->{_string_encoder}->($doc));
2341             }
2342              
2343             ##########################
2344             # send query, get response
2345             ##########################
2346              
2347 0           $req = pack ( "nnN/a*", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $req); # add header
2348 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_EXCERPT);
2349 0 0         return unless $response;
2350 0           my ($pos, $i) = 0;
2351 0           my $res = []; # Empty hash ref
2352 0           my $rlen = length($response);
2353 0           for ( $i=0; $i< scalar(@$docs); $i++ ) {
2354 0           my $len = unpack ( "N*", substr ( $response, $pos, 4 ) );
2355 0           $pos += 4;
2356              
2357 0 0         if ( $pos+$len > $rlen ) {
2358 0           $self->_Error("incomplete reply");
2359 0           return;
2360             }
2361 0           push(@$res, $self->{_string_decoder}->( substr ( $response, $pos, $len ) ));
2362 0           $pos += $len;
2363             }
2364 0           return $res;
2365             }
2366              
2367              
2368             =head2 BuildKeywords
2369              
2370             $results = $sph->BuildKeywords($query, $index, $hits)
2371              
2372             Generate keyword list for a given query
2373             Returns undef on failure,
2374             Returns an array of hashes, where each hash describes a word in the query with the following keys:
2375              
2376             =over 4
2377              
2378             =item * tokenized
2379              
2380             Tokenised term from query
2381              
2382             =item * normalized
2383              
2384             Normalised term from query
2385              
2386             =item * docs
2387              
2388             Number of docs in which word was found (if $hits is true)
2389              
2390             =item * hits
2391              
2392             Number of occurrences of word (if $hits is true)
2393              
2394             =back
2395              
2396             =cut
2397              
2398             sub BuildKeywords {
2399 0     0 1   my ( $self, $query, $index, $hits ) = @_;
2400              
2401              
2402             # v.1.0 req
2403 0           my $req = pack("N/a*", $self->{_string_encoder}->($query) );
2404 0           $req .= pack("N/a*", $index);
2405 0           $req .= pack("N", $self->{_string_encoder}->($hits) );
2406              
2407             ##################
2408             # send query, get response
2409             ##################
2410              
2411 0           $req = pack ( "nnN/a*", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $req);
2412 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_KEYWORDS);
2413 0 0         return unless $response;
2414              
2415             ##################
2416             # parse response
2417             ##################
2418              
2419 0           my $p = 0;
2420 0           my @res;
2421 0           my $rlen = length($response);
2422              
2423 0           my $nwords = unpack("N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
2424              
2425 0           for (my $i=0; $i < $nwords; $i++ ) {
2426 0           my $len = unpack("N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
2427              
2428 0 0         my $tokenized = $len ? $self->{_string_decoder}->( substr ( $response, $p, $len ) ) : ""; $p += $len;
  0            
2429 0           $len = unpack("N", substr ( $response, $p, 4 ) ); $p += 4;
  0            
2430              
2431 0 0         my $normalized = $len ? $self->{_string_decoder}->( substr ( $response, $p, $len ) ) : ""; $p += $len;
  0            
2432 0           my %data = ( tokenized => $tokenized, normalized => $normalized );
2433            
2434 0 0         if ($hits) {
2435 0           ( $data{docs}, $data{hits} ) = unpack("N*N*", substr($response,$p,8));
2436 0           $p += 8;
2437            
2438             }
2439 0           push(@res, \%data);
2440             }
2441 0 0         if ( $p > $rlen ) {
2442 0           $self->_Error("incomplete reply");
2443 0           return;
2444             }
2445              
2446 0           return \@res;
2447             }
2448              
2449             =head2 EscapeString
2450              
2451             $escaped = $sph->EscapeString('abcde!@#$%')
2452              
2453             Inserts backslash before all non-word characters in the given string.
2454              
2455             =cut
2456              
2457             sub EscapeString {
2458 0     0 1   my $self = shift;
2459 0           return quotemeta(shift);
2460             }
2461              
2462              
2463             =head2 UpdateAttributes
2464              
2465             $sph->UpdateAttributes($index, \@attrs, \%values);
2466             $sph->UpdateAttributes($index, \@attrs, \%values, $mva);
2467             $sph->UpdateAttributes($index, \@attrs, \%values, $mva, $ignorenonexistent);
2468              
2469             Update specified attributes on specified documents
2470              
2471             =over 4
2472              
2473             =item index
2474              
2475             Name of the index to be updated
2476              
2477             =item attrs
2478              
2479             Array of attribute name strings
2480              
2481             =item values
2482              
2483             A hash with key as document id, value as an array of new attribute values
2484              
2485             =item mva
2486              
2487             If set, indicates that there is update of MVA attributes
2488              
2489             =item ignorenonexistent
2490              
2491             If set, the update will silently ignore any warnings about trying to update a column which is not exists in current index schema.
2492              
2493             =back
2494              
2495             Returns number of actually updated documents (0 or more) on success
2496              
2497             Returns undef on failure
2498              
2499             Usage example:
2500              
2501             $sph->UpdateAttributes("test1", [ qw/group_id/ ], { 1 => [ 456] }) );
2502              
2503             =cut
2504              
2505             sub UpdateAttributes {
2506 0     0 1   my ($self, $index, $attrs, $values, $mva, $ignorenonexistent ) = @_;
2507              
2508 0 0         croak("index is not defined") unless (defined $index);
2509 0 0         croak("attrs must be an array") unless ref($attrs) eq "ARRAY";
2510 0           for my $attr (@$attrs) {
2511 0 0         croak("attribute is not defined") unless (defined $attr);
2512             }
2513 0 0         croak("values must be a hashref") unless ref($values) eq "HASH";
2514              
2515 0           for my $id (keys %$values) {
2516 0           my $entry = $values->{$id};
2517 0 0         croak("value id $id is not numeric") unless ($id =~ /$num_re/);
2518 0 0         croak("value entry must be an array") unless ref($entry) eq "ARRAY";
2519 0 0         croak("size of values must match size of attrs") unless @$entry == @$attrs;
2520 0           for my $v (@$entry) {
2521 0 0         if ($mva) {
2522 0 0         croak("multi-valued entry $v is not an array") unless ref($v) eq 'ARRAY';
2523 0           for my $vv (@$v) {
2524 0 0         croak("array entry value $vv is not an integer") unless ($vv =~ /^(\d+)$/o);
2525             }
2526             }
2527             else {
2528 0 0         croak("entry value $v is not an integer") unless ($v =~ /^(\d+)$/o);
2529             }
2530             }
2531             }
2532              
2533             ## build request
2534 0           my $req = pack ( "N/a*", $index);
2535              
2536 0           $req .= pack ( "N", scalar @$attrs );
2537 0 0         $req .= pack ( "N", $ignorenonexistent ? 1 : 0 );
2538 0           for my $attr (@$attrs) {
2539 0 0         $req .= pack ( "N/a*", $attr)
2540             . pack("N", $mva ? 1 : 0);
2541             }
2542 0           $req .= pack ( "N", scalar keys %$values );
2543 0           foreach my $id (keys %$values) {
2544 0           my $entry = $values->{$id};
2545 0           $req .= $self->_sphPackU64($id);
2546 0 0         if ($mva) {
2547 0           for my $v ( @$entry ) {
2548 0           $req .= pack ( "N", @$v );
2549 0           for my $vv (@$v) {
2550 0           $req .= pack ("N", $vv);
2551             }
2552             }
2553             }
2554             else {
2555 0           for my $v ( @$entry ) {
2556 0           $req .= pack ( "N", $v );
2557             }
2558             }
2559             }
2560              
2561             ## connect, send query, get response
2562              
2563 0           $req = pack ( "nnN/a*", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $req); ## add header
2564 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_UPDATE);
2565              
2566 0 0         return unless $response;
2567              
2568             ## parse response
2569 0           my ($updated) = unpack ( "N*", substr ( $response, 0, 4 ) );
2570 0           return $updated;
2571             }
2572              
2573             =head2 Open
2574              
2575             $sph->Open()
2576              
2577             Opens a persistent connection for subsequent queries.
2578              
2579             To reduce the network connection overhead of making Sphinx queries, you can call
2580             $sph->Open(), then run any number of queries, and call $sph->Close() when
2581             finished.
2582              
2583             Returns 1 on success, 0 on failure.
2584              
2585             =cut
2586              
2587             sub Open {
2588 0     0 1   my $self = shift;
2589 0           $self->{_persistent} = 1;
2590 0 0         if ($self->{_socket}) {
2591 0           $self->_Error("already connected");
2592 0           return 0;
2593             }
2594 0 0         my $fp = $self->_Connect() or return 0;
2595 0           return 1;
2596             }
2597              
2598             =head2 Close
2599              
2600             $sph->Close()
2601              
2602             Closes a persistent connection.
2603              
2604             Returns 1 on success, 0 on failure.
2605              
2606             =cut
2607              
2608             sub Close {
2609 0     0 1   my $self = shift;
2610 0           $self->{_persistent} = 0;
2611              
2612 0 0         if (! $self->{_socket}) {
2613 0           $self->_Error("not connected");
2614 0           return 0;
2615             }
2616            
2617 0           close($self->{_socket});
2618 0           $self->{_socket} = undef;
2619              
2620 0           return 1;
2621             }
2622              
2623             =head2 Status
2624              
2625             $status = $sph->Status()
2626             $status = $sph->Status($session)
2627              
2628             Queries searchd status, and returns a hash of status variable name and value pairs.
2629              
2630             Returns undef on failure.
2631              
2632             =cut
2633              
2634             sub Status {
2635            
2636 0     0 1   my ($self, $session) = @_;
2637              
2638 0 0         my $req = pack("nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, $session ? 0 : 1 ); # len=4, body=1
2639              
2640 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_STATUS);
2641 0 0         return unless $response;
2642              
2643 0           my $p = 0;
2644 0           my ($rows, $cols) = unpack("N*N*", substr ( $response, $p, 8 ) ); $p += 8;
  0            
2645              
2646 0 0 0       return {} unless $rows && $cols;
2647 0           my %res;
2648 0           for (1 .. $rows ) {
2649 0           my @entry;
2650 0           for ( 1 .. $cols) {
2651 0           my $len = unpack("N*", substr ( $response, $p, 4 ) ); $p += 4;
  0            
2652 0 0         push(@entry, $len ? substr ( $response, $p, $len ) : ""); $p += $len;
  0            
2653             }
2654 0 0         if ($cols <= 2) {
2655 0           $res{$entry[0]} = $entry[1];
2656             }
2657             else {
2658 0           my $name = shift @entry;
2659 0           $res{$name} = \@entry;
2660             }
2661             }
2662 0           return \%res;
2663             }
2664            
2665             =head2 FlushAttributes
2666              
2667             =cut
2668              
2669             sub FlushAttributes {
2670 0     0 1   my $self = shift;
2671            
2672 0           my $req = pack("nnN", SEARCHD_COMMAND_FLUSHATTRS, VER_COMMAND_FLUSHATTRS, 0 ); # len=0
2673 0           my $response = $self->_ProcessRequest($req, VER_COMMAND_FLUSHATTRS);
2674 0 0         return unless $response;
2675            
2676 0           my $tag = -1;
2677 0 0         if (length($response) == 4) {
2678 0           $tag = unpack ( "N*", substr ( $response, 0, 4 ) );
2679             }
2680             else {
2681 0           $self->_Error("unexpected response length");
2682             }
2683 0           return $tag;
2684             }
2685              
2686             =head1 SEE ALSO
2687              
2688             L
2689              
2690             =head1 NOTES
2691              
2692             There is (or was) a bundled Sphinx.pm in the contrib area of the Sphinx source
2693             distribution, which was used as the starting point of Sphinx::Search.
2694             Maintenance of that version appears to have lapsed at sphinx-0.9.7, so many of
2695             the newer API calls are not available there. Sphinx::Search is mostly
2696             compatible with the old Sphinx.pm except:
2697              
2698             =over 4
2699              
2700             =item On failure, Sphinx::Search returns undef rather than 0 or -1.
2701              
2702             =item Sphinx::Search 'Set' functions are cascadable, e.g. you can do
2703             Sphinx::Search->new
2704             ->SetMatchMode(SPH_MATCH_ALL)
2705             ->SetSortMode(SPH_SORT_RELEVANCE)
2706             ->Query("search terms")
2707              
2708             =back
2709              
2710             Sphinx::Search also provides documentation and unit tests, which were the main
2711             motivations for branching from the earlier work.
2712              
2713             =head1 AUTHOR
2714              
2715             Jon Schutz
2716              
2717             L
2718              
2719             =head1 BUGS
2720              
2721             Please report any bugs or feature requests to
2722             C, or through the web interface at
2723             L.
2724             I will be notified, and then you'll automatically be notified of progress on
2725             your bug as I make changes.
2726              
2727             =head1 SUPPORT
2728              
2729             You can find documentation for this module with the perldoc command.
2730              
2731             perldoc Sphinx::Search
2732              
2733             You can also look for information at:
2734              
2735             =over 4
2736              
2737             =item * AnnoCPAN: Annotated CPAN documentation
2738              
2739             L
2740              
2741             =item * CPAN Ratings
2742              
2743             L
2744              
2745             =item * RT: CPAN's request tracker
2746              
2747             L
2748              
2749             =item * Search CPAN
2750              
2751             L
2752              
2753             =back
2754              
2755             =head1 ACKNOWLEDGEMENTS
2756              
2757             This module is based on Sphinx.pm (not deployed to CPAN) for Sphinx version
2758             0.9.7-rc1, by Len Kranendonk, which was in turn based on the Sphinx PHP API.
2759              
2760             Thanks to Alexey Kholodkov for contributing a significant patch for handling persistent connections.
2761              
2762             =head1 COPYRIGHT & LICENSE
2763              
2764             Copyright 2015 Jon Schutz, all rights reserved.
2765              
2766             This program is free software; you can redistribute it and/or modify it
2767             under the terms of the GNU General Public License.
2768              
2769             =cut
2770              
2771              
2772             1;