line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catalyst::Model::Xapian; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
788
|
use base qw/Catalyst::Model/; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
546
|
|
4
|
1
|
|
|
1
|
|
1402
|
use Moose; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
use strict; |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
use Catalyst::Model::Xapian::Result; |
9
|
|
|
|
|
|
|
use Encode qw/from_to/; |
10
|
|
|
|
|
|
|
use Search::Xapian qw/:all/; |
11
|
|
|
|
|
|
|
use Storable; |
12
|
|
|
|
|
|
|
use MRO::Compat; |
13
|
|
|
|
|
|
|
use Time::HiRes qw/gettimeofday tv_interval/; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
our $VERSION='0.06';

# Handles shared by every search: the opened Xapian database and the query
# parser bound to it. Both are filled in by new().
#
# They are declared once, as typed Moose attributes. Declaring them through
# mk_accessors as well only creates methods that the later 'has' overwrites,
# which Moose reports as overwriting a locally defined method.
has 'db' => (isa => 'Search::Xapian::Database', is => 'rw');
has 'qp' => (isa => 'Search::Xapian::QueryParser', is => 'rw');
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 NAME |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
Catalyst::Model::Xapian - Catalyst model for Search::Xapian. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 SYNOPSIS |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
my ($it,$res)= $c->comp('MyApp::M::Xapian')->search( |
30
|
|
|
|
|
|
|
$c->req->param('q'), |
31
|
|
|
|
|
|
|
$c->req->param('page') ||0 , |
32
|
|
|
|
|
|
|
$c->req->param('itemsperpage')||0 |
33
|
|
|
|
|
|
|
); |
34
|
|
|
|
|
|
|
$c->stash->{searchresults}=$res; |
35
|
|
|
|
|
|
|
$c->stash->{iterator}=$it; |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 DESCRIPTION |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
This model class wraps L<Search::Xapian> to provide a friendly, paged |
41
|
|
|
|
|
|
|
interface to Xapian (www.xapian.org) indexes. This class adds a little |
42
|
|
|
|
|
|
|
extra convenience on top of the Search::Xapian class. It expects you to |
43
|
|
|
|
|
|
|
use the QueryParser, and sets up some keywords based on the standard |
44
|
|
|
|
|
|
|
omega keywords (id, host, date, month, year, title), so that you can |
45
|
|
|
|
|
|
|
do searches like |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
'fubar site:microsoft.com' |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 CONFIG OPTIONS |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=over 4 |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=item db |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Path to the index directory. Defaults to <MyApp>/index. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=item language |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Language to use for stemming. Defaults to english. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item page_size |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Default page sizes for L<Data::Page>. Defaults to 10. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=item utf8_query |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Queries are passed as utf8 strings. Defaults to 1. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=item order_by_date |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Sets weighting to order by docid descending rather than the usual BM25 |
72
|
|
|
|
|
|
|
weighting. Off by default. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 METHODS |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=over 4 |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item new |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Constructor. Sets up the db and qp accessors. It is called automatically by |
83
|
|
|
|
|
|
|
Catalyst at startup. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=cut |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Constructor, invoked by Catalyst when the component is set up.
#
# Arguments:
#   $c    - the Catalyst application/context; its config->{home} supplies
#           the default index location.
#   @args - any further constructor arguments, forwarded untouched to the
#           parent constructor.
#
# Builds the effective configuration (built-in defaults overridden by the
# component's own config), opens the Xapian database, creates the query
# parser and registers the search prefixes. Returns the model instance.
sub new {
    my ( $self, $c, @args ) = @_;

    # Delegate object construction to the parent class. next::method comes
    # from MRO::Compat, which this file loads; NEXT is never loaded here.
    $self = $self->next::method( $c, @args );

    # Defaults first, so that anything set in the component config wins.
    my %config = (
        db         => $c->config->{home} . '/index',
        language   => "english",
        page_size  => 10,
        utf8_query => 1,
        %{ $self->config() },
    );

    $self->db( Search::Xapian::Database->new( $config{db} ) );
    $self->qp( Search::Xapian::QueryParser->new( $self->db ) );

    # Stemming is optional: an explicit undef language in the config
    # leaves the parser without a stemmer.
    if ( defined $config{language} ) {
        my $stemmer = Search::Xapian::Stem->new( $config{language} );
        $self->qp->set_stemmer($stemmer);
    }

    # Terms without an explicit operator are combined with AND.
    $self->qp->set_default_op(OP_AND);

    # Query keywords mapped onto term prefixes in the index. All are
    # boolean filters except "title", which is a regular (weighted) prefix.
    $self->qp->add_boolean_prefix( "site",  "H" );
    $self->qp->add_boolean_prefix( "year",  "Y" );
    $self->qp->add_boolean_prefix( "month", "M" );
    $self->qp->add_boolean_prefix( "date",  "D" );
    $self->qp->add_boolean_prefix( "id",    "Q" );
    $self->qp->add_prefix( "title", "T" );

    # Store the merged configuration so search() sees the defaults too.
    $self->config( \%config );
    return $self;
}
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item search <q>,[<page>],[<page_size>] |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Perform a search using the Xapian QueryParser. Expands the document data |
121
|
|
|
|
|
|
|
using extract_data. You can override the page size per query by passing |
122
|
|
|
|
|
|
|
page size as a final argument to the function. Returns a |
123
|
|
|
|
|
|
|
L<Catalyst::Model::Xapian::Result> object wrapping the match set. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=cut |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# Run a paged query against the Xapian index.
#
# Arguments:
#   $q         - query string, parsed with the model's query parser.
#   $page      - 1-based page number. Anything that is not a positive
#                integer (undef, 0, '', negative, non-numeric) means page 1.
#   $page_size - hits per page. Anything that is not a positive integer
#                falls back to the configured page_size.
#
# Returns a Catalyst::Model::Xapian::Result constructed from the match set,
# the model, the query (string and parsed object), the elapsed time and the
# paging values actually used.
sub search {
    my ( $class, $q, $page, $page_size ) = @_;
    my $t = [gettimeofday];

    # Paging values usually come straight from request parameters. A
    # negative or non-numeric page would otherwise produce a negative
    # offset for get_mset below.
    $page = 1
        unless defined $page
        && $page =~ /\A[0-9]+\z/
        && $page > 0;
    $page_size = $class->config->{page_size}
        unless defined $page_size
        && $page_size =~ /\A[0-9]+\z/
        && $page_size > 0;

    # Pick up any changes written to the index since it was opened.
    $class->db->reopen();

    # 23 is the QueryParser feature-flag bitmask 1|2|4|16, i.e. Xapian's
    # FLAG_BOOLEAN | FLAG_PHRASE | FLAG_LOVEHATE | FLAG_WILDCARD.
    my $query = $class->qp->parse_query( $q, 23 );
    my $enq   = $class->db->enquire($query);

    # Subclass hook: adjust ordering etc. before the match set is fetched.
    $class->prepare_enq($enq);

    if ( $class->config->{order_by_date} ) {
        # Ignore relevance weights and list the highest docids first.
        $enq->set_docid_order(ENQ_DESCENDING);
        $enq->set_weighting_scheme( Search::Xapian::BoolWeight->new() );
    }

    my $mset = $enq->get_mset( ( $page - 1 ) * $page_size, $page_size );

    # Format the elapsed seconds with two decimals and a decimal comma.
    # sprintf is used rather than a regex on the stringified number, which
    # fails when a very small interval stringifies in exponent notation.
    my $time = sprintf '%.2f', tv_interval($t);
    $time =~ s/\./,/;

    # Convert the local copy of the query string in place for the result.
    from_to( $q, 'utf-8', 'iso-8859-1' ) if $class->config->{utf8_query};

    return Catalyst::Model::Xapian::Result->new(
        {
            mset      => $mset,
            search    => $class,
            query     => $q,
            query_obj => $query,
            querytime => $time,
            page      => $page,
            page_size => $page_size,
        }
    );
}
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=item prepare_enq <enq> |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Prepare enquire object before getting mset. Allows you to modify |
154
|
|
|
|
|
|
|
ordering and such in your subclass. |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=cut |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# Hook called by search() with the enquire object before the match set is
# fetched. Does nothing here; subclasses override it to tweak ordering etc.
sub prepare_enq {
    my ( $self, $enq ) = @_;
    return;
}
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=item extract_data <item> <query> |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Extract data from a L<Search::Xapian::Document>. Defaults to |
163
|
|
|
|
|
|
|
using Storable::thaw. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=cut |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Turn a matched document into its stored data structure.
#
# Arguments:
#   $item  - object providing get_data(), which must return a Storable
#            frozen string.
#   $query - accepted for subclasses that need it; unused here.
#
# Returns whatever Storable::thaw yields for the document data.
sub extract_data {
    my $self     = shift;
    my $document = shift;

    # Scalar context keeps the result a single value for every caller.
    return scalar Storable::thaw( $document->get_data );
}
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
1; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=item qp |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
Query Parser. The L<Search::Xapian::QueryParser> object used to parse the query. |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=back |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=head1 AUTHOR |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Marcus Ramberg <mramberg@cpan.org> |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 LICENSE |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
This library is free software. You can redistribute it and/or modify it under |
188
|
|
|
|
|
|
|
the same terms as perl itself. |