line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Store::Solr; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
376669
|
use Catmandu::Sane; |
|
3
|
|
|
|
|
160518
|
|
|
3
|
|
|
|
|
19
|
|
4
|
3
|
|
|
3
|
|
581
|
use Catmandu::Util qw(:is :array); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
853
|
|
5
|
3
|
|
|
3
|
|
25
|
use Moo; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
15
|
|
6
|
3
|
|
|
3
|
|
2101
|
use MooX::Aliases; |
|
3
|
|
|
|
|
5433
|
|
|
3
|
|
|
|
|
20
|
|
7
|
3
|
|
|
3
|
|
2276
|
use WebService::Solr; |
|
3
|
|
|
|
|
470645
|
|
|
3
|
|
|
|
|
104
|
|
8
|
3
|
|
|
3
|
|
1345
|
use Catmandu::Store::Solr::Bag; |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
97
|
|
9
|
3
|
|
|
3
|
|
23
|
use Catmandu::Error; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
72
|
|
10
|
3
|
|
|
3
|
|
16
|
use LWP::UserAgent; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
2195
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
with 'Catmandu::Store'; |
13
|
|
|
|
|
|
|
with 'Catmandu::Transactional'; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Catmandu::Store::Solr - A searchable store backed by Solr |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our $VERSION = '0.0304'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# From the command line |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Import data into Solr |
28
|
|
|
|
|
|
|
$ catmandu import JSON to Solr < data.json |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Export data from Solr |
31
|
|
|
|
|
|
|
$ catmandu export Solr to JSON > data.json |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Export only one record |
34
|
|
|
|
|
|
|
$ catmandu export Solr --id 1234 |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Export using a Solr query |
37
|
|
|
|
|
|
|
$ catmandu export Solr --query "name:Recruitment OR name:college" |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Export using a CQL query (needs a CQL mapping) |
40
|
|
|
|
|
|
|
$ catmandu export Solr --q "name any college" |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# From Perl |
43
|
|
|
|
|
|
|
use Catmandu::Store::Solr; |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $store = Catmandu::Store::Solr->new(url => 'http://localhost:8983/solr' ); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
my $obj1 = $store->bag->add({ name => 'Patrick' }); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
printf "obj1 stored as %s\n" , $obj1->{_id}; |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Force an id in the store |
52
|
|
|
|
|
|
|
my $obj2 = $store->bag->add({ _id => 'test123' , name => 'Nicolas' }); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# send all changes to solr (committed automatically) |
55
|
|
|
|
|
|
|
$store->bag->commit; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
#transaction: rollback issued after 'die' |
58
|
|
|
|
|
|
|
$store->transaction(sub{ |
59
|
|
|
|
|
|
|
$store->bag->delete_all(); |
60
|
|
|
|
|
|
|
die("oops, didn't want to do that!"); |
61
|
|
|
|
|
|
|
}); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
my $obj3 = $store->bag->get('test123'); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
$store->bag->delete('test123'); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
$store->bag->delete_all; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# All bags are iterators |
70
|
|
|
|
|
|
|
$store->bag->each(sub { ... }); |
71
|
|
|
|
|
|
|
$store->bag->take(10)->each(sub { ... }); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Search |
74
|
|
|
|
|
|
|
# Any extra arguments will be passed on as is to Solr |
75
|
|
|
|
|
|
|
my $hits = $store->bag->search(query => 'name:Patrick'); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=cut |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
has url => (is => 'ro', default => sub {'http://localhost:8983/solr'}); |
80
|
|
|
|
|
|
|
has keep_alive => (is => 'ro', default => sub {0}); |
81
|
|
|
|
|
|
|
has solr => (is => 'lazy'); |
82
|
|
|
|
|
|
|
has bag_key => (is => 'lazy', alias => 'bag_field'); |
83
|
|
|
|
|
|
|
has on_error => ( |
84
|
|
|
|
|
|
|
is => 'ro', |
85
|
|
|
|
|
|
|
isa => sub { |
86
|
|
|
|
|
|
|
array_includes([qw(throw ignore)], $_[0]) |
87
|
|
|
|
|
|
|
or die("on_error must be 'throw' or 'ignore'"); |
88
|
|
|
|
|
|
|
}, |
89
|
|
|
|
|
|
|
lazy => 1, |
90
|
|
|
|
|
|
|
default => sub {"throw"} |
91
|
|
|
|
|
|
|
); |
92
|
|
|
|
|
|
|
has _bags_used => (is => 'ro', lazy => 1, default => sub {[];}); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
around 'bag' => sub { |
95
|
|
|
|
|
|
|
my $orig = shift; |
96
|
|
|
|
|
|
|
my $self = shift; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
my $bags_used = $self->_bags_used; |
99
|
|
|
|
|
|
|
unless (array_includes($bags_used, $_[0])) { |
100
|
|
|
|
|
|
|
push @$bags_used, $_[0]; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
$orig->($self, @_); |
104
|
|
|
|
|
|
|
}; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
sub _build_solr { |
107
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
108
|
0
|
|
|
|
|
|
WebService::Solr->new( |
109
|
|
|
|
|
|
|
$_[0]->url, |
110
|
|
|
|
|
|
|
{ |
111
|
|
|
|
|
|
|
autocommit => 0, |
112
|
|
|
|
|
|
|
default_params => {wt => 'json'}, |
113
|
|
|
|
|
|
|
agent => LWP::UserAgent->new(keep_alive => $self->keep_alive), |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub _build_bag_key { |
119
|
0
|
|
|
0
|
|
|
$_[0]->key_for('bag'); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub transaction { |
123
|
0
|
|
|
0
|
1
|
|
my ($self, $sub) = @_; |
124
|
|
|
|
|
|
|
|
125
|
0
|
0
|
|
|
|
|
if ($self->{_tx}) { |
126
|
0
|
|
|
|
|
|
return $sub->(); |
127
|
|
|
|
|
|
|
} |
128
|
0
|
|
|
|
|
|
my $solr = $self->solr; |
129
|
0
|
|
|
|
|
|
my @res; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
eval { |
132
|
|
|
|
|
|
|
#flush buffers of all known bags ( with commit=true ), to ensure correct state |
133
|
0
|
|
|
|
|
|
for my $bag_name (@{$self->_bags_used}) { |
|
0
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
$self->bag($bag_name)->commit; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
#mark store as 'in transaction'. All subsequent calls to commit only flushes buffers without setting 'commit' to 'true' in solr |
138
|
0
|
|
|
|
|
|
$self->{_tx} = 1; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
#transaction |
141
|
0
|
|
|
|
|
|
@res = $sub->(); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#flushing buffers of all known bags (with commit=false) |
144
|
0
|
|
|
|
|
|
for my $bag_name (@{$self->_bags_used}) { |
|
0
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
|
$self->bag($bag_name)->commit; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
#commit in solr |
149
|
0
|
|
|
|
|
|
$solr->commit; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
#remove mark 'in transaction' |
152
|
0
|
|
|
|
|
|
$self->{_tx} = 0; |
153
|
0
|
|
|
|
|
|
1; |
154
|
0
|
0
|
|
|
|
|
} or do { |
155
|
0
|
|
|
|
|
|
my $err = $@; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
#remove remaining documents from all buffers, because they were added during the transaction |
158
|
0
|
|
|
|
|
|
for my $bag_name (@{$self->_bags_used}) { |
|
0
|
|
|
|
|
|
|
159
|
0
|
|
|
|
|
|
$self->bag($bag_name)->clear_buffer; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
#rollback in solr |
163
|
0
|
|
|
|
|
|
eval {$solr->rollback}; |
|
0
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
#remove mark 'in transaction' |
166
|
0
|
|
|
|
|
|
$self->{_tx} = 0; |
167
|
0
|
|
|
|
|
|
Catmandu::Error->throw($err); |
168
|
|
|
|
|
|
|
}; |
169
|
|
|
|
|
|
|
|
170
|
0
|
|
|
|
|
|
@res; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head1 SOLR SCHEMA |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
The Solr schema needs to support at least the identifier field (C<_id> by default) and a bag |
176
|
|
|
|
|
|
|
field (C<_bag> by default) to be able to store Catmandu items: |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# In schema.xml |
179
|
|
|
|
|
|
|
<field name="_id" type="string" indexed="true" stored="true" required="true" /> |
180
|
|
|
|
|
|
|
<field name="_bag" type="string" indexed="true" stored="true" required="true" /> |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
The names of these fields can optionally be changed using the C<id_field> and C<bag_field> |
183
|
|
|
|
|
|
|
configuration parameters of L<Catmandu::Store::Solr>. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
The C<_id> will contain the record identifier. The C<_bag> field will contain a string |
186
|
|
|
|
|
|
|
to support L<Catmandu::Bag>-s in Solr. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head1 CONFIGURATION |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=over |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=item url |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
URL of Solr core |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Default: C<http://localhost:8983/solr> |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=item id_field |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Name of unique field in Solr core. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Default: C<_id> |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
This Solr field is mapped to C<_id> when retrieved |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
=item bag_field |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
Name of field in Solr we can use to split the core into 'bags'. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Default: C<_bag> |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This Solr field is mapped to C<_bag> when retrieved |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=item on_error |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Action to take when records cannot be saved to Solr. Either C<throw> (the default) or C<ignore>. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=back |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head1 METHODS |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=head2 new( url => $url ) |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head2 new( url => $url, id_field => '_id', bag_field => '_bag' ) |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 new( url => $url, bags => { data => { cql_mapping => \%mapping } } ) |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Creates a new Catmandu::Store::Solr store connected to a Solr core, specified by $url. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
The store supports CQL searches when a cql_mapping is provided. This hash |
231
|
|
|
|
|
|
|
contains a translation of CQL fields into Solr searchable fields. |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# Example mapping |
234
|
|
|
|
|
|
|
$cql_mapping = { |
235
|
|
|
|
|
|
|
title => { |
236
|
|
|
|
|
|
|
op => { |
237
|
|
|
|
|
|
|
'any' => 1 , |
238
|
|
|
|
|
|
|
'all' => 1 , |
239
|
|
|
|
|
|
|
'=' => 1 , |
240
|
|
|
|
|
|
|
'<>' => 1 , |
241
|
|
|
|
|
|
|
'exact' => {field => 'mytitle.exact' } |
242
|
|
|
|
|
|
|
} , |
243
|
|
|
|
|
|
|
sort => 1, |
244
|
|
|
|
|
|
|
field => 'mytitle', |
245
|
|
|
|
|
|
|
cb => ['Biblio::Search', 'normalize_title'] |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
For the 'title' field, the CQL mapping above supports the CQL operators: any, all, =, <> and exact. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
For all the operators the 'title' field will be mapped to the Solr field 'mytitle', except |
252
|
|
|
|
|
|
|
for the 'exact' operator. In case of 'exact' we will search the field 'mytitle.exact'. |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
The CQL mapping has an optional callback field 'cb' which names a package and a subroutine used to rewrite or |
255
|
|
|
|
|
|
|
augment the search query. In this case, in the Biblio::Search package there is a normalize_title |
256
|
|
|
|
|
|
|
subroutine which returns a string or an ARRAY of strings with augmented title(s). E.g. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
package Biblio::Search; |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
sub normalize_title { |
261
|
|
|
|
|
|
|
my ($self,$title) = @_; |
262
|
|
|
|
|
|
|
(my $new_title = $title) =~ s{[^A-Z0-9]+}{}g; |
263
|
|
|
|
|
|
|
$new_title; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
1; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head2 transaction |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
When you issue $bag->commit, all changes made in the buffer are sent to solr, along with a commit. |
271
|
|
|
|
|
|
|
So committing in Catmandu merely means flushing changes;-). |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
When you wrap your subroutine within 'transaction', this behaviour is disabled temporarily. |
274
|
|
|
|
|
|
|
When you call 'die' within the subroutine, a rollback is sent to solr. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Remember that transactions happen at store level: after the transaction, all buffers of all bags are flushed to solr, |
277
|
|
|
|
|
|
|
and a commit is issued in solr. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Record 'test' added |
280
|
|
|
|
|
|
|
$bag->add({ _id => "test" }); |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# Buffer flushed, and 'commit' sent to solr |
283
|
|
|
|
|
|
|
$bag->commit(); |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
$bag->store->transaction(sub{ |
286
|
|
|
|
|
|
|
$bag->add({ _id => "test",title => "test" }); |
287
|
|
|
|
|
|
|
# Call to die: rollback sent to solr |
288
|
|
|
|
|
|
|
die("oops, didn't want to do that!"); |
289
|
|
|
|
|
|
|
}); |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
# Record is still { _id => "test" } |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=head1 INHERITED METHODS |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
This Catmandu::Store implements: |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=over 3 |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=item L<Catmandu::Store> |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=item L<Catmandu::Transactional> |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=back |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
Each Catmandu::Bag in this Catmandu::Store implements: |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=over 3 |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item L<Catmandu::Bag> |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=item L<Catmandu::Searchable> |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=item L<Catmandu::CQLSearchable> |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=back |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=head1 SEE ALSO |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
L<Catmandu::Store>, L<WebService::Solr> |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=head1 AUTHOR |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
Nicolas Steenlant, C<< nicolas.steenlant at ugent.be >> |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >> |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Nicolas Franck, C<< nicolas.franck at ugent.be >> |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
Pieter De Praetere |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
334
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
335
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=cut |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
1; |