line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Catmandu::Store::Solr; |
2
|
|
|
|
|
|
|
|
3
|
3
|
|
|
3
|
|
298830
|
use Catmandu::Sane; |
|
3
|
|
|
|
|
175750
|
|
|
3
|
|
|
|
|
20
|
|
4
|
3
|
|
|
3
|
|
595
|
use Catmandu::Util qw(:is :array); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
1035
|
|
5
|
3
|
|
|
3
|
|
33
|
use Moo; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
16
|
|
6
|
3
|
|
|
3
|
|
360547
|
use MooX::Aliases; |
|
3
|
|
|
|
|
5303
|
|
|
3
|
|
|
|
|
19
|
|
7
|
3
|
|
|
3
|
|
2164
|
use WebService::Solr; |
|
3
|
|
|
|
|
391330
|
|
|
3
|
|
|
|
|
104
|
|
8
|
3
|
|
|
3
|
|
1368
|
use Catmandu::Store::Solr::Bag; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
91
|
|
9
|
3
|
|
|
3
|
|
19
|
use Catmandu::Error; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
1648
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
with 'Catmandu::Store'; |
12
|
|
|
|
|
|
|
with 'Catmandu::Transactional'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Catmandu::Store::Solr - A searchable store backed by Solr |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=cut |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our $VERSION = '0.0302'; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
=head1 SYNOPSIS |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# From the command line |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Import data into Solr |
27
|
|
|
|
|
|
|
$ catmandu import JSON to Solr < data.json |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Export data from Solr |
30
|
|
|
|
|
|
|
$ catmandu export Solr to JSON > data.json |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Export only one record |
33
|
|
|
|
|
|
|
$ catmandu export Solr --id 1234 |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Export using a Solr query |
36
|
|
|
|
|
|
|
$ catmandu export Solr --query "name:Recruitment OR name:college" |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# Export using a CQL query (needs a CQL mapping) |
39
|
|
|
|
|
|
|
$ catmandu export Solr --q "name any college" |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# From Perl |
42
|
|
|
|
|
|
|
use Catmandu::Store::Solr; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
my $store = Catmandu::Store::Solr->new(url => 'http://localhost:8983/solr' ); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
my $obj1 = $store->bag->add({ name => 'Patrick' }); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
printf "obj1 stored as %s\n" , $obj1->{_id}; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Force an id in the store |
51
|
|
|
|
|
|
|
my $obj2 = $store->bag->add({ _id => 'test123' , name => 'Nicolas' }); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# send all changes to solr (committed automatically) |
54
|
|
|
|
|
|
|
$store->bag->commit; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
#transaction: rollback issued after 'die' |
57
|
|
|
|
|
|
|
$store->transaction(sub{ |
58
|
|
|
|
|
|
|
$store->bag->delete_all(); |
59
|
|
|
|
|
|
|
die("oops, didn't want to do that!"); |
60
|
|
|
|
|
|
|
}); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
my $obj3 = $store->bag->get('test123'); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
$store->bag->delete('test123'); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
$store->bag->delete_all; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# All bags are iterators |
69
|
|
|
|
|
|
|
$store->bag->each(sub { ... }); |
70
|
|
|
|
|
|
|
$store->bag->take(10)->each(sub { ... }); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Some stores can be searched |
73
|
|
|
|
|
|
|
my $hits = $store->bag->search(query => 'name:Patrick'); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=cut |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Base URL of the Solr core this store talks to.
has url => (
    is      => 'ro',
    default => sub { 'http://localhost:8983/solr' },
);
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# Solr client object, constructed on first access by _build_solr.
has solr => (is => 'ro', lazy => 1, builder => '_build_solr');
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Key used for the bag field; lazily built by _build_bag_key and also
# reachable under the alias 'bag_field'.
has bag_key => (
    is    => 'lazy',
    alias => 'bag_field',
);
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# Error policy of the store: either 'throw' (the default) or 'ignore'.
# Any other value is rejected by the validator below.
has on_error => (
    is      => 'ro',
    lazy    => 1,
    default => sub { "throw" },
    isa     => sub {
        my ($policy) = @_;
        die("on_error must be 'throw' or 'ignore'")
            unless array_includes([qw(throw ignore)], $policy);
    },
);
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Names of the bags that were requested through bag(); transaction() walks
# this list to commit or clear the buffer of every known bag.
has _bags_used => (
    is      => 'ro',
    lazy    => 1,
    default => sub { return [] },
);
101
|
|
|
|
|
|
|
# Wraps bag(): records the requested bag name in _bags_used (once per name)
# and then delegates to the original method with unchanged arguments.
around 'bag' => sub {
    my ($orig, $self, @args) = @_;

    my $known = $self->_bags_used;
    push @$known, $args[0] unless array_includes($known, $args[0]);

    $orig->($self, @args);
};
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Builder for the 'solr' attribute: creates a WebService::Solr object for the
# configured url, with autocommit set to 0 and 'wt' => 'json' as default
# request parameter.
sub _build_solr {
    my ($self) = @_;

    my %options = (
        autocommit     => 0,
        default_params => { wt => 'json' },
    );

    return WebService::Solr->new($self->url, \%options);
}
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
# Builder for the lazy 'bag_key' attribute: returns what key_for('bag') yields.
sub _build_bag_key {
    my ($self) = @_;
    return $self->key_for('bag');
}
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Runs $sub as one store-level transaction against Solr.
#
# Arguments:
#   $sub - code reference with the work to perform; called without arguments.
#
# Returns the values returned by $sub. Outside a running transaction $sub is
# called in list context and its result list is returned (so a caller in
# scalar context receives the number of values). When a transaction of this
# store is already running, $sub is simply executed inside it and the
# outermost call decides about commit or rollback.
#
# Throws a Catmandu::Error built from the original error when anything inside
# the transaction dies. Before rethrowing, the buffers of all known bags are
# cleared and a rollback is sent to Solr.
sub transaction {
    my ($self, $sub) = @_;

    # nested call: let the outermost transaction commit or roll back
    return $sub->() if $self->{_tx};

    my $solr = $self->solr;
    my @res;

    my $ok = eval {
        # flush buffers of all known bags (with commit=true), to ensure correct state
        for my $bag_name (@{ $self->_bags_used() }) {
            $self->bag($bag_name)->commit();
        }

        # mark store as 'in transaction'. All subsequent calls to commit only
        # flush buffers without setting 'commit' to 'true' in solr
        $self->{_tx} = 1;

        # transaction
        @res = $sub->();

        # flush buffers of all known bags (with commit=false)
        for my $bag_name (@{ $self->_bags_used() }) {
            $self->bag($bag_name)->commit();
        }

        # commit in solr
        $solr->commit;

        # remove mark 'in transaction'
        $self->{_tx} = 0;
        1;
    };

    unless ($ok) {
        # capture the error first: the guarded cleanup below overwrites $@
        my $err = $@;

        # Remove remaining documents from all buffers, because they were added
        # during the transaction. Each bag is guarded on its own: a failing
        # clear_buffer must not hide the original error, skip the rollback or
        # leave the 'in transaction' mark set.
        for my $bag_name (@{ $self->_bags_used() }) {
            eval { $self->bag($bag_name)->clear_buffer(); 1 };
        }

        # rollback in solr (best effort)
        eval { $solr->rollback };

        # remove mark 'in transaction'
        $self->{_tx} = 0;

        Catmandu::Error->throw($err);
    }

    return @res;
}
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 SOLR SCHEMA |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
The Solr schema needs to support at least the identifier field (C<_id> by default) and a bag |
173
|
|
|
|
|
|
|
field (C<_bag> by default) to be able to store Catmandu items: |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# In schema.xml |
176
|
|
|
|
|
|
|
<field name="_id" type="string" indexed="true" stored="true" required="true" /> |
177
|
|
|
|
|
|
|
<field name="_bag" type="string" indexed="true" stored="true" required="true" /> |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The names of these fields can optionally be changed using the C<id_field> and C<bag_field> |
180
|
|
|
|
|
|
|
configuration parameters of L<Catmandu::Store::Solr>. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
The C<_id> will contain the record identifier. The C<_bag> field will contain a string |
183
|
|
|
|
|
|
|
to support L<Catmandu::Bag>-s in Solr. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 CONFIGURATION |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=over |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item url |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
URL of Solr core |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Default: C<http://localhost:8983/solr> |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=item id_field |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Name of unique field in Solr core. |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Default: C<_id> |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
This Solr field is mapped to C<_id> when retrieved |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=item bag_field |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Name of field in Solr we can use to split the core into 'bags'. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Default: C<_bag> |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
This Solr field is mapped to C<_bag> when retrieved |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=item on_error |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
Action to take when records cannot be saved to Solr. Default: C<throw>. Available: C<throw>, C<ignore>. |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=back |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=head1 METHODS |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=head2 new( url => $url ) |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
=head2 new( url => $url, id_field => '_id', bag_field => '_bag' ) |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head2 new( url => $url, bags => { data => { cql_mapping => \%mapping } } ) |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
Creates a new Catmandu::Store::Solr store connected to a Solr core, specified by $url. |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
The store supports CQL searches when a cql_mapping is provided. This hash |
228
|
|
|
|
|
|
|
contains a translation of CQL fields into Solr searchable fields. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Example mapping |
231
|
|
|
|
|
|
|
$cql_mapping = { |
232
|
|
|
|
|
|
|
title => { |
233
|
|
|
|
|
|
|
op => { |
234
|
|
|
|
|
|
|
'any' => 1 , |
235
|
|
|
|
|
|
|
'all' => 1 , |
236
|
|
|
|
|
|
|
'=' => 1 , |
237
|
|
|
|
|
|
|
'<>' => 1 , |
238
|
|
|
|
|
|
|
'exact' => {field => 'mytitle.exact' } |
239
|
|
|
|
|
|
|
} , |
240
|
|
|
|
|
|
|
sort => 1, |
241
|
|
|
|
|
|
|
field => 'mytitle', |
242
|
|
|
|
|
|
|
cb => ['Biblio::Search', 'normalize_title'] |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
For the 'title' field, the CQL mapping above supports the CQL operators any, all, =, <> and exact. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
For all the operators the 'title' field will be mapped to the Solr field 'mytitle', except |
249
|
|
|
|
|
|
|
for the 'exact' operator. In case of 'exact' we will search the field 'mytitle.exact'. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
The CQL mapping has an optional callback field 'cb' which refers to a subroutine used to rewrite or |
252
|
|
|
|
|
|
|
augment the search query. In this case, in the Biblio::Search package there is a normalize_title |
253
|
|
|
|
|
|
|
subroutine which returns a string or an ARRAY of strings with augmented title(s). E.g. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
package Biblio::Search; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub normalize_title { |
258
|
|
|
|
|
|
|
my ($self,$title) = @_; |
259
|
|
|
|
|
|
|
(my $new_title = $title) =~ s{[^A-Z0-9]+}{}g; |
260
|
|
|
|
|
|
|
$new_title; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
1; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=head2 transaction |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
When you issue $bag->commit, all changes made in the buffer are sent to solr, along with a commit. |
268
|
|
|
|
|
|
|
So committing in Catmandu merely means flushing changes;-). |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
When you wrap your subroutine within 'transaction', this behaviour is disabled temporarily. |
271
|
|
|
|
|
|
|
When you call 'die' within the subroutine, a rollback is sent to solr. |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
Remember that transactions happen at store level: after the transaction, all buffers of all bags are flushed to solr, |
274
|
|
|
|
|
|
|
and a commit is issued in solr. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# Record 'test' added |
277
|
|
|
|
|
|
|
$bag->add({ _id => "test" }); |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
# Buffer flushed, and 'commit' sent to solr |
280
|
|
|
|
|
|
|
$bag->commit(); |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
$bag->store->transaction(sub{ |
283
|
|
|
|
|
|
|
$bag->add({ _id => "test",title => "test" }); |
284
|
|
|
|
|
|
|
# Call to die: rollback sent to solr |
285
|
|
|
|
|
|
|
die("oops, didn't want to do that!"); |
286
|
|
|
|
|
|
|
}); |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# Record is still { _id => "test" } |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 SEE ALSO |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
L<Catmandu::Store>, L<WebService::Solr> |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=head1 AUTHOR |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Nicolas Steenlant, C<< nicolas.steenlant at ugent.be >> |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >> |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
Nicolas Franck, C<< nicolas.franck at ugent.be >> |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
Pieter De Praetere |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
307
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
308
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=cut |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
1; |