File Coverage

blib/lib/Catmandu/Store/Solr.pm
Criterion Covered Total %
statement 24 53 45.2
branch 0 4 0.0
condition n/a
subroutine 8 11 72.7
pod 1 1 100.0
total 33 69 47.8


line stmt bran cond sub pod time code
1             package Catmandu::Store::Solr;
2              
3 3     3   376669 use Catmandu::Sane;
  3         160518  
  3         19  
4 3     3   581 use Catmandu::Util qw(:is :array);
  3         6  
  3         853  
5 3     3   25 use Moo;
  3         7  
  3         15  
6 3     3   2101 use MooX::Aliases;
  3         5433  
  3         20  
7 3     3   2276 use WebService::Solr;
  3         470645  
  3         104  
8 3     3   1345 use Catmandu::Store::Solr::Bag;
  3         12  
  3         97  
9 3     3   23 use Catmandu::Error;
  3         5  
  3         72  
10 3     3   16 use LWP::UserAgent;
  3         5  
  3         2195  
11              
12             with 'Catmandu::Store';
13             with 'Catmandu::Transactional';
14              
15             =head1 NAME
16              
17             Catmandu::Store::Solr - A searchable store backed by Solr
18              
19             =cut
20              
21             our $VERSION = '0.0304';
22              
23             =head1 SYNOPSIS
24              
25             # From the command line
26              
27             # Import data into Solr
28             $ catmandu import JSON to Solr < data.json
29              
30             # Export data from Solr
31             $ catmandu export Solr to JSON > data.json
32              
33             # Export only one record
34             $ catmandu export Solr --id 1234
35              
36             # Export using a Solr query
37             $ catmandu export Solr --query "name:Recruitment OR name:college"
38              
39             # Export using a CQL query (needs a CQL mapping)
40             $ catmandu export Solr --q "name any college"
41              
42             # From Perl
43             use Catmandu::Store::Solr;
44              
45             my $store = Catmandu::Store::Solr->new(url => 'http://localhost:8983/solr' );
46              
47             my $obj1 = $store->bag->add({ name => 'Patrick' });
48              
49             printf "obj1 stored as %s\n" , $obj1->{_id};
50              
51             # Force an id in the store
52             my $obj2 = $store->bag->add({ _id => 'test123' , name => 'Nicolas' });
53              
54             # send all changes to solr (committed automatically)
55             $store->bag->commit;
56              
57             #transaction: rollback issued after 'die'
58             $store->transaction(sub{
59             $store->bag->delete_all();
60             die("oops, didn't want to do that!");
61             });
62              
63             my $obj3 = $store->bag->get('test123');
64              
65             $store->bag->delete('test123');
66              
67             $store->bag->delete_all;
68              
69             # All bags are iterators
70             $store->bag->each(sub { ... });
71             $store->bag->take(10)->each(sub { ... });
72              
73             # Search
74             # Any extra arguments will be passed on as is to Solr
75             my $hits = $store->bag->search(query => 'name:Patrick');
76              
77             =cut
78              
79             has url => (is => 'ro', default => sub {'http://localhost:8983/solr'});
80             has keep_alive => (is => 'ro', default => sub {0});
81             has solr => (is => 'lazy');
82             has bag_key => (is => 'lazy', alias => 'bag_field');
83             has on_error => (
84             is => 'ro',
85             isa => sub {
86             array_includes([qw(throw ignore)], $_[0])
87             or die("on_error must be 'throw' or 'ignore'");
88             },
89             lazy => 1,
90             default => sub {"throw"}
91             );
92             has _bags_used => (is => 'ro', lazy => 1, default => sub {[];});
93              
94             around 'bag' => sub {
95             my $orig = shift;
96             my $self = shift;
97              
98             my $bags_used = $self->_bags_used;
99             unless (array_includes($bags_used, $_[0])) {
100             push @$bags_used, $_[0];
101             }
102              
103             $orig->($self, @_);
104             };
105              
106             sub _build_solr {
107 0     0     my ($self) = @_;
108 0           WebService::Solr->new(
109             $_[0]->url,
110             {
111             autocommit => 0,
112             default_params => {wt => 'json'},
113             agent => LWP::UserAgent->new(keep_alive => $self->keep_alive),
114             }
115             );
116             }
117              
118             sub _build_bag_key {
119 0     0     $_[0]->key_for('bag');
120             }
121              
122             sub transaction {
123 0     0 1   my ($self, $sub) = @_;
124              
125 0 0         if ($self->{_tx}) {
126 0           return $sub->();
127             }
128 0           my $solr = $self->solr;
129 0           my @res;
130              
131             eval {
132             #flush buffers of all known bags ( with commit=true ), to ensure correct state
133 0           for my $bag_name (@{$self->_bags_used}) {
  0            
134 0           $self->bag($bag_name)->commit;
135             }
136              
137             #mark store as 'in transaction'. All subsequent calls to commit only flushes buffers without setting 'commit' to 'true' in solr
138 0           $self->{_tx} = 1;
139              
140             #transaction
141 0           @res = $sub->();
142              
143             #flushing buffers of all known bags (with commit=false)
144 0           for my $bag_name (@{$self->_bags_used}) {
  0            
145 0           $self->bag($bag_name)->commit;
146             }
147              
148             #commit in solr
149 0           $solr->commit;
150              
151             #remove mark 'in transaction'
152 0           $self->{_tx} = 0;
153 0           1;
154 0 0         } or do {
155 0           my $err = $@;
156              
157             #remove remaining documents from all buffers, because they were added during the transaction
158 0           for my $bag_name (@{$self->_bags_used}) {
  0            
159 0           $self->bag($bag_name)->clear_buffer;
160             }
161              
162             #rollback in solr
163 0           eval {$solr->rollback};
  0            
164              
165             #remove mark 'in transaction'
166 0           $self->{_tx} = 0;
167 0           Catmandu::Error->throw($err);
168             };
169              
170 0           @res;
171             }
172              
173             =head1 SOLR SCHEMA
174              
175             The Solr schema needs to support at least the identifier field (C<_id> by default) and a bag
176             field (C<_bag> by default) to be able to store Catmandu items:
177              
178             # In schema.xml
179             <field name="_id" type="string" indexed="true" stored="true" required="true" />
180             <field name="_bag" type="string" indexed="true" stored="true" required="true" />
181              
182             The names of these fields can optionally be changed using the C<id_field> and C<bag_field>
183             configuration parameters of L<Catmandu::Store::Solr>.
184              
185             The C<_id> will contain the record identifier. The C<_bag> field will contain a string
186             to support L<Catmandu::Bag>-s in Solr.
187              
188             =head1 CONFIGURATION
189              
190             =over
191              
192             =item url
193              
194             URL of Solr core
195              
196             Default: C<http://localhost:8983/solr>
197              
198             =item id_field
199              
200             Name of unique field in Solr core.
201              
202             Default: C<_id>
203              
204             This Solr field is mapped to C<_id> when retrieved
205              
206             =item bag_field
207              
208             Name of field in Solr we can use to split the core into 'bags'.
209              
210             Default: C<_bag>
211              
212             This Solr field is mapped to C<_bag> when retrieved
213              
214             =item on_error
215              
216             Action to take when records cannot be saved to Solr: C<throw> (the default) or C<ignore>.
217              
218             =back
219              
220             =head1 METHODS
221              
222             =head2 new( url => $url )
223              
224             =head2 new( url => $url, id_field => '_id', bag_field => '_bag' )
225              
226             =head2 new( url => $url, bags => { data => { cql_mapping => \%mapping } } )
227              
228             Creates a new Catmandu::Store::Solr store connected to a Solr core, specified by $url.
229              
230             The store supports CQL searches when a cql_mapping is provided. This hash
231             contains a translation of CQL fields into Solr searchable fields.
232              
233             # Example mapping
234             $cql_mapping = {
235             title => {
236             op => {
237             'any' => 1 ,
238             'all' => 1 ,
239             '=' => 1 ,
240             '<>' => 1 ,
241             'exact' => {field => 'mytitle.exact' }
242             } ,
243             sort => 1,
244             field => 'mytitle',
245             cb => ['Biblio::Search', 'normalize_title']
246             }
247             }
248              
249             The CQL mapping above supports the CQL operators any, all, =, <> and exact for the 'title' field.
250              
251             For all the operators the 'title' field is mapped to the Solr field 'mytitle', except
252             for the 'exact' operator. In case of 'exact' we will search the field 'mytitle.exact'.
253              
254             The CQL has an optional callback field 'cb' which contains a reference to subroutines to rewrite or
255             augment the search query. In this case, in the Biblio::Search package there is a normalize_title
256             subroutine which returns a string or an ARRAY of strings with augmented title(s). E.g.
257              
258             package Biblio::Search;
259              
260             sub normalize_title {
261             my ($self,$title) = @_;
262             (my $new_title = $title) =~ s{[^A-Z0-9]+}{}g;
263             $new_title;
264             }
265              
266             1;
267              
268             =head2 transaction
269              
270             When you issue $bag->commit, all changes made in the buffer are sent to solr, along with a commit.
271             So committing in Catmandu merely means flushing changes;-).
272              
273             When you wrap your subroutine within 'transaction', this behaviour is disabled temporarily.
274             When you call 'die' within the subroutine, a rollback is sent to solr.
275              
276             Remember that transactions happen at store level: after the transaction, all buffers of all bags are flushed to solr,
277             and a commit is issued in solr.
278              
279             # Record 'test' added
280             $bag->add({ _id => "test" });
281              
282             # Buffer flushed, and 'commit' sent to solr
283             $bag->commit();
284              
285             $bag->store->transaction(sub{
286             $bag->add({ _id => "test",title => "test" });
287             # Call to die: rollback sent to solr
288             die("oops, didn't want to do that!");
289             });
290              
291             # Record is still { _id => "test" }
292              
293             =head1 INHERITED METHODS
294              
295             This Catmandu::Store implements:
296              
297             =over 3
298              
299             =item L<Catmandu::Store>
300              
301             =item L<Catmandu::Transactional>
302              
303             =back
304              
305             Each Catmandu::Bag in this Catmandu::Store implements:
306              
307             =over 3
308              
309             =item L<Catmandu::Bag>
310              
311             =item L<Catmandu::Searchable>
312              
313             =item L<Catmandu::CQLSearchable>
314              
315             =back
316              
317             =head1 SEE ALSO
318              
319             L<Catmandu::Store>, L<WebService::Solr>
320              
321             =head1 AUTHOR
322              
323             Nicolas Steenlant, C<< nicolas.steenlant at ugent.be >>
324              
325             Patrick Hochstenbach, C<< patrick.hochstenbach at ugent.be >>
326              
327             Nicolas Franck, C<< nicolas.franck at ugent.be >>
328              
329             Pieter De Praetere
330              
331             =head1 LICENSE AND COPYRIGHT
332              
333             This program is free software; you can redistribute it and/or modify it
334             under the terms of either: the GNU General Public License as published
335             by the Free Software Foundation; or the Artistic License.
336              
337             See http://dev.perl.org/licenses/ for more information.
338              
339             =cut
340              
341             1;