File Coverage

blib/lib/Lingua/Thesaurus/Storage/SQLite.pm
Criterion Covered Total %
statement 140 142 98.5
branch 36 48 75.0
condition 6 9 66.6
subroutine 19 20 95.0
pod 2 12 16.6
total 203 231 87.8


line stmt bran cond sub pod time code
1             package Lingua::Thesaurus::Storage::SQLite;
2 7     7   4093 use 5.010;
  7         15  
3 7     7   23 use Moose;
  7         8  
  7         52  
4             with 'Lingua::Thesaurus::Storage';
5              
6              
7 7     7   42499 use DBI;
  7         81413  
  7         361  
8 7     7   45 use Module::Load ();
  7         11  
  7         121  
9 7     7   23 use Carp qw(croak);
  7         7  
  7         328  
10 7     7   27 use namespace::clean -except => 'meta';
  7         8  
  7         55  
11              
12             has 'dbname' => (is => 'ro', isa => 'Str',
13             documentation => "database file (or might be ':memory:)");
14              
15             has 'dbh' => (is => 'ro', isa => 'DBI::db',
16             lazy => 1, builder => '_dbh',
17             documentation => "database handle");
18              
19              
20             #======================================================================
21             # construction
22             #======================================================================
23              
24             around BUILDARGS => sub {
25             my $orig = shift;
26             my $class = shift;
27             if (@_ == 1 && !ref $_[0]) {
28             # one single scalar arg => interpreted as dbname
29             return $class->$orig(dbname => $_[0]);
30             }
31             else {
32             return $class->$orig(@_);
33             }
34             };
35              
36              
37             sub _dbh {
38 7     7   10 my ($self) = @_;
39              
40             # connect to the SQLite database
41 7 50       181 my $dbname = $self->dbname
42             or croak "storage has no file";
43              
44 7 50       88 my $dbh = DBI->connect("dbi:SQLite:dbname=$dbname", "","",
45             {AutoCommit => 1,
46             RaiseError => 1,
47             private_was_connected_by => __PACKAGE__})
48             or croak $DBI::errstr;
49              
50             # activate foreign key control
51 7         57153 $dbh->do('PRAGMA FOREIGN_KEYS = ON');
52              
53 7         472 return $dbh;
54             }
55              
56             sub _params {
57 4     4   6 my ($self) = @_;
58              
59             # retrieve key-values that were stored in table _params during initialize()
60 4         4 my %params;
61 4         100 my $sth = $self->dbh->prepare('SELECT key, value FROM params');
62 4         2020 $sth->execute;
63 4         48 while (my ($key, $value) = $sth->fetchrow_array) {
64 3         21 $params{$key} = $value;
65             }
66 4         203 return \%params;
67             }
68              
69              
70             #======================================================================
71             # methods for populating the database
72             #======================================================================
73              
74             sub do_transaction {
75 4     4 1 12 my ($self, $coderef) = @_;
76              
77             # poor man's transaction ... just for efficiency (don't care about rollback)
78 4         163 $self->dbh->begin_work;
79 4         111 $coderef->();
80 4         165 $self->dbh->commit;
81             }
82              
83             sub initialize {
84 3     3 0 5 my ($self) = @_;
85 3         79 my $dbh = $self->dbh;
86              
87             # check that the database is empty
88 3 50       25 !$dbh->tables(undef, undef, undef, 'TABLE')
89             or croak "can't initialize(): database is not empty";
90              
91             # params to be injected into the '_params' table
92 3 100       2875 my $params = $self->has_params ? $self->params : {};
93              
94             # default representation for the term table (regular table)
95 3         4 my $term_table = "TABLE term(docid INTEGER PRIMARY KEY AUTOINCREMENT,
96             content CHAR NOT NULL,
97             origin CHAR,
98             UNIQUE (content, origin))";
99              
100             # alternative representations for the term table : fulltext
101 3 100       9 if ($params->{use_fulltext}) {
102 2         22 DBD::SQLite->VERSION("1.54"); # because earlier versions have a bug
103             # in tokenizer suport
104 2         5 my $tokenizer = "";
105 2 100       6 if ($params->{use_unaccent}) {
106 1         436 require Search::Tokenizer;
107 1         15340 $tokenizer = ", tokenize=perl 'Search::Tokenizer::unaccent'";
108             # NOTE: currently, 'use_unaccent' may produce crashes in the END
109             # phase of the user process (bug in DBD::SQLite tokenizers). So
110             # 'use_unaccent' is not recommended in production.
111             }
112 2         6 $term_table = "VIRTUAL TABLE term USING fts4(content, origin $tokenizer)";
113             }
114              
115 3         19 $dbh->do(<<"");
116             CREATE $term_table;
117              
118 3         242178 $dbh->do(<<"");
119             CREATE TABLE rel_type (
120             rel_id CHAR PRIMARY KEY,
121             description CHAR,
122             is_external BOOL
123             );
124              
125             # foreign key control : can't be used with fulltext, because 'docid'
126             # is not a regular column that can be referenced
127 3 100       26023 my $ref_docid = $params->{use_fulltext} ? '' : 'REFERENCES term(docid)';
128              
129 3         55 $dbh->do(<<"");
130             CREATE TABLE relation (
131             lead_term_id INTEGER NOT NULL $ref_docid,
132             rel_id CHAR NOT NULL REFERENCES rel_type(rel_id),
133             rel_order INTEGER DEFAULT 1,
134             other_term_id INTEGER $ref_docid,
135             external_info CHAR
136             );
137              
138 3         39503 $dbh->do(<<"");
139             CREATE INDEX ix_lead_term ON relation(lead_term_id);
140              
141 3         36544 $dbh->do(<<"");
142             CREATE INDEX ix_other_term ON relation(other_term_id);
143              
144 3         38826 $dbh->do(<<"");
145             CREATE TABLE params(key CHAR, value CHAR);
146              
147             # store additional params into the '_params' table, so they can be
148             # retrieved by other processes that will use this thesaurus
149 3         41938 my $sth;
150 3         62 while (my ($key, $value) = each %$params) {
151 3   66     30 $sth //= $dbh->prepare('INSERT INTO params(key, value) VALUES (?, ?)');
152 3         14847 $sth->execute($key, $value);
153             }
154             }
155              
156              
157             sub store_term {
158 2231     2231 1 2500 my ($self, $term_string, $origin) = @_;
159              
160 2231         1771 my $sql = 'INSERT INTO term(content, origin) VALUES(?, ?)';
161 2231         58154 my $sth = $self->dbh->prepare($sql);
162 2231         111356 $sth->execute($term_string, $origin);
163 2231         175820 return $self->dbh->last_insert_id('', '', '', '');
164             }
165              
166              
167             sub store_rel_type {
168 30     30 0 82 my ($self, $rel_id, $description, $is_external) = @_;
169              
170 30         48 my $sql = 'INSERT INTO rel_type VALUES(?, ?, ?)';
171 30         1066 my $sth = $self->dbh->prepare($sql);
172 30         309793 $sth->execute($rel_id, $description, $is_external);
173             }
174              
175              
176             sub store_relation {
177 606     606 0 754 my ($self, $lead_term_id, $rel_id, $related, $is_external, $inverse_id) = @_;
178              
179             # make sure that $related is a list
180 606 50       1007 $related = [$related] unless ref $related;
181              
182             # prepare insertion statement
183 606         473 my $sql = 'INSERT INTO relation VALUES(?, ?, ?, ?, ?)';
184 606         16536 my $sth = $self->dbh->prepare($sql);
185              
186             # insertion loop
187 606         23593 my $count = 1;
188 606         950 foreach my $rel (@$related) {
189 1941 100       3163 my ($other_term_id, $ext_info) = $is_external ? (undef, $rel)
190             : ($rel, undef);
191              
192             # insert first relation
193 1941         22427 $sth->execute($lead_term_id, $rel_id, $count++, $other_term_id, $ext_info);
194              
195             # insert inverse relation, if any
196 1941 100       24931 $sth->execute($other_term_id, $inverse_id, 1, $lead_term_id, undef)
197             if $inverse_id;
198             }
199             }
200              
201              
202       0 0   sub finalize {
203             # nothing to do -- db file is stored automatically by DBD::SQLite
204             }
205              
206             #======================================================================
207             # retrieval methods
208             #======================================================================
209              
210              
211             sub search_terms {
212 10     10 0 327 my ($self, $pattern, $origin) = @_;
213              
214             # retrieve terms data from database
215 10         20 my ($sql, @bind) = ('SELECT docid, content, origin FROM term');
216 10 50       19 if ($pattern) {
217 10 100       262 if ($self->params->{use_fulltext}) {
218              
219             # make sure that Search::Tokenizer is loaded so that SQLite can call
220             # the 'unaccent' tokenizer
221 6 100       144 require Search::Tokenizer if $self->params->{use_unaccent};
222              
223 6         15096 $sql .= " WHERE content MATCH ?";
224              
225             # SQLITE's fulltext engine doesn't like unbalanced parenthesis
226             # in a MATCH term. Besides, it replaces parenthesis by white
227             # space, which results in OR-ing the terms. So what we do is
228             # explicitly replace parenthesis by white space, and wrap the
229             # whole thing in a phrase query, to get more precise answers.
230 6         13 my $n_paren = $pattern =~ tr/()/ /;
231 6 100 66     36 $pattern = qq{"$pattern"} if $n_paren and $pattern !~ /"/;
232             }
233             else {
234 4         9 $sql .= " WHERE content LIKE ?";
235 4         9 $pattern =~ tr/*/%/;
236 4         6 $pattern =~ tr/?/_/;
237             };
238 10         18 @bind = ($pattern);
239             }
240 10 100       28 if (defined $origin) {
241 1 50       4 $sql .= ($pattern ? ' AND ' : ' WHERE ') . 'origin = ?';
242 1         2 push @bind, $origin;
243             }
244 10         300 my $sth = $self->dbh->prepare($sql);
245 10         202063 $sth->execute(@bind);
246 10         1619 my $rows = $sth->fetchall_arrayref;
247              
248             # build term objects
249 10         344 my $term_class = $self->term_class;
250 10         19 return map {$term_class->new(storage => $self,
  296         9031  
251             id => $_->[0],
252             string => $_->[1],
253             origin => $_->[2])} @$rows;
254             }
255              
256             sub fetch_term {
257 5     5 0 74 my ($self, $term_string, $origin) = @_;
258              
259             # retrieve term data from database
260 5         6 my $sql = 'SELECT docid, content, origin FROM term WHERE content = ?';
261 5         8 my @bind = ($term_string);
262 5 100       11 if (defined $origin) {
263 2         4 $sql .= ' AND origin = ?';
264 2         2 push @bind, $origin;
265             }
266 5         133 my $sth = $self->dbh->prepare($sql);
267 5         656 $sth->execute(@bind);
268 5 50       70 (my $id, $term_string, $origin) = $sth->fetchrow_array
269             or return;
270              
271             # build term object
272 5         166 return $self->term_class->new(storage => $self,
273             id => $id,
274             string => $term_string,
275             origin => $origin);
276             }
277              
278              
279             sub fetch_term_id {
280 10     10 0 12 my ($self, $id, $origin) = @_;
281              
282             # retrieve term data from database
283 10         9 my $sql = 'SELECT content, origin FROM term WHERE docid = ?';
284 10         12 my @bind = ($id);
285 10 50       16 if (defined $origin) {
286 0         0 $sql .= ' AND origin = ?';
287 0         0 push @bind, $origin;
288             }
289 10         246 my $sth = $self->dbh->prepare($sql);
290 10         639 $sth->execute(@bind);
291 10 50       82 (my $term_string, $origin) = $sth->fetchrow_array
292             or return;
293              
294             # build term object
295 10         311 return $self->term_class->new(storage => $self,
296             id => $id,
297             string => $term_string,
298             origin => $origin);
299             }
300              
301              
302             sub related {
303 5     5 0 6 my ($self, $term_id, $rel_ids) = @_;
304              
305             # construct the SQL request
306 5         8 my $sql = 'SELECT rel_id, other_term_id, external_info FROM relation '
307             . 'WHERE lead_term_id = ?';
308 5         7 my @bind = ($term_id);
309 5 50       9 if ($rel_ids) {
310             # optional restriction on one or several relation ids
311 5 50       19 $rel_ids = [$rel_ids] unless ref $rel_ids;
312 5         12 my $placeholders = join ", ", ('?') x @$rel_ids;
313 5         8 push @bind, @$rel_ids;
314 5         8 $sql .= " AND rel_id IN ($placeholders)";
315             }
316 5         7 $sql .= " ORDER BY rel_id, rel_order";
317              
318             # query database
319 5         129 my $sth = $self->dbh->prepare($sql);
320 5         737 $sth->execute(@bind);
321              
322             # build array of results
323 5         8 my @results;
324             my %rel_types;
325 5         47 while (my ($rel_id, $other_term_id, $external_info) = $sth->fetchrow_array) {
326 14   66     41 my $rel_type = $rel_types{$rel_id} //= $self->fetch_rel_type($rel_id);
327 14 100       4862 my $related
328             = $rel_type->is_external ? $external_info
329             : $self->fetch_term_id($other_term_id);
330 14         127 push @results, [$rel_type, $related];
331             }
332              
333 5         51 return @results;
334             }
335              
336              
337             sub rel_types {
338 4     4 0 5 my ($self) = @_;
339 4         11 my $sql = 'SELECT rel_id FROM rel_type';
340 4         108 my $rel_types = $self->dbh->selectcol_arrayref($sql);
341 4         652 return @$rel_types;
342             }
343              
344              
345              
346             sub fetch_rel_type {
347 5     5 0 7 my ($self, $rel_id) = @_;
348              
349             # retrieve rel_type data from database
350 5         5 my $sql = 'SELECT * FROM rel_type WHERE rel_id = ?';
351 5         159 my $sth = $self->dbh->prepare($sql);
352 5         345 $sth->execute($rel_id);
353 5 50       106 my $data = $sth->fetchrow_hashref
354             or return;
355              
356             # build RelType object
357 5         20 return $self->_relType_class->new(%$data);
358             }
359              
360              
361              
362              
363             1; # End of Lingua::Thesaurus::Storage::SQLite
364              
365             __END__
366              
367             =encoding ISO8859-1
368              
369             =head1 NAME
370              
371             Lingua::Thesaurus::Storage::SQLite - Thesaurus storage in an SQLite database
372              
373             =head1 DESCRIPTION
374              
375             This class implements the L<Lingua::Thesaurus::Storage> role,
376             by storing thesaurus data in a L<DBD::SQLite> database.
377              
378              
379             =head1 METHODS
380              
381             =head2 new
382              
383             my $storage = Lingua::Thesaurus::Storage::SQLite->new($dbname);
384             my $storage = Lingua::Thesaurus::Storage::SQLite->new(%args);
385              
386             If C<new()> has only one scalar argument, this is interpreted
387             as C<< new(dbname => $arg) >>. Otherwise, parameters should be
388             passed as a hash or hashref, with the following options :
389              
390             =over
391              
392             =item dbname
393              
394             Filename for storing the L<DBD::SQLite> database.
395             This could also be C<:memory:> for an in-memory database.
396              
397             =item dbh
398              
399             Optional handle to an already connected database (in that
400             case, the C<dbname> parameter will not be used).
401              
402             =item params
403              
404             Hashref of key-value pairs that will be stored into the database,
405             and can be retrieved by other processes using the thesaurus.
406             This package interprets the following keys :
407              
408             =over
409              
410             =item use_fulltext
411              
412             If true, the C<term> table will use SQLite's fulltext functionalities.
413             This means that C<< $thesaurus->search_terms('sci*') >> will also
414             retrieve C<'computer science'>; you can also issue boolean
415             queries like C<< 'sci* AND NOT comp*' >>.
416              
417             If true, the C<term> table is just a regular SQLite table, and queries
418             will be interpreted through SQLite's C<'LIKE'> operator.
419              
420             =item use_unaccent
421              
422             This parameter only makes sense together with C<use_fulltext>.
423             It will activate L<Search::Tokenizer/unaccent>, so that a
424             query for C<thésaurus> will also find C<thesaurus>, or vice-versa.
425              
426             =item term_class
427              
428             Name of the class for instanciating terms.
429             Default is L<Lingua::Thesaurus::Term>.
430              
431             =item relType_class
432              
433             Name of the class for instanciating "relation types".
434             Default is L<Lingua::Thesaurus::RelType>.
435              
436             =back
437              
438             =back
439              
440             =head2 Retrieval methods
441              
442             See L<Lingua::Thesaurus::Storage/"Retrieval methods">
443              
444             =head2 Populating the database
445              
446             See L<Lingua::Thesaurus::Storage/"Populating the database"> for the API.
447              
448             Below are some particular notes about the SQLite implementation.
449              
450             =head3 do_transaction
451              
452             This method just performs C<begin_work> .. C<commit>, because
453             inserts into an SQLite database are much faster under a transaction.
454             No support for rollbacks is programmed, because in this context
455             there is no need for it.
456              
457             =head3 store_term
458              
459             If C<use_fulltext> is false, terms are stored in a regular table
460             with a UNIQUE constraint, so it is not possible to store the same
461             term string twice.
462              
463             If C<use_fulltext> is true, no constraint is enforced.
464              
465             =cut