File Coverage

lib/Metadata/ByInode.pm
Criterion Covered Total %
statement 121 132 91.6
branch 39 72 54.1
condition 4 12 33.3
subroutine 18 19 94.7
pod 5 6 83.3
total 187 241 77.5


line stmt bran cond sub pod time code
1             package Metadata::ByInode;
2 2     2   84860 use strict;
  2         6  
  2         68  
3 2     2   11 use warnings;
  2         4  
  2         106  
4 2     2   13 use Carp;
  2         7  
  2         1313  
5 2     2   6219 use DBI;
  2         51216  
  2         150  
6 2     2   66 use Cwd;
  2         4  
  2         151  
7 2     2   12 use base 'Metadata::ByInode::Search';
  2         3  
  2         1703  
8 2     2   12 use base 'Metadata::ByInode::Indexer';
  2         12  
  2         936  
9              
10              
11             #our @ISA = qw(Metadata::ByInode::Search Metadata::ByInode::Indexer);
12             our $VERSION = sprintf "%d.%02d", q$Revision: 1.17 $ =~ /(\d+)/g;
13             my $DEBUG = 0;
14 6     6 0 47 sub DEBUG : lvalue { $DEBUG }
15              
16              
17             sub new {
18 2     2 1 5050 my ($class,$self)= (shift,shift);
19 2   50     25 $self||={};
20              
21 2 50 33     42 $self->{abs_dbfile} or $self->{dbh} or croak('no (abs_dbfile )arg or open (dbh) arg passed to constructor');
22            
23 2         13 bless $self, $class;
24              
25 2         91 return $self;
26             }
27              
28             =pod
29              
30             =head1 NAME
31              
32             Metadata::ByInode - Extend metadata in relation to file's inode using a database.
33              
34              
35             =head1 SYNOPSIS
36              
37             use Metadata::ByInode;
38            
39             my $mbi = new Metadata::ByInode({ abs_dbfile => '/home/myself/mbi.db' });
40            
41             # index files for quick lookup
42             $mbi->index('/home/myself/photos/family');
43              
44             # lookup a file by filename and location
45             my $results =
46             $mbi->search({
47             abs_loc => '/home/myself/photos/family',
48             filename => 'ralph'
49             });
50              
51             =head1 DESCRIPTION
52              
53             This is primarily meant to be support for an indexer.
54             Ideally, this will look at a slice of the filesystem, make some deductions with
55             the indexer, and save that info.
56             You can use this module bare bones to set and get data on any files in the system.
57              
58             The indexer is a module that inherits this one.
59              
60             =head1 SEE ALSO
61              
62             L
63              
64             =head1 METHODS
65              
66             =head2 new()
67              
68             Arguments are:
69              
70             =over 4
71              
72             =item dbh
73              
74             (optional) existing database handle, otherwise DBD::Sqlite is used
75              
76             =item abs_dbfile
77              
78             (optional, required if you don't pass an open dbh) absoute path to sqlite file, will be created if not found.
79              
80             =back
81              
82             Example usage:
83            
84             my $mbi = new Metadata::ByInode;
85            
86             my $mbi = new Metadata::ByInode({
87             abs_dbfile => '/home/myself/mystuff.db'
88             });
89              
90              
91             =head1 NOTE ON dbh
92              
93             If you do not pass a dbh, the dbh is opened using DBI::SQLite at abs_path argument.
94             It will take care of commit and disconnect for you.
95              
96             If you *do* pass it a dbh, we do not automatically commit and disconnect on DESTROY.
97             It is up to you what to do with it, if you set autocommit or need to commit later.
98              
99             _finish_open_handles()
100              
101             Will search the prepared handles we opened and finish them and commit.
102             It returns the number of prepared handles closed.
103              
104              
105             =cut
106            
107              
108              
109              
110              
111             sub _reset_db {
112 0     0   0 my $self = shift;
113 0 0       0 print STDERR __PACKAGE__."::_reset_db() called\n" if DEBUG;
114            
115 0 0       0 unless( $self->dbh->do('DROP TABLE metadata') ) {
116 0         0 my $err =$DBI::errstr;
117 0         0 die("cannot setup db, is DBD::SQLite installed? $! - ".$DBI::esstr);
118             }
119              
120              
121 0 0       0 $self->_setup_db or return 0;
122              
123 0 0       0 print STDERR __PACKAGE__."::_reset_db() done\n" if DEBUG;
124            
125 0         0 return 1;
126             }
127              
128             sub _setup_db {
129 2     2   5 my $self = shift;
130            
131 2 50       13 print STDERR __PACKAGE__."::_setup_db() called\n" if DEBUG;
132            
133 2         6 my $b = qq|CREATE TABLE IF NOT EXISTS metadata(
134             inode INTEGER(10) NOT NULL,
135             mkey VARCHAR(50) NOT NULL,
136             mvalue TEXT,
137             PRIMARY KEY (inode,mkey)
138             )|;
139              
140 2 50       20 unless( $self->dbh->do($b) ) {
141 0         0 my $err =$DBI::errstr;
142 0         0 die("cannot setup db, is DBD::SQLite installed? $! - ".$DBI::esstr);
143             }
144              
145             # must commit here to prevent error that when you search before you index, it fucks up
146              
147 2         2205 $self->dbh->commit;
148              
149 2 50       24 print STDERR __PACKAGE__."::_setup_db() done\n" if DEBUG;
150              
151 2         13 return 1;
152             }
153             =pod
154              
155             =head1 _setup_db()
156              
157             automatically called if using sqlite on a non existent file, and we just created it.
158             The table is :
159              
160             CREATE TABLE IF NOT EXISTS metadata (
161             inode INTEGER(10) NOT NULL,
162             mkey VARCHAR(50) NOT NULL,
163             mvalue TEXT,
164             PRIMARY KEY (inode,mkey)
165             );
166              
167             in previous version, mkey was 'key', but this caused problems in mysql
168              
169              
170             =head1 _reset_db()
171              
172             will reset the table, drop and recreate metadata table.
173              
174             =cut
175              
176              
177              
178              
179              
180              
181              
182             sub dbh {
183 37     37 1 777 my $self = shift;
184              
185            
186 37 100       459 unless( defined $self->{dbh} ){
187 2 50       9 print STDERR __PACKAGE__."::dbh() was not defined.. will set up for sqlite..\n" if DEBUG;
188            
189 2 50       12 $self->{abs_dbfile} or croak(
190             "need open database handle (dbh) or absolute path to sqlite databse file (abs_dbfile) "
191             ."as construcctor argument to Metadata::ByInode");
192              
193 2         4 my $isnew=0;
194 2 50       57 unless(-f $self->{abs_dbfile}){
195 2         5 $isnew=1;
196             }
197            
198             # attempt to open sqlite db file
199 2 50       120 if( $self->{dbh}= DBI->connect(
200             "dbi:SQLite:".$self->{abs_dbfile},'','',{RaiseError=>0, AutoCommit=>0})
201             ){
202 2         45723 $self->{_not_passed_as_argument} = 1;
203             }
204              
205             else {
206 0         0 croak("ERR: [$!], could not connect db[".$self->{abs_dbfile}."] -[$DBI::errstr]-");
207             }
208            
209             # if it didn't exist before, set up the metadata table.
210 2 50       35 if ($isnew) {
211 2         11 $self->_setup_db;
212             }
213             }
214 37         5117900 return $self->{dbh};
215             }
216              
217             =pod
218              
219             =head1 dbh()
220              
221             Returns open db handle. If you did not pass an open database handle to the constructor, it expects that you did pass an absolute path to where
222             you want an sqlite database file read. If it does not exist, it will be made and setup.
223              
224             =head1 GET AND SET METHODS
225              
226             There is distinguising difference between the get() and the set() methods.
227             The get() methods simply query the database. You can get metadata for a file that is
228             no longer on disk.
229              
230             The set() methods however, do NOT let you set metadata for a file that is not on disk.
231             This is on purpose. So if you use this for some kind of logging, you can get history.
232              
233             Again:
234              
235             You can get() metadata for files no longer on disk.
236             You can NOT set() metadata for files not on disk.
237              
238             If you are using the default indexer in this distribution, files no longer on disk
239             are automatically take out of the metadata database if they are not there any more.
240              
241             =cut
242              
243             sub set {
244             ### set called
245 29     29 1 35 my $self = shift;
246 29 50       37 my $arg = shift; $arg or confess('missing abs path or inode argument to set()');
  29         62  
247 29         30 my $hash = shift;
248            
249 29         52 my $inode = _get_inode($arg);
250            
251             # init replace query
252 29 100       99 unless( defined $self->{_open_handle}->{replace} ){
253            
254 2         7 $self->{_open_handle}->{replace} =
255             $self->dbh->prepare('REPLACE INTO metadata (inode,mkey,mvalue) VALUES(?,?,?)');
256             #$self->dbh->prepare('INSERT INTO metadata (inode,mkey,mvalue) VALUES(?,?,?)');
257            
258             }
259            
260 29         313 for (keys %{$hash}){
  29         127  
261 87 50       26039 $self->{_open_handle}->{replace}->execute($inode,$_,$hash->{$_}) or confess($DBI::errstr);
262             }
263            
264              
265 29         102 return 1;
266             }
267             =pod
268              
269             =head2 set()
270              
271             Sets meta for a file. First argument is abs_path or inode. Second argument is hash ref.
272            
273             $idx->set('/path/to/what',{ client => 'joe' });
274             $idx->set(1235,{ client => 'hey', size => 'medium' });
275            
276             =cut
277              
278              
279              
280             sub get {
281 6     6 1 11 my $self = shift;
282 6 50       8 my $arg = shift; $arg or croak('must provide inode or abs path arg');
  6         14  
283 6 50       8 my $key = shift; $key or croak('get() missing key argument');
  6         13  
284              
285             ### get called
286             ### $arg
287             ### $key
288            
289 6         12 my $inode = $self->_search_inode($arg); # should be a search to the db only
290             ### $inode
291 6 50       24 $inode or return;
292              
293            
294 6 100       18 unless( defined $self->{_open_handle}->{select_by_key} ){
295 1         8 $self->{_open_handle}->{select_by_key} =
296             $self->dbh->prepare('SELECT mvalue FROM metadata WHERE inode=? AND mkey=?');
297             }
298            
299 6         141 $self->{_open_handle}->{select_by_key}->execute($inode, $key);
300 6         38 my $value = ( $self->{_open_handle}->{select_by_key}->fetch )->[0];
301            
302             ### $value
303 6 50       17 defined $value or return; # could be 0
304              
305 6         33 return $value;
306              
307             }
308             =pod
309              
310             =head2 get()
311              
312             First argument is inode number, or absolute path to file.
313              
314             If no metadata *is* found, returns undef.
315              
316             $mbi->get('/path/to/file','description');
317             $mbi->get(1235,'description');
318              
319             If value is 0, returns 0
320              
321             =cut
322              
323             sub get_all {
324 8     8 1 15 my $self = shift;
325 8 50       11 my $inode = shift; $inode or croak('missing inode argument to get_all()');
  8         21  
326 8 50       25 $inode = $self->_search_inode($inode) or return;
327              
328             # init select query
329 8 100       29 unless( defined $self->{_open_handle}->{select_all} ){
330 2         7 $self->{_open_handle}->{select_all} =
331             $self->dbh->prepare('SELECT mkey,mvalue FROM metadata WHERE inode = ?');
332             }
333            
334              
335 8         290 $self->{_open_handle}->{select_all}->execute($inode);
336              
337            
338 8         18 my $meta = {};
339 8         72 while( my $row = $self->{_open_handle}->{select_all}->fetch ){
340 24         211 $meta->{ $row->[0] } = $row->[1];
341             }
342            
343 8 50       14 scalar ( keys %{$meta} ) or return;
  8         29  
344              
345             # create pseudo abs_path?
346 8         34 $meta->{abs_path} = $meta->{abs_loc}.'/'.$meta->{filename};
347            
348 8         30 return $meta;
349             }
350             =pod
351              
352             =head2 get_all()
353              
354             Returns hash with all metadata for one file.
355             First argument is abs_path or inode.
356              
357             my $meta = $idx->get_all('/path/to/this');
358              
359             my $meta = $idx->get_all(1245);
360              
361             Please note: get() methods do NOT check for file existence, they just query the database for
362             information.
363              
364             =cut
365             # TODO: REFINE THIS
366             =head2 NOTE ABOUT get() AND set()
367              
368             get() methods do NOT test for file existence on disk!
369             They just try to fetch the data from the database.
370              
371             however, if you use a set() method and you file definition is not inode, that is,
372             if you try to set() metadata and you specify an absolute path, then we DO test for
373             file existence.
374              
375             You cannot set() metadata for files that are not on disk
376              
377             You *can* query for metadata for files that are NOT on disk.
378              
379             =head1 INTERNAL METHODS
380              
381             =cut
382              
383             sub _search_inode {
384             #### _search_inode called
385 15     15   24 my $self = shift;
386            
387 15         18 my $arg = shift;
388             #### $arg
389 15 50       30 $arg or croak('_search_inode() missing argument');
390            
391 15 100       72 if ($arg=~/^\d+$/){
392             #### digits, assumed to be inode, will return it without lookup
393 10         33 return $arg;
394             }
395              
396 5         173 my $abs_path = Cwd::abs_path($arg);
397              
398 5 50       31 $abs_path=~/^(\/.+)\/([^\/]+)$/ or croak("arg is not filepath");
399 5         19 my ($abs_loc,$filename)=($1,$2);
400              
401             #### $abs_loc
402             #### $filename
403              
404 5 100       20 unless( defined $self->{_open_handle}->{f} ){
405 1         3 $self->{_open_handle}->{f} =
406             $self->dbh->prepare(q{
407             SELECT inode FROM metadata WHERE mkey='abs_loc' AND mvalue=? and inode=
408             (SELECT inode FROM metadata WHERE mkey='filename' AND mvalue=?);
409             });
410              
411             }
412              
413 5         287 $self->{_open_handle}->{f}->execute($abs_loc,$filename);
414              
415 5         42 my $row = $self->{_open_handle}->{f}->fetch;
416             #### $row
417 5         8 my $inode = $row->[0];
418             #### $inode
419 5         17 return $inode;
420             }
421             =pod
422              
423             =head1 _search_inode()
424              
425             To get the inode from database.
426              
427             argument is absolute path.
428             will look up in the database to see if we can resolve to an inode.
429              
430             If the path provided does not match up with our entries, returns undef.
431             This would mean no metadata matches this path.
432              
433             If argument provided is all digits, assumes this *is* an inode and returns it.
434              
435             Croaks if its not ann inode or we cant split argument into an absolute path and filename.
436             =cut
437              
438             sub _get_inode {
439 30 50   30   32 my $arg = shift; $arg or croak('_get_inode() missing argument');
  30         55  
440            
441 30 100       141 if ($arg!~/^\d+$/){
442 1         41 my $abs_path = Cwd::abs_path($arg);
443 1 50 0     22 my @s = stat $abs_path or warn("$! - File not on disk? cant stat normalized:[$abs_path]") and return;
444             # TODO: if no stat, then we should change the time metadata that this file does no longer exist
445             # furthermore, should we look up the inode in the database first?
446 1         3 $arg = $s[1];
447             }
448            
449 30         984 return $arg;
450             }
451             =pod
452              
453             =head1 _get_inode()
454              
455             To get the inode from disk.
456              
457             Takes argument and tries to return inode. Argument can be absolute file path.
458             If argument is an inode, returns same value.
459             If argument is word chars, tries to stat for inode.
460             Returns undef if absolute path not on disk.
461              
462             =head1 DESTROY() METHODS
463              
464             The destructor will close open db handles, and commit changes.
465             If the dbh was passed to the constructor, this will not happen
466             and it is up to you to deal with your database settings (autocommit
467             etc).
468              
469             =cut
470              
471              
472             sub _finish_open_handles {
473 2     2   6 my $self = shift;
474              
475 2   50     26 $self->{_commit} ||= 0;
476              
477 2         5 for ( keys %{$self->{_open_handle}} ){
  2         13  
478 6         8 my $handle = $_;
479             ### $handle
480 6 50       20 if (defined $self->{_open_handle}->{$handle}){
481 6         34 $self->{_open_handle}->{$handle}->finish;
482 6         14 $self->{_commit}++;
483             }
484             }
485 2         12 return $self->{_commit};
486             }
487              
488             sub DESTROY {
489 2     2   6 my $self = shift;
490              
491             # we only do these when the db was opened from this object. Otherwise it's their business.
492            
493 2 50 33     44 if ( defined $self->{dbh} and defined $self->{_not_passed_as_argument} ){
494             # TODO : what if they still want the handle!!!!!?????
495             # if the dbhandle was created here, then close it. otherwise, nothing.
496             # seems like a compromise.
497 2 50       11 if ( $self->_finish_open_handles ){
498 2         8 $self->dbh->commit;
499            
500             }
501            
502             # get rid of annoying warning
503 2         137 open (STDERR,">>/dev/null");
504              
505 2         18 $self->dbh->disconnect; # TODO : warns that 'closing dbh with active statement handles at lib/Metadata/ByInode.pm'
506             # WHY does it warn???
507 2         6389 close STDERR;
508             }
509              
510              
511             }
512              
513             1;
514              
515             =pod
516              
517             =head1 CAVEATS
518              
519             All paths are resolved for symlinks, NOTE!
520              
521             =head1 PROS AND CONS
522              
523             =head2 PROS
524              
525             Inode is very stable in a unix filesystem.
526              
527             If the file is moved within the filesystem(within the same partition), the inode does not change.
528             If you overrite the file with a copy command, the target file's inode does not change.
529             If you rename the file, the inode does not change.
530              
531             If you are indexing large ammounts of data, you can backup, and if you restore via copy, the inode does not change.
532              
533             =head2 CONS
534              
535             If you move the file to another filesystem (to another disk, to another partition) the inode of the file changes.
536              
537             =head1 BUGS
538              
539             Please contact AUTHOR.
540              
541             =head1 AUTHOR
542              
543             Leo Charre
544              
545             =cut