File Coverage

blib/lib/Plucene/SearchEngine/Index/File.pm
Criterion Covered Total %
statement 37 41 90.2
branch 4 8 50.0
condition n/a
subroutine 9 9 100.0
pod 0 1 0.0
total 50 59 84.7


line stmt bran cond sub pod time code
1             package Plucene::SearchEngine::Index::File;
2 1     1   1088 use strict;
  1         3  
  1         39  
3 1     1   7 use base "Plucene::SearchEngine::Index::Base";
  1         1  
  1         109  
4 1     1   5 use Carp;
  1         2  
  1         60  
5 1     1   1183 use File::MMagic;
  1         10012  
  1         20  
6 1     1   50 use File::Spec::Functions qw(rel2abs);
  1         2  
  1         80  
7 1     1   6 use File::Basename;
  1         3  
  1         108  
8 1     1   7 use Time::Piece;
  1         2  
  1         12  
9 1     1   1093 use File::stat;
  1         4228  
  1         5  
10             my $magic = File::MMagic->new();
11              
12             =head1 NAME
13              
14             Plucene::SearchEngine::Index::File - File reader for filesystem files
15              
16             =head1 DESCRIPTION
17              
18             This frontend module takes a filesystem file, extracts its metadata and
19             passes the file on to a backend. The frontend registers the following
20             Plucene fields:
21              
22             =over 3
23              
24             =item mimetype
25              
26             The MIME type of the file.
27              
28             =item filename
29              
30             The basename of the file's filename.
31              
32             =item id
33              
34             The URL of the file (C<file://> followed by the file's absolute path)
35              
36             =item modified
37              
38             A Plucene date field representing the last modified date of the file
39              
40             =back
41              
42             =head2 METHODS
43              
44             Plucene::SearchEngine::Index::File->examine($filename [, $encoding])
45              
46             This examines a file on the filesystem for the above metadata, before
47             handing it to a backend. If an encoding is given, the text will be
48             flagged as originally being that encoding, and then converted to UTF-8.
49              
50             =cut
51              
52             sub examine {
53 1     1 0 534 my ($class, $filename, $encoding) = @_;
54 1 50       24 return unless -r $filename;
55 1         9 my $mime = $magic->checktype_filename($filename);
56 1         20523 my $self = $class->handler_for($filename, $mime)->new();
57 1         6 $self->add_data("mimetype", "Text", $mime);
58 1         53 $self->add_data("filename", "Text", basename($filename));
59 1         11 $self->add_data("id", "Keyword", "file://".rel2abs($filename));
60 1         6 $self->add_data("modified", "Date", Time::Piece->new(stat($filename)->mtime));
61 1 50       15 if ($encoding) { $self->add_data("encoding", "Text", $encoding); }
  0         0  
62 1         7 my @docs = $self->gather_data_from_file($filename);
63 1 50       5 if (wantarray) { if (@docs > 1) { return @docs } else { return $self } }
  1 50       5  
  0         0  
  1         6  
64             else {
65 0           carp "Using ->examine in scalar context is deprecated";
66 0           return $self;
67             }
68             }
69              
70             1;