| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package AI::Categorizer::Collection::Files; | 
| 2 | 7 |  |  | 7 |  | 7142 | use strict; | 
|  | 7 |  |  |  |  | 14 |  | 
|  | 7 |  |  |  |  | 237 |  | 
| 3 |  |  |  |  |  |  |  | 
| 4 | 7 |  |  | 7 |  | 40 | use AI::Categorizer::Collection; | 
|  | 7 |  |  |  |  | 13 |  | 
|  | 7 |  |  |  |  | 168 |  | 
| 5 | 7 |  |  | 7 |  | 73 | use base qw(AI::Categorizer::Collection); | 
|  | 7 |  |  |  |  | 12 |  | 
|  | 7 |  |  |  |  | 554 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 7 |  |  | 7 |  | 36 | use Params::Validate qw(:types); | 
|  | 7 |  |  |  |  | 11 |  | 
|  | 7 |  |  |  |  | 1216 |  | 
| 8 | 7 |  |  | 7 |  | 36 | use File::Spec; | 
|  | 7 |  |  |  |  | 12 |  | 
|  | 7 |  |  |  |  | 4628 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | __PACKAGE__->valid_params | 
| 11 |  |  |  |  |  |  | ( | 
| 12 |  |  |  |  |  |  | path => { type => SCALAR|ARRAYREF }, | 
| 13 |  |  |  |  |  |  | recurse => { type => BOOLEAN, default => 0 }, | 
| 14 |  |  |  |  |  |  | ); | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | sub new { | 
| 17 | 1 |  |  | 1 | 1 | 232 | my $class = shift; | 
| 18 | 1 |  |  |  |  | 11 | my $self = $class->SUPER::new(@_); | 
| 19 |  |  |  |  |  |  |  | 
| 20 | 1 |  |  |  |  | 2 | $self->{dir_fh} = do {local *FH; *FH};  # double *FH avoids a warning | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | # Documents are contained in a directory, or list of directories | 
| 23 | 1 | 50 |  |  |  | 6 | $self->{path} = [$self->{path}] unless ref $self->{path}; | 
| 24 | 1 |  |  |  |  | 3 | $self->{used} = []; | 
| 25 |  |  |  |  |  |  |  | 
| 26 | 1 |  |  |  |  | 5 | $self->_next_path; | 
| 27 | 1 |  |  |  |  | 4 | return $self; | 
| 28 |  |  |  |  |  |  | } | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | sub _next_path { | 
| 31 | 4 |  |  | 4 |  | 5 | my $self = shift; | 
| 32 | 4 | 100 |  |  |  | 44 | closedir $self->{dir_fh} if $self->{cur_dir}; | 
| 33 |  |  |  |  |  |  |  | 
| 34 | 4 |  |  |  |  | 8 | $self->{cur_dir} = shift @{$self->{path}}; | 
|  | 4 |  |  |  |  | 9 |  | 
| 35 | 4 |  |  |  |  | 6 | push @{$self->{used}}, $self->{cur_dir}; | 
|  | 4 |  |  |  |  | 9 |  | 
| 36 | 4 | 50 |  |  |  | 122 | opendir $self->{dir_fh}, $self->{cur_dir} or die "$self->{cur_dir}: $!"; | 
| 37 |  |  |  |  |  |  | } | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | sub next { | 
| 40 | 3 |  |  | 3 | 1 | 415 | my $self = shift; | 
| 41 | 3 |  |  |  |  | 9 | my $file = $self->_read_file; | 
| 42 | 3 | 50 |  |  |  | 8 | return unless defined $file; | 
| 43 |  |  |  |  |  |  |  | 
| 44 | 3 | 50 |  |  |  | 10 | warn "No category information about '$file'" unless defined $self->{category_hash}{$file}; | 
| 45 | 3 | 50 |  |  |  | 3 | my @cats = map AI::Categorizer::Category->by_name(name => $_), @{ $self->{category_hash}{$file} || [] }; | 
|  | 3 |  |  |  |  | 35 |  | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 3 |  |  |  |  | 50 | return $self->call_method('document', 'read', | 
| 48 |  |  |  |  |  |  | path => File::Spec->catfile($self->{cur_dir}, $file), | 
| 49 |  |  |  |  |  |  | name => $file, | 
| 50 |  |  |  |  |  |  | categories => \@cats, | 
| 51 |  |  |  |  |  |  | ); | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | sub _read_file { | 
| 55 | 10 |  |  | 10 |  | 12 | my ($self) = @_; | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 10 |  |  |  |  | 99 | my $file = readdir $self->{dir_fh}; | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 10 | 100 | 100 |  |  | 192 | if (!defined $file) { # Directory has been exhausted | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 60 | 1 | 50 |  |  |  | 2 | return undef unless @{$self->{path}}; | 
|  | 1 |  |  |  |  | 9 |  | 
| 61 | 0 |  |  |  |  | 0 | $self->_next_path; | 
| 62 | 0 |  |  |  |  | 0 | return $self->_read_file; | 
| 63 |  |  |  |  |  |  | } elsif ($file eq '.' or $file eq '..') { | 
| 64 | 2 |  |  |  |  | 9 | return $self->_read_file; | 
| 65 |  |  |  |  |  |  | } elsif (-d (my $path = File::Spec->catdir($self->{cur_dir}, $file))) { | 
| 66 | 0 |  |  |  |  | 0 | push @{$self->{path}}, $path  # Add for later processing | 
|  | 0 |  |  |  |  | 0 |  | 
| 67 | 0 | 0 | 0 |  |  | 0 | if $self->{recurse} and !grep {$_ eq $path} @{$self->{path}}, @{$self->{used}}; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 68 | 0 |  |  |  |  | 0 | return $self->_read_file; | 
| 69 |  |  |  |  |  |  | } | 
| 70 | 7 |  |  |  |  | 31 | return $file; | 
| 71 |  |  |  |  |  |  | } | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | sub rewind { | 
| 74 | 3 |  |  | 3 | 1 | 387 | my $self = shift; | 
| 75 | 3 |  |  |  |  | 4 | push @{$self->{path}}, @{$self->{used}}; | 
|  | 3 |  |  |  |  | 7 |  | 
|  | 3 |  |  |  |  | 10 |  | 
| 76 | 3 |  |  |  |  | 5 | @{$self->{used}} = (); | 
|  | 3 |  |  |  |  | 7 |  | 
| 77 | 3 |  |  |  |  | 8 | $self->_next_path; | 
| 78 |  |  |  |  |  |  | } | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | # This should share an iterator with next() | 
| 81 |  |  |  |  |  |  | sub count_documents { | 
| 82 | 2 |  |  | 2 | 1 | 21 | my $self = shift; | 
| 83 | 2 | 100 |  |  |  | 9 | return $self->{document_count} if defined $self->{document_count}; | 
| 84 |  |  |  |  |  |  |  | 
| 85 | 1 |  |  |  |  | 5 | $self->rewind; | 
| 86 |  |  |  |  |  |  |  | 
| 87 | 1 |  |  |  |  | 2 | my $count = 0; | 
| 88 | 1 |  |  |  |  | 4 | $count++ while defined $self->_read_file; | 
| 89 |  |  |  |  |  |  |  | 
| 90 | 1 |  |  |  |  | 3 | $self->rewind; | 
| 91 | 1 |  |  |  |  | 4 | return $self->{document_count} = $count; | 
| 92 |  |  |  |  |  |  | } | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | 1; | 
| 95 |  |  |  |  |  |  | __END__ |