line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package AI::Categorizer::Collection::Files; |
2
|
7
|
|
|
7
|
|
7142
|
use strict; |
|
7
|
|
|
|
|
14
|
|
|
7
|
|
|
|
|
237
|
|
3
|
|
|
|
|
|
|
|
4
|
7
|
|
|
7
|
|
40
|
use AI::Categorizer::Collection; |
|
7
|
|
|
|
|
13
|
|
|
7
|
|
|
|
|
168
|
|
5
|
7
|
|
|
7
|
|
73
|
use base qw(AI::Categorizer::Collection); |
|
7
|
|
|
|
|
12
|
|
|
7
|
|
|
|
|
554
|
|
6
|
|
|
|
|
|
|
|
7
|
7
|
|
|
7
|
|
36
|
use Params::Validate qw(:types); |
|
7
|
|
|
|
|
11
|
|
|
7
|
|
|
|
|
1216
|
|
8
|
7
|
|
|
7
|
|
36
|
use File::Spec; |
|
7
|
|
|
|
|
12
|
|
|
7
|
|
|
|
|
4628
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# Declare the constructor parameters this class adds.  The type constants
# (SCALAR, ARRAYREF, BOOLEAN) come from Params::Validate's :types export.
__PACKAGE__->valid_params(
    # A single directory name, or a reference to an array of directory names.
    path    => { type => SCALAR | ARRAYREF },

    # When true, subdirectories met while scanning are queued for scanning
    # as well (see _read_file).  Off by default.
    recurse => { type => BOOLEAN, default => 0 },
);
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
# Constructor.
#
# All arguments are handed unchanged to the parent class constructor.  The
# resulting object then gets:
#   dir_fh - a glob used as the directory handle for opendir/readdir
#   path   - array ref of directories still waiting to be scanned
#   used   - array ref of directories already opened
# and the first directory is opened through _next_path(), so next() can be
# called right away.
#
# Returns the new object.  Dies (inside _next_path) when the first directory
# cannot be opened.
sub new {
  my $class = shift;
  my $self = $class->SUPER::new(@_);

  # Anonymous glob for the directory handle.  Mentioning *FH twice avoids a
  # "used only once" warning.
  $self->{dir_fh} = do {local *FH; *FH};

  # Documents are contained in a directory, or list of directories.
  # Always keep a private copy of the list: _next_path() shifts entries off
  # it, and without the copy a caller-supplied array reference would be
  # emptied behind the caller's back.
  $self->{path} = [ ref $self->{path} ? @{$self->{path}} : $self->{path} ];
  $self->{used} = [];

  $self->_next_path;
  return $self;
}
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Advance to the next directory in the pending list.
#
# Closes the directory that is currently open (if any), removes the first
# entry of $self->{path}, records it in $self->{cur_dir} and at the end of
# $self->{used}, and opens it on $self->{dir_fh}.
#
# Dies when there is no pending directory left, or when the directory cannot
# be opened (the message then carries the directory name and $!).
sub _next_path {
  my $self = shift;

  # Refuse to advance past the end of the list before touching any state;
  # otherwise an undefined entry would be recorded in {used} and opendir
  # would fail with a message that names no directory at all.
  die "No more directories to read" unless @{ $self->{path} || [] };

  # Test definedness, not truth: a directory named "0" is a valid current
  # directory and its handle must be closed like any other.
  closedir $self->{dir_fh} if defined $self->{cur_dir};

  $self->{cur_dir} = shift @{$self->{path}};
  push @{$self->{used}}, $self->{cur_dir};
  opendir $self->{dir_fh}, $self->{cur_dir} or die "$self->{cur_dir}: $!";
}
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Return the next document of the collection, or nothing once _read_file()
# reports that no file is left.
#
# The file name is looked up in $self->{category_hash}; a warning is issued
# when there is no entry for it, and the document is then created with an
# empty category list.  The document itself is produced by
# call_method('document', 'read', ...) with the full path, the bare file
# name and the category objects.
sub next {
  my ($self) = @_;

  my $file = $self->_read_file;
  defined $file or return;

  my $cat_names = $self->{category_hash}{$file};
  warn "No category information about '$file'" unless defined $cat_names;

  # Turn category names into category objects.
  my @cats;
  foreach my $cat_name (@{ $cat_names || [] }) {
    push @cats, AI::Categorizer::Category->by_name(name => $cat_name);
  }

  my $full_path = File::Spec->catfile($self->{cur_dir}, $file);
  return $self->call_method(
    'document', 'read',
    path       => $full_path,
    name       => $file,
    categories => \@cats,
  );
}
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# Return the name (without directory) of the next plain entry in the
# directories being scanned, or undef when every directory is exhausted.
#
# "." and ".." are skipped.  Subdirectories are never returned; when
# $self->{recurse} is true they are appended to $self->{path} for later
# scanning, unless they are already pending or already used.
sub _read_file {
  my $self = shift;

  ENTRY: while (1) {
    my $entry = readdir $self->{dir_fh};

    unless (defined $entry) {
      # Current directory has been exhausted: stop if nothing is pending,
      # otherwise move on to the next directory and keep reading.
      # (Explicit undef, so list-context callers still get one element.)
      return undef unless @{$self->{path}};
      $self->_next_path;
      next ENTRY;
    }

    next ENTRY if $entry eq '.' or $entry eq '..';

    my $candidate = File::Spec->catdir($self->{cur_dir}, $entry);
    return $entry unless -d $candidate;

    # A subdirectory: queue it when recursing and not seen before, then
    # continue with the next entry either way.
    next ENTRY unless $self->{recurse};
    my $already_known = grep { $_ eq $candidate } @{$self->{path}}, @{$self->{used}};
    push @{$self->{path}}, $candidate unless $already_known;
  }
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Restart iteration from the first directory.
#
# Every directory recorded in $self->{used} is put back into the pending
# list, the used list is emptied, and the first pending directory is
# reopened through _next_path().  Dies if that directory cannot be opened.
sub rewind {
  my $self = shift;

  # Put the visited directories back IN FRONT of the ones still pending.
  # They were visited first, so this restores the original order; appending
  # them instead would make a rewind in mid-iteration reorder the collection.
  unshift @{$self->{path}}, @{$self->{used}};
  @{$self->{used}} = ();

  $self->_next_path;
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# This should share an iterator with next() |
81
|
|
|
|
|
|
|
# Return the number of entries _read_file() yields for this collection.
#
# The count is computed once and cached in $self->{document_count}.
# Computing it rewinds the collection, reads through every entry, and
# rewinds again, so the current iteration position is lost.
sub count_documents {
  my ($self) = @_;

  unless (defined $self->{document_count}) {
    $self->rewind;

    my $total = 0;
    while (defined $self->_read_file) {
      $total++;
    }

    $self->rewind;
    $self->{document_count} = $total;
  }

  return $self->{document_count};
}
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
1; |
95
|
|
|
|
|
|
|
__END__ |