line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package AI::Classifier::Text;

# Release version of this module, stored in the package variable
# $AI::Classifier::Text::VERSION.
our $VERSION = '0.03';
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
28009
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
40
|
|
7
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
30
|
|
8
|
1
|
|
|
1
|
|
29
|
use 5.010; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
55
|
|
9
|
1
|
|
|
1
|
|
1737
|
use Moose; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use MooseX::Storage; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use AI::Classifier::Text::Analyzer; |
13
|
|
|
|
|
|
|
use Module::Load (); # don't overwrite our sub load() with Module::Load::load() |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Compose the MooseX::Storage role, configured for the Storable format and
# file-based I/O. The store()/load() methods wrapped in this file presumably
# come from this role -- confirm against the MooseX::Storage documentation.
with Storage(
    format => 'Storable',
    io     => 'File',
);

# The object that performs the actual classification. Read-only, and must be
# passed to the constructor.
has classifier => (
    is       => 'ro',
    required => 1,
);

# The object that turns text into features. Read-only; when not supplied, a
# fresh AI::Classifier::Text::Analyzer is created.
has analyzer => (
    is      => 'ro',
    default => sub { AI::Classifier::Text::Analyzer->new },
);

# for store/load only, don't touch unless you really know what you're doing
# ('bare' means no accessor is generated; the slot is accessed through the
# object hash directly.)
has classifier_class => ( is => 'bare' );
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Runs just before store(): records the class name of the classifier in the
# classifier_class slot, so that the load() wrapper knows which module has to
# be loaded when the object is read back.
#
# The class name is obtained with Scalar::Util::blessed(), which works for
# any object. Classifiers are only documented as having to provide a
# classify() method, so they must not be required to offer a Moose-style
# meta() as well. Values that are not blessed references (for instance a
# plain class name) still go through ->meta->name, exactly as before.
before store => sub {
    my $self = shift;

    # Core module; required here so this modifier does not depend on some
    # other module having loaded it already.
    require Scalar::Util;

    my $classifier = $self->classifier;

    $self->{classifier_class} = Scalar::Util::blessed($classifier)
        // $classifier->meta->name;
};
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Wraps load(): first lets the original load() rebuild the object, then loads
# the module whose name was saved in the classifier_class slot, and finally
# hands the rebuilt object back to the caller.
around load => sub {
    my $orig  = shift;
    my $class = shift;

    # Remaining arguments are passed through to the original load() untouched.
    my $restored = $class->$orig(@_);

    # Fully qualified call: this package deliberately does not import
    # Module::Load::load(), so that it cannot clash with our own load().
    Module::Load::load( $restored->{classifier_class} );

    return $restored;
};
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# classify($text, $features)
#
# Passes $text and $features to the analyzer's analyze() method and feeds
# whatever it returns to the classifier's classify() method. The result of
# the classifier's classify() is returned unchanged.
sub classify {
    my ($self, $text, $features) = @_;

    # Fetch the classifier before running the analyzer, and keep the
    # analyzer's result in list context, exactly as a nested call would.
    my $classifier = $self->classifier;
    my @analyzed   = $self->analyzer->analyze($text, $features);

    return $classifier->classify(@analyzed);
}
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Make the class immutable now that all attributes and method modifiers
# have been declared.
__PACKAGE__->meta->make_immutable();

# A module file has to end with a true value.
1;
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=pod |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 NAME |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
AI::Classifier::Text - A convenient class for text classification |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 VERSION |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
version 0.03 |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head1 SYNOPSIS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
my $cl = AI::Classifier::Text->new(classifier => AI::NaiveBayes->new(...)); |
56
|
|
|
|
|
|
|
my $res = $cl->classify("do cats eat bats?"); |
57
|
|
|
|
|
|
|
$res = $cl->classify("do cats eat bats?", { new_user => 1 }); |
58
|
|
|
|
|
|
|
$cl->store('some-file'); |
59
|
|
|
|
|
|
|
# later |
60
|
|
|
|
|
|
|
my $cl = AI::Classifier::Text->load('some-file'); |
61
|
|
|
|
|
|
|
my $res = $cl->classify("do cats eat bats?"); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 DESCRIPTION |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
AI::Classifier::Text combines a lexical analyzer (by default being |
66
|
|
|
|
|
|
|
L<AI::Classifier::Text::Analyzer>) and a classifier (like AI::NaiveBayes) to |
67
|
|
|
|
|
|
|
perform text classification. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
This is partially based on AI::TextCategorizer. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=over 4 |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=item C<classifier> |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
An object that'll perform classification of supplied feature vectors. Has to |
78
|
|
|
|
|
|
|
define a C<classify()> method, which accepts a hash reference. The return value of |
79
|
|
|
|
|
|
|
C<< AI::Classifier::Text->classify() >> will be the return value of C<classifier>'s |
80
|
|
|
|
|
|
|
C<classify()> method. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
This attribute has to be supplied to the C<new()> method during object creation. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=item C<analyzer> |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
The object performing lexical analysis of the text in order to produce a feature |
87
|
|
|
|
|
|
|
vector. This defaults to a new C<AI::Classifier::Text::Analyzer> instance. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=back |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=head1 METHODS |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=over 4 |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
=item C<< new(classifier => $foo) >> |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Creates a new C<AI::Classifier::Text> object. The classifier argument is mandatory. |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=item C<classify($document, $features)> |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Categorize the given document. A lexical analyzer will be used to extract |
102
|
|
|
|
|
|
|
features from C<$document>, and in addition to that the features from |
103
|
|
|
|
|
|
|
C<$features> hash reference will be added. The return value comes directly from |
104
|
|
|
|
|
|
|
the C<classifier> object's C<classify> method. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=back |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 SEE ALSO |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
AI::NaiveBayes(3), AI::Categorizer(3) |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head1 AUTHOR |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
Zbigniew Lukasiak <zlukasiak@opera.com>, Tadeusz Sośnierz <tsosnierz@opera.com> |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
This software is copyright (c) 2012 by Opera Software ASA. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
121
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
__END__ |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# ABSTRACT: A convenient class for text classification |
128
|
|
|
|
|
|
|
|