line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Hailo; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:AVAR'; |
3
|
|
|
|
|
|
|
$Hailo::VERSION = '0.75'; |
4
|
42
|
|
|
42
|
|
11404105
|
use v5.10.0; |
|
42
|
|
|
|
|
435
|
|
5
|
42
|
|
|
42
|
|
11454
|
use autodie qw(open close); |
|
42
|
|
|
|
|
347791
|
|
|
42
|
|
|
|
|
266
|
|
6
|
42
|
|
|
42
|
|
26350
|
use Moose; |
|
42
|
|
|
|
|
6232751
|
|
|
42
|
|
|
|
|
332
|
|
7
|
42
|
|
|
42
|
|
304918
|
use MooseX::StrictConstructor; |
|
42
|
|
|
|
|
805957
|
|
|
42
|
|
|
|
|
209
|
|
8
|
42
|
|
|
42
|
|
341503
|
use MooseX::Types::Moose ':all'; |
|
42
|
|
|
|
|
1038807
|
|
|
42
|
|
|
|
|
457
|
|
9
|
42
|
|
|
42
|
|
338353
|
use File::Glob ':glob'; |
|
42
|
|
|
|
|
90
|
|
|
42
|
|
|
|
|
10476
|
|
10
|
42
|
|
|
42
|
|
309
|
use Class::Load qw(try_load_class); |
|
42
|
|
|
|
|
84
|
|
|
42
|
|
|
|
|
2154
|
|
11
|
42
|
|
|
42
|
|
253
|
use Scalar::Util qw(blessed); |
|
42
|
|
|
|
|
187
|
|
|
42
|
|
|
|
|
2066
|
|
12
|
42
|
|
|
42
|
|
282
|
use List::Util qw(first); |
|
42
|
|
|
|
|
80
|
|
|
42
|
|
|
|
|
2787
|
|
13
|
42
|
|
|
42
|
|
269
|
use namespace::clean -except => 'meta'; |
|
42
|
|
|
|
|
85
|
|
|
42
|
|
|
|
|
266
|
|
14
|
|
|
|
|
|
|
|
15
|
42
|
|
|
|
|
22874
|
use constant PLUGINS => [ qw[ |
16
|
|
|
|
|
|
|
Hailo::Engine::Default |
17
|
|
|
|
|
|
|
Hailo::Engine::Scored |
18
|
|
|
|
|
|
|
Hailo::Storage::MySQL |
19
|
|
|
|
|
|
|
Hailo::Storage::PostgreSQL |
20
|
|
|
|
|
|
|
Hailo::Storage::SQLite |
21
|
|
|
|
|
|
|
Hailo::Tokenizer::Chars |
22
|
|
|
|
|
|
|
Hailo::Tokenizer::Words |
23
|
|
|
|
|
|
|
Hailo::UI::ReadLine |
24
|
42
|
|
|
42
|
|
41817
|
] ]; |
|
42
|
|
|
|
|
83
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has brain => ( |
27
|
|
|
|
|
|
|
isa => Str, |
28
|
|
|
|
|
|
|
is => 'rw', |
29
|
|
|
|
|
|
|
); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
has order => ( |
32
|
|
|
|
|
|
|
isa => Int, |
33
|
|
|
|
|
|
|
is => 'rw', |
34
|
|
|
|
|
|
|
default => 2, |
35
|
|
|
|
|
|
|
trigger => sub { |
36
|
|
|
|
|
|
|
my ($self, $order) = @_; |
37
|
|
|
|
|
|
|
$self->_custom_order(1); |
38
|
|
|
|
|
|
|
}, |
39
|
|
|
|
|
|
|
); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
has _custom_order => ( |
42
|
|
|
|
|
|
|
isa => Bool, |
43
|
|
|
|
|
|
|
is => 'rw', |
44
|
|
|
|
|
|
|
default => 0, |
45
|
|
|
|
|
|
|
init_arg => undef, |
46
|
|
|
|
|
|
|
documentation => "Here so we can differentiate between the default value of order being explictly set and being set by default", |
47
|
|
|
|
|
|
|
); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
has _custom_tokenizer_class => ( |
50
|
|
|
|
|
|
|
isa => Bool, |
51
|
|
|
|
|
|
|
is => 'rw', |
52
|
|
|
|
|
|
|
default => 0, |
53
|
|
|
|
|
|
|
init_arg => undef, |
54
|
|
|
|
|
|
|
documentation => "Here so we can differentiate between the default value of tokenizer_class being explictly set and being set by default", |
55
|
|
|
|
|
|
|
); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
has save_on_exit => ( |
58
|
|
|
|
|
|
|
isa => Bool, |
59
|
|
|
|
|
|
|
is => 'rw', |
60
|
|
|
|
|
|
|
default => 1, |
61
|
|
|
|
|
|
|
); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
has brain_resource => ( |
64
|
|
|
|
|
|
|
documentation => "Alias for `brain' for backwards compatibility", |
65
|
|
|
|
|
|
|
isa => Str, |
66
|
|
|
|
|
|
|
is => 'rw', |
67
|
|
|
|
|
|
|
trigger => sub { |
68
|
|
|
|
|
|
|
my ($self, $brain) = @_; |
69
|
|
|
|
|
|
|
$self->brain($brain); |
70
|
|
|
|
|
|
|
}, |
71
|
|
|
|
|
|
|
); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
sub BUILD { |
74
|
279
|
|
|
279
|
0
|
6881
|
my ($self) = @_; |
75
|
279
|
|
|
|
|
6091
|
my $brain = $self->brain; |
76
|
279
|
100
|
|
|
|
1373
|
return if !defined $brain; |
77
|
240
|
|
|
|
|
12045
|
$self->brain(bsd_glob($brain)); |
78
|
240
|
|
|
|
|
4865
|
return; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
my %has = ( |
82
|
|
|
|
|
|
|
engine => { |
83
|
|
|
|
|
|
|
name => 'Engine', |
84
|
|
|
|
|
|
|
default => 'Default', |
85
|
|
|
|
|
|
|
}, |
86
|
|
|
|
|
|
|
storage => { |
87
|
|
|
|
|
|
|
name => 'Storage', |
88
|
|
|
|
|
|
|
default => 'SQLite', |
89
|
|
|
|
|
|
|
}, |
90
|
|
|
|
|
|
|
tokenizer => { |
91
|
|
|
|
|
|
|
name => 'Tokenizer', |
92
|
|
|
|
|
|
|
default => 'Words', |
93
|
|
|
|
|
|
|
}, |
94
|
|
|
|
|
|
|
ui => { |
95
|
|
|
|
|
|
|
name => 'UI', |
96
|
|
|
|
|
|
|
default => 'ReadLine', |
97
|
|
|
|
|
|
|
}, |
98
|
|
|
|
|
|
|
); |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
for my $k (keys %has) { |
101
|
|
|
|
|
|
|
my $name = $has{$k}->{name}; |
102
|
|
|
|
|
|
|
my $default = $has{$k}->{default}; |
103
|
|
|
|
|
|
|
my $method_class = "${k}_class"; |
104
|
|
|
|
|
|
|
my $method_args = "${k}_args"; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# working classes |
107
|
|
|
|
|
|
|
has "${k}_class" => ( |
108
|
|
|
|
|
|
|
isa => Str, |
109
|
|
|
|
|
|
|
is => "rw", |
110
|
|
|
|
|
|
|
default => $default, |
111
|
|
|
|
|
|
|
($k eq 'tokenizer' |
112
|
|
|
|
|
|
|
? (trigger => sub { |
113
|
|
|
|
|
|
|
my ($self, $class) = @_; |
114
|
|
|
|
|
|
|
$self->_custom_tokenizer_class(1); |
115
|
|
|
|
|
|
|
}) |
116
|
|
|
|
|
|
|
: ()) |
117
|
|
|
|
|
|
|
); |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
# Object arguments |
120
|
|
|
|
|
|
|
has "${k}_args" => ( |
121
|
|
|
|
|
|
|
documentation => "Arguments for the $name class", |
122
|
|
|
|
|
|
|
isa => HashRef, |
123
|
|
|
|
|
|
|
is => "ro", |
124
|
|
|
|
|
|
|
default => sub { +{} }, |
125
|
|
|
|
|
|
|
); |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# Working objects |
128
|
|
|
|
|
|
|
has "_${k}" => ( |
129
|
|
|
|
|
|
|
does => "Hailo::Role::$name", |
130
|
|
|
|
|
|
|
lazy_build => 1, |
131
|
|
|
|
|
|
|
is => 'ro', |
132
|
|
|
|
|
|
|
init_arg => undef, |
133
|
|
|
|
|
|
|
); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Generate the object itself |
136
|
42
|
|
|
42
|
|
329
|
no strict 'refs'; |
|
42
|
|
|
|
|
81
|
|
|
42
|
|
|
|
|
22453
|
|
137
|
|
|
|
|
|
|
*{"_build__${k}"} = sub { |
138
|
354
|
|
|
354
|
|
920
|
my ($self) = @_; |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my $obj = $self->_new_class( |
141
|
|
|
|
|
|
|
$name, |
142
|
|
|
|
|
|
|
$self->$method_class, |
143
|
|
|
|
|
|
|
{ |
144
|
|
|
|
|
|
|
arguments => $self->$method_args, |
145
|
|
|
|
|
|
|
($k =~ /^(?:engine|storage)$/s |
146
|
|
|
|
|
|
|
? (order => $self->order) |
147
|
|
|
|
|
|
|
: ()), |
148
|
|
|
|
|
|
|
($k eq 'engine' |
149
|
|
|
|
|
|
|
? (storage => $self->_storage) |
150
|
|
|
|
|
|
|
: ()), |
151
|
|
|
|
|
|
|
(($k eq 'storage' and defined $self->brain) |
152
|
|
|
|
|
|
|
? ( |
153
|
354
|
100
|
100
|
|
|
8668
|
hailo => do { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
154
|
239
|
|
|
|
|
1354
|
require Scalar::Util; |
155
|
239
|
|
|
|
|
1433
|
Scalar::Util::weaken(my $s = $self); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
my %callback = ( |
158
|
200
|
|
|
200
|
|
4530
|
has_custom_order => sub { $s->_custom_order }, |
159
|
3
|
|
|
3
|
|
70
|
has_custom_tokenizer_class => sub { $s->_custom_tokenizer_class }, |
160
|
|
|
|
|
|
|
set_order => sub { |
161
|
1
|
|
|
1
|
|
3
|
my ($db_order) = @_; |
162
|
1
|
|
|
|
|
23
|
$s->order($db_order); |
163
|
1
|
|
|
|
|
21
|
$s->_engine->order($db_order); |
164
|
|
|
|
|
|
|
}, |
165
|
|
|
|
|
|
|
set_tokenizer_class => sub { |
166
|
1
|
|
|
1
|
|
3
|
my ($db_tokenizer_class) = @_; |
167
|
1
|
|
|
|
|
23
|
$s->tokenizer_class($db_tokenizer_class); |
168
|
|
|
|
|
|
|
}, |
169
|
239
|
|
|
|
|
2833
|
); |
170
|
|
|
|
|
|
|
|
171
|
239
|
|
|
|
|
5526
|
\%callback; |
172
|
|
|
|
|
|
|
}, |
173
|
|
|
|
|
|
|
brain => $self->brain |
174
|
|
|
|
|
|
|
) |
175
|
|
|
|
|
|
|
: ()), |
176
|
|
|
|
|
|
|
($k eq 'storage' |
177
|
|
|
|
|
|
|
? (tokenizer_class => $self->tokenizer_class) |
178
|
|
|
|
|
|
|
: ()) |
179
|
|
|
|
|
|
|
}, |
180
|
|
|
|
|
|
|
); |
181
|
|
|
|
|
|
|
|
182
|
351
|
|
|
|
|
9568
|
return $obj; |
183
|
|
|
|
|
|
|
}; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub _new_class { |
187
|
354
|
|
|
354
|
|
1072
|
my ($self, $type, $class, $args) = @_; |
188
|
|
|
|
|
|
|
|
189
|
354
|
|
|
|
|
532
|
my $pkg; |
190
|
354
|
50
|
|
|
|
904
|
if ($class =~ m[^\+(?<custom_plugin>.+)$]) { |
191
|
39
|
|
|
39
|
|
20683
|
$pkg = $+{custom_plugin}; |
|
39
|
|
|
|
|
15568
|
|
|
39
|
|
|
|
|
44825
|
|
|
0
|
|
|
|
|
0
|
|
192
|
|
|
|
|
|
|
} else { |
193
|
354
|
|
|
|
|
611
|
my @plugins = @{ $self->PLUGINS }; |
|
354
|
|
|
|
|
1904
|
|
194
|
|
|
|
|
|
|
# Be fuzzy about includes, e.g. DBD::SQLite or SQLite or sqlite will go |
195
|
1836
|
|
|
1836
|
|
8374
|
$pkg = first { / $type : .* : $class /ix } |
196
|
354
|
|
|
|
|
2490
|
sort { length $a <=> length $b } |
|
6018
|
|
|
|
|
8915
|
|
197
|
|
|
|
|
|
|
@plugins; |
198
|
|
|
|
|
|
|
|
199
|
354
|
100
|
|
|
|
1820
|
unless ($pkg) { |
200
|
3
|
|
|
|
|
7
|
local $" = ', '; |
201
|
3
|
|
|
|
|
6
|
my @p = grep { /$type/ } @plugins; |
|
24
|
|
|
|
|
80
|
|
202
|
3
|
|
|
|
|
68
|
die "Couldn't find a class name matching '$class' in plugins '@p'"; |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
351
|
|
|
|
|
1461
|
my ($success, $error) = try_load_class($pkg); |
207
|
351
|
50
|
|
|
|
39331
|
die $error if !$success; |
208
|
|
|
|
|
|
|
|
209
|
351
|
|
|
|
|
11731
|
return $pkg->new(%$args); |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub save { |
213
|
259
|
|
|
259
|
1
|
592
|
my ($self, @args) = @_; |
214
|
259
|
|
|
|
|
5384
|
$self->_storage->save(@args); |
215
|
259
|
|
|
|
|
511
|
return; |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
sub train { |
219
|
23
|
|
|
23
|
1
|
2212
|
my ($self, $input, $fast) = @_; |
220
|
|
|
|
|
|
|
|
221
|
23
|
|
|
|
|
577
|
$self->_storage->start_training(); |
222
|
|
|
|
|
|
|
|
223
|
23
|
100
|
100
|
|
|
287
|
if (not ref $input and defined $input and $input eq '-') { |
|
|
100
|
100
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# With STDIN |
225
|
1
|
50
|
|
|
|
6
|
die "You must provide STDIN when training from '-'" if $self->_is_interactive(*STDIN); |
226
|
0
|
|
|
|
|
0
|
$self->_train_fh(*STDIN, $fast); |
227
|
|
|
|
|
|
|
} elsif (ref $input eq 'GLOB') { |
228
|
|
|
|
|
|
|
# With a filehandle |
229
|
1
|
|
|
|
|
7
|
$self->_train_fh($input, $fast); |
230
|
|
|
|
|
|
|
} elsif (not ref $input) { |
231
|
|
|
|
|
|
|
# With a file |
232
|
13
|
|
|
|
|
72
|
open my $fh, '<:encoding(utf8)', $input; |
233
|
11
|
|
|
|
|
29650
|
$self->_train_fh($fh, $fast, $input); |
234
|
|
|
|
|
|
|
} elsif (ref $input eq 'ARRAY') { |
235
|
|
|
|
|
|
|
# With an Array |
236
|
7
|
|
|
|
|
25
|
for my $line (@$input) { |
237
|
936
|
|
|
|
|
2320
|
$self->_learn_one($line, $fast); |
238
|
936
|
50
|
|
|
|
24732
|
$self->_engine->flush_cache if !$fast; |
239
|
|
|
|
|
|
|
} |
240
|
7
|
50
|
|
|
|
30
|
$self->_engine->flush_cache if $fast; |
241
|
|
|
|
|
|
|
} else { |
242
|
|
|
|
|
|
|
# With something naughty |
243
|
1
|
|
|
|
|
36
|
die "Unknown input: $input"; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
19
|
|
|
|
|
535
|
$self->_storage->stop_training(); |
247
|
|
|
|
|
|
|
|
248
|
19
|
|
|
|
|
174
|
return; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub _train_fh { |
252
|
12
|
|
|
12
|
|
300
|
my ($self, $fh, $fast) = @_; |
253
|
|
|
|
|
|
|
|
254
|
12
|
|
|
|
|
313
|
while (my $line = <$fh>) { |
255
|
990
|
|
|
|
|
2194
|
chomp $line; |
256
|
990
|
|
|
|
|
2302
|
$self->_learn_one($line, $fast); |
257
|
990
|
50
|
|
|
|
24337
|
$self->_engine->flush_cache if !$fast; |
258
|
|
|
|
|
|
|
} |
259
|
12
|
50
|
|
|
|
58
|
$self->_engine->flush_cache if $fast; |
260
|
|
|
|
|
|
|
|
261
|
12
|
|
|
|
|
342
|
return; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub learn { |
265
|
1639
|
|
|
1639
|
1
|
869510
|
my ($self, $input) = @_; |
266
|
1639
|
|
|
|
|
2812
|
my $inputs; |
267
|
|
|
|
|
|
|
|
268
|
1639
|
100
|
|
|
|
5295
|
if (not defined $input) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
269
|
2
|
|
|
|
|
55
|
die "Cannot learn from undef input"; |
270
|
|
|
|
|
|
|
} elsif (not ref $input) { |
271
|
1618
|
|
|
|
|
3567
|
$inputs = [$input]; |
272
|
|
|
|
|
|
|
} elsif (ref $input eq 'ARRAY') { |
273
|
17
|
|
|
|
|
35
|
$inputs = $input; # With an Array |
274
|
|
|
|
|
|
|
} else { |
275
|
2
|
|
|
|
|
84
|
die "Unknown input: $input"; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
1635
|
|
|
|
|
47171
|
my $storage = $self->_storage; |
279
|
|
|
|
|
|
|
|
280
|
1634
|
|
|
|
|
5764
|
$storage->start_learning(); |
281
|
1634
|
|
|
|
|
5075
|
$self->_learn_one($_) for @$inputs; |
282
|
1633
|
|
|
|
|
5556
|
$storage->stop_learning(); |
283
|
1633
|
|
|
|
|
5741
|
return; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub _learn_one { |
287
|
3561
|
|
|
3561
|
|
7403
|
my ($self, $input, $fast) = @_; |
288
|
3561
|
|
|
|
|
74945
|
my $engine = $self->_engine; |
289
|
|
|
|
|
|
|
|
290
|
3561
|
|
|
|
|
72954
|
my $tokens = $self->_tokenizer->make_tokens($input); |
291
|
3560
|
50
|
|
|
|
14290
|
$fast ? $engine->learn_cached($tokens) : $engine->learn($tokens); |
292
|
|
|
|
|
|
|
|
293
|
3560
|
|
|
|
|
19445
|
return; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub learn_reply { |
297
|
5
|
|
|
5
|
1
|
23
|
my ($self, $input) = @_; |
298
|
5
|
|
|
|
|
20
|
$self->learn($input); |
299
|
3
|
|
|
|
|
16
|
return $self->reply($input); |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub reply { |
303
|
1417
|
|
|
1417
|
1
|
305508
|
my ($self, $input) = @_; |
304
|
|
|
|
|
|
|
|
305
|
1417
|
|
|
|
|
42451
|
my $storage = $self->_storage; |
306
|
|
|
|
|
|
|
# start_training() hasn't been called so we can't guarentee that |
307
|
|
|
|
|
|
|
# the storage has been engaged at this point. This must be called |
308
|
|
|
|
|
|
|
# before ->_engine() is called anywhere to ensure that the |
309
|
|
|
|
|
|
|
# lazy-loading in the engine works. |
310
|
1417
|
100
|
|
|
|
35445
|
$storage->_engage() unless $storage->_engaged; |
311
|
|
|
|
|
|
|
|
312
|
1216
|
|
|
|
|
25831
|
my $engine = $self->_engine; |
313
|
1216
|
|
|
|
|
25084
|
my $tokenizer = $self->_tokenizer; |
314
|
|
|
|
|
|
|
|
315
|
1216
|
|
|
|
|
1805
|
my $reply; |
316
|
1216
|
100
|
|
|
|
2588
|
if (defined $input) { |
317
|
1207
|
|
|
|
|
4099
|
my $tokens = $tokenizer->make_tokens($input); |
318
|
1207
|
|
|
|
|
3835
|
$reply = $engine->reply($tokens); |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
else { |
321
|
9
|
|
|
|
|
49
|
$reply = $engine->reply(); |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
|
324
|
1216
|
100
|
|
|
|
3166
|
return unless defined $reply; |
325
|
1213
|
|
|
|
|
4477
|
return $tokenizer->make_output($reply); |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
sub stats { |
329
|
29
|
|
|
29
|
1
|
335
|
my ($self) = @_; |
330
|
|
|
|
|
|
|
|
331
|
29
|
|
|
|
|
671
|
return $self->_storage->totals(); |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub DEMOLISH { |
335
|
281
|
|
|
281
|
0
|
879
|
my ($self) = @_; |
336
|
281
|
100
|
100
|
|
|
6811
|
$self->save() if blessed $self->{_storage} and $self->save_on_exit; |
337
|
281
|
|
|
|
|
678
|
return; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
sub _is_interactive { |
341
|
23
|
|
|
23
|
|
2780
|
require IO::Interactive; |
342
|
23
|
|
|
|
|
28438
|
return IO::Interactive::is_interactive(); |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=encoding utf8 |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=head1 NAME |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Hailo - A pluggable Markov engine analogous to MegaHAL |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head1 SYNOPSIS |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
This is the synopsis for using Hailo as a module. See L<hailo> for |
356
|
|
|
|
|
|
|
command-line invocation. |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Hailo requires Perl 5.10 |
359
|
|
|
|
|
|
|
use v5.10.0; |
360
|
|
|
|
|
|
|
use Moose; |
361
|
|
|
|
|
|
|
use Hailo; |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Construct a new in-memory Hailo using the SQLite backend. See |
364
|
|
|
|
|
|
|
# backend documentation for other options. |
365
|
|
|
|
|
|
|
my $hailo = Hailo->new; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Various ways to learn |
368
|
|
|
|
|
|
|
my @train_this = ("I like big butts", "and I can not lie"); |
369
|
|
|
|
|
|
|
$hailo->learn(\@train_this); |
370
|
|
|
|
|
|
|
$hailo->learn($_) for @train_this; |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
# Heavy-duty training interface. Backends may drop some safety |
373
|
|
|
|
|
|
|
# features like journals or synchronous IO to train faster using |
374
|
|
|
|
|
|
|
# this mode. |
375
|
|
|
|
|
|
|
$hailo->train("megahal.trn"); |
376
|
|
|
|
|
|
|
$hailo->train($filehandle); |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
# Make the brain babble |
379
|
|
|
|
|
|
|
say $hailo->reply("hello good sir."); |
380
|
|
|
|
|
|
|
# Just say something at random |
381
|
|
|
|
|
|
|
say $hailo->reply(); |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=head1 DESCRIPTION |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Hailo is a fast and lightweight markov engine intended to replace |
386
|
|
|
|
|
|
|
L<AI::MegaHAL|AI::MegaHAL>. It has a pluggable |
387
|
|
|
|
|
|
|
L<storage|Hailo::Role::Storage>, L<tokenizer|Hailo::Role::Tokenizer> |
388
|
|
|
|
|
|
|
and L<engine|Hailo::Role::Engine> backends. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
It is similar to MegaHAL in functionality, the main differences (with the |
391
|
|
|
|
|
|
|
default backends) being better scalability, drastically less memory usage, |
392
|
|
|
|
|
|
|
an improved tokenizer, and tidier output. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
With this distribution, you can create, modify, and query Hailo brains. To |
395
|
|
|
|
|
|
|
use Hailo in event-driven POE applications, you can use the |
396
|
|
|
|
|
|
|
L<POE::Component::Hailo|POE::Component::Hailo> wrapper. One example is |
397
|
|
|
|
|
|
|
L<POE::Component::IRC::Plugin::Hailo|POE::Component::IRC::Plugin::Hailo>, |
398
|
|
|
|
|
|
|
which implements an IRC chat bot. |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
=head2 Etymology |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
I<Hailo> is a portmanteau of I<HAL> (as in MegaHAL) and |
403
|
|
|
|
|
|
|
L<failo|http://identi.ca/failo>. |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head1 Backends |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Hailo supports pluggable L<storage|Hailo::Role::Storage> and |
408
|
|
|
|
|
|
|
L<tokenizer|Hailo::Role::Tokenizer> backends, it also supports a |
409
|
|
|
|
|
|
|
pluggable L<UI|Hailo::Role::UI> backend which is used by the L<hailo> |
410
|
|
|
|
|
|
|
command-line utility. |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
=head2 Storage |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
Hailo can currently store its data in either a |
415
|
|
|
|
|
|
|
L<SQLite|Hailo::Storage::SQLite>, |
416
|
|
|
|
|
|
|
L<PostgreSQL|Hailo::Storage::PostgreSQL> or |
417
|
|
|
|
|
|
|
L<MySQL|Hailo::Storage::MySQL> database. Some NoSQL backends were |
418
|
|
|
|
|
|
|
supported in earlier versions, but they were removed as they had no |
419
|
|
|
|
|
|
|
redeeming quality. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
SQLite is the primary target for Hailo. It's much faster and uses less |
422
|
|
|
|
|
|
|
resources than the other two. It's highly recommended that you use it. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
See L<Hailo::Storage/"Comparison of backends"> for benchmarks showing |
425
|
|
|
|
|
|
|
how the various backends compare under different workloads, and how |
426
|
|
|
|
|
|
|
you can create your own. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head2 Tokenizer |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
By default Hailo will use L<the word |
431
|
|
|
|
|
|
|
tokenizer|Hailo::Tokenizer::Words> to split up input by whitespace, |
432
|
|
|
|
|
|
|
taking into account things like quotes, sentence terminators and more. |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
There's also a L<the character |
435
|
|
|
|
|
|
|
tokenizer|Hailo::Tokenizer::Chars>. It's not generally useful for a |
436
|
|
|
|
|
|
|
conversation bot but can be used to e.g. generate new words given a |
437
|
|
|
|
|
|
|
list of existing words. |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=head1 UPGRADING |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
Hailo makes no promises about brains generated with earlier versions |
442
|
|
|
|
|
|
|
being compatable with future version and due to the way Hailo works |
443
|
|
|
|
|
|
|
there's no practical way to make that promise. Learning in Hailo is |
444
|
|
|
|
|
|
|
lossy so an accurate conversion is impossible. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
If you're maintaining a Hailo brain that you want to keep using you |
447
|
|
|
|
|
|
|
should save the input you trained it on and re-train when you upgrade. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
Hailo is always going to lose information present in the input you |
450
|
|
|
|
|
|
|
give it. How input tokens get split up and saved to the storage |
451
|
|
|
|
|
|
|
backend depends on the version of the tokenizer being used and how |
452
|
|
|
|
|
|
|
that input gets saved to the database. |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
For instance if an earlier version of Hailo tokenized C<"foo+bar"> |
455
|
|
|
|
|
|
|
simply as C<"foo+bar"> but a later version split that up into |
456
|
|
|
|
|
|
|
C<"foo", "+", "bar">, then an input of C<"foo+bar are my favorite |
457
|
|
|
|
|
|
|
metasyntactic variables"> wouldn't take into account the existing |
458
|
|
|
|
|
|
|
C<"foo+bar"> string in the database. |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
Tokenizer changes like this would cause the brains to accumulate |
461
|
|
|
|
|
|
|
garbage and would leave other parts in a state they wouldn't otherwise |
462
|
|
|
|
|
|
|
have gotten into. |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
There have been more drastic changes to the database format itself in |
465
|
|
|
|
|
|
|
the past. |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Having said all that the database format and the tokenizer are |
468
|
|
|
|
|
|
|
relatively stable. At the time of writing 0.33 is the latest release |
469
|
|
|
|
|
|
|
and it's compatable with brains down to at least 0.17. If you're |
470
|
|
|
|
|
|
|
upgrading and there isn't a big notice about the storage format being |
471
|
|
|
|
|
|
|
incompatable in the F<Changes> file your old brains will probably work |
472
|
|
|
|
|
|
|
just fine. |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=head2 C<brain> |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
The name of the brain (file name, database name) to use as storage. |
479
|
|
|
|
|
|
|
There is no default. Whether this gets used at all depends on the |
480
|
|
|
|
|
|
|
storage backend, currently only SQLite uses it. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=head2 C<save_on_exit> |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
A boolean value indicating whether Hailo should save its state before |
485
|
|
|
|
|
|
|
its object gets destroyed. This defaults to true and will simply call |
486
|
|
|
|
|
|
|
L<save|/save> at C<DEMOLISH> time. |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
See L<Hailo::Storage::SQLite/"in_memory"> for how the SQLite backend |
489
|
|
|
|
|
|
|
uses this option. |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
=head2 C<order> |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
The Markov order (chain length) you want to use for an empty brain. |
494
|
|
|
|
|
|
|
The default is 2. |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=head2 C<engine_class> |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=head2 C<storage_class> |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=head2 C<tokenizer_class> |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
=head2 C<ui_class> |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
A a short name name of the class we use for the engine, storage, |
505
|
|
|
|
|
|
|
tokenizer or ui backends. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
By default this is B<Default> for the engine, B<SQLite> for storage, |
508
|
|
|
|
|
|
|
B<Words> for the tokenizer and B<ReadLine> for the UI. The UI backend |
509
|
|
|
|
|
|
|
is only used by the L<hailo> command-line interface. |
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
You can only specify the short name of one of the packages Hailo |
512
|
|
|
|
|
|
|
itself ships with. If you need another class then just prefix the |
513
|
|
|
|
|
|
|
package with a plus (Catalyst style), e.g. C<+My::Foreign::Tokenizer>. |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
=head2 C<engine_args> |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=head2 C<storage_args> |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
=head2 C<tokenizer_args> |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=head2 C<ui_args> |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
A C<HashRef> of arguments for engine/storage/tokenizer/ui |
524
|
|
|
|
|
|
|
backends. See the documentation for the backends for what sort of |
525
|
|
|
|
|
|
|
arguments they accept. |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
=head1 METHODS |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
=head2 C<new> |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
This is the constructor. It accepts the attributes specified in |
532
|
|
|
|
|
|
|
L</ATTRIBUTES>. |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head2 C<learn> |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
Takes a string or an array reference of strings and learns from them. |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=head2 C<train> |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Takes a filename, filehandle or array reference and learns from all its |
541
|
|
|
|
|
|
|
lines. If a filename is passed, the file is assumed to be UTF-8 encoded. |
542
|
|
|
|
|
|
|
Unlike L<C<learn>|/learn>, this method sacrifices some safety (disables |
543
|
|
|
|
|
|
|
the database journal, fsyncs, etc) for speed while learning. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
You can prove a second parameter which, if true, will use aggressive |
546
|
|
|
|
|
|
|
caching while training, which will speed things up considerably for large |
547
|
|
|
|
|
|
|
inputs, but will take up quite a bit of memory. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=head2 C<reply> |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
Takes an optional line of text and generates a reply that might be relevant. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=head2 C<learn_reply> |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
Takes a string argument, learns from it, and generates a reply that |
556
|
|
|
|
|
|
|
might be relevant. This is equivalent to calling L<learn|/learn> |
557
|
|
|
|
|
|
|
followed by L<reply|/reply>. |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=head2 C<save> |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Tells the underlying storage backend to L<save its |
562
|
|
|
|
|
|
|
state|Hailo::Role::Storage/"save">, any arguments to this method will |
563
|
|
|
|
|
|
|
be passed as-is to the backend. |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=head2 C<stats> |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
Takes no arguments. Returns the number of tokens, expressions, previous |
568
|
|
|
|
|
|
|
token links and next token links. |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
=head1 SUPPORT |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
You can join the IRC channel I<#hailo> on FreeNode if you have questions. |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
=head1 BUGS |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
Bugs, feature requests and other issues are tracked in L<Hailo's RT on |
577
|
|
|
|
|
|
|
rt.cpan.org|https://rt.cpan.org/Dist/Display.html?Name=Hailo> |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=head1 SEE ALSO |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=over |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=item * L<POE::Component::Hailo> - A non-blocking POE wrapper around Hailo |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=item * L<POE::Component::IRC::Plugin::Hailo> - A Hailo IRC bot plugin |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=item * L<http://github.com/hinrik/failo> - Failo, an IRC bot that uses Hailo |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
=item * L<http://github.com/bingos/gumbybrain> - GumbyBRAIN, a more famous IRC bot that uses Hailo |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=item * L<Hailo::UI::Web> - A L<Catalyst> and jQuery powered web |
592
|
|
|
|
|
|
|
interface to Hailo available at L<hailo.nix.is|http://hailo.nix.is> |
593
|
|
|
|
|
|
|
and as L<hailo-ui-web|http://github.com/avar/hailo-ui-web> on |
594
|
|
|
|
|
|
|
L<GitHub|http://github.com> |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=item * L<tweetmix|http://www.tweetmix.me/>, a random tweet generator powered by Hailo |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=item * L<http://github.com/pteichman/cobe> - cobe, a Python port of MegaHAL "inspired by the success of Hailo" |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=back |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=head1 LINKS |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=over |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=item * L<hailo.org|http://hailo.org> - Hailo's website |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=item * L<http://bit.ly/hailo_rewrite_of_megahal> - Hailo: A Perl rewrite of |
609
|
|
|
|
|
|
|
MegaHAL, A blog posting about the motivation behind Hailo |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=item * L<http://blogs.perl.org/users/aevar_arnfjor_bjarmason/hailo/> - |
612
|
|
|
|
|
|
|
More blog posts about Hailo on E<AElig>var ArnfjE<ouml>rE<eth> |
613
|
|
|
|
|
|
|
Bjarmason's L<blogs.perl.org|http://blogs.perl.org> blog |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=item * Hailo on L<freshmeat|http://freshmeat.net/projects/hailo> and |
616
|
|
|
|
|
|
|
L<ohloh|https://www.ohloh.net/p/hailo> |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=back |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
=head1 AUTHORS |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org> |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and |
629
|
|
|
|
|
|
|
E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org> |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
This program is free software, you can redistribute it and/or modify |
632
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=cut |