| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Hailo; | 
| 2 |  |  |  |  |  |  | BEGIN { | 
| 3 | 14 |  |  | 14 |  | 343245 | $Hailo::AUTHORITY = 'cpan:AVAR'; | 
| 4 |  |  |  |  |  |  | } | 
| 5 |  |  |  |  |  |  | { | 
| 6 |  |  |  |  |  |  | $Hailo::VERSION = '0.72'; | 
| 7 |  |  |  |  |  |  | } | 
| 8 |  |  |  |  |  |  |  | 
| 9 | 14 |  |  | 14 |  | 325 | use 5.010; | 
|  | 14 |  |  |  |  | 49 |  | 
|  | 14 |  |  |  |  | 530 |  | 
| 10 | 14 |  |  | 14 |  | 17050 | use autodie qw(open close); | 
|  | 14 |  |  |  |  | 270781 |  | 
|  | 14 |  |  |  |  | 94 |  | 
| 11 | 14 |  |  | 14 |  | 18194 | use Any::Moose; | 
|  | 14 |  |  |  |  | 497629 |  | 
|  | 14 |  |  |  |  | 112 |  | 
| 12 | 14 |  |  | 14 |  | 9136 | use Any::Moose 'X::StrictConstructor'; | 
|  | 14 |  |  |  |  | 136 |  | 
|  | 14 |  |  |  |  | 72 |  | 
| 13 |  |  |  |  |  |  | use File::Glob ':glob'; | 
| 14 |  |  |  |  |  |  | use Class::Load qw(try_load_class); | 
| 15 |  |  |  |  |  |  | use Scalar::Util qw(blessed); | 
| 16 |  |  |  |  |  |  | use List::Util qw(first); | 
| 17 |  |  |  |  |  |  | use namespace::clean -except => 'meta'; | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | use constant PLUGINS => [ qw[ | 
| 20 |  |  |  |  |  |  | Hailo::Engine::Default | 
| 21 |  |  |  |  |  |  | Hailo::Engine::Scored | 
| 22 |  |  |  |  |  |  | Hailo::Storage::MySQL | 
| 23 |  |  |  |  |  |  | Hailo::Storage::PostgreSQL | 
| 24 |  |  |  |  |  |  | Hailo::Storage::SQLite | 
| 25 |  |  |  |  |  |  | Hailo::Tokenizer::Chars | 
| 26 |  |  |  |  |  |  | Hailo::Tokenizer::Words | 
| 27 |  |  |  |  |  |  | Hailo::UI::ReadLine | 
| 28 |  |  |  |  |  |  | ] ]; | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | has brain => ( | 
| 31 |  |  |  |  |  |  | isa => 'Str', | 
| 32 |  |  |  |  |  |  | is  => 'rw', | 
| 33 |  |  |  |  |  |  | ); | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | has order => ( | 
| 36 |  |  |  |  |  |  | isa     => 'Int', | 
| 37 |  |  |  |  |  |  | is      => 'rw', | 
| 38 |  |  |  |  |  |  | default => 2, | 
| 39 |  |  |  |  |  |  | trigger => sub { | 
| 40 |  |  |  |  |  |  | my ($self, $order) = @_; | 
| 41 |  |  |  |  |  |  | $self->_custom_order(1); | 
| 42 |  |  |  |  |  |  | }, | 
| 43 |  |  |  |  |  |  | ); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | has _custom_order => ( | 
| 46 |  |  |  |  |  |  | isa           => 'Bool', | 
| 47 |  |  |  |  |  |  | is            => 'rw', | 
| 48 |  |  |  |  |  |  | default       => 0, | 
| 49 |  |  |  |  |  |  | init_arg      => undef, | 
| 50 |  |  |  |  |  |  | documentation => "Here so we can differentiate between the default value of order being explictly set and being set by default", | 
| 51 |  |  |  |  |  |  | ); | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | has _custom_tokenizer_class => ( | 
| 54 |  |  |  |  |  |  | isa           => 'Bool', | 
| 55 |  |  |  |  |  |  | is            => 'rw', | 
| 56 |  |  |  |  |  |  | default       => 0, | 
| 57 |  |  |  |  |  |  | init_arg      => undef, | 
| 58 |  |  |  |  |  |  | documentation => "Here so we can differentiate between the default value of tokenizer_class being explictly set and being set by default", | 
| 59 |  |  |  |  |  |  | ); | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | has save_on_exit => ( | 
| 62 |  |  |  |  |  |  | isa     => 'Bool', | 
| 63 |  |  |  |  |  |  | is      => 'rw', | 
| 64 |  |  |  |  |  |  | default => 1, | 
| 65 |  |  |  |  |  |  | ); | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | has brain_resource => ( | 
| 68 |  |  |  |  |  |  | documentation => "Alias for `brain' for backwards compatibility", | 
| 69 |  |  |  |  |  |  | isa           => 'Str', | 
| 70 |  |  |  |  |  |  | is            => 'rw', | 
| 71 |  |  |  |  |  |  | trigger       => sub { | 
| 72 |  |  |  |  |  |  | my ($self, $brain) = @_; | 
| 73 |  |  |  |  |  |  | $self->brain($brain); | 
| 74 |  |  |  |  |  |  | }, | 
| 75 |  |  |  |  |  |  | ); | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | sub BUILD { | 
| 78 |  |  |  |  |  |  | my ($self) = @_; | 
| 79 |  |  |  |  |  |  | my $brain = $self->brain; | 
| 80 |  |  |  |  |  |  | return if !defined $brain; | 
| 81 |  |  |  |  |  |  | $self->brain(bsd_glob($brain)); | 
| 82 |  |  |  |  |  |  | return; | 
| 83 |  |  |  |  |  |  | } | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | my %has = ( | 
| 86 |  |  |  |  |  |  | engine => { | 
| 87 |  |  |  |  |  |  | name => 'Engine', | 
| 88 |  |  |  |  |  |  | default => 'Default', | 
| 89 |  |  |  |  |  |  | }, | 
| 90 |  |  |  |  |  |  | storage => { | 
| 91 |  |  |  |  |  |  | name => 'Storage', | 
| 92 |  |  |  |  |  |  | default => 'SQLite', | 
| 93 |  |  |  |  |  |  | }, | 
| 94 |  |  |  |  |  |  | tokenizer => { | 
| 95 |  |  |  |  |  |  | name => 'Tokenizer', | 
| 96 |  |  |  |  |  |  | default => 'Words', | 
| 97 |  |  |  |  |  |  | }, | 
| 98 |  |  |  |  |  |  | ui => { | 
| 99 |  |  |  |  |  |  | name => 'UI', | 
| 100 |  |  |  |  |  |  | default => 'ReadLine', | 
| 101 |  |  |  |  |  |  | }, | 
| 102 |  |  |  |  |  |  | ); | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | for my $k (keys %has) { | 
| 105 |  |  |  |  |  |  | my $name          = $has{$k}->{name}; | 
| 106 |  |  |  |  |  |  | my $default       = $has{$k}->{default}; | 
| 107 |  |  |  |  |  |  | my $method_class  = "${k}_class"; | 
| 108 |  |  |  |  |  |  | my $method_args   = "${k}_args"; | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | # working classes | 
| 111 |  |  |  |  |  |  | has "${k}_class" => ( | 
| 112 |  |  |  |  |  |  | isa           => 'Str', | 
| 113 |  |  |  |  |  |  | is            => "rw", | 
| 114 |  |  |  |  |  |  | default       => $default, | 
| 115 |  |  |  |  |  |  | ($k eq 'tokenizer' | 
| 116 |  |  |  |  |  |  | ? (trigger => sub { | 
| 117 |  |  |  |  |  |  | my ($self, $class) = @_; | 
| 118 |  |  |  |  |  |  | $self->_custom_tokenizer_class(1); | 
| 119 |  |  |  |  |  |  | }) | 
| 120 |  |  |  |  |  |  | : ()) | 
| 121 |  |  |  |  |  |  | ); | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | # Object arguments | 
| 124 |  |  |  |  |  |  | has "${k}_args" => ( | 
| 125 |  |  |  |  |  |  | documentation => "Arguments for the $name class", | 
| 126 |  |  |  |  |  |  | isa           => 'HashRef', | 
| 127 |  |  |  |  |  |  | is            => "ro", | 
| 128 |  |  |  |  |  |  | default       => sub { +{} }, | 
| 129 |  |  |  |  |  |  | ); | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | # Working objects | 
| 132 |  |  |  |  |  |  | has "_${k}" => ( | 
| 133 |  |  |  |  |  |  | does        => "Hailo::Role::$name", | 
| 134 |  |  |  |  |  |  | lazy_build  => 1, | 
| 135 |  |  |  |  |  |  | is          => 'ro', | 
| 136 |  |  |  |  |  |  | init_arg    => undef, | 
| 137 |  |  |  |  |  |  | ); | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | # Generate the object itself | 
| 140 |  |  |  |  |  |  | no strict 'refs'; | 
| 141 |  |  |  |  |  |  | *{"_build__${k}"} = sub { | 
| 142 |  |  |  |  |  |  | my ($self) = @_; | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | my $obj = $self->_new_class( | 
| 145 |  |  |  |  |  |  | $name, | 
| 146 |  |  |  |  |  |  | $self->$method_class, | 
| 147 |  |  |  |  |  |  | { | 
| 148 |  |  |  |  |  |  | arguments => $self->$method_args, | 
| 149 |  |  |  |  |  |  | ($k =~ /^(?:engine|storage)$/s | 
| 150 |  |  |  |  |  |  | ? (order     => $self->order) | 
| 151 |  |  |  |  |  |  | : ()), | 
| 152 |  |  |  |  |  |  | ($k eq 'engine' | 
| 153 |  |  |  |  |  |  | ? (storage   => $self->_storage) | 
| 154 |  |  |  |  |  |  | : ()), | 
| 155 |  |  |  |  |  |  | (($k eq 'storage' and defined $self->brain) | 
| 156 |  |  |  |  |  |  | ? ( | 
| 157 |  |  |  |  |  |  | hailo => do { | 
| 158 |  |  |  |  |  |  | require Scalar::Util; | 
| 159 |  |  |  |  |  |  | Scalar::Util::weaken(my $s = $self); | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | my %callback = ( | 
| 162 |  |  |  |  |  |  | has_custom_order           => sub { $s->_custom_order }, | 
| 163 |  |  |  |  |  |  | has_custom_tokenizer_class => sub { $s->_custom_tokenizer_class }, | 
| 164 |  |  |  |  |  |  | set_order => sub { | 
| 165 |  |  |  |  |  |  | my ($db_order) = @_; | 
| 166 |  |  |  |  |  |  | $s->order($db_order); | 
| 167 |  |  |  |  |  |  | $s->_engine->order($db_order); | 
| 168 |  |  |  |  |  |  | }, | 
| 169 |  |  |  |  |  |  | set_tokenizer_class => sub { | 
| 170 |  |  |  |  |  |  | my ($db_tokenizer_class) = @_; | 
| 171 |  |  |  |  |  |  | $s->tokenizer_class($db_tokenizer_class); | 
| 172 |  |  |  |  |  |  | }, | 
| 173 |  |  |  |  |  |  | ); | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | \%callback; | 
| 176 |  |  |  |  |  |  | }, | 
| 177 |  |  |  |  |  |  | brain => $self->brain | 
| 178 |  |  |  |  |  |  | ) | 
| 179 |  |  |  |  |  |  | : ()), | 
| 180 |  |  |  |  |  |  | ($k eq 'storage' | 
| 181 |  |  |  |  |  |  | ? (tokenizer_class => $self->tokenizer_class) | 
| 182 |  |  |  |  |  |  | : ()) | 
| 183 |  |  |  |  |  |  | }, | 
| 184 |  |  |  |  |  |  | ); | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | return $obj; | 
| 187 |  |  |  |  |  |  | }; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | sub _new_class { | 
| 191 |  |  |  |  |  |  | my ($self, $type, $class, $args) = @_; | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | my $pkg; | 
| 194 |  |  |  |  |  |  | if ($class =~ m[^\+(?.+)$]) { | 
| 195 |  |  |  |  |  |  | $pkg = $+{custom_plugin}; | 
| 196 |  |  |  |  |  |  | } else { | 
| 197 |  |  |  |  |  |  | my @plugins = @{ $self->PLUGINS }; | 
| 198 |  |  |  |  |  |  | # Be fuzzy about includes, e.g. DBD::SQLite or SQLite or sqlite will go | 
| 199 |  |  |  |  |  |  | $pkg = first { / $type : .* : $class /ix } | 
| 200 |  |  |  |  |  |  | sort { length $a <=> length $b } | 
| 201 |  |  |  |  |  |  | @plugins; | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | unless ($pkg) { | 
| 204 |  |  |  |  |  |  | local $" = ', '; | 
| 205 |  |  |  |  |  |  | my @p = grep { /$type/ } @plugins; | 
| 206 |  |  |  |  |  |  | die "Couldn't find a class name matching '$class' in plugins '@p'"; | 
| 207 |  |  |  |  |  |  | } | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | my ($success, $error) = try_load_class($pkg); | 
| 211 |  |  |  |  |  |  | die $error if !$success; | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | return $pkg->new(%$args); | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | sub save { | 
| 217 |  |  |  |  |  |  | my ($self, @args) = @_; | 
| 218 |  |  |  |  |  |  | $self->_storage->save(@args); | 
| 219 |  |  |  |  |  |  | return; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | sub train { | 
| 223 |  |  |  |  |  |  | my ($self, $input, $fast) = @_; | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | $self->_storage->start_training(); | 
| 226 |  |  |  |  |  |  |  | 
| 227 |  |  |  |  |  |  | if (not ref $input and defined $input and $input eq '-') { | 
| 228 |  |  |  |  |  |  | # With STDIN | 
| 229 |  |  |  |  |  |  | die "You must provide STDIN when training from '-'" if $self->_is_interactive(*STDIN); | 
| 230 |  |  |  |  |  |  | $self->_train_fh(*STDIN, $fast); | 
| 231 |  |  |  |  |  |  | } elsif (ref $input eq 'GLOB') { | 
| 232 |  |  |  |  |  |  | # With a filehandle | 
| 233 |  |  |  |  |  |  | $self->_train_fh($input, $fast); | 
| 234 |  |  |  |  |  |  | } elsif (not ref $input) { | 
| 235 |  |  |  |  |  |  | # With a file | 
| 236 |  |  |  |  |  |  | open my $fh, '<:encoding(utf8)', $input; | 
| 237 |  |  |  |  |  |  | $self->_train_fh($fh, $fast, $input); | 
| 238 |  |  |  |  |  |  | } elsif (ref $input eq 'ARRAY') { | 
| 239 |  |  |  |  |  |  | # With an Array | 
| 240 |  |  |  |  |  |  | for my $line (@$input) { | 
| 241 |  |  |  |  |  |  | $self->_learn_one($line, $fast); | 
| 242 |  |  |  |  |  |  | $self->_engine->flush_cache if !$fast; | 
| 243 |  |  |  |  |  |  | } | 
| 244 |  |  |  |  |  |  | $self->_engine->flush_cache if $fast; | 
| 245 |  |  |  |  |  |  | } else { | 
| 246 |  |  |  |  |  |  | # With something naughty | 
| 247 |  |  |  |  |  |  | die "Unknown input: $input"; | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | $self->_storage->stop_training(); | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | return; | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | sub _train_fh { | 
| 256 |  |  |  |  |  |  | my ($self, $fh, $fast) = @_; | 
| 257 |  |  |  |  |  |  |  | 
| 258 |  |  |  |  |  |  | while (my $line = <$fh>) { | 
| 259 |  |  |  |  |  |  | chomp $line; | 
| 260 |  |  |  |  |  |  | $self->_learn_one($line, $fast); | 
| 261 |  |  |  |  |  |  | $self->_engine->flush_cache if !$fast; | 
| 262 |  |  |  |  |  |  | } | 
| 263 |  |  |  |  |  |  | $self->_engine->flush_cache if $fast; | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | return; | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | sub learn { | 
| 269 |  |  |  |  |  |  | my ($self, $input) = @_; | 
| 270 |  |  |  |  |  |  | my $inputs; | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | if (not defined $input) { | 
| 273 |  |  |  |  |  |  | die "Cannot learn from undef input"; | 
| 274 |  |  |  |  |  |  | } elsif (not ref $input) { | 
| 275 |  |  |  |  |  |  | $inputs = [$input]; | 
| 276 |  |  |  |  |  |  | } elsif (ref $input eq 'ARRAY') { | 
| 277 |  |  |  |  |  |  | $inputs = $input; # With an Array | 
| 278 |  |  |  |  |  |  | } else { | 
| 279 |  |  |  |  |  |  | die "Unknown input: $input"; | 
| 280 |  |  |  |  |  |  | } | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | my $storage = $self->_storage; | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | $storage->start_learning(); | 
| 285 |  |  |  |  |  |  | $self->_learn_one($_) for @$inputs; | 
| 286 |  |  |  |  |  |  | $storage->stop_learning(); | 
| 287 |  |  |  |  |  |  | return; | 
| 288 |  |  |  |  |  |  | } | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | sub _learn_one { | 
| 291 |  |  |  |  |  |  | my ($self, $input, $fast) = @_; | 
| 292 |  |  |  |  |  |  | my $engine  = $self->_engine; | 
| 293 |  |  |  |  |  |  |  | 
| 294 |  |  |  |  |  |  | my $tokens = $self->_tokenizer->make_tokens($input); | 
| 295 |  |  |  |  |  |  | $fast ? $engine->learn_cached($tokens) : $engine->learn($tokens); | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | return; | 
| 298 |  |  |  |  |  |  | } | 
| 299 |  |  |  |  |  |  |  | 
| 300 |  |  |  |  |  |  | sub learn_reply { | 
| 301 |  |  |  |  |  |  | my ($self, $input) = @_; | 
| 302 |  |  |  |  |  |  | $self->learn($input); | 
| 303 |  |  |  |  |  |  | return $self->reply($input); | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | sub reply { | 
| 307 |  |  |  |  |  |  | my ($self, $input) = @_; | 
| 308 |  |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | my $storage   = $self->_storage; | 
| 310 |  |  |  |  |  |  | # start_training() hasn't been called so we can't guarentee that | 
| 311 |  |  |  |  |  |  | # the storage has been engaged at this point. This must be called | 
| 312 |  |  |  |  |  |  | # before ->_engine() is called anywhere to ensure that the | 
| 313 |  |  |  |  |  |  | # lazy-loading in the engine works. | 
| 314 |  |  |  |  |  |  | $storage->_engage() unless $storage->_engaged; | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  | my $engine    = $self->_engine; | 
| 317 |  |  |  |  |  |  | my $tokenizer = $self->_tokenizer; | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | my $reply; | 
| 320 |  |  |  |  |  |  | if (defined $input) { | 
| 321 |  |  |  |  |  |  | my $tokens = $tokenizer->make_tokens($input); | 
| 322 |  |  |  |  |  |  | $reply = $engine->reply($tokens); | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  | else { | 
| 325 |  |  |  |  |  |  | $reply = $engine->reply(); | 
| 326 |  |  |  |  |  |  | } | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | return unless defined $reply; | 
| 329 |  |  |  |  |  |  | return $tokenizer->make_output($reply); | 
| 330 |  |  |  |  |  |  | } | 
| 331 |  |  |  |  |  |  |  | 
| 332 |  |  |  |  |  |  | sub stats { | 
| 333 |  |  |  |  |  |  | my ($self) = @_; | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | return $self->_storage->totals(); | 
| 336 |  |  |  |  |  |  | } | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | sub DEMOLISH { | 
| 339 |  |  |  |  |  |  | my ($self) = @_; | 
| 340 |  |  |  |  |  |  | $self->save() if blessed $self->{_storage} and $self->save_on_exit; | 
| 341 |  |  |  |  |  |  | return; | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | sub _is_interactive { | 
| 345 |  |  |  |  |  |  | require IO::Interactive; | 
| 346 |  |  |  |  |  |  | return IO::Interactive::is_interactive(); | 
| 347 |  |  |  |  |  |  | } | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | __PACKAGE__->meta->make_immutable; | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | =encoding utf8 | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | =head1 NAME | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | Hailo - A pluggable Markov engine analogous to MegaHAL | 
| 356 |  |  |  |  |  |  |  | 
| 357 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | This is the synopsis for using Hailo as a module. See L for | 
| 360 |  |  |  |  |  |  | command-line invocation. | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | # Hailo requires Perl 5.10 | 
| 363 |  |  |  |  |  |  | use 5.010; | 
| 364 |  |  |  |  |  |  | use Any::Moose; | 
| 365 |  |  |  |  |  |  | use Hailo; | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | # Construct a new in-memory Hailo using the SQLite backend. See | 
| 368 |  |  |  |  |  |  | # backend documentation for other options. | 
| 369 |  |  |  |  |  |  | my $hailo = Hailo->new; | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | # Various ways to learn | 
| 372 |  |  |  |  |  |  | my @train_this = ("I like big butts", "and I can not lie"); | 
| 373 |  |  |  |  |  |  | $hailo->learn(\@train_this); | 
| 374 |  |  |  |  |  |  | $hailo->learn($_) for @train_this; | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | # Heavy-duty training interface. Backends may drop some safety | 
| 377 |  |  |  |  |  |  | # features like journals or synchronous IO to train faster using | 
| 378 |  |  |  |  |  |  | # this mode. | 
| 379 |  |  |  |  |  |  | $hailo->train("megahal.trn"); | 
| 380 |  |  |  |  |  |  | $hailo->train($filehandle); | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | # Make the brain babble | 
| 383 |  |  |  |  |  |  | say $hailo->reply("hello good sir."); | 
| 384 |  |  |  |  |  |  | # Just say something at random | 
| 385 |  |  |  |  |  |  | say $hailo->reply(); | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | Hailo is a fast and lightweight markov engine intended to replace | 
| 390 |  |  |  |  |  |  | L. It has a L (or | 
| 391 |  |  |  |  |  |  | L) based core with pluggable | 
| 392 |  |  |  |  |  |  | L, L | 
| 393 |  |  |  |  |  |  | and L backends. | 
| 394 |  |  |  |  |  |  |  | 
| 395 |  |  |  |  |  |  | It is similar to MegaHAL in functionality, the main differences (with the | 
| 396 |  |  |  |  |  |  | default backends) being better scalability, drastically less memory usage, | 
| 397 |  |  |  |  |  |  | an improved tokenizer, and tidier output. | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | With this distribution, you can create, modify, and query Hailo brains. To | 
| 400 |  |  |  |  |  |  | use Hailo in event-driven POE applications, you can use the | 
| 401 |  |  |  |  |  |  | L wrapper. One example is | 
| 402 |  |  |  |  |  |  | L, | 
| 403 |  |  |  |  |  |  | which implements an IRC chat bot. | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  | =head2 Etymology | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | I is a portmanteau of I (as in MegaHAL) and | 
| 408 |  |  |  |  |  |  | L. | 
| 409 |  |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | =head1 Backends | 
| 411 |  |  |  |  |  |  |  | 
| 412 |  |  |  |  |  |  | Hailo supports pluggable L and | 
| 413 |  |  |  |  |  |  | L backends, it also supports a | 
| 414 |  |  |  |  |  |  | pluggable L backend which is used by the L | 
| 415 |  |  |  |  |  |  | command-line utility. | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | =head2 Storage | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | Hailo can currently store its data in either a | 
| 420 |  |  |  |  |  |  | L, | 
| 421 |  |  |  |  |  |  | L or | 
| 422 |  |  |  |  |  |  | L database. Some NoSQL backends were | 
| 423 |  |  |  |  |  |  | supported in earlier versions, but they were removed as they had no | 
| 424 |  |  |  |  |  |  | redeeming quality. | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  | SQLite is the primary target for Hailo. It's much faster and uses less | 
| 427 |  |  |  |  |  |  | resources than the other two. It's highly recommended that you use it. | 
| 428 |  |  |  |  |  |  |  | 
| 429 |  |  |  |  |  |  | See L for benchmarks showing | 
| 430 |  |  |  |  |  |  | how the various backends compare under different workloads, and how | 
| 431 |  |  |  |  |  |  | you can create your own. | 
| 432 |  |  |  |  |  |  |  | 
| 433 |  |  |  |  |  |  | =head2 Tokenizer | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | By default Hailo will use L | 
| 436 |  |  |  |  |  |  | tokenizer|Hailo::Tokenizer::Words> to split up input by whitespace, | 
| 437 |  |  |  |  |  |  | taking into account things like quotes, sentence terminators and more. | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | There's also a L | 
| 440 |  |  |  |  |  |  | tokenizer|Hailo::Tokenizer::Chars>. It's not generally useful for a | 
| 441 |  |  |  |  |  |  | conversation bot but can be used to e.g. generate new words given a | 
| 442 |  |  |  |  |  |  | list of existing words. | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | =head1 UPGRADING | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | Hailo makes no promises about brains generated with earlier versions | 
| 447 |  |  |  |  |  |  | being compatable with future version and due to the way Hailo works | 
| 448 |  |  |  |  |  |  | there's no practical way to make that promise. Learning in Hailo is | 
| 449 |  |  |  |  |  |  | lossy so an accurate conversion is impossible. | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | If you're maintaining a Hailo brain that you want to keep using you | 
| 452 |  |  |  |  |  |  | should save the input you trained it on and re-train when you upgrade. | 
| 453 |  |  |  |  |  |  |  | 
| 454 |  |  |  |  |  |  | Hailo is always going to lose information present in the input you | 
| 455 |  |  |  |  |  |  | give it. How input tokens get split up and saved to the storage | 
| 456 |  |  |  |  |  |  | backend depends on the version of the tokenizer being used and how | 
| 457 |  |  |  |  |  |  | that input gets saved to the database. | 
| 458 |  |  |  |  |  |  |  | 
| 459 |  |  |  |  |  |  | For instance if an earlier version of Hailo tokenized C<"foo+bar"> | 
| 460 |  |  |  |  |  |  | simply as C<"foo+bar"> but a later version split that up into | 
| 461 |  |  |  |  |  |  | C<"foo", "+", "bar">, then an input of C<"foo+bar are my favorite | 
| 462 |  |  |  |  |  |  | metasyntactic variables"> wouldn't take into account the existing | 
| 463 |  |  |  |  |  |  | C<"foo+bar"> string in the database. | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | Tokenizer changes like this would cause the brains to accumulate | 
| 466 |  |  |  |  |  |  | garbage and would leave other parts in a state they wouldn't otherwise | 
| 467 |  |  |  |  |  |  | have gotten into. | 
| 468 |  |  |  |  |  |  |  | 
| 469 |  |  |  |  |  |  | There have been more drastic changes to the database format itself in | 
| 470 |  |  |  |  |  |  | the past. | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | Having said all that the database format and the tokenizer are | 
| 473 |  |  |  |  |  |  | relatively stable. At the time of writing 0.33 is the latest release | 
| 474 |  |  |  |  |  |  | and it's compatable with brains down to at least 0.17. If you're | 
| 475 |  |  |  |  |  |  | upgrading and there isn't a big notice about the storage format being | 
| 476 |  |  |  |  |  |  | incompatable in the F file your old brains will probably work | 
| 477 |  |  |  |  |  |  | just fine. | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  | =head1 ATTRIBUTES | 
| 480 |  |  |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | =head2 C | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | The name of the brain (file name, database name) to use as storage. | 
| 484 |  |  |  |  |  |  | There is no default. Whether this gets used at all depends on the | 
| 485 |  |  |  |  |  |  | storage backend, currently only SQLite uses it. | 
| 486 |  |  |  |  |  |  |  | 
| 487 |  |  |  |  |  |  | =head2 C | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | A boolean value indicating whether Hailo should save its state before | 
| 490 |  |  |  |  |  |  | its object gets destroyed. This defaults to true and will simply call | 
| 491 |  |  |  |  |  |  | L at C time. | 
| 492 |  |  |  |  |  |  |  | 
| 493 |  |  |  |  |  |  | See L for how the SQLite backend | 
| 494 |  |  |  |  |  |  | uses this option. | 
| 495 |  |  |  |  |  |  |  | 
| 496 |  |  |  |  |  |  | =head2 C | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | The Markov order (chain length) you want to use for an empty brain. | 
| 499 |  |  |  |  |  |  | The default is 2. | 
| 500 |  |  |  |  |  |  |  | 
| 501 |  |  |  |  |  |  | =head2 C | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | =head2 C | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | =head2 C | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | =head2 C | 
| 508 |  |  |  |  |  |  |  | 
| 509 |  |  |  |  |  |  | A a short name name of the class we use for the engine, storage, | 
| 510 |  |  |  |  |  |  | tokenizer or ui backends. | 
| 511 |  |  |  |  |  |  |  | 
| 512 |  |  |  |  |  |  | By default this is B for the engine, B for storage, | 
| 513 |  |  |  |  |  |  | B for the tokenizer and B for the UI. The UI backend | 
| 514 |  |  |  |  |  |  | is only used by the L command-line interface. | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | You can only specify the short name of one of the packages Hailo | 
| 517 |  |  |  |  |  |  | itself ships with. If you need another class then just prefix the | 
| 518 |  |  |  |  |  |  | package with a plus (Catalyst style), e.g. C<+My::Foreign::Tokenizer>. | 
| 519 |  |  |  |  |  |  |  | 
| 520 |  |  |  |  |  |  | =head2 C | 
| 521 |  |  |  |  |  |  |  | 
| 522 |  |  |  |  |  |  | =head2 C | 
| 523 |  |  |  |  |  |  |  | 
| 524 |  |  |  |  |  |  | =head2 C | 
| 525 |  |  |  |  |  |  |  | 
| 526 |  |  |  |  |  |  | =head2 C | 
| 527 |  |  |  |  |  |  |  | 
| 528 |  |  |  |  |  |  | A C of arguments for engine/storage/tokenizer/ui | 
| 529 |  |  |  |  |  |  | backends. See the documentation for the backends for what sort of | 
| 530 |  |  |  |  |  |  | arguments they accept. | 
| 531 |  |  |  |  |  |  |  | 
| 532 |  |  |  |  |  |  | =head1 METHODS | 
| 533 |  |  |  |  |  |  |  | 
| 534 |  |  |  |  |  |  | =head2 C | 
| 535 |  |  |  |  |  |  |  | 
| 536 |  |  |  |  |  |  | This is the constructor. It accepts the attributes specified in | 
| 537 |  |  |  |  |  |  | L. | 
| 538 |  |  |  |  |  |  |  | 
| 539 |  |  |  |  |  |  | =head2 C | 
| 540 |  |  |  |  |  |  |  | 
| 541 |  |  |  |  |  |  | Takes a string or an array reference of strings and learns from them. | 
| 542 |  |  |  |  |  |  |  | 
| 543 |  |  |  |  |  |  | =head2 C | 
| 544 |  |  |  |  |  |  |  | 
| 545 |  |  |  |  |  |  | Takes a filename, filehandle or array reference and learns from all its | 
| 546 |  |  |  |  |  |  | lines. If a filename is passed, the file is assumed to be UTF-8 encoded. | 
| 547 |  |  |  |  |  |  | Unlike L|/learn>, this method sacrifices some safety (disables | 
| 548 |  |  |  |  |  |  | the database journal, fsyncs, etc) for speed while learning. | 
| 549 |  |  |  |  |  |  |  | 
| 550 |  |  |  |  |  |  | You can prove a second parameter which, if true, will use aggressive | 
| 551 |  |  |  |  |  |  | caching while training, which will speed things up considerably for large | 
| 552 |  |  |  |  |  |  | inputs, but will take up quite a bit of memory. | 
| 553 |  |  |  |  |  |  |  | 
| 554 |  |  |  |  |  |  | =head2 C | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | Takes an optional line of text and generates a reply that might be relevant. | 
| 557 |  |  |  |  |  |  |  | 
| 558 |  |  |  |  |  |  | =head2 C | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | Takes a string argument, learns from it, and generates a reply that | 
| 561 |  |  |  |  |  |  | might be relevant. This is equivalent to calling L | 
| 562 |  |  |  |  |  |  | followed by L. | 
| 563 |  |  |  |  |  |  |  | 
| 564 |  |  |  |  |  |  | =head2 C | 
| 565 |  |  |  |  |  |  |  | 
| 566 |  |  |  |  |  |  | Tells the underlying storage backend to L | 
| 567 |  |  |  |  |  |  | state|Hailo::Role::Storage/"save">, any arguments to this method will | 
| 568 |  |  |  |  |  |  | be passed as-is to the backend. | 
| 569 |  |  |  |  |  |  |  | 
| 570 |  |  |  |  |  |  | =head2 C | 
| 571 |  |  |  |  |  |  |  | 
| 572 |  |  |  |  |  |  | Takes no arguments. Returns the number of tokens, expressions, previous | 
| 573 |  |  |  |  |  |  | token links and next token links. | 
| 574 |  |  |  |  |  |  |  | 
| 575 |  |  |  |  |  |  | =head1 SUPPORT | 
| 576 |  |  |  |  |  |  |  | 
| 577 |  |  |  |  |  |  | You can join the IRC channel I<#hailo> on FreeNode if you have questions. | 
| 578 |  |  |  |  |  |  |  | 
| 579 |  |  |  |  |  |  | =head1 BUGS | 
| 580 |  |  |  |  |  |  |  | 
| 581 |  |  |  |  |  |  | Bugs, feature requests and other issues are tracked in L | 
| 582 |  |  |  |  |  |  | rt.cpan.org|https://rt.cpan.org/Dist/Display.html?Name=Hailo> | 
| 583 |  |  |  |  |  |  |  | 
| 584 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 585 |  |  |  |  |  |  |  | 
| 586 |  |  |  |  |  |  | =over | 
| 587 |  |  |  |  |  |  |  | 
| 588 |  |  |  |  |  |  | =item * L - A non-blocking POE wrapper around Hailo | 
| 589 |  |  |  |  |  |  |  | 
| 590 |  |  |  |  |  |  | =item * L - A Hailo IRC bot plugin | 
| 591 |  |  |  |  |  |  |  | 
| 592 |  |  |  |  |  |  | =item * L - Failo, an IRC bot that uses Hailo | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | =item * L - GumbyBRAIN, a more famous IRC bot that uses Hailo | 
| 595 |  |  |  |  |  |  |  | 
| 596 |  |  |  |  |  |  | =item * L - A L and jQuery powered web | 
| 597 |  |  |  |  |  |  | interface to Hailo available at L | 
| 598 |  |  |  |  |  |  | and as L on | 
| 599 |  |  |  |  |  |  | L | 
| 600 |  |  |  |  |  |  |  | 
| 601 |  |  |  |  |  |  | =item * L, a random tweet generator powered by Hailo | 
| 602 |  |  |  |  |  |  |  | 
| 603 |  |  |  |  |  |  | =item * L - cobe, a Python port of MegaHAL "inspired by the success of Hailo" | 
| 604 |  |  |  |  |  |  |  | 
| 605 |  |  |  |  |  |  | =back | 
| 606 |  |  |  |  |  |  |  | 
| 607 |  |  |  |  |  |  | =head1 LINKS | 
| 608 |  |  |  |  |  |  |  | 
| 609 |  |  |  |  |  |  | =over | 
| 610 |  |  |  |  |  |  |  | 
| 611 |  |  |  |  |  |  | =item * L - Hailo's website | 
| 612 |  |  |  |  |  |  |  | 
| 613 |  |  |  |  |  |  | =item * L - Hailo: A Perl rewrite of | 
| 614 |  |  |  |  |  |  | MegaHAL, A blog posting about the motivation behind Hailo | 
| 615 |  |  |  |  |  |  |  | 
| 616 |  |  |  |  |  |  | =item * L - | 
| 617 |  |  |  |  |  |  | More blog posts about Hailo on Evar ArnfjErE | 
| 618 |  |  |  |  |  |  | Bjarmason's L blog | 
| 619 |  |  |  |  |  |  |  | 
| 620 |  |  |  |  |  |  | =item * Hailo on L and | 
| 621 |  |  |  |  |  |  | L | 
| 622 |  |  |  |  |  |  |  | 
| 623 |  |  |  |  |  |  | =back | 
| 624 |  |  |  |  |  |  |  | 
| 625 |  |  |  |  |  |  | =head1 AUTHORS | 
| 626 |  |  |  |  |  |  |  | 
| 627 |  |  |  |  |  |  | Hinrik Ern SigurEsson, hinrik.sig@gmail.com | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  | Evar ArnfjErE Bjarmason | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | =head1 LICENSE AND COPYRIGHT | 
| 632 |  |  |  |  |  |  |  | 
| 633 |  |  |  |  |  |  | Copyright 2010 Hinrik Ern SigurEsson and | 
| 634 |  |  |  |  |  |  | Evar ArnfjErE Bjarmason | 
| 635 |  |  |  |  |  |  |  | 
| 636 |  |  |  |  |  |  | This program is free software, you can redistribute it and/or modify | 
| 637 |  |  |  |  |  |  | it under the same terms as Perl itself. | 
| 638 |  |  |  |  |  |  |  | 
| 639 |  |  |  |  |  |  | =cut |