File Coverage

blib/lib/Hailo/Engine/Default.pm

Criterion	Covered	Total	%
statement	188	231	81.3
branch	53	72	73.6
condition	7	11	63.6
subroutine	23	24	95.8
pod	0	5	0.0
total	271	343	79.0

line	stmt	bran	cond	sub	pod	time	code
1							package Hailo::Engine::Default;
2							our $AUTHORITY = 'cpan:AVAR';
3							$Hailo::Engine::Default::VERSION = '0.75';
4	23			23		18716	use v5.10.0;
	23					96
5	23			23		140	use Moose;
	23					46
	23					204
6	23			23		163939	use MooseX::Types::Moose ':all';
	23					53
	23					269
7	23			23		198041	use List::Util qw<min first shuffle>;
	23					50
	23					2433
8	23			23		7655	use List::MoreUtils qw<uniq>;
	23					150067
	23					198
9
10							with qw[ Hailo::Role::Arguments Hailo::Role::Engine ];
11
12							has repeat_limit => (
13							isa => Int,
14							is => 'rw',
15							lazy => 1,
16							default => sub {
17							my ($self) = @_;
18							my $order = $self->order;
19							return min(($order * 10), 50);
20							}
21							);
22
23							sub BUILD {
24	46			46	0	141	my ($self) = @_;
25
26							# This performance hack is here because in our tight loops calling
27							# $self->storage->sth->{...} is actually a significant part of the
28							# overall program execution time since we're doing two method
29							# calls and hash dereferences for each call to the database.
30
31	46					1382	my $sth = $self->storage->sth;
32	46					315	while (my ($k, $v) = each %$sth) {
33	1337					4564	$self->{"_sth_$k"} = $v;
34							}
35
36	46					1269	return;
37							}
38
39							## no critic (Subroutines::ProhibitExcessComplexity)
40							sub reply {
41	1216			1216	0	2032	my $self = shift;
42	1216		100			2895	my $tokens = shift // [];
43
44							# we will favor these tokens when making the reply. Shuffle them
45							# and discard half.
46	1216					1700	my @key_tokens = do {
47	1216					1769	my $i = 0;
48	1216					4252	grep { $i++ % 2 == 0 } shuffle(@$tokens);
	4508					10119
49							};
50
51	1216					3295	my $token_cache = $self->_resolve_input_tokens($tokens);
52	1216					4798	my @key_ids = keys %$token_cache;
53
54							# sort the rest by rareness
55	1216					3486	@key_ids = $self->_find_rare_tokens(\@key_ids, 2);
56
57							# get the middle expression
58	1216					2354	my $pivot_token_id = shift @key_ids;
59	1216					2908	my ($pivot_expr_id, @token_ids) = $self->_random_expr($pivot_token_id);
60	1216	100				2751	return unless defined $pivot_expr_id; # we don't know any expressions yet
61
62							# remove key tokens we're already using
63	1213			4764		2385	@key_ids = grep { my $used = $_; !first { $_ == $used } @token_ids } @key_ids;
	2406					3541
	2406					6573
	4764					10952
64
65	1213					1640	my %expr_cache;
66
67							# construct the end of the reply
68	1213					3907	$self->_construct_reply('next', $pivot_expr_id, \@token_ids, \%expr_cache, \@key_ids);
69
70							# construct the beginning of the reply
71	1213					3830	$self->_construct_reply('prev', $pivot_expr_id, \@token_ids, \%expr_cache, \@key_ids);
72
73							# translate token ids to token spacing/text
74							my @output = map {
75	1213		66			2673	$token_cache->{$_} // ($token_cache->{$_} = $self->_token_info($_))
	13448					39769
76							} @token_ids;
77	1213					12890	return \@output;
78							}
79
80							sub _resolve_input_tokens {
81	1216			1216		2296	my ($self, $tokens) = @_;
82	1216					1679	my %token_cache;
83
84	1216	100				2471	if (@$tokens == 1) {
85	913					1253	my ($spacing, $text) = @{ $tokens->[0] };
	913					2038
86	913					2097	my $token_info = $self->_token_resolve($spacing, $text);
87
88	913	100				2383	if (defined $token_info) {
89	779					1593	my ($id, $count) = @$token_info;
90	779					3106	$token_cache{$id} = [$spacing, $text, $count];
91							}
92							else {
93							# when there's just one token, it could be ';' for example,
94							# which will have normal spacing when it appears alone, but
95							# suffix spacing in a sentence like "those things; foo, bar",
96							# so we'll be a bit more lax here by also looking for any
97							# token that has the same text
98	134					338	$token_info = $self->_token_similar($text);
99	134	100				458	if (defined $token_info) {
100	118					294	my ($id, $spacing, $count) = @$token_info;
101	118					508	$token_cache{$id} = [$spacing, $text, $count];
102							}
103							}
104							}
105							else {
106	303					681	for my $token (@$tokens) {
107	3595					7701	my ($spacing, $text) = @$token;
108	3595					7107	my $token_info = $self->_token_resolve($spacing, $text);
109	3595	100				12915	next if !defined $token_info;
110	3591					7623	my ($id, $count) = @$token_info;
111	3591					17504	$token_cache{$id} = [$spacing, $text, $count];
112							}
113							}
114
115	1216					2876	return \%token_cache;
116							}
117
118							sub _token_resolve {
119	4508			4508		7457	my ($self, $spacing, $text) = @_;
120
121	4508					192429	$self->{_sth_token_resolve}->execute($spacing, $text);
122	4508					67365	return $self->{_sth_token_resolve}->fetchrow_arrayref;
123							}
124
125							sub _token_info {
126	9720			9720		17686	my ($self, $id) = @_;
127
128	9720					233613	$self->{_sth_token_info}->execute($id);
129	9720					97993	my @res = $self->{_sth_token_info}->fetchrow_array;
130	9720					51419	return \@res;
131							}
132
133							sub learn {
134	3560			3560	0	6947	my ($self, $tokens) = @_;
135	3560					101186	my $order = $self->order;
136
137							# only learn from inputs which are long enough
138	3560	100				8199	return if @$tokens < $order;
139
140	3284					5206	my (%token_cache, %expr_cache);
141
142							# resolve/add tokens and update their counter
143	3284					6453	for my $token (@$tokens) {
144	31725					90009	my $key = join '', @$token; # the key is "$spacing$text"
145	31725	100				73297	if (!exists $token_cache{$key}) {
146	27739					53060	$token_cache{$key} = $self->_token_id_add($token);
147							}
148	31725					341621	$self->{_sth_inc_token_count}->execute(1, $token_cache{$key});
149							}
150
151							# process every expression of length $order
152	3284					11432	for my $i (0 .. @$tokens - $order) {
153	28278					65545	my @expr = map { $token_cache{ join('', @{ $tokens->[$_] }) } } $i .. $i+$order-1;
	57168					80391
	57168					164500
154	28278					74103	my $key = join('_', @expr);
155
156	28278	100				64854	if (!defined $expr_cache{$key}) {
157	27053					60821	$expr_cache{$key} = $self->_expr_id_add(\@expr);
158							}
159	28278					54194	my $expr_id = $expr_cache{$key};
160
161							# add link to next token for this expression, if any
162	28278	100				61764	if ($i < @$tokens - $order) {
163	24994					35027	my $next_id = $token_cache{ join('', @{ $tokens->[$i+$order] }) };
	24994					66060
164	24994					56215	$self->_inc_link('next_token', $expr_id, $next_id, 1);
165							}
166
167							# add link to previous token for this expression, if any
168	28278	100				57826	if ($i > 0) {
169	24994					34650	my $prev_id = $token_cache{ join('', @{ $tokens->[$i-1] }) };
	24994					67684
170	24994					53832	$self->_inc_link('prev_token', $expr_id, $prev_id, 1);
171							}
172
173							# add links to boundary token if appropriate
174	28278					827016	my $b = $self->storage->_boundary_token_id;
175	28278	100				63542	$self->_inc_link('prev_token', $expr_id, $b, 1) if $i == 0;
176	28278	100				80467	$self->_inc_link('next_token', $expr_id, $b, 1) if $i == @$tokens-$order;
177							}
178
179	3284					18644	return;
180							}
181
182							sub learn_cached {
183	0			0	0	0	my ($self, $tokens) = @_;
184	0					0	my $order = $self->order;
185
186							# only learn from inputs which are long enough
187	0	0				0	return if @$tokens < $order;
188
189	0					0	my (%token_cache, %expr_cache);
190
191							# resolve/add tokens and update their counter
192	0					0	for my $token (@$tokens) {
193	0					0	my $key = join '', @$token; # the key is "$spacing$text"
194	0	0				0	if (!exists $token_cache{$key}) {
195	0					0	my $token_id = $self->_token_id_add($token);
196	0					0	$token_cache{$key} = $token_id;
197	0					0	$self->{_updates}{token_count}{$token_id}++;
198							}
199							}
200
201							# process every expression of length $order
202	0					0	for my $i (0 .. @$tokens - $order) {
203	0					0	my @expr = map { $token_cache{ join('', @{ $tokens->[$_] }) } } $i .. $i+$order-1;
	0					0
	0					0
204	0					0	my $key = join('_', @expr);
205
206	0	0				0	if (!defined $expr_cache{$key}) {
207	0					0	$expr_cache{$key} = $self->_expr_id_add(\@expr);
208							}
209	0					0	my $expr_id = $expr_cache{$key};
210
211							# add link to next token for this expression, if any
212	0	0				0	if ($i < @$tokens - $order) {
213	0					0	my $next_id = $token_cache{ join('', @{ $tokens->[$i+$order] }) };
	0					0
214	0					0	$self->{_updates}{next_token}{$expr_id}{$next_id}++;
215							}
216
217							# add link to previous token for this expression, if any
218	0	0				0	if ($i > 0) {
219	0					0	my $prev_id = $token_cache{ join('', @{ $tokens->[$i-1] }) };
	0					0
220	0					0	$self->{_updates}{prev_token}{$expr_id}{$prev_id}++;
221							}
222
223							# add links to boundary token if appropriate
224	0					0	my $b = $self->storage->_boundary_token_id;
225	0	0				0	$self->{_updates}{prev_token}{$expr_id}{$b}++ if $i == 0;
226	0	0				0	$self->{_updates}{next_token}{$expr_id}{$b}++ if $i == @$tokens-$order;
227							}
228
229	0					0	return;
230							}
231
232							sub flush_cache {
233	1926			1926	0	3636	my ($self) = @_;
234
235	1926					3253	my $updates = $self->{_updates};
236	1926	50				8233	return if !$updates;
237
238	0					0	while (my ($token_id, $count) = each %{ $updates->{token_count} }) {
	0					0
239	0					0	$self->{_sth_inc_token_count}->execute($count, $token_id);
240							}
241
242	0					0	while (my ($expr_id, $links) = each %{ $updates->{next_token} }) {
	0					0
243	0					0	while (my ($next_token_id, $count) = each %$links) {
244	0					0	$self->_inc_link('next_token', $expr_id, $next_token_id, $count);
245							}
246							}
247
248	0					0	while (my ($expr_id, $links) = each %{ $updates->{prev_token} }) {
	0					0
249	0					0	while (my ($prev_token_id, $count) = each %$links) {
250	0					0	$self->_inc_link('prev_token', $expr_id, $prev_token_id, $count);
251							}
252							}
253							}
254
255							# sort token ids based on how rare they are
256							sub _find_rare_tokens {
257	1216			1216		2274	my ($self, $token_ids, $min) = @_;
258	1216	100				3005	return unless @$token_ids;
259
260	1190					1633	my %links;
261	1190					2308	for my $id (@$token_ids) {
262	4246	50				11739	next if exists $links{$id};
263	4246					152323	$self->{_sth_token_count}->execute($id);
264	4246					54751	$links{$id} = $self->{_sth_token_count}->fetchrow_array;
265							}
266
267							# remove tokens which are too rare
268	1190					3368	my @ids = grep { $links{$_} >= $min } @$token_ids;
	4246					9225
269
270	1190					3337	@ids = sort { $links{$a} <=> $links{$b} } @ids;
	6702					9306
271
272	1190					4149	return @ids;
273							}
274
275							# increase the link weight between an expression and a token
276							sub _inc_link {
277	56556			56556		107333	my ($self, $type, $expr_id, $token_id, $count) = @_;
278
279	56556					721809	$self->{"_sth_${type}_inc"}->execute($count, $expr_id, $token_id);
280	56556	100				243958	if (!$self->{"_sth_${type}_inc"}->rows) {
281	43864					538560	$self->{"_sth_${type}_add"}->execute($expr_id, $token_id, $count);
282							}
283
284	56556					136103	return;
285							}
286
287							# look up/add an expression id based on tokens
288							sub _expr_id_add {
289	37982			37982		64949	my ($self, $token_ids) = @_;
290
291	37982					644213	$self->{_sth_expr_id}->execute(@$token_ids);
292	37982					228941	my $expr_id = $self->{_sth_expr_id}->fetchrow_array();
293	37982	100				126775	return $expr_id if defined $expr_id;
294
295	18029					285922	$self->{_sth_add_expr}->execute(@$token_ids);
296	18029					539663	return $self->storage->dbh->last_insert_id(undef, undef, "expr", undef);
297							}
298
299							# return token id if the token exists
300							sub _token_id {
301	27739			27739		39301	my ($self, $token_info) = @_;
302
303	27739					315385	$self->{_sth_token_id}->execute(@$token_info);
304	27739					121582	my $token_id = $self->{_sth_token_id}->fetchrow_array();
305
306	27739	100				63972	return unless defined $token_id;
307	19329					33136	return $token_id;
308							}
309
310							# get token id (adding the token if it doesn't exist)
311							sub _token_id_add {
312	27739			27739		47679	my ($self, $token_info) = @_;
313
314	27739					45931	my $token_id = $self->_token_id($token_info);
315	27739	100				55296	$token_id = $self->_add_token($token_info) unless defined $token_id;
316	27739					74465	return $token_id;
317							}
318
319							# return all tokens (regardless of spacing) that consist of this text
320							sub _token_similar {
321	134			134		298	my ($self, $token_text) = @_;
322	134					4357	$self->{_sth_token_similar}->execute($token_text);
323	134					1532	return $self->{_sth_token_similar}->fetchrow_arrayref;
324							}
325
326							# add a new token and return its id
327							sub _add_token {
328	8410			8410		13905	my ($self, $token_info) = @_;
329	8410					109996	$self->{_sth_add_token}->execute(@$token_info);
330	8410					257417	return $self->storage->dbh->last_insert_id(undef, undef, "token", undef);
331							}
332
333							# return a random expression containing the given token
334							sub _random_expr {
335	1216			1216		2403	my ($self, $token_id) = @_;
336
337	1216					1685	my $expr;
338
339	1216	100				2570	if (!defined $token_id) {
340	175					4441	$self->{_sth_random_expr}->execute();
341	175					1479	$expr = $self->{_sth_random_expr}->fetchrow_arrayref();
342							}
343							else {
344							# try the positions in a random order
345	1041					35349	for my $pos (shuffle 0 .. $self->order-1) {
346	1054					3037	my $column = "token${pos}_id";
347
348							# get a random expression which includes the token at this position
349	1054					56089	$self->{"_sth_expr_by_$column"}->execute($token_id);
350	1054					15671	$expr = $self->{"_sth_expr_by_$column"}->fetchrow_arrayref();
351	1054	100				4418	last if defined $expr;
352							}
353							}
354
355	1216	100				3235	return unless defined $expr;
356	1213					4128	return @$expr;
357							}
358
359							# return a new next/previous token
360							sub _pos_token {
361	13446			13446		25101	my ($self, $pos, $expr_id, $key_tokens) = @_;
362
363	13446					402955	$self->{"_sth_${pos}_token_get"}->execute($expr_id);
364	13446					179677	my $pos_tokens = $self->{"_sth_${pos}_token_get"}->fetchall_arrayref();
365
366	13446	50				38868	if (defined $key_tokens) {
367	13446					18269	for my $i (0 .. $#{ $key_tokens }) {
	13446					34563
368	27121					38915	my $want_id = $key_tokens->[$i];
369	27121					38600	my @ids = map { $_->[0] } @$pos_tokens;
	40110					67481
370	27121					37177	my $has_id = grep { $_ == $want_id } @ids;
	40110					77743
371	27121	100				57719	next unless $has_id;
372	1225					4409	return splice @$key_tokens, $i, 1;
373							}
374							}
375
376	12221					18255	my @novel_tokens;
377	12221					20020	for my $token (@$pos_tokens) {
378	17594					34739	push @novel_tokens, ($token->[0]) x $token->[1];
379							}
380	12221					40991	return $novel_tokens[rand @novel_tokens];
381							}
382
383							sub _construct_reply {
384	2426			2426		5618	my ($self, $what, $expr_id, $token_ids, $expr_cache, $key_ids) = @_;
385	2426					78903	my $order = $self->order;
386	2426					64566	my $repeat_limit = $self->repeat_limit;
387	2426					58998	my $boundary_token = $self->storage->_boundary_token_id;
388
389	2426					3856	my $i = 0;
390	2426					3342	while (1) {
391	13446	50	33			53673	if (($i % $order) == 0 and
			66
392							(($i >= $repeat_limit * 3) \|\|
393							($i >= $repeat_limit and uniq(@$token_ids) <= $order))) {
394	0					0	last;
395							}
396
397	13446					31687	my $id = $self->_pos_token($what, $expr_id, $key_ids);
398	13446	100				29889	last if $id == $boundary_token;
399
400	11020					13449	my @ids;
401	11020	100				23809	if ($what eq 'next') {
		50
402	4945					7429	push @$token_ids, $id;
403	4945					12580	@ids = @$token_ids[-$order..-1];
404							} elsif ($what eq 'prev') {
405	6075					11066	unshift @$token_ids, $id;
406	6075					14260	@ids = @$token_ids[0..$order-1];
407							} else {
408	0					0	die "PANIC: Internal Error: Don't know what the '$what' argument means";
409							}
410
411	11020					29787	my $key = join '_', @ids;
412	11020	100				25562	if (!defined $expr_cache->{$key}) {
413	10929					25982	$expr_cache->{$key} = $self->_expr_id_add(\@ids);
414							}
415	11020					26100	$expr_id = $expr_cache->{$key};
416							} continue {
417	11020					15893	$i++;
418							}
419
420	2426					4265	return;
421							}
422
423							__PACKAGE__->meta->make_immutable;
424
425							=encoding utf8
426
427							=head1 NAME
428
429							Hailo::Engine::Default - The default engine backend for L<Hailo\|Hailo>
430
431							=head1 DESCRIPTION
432
433							This backend implements the logic of replying to and learning from
434							input using the resources given to the L<engine
435							roles\|Hailo::Role::Engine>.
436
437							It generates the reply in one go, while favoring some of the tokens in the
438							input, and returns it. It is fast and the replies are decent, but you can
439							get better replies (at the cost of speed) with the
440							L<Scored\|Hailo::Engine::Scored> engine.
441
442							=head1 AUTHORS
443
444							Hinrik E<Ouml>rn SigurE<eth>sson, hinrik.sig@gmail.com
445
446							E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
447
448							=head1 LICENSE AND COPYRIGHT
449
450							Copyright 2010 Hinrik E<Ouml>rn SigurE<eth>sson and
451							E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@cpan.org>
452
453							This program is free software, you can redistribute it and/or modify
454							it under the same terms as Perl itself.
455
456							=cut