File Coverage

blib/lib/Chatbot/Eliza.pm
Criterion Covered Total %
statement 132 232 56.9
branch 35 86 40.7
condition 11 30 36.6
subroutine 10 14 71.4
pod 6 6 100.0
total 194 368 52.7


line stmt bran cond sub pod time code
1             ###################################################################
2              
3             package Chatbot::Eliza;
4             $Chatbot::Eliza::VERSION = '1.06';
5             # Copyright (c) 1997-2003 John Nolan. All rights reserved.
6             # This program is free software. You may modify and/or
7             # distribute it under the same terms as Perl itself.
8             # This copyright notice must remain attached to the file.
9             #
10             # You can run this file through either pod2man or pod2html
11             # to produce pretty documentation in manual or html file format
12             # (these utilities are part of the Perl 5 distribution).
13             #
14             # POD documentation is distributed throughout the actual code
15             # so that it also functions as comments.
16              
17             require 5.006;
18 2     2   1837 use strict;
  2         6  
  2         57  
19 2     2   10 use warnings;
  2         4  
  2         56  
20 2     2   17 use Carp;
  2         3  
  2         6747  
21              
22             our $AUTOLOAD;
23              
24              
25              
26             ####################################################################
27             # ---{ B E G I N P O D D O C U M E N T A T I O N }--------------
28             #
29              
30             =head1 NAME
31              
32             B - A clone of the classic Eliza program
33              
34             =head1 SYNOPSIS
35              
36             use Chatbot::Eliza;
37              
38             $mybot = new Chatbot::Eliza;
39             $mybot->command_interface;
40              
41             # see below for details
42              
43              
44             =head1 DESCRIPTION
45              
46             This module implements the classic Eliza algorithm.
47             The original Eliza program was written by Joseph
48             Weizenbaum and described in the Communications
49             of the ACM in 1966. Eliza is a mock Rogerian
50             psychotherapist. It prompts for user input,
51             and uses a simple transformation algorithm
52             to change user input into a follow-up question.
53             The program is designed to give the appearance
54             of understanding.
55              
56             This program is a faithful implementation of the program
57             described by Weizenbaum. It uses a simplified script
58             language (devised by Charles Hayden). The content
59             of the script is the same as Weizenbaum's.
60              
61             This module encapsulates the Eliza algorithm
62             in the form of an object. This should make
63             the functionality easy to incorporate in larger programs.
64              
65              
66             =head1 INSTALLATION
67              
68             The current version of Chatbot::Eliza.pm is available on CPAN:
69              
70             http://www.perl.com/CPAN/modules/by-module/Chatbot/
71              
72             To install this package, just change to the directory which
73             you created by untarring the package, and type the following:
74              
75             perl Makefile.PL
76             make test
77             make
78             make install
79              
80             This will copy Eliza.pm to your perl library directory for
81             use by all perl scripts. You probably must be root to do this,
82             unless you have installed a personal copy of perl.
83              
84              
85             =head1 USAGE
86              
87             This is all you need to do to launch a simple
88             Eliza session:
89              
90             use Chatbot::Eliza;
91              
92             $mybot = new Chatbot::Eliza;
93             $mybot->command_interface;
94              
95             You can also customize certain features of the
96             session:
97              
98             $myotherbot = new Chatbot::Eliza;
99              
100             $myotherbot->name( "Hortense" );
101             $myotherbot->debug( 1 );
102              
103             $myotherbot->command_interface;
104              
105             These lines set the name of the bot to be
106             "Hortense" and turn on the debugging output.
107              
108             When creating an Eliza object, you can specify
109             a name and an alternative scriptfile:
110              
111             $bot = new Chatbot::Eliza "Brian", "myscript.txt";
112              
113             You can also use an anonymous hash to set these parameters.
114             Any of the fields can be initialized using this syntax:
115              
116             $bot = new Chatbot::Eliza {
117             name => "Brian",
118             scriptfile => "myscript.txt",
119             debug => 1,
120             prompts_on => 1,
121             memory_on => 0,
122             myrand =>
123             sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); },
124             };
125              
126             If you don't specify a script file, then the new object will be
127             initialized with a default script. The module contains this
128             script within itself.
129              
130             You can use any of the internal functions in
131             a calling program. The code below takes an
132             arbitrary string and retrieves the reply from
133             the Eliza object:
134              
135             my $string = "I have too many problems.";
136             my $reply = $mybot->transform( $string );
137              
138             You can easily create two bots, each with a different
139             script, and see how they interact:
140              
141             use Chatbot::Eliza
142              
143             my ($harry, $sally, $he_says, $she_says);
144              
145             $sally = new Chatbot::Eliza "Sally", "histext.txt";
146             $harry = new Chatbot::Eliza "Harry", "hertext.txt";
147              
148             $he_says = "I am sad.";
149              
150             # Seed the random number generator.
151             srand( time ^ ($$ + ($$ << 15)) );
152              
153             while (1) {
154             $she_says = $sally->transform( $he_says );
155             print $sally->name, ": $she_says \n";
156              
157             $he_says = $harry->transform( $she_says );
158             print $harry->name, ": $he_says \n";
159             }
160              
161             Mechanically, this works well. However, it critically depends
162             on the actual script data. Having two mock Rogerian therapists
163             talk to each other usually does not produce any sensible conversation,
164             of course.
165              
166             After each call to the transform() method, the debugging output
167             for that transformation is stored in a variable called $debug_text.
168              
169             my $reply = $mybot->transform( "My foot hurts" );
170             my $debugging = $mybot->debug_text;
171              
172             This feature always available, even if the instance's $debug
173             variable is set to 0.
174              
175             Calling programs can specify their own random-number generators.
176             Use this syntax:
177              
178             $chatbot = new Chatbot::Eliza;
179             $chatbot->myrand(
180             sub {
181             #function goes here!
182             }
183             );
184              
185             The custom random function should have the same prototype
186             as perl's built-in rand() function. That is, it should take
187             a single (numeric) expression as a parameter, and it should
188             return a floating-point value between 0 and that number.
189              
190             What this code actually does is pass a reference to an anonymous
191             subroutine ("code reference"). Make sure you've read the perlref
192             manpage for details on how code references actually work.
193              
194             If you don't specify any custom rand function, then the Eliza
195             object will just use the built-in rand() function.
196              
197             =head1 MAIN DATA MEMBERS
198              
199             Each Eliza object uses the following data structures
200             to hold the script data in memory:
201              
202             =head2 %decomplist
203              
204             I: the set of keywords; I: strings containing
205             the decomposition rules.
206              
207             =head2 %reasmblist
208              
209             I: a set of values which are each the join
210             of a keyword and a corresponding decomposition rule;
211             I: the set of possible reassembly statements
212             for that keyword and decomposition rule.
213              
214             =head2 %reasmblist_for_memory
215              
216             This structure is identical to C<%reasmblist>, except
217             that these rules are only invoked when a user comment
218             is being retrieved from memory. These contain comments
219             such as "Earlier you mentioned that...," which are only
220             appropriate for remembered comments. Rules in the script
221             must be specially marked in order to be included
222             in this list rather than C<%reasmblist>. The default
223             script only has a few of these rules.
224              
225             =head2 @memory
226              
227             A list of user comments which an Eliza instance is remembering
228             for future use. Eliza does not remember everything, only some things.
229             In this implementation, Eliza will only remember comments
230             which match a decomposition rule which actually has reassembly
231             rules that are marked with the keyword "reasm_for_memory"
232             rather than the normal "reasmb". The default script
233             only has a few of these.
234              
235             =head2 %keyranks
236              
237             I: the set of keywords; I: the ranks for each keyword
238              
239             =head2 @quit
240              
241             "quit" words -- that is, words the user might use
242             to try to exit the program.
243              
244             =head2 @initial
245              
246             Possible greetings for the beginning of the program.
247              
248             =head2 @final
249              
250             Possible farewells for the end of the program.
251              
252             =head2 %pre
253              
254             I: words which are replaced before any transformations;
255             I: the respective replacement words.
256              
257             =head2 %post
258              
259             I: words which are replaced after the transformations
260             and after the reply is constructed; I: the respective
261             replacement words.
262              
263             =head2 %synon
264              
265             I: words which are found in decomposition rules;
266             I: words which are treated just like their
267             corresponding synonyms during matching of decomposition
268             rules.
269              
270             =head2 Other data members
271              
272             There are several other internal data members. Hopefully
273             these are sufficiently obvious that you can learn about them
274             just by reading the source code.
275              
276             =cut
277              
278              
279             my %fields = (
280             name => 'Eliza',
281             scriptfile => '',
282              
283             debug => 0,
284             debug_text => '',
285             transform_text => '',
286             prompts_on => 1,
287             memory_on => 1,
288             botprompt => '',
289             userprompt => '',
290              
291             myrand =>
292             sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); },
293              
294             keyranks => undef,
295             decomplist => undef,
296             reasmblist => undef,
297             reasmblist_for_memory => undef,
298              
299             pre => undef,
300             post => undef,
301             synon => undef,
302             initial => undef,
303             final => undef,
304             quit => undef,
305              
306             max_memory_size => 5,
307             likelihood_of_using_memory => 1,
308             memory => undef,
309             );
310              
311              
312             ####################################################################
313             # ---{ B E G I N M E T H O D S }----------------------------------
314             #
315              
316             =head1 METHODS
317              
318             =head2 new()
319              
320             my $chatterbot = new Chatbot::Eliza;
321              
322             new() creates a new Eliza object. This method
323             also calls the internal _initialize() method, which in turn
324             calls the parse_script_data() method, which initializes
325             the script data.
326              
327             my $chatterbot = new Chatbot::Eliza 'Ahmad', 'myfile.txt';
328              
329             The eliza object defaults to the name "Eliza", and it
330             contains default script data within itself. However,
331             using the syntax above, you can specify an alternative
332             name and an alternative script file.
333              
334             See the method parse_script_data(). for a description
335             of the format of the script file.
336              
337             =cut
338              
339             sub new {
340 1     1 1 189 my ($that,$name,$scriptfile) = @_;
341 1   33     13 my $class = ref($that) || $that;
342 1         19 my $self = {
343             _permitted => \%fields,
344             %fields,
345             };
346 1         4 bless $self, $class;
347 1         4 $self->_initialize($name,$scriptfile);
348 1         2 return $self;
349             } # end method new
350              
351             sub _initialize {
352 1     1   2 my ($self,$param1,$param2) = @_;
353              
354 1 50 33     7 if (defined $param1 and ref $param1 eq "HASH") {
355              
356             # Allow the calling program to pass in intial parameters
357             # as an anonymous hash
358 0         0 map { $self->{$_} = $param1->{$_}; } keys %$param1;
  0         0  
359              
360 0         0 $self->parse_script_data( $self->{scriptfile} );
361              
362             } else {
363 1 50       12 $self->name($param1) if $param1;
364 1         4 $self->parse_script_data($param2);
365             }
366              
367             # Initialize the memory array ref at instantiation time,
368             # rather than at class definition time.
369             # (THANKS to Randal Schwartz and Robert Chin for fixing this bug.)
370             #
371 1         3 $self->{memory} = [];
372             }
373              
374             sub AUTOLOAD {
375 53     53   74 my $self = shift;
376 53   33     112 my $class = ref($self) || croak "$self is not an object : $!\n";
377 53         78 my $field = $AUTOLOAD;
378 53         158 $field =~ s/.*://; # Strip fully-qualified portion
379              
380 53 50       137 unless (exists $self->{"_permitted"}->{$field} ) {
381 0         0 croak "Can't access `$field' field in object of class $class : $!\n";
382             }
383              
384 53 100       97 if (@_) {
385 25         56 return $self->{$field} = shift;
386             } else {
387 28         166 return $self->{$field};
388             }
389             } # end method AUTOLOAD
390              
391              
392             ####################################################################
393             # --- command_interface ---
394              
395             =head2 command_interface()
396              
397             $chatterbot->command_interface;
398              
399             command_interface() opens an interactive session with
400             the Eliza object, just like the original Eliza program.
401              
402             If you want to design your own session format, then
403             you can write your own while loop and your own functions
404             for prompting for and reading user input, and use the
405             transform() method to generate Eliza's responses.
406             (I: you do not need to invoke preprocess()
407             and postprocess() directly, because these are invoked
408             from within the transform() method.)
409              
410             But if you're lazy and you want to skip all that,
411             then just use command_interface(). It's all done for you.
412              
413             During an interactive session invoked using command_interface(),
414             you can enter the word "debug" to toggle debug mode on and off.
415             You can also enter the keyword "memory" to invoke the _debug_memory()
416             method and print out the contents of the Eliza instance's memory.
417              
418             =cut
419              
420             sub command_interface {
421 0     0 1 0 my $self = shift;
422 0         0 my ($user_input, $previous_user_input, $reply);
423              
424 0         0 $user_input = "";
425              
426 0         0 $self->botprompt($self->name . ":\t"); # Eliza's prompt
427 0         0 $self->userprompt("you:\t"); # User's prompt
428              
429             # Seed the random number generator.
430 0         0 srand( time() ^ ($$ + ($$ << 15)) );
431              
432             # Print the Eliza prompt
433 0 0       0 print $self->botprompt if $self->prompts_on;
434              
435             # Print an initial greeting
436 0         0 print "$self->{initial}->[ int &{$self->{myrand}}( scalar @{ $self->{initial} } ) ]\n";
  0         0  
  0         0  
437              
438              
439             ###################################################################
440             # command loop. This loop should go on forever,
441             # until we explicity break out of it.
442             #
443 0         0 while (1) {
444              
445 0 0       0 print $self->userprompt if $self->prompts_on;
446              
447 0         0 $previous_user_input = $user_input;
448 0         0 chomp( $user_input = );
449              
450              
451             # If the user wants to quit,
452             # print out a farewell and quit.
453 0 0       0 if ($self->_testquit($user_input) ) {
454 0         0 $reply = "$self->{final}->[ int &{$self->{myrand}}( scalar @{$self->{final}} ) ]";
  0         0  
  0         0  
455 0 0       0 print $self->botprompt if $self->prompts_on;
456 0         0 print "$reply\n";
457 0         0 last;
458             }
459              
460             # If the user enters the word "debug",
461             # then toggle on/off this Eliza's debug output.
462 0 0       0 if ($user_input eq "debug") {
463 0         0 $self->debug( ! $self->debug );
464 0         0 $user_input = $previous_user_input;
465             }
466              
467             # If the user enters the word "memory",
468             # then use the _debug_memory method to dump out
469             # the current contents of Eliza's memory
470 0 0 0     0 if ($user_input eq "memory" or $user_input eq "debug memory") {
471 0         0 print $self->_debug_memory();
472 0         0 redo;
473             }
474              
475             # If the user enters the word "debug that",
476             # then dump out the debugging of the
477             # most recent call to transform.
478 0 0       0 if ($user_input eq "debug that") {
479 0         0 print $self->debug_text();
480 0         0 redo;
481             }
482              
483             # Invoke the transform method
484             # to generate a reply.
485 0         0 $reply = $self->transform( $user_input );
486              
487              
488             # Print out the debugging text if debugging is set to on.
489             # This variable should have been set by the transform method.
490 0 0       0 print $self->debug_text if $self->debug;
491              
492             # Print the actual reply
493 0 0       0 print $self->botprompt if $self->prompts_on;
494 0         0 print "$reply\n";
495              
496             } # End UI command loop.
497              
498              
499             } # End method command_interface
500              
501              
502             ####################################################################
503             # --- preprocess ---
504              
505             =head2 preprocess()
506              
507             $string = preprocess($string);
508              
509             preprocess() applies simple substitution rules to the input string.
510             Mostly this is to catch varieties in spelling, misspellings,
511             contractions and the like.
512              
513             preprocess() is called from within the transform() method.
514             It is applied to user-input text, BEFORE any processing,
515             and before a reassebly statement has been selected.
516              
517             It uses the array C<%pre>, which is created
518             during the parse of the script.
519              
520             =cut
521              
522             sub preprocess {
523 4     4 1 6 my ($self,$string) = @_;
524              
525 4         6 my ($i, @wordsout, @wordsin, $keyword);
526              
527 4         12 @wordsout = @wordsin = split / /, $string;
528              
529 4         13 WORD: for ($i = 0; $i < @wordsin; $i++) {
530 8         9 foreach $keyword (keys %{ $self->{pre} }) {
  8         34  
531 0 0       0 if ($wordsin[$i] =~ /\b$keyword\b/i ) {
532 0         0 ($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{pre}->{$keyword}/ig;
533 0         0 next WORD;
534             }
535             }
536             }
537 4         13 return join ' ', @wordsout;
538             }
539              
540              
541             ####################################################################
542             # --- postprocess ---
543              
544             =head2 postprocess()
545              
546             $string = postprocess($string);
547              
548             postprocess() applies simple substitution rules to the
549             reassembly rule. This is where all the "I"'s and "you"'s
550             are exchanged. postprocess() is called from within the
551             transform() function.
552              
553             It uses the array C<%post>, created
554             during the parse of the script.
555              
556             =cut
557              
558             sub postprocess {
559 27     27 1 41 my ($self,$string) = @_;
560              
561 27         30 my ($i, @wordsout, @wordsin, $keyword);
562              
563 27         50 @wordsin = @wordsout = split (/ /, $string);
564              
565 27         72 WORD: for ($i = 0; $i < @wordsin; $i++) {
566 5         6 foreach $keyword (keys %{ $self->{post} }) {
  5         20  
567 0 0       0 if ($wordsin[$i] =~ /\b$keyword\b/i ) {
568 0         0 ($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{post}->{$keyword}/ig;
569 0         0 next WORD;
570             }
571             }
572             }
573 27         78 return join ' ', @wordsout;
574             }
575              
576             ####################################################################
577             # --- _testquit ---
578              
579             =head2 _testquit()
580              
581             if ($self->_testquit($user_input) ) { ... }
582              
583             _testquit() detects words like "bye" and "quit" and returns
584             true if it finds one of them as the first word in the sentence.
585              
586             These words are listed in the script, under the keyword "quit".
587              
588             =cut
589              
590             sub _testquit {
591 0     0   0 my ($self,$string) = @_;
592              
593 0         0 my ($quitword, @wordsin);
594              
595 0         0 foreach $quitword (@{ $self->{quit} }) {
  0         0  
596 0 0       0 return 1 if ($string =~ /\b$quitword\b/i ) ;
597             }
598             }
599              
600              
601             ####################################################################
602             # --- _debug_memory ---
603              
604             =head2 _debug_memory()
605              
606             $self->_debug_memory()
607              
608             _debug_memory() is a special function which returns
609             the contents of Eliza's memory stack.
610              
611              
612             =cut
613              
614             sub _debug_memory {
615              
616 0     0   0 my ($self) = @_;
617              
618 0         0 my $string = "\t";
619 0         0 $string .= $#{ $self->memory } + 1;
  0         0  
620 0         0 $string .= " item(s) in memory stack:\n";
621              
622             # [THANKS to Roy Stephan for helping me adjust this bit]
623             #
624 0         0 foreach (@{ $self->memory } ) {
  0         0  
625              
626 0         0 my $line = $_;
627 0         0 $string .= sprintf "\t\t->$line\n" ;
628             };
629              
630 0         0 return $string;
631             }
632              
633             ####################################################################
634             # --- transform ---
635              
636             =head2 transform()
637              
638             $reply = $chatterbot->transform( $string, $use_memory );
639              
640             transform() applies transformation rules to the user input
641             string. It invokes preprocess(), does transformations,
642             then invokes postprocess(). It returns the tranformed
643             output string, called C<$reasmb>.
644              
645             The algorithm embedded in the transform() method has three main parts:
646              
647             =over
648              
649             =item 1
650              
651             Search the input string for a keyword.
652              
653             =item 2
654              
655             If we find a keyword, use the list of decomposition rules
656             for that keyword, and pattern-match the input string against
657             each rule.
658              
659             =item 3
660              
661             If the input string matches any of the decomposition rules,
662             then randomly select one of the reassembly rules for that
663             decomposition rule, and use it to construct the reply.
664              
665             =back
666              
667             transform() takes two parameters. The first is the string we want
668             to transform. The second is a flag which indicates where this sting
669             came from. If the flag is set, then the string has been pulled
670             from memory, and we should use reassembly rules appropriate
671             for that. If the flag is not set, then the string is the most
672             recent user input, and we can use the ordinary reassembly rules.
673              
674             The memory flag is only set when the transform() function is called
675             recursively. The mechanism for setting this parameter is
676             embedded in the transoform method itself. If the flag is set
677             inappropriately, it is ignored.
678              
679             =cut
680              
681             sub transform{
682 4     4 1 906 my ($self,$string,$use_memory) = @_;
683              
684             # Initialize the debugging text buffer.
685 4         19 $self->debug_text('');
686              
687 4 50       10 $self->debug_text(sprintf "\t[Pulling string \"$string\" from memory.]\n")
688             if $use_memory;
689              
690 4         5 my ($i, @string_parts, $string_part, $rank, $goto, $reasmb, $keyword,
691             $decomp, $this_decomp, $reasmbkey, @these_reasmbs,
692             @decomp_matches, $synonyms, $synonym_index);
693              
694             # Default to a really low rank.
695 4         5 $rank = -2;
696 4         4 $reasmb = "";
697 4         5 $goto = "";
698              
699             # First run the string through the preprocessor.
700 4         10 $string = $self->preprocess( $string );
701              
702             # Convert punctuation to periods. We will assume that commas
703             # and certain conjunctions separate distinct thoughts/sentences.
704 4         8 $string =~ s/[?!,]/./g;
705 4         7 $string =~ s/but/./g; # Yikes! This is English-specific.
706              
707             # Split the string by periods into an array
708 4         8 @string_parts = split /\./, $string ;
709              
710             # Examine each part of the input string in turn.
711 4         7 STRING_PARTS: foreach $string_part (@string_parts) {
712              
713             # Run through the whole list of keywords.
714 4         5 KEYWORD: foreach $keyword (keys %{ $self->{decomplist} }) {
  4         10  
715              
716             # Check to see if the input string contains a keyword
717             # which outranks any we have found previously
718             # (On first loop, rank is set to -2.)
719 12 100 66     181 if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto)
      66        
720             and
721             $rank < $self->{keyranks}->{$keyword}
722             )
723             {
724             # If we find one, then set $rank to equal
725             # the rank of that keyword.
726 3         7 $rank = $self->{keyranks}->{$keyword};
727              
728 3         14 $self->debug_text($self->debug_text . sprintf "\t$rank> $keyword");
729              
730             # Now let's check all the decomposition rules for that keyword.
731 3         6 DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} }) {
  3         7  
732              
733             # Change '*' to '\b(.*)\b' in this decomposition rule,
734             # so we can use it for regular expressions. Later,
735             # we will want to isolate individual matches to each wildcard.
736 3         14 ($this_decomp = $decomp) =~ s/\s*\*\s*/\\b\(\.\*\)\\b/g;
737              
738             # If this docomposition rule contains a word which begins with '@',
739             # then the script also contained some synonyms for that word.
740             # Find them all using %synon and generate a regular expression
741             # containing all of them.
742 3 50       14 if ($this_decomp =~ /\@/ ) {
743 0         0 ($synonym_index = $this_decomp) =~ s/.*\@(\w*).*/$1/i ;
744 0         0 $synonyms = join ('|', @{ $self->{synon}->{$synonym_index} });
  0         0  
745 0         0 $this_decomp =~ s/(.*)\@$synonym_index(.*)/$1($synonym_index\|$synonyms)$2/g;
746             }
747              
748 3         12 $self->debug_text($self->debug_text . sprintf "\n\t\t: $decomp");
749              
750             # Using the regular expression we just generated,
751             # match against the input string. Use empty "()"'s to
752             # eliminate warnings about uninitialized variables.
753 3 50       57 if ($string_part =~ /$this_decomp()()()()()()()()()()/i) {
754              
755             # If this decomp rule matched the string,
756             # then create an array, so that we can refer to matches
757             # to individual wildcards. Use '0' as a placeholder
758             # (we don't want to refer to any "zeroth" wildcard).
759 3         25 @decomp_matches = ("0", $1, $2, $3, $4, $5, $6, $7, $8, $9);
760 3         13 $self->debug_text($self->debug_text . sprintf " : @decomp_matches\n");
761              
762             # Using the keyword and the decomposition rule,
763             # reconstruct a key for the list of reassamble rules.
764 3         7 $reasmbkey = join ($;,$keyword,$decomp);
765              
766             # Get the list of possible reassembly rules for this key.
767             #
768 3 50 33     11 if (defined $use_memory and $#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0) {
  0         0  
769              
770             # If this transform function was invoked with the memory flag,
771             # and there are in fact reassembly rules which are appropriate
772             # for pulling out of memory, then include them.
773 0         0 @these_reasmbs = @{ $self->{reasmblist_for_memory}->{$reasmbkey} }
  0         0  
774              
775             } else {
776              
777             # Otherwise, just use the plain reassembly rules.
778             # (This is what normally happens.)
779 3         4 @these_reasmbs = @{ $self->{reasmblist}->{$reasmbkey} }
  3         9  
780             }
781              
782             # Pick out a reassembly rule at random.
783 3         5 $reasmb = $these_reasmbs[ int &{$self->{myrand}}( scalar @these_reasmbs ) ];
  3         7  
784              
785 3         14 $self->debug_text($self->debug_text . sprintf "\t\t--> $reasmb\n");
786              
787             # If the reassembly rule we picked contains the word "goto",
788             # then we start over with a new keyword. Set $keyword to equal
789             # that word, and start the whole loop over.
790 3 50       10 if ($reasmb =~ m/^goto\s(\w*).*/i) {
791 0         0 $self->debug_text($self->debug_text . sprintf "\$1 = $1\n");
792 0         0 $goto = $keyword = $1;
793 0         0 $rank = -2;
794 0         0 redo KEYWORD;
795             }
796              
797             # Otherwise, using the matches to wildcards which we stored above,
798             # insert words from the input string back into the reassembly rule.
799             # [THANKS to Gidon Wise for submitting a bugfix here]
800 3         9 for ($i=1; $i <= $#decomp_matches; $i++) {
801 27         72 $decomp_matches[$i] = $self->postprocess( $decomp_matches[$i] );
802 27         91 $decomp_matches[$i] =~ s/([,;?!]|\.*)$//;
803 27         269 $reasmb =~ s/\($i\)/$decomp_matches[$i]/g;
804             }
805              
806             # Move on to the next keyword. If no other keywords match,
807             # then we'll end up actually using the $reasmb string
808             # we just generated above.
809 3         11 next KEYWORD ;
810              
811             } # End if ($string_part =~ /$this_decomp/i)
812              
813 0         0 $self->debug_text($self->debug_text . sprintf "\n");
814              
815             } # End DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} })
816              
817             } # End if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto)
818              
819             } # End KEYWORD: foreach $keyword (keys %{ $self->{decomplist})
820            
821             } # End STRING_PARTS: foreach $string_part (@string_parts) {
822              
823             =head2 How memory is used
824              
825             In the script, some reassembly rules are special. They are marked with
826             the keyword "reasm_for_memory", rather than just "reasm".
827             Eliza "remembers" any comment when it matches a docomposition rule
828             for which there are any reassembly rules for memory.
829             An Eliza object remembers up to C<$max_memory_size> (default: 5)
830             user input strings.
831              
832             If, during a subsequent run, the transform() method fails to find any
833             appropriate decomposition rule for a user's comment, and if there are
834             any comments inside the memory array, then Eliza may elect to ignore
835             the most recent comment and instead pull out one of the strings from memory.
836             In this case, the transform method is called recursively with the memory flag.
837              
838             Honestly, I am not sure exactly how this memory functionality
839             was implemented in the original Eliza program. Hopefully
840             this implementation is not too far from Weizenbaum's.
841              
842             If you don't want to use the memory functionality at all,
843             then you can disable it:
844              
845             $mybot->memory_on(0);
846              
847             You can also achieve the same effect by making sure
848             that the script data does not contain any reassembly rules
849             marked with the keyword "reasm_for_memory". The default
850             script data only has 4 such items.
851              
852             =cut
853              
854 4 100       21 if ($reasmb eq "") {
    50          
855              
856             # If all else fails, call this method recursively
857             # and make sure that it has something to parse.
858             # Use a string from memory if anything is available.
859             #
860             # $self-likelihood_of_using_memory should be some number
861             # between 1 and 0; it defaults to 1.
862             #
863 1 50 33     2 if (
864 1         5 $#{ $self->memory } >= 0
865             and
866 0         0 &{$self->{myrand}}(1) >= 1 - $self->likelihood_of_using_memory
867             ) {
868              
869 0         0 $reasmb = $self->transform( shift @{ $self->memory }, "use memory" );
  0         0  
870              
871             } else {
872 1         8 $reasmb = $self->transform("xnone");
873             }
874              
875             } elsif ($self->memory_on) {
876              
877             # If memory is switched on, then we handle memory.
878              
879             # Now that we have successfully transformed this string,
880             # push it onto the end of the memory stack... unless, of course,
881             # that's where we got it from in the first place, or if the rank
882             # is not the kind we remember.
883             #
884 3 50 33     4 if (
885 3         14 $#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0
886             and
887             not defined $use_memory
888             ) {
889              
890 0         0 push @{ $self->memory },$string ;
  0         0  
891             }
892              
893             # Shift out the least-recent item from the bottom
894             # of the memory stack if the stack exceeds the max size.
895 3 50       4 shift @{ $self->memory } if $#{ $self->memory } >= $self->max_memory_size;
  0         0  
  3         20  
896              
897             $self->debug_text($self->debug_text
898 3         18 . sprintf("\t%d item(s) in memory.\n", $#{ $self->memory } + 1 ) ) ;
  3         17  
899              
900             } # End if ($reasmb eq "")
901              
902 4         10 $reasmb =~ tr/ / /s; # Eliminate any duplicate space characters.
903 4         8 $reasmb =~ s/[ ][?]$/?/; # Eliminate any spaces before the question mark.
904              
905             # Save the return string so that forgetful calling programs
906             # can ask the bot what the last reply was.
907 4         17 $self->transform_text($reasmb);
908              
909 4         17 return $reasmb ;
910             }
911              
912              
913             ####################################################################
914             # --- parse_script_data ---
915              
916             =head2 parse_script_data()
917              
918             $self->parse_script_data;
919             $self->parse_script_data( $script_file );
920              
921             parse_script_data() is invoked from the _initialize() method,
922             which is called from the new() function. However, you can also
923             call this method at any time against an already-instantiated
924             Eliza instance. In that case, the new script data is I
925             to the old script data. The old script data is not deleted.
926              
927             You can pass a parameter to this function, which is the name of the
928             script file, and it will read in and parse that file.
929             If you do not pass any parameter to this method, then
930             it will read the data embedded at the end of the module as its
931             default script data.
932              
933             If you pass the name of a script file to parse_script_data(),
934             and that file is not available for reading, then the module dies.
935              
936              
937             =head1 Format of the script file
938              
939             This module includes a default script file within itself,
940             so it is not necessary to explicitly specify a script file
941             when instantiating an Eliza object.
942              
943             Each line in the script file can specify a key,
944             a decomposition rule, or a reassembly rule.
945              
946             key: remember 5
947             decomp: * i remember *
948             reasmb: Do you often think of (2) ?
949             reasmb: Does thinking of (2) bring anything else to mind ?
950             decomp: * do you remember *
951             reasmb: Did you think I would forget (2) ?
952             reasmb: What about (2) ?
953             reasmb: goto what
954             pre: equivalent alike
955             synon: belief feel think believe wish
956              
957             The number after the key specifies the rank.
958             If a user's input contains the keyword, then
959             the transform() function will try to match
960             one of the decomposition rules for that keyword.
961             If one matches, then it will select one of
962             the reassembly rules at random. The number
963             (2) here means "use whatever set of words
964             matched the second asterisk in the decomposition
965             rule."
966              
967             If you specify a list of synonyms for a word,
968             the you should use a "@" when you use that
969             word in a decomposition rule:
970              
971             decomp: * i @belief i *
972             reasmb: Do you really think so ?
973             reasmb: But you are not sure you (3).
974              
975             Otherwise, the script will never check to see
976             if there are any synonyms for that keyword.
977              
978             Reassembly rules should be marked with I
979             rather than I when it is appropriate for use
980             when a user's comment has been extracted from memory.
981              
982             key: my 2
983             decomp: * my *
984             reasm_for_memory: Let's discuss further why your (2).
985             reasm_for_memory: Earlier you said your (2).
986             reasm_for_memory: But your (2).
987             reasm_for_memory: Does that have anything to do with the fact that your (2) ?
988              
989             =head1 How the script file is parsed
990              
991             Each line in the script file contains an "entrytype"
992             (key, decomp, synon) and an "entry", separated by
993             a colon. In turn, each "entry" can itself be
994             composed of a "key" and a "value", separated by
995             a space. The parse_script_data() function
996             parses each line out, and splits the "entry" and
997             "entrytype" portion of each line into two variables,
998             C<$entry> and C<$entrytype>.
999              
1000             Next, it uses the string C<$entrytype> to determine
1001             what sort of stuff to expect in the C<$entry> variable,
1002             if anything, and parses it accordingly. In some cases,
1003             there is no second level of key-value pair, so the function
1004             does not even bother to isolate or create C<$key> and C<$value>.
1005              
1006             C<$key> is always a single word. C<$value> can be null,
1007             or one single word, or a string composed of several words,
1008             or an array of words.
1009              
1010             Based on all these entries and keys and values,
1011             the function creates two giant hashes:
1012             C<%decomplist>, which holds the decomposition rules for
1013             each keyword, and C<%reasmblist>, which holds the
1014             reassembly phrases for each decomposition rule.
1015             It also creates C<%keyranks>, which holds the ranks for
1016             each key.
1017              
1018             Six other arrays are created: C<%reasm_for_memory, %pre, %post,
1019             %synon, @initial,> and C<@final>.
1020              
1021             =cut
1022              
1023             sub parse_script_data {
1024              
1025 1     1 1 2 my ($self,$scriptfile) = @_;
1026 1         2 my @scriptlines;
1027              
1028 1 50       3 if ($scriptfile) {
1029              
1030             # If we have an external script file, open it
1031             # and read it in (the whole thing, all at once).
1032 1 50       35 open (SCRIPTFILE, "<$scriptfile")
1033             or die "Could not read from file $scriptfile : $!\n";
1034 1         27 @scriptlines = ; # read in script data
1035 1         8 $self->scriptfile($scriptfile);
1036 1         7 close (SCRIPTFILE);
1037              
1038             } else {
1039              
1040             # Otherwise, read in the data from the bottom
1041             # of this file. This data might be read several
1042             # times, so we save the offset pointer and
1043             # reset it when we're done.
1044 0         0 my $where= tell(DATA);
1045 0         0 @scriptlines = ; # read in script data
1046 0         0 seek(DATA, $where, 0);
1047 0         0 $self->scriptfile('');
1048             }
1049              
1050 1         3 my ($entrytype, $entry, $key, $value) ;
1051 1         2 my $thiskey = "";
1052 1         2 my $thisdecomp = "";
1053              
1054             ############################################################
1055             # Examine each line of script data.
1056 1         3 for (@scriptlines) {
1057              
1058             # Skip comments and lines with only whitespace.
1059 9 50 33     53 next if (/^\s*#/ || /^\s*$/);
1060              
1061             # Split entrytype and entry, using a colon as the delimiter.
1062 9         43 ($entrytype, $entry) = $_ =~ m/^\s*(\S*)\s*:\s*(.*)\s*$/;
1063              
1064             # Case loop, based on the entrytype.
1065 9         18 for ($entrytype) {
1066              
1067 9 50       21 /quit/ and do { push @{ $self->{quit} }, $entry; last; };
  0         0  
  0         0  
  0         0  
1068 9 50       18 /initial/ and do { push @{ $self->{initial} }, $entry; last; };
  0         0  
  0         0  
  0         0  
1069 9 50       18 /final/ and do { push @{ $self->{final} }, $entry; last; };
  0         0  
  0         0  
  0         0  
1070              
1071 9 100       18 /decomp/ and do {
1072 3 50       10 die "$0: error parsing script: decomposition rule with no keyword.\n"
1073             if $thiskey eq "";
1074 3         7 $thisdecomp = join($;,$thiskey,$entry);
1075 3         4 push @{ $self->{decomplist}->{$thiskey} }, $entry ;
  3         10  
1076 3         5 last;
1077             };
1078              
1079 6 100       15 /reasmb/ and do {
1080 3 50       8 die "$0: error parsing script: reassembly rule with no decomposition rule.\n"
1081             if $thisdecomp eq "";
1082 3         4 push @{ $self->{reasmblist}->{$thisdecomp} }, $entry ;
  3         10  
1083 3         8 last;
1084             };
1085              
1086 3 50       7 /reasm_for_memory/ and do {
1087 0 0       0 die "$0: error parsing script: reassembly rule with no decomposition rule.\n"
1088             if $thisdecomp eq "";
1089 0         0 push @{ $self->{reasmblist_for_memory}->{$thisdecomp} }, $entry ;
  0         0  
1090 0         0 last;
1091             };
1092              
1093             # The entrytypes below actually expect to see a key and value
1094             # pair in the entry, so we split them out. The first word,
1095             # separated by a space, is the key, and everything else is
1096             # an array of values.
1097              
1098 3         10 ($key,$value) = $entry =~ m/^\s*(\S*)\s*(.*)/;
1099              
1100 3 50       10 /pre/ and do { $self->{pre}->{$key} = $value; last; };
  0         0  
  0         0  
1101 3 50       6 /post/ and do { $self->{post}->{$key} = $value; last; };
  0         0  
  0         0  
1102              
1103             # synon expects an array, so we split $value into an array, using " " as delimiter.
1104 3 50       7 /synon/ and do { $self->{synon}->{$key} = [ split /\ /, $value ]; last; };
  0         0  
  0         0  
1105              
1106 3 50       8 /key/ and do {
1107 3         5 $thiskey = $key;
1108 3         5 $thisdecomp = "";
1109 3         16 $self->{keyranks}->{$thiskey} = $value ;
1110 3         22 last;
1111             };
1112            
1113             } # End for ($entrytype) (case loop)
1114              
1115             } # End for (@scriptlines)
1116              
1117             } # End of method parse_script_data
1118              
1119              
1120             # Eliminate some pesky warnings.
1121             #
1122       0     sub DESTROY {}
1123              
1124              
1125             # ---{ E N D M E T H O D S }----------------------------------
1126             ####################################################################
1127              
1128             1; # Return a true value.
1129              
1130              
1131             =head1 COPYRIGHT AND LICENSE
1132              
1133             This software is copyright (c) 2003 by John Nolan Ejpnolan@sonic.netE.
1134              
1135             This is free software; you can redistribute it and/or modify it under
1136             the same terms as the Perl 5 programming language system itself.
1137              
1138             =head1 AUTHOR
1139              
1140             John Nolan jpnolan@sonic.net January 2003.
1141              
1142             Implements the classic Eliza algorithm by Prof. Joseph Weizenbaum.
1143             Script format devised by Charles Hayden.
1144              
1145             =cut
1146              
1147              
1148              
1149             ####################################################################
1150             # ---{ B E G I N D E F A U L T S C R I P T D A T A }----------
1151             #
1152             # This script was prepared by Chris Hayden. Hayden's Eliza
1153             # program was written in Java, however, it attempted to match
1154             # the functionality of Weizenbaum's original program as closely
1155             # as possible.
1156             #
1157             # Hayden's script format was quite different from Weizenbaum's,
1158             # but it maintained the same content. I have adapted Hayden's
1159             # script format, since it was simple and convenient enough
1160             # for my purposes.
1161             #
1162             # I've made small modifications here and there.
1163             #
1164              
1165             # We use the token __DATA__ rather than __END__,
1166             # so that all this data is visible within the current package.
1167              
1168             __DATA__