line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
################################################################### |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Chatbot::Eliza; |
4
|
|
|
|
|
|
|
$Chatbot::Eliza::VERSION = '1.08'; |
5
|
|
|
|
|
|
|
# Copyright (c) 1997-2003 John Nolan. All rights reserved. |
6
|
|
|
|
|
|
|
# This program is free software. You may modify and/or |
7
|
|
|
|
|
|
|
# distribute it under the same terms as Perl itself. |
8
|
|
|
|
|
|
|
# This copyright notice must remain attached to the file. |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You can run this file through either pod2man or pod2html |
11
|
|
|
|
|
|
|
# to produce pretty documentation in manual or html file format |
12
|
|
|
|
|
|
|
# (these utilities are part of the Perl 5 distribution). |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# POD documentation is distributed throughout the actual code |
15
|
|
|
|
|
|
|
# so that it also functions as comments. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
require 5.006; |
18
|
2
|
|
|
2
|
|
1444
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
57
|
|
19
|
2
|
|
|
2
|
|
10
|
use warnings; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
62
|
|
20
|
2
|
|
|
2
|
|
10
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
6524
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
our $AUTOLOAD; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
#################################################################### |
27
|
|
|
|
|
|
|
# ---{ B E G I N P O D D O C U M E N T A T I O N }-------------- |
28
|
|
|
|
|
|
|
# |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
B - A clone of the classic Eliza program |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 SYNOPSIS |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
use Chatbot::Eliza; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
$mybot = new Chatbot::Eliza; |
39
|
|
|
|
|
|
|
$mybot->command_interface; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# see below for details |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 DESCRIPTION |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
This module implements the classic Eliza algorithm. |
47
|
|
|
|
|
|
|
The original Eliza program was written by Joseph |
48
|
|
|
|
|
|
|
Weizenbaum and described in the Communications |
49
|
|
|
|
|
|
|
of the ACM in 1966. Eliza is a mock Rogerian |
50
|
|
|
|
|
|
|
psychotherapist. It prompts for user input, |
51
|
|
|
|
|
|
|
and uses a simple transformation algorithm |
52
|
|
|
|
|
|
|
to change user input into a follow-up question. |
53
|
|
|
|
|
|
|
The program is designed to give the appearance |
54
|
|
|
|
|
|
|
of understanding. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
This program is a faithful implementation of the program |
57
|
|
|
|
|
|
|
described by Weizenbaum. It uses a simplified script |
58
|
|
|
|
|
|
|
language (devised by Charles Hayden). The content |
59
|
|
|
|
|
|
|
of the script is the same as Weizenbaum's. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
This module encapsulates the Eliza algorithm |
62
|
|
|
|
|
|
|
in the form of an object. This should make |
63
|
|
|
|
|
|
|
the functionality easy to incorporate in larger programs. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 INSTALLATION |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
The current version of Chatbot::Eliza.pm is available on CPAN: |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
http://www.perl.com/CPAN/modules/by-module/Chatbot/ |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
To install this package, just change to the directory which |
73
|
|
|
|
|
|
|
you created by untarring the package, and type the following: |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
perl Makefile.PL |
76
|
|
|
|
|
|
|
make test |
77
|
|
|
|
|
|
|
make |
78
|
|
|
|
|
|
|
make install |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
This will copy Eliza.pm to your perl library directory for |
81
|
|
|
|
|
|
|
use by all perl scripts. You probably must be root to do this, |
82
|
|
|
|
|
|
|
unless you have installed a personal copy of perl. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 USAGE |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
This is all you need to do to launch a simple |
88
|
|
|
|
|
|
|
Eliza session: |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
use Chatbot::Eliza; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
$mybot = new Chatbot::Eliza; |
93
|
|
|
|
|
|
|
$mybot->command_interface; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
You can also customize certain features of the |
96
|
|
|
|
|
|
|
session: |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
$myotherbot = new Chatbot::Eliza; |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
$myotherbot->name( "Hortense" ); |
101
|
|
|
|
|
|
|
$myotherbot->debug( 1 ); |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
$myotherbot->command_interface; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
These lines set the name of the bot to be |
106
|
|
|
|
|
|
|
"Hortense" and turn on the debugging output. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
When creating an Eliza object, you can specify |
109
|
|
|
|
|
|
|
a name and an alternative scriptfile: |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
$bot = new Chatbot::Eliza "Brian", "myscript.txt"; |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
You can also use an anonymous hash to set these parameters. |
114
|
|
|
|
|
|
|
Any of the fields can be initialized using this syntax: |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
$bot = new Chatbot::Eliza { |
117
|
|
|
|
|
|
|
name => "Brian", |
118
|
|
|
|
|
|
|
scriptfile => "myscript.txt", |
119
|
|
|
|
|
|
|
debug => 1, |
120
|
|
|
|
|
|
|
prompts_on => 1, |
121
|
|
|
|
|
|
|
memory_on => 0, |
122
|
|
|
|
|
|
|
myrand => |
123
|
|
|
|
|
|
|
sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); }, |
124
|
|
|
|
|
|
|
}; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
If you don't specify a script file, then the new object will be |
127
|
|
|
|
|
|
|
initialized with a default script. The module contains this |
128
|
|
|
|
|
|
|
script within itself. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
You can use any of the internal functions in |
131
|
|
|
|
|
|
|
a calling program. The code below takes an |
132
|
|
|
|
|
|
|
arbitrary string and retrieves the reply from |
133
|
|
|
|
|
|
|
the Eliza object: |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
my $string = "I have too many problems."; |
136
|
|
|
|
|
|
|
my $reply = $mybot->transform( $string ); |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
You can easily create two bots, each with a different |
139
|
|
|
|
|
|
|
script, and see how they interact: |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
use Chatbot::Eliza |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my ($harry, $sally, $he_says, $she_says); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
$sally = new Chatbot::Eliza "Sally", "histext.txt"; |
146
|
|
|
|
|
|
|
$harry = new Chatbot::Eliza "Harry", "hertext.txt"; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
$he_says = "I am sad."; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# Seed the random number generator. |
151
|
|
|
|
|
|
|
srand( time ^ ($$ + ($$ << 15)) ); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
while (1) { |
154
|
|
|
|
|
|
|
$she_says = $sally->transform( $he_says ); |
155
|
|
|
|
|
|
|
print $sally->name, ": $she_says \n"; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
$he_says = $harry->transform( $she_says ); |
158
|
|
|
|
|
|
|
print $harry->name, ": $he_says \n"; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Mechanically, this works well. However, it critically depends |
162
|
|
|
|
|
|
|
on the actual script data. Having two mock Rogerian therapists |
163
|
|
|
|
|
|
|
talk to each other usually does not produce any sensible conversation, |
164
|
|
|
|
|
|
|
of course. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
After each call to the transform() method, the debugging output |
167
|
|
|
|
|
|
|
for that transformation is stored in a variable called $debug_text. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
my $reply = $mybot->transform( "My foot hurts" ); |
170
|
|
|
|
|
|
|
my $debugging = $mybot->debug_text; |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
This feature always available, even if the instance's $debug |
173
|
|
|
|
|
|
|
variable is set to 0. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Calling programs can specify their own random-number generators. |
176
|
|
|
|
|
|
|
Use this syntax: |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$chatbot = new Chatbot::Eliza; |
179
|
|
|
|
|
|
|
$chatbot->myrand( |
180
|
|
|
|
|
|
|
sub { |
181
|
|
|
|
|
|
|
#function goes here! |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
); |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
The custom random function should have the same prototype |
186
|
|
|
|
|
|
|
as perl's built-in rand() function. That is, it should take |
187
|
|
|
|
|
|
|
a single (numeric) expression as a parameter, and it should |
188
|
|
|
|
|
|
|
return a floating-point value between 0 and that number. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
What this code actually does is pass a reference to an anonymous |
191
|
|
|
|
|
|
|
subroutine ("code reference"). Make sure you've read the perlref |
192
|
|
|
|
|
|
|
manpage for details on how code references actually work. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
If you don't specify any custom rand function, then the Eliza |
195
|
|
|
|
|
|
|
object will just use the built-in rand() function. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head1 MAIN DATA MEMBERS |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Each Eliza object uses the following data structures |
200
|
|
|
|
|
|
|
to hold the script data in memory: |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head2 %decomplist |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
I: the set of keywords; I: strings containing |
205
|
|
|
|
|
|
|
the decomposition rules. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 %reasmblist |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
I: a set of values which are each the join |
210
|
|
|
|
|
|
|
of a keyword and a corresponding decomposition rule; |
211
|
|
|
|
|
|
|
I: the set of possible reassembly statements |
212
|
|
|
|
|
|
|
for that keyword and decomposition rule. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=head2 %reasmblist_for_memory |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
This structure is identical to C<%reasmblist>, except |
217
|
|
|
|
|
|
|
that these rules are only invoked when a user comment |
218
|
|
|
|
|
|
|
is being retrieved from memory. These contain comments |
219
|
|
|
|
|
|
|
such as "Earlier you mentioned that...," which are only |
220
|
|
|
|
|
|
|
appropriate for remembered comments. Rules in the script |
221
|
|
|
|
|
|
|
must be specially marked in order to be included |
222
|
|
|
|
|
|
|
in this list rather than C<%reasmblist>. The default |
223
|
|
|
|
|
|
|
script only has a few of these rules. |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=head2 @memory |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
A list of user comments which an Eliza instance is remembering |
228
|
|
|
|
|
|
|
for future use. Eliza does not remember everything, only some things. |
229
|
|
|
|
|
|
|
In this implementation, Eliza will only remember comments |
230
|
|
|
|
|
|
|
which match a decomposition rule which actually has reassembly |
231
|
|
|
|
|
|
|
rules that are marked with the keyword "reasm_for_memory" |
232
|
|
|
|
|
|
|
rather than the normal "reasmb". The default script |
233
|
|
|
|
|
|
|
only has a few of these. |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head2 %keyranks |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
I: the set of keywords; I: the ranks for each keyword |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head2 @quit |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
"quit" words -- that is, words the user might use |
242
|
|
|
|
|
|
|
to try to exit the program. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=head2 @initial |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
Possible greetings for the beginning of the program. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 @final |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Possible farewells for the end of the program. |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 %pre |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
I: words which are replaced before any transformations; |
255
|
|
|
|
|
|
|
I: the respective replacement words. |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 %post |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
I: words which are replaced after the transformations |
260
|
|
|
|
|
|
|
and after the reply is constructed; I: the respective |
261
|
|
|
|
|
|
|
replacement words. |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head2 %synon |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
I: words which are found in decomposition rules; |
266
|
|
|
|
|
|
|
I: words which are treated just like their |
267
|
|
|
|
|
|
|
corresponding synonyms during matching of decomposition |
268
|
|
|
|
|
|
|
rules. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head2 Other data members |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
There are several other internal data members. Hopefully |
273
|
|
|
|
|
|
|
these are sufficiently obvious that you can learn about them |
274
|
|
|
|
|
|
|
just by reading the source code. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=cut |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
my %fields = ( |
280
|
|
|
|
|
|
|
name => 'Eliza', |
281
|
|
|
|
|
|
|
scriptfile => '', |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
debug => 0, |
284
|
|
|
|
|
|
|
debug_text => '', |
285
|
|
|
|
|
|
|
transform_text => '', |
286
|
|
|
|
|
|
|
prompts_on => 1, |
287
|
|
|
|
|
|
|
memory_on => 1, |
288
|
|
|
|
|
|
|
botprompt => '', |
289
|
|
|
|
|
|
|
userprompt => '', |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
myrand => |
292
|
|
|
|
|
|
|
sub { my $N = defined $_[0] ? $_[0] : 1; rand($N); }, |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
keyranks => undef, |
295
|
|
|
|
|
|
|
decomplist => undef, |
296
|
|
|
|
|
|
|
reasmblist => undef, |
297
|
|
|
|
|
|
|
reasmblist_for_memory => undef, |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
pre => undef, |
300
|
|
|
|
|
|
|
post => undef, |
301
|
|
|
|
|
|
|
synon => undef, |
302
|
|
|
|
|
|
|
initial => undef, |
303
|
|
|
|
|
|
|
final => undef, |
304
|
|
|
|
|
|
|
quit => undef, |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
max_memory_size => 5, |
307
|
|
|
|
|
|
|
likelihood_of_using_memory => 1, |
308
|
|
|
|
|
|
|
memory => undef, |
309
|
|
|
|
|
|
|
); |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
#################################################################### |
313
|
|
|
|
|
|
|
# ---{ B E G I N M E T H O D S }---------------------------------- |
314
|
|
|
|
|
|
|
# |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=head1 METHODS |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head2 new() |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
my $chatterbot = new Chatbot::Eliza; |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
new() creates a new Eliza object. This method |
323
|
|
|
|
|
|
|
also calls the internal _initialize() method, which in turn |
324
|
|
|
|
|
|
|
calls the parse_script_data() method, which initializes |
325
|
|
|
|
|
|
|
the script data. |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
my $chatterbot = new Chatbot::Eliza 'Ahmad', 'myfile.txt'; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
The eliza object defaults to the name "Eliza", and it |
330
|
|
|
|
|
|
|
contains default script data within itself. However, |
331
|
|
|
|
|
|
|
using the syntax above, you can specify an alternative |
332
|
|
|
|
|
|
|
name and an alternative script file. |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
See the method parse_script_data(). for a description |
335
|
|
|
|
|
|
|
of the format of the script file. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=cut |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub new { |
340
|
1
|
|
|
1
|
1
|
523
|
my ($that,$name,$scriptfile) = @_; |
341
|
1
|
|
33
|
|
|
8
|
my $class = ref($that) || $that; |
342
|
1
|
|
|
|
|
21
|
my $self = { |
343
|
|
|
|
|
|
|
_permitted => \%fields, |
344
|
|
|
|
|
|
|
%fields, |
345
|
|
|
|
|
|
|
}; |
346
|
1
|
|
|
|
|
3
|
bless $self, $class; |
347
|
1
|
|
|
|
|
5
|
$self->_initialize($name,$scriptfile); |
348
|
1
|
|
|
|
|
5
|
return $self; |
349
|
|
|
|
|
|
|
} # end method new |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
sub _initialize { |
352
|
1
|
|
|
1
|
|
3
|
my ($self,$param1,$param2) = @_; |
353
|
|
|
|
|
|
|
|
354
|
1
|
50
|
33
|
|
|
8
|
if (defined $param1 and ref $param1 eq "HASH") { |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
# Allow the calling program to pass in intial parameters |
357
|
|
|
|
|
|
|
# as an anonymous hash |
358
|
0
|
|
|
|
|
0
|
map { $self->{$_} = $param1->{$_}; } keys %$param1; |
|
0
|
|
|
|
|
0
|
|
359
|
|
|
|
|
|
|
|
360
|
0
|
|
|
|
|
0
|
$self->parse_script_data( $self->{scriptfile} ); |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
} else { |
363
|
1
|
50
|
|
|
|
12
|
$self->name($param1) if $param1; |
364
|
1
|
|
|
|
|
5
|
$self->parse_script_data($param2); |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
# Initialize the memory array ref at instantiation time, |
368
|
|
|
|
|
|
|
# rather than at class definition time. |
369
|
|
|
|
|
|
|
# (THANKS to Randal Schwartz and Robert Chin for fixing this bug.) |
370
|
|
|
|
|
|
|
# |
371
|
1
|
|
|
|
|
5
|
$self->{memory} = []; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
sub AUTOLOAD { |
375
|
53
|
|
|
53
|
|
123
|
my $self = shift; |
376
|
53
|
|
33
|
|
|
167
|
my $class = ref($self) || croak "$self is not an object : $!\n"; |
377
|
53
|
|
|
|
|
108
|
my $field = $AUTOLOAD; |
378
|
53
|
|
|
|
|
270
|
$field =~ s/.*://; # Strip fully-qualified portion |
379
|
|
|
|
|
|
|
|
380
|
53
|
50
|
|
|
|
220
|
unless (exists $self->{"_permitted"}->{$field} ) { |
381
|
0
|
|
|
|
|
0
|
croak "Can't access `$field' field in object of class $class : $!\n"; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
53
|
100
|
|
|
|
178
|
if (@_) { |
385
|
25
|
|
|
|
|
79
|
return $self->{$field} = shift; |
386
|
|
|
|
|
|
|
} else { |
387
|
28
|
|
|
|
|
239
|
return $self->{$field}; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} # end method AUTOLOAD |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
#################################################################### |
393
|
|
|
|
|
|
|
# --- command_interface --- |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
=head2 command_interface() |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
$chatterbot->command_interface; |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
command_interface() opens an interactive session with |
400
|
|
|
|
|
|
|
the Eliza object, just like the original Eliza program. |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
If you want to design your own session format, then |
403
|
|
|
|
|
|
|
you can write your own while loop and your own functions |
404
|
|
|
|
|
|
|
for prompting for and reading user input, and use the |
405
|
|
|
|
|
|
|
transform() method to generate Eliza's responses. |
406
|
|
|
|
|
|
|
(I: you do not need to invoke preprocess() |
407
|
|
|
|
|
|
|
and postprocess() directly, because these are invoked |
408
|
|
|
|
|
|
|
from within the transform() method.) |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
But if you're lazy and you want to skip all that, |
411
|
|
|
|
|
|
|
then just use command_interface(). It's all done for you. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
During an interactive session invoked using command_interface(), |
414
|
|
|
|
|
|
|
you can enter the word "debug" to toggle debug mode on and off. |
415
|
|
|
|
|
|
|
You can also enter the keyword "memory" to invoke the _debug_memory() |
416
|
|
|
|
|
|
|
method and print out the contents of the Eliza instance's memory. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=cut |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
sub command_interface { |
421
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
422
|
0
|
|
|
|
|
0
|
my ($user_input, $previous_user_input, $reply); |
423
|
|
|
|
|
|
|
|
424
|
0
|
|
|
|
|
0
|
$user_input = ""; |
425
|
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
0
|
$self->botprompt($self->name . ":\t"); # Eliza's prompt |
427
|
0
|
|
|
|
|
0
|
$self->userprompt("you:\t"); # User's prompt |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
# Seed the random number generator. |
430
|
0
|
|
|
|
|
0
|
srand( time() ^ ($$ + ($$ << 15)) ); |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# Print the Eliza prompt |
433
|
0
|
0
|
|
|
|
0
|
print $self->botprompt if $self->prompts_on; |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# Print an initial greeting |
436
|
0
|
|
|
|
|
0
|
print "$self->{initial}->[ int &{$self->{myrand}}( scalar @{ $self->{initial} } ) ]\n"; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
################################################################### |
440
|
|
|
|
|
|
|
# command loop. This loop should go on forever, |
441
|
|
|
|
|
|
|
# until we explicity break out of it. |
442
|
|
|
|
|
|
|
# |
443
|
0
|
|
|
|
|
0
|
while (1) { |
444
|
|
|
|
|
|
|
|
445
|
0
|
0
|
|
|
|
0
|
print $self->userprompt if $self->prompts_on; |
446
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
0
|
$previous_user_input = $user_input; |
448
|
0
|
|
|
|
|
0
|
chomp( $user_input = ); |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# If the user wants to quit, |
452
|
|
|
|
|
|
|
# print out a farewell and quit. |
453
|
0
|
0
|
|
|
|
0
|
if ($self->_testquit($user_input) ) { |
454
|
0
|
|
|
|
|
0
|
$reply = "$self->{final}->[ int &{$self->{myrand}}( scalar @{$self->{final}} ) ]"; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
455
|
0
|
0
|
|
|
|
0
|
print $self->botprompt if $self->prompts_on; |
456
|
0
|
|
|
|
|
0
|
print "$reply\n"; |
457
|
0
|
|
|
|
|
0
|
last; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# If the user enters the word "debug", |
461
|
|
|
|
|
|
|
# then toggle on/off this Eliza's debug output. |
462
|
0
|
0
|
|
|
|
0
|
if ($user_input eq "debug") { |
463
|
0
|
|
|
|
|
0
|
$self->debug( ! $self->debug ); |
464
|
0
|
|
|
|
|
0
|
$user_input = $previous_user_input; |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# If the user enters the word "memory", |
468
|
|
|
|
|
|
|
# then use the _debug_memory method to dump out |
469
|
|
|
|
|
|
|
# the current contents of Eliza's memory |
470
|
0
|
0
|
0
|
|
|
0
|
if ($user_input eq "memory" or $user_input eq "debug memory") { |
471
|
0
|
|
|
|
|
0
|
print $self->_debug_memory(); |
472
|
0
|
|
|
|
|
0
|
redo; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# If the user enters the word "debug that", |
476
|
|
|
|
|
|
|
# then dump out the debugging of the |
477
|
|
|
|
|
|
|
# most recent call to transform. |
478
|
0
|
0
|
|
|
|
0
|
if ($user_input eq "debug that") { |
479
|
0
|
|
|
|
|
0
|
print $self->debug_text(); |
480
|
0
|
|
|
|
|
0
|
redo; |
481
|
|
|
|
|
|
|
} |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
# Invoke the transform method |
484
|
|
|
|
|
|
|
# to generate a reply. |
485
|
0
|
|
|
|
|
0
|
$reply = $self->transform( $user_input ); |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
# Print out the debugging text if debugging is set to on. |
489
|
|
|
|
|
|
|
# This variable should have been set by the transform method. |
490
|
0
|
0
|
|
|
|
0
|
print $self->debug_text if $self->debug; |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# Print the actual reply |
493
|
0
|
0
|
|
|
|
0
|
print $self->botprompt if $self->prompts_on; |
494
|
0
|
|
|
|
|
0
|
print "$reply\n"; |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
} # End UI command loop. |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
} # End method command_interface |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
#################################################################### |
503
|
|
|
|
|
|
|
# --- preprocess --- |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head2 preprocess() |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
$string = preprocess($string); |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
preprocess() applies simple substitution rules to the input string. |
510
|
|
|
|
|
|
|
Mostly this is to catch varieties in spelling, misspellings, |
511
|
|
|
|
|
|
|
contractions and the like. |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
preprocess() is called from within the transform() method. |
514
|
|
|
|
|
|
|
It is applied to user-input text, BEFORE any processing, |
515
|
|
|
|
|
|
|
and before a reassebly statement has been selected. |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
It uses the array C<%pre>, which is created |
518
|
|
|
|
|
|
|
during the parse of the script. |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=cut |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
sub preprocess { |
523
|
4
|
|
|
4
|
1
|
12
|
my ($self,$string) = @_; |
524
|
|
|
|
|
|
|
|
525
|
4
|
|
|
|
|
9
|
my ($i, @wordsout, @wordsin, $keyword); |
526
|
|
|
|
|
|
|
|
527
|
4
|
|
|
|
|
22
|
@wordsout = @wordsin = split / /, $string; |
528
|
|
|
|
|
|
|
|
529
|
4
|
|
|
|
|
18
|
WORD: for ($i = 0; $i < @wordsin; $i++) { |
530
|
8
|
|
|
|
|
16
|
foreach $keyword (keys %{ $self->{pre} }) { |
|
8
|
|
|
|
|
35
|
|
531
|
0
|
0
|
|
|
|
0
|
if ($wordsin[$i] =~ /\b$keyword\b/i ) { |
532
|
0
|
|
|
|
|
0
|
($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{pre}->{$keyword}/ig; |
533
|
0
|
|
|
|
|
0
|
next WORD; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
} |
537
|
4
|
|
|
|
|
22
|
return join ' ', @wordsout; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
#################################################################### |
542
|
|
|
|
|
|
|
# --- postprocess --- |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=head2 postprocess() |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
$string = postprocess($string); |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
postprocess() applies simple substitution rules to the |
549
|
|
|
|
|
|
|
reassembly rule. This is where all the "I"'s and "you"'s |
550
|
|
|
|
|
|
|
are exchanged. postprocess() is called from within the |
551
|
|
|
|
|
|
|
transform() function. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
It uses the array C<%post>, created |
554
|
|
|
|
|
|
|
during the parse of the script. |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=cut |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
sub postprocess { |
559
|
27
|
|
|
27
|
1
|
72
|
my ($self,$string) = @_; |
560
|
|
|
|
|
|
|
|
561
|
27
|
|
|
|
|
57
|
my ($i, @wordsout, @wordsin, $keyword); |
562
|
|
|
|
|
|
|
|
563
|
27
|
|
|
|
|
90
|
@wordsin = @wordsout = split (/ /, $string); |
564
|
|
|
|
|
|
|
|
565
|
27
|
|
|
|
|
95
|
WORD: for ($i = 0; $i < @wordsin; $i++) { |
566
|
5
|
|
|
|
|
11
|
foreach $keyword (keys %{ $self->{post} }) { |
|
5
|
|
|
|
|
23
|
|
567
|
0
|
0
|
|
|
|
0
|
if ($wordsin[$i] =~ /\b$keyword\b/i ) { |
568
|
0
|
|
|
|
|
0
|
($wordsout[$i] = $wordsin[$i]) =~ s/$keyword/$self->{post}->{$keyword}/ig; |
569
|
0
|
|
|
|
|
0
|
next WORD; |
570
|
|
|
|
|
|
|
} |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
} |
573
|
27
|
|
|
|
|
143
|
return join ' ', @wordsout; |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
#################################################################### |
577
|
|
|
|
|
|
|
# --- _testquit --- |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=head2 _testquit() |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
if ($self->_testquit($user_input) ) { ... } |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
_testquit() detects words like "bye" and "quit" and returns |
584
|
|
|
|
|
|
|
true if it finds one of them as the first word in the sentence. |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
These words are listed in the script, under the keyword "quit". |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=cut |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
sub _testquit { |
591
|
0
|
|
|
0
|
|
0
|
my ($self,$string) = @_; |
592
|
|
|
|
|
|
|
|
593
|
0
|
|
|
|
|
0
|
my ($quitword, @wordsin); |
594
|
|
|
|
|
|
|
|
595
|
0
|
|
|
|
|
0
|
foreach $quitword (@{ $self->{quit} }) { |
|
0
|
|
|
|
|
0
|
|
596
|
0
|
0
|
|
|
|
0
|
return 1 if ($string =~ /\b$quitword\b/i ) ; |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
} |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
#################################################################### |
602
|
|
|
|
|
|
|
# --- _debug_memory --- |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
=head2 _debug_memory() |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
$self->_debug_memory() |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
_debug_memory() is a special function which returns |
609
|
|
|
|
|
|
|
the contents of Eliza's memory stack. |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=cut |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
sub _debug_memory { |
615
|
|
|
|
|
|
|
|
616
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
617
|
|
|
|
|
|
|
|
618
|
0
|
|
|
|
|
0
|
my $string = "\t"; |
619
|
0
|
|
|
|
|
0
|
$string .= $#{ $self->memory } + 1; |
|
0
|
|
|
|
|
0
|
|
620
|
0
|
|
|
|
|
0
|
$string .= " item(s) in memory stack:\n"; |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# [THANKS to Roy Stephan for helping me adjust this bit] |
623
|
|
|
|
|
|
|
# |
624
|
0
|
|
|
|
|
0
|
foreach (@{ $self->memory } ) { |
|
0
|
|
|
|
|
0
|
|
625
|
|
|
|
|
|
|
|
626
|
0
|
|
|
|
|
0
|
my $line = $_; |
627
|
0
|
|
|
|
|
0
|
$string .= sprintf "\t\t->$line\n" ; |
628
|
|
|
|
|
|
|
}; |
629
|
|
|
|
|
|
|
|
630
|
0
|
|
|
|
|
0
|
return $string; |
631
|
|
|
|
|
|
|
} |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
#################################################################### |
634
|
|
|
|
|
|
|
# --- transform --- |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=head2 transform() |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
$reply = $chatterbot->transform( $string, $use_memory ); |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
transform() applies transformation rules to the user input |
641
|
|
|
|
|
|
|
string. It invokes preprocess(), does transformations, |
642
|
|
|
|
|
|
|
then invokes postprocess(). It returns the transformed |
643
|
|
|
|
|
|
|
output string, called C<$reasmb>. |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
The algorithm embedded in the transform() method has three main parts: |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
=over |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
=item 1 |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
Search the input string for a keyword. |
652
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=item 2 |
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
If we find a keyword, use the list of decomposition rules |
656
|
|
|
|
|
|
|
for that keyword, and pattern-match the input string against |
657
|
|
|
|
|
|
|
each rule. |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=item 3 |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
If the input string matches any of the decomposition rules, |
662
|
|
|
|
|
|
|
then randomly select one of the reassembly rules for that |
663
|
|
|
|
|
|
|
decomposition rule, and use it to construct the reply. |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=back |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
transform() takes two parameters. The first is the string we want |
668
|
|
|
|
|
|
|
to transform. The second is a flag which indicates where this sting |
669
|
|
|
|
|
|
|
came from. If the flag is set, then the string has been pulled |
670
|
|
|
|
|
|
|
from memory, and we should use reassembly rules appropriate |
671
|
|
|
|
|
|
|
for that. If the flag is not set, then the string is the most |
672
|
|
|
|
|
|
|
recent user input, and we can use the ordinary reassembly rules. |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
The memory flag is only set when the transform() function is called |
675
|
|
|
|
|
|
|
recursively. The mechanism for setting this parameter is |
676
|
|
|
|
|
|
|
embedded in the transoform method itself. If the flag is set |
677
|
|
|
|
|
|
|
inappropriately, it is ignored. |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=cut |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
sub transform{ |
682
|
4
|
|
|
4
|
1
|
1873
|
my ($self,$string,$use_memory) = @_; |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
# Initialize the debugging text buffer. |
685
|
4
|
|
|
|
|
32
|
$self->debug_text(''); |
686
|
|
|
|
|
|
|
|
687
|
4
|
50
|
|
|
|
16
|
$self->debug_text(sprintf "\t[Pulling string \"$string\" from memory.]\n") |
688
|
|
|
|
|
|
|
if $use_memory; |
689
|
|
|
|
|
|
|
|
690
|
4
|
|
|
|
|
12
|
my ($i, @string_parts, $string_part, $rank, $goto, $reasmb, $keyword, |
691
|
|
|
|
|
|
|
$decomp, $this_decomp, $reasmbkey, @these_reasmbs, |
692
|
|
|
|
|
|
|
@decomp_matches, $synonyms, $synonym_index); |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
# Default to a really low rank. |
695
|
4
|
|
|
|
|
10
|
$rank = -2; |
696
|
4
|
|
|
|
|
9
|
$reasmb = ""; |
697
|
4
|
|
|
|
|
9
|
$goto = ""; |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
# First run the string through the preprocessor. |
700
|
4
|
|
|
|
|
16
|
$string = $self->preprocess( $string ); |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
# Convert punctuation to periods. We will assume that commas |
703
|
|
|
|
|
|
|
# and certain conjunctions separate distinct thoughts/sentences. |
704
|
4
|
|
|
|
|
15
|
$string =~ s/[?!,]/./g; |
705
|
4
|
|
|
|
|
14
|
$string =~ s/but/./g; # Yikes! This is English-specific. |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# Split the string by periods into an array |
708
|
4
|
|
|
|
|
14
|
@string_parts = split /\./, $string ; |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
# Examine each part of the input string in turn. |
711
|
4
|
|
|
|
|
18
|
STRING_PARTS: foreach $string_part (@string_parts) { |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
# Run through the whole list of keywords. |
714
|
4
|
|
|
|
|
9
|
KEYWORD: foreach $keyword (keys %{ $self->{decomplist} }) { |
|
4
|
|
|
|
|
16
|
|
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
# Check to see if the input string contains a keyword |
717
|
|
|
|
|
|
|
# which outranks any we have found previously |
718
|
|
|
|
|
|
|
# (On first loop, rank is set to -2.) |
719
|
12
|
100
|
66
|
|
|
203
|
if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto) |
|
|
|
66
|
|
|
|
|
720
|
|
|
|
|
|
|
and |
721
|
|
|
|
|
|
|
$rank < $self->{keyranks}->{$keyword} |
722
|
|
|
|
|
|
|
) |
723
|
|
|
|
|
|
|
{ |
724
|
|
|
|
|
|
|
# If we find one, then set $rank to equal |
725
|
|
|
|
|
|
|
# the rank of that keyword. |
726
|
3
|
|
|
|
|
13
|
$rank = $self->{keyranks}->{$keyword}; |
727
|
|
|
|
|
|
|
|
728
|
3
|
|
|
|
|
20
|
$self->debug_text($self->debug_text . sprintf "\t$rank> $keyword"); |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
# Now let's check all the decomposition rules for that keyword. |
731
|
3
|
|
|
|
|
9
|
DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} }) { |
|
3
|
|
|
|
|
11
|
|
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
# Change '*' to '\b(.*)\b' in this decomposition rule, |
734
|
|
|
|
|
|
|
# so we can use it for regular expressions. Later, |
735
|
|
|
|
|
|
|
# we will want to isolate individual matches to each wildcard. |
736
|
3
|
|
|
|
|
24
|
($this_decomp = $decomp) =~ s/\s*\*\s*/\\b\(\.\*\)\\b/g; |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
# If this docomposition rule contains a word which begins with '@', |
739
|
|
|
|
|
|
|
# then the script also contained some synonyms for that word. |
740
|
|
|
|
|
|
|
# Find them all using %synon and generate a regular expression |
741
|
|
|
|
|
|
|
# containing all of them. |
742
|
3
|
50
|
|
|
|
16
|
if ($this_decomp =~ /\@/ ) { |
743
|
0
|
|
|
|
|
0
|
($synonym_index = $this_decomp) =~ s/.*\@(\w*).*/$1/i ; |
744
|
0
|
|
|
|
|
0
|
$synonyms = join ('|', @{ $self->{synon}->{$synonym_index} }); |
|
0
|
|
|
|
|
0
|
|
745
|
0
|
|
|
|
|
0
|
$this_decomp =~ s/(.*)\@$synonym_index(.*)/$1($synonym_index\|$synonyms)$2/g; |
746
|
|
|
|
|
|
|
} |
747
|
|
|
|
|
|
|
|
748
|
3
|
|
|
|
|
21
|
$self->debug_text($self->debug_text . sprintf "\n\t\t: $decomp"); |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# Using the regular expression we just generated, |
751
|
|
|
|
|
|
|
# match against the input string. Use empty "()"'s to |
752
|
|
|
|
|
|
|
# eliminate warnings about uninitialized variables. |
753
|
3
|
50
|
|
|
|
69
|
if ($string_part =~ /$this_decomp()()()()()()()()()()/i) { |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
# If this decomp rule matched the string, |
756
|
|
|
|
|
|
|
# then create an array, so that we can refer to matches |
757
|
|
|
|
|
|
|
# to individual wildcards. Use '0' as a placeholder |
758
|
|
|
|
|
|
|
# (we don't want to refer to any "zeroth" wildcard). |
759
|
3
|
|
|
|
|
50
|
@decomp_matches = ("0", $1, $2, $3, $4, $5, $6, $7, $8, $9); |
760
|
3
|
|
|
|
|
22
|
$self->debug_text($self->debug_text . sprintf " : @decomp_matches\n"); |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
# Using the keyword and the decomposition rule, |
763
|
|
|
|
|
|
|
# reconstruct a key for the list of reassamble rules. |
764
|
3
|
|
|
|
|
12
|
$reasmbkey = join ($;,$keyword,$decomp); |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
# Get the list of possible reassembly rules for this key. |
767
|
|
|
|
|
|
|
# |
768
|
3
|
|
33
|
|
|
14
|
my $memory = (defined $use_memory and $#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0); |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
# Pick out next reassembly rule. |
771
|
3
|
|
|
|
|
12
|
$reasmb = $self->_get_next_reasmb( $reasmbkey, $memory); |
772
|
|
|
|
|
|
|
|
773
|
3
|
|
|
|
|
17
|
$self->debug_text($self->debug_text . sprintf "\t\t--> $reasmb\n"); |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
# If the reassembly rule we picked contains the word "goto", |
776
|
|
|
|
|
|
|
# then we start over with a new keyword. Set $keyword to equal |
777
|
|
|
|
|
|
|
# that word, and start the whole loop over. |
778
|
3
|
50
|
|
|
|
17
|
if ($reasmb =~ m/^goto\s(\w*).*/i) { |
779
|
0
|
|
|
|
|
0
|
$self->debug_text($self->debug_text . sprintf "\$1 = $1\n"); |
780
|
0
|
|
|
|
|
0
|
$goto = $keyword = $1; |
781
|
0
|
|
|
|
|
0
|
$rank = -2; |
782
|
0
|
|
|
|
|
0
|
redo KEYWORD; |
783
|
|
|
|
|
|
|
} |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
# Otherwise, using the matches to wildcards which we stored above, |
786
|
|
|
|
|
|
|
# insert words from the input string back into the reassembly rule. |
787
|
|
|
|
|
|
|
# [THANKS to Gidon Wise for submitting a bugfix here] |
788
|
3
|
|
|
|
|
14
|
for ($i=1; $i <= $#decomp_matches; $i++) { |
789
|
27
|
|
|
|
|
111
|
$decomp_matches[$i] = $self->postprocess( $decomp_matches[$i] ); |
790
|
27
|
|
|
|
|
158
|
$decomp_matches[$i] =~ s/([,;?!]|\.*)$//; |
791
|
27
|
|
|
|
|
388
|
$reasmb =~ s/\($i\)/$decomp_matches[$i]/g; |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
# Move on to the next keyword. If no other keywords match, |
795
|
|
|
|
|
|
|
# then we'll end up actually using the $reasmb string |
796
|
|
|
|
|
|
|
# we just generated above. |
797
|
3
|
|
|
|
|
16
|
next KEYWORD ; |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
} # End if ($string_part =~ /$this_decomp/i) |
800
|
|
|
|
|
|
|
|
801
|
0
|
|
|
|
|
0
|
$self->debug_text($self->debug_text . sprintf "\n"); |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
} # End DECOMP: foreach $decomp (@{ $self->{decomplist}->{$keyword} }) |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
} # End if ( ($string_part =~ /\b$keyword\b/i or $keyword eq $goto) |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
} # End KEYWORD: foreach $keyword (keys %{ $self->{decomplist}) |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
} # End STRING_PARTS: foreach $string_part (@string_parts) { |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
=head2 How memory is used |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
In the script, some reassembly rules are special. They are marked with |
814
|
|
|
|
|
|
|
the keyword "reasm_for_memory", rather than just "reasm". |
815
|
|
|
|
|
|
|
Eliza "remembers" any comment when it matches a docomposition rule |
816
|
|
|
|
|
|
|
for which there are any reassembly rules for memory. |
817
|
|
|
|
|
|
|
An Eliza object remembers up to C<$max_memory_size> (default: 5) |
818
|
|
|
|
|
|
|
user input strings. |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
If, during a subsequent run, the transform() method fails to find any |
821
|
|
|
|
|
|
|
appropriate decomposition rule for a user's comment, and if there are |
822
|
|
|
|
|
|
|
any comments inside the memory array, then Eliza may elect to ignore |
823
|
|
|
|
|
|
|
the most recent comment and instead pull out one of the strings from memory. |
824
|
|
|
|
|
|
|
In this case, the transform method is called recursively with the memory flag. |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
Honestly, I am not sure exactly how this memory functionality |
827
|
|
|
|
|
|
|
was implemented in the original Eliza program. Hopefully |
828
|
|
|
|
|
|
|
this implementation is not too far from Weizenbaum's. |
829
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
If you don't want to use the memory functionality at all, |
831
|
|
|
|
|
|
|
then you can disable it: |
832
|
|
|
|
|
|
|
|
833
|
|
|
|
|
|
|
$mybot->memory_on(0); |
834
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
You can also achieve the same effect by making sure |
836
|
|
|
|
|
|
|
that the script data does not contain any reassembly rules |
837
|
|
|
|
|
|
|
marked with the keyword "reasm_for_memory". The default |
838
|
|
|
|
|
|
|
script data only has 4 such items. |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=cut |
841
|
|
|
|
|
|
|
|
842
|
4
|
100
|
|
|
|
33
|
if ($reasmb eq "") { |
|
|
50
|
|
|
|
|
|
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
# If all else fails, call this method recursively |
845
|
|
|
|
|
|
|
# and make sure that it has something to parse. |
846
|
|
|
|
|
|
|
# Use a string from memory if anything is available. |
847
|
|
|
|
|
|
|
# |
848
|
|
|
|
|
|
|
# $self-likelihood_of_using_memory should be some number |
849
|
|
|
|
|
|
|
# between 1 and 0; it defaults to 1. |
850
|
|
|
|
|
|
|
# |
851
|
1
|
50
|
33
|
|
|
3
|
if ( |
852
|
1
|
|
|
|
|
9
|
$#{ $self->memory } >= 0 |
853
|
|
|
|
|
|
|
and |
854
|
0
|
|
|
|
|
0
|
&{$self->{myrand}}(1) >= 1 - $self->likelihood_of_using_memory |
855
|
|
|
|
|
|
|
) { |
856
|
|
|
|
|
|
|
|
857
|
0
|
|
|
|
|
0
|
$reasmb = $self->transform( shift @{ $self->memory }, "use memory" ); |
|
0
|
|
|
|
|
0
|
|
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
} else { |
860
|
1
|
|
|
|
|
11
|
$reasmb = $self->transform("xnone"); |
861
|
|
|
|
|
|
|
} |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
} elsif ($self->memory_on) { |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
# If memory is switched on, then we handle memory. |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# Now that we have successfully transformed this string, |
868
|
|
|
|
|
|
|
# push it onto the end of the memory stack... unless, of course, |
869
|
|
|
|
|
|
|
# that's where we got it from in the first place, or if the rank |
870
|
|
|
|
|
|
|
# is not the kind we remember. |
871
|
|
|
|
|
|
|
# |
872
|
3
|
50
|
33
|
|
|
8
|
if ( |
873
|
3
|
|
|
|
|
20
|
$#{ $self->{reasmblist_for_memory}->{$reasmbkey} } >= 0 |
874
|
|
|
|
|
|
|
and |
875
|
|
|
|
|
|
|
not defined $use_memory |
876
|
|
|
|
|
|
|
) { |
877
|
|
|
|
|
|
|
|
878
|
0
|
|
|
|
|
0
|
push @{ $self->memory },$string ; |
|
0
|
|
|
|
|
0
|
|
879
|
|
|
|
|
|
|
} |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
# Shift out the least-recent item from the bottom |
882
|
|
|
|
|
|
|
# of the memory stack if the stack exceeds the max size. |
883
|
3
|
50
|
|
|
|
7
|
shift @{ $self->memory } if $#{ $self->memory } >= $self->max_memory_size; |
|
0
|
|
|
|
|
0
|
|
|
3
|
|
|
|
|
30
|
|
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
$self->debug_text($self->debug_text |
886
|
3
|
|
|
|
|
18
|
. sprintf("\t%d item(s) in memory.\n", $#{ $self->memory } + 1 ) ) ; |
|
3
|
|
|
|
|
17
|
|
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
} # End if ($reasmb eq "") |
889
|
|
|
|
|
|
|
|
890
|
4
|
|
|
|
|
16
|
$reasmb =~ tr/ / /s; # Eliminate any duplicate space characters. |
891
|
4
|
|
|
|
|
12
|
$reasmb =~ s/[ ][?]$/?/; # Eliminate any spaces before the question mark. |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
# Save the return string so that forgetful calling programs |
894
|
|
|
|
|
|
|
# can ask the bot what the last reply was. |
895
|
4
|
|
|
|
|
26
|
$self->transform_text($reasmb); |
896
|
|
|
|
|
|
|
|
897
|
4
|
|
|
|
|
23
|
return $reasmb ; |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
# _get_next_reasmb( $key, $memory_flag ) |
901
|
|
|
|
|
|
|
# |
902
|
|
|
|
|
|
|
# Given a key to a reasmb list and a flag indicating whether the list should |
903
|
|
|
|
|
|
|
# be pulled from a memory list or standard script list, returns the |
904
|
|
|
|
|
|
|
# next reasmb in the list, wrapping back to the start if the last one |
905
|
|
|
|
|
|
|
# is reached. |
906
|
|
|
|
|
|
|
sub _get_next_reasmb { |
907
|
3
|
|
|
3
|
|
12
|
my ( $self, $reasmbkey, $memory ) = @_; |
908
|
|
|
|
|
|
|
|
909
|
3
|
50
|
|
|
|
10
|
my $for_memory = $memory ? '_for_memory' : ''; |
910
|
3
|
|
|
|
|
7
|
my @these_reasmbs = @{ $self->{"reasmblist$for_memory"}->{$reasmbkey} }; |
|
3
|
|
|
|
|
17
|
|
911
|
3
|
|
|
|
|
16
|
my $next_reasmb = $self->{"next_reasmblist$for_memory"}->{$reasmbkey}++; |
912
|
3
|
50
|
|
|
|
13
|
if ( $next_reasmb > scalar( @these_reasmbs ) ) { |
913
|
0
|
|
|
|
|
0
|
$next_reasmb = 1; |
914
|
0
|
|
|
|
|
0
|
$self->{"next_reasmblist$for_memory"}->{$reasmbkey} = 0; |
915
|
|
|
|
|
|
|
} |
916
|
|
|
|
|
|
|
|
917
|
3
|
|
|
|
|
14
|
return $these_reasmbs[$next_reasmb - 1]; |
918
|
|
|
|
|
|
|
} |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
#################################################################### |
921
|
|
|
|
|
|
|
# --- parse_script_data --- |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=head2 parse_script_data() |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
$self->parse_script_data; |
926
|
|
|
|
|
|
|
$self->parse_script_data( $script_file ); |
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
parse_script_data() is invoked from the _initialize() method, |
929
|
|
|
|
|
|
|
which is called from the new() function. However, you can also |
930
|
|
|
|
|
|
|
call this method at any time against an already-instantiated |
931
|
|
|
|
|
|
|
Eliza instance. In that case, the new script data is I |
932
|
|
|
|
|
|
|
to the old script data. The old script data is not deleted. |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
You can pass a parameter to this function, which is the name of the |
935
|
|
|
|
|
|
|
script file, and it will read in and parse that file. |
936
|
|
|
|
|
|
|
If you do not pass any parameter to this method, then |
937
|
|
|
|
|
|
|
it will read the data embedded at the end of the module as its |
938
|
|
|
|
|
|
|
default script data. |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
If you pass the name of a script file to parse_script_data(), |
941
|
|
|
|
|
|
|
and that file is not available for reading, then the module dies. |
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
=head1 Format of the script file |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
This module includes a default script file within itself, |
947
|
|
|
|
|
|
|
so it is not necessary to explicitly specify a script file |
948
|
|
|
|
|
|
|
when instantiating an Eliza object. |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
Each line in the script file can specify a key, |
951
|
|
|
|
|
|
|
a decomposition rule, or a reassembly rule. |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
key: remember 5 |
954
|
|
|
|
|
|
|
decomp: * i remember * |
955
|
|
|
|
|
|
|
reasmb: Do you often think of (2) ? |
956
|
|
|
|
|
|
|
reasmb: Does thinking of (2) bring anything else to mind ? |
957
|
|
|
|
|
|
|
decomp: * do you remember * |
958
|
|
|
|
|
|
|
reasmb: Did you think I would forget (2) ? |
959
|
|
|
|
|
|
|
reasmb: What about (2) ? |
960
|
|
|
|
|
|
|
reasmb: goto what |
961
|
|
|
|
|
|
|
pre: equivalent alike |
962
|
|
|
|
|
|
|
synon: belief feel think believe wish |
963
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
The number after the key specifies the rank. |
965
|
|
|
|
|
|
|
If a user's input contains the keyword, then |
966
|
|
|
|
|
|
|
the transform() function will try to match |
967
|
|
|
|
|
|
|
one of the decomposition rules for that keyword. |
968
|
|
|
|
|
|
|
If one matches, then it will select one of |
969
|
|
|
|
|
|
|
the reassembly rules at random. The number |
970
|
|
|
|
|
|
|
(2) here means "use whatever set of words |
971
|
|
|
|
|
|
|
matched the second asterisk in the decomposition |
972
|
|
|
|
|
|
|
rule." |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
If you specify a list of synonyms for a word, |
975
|
|
|
|
|
|
|
the you should use a "@" when you use that |
976
|
|
|
|
|
|
|
word in a decomposition rule: |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
decomp: * i @belief i * |
979
|
|
|
|
|
|
|
reasmb: Do you really think so ? |
980
|
|
|
|
|
|
|
reasmb: But you are not sure you (3). |
981
|
|
|
|
|
|
|
|
982
|
|
|
|
|
|
|
Otherwise, the script will never check to see |
983
|
|
|
|
|
|
|
if there are any synonyms for that keyword. |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
Reassembly rules should be marked with I |
986
|
|
|
|
|
|
|
rather than I when it is appropriate for use |
987
|
|
|
|
|
|
|
when a user's comment has been extracted from memory. |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
key: my 2 |
990
|
|
|
|
|
|
|
decomp: * my * |
991
|
|
|
|
|
|
|
reasm_for_memory: Let's discuss further why your (2). |
992
|
|
|
|
|
|
|
reasm_for_memory: Earlier you said your (2). |
993
|
|
|
|
|
|
|
reasm_for_memory: But your (2). |
994
|
|
|
|
|
|
|
reasm_for_memory: Does that have anything to do with the fact that your (2) ? |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
=head1 How the script file is parsed |
997
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
Each line in the script file contains an "entrytype" |
999
|
|
|
|
|
|
|
(key, decomp, synon) and an "entry", separated by |
1000
|
|
|
|
|
|
|
a colon. In turn, each "entry" can itself be |
1001
|
|
|
|
|
|
|
composed of a "key" and a "value", separated by |
1002
|
|
|
|
|
|
|
a space. The parse_script_data() function |
1003
|
|
|
|
|
|
|
parses each line out, and splits the "entry" and |
1004
|
|
|
|
|
|
|
"entrytype" portion of each line into two variables, |
1005
|
|
|
|
|
|
|
C<$entry> and C<$entrytype>. |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
Next, it uses the string C<$entrytype> to determine |
1008
|
|
|
|
|
|
|
what sort of stuff to expect in the C<$entry> variable, |
1009
|
|
|
|
|
|
|
if anything, and parses it accordingly. In some cases, |
1010
|
|
|
|
|
|
|
there is no second level of key-value pair, so the function |
1011
|
|
|
|
|
|
|
does not even bother to isolate or create C<$key> and C<$value>. |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
C<$key> is always a single word. C<$value> can be null, |
1014
|
|
|
|
|
|
|
or one single word, or a string composed of several words, |
1015
|
|
|
|
|
|
|
or an array of words. |
1016
|
|
|
|
|
|
|
|
1017
|
|
|
|
|
|
|
Based on all these entries and keys and values, |
1018
|
|
|
|
|
|
|
the function creates two giant hashes: |
1019
|
|
|
|
|
|
|
C<%decomplist>, which holds the decomposition rules for |
1020
|
|
|
|
|
|
|
each keyword, and C<%reasmblist>, which holds the |
1021
|
|
|
|
|
|
|
reassembly phrases for each decomposition rule. |
1022
|
|
|
|
|
|
|
It also creates C<%keyranks>, which holds the ranks for |
1023
|
|
|
|
|
|
|
each key. |
1024
|
|
|
|
|
|
|
|
1025
|
|
|
|
|
|
|
Six other arrays are created: C<%reasm_for_memory, %pre, %post, |
1026
|
|
|
|
|
|
|
%synon, @initial,> and C<@final>. |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
=cut |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
sub parse_script_data { |
1031
|
|
|
|
|
|
|
|
1032
|
1
|
|
|
1
|
1
|
3
|
my ($self,$scriptfile) = @_; |
1033
|
1
|
|
|
|
|
2
|
my @scriptlines; |
1034
|
|
|
|
|
|
|
|
1035
|
1
|
50
|
|
|
|
7
|
if ($scriptfile) { |
1036
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
# If we have an external script file, open it |
1038
|
|
|
|
|
|
|
# and read it in (the whole thing, all at once). |
1039
|
1
|
50
|
|
|
|
29
|
open (SCRIPTFILE, "<$scriptfile") |
1040
|
|
|
|
|
|
|
or die "Could not read from file $scriptfile : $!\n"; |
1041
|
1
|
|
|
|
|
21
|
@scriptlines = ; # read in script data |
1042
|
1
|
|
|
|
|
8
|
$self->scriptfile($scriptfile); |
1043
|
1
|
|
|
|
|
6
|
close (SCRIPTFILE); |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
} else { |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
# Otherwise, read in the data from the bottom |
1048
|
|
|
|
|
|
|
# of this file. This data might be read several |
1049
|
|
|
|
|
|
|
# times, so we save the offset pointer and |
1050
|
|
|
|
|
|
|
# reset it when we're done. |
1051
|
0
|
|
|
|
|
0
|
my $where= tell(DATA); |
1052
|
0
|
|
|
|
|
0
|
@scriptlines = ; # read in script data |
1053
|
0
|
|
|
|
|
0
|
seek(DATA, $where, 0); |
1054
|
0
|
|
|
|
|
0
|
$self->scriptfile(''); |
1055
|
|
|
|
|
|
|
} |
1056
|
|
|
|
|
|
|
|
1057
|
1
|
|
|
|
|
4
|
my ($entrytype, $entry, $key, $value) ; |
1058
|
1
|
|
|
|
|
2
|
my $thiskey = ""; |
1059
|
1
|
|
|
|
|
2
|
my $thisdecomp = ""; |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
############################################################ |
1062
|
|
|
|
|
|
|
# Examine each line of script data. |
1063
|
1
|
|
|
|
|
3
|
for (@scriptlines) { |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
# Skip comments and lines with only whitespace. |
1066
|
9
|
50
|
33
|
|
|
55
|
next if (/^\s*#/ || /^\s*$/); |
1067
|
|
|
|
|
|
|
|
1068
|
|
|
|
|
|
|
# Split entrytype and entry, using a colon as the delimiter. |
1069
|
9
|
|
|
|
|
54
|
($entrytype, $entry) = $_ =~ m/^\s*(\S*)\s*:\s*(.*)\s*$/; |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
# Case loop, based on the entrytype. |
1072
|
9
|
|
|
|
|
24
|
for ($entrytype) { |
1073
|
|
|
|
|
|
|
|
1074
|
9
|
50
|
|
|
|
24
|
/quit/ and do { push @{ $self->{quit} }, $entry; last; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1075
|
9
|
50
|
|
|
|
23
|
/initial/ and do { push @{ $self->{initial} }, $entry; last; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1076
|
9
|
50
|
|
|
|
22
|
/final/ and do { push @{ $self->{final} }, $entry; last; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1077
|
|
|
|
|
|
|
|
1078
|
9
|
100
|
|
|
|
23
|
/decomp/ and do { |
1079
|
3
|
50
|
|
|
|
9
|
die "$0: error parsing script: decomposition rule with no keyword.\n" |
1080
|
|
|
|
|
|
|
if $thiskey eq ""; |
1081
|
3
|
|
|
|
|
9
|
$thisdecomp = join($;,$thiskey,$entry); |
1082
|
3
|
|
|
|
|
5
|
push @{ $self->{decomplist}->{$thiskey} }, $entry ; |
|
3
|
|
|
|
|
23
|
|
1083
|
3
|
|
|
|
|
7
|
last; |
1084
|
|
|
|
|
|
|
}; |
1085
|
|
|
|
|
|
|
|
1086
|
6
|
100
|
|
|
|
17
|
/reasmb/ and do { |
1087
|
3
|
50
|
|
|
|
9
|
die "$0: error parsing script: reassembly rule with no decomposition rule.\n" |
1088
|
|
|
|
|
|
|
if $thisdecomp eq ""; |
1089
|
3
|
|
|
|
|
7
|
push @{ $self->{reasmblist}->{$thisdecomp} }, $entry ; |
|
3
|
|
|
|
|
11
|
|
1090
|
3
|
|
|
|
|
9
|
last; |
1091
|
|
|
|
|
|
|
}; |
1092
|
|
|
|
|
|
|
|
1093
|
3
|
50
|
|
|
|
9
|
/reasm_for_memory/ and do { |
1094
|
0
|
0
|
|
|
|
0
|
die "$0: error parsing script: reassembly rule with no decomposition rule.\n" |
1095
|
|
|
|
|
|
|
if $thisdecomp eq ""; |
1096
|
0
|
|
|
|
|
0
|
push @{ $self->{reasmblist_for_memory}->{$thisdecomp} }, $entry ; |
|
0
|
|
|
|
|
0
|
|
1097
|
0
|
|
|
|
|
0
|
last; |
1098
|
|
|
|
|
|
|
}; |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
# The entrytypes below actually expect to see a key and value |
1101
|
|
|
|
|
|
|
# pair in the entry, so we split them out. The first word, |
1102
|
|
|
|
|
|
|
# separated by a space, is the key, and everything else is |
1103
|
|
|
|
|
|
|
# an array of values. |
1104
|
|
|
|
|
|
|
|
1105
|
3
|
|
|
|
|
16
|
($key,$value) = $entry =~ m/^\s*(\S*)\s*(.*)/; |
1106
|
|
|
|
|
|
|
|
1107
|
3
|
50
|
|
|
|
10
|
/pre/ and do { $self->{pre}->{$key} = $value; last; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1108
|
3
|
50
|
|
|
|
8
|
/post/ and do { $self->{post}->{$key} = $value; last; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
# synon expects an array, so we split $value into an array, using " " as delimiter. |
1111
|
3
|
50
|
|
|
|
7
|
/synon/ and do { $self->{synon}->{$key} = [ split /\ /, $value ]; last; }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
1112
|
|
|
|
|
|
|
|
1113
|
3
|
50
|
|
|
|
13
|
/key/ and do { |
1114
|
3
|
|
|
|
|
7
|
$thiskey = $key; |
1115
|
3
|
|
|
|
|
6
|
$thisdecomp = ""; |
1116
|
3
|
|
|
|
|
11
|
$self->{keyranks}->{$thiskey} = $value ; |
1117
|
3
|
|
|
|
|
7
|
last; |
1118
|
|
|
|
|
|
|
}; |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
} # End for ($entrytype) (case loop) |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
} # End for (@scriptlines) |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
} # End of method parse_script_data |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
|
1127
|
|
|
|
|
|
|
# Eliminate some pesky warnings. |
1128
|
|
|
|
|
|
|
# |
1129
|
|
|
|
0
|
|
|
sub DESTROY {} |
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
# ---{ E N D M E T H O D S }---------------------------------- |
1133
|
|
|
|
|
|
|
#################################################################### |
1134
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
1; # Return a true value. |
1136
|
|
|
|
|
|
|
|
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
This software is copyright (c) 2003 by John Nolan Ejpnolan@sonic.netE. |
1141
|
|
|
|
|
|
|
|
1142
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
1143
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
=head1 AUTHOR |
1146
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
John Nolan jpnolan@sonic.net January 2003. |
1148
|
|
|
|
|
|
|
|
1149
|
|
|
|
|
|
|
Implements the classic Eliza algorithm by Prof. Joseph Weizenbaum. |
1150
|
|
|
|
|
|
|
Script format devised by Charles Hayden. |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
=cut |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
|
1155
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
#################################################################### |
1157
|
|
|
|
|
|
|
# ---{ B E G I N D E F A U L T S C R I P T D A T A }---------- |
1158
|
|
|
|
|
|
|
# |
1159
|
|
|
|
|
|
|
# This script was prepared by Chris Hayden. Hayden's Eliza |
1160
|
|
|
|
|
|
|
# program was written in Java, however, it attempted to match |
1161
|
|
|
|
|
|
|
# the functionality of Weizenbaum's original program as closely |
1162
|
|
|
|
|
|
|
# as possible. |
1163
|
|
|
|
|
|
|
# |
1164
|
|
|
|
|
|
|
# Hayden's script format was quite different from Weizenbaum's, |
1165
|
|
|
|
|
|
|
# but it maintained the same content. I have adapted Hayden's |
1166
|
|
|
|
|
|
|
# script format, since it was simple and convenient enough |
1167
|
|
|
|
|
|
|
# for my purposes. |
1168
|
|
|
|
|
|
|
# |
1169
|
|
|
|
|
|
|
# I've made small modifications here and there. |
1170
|
|
|
|
|
|
|
# |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
# We use the token __DATA__ rather than __END__, |
1173
|
|
|
|
|
|
|
# so that all this data is visible within the current package. |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
__DATA__ |