line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Eliza::Chatbot::ScriptParser; |
2
|
|
|
|
|
|
|
|
3
|
12
|
|
|
12
|
|
137494
|
use Moo; |
|
12
|
|
|
|
|
23383
|
|
|
12
|
|
|
|
|
79
|
|
4
|
12
|
|
|
12
|
|
6918
|
use MooX::LazierAttributes; |
|
12
|
|
|
|
|
35949
|
|
|
12
|
|
|
|
|
68
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# Declare the parser's attributes through MooX::LazierAttributes.
# Each spec reads [ mode, default, options ]; `rw` and `lzy` are helpers
# exported by that module (presumably "read-write" and "lazy => 1" --
# confirm against the MooX::LazierAttributes documentation).
attributes(
    # Script file name; starts out as the empty string.
    script_file => [ rw, '' ],

    # List-valued script entries, each starting as an empty array ref.
    [ qw(quit initial final) ] => [ rw, [], { lzy } ],

    # Table-valued script entries, each starting as an empty hash ref.
    [
        qw(
            decomp reasmb reasmb_for_memory
            pre post synon key unique_words
        )
    ] => [ rw, {}, { lzy } ],
);
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Read Eliza script data and file every entry into this object's attributes.
#
# Usage:
#   $self->parse_script_data;                  # use $self->script_file
#   $self->parse_script_data($script_file);    # read the named file instead
#
# The lines come from _open_script_file(). Each usable line has the form
# "entrytype: entry" and is handled according to its entry type:
#
#   quit / initial / final  - entry is pushed onto the list of that name
#   key                     - "word value": starts a new keyword and stores
#                             the value under the word
#   decomp                  - pushed onto decomp->{current keyword}
#   reasmb*                 - pushed onto <entrytype>->{"keyword$;decomp"}
#   pre / post              - "word value": stored as word => value
#   synon                   - "word a b c": stored as word => [a, b, c]
#
# New data is added to whatever the attributes already hold.
#
# Dies if a decomp entry appears before any key entry, or a reassembly
# entry appears before any decomp entry. Returns nothing.
sub parse_script_data {
    my ($self, $script_file) = @_;

    # An explicit file name wins; otherwise use the attribute's current value.
    $script_file = $self->script_file unless defined $script_file;

    my @script_lines = $self->_open_script_file($script_file);

    my $current_key;       # word named by the most recent "key:" entry
    my $current_decomp;    # "$key$;$rule" id of the most recent "decomp:" entry

    LINE:
    for my $line (@script_lines) {

        # Skip blank/whitespace-only lines and "#" comment lines.
        next LINE if $line =~ /^\s*(?:#.*)?$/;

        # Every remaining line contributes to the unique-word tally.
        $self->_unique_words($line);

        # Split at the FIRST colon only, so an entry may itself contain ':'.
        my ($entry_type, $entry) = split /:/, $line, 2;

        # No colon at all: not an "entrytype: entry" line, nothing to store.
        next LINE unless defined $entry;

        $entry_type = _trim_string($entry_type);
        $entry      = _trim_string($entry);

        # The entry type doubles as the name of the accessor to call.
        # Patterns are deliberately unanchored, as before; /reasmb/ therefore
        # also covers "reasmb_for_memory".
        if ($entry_type =~ /quit|initial|final/) {
            push @{ $self->$entry_type }, $entry;
        }
        elsif ($entry_type =~ /decomp/) {
            die "$0: error parsing script: decomp rule with no keyword. \n"
                unless $current_key;
            $current_decomp = join($;, $current_key, $entry);
            push @{ $self->$entry_type->{$current_key} }, $entry;
        }
        elsif ($entry_type =~ /reasmb/) {
            die "$0: error parsing script: reassembly rule with no decomposition rule"
                unless $current_decomp;
            push @{ $self->$entry_type->{$current_decomp} }, $entry;
        }
        else {
            # Remaining types are "word rest-of-entry". The explicit limit
            # keeps the whole remainder in $value instead of only its first
            # word.
            my ($key, $value) = split /\s+/, $entry, 2;

            if ($entry_type =~ /pre|post/) {
                $self->$entry_type->{$key} = $value;
            }
            elsif ($entry_type =~ /synon/) {
                my @synonyms = defined $value ? split(/\ /, $value) : ();
                $self->$entry_type->{$key} = \@synonyms;
            }
            elsif ($entry_type =~ /key/) {
                # A new keyword invalidates the previous decomposition rule.
                $current_key    = $key;
                $current_decomp = "";
                $self->$entry_type->{$key} = $value;
            }
        }
    }

    return;
}
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# Tally the words of one script line in the unique_words table.
#
# Arguments: the parser object and the raw line. Everything except ASCII
# letters, apostrophes and whitespace is removed first, then each
# whitespace-separated word has its count in $self->unique_words
# incremented. Words are counted case-sensitively. Returns nothing.
sub _unique_words {
    my ($self, $line) = @_;

    # The quantifier belongs outside the class: inside it, "+" was a literal
    # member and plus signs survived into the counted words.
    $line =~ s/[^a-zA-Z'\s]+//g;

    my $count_of = $self->unique_words;
    $count_of->{$_}++ for split ' ', $line;

    return;
}
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# Return a copy of the given string with leading and trailing whitespace
# removed. Plain function, not a method. The caller's variable is left
# untouched because the work is done on a local copy.
#
# An undefined argument is passed straight back as undef, so callers that
# hand over a missing field do not trigger an "uninitialized value" warning.
sub _trim_string {
    my $string = shift;
    return $string unless defined $string;

    $string =~ s/^\s+|\s+$//g;
    return $string;
}
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Fetch the raw script lines, either from an external file or from this
# module's own __DATA__ section.
#
# Arguments: the parser object and a script file name (may be empty).
# Returns the list of lines, newlines included.
#
# Side effect: records the source in $self->script_file -- the file name
# when a file was read, or the marker 'none' when the built-in data was
# used.
#
# Dies if a file name is given and the file cannot be opened for reading.
sub _open_script_file {
    my ($self, $script_file) = @_;

    # 'none' is the marker this method stores after reading the built-in
    # data. Treat it like "no file", so a second parse on the same object
    # does not try to open a file literally called "none".
    if ($script_file && $script_file ne 'none') {
        open(my $fh, "<", $script_file)
            or die "Could not read from file $script_file : $!\n";
        my @script_lines = <$fh>;
        close($fh);

        $self->script_file($script_file);
        return @script_lines;
    }

    # Built-in data: remember where DATA currently stands and rewind to
    # that point afterwards, so the section can be read again later.
    my $where = tell(DATA);
    my @script_lines = <DATA>;
    seek(DATA, $where, 0);

    $self->script_file('none');
    return @script_lines;
}
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
1; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 Name |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Eliza::Chatbot::ScriptParser |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 Version |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Version 0.08 |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head1 Options |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=over |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item script_file |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item quit |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item initial |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item final |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item decomp |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item reasmb |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item reasmb_for_memory |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item pre |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=item post |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item synon |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item key |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=item unique_words |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=back |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head1 SUBROUTINES/METHODS |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 parse_script_data() |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
$self->parse_script_data; |
147
|
|
|
|
|
|
|
$self->parse_script_data( $script_file ); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
parse_script_data() is invoked from the _initialize() method, which is called from |
150
|
|
|
|
|
|
|
the new() function. However, you can also call this method at any time against |
151
|
|
|
|
|
|
|
an already-instantiated Eliza instance. In that case, the new script data is I<added> |
152
|
|
|
|
|
|
|
to the old script data. The old script data is not deleted. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
You can pass a parameter to this function, which is the name of the script file, |
155
|
|
|
|
|
|
|
and it will read in and parse that file. If you do not pass any parameter to |
156
|
|
|
|
|
|
|
this method, then it will read the data embedded at the end of the module as its |
157
|
|
|
|
|
|
|
default script data. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
If you pass the name of a script file to parse_script_data(), and that file is |
160
|
|
|
|
|
|
|
not available for reading, then the module dies. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=head1 Format of the script file |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
This module includes a default script file within itself, so it is not necessary |
165
|
|
|
|
|
|
|
to explicitly specify a script file when instantiating an Eliza object. Each line |
166
|
|
|
|
|
|
|
in the script file can specify a key, a decomposition rule, or a reassembly rule. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
key: remember 5 |
169
|
|
|
|
|
|
|
decomp: * i remember * |
170
|
|
|
|
|
|
|
reasmb: Do you often think of (2) ? |
171
|
|
|
|
|
|
|
reasmb: Does thinking of (2) bring anything else to mind ? |
172
|
|
|
|
|
|
|
decomp: * do you remember * |
173
|
|
|
|
|
|
|
reasmb: Did you think I would forget (2) ? |
174
|
|
|
|
|
|
|
reasmb: What about (2) ? |
175
|
|
|
|
|
|
|
reasmb: goto what |
176
|
|
|
|
|
|
|
pre: equivalent alike |
177
|
|
|
|
|
|
|
synon: belief feel think believe wish |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
The number after the key specifies the rank. If a user's input contains the keyword, then |
180
|
|
|
|
|
|
|
the transform() function will try to match one of the decomposition rules for that keyword. |
181
|
|
|
|
|
|
|
If one matches, then it will select one of the reassembly rules at random. The number |
182
|
|
|
|
|
|
|
(2) here means "use whatever set of words matched the second asterisk in the decomposition |
183
|
|
|
|
|
|
|
rule." If you specify a list of synonyms for a word, then you should use a "@" when you use that |
184
|
|
|
|
|
|
|
word in a decomposition rule: |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
decomp: * i @belief i * |
187
|
|
|
|
|
|
|
reasmb: Do you really think so ? |
188
|
|
|
|
|
|
|
reasmb: But you are not sure you (3). |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Otherwise, the script will never check to see if there are any synonyms for that keyword. |
191
|
|
|
|
|
|
|
Reassembly rules should be marked with I<reasmb_for_memory> rather than I<reasmb> when they are appropriate for use when a user's comment has been extracted from memory. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
key: my 2 |
194
|
|
|
|
|
|
|
decomp: * my * |
195
|
|
|
|
|
|
|
reasmb_for_memory: Let's discuss further why your (2). |
196
|
|
|
|
|
|
|
reasmb_for_memory: Earlier you said your (2). |
197
|
|
|
|
|
|
|
reasmb_for_memory: But your (2). |
198
|
|
|
|
|
|
|
reasmb_for_memory: Does that have anything to do with the fact that your (2) ? |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 AUTHOR |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
LNATION email@lnation.org |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Copyright 2017 Robert Acock. |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under the terms of the Artistic License (2.0). You may obtain a copy of the full license at: |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
http://www.perlfoundation.org/artistic_license_2_0 |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
Any use, modification, and distribution of the Standard or Modified Versions is governed by this Artistic License. By using, modifying or distributing the Package, you accept this license. Do not use, modify, or distribute the Package, if you do not accept this license. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
If your Modified Version has been derived from a Modified Version made by someone other than you, you are nevertheless required to ensure that your Modified Version complies with the requirements of this license. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
This license does not grant you the right to use any trademark, service mark, tradename, or logo of the Copyright Holder. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
This license includes the non-exclusive, worldwide, free-of-charge patent license to make, have made, use, offer to sell, sell, import and otherwise transfer the Package with respect to any patent claims licensable by the Copyright Holder that are necessarily infringed by the Package. If you institute patent litigation (including a cross-claim or counterclaim) against any party alleging that the Package constitutes direct or contributory patent infringement, then this Artistic License to you shall terminate on the date that such litigation is filed. |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES. THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=cut |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
__DATA__ |