line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# MetaMap::DataStructures::Utterance |
2
|
|
|
|
|
|
|
# (Last Updated $Id: Utterance.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Perl module that provides a perl interface to the |
5
|
|
|
|
|
|
|
# Unified Medical Language System (UMLS) |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright (c) 2016 |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Sam Henry, Virginia Commonwealth University |
10
|
|
|
|
|
|
|
# henryst at vcu.edu |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# Bridget T. McInnes, Virginia Commonwealth University |
13
|
|
|
|
|
|
|
# btmcinnes at vcu.edu |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
16
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
17
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
18
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
19
|
|
|
|
|
|
|
# |
20
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
21
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
22
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23
|
|
|
|
|
|
|
# GNU General Public License for more details. |
24
|
|
|
|
|
|
|
# |
25
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
26
|
|
|
|
|
|
|
# along with this program; if not, write to |
27
|
|
|
|
|
|
|
# |
28
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
29
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
30
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
package MetaMap::DataStructures::Utterance; |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
1
|
|
3
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
20
|
|
35
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
16
|
|
36
|
|
|
|
|
|
|
|
37
|
1
|
|
|
1
|
|
345
|
use MetaMap::DataStructures::Phrase; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
845
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
#---------------------------------------- |
40
|
|
|
|
|
|
|
# constructors |
41
|
|
|
|
|
|
|
#---------------------------------------- |
42
|
|
|
|
|
|
|
# constructor method to create a new Utterance object |
43
|
|
|
|
|
|
|
# input : $inputText <- a MetaMap Prolog Output utterance block |
44
|
|
|
|
|
|
|
# (or equivalent) |
45
|
|
|
|
|
|
|
# $id <- the id of this Utterance of the form: (ab:ti).([\d]+).([\d]+) |
46
|
|
|
|
|
|
|
# (e.g. ab.00000.1) |
47
|
|
|
|
|
|
|
# $text <- the human readable text of this utterance |
48
|
|
|
|
|
|
|
# \@phrases <- an ordered list of phrase objects |
49
|
|
|
|
|
|
|
# output: $self <- an instance of an Utterance object |
50
|
|
|
|
|
|
|
sub new { |
51
|
|
|
|
|
|
|
#create and bless self |
52
|
7
|
|
|
7
|
0
|
11
|
my $class = shift; |
53
|
7
|
|
|
|
|
9
|
my $self = {}; |
54
|
7
|
|
|
|
|
13
|
bless $self, $class; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
#grab input |
57
|
7
|
|
|
|
|
24
|
$self->{inputText} = shift; |
58
|
7
|
|
|
|
|
15
|
$self->{id} = shift; |
59
|
7
|
|
|
|
|
13
|
$self->{text} = shift; |
60
|
7
|
|
|
|
|
8
|
$self->{phrases} = shift; |
61
|
|
|
|
|
|
|
|
62
|
7
|
|
|
|
|
69
|
return $self; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# method creates and returns an utterance from text |
66
|
|
|
|
|
|
|
# (MetaMap Prolog Machine Output Utterance Block) |
67
|
|
|
|
|
|
|
# input : $inputText <- a MetaMap Prolog Output utterance block (or equivalent) |
68
|
|
|
|
|
|
|
# output: $self <- an instance of an Utterance object |
69
|
|
|
|
|
|
|
sub createFromText { |
70
|
|
|
|
|
|
|
#grab the input |
71
|
7
|
|
|
7
|
0
|
11
|
my $self = shift; |
72
|
7
|
|
|
|
|
8
|
my $inputText = shift; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#grab negated CUIs |
75
|
7
|
|
|
|
|
260
|
$inputText =~ m/neg_list\((.*)\)./; |
76
|
7
|
|
|
|
|
285
|
my $negationsText = $1; |
77
|
7
|
|
|
|
|
12
|
my @negatedCUIs = (); |
78
|
7
|
50
|
|
|
|
17
|
if (defined $negationsText) { |
79
|
7
|
|
|
|
|
182
|
while ($negationsText =~ |
80
|
|
|
|
|
|
|
m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) { |
81
|
0
|
|
|
|
|
0
|
push @negatedCUIs, $1; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#grab the id and text |
86
|
7
|
|
|
|
|
906
|
$inputText =~ /utterance\('(.*)',"(.*)",/; |
87
|
7
|
|
|
|
|
143
|
my $id = $1; |
88
|
7
|
|
|
|
|
11
|
my $text = $2; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#create the phrases list |
91
|
7
|
|
|
|
|
344
|
my @phraseTexts = split /phrase\(/, $inputText; |
92
|
|
|
|
|
|
|
#shift the first part off (its the part before the first phrase match |
93
|
7
|
|
|
|
|
8
|
shift @phraseTexts; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
#create a phrase from the phrase texts (and collect the concepts) |
96
|
7
|
|
|
|
|
10
|
my @phrases = (); |
97
|
7
|
|
|
|
|
9
|
foreach my $phraseText(@phraseTexts) { |
98
|
|
|
|
|
|
|
#put 'phrase(' back on |
99
|
134
|
|
|
|
|
494
|
$phraseText = 'phrase('.$phraseText; |
100
|
|
|
|
|
|
|
#create a new phrase from text |
101
|
134
|
|
|
|
|
279
|
my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText( |
102
|
|
|
|
|
|
|
$phraseText, \@negatedCUIs); |
103
|
134
|
|
|
|
|
225
|
push @phrases, $newPhrase; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
#create and return the new utterance |
107
|
7
|
|
|
|
|
35
|
return MetaMap::DataStructures::Utterance->new( |
108
|
|
|
|
|
|
|
$inputText, $id, $text, \@phrases); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# method creates and returns an utterance from text |
112
|
|
|
|
|
|
|
# (MetaMap Prolog Machine Output Utterance Block), and uses a custom $id. |
113
|
|
|
|
|
|
|
# This is useful when the $input text has a non-properly formatted $id |
114
|
|
|
|
|
|
|
# (e.g. tx.0000000.1) |
115
|
|
|
|
|
|
|
# input : $inputText <- a MetaMap Prolog Output utterance block |
116
|
|
|
|
|
|
|
# (or equivalent) |
117
|
|
|
|
|
|
|
# $id <- the id to associate with this Utterance. It overrides any id |
118
|
|
|
|
|
|
|
# found within $inputText. $id should be of the form: |
119
|
|
|
|
|
|
|
# (ab:ti).([\d]+).([\d]+) (e.g. ab.00000.1) |
120
|
|
|
|
|
|
|
# output: $self <- an instance of an Utterance Object |
121
|
|
|
|
|
|
|
sub createFromTextWithId { |
122
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
#grab the input |
125
|
0
|
|
|
|
|
0
|
my $inputText = shift; |
126
|
0
|
|
|
|
|
0
|
my $id = shift; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
#grab negated CUIs |
129
|
0
|
|
|
|
|
0
|
$inputText =~ m/neg_list\((.*)\)./; |
130
|
0
|
|
|
|
|
0
|
my $negationsText = $1; |
131
|
0
|
|
|
|
|
0
|
my @negatedCUIs = (); |
132
|
0
|
0
|
|
|
|
0
|
if (defined $negationsText) { |
133
|
0
|
|
|
|
|
0
|
while ($negationsText =~ |
134
|
|
|
|
|
|
|
m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) { |
135
|
0
|
|
|
|
|
0
|
push @negatedCUIs, $1; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
#grab the id and text |
140
|
0
|
|
|
|
|
0
|
$inputText =~ /utterance\('(.*)',"(.*)",/; |
141
|
0
|
|
|
|
|
0
|
my $aid = $1; |
142
|
0
|
|
|
|
|
0
|
my $text = $2; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
#create the phrases list |
145
|
0
|
|
|
|
|
0
|
my @phraseTexts = split /phrase\(/, $inputText; |
146
|
|
|
|
|
|
|
#shift the first part off (its the part before the first phrase match |
147
|
0
|
|
|
|
|
0
|
shift @phraseTexts; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
#create a phrase from the phrase texts (and collect the concepts) |
150
|
0
|
|
|
|
|
0
|
my @phrases = (); |
151
|
0
|
|
|
|
|
0
|
foreach my $phraseText(@phraseTexts) { |
152
|
|
|
|
|
|
|
#put 'phrase(' back on |
153
|
0
|
|
|
|
|
0
|
$phraseText = 'phrase('.$phraseText; |
154
|
|
|
|
|
|
|
#create a new phrase from text |
155
|
0
|
|
|
|
|
0
|
my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText( |
156
|
|
|
|
|
|
|
$phraseText, \@negatedCUIs); |
157
|
0
|
|
|
|
|
0
|
push @phrases, $newPhrase; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
#create and return the new utterance |
161
|
0
|
|
|
|
|
0
|
return MetaMap::DataStructures::Utterance->new( |
162
|
|
|
|
|
|
|
$inputText, $id, $text, \@phrases); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
#---------------------------------------- |
166
|
|
|
|
|
|
|
# methods |
167
|
|
|
|
|
|
|
#---------------------------------------- |
168
|
|
|
|
|
|
|
# method summarizes this utterance as a string |
169
|
|
|
|
|
|
|
# input : - |
170
|
|
|
|
|
|
|
# output: $string <- a string describing $self |
171
|
|
|
|
|
|
|
sub toString { |
172
|
1
|
|
|
1
|
0
|
8
|
my $self = shift; |
173
|
|
|
|
|
|
|
|
174
|
1
|
|
|
|
|
2
|
my $string = "utterance:\n"; |
175
|
1
|
|
|
|
|
103
|
$string .= " $self->{id}\n"; |
176
|
1
|
|
|
|
|
6
|
$string .= " $self->{text}\n"; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#add each phrase to the string |
179
|
1
|
|
|
|
|
3
|
foreach my $phrase(@{$self->{phrases}}) { |
|
1
|
|
|
|
|
3
|
|
180
|
28
|
|
|
|
|
45
|
$string .= " ".$phrase->toString()."\n"; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
1
|
|
|
|
|
359
|
return $string; |
184
|
|
|
|
|
|
|
} |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# method compares this utterance to another and returns 1 if the two |
187
|
|
|
|
|
|
|
# contain identical information |
188
|
|
|
|
|
|
|
# input : $other <- the utterrance object to compare against |
189
|
|
|
|
|
|
|
# output: boolean <- 1 if $self and $other are equivalent (contain equivalent |
190
|
|
|
|
|
|
|
# IDs, and phrases), else 0 |
191
|
|
|
|
|
|
|
sub equals { |
192
|
|
|
|
|
|
|
#grab input |
193
|
2
|
|
|
2
|
0
|
2
|
my $self = shift; |
194
|
2
|
|
|
|
|
3
|
my $other = shift; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
#compare id's and text |
197
|
2
|
50
|
33
|
|
|
15
|
if ($self->{id} ne $other->{id} |
198
|
|
|
|
|
|
|
|| $self->{text} ne $other->{text}) { |
199
|
0
|
|
|
|
|
0
|
return 0; |
200
|
|
|
|
|
|
|
} |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
#compare Utterances |
203
|
2
|
|
|
|
|
2
|
foreach my $phraseA(@{$self->{phrases}}){ |
|
2
|
|
|
|
|
4
|
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
#check each utterance in B |
206
|
56
|
|
|
|
|
41
|
my $match = 0; |
207
|
56
|
|
|
|
|
36
|
foreach my $phraseB(@{$other->{phrases}}) { |
|
56
|
|
|
|
|
63
|
|
208
|
758
|
100
|
|
|
|
941
|
if ($phraseA->equals($phraseB)) { |
209
|
56
|
|
|
|
|
34
|
$match = 1; |
210
|
56
|
|
|
|
|
41
|
last; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
#utteranceA has no equivalent phrase in $other |
215
|
|
|
|
|
|
|
# so utterances are not identical |
216
|
56
|
50
|
|
|
|
86
|
if ($match < 1) { |
217
|
0
|
|
|
|
|
0
|
return 0; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
#all tests passed, return true |
222
|
2
|
|
|
|
|
10
|
return 1; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# method determines if this utterance contains the CUI provided as input |
226
|
|
|
|
|
|
|
# returns 1 if this utterance contains the CUI, else 0 |
227
|
|
|
|
|
|
|
# input : $cui <- a string CUI code |
228
|
|
|
|
|
|
|
# output: boolean <- 1 if any of $self's phrases contain $cui |
229
|
|
|
|
|
|
|
sub contains { |
230
|
|
|
|
|
|
|
#grab input |
231
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
232
|
2
|
|
|
|
|
3
|
my $cui = shift; |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
#check each phrase to see if it contains the CUI |
235
|
2
|
|
|
|
|
2
|
my $containsCUI = 0; |
236
|
2
|
|
|
|
|
4
|
foreach my $phrase(@{$self->{phrases}}) { |
|
2
|
|
|
|
|
4
|
|
237
|
29
|
100
|
|
|
|
39
|
if ($phrase->contains($cui)) { |
238
|
1
|
|
|
|
|
1
|
$containsCUI = 1; |
239
|
1
|
|
|
|
|
1
|
last; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
#return the result |
244
|
2
|
|
|
|
|
6
|
return $containsCUI; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# method gets the an array of concepts as they appear in the utterance. |
248
|
|
|
|
|
|
|
# Conepts are not necassarily ordered, where ambiguity exists all possible |
249
|
|
|
|
|
|
|
# token->CUI mappings are listed adjacent to one another. |
250
|
|
|
|
|
|
|
# input : - |
251
|
|
|
|
|
|
|
# output: \@concepts <- a list of concept objects |
252
|
|
|
|
|
|
|
sub getConcepts { |
253
|
|
|
|
|
|
|
#initialize |
254
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
255
|
2
|
|
|
|
|
2
|
my @concepts = (); |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
#add concepts in sorted order |
258
|
2
|
|
|
|
|
2
|
foreach my $phrase(@{$self->{phrases}}) { |
|
2
|
|
|
|
|
4
|
|
259
|
56
|
|
|
|
|
35
|
push @concepts, @{$phrase->{concepts}}; |
|
56
|
|
|
|
|
67
|
|
260
|
|
|
|
|
|
|
} |
261
|
2
|
|
|
|
|
10
|
return \@concepts; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
# method gets an array list of concepts as they appear in the utterance |
265
|
|
|
|
|
|
|
# input : - |
266
|
|
|
|
|
|
|
# output: \@conceptList <- an array of arrays, where each sub-array contains a |
267
|
|
|
|
|
|
|
# list of 1 or more concept objects. Where more than |
268
|
|
|
|
|
|
|
# one concept object occurrs it means the token to |
269
|
|
|
|
|
|
|
# concept mapping was ambiguous. Arrays are ordered as |
270
|
|
|
|
|
|
|
# the tokens occurr in the utterance. |
271
|
|
|
|
|
|
|
sub getOrderedConcepts { |
272
|
|
|
|
|
|
|
#initialize |
273
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
274
|
1
|
|
|
|
|
2
|
my @conceptList = (); |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
#add concepts in sorted order |
277
|
1
|
|
|
|
|
1
|
foreach my $phrase(@{ $self->{phrases} }) { |
|
1
|
|
|
|
|
2
|
|
278
|
28
|
|
|
|
|
18
|
push @conceptList, @{ $phrase->{orderedConceptList} }; |
|
28
|
|
|
|
|
39
|
|
279
|
|
|
|
|
|
|
} |
280
|
1
|
|
|
|
|
8
|
return \@conceptList; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
# method gets the an array of tokens as they appear in the utterance |
284
|
|
|
|
|
|
|
# input : - |
285
|
|
|
|
|
|
|
# output: \@tokens <- a list token objects ordered by their appearance in $self |
286
|
|
|
|
|
|
|
sub getTokens { |
287
|
|
|
|
|
|
|
#initialize |
288
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
289
|
2
|
|
|
|
|
4
|
my @tokens = (); |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
#add concepts in sorted order |
292
|
2
|
|
|
|
|
1
|
foreach my $phrase(@{$self->{phrases}}) { |
|
2
|
|
|
|
|
4
|
|
293
|
56
|
|
|
|
|
31
|
push @tokens, @{$phrase->{tokens}}; |
|
56
|
|
|
|
|
90
|
|
294
|
|
|
|
|
|
|
} |
295
|
2
|
|
|
|
|
28
|
return \@tokens; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# method gets the an array of Mappings as they appear in the utterance |
299
|
|
|
|
|
|
|
# input : - |
300
|
|
|
|
|
|
|
# output: \@mappings <- a list of mapping objects ordered by their appearance |
301
|
|
|
|
|
|
|
# in $self |
302
|
|
|
|
|
|
|
sub getMappings { |
303
|
|
|
|
|
|
|
#initialize |
304
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
305
|
1
|
|
|
|
|
1
|
my @mappings = (); |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
#add concepts in sorted order |
308
|
1
|
|
|
|
|
2
|
foreach my $phrase(@{$self->{phrases}}) { |
|
1
|
|
|
|
|
1
|
|
309
|
28
|
|
|
|
|
18
|
push @mappings, @{$phrase->{mappings}}; |
|
28
|
|
|
|
|
44
|
|
310
|
|
|
|
|
|
|
} |
311
|
1
|
|
|
|
|
10
|
return \@mappings; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
1; |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
__END__ |