line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# MetaMap::DataStructures::Utterance |
2
|
|
|
|
|
|
|
# (Last Updated $Id: Utterance.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $) |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Perl module that provides a perl interface to the |
5
|
|
|
|
|
|
|
# Unified Medical Language System (UMLS) |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# Copyright (c) 2016 |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# Sam Henry, Virginia Commonwealth University |
10
|
|
|
|
|
|
|
# henryst at vcu.edu |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# Bridget T. McInnes, Virginia Commonwealth University |
13
|
|
|
|
|
|
|
# btmcinnes at vcu.edu |
14
|
|
|
|
|
|
|
# |
15
|
|
|
|
|
|
|
# This program is free software; you can redistribute it and/or |
16
|
|
|
|
|
|
|
# modify it under the terms of the GNU General Public License |
17
|
|
|
|
|
|
|
# as published by the Free Software Foundation; either version 2 |
18
|
|
|
|
|
|
|
# of the License, or (at your option) any later version. |
19
|
|
|
|
|
|
|
# |
20
|
|
|
|
|
|
|
# This program is distributed in the hope that it will be useful, |
21
|
|
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
22
|
|
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
23
|
|
|
|
|
|
|
# GNU General Public License for more details. |
24
|
|
|
|
|
|
|
# |
25
|
|
|
|
|
|
|
# You should have received a copy of the GNU General Public License |
26
|
|
|
|
|
|
|
# along with this program; if not, write to |
27
|
|
|
|
|
|
|
# |
28
|
|
|
|
|
|
|
# The Free Software Foundation, Inc., |
29
|
|
|
|
|
|
|
# 59 Temple Place - Suite 330, |
30
|
|
|
|
|
|
|
# Boston, MA 02111-1307, USA. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
package MetaMap::DataStructures::Utterance; |
33
|
|
|
|
|
|
|
|
34
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
30
|
|
35
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
27
|
|
36
|
|
|
|
|
|
|
|
37
|
1
|
|
|
1
|
|
364
|
use MetaMap::DataStructures::Phrase; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
740
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
#---------------------------------------- |
40
|
|
|
|
|
|
|
# constructors |
41
|
|
|
|
|
|
|
#---------------------------------------- |
42
|
|
|
|
|
|
|
# constructor method to create a new Utterance object |
43
|
|
|
|
|
|
|
# input : $inputText <- a MetaMap Prolog Output utterance block |
44
|
|
|
|
|
|
|
# (or equivalent) |
45
|
|
|
|
|
|
|
# $id <- the id of this Utterance of the form: (ab:ti).([\d]+).([\d]+) |
46
|
|
|
|
|
|
|
# (e.g. ab.00000.1) |
47
|
|
|
|
|
|
|
# $text <- the human readable text of this utterance |
48
|
|
|
|
|
|
|
# \@phrases <- an ordered list of phrase objects |
49
|
|
|
|
|
|
|
# output: $self <- an instance of an Utterance object |
50
|
|
|
|
|
|
|
sub new { |
51
|
|
|
|
|
|
|
#create and bless self |
52
|
7
|
|
|
7
|
0
|
8
|
my $class = shift; |
53
|
7
|
|
|
|
|
9
|
my $self = {}; |
54
|
7
|
|
|
|
|
9
|
bless $self, $class; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
#grab input |
57
|
7
|
|
|
|
|
20
|
$self->{inputText} = shift; |
58
|
7
|
|
|
|
|
13
|
$self->{id} = shift; |
59
|
7
|
|
|
|
|
8
|
$self->{text} = shift; |
60
|
7
|
|
|
|
|
7
|
$self->{phrases} = shift; |
61
|
|
|
|
|
|
|
|
62
|
7
|
|
|
|
|
55
|
return $self; |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# method creates and returns an utterance from text |
66
|
|
|
|
|
|
|
# (MetaMap Prolog Machine Output Utterance Block) |
67
|
|
|
|
|
|
|
# input : $inputText <- a MetaMap Prolog Output utterance block (or equivalent) |
68
|
|
|
|
|
|
|
# output: $self <- an instance of an Utterance object |
69
|
|
|
|
|
|
|
sub createFromText { |
70
|
|
|
|
|
|
|
#grab the input |
71
|
7
|
|
|
7
|
0
|
7
|
my $self = shift; |
72
|
7
|
|
|
|
|
8
|
my $inputText = shift; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#grab negated CUIs |
75
|
7
|
|
|
|
|
249
|
$inputText =~ m/neg_list\((.*)\)./; |
76
|
7
|
|
|
|
|
254
|
my $negationsText = $1; |
77
|
7
|
|
|
|
|
9
|
my @negatedCUIs = (); |
78
|
7
|
50
|
|
|
|
16
|
if (defined $negationsText) { |
79
|
7
|
|
|
|
|
183
|
while ($negationsText =~ |
80
|
|
|
|
|
|
|
m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) { |
81
|
0
|
|
|
|
|
0
|
push @negatedCUIs, $1; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
#grab the id and text |
86
|
7
|
|
|
|
|
912
|
$inputText =~ /utterance\('(.*)',"(.*)",/; |
87
|
7
|
|
|
|
|
102
|
my $id = $1; |
88
|
7
|
|
|
|
|
10
|
my $text = $2; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#create the phrases list |
91
|
7
|
|
|
|
|
310
|
my @phraseTexts = split /phrase\(/, $inputText; |
92
|
|
|
|
|
|
|
#shift the first part off (its the part before the first phrase match |
93
|
7
|
|
|
|
|
7
|
shift @phraseTexts; |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
#create a phrase from the phrase texts (and collect the concepts) |
96
|
7
|
|
|
|
|
11
|
my @phrases = (); |
97
|
7
|
|
|
|
|
8
|
foreach my $phraseText(@phraseTexts) { |
98
|
|
|
|
|
|
|
#put 'phrase(' back on |
99
|
134
|
|
|
|
|
443
|
$phraseText = 'phrase('.$phraseText; |
100
|
|
|
|
|
|
|
#create a new phrase from text |
101
|
134
|
|
|
|
|
238
|
my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText( |
102
|
|
|
|
|
|
|
$phraseText, \@negatedCUIs); |
103
|
134
|
|
|
|
|
191
|
push @phrases, $newPhrase; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
#create and return the new utterance |
107
|
7
|
|
|
|
|
25
|
return MetaMap::DataStructures::Utterance->new($inputText, $id, $text, \@phrases); |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# method creates and returns an utterance from text |
111
|
|
|
|
|
|
|
# (MetaMap Prolog Machine Output Utterance Block), and uses a custom $id. |
112
|
|
|
|
|
|
|
# This is useful when the $input text has a non-properly formatted $id |
113
|
|
|
|
|
|
|
# (e.g. tx.0000000.1) |
114
|
|
|
|
|
|
|
# input : $inputText <- a MetaMap Prolog Output utterance block |
115
|
|
|
|
|
|
|
# (or equivalent) |
116
|
|
|
|
|
|
|
# $id <- the id to associate with this Utterance. It overrides any id |
117
|
|
|
|
|
|
|
# found within $inputText. $id should be of the form: |
118
|
|
|
|
|
|
|
# (ab:ti).([\d]+).([\d]+) (e.g. ab.00000.1) |
119
|
|
|
|
|
|
|
# output: $self <- an instance of an Utterance Object |
120
|
|
|
|
|
|
|
sub createFromTextWithId { |
121
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
#grab the input |
124
|
0
|
|
|
|
|
0
|
my $inputText = shift; |
125
|
0
|
|
|
|
|
0
|
my $id = shift; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
#grab negated CUIs |
128
|
0
|
|
|
|
|
0
|
$inputText =~ m/neg_list\((.*)\)./; |
129
|
0
|
|
|
|
|
0
|
my $negationsText = $1; |
130
|
0
|
|
|
|
|
0
|
my @negatedCUIs = (); |
131
|
0
|
0
|
|
|
|
0
|
if (defined $negationsText) { |
132
|
0
|
|
|
|
|
0
|
while ($negationsText =~ |
133
|
|
|
|
|
|
|
m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) { |
134
|
0
|
|
|
|
|
0
|
push @negatedCUIs, $1; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
#grab the id and text |
139
|
0
|
|
|
|
|
0
|
$inputText =~ /utterance\('(.*)',"(.*)",/; |
140
|
0
|
|
|
|
|
0
|
my $aid = $1; |
141
|
0
|
|
|
|
|
0
|
my $text = $2; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
#create the phrases list |
144
|
0
|
|
|
|
|
0
|
my @phraseTexts = split /phrase\(/, $inputText; |
145
|
|
|
|
|
|
|
#shift the first part off (its the part before the first phrase match |
146
|
0
|
|
|
|
|
0
|
shift @phraseTexts; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
#create a phrase from the phrase texts (and collect the concepts) |
149
|
0
|
|
|
|
|
0
|
my @phrases = (); |
150
|
0
|
|
|
|
|
0
|
foreach my $phraseText(@phraseTexts) { |
151
|
|
|
|
|
|
|
#put 'phrase(' back on |
152
|
0
|
|
|
|
|
0
|
$phraseText = 'phrase('.$phraseText; |
153
|
|
|
|
|
|
|
#create a new phrase from text |
154
|
0
|
|
|
|
|
0
|
my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText( |
155
|
|
|
|
|
|
|
$phraseText, \@negatedCUIs); |
156
|
0
|
|
|
|
|
0
|
push @phrases, $newPhrase; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
#create and return the new utterance |
160
|
0
|
|
|
|
|
0
|
return MetaMap::DataStructures::Utterance->new( |
161
|
|
|
|
|
|
|
$inputText, $id, $text, \@phrases); |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
#---------------------------------------- |
165
|
|
|
|
|
|
|
# methods |
166
|
|
|
|
|
|
|
#---------------------------------------- |
167
|
|
|
|
|
|
|
# method summarizes this utterance as a string |
168
|
|
|
|
|
|
|
# input : - |
169
|
|
|
|
|
|
|
# output: $string <- a string describing $self |
170
|
|
|
|
|
|
|
sub toString { |
171
|
1
|
|
|
1
|
0
|
8
|
my $self = shift; |
172
|
|
|
|
|
|
|
|
173
|
1
|
|
|
|
|
1
|
my $string = "utterance:\n"; |
174
|
1
|
|
|
|
|
85
|
$string .= " $self->{id}\n"; |
175
|
1
|
|
|
|
|
18
|
$string .= " $self->{text}\n"; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
#add each phrase to the string |
178
|
1
|
|
|
|
|
2
|
foreach my $phrase(@{$self->{phrases}}) { |
|
1
|
|
|
|
|
3
|
|
179
|
28
|
|
|
|
|
45
|
$string .= " ".$phrase->toString()."\n"; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
1
|
|
|
|
|
319
|
return $string; |
183
|
|
|
|
|
|
|
} |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# method compares this utterance to another and returns 1 if the two |
186
|
|
|
|
|
|
|
# contain identical information |
187
|
|
|
|
|
|
|
# input : $other <- the utterrance object to compare against |
188
|
|
|
|
|
|
|
# output: boolean <- 1 if $self and $other are equivalent (contain equivalent |
189
|
|
|
|
|
|
|
# IDs, and phrases), else 0 |
190
|
|
|
|
|
|
|
sub equals { |
191
|
|
|
|
|
|
|
#grab input |
192
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
193
|
2
|
|
|
|
|
3
|
my $other = shift; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
#compare id's and text |
196
|
2
|
50
|
33
|
|
|
11
|
if ($self->{id} ne $other->{id} |
197
|
|
|
|
|
|
|
|| $self->{text} ne $other->{text}) { |
198
|
0
|
|
|
|
|
0
|
return 0; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
#compare Utterances |
202
|
2
|
|
|
|
|
3
|
foreach my $phraseA(@{$self->{phrases}}){ |
|
2
|
|
|
|
|
3
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
#check each utterance in B |
205
|
56
|
|
|
|
|
41
|
my $match = 0; |
206
|
56
|
|
|
|
|
40
|
foreach my $phraseB(@{$other->{phrases}}) { |
|
56
|
|
|
|
|
61
|
|
207
|
758
|
100
|
|
|
|
888
|
if ($phraseA->equals($phraseB)) { |
208
|
56
|
|
|
|
|
40
|
$match = 1; |
209
|
56
|
|
|
|
|
44
|
last; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
#utteranceA has no equivalent phrase in $other |
214
|
|
|
|
|
|
|
# so utterances are not identical |
215
|
56
|
50
|
|
|
|
80
|
if ($match < 1) { |
216
|
0
|
|
|
|
|
0
|
return 0; |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
#all tests passed, return true |
221
|
2
|
|
|
|
|
10
|
return 1; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
# method determines if this utterance contains the CUI provided as input |
225
|
|
|
|
|
|
|
# returns 1 if this utterance contains the CUI, else 0 |
226
|
|
|
|
|
|
|
# input : $cui <- a string CUI code |
227
|
|
|
|
|
|
|
# output: boolean <- 1 if any of $self's phrases contain $cui |
228
|
|
|
|
|
|
|
sub contains { |
229
|
|
|
|
|
|
|
#grab input |
230
|
2
|
|
|
2
|
0
|
2
|
my $self = shift; |
231
|
2
|
|
|
|
|
2
|
my $cui = shift; |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
#check each phrase to see if it contains the CUI |
234
|
2
|
|
|
|
|
3
|
my $containsCUI = 0; |
235
|
2
|
|
|
|
|
3
|
foreach my $phrase(@{$self->{phrases}}) { |
|
2
|
|
|
|
|
3
|
|
236
|
29
|
100
|
|
|
|
36
|
if ($phrase->contains($cui)) { |
237
|
1
|
|
|
|
|
2
|
$containsCUI = 1; |
238
|
1
|
|
|
|
|
1
|
last; |
239
|
|
|
|
|
|
|
} |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
#return the result |
243
|
2
|
|
|
|
|
5
|
return $containsCUI; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# method gets the an array of concepts as they appear in the utterance. |
247
|
|
|
|
|
|
|
# Conepts are not necassarily ordered, where ambiguity exists all possible toke# n->CUI mappings are listed adjacent to one another. |
248
|
|
|
|
|
|
|
# input : - |
249
|
|
|
|
|
|
|
# output: \@concepts <- a list of concept objects |
250
|
|
|
|
|
|
|
sub getConcepts { |
251
|
|
|
|
|
|
|
#initialize |
252
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
253
|
2
|
|
|
|
|
3
|
my @concepts = (); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
#add concepts in sorted order |
256
|
2
|
|
|
|
|
1
|
foreach my $phrase(@{$self->{phrases}}) { |
|
2
|
|
|
|
|
3
|
|
257
|
56
|
|
|
|
|
30
|
push @concepts, @{$phrase->{concepts}}; |
|
56
|
|
|
|
|
68
|
|
258
|
|
|
|
|
|
|
} |
259
|
2
|
|
|
|
|
11
|
return \@concepts; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# method gets an array list of concepts as they appear in the utterance |
263
|
|
|
|
|
|
|
# input : - |
264
|
|
|
|
|
|
|
# output: \@conceptList <- an array of arrays, where each sub-array contains a |
265
|
|
|
|
|
|
|
# list of 1 or more concept objects. Where more than |
266
|
|
|
|
|
|
|
# one concept object occurrs it means the token to |
267
|
|
|
|
|
|
|
# concept mapping was ambiguous. Arrays are ordered as |
268
|
|
|
|
|
|
|
# the tokens occurr in the utterance. |
269
|
|
|
|
|
|
|
sub getOrderedConcepts { |
270
|
|
|
|
|
|
|
#initialize |
271
|
1
|
|
|
1
|
0
|
1
|
my $self = shift; |
272
|
1
|
|
|
|
|
2
|
my @conceptList = (); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
#add concepts in sorted order |
275
|
1
|
|
|
|
|
1
|
foreach my $phrase(@{ $self->{phrases} }) { |
|
1
|
|
|
|
|
2
|
|
276
|
28
|
|
|
|
|
15
|
push @conceptList, @{ $phrase->{orderedConceptList} }; |
|
28
|
|
|
|
|
37
|
|
277
|
|
|
|
|
|
|
} |
278
|
1
|
|
|
|
|
7
|
return \@conceptList; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# method gets the an array of tokens as they appear in the utterance |
282
|
|
|
|
|
|
|
# input : - |
283
|
|
|
|
|
|
|
# output: \@tokens <- a list token objects ordered by their appearance in $self |
284
|
|
|
|
|
|
|
sub getTokens { |
285
|
|
|
|
|
|
|
#initialize |
286
|
2
|
|
|
2
|
0
|
3
|
my $self = shift; |
287
|
2
|
|
|
|
|
1
|
my @tokens = (); |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
#add concepts in sorted order |
290
|
2
|
|
|
|
|
3
|
foreach my $phrase(@{$self->{phrases}}) { |
|
2
|
|
|
|
|
3
|
|
291
|
56
|
|
|
|
|
31
|
push @tokens, @{$phrase->{tokens}}; |
|
56
|
|
|
|
|
82
|
|
292
|
|
|
|
|
|
|
} |
293
|
2
|
|
|
|
|
21
|
return \@tokens; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
# method gets the an array of Mappings as they appear in the utterance |
297
|
|
|
|
|
|
|
# input : - |
298
|
|
|
|
|
|
|
# output: \@mappings <- a list of mapping objects ordered by their appearance |
299
|
|
|
|
|
|
|
# in $self |
300
|
|
|
|
|
|
|
sub getMappings { |
301
|
|
|
|
|
|
|
#initialize |
302
|
1
|
|
|
1
|
0
|
2
|
my $self = shift; |
303
|
1
|
|
|
|
|
1
|
my @mappings = (); |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
#add concepts in sorted order |
306
|
1
|
|
|
|
|
2
|
foreach my $phrase(@{$self->{phrases}}) { |
|
1
|
|
|
|
|
2
|
|
307
|
28
|
|
|
|
|
13
|
push @mappings, @{$phrase->{mappings}}; |
|
28
|
|
|
|
|
42
|
|
308
|
|
|
|
|
|
|
} |
309
|
1
|
|
|
|
|
10
|
return \@mappings; |
310
|
|
|
|
|
|
|
} |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
1; |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
__END__ |