File Coverage

blib/lib/MetaMap/DataStructures/Utterance.pm
Criterion Covered Total %
statement 93 116 80.1
branch 7 12 58.3
condition 1 3 33.3
subroutine 12 13 92.3
pod 0 10 0.0
total 113 154 73.3


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Utterance
2             # (Last Updated $Id: Utterance.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Utterance;
33              
34 1     1   4 use strict;
  1         1  
  1         30  
35 1     1   6 use warnings;
  1         1  
  1         27  
36              
37 1     1   364 use MetaMap::DataStructures::Phrase;
  1         2  
  1         740  
38              
39             #----------------------------------------
40             # constructors
41             #----------------------------------------
42             # constructor method to create a new Utterance object
43             # input : $inputText <- a MetaMap Prolog Output utterance block
44             # (or equivalent)
45             # $id <- the id of this Utterance of the form: (ab:ti).([\d]+).([\d]+)
46             # (e.g. ab.00000.1)
47             # $text <- the human readable text of this utterance
48             # \@phrases <- an ordered list of phrase objects
49             # output: $self <- an instance of an Utterance object
50             sub new {
51             #create and bless self
52 7     7 0 8 my $class = shift;
53 7         9 my $self = {};
54 7         9 bless $self, $class;
55              
56             #grab input
57 7         20 $self->{inputText} = shift;
58 7         13 $self->{id} = shift;
59 7         8 $self->{text} = shift;
60 7         7 $self->{phrases} = shift;
61            
62 7         55 return $self;
63             }
64              
65             # method creates and returns an utterance from text
66             # (MetaMap Prolog Machine Output Utterance Block)
67             # input : $inputText <- a MetaMap Prolog Output utterance block (or equivalent)
68             # output: $self <- an instance of an Utterance object
69             sub createFromText {
70             #grab the input
71 7     7 0 7 my $self = shift;
72 7         8 my $inputText = shift;
73            
74             #grab negated CUIs
75 7         249 $inputText =~ m/neg_list\((.*)\)./;
76 7         254 my $negationsText = $1;
77 7         9 my @negatedCUIs = ();
78 7 50       16 if (defined $negationsText) {
79 7         183 while ($negationsText =~
80             m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
81 0         0 push @negatedCUIs, $1;
82             }
83             }
84              
85             #grab the id and text
86 7         912 $inputText =~ /utterance\('(.*)',"(.*)",/;
87 7         102 my $id = $1;
88 7         10 my $text = $2;
89              
90             #create the phrases list
91 7         310 my @phraseTexts = split /phrase\(/, $inputText;
92             #shift the first part off (its the part before the first phrase match
93 7         7 shift @phraseTexts;
94              
95             #create a phrase from the phrase texts (and collect the concepts)
96 7         11 my @phrases = ();
97 7         8 foreach my $phraseText(@phraseTexts) {
98             #put 'phrase(' back on
99 134         443 $phraseText = 'phrase('.$phraseText;
100             #create a new phrase from text
101 134         238 my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
102             $phraseText, \@negatedCUIs);
103 134         191 push @phrases, $newPhrase;
104             }
105              
106             #create and return the new utterance
107 7         25 return MetaMap::DataStructures::Utterance->new($inputText, $id, $text, \@phrases);
108             }
109              
110             # method creates and returns an utterance from text
111             # (MetaMap Prolog Machine Output Utterance Block), and uses a custom $id.
112             # This is useful when the $input text has a non-properly formatted $id
113             # (e.g. tx.0000000.1)
114             # input : $inputText <- a MetaMap Prolog Output utterance block
115             # (or equivalent)
116             # $id <- the id to associate with this Utterance. It overrides any id
117             # found within $inputText. $id should be of the form:
118             # (ab:ti).([\d]+).([\d]+) (e.g. ab.00000.1)
119             # output: $self <- an instance of an Utterance Object
120             sub createFromTextWithId {
121 0     0 0 0 my $self = shift;
122              
123             #grab the input
124 0         0 my $inputText = shift;
125 0         0 my $id = shift;
126            
127             #grab negated CUIs
128 0         0 $inputText =~ m/neg_list\((.*)\)./;
129 0         0 my $negationsText = $1;
130 0         0 my @negatedCUIs = ();
131 0 0       0 if (defined $negationsText) {
132 0         0 while ($negationsText =~
133             m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
134 0         0 push @negatedCUIs, $1;
135             }
136             }
137              
138             #grab the id and text
139 0         0 $inputText =~ /utterance\('(.*)',"(.*)",/;
140 0         0 my $aid = $1;
141 0         0 my $text = $2;
142              
143             #create the phrases list
144 0         0 my @phraseTexts = split /phrase\(/, $inputText;
145             #shift the first part off (its the part before the first phrase match
146 0         0 shift @phraseTexts;
147              
148             #create a phrase from the phrase texts (and collect the concepts)
149 0         0 my @phrases = ();
150 0         0 foreach my $phraseText(@phraseTexts) {
151             #put 'phrase(' back on
152 0         0 $phraseText = 'phrase('.$phraseText;
153             #create a new phrase from text
154 0         0 my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
155             $phraseText, \@negatedCUIs);
156 0         0 push @phrases, $newPhrase;
157             }
158              
159             #create and return the new utterance
160 0         0 return MetaMap::DataStructures::Utterance->new(
161             $inputText, $id, $text, \@phrases);
162             }
163              
164             #----------------------------------------
165             # methods
166             #----------------------------------------
167             # method summarizes this utterance as a string
168             # input : -
169             # output: $string <- a string describing $self
170             sub toString {
171 1     1 0 8 my $self = shift;
172              
173 1         1 my $string = "utterance:\n";
174 1         85 $string .= " $self->{id}\n";
175 1         18 $string .= " $self->{text}\n";
176            
177             #add each phrase to the string
178 1         2 foreach my $phrase(@{$self->{phrases}}) {
  1         3  
179 28         45 $string .= " ".$phrase->toString()."\n";
180             }
181            
182 1         319 return $string;
183             }
184              
185             # method compares this utterance to another and returns 1 if the two
186             # contain identical information
187             # input : $other <- the utterrance object to compare against
188             # output: boolean <- 1 if $self and $other are equivalent (contain equivalent
189             # IDs, and phrases), else 0
190             sub equals {
191             #grab input
192 2     2 0 3 my $self = shift;
193 2         3 my $other = shift;
194              
195             #compare id's and text
196 2 50 33     11 if ($self->{id} ne $other->{id}
197             || $self->{text} ne $other->{text}) {
198 0         0 return 0;
199             }
200              
201             #compare Utterances
202 2         3 foreach my $phraseA(@{$self->{phrases}}){
  2         3  
203              
204             #check each utterance in B
205 56         41 my $match = 0;
206 56         40 foreach my $phraseB(@{$other->{phrases}}) {
  56         61  
207 758 100       888 if ($phraseA->equals($phraseB)) {
208 56         40 $match = 1;
209 56         44 last;
210             }
211             }
212              
213             #utteranceA has no equivalent phrase in $other
214             # so utterances are not identical
215 56 50       80 if ($match < 1) {
216 0         0 return 0;
217             }
218             }
219              
220             #all tests passed, return true
221 2         10 return 1;
222             }
223              
224             # method determines if this utterance contains the CUI provided as input
225             # returns 1 if this utterance contains the CUI, else 0
226             # input : $cui <- a string CUI code
227             # output: boolean <- 1 if any of $self's phrases contain $cui
228             sub contains {
229             #grab input
230 2     2 0 2 my $self = shift;
231 2         2 my $cui = shift;
232              
233             #check each phrase to see if it contains the CUI
234 2         3 my $containsCUI = 0;
235 2         3 foreach my $phrase(@{$self->{phrases}}) {
  2         3  
236 29 100       36 if ($phrase->contains($cui)) {
237 1         2 $containsCUI = 1;
238 1         1 last;
239             }
240             }
241            
242             #return the result
243 2         5 return $containsCUI;
244             }
245              
246             # method gets the an array of concepts as they appear in the utterance.
247             # Conepts are not necassarily ordered, where ambiguity exists all possible toke# n->CUI mappings are listed adjacent to one another.
248             # input : -
249             # output: \@concepts <- a list of concept objects
250             sub getConcepts {
251             #initialize
252 2     2 0 3 my $self = shift;
253 2         3 my @concepts = ();
254              
255             #add concepts in sorted order
256 2         1 foreach my $phrase(@{$self->{phrases}}) {
  2         3  
257 56         30 push @concepts, @{$phrase->{concepts}};
  56         68  
258             }
259 2         11 return \@concepts;
260             }
261              
262             # method gets an array list of concepts as they appear in the utterance
263             # input : -
264             # output: \@conceptList <- an array of arrays, where each sub-array contains a
265             # list of 1 or more concept objects. Where more than
266             # one concept object occurrs it means the token to
267             # concept mapping was ambiguous. Arrays are ordered as
268             # the tokens occurr in the utterance.
269             sub getOrderedConcepts {
270             #initialize
271 1     1 0 1 my $self = shift;
272 1         2 my @conceptList = ();
273              
274             #add concepts in sorted order
275 1         1 foreach my $phrase(@{ $self->{phrases} }) {
  1         2  
276 28         15 push @conceptList, @{ $phrase->{orderedConceptList} };
  28         37  
277             }
278 1         7 return \@conceptList;
279             }
280              
281             # method gets the an array of tokens as they appear in the utterance
282             # input : -
283             # output: \@tokens <- a list token objects ordered by their appearance in $self
284             sub getTokens {
285             #initialize
286 2     2 0 3 my $self = shift;
287 2         1 my @tokens = ();
288              
289             #add concepts in sorted order
290 2         3 foreach my $phrase(@{$self->{phrases}}) {
  2         3  
291 56         31 push @tokens, @{$phrase->{tokens}};
  56         82  
292             }
293 2         21 return \@tokens;
294             }
295              
296             # method gets the an array of Mappings as they appear in the utterance
297             # input : -
298             # output: \@mappings <- a list of mapping objects ordered by their appearance
299             # in $self
300             sub getMappings {
301             #initialize
302 1     1 0 2 my $self = shift;
303 1         1 my @mappings = ();
304              
305             #add concepts in sorted order
306 1         2 foreach my $phrase(@{$self->{phrases}}) {
  1         2  
307 28         13 push @mappings, @{$phrase->{mappings}};
  28         42  
308             }
309 1         10 return \@mappings;
310             }
311              
312             1;
313              
314             __END__