File Coverage

blib/lib/MetaMap/DataStructures/Utterance.pm

Criterion	Covered	Total	%
statement	93	116	80.1
branch	7	12	58.3
condition	1	3	33.3
subroutine	12	13	92.3
pod	0	10	0.0
total	113	154	73.3

line	stmt	bran	cond	sub	pod	time	code
1							# MetaMap::DataStructures::Utterance
2							# (Last Updated $Id: Utterance.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3							#
4							# Perl module that provides a perl interface to the
5							# Unified Medical Language System (UMLS)
6							#
7							# Copyright (c) 2016
8							#
9							# Sam Henry, Virginia Commonwealth University
10							# henryst at vcu.edu
11							#
12							# Bridget T. McInnes, Virginia Commonwealth University
13							# btmcinnes at vcu.edu
14							#
15							# This program is free software; you can redistribute it and/or
16							# modify it under the terms of the GNU General Public License
17							# as published by the Free Software Foundation; either version 2
18							# of the License, or (at your option) any later version.
19							#
20							# This program is distributed in the hope that it will be useful,
21							# but WITHOUT ANY WARRANTY; without even the implied warranty of
22							# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23							# GNU General Public License for more details.
24							#
25							# You should have received a copy of the GNU General Public License
26							# along with this program; if not, write to
27							#
28							# The Free Software Foundation, Inc.,
29							# 59 Temple Place - Suite 330,
30							# Boston, MA 02111-1307, USA.
31
32							package MetaMap::DataStructures::Utterance;
33
34	1			1		4	use strict;
	1					1
	1					30
35	1			1		6	use warnings;
	1					1
	1					27
36
37	1			1		364	use MetaMap::DataStructures::Phrase;
	1					2
	1					740
38
39							#----------------------------------------
40							# constructors
41							#----------------------------------------
42							# constructor method to create a new Utterance object
43							# input : $inputText <- a MetaMap Prolog Output utterance block
44							# (or equivalent)
45							# $id <- the id of this Utterance of the form: (ab:ti).([\d]+).([\d]+)
46							# (e.g. ab.00000.1)
47							# $text <- the human readable text of this utterance
48							# \@phrases <- an ordered list of phrase objects
49							# output: $self <- an instance of an Utterance object
50							sub new {
51							#create and bless self
52	7			7	0	8	my $class = shift;
53	7					9	my $self = {};
54	7					9	bless $self, $class;
55
56							#grab input
57	7					20	$self->{inputText} = shift;
58	7					13	$self->{id} = shift;
59	7					8	$self->{text} = shift;
60	7					7	$self->{phrases} = shift;
61
62	7					55	return $self;
63							}
64
65							# method creates and returns an utterance from text
66							# (MetaMap Prolog Machine Output Utterance Block)
67							# input : $inputText <- a MetaMap Prolog Output utterance block (or equivalent)
68							# output: $self <- an instance of an Utterance object
69							sub createFromText {
70							#grab the input
71	7			7	0	7	my $self = shift;
72	7					8	my $inputText = shift;
73
74							#grab negated CUIs
75	7					249	$inputText =~ m/neg_list$(.*)$./;
76	7					254	my $negationsText = $1;
77	7					9	my @negatedCUIs = ();
78	7	50				16	if (defined $negationsText) {
79	7					183	while ($negationsText =~
80							m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
81	0					0	push @negatedCUIs, $1;
82							}
83							}
84
85							#grab the id and text
86	7					912	$inputText =~ /utterance\('(.)',"(.)",/;
87	7					102	my $id = $1;
88	7					10	my $text = $2;
89
90							#create the phrases list
91	7					310	my @phraseTexts = split /phrase\(/, $inputText;
92							#shift the first part off (its the part before the first phrase match
93	7					7	shift @phraseTexts;
94
95							#create a phrase from the phrase texts (and collect the concepts)
96	7					11	my @phrases = ();
97	7					8	foreach my $phraseText(@phraseTexts) {
98							#put 'phrase(' back on
99	134					443	$phraseText = 'phrase('.$phraseText;
100							#create a new phrase from text
101	134					238	my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
102							$phraseText, \@negatedCUIs);
103	134					191	push @phrases, $newPhrase;
104							}
105
106							#create and return the new utterance
107	7					25	return MetaMap::DataStructures::Utterance->new($inputText, $id, $text, \@phrases);
108							}
109
110							# method creates and returns an utterance from text
111							# (MetaMap Prolog Machine Output Utterance Block), and uses a custom $id.
112							# This is useful when the $input text has a non-properly formatted $id
113							# (e.g. tx.0000000.1)
114							# input : $inputText <- a MetaMap Prolog Output utterance block
115							# (or equivalent)
116							# $id <- the id to associate with this Utterance. It overrides any id
117							# found within $inputText. $id should be of the form:
118							# (ab:ti).([\d]+).([\d]+) (e.g. ab.00000.1)
119							# output: $self <- an instance of an Utterance Object
120							sub createFromTextWithId {
121	0			0	0	0	my $self = shift;
122
123							#grab the input
124	0					0	my $inputText = shift;
125	0					0	my $id = shift;
126
127							#grab negated CUIs
128	0					0	$inputText =~ m/neg_list$(.*)$./;
129	0					0	my $negationsText = $1;
130	0					0	my @negatedCUIs = ();
131	0	0				0	if (defined $negationsText) {
132	0					0	while ($negationsText =~
133							m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
134	0					0	push @negatedCUIs, $1;
135							}
136							}
137
138							#grab the id and text
139	0					0	$inputText =~ /utterance\('(.)',"(.)",/;
140	0					0	my $aid = $1;
141	0					0	my $text = $2;
142
143							#create the phrases list
144	0					0	my @phraseTexts = split /phrase\(/, $inputText;
145							#shift the first part off (its the part before the first phrase match
146	0					0	shift @phraseTexts;
147
148							#create a phrase from the phrase texts (and collect the concepts)
149	0					0	my @phrases = ();
150	0					0	foreach my $phraseText(@phraseTexts) {
151							#put 'phrase(' back on
152	0					0	$phraseText = 'phrase('.$phraseText;
153							#create a new phrase from text
154	0					0	my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
155							$phraseText, \@negatedCUIs);
156	0					0	push @phrases, $newPhrase;
157							}
158
159							#create and return the new utterance
160	0					0	return MetaMap::DataStructures::Utterance->new(
161							$inputText, $id, $text, \@phrases);
162							}
163
164							#----------------------------------------
165							# methods
166							#----------------------------------------
167							# method summarizes this utterance as a string
168							# input : -
169							# output: $string <- a string describing $self
170							sub toString {
171	1			1	0	8	my $self = shift;
172
173	1					1	my $string = "utterance:\n";
174	1					85	$string .= " $self->{id}\n";
175	1					18	$string .= " $self->{text}\n";
176
177							#add each phrase to the string
178	1					2	foreach my $phrase(@{$self->{phrases}}) {
	1					3
179	28					45	$string .= " ".$phrase->toString()."\n";
180							}
181
182	1					319	return $string;
183							}
184
185							# method compares this utterance to another and returns 1 if the two
186							# contain identical information
187							# input : $other <- the utterrance object to compare against
188							# output: boolean <- 1 if $self and $other are equivalent (contain equivalent
189							# IDs, and phrases), else 0
190							sub equals {
191							#grab input
192	2			2	0	3	my $self = shift;
193	2					3	my $other = shift;
194
195							#compare id's and text
196	2	50	33			11	if ($self->{id} ne $other->{id}
197							\|\| $self->{text} ne $other->{text}) {
198	0					0	return 0;
199							}
200
201							#compare Utterances
202	2					3	foreach my $phraseA(@{$self->{phrases}}){
	2					3
203
204							#check each utterance in B
205	56					41	my $match = 0;
206	56					40	foreach my $phraseB(@{$other->{phrases}}) {
	56					61
207	758	100				888	if ($phraseA->equals($phraseB)) {
208	56					40	$match = 1;
209	56					44	last;
210							}
211							}
212
213							#utteranceA has no equivalent phrase in $other
214							# so utterances are not identical
215	56	50				80	if ($match < 1) {
216	0					0	return 0;
217							}
218							}
219
220							#all tests passed, return true
221	2					10	return 1;
222							}
223
224							# method determines if this utterance contains the CUI provided as input
225							# returns 1 if this utterance contains the CUI, else 0
226							# input : $cui <- a string CUI code
227							# output: boolean <- 1 if any of $self's phrases contain $cui
228							sub contains {
229							#grab input
230	2			2	0	2	my $self = shift;
231	2					2	my $cui = shift;
232
233							#check each phrase to see if it contains the CUI
234	2					3	my $containsCUI = 0;
235	2					3	foreach my $phrase(@{$self->{phrases}}) {
	2					3
236	29	100				36	if ($phrase->contains($cui)) {
237	1					2	$containsCUI = 1;
238	1					1	last;
239							}
240							}
241
242							#return the result
243	2					5	return $containsCUI;
244							}
245
246							# method gets the an array of concepts as they appear in the utterance.
247							# Conepts are not necassarily ordered, where ambiguity exists all possible toke# n->CUI mappings are listed adjacent to one another.
248							# input : -
249							# output: \@concepts <- a list of concept objects
250							sub getConcepts {
251							#initialize
252	2			2	0	3	my $self = shift;
253	2					3	my @concepts = ();
254
255							#add concepts in sorted order
256	2					1	foreach my $phrase(@{$self->{phrases}}) {
	2					3
257	56					30	push @concepts, @{$phrase->{concepts}};
	56					68
258							}
259	2					11	return \@concepts;
260							}
261
262							# method gets an array list of concepts as they appear in the utterance
263							# input : -
264							# output: \@conceptList <- an array of arrays, where each sub-array contains a
265							# list of 1 or more concept objects. Where more than
266							# one concept object occurrs it means the token to
267							# concept mapping was ambiguous. Arrays are ordered as
268							# the tokens occurr in the utterance.
269							sub getOrderedConcepts {
270							#initialize
271	1			1	0	1	my $self = shift;
272	1					2	my @conceptList = ();
273
274							#add concepts in sorted order
275	1					1	foreach my $phrase(@{ $self->{phrases} }) {
	1					2
276	28					15	push @conceptList, @{ $phrase->{orderedConceptList} };
	28					37
277							}
278	1					7	return \@conceptList;
279							}
280
281							# method gets the an array of tokens as they appear in the utterance
282							# input : -
283							# output: \@tokens <- a list token objects ordered by their appearance in $self
284							sub getTokens {
285							#initialize
286	2			2	0	3	my $self = shift;
287	2					1	my @tokens = ();
288
289							#add concepts in sorted order
290	2					3	foreach my $phrase(@{$self->{phrases}}) {
	2					3
291	56					31	push @tokens, @{$phrase->{tokens}};
	56					82
292							}
293	2					21	return \@tokens;
294							}
295
296							# method gets the an array of Mappings as they appear in the utterance
297							# input : -
298							# output: \@mappings <- a list of mapping objects ordered by their appearance
299							# in $self
300							sub getMappings {
301							#initialize
302	1			1	0	2	my $self = shift;
303	1					1	my @mappings = ();
304
305							#add concepts in sorted order
306	1					2	foreach my $phrase(@{$self->{phrases}}) {
	1					2
307	28					13	push @mappings, @{$phrase->{mappings}};
	28					42
308							}
309	1					10	return \@mappings;
310							}
311
312							1;
313
314							__END__