File Coverage

blib/lib/MetaMap/DataStructures/Utterance.pm

Criterion	Covered	Total	%
statement	93	116	80.1
branch	7	12	58.3
condition	1	3	33.3
subroutine	12	13	92.3
pod	0	10	0.0
total	113	154	73.3

line	stmt	bran	cond	sub	pod	time	code
1							# MetaMap::DataStructures::Utterance
2							# (Last Updated $Id: Utterance.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3							#
4							# Perl module that provides a perl interface to the
5							# Unified Medical Language System (UMLS)
6							#
7							# Copyright (c) 2016
8							#
9							# Sam Henry, Virginia Commonwealth University
10							# henryst at vcu.edu
11							#
12							# Bridget T. McInnes, Virginia Commonwealth University
13							# btmcinnes at vcu.edu
14							#
15							# This program is free software; you can redistribute it and/or
16							# modify it under the terms of the GNU General Public License
17							# as published by the Free Software Foundation; either version 2
18							# of the License, or (at your option) any later version.
19							#
20							# This program is distributed in the hope that it will be useful,
21							# but WITHOUT ANY WARRANTY; without even the implied warranty of
22							# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23							# GNU General Public License for more details.
24							#
25							# You should have received a copy of the GNU General Public License
26							# along with this program; if not, write to
27							#
28							# The Free Software Foundation, Inc.,
29							# 59 Temple Place - Suite 330,
30							# Boston, MA 02111-1307, USA.
31
32							package MetaMap::DataStructures::Utterance;
33
34	1			1		3	use strict;
	1					1
	1					20
35	1			1		3	use warnings;
	1					1
	1					16
36
37	1			1		345	use MetaMap::DataStructures::Phrase;
	1					3
	1					845
38
39							#----------------------------------------
40							# constructors
41							#----------------------------------------
42							# constructor method to create a new Utterance object
43							# input : $inputText <- a MetaMap Prolog Output utterance block
44							# (or equivalent)
45							# $id <- the id of this Utterance of the form: (ab:ti).([\d]+).([\d]+)
46							# (e.g. ab.00000.1)
47							# $text <- the human readable text of this utterance
48							# \@phrases <- an ordered list of phrase objects
49							# output: $self <- an instance of an Utterance object
50							sub new {
51							#create and bless self
52	7			7	0	11	my $class = shift;
53	7					9	my $self = {};
54	7					13	bless $self, $class;
55
56							#grab input
57	7					24	$self->{inputText} = shift;
58	7					15	$self->{id} = shift;
59	7					13	$self->{text} = shift;
60	7					8	$self->{phrases} = shift;
61
62	7					69	return $self;
63							}
64
65							# method creates and returns an utterance from text
66							# (MetaMap Prolog Machine Output Utterance Block)
67							# input : $inputText <- a MetaMap Prolog Output utterance block (or equivalent)
68							# output: $self <- an instance of an Utterance object
69							sub createFromText {
70							#grab the input
71	7			7	0	11	my $self = shift;
72	7					8	my $inputText = shift;
73
74							#grab negated CUIs
75	7					260	$inputText =~ m/neg_list$(.*)$./;
76	7					285	my $negationsText = $1;
77	7					12	my @negatedCUIs = ();
78	7	50				17	if (defined $negationsText) {
79	7					182	while ($negationsText =~
80							m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
81	0					0	push @negatedCUIs, $1;
82							}
83							}
84
85							#grab the id and text
86	7					906	$inputText =~ /utterance\('(.)',"(.)",/;
87	7					143	my $id = $1;
88	7					11	my $text = $2;
89
90							#create the phrases list
91	7					344	my @phraseTexts = split /phrase\(/, $inputText;
92							#shift the first part off (its the part before the first phrase match
93	7					8	shift @phraseTexts;
94
95							#create a phrase from the phrase texts (and collect the concepts)
96	7					10	my @phrases = ();
97	7					9	foreach my $phraseText(@phraseTexts) {
98							#put 'phrase(' back on
99	134					494	$phraseText = 'phrase('.$phraseText;
100							#create a new phrase from text
101	134					279	my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
102							$phraseText, \@negatedCUIs);
103	134					225	push @phrases, $newPhrase;
104							}
105
106							#create and return the new utterance
107	7					35	return MetaMap::DataStructures::Utterance->new(
108							$inputText, $id, $text, \@phrases);
109							}
110
111							# method creates and returns an utterance from text
112							# (MetaMap Prolog Machine Output Utterance Block), and uses a custom $id.
113							# This is useful when the $input text has a non-properly formatted $id
114							# (e.g. tx.0000000.1)
115							# input : $inputText <- a MetaMap Prolog Output utterance block
116							# (or equivalent)
117							# $id <- the id to associate with this Utterance. It overrides any id
118							# found within $inputText. $id should be of the form:
119							# (ab:ti).([\d]+).([\d]+) (e.g. ab.00000.1)
120							# output: $self <- an instance of an Utterance Object
121							sub createFromTextWithId {
122	0			0	0	0	my $self = shift;
123
124							#grab the input
125	0					0	my $inputText = shift;
126	0					0	my $id = shift;
127
128							#grab negated CUIs
129	0					0	$inputText =~ m/neg_list$(.*)$./;
130	0					0	my $negationsText = $1;
131	0					0	my @negatedCUIs = ();
132	0	0				0	if (defined $negationsText) {
133	0					0	while ($negationsText =~
134							m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
135	0					0	push @negatedCUIs, $1;
136							}
137							}
138
139							#grab the id and text
140	0					0	$inputText =~ /utterance\('(.)',"(.)",/;
141	0					0	my $aid = $1;
142	0					0	my $text = $2;
143
144							#create the phrases list
145	0					0	my @phraseTexts = split /phrase\(/, $inputText;
146							#shift the first part off (its the part before the first phrase match
147	0					0	shift @phraseTexts;
148
149							#create a phrase from the phrase texts (and collect the concepts)
150	0					0	my @phrases = ();
151	0					0	foreach my $phraseText(@phraseTexts) {
152							#put 'phrase(' back on
153	0					0	$phraseText = 'phrase('.$phraseText;
154							#create a new phrase from text
155	0					0	my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
156							$phraseText, \@negatedCUIs);
157	0					0	push @phrases, $newPhrase;
158							}
159
160							#create and return the new utterance
161	0					0	return MetaMap::DataStructures::Utterance->new(
162							$inputText, $id, $text, \@phrases);
163							}
164
165							#----------------------------------------
166							# methods
167							#----------------------------------------
168							# method summarizes this utterance as a string
169							# input : -
170							# output: $string <- a string describing $self
171							sub toString {
172	1			1	0	8	my $self = shift;
173
174	1					2	my $string = "utterance:\n";
175	1					103	$string .= " $self->{id}\n";
176	1					6	$string .= " $self->{text}\n";
177
178							#add each phrase to the string
179	1					3	foreach my $phrase(@{$self->{phrases}}) {
	1					3
180	28					45	$string .= " ".$phrase->toString()."\n";
181							}
182
183	1					359	return $string;
184							}
185
186							# method compares this utterance to another and returns 1 if the two
187							# contain identical information
188							# input : $other <- the utterrance object to compare against
189							# output: boolean <- 1 if $self and $other are equivalent (contain equivalent
190							# IDs, and phrases), else 0
191							sub equals {
192							#grab input
193	2			2	0	2	my $self = shift;
194	2					3	my $other = shift;
195
196							#compare id's and text
197	2	50	33			15	if ($self->{id} ne $other->{id}
198							\|\| $self->{text} ne $other->{text}) {
199	0					0	return 0;
200							}
201
202							#compare Utterances
203	2					2	foreach my $phraseA(@{$self->{phrases}}){
	2					4
204
205							#check each utterance in B
206	56					41	my $match = 0;
207	56					36	foreach my $phraseB(@{$other->{phrases}}) {
	56					63
208	758	100				941	if ($phraseA->equals($phraseB)) {
209	56					34	$match = 1;
210	56					41	last;
211							}
212							}
213
214							#utteranceA has no equivalent phrase in $other
215							# so utterances are not identical
216	56	50				86	if ($match < 1) {
217	0					0	return 0;
218							}
219							}
220
221							#all tests passed, return true
222	2					10	return 1;
223							}
224
225							# method determines if this utterance contains the CUI provided as input
226							# returns 1 if this utterance contains the CUI, else 0
227							# input : $cui <- a string CUI code
228							# output: boolean <- 1 if any of $self's phrases contain $cui
229							sub contains {
230							#grab input
231	2			2	0	3	my $self = shift;
232	2					3	my $cui = shift;
233
234							#check each phrase to see if it contains the CUI
235	2					2	my $containsCUI = 0;
236	2					4	foreach my $phrase(@{$self->{phrases}}) {
	2					4
237	29	100				39	if ($phrase->contains($cui)) {
238	1					1	$containsCUI = 1;
239	1					1	last;
240							}
241							}
242
243							#return the result
244	2					6	return $containsCUI;
245							}
246
247							# method gets the an array of concepts as they appear in the utterance.
248							# Conepts are not necassarily ordered, where ambiguity exists all possible
249							# token->CUI mappings are listed adjacent to one another.
250							# input : -
251							# output: \@concepts <- a list of concept objects
252							sub getConcepts {
253							#initialize
254	2			2	0	3	my $self = shift;
255	2					2	my @concepts = ();
256
257							#add concepts in sorted order
258	2					2	foreach my $phrase(@{$self->{phrases}}) {
	2					4
259	56					35	push @concepts, @{$phrase->{concepts}};
	56					67
260							}
261	2					10	return \@concepts;
262							}
263
264							# method gets an array list of concepts as they appear in the utterance
265							# input : -
266							# output: \@conceptList <- an array of arrays, where each sub-array contains a
267							# list of 1 or more concept objects. Where more than
268							# one concept object occurrs it means the token to
269							# concept mapping was ambiguous. Arrays are ordered as
270							# the tokens occurr in the utterance.
271							sub getOrderedConcepts {
272							#initialize
273	1			1	0	2	my $self = shift;
274	1					2	my @conceptList = ();
275
276							#add concepts in sorted order
277	1					1	foreach my $phrase(@{ $self->{phrases} }) {
	1					2
278	28					18	push @conceptList, @{ $phrase->{orderedConceptList} };
	28					39
279							}
280	1					8	return \@conceptList;
281							}
282
283							# method gets the an array of tokens as they appear in the utterance
284							# input : -
285							# output: \@tokens <- a list token objects ordered by their appearance in $self
286							sub getTokens {
287							#initialize
288	2			2	0	3	my $self = shift;
289	2					4	my @tokens = ();
290
291							#add concepts in sorted order
292	2					1	foreach my $phrase(@{$self->{phrases}}) {
	2					4
293	56					31	push @tokens, @{$phrase->{tokens}};
	56					90
294							}
295	2					28	return \@tokens;
296							}
297
298							# method gets the an array of Mappings as they appear in the utterance
299							# input : -
300							# output: \@mappings <- a list of mapping objects ordered by their appearance
301							# in $self
302							sub getMappings {
303							#initialize
304	1			1	0	2	my $self = shift;
305	1					1	my @mappings = ();
306
307							#add concepts in sorted order
308	1					2	foreach my $phrase(@{$self->{phrases}}) {
	1					1
309	28					18	push @mappings, @{$phrase->{mappings}};
	28					44
310							}
311	1					10	return \@mappings;
312							}
313
314							1;
315
316							__END__