File Coverage

blib/lib/MetaMap/DataStructures/Phrase.pm

Criterion	Covered	Total	%
statement	145	150	96.6
branch	27	32	84.3
condition			n/a
subroutine	10	10	100.0
pod	0	5	0.0
total	182	197	92.3

line	stmt	bran	sub	pod	time	code
1						# MetaMap::DataStructures::Phrase
2						# (Last Updated $Id: Phrase.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3						#
4						# Perl module that provides a perl interface to the
5						# Unified Medical Language System (UMLS)
6						#
7						# Copyright (c) 2016
8						#
9						# Sam Henry, Virginia Commonwealth University
10						# henryst at vcu.edu
11						#
12						# Bridget T. McInnes, Virginia Commonwealth University
13						# btmcinnes at vcu.edu
14						#
15						# This program is free software; you can redistribute it and/or
16						# modify it under the terms of the GNU General Public License
17						# as published by the Free Software Foundation; either version 2
18						# of the License, or (at your option) any later version.
19						#
20						# This program is distributed in the hope that it will be useful,
21						# but WITHOUT ANY WARRANTY; without even the implied warranty of
22						# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23						# GNU General Public License for more details.
24						#
25						# You should have received a copy of the GNU General Public License
26						# along with this program; if not, write to
27						#
28						# The Free Software Foundation, Inc.,
29						# 59 Temple Place - Suite 330,
30						# Boston, MA 02111-1307, USA.
31
32						package MetaMap::DataStructures::Phrase;
33	1		1		3	use strict;
	1				1
	1				20
34	1		1		2	use warnings;
	1				2
	1				16
35
36	1		1		331	use MetaMap::DataStructures::Token;
	1				1
	1				23
37	1		1		511	use MetaMap::DataStructures::Mapping;
	1				2
	1				67
38	1		1		400	use MetaMap::DataStructures::Concept;
	1				2
	1				879
39
40						#----------------------------------------
41						# constructor
42						#----------------------------------------
43						# constructor method to create a new Phrase object. It is recommended to
44						# construct Phrase objects from createFromText, since it is easier and
45						# garauntees all data structures are created properly.
46						# input : $text <- the human-readable text of the Phrase
47						# \@mappings <- mapping objects of this Phrase
48						# \@orderedConceptList <- concepts object list in sequential order.
49						# Where there is disambiguation (and therefore
50						# multiple concepts mappings to a single term
51						# a vertical dimension is created. This is
52						# therefore an array of arrays, where each
53						# array contains 1 or more concept objects.
54						# \@concepts <- concept objects ordered as they are read in, not
55						# necassarily sequential.
56						# \@tokens <- tokens objects ordered as read in
57						# output: $self <- a new instance of a Phrase Object
58						sub new {
59						#create and bless self
60	134		134	0	101	my $class = shift;
61	134				113	my $self = {};
62	134				114	bless $self, $class;
63
64						#grab variables
65	134				155	$self->{text} = shift;
66	134				110	$self->{mappings} = shift; #sequential mappings
67	134				104	$self->{orderedConceptList} = shift; #ordered 2-D array of concepts
68	134				113	$self->{concepts} = shift; #ordered as read in (not sequential)
69						#These are unique concept objects but necassarily unique CUIs
70	134				90	$self->{tokens} = shift;
71
72	134				453	return $self;
73						}
74
75						# method creates and returns a concept from text
76						# input : $inputText <- a MetaMap Prolog Machine Output Phrase block or
77						# equivalent.
78						# \@negatedCUIs <- a list of negated CUIs within the phrase, empty
79						# is ok.
80						# output: $self <- a new instance of a Phrase Object
81						sub createFromText {
82						#grab the input
83	134		134	0	124	my $inputText = shift;
84	134				88	my $negatedCUIsRef = shift;
85
86						#grab the full text
87	134				836516	$inputText =~ /phrase$(.),\[(.)\],(\d+)\/\d+,\[.*\]$\./;
88	134				257	my $text = $1; #the text of the phrase
89	134				142	my $syntaxText = $2; #text containing the syntactic info
90	134				143	my $phraseStartIndex = $3; #character number the phrase begins at
91
92						#remove trailing and leading quotes
93	134	100			382	if ($text =~ m/'(.*)'/) {
94	117				157	$text = $1;
95						}
96
97						#----- Token Creation -------------------------
98						#get each token text
99	134				1470	my @tokenTexts = split /(adv\|aux\|compl\|conj\|det\|head\|mod\|modal\|pastpart\|prep\|pron\|punc\|shapes\|verb\|not_in_lex)\(/, $syntaxText;
100	134				109	shift @tokenTexts; #shift empty position 0 off
101
102						#loop through each token text
103	134				158	my @tokens = ();
104	134				242	for (my $i = 0; $i < scalar @tokenTexts; $i+=2) {
105
106						#get the type and token text
107	463				411	my $type = $tokenTexts[$i];
108	463				365	my $tokenText = $tokenTexts[$i+1];
109
110						#add a new token to the list of tokens
111	463				1101	push @tokens, &MetaMap::DataStructures::Token::createFromText(
112						$type.'('.$tokenText);
113						}
114
115						#----- Mappings and Concept Creation --------------
116						#gets each mapping text and orders the concepts
117						# iterates through each mapping, and creates a new mapping
118						# for each mapping text. For each new concept that is found
119						# it compares it with existing ordered concepts and sees if it
120						# should add it to a 2-D array of ordered concepts. The second
121						# dimension of the orderedConcepts array indicates multiple
122						# mappings for the token at that index
123						#get each mapping text
124	134				647	$inputText =~ m/mappings$\[(.*)$\./;
125	134				743	my @mappingsTexts = split /map\(-/, $1;
126						#remove the first element (text matched before 'map'
127	134				102	shift @mappingsTexts;
128
129						#loop through each mapping text
130	134				154	my @orderedConcepts = (); #an array of arrays of ordered concepts.
131						# Where multiple mappings occur a new vertical dimension is added
132	134				101	my @mappings = ();
133	134				79	my @concepts = (); #an array of unique concepts
134	134				146	foreach my $mappingText(@mappingsTexts) {
135						#add the map back on (from the split)
136	488				897	$mappingText = 'map(-'.$mappingText;
137
138						#grab the mapping score
139	488				1332	$mappingText =~ m/map\((-?\d+)/;
140	488				578	my $mappingScore = $1;
141
142						#grab the concepts associated with the mapping
143	488				1312	my @conceptTexts = split /ev\(-/, $mappingText;
144	488				333	shift @conceptTexts; #shift off the leading text
145
146						#loop through each concept
147	488				471	my @mappingConcepts = (); # = a list of concepts by mapping
148						# (as they appear when read in)
149	488				422	foreach my $conceptText(@conceptTexts) {
150
151						#create the concept (put ev back on the front)
152	1583				3760	my $newConcept = &MetaMap::DataStructures::Concept::createFromText(
153						'ev(-'.$conceptText, \@tokens);
154
155						#see where to place this concept within the context of the
156						# ordered concepts also see if the concept already exists
157						# (mappings repeat concepts) if it exists use the instance of
158						# that concept (oldConcept) in this mapping
159	1583				1232	my $conceptExists = 0;
160	1583				895	my $conceptIndex = -1;
161	1583				2246	for (my $i = 0; $i < scalar @orderedConcepts; $i++) {
162	3409				2266	for (my $j = 0; $j < scalar @{ $orderedConcepts[$i] }; $j++) {
	7878				10018
163						#grab the existing concept
164	5697				3595	my $oldConcept = $orderedConcepts[$i][$j];
165
166						#check if an instance of the concept already exists
167						# or if it is a new mapping of an existing concept
168	5697	100			7766	if ($newConcept->equals($oldConcept)) {
		100
169						#an instance of this concept alread exists, stop looping
170	1228				874	$conceptExists = 1;
171	1228				871	$newConcept = $oldConcept;
172	1228				2406	last;
173						}
174						elsif ($newConcept->mapsToSameTokens($oldConcept)) {
175						#this concept maps to the same tokens as an existing
176						# concept record index, so must continue checking
177						# though to see if the concept already exists
178	1469				1348	$conceptIndex = $i;
179						}
180
181						} #end inner ordered concept loop
182
183						#check if you can quit early
184	3409	100			4249	if ($conceptExists) {
185	1228				843	last; #an instance of this concept already exists, done
186						}
187	2181	100			4004	if ($conceptIndex >= 0) {
188						#this concept maps to the same token as an existing
189						# concept, your done
190	136				113	last;
191						}
192						} #end outer ordered concept loop
193
194						#done searching through existing concepts, update data structures
195	1583	100			2044	if (!$conceptExists) {
196						#update unordered concepts
197	355				301	push @concepts, $newConcept;
198
199	355	100			373	if ($conceptIndex >= 0) {
200						#add to existing CUI list
201	136				82	push @{ $orderedConcepts[$conceptIndex] }, $newConcept;
	136				166
202						}
203						else {
204						#create a new CUI list
205						#new concept, append ordered concepts with a new array
206	219				286	my @newArray = [ $newConcept ];
207	219				188	push @orderedConcepts, @newArray;
208
209	219				259	my $index = (scalar @orderedConcepts)-1;
210						}
211						}
212
213						#add the concept to the list of mapping concepts
214	1583				1896	push @mappingConcepts, $newConcept;
215
216						}#end concept text loop
217
218						#create and save the new mapping
219	488				979	push @mappings, MetaMap::DataStructures::Mapping->new(
220						$mappingScore, \@mappingConcepts);
221						}
222
223						#----- Concept Negation ---------------------------
224						#TODO possible problem with negations. If a single phrase contains
225						# the same CUI multiple times how do I distinguish between the negated
226						# and non-negated one?
227	134				220	for (my $i = 0; $i < scalar @orderedConcepts; $i++) {
228	219				158	for (my $j = 0; $j < scalar @{ $orderedConcepts[$i] }; $j++) {
	574				923
229	355				236	my $concept = $orderedConcepts[$i][$j];
230
231						#see if it is negated
232	355				182	foreach my $negatedCUI(@{$negatedCUIsRef}) {
	355				370
233	0	0			0	if ($concept->{cui} eq $negatedCUI) {
234	0				0	$concept->{isNegated} = 1;
235						}
236						}
237						}
238						}
239
240						#create and return the new phrase
241	134				228	return MetaMap::DataStructures::Phrase->new(
242						$text, \@mappings, \@orderedConcepts, \@concepts, \@tokens);
243						}
244
245
246						#----------------------------------------
247						# methods
248						#----------------------------------------
249						# method summarizes this phrase as a string
250						# input : -
251						# output: $string <- a string describing $self
252						sub toString {
253	28		28	0	20	my $self = shift;
254
255						#create head
256	28				17	my $string = "Phrase:\n";
257	28				34	$string .= " $self->{text}\n";
258
259						#add each token text
260	28				18	$string .= " tokens:*";
261	28				17	foreach my $token(@{$self->{tokens}}) {
	28				38
262	104				105	$string .= $token->{text}."*";
263						}
264	28				19	$string .= "\n";
265
266						#add each concept text
267	28				22	$string .= " concepts:*";
268	28				10	foreach my $concept(@{$self->{concepts}}) {
	28				30
269	80				82	$string .= $concept->{text}."*";
270						}
271	28				23	$string .= "\n";
272
273						#add each mapping to the string
274	28				22	$string .= " mappings:\n";
275	28				13	foreach my $mapping(@{$self->{mappings}}) {
	28				29
276	91				128	$string .= " ".$mapping->toString()."\n";
277						}
278
279	28				191	return $string;
280						}
281
282						# method compares this phrase to another and returns 1 if the two
283						# contain identical information
284						# input : $other <- the Phrase object to compare against
285						# output: boolean <- 1 if $self and $other are equivalent (equivalent texts,
286						# mappings, concepts, and tokens), else 0
287						sub equals {
288						#grab input
289	758		758	0	466	my $self = shift;
290	758				429	my $other = shift;
291
292						#compare texts
293	758	100			917	if ($self->{text} ne $other->{text}) {
294	702				1193	return 0;
295						}
296
297						#compare mappings
298	56				38	foreach my $mappingA(@{$self->{mappings}}){
	56				68
299
300						#check each mapping in B
301	182				155	my $match = 0;
302	182				118	foreach my $mappingB(@{$other->{mappings}}) {
	182				180
303	2078	100			2672	if ($mappingA->equals($mappingB)) {
304	182				109	$match = 1;
305	182				132	last;
306						}
307						}
308
309						#mappingA has no equivalent mapping in $other
310						# so phrases are not identical
311	182	50			277	if ($match < 1) {
312	0				0	return 0;
313						}
314						}
315
316						#compare Concepts
317	56				38	foreach my $conceptA(@{$self->{concepts}}){
	56				62
318
319						#check each concept in B
320	160				112	my $match = 0;
321	160				90	foreach my $conceptB(@{$other->{concepts}}) {
	160				151
322	510	100			631	if ($conceptA->equals($conceptB)) {
323	160				106	$match = 1;
324	160				101	last;
325						}
326						}
327
328						#conceptA has no equivalent concept in $other
329						# so phrases are not identical
330	160	50			213	if ($match < 1) {
331	0				0	return 0;
332						}
333						}
334
335						#compare Tokens
336	56				44	foreach my $tokenA(@{$self->{tokens}}){
	56				66
337
338						#check each token in B
339	208				137	my $match = 0;
340	208				125	foreach my $tokenB(@{$other->{tokens}}) {
	208				183
341	586	100			732	if ($tokenA->equals($tokenB)) {
342	208				109	$match = 1;
343	208				151	last;
344						}
345						}
346
347						#tokenA has no equivalent mapping in $other
348						# so phrases are not identical
349	208	50			273	if ($match < 1) {
350	0				0	return 0;
351						}
352						}
353
354						#all fields are equivalent, return true
355	56				109	return 1;
356						}
357
358						# method determines if this phrase contains the CUI provided as input
359						# returns 1 if this phrase contains the CUI, else 0
360						# input : $cui <- a string CUI code
361						# output: boolean <- 1 if any $self contains the $cui
362						sub contains {
363						#grab input
364	29		29	0	19	my $self = shift;
365	29				16	my $cui = shift;
366
367						#check concept to see if it is the CUI
368	29				22	my $containsCUI = 0;
369	29				11	foreach my $concept(@{$self->{concepts}}) {
	29				33
370	81	100			119	if ($concept->{cui} eq $cui) {
371	1				2	$containsCUI = 1;
372	1				1	last;
373						}
374						}
375
376						#return the result
377	29				58	return $containsCUI;
378						}
379
380						1;
381
382						__END__