File Coverage

blib/lib/MetaMap/DataStructures/Concept.pm
Criterion Covered Total %
statement 90 104 86.5
branch 18 22 81.8
condition 19 51 37.2
subroutine 7 7 100.0
pod 0 5 0.0
total 134 189 70.9


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Concept
2             # (Last Updated $Id: Concept.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Concept;
33 1     1   4 use strict;
  1         5  
  1         21  
34 1     1   3 use warnings;
  1         1  
  1         763  
35              
36             #----------------------------------------
37             # constructors
38             #---------------------------------------
39             # constructor method to create a new Concept object
40             # It is recomennded to create Concepts from the phrase or utterance level.
41             # It is easier since text parsing is performed for you, and it is safer, since
42             # the fields are garaunteed to be correct. Each of the input fields are
43             # parsed directly from the text, and are defined in more detail in the MetaMap
44             # documentation, but are described in breif below.
45             # input : $cui <- the CUI code of this concept (e.g. C0000000)
46             # $text <- the text this concept is associated with
47             # $preferredName <- the preferred name of the CUI
48             # $score <- the MMI score of the token->CUI mapping
49             # $uniqueSources <- comma seperated string of the names of the
50             # vocabularies that contain this CUI
51             # $semanticTypes <- comma seperated string of the semantic types
52             # strings associated with this CUI
53             # \@associatedTokens <- list of token objects that map to this concept
54             # $involvesHead <- boolean indicating if the conept involves the head
55             # $isOvermatch <- boolean indicating if the concept is overmatched
56             # $matchmapText <- the matchmap text of the concept
57             # $isNegated <- boolean indicating if the Concept is negated
58             # output: $self <- a Concept object
59             sub new {
60             #create and bless self
61 1583     1583 0 1144 my $class = shift;
62 1583         1294 my $self= {};
63 1583         1331 bless $self, $class;
64              
65             #initialize from the input
66 1583         1897 $self->{cui} = shift;
67 1583         1286 $self->{text} = shift;
68 1583         1332 $self->{preferredName} = shift;
69 1583         1306 $self->{score} = shift;
70 1583         1320 $self->{uniqueSources} = shift;
71 1583         1315 $self->{semanticTypes} = shift;
72 1583         1139 $self->{associatedTokens} = shift;
73 1583         1798 $self->{involvesHead} = shift;
74 1583         1149 $self->{isOvermatch} = shift;
75 1583         1189 $self->{matchMapText} = shift;
76 1583         1134 $self->{isNegated} = 0;
77              
78 1583         3326 return $self;
79             }
80              
81             # method creates and returns a conept from text
82             # (MetaMap Prolog Machine Output, ev section)
83             # input : $text <- a Metamap Prolog Machine Output ev section block or
84             # equivalent
85             # \@tokens <- a list of token objects in the same phrase as this
86             # utterance. The matchmap text is read, and tokens
87             # from list list are selected as the assocaited tokens
88             # output: $self <- a Concept object
89             sub createFromText {
90             #grab the input
91 1583     1583 0 1106 my $text = shift;
92 1583         1051 my $tokensRef = shift;
93            
94             #match the text
95 1583         45728 $text =~ m/
96             ev\((-*\d+) #score $1
97             ,'?(C\d+)'?, #cui $2
98             (.*), #string $3
99             (.*), #preferredName $4
100             \[[^\]\[]+\], # skip
101             \[([^\]\[]+)\], #semanticTypes $5
102             (\[[\[\]\d,]+\]), #matchMapText $6
103             ([a-zA-Z]+), #involvesHead
104             ([a-zA-Z]+), #isOvermatch
105             \[([^\]\[]+)\], #uniqueSources
106             \[[\d\/,]+\], # skip
107             \d+, # skip
108             \d+/x; # skip
109              
110              
111              
112             #ev\((-*\d+),'?(C\d+)'?,([^\]\[]+),([^\]\[]+),\[[^\]\[]+\],\[([^\]\[]+)\],(\[[\[\]\d,]+\]),([a-zA-Z]+),([a-zA-Z]+),\[([^\]\[]+)\],\[[\d\/,]+\],\d+,\d+
113            
114             #grab values
115 1583         1572 my $score = $1;
116 1583         1205 my $cui = $2;
117 1583         1435 my $string = $3;
118 1583         1424 my $preferredName = $4;
119 1583         1132 my $semanticTypes = $5;
120 1583         1239 my $matchMapText = $6;
121 1583         1052 my $involvesHead = $7;
122 1583         1136 my $isOvermatch = $8;
123 1583         1322 my $uniqueSources = $9;
124              
125             #TODO delete this - once you are SURE everything works, will quit if values are not being properly parsed with the regex
126             #check if everythingis defined
127 1583 50 33     17724 if (defined $score
      33        
      33        
      33        
      33        
      33        
      33        
      33        
128             && defined $cui
129             && defined $string
130             && defined $preferredName
131             && defined $semanticTypes
132             && defined $matchMapText
133             && defined $involvesHead
134             && defined $isOvermatch
135             && defined $uniqueSources) {
136             #DO NOTHING
137             }
138             else {
139 0         0 print "SOMETHING UNDEFINED:\n";
140 0         0 print "1 - $text\n";
141 0         0 print "2 - $score\n";
142 0         0 print "3 - $cui\n";
143 0         0 print "4 - $string\n";
144 0         0 print "5 - $preferredName\n";
145 0         0 print "7 - $semanticTypes\n";
146 0         0 print "8 - $matchMapText\n";
147 0         0 print "9 - $involvesHead\n";
148 0         0 print "10 - $isOvermatch\n";
149 0         0 print "11 - $uniqueSources\n";
150 0         0 exit;
151             }
152              
153             #remove trailing/leading quotes
154 1583 100       3000 if ($string =~ m/'(.*)'/) {
155 1483         1431 $string = $1;
156             }
157 1583 100       2497 if ($preferredName =~ m/'(.*)'/) {
158 1564         1432 $preferredName = $1;
159             }
160              
161             #convert text to bools (involvesHead and isOvermatch)
162 1583 100       1750 if ($involvesHead eq 'yes') {
163 505         371 $involvesHead = 1;
164             }
165             else {
166 1078         807 $involvesHead = 0;
167             }
168 1583 50       1424 if ($isOvermatch eq 'yes') {
169 0         0 $isOvermatch = 1;
170             }
171             else {
172 1583         979 $isOvermatch = 0;
173             }
174              
175             #Map the Concept to its associated Tokens
176 1583         1343 my @associatedTokens = ();
177 1583         4536 while($matchMapText =~ m/\[(\d+),(\d+)\],\[\d+,\d+\],\d+/g) {
178             #grab the token start and end indeces
179 1623         1422 my $startWordNumber = $1;
180 1623         1135 my $endWordNumber = $2;
181            
182             #add the correct tokens to the list of associated tokens
183 1623         1112 my $wordNumber = 1;
184 1623         1130 for (my $tokenIndex = 0; $tokenIndex < scalar @{$tokensRef}; $tokenIndex++) {
  4310         5494  
185            
186             #add the token if the words are within range
187 4310         2339 my $wordsInToken = ${$tokensRef}[$tokenIndex]->{numWords};
  4310         3780  
188 4310 100       5991 if (($wordNumber + $wordsInToken) > $endWordNumber) {
189 1623         1084 push @associatedTokens, ${$tokensRef}[$tokenIndex];
  1623         1667  
190             }
191              
192             #incrememt word number and see if your done
193 4310         2979 $wordNumber += $wordsInToken;
194 4310 100       4995 if ($wordNumber > $endWordNumber) {
195 1623         2910 last;
196             }
197             }
198             }
199              
200             #TODO delete this (for debugging)
201             #----------------------------------------------
202             #print "Creating Concept from Text\n";
203             #print "1 - $text\n";
204             #print "2 - $score\n";
205             #print "3 - $cui\n";
206             #print "4 - $string\n";
207             #print "5 - $preferredName\n";
208             #print "7 - $semanticTypes\n";
209             #print "8 - $matchMapText\n";
210             #print "9 - $involvesHead\n";
211             #print "10 - $isOvermatch\n";
212             #print "11 - $uniqueSources\n";
213             #print "12 - Associated Tokens:\n";
214             #foreach my $token(@associatedTokens) {
215             # print $token->toString();
216             # }
217             #---------------------------------------------
218              
219             #create and return the new concept
220 1583         2267 return MetaMap::DataStructures::Concept->new($cui, $string, $preferredName,
221             $score, $uniqueSources, $semanticTypes,
222             \@associatedTokens, $involvesHead,
223             $isOvermatch, $matchMapText);
224            
225             }
226              
227              
228             #----------------------------------------
229             # Methods
230             #---------------------------------------
231             # method determines if this concept maps to the same tokens as another concept
232             # (i.e. if the two tokens are ambiguities of the same text), returns a 1 if
233             # true, 0 otherwise.
234             # input : $other <- a concept object
235             # output: boolean <- 1 if $self and $other have equivalent associated tokens,
236             # else 0
237             sub mapsToSameTokens {
238             #grab input
239 8555     8555 0 5442 my $self = shift;
240 8555         4847 my $other = shift;
241              
242             #check each associated Token for equality
243 8555         5264 foreach my $tokenA(@{$self->{associatedTokens}}) {
  8555         8243  
244              
245             #check if there is a token match for this token
246             # in token B (need to check all because there
247             # is no garauntee that the token list is ordered)
248 8597         5054 my $match = 0;
249 8597         4993 foreach my $tokenB(@{$other->{associatedTokens}}) {
  8597         7432  
250 8678 100       11590 if ($tokenA->equals($tokenB)) {
251 5597         3327 $match = 1;
252 5597         3652 last;
253             }
254             }
255              
256             #tokenB isn't associated with this token
257             # so concepts are not identical
258 8597 100       11373 if ($match < 1) {
259 3000         5768 return 0;
260             }
261             }
262              
263             #all tokens equal, return true
264 5555         10190 return 1;
265             }
266              
267             # method compares this concept to another and returns 1 if the two
268             # contain identical information
269             # input : $other <- a concept object to compare against
270             # output: boolean <- 1 if $self and $other contain equivalent fields, and map
271             # to the same tokens, else 0.
272             sub equals {
273             #grab input
274 10801     10801 0 6534 my $self = shift;
275 10801         6448 my $other = shift;
276              
277             #check each field for equality
278 10801 50 66     63931 if ($self->{cui} ne $other->{cui}
      66        
      33        
      33        
      33        
      33        
      33        
      33        
      33        
279             || $self->{text} ne $other->{text}
280             || $self->{preferredName} ne $other->{preferredName}
281             || $self->{score} ne $other->{score}
282             || $self->{uniqueSources} ne $other->{uniqueSources}
283             || $self->{semanticTypes} ne $other->{semanticTypes}
284             || $self->{involvesHead} ne $other->{involvesHead}
285             || $self->{isOvermatch} ne $other->{isOvermatch}
286             || $self->{matchMapText} ne $other->{matchMapText}
287             || $self->{isNegated} ne $other->{isNegated}) {
288 6715         13927 return 0;
289             }
290             #fields are passed
291              
292             #check that the token mapping is the same
293 4086         3863 return $self->mapsToSameTokens($other);
294             }
295              
296             # method summarizes this concept as a string
297             # input : -
298             # output: $string <- a string describing $self
299             sub toString {
300 284     284 0 178 my $self = shift;
301              
302             #add info about the concept
303 284         154 my $string = "concept:\n";
304 284         437 $string .= " $self->{cui}, $self->{text}, $self->{preferredName}, $self->{score}\n";
305 284         316 $string .= " $self->{involvesHead}, $self->{isOvermatch}, $self->{isNegated}\n";
306 284         211 $string .= " $self->{semanticTypes}\n";
307 284         307 $string .= " $self->{uniqueSources}\n";
308              
309 284         265 $string .= " $self->{matchMapText}\n";
310              
311             #ensure tokens are associated
312 284 50       181 if (scalar @{$self->{associatedTokens}} < 1) {
  284         367  
313 0         0 $string .= "ERROR NO TOKENS ASSOCIATED\n";
314             }
315              
316             #add the associated tokens
317 284         171 foreach my $token(@{$self->{associatedTokens}}) {
  284         261  
318 293         396 $string .= " ".$token->toString();
319             }
320              
321 284         529 return $string;
322             }
323              
324             1;
325              
326              
327             __END__