File Coverage

blib/lib/MetaMap/DataStructures/Citation.pm
Criterion Covered Total %
statement 114 149 76.5
branch 15 26 57.6
condition 0 6 0.0
subroutine 19 23 82.6
pod 0 17 0.0
total 148 221 66.9


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Citation
2             # (Last Updated $Id: Citation.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Citation;
33 1     1   4 use strict;
  1         1  
  1         21  
34 1     1   3 use warnings;
  1         1  
  1         18  
35              
36 1     1   3 use MetaMap::DataStructures::Utterance;
  1         1  
  1         1139  
37              
38             #----------------------------------------
39             # constructors
40             #----------------------------------------
41             # constructor method to create a new Citation object
42             # input : -
43             # output: $self <- a instance of a Citation object
44             sub new {
45             #create and bless self
46 5     5 0 6 my $class = shift;
47 5         6 my $self = {};
48 5         9 bless $self, $class;
49              
50             #grab input and initialize
51 5         11 $self->{id} = shift;
52 5         6 $self->{utterances} = {};
53              
54 5         11 return $self;
55             }
56              
57             #-----------------------------------------------------------------
58             # methods
59             #-----------------------------------------------------------------
60              
61             # method summarizes this utterance as a string
62             # input : -
63             # output: $string <- a string describing $self
64             sub toString {
65 0     0 0 0 my $self = shift;
66              
67             #initiliaze the string
68 0         0 my $string = "citation:\n";
69 0         0 $string .= " $self->{id}\n";
70            
71             #add each utterance to the string
72 0         0 my %utterances = %{$self->{utterances}};
  0         0  
73 0         0 foreach my $key(keys %utterances) {
74 0         0 $string .= " ".$utterances{$key}->toString()."\n";
75             }
76 0         0 return $string;
77             }
78              
79             # method to compare this citation to another and returns 1 if the two
80             # contain identical information
81             # input : $other <- the citation object to compare against
82             # output: boolean <- 1 if $self and $other are equivalent (contain equivalent
83             # ID's and utterances), else 0
84             sub equals {
85             #grab input
86 3     3 0 6 my $self = shift;
87 3         3 my $other = shift;
88              
89             #compare id's
90 3 50       10 if ($self->{id} ne $other->{id}) {
91 0         0 return 0;
92             }
93              
94             #compare Utterances
95 3         3 foreach my $keyA(sort _by_utterance keys %{$self->{utterances}}){
  3         15  
96 3         4 my $utteranceA = $self->{utterances}{$keyA};
97              
98             #check each utterance in B
99 3         4 my $match = 0;
100 3         5 foreach my $keyB(sort _by_utterance keys %{$other->{utterances}}) {
  3         6  
101 2         3 my $utteranceB = $self->{utterances}{$keyB};
102 2 50       5 if ($utteranceA->equals($utteranceB)) {
103 2         3 $match = 1;
104 2         4 last;
105             }
106             }
107              
108             #citationA has no equivalent citation in $other
109             # so citations are not identical
110 3 100       10 if ($match < 1) {
111 1         4 return 0;
112             }
113             }
114              
115             #all tests passed, return true
116 2         21 return 1;
117             }
118              
119             # method to determine if this citation contains the CUI provided as input
120             # returns 1 if this citation contains the CUI, else 0
121             # input : $cui <- a string CUI code
122             # output: boolean <- 1 if any of $self's utterances contain $cui
123             sub contains {
124             #grab input
125 2     2 0 11 my $self = shift;
126 2         3 my $cui = shift;
127              
128             #check each phrase to see if it contains the CUI
129 2         4 my $containsCUI = 0;
130 2         1 foreach my $key(keys %{$self->{utterances}}) {
  2         5  
131 2 100       6 if ($self->{utterances}{$key}->contains($cui)) {
132 1         1 $containsCUI = 1;
133 1         2 last;
134             }
135             }
136            
137             #return the result
138 2         7 return $containsCUI;
139             }
140              
141             # method to add a new utterance to the citation
142             # input : $newUtterance <- the utterance to add to $self
143             # output: -
144             sub addUtterance {
145 9     9 0 12 my $self = shift;
146 9         9 my $newUtterance = shift;
147              
148 9 50       42 if($newUtterance->{id} =~ /((ti|ab)\.[\d]+)/) {
149 9         1417 $self->{utterances}{$1} = $newUtterance;
150             }
151             else {
152 0         0 print STDERR "error adding utterance to citation: $newUtterance->{id}\n";
153             }
154             }
155              
156             # method to sort the utterances by order they appear (title followed
157             # by abstract, number ascending)
158             # (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3)
159             # input : $a, $b <- implicit sort variables, the keys in a hash of utterances
160             # which are the utterance IDs (e.g. ti.0000000.1)
161             # output: integer <- -1 if a is before b, 0 if a and b are same order,
162             # 1 if a is after b
163             sub _by_utterance {
164             #get the utterance type
165 0     0   0 my $a_ab = ($a =~ /ab/);
166 0         0 my $b_ab = ($b =~ /ab/);
167            
168             #check if both are abstracts or titles
169 0 0       0 if ($a_ab == $b_ab) {
170 0         0 $a =~ /(ti|ab)\.([\d]+)/;
171 0         0 my $aNum = ($2);
172              
173 0         0 $b =~ /(ti|ab)\.([\d]+)/;
174 0         0 return $aNum <=> $2;
175             }
176              
177             #check if one is abstract, the other is title
178 0 0 0     0 if ($a_ab && !$b_ab) {
179 0         0 return 1;
180             }
181 0 0 0     0 if (!$a_ab && $b_ab) {
182 0         0 return -1;
183             }
184             }
185              
186              
187             #------------------------------ Get Components ------------------------------
188             # method to returns an ordered list of Utterances contained by the Citation.
189             # Utterances are ordered by title, abstract, then number in ascending order
190             # (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3)
191             # input : -
192             # output: \@utterances <- $self's utterances ordered as they appear in the
193             # original text of $self
194             sub getOrderedUtterances {
195             #initialize
196 2     2 0 69 my $self = shift;
197 2         4 my @utterances = ();
198              
199             #add concepts in sorted order
200 2         3 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  2         10  
201 2         3 push @utterances, $self->{utterances}{$key};
202             }
203 2         4 return \@utterances;
204             }
205              
206             # method to get an array of concepts that appear in the citation
207             # (not necassarily ordered). Use this method if order doesn't matter for
208             # increased performance.
209             # input : -
210             # output: \@concepts <- a list of concept objects
211             sub getConcepts {
212             #initialize
213 1     1 0 11 my $self = shift;
214 1         1 my @concepts = ();
215              
216             #add concepts in sorted order
217 1         2 foreach my $key(keys %{$self->{utterances}}) {
  1         2  
218 1         2 push @concepts, @{ $self->{utterances}{$key}->getConcepts() };
  1         3  
219             }
220 1         3 return \@concepts;
221             }
222              
223             # method to get the unique concepts and return a hash of
224             # concepts, CUIs are the keys
225             # input : -
226             # output: \%concepts <- $self's unique concepts with the key as the concept's
227             # CUI. CUIs are considered unique by their CUI code only (e.g. C0000000
228             # and C0000000 are considered the same even if there are two different
229             # Concept.pm objects associated with them)
230             sub getUniqueConcepts {
231 1     1 0 182 my $self = shift;
232 1         3 my %concepts = ();
233              
234             #update concepts
235 1         1 foreach my $key(keys %{$self->{utterances}}) {
  1         4  
236 1         4 my $utteranceConceptsRef = $self->{utterances}{$key}->getConcepts();
237 1         1 foreach my $concept(@{ $utteranceConceptsRef }) {
  1         2  
238 80         59 my $cui = $concept->{cui};
239 80 100       96 if (!exists $concepts{$cui}) {
240 75         89 $concepts{$cui} = $concept;
241             }
242             }
243             }
244 1         18 return \%concepts;
245             }
246              
247             # method to get the an array of concepts that appear in the citation
248             # concepts are ordered as they appear in the utterance
249             # however where there are multiple mappings for a single
250             # token those two concepts will appear adjacent to one another
251             # input : -
252             # output: \@conceptList <- an array of arrays, where each sub-array contains a
253             # list of 1 or more concept objects. Where more than
254             # one concept object occurrs it means the token to
255             # concept mapping was ambiguous. Arrays are ordered as
256             # the tokens occurr in the utterance.
257             sub getOrderedConcepts {
258             #initialize
259 1     1 0 94 my $self = shift;
260 1         2 my @conceptsList = ();
261              
262             #add concepts in sorted order
263 1         2 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  1         3  
264 1         2 push @conceptsList, @{ $self->{utterances}{$key}->getOrderedConcepts() };
  1         4  
265             }
266 1         2 return \@conceptsList;
267             }
268              
269             # method to get a list of ordered mappings. There may be multiple
270             # mappings for a single utterance, but they will appear in correct
271             # utterance order
272             # input : -
273             # output: \@mappings <- a list of mapping objects ordered by their occurence in
274             # $self.
275             sub getOrderedMappings {
276             #initialize
277 1     1 0 100 my $self = shift;
278 1         1 my @mappings = ();
279            
280             #add mappings in sorted order
281 1         2 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  1         4  
282 1         1 push @mappings, @{ $self->{utterances}{$key}->getMappings() };
  1         4  
283             }
284 1         2 return \@mappings;
285             }
286              
287             # method to get all the mappings of the citation (not necassarily ordered)
288             # input : -
289             # output: \@mappings <- a list of mapping objects
290             sub getMappings {
291             #initialize
292 0     0 0 0 my $self = shift;
293 0         0 my @mappings = ();
294            
295             #add mappings in sorted order
296 0         0 foreach my $key(keys %{$self->{utterances}}) {
  0         0  
297 0         0 push @mappings, @{ $self->{utterances}{$key}->getMappings() };
  0         0  
298             }
299 0         0 return \@mappings;
300             }
301              
302             # method to get an array of ordered tokens as they appear in the citation
303             # input : -
304             # output: \@tokens <- a list of token objects ordered by their appearance in
305             # $self
306             sub getOrderedTokens
307             {
308             #initialize
309 3     3 0 165 my $self = shift;
310 3         5 my @tokens = ();
311            
312             #add words in sorted order
313 3         3 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  3         9  
314 2         1 push @tokens, @{ $self->{utterances}{$key}->getTokens() };
  2         8  
315             }
316 3         5 return \@tokens;
317             }
318              
319             # method to get an array of tokens. Tokens are not necassarily in order
320             # input : -
321             # output: \@tokens <- a list of token objects
322             sub getTokens
323             {
324             #initialize
325 0     0 0 0 my $self = shift;
326 0         0 my @tokens = ();
327            
328             #add words in sorted order
329 0         0 foreach my $key(keys %{$self->{utterances}}) {
  0         0  
330 0         0 push @tokens, @{ $self->{utterances}{$key}->getTokens() };
  0         0  
331             }
332 0         0 return \@tokens;
333             }
334              
335             #---------------------- Has Parts (title or abstract) -------------------------
336             # method to determine if the citation contains any title utterances
337             # input : -
338             # output: boolean <- 1 if $self contains a title utterance, else 0
339             sub hasTitle
340             {
341 1     1 0 518 my $self = shift;
342 1         3 return $self->_hasPart('ti');
343            
344             }
345              
346             # method to determine if the citation contains any abstract utterances
347             # input : -
348             # output: boolean <- 1 if $self contains an abstract utterance, else 0
349             sub hasAbstract
350             {
351 1     1 0 4 my $self = shift;
352 1         2 return $self->_hasPart('ab');
353            
354             }
355              
356             # method to determine if the citation contains any utterances of the
357             # tag ('ti' or 'ab')
358             # input : $tag <- the utterance tag to check for, should be 'ti' or 'ab'
359             # output: boolean <- 1 if $self contains an utterance with the $tag, else 0
360             sub _hasPart
361             {
362 2     2   3 my $self = shift;
363 2         2 my $tag = shift;
364              
365             #get the utterances that match the tag
366 2         3 foreach my $key(keys %{$self->{utterances}}) {
  2         4  
367 2 50       10 if ($key =~ /(ti|ab)/) {
368 2 100       7 if ($1 eq $tag) {
369             #tag found, returning true
370 1         3 return 1;
371             }
372             }
373             }
374             #no matching tags found, returning false
375 1         3 return 0;
376             }
377             #----------------------------------------------------------------------------
378              
379              
380             #------------------ Get Parts (Title or Abstract) ---------------------
381             # method to create a new citation containing just the title of this citation
382             # input : -
383             # output: $part <- a citation object containing all utterances of $self's title
384             sub getTitle
385             {
386 2     2 0 25 my $self = shift;
387 2         6 return $self->_getPart('ti');
388             }
389              
390             # method to create a new citation containing just the abstract of this citation
391             # input : -
392             # output: $part <- a citation object containing all utterances of $self's
393             # abstract
394             sub getAbstract
395             {
396 2     2 0 100 my $self = shift;
397 2         5 return $self->_getPart('ab');
398             }
399              
400             # method to get a part of this citation (title or abstract)
401             # input is a match string, either 'ti' or 'ab'
402             # input : $tag <- the utterance tag to extract, should be 'ti' or 'ab'
403             # output: $part <- a citation object containing all utterance of $self
404             # containing the $tag in their ID
405             sub _getPart
406             {
407 4     4   5 my $self = shift;
408 4         5 my $tag = shift;
409            
410             #get the utterances that match the tag
411 4         16 my $part = MetaMap::DataStructures::Citation->new($self->{id});
412 4         6 foreach my $key(keys %{$self->{utterances}}) {
  4         12  
413 4 50       35 if($key =~ /(ti|ab)/) {
414 4 100       19 if ($1 eq $tag) {
415 2         5 $part->addUtterance($self->{utterances}{$key});
416             }
417             }
418             }
419             #return the title citation
420 4         12 return $part;
421             }
422             #-----------------------------------------------------------------------
423              
424             1;
425              
426             __END__