File Coverage

blib/lib/MetaMap/DataStructures/Citation.pm
Criterion Covered Total %
statement 114 149 76.5
branch 15 26 57.6
condition 0 6 0.0
subroutine 19 23 82.6
pod 0 17 0.0
total 148 221 66.9


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Citation
2             # (Last Updated $Id: Citation.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Citation;
33 1     1   4 use strict;
  1         1  
  1         20  
34 1     1   3 use warnings;
  1         1  
  1         18  
35              
36 1     1   2 use MetaMap::DataStructures::Utterance;
  1         1  
  1         1131  
37              
38             #----------------------------------------
39             # constructors
40             #----------------------------------------
41             # constructor method to create a new Citation object
42             # input : -
43             # output: $self <- a instance of a Citation object
44             sub new {
45             #create and bless self
46 5     5 0 8 my $class = shift;
47 5         9 my $self = {};
48 5         28 bless $self, $class;
49              
50             #grab input and initialize
51 5         15 $self->{id} = shift;
52 5         9 $self->{utterances} = {};
53              
54 5         9 return $self;
55             }
56              
57             #-----------------------------------------------------------------
58             # methods
59             #-----------------------------------------------------------------
60              
61             # method summarizes this utterance as a string
62             # input : -
63             # output: $string <- a string describing $self
64             sub toString {
65 0     0 0 0 my $self = shift;
66              
67             #initiliaze the string
68 0         0 my $string = "citation:\n";
69 0         0 $string .= " $self->{id}\n";
70            
71             #add each utterance to the string
72 0         0 my %utterances = %{$self->{utterances}};
  0         0  
73 0         0 foreach my $key(keys %utterances) {
74 0         0 $string .= " ".$utterances{$key}->toString()."\n";
75             }
76 0         0 return $string;
77             }
78              
79             # method to compare this citation to another and returns 1 if the two
80             # contain identical information
81             # input : $other <- the citation object to compare against
82             # output: boolean <- 1 if $self and $other are equivalent (contain equivalent
83             # ID's and utterances), else 0
84             sub equals {
85             #grab input
86 3     3 0 7 my $self = shift;
87 3         4 my $other = shift;
88              
89             #compare id's
90 3 50       10 if ($self->{id} ne $other->{id}) {
91 0         0 return 0;
92             }
93              
94             #compare Utterances
95 3         3 foreach my $keyA(sort _by_utterance keys %{$self->{utterances}}){
  3         19  
96 3         3 my $utteranceA = $self->{utterances}{$keyA};
97              
98             #check each utterance in B
99 3         4 my $match = 0;
100 3         3 foreach my $keyB(sort _by_utterance keys %{$other->{utterances}}) {
  3         8  
101 2         2 my $utteranceB = $self->{utterances}{$keyB};
102 2 50       8 if ($utteranceA->equals($utteranceB)) {
103 2         4 $match = 1;
104 2         5 last;
105             }
106             }
107              
108             #citationA has no equivalent citation in $other
109             # so citations are not identical
110 3 100       12 if ($match < 1) {
111 1         5 return 0;
112             }
113             }
114              
115             #all tests passed, return true
116 2         28 return 1;
117             }
118              
119             # method to determine if this citation contains the CUI provided as input
120             # returns 1 if this citation contains the CUI, else 0
121             # input : $cui <- a string CUI code
122             # output: boolean <- 1 if any of $self's utterances contain $cui
123             sub contains {
124             #grab input
125 2     2 0 12 my $self = shift;
126 2         2 my $cui = shift;
127              
128             #check each phrase to see if it contains the CUI
129 2         3 my $containsCUI = 0;
130 2         1 foreach my $key(keys %{$self->{utterances}}) {
  2         5  
131 2 100       7 if ($self->{utterances}{$key}->contains($cui)) {
132 1         1 $containsCUI = 1;
133 1         2 last;
134             }
135             }
136            
137             #return the result
138 2         6 return $containsCUI;
139             }
140              
141             # method to add a new utterance to the citation
142             # input : $newUtterance <- the utterance to add to $self
143             # output: -
144             sub addUtterance {
145 9     9 0 9 my $self = shift;
146 9         10 my $newUtterance = shift;
147              
148 9 50       52 if($newUtterance->{id} =~ /((ti|ab)\.[\d]+)/) {
149 9         1780 $self->{utterances}{$1} = $newUtterance;
150             }
151             else {
152 0         0 print STDERR "error adding utterance to citation: $newUtterance->{id}\n";
153             }
154             }
155              
156             # method to sort the utterances by order they appear (title followed
157             # by abstract, number ascending)
158             # (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3)
159             # input : $a, $b <- implicit sort variables, the keys in a hash of utterances
160             # which are the utterance IDs (e.g. ti.0000000.1)
161             # output: integer <- -1 if a is before b, 0 if a and b are same order,
162             # 1 if a is after b
163             sub _by_utterance {
164             #get the utterance type
165 0     0   0 my $a_ab = ($a =~ /ab/);
166 0         0 my $b_ab = ($b =~ /ab/);
167            
168             #check if both are abstracts or titles
169 0 0       0 if ($a_ab == $b_ab) {
170 0         0 $a =~ /(ti|ab)\.([\d]+)/;
171 0         0 my $aNum = ($2);
172              
173 0         0 $b =~ /(ti|ab)\.([\d]+)/;
174 0         0 return $aNum <=> $2;
175             }
176              
177             #check if one is abstract, the other is title
178 0 0 0     0 if ($a_ab && !$b_ab) {
179 0         0 return 1;
180             }
181 0 0 0     0 if (!$a_ab && $b_ab) {
182 0         0 return -1;
183             }
184             }
185              
186              
187             #------------------------------ Get Components ------------------------------
188             # method to returns an ordered list of Utterances contained by the Citation.
189             # Utterances are ordered by title, abstract, then number in ascending order
190             # (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3)
191             # input : -
192             # output: \@utterances <- $self's utterances ordered as they appear in the
193             # original text of $self
194             sub getOrderedUtterances {
195             #initialize
196 2     2 0 74 my $self = shift;
197 2         5 my @utterances = ();
198              
199             #add concepts in sorted order
200 2         3 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  2         12  
201 2         6 push @utterances, $self->{utterances}{$key};
202             }
203 2         6 return \@utterances;
204             }
205              
206             # method to get an array of concepts that appear in the citation
207             # (not necassarily ordered). Use this method if order doesn't matter for
208             # increased performance.
209             # input : -
210             # output: \@concepts <- a list of concept objects
211             sub getConcepts {
212             #initialize
213 1     1 0 33 my $self = shift;
214 1         2 my @concepts = ();
215              
216             #add concepts in sorted order
217 1         1 foreach my $key(keys %{$self->{utterances}}) {
  1         3  
218 1         1 push @concepts, @{ $self->{utterances}{$key}->getConcepts() };
  1         4  
219             }
220 1         2 return \@concepts;
221             }
222              
223             # method to get the unique concepts and return a hash of
224             # concepts, CUIs are the keys
225             # input : -
226             # output: \%concepts <- $self's unique concepts with the key as the concept's
227             # CUI. CUIs are considered unique by their CUI code only (e.g. C0000000
228             # and C0000000 are considered the same even if there are two different
229             # Concept.pm objects associated with them)
230             sub getUniqueConcepts {
231 1     1 0 171 my $self = shift;
232 1         3 my %concepts = ();
233              
234             #update concepts
235 1         2 foreach my $key(keys %{$self->{utterances}}) {
  1         2  
236 1         5 my $utteranceConceptsRef = $self->{utterances}{$key}->getConcepts();
237 1         1 foreach my $concept(@{ $utteranceConceptsRef }) {
  1         2  
238 80         67 my $cui = $concept->{cui};
239 80 100       88 if (!exists $concepts{$cui}) {
240 75         89 $concepts{$cui} = $concept;
241             }
242             }
243             }
244 1         18 return \%concepts;
245             }
246              
247             # method to get the an array of concepts that appear in the citation
248             # concepts are ordered as they appear in the utterance
249             # however where there are multiple mappings for a single
250             # token those two concepts will appear adjacent to one another
251             # input : -
252             # output: \@conceptList <- an array of arrays, where each sub-array contains a
253             # list of 1 or more concept objects. Where more than
254             # one concept object occurrs it means the token to
255             # concept mapping was ambiguous. Arrays are ordered as
256             # the tokens occurr in the utterance.
257             sub getOrderedConcepts {
258             #initialize
259 1     1 0 97 my $self = shift;
260 1         2 my @conceptsList = ();
261              
262             #add concepts in sorted order
263 1         1 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  1         4  
264 1         2 push @conceptsList, @{ $self->{utterances}{$key}->getOrderedConcepts() };
  1         4  
265             }
266 1         3 return \@conceptsList;
267             }
268              
269             # method to get a list of ordered mappings. There may be multiple
270             # mappings for a single utterance, but they will appear in correct
271             # utterance order
272             # input : -
273             # output: \@mappings <- a list of mapping objects ordered by their occurence in
274             # $self.
275             sub getOrderedMappings {
276             #initialize
277 1     1 0 97 my $self = shift;
278 1         2 my @mappings = ();
279            
280             #add mappings in sorted order
281 1         2 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  1         4  
282 1         2 push @mappings, @{ $self->{utterances}{$key}->getMappings() };
  1         4  
283             }
284 1         2 return \@mappings;
285             }
286              
287             # method to get all the mappings of the citation (not necassarily ordered)
288             # input : -
289             # output: \@mappings <- a list of mapping objects
290             sub getMappings {
291             #initialize
292 0     0 0 0 my $self = shift;
293 0         0 my @mappings = ();
294            
295             #add mappings in sorted order
296 0         0 foreach my $key(keys %{$self->{utterances}}) {
  0         0  
297 0         0 push @mappings, @{ $self->{utterances}{$key}->getMappings() };
  0         0  
298             }
299 0         0 return \@mappings;
300             }
301              
302             # method to get an array of ordered tokens as they appear in the citation
303             # input : -
304             # output: \@tokens <- a list of token objects ordered by their appearance in
305             # $self
306             sub getOrderedTokens
307             {
308             #initialize
309 3     3 0 190 my $self = shift;
310 3         6 my @tokens = ();
311            
312             #add words in sorted order
313 3         2 foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
  3         10  
314 2         2 push @tokens, @{ $self->{utterances}{$key}->getTokens() };
  2         7  
315             }
316 3         7 return \@tokens;
317             }
318              
319             # method to get an array of tokens. Tokens are not necassarily in order
320             # input : -
321             # output: \@tokens <- a list of token objects
322             sub getTokens
323             {
324             #initialize
325 0     0 0 0 my $self = shift;
326 0         0 my @tokens = ();
327            
328             #add words in sorted order
329 0         0 foreach my $key(keys %{$self->{utterances}}) {
  0         0  
330 0         0 push @tokens, @{ $self->{utterances}{$key}->getTokens() };
  0         0  
331             }
332 0         0 return \@tokens;
333             }
334              
335             #---------------------- Has Parts (title or abstract) -------------------------
336             # method to determine if the citation contains any title utterances
337             # input : -
338             # output: boolean <- 1 if $self contains a title utterance, else 0
339             sub hasTitle
340             {
341 1     1 0 555 my $self = shift;
342 1         5 return $self->_hasPart('ti');
343             }
344              
345             # method to determine if the citation contains any abstract utterances
346             # input : -
347             # output: boolean <- 1 if $self contains an abstract utterance, else 0
348             sub hasAbstract
349             {
350 1     1 0 5 my $self = shift;
351 1         2 return $self->_hasPart('ab');
352             }
353              
354             # method to determine if the citation contains any utterances of the
355             # tag ('ti' or 'ab')
356             # input : $tag <- the utterance tag to check for, should be 'ti' or 'ab'
357             # output: boolean <- 1 if $self contains an utterance with the $tag, else 0
358             sub _hasPart
359             {
360 2     2   1 my $self = shift;
361 2         3 my $tag = shift;
362              
363             #get the utterances that match the tag
364 2         2 foreach my $key(keys %{$self->{utterances}}) {
  2         5  
365 2 50       13 if ($key =~ /(ti|ab)/) {
366 2 100       7 if ($1 eq $tag) {
367             #tag found, returning true
368 1         4 return 1;
369             }
370             }
371             }
372             #no matching tags found, returning false
373 1         4 return 0;
374             }
375             #----------------------------------------------------------------------------
376              
377              
378             #------------------ Get Parts (Title or Abstract) ---------------------
379             # method to create a new citation containing just the title of this citation
380             # input : -
381             # output: $part <- a citation object containing all utterances of $self's title
382             sub getTitle
383             {
384 2     2 0 12 my $self = shift;
385 2         8 return $self->_getPart('ti');
386             }
387              
388             # method to create a new citation containing just the abstract of this citation
389             # input : -
390             # output: $part <- a citation object containing all utterances of $self's
391             # abstract
392             sub getAbstract
393             {
394 2     2 0 113 my $self = shift;
395 2         6 return $self->_getPart('ab');
396             }
397              
398             # method to get a part of this citation (title or abstract)
399             # input is a match string, either 'ti' or 'ab'
400             # input : $tag <- the utterance tag to extract, should be 'ti' or 'ab'
401             # output: $part <- a citation object containing all utterance of $self
402             # containing the $tag in their ID
403             sub _getPart
404             {
405 4     4   6 my $self = shift;
406 4         7 my $tag = shift;
407            
408             #get the utterances that match the tag
409 4         15 my $part = MetaMap::DataStructures::Citation->new($self->{id});
410 4         7 foreach my $key(keys %{$self->{utterances}}) {
  4         14  
411 4 50       42 if($key =~ /(ti|ab)/) {
412 4 100       20 if ($1 eq $tag) {
413 2         6 $part->addUtterance($self->{utterances}{$key});
414             }
415             }
416             }
417             #return the title citation
418 4         15 return $part;
419             }
420             #-----------------------------------------------------------------------
421              
422             1;
423              
424             __END__