File Coverage

blib/lib/MetaMap/DataStructures/Token.pm

Criterion	Covered	Total	%
statement	55	56	98.2
branch	12	14	85.7
condition	7	15	46.6
subroutine	6	6	100.0
pod	0	4	0.0
total	80	95	84.2

line	stmt	bran	cond	sub	pod	time	code
1							# MetaMap::DataStructures::Token
2							# (Last Updated $Id: Token.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3							#
4							# Perl module that provides a perl interface to the
5							# Unified Medical Language System (UMLS)
6							#
7							# Copyright (c) 2016
8							#
9							# Sam Henry, Virginia Commonwealth University
10							# henryst at vcu.edu
11							#
12							# Bridget T. McInnes, Virginia Commonwealth University
13							# btmcinnes at vcu.edu
14							#
15							# This program is free software; you can redistribute it and/or
16							# modify it under the terms of the GNU General Public License
17							# as published by the Free Software Foundation; either version 2
18							# of the License, or (at your option) any later version.
19							#
20							# This program is distributed in the hope that it will be useful,
21							# but WITHOUT ANY WARRANTY; without even the implied warranty of
22							# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23							# GNU General Public License for more details.
24							#
25							# You should have received a copy of the GNU General Public License
26							# along with this program; if not, write to
27							#
28							# The Free Software Foundation, Inc.,
29							# 59 Temple Place - Suite 330,
30							# Boston, MA 02111-1307, USA.
31
32							package MetaMap::DataStructures::Token;
33
34	1			1		3	use strict;
	1					1
	1					20
35	1			1		2	use warnings;
	1					1
	1					546
36
37							#----------------------------------------
38							# constructor
39							#----------------------------------------
40							# constructor method to create a new Token object
41							# input : -
42							# output: $self <- a Token object
43							sub new {
44							#grab class and create self
45	463			463	0	329	my $class = shift;
46	463					396	my $self = {};
47	463					435	bless $self, $class;
48
49							#get the rest of the input
50	463					584	$self->{text} = shift;
51	463					425	$self->{matchedText} = shift;
52	463					401	$self->{type} = shift;
53	463					419	$self->{posTag} = shift;
54	463					353	$self->{features} = shift;
55	463					346	$self->{numWords} = shift;
56
57	463					1468	return $self;
58							}
59
60							# method creates and returns a token from text
61							# (MetaMap Prolog Machine Output, single
62							# syntax description after phrase)
63							# input : $text <- a Metamap Prolog Machine Output single syntax description
64							# given after the phrase (or equivalent)
65							# output: $self <- a Token object
66							sub createFromText {
67							#grab the inputs
68	463			463	0	322	my $text= shift;
69
70							#grab the type
71	463					874	$text =~ m/(adv\|aux\|compl\|conj\|det\|head\|mod\|modal\|pastpart\|prep\|pron\|punc\|shapes\|verb\|not_in_lex)\(/;
72	463					424	my $type = $1;
73
74							#grab and format input match
75	463					785	$text =~ m/inputmatch\(\[([^\[\]]+)\]/;
76	463					352	my $inputMatchText = $1;
77
78							#remove trailing and leading quotes and commas
79	463					318	my $inputMatch = "";
80	463					531	my @inputs = split /,/, $inputMatchText;
81	463					453	foreach my $input(@inputs) {
82	490	100				908	if ($input =~ m/'(.*)'/) {
83	284					322	$input = $1;
84							}
85	490					556	$inputMatch .= ' '.$input;
86							}
87							#remove trailing and leading spaces from input match
88	463					806	$inputMatch =~ s/\s*//;
89	463					587	$inputMatch =~ s/^\s*//;
90	463					743	$inputMatch =~ s/\s*$//;
91
92							#grab the pos tag (if any)
93	463					328	my $tag = '';
94	463	100				938	if ($text =~ m/tag$([a-z]+)$/) {
95	386					348	$tag = $1;
96							}
97
98							#grab the lexmatch (if any)
99	463					285	my $lexMatch = '';
100	463	100				879	if ($text =~ m/lexmatch\(\[([^\[\]]+)\]/) {
101	379					339	$lexMatch = $1;
102	379	100				506	if ($lexMatch =~ m/'(.*)'/) {
103	2					3	$lexMatch = $1;
104							}
105							}
106
107							#grab features (if any)
108	463					302	my $features = '';
109	463	50				563	if ($text =~ m/features$\[([^\[\]]+)\]$/) {
110	0					0	$features = $1;
111							}
112
113							#Count the number of words in this token
114							# ...important for match mapping
115	463					319	my $numWords = 1;
116	463	100				900	if ($text =~ m/tokens$\[([^\[\]]+)\]$/) {
117	386					559	my @splitToken = (split /,/, $1);
118	386					360	$numWords = scalar @splitToken;
119							}
120
121							#TODO delete this (for debugging)
122							#print out info about the token being created
123							#print "Creating Token:\n";
124							#print "text = $text\n";
125							#print "inputMatch = $inputMatch\n";
126							#print "lexMatch = $lexMatch\n";
127							#print "type = $type\n";
128							#print "tag = $tag\n";
129							#print "features = $features\n";
130							#print "numWords = $numWords\n";
131
132							#create and retrun the new token
133	463					619	return MetaMap::DataStructures::Token->new(
134							$inputMatch, $lexMatch, $type, $tag, $features, $numWords);
135							}
136
137
138							#----------------------------------------
139							# Methods
140							#---------------------------------------
141
142							# method compares this token to another and returns 1 if
143							# the two tokens contain identical information
144							# input : $other <- the Token to comnpare against
145							# output: boolean <- 1 if $self and other are equivalent (have the same field
146							# values)
147							sub equals {
148							#grab input
149	9264			9264	0	5656	my $self = shift;
150	9264					5220	my $other = shift;
151
152							#compare each field
153	9264	50	66			49444	if ($self->{text} ne $other->{text}
			66
			33
			33
			33
154							\|\| $self->{matchedText} ne $other->{matchedText}
155							\|\| $self->{type} ne $other->{type}
156							\|\| $self->{posTag} ne $other->{posTag}
157							\|\| $self->{features} ne $other->{features}
158							\|\| $self->{numWords} ne $other->{numWords}) {
159	3459					6842	return 0;
160							}
161
162							#everything matches, return true
163	5805					10054	return 1;
164							}
165
166							# method summarizes this token as a string
167							# input : -
168							# output: $string <- a string describing $self
169							sub toString {
170	293			293	0	164	my $self = shift;
171	293					173	my $string = "token: \n";
172	293					345	$string .= " $self->{text}, $self->{matchedText}\n";
173	293					318	$string .= " $self->{type}, $self->{posTag}, $self->{numWords}\n";
174
175	293					519	return $string;
176							}
177
178							1;
179
180							__END__