File Coverage

blib/lib/Lingua/YaTeA/TestifiedTerm.pm

Criterion	Covered	Total	%
statement	126	144	87.5
branch	6	20	30.0
condition	3	6	50.0
subroutine	24	26	92.3
pod	21	21	100.0
total	180	217	82.9

line	stmt	bran	cond	sub	pod	time	code
1							package Lingua::YaTeA::TestifiedTerm;
2	5			5		36	use strict;
	5					12
	5					138
3	5			5		29	use warnings;
	5					13
	5					136
4	5			5		27	use UNIVERSAL;
	5					11
	5					25
5	5			5		133	use NEXT;
	5					11
	5					43
6	5			5		155	use Scalar::Util qw(blessed);
	5					16
	5					8211
7
8							our $id = 0;
9							our $VERSION=$Lingua::YaTeA::VERSION;
10
11							sub new
12							{
13	1			1	1	393	my ($class_or_object,$num_content_words,$words_a,$tag_set,$source,$match_type) = @_;
14
15	1					3	my $this = shift;
16
17	1	50				5	$this = bless {}, $this unless ref $this;
18	1					3	$this->{ID} = $id;
19	1					3	$this->{IF} = ();
20	1					2	$this->{POS} = ();
21	1					3	$this->{LF} = ();
22	1					4	$this->{SOURCE} = [];
23	1					3	$this->{WORDS} = [];
24	1					75	$this->{REG_EXP} = ();
25	1					6	$this->{FOUND} = 0;
26	1					3	$this->{OCCURRENCES} = [];
27	1					13	$this->{INDEX_SET} = Lingua::YaTeA::IndexSet->new;
28	1					10	$this->buildLinguisticInfos($words_a,$tag_set);
29	1					2	push @{$this->getSource}, split /,/,$source;
	1					13
30	1					15	$this->buildRegularExpression($match_type);
31	1					2	$this->setIndexSet(scalar @{$this->getWords});
	1					4
32	1					23	$this->NEXT::new(@_);
33	1					6	return $this;
34							}
35
36							sub isInLexicon
37							{
38	10			10	1	28	my ($this,$filtering_lexicon_h,$match_type) = @_;
39	10					17	my $lex_item;
40
41	10					17	foreach $lex_item (@{$this->getWords})
	10					28
42							{
43	13	50				30	if($match_type eq "loose") # look at IF or LF
44							{
45	13	50	66			49	if(
46							(!exists $filtering_lexicon_h->{lc($lex_item->getIF)})
47							&&
48							(!exists $filtering_lexicon_h->{lc($lex_item->getLF)})
49							)
50							{
51							# current word does not appear in the corpus : testified term won't be loaded
52	7					38	return 0;
53							}
54							}
55							else
56							{
57	0	0				0	if($match_type eq "strict") # look at IF and POS
58							{
59	0	0				0	if (!exists $filtering_lexicon_h->{lc($lex_item->getIF)."~".$lex_item->getPOS})
60							{
61							# current word does not appear in the corpus : testified term won't be loaded
62	0					0	return 0;
63							}
64
65							}
66							else
67							{
68							# default match: look at IF
69	0	0				0	if(!exists $filtering_lexicon_h->{lc($lex_item->getIF)})
70							{
71
72							# current word does not appear in the corpus : testified term won't be loaded
73	0					0	return 0;
74							}
75
76							}
77							}
78							}
79	3					13	return 1;
80							}
81
82
83							sub buildLinguisticInfos
84							{
85	2			2	1	5	my ($this,$lex_items_a,$tag_set) = @_;
86
87	2					9	my $lex;
88							my $IF;
89	2					0	my $POS;
90	2					0	my $LF;
91	2					8	my %prep = ("of"=>"of", "to"=>"to");
92
93
94	2					6	foreach $lex (@$lex_items_a)
95							{
96	4	50	33			32	if ((blessed($lex)) && ($lex->isa("Lingua::YaTeA::LexiconItem")))
97							{
98	4					20	$IF .= $lex->getIF . " " ;
99							#if (exists $prep{$lex->getLF})
100	4	50				12	if ($tag_set->existTag('PREPOSITIONS',$lex->getIF))
101							{
102	0					0	$POS .= $lex->getLF . " ";
103							}
104							else
105							{
106	4					10	$POS .= $lex->getPOS . " ";
107							}
108	4					12	$LF .= $lex->getLF . " " ;
109	4					8	push @{$this->getWords}, $lex;
	4					11
110							}
111							else
112							{
113	0					0	die "problem: " . $lex . "\n";
114							}
115							}
116	2					14	$IF =~ s/\s+$//;
117	2					9	$POS =~ s/\s+$//;
118	2					8	$LF =~ s/\s+$//;
119	2					10	$this->setIF($IF);
120	2					8	$this->setPOS($POS);
121	2					8	$this->setLF($LF);
122							}
123
124							sub getWords
125							{
126	39			39	1	88	my ($this) = @_;
127	39					131	return $this->{WORDS};
128							}
129
130							sub setIF
131							{
132	2			2	1	6	my ($this,$new) = @_;
133	2					5	$this->{IF} = $new;
134							}
135
136							sub setPOS
137							{
138	2			2	1	6	my ($this,$new) = @_;
139	2					3	$this->{POS} = $new;
140							}
141
142							sub setLF
143							{
144	2			2	1	11	my ($this,$new) = @_;
145	2					10	$this->{LF} = $new;
146							}
147
148							sub getIF
149							{
150	9			9	1	34	my ($this) = @_;
151	9					31	return $this->{IF};
152							}
153
154							sub getPOS
155							{
156	1			1	1	3	my ($this) = @_;
157	1					5	return $this->{POS};
158							}
159
160							sub getLF
161							{
162	0			0	1	0	my ($this) = @_;
163	0					0	return $this->{LF};
164							}
165
166							sub getID
167							{
168	15			15	1	32	my ($this) = @_;
169	15					79	return $this->{ID};
170							}
171
172
173							sub buildKey
174							{
175	1			1	1	3	my ($this) = @_;
176	1					6	my $key = $this->{"IF"} . "~" . $this->{"POS"} . "~" . $this->{"LF"};
177	1					4	return $key;
178							}
179
180							sub getSource
181							{
182	6			6	1	18	my ($this) = @_;
183	6					28	return $this->{SOURCE};
184							}
185
186							sub buildRegularExpression
187							{
188	1			1	1	4	my ($this,$match_type) = @_;
189	1					3	my $frontier = "$\\n\\<\\/\?FRONTIER ID=\[0\-9\]\+ TT=\[0\-9\]\+\\>$\*";
190	1					4	my $reg_exp = $frontier . "\?";
191	1					2	my $lex;
192
193	1	50				4	if($match_type eq "loose") # IF or LF
194							{
195	1					3	foreach $lex (@{$this->getWords})
	1					3
196							{
197	2					7	$reg_exp .= "$\(\\n".quotemeta($lex->getIF) . "\\t\[\^\\t\]\+\\t\[\^\\t\]\+$\\|$\\n\[\^\\t\]\+\\t\[\^\\t\]\+\\t". quotemeta($lex->getLF) . "$\)" . $frontier;
198
199							}
200							}
201							else
202							{
203	0	0				0	if($match_type eq "strict") # IF and POS
204							{
205	0					0	foreach $lex (@{$this->getWords})
	0					0
206							{
207	0					0	$reg_exp .= "\\n".quotemeta($lex->getIF) . "\\t".quotemeta($lex->getPOS) ."\\t\[\^\\t\]\+" . $frontier;
208							}
209							}
210							else
211							{
212	0					0	foreach $lex (@{$this->getWords}) # IF
	0					0
213							{
214	0					0	$reg_exp .= "\\n".quotemeta($lex->getIF) . "\\t\[\^\\t\]\+\\t\[\^\\t\]\+" . $frontier;
215							}
216							}
217							}
218	1					3	$reg_exp .= "\\n";
219	1					4	$this->{REG_EXP} = $reg_exp;
220							}
221
222
223							sub getRegExp
224							{
225	3			3	1	8	my ($this) = @_;
226	3					8	return $this->{REG_EXP};
227							}
228
229							sub getWord
230							{
231	22			22	1	38	my ($this,$index) = @_;
232	22					58	return $this->getWords->[$index];
233
234							}
235
236							sub addOccurrence
237							{
238	3			3	1	11	my ($this,$phrase_occurrence,$phrase,$key,$fh) = @_;
239	3					11	my $start_offset;
240							my $end_offset;
241	3					0	my $testified_occurrence;
242	3					13	my @index = split(/-/,$key);
243	3					18	($start_offset,$end_offset) = $this->getPositionInPhrase($phrase,\@index,$fh);
244	3					13	$testified_occurrence = Lingua::YaTeA::Occurrence->new;
245	3					13	$testified_occurrence->setInfoForTestifiedTerm($phrase_occurrence->getSentence,$phrase_occurrence->getStartChar + $start_offset, $phrase_occurrence->getEndChar - $end_offset);
246	3					8	push @{$this->{OCCURRENCES}}, $testified_occurrence;
	3					14
247							}
248
249							sub getPositionInPhrase
250							{
251	3			3	1	20	my ($this,$phrase,$index_a,$fh) = @_;
252	3					12	my @before;
253							my @after;
254	3					0	my $index;
255	3					6	my $start_offset = 0;
256	3					6	my $end_offset = 0;
257							#print $fh $index_a->[0] . "\n";
258							#print $fh $index_a->[$#$index_a] . "\n";
259	3					14	for ($index = 0; $index < $index_a->[0]; $index++)
260							{
261	2					8	push @before, $index;
262							}
263	3					26	for ($index = $index_a->[$#$index_a] +1; $index < $phrase->getIndexSet->getSize; $index++)
264							{
265	7					18	push @after, $index;
266							}
267
268	3					9	foreach $index (@before)
269							{
270	2					6	$start_offset += $phrase->getWord($index)->getLength +1;
271							}
272
273	3					14	foreach $index (@after)
274							{
275	7					19	$end_offset += $phrase->getWord($index)->getLength +1;
276							}
277
278	3					9	return ($start_offset,$end_offset);
279							}
280
281							sub setIndexSet
282							{
283	1			1	1	3	my ($this,$size) = @_;
284	1					2	my $i = 0;
285	1					5	while ($i < $size)
286							{
287	2					9	$this->getIndexSet->addIndex($i);
288	2					7	$i++;
289							}
290
291							}
292
293							sub getIndexSet
294							{
295	4			4	1	10	my ($this) = @_;
296	4					26	return $this->{INDEX_SET};
297							}
298
299							sub getOccurrences
300							{
301	0			0	1		my ($this) = @_;
302	0						return $this->{OCCURRENCES};
303							}
304
305
306							1;
307
308
309							__END__