File Coverage

blib/lib/Alvis/TermTagger.pm

Criterion	Covered	Total	%
statement	151	250	60.4
branch	37	64	57.8
condition	45	147	30.6
subroutine	11	15	73.3
pod	11	11	100.0
total	255	487	52.3

line	stmt	bran	cond	sub	pod	time	code
1							package Alvis::TermTagger;
2
3							our $VERSION = '0.8';
4
5							#######################################################################
6							#
7							# Last Update: 21/12/2010 (dd/mm/yyyy date format)
8							#
9							# Copyright (C) 2006 Thierry Hamon
10							#
11							# Written by thierry.hamon@univ-paris13.fr
12							#
13							# Author : Thierry Hamon
14							# Email : thierry.hamon@univ-paris13.fr
15							# URL : http://www-limbio.smbh.univ-paris13.fr/membres/hamon/
16							#
17							########################################################################
18
19
20	3			3		74716	use strict;
	3					7
	3					112
21	3			3		14	use warnings;
	3					5
	3					8525
22
23							# TODO : write functions for term tagging, term selection with and
24							# without offset in the corpus
25
26							sub termtagging {
27
28	4			4	1	42	my ($corpus_filename, $term_list_filename, $output_filename, $lemmatised_corpus_filename, $caseSensitive) = @_;
29
30	4					9	my @term_list;
31							my %term_listIdx;
32	0					0	my @regex_term_list;
33	0					0	my @regex_lemmawordterm_list;
34	0					0	my %corpus;
35	0					0	my %lc_corpus;
36	0					0	my %lemmatised_corpus;
37	0					0	my %lc_lemmatised_corpus;
38	0					0	my %corpus_index;
39	0					0	my %lemmatised_corpus_index;
40	0					0	my %idtrm_select;
41	0					0	my %idlemtrm_select;
42
43	4	100				18	if (!defined $caseSensitive) {
44	1					2	$caseSensitive = -1;
45							}
46
47	4					22	&load_TermList($term_list_filename,\@term_list, \%term_listIdx);
48	4					24	&get_Regex_TermList(\@term_list, \@regex_term_list, \@regex_lemmawordterm_list);
49
50	4					20	&load_Corpus($corpus_filename, \%corpus, \%lc_corpus);
51	4	100				15	if (defined $lemmatised_corpus_filename) {
52	3					10	&load_Corpus($lemmatised_corpus_filename, \%lemmatised_corpus, \%lc_lemmatised_corpus);
53							}
54	4					19	&corpus_Indexing(\%lc_corpus, \%corpus, \%corpus_index, $caseSensitive);
55	4	100				17	if (defined $lemmatised_corpus_filename) {
56	3					11	&corpus_Indexing(\%lc_lemmatised_corpus, \%lemmatised_corpus, \%lemmatised_corpus_index, $caseSensitive);
57							}
58	4					21	&term_Selection(\%corpus_index, \@term_list, \%idtrm_select, $caseSensitive);
59	4	100				16	if (defined $lemmatised_corpus_filename) {
60	3					11	&term_Selection(\%lemmatised_corpus_index, \@term_list, \%idlemtrm_select, $caseSensitive);
61							}
62	4					56	&term_tagging_offset(\@term_list, \@regex_term_list, \%idtrm_select, \%corpus, $output_filename, $caseSensitive);
63	4	100				20	if (defined $lemmatised_corpus_filename) {
64	3					12	&term_tagging_offset(\@term_list, \@regex_lemmawordterm_list, \%idlemtrm_select, \%lemmatised_corpus, $output_filename, $caseSensitive);
65							}
66	4					265	return(0);
67							}
68
69
70							sub load_TermList {
71	4			4	1	10	my ($termlist_name, $ref_termlist, $ref_termlistIdx) = @_;
72
73	4					9	my $line;
74							my $line1;
75	0					0	my $term; # not used yet
76	0					0	my $suppl_info; # not used yet
77	0					0	my @tab;
78
79	4					996	warn "Loading the terminological resource\n";
80
81	4	50				225	open DESC_TERMLIST, $termlist_name or die "$0: $termlist_name: No such file\n";
82
83	4					28	binmode(DESC_TERMLIST, ":utf8");
84
85	4					129	while($line1 = <DESC_TERMLIST>) {
86	20					33	chomp $line1;
87	20					55	utf8::decode($line1);
88	20					24	$line=$line1;
89
90							# Blank and comment lines are thrown away
91	20	50	33			140	if (($line !~ /^\s\#/o)&&($line !~ /^\s\/\//o)&&($line !~ /^\s*$/o)) {
			33
92							# Term is split from the other information
93	20					70	my @tab = split / ?[\\|:] ?/, $line;
94	20	50				99	if ($tab[0] !~ /^\s*$/) {
95							# TODO better
96	20					82	$tab[0] =~ s/ +/ /go;
97	20					41	$tab[0] =~ s/ $//go;
98	20					27	$tab[0] =~ s/^ //go;
99							# $tab[0] =~ s/\\:/:/go;
100	20	50				61	if (!exists $ref_termlistIdx->{$tab[0]}) {
101	20					32	push @$ref_termlist, \@tab;
102	20					115	$ref_termlistIdx->{$tab[0]} = scalar(@$ref_termlist) -1;
103							} else {
104	0					0	$ref_termlist->[$ref_termlistIdx->{$tab[0]}]->[2] .= ";" . $tab[2];
105							}
106							}
107							}
108							}
109	4					52	close DESC_TERMLIST;
110	4					841	print STDERR "\n\tTerm list size : " . scalar(@$ref_termlist) . "\n\n";
111							}
112
113							sub get_Regex_TermList {
114
115	4			4	1	11	my ($ref_termlist, $ref_regex_termlist, $ref_regex_lemmaWordtermlist) = @_;
116	4					7	my $term_counter;
117
118	4					456	warn "Generating the regular expression from the terms\n";
119
120	4					23	for($term_counter = 0;$term_counter < scalar @$ref_termlist;$term_counter++) {
121	20					45	$ref_regex_termlist->[$term_counter] = $ref_termlist->[$term_counter]->[0];
122	20	50				49	if (defined $ref_regex_lemmaWordtermlist) {
123	20	50				39	if (defined $ref_termlist->[$term_counter]->[3]) {
124	0					0	$ref_regex_lemmaWordtermlist->[$term_counter] = $ref_termlist->[$term_counter]->[3];
125							} else {
126	20					40	$ref_regex_lemmaWordtermlist->[$term_counter] = $ref_termlist->[$term_counter]->[0];
127							}
128							}
129							# warn $ref_regex_lemmaWordtermlist->[$term_counter] . "\n";
130	20					36	$ref_regex_termlist->[$term_counter] =~ s/([()\',\[\]\?\!:;\/.\+\-\*\#\{\}\\])/\\$1/og;
131	20					50	$ref_regex_termlist->[$term_counter] =~ s/ /[\- \n]/og;
132	20	50				44	if (defined $ref_regex_lemmaWordtermlist) {
133	20					31	$ref_regex_lemmaWordtermlist->[$term_counter] =~ s/([()\',\[\]\?\!:;\/.\+\-\*\#\{\}\\])/\\$1/og;
134	20					75	$ref_regex_lemmaWordtermlist->[$term_counter] =~ s/ /[\- \n]/og;
135							}
136							}
137	4					206	print STDERR "\n\tTerm/regex list size : " . scalar(@$ref_regex_termlist);
138	4	50				15	if (defined $ref_regex_lemmaWordtermlist) {
139	4					155	print STDERR" / " . scalar(@$ref_regex_lemmaWordtermlist);
140							}
141	4					390	print STDERR "\n\n";
142							}
143
144							sub load_Corpus {
145
146	7			7	1	13	my ($corpus_filename,$ref_tabh_Corpus, $ref_tabh_Corpus_lc) = @_;
147	7					10	my $line;
148	7					9	my $sent_id = 1;
149
150	7					881	warn "Loading the corpus\n";
151
152	7	50				327	open CORPUS, $corpus_filename or die "File $corpus_filename not found\n";
153
154	7					33	binmode(CORPUS, ":utf8");
155
156	7					118	while($line=<CORPUS>){
157	18					23	chomp $line;
158	18					45	$ref_tabh_Corpus->{$sent_id} = $line;
159	18					90	$ref_tabh_Corpus_lc->{$sent_id} = lc $line;
160	18					81	$sent_id++;
161							}
162	7					69	close CORPUS;
163	7					1234	print STDERR "\n\tCorpus size : " . scalar(keys %$ref_tabh_Corpus) . "\n\n";
164							}
165
166
167							sub corpus_Indexing {
168	7			7	1	13	my ($ref_corpus_lc, $ref_corpus, $ref_corpus_index, $caseSensitive) = @_;
169
170	7					12	my $word;
171							my @tab_words;
172	0					0	my @tab_words_lc;
173	0					0	my $sent_id;
174	0					0	my $i;
175
176	7					792	warn "Indexing the corpus\n";
177
178	7					31	foreach $sent_id (keys %$ref_corpus_lc) { # \-\.,\n;\/
179	18					487	@tab_words = split /[ ()\',\[\]\?\!:;\/\.\+\-\*\#\{\}\n]/, $ref_corpus->{$sent_id};
180	18					431	@tab_words_lc = split /[ ()\',\[\]\?\!:;\/\.\+\-\*\#\{\}\n]/, $ref_corpus_lc->{$sent_id};
181	18					89	for($i=0;$i < scalar(@tab_words_lc);$i++) {
182							# foreach $word_lc (@tab_words_lc) {
183	511	100	100			2212	if ((defined $caseSensitive) && (($caseSensitive == 0) || (length($tab_words_lc[$i]) <= $caseSensitive))) {
			33
184	226					286	$word = $tab_words[$i];
185							} else {
186	285					456	$word = $tab_words_lc[$i];
187							}
188	511	100				1001	if ($word ne "") {
189	455	100				1596	if (!exists $ref_corpus_index->{$word}) {
190	339					876	my @tabtmp;
191	339					793	$ref_corpus_index->{$word} = \@tabtmp;
192							}
193	455					509	push @{$ref_corpus_index->{$word}}, $sent_id;
	455					1499
194							}
195							}
196							}
197	7					1429	print STDERR "\n\tSize of the first selected term list: " . scalar(keys %$ref_corpus_index) . "\n\n";
198							}
199
200							sub print_corpus_index {
201	0			0	1	0	my ($ref_corpus_index) = @_;
202
203	0					0	my $word;
204
205	0					0	foreach $word (sort keys %$ref_corpus_index) {
206	0					0	print STDERR "$word\t";
207	0					0	print STDERR join(", ", @{$ref_corpus_index->{$word}});
	0					0
208	0					0	print STDERR "\n";
209							}
210							}
211
212							sub _term_Selection2 {
213	0			0		0	my ($ref_corpus_index, $ref_termlist, $ref_tabh_idtrm_select) = @_;
214	0					0	my $counter;
215							my $term;
216	0					0	my @tab_termlex;
217	0					0	my $i;
218	0					0	my $word;
219	0					0	my $sent_id;
220	0					0	my $word_found = 0;
221
222	0					0	warn "Selecting the terms potentialy appearing in the corpus\n";
223
224	0					0	my %tabh_numtrm_select;
225
226	0					0	for($counter = 0;$counter < scalar @$ref_termlist;$counter++) {
227	0					0	$term = lc $ref_termlist->[$counter]->[0];
228							# XXX - ABREVIATION - XXX
229	0					0	@tab_termlex = split /[ \-]+/, $term;
230	0					0	$word_found = 0;
231	0					0	$i=0;
232	0		0			0	do {
233	0					0	$word = $tab_termlex[$i];
234	0	0	0			0	if (($word ne "") && ((length($word) > 2) || (scalar(@tab_termlex)==1)) &&
			0
			0
235							((exists $ref_corpus_index->{$word}))) { # || (exists $ref_corpus_index->{$word . "s"})
236	0					0	$word_found = 1;
237	0	0				0	if (!exists $ref_tabh_idtrm_select->{$counter}) {
238	0					0	my %tabhtmp2;
239	0					0	$ref_tabh_idtrm_select->{$counter} = \%tabhtmp2;
240							}
241	0					0	foreach $sent_id (@{$ref_corpus_index->{$word}}) {
	0					0
242	0					0	${$ref_tabh_idtrm_select->{$counter}}{$sent_id} = 1;
	0					0
243							}
244							}
245	0					0	$i++;
246							} while((!$word_found) && ($i < scalar @tab_termlex));
247							}
248
249	0					0	warn "\nEnd of selecting the terms potentialy appearing in the corpus\n";
250							}
251
252							sub term_Selection {
253	7			7	1	15	my ($ref_corpus_index, $ref_termlist, $ref_tabh_idtrm_select, $caseSensitive) = @_;
254	7					9	my $counter;
255							my $term;
256	0					0	my @tab_termlex;
257	0					0	my $termCap;
258	0					0	my @tab_termlexCap;
259	0					0	my $i;
260	0					0	my $word;
261	0					0	my $sent_id;
262	7					10	my $word_found = 0;
263
264	7					10	my @recordedWords;
265
266	7					737	warn "Selecting the terms potentialy appearing in the corpus\n";
267
268	7					25	my %tabh_numtrm_select;
269
270							# warn "caseSensitive: $caseSensitive\n";
271	7					24	for($counter = 0;$counter < scalar @$ref_termlist;$counter++) {
272	35	100	66			188	if ((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) {
			33
273	10					20	$term = $ref_termlist->[$counter]->[0];
274	10					11	$termCap = $ref_termlist->[$counter]->[0];
275							# warn "passe\n";
276							} else {
277	25					47	$term = lc $ref_termlist->[$counter]->[0];
278	25					52	$termCap = $ref_termlist->[$counter]->[0];
279							}
280							# warn "+++> $term ($termCap)\n";
281							# XXX - ABREVIATION - XXX
282							# @tab_termlex = split /[ \-:]+/, $term;
283	35					134	@tab_termlex = split /[ ()\',\[\]\?\!:;\/\.\+\-\*\#\{\}\n]+/, $term;
284	35					108	@tab_termlexCap = split /[ ()\',\[\]\?\!:;\/\.\+\-\*\#\{\}\n]+/, $termCap;
285							# @tab_termlex = split /[ \-:]+/, $term;
286							# @tab_termlexCap = split /[ \-:]+/, $termCap;
287	35					45	$word_found = 0;
288	35					36	$i=0;
289	35					44	@recordedWords = ();
290	35					46	$word = $tab_termlex[$i];
291							# warn join(':', @tab_termlex) . " -- " . join(':', @tab_termlexCap) . "\n";
292							# warn scalar(@tab_termlex) . " -- " . scalar(@tab_termlexCap) . " ($i)\n";
293	35		66			287	while(($i < scalar(@tab_termlex)) && ($i < scalar(@tab_termlexCap)) &&
			66
			66
294							((($word eq "") || (exists $ref_corpus_index->{$word})) ||
295							((($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
296							(exists $ref_corpus_index->{$tab_termlexCap[$i]})))
297							) {
298	61	50				119	if ($word ne "") {
299							# warn "---> $term\n";
300	61					85	push @recordedWords, $word;
301							# } else {
302							# warn "--------------------------> $term\n";
303							}
304	61					61	$i++;
305	61					264	$word = $tab_termlex[$i];
306							# warn "i: $i\n";
307							}
308	35	100				96	if ($i == scalar(@tab_termlex)) {
309	34					43	foreach $word (@recordedWords) {
310	61	100				122	if (!exists $ref_tabh_idtrm_select->{$counter}) {
311	34					36	my %tabhtmp2;
312	34					73	$ref_tabh_idtrm_select->{$counter} = \%tabhtmp2;
313							}
314	61					84	foreach $sent_id (@{$ref_corpus_index->{$word}}) {
	61					124
315	131					126	${$ref_tabh_idtrm_select->{$counter}}{$sent_id} = 1;
	131					373
316							}
317							}
318							}
319							}
320	7					936	warn "Size of the selected list: " . scalar (keys %$ref_tabh_idtrm_select) . "\n";
321							# foreach $counter (keys %$ref_tabh_idtrm_select) {
322							# warn $ref_termlist->[$counter]->[0] . "\n";
323							# }
324
325	7					1419	warn "\nEnd of selecting the terms potentialy appearing in the corpus\n";
326							}
327
328							sub term_tagging_offset {
329	7			7	1	16	my ($ref_termlist, $ref_regex_termlist, $ref_tabh_idtrm_select, $ref_tabh_corpus, $offset_tagged_corpus_name, $caseSensitive) = @_;
330	7					11	my $counter;
331							my $term_regex;
332	0					0	my $sent_id;
333	0					0	my $line;
334
335							# XXX - ABREVIATION - XXX => regex
336
337	7					960	warn "Term tagging\n";
338
339	7	50				375	open TAGGEDCORPUS, ">>$offset_tagged_corpus_name" or die "$0: $offset_tagged_corpus_name: No such file\n";
340
341	7					39	binmode(TAGGEDCORPUS, ":utf8");
342
343	7					26	foreach $counter (keys %$ref_tabh_idtrm_select) {
344	34					103	$term_regex = $ref_regex_termlist->[$counter];
345	34					44	foreach $sent_id (keys %{$ref_tabh_idtrm_select->{$counter}}){
	34					112
346	58					99	$line = $ref_tabh_corpus->{$sent_id};
347	58					2053	print STDERR ".";
348
349	58	100	66			3032	if ((((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) &&
			33
			100
			66
			66
			66
350							($line =~ /[,.?!:;\/ \n\-\/\'\#\{\}\[\]\+]($term_regex)[,.?!:;\/ \n\-\/\'\#\[\]\{\}\+]/)) ||
351							(((!defined $caseSensitive) || ($caseSensitive < 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
352							($line =~ /[,.?!:;\/ \n\-\/\'\#\{\}\[\]\+]($term_regex)[,.?!:;\/ \n\-\/\'\#\[\]\{\}\+]/i))) {
353	37					111	printMatchingTerm(\*TAGGEDCORPUS, $ref_termlist->[$counter], $sent_id);
354							}
355	58	50	66			2122	if ((((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) &&
			33
			66
			66
			33
			33
356							($line =~ /^($term_regex)[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+]/i)) ||
357							(((!defined $caseSensitive) || ($caseSensitive < 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
358							($line =~ /^($term_regex)[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+]/i))) {
359	0					0	printMatchingTerm(\*TAGGEDCORPUS, $ref_termlist->[$counter], $sent_id);
360							}
361	58	50	66			2118	if ((((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) &&
			33
			66
			66
			33
			33
362							($line =~ /[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+]($term_regex)$/)) ||
363							(((!defined $caseSensitive) || ($caseSensitive < 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
364							($line =~ /[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+]($term_regex)$/i))) {
365	0					0	printMatchingTerm(\*TAGGEDCORPUS, $ref_termlist->[$counter], $sent_id);
366							}
367							}
368	34					3437	print STDERR "\n";
369							}
370
371	7					334	close TAGGEDCORPUS;
372
373							#########################################################################################################
374	7					1217	warn "\nEnd of term tagging\n";
375							}
376
377							sub printMatchingTerm() {
378	37			37	1	59	my ($descriptor, $ref_matching_term, $sent_id) = @_;
379
380	37					161	print $descriptor "$sent_id\t";
381	37					83	print $descriptor join("\t", @$ref_matching_term);
382	37					87	print $descriptor "\n";
383
384							}
385
386
387							sub term_tagging_offset_tab {
388	0			0	1		my ($ref_termlist, $ref_regex_termlist, $ref_tabh_idtrm_select, $ref_tabh_corpus, $ref_tab_results, $caseSensitive) = @_;
389	0						my $counter;
390							my $term_regex;
391	0						my $sent_id;
392	0						my $line;
393	0						my $i;
394	0						my $size_termselect = scalar(keys %$ref_tabh_idtrm_select);
395
396	0						$i = 0;
397
398							# XXX - ABREVIATION - XXX => regex
399							# warn "====> $caseSensitive\n";
400
401	0						foreach $counter (keys %$ref_tabh_idtrm_select) {
402	0						printf STDERR "Term tagging... %0.1f%%\r", ($i/$size_termselect)*100 ;
403	0						$term_regex = $ref_regex_termlist->[$counter];
404							# warn "counter: $counter ($term_regex)\n";
405
406	0						foreach $sent_id (keys %{$ref_tabh_idtrm_select->{$counter}}){
	0
407	0						$line = $ref_tabh_corpus->{$sent_id};
408
409							# warn "$line\n$term_regex\n";
410
411	0	0	0				if ((((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) &&
			0
			0
			0
			0
			0
412							($line =~ /[,.?!:;\/ \n\-\/\'\#\{\}\[\]\+](?<term>$term_regex)[,.?!:;\/ \n\-\/\'\#\[\]\{\}\+]/s)) ||
413							(((!defined $caseSensitive) || ($caseSensitive < 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
414							($line =~ /[,.?!:;\/ \n\-\/\'\#\{\}\[\]\+](?<term>$term_regex)[,.?!:;\/ \n\-\/\'\#\[\]\{\}\+]/is))) {
415	3			3		2916	printMatchingTerm_tab($ref_termlist->[$counter], $+{term}, $sent_id, $ref_tab_results);
	3					1680
	3					1358
	0
416							}
417	0	0	0				if ((((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) &&
			0
			0
			0
			0
			0
418							($line =~ /^(?<term>$term_regex)[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+]/s)) ||
419							(((!defined $caseSensitive) || ($caseSensitive < 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
420							($line =~ /^(?<term>$term_regex)[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+]/is))) {
421	0						printMatchingTerm_tab($ref_termlist->[$counter], $+{term}, $sent_id, $ref_tab_results);
422							}
423	0	0	0				if ((((defined $caseSensitive) && (($caseSensitive == 0) || (length($ref_termlist->[$counter]->[0]) <= $caseSensitive))) &&
			0
			0
			0
			0
			0
424							($line =~ /[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+](?<term>$term_regex)$/s)) ||
425							(((!defined $caseSensitive) || ($caseSensitive < 0) || (length($ref_termlist->[$counter]->[0]) > $caseSensitive)) &&
426							($line =~ /[,.?!:;\/ \n\-\/\*'\#\[\]\{\}\+](?<term>$term_regex)$/is))) {
427	0						printMatchingTerm_tab($ref_termlist->[$counter], $+{term}, $sent_id, $ref_tab_results);
428							}
429							}
430	0						$i++;
431							}
432	0						print STDERR "\n";
433
434							#########################################################################################################
435	0						warn "\nEnd of term tagging\n";
436							}
437
438							sub printMatchingTerm_tab() {
439	0			0	1		my ($ref_matching_term, $term, $sent_id, $ref_tab_results) = @_;
440
441	0						my $tmp_line = "";
442	0						my $tmp_key;
443
444							# warn "\nOK\n";
445
446	0	0					if (ref($ref_tab_results) eq "ARRAY") {
447	0						$tmp_line .= "$sent_id\t";
448	0						$tmp_line .= join ("\t", @$ref_matching_term);
449	0						push @$ref_tab_results, $tmp_line;
450							} else {
451	0	0					if (ref($ref_tab_results) eq "HASH") {
452	0						my @tab_tmp;
453	0						$term =~ s/\\([\-\+\(\)\{\}])/$1/og;
454	0						$tmp_key .= $sent_id . "_";
455	0						$tmp_key .= $term;
456
457	0						push @tab_tmp, $sent_id;
458	0						push @tab_tmp, @$ref_matching_term;
459
460							# if (!exists $ref_tab_results->{$tmp_key}) {
461	0						$ref_tab_results->{$tmp_key} = \@tab_tmp;
462							# } else {
463							# foreach $refmatch (@{$ref_tab_results->{$tmp_key}}) {
464
465							# }
466							# }
467							}
468							}
469							}
470
471							1;
472
473							__END__