File Coverage

lib/Lingua/YaTeA/TestifiedTermParser.yp

Criterion	Covered	Total	%
statement	15	102	14.7
branch	0	34	0.0
condition	0	9	0.0
subroutine	5	28	17.8
pod	0	1	0.0
total	20	174	11.4

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# TestifiedTermParser.yp
3							#
4							# used to generate Lingua::YaTeA::TestifiedTermParser.pm
5							#
6							# Use: yapp -m 'Lingua::YaTeA::TestifiedTermParser' -o lib/Lingua/YaTeA/TestifiedTermParser.pm lib/Lingua/YaTeA/TestifiedTermParser.yp
7							#
8							# Parse::Yapp input grammar for parsing the yatea testified terms
9							#
10							#
11							#
12							%{
13	5			5		1215	use Lingua::YaTeA;
	5					7
	5					26
14	5			5		118	use Data::Dumper;
	5					7
	5					181
15	5			5		20	use warnings;
	5					7
	5					92
16	5			5		20	use UNIVERSAL;
	5					6
	5					19
17	5			5		139	use Scalar::Util qw(blessed);
	5					11
	5					8523
18							my @words;
19							my $word;
20							my $item;
21							my @infos;
22							my @IF;
23							my @POS;
24							my @LF;
25							my $src;
26							my @lex_items;
27							my $testified;
28							my $i;
29							my $tree;
30							my $node_set;
31							my $node;
32							my $edge;
33							my $index = 0;
34							my @uncomplete;
35							my $level = 0;
36							my $num_line =1;
37							%}
38							%%
39							input: #empty
40	0			0	0		\| input line { #print STDERR "\n INPUT \n";
				0
41	0	0					}
42							;
43
44							line: '\n' { # print "VIDE: " . $_[1]
45	0			0			$num_line++;
46							}
47							\| testified {
48	0			0			$num_line++;
49	0						@lex_items = ();
50	0						@words= ();
51	0						my $testified;
52							# print STDERR "=>$_[1]\n";
53							my $testified_infos;
54	0	0					if($_[0]->YYData->{TTS}->getTestifiedInfos(\$testified_infos,\@IF,\@POS,\@LF,$src,\@lex_items,$_[0]->YYData->{MATCH},$_[0]->YYData->{FILTERING_LEXICON},$_[0]->YYData->{TAGSET}) == 1) {
55	0	0					if(scalar @lex_items > 1) {
56	0						$testified = Lingua::YaTeA::MultiWordTestifiedTerm->new($testified_infos->{"NUM_CONTENT_WORDS"},\@lex_items,$_[0]->YYData->{TAGSET},$src,$_[0]->YYData->{MATCH});
57							}
58							}
59	0	0	0				if ((blessed($testified)) && ($testified->isa('Lingua::YaTeA::TestifiedTerm'))) {
60							#print STDERR "ajout tt: " . $testified->getIF . "\n";
61	0						$_[0]->YYData->{TTS}->addTestified($testified);
62
63	0	0	0				if ((blessed($testified)) && ($testified->isa('Lingua::YaTeA::MultiWordTestifiedTerm'))) {
64	0						$tree = Lingua::YaTeA::Tree->new;
65	0						$tree->setNodeSet($node_set);
66							# $tree->print($testified_infos->{"WORDS"});
67	0						$tree->setIndexSet($testified->getIndexSet);
68	0						$tree->setHead;
69	0						$testified->addTree($tree);
70	0						$testified->setParsingMethod("USER");
71							}
72							}
73							# print "fin creation :" . $testified->getIF . "\n";
74	0						$level = 0;
75	0						$index = 0;
76							}
77							;
78
79
80							testified: {
81	0			0			$node_set = Lingua::YaTeA::NodeSet->new;
82							}
83				0			OPEN parsing END infos { #print "trouve testified2 $_[1]\n";
84							}
85	0			0			\| error '\nTESTIFIED: ' { $_[0]->YYErrok }
86							;
87
88							infos: INFOS {
89							# print "infos $_[1]\n";
90	0			0			@infos = split /\t/, $_[1];
91	0						@IF = split / /, $infos[0];
92	0						@POS = split / /, $infos[1];
93	0						@LF = split / /, $infos[2];
94	0						$src = $infos[3];
95							}
96							;
97
98
99
100							parsing: CANDIDATE PREP DET CANDIDATE
101							\| CANDIDATE PREP CANDIDATE
102							\| CANDIDATE PREP PREP CANDIDATE
103							\| CANDIDATE DET CANDIDATE
104				0			\| CANDIDATE CANDIDATE {
105							# print STDERR "PARSING $_[1]\n"
106							}
107							\|
108	0			0			\| error '\nPARSING: ' { $_[0]->YYErrok }
109							;
110
111
112							PREP: WORD P_STATUS {
113							# print STDERR "PREP $_[1] $_[2]\n";
114	0			0			$node->{"PREP"} = Lingua::YaTeA::TermLeaf->new($index);
115	0						$index++;
116							};
117
118							DET: WORD D_STATUS {
119							# print STDERR "DET $_[1] $_[2]\n";
120	0			0			$node->{"DET"} = Lingua::YaTeA::TermLeaf->new($index);
121	0						$index++;
122							};
123
124							CANDIDATE: WORD C_STATUS{
125							# print STDERR "CANDIDATE1 $_[1] $_[2]\n";
126	0			0			$edge = Lingua::YaTeA::TermLeaf->new($index);
127	0						$node->addEdge($edge,$_[2]);
128							# print "ajout du edge :" ;
129							# print Dumper($edge) . "\n";
130	0						$index++;
131							}
132				0			\| OPEN parsing CLOSE {
133							#print STDERR "CANDIDATE2 $_[1]\n";
134							}
135	0			0			\| error '\nCANDIDATE: ' { $_[0]->YYErrok }
136							;
137
138							OPEN: OPEN_TAG {
139							# print STDERR "OPEN $_[1]\n";
140	0	0		0			if ($level == 0)
141							{
142	0						$node = Lingua::YaTeA::RootNode->new($level);
143							}
144							else
145							{
146	0						$node = Lingua::YaTeA::InternalNode->new($level);
147							}
148	0						$node_set->addNode($node);
149	0						push @uncomplete, $node;
150	0						$level++;
151							}
152	0			0			\| error '\nOPEN: ' { $_[0]->YYErrok }
153							;
154
155
156				0			END: END_TAG {
157							# print STDERR "END $_[1]\n";
158							}
159	0			0			\| error '\nEND: ' { $_[0]->YYErrok }
160							;
161
162							CLOSE: END_TAG C_STATUS {
163							# print STDERR "CLOSE_TAG $_[1] $_[2]\n";
164	0			0			pop @uncomplete;
165	0						$node->linkToFather(\@uncomplete,$_[2]);
166	0						$node = $uncomplete[$#uncomplete];
167	0						$level--;
168							}
169	0			0			\| error '\nCLOSE: ' { $_[0]->YYErrok }
170	0						;
171
172
173	0						%%
174
175							sub _Error {
176							exists $_[0]->YYData->{ERRMSG}
177	0	0		0			and do {
178	0						print $_[0]->YYData->{ERRMSG};
179	0						delete $_[0]->YYData->{ERRMSG};
180	0						return;
181							};
182	0						print "EXPECT: ";
183	0						print $_[0]->YYExpect . "\n";
184	0						print "CURTOK: ";
185	0						print "-" . $_[0]->YYCurtok . "-\n";
186	0						print "CURVAL: ";
187	0						print $_[0]->YYCurval . "\n";
188	0						print "Lexer: ";
189	0						print Dumper($_[0]->YYLexer) . "\n";
190	0						print "Syntax error.\n";
191							}
192
193							sub _Lexer {
194	0			0			my($parser)=shift;
195	0						my $fh = $parser->YYData->{FH};
196
197
198	0						my $open = '(\()';
199	0						my $word = $parser->YYData->{WORD};;
200	0						my $close = '(\)<=[MH]>)';
201	0						my $end = '(\))';
202	0						my $d_status = '<=(D)>';
203	0						my $p_status = '<=(P)>';
204	0						my $c_status = '<=([MH])>';
205	0						my $infos = '\t(.+)';
206
207
208							$parser->YYData->{INPUT}
209							or $parser->YYData->{INPUT} = <$fh>
210	0	0	0				or return('',undef);
211
212	0						$parser->YYData->{INPUT}=~s/^[ \t]#.//;
213
214
215	0						for ($parser->YYData->{INPUT}) {
216							#print "TEST-" .$parser->YYData->{INPUT}. "-\n";
217	0	0					s/^$open\s*// and return ('OPEN_TAG', $1);
218	0	0					s/^$end// and return('END_TAG', $1);
219	0	0					s/^$word\s*// and return ('WORD', $1);
220	0	0					s/^$c_status\s*// and return ('C_STATUS', $1);
221	0	0					s/^$d_status\s*// and return ('D_STATUS', $1);
222	0	0					s/^$p_status\s*// and return ('P_STATUS', $1);
223	0	0					s/^$close\s*// and return('CLOSE_TAG', $1);
224	0	0					s/^$infos\s*// and return('INFOS', $1, $2);
225	0	0					s/^.+//s and return($1,$1);
226							}
227							}
228
229							=head1 NAME
230
231							Lingua::YaTeA::TestifiedTermParser - Perl extension for the parser of testified term file (based on Parse::Yapp)
232
233							=head1 SYNOPSIS
234
235							use Lingua::YaTeA::TestifiedTermParser;
236
237							my $fh = FileHandle->new("<$file_path");
238
239							my $parser = Lingua::YaTeA::TestifiedTermParser->new();
240
241							$parser->YYData->{TTS} = $this;
242							$parser->YYData->{WORD} = $word_characters_regexp;
243							$parser->YYData->{TAGSET} = $tag_set;
244							$parser->YYData->{MATCH} = $match_type;
245							$parser->YYData->{FH} = $fh;
246							$parser->YYData->{FILTERING_LEXICON} = $filtering_lexicon_h;
247
248							$parser->YYParse(yylex => \&Lingua::YaTeA::TestifiedTermParser::_Lexer, yyerror => \&Lingua::YaTeA::TestifiedTermParser::_Error);
249
250
251							=head1 DESCRIPTION
252
253							The module implements a parser for analysing a testified term file.
254
255							The parser takes into account several pieces of information: the word
256							character list (field C<WORD>), i.e. all the possible characters in a
257							word, the Part-of-Speech tagset (field C<TAGSET>), the type of matching
258							(field C<MATCH>), the file handle to read (field C<FH>), and the lexicon
259							of the corpus (field C<FILTERING_LEXICON>).
260
261							=head1 METHODS
262
263							=head2 _Error()
264
265							_Error($error_objet);
266
267							The method is used to manage the parsing error and prints a message
268							explaining the error.
269
270							=head2 _Lexer()
271
272							_Lexer($parser_info);
273
274							The method applies the parser on the data contained in the structure
275							C<$parser_info> (field C<INPUT>).
276
277							=head1 SEE ALSO
278
279							Sophie Aubin and Thierry Hamon. Improving Term Extraction with
280							Terminological Resources. In Advances in Natural Language Processing
281							(5th International Conference on NLP, FinTAL 2006). pages
282							380-387. Tapio Salakoski, Filip Ginter, Sampo Pyysalo, Tapio Pahikkala
283							(Eds). August 2006. LNAI 4139.
284
285
286							=head1 AUTHOR
287
288							Thierry Hamon and Sophie Aubin
289
290							=head1 COPYRIGHT AND LICENSE
291
292							Copyright (C) 2005 by Thierry Hamon and Sophie Aubin
293
294							This library is free software; you can redistribute it and/or modify
295							it under the same terms as Perl itself, either Perl version 5.8.6 or,
296							at your option, any later version of Perl 5 you may have available.
297
298							=cut