line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#################################################################### |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# This file was generated using Parse::Yapp version 1.05. |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Don't edit this file, use source file instead. |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# ANY CHANGE MADE HERE WILL BE LOST ! |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
#################################################################### |
10
|
|
|
|
|
|
|
package Lingua::YaTeA::TestifiedTermParser; |
11
|
5
|
|
|
5
|
|
43
|
use vars qw ( @ISA ); |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
307
|
|
12
|
5
|
|
|
5
|
|
32
|
use strict; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
192
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
@ISA= qw ( Parse::Yapp::Driver ); |
15
|
5
|
|
|
5
|
|
32
|
use Parse::Yapp::Driver; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
40
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
#line 12 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use Lingua::YaTeA; |
20
|
|
|
|
|
|
|
use Data::Dumper; |
21
|
|
|
|
|
|
|
use warnings; |
22
|
|
|
|
|
|
|
use UNIVERSAL; |
23
|
|
|
|
|
|
|
use Scalar::Util qw(blessed); |
24
|
|
|
|
|
|
|
my @words; |
25
|
|
|
|
|
|
|
my $word; |
26
|
|
|
|
|
|
|
my $item; |
27
|
|
|
|
|
|
|
my @infos; |
28
|
|
|
|
|
|
|
my @IF; |
29
|
|
|
|
|
|
|
my @POS; |
30
|
|
|
|
|
|
|
my @LF; |
31
|
|
|
|
|
|
|
my $src; |
32
|
|
|
|
|
|
|
my @lex_items; |
33
|
|
|
|
|
|
|
my $testified; |
34
|
|
|
|
|
|
|
my $i; |
35
|
|
|
|
|
|
|
my $tree; |
36
|
|
|
|
|
|
|
my $node_set; |
37
|
|
|
|
|
|
|
my $node; |
38
|
|
|
|
|
|
|
my $edge; |
39
|
|
|
|
|
|
|
my $index = 0; |
40
|
|
|
|
|
|
|
my @uncomplete; |
41
|
|
|
|
|
|
|
my $level = 0; |
42
|
|
|
|
|
|
|
my $num_line =1; |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub new { |
46
|
|
|
|
|
|
|
my($class)=shift; |
47
|
|
|
|
|
|
|
ref($class) |
48
|
|
|
|
|
|
|
and $class=ref($class); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my($self)=$class->SUPER::new( yyversion => '1.05', |
51
|
|
|
|
|
|
|
yystates => |
52
|
|
|
|
|
|
|
[ |
53
|
|
|
|
|
|
|
{#State 0 |
54
|
|
|
|
|
|
|
DEFAULT => -1, |
55
|
|
|
|
|
|
|
GOTOS => { |
56
|
|
|
|
|
|
|
'input' => 1 |
57
|
|
|
|
|
|
|
} |
58
|
|
|
|
|
|
|
}, |
59
|
|
|
|
|
|
|
{#State 1 |
60
|
|
|
|
|
|
|
ACTIONS => { |
61
|
|
|
|
|
|
|
'' => 2, |
62
|
|
|
|
|
|
|
"\n" => 5, |
63
|
|
|
|
|
|
|
'error' => 6, |
64
|
|
|
|
|
|
|
'OPEN_TAG' => -5 |
65
|
|
|
|
|
|
|
}, |
66
|
|
|
|
|
|
|
GOTOS => { |
67
|
|
|
|
|
|
|
'@1-0' => 3, |
68
|
|
|
|
|
|
|
'testified' => 4, |
69
|
|
|
|
|
|
|
'line' => 7 |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
}, |
72
|
|
|
|
|
|
|
{#State 2 |
73
|
|
|
|
|
|
|
DEFAULT => 0 |
74
|
|
|
|
|
|
|
}, |
75
|
|
|
|
|
|
|
{#State 3 |
76
|
|
|
|
|
|
|
ACTIONS => { |
77
|
|
|
|
|
|
|
'error' => 9, |
78
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
79
|
|
|
|
|
|
|
}, |
80
|
|
|
|
|
|
|
GOTOS => { |
81
|
|
|
|
|
|
|
'OPEN' => 8 |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
}, |
84
|
|
|
|
|
|
|
{#State 4 |
85
|
|
|
|
|
|
|
DEFAULT => -4 |
86
|
|
|
|
|
|
|
}, |
87
|
|
|
|
|
|
|
{#State 5 |
88
|
|
|
|
|
|
|
DEFAULT => -3 |
89
|
|
|
|
|
|
|
}, |
90
|
|
|
|
|
|
|
{#State 6 |
91
|
|
|
|
|
|
|
ACTIONS => { |
92
|
|
|
|
|
|
|
"\nTESTIFIED: " => 11 |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
}, |
95
|
|
|
|
|
|
|
{#State 7 |
96
|
|
|
|
|
|
|
DEFAULT => -2 |
97
|
|
|
|
|
|
|
}, |
98
|
|
|
|
|
|
|
{#State 8 |
99
|
|
|
|
|
|
|
ACTIONS => { |
100
|
|
|
|
|
|
|
'END_TAG' => -14, |
101
|
|
|
|
|
|
|
'WORD' => 13, |
102
|
|
|
|
|
|
|
'error' => 15, |
103
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
104
|
|
|
|
|
|
|
}, |
105
|
|
|
|
|
|
|
GOTOS => { |
106
|
|
|
|
|
|
|
'OPEN' => 12, |
107
|
|
|
|
|
|
|
'CANDIDATE' => 14, |
108
|
|
|
|
|
|
|
'parsing' => 16 |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
}, |
111
|
|
|
|
|
|
|
{#State 9 |
112
|
|
|
|
|
|
|
ACTIONS => { |
113
|
|
|
|
|
|
|
"\nOPEN: " => 17 |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
}, |
116
|
|
|
|
|
|
|
{#State 10 |
117
|
|
|
|
|
|
|
DEFAULT => -21 |
118
|
|
|
|
|
|
|
}, |
119
|
|
|
|
|
|
|
{#State 11 |
120
|
|
|
|
|
|
|
DEFAULT => -7 |
121
|
|
|
|
|
|
|
}, |
122
|
|
|
|
|
|
|
{#State 12 |
123
|
|
|
|
|
|
|
ACTIONS => { |
124
|
|
|
|
|
|
|
'END_TAG' => -14, |
125
|
|
|
|
|
|
|
'WORD' => 13, |
126
|
|
|
|
|
|
|
'error' => 15, |
127
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
128
|
|
|
|
|
|
|
}, |
129
|
|
|
|
|
|
|
GOTOS => { |
130
|
|
|
|
|
|
|
'OPEN' => 12, |
131
|
|
|
|
|
|
|
'CANDIDATE' => 14, |
132
|
|
|
|
|
|
|
'parsing' => 18 |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
}, |
135
|
|
|
|
|
|
|
{#State 13 |
136
|
|
|
|
|
|
|
ACTIONS => { |
137
|
|
|
|
|
|
|
'C_STATUS' => 19 |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
}, |
140
|
|
|
|
|
|
|
{#State 14 |
141
|
|
|
|
|
|
|
ACTIONS => { |
142
|
|
|
|
|
|
|
'WORD' => 20, |
143
|
|
|
|
|
|
|
'error' => 23, |
144
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
145
|
|
|
|
|
|
|
}, |
146
|
|
|
|
|
|
|
GOTOS => { |
147
|
|
|
|
|
|
|
'OPEN' => 12, |
148
|
|
|
|
|
|
|
'PREP' => 21, |
149
|
|
|
|
|
|
|
'CANDIDATE' => 22, |
150
|
|
|
|
|
|
|
'DET' => 24 |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
}, |
153
|
|
|
|
|
|
|
{#State 15 |
154
|
|
|
|
|
|
|
ACTIONS => { |
155
|
|
|
|
|
|
|
"\nPARSING: " => 26, |
156
|
|
|
|
|
|
|
"\nCANDIDATE: " => 25, |
157
|
|
|
|
|
|
|
"\nOPEN: " => 17 |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
}, |
160
|
|
|
|
|
|
|
{#State 16 |
161
|
|
|
|
|
|
|
ACTIONS => { |
162
|
|
|
|
|
|
|
'END_TAG' => 27, |
163
|
|
|
|
|
|
|
'error' => 28 |
164
|
|
|
|
|
|
|
}, |
165
|
|
|
|
|
|
|
GOTOS => { |
166
|
|
|
|
|
|
|
'END' => 29 |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
}, |
169
|
|
|
|
|
|
|
{#State 17 |
170
|
|
|
|
|
|
|
DEFAULT => -22 |
171
|
|
|
|
|
|
|
}, |
172
|
|
|
|
|
|
|
{#State 18 |
173
|
|
|
|
|
|
|
ACTIONS => { |
174
|
|
|
|
|
|
|
'END_TAG' => 30, |
175
|
|
|
|
|
|
|
'error' => 31 |
176
|
|
|
|
|
|
|
}, |
177
|
|
|
|
|
|
|
GOTOS => { |
178
|
|
|
|
|
|
|
'CLOSE' => 32 |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
}, |
181
|
|
|
|
|
|
|
{#State 19 |
182
|
|
|
|
|
|
|
DEFAULT => -18 |
183
|
|
|
|
|
|
|
}, |
184
|
|
|
|
|
|
|
{#State 20 |
185
|
|
|
|
|
|
|
ACTIONS => { |
186
|
|
|
|
|
|
|
'P_STATUS' => 33, |
187
|
|
|
|
|
|
|
'C_STATUS' => 19, |
188
|
|
|
|
|
|
|
'D_STATUS' => 34 |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
}, |
191
|
|
|
|
|
|
|
{#State 21 |
192
|
|
|
|
|
|
|
ACTIONS => { |
193
|
|
|
|
|
|
|
'WORD' => 20, |
194
|
|
|
|
|
|
|
'error' => 23, |
195
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
196
|
|
|
|
|
|
|
}, |
197
|
|
|
|
|
|
|
GOTOS => { |
198
|
|
|
|
|
|
|
'OPEN' => 12, |
199
|
|
|
|
|
|
|
'PREP' => 35, |
200
|
|
|
|
|
|
|
'CANDIDATE' => 36, |
201
|
|
|
|
|
|
|
'DET' => 37 |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
}, |
204
|
|
|
|
|
|
|
{#State 22 |
205
|
|
|
|
|
|
|
DEFAULT => -13 |
206
|
|
|
|
|
|
|
}, |
207
|
|
|
|
|
|
|
{#State 23 |
208
|
|
|
|
|
|
|
ACTIONS => { |
209
|
|
|
|
|
|
|
"\nCANDIDATE: " => 25, |
210
|
|
|
|
|
|
|
"\nOPEN: " => 17 |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
}, |
213
|
|
|
|
|
|
|
{#State 24 |
214
|
|
|
|
|
|
|
ACTIONS => { |
215
|
|
|
|
|
|
|
'WORD' => 13, |
216
|
|
|
|
|
|
|
'error' => 23, |
217
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
218
|
|
|
|
|
|
|
}, |
219
|
|
|
|
|
|
|
GOTOS => { |
220
|
|
|
|
|
|
|
'OPEN' => 12, |
221
|
|
|
|
|
|
|
'CANDIDATE' => 38 |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
}, |
224
|
|
|
|
|
|
|
{#State 25 |
225
|
|
|
|
|
|
|
DEFAULT => -20 |
226
|
|
|
|
|
|
|
}, |
227
|
|
|
|
|
|
|
{#State 26 |
228
|
|
|
|
|
|
|
DEFAULT => -15 |
229
|
|
|
|
|
|
|
}, |
230
|
|
|
|
|
|
|
{#State 27 |
231
|
|
|
|
|
|
|
DEFAULT => -23 |
232
|
|
|
|
|
|
|
}, |
233
|
|
|
|
|
|
|
{#State 28 |
234
|
|
|
|
|
|
|
ACTIONS => { |
235
|
|
|
|
|
|
|
"\nEND: " => 39 |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
}, |
238
|
|
|
|
|
|
|
{#State 29 |
239
|
|
|
|
|
|
|
ACTIONS => { |
240
|
|
|
|
|
|
|
'INFOS' => 41 |
241
|
|
|
|
|
|
|
}, |
242
|
|
|
|
|
|
|
GOTOS => { |
243
|
|
|
|
|
|
|
'infos' => 40 |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
}, |
246
|
|
|
|
|
|
|
{#State 30 |
247
|
|
|
|
|
|
|
ACTIONS => { |
248
|
|
|
|
|
|
|
'C_STATUS' => 42 |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
}, |
251
|
|
|
|
|
|
|
{#State 31 |
252
|
|
|
|
|
|
|
ACTIONS => { |
253
|
|
|
|
|
|
|
"\nCLOSE: " => 43 |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
}, |
256
|
|
|
|
|
|
|
{#State 32 |
257
|
|
|
|
|
|
|
DEFAULT => -19 |
258
|
|
|
|
|
|
|
}, |
259
|
|
|
|
|
|
|
{#State 33 |
260
|
|
|
|
|
|
|
DEFAULT => -16 |
261
|
|
|
|
|
|
|
}, |
262
|
|
|
|
|
|
|
{#State 34 |
263
|
|
|
|
|
|
|
DEFAULT => -17 |
264
|
|
|
|
|
|
|
}, |
265
|
|
|
|
|
|
|
{#State 35 |
266
|
|
|
|
|
|
|
ACTIONS => { |
267
|
|
|
|
|
|
|
'WORD' => 13, |
268
|
|
|
|
|
|
|
'error' => 23, |
269
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
270
|
|
|
|
|
|
|
}, |
271
|
|
|
|
|
|
|
GOTOS => { |
272
|
|
|
|
|
|
|
'OPEN' => 12, |
273
|
|
|
|
|
|
|
'CANDIDATE' => 44 |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
}, |
276
|
|
|
|
|
|
|
{#State 36 |
277
|
|
|
|
|
|
|
DEFAULT => -10 |
278
|
|
|
|
|
|
|
}, |
279
|
|
|
|
|
|
|
{#State 37 |
280
|
|
|
|
|
|
|
ACTIONS => { |
281
|
|
|
|
|
|
|
'WORD' => 13, |
282
|
|
|
|
|
|
|
'error' => 23, |
283
|
|
|
|
|
|
|
'OPEN_TAG' => 10 |
284
|
|
|
|
|
|
|
}, |
285
|
|
|
|
|
|
|
GOTOS => { |
286
|
|
|
|
|
|
|
'OPEN' => 12, |
287
|
|
|
|
|
|
|
'CANDIDATE' => 45 |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
}, |
290
|
|
|
|
|
|
|
{#State 38 |
291
|
|
|
|
|
|
|
DEFAULT => -12 |
292
|
|
|
|
|
|
|
}, |
293
|
|
|
|
|
|
|
{#State 39 |
294
|
|
|
|
|
|
|
DEFAULT => -24 |
295
|
|
|
|
|
|
|
}, |
296
|
|
|
|
|
|
|
{#State 40 |
297
|
|
|
|
|
|
|
DEFAULT => -6 |
298
|
|
|
|
|
|
|
}, |
299
|
|
|
|
|
|
|
{#State 41 |
300
|
|
|
|
|
|
|
DEFAULT => -8 |
301
|
|
|
|
|
|
|
}, |
302
|
|
|
|
|
|
|
{#State 42 |
303
|
|
|
|
|
|
|
DEFAULT => -25 |
304
|
|
|
|
|
|
|
}, |
305
|
|
|
|
|
|
|
{#State 43 |
306
|
|
|
|
|
|
|
DEFAULT => -26 |
307
|
|
|
|
|
|
|
}, |
308
|
|
|
|
|
|
|
{#State 44 |
309
|
|
|
|
|
|
|
DEFAULT => -11 |
310
|
|
|
|
|
|
|
}, |
311
|
|
|
|
|
|
|
{#State 45 |
312
|
|
|
|
|
|
|
DEFAULT => -9 |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
], |
315
|
|
|
|
|
|
|
yyrules => |
316
|
|
|
|
|
|
|
[ |
317
|
|
|
|
|
|
|
[#Rule 0 |
318
|
|
|
|
|
|
|
'$start', 2, undef |
319
|
|
|
|
|
|
|
], |
320
|
|
|
|
|
|
|
[#Rule 1 |
321
|
|
|
|
|
|
|
'input', 0, undef |
322
|
|
|
|
|
|
|
], |
323
|
|
|
|
|
|
|
[#Rule 2 |
324
|
|
|
|
|
|
|
'input', 2, |
325
|
|
|
|
|
|
|
sub |
326
|
|
|
|
|
|
|
#line 40 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
327
|
|
|
|
|
|
|
{ #print STDERR "\n INPUT \n"; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
], |
330
|
|
|
|
|
|
|
[#Rule 3 |
331
|
|
|
|
|
|
|
'line', 1, |
332
|
|
|
|
|
|
|
sub |
333
|
|
|
|
|
|
|
#line 44 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
334
|
|
|
|
|
|
|
{ # print "VIDE: " . $_[1] |
335
|
|
|
|
|
|
|
$num_line++; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
], |
338
|
|
|
|
|
|
|
[#Rule 4 |
339
|
|
|
|
|
|
|
'line', 1, |
340
|
|
|
|
|
|
|
# Semantic action for rule 4, "line: testified".
#
# Runs once a whole testified-term line has been reduced.  At that point the
# INFOS action has filled @IF, @POS, @LF and $src, and the OPEN/CANDIDATE/CLOSE
# actions have filled $node_set.
#
#   $_[0] - the parser; its YYData hash supplies TTS, MATCH,
#           FILTERING_LEXICON and TAGSET.
#
# Only terms made of more than one lexical item are turned into a
# MultiWordTestifiedTerm here; anything else is dropped without a message.
# The value of the action is the final assignment (0).
#
# NOTE: this file is generated by Parse::Yapp; the same change has to be made
# in lib/Lingua/YaTeA/TestifiedTermParser.yp or it will be lost on regeneration.
sub
#line 47 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $parser = $_[0];
    my $data   = $parser->YYData;

    $num_line++;
    @lex_items = ();
    @words     = ();

    # Deliberately local: shadows the file-level $testified.
    my $testified;
    my $testified_infos;

    # getTestifiedInfos() is called first because it is what fills @lex_items.
    my $status = $data->{TTS}->getTestifiedInfos(
        \$testified_infos, \@IF, \@POS, \@LF, $src, \@lex_items,
        $data->{MATCH}, $data->{FILTERING_LEXICON}, $data->{TAGSET},
    );
    if ($status == 1 && scalar @lex_items > 1) {
        $testified = Lingua::YaTeA::MultiWordTestifiedTerm->new(
            $testified_infos->{"NUM_CONTENT_WORDS"},
            \@lex_items, $data->{TAGSET}, $src, $data->{MATCH},
        );
    }

    if (blessed($testified) && $testified->isa('Lingua::YaTeA::TestifiedTerm')) {
        $data->{TTS}->addTestified($testified);

        if ($testified->isa('Lingua::YaTeA::MultiWordTestifiedTerm')) {
            # Attach the user-supplied parse collected in $node_set.
            $tree = Lingua::YaTeA::Tree->new;
            $tree->setNodeSet($node_set);
            $tree->setIndexSet($testified->getIndexSet);
            $tree->setHead;
            $testified->addTree($tree);
            $testified->setParsingMethod("USER");
        }
    }

    # Reset the per-line tree-building state.  @uncomplete must be emptied as
    # well: the root node pushed by OPEN is never popped (END does not pop),
    # so without this one stale node per line would pile up for the whole file.
    @uncomplete = ();
    $level = 0;
    $index = 0;
}
372
|
|
|
|
|
|
|
], |
373
|
|
|
|
|
|
|
[#Rule 5 |
374
|
|
|
|
|
|
|
'@1-0', 0, |
375
|
|
|
|
|
|
|
sub |
376
|
|
|
|
|
|
|
#line 80 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
377
|
|
|
|
|
|
|
{ |
378
|
|
|
|
|
|
|
$node_set = Lingua::YaTeA::NodeSet->new; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
], |
381
|
|
|
|
|
|
|
[#Rule 6 |
382
|
|
|
|
|
|
|
'testified', 5, |
383
|
|
|
|
|
|
|
sub |
384
|
|
|
|
|
|
|
#line 83 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
385
|
|
|
|
|
|
|
{ #print "trouve testified2 $_[1]\n"; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
], |
388
|
|
|
|
|
|
|
[#Rule 7 |
389
|
|
|
|
|
|
|
'testified', 2, |
390
|
|
|
|
|
|
|
sub |
391
|
|
|
|
|
|
|
#line 85 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
392
|
|
|
|
|
|
|
{ $_[0]->YYErrok } |
393
|
|
|
|
|
|
|
], |
394
|
|
|
|
|
|
|
[#Rule 8 |
395
|
|
|
|
|
|
|
'infos', 1, |
396
|
|
|
|
|
|
|
# Semantic action for rule 8, "infos: INFOS".
#
#   $_[1] - value of the INFOS token: the tab-separated fields that follow
#           the bracketed parse on the line.
#
# Splits the record into the file-level variables read later by the
# "line: testified" action: fields 0..2 are each split on single spaces into
# @IF, @POS and @LF, and field 3 is kept whole in $src.
sub
#line 88 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $record = $_[1];

    @infos = split /\t/, $record;
    my ($forms, $tags, $lemmas, $origin) = @infos;

    @IF  = split / /, $forms;
    @POS = split / /, $tags;
    @LF  = split / /, $lemmas;
    $src = $origin;
}
406
|
|
|
|
|
|
|
], |
407
|
|
|
|
|
|
|
[#Rule 9 |
408
|
|
|
|
|
|
|
'parsing', 4, undef |
409
|
|
|
|
|
|
|
], |
410
|
|
|
|
|
|
|
[#Rule 10 |
411
|
|
|
|
|
|
|
'parsing', 3, undef |
412
|
|
|
|
|
|
|
], |
413
|
|
|
|
|
|
|
[#Rule 11 |
414
|
|
|
|
|
|
|
'parsing', 4, undef |
415
|
|
|
|
|
|
|
], |
416
|
|
|
|
|
|
|
[#Rule 12 |
417
|
|
|
|
|
|
|
'parsing', 3, undef |
418
|
|
|
|
|
|
|
], |
419
|
|
|
|
|
|
|
[#Rule 13 |
420
|
|
|
|
|
|
|
'parsing', 2, |
421
|
|
|
|
|
|
|
sub |
422
|
|
|
|
|
|
|
#line 104 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
423
|
|
|
|
|
|
|
{ |
424
|
|
|
|
|
|
|
# print STDERR "PARSING $_[1]\n" |
425
|
|
|
|
|
|
|
} |
426
|
|
|
|
|
|
|
], |
427
|
|
|
|
|
|
|
[#Rule 14 |
428
|
|
|
|
|
|
|
'parsing', 0, undef |
429
|
|
|
|
|
|
|
], |
430
|
|
|
|
|
|
|
[#Rule 15 |
431
|
|
|
|
|
|
|
'parsing', 2, |
432
|
|
|
|
|
|
|
sub |
433
|
|
|
|
|
|
|
#line 108 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
434
|
|
|
|
|
|
|
{ $_[0]->YYErrok } |
435
|
|
|
|
|
|
|
], |
436
|
|
|
|
|
|
|
[#Rule 16 |
437
|
|
|
|
|
|
|
'PREP', 2, |
438
|
|
|
|
|
|
|
# Semantic action for rule 16, "PREP: WORD P_STATUS".
#
# Creates a TermLeaf for the current word index, stores it in the "PREP"
# slot of the node being built, then advances the word index.
sub
#line 112 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $leaf = Lingua::YaTeA::TermLeaf->new($index);
    $node->{"PREP"} = $leaf;
    $index++;
}
445
|
|
|
|
|
|
|
], |
446
|
|
|
|
|
|
|
[#Rule 17 |
447
|
|
|
|
|
|
|
'DET', 2, |
448
|
|
|
|
|
|
|
# Semantic action for rule 17, "DET: WORD D_STATUS".
#
# Creates a TermLeaf for the current word index, stores it in the "DET"
# slot of the node being built, then advances the word index.
sub
#line 118 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $leaf = Lingua::YaTeA::TermLeaf->new($index);
    $node->{"DET"} = $leaf;
    $index++;
}
455
|
|
|
|
|
|
|
], |
456
|
|
|
|
|
|
|
[#Rule 18 |
457
|
|
|
|
|
|
|
'CANDIDATE', 2, |
458
|
|
|
|
|
|
|
# Semantic action for rule 18, "CANDIDATE: WORD C_STATUS".
#
#   $_[2] - value of the C_STATUS token (the letter captured by the lexer's
#           <=([MH])> pattern).
#
# Creates a TermLeaf for the current word index, adds it as an edge of the
# node being built together with its status, then advances the word index.
sub
#line 124 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $status = $_[2];

    $edge = Lingua::YaTeA::TermLeaf->new($index);
    $node->addEdge($edge, $status);
    $index++;
}
468
|
|
|
|
|
|
|
], |
469
|
|
|
|
|
|
|
[#Rule 19 |
470
|
|
|
|
|
|
|
'CANDIDATE', 3, |
471
|
|
|
|
|
|
|
sub |
472
|
|
|
|
|
|
|
#line 132 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
473
|
|
|
|
|
|
|
{ |
474
|
|
|
|
|
|
|
#print STDERR "CANDIDATE2 $_[1]\n"; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
], |
477
|
|
|
|
|
|
|
[#Rule 20 |
478
|
|
|
|
|
|
|
'CANDIDATE', 2, |
479
|
|
|
|
|
|
|
sub |
480
|
|
|
|
|
|
|
#line 135 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
481
|
|
|
|
|
|
|
{ $_[0]->YYErrok } |
482
|
|
|
|
|
|
|
], |
483
|
|
|
|
|
|
|
[#Rule 21 |
484
|
|
|
|
|
|
|
'OPEN', 1, |
485
|
|
|
|
|
|
|
# Semantic action for rule 21, "OPEN: OPEN_TAG".
#
# Starts a new tree node for an opening bracket: a RootNode at nesting
# level 0, an InternalNode at any deeper level.  The node is registered in
# the current node set, pushed on the @uncomplete stack of still-open nodes,
# and the nesting level is increased.
sub
#line 138 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $node_class = $level == 0
        ? 'Lingua::YaTeA::RootNode'
        : 'Lingua::YaTeA::InternalNode';

    $node = $node_class->new($level);
    $node_set->addNode($node);
    push @uncomplete, $node;
    $level++;
}
501
|
|
|
|
|
|
|
], |
502
|
|
|
|
|
|
|
[#Rule 22 |
503
|
|
|
|
|
|
|
'OPEN', 2, |
504
|
|
|
|
|
|
|
sub |
505
|
|
|
|
|
|
|
#line 152 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
506
|
|
|
|
|
|
|
{ $_[0]->YYErrok } |
507
|
|
|
|
|
|
|
], |
508
|
|
|
|
|
|
|
[#Rule 23 |
509
|
|
|
|
|
|
|
'END', 1, |
510
|
|
|
|
|
|
|
sub |
511
|
|
|
|
|
|
|
#line 156 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
512
|
|
|
|
|
|
|
{ |
513
|
|
|
|
|
|
|
# print STDERR "END $_[1]\n"; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
], |
516
|
|
|
|
|
|
|
[#Rule 24 |
517
|
|
|
|
|
|
|
'END', 2, |
518
|
|
|
|
|
|
|
sub |
519
|
|
|
|
|
|
|
#line 159 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
520
|
|
|
|
|
|
|
{ $_[0]->YYErrok } |
521
|
|
|
|
|
|
|
], |
522
|
|
|
|
|
|
|
[#Rule 25 |
523
|
|
|
|
|
|
|
'CLOSE', 2, |
524
|
|
|
|
|
|
|
# Semantic action for rule 25, "CLOSE: END_TAG C_STATUS".
#
#   $_[2] - value of the C_STATUS token attached to the closing bracket.
#
# Closes the innermost open node: it is removed from the @uncomplete stack,
# linkToFather() is called with the remaining stack and the status
# (presumably attaching it to its parent -- confirm in the node class), and
# the node now on top of the stack becomes the current node again.
sub
#line 162 "lib/Lingua/YaTeA/TestifiedTermParser.yp"
{
    my $status = $_[2];

    pop @uncomplete;
    $node->linkToFather(\@uncomplete, $status);
    $node = $uncomplete[-1];
    $level--;
}
533
|
|
|
|
|
|
|
], |
534
|
|
|
|
|
|
|
[#Rule 26 |
535
|
|
|
|
|
|
|
'CLOSE', 2, |
536
|
|
|
|
|
|
|
sub |
537
|
|
|
|
|
|
|
#line 169 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
538
|
|
|
|
|
|
|
{ $_[0]->YYErrok } |
539
|
|
|
|
|
|
|
] |
540
|
|
|
|
|
|
|
], |
541
|
|
|
|
|
|
|
@_); |
542
|
|
|
|
|
|
|
bless($self,$class); |
543
|
|
|
|
|
|
|
} |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
#line 173 "lib/Lingua/YaTeA/TestifiedTermParser.yp" |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# Syntax-error callback, meant to be passed to YYParse as the yyerror handler.
#
#   $_[0] - the parser object.
#
# If a message was stored in YYData->{ERRMSG}, it is printed, removed from
# YYData, and nothing else is reported.  Otherwise a diagnostic dump is
# printed to the currently selected output handle: the tokens the parser
# expected, the current token and its value, a dump of the lexer reference,
# and a final "Syntax error." line.
sub _Error {
    my ($parser) = @_;
    my $data = $parser->YYData;

    if (exists $data->{ERRMSG}) {
        print $data->{ERRMSG};
        delete $data->{ERRMSG};
        return;
    }

    # YYExpect returns a list, so it must be called in list context;
    # concatenating its result with a string would evaluate it as a scalar.
    my @expected = $parser->YYExpect;
    @expected = sort @expected;
    for my $token (@expected) {
        # Some grammar tokens contain a newline; keep the report on one line.
        $token =~ s/\n/\\n/g;
    }

    # Token and value may be undefined (e.g. at end of input).
    my $curtok = $parser->YYCurtok;
    $curtok = '' unless defined $curtok;
    my $curval = $parser->YYCurval;
    $curval = '' unless defined $curval;

    print "EXPECT: ";
    print join(', ', @expected) . "\n";
    print "CURTOK: ";
    print "-" . $curtok . "-\n";
    print "CURVAL: ";
    print $curval . "\n";
    print "Lexer: ";
    print Dumper($parser->YYLexer) . "\n";
    print "Syntax error.\n";
}
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# Lexer callback, meant to be passed to YYParse as the yylex handler.
#
#   $_[0] - the parser object.  Its YYData hash must provide:
#             FH    - handle the input is read from, one line at a time;
#             WORD  - pattern for a word (assumed to contain one capture
#                     group, since $1 is returned as the token value --
#                     confirm against the caller);
#           and is used to keep the unconsumed rest of the line in INPUT.
#
# Returns a (token, value) pair, or ('', undef) at end of input.  Tokens are
# tried in this order: OPEN_TAG, END_TAG, WORD, C_STATUS, D_STATUS, P_STATUS,
# CLOSE_TAG, INFOS, then a catch-all that returns whatever text is left
# (e.g. the "\n" of an empty or comment-only line) as both token and value.
sub _Lexer {
    my($parser)=shift;
    my $data = $parser->YYData;
    my $fh   = $data->{FH};

    my $open     = '(\()';
    my $word     = $data->{WORD};
    # NOTE(review): CLOSE_TAG looks unreachable -- END_TAG is tried earlier
    # and already matches the leading ")"; the grammar builds CLOSE from
    # END_TAG followed by C_STATUS.
    my $close    = '(\)<=[MH]>)';
    my $end      = '(\))';
    my $d_status = '<=(D)>';
    my $p_status = '<=(P)>';
    my $c_status = '<=([MH])>';
    my $infos    = '\t(.+)';

    # Refill the buffer when it is exhausted, then drop a comment.  If the
    # comment was all there was (no trailing newline), read again instead of
    # falling through without returning a token.
    while (1) {
        $data->{INPUT}
            or $data->{INPUT} = <$fh>
            or return('',undef);

        $data->{INPUT} =~ s/^[ \t]*#.*//;
        last if length $data->{INPUT};
    }

    for ($data->{INPUT}) {
        s/^$open\s*//     and return ('OPEN_TAG', $1);
        s/^$end//         and return ('END_TAG', $1);
        s/^$word\s*//     and return ('WORD', $1);
        s/^$c_status\s*// and return ('C_STATUS', $1);
        s/^$d_status\s*// and return ('D_STATUS', $1);
        s/^$p_status\s*// and return ('P_STATUS', $1);
        s/^$close\s*//    and return ('CLOSE_TAG', $1);
        # Only one capture group exists, so only $1 is returned.
        s/^$infos\s*//    and return ('INFOS', $1);
        # The remaining text must be captured, otherwise $1 is undefined and
        # the parser never receives the "\n" token its grammar expects.
        s/^(.+)//s        and return ($1, $1);
    }
}
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=head1 NAME |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
Lingua::YaTeA::TestifiedTermParser - Perl extension for parsing testified term files (based on Parse::Yapp) |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=head1 SYNOPSIS |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
use Lingua::YaTeA::TestifiedTermParser; |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
my $fh = FileHandle->new("<$file_path"); |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
my $parser = Lingua::YaTeA::TestifiedTermParser->new(); |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
$parser->YYData->{TTS} = $this; |
615
|
|
|
|
|
|
|
$parser->YYData->{WORD} = $word_characters_regexp; |
616
|
|
|
|
|
|
|
$parser->YYData->{TAGSET} = $tag_set; |
617
|
|
|
|
|
|
|
$parser->YYData->{MATCH} = $match_type; |
618
|
|
|
|
|
|
|
$parser->YYData->{FH} = $fh; |
619
|
|
|
|
|
|
|
$parser->YYData->{FILTERING_LEXICON} = $filtering_lexicon_h; |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
$parser->YYParse(yylex => \&Lingua::YaTeA::TestifiedTermParser::_Lexer, yyerror => \&Lingua::YaTeA::TestifiedTermParser::_Error); |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=head1 DESCRIPTION |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
The module implements a parser for analysing a testified term file. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
The parser takes into account several pieces of information: the word character |
629
|
|
|
|
|
|
|
list (field C<WORD>) i.e. all the possible characters in a word, the |
630
|
|
|
|
|
|
|
Part-of-Speech tagset (field C<TAGSET>), the type of matching (field |
631
|
|
|
|
|
|
|
C<MATCH>), the file handler to read (field C<FH>), and the lexicon of |
632
|
|
|
|
|
|
|
the corpus (field C<FILTERING_LEXICON>). |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
=head1 METHODS |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
=head2 _Error() |
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
_Error($error_objet); |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
The method is used to manage the parsing error and prints a message |
641
|
|
|
|
|
|
|
explaining the error. |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=head2 _Lexer() |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
_Lexer($parser_info); |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
The method applies the parser to the data contained in the structure |
648
|
|
|
|
|
|
|
C<$parser_info> (field C<INPUT>). |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=head1 SEE ALSO |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Sophie Aubin and Thierry Hamon. Improving Term Extraction with |
653
|
|
|
|
|
|
|
Terminological Resources. In Advances in Natural Language Processing |
654
|
|
|
|
|
|
|
(5th International Conference on NLP, FinTAL 2006). pages |
655
|
|
|
|
|
|
|
380-387. Tapio Salakoski, Filip Ginter, Sampo Pyysalo, Tapio Pahikkala |
656
|
|
|
|
|
|
|
(Eds). August 2006. LNAI 4139. |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=head1 AUTHOR |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
Thierry Hamon and Sophie Aubin |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
Copyright (C) 2005 by Thierry Hamon and Sophie Aubin |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
668
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.8.6 or, |
669
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=cut |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
1; |