line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Core::Scenario; |
2
|
|
|
|
|
|
|
$Treex::Core::Scenario::VERSION = '2.20210102'; |
3
|
12
|
|
|
12
|
|
347398
|
use Moose; |
|
12
|
|
|
|
|
1399128
|
|
|
12
|
|
|
|
|
117
|
|
4
|
12
|
|
|
12
|
|
84048
|
use Treex::Core::Common; |
|
12
|
|
|
|
|
48
|
|
|
12
|
|
|
|
|
135
|
|
5
|
12
|
|
|
12
|
|
67836
|
use File::Basename; |
|
12
|
|
|
|
|
35
|
|
|
12
|
|
|
|
|
930
|
|
6
|
12
|
|
|
12
|
|
92
|
use File::Slurp; |
|
12
|
|
|
|
|
30
|
|
|
12
|
|
|
|
|
751
|
|
7
|
12
|
|
|
12
|
|
6205
|
use File::chdir; |
|
12
|
|
|
|
|
19640
|
|
|
12
|
|
|
|
|
1437
|
|
8
|
12
|
|
|
12
|
|
107
|
use Digest::MD5 qw(md5_hex); |
|
12
|
|
|
|
|
33
|
|
|
12
|
|
|
|
|
45185
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#use Parse::RecDescent 1.967003; now using standalone version |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
has from_file => ( |
13
|
|
|
|
|
|
|
is => 'ro', |
14
|
|
|
|
|
|
|
isa => 'Str', |
15
|
|
|
|
|
|
|
predicate => '_has_from_file', |
16
|
|
|
|
|
|
|
documentation => q(Path to file with scenario), |
17
|
|
|
|
|
|
|
); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has from_string => ( |
20
|
|
|
|
|
|
|
is => 'ro', |
21
|
|
|
|
|
|
|
isa => 'Str', |
22
|
|
|
|
|
|
|
predicate => '_has_from_string', |
23
|
|
|
|
|
|
|
documentation => q(String with scenario), |
24
|
|
|
|
|
|
|
); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has scenario_string => ( |
27
|
|
|
|
|
|
|
is => 'ro', |
28
|
|
|
|
|
|
|
isa => 'Str', |
29
|
|
|
|
|
|
|
builder => '_build_scenario_string', |
30
|
|
|
|
|
|
|
lazy => 1, |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has block_items => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
isa => 'ArrayRef[HashRef]', |
36
|
|
|
|
|
|
|
builder => 'parse_scenario_string', |
37
|
|
|
|
|
|
|
init_arg => undef, |
38
|
|
|
|
|
|
|
lazy => 1, |
39
|
|
|
|
|
|
|
); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
has loaded_blocks => ( |
42
|
|
|
|
|
|
|
is => 'ro', |
43
|
|
|
|
|
|
|
isa => 'ArrayRef[Treex::Core::Block]', |
44
|
|
|
|
|
|
|
builder => '_build_loaded_blocks', |
45
|
|
|
|
|
|
|
predicate => 'is_initialized', |
46
|
|
|
|
|
|
|
lazy => 1, |
47
|
|
|
|
|
|
|
init_arg => undef, |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
has document_reader => ( |
51
|
|
|
|
|
|
|
is => 'rw', |
52
|
|
|
|
|
|
|
does => 'Treex::Core::DocumentReader', |
53
|
|
|
|
|
|
|
predicate => '_has_document_reader', |
54
|
|
|
|
|
|
|
writer => '_set_document_reader', |
55
|
|
|
|
|
|
|
init_arg => undef, |
56
|
|
|
|
|
|
|
documentation => 'DocumentReader starts every scenario and reads a stream of documents.' |
57
|
|
|
|
|
|
|
); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
has writers => ( |
60
|
|
|
|
|
|
|
is => 'rw', |
61
|
|
|
|
|
|
|
does => 'ArrayRef[Treex::Block::Write::BaseWriter]', |
62
|
|
|
|
|
|
|
default => sub { [] } |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
has _global_params => ( |
66
|
|
|
|
|
|
|
is => 'ro', |
67
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
68
|
|
|
|
|
|
|
traits => ['Hash'], |
69
|
|
|
|
|
|
|
default => sub { {} }, |
70
|
|
|
|
|
|
|
handles => { |
71
|
|
|
|
|
|
|
get_global_param => 'get', |
72
|
|
|
|
|
|
|
set_global_param => 'set', |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#get_global_param_names => 'keys', |
75
|
|
|
|
|
|
|
#set_verbose => [ set => 'verbose' ], |
76
|
|
|
|
|
|
|
#get_verbose => [ get => 'verbose' ], |
77
|
|
|
|
|
|
|
#set_language => [ set => 'language' ], |
78
|
|
|
|
|
|
|
#get_language => [ get => 'language' ], |
79
|
|
|
|
|
|
|
#... ? |
80
|
|
|
|
|
|
|
}, |
81
|
|
|
|
|
|
|
); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
has parser => ( |
84
|
|
|
|
|
|
|
is => 'ro', |
85
|
|
|
|
|
|
|
isa => 'Parse::RecDescent::_Runtime', |
86
|
|
|
|
|
|
|
init_arg => undef, |
87
|
|
|
|
|
|
|
builder => '_build_parser', |
88
|
|
|
|
|
|
|
documentation => q{Parses treex scenarios} |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
has runner => ( |
92
|
|
|
|
|
|
|
is => 'ro', |
93
|
|
|
|
|
|
|
isa => 'Treex::Core::Run', |
94
|
|
|
|
|
|
|
writer => '_set_runner', |
95
|
|
|
|
|
|
|
weak_ref => 1, |
96
|
|
|
|
|
|
|
documentation => 'Treex::Core::Run instance in which the scenario is running', |
97
|
|
|
|
|
|
|
); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
has cache => ( |
100
|
|
|
|
|
|
|
is => 'rw', |
101
|
|
|
|
|
|
|
isa => 'Maybe[Cache::Memcached]', |
102
|
|
|
|
|
|
|
builder => '_build_cache', |
103
|
|
|
|
|
|
|
); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub _build_scenario_string { |
106
|
15
|
|
|
15
|
|
319
|
my $self = shift; |
107
|
14
|
100
|
|
|
|
480
|
if ( $self->_has_from_file ) { |
|
|
50
|
|
|
|
|
|
108
|
10
|
|
|
|
|
289
|
return $self->_load_scenario_file( $self->from_file ); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
elsif ( $self->_has_from_string ) { |
111
|
5
|
|
|
|
|
135
|
return $self->from_string; |
112
|
|
|
|
|
|
|
} |
113
|
1
|
|
|
|
|
3
|
log_fatal("You have to provide from_file or from_string attribute"); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my %sequence = (); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub _build_loaded_blocks { |
119
|
12
|
|
|
12
|
|
58
|
my $self = shift; |
120
|
12
|
|
|
|
|
34
|
my @block_items = @{ $self->block_items }; |
|
12
|
|
|
|
|
365
|
|
121
|
10
|
|
|
|
|
45
|
my $block_count = scalar @block_items; |
122
|
10
|
|
|
|
|
31
|
my $i = 0; |
123
|
10
|
|
|
|
|
25
|
my @loaded_blocks; |
124
|
|
|
|
|
|
|
|
125
|
10
|
|
|
|
|
39
|
my $sequence_from = 0; |
126
|
10
|
|
|
|
|
32
|
my $sequence_hash = ""; |
127
|
10
|
|
|
|
|
27
|
foreach my $block_item (@block_items) { |
128
|
21
|
|
|
|
|
63
|
$i++; |
129
|
21
|
|
|
|
|
55
|
my $params = ''; |
130
|
21
|
50
|
|
|
|
76
|
if ( $block_item->{block_parameters} ) { |
131
|
21
|
|
|
|
|
73
|
$params = join ' ', @{ $block_item->{block_parameters} }; |
|
21
|
|
|
|
|
83
|
|
132
|
|
|
|
|
|
|
} |
133
|
21
|
|
|
|
|
185
|
log_info("Loading block $block_item->{block_name} $params ($i/$block_count)"); |
134
|
21
|
|
|
|
|
129
|
my $new_block = $self->_load_block($block_item); |
135
|
|
|
|
|
|
|
|
136
|
19
|
100
|
|
|
|
123
|
if ( $new_block->does('Treex::Core::DocumentReader') ) { |
|
|
100
|
|
|
|
|
|
137
|
8
|
50
|
|
|
|
3852
|
log_fatal("Only one DocumentReader per scenario is permitted ($block_item->{block_name})") |
138
|
|
|
|
|
|
|
if $self->_has_document_reader; |
139
|
8
|
|
|
|
|
287
|
$self->_set_document_reader($new_block); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
elsif ( $new_block->isa('Treex::Block::Write::BaseWriter') ) { |
142
|
1
|
|
|
|
|
552
|
push( @{ $self->writers }, $new_block ); |
|
1
|
|
|
|
|
31
|
|
143
|
1
|
|
|
|
|
4
|
push @loaded_blocks, $new_block; # duplicity |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
else { |
146
|
10
|
50
|
|
|
|
4328
|
if ( ref($new_block) eq "Treex::Core::CacheBlock" ) { |
|
|
50
|
|
|
|
|
|
147
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{from} = $sequence_from; |
148
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{to} = $i; |
149
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{hash} = $sequence_hash; |
150
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
$sequence{$i}{_from} = $sequence_from; |
152
|
0
|
|
|
|
|
0
|
$sequence_from = $i; |
153
|
0
|
|
|
|
|
0
|
push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() ); |
|
0
|
|
|
|
|
0
|
|
154
|
0
|
|
|
|
|
0
|
$sequence_hash = $new_block->get_hash(); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
elsif ($self->cache) { |
157
|
0
|
|
|
|
|
0
|
$sequence_hash = md5_hex( $sequence_hash . $new_block->get_hash() ); |
158
|
0
|
0
|
|
|
|
0
|
if ( defined( $sequence{$sequence_from} ) ) { |
159
|
0
|
|
|
|
|
0
|
push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() ); |
|
0
|
|
|
|
|
0
|
|
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
10
|
|
|
|
|
44
|
push @loaded_blocks, $new_block; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
8
|
|
|
|
|
49
|
log_info('ALL BLOCKS SUCCESSFULLY LOADED.'); |
168
|
8
|
|
|
|
|
297
|
return \@loaded_blocks; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _load_parser { |
172
|
14
|
|
|
15
|
|
35
|
my $self = shift; |
173
|
14
|
|
|
|
|
6500
|
require Treex::Core::ScenarioParser; |
174
|
14
|
|
|
|
|
131
|
return Treex::Core::ScenarioParser->new(); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub _my_dir { |
178
|
0
|
|
|
1
|
|
0
|
return dirname( (caller)[1] ); |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub _build_parser { |
182
|
14
|
|
|
15
|
|
14712
|
my $self = shift; |
183
|
14
|
|
|
|
|
37
|
my $parser; |
184
|
14
|
50
|
|
|
|
39
|
eval { |
185
|
14
|
|
|
|
|
65
|
$parser = $self->_load_parser(); |
186
|
14
|
|
|
|
|
143
|
1; |
187
|
|
|
|
|
|
|
} and return $parser; |
188
|
0
|
|
|
|
|
0
|
log_info("Cannot find precompiled scenario parser, trying to build it from grammar"); |
189
|
0
|
|
|
|
|
0
|
my $dir = $self->_my_dir(); #get module's directory |
190
|
0
|
|
|
|
|
0
|
my $file = "$dir/ScenarioParser.rdg"; #find grammar file |
191
|
0
|
0
|
|
|
|
0
|
log_fatal("Cannot find grammar file") if !-e $file; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#in fact we should never reach this |
194
|
0
|
|
|
|
|
0
|
log_warn('We should NOT reach this place. Treex distribution may be corrupted.'); |
195
|
|
|
|
|
|
|
|
196
|
0
|
|
|
|
|
0
|
my $grammar = read_file($file); #load it |
197
|
|
|
|
|
|
|
eval { |
198
|
0
|
|
|
|
|
0
|
log_info("Trying to precompile it for you"); |
199
|
0
|
|
|
|
|
0
|
require Parse::RecDescent; |
200
|
0
|
|
|
|
|
0
|
local $CWD = $dir; |
201
|
0
|
|
|
|
|
0
|
Parse::RecDescent->Precompile( { -standalone => 1 }, $grammar, 'Treex::Core::ScenarioParser' ); |
202
|
0
|
|
|
|
|
0
|
$parser = $self->_load_parser(); |
203
|
0
|
|
|
|
|
0
|
1; |
204
|
0
|
0
|
0
|
|
|
0
|
} or eval { |
205
|
0
|
|
|
|
|
0
|
log_info("Cannot precompile, loading directly from grammar. Consider precompiling it manually"); |
206
|
0
|
|
|
|
|
0
|
require Parse::RecDescent; |
207
|
0
|
|
|
|
|
0
|
$parser = Parse::RecDescent->new($grammar); #create parser |
208
|
0
|
|
|
|
|
0
|
1; |
209
|
|
|
|
|
|
|
} or log_fatal("Cannot create Scenario parser"); |
210
|
0
|
|
|
|
|
0
|
return $parser; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _build_cache { |
214
|
14
|
|
|
15
|
|
30785
|
my $self = shift; |
215
|
|
|
|
|
|
|
|
216
|
14
|
50
|
33
|
|
|
433
|
if ( $self->runner && $self->runner->cache ) { |
217
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
require Treex::Core::CacheBlock; |
219
|
0
|
|
|
|
|
0
|
require Treex::Tool::Memcached::Memcached; |
220
|
|
|
|
|
|
|
|
221
|
0
|
|
|
|
|
0
|
return Treex::Tool::Memcached::Memcached::get_connection( |
222
|
|
|
|
|
|
|
"documents-cache" |
223
|
|
|
|
|
|
|
); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
14
|
|
|
|
|
52
|
return; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub _load_scenario_file { |
230
|
10
|
|
|
11
|
|
31
|
my ( $self, $scenario_filename ) = @_; |
231
|
10
|
|
|
|
|
70
|
log_info "Loading scenario description $scenario_filename"; |
232
|
10
|
50
|
|
|
|
60
|
my $scenario_string = read_file( $scenario_filename, binmode => ':utf8', err_mode => 'quiet' ) |
233
|
|
|
|
|
|
|
or log_fatal "Can't open scenario file $scenario_filename"; |
234
|
10
|
|
|
|
|
1947
|
return $scenario_string; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub parse_scenario_string { |
238
|
14
|
|
|
14
|
1
|
36
|
my $self = shift; |
239
|
14
|
|
|
|
|
420
|
my $scenario_string = $self->scenario_string; |
240
|
14
|
|
|
|
|
417
|
my $from_file = $self->from_file; |
241
|
|
|
|
|
|
|
|
242
|
14
|
|
|
|
|
409
|
my $parsed = $self->parser->startrule( $scenario_string, 1, $from_file ); |
243
|
14
|
100
|
|
|
|
70
|
log_fatal("Cannot parse the scenario: $scenario_string") if !defined $parsed; |
244
|
12
|
|
|
|
|
610
|
return $parsed; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# reverse of parse_scenario_string, used in Treex::Core::Run for treex --dump |
248
|
|
|
|
|
|
|
sub construct_scenario_string { |
249
|
3
|
|
|
3
|
1
|
3676
|
my $self = shift; |
250
|
3
|
|
|
|
|
8
|
my %args = @_; |
251
|
3
|
|
|
|
|
8
|
my $multiline = $args{multiline}; |
252
|
3
|
|
|
|
|
6
|
my @block_items = @{ $self->block_items }; |
|
3
|
|
|
|
|
92
|
|
253
|
3
|
100
|
|
|
|
10
|
my $delim = $multiline ? qq{\n} : q{ }; |
254
|
3
|
|
|
|
|
6
|
my @block_strings; |
255
|
3
|
|
|
|
|
10
|
foreach my $block_item (@block_items) { |
256
|
7
|
|
|
|
|
15
|
my $name = $block_item->{block_name}; |
257
|
7
|
|
|
|
|
11
|
my @parameters = map { _add_quotes($_) } @{ $block_item->{block_parameters} }; |
|
2
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
19
|
|
258
|
7
|
100
|
|
|
|
37
|
$name =~ s{^Treex::Block::}{} or $name = "::$name"; #strip leading Treex::Block:: or add leading :: |
259
|
7
|
|
|
|
|
12
|
my $params; |
260
|
7
|
100
|
|
|
|
18
|
if ( scalar @parameters ) { |
261
|
2
|
|
|
|
|
9
|
$params = q{ } . join q{ }, @parameters; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
else { |
264
|
5
|
|
|
|
|
9
|
$params = q{}; |
265
|
|
|
|
|
|
|
} |
266
|
7
|
|
|
|
|
21
|
push @block_strings, $name . $params; |
267
|
|
|
|
|
|
|
} |
268
|
3
|
|
|
|
|
49
|
return join $delim, @block_strings; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub get_required_files { |
272
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
273
|
0
|
|
|
|
|
0
|
my @block_items = @{ $self->block_items }; |
|
0
|
|
|
|
|
0
|
|
274
|
0
|
|
|
|
|
0
|
my @required_files; |
275
|
0
|
|
|
|
|
0
|
foreach my $block_item (@block_items) { |
276
|
0
|
|
|
|
|
0
|
my $block = $self->_load_block($block_item); |
277
|
|
|
|
|
|
|
push @required_files, |
278
|
|
|
|
|
|
|
map { |
279
|
0
|
|
|
|
|
0
|
$block_item->{block_name} . "\t" . $_; |
|
0
|
|
|
|
|
0
|
|
280
|
|
|
|
|
|
|
} $block->get_required_share_files(); |
281
|
|
|
|
|
|
|
} |
282
|
0
|
|
|
|
|
0
|
return @required_files; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub _add_quotes { # adding quotes only if param. value contains a space |
286
|
2
|
|
|
2
|
|
7
|
my ($block_parameter) = @_; |
287
|
2
|
|
|
|
|
14
|
my ( $name, $value ) = split /=/, $block_parameter, 2; |
288
|
2
|
50
|
|
|
|
12
|
if ( $value =~ /\s/ ) { |
289
|
0
|
|
|
|
|
0
|
my $res_string = "$name="; |
290
|
|
|
|
|
|
|
|
291
|
0
|
0
|
0
|
|
|
0
|
if ( $value =~ /'/ && $value !~ /"/ ) { |
292
|
0
|
|
|
|
|
0
|
$res_string .= '"' . $value . '"'; |
293
|
|
|
|
|
|
|
} else { |
294
|
0
|
|
|
|
|
0
|
$value =~ s/'/\\'/g; |
295
|
0
|
|
|
|
|
0
|
$res_string .= "'" . $value . "'"; |
296
|
|
|
|
|
|
|
} |
297
|
0
|
|
|
|
|
0
|
return $res_string; |
298
|
|
|
|
|
|
|
} |
299
|
2
|
|
|
|
|
9
|
return $block_parameter; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub load_blocks { |
303
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
304
|
0
|
|
|
|
|
0
|
$self->loaded_blocks; #just access lazy attribute |
305
|
0
|
|
|
|
|
0
|
return; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub init { |
309
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
310
|
0
|
|
|
|
|
0
|
$self->load_blocks(); |
311
|
0
|
|
|
|
|
0
|
return; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub _load_block { |
315
|
20
|
|
|
20
|
|
61
|
my ( $self, $block_item ) = @_; |
316
|
20
|
|
|
|
|
54
|
my $block_name = $block_item->{block_name}; |
317
|
20
|
|
|
|
|
32
|
my $new_block; |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Initialize with global (scenario) parameters |
320
|
20
|
|
|
|
|
36
|
my %params = ( %{ $self->_global_params }, scenario => $self ); |
|
20
|
|
|
|
|
745
|
|
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# which can be overriden by (local) block parameters. |
323
|
20
|
|
|
|
|
46
|
foreach my $param ( @{ $block_item->{block_parameters} } ) { |
|
20
|
|
|
|
|
65
|
|
324
|
12
|
|
|
|
|
65
|
my ( $name, $value ) = split /=/, $param, 2; |
325
|
12
|
|
|
|
|
49
|
$params{$name} = $value; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
20
|
100
|
|
2
|
|
2126
|
eval "use $block_name; 1;" or log_fatal "Can't use block $block_name !\n$@\n"; |
|
2
|
|
|
2
|
|
1387
|
|
|
2
|
|
|
2
|
|
12
|
|
|
2
|
|
|
|
|
50
|
|
|
2
|
|
|
|
|
649
|
|
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
61
|
|
|
2
|
|
|
|
|
20
|
|
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
53
|
|
329
|
19
|
50
|
|
|
|
101
|
eval { |
330
|
19
|
|
|
|
|
153
|
$new_block = $block_name->new( \%params ); |
331
|
19
|
|
|
|
|
237
|
1; |
332
|
|
|
|
|
|
|
} or log_fatal "Treex::Core::Scenario->new: error when initializing block $block_name\n\nEVAL ERROR:\t$@"; |
333
|
|
|
|
|
|
|
|
334
|
19
|
0
|
33
|
|
|
598
|
if ( $self->cache && $params{'use_cache'} ) { |
335
|
0
|
|
|
|
|
0
|
$new_block = Treex::Core::CacheBlock->new( { block => $new_block, cache => $self->cache } ); |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
19
|
|
|
|
|
81
|
return $new_block; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub run { |
342
|
1
|
|
|
1
|
1
|
1864
|
my ($self) = @_; |
343
|
1
|
|
|
|
|
3
|
my $number_of_blocks = @{ $self->loaded_blocks }; |
|
1
|
|
|
|
|
40
|
|
344
|
1
|
50
|
|
|
|
42
|
log_fatal('No DocumentReader supplied') if !$self->_has_document_reader; |
345
|
1
|
|
|
|
|
32
|
my $reader = $self->document_reader; |
346
|
1
|
|
50
|
|
|
16
|
my $number_of_documents = $reader->number_of_documents_per_this_job() || '?'; |
347
|
1
|
|
|
|
|
3
|
my $document_number = 0; |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
#if ( $self->cache ) { |
350
|
|
|
|
|
|
|
# $document_number = $self->_run_with_cache( $reader, $number_of_blocks, $number_of_documents ); |
351
|
|
|
|
|
|
|
#} |
352
|
|
|
|
|
|
|
#else { |
353
|
1
|
|
|
|
|
8
|
$document_number = $self->_run_without_cache( $reader, $number_of_blocks, $number_of_documents ); |
354
|
|
|
|
|
|
|
#} |
355
|
|
|
|
|
|
|
|
356
|
1
|
50
|
|
|
|
15
|
log_info "Processed $document_number document" |
357
|
|
|
|
|
|
|
. ( $document_number == 1 ? '' : 's' ); |
358
|
1
|
|
|
|
|
10
|
return 1; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
sub _run_with_cache { |
362
|
|
|
|
|
|
|
|
363
|
0
|
|
|
0
|
|
0
|
my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_; |
364
|
0
|
|
|
|
|
0
|
my $document_number = 0; |
365
|
|
|
|
|
|
|
|
366
|
0
|
|
|
|
|
0
|
while ( my $document = $reader->next_document_for_this_job() ) { |
367
|
0
|
|
|
|
|
0
|
$document_number++; |
368
|
0
|
|
|
|
|
0
|
my $doc_name = $document->full_filename; |
369
|
0
|
|
|
|
|
0
|
my $doc_from = $document->loaded_from; |
370
|
0
|
|
|
|
|
0
|
log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from"; |
371
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
372
|
0
|
|
|
|
|
0
|
my $skip_to = 0; |
373
|
0
|
|
|
|
|
0
|
my $process = 0; |
374
|
0
|
|
|
|
|
0
|
my $skip_from = 0; |
375
|
0
|
|
|
|
|
0
|
my $from_hash = ""; |
376
|
0
|
|
|
|
|
0
|
my $document_last_hash = ""; |
377
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
378
|
0
|
|
|
|
|
0
|
$block_number++; |
379
|
0
|
|
|
|
|
0
|
$process = 1; |
380
|
0
|
0
|
|
|
|
0
|
if ( $block_number < $skip_to ) { |
|
|
0
|
|
|
|
|
|
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# we know that there are identical, so we can skip them |
383
|
0
|
|
|
|
|
0
|
log_info "Skipping block $block_number/$number_of_blocks " . ref($block); |
384
|
0
|
|
|
|
|
0
|
$process = 0; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
elsif ( $block_number == $skip_to ) { |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# this is border Cache block -> we have to check whether next sequence is also same |
389
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# following sequence is same => we can continue with skipping |
392
|
0
|
0
|
0
|
|
|
0
|
if ($sequence{$skip_from}{'to'} |
393
|
|
|
|
|
|
|
&& |
394
|
|
|
|
|
|
|
$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() ) |
395
|
|
|
|
|
|
|
) |
396
|
|
|
|
|
|
|
{ |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to}); |
399
|
0
|
|
|
|
|
0
|
$skip_to = $sequence{$skip_from}{to} - 1; |
400
|
0
|
|
|
|
|
0
|
$from_hash = $document->get_hash(); |
401
|
0
|
|
|
|
|
0
|
$process = 0; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
else { |
404
|
0
|
|
|
|
|
0
|
$document_last_hash = $document->get_hash(); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#$document->set_hash(md5_hex($document->get_hash() . $block->get_hash())); |
407
|
0
|
|
|
|
|
0
|
my $full_hash = $document->get_hash(); |
408
|
0
|
|
|
|
|
0
|
$document = $self->cache->get($full_hash); |
409
|
|
|
|
|
|
|
|
410
|
0
|
0
|
|
|
|
0
|
if ( !$document ) { |
411
|
0
|
|
|
|
|
0
|
log_fatal("Document - $full_hash is missing!!!"); |
412
|
|
|
|
|
|
|
} |
413
|
0
|
|
|
|
|
0
|
$process = 2; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
0
|
0
|
|
|
|
0
|
if ( $process == 1 ) { |
418
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
419
|
|
|
|
|
|
|
|
420
|
0
|
0
|
|
|
|
0
|
$block->process_start if !$block->is_started; |
421
|
0
|
|
|
|
|
0
|
$block->_set_is_started(1); |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
#log_info("Document-hash: " . $document->get_hash()); |
424
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
425
|
0
|
|
|
|
|
0
|
my $status = $block->process_document($document); |
426
|
0
|
0
|
0
|
|
|
0
|
if (defined($status) |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
427
|
|
|
|
|
|
|
&& |
428
|
|
|
|
|
|
|
$status == $Treex::Core::Block::DOCUMENT_FROM_CACHE && |
429
|
|
|
|
|
|
|
$sequence{$skip_from}{'to'} && |
430
|
|
|
|
|
|
|
$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() ) |
431
|
|
|
|
|
|
|
) |
432
|
|
|
|
|
|
|
{ |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to}); |
435
|
0
|
|
|
|
|
0
|
$skip_to = $sequence{$skip_from}{to} - 1; |
436
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
437
|
0
|
|
|
|
|
0
|
$from_hash = $document->get_hash(); |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
} |
440
|
|
|
|
|
|
|
|
441
|
0
|
|
|
|
|
0
|
$document_last_hash = $document->get_hash(); |
442
|
0
|
|
|
|
|
0
|
$document->set_hash( md5_hex( $document->get_hash() . $block->get_hash() ) ); |
443
|
|
|
|
|
|
|
|
444
|
0
|
0
|
|
|
|
0
|
if ( ref($block) eq "Treex::Core::CacheBlock" ) { |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
# cache block => mark this path as known |
447
|
0
|
|
|
|
|
0
|
my $id = $block_number + 1; |
448
|
0
|
|
|
|
|
0
|
my $from = $sequence{$id}{'_from'}; |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# the first sequence has no document |
451
|
0
|
0
|
|
|
|
0
|
if ( defined( $sequence{$from}{'document'} ) ) { |
452
|
0
|
|
|
|
|
0
|
$self->_set_known_sequence( $sequence{$from}{'hash'}, $sequence{$from}{'document'} ); |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
0
|
|
|
|
|
0
|
$sequence{$id}{'document'} = $document_last_hash; |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# this actually marks the document as successfully done in parallel processing (if this line |
460
|
|
|
|
|
|
|
# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel, |
461
|
|
|
|
|
|
|
# therefore not using log_info or similiar) |
462
|
0
|
0
|
|
|
|
0
|
if ( $self->document_reader->jobindex ) { |
463
|
0
|
|
|
|
|
0
|
print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n"; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
log_info "Applying process_end"; |
468
|
|
|
|
|
|
|
|
469
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
470
|
0
|
0
|
|
|
|
0
|
$block->process_end() if ( $block->is_started ); |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
|
473
|
0
|
|
|
|
|
0
|
return $document_number; |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
sub _is_known_sequence { |
477
|
0
|
|
|
0
|
|
0
|
my ( $self, $sequence_hash, $document_hash ) = @_; |
478
|
0
|
|
|
|
|
0
|
my $hash = md5_hex( $sequence_hash, $document_hash ); |
479
|
0
|
|
|
|
|
0
|
return $self->cache->get($hash); |
480
|
|
|
|
|
|
|
} |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub _set_known_sequence { |
483
|
0
|
|
|
0
|
|
0
|
my ( $self, $sequence_hash, $document_hash ) = @_; |
484
|
0
|
|
|
|
|
0
|
my $hash = md5_hex( $sequence_hash, $document_hash ); |
485
|
0
|
|
|
|
|
0
|
$self->cache->set( $hash, 1 ); |
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
0
|
return; |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
sub _run_without_cache { |
491
|
|
|
|
|
|
|
|
492
|
1
|
|
|
1
|
|
4
|
my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_; |
493
|
1
|
|
|
|
|
2
|
my $document_number = 0; |
494
|
|
|
|
|
|
|
|
495
|
1
|
|
|
|
|
6
|
$self->start(); |
496
|
|
|
|
|
|
|
|
497
|
1
|
|
|
|
|
16
|
while ( my $document = $reader->next_document_for_this_job() ) { |
498
|
1
|
|
|
|
|
3
|
$document_number++; |
499
|
1
|
|
|
|
|
14
|
my $doc_name = $document->full_filename; |
500
|
1
|
|
|
|
|
30
|
my $doc_from = $document->loaded_from; |
501
|
1
|
|
|
|
|
10
|
log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from"; |
502
|
1
|
|
|
|
|
3
|
my $block_number = 0; |
503
|
1
|
|
|
|
|
5
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
1
|
|
|
|
|
32
|
|
504
|
2
|
|
|
|
|
6
|
$block_number++; |
505
|
2
|
|
|
|
|
11
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
506
|
2
|
|
|
|
|
32
|
$block->process_document($document); |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# this actually marks the document as successfully done in parallel processing (if this line |
510
|
|
|
|
|
|
|
# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel, |
511
|
|
|
|
|
|
|
# therefore not using log_info or similiar) |
512
|
1
|
50
|
|
|
|
33
|
if ( $self->document_reader->jobindex ) { |
513
|
0
|
|
|
|
|
0
|
print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n"; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
1
|
|
|
|
|
9
|
$self->end(); |
518
|
|
|
|
|
|
|
|
519
|
1
|
50
|
|
|
|
10
|
log_info "Processed $document_number document" |
520
|
|
|
|
|
|
|
. ( $document_number == 1 ? '' : 's' ); |
521
|
|
|
|
|
|
|
|
522
|
1
|
|
|
|
|
3
|
return $document_number; |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
# Apply process_start to all blocks for which this has not yet been applied |
526
|
|
|
|
|
|
|
sub start { |
527
|
1
|
|
|
1
|
1
|
4
|
my ($self) = @_; |
528
|
|
|
|
|
|
|
|
529
|
1
|
|
|
|
|
16
|
log_info "Applying process_start"; |
530
|
1
|
|
|
|
|
4
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
1
|
|
|
|
|
35
|
|
531
|
2
|
50
|
|
|
|
91
|
$block->process_start() if !$block->is_started; |
532
|
2
|
|
|
|
|
73
|
$block->_set_is_started(1); |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
1
|
|
|
|
|
10
|
return; |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# Apply the scenario to documents given in parameter |
539
|
|
|
|
|
|
|
sub apply_to_documents { |
540
|
|
|
|
|
|
|
|
541
|
0
|
|
|
0
|
1
|
0
|
my ( $self, @documents ) = @_; |
542
|
|
|
|
|
|
|
|
543
|
0
|
|
|
|
|
0
|
my $number_of_blocks = @{ $self->loaded_blocks }; |
|
0
|
|
|
|
|
0
|
|
544
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
545
|
|
|
|
|
|
|
|
546
|
0
|
|
|
|
|
0
|
foreach my $document (@documents){ |
547
|
0
|
|
|
|
|
0
|
log_info "Processing document" . $document->full_filename; |
548
|
|
|
|
|
|
|
|
549
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
550
|
0
|
|
|
|
|
0
|
$block_number++; |
551
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
552
|
0
|
|
|
|
|
0
|
$block->process_document($document); |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
0
|
|
|
|
|
0
|
return; |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
# Apply process_end to all blocks for which this has not yet been applied |
560
|
|
|
|
|
|
|
sub end { |
561
|
1
|
|
|
1
|
1
|
4
|
my ($self) = @_; |
562
|
|
|
|
|
|
|
|
563
|
1
|
|
|
|
|
5
|
log_info "Applying process_end"; |
564
|
1
|
|
|
|
|
2
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
1
|
|
|
|
|
39
|
|
565
|
2
|
50
|
|
|
|
64
|
$block->process_end() if ( $block->is_started ); |
566
|
|
|
|
|
|
|
} |
567
|
|
|
|
|
|
|
|
568
|
1
|
|
|
|
|
4
|
return; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
12
|
|
|
12
|
|
5717
|
use Module::Reload; |
|
12
|
|
|
|
|
5755
|
|
|
12
|
|
|
|
|
1452
|
|
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
sub restart { |
574
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
575
|
0
|
|
|
|
|
0
|
my $changed_modules = Module::Reload->check; |
576
|
0
|
|
|
|
|
0
|
log_info "Number of reloaded modules = $changed_modules"; |
577
|
0
|
|
|
|
|
0
|
log_info "reseting the document reader\n"; |
578
|
0
|
|
|
|
|
0
|
$self->document_reader->restart(); |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
# TODO rebuild the reloaded blocks |
581
|
0
|
|
|
|
|
0
|
return; |
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
1; |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
__END__ |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=for Pod::Coverage BUILD |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=encoding utf-8 |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=head1 NAME |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Treex::Core::Scenario - a larger Treex processing unit, composed of blocks |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=head1 VERSION |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
version 2.20210102 |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
=head1 SYNOPSIS |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
use Treex::Core; |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
my ($doc1, $doc2); |
605
|
|
|
|
|
|
|
my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' ); |
606
|
|
|
|
|
|
|
$scenario->run; |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
$scenario = Treex::Core::Scenario->new(from_string => 'W2A::EN::Segment language=en'); |
610
|
|
|
|
|
|
|
$scenario->start(); |
611
|
|
|
|
|
|
|
$scenario->apply_to_documents($doc1, $doc2); |
612
|
|
|
|
|
|
|
$scenario->end(); |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=head1 DESCRIPTION |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
A Treex scenario consists of a sequence of (possibly parametrized) Treex blocks. |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Scenarios can be described by a simple textual format, which is either passed |
621
|
|
|
|
|
|
|
directly to the scenario construction, or is contained in a text file whose |
622
|
|
|
|
|
|
|
name is passed. |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
The string description of scenarios looks as follows. |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
1) It contains a list of block names from which their 'C<Treex::Block::>' |
627
|
|
|
|
|
|
|
prefixes were removed. |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
2) The block names are separated by one or more whitespace characters. |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
3) The block names are listed in the same order in which they should be |
632
|
|
|
|
|
|
|
applied on data. |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
4) For each block, there can be one or more parameters specified, using the |
635
|
|
|
|
|
|
|
C<attribute=value> form. |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
5) Comments start with 'C<#>' and end with the nearest newline character. |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Scenario example: |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
# morphological analysis of an English text |
643
|
|
|
|
|
|
|
Util::SetGlobal language=en selector=src |
644
|
|
|
|
|
|
|
Read::Text |
645
|
|
|
|
|
|
|
W2A::ResegmentSentences |
646
|
|
|
|
|
|
|
W2A::EN::Tokenize |
647
|
|
|
|
|
|
|
W2A::EN::NormalizeForms |
648
|
|
|
|
|
|
|
W2A::EN::FixTokenization |
649
|
|
|
|
|
|
|
W2A::EN::TagMorce |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head1 METHODS |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head2 Constructor |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=over 4 |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item my $scenario = Treex::Core::Scenario->new(from_string => 'W2A::Tokenize language=en W2A::Lemmatize' ); |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Constructor parameter C<from_string> specifies the names of blocks which are |
661
|
|
|
|
|
|
|
to be executed (in the specified order) when the scenario is applied on a |
662
|
|
|
|
|
|
|
L<Treex::Core::Document> object. |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=item my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' ); |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
The scenario description is loaded from the file. |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=back |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=head2 Running the scenario |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=over 4 |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=item $scenario->run(); |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
Run the scenario. |
678
|
|
|
|
|
|
|
One of the blocks (usually the first one) must be the document reader (see |
679
|
|
|
|
|
|
|
L<Treex::Core::DocumentReader>) that produces the |
680
|
|
|
|
|
|
|
documents on which this scenario is applied. |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=item $scenario->apply_to_documents($treex_doc); |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
Apply this scenario to a L<Treex::Core::Document> instance obtained from elsewhere. |
685
|
|
|
|
|
|
|
Please note that C<start()> must be called before the first call to this method and C<end()> |
686
|
|
|
|
|
|
|
after the last call to this method. |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
The scenario does not need to contain a document reader if documents are given |
689
|
|
|
|
|
|
|
explicitly. |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
=item $scenario->start(); |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Apply C<process_start()> to all blocks in the scenario. |
694
|
|
|
|
|
|
|
This is called automatically by C<run()>, but must be called before C<apply_to_documents()>. |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=item $scenario->end(); |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Apply C<process_end()> to all blocks in the scenario. |
699
|
|
|
|
|
|
|
This is called automatically by C<run()>, but must be called after calls to C<apply_to_documents()>. |
700
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=back |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=head2 Internal methods for loading scenarios |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=over 4 |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
=item _load_scenario_file($filename) |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
loads a scenario description from a file |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=item parse_scenario_string |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
parses a textual description of a scenario |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=item construct_scenario_string |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
constructs a scenario textual description from an existing scenario instance |
719
|
|
|
|
|
|
|
(it accepts a named parameter C<multiline> - when set, blocks are separated by newlines instead of spaces) |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=item load_blocks |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
use blocks and call their constructors |
724
|
|
|
|
|
|
|
can be used for preloading blocks for e.g. server applications |
725
|
|
|
|
|
|
|
(when a scenario is run, its blocks are loaded automatically) |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=item init |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
does all initialization, so that after this method the scenario is ready to run; |
730
|
|
|
|
|
|
|
currently it just loads the blocks |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=item restart |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
resets the document reader; in the future it will also rebuild the reloaded blocks |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=back |
737
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head1 SEE ALSO |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
L<Treex::Core::Block> |
742
|
|
|
|
|
|
|
L<Treex::Core> |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
=head1 AUTHORS |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz> |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Martin Popel <popel@ufal.mff.cuni.cz> |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
David Mareček <marecek@ufal.mff.cuni.cz> |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Tomáš Kraut <kraut@ufal.mff.cuni.cz> |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Martin Majliš <majlis@ufal.mff.cuni.cz> |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
Ondřej Dušek <odusek@ufal.mff.cuni.cz> |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |