line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Core::Scenario; |
2
|
|
|
|
|
|
|
$Treex::Core::Scenario::VERSION = '2.20160630'; |
3
|
3
|
|
|
3
|
|
103649
|
use Moose; |
|
3
|
|
|
|
|
1223889
|
|
|
3
|
|
|
|
|
22
|
|
4
|
3
|
|
|
3
|
|
23121
|
use Treex::Core::Common; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
14
|
|
5
|
3
|
|
|
3
|
|
17594
|
use File::Basename; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
179
|
|
6
|
3
|
|
|
3
|
|
17
|
use File::Slurp; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
138
|
|
7
|
3
|
|
|
3
|
|
1562
|
use File::chdir; |
|
3
|
|
|
|
|
4215
|
|
|
3
|
|
|
|
|
288
|
|
8
|
3
|
|
|
3
|
|
22
|
use Digest::MD5 qw(md5_hex); |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
8672
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#use Parse::RecDescent 1.967003; now using standalone version |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
has from_file => ( |
13
|
|
|
|
|
|
|
is => 'ro', |
14
|
|
|
|
|
|
|
isa => 'Str', |
15
|
|
|
|
|
|
|
predicate => '_has_from_file', |
16
|
|
|
|
|
|
|
documentation => q(Path to file with scenario), |
17
|
|
|
|
|
|
|
); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has from_string => ( |
20
|
|
|
|
|
|
|
is => 'ro', |
21
|
|
|
|
|
|
|
isa => 'Str', |
22
|
|
|
|
|
|
|
predicate => '_has_from_string', |
23
|
|
|
|
|
|
|
documentation => q(String with scenario), |
24
|
|
|
|
|
|
|
); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
has scenario_string => ( |
27
|
|
|
|
|
|
|
is => 'ro', |
28
|
|
|
|
|
|
|
isa => 'Str', |
29
|
|
|
|
|
|
|
builder => '_build_scenario_string', |
30
|
|
|
|
|
|
|
lazy => 1, |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has block_items => ( |
34
|
|
|
|
|
|
|
is => 'ro', |
35
|
|
|
|
|
|
|
isa => 'ArrayRef[HashRef]', |
36
|
|
|
|
|
|
|
builder => 'parse_scenario_string', |
37
|
|
|
|
|
|
|
init_arg => undef, |
38
|
|
|
|
|
|
|
lazy => 1, |
39
|
|
|
|
|
|
|
); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
has loaded_blocks => ( |
42
|
|
|
|
|
|
|
is => 'ro', |
43
|
|
|
|
|
|
|
isa => 'ArrayRef[Treex::Core::Block]', |
44
|
|
|
|
|
|
|
builder => '_build_loaded_blocks', |
45
|
|
|
|
|
|
|
predicate => 'is_initialized', |
46
|
|
|
|
|
|
|
lazy => 1, |
47
|
|
|
|
|
|
|
init_arg => undef, |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
has document_reader => ( |
51
|
|
|
|
|
|
|
is => 'rw', |
52
|
|
|
|
|
|
|
does => 'Treex::Core::DocumentReader', |
53
|
|
|
|
|
|
|
predicate => '_has_document_reader', |
54
|
|
|
|
|
|
|
writer => '_set_document_reader', |
55
|
|
|
|
|
|
|
init_arg => undef, |
56
|
|
|
|
|
|
|
documentation => 'DocumentReader starts every scenario and reads a stream of documents.' |
57
|
|
|
|
|
|
|
); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
has writers => ( |
60
|
|
|
|
|
|
|
is => 'rw', |
61
|
|
|
|
|
|
|
does => 'ArrayRef[Treex::Block::Write::BaseWriter]', |
62
|
|
|
|
|
|
|
default => sub { [] } |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
has _global_params => ( |
66
|
|
|
|
|
|
|
is => 'ro', |
67
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
68
|
|
|
|
|
|
|
traits => ['Hash'], |
69
|
|
|
|
|
|
|
default => sub { {} }, |
70
|
|
|
|
|
|
|
handles => { |
71
|
|
|
|
|
|
|
get_global_param => 'get', |
72
|
|
|
|
|
|
|
set_global_param => 'set', |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
#get_global_param_names => 'keys', |
75
|
|
|
|
|
|
|
#set_verbose => [ set => 'verbose' ], |
76
|
|
|
|
|
|
|
#get_verbose => [ get => 'verbose' ], |
77
|
|
|
|
|
|
|
#set_language => [ set => 'language' ], |
78
|
|
|
|
|
|
|
#get_language => [ get => 'language' ], |
79
|
|
|
|
|
|
|
#... ? |
80
|
|
|
|
|
|
|
}, |
81
|
|
|
|
|
|
|
); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
has parser => ( |
84
|
|
|
|
|
|
|
is => 'ro', |
85
|
|
|
|
|
|
|
isa => 'Parse::RecDescent::_Runtime', |
86
|
|
|
|
|
|
|
init_arg => undef, |
87
|
|
|
|
|
|
|
builder => '_build_parser', |
88
|
|
|
|
|
|
|
documentation => q{Parses treex scenarios} |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
has runner => ( |
92
|
|
|
|
|
|
|
is => 'ro', |
93
|
|
|
|
|
|
|
isa => 'Treex::Core::Run', |
94
|
|
|
|
|
|
|
writer => '_set_runner', |
95
|
|
|
|
|
|
|
weak_ref => 1, |
96
|
|
|
|
|
|
|
documentation => 'Treex::Core::Run instance in which the scenario is running', |
97
|
|
|
|
|
|
|
); |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
has cache => ( |
100
|
|
|
|
|
|
|
is => 'rw', |
101
|
|
|
|
|
|
|
isa => 'Maybe[Cache::Memcached]', |
102
|
|
|
|
|
|
|
builder => '_build_cache', |
103
|
|
|
|
|
|
|
); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
sub _build_scenario_string { |
106
|
13
|
|
|
13
|
|
39
|
my $self = shift; |
107
|
13
|
100
|
|
|
|
372
|
if ( $self->_has_from_file ) { |
|
|
50
|
|
|
|
|
|
108
|
10
|
|
|
|
|
257
|
return $self->_load_scenario_file( $self->from_file ); |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
elsif ( $self->_has_from_string ) { |
111
|
3
|
|
|
|
|
155
|
return $self->from_string; |
112
|
|
|
|
|
|
|
} |
113
|
0
|
|
|
|
|
0
|
log_fatal("You have to provide from_file or from_string attribute"); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
my %sequence = (); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub _build_loaded_blocks { |
119
|
10
|
|
|
10
|
|
24
|
my $self = shift; |
120
|
10
|
|
|
|
|
23
|
my @block_items = @{ $self->block_items }; |
|
10
|
|
|
|
|
254
|
|
121
|
8
|
|
|
|
|
23
|
my $block_count = scalar @block_items; |
122
|
8
|
|
|
|
|
19
|
my $i = 0; |
123
|
8
|
|
|
|
|
19
|
my @loaded_blocks; |
124
|
|
|
|
|
|
|
|
125
|
8
|
|
|
|
|
17
|
my $sequence_from = 0; |
126
|
8
|
|
|
|
|
17
|
my $sequence_hash = ""; |
127
|
8
|
|
|
|
|
21
|
foreach my $block_item (@block_items) { |
128
|
15
|
|
|
|
|
35
|
$i++; |
129
|
15
|
|
|
|
|
35
|
my $params = ''; |
130
|
15
|
50
|
|
|
|
53
|
if ( $block_item->{block_parameters} ) { |
131
|
15
|
|
|
|
|
32
|
$params = join ' ', @{ $block_item->{block_parameters} }; |
|
15
|
|
|
|
|
59
|
|
132
|
|
|
|
|
|
|
} |
133
|
15
|
|
|
|
|
129
|
log_info("Loading block $block_item->{block_name} $params ($i/$block_count)"); |
134
|
15
|
|
|
|
|
80
|
my $new_block = $self->_load_block($block_item); |
135
|
|
|
|
|
|
|
|
136
|
7
|
50
|
|
|
|
36
|
if ( $new_block->does('Treex::Core::DocumentReader') ) { |
|
|
50
|
|
|
|
|
|
137
|
0
|
0
|
|
|
|
0
|
log_fatal("Only one DocumentReader per scenario is permitted ($block_item->{block_name})") |
138
|
|
|
|
|
|
|
if $self->_has_document_reader; |
139
|
0
|
|
|
|
|
0
|
$self->_set_document_reader($new_block); |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
elsif ( $new_block->isa('Treex::Block::Write::BaseWriter') ) { |
142
|
0
|
|
|
|
|
0
|
push( @{ $self->writers }, $new_block ); |
|
0
|
|
|
|
|
0
|
|
143
|
0
|
|
|
|
|
0
|
push @loaded_blocks, $new_block; # duplicity |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
else { |
146
|
7
|
50
|
|
|
|
2643
|
if ( ref($new_block) eq "Treex::Core::CacheBlock" ) { |
|
|
50
|
|
|
|
|
|
147
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{from} = $sequence_from; |
148
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{to} = $i; |
149
|
0
|
|
|
|
|
0
|
$sequence{$sequence_from}{hash} = $sequence_hash; |
150
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
0
|
$sequence{$i}{_from} = $sequence_from; |
152
|
0
|
|
|
|
|
0
|
$sequence_from = $i; |
153
|
0
|
|
|
|
|
0
|
push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() ); |
|
0
|
|
|
|
|
0
|
|
154
|
0
|
|
|
|
|
0
|
$sequence_hash = $new_block->get_hash(); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
elsif ($self->cache) { |
157
|
0
|
|
|
|
|
0
|
$sequence_hash = md5_hex( $sequence_hash . $new_block->get_hash() ); |
158
|
0
|
0
|
|
|
|
0
|
if ( defined( $sequence{$sequence_from} ) ) { |
159
|
0
|
|
|
|
|
0
|
push( @{ $sequence{$sequence_from}{block} }, $new_block->get_hash() ); |
|
0
|
|
|
|
|
0
|
|
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
7
|
|
|
|
|
28
|
push @loaded_blocks, $new_block; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
0
|
|
|
|
|
0
|
log_info('ALL BLOCKS SUCCESSFULLY LOADED.'); |
168
|
0
|
|
|
|
|
0
|
return \@loaded_blocks; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub _load_parser { |
172
|
13
|
|
|
13
|
|
31
|
my $self = shift; |
173
|
13
|
|
|
|
|
3548
|
require Treex::Core::ScenarioParser; |
174
|
13
|
|
|
|
|
99
|
return Treex::Core::ScenarioParser->new(); |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
sub _my_dir { |
178
|
0
|
|
|
0
|
|
0
|
return dirname( (caller)[1] ); |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub _build_parser { |
182
|
13
|
|
|
13
|
|
8857
|
my $self = shift; |
183
|
13
|
|
|
|
|
35
|
my $parser; |
184
|
13
|
50
|
|
|
|
31
|
eval { |
185
|
13
|
|
|
|
|
59
|
$parser = $self->_load_parser(); |
186
|
13
|
|
|
|
|
117
|
1; |
187
|
|
|
|
|
|
|
} and return $parser; |
188
|
0
|
|
|
|
|
0
|
log_info("Cannot find precompiled scenario parser, trying to build it from grammar"); |
189
|
0
|
|
|
|
|
0
|
my $dir = $self->_my_dir(); #get module's directory |
190
|
0
|
|
|
|
|
0
|
my $file = "$dir/ScenarioParser.rdg"; #find grammar file |
191
|
0
|
0
|
|
|
|
0
|
log_fatal("Cannot find grammar file") if !-e $file; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#in fact we should never reach this |
194
|
0
|
|
|
|
|
0
|
log_warn('We should NOT reach this place. Treex distribution may be corrupted.'); |
195
|
|
|
|
|
|
|
|
196
|
0
|
|
|
|
|
0
|
my $grammar = read_file($file); #load it |
197
|
|
|
|
|
|
|
eval { |
198
|
0
|
|
|
|
|
0
|
log_info("Trying to precompile it for you"); |
199
|
0
|
|
|
|
|
0
|
require Parse::RecDescent; |
200
|
0
|
|
|
|
|
0
|
local $CWD = $dir; |
201
|
0
|
|
|
|
|
0
|
Parse::RecDescent->Precompile( { -standalone => 1 }, $grammar, 'Treex::Core::ScenarioParser' ); |
202
|
0
|
|
|
|
|
0
|
$parser = $self->_load_parser(); |
203
|
0
|
|
|
|
|
0
|
1; |
204
|
0
|
0
|
0
|
|
|
0
|
} or eval { |
205
|
0
|
|
|
|
|
0
|
log_info("Cannot precompile, loading directly from grammar. Consider precompiling it manually"); |
206
|
0
|
|
|
|
|
0
|
require Parse::RecDescent; |
207
|
0
|
|
|
|
|
0
|
$parser = Parse::RecDescent->new($grammar); #create parser |
208
|
0
|
|
|
|
|
0
|
1; |
209
|
|
|
|
|
|
|
} or log_fatal("Cannot create Scenario parser"); |
210
|
0
|
|
|
|
|
0
|
return $parser; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _build_cache { |
214
|
13
|
|
|
13
|
|
17563
|
my $self = shift; |
215
|
|
|
|
|
|
|
|
216
|
13
|
50
|
33
|
|
|
351
|
if ( $self->runner && $self->runner->cache ) { |
217
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
require Treex::Core::CacheBlock; |
219
|
0
|
|
|
|
|
0
|
require Treex::Tool::Memcached::Memcached; |
220
|
|
|
|
|
|
|
|
221
|
0
|
|
|
|
|
0
|
return Treex::Tool::Memcached::Memcached::get_connection( |
222
|
|
|
|
|
|
|
"documents-cache" |
223
|
|
|
|
|
|
|
); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
|
226
|
13
|
|
|
|
|
44
|
return; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub _load_scenario_file { |
230
|
10
|
|
|
10
|
|
29
|
my ( $self, $scenario_filename ) = @_; |
231
|
10
|
|
|
|
|
69
|
log_info "Loading scenario description $scenario_filename"; |
232
|
10
|
50
|
|
|
|
57
|
my $scenario_string = read_file( $scenario_filename, binmode => ':utf8', err_mode => 'quiet' ) |
233
|
|
|
|
|
|
|
or log_fatal "Can't open scenario file $scenario_filename"; |
234
|
10
|
|
|
|
|
2125
|
return $scenario_string; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub parse_scenario_string { |
238
|
13
|
|
|
13
|
1
|
35
|
my $self = shift; |
239
|
13
|
|
|
|
|
396
|
my $scenario_string = $self->scenario_string; |
240
|
13
|
|
|
|
|
328
|
my $from_file = $self->from_file; |
241
|
|
|
|
|
|
|
|
242
|
13
|
|
|
|
|
357
|
my $parsed = $self->parser->startrule( $scenario_string, 1, $from_file ); |
243
|
13
|
100
|
|
|
|
66
|
log_fatal("Cannot parse the scenario: $scenario_string") if !defined $parsed; |
244
|
11
|
|
|
|
|
553
|
return $parsed; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# reverse of parse_scenario_string, used in Treex::Core::Run for treex --dump |
248
|
|
|
|
|
|
|
sub construct_scenario_string { |
249
|
3
|
|
|
3
|
1
|
3269
|
my $self = shift; |
250
|
3
|
|
|
|
|
12
|
my %args = @_; |
251
|
3
|
|
|
|
|
8
|
my $multiline = $args{multiline}; |
252
|
3
|
|
|
|
|
7
|
my @block_items = @{ $self->block_items }; |
|
3
|
|
|
|
|
94
|
|
253
|
3
|
100
|
|
|
|
14
|
my $delim = $multiline ? qq{\n} : q{ }; |
254
|
3
|
|
|
|
|
6
|
my @block_strings; |
255
|
3
|
|
|
|
|
10
|
foreach my $block_item (@block_items) { |
256
|
7
|
|
|
|
|
15
|
my $name = $block_item->{block_name}; |
257
|
7
|
|
|
|
|
10
|
my @parameters = map { _add_quotes($_) } @{ $block_item->{block_parameters} }; |
|
2
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
18
|
|
258
|
7
|
100
|
|
|
|
36
|
$name =~ s{^Treex::Block::}{} or $name = "::$name"; #strip leading Treex::Block:: or add leading :: |
259
|
7
|
|
|
|
|
15
|
my $params; |
260
|
7
|
100
|
|
|
|
17
|
if ( scalar @parameters ) { |
261
|
2
|
|
|
|
|
8
|
$params = q{ } . join q{ }, @parameters; |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
else { |
264
|
5
|
|
|
|
|
10
|
$params = q{}; |
265
|
|
|
|
|
|
|
} |
266
|
7
|
|
|
|
|
21
|
push @block_strings, $name . $params; |
267
|
|
|
|
|
|
|
} |
268
|
3
|
|
|
|
|
40
|
return join $delim, @block_strings; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub get_required_files { |
272
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
273
|
0
|
|
|
|
|
0
|
my @block_items = @{ $self->block_items }; |
|
0
|
|
|
|
|
0
|
|
274
|
0
|
|
|
|
|
0
|
my @required_files; |
275
|
0
|
|
|
|
|
0
|
foreach my $block_item (@block_items) { |
276
|
0
|
|
|
|
|
0
|
my $block = $self->_load_block($block_item); |
277
|
|
|
|
|
|
|
push @required_files, |
278
|
|
|
|
|
|
|
map { |
279
|
0
|
|
|
|
|
0
|
$block_item->{block_name} . "\t" . $_; |
|
0
|
|
|
|
|
0
|
|
280
|
|
|
|
|
|
|
} $block->get_required_share_files(); |
281
|
|
|
|
|
|
|
} |
282
|
0
|
|
|
|
|
0
|
return @required_files; |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub _add_quotes { # adding quotes only if param. value contains a space |
286
|
2
|
|
|
2
|
|
6
|
my ($block_parameter) = @_; |
287
|
2
|
|
|
|
|
13
|
my ( $name, $value ) = split /=/, $block_parameter, 2; |
288
|
2
|
50
|
|
|
|
13
|
if ( $value =~ /\s/ ) { |
289
|
0
|
|
|
|
|
0
|
my $res_string = "$name="; |
290
|
|
|
|
|
|
|
|
291
|
0
|
0
|
0
|
|
|
0
|
if ( $value =~ /'/ && $value !~ /"/ ) { |
292
|
0
|
|
|
|
|
0
|
$res_string .= '"' . $value . '"'; |
293
|
|
|
|
|
|
|
} else { |
294
|
0
|
|
|
|
|
0
|
$value =~ s/'/\\'/g; |
295
|
0
|
|
|
|
|
0
|
$res_string .= "'" . $value . "'"; |
296
|
|
|
|
|
|
|
} |
297
|
0
|
|
|
|
|
0
|
return $res_string; |
298
|
|
|
|
|
|
|
} |
299
|
2
|
|
|
|
|
11
|
return $block_parameter; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub load_blocks { |
303
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
304
|
0
|
|
|
|
|
0
|
$self->loaded_blocks; #just access lazy attribute |
305
|
0
|
|
|
|
|
0
|
return; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub init { |
309
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
310
|
0
|
|
|
|
|
0
|
$self->load_blocks(); |
311
|
0
|
|
|
|
|
0
|
return; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub _load_block { |
315
|
15
|
|
|
15
|
|
41
|
my ( $self, $block_item ) = @_; |
316
|
15
|
|
|
|
|
41
|
my $block_name = $block_item->{block_name}; |
317
|
15
|
|
|
|
|
36
|
my $new_block; |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Initialize with global (scenario) parameters |
320
|
15
|
|
|
|
|
27
|
my %params = ( %{ $self->_global_params }, scenario => $self ); |
|
15
|
|
|
|
|
507
|
|
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
# which can be overriden by (local) block parameters. |
323
|
15
|
|
|
|
|
31
|
foreach my $param ( @{ $block_item->{block_parameters} } ) { |
|
15
|
|
|
|
|
46
|
|
324
|
8
|
|
|
|
|
47
|
my ( $name, $value ) = split /=/, $param, 2; |
325
|
8
|
|
|
|
|
30
|
$params{$name} = $value; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
15
|
100
|
|
1
|
|
1497
|
eval "use $block_name; 1;" or log_fatal "Can't use block $block_name !\n$@\n"; |
|
1
|
|
|
1
|
|
604
|
|
|
1
|
|
|
1
|
|
4
|
|
|
1
|
|
|
1
|
|
22
|
|
|
1
|
|
|
1
|
|
594
|
|
|
0
|
|
|
1
|
|
0
|
|
|
0
|
|
|
1
|
|
0
|
|
|
1
|
|
|
1
|
|
10
|
|
|
1
|
|
|
1
|
|
2
|
|
|
1
|
|
|
1
|
|
23
|
|
|
1
|
|
|
1
|
|
47
|
|
|
0
|
|
|
1
|
|
0
|
|
|
0
|
|
|
1
|
|
0
|
|
|
1
|
|
|
1
|
|
10
|
|
|
1
|
|
|
1
|
|
3
|
|
|
1
|
|
|
|
|
22
|
|
|
1
|
|
|
|
|
49
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
249
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
9
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
22
|
|
|
1
|
|
|
|
|
33
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
|
1
|
|
|
|
|
32
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
|
1
|
|
|
|
|
36
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
9
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
|
1
|
|
|
|
|
33
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
329
|
7
|
50
|
|
|
|
34
|
eval { |
330
|
7
|
|
|
|
|
54
|
$new_block = $block_name->new( \%params ); |
331
|
7
|
|
|
|
|
87
|
1; |
332
|
|
|
|
|
|
|
} or log_fatal "Treex::Core::Scenario->new: error when initializing block $block_name\n\nEVAL ERROR:\t$@"; |
333
|
|
|
|
|
|
|
|
334
|
7
|
0
|
33
|
|
|
196
|
if ( $self->cache && $params{'use_cache'} ) { |
335
|
0
|
|
|
|
|
0
|
$new_block = Treex::Core::CacheBlock->new( { block => $new_block, cache => $self->cache } ); |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
7
|
|
|
|
|
34
|
return $new_block; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
sub run { |
342
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
343
|
0
|
|
|
|
|
0
|
my $number_of_blocks = @{ $self->loaded_blocks }; |
|
0
|
|
|
|
|
0
|
|
344
|
0
|
0
|
|
|
|
0
|
log_fatal('No DocumentReader supplied') if !$self->_has_document_reader; |
345
|
0
|
|
|
|
|
0
|
my $reader = $self->document_reader; |
346
|
0
|
|
0
|
|
|
0
|
my $number_of_documents = $reader->number_of_documents_per_this_job() || '?'; |
347
|
0
|
|
|
|
|
0
|
my $document_number = 0; |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
#if ( $self->cache ) { |
350
|
|
|
|
|
|
|
# $document_number = $self->_run_with_cache( $reader, $number_of_blocks, $number_of_documents ); |
351
|
|
|
|
|
|
|
#} |
352
|
|
|
|
|
|
|
#else { |
353
|
0
|
|
|
|
|
0
|
$document_number = $self->_run_without_cache( $reader, $number_of_blocks, $number_of_documents ); |
354
|
|
|
|
|
|
|
#} |
355
|
|
|
|
|
|
|
|
356
|
0
|
0
|
|
|
|
0
|
log_info "Processed $document_number document" |
357
|
|
|
|
|
|
|
. ( $document_number == 1 ? '' : 's' ); |
358
|
0
|
|
|
|
|
0
|
return 1; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
sub _run_with_cache { |
362
|
|
|
|
|
|
|
|
363
|
0
|
|
|
0
|
|
0
|
my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_; |
364
|
0
|
|
|
|
|
0
|
my $document_number = 0; |
365
|
|
|
|
|
|
|
|
366
|
0
|
|
|
|
|
0
|
while ( my $document = $reader->next_document_for_this_job() ) { |
367
|
0
|
|
|
|
|
0
|
$document_number++; |
368
|
0
|
|
|
|
|
0
|
my $doc_name = $document->full_filename; |
369
|
0
|
|
|
|
|
0
|
my $doc_from = $document->loaded_from; |
370
|
0
|
|
|
|
|
0
|
log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from"; |
371
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
372
|
0
|
|
|
|
|
0
|
my $skip_to = 0; |
373
|
0
|
|
|
|
|
0
|
my $process = 0; |
374
|
0
|
|
|
|
|
0
|
my $skip_from = 0; |
375
|
0
|
|
|
|
|
0
|
my $from_hash = ""; |
376
|
0
|
|
|
|
|
0
|
my $document_last_hash = ""; |
377
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
378
|
0
|
|
|
|
|
0
|
$block_number++; |
379
|
0
|
|
|
|
|
0
|
$process = 1; |
380
|
0
|
0
|
|
|
|
0
|
if ( $block_number < $skip_to ) { |
|
|
0
|
|
|
|
|
|
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
# we know that there are identical, so we can skip them |
383
|
0
|
|
|
|
|
0
|
log_info "Skipping block $block_number/$number_of_blocks " . ref($block); |
384
|
0
|
|
|
|
|
0
|
$process = 0; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
elsif ( $block_number == $skip_to ) { |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# this is border Cache block -> we have to check whether next sequence is also same |
389
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# following sequence is same => we can continue with skipping |
392
|
0
|
0
|
0
|
|
|
0
|
if ($sequence{$skip_from}{'to'} |
393
|
|
|
|
|
|
|
&& |
394
|
|
|
|
|
|
|
$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() ) |
395
|
|
|
|
|
|
|
) |
396
|
|
|
|
|
|
|
{ |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to}); |
399
|
0
|
|
|
|
|
0
|
$skip_to = $sequence{$skip_from}{to} - 1; |
400
|
0
|
|
|
|
|
0
|
$from_hash = $document->get_hash(); |
401
|
0
|
|
|
|
|
0
|
$process = 0; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
else { |
404
|
0
|
|
|
|
|
0
|
$document_last_hash = $document->get_hash(); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
#$document->set_hash(md5_hex($document->get_hash() . $block->get_hash())); |
407
|
0
|
|
|
|
|
0
|
my $full_hash = $document->get_hash(); |
408
|
0
|
|
|
|
|
0
|
$document = $self->cache->get($full_hash); |
409
|
|
|
|
|
|
|
|
410
|
0
|
0
|
|
|
|
0
|
if ( !$document ) { |
411
|
0
|
|
|
|
|
0
|
log_fatal("Document - $full_hash is missing!!!"); |
412
|
|
|
|
|
|
|
} |
413
|
0
|
|
|
|
|
0
|
$process = 2; |
414
|
|
|
|
|
|
|
} |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
0
|
0
|
|
|
|
0
|
if ( $process == 1 ) { |
418
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
419
|
|
|
|
|
|
|
|
420
|
0
|
0
|
|
|
|
0
|
$block->process_start if ( !$block->is_started ); |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
#log_info("Document-hash: " . $document->get_hash()); |
423
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
424
|
0
|
|
|
|
|
0
|
my $status = $block->process_document($document); |
425
|
0
|
0
|
0
|
|
|
0
|
if (defined($status) |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
426
|
|
|
|
|
|
|
&& |
427
|
|
|
|
|
|
|
$status == $Treex::Core::Block::DOCUMENT_FROM_CACHE && |
428
|
|
|
|
|
|
|
$sequence{$skip_from}{'to'} && |
429
|
|
|
|
|
|
|
$self->_is_known_sequence( $sequence{$skip_from}{'hash'}, $document->get_hash() ) |
430
|
|
|
|
|
|
|
) |
431
|
|
|
|
|
|
|
{ |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
#log_warn("\tskip from " . $sequence{$skip_from}{from} . ' to ' . $sequence{$skip_from}{to}); |
434
|
0
|
|
|
|
|
0
|
$skip_to = $sequence{$skip_from}{to} - 1; |
435
|
0
|
|
|
|
|
0
|
$skip_from = $block_number + 1; |
436
|
0
|
|
|
|
|
0
|
$from_hash = $document->get_hash(); |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
0
|
|
|
|
|
0
|
$document_last_hash = $document->get_hash(); |
441
|
0
|
|
|
|
|
0
|
$document->set_hash( md5_hex( $document->get_hash() . $block->get_hash() ) ); |
442
|
|
|
|
|
|
|
|
443
|
0
|
0
|
|
|
|
0
|
if ( ref($block) eq "Treex::Core::CacheBlock" ) { |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
# cache block => mark this path as known |
446
|
0
|
|
|
|
|
0
|
my $id = $block_number + 1; |
447
|
0
|
|
|
|
|
0
|
my $from = $sequence{$id}{'_from'}; |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# the first sequence has no document |
450
|
0
|
0
|
|
|
|
0
|
if ( defined( $sequence{$from}{'document'} ) ) { |
451
|
0
|
|
|
|
|
0
|
$self->_set_known_sequence( $sequence{$from}{'hash'}, $sequence{$from}{'document'} ); |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
|
454
|
0
|
|
|
|
|
0
|
$sequence{$id}{'document'} = $document_last_hash; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
# this actually marks the document as successfully done in parallel processing (if this line |
459
|
|
|
|
|
|
|
# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel, |
460
|
|
|
|
|
|
|
# therefore not using log_info or similiar) |
461
|
0
|
0
|
|
|
|
0
|
if ( $self->document_reader->jobindex ) { |
462
|
0
|
|
|
|
|
0
|
print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n"; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
0
|
|
|
|
|
0
|
log_info "Applying process_end"; |
467
|
|
|
|
|
|
|
|
468
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
469
|
0
|
0
|
|
|
|
0
|
$block->process_end() if ( $block->is_started ); |
470
|
|
|
|
|
|
|
} |
471
|
|
|
|
|
|
|
|
472
|
0
|
|
|
|
|
0
|
return $document_number; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
sub _is_known_sequence { |
476
|
0
|
|
|
0
|
|
0
|
my ( $self, $sequence_hash, $document_hash ) = @_; |
477
|
0
|
|
|
|
|
0
|
my $hash = md5_hex( $sequence_hash, $document_hash ); |
478
|
0
|
|
|
|
|
0
|
return $self->cache->get($hash); |
479
|
|
|
|
|
|
|
} |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
sub _set_known_sequence { |
482
|
0
|
|
|
0
|
|
0
|
my ( $self, $sequence_hash, $document_hash ) = @_; |
483
|
0
|
|
|
|
|
0
|
my $hash = md5_hex( $sequence_hash, $document_hash ); |
484
|
0
|
|
|
|
|
0
|
$self->cache->set( $hash, 1 ); |
485
|
|
|
|
|
|
|
|
486
|
0
|
|
|
|
|
0
|
return; |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
sub _run_without_cache { |
490
|
|
|
|
|
|
|
|
491
|
0
|
|
|
0
|
|
0
|
my ( $self, $reader, $number_of_blocks, $number_of_documents ) = @_; |
492
|
0
|
|
|
|
|
0
|
my $document_number = 0; |
493
|
|
|
|
|
|
|
|
494
|
0
|
|
|
|
|
0
|
$self->start(); |
495
|
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
0
|
while ( my $document = $reader->next_document_for_this_job() ) { |
497
|
0
|
|
|
|
|
0
|
$document_number++; |
498
|
0
|
|
|
|
|
0
|
my $doc_name = $document->full_filename; |
499
|
0
|
|
|
|
|
0
|
my $doc_from = $document->loaded_from; |
500
|
0
|
|
|
|
|
0
|
log_info "Document $document_number/$number_of_documents $doc_name loaded from $doc_from"; |
501
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
502
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
503
|
0
|
|
|
|
|
0
|
$block_number++; |
504
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
505
|
0
|
|
|
|
|
0
|
$block->process_document($document); |
506
|
|
|
|
|
|
|
} |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
# this actually marks the document as successfully done in parallel processing (if this line |
509
|
|
|
|
|
|
|
# does not appear in the output, the parallel process will fail -- it must appear at any errorlevel, |
510
|
|
|
|
|
|
|
# therefore not using log_info or similiar) |
511
|
0
|
0
|
|
|
|
0
|
if ( $self->document_reader->jobindex ) { |
512
|
0
|
|
|
|
|
0
|
print STDERR "Document $document_number/$number_of_documents $doc_name: [success].\n"; |
513
|
|
|
|
|
|
|
} |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
0
|
|
|
|
|
0
|
$self->end(); |
517
|
|
|
|
|
|
|
|
518
|
0
|
0
|
|
|
|
0
|
log_info "Processed $document_number document" |
519
|
|
|
|
|
|
|
. ( $document_number == 1 ? '' : 's' ); |
520
|
|
|
|
|
|
|
|
521
|
0
|
|
|
|
|
0
|
return $document_number; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# Apply process_start to all blocks for which this has not yet been applied |
525
|
|
|
|
|
|
|
sub start { |
526
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
527
|
|
|
|
|
|
|
|
528
|
0
|
|
|
|
|
0
|
log_info "Applying process_start"; |
529
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
530
|
0
|
0
|
|
|
|
0
|
$block->process_start() if ( !$block->is_started ); |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
0
|
|
|
|
|
0
|
return; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# Apply the scenario to documents given in parameter |
537
|
|
|
|
|
|
|
sub apply_to_documents { |
538
|
|
|
|
|
|
|
|
539
|
0
|
|
|
0
|
1
|
0
|
my ( $self, @documents ) = @_; |
540
|
|
|
|
|
|
|
|
541
|
0
|
|
|
|
|
0
|
my $number_of_blocks = @{ $self->loaded_blocks }; |
|
0
|
|
|
|
|
0
|
|
542
|
0
|
|
|
|
|
0
|
my $block_number = 0; |
543
|
|
|
|
|
|
|
|
544
|
0
|
|
|
|
|
0
|
foreach my $document (@documents){ |
545
|
0
|
|
|
|
|
0
|
log_info "Processing document" . $document->full_filename; |
546
|
|
|
|
|
|
|
|
547
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
548
|
0
|
|
|
|
|
0
|
$block_number++; |
549
|
0
|
|
|
|
|
0
|
log_info "Applying block $block_number/$number_of_blocks " . ref($block); |
550
|
0
|
|
|
|
|
0
|
$block->process_document($document); |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
} |
553
|
|
|
|
|
|
|
|
554
|
0
|
|
|
|
|
0
|
return; |
555
|
|
|
|
|
|
|
} |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
# Apply process_end to all blocks for which this has not yet been applied |
558
|
|
|
|
|
|
|
sub end { |
559
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
560
|
|
|
|
|
|
|
|
561
|
0
|
|
|
|
|
0
|
log_info "Applying process_end"; |
562
|
0
|
|
|
|
|
0
|
foreach my $block ( @{ $self->loaded_blocks } ) { |
|
0
|
|
|
|
|
0
|
|
563
|
0
|
0
|
|
|
|
0
|
$block->process_end() if ( $block->is_started ); |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
0
|
|
|
|
|
0
|
return; |
567
|
|
|
|
|
|
|
} |
568
|
|
|
|
|
|
|
|
569
|
3
|
|
|
3
|
|
1300
|
use Module::Reload; |
|
3
|
|
|
|
|
1111
|
|
|
3
|
|
|
|
|
283
|
|
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
sub restart { |
572
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
573
|
0
|
|
|
|
|
0
|
my $changed_modules = Module::Reload->check; |
574
|
0
|
|
|
|
|
0
|
log_info "Number of reloaded modules = $changed_modules"; |
575
|
0
|
|
|
|
|
0
|
log_info "reseting the document reader\n"; |
576
|
0
|
|
|
|
|
0
|
$self->document_reader->restart(); |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
# TODO rebuild the reloaded blocks |
579
|
0
|
|
|
|
|
0
|
return; |
580
|
|
|
|
|
|
|
} |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
1; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
__END__ |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=for Pod::Coverage BUILD |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=encoding utf-8 |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=head1 NAME |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
Treex::Core::Scenario - a larger Treex processing unit, composed of blocks |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head1 VERSION |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
version 2.20160630 |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
=head1 SYNOPSIS |
599
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
use Treex::Core; |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
my ($doc1, $doc2); |
603
|
|
|
|
|
|
|
my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' ); |
604
|
|
|
|
|
|
|
$scenario->run; |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
$scenario = Treex::Core::Scenario->new(from_string => 'W2A::EN::Segment language=en'); |
608
|
|
|
|
|
|
|
$scenario->start(); |
609
|
|
|
|
|
|
|
$scenario->apply_to_documents($doc1, $doc2); |
610
|
|
|
|
|
|
|
$scenario->end(); |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
=head1 DESCRIPTION |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
A Treex scenario consists of a sequence of (possibly parametrized) Treex blocks. |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Scenarios can be described by a simple textual format, which is either passed |
619
|
|
|
|
|
|
|
directly to the scenario construction, or is contained in a text file whose |
620
|
|
|
|
|
|
|
name is passed. |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
The string description of scenarios looks as follows. |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
1) It contains a list of block names from which their 'C<Treex::Block::>' |
625
|
|
|
|
|
|
|
prefixes were removed. |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
2) The block names are separated by one or more whitespace characters. |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
3) The block names are listed in the same order in which they should be |
630
|
|
|
|
|
|
|
applied on data. |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
4) For each block, there can be one or more parameters specified, using the |
633
|
|
|
|
|
|
|
C<attribute=value> form. |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
5) Comments start with 'C<#>' and end with the nearest newline character. |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
Scenario example: |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
# morphological analysis of an English text |
641
|
|
|
|
|
|
|
Util::SetGlobal language=en selector=src |
642
|
|
|
|
|
|
|
Read::Text |
643
|
|
|
|
|
|
|
W2A::ResegmentSentences |
644
|
|
|
|
|
|
|
W2A::EN::Tokenize |
645
|
|
|
|
|
|
|
W2A::EN::NormalizeForms |
646
|
|
|
|
|
|
|
W2A::EN::FixTokenization |
647
|
|
|
|
|
|
|
W2A::EN::TagMorce |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=head1 METHODS |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
=head2 Constructor |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=over 4 |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=item my $scenario = Treex::Core::Scenario->new(from_string => 'W2A::Tokenize language=en W2A::Lemmatize' ); |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
Constructor parameter C<from_string> specifies the names of blocks which are |
659
|
|
|
|
|
|
|
to be executed (in the specified order) when the scenario is applied on a |
660
|
|
|
|
|
|
|
L<Treex::Core::Document> object. |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=item my $scenario = Treex::Core::Scenario->new(from_file => 'myscenario.scen' ); |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
The scenario description is loaded from the file. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=back |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=head2 Running the scenario |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
=over 4 |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=item $scenario->run(); |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
Run the scenario. |
676
|
|
|
|
|
|
|
One of the blocks (usually the first one) must be the document reader (see |
677
|
|
|
|
|
|
|
L<Treex::Core::DocumentReader>) that produces the |
678
|
|
|
|
|
|
|
documents on which this scenario is applied. |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=item $scenario->apply_to_documents($treex_doc); |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
Apply this scenario to a L<Treex::Core::Document> instance obtained from elsewhere. |
683
|
|
|
|
|
|
|
Please note that C<start()> must be called before the first call to this method and C<end()> |
684
|
|
|
|
|
|
|
after the last call to this method. |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
The scenario does not need to contain a document reader if documents are given |
687
|
|
|
|
|
|
|
explicitly. |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=item $scenario->start(); |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
Apply C<process_start()> to all blocks in the scenario. |
692
|
|
|
|
|
|
|
This is called automatically by C<run()>, but must be called before C<apply_to_documents()>. |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=item $scenario->end(); |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
Apply C<process_end()> to all blocks in the scenario. |
697
|
|
|
|
|
|
|
This is called automatically by C<run()>, but must be called after calls to C<apply_to_documents()>. |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
=back |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=head2 Internal methods for loading scenarios |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
=over 4 |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
=item _load_scenario_file($filename) |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
loads a scenario description from a file |
709
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
=item parse_scenario_string |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
parses a textual description of a scenario |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=item construct_scenario_string |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
constructs a scenario textual description from an existing scenario instance |
717
|
|
|
|
|
|
|
accepts named parameter multiline - when set, blocks are separated by newline instead of space |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=item load_blocks |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
use blocks and call their constructors |
722
|
|
|
|
|
|
|
can be used for preloading blocks for e.g. server applications |
723
|
|
|
|
|
|
|
(when a scenario is run, its blocks are loaded automatically) |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=item init |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
does all initialization, so that after this method the scenario is ready to run; |
728
|
|
|
|
|
|
|
currently just load blocks |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
=item restart |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
resets the document reader, in future it will rebuild reloaded blocks |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=back |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=head1 SEE ALSO |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
L<Treex::Core::Block> |
740
|
|
|
|
|
|
|
L<Treex::Core> |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=head1 AUTHORS |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz> |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
Martin Popel <popel@ufal.mff.cuni.cz> |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
David Mareček <marecek@ufal.mff.cuni.cz> |
749
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
Tomáš Kraut <kraut@ufal.mff.cuni.cz> |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
Martin Majliš <majlis@ufal.mff.cuni.cz> |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
Ondřej Dušek <odusek@ufal.mff.cuni.cz> |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |