line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Treex::Core::Block; |
2
|
|
|
|
|
|
|
$Treex::Core::Block::VERSION = '2.20160630'; |
3
|
2
|
|
|
2
|
|
15647
|
use Moose; |
|
2
|
|
|
|
|
430652
|
|
|
2
|
|
|
|
|
16
|
|
4
|
2
|
|
|
2
|
|
11568
|
use Treex::Core::Common; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
13
|
|
5
|
2
|
|
|
2
|
|
11423
|
use Treex::Core::Resource; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
98
|
|
6
|
2
|
|
|
2
|
|
12
|
use Digest::MD5 qw(md5_hex); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
94
|
|
7
|
2
|
|
|
2
|
|
13
|
use Storable; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
92
|
|
8
|
2
|
|
|
2
|
|
11
|
use Time::HiRes; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
18
|
|
9
|
2
|
|
|
2
|
|
1173
|
use App::whichpm 'which_pm'; |
|
2
|
|
|
|
|
917
|
|
|
2
|
|
|
|
|
97
|
|
10
|
2
|
|
|
2
|
|
14
|
use Readonly; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
78
|
|
11
|
2
|
|
|
2
|
|
11
|
use List::MoreUtils qw(uniq); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
22
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
has selector => ( is => 'ro', isa => 'Str', default => '' ); |
14
|
|
|
|
|
|
|
has language => ( is => 'ro', isa => 'Str', default => 'all' ); |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
has scenario => ( |
17
|
|
|
|
|
|
|
is => 'ro', |
18
|
|
|
|
|
|
|
isa => 'Treex::Core::Scenario', |
19
|
|
|
|
|
|
|
writer => '_set_scenario', |
20
|
|
|
|
|
|
|
weak_ref => 1, |
21
|
|
|
|
|
|
|
); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
has select_bundles => ( |
24
|
|
|
|
|
|
|
is => 'ro', |
25
|
|
|
|
|
|
|
default => 0, |
26
|
|
|
|
|
|
|
documentation => 'apply process_bundle only on the specified bundles,' |
27
|
|
|
|
|
|
|
. ' e.g. "1-4,6,8-12". The default is 0 which means all bundles. Useful for debugging.', |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has if_missing_zone => ( |
31
|
|
|
|
|
|
|
is => 'ro', |
32
|
|
|
|
|
|
|
isa => enum( [qw(fatal warn ignore create)] ), |
33
|
|
|
|
|
|
|
default => 'fatal', |
34
|
|
|
|
|
|
|
documentation => 'What to do if process_zone is to be called on a zone' |
35
|
|
|
|
|
|
|
. ' (specified by parameters language and selector) that is missing in a given bundle?', |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
has if_missing_tree => ( |
39
|
|
|
|
|
|
|
is => 'ro', |
40
|
|
|
|
|
|
|
isa => enum( [qw(fatal warn ignore create)] ), |
41
|
|
|
|
|
|
|
default => 'fatal', |
42
|
|
|
|
|
|
|
documentation => 'What to do if process_[atnp]tree is to be called on a tree' |
43
|
|
|
|
|
|
|
. ' that is missing in a given zone?', |
44
|
|
|
|
|
|
|
); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
has if_missing_bundles => ( |
47
|
|
|
|
|
|
|
is => 'ro', |
48
|
|
|
|
|
|
|
isa => enum( [qw(fatal warn ignore)] ), |
49
|
|
|
|
|
|
|
default => 'fatal', |
50
|
|
|
|
|
|
|
documentation => 'What to do if process_document is to be called on a document' |
51
|
|
|
|
|
|
|
. ' with no bundles?', |
52
|
|
|
|
|
|
|
); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
has report_progress => ( |
57
|
|
|
|
|
|
|
is => 'ro', |
58
|
|
|
|
|
|
|
isa => 'Str', |
59
|
|
|
|
|
|
|
default => 0, |
60
|
|
|
|
|
|
|
documentation => 'Report which bundle (TODO: zone,tree,node) is being processed via log_info. Useful for debugging.', |
61
|
|
|
|
|
|
|
); |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
has [qw(_is_bundle_selected _is_language_selected _is_selector_selected)] => ( is => 'rw' ); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
has _hash => ( is => 'rw', isa => 'Str' ); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
has is_started => ( is => 'ro', isa => 'Bool', writer => '_set_is_started', default => 0 ); |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Readonly our $DOCUMENT_PROCESSED => 1; |
70
|
|
|
|
|
|
|
Readonly our $DOCUMENT_FROM_CACHE => 2; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# For load_other_block() and get_or_load_other_block() |
74
|
|
|
|
|
|
|
# TODO this could also be in Scenario instead of Block... |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# new other block of same name replaces old one here |
77
|
|
|
|
|
|
|
has _loaded_other_blocks => ( is => 'rw', isa => 'HashRef', default => sub { {} } ); |
78
|
|
|
|
|
|
|
# all loaded other blocks, no replacing |
79
|
|
|
|
|
|
|
has _loaded_other_blocks_array => ( is => 'rw', isa => 'ArrayRef', default => sub { [] } ); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub zone_label { |
82
|
0
|
|
|
0
|
1
|
0
|
my ($self) = @_; |
83
|
0
|
0
|
|
|
|
0
|
my $label = $self->language or return; |
84
|
0
|
0
|
0
|
|
|
0
|
if ( defined $self->selector && $self->selector ne '' ) { |
85
|
0
|
|
|
|
|
0
|
$label .= '_' . $self->selector; |
86
|
|
|
|
|
|
|
} |
87
|
0
|
|
|
|
|
0
|
return $label; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# TODO |
91
|
|
|
|
|
|
|
# has robust => ( is=> 'ro', isa=>'Bool', default=>0, |
92
|
|
|
|
|
|
|
# documentation=>'no fatal errors in robust mode'); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub BUILD { |
95
|
8
|
|
|
8
|
0
|
16885
|
my $self = shift; |
96
|
|
|
|
|
|
|
|
97
|
8
|
50
|
|
|
|
273
|
if ( $self->select_bundles ) { |
98
|
0
|
0
|
|
|
|
0
|
log_fatal 'select_bundles=' . $self->select_bundles . ' does not match /^\d+(-\d+)?(,\d+(-\d+)?)*$/' |
99
|
|
|
|
|
|
|
if $self->select_bundles !~ /^\d+(-\d+)?(,\d+(-\d+)?)*$/; |
100
|
0
|
|
|
|
|
0
|
my %selected; |
101
|
0
|
|
|
|
|
0
|
foreach my $span ( split /,/, $self->select_bundles ) { |
102
|
0
|
0
|
|
|
|
0
|
if ( $span =~ /(\d+)-(\d+)/ ) { |
103
|
0
|
|
|
|
|
0
|
@selected{ $1 .. $2 } = ( $1 .. $2 ); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
else { |
106
|
0
|
|
|
|
|
0
|
$selected{$span} = 1; |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
} |
109
|
0
|
|
|
|
|
0
|
$self->_set_is_bundle_selected( \%selected ); |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
8
|
100
|
|
|
|
245
|
if ( $self->language ne 'all' ) { |
113
|
7
|
|
|
|
|
163
|
my @codes = split /,/, $self->language; |
114
|
7
|
|
|
|
|
18
|
my %selected; |
115
|
7
|
|
|
|
|
18
|
for my $code (@codes) { |
116
|
7
|
50
|
|
|
|
37
|
log_fatal "'$code' is not a valid ISO 639 language code" |
117
|
|
|
|
|
|
|
if !Treex::Core::Types::is_lang_code($code); |
118
|
7
|
|
|
|
|
22
|
$selected{$code} = 1; |
119
|
|
|
|
|
|
|
} |
120
|
7
|
|
|
|
|
215
|
$self->_set_is_language_selected( \%selected ); |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
|
123
|
8
|
50
|
|
|
|
186
|
if ( $self->selector ne 'all' ) { |
124
|
8
|
100
|
|
|
|
171
|
if ( $self->selector eq '' ) { |
125
|
7
|
|
|
|
|
219
|
$self->_set_is_selector_selected( { q{} => 1 } ); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
else { |
128
|
1
|
|
|
|
|
22
|
my @selectors = split /,/, $self->selector; |
129
|
1
|
|
|
|
|
3
|
my %selected; |
130
|
1
|
|
|
|
|
3
|
for my $selector (@selectors) { |
131
|
1
|
50
|
|
|
|
6
|
log_fatal "'$selector' is not a valid selector name" |
132
|
|
|
|
|
|
|
if $selector !~ /^[a-z\d]*$/i; |
133
|
1
|
|
|
|
|
4
|
$selected{$selector} = 1; |
134
|
|
|
|
|
|
|
} |
135
|
1
|
|
|
|
|
30
|
$self->_set_is_selector_selected( \%selected ); |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
8
|
|
|
|
|
24
|
return; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
sub _compute_hash { |
143
|
0
|
|
|
0
|
|
|
my $self = shift; |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
|
|
|
my $md5 = Digest::MD5->new(); |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# compute block parameters hash |
148
|
0
|
|
|
|
|
|
my $params_str = ""; |
149
|
|
|
|
|
|
|
map { |
150
|
0
|
|
|
|
|
|
$params_str .= $_ . "=" . $self->{$_}; |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# log_warn("\t\t" . $_ . "=" . $self->{$_} . " - " . ref($self->{$_})); |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
sort # in canonical form |
155
|
0
|
|
|
|
|
|
grep { !ref( $self->{$_} ) } # no references |
156
|
0
|
|
|
|
|
|
grep { defined( $self->{$_} ) } # value has to be defined |
157
|
0
|
|
|
|
|
|
grep { !/(scenario|block)/ } |
158
|
0
|
|
|
|
|
|
keys %{$self}; |
|
0
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Digest::MD5 cannot handle Unicode strings (it dies with "Wide character in subroutine entry") |
161
|
2
|
|
|
2
|
|
2815
|
use Encode; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
3983
|
|
162
|
0
|
|
|
|
|
|
$md5->add(Encode::encode_utf8($params_str)); |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
# compute block source code hash |
165
|
0
|
|
|
|
|
|
my ( $block_filename, $block_version ) = which_pm( $self->get_block_name() ); |
166
|
0
|
0
|
|
|
|
|
open( my $block_fh, "<", $block_filename ) or log_fatal("Can't open '$block_filename': $!"); |
167
|
0
|
|
|
|
|
|
binmode($block_fh); |
168
|
0
|
|
|
|
|
|
$md5->addfile($block_fh); |
169
|
0
|
|
|
|
|
|
close($block_fh); |
170
|
|
|
|
|
|
|
|
171
|
0
|
|
|
|
|
|
$self->_set_hash( $md5->hexdigest ); |
172
|
|
|
|
|
|
|
|
173
|
0
|
|
|
|
|
|
return; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub get_hash { |
177
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
178
|
0
|
0
|
|
|
|
|
if (!$self->_hash){ |
179
|
0
|
|
|
|
|
|
$self->_compute_hash(); |
180
|
|
|
|
|
|
|
} |
181
|
0
|
|
|
|
|
|
return $self->_hash; |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
sub require_files_from_share { |
185
|
0
|
|
|
0
|
1
|
|
my ( $self, @rel_paths ) = @_; |
186
|
0
|
|
|
|
|
|
my $my_name = 'the block ' . $self->get_block_name(); |
187
|
|
|
|
|
|
|
return map { |
188
|
0
|
|
|
|
|
|
log_info $self->get_block_name() . " requires file " . $_; |
|
0
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
Treex::Core::Resource::require_file_from_share( $_, $my_name ) |
190
|
|
|
|
|
|
|
} @rel_paths; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
sub get_required_share_files { |
194
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# By default there are no required share files. |
197
|
|
|
|
|
|
|
# The purpose of this method is to be overriden if needed. |
198
|
0
|
|
|
|
|
|
return (); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
sub process_document { |
202
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
203
|
0
|
|
|
|
|
|
my ($document) = pos_validated_list( |
204
|
|
|
|
|
|
|
\@_, |
205
|
|
|
|
|
|
|
{ isa => 'Treex::Core::Document' }, |
206
|
|
|
|
|
|
|
); |
207
|
|
|
|
|
|
|
|
208
|
0
|
0
|
0
|
|
|
|
if ( !$document->get_bundles() && $self->if_missing_bundles =~ /fatal|warn/){ |
209
|
0
|
|
|
|
|
|
my $message = "There are no bundles in the document and block " . $self->get_block_name() . |
210
|
|
|
|
|
|
|
" doesn't override the method process_document. You can use prepend 'Util::SetGlobal if_missing_bundles=ignore' to allow processing empty documents. "; |
211
|
0
|
0
|
|
|
|
|
log_fatal($message) if $self->if_missing_bundles eq 'fatal'; |
212
|
0
|
|
|
|
|
|
log_warn($message); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
0
|
|
|
|
|
|
my $bundleNo = 1; |
216
|
0
|
|
|
|
|
|
foreach my $bundle ( $document->get_bundles() ) { |
217
|
0
|
0
|
0
|
|
|
|
if ( !$self->select_bundles || $self->_is_bundle_selected->{$bundleNo} ) { |
218
|
0
|
|
|
|
|
|
$self->process_bundle( $bundle, $bundleNo ); |
219
|
|
|
|
|
|
|
} |
220
|
0
|
|
|
|
|
|
$bundleNo++; |
221
|
|
|
|
|
|
|
} |
222
|
0
|
|
|
|
|
|
return 1; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub _apply_function_on_each_zone { |
226
|
0
|
|
|
0
|
|
|
my ($self, $doc, $function, @function_params) = @_; |
227
|
0
|
|
|
|
|
|
my %zones; |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# When using "all", we must collect the zones used in the whole document. |
230
|
0
|
0
|
0
|
|
|
|
if ($self->language eq 'all' || $self->selector eq 'all'){ |
231
|
0
|
|
|
|
|
|
foreach my $bundle ($doc->get_bundles){ |
232
|
0
|
|
|
|
|
|
foreach my $zone ($bundle->get_all_zones()){ |
233
|
0
|
|
|
|
|
|
$zones{$zone->get_label()} = 1; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
# Otherwise, we can make a Cartesian product of lang(uage)s and sel(ector)s |
238
|
|
|
|
|
|
|
else { |
239
|
0
|
|
|
|
|
|
foreach my $lang (keys %{$self->_is_language_selected}){ |
|
0
|
|
|
|
|
|
|
240
|
0
|
|
|
|
|
|
foreach my $sel (keys %{$self->_is_selector_selected}){ |
|
0
|
|
|
|
|
|
|
241
|
0
|
|
|
|
|
|
$zones{$lang . '_' . $sel} = 1; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
0
|
|
|
|
|
|
my $orig_language = $self->language; |
247
|
0
|
|
|
|
|
|
my $orig_selector = $self->selector; |
248
|
0
|
|
|
|
|
|
foreach my $label (keys %zones){ |
249
|
0
|
|
|
|
|
|
my ($lang, $sel) = split /_/, $label; |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# pretend this block was called with only this one language and selector |
252
|
0
|
|
|
|
|
|
$self->{language} = $lang; |
253
|
0
|
|
|
|
|
|
$self->{selector} = $sel; |
254
|
0
|
|
|
|
|
|
$function->(@function_params); |
255
|
|
|
|
|
|
|
} |
256
|
0
|
|
|
|
|
|
$self->{language} = $orig_language; |
257
|
0
|
|
|
|
|
|
$self->{selector} = $orig_selector; |
258
|
0
|
|
|
|
|
|
return; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub process_bundle { |
262
|
0
|
|
|
0
|
1
|
|
my ( $self, $bundle, $bundleNo ) = @_; |
263
|
0
|
0
|
|
|
|
|
if ($self->report_progress){ |
264
|
0
|
|
|
|
|
|
log_info "Processing bundle $bundleNo"; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
0
|
|
|
|
|
|
my @zones = $bundle->get_all_zones(); |
268
|
|
|
|
|
|
|
|
269
|
0
|
0
|
|
|
|
|
if ($self->if_missing_zone eq 'create') { |
270
|
0
|
|
|
|
|
|
my (@langs, @sels); |
271
|
0
|
0
|
|
|
|
|
if ($self->language eq 'all') { |
272
|
0
|
|
|
|
|
|
@langs = uniq map{$_->language} @zones; |
|
0
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
} else { |
274
|
0
|
|
|
|
|
|
@langs = keys %{$self->_is_language_selected}; |
|
0
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} |
276
|
0
|
0
|
|
|
|
|
if ($self->selector eq 'all') { |
277
|
0
|
|
|
|
|
|
@sels = uniq map{$_->selector} @zones; |
|
0
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
} else { |
279
|
0
|
|
|
|
|
|
@sels = keys %{$self->_is_selector_selected}; |
|
0
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
# Cartesian product of lang(uage)s and sel(ector)s |
283
|
0
|
|
|
|
|
|
@zones = map {my $l = $_; map{$bundle->get_or_create_zone($l, $_)} @sels} @langs; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
} else { |
285
|
0
|
|
|
|
|
|
@zones = $self->get_selected_zones(@zones); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
0
|
0
|
0
|
|
|
|
if (!@zones && $self->if_missing_zone =~ /fatal|warn/) { |
289
|
0
|
|
|
|
|
|
my $message = "No zone (language=" |
290
|
|
|
|
|
|
|
. $self->language |
291
|
|
|
|
|
|
|
. ", selector=" |
292
|
|
|
|
|
|
|
. $self->selector |
293
|
|
|
|
|
|
|
. ") was found in a bundle and block " . $self->get_block_name() |
294
|
|
|
|
|
|
|
. " doesn't override the method process_bundle"; |
295
|
0
|
0
|
|
|
|
|
log_fatal($message) if $self->if_missing_zone eq 'fatal'; |
296
|
0
|
|
|
|
|
|
log_warn($message); |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
0
|
|
|
|
|
|
foreach my $zone (@zones) { |
300
|
0
|
|
|
|
|
|
$self->process_zone( $zone, $bundleNo ); |
301
|
|
|
|
|
|
|
} |
302
|
0
|
|
|
|
|
|
return; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
sub get_selected_zones { |
306
|
0
|
|
|
0
|
0
|
|
my ( $self, @zones ) = @_; |
307
|
0
|
0
|
|
|
|
|
if ( $self->language ne 'all') { |
308
|
0
|
|
|
|
|
|
@zones = grep { $self->_is_language_selected->{ $_->language } } @zones; |
|
0
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
} |
310
|
0
|
0
|
|
|
|
|
if ( $self->selector ne 'all') { |
311
|
0
|
|
|
|
|
|
@zones = grep { $self->_is_selector_selected->{ $_->selector } } @zones; |
|
0
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
0
|
|
|
|
|
|
return @zones; |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
sub _try_process_layer { |
318
|
0
|
|
|
0
|
|
|
my ( $self, $zone, $layer, $bundleNo ) = @_; |
319
|
0
|
|
|
|
|
|
my $meta = $self->meta; |
320
|
|
|
|
|
|
|
|
321
|
0
|
0
|
|
|
|
|
if ( my $m = $meta->find_method_by_name("process_${layer}tree") ) { |
322
|
0
|
0
|
|
|
|
|
if (!$zone->has_tree($layer)){ |
323
|
0
|
0
|
|
|
|
|
if ($self->if_missing_tree eq 'create'){ |
324
|
0
|
|
|
|
|
|
$zone->create_tree($layer); |
325
|
|
|
|
|
|
|
} else { |
326
|
0
|
|
|
|
|
|
return 0; |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
#$self->process_atree($tree, $bundleNo); |
331
|
0
|
|
|
|
|
|
$m->execute( $self, $zone->get_tree($layer), $bundleNo ); |
332
|
0
|
|
|
|
|
|
return 1; |
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
0
|
0
|
|
|
|
|
if ( my $m = $meta->find_method_by_name("process_${layer}node") ) { |
336
|
0
|
0
|
|
|
|
|
if (!$zone->has_tree($layer)){ |
337
|
0
|
0
|
|
|
|
|
if ($self->if_missing_tree eq 'create'){ |
338
|
0
|
|
|
|
|
|
$zone->create_tree($layer); |
339
|
|
|
|
|
|
|
} else { |
340
|
0
|
|
|
|
|
|
return 0; |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
} |
343
|
0
|
|
|
|
|
|
my $tree = $zone->get_tree($layer); |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# process_ptree should be executed also on the root node (usually the S phrase) |
346
|
0
|
0
|
|
|
|
|
my @opts = $layer eq 'p' ? ( { add_self => 1 } ) : (); |
347
|
0
|
|
|
|
|
|
foreach my $node ( $tree->get_descendants(@opts) ) { |
348
|
|
|
|
|
|
|
# Skip nodes deleted by previous process_Xnode() call. |
349
|
0
|
0
|
|
|
|
|
next if ref $node eq 'Treex::Core::Node::Deleted'; |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
#$self->process_anode($node, $bundleNo); |
352
|
0
|
|
|
|
|
|
$m->execute( $self, $node, $bundleNo ); |
353
|
|
|
|
|
|
|
} |
354
|
0
|
|
|
|
|
|
return 1; |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
0
|
|
|
|
|
|
return 0; |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub process_zone { |
361
|
0
|
|
|
0
|
1
|
|
my ( $self, $zone, $bundleNo ) = @_; |
362
|
0
|
|
|
|
|
|
my $overriden = 0; |
363
|
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
|
for my $layer (qw(a t n p)) { |
365
|
0
|
0
|
|
|
|
|
if ($self->_try_process_layer( $zone, $layer, $bundleNo )){ |
366
|
0
|
|
|
|
|
|
$overriden++; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
0
|
0
|
0
|
|
|
|
if (!$overriden && $self->if_missing_tree =~ /fatal|warn/){ |
371
|
|
|
|
|
|
|
my $message = "At least one of the methods /process_(document|bundle|zone|[atnp](tree|node))/ " |
372
|
|
|
|
|
|
|
. "must be overriden and the corresponding [atnp] trees must be present in bundles.\n" |
373
|
|
|
|
|
|
|
. "The zone '" . $zone->get_label() . "' contains trees ( " |
374
|
0
|
|
|
|
|
|
. ( join ',', map { $_->get_layer() } $zone->get_all_trees() ) . ")."; |
|
0
|
|
|
|
|
|
|
375
|
0
|
0
|
|
|
|
|
log_fatal($message) if $self->if_missing_tree eq 'fatal'; |
376
|
0
|
|
|
|
|
|
log_warn($message); |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
|
379
|
0
|
|
|
|
|
|
return; |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
sub process_start { |
383
|
|
|
|
|
|
|
my ($self) = @_; |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
$self->require_files_from_share( $self->get_required_share_files() ); |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
return; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
after 'process_start' => sub { |
391
|
|
|
|
|
|
|
my ($self) = @_; |
392
|
|
|
|
|
|
|
$self->_set_is_started(1); |
393
|
|
|
|
|
|
|
}; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
sub process_end { |
396
|
|
|
|
|
|
|
my ($self) = @_; |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
# default implementation is empty, but can be overriden |
399
|
|
|
|
|
|
|
return; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
after 'process_end' => sub { |
403
|
|
|
|
|
|
|
my ($self) = @_; |
404
|
|
|
|
|
|
|
foreach my $other_block (@{$self->_loaded_other_blocks_array}) { |
405
|
|
|
|
|
|
|
if ( $other_block->is_started ) { |
406
|
|
|
|
|
|
|
$other_block->process_end(); |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
$self->_set_is_started(0); |
410
|
|
|
|
|
|
|
}; |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
sub get_block_name { |
413
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
414
|
0
|
|
|
|
|
|
return ref($self); |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
sub load_other_block { |
418
|
0
|
|
|
0
|
0
|
|
my ($self, $other_block_name, $params_hash_ref) = @_; |
419
|
|
|
|
|
|
|
|
420
|
0
|
|
|
|
|
|
my $other_block_full_name = "Treex::Block::$other_block_name"; |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# CONSTRUCT PARAMETERS HASH |
423
|
|
|
|
|
|
|
# global params (TODO: do that?) |
424
|
0
|
|
|
|
|
|
my %params = %{$self->scenario->_global_params}; |
|
0
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# overridden by selected (TODO: all?) block params |
426
|
0
|
|
|
|
|
|
$params{language} = $self->language; |
427
|
0
|
|
|
|
|
|
$params{selector} = $self->selector; |
428
|
0
|
|
|
|
|
|
$params{scenario} = $self->scenario; |
429
|
|
|
|
|
|
|
# overridden by locally set params |
430
|
0
|
|
|
|
|
|
@params{ keys %$params_hash_ref } = values %$params_hash_ref; |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# CREATE IT and start it |
433
|
0
|
0
|
|
|
|
|
eval "use $other_block_full_name; 1;" or |
434
|
|
|
|
|
|
|
log_fatal "Treex::Core::Block->get_other_block: " . |
435
|
|
|
|
|
|
|
"Can't use block $other_block_name!\n$@\n"; |
436
|
0
|
|
|
|
|
|
my $other_block; |
437
|
0
|
0
|
|
|
|
|
eval { |
438
|
0
|
|
|
|
|
|
$other_block = $other_block_full_name->new( \%params ); |
439
|
0
|
|
|
|
|
|
1; |
440
|
|
|
|
|
|
|
} or log_fatal "Treex::Core::Block->get_other_block: " . |
441
|
|
|
|
|
|
|
"Can't initialize block $other_block_name!\n$@\n"; |
442
|
0
|
|
|
|
|
|
$other_block->process_start(); |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
# this may replace older block with same name |
445
|
0
|
|
|
|
|
|
$self->_loaded_other_blocks->{$other_block_name} = $other_block; |
446
|
|
|
|
|
|
|
# this not |
447
|
0
|
|
|
|
|
|
push @{$self->_loaded_other_blocks_array}, $other_block; |
|
0
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
449
|
0
|
|
|
|
|
|
return $other_block; |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
sub get_or_load_other_block { |
453
|
0
|
|
|
0
|
0
|
|
my ($self, $other_block_name, $params_hash_ref) = @_; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
my $other_block = |
456
|
|
|
|
|
|
|
exists ($self->_loaded_other_blocks->{$other_block_name}) |
457
|
|
|
|
|
|
|
? |
458
|
0
|
0
|
|
|
|
|
$self->_loaded_other_blocks->{$other_block_name} |
459
|
|
|
|
|
|
|
: |
460
|
|
|
|
|
|
|
$self->load_other_block($other_block_name, $params_hash_ref) |
461
|
|
|
|
|
|
|
; |
462
|
|
|
|
|
|
|
|
463
|
0
|
|
|
|
|
|
return $other_block; |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
1; |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
__END__ |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=for Pod::Coverage BUILD build_language |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=encoding utf-8 |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=head1 NAME |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
Treex::Core::Block - the basic data-processing unit in the Treex framework |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head1 VERSION |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
version 2.20160630 |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=head1 SYNOPSIS |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
package Treex::Block::My::Block; |
485
|
|
|
|
|
|
|
use Moose; |
486
|
|
|
|
|
|
|
use Treex::Core::Common; |
487
|
|
|
|
|
|
|
extends 'Treex::Core::Block'; |
488
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
sub process_bundle { |
490
|
|
|
|
|
|
|
my ( $self, $bundle) = @_; |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# bundle processing |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
=head1 DESCRIPTION |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
C<Treex::Core::Block> is a base class serving as a common ancestor of |
499
|
|
|
|
|
|
|
all Treex blocks. |
500
|
|
|
|
|
|
|
C<Treex::Core::Block> can't be used directly in any scenario. |
501
|
|
|
|
|
|
|
Use it's descendants which implement one of the methods |
502
|
|
|
|
|
|
|
C<process_document()>, C<process_bundle()>, C<process_zone()>, |
503
|
|
|
|
|
|
|
C<process_[atnp]tree()> or C<process_[atnp]node()>. |
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=over 4 |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=item my $block = Treex::Block::My::Block->new(); |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
Instance of a block derived from C<Treex::Core::Block> can be created |
513
|
|
|
|
|
|
|
by the constructor (optionally, a reference to a hash of block parameters |
514
|
|
|
|
|
|
|
can be specified as the constructor's argument, see L</BLOCK PARAMETRIZATION>). |
515
|
|
|
|
|
|
|
However, it is not likely to appear in your code since block initialization |
516
|
|
|
|
|
|
|
is usually invoked automatically when initializing a scenario. |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=back |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=head1 METHODS FOR BLOCK EXECUTION |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
You must override one of the following methods: |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
=over 4 |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=item $block->process_document($document); |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Applies the block instance on the given instance of |
529
|
|
|
|
|
|
|
L<Treex::Core::Document>. The default implementation |
530
|
|
|
|
|
|
|
iterates over all bundles in a document and calls C<process_bundle()>. So in |
531
|
|
|
|
|
|
|
most cases you don't need to override this method. |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=item $block->process_bundle($bundle); |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Applies the block instance on the given bundle |
536
|
|
|
|
|
|
|
(L<Treex::Core::Bundle>). |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=item $block->process_zone($zone); |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
Applies the block instance on the given bundle zone |
541
|
|
|
|
|
|
|
(L<Treex::Core::BundleZone>). Unlike |
542
|
|
|
|
|
|
|
C<process_document> and C<process_bundle>, C<process_zone> requires block |
543
|
|
|
|
|
|
|
attribute C<language> (and possibly also C<selector>) to be specified. |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
=item $block->process_I<X>tree($tree); |
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
Here I<X> stands for a,t,n or p. |
548
|
|
|
|
|
|
|
This method is executed on the root node of a tree on a given layer (a,t,n,p). |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=item $block->process_I<X>node($node); |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
Here I<X> stands for a,t,n or p. |
553
|
|
|
|
|
|
|
This method is executed on the every node of a tree on a given layer (a,t,n,p). |
554
|
|
|
|
|
|
|
Note that for layers a, t, and n, this method is not executed on the root node |
555
|
|
|
|
|
|
|
(because the root node is just a "technical" root without the attributes of regular nodes). |
556
|
|
|
|
|
|
|
However, C<process_pnode> is executed also on the root node |
557
|
|
|
|
|
|
|
(because its a regular non-terminal node with a phrase attribute, usually C<S>). |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=back |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=head2 $block->process_start(); |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
This method is called before all documents are processed. |
564
|
|
|
|
|
|
|
This method is responsible for loading required models. |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head2 $block->process_end(); |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
This method is called after all documents are processed. |
569
|
|
|
|
|
|
|
The default implementation is empty, but derived classes can override it |
570
|
|
|
|
|
|
|
to e.g. print some final summaries, statistics etc. |
571
|
|
|
|
|
|
|
Overriding this method is preferable to both |
572
|
|
|
|
|
|
|
standard Perl END blocks (where you cannot access C<$self> and instance attributes), |
573
|
|
|
|
|
|
|
and DEMOLISH (which is not called in some cases, e.g. C<treex --watch>). |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=head1 BLOCK PARAMETRIZATION |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=over 4 |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=item my $block = BlockGroup::My_Block->new({$name1=>$value1,$name2=>$value2...}); |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Block instances can be parametrized by a hash containing parameter name/value |
584
|
|
|
|
|
|
|
pairs. |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=item my $param_value = $block->get_parameter($param_name); |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
Parameter values used in block construction can |
589
|
|
|
|
|
|
|
be revealed by C<get_parameter> method (but cannot be changed). |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=back |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head1 MISCEL |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=over 4 |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
=item my $langcode_selector = $block->zone_label(); |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=item my $block_name = $block->get_block_name(); |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
It returns the name of the block module. |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=item my @needed_files = $block->get_required_share_files(); |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
If a block requires some files to be present in the shared part of Treex, |
606
|
|
|
|
|
|
|
their list (with relative paths starting in |
607
|
|
|
|
|
|
|
L<Treex::Core::Config-E<gt>share_dir|Treex::Core::Config/share_dir>) can be |
608
|
|
|
|
|
|
|
specified by redefining by this method. By default, an empty list is returned. |
609
|
|
|
|
|
|
|
Presence of the files is automatically checked in the block constructor. If |
610
|
|
|
|
|
|
|
some of the required file is missing, the constructor tries to download it |
611
|
|
|
|
|
|
|
from L<http://ufallab.ms.mff.cuni.cz>. |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
This method should be used especially for downloading statistical models, |
614
|
|
|
|
|
|
|
but not for installed tools or libraries. |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
sub get_required_share_files { |
617
|
|
|
|
|
|
|
my $self = shift; |
618
|
|
|
|
|
|
|
return ( |
619
|
|
|
|
|
|
|
'data/models/mytool/'.$self->language.'/features.gz', |
620
|
|
|
|
|
|
|
'data/models/mytool/'.$self->language.'/weights.tsv', |
621
|
|
|
|
|
|
|
); |
622
|
|
|
|
|
|
|
} |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
=item require_files_from_share() |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
This method checks existence of files given as parameters, it tries to download them if they are not present |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=back |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=head1 SEE ALSO |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
L<Treex::Core::Node>, |
633
|
|
|
|
|
|
|
L<Treex::Core::Bundle>, |
634
|
|
|
|
|
|
|
L<Treex::Core::Document>, |
635
|
|
|
|
|
|
|
L<Treex::Core::Scenario>, |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=head1 AUTHOR |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
ZdenÄk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz> |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
Martin Popel <popel@ufal.mff.cuni.cz> |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
Copyright © 2011-2012 by Institute of Formal and Applied Linguistics, Charles University in Prague |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |