File Coverage

blib/lib/Treex/Tool/Parser/MSTperl/ModelLabelling.pm

Criterion	Covered	Total	%
statement	1	3	33.3
branch			n/a
condition			n/a
subroutine	1	1	100.0
pod			n/a
total	2	4	50.0

line	stmt	sub	time	code
1				package Treex::Tool::Parser::MSTperl::ModelLabelling;
2				{
3				$Treex::Tool::Parser::MSTperl::ModelLabelling::VERSION = '0.11949';
4				}
5
6	1	1	4198	use Moose;
	0
	0
7				use Carp;
8
9				extends 'Treex::Tool::Parser::MSTperl::ModelBase';
10
11				# basic MLE from data
12				# unigrams->{label} = prob
13				# to be used for smoothing and/or backoff
14				# (can be used both for emissions and transitions)
15				# It also contains the SEQUENCE_BOUNDARY_LABEL prob
16				# (the SEQUENCE_BOUNDARY_LABEL is counted once for each sequence)
17				# which might be inappropriate in some cases (e.g. for emission probs)
18				has 'unigrams' => (
19				is => 'rw',
20				isa => 'HashRef',
21				default => sub { {} },
22				);
23
24				# transition scores for Viterbi with the structure (if MIRA-computed):
25				# transitions->{feature}->{label_prev}->{label_this} = score
26				# or probabilities (if obtained by MLE):
27				# transitions->{label_prev}->{label_this} = prob
28				# (if MLE is used for transitions, during the precomputing phase
29				# counts are temporarily stored instead of probs
30				# and they are converted to probs on calling prepare_for_mira() );
31				has 'transitions' => (
32				is => 'rw',
33				isa => 'HashRef',
34				default => sub { {} },
35				);
36
37				# smoothing parameters of transition probabilities
38				# (to be computed by EM algorithm)
39				# PROB(label\|prev_label) =
40				# smooth_bigrams * transitions->{prev_label}->{label} +
41				# smooth_unigrams * unigrams->{label} +
42				# smooth_uniform * uniform_prob
43
44				has 'smooth_bigrams' => (
45				is => 'rw',
46				isa => 'Num',
47				default => 0.6,
48				);
49
50				has 'smooth_unigrams' => (
51				is => 'rw',
52				isa => 'Num',
53				default => 0.3,
54				);
55
56				has 'smooth_uniform' => (
57				is => 'rw',
58				isa => 'Num',
59				default => 0.1,
60				);
61
62				# = 1 / ( keys %{ $self->unigrams } )
63				# set in compute_smoothing_params
64				has 'uniform_prob' => (
65				is => 'rw',
66				isa => 'Num',
67				default => 0.02,
68				);
69
70				# emission scores for Viterbi with the structure
71				# emissions->{feature}->{label} = score
72				has 'emissions' => (
73				is => 'rw',
74				isa => 'HashRef',
75				default => sub { {} },
76				);
77
78				# just an array ref with the sentences that represent the heldout data
79				# to be able to run the EM algorithm in prepare_for_mira()
80				has 'EM_heldout_data' => (
81				is => 'rw',
82				isa => 'ArrayRef[Treex::Tool::Parser::MSTperl::Sentence]',
83				default => sub { [] },
84				);
85
86				sub BUILD {
87				my ($self) = @_;
88
89				$self->featuresControl( $self->config->labelledFeaturesControl );
90
91				return;
92				}
93
94				# STORING AND LOADING
95
96				sub get_data_to_store {
97				my ($self) = @_;
98
99				return {
100				'unigrams' => $self->unigrams,
101				'transitions' => $self->transitions,
102				'emissions' => $self->emissions,
103				'smooth_uniform' => $self->smooth_uniform,
104				'smooth_unigrams' => $self->smooth_unigrams,
105				'smooth_bigrams' => $self->smooth_bigrams,
106				'uniform_prob' => $self->uniform_prob,
107				};
108				}
109
110				sub load_data {
111
112				my ( $self, $data ) = @_;
113
114				$self->unigrams( $data->{'unigrams'} );
115				$self->transitions( $data->{'transitions'} );
116				$self->emissions( $data->{'emissions'} );
117
118				$self->smooth_uniform( $data->{'smooth_uniform'} );
119				$self->smooth_unigrams( $data->{'smooth_unigrams'} );
120				$self->smooth_bigrams( $data->{'smooth_bigrams'} );
121				$self->uniform_prob( $data->{'uniform_prob'} );
122
123				my $unigrams_ok = scalar( keys %{ $self->unigrams } );
124				my $transitions_ok = scalar( keys %{ $self->transitions } );
125				my $emissions_ok = scalar( keys %{ $self->emissions } );
126
127				my $smooth_sum = $self->smooth_uniform + $self->smooth_unigrams
128				+ $self->smooth_bigrams;
129
130				my $smooth_ok = (
131
132				# should be 1 but might be a little shifted
133				$smooth_sum > 0.999
134				&& $smooth_sum < 1.001
135
136				# must be between 0 and 1
137				&& $self->uniform_prob > 0
138				&& $self->uniform_prob < 1
139				);
140
141				my $ALGORITHM = $self->config->labeller_algorithm;
142
143				if ($ALGORITHM == 0
144				\|\| $ALGORITHM == 1
145				\|\| $ALGORITHM == 2
146				\|\| $ALGORITHM == 3
147				\|\| $ALGORITHM == 4
148				\|\| $ALGORITHM == 8
149				\|\| $ALGORITHM == 9
150				\|\| $ALGORITHM == 10
151				\|\| $ALGORITHM == 11
152				\|\| $ALGORITHM == 14
153				)
154				{
155
156				# these algorithms do not use lambda smoothing
157				# (smoothing is kind of part of the learning)
158				$smooth_ok = 1;
159				}
160
161				if ( $ALGORITHM >= 20 ) {
162
163				# these algorithms do not use separate transitions
164				# (transitions are included in emissions)
165				$transitions_ok = 1;
166				}
167
168				if ( $unigrams_ok && $transitions_ok && $emissions_ok && $smooth_ok ) {
169				return 1;
170				} else {
171				return 0;
172				}
173				}
174
175				# TRANSITION AND EMISSION COUNTS AND PROBABILITIES
176				# (more or less standard MLE)
177
178				sub add_unigram {
179				my ( $self, $label ) = @_;
180
181				if ( $self->config->DEBUG >= 2 ) {
182				print "add_unigram($label)\n";
183				}
184
185				# increment number of unigrams
186				$self->unigrams->{$label} += 1;
187
188				return;
189				}
190
191				sub add_transition {
192
193				# Str, Str, Maybe[Str]
194				my ( $self, $label_this, $label_prev, $feature ) = @_;
195
196				if ( defined $feature ) {
197				if ( $self->config->DEBUG >= 2 ) {
198				print "add_transition($label_this, $label_prev, $feature)\n";
199				}
200
201				# increment number of bigrams
202				$self->transitions->{$feature}->{$label_prev}->{$label_this} += 1;
203				} else {
204				if ( $self->config->DEBUG >= 2 ) {
205				print "add_transition($label_this, $label_prev)\n";
206				}
207
208				# increment number of bigrams
209				$self->transitions->{$label_prev}->{$label_this} += 1;
210				}
211
212				return;
213				}
214
215				sub add_emission {
216				my ( $self, $feature, $label ) = @_;
217
218				if ( $self->config->DEBUG >= 3 ) {
219				print "add_emission($feature, $label)\n";
220				}
221
222				$self->emissions->{$feature}->{$label} += 1;
223
224				return;
225				}
226
227				# called after preprocessing training data, before entering the MIRA phase
228				sub prepare_for_mira {
229
230				my ( $self, $trainer ) = @_;
231
232				# $trainer is used only in algorithm no. 9, to initialize its emissions_summed and transitions_summed
233
234				my $ALGORITHM = $self->config->labeller_algorithm;
235
236				if ( $ALGORITHM == 9 ) {
237
238				# no need to recompute to probabilities (counts are OK)
239				# but have to update emissions_summed
240				# and transitions_summed appropriately
241
242				my $sumUpdateWeight = $trainer->number_of_inner_iterations;
243
244				# emissions->{feature}->{label}
245				foreach my $feature ( keys %{ $self->emissions } ) {
246				foreach my $label ( keys %{ $self->emissions->{$feature} } ) {
247				$trainer->emissions_summed->{$feature}->{$label}
248				= $sumUpdateWeight * $self->emissions->{$feature}->{$label};
249				}
250				}
251
252				# transitions->{feature}->{label_prev}->{label_this}
253				foreach my $feature ( keys %{ $self->transitions } ) {
254				foreach my $label_prev (
255				keys %{ $self->transitions->{$feature} }
256				)
257				{
258				foreach my $label_this (
259				keys %{ $self->transitions->{$feature}->{$label_prev} }
260				)
261				{
262				$trainer->transitions_summed
263				->{$feature}->{$label_prev}->{$label_this}
264				= $sumUpdateWeight * $self->transitions
265				->{$feature}->{$label_prev}->{$label_this};
266				}
267				}
268				}
269
270				} elsif ( $ALGORITHM == 1 \|\| $ALGORITHM == 8 \|\| $ALGORITHM >= 20 ) {
271
272				# no recomputing taking place
273
274				} elsif (
275				$ALGORITHM == 0
276				\|\| $ALGORITHM == 2
277				\|\| $ALGORITHM == 3
278				\|\| $ALGORITHM == 4
279				\|\| $ALGORITHM == 5
280				\|\| $ALGORITHM == 10
281				\|\| $ALGORITHM == 11
282				\|\| $ALGORITHM == 12
283				\|\| $ALGORITHM == 13
284				\|\| $ALGORITHM == 14
285				\|\| $ALGORITHM == 15
286				\|\| $ALGORITHM == 16
287				\|\| $ALGORITHM == 17
288				\|\| $ALGORITHM == 18
289				\|\| $ALGORITHM == 19
290				)
291				{
292
293				# compute unigram probs
294				$self->compute_probs_from_counts( $self->unigrams );
295
296				# compute transition probs
297				foreach my $label ( keys %{ $self->transitions } ) {
298				$self->compute_probs_from_counts( $self->transitions->{$label} );
299				}
300
301				if ($ALGORITHM == 4
302				\|\| $ALGORITHM == 5
303				)
304				{
305
306				# compute emission probs (MLE)
307				foreach my $feature ( keys %{ $self->emissions } ) {
308				$self->compute_probs_from_counts(
309				$self->emissions->{$feature}
310				);
311				}
312				} # end if $ALGORITHM == 4\|5
313
314				if ($ALGORITHM == 5
315				\|\| $ALGORITHM == 12
316				\|\| $ALGORITHM == 13
317				\|\| $ALGORITHM == 15
318				\|\| $ALGORITHM == 16
319				\|\| $ALGORITHM == 17
320				\|\| $ALGORITHM == 18
321				\|\| $ALGORITHM == 19
322				)
323				{
324
325				# run the EM algorithm to compute
326				# transition probs smoothing params
327				$self->compute_smoothing_params();
328				} # end if $ALGORITHM == 5\|12\|13\|15\|16\|17\|18\|19
329
330				} else { # $ALGORITHM not handled by any branch above (e.g. 6 or 7)
331				croak "ModelLabelling->prepare_for_mira not implemented"
332				. " for algorithm no. $ALGORITHM!";
333				}
334
335				return;
336				} # end prepare_for_mira
337
338				# basic MLE
339				sub compute_probs_from_counts {
340				my ( $self, $hashref ) = @_;
341
342				my $sum = 0;
343				foreach my $key ( keys %{$hashref} ) {
344				$sum += $hashref->{$key};
345				}
346				foreach my $key ( keys %{$hashref} ) {
347				$hashref->{$key} = $hashref->{$key} / $sum;
348				}
349
350				return;
351				}
352
353				# EM algorithm to estimate linear interpolation smoothing parameters
354				# for smoothing of transition probabilities
355				sub compute_smoothing_params {
356				my ($self) = @_;
357
358				# only progress and/or debug info
359				if ( $self->config->DEBUG >= 1 ) {
360				print "Running EM algorithm to estimate lambdas...\n";
361				}
362
363				# uniform probability is 1 / number of different labels
364				$self->uniform_prob( 1 / ( keys %{ $self->unigrams } ) );
365
366				my $change = 1;
367				while ( $change > $self->config->EM_EPSILON ) {
368
369				#compute "expected counts"
370				my $expectedCounts = $self->count_expected_counts_all();
371				my $expectedCountsSum = $expectedCounts->[0] + $expectedCounts->[1]
372				+ $expectedCounts->[2];
373
374				#compute new lambdas
375				my @new_lambdas = map { $_ / $expectedCountsSum } @$expectedCounts;
376
377				#compute the change (sum of changes of lambdas)
378				$change = abs( $self->smooth_uniform - $new_lambdas[0] )
379				+ abs( $self->smooth_unigrams - $new_lambdas[1] )
380				+ abs( $self->smooth_bigrams - $new_lambdas[2] );
381
382				# set new lambdas
383				$self->smooth_uniform( $new_lambdas[0] );
384				$self->smooth_unigrams( $new_lambdas[1] );
385				$self->smooth_bigrams( $new_lambdas[2] );
386
387				# only progress and/or debug info
388				if ( $self->config->DEBUG >= 2 ) {
389				print "Last change: $change\n";
390				}
391				}
392
393				# only progress and/or debug info
394				if ( $self->config->DEBUG >= 2 ) {
395				print "Final lambdas:\n"
396				. "uniform: " . $self->smooth_uniform
397				. "unigram: " . $self->smooth_unigrams
398				. "bigram: " . $self->smooth_bigrams;
399				}
400				if ( $self->config->DEBUG >= 1 ) {
401				print "Done.\n";
402				}
403
404				return;
405				}
406
407				#count "expected counts" of lambdas
408				sub count_expected_counts_all {
409				my ($self) = @_;
410
411				my $expectedCounts = [ 0, 0, 0 ];
412				my $sentence_counts;
413
414				foreach my $sentence ( @{ $self->EM_heldout_data } ) {
415				$sentence_counts = $self->count_expected_counts_tree(
416				$sentence->nodes_with_root->[0]
417				);
418				$expectedCounts->[0] += $sentence_counts->[0];
419				$expectedCounts->[1] += $sentence_counts->[1];
420				$expectedCounts->[2] += $sentence_counts->[2];
421				}
422
423				return $expectedCounts;
424				}
425
426				#count "expected counts" of lambdas for a parse (sub)tree, recursively
427				sub count_expected_counts_tree {
428				my ( $self, $root_node ) = @_;
429
430				my @edges = @{ $root_node->children };
431
432				# get sequence of labels
433				my @labels = map { $_->child->label } @edges;
434
435				# counts for this sequence
436				my $expectedCounts = $self->count_expected_counts_sequence( \@labels );
437
438				# recursion
439				my $subtree_counts;
440				foreach my $edge (@edges) {
441				$subtree_counts = $self->count_expected_counts_tree( $edge->child );
442				$expectedCounts->[0] += $subtree_counts->[0];
443				$expectedCounts->[1] += $subtree_counts->[1];
444				$expectedCounts->[2] += $subtree_counts->[2];
445				}
446
447				return $expectedCounts;
448				}
449
450				# count "expected counts" of lambdas for a sequence of labels
451				# (including the boundaries)
452				sub count_expected_counts_sequence {
453
454				my ( $self, $labels_sequence ) = @_;
455
456				# to be computed here
457				my $expectedCounts = [ 0, 0, 0 ];
458
459				# boundary at the beginning
460				my $label_prev = $self->config->SEQUENCE_BOUNDARY_LABEL;
461
462				# boundary at the end
463				push @$labels_sequence, $self->config->SEQUENCE_BOUNDARY_LABEL;
464
465				foreach my $label_this (@$labels_sequence) {
466
467				# get probs
468				my $ngramProbs =
469				$self->get_transition_probs_array( $label_this, $label_prev );
470				my $finalProb = $ngramProbs->[0] * $self->smooth_uniform
471				+ $ngramProbs->[1] * $self->smooth_unigrams
472				+ $ngramProbs->[2] * $self->smooth_bigrams;
473
474				# update expected counts
475				$expectedCounts->[0] +=
476				$self->smooth_uniform * $ngramProbs->[0] / $finalProb;
477				$expectedCounts->[1] +=
478				$self->smooth_unigrams * $ngramProbs->[1] / $finalProb;
479				$expectedCounts->[2] +=
480				$self->smooth_bigrams * $ngramProbs->[2] / $finalProb;
481
482				$label_prev = $label_this;
483				}
484
485				return $expectedCounts;
486				}
487
488				sub get_all_labels {
489				my ($self) = @_;
490
491				my @labels = keys %{ $self->unigrams };
492
493				return \@labels;
494				}
495
496				# ACCESS TO SCORES
497
498				sub get_label_score {
499
500				# (Str $label, Str $label_prev, ArrayRef[Str] $features)
501				my ( $self, $label, $label_prev, $features ) = @_;
502
503				my $ALGORITHM = $self->config->labeller_algorithm;
504
505				if ( $ALGORITHM == 8 \|\| $ALGORITHM == 9 ) {
506
507				my $result = 0;
508
509				# foreach present feature
510				foreach my $feature (@$features) {
511
512				# add "emission score" and "transition score"
513				$result +=
514				$self->get_emission_score( $label, $feature )
515				+
516				$self->get_transition_score(
517				$label, $label_prev, $feature
518				)
519				;
520				} # end foreach $feature
521
522				return $result;
523
524				} elsif ( $ALGORITHM == 14 \|\| $ALGORITHM == 15 ) {
525
526				my $label_scores = $self->get_emission_scores($features);
527
528				my $result = $label_scores->{$label};
529				if ( !defined $result ) {
530				$result = 0;
531				}
532
533				# multiply by transitions score
534				$result *= $self->get_transition_score( $label, $label_prev );
535
536				return $result;
537
538				} elsif ( $ALGORITHM == 16 \|\| $ALGORITHM == 18 ) {
539
540				my $result = 0;
541
542				# sum of emission scores
543				foreach my $feature (@$features) {
544				$result += $self->get_emission_score( $label, $feature );
545				}
546
547				# multiply by transitions score
548				$result *= $self->get_transition_score( $label, $label_prev );
549
550				return $result;
551
552				} elsif ( $ALGORITHM == 19 ) {
553
554				my $result = 0;
555
556				# sum of emission scores
557				foreach my $feature (@$features) {
558				$result += $self->get_emission_score( $label, $feature );
559				}
560
561				# sigmoid transformation
562				$result = 1 / ( 1 + exp( -$result * $self->config->SIGM_LAMBDA ) );
563
564				# multiply by transitions score
565				$result *= $self->get_transition_score( $label, $label_prev );
566
567				return $result;
568
569				} elsif ( $ALGORITHM == 17 ) {
570
571				my $result = 0;
572
573				# sum of emission scores
574				foreach my $feature (@$features) {
575				$result += $self->get_emission_score( $label, $feature );
576				}
577
578				# multiply by transitions score
579				if ( $result > 0 ) {
580				$result *= $self->get_transition_score( $label, $label_prev );
581				} else {
582
583				# For negative scores this works the other way round,
584				# eg. if I had two labels, both with emission score -5
585				# and their transition probs were 0.2 and 0.9,
586				# then the latter should get a higher score;
587				# simple multiplication won't help as that would yield scores
588				# of -1.0 and -4.5, thus inverting the order.
589				# What I do is that for transition prob p I use (1-p)
590				# which yields 0.8 and 0.1 transition probabilities here,
591				# giving scores of -4.0 and -0.5, which is much better.
592				# Still, a label with negative emission score, even if very close
593				# to 0 and with a high transition prob, cannot outscore any label
594				# with a positive emission score, even if low with a low transition
595				# prob - normalizing scores to be non-negative would be necessary
596				# for this, as in alg 0 and similar.
597				# $result *=
598				# ( 1 - $self->get_transition_score( $label, $label_prev ) );
599
600				# TODO trying new variant - setting negative scores to 0
601				$result = 0;
602				}
603
604				return $result;
605
606				} elsif ( $ALGORITHM >= 20 ) {
607
608				my $result = 0;
609
610				# sum of emission scores
611				foreach my $feature (@$features) {
612				$result += $self->get_emission_score( $label, $feature );
613				}
614
615				# TODO: could also compute using $label_prev,
616				# using transitions to store these;
617				# would allow to use full Viterbi
618
619				return $result;
620
621				} else {
622				croak "ModelLabelling->get_label_score not implemented"
623				. " for algorithm no. $ALGORITHM!";
624
625				# usually because it needs to know scores of all possible labels
626				# to normalize them properly
627				}
628				}
629
630				sub get_emission_score {
631
632				# (Str $label, Str $feature)
633				my ( $self, $label, $feature ) = @_;
634
635				my $ALGORITHM = $self->config->labeller_algorithm;
636
637				if ($ALGORITHM == 8
638				\|\| $ALGORITHM == 9
639				\|\| $ALGORITHM == 16
640				\|\| $ALGORITHM == 17
641				\|\| $ALGORITHM == 18
642				\|\| $ALGORITHM == 19
643				\|\| $ALGORITHM >= 20
644				)
645				{
646
647				if ($self->emissions->{$feature}
648				&& $self->emissions->{$feature}->{$label}
649				)
650				{
651				return $self->emissions->{$feature}->{$label};
652				} else {
653				return 0;
654				}
655
656				} else {
657				croak "ModelLabelling->get_emission_score not implemented"
658				. " for algorithm no. $ALGORITHM!";
659				}
660				}
661
662				sub get_transition_score {
663
664				# (Str $label_this, Str $label_prev, Maybe[Str] $feature)
665				my ( $self, $label_this, $label_prev, $feature ) = @_;
666
667				my $ALGORITHM = $self->config->labeller_algorithm;
668
669				if ( $ALGORITHM == 8 \|\| $ALGORITHM == 9 ) {
670				if ($self->transitions->{$feature}
671				&& $self->transitions->{$feature}->{$label_prev}
672				&& $self->transitions->{$feature}->{$label_prev}->{$label_this}
673				)
674				{
675				return $self->transitions->{$feature}->{$label_prev}->{$label_this};
676				} else {
677
678				# no smoothing as it is used in addition, not in multiplication
679				return 0;
680				}
681				} elsif (
682				$ALGORITHM == 5
683				\|\| $ALGORITHM == 12 \|\| $ALGORITHM == 13
684				\|\| $ALGORITHM == 15
685				\|\| $ALGORITHM == 16 \|\| $ALGORITHM == 17
686				\|\| $ALGORITHM == 18 \|\| $ALGORITHM == 19
687				)
688				{
689
690				# smoothing by linear combination
691				# PROB(label\|prev_label) =
692				# smooth_bigrams * transitions->{prev_label}->{label} +
693				# smooth_unigrams * unigrams->{label} +
694				# smooth_uniform * uniform_prob
695
696				my $probs =
697				$self->get_transition_probs_array( $label_this, $label_prev );
698
699				my $result = $probs->[0] * $self->smooth_uniform
700				+ $probs->[1] * $self->smooth_unigrams
701				+ $probs->[2] * $self->smooth_bigrams;
702
703				return $result;
704
705				} elsif (
706				$ALGORITHM == 0
707				\|\| $ALGORITHM == 1
708				\|\| $ALGORITHM == 2
709				\|\| $ALGORITHM == 3
710				\|\| $ALGORITHM == 4
711				\|\| $ALGORITHM == 10
712				\|\| $ALGORITHM == 11
713				\|\| $ALGORITHM == 14
714				)
715				{
716
717				# no real smoothing
718				if ($self->transitions->{$label_prev}
719				&& $self->transitions->{$label_prev}->{$label_this}
720				)
721				{
722				return $self->transitions->{$label_prev}->{$label_this};
723				} else {
724				return 0.00001;
725				}
726				} else {
727				croak "ModelLabelling->get_transition_score not implemented"
728				. " for algorithm no. $ALGORITHM!";
729				}
730				} # end get_transition_score
731
732				# $result->[0] = uniform prob
733				# $result->[1] = unigram prob
734				# $result->[2] = bigram prob
735				sub get_transition_probs_array {
736
737				# (Str $label_this, Str $label_prev)
738				my ( $self, $label_this, $label_prev ) = @_;
739
740				my $result = [ 0, 0, 0 ];
741
742				# uniform
743				$result->[0] = $self->uniform_prob;
744
745				if ( $self->unigrams->{$label_this} ) {
746
747				# unigram
748				$result->[1] = $self->unigrams->{$label_this};
749
750				if ( $self->transitions->{$label_prev}->{$label_this} ) {
751
752				# bigram
753				$result->[2] = $self->transitions->{$label_prev}->{$label_this};
754				}
755				}
756
757				return $result;
758				}
759
760				# get scores of all possible labels based on all the features
761				# (gives different numbers for different algorithms,
762				# often they are not real probabilities but general scores)
763				sub get_emission_scores {
764
765				# (ArrayRef[Str] $features)
766				my ( $self, $features ) = @_;
767
768				# a hashref of the structure $result->{label} = prob
769				# where prob might or might not be a real probability
770				# (i.e. may or may not fulfill 0 <= prob <= 1 & sum(probs) == 1),
771				# depending on the algorithm used
772				# (but always a higher prob means a better scoring (more probable) label
773				# and all of the probs are non-negative) TODO does it hold?
774				my $result = {};
775
776				my $ALGORITHM = $self->config->labeller_algorithm;
777
778				if ($ALGORITHM == 0
779				\|\| $ALGORITHM == 1
780				\|\| $ALGORITHM == 2
781				\|\| $ALGORITHM == 3
782				\|\| $ALGORITHM == 10
783				\|\| $ALGORITHM == 11
784				\|\| $ALGORITHM == 12
785				\|\| $ALGORITHM == 13
786				\|\| $ALGORITHM == 14
787				\|\| $ALGORITHM == 15
788				)
789				{
790				$result = $self->get_emission_scores_basic_MIRA($features);
791				} elsif ( $ALGORITHM == 4 \|\| $ALGORITHM == 5 ) {
792				$result = $self->get_emission_scores_no_MIRA($features);
793				} else {
794				croak "ModelLabelling->get_emission_scores not implemented"
795				. " for algorithm no. $ALGORITHM!";
796				}
797
798				# the boundary label is NOT a valid label
799				delete $result->{ $self->config->SEQUENCE_BOUNDARY_LABEL };
800
801				return $result;
802				}
803
804				sub get_emission_scores_basic_MIRA {
805
806				my ( $self, $features ) = @_;
807
808				my $ALGORITHM = $self->config->labeller_algorithm;
809
810				my $result = {};
811
812				my $warnNoEmissionProbs = "!!! WARNING !!! "
813				. "Based on the training data, no possible label was found"
814				. " for an edge. This usually means that either"
815				. " your training data are not big enough or that"
816				. " the set of features you are using"
817				. " is not well constructed - either it is too small"
818				. " or it lacks features that would be general enough"
819				. " to cover all possible sentences."
820				. " Using blind emission probabilities instead.\n";
821
822				# "pure MIRA", i.e. no MLE
823
824				if ( $ALGORITHM == 11 \|\| $ALGORITHM == 13 ) {
825
826				# initialize all label scores with 0 (so that all labels get some score)
827				my $all_labels = $self->get_all_labels();
828				foreach my $label (@$all_labels) {
829				$result->{$label} = 0;
830				}
831				}
832
833				# get scores
834				foreach my $feature (@$features) {
835				if ( $self->emissions->{$feature} ) {
836				foreach my $label ( keys %{ $self->emissions->{$feature} } ) {
837				$result->{$label} += $self->emissions->{$feature}->{$label};
838				}
839				}
840				}
841
842				# subtracting the minimum from the score
843				if ($ALGORITHM == 0
844				\|\| $ALGORITHM == 1
845				\|\| $ALGORITHM == 2
846				\|\| $ALGORITHM == 10
847				\|\| $ALGORITHM == 11
848				\|\| $ALGORITHM == 12
849				\|\| $ALGORITHM == 13
850				\|\| $ALGORITHM == 14
851				\|\| $ALGORITHM == 15
852				)
853				{
854
855				# find min and max score
856				my $min = 1e300;
857				my $max = -1e300;
858				foreach my $label ( keys %$result ) {
859				if ( $result->{$label} < $min ) {
860				$min = $result->{$label};
861				}
862				if ( $result->{$label} > $max ) {
863				$max = $result->{$label};
864				}
865
866				# else is between $min and $max -> keep the values as they are
867				}
868
869				if ( $min > $max ) {
870
871				# $min > $max, i.e. nothing has been generated -> backoff
872				if ( $self->config->DEBUG >= 2 ) {
873				print $warnNoEmissionProbs;
874				}
875
876				# backoff by using unigram probabilities
877				# (or unigram counts in some algorithms)
878				$result = $self->unigrams;
879				} else {
880
881				# something has been generated, now 0 and 1 start to differ
882				if ($ALGORITHM == 0
883				\|\| $ALGORITHM == 10
884				\|\| $ALGORITHM == 11
885				\|\| $ALGORITHM == 12
886				\|\| $ALGORITHM == 13
887				\|\| $ALGORITHM == 14
888				\|\| $ALGORITHM == 15
889				)
890				{
891
892				# 0 MIRA-trained scores recomputed by subtracting min
893				# and converted to probs
894				if ( $min < $max ) {
895
896				# the typical case
897				# my $subtractant = $min;
898				my $divisor = 0;
899
900				foreach my $label ( keys %$result ) {
901				$result->{$label} = ( $result->{$label} - $min );
902				$divisor += $result->{$label};
903				}
904				foreach my $label ( keys %$result ) {
905				$result->{$label} = $result->{$label} / $divisor;
906				}
907				} else {
908
909				# $min == $max
910
911				# uniform prob distribution
912				my $prob = 1 / scalar( keys %$result );
913				foreach my $label ( keys %$result ) {
914				$result->{$label} = $prob;
915				}
916				}
917
918				# end $ALGORITHM == 0\|10\|11\|12\|13\|14\|15
919				} else {
920
921				# $ALGORITHM == 1\|2
922				# 1 ditto, NOT converted to probs
923				# (but should behave the same as 0)
924				# 2 ditto, sum in Viterbi instead of product
925				# (new_prob = old_prob + emiss*trans)
926				# (for 1 and 2 the emission probs are completely the same,
927				# they are just handled differently by the Labeller)
928
929				if ( $min < $max ) {
930
931				# the typical case
932				# my $subtractant = $min;
933
934				foreach my $label ( keys %$result ) {
935				$result->{$label} = ( $result->{$label} - $min );
936				}
937				} else {
938
939				# $min == $max
940				# uniform prob distribution
941
942				if ( $min <= 0 ) {
943
944				# we would like to keep the values
945				# but this is not possible in this case
946				foreach my $label ( keys %$result ) {
947
948				# so lets just assign ones
949				$result->{$label} = 1;
950				}
951				}
952
953				# else there is already a uniform distribution
954				# so let's keep it as it is
955				}
956
957				# end $ALGORITHM == 1\|2
958				}
959				}
960
961				# end $ALGORITHM == 0\|1\|2\|10\|11\|12\|13\|14\|15
962				} else {
963
964				# $ALGORITHM == 3
965				# no subtraction of minimum, just throw away <= 0
966
967				foreach my $label ( keys %$result ) {
968				if ( $result->{$label} <= 0 ) {
969				delete $result->{$label};
970				}
971
972				# else > 0 -> just keep it there and that's it
973				}
974				} # end $ALGORITHM == 3
975
976				return $result;
977				} # end get_emission_scores_basic_MIRA
978
979				sub get_emission_scores_no_MIRA {
980
981				my ( $self, $features ) = @_;
982
983				my $result = {};
984
985				my $warnNoEmissionProbs = "!!! WARNING !!! "
986				. "Based on the training data, no possible label was found"
987				. " for an edge. This usually means that either"
988				. " your training data are not big enough or that"
989				. " the set of features you are using"
990				. " is not well constructed - either it is too small"
991				. " or it lacks features that would be general enough"
992				. " to cover all possible sentences."
993				. " Using blind emission probabilities instead.\n";
994
995				# basic or full MLE, no MIRA
996
997				my %counts = ();
998				my %prob_sums = ();
999
1000				# get scores
1001				foreach my $feature (@$features) {
1002				if ( $self->emissions->{$feature} ) {
1003
1004				# !!! TODO there should be a product here !!!
1005				foreach my $label ( keys %{ $self->emissions->{$feature} } ) {
1006				$prob_sums{$label} +=
1007				$self->emissions->{$feature}->{$label};
1008				$counts{$label}++;
1009				}
1010				}
1011				}
1012
1013				if ( keys %prob_sums ) {
1014				foreach my $label ( keys %prob_sums ) {
1015
1016				# something like average probability
1017				# = all features have the score of 1
1018				# (or more precisely 1/number_of_features)
1019				$result->{$label} = $prob_sums{$label} / $counts{$label};
1020				}
1021				} else {
1022
1023				# backoff
1024				if ( $self->config->DEBUG >= 2 ) {
1025				print $warnNoEmissionProbs;
1026				}
1027
1028				# backoff by using unigram probabilities
1029				# (or unigram counts in some algorithms)
1030				$result = $self->unigrams;
1031				}
1032
1033				return $result;
1034				} # end get_emission_scores_no_MIRA
1035
1036				# sets emission score (if $label_prev is not set)
1037				# or transition score (if it is)
1038				# of the $feature to $score
1039				sub set_feature_score {
1040
1041				# (Str $feature, Num $score, Str $label, Maybe[Str] $label_prev)
1042				my ( $self, $feature, $score, $label, $label_prev ) = @_;
1043
1044				if ( defined $label_prev ) {
1045				$self->transitions->{$feature}->{$label_prev}->{$label} = $score;
1046				} else {
1047				$self->emissions->{$feature}->{$label} = $score;
1048				}
1049
1050				return;
1051				}
1052
1053				# updates emission score (if $label_prev is not set)
1054				# or transition score (if it is)
1055				# of the $feature by adding $update
1056				sub update_feature_score {
1057
1058				# (Str $feature, Num $update, Str $label, Maybe[Str] $label_prev)
1059				my ( $self, $feature, $update, $label, $label_prev ) = @_;
1060
1061				if ( defined $label_prev ) {
1062				$self->transitions->{$feature}->{$label_prev}->{$label} += $update;
1063				} else {
1064				$self->emissions->{$feature}->{$label} += $update;
1065				}
1066
1067				return;
1068				}
1069
1070				# Returns the number of features in the model (where a "feature" can stand for
1071				# various things depending on the algorithm used).
				# The result is the number of innermost entries stored in emissions
				# plus the number of innermost entries stored in transitions;
				# the nesting depth assumed for transitions depends on
				# config->labeller_algorithm (see below).
1072				sub get_feature_count {
1073				
1074				my ($self) = @_;
1075				
1076				my $ALGORITHM = $self->config->labeller_algorithm;
1077				
1078				# result = $emissions_count + $transitions_count
1079				my $emissions_count = 0;
1080				my $transitions_count = 0;
1081				
1082				# structure: emissions->{feature}->{label}
				# (counted for every algorithm: one per feature/label pair)
1083				my @emission_features = keys %{ $self->emissions };
1084				foreach my $feature (@emission_features) {
1085				$emissions_count += scalar( keys %{ $self->emissions->{$feature} } );
1086				}
1087				
				# algorithms 8 and 9 are treated as storing feature-based transitions
				# (three levels); every other algorithm as plain label bigrams (two levels)
1088				if ( $ALGORITHM == 8 \|\| $ALGORITHM == 9 ) {
1089				
1090				# structure: transitions->{feature}->{label_prev}->{label}
				# (one per feature/label_prev/label triple)
1091				
1092				my @transition_features = keys %{ $self->transitions };
1093				foreach my $feature (@transition_features) {
1094				
1095				my @labels = keys %{ $self->transitions->{$feature} };
1096				foreach my $label_prev (@labels) {
1097				
1098				$transitions_count += scalar(
1099				keys %{ $self->transitions->{$feature}->{$label_prev} }
1100				);
1101				}
1102				}
1103				
1104				} else {
1105				
1106				# structure: transitions->{label_prev}->{label}
				# (one per label_prev/label pair)
1107				
1108				my @labels = keys %{ $self->transitions };
1109				foreach my $label_prev (@labels) {
1110				
1111				$transitions_count +=
1112				scalar( keys %{ $self->transitions->{$label_prev} } );
1113				}
1114				}
1115				
1116				return $emissions_count + $transitions_count;
1117				
1118				} # end get_feature_count
1119
1120				1;
1121
1122				__END__
1123
1124				=pod
1125
1126				=for Pod::Coverage BUILD
1127
1128				=encoding utf-8
1129
1130				=head1 NAME
1131
1132				Treex::Tool::Parser::MSTperl::ModelLabelling
1133
1134				=head1 VERSION
1135
1136				version 0.11949
1137
1138				=head1 DESCRIPTION
1139
1140				This is an in-memory representation of a labelling model,
1141				extended from L<Treex::Tool::Parser::MSTperl::ModelBase>.
1142
1143				=head1 FIELDS
1144
1145				=head2 Inherited from base package
1146
1147				Fields inherited from L<Treex::Tool::Parser::MSTperl::ModelBase>.
1148
1149				=over 4
1150
1151				=item config
1152
1153				Instance of L<Treex::Tool::Parser::MSTperl::Config> containing settings to be
1154				used for the model.
1155
1156				Currently the settings most relevant to the model are the following:
1157
1158				=over 8
1159
1160				=item EM_EPSILON
1161
1162				See L<Treex::Tool::Parser::MSTperl::Config/EM_EPSILON>.
1163
1164				=item labeller_algorithm
1165
1166				See L<Treex::Tool::Parser::MSTperl::Config/labeller_algorithm>.
1167
1168				=item labelledFeaturesControl
1169
1170				See L<Treex::Tool::Parser::MSTperl::Config/labelledFeaturesControl>.
1171
1172				=item SEQUENCE_BOUNDARY_LABEL
1173
1174				See L<Treex::Tool::Parser::MSTperl::Config/SEQUENCE_BOUNDARY_LABEL>.
1175
1176				=back
1177
1178				=item featuresControl
1179
1180				Provides access to labeller features, especially enabling their computation.
1181				Instance of L<Treex::Tool::Parser::MSTperl::FeaturesControl>.
1182
1183				=back
1184
1185				=head2 Label scoring
1186
1187				=over 4
1188
1189				=item emissions
1190
1191				Emission scores for Viterbi. They follow the edge-based factorization
1192				and provide scores for various labels for an edge based on its features.
1193
1194				The structure is:
1195
1196				emissions->{feature}->{label} = score
1197
1198				Scores may or may not be probabilities, based on the algorithm used.
1199				Also based on the algorithm they may be MIRA-computed
1200				or they might be obtained by standard MLE.
1201
1202				=item transitions
1203
1204				Transition scores for Viterbi. They follow the
1205				first order Markov chain edge-based factorization
1206				and provide scores for various labels for an edge
1207				probably based on its features
1208				and always based on previous edge label.
1209
1210				Scores may or may not be probabilities, based on the algorithm used.
1211				Also based on the algorithm they may be obtained by standard MLE
1212				or they might be MIRA-computed.
1213
1214				The structure is:
1215
1216				transitions->{label_prev}->{label_this} = prob
1217
1218				or
1219
1220				transitions->{feature}->{label_prev}->{label_this} = score
1221
1222				=back
1223
1224				=head2 Transitions smoothing
1225
1226				In some algorithms linear combination smoothing is used
1227				for transition probabilities.
1228				The resulting transition probability is then obtained as:
1229
1230				PROB(label\|prev_label) =
1231				smooth_bigrams * transitions->{prev_label}->{label} +
1232				smooth_unigrams * unigrams->{label} +
1233				smooth_uniform
1234
1235				=over 4
1236
1237				=item smooth_bigrams
1238
1239				=item smooth_unigrams
1240
1241				=item smooth_uniform
1242
1243				The actual smoothing parameters computed by EM algorithm.
1244				Each of them is between 0 and 1 and together they sum up to 1.
1245
1246				=item uniform_prob
1247
1248				Uniform probability of a label, computed as
1249				C<1 / ( keys %{ $self->unigrams } )>.
1250
1251				Set in C<compute_smoothing_params>.
1252
1253				=item unigrams
1254
1255				Basic MLE from data, the structure is
1256
1257				unigrams->{label} = prob
1258
1259				To be used for transitions smoothing and/or backoff
1260				(can be used both for emissions and transitions)
1261				It also contains the C<SEQUENCE_BOUNDARY_LABEL> prob
1262				(the SEQUENCE_BOUNDARY_LABEL is counted once for each sequence)
1263				which might be inappropriate in some cases (eg. for emission probs).
1264
1265				=item EM_heldout_data
1266
1267				Just an array ref with the sentences that represent the heldout data
1268				to be able to run the EM algorithm in C<prepare_for_mira()>.
1269				Used only in training.
1270
1271				=back
1272
1273				=head1 METHODS
1274
1275				=head2 Inherited
1276
1277				Subroutines inherited from L<Treex::Tool::Parser::MSTperl::ModelBase>.
1278
1279				=head3 Load and store
1280
1281				=over 4
1282
1283				=item store
1284
1285				See L<Treex::Tool::Parser::MSTperl::ModelBase/store>.
1286
1287				=item store_tsv
1288
1289				See L<Treex::Tool::Parser::MSTperl::ModelBase/store_tsv>.
1290
1291				=item load
1292
1293				See L<Treex::Tool::Parser::MSTperl::ModelBase/load>.
1294
1295				=item load_tsv
1296
1297				See L<Treex::Tool::Parser::MSTperl::ModelBase/load_tsv>.
1298
1299				=back
1300
1301				=head2 Overridden
1302
1303				Subroutines overriding stubs in L<Treex::Tool::Parser::MSTperl::ModelBase>.
1304
1305				=head3 Load and store
1306
1307				=over 4
1308
1309				=item $data = get_data_to_store(), $data = get_data_to_store_tsv()
1310
1311				Returns the model data, containing the following fields:
1312				C<unigrams>,
1313				C<transitions>,
1314				C<emissions>,
1315				C<smooth_uniform>,
1316				C<smooth_unigrams>,
1317				C<smooth_bigrams>,
1318				C<uniform_prob>
1319
1320				=item load_data($data), load_data_tsv($data)
1321
1322				Tries to get all necessary data from C<$data>
1323				(see C<get_data_to_store> to see what data are stored).
1324				Also does basic checks on the data, eg. for non-emptiness, but nothing
1325				sophisticated. Is algorithm-sensitive, i.e. if some data are not needed
1326				for the algorithm used, they do not have to be contained in the hash.
1327
1328				=back
1329
1330				=head3 Training support
1331
1332				=over 4
1333
1334				=item prepare_for_mira
1335
1336				Called after preprocessing training data, before entering the MIRA phase.
1337
1338				Function varies depending on algorithm used.
1339				Usually converts the counts stored in C<emissions>, C<transitions> and
1340				C<unigrams> (accumulated by C<add_emission>, C<add_transition> and
1341				C<add_unigram>) into probabilities.
1342				Also calls C<compute_smoothing_params> to estimate smoothing parameters
1343				for smoothing of transition probabilities.
1344
1345				=item get_feature_count
1346
1347				Only to provide information about the model.
1348				Returns number of features in the model (where a "feature" can stand for
1349				various things depending on the algorithm used).
1350
1351				=back
1352
1353				=head2 Technical methods
1354
1355				=over 4
1356
1357				=item BUILD
1358
1359				my $model = Treex::Tool::Parser::MSTperl::ModelLabelling->new(
1360				config => $config,
1361				);
1362
1363				Creates an empty model. If you are training the model, this is probably what you
1364				want, otherwise you can use C<load> or C<load_tsv>
1365				to load an existing labelling model from a file.
1366
1367				However, most often you would probably use a model for a labeller
1368				(L<Treex::Tool::Parser::MSTperl::Labeller>)
1369				or a labelling trainer
1370				(L<Treex::Tool::Parser::MSTperl::TrainerLabelling>)
1371				which both automatically create the model on build.
1372				The labeller also provides wrapping methods
1373				L<Treex::Tool::Parser::MSTperl::Labeller/load_model>
1374				and
1375				L<Treex::Tool::Parser::MSTperl::Labeller/load_model_tsv>
1376				which you can call to load the model from a file.
1377				(Btw. as you might expect, the trainer provides methods
1378				L<Treex::Tool::Parser::MSTperl::TrainerLabelling/store_model>
1379				and
1380				L<Treex::Tool::Parser::MSTperl::TrainerLabelling/store_model_tsv>.)
1381
1382				=back
1383
1384				=head2 MLE on training data
1385
1386				C<emissions> and C<transitions> can be either MIRA-trained
1387				or estimated directly from training data using MLE
1388				(Maximum Likelihood Estimate).
1389				C<unigrams> are always estimated by MLE.
1390
1391				=over 4
1392
1393				=item add_unigram ($label)
1394
1395				Increment count for the label in C<unigrams>.
1396
1397				=item add_transition ($label_this, $label_prev)
1398
1399				=item add_transition ($label_this, $label_prev, $feature)
1400
1401				Increment count for the transition in C<transitions>, possibly including a
1402				feature on "this" edge if the algorithm uses features with transitions.
1403
1404				=item add_emission ($feature, $label)
1405
1406				Increment count for this label on an edge with this feature in C<emissions>.
1407
1408				=item compute_probs_from_counts ($self->emissions)
1409
1410				Takes a hash reference with label counts and changes the counts
1411				to probabilities (this is the actual MLE).
1412				May be called in C<prepare_for_mira> on
1413				C<emissions>, C<transitions> and C<unigrams>.
1414
1415				=back
1416
1417				=head2 EM algorithm
1418
1419				=over 4
1420
1421				=item compute_smoothing_params()
1422
1423				The main method containing an implementation of the Expectation Maximization
1424				Algorithm to compute smoothing parameters (C<smooth_bigrams>,
1425				C<smooth_unigrams>, C<smooth_uniform>) for transition probabilities
1426				smoothing by linear combination of bigram, unigram and uniform probability.
1427				Iteratively tries to find
1428				such parameters that the probabilities from training data
1429				(C<transitions>, C<unigrams> and C<uniform_prob>) combined together by
1430				the smoothing parameters model well enough the heldout data
1431				(C<EM_heldout_data>), i.e. tries to maximize the probability of the heldout
1432				data given the training data probabilities by adjusting the smoothing
1433				parameters values.
1434
1435				Uses C<EM_EPSILON> as a stopping criterion, i.e. stops when the sum of
1436				absolute values of changes to all smoothing parameters is lower
1437				than the value of C<EM_EPSILON>.
1438
1439				=item count_expected_counts_all()
1440
1441				=item count_expected_counts_tree($root_node)
1442
1443				=item count_expected_counts_sequence($labels_sequence)
1444
1445				Support methods to C<compute_smoothing_params>, in the order in which they
1446				call each other.
1447
1448				=back
1449
1450				=head2 Scoring
1451
1452				A bunch of methods to score the likelihood of a label being assigned to an
1453				edge based on the edge's features and the label assigned to the previous
1454				edge.
1455
1456				=over 4
1457
1458				=item get_all_labels()
1459
1460				Returns (a reference to) an array of all labels found in the training data
1461				(eg. C<['Subj', 'Obj', 'Atr']>).
1462
1463				=item get_label_score($label, $label_prev, $features)
1464
1465				Computes a score of assigning the given label to an edge,
1466				given the features of the edge and the label assigned to the previous edge.
1467
1468				Always a higher score means a more likely label for the edge.
1469				Some algorithms may give a negative score.
1470
1471				Is semantically equivalent to calling C<get_emission_score>
1472				and C<get_transition_score> and then combining it together somehow.
1473
1474				=item get_emission_score($label, $feature)
1475
1476				Computes the "emission score" of assigning the given label to an edge,
1477				given one of the features of the edge and disregarding
1478				the label assigned to the previous edge.
1479
1480				=item get_transition_score($label_this, $label_prev, $feature)
1481
1482				Computes the "transition score" of assigning the given label to an edge,
1483				given the label assigned to the previous edge
1484				and possibly also one of the features of the edge
1485				but NOT including the emission score returned by C<get_emission_score>.
1486
1487				=item $result = get_transition_probs_array ($label_this, $label_prev)
1488
1489				Returns (a reference to) an array of the probabilities of the transition
1490				from label_prev to label_this (to be smoothed together),
1491				having the following structure:
1492
1493				$result->[0] = uniform prob
1494				$result->[1] = unigram prob
1495				$result->[2] = bigram prob
1496
1497				=item $result = get_emission_scores($features)
1498
1499				Get scores of assigning each of the possible labels to an edge
1500				based on all the features of the edge. Is semantically equivalent
1501				to doing:
1502
1503				foreach label
1504				foreach feature
1505				get_emission_score(label, feature)
1506
1507				The structure is:
1508
1509				$result->{label} = score
1510
1511				Actually only serves as a switch for several implementations of the method
1512				(C<get_emission_scores_basic_MIRA> and C<get_emission_scores_no_MIRA>);
1513				the method to be used is selected based on the algorithm being used.
1514
1515				=item get_emission_scores_basic_MIRA($features)
1516
1517				A C<get_emission_scores> implementation used with algorithms
1518				where the emission scores are computed by MIRA (this is currently
1519				the most successful implementation).
1520
1521				=item get_emission_scores_no_MIRA($features)
1522
1523				A C<get_emission_scores> implementation using only MLE. Probably obsolete now.
1524
1525				=back
1526
1527				=head2 Changing the scores
1528
1529				Methods used by the trainer
1530				(L<Treex::Tool::Parser::MSTperl::TrainerLabelling>)
1531				to adjust the scores to whatever seems to be
1532				the best idea at the moment. Used only in MIRA training
1533				(MLE uses C<add_unigram>, C<add_emission>, C<add_transition>
1534				and C<compute_probs_from_counts> instead).
1535
1536				=over 4
1537
1538				=item set_feature_score($feature, $score, $label, $label_prev)
1539
1540				Sets the specified emission score (if label_prev is not set)
1541				or transition score (if it is)
1542				to the given value (C<$score>).
1543
1544				=item update_feature_score($feature, $update, $label, $label_prev)
1545
1546				Updates the specified emission score (if label_prev is not set)
1547				or transition score (if it is)
1548				by the given value (C<$update>), i.e. adds that value to the
1549				current value.
1550
1551				=back
1552
1553				=head1 AUTHORS
1554
1555				Rudolf Rosa <rosa@ufal.mff.cuni.cz>
1556
1557				=head1 COPYRIGHT AND LICENSE
1558
1559				Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles
1560				University in Prague
1561
1562				This module is free software; you can redistribute it and/or modify it under
1563				the same terms as Perl itself.