| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# eww - it's a Base.pm |
|
2
|
|
|
|
|
|
|
package Algorithm::MarkovChain::Base; |
|
3
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
32
|
|
|
4
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
25
|
|
|
5
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
78
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
804
|
use fields qw(seperator _symbols _recover_symbols _start_states); |
|
|
1
|
|
|
|
|
1529
|
|
|
|
1
|
|
|
|
|
4
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
sub new { |
|
10
|
1
|
|
|
1
|
0
|
2
|
my $invocant = shift; |
|
11
|
1
|
|
|
|
|
3
|
my %args = @_; |
|
12
|
|
|
|
|
|
|
|
|
13
|
1
|
|
33
|
|
|
7
|
my $class = ref $invocant || $invocant; |
|
14
|
1
|
|
|
|
|
3
|
my Algorithm::MarkovChain::Base $self = fields::new($class); |
|
15
|
|
|
|
|
|
|
|
|
16
|
1
|
|
|
|
|
3538
|
$self->{seperator} = $;; |
|
17
|
1
|
|
|
|
|
3
|
$self->{_symbols} = {}; |
|
18
|
1
|
|
|
|
|
2
|
$self->{_recover_symbols} = $args{recover_symbols}; |
|
19
|
|
|
|
|
|
|
|
|
20
|
1
|
|
|
|
|
4
|
return $self; |
|
21
|
|
|
|
|
|
|
} |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
sub seed { |
|
25
|
1
|
|
|
1
|
0
|
2394
|
my Algorithm::MarkovChain::Base $self = shift; |
|
26
|
1
|
|
|
|
|
4
|
my %args = @_; |
|
27
|
|
|
|
|
|
|
|
|
28
|
1
|
|
|
|
|
5
|
local $; = $self->{seperator}; |
|
29
|
|
|
|
|
|
|
|
|
30
|
1
|
50
|
|
|
|
4
|
croak 'seed: no symbols' unless $args{symbols}; |
|
31
|
1
|
50
|
|
|
|
4
|
croak 'seed: bad symbols' unless ref($args{symbols}) eq 'ARRAY'; |
|
32
|
|
|
|
|
|
|
|
|
33
|
1
|
|
50
|
|
|
7
|
my $longest = $args{longest} || 4; |
|
34
|
|
|
|
|
|
|
|
|
35
|
1
|
|
|
|
|
2
|
our @symbols; |
|
36
|
1
|
|
|
|
|
4
|
*symbols = $args{symbols}; |
|
37
|
|
|
|
|
|
|
|
|
38
|
1
|
|
|
|
|
2
|
push @{ $self->{_start_states} }, $symbols[0]; |
|
|
1
|
|
|
|
|
3
|
|
|
39
|
|
|
|
|
|
|
|
|
40
|
1
|
50
|
|
|
|
4
|
if ($self->{_recover_symbols}) { |
|
41
|
0
|
|
|
|
|
0
|
$self->{_symbols}{$_} = $_ for @symbols; |
|
42
|
|
|
|
|
|
|
} |
|
43
|
|
|
|
|
|
|
|
|
44
|
1
|
|
|
|
|
3
|
for my $length (1..$longest) { |
|
45
|
4
|
|
|
|
|
14
|
for (my $i = 0; ($i + $length) < @symbols; $i++) { |
|
46
|
1
|
|
|
|
|
4
|
my $link = join($;, @symbols[$i..$i + $length - 1]); |
|
47
|
1
|
|
|
|
|
5
|
$self->increment_seen($link, $symbols[$i + $length]); |
|
48
|
|
|
|
|
|
|
} |
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
} |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub spew { |
|
54
|
1
|
|
|
1
|
0
|
3
|
my Algorithm::MarkovChain::Base $self = shift; |
|
55
|
1
|
|
|
|
|
3
|
my %args = @_; |
|
56
|
|
|
|
|
|
|
|
|
57
|
1
|
|
|
|
|
4
|
local $; = $self->{seperator}; |
|
58
|
|
|
|
|
|
|
|
|
59
|
1
|
50
|
|
|
|
6
|
my $longest_sequence = $self->longest_sequence() |
|
60
|
|
|
|
|
|
|
or croak "don't appear to be seeded"; |
|
61
|
|
|
|
|
|
|
|
|
62
|
1
|
|
50
|
|
|
10
|
my $length = $args{length} || 30; |
|
63
|
1
|
|
33
|
|
|
6
|
my $subchain = $args{longest_subchain} || $length; |
|
64
|
|
|
|
|
|
|
|
|
65
|
1
|
|
|
|
|
2
|
my @fin; # final chain |
|
66
|
|
|
|
|
|
|
my @sub; # current sub-chain |
|
67
|
1
|
50
|
33
|
|
|
9
|
if ($args{complete} && ref $args{complete} eq 'ARRAY') { |
|
68
|
1
|
|
|
|
|
2
|
@sub = @{ $args{complete} }; |
|
|
1
|
|
|
|
|
3
|
|
|
69
|
|
|
|
|
|
|
} |
|
70
|
|
|
|
|
|
|
|
|
71
|
1
|
|
|
|
|
4
|
while (@fin < $length) { |
|
72
|
2
|
100
|
66
|
|
|
11
|
if (@sub && (!$self->sequence_known($sub[-1]) || (@sub > $subchain))) { # we've gone terminal |
|
|
|
|
33
|
|
|
|
|
|
73
|
1
|
|
|
|
|
2
|
push @fin, @sub; |
|
74
|
1
|
|
|
|
|
3
|
@sub = (); |
|
75
|
1
|
50
|
|
|
|
3
|
next if $args{force_length}; # ignore stop_at_terminal |
|
76
|
1
|
50
|
|
|
|
6
|
last if $args{stop_at_terminal}; |
|
77
|
|
|
|
|
|
|
} |
|
78
|
|
|
|
|
|
|
|
|
79
|
1
|
50
|
|
|
|
3
|
unless (@sub) { |
|
80
|
0
|
0
|
|
|
|
0
|
if ($args{strict_start}) { |
|
81
|
0
|
|
|
|
|
0
|
our @starts; |
|
82
|
0
|
|
|
|
|
0
|
*starts = $self->{_start_states}; |
|
83
|
0
|
|
|
|
|
0
|
@sub = $starts[rand $#starts]; |
|
84
|
|
|
|
|
|
|
} |
|
85
|
|
|
|
|
|
|
else { |
|
86
|
0
|
|
|
|
|
0
|
@sub = split $;, $self->random_sequence(); |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
1
|
|
|
|
|
4
|
my $consider = 1; |
|
91
|
1
|
50
|
|
|
|
4
|
if (@sub > 1) { |
|
92
|
0
|
|
|
|
|
0
|
$consider = int rand ($longest_sequence - 1); |
|
93
|
|
|
|
|
|
|
} |
|
94
|
|
|
|
|
|
|
|
|
95
|
1
|
|
|
|
|
5
|
my $start = join($;, @sub[-$consider..-1]); |
|
96
|
|
|
|
|
|
|
|
|
97
|
1
|
50
|
|
|
|
4
|
next unless $self->sequence_known($start); # loop if we missed |
|
98
|
|
|
|
|
|
|
|
|
99
|
1
|
|
|
|
|
3
|
my $cprob; |
|
100
|
1
|
|
|
|
|
4
|
my $target = rand; |
|
101
|
|
|
|
|
|
|
|
|
102
|
1
|
|
|
|
|
4
|
my %options = $self->get_options($start); |
|
103
|
1
|
|
|
|
|
4
|
for my $word (keys %options) { |
|
104
|
1
|
|
|
|
|
2
|
$cprob += $options{$word}; |
|
105
|
1
|
50
|
|
|
|
4
|
if ($cprob >= $target) { |
|
106
|
1
|
|
|
|
|
3
|
push @sub, $word; |
|
107
|
1
|
|
|
|
|
5
|
last; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
1
|
50
|
|
|
|
3
|
$#fin = $length |
|
113
|
|
|
|
|
|
|
if $args{force_length}; |
|
114
|
|
|
|
|
|
|
|
|
115
|
1
|
50
|
|
|
|
4
|
@fin = map { $self->{_symbols}{$_} } @fin |
|
|
0
|
|
|
|
|
0
|
|
|
116
|
|
|
|
|
|
|
if $self->{_recover_symbols}; |
|
117
|
|
|
|
|
|
|
|
|
118
|
1
|
|
|
|
|
9
|
return @fin; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
|
122
|
0
|
|
|
0
|
0
|
|
sub increment_seen { croak "virtual method call" } |
|
123
|
0
|
|
|
0
|
0
|
|
sub get_options { croak "virtual method call" } |
|
124
|
0
|
|
|
0
|
0
|
|
sub longest_sequence { croak "virtual method call" } |
|
125
|
0
|
|
|
0
|
0
|
|
sub sequence_known { croak "virtual method call" } |
|
126
|
0
|
|
|
0
|
0
|
|
sub random_sequence { croak "virtual method call" } |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
1; |
|
130
|
|
|
|
|
|
|
__END__ |