line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Text::Contraction; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
7561
|
use 5.006002; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
44
|
|
4
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
5
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
1454
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
require Carp; |
8
|
|
|
|
|
|
|
require POSIX; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = '0.02'; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
sub new { |
13
|
11
|
|
|
11
|
1
|
931
|
my($type, %args) = @_; |
14
|
|
|
|
|
|
|
|
15
|
11
|
|
|
|
|
28
|
my $this = bless \%args, $type; |
16
|
|
|
|
|
|
|
|
17
|
11
|
100
|
|
|
|
45
|
$this->{'prefix'} = '^' unless exists $this->{'prefix'}; |
18
|
11
|
100
|
|
|
|
29
|
$this->{'caseless'} = 1 unless exists $this->{'caseless'}; |
19
|
11
|
100
|
|
|
|
26
|
$this->{'minRatio'} = 0.5 unless exists $this->{'minRatio'}; |
20
|
11
|
50
|
|
|
|
27
|
$this->{'words'} = _w() unless exists $this->{'words'}; |
21
|
|
|
|
|
|
|
|
22
|
11
|
|
|
|
|
46
|
return $this; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub prefix { |
26
|
0
|
|
|
0
|
1
|
0
|
my $this = shift; |
27
|
0
|
0
|
|
|
|
0
|
if (@_) { |
28
|
0
|
|
|
|
|
0
|
return $this->{'prefix'} = shift; |
29
|
|
|
|
|
|
|
} |
30
|
0
|
|
|
|
|
0
|
return $this->{'prefix'}; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub caseless { |
34
|
1
|
|
|
1
|
1
|
129
|
my $this = shift; |
35
|
1
|
50
|
|
|
|
6
|
if (@_) { |
36
|
1
|
|
|
|
|
5
|
return $this->{'caseless'} = shift; |
37
|
|
|
|
|
|
|
} |
38
|
0
|
|
|
|
|
0
|
return $this->{'caseless'}; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
sub minRatio { |
42
|
0
|
|
|
0
|
1
|
0
|
my $this = shift; |
43
|
0
|
0
|
|
|
|
0
|
if (@_) { |
44
|
0
|
|
|
|
|
0
|
my $minRatio = shift; |
45
|
0
|
0
|
0
|
|
|
0
|
unless ($minRatio >= 0 && $minRatio <= 1) { |
46
|
0
|
|
|
|
|
0
|
Carp::croak "Text::Contraction::minRatio must be between 0 and 1, inclusive."; |
47
|
|
|
|
|
|
|
} |
48
|
0
|
|
|
|
|
0
|
return $this->{'minRatio'} = $minRatio; |
49
|
|
|
|
|
|
|
} |
50
|
0
|
|
|
|
|
0
|
return $this->{'minRatio'}; |
51
|
|
|
|
|
|
|
} |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
sub words { |
54
|
1
|
|
|
1
|
1
|
3
|
my $this = shift; |
55
|
1
|
50
|
|
|
|
5
|
if (@_) { |
56
|
1
|
|
|
|
|
2
|
my $words = shift; |
57
|
1
|
50
|
|
|
|
5
|
unless (ref $words eq 'ARRAY') { |
58
|
0
|
|
|
|
|
0
|
Carp::croak "Text::Contraction::words must be an array reference." |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
1
|
|
|
|
|
6
|
delete $this->{'_words'}; |
62
|
1
|
|
|
|
|
5
|
return $this->{'words'} = $words; |
63
|
|
|
|
|
|
|
} |
64
|
0
|
|
|
|
|
0
|
return $this->{'words'}; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
my @words; |
68
|
|
|
|
|
|
|
sub _w { |
69
|
0
|
0
|
|
0
|
|
0
|
return \@words if @words; |
70
|
0
|
|
|
|
|
0
|
foreach my $file ($ENV{'CONTRACTION_WORDS'}, |
71
|
|
|
|
|
|
|
qw(/dict/words |
72
|
|
|
|
|
|
|
/usr/dict/words |
73
|
|
|
|
|
|
|
/usr/share/dict/words |
74
|
|
|
|
|
|
|
/usr/share/lib/spell/words |
75
|
|
|
|
|
|
|
/usr/ucblib/dict/words |
76
|
|
|
|
|
|
|
/usr/lib/dict/words)) { |
77
|
0
|
0
|
0
|
|
|
0
|
if (defined $file && -s $file) { |
78
|
0
|
0
|
|
|
|
0
|
open my $fh, $file or die "open '$file': $!"; |
79
|
0
|
|
|
|
|
0
|
chomp(@words = <$fh>); |
80
|
0
|
|
|
|
|
0
|
return \@words; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
0
|
0
|
|
|
|
0
|
if (defined $ENV{'CONTRACTION_WORDS'}) { |
85
|
0
|
0
|
|
|
|
0
|
if (-e $ENV{'CONTRACTION_WORDS'}) { |
86
|
0
|
|
|
|
|
0
|
Carp::croak "Dictionary '$ENV{q(CONTRACTION_WORDS)}' is empty.\n"; |
87
|
|
|
|
|
|
|
} else { |
88
|
0
|
|
|
|
|
0
|
Carp::croak "Could not find dictionary '$ENV{q(CONTRACTION_WORDS)}'.\n"; |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
} else { |
91
|
0
|
|
|
|
|
0
|
Carp::croak "Could not find dictionary. Try setting environment variable\n". |
92
|
|
|
|
|
|
|
"CONTRACTION_WORDS to the path of your dictionary.\n"; |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
sub study { |
97
|
12
|
|
|
12
|
1
|
15
|
my $this = shift; |
98
|
|
|
|
|
|
|
|
99
|
12
|
|
|
|
|
15
|
my @words; |
100
|
12
|
|
|
|
|
18
|
for (my $i = 0; $i < @{ $this->{words} }; $i++) { |
|
37
|
|
|
|
|
99
|
|
101
|
25
|
100
|
|
|
|
67
|
my $word = $this->{caseless} ? uc $this->{words}[$i] : $this->{words}[$i]; |
102
|
25
|
|
|
|
|
32
|
my $j = 0; |
103
|
25
|
|
|
|
|
65
|
for (split //, $word) { |
104
|
54
|
|
|
|
|
55
|
push @{ $words[ord $_][$j++] }, $i; |
|
54
|
|
|
|
|
209
|
|
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
12
|
|
|
|
|
33
|
$this->{_words} = \@words; |
109
|
|
|
|
|
|
|
} |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub match { |
112
|
15
|
|
|
15
|
1
|
383
|
my($this, $contraction) = @_; |
113
|
|
|
|
|
|
|
|
114
|
15
|
|
|
|
|
20
|
$contraction =~ y/'//d; |
115
|
|
|
|
|
|
|
|
116
|
15
|
|
|
|
|
23
|
my $prefix; |
117
|
15
|
100
|
|
|
|
36
|
if ($this->{caseless}) { |
118
|
13
|
|
|
|
|
22
|
$contraction = uc $contraction; |
119
|
13
|
|
|
|
|
23
|
$prefix = '(?i)' . $this->{prefix}; |
120
|
|
|
|
|
|
|
} else { |
121
|
2
|
|
|
|
|
29
|
$prefix = $this->{prefix}; |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
|
124
|
15
|
100
|
|
|
|
46
|
$this->study unless $this->{_words}; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# find most discriminating character |
127
|
15
|
|
|
|
|
38
|
my($bestChar, $bestIndex, $bestScore) = |
128
|
|
|
|
|
|
|
(substr($contraction, -1, 1), length($contraction) - 1, undef ); |
129
|
|
|
|
|
|
|
|
130
|
15
|
|
|
|
|
43
|
for (my $i = length($contraction) - 1; $i >= 0; $i--) { |
131
|
22
|
|
|
|
|
33
|
my $char = substr($contraction, $i, 1); |
132
|
22
|
|
|
|
|
29
|
my $maxLength = "Inf"; |
133
|
22
|
50
|
|
|
|
63
|
if ($this->{minRatio} > 0) { |
134
|
22
|
|
|
|
|
79
|
$maxLength = POSIX::ceil(($i + 1) / $this->{minRatio}); |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
22
|
|
|
|
|
40
|
my $words = $this->{_words}[ord $char]; |
138
|
|
|
|
|
|
|
|
139
|
22
|
|
|
|
|
26
|
my $score = 0; |
140
|
22
|
|
|
|
|
64
|
for (@$words[$i..min($#$words, $maxLength)]) { |
141
|
52
|
100
|
|
|
|
104
|
$score += @$_ if $_; |
142
|
52
|
50
|
66
|
|
|
173
|
last if defined $bestScore && $score > $bestScore; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
22
|
100
|
100
|
|
|
118
|
if ($score > 0 && (! defined $bestScore || $score < $bestScore)) { |
|
|
|
33
|
|
|
|
|
146
|
17
|
|
|
|
|
55
|
($bestChar, $bestIndex, $bestScore) = ($char, $i, $score); |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# get all the words using the most discriminating character |
151
|
15
|
|
|
|
|
21
|
my $maxLength = "Inf"; |
152
|
15
|
50
|
|
|
|
43
|
if ($this->{minRatio} > 0) { |
153
|
15
|
|
|
|
|
40
|
$maxLength = POSIX::ceil(($bestIndex + 1) / $this->{minRatio}); |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
15
|
|
|
|
|
46
|
my $pattern = $prefix . join "[ A-Za-z']*", split //, $contraction; |
157
|
15
|
|
|
|
|
392
|
$pattern = qr($pattern); |
158
|
|
|
|
|
|
|
|
159
|
15
|
|
|
|
|
32
|
my $words = $this->{_words}[ord $bestChar]; |
160
|
|
|
|
|
|
|
|
161
|
15
|
|
|
|
|
16
|
my %match; |
162
|
15
|
|
|
|
|
32
|
for (@$words[$bestIndex..min($#$words, $maxLength)]) { |
163
|
29
|
100
|
|
|
|
173
|
@match{@$_} = (1) x @$_ if $_; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
15
|
|
|
|
|
29
|
$maxLength = "Inf"; |
167
|
15
|
50
|
|
|
|
36
|
if ($this->{minRatio} > 0) { |
168
|
15
|
|
|
|
|
40
|
$maxLength = POSIX::ceil((length $contraction) / $this->{minRatio}); |
169
|
|
|
|
|
|
|
} |
170
|
15
|
100
|
|
|
|
33
|
return grep { length() <= $maxLength && /$pattern/ } @{ $this->{words} }[keys %match]; |
|
27
|
|
|
|
|
294
|
|
|
15
|
|
|
|
|
42
|
|
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
37
|
100
|
|
37
|
0
|
177
|
sub min { $_[0] < $_[1] ? $_[0] : $_[1] } |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |
176
|
|
|
|
|
|
|
__END__ |