line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WordLists::Pronounce::Pronouncer;
|
2
|
1
|
|
|
1
|
|
32766
|
use strict;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
42
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
35
|
|
4
|
1
|
|
|
1
|
|
6
|
use utf8;
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
5
|
1
|
|
|
1
|
|
854
|
use WordLists::Base;
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
746
|
|
6
|
|
|
|
|
|
|
our $VERSION = $WordLists::Base::VERSION;
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
sub new
|
9
|
|
|
|
|
|
|
{
|
10
|
1
|
|
|
1
|
0
|
15
|
my ($class, $args) = @_;
|
11
|
1
|
50
|
|
|
|
5
|
$args = {} unless defined $args;
|
12
|
0
|
|
|
0
|
|
0
|
my $self = {
|
13
|
|
|
|
|
|
|
cleaner => \&_default_clean,
|
14
|
|
|
|
|
|
|
cb_word => undef,
|
15
|
|
|
|
|
|
|
cb_phrase => undef,
|
16
|
0
|
|
|
|
|
0
|
cb_fail => sub{warn "$_[1] not found\n"; return 'X';},
|
17
|
1
|
|
|
|
|
7
|
%{$args}
|
|
1
|
|
|
|
|
15
|
|
18
|
|
|
|
|
|
|
};
|
19
|
1
|
50
|
|
|
|
5
|
unless (defined $self->{'lookup'})
|
20
|
|
|
|
|
|
|
{
|
21
|
0
|
|
|
|
|
0
|
warn ('No lookup set for '.$class);
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#
|
24
|
|
|
|
|
|
|
}
|
25
|
1
|
|
|
|
|
12
|
bless $self, $class;
|
26
|
|
|
|
|
|
|
}
|
27
|
|
|
|
|
|
|
sub lookup
|
28
|
|
|
|
|
|
|
{
|
29
|
4
|
|
|
4
|
0
|
6
|
my ($self, $new) = @_;
|
30
|
4
|
50
|
|
|
|
8
|
$self->{'lookup'} = $new if defined $new;
|
31
|
4
|
|
|
|
|
10
|
return $self->{'lookup'};
|
32
|
|
|
|
|
|
|
}
|
33
|
|
|
|
|
|
|
sub clean
|
34
|
|
|
|
|
|
|
{
|
35
|
4
|
|
|
4
|
0
|
6
|
my ($self, $word, $args) = @_;
|
36
|
4
|
|
|
|
|
6
|
&{$self->{'cleaner'}}($self,$word,$args);
|
|
4
|
|
|
|
|
11
|
|
37
|
|
|
|
|
|
|
}
|
38
|
|
|
|
|
|
|
sub _default_clean
|
39
|
|
|
|
|
|
|
{
|
40
|
4
|
|
|
4
|
|
13
|
my ($self, $word, $args) = @_;
|
41
|
4
|
|
|
|
|
9
|
$word =~ s/\([^)]*\)//g;
|
42
|
4
|
|
|
|
|
7
|
$word =~ s/^\*//;
|
43
|
4
|
|
|
|
|
8
|
$word =~ s/^\s+//;
|
44
|
4
|
|
|
|
|
10
|
$word =~ s/\s+$//;
|
45
|
4
|
|
|
|
|
11
|
$word =~ s/[’']s\b//s;
|
46
|
4
|
|
|
|
|
7
|
$word =~ s/s'\b/s/;
|
47
|
1
|
|
|
1
|
|
7
|
$word =~ tr{/’.?",…+;:’‘}{ }d;
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
15
|
|
|
4
|
|
|
|
|
27
|
|
48
|
4
|
|
|
|
|
11
|
$word =~ s/\s+/ /g;
|
49
|
4
|
|
|
|
|
15
|
$word =~ s/^'//;
|
50
|
4
|
|
|
|
|
7
|
$word =~ s/'$//;
|
51
|
4
|
|
|
|
|
9
|
$word = lc $word;
|
52
|
4
|
|
|
|
|
11
|
return $word;
|
53
|
|
|
|
|
|
|
}
|
54
|
|
|
|
|
|
|
sub pronounce_phrase
|
55
|
|
|
|
|
|
|
{
|
56
|
4
|
|
|
4
|
0
|
9
|
my ($self, $word, $args) = @_;
|
57
|
4
|
|
|
|
|
7
|
my $field = $args->{'field'};
|
58
|
4
|
|
33
|
|
|
11
|
$field ||= $self->{'field'};
|
59
|
4
|
50
|
|
|
|
8
|
warn ('Source field must be defined!') unless ($field);
|
60
|
4
|
|
|
|
|
9
|
my $lookup = $self->lookup;
|
61
|
4
|
|
|
|
|
5
|
my $ipa="";
|
62
|
4
|
|
|
|
|
10
|
$word = $self->clean($word, $args);
|
63
|
4
|
|
|
|
|
20
|
my @senses = grep {$_->get($field)} $lookup->get_senses_for($word);
|
|
5
|
|
|
|
|
17
|
|
64
|
4
|
50
|
|
|
|
17
|
return "" if ($word eq "");
|
65
|
4
|
|
|
|
|
7
|
my @ipaout = ();
|
66
|
4
|
100
|
|
|
|
9
|
if (@senses) # if the whole input string matches
|
67
|
|
|
|
|
|
|
{
|
68
|
3
|
|
|
|
|
9
|
$ipa = $senses[0]->get($field);
|
69
|
3
|
|
|
|
|
6
|
push @ipaout, $ipa;
|
70
|
|
|
|
|
|
|
}
|
71
|
|
|
|
|
|
|
else # if the whole input string does not match, try splitting and pronouncing each component
|
72
|
|
|
|
|
|
|
{
|
73
|
1
|
|
|
|
|
7
|
my @words = split(/[- ?]+/, $word);
|
74
|
1
|
50
|
|
|
|
4
|
if ($#words > 0)
|
75
|
|
|
|
|
|
|
{
|
76
|
1
|
|
|
|
|
4
|
foreach (@words)
|
77
|
|
|
|
|
|
|
{
|
78
|
2
|
|
|
|
|
19
|
push @ipaout, $self->pronounce_phrase($_, {%$args,(as_word=>1)});
|
79
|
|
|
|
|
|
|
}
|
80
|
|
|
|
|
|
|
}
|
81
|
|
|
|
|
|
|
else # if non-matching input string is a single word, fail.
|
82
|
|
|
|
|
|
|
{
|
83
|
0
|
0
|
|
|
|
0
|
if (defined $self->{'cb_fail'} )
|
84
|
|
|
|
|
|
|
{
|
85
|
0
|
0
|
|
0
|
|
0
|
if (ref ($self->{'cb_fail'}) eq ref (sub {}))
|
|
0
|
0
|
|
|
|
0
|
|
86
|
|
|
|
|
|
|
{
|
87
|
0
|
|
|
|
|
0
|
return &{$self->{'cb_fail'}}($self, $word, $args);
|
|
0
|
|
|
|
|
0
|
|
88
|
|
|
|
|
|
|
}
|
89
|
|
|
|
|
|
|
elsif (ref ($self->{'cb_fail'}) eq ref (''))
|
90
|
|
|
|
|
|
|
{
|
91
|
0
|
|
|
|
|
0
|
return $self->{'cb_fail'};
|
92
|
|
|
|
|
|
|
}
|
93
|
|
|
|
|
|
|
else
|
94
|
|
|
|
|
|
|
{
|
95
|
0
|
|
|
|
|
0
|
return '';
|
96
|
|
|
|
|
|
|
# Don't return undef, as this will probably break string concatenation.
|
97
|
|
|
|
|
|
|
}
|
98
|
|
|
|
|
|
|
}
|
99
|
|
|
|
|
|
|
}
|
100
|
|
|
|
|
|
|
}
|
101
|
4
|
|
|
|
|
12
|
my $out = join(" ", @ipaout) ;
|
102
|
4
|
50
|
33
|
|
|
25
|
if (defined $self->{'cb_word'} and $args->{'as_word'}) # Callback for processing string as a word
|
|
|
50
|
33
|
|
|
|
|
103
|
|
|
|
|
|
|
{
|
104
|
0
|
|
|
|
|
0
|
$out = &{$self->{'cb_word'}}($self, $out, $args);
|
|
0
|
|
|
|
|
0
|
|
105
|
|
|
|
|
|
|
}
|
106
|
|
|
|
|
|
|
elsif (defined $self->{'cb_phrase'} and !$args->{'as_word'}) # Callback for processing string as a phrase
|
107
|
|
|
|
|
|
|
{
|
108
|
0
|
|
|
|
|
0
|
$out = &{$self->{'cb_phrase'}}($self, $out, $args) ;
|
|
0
|
|
|
|
|
0
|
|
109
|
|
|
|
|
|
|
}
|
110
|
4
|
|
|
|
|
21
|
return $out;
|
111
|
|
|
|
|
|
|
}
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
return 1;
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=pod
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head1 NAME
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
WordLists::Pronounce::Pronouncer
|
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 SYNOPSIS
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
my $pronouncer = WordLists::Pronounce::Pronouncer->new({ lookup => $here_is_one_i_prepared_earlier });
|
124
|
|
|
|
|
|
|
$ipa = $pronouncer->pronounce_phrase('tomato stew', {field=>'uspron'});
|
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head1 DESCRIPTION
|
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Allows the user to create and configure a pronouncing object which will accept strings and have a guess at their IPA transcription, based on a pre-generated list of pronunciations.
|
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
The user must specify a lookup object containing this list.
|
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head1 TODO
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Write and implement an API for reading several pronunciations and comparing them (possibly once the whole phrase has been put together).
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Try to guess stress for compounds based on a list of compounds and/or at least making sure each word has stress. (This might be impossible to do meaningfully)
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head1 BUGS
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Please use the Github issues tracker.
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head1 LICENSE
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Copyright 2011-2012 © Cambridge University Press. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |