line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Google::Cloud::Speech; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
15499
|
use Mojo::Base -base; |
|
1
|
|
|
|
|
11676
|
|
|
1
|
|
|
|
|
7
|
|
4
|
|
|
|
|
|
|
|
5
|
1
|
|
|
1
|
|
480
|
use Google::Cloud::Speech::Auth; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
use Mojo::UserAgent; |
7
|
|
|
|
|
|
|
use MIME::Base64; |
8
|
|
|
|
|
|
|
use Mojo::File; |
9
|
|
|
|
|
|
|
use Carp; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$Google::Cloud::Speech::VERSION = '0.03'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Path to the Google service-account JSON key file. The default builder
# supplies no value.
has secret_file => sub { };

# HTTP client used by the recognize and polling methods.
has ua => sub { Mojo::UserAgent->new() };

# Audio file to transcribe. Reading this attribute before it has been set
# is a fatal error.
has file => sub { croak 'you must specify the audio file'; };

# Recognition settings copied into the request's config section.
has encoding   => 'linear16';
has samplerate => '16000';
has language   => 'en-IN';

# Root of the REST endpoint; method paths are appended to it.
has baseurl => 'https://speech.googleapis.com/v1';

# State filled in by the recognize calls: the long-running operation name
# and the transcription results. Both start out undefined.
has [qw(async_id results)];
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Default builder for the "config" section of a recognition request. The
# values are read from the encoding, samplerate and language attributes.
has config => sub {
    my $self = shift;

    return {
        encoding        => $self->encoding,
        sampleRateHertz => $self->samplerate,
        languageCode    => $self->language,

        # A reference to a false scalar is encoded by Mojo::JSON as the JSON
        # boolean false. The previous value, the string 'false', is a true
        # value in Perl and was sent as a JSON string rather than a boolean.
        profanityFilter => \0,
    };
};
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Default builder for the authentication helper: constructs a
# Google::Cloud::Speech::Auth object, handing it the secret_file attribute
# as its from_json argument.
has auth_class => sub {
    my ($self) = @_;

    my $secret_path = $self->secret_file;
    return Google::Cloud::Speech::Auth->new( from_json => $secret_path );
};
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Returns the token held by the auth object. When the auth object reports
# that it has no valid token, a new one is requested first and that token
# is returned instead.
sub token {
    my ($self) = @_;

    my $auth = $self->auth_class;

    # Fast path: the current token is still usable.
    return $auth->token if $auth->has_valid_token;

    return $auth->request_token->token();
}
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Performs synchronous recognition of the audio file named by the file
# attribute.
#
# The file is read, base64-encoded and POSTed together with the config
# attribute to "<baseurl>/speech:recognize". The "results" entry of the
# decoded JSON response (or an empty array reference when there is none)
# is stored through the results accessor, and the value of that accessor
# call is returned. Transport/HTTP failures are raised by handle_errors.
sub syncrecognize {
    my ($self) = @_;

    # Second argument "" keeps the base64 text on a single line.
    my $encoded = encode_base64( Mojo::File->new( $self->file )->slurp(), "" );

    my %headers = (
        'Content-Type'  => "application/json",
        'Authorization' => $self->token,
    );
    my %body = (
        config => $self->config,
        audio  => { "content" => $encoded },
    );

    my $endpoint = $self->baseurl . "/speech:recognize";
    my $tx = $self->ua->post( $endpoint => \%headers => json => \%body );

    my $decoded = $self->handle_errors($tx)->json;
    my $found   = $decoded->{'results'};

    return $self->results( $found ? $found : [] );
}
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Starts asynchronous (long-running) recognition of the audio file named
# by the file attribute.
#
# The file is read, base64-encoded and POSTed together with the config
# attribute to "<baseurl>/speech:longrunningrecognize". When the decoded
# JSON response carries a "name", it is stored in async_id and the object
# itself is returned so that is_done can be called on it.
#
# Croaks when the response has no "name"; transport/HTTP failures are
# raised by handle_errors.
sub asyncrecognize {
    my $self = shift;

    my $audio_raw = Mojo::File->new( $self->file )->slurp();

    # Second argument "" keeps the base64 text on a single line.
    my $audio  = { "content" => encode_base64( $audio_raw, "" ) };
    my $header = {
        'Content-Type'  => "application/json",
        'Authorization' => $self->token,
    };

    my $hash_ref = {
        config => $self->config,
        audio  => $audio,
    };

    # The endpoint root is the baseurl attribute (as in syncrecognize);
    # this class defines no "url" attribute, so calling it was fatal.
    my $url = $self->baseurl . "/speech:longrunningrecognize";
    my $tx  = $self->ua->post( $url => $header => json => $hash_ref );

    my $res = $self->handle_errors($tx)->json;
    if ( my $name = $res->{'name'} ) {
        $self->async_id($name);

        return $self;
    }

    croak 'there was an error';
}
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Polls the operation started by asyncrecognize.
#
# Returns nothing (empty list / undef) when no async_id has been recorded.
# Otherwise GETs "<baseurl>/operations/<async_id>" and returns 1 when the
# decoded response has a true "done" flag, 0 when it does not. On
# completion the response's results are stored in the results attribute.
# Transport/HTTP failures are raised by handle_errors.
sub is_done {
    my $self = shift;

    my $async_id = $self->async_id;
    return unless $async_id;

    # The endpoint root is the baseurl attribute; this class defines no
    # "url" attribute, so calling it was fatal.
    my $url = $self->baseurl . "/operations/" . $async_id;
    my $tx  = $self->ua->get( $url => { 'Authorization' => $self->token } );

    my $res = $self->handle_errors($tx)->json;
    return 0 unless $res->{'done'};

    # Store through the accessor, as syncrecognize does.
    $self->results( $res->{'response'}->{'results'} );

    return 1;
}
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Validates a finished transaction.
#
# Takes the transaction object and returns its response ($tx->res) when
# the transaction carries no error. Otherwise croaks with
# "invalid response: <message>", using the message of $tx->error.
sub handle_errors {
    my ( $self, $tx ) = @_;

    # $tx->error is the supported way to detect connection and HTTP
    # failures; the older $tx->success call is deprecated and no longer
    # available in current Mojolicious releases.
    if ( my $error_ref = $tx->error ) {
        croak( "invalid response: " . $error_ref->{'message'} );
    }

    return $tx->res;
}
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
1; |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=encoding utf8 |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=head1 NAME |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
Google::Cloud::Speech - An interface to Google cloud speech service |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head1 SYNOPSIS |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
use Data::Dumper; |
148
|
|
|
|
|
|
|
use Google::Cloud::Speech; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
my $speech = Google::Cloud::Speech->new( |
151
|
|
|
|
|
|
|
file => 'test.wav', |
152
|
|
|
|
|
|
|
secret_file => 'my/google/app/project/sa/json/file' |
153
|
|
|
|
|
|
|
); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# long running process |
156
|
|
|
|
|
|
|
my $operation = $speech->asyncrecognize(); |
157
|
|
|
|
|
|
|
my $is_done = $operation->is_done; |
158
|
|
|
|
|
|
|
until($is_done) { |
159
|
|
|
|
|
|
|
if ($is_done = $operation->is_done) { |
160
|
|
|
|
|
|
|
print Dumper $operation->results; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 DESCRIPTION |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
This module lets you access Google cloud speech service. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head2 C<secret_file> |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Loads the JSON file from Google with the client ID information. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
$speech->secret_file('/my/google/app/project/sp/json/file'); |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
To create, Google Service Account Key: |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
1) Login to Google Apps Console and select your project |
179
|
|
|
|
|
|
|
2) Click on create credentials-> service account key. |
180
|
|
|
|
|
|
|
3) Select a service account, choose JSON as the key type, click Create, and download the JSON file. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
See L for more details about API authentication. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head2 encoding |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
my $encoding = $speech->encoding('linear16'); |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Encoding of audio data to be recognized. |
189
|
|
|
|
|
|
|
Acceptable values are: |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
* linear16 - Uncompressed 16-bit signed little-endian samples. |
192
|
|
|
|
|
|
|
(LINEAR16) |
193
|
|
|
|
|
|
|
* flac - The [Free Lossless Audio |
194
|
|
|
|
|
|
|
Codec](http://flac.sourceforge.net/documentation.html) encoding. |
195
|
|
|
|
|
|
|
Only 16-bit samples are supported. Not all fields in STREAMINFO |
196
|
|
|
|
|
|
|
are supported. (FLAC) |
197
|
|
|
|
|
|
|
* mulaw - 8-bit samples that compand 14-bit audio samples using |
198
|
|
|
|
|
|
|
G.711 PCMU/mu-law. (MULAW) |
199
|
|
|
|
|
|
|
* amr - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must |
200
|
|
|
|
|
|
|
be 8000 Hz.) (AMR) |
201
|
|
|
|
|
|
|
* amr_wb - Adaptive Multi-Rate Wideband codec. (`sample_rate` must |
202
|
|
|
|
|
|
|
be 16000 Hz.) (AMR_WB) |
203
|
|
|
|
|
|
|
* ogg_opus - Ogg Mapping for Opus. (OGG_OPUS) |
204
|
|
|
|
|
|
|
Lossy codecs are not recommended, as they result in a lower-quality |
205
|
|
|
|
|
|
|
speech transcription. |
206
|
|
|
|
|
|
|
* speex - Speex with header byte. (SPEEX_WITH_HEADER_BYTE) |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=head2 file |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
my $file = $speech->file; |
212
|
|
|
|
|
|
|
$speech->file('path/to/audio/file.wav'); |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 language |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
my $lang = $speech->language('en-IN'); |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
The language of the supplied audio as a BCP-47 language tag. |
220
|
|
|
|
|
|
|
Example: "en-IN" for English (India), "en-GB" for English (United |
221
|
|
|
|
|
|
|
Kingdom), "fr-FR" for French (France). See Language Support for a list of the currently supported language codes. |
222
|
|
|
|
|
|
|
L |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head2 samplerate |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
my $sample_rate = $speech->samplerate('16000'); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Sample rate in Hertz of the audio data to be recognized. Valid values |
229
|
|
|
|
|
|
|
are: 8000-48000. 16000 is optimal. For best results, set the sampling |
230
|
|
|
|
|
|
|
rate of the audio source to 16000 Hz. If that's not possible, use the |
231
|
|
|
|
|
|
|
native sample rate of the audio source (instead of re-sampling). |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 METHODS |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head2 asyncrecognize |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Performs asynchronous speech recognition: |
239
|
|
|
|
|
|
|
receive results via the google.longrunning.Operations interface. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
my $operation = $speech->asyncrecognize(); |
242
|
|
|
|
|
|
|
my $is_done = $operation->is_done; |
243
|
|
|
|
|
|
|
until($is_done) { |
244
|
|
|
|
|
|
|
if ($is_done = $operation->is_done) { |
245
|
|
|
|
|
|
|
print Dumper $operation->results; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=head2 syncrecognize |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
Performs synchronous speech recognition: receive results after all audio has been sent and processed. |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
my $operation = $speech->syncrecognize; |
255
|
|
|
|
|
|
|
print $operation->results; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=head2 is_done |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
Checks if the speech-recognition processing of the audio data is complete. |
260
|
|
|
|
|
|
|
Returns 1 when complete, 0 otherwise. |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head2 results |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
returns the transcribed data as Arrayref. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
print Dumper $speech->syncrecognize->results; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=head1 AUTHOR |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
Prajith P C |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
This software is Copyright (c) 2017, Prajith P. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
This is free software, you can redistribute it and/or modify it under |
277
|
|
|
|
|
|
|
the same terms as Perl language system itself. |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head1 SEE ALSO |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=over |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=item * L |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=back |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=cut |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 DEVELOPMENT |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
This project is hosted on Github, at |
293
|
|
|
|
|
|
|
L |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=cut |