File Coverage

blib/lib/Google/Cloud/Speech.pm

Criterion	Covered	Total	%
statement	18	63	28.5
branch	0	12	0.0
condition			n/a
subroutine	6	11	54.5
pod	3	5	60.0
total	27	91	29.6

line	stmt	bran	sub	pod	time	code
1						package Google::Cloud::Speech;
2
3	1		1		13366	use Mojo::Base -base;
	1				6635
	1				6
4
5	1		1		455	use Google::Cloud::Speech::Auth;
	1				3
	1				8
6	1		1		33	use Mojo::UserAgent;
	1				2
	1				3
7	1		1		20	use MIME::Base64;
	1				2
	1				47
8	1		1		5	use Mojo::File;
	1				2
	1				25
9	1		1		14	use Carp;
	1				2
	1				651
10
11						$Google::Cloud::Speech::VERSION = '0.05';
12
13						has secret_file => sub { };    # service-account JSON file; passed to the auth class as from_json
14						has ua => sub { Mojo::UserAgent->new() };    # HTTP client used for every request in this class
15						has file => sub { croak 'you must specify the audio file'; };    # audio file path; reading it while unset croaks
16						has samplerate => '16000';    # sent as sampleRateHertz in the default config
17						has language => 'en-IN';    # sent as languageCode in the default config
18						has baseurl => 'https://speech.googleapis.com/v1';    # prefix that the endpoint paths are appended to
19						has encoding => 'linear16';    # sent as encoding in the default config
20						has async_id => undef;    # operation name stored by asyncrecognize, read by is_done
21						has results => undef;    # recognition results stored by syncrecognize and is_done
22
23						has config => sub {    # default "config" part of the request body, built from the attributes above
24						my $self = shift;
25
26						return {
27						encoding => $self->encoding,
28						sampleRateHertz => $self->samplerate,
29						languageCode => $self->language,
30						profanityFilter => 'false',    # NOTE(review): this is the string 'false', not a JSON boolean -- confirm the API accepts it
31						};
32						};
33
34						has auth_class => sub {    # auth helper object, built on first use from secret_file
35						my $self = shift;
36						Google::Cloud::Speech::Auth->new(from_json => $self->secret_file);
37						};
38
39						sub token {    # returns the value used for the Authorization header, requesting a new token when the current one is not valid
40	0		0	0		my $self = shift;
41
42	0					my $auth_obj = $self->auth_class;
43	0	0				unless ($auth_obj->has_valid_token) {
44	0					return $auth_obj->request_token->token();    # request_token is chained here, so it presumably returns the auth object -- confirm in Google::Cloud::Speech::Auth
45						}
46
47	0					return $auth_obj->token;    # existing token is still valid, reuse it
48						}
49
50						sub syncrecognize {    # posts the audio file to <baseurl>/speech:recognize and stores the response's results
51	0		0	1		my $self = shift;
52
53	0					my $audio_raw = Mojo::File->new( $self->file )->slurp();    # whole file is read into memory
54
55	0					my $audio = { "content" => encode_base64( $audio_raw, "" ) };    # "" as line ending = base64 text without line breaks
56	0					my $header = {
57						'Content-Type' => "application/json",
58						'Authorization' => $self->token,
59						};
60
61	0					my $hash_ref = {
62						config => $self->config,
63						audio => $audio,
64						};
65
66	0					my $url = $self->baseurl . "/speech:recognize";
67	0					my $tx = $self->ua->post( $url => $header => json => $hash_ref );
68
69	0					my $response = $self->handle_errors($tx)->json;    # handle_errors croaks on a failed transaction
70	0	0				if ( my $results = $response->{'results'} ) {
71	0					return $self->results($results);    # returns whatever the results setter returns; the POD example treats it as the object
72						}
73	0					return $self->results( [] );    # no 'results' key in the response: store an empty arrayref instead
74
75						}
76
77						sub asyncrecognize {    # starts a long-running recognition job; stores the operation name in async_id and returns $self, croaks if no name comes back
78	0		0	1		my $self = shift;
79
80	0					my $audio_raw = Mojo::File->new( $self->file )->slurp();    # whole file is read into memory
81	0					my $audio = { "content" => encode_base64( $audio_raw, "" ) };    # "" as line ending = base64 text without line breaks
82	0					my $header = {
83						'Content-Type' => "application/json",
84						'Authorization' => $self->token,
85						};
86
87	0					my $hash_ref = {
88						config => $self->config,
89						audio => $audio,
90						};
91
92	0					my $url = $self->baseurl . "/speech:longrunningrecognize";    # baseurl is the attribute this class defines; there is no url method
93	0					my $tx = $self->ua->post( $url => $header => json => $hash_ref );
94
95	0					my $res = $self->handle_errors($tx)->json;    # handle_errors croaks on a failed transaction
96	0	0				if ( my $name = $res->{'name'} ) {
97	0					$self->async_id($name);    # remembered so is_done can poll this operation
98
99	0					return $self;
100						}
101
102	0					croak 'there was an error';    # response parsed but carried no operation name
103						}
104
105						sub is_done {    # polls the operation started by asyncrecognize; returns 1 and stores the results once done, 0 while pending, nothing if no job was started
106	0		0	1		my $self = shift;
107
108	0					my $async_id = $self->async_id;
109	0	0				return unless $async_id;    # asyncrecognize has not stored an operation name yet
110
111	0					my $url = $self->baseurl . "/operations/" . $async_id;    # baseurl is the attribute this class defines; there is no url method
112	0					my $tx = $self->ua->get( $url => { 'Authorization' => $self->token } );
113
114	0					my $res = $self->handle_errors($tx)->json;    # handle_errors croaks on a failed transaction
115	0					my $is_done = $res->{'done'};
116
117	0	0				if ($is_done) {
118	0					$self->results( $res->{'response'}->{'results'} );    # go through the accessor, as syncrecognize does
119	0					return 1;
120						}
121
122	0					return 0;
123						}
124
125						sub handle_errors {    # returns the transaction's response object, or croaks with the error message when the transaction failed
126	0		0	0		my ( $self, $tx ) = @_;
127	0					my $res = $tx->res;
128
129	0	0				if ( $tx->error ) {    # same test the deprecated $tx->success performed, but available on current Mojolicious too
130	0					my $error_ref = $tx->error;
131	0					croak( "invalid response: " . $error_ref->{'message'} );
132						}
133
134	0					return $res;
135						}
136
137						1;
138
139						=encoding utf8
140
141						=head1 NAME
142
143						Google::Cloud::Speech - An interface to Google cloud speech service
144
145						=head1 SYNOPSIS
146
147						use Data::Dumper;
148						use Google::Cloud::Speech;
149
150						my $speech = Google::Cloud::Speech->new(
151						file => 'test.wav',
152						secret_file => 'my/google/app/project/sa/json/file'
153						);
154
155						# long running process
156						my $operation = $speech->asyncrecognize();
157						my $is_done = $operation->is_done;
158						until($is_done) {
159						if ($is_done = $operation->is_done) {
160						print Dumper $operation->results;
161						}
162						}
163
164						=head1 DESCRIPTION
165
166						This module lets you access Google cloud speech service.
167
168						=head1 ATTRIBUTES
169
170						=head2 secret_file
171
172						Loads the JSON file from Google with the client ID information.
173
174						$speech->secret_file('/my/google/app/project/sp/json/file');
175
176						To create, Google Service Account Key:
177
178						1) Log in to the Google Apps Console and select your project.
179						2) Click on Create credentials -> Service account key.
180						3) Select a service account, choose JSON as the key type, click Create, and download the JSON file.
181
182						See Google's service account documentation for more details about API authentication.
183
184						=head2 encoding
185
186						my $encoding = $speech->encoding('linear16');
187
188						Encoding of audio data to be recognized.
189						Acceptable values are:
190
191						* linear16 - Uncompressed 16-bit signed little-endian samples.
192						(LINEAR16)
193						* flac - The [Free Lossless Audio
194						Codec](http://flac.sourceforge.net/documentation.html) encoding.
195						Only 16-bit samples are supported. Not all fields in STREAMINFO
196						are supported. (FLAC)
197						* mulaw - 8-bit samples that compand 14-bit audio samples using
198						G.711 PCMU/mu-law. (MULAW)
199						* amr - Adaptive Multi-Rate Narrowband codec. (`sample_rate` must
200						be 8000 Hz.) (AMR)
201						* amr_wb - Adaptive Multi-Rate Wideband codec. (`sample_rate` must
202						be 16000 Hz.) (AMR_WB)
203						* ogg_opus - Ogg Mapping for Opus. (OGG_OPUS)
204						Lossy codecs are not recommended, as they result in a lower-quality
205						speech transcription.
206						* speex - Speex with header byte. (SPEEX_WITH_HEADER_BYTE)
207
208
209						=head2 file
210
211						my $file = $speech->file;
212						$speech->file('path/to/audio/file.wav');
213
214
215						=head2 language
216
217						my $lang = $speech->language('en-IN');
218
219						The language of the supplied audio as a BCP-47 language tag.
220						Example: "en-IN" for English (India), "en-GB" for English (United
221						Kingdom), "fr-FR" for French (France). See Language Support for a list of the currently supported language codes.
222						L<https://cloud.google.com/speech/docs/languages>
223
224						=head2 samplerate
225
226						my $sample_rate = $speech->samplerate('16000');
227
228						Sample rate in Hertz of the audio data to be recognized. Valid values
229						are: 8000-48000. 16000 is optimal. For best results, set the sampling
230						rate of the audio source to 16000 Hz. If that's not possible, use the
231						native sample rate of the audio source (instead of re-sampling).
232
233
234						=head1 METHODS
235
236						=head2 asyncrecognize
237
238						Performs asynchronous speech recognition:
239						receive results via the google.longrunning.Operations interface.
240
241						my $operation = $speech->asyncrecognize();
242						my $is_done = $operation->is_done;
243						until($is_done) {
244						if ($is_done = $operation->is_done) {
245						print Dumper $operation->results;
246						}
247						}
248
249
250						=head2 syncrecognize
251
252						Performs synchronous speech recognition: receive results after all audio has been sent and processed.
253
254						my $operation = $speech->syncrecognize;
255						print $operation->results;
256
257						=head2 is_done
258
259						Checks if the speech-recognition processing of the audio data is complete.
260						return 1 when complete, 0 otherwise.
261
262						=head2 results
263
264						returns the transcribed data as Arrayref.
265
266						print Dumper $speech->syncrecognize->results;
267
268						=head1 AUTHOR
269
270						Prajith P C
271
272						=head1 COPYRIGHT AND LICENSE
273
274						This software is Copyright (c) 2017, Prajith P.
275
276						This is free software, you can redistribute it and/or modify it under
277						the same terms as the Perl 5 programming language system itself.
278
279
280						=head1 SEE ALSO
281
282						=over
283
284						=item * L
285
286						=back
287
288						=cut
289
290						=head1 DEVELOPMENT
291
292						This project is hosted on Github, at
293						L
294
295						=cut