File Coverage

lib/Google/Ads/AdWords/Utilities/BatchJobHandler.pm
Criterion Covered Total %
statement 31 33 93.9
branch n/a
condition n/a
subroutine 11 11 100.0
pod n/a
total 42 44 95.4


line stmt bran cond sub pod time code
1             # Copyright 2015, Google Inc. All Rights Reserved.
2             #
3             # Licensed under the Apache License, Version 2.0 (the "License");
4             # you may not use this file except in compliance with the License.
5             # You may obtain a copy of the License at
6             #
7             # http://www.apache.org/licenses/LICENSE-2.0
8             #
9             # Unless required by applicable law or agreed to in writing, software
10             # distributed under the License is distributed on an "AS IS" BASIS,
11             # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12             # See the License for the specific language governing permissions and
13             # limitations under the License.
14              
15             package Google::Ads::AdWords::Utilities::BatchJobHandler;
16              
17 1     1   756 use strict;
  1         2  
  1         30  
18 1     1   6 use warnings;
  1         2  
  1         22  
19 1     1   5 use utf8;
  1         1  
  1         7  
20 1     1   21 use version;
  1         2  
  1         6  
21              
22             # The following needs to be on one line because CPAN uses a particularly hacky
23             # eval() to determine module versions.
24 1     1   91 use Google::Ads::Common::Constants; our $VERSION = ${Google::Ads::Common::Constants::VERSION};
  1         3  
  1         73  
25              
26 1     1   7 use Google::Ads::AdWords::Logging;
  1         2  
  1         24  
27 1     1   252 use Google::Ads::AdWords::Serializer;
  1         5  
  1         41  
28 1     1   267 use Google::Ads::AdWords::Utilities::BatchJobHandlerError;
  1         2  
  1         38  
29 1     1   291 use Google::Ads::AdWords::Utilities::BatchJobHandlerStatus;
  1         3  
  1         29  
30 1     1   6 use Google::Ads::Common::Utilities::AdsUtilityRegistry;
  1         2  
  1         24  
31 1     1   292 use Google::Ads::SOAP::Deserializer::MessageParser;
  0            
  0            
32              
33             use Class::Std::Fast;
34              
35             use File::stat;
36             use HTTP::Request;
37             use HTTP::Status qw(:constants);
38             use Log::Log4perl qw(:levels);
39             use LWP::UserAgent;
40             use MIME::Base64;
41             use POSIX;
42             use Time::HiRes qw(gettimeofday tv_interval);
43             use URI::Escape;
44             use XML::Simple;
45              
46             # For incremental uploads, the size (in bytes) of the body of the request
47             # must be multiples of 256K.
48             use constant REQUIRED_CONTENT_LENGTH_INCREMENT => 262144;
49              
50             my %client_of : ATTR(:name :default<>);
51              
# One-shot upload of a list of operations.
#
# Parameters:
#   $operations - the operations to upload.
#   $url        - the upload URL; stored as the resumable_upload_uri of a newly
#                 created BatchJobHandlerStatus.
#   $timeout    - optional; forwarded unchanged to
#                 upload_incremental_operations.
#
# Returns whatever upload_incremental_operations returns for a request that
# is flagged as the last one.
sub upload_operations {
  my ($self, $operations, $url, $timeout) = @_;

  # Start from a status that records zero bytes uploaded so far.
  my $initial_status =
    Google::Ads::AdWords::Utilities::BatchJobHandlerStatus->new({
      total_content_length => 0,
      resumable_upload_uri => $url
    });

  # A complete upload is an incremental upload whose only request is also
  # the last one, hence the literal true flag.
  return $self->upload_incremental_operations($operations, $initial_status,
    1, $timeout);
}
70              
# Upload a list of operations incrementally. Send operations to the upload URL
# as the operations become available. The request is flagged as final only
# when $is_last_request is true.
#
# Parameters:
#   $operations      - the operations to serialize and send in this request.
#   $status          - the current BatchJobHandlerStatus (required). Its
#                      resumable_upload_uri must be set; its
#                      total_content_length is the number of bytes already
#                      sent (0 or undefined means this is the first request).
#   $is_last_request - true when this is the final request of the upload.
#   $timeout         - optional; forwarded to the HTTP helpers.
#
# Returns the updated BatchJobHandlerStatus on success; keep it and pass it in
# as $status for the next request. On failure returns the false/error value
# produced by the failing step (a BatchJobHandlerError for the validation
# failures handled here).
#
# NOTE(review): the `!$response` checks below only detect errors if
# BatchJobHandlerError evaluates to false in boolean context - confirm against
# that class.
sub upload_incremental_operations {
  my ($self, $operations, $status, $is_last_request, $timeout) = @_;
  if (!$status) {
    return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
      type        => "UPLOAD",
      description => "Required: BatchJobHandlerStatus"
    });
  }

  Google::Ads::Common::Utilities::AdsUtilityRegistry->add_ads_utilities(
    "BatchJobHandler");

  my $url                  = $status->get_resumable_upload_uri();
  my $total_content_length = $status->get_total_content_length();

  # A status whose length was never set means nothing has been uploaded yet;
  # normalizing here avoids "uninitialized value" warnings in the numeric
  # comparison and in the addition further down.
  $total_content_length = 0 if !defined $total_content_length;
  my $is_first_request = $total_content_length == 0;

  # An undefined or empty URI is false, so a single truth test covers both.
  if (!$url) {
    return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
      type        => "UPLOAD",
      description => "Required: BatchJobHandlerStatus.resumable_upload_uri"
    });
  }

  # If this is the first request, then take the URI passed in and make a request
  # to that URI for the URI to which the operations will be uploaded. That
  # URI will then be stored in the BatchJobHandlerStatus.
  if ($is_first_request) {
    my $response =
      $self->__initialize_upload($url, $self->get_client(), $timeout);
    if (!$response) {
      return $response;
    }
    $url = $response;
    $status->set_resumable_upload_uri($url);
  }

  # The process below follows the Google Cloud Storage guidelines for resumable
  # uploads of unknown size:
  # https://cloud.google.com/storage/docs/concepts-techniques#unknownresumables
  my $upload_request =
    __prepare_upload_request($operations, $url, $self->get_client(), "PUT",
    $timeout);

  # __prepare_upload_request hands back a BatchJobHandlerError instead of the
  # {lwp, request} hash when the mutate class cannot be loaded. Return that
  # error to the caller rather than dying on a hash dereference below.
  if (ref($upload_request) ne 'HASH') {
    return $upload_request;
  }

  # For incremental upload, the headers will need additional arguments, and the
  # content will need to be padded until the bytes are an increment of 256K.
  my $request = $upload_request->{request};
  my $xml     = $request->content();

  $xml = $self->__update_tags($is_first_request, $is_last_request, $xml);

  my $padded_xml = _add_padding($xml);
  $request->content($padded_xml);

  # The length must be measured in bytes, not characters; the braces keep the
  # bytes pragma from leaking into the rest of the sub.
  my $content_length = 0;
  {
    use bytes;
    $content_length = length($padded_xml);
  }

  $self->__set_incremental_operations_headers($request, $total_content_length,
    $content_length, $is_last_request);

  # Continue with making the request.
  my $start_time = [gettimeofday()];
  my $response   = $upload_request->{lwp}->request($request);
  $response = $self->__check_response($response, $start_time, 1, 0);
  if (!$response) {
    return $response;
  }

  # Record how many bytes have now been sent so the next request can compute
  # its Content-Range.
  $total_content_length = $total_content_length + $content_length;
  $status->set_total_content_length($total_content_length);
  return $status;
}
148              
# Update tags.
# The process is that a user sends a list of operations to the Google Cloud
# in smaller groups and then requests that all operations execute in a single
# batch job e.g.
# * HTTP Request 1: 1st list of operations
# * HTTP Request 2: 2nd list of operations
# * HTTP Request 3: Final list of operations
# * Execute all operations.
#
# When the list of operations is serialized with each HTTP request, the
# serialization adds a beginning and ending <mutate> tag. However, the
# AdWords API only wants to see the beginning <mutate> tag in the first request
# and the ending </mutate> tag in the last request. Those tags are stripped
# out of the other requests e.g.
# * HTTP Request 1: <mutate><operations>...</operations></mutate>
#     <== Take out the </mutate>
# * HTTP Request 2: <mutate><operations>...</operations></mutate>
#     <== Take out the <mutate> and </mutate>
# * HTTP Request 3: <mutate><operations>...</operations></mutate>
#     <== Take out the <mutate>
# * Execute all operations.
#
# NOTE(review): the tag names in this comment and in the patterns below were
# lost from this copy of the source and have been reconstructed from the
# surrounding comments ("mutate tags"); confirm against the upstream module.
#
# Parameters:
#   $is_first_request - true when $xml belongs to the first request.
#   $is_last_request  - true when $xml belongs to the last request.
#   $xml              - the serialized operations for this request.
#
# Returns the XML with the tags removed as described above.
sub __update_tags {
  my ($self, $is_first_request, $is_last_request, $xml) = @_;

  # If this is both the 1st and last request, leave the XML alone.
  return $xml if $is_first_request && $is_last_request;

  # If it's not the last request, then remove the ending </mutate> together
  # with any trailing whitespace.
  if (!$is_last_request) {
    $xml =~ s{</mutate>\s*$}{};
  }

  # If it's not the first request, then remove everything before the first
  # <operations>.
  if (!$is_first_request) {
    $xml =~ s{^.*?<operations>}{<operations>};
  }

  return $xml;
}
185              
# Sets the Content-Length and Content-Range headers on an incremental upload
# request.
#
# Parameters:
#   $request              - object whose header() method receives the values.
#   $total_content_length - number of bytes sent by earlier requests; this is
#                           the first byte offset of the current request.
#   $content_length       - number of bytes in the current request body.
#   $is_last_request      - when true the range carries the total byte count
#                           (e.g. "bytes 500-999/1000"); otherwise the total is
#                           written as "*".
sub __set_incremental_operations_headers {
  my ($self, $request, $total_content_length, $content_length,
    $is_last_request)
    = @_;

  $request->header("Content-Length" => $content_length);

  # The range is inclusive on both ends, so the last byte is one less than
  # the running total after this request.
  my $first_byte = $total_content_length;
  my $last_byte  = $first_byte + $content_length - 1;

  my $overall_size = "*";
  if ($is_last_request) {
    $overall_size = $last_byte + 1;
  }

  $request->header("Content-Range" =>
      sprintf("bytes %d-%d/%s", $first_byte, $last_byte, $overall_size));
}
202              
# Starts a resumable upload: POSTs an empty "x-goog-resumable: start" request
# to $url and returns the value of the Location header of the checked
# response, which the caller uses as the URI for the actual uploads.
#
# Parameters:
#   $url     - the URL to POST the start request to.
#   $client  - accepted for interface compatibility; not used in this sub.
#   $timeout - optional agent timeout; when false,
#              Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT is used.
#
# Returns the Location header on success; otherwise returns the false value
# handed back by __check_response.
sub __initialize_upload {
  my ($self, $url, $client, $timeout) = @_;

  my $agent = LWP::UserAgent->new();
  $agent->default_header(
    "Accept-Encoding" => scalar HTTP::Message::decodable());

  # Set agent timeout, falling back to the library default.
  $agent->timeout(
    $timeout || Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT());

  # The start request carries no body, only the headers announcing a
  # resumable upload of XML content.
  my @start_headers = (
    "x-goog-resumable" => "start",
    "Content-Length"   => "0",
    "Content-Type"     => "application/xml",
  );

  # Read proxy configuration from the environment.
  $agent->env_proxy();

  # Prepare and send the request.
  my $start_request =
    HTTP::Request->new("POST", URI->new($url), \@start_headers);
  my $started_at = [gettimeofday()];
  my $checked =
    $self->__check_response($agent->request($start_request), $started_at, 0,
    1);

  return $checked unless $checked;
  return $checked->header("Location");
}
240              
# Prepares the HTTP request to upload the operations for the batch job.
# Creates and configures an LWP::UserAgent and an HTTP::Request for the
# specified operations.
#
# Parameters:
#   $operations - the operations to serialize into the request body.
#   $url        - the URL the request is addressed to.
#   $client     - object whose get_version() selects the versioned
#                 BatchJobOpsService::mutate class.
#   $method     - the HTTP method for the request.
#   $timeout    - optional agent timeout; when false,
#                 Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT is used.
#
# Returns a hash reference with the keys lwp and request, or a
# BatchJobHandlerError when the versioned mutate class cannot be loaded.
sub __prepare_upload_request {
  my ($operations, $url, $client, $method, $timeout) = @_;
  my $version    = $client->get_version();
  my $upload_url = URI->new($url);

  # Changing the operations to XML.
  my $batch_job_ops_class =
    "Google::Ads::AdWords::${version}::BatchJobOpsService::mutate";
  eval "require $batch_job_ops_class"
    or return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
      type        => "UPLOAD",
      description =>
        sprintf("Class '%s' expected, but not found.", $batch_job_ops_class)});
  my $batch_job_upload =
    $batch_job_ops_class->new({operations => $operations});

  # Serialize, and fix the namespace.
  my $xml = $batch_job_upload->serialize();

  # NOTE(review): the search string and the start of the replacement were
  # truncated in this copy of the source. They are reconstructed here as the
  # opening "<mutate" of the serialized element, which is the text the
  # surviving namespace attributes must attach to - confirm against the
  # upstream module.
  my $find    = "<mutate";
  my $replace =
      "<mutate xmlns:xsi"
    . "=\"http://www.w3.org/2001/XMLSchema-instance\" xmlns=\""
    . "https://adwords.google.com/api/adwords/cm/$version\"";
  $xml =~ s/$find/$replace/;

  my $lwp = LWP::UserAgent->new();

  # Advertise the content encodings that can be decoded locally.
  my $can_accept = HTTP::Message::decodable();
  $lwp->default_header("Accept-Encoding" => scalar $can_accept);

  # Set agent timeout.
  $lwp->timeout(
    $timeout
    ? $timeout
    : Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT()
  );

  # Read proxy configuration from the environment.
  $lwp->env_proxy();

  # Prepare the request; the only header set here is the content type.
  my @headers = ("Content-Type" => "application/xml");
  my $request = HTTP::Request->new($method, $upload_url, \@headers, $xml);

  return {
    lwp     => $lwp,
    request => $request
  };
}
298              
# Returns an object containing the result of the batch job. This is retrieved
# via the download URL provided in the batch job.
#
# Parameters:
#   $url     - the URL to GET the result from.
#   $timeout - optional; forwarded to __prepare_download_request.
#
# Returns the data produced by the message parser on success. Returns the
# false value from __check_response when the HTTP request fails, or a
# BatchJobHandlerError when the type map cannot be loaded or the content
# cannot be deserialized.
#
# The sub is declared without a prototype: prototypes are ignored on method
# calls, and an empty one wrongly advertises that no arguments are taken.
sub download_response {
  my ($self, $url, $timeout) = @_;

  my $download_request = __prepare_download_request($url, $timeout);

  my $request    = $download_request->{request};
  my $start_time = [gettimeofday()];
  my $response   = $download_request->{lwp}->request($request);
  $response = $self->__check_response($response, $start_time);
  if (!$response) {
    return $response;
  }

  # Turn this content into a MutateResult object. The deserializer expects
  # the result to be in a soap envelope, and the XML header is not needed.
  my $decoded_content = $response->decoded_content();
  $decoded_content =~ s/^<\?xml[^>]+\?>//;

  # NOTE(review): the envelope and body tags were lost from this copy of the
  # source; only the namespace URI and the %s placeholder survived. The
  # element names below are a reconstruction - confirm against the upstream
  # module.
  my $xml = sprintf(
    "<soap:Envelope xmlns:soap="
      . "\"http://schemas.xmlsoap.org/soap/envelope/\"><soap:Body>%s</soap:Body>"
      . "</soap:Envelope>",
    $decoded_content
  );

  my $parser =
    Google::Ads::SOAP::Deserializer::MessageParser->new({strict => "1"});
  my $version = $self->get_client()->get_version();
  my $service =
    "Google::Ads::AdWords::${version}::TypeMaps::BatchJobOpsService";

  # Report a missing type map the same way __prepare_upload_request reports a
  # missing mutate class, instead of letting the parser fail obscurely later.
  eval "require $service"
    or return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
      type        => "UPLOAD",
      description =>
        sprintf("Class '%s' expected, but not found.", $service)});
  $parser->class_resolver($service);

  eval { $parser->parse_string($xml) };
  if ($@) {
    return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
        type        => "UPLOAD",
        description => sprintf(
          "Error deserializing message: %s. \nMessage was: \n%s.",
          $@, $xml
        )});
  }
  return $parser->get_data();
}
344              
# Builds what is needed to fetch the batch job result from its download URL:
# an LWP::UserAgent configured with Accept-Encoding, timeout and proxy
# settings, plus a header-less GET HTTP::Request for $url.
#
# Parameters:
#   $url     - the URL to download from.
#   $timeout - optional agent timeout; when false,
#              Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT is used.
#
# Returns a hash reference with the keys lwp and request.
sub __prepare_download_request {
  my ($url, $timeout) = @_;

  my $agent = LWP::UserAgent->new();
  $agent->default_header(
    "Accept-Encoding" => scalar HTTP::Message::decodable());

  # Set agent timeout, falling back to the library default.
  $agent->timeout(
    $timeout || Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT());

  # Read proxy configuration from the environment.
  $agent->env_proxy();

  # Prepare the request; no extra headers are sent with the download.
  my $get_request = HTTP::Request->new("GET", URI->new($url), []);

  return {
    lwp     => $agent,
    request => $get_request
  };
}
376              
# Classifies an HTTP response by its status code.
#
# Parameters:
#   $response       - the response to inspect (code and message are read).
#   $start_time     - accepted for interface compatibility; not used here.
#   $is_incremental - true when the response belongs to an incremental upload.
#   $is_initial     - true when the response belongs to the request for the
#                     resumable upload URL.
#
# Returns the response itself when it counts as a success for the given
# kind of request; otherwise returns a BatchJobHandlerError.
sub __check_response {
  my ($self, $response, $start_time, $is_incremental, $is_initial) = @_;
  my $code = $response->code;

  # Plain success.
  return $response if $code == HTTP_OK;

  # A bad request carries error details in its body.
  return $self->__extract_xml_error($response) if $code == HTTP_BAD_REQUEST;

  # This happens when requesting the resumable upload URL from the upload URL
  # passed back in the batch job. It means that the new resumable upload URL
  # is ready to go.
  return $response if $is_initial && $code == HTTP_CREATED;

  # This happens when doing an incremental upload. It just means that we are
  # not done uploading, yet.
  return $response if $is_incremental && $code == 308;

  # Anything else is reported as a generic HTTP failure.
  return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
    type                  => "HTTP",
    http_response_code    => $response->code,
    http_response_message => $response->message
  });
}
414              
# Returns a new BatchJobHandlerError containing the error details of the
# failed HTTP::Response.
#
# The response body is parsed as XML and the type, fieldPath and trigger of
# its ApiError element are copied into the error. If the body is not
# well-formed XML or does not have the expected shape, the error is still
# returned, carrying only the HTTP code and message, instead of letting the
# parser exception propagate to the caller.
sub __extract_xml_error {
  my ($self, $response) = @_;

  my ($type, $field_path, $trigger);
  my $parsed = eval {
    my $ref =
      XML::Simple->new()
      ->XMLin($response->decoded_content(), ForceContent => 1);
    my $api_error = $ref->{ApiError};
    $type       = $api_error->{type}->{content};
    $field_path = $api_error->{fieldPath}->{content};
    $trigger    = $api_error->{trigger}->{content};
    1;
  };

  # A failure part-way through must not leave half-populated details behind.
  if (!$parsed) {
    ($type, $field_path, $trigger) = (undef, undef, undef);
  }

  return Google::Ads::AdWords::Utilities::BatchJobHandlerError->new({
    type                  => "HTTP",
    http_response_code    => $response->code,
    http_response_message => $response->message,
    http_type             => $type,
    # Missing or empty optional details are reported as empty strings.
    http_field_path => $field_path ? $field_path : "",
    http_trigger    => $trigger    ? $trigger    : ""
  });
}
435              
# Pads the XML with trailing spaces until its length in bytes is a multiple
# of REQUIRED_CONTENT_LENGTH_INCREMENT (256K). Content whose byte length is
# already a multiple, including empty content, is returned unchanged.
#
# The padding is appended as an explicit number of spaces rather than through
# a sprintf width: sprintf pads to a number of characters, so content holding
# multi-byte characters would come out longer in bytes than the intended
# multiple. Each appended space contributes exactly one byte.
sub _add_padding {
  my $xml = shift;

  # Measure in bytes. Use braces to keep the scope of the bytes pragma
  # contained.
  my $missing_bytes = 0;
  {
    use bytes;
    my $remainder = length($xml) % REQUIRED_CONTENT_LENGTH_INCREMENT;
    if ($remainder > 0) {
      $missing_bytes = REQUIRED_CONTENT_LENGTH_INCREMENT - $remainder;
    }
  }

  return $xml . (" " x $missing_bytes);
}
453              
454             1;
455              
456             =pod
457              
458             =head1 NAME
459              
460             Google::Ads::AdWords::Utilities::BatchJobHandler
461              
462             =head1 DESCRIPTION
463              
464             Processes batch job requests through the AdWords API.
465              
466             =head2 PROPERTIES
467              
468             The following properties may be accessed using get_PROPERTY methods:
469              
470             =over
471              
472             =item * client
473              
474             A reference to a Google::Ads::AdWords::Client.
475              
476             =back
477              
478             =head1 METHODS
479              
480             =head2 new
481              
482             Constructor. The following data structure may be passed to new():
483              
484             { # Google::Ads::AdWords::Utilities::BatchJobHandler
485             client => $client, # A ref to a Google::Ads::AdWords::Client object
486             },
487              
488             =head1 METHODS
489              
490             =head2 upload_operations
491              
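492             Upload a list of operations. Returns the
493             L<Google::Ads::AdWords::Utilities::BatchJobHandlerStatus>.
494             If the request fails this returns a
495             L<Google::Ads::AdWords::Utilities::BatchJobHandlerError>.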
496              
497             =head3 Parameters
498              
499             =over
500              
501             =item *
502              
503             An array of operations to be uploaded to the upload URL.
504              
505             =item *
506              
507             A URL to which to upload (POST) the operations.
508              
509             =item *
510              
511             The timeout is an optional parameter that can be set to alter the default
512             time that the http client waits to get a response from the server.
513             If the timeout is not specified, the default is
514             Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT
515              
516             =back
517              
518             =head3 Returns
519              
520             The L<Google::Ads::AdWords::Utilities::BatchJobHandlerStatus> of the
521             upload if the request is successful. Otherwise, this returns a
522             L<Google::Ads::AdWords::Utilities::BatchJobHandlerError>.
523              
524             =head3 Exceptions
525              
526             Returns a L<Google::Ads::AdWords::Utilities::BatchJobHandlerError> if the
527             batch job fails immediately.
528              
529             =head2 upload_incremental_operations
530              
531             Upload a list of operations incrementally. Send operations to the upload URL
532             as the operations are available. The operations will not be
533             executed until the boolean is set indicating that it's the last request.
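534             This returns the current
535             L<Google::Ads::AdWords::Utilities::BatchJobHandlerStatus>. Keep track of this
536             status as you will need to pass it in to the next request as the
537             $status.
538             If the request fails this returns a
539             L<Google::Ads::AdWords::Utilities::BatchJobHandlerError>.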
540              
541             =head3 Parameters
542              
543             =over
544              
545             =item *
546              
547             An array of operations to be uploaded to the upload URL.
548              
549             =item *
550              
551             The current L<Google::Ads::AdWords::Utilities::BatchJobHandlerStatus>.
552             In the first request, this object must be initialized with the URL to which
553             the operations will be uploaded. For any uploads following the first upload,
554             pass in the L<Google::Ads::AdWords::Utilities::BatchJobHandlerStatus>
555             from the previous upload.
556              
557             =item *
558              
559             If this is the last request to be uploaded, set the value to true.
560             False values are: 0, '0', '', (), or undef
561             True values are anything other than the false values e.g. 1
562              
563             =item *
564              
565             A URL to which to upload (POST) the operations.
566              
567             =item *
568              
569             The timeout is an optional parameter that can be set to alter the default
570             time that the http client waits to get a response from the server.
571             If the timeout is not specified, the default is
572             Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT
573              
574             =back
575              
576             =head3 Returns
577              
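578             This returns L<Google::Ads::AdWords::Utilities::BatchJobHandlerStatus> if the
579             request is successful. Otherwise, this returns a
580             L<Google::Ads::AdWords::Utilities::BatchJobHandlerError>.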
581              
582             =head3 Exceptions
583              
584             Returns a L<Google::Ads::AdWords::Utilities::BatchJobHandlerError> if the
585             batch job fails immediately.
586              
587             =head2 download_response
588              
589             Returns an object containing the result of the batch job. This is retrieved
590             via the download URL provided in the batch job. On failure, a
591             BatchJobHandlerError will be returned.
592              
593             =head3 Parameters
594              
595             =over
596              
597             =item *
598              
599             A URL from which to download (GET) the result of processing the operations.
600              
601             =item *
602              
603             The timeout is an optional parameter that can be set to alter the default
604             time that the http client waits to get a response from the server.
605             If the timeout is not specified, the default is
606             Google::Ads::AdWords::Constants::LWP_DEFAULT_TIMEOUT
607              
608             =back
609              
610             =head3 Returns
611              
612             BatchJobOpsService::mutateResponse object with contents from the
613             job's download URL
614              
615             =head3 Exceptions
616              
617             Returns a L<Google::Ads::AdWords::Utilities::BatchJobHandlerError> if the
618             batch job fails immediately.
619              
620             =cut
621