File Coverage

blib/lib/AWS/S3/FileIterator.pm
Criterion    Covered   Total       %
statement         46      89    51.6
branch             8      40    20.0
condition          3       8    37.5
subroutine        18      18   100.0
pod                7       9    77.7
total             82     164    50.0


line stmt bran cond sub pod time code
1              
2             package AWS::S3::FileIterator;
3              
4 1     1   1626 use strict;
  1         1  
  1         25  
5 1     1   3 use warnings 'all';
  1         1  
  1         30  
6 1     1   4 use base 'Iterator::Paged';
  1         1  
  1         410  
7 1     1   308 use Carp 'confess';
  1         2  
  1         37  
8 1     1   344 use AWS::S3::Owner;
  1         9237  
  1         28  
9 1     1   470 use AWS::S3::File;
  1         294  
  1         6  
10              
11             sub _init {
12 5     5   3585 my ( $s ) = @_;
13              
14 5         9 foreach ( qw( bucket page_size page_number ) ) {
15             confess "Required argument '$_' was not provided"
16 12 100       59 unless $s->{$_};
17             } # end foreach()
18              
19 2         2 $s->{page_number}--;
20 2 100       6 $s->{marker} = '' unless defined( $s->{marker} );
21 2         4 $s->{__fetched_first_page} = 0;
22 2         2 $s->{data} = [];
23 2   66     12 $s->{pattern} ||= qr(.*);
24             } # end _init()
25              
26 2     2 1 313 sub marker { shift->{marker} }
27 2     2 1 10 sub pattern { shift->{pattern} }
28 1     1 0 3 sub bucket { shift->{bucket} }
29 1     1 0 3 sub page_size { shift->{page_size} }
30 2     2 1 6 sub prefix { shift->{prefix} }
31              
32             sub has_prev {
33 1     1 1 1 my $s = shift;
34              
35 1         2 return $s->page_number > 1;
36             } # end has_prev()
37              
38 1     1 1 9 sub has_next { shift->{has_next} }
39              
40             sub page_number {
41 4     4 1 4 my $s = shift;
42 4 100       20 @_ ? $s->{page_number} = $_[0] - 1 : $s->{page_number};
43             } # end page_number()
44              
45             # S3 returns listings in fixed-size batches (we request up to 1000 keys
46             # per call via max_keys). If the caller wants more or fewer items per
47             # page than that, one S3 request can't simply be mapped to one page -
48             # the page boundaries won't line up with the batch boundaries.
49             # Using an illustrative batch size of 100:
50             # If the page size is 5 and the page number is 2, then we:
51             # - fetch 100 items
52             # - store them
53             # - iterate internally until we get to 'page 2'
54             # - return the result.
55             # If the page size is 105 and the page number is 1, then we:
56             # - fetch 100 items
57             # - fetch the next 100 items
58             # - return the first 105 items, keeping the remaining 95 items
59             # - on page '2', fetch the next 100 items and return 105 items, saving 90 items.
60             # If the page size is 105 and the page number is 3, then we:
61             # - discard items until we have skipped 210 (2 pages' worth), then return items 211-315.
62             sub next_page {
63 1     1 1 611 my $s = shift;
64              
65             # Advance to page X before proceeding:
66 1 50 33     5 if ( ( !$s->{__fetched_first_page}++ ) && $s->page_number ) {
67              
68             # Advance to $s->page_number
69 0         0 my $start_page = $s->page_number;
70 0         0 my $to_discard = $start_page * $s->page_size;
71 0         0 my $discarded = 0;
72 0         0 while ( 1 ) {
73 0 0       0 my $item = $s->_next
74             or last;
75 0 0       0 $discarded++ if $item->{key} =~ $s->pattern;
76 0 0       0 last if $discarded >= $to_discard; # >= so the first item of the target page isn't dropped
77             } # end while()
78             } # end if()
79              
80 1         2 my @chunk = ();
81 1         2 while ( my $item = $s->_next() ) {
82 0 0       0 next unless $item->{key} =~ $s->pattern;
83 0         0 push @chunk, $item;
84 0 0       0 last if @chunk == $s->page_size;
85             } # end while()
86              
87             my @out = map {
88 0         0 my $owner = AWS::S3::Owner->new( %{ $_->{owner} } );
  0         0  
  0         0  
89 0         0 delete $_->{owner};
90 0         0 AWS::S3::File->new( %$_, owner => $owner );
91             } @chunk;
92              
93 0         0 $s->{page_number}++;
94              
95 0 0       0 return unless @out;
96 0 0       0 wantarray ? @out : \@out;
97             } # end next_page()
98              
99             sub _next {
100 1     1   1 my $s = shift;
101              
102 1 50       2 if ( my $item = shift( @{ $s->{data} } ) ) {
  1         2  
103 0         0 return $item;
104             } else {
105 1 0       3 if ( my @chunk = $s->_fetch() ) {
106 0         0 push @{ $s->{data} }, @chunk;
  0         0  
107 0         0 return shift( @{ $s->{data} } );
  0         0  
108             } else {
109 0         0 return;
110             } # end if()
111             } # end if()
112             } # end _next()
113              
114             sub _fetch {
115 1     1   1 my ( $s ) = @_;
116              
117 1         26 my $path = $s->{bucket}->name . '/';
118 0           my %params = ();
119 0 0         $params{marker} = $s->{marker} if $s->{marker};
120 0 0         $params{prefix} = $s->{prefix} if $s->{prefix};
121 0           $params{max_keys} = 1000;
122 0 0         $params{delimiter} = $s->{delimiter} if $s->{delimiter};
123              
124 0           my $type = 'ListBucket';
125 0           my $request = $s->{bucket}->s3->request( $type, %params, bucket => $s->{bucket}->name );
126 0           my $response = $request->request();
127              
128 0 0 0       $s->{has_next} = ( $response->xpc->findvalue( '//s3:IsTruncated' ) || '' ) eq 'true' ? 1 : 0;
129              
130 0           my @files = ();
131 0           foreach my $node ( $response->xpc->findnodes( '//s3:Contents' ) ) {
132 0           my ( $owner_node ) = $response->xpc->findnodes( './/s3:Owner', $node );
133 0           my $owner = {
134             id => $response->xpc->findvalue( './/s3:ID', $owner_node ),
135             display_name => $response->xpc->findvalue( './/s3:DisplayName', $owner_node )
136             };
137 0           my $etag = $response->xpc->findvalue( './/s3:ETag', $node );
138             push @files,
139             {
140             bucket => $s->{bucket},
141 0           key => $response->xpc->findvalue( './/s3:Key', $node ),
142             lastmodified => $response->xpc->findvalue( './/s3:LastModified', $node ),
143             etag => $response->xpc->findvalue( './/s3:ETag', $node ),
144             size => $response->xpc->findvalue( './/s3:Size', $node ),
145             owner => $owner,
146             };
147             } # end foreach()
148              
149 0 0         if ( @files ) {
150 0           $s->{marker} = $files[-1]->{key};
151             } # end if()
152              
153 0 0         return unless defined wantarray;
154 0 0         @files ? return @files : return;
155             } # end _fetch()
156              
157             1;
158              
159             __END__
160              
161             =pod
162              
163             =head1 NAME
164              
165             AWS::S3::FileIterator - Easily access and iterate through your S3 files.
166              
167             =head1 SYNOPSIS
168              
169             # Iterate through all ".txt" files, 100 at a time:
170             my $iter = $bucket->files(
171             # Required params:
172             page_size => 100,
173             page_number => 1,
174             # Optional params:
175             pattern => qr(\.txt$),
176             prefix => 'notes',
177             );
178            
179             while( my @files = $iter->next_page )
180             {
181             warn $iter->page_number, "\n";
182             foreach my $file ( @files )
183             {
184             print "\t", $file->key, "\n";
185             }# end foreach()
186             }# end while()
187              
188              
189             =head1 DESCRIPTION
190              
191             AWS::S3::FileIterator provides a means of I<iterating> through your S3 files.
192              
193             If you only have a few files it might seem odd to require an iterator, but if you
194             have thousands (or millions) of files, the iterator will save you a lot of effort.
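
                Because pages are assembled on the client side, C<page_size> is not tied to
                the batch size of the underlying S3 listing requests - page sizes larger
                than a single batch work transparently. A sketch (assuming C<$bucket> is
                configured as in the SYNOPSIS):

                  # Pages of 1,050 files each; internally this spans several listing batches:
                  my $iter = $bucket->files( page_size => 1050, page_number => 1 );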
195              
196             =head1 PUBLIC PROPERTIES
197              
198             =head2 has_prev
199              
200             Boolean - read-only
201              
202             =head2 has_next
203              
204             Boolean - read-only
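
                Both become meaningful once iteration has begun: C<has_next> reflects S3's
                C<IsTruncated> flag from the most recent listing request, and C<has_prev>
                is true once the page number is past 1. A sketch (assuming C<$iter> from
                the SYNOPSIS):

                  my @files = $iter->next_page;
                  print "more pages remain\n"     if $iter->has_next;
                  print "not on the first page\n" if $iter->has_prev;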
205              
206             =head2 page_number
207              
208             Integer - read-write
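
                The setter is 1-based, like the constructor argument; seeking happens on
                the first call to C<next_page()>, so set it before iterating. A sketch
                (assuming a freshly constructed C<$iter>):

                  $iter->page_number( 3 );          # start from the third page
                  my @files = $iter->next_page;     # returns page 3
                  print $iter->page_number, "\n";   # now reports 3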
209              
210             =head2 marker
211              
212             String - read-only
213              
214             Used internally to tell Amazon S3 where the previous listing request left off.
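
                Although the accessor is read-only, the iterator's C<_init()> accepts an
                initial marker, so a listing can in principle be resumed where a previous
                iterator left off (a sketch; whether C<< $bucket->files >> forwards a
                C<marker> argument to the iterator is an assumption):

                  my $resume_from = $iter->marker;   # last key returned by the most recent request
                  my $iter2 = $bucket->files(
                      page_size   => 100,
                      page_number => 1,
                      marker      => $resume_from,   # assumption: passed through to _init()
                  );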
215              
216             =head2 pattern
217              
218             Regexp - read-only
219              
220             If supplied to the constructor, only files which match the pattern will be returned.
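
                =head2 bucket

                L<AWS::S3::Bucket> - read-only

                The bucket whose contents are being listed, as supplied to the constructor.

                =head2 page_size

                Integer - read-only

                The maximum number of (matching) files returned per page, as supplied to
                the constructor.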
221              
222             =head2 prefix
223              
224             String - read-only
225              
226             If supplied to the constructor, only files which begin with the indicated prefix will be returned.
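
                Note that C<prefix> is applied server-side (it is sent with the S3 listing
                request), while C<pattern> is applied client-side to the returned keys.
                Combining the two narrows the listing cheaply first, then filters (example
                values are illustrative):

                  my $iter = $bucket->files(
                      page_size   => 100,
                      page_number => 1,
                      prefix      => 'logs/',      # server-side: only keys under logs/
                      pattern     => qr(\.gz$),    # client-side: only gzipped files
                  );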
227              
228             =head1 PUBLIC METHODS
229              
230             =head2 next_page()
231              
232             Returns the next page of results - a list in list context, or an arrayref in scalar context.
233              
234             Increments C<page_number> by one.
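
                Returns an empty list (or C<undef> in scalar context) once the listing is
                exhausted, so it can drive a loop in either context:

                  # List context:
                  while ( my @files = $iter->next_page ) {
                      print $_->key, "\n" for @files;
                  }

                  # Scalar context:
                  while ( my $files = $iter->next_page ) {
                      print $_->key, "\n" for @$files;
                  }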
235              
236             =head1 SEE ALSO
237              
238             L<The Amazon S3 API Documentation|http://docs.amazonwebservices.com/AmazonS3/latest/API/>
239              
240             L<AWS::S3>
241              
242             L<AWS::S3::Bucket>
243              
244             L<AWS::S3::File>
245              
246             L<AWS::S3::Owner>
247              
248             L<Iterator::Paged> - on which this class is built.
249              
250             =cut
251