File Coverage

blib/lib/AWS/S3/FileIterator.pm

Criterion    Covered   Total       %
statement          45      88    51.1
branch              5      40    12.5
condition           2       8    25.0
subroutine         17      17   100.0
pod                 6       8    75.0
total              75     161    46.5


package AWS::S3::FileIterator;

use strict;
use warnings 'all';
use base 'Iterator::Paged';
use Carp 'confess';
use AWS::S3::Owner;
use AWS::S3::File;

sub _init {
    my ( $s ) = @_;

    foreach ( qw( bucket page_size page_number ) ) {
        confess "Required argument '$_' was not provided"
            unless $s->{$_};
    } # end foreach()

    # Page numbers are 1-based for callers but 0-based internally:
    $s->{page_number}--;
    $s->{marker} = '' unless defined( $s->{marker} );
    $s->{__fetched_first_page} = 0;
    $s->{data} = [];
    $s->{pattern} ||= qr(.*);
} # end _init()

sub marker    { shift->{marker} }
sub pattern   { shift->{pattern} }
sub bucket    { shift->{bucket} }
sub page_size { shift->{page_size} }

sub has_prev {
    my $s = shift;

    return $s->page_number > 1;
} # end has_prev()

sub has_next { shift->{has_next} }

sub page_number {
    my $s = shift;

    # Accepts a 1-based page number; stores and returns the internal counter:
    @_ ? $s->{page_number} = $_[0] - 1 : $s->{page_number};
} # end page_number()

# S3 returns listing results in batches (this module requests up to 1,000
# keys per call; the examples below assume a batch size of 100 for brevity).
# If the caller wants a page size other than the batch size, we can't just
# hand back one batch per page - we have to buffer items internally.

# If the page size is 5 and the page number is 2, then we:
# - fetch a batch of items
# - store them
# - iterate internally until we get to 'page 2'
# - return the result.
# If the page size is 105 and the page number is 1, then we:
# - fetch 100 items
# - fetch the next 100 items
# - return the first 105 items, keeping the remaining 95 items
# - on page '2', fetch the next 100 items and return 105 items, saving 90 items.
# If the page size is 105 and the page number is 3, then we:
# - fetch and discard items 1 through 210 (pages 1 and 2)
# - return items 211 through 315.
sub next_page {
    my $s = shift;

    # Advance to page X before proceeding:
    if ( ( !$s->{__fetched_first_page}++ ) && $s->page_number ) {

        # Advance to $s->page_number by discarding the preceding pages:
        my $start_page = $s->page_number;
        my $to_discard = $start_page * $s->page_size;
        my $discarded  = 0;
        while ( 1 ) {
            my $item = $s->_next
                or last;
            $discarded++ if $item->{key} =~ $s->pattern;

            # Stop once $to_discard matching items have been consumed:
            last if $discarded >= $to_discard;
        } # end while()
    } # end if()

    # Collect one page's worth of matching items:
    my @chunk = ();
    while ( my $item = $s->_next() ) {
        next unless $item->{key} =~ $s->pattern;
        push @chunk, $item;
        last if @chunk == $s->page_size;
    } # end while()

    # Inflate the raw hashrefs into AWS::S3::File objects:
    my @out = map {
        my $owner = AWS::S3::Owner->new( %{ $_->{owner} } );
        delete $_->{owner};
        AWS::S3::File->new( %$_, owner => $owner );
    } @chunk;

    $s->{page_number}++;

    return unless @out;
    wantarray ? @out : \@out;
} # end next_page()

sub _next {
    my $s = shift;

    # Return the next buffered item, refilling the buffer from S3 when empty:
    if ( my $item = shift( @{ $s->{data} } ) ) {
        return $item;
    } else {
        if ( my @chunk = $s->_fetch() ) {
            push @{ $s->{data} }, @chunk;
            return shift( @{ $s->{data} } );
        } else {
            return;
        } # end if()
    } # end if()
} # end _next()

sub _fetch {
    my ( $s ) = @_;

    # Build a ListBucket request, resuming after the stored marker:
    my %params = ();
    $params{marker}    = $s->{marker}    if $s->{marker};
    $params{prefix}    = $s->{prefix}    if $s->{prefix};
    $params{max_keys}  = 1000;
    $params{delimiter} = $s->{delimiter} if $s->{delimiter};

    my $type     = 'ListBucket';
    my $request  = $s->{bucket}->s3->request( $type, %params, bucket => $s->{bucket}->name );
    my $response = $request->request();

    # S3 sets IsTruncated when more results remain beyond this batch:
    $s->{has_next} = ( $response->xpc->findvalue( '//s3:IsTruncated' ) || '' ) eq 'true' ? 1 : 0;

    my @files = ();
    foreach my $node ( $response->xpc->findnodes( '//s3:Contents' ) ) {
        my ( $owner_node ) = $response->xpc->findnodes( './/s3:Owner', $node );
        my $owner = {
            id           => $response->xpc->findvalue( './/s3:ID', $owner_node ),
            display_name => $response->xpc->findvalue( './/s3:DisplayName', $owner_node ),
        };
        push @files, {
            bucket       => $s->{bucket},
            key          => $response->xpc->findvalue( './/s3:Key', $node ),
            lastmodified => $response->xpc->findvalue( './/s3:LastModified', $node ),
            etag         => $response->xpc->findvalue( './/s3:ETag', $node ),
            size         => $response->xpc->findvalue( './/s3:Size', $node ),
            owner        => $owner,
        };
    } # end foreach()

    # Remember where this batch ended so the next request resumes there:
    if ( @files ) {
        $s->{marker} = $files[-1]->{key};
    } # end if()

    return unless defined wantarray;
    @files ? return @files : return;
} # end _fetch()

1;

__END__

=pod

=head1 NAME

AWS::S3::FileIterator - Easily access and iterate through your S3 files.

=head1 SYNOPSIS

  # Iterate through all ".txt" files, 100 at a time:
  my $iter = $bucket->files(
    # Required params:
    page_size   => 100,
    page_number => 1,
    # Optional params:
    pattern     => qr(\.txt$)
  );

  while( my @files = $iter->next_page )
  {
    warn $iter->page_number, "\n";
    foreach my $file ( @files )
    {
      print "\t", $file->key, "\n";
    }# end foreach()
  }# end while()

=head1 DESCRIPTION

AWS::S3::FileIterator provides a means of I<iterating> through your S3 files.

If you only have a few files it might seem odd to need an iterator, but with
thousands (or millions) of files it becomes essential: listings are fetched
from S3 in batches and returned one page at a time, so the complete listing
never has to be held in memory at once.
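
For example, here is a minimal sketch (assuming C<$bucket> is an
L<AWS::S3::Bucket> object and C<process_file> is a placeholder for your own
code) that walks an arbitrarily large bucket one page at a time:

  my $iter = $bucket->files( page_size => 1000, page_number => 1 );
  while( my @files = $iter->next_page )
  {
    # Only one page of AWS::S3::File objects is in memory at a time:
    process_file( $_ ) for @files;
  }# end while()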

=head1 PUBLIC PROPERTIES

=head2 has_prev

Boolean - read-only

True if there is a page before the one most recently returned by
C<next_page()>.

=head2 has_next

Boolean - read-only

True if the most recent listing request to S3 was truncated, i.e. more
results remain on the server.
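
For example (a sketch, reusing the C<$iter> from the SYNOPSIS):

  my @files = $iter->next_page;
  if( $iter->has_next )
  {
    # S3 reported more results beyond the batch just fetched:
    print "More files remain on the server\n";
  }# end if()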

=head2 page_number

Integer - read-write

The 1-based number of the page most recently returned by C<next_page()>.
Pass C<page_number> to the constructor to begin iterating partway through
the listing.
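
For instance, a sketch (again assuming a C<$bucket> object):

  # Start at page 3 of the listing rather than page 1:
  my $iter  = $bucket->files( page_size => 10, page_number => 3 );
  my @files = $iter->next_page;    # items 21 through 30 of the listing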

=head2 marker

String - read-only

Used internally to tell Amazon S3 where the last request for a listing of
files left off.
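
For example (a sketch; the marker is the key of the last item fetched from
S3, which is not necessarily the last item of the page just returned):

  my @files = $iter->next_page;
  warn "Next S3 listing resumes after: ", $iter->marker, "\n";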

=head2 pattern

Regexp - read-only

If supplied to the constructor, only files whose keys match the pattern will
be returned.
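
For example (a sketch; the pattern is matched against each object's key):

  # Only iterate over compressed log files under "logs/":
  my $iter = $bucket->files(
    page_size   => 100,
    page_number => 1,
    pattern     => qr(^logs/.+\.gz$),
  );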

=head1 PUBLIC METHODS

=head2 next_page()

Returns the next page of results as an array in list context or an arrayref
in scalar context.

Returns an empty list (C<undef> in scalar context) once the results are
exhausted.

Increments C<page_number> by one.
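
For example:

  # Scalar context returns an arrayref (undef once exhausted):
  while( my $files = $iter->next_page )
  {
    print scalar( @$files ), " files on page ", $iter->page_number, "\n";
  }# end while()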

=head1 SEE ALSO

L<The Amazon S3 API Documentation|http://docs.amazonwebservices.com/AmazonS3/latest/API/>

L<AWS::S3>

L<AWS::S3::Bucket>

L<AWS::S3::File>

L<AWS::S3::Owner>

L<Iterator::Paged> - on which this class is built.

=cut