File Coverage

blib/lib/AWS/S3/FileIterator.pm

Criterion	Covered	Total	%
statement	90	100	90.0
branch	36	44	81.8
condition	6	8	75.0
subroutine	18	20	90.0
pod	7	12	58.3
total	157	184	85.3

line	stmt	bran	cond	sub	pod	time	code
1							package AWS::S3::FileIterator;
2
3	6			6		46	use strict;
	6					12
	6					280
4	6			6		31	use warnings 'all';
	6					12
	6					425
5	6			6		41	use Carp 'confess';
	6					39
	6					476
6	6			6		43	use AWS::S3::Owner;
	6					11
	6					253
7	6			6		3552	use AWS::S3::File;
	6					3822
	6					53
8
9							sub new {
10	7			7	0	5242	my ($class, %args) = @_;
11
12	7					82	my $s = bless {
13							data => [ ],
14							page_number => 0,
15							idx => 0,
16							%args,
17							}, $class;
18	7					36	$s->_init;
19	4					22	return $s;
20							}
21
22							sub _init {
23	7			7		14	my ( $s ) = @_;
24
25	7					33	foreach ( qw( bucket page_size page_number ) ) {
26							confess "Required argument '$_' was not provided"
27	18	100				100	unless $s->{$_};
28							} # end foreach()
29
30	4					11	$s->{page_number}--;
31	4	100				52	$s->{marker} = '' unless defined( $s->{marker} );
32	4					12	$s->{__fetched_first_page} = 0;
33	4					14	$s->{data} = [];
34	4		66			30	$s->{pattern} ||= qr(.*);
35							} # end _init()
36
37	2			2	1	18	sub marker { shift->{marker} }
38	19			19	1	150	sub pattern { shift->{pattern} }
39	1			1	0	7	sub bucket { shift->{bucket} }
40	12			12	0	67	sub page_size { shift->{page_size} }
41	2			2	1	15	sub prefix { shift->{prefix} }
42
43							sub has_prev {
44	1			1	1	4	my $s = shift;
45
46	1					7	return $s->page_number > 1;
47							} # end has_prev()
48
49	1			1	1	8	sub has_next { shift->{has_next} }
50
51							sub next {
52	0			0	0	0	my $s = shift;
53
54	0	0				0	if( exists( $s->{data}->[ $s->{idx} ] ) ) {
55	0					0	return $s->{data}->[ $s->{idx}++ ];
56							} else {
57							# End of the current resultset, see if we can get another page of records:
58	0	0				0	if( my $page = $s->next_page ) {
59	0					0	$s->{data} = $page;
60	0					0	$s->{idx} = 0;
61	0					0	return $s->{data}->[ $s->{idx}++ ];
62							} else {
63							# No more pages, no more data:
64	0					0	return;
65							}
66							}
67							}
68
69							sub reset {
70	0			0	0	0	my $s = shift;
71	0					0	$s->{idx} = 0;
72							}
73
74
75							sub page_number {
76	6			6	1	13	my $s = shift;
77	6	100				46	@_ ? $s->{page_number} = $_[0] - 1 : $s->{page_number};
78							} # end page_number()
79
80							# S3 returns files 100 at a time. If we want more or less than 100, we can't
81							# just fetch the next page over and over - that would be inefficient and likely
82							# to cause errors.
83
84							# If the page size is 5 and page number is 2, then we:
85							# - fetch 100 items
86							# - store them
87							# - iterate internally until we get to 'page 2'
88							# - return the result.
89							# If the page size is 105 and page number is 1, then we:
90							# - fetch 100 items
91							# - fetch the next 100 items
92							# - return the first 105 items, keeping the remaining 95 items
93							# - on page '2', fetch the next 100 items and return 105 items, saving 90 items.
94							# If the page size is 105 and page number is 3, then we:
95							# - fetch items until our internal 'start' marker is 316-420
96							# - return items 316-420
97							sub next_page {
98	11			11	1	2588	my $s = shift;
99
100							# Advance to page X before proceeding:
101	11	100	100			63	if ( ( !$s->{__fetched_first_page}++ ) && $s->page_number ) {
102
103							# Advance to $s->page_number
104	1					4	my $start_page = $s->page_number;
105	1					5	my $to_discard = $start_page * $s->page_size;
106	1					2	my $discarded = 0;
107	1					2	while ( 1 ) {
108	6	50				12	my $item = $s->_next
109							or last;
110	6	100				14	$discarded++ if $item->{key} =~ $s->pattern;
111	6	100				18	last if $discarded > $to_discard;
112							} # end while()
113							} # end if()
114
115	11					30	my @chunk = ();
116	11					36	while ( my $item = $s->_next() ) {
117	11	100				59	next unless $item->{key} =~ $s->pattern;
118	10					49	push @chunk, $item;
119	10	50				41	last if @chunk == $s->page_size;
120							} # end while()
121
122							my @out = map {
123	11					32	my $owner = AWS::S3::Owner->new( %{ $_->{owner} } );
	10					19
	10					425
124	10					39	delete $_->{owner};
125	10					341	AWS::S3::File->new( %$_, owner => $owner );
126							} @chunk;
127
128	11					23	$s->{page_number}++;
129
130	11	100				35	return unless @out;
131	10	100				240	wantarray ? @out : \@out;
132							} # end next_page()
133
134							sub _next {
135	18			18		26	my $s = shift;
136
137	18	100				25	if ( my $item = shift( @{ $s->{data} } ) ) {
	18					115
138	13					38	return $item;
139							} else {
140	5	100				25	if ( my @chunk = $s->_fetch() ) {
141	4					14	push @{ $s->{data} }, @chunk;
	4					23
142	4					13	return shift( @{ $s->{data} } );
	4					33
143							} else {
144	1					8	return;
145							} # end if()
146							} # end if()
147							} # end _next()
148
149							sub _fetch {
150	5			5		16	my ( $s ) = @_;
151
152	5					237	my $path = $s->{bucket}->name . '/';
153	5					14	my %params = ();
154	5	100				24	$params{marker} = $s->{marker} if $s->{marker};
155	5	100				30	$params{prefix} = $s->{prefix} if $s->{prefix};
156	5					15	$params{max_keys} = 1000;
157	5	50				15	$params{delimiter} = $s->{delimiter} if $s->{delimiter};
158
159	5					12	my $type = 'ListBucket';
160	5					178	my $request = $s->{bucket}->s3->request( $type, %params, bucket => $s->{bucket}->name );
161	5					32	my $response = $request->request();
162
163	5	100	50			1002	$s->{has_next} = ( $response->xpc->findvalue( '//s3:IsTruncated' ) || '' ) eq 'true' ? 1 : 0;
164
165	5					924	my @files = ();
166	5					352	foreach my $node ( $response->xpc->findnodes( '//s3:Contents' ) ) {
167	28					2934	my ( $owner_node ) = $response->xpc->findnodes( './/s3:Owner', $node );
168	28					1540	my $owner = {
169							id => $response->xpc->findvalue( './/s3:ID', $owner_node ),
170							display_name => $response->xpc->findvalue( './/s3:DisplayName', $owner_node )
171							};
172	28					1457	my $etag = $response->xpc->findvalue( './/s3:ETag', $node );
173							push @files,
174							{
175							bucket => $s->{bucket},
176	28					1342	key => $response->xpc->findvalue( './/s3:Key', $node ),
177							lastmodified => $response->xpc->findvalue( './/s3:LastModified', $node ),
178							etag => $response->xpc->findvalue( './/s3:ETag', $node ),
179							size => $response->xpc->findvalue( './/s3:Size', $node ),
180							owner => $owner,
181							};
182							} # end foreach()
183
184	5	100				427	if ( @files ) {
185	4					175	$s->{marker} = $files[-1]->{key};
186							} # end if()
187
188	5	50				20	return unless defined wantarray;
189	5	100				205	@files ? return @files : return;
190							} # end _fetch()
191
192							1;
193
194							__END__
195
196							=pod
197
198							=head1 NAME
199
200							AWS::S3::FileIterator - Easily access and iterate through your S3 files.
201
202							=head1 SYNOPSIS
203
204							# Iterate through all ".txt" files, 100 at a time:
205							my $iter = $bucket->files(
206							# Required params:
207							page_size => 100,
208							page_number => 1,
209							# Optional params:
210							pattern => qr(\.txt$),
211							prefix => 'notes',
212							);
213
214							while( my @files = $iter->next_page )
215							{
216							warn $iter->page_number, "\n";
217							foreach my $file ( @files )
218							{
219							print "\t", $file->key, "\n";
220							}# end foreach()
221							}# end while()
222
223
224							=head1 DESCRIPTION
225
226							AWS::S3::FileIterator provides a means of I<iterating> through your S3 files.
227
228							If you only have a few files it might seem odd to require an iterator, but if you
229							have thousands (or millions) of files, the iterator will save you a lot of effort.
230
231							=head1 PUBLIC PROPERTIES
232
233							=head2 has_prev
234
235							Boolean - read-only
236
237							=head2 has_next
238
239							Boolean - read-only
240
241							=head2 page_number
242
243							Integer - read-write
244
245							=head2 marker
246
247							String - read-only
248
249							Used internally to tell Amazon S3 where the last request for a listing of files left off.
250
251							=head2 pattern
252
253							Regexp - read-only
254
255							If supplied to the constructor, only files which match the pattern will be returned.
256
257							=head2 prefix
258
259							String - read-only
260
261							If supplied to the constructor, only files which begin with the indicated prefix will be returned.
262
263							=head1 PUBLIC METHODS
264
265							=head2 next_page()
266
267							Returns the next page of results as an array in list context or arrayref in scalar context.
268
269							Increments C<page_number> by one.
270
271							=head1 SEE ALSO
272
273							L<The Amazon S3 API Documentation|http://docs.amazonwebservices.com/AmazonS3/latest/API/>
274
275							L<AWS::S3>
276
277							L<AWS::S3::Bucket>
278
279							L<AWS::S3::File>
280
281							L<AWS::S3::Owner>
282
283							L<Iterator::Paged> - on which this class is built.
284
285							=cut
286