File Coverage

blib/lib/AWS/S3/FileIterator.pm
Criterion    Covered   Total       %
statement         46      89    51.6
branch             8      40    20.0
condition          3       8    37.5
subroutine        18      18   100.0
pod                7       9    77.7
total             82     164    50.0


line stmt bran cond sub pod time code
1              
2             package AWS::S3::FileIterator;
3              
4 1     1   1626 use strict;
  1         1  
  1         25  
5 1     1   3 use warnings 'all';
  1         1  
  1         30  
6 1     1   4 use base 'Iterator::Paged';
  1         1  
  1         410  
7 1     1   308 use Carp 'confess';
  1         2  
  1         37  
8 1     1   344 use AWS::S3::Owner;
  1         9237  
  1         28  
9 1     1   470 use AWS::S3::File;
  1         294  
  1         6  
10              
11             sub _init {
12 5     5   3585 my ( $s ) = @_;
13              
14 5         9 foreach ( qw( bucket page_size page_number ) ) {
15             confess "Required argument '$_' was not provided"
16 12 100       59 unless $s->{$_};
17             } # end foreach()
18              
19 2         2 $s->{page_number}--;
20 2 100       6 $s->{marker} = '' unless defined( $s->{marker} );
21 2         4 $s->{__fetched_first_page} = 0;
22 2         2 $s->{data} = [];
23 2   66     12 $s->{pattern} ||= qr(.*);
24             } # end _init()
25              
26 2     2 1 313 sub marker { shift->{marker} }
27 2     2 1 10 sub pattern { shift->{pattern} }
28 1     1 0 3 sub bucket { shift->{bucket} }
29 1     1 0 3 sub page_size { shift->{page_size} }
30 2     2 1 6 sub prefix { shift->{prefix} }
31              
32             sub has_prev {
33 1     1 1 1 my $s = shift;
34              
35 1         2 return $s->page_number > 1;
36             } # end has_prev()
37              
38 1     1 1 9 sub has_next { shift->{has_next} }
39              
40             sub page_number {
41 4     4 1 4 my $s = shift;
42 4 100       20 @_ ? $s->{page_number} = $_[0] - 1 : $s->{page_number};
43             } # end page_number()
44              
45             # S3 returns listings in fixed-size batches (we request up to 1000 keys
46             # per call via max_keys). If the caller wants more or fewer items per
47             # page than that, one S3 request can't simply be mapped to one page -
48             # the page boundaries won't line up with the batch boundaries.
49             # Using an illustrative batch size of 100:
50             # If the page size is 5 and the page number is 2, then we:
51             # - fetch 100 items
52             # - store them
53             # - iterate internally until we get to 'page 2'
54             # - return the result.
55             # If the page size is 105 and the page number is 1, then we:
56             # - fetch 100 items
57             # - fetch the next 100 items
58             # - return the first 105 items, keeping the remaining 95 items
59             # - on page '2', fetch the next 100 items and return 105 items, saving 90 items.
60             # If the page size is 105 and the page number is 3, then we:
61             # - discard items until we have skipped 210 (2 pages' worth), then return items 211-315.
62             sub next_page {
63 1     1 1 611 my $s = shift;
64              
65             # Advance to page X before proceeding:
66 1 50 33     5 if ( ( !$s->{__fetched_first_page}++ ) && $s->page_number ) {
67              
68             # Advance to $s->page_number
69 0         0 my $start_page = $s->page_number;
70 0         0 my $to_discard = $start_page * $s->page_size;
71 0         0 my $discarded = 0;
72 0         0 while ( 1 ) {
73 0 0       0 my $item = $s->_next
74             or last;
75 0 0       0 $discarded++ if $item->{key} =~ $s->pattern;
76 0 0       0 last if $discarded >= $to_discard; # >= so the first item of the target page isn't dropped
77             } # end while()
78             } # end if()
79              
80 1         2 my @chunk = ();
81 1         2 while ( my $item = $s->_next() ) {
82 0 0       0 next unless $item->{key} =~ $s->pattern;
83 0         0 push @chunk, $item;
84 0 0       0 last if @chunk == $s->page_size;
85             } # end while()
86              
87             my @out = map {
88 0         0 my $owner = AWS::S3::Owner->new( %{ $_->{owner} } );
  0         0  
  0         0  
89 0         0 delete $_->{owner};
90 0         0 AWS::S3::File->new( %$_, owner => $owner );
91             } @chunk;
92              
93 0         0 $s->{page_number}++;
94              
95 0 0       0 return unless @out;
96 0 0       0 wantarray ? @out : \@out;
97             } # end next_page()
98              
99             sub _next {
100 1     1   1 my $s = shift;
101              
102 1 50       2 if ( my $item = shift( @{ $s->{data} } ) ) {
  1         2  
103 0         0 return $item;
104             } else {
105 1 0       3 if ( my @chunk = $s->_fetch() ) {
106 0         0 push @{ $s->{data} }, @chunk;
  0         0  
107 0         0 return shift( @{ $s->{data} } );
  0         0  
108             } else {
109 0         0 return;
110             } # end if()
111             } # end if()
112             } # end _next()
113              
114             sub _fetch {
115 1     1   1 my ( $s ) = @_;
116              
117 1         26 my $path = $s->{bucket}->name . '/';
118 0           my %params = ();
119 0 0         $params{marker} = $s->{marker} if $s->{marker};
120 0 0         $params{prefix} = $s->{prefix} if $s->{prefix};
121 0           $params{max_keys} = 1000;
122 0 0         $params{delimiter} = $s->{delimiter} if $s->{delimiter};
123              
124 0           my $type = 'ListBucket';
125 0           my $request = $s->{bucket}->s3->request( $type, %params, bucket => $s->{bucket}->name );
126 0           my $response = $request->request();
127              
128 0 0 0       $s->{has_next} = ( $response->xpc->findvalue( '//s3:IsTruncated' ) || '' ) eq 'true' ? 1 : 0;
129              
130 0           my @files = ();
131 0           foreach my $node ( $response->xpc->findnodes( '//s3:Contents' ) ) {
132 0           my ( $owner_node ) = $response->xpc->findnodes( './/s3:Owner', $node );
133 0           my $owner = {
134             id => $response->xpc->findvalue( './/s3:ID', $owner_node ),
135             display_name => $response->xpc->findvalue( './/s3:DisplayName', $owner_node )
136             };
137 0           my $etag = $response->xpc->findvalue( './/s3:ETag', $node );
138             push @files,
139             {
140             bucket => $s->{bucket},
141 0           key => $response->xpc->findvalue( './/s3:Key', $node ),
142             lastmodified => $response->xpc->findvalue( './/s3:LastModified', $node ),
143             etag => $response->xpc->findvalue( './/s3:ETag', $node ),
144             size => $response->xpc->findvalue( './/s3:Size', $node ),
145             owner => $owner,
146             };
147             } # end foreach()
148              
149 0 0         if ( @files ) {
150 0           $s->{marker} = $files[-1]->{key};
151             } # end if()
152              
153 0 0         return unless defined wantarray;
154 0 0         @files ? return @files : return;
155             } # end _fetch()
156              
157             1;
158              
159             __END__
160              
161             =pod
162              
163             =head1 NAME
164              
165             AWS::S3::FileIterator - Easily access and iterate through your S3 files.
166              
167             =head1 SYNOPSIS
168              
169             # Iterate through all ".txt" files, 100 at a time:
170             my $iter = $bucket->files(
171             # Required params:
172             page_size => 100,
173             page_number => 1,
174             # Optional params:
175             pattern => qr(\.txt$),
176             prefix => 'notes',
177             );
178            
179             while( my @files = $iter->next_page )
180             {
181             warn $iter->page_number, "\n";
182             foreach my $file ( @files )
183             {
184             print "\t", $file->key, "\n";
185             }# end foreach()
186             }# end while()
187              
188              
189             =head1 DESCRIPTION
190              
191             AWS::S3::FileIterator provides a means of I<iterating> through your S3 files.
192              
193             If you only have a few files it might seem odd to require an iterator, but if you
194             have thousands (or millions) of files, the iterator will save you a lot of effort.
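
                Because pages are assembled on the client side, C<page_size> is not tied to
                the batch size of the underlying S3 listing requests - page sizes larger
                than a single batch work transparently. A sketch (assuming C<$bucket> is
                configured as in the SYNOPSIS):

                  # Pages of 1,050 files each; internally this spans several listing batches:
                  my $iter = $bucket->files( page_size => 1050, page_number => 1 );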
195              
196             =head1 PUBLIC PROPERTIES
197              
198             =head2 has_prev
199              
200             Boolean - read-only
201              
202             =head2 has_next
203              
204             Boolean - read-only
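
                Both become meaningful once iteration has begun: C<has_next> reflects S3's
                C<IsTruncated> flag from the most recent listing request, and C<has_prev>
                is true once the page number is past 1. A sketch (assuming C<$iter> from
                the SYNOPSIS):

                  my @files = $iter->next_page;
                  print "more pages remain\n"     if $iter->has_next;
                  print "not on the first page\n" if $iter->has_prev;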
205              
206             =head2 page_number
207              
208             Integer - read-write
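
                The setter is 1-based, like the constructor argument; seeking happens on
                the first call to C<next_page()>, so set it before iterating. A sketch
                (assuming a freshly constructed C<$iter>):

                  $iter->page_number( 3 );          # start from the third page
                  my @files = $iter->next_page;     # returns page 3
                  print $iter->page_number, "\n";   # now reports 3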
209              
210             =head2 marker
211              
212             String - read-only
213              
214             Used internally to tell Amazon S3 where the previous listing request left off.
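
                Although the accessor is read-only, the iterator's C<_init()> accepts an
                initial marker, so a listing can in principle be resumed where a previous
                iterator left off (a sketch; whether C<< $bucket->files >> forwards a
                C<marker> argument to the iterator is an assumption):

                  my $resume_from = $iter->marker;   # last key returned by the most recent request
                  my $iter2 = $bucket->files(
                      page_size   => 100,
                      page_number => 1,
                      marker      => $resume_from,   # assumption: passed through to _init()
                  );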
215              
216             =head2 pattern
217              
218             Regexp - read-only
219              
220             If supplied to the constructor, only files which match the pattern will be returned.
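
                =head2 bucket

                L<AWS::S3::Bucket> - read-only

                The bucket whose contents are being listed, as supplied to the constructor.

                =head2 page_size

                Integer - read-only

                The maximum number of (matching) files returned per page, as supplied to
                the constructor.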
221              
222             =head2 prefix
223              
224             String - read-only
225              
226             If supplied to the constructor, only files which begin with the indicated prefix will be returned.
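
                Note that C<prefix> is applied server-side (it is sent with the S3 listing
                request), while C<pattern> is applied client-side to the returned keys.
                Combining the two narrows the listing cheaply first, then filters (example
                values are illustrative):

                  my $iter = $bucket->files(
                      page_size   => 100,
                      page_number => 1,
                      prefix      => 'logs/',      # server-side: only keys under logs/
                      pattern     => qr(\.gz$),    # client-side: only gzipped files
                  );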
227              
228             =head1 PUBLIC METHODS
229              
230             =head2 next_page()
231              
232             Returns the next page of results - a list in list context, or an arrayref in scalar context.
233              
234             Increments C<page_number> by one.
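
                Returns an empty list (or C<undef> in scalar context) once the listing is
                exhausted, so it can drive a loop in either context:

                  # List context:
                  while ( my @files = $iter->next_page ) {
                      print $_->key, "\n" for @files;
                  }

                  # Scalar context:
                  while ( my $files = $iter->next_page ) {
                      print $_->key, "\n" for @$files;
                  }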
235              
236             =head1 SEE ALSO
237              
238             L<The Amazon S3 API Documentation|http://docs.amazonwebservices.com/AmazonS3/latest/API/>
239              
240             L<AWS::S3>
241              
242             L<AWS::S3::Bucket>
243              
244             L<AWS::S3::File>
245              
246             L<AWS::S3::Owner>
247              
248             L<Iterator::Paged> - on which this class is built.
249              
250             =cut
251