File Coverage

blib/lib/AWS/S3/FileIterator.pm

Criterion    Covered   Total       %
statement          45      88    51.1
branch              5      40    12.5
condition           2       8    25.0
subroutine         17      17   100.0
pod                 6       8    75.0
total              75     161    46.5


package AWS::S3::FileIterator;

use strict;
use warnings 'all';
use base 'Iterator::Paged';
use Carp 'confess';
use AWS::S3::Owner;
use AWS::S3::File;

sub _init {
    my ( $s ) = @_;

    foreach ( qw( bucket page_size page_number ) ) {
        confess "Required argument '$_' was not provided"
            unless $s->{$_};
    } # end foreach()

    # Page numbers are 1-based for callers but 0-based internally:
    $s->{page_number}--;
    $s->{marker} = '' unless defined( $s->{marker} );
    $s->{__fetched_first_page} = 0;
    $s->{data} = [];
    $s->{pattern} ||= qr(.*);
} # end _init()

sub marker    { shift->{marker} }
sub pattern   { shift->{pattern} }
sub bucket    { shift->{bucket} }
sub page_size { shift->{page_size} }

sub has_prev {
    my $s = shift;

    return $s->page_number > 1;
} # end has_prev()

sub has_next { shift->{has_next} }

sub page_number {
    my $s = shift;

    # Accepts a 1-based page number; stores and returns the internal counter:
    @_ ? $s->{page_number} = $_[0] - 1 : $s->{page_number};
} # end page_number()

# S3 returns listing results in batches (this module requests up to 1,000
# keys per call; the examples below assume a batch size of 100 for brevity).
# If the caller wants a page size other than the batch size, we can't just
# hand back one batch per page - we have to buffer items internally.

# If the page size is 5 and the page number is 2, then we:
# - fetch a batch of items
# - store them
# - iterate internally until we get to 'page 2'
# - return the result.
# If the page size is 105 and the page number is 1, then we:
# - fetch 100 items
# - fetch the next 100 items
# - return the first 105 items, keeping the remaining 95 items
# - on page '2', fetch the next 100 items and return 105 items, saving 90 items.
# If the page size is 105 and the page number is 3, then we:
# - fetch and discard items 1 through 210 (pages 1 and 2)
# - return items 211 through 315.
sub next_page {
    my $s = shift;

    # Advance to page X before proceeding:
    if ( ( !$s->{__fetched_first_page}++ ) && $s->page_number ) {

        # Advance to $s->page_number by discarding the preceding pages:
        my $start_page = $s->page_number;
        my $to_discard = $start_page * $s->page_size;
        my $discarded  = 0;
        while ( 1 ) {
            my $item = $s->_next
                or last;
            $discarded++ if $item->{key} =~ $s->pattern;

            # Stop once $to_discard matching items have been consumed:
            last if $discarded >= $to_discard;
        } # end while()
    } # end if()

    # Collect one page's worth of matching items:
    my @chunk = ();
    while ( my $item = $s->_next() ) {
        next unless $item->{key} =~ $s->pattern;
        push @chunk, $item;
        last if @chunk == $s->page_size;
    } # end while()

    # Inflate the raw hashrefs into AWS::S3::File objects:
    my @out = map {
        my $owner = AWS::S3::Owner->new( %{ $_->{owner} } );
        delete $_->{owner};
        AWS::S3::File->new( %$_, owner => $owner );
    } @chunk;

    $s->{page_number}++;

    return unless @out;
    wantarray ? @out : \@out;
} # end next_page()

sub _next {
    my $s = shift;

    # Return the next buffered item, refilling the buffer from S3 when empty:
    if ( my $item = shift( @{ $s->{data} } ) ) {
        return $item;
    } else {
        if ( my @chunk = $s->_fetch() ) {
            push @{ $s->{data} }, @chunk;
            return shift( @{ $s->{data} } );
        } else {
            return;
        } # end if()
    } # end if()
} # end _next()

sub _fetch {
    my ( $s ) = @_;

    # Build a ListBucket request, resuming after the stored marker:
    my %params = ();
    $params{marker}    = $s->{marker}    if $s->{marker};
    $params{prefix}    = $s->{prefix}    if $s->{prefix};
    $params{max_keys}  = 1000;
    $params{delimiter} = $s->{delimiter} if $s->{delimiter};

    my $type     = 'ListBucket';
    my $request  = $s->{bucket}->s3->request( $type, %params, bucket => $s->{bucket}->name );
    my $response = $request->request();

    # S3 sets IsTruncated when more results remain beyond this batch:
    $s->{has_next} = ( $response->xpc->findvalue( '//s3:IsTruncated' ) || '' ) eq 'true' ? 1 : 0;

    my @files = ();
    foreach my $node ( $response->xpc->findnodes( '//s3:Contents' ) ) {
        my ( $owner_node ) = $response->xpc->findnodes( './/s3:Owner', $node );
        my $owner = {
            id           => $response->xpc->findvalue( './/s3:ID', $owner_node ),
            display_name => $response->xpc->findvalue( './/s3:DisplayName', $owner_node ),
        };
        push @files, {
            bucket       => $s->{bucket},
            key          => $response->xpc->findvalue( './/s3:Key', $node ),
            lastmodified => $response->xpc->findvalue( './/s3:LastModified', $node ),
            etag         => $response->xpc->findvalue( './/s3:ETag', $node ),
            size         => $response->xpc->findvalue( './/s3:Size', $node ),
            owner        => $owner,
        };
    } # end foreach()

    # Remember where this batch ended so the next request resumes there:
    if ( @files ) {
        $s->{marker} = $files[-1]->{key};
    } # end if()

    return unless defined wantarray;
    @files ? return @files : return;
} # end _fetch()

1;

__END__

=pod

=head1 NAME

AWS::S3::FileIterator - Easily access and iterate through your S3 files.

=head1 SYNOPSIS

  # Iterate through all ".txt" files, 100 at a time:
  my $iter = $bucket->files(
    # Required params:
    page_size   => 100,
    page_number => 1,
    # Optional params:
    pattern     => qr(\.txt$)
  );

  while( my @files = $iter->next_page )
  {
    warn $iter->page_number, "\n";
    foreach my $file ( @files )
    {
      print "\t", $file->key, "\n";
    }# end foreach()
  }# end while()

=head1 DESCRIPTION

AWS::S3::FileIterator provides a means of I<iterating> through your S3 files.

If you only have a few files it might seem odd to need an iterator, but with
thousands (or millions) of files it becomes essential: listings are fetched
from S3 in batches and returned one page at a time, so the complete listing
never has to be held in memory at once.
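
For example, here is a minimal sketch (assuming C<$bucket> is an
L<AWS::S3::Bucket> object and C<process_file> is a placeholder for your own
code) that walks an arbitrarily large bucket one page at a time:

  my $iter = $bucket->files( page_size => 1000, page_number => 1 );
  while( my @files = $iter->next_page )
  {
    # Only one page of AWS::S3::File objects is in memory at a time:
    process_file( $_ ) for @files;
  }# end while()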

=head1 PUBLIC PROPERTIES

=head2 has_prev

Boolean - read-only

True if there is a page before the one most recently returned by
C<next_page()>.

=head2 has_next

Boolean - read-only

True if the most recent listing request to S3 was truncated, i.e. more
results remain on the server.
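
For example (a sketch, reusing the C<$iter> from the SYNOPSIS):

  my @files = $iter->next_page;
  if( $iter->has_next )
  {
    # S3 reported more results beyond the batch just fetched:
    print "More files remain on the server\n";
  }# end if()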

=head2 page_number

Integer - read-write

The 1-based number of the page most recently returned by C<next_page()>.
Pass C<page_number> to the constructor to begin iterating partway through
the listing.
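
For instance, a sketch (again assuming a C<$bucket> object):

  # Start at page 3 of the listing rather than page 1:
  my $iter  = $bucket->files( page_size => 10, page_number => 3 );
  my @files = $iter->next_page;    # items 21 through 30 of the listing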

=head2 marker

String - read-only

Used internally to tell Amazon S3 where the last request for a listing of
files left off.
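
For example (a sketch; the marker is the key of the last item fetched from
S3, which is not necessarily the last item of the page just returned):

  my @files = $iter->next_page;
  warn "Next S3 listing resumes after: ", $iter->marker, "\n";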

=head2 pattern

Regexp - read-only

If supplied to the constructor, only files whose keys match the pattern will
be returned.
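
For example (a sketch; the pattern is matched against each object's key):

  # Only iterate over compressed log files under "logs/":
  my $iter = $bucket->files(
    page_size   => 100,
    page_number => 1,
    pattern     => qr(^logs/.+\.gz$),
  );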

=head1 PUBLIC METHODS

=head2 next_page()

Returns the next page of results as an array in list context or an arrayref
in scalar context.

Returns an empty list (C<undef> in scalar context) once the results are
exhausted.

Increments C<page_number> by one.
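
For example:

  # Scalar context returns an arrayref (undef once exhausted):
  while( my $files = $iter->next_page )
  {
    print scalar( @$files ), " files on page ", $iter->page_number, "\n";
  }# end while()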

=head1 SEE ALSO

L<The Amazon S3 API Documentation|http://docs.amazonwebservices.com/AmazonS3/latest/API/>

L<AWS::S3>

L<AWS::S3::Bucket>

L<AWS::S3::File>

L<AWS::S3::Owner>

L<Iterator::Paged> - on which this class is built.

=cut