File Coverage

blib/lib/Geo/UK/Postcode/CodePointOpen.pm

Criterion	Covered	Total	%
statement	13	15	86.6
branch			n/a
condition			n/a
subroutine	5	5	100.0
pod			n/a
total	18	20	90.0

line	stmt	sub	time	code
1				package Geo::UK::Postcode::CodePointOpen;
2
3				our $VERSION = '0.004';
4
5	5	5	453375	use Moo;
	5		113979
	5		36
6	5	5	31279	use Types::Path::Tiny qw/ Dir /;
	5		857428
	5		79
7
8	5	5	11489	use Geo::UK::Postcode::Regex;
	5		29245
	5		368
9	5	5	6593	use Geo::Coordinates::OSGB qw/ grid_to_ll shift_ll_into_WGS84 /;
	5		208688
	5		728
10	5	5	9528	use List::MoreUtils qw/ uniq /;
	0
	0
11				use Text::CSV;
12
# Directory whose 'Doc' and 'Data/CSV' children are read by doc_dir and
# data_dir. Values are passed through the coercion of the 'Dir' type.
has path => (
    is     => 'ro',
    isa    => Dir,
    coerce => Dir->coercion,
);

# Lazy attributes: each one is filled in on first access by the
# _build_<name> method of the same name.
has $_ => ( is => 'lazy' ) for qw/ pc_re column_headers csv metadata /;
18
# Builder for the lazy 'pc_re' attribute: returns whatever
# Geo::UK::Postcode::Regex->strict_regex provides.
sub _build_pc_re {
    return Geo::UK::Postcode::Regex->strict_regex;
}
22
# Builder for the lazy 'column_headers' attribute.
#
# Reads the first two CSV records of
# Doc/Code-Point_Open_Column_Headers.csv and returns a hash ref:
#
#   { short => <first record>, long => <second record> }
#
# where each record is whatever $self->csv->getline returns (an array ref
# of fields). Dies with the parser's diagnostic if either record cannot be
# read, instead of handing back undefined header lists.
sub _build_column_headers {
    my $self = shift;

    my $file = $self->doc_dir->child('Code-Point_Open_Column_Headers.csv');
    my $fh   = $file->filehandle('<');
    my $csv  = $self->csv;

    # The order matters: the short names are on the first record, the long
    # names on the second.
    my %headers;
    for my $kind (qw/ short long /) {
        $headers{$kind} = $csv->getline($fh)
            or die "Unable to read $kind column headers from '$file': "
            . $csv->error_diag() . "\n";
    }

    return \%headers;
}
34
# Builder for the lazy 'csv' attribute.
#
# Returns a Text::CSV parser created with binary => 1 and eol => "\r\n".
# Dies with Text::CSV's diagnostic message if the parser cannot be created.
sub _build_csv {

    # error_diag is concatenated (string context) on purpose: in list
    # context it returns several values, which die() would run together.
    my $csv = Text::CSV->new( { binary => 1, eol => "\r\n" } )
        or die "Unable to create Text::CSV parser: "
        . Text::CSV->error_diag() . "\n";

    return $csv;
}
40
# Builder for the lazy 'metadata' attribute, parsed from Doc/metadata.txt.
#
# Expected layout of the file (the first line is stored as AUTHOR):
#
#   <author line>
#   PRODUCT : ...
#   DATASET VERSION NUMBER : ...
#   COPYRIGHT DATE : ...
#   RM UPDATE DATE : ...
#    XX\t123
#
# Returns a hash ref holding AUTHOR, one entry per "NAME : VALUE" line, and
# 'counts', a hash ref mapping the one- or two-letter code of each
# tab-separated line to its number.
sub _build_metadata {
    my $self = shift;

    my @lines
        = $self->doc_dir->child('metadata.txt')->lines( { chomp => 1 } );

    my %metadata = ( AUTHOR => shift @lines );

    # Split on the first " : " only, and ignore lines that do not split
    # into a name and a value: an odd number of fields would otherwise
    # shift every following key/value pair.
    for my $line ( grep {/:/} @lines ) {
        my ( $name, $value ) = split /\s:\s/, $line, 2;
        next unless defined $value;
        $metadata{$name} = $value;
    }

    my %counts;
    for my $line ( grep {/\t/} @lines ) {
        $counts{$1} = $2 if $line =~ /\s+([A-Z]{1,2})\t(\d+)/;
    }
    $metadata{counts} = \%counts;

    return \%metadata;
}
66
# Returns the 'Doc' child of the configured path.
sub doc_dir {
    my $self = shift;

    return $self->path->child('Doc');
}
70
# Returns the 'Data/CSV' child of the configured path.
sub data_dir {
    my $self = shift;

    return $self->path->child('Data/CSV');
}
74
# Returns the sorted list of CSV files found in data_dir.
#
# @outcodes (optional): outcodes such as 'AB10'. Only the leading letters
# of each outcode are used, matched case-insensitively. When at least one
# such prefix can be extracted, only the files named '<prefix>.csv' (lower
# case) are returned; otherwise every child ending in '.csv' is returned.
sub data_files {
    my ( $self, @outcodes ) = @_;

    # Entries without leading letters produce a false value and are dropped.
    my @areas = uniq grep {$_} map { /^([A-Z]+)/i && lc $1 } @outcodes;

    my $file_re = qr/\.csv$/;
    if (@areas) {
        my $alternatives = join '|', @areas;
        $file_re = qr/^(?:$alternatives)\.csv$/;
    }

    # Collected into an array first so that sort is handed a plain list.
    my @files = $self->data_dir->children($file_re);

    return sort @files;
}
86
# Builds and returns an iterator (code reference) over the rows of the
# data files returned by data_files.
#
# Named arguments:
#   short_column_names - key each row by the 'short' column headers (added
#                        columns are then LA/LO and OC/IC) instead of the
#                        'long' ones (Latitude/Longitude, Outcode/Incode)
#   outcodes           - array ref of outcodes; limits the files that are
#                        read and the rows that are returned
#   include_lat_long   - add latitude/longitude columns, formatted to five
#                        decimal places, derived from Eastings/Northings
#   split_postcode     - add outcode and incode columns
#
# Each call of the iterator returns a hash ref for the next row, or nothing
# once every data file is exhausted. It dies when a CSV record or (with
# split_postcode / outcodes) a postcode cannot be parsed.
#
# NOTE(review): 'Postcode', 'Eastings' and 'Northings' are always looked up
# by these long names; confirm they are also present when
# short_column_names is set.
sub read_iterator {
    my ( $self, %args ) = @_;

    my $short     = $args{short_column_names};
    my @col_names = @{ $self->column_headers->{ $short ? 'short' : 'long' } };

    my ( $lat_col, $lon_col )
        = $short ? ( 'LA', 'LO' ) : ( 'Latitude', 'Longitude' );
    my ( $out_col, $in_col )
        = $short ? ( 'OC', 'IC' ) : ( 'Outcode', 'Incode' );

    my @outcodes   = @{ $args{outcodes} || [] };
    my @data_files = $self->data_files(@outcodes);

    # Anchored alternation of the upper-cased outcodes; left undefined when
    # no outcode filter was requested.
    my $match;
    if (@outcodes) {
        my $alternatives = join '|', map {uc} @outcodes;
        $match = qr/^(?:$alternatives)$/ if $alternatives;
    }

    my $csv = $self->csv;
    my ( $fh, $current_file );

    return sub {

      ROW:
        while (1) {

            # Open the next data file when nothing is open yet or the
            # current file has been read to the end.
            unless ( $fh && !eof $fh ) {
                $current_file = shift @data_files or return;    # none left
                $fh = $current_file->filehandle('<');
            }

            # Expects:
            # Postcode,Positional_quality_indicator,Eastings,Northings,...
            my $row = $csv->getline($fh);
            unless ($row) {

                # Nothing (more) to read in this file, e.g. it is empty:
                # carry on with the next one rather than emit an empty row.
                next ROW if eof $fh;

                die "Unable to parse CSV record in '$current_file': "
                    . $csv->error_diag() . "\n";
            }

            my %pc;
            @pc{@col_names} = @{$row}[ 0 .. $#col_names ];

            if ( $args{include_lat_long} && $pc{Eastings} && $pc{Northings} ) {
                my ( $lat, $lon ) = shift_ll_into_WGS84(
                    grid_to_ll( $pc{Eastings}, $pc{Northings} ) );

                $pc{$lat_col} = sprintf( "%.5f", $lat );
                $pc{$lon_col} = sprintf( "%.5f", $lon );
            }

            if ( $args{split_postcode} || $match ) {

                # Collapse the first run of whitespace to a single space
                # before matching against the postcode regex.
                $pc{Postcode} =~ s/\s+/ /;

                my ( $area, $district, $sector, $unit )
                    = eval { $pc{Postcode} =~ $self->pc_re };

                if ( $@ || !$unit ) {
                    die "Unable to parse '"
                        . $pc{Postcode}
                        . "' : Please report via "
                        . "https://github.com/mjemmeson/Geo-UK-Postcode-Regex/issues\n";
                }

                my $outcode = $area . $district;

                # Row belongs to an outcode that was not asked for.
                next ROW if $match && $outcode !~ $match;

                if ( $args{split_postcode} ) {
                    $pc{$out_col} = $outcode;
                    $pc{$in_col}  = $sector . $unit;
                }
            }

            return \%pc;
        }
    };
}
169
# Wraps read_iterator so that rows are handed out in batches.
#
# Takes the same named arguments as read_iterator (they are passed through
# unchanged), plus:
#   batch_size - maximum number of rows per batch; 100 when missing or false
#
# Returns a code reference. Each call returns a list of up to batch_size
# row hash refs, and an empty list once read_iterator has nothing left.
sub batch_iterator {
    my ( $self, %args ) = @_;

    my $batch_size    = $args{batch_size} || 100;
    my $read_iterator = $self->read_iterator(%args);

    return sub {
        my @postcodes;
        my $next_slot = 1;

        while ( my $pc = $read_iterator->() ) {
            push @postcodes, $pc;

            # Stop as soon as the batch is full.
            last if ++$next_slot > $batch_size;
        }

        return @postcodes;
    };
}
190
191				1;
192
193				__END__