File Coverage

blib/lib/Geo/UK/Postcode/CodePointOpen.pm

Criterion	Covered	Total	%
statement	91	100	91.0
branch	26	36	72.2
condition	12	20	60.0
subroutine	16	17	94.1
pod	5	5	100.0
total	150	178	84.2

line	stmt	bran	cond	sub	pod	time	code
1							package Geo::UK::Postcode::CodePointOpen;
2
3							our $VERSION = '0.005';
4
5	6			6		323655	use Moo;
	6					81589
	6					30
6	6			6		10907	use Types::Path::Tiny qw/ Dir /;
	6					560033
	6					71
7
8	6			6		7663	use Geo::UK::Postcode::Regex;
	6					22372
	6					369
9	6			6		4287	use Geo::Coordinates::OSGB qw/ grid_to_ll shift_ll_into_WGS84 /;
	6					118139
	6					728
10	6			6		4144	use List::MoreUtils qw/ uniq /;
	6					37895
	6					57
11	6			6		7759	use Text::CSV;
	6					53109
	6					43
12
13							has path => ( is => 'ro', isa => Dir, coerce => Dir->coercion );
14							has pc_re => ( is => 'lazy' );
15							has column_headers => ( is => 'lazy' );
16							has csv => ( is => 'lazy' );
17							has metadata => ( is => 'lazy' );
18
19							sub _build_pc_re {
20	0			0		0	Geo::UK::Postcode::Regex->strict_regex;
21							}
22
23							sub _build_column_headers {
24	3			3		4068	my $self = shift;
25
26	3					15	my $fh = $self->doc_dir->child('Code-Point_Open_Column_Headers.csv')
27							->filehandle('<');
28
29	3	50				496	my $short = $self->csv->getline($fh)
30							or die "Unable to read short column headers";
31	3	50				19520	my $long = $self->csv->getline($fh)
32							or die "Unable to read long column headers";
33
34							return {
35	3					1192	short => $short,
36							long => $long
37							};
38							}
39
40							sub _build_csv {
41	3	50		3		1076	my $csv = Text::CSV->new( { binary => 1 } )
42							or die Text::CSV->error_diag();
43	3					309	return $csv;
44							}
45
46							#
47							# PRODUCT:
48							# DATASET VERSION NUMBER:
49							# COPYRIGHT DATE:
50							# RM UPDATE DATE:
51							# XX\t123
52							sub _build_metadata {
53	1			1		4754	my $self = shift;
54
55	1					13	my $metadata_file = $self->doc_dir->child('metadata.txt');
56
57	1					59	my @lines = $metadata_file->lines( { chomp => 1 } );
58
59	1					307	my $author = shift @lines;
60
61	1					3	my @headers = grep {/:/} @lines;
	6					9
62	1					3	my @counts = grep {/\t/} @lines;
	6					7
63
64							return {
65	4					16	AUTHOR => $author,
66	2	50				25	( map { split /\s:\s/ } @headers ),
67							counts =>
68	1					3	{ map { /\s+([A-Z]{1,2})\t(\d+)/ ? ( $1, $2 ) : () } @counts },
69							};
70							}
71
72							sub doc_dir {
73	4			4	1	46	shift->path->child('Doc');
74							}
75
76							sub data_dir {
77	7			7	1	59	shift->path->child('Data/CSV');
78							}
79
80							sub data_files {
81	7			7	1	4193	my ( $self, @outcodes ) = @_;
82
83	5					29	my $areas
84	7	50				50	= join( '|', uniq grep {$_} map { /^([A-Z]+)/i && lc $1 } @outcodes );
	5					38
85
86	7	100				26	return sort $self->data_dir->children(
87							$areas ? qr/^(?:$areas)\.csv$/ #
88							: qr/\.csv$/
89							);
90							}
91
92							sub read_iterator {
93	4			4	1	5856	my ( $self, %args ) = @_;
94
95	4					9	my ( @col_names, $lat_col, $lon_col, $out_col, $in_col );
96	4	50				18	if ( $args{short_column_names} ) {
97	0					0	@col_names = @{ $self->column_headers->{short} };
	0					0
98	0					0	( $lat_col, $lon_col ) = ( 'LA', 'LO' );
99	0					0	( $out_col, $in_col ) = ( 'OC', 'IC' );
100							} else {
101	4					7	@col_names = @{ $self->column_headers->{long} };
	4					75
102	4					32	( $lat_col, $lon_col ) = ( 'Latitude', 'Longitude' );
103	4					11	( $out_col, $in_col ) = ( 'Outcode', 'Incode' );
104							}
105
106	4	100				6	my @outcodes = @{ $args{outcodes} || [] };
	4					26
107	4					22	my @data_files = $self->data_files(@outcodes);
108
109	4	100				878	my $match = @outcodes ? join( '|', map {uc} @outcodes ) : undef;
	3					10
110	4	100				32	$match = qr/^(?:$match)$/ if $match;
111
112							# Create iterator coderef
113	4					5	my $fh2;
114	4					92	my $csv = $self->csv;
115
116							my $iterator = sub {
117
118	74			74		185	my %pc;
119	74					72	while (1) {
120
121	74	100	100			265	unless ( $fh2 && !eof $fh2 ) {
122	11	100				53	my $file = shift @data_files or return; # none left
123	7					31	$fh2 = $file->filehandle('<');
124							}
125
126							# Expects:
127							# Postcode,Positional_quality_indicator,Eastings,Northings,...
128	70					888	my $row = $csv->getline($fh2);
129
130	70					24713	my $i = 0;
131	70					100	%pc = map { $_ => $row->[ $i++ ] } @col_names;
	700					1133
132
133	70	0	33			205	if ( $args{include_lat_long} && $pc{Eastings} && $pc{Northings} ) {
			33
134	0					0	my ( $lat, $lon )
135							= shift_ll_into_WGS84(
136							grid_to_ll( $pc{Eastings}, $pc{Northings} ) );
137
138	0					0	$pc{$lat_col} = sprintf( "%.5f", $lat );
139	0					0	$pc{$lon_col} = sprintf( "%.5f", $lon );
140							}
141
142	70	100	100			224	if ( $args{split_postcode} || $match ) {
143
144	30					67	$pc{Postcode} =~ s/\s+/ /;
145
146							my ( $area, $district, $sector, $unit )
147	30					33	= eval { $pc{Postcode} =~ $self->pc_re };
	30					557
148
149	30	50	33			817	if ( $@ || !$unit ) {
150	0					0	die "Unable to parse '"
151							. $pc{Postcode}
152							. "' : Please report via "
153							. "https://github.com/mjemmeson/Geo-UK-Postcode-Regex/issues\n";
154
155							} else {
156
157	30	50	66			93	next if $match && ($area . $district) !~ $match;
158
159	30	100				53	if ( $args{split_postcode} ) {
160	20					35	$pc{$out_col} = $area . $district;
161	20					29	$pc{$in_col} = $sector . $unit;
162							}
163							}
164							}
165
166	70					116	last;
167							}
168
169	70					188	return \%pc;
170	4					62	};
171
172	4					27	return $iterator;
173							}
174
175							sub batch_iterator {
176	1			1	1	4012	my ( $self, %args ) = @_;
177
178	1		50			7	my $batch_size = $args{batch_size} || 100;
179
180	1					5	my $read_iterator = $self->read_iterator(%args);
181
182							return sub {
183
184	6			6		19	my $i = 1;
185	6					5	my @postcodes;
186
187	6					9	while ( my $pc = $read_iterator->() ) {
188	20					24	push @postcodes, $pc;
189	20	100				49	last if ++$i > $batch_size;
190							}
191
192	6					16	return @postcodes;
193	1					35	};
194							}
195
196							1;
197
198							__END__