line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Geo::UK::Postcode::Regex; |
2
|
|
|
|
|
|
|
|
3
|
7
|
|
|
7
|
|
359619
|
use strict; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
218
|
|
4
|
7
|
|
|
7
|
|
46
|
use warnings; |
|
7
|
|
|
|
|
19
|
|
|
7
|
|
|
|
|
207
|
|
5
|
|
|
|
|
|
|
|
6
|
7
|
|
|
7
|
|
2961
|
use Geo::UK::Postcode::Regex::Hash; |
|
7
|
|
|
|
|
26
|
|
|
7
|
|
|
|
|
360
|
|
7
|
|
|
|
|
|
|
|
8
|
7
|
|
|
7
|
|
58
|
use base 'Exporter'; |
|
7
|
|
|
|
|
26
|
|
|
7
|
|
|
|
|
15370
|
|
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw( is_valid_pc is_strict_pc is_lax_pc %REGEXES ); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = '0.016'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=encoding utf-8 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Geo::UK::Postcode::Regex - regular expressions for handling British postcodes |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
See L for an alternative interface. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Geo::UK::Postcode::Regex; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
## REGULAR EXPRESSIONS |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $lax_re = Geo::UK::Postcode::Regex->regex; |
28
|
|
|
|
|
|
|
my $strict_re = Geo::UK::Postcode::Regex->strict_regex; |
29
|
|
|
|
|
|
|
my $valid_re = Geo::UK::Postcode::Regex->valid_regex; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# matching only |
32
|
|
|
|
|
|
|
if ( $foo =~ $lax_re ) {...} |
33
|
|
|
|
|
|
|
if ( $foo =~ $strict_re ) {...} |
34
|
|
|
|
|
|
|
if ( $foo =~ $valid_re ) {...} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# matching and using components - see also parse() |
37
|
|
|
|
|
|
|
if ( $foo =~ $lax_re ) { |
38
|
|
|
|
|
|
|
my ( $area, $district, $sector, $unit ) = ( $1, $2, $3, $4 ); |
39
|
|
|
|
|
|
|
my $subdistrict = $district =~ s/([A-Z])$// ? $1 : undef; |
40
|
|
|
|
|
|
|
... |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
if ( $foo =~ $strict_re ) { |
43
|
|
|
|
|
|
|
my ( $area, $district, $sector, $unit ) = ( $1, $2, $3, $4 ); |
44
|
|
|
|
|
|
|
my $subdistrict = $district =~ s/([A-Z])$// ? $1 : undef; |
45
|
|
|
|
|
|
|
... |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
if ( $foo =~ $valid_re ) { |
48
|
|
|
|
|
|
|
my ( $outcode, $sector, $unit ) = ( $1, $2, $3 ); |
49
|
|
|
|
|
|
|
... |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
## VALIDATION METHODS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
use Geo::UK::Postcode::Regex qw( is_valid_pc is_strict_pc is_lax_pc ); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
if (is_valid_pc("GE0 1UK")) { |
58
|
|
|
|
|
|
|
... |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
if (is_strict_pc("GE0 1UK")) { |
61
|
|
|
|
|
|
|
... |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
if (is_lax_pc("GE0 1UK")) { |
64
|
|
|
|
|
|
|
... |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
## PARSING |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse("WC1H 9EB"); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# returns: |
73
|
|
|
|
|
|
|
# { area => 'WC', |
74
|
|
|
|
|
|
|
# district => '1', |
75
|
|
|
|
|
|
|
# subdistrict => 'H', |
76
|
|
|
|
|
|
|
# sector => '9', |
77
|
|
|
|
|
|
|
# unit => 'EB', |
78
|
|
|
|
|
|
|
# outcode => 'WC1H', |
79
|
|
|
|
|
|
|
# incode => '9EB', |
80
|
|
|
|
|
|
|
# valid => 1, |
81
|
|
|
|
|
|
|
# strict => 1, |
82
|
|
|
|
|
|
|
# partial => 0, |
83
|
|
|
|
|
|
|
# non_geographical => 0, |
84
|
|
|
|
|
|
|
# bfpo => 0, |
85
|
|
|
|
|
|
|
# } |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# strict parsing (only valid characters): |
88
|
|
|
|
|
|
|
...->parse( $pc, { strict => 1 } ) |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# valid outcodes only |
91
|
|
|
|
|
|
|
...->parse( $pc, { valid => 1 } ) |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# match partial postcodes, e.g. 'WC1H', 'WC1H 9' - see below |
94
|
|
|
|
|
|
|
...->parse( $pc, { partial => 1 } ) |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
## PARSING PARTIAL POSTCODES |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# outcode (district) only |
100
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse( "AB10", { partial => 1 } ); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# returns: |
103
|
|
|
|
|
|
|
# { area => 'AB', |
104
|
|
|
|
|
|
|
# district => '10', |
105
|
|
|
|
|
|
|
# subdistrict => undef, |
106
|
|
|
|
|
|
|
# sector => undef, |
107
|
|
|
|
|
|
|
# unit => undef, |
108
|
|
|
|
|
|
|
# outcode => 'AB10', |
109
|
|
|
|
|
|
|
# incode => undef, |
110
|
|
|
|
|
|
|
# valid => 1, |
111
|
|
|
|
|
|
|
# strict => 1, |
112
|
|
|
|
|
|
|
# partial => 1, |
113
|
|
|
|
|
|
|
# non_geographical => 0, |
114
|
|
|
|
|
|
|
# bfpo => 0, |
115
|
|
|
|
|
|
|
# } |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# sector only |
118
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse( "AB10 1", { partial => 1 } ); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# returns: |
121
|
|
|
|
|
|
|
# { area => 'AB', |
122
|
|
|
|
|
|
|
# district => '10', |
123
|
|
|
|
|
|
|
# subdistrict => undef, |
124
|
|
|
|
|
|
|
# sector => 1, |
125
|
|
|
|
|
|
|
# unit => undef, |
126
|
|
|
|
|
|
|
# outcode => 'AB10', |
127
|
|
|
|
|
|
|
# incode => '1', |
128
|
|
|
|
|
|
|
# valid => 1, |
129
|
|
|
|
|
|
|
# strict => 1, |
130
|
|
|
|
|
|
|
# partial => 1, |
131
|
|
|
|
|
|
|
# non_geographical => 0, |
132
|
|
|
|
|
|
|
# bfpo => 0, |
133
|
|
|
|
|
|
|
# } |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
## EXTRACT OUTCODE FROM POSTCODE |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
my $outcode = Geo::UK::Postcode::Regex->outcode("AB101AA"); # returns 'AB10' |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my $outcode = Geo::UK::Postcode::Regex->outcode( $postcode, { valid => 1 } ) |
141
|
|
|
|
|
|
|
or die "Invalid postcode"; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
## EXTRACT POSTCODES FROM TEXT |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# \%options as per parse, excluding partial |
147
|
|
|
|
|
|
|
my @extracted = Geo::UK::Postcode::Regex->extract( $text, \%options ); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
## POSTTOWNS |
151
|
|
|
|
|
|
|
my @posttowns = Geo::UK::Postcode::Regex->outcode_to_posttowns($outcode); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
## OUTCODES |
155
|
|
|
|
|
|
|
my @outcodes = Geo::UK::Postcode::Regex->posttown_to_outcodes($posttown); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head1 DESCRIPTION |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Parsing UK postcodes with regular expressions (aka Regexp). This package has |
161
|
|
|
|
|
|
|
been separated from L so it can be installed and used with |
162
|
|
|
|
|
|
|
fewer dependencies. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Can handle partial postcodes (just the outcode or sector) and can test |
165
|
|
|
|
|
|
|
against valid characters and currently valid outcodes. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Also can determine the posttown(s) from a postcode. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Districts and post town information taken from: |
170
|
|
|
|
|
|
|
L |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head1 IMPORTANT CHANGES FOR VERSION 0.014 |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Please note that various bugfixes have changed the following: |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=over |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item * |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Unanchored regular expressions no longer match valid postcodes within invalid |
181
|
|
|
|
|
|
|
ones. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=item * |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Unanchored regular expressions in partial mode now can match a valid or strict |
186
|
|
|
|
|
|
|
outcode with an invalid incode. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=back |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Please get in touch if you have any questions. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
See L for other changes affecting the Simple |
193
|
|
|
|
|
|
|
interface. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head1 NOTES AND LIMITATIONS |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
When parsing a partial postcode, whitespace may be required to separate the |
198
|
|
|
|
|
|
|
outcode from the sector. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
For example the sector 'B1 1' cannot be distinguished from the district 'B11' |
201
|
|
|
|
|
|
|
without whitespace. This is not a problem when parsing full postcodes. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
## REGULAR EXPRESSIONS |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
my $AREA1 = 'ABCDEFGHIJKLMNOPRSTUWYZ'; # [^QVX] |
208
|
|
|
|
|
|
|
my $AREA2 = 'ABCDEFGHKLMNOPQRSTUVWXY'; # [^IJZ] |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
my $SUBDISTRICT1 = 'ABCDEFGHJKPSTUW'; # for single letter areas |
211
|
|
|
|
|
|
|
my $SUBDISTRICT2 = 'ABEHMNPRVWXY'; # for two letter areas |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
my $UNIT1 = 'ABDEFGHJLNPQRSTUWXYZ'; # [^CIKMOV] |
214
|
|
|
|
|
|
|
my $UNIT2 = 'ABDEFGHJLNPQRSTUWXYZ'; # [^CIKMOV] |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
our %COMPONENTS = ( |
217
|
|
|
|
|
|
|
strict => { |
218
|
|
|
|
|
|
|
area => "[$AREA1][$AREA2]?", |
219
|
|
|
|
|
|
|
district => qq% (?: |
220
|
|
|
|
|
|
|
[0-9][0-9]? |
221
|
|
|
|
|
|
|
| (?
|
222
|
|
|
|
|
|
|
| (?<=[A-Z]{2}) [0-9][$SUBDISTRICT2] |
223
|
|
|
|
|
|
|
) %, |
224
|
|
|
|
|
|
|
sector => '[0-9]', |
225
|
|
|
|
|
|
|
unit => "[$UNIT1][$UNIT2]", |
226
|
|
|
|
|
|
|
blank => '', |
227
|
|
|
|
|
|
|
}, |
228
|
|
|
|
|
|
|
lax => { |
229
|
|
|
|
|
|
|
area => '[A-Z]{1,2}', |
230
|
|
|
|
|
|
|
district => '[0-9](?:[0-9]|[A-Z])?', |
231
|
|
|
|
|
|
|
sector => '[0-9]', |
232
|
|
|
|
|
|
|
unit => '[A-Z]{2}', |
233
|
|
|
|
|
|
|
}, |
234
|
|
|
|
|
|
|
); |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
my %BASE_REGEXES = ( |
237
|
|
|
|
|
|
|
full => ' %s %s \s* %s %s ', |
238
|
|
|
|
|
|
|
partial => ' %s %s (?: \s* %s (?:%s)? ) ? ', |
239
|
|
|
|
|
|
|
); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
my ( %POSTTOWNS, %OUTCODES ); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
tie our %REGEXES, 'Geo::UK::Postcode::Regex::Hash', _fetch => sub { |
244
|
|
|
|
|
|
|
my ($key) = @_; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
_outcode_data() if $key =~ m/valid/ && !%OUTCODES; |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
my $type = $key =~ m/lax/ ? 'lax' : 'strict'; |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
my $components = $Geo::UK::Postcode::Regex::COMPONENTS{$type}; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
my @comps |
253
|
|
|
|
|
|
|
= $key =~ m/valid/ |
254
|
|
|
|
|
|
|
? @{$components}{qw( outcodes blank sector unit )} |
255
|
|
|
|
|
|
|
: @{$components}{qw( area district sector unit )}; |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
@comps = map { $_ ? "($_)" : $_ } @comps if $key =~ m/captures/; |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
my $size = $key =~ m/partial/ ? 'partial' : 'full'; |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
my $re = sprintf( $BASE_REGEXES{$size}, @comps ); |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
if ( $key =~ m/anchored/ ) { |
264
|
|
|
|
|
|
|
$re = '^' . $re . '$'; |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
} elsif ( $key =~ m/extract/ ) { |
267
|
|
|
|
|
|
|
$re = '(?:[^0-9A-Z]|\b) (' . $re . ') (?:[^0-9A-Z]|\b)'; |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
} else { |
270
|
|
|
|
|
|
|
$re = '(?:[^0-9A-Z]|\b) ' . $re . ' (?:[^0-9A-Z]|\b)'; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
return $key =~ m/case-insensitive/ ? qr/$re/ix : qr/$re/x; |
274
|
|
|
|
|
|
|
}; |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
## OUTCODE AND POSTTOWN DATA |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub _outcode_data { |
279
|
6
|
|
|
6
|
|
25
|
my %area_districts; |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
# Get outcodes from __DATA__ |
282
|
6
|
|
|
|
|
39
|
while ( my $line = ) { |
283
|
17958
|
100
|
|
|
|
55990
|
next unless $line =~ m/\w/; |
284
|
17952
|
|
|
|
|
33364
|
chomp $line; |
285
|
17952
|
|
|
|
|
56656
|
my ( $outcode, $non_geographical, @posttowns ) = split /,/, $line; |
286
|
|
|
|
|
|
|
|
287
|
17952
|
|
|
|
|
37176
|
push @{ $POSTTOWNS{$_} }, $outcode foreach @posttowns; |
|
18702
|
|
|
|
|
57560
|
|
288
|
17952
|
|
|
|
|
100952
|
$OUTCODES{$outcode} = { |
289
|
|
|
|
|
|
|
posttowns => \@posttowns, |
290
|
|
|
|
|
|
|
non_geographical => $non_geographical, |
291
|
|
|
|
|
|
|
}; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
# Add in BX non-geographical outcodes |
295
|
6
|
|
|
|
|
30
|
foreach ( 1 .. 99 ) { |
296
|
594
|
|
|
|
|
1573
|
$OUTCODES{ 'BX' . $_ } = { |
297
|
|
|
|
|
|
|
posttowns => [], |
298
|
|
|
|
|
|
|
non_geographical => 1, |
299
|
|
|
|
|
|
|
}; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
6
|
|
|
|
|
13488
|
foreach my $outcode ( sort keys %OUTCODES ) { |
303
|
|
|
|
|
|
|
my ( $area, $district ) |
304
|
|
|
|
|
|
|
= $outcode =~ $REGEXES{strict_partial_anchored_captures} |
305
|
18546
|
50
|
|
|
|
62947
|
or next; |
306
|
|
|
|
|
|
|
|
307
|
18546
|
100
|
|
|
|
57508
|
$district = " $district" if length $district < 2; |
308
|
|
|
|
|
|
|
|
309
|
18546
|
|
|
|
|
27713
|
push @{ $area_districts{$area}->{ substr( $district, 0, 1 ) } }, |
|
18546
|
|
|
|
|
62810
|
|
310
|
|
|
|
|
|
|
substr( $district, 1, 1 ); |
311
|
|
|
|
|
|
|
} |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
$Geo::UK::Postcode::Regex::COMPONENTS{strict}->{outcodes} = '(?: ' . join( |
314
|
|
|
|
|
|
|
"|\n", |
315
|
|
|
|
|
|
|
map { |
316
|
6
|
|
|
|
|
2668
|
my $area = $_; |
|
756
|
|
|
|
|
1480
|
|
317
|
|
|
|
|
|
|
sprintf( |
318
|
|
|
|
|
|
|
"%s (?:%s)", # |
319
|
|
|
|
|
|
|
$area, |
320
|
|
|
|
|
|
|
join( |
321
|
|
|
|
|
|
|
' | ', |
322
|
|
|
|
|
|
|
map { |
323
|
|
|
|
|
|
|
sprintf( "%s[%s]", |
324
|
2988
|
|
|
|
|
4789
|
$_, join( '', @{ $area_districts{$area}->{$_} } ) ) |
|
2988
|
|
|
|
|
13959
|
|
325
|
|
|
|
|
|
|
} # |
326
|
4230
|
100
|
|
|
|
9262
|
sort { $a eq ' ' ? 1 : $b eq ' ' ? -1 : $a <=> $b } |
|
|
100
|
|
|
|
|
|
327
|
756
|
|
|
|
|
1117
|
keys %{ $area_districts{$area} } |
|
756
|
|
|
|
|
2851
|
|
328
|
|
|
|
|
|
|
) |
329
|
|
|
|
|
|
|
) |
330
|
|
|
|
|
|
|
} sort keys %area_districts |
331
|
|
|
|
|
|
|
) . ' )'; |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
} |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=head1 VALIDATION METHODS |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
The following methods are for validating postcodes to various degrees. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
L may provide a more convenient way of using |
340
|
|
|
|
|
|
|
and customising these. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head2 regex, strict_regex, valid_regex |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
Return regular expressions to parse postcodes and capture the constituent |
345
|
|
|
|
|
|
|
parts: area, district, sector and unit (or outcode, sector and unit in the |
346
|
|
|
|
|
|
|
case of C). |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
C checks that the postcode only contains valid characters |
349
|
|
|
|
|
|
|
according to the postcode specifications. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
C checks that the outcode currently exists. |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=head2 regex_partial, strict_regex_partial, valid_regex_partial |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
As above, but matches on partial postcodes of just the outcode |
356
|
|
|
|
|
|
|
or sector |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=cut |
359
|
|
|
|
|
|
|
|
360
|
1
|
|
|
1
|
1
|
10
|
sub valid_regex_partial { $REGEXES{valid_partial_anchored_captures} } |
361
|
1
|
|
|
1
|
1
|
9
|
sub strict_regex_partial { $REGEXES{strict_partial_anchored_captures} } |
362
|
1
|
|
|
1
|
1
|
7
|
sub regex_partial { $REGEXES{lax_partial_anchored_captures} } |
363
|
1
|
|
|
1
|
1
|
5
|
sub valid_regex { $REGEXES{valid_anchored_captures} } |
364
|
1
|
|
|
1
|
1
|
5
|
sub strict_regex { $REGEXES{strict_anchored_captures} } |
365
|
1
|
|
|
1
|
1
|
109
|
sub regex { $REGEXES{lax_anchored_captures} } |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=head2 is_valid_pc, is_strict_pc, is_lax_pc |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
if (is_valid_pc( "AB1 2CD" ) ) { ... } |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
Alternative way to access the regexes. |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=cut |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub is_valid_pc { |
377
|
61
|
50
|
|
61
|
1
|
40584
|
my $pc = @_ > 1 ? $_[1] : $_[0]; # back-compat: can call as class method |
378
|
61
|
100
|
|
|
|
402
|
return $pc =~ $REGEXES{valid_anchored} ? 1 : 0 |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
sub is_strict_pc { |
381
|
61
|
50
|
|
61
|
1
|
407197
|
my $pc = @_ > 1 ? $_[1] : $_[0]; # back-compat: can call as class method |
382
|
61
|
100
|
|
|
|
707
|
return $pc =~ $REGEXES{strict_anchored} ? 1 : 0 |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
sub is_lax_pc { |
385
|
61
|
50
|
|
61
|
1
|
33890
|
my $pc = @_ > 1 ? $_[1] : $_[0]; # back-compat: can call as class method |
386
|
61
|
100
|
|
|
|
372
|
return $pc =~ $REGEXES{lax_anchored} ? 1 : 0 |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=head1 PARSING METHODS |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
The following methods are for parsing postcodes or strings containing postcodes. |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head2 PARSING_OPTIONS |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
The parsing methods can take the following options, passed via a hashref: |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=over |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=item strict |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
Postcodes must not contain invalid characters according to the postcode |
402
|
|
|
|
|
|
|
specification. For example a 'Q' may not appear as the first character. |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=item valid |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
Postcodes must contain an outcode (area + district) that currently exists, in |
407
|
|
|
|
|
|
|
addition to conforming to the C definition. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
Returns false if string is not a currently existing outcode. |
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=item partial |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Allows partial postcodes to be matched. In practice this means either an outcode |
414
|
|
|
|
|
|
|
( area and district ) or an outcode together with the sector. |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=back |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head2 extract |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
my @extracted = Geo::UK::Postcode::Regex->extract( $string, \%options ); |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Returns a list of full postcodes extracted from a string. |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
=cut |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
# TODO need to/can do partial? |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
sub extract { |
429
|
3606
|
|
|
3606
|
1
|
23683
|
my ( $class, $string, $options ) = @_; |
430
|
|
|
|
|
|
|
|
431
|
3606
|
100
|
|
|
|
8611
|
_outcode_data() unless %OUTCODES; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
my $key |
434
|
|
|
|
|
|
|
= $options->{valid} ? 'valid' |
435
|
3606
|
100
|
|
|
|
11707
|
: $options->{strict} ? 'strict' |
|
|
100
|
|
|
|
|
|
436
|
|
|
|
|
|
|
: 'lax'; |
437
|
|
|
|
|
|
|
|
438
|
3606
|
100
|
|
|
|
9044
|
$key .= '_case-insensitive' if $options->{'case-insensitive'}; |
439
|
3606
|
|
|
|
|
6330
|
$key .= '_extract'; |
440
|
|
|
|
|
|
|
|
441
|
3606
|
|
|
|
|
17424
|
my @extracted = $string =~ m/$REGEXES{$key}/g; |
442
|
|
|
|
|
|
|
|
443
|
3606
|
|
|
|
|
16812
|
return map {uc} @extracted; |
|
2016
|
|
|
|
|
14137
|
|
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=head2 parse |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse( $pc, \%options ); |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
Returns hashref of the constituent parts - see SYNOPSIS. Missing parts will be |
451
|
|
|
|
|
|
|
set as undefined. |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=cut |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub parse { |
456
|
4036
|
|
|
4036
|
1
|
3846533
|
my ( $class, $string, $options ) = @_; |
457
|
|
|
|
|
|
|
|
458
|
4036
|
|
50
|
|
|
10425
|
$options ||= {}; |
459
|
|
|
|
|
|
|
|
460
|
4036
|
100
|
|
|
|
14231
|
$string = uc $string if $options->{'case-insensitive'}; |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
my $re |
463
|
|
|
|
|
|
|
= $options->{partial} |
464
|
4036
|
100
|
|
|
|
11134
|
? 'partial_anchored_captures' |
465
|
|
|
|
|
|
|
: 'anchored_captures'; |
466
|
|
|
|
|
|
|
|
467
|
4036
|
|
|
|
|
22993
|
my ( $area, $district, $sector, $unit ) = $string =~ $REGEXES{"strict_$re"}; |
468
|
|
|
|
|
|
|
|
469
|
4036
|
100
|
|
|
|
15312
|
my $strict = $area ? 1 : 0; # matched strict? |
470
|
|
|
|
|
|
|
|
471
|
4036
|
100
|
|
|
|
11145
|
unless ($strict) { |
472
|
1635
|
100
|
|
|
|
6759
|
return if $options->{strict}; |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
# try lax regex |
475
|
565
|
100
|
|
|
|
2442
|
( $area, $district, $sector, $unit ) = $string =~ $REGEXES{"lax_$re"} |
476
|
|
|
|
|
|
|
or return; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
|
479
|
2559
|
50
|
66
|
|
|
8005
|
return unless $unit || $options->{partial}; |
480
|
|
|
|
|
|
|
|
481
|
2559
|
50
|
|
|
|
6056
|
return unless defined $district; |
482
|
|
|
|
|
|
|
|
483
|
2559
|
|
|
|
|
5207
|
my $outcode = $area . $district; |
484
|
2559
|
|
|
|
|
8244
|
my $outcode_data = $class->outcodes_lookup->{$outcode}; |
485
|
|
|
|
|
|
|
|
486
|
2559
|
100
|
100
|
|
|
12685
|
return if $options->{valid} && !$outcode_data; |
487
|
|
|
|
|
|
|
|
488
|
2336
|
100
|
|
|
|
11627
|
my $subdistrict = $district =~ s/([A-Z])$// ? $1 : undef; |
489
|
|
|
|
|
|
|
|
490
|
2336
|
100
|
|
|
|
7677
|
my $incode = $unit ? "$sector$unit" : $sector ? $sector : undef; |
|
|
100
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
return { |
493
|
|
|
|
|
|
|
area => $area, |
494
|
|
|
|
|
|
|
district => $district, |
495
|
|
|
|
|
|
|
subdistrict => $subdistrict, |
496
|
|
|
|
|
|
|
sector => $sector, |
497
|
|
|
|
|
|
|
unit => $unit, |
498
|
|
|
|
|
|
|
outcode => $outcode, |
499
|
|
|
|
|
|
|
incode => $incode, |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
strict => $strict, |
502
|
|
|
|
|
|
|
partial => $unit ? 0 : 1, |
503
|
|
|
|
|
|
|
valid => $outcode_data && $strict ? 1 : 0, |
504
|
|
|
|
|
|
|
|
505
|
2336
|
100
|
100
|
|
|
39203
|
$outcode_data->{non_geographical} ? ( non_geographical => 1 ) : (), |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
506
|
|
|
|
|
|
|
$outcode eq "BF1" ? ( bfpo => 1 ) : (), |
507
|
|
|
|
|
|
|
}; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head2 outcode |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
my $outcode = Geo::UK::Postcode::Regex->outcode( $pc, \%options ); |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
Extract the outcode (area and district) from a postcode string. Will work on |
515
|
|
|
|
|
|
|
full or partial postcodes. |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
=cut |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub outcode { |
520
|
18
|
|
|
18
|
1
|
917
|
my ( $class, $string, $options ) = @_; |
521
|
|
|
|
|
|
|
|
522
|
18
|
100
|
|
|
|
31
|
my $parsed = $class->parse( $string, { partial => 1, %{ $options || {} } } ) |
|
18
|
100
|
|
|
|
103
|
|
523
|
|
|
|
|
|
|
or return; |
524
|
|
|
|
|
|
|
|
525
|
14
|
|
|
|
|
91
|
return $parsed->{outcode}; |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head1 LOOKUP METHODS |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head2 outcode_to_posttowns |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
my ( $posttown1, $posttown2, ... ) |
533
|
|
|
|
|
|
|
= Geo::UK::Postcode::Regex->outcode_to_posttowns($outcode); |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Returns posttown(s) for supplied outcode. |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
Note - most outcodes will only have one posttown, but some are shared between |
538
|
|
|
|
|
|
|
two posttowns. |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
=cut |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
sub outcode_to_posttowns { |
543
|
2
|
|
|
2
|
1
|
1722
|
my ( $class, $outcode ) = @_; |
544
|
|
|
|
|
|
|
|
545
|
2
|
|
|
|
|
9
|
my $data = $class->outcodes_lookup->{$outcode}; |
546
|
|
|
|
|
|
|
|
547
|
2
|
50
|
|
|
|
6
|
return @{ $data ? $data->{posttowns} : [] }; |
|
2
|
|
|
|
|
34
|
|
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head2 posttown_to_outcodes |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
my @outcodes = Geo::UK::Postcode::Regex->posttown_to_outcodes($posttown); |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
Returns the outcodes covered by a posttown. Note some outcodes are shared |
555
|
|
|
|
|
|
|
between posttowns. |
556
|
|
|
|
|
|
|
|
557
|
|
|
|
|
|
|
=cut |
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
sub posttown_to_outcodes { |
560
|
1
|
|
|
1
|
1
|
1240
|
my ( $class, $posttown ) = @_; |
561
|
|
|
|
|
|
|
|
562
|
1
|
50
|
50
|
|
|
3
|
return @{ $class->posttowns_lookup->{ $posttown || '' } || [] }; |
|
1
|
|
|
|
|
6
|
|
563
|
|
|
|
|
|
|
} |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=head2 outcodes_lookup |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
my %outcodes = %{ Geo::UK::Postcode::Regex->outcodes_lookup }; |
568
|
|
|
|
|
|
|
print "valid outcode" if $outcodes{$outcode}; |
569
|
|
|
|
|
|
|
my @posttowns = @{ $outcodes{$outcode} }; |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
Hashref of outcodes to posttown(s); |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
=head2 posttowns_lookup |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
my %posttowns = %{ Geo::UK::Postcode::Regex->posttowns_lookup }; |
576
|
|
|
|
|
|
|
print "valid posttown" if $posttowns{$posttown}; |
577
|
|
|
|
|
|
|
my @outcodes = @{ $[posttowns{$posttown} }; |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
Hashref of posttown to outcode(s); |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=cut |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
sub outcodes_lookup { |
584
|
2561
|
|
|
2561
|
1
|
6286
|
my $class = shift; |
585
|
|
|
|
|
|
|
|
586
|
2561
|
100
|
|
|
|
6945
|
_outcode_data() unless %OUTCODES; |
587
|
|
|
|
|
|
|
|
588
|
2561
|
|
|
|
|
9921
|
return \%OUTCODES; |
589
|
|
|
|
|
|
|
} |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
sub posttowns_lookup { |
592
|
1
|
|
|
1
|
1
|
4
|
my $class = shift; |
593
|
|
|
|
|
|
|
|
594
|
1
|
50
|
|
|
|
6
|
_outcode_data() unless %POSTTOWNS; |
595
|
|
|
|
|
|
|
|
596
|
1
|
|
|
|
|
24
|
return \%POSTTOWNS; |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=head1 SEE ALSO |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=over |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=item * |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
L - companion package, provides Postcode objects |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=item * |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
L |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=item * |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
L |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=item * |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
L |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=item * |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
L |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=item * |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
L |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=back |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=head1 SUPPORT |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=head2 Bugs / Feature Requests |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
Please report any bugs or feature requests through the issue tracker |
634
|
|
|
|
|
|
|
at L. |
635
|
|
|
|
|
|
|
You will be notified automatically of any progress on your issue. |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
=head2 Source Code |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
This is open source software. The code repository is available for |
640
|
|
|
|
|
|
|
public review and contribution under the terms of the license. |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
L |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
git clone git://github.com/mjemmeson/geo-uk-postcode-regex.git |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
=head1 AUTHOR |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
Michael Jemmeson Emjemmeson@cpan.orgE |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=head1 COPYRIGHT |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Copyright 2015-2017 Michael Jemmeson |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head1 LICENSE |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
657
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=cut |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
1; |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
__DATA__ |