line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Geo::UK::Postcode::Regex; |
2
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
450250
|
use strict; |
|
8
|
|
|
|
|
81
|
|
|
8
|
|
|
|
|
257
|
|
4
|
8
|
|
|
8
|
|
55
|
use warnings; |
|
8
|
|
|
|
|
19
|
|
|
8
|
|
|
|
|
241
|
|
5
|
|
|
|
|
|
|
|
6
|
8
|
|
|
8
|
|
3704
|
use Geo::UK::Postcode::Regex::Hash; |
|
8
|
|
|
|
|
24
|
|
|
8
|
|
|
|
|
306
|
|
7
|
|
|
|
|
|
|
|
8
|
8
|
|
|
8
|
|
63
|
use base 'Exporter'; |
|
8
|
|
|
|
|
19
|
|
|
8
|
|
|
|
|
17527
|
|
9
|
|
|
|
|
|
|
our @EXPORT_OK = qw( is_valid_pc is_strict_pc is_lax_pc %REGEXES ); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = '0.017'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=encoding utf-8 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Geo::UK::Postcode::Regex - regular expressions for handling British postcodes |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
See L<Geo::UK::Postcode::Regex::Simple> for an alternative interface. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use Geo::UK::Postcode::Regex; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
## REGULAR EXPRESSIONS |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $lax_re = Geo::UK::Postcode::Regex->regex; |
28
|
|
|
|
|
|
|
my $strict_re = Geo::UK::Postcode::Regex->strict_regex; |
29
|
|
|
|
|
|
|
my $valid_re = Geo::UK::Postcode::Regex->valid_regex; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# matching only |
32
|
|
|
|
|
|
|
if ( $foo =~ $lax_re ) {...} |
33
|
|
|
|
|
|
|
if ( $foo =~ $strict_re ) {...} |
34
|
|
|
|
|
|
|
if ( $foo =~ $valid_re ) {...} |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# matching and using components - see also parse() |
37
|
|
|
|
|
|
|
if ( $foo =~ $lax_re ) { |
38
|
|
|
|
|
|
|
my ( $area, $district, $sector, $unit ) = ( $1, $2, $3, $4 ); |
39
|
|
|
|
|
|
|
my $subdistrict = $district =~ s/([A-Z])$// ? $1 : undef; |
40
|
|
|
|
|
|
|
... |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
if ( $foo =~ $strict_re ) { |
43
|
|
|
|
|
|
|
my ( $area, $district, $sector, $unit ) = ( $1, $2, $3, $4 ); |
44
|
|
|
|
|
|
|
my $subdistrict = $district =~ s/([A-Z])$// ? $1 : undef; |
45
|
|
|
|
|
|
|
... |
46
|
|
|
|
|
|
|
} |
47
|
|
|
|
|
|
|
if ( $foo =~ $valid_re ) { |
48
|
|
|
|
|
|
|
my ( $outcode, $sector, $unit ) = ( $1, $2, $3 ); |
49
|
|
|
|
|
|
|
... |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
## VALIDATION METHODS |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
use Geo::UK::Postcode::Regex qw( is_valid_pc is_strict_pc is_lax_pc ); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
if (is_valid_pc("GE0 1UK")) { |
58
|
|
|
|
|
|
|
... |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
if (is_strict_pc("GE0 1UK")) { |
61
|
|
|
|
|
|
|
... |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
if (is_lax_pc("GE0 1UK")) { |
64
|
|
|
|
|
|
|
... |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
## PARSING |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse("WC1H 9EB"); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# returns: |
73
|
|
|
|
|
|
|
# { area => 'WC', |
74
|
|
|
|
|
|
|
# district => '1', |
75
|
|
|
|
|
|
|
# subdistrict => 'H', |
76
|
|
|
|
|
|
|
# sector => '9', |
77
|
|
|
|
|
|
|
# unit => 'EB', |
78
|
|
|
|
|
|
|
# outcode => 'WC1H', |
79
|
|
|
|
|
|
|
# incode => '9EB', |
80
|
|
|
|
|
|
|
# valid => 1, |
81
|
|
|
|
|
|
|
# strict => 1, |
82
|
|
|
|
|
|
|
# partial => 0, |
83
|
|
|
|
|
|
|
# non_geographical => 0, |
84
|
|
|
|
|
|
|
# bfpo => 0, |
85
|
|
|
|
|
|
|
# } |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
# strict parsing (only valid characters): |
88
|
|
|
|
|
|
|
...->parse( $pc, { strict => 1 } ) |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# valid outcodes only |
91
|
|
|
|
|
|
|
...->parse( $pc, { valid => 1 } ) |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# match partial postcodes, e.g. 'WC1H', 'WC1H 9' - see below |
94
|
|
|
|
|
|
|
...->parse( $pc, { partial => 1 } ) |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
## PARSING PARTIAL POSTCODES |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# outcode (district) only |
100
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse( "AB10", { partial => 1 } ); |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
# returns: |
103
|
|
|
|
|
|
|
# { area => 'AB', |
104
|
|
|
|
|
|
|
# district => '10', |
105
|
|
|
|
|
|
|
# subdistrict => undef, |
106
|
|
|
|
|
|
|
# sector => undef, |
107
|
|
|
|
|
|
|
# unit => undef, |
108
|
|
|
|
|
|
|
# outcode => 'AB10', |
109
|
|
|
|
|
|
|
# incode => undef, |
110
|
|
|
|
|
|
|
# valid => 1, |
111
|
|
|
|
|
|
|
# strict => 1, |
112
|
|
|
|
|
|
|
# partial => 1, |
113
|
|
|
|
|
|
|
# non_geographical => 0, |
114
|
|
|
|
|
|
|
# bfpo => 0, |
115
|
|
|
|
|
|
|
# } |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# sector only |
118
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse( "AB10 1", { partial => 1 } ); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# returns: |
121
|
|
|
|
|
|
|
# { area => 'AB', |
122
|
|
|
|
|
|
|
# district => '10', |
123
|
|
|
|
|
|
|
# subdistrict => undef, |
124
|
|
|
|
|
|
|
# sector => 1, |
125
|
|
|
|
|
|
|
# unit => undef, |
126
|
|
|
|
|
|
|
# outcode => 'AB10', |
127
|
|
|
|
|
|
|
# incode => '1', |
128
|
|
|
|
|
|
|
# valid => 1, |
129
|
|
|
|
|
|
|
# strict => 1, |
130
|
|
|
|
|
|
|
# partial => 1, |
131
|
|
|
|
|
|
|
# non_geographical => 0, |
132
|
|
|
|
|
|
|
# bfpo => 0, |
133
|
|
|
|
|
|
|
# } |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
## EXTRACT OUTCODE FROM POSTCODE |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
my $outcode = Geo::UK::Postcode::Regex->outcode("AB101AA"); # returns 'AB10' |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
my $outcode = Geo::UK::Postcode::Regex->outcode( $postcode, { valid => 1 } ) |
141
|
|
|
|
|
|
|
or die "Invalid postcode"; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
## EXTRACT POSTCODES FROM TEXT |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# \%options as per parse, excluding partial |
147
|
|
|
|
|
|
|
my @extracted = Geo::UK::Postcode::Regex->extract( $text, \%options ); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
## POSTTOWNS |
151
|
|
|
|
|
|
|
my @posttowns = Geo::UK::Postcode::Regex->outcode_to_posttowns($outcode); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
## OUTCODES |
155
|
|
|
|
|
|
|
my @outcodes = Geo::UK::Postcode::Regex->posttown_to_outcodes($posttown); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head1 DESCRIPTION |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Parsing UK postcodes with regular expressions (aka Regexp). This package has |
161
|
|
|
|
|
|
|
been separated from L<Geo::UK::Postcode> so it can be installed and used with |
162
|
|
|
|
|
|
|
fewer dependencies. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Can handle partial postcodes (just the outcode or sector) and can test |
165
|
|
|
|
|
|
|
against valid characters and currently valid outcodes. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Also can determine the posttown(s) from a postcode. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Districts and post town information taken from: |
170
|
|
|
|
|
|
|
L<https://en.wikipedia.org/wiki/Postcode_districts> |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head1 IMPORTANT CHANGES FOR VERSION 0.014 |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Please note that various bugfixes have changed the following: |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=over |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item * |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
Unanchored regular expressions no longer match valid postcodes within invalid |
181
|
|
|
|
|
|
|
ones. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=item * |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Unanchored regular expressions in partial mode now can match a valid or strict |
186
|
|
|
|
|
|
|
outcode with an invalid incode. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=back |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Please get in touch if you have any questions. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
See L<Geo::UK::Postcode::Regex::Simple> for other changes affecting the Simple |
193
|
|
|
|
|
|
|
interface. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head1 NOTES AND LIMITATIONS |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
When parsing a partial postcode, whitespace may be required to separate the |
198
|
|
|
|
|
|
|
outcode from the sector. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
For example the sector 'B1 1' cannot be distinguished from the district 'B11' |
201
|
|
|
|
|
|
|
without whitespace. This is not a problem when parsing full postcodes. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=cut |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
## REGULAR EXPRESSIONS |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
my $AREA1 = 'ABCDEFGHIJKLMNOPRSTUWYZ'; # [^QVX] |
208
|
|
|
|
|
|
|
my $AREA2 = 'ABCDEFGHKLMNOPQRSTUVWXY'; # [^IJZ] |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
my $SUBDISTRICT1 = 'ABCDEFGHJKPSTUW'; # for single letter areas |
211
|
|
|
|
|
|
|
my $SUBDISTRICT2 = 'ABEHMNPRVWXY'; # for two letter areas |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
my $UNIT1 = 'ABDEFGHJLNPQRSTUWXYZ'; # [^CIKMOV] |
214
|
|
|
|
|
|
|
my $UNIT2 = 'ABDEFGHJLNPQRSTUWXYZ'; # [^CIKMOV] |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
our %COMPONENTS = ( |
217
|
|
|
|
|
|
|
strict => { |
218
|
|
|
|
|
|
|
area => "[$AREA1][$AREA2]?", |
219
|
|
|
|
|
|
|
district => qq% (?: |
220
|
|
|
|
|
|
|
[0-9][0-9]? |
221
|
|
|
|
|
|
|
| (?
|
222
|
|
|
|
|
|
|
| (?<=[A-Z]{2}) [0-9][$SUBDISTRICT2] |
223
|
|
|
|
|
|
|
) %, |
224
|
|
|
|
|
|
|
sector => '[0-9]', |
225
|
|
|
|
|
|
|
unit => "[$UNIT1][$UNIT2]", |
226
|
|
|
|
|
|
|
blank => '', |
227
|
|
|
|
|
|
|
}, |
228
|
|
|
|
|
|
|
lax => { |
229
|
|
|
|
|
|
|
area => '[A-Z]{1,2}', |
230
|
|
|
|
|
|
|
district => '[0-9](?:[0-9]|[A-Z])?', |
231
|
|
|
|
|
|
|
sector => '[0-9]', |
232
|
|
|
|
|
|
|
unit => '[A-Z]{2}', |
233
|
|
|
|
|
|
|
}, |
234
|
|
|
|
|
|
|
); |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
my %BASE_REGEXES = ( |
237
|
|
|
|
|
|
|
full => ' %s %s \s* %s %s ', |
238
|
|
|
|
|
|
|
partial => ' %s %s (?: \s* %s (?:%s)? ) ? ', |
239
|
|
|
|
|
|
|
); |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
my ( %POSTTOWNS, %OUTCODES ); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
# Lazily built table of postcode regexes. %REGEXES is tied to
# Geo::UK::Postcode::Regex::Hash, which is handed the _fetch callback below.
# The callback receives the requested key and scans it for these substrings:
#
#   lax              - use the lax components (otherwise strict)
#   valid            - match the 'outcodes' component instead of
#                      area + district; outcode data is loaded on demand
#   partial          - use the 'partial' base pattern (otherwise 'full')
#   captures         - wrap every non-empty component in a capture group
#   anchored         - wrap the pattern in ^ ... $
#   extract          - unanchored; whole postcode wrapped in one capture
#   case-insensitive - compile with /i
#
# Without 'anchored' or 'extract' the pattern is unanchored with boundary
# guards on both sides. All patterns are compiled with /x.
tie our %REGEXES, 'Geo::UK::Postcode::Regex::Hash', _fetch => sub {
    my ($key) = @_;

    my $wants_valid = $key =~ m/valid/;

    # the 'outcodes' component is only filled in by _outcode_data()
    _outcode_data() if $wants_valid && !%OUTCODES;

    my $set   = $key =~ m/lax/ ? 'lax' : 'strict';
    my $parts = $Geo::UK::Postcode::Regex::COMPONENTS{$set};

    my @names
        = $wants_valid
        ? qw( outcodes blank sector unit )
        : qw( area district sector unit );
    my @comps = @{$parts}{@names};

    if ( $key =~ m/captures/ ) {

        # the empty 'blank' component must stay empty, not become '()'
        @comps = map { $_ ? "($_)" : $_ } @comps;
    }

    my $template = $BASE_REGEXES{ $key =~ m/partial/ ? 'partial' : 'full' };
    my $re       = sprintf( $template, @comps );

    # either a non-alphanumeric character or a word boundary
    my $boundary = '(?:[^0-9A-Z]|\b)';

    if ( $key =~ m/anchored/ ) {
        $re = '^' . $re . '$';

    } elsif ( $key =~ m/extract/ ) {
        $re = "$boundary ($re) $boundary";

    } else {
        $re = "$boundary $re $boundary";
    }

    return $key =~ m/case-insensitive/ ? qr/$re/ix : qr/$re/x;
};
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
## OUTCODE AND POSTTOWN DATA |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
# Populate %OUTCODES and %POSTTOWNS from the __DATA__ section and build the
# 'outcodes' component of the strict regex set.
#
# Each non-blank DATA line is split on commas into:
#   outcode, non_geographical flag, zero or more posttowns
#
# Side effects:
#   %OUTCODES  - outcode => { posttowns => [...], non_geographical => ... }
#   %POSTTOWNS - posttown => [ outcodes ... ]
#   $COMPONENTS{strict}{outcodes} - alternation matching every known outcode
sub _outcode_data {
    my %area_districts;

    # remember where the DATA handle is so it can be rewound afterwards
    my $orig_position = tell(DATA);

    # Get outcodes from __DATA__
    while ( my $line = <DATA> ) {
        next unless $line =~ m/\w/;
        chomp $line;
        my ( $outcode, $non_geographical, @posttowns ) = split /,/, $line;

        push @{ $POSTTOWNS{$_} }, $outcode foreach @posttowns;

        $OUTCODES{$outcode} = {
            posttowns        => \@posttowns,
            non_geographical => $non_geographical,
        };
    }

    # Reset position of DATA File Handle for re-reading
    seek DATA, $orig_position, 0;

    # Add in BX non-geographical outcodes
    foreach ( 1 .. 99 ) {
        $OUTCODES{ 'BX' . $_ } = {
            posttowns        => [],
            non_geographical => 1,
        };
    }

    # The key never changes, so fetch the tied-hash entry once rather than
    # on every iteration.
    my $outcode_re = $REGEXES{strict_partial_anchored_captures};

    # Group districts per area by their first character; outcodes that do
    # not match the strict pattern are skipped.
    foreach my $outcode ( sort keys %OUTCODES ) {
        my ( $area, $district ) = $outcode =~ $outcode_re
            or next;

        # left-pad single-character districts so every district is two
        # characters: ' 1' is filed under ' ' with second character '1'
        $district = " $district" if length $district < 2;

        push @{ $area_districts{$area}->{ substr( $district, 0, 1 ) } },
            substr( $district, 1, 1 );
    }

    # One alternative per area: "AREA (?:F[SS..] | F[SS..] | ...)" where F is
    # a first district character and SS.. the second characters seen with it.
    my @area_patterns;
    foreach my $area ( sort keys %area_districts ) {
        my $by_first = $area_districts{$area};

        # numeric order, with the space-padded (single character) group last
        my @first_chars
            = sort { $a eq ' ' ? 1 : $b eq ' ' ? -1 : $a <=> $b }
            keys %{$by_first};

        my @alternatives
            = map { sprintf( "%s[%s]", $_, join( '', @{ $by_first->{$_} } ) ) }
            @first_chars;

        push @area_patterns,
            sprintf( "%s (?:%s)", $area, join( ' | ', @alternatives ) );
    }

    $Geo::UK::Postcode::Regex::COMPONENTS{strict}->{outcodes}
        = '(?: ' . join( "|\n", @area_patterns ) . ' )';

}
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head1 VALIDATION METHODS |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
The following methods are for validating postcodes to various degrees. |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
L<Geo::UK::Postcode::Regex::Simple> may provide a more convenient way of using |
344
|
|
|
|
|
|
|
and customising these. |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
=head2 regex, strict_regex, valid_regex |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
Return regular expressions to parse postcodes and capture the constituent |
349
|
|
|
|
|
|
|
parts: area, district, sector and unit (or outcode, sector and unit in the |
350
|
|
|
|
|
|
|
case of C<valid_regex>). |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
C<strict_regex> checks that the postcode only contains valid characters |
353
|
|
|
|
|
|
|
according to the postcode specifications. |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
C<valid_regex> checks that the outcode currently exists. |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=head2 regex_partial, strict_regex_partial, valid_regex_partial |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
As above, but matches on partial postcodes of just the outcode |
360
|
|
|
|
|
|
|
or sector |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=cut |
363
|
|
|
|
|
|
|
|
364
|
1
|
|
|
1
|
1
|
8
|
# Accessors for the anchored, capturing regexes. Each one returns the
# matching entry of the tied %REGEXES table; arguments (such as the class
# name when called as a class method) are ignored.

# valid outcode, incode optional
sub valid_regex_partial {
    return $REGEXES{valid_partial_anchored_captures};
}

# strict character rules, incode optional
sub strict_regex_partial {
    return $REGEXES{strict_partial_anchored_captures};
}

# lax character rules, incode optional
sub regex_partial {
    return $REGEXES{lax_partial_anchored_captures};
}

# valid outcode, full postcode
sub valid_regex {
    return $REGEXES{valid_anchored_captures};
}

# strict character rules, full postcode
sub strict_regex {
    return $REGEXES{strict_anchored_captures};
}

# lax character rules, full postcode
sub regex {
    return $REGEXES{lax_anchored_captures};
}
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=head2 is_valid_pc, is_strict_pc, is_lax_pc |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
if (is_valid_pc( "AB1 2CD" ) ) { ... } |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
Alternative way to access the regexes. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=cut |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# Returns 1 if the string matches the anchored 'valid' regex, else 0.
sub is_valid_pc {

    # back-compat: can call as class method, in which case the postcode is
    # the second argument
    my $pc = $_[ @_ > 1 ? 1 : 0 ];

    return 1 if $pc =~ $REGEXES{valid_anchored};
    return 0;
}
384
|
|
|
|
|
|
|
# Returns 1 if the string matches the anchored 'strict' regex, else 0.
sub is_strict_pc {

    # back-compat: can call as class method, in which case the postcode is
    # the second argument
    my $pc = $_[ @_ > 1 ? 1 : 0 ];

    return 1 if $pc =~ $REGEXES{strict_anchored};
    return 0;
}
388
|
|
|
|
|
|
|
# Returns 1 if the string matches the anchored 'lax' regex, else 0.
sub is_lax_pc {

    # back-compat: can call as class method, in which case the postcode is
    # the second argument
    my $pc = $_[ @_ > 1 ? 1 : 0 ];

    return 1 if $pc =~ $REGEXES{lax_anchored};
    return 0;
}
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head1 PARSING METHODS |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
The following methods are for parsing postcodes or strings containing postcodes. |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head2 PARSING_OPTIONS |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
The parsing methods can take the following options, passed via a hashref: |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=over |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item strict |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Postcodes must not contain invalid characters according to the postcode |
406
|
|
|
|
|
|
|
specification. For example a 'Q' may not appear as the first character. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=item valid |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Postcodes must contain an outcode (area + district) that currently exists, in |
411
|
|
|
|
|
|
|
addition to conforming to the C<strict> definition. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Returns false if string is not a currently existing outcode. |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
=item partial |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
Allows partial postcodes to be matched. In practice this means either an outcode |
418
|
|
|
|
|
|
|
( area and district ) or an outcode together with the sector. |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=back |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head2 extract |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
my @extracted = Geo::UK::Postcode::Regex->extract( $string, \%options ); |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
Returns a list of full postcodes extracted from a string. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=cut |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
# TODO need to/can do partial? |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# Class method: pull every full postcode out of $string.
#
# Options (hashref, all optional):
#   valid              - use the 'valid' regex
#   strict             - use the 'strict' regex (ignored if valid is set)
#   'case-insensitive' - match regardless of case
#
# Returns the list of matched postcodes, upper-cased.
sub extract {
    my ( $class, $string, $options ) = @_;

    _outcode_data() unless %OUTCODES;

    # 'valid' wins over 'strict'; lax is the fallback
    my $key;
    if ( $options->{valid} ) {
        $key = 'valid';
    } elsif ( $options->{strict} ) {
        $key = 'strict';
    } else {
        $key = 'lax';
    }

    $key .= '_case-insensitive' if $options->{'case-insensitive'};
    $key .= '_extract';

    # the extract regex has a single capture group around the postcode
    my @found = $string =~ m/$REGEXES{$key}/g;

    return map {uc} @found;
}
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 parse |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
my $parsed = Geo::UK::Postcode::Regex->parse( $pc, \%options ); |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Returns hashref of the constituent parts - see SYNOPSIS. Missing parts will be |
455
|
|
|
|
|
|
|
set as undefined. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=cut |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# Class method: split a postcode string into its constituent parts.
#
# Options (hashref, all optional):
#   strict             - return nothing unless the strict regex matches
#   valid              - return nothing unless the outcode is in the
#                        outcode lookup
#   partial            - accept an outcode alone or outcode plus sector
#   'case-insensitive' - upper-case the input before matching
#
# Returns nothing (bare return) when the string does not parse. Otherwise
# returns a hashref with the keys area, district, subdistrict, sector, unit,
# outcode, incode, strict, partial and valid. The keys non_geographical and
# bfpo are only present (with value 1) when they apply.
sub parse {
    my ( $class, $string, $options ) = @_;

    $options ||= {};

    $string = uc $string if $options->{'case-insensitive'};

    my $re
        = $options->{partial}
        ? 'partial_anchored_captures'
        : 'anchored_captures';

    my ( $area, $district, $sector, $unit ) = $string =~ $REGEXES{"strict_$re"};

    my $strict = $area ? 1 : 0;    # matched strict?

    unless ($strict) {
        return if $options->{strict};

        # NOTE(review): the POD says 'valid' also implies the strict
        # definition, but a lax-only match with a known outcode is still
        # returned here (with valid => 0) - confirm this is intended.

        # try lax regex
        ( $area, $district, $sector, $unit ) = $string =~ $REGEXES{"lax_$re"}
            or return;
    }

    return unless $unit || $options->{partial};

    return unless defined $district;

    my $outcode      = $area . $district;
    my $outcode_data = $class->outcodes_lookup->{$outcode};

    return if $options->{valid} && !$outcode_data;

    # a trailing letter on the district is the subdistrict; strip it off
    my $subdistrict = $district =~ s/([A-Z])$// ? $1 : undef;

    # Test the sector with defined(): sector '0' is false in boolean
    # context, and a partial postcode such as 'AB10 0' must still report
    # incode '0' rather than undef.
    my $incode
        = $unit           ? "$sector$unit"
        : defined $sector ? $sector
        :                   undef;

    return {
        area        => $area,
        district    => $district,
        subdistrict => $subdistrict,
        sector      => $sector,
        unit        => $unit,
        outcode     => $outcode,
        incode      => $incode,

        strict  => $strict,
        partial => $unit ? 0 : 1,
        valid   => $outcode_data && $strict ? 1 : 0,

        $outcode_data->{non_geographical} ? ( non_geographical => 1 ) : (),
        $outcode eq "BF1" ? ( bfpo => 1 ) : (),
    };
}
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 outcode |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
my $outcode = Geo::UK::Postcode::Regex->outcode( $pc, \%options ); |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Extract the outcode (area and district) from a postcode string. Will work on |
519
|
|
|
|
|
|
|
full or partial postcodes. |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=cut |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Class method: return the outcode (area + district) of a full or partial
# postcode string.
#
# The string is parsed via $class->parse with partial => 1 as a default;
# keys in the caller's $options hashref override that default. Returns
# nothing (bare return) if parsing fails.
sub outcode {
    my ( $class, $string, $options ) = @_;

    # caller-supplied options come last so they win over the default
    my %parse_options = ( partial => 1, %{ $options || {} } );

    my $parsed = $class->parse( $string, \%parse_options );
    return unless $parsed;

    return $parsed->{outcode};
}
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=head1 LOOKUP METHODS |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
=head2 outcode_to_posttowns |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
my ( $posttown1, $posttown2, ... ) |
537
|
|
|
|
|
|
|
= Geo::UK::Postcode::Regex->outcode_to_posttowns($outcode); |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Returns posttown(s) for supplied outcode. |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
Note - most outcodes will only have one posttown, but some are shared between |
542
|
|
|
|
|
|
|
two posttowns. |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
=cut |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
# Class method: return the list of posttowns recorded for $outcode in the
# outcodes lookup, or an empty list if the outcode is not in the lookup.
sub outcode_to_posttowns {
    my ( $class, $outcode ) = @_;

    my $entry = $class->outcodes_lookup->{$outcode};

    # unknown outcode: fall back to an empty array so the dereference is safe
    my $towns = $entry ? $entry->{posttowns} : [];

    return @{$towns};
}
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head2 posttown_to_outcodes |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
my @outcodes = Geo::UK::Postcode::Regex->posttown_to_outcodes($posttown); |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Returns the outcodes covered by a posttown. Note some outcodes are shared |
559
|
|
|
|
|
|
|
between posttowns. |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=cut |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
# Class method: return the list of outcodes recorded for $posttown in the
# posttowns lookup. An undefined/empty or unknown posttown yields an empty
# list.
sub posttown_to_outcodes {
    my ( $class, $posttown ) = @_;

    my $lookup   = $class->posttowns_lookup;
    my $outcodes = $lookup->{ $posttown || '' } || [];

    return @{$outcodes};
}
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=head2 outcodes_lookup |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
my %outcodes = %{ Geo::UK::Postcode::Regex->outcodes_lookup }; |
572
|
|
|
|
|
|
|
print "valid outcode" if $outcodes{$outcode}; |
573
|
|
|
|
|
|
|
my @posttowns = @{ $outcodes{$outcode}->{posttowns} }; |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Hashref of outcodes to posttown(s); |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
=head2 posttowns_lookup |
578
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
my %posttowns = %{ Geo::UK::Postcode::Regex->posttowns_lookup }; |
580
|
|
|
|
|
|
|
print "valid posttown" if $posttowns{$posttown}; |
581
|
|
|
|
|
|
|
my @outcodes = @{ $posttowns{$posttown} }; |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Hashref of posttown to outcode(s); |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
=cut |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
# Class method: return a reference to the %OUTCODES table, loading the
# outcode data first if the table is still empty. The reference points at
# the module's own hash, not a copy.
sub outcodes_lookup {
    my $class = shift;

    _outcode_data() if !%OUTCODES;

    return \%OUTCODES;
}
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
# Class method: return a reference to the %POSTTOWNS table, loading the
# outcode data first if the table is still empty. The reference points at
# the module's own hash, not a copy.
sub posttowns_lookup {
    my $class = shift;

    _outcode_data() if !%POSTTOWNS;

    return \%POSTTOWNS;
}
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head1 SEE ALSO |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=over |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
=item * |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
L<Geo::UK::Postcode> - companion package, provides Postcode objects |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
=item * |
612
|
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
L |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=item * |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
L |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
=item * |
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
L |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
=item * |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
L |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=item * |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
L |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
=back |
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
=head1 SUPPORT |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=head2 Bugs / Feature Requests |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
Please report any bugs or feature requests through the issue tracker |
638
|
|
|
|
|
|
|
at L<https://github.com/mjemmeson/geo-uk-postcode-regex/issues>. |
639
|
|
|
|
|
|
|
You will be notified automatically of any progress on your issue. |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=head2 Source Code |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
This is open source software. The code repository is available for |
644
|
|
|
|
|
|
|
public review and contribution under the terms of the license. |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
L<https://github.com/mjemmeson/geo-uk-postcode-regex> |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
git clone git://github.com/mjemmeson/geo-uk-postcode-regex.git |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=head1 AUTHOR |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Michael Jemmeson E<lt>mjemmeson@cpan.orgE<gt> |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=over |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=item * |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Tom Bloor C |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
=back |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=head1 COPYRIGHT |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
Copyright 2015-2017 Michael Jemmeson |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=head1 LICENSE |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
671
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
672
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
=cut |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
1; |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
__DATA__ |