line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# $Id: PostalAddress.pm,v 1.4 2005/04/30 18:39:28 michel Exp $ |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package Geo::PostalAddress; |
6
|
2
|
|
|
2
|
|
7182
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
88
|
|
7
|
|
|
|
|
|
|
require 5.00503; |
8
|
|
|
|
|
|
|
|
9
|
2
|
|
|
2
|
|
10
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
273
|
|
10
|
|
|
|
|
|
|
require Exporter; |
11
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
12
|
|
|
|
|
|
|
@EXPORT = (); |
13
|
|
|
|
|
|
|
@EXPORT_OK = (); |
14
|
|
|
|
|
|
|
%EXPORT_TAGS = (); |
15
|
|
|
|
|
|
|
$VERSION = 0.04; # ExtUtils::MakeMaker will use this. |
16
|
|
|
|
|
|
|
my $save_version = $VERSION; # Save so I can clean up after Locale::SubCountry |
17
|
|
|
|
|
|
|
|
18
|
2
|
|
|
2
|
|
2441
|
use UNIVERSAL; |
|
2
|
|
|
|
|
30
|
|
|
2
|
|
|
|
|
11
|
|
19
|
2
|
|
|
2
|
|
1856
|
use Locale::Country; |
|
2
|
|
|
|
|
90297
|
|
|
2
|
|
|
|
|
216
|
|
20
|
2
|
|
|
2
|
|
2185
|
use Locale::SubCountry; |
|
2
|
|
|
|
|
478945
|
|
|
2
|
|
|
|
|
50
|
|
21
|
2
|
|
|
2
|
|
152
|
use Carp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
24329
|
|
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
if ($save_version ne $VERSION) { # Workaround for Locale::SubCountry lossage |
24
|
|
|
|
|
|
|
$Locale::SubCountry::VERSION = $VERSION; |
25
|
|
|
|
|
|
|
$VERSION = $save_version; |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
my (%per_country_data, %default_per_country_data); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head1 NAME |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Geo::PostalAddress - Country-specific postal address parsing/formatting |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=head1 DESCRIPTION |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
This module converts postal (snail mail) addresses between an |
37
|
|
|
|
|
|
|
unstructured country-neutral format (an array of character strings) |
38
|
|
|
|
|
|
|
and a country-specific format that's hopefully meaningful by postal |
39
|
|
|
|
|
|
|
authorities, courier/delivery services, residents, ... of that |
40
|
|
|
|
|
|
|
country for postal address entry. It should handle most countries |
41
|
|
|
|
|
|
|
out of the box with only minor or technical divergences from |
42
|
|
|
|
|
|
|
approved bulk-mailing formats; if needed, country-specific code can |
43
|
|
|
|
|
|
|
be added to make it fully conformant to those formats. |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
The intended audience for this module is anyone needing to handle |
46
|
|
|
|
|
|
|
most addresses in a recognizable country-specific format, without |
47
|
|
|
|
|
|
|
going into the full generality and complexity that UPU standards |
48
|
|
|
|
|
|
|
would appear to require. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 SYNOPSIS |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
use Geo::PostalAddress; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
my $AU_parser = Geo::PostalAddress->new('AU'); |
55
|
|
|
|
|
|
|
my $format = $AU_parser->format(); |
56
|
|
|
|
|
|
|
# $format now contains: |
57
|
|
|
|
|
|
|
# [['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 3, |
58
|
|
|
|
|
|
|
# ['City', 40], |
59
|
|
|
|
|
|
|
# ['State', {NSW => "New South Wales", TAS => "Tasmania", |
60
|
|
|
|
|
|
|
# QLD => "Queensland", SA => "South Australia", |
61
|
|
|
|
|
|
|
# WA => "Western Australia", VIC => "Victoria", |
62
|
|
|
|
|
|
|
# ACT => "Australian Capital Territory", |
63
|
|
|
|
|
|
|
# NT => "Northern Territory"}], ['Postcode', 4, qr/^\d\d\d\d$/]] |
64
|
|
|
|
|
|
|
# 40 in ['Addr1', 40] is the suggested displayed field width (not the maximum |
65
|
|
|
|
|
|
|
# length). 3 means that the next 3 fields should/could be on the same row. |
66
|
|
|
|
|
|
|
# ['State', {...}] means an enumerated list is used for this field, with keys |
67
|
|
|
|
|
|
|
# being the stored values and values being the labels used for display or |
68
|
|
|
|
|
|
|
# selection. |
69
|
|
|
|
|
|
|
my $display = $AU_parser->display(["4360 DUKES RD", "KALGOORLIE WA 6430"]); |
70
|
|
|
|
|
|
|
# $display now contains: |
71
|
|
|
|
|
|
|
# {Addr1 => "4360 DUKES RD", City => "KALGOORLIE", |
72
|
|
|
|
|
|
|
# State => "WA", Postcode => "6430"} |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
my $US_parser = Geo::PostalAddress->new('US'); |
75
|
|
|
|
|
|
|
my $address = {Addr1 => "123 MAGNOLIA ST", City => "HEMPSTEAD", |
76
|
|
|
|
|
|
|
State => "NY", ZIP => "115501234"}; |
77
|
|
|
|
|
|
|
my $result = $US_parser->storage($address); |
78
|
|
|
|
|
|
|
unless (ref $result) { carp "Bad postal address: $result.\n"; } |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
my $AU_to_US_address_label = $US_parser->label("AU", "MR JOHN DOE", $result); |
81
|
|
|
|
|
|
|
# What to print on an address label or on an envelope, if mailing from |
82
|
|
|
|
|
|
|
# Australia to the United States. |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head1 METHODS |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 new() |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
C<Geo::PostalAddress-E<gt>new($country)> returns undef, or a blessed |
89
|
|
|
|
|
|
|
reference to a parser suitable for handling the most common postal address |
90
|
|
|
|
|
|
|
formats for that country. Depending on the country, this reference may be |
91
|
|
|
|
|
|
|
blessed into Geo::PostalAddress or into a country-specific subclass. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=cut |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# new($code) -- constructor (class method).
#
# $code is a 2-letter ISO 3166-1 country code. Returns undef if $code is
# missing or not known to Locale::Country. Otherwise returns a blessed hash
# holding _country_code plus a shallow copy of the entry for $code in
# %per_country_data (or of %default_per_country_data if there is none).
#
# When a Geo::PostalAddress::<code> package exists:
#   - if it defines its own new(), that constructor builds the object;
#   - if it inherits this new(), the object is blessed into the country class;
#   - if it has no new() at all, it can't serve as a parser class and the
#     object is blessed into $class, as when no country class exists.
sub new {
  my ($class, $code) = @_;

  # Validate first: nothing below makes sense for an unknown country.
  unless (defined($code)
          && defined(code2country($code, LOCALE_CODE_ALPHA_2))) {
    return undef;
  }

  my $country_class = "Geo::PostalAddress::$code";
  my $instance_data = (exists $per_country_data{$code})
                    ? {_country_code => $code, %{$per_country_data{$code}}}
                    : {_country_code => $code, %default_per_country_data};

  unless (exists($Geo::PostalAddress::{"${code}::"})) { # No country class.
    return bless $instance_data, $class;
  }

  # can() returns undef if the package exists but neither defines nor
  # inherits new(); calling that as a code reference would die.
  my $country_new = $country_class->can("new");
  unless (defined $country_new) {
    return bless $instance_data, $class;
  }

  # Delegate to the country's own new(), unless we were invoked on the
  # country class itself: that means its new() called back into this one
  # (e.g. via SUPER::new), and delegating again would recurse forever.
  # XXX still fails if &$country_new calls Geo::PostalAddress->new explicitly.
  if ($country_new != \&new && $class ne $country_class) {
    return $country_new->($country_class, $code);
  }

  return bless $instance_data, $country_class;
}
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=head2 format |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
C<$parser-E<gt>format()> returns a reference to an array describing the |
122
|
|
|
|
|
|
|
(display/input) fields that make a postal address, and gives some hints |
123
|
|
|
|
|
|
|
about on-screen layout. Each element of the array can be an integer n > 0, |
124
|
|
|
|
|
|
|
meaning the next n fields should be on the same line if window/screen width |
125
|
|
|
|
|
|
|
allows it, or a reference to an array describing a field. Each field |
126
|
|
|
|
|
|
|
description contains the field name and either a maximum length for a text |
127
|
|
|
|
|
|
|
field or a hash of {stored => display} values for an enumerated field. An |
128
|
|
|
|
|
|
|
optional regex can also be specified. If present, it should be compatible with |
129
|
|
|
|
|
|
|
both perl and javascript, so it can be used in both client-side and server-side |
130
|
|
|
|
|
|
|
programs or modules. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
An example for Australia may be: |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
[["Addr1", 40], ["Addr2", 40], ["Addr3", 40], ["Addr4", 40], 3, ["City", 40], |
135
|
|
|
|
|
|
|
["State", {NSW => "New South Wales", TAS => "Tasmania", QLD => "Queensland", |
136
|
|
|
|
|
|
|
SA => "South Australia", WA => "Western Australia", |
137
|
|
|
|
|
|
|
VIC => "Victoria", ACT => "Australian Capital Territory", |
138
|
|
|
|
|
|
|
NT => "Northern Territory"}], ["Postcode", 4, qr/^\d\d\d\d$/]] |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=cut |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# format() -- accessor for the field layout description.
#
# Returns whatever is stored under the _format key of the parser object
# (the array reference documented in the POD above); nothing is copied.
sub format {
  my $self = shift;
  my $layout = $self->{_format};

  return $layout;
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 display |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
C<$parser-E<gt>display($stored)> converts the postal address in @$stored to a |
151
|
|
|
|
|
|
|
format suitable for data input and returns a reference to a hash. The keys of |
152
|
|
|
|
|
|
|
the hash appear as fieldnames in the return value of C<$parser-E<gt>format()>. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
If @$stored doesn't contain an address in the country $parser is an instance |
155
|
|
|
|
|
|
|
of, weird results are nearly certain. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# display($stored) -- split stored address lines into named display fields.
#
# $stored is a reference to an array of text lines. Each element of
# $self->{_s2d_map} names a row (StoredRownum), optionally a substring of
# it (StoredColnum / StoredCollen) and optionally a capture of a regex from
# $self->{_regexes} (StoredRegexnum / StoredFieldnum); the result is stored
# under that element's DisplayName.
#
# Returns a reference to the resulting hash, after passing it through
# $self->normalize(). A field whose regex did not match (or did not capture
# that group) is left undef.
sub display {
  my ($self, $stored) = @_;
  my $map = $self->{_s2d_map};
  my %display;
  my @regex_results; # Cache, 1 per regex (*not* per stored address field)

  # Find the most negative row number used by the map...
  my $limit = 0;
  foreach my $segment (@$map) {
    $limit = $segment->{StoredRownum} if $segment->{StoredRownum} < $limit;
  }

  # ... so rows counted from the end are never also claimed by a positive
  # index: positive indexes >= $limit map to empty lines.
  $limit += @$stored;

  foreach my $segment (@$map) {
    my $rownum = $segment->{StoredRownum};
    my $line = ($rownum >= $limit) ? "" : $stored->[$rownum];

    # A negative index reaching before the first stored row gives undef;
    # treat it as an empty line, like an out-of-range positive index.
    $line = "" unless defined $line;

    if (exists($segment->{StoredColnum})) {
      $line = exists($segment->{StoredCollen})
            ? substr($line, $segment->{StoredColnum}, $segment->{StoredCollen})
            : substr($line, $segment->{StoredColnum});
    }

    if (exists($segment->{StoredRegexnum})) {
      my $renum = $segment->{StoredRegexnum};

      unless (defined $regex_results[$renum]) {
        # First time for this regex; cache results. The cache is keyed by
        # regex only, so a regex shared by two different rows reuses the
        # captures from the first row it was applied to.
        my $regex = $self->{_regexes}->[$renum];
        my @fields = $line =~ /$regex/;

        $regex_results[$renum] = \@fields;
      }

      # XXX Complain if not present?
      $line = $regex_results[$renum]->[$segment->{StoredFieldnum}];
    }

    $display{$segment->{DisplayName}} = $line;
  }

  $self->normalize(\%display); # XXX Do something with return value?
  return \%display;
}
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 storage |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
C<$parser-E<gt>storage($display)> makes country-dependent checks against the |
210
|
|
|
|
|
|
|
postal address in %$display. If it passes all the checks, |
211
|
|
|
|
|
|
|
C<$parser-E<gt>storage($display)> converts it to a format suitable for storage |
212
|
|
|
|
|
|
|
and returns a reference to an array. Otherwise, |
213
|
|
|
|
|
|
|
C<$parser-E<gt>storage($display)> returns a string representing an error |
214
|
|
|
|
|
|
|
message. |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
If %$display doesn't contain an address in the country $parser is an instance |
217
|
|
|
|
|
|
|
of, weird results are nearly certain. |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=cut |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
# storage($display) -- validate display fields and build the stored lines.
#
# $display is a reference to a hash of field name => value. Every field in
# $self->{_format} that carries a validation regex (third element) is
# checked first; a missing field is checked as the empty string. Then
# $self->normalize($display) gets a chance to fix or reject the address.
#
# Returns an error message (plain string) on failure. On success returns a
# reference to an array of lines built from the StoredTemplate of each
# $self->{_d2s_map} element: rows with StoredRownum >= 0 in order from the
# top, then rows with negative StoredRownum at the bottom (-1 being last).
# Rows that come out empty are dropped.
sub storage {
  my ($self, $display) = @_;
  my (@storage, @storage_bottom);

  foreach my $field (@{$self->{_format}}) {
    # Bare numbers are layout hints; fields without a regex aren't checked.
    next unless ref($field) && (@$field >= 3);

    my $value = $display->{$field->[0]};
    $value = "" unless defined $value;

    if ($value !~ $field->[2]) {
      return "$field->[0]: missing or incorrect value"; # XXX be more specific?
    }
  }

  if (my $errmsg = $self->normalize($display)) { return $errmsg; }

  foreach my $segment (@{$self->{_d2s_map}}) {
    my $line = $segment->{StoredTemplate};
    my $rownum = $segment->{StoredRownum};

    # Expand ${name} references; absent fields expand to nothing. Both
    # braces are escaped: an unescaped literal "{" here is a fatal regex
    # compilation error on recent perls.
    $line =~ s/\$\{([^{}]+)\}/defined($display->{$1}) ? $display->{$1} : ""/eg;

    if ($rownum < 0) {
      $storage_bottom[-1 - $rownum] = $line; # -1 => 0, -2 => 1, ...
    } else {
      $storage[$rownum] = $line;
    }
  }

  # Drop unused and empty rows. Test length, not truth, so that a line
  # consisting of "0" is kept.
  @storage = grep { defined($_) && length($_) } @storage;
  @storage_bottom = grep { defined($_) && length($_) } @storage_bottom;
  push @storage, reverse @storage_bottom;
  return \@storage;
}
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=head2 label |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
C<$parser-E<gt>label($origin_country, $recipient, $address)> returns a |
256
|
|
|
|
|
|
|
reference to an array containing an address label suitable for correspondence |
257
|
|
|
|
|
|
|
from a sender in $origin_country (2-letter ISO 3166 code) to $recipient (can be |
258
|
|
|
|
|
|
|
a string or an array reference, eg ["Aby's Auto Repair", "Kell Dewclaw"]) at |
259
|
|
|
|
|
|
|
$address (as returned from C<$parser-E<gt>storage()>) in the country for |
260
|
|
|
|
|
|
|
$parser. |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
The default version just tacks on the name of the destination country, if not |
263
|
|
|
|
|
|
|
the same as the origin country. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=cut |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# label($origin_country, $recipient, $address) -- build an address label.
#
# $origin_country is the sender's 2-letter country code, $recipient a string
# or a reference to an array of strings, $address a reference to an array of
# lines. Returns a reference to a new array: recipient line(s), address
# lines, and -- unless origin and destination are the same country -- the
# destination country name as given by code2country().
sub label {
  my ($self, $origin_country, $recipient, $address) = @_;
  my $destination = $self->{_country_code};
  my @label = ref($recipient) ? @$recipient : ($recipient);

  push @label, @$address;

  # Country codes are compared case-insensitively, so "au" and "AU" both
  # count as domestic for an AU parser. An undefined origin never matches.
  my $origin = defined($origin_country) ? $origin_country : "";
  if (uc($origin) ne uc($destination)) {
    push @label, code2country($destination);
  }

  return \@label;
}
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=head2 option |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
C<$parser-E<gt>option($name [ , $value] )> returns the setting of option $name |
283
|
|
|
|
|
|
|
for parser $parser, after changing it to $value if specified. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Available options and meaningful values for each option depend on the country |
286
|
|
|
|
|
|
|
$parser is for. |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=cut |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# option($name [, $value]) -- read, and optionally first set, one option.
#
# Options live in the hash under $self->{_options}. A defined $value is
# stored under $name before the lookup; passing undef (or nothing) leaves
# the current setting alone. Returns the setting after any change.
sub option {
  my ($self, $name, $value) = @_;

  $self->{_options}{$name} = $value if defined $value;

  return $self->{_options}{$name};
}
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=head2 normalize |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
C<$parser-E<gt>normalize($display)> normalizes the address in %$display by |
300
|
|
|
|
|
|
|
tweaking unambiguous but technically incorrect elements. It can also, if |
301
|
|
|
|
|
|
|
needed, check it for validity and return an error message. If no problems were |
302
|
|
|
|
|
|
|
found, it should return "". |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
This method is called from within C<display()> and C<storage()>, and users of |
305
|
|
|
|
|
|
|
this module shouldn't normally need to call it directly. It exists so it can be |
306
|
|
|
|
|
|
|
overridden in subclasses. The default version does nothing. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=cut |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# normalize($display) -- hook for country-specific clean-up and validation.
#
# The default implementation ignores its arguments, leaves %$display
# untouched and reports success by returning the empty string. Subclasses
# may override it to adjust %$display in place and return an error message.
sub normalize {
  my ($self, $display) = @_; # unused here; kept to show the expected call

  return q{};
}
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=head1 INTERNALS |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Unless you plan to add a country or change the format information for a |
317
|
|
|
|
|
|
|
country, either directly in the base class (this) or as a subclass, you can |
318
|
|
|
|
|
|
|
safely skip this. (But if you're curious, feel free to read on.) |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
%per_country_data is a hash using the 2-letter ISO 3166-1 country code as the |
321
|
|
|
|
|
|
|
key. The value is a hash reference ($hr in the following description) with the |
322
|
|
|
|
|
|
|
following fields: |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=over 4 |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=item _format |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
This array reference is actually what C<$parser-E<gt>format()> returns. |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
Each element can be a number n > 0, hinting that the next n fields should be on |
331
|
|
|
|
|
|
|
the same line, if the terminal or window width allows it, but otherwise |
332
|
|
|
|
|
|
|
ignored. Otherwise, it is an array reference describing a single field of the |
333
|
|
|
|
|
|
|
address, and has the following elements: |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=over 4 |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item 0 |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
The name of a field. For maximum compatibility with form description languages |
340
|
|
|
|
|
|
|
(including the forms part of HTML), this should match /^\w+$/ in the C locale, |
341
|
|
|
|
|
|
|
but this module only requires that it not contain {}. The name should be |
342
|
|
|
|
|
|
|
present in a C<DisplayName> entry (see |
343
|
|
|
|
|
|
|
_s2d_map below). |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item 1 |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Can be a number E<gt> 0, indicating the maximum length of a text field, or a |
348
|
|
|
|
|
|
|
hash of { stored =E<gt> displayed } mappings, indicating an enumerated field. |
349
|
|
|
|
|
|
|
(Note that in the latter case, the order and layout of the values are left to |
350
|
|
|
|
|
|
|
the discretion of the user of this module.) |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=item 2 |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
An optional validation regex can also be specified. If present, it should be |
355
|
|
|
|
|
|
|
compatible with both perl and javascript, so it can be used in both client-side |
356
|
|
|
|
|
|
|
and server-side programs or modules. Note that although most regexes would be |
357
|
|
|
|
|
|
|
anchored at both ends, this isn't required or enforced. |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=back |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=item _s2d_map |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
(storage-to-display map) This is an array of hash references, each describing |
364
|
|
|
|
|
|
|
how to retrieve the value of one display field from the stored unstructured |
365
|
|
|
|
|
|
|
text strings. Each element has the following fields: |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
=over 4 |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=item StoredRownum |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
(stored row number) The row in the array of text lines where the field is. That |
372
|
|
|
|
|
|
|
number is used as a perl-style array index (E<gt>=0 from the start, E<lt> 0 |
373
|
|
|
|
|
|
|
back from the end), except that on any given unstructured address, if there |
374
|
|
|
|
|
|
|
aren't enough rows to map to both positive and negative indices without |
375
|
|
|
|
|
|
|
overlap, the positive indices that would actually map to a row overlapping the |
376
|
|
|
|
|
|
|
region starting with the negative index having the largest absolute value and |
377
|
|
|
|
|
|
|
going to the end of the array are considered to return "" instead of the actual |
378
|
|
|
|
|
|
|
row. In other words, using the array of lines qw(eenie meenie minie moe), |
379
|
|
|
|
|
|
|
indexes -2 0 1 2 3 would return "minie", "eenie", "meenie", "", "" (even though |
380
|
|
|
|
|
|
|
there is no -1 that would return "moe"). |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=item StoredColnum |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
(stored column number) The optional column in the line where the field (or |
385
|
|
|
|
|
|
|
regex input) starts, from 0 for the first column. If absent, the field (or |
386
|
|
|
|
|
|
|
regex input) is the whole line, even if StoredCollen is present. Note that |
387
|
|
|
|
|
|
|
StoredColnum can be negative (with the expected result for the second argument |
388
|
|
|
|
|
|
|
to L<perlfunc/substr>), but if so, there's no special handling, unlike |
389
|
|
|
|
|
|
|
for StoredRownum. |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=item StoredCollen |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
(stored column length) The optional length of the field (or regex input). If |
394
|
|
|
|
|
|
|
absent or if StoredColnum is absent, the field (or regex input) extends to the |
395
|
|
|
|
|
|
|
end of the line. Note that StoredCollen can be negative (with the expected |
396
|
|
|
|
|
|
|
result for the third argument to L<perlfunc/substr>), but if so, there's |
397
|
|
|
|
|
|
|
no special handling, unlike for StoredRownum. |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=item StoredRegexnum |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
(stored regex number) The optional index of a regular expression in |
402
|
|
|
|
|
|
|
C<@{$hr-E<gt>{_regexes}}> to be matched against the line (or the substring |
403
|
|
|
|
|
|
|
selected by StoredColnum and StoredCollen if applicable) to extract the field |
404
|
|
|
|
|
|
|
value from it. See the description of _regexes below for important restrictions |
405
|
|
|
|
|
|
|
on regex use. |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
=item StoredFieldnum |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
(stored field number) The optional index into the array returned by the regex |
410
|
|
|
|
|
|
|
matching mentioned above of the data to be returned as the field value. Note |
411
|
|
|
|
|
|
|
that if StoredRegexnum is present, StoredFieldnum must be present too. |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item DisplayName |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
(display (field) name) The name of a field in C<@{$hr-E<gt>{_format}}>. This is |
416
|
|
|
|
|
|
|
also the key used in the record hash returned by C<$parser-E<gt>display()>. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=back |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
Note that although StoredColnum, StoredCollen, StoredRegexnum, and |
421
|
|
|
|
|
|
|
StoredFieldnum are all optional, not all combinations make sense. Specifically: |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
=over 4 |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=item * |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
At least one of StoredColnum and StoredRegexnum must be present; if both are, |
428
|
|
|
|
|
|
|
StoredColnum (and StoredCollen if also present) are used before StoredRegexnum |
429
|
|
|
|
|
|
|
and StoredFieldnum. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item * |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
If StoredCollen is present without StoredColnum, it is ignored. |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=item * |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
If StoredRegexnum is present, StoredFieldnum must be present too; if |
438
|
|
|
|
|
|
|
StoredFieldnum is present without StoredRegexnum, it is ignored. |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=back |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=item _d2s_map |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
(display-to-storage map) This is an array of hash references, each describing |
445
|
|
|
|
|
|
|
how to generate one line of the unstructured string array used for storage from |
446
|
|
|
|
|
|
|
the parsed fields used for display. Each element has the following fields: |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=over 4 |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=item StoredTemplate |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
(stored template) A string containing boilerplate text and field references of |
453
|
|
|
|
|
|
|
the form ${foo} for field foo (using the field names in _format and _s2d_map). |
454
|
|
|
|
|
|
|
Currently, there is no way to escape $, {, or } if they're part of a sequence |
455
|
|
|
|
|
|
|
that could be interpreted as a field reference. |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item StoredRownum |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
(stored row number) A number that indicates in which row of the unstructured |
460
|
|
|
|
|
|
|
storage string array this should go. This can be positive, 0, or negative, with |
461
|
|
|
|
|
|
|
the same intended meaning as for _s2d_map, except that while putting the array |
462
|
|
|
|
|
|
|
together, it grows in the middle as necessary to accommodate positive indexes. |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=back |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=item _regexes |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
(regular expressions) A reference to an array of strings representing regexes, |
469
|
|
|
|
|
|
|
in any form perl will accept (single-quoted, double-quoted, qr//, etc...) for |
470
|
|
|
|
|
|
|
use in parsing unstructured storage strings into structured display fields. |
471
|
|
|
|
|
|
|
Note that each regex is matched at most once in the course of a single |
472
|
|
|
|
|
|
|
invocation to C<$hr-E<gt>display()>, and its results cached for reuse. This is |
473
|
|
|
|
|
|
|
true even if a subsequent match would use another string than the first. In |
474
|
|
|
|
|
|
|
practice, this isn't a problem, as a given regex would normally be applied to |
475
|
|
|
|
|
|
|
one storage line only. However, if this isn't the case, that regex must be |
476
|
|
|
|
|
|
|
repeated, each line pointing (through StoredRegexnum) to its own copy. |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=back |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
%default_per_country_data is similar, but for countries with unspecified |
481
|
|
|
|
|
|
|
address formats. It's a single hash with the same structure as %$hr above. |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
C<Geo::PostalAddress-E<gt>new()> initializes the object hash with those |
484
|
|
|
|
|
|
|
fields, and adds a _country_code field that holds the 2-letter code, in case we |
485
|
|
|
|
|
|
|
need to retrieve other info later. |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
Note that the above applies to the base class only. Subclasses may use other or |
488
|
|
|
|
|
|
|
different data, instead of or in addition to this. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
=cut |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
# Fallback layout for countries that have no entry of their own: five free-form
# fields Addr1 .. Addr5 (each declared with 40 in _format), mapped one-to-one
# onto storage rows 0 .. 4 in both directions.
%default_per_country_data = (
  _format  => [ map { [ 'Addr' . $_, 40 ] } 1 .. 5 ],
  _s2d_map => [
    map { +{ StoredRownum => $_ - 1, DisplayName => 'Addr' . $_ } } 1 .. 5
  ],
  _d2s_map => [
    map { +{ StoredTemplate => '${Addr' . $_ . '}', StoredRownum => $_ - 1 } }
      1 .. 5
  ]
);
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
# District name, no city or postcode: Albania, Angola, Bahamas, United Arab |
513
|
|
|
|
|
|
|
# Emirates |
514
|
|
|
|
|
|
|
# XXX What are districts called in AL & AO? (eg, state/province/county...) |
515
|
|
|
|
|
|
|
# XXX Andorra subcountries missing from Locale::SubCountry |
516
|
|
|
|
|
|
|
# XXX Aruba subcountries missing from Locale::SubCountry |
517
|
|
|
|
|
|
|
# XXX Bhutan subcountries missing from Locale::SubCountry or not the right ones |
518
|
|
|
|
|
|
|
# XXX Grenada districts missing from Locale::SubCountry (also, West Indies?) |
519
|
|
|
|
|
|
|
# XXX Nauru (NR) missing from Locale::SubCountry |
520
|
|
|
|
|
|
|
# Each entry is [2-letter country code, display name of the district field].
# The resulting layout is four free-form lines followed by a district chosen
# from the names Locale::SubCountry knows; the district is the last stored row.
foreach my $entry (['AE', 'Emirate'], ['AL', 'District'], ['AO', 'District'],
                   ['BS', 'Island']) {
  my $cc    = $entry->[0];
  my $label = $entry->[1];

  # Every country may be described by exactly one of the layout loops.
  die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n"
    if exists $per_country_data{$cc};

  my $subcountry = Locale::SubCountry->new($cc);
  my @rows       = (0 .. 3);    # storage rows of Addr1 .. Addr4

  $per_country_data{$cc} = {
    _format => [
      (map { [ 'Addr' . ($_ + 1), 40 ] } @rows),
      # Identity hash: each full district name maps to itself.
      [ $label, { map { $_ => $_ } $subcountry->all_full_names() } ]
    ],
    _s2d_map => [
      (map { +{ StoredRownum => $_, DisplayName => 'Addr' . ($_ + 1) } } @rows),
      { StoredRownum => -1, DisplayName => $label }
    ],
    _d2s_map => [
      (map { +{ StoredTemplate => '${Addr' . ($_ + 1) . '}',
                StoredRownum   => $_ } } @rows),
      { StoredTemplate => '${' . $label . '}', StoredRownum => -1 }
    ]
  };
}
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# Postcode (and postcode prefix) left of city, no district: Algeria, Andorra, |
550
|
|
|
|
|
|
|
# Argentina, Armenia, Austria, Azerbaijan, Belarus, Belgium, Bosnia and |
551
|
|
|
|
|
|
|
# Herzegovina, Bulgaria, China, Costa Rica, Croatia, Cuba, Cyprus, Denmark, |
552
|
|
|
|
|
|
|
# Estonia, Ethiopia, Faroe, Finland, France, Gabon, Georgia, |
553
|
|
|
|
|
|
|
# Germany, Guatemala, Guinea Bissau, Haiti, Iceland, Iran, Israel, Kuwait, |
554
|
|
|
|
|
|
|
# Kyrgyzstan, Lao, Liberia, Liechtenstein, Lithuania, Luxembourg, Madagascar, |
555
|
|
|
|
|
|
|
# Moldova, Monaco, Morocco, New Caledonia, Niger, Norway, Paraguay, |
556
|
|
|
|
|
|
|
# Philippines, Romania, Russian Federation, San Marino, Senegal, Serbia and |
557
|
|
|
|
|
|
|
# Montenegro, Slovenia, Spain, Switzerland, Tajikistan, Former Yugoslav Republic of |
558
|
|
|
|
|
|
|
# Macedonia, Tunisia, Turkey, Turkmenistan, Vatican, Zambia. |
559
|
|
|
|
|
|
|
# XXX Aruba may be here as part of the Netherlands. |
560
|
|
|
|
|
|
|
# XXX I require Andorra postcodes to start with AD (uppercase). A better |
561
|
|
|
|
|
|
|
# approach may be to make them optional and have normalize add them if needed. |
562
|
|
|
|
|
|
|
# XXX Armenia may need 6-digit postcodes, not 4. |
563
|
|
|
|
|
|
|
# XXX Austria may not require, or forbid, the A- postcode prefix. |
564
|
|
|
|
|
|
|
# XXX Azerbaijan may need 6-digit postcodes, not 4. |
565
|
|
|
|
|
|
|
# XXX Belarus addresses used to be upside down, but no longer. Thanks to |
566
|
|
|
|
|
|
|
# LeiaCat for the information. |
567
|
|
|
|
|
|
|
# XXX Belgium may not require, or forbid, the B- postcode prefix. |
568
|
|
|
|
|
|
|
# XXX China postcodes may be after city, not before. |
569
|
|
|
|
|
|
|
# XXX Croatia may use county code as first 2 digits of postcode. |
570
|
|
|
|
|
|
|
# XXX Cuba may use county/district code as first 2 digits of postcode, but the |
571
|
|
|
|
|
|
|
# district codes Locale::SubCountry gives are inconsistent with the sample |
572
|
|
|
|
|
|
|
# addresses. |
573
|
|
|
|
|
|
|
# XXX Cyprus may need a CY- postcode prefix. |
574
|
|
|
|
|
|
|
# XXX Denmark may want a DK- postcode prefix. |
575
|
|
|
|
|
|
|
# XXX Estonia may want EE- postcode prefix. Also, after postcode is city or |
576
|
|
|
|
|
|
|
# region, but Locale::SubCountry only has regions, no cities. |
577
|
|
|
|
|
|
|
# XXX Finland wants a different postcode prefix and an extra line for addresses |
578
|
|
|
|
|
|
|
# in the Åland Islands. Forget about it for now? |
579
|
|
|
|
|
|
|
# XXX I require Faroe postcodes to start with FO (uppercase). A better approach |
580
|
|
|
|
|
|
|
# may be to make them optional and have normalize add them if needed. |
581
|
|
|
|
|
|
|
# XXX France apparently no longer uses numeric codes for its outlying bits, but |
582
|
|
|
|
|
|
|
# I'm not sure which (if any) alpha codes are valid. Note that I don't |
583
|
|
|
|
|
|
|
# uppercase the 2A... and 2B... postcodes used for Corsica. Also, may want a F- |
584
|
|
|
|
|
|
|
# or FR- postcode prefix. |
585
|
|
|
|
|
|
|
# XXX French Guiana is here, as part of France. |
586
|
|
|
|
|
|
|
# XXX Ditto for French Polynesia, except that it may want its own country name. |
587
|
|
|
|
|
|
|
# If it does, handle with normalize(). (Can't make it its own country, even |
588
|
|
|
|
|
|
|
# though it has an ISO 3166-1 entry, as it's missing from Locale::Country and |
589
|
|
|
|
|
|
|
# Locale::SubCountry.) |
590
|
|
|
|
|
|
|
# XXX Georgia may want 6-digit postcode, not 4. |
591
|
|
|
|
|
|
|
# XXX Germany doesn't want a postcode prefix anymore, apparently. |
592
|
|
|
|
|
|
|
# XXX Greenland may be part of Denmark, or it may use the same format but with |
593
|
|
|
|
|
|
|
# its own country name. Go for the former. |
594
|
|
|
|
|
|
|
# XXX Guadeloupe is here, as part of France. |
595
|
|
|
|
|
|
|
# XXX I require Haiti postcodes to start with HT (uppercase). A better approach |
596
|
|
|
|
|
|
|
# may be to make them optional and have normalize() add them if needed. Also, |
597
|
|
|
|
|
|
|
# the numeric part may be further constrained, but I don't have a complete list |
598
|
|
|
|
|
|
|
# of postcodes. |
599
|
|
|
|
|
|
|
# XXX Iceland postcodes may be further constrained (first digit 0-8) and may |
600
|
|
|
|
|
|
|
# need an IS- postcode prefix. |
601
|
|
|
|
|
|
|
# XXX Israel may use a IL- postcode prefix. |
602
|
|
|
|
|
|
|
# XXX Kyrgyzstan seems to want addresses upside down, with recipient just above |
603
|
|
|
|
|
|
|
# country. If it does, handle it with label() for now. Not ideal. |
604
|
|
|
|
|
|
|
# XXX Liechtenstein may have the postcode right of city, not left. Also, using |
605
|
|
|
|
|
|
|
# the Switzerland/CH format, with Liechtenstein/FL (not LI?) instead. |
606
|
|
|
|
|
|
|
# XXX Luxembourg may use a L- or LU- postcode prefix. |
607
|
|
|
|
|
|
|
# XXX Mayotte is here, as part of France. |
608
|
|
|
|
|
|
|
# XXX Macedonia postcodes may be 5 digits with a MK- prefix, not 4 digits and |
609
|
|
|
|
|
|
|
# prefixless. |
610
|
|
|
|
|
|
|
# XXX Monaco postcodes may be more constrained than 5 digits. Also, it may use |
611
|
|
|
|
|
|
|
# a MC- postcode prefix. |
612
|
|
|
|
|
|
|
# XXX Martinique is here, as part of France. |
613
|
|
|
|
|
|
|
# XXX New Caledonia is just like France, except with its own country name and |
614
|
|
|
|
|
|
|
# postcodes starting in 988. |
615
|
|
|
|
|
|
|
# XXX Norway may use a N- or NO- postcode prefix. |
616
|
|
|
|
|
|
|
# XXX Philippines may be using district/province instead of city, or either |
617
|
|
|
|
|
|
|
# district/province or city, but I'm not sure which, and neither Manila nor |
618
|
|
|
|
|
|
|
# Metro Manila are in the Locale::SubCountry.pm list, so pretend it's a city. |
619
|
|
|
|
|
|
|
# XXX Réunion is here, as part of France. |
620
|
|
|
|
|
|
|
# XXX Russian Federation prefers addresses upside down for local use, but can |
621
|
|
|
|
|
|
|
# handle the more common format in international mail. Also, it seems to want |
622
|
|
|
|
|
|
|
# postcodes under the country name, but we don't do that. Put it left of the |
623
|
|
|
|
|
|
|
# city for now, although below city (and above country) may be a better place. |
624
|
|
|
|
|
|
|
# Saint Pierre and Miquelon is (are?) here, as part of France. |
625
|
|
|
|
|
|
|
# XXX San Marino may have a single city, as Singapore. Also, all postcodes |
626
|
|
|
|
|
|
|
# start with 4789. |
627
|
|
|
|
|
|
|
# XXX Spain may want the province name between () after the city name for some |
628
|
|
|
|
|
|
|
# cities. Leave it here until I know more. |
629
|
|
|
|
|
|
|
# XXX Svalbard and Jan Mayen Island may be here, as part of Norway. |
630
|
|
|
|
|
|
|
# XXX Switzerland may want a CH postcode prefix. Also, ambiguous cities may |
631
|
|
|
|
|
|
|
# need district (canton) code postfixed. Let users enter it as part of city if |
632
|
|
|
|
|
|
|
# needed (same as post office number). |
633
|
|
|
|
|
|
|
# XXX Tajikistan may have a district code before the postcode, but the list in |
634
|
|
|
|
|
|
|
# Locale::SubCountry looks incomplete. |
635
|
|
|
|
|
|
|
# XXX Tunisia may use a TN- postcode prefix. |
636
|
|
|
|
|
|
|
# XXX Turkmenistan wants addresses in the postcode+city, country, name, street |
637
|
|
|
|
|
|
|
# order? |
638
|
|
|
|
|
|
|
# XXX Vatican actually has a single postcode. (It's part of the Italian postal |
639
|
|
|
|
|
|
|
# system, but doesn't have a province appended, and has its own country name.) |
640
|
|
|
|
|
|
|
# Wallis and Futuna is here as part of France. |
641
|
|
|
|
|
|
|
# Each row is
#   [2-letter country code, postcode width, postcode regex, literal prefix].
# The last stored line is "<prefix><postcode> <city>". Because the postcode
# width is fixed, the stored line is split back into fields by column
# arithmetic rather than by a regex. The regex is stored with the Postcode
# field in _format (presumably for input validation -- confirm against the
# code that consumes _format).
foreach my $row (
  ['DZ', 5,  qr/^(0[1-9]|[1-3][0-9]|4[0-8])\d{3}$/, ''],
  ['FR', 5,  qr/^([02][1-9]|[13-8][0-9]|2[AB]|9[0-578])\d{3}$/, ''],
  ['AD', 5,  qr/^AD\d{3}$/, ''],
  ['AM', 4,  qr/^\d{4}$/, ''],
  ['AR', 8,  qr/^\w\d{4}\w{3}$/, ''],
  ['AT', 4,  qr/^\d{4}$/, 'A-'],
  ['AZ', 4,  qr/^\d{4}$/, 'AZ'],
  ['BY', 6,  qr/^\d{6}$/, ''],
  ['BE', 4,  qr/^\d{4}$/, 'B-'],
  ['BA', 5,  qr/^\d{5}$/, ''],
  ['BG', 4,  qr/^\d{4}$/, 'BG-'],
  ['CN', 6,  qr/^\d{6}$/, ''],
  ['CR', 4,  qr/^\d{4}$/, ''],
  ['HR', 5,  qr/^\d{5}$/, 'HR-'],
  ['CU', 5,  qr/^\d{5}$/, 'CP '],
  ['CY', 4,  qr/^\d{4}$/, ''],
  ['DK', 4,  qr/^\d{4}$/, ''],
  ['EE', 5,  qr/^\d{5}$/, ''],
  ['ET', 4,  qr/^\d{4}$/, ''],
  ['FI', 5,  qr/^\d{5}$/, 'FI-'],
  ['FO', 5,  qr/^FO\d{3}$/, ''],
  ['GA', 2,  qr/^\d\d$/, ''],
  ['GE', 4,  qr/^\d{4}$/, ''],
  ['DE', 5,  qr/^\d{5}$/, ''],
  ['GT', 5,  qr/^\d{5}$/, ''],
  ['GW', 4,  qr/^\d{4}$/, ''],
  ['HT', 6,  qr/^HT\d{4}$/, ''],
  ['IS', 3,  qr/^\d{3}$/, ''],
  ['IR', 10, qr/^\d{10}$/, ''],
  ['IL', 5,  qr/^\d{5}$/, ''],
  ['KW', 5,  qr/^\d{5}$/, ''],
  ['KG', 6,  qr/^\d{6}$/, ''],
  ['LA', 5,  qr/^\d{5}$/, ''],
  ['LI', 4,  qr/^\d{4}$/, 'FL-'],
  ['LR', 4,  qr/^\d{4}$/, ''],
  ['LT', 5,  qr/^\d{5}$/, 'LT-'],
  ['LU', 4,  qr/^\d{4}$/, 'L-'],
  ['MG', 3,  qr/^\d{3}$/, ''],
  ['MD', 5,  qr/^\d{5}$/, 'MD-'],
  ['MC', 5,  qr/^\d{5}$/, ''],
  ['MA', 5,  qr/^\d{5}$/, ''],
  ['NC', 5,  qr/^988\d\d$/, ''],
  ['NE', 4,  qr/^\d{4}$/, ''],
  ['NO', 4,  qr/^\d{4}$/, ''],
  ['PH', 4,  qr/^\d{4}$/, ''],
  ['PY', 4,  qr/^[1-9]\d{3}$/, ''],
  ['RO', 6,  qr/^\d{6}$/, ''],
  ['RU', 6,  qr/^\d{6}$/, ''],
  ['SM', 5,  qr/^4789\d$/, ''],
  ['SN', 5,  qr/^\d{5}$/, ''],
  ['CS', 5,  qr/^\d{5}$/, ''],
  ['SI', 4,  qr/^\d{4}$/, ''],
  ['ES', 5,  qr/^\d{5}$/, ''],
  ['CH', 4,  qr/^\d{4}$/, ''],
  ['TJ', 6,  qr/^\d{6}$/, ''],
  ['MK', 4,  qr/^\d{4}$/, ''],
  ['TN', 4,  qr/^\d{4}$/, ''],
  ['TR', 5,  qr/^\d{5}$/, ''],
  ['TM', 6,  qr/^\d{6}$/, ''],
  ['VA', 5,  qr/^00120$/, ''],
  ['ZM', 5,  qr/^\d{5}$/, '']
) {
  my ($cc, $pc_width, $pc_pattern, $pc_lead) = @{$row};

  # Every country may be described by exactly one of the layout loops.
  die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n"
    if exists $per_country_data{$cc};

  # Columns within the last stored line: the postcode starts right after the
  # prefix, and the city starts one separator character after the postcode.
  my $pc_col   = length($pc_lead);
  my $city_col = $pc_col + $pc_width + 1;

  $per_country_data{$cc} = {
    _format => [
      ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
      ['Postcode', $pc_width, $pc_pattern], ['City', 40]
    ],
    _s2d_map => [
      { StoredRownum => 0, DisplayName => 'Addr1' },
      { StoredRownum => 1, DisplayName => 'Addr2' },
      { StoredRownum => 2, DisplayName => 'Addr3' },
      { StoredRownum => 3, DisplayName => 'Addr4' },
      { StoredRownum => -1, DisplayName => 'Postcode',
        StoredColnum => $pc_col, StoredCollen => $pc_width },
      { StoredRownum => -1, DisplayName => 'City',
        StoredColnum => $city_col }
    ],
    _d2s_map => [
      { StoredTemplate => '${Addr1}', StoredRownum => 0 },
      { StoredTemplate => '${Addr2}', StoredRownum => 1 },
      { StoredTemplate => '${Addr3}', StoredRownum => 2 },
      { StoredTemplate => '${Addr4}', StoredRownum => 3 },
      { StoredTemplate => $pc_lead . '${Postcode} ${City}',
        StoredRownum   => -1 }
    ]
  };
}
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
# Variable length postcode (and postcode prefix) left of city, no district: |
705
|
|
|
|
|
|
|
# Chile, Czech Republic, Dominican Republic, Greece, Guinea, Netherlands, |
706
|
|
|
|
|
|
|
# Poland, Portugal, Slovakia, Sweden |
707
|
|
|
|
|
|
|
# XXX Czech Republic wants a space after the 3rd digit of the postcode. Also, |
708
|
|
|
|
|
|
|
# it may want CZ- as a postcode prefix. |
709
|
|
|
|
|
|
|
# XXX Dominican Republic wants a - after the 5th digit of the postcode. |
710
|
|
|
|
|
|
|
# XXX Greece doesn't want a postcode prefix anymore, apparently. |
711
|
|
|
|
|
|
|
# XXX Guinea postcodes include a PO box. 12 leaves room for 5-digits PO box #s. |
712
|
|
|
|
|
|
|
# Also, I don't try to normalize postcodes. |
713
|
|
|
|
|
|
|
# XXX Netherlands may want a NL- postcode prefix. |
714
|
|
|
|
|
|
|
# XXX Netherlands Antilles are here, as part of the Netherlands. |
715
|
|
|
|
|
|
|
# Each row is
#   [2-letter country code, maximum postcode length, postcode regex,
#    storage template for the last line, regex splitting that line].
# The postcode can contain optional separators, so the stored line is split
# with the parse regex (capture 0 = postcode, capture 1 = city) instead of by
# fixed columns.
#
# Fixes relative to the previous table:
#  - NL: both regexes read "w\w" (a literal "w" plus one word character), so no
#    real Dutch postcode such as "1234 AB" matched. Now "\w\w".
#  - CZ: the postcode regex grouped the digits 2+3, contradicting the parse
#    regex and the "space after the 3rd digit" rule. Now 3+2.
#  - SK: the parse regex only allowed "-" between the digit groups while the
#    postcode regex allows whitespace, so a stored "811 01 City" could not be
#    parsed. It now accepts whitespace as well as "-".
#  - CL, SE: the parse regex allowed a single separator where the postcode regex
#    allows several. Widened so every accepted postcode can be parsed back.
foreach my $row (
  ['CL', 8, qr/^\d{3}[-\s]*\d{4}$/, '${Postcode} ${City}',
   qr/^(\d{3}[-\s]*\d{4})\s+(.+)$/],
  ['CZ', 6, qr/^\d{3}\s?\d\d$/, '${Postcode} ${City}',
   qr/^(\d{3}\s*\d\d)\s+(.+)$/],
  ['DO', 10, qr/^\d{5}[-\s]*\d{4}$/, '${Postcode} ${City}',
   qr/^(\d{5}[-\s]*\d{4})\s+(.+)$/],
  ['GR', 6, qr/^\d{3}\s*\d\d$/, '${Postcode} ${City}',
   qr/^(\d{3}\s*\d\d)\s+(.+)$/],
  ['GN', 12, qr/^[0-4]\d\d\s*BP\s*\d+$/i, '${Postcode} ${City}',
   qr/^([0-4]\d\d\s*BP\s*\d+)\s+(.+)$/i],
  ['NL', 7, qr/^\d{4}\s?\w\w$/, '${Postcode} ${City}',
   qr/^(\d{4}\s?\w\w)\s+(.+)$/],
  ['PL', 6, qr/^\d{3}-?\d\d$/, '${Postcode} ${City}',
   qr/^(\d{3}-?\d\d)\s+(.+)$/],
  ['PT', 8, qr/^\d{4}-?\d{3}$/, '${Postcode} ${City}',
   qr/^(\d{4}-?\d{3})\s+(.+)$/],
  ['SK', 6, qr/^\d{3}\s*\d\d$/, '${Postcode} ${City}',
   qr/^(\d{3}[-\s]*\d\d)\s+(.+)$/],
  ['SE', 6, qr/^\d{3}\s*\d\d$/, 'SE-${Postcode} ${City}',
   qr/^(?i:SE-)?(\d{3}[\s-]*\d\d)\s+(.+)$/]
) {
  my ($cc, $pc_width, $pc_pattern, $last_line_tmpl, $last_line_re) = @{$row};

  # Every country may be described by exactly one of the layout loops.
  if (exists($per_country_data{$cc})) {
    die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n";
  }

  $per_country_data{$cc} = {
    _format => [
      ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
      ['Postcode', $pc_width, $pc_pattern], ['City', 40]
    ],
    _s2d_map => [
      { StoredRownum => 0, DisplayName => 'Addr1' },
      { StoredRownum => 1, DisplayName => 'Addr2' },
      { StoredRownum => 2, DisplayName => 'Addr3' },
      { StoredRownum => 3, DisplayName => 'Addr4' },
      # Both fields come from the same regex, applied to the last stored row.
      { StoredRownum => -1, DisplayName => 'Postcode',
        StoredRegexnum => 0, StoredFieldnum => 0 },
      { StoredRownum => -1, DisplayName => 'City',
        StoredRegexnum => 0, StoredFieldnum => 1 }
    ],
    _d2s_map => [
      { StoredTemplate => '${Addr1}', StoredRownum => 0 },
      { StoredTemplate => '${Addr2}', StoredRownum => 1 },
      { StoredTemplate => '${Addr3}', StoredRownum => 2 },
      { StoredTemplate => '${Addr4}', StoredRownum => 3 },
      { StoredTemplate => $last_line_tmpl, StoredRownum => -1 }
    ],
    _regexes => [ $last_line_re ]
  };
}
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
# Postcode right of city, no district: Bermuda, Bahrain, Cambodia, India, |
767
|
|
|
|
|
|
|
# Indonesia, Jordan, Republic of Korea (aka South Korea), Latvia, Lebanon, |
768
|
|
|
|
|
|
|
# Lesotho, Maldives, Malta, Mongolia, Myanmar, Nepal, New Zealand, Pakistan, |
769
|
|
|
|
|
|
|
# Saudi Arabia, Taiwan, Vietnam. |
770
|
|
|
|
|
|
|
# XXX Cook Islands may be here too as part of New Zealand. |
771
|
|
|
|
|
|
|
# XXX Note that state (code or name) is optional (and almost never used) in |
772
|
|
|
|
|
|
|
# India addresses provided that postcode is present. (thanks to Martin DeMello |
773
|
|
|
|
|
|
|
# for the information.) |
774
|
|
|
|
|
|
|
# XXX Republic of Korea may need region/city (in list) instead of city name, |
775
|
|
|
|
|
|
|
# and its postal authority suggests adding "Seoul" to the city name on |
776
|
|
|
|
|
|
|
# international mail, no matter what the destination, to avoid misrouting to |
777
|
|
|
|
|
|
|
# North Korea. for now, treat it all like a big unstructured city field. |
778
|
|
|
|
|
|
|
# XXX Mongolia wants postcodes right of country, not city, and some cities at |
779
|
|
|
|
|
|
|
# least have a delivery/route code after the city. OTOH, anything next to the |
780
|
|
|
|
|
|
|
# country name is a bad idea. Leave the postcode right of the city for now. |
781
|
|
|
|
|
|
|
# XXX New Zealand postcodes are optional, except for bulk mailers. This means |
782
|
|
|
|
|
|
|
# that many people probably don't know their postcode. Asking for it anyway |
783
|
|
|
|
|
|
|
# doesn't hurt. |
784
|
|
|
|
|
|
|
# XXX Niue may be here as part of New Zealand. |
785
|
|
|
|
|
|
|
# XXX Saudi Arabia addresses use separate formats for Latin and Arabic scripts. |
786
|
|
|
|
|
|
|
# XXX Tokelau may be here as part of New Zealand. |
787
|
|
|
|
|
|
|
# XXX Not sure of the format for Taiwan postcodes: may be 3 digits, 5 digits, |
788
|
|
|
|
|
|
|
# or 5 digits with a - after the 3rd. |
789
|
|
|
|
|
|
|
# Each row is
#   [2-letter country code, maximum postcode length, postcode regex,
#    regex splitting the last stored line, separator written between city and
#    postcode].
# The last stored line is "<city><separator><postcode>". It is split back with
# the parse regex: capture 0 = city, capture 1 = postcode.
#
# Fix relative to the previous table: the TW parse regex matched the postcode
# but did not capture it, so capture 1 (the Postcode field) was never set. The
# postcode is now inside a capturing group.
foreach my $row (
  ['BH', 4, qr/^([2-9]|1[0-2]?)\d\d$/,
   qr/^(.+)\s+((?:[2-9]|1[0-2]?)\d\d)$/, ' '],
  ['BM', 5, qr/^\w\w\s*(\d\d|\w\w)$/,
   qr/^(.+)\s+(\w\w\s*(?:\d\d|\w\w))$/, ' '],
  ['KH', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
  ['IN', 6,
   qr/^(1[1-9]|2[0-8]|[35][0-36-9]|34|[47][0-9]|6[0-47-9]|8[0-5])\d{4}$/,
   qr/^(.+)(?:\s+|\s*-\s*)(\d{6})$/, '-'],
  ['ID', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
  ['JO', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
  ['KR', 7, qr/^\d{3}-?\d{3}$/, qr/^(.+)\s+(\d{3}-?\d{3})$/, ' '],
  ['LV', 4, qr/^\d{4}$/, qr/^(.+?),?\s*(?:LV\s*-\s*)?(\d{4})$/, ', LV-'],
  # NOTE(review): "\d{4}?" is a lazy quantifier, not an optional group, so both
  # LB regexes always require eight digits. If a bare four-digit postcode was
  # meant to be accepted, this should be "(?:\s*\d{4})?" -- confirm the
  # intended format before changing it.
  ['LB', 9, qr/^\d{4}\s*\d{4}?$/, qr/^(.+?)\s+(\d{4}\s*\d{4}?)$/, ' '],
  ['LS', 3, qr/^\d{3}$/, qr/^(.+)\s+(\d{3})$/, ' '],
  ['MV', 5, qr/^\d\d-?\d\d$/, qr/^(.+)\s+(\d\d-?\d\d)$/, ' '],
  ['MT', 7, qr/^\w{3}\s*\d{2,3}$/, qr/^(.+)\s+(\w{3}\s*\d{2,3})$/, ' '],
  ['MN', 6, qr/^\d{6}$/, qr/^(.+)\s+(\d{6})$/, ' '],
  ['MM', 5, qr/^(0[1-9]|1[0-4])\d{3}$/, qr/^(.+)(?:,\s*|\s+)(\d{5})$/, ' '],
  ['NP', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
  ['NZ', 4, qr/^(\d{4})?$/, qr/^(.+?)(?:\s+(\d{4}))?$/, ' '],
  ['PK', 5, qr/^\d{5}$/, qr/^(.+)(?:\s*-\s*|\s+)(\d{5})$/, ' '],
  ['SA', 5, qr/^\d{5}$/, qr/^(.+)\s+(\d{5})$/, ' '],
  ['VN', 6, qr/^\d{6}$/, qr/^(.+)\s+(\d{6})$/, ' '],
  ['TW', 6, qr/^\d{3}(-?\d{2})?$/,
   qr/^(.+)(?:,\s*|\s+)(\d{3}(?:-?\d{2})?)$/, ' ']
) {
  my ($cc, $pc_width, $pc_pattern, $last_line_re, $separator) = @{$row};

  # Every country may be described by exactly one of the layout loops.
  if (exists($per_country_data{$cc})) {
    die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n";
  }

  $per_country_data{$cc} = {
    _format => [
      ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
      ['City', 40], ['Postcode', $pc_width, $pc_pattern]
    ],
    _s2d_map => [
      { StoredRownum => 0, DisplayName => 'Addr1' },
      { StoredRownum => 1, DisplayName => 'Addr2' },
      { StoredRownum => 2, DisplayName => 'Addr3' },
      { StoredRownum => 3, DisplayName => 'Addr4' },
      # Both fields come from the same regex, applied to the last stored row.
      { StoredRownum => -1, DisplayName => 'City',
        StoredRegexnum => 0, StoredFieldnum => 0 },
      { StoredRownum => -1, DisplayName => 'Postcode',
        StoredRegexnum => 0, StoredFieldnum => 1 }
    ],
    _d2s_map => [
      { StoredTemplate => '${Addr1}', StoredRownum => 0 },
      { StoredTemplate => '${Addr2}', StoredRownum => 1 },
      { StoredTemplate => '${Addr3}', StoredRownum => 2 },
      { StoredTemplate => '${Addr4}', StoredRownum => 3 },
      { StoredTemplate => '${City}' . $separator . '${Postcode}',
        StoredRownum   => -1 }
    ],
    _regexes => [ $last_line_re ]
  };
}
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
# Postcode left of city, district name below: Cape Verde, El Salvador, |
847
|
|
|
|
|
|
|
# Mozambique |
848
|
|
|
|
|
|
|
# XXX Cape Verde seems to be missing an island in Locale::SubCountry.pm. |
849
|
|
|
|
|
|
|
# Each entry is [2-letter country code, display name of the district field].
# Layout: four free-form lines, then "<4-digit postcode> <city>" on the
# second-to-last stored row, then the district name on the last stored row.
foreach my $pair (['CV', 'Island'], ['SV', 'Department'], ['MZ', 'Province']) {
  my ($cc, $region) = @{$pair};

  # Every country may be described by exactly one of the layout loops.
  exists $per_country_data{$cc}
    and die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n";

  my $subcountry = Locale::SubCountry->new($cc);
  my @addr_names = qw(Addr1 Addr2 Addr3 Addr4);

  # Addr1 .. Addr4 map one-to-one onto storage rows 0 .. 3 in both directions.
  my (@to_display, @to_storage);
  foreach my $rownum (0 .. $#addr_names) {
    my $field = $addr_names[$rownum];
    push @to_display, { StoredRownum => $rownum, DisplayName => $field };
    push @to_storage,
      { StoredTemplate => '${' . $field . '}', StoredRownum => $rownum };
  }

  # Postcode occupies columns 0-3 of row -2; the city starts at column 5, after
  # the single separating space written by the storage template below.
  push @to_display,
    { StoredRownum => -2, DisplayName => 'Postcode',
      StoredColnum => 0, StoredCollen => 4 },
    { StoredRownum => -2, DisplayName => 'City', StoredColnum => 5 },
    { StoredRownum => -1, DisplayName => $region };
  push @to_storage,
    { StoredTemplate => '${Postcode} ${City}', StoredRownum => -2 },
    { StoredTemplate => '${' . $region . '}', StoredRownum => -1 };

  $per_country_data{$cc} = {
    _format => [
      (map { [ $_, 40 ] } @addr_names), 2,
      ['Postcode', 4, qr/^\d{4}$/], ['City', 40],
      # Identity hash: each full district name maps to itself.
      [ $region, { map { $_ => $_ } $subcountry->all_full_names() } ]
    ],
    _s2d_map => \@to_display,
    _d2s_map => \@to_storage
  };
}
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
# Postcode right of city, district name below: Nigeria |
884
|
|
|
|
|
|
|
# Layout: four free-form lines, then "<city> <6-digit postcode>" on the
# second-to-last stored row (split back with the regex in _regexes), then the
# state name on the last stored row.
foreach my $cc ('NG') {
  # Every country may be described by exactly one of the layout loops.
  die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n"
    if exists $per_country_data{$cc};

  my $subcountry = Locale::SubCountry->new($cc);

  # Identity hash: each full state name maps to itself.
  my %state_names = map { $_ => $_ } $subcountry->all_full_names();

  my @format = (
    ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
    ['City', 40], ['Postcode', 6, qr/^\d{6}$/],
    ['State', \%state_names]
  );

  my @to_display = (
    { StoredRownum => 0, DisplayName => 'Addr1' },
    { StoredRownum => 1, DisplayName => 'Addr2' },
    { StoredRownum => 2, DisplayName => 'Addr3' },
    { StoredRownum => 3, DisplayName => 'Addr4' },
    # Capture 0 of regex 0 is the city, capture 1 the postcode.
    { StoredRownum => -2, DisplayName => 'City',
      StoredRegexnum => 0, StoredFieldnum => 0 },
    { StoredRownum => -2, DisplayName => 'Postcode',
      StoredRegexnum => 0, StoredFieldnum => 1 },
    { StoredRownum => -1, DisplayName => 'State' }
  );

  my @to_storage = (
    { StoredTemplate => '${Addr1}', StoredRownum => 0 },
    { StoredTemplate => '${Addr2}', StoredRownum => 1 },
    { StoredTemplate => '${Addr3}', StoredRownum => 2 },
    { StoredTemplate => '${Addr4}', StoredRownum => 3 },
    { StoredTemplate => '${City} ${Postcode}', StoredRownum => -2 },
    { StoredTemplate => '${State}', StoredRownum => -1 }
  );

  $per_country_data{$cc} = {
    _format  => \@format,
    _s2d_map => \@to_display,
    _d2s_map => \@to_storage,
    _regexes => [ qr/^(.+)\s+(\d{6})$/ ]
  };
}
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
# City and district name each on a line by itself, no postcode: Ireland, |
919
|
|
|
|
|
|
|
# Kiribati, Panama, Solomon Islands |
920
|
|
|
|
|
|
|
# XXX Ireland district (county) is optional (forbidden?) if same as city, and |
921
|
|
|
|
|
|
|
# prefixed with 'CO ' if present. If it's forbidden, not just optional, let |
922
|
|
|
|
|
|
|
# normalize() handle it. Also, Dublin needs a numeric suffix. |
923
|
|
|
|
|
|
|
# XXX Kiribati district (island) list may be incomplete or incorrect. |
924
|
|
|
|
|
|
|
# XXX Panama may use district only, not city. |
925
|
|
|
|
|
|
|
# XXX Seychelles may be here too, but they're missing from Locale::SubCountry. |
926
|
|
|
|
|
|
|
# XXX Solomon Islands district (province) list may be incomplete or incorrect. |
927
|
|
|
|
|
|
|
# Each entry is [2-letter country code, display name of the district field,
# literal prefix stored in front of the district name].
# Layout: four free-form lines, the city on the second-to-last stored row, and
# "<prefix><district>" on the last stored row.
foreach my $entry (['IE', 'County', 'CO '], ['KI', 'Island', ''],
                   ['PA', 'Province', ''], ['SB', 'Province', '']) {
  my ($cc, $label, $lead) = @{$entry};

  # Every country may be described by exactly one of the layout loops.
  die __PACKAGE__ . ": Attempted to initialize country code $cc twice.\n"
    if exists $per_country_data{$cc};

  my $subcountry = Locale::SubCountry->new($cc);
  my @rows       = (0 .. 3);    # storage rows of Addr1 .. Addr4

  $per_country_data{$cc} = {
    _format => [
      (map { [ 'Addr' . ($_ + 1), 40 ] } @rows),
      ['City', 40],
      # Identity hash: each full district name maps to itself.
      [ $label, { map { $_ => $_ } $subcountry->all_full_names() } ]
    ],
    _s2d_map => [
      (map { +{ StoredRownum => $_, DisplayName => 'Addr' . ($_ + 1) } } @rows),
      { StoredRownum => -2, DisplayName => 'City' },
      # Skip the stored prefix: the district starts right after it.
      { StoredRownum => -1, DisplayName => $label,
        StoredColnum => length($lead) }
    ],
    _d2s_map => [
      (map { +{ StoredTemplate => '${Addr' . ($_ + 1) . '}',
                StoredRownum   => $_ } } @rows),
      { StoredTemplate => '${City}', StoredRownum => -2 },
      { StoredTemplate => $lead . '${' . $label . '}', StoredRownum => -1 }
    ]
  };
}
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
# City, district code/name, postcode in some order all on same line: Australia, |
961
|
|
|
|
|
|
|
# Canada, Italy, Japan, Malaysia, Mexico, Papua New Guinea, Somalia, United |
962
|
|
|
|
|
|
|
# States, Venezuela |
963
|
|
|
|
|
|
|
# DamienPS explained something (I forgot what) about Australian postcodes. |
964
|
|
|
|
|
|
|
# XXX American Samoa may be here as part of the US. |
965
|
|
|
|
|
|
|
# XXX Cocos (Keeling) Islands may be here, as part of Australia. |
966
|
|
|
|
|
|
|
# XXX Christmas Island may be here, as part of Australia. |
967
|
|
|
|
|
|
|
# XXX Canadian postcodes want uppercase letters and 1 space after 3rd position. |
968
|
|
|
|
|
|
|
# XXX Federated States of Micronesia may be here as part of the US. |
969
|
|
|
|
|
|
|
# XXX Guam may be here as part of the US. |
970
|
|
|
|
|
|
|
# XXX Heard Island and McDonald Islands may be here as part of Australia. |
971
|
|
|
|
|
|
|
# XXX Honduras districts are missing from Locale::SubCountry |
972
|
|
|
|
|
|
|
# XXX Thanks to Renée for splainin what Japan addresses are like. Also, this |
973
|
|
|
|
|
|
|
# assumes city and prefecture names don't contain spaces. Also, there may be a |
974
|
|
|
|
|
|
|
# trend toward moving the postcode to its own line below city and prefecture, |
975
|
|
|
|
|
|
|
# and we leave the - insertion after the 3rd postcode digit to normalize(), if |
976
|
|
|
|
|
|
|
# it's really necessary. (Since addresses in Roman script are still sorted by |
977
|
|
|
|
|
|
|
# hand according to the UPU page about Japan, I doubt it is.) |
978
|
|
|
|
|
|
|
# XXX Malaysia doesn't use the names of the federal territories for routing, |
979
|
|
|
|
|
|
|
# and they appear optional. |
980
|
|
|
|
|
|
|
# XXX Marshall Islands may be here as part of the US. |
981
|
|
|
|
|
|
|
# XXX Mariana Islands may be here as part of the US. |
982
|
|
|
|
|
|
|
# XXX Mexico addresses may use state name, not code. Also, on-screen field |
983
|
|
|
|
|
|
|
# order of addresses doesn't match natural/stored. |
984
|
|
|
|
|
|
|
# XXX Norfolk Island may be here as part of Australia. |
985
|
|
|
|
|
|
|
# XXX Puerto Rico may be here as part of the US. |
986
|
|
|
|
|
|
|
# XXX United States definition in Locale::SubCountry.pm is missing AA/AE/AP |
987
|
|
|
|
|
|
|
# entries for APO/FPO. |
988
|
|
|
|
|
|
|
# XXX Venezuela definition in Locale::SubCountry.pm may be missing some states. |
989
|
|
|
|
|
|
|
# XXX US Virgin Islands may be here as part of the US. |
990
|
|
|
|
|
|
|
# Register layouts whose last stored line carries city, district and postcode
# together. Each spec lists, in order:
#   - country code
#   - display name of the district field
#   - display name of the postcode field
#   - postcode length (second element of the postcode _format entry)
#   - regex a postcode must match
#   - template used to build the combined stored line
#   - flag: true  => district choices are full names mapped to themselves,
#           false => district choices are Locale::SubCountry's
#                    code => full name hash
#   - StoredFieldnum of the city, of the district and of the postcode
#     (presumably the 0-based capture index in the regex below; the values
#     are consistent with the capture order of every regex here)
#   - regex that splits the combined stored line back into its fields
#
# Notes on the line regexes:
#   - AU, SO and US capture the city lazily so that an optional comma after
#     the city is consumed by ",?" instead of ending up inside the city name.
#   - MY stores full state names (which may contain spaces), so its state
#     capture accepts any text after the comma rather than a single word.
#   - The SO template refers to ${Region}, the district field name that SO
#     actually declares.
foreach my $spec
  ((['AU', 'State', 'Postcode', 4, qr/^\d{4}$/,
     '${City} ${State} ${Postcode}',
     0, 0, 1, 2, qr/^(.+?),?\s+(\w{2,3})\s+(\d{4})$/],
    ['CA', 'Province', 'Postcode', 7, qr/^\w\d\w\s*\d\w\d$/,
     '${City} ${Province} ${Postcode}', 0, 0, 1, 2,
     qr/^(.+)\s+(\w\w)\s+(\w\d\w\s*\d\w\d)$/],
    ['IT', 'Province', 'Postcode', 5, qr/^\d{5}$/,
     '${Postcode}-${City} ${Province}',
     0, 1, 2, 0, qr/^(\d{5})(?:\s*-\s*|\s+)(.+?)(?:\s+(\w\w))?$/],
    ['JP', 'Prefecture', 'Postcode', 8, qr/^\d{3}-?\d{4}$/,
     '${City} ${Prefecture} ${Postcode}', 1, 0, 1, 2,
     qr/^(\S+)\s+(\S+)\s+(\d\d\d-?\d\d\d\d)$/],
    ['MY', 'State', 'Postcode', 5, qr/^\d{5}$/,
     '${Postcode} ${City}, ${State}',
     1, 1, 2, 0, qr/^(\d{5})\s+(.+),\s*(.+)$/],
    ['MX', 'State', 'Postcode', 5, qr/^\d{5}$/,
     '${Postcode} ${City}, ${State}',
     0, 1, 2, 0, qr/^(\d{5})\s+(.+),\s*(\w+)$/],
    ['PG', 'Province', 'Postcode', 3, qr/^\d{3}$/,
     '${City} ${Postcode} ${Province}',
     0, 0, 2, 1, qr/^(.+)\s+(\d{3})\s+(\w{3})$/],
    ['SO', 'Region', 'Postcode', 5, qr/^\d{5}$/,
     '${City}, ${Region} ${Postcode}',
     0, 0, 1, 2, qr/^(.+?),?\s+(\w{2})\s+(\d{5})$/],
    ['US', 'State', 'ZIP', 10, qr/^\d{5}(-\d{4})?$/,
     '${City}, ${State} ${ZIP}', 0, 0, 1, 2,
     qr/^(.+?),?\s+(\w{2})\s+(\d{5}(?:-\d{4})?)$/],
    ['VE', 'State', 'Postcode', 4, qr/^\d{4}$/,
     '${City}, ${Postcode} ${State}',
     1, 0, 2, 1, qr/^(.+?)\s+(\d{4})\s*,?\s+(.+)$/])) {
  my ($code, $district, $postcode, $pc_length, $pc_re, $cdp_layout, $use_dname,
      $city_fn, $district_fn, $pc_fn, $cdp_re) = @$spec;
  if (exists($per_country_data{$code})) {
    die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
  }
  my $subctry = Locale::SubCountry->new($code);
  $per_country_data{$code} = {
    _format => [
      # NOTE(review): the bare 3 presumably tells the _format consumer that
      # the next three fields share one line - confirm against that code.
      ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 3,
      ['City', 40],
      [$district, ($use_dname ? { map { $_ => $_ } $subctry->all_full_names() }
                              : {$subctry->code_full_name_hash})],
      [$postcode, $pc_length, $pc_re]
    ],
    _s2d_map => [
      {StoredRownum => 0, DisplayName => 'Addr1'},
      {StoredRownum => 1, DisplayName => 'Addr2'},
      {StoredRownum => 2, DisplayName => 'Addr3'},
      {StoredRownum => 3, DisplayName => 'Addr4'},
      # All three fields live on the last stored line and are pulled out of
      # it through _regexes[0].
      {StoredRownum => -1, DisplayName => 'City',
       StoredRegexnum => 0, StoredFieldnum => $city_fn},
      {StoredRownum => -1, DisplayName => $district,
       StoredRegexnum => 0, StoredFieldnum => $district_fn},
      {StoredRownum => -1, DisplayName => $postcode,
       StoredRegexnum => 0, StoredFieldnum => $pc_fn},
    ],
    _d2s_map => [
      {StoredTemplate => '${Addr1}', StoredRownum => 0},
      {StoredTemplate => '${Addr2}', StoredRownum => 1},
      {StoredTemplate => '${Addr3}', StoredRownum => 2},
      {StoredTemplate => '${Addr4}', StoredRownum => 3},
      {StoredTemplate => $cdp_layout, StoredRownum => -1}
    ],
    _regexes => [ $cdp_re ]
  };
}
1057
|
|
|
|
|
|
|
|
1058
|
|
|
|
|
|
|
# City and district code (with postfix) on 1 line, then postcode alone below: |
1059
|
|
|
|
|
|
|
# Brazil |
1060
|
|
|
|
|
|
|
# XXX Brazil may need a "Brazil" suffix after some or all states. |
1061
|
|
|
|
|
|
|
# Brazil: city and state code share the next-to-last stored line; the
# postcode stands alone on the last one.
for my $code ('BR') {
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  # State choices are keyed by subdivision code.
  my %state_for = Locale::SubCountry->new($code)->code_full_name_hash;

  my @addr = qw(Addr1 Addr2 Addr3 Addr4);
  # City and State are both extracted from row -2 through _regexes[0];
  # the pair is [display name, StoredFieldnum].
  my @city_state = map {
    +{StoredRownum => -2, DisplayName => $_->[0],
      StoredRegexnum => 0, StoredFieldnum => $_->[1]}
  } (['City', 0], ['State', 1]);

  $per_country_data{$code} = {
    _format => [
      (map { [ $_, 40 ] } @addr),
      2,
      ['City', 40],
      ['State', \%state_for],
      ['Postcode', 9, qr/^\d\d\d\d\d-?\d\d\d$/]
    ],
    _s2d_map => [
      (map { +{StoredRownum => $_, DisplayName => $addr[$_]} } 0 .. $#addr),
      @city_state,
      {StoredRownum => -1, DisplayName => 'Postcode'}
    ],
    _d2s_map => [
      (map { +{StoredTemplate => '${' . $addr[$_] . '}', StoredRownum => $_} }
       0 .. $#addr),
      {StoredTemplate => '${City}-${State}', StoredRownum => -2},
      {StoredTemplate => '${Postcode}', StoredRownum => -1}
    ],
    _regexes => [ qr/^(.+?)(?:\s*-\s*|\s+)(\w\w)$/ ]
  };
}
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
# Postcode alone, then city and district below: Nicaragua |
1096
|
|
|
|
|
|
|
# XXX Nicaragua postcodes may be extended from 7 to 11 digits in the future. |
1097
|
|
|
|
|
|
|
# Nicaragua: the postcode stands alone on the next-to-last stored line; city
# and department share the last line, separated by a comma.
#
# The district field is declared as 'Department' in _format, so both maps
# must use that same name (they previously said 'State', a field this layout
# does not define).
foreach my $code (qw(NI)) {
  if (exists($per_country_data{$code})) {
    die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
  }
  my $subctry = Locale::SubCountry->new($code);
  $per_country_data{$code} = {
    _format => [
      ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40],
      ['Postcode', 9, qr/^\d{3}-?\d{3}-?\d$/], 2, ['City', 40],
      # Department choices map every full subdivision name onto itself.
      ['Department', { map { $_ => $_ } $subctry->all_full_names() } ]
    ],
    _s2d_map => [
      {StoredRownum => 0, DisplayName => 'Addr1'},
      {StoredRownum => 1, DisplayName => 'Addr2'},
      {StoredRownum => 2, DisplayName => 'Addr3'},
      {StoredRownum => 3, DisplayName => 'Addr4'},
      {StoredRownum => -2, DisplayName => 'Postcode'},
      # City and Department are both extracted from the last stored line
      # through _regexes[0].
      {StoredRownum => -1, DisplayName => 'City',
       StoredRegexnum => 0, StoredFieldnum => 0},
      {StoredRownum => -1, DisplayName => 'Department',
       StoredRegexnum => 0, StoredFieldnum => 1}
    ],
    _d2s_map => [
      {StoredTemplate => '${Addr1}', StoredRownum => 0},
      {StoredTemplate => '${Addr2}', StoredRownum => 1},
      {StoredTemplate => '${Addr3}', StoredRownum => 2},
      {StoredTemplate => '${Addr4}', StoredRownum => 3},
      {StoredTemplate => '${Postcode}', StoredRownum => -2},
      {StoredTemplate => '${City}, ${Department}', StoredRownum => -1}
    ],
    _regexes => [ qr/^(.+),\s*(.+)$/ ]
  };
}
1130
|
|
|
|
|
|
|
|
1131
|
|
|
|
|
|
|
# City and district name on same line, no postcode: Colombia |
1132
|
|
|
|
|
|
|
# XXX Information on Colombia is inconsistent: does it use city+district, or |
1133
|
|
|
|
|
|
|
# city only? |
1134
|
|
|
|
|
|
|
# Colombia: city and department share the last stored line and the layout
# has no postcode field.
for my $code ('CO') {
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  # Department choices map every full subdivision name onto itself.
  my @departments = Locale::SubCountry->new($code)->all_full_names();
  my %choices;
  @choices{@departments} = @departments;

  my @addr = qw(Addr1 Addr2 Addr3 Addr4);
  # Both fields are extracted from the last stored line through
  # _regexes[0]; the pair is [display name, StoredFieldnum].
  my @last_line = map {
    +{StoredRownum => -1, DisplayName => $_->[0],
      StoredRegexnum => 0, StoredFieldnum => $_->[1]}
  } (['City', 0], ['Department', 1]);

  $per_country_data{$code} = {
    _format => [
      (map { [ $_, 40 ] } @addr),
      2,
      ['City', 40],
      ['Department', \%choices]
    ],
    _s2d_map => [
      (map { +{StoredRownum => $_, DisplayName => $addr[$_]} } 0 .. $#addr),
      @last_line
    ],
    _d2s_map => [
      (map { +{StoredTemplate => '${' . $addr[$_] . '}', StoredRownum => $_} }
       0 .. $#addr),
      {StoredTemplate => '${City}-${Department}', StoredRownum => -1}
    ],
    _regexes => [ qr/^(.+?)(?:\s*[-,]\s*(.*))?$/ ]
  };
}
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
# District name and postal code each on a line by itself: Egypt |
1167
|
|
|
|
|
|
|
# XXX Kazakhstan would sort of be here too, but Locale::SubCountry seems to be |
1168
|
|
|
|
|
|
|
# missing stuff. |
1169
|
|
|
|
|
|
|
# Egypt: the governorate name and the postcode each stand alone on the last
# two stored lines; the layout has no city field.
for my $code ('EG') {
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  # Governorate choices map every full subdivision name onto itself.
  my @governorates = Locale::SubCountry->new($code)->all_full_names();
  my %choices;
  @choices{@governorates} = @governorates;

  # Stored row number paired with the display field kept on that row.
  my @rows = ([0, 'Addr1'], [1, 'Addr2'], [2, 'Addr3'], [3, 'Addr4'],
              [-2, 'Governorate'], [-1, 'Postcode']);

  $per_country_data{$code} = {
    _format => [
      (map { [ $_->[1], 40 ] } @rows[0 .. 3]),
      ['Governorate', \%choices],
      ['Postcode', 5, qr/^\d\d\d\d\d$/]
    ],
    _s2d_map => [
      map { +{StoredRownum => $_->[0], DisplayName => $_->[1]} } @rows
    ],
    _d2s_map => [
      map { +{StoredTemplate => '${' . $_->[1] . '}',
              StoredRownum => $_->[0]} } @rows
    ]
  };
}
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
# City, name of district, and postcode each on a line by itself: Ukraine, |
1200
|
|
|
|
|
|
|
# United Kingdom (not Great Britain, dammit!) |
1201
|
|
|
|
|
|
|
# XXX Anguilla may be here as part of the UK. |
1202
|
|
|
|
|
|
|
# XXX Antigua and Barbuda may be here as part of the UK. |
1203
|
|
|
|
|
|
|
# XXX British Virgin Islands may be here as part of the UK. |
1204
|
|
|
|
|
|
|
# XXX British Indian Ocean Territory would be here as part of UK, but the |
1205
|
|
|
|
|
|
|
# postcode given for it (BBND 1ZZ) doesn't match the pattern. |
1206
|
|
|
|
|
|
|
# XXX Ditto for British Antarctic territory and BIQQ 1ZZ. |
1207
|
|
|
|
|
|
|
# XXX Ditto for Falkland Islands and FIQQ 1ZZ. |
1208
|
|
|
|
|
|
|
# XXX Ditto for Gibraltar and (I think) GIR 0AA. |
1209
|
|
|
|
|
|
|
# XXX Guernsey may be here as part of the UK. |
1210
|
|
|
|
|
|
|
# XXX Isle of Man may be here as part of the UK, or it may need its own country |
1211
|
|
|
|
|
|
|
# name. Go with the former. |
1212
|
|
|
|
|
|
|
# XXX Jersey may be here as part of the UK. |
1213
|
|
|
|
|
|
|
# XXX Montserrat may be here as part of the UK. |
1214
|
|
|
|
|
|
|
# XXX Pitcairn, Henderson, Ducie, and Oeno Island would be here as part of UK, |
1215
|
|
|
|
|
|
|
# but the postcode given for it (PCRN 1ZZ) doesn't match the pattern. |
1216
|
|
|
|
|
|
|
# XXX South Georgia and the South Sandwich Island: ditto (SIQQ 1ZZ) |
1217
|
|
|
|
|
|
|
# XXX Tristan Da Cunha: ditto (TDCU 1ZZ), and the rest of the example address |
1218
|
|
|
|
|
|
|
# format is weird: "Via Capetown"?? Also, are Saint Helena and Tristan Da Cunha |
1219
|
|
|
|
|
|
|
# the same? |
1220
|
|
|
|
|
|
|
# XXX Turks and Caicos: ditto (TECA 1ZZ). |
1221
|
|
|
|
|
|
|
# XXX Ukraine addresses may not need districts in some cases (large cities?). |
1222
|
|
|
|
|
|
|
# XXX UK addresses come in 2 formats: postcode below city/county, and postcode |
1223
|
|
|
|
|
|
|
# on the right. Usually, postcode on the right is for storage/reference, and |
1224
|
|
|
|
|
|
|
# postcode below for mailing. However, I use postcode below exclusively. If |
1225
|
|
|
|
|
|
|
# you're curious why, just look at the postcode regexp. (thanks to Ailbhe for |
1226
|
|
|
|
|
|
|
# the clarification.) Also: as always, I don't enforce separators or upper case |
1227
|
|
|
|
|
|
|
# in postal codes. Plus, it's not obvious that the county is optional unless |
1228
|
|
|
|
|
|
|
# the user groks UK addresses. |
1229
|
|
|
|
|
|
|
# XXX Uzbekistan addresses may not need districts in some cases (large |
1230
|
|
|
|
|
|
|
# cities?). Also, it may want postcode below country. Pretend it doesn't. |
1231
|
|
|
|
|
|
|
# Register layouts where city, district name and postcode each stand alone
# on the last three stored lines. Every entry is [country code, display name
# of the district field, postcode length, postcode validation regex].
for my $entry (['GB', 'County', 8, qr/^\w\w?\d[\w\d]?\s*\d\w\w$/],
               ['UA', 'Region', 5, qr/^\d{5}$/],
               ['UZ', 'Region', 6, qr/^\d{6}$/]) {
  my ($code, $dname, $pc_len, $pc_re) = @$entry;
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  # District choices map every full subdivision name onto itself.
  my @districts = Locale::SubCountry->new($code)->all_full_names();
  my %choices;
  @choices{@districts} = @districts;

  # Stored row number paired with the display field kept on that row.
  my @rows = ([0, 'Addr1'], [1, 'Addr2'], [2, 'Addr3'], [3, 'Addr4'],
              [-3, 'City'], [-2, $dname], [-1, 'Postcode']);

  $per_country_data{$code} = {
    _format => [
      (map { [ $_->[1], 40 ] } @rows[0 .. 4]),
      [$dname, \%choices],
      ['Postcode', $pc_len, $pc_re]
    ],
    _s2d_map => [
      map { +{StoredRownum => $_->[0], DisplayName => $_->[1]} } @rows
    ],
    _d2s_map => [
      map { +{StoredTemplate => '${' . $_->[1] . '}',
              StoredRownum => $_->[0]} } @rows
    ]
  };
}
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
# Postcode on a line by itself, then city: Ecuador, Sudan, Uruguay |
1267
|
|
|
|
|
|
|
# XXX Note that I don't uppercase the letters in Ecuador postcodes |
1268
|
|
|
|
|
|
|
# XXX Uruguay may want the district name (and country) next to the city. |
1269
|
|
|
|
|
|
|
# Register layouts where the postcode stands alone on the next-to-last stored
# line and the city alone on the last one. Every entry is
# [country code, postcode length, postcode validation regex].
for my $entry (['EC', 6, qr/^\w\d{4}\w$/], ['SD', 5, qr/^\d{5}$/],
               ['UY', 5, qr/^\d{5}$/]) {
  my ($code, $pc_len, $pc_re) = @$entry;
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  # Stored row number paired with the display field kept on that row.
  my @rows = ([0, 'Addr1'], [1, 'Addr2'], [2, 'Addr3'], [3, 'Addr4'],
              [-2, 'Postcode'], [-1, 'City']);

  $per_country_data{$code} = {
    _format => [
      (map { [ $_->[1], 40 ] } @rows[0 .. 3]),
      ['Postcode', $pc_len, $pc_re],
      ['City', 40]
    ],
    _s2d_map => [
      map { +{StoredRownum => $_->[0], DisplayName => $_->[1]} } @rows
    ],
    _d2s_map => [
      map { +{StoredTemplate => '${' . $_->[1] . '}',
              StoredRownum => $_->[0]} } @rows
    ]
  };
}
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
# Postcode and prefix on a line by itself under city: Iraq, Kenya, Oman, South |
1300
|
|
|
|
|
|
|
# Africa, Sri Lanka, Swaziland |
1301
|
|
|
|
|
|
|
# XXX Ascension would go there too, but it's not in ISO 3166. |
1302
|
|
|
|
|
|
|
# XXX Iraq may want city, district instead. |
1303
|
|
|
|
|
|
|
# XXX Oman examples show postcode above city, not below. |
1304
|
|
|
|
|
|
|
# XXX Sri Lanka examples show postcode below city, but text says it should be |
1305
|
|
|
|
|
|
|
# above. Assume below. |
1306
|
|
|
|
|
|
|
# Register layouts where the city stands alone on the next-to-last stored
# line and the postcode, preceded by an optional literal prefix, on the last
# one. Every entry is [country code, postcode length, postcode validation
# regex, prefix written before the postcode]; all prefixes are currently
# empty.
for my $entry (['IQ', 5, qr/^\d{5}$/, ''], ['KE', 5, qr/^\d{5}$/, ''],
               ['OM', 3, qr/^\d{3}$/, ''],
               ['ZA', 4, qr/^\d{4}$/, ''], ['LK', 5, qr/^\d{5}$/, ''],
               ['SZ', 4, qr/^[HhLlMmSs]\d{3}$/, '']) {
  my ($code, $pc_length, $pc_re, $pc_prefix) = @$entry;
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  my @addr = qw(Addr1 Addr2 Addr3 Addr4);
  $per_country_data{$code} = {
    _format => [
      (map { [ $_, 40 ] } @addr, 'City'),
      ['Postcode', $pc_length, $pc_re]
    ],
    _s2d_map => [
      (map { +{StoredRownum => $_, DisplayName => $addr[$_]} } 0 .. $#addr),
      {StoredRownum => -2, DisplayName => 'City'},
      # The postcode value begins right after the prefix on the last line.
      {StoredRownum => -1, DisplayName => 'Postcode',
       StoredColnum => length($pc_prefix)}
    ],
    _d2s_map => [
      (map { +{StoredTemplate => '${' . $addr[$_] . '}', StoredRownum => $_} }
       0 .. $#addr),
      {StoredTemplate => '${City}', StoredRownum => -2},
      {StoredTemplate => $pc_prefix . '${Postcode}', StoredRownum => -1}
    ]
  };
}
1339
|
|
|
|
|
|
|
|
1340
|
|
|
|
|
|
|
# City on first line of address, postcode by itself on last line: Hungary |
1341
|
|
|
|
|
|
|
# Hungary: the city is the first stored line, the four free-form address
# lines follow it, and the postcode stands alone on the last stored line.
for my $code ('HU') {
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  # Display fields in stored-row order: the list index is the row number.
  my @leading = qw(City Addr1 Addr2 Addr3 Addr4);

  $per_country_data{$code} = {
    _format => [
      (map { [ $_, 40 ] } @leading),
      ['Postcode', 4, qr/^\d{4}$/]
    ],
    _s2d_map => [
      (map { +{StoredRownum => $_, DisplayName => $leading[$_]} }
       0 .. $#leading),
      {StoredRownum => -1, DisplayName => 'Postcode', StoredColnum => 0}
    ],
    _d2s_map => [
      (map { +{StoredTemplate => '${' . $leading[$_] . '}',
               StoredRownum => $_} } 0 .. $#leading),
      {StoredTemplate => '${Postcode}', StoredRownum => -1}
    ]
  };
}
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
# No city (or rather, only one): Singapore |
1370
|
|
|
|
|
|
|
# Singapore: no city field; the last stored line is the literal word
# SINGAPORE followed by a space and the postcode.
for my $code ('SG') {
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists($per_country_data{$code});

  my @addr = qw(Addr1 Addr2 Addr3 Addr4);
  $per_country_data{$code} = {
    _format => [
      (map { [ $_, 40 ] } @addr),
      ['Postcode', 6, qr/^\d{6}$/]
    ],
    _s2d_map => [
      (map { +{StoredRownum => $_, DisplayName => $addr[$_]} } 0 .. $#addr),
      # Column 10 is the length of the literal "SINGAPORE " in the template.
      {StoredRownum => -1, DisplayName => 'Postcode', StoredColnum => 10}
    ],
    _d2s_map => [
      (map { +{StoredTemplate => '${' . $addr[$_] . '}', StoredRownum => $_} }
       0 .. $#addr),
      {StoredTemplate => 'SINGAPORE ${Postcode}', StoredRownum => -1}
    ]
  };
}
1395
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
# District name, postcode (no city?): Bangladesh, Brunei Darussalam, Thailand |
1397
|
|
|
|
|
|
|
# XXX Bangladesh needs preprocessing of district names? |
1398
|
|
|
|
|
|
|
# XXX Brunei Darussalam is missing districts in Locale::SubCountry? |
1399
|
|
|
|
|
|
|
# XXX Thailand may use 9-digit postcodes in some cases. |
1400
|
|
|
|
|
|
|
# Register layouts whose last stored line holds the district name followed
# by the postcode, with no city field. Each spec lists, in order:
#   - country code
#   - postcode length (second element of the postcode _format entry)
#   - regex a postcode must match
#   - regex that splits the stored line into district and postcode
#   - separator written between district and postcode when storing
#
# Notes:
#   - BN postcodes are six characters (two leading characters plus four
#     digits, as its own regex requires), so the length is 6, not 4.
#   - The BN line regex accepts the ' - ' separator that the BN template
#     writes, as well as plain whitespace, so stored lines parse back.
#   - District captures are lazy so that separator whitespace never ends up
#     at the end of the district name.
foreach my $spec
  ((['BD', 4, qr/^\d{4}$/, qr/^(.+?)\s*-\s*(\d{4})$/, ' - '],
    ['BN', 6, qr/^[bBkKtTpP]\w\d{4}$/,
     qr/^(.+?)(?:\s*-\s*|\s+)([bBkKtTpP]\w\d{4})$/, ' - '],
    ['TH', 5, qr/^\d{5}$/, qr/^(.+?)\s+(\d{5})$/, ' '])) {
  my ($code, $pc_len, $pc_re, $dp_re, $sep) = @$spec;
  if (exists($per_country_data{$code})) {
    die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n";
  }
  my $subctry = Locale::SubCountry->new($code);
  $per_country_data{$code} = {
    _format => [
      # NOTE(review): the bare 2 presumably tells the _format consumer that
      # the next two fields share one line - confirm against that code.
      ['Addr1', 40], ['Addr2', 40], ['Addr3', 40], ['Addr4', 40], 2,
      # District choices map every full subdivision name onto itself.
      ['District', { map { $_ => $_ } $subctry->all_full_names() } ],
      ['Postcode', $pc_len, $pc_re]
    ],
    _s2d_map => [
      {StoredRownum => 0, DisplayName => 'Addr1'},
      {StoredRownum => 1, DisplayName => 'Addr2'},
      {StoredRownum => 2, DisplayName => 'Addr3'},
      {StoredRownum => 3, DisplayName => 'Addr4'},
      # Both fields are extracted from the last stored line through
      # _regexes[0].
      {StoredRownum => -1, DisplayName => 'District',
       StoredRegexnum => 0, StoredFieldnum => 0},
      {StoredRownum => -1, DisplayName => 'Postcode',
       StoredRegexnum => 0, StoredFieldnum => 1}
    ],
    _d2s_map => [
      {StoredTemplate => '${Addr1}', StoredRownum => 0},
      {StoredTemplate => '${Addr2}', StoredRownum => 1},
      {StoredTemplate => '${Addr3}', StoredRownum => 2},
      {StoredTemplate => '${Addr4}', StoredRownum => 3},
      {StoredTemplate => "\$\{District\}$sep\$\{Postcode\}",
       StoredRownum => -1}
    ],
    _regexes => [ $dp_re ]
  };
}
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
# City, no postcode or district: Barbados, Belize, Benin, Bolivia, Botswana, |
1439
|
|
|
|
|
|
|
# Burkina Faso, Burundi, Cameroon, Cayman Islands, Central African Republic, |
1440
|
|
|
|
|
|
|
# Chad, Comoros, Congo (Brazzaville), Congo (Kinshasa), Cote d'Ivoire, |
1441
|
|
|
|
|
|
|
# Democratic People's Republic of Korea (aka North Korea), Djibouti, Dominica, |
1442
|
|
|
|
|
|
|
# Equatorial Guinea, Eritrea, Fiji, Gambia, Ghana, Guyana, Hong Kong, Jamaica, |
1443
|
|
|
|
|
|
|
# Libya, Macao, Malawi, Mali, Mauritania, Mauritius, Namibia, Nigeria, Peru, |
1444
|
|
|
|
|
|
|
# Qatar, Rwanda, Saint Kitts and Nevis, Saint Lucia, Saint Vincent and the |
1445
|
|
|
|
|
|
|
# Grenadines, São Tomé and Principe, Sierra Leone, Suriname, Syrian Arab |
1446
|
|
|
|
|
|
|
# Republic, United Republic of Tanzania, Timor Leste, Togo, Tonga, Trinidad and |
1447
|
|
|
|
|
|
|
# Tobago, Tuvalu, Uganda, (Western) Samoa, Yemen, Zimbabwe |
1448
|
|
|
|
|
|
|
# XXX Not sure about Belize - it could be it has a district name, but if so, |
1449
|
|
|
|
|
|
|
# the districts Locale::SubCountry knows about aren't the right ones. |
1450
|
|
|
|
|
|
|
# XXX Bolivia could use district instead of city. |
1451
|
|
|
|
|
|
|
# XXX Not sure about Botswana - it could be it has a district name, but if so, |
1452
|
|
|
|
|
|
|
# the districts Locale::SubCountry knows about aren't the right ones. |
1453
|
|
|
|
|
|
|
# XXX Burkina Faso has 2 digits right of the city that may be (part of) a |
1454
|
|
|
|
|
|
|
# postcode. Also, all addresses are PO boxes and need the postcode-ish number |
1455
|
|
|
|
|
|
|
# prefixed? |
1456
|
|
|
|
|
|
|
# XXX Burundi could use district instead of city. |
1457
|
|
|
|
|
|
|
# XXX Central African Republic could use district instead of city. |
1458
|
|
|
|
|
|
|
# XXX Congo (Kinshasa) has a number after the city (at least in some cases), |
1459
|
|
|
|
|
|
|
# but I don't know whether that's a postcode. Until I do, leave it here. |
1460
|
|
|
|
|
|
|
# XXX Cote d'Ivoire actually has a 2-3 digit postcode duplicated on both ends |
1461
|
|
|
|
|
|
|
# of the "City" line, with a PO box just left of the city. Pretend it's all one |
1462
|
|
|
|
|
|
|
# big "City" field. May be better to leave it completely unstructured instead. |
1463
|
|
|
|
|
|
|
# XXX Djibouti could use district instead of city. |
1464
|
|
|
|
|
|
|
# XXX Gambia could use district instead of city. |
1465
|
|
|
|
|
|
|
# XXX Hong Kong postal info may be out of date, and I'm not sure about how |
1466
|
|
|
|
|
|
|
# Kowloon or New Territories fit in. |
1467
|
|
|
|
|
|
|
# XXX Jamaica needs a 1-2 digit suffix for Kingston. |
1468
|
|
|
|
|
|
|
# XXX Malawi needs a 1-digit suffix for Lilongwe. |
1469
|
|
|
|
|
|
|
# XXX Mauritania could use district instead of city. |
1470
|
|
|
|
|
|
|
# XXX Mauritius has an optional experimental postcode for 1 city only. Treat it |
1471
|
|
|
|
|
|
|
# as an unstructured address line for now. |
1472
|
|
|
|
|
|
|
# XXX Peru wants route numbers for some cities, and may introduce a postcode |
1473
|
|
|
|
|
|
|
# system eventually. |
1474
|
|
|
|
|
|
|
# XXX Qatar city may be optional in some cases. |
1475
|
|
|
|
|
|
|
# XXX Rwanda could use district instead of city. |
1476
|
|
|
|
|
|
|
# XXX Saint Kitts and Nevis could use district, island instead of city, but if |
1477
|
|
|
|
|
|
|
# so, Locale::SubCountry is missing both district and island names. |
1478
|
|
|
|
|
|
|
# XXX Saint Vincent and the Grenadines may want an extra address line below |
1479
|
|
|
|
|
|
|
# country. Ignore. |
1480
|
|
|
|
|
|
|
# XXX São Tomé and Principe may need district and/or island specified for some |
1481
|
|
|
|
|
|
|
# addresses. |
1482
|
|
|
|
|
|
|
# XXX Suriname could use district instead of city. |
1483
|
|
|
|
|
|
|
# XXX Syrian Arab Republic could use district instead of city, and is currently |
1484
|
|
|
|
|
|
|
# developing a postcode system. |
1485
|
|
|
|
|
|
|
# XXX United Republic of Tanzania could use district instead of city. |
1486
|
|
|
|
|
|
|
# XXX Yemen could use district instead of city. |
1487
|
|
|
|
|
|
|
# XXX Zimbabwe could use district instead of city. |
1488
|
|
|
|
|
|
|
# Register the shared "four free-form address lines plus a city line" layout
# for every country code listed below. Each country gets its own freshly built
# data structures, so no references are shared between countries.
foreach my $code (
  qw(
    BB BZ BJ BO BW BF BI CM KY CF TD KM CG CI KP DJ DM GQ ER
    FJ GM GH GY HK JM LY MO MW ML MR NA PE QA RW KN LC VC ST
    SL SR SY TZ TL TG TT TV UG WS YE ZW
  )
) {
  # A country code must be set up exactly once in this file.
  die __PACKAGE__ . ": Attempted to initialize country code $code twice.\n"
    if exists $per_country_data{$code};

  # Display field name paired with its stored row number.
  # NOTE(review): -1 presumably addresses the last stored row - confirm
  # against the code that consumes StoredRownum.
  my @field_rows = (
    [Addr1 => 0], [Addr2 => 1], [Addr3 => 2], [Addr4 => 3], [City => -1]
  );

  my (@format, @s2d_map, @d2s_map);
  for my $field_row (@field_rows) {
    my ($field, $rownum) = @$field_row;

    # 40 is the suggested length used for every text field.
    push @format, [$field, 40];
    push @s2d_map, {StoredRownum => $rownum, DisplayName => $field};
    push @d2s_map,
      {StoredTemplate => '${' . $field . '}', StoredRownum => $rownum};
  }

  $per_country_data{$code} = {
    _format  => \@format,
    _s2d_map => \@s2d_map,
    _d2s_map => \@d2s_map
  };
}
1514
|
|
|
|
|
|
|
|
1515
|
|
|
|
|
|
|
1; |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
=head1 BUGS |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
Only 2-letter country codes are supported. |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
A knob to carp on some errors would be nice. |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
Objects returned by the new method can be actually blessed into a |
1524
|
|
|
|
|
|
|
country-specific subclass. This makes it impossible to have other |
1525
|
|
|
|
|
|
|
derived classes than the country-specific ones. |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
40 is used as the suggested length for all text fields. This is probably too |
1528
|
|
|
|
|
|
|
long for some and too short for others. |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
Support for most countries ranges from non-existent to sketchy. |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
The method name "display" is arguably a poor choice. |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
Some messages should go through a translation table. |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
Data validation should probably be a method of its own. |
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
This module doesn't yet deal well with countries that want the recipient name |
1539
|
|
|
|
|
|
|
in another position than 1st line, or the country name in another position than |
1540
|
|
|
|
|
|
|
last line. Examples of such countries are: Ukraine (wants country, |
1541
|
|
|
|
|
|
|
city+postcode, street address, recipient name from top down instead of the more |
1542
|
|
|
|
|
|
|
widespread bottom up), Turkmenistan (wants city+postcode, country, recipient |
1543
|
|
|
|
|
|
|
name, street address, from top down), Grenada (wants a supranational line - |
1544
|
|
|
|
|
|
|
West Indies - below the country name). The interface to do that exists, but is |
1545
|
|
|
|
|
|
|
do-nothing until I figure out how to deal with address formats for use between |
1546
|
|
|
|
|
|
|
countries with conflicting requirements. |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
This module doesn't deal well with countries where the address format depends |
1549
|
|
|
|
|
|
|
on the script used, such as Saudi Arabia. |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
This module doesn't yet support entities with their own ISO 3166-1 code that |
1552
|
|
|
|
|
|
|
use another country's address format, including the country name. |
1553
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
This module assumes "no locale", and blissfully mixes character classes that |
1555
|
|
|
|
|
|
|
could conceivably match in the locale with classes that have to match according |
1556
|
|
|
|
|
|
|
to the Roman alphabet (eg, US ZIP codes and Canadian postal codes). This is |
1557
|
|
|
|
|
|
|
probably nearly impossible to fix, as the relevant locale isn't well-defined |
1558
|
|
|
|
|
|
|
anyway. (The locale for the machine running the application? The locale for the |
1559
|
|
|
|
|
|
|
user? Or the locale for the country the address is in?) |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
This module assumes that the privileged order for entering address components |
1562
|
|
|
|
|
|
|
is top-down, left-to-right, according to the standard or most common address |
1563
|
|
|
|
|
|
|
format. This may not be true of countries where the dominant language is |
1564
|
|
|
|
|
|
|
written right-to-left. |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
This module doesn't use the PATDL |
1567
|
|
|
|
|
|
|
(F) in the address parsing |
1568
|
|
|
|
|
|
|
rules. |
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
=head1 HISTORY |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
=head1 SEE ALSO |
1573
|
|
|
|
|
|
|
|
1574
|
|
|
|
|
|
|
L |
1575
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
L |
1577
|
|
|
|
|
|
|
|
1578
|
|
|
|
|
|
|
F |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
F |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
F |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
F |
1585
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
F (previous version of the first |
1587
|
|
|
|
|
|
|
URL, incorrect in spots, and to be used only if no other info is available) |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
F |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
Ailbhe, DamienPS, LeiaCat, Renée, and Martin DeMello clarified, corrected, or |
1594
|
|
|
|
|
|
|
explained standards or usage for specific countries. See acknowledgements in |
1595
|
|
|
|
|
|
|
comments throughout the source code. |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
Bill Holbrook draws (and holds the copyright to) comic strip Kevin and Kell, |
1598
|
|
|
|
|
|
|
from which I got the names used in the description for C<$parser-E<gt>label()>. |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
=head1 AUTHOR AND LICENSE |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
Copyright (c) 2004, Michel Lavondès. All rights reserved. |
1603
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without |
1605
|
|
|
|
|
|
|
modification, are permitted provided that the following conditions are met: |
1606
|
|
|
|
|
|
|
|
1607
|
|
|
|
|
|
|
=over 4 |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
=item * |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
Redistributions of source code must retain the above copyright notice, this |
1612
|
|
|
|
|
|
|
list of conditions and the following disclaimer. |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
=item * |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
Redistributions in binary form must reproduce the above copyright notice, this |
1617
|
|
|
|
|
|
|
list of conditions and the following disclaimer in the documentation and/or |
1618
|
|
|
|
|
|
|
other materials provided with the distribution. |
1619
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
=item * |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
Neither the name of the Copyright holder nor the names of any contributors may |
1623
|
|
|
|
|
|
|
be used to endorse or promote products derived from this software without |
1624
|
|
|
|
|
|
|
specific prior written permission. |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
=back |
1627
|
|
|
|
|
|
|
|
1628
|
|
|
|
|
|
|
This software is provided by the copyright holder and contributors "as is" and |
1629
|
|
|
|
|
|
|
any express or implied warranties, including, but not limited to, the implied |
1630
|
|
|
|
|
|
|
warranties of merchantability and fitness for a particular purpose are |
1631
|
|
|
|
|
|
|
disclaimed. In no event shall the copyright holder or contributors be liable |
1632
|
|
|
|
|
|
|
for any direct, indirect, incidental, special, exemplary, or consequential |
1633
|
|
|
|
|
|
|
damages (including, but not limited to, procurement of substitute goods or |
1634
|
|
|
|
|
|
|
services; loss of use, data, or profits; or business interruption) however |
1635
|
|
|
|
|
|
|
caused and on any theory of liability, whether in contract, strict liability, |
1636
|
|
|
|
|
|
|
or tort (including negligence or otherwise) arising in any way out of the use |
1637
|
|
|
|
|
|
|
of this software, even if advised of the possibility of such damage. |