| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Geo::StreetAddress::US; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
985
|
use 5.008_001; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
37
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
33
|
|
|
5
|
1
|
|
|
1
|
|
15
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
58
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '1.04'; |
|
8
|
|
|
|
|
|
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use base 'Class::Data::Inheritable'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
791
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Geo::StreetAddress::US - Perl extension for parsing US street addresses |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use Geo::StreetAddress::US; |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->parse_location( |
|
20
|
|
|
|
|
|
|
"1005 Gravenstein Hwy N, Sebastopol CA 95472" ); |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->parse_location( |
|
23
|
|
|
|
|
|
|
"Hollywood & Vine, Los Angeles, CA" ); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->parse_address( |
|
26
|
|
|
|
|
|
|
"1600 Pennsylvania Ave, Washington, DC" ); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->parse_address( |
|
29
|
|
|
|
|
|
|
"1600 Pennsylvania Ave, Washington, DC" ); |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->parse_informal_address( |
|
32
|
|
|
|
|
|
|
"Lot 3 Pennsylvania Ave" ); |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->parse_intersection( |
|
35
|
|
|
|
|
|
|
"Mission Street at Valencia Street, San Francisco, CA" ); |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
$hashref = Geo::StreetAddress::US->normalize_address( \%spec ); |
|
38
|
|
|
|
|
|
|
# the parse_* methods call this automatically... |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
Geo::StreetAddress::US is a regex-based street address and street intersection |
|
43
|
|
|
|
|
|
|
parser for the United States. Its basic goal is to be as forgiving as possible |
|
44
|
|
|
|
|
|
|
when parsing user-provided address strings. Geo::StreetAddress::US knows about |
|
45
|
|
|
|
|
|
|
directional prefixes and suffixes, fractional building numbers, building units, |
|
46
|
|
|
|
|
|
|
grid-based addresses (such as those used in parts of Utah), 5 and 9 digit ZIP |
|
47
|
|
|
|
|
|
|
codes, and all of the official USPS abbreviations for street types, state |
|
48
|
|
|
|
|
|
|
names and secondary unit designators. |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head1 RETURN VALUES |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Most Geo::StreetAddress::US methods return a reference to a hash containing |
|
53
|
|
|
|
|
|
|
address or intersection information. This |
|
54
|
|
|
|
|
|
|
"address specifier" hash may contain any of the following fields for a |
|
55
|
|
|
|
|
|
|
given address. If a given field is not present in the address, the |
|
56
|
|
|
|
|
|
|
corresponding key will be set to C<undef> in the hash. |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Future versions of this module may add extra fields. |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 ADDRESS SPECIFIER |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 number |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
House or street number. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head2 prefix |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Directional prefix for the street, such as N, NE, E, etc. A given prefix |
|
69
|
|
|
|
|
|
|
should be one to two characters long. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 street |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Name of the street, without directional or type qualifiers. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 type |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Abbreviated street type, e.g. Rd, St, Ave, etc. See the USPS official |
|
78
|
|
|
|
|
|
|
type abbreviations in USPS Publication 28, Appendix C1, |
|
79
|
|
|
|
|
|
|
for a list of abbreviations used. |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head2 suffix |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Directional suffix for the street, as above. |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head2 city |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Name of the city, town, or other locale that the address is situated in. |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=head2 state |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The state which the address is situated in, given as its two-letter |
|
92
|
|
|
|
|
|
|
postal abbreviation. See USPS Publication 28, Appendix B, for a list of abbreviations used. |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head2 zip |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Five digit ZIP postal code for the address, including leading zero, if needed. |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head2 sec_unit_type |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
If the address includes a Secondary Unit Designator, such as a room, suite or |
|
101
|
|
|
|
|
|
|
apartment, the C<sec_unit_type> field will indicate the type of unit. |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 sec_unit_num |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
If the address includes a Secondary Unit Designator, such as a room, suite or apartment, |
|
106
|
|
|
|
|
|
|
the C<sec_unit_num> field will indicate the number of the unit (which may not be numeric). |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head1 INTERSECTION SPECIFIER |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 prefix1, prefix2 |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
Directional prefixes for the streets in question. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head2 street1, street2 |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Names of the streets in question. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head2 type1, type2 |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
Street types for the streets in question. |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head2 suffix1, suffix2 |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Directional suffixes for the streets in question. |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head2 city |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
City or locale containing the intersection, as above. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head2 state |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
State abbreviation, as above. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 zip |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Five digit ZIP code, as above. |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=cut |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 GLOBAL VARIABLES |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Geo::StreetAddress::US contains a number of global variables which it |
|
143
|
|
|
|
|
|
|
uses to recognize different bits of US street addresses. Although you |
|
144
|
|
|
|
|
|
|
will probably not need them, they are documented here for completeness's |
|
145
|
|
|
|
|
|
|
sake. |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
=head2 %Directional |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
Maps directional names (north, northeast, etc.) to abbreviations (N, NE, etc.). |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head2 %Direction_Code |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Maps directional abbreviations to directional names. |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=cut |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
our %Directional = ( |
|
160
|
|
|
|
|
|
|
north => "N", |
|
161
|
|
|
|
|
|
|
northeast => "NE", |
|
162
|
|
|
|
|
|
|
east => "E", |
|
163
|
|
|
|
|
|
|
southeast => "SE", |
|
164
|
|
|
|
|
|
|
south => "S", |
|
165
|
|
|
|
|
|
|
southwest => "SW", |
|
166
|
|
|
|
|
|
|
west => "W", |
|
167
|
|
|
|
|
|
|
northwest => "NW", |
|
168
|
|
|
|
|
|
|
); |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
our %Direction_Code; # setup in init(); |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=head2 %Street_Type |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Maps lowercased USPS standard street types to their canonical postal |
|
175
|
|
|
|
|
|
|
abbreviations as found in TIGER/Line. See eg/get_street_abbrev.pl in |
|
176
|
|
|
|
|
|
|
the distribution for how this map was generated. |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
our %Street_Type = ( |
|
181
|
|
|
|
|
|
|
allee => "aly", |
|
182
|
|
|
|
|
|
|
alley => "aly", |
|
183
|
|
|
|
|
|
|
ally => "aly", |
|
184
|
|
|
|
|
|
|
anex => "anx", |
|
185
|
|
|
|
|
|
|
annex => "anx", |
|
186
|
|
|
|
|
|
|
annx => "anx", |
|
187
|
|
|
|
|
|
|
arcade => "arc", |
|
188
|
|
|
|
|
|
|
av => "ave", |
|
189
|
|
|
|
|
|
|
aven => "ave", |
|
190
|
|
|
|
|
|
|
avenu => "ave", |
|
191
|
|
|
|
|
|
|
avenue => "ave", |
|
192
|
|
|
|
|
|
|
avn => "ave", |
|
193
|
|
|
|
|
|
|
avnue => "ave", |
|
194
|
|
|
|
|
|
|
bayoo => "byu", |
|
195
|
|
|
|
|
|
|
bayou => "byu", |
|
196
|
|
|
|
|
|
|
beach => "bch", |
|
197
|
|
|
|
|
|
|
bend => "bnd", |
|
198
|
|
|
|
|
|
|
bluf => "blf", |
|
199
|
|
|
|
|
|
|
bluff => "blf", |
|
200
|
|
|
|
|
|
|
bluffs => "blfs", |
|
201
|
|
|
|
|
|
|
bot => "btm", |
|
202
|
|
|
|
|
|
|
bottm => "btm", |
|
203
|
|
|
|
|
|
|
bottom => "btm", |
|
204
|
|
|
|
|
|
|
boul => "blvd", |
|
205
|
|
|
|
|
|
|
boulevard => "blvd", |
|
206
|
|
|
|
|
|
|
boulv => "blvd", |
|
207
|
|
|
|
|
|
|
branch => "br", |
|
208
|
|
|
|
|
|
|
brdge => "brg", |
|
209
|
|
|
|
|
|
|
bridge => "brg", |
|
210
|
|
|
|
|
|
|
brnch => "br", |
|
211
|
|
|
|
|
|
|
brook => "brk", |
|
212
|
|
|
|
|
|
|
brooks => "brks", |
|
213
|
|
|
|
|
|
|
burg => "bg", |
|
214
|
|
|
|
|
|
|
burgs => "bgs", |
|
215
|
|
|
|
|
|
|
bypa => "byp", |
|
216
|
|
|
|
|
|
|
bypas => "byp", |
|
217
|
|
|
|
|
|
|
bypass => "byp", |
|
218
|
|
|
|
|
|
|
byps => "byp", |
|
219
|
|
|
|
|
|
|
camp => "cp", |
|
220
|
|
|
|
|
|
|
canyn => "cyn", |
|
221
|
|
|
|
|
|
|
canyon => "cyn", |
|
222
|
|
|
|
|
|
|
cape => "cpe", |
|
223
|
|
|
|
|
|
|
causeway => "cswy", |
|
224
|
|
|
|
|
|
|
causway => "cswy", |
|
225
|
|
|
|
|
|
|
cen => "ctr", |
|
226
|
|
|
|
|
|
|
cent => "ctr", |
|
227
|
|
|
|
|
|
|
center => "ctr", |
|
228
|
|
|
|
|
|
|
centers => "ctrs", |
|
229
|
|
|
|
|
|
|
centr => "ctr", |
|
230
|
|
|
|
|
|
|
centre => "ctr", |
|
231
|
|
|
|
|
|
|
circ => "cir", |
|
232
|
|
|
|
|
|
|
circl => "cir", |
|
233
|
|
|
|
|
|
|
circle => "cir", |
|
234
|
|
|
|
|
|
|
circles => "cirs", |
|
235
|
|
|
|
|
|
|
ck => "crk", |
|
236
|
|
|
|
|
|
|
cliff => "clf", |
|
237
|
|
|
|
|
|
|
cliffs => "clfs", |
|
238
|
|
|
|
|
|
|
club => "clb", |
|
239
|
|
|
|
|
|
|
cmp => "cp", |
|
240
|
|
|
|
|
|
|
cnter => "ctr", |
|
241
|
|
|
|
|
|
|
cntr => "ctr", |
|
242
|
|
|
|
|
|
|
cnyn => "cyn", |
|
243
|
|
|
|
|
|
|
common => "cmn", |
|
244
|
|
|
|
|
|
|
corner => "cor", |
|
245
|
|
|
|
|
|
|
corners => "cors", |
|
246
|
|
|
|
|
|
|
course => "crse", |
|
247
|
|
|
|
|
|
|
court => "ct", |
|
248
|
|
|
|
|
|
|
courts => "cts", |
|
249
|
|
|
|
|
|
|
cove => "cv", |
|
250
|
|
|
|
|
|
|
coves => "cvs", |
|
251
|
|
|
|
|
|
|
cr => "crk", |
|
252
|
|
|
|
|
|
|
crcl => "cir", |
|
253
|
|
|
|
|
|
|
crcle => "cir", |
|
254
|
|
|
|
|
|
|
crecent => "cres", |
|
255
|
|
|
|
|
|
|
creek => "crk", |
|
256
|
|
|
|
|
|
|
crescent => "cres", |
|
257
|
|
|
|
|
|
|
cresent => "cres", |
|
258
|
|
|
|
|
|
|
crest => "crst", |
|
259
|
|
|
|
|
|
|
crossing => "xing", |
|
260
|
|
|
|
|
|
|
crossroad => "xrd", |
|
261
|
|
|
|
|
|
|
crscnt => "cres", |
|
262
|
|
|
|
|
|
|
crsent => "cres", |
|
263
|
|
|
|
|
|
|
crsnt => "cres", |
|
264
|
|
|
|
|
|
|
crssing => "xing", |
|
265
|
|
|
|
|
|
|
crssng => "xing", |
|
266
|
|
|
|
|
|
|
crt => "ct", |
|
267
|
|
|
|
|
|
|
curve => "curv", |
|
268
|
|
|
|
|
|
|
dale => "dl", |
|
269
|
|
|
|
|
|
|
dam => "dm", |
|
270
|
|
|
|
|
|
|
div => "dv", |
|
271
|
|
|
|
|
|
|
divide => "dv", |
|
272
|
|
|
|
|
|
|
driv => "dr", |
|
273
|
|
|
|
|
|
|
drive => "dr", |
|
274
|
|
|
|
|
|
|
drives => "drs", |
|
275
|
|
|
|
|
|
|
drv => "dr", |
|
276
|
|
|
|
|
|
|
dvd => "dv", |
|
277
|
|
|
|
|
|
|
estate => "est", |
|
278
|
|
|
|
|
|
|
estates => "ests", |
|
279
|
|
|
|
|
|
|
exp => "expy", |
|
280
|
|
|
|
|
|
|
expr => "expy", |
|
281
|
|
|
|
|
|
|
express => "expy", |
|
282
|
|
|
|
|
|
|
expressway => "expy", |
|
283
|
|
|
|
|
|
|
expw => "expy", |
|
284
|
|
|
|
|
|
|
extension => "ext", |
|
285
|
|
|
|
|
|
|
extensions => "exts", |
|
286
|
|
|
|
|
|
|
extn => "ext", |
|
287
|
|
|
|
|
|
|
extnsn => "ext", |
|
288
|
|
|
|
|
|
|
falls => "fls", |
|
289
|
|
|
|
|
|
|
ferry => "fry", |
|
290
|
|
|
|
|
|
|
field => "fld", |
|
291
|
|
|
|
|
|
|
fields => "flds", |
|
292
|
|
|
|
|
|
|
flat => "flt", |
|
293
|
|
|
|
|
|
|
flats => "flts", |
|
294
|
|
|
|
|
|
|
ford => "frd", |
|
295
|
|
|
|
|
|
|
fords => "frds", |
|
296
|
|
|
|
|
|
|
forest => "frst", |
|
297
|
|
|
|
|
|
|
forests => "frst", |
|
298
|
|
|
|
|
|
|
forg => "frg", |
|
299
|
|
|
|
|
|
|
forge => "frg", |
|
300
|
|
|
|
|
|
|
forges => "frgs", |
|
301
|
|
|
|
|
|
|
fork => "frk", |
|
302
|
|
|
|
|
|
|
forks => "frks", |
|
303
|
|
|
|
|
|
|
fort => "ft", |
|
304
|
|
|
|
|
|
|
freeway => "fwy", |
|
305
|
|
|
|
|
|
|
freewy => "fwy", |
|
306
|
|
|
|
|
|
|
frry => "fry", |
|
307
|
|
|
|
|
|
|
frt => "ft", |
|
308
|
|
|
|
|
|
|
frway => "fwy", |
|
309
|
|
|
|
|
|
|
frwy => "fwy", |
|
310
|
|
|
|
|
|
|
garden => "gdn", |
|
311
|
|
|
|
|
|
|
gardens => "gdns", |
|
312
|
|
|
|
|
|
|
gardn => "gdn", |
|
313
|
|
|
|
|
|
|
gateway => "gtwy", |
|
314
|
|
|
|
|
|
|
gatewy => "gtwy", |
|
315
|
|
|
|
|
|
|
gatway => "gtwy", |
|
316
|
|
|
|
|
|
|
glen => "gln", |
|
317
|
|
|
|
|
|
|
glens => "glns", |
|
318
|
|
|
|
|
|
|
grden => "gdn", |
|
319
|
|
|
|
|
|
|
grdn => "gdn", |
|
320
|
|
|
|
|
|
|
grdns => "gdns", |
|
321
|
|
|
|
|
|
|
green => "grn", |
|
322
|
|
|
|
|
|
|
greens => "grns", |
|
323
|
|
|
|
|
|
|
grov => "grv", |
|
324
|
|
|
|
|
|
|
grove => "grv", |
|
325
|
|
|
|
|
|
|
groves => "grvs", |
|
326
|
|
|
|
|
|
|
gtway => "gtwy", |
|
327
|
|
|
|
|
|
|
harb => "hbr", |
|
328
|
|
|
|
|
|
|
harbor => "hbr", |
|
329
|
|
|
|
|
|
|
harbors => "hbrs", |
|
330
|
|
|
|
|
|
|
harbr => "hbr", |
|
331
|
|
|
|
|
|
|
haven => "hvn", |
|
332
|
|
|
|
|
|
|
havn => "hvn", |
|
333
|
|
|
|
|
|
|
height => "hts", |
|
334
|
|
|
|
|
|
|
heights => "hts", |
|
335
|
|
|
|
|
|
|
hgts => "hts", |
|
336
|
|
|
|
|
|
|
highway => "hwy", |
|
337
|
|
|
|
|
|
|
highwy => "hwy", |
|
338
|
|
|
|
|
|
|
hill => "hl", |
|
339
|
|
|
|
|
|
|
hills => "hls", |
|
340
|
|
|
|
|
|
|
hiway => "hwy", |
|
341
|
|
|
|
|
|
|
hiwy => "hwy", |
|
342
|
|
|
|
|
|
|
hllw => "holw", |
|
343
|
|
|
|
|
|
|
hollow => "holw", |
|
344
|
|
|
|
|
|
|
hollows => "holw", |
|
345
|
|
|
|
|
|
|
holws => "holw", |
|
346
|
|
|
|
|
|
|
hrbor => "hbr", |
|
347
|
|
|
|
|
|
|
ht => "hts", |
|
348
|
|
|
|
|
|
|
hway => "hwy", |
|
349
|
|
|
|
|
|
|
inlet => "inlt", |
|
350
|
|
|
|
|
|
|
island => "is", |
|
351
|
|
|
|
|
|
|
islands => "iss", |
|
352
|
|
|
|
|
|
|
isles => "isle", |
|
353
|
|
|
|
|
|
|
islnd => "is", |
|
354
|
|
|
|
|
|
|
islnds => "iss", |
|
355
|
|
|
|
|
|
|
jction => "jct", |
|
356
|
|
|
|
|
|
|
jctn => "jct", |
|
357
|
|
|
|
|
|
|
jctns => "jcts", |
|
358
|
|
|
|
|
|
|
junction => "jct", |
|
359
|
|
|
|
|
|
|
junctions => "jcts", |
|
360
|
|
|
|
|
|
|
junctn => "jct", |
|
361
|
|
|
|
|
|
|
juncton => "jct", |
|
362
|
|
|
|
|
|
|
key => "ky", |
|
363
|
|
|
|
|
|
|
keys => "kys", |
|
364
|
|
|
|
|
|
|
knol => "knl", |
|
365
|
|
|
|
|
|
|
knoll => "knl", |
|
366
|
|
|
|
|
|
|
knolls => "knls", |
|
367
|
|
|
|
|
|
|
la => "ln", |
|
368
|
|
|
|
|
|
|
lake => "lk", |
|
369
|
|
|
|
|
|
|
lakes => "lks", |
|
370
|
|
|
|
|
|
|
landing => "lndg", |
|
371
|
|
|
|
|
|
|
lane => "ln", |
|
372
|
|
|
|
|
|
|
lanes => "ln", |
|
373
|
|
|
|
|
|
|
ldge => "ldg", |
|
374
|
|
|
|
|
|
|
light => "lgt", |
|
375
|
|
|
|
|
|
|
lights => "lgts", |
|
376
|
|
|
|
|
|
|
lndng => "lndg", |
|
377
|
|
|
|
|
|
|
loaf => "lf", |
|
378
|
|
|
|
|
|
|
lock => "lck", |
|
379
|
|
|
|
|
|
|
locks => "lcks", |
|
380
|
|
|
|
|
|
|
lodg => "ldg", |
|
381
|
|
|
|
|
|
|
lodge => "ldg", |
|
382
|
|
|
|
|
|
|
loops => "loop", |
|
383
|
|
|
|
|
|
|
manor => "mnr", |
|
384
|
|
|
|
|
|
|
manors => "mnrs", |
|
385
|
|
|
|
|
|
|
meadow => "mdw", |
|
386
|
|
|
|
|
|
|
meadows => "mdws", |
|
387
|
|
|
|
|
|
|
medows => "mdws", |
|
388
|
|
|
|
|
|
|
mill => "ml", |
|
389
|
|
|
|
|
|
|
mills => "mls", |
|
390
|
|
|
|
|
|
|
mission => "msn", |
|
391
|
|
|
|
|
|
|
missn => "msn", |
|
392
|
|
|
|
|
|
|
mnt => "mt", |
|
393
|
|
|
|
|
|
|
mntain => "mtn", |
|
394
|
|
|
|
|
|
|
mntn => "mtn", |
|
395
|
|
|
|
|
|
|
mntns => "mtns", |
|
396
|
|
|
|
|
|
|
motorway => "mtwy", |
|
397
|
|
|
|
|
|
|
mount => "mt", |
|
398
|
|
|
|
|
|
|
mountain => "mtn", |
|
399
|
|
|
|
|
|
|
mountains => "mtns", |
|
400
|
|
|
|
|
|
|
mountin => "mtn", |
|
401
|
|
|
|
|
|
|
mssn => "msn", |
|
402
|
|
|
|
|
|
|
mtin => "mtn", |
|
403
|
|
|
|
|
|
|
neck => "nck", |
|
404
|
|
|
|
|
|
|
orchard => "orch", |
|
405
|
|
|
|
|
|
|
orchrd => "orch", |
|
406
|
|
|
|
|
|
|
overpass => "opas", |
|
407
|
|
|
|
|
|
|
ovl => "oval", |
|
408
|
|
|
|
|
|
|
parks => "park", |
|
409
|
|
|
|
|
|
|
parkway => "pkwy", |
|
410
|
|
|
|
|
|
|
parkways => "pkwy", |
|
411
|
|
|
|
|
|
|
parkwy => "pkwy", |
|
412
|
|
|
|
|
|
|
passage => "psge", |
|
413
|
|
|
|
|
|
|
paths => "path", |
|
414
|
|
|
|
|
|
|
pikes => "pike", |
|
415
|
|
|
|
|
|
|
pine => "pne", |
|
416
|
|
|
|
|
|
|
pines => "pnes", |
|
417
|
|
|
|
|
|
|
pk => "park", |
|
418
|
|
|
|
|
|
|
pkway => "pkwy", |
|
419
|
|
|
|
|
|
|
pkwys => "pkwy", |
|
420
|
|
|
|
|
|
|
pky => "pkwy", |
|
421
|
|
|
|
|
|
|
place => "pl", |
|
422
|
|
|
|
|
|
|
plain => "pln", |
|
423
|
|
|
|
|
|
|
plaines => "plns", |
|
424
|
|
|
|
|
|
|
plains => "plns", |
|
425
|
|
|
|
|
|
|
plaza => "plz", |
|
426
|
|
|
|
|
|
|
plza => "plz", |
|
427
|
|
|
|
|
|
|
point => "pt", |
|
428
|
|
|
|
|
|
|
points => "pts", |
|
429
|
|
|
|
|
|
|
port => "prt", |
|
430
|
|
|
|
|
|
|
ports => "prts", |
|
431
|
|
|
|
|
|
|
prairie => "pr", |
|
432
|
|
|
|
|
|
|
prarie => "pr", |
|
433
|
|
|
|
|
|
|
prk => "park", |
|
434
|
|
|
|
|
|
|
prr => "pr", |
|
435
|
|
|
|
|
|
|
rad => "radl", |
|
436
|
|
|
|
|
|
|
radial => "radl", |
|
437
|
|
|
|
|
|
|
radiel => "radl", |
|
438
|
|
|
|
|
|
|
ranch => "rnch", |
|
439
|
|
|
|
|
|
|
ranches => "rnch", |
|
440
|
|
|
|
|
|
|
rapid => "rpd", |
|
441
|
|
|
|
|
|
|
rapids => "rpds", |
|
442
|
|
|
|
|
|
|
rdge => "rdg", |
|
443
|
|
|
|
|
|
|
rest => "rst", |
|
444
|
|
|
|
|
|
|
ridge => "rdg", |
|
445
|
|
|
|
|
|
|
ridges => "rdgs", |
|
446
|
|
|
|
|
|
|
river => "riv", |
|
447
|
|
|
|
|
|
|
rivr => "riv", |
|
448
|
|
|
|
|
|
|
rnchs => "rnch", |
|
449
|
|
|
|
|
|
|
road => "rd", |
|
450
|
|
|
|
|
|
|
roads => "rds", |
|
451
|
|
|
|
|
|
|
route => "rte", |
|
452
|
|
|
|
|
|
|
rvr => "riv", |
|
453
|
|
|
|
|
|
|
shoal => "shl", |
|
454
|
|
|
|
|
|
|
shoals => "shls", |
|
455
|
|
|
|
|
|
|
shoar => "shr", |
|
456
|
|
|
|
|
|
|
shoars => "shrs", |
|
457
|
|
|
|
|
|
|
shore => "shr", |
|
458
|
|
|
|
|
|
|
shores => "shrs", |
|
459
|
|
|
|
|
|
|
skyway => "skwy", |
|
460
|
|
|
|
|
|
|
spng => "spg", |
|
461
|
|
|
|
|
|
|
spngs => "spgs", |
|
462
|
|
|
|
|
|
|
spring => "spg", |
|
463
|
|
|
|
|
|
|
springs => "spgs", |
|
464
|
|
|
|
|
|
|
sprng => "spg", |
|
465
|
|
|
|
|
|
|
sprngs => "spgs", |
|
466
|
|
|
|
|
|
|
spurs => "spur", |
|
467
|
|
|
|
|
|
|
sqr => "sq", |
|
468
|
|
|
|
|
|
|
sqre => "sq", |
|
469
|
|
|
|
|
|
|
sqrs => "sqs", |
|
470
|
|
|
|
|
|
|
squ => "sq", |
|
471
|
|
|
|
|
|
|
square => "sq", |
|
472
|
|
|
|
|
|
|
squares => "sqs", |
|
473
|
|
|
|
|
|
|
station => "sta", |
|
474
|
|
|
|
|
|
|
statn => "sta", |
|
475
|
|
|
|
|
|
|
stn => "sta", |
|
476
|
|
|
|
|
|
|
str => "st", |
|
477
|
|
|
|
|
|
|
strav => "stra", |
|
478
|
|
|
|
|
|
|
strave => "stra", |
|
479
|
|
|
|
|
|
|
straven => "stra", |
|
480
|
|
|
|
|
|
|
stravenue => "stra", |
|
481
|
|
|
|
|
|
|
stravn => "stra", |
|
482
|
|
|
|
|
|
|
stream => "strm", |
|
483
|
|
|
|
|
|
|
street => "st", |
|
484
|
|
|
|
|
|
|
streets => "sts", |
|
485
|
|
|
|
|
|
|
streme => "strm", |
|
486
|
|
|
|
|
|
|
strt => "st", |
|
487
|
|
|
|
|
|
|
strvn => "stra", |
|
488
|
|
|
|
|
|
|
strvnue => "stra", |
|
489
|
|
|
|
|
|
|
sumit => "smt", |
|
490
|
|
|
|
|
|
|
sumitt => "smt", |
|
491
|
|
|
|
|
|
|
summit => "smt", |
|
492
|
|
|
|
|
|
|
terr => "ter", |
|
493
|
|
|
|
|
|
|
terrace => "ter", |
|
494
|
|
|
|
|
|
|
throughway => "trwy", |
|
495
|
|
|
|
|
|
|
tpk => "tpke", |
|
496
|
|
|
|
|
|
|
tr => "trl", |
|
497
|
|
|
|
|
|
|
trace => "trce", |
|
498
|
|
|
|
|
|
|
traces => "trce", |
|
499
|
|
|
|
|
|
|
track => "trak", |
|
500
|
|
|
|
|
|
|
tracks => "trak", |
|
501
|
|
|
|
|
|
|
trafficway => "trfy", |
|
502
|
|
|
|
|
|
|
trail => "trl", |
|
503
|
|
|
|
|
|
|
trails => "trl", |
|
504
|
|
|
|
|
|
|
trk => "trak", |
|
505
|
|
|
|
|
|
|
trks => "trak", |
|
506
|
|
|
|
|
|
|
trls => "trl", |
|
507
|
|
|
|
|
|
|
trnpk => "tpke", |
|
508
|
|
|
|
|
|
|
trpk => "tpke", |
|
509
|
|
|
|
|
|
|
tunel => "tunl", |
|
510
|
|
|
|
|
|
|
tunls => "tunl", |
|
511
|
|
|
|
|
|
|
tunnel => "tunl", |
|
512
|
|
|
|
|
|
|
tunnels => "tunl", |
|
513
|
|
|
|
|
|
|
tunnl => "tunl", |
|
514
|
|
|
|
|
|
|
turnpike => "tpke", |
|
515
|
|
|
|
|
|
|
turnpk => "tpke", |
|
516
|
|
|
|
|
|
|
underpass => "upas", |
|
517
|
|
|
|
|
|
|
union => "un", |
|
518
|
|
|
|
|
|
|
unions => "uns", |
|
519
|
|
|
|
|
|
|
valley => "vly", |
|
520
|
|
|
|
|
|
|
valleys => "vlys", |
|
521
|
|
|
|
|
|
|
vally => "vly", |
|
522
|
|
|
|
|
|
|
vdct => "via", |
|
523
|
|
|
|
|
|
|
viadct => "via", |
|
524
|
|
|
|
|
|
|
viaduct => "via", |
|
525
|
|
|
|
|
|
|
view => "vw", |
|
526
|
|
|
|
|
|
|
views => "vws", |
|
527
|
|
|
|
|
|
|
vill => "vlg", |
|
528
|
|
|
|
|
|
|
villag => "vlg", |
|
529
|
|
|
|
|
|
|
village => "vlg", |
|
530
|
|
|
|
|
|
|
villages => "vlgs", |
|
531
|
|
|
|
|
|
|
ville => "vl", |
|
532
|
|
|
|
|
|
|
villg => "vlg", |
|
533
|
|
|
|
|
|
|
villiage => "vlg", |
|
534
|
|
|
|
|
|
|
vist => "vis", |
|
535
|
|
|
|
|
|
|
vista => "vis", |
|
536
|
|
|
|
|
|
|
vlly => "vly", |
|
537
|
|
|
|
|
|
|
vst => "vis", |
|
538
|
|
|
|
|
|
|
vsta => "vis", |
|
539
|
|
|
|
|
|
|
walks => "walk", |
|
540
|
|
|
|
|
|
|
well => "wl", |
|
541
|
|
|
|
|
|
|
wells => "wls", |
|
542
|
|
|
|
|
|
|
wy => "way", |
|
543
|
|
|
|
|
|
|
); |
|
544
|
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
our %_Street_Type_List; # set up in init() later; |
|
546
|
|
|
|
|
|
|
our %_Street_Type_Match; # set up in init() later; |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=head2 %State_Code |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Maps lowercased US state and territory names to their canonical two-letter |
|
551
|
|
|
|
|
|
|
postal abbreviations. See eg/get_state_abbrev.pl in the distribution |
|
552
|
|
|
|
|
|
|
for how this map was generated. |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=cut |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
our %State_Code = ( |
|
557
|
|
|
|
|
|
|
"alabama" => "AL", |
|
558
|
|
|
|
|
|
|
"alaska" => "AK", |
|
559
|
|
|
|
|
|
|
"american samoa" => "AS", |
|
560
|
|
|
|
|
|
|
"arizona" => "AZ", |
|
561
|
|
|
|
|
|
|
"arkansas" => "AR", |
|
562
|
|
|
|
|
|
|
"california" => "CA", |
|
563
|
|
|
|
|
|
|
"colorado" => "CO", |
|
564
|
|
|
|
|
|
|
"connecticut" => "CT", |
|
565
|
|
|
|
|
|
|
"delaware" => "DE", |
|
566
|
|
|
|
|
|
|
"district of columbia" => "DC", |
|
567
|
|
|
|
|
|
|
"federated states of micronesia" => "FM", |
|
568
|
|
|
|
|
|
|
"florida" => "FL", |
|
569
|
|
|
|
|
|
|
"georgia" => "GA", |
|
570
|
|
|
|
|
|
|
"guam" => "GU", |
|
571
|
|
|
|
|
|
|
"hawaii" => "HI", |
|
572
|
|
|
|
|
|
|
"idaho" => "ID", |
|
573
|
|
|
|
|
|
|
"illinois" => "IL", |
|
574
|
|
|
|
|
|
|
"indiana" => "IN", |
|
575
|
|
|
|
|
|
|
"iowa" => "IA", |
|
576
|
|
|
|
|
|
|
"kansas" => "KS", |
|
577
|
|
|
|
|
|
|
"kentucky" => "KY", |
|
578
|
|
|
|
|
|
|
"louisiana" => "LA", |
|
579
|
|
|
|
|
|
|
"maine" => "ME", |
|
580
|
|
|
|
|
|
|
"marshall islands" => "MH", |
|
581
|
|
|
|
|
|
|
"maryland" => "MD", |
|
582
|
|
|
|
|
|
|
"massachusetts" => "MA", |
|
583
|
|
|
|
|
|
|
"michigan" => "MI", |
|
584
|
|
|
|
|
|
|
"minnesota" => "MN", |
|
585
|
|
|
|
|
|
|
"mississippi" => "MS", |
|
586
|
|
|
|
|
|
|
"missouri" => "MO", |
|
587
|
|
|
|
|
|
|
"montana" => "MT", |
|
588
|
|
|
|
|
|
|
"nebraska" => "NE", |
|
589
|
|
|
|
|
|
|
"nevada" => "NV", |
|
590
|
|
|
|
|
|
|
"new hampshire" => "NH", |
|
591
|
|
|
|
|
|
|
"new jersey" => "NJ", |
|
592
|
|
|
|
|
|
|
"new mexico" => "NM", |
|
593
|
|
|
|
|
|
|
"new york" => "NY", |
|
594
|
|
|
|
|
|
|
"north carolina" => "NC", |
|
595
|
|
|
|
|
|
|
"north dakota" => "ND", |
|
596
|
|
|
|
|
|
|
"northern mariana islands" => "MP", |
|
597
|
|
|
|
|
|
|
"ohio" => "OH", |
|
598
|
|
|
|
|
|
|
"oklahoma" => "OK", |
|
599
|
|
|
|
|
|
|
"oregon" => "OR", |
|
600
|
|
|
|
|
|
|
"palau" => "PW", |
|
601
|
|
|
|
|
|
|
"pennsylvania" => "PA", |
|
602
|
|
|
|
|
|
|
"puerto rico" => "PR", |
|
603
|
|
|
|
|
|
|
"rhode island" => "RI", |
|
604
|
|
|
|
|
|
|
"south carolina" => "SC", |
|
605
|
|
|
|
|
|
|
"south dakota" => "SD", |
|
606
|
|
|
|
|
|
|
"tennessee" => "TN", |
|
607
|
|
|
|
|
|
|
"texas" => "TX", |
|
608
|
|
|
|
|
|
|
"utah" => "UT", |
|
609
|
|
|
|
|
|
|
"vermont" => "VT", |
|
610
|
|
|
|
|
|
|
"virgin islands" => "VI", |
|
611
|
|
|
|
|
|
|
"virginia" => "VA", |
|
612
|
|
|
|
|
|
|
"washington" => "WA", |
|
613
|
|
|
|
|
|
|
"west virginia" => "WV", |
|
614
|
|
|
|
|
|
|
"wisconsin" => "WI", |
|
615
|
|
|
|
|
|
|
"wyoming" => "WY", |
|
616
|
|
|
|
|
|
|
); |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head2 %State_FIPS |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Maps two-digit FIPS-55 US state and territory codes (including the |
|
621
|
|
|
|
|
|
|
leading zero!) as found in TIGER/Line to the state's canonical two-letter |
|
622
|
|
|
|
|
|
|
postal abbreviation. See eg/get_state_fips.pl in the distribution for |
|
623
|
|
|
|
|
|
|
how this map was generated. Yes, I know the FIPS data also has the state |
|
624
|
|
|
|
|
|
|
names. Oops. |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=cut |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
our %State_FIPS = ( |
|
629
|
|
|
|
|
|
|
"01" => "AL", |
|
630
|
|
|
|
|
|
|
"02" => "AK", |
|
631
|
|
|
|
|
|
|
"04" => "AZ", |
|
632
|
|
|
|
|
|
|
"05" => "AR", |
|
633
|
|
|
|
|
|
|
"06" => "CA", |
|
634
|
|
|
|
|
|
|
"08" => "CO", |
|
635
|
|
|
|
|
|
|
"09" => "CT", |
|
636
|
|
|
|
|
|
|
"10" => "DE", |
|
637
|
|
|
|
|
|
|
"11" => "DC", |
|
638
|
|
|
|
|
|
|
"12" => "FL", |
|
639
|
|
|
|
|
|
|
"13" => "GA", |
|
640
|
|
|
|
|
|
|
"15" => "HI", |
|
641
|
|
|
|
|
|
|
"16" => "ID", |
|
642
|
|
|
|
|
|
|
"17" => "IL", |
|
643
|
|
|
|
|
|
|
"18" => "IN", |
|
644
|
|
|
|
|
|
|
"19" => "IA", |
|
645
|
|
|
|
|
|
|
"20" => "KS", |
|
646
|
|
|
|
|
|
|
"21" => "KY", |
|
647
|
|
|
|
|
|
|
"22" => "LA", |
|
648
|
|
|
|
|
|
|
"23" => "ME", |
|
649
|
|
|
|
|
|
|
"24" => "MD", |
|
650
|
|
|
|
|
|
|
"25" => "MA", |
|
651
|
|
|
|
|
|
|
"26" => "MI", |
|
652
|
|
|
|
|
|
|
"27" => "MN", |
|
653
|
|
|
|
|
|
|
"28" => "MS", |
|
654
|
|
|
|
|
|
|
"29" => "MO", |
|
655
|
|
|
|
|
|
|
"30" => "MT", |
|
656
|
|
|
|
|
|
|
"31" => "NE", |
|
657
|
|
|
|
|
|
|
"32" => "NV", |
|
658
|
|
|
|
|
|
|
"33" => "NH", |
|
659
|
|
|
|
|
|
|
"34" => "NJ", |
|
660
|
|
|
|
|
|
|
"35" => "NM", |
|
661
|
|
|
|
|
|
|
"36" => "NY", |
|
662
|
|
|
|
|
|
|
"37" => "NC", |
|
663
|
|
|
|
|
|
|
"38" => "ND", |
|
664
|
|
|
|
|
|
|
"39" => "OH", |
|
665
|
|
|
|
|
|
|
"40" => "OK", |
|
666
|
|
|
|
|
|
|
"41" => "OR", |
|
667
|
|
|
|
|
|
|
"42" => "PA", |
|
668
|
|
|
|
|
|
|
"44" => "RI", |
|
669
|
|
|
|
|
|
|
"45" => "SC", |
|
670
|
|
|
|
|
|
|
"46" => "SD", |
|
671
|
|
|
|
|
|
|
"47" => "TN", |
|
672
|
|
|
|
|
|
|
"48" => "TX", |
|
673
|
|
|
|
|
|
|
"49" => "UT", |
|
674
|
|
|
|
|
|
|
"50" => "VT", |
|
675
|
|
|
|
|
|
|
"51" => "VA", |
|
676
|
|
|
|
|
|
|
"53" => "WA", |
|
677
|
|
|
|
|
|
|
"54" => "WV", |
|
678
|
|
|
|
|
|
|
"55" => "WI", |
|
679
|
|
|
|
|
|
|
"56" => "WY", |
|
680
|
|
|
|
|
|
|
"72" => "PR", |
|
681
|
|
|
|
|
|
|
"78" => "VI", |
|
682
|
|
|
|
|
|
|
); |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
our %FIPS_State; # setup in init() later; |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
=head2 %Addr_Match |
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
A hash of compiled regular expressions corresponding to different |
|
689
|
|
|
|
|
|
|
types of address or address portions. Defined regexen include |
|
690
|
|
|
|
|
|
|
type, number, fraction, state, direct(ion), dircode, zip, corner, |
|
691
|
|
|
|
|
|
|
street, place, address, and intersection. |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
Direct use of these patterns is not recommended because they may change in |
|
694
|
|
|
|
|
|
|
subtle ways between releases. |
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
=cut |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
our %Addr_Match; # setup in init() |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
init(); |
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
our %Normalize_Map = ( |
|
703
|
|
|
|
|
|
|
prefix => \%Directional, |
|
704
|
|
|
|
|
|
|
prefix1 => \%Directional, |
|
705
|
|
|
|
|
|
|
prefix2 => \%Directional, |
|
706
|
|
|
|
|
|
|
suffix => \%Directional, |
|
707
|
|
|
|
|
|
|
suffix1 => \%Directional, |
|
708
|
|
|
|
|
|
|
suffix2 => \%Directional, |
|
709
|
|
|
|
|
|
|
type => \%Street_Type, |
|
710
|
|
|
|
|
|
|
type1 => \%Street_Type, |
|
711
|
|
|
|
|
|
|
type2 => \%Street_Type, |
|
712
|
|
|
|
|
|
|
state => \%State_Code, |
|
713
|
|
|
|
|
|
|
); |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=head1 CLASS ACCESSORS |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=head2 avoid_redundant_street_type |
|
719
|
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
If true then L</normalize_address> will set the C<type> field to undef |
|
721
|
|
|
|
|
|
|
if the C<street> field contains a word that corresponds to the C<type> in L<\%Street_Type>. |
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
For example, given "4321 Country Road 7", C<street> will be "Country Road 7" |
|
724
|
|
|
|
|
|
|
and C<type> will be "Rd". With avoid_redundant_street_type set true, C<type> |
|
725
|
|
|
|
|
|
|
will be undef because C<street> matches /\b (rd|road) \b/ix; |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Also applies to C<type1> for C<street1> and C<type2> for C<street2> |
|
728
|
|
|
|
|
|
|
fields for intersections. |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
The default is false, for backwards compatibility. |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
=cut |
|
733
|
|
|
|
|
|
|
|
|
734
|
1
|
|
|
1
|
|
1358
|
BEGIN { __PACKAGE__->mk_classdata('avoid_redundant_street_type' => 0) } |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
=head1 CLASS METHODS |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
=head2 init |
|
739
|
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
# Add another street type mapping: |
|
741
|
|
|
|
|
|
|
$Geo::StreetAddress::US::Street_Type{'cur'}='curv'; |
|
742
|
|
|
|
|
|
|
# Re-initialize to pick up the change |
|
743
|
|
|
|
|
|
|
Geo::StreetAddress::US::init(); |
|
744
|
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
Runs the setup on globals. This is run automatically when the module is loaded, |
|
746
|
|
|
|
|
|
|
but if you subsequently change the globals, you should run it again. |
|
747
|
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
=cut |
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
# (Re)build every derived lookup table and compiled pattern from the
# package-level source tables.
#
# Reads:  %Directional, %State_FIPS, %Street_Type, %State_Code
# Writes: %Direction_Code, %FIPS_State, %_Street_Type_List,
#         %_Street_Type_Match, %Addr_Match
#
# The patterns stored in %Addr_Match record what they matched by assigning
# into the global hash %_ from embedded (?{ ... }) code blocks, using $^N
# (the text of the most recently closed capture group).  Callers are expected
# to "local %_" before matching and to read the fields back out afterwards.
#
# Takes no arguments; the return value is not meaningful.
sub init {

    # Reverse maps of the two-way lookup tables.
    %Direction_Code = reverse %Directional;
    %FIPS_State     = reverse %State_FIPS;

    # Every key AND every value of %Street_Type counts as a known street type
    # (the hash is deliberately flattened, not just its keys).
    %_Street_Type_List = map { $_ => 1 } %Street_Type;

    # build hash { 'rd' => qr/\b (?: rd|road ) \b/xi, ... }
    %_Street_Type_Match = map { $_ => $_ } values %Street_Type;
    while ( my ($type_alt, $type_abbrv) = each %Street_Type ) {
        $_Street_Type_Match{$type_abbrv} .= "|\Q$type_alt";
    }
    %_Street_Type_Match = map {
        my $alts = $_Street_Type_Match{$_};
        $_ => qr/\b (?: $alts ) \b/xi;
    } keys %_Street_Type_Match;

    # Needed so that strings/patterns carrying (?{ ... }) blocks may be
    # interpolated into the larger patterns built below.
    use re 'eval';

    %Addr_Match = (
        type     => join("|", keys %_Street_Type_List),
        fraction => qr{\d+\/\d+},
        state    => '\b(?:'.join("|",
            # escape spaces in state names (e.g., "new york" --> "new\\ york")
            # so they still match in the x environment below
            map { ( quotemeta $_) } keys %State_Code, values %State_Code
        ).')\b',
        direct   => join("|",
            # map direction names to direction codes
            keys %Directional,
            # also map the dotted version of the code to the code itself
            map {
                my $c = $_; $c =~ s/(\w)/$1./g; ( quotemeta $c, $_ )
            } sort { length $b <=> length $a } values %Directional
        ),
        dircode  => join("|", keys %Direction_Code),
        zip      => qr/\d{5}(?:-?\d{4})?/,  # XXX add \b?
        corner   => qr/(?:\band\b|\bat\b|&|\@)/i,
    );

    # we don't include letters in the number regex because we want to
    # treat "42S" as "42 S" (42 South). For example,
    # Utah and Wisconsin have a more elaborate system of block numbering
    # http://en.wikipedia.org/wiki/House_number#Block_numbers
    # (Terminated with ';' -- the original ended this statement with a comma,
    # silently chaining it to the next assignment via the comma operator.)
    $Addr_Match{number} = qr/(\d+-?\d*)(?=\D) (?{ $_{number} = $^N })/ix;

    # note that expressions like [^,]+ may scan more than you expect
    $Addr_Match{street} = qr/
        (?:
          # special case for addresses like 100 South Street
          (?:($Addr_Match{direct})\W+           (?{ $_{street} = $^N })
             ($Addr_Match{type})\b              (?{ $_{type}   = $^N }))
             #(?{ $_{_street}.=1 })
          |
          (?:($Addr_Match{direct})\W+           (?{ $_{prefix} = $^N }))?
          (?:
            ([^,]*\d)                           (?{ $_{street} = $^N })
            (?:[^\w,]*($Addr_Match{direct})\b   (?{ $_{suffix} = $^N; $_{type}||='' }))
            #(?{ $_{_street}.=3 })
           |
            ([^,]+)                             (?{ $_{street} = $^N })
            (?:[^\w,]+($Addr_Match{type})\b     (?{ $_{type}   = $^N }))
            (?:[^\w,]+($Addr_Match{direct})\b   (?{ $_{suffix} = $^N }))?
            #(?{ $_{_street}.=2 })
           |
            ([^,]+?)                            (?{ $_{street} = $^N; $_{type}||='' })
            (?:[^\w,]+($Addr_Match{type})\b     (?{ $_{type}   = $^N }))?
            (?:[^\w,]+($Addr_Match{direct})\b   (?{ $_{suffix} = $^N }))?
            #(?{ $_{_street}.=4 })
          )
        )
    /ix;

    # http://pe.usps.com/text/pub28/pub28c2_003.htm
    # TODO add support for those that don't require a number
    # TODO map to standard names/abbreviations
    $Addr_Match{sec_unit_type_numbered} = qr/
          (su?i?te
            |p\W*[om]\W*b(?:ox)?
            |(?:ap|dep)(?:ar)?t(?:me?nt)?
            |ro*m
            |flo*r?
            |uni?t
            |bu?i?ldi?n?g
            |ha?nga?r
            |lo?t
            |pier
            |slip
            |spa?ce?
            |stop
            |tra?i?le?r
            |box)(?![a-z])            (?{ $_{sec_unit_type}   = $^N })
    /ix;

    $Addr_Match{sec_unit_type_unnumbered} = qr/
          (ba?se?me?n?t
            |fro?nt
            |lo?bby
            |lowe?r
            |off?i?ce?
            |pe?n?t?ho?u?s?e?
            |rear
            |side
            |uppe?r
            )\b                      (?{ $_{sec_unit_type}   = $^N })
    /ix;

    # The outer group is non-capturing "(?:".  The original spelled it "(:?",
    # which is a CAPTURING group that optionally matches a literal colon.
    $Addr_Match{sec_unit} = qr/
        (?:
            (?: (?:$Addr_Match{sec_unit_type_numbered} \W*)
                | (\#)\W*            (?{ $_{sec_unit_type}   = $^N })
            )
            (  [\w-]+)               (?{ $_{sec_unit_num}    = $^N })
        )
        |
            $Addr_Match{sec_unit_type_unnumbered}
    /ix;

    $Addr_Match{city_and_state} = qr/
        (?:
            ([^\d,]+?)\W+            (?{ $_{city}   = $^N })
            ($Addr_Match{state})     (?{ $_{state}  = $^N })
        )
    /ix;

    $Addr_Match{place} = qr/
        (?:$Addr_Match{city_and_state}\W*)?
        (?:($Addr_Match{zip})        (?{ $_{zip}    = $^N }))?
    /ix;

    # the \x23 below is an alias for '#' to avoid a bug in perl 5.18.1
    # https://rt.cpan.org/Ticket/Display.html?id=91420
    $Addr_Match{address} = qr/
        ^
        [^\w\x23]*    # skip non-word chars except # (eg unit)
        (  $Addr_Match{number} )\W*
        (?:$Addr_Match{fraction}\W*)?
           $Addr_Match{street}\W+
        (?:$Addr_Match{sec_unit}\W+)?
           $Addr_Match{place}
        \W*         # allow only non-word chars at end
        $           # right up to end of string
    /ix;

    # Separator between informal-address components: non-word run or end.
    my $sep = qr/(?:\W+|\Z)/;

    $Addr_Match{informal_address} = qr/
        ^
        \s*         # skip leading whitespace
        (?:$Addr_Match{sec_unit} $sep)?
        (?:$Addr_Match{number})?\W*
        (?:$Addr_Match{fraction}\W*)?
           $Addr_Match{street} $sep
        (?:$Addr_Match{sec_unit} $sep)?
        (?:$Addr_Match{place})?
        # don't require match to reach end of string
    /ix;

    # After each street is matched, its prefix, street, type and suffix
    # fields are renamed with a trailing 1 or 2 so the two streets of the
    # intersection do not overwrite each other in %_.
    $Addr_Match{intersection} = qr/^\W*
           $Addr_Match{street}\W*?

        \s+$Addr_Match{corner}\s+

        (?{ exists $_{$_} and $_{$_.1} = delete $_{$_} for (qw{prefix street type suffix})})
           $Addr_Match{street}\W+
        (?{ exists $_{$_} and $_{$_.2} = delete $_{$_} for (qw{prefix street type suffix})})

           $Addr_Match{place}
        \W*$/ix;
}
|
922
|
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=head2 parse_location |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
$spec = Geo::StreetAddress::US->parse_location( $string ) |
|
926
|
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
Parses any address or intersection string and returns the appropriate |
|
928
|
|
|
|
|
|
|
specifier. If $string matches the $Addr_Match{corner} pattern then |
|
929
|
|
|
|
|
|
|
parse_intersection() is used. Else parse_address() is called and if that |
|
930
|
|
|
|
|
|
|
returns false then parse_informal_address() is called. |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
=cut |
|
933
|
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
# Parse any address or intersection string.
#
#   $class - class (or subclass) name the parse_* methods are invoked on
#   $addr  - the string to parse
#
# If $addr matches $Addr_Match{corner} it is handed to parse_intersection().
# Otherwise parse_address() is tried first, falling back to
# parse_informal_address() when that returns false.  Returns whatever the
# selected parser returns.
sub parse_location {
    my ($class, $addr) = @_;

    # No /o modifier: /o would freeze the pattern the first time this
    # statement runs, so a later change to $Addr_Match{corner} would be
    # silently ignored.
    if ($addr =~ /$Addr_Match{corner}/is) {
        return $class->parse_intersection($addr);
    }

    return $class->parse_address($addr)
        || $class->parse_informal_address($addr);
}
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
=head2 parse_address |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
$spec = Geo::StreetAddress::US->parse_address( $address_string ) |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
Parses a street address into an address specifier using the $Addr_Match{address} |
|
950
|
|
|
|
|
|
|
pattern. Returns undef if the address cannot be parsed as a complete formal |
|
951
|
|
|
|
|
|
|
address. |
|
952
|
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
You may want to use parse_location() instead. |
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
=cut |
|
956
|
|
|
|
|
|
|
|
|
957
|
|
|
|
|
|
|
# Parse a complete, formal street address using $Addr_Match{address}.
#
#   $class - class (or subclass) name; normalize_address() is called on it
#   $addr  - the address string
#
# Returns the result of normalize_address() on a fresh hash ref holding the
# fields captured during the match, or undef if the pattern does not match.
sub parse_address {
    my ($class, $addr) = @_;

    # The pattern's embedded (?{ ... }) blocks store captured fields in the
    # global %_; localize it so each call starts from an empty hash.
    local %_;

    # No /o modifier: /o would keep using the pattern compiled on the first
    # call, even after the globals are changed and init() is run again.
    # "return undef" (not a bare return) is kept so list-context callers
    # still receive a single undef element, as before.
    $addr =~ /$Addr_Match{address}/is
        or return undef;

    return $class->normalize_address({ %_ });
}
|
966
|
|
|
|
|
|
|
|
|
967
|
|
|
|
|
|
|
|
|
968
|
|
|
|
|
|
|
=head2 parse_informal_address |
|
969
|
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
$spec = Geo::StreetAddress::US->parse_informal_address( $address_string ) |
|
971
|
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
Acts like parse_address() except that it handles a wider range of address |
|
973
|
|
|
|
|
|
|
formats because it uses the $Addr_Match{informal_address} pattern. That means a |
|
974
|
|
|
|
|
|
|
unit can come first, a street number is optional, and the city and state aren't |
|
975
|
|
|
|
|
|
|
needed. This means that informal addresses like "#42 123 Main St" can be parsed. |
|
976
|
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
Returns undef if the address cannot be parsed. |
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
You may want to use parse_location() instead. |
|
980
|
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
=cut |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
# Parse a loosely formatted address using $Addr_Match{informal_address}.
#
#   $class - class (or subclass) name; normalize_address() is called on it
#   $addr  - the address string
#
# Returns the result of normalize_address() on a fresh hash ref holding the
# fields captured during the match, or undef if the pattern does not match.
sub parse_informal_address {
    my ($class, $addr) = @_;

    # The pattern's embedded (?{ ... }) blocks store captured fields in the
    # global %_; localize it so each call starts from an empty hash.
    local %_;

    # No /o modifier: /o would keep using the pattern compiled on the first
    # call, even after the globals are changed and init() is run again.
    # "return undef" (not a bare return) is kept so list-context callers
    # still receive a single undef element, as before.
    $addr =~ /$Addr_Match{informal_address}/is
        or return undef;

    return $class->normalize_address({ %_ });
}
|
992
|
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
=head2 parse_intersection |
|
995
|
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
$spec = Geo::StreetAddress::US->parse_intersection( $intersection_string ) |
|
997
|
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
Parses an intersection string into an intersection specifier, returning |
|
999
|
|
|
|
|
|
|
undef if the address cannot be parsed. You probably want to use |
|
1000
|
|
|
|
|
|
|
parse_location() instead. |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
=cut |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
# Parse an intersection string using $Addr_Match{intersection}.
#
#   $class - class (or subclass) name; normalize_address() is called on it
#   $addr  - the intersection string
#
# Returns the result of normalize_address() on a hash ref of the captured
# fields (the per-street fields carry a trailing 1 or 2), or undef if the
# pattern does not match.
sub parse_intersection {
    my ($class, $addr) = @_;

    # The pattern's embedded (?{ ... }) blocks store captured fields in the
    # global %_; localize it so each call starts from an empty hash.
    local %_;

    # No /o modifier: /o would keep using the pattern compiled on the first
    # call, even after the globals are changed and init() is run again.
    # "return undef" (not a bare return) is kept so list-context callers
    # still receive a single undef element, as before.
    $addr =~ /$Addr_Match{intersection}/is
        or return undef;

    my %part = %_;

    # if we've a type2 and type1 is either missing or the same,
    # and the type seems plural,
    # and is still valid if the trailing 's' is removed, then remove it.
    # So "X & Y Streets" becomes "X Street" and "Y Street".
    if ($part{type2} && (!$part{type1} or $part{type1} eq $part{type2})) {
        my $type = $part{type2};

        # $Addr_Match{type} is a bare "a|b|c" alternation, so it must be
        # wrapped in (?: ... ) for ^ and $ to anchor every alternative;
        # unwrapped, only the first and last alternatives are anchored and
        # any string merely containing a street type would pass.
        if ($type =~ s/s\W*$//is and $type =~ /^(?:$Addr_Match{type})$/is) {
            $part{type1} = $part{type2} = $type;
        }
    }

    return $class->normalize_address(\%part);
}
|
1025
|
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
=head2 normalize_address |
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
$spec = Geo::StreetAddress::US->normalize_address( $spec ) |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
Takes an address or intersection specifier, and normalizes its components, |
|
1032
|
|
|
|
|
|
|
stripping out all leading and trailing whitespace and punctuation, and |
|
1033
|
|
|
|
|
|
|
substituting official abbreviations for prefix, suffix, type, and state values. |
|
1034
|
|
|
|
|
|
|
Also, city names that are prefixed with a directional abbreviation (e.g. N, NE, |
|
1035
|
|
|
|
|
|
|
etc.) have the abbreviation expanded. The original specifier ref is returned. |
|
1036
|
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
Typically, you won't need to use this method, as the parse_*() methods |
|
1038
|
|
|
|
|
|
|
call it for you. |
|
1039
|
|
|
|
|
|
|
|
|
1040
|
|
|
|
|
|
|
N.B., normalize_address() crops 9-digit ZIP codes to 5 digits. This is for |
|
1041
|
|
|
|
|
|
|
the benefit of Geo::Coder::US and may not be what you want. E-mail me if this |
|
1042
|
|
|
|
|
|
|
is a problem and I'll see what I can do to fix it. |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
=cut |
|
1045
|
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
# Normalize the components of an address or intersection specifier in place.
#
#   $class - class (or subclass) name; its avoid_redundant_street_type
#            class-data setting is consulted
#   $part  - hash ref of address fields; modified in place
#
# Returns the same $part reference that was passed in.
# Dies with a "panic" message if avoid_redundant_street_type is enabled and
# a street type has no entry in %_Street_Type_Match.
sub normalize_address {
    my ($class, $part) = @_;

    #m/^_/ and delete $part->{$_} for keys %$part; # for debug

    # strip off some punctuation: leading and trailing whitespace, and any
    # character that is not a word char, whitespace, '-', '#' or '&'
    defined($_) && s/^\s+|\s+$|[^\w\s\-\#\&]//gs for values %$part;

    # Replace each field that has a lookup table in %Normalize_Map with its
    # mapped value, when the lower-cased field value is a key of that table.
    while (my ($key, $map) = each %Normalize_Map) {
        $part->{$key} = $map->{lc $part->{$key}}
              if defined $part->{$key}
            and exists $map->{lc $part->{$key}};
    }

    # Street types are presented capitalized: "St", "Ave", ...
    $part->{$_} = ucfirst lc $part->{$_}
        for grep(exists $part->{$_}, qw( type type1 type2 ));

    if ($class->avoid_redundant_street_type) {
        # '' is a plain address; '1' and '2' are the two intersection streets.
        for my $suffix ('', '1', '2') {
            next unless my $street = $part->{"street$suffix"};
            next unless my $type   = $part->{"type$suffix"};
            my $type_regex = $_Street_Type_Match{lc $type}
                or die "panic: no _Street_Type_Match for $type";
            # The street name already contains this type (in some spelling),
            # so blank the separate type field instead of repeating it.
            $part->{"type$suffix"} = undef
                if $street =~ $type_regex;
        }
    }

    # attempt to expand directional prefixes on place names
    # No /o modifier: /o would freeze $Addr_Match{dircode} as it was on the
    # first call, ignoring later changes to the globals followed by init().
    $part->{city} =~ s/^($Addr_Match{dircode})\s+(?=\S)
                      /\u$Direction_Code{uc $1} /isx
                      if $part->{city};

    # strip ZIP+4 (which may be missing a hyphen)
    $part->{zip} =~ s/^(.{5}).*/$1/s if $part->{zip};

    return $part;
}
|
1084
|
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
1; |
|
1087
|
|
|
|
|
|
|
__END__ |