line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
Geo::Coder::US::Import - Import TIGER/Line data into a Geo::Coder::US database |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Geo::Coder::US::Import; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Geo::Coder::US->set_db( "/path/to/geocoder.db", 1 ); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Geo::Coder::US::Import->load_tiger_data( "TGR06075" ); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Geo::Coder::US::Import->load_fips_data( "All_fips55.txt" ); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Geo::Coder::US::Import provides methods for importing TIGER/Line data |
18
|
|
|
|
|
|
|
into a BerkeleyDB database for use with Geo::Coder::US. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Instead of using this module directly, you may want to use one of the |
21
|
|
|
|
|
|
|
included utility scripts in the eg/ directory of this distribution. |
22
|
|
|
|
|
|
|
The import_tiger.pl script imports uncompressed TIGER/Line files from a |
23
|
|
|
|
|
|
|
given location: |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
$ perl eg/import_tiger.pl geocoder.db /path/to/tiger/files/TGRnnnnn |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Be sure to leave off the .RT? extensions or import_tiger.pl will complain. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
The import_tiger_zip.pl script imports compressed TIGER/Line data by |
30
|
|
|
|
|
|
|
using L<Archive::Zip> to extract only the needed files from the ZIP file |
31
|
|
|
|
|
|
|
into a temporary directory, which it cleans up for you afterwards. This |
32
|
|
|
|
|
|
|
is the B<recommended> method of data import, as it can handle multiple |
33
|
|
|
|
|
|
|
ZIP files at once: |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
$ perl eg/import_tiger_zip.pl geocoder.db /path/to/tiger/zips/*.zip |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
Both of these import scripts need to cache a lot of data in memory, so |
38
|
|
|
|
|
|
|
you may find that you need one or two hundred megs of RAM for the import |
39
|
|
|
|
|
|
|
to run to completion. The import process takes about 6 hours to import |
40
|
|
|
|
|
|
|
all 4 gigabytes of compressed TIGER/Line data on a 2 GHz Linux machine, |
41
|
|
|
|
|
|
|
and it appears to be mostly processor bound. The final BerkeleyDB database |
42
|
|
|
|
|
|
|
produced by such an import tops out around 750 megabytes. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
One way of avoiding the RAM bloat on import is to use xargs to run |
45
|
|
|
|
|
|
|
import_tiger_zip.pl on each TIGER/Line ZIP separately: |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$ find ~/tiger -name '*.zip' | \ |
48
|
|
|
|
|
|
|
xargs -n1 perl eg/import_tiger_zip.pl geocoder.db |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
Similarly, you can import FIPS-55 place name data into a |
51
|
|
|
|
|
|
|
Geo::Coder::US database with eg/import_fips.pl: |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$ perl eg/import_fips.pl geocoder.db All_fips55.txt |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
Note that you can make a perfectly good geocoder for a particular |
56
|
|
|
|
|
|
|
region of the US by simply importing only the TIGER/Line and FIPS-55 |
57
|
|
|
|
|
|
|
files for the region you're interested in. You only need to import all |
58
|
|
|
|
|
|
|
of the TIGER/Line data sets in the event that you want a geocoder for |
59
|
|
|
|
|
|
|
the whole US. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=cut |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
package Geo::Coder::US::Import; |
64
|
|
|
|
|
|
|
|
65
|
2
|
|
|
2
|
|
3268
|
use Geo::Coder::US; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
use Geo::StreetAddress::US; |
67
|
|
|
|
|
|
|
use Geo::TigerLine::Record::1; |
68
|
|
|
|
|
|
|
use Geo::TigerLine::Record::4; |
69
|
|
|
|
|
|
|
use Geo::TigerLine::Record::5; |
70
|
|
|
|
|
|
|
use Geo::TigerLine::Record::6; |
71
|
|
|
|
|
|
|
use Geo::TigerLine::Record::C; |
72
|
|
|
|
|
|
|
use Geo::Fips55; |
73
|
|
|
|
|
|
|
use Carp; |
74
|
|
|
|
|
|
|
use strict; |
75
|
|
|
|
|
|
|
use warnings; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
my (%place, %street, %seg, %tlid, %feat, %alt, |
78
|
|
|
|
|
|
|
%fips_to_zip, %zip_to_fips, |
79
|
|
|
|
|
|
|
%place_type, %place_name); |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 CLASS METHODS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=over 4 |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item load_tiger_data( $tiger_basename ) |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Loads all data from the specified TIGER/Line data set in order of the |
88
|
|
|
|
|
|
|
following record types: C, 1, 6, 4, 5. This ordering ensures that record |
89
|
|
|
|
|
|
|
references are set correctly. You may prefix $tiger_basename with an |
90
|
|
|
|
|
|
|
absolute or relative path, but B<do not> provide the .RT? filename suffix |
91
|
|
|
|
|
|
|
as part of $tiger_basename or load_tiger_data() will become cranky. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Note that you B<must> first call Geo::Coder::US->set_db() with a second |
94
|
|
|
|
|
|
|
argument with a true value, or set_db() won't open the database for |
95
|
|
|
|
|
|
|
writing. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=item load_fips_data( $fips_file ) |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Loads all the data from the specified FIPS-55 gazetteer file. This |
100
|
|
|
|
|
|
|
provides additional or alternate place name data to supplement |
101
|
|
|
|
|
|
|
TIGER/Line. |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=cut |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# Move a directional word that is embedded in a record's feature name into
# the record's dedicated prefix/suffix directional fields.
#
# $record is a hash reference with fename, fedirp and fedirs keys; it is
# modified in place.  A directional (full or abbreviated, as matched by
# $Geo::StreetAddress::US::Addr_Match{direct}) is only lifted out of the
# name when the corresponding field is still empty, and only when some
# other non-blank text remains in the name.  The stored value is the
# mapping from %Geo::StreetAddress::US::Directional when there is one,
# otherwise the matched word upper-cased.
sub _fixup_directionals {
    my ($rec) = @_;

    # Leading directional: "<direction> <rest of name>".
    if ( !$rec->{fedirp}
        && $rec->{fename} =~
            s/^($Geo::StreetAddress::US::Addr_Match{direct})\s+(?=\S)//ios )
    {
        my $word = $1;
        $rec->{fedirp} =
            $Geo::StreetAddress::US::Directional{ lc $word } || uc $word;
    }

    # Trailing directional: "<rest of name> <direction>".
    if ( !$rec->{fedirs}
        && $rec->{fename} =~
            s/(?<=\S)\s+($Geo::StreetAddress::US::Addr_Match{direct})$//ios )
    {
        my $word = $1;
        $rec->{fedirs} =
            $Geo::StreetAddress::US::Directional{ lc $word } || uc $word;
    }
}
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Record one address range for a side of a line segment.
#
#   $tlid  - key of the segment in the file-scoped %seg cache
#   $side  - "r" stores into slot 4 of the segment entry; any other
#            value stores into slot 5
#   $from, $to - range end points; every non-digit character is removed
#            before they are stored
#
# Each value in %seg is [lat, lon, lat, lon, [right side], [left side]].
sub _add_range {
    my ($tlid, $side, $from, $to) = @_;

    (my $first = $from) =~ s/\D//go;
    (my $last  = $to)   =~ s/\D//go;

    my $slot = $side eq "r" ? 4 : 5;
    push @{ $seg{$tlid}[$slot] }, $first, $last;
}
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# Callback for record type 1.
#
# Records without a feature name, or whose cfcc does not start with "A",
# are ignored.  For accepted records the segment end points are cached in
# %seg, directionals embedded in the name are normalised, and then each
# side ("r", "l") that has a place (or county subdivision) code, both
# address range ends and a usable five-digit ZIP contributes:
#   - an address range on the segment (%seg, via _add_range),
#   - a street key "/zip/fename/fetype/fedirp/fedirs" (%tlid, %street),
#   - the first FIPS code seen for that street key (%place),
#   - FIPS <-> ZIP cross references (%fips_to_zip, %zip_to_fips).
sub _type_1 {
    my ($record) = @_;

    return unless $record->{fename};
    return unless $record->{cfcc} =~ /^A/o;

    my $tlid = $record->{tlid};

    # each value in %seg is [lat, lon, lat, lon, [right side], [left side]]
    unless ( $seg{$tlid} ) {
        my @ends = map { abs } @$record{qw{ frlat frlong tolat tolong }};
        $seg{$tlid} = [ @ends, [], [] ];
    }

    # fix up direction prefix embedded in feature name
    _fixup_directionals($record);

    foreach my $side (qw(r l)) {
        # Prefer the place code; fall back to the county subdivision.
        my $local = $record->{"place$side"} || $record->{"cousub$side"};
        next unless $local;
        my $fips = $record->{"state$side"} . $local;

        my $from = $record->{"fradd$side"};
        my $to   = $record->{"toadd$side"};
        my $zip  = $record->{"zip$side"};

        next unless $from and $to and $zip;
        next unless $zip =~ /^\d{5}$/os;
        next if $zip eq '99999';

        _add_range( $tlid, $side, $from, $to );

        my @name_parts = @$record{qw{ fename fetype fedirp fedirs }};
        my $key = join("/", "", $zip, @name_parts);

        $tlid{"$tlid$side"} = $key;
        $street{$key}{$tlid}++;
        $place{$key} ||= $fips;

        $fips_to_zip{$fips}{$zip}++;

        # A ZIP is only mapped to places whose type is known.  A place of
        # type 'C' always takes the ZIP; any other known type takes it
        # only while the ZIP is unassigned or assigned to a non-'C' place.
        my $type = $place_type{$fips};
        if ($type) {
            my $current = $zip_to_fips{$zip};
            if (   $type eq 'C'
                || !$current
                || $place_type{$current} ne 'C' )
            {
                $zip_to_fips{$zip} = $fips;
            }
        }
    }
}
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# Callback for record type 4.
#
# For every non-empty feat1 .. feat5 value on the record, append the
# record's tlid to the list kept under that value in %feat.
sub _type_4 {
    my ($record) = @_;

    for my $n (1 .. 5) {
        my $feat_id = $record->{"feat$n"};
        next unless $feat_id;
        push @{ $feat{$feat_id} }, $record->{tlid};
    }
}
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# Callback for record type 5.
#
# Looks up the segment ids cached in %feat for the record's feat value.
# For each side of each such segment whose street key (from %tlid) is
# already present in the database, an alternate street key is built from
# the same ZIP and this record's name parts, and the name portion of the
# primary key is counted under it in %alt.
sub _type_5 {
    my ($record) = @_;

    my $segment_ids = $feat{ $record->{feat} };
    return unless $segment_ids;

    foreach my $segment_id (@$segment_ids) {
        foreach my $side (qw(r l)) {
            my $primary = $tlid{"$segment_id$side"};
            next unless $primary;
            next unless exists $Geo::Coder::US::DB{$primary};

            # Split "/zip/rest..." into the ZIP and the "/rest..." tail.
            my ($zip, $rt1) = ($primary =~ /^\/(\d+)(\/.+)/gos);

            _fixup_directionals($record);

            my @name_parts = @$record{qw{ fename fetype fedirp fedirs }};
            my $rt5 = join("/", "", $zip, @name_parts);
            $alt{$rt5}{$rt1}++;
        }
    }
}
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Callback for record type 6.
#
# Adds further address ranges to a segment that is already cached in %seg;
# records for unknown segments are ignored.  A side is only added when
# both ends of its range are present.
sub _type_6 {
    my ($record) = @_;

    my $tlid = $record->{tlid};
    exists $seg{$tlid} or return;

    foreach my $side (qw(r l)) {
        my $from = $record->{"fradd$side"};
        my $to   = $record->{"toadd$side"};
        next unless $from and $to;
        _add_range( $tlid, $side, $from, $to );
    }
}
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
# Callback for record type C.
#
# Accepts records whose fipscc starts with one of C, D, E, F, T or U and
# that carry a name, a fips code and a state code.  For those it:
#   - remembers the leading class letter in %place_type,
#   - strips parenthesised text from the name (the record is modified),
#   - stores the name, with ", <state>" appended when the state code has
#     an entry in %Geo::StreetAddress::US::State_FIPS, in %place_name,
#   - writes the bare name to the database under the state+fips key.
sub _type_C {
    my ($record) = @_;

    my ($class_letter) = $record->{fipscc} =~ /^([CDEFTU])/o; # inhabited place
    return unless $class_letter;
    return unless $record->{name} and $record->{fips} and $record->{state};

    my $fips = $record->{state} . $record->{fips};
    $place_type{$fips} = $class_letter;

    $record->{name} =~ s/\s*\(.+\)\s*//gos; # cleanup bits with parens

    my $label = $record->{name};
    my $state = $Geo::StreetAddress::US::State_FIPS{ $record->{state} };
    $label .= ", $state" if $state;
    $place_name{$fips} = $label;

    # map fips->name
    $Geo::Coder::US::DB{$fips} = $record->{name};
}
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Serialise an ordered list of segments into one string of BER-compressed
# integers (pack template "w*").
#
# Each argument is an array reference laid out as
#   [ from_lat, from_lon, to_lat, to_lon, \@right_side, \@left_side ]
#
# For every segment the output holds the starting point followed by the
# right-side values; when the left side is non-empty, a 0 and the
# left-side values follow.  The end point is written too, unless the next
# segment starts at exactly that point, in which case it is left out.
#
# Returns the packed string, or undef when called without segments.
sub _compress_segments {
    my @queue = @_;
    my $packed;

    while (my $segment = shift @queue) {
        my ($from_lat, $from_lon, $to_lat, $to_lon, $right, $left) =
            @$segment;

        my @values = ($from_lat, $from_lon, @$right);
        push @values, 0, @$left if @$left;

        # Skip the end point when the following segment begins there.
        my $chained = @queue
            && $queue[0][0] == $to_lat
            && $queue[0][1] == $to_lon;
        push @values, $to_lat, $to_lon unless $chained;

        $packed .= pack("w*", @values);
    }

    return $packed;
}
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# Open $path read-only and hand it to $parser->parse_file(), which is
# given $callback to run on the records.
#
# Returns true once the file has been opened and parsed.  Returns false,
# with $! still set by the failed open(), when the file cannot be opened,
# so the caller decides whether a missing file is fatal.
sub _parse_tiger_file {
    my ($path, $parser, $callback) = @_;

    # Three-argument open on a lexical handle: the path is never parsed
    # for mode characters, and no global bareword handle is left open.
    open my $fh, '<', $path or return;
    $parser->parse_file( $fh, $callback );
    close $fh;
    return 1;
}

# Class method: import one TIGER/Line data set into the tied
# %Geo::Coder::US::DB hash.
#
#   $source - path and basename of the data set, without the .RT? suffix
#
# The files are read in the order RTC, RT1, RT6, RT4, RT5.  RTC and RT1
# are required (croaks if either cannot be opened); RT6, RT4 and RT5 are
# optional and only produce a warning when unreadable.  Street, place-name
# and ZIP entries are written after RT6 has been read; alternate street
# names are written after RT5.  All file-scoped caches are emptied before
# returning normally.
#
# Croaks with "No database specified" when %Geo::Coder::US::DB is not tied.
sub load_tiger_data {
    my ($class, $source) = @_;

    my $DB = \%Geo::Coder::US::DB;
    croak "No database specified" unless tied( %$DB );

    _parse_tiger_file( "$source.RTC", "Geo::TigerLine::Record::C", \&_type_C )
        or croak "can't read $source.RTC: $!";

    _parse_tiger_file( "$source.RT1", "Geo::TigerLine::Record::1", \&_type_1 )
        or croak "can't read $source.RT1: $!";

    _parse_tiger_file( "$source.RT6", "Geo::TigerLine::Record::6", \&_type_6 )
        or carp "can't read $source.RT6: $!";

    while (my ($path, $tlids) = each %street) {
        my @segments = @seg{keys %$tlids};
        my @thunk;

        # Serialise the street's segments in four different orders and
        # keep whichever encoding comes out shortest.

        # right side first, ascending
        $thunk[0] = _compress_segments( sort {
            ($a->[4][0] || $a->[5][0]) <=> ($b->[4][0] || $b->[5][0])
        } @segments );
        # right side first, descending
        $thunk[1] = _compress_segments( sort {
            ($b->[4][0] || $b->[5][0]) <=> ($a->[4][0] || $a->[5][0])
        } @segments );
        # left side first, ascending
        $thunk[2] = _compress_segments( sort {
            ($a->[5][0] || $a->[4][0]) <=> ($b->[5][0] || $b->[4][0])
        } @segments );
        # left side first, descending
        $thunk[3] = _compress_segments( sort {
            ($b->[5][0] || $b->[4][0]) <=> ($a->[5][0] || $a->[4][0])
        } @segments );

        @thunk = sort { length($a) <=> length($b) } @thunk;
        $DB->{$path} = pack("w", $place{$path}) . $thunk[0];
    }

    # place name -> zip codes mapping
    while (my ($fips, $zips) = each %fips_to_zip) {
        my $place = $place_name{$fips} or next;
        # make sure place->fips mapping doesn't get duplicates
        if ( exists $DB->{$place} ) {
            # Stored ZIPs come back from unpack() as plain integers, so a
            # ZIP such as "01234" returns as 1234.  Re-pad to five digits
            # so it merges with the string key already in %$zips instead
            # of being written out a second time.
            $zips->{ sprintf("%05d", $_) }++
                for unpack("w*", $DB->{$place});
        }
        $DB->{$place} = pack("w*", keys %$zips);
    }

    # ZIP code -> FIPS mapping
    $DB->{$_} = pack "w", $zip_to_fips{$_} for keys %zip_to_fips;

    _parse_tiger_file( "$source.RT4", "Geo::TigerLine::Record::4", \&_type_4 )
        or carp "can't read $source.RT4: $!";

    _parse_tiger_file( "$source.RT5", "Geo::TigerLine::Record::5", \&_type_5 )
        or carp "can't read $source.RT5: $!";

    # Alternate street key -> comma-separated primary names; an existing
    # database entry under the same key is left alone.
    $DB->{$_} ||= join ",", keys %{$alt{$_}} for keys %alt;

    %tlid = %street = %place = %seg = %feat = %alt
          = %place_type = %place_name
          = %zip_to_fips = %fips_to_zip = ();
}
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# Callback for FIPS-55 records.
#
# Records need a name, a state and a class matching /^[CUT]|^Z1/.  For
# each of the record's part_of and other_name fields that is set, the
# field is combined with state_fips into a seven-digit key; when that key
# already exists in the database, "<name>, <state>" (with parenthesised
# text removed) is stored pointing at the packed key.  Names that start
# with a digit or that are already in the database are skipped.
sub _fips55 {
    my ($record) = @_;
    my $db = \%Geo::Coder::US::DB;

    return unless $record->{name};
    return unless $record->{state};
    return unless $record->{class} =~ /^[CUT]|^Z1/o;

    foreach my $field (qw(part_of other_name)) {
        my $code = $record->{$field};
        next unless $code;

        my $fips = sprintf("%02d%05d", $record->{state_fips}, $code);
        next unless exists $db->{$fips};

        my $name = "$record->{name}, $record->{state}";
        $name =~ s/\s*\(.+\)\s*//gos; # cleanup bits with parens

        next if $name =~ /^\d/o;
        next if exists $db->{$name};

        $db->{$name} = pack "w", $fips;
    }
}
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
# Class method: import a FIPS-55 gazetteer file into the tied
# %Geo::Coder::US::DB hash.
#
#   $source - path of the FIPS-55 file to read
#
# The file is passed to Geo::Fips55->parse_file() together with the
# _fips55 callback.  Croaks when the database is not tied or the file
# cannot be opened.
sub load_fips_data {
    my ($class, $source) = @_;
    croak "No database specified" unless tied( %Geo::Coder::US::DB );

    # Three-argument open on a lexical handle: the file name is never
    # parsed for mode characters, and the handle is closed once parsing
    # is done instead of lingering in a global bareword.
    open my $fh, '<', $source or croak "can't read $source: $!";
    Geo::Fips55->parse_file( $fh, \&_fips55 );
    close $fh;
    return;
}
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=item load_rtC( $tiger_basename ) |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
=item load_rt5( $tiger_basename ) |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item load_rt1( $tiger_basename ) |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
=item load_rt4( $tiger_basename ) |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=item load_rt6( $tiger_basename ) |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
Each of these methods loads all records from the TIGER/Line record type |
358
|
|
|
|
|
|
|
specified, with the following exceptions: Type C data is only loaded for |
359
|
|
|
|
|
|
|
records with a FIPS-55 class code beginning with C, D, E, F, T, U or Z |
360
|
|
|
|
|
|
|
(i.e. inhabited places). Type 1 data is only loaded for records with a |
361
|
|
|
|
|
|
|
Census Feature Class Code beginning with A (i.e. street data). Also, Type |
362
|
|
|
|
|
|
|
1 data for which no feature name or FIPS place and/or county subdivision |
363
|
|
|
|
|
|
|
is found are not loaded. Finally, Type 6 data lacking a matching Type |
364
|
|
|
|
|
|
|
1 record in the database are not loaded. |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
You may prefix $tiger_basename with an absolute or relative path, but |
367
|
|
|
|
|
|
|
B<do not> provide the .RT? filename suffix as part of $tiger_basename |
368
|
|
|
|
|
|
|
or the load_rt*() methods will become cranky. |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=back |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=head1 BUGS |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
The import throws away probably useful data on the assumption that it's |
375
|
|
|
|
|
|
|
not. Similarly, it imports a lot of data you may never use. Mea culpa. |
376
|
|
|
|
|
|
|
Patches welcome. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
Also, you will encounter from time to time errors from your DBI driver |
379
|
|
|
|
|
|
|
about duplicate keys for certain records. I think the TIGER/Line data has |
380
|
|
|
|
|
|
|
the odd duplicated TLID in Record Type 1, even though it's not supposed |
381
|
|
|
|
|
|
|
to. These errors are annoying but not fatal, and can probably be ignored. |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
The import process can take up huge amounts of RAM. Be forewarned. If |
384
|
|
|
|
|
|
|
anyone really needs it, the data cached in memory by the import process |
385
|
|
|
|
|
|
|
could be buffered to disk, but this would slow down the import process |
386
|
|
|
|
|
|
|
considerably (I think). Contact me if you really want to try this -- |
387
|
|
|
|
|
|
|
it might be faster for you to just download a binary version of the |
388
|
|
|
|
|
|
|
fully imported database. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
Right now, I can't afford to make the full 750 megabyte database freely |
391
|
|
|
|
|
|
|
downloadable from my website -- the bandwidth charges would eat me |
392
|
|
|
|
|
|
|
alive. Contact me if you can offer funding or mirroring. |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=head1 SEE ALSO |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
Geo::Coder::US(3pm), Geo::StreetAddress::US(3pm), Geo::TigerLine(3pm), |
397
|
|
|
|
|
|
|
Geo::Fips55(3pm), DB_File(3pm), Archive::Zip(3pm) |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
eg/import_tiger.pl, eg/import_tiger_zip.pl, eg/import_fips.pl |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
You can download the latest TIGER/Line data (as of this writing) from: |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
L |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
You can get the latest FIPS-55 data from: |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
L |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
If you have copious spare time, you can slog through the TIGER/Line 2003 |
410
|
|
|
|
|
|
|
and FIPS-55-3 technical manuals: |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
L |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
L |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
The TIGER/Line 2004 FE schema is more or less unchanged from 2003. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Finally, a few words about FIPS-55-3 class codes: |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
L |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
=head1 APPRECIATION |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Considerable thanks are due to Michael Schwern |
425
|
|
|
|
|
|
|
for writing the very useful Geo::TigerLine package, which does all |
426
|
|
|
|
|
|
|
the heavy lifting for this module. |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head1 AUTHOR |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
Schuyler Erle |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
=head1 LICENSE |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
See L<Geo::Coder::US> for licensing details. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
1; |