line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Geo::PostalCode::InstallDB; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
Geo::PostalCode::InstallDB - Create and install a new location database for Geo::PostalCode. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Geo::PostalCode::InstallDB; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Geo::PostalCode::InstallDB->install(zipdata => 'Geo-PostalCode_19991101.txt', |
12
|
|
|
|
|
|
|
db_dir => '.') |
13
|
|
|
|
|
|
|
or die "Couldn't install DB!\n"; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 DESCRIPTION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
This class contains only one useful method: C. It takes a |
18
|
|
|
|
|
|
|
text file, the name of which should be given in the C |
19
|
|
|
|
|
|
|
parameter, and converts it into three Berkeley database files |
20
|
|
|
|
|
|
|
(postalcode.db, latlon.db, and city.db) which will be installed in the |
21
|
|
|
|
|
|
|
directory given as the C parameter. |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
The format of these files is a series of lines, the first of which is |
24
|
|
|
|
|
|
|
skipped. Each has five tab-seperated values: |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
postal_code lat lon city state |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SEE ALSO |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
L, L. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=cut |
33
|
|
|
|
|
|
|
|
34
|
2
|
|
|
2
|
|
41409
|
use strict; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
74
|
|
35
|
2
|
|
|
2
|
|
23
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
55
|
|
36
|
2
|
|
|
2
|
|
585
|
use Geo::PostalCode; our $VERSION = $Geo::PostalCode::VERSION; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
use DB_File; |
38
|
|
|
|
|
|
|
use FileHandle; |
39
|
|
|
|
|
|
|
use POSIX; |
40
|
|
|
|
|
|
|
use File::Spec; |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
use constant ZIPCODEDB => 'postalcode.db'; |
43
|
|
|
|
|
|
|
use constant CELLDB => 'latlon.db'; |
44
|
|
|
|
|
|
|
use constant CITYDB => 'city.db'; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub install |
47
|
|
|
|
|
|
|
{ |
48
|
|
|
|
|
|
|
my $class = shift; |
49
|
|
|
|
|
|
|
my %o = @_; |
50
|
|
|
|
|
|
|
my(%zipcode, %cell, %city, %lat, %lon); |
51
|
|
|
|
|
|
|
my $dir; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$o{zipdata} |
54
|
|
|
|
|
|
|
or die "Missing required parameter zipdata"; |
55
|
|
|
|
|
|
|
my $zip = FileHandle->new($o{zipdata}, "r") |
56
|
|
|
|
|
|
|
or die "Couldn't open '$o{zipdata}': $!\n"; |
57
|
|
|
|
|
|
|
if ($o{db_dir}) |
58
|
|
|
|
|
|
|
{ |
59
|
|
|
|
|
|
|
$dir = $o{db_dir}; |
60
|
|
|
|
|
|
|
if (!mkdir($dir)) |
61
|
|
|
|
|
|
|
{ |
62
|
|
|
|
|
|
|
die "Couldn't mkdir($dir): $!\n" |
63
|
|
|
|
|
|
|
unless ($! eq 'File exists') |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
foreach my $db (ZIPCODEDB, CELLDB, CITYDB) |
67
|
|
|
|
|
|
|
{ |
68
|
|
|
|
|
|
|
if (!unlink(File::Spec->catfile($dir,"$db.tmp"))) |
69
|
|
|
|
|
|
|
{ |
70
|
|
|
|
|
|
|
die "Couldn't unlink '$db.tmp': $!\n" |
71
|
|
|
|
|
|
|
unless ($! eq 'No such file or directory') |
72
|
|
|
|
|
|
|
} |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
tie (%zipcode, 'DB_File', File::Spec->catfile($dir,ZIPCODEDB.".tmp"), O_RDWR|O_CREAT, 0666, $DB_BTREE) |
76
|
|
|
|
|
|
|
or die "cannot tie %zipcode to file"; |
77
|
|
|
|
|
|
|
tie (%cell, 'DB_File', File::Spec->catfile($dir,CELLDB.".tmp"), O_RDWR|O_CREAT, 0666, $DB_BTREE) |
78
|
|
|
|
|
|
|
or die "cannot tie %cell to file"; |
79
|
|
|
|
|
|
|
tie (%city, 'DB_File', File::Spec->catfile($dir,CITYDB.".tmp"), O_RDWR|O_CREAT, 0666, $DB_BTREE) |
80
|
|
|
|
|
|
|
or die "cannot tie %city to file"; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Skip header line |
83
|
|
|
|
|
|
|
<$zip>; |
84
|
|
|
|
|
|
|
while (<$zip>) |
85
|
|
|
|
|
|
|
{ |
86
|
|
|
|
|
|
|
chomp; |
87
|
|
|
|
|
|
|
my ($zipcode, $lat, $lon, $city, $state); |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
if ($o{is_csv}) { |
90
|
|
|
|
|
|
|
# strip enclosing quotes from fields |
91
|
|
|
|
|
|
|
($zipcode, $city, $state, $lat, $lon) = |
92
|
|
|
|
|
|
|
map { substr($_, 1, length($_) - 2) } |
93
|
|
|
|
|
|
|
split(","); |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
# the CSV format has mixed case cities |
96
|
|
|
|
|
|
|
$city = uc($city); |
97
|
|
|
|
|
|
|
} else { |
98
|
|
|
|
|
|
|
($zipcode, $lat, $lon, $city, $state) = split("\t"); |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$zipcode{$zipcode} = "$lat,$lon,$city,$state"; |
102
|
|
|
|
|
|
|
$lat{$zipcode} = $lat; |
103
|
|
|
|
|
|
|
$lon{$zipcode} = $lon; |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
my $int_lat = floor($lat); |
106
|
|
|
|
|
|
|
my $int_lon = floor($lon); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
$cell{"$int_lat-$int_lon"} .= $zipcode; |
109
|
|
|
|
|
|
|
$city{"$state$city"} .= $zipcode; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
foreach my $k (keys %city) { |
113
|
|
|
|
|
|
|
my $v = $city{$k}; |
114
|
|
|
|
|
|
|
my @postal_codes = ($v =~ m!(.{5})!g); |
115
|
|
|
|
|
|
|
next unless @postal_codes; |
116
|
|
|
|
|
|
|
my ($tot_lat, $tot_lon, $count) = (0,0,0,0); |
117
|
|
|
|
|
|
|
for (@postal_codes) { |
118
|
|
|
|
|
|
|
$tot_lat += $lat{$_}; |
119
|
|
|
|
|
|
|
$tot_lon += $lon{$_}; |
120
|
|
|
|
|
|
|
$count++; |
121
|
|
|
|
|
|
|
} |
122
|
|
|
|
|
|
|
my $avg_lat = sprintf("%.5f",$tot_lat/$count); |
123
|
|
|
|
|
|
|
my $avg_lon = sprintf("%.5f",$tot_lon/$count); |
124
|
|
|
|
|
|
|
$city{$k} = "$v|$avg_lat|$avg_lon"; |
125
|
|
|
|
|
|
|
} |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
untie %zipcode; |
128
|
|
|
|
|
|
|
untie %cell; |
129
|
|
|
|
|
|
|
untie %city; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
foreach my $db (ZIPCODEDB, CELLDB, CITYDB) |
132
|
|
|
|
|
|
|
{ |
133
|
|
|
|
|
|
|
rename(File::Spec->catfile($dir,"$db.tmp"),File::Spec->catfile($dir,$db)) |
134
|
|
|
|
|
|
|
or die "Couldn't rename '$db.tmp' to '$db': $!\n"; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
1; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
1; |