File Coverage

blib/lib/Image/ExifTool/Geolocation.pm
Criterion Covered Total %
statement 294 406 72.4
branch 145 264 54.9
condition 68 143 47.5
subroutine 9 11 81.8
pod 7 8 87.5
total 523 832 62.8


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: Geolocation.pm
3             #
4             # Description: Determine geolocation from GPS and visa-versa
5             #
6             # Revisions: 2024-03-03 - P. Harvey Created
7             # 2024-03-21 - PH Significant restructuring and addition of
8             # several new features.
9             #
10             # References: https://download.geonames.org/export/
11             #
12             # Notes: Set $Image::ExifTool::Geolocation::geoDir to override the
13             # default directory containing the database file Geolocation.dat
14             # and the GeoLang directory with the alternate language files.
15             # If set, this directory is
16             #
17             # AltNames.dat may be loaded from a different directory by
18             # specifying $Image::ExifTool::Geolocation::altDir. This
19             # database and has entries in the same order as Geolocation.dat,
20             # and each entry is a newline-separated list of alternate names
21             # terminated by a null byte. These alternate names are used
22             # only when searching for a city by name (eg. "Big Apple").
23             #
24             # Databases are based on data from geonames.org with a
25             # Creative Commons license, reformatted as follows in the
26             # Geolocation.dat file:
27             #
28             # Header:
29             # "GeolocationV.VV\tNNNN\n" (V.VV=version, NNNN=num city entries)
30             # "# \n"
31             # NNNN City entries:
32             # Offset Format Description
33             # 0 int16u - latitude high 16 bits (converted to 0-0x100000 range)
34             # 2 int8u - latitude low 4 bits, longitude low 4 bits
35             # 3 int16u - longitude high 16 bits
36             # 5 int8u - index of country in country list
37             # 6 int8u - 0xf0 = population E exponent (in format "N.Fe+0E"), 0x0f = population N digit
38             # 7 int16u - 0xf000 = population F digit, 0x0fff = index in region list (admin1)
39             # 9 int16u - v1.02: 0x7fff = index in subregion (admin2), 0x8000 = high bit of time zone
40             # 9 int16u - v1.03: index in subregion (admin2)
41             # 11 int8u - low byte of time zone index
42             # 12 int8u - 0x3f = feature code index (see below), v1.03: 0x80 = high bit of time zone
43             # 13 string - UTF8 City name, terminated by newline
44             # "\0\0\0\0\x01"
45             # Country entries:
46             # 1. 2-character country code
47             # 2. Country name, terminated by newline
48             # "\0\0\0\0\x02"
49             # Region entries:
50             # 1. Region name, terminated by newline
51             # "\0\0\0\0\x03"
52             # Subregion entries:
53             # 1. Subregion name, terminated by newline
54             # "\0\0\0\0\x04"
55             # Time zone entries:
56             # 1. Time zone name, terminated by newline
57             # "\0\0\0\0\x05" (feature codes added in v1.03)
58             # Feature codes:
59             # 1. Feature code, optional space-followed-by-feature-name, then newline
60             # "\0\0\0\0\0"
61             #
62             # Feature Codes v1.02: (see http://www.geonames.org/export/codes.html#P for descriptions)
63             #
64             # 0. Other 3. PPLA2 6. PPLA5 9. PPLF 12. PPLR 15. PPLX
65             # 1. PPL 4. PPLA3 7. PPLC 10. PPLG 13. PPLS
66             # 2. PPLA 5. PPLA4 8. PPLCH 11. PPLL 14. STLMT
67             #------------------------------------------------------------------------------
68              
69             package Image::ExifTool::Geolocation;
70              
71 1     1   8708 use strict;
  1         2  
  1         56  
72 1     1   8 use vars qw($VERSION $geoDir $altDir $dbInfo);
  1         2  
  1         3654  
73              
74             $VERSION = '1.10'; # (this is the module version number, not the database version)
75              
76             my $debug; # set to output processing time for testing
77              
78             sub ReadDatabase($);
79             sub SortDatabase($);
80             sub AddEntry(@);
81             sub GetEntry($;$$);
82             sub Geolocate($;$);
83              
84             my (@cityList, @countryList, @regionList, @subregionList, @timezoneList);
85             my (%countryNum, %regionNum, %subregionNum, %timezoneNum); # reverse lookups
86             my (@sortOrder, @altNames, %langLookup, $nCity, %featureCodes, %featureTypes);
87             my ($lastArgs, %lastFound, @lastByPop, @lastByLat); # cached city matches
88             my $dbVer = '1.03'; # database version number
89             my $sortedBy = 'Latitude';
90             my $pi = 3.1415926536;
91             my $earthRadius = 6371; # earth radius in km
92             # hard-coded feature codes for v1.02 database
93             my @featureCodes = qw(Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC
94             PPLCH PPLF PPLG PPLL PPLR PPLS STLMT PPLX);
95              
96             # get path name for database file from lib/Image/ExifTool/Geolocation.dat by default,
97             # or according to $Image::ExifTool::Geolocation::directory if specified
98             my $defaultDir = $INC{'Image/ExifTool/Geolocation.pm'};
99             if ($defaultDir) {
100             $defaultDir =~ s(/Geolocation\.pm$)();
101             } else {
102             $defaultDir = '.';
103             warn("Error getting Geolocation.pm directory\n");
104             }
105              
106             # read the Geolocation database unless $geoDir set to empty string
107             unless (defined $geoDir and not $geoDir) {
108             unless ($geoDir and ReadDatabase("$geoDir/Geolocation.dat")) {
109             ReadDatabase("$defaultDir/Geolocation.dat");
110             }
111             }
112              
113             # set directory for language files and alternate names
114             $geoDir = $defaultDir unless defined $geoDir;
115             if (not defined $altDir and $geoDir and -e "$geoDir/AltNames.dat") {
116             $altDir = $geoDir;
117             }
118              
119             # add user-defined entries to the database
120             if (@Image::ExifTool::UserDefined::Geolocation) {
121             AddEntry(@$_) foreach @Image::ExifTool::UserDefined::Geolocation;
122             }
123              
124             #------------------------------------------------------------------------------
125             # Read Geolocation database
126             # Inputs: 0) database file name
127             # Returns: true on success
128             sub ReadDatabase($)
129             {
130 1     1 1 3 my $datfile = shift;
131             # open geolocation database and verify header
132 1 50       63 open DATFILE, "< $datfile" or warn("Error reading $datfile\n"), return 0;
133 1         5 binmode DATFILE;
134 1         3660 my $line = ;
135 1 50       17 unless ($line =~ /^Geolocation(\d+\.\d+)\t(\d+)/) {
136 0         0 warn("Bad format Geolocation database\n");
137 0         0 close(DATFILE);
138 0         0 return 0;
139             }
140 1         32 ($dbVer, $nCity) = ($1, $2);
141 1 50       8 if ($dbVer !~ /^1\.0[23]$/) {
142 0 0       0 my $which = $dbVer < 1.03 ? 'database' : 'ExifTool';
143 0         0 warn("Incompatible Geolocation database (update your $which)\n");
144 0         0 close(DATFILE);
145 0         0 return 0;
146             }
147 1         6 my $comment = ;
148 1 50 33     14 defined $comment and $comment =~ / (\d+) / or close(DATFILE), return 0;
149 1         7 $dbInfo = "$datfile v$dbVer: $nCity cities with population > $1";
150 1         3 my $isUserDefined = @Image::ExifTool::UserDefined::Geolocation;
151              
152 1         3 undef @altNames; # reset altNames
153              
154             # read city database
155 1         3 undef @cityList;
156 1         2 my $i = 0;
157 1         2 for (;;) {
158 114878         160880 $line = ;
159 114878 100 100     191716 last if length($line) == 6 and $line =~ /\0\0\0\0/;
160 114877         182057 $line .= while length($line) < 14;
161 114877         138570 chomp $line;
162 114877         189185 push @cityList, $line;
163             }
164 1 50       7 @cityList == $nCity or warn("Bad number of entries in Geolocation database\n"), return 0;
165             # read countries
166 1         1 for (;;) {
167 239         240 $line = ;
168 239 100 66     314 last if length($line) == 6 and $line =~ /\0\0\0\0/;
169 238         224 chomp $line;
170 238         324 push @countryList, $line;
171 238 50       293 $countryNum{lc substr($line,0,2)} = $#countryList if $isUserDefined;
172             }
173             # read regions
174 1         1 for (;;) {
175 3755         4865 $line = ;
176 3755 100 100     6097 last if length($line) == 6 and $line =~ /\0\0\0\0/;
177 3754         4161 chomp $line;
178 3754         5417 push @regionList, $line;
179 3754 50       5438 $regionNum{lc $line} = $#regionList if $isUserDefined;
180             }
181             # read subregions
182 1         3 for (;;) {
183 31914         56910 $line = ;
184 31914 100 100     71973 last if length($line) == 6 and $line =~ /\0\0\0\0/;
185 31913         62108 chomp $line;
186 31913         68999 push @subregionList, $line;
187 31913 50       63065 $subregionNum{lc $line} = $#subregionList if $isUserDefined;
188             }
189             # read time zones
190 1         3 for (;;) {
191 380         728 $line = ;
192 380 100 66     850 last if length($line) == 6 and $line =~ /\0\0\0\0/;
193 379         671 chomp $line;
194 379         953 push @timezoneList, $line;
195 379 50       765 $timezoneNum{lc $line} = $#timezoneList if $isUserDefined;
196             }
197             # read feature codes if available
198 1 50       26 if ($line eq "\0\0\0\0\x05\n") {
199 1         12 undef @featureCodes;
200 1         3 for (;;) {
201 20         54 $line = ;
202 20 100 100     60 last if length($line) == 6 and $line =~ /\0\0\0\0/;
203 19         31 chomp $line;
204 19 100       168 $featureTypes{$line} = $1 if $line =~ s/ (.*)//;
205 19         47 push @featureCodes, $line;
206             }
207             }
208 1         24 close DATFILE;
209             # initialize featureCodes lookup
210 1         4 $i = 0;
211 1         4 %featureCodes = map { lc($_) => $i++ } @featureCodes;
  19         59  
212 1         8 return 1;
213             }
214              
215             #------------------------------------------------------------------------------
216             # Read alternate-names database
217             # Returns: True on success
218             # Notes: Must be called after ReadDatabase(). Resets $altDir on exit.
219             sub ReadAltNames()
220             {
221 0     0 1 0 my $success;
222 0 0 0     0 if ($altDir and $nCity) {
223 0 0       0 if (open ALTFILE, "< $altDir/AltNames.dat") {
224 0         0 binmode ALTFILE;
225 0         0 local $/ = "\0";
226 0         0 my $i = 0;
227 0         0 while () { chop; $altNames[$i++] = $_; }
  0         0  
  0         0  
228 0         0 close ALTFILE;
229 0 0       0 if ($i == $nCity) {
230 0         0 $success = 1;
231             } else {
232 0         0 warn("Bad number of entries in AltNames database\n");
233 0         0 undef @altNames;
234             }
235             } else {
236 0         0 warn "Error reading $altDir/AltNames.dat\n";
237             }
238 0         0 undef $altDir;
239             }
240 0         0 return $success;
241             }
242              
243             #------------------------------------------------------------------------------
244             # Clear last city matches cache
245             sub ClearLastMatches()
246             {
247 4     4 0 10 undef $lastArgs; # arguments in last call to Geolocate
248 4         645 undef %lastFound; # keys are last matching city numbers, values are population codes
249 4         10 undef @lastByPop; # last matching city numbers ordered by population
250 4         94 undef @lastByLat; # last matching city numbers ordered by latitude
251             }
252              
253             #------------------------------------------------------------------------------
254             # Sort database by specified field
255             # Inputs: 0) Field name to sort (Latitude,City,Country)
256             # Returns: 1 on success
257             sub SortDatabase($)
258             {
259 6     6 1 18 my $field = shift;
260 6 100       34 return 1 if $field eq $sortedBy; # already sorted?
261 1         6 undef @sortOrder;
262 1 50       5 if ($field eq 'Latitude') {
    0          
    0          
263 1         8835 @sortOrder = sort { $cityList[$a] cmp $cityList[$b] } 0..$#cityList;
  114936         252599  
264             } elsif ($field eq 'City') {
265 0         0 @sortOrder = sort { substr($cityList[$a],13) cmp substr($cityList[$b],13) } 0..$#cityList;
  0         0  
266             } elsif ($field eq 'Country') {
267 0         0 my %lkup;
268 0         0 foreach (0..$#cityList) {
269 0         0 my $city = substr($cityList[$_],13);
270 0         0 my $ctry = substr($countryList[ord substr($cityList[$_],5,1)], 2);
271 0         0 $lkup{$_} = "$ctry $city";
272             }
273 0         0 @sortOrder = sort { $lkup{$a} cmp $lkup{$b} } 0..$#cityList;
  0         0  
274             } else {
275 0         0 return 0;
276             }
277 1         1967 $sortedBy = $field;
278 1         11 ClearLastMatches();
279 1         3 return 1;
280             }
281              
282             #------------------------------------------------------------------------------
283             # Add cities to the Geolocation database
284             # Inputs: 0-8) city,region,subregion,country_code,country,timezone,feature_code,population,lat,lon,altNames
285             # eg. AddEntry('Sinemorets','Burgas','Obshtina Tsarevo','BG','Bulgaria','Europe/Sofia','',400,42.06115,27.97833)
286             # Returns: true on success, otherwise issues warning
287             sub AddEntry(@)
288             {
289 1     1 1 11 my ($city, $region, $subregion, $cc, $country, $timezone, $fc, $pop, $lat, $lon, $altNames) = @_;
290 1 50       6 @_ < 10 and warn("Too few arguments in $city definition (check for updated format)\n"), return 0;
291 1 50       6 length($cc) != 2 and warn("Country code '${cc}' is not 2 characters\n"), return 0;
292 1 50       4 $featureTypes{$fc} = $1 if $fc =~ s/ (.*)//;
293 1         5 my $fn = $featureCodes{lc $fc};
294 1 50       3 unless (defined $fn) {
295 1 50 33     9 if ($dbVer eq '1.02' or @featureCodes > 0x3f or not length $fc) {
      33        
296 1         2 $fn = 0;
297             } else {
298 0         0 push @featureCodes, uc($fc);
299 0         0 $featureCodes{lc $fc} = $fn = $#featureCodes;
300             }
301             }
302 1         10 chomp $lon; # (just in case it was read from file)
303             # create reverse lookups for country/region/subregion/timezone if not done already
304             # (eg. if the entries are being added manually instead of via UserDefined::Geolocation)
305 1 50       2 unless (%countryNum) {
306 1         3 my $i;
307 1         2 $i = 0; $countryNum{lc substr($_,0,2)} = $i++ foreach @countryList;
  1         180  
308 1         2 $i = 0; $regionNum{lc $_} = $i++ foreach @regionList;
  1         8516  
309 1         5 $i = 0; $subregionNum{lc $_} = $i++ foreach @subregionList;
  1         29260  
310 1         7 $i = 0; $timezoneNum{lc $_} = $i++ foreach @timezoneList;
  1         446  
311             }
312 1         8 my $cn = $countryNum{lc $cc};
313 1 50 50     9 unless (defined $cn) {
314 0 0       0 $#countryList >= 0xff and warn("AddEntry: Too many countries\n"), return 0;
315 0         0 push @countryList, "$cc$country";
316 0         0 $cn = $countryNum{lc $cc} = $#countryList;
317             } elsif ($country) {
318             $countryList[$cn] = "$cc$country"; # (override existing country name)
319             }
320 1         5 my $tn = $timezoneNum{lc $timezone};
321 1 50       6 unless (defined $tn) {
322 0 0       0 $#timezoneList >= 0x1ff and warn("AddEntry: Too many time zones\n"), return 0;
323 0         0 push @timezoneList, $timezone;
324 0         0 $tn = $timezoneNum{lc $timezone} = $#timezoneList;
325             }
326 1         4 my $rn = $regionNum{lc $region};
327 1 50       5 unless (defined $rn) {
328 0 0       0 $#regionList >= 0xfff and warn("AddEntry: Too many regions\n"), return 0;
329 0         0 push @regionList, $region;
330 0         0 $rn = $regionNum{lc $region} = $#regionList;
331             }
332 1         5 my $sn = $subregionNum{lc $subregion};
333 1 50       4 unless (defined $sn) {
334 0 0       0 my $max = $dbVer eq '1.02' ? 0x7fff : 0xffff;
335 0 0       0 $#subregionList >= $max and warn("AddEntry: Too many subregions\n"), return 0;
336 0         0 push @subregionList, $subregion;
337 0         0 $sn = $subregionNum{lc $subregion} = $#subregionList;
338             }
339 1         18 $pop = sprintf('%.1e',$pop); # format: "3.1e+04" or "3.1e+004"
340             # pack CC index, population and region index into a 32-bit integer
341 1         10 my $code = ($cn << 24) | (substr($pop,-1,1)<<20) | (substr($pop,0,1)<<16) | (substr($pop,2,1)<<12) | $rn;
342             # store high bit of timezone index
343 1 50       4 if ($tn > 255) {
344 1 50       6 if ($dbVer eq '1.02') {
345 0         0 $sn |= 0x8000;
346             } else {
347 1         3 $fn |= 0x80;
348             }
349 1         2 $tn -= 256;
350             }
351 1         8 $lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
352 1         5 $lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
353 1         9 my $hdr = pack('nCnNnCC', $lat>>4, (($lat&0x0f)<<4)|($lon&0x0f), $lon>>4, $code, $sn, $tn, $fn);
354 1         7 push @cityList, "$hdr$city";
355             # add altNames entry if provided
356 1 50       18 if ($altNames) {
357 1         5 chomp $altNames; # (just in case)
358 1         4 $altNames =~ tr/,/\n/;
359             # add any more arguments in case altNames were passed separately (undocumented)
360 1         5 foreach (11..$#_) {
361 0         0 chomp $_[$_];
362 0         0 $altNames .= "\n$_[$_]";
363             }
364 1         2649 $altNames[$#cityList] = $altNames;
365             }
366 1         8 $sortedBy = '';
367 1         5 undef $lastArgs; # (faster than ClearLastArgs)
368 1         8 return 1;
369             }
370              
371             #------------------------------------------------------------------------------
372             # Unpack entry in database
373             # Inputs: 0) entry number or index into sorted database,
374             # 1) optional language code, 2) flag to use index into sorted database
375             # Returns: 0-10) city,region,subregion,country_code,country,timezone,
376             # feature_code,pop,lat,lon,feature_type
377             sub GetEntry($;$$)
378             {
379 9     9 1 56 my ($entryNum, $lang, $sort) = @_;
380 9 50       32 return() if $entryNum > $#cityList;
381 9 50 33     37 $entryNum = $sortOrder[$entryNum] if $sort and @sortOrder > $entryNum;
382 9         67 my ($lt,$f,$ln,$code,$sn,$tn,$fn) = unpack('nCnNnCC', $cityList[$entryNum]);
383 9         43 my $city = substr($cityList[$entryNum],13);
384 9         32 my $ctry = $countryList[$code >> 24];
385 9         34 my $rgn = $regionList[$code & 0x0fff];
386 9 50       34 if ($dbVer eq '1.02') {
387 0 0       0 $sn & 0x8000 and $tn += 256, $sn &= 0x7fff;
388             } else {
389 9 50       34 $fn & 0x80 and $tn += 256;
390             }
391 9         30 my $sub = $subregionList[$sn];
392             # convert population digits back into exponent format
393 9         93 my $pop = (($code>>16 & 0x0f) . '.' . ($code>>12 & 0x0f) . 'e+' . ($code>>20 & 0x0f)) + 0;
394 9         97 $lt = sprintf('%.4f', (($lt<<4)|($f >> 4)) * 180 / 0x100000 - 90);
395 9         70 $ln = sprintf('%.4f', (($ln<<4)|($f & 0x0f))* 360 / 0x100000 - 180);
396 9   50     65 my $fc = $featureCodes[$fn & 0x3f] || 'Other';
397 9         26 my $cc = substr($ctry, 0, 2);
398 9         54 my $country = substr($ctry, 2);
399 9         42 my $ft = $featureTypes{$fc};
400 9 50 33     59 if ($lang and $lang ne 'en') {
401 0         0 my $xlat = $langLookup{$lang};
402             # load language lookups if not done already
403 0 0       0 if (not defined $xlat) {
404 0         0 unshift @INC, $geoDir; # make sure $geoDir is first in path
405 0 0       0 if (eval "require 'GeoLang/$lang.pm'") {
406 0         0 my $trans = "Image::ExifTool::GeoLang::${lang}::Translate";
407 1     1   9 no strict 'refs';
  1         2  
  1         4285  
408 0 0       0 $xlat = \%$trans if %$trans;
409             }
410 0         0 shift @INC;
411             # read user-defined language translations
412 0 0       0 if (%Image::ExifTool::Geolocation::geoLang) {
413 0         0 my $userLang = $Image::ExifTool::Geolocation::geoLang{$lang};
414 0 0 0     0 if ($userLang and ref($userLang) eq 'HASH') {
415 0 0       0 if ($xlat) {
416             # add user-defined entries to main lookup
417 0         0 $$xlat{$_} = $$userLang{$_} foreach keys %$userLang;
418             } else {
419 0         0 $xlat = $userLang;
420             }
421             }
422             }
423 0   0     0 $langLookup{$lang} = $xlat || 0;
424             }
425 0 0       0 if ($xlat) {
426 0         0 my $r2 = $rgn;
427             # City-specific: "CCRgn,Sub,City", "CCRgn,City", "CC,City", ",City"
428             # Subregion-specific: "CCRgn,Sub,"
429             # Region-specific: "CCRgn,"
430             # Country-specific: "CC,"
431             $city = $$xlat{"$cc$r2,$sub,$city"} || $$xlat{"$cc$r2,$city"} ||
432 0   0     0 $$xlat{"$cc,$city"} || $$xlat{",$city"} || $$xlat{$city} || $city;
433 0   0     0 $sub = $$xlat{"$cc$rgn,$sub,"} || $$xlat{$sub} || $sub;
434 0   0     0 $rgn = $$xlat{"$cc$rgn,"} || $$xlat{$rgn} || $rgn;
435 0   0     0 $country = $$xlat{"$cc,"} || $$xlat{$country} || $country;
436 0 0       0 $ft = $$xlat{$fc} if $$xlat{$fc};
437             }
438             }
439 9         293 return($city,$rgn,$sub,$cc,$country,$timezoneList[$tn],$fc,$pop,$lt,$ln,$ft);
440             }
441              
442             #------------------------------------------------------------------------------
443             # Get alternate names for specified database entry
444             # Inputs: 0) entry number or index into sorted database, 1) sort flag
445             # Returns: comma-separated list of alternate names, or empty string if no names
446             # Notes: ReadAltNames() must be called before this
447             sub GetAltNames($;$)
448             {
449 0     0 1 0 my ($entryNum, $sort) = @_;
450 0 0 0     0 $entryNum = $sortOrder[$entryNum] if $sort and @sortOrder > $entryNum;
451 0 0       0 my $alt = $altNames[$entryNum] or return '';
452 0         0 $alt =~ tr/\n/,/;
453 0         0 return $alt;
454             }
455              
456             #------------------------------------------------------------------------------
457             # Look up lat,lon or city in geolocation database
458             # Inputs: 0) "lat,lon", "city,region,country", etc, (city must be first)
459             # 1) options hash reference (or undef for no options)
460             # Options: GeolocMinPop, GeolocMaxDist, GeolocMulti, GeolocFeature, GeolocAltNames,
461             # GeolocNearby
462             # Returns: 0) Reference to list of indices for matching cities, or undef for no matches
463             # 1) Reference to list of distance/bearing pairs, or undef if no GPS
464             # In scalar context returns list of indices only
465             sub Geolocate($;$)
466             {
467 8     8 1 27 my ($arg, $opts) = @_;
468 8         52 my ($city, @exact, %regex, @multiCity, $other, $idx, @cargs);
469 8         0 my ($minPop, $minDistU, $minDistC, @matchParms, @coords, %fcOK, $both);
470 8         0 my ($pop, $maxDist, $multi, $fcodes, $altNames, @startTime);
471              
472             $opts and ($pop, $maxDist, $multi, $fcodes, $altNames) =
473 8 50       59 @$opts{qw(GeolocMinPop GeolocMaxDist GeolocMulti GeolocFeature GeolocAltNames)};
474              
475 8 50       27 if ($debug) {
476 0         0 require Time::HiRes;
477 0         0 @startTime = Time::HiRes::gettimeofday();
478             }
479 8 50       27 @cityList or warn('No Geolocation database'), return();
480             # make population code for comparing with 2 bytes at offset 6 in database
481 8 100       23 if ($pop) {
482 3         32 $pop = sprintf('%.1e', $pop);
483 3         28 $minPop = chr((substr($pop,-1,1)<<4) | (substr($pop,0,1))) . chr(substr($pop,2,1)<<4);
484             }
485 8 100       41 if ($fcodes) {
486 1         6 my $neg = $fcodes =~ s/^-//;
487 1         5 my @fcodes = split /\s*,-?\s*/, lc $fcodes; # (allow leading dash on subsequent codes)
488 1 50       5 if ($neg) {
489 1         29 $fcOK{$_} = 1 foreach 0..$#featureCodes;
490 1   50     12 defined $featureCodes{$_} and delete $fcOK{$featureCodes{$_}} foreach @fcodes;
491             } else {
492 0   0     0 defined $featureCodes{$_} and $fcOK{$featureCodes{$_}} = 1 foreach @fcodes;
493             }
494             }
495             #
496             # process input argument
497             #
498 8         17 my $num = 1;
499 8         40 $arg =~ s/^\s+//; $arg =~ s/\s+$//; # remove leading/trailing spaces
  8         28  
500 8         71 my @args = split /\s*,\s*/, $arg;
501 8         83 my %ri = ( cc => 0, co => 1, re => 2, sr => 3, ci => 8, '' => 9 );
502 8         26 foreach (@args) {
503             # allow regular expressions optionally prefixed by "ci", "cc", "co", "re" or "sr"
504 24 100       188 if (m{^(-)?(\w{2})?/(.*)/(i?)$}) {
    100          
    50          
    100          
    100          
    50          
505 1 50       30 my $re = $4 ? qr/$3/im : qr/$3/m;
506 1 50       9 next if not defined($idx = $ri{$2});
507 1         4 push @cargs, $_;
508 1 50       6 $other = 1 if $idx < 5;
509 1 50       6 $idx += 10 if $1; # add 10 for negative matches
510 1 50       7 $regex{$idx} or $regex{$idx} = [ ];
511 1         3 push @{$regex{$idx}}, $re;
  1         4  
512 1 50       6 $city = '' unless defined $city;
513             } elsif (/^[-+]?\d+(\.\d+)?$/) { # coordinate format
514 12 50       51 push @coords, $_ if @coords < 2;
515             } elsif (/^([-+]?\d+(?:\.\d+)?) *(([NS])[A-Z]*)? +([-+]?\d+(?:\.\d+)?) *(([EW])[A-Z]*)?/i) { # "lat lon" format
516 0 0       0 next if @coords;
517 0         0 my ($lat, $lon) = ($1, $4);
518 0 0 0     0 $lat = -abs($lat) if $3 and uc($3) eq 'S';
519 0 0 0     0 $lon = -abs($lon) if $6 and uc($6) eq 'W';
520 0         0 push @coords, $lat, $lon;
521             } elsif (lc $_ eq 'both') {
522 7         25 $both = 1;
523             } elsif ($_ =~ /^num=(\d+)$/i) {
524 1         5 $num = $1;
525             } elsif ($_) {
526 3         9 push @cargs, $_;
527 3 100       8 if ($city) {
528 1         5 push @exact, lc $_;
529             } else {
530 2         7 $city = lc $_;
531             }
532             }
533             }
534 8 50 66     63 unless (defined $city or @coords == 2) {
535 0         0 warn("Insufficient information to determine geolocation\n");
536 0         0 return();
537             }
538             # sort database by logitude if finding entry based on coordinates
539 8 50 66     70 SortDatabase('Latitude') if @coords == 2 and ($both or not defined $city);
      100        
540             #
541             # perform reverse Geolocation lookup to determine GPS based on city, country, etc.
542             #
543 8   66     48 while (defined $city and (@coords != 2 or $both)) {
      100        
544 3   50     45 my $cargs = join(',', @cargs, $pop||'', $maxDist||'', $fcodes||'');
      50        
      50        
545 3         7 my $i = 0;
546 3 50 66     19 if ($lastArgs and $lastArgs eq $cargs) {
547 0         0 $i = @cityList; # bypass search
548             } else {
549 3         11 ClearLastMatches();
550 3         6 $lastArgs = $cargs;
551             }
552             # read alternate names database if an exact city match is specified
553 3 50       11 if ($altNames) {
554 3 50 66     16 ReadAltNames() if $city and $altDir;
555 3         10 $altNames = \@altNames;
556             } else {
557 0         0 $altNames = [ ]; # (don't search alt names)
558             }
559 3         10 Entry: for (; $i<@cityList; ++$i) {
560 344633         622907 my $cty = substr($cityList[$i],13);
561 344633 100 100     904050 if ($city and $city ne lc $cty) { # test exact city name first
562 229741 100 66     638636 next unless $$altNames[$i] and $$altNames[$i] =~ /^$city$/im;
563             }
564             # test with city-specific regexes
565 114893 50 0     196354 if ($regex{8}) { $cty =~ $_ or next Entry foreach @{$regex{8}} }
  0         0  
  0         0  
566 114893 50 0     200142 if ($regex{18}) { $cty !~ $_ or next Entry foreach @{$regex{18}} }
  0         0  
  0         0  
567             # test other arguments
568 114893         240293 my ($cd,$sn) = unpack('x5Nn', $cityList[$i]);
569 114893         176579 my $ct = $countryList[$cd >> 24];
570 114893 50       197014 $sn &= 0x7fff if $dbVer eq '1.02';
571 114893         317868 my @geo = (substr($ct,0,2), substr($ct,2), $regionList[$cd & 0x0fff], $subregionList[$sn]);
572 114893 100       199870 if (@exact) {
573             # make quick lookup for all names at this location
574 14         40 my %geoLkup;
575 14   100     251 $_ and $geoLkup{lc $_} = 1 foreach @geo;
576 14   100     171 $geoLkup{$_} or next Entry foreach @exact;
577             }
578             # test with cc, co, re and sr regexes
579 114880 100       185952 if ($other) { foreach $idx (0..3) {
  114878         170745  
580 233008 100 100     421505 if ($regex{$idx}) { $geo[$idx] =~ $_ or next Entry foreach @{$regex{$idx}} }
  114878         141568  
  114878         578202  
581 119756 50 0     235396 if ($regex{$idx+10}) { $geo[$idx] !~ $_ or next Entry foreach @{$regex{$idx+10}} }
  0         0  
  0         0  
582             } }
583             # test regexes for any place name
584 1628 50 33     6839 if ($regex{9} or $regex{19}) {
585 0         0 my $str = join "\n", $cty, @geo;
586 0   0     0 $str =~ $_ or next Entry foreach @{$regex{9}};
  0         0  
587 0   0     0 $str !~ $_ or next Entry foreach @{$regex{19}};
  0         0  
588             }
589             # test feature code and population
590 1628 50 33     3735 next if $fcodes and not $fcOK{ord(substr($cityList[$i],12,1)) & 0x3f};
591 1628         3440 my $pc = substr($cityList[$i],6,2);
592 1628 50 33     4738 if (not defined $minPop or $pc ge $minPop) {
593 1628         6494 $lastFound{$i} = $pc;
594 1628 100       8050 push @lastByLat, $i if @coords == 2;
595             }
596             }
597 3 50       18 @startTime and printf("= Processing time: %.3f sec\n", Time::HiRes::tv_interval(\@startTime));
598 3 50       13 if (%lastFound) {
599 3 100       14 last if @coords == 2; # continue to use coords with last city matches
600 2 50       13 scalar(keys %lastFound) > 200 and warn("Too many matching cities\n"), return();
601             # return nearby cities if "num=" is used and only one match found
602 2 50 33     17 if ($num > 1 and scalar(keys %lastFound) == 1) {
603 0         0 my ($i) = keys %lastFound;
604 0         0 my @entry = GetEntry($i);
605 0         0 @coords = @entry[8,9];
606 0         0 SortDatabase('Latitude'); # (make sure we are sorted by latitude)
607 0         0 last;
608             }
609 2 50       8 unless (@lastByPop) {
610 2 0       12 @lastByPop = sort { $lastFound{$b} cmp $lastFound{$a} or $cityList[$a] cmp $cityList[$b] } keys %lastFound;
  0         0  
611             }
612 2         116 return(\@lastByPop);
613             }
614 0         0 warn "No such city in Geolocation database\n";
615 0         0 return();
616             }
617             #
618             # determine Geolocation based on GPS coordinates
619             #
620 6         23 my ($lat, $lon) = @coords;
621 6 50       26 if ($maxDist) {
622 0         0 $minDistU = $maxDist / (2 * $earthRadius); # min distance on unit sphere
623 0         0 $minDistC = $maxDist * 0x100000 / ($pi * $earthRadius); # min distance in coordinate units
624             } else {
625 6         14 $minDistU = $pi;
626 6         16 $minDistC = 0x200000;
627             }
628 6         74 my $cos = cos($lat * $pi / 180); # cosine factor for longitude distances
629             # reduce lat/lon to the range 0-0x100000
630 6         31 $lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
631 6         27 $lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
632 6 0       20 $lat or $lat = $coords[0] < 0 ? 1 : 0xfffff; # (zero latitude is a problem for our calculations)
    50          
633 6         50 my $coord = pack('nCn',$lat>>4,(($lat&0x0f)<<4)|($lon&0x0f),$lon>>4);;
634             # start from cached city matches if also using city information
635 6   66     27 my $numEntries = @lastByLat || @cityList;
636             # binary search to find closest longitude
637 6         21 my ($n0, $n1) = (0, $numEntries - 1);
638 6 50       68 my $sorted = @lastByLat ? \@lastByLat : (@sortOrder ? \@sortOrder : undef);
    100          
639 6         23 while ($n1 - $n0 > 1) {
640 94         196 my $n = int(($n0 + $n1) / 2);
641 94 100       355 if ($coord lt $cityList[$sorted ? $$sorted[$n] : $n]) {
    100          
642 44         109 $n1 = $n;
643             } else {
644 50         106 $n0 = $n;
645             }
646             }
647             # step backward then forward through database to find nearest city
648 6         26 my ($inc, $end, $n) = (-1, -1, $n0+1);
649 6         30 my ($p0, $t0) = ($lat*$pi/0x100000 - $pi/2, $lon*$pi/0x080000 - $pi);
650 6         19 my $cp0 = cos($p0);
651 6         16 my (@matches, @rtnList, @dist);
652              
653 6         9 for (;;) {
654 41391 100       86402 if (($n += $inc) == $end) {
655 12 100 66     66 last if $inc == 1 or $n0 == $n1;
656 6         19 ($inc, $end, $n) = (1, $numEntries, $n1);
657             }
658 41385 100       76254 my $i = $sorted ? $$sorted[$n] : $n;
659             # get city latitude/longitude
660 41385         112850 my ($lt,$f,$ln) = unpack('nCn', $cityList[$i]);
661 41385         77874 $lt = ($lt << 4) | ($f >> 4);
662             # searched far enough if latitude alone is further than best distance
663 41385 100       90166 abs($lt - $lat) > $minDistC and $n = $end - $inc, next;
664             # ignore if population is below threshold
665 41374 100 100     146624 next if defined $minPop and $minPop ge substr($cityList[$i],6,2);
666 3122 100 100     6716 next if $fcodes and not $fcOK{ord(substr($cityList[$i],12,1)) & 0x3f};
667 3094         4884 $ln = ($ln << 4) | ($f & 0x0f);
668             # calculate great circle distance to this city on unit sphere
669 3094         6557 my ($p1, $t1) = ($lt*$pi/0x100000 - $pi/2, $ln*$pi/0x080000 - $pi);
670 3094         6460 my ($sp, $st) = (sin(($p1-$p0)/2), sin(($t1-$t0)/2));
671 3094         5275 my $a = $sp * $sp + $cp0 * cos($p1) * $st * $st;
672 3094         5345 my $distU = atan2(sqrt($a), sqrt(1-$a)); # distance on unit sphere
673 3094 100       6367 next if $distU > $minDistU;
674 65         217 @matchParms = ($i, $p1, $t1, $distU);
675 65 100       149 if ($num <= 1) {
676 50         107 $minDistU = $distU;
677             } else {
678 15         19 my $j;
679             # add this entry into list of matching cities ordered by closest first
680 15         36 for ($j=0; $j<@matches; ++$j) {
681 20 100       51 last if $distU < $matches[$j][3];
682             }
683 15 100       32 if ($j < $#matches) {
684 6         16 splice @matches, $j, 0, [ @matchParms ];
685             } else {
686 9         31 $matches[$j] = [ @matchParms ];
687             }
688             # restrict list to the specified number of nearest cities
689 15 100       38 pop @matches if @matches > $num;
690             # update minimum distance with furthest match if we satisfied our quota
691 15 100       39 $minDistU = $matches[-1][3] if @matches >= $num;
692             }
693 65         145 $minDistC = $minDistU * 0x200000 / $pi; # distance in scaled coordinate units
694             }
695 6 50       26 @matchParms or warn("No suitable location in Geolocation database\n"), return();
696 6         38 $num = @matches;
697              
698 6 50       44 @startTime and printf("- Processing time: %.3f sec\n", Time::HiRes::tv_interval(\@startTime));
699              
700 6         15 for (;;) {
701 8 100       28 if ($num > 1) {
702 3 100       7 last unless @matches;
703 2         4 @matchParms = @{$matches[0]};
  2         5  
704 2         5 shift @matches;
705             }
706             # calculate distance in km and bearing to matching city
707 7         25 my ($ii, $p1, $t1, $distU) = @matchParms;
708 7         108 my $km = sprintf('%.2f', 2 * $earthRadius * $distU);
709 7         38 my $be = atan2(sin($t1-$t0)*cos($p1-$p0), $cp0*sin($p1)-sin($p0)*cos($p1)*cos($t1-$t0));
710 7         36 $be = int($be * 180 / $pi + 360.5) % 360; # convert from radians to integer degrees
711 7         29 push @rtnList, $ii;
712 7         49 push @dist, [ $km, $be ];
713 7 100       24 last if $num <= 1;
714             }
715 6 50       310 return wantarray ? (\@rtnList, \@dist) : \@rtnList;
716             }
717              
718             1; #end
719              
720             __END__