|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Geo::Address::Formatter;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Geo::Address::Formatter::VERSION = '1.9982';  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ABSTRACT: take structured address data and format it according to the various global/country rules  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
26179
 | 
 use strict;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
162
 | 
    | 
| 
6
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
23
 | 
 use warnings;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
136
 | 
    | 
| 
7
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
22
 | 
 use feature qw(say);  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
467
 | 
    | 
| 
8
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
1744
 | 
 use Clone qw(clone);  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9615
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
281
 | 
    | 
| 
9
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
973
 | 
 use Data::Dumper;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10885
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
299
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Data::Dumper::Sortkeys = 1;  | 
| 
11
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
33
 | 
 use File::Basename qw(dirname);  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
254
 | 
    | 
| 
12
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
1890
 | 
 use Ref::Util qw(is_hashref);  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6413
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
343
 | 
    | 
| 
13
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
34
 | 
 use Scalar::Util qw(looks_like_number);  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
244
 | 
    | 
| 
14
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
2087
 | 
 use Text::Hogan::Compiler;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32633
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
203
 | 
    | 
| 
15
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
1887
 | 
 use Try::Catch;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3455
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
310
 | 
    | 
| 
16
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
1313
 | 
 use YAML::XS qw(LoadFile);  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8993
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
    | 
| 
17
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
 
 | 
30
 | 
 use utf8;  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $THC = Text::Hogan::Compiler->new;  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # optional params  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $show_warnings = 1;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $debug         = 0;  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $only_address  = 0;  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
28
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
  
1
  
 | 
16799
 | 
     my ($class, %params) = @_;  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     my $self      = {};  | 
| 
31
 | 
17
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
60
 | 
     my $conf_path = $params{conf_path} || die "no conf_path set";  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # optional params  | 
| 
34
 | 
17
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
51
 | 
     if ( defined($params{no_warnings}) && ($params{no_warnings})){  | 
| 
35
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $show_warnings  = 0;  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
37
 | 
17
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
67
 | 
     $only_address  = (defined($params{only_address}) && $params{only_address}) // 0;  | 
| 
 
 | 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
38
 | 
17
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
64
 | 
     $debug         = (defined($params{debug})        && $params{debug})        // 0;  | 
| 
 
 | 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     $self->{final_components} = undef;  | 
| 
41
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $self->{set_district_alias} = {};  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     bless($self, $class);  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
17
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     say STDERR "************* in Geo::Address::Formatter::new ***" if ($debug);  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
47
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     if ($self->_read_configuration($conf_path)){  | 
| 
48
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
         return $self;  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
50
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     die 'unable to read configuration';  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _read_configuration {  | 
| 
54
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
 
 | 
25
 | 
     my $self = shift;  | 
| 
55
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my $path = shift;  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
57
 | 
17
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
349
 | 
     return if (! -e $path);  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # components file  | 
| 
59
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
     my $compyaml = $path . '/components.yaml';  | 
| 
60
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
247
 | 
     return if (! -e $compyaml);  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
     $self->{templates}          = {};  | 
| 
63
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $self->{component_aliases}  = {};  | 
| 
64
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     $self->{component2type}     = {};  | 
| 
65
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     $self->{ordered_components} = [];  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # read the config file(s)  | 
| 
68
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     my $loaded = 0;  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
70
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     my $wwfile = $path . '/countries/worldwide.yaml';  | 
| 
71
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     say STDERR "loading templates $wwfile" if ($debug);  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
197
 | 
     return if (! -e $wwfile);  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     try {  | 
| 
76
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
 
 | 
344
 | 
         my $rh_templates = LoadFile($wwfile);  | 
| 
77
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28396
 | 
         foreach (keys %$rh_templates) {  | 
| 
78
 | 
2950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3120
 | 
             $self->{templates}{$_} = $rh_templates->{$_};  | 
| 
79
 | 
2950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2682
 | 
             $loaded = 1;  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } catch {  | 
| 
82
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
         warn "error parsing country configuration in $wwfile";  | 
| 
83
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return;  | 
| 
84
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
173
 | 
     };  | 
| 
85
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
337
 | 
     return if ($loaded == 0);  # no templates  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # see if we can load the components  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     try {  | 
| 
89
 | 
16
 | 
  
 50
  
 | 
 
 | 
  
16
  
 | 
 
 | 
307
 | 
         say STDERR "loading components" if ($debug);  | 
| 
90
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
         my @c = LoadFile($compyaml);  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3006
 | 
         if ($debug){  | 
| 
93
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR Dumper \@c;  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
         foreach my $rh_c (@c) {  | 
| 
97
 | 
244
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
306
 | 
             if (defined($rh_c->{name})){  | 
| 
98
 | 
244
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
262
 | 
                 if (defined($rh_c->{aliases})){  | 
| 
99
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
251
 | 
                     $self->{component_aliases}{$rh_c->{name}} = $rh_c->{aliases};  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } else {  | 
| 
101
 | 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
209
 | 
                     $self->{component_aliases}{$rh_c->{name}} = [];  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         foreach my $rh_c (@c) {  | 
| 
107
 | 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
197
 | 
             push(@{$self->{ordered_components}}, $rh_c->{name});  | 
| 
 
 | 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
330
 | 
    | 
| 
108
 | 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
340
 | 
             $self->{component2type}->{$rh_c->{name}} = $rh_c->{name};  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
244
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
354
 | 
             if (defined($rh_c->{aliases})) {  | 
| 
111
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
124
 | 
                 foreach my $alias (@{$rh_c->{aliases}}) {  | 
| 
 
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
166
 | 
    | 
| 
112
 | 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
393
 | 
                     push(@{$self->{ordered_components}}, $alias);  | 
| 
 
 | 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
528
 | 
    | 
| 
113
 | 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
774
 | 
                     $self->{component2type}->{$alias} = $rh_c->{name};  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
117
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
103
 | 
         if ($debug){  | 
| 
118
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR 'component_aliases';  | 
| 
119
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR Dumper $self->{component_aliases};  | 
| 
120
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR 'ordered_components';  | 
| 
121
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR Dumper $self->{ordered_components};  | 
| 
122
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR 'component2type';  | 
| 
123
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR Dumper $self->{component2type};  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } catch {  | 
| 
126
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
         warn "error parsing component configuration: $_";  | 
| 
127
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
109
 | 
     };  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # get the county and state codes and country2lang conf  | 
| 
130
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
300
 | 
     my @conf_files = qw(county_codes state_codes country2lang);  | 
| 
131
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     foreach my $cfile (@conf_files) {  | 
| 
132
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74946
 | 
         $self->{$cfile} = {};  | 
| 
133
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
122
 | 
         my $yfile = $path . '/' . $cfile . '.yaml';  | 
| 
134
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
757
 | 
         if (-e $yfile) {  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             try {  | 
| 
136
 | 
42
 | 
 
 | 
 
 | 
  
42
  
 | 
 
 | 
866
 | 
                 $self->{$cfile} = LoadFile($yfile);  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } catch {  | 
| 
138
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
                 warn "error parsing $cfile configuration: $_";  | 
| 
139
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
384
 | 
             };  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5555
 | 
     my $abbrvdir = $path . '/abbreviations';  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
16
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
227
 | 
     if (-d $abbrvdir){  | 
| 
146
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
416
 | 
         opendir my $dh, $abbrvdir  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             or die "Could not open '$abbrvdir' for read: $!\n";  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
149
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
329
 | 
         while (my $file = readdir $dh) {  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # say STDERR "file: $file";  | 
| 
151
 | 
260
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30296
 | 
             if ($file =~ m/^(\w\w)\.yaml$/) {  | 
| 
152
 | 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
499
 | 
                 my $lang = $1; # two letter lang code like 'en'  | 
| 
153
 | 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
414
 | 
                 my $abbrvfile = $abbrvdir . '/' . $file;  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 try {  | 
| 
155
 | 
236
 | 
 
 | 
 
 | 
  
236
  
 | 
 
 | 
4838
 | 
                     $self->{abbreviations}->{$lang} = LoadFile($abbrvfile);  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } catch {  | 
| 
157
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
                     warn "error parsing abbrv conf in $abbrvfile: $_";  | 
| 
158
 | 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1035
 | 
                 };  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
161
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1618
 | 
         closedir $dh;  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #say Dumper $self->{abbreviations};  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #say Dumper $self->{country2lang};  | 
| 
166
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
     return 1;  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub final_components {  | 
| 
171
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
1390
 | 
     my $self = shift;  | 
| 
172
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     if (defined($self->{final_components})) {  | 
| 
173
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         return $self->{final_components};  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
175
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     warn 'final_components not yet set' if ($show_warnings);  | 
| 
176
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
     return;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub format_address {  | 
| 
181
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
  
1
  
 | 
359744
 | 
     my $self          = shift;  | 
| 
182
 | 
455
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
7077
 | 
     my $rh_components = clone(shift) || return;  | 
| 
183
 | 
454
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1674
 | 
     my $rh_options    = shift        || {};  | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 1. make sure empty at the beginning  | 
| 
186
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1398
 | 
     $self->{final_components} = undef;      | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
788
 | 
     if ($debug){  | 
| 
189
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "*** in format_address ***";  | 
| 
190
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_options;  | 
| 
191
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 2. deal with the options  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 2a. which country format will we use?  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     might have been specified in options  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     otherwise look at components  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $cc = $rh_options->{country}  | 
| 
200
 | 
454
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1227
 | 
         || $self->_determine_country_code($rh_components)  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         || '';  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
454
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
671
 | 
     if ($cc) {  | 
| 
204
 | 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
482
 | 
         $rh_components->{country_code} = $cc;  | 
| 
205
 | 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
798
 | 
         $self->_set_district_alias($cc);  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 2b. should we abbreviate?  | 
| 
209
 | 
454
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1125
 | 
     my $abbrv = $rh_options->{abbreviate} // 0;  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 2c. was only_address set at the formatting level  | 
| 
212
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
488
 | 
     my $oa = $only_address;  | 
| 
213
 | 
454
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
622
 | 
     if (defined($rh_options->{only_address})){  | 
| 
214
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $oa = $rh_options->{only_address};  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
638
 | 
     if ($debug){  | 
| 
218
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "component_aliases";  | 
| 
219
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $self->{component_aliases};  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # done with the options  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 3. set the aliases, unless this would overwrite something  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # need to do this in the right order (as defined in the components file)  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # For example:  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # both 'city_district' and 'suburb' are aliases of 'neighbourhood'  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # so which one should we use if both are present?  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # We should use the one defined first in the list  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
231
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
369
 | 
     my $rhh_p2a;  | 
| 
232
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1343
 | 
     foreach my $c (keys %$rh_components){  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # might not need an alias as it is a primary type  | 
| 
235
 | 
3574
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4862
 | 
         next if (defined($self->{component_aliases}{$c}));  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # it is not a primary type  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # is there an alias?  | 
| 
239
 | 
734
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1244
 | 
         if (defined($self->{component2type}{$c})){  | 
| 
240
 | 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
568
 | 
             my $ptype = $self->{component2type}{$c};  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # but is it already set?  | 
| 
242
 | 
500
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
796
 | 
             if (! defined($rh_components->{$ptype}) ){  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # no, we will set it later  | 
| 
244
 | 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
808
 | 
                 $rhh_p2a->{$ptype}{$c} = 1;  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # now we know which primary types have aliases  | 
| 
251
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1036
 | 
     foreach my $ptype (keys %$rhh_p2a){  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # is there more than one?  | 
| 
253
 | 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
365
 | 
         my @aliases = keys %{$rhh_p2a->{$ptype}};  | 
| 
 
 | 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
742
 | 
    | 
| 
254
 | 
357
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
610
 | 
         if (scalar @aliases == 1){  | 
| 
255
 | 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
515
 | 
             $rh_components->{$ptype} = $rh_components->{$aliases[0]};  | 
| 
256
 | 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
486
 | 
             next;  # we are done with this ptype  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # if there is more than one we need to go through the list  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # so we do them in the right order  | 
| 
261
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
         foreach my $c (@{$self->{component_aliases}->{$ptype}}){  | 
| 
 
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
79
 | 
    | 
| 
262
 | 
44
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
95
 | 
             if (defined($rh_components->{$c})){  | 
| 
263
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
                 $rh_components->{$ptype} = $rh_components->{$c};  | 
| 
264
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
                 last; # we are done with this ptype  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
675
 | 
     if ($debug){  | 
| 
270
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "after component_aliases applied";  | 
| 
271
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 4. deal wtih terrible inputs  | 
| 
275
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1002
 | 
     $self->_sanity_cleaning($rh_components);  | 
| 
276
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
574
 | 
     if ($debug){  | 
| 
277
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "after sanity_cleaning applied";  | 
| 
278
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 5. determine the template  | 
| 
282
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
475
 | 
     my $template_text;  | 
| 
283
 | 
454
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
1013
 | 
     my $rh_config = $self->{templates}{uc($cc)} || $self->{templates}{default};  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
285
 | 
454
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
634
 | 
     if (defined($rh_options->{address_template})) {  | 
| 
286
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $template_text = $rh_options->{address_template};  | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
290
 | 
452
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
651
 | 
         if (defined($rh_config->{address_template})) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
629
 | 
             $template_text = $rh_config->{address_template};  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif (defined($self->{templates}{default}{address_template})) {  | 
| 
293
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             $template_text = $self->{templates}{default}{address_template};  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # do we have the minimal components for an address?  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # or should we instead use the fallback template?  | 
| 
298
 | 
452
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
829
 | 
         if (!$self->_minimal_components($rh_components)) {  | 
| 
299
 | 
49
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
124
 | 
             say STDERR "using fallback" if ($debug);  | 
| 
300
 | 
49
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
163
 | 
             if (defined($rh_config->{fallback_template})) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
72
 | 
                 $template_text = $rh_config->{fallback_template};  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } elsif (defined($self->{templates}{default}{fallback_template})) {  | 
| 
303
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
                 $template_text = $self->{templates}{default}{fallback_template};  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # no fallback  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
310
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
639
 | 
     say STDERR 'template text: ' . $template_text if ($debug);  | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 6. clean up the components, possibly add codes  | 
| 
313
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
813
 | 
     $self->_fix_country($rh_components);  | 
| 
314
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
598
 | 
     if ($debug){  | 
| 
315
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "after fix_country";  | 
| 
316
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1184
 | 
     $self->_apply_replacements($rh_components, $rh_config->{replace});  | 
| 
320
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
764
 | 
     if ($debug){  | 
| 
321
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "after applying_replacements applied";  | 
| 
322
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
324
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
883
 | 
     $self->_add_state_code($rh_components);  | 
| 
325
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
834
 | 
     $self->_add_county_code($rh_components);  | 
| 
326
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
643
 | 
     if ($debug){  | 
| 
327
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "after adding codes";  | 
| 
328
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 7. add the attention, if needed  | 
| 
332
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
667
 | 
     if ($debug){  | 
| 
333
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "object level only_address: $only_address";  | 
| 
334
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "formatting level only_address: $oa";  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
337
 | 
454
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
606
 | 
     if ($oa){  | 
| 
338
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
         if ($debug){  | 
| 
339
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR "not looking for unknown_components";  | 
| 
340
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR "only_address was specified";  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
344
 | 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
661
 | 
         my $ra_unknown = $self->_find_unknown_components($rh_components);  | 
| 
345
 | 
451
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
806
 | 
         if ($debug){  | 
| 
346
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR "unknown_components:";  | 
| 
347
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             say STDERR Dumper $ra_unknown;  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
349
 | 
451
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
739
 | 
         if (scalar(@$ra_unknown)){  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $rh_components->{attention} =  | 
| 
351
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
307
 | 
                 join(', ', map { $rh_components->{$_} } @$ra_unknown);  | 
| 
 
 | 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
649
 | 
    | 
| 
352
 | 
224
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
512
 | 
             if ($debug){  | 
| 
353
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 say STDERR "putting unknown_components in 'attention'";  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 8. abbreviate, if needed  | 
| 
359
 | 
454
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
690
 | 
     if ($abbrv) {  | 
| 
360
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         $rh_components = $self->_abbreviate($rh_components);  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 9. prepare the template  | 
| 
364
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
842
 | 
     $template_text = $self->_replace_template_lambdas($template_text);  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 10. compiled the template  | 
| 
367
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1664
 | 
     my $compiled_template =  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $THC->compile($template_text, {'numeric_string_as_string' => 1});  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
579878
 | 
     if ($debug){  | 
| 
371
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "before _render_template";  | 
| 
372
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $rh_components;  | 
| 
373
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "template: ";  | 
| 
374
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $compiled_template;  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 11. render the template  | 
| 
378
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
841
 | 
     my $text = $self->_render_template($compiled_template, $rh_components);  | 
| 
379
 | 
454
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
699
 | 
     if ($debug){  | 
| 
380
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "text after _render_template $text";  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 11. postformatting  | 
| 
384
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1163
 | 
     $text = $self->_postformat($text, $rh_config->{postformat_replace});  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 12. clean again  | 
| 
387
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
848
 | 
     $text = $self->_clean($text);  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 13. set final components  | 
| 
390
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
622
 | 
     $self->{final_components} = $rh_components;  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # all done  | 
| 
393
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1502
 | 
     return $text;  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # remove duplicates ("Berlin, Berlin"), do replacements and similar  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _postformat {  | 
| 
398
 | 
456
 | 
 
 | 
 
 | 
  
456
  
 | 
 
 | 
1391
 | 
     my $self = shift;  | 
| 
399
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
437
 | 
     my $text = shift;  | 
| 
400
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
603
 | 
     my $raa_rules = shift;  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
402
 | 
456
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
625
 | 
     if ($debug){  | 
| 
403
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "entering _postformat: $text"  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove duplicates  | 
| 
407
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
911
 | 
     my @before_pieces = split(/, /, $text);  | 
| 
408
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
490
 | 
     my %seen;  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @after_pieces;  | 
| 
410
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
591
 | 
     foreach my $piece (@before_pieces) {  | 
| 
411
 | 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
812
 | 
         $piece =~ s/^\s+//g;  | 
| 
412
 | 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
996
 | 
         $seen{$piece}++;  | 
| 
413
 | 
565
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1114
 | 
         if (lc($piece) ne 'new york') {  | 
| 
414
 | 
561
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
869
 | 
             next if ($seen{$piece} > 1);  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
416
 | 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
758
 | 
         push(@after_pieces, $piece);  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
418
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
702
 | 
     $text = join(', ', @after_pieces);  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # do any country specific rules  | 
| 
421
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
686
 | 
     foreach my $ra_fromto (@$raa_rules) {  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         try {  | 
| 
423
 | 
512
 | 
 
 | 
 
 | 
  
512
  
 | 
 
 | 
12136
 | 
             my $regexp = qr/$ra_fromto->[0]/;  | 
| 
424
 | 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
846
 | 
             my $replacement = $ra_fromto->[1];  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # ultra hack to do substitution  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # limited to $1 and $2, should really be a while loop  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # doing every substitution  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
430
 | 
512
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
991
 | 
             if ($replacement =~ m/\$\d/) {  | 
| 
431
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
289
 | 
                 if ($text =~ m/$regexp/) {  | 
| 
432
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
                     my $tmp1 = $1;  | 
| 
433
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                     my $tmp2 = $2;  | 
| 
434
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
                     my $tmp3 = $3;  | 
| 
435
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
                     $replacement =~ s/\$1/$tmp1/;  | 
| 
436
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
                     $replacement =~ s/\$2/$tmp2/;  | 
| 
437
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
                     $replacement =~ s/\$3/$tmp3/;  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
440
 | 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2129
 | 
             $text =~ s/$regexp/$replacement/;  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } catch {  | 
| 
442
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
             warn "invalid replacement: " . join(', ', @$ra_fromto);  | 
| 
443
 | 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5622
 | 
         };  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
445
 | 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2387
 | 
     return $text;  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _sanity_cleaning {  | 
| 
449
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
 
 | 
1408
 | 
     my $self          = shift;  | 
| 
450
 | 
455
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
643
 | 
     my $rh_components = shift || return;  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # catch insane postcodes  | 
| 
453
 | 
455
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
718
 | 
     if (defined($rh_components->{'postcode'})) {  | 
| 
454
 | 
313
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1296
 | 
         if (length($rh_components->{'postcode'}) > 20) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
455
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             delete $rh_components->{'postcode'};  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($rh_components->{'postcode'} =~ m/\d+;\d+/) {  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # sometimes OSM has postcode ranges  | 
| 
458
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             delete $rh_components->{'postcode'};  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } elsif ($rh_components->{'postcode'} =~ m/^(\d{5}),\d{5}/) {  | 
| 
460
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             $rh_components->{'postcode'} = $1;  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove things that might be empty  | 
| 
465
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
920
 | 
     foreach my $c (keys %$rh_components) {  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # catch empty values  | 
| 
467
 | 
3931
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
10327
 | 
         if (!defined($rh_components->{$c})) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
468
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             delete $rh_components->{$c};  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # no chars  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($rh_components->{$c} !~ m/\w/) {  | 
| 
472
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             delete $rh_components->{$c};  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # catch values containing URLs  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($rh_components->{$c} =~ m|https?://|) {  | 
| 
476
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             delete $rh_components->{$c};  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
479
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
671
 | 
     return;  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _minimal_components {  | 
| 
483
 | 
452
 | 
 
 | 
 
 | 
  
452
  
 | 
 
 | 
505
 | 
     my $self                = shift;  | 
| 
484
 | 
452
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
650
 | 
     my $rh_components       = shift || return;  | 
| 
485
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
705
 | 
     my @required_components = qw(road postcode); #FIXME - should be in conf  | 
| 
486
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
425
 | 
     my $missing             = 0;                 # number of required components missing  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
488
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
408
 | 
     my $minimal_threshold = 2;  | 
| 
489
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
516
 | 
     foreach my $c (@required_components) {  | 
| 
490
 | 
904
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1413
 | 
         $missing++ if (!defined($rh_components->{$c}));  | 
| 
491
 | 
904
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1381
 | 
         return 0   if ($missing == $minimal_threshold);  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
493
 | 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
790
 | 
     return 1;  | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %valid_replacement_components = ('state' => 1,);  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # determines which country code to use  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # may also override other configuration if we are dealing with  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # a dependent territory  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _determine_country_code {  | 
| 
502
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
 
 | 
511
 | 
     my $self          = shift;  | 
| 
503
 | 
455
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
694
 | 
     my $rh_components = shift || return;  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # FIXME - validate it is a valid country  | 
| 
506
 | 
455
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
769
 | 
     return if (!defined($rh_components->{country_code}));  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
508
 | 
451
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
990
 | 
     if (my $cc = lc($rh_components->{country_code})) {  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # is it two letters long?  | 
| 
511
 | 
451
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1651
 | 
         return      if ($cc !~ m/^[a-z][a-z]$/);  | 
| 
512
 | 
451
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
827
 | 
         return 'GB' if ($cc eq 'uk');  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
514
 | 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
728
 | 
         $cc = uc($cc);  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # check if the configuration tells us to use  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # the configuration of another country  | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # used in cases of dependent territories like  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # American Samoa (AS) and Puerto Rico (PR)  | 
| 
520
 | 
451
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1868
 | 
         if (   defined($self->{templates}{$cc})  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             && defined($self->{templates}{$cc}{use_country}))  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
523
 | 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
             my $old_cc = $cc;  | 
| 
524
 | 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
             $cc = $self->{templates}{$cc}{use_country};  | 
| 
525
 | 
49
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
137
 | 
             if (defined($self->{templates}{$old_cc}{change_country})) {  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
527
 | 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
                 my $new_country = $self->{templates}{$old_cc}{change_country};  | 
| 
528
 | 
36
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
108
 | 
                 if ($new_country =~ m/\$(\w*)/) {  | 
| 
529
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                     my $component = $1;  | 
| 
530
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     if (defined($rh_components->{$component})) {  | 
| 
531
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
                         $new_country =~ s/\$$component/$rh_components->{$component}/;  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     } else {  | 
| 
533
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $new_country =~ s/\$$component//;  | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
536
 | 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
                 $rh_components->{country} = $new_country;  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
538
 | 
49
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
130
 | 
             if (defined($self->{templates}{$old_cc}{add_component})) {  | 
| 
539
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
                 my $tmp = $self->{templates}{$old_cc}{add_component};  | 
| 
540
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
                 my ($k, $v) = split(/=/, $tmp);  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # check whitelist of valid replacement components  | 
| 
542
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
39
 | 
                 if (defined($valid_replacement_components{$k})) {  | 
| 
543
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
                     $rh_components->{$k} = $v;  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
548
 | 
451
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
741
 | 
         if ($cc eq 'NL') {  | 
| 
549
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
             if (defined($rh_components->{state})) {  | 
| 
550
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
                 if ($rh_components->{state} eq 'Curaçao') {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
551
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                     $cc = 'CW';  | 
| 
552
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                     $rh_components->{country} = 'Curaçao';  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } elsif ($rh_components->{state} =~ m/^sint maarten/i) {  | 
| 
554
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $cc = 'SX';  | 
| 
555
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $rh_components->{country} = 'Sint Maarten';  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } elsif ($rh_components->{state} =~ m/^Aruba/i) {  | 
| 
557
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                     $cc = 'AW';  | 
| 
558
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                     $rh_components->{country} = 'Aruba';  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
562
 | 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1443
 | 
         return $cc;  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
564
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return;  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # hacks for bad country data  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _fix_country {  | 
| 
569
 | 
454
 | 
 
 | 
 
 | 
  
454
  
 | 
 
 | 
631
 | 
     my $self          = shift;  | 
| 
570
 | 
454
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
735
 | 
     my $rh_components = shift || return;  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # is the country a number?  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # if so, and there is a state, use state as country  | 
| 
574
 | 
454
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
692
 | 
     if (defined($rh_components->{country})) {  | 
| 
575
 | 
451
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1422
 | 
         if (looks_like_number($rh_components->{country})) {  | 
| 
576
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             if (defined($rh_components->{state})) {  | 
| 
577
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $rh_components->{country} = $rh_components->{state};  | 
| 
578
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 delete $rh_components->{state};  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
582
 | 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
500
 | 
     return;  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # sets and returns a state code  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # note may also set other values in some odd edge cases  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _add_state_code {  | 
| 
588
 | 
459
 | 
 
 | 
 
 | 
  
459
  
 | 
 
 | 
1064
 | 
     my $self          = shift;  | 
| 
589
 | 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
416
 | 
     my $rh_components = shift;  | 
| 
590
 | 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
777
 | 
     return $self->_add_code('state', $rh_components);  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _add_county_code {  | 
| 
594
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
 
 | 
1017
 | 
     my $self          = shift;  | 
| 
595
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
427
 | 
     my $rh_components = shift;  | 
| 
596
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
599
 | 
     return $self->_add_code('county', $rh_components);  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _add_code {  | 
| 
600
 | 
914
 | 
 
 | 
 
 | 
  
914
  
 | 
 
 | 
798
 | 
     my $self          = shift;  | 
| 
601
 | 
914
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
1277
 | 
     my $keyname       = shift // return;  | 
| 
602
 | 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
772
 | 
     my $rh_components = shift;  | 
| 
603
 | 
914
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1293
 | 
     return if !$rh_components->{country_code}; # do we know country?  | 
| 
604
 | 
905
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1382
 | 
     return if !$rh_components->{$keyname};     # do we know state/county?  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
606
 | 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
815
 | 
     my $code = $keyname . '_code';  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
608
 | 
566
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
930
 | 
     if (defined($rh_components->{$code})) {    # do we already have code?  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                # but could have situation  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                # where code and long name are  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                # the same which we want to correct  | 
| 
612
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
68
 | 
         if ($rh_components->{$code} ne $rh_components->{$keyname}) {  | 
| 
613
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
             return;  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # ensure country_code is uppercase as we use it as conf key  | 
| 
618
 | 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
690
 | 
     $rh_components->{country_code} = uc($rh_components->{country_code});  | 
| 
619
 | 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
532
 | 
     my $cc = $rh_components->{country_code};  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
621
 | 
541
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1354
 | 
     if (my $mapping = $self->{$code . 's'}{$cc}) {  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
623
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
354
 | 
         my $name    = $rh_components->{$keyname};  | 
| 
624
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
404
 | 
         my $uc_name = uc($name);  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
626
 | 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8359
 | 
     LOCCODE: foreach my $abbrv (keys %$mapping) {  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
628
 | 
6671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4976
 | 
             my @confnames; # can have multiple names for the place  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                            # for example in different languages  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
631
 | 
6671
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6941
 | 
             if (is_hashref($mapping->{$abbrv})) {  | 
| 
632
 | 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
683
 | 
                 push(@confnames, values %{$mapping->{$abbrv}});  | 
| 
 
 | 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1493
 | 
    | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
634
 | 
5832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6005
 | 
                 push(@confnames, $mapping->{$abbrv});  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
637
 | 
6671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6012
 | 
             foreach my $confname (@confnames) {  | 
| 
638
 | 
7587
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10084
 | 
                 if ($uc_name eq uc($confname)) {  | 
| 
639
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
289
 | 
                     $rh_components->{$code} = $abbrv;  | 
| 
640
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
342
 | 
                     last LOCCODE;  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # perhaps instead of passing in a name, we passed in a code  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # example: state => 'NC'  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # we want to turn that into  | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     state => 'North Carolina'  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     state_code => 'NC'  | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
648
 | 
7407
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9859
 | 
                 if ($uc_name eq $abbrv) {  | 
| 
649
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                     $rh_components->{$keyname} = $confname;  | 
| 
650
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                     $rh_components->{$code}    = $abbrv;  | 
| 
651
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                     last LOCCODE;  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # didn't find a valid code or name  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # try again for odd variants like "United States Virgin Islands"  | 
| 
658
 | 
283
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
768
 | 
         if ($keyname eq 'state') {  | 
| 
659
 | 
254
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
492
 | 
             if (!defined($rh_components->{state_code})) {  | 
| 
660
 | 
81
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
144
 | 
                 if ($cc eq 'US') {  | 
| 
661
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
                     if ($rh_components->{state} =~ m/^united states/i) {  | 
| 
662
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         my $state = $rh_components->{state};  | 
| 
663
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $state =~ s/^United States/US/i;  | 
| 
664
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         foreach my $k (keys %$mapping) {  | 
| 
665
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                             if (uc($state) eq uc($k)) {  | 
| 
666
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 $rh_components->{state_code} = $mapping->{$k};  | 
| 
667
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 last;  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             }  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
671
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                     if ($rh_components->{state} =~ m/^washington,? d\.?c\.?/i) {  | 
| 
672
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         $rh_components->{state_code} = 'DC';  | 
| 
673
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         $rh_components->{state}      = 'District of Columbia';  | 
| 
674
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
                         $rh_components->{city}       = 'Washington';  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
680
 | 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
760
 | 
     return $rh_components->{$code};  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _apply_replacements {  | 
| 
684
 | 
457
 | 
 
 | 
 
 | 
  
457
  
 | 
 
 | 
1601
 | 
     my $self          = shift;  | 
| 
685
 | 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
388
 | 
     my $rh_components = shift;  | 
| 
686
 | 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
575
 | 
     my $raa_rules     = shift;  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
688
 | 
457
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
672
 | 
     if ($debug){  | 
| 
689
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "in _apply_replacements";  | 
| 
690
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR Dumper $raa_rules;  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
693
 | 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
964
 | 
     foreach my $component (keys %$rh_components) {  | 
| 
694
 | 
3930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12871
 | 
         foreach my $ra_fromto (@$raa_rules) {  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
696
 | 
8010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58255
 | 
             my $regexp;  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # do key specific replacement  | 
| 
698
 | 
8010
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23021
 | 
             if ($ra_fromto->[0] =~ m/^$component=/){  | 
| 
699
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
131
 | 
                 my $from = $ra_fromto->[0];  | 
| 
700
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
316
 | 
                 $from =~ s/^$component=//;  | 
| 
701
 | 
82
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
164
 | 
                 if ($rh_components->{$component} eq $from){  | 
| 
702
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                     $rh_components->{$component} = $ra_fromto->[1];  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } else {  | 
| 
704
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
111
 | 
                     $regexp = $from;  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
707
 | 
7928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8399
 | 
                 $regexp = $ra_fromto->[0];  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
709
 | 
8010
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10365
 | 
             if (defined($regexp)){  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 try {  | 
| 
711
 | 
8005
 | 
 
 | 
 
 | 
  
8005
  
 | 
 
 | 
165819
 | 
                     my $re = qr/$regexp/;  | 
| 
712
 | 
8004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26689
 | 
                     $rh_components->{$component} =~ s/$re/$ra_fromto->[1]/;  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } catch {  | 
| 
714
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
22
 | 
                     warn "invalid replacement: " . join(', ', @$ra_fromto);  | 
| 
715
 | 
8005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25980
 | 
                 };  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
719
 | 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1939
 | 
     return $rh_components;  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _abbreviate {  | 
| 
723
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
566
 | 
     my $self    = shift;  | 
| 
724
 | 
7
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
11
 | 
     my $rh_comp = shift // return;  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # do we know the country?  | 
| 
727
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     if (!defined($rh_comp->{country_code})) {  | 
| 
728
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         if ($show_warnings){  | 
| 
729
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             my $error_msg = 'no country_code, unable to abbreviate';  | 
| 
730
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             if (defined($rh_comp->{country})) {  | 
| 
731
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $error_msg .= ' - country: ' . $rh_comp->{country};  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
733
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
             warn $error_msg  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
735
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
         return;  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # do we have abbreviations for this country?  | 
| 
739
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     my $cc = uc($rh_comp->{country_code});  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 1. which languages?  | 
| 
742
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     if (defined($self->{country2lang}{$cc})) {  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
744
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
         my @langs = split(/,/, $self->{country2lang}{$cc});  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
746
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         foreach my $lang (@langs) {  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # do we have abbrv for this lang?  | 
| 
748
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
             if (defined($self->{abbreviations}->{$lang})) {  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
750
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                 my $rh_abbr = $self->{abbreviations}->{$lang};  | 
| 
751
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                 foreach my $comp_name (keys %$rh_abbr) {  | 
| 
752
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
                     next if (!defined($rh_comp->{$comp_name}));  | 
| 
753
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                     foreach my $long (keys %{$rh_abbr->{$comp_name}}) {  | 
| 
 
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
    | 
| 
754
 | 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
78
 | 
                         my $short = $rh_abbr->{$comp_name}->{$long};  | 
| 
755
 | 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
357
 | 
                         $rh_comp->{$comp_name} =~ s/\b$long\b/$short/;  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             } else {  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #warn "no abbreviations defined for lang $lang";  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
764
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     return $rh_comp;  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # " abc,,def , ghi " => 'abc, def, ghi'  | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _clean {  | 
| 
769
 | 
914
 | 
 
 | 
 
 | 
  
914
  
 | 
 
 | 
2768
 | 
     my $self = shift;  | 
| 
770
 | 
914
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1357
 | 
     my $out  = shift // return;  | 
| 
771
 | 
913
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1118
 | 
     if ($debug){  | 
| 
772
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         say STDERR "entering _clean \n$out";  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
775
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1366
 | 
     $out =~ s/\'\;/'/g;  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
777
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4829
 | 
     $out =~ s/[\},\s]+$//;  | 
| 
778
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1908
 | 
     $out =~ s/^[,\s]+//;  | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
780
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1069
 | 
     $out =~ s/^- //; # line starting with dash due to a parameter missing  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
782
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1113
 | 
     $out =~ s/,\s*,/, /g;   # multiple commas to one  | 
| 
783
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1338
 | 
     $out =~ s/\h+,\h+/, /g; # one horiz whitespace behind comma  | 
| 
784
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1998
 | 
     $out =~ s/\h\h+/ /g;    # multiple horiz whitespace to one  | 
| 
785
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1841
 | 
     $out =~ s/\h\n/\n/g;    # horiz whitespace, newline to newline  | 
| 
786
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1129
 | 
     $out =~ s/\n,/\n/g;     # newline comma to just newline  | 
| 
787
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1014
 | 
     $out =~ s/,,+/,/g;      # multiple commas to one  | 
| 
788
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1103
 | 
     $out =~ s/,\n/\n/g;     # comma newline to just newline  | 
| 
789
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1971
 | 
     $out =~ s/\n\h+/\n/g;   # newline plus space to newline  | 
| 
790
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2031
 | 
     $out =~ s/\n\n+/\n/g;   # multiple newline to one  | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # final dedupe across and within lines  | 
| 
793
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2288
 | 
     my @before_pieces = split(/\n/, $out);  | 
| 
794
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1072
 | 
     my %seen_lines;  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @after_pieces;  | 
| 
796
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1134
 | 
     foreach my $line (@before_pieces) {  | 
| 
797
 | 
3529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4290
 | 
         $line =~ s/^\h+//g;  | 
| 
798
 | 
3529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4261
 | 
         $line =~ s/\h+$//g;  | 
| 
799
 | 
3529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5301
 | 
         $seen_lines{$line}++;  | 
| 
800
 | 
3529
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4874
 | 
         next if ($seen_lines{$line} > 1);  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # now dedupe within the line  | 
| 
802
 | 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4994
 | 
         my @before_words = split(/,/, $line);  | 
| 
803
 | 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3141
 | 
         my %seen_words;  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @after_words;  | 
| 
805
 | 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3389
 | 
         foreach my $w (@before_words) {  | 
| 
806
 | 
3718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4699
 | 
             $w =~ s/^\h+//g;  | 
| 
807
 | 
3718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4152
 | 
             $w =~ s/\h+$//g;  | 
| 
808
 | 
1
 | 
  
100
  
 | 
 
 | 
  
1
  
 | 
 
 | 
7
 | 
             if (lc($w) ne 'new york') {  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
 
 | 
3718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5974
 | 
    | 
| 
809
 | 
3708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25187
 | 
                 $seen_words{$w}++;  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
811
 | 
3718
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
8902
 | 
             next if ((defined($seen_words{$w})) && ($seen_words{$w} > 1));  | 
| 
812
 | 
3716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5053
 | 
             push(@after_words, $w);  | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
814
 | 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4590
 | 
         $line = join(', ', @after_words);  | 
| 
815
 | 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5733
 | 
         push(@after_pieces, $line);  | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
817
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1355
 | 
     $out = join("\n", @after_pieces);  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
819
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1373
 | 
     $out =~ s/^\s+//; # remove leading whitespace  | 
| 
820
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1969
 | 
     $out =~ s/\s+$//; # remove end whitespace  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
822
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1130
 | 
     $out .= "\n";     # add final newline  | 
| 
823
 | 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2237
 | 
     return $out;      # we are done  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _render_template {  | 
| 
827
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
 
 | 
2017
 | 
     my $self       = shift;  | 
| 
828
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
484
 | 
     my $thtemplate = shift;  | 
| 
829
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
409
 | 
     my $components = shift;  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Mustache calls it context  | 
| 
832
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6884
 | 
     my $context = clone($components);  | 
| 
833
 | 
455
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
893
 | 
     say STDERR 'context: ' . Dumper $context if ($debug);  | 
| 
834
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1051
 | 
     my $output = $thtemplate->render($context);  | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
836
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
370415
 | 
     $output = $self->_evaluate_template_lamdas($output);  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
838
 | 
455
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
839
 | 
     say STDERR "in _render before _clean: $output" if ($debug);  | 
| 
839
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
743
 | 
     $output = $self->_clean($output);  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # is it empty?  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # if yes and there is only one component then just use that one  | 
| 
843
 | 
455
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1195
 | 
     if ($output !~ m/\w/) {  | 
| 
844
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         my @comps = sort keys %$components;  | 
| 
845
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         if (scalar(@comps) == 1) {  | 
| 
846
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             foreach my $k (@comps) {  | 
| 
847
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $output = $components->{$k};  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } # FIXME what if more than one?  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
851
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1436
 | 
     return $output;  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Text::Hogan apparently caches lambdas when rendering templates. In the past  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # we needed our lambda 'first', example  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   {{#first}} {{{city}}} || {{{town}}} {{/first}}  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # to evaluate the componentes. Whenever the lambda was called with different  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # component values it consumed memory. Now replace with a simpler implementation  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _replace_template_lambdas {  | 
| 
861
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
 
 | 
991
 | 
     my $self          = shift;  | 
| 
862
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
592
 | 
     my $template_text = shift;  | 
| 
863
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6076
 | 
     $template_text =~ s!\Q{{#first}}\E(.+?)\Q{{/first}}\E!FIRSTSTART${1}FIRSTEND!g;  | 
| 
864
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1085
 | 
     return $template_text;  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # We only use a lambda named 'first'  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _evaluate_template_lamdas {  | 
| 
869
 | 
455
 | 
 
 | 
 
 | 
  
455
  
 | 
 
 | 
540
 | 
     my $self = shift;  | 
| 
870
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
486
 | 
     my $text = shift;  | 
| 
871
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3643
 | 
     $text =~ s!FIRSTSTART\s*(.+?)\s*FIRSTEND!_select_first($1)!seg;  | 
| 
 
 | 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1227
 | 
    | 
| 
872
 | 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
955
 | 
     return $text;  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # '|| val1 ||  || val3' => 'val1'  | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _select_first {  | 
| 
877
 | 
783
 | 
 
 | 
 
 | 
  
783
  
 | 
 
 | 
1096
 | 
     my $text = shift;  | 
| 
878
 | 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3762
 | 
     my @a_parts = grep { length($_) } split(/\s*\|\|\s*/, $text);  | 
| 
 
 | 
2591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3377
 | 
    | 
| 
879
 | 
783
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3795
 | 
     return scalar(@a_parts) ? $a_parts[0] : '';  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my %small_district = (  | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'br' => 1,  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'cr' => 1,  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'es' => 1,  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ni' => 1,  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'py' => 1,  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'ro' => 1,  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'tg' => 1,  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'tm' => 1,  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'xk' => 1,  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 );  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # correct the alias for "district"  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # in OSM some countries use district to mean "city_district"  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # others to mean "state_district"  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _set_district_alias {  | 
| 
898
 | 
450
 | 
 
 | 
 
 | 
  
450
  
 | 
 
 | 
456
 | 
     my $self = shift;  | 
| 
899
 | 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
431
 | 
     my $cc = shift;  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this may get called repeatedly  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # no need to do the work again  | 
| 
903
 | 
450
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
764
 | 
     if (defined($cc)){  | 
| 
904
 | 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
493
 | 
         my $ucc = uc($cc);  | 
| 
905
 | 
450
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
981
 | 
         return if (defined($self->{set_district_alias}{$ucc}));  | 
| 
906
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
416
 | 
         $self->{set_district_alias}{$ucc} = 1;  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
909
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
217
 | 
     my $oldalias;  | 
| 
910
 | 
224
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
425
 | 
     if (defined($small_district{$cc})){  | 
| 
911
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $self->{component2type}{district} = 'neighbourhood';  | 
| 
912
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $oldalias = 'state_district';  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # add to the neighbourhood alias list  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # though of course we are just sticking it at the end  | 
| 
916
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         push(@{$self->{component_aliases}{'neighbourhood'}}, 'district');  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # set 'district' to be type 'state_district'  | 
| 
920
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
362
 | 
         $self->{component2type}{district} = 'state_district';  | 
| 
921
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
232
 | 
         $oldalias = 'neighbourhood';          | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # add to the state_district alias list  | 
| 
924
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
236
 | 
         push(@{$self->{component_aliases}{'state_district'}}, 'district');  | 
| 
 
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
534
 | 
    | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }   | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove from the old alias list  | 
| 
928
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
255
 | 
     my @temp = grep { $_ ne 'district' } @{$self->{component_aliases}{$oldalias}};  | 
| 
 
 | 
2862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3507
 | 
    | 
| 
 
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
476
 | 
    | 
| 
929
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
545
 | 
     $self->{component_aliases}{$oldalias} = \@temp;  | 
| 
930
 | 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
371
 | 
     return;  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }    | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns []  | 
| 
935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _find_unknown_components {  | 
| 
936
 | 
452
 | 
 
 | 
 
 | 
  
452
  
 | 
 
 | 
949
 | 
     my $self       = shift;  | 
| 
937
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
366
 | 
     my $rh_components = shift;  | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
939
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
368
 | 
     my %h_known   = map  { $_ => 1 } @{$self->{ordered_components}};  | 
| 
 
 | 
29861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34624
 | 
    | 
| 
 
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
790
 | 
    | 
| 
940
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3336
 | 
     my @a_unknown = grep { !exists($h_known{$_}) } sort keys %$rh_components;  | 
| 
 
 | 
4079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5089
 | 
    | 
| 
941
 | 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2241
 | 
     return \@a_unknown;  | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =encoding UTF-8  | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Geo::Address::Formatter - take structured address data and format it according to the various global/country rules  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 VERSION  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 version 1.9982  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # get the templates (or use your own)  | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # git clone git@github.com:OpenCageData/address-formatting.git  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #  | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $GAF = Geo::Address::Formatter->new( conf_path => '/path/to/templates' );  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $components = { ... }  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $text = $GAF->format_address($components, { country => 'FR' } );  | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $rh_final_components = $GAF->final_components();  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # or if we want shorter output  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   #   | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $short_text = $GAF->format_address($components, { country => 'FR', abbreviate => 1, });  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $GAF = Geo::Address::Formatter->new( conf_path => '/path/to/templates' );  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns one instance. The I<conf_path> is required.  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Optional parameters are:  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I<debug>: prints tons of debugging info for use in development.  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I<no_warnings>: turns off a few warnings if configuration is not optimal.  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I<only_address>: formatted will only contain known components (will not include POI names). Note, can be overridden with optional param to format_address method.  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 final_components  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $rh_components = $GAF->final_components();  | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returns a reference to a hash of the final components that are set at the  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 completion of B<format_address>. Warns if called before they have been set   | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (unless I<no_warnings> was set at object creation).  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 format_address  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $text = $GAF->format_address(\%components, \%options );  | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Given a structures address (hashref) and options (hashref) returns a  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 formatted address.  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Possible options are:  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'abbreviate', if supplied common abbreviations are applied  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     to the resulting output.  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'address_template', a mustache format template to be used instead of the template  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     defined in the configuration  | 
| 
1012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'country', which should be an uppercase ISO 3166-1:alpha-2 code  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     e.g. 'GB' for Great Britain, 'DE' for Germany, etc.  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     If ommited we try to find the country in the address components.  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'only_address', same as only_address global option but set at formatting level  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You have a structured postal address (hash) and need to convert it into a  | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 readable address based on the format of the address country.  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For example, you have:  | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     house_number => 12,  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     street => 'Avenue Road',  | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     postcode => 45678,  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     city => 'Deville'  | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 you need:  | 
| 
1034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Great Britain: 12 Avenue Road, Deville 45678  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   France: 12 Avenue Road, 45678 Deville  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Germany: Avenue Road 12, 45678 Deville  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Latvia: Avenue Road 12, Deville, 45678  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It gets more complicated with 200+ countries and territories and dozens more  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 address components to consider.  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module comes with a minimal configuration to run tests. Instead of  | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 developing your own configuration please use (and contribute to)  | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 those in https://github.com/OpenCageData/address-formatting  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which includes test cases.  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Together we can address the world!  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Ed Freyfogle  | 
| 
1053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENSE  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This software is copyright (c) 2023 by Opencage GmbH.  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is free software; you can redistribute it and/or modify it under  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the same terms as the Perl 5 programming language system itself.  | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  |