| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Geo::Address::Formatter; | 
| 2 |  |  |  |  |  |  | $Geo::Address::Formatter::VERSION = '1.9982'; | 
| 3 |  |  |  |  |  |  | # ABSTRACT: take structured address data and format it according to the various global/country rules | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 6 |  |  | 6 |  | 26179 | use strict; | 
|  | 6 |  |  |  |  | 11 |  | 
|  | 6 |  |  |  |  | 162 |  | 
| 6 | 6 |  |  | 6 |  | 23 | use warnings; | 
|  | 6 |  |  |  |  | 8 |  | 
|  | 6 |  |  |  |  | 136 |  | 
| 7 | 6 |  |  | 6 |  | 22 | use feature qw(say); | 
|  | 6 |  |  |  |  | 7 |  | 
|  | 6 |  |  |  |  | 467 |  | 
| 8 | 6 |  |  | 6 |  | 1744 | use Clone qw(clone); | 
|  | 6 |  |  |  |  | 9615 |  | 
|  | 6 |  |  |  |  | 281 |  | 
| 9 | 6 |  |  | 6 |  | 973 | use Data::Dumper; | 
|  | 6 |  |  |  |  | 10885 |  | 
|  | 6 |  |  |  |  | 299 |  | 
| 10 |  |  |  |  |  |  | $Data::Dumper::Sortkeys = 1; | 
| 11 | 6 |  |  | 6 |  | 33 | use File::Basename qw(dirname); | 
|  | 6 |  |  |  |  | 8 |  | 
|  | 6 |  |  |  |  | 254 |  | 
| 12 | 6 |  |  | 6 |  | 1890 | use Ref::Util qw(is_hashref); | 
|  | 6 |  |  |  |  | 6413 |  | 
|  | 6 |  |  |  |  | 343 |  | 
| 13 | 6 |  |  | 6 |  | 34 | use Scalar::Util qw(looks_like_number); | 
|  | 6 |  |  |  |  | 10 |  | 
|  | 6 |  |  |  |  | 244 |  | 
| 14 | 6 |  |  | 6 |  | 2087 | use Text::Hogan::Compiler; | 
|  | 6 |  |  |  |  | 32633 |  | 
|  | 6 |  |  |  |  | 203 |  | 
| 15 | 6 |  |  | 6 |  | 1887 | use Try::Catch; | 
|  | 6 |  |  |  |  | 3455 |  | 
|  | 6 |  |  |  |  | 310 |  | 
| 16 | 6 |  |  | 6 |  | 1313 | use YAML::XS qw(LoadFile); | 
|  | 6 |  |  |  |  | 8993 |  | 
|  | 6 |  |  |  |  | 243 |  | 
| 17 | 6 |  |  | 6 |  | 30 | use utf8; | 
|  | 6 |  |  |  |  | 20 |  | 
|  | 6 |  |  |  |  | 80 |  | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | my $THC = Text::Hogan::Compiler->new; | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # optional params | 
| 22 |  |  |  |  |  |  | my $show_warnings = 1; | 
| 23 |  |  |  |  |  |  | my $debug         = 0; | 
| 24 |  |  |  |  |  |  | my $only_address  = 0; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | sub new { | 
| 28 | 17 |  |  | 17 | 1 | 16799 | my ($class, %params) = @_; | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 17 |  |  |  |  | 36 | my $self      = {}; | 
| 31 | 17 |  | 50 |  |  | 60 | my $conf_path = $params{conf_path} || die "no conf_path set"; | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | # optional params | 
| 34 | 17 | 50 | 66 |  |  | 51 | if ( defined($params{no_warnings}) && ($params{no_warnings})){ | 
| 35 | 1 |  |  |  |  | 2 | $show_warnings  = 0; | 
| 36 |  |  |  |  |  |  | } | 
| 37 | 17 |  | 66 |  |  | 67 | $only_address  = (defined($params{only_address}) && $params{only_address}) // 0; | 
|  |  |  | 50 |  |  |  |  | 
| 38 | 17 |  | 66 |  |  | 64 | $debug         = (defined($params{debug})        && $params{debug})        // 0; | 
|  |  |  | 50 |  |  |  |  | 
| 39 |  |  |  |  |  |  |  | 
| 40 | 17 |  |  |  |  | 31 | $self->{final_components} = undef; | 
| 41 | 17 |  |  |  |  | 28 | $self->{set_district_alias} = {}; | 
| 42 |  |  |  |  |  |  |  | 
| 43 | 17 |  |  |  |  | 25 | bless($self, $class); | 
| 44 |  |  |  |  |  |  |  | 
| 45 | 17 | 50 |  |  |  | 47 | say STDERR "************* in Geo::Address::Formatter::new ***" if ($debug); | 
| 46 |  |  |  |  |  |  |  | 
| 47 | 17 | 100 |  |  |  | 43 | if ($self->_read_configuration($conf_path)){ | 
| 48 | 16 |  |  |  |  | 83 | return $self; | 
| 49 |  |  |  |  |  |  | } | 
| 50 | 1 |  |  |  |  | 18 | die 'unable to read configuration'; | 
| 51 |  |  |  |  |  |  | } | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | sub _read_configuration { | 
| 54 | 17 |  |  | 17 |  | 25 | my $self = shift; | 
| 55 | 17 |  |  |  |  | 21 | my $path = shift; | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 17 | 50 |  |  |  | 349 | return if (! -e $path); | 
| 58 |  |  |  |  |  |  | # components file | 
| 59 | 17 |  |  |  |  | 63 | my $compyaml = $path . '/components.yaml'; | 
| 60 | 17 | 100 |  |  |  | 247 | return if (! -e $compyaml); | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 16 |  |  |  |  | 78 | $self->{templates}          = {}; | 
| 63 | 16 |  |  |  |  | 28 | $self->{component_aliases}  = {}; | 
| 64 | 16 |  |  |  |  | 34 | $self->{component2type}     = {}; | 
| 65 | 16 |  |  |  |  | 31 | $self->{ordered_components} = []; | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | # read the config file(s) | 
| 68 | 16 |  |  |  |  | 23 | my $loaded = 0; | 
| 69 |  |  |  |  |  |  |  | 
| 70 | 16 |  |  |  |  | 36 | my $wwfile = $path . '/countries/worldwide.yaml'; | 
| 71 | 16 | 50 |  |  |  | 31 | say STDERR "loading templates $wwfile" if ($debug); | 
| 72 |  |  |  |  |  |  |  | 
| 73 | 16 | 50 |  |  |  | 197 | return if (! -e $wwfile); | 
| 74 |  |  |  |  |  |  |  | 
| 75 |  |  |  |  |  |  | try { | 
| 76 | 16 |  |  | 16 |  | 344 | my $rh_templates = LoadFile($wwfile); | 
| 77 | 16 |  |  |  |  | 28396 | foreach (keys %$rh_templates) { | 
| 78 | 2950 |  |  |  |  | 3120 | $self->{templates}{$_} = $rh_templates->{$_}; | 
| 79 | 2950 |  |  |  |  | 2682 | $loaded = 1; | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  | } catch { | 
| 82 | 0 |  |  | 0 |  | 0 | warn "error parsing country configuration in $wwfile"; | 
| 83 | 0 |  |  |  |  | 0 | return; | 
| 84 | 16 |  |  |  |  | 173 | }; | 
| 85 | 16 | 50 |  |  |  | 337 | return if ($loaded == 0);  # no templates | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | # see if we can load the components | 
| 88 |  |  |  |  |  |  | try { | 
| 89 | 16 | 50 |  | 16 |  | 307 | say STDERR "loading components" if ($debug); | 
| 90 | 16 |  |  |  |  | 44 | my @c = LoadFile($compyaml); | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 16 | 50 |  |  |  | 3006 | if ($debug){ | 
| 93 | 0 |  |  |  |  | 0 | say STDERR Dumper \@c; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 16 |  |  |  |  | 41 | foreach my $rh_c (@c) { | 
| 97 | 244 | 50 |  |  |  | 306 | if (defined($rh_c->{name})){ | 
| 98 | 244 | 100 |  |  |  | 262 | if (defined($rh_c->{aliases})){ | 
| 99 | 148 |  |  |  |  | 251 | $self->{component_aliases}{$rh_c->{name}} = $rh_c->{aliases}; | 
| 100 |  |  |  |  |  |  | } else { | 
| 101 | 96 |  |  |  |  | 209 | $self->{component_aliases}{$rh_c->{name}} = []; | 
| 102 |  |  |  |  |  |  | } | 
| 103 |  |  |  |  |  |  | } | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 16 |  |  |  |  | 28 | foreach my $rh_c (@c) { | 
| 107 | 244 |  |  |  |  | 197 | push(@{$self->{ordered_components}}, $rh_c->{name}); | 
|  | 244 |  |  |  |  | 330 |  | 
| 108 | 244 |  |  |  |  | 340 | $self->{component2type}->{$rh_c->{name}} = $rh_c->{name}; | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 244 | 100 |  |  |  | 354 | if (defined($rh_c->{aliases})) { | 
| 111 | 148 |  |  |  |  | 124 | foreach my $alias (@{$rh_c->{aliases}}) { | 
|  | 148 |  |  |  |  | 166 |  | 
| 112 | 510 |  |  |  |  | 393 | push(@{$self->{ordered_components}}, $alias); | 
|  | 510 |  |  |  |  | 528 |  | 
| 113 | 510 |  |  |  |  | 774 | $self->{component2type}->{$alias} = $rh_c->{name}; | 
| 114 |  |  |  |  |  |  | } | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  | } | 
| 117 | 16 | 50 |  |  |  | 103 | if ($debug){ | 
| 118 | 0 |  |  |  |  | 0 | say STDERR 'component_aliases'; | 
| 119 | 0 |  |  |  |  | 0 | say STDERR Dumper $self->{component_aliases}; | 
| 120 | 0 |  |  |  |  | 0 | say STDERR 'ordered_components'; | 
| 121 | 0 |  |  |  |  | 0 | say STDERR Dumper $self->{ordered_components}; | 
| 122 | 0 |  |  |  |  | 0 | say STDERR 'component2type'; | 
| 123 | 0 |  |  |  |  | 0 | say STDERR Dumper $self->{component2type}; | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  | } catch { | 
| 126 | 0 |  |  | 0 |  | 0 | warn "error parsing component configuration: $_"; | 
| 127 | 16 |  |  |  |  | 109 | }; | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | # get the county and state codes and country2lang conf | 
| 130 | 16 |  |  |  |  | 300 | my @conf_files = qw(county_codes state_codes country2lang); | 
| 131 | 16 |  |  |  |  | 29 | foreach my $cfile (@conf_files) { | 
| 132 | 48 |  |  |  |  | 74946 | $self->{$cfile} = {}; | 
| 133 | 48 |  |  |  |  | 122 | my $yfile = $path . '/' . $cfile . '.yaml'; | 
| 134 | 48 | 100 |  |  |  | 757 | if (-e $yfile) { | 
| 135 |  |  |  |  |  |  | try { | 
| 136 | 42 |  |  | 42 |  | 866 | $self->{$cfile} = LoadFile($yfile); | 
| 137 |  |  |  |  |  |  | } catch { | 
| 138 | 0 |  |  | 0 |  | 0 | warn "error parsing $cfile configuration: $_"; | 
| 139 | 42 |  |  |  |  | 384 | }; | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 16 |  |  |  |  | 5555 | my $abbrvdir = $path . '/abbreviations'; | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 16 | 100 |  |  |  | 227 | if (-d $abbrvdir){ | 
| 146 | 12 | 50 |  |  |  | 416 | opendir my $dh, $abbrvdir | 
| 147 |  |  |  |  |  |  | or die "Could not open '$abbrvdir' for read: $!\n"; | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 12 |  |  |  |  | 329 | while (my $file = readdir $dh) { | 
| 150 |  |  |  |  |  |  | # say STDERR "file: $file"; | 
| 151 | 260 | 100 |  |  |  | 30296 | if ($file =~ m/^(\w\w)\.yaml$/) { | 
| 152 | 236 |  |  |  |  | 499 | my $lang = $1; # two letter lang code like 'en' | 
| 153 | 236 |  |  |  |  | 414 | my $abbrvfile = $abbrvdir . '/' . $file; | 
| 154 |  |  |  |  |  |  | try { | 
| 155 | 236 |  |  | 236 |  | 4838 | $self->{abbreviations}->{$lang} = LoadFile($abbrvfile); | 
| 156 |  |  |  |  |  |  | } catch { | 
| 157 | 0 |  |  | 0 |  | 0 | warn "error parsing abbrv conf in $abbrvfile: $_"; | 
| 158 | 236 |  |  |  |  | 1035 | }; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  | } | 
| 161 | 12 |  |  |  |  | 1618 | closedir $dh; | 
| 162 |  |  |  |  |  |  | } | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | #say Dumper $self->{abbreviations}; | 
| 165 |  |  |  |  |  |  | #say Dumper $self->{country2lang}; | 
| 166 | 16 |  |  |  |  | 97 | return 1; | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub final_components { | 
| 171 | 6 |  |  | 6 | 1 | 1390 | my $self = shift; | 
| 172 | 6 | 100 |  |  |  | 15 | if (defined($self->{final_components})) { | 
| 173 | 5 |  |  |  |  | 13 | return $self->{final_components}; | 
| 174 |  |  |  |  |  |  | } | 
| 175 | 1 | 50 |  |  |  | 15 | warn 'final_components not yet set' if ($show_warnings); | 
| 176 | 1 |  |  |  |  | 64 | return; | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | sub format_address { | 
| 181 | 455 |  |  | 455 | 1 | 359744 | my $self          = shift; | 
| 182 | 455 |  | 100 |  |  | 7077 | my $rh_components = clone(shift) || return; | 
| 183 | 454 |  | 100 |  |  | 1674 | my $rh_options    = shift        || {}; | 
| 184 |  |  |  |  |  |  |  | 
| 185 |  |  |  |  |  |  | # 1. make sure empty at the beginning | 
| 186 | 454 |  |  |  |  | 1398 | $self->{final_components} = undef; | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 454 | 50 |  |  |  | 788 | if ($debug){ | 
| 189 | 0 |  |  |  |  | 0 | say STDERR "*** in format_address ***"; | 
| 190 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_options; | 
| 191 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | # 2. deal with the options | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | # 2a. which country format will we use? | 
| 197 |  |  |  |  |  |  | #     might have been specified in options | 
| 198 |  |  |  |  |  |  | #     otherwise look at components | 
| 199 |  |  |  |  |  |  | my $cc = $rh_options->{country} | 
| 200 | 454 |  | 100 |  |  | 1227 | || $self->_determine_country_code($rh_components) | 
| 201 |  |  |  |  |  |  | || ''; | 
| 202 |  |  |  |  |  |  |  | 
| 203 | 454 | 100 |  |  |  | 671 | if ($cc) { | 
| 204 | 450 |  |  |  |  | 482 | $rh_components->{country_code} = $cc; | 
| 205 | 450 |  |  |  |  | 798 | $self->_set_district_alias($cc); | 
| 206 |  |  |  |  |  |  | } | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | # 2b. should we abbreviate? | 
| 209 | 454 |  | 100 |  |  | 1125 | my $abbrv = $rh_options->{abbreviate} // 0; | 
| 210 |  |  |  |  |  |  |  | 
| 211 |  |  |  |  |  |  | # 2c. was only_address set at the formatting level | 
| 212 | 454 |  |  |  |  | 488 | my $oa = $only_address; | 
| 213 | 454 | 100 |  |  |  | 622 | if (defined($rh_options->{only_address})){ | 
| 214 | 2 |  |  |  |  | 4 | $oa = $rh_options->{only_address}; | 
| 215 |  |  |  |  |  |  | } | 
| 216 |  |  |  |  |  |  |  | 
| 217 | 454 | 50 |  |  |  | 638 | if ($debug){ | 
| 218 | 0 |  |  |  |  | 0 | say STDERR "component_aliases"; | 
| 219 | 0 |  |  |  |  | 0 | say STDERR Dumper $self->{component_aliases}; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | # done with the options | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | # 3. set the aliases, unless this would overwrite something | 
| 225 |  |  |  |  |  |  | # need to do this in the right order (as defined in the components file) | 
| 226 |  |  |  |  |  |  | # For example: | 
| 227 |  |  |  |  |  |  | # both 'city_district' and 'suburb' are aliases of 'neighbourhood' | 
| 228 |  |  |  |  |  |  | # so which one should we use if both are present? | 
| 229 |  |  |  |  |  |  | # We should use the one defined first in the list | 
| 230 |  |  |  |  |  |  |  | 
| 231 | 454 |  |  |  |  | 369 | my $rhh_p2a; | 
| 232 | 454 |  |  |  |  | 1343 | foreach my $c (keys %$rh_components){ | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  | # might not need an alias as it is a primary type | 
| 235 | 3574 | 100 |  |  |  | 4862 | next if (defined($self->{component_aliases}{$c})); | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | # it is not a primary type | 
| 238 |  |  |  |  |  |  | # is there an alias? | 
| 239 | 734 | 100 |  |  |  | 1244 | if (defined($self->{component2type}{$c})){ | 
| 240 | 500 |  |  |  |  | 568 | my $ptype = $self->{component2type}{$c}; | 
| 241 |  |  |  |  |  |  | # but is it already set? | 
| 242 | 500 | 100 |  |  |  | 796 | if (! defined($rh_components->{$ptype}) ){ | 
| 243 |  |  |  |  |  |  | # no, we will set it later | 
| 244 | 396 |  |  |  |  | 808 | $rhh_p2a->{$ptype}{$c} = 1; | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | } | 
| 247 |  |  |  |  |  |  | } | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | # now we know which primary types have aliases | 
| 251 | 454 |  |  |  |  | 1036 | foreach my $ptype (keys %$rhh_p2a){ | 
| 252 |  |  |  |  |  |  | # is there more than one? | 
| 253 | 357 |  |  |  |  | 365 | my @aliases = keys %{$rhh_p2a->{$ptype}}; | 
|  | 357 |  |  |  |  | 742 |  | 
| 254 | 357 | 100 |  |  |  | 610 | if (scalar @aliases == 1){ | 
| 255 | 319 |  |  |  |  | 515 | $rh_components->{$ptype} = $rh_components->{$aliases[0]}; | 
| 256 | 319 |  |  |  |  | 486 | next;  # we are done with this ptype | 
| 257 |  |  |  |  |  |  | } | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | # if there is more than one we need to go through the list | 
| 260 |  |  |  |  |  |  | # so we do them in the right order | 
| 261 | 38 |  |  |  |  | 56 | foreach my $c (@{$self->{component_aliases}->{$ptype}}){ | 
|  | 38 |  |  |  |  | 79 |  | 
| 262 | 44 | 100 |  |  |  | 95 | if (defined($rh_components->{$c})){ | 
| 263 | 38 |  |  |  |  | 68 | $rh_components->{$ptype} = $rh_components->{$c}; | 
| 264 | 38 |  |  |  |  | 81 | last; # we are done with this ptype | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  | } | 
| 268 |  |  |  |  |  |  |  | 
| 269 | 454 | 50 |  |  |  | 675 | if ($debug){ | 
| 270 | 0 |  |  |  |  | 0 | say STDERR "after component_aliases applied"; | 
| 271 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 272 |  |  |  |  |  |  | } | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | # 4. deal wtih terrible inputs | 
| 275 | 454 |  |  |  |  | 1002 | $self->_sanity_cleaning($rh_components); | 
| 276 | 454 | 50 |  |  |  | 574 | if ($debug){ | 
| 277 | 0 |  |  |  |  | 0 | say STDERR "after sanity_cleaning applied"; | 
| 278 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 279 |  |  |  |  |  |  | } | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | # 5. determine the template | 
| 282 | 454 |  |  |  |  | 475 | my $template_text; | 
| 283 | 454 |  | 66 |  |  | 1013 | my $rh_config = $self->{templates}{uc($cc)} || $self->{templates}{default}; | 
| 284 |  |  |  |  |  |  |  | 
| 285 | 454 | 100 |  |  |  | 634 | if (defined($rh_options->{address_template})) { | 
| 286 | 2 |  |  |  |  | 4 | $template_text = $rh_options->{address_template}; | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  | else { | 
| 289 |  |  |  |  |  |  |  | 
| 290 | 452 | 100 |  |  |  | 651 | if (defined($rh_config->{address_template})) { | 
|  |  | 50 |  |  |  |  |  | 
| 291 | 450 |  |  |  |  | 629 | $template_text = $rh_config->{address_template}; | 
| 292 |  |  |  |  |  |  | } elsif (defined($self->{templates}{default}{address_template})) { | 
| 293 | 2 |  |  |  |  | 3 | $template_text = $self->{templates}{default}{address_template}; | 
| 294 |  |  |  |  |  |  | } | 
| 295 |  |  |  |  |  |  |  | 
| 296 |  |  |  |  |  |  | # do we have the minimal components for an address? | 
| 297 |  |  |  |  |  |  | # or should we instead use the fallback template? | 
| 298 | 452 | 100 |  |  |  | 829 | if (!$self->_minimal_components($rh_components)) { | 
| 299 | 49 | 50 |  |  |  | 124 | say STDERR "using fallback" if ($debug); | 
| 300 | 49 | 100 |  |  |  | 163 | if (defined($rh_config->{fallback_template})) { | 
|  |  | 100 |  |  |  |  |  | 
| 301 | 34 |  |  |  |  | 72 | $template_text = $rh_config->{fallback_template}; | 
| 302 |  |  |  |  |  |  | } elsif (defined($self->{templates}{default}{fallback_template})) { | 
| 303 | 14 |  |  |  |  | 32 | $template_text = $self->{templates}{default}{fallback_template}; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  | # no fallback | 
| 306 |  |  |  |  |  |  | } | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | } | 
| 309 |  |  |  |  |  |  |  | 
| 310 | 454 | 50 |  |  |  | 639 | say STDERR 'template text: ' . $template_text if ($debug); | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | # 6. clean up the components, possibly add codes | 
| 313 | 454 |  |  |  |  | 813 | $self->_fix_country($rh_components); | 
| 314 | 454 | 50 |  |  |  | 598 | if ($debug){ | 
| 315 | 0 |  |  |  |  | 0 | say STDERR "after fix_country"; | 
| 316 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 317 |  |  |  |  |  |  | } | 
| 318 |  |  |  |  |  |  |  | 
| 319 | 454 |  |  |  |  | 1184 | $self->_apply_replacements($rh_components, $rh_config->{replace}); | 
| 320 | 454 | 50 |  |  |  | 764 | if ($debug){ | 
| 321 | 0 |  |  |  |  | 0 | say STDERR "after applying_replacements applied"; | 
| 322 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 323 |  |  |  |  |  |  | } | 
| 324 | 454 |  |  |  |  | 883 | $self->_add_state_code($rh_components); | 
| 325 | 454 |  |  |  |  | 834 | $self->_add_county_code($rh_components); | 
| 326 | 454 | 50 |  |  |  | 643 | if ($debug){ | 
| 327 | 0 |  |  |  |  | 0 | say STDERR "after adding codes"; | 
| 328 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 329 |  |  |  |  |  |  | } | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | # 7. add the attention, if needed | 
| 332 | 454 | 50 |  |  |  | 667 | if ($debug){ | 
| 333 | 0 |  |  |  |  | 0 | say STDERR "object level only_address: $only_address"; | 
| 334 | 0 |  |  |  |  | 0 | say STDERR "formatting level only_address: $oa"; | 
| 335 |  |  |  |  |  |  | } | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 454 | 100 |  |  |  | 606 | if ($oa){ | 
| 338 | 3 | 50 |  |  |  | 8 | if ($debug){ | 
| 339 | 0 |  |  |  |  | 0 | say STDERR "not looking for unknown_components"; | 
| 340 | 0 |  |  |  |  | 0 | say STDERR "only_address was specified"; | 
| 341 |  |  |  |  |  |  | } | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  | else { | 
| 344 | 451 |  |  |  |  | 661 | my $ra_unknown = $self->_find_unknown_components($rh_components); | 
| 345 | 451 | 50 |  |  |  | 806 | if ($debug){ | 
| 346 | 0 |  |  |  |  | 0 | say STDERR "unknown_components:"; | 
| 347 | 0 |  |  |  |  | 0 | say STDERR Dumper $ra_unknown; | 
| 348 |  |  |  |  |  |  | } | 
| 349 | 451 | 100 |  |  |  | 739 | if (scalar(@$ra_unknown)){ | 
| 350 |  |  |  |  |  |  | $rh_components->{attention} = | 
| 351 | 224 |  |  |  |  | 307 | join(', ', map { $rh_components->{$_} } @$ra_unknown); | 
|  | 230 |  |  |  |  | 649 |  | 
| 352 | 224 | 50 |  |  |  | 512 | if ($debug){ | 
| 353 | 0 |  |  |  |  | 0 | say STDERR "putting unknown_components in 'attention'"; | 
| 354 |  |  |  |  |  |  | } | 
| 355 |  |  |  |  |  |  | } | 
| 356 |  |  |  |  |  |  | } | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | # 8. abbreviate, if needed | 
| 359 | 454 | 100 |  |  |  | 690 | if ($abbrv) { | 
| 360 | 5 |  |  |  |  | 13 | $rh_components = $self->_abbreviate($rh_components); | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | # 9. prepare the template | 
| 364 | 454 |  |  |  |  | 842 | $template_text = $self->_replace_template_lambdas($template_text); | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | # 10. compiled the template | 
| 367 | 454 |  |  |  |  | 1664 | my $compiled_template = | 
| 368 |  |  |  |  |  |  | $THC->compile($template_text, {'numeric_string_as_string' => 1}); | 
| 369 |  |  |  |  |  |  |  | 
| 370 | 454 | 50 |  |  |  | 579878 | if ($debug){ | 
| 371 | 0 |  |  |  |  | 0 | say STDERR "before _render_template"; | 
| 372 | 0 |  |  |  |  | 0 | say STDERR Dumper $rh_components; | 
| 373 | 0 |  |  |  |  | 0 | say STDERR "template: "; | 
| 374 | 0 |  |  |  |  | 0 | say STDERR Dumper $compiled_template; | 
| 375 |  |  |  |  |  |  | } | 
| 376 |  |  |  |  |  |  |  | 
| 377 |  |  |  |  |  |  | # 11. render the template | 
| 378 | 454 |  |  |  |  | 841 | my $text = $self->_render_template($compiled_template, $rh_components); | 
| 379 | 454 | 50 |  |  |  | 699 | if ($debug){ | 
| 380 | 0 |  |  |  |  | 0 | say STDERR "text after _render_template $text"; | 
| 381 |  |  |  |  |  |  | } | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | # 11. postformatting | 
| 384 | 454 |  |  |  |  | 1163 | $text = $self->_postformat($text, $rh_config->{postformat_replace}); | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | # 12. clean again | 
| 387 | 454 |  |  |  |  | 848 | $text = $self->_clean($text); | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | # 13. set final components | 
| 390 | 454 |  |  |  |  | 622 | $self->{final_components} = $rh_components; | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | # all done | 
| 393 | 454 |  |  |  |  | 1502 | return $text; | 
| 394 |  |  |  |  |  |  | } | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | # remove duplicates ("Berlin, Berlin"), do replacements and similar | 
| 397 |  |  |  |  |  |  | sub _postformat { | 
| 398 | 456 |  |  | 456 |  | 1391 | my $self = shift; | 
| 399 | 456 |  |  |  |  | 437 | my $text = shift; | 
| 400 | 456 |  |  |  |  | 603 | my $raa_rules = shift; | 
| 401 |  |  |  |  |  |  |  | 
| 402 | 456 | 50 |  |  |  | 625 | if ($debug){ | 
| 403 | 0 |  |  |  |  | 0 | say STDERR "entering _postformat: $text" | 
| 404 |  |  |  |  |  |  | } | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | # remove duplicates | 
| 407 | 456 |  |  |  |  | 911 | my @before_pieces = split(/, /, $text); | 
| 408 | 456 |  |  |  |  | 490 | my %seen; | 
| 409 |  |  |  |  |  |  | my @after_pieces; | 
| 410 | 456 |  |  |  |  | 591 | foreach my $piece (@before_pieces) { | 
| 411 | 565 |  |  |  |  | 812 | $piece =~ s/^\s+//g; | 
| 412 | 565 |  |  |  |  | 996 | $seen{$piece}++; | 
| 413 | 565 | 100 |  |  |  | 1114 | if (lc($piece) ne 'new york') { | 
| 414 | 561 | 100 |  |  |  | 869 | next if ($seen{$piece} > 1); | 
| 415 |  |  |  |  |  |  | } | 
| 416 | 564 |  |  |  |  | 758 | push(@after_pieces, $piece); | 
| 417 |  |  |  |  |  |  | } | 
| 418 | 456 |  |  |  |  | 702 | $text = join(', ', @after_pieces); | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | # do any country specific rules | 
| 421 | 456 |  |  |  |  | 686 | foreach my $ra_fromto (@$raa_rules) { | 
| 422 |  |  |  |  |  |  | try { | 
| 423 | 512 |  |  | 512 |  | 12136 | my $regexp = qr/$ra_fromto->[0]/; | 
| 424 | 512 |  |  |  |  | 846 | my $replacement = $ra_fromto->[1]; | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  | # ultra hack to do substitution | 
| 427 |  |  |  |  |  |  | # limited to $1 and $2, should really be a while loop | 
| 428 |  |  |  |  |  |  | # doing every substitution | 
| 429 |  |  |  |  |  |  |  | 
| 430 | 512 | 100 |  |  |  | 991 | if ($replacement =~ m/\$\d/) { | 
| 431 | 46 | 100 |  |  |  | 289 | if ($text =~ m/$regexp/) { | 
| 432 | 19 |  |  |  |  | 41 | my $tmp1 = $1; | 
| 433 | 19 |  |  |  |  | 31 | my $tmp2 = $2; | 
| 434 | 19 |  |  |  |  | 28 | my $tmp3 = $3; | 
| 435 | 19 |  |  |  |  | 56 | $replacement =~ s/\$1/$tmp1/; | 
| 436 | 19 |  |  |  |  | 63 | $replacement =~ s/\$2/$tmp2/; | 
| 437 | 19 |  |  |  |  | 51 | $replacement =~ s/\$3/$tmp3/; | 
| 438 |  |  |  |  |  |  | } | 
| 439 |  |  |  |  |  |  | } | 
| 440 | 512 |  |  |  |  | 2129 | $text =~ s/$regexp/$replacement/; | 
| 441 |  |  |  |  |  |  | } catch { | 
| 442 | 0 |  |  | 0 |  | 0 | warn "invalid replacement: " . join(', ', @$ra_fromto); | 
| 443 | 512 |  |  |  |  | 5622 | }; | 
| 444 |  |  |  |  |  |  | } | 
| 445 | 456 |  |  |  |  | 2387 | return $text; | 
| 446 |  |  |  |  |  |  | } | 
| 447 |  |  |  |  |  |  |  | 
| 448 |  |  |  |  |  |  | sub _sanity_cleaning { | 
| 449 | 455 |  |  | 455 |  | 1408 | my $self          = shift; | 
| 450 | 455 |  | 50 |  |  | 643 | my $rh_components = shift || return; | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | # catch insane postcodes | 
| 453 | 455 | 100 |  |  |  | 718 | if (defined($rh_components->{'postcode'})) { | 
| 454 | 313 | 100 |  |  |  | 1296 | if (length($rh_components->{'postcode'}) > 20) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 455 | 1 |  |  |  |  | 3 | delete $rh_components->{'postcode'}; | 
| 456 |  |  |  |  |  |  | } elsif ($rh_components->{'postcode'} =~ m/\d+;\d+/) { | 
| 457 |  |  |  |  |  |  | # sometimes OSM has postcode ranges | 
| 458 | 1 |  |  |  |  | 2 | delete $rh_components->{'postcode'}; | 
| 459 |  |  |  |  |  |  | } elsif ($rh_components->{'postcode'} =~ m/^(\d{5}),\d{5}/) { | 
| 460 | 1 |  |  |  |  | 4 | $rh_components->{'postcode'} = $1; | 
| 461 |  |  |  |  |  |  | } | 
| 462 |  |  |  |  |  |  | } | 
| 463 |  |  |  |  |  |  |  | 
| 464 |  |  |  |  |  |  | # remove things that might be empty | 
| 465 | 455 |  |  |  |  | 920 | foreach my $c (keys %$rh_components) { | 
| 466 |  |  |  |  |  |  | # catch empty values | 
| 467 | 3931 | 50 |  |  |  | 10327 | if (!defined($rh_components->{$c})) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 468 | 0 |  |  |  |  | 0 | delete $rh_components->{$c}; | 
| 469 |  |  |  |  |  |  | } | 
| 470 |  |  |  |  |  |  | # no chars | 
| 471 |  |  |  |  |  |  | elsif ($rh_components->{$c} !~ m/\w/) { | 
| 472 | 1 |  |  |  |  | 3 | delete $rh_components->{$c}; | 
| 473 |  |  |  |  |  |  | } | 
| 474 |  |  |  |  |  |  | # catch values containing URLs | 
| 475 |  |  |  |  |  |  | elsif ($rh_components->{$c} =~ m|https?://|) { | 
| 476 | 1 |  |  |  |  | 2 | delete $rh_components->{$c}; | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  | } | 
| 479 | 455 |  |  |  |  | 671 | return; | 
| 480 |  |  |  |  |  |  | } | 
| 481 |  |  |  |  |  |  |  | 
| 482 |  |  |  |  |  |  | sub _minimal_components { | 
| 483 | 452 |  |  | 452 |  | 505 | my $self                = shift; | 
| 484 | 452 |  | 50 |  |  | 650 | my $rh_components       = shift || return; | 
| 485 | 452 |  |  |  |  | 705 | my @required_components = qw(road postcode); #FIXME - should be in conf | 
| 486 | 452 |  |  |  |  | 425 | my $missing             = 0;                 # number of required components missing | 
| 487 |  |  |  |  |  |  |  | 
| 488 | 452 |  |  |  |  | 408 | my $minimal_threshold = 2; | 
| 489 | 452 |  |  |  |  | 516 | foreach my $c (@required_components) { | 
| 490 | 904 | 100 |  |  |  | 1413 | $missing++ if (!defined($rh_components->{$c})); | 
| 491 | 904 | 100 |  |  |  | 1381 | return 0   if ($missing == $minimal_threshold); | 
| 492 |  |  |  |  |  |  | } | 
| 493 | 403 |  |  |  |  | 790 | return 1; | 
| 494 |  |  |  |  |  |  | } | 
| 495 |  |  |  |  |  |  |  | 
| 496 |  |  |  |  |  |  | my %valid_replacement_components = ('state' => 1,); | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | # determines which country code to use | 
| 499 |  |  |  |  |  |  | # may also override other configuration if we are dealing with | 
| 500 |  |  |  |  |  |  | # a dependent territory | 
| 501 |  |  |  |  |  |  | sub _determine_country_code { | 
| 502 | 455 |  |  | 455 |  | 511 | my $self          = shift; | 
| 503 | 455 |  | 50 |  |  | 694 | my $rh_components = shift || return; | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | # FIXME - validate it is a valid country | 
| 506 | 455 | 100 |  |  |  | 769 | return if (!defined($rh_components->{country_code})); | 
| 507 |  |  |  |  |  |  |  | 
| 508 | 451 | 50 |  |  |  | 990 | if (my $cc = lc($rh_components->{country_code})) { | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | # is it two letters long? | 
| 511 | 451 | 50 |  |  |  | 1651 | return      if ($cc !~ m/^[a-z][a-z]$/); | 
| 512 | 451 | 50 |  |  |  | 827 | return 'GB' if ($cc eq 'uk'); | 
| 513 |  |  |  |  |  |  |  | 
| 514 | 451 |  |  |  |  | 728 | $cc = uc($cc); | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | # check if the configuration tells us to use | 
| 517 |  |  |  |  |  |  | # the configuration of another country | 
| 518 |  |  |  |  |  |  | # used in cases of dependent territories like | 
| 519 |  |  |  |  |  |  | # American Samoa (AS) and Puerto Rico (PR) | 
| 520 | 451 | 100 | 100 |  |  | 1868 | if (   defined($self->{templates}{$cc}) | 
| 521 |  |  |  |  |  |  | && defined($self->{templates}{$cc}{use_country})) | 
| 522 |  |  |  |  |  |  | { | 
| 523 | 49 |  |  |  |  | 73 | my $old_cc = $cc; | 
| 524 | 49 |  |  |  |  | 75 | $cc = $self->{templates}{$cc}{use_country}; | 
| 525 | 49 | 100 |  |  |  | 137 | if (defined($self->{templates}{$old_cc}{change_country})) { | 
| 526 |  |  |  |  |  |  |  | 
| 527 | 36 |  |  |  |  | 80 | my $new_country = $self->{templates}{$old_cc}{change_country}; | 
| 528 | 36 | 100 |  |  |  | 108 | if ($new_country =~ m/\$(\w*)/) { | 
| 529 | 2 |  |  |  |  | 5 | my $component = $1; | 
| 530 | 2 | 50 |  |  |  | 7 | if (defined($rh_components->{$component})) { | 
| 531 | 2 |  |  |  |  | 24 | $new_country =~ s/\$$component/$rh_components->{$component}/; | 
| 532 |  |  |  |  |  |  | } else { | 
| 533 | 0 |  |  |  |  | 0 | $new_country =~ s/\$$component//; | 
| 534 |  |  |  |  |  |  | } | 
| 535 |  |  |  |  |  |  | } | 
| 536 | 36 |  |  |  |  | 83 | $rh_components->{country} = $new_country; | 
| 537 |  |  |  |  |  |  | } | 
| 538 | 49 | 100 |  |  |  | 130 | if (defined($self->{templates}{$old_cc}{add_component})) { | 
| 539 | 12 |  |  |  |  | 29 | my $tmp = $self->{templates}{$old_cc}{add_component}; | 
| 540 | 12 |  |  |  |  | 44 | my ($k, $v) = split(/=/, $tmp); | 
| 541 |  |  |  |  |  |  | # check whitelist of valid replacement components | 
| 542 | 12 | 100 |  |  |  | 39 | if (defined($valid_replacement_components{$k})) { | 
| 543 | 11 |  |  |  |  | 26 | $rh_components->{$k} = $v; | 
| 544 |  |  |  |  |  |  | } | 
| 545 |  |  |  |  |  |  | } | 
| 546 |  |  |  |  |  |  | } | 
| 547 |  |  |  |  |  |  |  | 
| 548 | 451 | 100 |  |  |  | 741 | if ($cc eq 'NL') { | 
| 549 | 5 | 50 |  |  |  | 12 | if (defined($rh_components->{state})) { | 
| 550 | 5 | 100 |  |  |  | 27 | if ($rh_components->{state} eq 'Curaçao') { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 551 | 1 |  |  |  |  | 2 | $cc = 'CW'; | 
| 552 | 1 |  |  |  |  | 4 | $rh_components->{country} = 'Curaçao'; | 
| 553 |  |  |  |  |  |  | } elsif ($rh_components->{state} =~ m/^sint maarten/i) { | 
| 554 | 0 |  |  |  |  | 0 | $cc = 'SX'; | 
| 555 | 0 |  |  |  |  | 0 | $rh_components->{country} = 'Sint Maarten'; | 
| 556 |  |  |  |  |  |  | } elsif ($rh_components->{state} =~ m/^Aruba/i) { | 
| 557 | 1 |  |  |  |  | 2 | $cc = 'AW'; | 
| 558 | 1 |  |  |  |  | 3 | $rh_components->{country} = 'Aruba'; | 
| 559 |  |  |  |  |  |  | } | 
| 560 |  |  |  |  |  |  | } | 
| 561 |  |  |  |  |  |  | } | 
| 562 | 451 |  |  |  |  | 1443 | return $cc; | 
| 563 |  |  |  |  |  |  | } | 
| 564 | 0 |  |  |  |  | 0 | return; | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | # hacks for bad country data | 
| 568 |  |  |  |  |  |  | sub _fix_country { | 
| 569 | 454 |  |  | 454 |  | 631 | my $self          = shift; | 
| 570 | 454 |  | 50 |  |  | 735 | my $rh_components = shift || return; | 
| 571 |  |  |  |  |  |  |  | 
| 572 |  |  |  |  |  |  | # is the country a number? | 
| 573 |  |  |  |  |  |  | # if so, and there is a state, use state as country | 
| 574 | 454 | 100 |  |  |  | 692 | if (defined($rh_components->{country})) { | 
| 575 | 451 | 100 |  |  |  | 1422 | if (looks_like_number($rh_components->{country})) { | 
| 576 | 1 | 50 |  |  |  | 3 | if (defined($rh_components->{state})) { | 
| 577 | 1 |  |  |  |  | 3 | $rh_components->{country} = $rh_components->{state}; | 
| 578 | 1 |  |  |  |  | 2 | delete $rh_components->{state}; | 
| 579 |  |  |  |  |  |  | } | 
| 580 |  |  |  |  |  |  | } | 
| 581 |  |  |  |  |  |  | } | 
| 582 | 454 |  |  |  |  | 500 | return; | 
| 583 |  |  |  |  |  |  | } | 
| 584 |  |  |  |  |  |  |  | 
| 585 |  |  |  |  |  |  | # sets and returns a state code | 
| 586 |  |  |  |  |  |  | # note may also set other values in some odd edge cases | 
| 587 |  |  |  |  |  |  | sub _add_state_code { | 
| 588 | 459 |  |  | 459 |  | 1064 | my $self          = shift; | 
| 589 | 459 |  |  |  |  | 416 | my $rh_components = shift; | 
| 590 | 459 |  |  |  |  | 777 | return $self->_add_code('state', $rh_components); | 
| 591 |  |  |  |  |  |  | } | 
| 592 |  |  |  |  |  |  |  | 
| 593 |  |  |  |  |  |  | sub _add_county_code { | 
| 594 | 455 |  |  | 455 |  | 1017 | my $self          = shift; | 
| 595 | 455 |  |  |  |  | 427 | my $rh_components = shift; | 
| 596 | 455 |  |  |  |  | 599 | return $self->_add_code('county', $rh_components); | 
| 597 |  |  |  |  |  |  | } | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | sub _add_code { | 
| 600 | 914 |  |  | 914 |  | 798 | my $self          = shift; | 
| 601 | 914 |  | 50 |  |  | 1277 | my $keyname       = shift // return; | 
| 602 | 914 |  |  |  |  | 772 | my $rh_components = shift; | 
| 603 | 914 | 100 |  |  |  | 1293 | return if !$rh_components->{country_code}; # do we know country? | 
| 604 | 905 | 100 |  |  |  | 1382 | return if !$rh_components->{$keyname};     # do we know state/county? | 
| 605 |  |  |  |  |  |  |  | 
| 606 | 566 |  |  |  |  | 815 | my $code = $keyname . '_code'; | 
| 607 |  |  |  |  |  |  |  | 
| 608 | 566 | 100 |  |  |  | 930 | if (defined($rh_components->{$code})) {    # do we already have code? | 
| 609 |  |  |  |  |  |  | # but could have situation | 
| 610 |  |  |  |  |  |  | # where code and long name are | 
| 611 |  |  |  |  |  |  | # the same which we want to correct | 
| 612 | 26 | 100 |  |  |  | 68 | if ($rh_components->{$code} ne $rh_components->{$keyname}) { | 
| 613 | 25 |  |  |  |  | 39 | return; | 
| 614 |  |  |  |  |  |  | } | 
| 615 |  |  |  |  |  |  | } | 
| 616 |  |  |  |  |  |  |  | 
| 617 |  |  |  |  |  |  | # ensure country_code is uppercase as we use it as conf key | 
| 618 | 541 |  |  |  |  | 690 | $rh_components->{country_code} = uc($rh_components->{country_code}); | 
| 619 | 541 |  |  |  |  | 532 | my $cc = $rh_components->{country_code}; | 
| 620 |  |  |  |  |  |  |  | 
| 621 | 541 | 100 |  |  |  | 1354 | if (my $mapping = $self->{$code . 's'}{$cc}) { | 
| 622 |  |  |  |  |  |  |  | 
| 623 | 283 |  |  |  |  | 354 | my $name    = $rh_components->{$keyname}; | 
| 624 | 283 |  |  |  |  | 404 | my $uc_name = uc($name); | 
| 625 |  |  |  |  |  |  |  | 
| 626 | 283 |  |  |  |  | 8359 | LOCCODE: foreach my $abbrv (keys %$mapping) { | 
| 627 |  |  |  |  |  |  |  | 
| 628 | 6671 |  |  |  |  | 4976 | my @confnames; # can have multiple names for the place | 
| 629 |  |  |  |  |  |  | # for example in different languages | 
| 630 |  |  |  |  |  |  |  | 
| 631 | 6671 | 100 |  |  |  | 6941 | if (is_hashref($mapping->{$abbrv})) { | 
| 632 | 839 |  |  |  |  | 683 | push(@confnames, values %{$mapping->{$abbrv}}); | 
|  | 839 |  |  |  |  | 1493 |  | 
| 633 |  |  |  |  |  |  | } else { | 
| 634 | 5832 |  |  |  |  | 6005 | push(@confnames, $mapping->{$abbrv}); | 
| 635 |  |  |  |  |  |  | } | 
| 636 |  |  |  |  |  |  |  | 
| 637 | 6671 |  |  |  |  | 6012 | foreach my $confname (@confnames) { | 
| 638 | 7587 | 100 |  |  |  | 10084 | if ($uc_name eq uc($confname)) { | 
| 639 | 180 |  |  |  |  | 289 | $rh_components->{$code} = $abbrv; | 
| 640 | 180 |  |  |  |  | 342 | last LOCCODE; | 
| 641 |  |  |  |  |  |  | } | 
| 642 |  |  |  |  |  |  | # perhaps instead of passing in a name, we passed in a code | 
| 643 |  |  |  |  |  |  | # example: state => 'NC' | 
| 644 |  |  |  |  |  |  | # we want to turn that into | 
| 645 |  |  |  |  |  |  | #     state => 'North Carolina' | 
| 646 |  |  |  |  |  |  | #     state_code => 'NC' | 
| 647 |  |  |  |  |  |  | # | 
| 648 | 7407 | 100 |  |  |  | 9859 | if ($uc_name eq $abbrv) { | 
| 649 | 6 |  |  |  |  | 9 | $rh_components->{$keyname} = $confname; | 
| 650 | 6 |  |  |  |  | 10 | $rh_components->{$code}    = $abbrv; | 
| 651 | 6 |  |  |  |  | 12 | last LOCCODE; | 
| 652 |  |  |  |  |  |  | } | 
| 653 |  |  |  |  |  |  | } | 
| 654 |  |  |  |  |  |  | } | 
| 655 |  |  |  |  |  |  | # didn't find a valid code or name | 
| 656 |  |  |  |  |  |  |  | 
| 657 |  |  |  |  |  |  | # try again for odd variants like "United States Virgin Islands" | 
| 658 | 283 | 100 |  |  |  | 768 | if ($keyname eq 'state') { | 
| 659 | 254 | 100 |  |  |  | 492 | if (!defined($rh_components->{state_code})) { | 
| 660 | 81 | 100 |  |  |  | 144 | if ($cc eq 'US') { | 
| 661 | 2 | 50 |  |  |  | 45 | if ($rh_components->{state} =~ m/^united states/i) { | 
| 662 | 0 |  |  |  |  | 0 | my $state = $rh_components->{state}; | 
| 663 | 0 |  |  |  |  | 0 | $state =~ s/^United States/US/i; | 
| 664 | 0 |  |  |  |  | 0 | foreach my $k (keys %$mapping) { | 
| 665 | 0 | 0 |  |  |  | 0 | if (uc($state) eq uc($k)) { | 
| 666 | 0 |  |  |  |  | 0 | $rh_components->{state_code} = $mapping->{$k}; | 
| 667 | 0 |  |  |  |  | 0 | last; | 
| 668 |  |  |  |  |  |  | } | 
| 669 |  |  |  |  |  |  | } | 
| 670 |  |  |  |  |  |  | } | 
| 671 | 2 | 50 |  |  |  | 11 | if ($rh_components->{state} =~ m/^washington,? d\.?c\.?/i) { | 
| 672 | 2 |  |  |  |  | 3 | $rh_components->{state_code} = 'DC'; | 
| 673 | 2 |  |  |  |  | 3 | $rh_components->{state}      = 'District of Columbia'; | 
| 674 | 2 |  |  |  |  | 24 | $rh_components->{city}       = 'Washington'; | 
| 675 |  |  |  |  |  |  | } | 
| 676 |  |  |  |  |  |  | } | 
| 677 |  |  |  |  |  |  | } | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  | } | 
| 680 | 541 |  |  |  |  | 760 | return $rh_components->{$code}; | 
| 681 |  |  |  |  |  |  | } | 
| 682 |  |  |  |  |  |  |  | 
| 683 |  |  |  |  |  |  | sub _apply_replacements { | 
| 684 | 457 |  |  | 457 |  | 1601 | my $self          = shift; | 
| 685 | 457 |  |  |  |  | 388 | my $rh_components = shift; | 
| 686 | 457 |  |  |  |  | 575 | my $raa_rules     = shift; | 
| 687 |  |  |  |  |  |  |  | 
| 688 | 457 | 50 |  |  |  | 672 | if ($debug){ | 
| 689 | 0 |  |  |  |  | 0 | say STDERR "in _apply_replacements"; | 
| 690 | 0 |  |  |  |  | 0 | say STDERR Dumper $raa_rules; | 
| 691 |  |  |  |  |  |  | } | 
| 692 |  |  |  |  |  |  |  | 
| 693 | 457 |  |  |  |  | 964 | foreach my $component (keys %$rh_components) { | 
| 694 | 3930 |  |  |  |  | 12871 | foreach my $ra_fromto (@$raa_rules) { | 
| 695 |  |  |  |  |  |  |  | 
| 696 | 8010 |  |  |  |  | 58255 | my $regexp; | 
| 697 |  |  |  |  |  |  | # do key specific replacement | 
| 698 | 8010 | 100 |  |  |  | 23021 | if ($ra_fromto->[0] =~ m/^$component=/){ | 
| 699 | 82 |  |  |  |  | 131 | my $from = $ra_fromto->[0]; | 
| 700 | 82 |  |  |  |  | 316 | $from =~ s/^$component=//; | 
| 701 | 82 | 100 |  |  |  | 164 | if ($rh_components->{$component} eq $from){ | 
| 702 | 5 |  |  |  |  | 12 | $rh_components->{$component} = $ra_fromto->[1]; | 
| 703 |  |  |  |  |  |  | } else { | 
| 704 | 77 |  |  |  |  | 111 | $regexp = $from; | 
| 705 |  |  |  |  |  |  | } | 
| 706 |  |  |  |  |  |  | } else { | 
| 707 | 7928 |  |  |  |  | 8399 | $regexp = $ra_fromto->[0]; | 
| 708 |  |  |  |  |  |  | } | 
| 709 | 8010 | 100 |  |  |  | 10365 | if (defined($regexp)){ | 
| 710 |  |  |  |  |  |  | try { | 
| 711 | 8005 |  |  | 8005 |  | 165819 | my $re = qr/$regexp/; | 
| 712 | 8004 |  |  |  |  | 26689 | $rh_components->{$component} =~ s/$re/$ra_fromto->[1]/; | 
| 713 |  |  |  |  |  |  | } catch { | 
| 714 | 1 |  |  | 1 |  | 22 | warn "invalid replacement: " . join(', ', @$ra_fromto); | 
| 715 | 8005 |  |  |  |  | 25980 | }; | 
| 716 |  |  |  |  |  |  | } | 
| 717 |  |  |  |  |  |  | } | 
| 718 |  |  |  |  |  |  | } | 
| 719 | 457 |  |  |  |  | 1939 | return $rh_components; | 
| 720 |  |  |  |  |  |  | } | 
| 721 |  |  |  |  |  |  |  | 
| 722 |  |  |  |  |  |  | sub _abbreviate { | 
| 723 | 7 |  |  | 7 |  | 566 | my $self    = shift; | 
| 724 | 7 |  | 50 |  |  | 11 | my $rh_comp = shift // return; | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | # do we know the country? | 
| 727 | 7 | 100 |  |  |  | 15 | if (!defined($rh_comp->{country_code})) { | 
| 728 | 2 | 100 |  |  |  | 5 | if ($show_warnings){ | 
| 729 | 1 |  |  |  |  | 2 | my $error_msg = 'no country_code, unable to abbreviate'; | 
| 730 | 1 | 50 |  |  |  | 3 | if (defined($rh_comp->{country})) { | 
| 731 | 1 |  |  |  |  | 3 | $error_msg .= ' - country: ' . $rh_comp->{country}; | 
| 732 |  |  |  |  |  |  | } | 
| 733 | 1 |  |  |  |  | 13 | warn $error_msg | 
| 734 |  |  |  |  |  |  | } | 
| 735 | 2 |  |  |  |  | 66 | return; | 
| 736 |  |  |  |  |  |  | } | 
| 737 |  |  |  |  |  |  |  | 
| 738 |  |  |  |  |  |  | # do we have abbreviations for this country? | 
| 739 | 5 |  |  |  |  | 6 | my $cc = uc($rh_comp->{country_code}); | 
| 740 |  |  |  |  |  |  |  | 
| 741 |  |  |  |  |  |  | # 1. which languages? | 
| 742 | 5 | 50 |  |  |  | 11 | if (defined($self->{country2lang}{$cc})) { | 
| 743 |  |  |  |  |  |  |  | 
| 744 | 5 |  |  |  |  | 12 | my @langs = split(/,/, $self->{country2lang}{$cc}); | 
| 745 |  |  |  |  |  |  |  | 
| 746 | 5 |  |  |  |  | 7 | foreach my $lang (@langs) { | 
| 747 |  |  |  |  |  |  | # do we have abbrv for this lang? | 
| 748 | 9 | 100 |  |  |  | 21 | if (defined($self->{abbreviations}->{$lang})) { | 
| 749 |  |  |  |  |  |  |  | 
| 750 | 6 |  |  |  |  | 6 | my $rh_abbr = $self->{abbreviations}->{$lang}; | 
| 751 | 6 |  |  |  |  | 11 | foreach my $comp_name (keys %$rh_abbr) { | 
| 752 | 10 | 50 |  |  |  | 18 | next if (!defined($rh_comp->{$comp_name})); | 
| 753 | 10 |  |  |  |  | 8 | foreach my $long (keys %{$rh_abbr->{$comp_name}}) { | 
|  | 10 |  |  |  |  | 20 |  | 
| 754 | 62 |  |  |  |  | 78 | my $short = $rh_abbr->{$comp_name}->{$long}; | 
| 755 | 62 |  |  |  |  | 357 | $rh_comp->{$comp_name} =~ s/\b$long\b/$short/; | 
| 756 |  |  |  |  |  |  | } | 
| 757 |  |  |  |  |  |  | } | 
| 758 |  |  |  |  |  |  | } else { | 
| 759 |  |  |  |  |  |  | #warn "no abbreviations defined for lang $lang"; | 
| 760 |  |  |  |  |  |  | } | 
| 761 |  |  |  |  |  |  | } | 
| 762 |  |  |  |  |  |  | } | 
| 763 |  |  |  |  |  |  |  | 
| 764 | 5 |  |  |  |  | 9 | return $rh_comp; | 
| 765 |  |  |  |  |  |  | } | 
| 766 |  |  |  |  |  |  |  | 
| 767 |  |  |  |  |  |  | # " abc,,def , ghi " => 'abc, def, ghi' | 
| 768 |  |  |  |  |  |  | sub _clean { | 
| 769 | 914 |  |  | 914 |  | 2768 | my $self = shift; | 
| 770 | 914 |  | 100 |  |  | 1357 | my $out  = shift // return; | 
| 771 | 913 | 50 |  |  |  | 1118 | if ($debug){ | 
| 772 | 0 |  |  |  |  | 0 | say STDERR "entering _clean \n$out"; | 
| 773 |  |  |  |  |  |  | } | 
| 774 |  |  |  |  |  |  |  | 
| 775 | 913 |  |  |  |  | 1366 | $out =~ s/\'\;/'/g; | 
| 776 |  |  |  |  |  |  |  | 
| 777 | 913 |  |  |  |  | 4829 | $out =~ s/[\},\s]+$//; | 
| 778 | 913 |  |  |  |  | 1908 | $out =~ s/^[,\s]+//; | 
| 779 |  |  |  |  |  |  |  | 
| 780 | 913 |  |  |  |  | 1069 | $out =~ s/^- //; # line starting with dash due to a parameter missing | 
| 781 |  |  |  |  |  |  |  | 
| 782 | 913 |  |  |  |  | 1113 | $out =~ s/,\s*,/, /g;   # multiple commas to one | 
| 783 | 913 |  |  |  |  | 1338 | $out =~ s/\h+,\h+/, /g; # one horiz whitespace behind comma | 
| 784 | 913 |  |  |  |  | 1998 | $out =~ s/\h\h+/ /g;    # multiple horiz whitespace to one | 
| 785 | 913 |  |  |  |  | 1841 | $out =~ s/\h\n/\n/g;    # horiz whitespace, newline to newline | 
| 786 | 913 |  |  |  |  | 1129 | $out =~ s/\n,/\n/g;     # newline comma to just newline | 
| 787 | 913 |  |  |  |  | 1014 | $out =~ s/,,+/,/g;      # multiple commas to one | 
| 788 | 913 |  |  |  |  | 1103 | $out =~ s/,\n/\n/g;     # comma newline to just newline | 
| 789 | 913 |  |  |  |  | 1971 | $out =~ s/\n\h+/\n/g;   # newline plus space to newline | 
| 790 | 913 |  |  |  |  | 2031 | $out =~ s/\n\n+/\n/g;   # multiple newline to one | 
| 791 |  |  |  |  |  |  |  | 
| 792 |  |  |  |  |  |  | # final dedupe across and within lines | 
| 793 | 913 |  |  |  |  | 2288 | my @before_pieces = split(/\n/, $out); | 
| 794 | 913 |  |  |  |  | 1072 | my %seen_lines; | 
| 795 |  |  |  |  |  |  | my @after_pieces; | 
| 796 | 913 |  |  |  |  | 1134 | foreach my $line (@before_pieces) { | 
| 797 | 3529 |  |  |  |  | 4290 | $line =~ s/^\h+//g; | 
| 798 | 3529 |  |  |  |  | 4261 | $line =~ s/\h+$//g; | 
| 799 | 3529 |  |  |  |  | 5301 | $seen_lines{$line}++; | 
| 800 | 3529 | 100 |  |  |  | 4874 | next if ($seen_lines{$line} > 1); | 
| 801 |  |  |  |  |  |  | # now dedupe within the line | 
| 802 | 3511 |  |  |  |  | 4994 | my @before_words = split(/,/, $line); | 
| 803 | 3511 |  |  |  |  | 3141 | my %seen_words; | 
| 804 |  |  |  |  |  |  | my @after_words; | 
| 805 | 3511 |  |  |  |  | 3389 | foreach my $w (@before_words) { | 
| 806 | 3718 |  |  |  |  | 4699 | $w =~ s/^\h+//g; | 
| 807 | 3718 |  |  |  |  | 4152 | $w =~ s/\h+$//g; | 
| 808 | 1 | 100 |  | 1 |  | 7 | if (lc($w) ne 'new york') { | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 11 |  | 
|  | 3718 |  |  |  |  | 5974 |  | 
| 809 | 3708 |  |  |  |  | 25187 | $seen_words{$w}++; | 
| 810 |  |  |  |  |  |  | } | 
| 811 | 3718 | 100 | 100 |  |  | 8902 | next if ((defined($seen_words{$w})) && ($seen_words{$w} > 1)); | 
| 812 | 3716 |  |  |  |  | 5053 | push(@after_words, $w); | 
| 813 |  |  |  |  |  |  | } | 
| 814 | 3511 |  |  |  |  | 4590 | $line = join(', ', @after_words); | 
| 815 | 3511 |  |  |  |  | 5733 | push(@after_pieces, $line); | 
| 816 |  |  |  |  |  |  | } | 
| 817 | 913 |  |  |  |  | 1355 | $out = join("\n", @after_pieces); | 
| 818 |  |  |  |  |  |  |  | 
| 819 | 913 |  |  |  |  | 1373 | $out =~ s/^\s+//; # remove leading whitespace | 
| 820 | 913 |  |  |  |  | 1969 | $out =~ s/\s+$//; # remove end whitespace | 
| 821 |  |  |  |  |  |  |  | 
| 822 | 913 |  |  |  |  | 1130 | $out .= "\n";     # add final newline | 
| 823 | 913 |  |  |  |  | 2237 | return $out;      # we are done | 
| 824 |  |  |  |  |  |  | } | 
| 825 |  |  |  |  |  |  |  | 
| 826 |  |  |  |  |  |  | sub _render_template { | 
| 827 | 455 |  |  | 455 |  | 2017 | my $self       = shift; | 
| 828 | 455 |  |  |  |  | 484 | my $thtemplate = shift; | 
| 829 | 455 |  |  |  |  | 409 | my $components = shift; | 
| 830 |  |  |  |  |  |  |  | 
| 831 |  |  |  |  |  |  | # Mustache calls it context | 
| 832 | 455 |  |  |  |  | 6884 | my $context = clone($components); | 
| 833 | 455 | 50 |  |  |  | 893 | say STDERR 'context: ' . Dumper $context if ($debug); | 
| 834 | 455 |  |  |  |  | 1051 | my $output = $thtemplate->render($context); | 
| 835 |  |  |  |  |  |  |  | 
| 836 | 455 |  |  |  |  | 370415 | $output = $self->_evaluate_template_lamdas($output); | 
| 837 |  |  |  |  |  |  |  | 
| 838 | 455 | 50 |  |  |  | 839 | say STDERR "in _render before _clean: $output" if ($debug); | 
| 839 | 455 |  |  |  |  | 743 | $output = $self->_clean($output); | 
| 840 |  |  |  |  |  |  |  | 
| 841 |  |  |  |  |  |  | # is it empty? | 
| 842 |  |  |  |  |  |  | # if yes and there is only one component then just use that one | 
| 843 | 455 | 100 |  |  |  | 1195 | if ($output !~ m/\w/) { | 
| 844 | 2 |  |  |  |  | 7 | my @comps = sort keys %$components; | 
| 845 | 2 | 50 |  |  |  | 5 | if (scalar(@comps) == 1) { | 
| 846 | 0 |  |  |  |  | 0 | foreach my $k (@comps) { | 
| 847 | 0 |  |  |  |  | 0 | $output = $components->{$k}; | 
| 848 |  |  |  |  |  |  | } | 
| 849 |  |  |  |  |  |  | } # FIXME what if more than one? | 
| 850 |  |  |  |  |  |  | } | 
| 851 | 455 |  |  |  |  | 1436 | return $output; | 
| 852 |  |  |  |  |  |  | } | 
| 853 |  |  |  |  |  |  |  | 
| 854 |  |  |  |  |  |  | # Text::Hogan apparently caches lambdas when rendering templates. In the past | 
| 855 |  |  |  |  |  |  | # we needed our lambda 'first', example | 
| 856 |  |  |  |  |  |  | #   {{#first}} {{{city}}} || {{{town}}} {{/first}} | 
| 857 |  |  |  |  |  |  | # to evaluate the componentes. Whenever the lambda was called with different | 
| 858 |  |  |  |  |  |  | # component values it consumed memory. Now replace with a simpler implementation | 
| 859 |  |  |  |  |  |  | # | 
| 860 |  |  |  |  |  |  | sub _replace_template_lambdas { | 
| 861 | 455 |  |  | 455 |  | 991 | my $self          = shift; | 
| 862 | 455 |  |  |  |  | 592 | my $template_text = shift; | 
| 863 | 455 |  |  |  |  | 6076 | $template_text =~ s!\Q{{#first}}\E(.+?)\Q{{/first}}\E!FIRSTSTART${1}FIRSTEND!g; | 
| 864 | 455 |  |  |  |  | 1085 | return $template_text; | 
| 865 |  |  |  |  |  |  | } | 
| 866 |  |  |  |  |  |  |  | 
| 867 |  |  |  |  |  |  | # We only use a lambda named 'first' | 
| 868 |  |  |  |  |  |  | sub _evaluate_template_lamdas { | 
| 869 | 455 |  |  | 455 |  | 540 | my $self = shift; | 
| 870 | 455 |  |  |  |  | 486 | my $text = shift; | 
| 871 | 455 |  |  |  |  | 3643 | $text =~ s!FIRSTSTART\s*(.+?)\s*FIRSTEND!_select_first($1)!seg; | 
|  | 783 |  |  |  |  | 1227 |  | 
| 872 | 455 |  |  |  |  | 955 | return $text; | 
| 873 |  |  |  |  |  |  | } | 
| 874 |  |  |  |  |  |  |  | 
| 875 |  |  |  |  |  |  | # '|| val1 ||  || val3' => 'val1' | 
| 876 |  |  |  |  |  |  | sub _select_first { | 
| 877 | 783 |  |  | 783 |  | 1096 | my $text = shift; | 
| 878 | 783 |  |  |  |  | 3762 | my @a_parts = grep { length($_) } split(/\s*\|\|\s*/, $text); | 
|  | 2591 |  |  |  |  | 3377 |  | 
| 879 | 783 | 100 |  |  |  | 3795 | return scalar(@a_parts) ? $a_parts[0] : ''; | 
| 880 |  |  |  |  |  |  | } | 
| 881 |  |  |  |  |  |  |  | 
| 882 |  |  |  |  |  |  | my %small_district = ( | 
| 883 |  |  |  |  |  |  | 'br' => 1, | 
| 884 |  |  |  |  |  |  | 'cr' => 1, | 
| 885 |  |  |  |  |  |  | 'es' => 1, | 
| 886 |  |  |  |  |  |  | 'ni' => 1, | 
| 887 |  |  |  |  |  |  | 'py' => 1, | 
| 888 |  |  |  |  |  |  | 'ro' => 1, | 
| 889 |  |  |  |  |  |  | 'tg' => 1, | 
| 890 |  |  |  |  |  |  | 'tm' => 1, | 
| 891 |  |  |  |  |  |  | 'xk' => 1, | 
| 892 |  |  |  |  |  |  | ); | 
| 893 |  |  |  |  |  |  |  | 
| 894 |  |  |  |  |  |  | # correct the alias for "district" | 
| 895 |  |  |  |  |  |  | # in OSM some countries use district to mean "city_district" | 
| 896 |  |  |  |  |  |  | # others to mean "state_district" | 
| 897 |  |  |  |  |  |  | sub _set_district_alias { | 
| 898 | 450 |  |  | 450 |  | 456 | my $self = shift; | 
| 899 | 450 |  |  |  |  | 431 | my $cc = shift; | 
| 900 |  |  |  |  |  |  |  | 
| 901 |  |  |  |  |  |  | # this may get called repeatedly | 
| 902 |  |  |  |  |  |  | # no need to do the work again | 
| 903 | 450 | 50 |  |  |  | 764 | if (defined($cc)){ | 
| 904 | 450 |  |  |  |  | 493 | my $ucc = uc($cc); | 
| 905 | 450 | 100 |  |  |  | 981 | return if (defined($self->{set_district_alias}{$ucc})); | 
| 906 | 224 |  |  |  |  | 416 | $self->{set_district_alias}{$ucc} = 1; | 
| 907 |  |  |  |  |  |  | } | 
| 908 |  |  |  |  |  |  |  | 
| 909 | 224 |  |  |  |  | 217 | my $oldalias; | 
| 910 | 224 | 50 |  |  |  | 425 | if (defined($small_district{$cc})){ | 
| 911 | 0 |  |  |  |  | 0 | $self->{component2type}{district} = 'neighbourhood'; | 
| 912 | 0 |  |  |  |  | 0 | $oldalias = 'state_district'; | 
| 913 |  |  |  |  |  |  |  | 
| 914 |  |  |  |  |  |  | # add to the neighbourhood alias list | 
| 915 |  |  |  |  |  |  | # though of course we are just sticking it at the end | 
| 916 | 0 |  |  |  |  | 0 | push(@{$self->{component_aliases}{'neighbourhood'}}, 'district'); | 
|  | 0 |  |  |  |  | 0 |  | 
| 917 |  |  |  |  |  |  |  | 
| 918 |  |  |  |  |  |  | } else { | 
| 919 |  |  |  |  |  |  | # set 'district' to be type 'state_district' | 
| 920 | 224 |  |  |  |  | 362 | $self->{component2type}{district} = 'state_district'; | 
| 921 | 224 |  |  |  |  | 232 | $oldalias = 'neighbourhood'; | 
| 922 |  |  |  |  |  |  |  | 
| 923 |  |  |  |  |  |  | # add to the state_district alias list | 
| 924 | 224 |  |  |  |  | 236 | push(@{$self->{component_aliases}{'state_district'}}, 'district'); | 
|  | 224 |  |  |  |  | 534 |  | 
| 925 |  |  |  |  |  |  | } | 
| 926 |  |  |  |  |  |  |  | 
| 927 |  |  |  |  |  |  | # remove from the old alias list | 
| 928 | 224 |  |  |  |  | 255 | my @temp = grep { $_ ne 'district' } @{$self->{component_aliases}{$oldalias}}; | 
|  | 2862 |  |  |  |  | 3507 |  | 
|  | 224 |  |  |  |  | 476 |  | 
| 929 | 224 |  |  |  |  | 545 | $self->{component_aliases}{$oldalias} = \@temp; | 
| 930 | 224 |  |  |  |  | 371 | return; | 
| 931 |  |  |  |  |  |  | } | 
| 932 |  |  |  |  |  |  |  | 
| 933 |  |  |  |  |  |  |  | 
| 934 |  |  |  |  |  |  | # returns [] | 
| 935 |  |  |  |  |  |  | sub _find_unknown_components { | 
| 936 | 452 |  |  | 452 |  | 949 | my $self       = shift; | 
| 937 | 452 |  |  |  |  | 366 | my $rh_components = shift; | 
| 938 |  |  |  |  |  |  |  | 
| 939 | 452 |  |  |  |  | 368 | my %h_known   = map  { $_ => 1 } @{$self->{ordered_components}}; | 
|  | 29861 |  |  |  |  | 34624 |  | 
|  | 452 |  |  |  |  | 790 |  | 
| 940 | 452 |  |  |  |  | 3336 | my @a_unknown = grep { !exists($h_known{$_}) } sort keys %$rh_components; | 
|  | 4079 |  |  |  |  | 5089 |  | 
| 941 | 452 |  |  |  |  | 2241 | return \@a_unknown; | 
| 942 |  |  |  |  |  |  | } | 
| 943 |  |  |  |  |  |  |  | 
| 944 |  |  |  |  |  |  | 1; | 
| 945 |  |  |  |  |  |  |  | 
| 946 |  |  |  |  |  |  | __END__ | 
| 947 |  |  |  |  |  |  |  | 
| 948 |  |  |  |  |  |  | =pod | 
| 949 |  |  |  |  |  |  |  | 
| 950 |  |  |  |  |  |  | =encoding UTF-8 | 
| 951 |  |  |  |  |  |  |  | 
| 952 |  |  |  |  |  |  | =head1 NAME | 
| 953 |  |  |  |  |  |  |  | 
| 954 |  |  |  |  |  |  | Geo::Address::Formatter - take structured address data and format it according to the various global/country rules | 
| 955 |  |  |  |  |  |  |  | 
| 956 |  |  |  |  |  |  | =head1 VERSION | 
| 957 |  |  |  |  |  |  |  | 
| 958 |  |  |  |  |  |  | version 1.9982 | 
| 959 |  |  |  |  |  |  |  | 
| 960 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 961 |  |  |  |  |  |  |  | 
| 962 |  |  |  |  |  |  | # | 
| 963 |  |  |  |  |  |  | # get the templates (or use your own) | 
| 964 |  |  |  |  |  |  | # git clone git@github.com:OpenCageData/address-formatting.git | 
| 965 |  |  |  |  |  |  | # | 
| 966 |  |  |  |  |  |  | my $GAF = Geo::Address::Formatter->new( conf_path => '/path/to/templates' ); | 
| 967 |  |  |  |  |  |  |  | 
| 968 |  |  |  |  |  |  | my $components = { ... } | 
| 969 |  |  |  |  |  |  | my $text = $GAF->format_address($components, { country => 'FR' } ); | 
| 970 |  |  |  |  |  |  | my $rh_final_components = $GAF->final_components(); | 
| 971 |  |  |  |  |  |  | # | 
| 972 |  |  |  |  |  |  | # or if we want shorter output | 
| 973 |  |  |  |  |  |  | # | 
| 974 |  |  |  |  |  |  | my $short_text = $GAF->format_address($components, { country => 'FR', abbreviate => 1, }); | 
| 975 |  |  |  |  |  |  |  | 
| 976 |  |  |  |  |  |  | =head2 new | 
| 977 |  |  |  |  |  |  |  | 
| 978 |  |  |  |  |  |  | my $GAF = Geo::Address::Formatter->new( conf_path => '/path/to/templates' ); | 
| 979 |  |  |  |  |  |  |  | 
| 980 |  |  |  |  |  |  | Returns one instance. The I<conf_path> is required. | 
| 981 |  |  |  |  |  |  |  | 
| 982 |  |  |  |  |  |  | Optional parameters are: | 
| 983 |  |  |  |  |  |  |  | 
| 984 |  |  |  |  |  |  | I<debug>: prints tons of debugging info for use in development. | 
| 985 |  |  |  |  |  |  |  | 
| 986 |  |  |  |  |  |  | I<no_warnings>: turns off a few warnings if configuration is not optimal. | 
| 987 |  |  |  |  |  |  |  | 
| 988 |  |  |  |  |  |  | I<only_address>: formatted will only contain known components (will not include POI names). Note, can be overridden with optional param to format_address method. | 
| 989 |  |  |  |  |  |  |  | 
| 990 |  |  |  |  |  |  | =head2 final_components | 
| 991 |  |  |  |  |  |  |  | 
| 992 |  |  |  |  |  |  | my $rh_components = $GAF->final_components(); | 
| 993 |  |  |  |  |  |  |  | 
| 994 |  |  |  |  |  |  | returns a reference to a hash of the final components that are set at the | 
| 995 |  |  |  |  |  |  | completion of B<format_address>. Warns if called before they have been set | 
| 996 |  |  |  |  |  |  | (unless I<no_warnings> was set at object creation). | 
| 997 |  |  |  |  |  |  |  | 
| 998 |  |  |  |  |  |  | =head2 format_address | 
| 999 |  |  |  |  |  |  |  | 
| 1000 |  |  |  |  |  |  | my $text = $GAF->format_address(\%components, \%options ); | 
| 1001 |  |  |  |  |  |  |  | 
| 1002 |  |  |  |  |  |  | Given a structures address (hashref) and options (hashref) returns a | 
| 1003 |  |  |  |  |  |  | formatted address. | 
| 1004 |  |  |  |  |  |  |  | 
| 1005 |  |  |  |  |  |  | Possible options are: | 
| 1006 |  |  |  |  |  |  |  | 
| 1007 |  |  |  |  |  |  | 'abbreviate', if supplied common abbreviations are applied | 
| 1008 |  |  |  |  |  |  | to the resulting output. | 
| 1009 |  |  |  |  |  |  |  | 
| 1010 |  |  |  |  |  |  | 'address_template', a mustache format template to be used instead of the template | 
| 1011 |  |  |  |  |  |  | defined in the configuration | 
| 1012 |  |  |  |  |  |  |  | 
| 1013 |  |  |  |  |  |  | 'country', which should be an uppercase ISO 3166-1:alpha-2 code | 
| 1014 |  |  |  |  |  |  | e.g. 'GB' for Great Britain, 'DE' for Germany, etc. | 
| 1015 |  |  |  |  |  |  | If ommited we try to find the country in the address components. | 
| 1016 |  |  |  |  |  |  |  | 
| 1017 |  |  |  |  |  |  | 'only_address', same as only_address global option but set at formatting level | 
| 1018 |  |  |  |  |  |  |  | 
| 1019 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 1020 |  |  |  |  |  |  |  | 
| 1021 |  |  |  |  |  |  | You have a structured postal address (hash) and need to convert it into a | 
| 1022 |  |  |  |  |  |  | readable address based on the format of the address country. | 
| 1023 |  |  |  |  |  |  |  | 
| 1024 |  |  |  |  |  |  | For example, you have: | 
| 1025 |  |  |  |  |  |  |  | 
| 1026 |  |  |  |  |  |  | { | 
| 1027 |  |  |  |  |  |  | house_number => 12, | 
| 1028 |  |  |  |  |  |  | street => 'Avenue Road', | 
| 1029 |  |  |  |  |  |  | postcode => 45678, | 
| 1030 |  |  |  |  |  |  | city => 'Deville' | 
| 1031 |  |  |  |  |  |  | } | 
| 1032 |  |  |  |  |  |  |  | 
| 1033 |  |  |  |  |  |  | you need: | 
| 1034 |  |  |  |  |  |  |  | 
| 1035 |  |  |  |  |  |  | Great Britain: 12 Avenue Road, Deville 45678 | 
| 1036 |  |  |  |  |  |  | France: 12 Avenue Road, 45678 Deville | 
| 1037 |  |  |  |  |  |  | Germany: Avenue Road 12, 45678 Deville | 
| 1038 |  |  |  |  |  |  | Latvia: Avenue Road 12, Deville, 45678 | 
| 1039 |  |  |  |  |  |  |  | 
| 1040 |  |  |  |  |  |  | It gets more complicated with 200+ countries and territories and dozens more | 
| 1041 |  |  |  |  |  |  | address components to consider. | 
| 1042 |  |  |  |  |  |  |  | 
| 1043 |  |  |  |  |  |  | This module comes with a minimal configuration to run tests. Instead of | 
| 1044 |  |  |  |  |  |  | developing your own configuration please use (and contribute to) | 
| 1045 |  |  |  |  |  |  | those in https://github.com/OpenCageData/address-formatting | 
| 1046 |  |  |  |  |  |  | which includes test cases. | 
| 1047 |  |  |  |  |  |  |  | 
| 1048 |  |  |  |  |  |  | Together we can address the world! | 
| 1049 |  |  |  |  |  |  |  | 
| 1050 |  |  |  |  |  |  | =head1 AUTHOR | 
| 1051 |  |  |  |  |  |  |  | 
| 1052 |  |  |  |  |  |  | Ed Freyfogle | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 1055 |  |  |  |  |  |  |  | 
| 1056 |  |  |  |  |  |  | This software is copyright (c) 2023 by Opencage GmbH. | 
| 1057 |  |  |  |  |  |  |  | 
| 1058 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 1059 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 1060 |  |  |  |  |  |  |  | 
| 1061 |  |  |  |  |  |  | =cut |