|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Lingua::EN::AddressParse - extract components of a street address from free format text  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     use Lingua::EN::AddressParse;  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %args =  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       country     => 'US',  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       auto_clean  => 1,  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       force_case  => 1,  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       abbreviate_subcountry => 0,  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       abbreviated_subcountry_only => 0,  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       force_post_code => 0  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $address = Lingua::EN::AddressParse->new(%args);  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $error = $address->parse("40 1/2 N OLD MASSACHUSETTS AVE APT 3B Washington Valley Washington 98100: HOLD MAIL");  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $address->report;         | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Country address format  'US'  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Address type            'suburban'  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Non matching part       'HOLD MAIL '  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Error                   '1'  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Error descriptions      'non matching section : HOLD MAIL '  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Warning                 '1'  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Warning description     ''  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         Case all                '40 1/2 N Old Massachusetts Ave Apt 3B Washington Valley WA 98100'  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         COMPONENTS              ''  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         base_street_name        'Old Massachusetts'  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         post_code               '98100'  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         property_identifier     '40 1/2'  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         street_direction_prefix 'N'  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         street_name             'N Old Massachusetts'  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         street_type             'Ave'  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         sub_property_identifier '3B'  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         sub_property_type       'Apt'  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         subcountry              'WASHINGTON'  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         suburb                  'Washington Valley'         | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     %address_components = $address->components;  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $address_components{sub_property_type};       # APT  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $address_components{sub_property_identifier}; # 3B  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $address_components{property_identifier};     # 40 1/2  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     %address_properties = $address->properties;  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $address_properties{type};            # suburban  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print $address_properties{non_matching};    # : HOLD MAIL  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $correct_casing = $address->case_all;  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module takes as input a suburban, rural or postal address in free format  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 text such as,  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     3080 28TH AVE N ST PETERSBURG, FL 33713-3810  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     12 1st Avenue N Suite # 2 Somewhere CA 12345 USA  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     C/O JOHN, KENNETH JR POA 744 WIND RIVER DR SYLVANIA, OH 43560-4317  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     9 Church Street, Abertillery, Mid Glamorgan NP13 1DA  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     27 Bury Street, Abingdon, Oxfordshire OX14 3QT  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     2A O'CONNELL ST KEW NSW 2123  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     12/3-5 AUBREY ST MOUNT VICTORIA VICTORIA 3133  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "OLD REGRET" WENTWORTH FALLS NSW 2782 AUSTRALIA  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     GPO Box K318, HAYMARKET, NSW 2000  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and attempts to parse it. If successful, the address is broken  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 down into it's components and useful functions can be performed such as :  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     converting upper or lower case values to title case (2A O'Connell St Kew NSW 2123)  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     extracting the addresses individual components      (2A,O'Connell,St,KEW,NSW,2123)  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     determining the type of format the address is in    ('suburban')  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the address cannot be parsed you have the option of cleaning the address  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of bad characters, or extracting any portion that was parsed and the portion  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that failed.  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module can be used for analysing and improving the quality of  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lists of residential and postal addresses.  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 By using a large combination of regular expressiosn with look ahead analysis, patterns  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 can be parsed that confuse many other parsers. Examples are  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Street names with several street types: Lane Cove Road  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Suburbs which include street types: Smith Road St Marys  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Suburbs that include state names: Fort Washington Washington  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DEFINITIONS  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The following terms are used by AddressParse to define the components that  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 can make up an address.  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Pre cursor : C/O MR A Smith...  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Sub property identifier : Level 1A Unit 2, Apartment B, Lot 12, Suite # 12 ...  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Property Identifier : 12/66A, 24-34, 2A, 23B/12C, 12/42-44, 2.5  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Property name   : "Old Regret"  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Post Box        : GP0 Box K123, LPO 2345, RMS 23 ...  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Road Box        : RMB 24A, RMS 234 ...  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Street Direction: North, SE, Sth. etc  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Street name     : O'Hare, New South Head, The Causeway, Broadway  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Street type     : Road, Rd., St, Lane, Highway, Crescent, Circuit ...  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Suburb          : Dee Why, St. John's Wood ...  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Sub country     : NSW, New South Wales, ACT, NY, New Jersey AZ ...  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Post (zip) code : 2062, 34532-1234, SG12A 9ET  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Country         : Australia, UK, US or Canada  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The main  address formats  currently supported are as follows. (a ? means the component is optional):  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'suburban' : sub_property(?) property_identifier(?) street street_type suburb subcountry post_code(?)country(?)  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     OR for the USA  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'suburban' : property_identifier(?) street street_type sub_property(?) suburb subcountry post_code(?) country(?)  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'rural'    : property_name suburb subcountry post_code(?) country(?)  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'post_box' : post_box suburb subcountry post_code(?) country(?)  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'road_box' : road_box street street_type suburb subcountry post_code(?) country(?)  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'road_box' : road_box suburb subcountry post_code(?) country(?)  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that suburb and subcountry are not optional. The accuracy of the parser is  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 improved by providing as much context as possible. Proding a suburb can ehlp to  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 identify street names that would itherwise be ambigious.  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For the case where you only have a street address, dummy (but still valid) values can be used  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for suburb (such as 'Somewhere') and sub country (such as 'NY'). These dummy values will  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be parsed  but can be ignored.  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All formats may contain a precursor  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Refer to the component grammar defined in the Lingua::EN::AddressParse::Grammar  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 module for a complete list of combinations.  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method creates an instance of an address object and sets up  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the grammar used to parse addresses. This must be called before any of the  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 following methods are invoked. Note that the object only needs to be  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 created once, and can be reused with new input data.  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Various setup options may be defined in a hash that is passed as an  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 optional argument to the C method.  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %args =  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       country     => 'US',  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       auto_clean  => 1,  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       force_case  => 1,  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       abbreviate_subcountry => 1,  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       abbreviated_subcountry_only => 1,  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       force_post_code => 1  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $address = Lingua::EN::AddressParse->new(%args);  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item country  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The country argument must be specified. It determines the possible list of  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 valid sub countries (states, counties etc, defined in the Locale::SubCountry  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 module) and post code formats. Either the full name or abbreviation may be  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 specified. The currently supported country names and codes are:  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     AU or Australia  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     CA or Canada  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     GB or United Kingdom  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     US or United States  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All forms of upper/lower case are acceptable in the country's spelling. If a  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 country name is supplied that the module doesn't recognise, it will die.  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item force_case (optional)  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This option only applies to the C method, see below.  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item auto_clean (optional)  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When this option is set to a positive value, the input string is  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'cleaned' to try and normalise bad patterns. The type of cleaning  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 includes  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     remove non alphanumeric characters  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     remove full stops  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     remove redundant white space  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     add missing space separators  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     expand abbreviations to more common forms  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     remove bracketed annotations  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     fix badly formed sub property identifiers  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item abbreviate_subcountry (optional)  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When this option is set to a positive value, the sub country is forced to it's  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 abbreviated form, so "New South Wales" becomes "NSW". If the sub country is  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 already abbreviated then it's value is not altered.  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item abbreviated_subcountry_only (optional)  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When this option is set to a positive value, only the abbreviated form  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of sub country is allowed, such as "NSW" and not "New South Wales". This  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will make parsing quicker and ensure that addresses comply with postal  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 standards that normally permit only abbreviated sub countries.  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It also avoids matching a sub_country name too early, as in the case of 'Port Washington New Jersey'  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Normally, 'Washington would be consumed as the sub country, but by first converting  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the address to 'Port Washington NJ' we avoid this problem  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item force_post_code (optional)  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When this option is set to a positive value, the address must contain  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a post code. If it does not then an error flag is raised. If this option  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is set to 0 than a post code is optional.  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 By default for this option is true.  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 parse  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $error = $address->parse("12/3-5 AUBREY ST VERMONT VIC 3133");  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method takes a single parameter of a text string containing a  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 address. It attempts to parse the address and break it down into the components  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 described below. If the address is parsed successfully, a 0 is returned,  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 otherwise a 1.  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that you can successfully parse all the components of an address and still  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 have an error returned. This occurs when you have non matching data following  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a valid address. To check if the data is unusable, you also need to use the  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C method to check the address type is 'unknown'  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method is a prerequisite for all the following methods.  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 components  | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     %address = $address->components($upper_case_all);  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $suburb = $address{suburb};  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the optional argument $upper_case_all is set to a postive value, all components  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are converted to upper case.  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method returns all the address components in a hash. The  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 following keys are used for each component:  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     pre_cursor - such as 'C/O Mr A Smith'  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     po_box_type - such as 'Private Boxes'  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     post_box  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     road_box  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub_property_type  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub_property_identifier  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     property_identifier  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     property_name  | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     level - such as 12th Floor  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     building - such as Tower A  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     street_direction_prefix (such as East, NW, North etc)  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     base_street_name (the name with direction removed, such as "Main" in "East Main St")  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     street_name (the full street name such as "East Main")  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     street_type  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     street_direction_suffix (US only, abbreviated only such as N, SE etc)  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     suburb  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     subcountry  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     post_code  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     country  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If a component has no matching data for a given address, it's values will be  | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 set to the empty string.  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Each component is converted to title case, meaning the first letter of each  | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 component is set to capitals and the remainder to lower case.  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Proper name capitalisations such as MacNay and O'Brien are observed  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The following components are not converted to title case:  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     post_box  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     road_box  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     subcountry  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     post_code  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     country  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     street_direction_suffix  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If your input data is all upper case and you want to retian that format for parsed  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data, you will need to apply the 'uc' function to each component.  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 case_all  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $correct_casing = $address->case_all;  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method does the same thing as the C method except  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the entire address is returned as a title cased text string.  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the force_case option was set in the C method above, address case the  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 entire input string, including any unmatched sections after a recognisable address  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that failed parsing. This option is useful when you know you have invalid data,  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 but you still want to title case what you have.  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 properties  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method returns several properties of the address as a hash.  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The  following keys are used for each property -  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     type - either suburban ,rural,post_box,road_box,unknown  | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     non_matching - any trailing string not part the address  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Additional properties can be accessed with the following  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{original_input}  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{input_string} - string after auto_clean option has been applied  | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{country_code} - abbreviated Country address format (as defined in the C method)  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{error} - error flag, 0 = good, 1 = error  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{error_desc} - text to describe the type of parsing error  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{warning} - warning flag, 0 = good, 1 = warning  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $address->{warning_desc} - text to to describe the type of parsing warning(s)  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Warnings mean that the address has parsed but there may still be errors within it's components    | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 report  | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Create a formatted text report  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     the input string  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     the cleaned input string  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     the country type  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     the address type  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     any non matching part of input string  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if any parsing errors occurred  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     error description  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if any parsing warning occurred  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     warning description  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     the name and value of each defined component  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns a string containing a multi line formatted text report  | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DEPENDENCIES  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L, L, L  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 BUGS  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LIMITATIONS  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Streets such as 'The Esplanade' will return a street of 'The Esplanade' and a  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 street type of null string.  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The abbreviation 'St' can be interpreted as either street or Saint. This leads to  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ambiguities such as '12 East St Thomas Lane'. This could be 'East Street', suburb of  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'Thomas Lane' or 'East St Thomas Lane'. And the first pattern is the more common,  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that is what will match.  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For US addresses, an ambiguity arises between a street directional suffix and  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a suburb directional prefix, such as '12 Main St S Springfield CA 92345'. Is it South  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Main St, or South Springfield? The parser assumes that 'S' belongs to the street  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 description.  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The huge number of character combinations that can form a valid address makes  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it is impossible to correctly identify them all.  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Valid addresses must contain:  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     property address, suburb, subcountry (aka state) in that order.  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This format is widely accepted in Australia and the US.  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 UK addresses will often include suburb, town, city and county, formats that  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are very difficult to parse.  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Property names must be enclosed in single or double quotes like "Old Regret"  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Because of the large combination of possible addresses defined in the grammar,  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the program is not very fast.  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 REFERENCES  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "The Wordsworth Dictionary of Abbreviations & Acronyms" (1997)  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Australian Standard AS4212-1994 "Geographic Information Systems -  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data Dictionary for transfer of street addressing information"  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ISO 3166-2:1998, Codes for the representation of names of countries  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and their subdivisions. Also released as AS/NZS 2632.2:1999  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 AddressParse is designed to identify properties, which have a unique physical  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 location. L will also parse addresses for the USA, and can handle  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 locations defined by street intersections, such as: "Hollywood & Vine, Los Angeles, CA"  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "Mission Street at Valencia Street, San Francisco, CA"  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     L  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     L  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     L  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     L  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See L  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for a list of different addressing formats from around the world. And also  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 REPOSITORY  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 TO DO  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Define grammar for other languages. Hopefully, all that would be needed is  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to specify a new module with its own grammar, and inherit all the existing  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 methods. I don't have the knowledge of the naming conventions for non-english  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 languages.  | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 AddressParse was written by Kim Ryan   | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT AND LICENSE  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright (c) 2018 Kim Ryan. All rights reserved.  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This library is free software; you can redistribute it and/or modify  | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it under the same terms as Perl itself.  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Lingua::EN::AddressParse;  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
448
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
491
 | 
 use strict;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
449
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
4
 | 
 use Carp;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
    | 
| 
450
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
5
 | 
 use warnings;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
451
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
417
 | 
 use Lingua::EN::AddressParse::Grammar;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
    | 
| 
452
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
570
 | 
 use Lingua::EN::NameParse;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37066
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
    | 
| 
453
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
 
 | 
12
 | 
 use Parse::RecDescent;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '1.27';  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Create a new instance of an address parsing object. This step is time  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # consuming and should normally only be called once in your program.  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
463
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
1045
 | 
     my $class = shift;  | 
| 
464
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     my %args = @_;  | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
467
 | 
4
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
40
 | 
     unless (defined $args{country} and $args{country} =~ /^(AU|Australia|GB|United Kingdom|US|United States|CA|Canada)$/ )  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
469
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         croak "Cannot start parser. You must specify a value for the country in the options hash.\nValid options are AU,GB,US or CA.\n";  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
472
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     my $address = {};  | 
| 
473
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     bless($address,$class);  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # option defaults  | 
| 
476
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     $address->{'force_post_code'} = 1;  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Add error checking for invalid keys?  | 
| 
479
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     foreach my $curr_key (keys %args)  | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
481
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
         $address->{$curr_key} = $args{$curr_key};  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # create the grammar tree (this is country dependent)  | 
| 
485
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     my $grammar = Lingua::EN::AddressParse::Grammar::_create($address);  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
487
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     $address->{parse} = Parse::RecDescent->new($grammar);  | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
489
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3011169
 | 
     return ($address);  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
494
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
  
1
  
 | 
6906
 | 
     my $address = shift;  | 
| 
495
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     my ($input_string) = @_;  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Save original data so we can check effect of auto cleaning  | 
| 
498
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     $address->{original_input} = $input_string;  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Convert to all upper case. This will allow for faster regexp matching in  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the grammar tree  | 
| 
502
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     $address->{input_string} = uc($input_string);  | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
504
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     chomp($address->{input_string});  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     my $pre_cursor;  | 
| 
507
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
     ($pre_cursor,$address->{input_string}) = _extract_precursor($address->{input_string});  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Replace commas (which can be used to chunk sections of addresses) with spaces  | 
| 
510
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     $address->{input_string} =~ s/,/ /g;  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
512
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
69
 | 
     if ( $address->{auto_clean} )  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
514
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
         $address->{input_string} = _clean($address);  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }      | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
517
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my $po_box_type;  | 
| 
518
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
     ($po_box_type,$address->{input_string}) = _extract_po_box_type($address->{input_string});  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
520
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     my $level;  | 
| 
521
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     ($level,$address->{input_string}) = _extract_level($address->{input_string});  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
523
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     my $building;  | 
| 
524
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     ($building,$address->{input_string}) = _extract_building($address->{input_string});      | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Normalise sub property ID, 4/22-24 => UNIT 4 22-24, 4 12 => Unit 4 12  | 
| 
529
 | 
15
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
97
 | 
     if ($address->{country_code} ne 'US' and $address->{input_string} =~ /^(\d{1,4}[A-Z]{0,2})[\/| ](\d+[ \w-].*)$/ )  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
531
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
         $address->{input_string} = "UNIT $1 $2";       | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }      | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # We need to add a trailing space to the input string. This is because the grammar  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # tree expects a terminator (the space) fro every production, optionally followed  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # by other productions or any final non matching text.  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # This space will be removed in the _assemble function  | 
| 
538
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     $address->{input_string} .= ' ';  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
540
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     $address = _assemble($address,$pre_cursor,$po_box_type,$level,$building);  | 
| 
541
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
     _validate($address);  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
544
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
     return($address,$address->{error});  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Apply correct capitalisation to each component of an address  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub components  | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
552
 | 
17
 | 
 
 | 
 
 | 
  
17
  
 | 
  
1
  
 | 
84
 | 
     my $address = shift;  | 
| 
553
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     my ($uc_all) = @_;  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
555
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     my %orig_components = %{ $address->{components} };  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
158
 | 
    | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
557
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     my (%cased_components);  | 
| 
558
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
     foreach my $curr_key ( keys %orig_components )  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
560
 | 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
386
 | 
         my ($cased_value, $curr_value);  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
562
 | 
323
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
452
 | 
         if ( $orig_components{$curr_key})  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
564
 | 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
170
 | 
             $curr_value = $orig_components{$curr_key};  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
568
 | 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
259
 | 
             $curr_value = '';  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
572
 | 
323
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
474
 | 
         if ($uc_all)  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
574
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $cased_components{$curr_key} = uc($curr_value);  | 
| 
575
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             next;              | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
579
 | 
323
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
800
 | 
         if ( $curr_key =~ /^(base_street_name|street_name|street_type|suburb|property_name|sub_property|pre_cursor|po_box_type|level|building)/ )  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
582
 | 
187
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
445
 | 
             if ( $curr_key eq 'street_name' and$curr_value =~ /^US HIGHWAY (.*)/ )  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
584
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $cased_value = "US Highway $1";  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif  ( $curr_key eq 'sub_property_identifier' )  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # UNIT, APT ... 12D etc  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
590
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
                 my @words = split(/ /,$curr_value);  | 
| 
591
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
                 my @cased_words;  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 my $cased_string;  | 
| 
593
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
                 foreach my $word (@words)  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
595
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     my $cased_word;  | 
| 
596
 | 
5
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
39
 | 
                     if ( $word =~ /^\d{1,3}(ST|ND|RD|TH)$/)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     {  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # ordinal component, as in  3rd Floor  | 
| 
599
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $cased_word = lc($word);  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     elsif ( length($word) > 1 and $word !~ /\d/ )  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     {  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # only need to title case words such as UNIT  | 
| 
604
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $cased_word = Lingua::EN::NameParse::case_surname($word);  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     {  | 
| 
608
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                         $cased_word = $word;  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
610
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                     push(@cased_words,$cased_word);  | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
613
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
                 $cased_value = join(' ',@cased_words);  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
617
 | 
169
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
241
 | 
                 if ($curr_value)  | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # Surnames can be used for street's or suburbs so this method  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # will give correct capitalisation for most cases  | 
| 
621
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
147
 | 
                     $cased_value = Lingua::EN::NameParse::case_surname($curr_value);  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
625
 | 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
                     $cased_value = '';  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # retain street_direction,sub country and countries capitalisation, usually uppercase  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($curr_key =~ /street_direction/)  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
632
 | 
34
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
154
 | 
             if (length($curr_value) == 1 or length($curr_value) == 2)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # N, SE etc is capitalised  | 
| 
635
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 $cased_value =$curr_value;  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif (length($curr_value) > 2)  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
639
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $cased_value = Lingua::EN::NameParse::case_surname($curr_value);  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
643
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
                 $cased_value = '';  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # retain sub country and countries capitalisation, as usually uppercase  | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
649
 | 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
143
 | 
             $cased_value = uc($curr_value);  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
651
 | 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2348
 | 
         $cased_components{$curr_key} = $cased_value;  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
653
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
299
 | 
     return(%cased_components);  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Apply correct capitalisation to an entire address  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub case_all  | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
660
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $address = shift;  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @cased_address;  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
664
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     unless ( $address->{properties}{type} eq 'unknown' )  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Hash of of lists, indicating the order that address components are assembled in.  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Each list element is itself the name of the key value in an address object.  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
670
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my %component_order=  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         (  | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            'rural'   => [ qw/pre_cursor property_name suburb subcountry post_code country/],  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            'post_box'=> [ qw/pre_cursor post_box suburb po_box_type subcountry post_code country/ ],  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            'road_box'=> [ qw/pre_cursor road_box street_name street_type suburb subcountry post_code country/ ]  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
677
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ( $address->{country} eq 'US' )  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
679
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
            $component_order{'suburban'} = [ qw/pre_cursor property_identifier street_name  street_type street_direction_suffix  building level sub_property_type sub_property_identifier suburb subcountry post_code country/];  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
683
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
            $component_order{'suburban'} = [ qw/pre_cursor building level sub_property_type sub_property_identifier property_identifier street_name  street_type suburb subcountry post_code country/ ];  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
686
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my %component_vals = $address->components;  | 
| 
687
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my @order = @{ $component_order{$address->{properties}{type} } };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
689
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         foreach my $component ( @order )  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # As some components such as property name are optional, they will appear  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # in the order array but may or may not have have a value, so check  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # for undefined values  | 
| 
694
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ( $component_vals{$component} )  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
696
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push(@cased_address,$component_vals{$component});  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
701
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     if ( $address->{error} and $address->{force_case} )  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Despite errors, try to name case non-matching section. As the format  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # of this section is unknown, surname case will provide the best  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # approximation  | 
| 
706
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         push(@cased_address,&Lingua::EN::NameParse::case_surname($address->{properties}{non_matching}));  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
709
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return(join(' ',@cased_address));  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub properties  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
714
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
11
 | 
     my $address = shift;  | 
| 
715
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     return(%{ $address->{properties} });  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Create a text report to standard output listing  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # - the input string,  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # - the name of each defined component  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # - any non matching component  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub report  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
726
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my $address = shift;  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
728
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $report = '';  | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
730
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Original Input",$address->{original_input});  | 
| 
731
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Cleaned Input",$address->{input_string});  | 
| 
732
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Country address format",$address->{country_code});  | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
734
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my %props = $address->properties;  | 
| 
735
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     if ( $props{type} )  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
737
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         _fmt_report_line(\$report,"Address type",$props{type});  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
741
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Non matching part",$props{non_matching});  | 
| 
742
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Error",$address->{error});  | 
| 
743
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Error descriptions",$address->{error_desc});  | 
| 
744
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Warning",$address->{error});  | 
| 
745
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Warning description",$address->{warning_desc});      | 
| 
746
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"Case all",$address->case_all);  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
749
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     _fmt_report_line(\$report,"COMPONENTS",'');  | 
| 
750
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my %comps = $address->components;  | 
| 
751
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     foreach my $comp ( sort keys %comps)  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
753
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (defined($comps{$comp})  )  | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
755
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             _fmt_report_line(\$report,$comp,$comps{$comp});  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
759
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return($report);  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # PRIVATE METHODS  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _assemble  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
772
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
30
 | 
     my $address = shift;  | 
| 
773
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     my ($pre_cursor,$po_box_type,$level,$building) = @_;  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Parse the address according to the rules defined in the AddressParse::Grammar module,  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # $::RD_TRACE  = 1;  # for debugging RecDescent output  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Use Parse::RecDescent to do the parsing. 'full_address' is a label for the complete grammar tree  | 
| 
778
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
118
 | 
     my $parsed_address = $address->{parse}->full_address($address->{input_string});  | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Place components into a separate hash, so they can be easily returned to the user to inspect and modify  | 
| 
781
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
228790
 | 
     $address->{components} = ();  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
40
 | 
     if ($pre_cursor)  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
785
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address->{components}{'pre_cursor'} = $pre_cursor;  | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
789
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
         $address->{components}{'pre_cursor'} = '';  | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
792
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     if ($level)  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
794
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address->{components}{'level'} = $level;  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
798
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
         $address->{components}{'level'} = '';  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
801
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     if ($building)  | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
803
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $address->{components}{'building'} = $building;  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
807
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         $address->{components}{'building'} = '';  | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }         | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
811
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     if ($po_box_type)  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
813
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address->{components}{'po_box_type'} = $po_box_type;  | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
817
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
         $address->{components}{'po_box_type'} = '';  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
821
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     $address->{components}{post_box} = '';  | 
| 
822
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     if ( $parsed_address->{post_box} )  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
824
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address->{components}{post_box} = $parsed_address->{post_box};  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
827
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     $address->{components}{road_box} = '';  | 
| 
828
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
     if ( $parsed_address->{road_box} )  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
830
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $address->{components}{road_box} = $parsed_address->{road_box};  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
833
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     $address->{components}{property_name} = '';  | 
| 
834
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     if ( $parsed_address->{property_name} )  | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
836
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $address->{components}{property_name} = $parsed_address->{property_name};  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
839
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     $address->{components}{sub_property_identifier} = '';  | 
| 
840
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $address->{components}{sub_property_type} = '';  | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
842
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
38
 | 
     if ( $parsed_address->{sub_property} )  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
844
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
         if ($parsed_address->{sub_property} =~ /^(#|[A-Z]{1,}) (.*)$/ )  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Such as Unit 24, # 4A etc  | 
| 
847
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
             $address->{components}{sub_property_type} = $1;  | 
| 
848
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             $address->{components}{sub_property_identifier} = $2;  | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($parsed_address->{sub_property} =~ /^(\d\w\w) (.*)$/ )  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Such as 1st Floor  | 
| 
853
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $address->{components}{sub_property_type} = $2;  | 
| 
854
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $address->{components}{sub_property_identifier} = $1;  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }          | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
858
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     $address->{components}{property_identifier} = '';  | 
| 
859
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     if ( $parsed_address->{property_identifier} )  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
861
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         $address->{components}{property_identifier} = $parsed_address->{property_identifier};  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
864
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
     ($address->{components}{street_direction_prefix},$address->{components}{base_street_name}) = _get_street_direction($parsed_address->{street_name});  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
866
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     $address->{components}{street_name} = '';  | 
| 
867
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $address->{components}{street_type} = '';  | 
| 
868
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
48
 | 
     if ( $parsed_address->{street_name} )  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Streets such as 'The Corso' will parse as street_name = 'The' and street_type = 'Corso', so seperate out  | 
| 
871
 | 
13
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
38
 | 
         if ( $parsed_address->{street_name} eq 'THE ' )  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
873
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
              $address->{components}{street_name} = 'THE ' . $parsed_address->{street_type};  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
877
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
             $address->{components}{street_name} = $parsed_address->{street_name};  | 
| 
878
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
             $address->{components}{street_type} = $parsed_address->{street_type};  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
882
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     $address->{components}{street_direction_suffix} = '';  | 
| 
883
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     if ( $parsed_address->{street_direction_suffix} )  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
885
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         $address->{components}{street_direction_suffix} =  $parsed_address->{street_direction_suffix};  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
889
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     $address->{components}{suburb} = '';  | 
| 
890
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     if ( $parsed_address->{suburb} )  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
892
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
         $address->{components}{suburb} =  $parsed_address->{suburb};  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
895
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     $address->{components}{subcountry} = '';  | 
| 
896
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     if ( $parsed_address->{subcountry} )  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
898
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
         my $sub_country = $parsed_address->{subcountry};  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Force sub country to abbreviated form, South Australia becomes SA, Michigan become MI etc  | 
| 
901
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
         if ($address->{abbreviate_subcountry})  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
903
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
             my $country = Locale::SubCountry->new($address->{country});  | 
| 
904
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
             my $code = $country->code($sub_country);  | 
| 
905
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
578
 | 
             if ( $code ne 'unknown' )  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
907
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $address->{components}{subcountry} = $code;  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # sub country already abbreviated  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
912
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
                 $address->{components}{subcountry} = $sub_country;  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
917
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
            $address->{components}{subcountry} = $sub_country;  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
921
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     $address->{components}{post_code} = '';  | 
| 
922
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     if ( $parsed_address->{post_code} )  | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
924
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
         $address->{components}{post_code} = $parsed_address->{post_code};  | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
927
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $address->{components}{country} = '';  | 
| 
928
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
62
 | 
     if ( $parsed_address->{country} )  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
930
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $address->{components}{country} = $parsed_address->{country};  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
933
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
     $address->{properties} = ();  | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
935
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     $address->{properties}{non_matching} = '';  | 
| 
936
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     if ( $parsed_address->{non_matching} )  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
938
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address->{properties}{non_matching} = $parsed_address->{non_matching};  | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
940
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     $address->{properties}{type} = $parsed_address->{type};  | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
942
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     _trim_trailing_space($address);  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
944
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     return($address);  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #-------------------------------------------------------------------------------  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _get_street_direction  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
951
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
29
 | 
     my ($street_name) = @_;  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
953
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     my $street_direction;  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $base_street_name;  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
956
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     unless ($street_name)  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
958
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         return;  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
961
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
66
 | 
     my @words = split(/\s/,$street_name);  | 
| 
962
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
     if (@words > 1)  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
964
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
         if  ( $words[0] =~ /^(N|NE|NW|E|S|SE|SW|W|NORTH|EAST|SOUTH|WEST|NTH|STH)$/ )  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
966
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $street_direction = $1;  | 
| 
967
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             shift(@words);  | 
| 
968
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $base_street_name = join(' ',@words);  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
972
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             $street_direction = '';  | 
| 
973
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             $base_street_name = $street_name;  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
979
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
         $base_street_name = $street_name;  | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
981
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     return($street_direction,$base_street_name);  | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Check for several different types of syntax errors  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _validate  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
990
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
25
 | 
     my $address = shift;  | 
| 
991
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
     $address->{error} = 0;  | 
| 
992
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     $address->{error_desc} = '';  | 
| 
993
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     $address->{warning} = 0;  | 
| 
994
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $address->{warning_desc} = '';  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
996
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
39
 | 
     if ( $address->{properties}{non_matching} )  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
998
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $address->{error} = 1;  | 
| 
999
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $address->{error_desc} = 'non matching section : ' . $address->{properties}{non_matching};  | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1003
 | 
14
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
         if ( $address->{properties}{type} eq 'unknown' )  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
1005
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $address->{error} = 1;  | 
| 
1006
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $address->{error_desc} .= 'unknown address format';  | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
1010
 | 
14
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
117
 | 
             if ($address->{force_post_code} and not $address->{components}{post_code})  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
1012
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $address->{error} = 1;  | 
| 
1013
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $address->{error_desc} .= ':no post code';  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # illegal characters found, note a '#' can appear as an abbreviation for number in USA addresses  | 
| 
1017
 | 
14
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
51
 | 
             if ( $address->{input_string} =~ /[^"A-Z0-9'\-\.,\/ ]/ )  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Note, if auto_clean is on, illegal characters will have been removed  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # for second parsing and no error flag or message reported  | 
| 
1021
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $address->{error} = 1;  | 
| 
1022
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $address->{error_desc} .= ':illegal chars';  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1024
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
40
 | 
             if ( $address->{properties}{type} eq 'suburban' )  | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
1026
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
                 my $street = $address->{components}{street_name};  | 
| 
1027
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
                 if ($street !~ /\d/ )  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 {  | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # Not an ordinal or single letter street type  | 
| 
1030
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
                     if ( _check_vowel($address->{components}{base_street_name}) )  | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     {  | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # street name must have a vowel sound,  | 
| 
1033
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         $address->{warning} = 1;  | 
| 
1034
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                         $address->{warning_desc} .= ";no vowel sound in street word : $address->{components}{base_street_name}";  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1039
 | 
14
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
32
 | 
             if ( _check_vowel($address->{components}{suburb}) )  | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             {  | 
| 
1041
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $address->{warning} = 1;  | 
| 
1042
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $address->{warning_desc} .= ";no vowel sound in suburb word : $address->{components}{suburb}";  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #-------------------------------------------------------------------------------  | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Purge the input string of illegal or redundant characters.  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Correct malformed patterns  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _clean  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1053
 | 
12
 | 
 
 | 
 
 | 
  
12
  
 | 
 
 | 
20
 | 
     my $address = shift;  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1055
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     my ($input) = $address->{input_string};  | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Remove annotations enclosed in brackets, such as 1 Smith St (Cnr Brown St)  | 
| 
1058
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $input =~ s|\(.*\)||;  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    # Normalise half house numbers, such as 12.5 to 12 1/2. This is needed now before full stops are stripped out           | 
| 
1061
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     $input =~ s|^(\d{1,4})\.5 |$1 1/2 |;          | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # strip full stops, remove illegal characters  | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # & can be part of property name  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # hash (#) may denote number for USA address  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # quotes can occur as property name delimiters  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1068
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     $input =~ s|[^A-Za-z0-9/'" -]||go;  | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove repeating, leading and trailing spaces  | 
| 
1071
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $input =~ s|  +| |go ;  | 
| 
1072
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $input =~ s|^ ||;  | 
| 
1073
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $input =~ s| $||;  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Expand abbreviations that are too short  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1078
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $input =~ s/LAKE ST (GEORGE|CLAIR)/LAKE SAINT $1/; # otherwise St gets consumed to early as 'Street'  | 
| 
1079
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $input =~ s| CSEWY | CAUSEWAY |;  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Standardise abbreviations  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1083
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     $input =~ s|STR |ST |;  | 
| 
1084
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $input =~ s|TERR |TERRACE |;  | 
| 
1085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1086
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $input =~ s|^FCTR?Y |FACTORY |;  | 
| 
1087
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     $input =~ s|^FACT?R?Y? |FACTORY |;  | 
| 
1088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1089
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $input =~ s|LVL |LEVEL |; # sub property identifiers  | 
| 
1090
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $input =~ s|^UN? |UNIT |;  | 
| 
1091
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     $input =~ s|^U(\d+)|UNIT $1|;  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Fix badly formed number dividers such as home unit format of 14/ 12 becomes 14/12, 2- 7A becomes 2-7A  | 
| 
1094
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $input =~ s|/ |/|;  | 
| 
1095
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     $input =~ s| /|/|;  | 
| 
1096
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $input =~ s|- |-|;  | 
| 
1097
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     $input =~ s| -|-|;      | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Remove redundant spaces in property identifiers,  21 B Smith St becomes 21B Smith St  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1101
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     if ( $input !~ /^\d+ [A-Z] (ST|AVE)/ )  | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Don't remove space before single letter streets such as 21 B Street  | 
| 
1104
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
         if ( $address->{country_code} eq 'US' )  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Note cannot use N,E,S,W as they can be street direction prefix, as in 1 E MAIN STREET  | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Assume that the direction prefix is the more likely case  | 
| 
1108
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $input =~ s|^(\d+) ([A-DF-MO-RT-VX-Z] )|$1$2|;  | 
| 
1109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
1112
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
             $input =~ s|^(\d+) ([A-Z] )|$1$2|;  | 
| 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Normalise sub property identifiers  | 
| 
1117
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     if ( $address->{country_code} eq 'US' )  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Fix US sub property identifiers that appear after street name and type  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # add space between # or 'Apt' and the number so #2 becomes '# 2'  | 
| 
1121
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s| #(\d)| # $1|;  | 
| 
1122
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s| #([A-Z])| # $1|;  | 
| 
1123
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s| (APT)(\d)| $1 $2|i;  | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           | 
| 
1125
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s| (APT)-(\w)| $1 $2|i;  | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # remove redundant space so # 34 B becomes # 34B  | 
| 
1128
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s| # (\d+) (\w) | # $1$2 |;  | 
| 
1129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # remove redundant '#'  | 
| 
1131
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s/ (APT|SUITE|UNIT) #/ $1 /;  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # still to test  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Add a space to separate sub property type from number, UNIT2 becomes UNIT 2  | 
| 
1137
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
         $input =~ s/^(UNIT|LOT|APT|SHOP)(\d)/$1 $2/;  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Remove redundant slash or dash  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Unit 1B/22, becomes Unit 1B 22, Flat 2-12 becomes Flat 2 12      | 
| 
1142
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $input =~ s/^([A-Z]{2,}) (\d+[A-Z]?)[\/-]/$1 $2 /;  | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Unit J1/ 39 becomes  Unit J1 39  | 
| 
1144
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     $input =~ s/^([A-Z]{2,}) ([A-Z]\d{0,3})[\/-]/$1 $2 /;  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove dash that does not from a sequence, such as D-5 or 22-A  | 
| 
1148
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     $input =~ s|([A-Z])-(\d)|$1$2|;  | 
| 
1149
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     $input =~ s|(\d)-([A-Z])|$1$2|;  | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1152
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     return($input);  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #-------------------------------------------------------------------------------  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Remove any "care of" type of precursor from the main address  | 
| 
1156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # such as: C/O BRAKEFIELD  BETTY S PO BOX 214 GULF HAMMOCK, FL 32639-0214  | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It will later be saved as an attribute in the address object  | 
| 
1158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _extract_precursor  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1161
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
35
 | 
     my ($input) = @_;  | 
| 
1162
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
     my ($pre_cursor,$address_start,$address_end);  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1164
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
80
 | 
     if ($input =~ m{^(C/O.*?|ATTN.*?) (\d+|PO BOX)( .*)})  | 
| 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1166
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $pre_cursor = $1;  | 
| 
1167
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address_start = $2;  | 
| 
1168
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $address_end = $3;  | 
| 
1169
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
         return($pre_cursor, $address_start . $address_end);  | 
| 
1170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
1172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1173
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
         return('',$input)  | 
| 
1174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #-------------------------------------------------------------------------------  | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Remove any level or floor info such as:  | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 12 Smith St Floor 2  | 
| 
1179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Level 22 Suite 3 12 Main St  | 
| 
1180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It will later be saved as an attribute in the address object  | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _extract_level  | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1184
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
31
 | 
     my ($input) = @_;  | 
| 
1185
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     my ($level);  | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1187
 | 
15
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
167
 | 
     if      | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Level info could be at start of string so first space is optional  | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # TO DO, add lower?, mezz, BASEMENT etc?  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $input =~ / ?((GROUND|FIRST|SECOND|THIRD|FOURTH|FIFTH|SIXTH) (FLOOR|FLR|FL) )/ or        | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $input =~ / ?(\d{1,2}(ST|ND|RD|TH) (FLOOR|FLR|FL) )/ or         | 
| 
1193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $input =~ / ?(LEVEL (\d{1,2}|[GM])[\/ -])/ or  | 
| 
1194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $input =~ / ?((FLOOR|FLR|FL) \d{1,2}[\/ -])/  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      )  | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1197
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $level = $1;  | 
| 
1198
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $level =~ s|/||;  | 
| 
1199
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $level =~ s|-||;  | 
| 
1200
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
         $input =~ s/$level//;  | 
| 
1201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1203
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     return($level,$input);  | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #-------------------------------------------------------------------------------  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Remove any building info such as:  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Building 2 Level 12 123 Smith St  | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # 12 Main St Tower A Level 2  | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It will later be saved as an attribute in the address object  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _extract_building  | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1213
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
35
 | 
     my ($input) = @_;  | 
| 
1214
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my ($building);  | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1216
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
     my $building_label = qr{BLOCK|BLDG?|BUILDING|TOWER};  | 
| 
1217
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
     my $id = qr{[A-Z]|A[A-Z]|\d+|\d{1,3}[A-Z]|[A-Z]\d{1,3}}; #  AA or 12  or 32C or C12  | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1219
 | 
15
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
288
 | 
     if      | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (  | 
| 
1221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $input =~ /^($building_label $id) / or $input =~ / ($building_label $id) /  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      )  | 
| 
1223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1224
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $building = $1;  | 
| 
1225
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $building =~ s|/||;  | 
| 
1226
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $building =~ s|-||;  | 
| 
1227
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         $input =~ s/$building//;  | 
| 
1228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # TO DO, North, East etc Building?   | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1232
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
79
 | 
     my $name = qr{[A-Z]+}; #  | 
| 
1233
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     my $house;  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Allow for x house at start of text only. Not for example: 12 Gate House Road, suburb  | 
| 
1236
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
98
 | 
     if ( $input =~  /^(($name )?$name HOUSE) /   )  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1238
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $house = $1;         | 
| 
1239
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $input =~ s/$house//;  | 
| 
1240
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $building .= $house;  | 
| 
1241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1243
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
     return($building,$input);  | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #-------------------------------------------------------------------------------  | 
| 
1247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Remove any description that follows the suburb after the main address  | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # such as: PO BOX 1305 BIBRA LAKE PRIVATE BOXES WA 6965"  | 
| 
1249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # It will be saved as an address attribute  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _extract_po_box_type  | 
| 
1252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1253
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
36
 | 
     my ($input) = @_;  | 
| 
1254
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     my ($po_box_type,$address_start,$address_end);  | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1256
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     if ($input =~ /^(.*?) (PRIVATE BOXES)( .*)$/ )  | 
| 
1257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1258
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
         $address_start = $1;  | 
| 
1259
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         $po_box_type = $2;  | 
| 
1260
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
         $address_end = $3;  | 
| 
1261
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         return($po_box_type, $address_start . $address_end);  | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1265
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
         return('',$input)  | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # For correct matching, the grammar of each component must include the  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # trailing space that separates it from any following word. This should  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # now be removed from each component  | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _trim_trailing_space  | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1275
 | 
15
 | 
 
 | 
 
 | 
  
15
  
 | 
 
 | 
28
 | 
     my ($address) = @_;  | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
1277
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     foreach my $key (keys %{ $address->{components} } )  | 
| 
 
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
    | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1279
 | 
285
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
480
 | 
         if ($address->{components}{$key} )  | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
1281
 | 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
279
 | 
             $address->{components}{$key}  =~ s/ $//g;  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }           | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _fmt_report_line  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1289
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
0
 | 
     my ($report_ref,$label,$value) = @_;      | 
| 
1290
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     $$report_ref .= sprintf("%-23.23s '%s'\n",$label,$value);  | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
1293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _check_vowel  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1296
 | 
25
 | 
 
 | 
 
 | 
  
25
  
 | 
 
 | 
51
 | 
     my ($str) = @_;  | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1298
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
     my @words = split(/ /,$str);  | 
| 
1299
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
     foreach my $word (@words)  | 
| 
1300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #  Saint, Mount, Junior, Senior (as in Dr Martin Luther King Snr)  | 
| 
1302
 | 
34
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
186
 | 
         if ( length($word) > 1 and $word !~ /[AEIOUY]|ST|MT|DR|JN?R|SN?R/ )    | 
| 
1303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         {  | 
| 
1304
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             return(1);  | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1307
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
     return(0);  | 
| 
1308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #------------------------------------------------------------------------------  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 return(1);  |