| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | =head1 NAME | 
| 2 |  |  |  |  |  |  |  | 
| 3 |  |  |  |  |  |  | Lingua::EN::NameParse - extract the components of a person or couples full name, presented as a text string | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | use Lingua::EN::NameParse qw(clean case_surname); | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | # optional configuration arguments | 
| 10 |  |  |  |  |  |  | my %args = | 
| 11 |  |  |  |  |  |  | ( | 
| 12 |  |  |  |  |  |  | auto_clean      => 1, | 
| 13 |  |  |  |  |  |  | lc_prefix       => 1, | 
| 14 |  |  |  |  |  |  | initials        => 3, | 
| 15 |  |  |  |  |  |  | allow_reversed  => 1, | 
| 16 |  |  |  |  |  |  | joint_names     => 0, | 
| 17 |  |  |  |  |  |  | extended_titles => 0 | 
| 18 |  |  |  |  |  |  | ); | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | my $name = Lingua::EN::NameParse->new(%args); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | $error = $name->parse("Estate Of Lt Col AB Van Der Heiden (Hold Mail)"); | 
| 23 |  |  |  |  |  |  | unless ( $error ) | 
| 24 |  |  |  |  |  |  | { | 
| 25 |  |  |  |  |  |  | print($name->report); | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | Case all             : Estate Of Lt Col AB Van Der Heiden (Hold Mail) | 
| 28 |  |  |  |  |  |  | Case all reversed    : Van Der Heiden, Lt Col AB | 
| 29 |  |  |  |  |  |  | Salutation           : Dear Friend | 
| 30 |  |  |  |  |  |  | Type                 : Mr_A_Smith | 
| 31 |  |  |  |  |  |  | Parsing Error        : 0 | 
| 32 |  |  |  |  |  |  | Error description :  : | 
| 33 |  |  |  |  |  |  | Parsing Warning      : 1 | 
| 34 |  |  |  |  |  |  | Warning description  : ;non_matching text found : (Hold Mail) | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | COMPONENTS | 
| 37 |  |  |  |  |  |  | initials_1           : AB | 
| 38 |  |  |  |  |  |  | non_matching         : (Hold Mail) | 
| 39 |  |  |  |  |  |  | precursor            : Estate Of | 
| 40 |  |  |  |  |  |  | surname_1            : Van Der Heiden | 
| 41 |  |  |  |  |  |  | title_1              : Lt Col | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | %name_comps = $name->components; | 
| 44 |  |  |  |  |  |  | $surname = $name_comps{surname_1}; | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | $correct_casing = $name->case_all; | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | $correct_casing = $name->case_all_reversed ; | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | $salutation = $name->salutation(salutation => 'Dear',sal_default => 'Friend')); | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | $good_name = clean("Bad Na9me   "); # "Bad Name" | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | %my_properties = $name->properties; | 
| 55 |  |  |  |  |  |  | $number_surnames = $my_properties{number}; # 1 | 
| 56 |  |  |  |  |  |  | } | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | $lc_prefix = 0; | 
| 60 |  |  |  |  |  |  | $correct_case = case_surname("DE SILVA-MACNAY",$lc_prefix); # A stand alone function, returns: De Silva-MacNay | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | $error = $name->parse("MR AS & D.E. DE LA MARE"); | 
| 63 |  |  |  |  |  |  | %my_properties = $name->properties; | 
| 64 |  |  |  |  |  |  | $number_surnames = $my_properties{number}; # 2 | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | This module takes as input one person's name or a couples names in | 
| 71 |  |  |  |  |  |  | free format text such as, | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | Mr AB & M/s CD MacNay-Smith | 
| 74 |  |  |  |  |  |  | MR J.L. D'ANGELO | 
| 75 |  |  |  |  |  |  | Estate Of The Late Lieutenant Colonel AB Van Der Heiden | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | and attempts to parse it. If successful, the name is broken | 
| 78 |  |  |  |  |  |  | down into components and useful functions can be performed such as : | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | converting upper or lower case values to name case (Mr AB MacNay   ) | 
| 81 |  |  |  |  |  |  | creating a personalised greeting or salutation     (Dear Mr MacNay ) | 
| 82 |  |  |  |  |  |  | extracting the names individual components         (Mr,AB,MacNay   ) | 
| 83 |  |  |  |  |  |  | determining the type of format the name is in      (Mr_A_Smith     ) | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | If the name(s) cannot be parsed you have the option of cleaning the name(s) | 
| 87 |  |  |  |  |  |  | of bad characters, or extracting any portion that was parsed and the | 
| 88 |  |  |  |  |  |  | portion that failed. | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | This module can be used for analysing and improving the quality of | 
| 91 |  |  |  |  |  |  | lists of names. | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | =head1 DEFINITIONS | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | The following terms are used by NameParse to define the components | 
| 97 |  |  |  |  |  |  | that can make up a name. | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  | Precursor   - Estate of (The Late), Right Honourable ... | 
| 100 |  |  |  |  |  |  | Title       - Mr, Mrs, Ms., Sir, Dr, Major, Reverend ... | 
| 101 |  |  |  |  |  |  | Conjunction - word to separate two names, such as "And" or & | 
| 102 |  |  |  |  |  |  | Initials    - 1-3 letters, each with an optional space and/or dot | 
| 103 |  |  |  |  |  |  | Surname     - De Silva, Van Der Heiden, MacNay-Smith, O'Reilly ... | 
| 104 |  |  |  |  |  |  | Suffix      - Snr., Jnr, III, V ... | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | Refer to the component grammar defined within the code for a complete | 
| 107 |  |  |  |  |  |  | list of combinations. | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | 'Name casing' refers to the correct use of upper and lower case letters | 
| 110 |  |  |  |  |  |  | in peoples names, such as Mr AB McNay. | 
| 111 |  |  |  |  |  |  |  | 
| 112 |  |  |  |  |  |  | To describe the formats supported by NameParse, a short hand representation | 
| 113 |  |  |  |  |  |  | of the name is used. The following formats are currently supported : | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | Mr_John_Smith_&_Ms_Mary_Jones | 
| 116 |  |  |  |  |  |  | Mr_A_Smith_&_Ms_B_Jones | 
| 117 |  |  |  |  |  |  | Mr_&Ms_A_&_B_Smith | 
| 118 |  |  |  |  |  |  | Mr_A_&_Ms_B_Smith | 
| 119 |  |  |  |  |  |  | Mr_&_Ms_A_Smith | 
| 120 |  |  |  |  |  |  | Mr_A_&_B_Smith | 
| 121 |  |  |  |  |  |  | John_Smith_&_Mary_Jones | 
| 122 |  |  |  |  |  |  | John_&_Mary_Smith | 
| 123 |  |  |  |  |  |  | A_Smith_&_B_Jones | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | Mr_John_Adam_Smith | 
| 126 |  |  |  |  |  |  | Mr_John_A_Smith | 
| 127 |  |  |  |  |  |  | Mr_John_Smith | 
| 128 |  |  |  |  |  |  | Mr_A_Smith | 
| 129 |  |  |  |  |  |  | John_Adam_Smith | 
| 130 |  |  |  |  |  |  | John_A_Smith | 
| 131 |  |  |  |  |  |  | J_Adam_Smith | 
| 132 |  |  |  |  |  |  | John_Smith | 
| 133 |  |  |  |  |  |  | A_Smith | 
| 134 |  |  |  |  |  |  | John | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | Precursors and suffixes may be applied to single names that have a surname | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | =head1 METHODS | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =head2 new | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | The C method creates an instance of a name object and sets up | 
| 144 |  |  |  |  |  |  | the grammar used to parse names. This must be called before any of the | 
| 145 |  |  |  |  |  |  | following methods are invoked. Note that the object only needs to be | 
| 146 |  |  |  |  |  |  | created ONCE, and should be reused with new input data. Calling C | 
| 147 |  |  |  |  |  |  | repeatedly will significantly slow your program down. | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | Various setup options may be defined in a hash that is passed as an optional | 
| 150 |  |  |  |  |  |  | argument to the C method. Note that all the arguments are optional. You | 
| 151 |  |  |  |  |  |  | need to define the combination of arguments that are appropriate for your | 
| 152 |  |  |  |  |  |  | usage. | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | my %args = | 
| 155 |  |  |  |  |  |  | ( | 
| 156 |  |  |  |  |  |  | auto_clean     => 1, | 
| 157 |  |  |  |  |  |  | lc_prefix      => 1, | 
| 158 |  |  |  |  |  |  | initials       => 3, | 
| 159 |  |  |  |  |  |  | allow_reversed => 1 | 
| 160 |  |  |  |  |  |  | ); | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | my $name = Lingua::EN::NameParse->new(%args); | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  |  | 
| 166 |  |  |  |  |  |  | =over 4 | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | =item auto_clean | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | When this option is set to a positive value, any call to the C method | 
| 171 |  |  |  |  |  |  | that fails will attempt to 'clean' the name and then reparse it. See the | 
| 172 |  |  |  |  |  |  | C method for details. This is useful for dirty data with embedded | 
| 173 |  |  |  |  |  |  | unprintable or non alphabetic characters. | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | =item lc_prefix | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | When this option is set to a positive value, it will force the C | 
| 178 |  |  |  |  |  |  | and C methods to lower case the first letter of each word that | 
| 179 |  |  |  |  |  |  | occurs in the prefix portion of a surname. For example, Mr AB de Silva, | 
| 180 |  |  |  |  |  |  | or Ms AS von der Heiden. | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | =item initials | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | Allows the user to control the number of letters that can occur in the initials. | 
| 185 |  |  |  |  |  |  | Valid settings are 1,2 or 3. If no value is supplied a default of 2 is used. | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  | =item allow_reversed | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | When this option is set to a positive value, names in reverse order will be | 
| 190 |  |  |  |  |  |  | processed. The only valid format is the surname followed by a comma and the | 
| 191 |  |  |  |  |  |  | rest of the name, which can be in any of the combinations allowed by non | 
| 192 |  |  |  |  |  |  | reversed names. Some examples are: | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | Smith, Mr AB | 
| 195 |  |  |  |  |  |  | Jones, Jim | 
| 196 |  |  |  |  |  |  | De Silva, Professor A.B. | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | The program changes the order of the name back to the non reversed format, and | 
| 199 |  |  |  |  |  |  | then performs the normal parsing. Note that if the name can be parsed, the fact | 
| 200 |  |  |  |  |  |  | that it's order was originally reversed, is not recorded as a property of the | 
| 201 |  |  |  |  |  |  | name object. | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | =item joint_names | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | When this option is set to a positive value, joint names are accounted for: | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | Mr_A_Smith_&Ms_B_Jones | 
| 208 |  |  |  |  |  |  | Mr_&Ms_A_&B_Smith | 
| 209 |  |  |  |  |  |  | Mr_A_&Ms_B_Smith | 
| 210 |  |  |  |  |  |  | Mr_&Ms_A_Smith | 
| 211 |  |  |  |  |  |  | Mr_A_&B_Smith | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | Note that if this option is not specified, than by default joint names are | 
| 214 |  |  |  |  |  |  | ignored. Disabling joint names speeds up the processing a lot. | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | =item extended_titles | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | When this option is set to a positive value, all combinations of titles, | 
| 219 |  |  |  |  |  |  | such as Colonel, Mother Superior are used. If this value is not set, only | 
| 220 |  |  |  |  |  |  | the following titles are accounted for: | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | Mr | 
| 223 |  |  |  |  |  |  | Ms | 
| 224 |  |  |  |  |  |  | M/s | 
| 225 |  |  |  |  |  |  | Mrs | 
| 226 |  |  |  |  |  |  | Miss | 
| 227 |  |  |  |  |  |  | Dr | 
| 228 |  |  |  |  |  |  | Sir | 
| 229 |  |  |  |  |  |  | Dame | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | Note that if this option is not specified, than by default extended titles | 
| 233 |  |  |  |  |  |  | are ignored. Disabling extended titles speeds up the parsing. | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | =back | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | =head2 parse | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | $error = $name->parse("MR AC DE SILVA"); | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | The C method takes a single parameter of a text string containing a | 
| 242 |  |  |  |  |  |  | name. It attempts to parse the name and break it down into the components | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | Returns an error flag. If the name was parsed successfully, it's value is 0, | 
| 245 |  |  |  |  |  |  | otherwise a 1. This step is a prerequisite for the following methods. | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | =head2 case_all | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | $correct_casing = $name->case_all; | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | The C method converts the first letter of each component to | 
| 253 |  |  |  |  |  |  | capitals and the remainder to lower case, with the following exceptions- | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | initials remain capitalised | 
| 256 |  |  |  |  |  |  | surname spelling such as MacNay-Smith, O'Brien and Van Der Heiden are preserved | 
| 257 |  |  |  |  |  |  | - see C for user defined exceptions | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | A complete definition of the capitalising rules can be found by studying | 
| 260 |  |  |  |  |  |  | the case_surname function. | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | The method returns the entire cased name as text. | 
| 263 |  |  |  |  |  |  |  | 
| 264 |  |  |  |  |  |  | =head2 case_all_reversed | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | $correct_casing = $name->case_all_reversed; | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | The C method applies the same type of casing as | 
| 269 |  |  |  |  |  |  | C. However, the name is returned as surname followed by a comma | 
| 270 |  |  |  |  |  |  | and the rest of the name, which can be any of the combinations allowed | 
| 271 |  |  |  |  |  |  | for a name, except the title. Some examples are: "Smith, John", "De Silva, A.B." | 
| 272 |  |  |  |  |  |  | This is useful for sorting names alphabetically by surname. | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | The method returns the entire reverse order cased name as text. | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | =head2 components | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | %my_name = $name->components; | 
| 280 |  |  |  |  |  |  | $cased_surname = $my_name{surname_1}; | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | The C method does the same thing as the C method, | 
| 284 |  |  |  |  |  |  | but returns the name cased components in a hash. The following keys are used | 
| 285 |  |  |  |  |  |  | for each component: | 
| 286 |  |  |  |  |  |  |  | 
| 287 |  |  |  |  |  |  | precursor | 
| 288 |  |  |  |  |  |  | title_1 | 
| 289 |  |  |  |  |  |  | title_2 | 
| 290 |  |  |  |  |  |  | given_name_1 | 
| 291 |  |  |  |  |  |  | given_name_2 | 
| 292 |  |  |  |  |  |  | initials_1 | 
| 293 |  |  |  |  |  |  | initials_2 | 
| 294 |  |  |  |  |  |  | middle_name | 
| 295 |  |  |  |  |  |  | conjunction_1 | 
| 296 |  |  |  |  |  |  | conjunction_2 | 
| 297 |  |  |  |  |  |  | surname_1 | 
| 298 |  |  |  |  |  |  | surname_2 | 
| 299 |  |  |  |  |  |  | suffix | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | If a component has no matching data for a given name, it will not appear in the hash | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | If the name could not be parsed, this method returns null. If you assign the return | 
| 304 |  |  |  |  |  |  | value to a hash, you should check the error status returned by the C method first. | 
| 305 |  |  |  |  |  |  | Ohterwise, you will get an odd number of values assigned to the hash. | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | =head2 case_surname | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | $correct_casing = case_surname("DE SILVA-MACNAY" [,$lc_prefix]); | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | C is a stand alone function that does not require a name | 
| 313 |  |  |  |  |  |  | object. The input is a text string. An optional input argument controls the | 
| 314 |  |  |  |  |  |  | casing rules for prefix portions of a surname, as described above in the | 
| 315 |  |  |  |  |  |  | C section. | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | The output is a string converted to the correct casing for surnames. | 
| 318 |  |  |  |  |  |  | See C for user defined exceptions | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | This function is useful when you know you are only dealing with names that | 
| 321 |  |  |  |  |  |  | do not have initials like "Mr John Jones". It is much faster than the case_all | 
| 322 |  |  |  |  |  |  | method, but does not understand context, and cannot detect errors on strings | 
| 323 |  |  |  |  |  |  | that are not personal names. | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | =head2 surname_prefs.txt | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | Some surnames can have more than one form of valid capitalisation, such as | 
| 329 |  |  |  |  |  |  | MacQuarie or Macquarie. Where the user wants to specify one form as the default, | 
| 330 |  |  |  |  |  |  | a text file called surname_prefs.txt should be created and placed in the same | 
| 331 |  |  |  |  |  |  | location as the NameParse module. The text file should contain one surname per | 
| 332 |  |  |  |  |  |  | line, in the capitalised form you want, such as | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | Macquarie | 
| 335 |  |  |  |  |  |  | MacHado | 
| 336 |  |  |  |  |  |  |  | 
| 337 |  |  |  |  |  |  | NameParse will still operate if the file does not exist | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | =head2 salutation | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | $salutation = $name->salutation(salutation => 'Dear',sal_default => 'Friend',sal_type => 'given_name')); | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | The C method converts a name into a personal greeting, | 
| 344 |  |  |  |  |  |  | such as "Dear Mr & Mrs O'Brien" or "Dear Sue and John" | 
| 345 |  |  |  |  |  |  |  | 
| 346 |  |  |  |  |  |  | Optional parameters may be specided in a hash as follows: | 
| 347 |  |  |  |  |  |  |  | 
| 348 |  |  |  |  |  |  |  | 
| 349 |  |  |  |  |  |  | salutation: | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | The greeting word such as 'Dear' or 'Greetings'. If not spefied than 'Dear' is used | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | sal_default: | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | The default word used when a personalised salution cannot be generated. If not | 
| 356 |  |  |  |  |  |  | specified, than 'Friend' is used. | 
| 357 |  |  |  |  |  |  |  | 
| 358 |  |  |  |  |  |  | sal_type: | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  | Can be either 'given_name' such as 'Dear Sue' or 'title_plus_name' such as 'Dear Ms Smith' | 
| 361 |  |  |  |  |  |  | If not specified, than 'given_name' is used. | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | If an error is detected during parsing, such as with the name "AB Smith & Associates", | 
| 364 |  |  |  |  |  |  | then the value of sal_default is used instead of a given name, or a title and surname. | 
| 365 |  |  |  |  |  |  | If the input string contains a conjunction, an 's' is added to the value of sal_default. | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | If the name contains a precursor, a default salutation is produced. | 
| 368 |  |  |  |  |  |  |  | 
| 369 |  |  |  |  |  |  | =head2 clean | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | $good_name = clean("Bad Na9me"); | 
| 372 |  |  |  |  |  |  |  | 
| 373 |  |  |  |  |  |  | C is a stand alone function that does not require a name object. | 
| 374 |  |  |  |  |  |  | The input is a text string and the output is the string with: | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | all repeating spaces removed | 
| 377 |  |  |  |  |  |  | all characters not in the set (A-Z a-z - ' , . &) removed | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | =head2 properties | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | The C method returns all the properties of the name, | 
| 383 |  |  |  |  |  |  | non_matching, number and type, as a hash. | 
| 384 |  |  |  |  |  |  |  | 
| 385 |  |  |  |  |  |  | =over 4 | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | =item type | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | The type of format a name is in, as one of the following strings: | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | Mr_A_Smith_&Ms_B_Jones | 
| 392 |  |  |  |  |  |  | Mr_&Ms_A_&B_Smith | 
| 393 |  |  |  |  |  |  | Mr_A_&Ms_B_Smith | 
| 394 |  |  |  |  |  |  | Mr_&Ms_A_Smith | 
| 395 |  |  |  |  |  |  | Mr_A_&B_Smith | 
| 396 |  |  |  |  |  |  | Mr_John_Adam_Smith | 
| 397 |  |  |  |  |  |  | Mr_John_A_Smith | 
| 398 |  |  |  |  |  |  | Mr_John_Smith | 
| 399 |  |  |  |  |  |  | Mr_A_Smith | 
| 400 |  |  |  |  |  |  | John_Adam_Smith | 
| 401 |  |  |  |  |  |  | John_A_Smith | 
| 402 |  |  |  |  |  |  | J_Adam_Smith | 
| 403 |  |  |  |  |  |  | John_Smith | 
| 404 |  |  |  |  |  |  | A_Smith | 
| 405 |  |  |  |  |  |  | John | 
| 406 |  |  |  |  |  |  | unknown | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | =item non_matching | 
| 410 |  |  |  |  |  |  |  | 
| 411 |  |  |  |  |  |  | Returns any unmatched section that was found. | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | =back | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | =head2 report | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | Create a formatted text report to standard output listing | 
| 418 |  |  |  |  |  |  | - the input string, | 
| 419 |  |  |  |  |  |  | - the name and value of each defined component | 
| 420 |  |  |  |  |  |  | - any non matching component | 
| 421 |  |  |  |  |  |  |  | 
| 422 |  |  |  |  |  |  |  | 
| 423 |  |  |  |  |  |  | =head1 LIMITATIONS | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  | The huge number of character combinations that can form a valid names makes | 
| 426 |  |  |  |  |  |  | it is impossible to correctly identify them all. Firstly, there are many | 
| 427 |  |  |  |  |  |  | ambiguities, which have no right answer. | 
| 428 |  |  |  |  |  |  |  | 
| 429 |  |  |  |  |  |  | Macbeth or MacBeth, are both valid spellings | 
| 430 |  |  |  |  |  |  | Is ED WOOD E.D. Wood or Edward Wood | 
| 431 |  |  |  |  |  |  | Is 'Mr Rapid Print' a name or a company | 
| 432 |  |  |  |  |  |  | Does  John Bradfield Smith have a middle name of Bradfield, or a surname of Bradfield-Smith? | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | One approach is to have large lookup files of names and words, statistical rules | 
| 435 |  |  |  |  |  |  | and fuzzy logic to attempt to derive context. This approach gives high levels of | 
| 436 |  |  |  |  |  |  | accuracy but uses a lot of your computers time and resources. | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | NameParse takes the approach of using a limited set of rules, based on the | 
| 439 |  |  |  |  |  |  | formats that are commonly used by business to represent peoples names. This | 
| 440 |  |  |  |  |  |  | gives us fairly high accuracy, with acceptable speed and program size. | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  | NameParse will accept names from many countries, like Van Der Heiden, | 
| 443 |  |  |  |  |  |  | De La Mare and Le Fontain. Having said that, it is still biased toward English, | 
| 444 |  |  |  |  |  |  | because the precursors, titles and conjunctions are based on English usage. | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  | Names with two or more words, but no separating hyphen are not recognized. | 
| 447 |  |  |  |  |  |  | This is a real quandary as Indian, Chinese and other names can have several | 
| 448 |  |  |  |  |  |  | components. If these are allowed for, any component after the surname | 
| 449 |  |  |  |  |  |  | will also be picked up. For example in "Mr AB Jones Trading As Jones Pty Ltd" | 
| 450 |  |  |  |  |  |  | will return a surname of "Jones Trading". | 
| 451 |  |  |  |  |  |  |  | 
| 452 |  |  |  |  |  |  | Because of the large combination of possible names defined in the grammar, the | 
| 453 |  |  |  |  |  |  | program is not very fast, except for the more limited C subroutine. | 
| 454 |  |  |  |  |  |  | See the "Future Directions" section for possible speed ups. | 
| 455 |  |  |  |  |  |  |  | 
| 456 |  |  |  |  |  |  | As the parser has a very limited understanding of context, the "John_Adam_Smith" | 
| 457 |  |  |  |  |  |  | name type is most likely  to cause problems, as it contains no known tokens | 
| 458 |  |  |  |  |  |  | like a title. A string such as "National Australia Bank" would be accepted | 
| 459 |  |  |  |  |  |  | as a valid name, first name National etc. Supplying  a list of common pronouns | 
| 460 |  |  |  |  |  |  | as exceptions could solve this problem. | 
| 461 |  |  |  |  |  |  |  | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | =head1 REFERENCES | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | "The Wordsworth Dictionary of Abbreviations & Acronyms" (1997) | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | Australian Standard AS4212-1994 "Geographic Information Systems - | 
| 468 |  |  |  |  |  |  | Data Dictionary for transfer of street addressing information" | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  |  | 
| 471 |  |  |  |  |  |  | =head1 FUTURE DIRECTIONS | 
| 472 |  |  |  |  |  |  |  | 
| 473 |  |  |  |  |  |  | Define grammar for other languages. Hopefully, all that would be needed is | 
| 474 |  |  |  |  |  |  | to specify a new module with its own grammar, and inherit all the existing | 
| 475 |  |  |  |  |  |  | methods. I don't have the knowledge of the naming conventions for non-english | 
| 476 |  |  |  |  |  |  | languages. | 
| 477 |  |  |  |  |  |  |  | 
| 478 |  |  |  |  |  |  | =head1 REPOSITORY | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | L | 
| 481 |  |  |  |  |  |  |  | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 484 |  |  |  |  |  |  |  | 
| 485 |  |  |  |  |  |  | L, L, L, | 
| 486 |  |  |  |  |  |  | L, L | 
| 487 |  |  |  |  |  |  |  | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | =head1 BUGS | 
| 490 |  |  |  |  |  |  |  | 
| 491 |  |  |  |  |  |  | Names with accented characters (acute, circumfelx etc) will not be parsed | 
| 492 |  |  |  |  |  |  | correctly. A work around is to replace the character class [a-z] with \w | 
| 493 |  |  |  |  |  |  | in the appropriate rules in the grammar tree, but this could lower the accuracy | 
| 494 |  |  |  |  |  |  | of names based purely on ASCII text. | 
| 495 |  |  |  |  |  |  |  | 
| 496 |  |  |  |  |  |  | =head1 CREDITS | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | Thanks to all the people who provided ideas and suggestions, including - | 
| 499 |  |  |  |  |  |  |  | 
| 500 |  |  |  |  |  |  | Damian Conway,  author of Parse::RecDescent | 
| 501 |  |  |  |  |  |  | Mark Summerfield author of Lingua::EN::NameCase, | 
| 502 |  |  |  |  |  |  | Ron Savage, Alastair Adam Huffman, Douglas Wilson | 
| 503 |  |  |  |  |  |  | Peter Schendzielorz | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | =head1 AUTHOR | 
| 506 |  |  |  |  |  |  |  | 
| 507 |  |  |  |  |  |  | NameParse was written by Kim Ryan | 
| 508 |  |  |  |  |  |  |  | 
| 509 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 510 |  |  |  |  |  |  |  | 
| 511 |  |  |  |  |  |  | Copyright (c) 2016 Kim Ryan. All rights reserved. | 
| 512 |  |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 514 |  |  |  |  |  |  | it under the same terms as Perl itself. | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | =cut | 
| 517 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 518 |  |  |  |  |  |  |  | 
| 519 |  |  |  |  |  |  | package Lingua::EN::NameParse; | 
| 520 |  |  |  |  |  |  |  | 
| 521 | 2 |  |  | 2 |  | 19427 | use strict; | 
|  | 2 |  |  |  |  | 3 |  | 
|  | 2 |  |  |  |  | 47 |  | 
| 522 | 2 |  |  | 2 |  | 6 | use warnings; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 39 |  | 
| 523 |  |  |  |  |  |  |  | 
| 524 | 2 |  |  | 2 |  | 711 | use Lingua::EN::NameParse::Grammar; | 
|  | 2 |  |  |  |  | 9 |  | 
|  | 2 |  |  |  |  | 42 |  | 
| 525 | 2 |  |  | 2 |  | 2012 | use Parse::RecDescent; | 
|  | 2 |  |  |  |  | 59747 |  | 
|  | 2 |  |  |  |  | 14 |  | 
| 526 |  |  |  |  |  |  |  | 
| 527 | 2 |  |  | 2 |  | 76 | use Exporter; | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 65 |  | 
| 528 | 2 |  |  | 2 |  | 7 | use vars qw (@ISA @EXPORT_OK); | 
|  | 2 |  |  |  |  | 2 |  | 
|  | 2 |  |  |  |  | 2459 |  | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  | our $VERSION = '1.36'; | 
| 531 |  |  |  |  |  |  | @ISA       = qw(Exporter); | 
| 532 |  |  |  |  |  |  | @EXPORT_OK = qw(clean case_surname); | 
| 533 |  |  |  |  |  |  |  | 
| 534 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 535 |  |  |  |  |  |  | # Create a new instance of a name parsing object. This step is time consuming | 
| 536 |  |  |  |  |  |  | # and should normally only be called once in your program. | 
| 537 |  |  |  |  |  |  |  | 
| 538 |  |  |  |  |  |  | sub new | 
| 539 |  |  |  |  |  |  | { | 
| 540 | 2 |  |  | 2 | 1 | 15 | my $class = shift; | 
| 541 | 2 |  |  |  |  | 5 | my %args = @_; | 
| 542 |  |  |  |  |  |  |  | 
| 543 | 2 |  |  |  |  | 3 | my $name = {}; | 
| 544 | 2 |  |  |  |  | 3 | bless($name,$class); | 
| 545 |  |  |  |  |  |  |  | 
| 546 |  |  |  |  |  |  | # Default to 2 initials per name. Can be overwritten if user defines | 
| 547 |  |  |  |  |  |  | # 'initials' as a key in the hash supplied to new method. | 
| 548 | 2 |  |  |  |  | 12 | $name->{initials} = 2; | 
| 549 |  |  |  |  |  |  |  | 
| 550 | 2 |  |  |  |  | 14 | my $current_key; | 
| 551 | 2 |  |  |  |  | 7 | foreach my $current_key (keys %args) | 
| 552 |  |  |  |  |  |  | { | 
| 553 | 8 |  |  |  |  | 11 | $name->{$current_key} = $args{$current_key}; | 
| 554 |  |  |  |  |  |  | } | 
| 555 |  |  |  |  |  |  |  | 
| 556 | 2 |  |  |  |  | 7 | my $grammar = Lingua::EN::NameParse::Grammar::_create($name); | 
| 557 | 2 |  |  |  |  | 15 | $name->{parse} = new Parse::RecDescent($grammar); | 
| 558 |  |  |  |  |  |  |  | 
| 559 | 2 |  |  |  |  | 348263 | return ($name); | 
| 560 |  |  |  |  |  |  | } | 
| 561 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 562 |  |  |  |  |  |  | # Attempt to parse a string and retrieve it's components and properties | 
| 563 |  |  |  |  |  |  | # Requires a name object to have been created with the 'new' method' | 
| 564 |  |  |  |  |  |  | # Returns: an error code, 0 for success, 1 for failure | 
| 565 |  |  |  |  |  |  |  | 
| 566 |  |  |  |  |  |  | sub parse | 
| 567 |  |  |  |  |  |  | { | 
| 568 | 28 |  |  | 28 | 1 | 4271 | my $name = shift; | 
| 569 | 28 |  |  |  |  | 34 | my ($input_string) = @_; | 
| 570 |  |  |  |  |  |  |  | 
| 571 | 28 |  |  |  |  | 36 | chomp($input_string); | 
| 572 |  |  |  |  |  |  |  | 
| 573 |  |  |  |  |  |  | # If reverse ordered names are allowed, swap the surname component, before | 
| 574 |  |  |  |  |  |  | # the comma, with the rest of the name. Rejoin the name, replacing comma | 
| 575 |  |  |  |  |  |  | # with a space. | 
| 576 |  |  |  |  |  |  |  | 
| 577 | 28 | 100 | 100 |  |  | 107 | if ( $name->{allow_reversed} and $input_string =~ /,/ ) | 
| 578 |  |  |  |  |  |  | { | 
| 579 | 1 |  |  |  |  | 4 | my ($first,$second) = split(/,/,$input_string); | 
| 580 | 1 |  |  |  |  | 4 | $input_string = join(' ',$second,$first); | 
| 581 |  |  |  |  |  |  | } | 
| 582 |  |  |  |  |  |  |  | 
| 583 | 28 |  |  |  |  | 38 | $name->{comps} = (); | 
| 584 | 28 |  |  |  |  | 92 | $name->{properties} = (); | 
| 585 | 28 |  |  |  |  | 63 | $name->{properties}{type} = 'unknown'; | 
| 586 | 28 |  |  |  |  | 32 | $name->{error} = 0; | 
| 587 | 28 |  |  |  |  | 33 | $name->{error_desc} = ''; | 
| 588 | 28 |  |  |  |  | 78 | $name->{warning} = 0; | 
| 589 | 28 |  |  |  |  | 32 | $name->{warning_desc} = ''; | 
| 590 |  |  |  |  |  |  |  | 
| 591 | 28 |  |  |  |  | 28 | $name->{original_input} = $input_string; | 
| 592 | 28 |  |  |  |  | 30 | $name->{input_string} = $input_string; | 
| 593 |  |  |  |  |  |  |  | 
| 594 | 28 |  |  |  |  | 49 | $name = _pre_parse($name); | 
| 595 | 28 | 50 |  |  |  | 56 | unless ( $name->{error} ) | 
| 596 |  |  |  |  |  |  | { | 
| 597 | 28 | 100 |  |  |  | 43 | if ( $name->{auto_clean} ) | 
| 598 |  |  |  |  |  |  | { | 
| 599 | 9 |  |  |  |  | 17 | $name->{input_string} = clean($name->{input_string}); | 
| 600 |  |  |  |  |  |  | } | 
| 601 | 28 |  |  |  |  | 41 | $name = _assemble($name); | 
| 602 | 28 |  |  |  |  | 44 | _validate($name); | 
| 603 |  |  |  |  |  |  | } | 
| 604 |  |  |  |  |  |  |  | 
| 605 | 28 |  |  |  |  | 60 | return($name->{error}); | 
| 606 |  |  |  |  |  |  | } | 
| 607 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 608 |  |  |  |  |  |  | # Clean the input string. Can be called as a stand alone function. | 
| 609 |  |  |  |  |  |  |  | 
| 610 |  |  |  |  |  |  | sub clean | 
| 611 |  |  |  |  |  |  | { | 
| 612 | 10 |  |  | 10 | 1 | 528 | my ($input_string) = @_; | 
| 613 |  |  |  |  |  |  |  | 
| 614 |  |  |  |  |  |  | # remove illegal characters | 
| 615 | 10 |  |  |  |  | 19 | $input_string =~ s/[^A-Za-z\-\'\.&\/ ]//go; | 
| 616 |  |  |  |  |  |  |  | 
| 617 |  |  |  |  |  |  | # remove repeating spaces | 
| 618 | 10 |  |  |  |  | 16 | $input_string =~ s/  +/ /go ; | 
| 619 |  |  |  |  |  |  |  | 
| 620 |  |  |  |  |  |  | # remove any remaining leading or trailing space | 
| 621 | 10 |  |  |  |  | 14 | $input_string =~ s/^ //; | 
| 622 |  |  |  |  |  |  |  | 
| 623 | 10 |  |  |  |  | 18 | return($input_string); | 
| 624 |  |  |  |  |  |  | } | 
| 625 |  |  |  |  |  |  |  | 
| 626 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 627 |  |  |  |  |  |  | # Given a name object, apply correct capitalisation to each component of a person's name. | 
| 628 |  |  |  |  |  |  | # Return all cased components in a hash. | 
| 629 |  |  |  |  |  |  | # Else return no value. | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  |  | 
| 632 |  |  |  |  |  |  | sub components | 
| 633 |  |  |  |  |  |  | { | 
| 634 | 35 |  |  | 35 | 1 | 32 | my $name = shift; | 
| 635 |  |  |  |  |  |  |  | 
| 636 | 35 | 50 |  |  |  | 76 | if ( $name->{properties}{type} eq 'unknown'  ) | 
| 637 |  |  |  |  |  |  | { | 
| 638 | 0 |  |  |  |  | 0 | return; | 
| 639 |  |  |  |  |  |  | } | 
| 640 |  |  |  |  |  |  | else | 
| 641 |  |  |  |  |  |  | { | 
| 642 | 35 |  |  |  |  | 29 | my %orig_components = %{ $name->{comps} }; | 
|  | 35 |  |  |  |  | 190 |  | 
| 643 |  |  |  |  |  |  |  | 
| 644 | 35 |  |  |  |  | 48 | my ($current_key,%cased_components); | 
| 645 | 35 |  |  |  |  | 78 | foreach $current_key ( keys %orig_components ) | 
| 646 |  |  |  |  |  |  | { | 
| 647 | 490 |  |  |  |  | 329 | my $cased_value; | 
| 648 | 490 | 100 |  |  |  | 930 | if ( $current_key =~ /initials/ ) # initials_1, possibly initials_2 | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 649 |  |  |  |  |  |  | { | 
| 650 | 70 |  |  |  |  | 80 | $cased_value = uc($orig_components{$current_key}); | 
| 651 |  |  |  |  |  |  | } | 
| 652 |  |  |  |  |  |  | elsif ( $current_key =~ /surname|suffix/ ) | 
| 653 |  |  |  |  |  |  | { | 
| 654 | 105 |  |  |  |  | 191 | $cased_value = case_surname($orig_components{$current_key},$name->{lc_prefix}); | 
| 655 |  |  |  |  |  |  | } | 
| 656 |  |  |  |  |  |  | elsif ( $current_key eq 'type') | 
| 657 |  |  |  |  |  |  | { | 
| 658 | 0 |  |  |  |  | 0 | $cased_value = $orig_components{$current_key}; | 
| 659 |  |  |  |  |  |  | } | 
| 660 |  |  |  |  |  |  | else | 
| 661 |  |  |  |  |  |  | { | 
| 662 | 315 |  |  |  |  | 316 | $cased_value = _case_word($orig_components{$current_key}); | 
| 663 |  |  |  |  |  |  | } | 
| 664 |  |  |  |  |  |  |  | 
| 665 | 490 |  |  |  |  | 559 | $cased_components{$current_key} = $cased_value; | 
| 666 |  |  |  |  |  |  | } | 
| 667 | 35 |  |  |  |  | 333 | return(%cased_components); | 
| 668 |  |  |  |  |  |  | } | 
| 669 |  |  |  |  |  |  | } | 
| 670 |  |  |  |  |  |  |  | 
| 671 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 672 |  |  |  |  |  |  | # Hash of of lists, indicating the order that name components are assembled in. | 
| 673 |  |  |  |  |  |  | # Each list element is itself the name of the key value in a name object. | 
| 674 |  |  |  |  |  |  | # Used by the case_all and case_all_reversed  methods. | 
| 675 |  |  |  |  |  |  | # These hashes are created here globally, as quite a large overhead is | 
| 676 |  |  |  |  |  |  | # imposed if the are created locally, each time the method is invoked | 
| 677 |  |  |  |  |  |  |  | 
| 678 |  |  |  |  |  |  | my %component_order= | 
| 679 |  |  |  |  |  |  | ( | 
| 680 |  |  |  |  |  |  | 'Mr_John_Smith_&_Ms_Mary_Jones' => ['title_1','given_name_1','surname_1','conjunction_1','title_2','given_name_2','surname_2'], | 
| 681 |  |  |  |  |  |  | 'Mr_A_Smith_&_Ms_B_Jones' => ['title_1','initials_1','surname_1','conjunction_1','title_2','initials_2','surname_2'], | 
| 682 |  |  |  |  |  |  | 'Mr_&_Ms_A_&_B_Smith'     => ['title_1','conjunction_1','title_2','initials_1','conjunction_2','initials_2','surname_1'], | 
| 683 |  |  |  |  |  |  | 'Mr_A_&_Ms_B_Smith'       => ['title_1','initials_1','conjunction_1','title_2','initials_2','surname_1'], | 
| 684 |  |  |  |  |  |  | 'Mr_&_Ms_A_Smith'         => ['title_1','conjunction_1','title_2','initials_1','surname_1'], | 
| 685 |  |  |  |  |  |  | 'Mr_A_&_B_Smith'          => ['title_1','initials_1','conjunction_1','initials_2','surname_1'], | 
| 686 |  |  |  |  |  |  | 'John_Smith_&Mary_Jones' => ['given_name_1','surname_1','conjunction_1','given_name_2','surname_2'], | 
| 687 |  |  |  |  |  |  | 'John_&_Mary_Smith'       => ['given_name_1','conjunction_1','given_name_2','surname_1'], | 
| 688 |  |  |  |  |  |  | 'A_Smith_&_B_Jones'       => ['initials_1','surname_1','conjunction_1','initials_2','surname_2'], | 
| 689 |  |  |  |  |  |  |  | 
| 690 |  |  |  |  |  |  | 'Mr_John_Adam_Smith'      => ['precursor','title_1','given_name_1','middle_name','surname_1','suffix'], | 
| 691 |  |  |  |  |  |  | 'Mr_John_A_Smith'         => ['precursor','title_1','given_name_1','initials_1','surname_1','suffix'], | 
| 692 |  |  |  |  |  |  | 'Mr_John_Smith'           => ['precursor','title_1','given_name_1','surname_1','suffix'], | 
| 693 |  |  |  |  |  |  | 'Mr_A_Smith'              => ['precursor','title_1','initials_1','surname_1','suffix'], | 
| 694 |  |  |  |  |  |  | 'John_Adam_Smith'         => ['precursor','given_name_1','middle_name','surname_1','suffix'], | 
| 695 |  |  |  |  |  |  | 'John_A_Smith'            => ['precursor','given_name_1','initials_1','surname_1','suffix'], | 
| 696 |  |  |  |  |  |  | 'J_Adam_Smith'            => ['precursor','initials_1','middle_name','surname_1','suffix'], | 
| 697 |  |  |  |  |  |  | 'John_Smith'              => ['precursor','given_name_1','surname_1','suffix'], | 
| 698 |  |  |  |  |  |  | 'A_Smith'                 => ['precursor','initials_1','surname_1','suffix'], | 
| 699 |  |  |  |  |  |  | 'John'                    => ['given_name_1'] | 
| 700 |  |  |  |  |  |  | ); | 
| 701 |  |  |  |  |  |  |  | 
| 702 |  |  |  |  |  |  |  | 
| 703 |  |  |  |  |  |  | # only include names with a single surname | 
| 704 |  |  |  |  |  |  | my %reverse_component_order= | 
| 705 |  |  |  |  |  |  | ( | 
| 706 |  |  |  |  |  |  | 'Mr_&_Ms_A_&_B_Smith'  => ['surname_1','title_1','conjunction_1','title_2','initials_1','conjunction_1','initials_2'], | 
| 707 |  |  |  |  |  |  | 'Mr_A_&_Ms_B_Smith'    => ['surname_1','title_1','initials_1','conjunction_1','title_2','initials_2'], | 
| 708 |  |  |  |  |  |  | 'Mr_&_Ms_A_Smith'      => ['surname_1','title_1','title_1','conjunction_1','title_2','initials_1'], | 
| 709 |  |  |  |  |  |  | 'Mr_A_&_B_Smith'       => ['surname_1','title_1','initials_1','conjunction_1','initials_2'], | 
| 710 |  |  |  |  |  |  | 'John_&_Mary_Smith'    => ['surname_1','given_name_1','conjunction_1','given_name_2'], | 
| 711 |  |  |  |  |  |  |  | 
| 712 |  |  |  |  |  |  | 'Mr_John_Adam_Smith'   => ['surname_1','title_1','given_name_1','middle_name','suffix'], | 
| 713 |  |  |  |  |  |  | 'Mr_John_A_Smith'      => ['surname_1','title_1','given_name_1','initials_1','suffix'], | 
| 714 |  |  |  |  |  |  | 'Mr_John_Smith'        => ['surname_1','title_1','given_name_1','suffix'], | 
| 715 |  |  |  |  |  |  | 'Mr_A_Smith'           => ['surname_1','title_1','initials_1','suffix'], | 
| 716 |  |  |  |  |  |  | 'John_Adam_Smith'      => ['surname_1','given_name_1','middle_name','suffix'], | 
| 717 |  |  |  |  |  |  | 'John_A_Smith'         => ['surname_1','given_name_1','initials_1','suffix'], | 
| 718 |  |  |  |  |  |  | 'J_Adam_Smith'         => ['surname_1','initials_1','middle_name','suffix'], | 
| 719 |  |  |  |  |  |  | 'John_Smith'           => ['surname_1','given_name_1','suffix'], | 
| 720 |  |  |  |  |  |  | 'A_Smith'              => ['surname_1','initials_1','suffix'] | 
| 721 |  |  |  |  |  |  | ); | 
| 722 |  |  |  |  |  |  |  | 
| 723 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 724 |  |  |  |  |  |  | # Apply correct capitalisation to a person's entire name | 
| 725 |  |  |  |  |  |  | # If the name type is unknown, return undef | 
| 726 |  |  |  |  |  |  | # Else, return a string of all cased components in correct order | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | sub case_all | 
| 729 |  |  |  |  |  |  | { | 
| 730 | 2 |  |  | 2 | 1 | 6 | my $name = shift; | 
| 731 |  |  |  |  |  |  |  | 
| 732 | 2 |  |  |  |  | 4 | my @cased_name; | 
| 733 |  |  |  |  |  |  |  | 
| 734 | 2 | 50 |  |  |  | 5 | if ( $name->{properties}{type} eq 'unknown' ) | 
| 735 |  |  |  |  |  |  | { | 
| 736 | 0 |  |  |  |  | 0 | return undef; | 
| 737 |  |  |  |  |  |  | } | 
| 738 |  |  |  |  |  |  |  | 
| 739 | 2 | 50 |  |  |  | 9 | unless ( $component_order{$name->{properties}{type}} ) | 
| 740 |  |  |  |  |  |  | { | 
| 741 |  |  |  |  |  |  | # component order missing in array defined above | 
| 742 | 0 |  |  |  |  | 0 | warn "Component order not defined for: $name->{properties}{type}"; | 
| 743 | 0 |  |  |  |  | 0 | return; | 
| 744 |  |  |  |  |  |  | } | 
| 745 |  |  |  |  |  |  |  | 
| 746 | 2 |  |  |  |  | 4 | my %component_vals = $name->components; | 
| 747 | 2 |  |  |  |  | 3 | my @order = @{ $component_order{$name->{properties}{type}} }; | 
|  | 2 |  |  |  |  | 8 |  | 
| 748 |  |  |  |  |  |  |  | 
| 749 | 2 |  |  |  |  | 5 | foreach my $component_key ( @order ) | 
| 750 |  |  |  |  |  |  | { | 
| 751 |  |  |  |  |  |  | # As some components such as precursors are optional, they will appear | 
| 752 |  |  |  |  |  |  | # in the order array but may or may not have have a value, so only | 
| 753 |  |  |  |  |  |  | # process defined values | 
| 754 | 10 | 100 |  |  |  | 18 | if ( $component_vals{$component_key} ) | 
| 755 |  |  |  |  |  |  | { | 
| 756 | 6 |  |  |  |  | 8 | push(@cased_name,$component_vals{$component_key}); | 
| 757 |  |  |  |  |  |  | } | 
| 758 |  |  |  |  |  |  | } | 
| 759 | 2 | 100 |  |  |  | 5 | if ( $name->{comps}{non_matching} ) | 
| 760 |  |  |  |  |  |  | { | 
| 761 |  |  |  |  |  |  | # Despite errors, try to name case non-matching section. As the format | 
| 762 |  |  |  |  |  |  | # of this section is unknown, surname case will provide the best | 
| 763 |  |  |  |  |  |  | # approximation, but still fail on initials of more than 1 letter | 
| 764 | 1 |  |  |  |  | 4 | push(@cased_name,case_surname($name->{comps}{non_matching},$name->{lc_prefix})); | 
| 765 |  |  |  |  |  |  | } | 
| 766 |  |  |  |  |  |  |  | 
| 767 | 2 |  |  |  |  | 17 | return(join(' ',@cased_name)); | 
| 768 |  |  |  |  |  |  | } | 
| 769 |  |  |  |  |  |  |  | 
| 770 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 771 |  |  |  |  |  |  | =head1 case_all_reversed | 
| 772 |  |  |  |  |  |  |  | 
| 773 |  |  |  |  |  |  | Apply correct capitalisation to a person's entire name and reverse the order | 
| 774 |  |  |  |  |  |  | so that surname is first, followed by the other components, such as: Smith, Mr John A | 
| 775 |  |  |  |  |  |  | Useful for creating a list of names that can be sorted by surname. | 
| 776 |  |  |  |  |  |  |  | 
| 777 |  |  |  |  |  |  | If name type is unknown , returns null | 
| 778 |  |  |  |  |  |  |  | 
| 779 |  |  |  |  |  |  | If the name type has a joint name, such as 'Mr_A_Smith_Ms_B_Jones', return null, | 
| 780 |  |  |  |  |  |  | as it is ambiguous which surname to place at the start of the string | 
| 781 |  |  |  |  |  |  |  | 
| 782 |  |  |  |  |  |  | Else, returns a string of all cased components in correct reversed order | 
| 783 |  |  |  |  |  |  |  | 
| 784 |  |  |  |  |  |  | =cut | 
| 785 |  |  |  |  |  |  |  | 
| 786 |  |  |  |  |  |  | sub case_all_reversed | 
| 787 |  |  |  |  |  |  | { | 
| 788 | 0 |  |  | 0 | 1 | 0 | my $name = shift; | 
| 789 |  |  |  |  |  |  |  | 
| 790 | 0 |  |  |  |  | 0 | my @cased_name_reversed; | 
| 791 |  |  |  |  |  |  |  | 
| 792 | 0 | 0 |  |  |  | 0 | unless ( $name->{properties}{type} eq 'unknown'  ) | 
| 793 |  |  |  |  |  |  | { | 
| 794 | 0 | 0 |  |  |  | 0 | unless ( $reverse_component_order{$name->{properties}{type} } ) | 
| 795 |  |  |  |  |  |  | { | 
| 796 |  |  |  |  |  |  | # this type of name should not be reversed, such as two surnames | 
| 797 | 0 |  |  |  |  | 0 | return; | 
| 798 |  |  |  |  |  |  | } | 
| 799 | 0 |  |  |  |  | 0 | my %component_vals = $name->components; | 
| 800 | 0 |  |  |  |  | 0 | my @reverse_order = @{ $reverse_component_order{$name->{properties}{type} } }; | 
|  | 0 |  |  |  |  | 0 |  | 
| 801 |  |  |  |  |  |  |  | 
| 802 | 0 |  |  |  |  | 0 | foreach my $component_key ( @reverse_order ) | 
| 803 |  |  |  |  |  |  | { | 
| 804 |  |  |  |  |  |  | # As some components such as precursors are optional, they will appear | 
| 805 |  |  |  |  |  |  | # in the order array but may or may not have have a value, so only | 
| 806 |  |  |  |  |  |  | # process defined values | 
| 807 |  |  |  |  |  |  |  | 
| 808 | 0 |  |  |  |  | 0 | my $component_value = $component_vals{$component_key}; | 
| 809 | 0 | 0 |  |  |  | 0 | if ( $component_value ) | 
| 810 |  |  |  |  |  |  | { | 
| 811 | 0 | 0 |  |  |  | 0 | if ($component_key eq 'surname_1') | 
| 812 |  |  |  |  |  |  | { | 
| 813 | 0 |  |  |  |  | 0 | $component_value .= ','; | 
| 814 |  |  |  |  |  |  | } | 
| 815 | 0 |  |  |  |  | 0 | push(@cased_name_reversed,$component_value); | 
| 816 |  |  |  |  |  |  | } | 
| 817 |  |  |  |  |  |  | } | 
| 818 |  |  |  |  |  |  | } | 
| 819 | 0 |  |  |  |  | 0 | return(join(' ',@cased_name_reversed)); | 
| 820 |  |  |  |  |  |  | } | 
| 821 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 822 |  |  |  |  |  |  | # The user may specify their own preferred spelling for surnames. | 
| 823 |  |  |  |  |  |  | # These should be placed in a text file called surname_prefs.txt | 
| 824 |  |  |  |  |  |  | # in the same location as the module itself. | 
| 825 |  |  |  |  |  |  |  | 
| 826 |  |  |  |  |  |  | BEGIN | 
| 827 |  |  |  |  |  |  | { | 
| 828 |  |  |  |  |  |  | # Obtain the full path to NameParse module, defined in the %INC hash. | 
| 829 | 2 |  |  | 2 |  | 5 | my $prefs_file_location = $INC{"Lingua/EN/NameParse.pm"}; | 
| 830 |  |  |  |  |  |  | # Now substitute the name of the preferences file | 
| 831 | 2 |  |  |  |  | 17 | $prefs_file_location =~ s/NameParse\.pm$/surname_prefs.txt/; | 
| 832 |  |  |  |  |  |  |  | 
| 833 | 2 | 50 |  |  |  | 2989 | if ( open(PREFERENCES_FH,"<$prefs_file_location") ) | 
| 834 |  |  |  |  |  |  | { | 
| 835 | 0 |  |  |  |  | 0 | my @surnames = ; | 
| 836 | 0 |  |  |  |  | 0 | foreach my $name ( @surnames ) | 
| 837 |  |  |  |  |  |  | { | 
| 838 | 0 |  |  |  |  | 0 | chomp($name); | 
| 839 |  |  |  |  |  |  | # Build hash, lower case name is key for case insensitive | 
| 840 |  |  |  |  |  |  | # comparison, while value holds the actual capitalisation | 
| 841 | 0 |  |  |  |  | 0 | $Lingua::EN::surname_preferences{lc($name)} = $name; | 
| 842 |  |  |  |  |  |  | } | 
| 843 | 0 |  |  |  |  | 0 | close(PREFERENCES_FH); | 
| 844 |  |  |  |  |  |  | } | 
| 845 |  |  |  |  |  |  | } | 
| 846 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 847 |  |  |  |  |  |  | # Apply correct capitalisation to a person's surname. Can be called as a | 
| 848 |  |  |  |  |  |  | # stand alone function. | 
| 849 |  |  |  |  |  |  |  | 
| 850 |  |  |  |  |  |  | sub case_surname | 
| 851 |  |  |  |  |  |  | { | 
| 852 | 108 |  |  | 108 | 1 | 366 | my ($surname,$lc_prefix) = @_; | 
| 853 |  |  |  |  |  |  |  | 
| 854 | 108 | 100 |  |  |  | 159 | unless ($surname) | 
| 855 |  |  |  |  |  |  | { | 
| 856 | 65 |  |  |  |  | 94 | return(''); | 
| 857 |  |  |  |  |  |  | } | 
| 858 |  |  |  |  |  |  |  | 
| 859 |  |  |  |  |  |  | # If the user has specified a preferred capitalisation for this | 
| 860 |  |  |  |  |  |  | # surname in the surname_prefs.txt, it should be returned now. | 
| 861 | 43 | 50 |  |  |  | 91 | if ($Lingua::EN::surname_preferences{lc($surname)} ) | 
| 862 |  |  |  |  |  |  | { | 
| 863 | 0 |  |  |  |  | 0 | return($Lingua::EN::surname_preferences{lc($surname)}); | 
| 864 |  |  |  |  |  |  | } | 
| 865 |  |  |  |  |  |  |  | 
| 866 |  |  |  |  |  |  | # Lowercase everything | 
| 867 | 43 |  |  |  |  | 40 | $surname = lc($surname); | 
| 868 |  |  |  |  |  |  |  | 
| 869 |  |  |  |  |  |  | # Now uppercase first letter of every word. By checking on word boundaries, | 
| 870 |  |  |  |  |  |  | # we will account for apostrophes (D'Angelo) and hyphenated names | 
| 871 | 43 |  |  |  |  | 209 | $surname =~ s/\b(\w)/\u$1/g; | 
| 872 |  |  |  |  |  |  |  | 
| 873 |  |  |  |  |  |  | # Name case Macs and Mcs | 
| 874 |  |  |  |  |  |  | # Exclude names with 1-2 letters after prefix like Mack, Macky, Mace | 
| 875 |  |  |  |  |  |  | # Exclude names ending in a,c,i,o,z or j, typically Polish or Italian | 
| 876 |  |  |  |  |  |  |  | 
| 877 | 43 | 100 |  |  |  | 134 | if ( $surname =~ /\bMac[a-z]{2,}[^a|c|i|o|z|j]\b/i  ) | 
|  |  | 50 |  |  |  |  |  | 
| 878 |  |  |  |  |  |  | { | 
| 879 | 4 |  |  |  |  | 15 | $surname =~ s/\b(Mac)([a-z]+)/$1\u$2/ig; | 
| 880 |  |  |  |  |  |  |  | 
| 881 |  |  |  |  |  |  | # Now correct for "Mac" exceptions | 
| 882 | 4 |  |  |  |  | 7 | $surname =~ s/MacHin/Machin/; | 
| 883 | 4 |  |  |  |  | 9 | $surname =~ s/MacHlin/Machlin/; | 
| 884 | 4 |  |  |  |  | 3 | $surname =~ s/MacHar/Machar/; | 
| 885 | 4 |  |  |  |  | 7 | $surname =~ s/MacKle/Mackle/; | 
| 886 | 4 |  |  |  |  | 4 | $surname =~ s/MacKlin/Macklin/; | 
| 887 | 4 |  |  |  |  | 3 | $surname =~ s/MacKie/Mackie/; | 
| 888 |  |  |  |  |  |  |  | 
| 889 |  |  |  |  |  |  | # Portuguese | 
| 890 | 4 |  |  |  |  | 4 | $surname =~ s/MacHado/Machado/; | 
| 891 |  |  |  |  |  |  |  | 
| 892 |  |  |  |  |  |  | # Lithuanian | 
| 893 | 4 |  |  |  |  | 4 | $surname =~ s/MacEvicius/Macevicius/; | 
| 894 | 4 |  |  |  |  | 3 | $surname =~ s/MacIulis/Maciulis/; | 
| 895 | 4 |  |  |  |  | 4 | $surname =~ s/MacIas/Macias/; | 
| 896 |  |  |  |  |  |  | } | 
| 897 |  |  |  |  |  |  | elsif ( $surname =~ /\bMc/i ) | 
| 898 |  |  |  |  |  |  | { | 
| 899 | 0 |  |  |  |  | 0 | $surname =~ s/\b(Mc)([a-z]+)/$1\u$2/ig; | 
| 900 |  |  |  |  |  |  | } | 
| 901 |  |  |  |  |  |  | # Exceptions (only 'Mac' name ending in 'o' ?) | 
| 902 | 43 |  |  |  |  | 38 | $surname =~ s/Macmurdo/MacMurdo/; | 
| 903 |  |  |  |  |  |  |  | 
| 904 |  |  |  |  |  |  |  | 
| 905 | 43 | 100 |  |  |  | 61 | if ( $lc_prefix ) | 
| 906 |  |  |  |  |  |  | { | 
| 907 |  |  |  |  |  |  | # Lowercase first letter of every word in prefix. The trailing space | 
| 908 |  |  |  |  |  |  | # prevents the surname from being altered. Note that spellings like | 
| 909 |  |  |  |  |  |  | # d'Angelo are not accounted for. | 
| 910 | 1 |  |  |  |  | 10 | $surname =~ s/\b(\w+ )/\l$1/g; | 
| 911 |  |  |  |  |  |  | } | 
| 912 |  |  |  |  |  |  |  | 
| 913 |  |  |  |  |  |  | # Correct for possessives such as "John's" or "Australia's". Although this | 
| 914 |  |  |  |  |  |  | # should not occur in a person's name, they are valid for proper names. | 
| 915 |  |  |  |  |  |  | # As this subroutine may be used to capitalise words other than names, | 
| 916 |  |  |  |  |  |  | # we may need to account for this case. Note that the 's' must be at the | 
| 917 |  |  |  |  |  |  | # end of the string | 
| 918 | 43 |  |  |  |  | 63 | $surname =~ s/(\w+)'S(\s+)/$1's$2/; | 
| 919 | 43 |  |  |  |  | 39 | $surname =~ s/(\w+)'S$/$1's/; | 
| 920 |  |  |  |  |  |  |  | 
| 921 |  |  |  |  |  |  | # Correct for roman numerals, excluding single letter cases I,V and X, | 
| 922 |  |  |  |  |  |  | # which will work with the above code | 
| 923 | 43 |  |  |  |  | 75 | $surname =~ s/\b(I{2,3})\b/\U$1/i;  # 2nd, 3rd | 
| 924 | 43 |  |  |  |  | 46 | $surname =~ s/\b(IV)\b/\U$1/i;      # 4th | 
| 925 | 43 |  |  |  |  | 42 | $surname =~ s/\b(VI{1,3})\b/\U$1/i; # 6th, 7th, 8th | 
| 926 | 43 |  |  |  |  | 46 | $surname =~ s/\b(IX)\b/\U$1/i;      # 9th | 
| 927 | 43 |  |  |  |  | 44 | $surname =~ s/\b(XI{1,3})\b/\U$1/i; # 11th, 12th, 13th | 
| 928 |  |  |  |  |  |  |  | 
| 929 | 43 |  |  |  |  | 70 | return($surname); | 
| 930 |  |  |  |  |  |  | } | 
| 931 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 932 |  |  |  |  |  |  | # Create a personalised greeting from one or two person's names | 
| 933 |  |  |  |  |  |  | # Returns the salutation as a string, such as "Dear Mr Smith", or "Dear Sue" | 
| 934 |  |  |  |  |  |  |  | 
| 935 |  |  |  |  |  |  | sub salutation | 
| 936 |  |  |  |  |  |  | { | 
| 937 | 3 |  |  | 3 | 1 | 4 | my $name = shift; | 
| 938 | 3 |  |  |  |  | 4 | my %args = @_; | 
| 939 |  |  |  |  |  |  |  | 
| 940 | 3 |  |  |  |  | 5 | my $salutation = 'Dear'; | 
| 941 | 3 |  |  |  |  | 26 | my $sal_default = 'Friend'; | 
| 942 | 3 |  |  |  |  | 4 | my $sal_type = 'title_plus_surname'; | 
| 943 |  |  |  |  |  |  |  | 
| 944 |  |  |  |  |  |  | # Check to see if we should override defualts with any user specified preferences | 
| 945 | 3 | 100 |  |  |  | 6 | if ( %args ) | 
| 946 |  |  |  |  |  |  | { | 
| 947 | 2 |  |  |  |  | 4 | foreach my $current_key (keys %args) | 
| 948 |  |  |  |  |  |  | { | 
| 949 | 2 | 50 |  |  |  | 5 | $current_key eq 'salutation' and $salutation = $args{$current_key}; | 
| 950 | 2 | 50 |  |  |  | 5 | $current_key eq 'sal_default' and $sal_default = $args{$current_key}; | 
| 951 | 2 | 50 |  |  |  | 7 | $current_key eq 'sal_type' and $sal_type = $args{$current_key}; | 
| 952 |  |  |  |  |  |  | } | 
| 953 |  |  |  |  |  |  | } | 
| 954 |  |  |  |  |  |  |  | 
| 955 |  |  |  |  |  |  |  | 
| 956 | 3 |  |  |  |  | 4 | my @greeting; | 
| 957 | 3 |  |  |  |  | 4 | push(@greeting,$salutation); | 
| 958 |  |  |  |  |  |  |  | 
| 959 |  |  |  |  |  |  | # Personalised salutations cannot be created for Estates or people | 
| 960 |  |  |  |  |  |  | # without some title | 
| 961 | 3 | 50 | 33 |  |  | 13 | if | 
|  |  |  | 33 |  |  |  |  | 
| 962 |  |  |  |  |  |  | ( | 
| 963 |  |  |  |  |  |  | $name->{error} or | 
| 964 |  |  |  |  |  |  | ( $name->{comps}{precursor} and  $name->{comps}{precursor} =~ /ESTATE/) | 
| 965 |  |  |  |  |  |  | ) | 
| 966 |  |  |  |  |  |  | { | 
| 967 |  |  |  |  |  |  | # Despite an error, the presence of a conjunction probably | 
| 968 |  |  |  |  |  |  | # means we are dealing with 2 or more people. | 
| 969 |  |  |  |  |  |  | # For example Mr AB Smith & John Jones | 
| 970 | 0 | 0 |  |  |  | 0 | if ( $name->{input_string} =~ / (AND|&) / ) | 
| 971 |  |  |  |  |  |  | { | 
| 972 | 0 |  |  |  |  | 0 | $sal_default .= 's'; | 
| 973 |  |  |  |  |  |  | } | 
| 974 | 0 |  |  |  |  | 0 | push(@greeting,$sal_default); | 
| 975 |  |  |  |  |  |  | } | 
| 976 |  |  |  |  |  |  | else | 
| 977 |  |  |  |  |  |  | { | 
| 978 | 3 |  |  |  |  | 7 | my %component_vals = $name->components; | 
| 979 |  |  |  |  |  |  |  | 
| 980 | 3 | 100 |  |  |  | 14 | if ( $sal_type eq 'given_name') | 
|  |  | 50 |  |  |  |  |  | 
| 981 |  |  |  |  |  |  | { | 
| 982 | 1 | 50 |  |  |  | 4 | if ( $component_vals{'given_name_1'} ) | 
| 983 |  |  |  |  |  |  | { | 
| 984 | 1 |  |  |  |  | 2 | push(@greeting,$component_vals{'given_name_1'}); | 
| 985 | 1 | 50 |  |  |  | 5 | if ( $component_vals{'given_name_2'} ) | 
| 986 |  |  |  |  |  |  | { | 
| 987 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'conjunction_1'}); | 
| 988 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'given_name_2'}); | 
| 989 |  |  |  |  |  |  | } | 
| 990 |  |  |  |  |  |  | } | 
| 991 |  |  |  |  |  |  | else | 
| 992 |  |  |  |  |  |  | { | 
| 993 |  |  |  |  |  |  | # No given name such as 'A_Smith','J_Adam_Smith','Mr_A_Smith' | 
| 994 |  |  |  |  |  |  | # Must use default | 
| 995 | 0 |  |  |  |  | 0 | push(@greeting,$sal_default); | 
| 996 |  |  |  |  |  |  | } | 
| 997 |  |  |  |  |  |  | } | 
| 998 |  |  |  |  |  |  | elsif ( $sal_type eq 'title_plus_surname' ) | 
| 999 |  |  |  |  |  |  | { | 
| 1000 | 2 | 50 |  |  |  | 6 | if ( $name->{properties}{number} == 1 ) | 
|  |  | 0 |  |  |  |  |  | 
| 1001 |  |  |  |  |  |  | { | 
| 1002 | 2 | 100 |  |  |  | 5 | if ( $component_vals{'title_1'} ) | 
| 1003 |  |  |  |  |  |  | { | 
| 1004 | 1 |  |  |  |  | 2 | push(@greeting,$component_vals{'title_1'}); | 
| 1005 | 1 |  |  |  |  | 3 | push(@greeting,$component_vals{'surname_1'}); | 
| 1006 |  |  |  |  |  |  | } | 
| 1007 |  |  |  |  |  |  | else | 
| 1008 |  |  |  |  |  |  | { | 
| 1009 |  |  |  |  |  |  | # No title such as 'A_Smith','J_Adam_Smith', so must use default | 
| 1010 | 1 |  |  |  |  | 3 | push(@greeting,$sal_default); | 
| 1011 |  |  |  |  |  |  | } | 
| 1012 |  |  |  |  |  |  | } | 
| 1013 |  |  |  |  |  |  | elsif ( $name->{properties}{number} == 2 ) | 
| 1014 |  |  |  |  |  |  | { | 
| 1015 |  |  |  |  |  |  | # a joint name | 
| 1016 |  |  |  |  |  |  |  | 
| 1017 | 0 |  |  |  |  | 0 | my $type = $name->{properties}{type}; | 
| 1018 | 0 | 0 | 0 |  |  | 0 | if ( $type eq 'Mr_&Ms_A_Smith' or $type eq 'Mr_A_&Ms_B_Smith' or $type eq 'Mr_&Ms_A_&B_Smith' ) | 
|  |  | 0 | 0 |  |  |  |  | 
|  |  |  | 0 |  |  |  |  | 
| 1019 |  |  |  |  |  |  | { | 
| 1020 |  |  |  |  |  |  | # common surname | 
| 1021 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'title_1'}); | 
| 1022 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'conjunction_1'}); | 
| 1023 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'title_2'}); | 
| 1024 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'surname_1'}); | 
| 1025 |  |  |  |  |  |  |  | 
| 1026 |  |  |  |  |  |  | } | 
| 1027 |  |  |  |  |  |  | elsif ( $type eq 'Mr_A_Smith_&Ms_B_Jones' or $type eq 'Mr_John_Smith_&Ms_Mary_Jones' ) | 
| 1028 |  |  |  |  |  |  | { | 
| 1029 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'title_1'}); | 
| 1030 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'surname_1'}); | 
| 1031 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'conjunction_1'}); | 
| 1032 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'title_2'}); | 
| 1033 | 0 |  |  |  |  | 0 | push(@greeting,$component_vals{'surname_2'}); | 
| 1034 |  |  |  |  |  |  | } | 
| 1035 |  |  |  |  |  |  | else | 
| 1036 |  |  |  |  |  |  | { | 
| 1037 |  |  |  |  |  |  | # No title such as A_Smith_&B_Jones', 'John_Smith_&Mary_Jones' | 
| 1038 |  |  |  |  |  |  | # Must use default | 
| 1039 | 0 |  |  |  |  | 0 | push(@greeting,$sal_default); | 
| 1040 |  |  |  |  |  |  | } | 
| 1041 |  |  |  |  |  |  | } | 
| 1042 |  |  |  |  |  |  | } | 
| 1043 |  |  |  |  |  |  | else | 
| 1044 |  |  |  |  |  |  | { | 
| 1045 | 0 |  |  |  |  | 0 | warn "Invalid sal_type : ", $sal_type; | 
| 1046 | 0 |  |  |  |  | 0 | push(@greeting,$sal_default); | 
| 1047 |  |  |  |  |  |  | } | 
| 1048 |  |  |  |  |  |  | } | 
| 1049 | 3 |  |  |  |  | 19 | return(join(' ',@greeting)); | 
| 1050 |  |  |  |  |  |  | } | 
| 1051 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1052 |  |  |  |  |  |  | # Return all name properties as a hash | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 |  |  |  |  |  |  | sub properties | 
| 1055 |  |  |  |  |  |  | { | 
| 1056 | 21 |  |  | 21 | 1 | 69 | my $name = shift; | 
| 1057 | 21 |  |  |  |  | 15 | return(%{ $name->{properties} }); | 
|  | 21 |  |  |  |  | 89 |  | 
| 1058 |  |  |  |  |  |  | } | 
| 1059 |  |  |  |  |  |  |  | 
| 1060 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1061 |  |  |  |  |  |  | # Create a text report to standard output listing | 
| 1062 |  |  |  |  |  |  | # - the input string, | 
| 1063 |  |  |  |  |  |  | # - the name of each defined component, if it exists | 
| 1064 |  |  |  |  |  |  | # - any non matching component | 
| 1065 |  |  |  |  |  |  |  | 
| 1066 |  |  |  |  |  |  | sub report | 
| 1067 |  |  |  |  |  |  | { | 
| 1068 | 0 |  |  | 0 | 1 | 0 | my $name = shift; | 
| 1069 |  |  |  |  |  |  |  | 
| 1070 | 0 |  |  |  |  | 0 | my %props = $name->properties; | 
| 1071 |  |  |  |  |  |  |  | 
| 1072 | 0 |  |  |  |  | 0 | my $fmt = "%-20.20s : %s\n"; | 
| 1073 |  |  |  |  |  |  |  | 
| 1074 | 0 |  |  |  |  | 0 | printf($fmt,"Original Input",$name->{original_input}); | 
| 1075 | 0 |  |  |  |  | 0 | printf($fmt,"Cleaned Input",$name->{input_string}); | 
| 1076 | 0 |  |  |  |  | 0 | printf($fmt,"Case all",$name->case_all); | 
| 1077 | 0 |  |  |  |  | 0 | printf($fmt,"Case all reversed",$name->case_all_reversed); | 
| 1078 | 0 |  |  |  |  | 0 | printf($fmt,"Salutation",$name->salutation(salutation => 'Dear',sal_default => 'Friend', sal_type => 'title_plus_surname')); | 
| 1079 | 0 |  |  |  |  | 0 | printf($fmt,"Type", $props{type}); | 
| 1080 | 0 |  |  |  |  | 0 | printf($fmt,"Number", $props{number}); | 
| 1081 | 0 |  |  |  |  | 0 | printf($fmt,"Parsing Error", $name->{error}); | 
| 1082 | 0 |  |  |  |  | 0 | printf($fmt,"Error description : ", $name->{error_desc}); | 
| 1083 | 0 |  |  |  |  | 0 | printf($fmt,"Parsing Warning", $name->{warning}); | 
| 1084 | 0 |  |  |  |  | 0 | printf($fmt,"Warning description", $name->{warning_desc}); | 
| 1085 |  |  |  |  |  |  |  | 
| 1086 |  |  |  |  |  |  |  | 
| 1087 | 0 | 0 |  |  |  | 0 | unless ($props{type} eq 'unknown') | 
| 1088 |  |  |  |  |  |  | { | 
| 1089 | 0 |  |  |  |  | 0 | my %comps = $name->components; | 
| 1090 | 0 | 0 |  |  |  | 0 | if ( %comps ) | 
| 1091 |  |  |  |  |  |  | { | 
| 1092 | 0 |  |  |  |  | 0 | print("\nCOMPONENTS\n"); | 
| 1093 | 0 |  |  |  |  | 0 | foreach my $value ( sort keys %comps) | 
| 1094 |  |  |  |  |  |  | { | 
| 1095 | 0 | 0 | 0 |  |  | 0 | if ($value and $comps{$value}) | 
| 1096 |  |  |  |  |  |  | { | 
| 1097 | 0 |  |  |  |  | 0 | printf($fmt,$value,$comps{$value}); | 
| 1098 |  |  |  |  |  |  | } | 
| 1099 |  |  |  |  |  |  | } | 
| 1100 |  |  |  |  |  |  | } | 
| 1101 |  |  |  |  |  |  | } | 
| 1102 |  |  |  |  |  |  | } | 
| 1103 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1104 |  |  |  |  |  |  |  | 
| 1105 |  |  |  |  |  |  | # PRIVATE METHODS | 
| 1106 |  |  |  |  |  |  |  | 
| 1107 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1108 |  |  |  |  |  |  |  | 
| 1109 |  |  |  |  |  |  | sub _pre_parse | 
| 1110 |  |  |  |  |  |  | { | 
| 1111 | 28 |  |  | 28 |  | 28 | my $name = shift; | 
| 1112 |  |  |  |  |  |  |  | 
| 1113 |  |  |  |  |  |  | # strip all full stops | 
| 1114 | 28 |  |  |  |  | 50 | $name->{input_string}  =~ s/\.//g; | 
| 1115 |  |  |  |  |  |  |  | 
| 1116 |  |  |  |  |  |  | # Fold all text to upper case, as these are used in all regular expressions withun thr grammar tree | 
| 1117 | 28 |  |  |  |  | 39 | $name->{input_string}  = uc($name->{input_string}); | 
| 1118 |  |  |  |  |  |  |  | 
| 1119 |  |  |  |  |  |  | # Check that common reserved word (as found in company names) do not appear | 
| 1120 | 28 | 50 |  |  |  | 197 | if ( $name->{input_string} =~ | 
| 1121 |  |  |  |  |  |  | /\BPTY LTD$|\BLTD$|\BPLC$|ASSOCIATION|DEPARTMENT|NATIONAL|SOCIETY/ ) | 
| 1122 |  |  |  |  |  |  | { | 
| 1123 | 0 |  |  |  |  | 0 | $name->{error} = 1; | 
| 1124 | 0 |  |  |  |  | 0 | $name->{comps}{non_matching} = $name->{input_string}; | 
| 1125 | 0 |  |  |  |  | 0 | $name->{error_desc} = 'Reserved words found in name'; | 
| 1126 |  |  |  |  |  |  | } | 
| 1127 |  |  |  |  |  |  |  | 
| 1128 |  |  |  |  |  |  | # For the case of a single name such as 'Voltaire' we need to add a trailing space | 
| 1129 |  |  |  |  |  |  | # to the input string. This is because the grammar tree expects a terminator (the space) | 
| 1130 |  |  |  |  |  |  | # optionally followed by other productions or non matching text | 
| 1131 | 28 |  |  |  |  | 33 | $name->{input_string} .= ' '; | 
| 1132 | 28 | 50 |  |  |  | 148 | if ( $name->{input_string} =~ /^[A-Z]{2,}(\-)?[A-Z]{0,}$/ ) | 
| 1133 |  |  |  |  |  |  | { | 
| 1134 | 0 |  |  |  |  | 0 | $name->{input_string} .= ' '; | 
| 1135 |  |  |  |  |  |  | } | 
| 1136 | 28 |  |  |  |  | 40 | return($name); | 
| 1137 |  |  |  |  |  |  |  | 
| 1138 |  |  |  |  |  |  | } | 
| 1139 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1140 |  |  |  |  |  |  | # Initialise all components to empty string. Assemble hashes of components | 
| 1141 |  |  |  |  |  |  | # and properties as part of the name object | 
| 1142 |  |  |  |  |  |  | # | 
| 1143 |  |  |  |  |  |  | sub _assemble | 
| 1144 |  |  |  |  |  |  | { | 
| 1145 | 28 |  |  | 28 |  | 22 | my $name = shift; | 
| 1146 |  |  |  |  |  |  |  | 
| 1147 |  |  |  |  |  |  | # Use Parse::RecDescent to do the parsing. 'full_name' is a label for the complete grammar tree | 
| 1148 |  |  |  |  |  |  | # defined in Lingua::EN::NameParse::Grammar | 
| 1149 | 28 |  |  |  |  | 163 | my $parsed_name = $name->{parse}->full_name($name->{input_string}); | 
| 1150 |  |  |  |  |  |  |  | 
| 1151 |  |  |  |  |  |  | # Place components into a separate hash, so they can be easily returned | 
| 1152 |  |  |  |  |  |  | # for the user to inspect and modify. | 
| 1153 |  |  |  |  |  |  |  | 
| 1154 | 28 |  |  |  |  | 171857 | my @all_comps = qw(precursor title_1 given_name_1 initials_1 middle_name surname_1 conjunction_1 | 
| 1155 |  |  |  |  |  |  | title_2 given_name_2 initials_2 surname_2 conjunction_2 suffix non_matching); | 
| 1156 |  |  |  |  |  |  |  | 
| 1157 | 28 |  |  |  |  | 48 | foreach my $comp (@all_comps) | 
| 1158 |  |  |  |  |  |  | { | 
| 1159 |  |  |  |  |  |  | # set all components to empty string, as any of them could be accessed, even if they don't exist | 
| 1160 | 392 |  |  |  |  | 380 | $name->{comps}{$comp} = ''; | 
| 1161 | 392 | 100 |  |  |  | 516 | if (defined($parsed_name->{$comp})) | 
| 1162 |  |  |  |  |  |  | { | 
| 1163 |  |  |  |  |  |  | # Copy over existing components. | 
| 1164 | 135 |  |  |  |  | 156 | $name->{comps}{$comp} = _trim_space($parsed_name->{$comp}); | 
| 1165 |  |  |  |  |  |  | } | 
| 1166 |  |  |  |  |  |  | } | 
| 1167 |  |  |  |  |  |  |  | 
| 1168 | 28 |  |  |  |  | 41 | $name->{properties}{number} = 0; | 
| 1169 | 28 |  |  |  |  | 31 | $name->{properties}{number} = $parsed_name->{number}; | 
| 1170 | 28 |  |  |  |  | 35 | $name->{properties}{type}   = $parsed_name->{type}; | 
| 1171 |  |  |  |  |  |  |  | 
| 1172 | 28 |  |  |  |  | 73 | return($name); | 
| 1173 |  |  |  |  |  |  | } | 
| 1174 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1175 |  |  |  |  |  |  | # For correct matching, the grammar of each component must include the trailing space that separates it | 
| 1176 |  |  |  |  |  |  | # from any following word. This should now be removed from the components, and will be restored by the | 
| 1177 |  |  |  |  |  |  | # case_all and salutation methods, if called. | 
| 1178 |  |  |  |  |  |  |  | 
| 1179 |  |  |  |  |  |  | sub _trim_space | 
| 1180 |  |  |  |  |  |  | { | 
| 1181 | 135 |  |  | 135 |  | 112 | my ($string) = @_; | 
| 1182 | 135 | 100 |  |  |  | 157 | if ($string) | 
| 1183 |  |  |  |  |  |  | { | 
| 1184 | 109 |  |  |  |  | 213 | $string =~ s/ $//; | 
| 1185 |  |  |  |  |  |  | } | 
| 1186 | 135 |  |  |  |  | 191 | return($string); | 
| 1187 |  |  |  |  |  |  | } | 
| 1188 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1189 |  |  |  |  |  |  | # Check if any name components have illegal characters, or do not have the | 
| 1190 |  |  |  |  |  |  | # correct syntax for a valid name. | 
| 1191 |  |  |  |  |  |  |  | 
| 1192 |  |  |  |  |  |  |  | 
| 1193 |  |  |  |  |  |  | sub _validate | 
| 1194 |  |  |  |  |  |  | { | 
| 1195 | 28 |  |  | 28 |  | 26 | my $name = shift; | 
| 1196 | 28 |  |  |  |  | 67 | my %comps = $name->components; | 
| 1197 |  |  |  |  |  |  |  | 
| 1198 | 28 | 100 |  |  |  | 112 | if ( $comps{non_matching} ) | 
|  |  | 50 |  |  |  |  |  | 
| 1199 |  |  |  |  |  |  | { | 
| 1200 | 2 |  |  |  |  | 3 | $name->{warning} = 1; | 
| 1201 | 2 |  |  |  |  | 7 | $name->{warning_desc} .= ";non_matching text found : $comps{non_matching}"; | 
| 1202 |  |  |  |  |  |  | } | 
| 1203 |  |  |  |  |  |  | elsif ( $name->{input_string} =~ /[^A-Za-z\-\'\.,&\/ ]/ ) | 
| 1204 |  |  |  |  |  |  | { | 
| 1205 |  |  |  |  |  |  | # illegal characters found | 
| 1206 | 0 |  |  |  |  | 0 | $name->{error} = 1; | 
| 1207 | 0 |  |  |  |  | 0 | $name->{error_desc} = 'illegal characters found'; | 
| 1208 |  |  |  |  |  |  | } | 
| 1209 |  |  |  |  |  |  |  | 
| 1210 |  |  |  |  |  |  |  | 
| 1211 | 28 | 50 |  |  |  | 44 | if ( not _valid_name($comps{given_name_1}) ) | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1212 |  |  |  |  |  |  | { | 
| 1213 | 0 |  |  |  |  | 0 | $name->{warning} = 1; | 
| 1214 | 0 |  |  |  |  | 0 | $name->{warning_desc} .= ";no vowel sound in given_name_1 : $comps{given_name_1}"; | 
| 1215 |  |  |  |  |  |  | } | 
| 1216 |  |  |  |  |  |  | elsif ( not _valid_name($comps{middle_name}) ) | 
| 1217 |  |  |  |  |  |  | { | 
| 1218 | 0 |  |  |  |  | 0 | $name->{warning} = 1; | 
| 1219 | 0 |  |  |  |  | 0 | $name->{warning_desc} .= ";no vowel sound in middle_name : $comps{middle_name}"; | 
| 1220 |  |  |  |  |  |  | } | 
| 1221 |  |  |  |  |  |  |  | 
| 1222 |  |  |  |  |  |  | elsif ( not _valid_name($comps{surname_1}) ) | 
| 1223 |  |  |  |  |  |  | { | 
| 1224 | 0 |  |  |  |  | 0 | $name->{warning} = 1; | 
| 1225 | 0 |  |  |  |  | 0 | $name->{warning_desc} .= ";no vowel sound in surname_1 : $comps{surname_1}"; | 
| 1226 |  |  |  |  |  |  |  | 
| 1227 |  |  |  |  |  |  | } | 
| 1228 |  |  |  |  |  |  | elsif ( not _valid_name($comps{surname_2}) ) | 
| 1229 |  |  |  |  |  |  | { | 
| 1230 | 0 |  |  |  |  | 0 | $name->{warning} = 1; | 
| 1231 | 0 |  |  |  |  | 0 | $name->{warning_desc} .= ";no vowel sound in surname_2 : $comps{surname_2}"; | 
| 1232 |  |  |  |  |  |  | } | 
| 1233 |  |  |  |  |  |  | } | 
| 1234 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1235 |  |  |  |  |  |  | # If the name has an assigned value, check that it contains a vowel sound, | 
| 1236 |  |  |  |  |  |  | # or matches the exceptions to this rule. | 
| 1237 |  |  |  |  |  |  | # Returns 1 if name is valid, otherwise 0 | 
| 1238 |  |  |  |  |  |  |  | 
| 1239 |  |  |  |  |  |  | sub _valid_name | 
| 1240 |  |  |  |  |  |  | { | 
| 1241 | 112 |  |  | 112 |  | 86 | my ($name) = @_; | 
| 1242 | 112 | 100 | 33 |  |  | 303 | if ( not $name ) | 
|  |  | 50 |  |  |  |  |  | 
| 1243 |  |  |  |  |  |  | { | 
| 1244 | 65 |  |  |  |  | 150 | return(1); | 
| 1245 |  |  |  |  |  |  | } | 
| 1246 |  |  |  |  |  |  | # Names should have a vowel sound, | 
| 1247 |  |  |  |  |  |  | # valid exceptions are Ng, Tsz,Md, Cng,Hng,Chng etc | 
| 1248 |  |  |  |  |  |  | elsif ( $name and $name =~ /[AEIOUYJ]|^(NG|TSZ|MD|(C?H|[PTS])NG)$/i ) | 
| 1249 |  |  |  |  |  |  | { | 
| 1250 | 47 |  |  |  |  | 106 | return(1); | 
| 1251 |  |  |  |  |  |  | } | 
| 1252 |  |  |  |  |  |  | else | 
| 1253 |  |  |  |  |  |  | { | 
| 1254 | 0 |  |  |  |  | 0 | return(0); | 
| 1255 |  |  |  |  |  |  | } | 
| 1256 |  |  |  |  |  |  | } | 
| 1257 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1258 |  |  |  |  |  |  | # Upper case first letter, lower case the rest, for all words in string | 
| 1259 |  |  |  |  |  |  | sub _case_word | 
| 1260 |  |  |  |  |  |  | { | 
| 1261 | 315 |  |  | 315 |  | 229 | my ($word) = @_; | 
| 1262 |  |  |  |  |  |  |  | 
| 1263 | 315 | 100 |  |  |  | 349 | if ($word) | 
| 1264 |  |  |  |  |  |  | { | 
| 1265 | 67 |  |  |  |  | 262 | $word =~ s/(\w+)/\u\L$1/g; | 
| 1266 |  |  |  |  |  |  | } | 
| 1267 |  |  |  |  |  |  |  | 
| 1268 | 315 |  |  |  |  | 309 | return($word); | 
| 1269 |  |  |  |  |  |  | } | 
| 1270 |  |  |  |  |  |  | #------------------------------------------------------------------------------- | 
| 1271 |  |  |  |  |  |  | return(1); |