File Coverage

blib/lib/Locale/Util.pm
Criterion Covered Total %
statement 93 221 42.0
branch 22 106 20.7
condition 3 33 9.0
subroutine 12 16 75.0
pod 6 6 100.0
total 136 382 35.6


line stmt bran cond sub pod time code
1             #! /bin/false
2              
3             # vim: set autoindent shiftwidth=4 tabstop=4:
4              
5             # Portable methods for locale handling.
6             # Copyright (C) 2002-2026 Guido Flohr <guido.flohr@cantanea.com>,
7             # all rights reserved.
8              
9             # This program is free software: you can redistribute it and/or modify
10             # it under the terms of the GNU General Public License as published by
11             # the Free Software Foundation; either version 3 of the License, or
12             # (at your option) any later version.
13              
14             # This program is distributed in the hope that it will be useful,
15             # but WITHOUT ANY WARRANTY; without even the implied warranty of
16             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17             # GNU General Public License for more details.
18              
19             # You should have received a copy of the GNU General Public License
20             # along with this program. If not, see <http://www.gnu.org/licenses/>.
21              
22             package Locale::Util;
23              
24 12     12   21671 use strict;
  12         27  
  12         523  
25              
26 12     12   58 use constant DEBUG => 0;
  12         35  
  12         2089  
27              
28 12     12   66 use base qw (Exporter);
  12         22  
  12         2082  
29              
30 12     12   80 use vars qw (@EXPORT_OK);
  12         35  
  12         5554  
31              
32             @EXPORT_OK = qw (parse_http_accept_language
33             parse_http_accept_charset
34             set_locale set_locale_cache get_locale_cache
35             web_set_locale);
36              
37             # The following list maps languages to a rough guess of the country that
38             # is most likely to be meant if no locale info for the country alone is
39             # found. I have assembled the list to the best of my knowledge, preferring
40             # the country that has the language as its official language, and in doubt
41             # listing the country that has the most speakers of that language. Corrections
42             # are welcome.
43 12         6741 use constant LANG2COUNTRY => {
44             aa => 'ET', # Afar => Ethiopia
45             ab => 'AB', # Abkhazian => Georgia
46             # ae => '??', # Avestan => ??, Iran?
47             af => 'za', # Afrikaans => South Africa
48             am => 'ET', # Amharic => Ethiopia
49             ar => 'EG', # Arabic => Egypt
50             as => 'IN', # Assamese => India
51             ay => 'BO', # Aymara => Bolivia
52             az => 'AZ', # Azerbaijani => Azerbaijan
53             ba => 'RU', # Bashkir => Russia
54             be => 'BY', # Belarusian => Belarus
55             bg => 'BG', # Bulgarian => Bulgaria
56             bh => 'IN', # Bihari => India
57             bi => 'VU', # Bislama => Vanuatu
58             bn => 'BD', # Bengali => Bangladesh
59             bo => 'CN', # Tibetan => China
60             br => 'FR', # Breton => France
61             bs => 'BA', # Bosnian => Bosnia and Herzegovina
62             ca => 'ES', # Catalan => Spain
63             ce => 'RU', # Chechen => Russia
64             ch => '??', # Chamorro => Guam (or mp?)
65             co => 'FR', # Corsican => France
66             cs => 'CZ', # Czech => Czech Republic
67             cu => 'BG', # Church Slavic => Bulgaria
68             cv => 'RU', # Chuvash => Russia
69             cy => 'GB', # Welsh => United Kingdom
70             da => 'DK', # Danish => Denmark
71             de => 'DE', # German => Germany
72             dz => 'BT', # Dzongkha => Bhutan
73             el => 'GR', # Greek => Greece
74             en => 'US', # English => United States
75             es => 'ES', # Actually Mexico and the US have more Spanish speakers
76             # than Spain. But it can be assumed that they either add
77             # the country to their browser settings or will not care
78             # to much.
79             et => 'EE', # Estonian => Estonia
80             fa => 'IR', # Iran, Islamic Republic of
81             fi => 'FI', # Finnish => Finland
82             fj => 'FJ', # Fijian => Fiji
83             fo => 'FO', # Faeroese => Faroe Islands
84             fr => 'FR', # French => France
85             fy => 'FY', # Frisian => Netherlands
86             ga => 'IE', # Irish => Ireland
87             gd => 'GB', # Gaelic (Scots) => United Kingdom
88             gl => 'ES', # Gallegan => Spain
89             gn => 'PY', # Guarani => Paraguay
90             gu => 'IN', # Gujarati => IN
91             gv => 'GB', # Manx => United Kingdom
92             ha => 'NE', # Hausa => Niger (ng?)
93             he => 'IL', # Hebrew => Israel
94             hi => 'IN', # Hindi => India
95             ho => 'PG', # Hiri Motu => Papua New Guinea
96             hr => 'HR', # Croatian
97             hu => 'HU', # Hungarian => Hungary
98             hy => 'AM', # Armenian => Armenia
99             hz => 'NA', # Herero => Namibia
100             # ia => '??', # Interlingua (aka "latino sine flexione") => ??
101             id => 'ID', # Indonesian => Indonesia
102             # ie => '??', # Interlingue => ???
103             ik => 'US', # Inupiaq => United States
104             is => 'IS', # Icelandic => Iceland
105             it => 'IT', # Italian => Italy
106             iu => 'CA', # Inuktitut => Canada
107             iw => 'IL', # Hebrew => Israel
108             ja => 'JP', # Japanese => Japan
109             jw => 'ID', # Javanese => Indonesia
110             ka => 'GE', # Georgian => Georgia
111             ki => 'KE', # Kikuyu => Kenya
112             kj => 'AO', # Kuanyama => Angola (na?)
113             kk => 'KZ', # Kazakh => Kazakhstan
114             kl => 'GL', # Kalaallisut => Greenland
115             km => 'KH', # Khmer => Cambodia
116             kn => 'IN', # Kannada => India
117             ko => 'KR', # Korean => Korea, Republic of (more speakers than North Korea)
118             ks => 'IN', # Kashmiri => India
119             ku => 'TR', # Kurdish => Turkey
120             kv => 'RU', # Komi => Russia
121             kw => 'GB', # Cornish => United Kingdom
122             ky => 'KG', # Kirghyz => Kyrgyzstan
123             la => 'VA', # Latin => Holy See (Vatican City State)
124             lb => 'LU', # Letzeburgesch => Luxembourg
125             ln => 'CG', # Lingala => Republic of the Congo (cd?)
126             lo => 'LA', # Lao => Lao People's Democratic Republic
127             lt => 'LT', # Lithuanian => Lithuania
128             lv => 'LV', # Latvian => Latvia
129             mg => 'MG', # Malagasy => Madagascar
130             mh => 'MH', # Marshall => Marshall Islands
131             mi => 'NZ', # Maori => New Zealand
132             mk => 'MK', # Macedonian => Macedonia, the Former Yugoslav Republic of
133             ml => 'IN', # Malayalam => India
134             mn => 'MN', # Mongolian => Mongolia
135             mr => 'IN', # Marathi => India
136             ms => 'MY', # Malay => Malaysia (FIXME: not really sure ...)
137             mt => 'MT', # Maltese => Malta
138             my => 'MM', # Burmese => Myanmar
139             na => 'NR', # Nauru => Nauru
140             nb => 'NO', # Norwegian Bokmål => Norway
141             nd => 'ZA', # Ndebele, North => South Africa
142             ne => 'NP', # Nepali => Nepal
143             ng => 'NA', # Ndonga => Namibia
144             nl => 'NL', # Dutch => Netherlands
145             nn => 'NO', # Norwegian Nynorsk => Norway
146             no => 'NO', # Norwegian => Norway
147             nr => 'ZA', # Ndebele, South => South Africa
148             nv => 'US', # Navajo => United States
149             ny => 'MW', # Chichewa; Nyanja => Malawi
150             oc => 'FR', # Occitan (post 1500) => France
151             om => 'ET', # Oromo => Ethiopia
152             or => 'IN', # Oriya => India
153             os => 'RU', # Ossetian; Ossetic => Russia (FIXME: Or Georgia?)
154             pa => 'IN', # Panjabi => India
155             pi => 'IN', # Pali => India (FIXME: Or Thailand, Sri Lanka, Myanmar,
156             # Cambodia)
157             pl => 'PL', # Polish => Poland
158             ps => 'PK', # Pushto => Pakistan
159             pt => 'PT', # Portuguese => Portugal (following our rules this should
160             # actually be Brazil but that would be to unrealistic,
161             # people from Brazil set their locale to pt_BR).
162             qu => 'PE', # Quechua => Peru
163             rm => 'CH', # Rhaeto-Romance => Switzerland
164             rn => 'RW', # Rundi => Rwanda
165             ro => 'RO', # Romanian => Romania
166             ru => 'RU', # Russian => Russia
167             rw => 'RW', # Kinyarwanda => Rwanda
168             sa => 'IN', # Sanskrit => India
169             sc => 'IT', # Sardinian => Italy
170             sd => 'IN', # Sindhi => India
171             se => 'SE', # Sami => Sweden (Totally unsure here. The Sami languages
172             # are also spoken in Norway, Finland and Russia, but the
173             # largest part of the area seems to be in Sweden.
174             sg => '??', # Sango => Central African Republic
175             si => 'LK', # Sinhalese => Sri Lanka
176             sk => 'SK', # Slovakian => Slovakia
177             sl => 'SI', # Slovenian => Slovenia
178             sm => 'WS', # Samoan => Samoa
179             sh => 'ZW', # Shona => Zimbabwe (FIXME: Rather Mozambique?)
180             so => 'SO', # Somali => Somalia
181             sq => 'AL', # Albanian => Albania
182             sr => 'YU', # Serbian => Yugoslavia
183             ss => '??', # Swati => Swaziland (za?)
184             st => 'LS', # Sotho => Lesotho
185             su => 'IN', # Sundanese => Indonesia
186             sv => 'SE', # Swedish => Sweden
187             sw => 'TZ', # Suaheli => Tanzania, United Republic of
188             ta => 'LK', # Tamil => Sri Lanka
189             te => 'IN', # Telugu => India
190             tg => 'TJ', # Tajik => Tajikistan
191             th => 'TH', # Thai => Thailand
192             ti => 'ER', # Tigrinya => Eritrea
193             tk => 'TM', # Turkmen => Turkmenistan
194             tl => 'PH', # Tagalog => Philippines
195             tn => 'BW', # Tswana => Botswana
196             to => 'TO', # Tonga => Tonga
197             tr => 'TR', # Turkish => Turkish
198             tt => 'RU', # Tatar => Russia
199             tw => 'GH', # Twi => Ghana
200             ug => 'CN', # Uighur => China
201             uk => 'UA', # Ukrainian => Ukraine
202             ur => 'PK', # Urdu => Pakistan
203             uz => 'UZ', # Uzbek => Uzbekistan
204             vi => 'VN', # Vietnamese => Vietnam
205             # vo => '??', # Volapuk => Nowhere
206             wo => 'SN', # Wolof => Senegal
207             xh => 'ZA', # Xhosa => South Africa
208             yi => 'IL', # Yiddish => Israel (FIXME: Rather United States?)
209             yo => 'NG', # Yoruba => Nigeria
210             za => 'CN', # Zhuang => China
211             zh => 'CN', # Chinese => China
212             zu => 'ZA', # Zulu => South Africa
213 12     12   88 };
  12         30  
214              
215 12         7845 use constant WIN32LANGUAGE => {
216             aa => "Afar",
217             ab => "Abkhazian",
218             ae => "Avestan",
219             af => "Afrikaans",
220             am => "Amharic",
221             ar => "Arabic",
222             as => "Assamese",
223             ay => "Aymara",
224             az => "Azerbaijani",
225             ba => "Bashkir",
226             be => "Belarusian",
227             bg => "Bulgarian",
228             bh => "Bihari",
229             bi => "Bislama",
230             bn => "Bengali",
231             bo => "Tibetan",
232             br => "Breton",
233             bs => "Bosnian",
234             ca => "Catalan",
235             ce => "Chechen",
236             ch => "Chamorro",
237             co => "Corsican",
238             cs => "Czech",
239             cu => "Church Slavic",
240             cv => "Chuvash",
241             cy => "Welsh",
242             da => "Danish",
243             de => "German",
244             dz => "Dzongkha",
245             el => "Greek",
246             en => "English",
247             eo => "Esperanto",
248             es => "Spanish",
249             et => "Estonian",
250             eu => "Basque",
251             fa => "Persian",
252             fi => "Finnish",
253             fj => "Fijian",
254             fo => "Faeroese",
255             fr => "French",
256             fy => "Frisian",
257             ga => "Irish",
258             gd => "Gaelic (Scots)",
259             gl => "Gallegan",
260             gn => "Guarani",
261             gu => "Gujarati",
262             gv => "Manx",
263             ha => "Hausa",
264             he => "Hebrew",
265             hi => "Hindi",
266             ho => "Hiri Motu",
267             hr => "Croatian",
268             hu => "Hungarian",
269             hy => "Armenian",
270             hz => "Herero",
271             ia => "Interlingua",
272             id => "Indonesian",
273             ie => "Interlingue",
274             ik => "Inupiaq",
275             is => "Icelandic",
276             it => "Italian",
277             iu => "Inuktitut",
278             ja => "Japanese",
279             jw => "Javanese",
280             ka => "Georgian",
281             ki => "Kikuyu",
282             kj => "Kuanyama",
283             kk => "Kazakh",
284             kl => "Kalaallisut",
285             km => "Khmer",
286             kn => "Kannada",
287             ko => "Korean",
288             ks => "Kashmiri",
289             ku => "Kurdish",
290             kv => "Komi",
291             kw => "Cornish",
292             ky => "Kirghiz",
293             la => "Latin",
294             lb => "Letzeburgesch",
295             ln => "Lingala",
296             lo => "Lao",
297             lt => "Lithuanian",
298             lv => "Latvian",
299             mg => "Malagasy",
300             mh => "Marshall",
301             mi => "Maori",
302             # Sorry, lads, but that is what M$ calls your language ...
303             mk => "FYRO Macedonian",
304             ml => "Malayalam",
305             mn => "Mongolian",
306             mo => "Moldavian",
307             mr => "Marathi",
308             ms => "Malay",
309             mt => "Maltese",
310             my => "Burmese",
311             na => "Nauru",
312             nb => "Norwegian (Bokmål)",
313             nd => "Ndebele, North",
314             ne => "Nepali",
315             ng => "Ndonga",
316             nl => "Dutch",
317             nn => "Norwegian-Nynorsk",
318             no => "Norwegian-Nynorsk",
319             nr => "Ndebele, South",
320             nv => "Navajo",
321             ny => "Chichewa",
322             oc => "Occitan (post 1500)",
323             om => "Oromo",
324             or => "Oriya",
325             os => "Ossetian",
326             pa => "Panjabi",
327             pi => "Pali",
328             pl => "Polish",
329             ps => "Pushto",
330             pt => "Portuguese",
331             qu => "Quechua",
332             rm => "Rhaeto-Romance",
333             rn => "Rundi",
334             ro => "Romanian",
335             ru => "Russian",
336             rw => "Kinyarwanda",
337             sa => "Sanskrit",
338             sc => "Sardinian",
339             sd => "Sindhi",
340             se => "Sami",
341             sg => "Sango",
342             si => "Sinhalese",
343             sk => "Slovak",
344             sl => "Slovenian",
345             sm => "Samoan",
346             sn => "Shona",
347             so => "Somali",
348             sq => "Albanian",
349             sr => "Serbian",
350             ss => "Swati",
351             st => "Sotho",
352             su => "Sundanese",
353             sv => "Swedish",
354             sw => "Swahili",
355             ta => "Tamil",
356             te => "Telugu",
357             tg => "Tajik",
358             th => "Thai",
359             ti => "Tigrinya",
360             tk => "Turkmen",
361             tl => "Tagalog",
362             tn => "Tswana",
363             to => "Tonga",
364             tr => "Turkish",
365             ts => "Tsonga",
366             tt => "Tatar",
367             tw => "Twi",
368             ug => "Uighur",
369             uk => "Ukrainian",
370             ur => "Urdu",
371             uz => "Uzbek",
372             vi => "Vietnamese",
373             vo => "Volapuk",
374             wo => "Wolof",
375             xh => "Xhosa",
376             yi => "Yiddish",
377             yo => "Yoruba",
378             za => "Zhuang",
379             zh => "Chinese",
380             zu => "Zulu",
381 12     12   90 };
  12         21  
382              
383 12         8906 use constant WIN32COUNTRY => {
384             ad => "Andorra",
385             ae => "United Arab Emirates",
386             af => "Afghanistan",
387             ag => "Antigua and Barbuda",
388             ai => "Anguilla",
389             al => "Albania",
390             am => "Armenia",
391             an => "Netherlands Antilles",
392             ao => "Angola",
393             aq => "Antarctica",
394             ar => "Argentina",
395             as => "American Samoa",
396             at => "Austria",
397             au => "Australia",
398             aw => "Aruba",
399             ax => "Aland Islands",
400             az => "Azerbaijan",
401             ba => "Bosnia and Herzegovina",
402             bb => "Barbados",
403             bd => "Bangladesh",
404             be => "Belgium",
405             bf => "Burkina Faso",
406             bg => "Bulgaria",
407             bh => "Bahrain",
408             bi => "Burundi",
409             bj => "Benin",
410             bm => "Bermuda",
411             bn => "Brunei Darussalam",
412             bo => "Bolivia",
413             br => "Brazil",
414             bs => "Bahamas",
415             bt => "Bhutan",
416             bv => "Bouvet Island",
417             bw => "Botswana",
418             by => "Belarus",
419             bz => "Belize",
420             ca => "Canada",
421             cc => "Cocos (Keeling) Islands",
422             cd => "Congo, The Democratic Republic of the",
423             cf => "Central African Republic",
424             cg => "Congo",
425             ch => "Switzerland",
426             ci => "Cote D'Ivoire",
427             ck => "Cook Islands",
428             cl => "Chile",
429             cm => "Cameroon",
430             cn => "China",
431             co => "Colombia",
432             cr => "Costa Rica",
433             cs => "Serbia and Montenegro",
434             cu => "Cuba",
435             cv => "Cape Verde",
436             cx => "Christmas Island",
437             cy => "Cyprus",
438             cz => "Czech Republic",
439             de => "Germany",
440             dj => "Djibouti",
441             dk => "Denmark",
442             dm => "Dominica",
443             do => "Dominican Republic",
444             dz => "Algeria",
445             ec => "Ecuador",
446             ee => "Estonia",
447             eg => "Egypt",
448             eh => "Western Sahara",
449             er => "Eritrea",
450             es => "Spain",
451             et => "Ethiopia",
452             fi => "Finland",
453             fj => "Fiji",
454             fk => "Falkland Islands (Malvinas)",
455             fm => "Micronesia, Federated States of",
456             fo => "Faroe Islands",
457             fr => "France",
458             fx => "France, Metropolitan",
459             ga => "Gabon",
460             gb => "United Kingdom",
461             gd => "Grenada",
462             ge => "Georgia",
463             gf => "French Guiana",
464             gh => "Ghana",
465             gi => "Gibraltar",
466             gl => "Greenland",
467             gm => "Gambia",
468             gn => "Guinea",
469             gp => "Guadeloupe",
470             gq => "Equatorial Guinea",
471             gr => "Greece",
472             gs => "South Georgia and the South Sandwich Islands",
473             gt => "Guatemala",
474             gu => "Guam",
475             gw => "Guinea-Bissau",
476             gy => "Guyana",
477             hk => "Hong Kong",
478             hm => "Heard Island and McDonald Islands",
479             hn => "Honduras",
480             hr => "Croatia",
481             ht => "Haiti",
482             hu => "Hungary",
483             id => "Indonesia",
484             ie => "Ireland",
485             il => "Israel",
486             in => "India",
487             io => "British Indian Ocean Territory",
488             iq => "Iraq",
489             ir => "Iran",
490             is => "Iceland",
491             it => "Italy",
492             jm => "Jamaica",
493             jo => "Jordan",
494             jp => "Japan",
495             ke => "Kenya",
496             kg => "Kyrgyzstan",
497             kh => "Cambodia",
498             ki => "Kiribati",
499             km => "Comoros",
500             kn => "Saint Kitts and Nevis",
501             kp => "North-Korea",
502             kr => "Korea",
503             kw => "Kuwait",
504             ky => "Cayman Islands",
505             kz => "Kazakhstan",
506             la => "Laos",
507             lb => "Lebanon",
508             lc => "Saint Lucia",
509             li => "Liechtenstein",
510             lk => "Sri Lanka",
511             lr => "Liberia",
512             ls => "Lesotho",
513             lt => "Lithuania",
514             lu => "Luxembourg",
515             lv => "Latvia",
516             ly => "Libyan",
517             ma => "Morocco",
518             mc => "Monaco",
519             md => "Moldova",
520             mg => "Madagascar",
521             mh => "Marshall Islands",
522             mk => "Former Yugoslav Republic of Macedonia",
523             ml => "Mali",
524             mm => "Myanmar",
525             mn => "Mongolia",
526             mo => "Macao",
527             mp => "Northern Mariana Islands",
528             mq => "Martinique",
529             mr => "Mauritania",
530             ms => "Montserrat",
531             mt => "Malta",
532             mu => "Mauritius",
533             mv => "Maldives",
534             mw => "Malawi",
535             mx => "Mexico",
536             my => "Malaysia",
537             mz => "Mozambique",
538             na => "Namibia",
539             nc => "New Caledonia",
540             ne => "Niger",
541             nf => "Norfolk Island",
542             ng => "Nigeria",
543             ni => "Nicaragua",
544             nl => "Netherlands",
545             no => "Norway",
546             np => "Nepal",
547             nr => "Nauru",
548             nu => "Niue",
549             nz => "New Zealand",
550             om => "Oman",
551             pa => "Panama",
552             pe => "Peru",
553             pf => "French Polynesia",
554             pg => "Papua New Guinea",
555             ph => "Philippines",
556             pk => "Pakistan",
557             pl => "Poland",
558             pm => "Saint Pierre and Miquelon",
559             pn => "Pitcairn",
560             pr => "Puerto Rico",
561             ps => "Palestinian Territory, Occupied",
562             pt => "Portugal",
563             pw => "Palau",
564             py => "Paraguay",
565             qa => "Qatar",
566             re => "Reunion",
567             ro => "Romania",
568             ru => "Russian Federation",
569             rw => "Rwanda",
570             sa => "Saudi Arabia",
571             sb => "Solomon Islands",
572             sc => "Seychelles",
573             sd => "Sudan",
574             se => "Sweden",
575             sg => "Singapore",
576             sh => "Saint Helena",
577             si => "Slovenia",
578             sj => "Svalbard and Jan Mayen",
579             sk => "Slovakia",
580             sl => "Sierra Leone",
581             sm => "San Marino",
582             sn => "Senegal",
583             so => "Somalia",
584             sr => "Suriname",
585             st => "Sao Tome and Principe",
586             sv => "El Salvador",
587             sy => "Syrian Arab Republic",
588             sz => "Swaziland",
589             tc => "Turks and Caicos Islands",
590             td => "Chad",
591             tf => "French Southern Territories",
592             tg => "Togo",
593             th => "Thailand",
594             tj => "Tajikistan",
595             tk => "Tokelau",
596             tl => "Timor-Leste",
597             tm => "Turkmenistan",
598             tn => "Tunisia",
599             to => "Tonga",
600             tr => "Turkey",
601             tt => "Trinidad and Tobago",
602             tv => "Tuvalu",
603             tw => "Taiwan, Province of China",
604             tz => "Tanzania, United Republic of",
605             ua => "Ukraine",
606             ug => "Uganda",
607             um => "United States Minor Outlying Islands",
608             us => "United States",
609             uy => "Uruguay",
610             uz => "Uzbekistan",
611             va => "Holy See (Vatican City State)",
612             vc => "Saint Vincent and the Grenadines",
613             ve => "Venezuela",
614             vg => "Virgin Islands, British",
615             vi => "Virgin Islands, U.S.",
616             vn => "Vietnam",
617             vu => "Vanuatu",
618             wf => "Wallis and Futuna",
619             ws => "Samoa",
620             ye => "Yemen",
621             yt => "Mayotte",
622             za => "South Africa",
623             zm => "Zambia",
624             zw => "Zimbabwe",
625 12     12   88 };
  12         36  
626              
627             my $locale_cache;
628              
629             sub parse_http_accept_language {
630 3     3 1 184095 my ($string) = @_;
631              
632 3         33 my @tokens = split / *, */, $string;
633            
634 3         6 my %retval;
635 3         7 foreach my $token (@tokens) {
636 11         17 my $quality = 1;
637             # This RE is more forgiving than the standard. It accepts
638             # values greater than 1.0 and with more fractional digits
639             # than 3.
640 11 100       49 if ($token =~ s/ *; *q *= *([0-9]+(?:\.([0-9]+))?)$//) {
641 7         18 $quality = $1;
642             }
643 11         26 $retval{$token} = $quality;
644             }
645              
646             # RFC 2616 only allows 1-8 characters for language and country
647             # but we are more forgiving.
648             return grep {
649 11         66 /^[A-Za-z]+(?:-[A-Za-z]+)?$/
650             } map {
651 11 100       24 $_ = 'C' if $_ eq '*'; $_
  11         19  
652             } sort {
653 3         20 $retval{$b} <=> $retval{$a}
  12         44  
654             } keys %retval;
655             }
656              
657             sub parse_http_accept_charset {
658 1     1 1 106 my ($string) = @_;
659              
660 1         12 my @tokens = split / *, */, $string;
661            
662 1         3 my %retval;
663 1         3 foreach my $token (@tokens) {
664 3         3 my $quality = 1;
665             # This RE is more forgiving than the standard. It accepts
666             # values greater than 1.0 and with more fractional digits
667             # than 3.
668 3 100       19 if ($token =~ s/ *; *q *= *([0-9]+(?:\.([0-9]+))?)$//) {
669 2         4 $quality = $1;
670             }
671 3         6 $retval{$token} = $quality;
672             }
673              
674             return grep {
675             # This is really allowed in character set names ...
676 3         11 /^[-!\#\$\%\&\'\+\.0-9A-Z_\`a-z\|\~]+$/
677             } map {
678 3 50       5 $_ = undef if $_ eq '*'; $_
  3         5  
679             } sort {
680 1         4 $retval{$b} <=> $retval{$a}
  2         8  
681             } keys %retval;
682             }
683              
684             sub set_locale {
685 11     11 1 86 my ($category, $language, $country, $charset) = @_;
686            
687 11         62 require POSIX;
688              
689             # See https://github.com/gflohr/libintl-perl/issues/14!
690 12     12   105 no if $] >= 5.022, warnings => 'locale';
  12         24  
  12         12972  
691              
692 11 50       53 $country = '' unless defined $country;
693 11 50       73 $charset = '' unless defined $charset;
694            
695 11         20 my $set_locale;
696             # Look up the cache first.
697 11 50       57 if (my $retval = $locale_cache->{$language}->{$country}->{$charset}) {
698 0         0 my ($locale, $country) = @$retval;
699 0         0 POSIX::setlocale ($category, $locale);
700 0         0 return @$retval;
701             }
702              
703             # Initialize the cache with the undefined value so that we can do
704             # error returns without setting it.
705 11         35 $locale_cache->{$language}->{$country}->{$charset} = undef;
706              
707 11 50 33     136 my $windows = ($^O !~ /darwin/i && $^O =~ /win/i) ? 1 : 0;
708 11 50       37 if ($windows) {
709 0         0 return &__set_locale_windows;
710             }
711            
712 11         37 my $set_language;
713             my $set_country;
714              
715             # First we try to only use the language.
716 11         29 my @languages = ($language);
717 11         55 my @lc_languages = map { lc $_ } @languages;
  11         49  
718 11         24 my @uc_languages = map { uc $_ } @languages;
  11         31  
719 11         25 my %seen = ();
720              
721 11         27 foreach my $language (@languages, @lc_languages, @uc_languages) {
722 33 100       99 next if $seen{$language}++;
723 22         33 warn "Trying lingua only setlocale '$language'.\n" if DEBUG;
724 22         496 my $result = POSIX::setlocale ($category, $language);
725 22 50       72 if ($result) {
726 0 0       0 $set_locale = $set_language = $result if $result;
727 0         0 last;
728             }
729             }
730              
731             # Now try it with the country appended.
732 11 50       48 my @countries = length $country ? ($country) : ();
733 11         26 my @uc_countries = map { uc $_ } @countries;
  11         33  
734 11         20 my @lc_countries = map { uc $_ } @countries;
  11         22  
735 11         26 push @countries, @uc_countries, @lc_countries;
736            
737 11         23 LINGUA: foreach my $language (@languages, @lc_languages, @uc_languages) {
738 33         67 my $count = 0;
739             my @guessed_countries = (LANG2COUNTRY->{lc $language},
740             lc LANG2COUNTRY->{lc $language},
741 33         131 uc LANG2COUNTRY->{lc $language});
742 33         79 foreach my $c (@countries, @guessed_countries) {
743 198         217 ++$count;
744 198 50 33     526 next unless defined $c && length $c;
745 198         241 my $try = $language . '_' . $c;
746 198 100       462 next if $seen{$try}++;
747 66         71 warn "Trying setlocale '$try'.\n" if DEBUG;
748 66         814 my $result = POSIX::setlocale ($category, $try);
749 66 50       143 if ($result) {
750 0         0 $set_locale = $result;
751 0 0       0 if ($count >= @countries) {
752 0         0 $set_country = $c;
753             } else {
754 0         0 $set_country = $country;
755             }
756              
757 0         0 last LINGUA;
758             }
759             }
760             }
761            
762 11 50       33 unless (length $charset) {
763 11 50 33     112 return unless defined $set_locale && length $set_locale;
764            
765 0           $locale_cache->{$language}->{$country}->{$charset} =
766             [$set_locale, $set_country];
767 0 0         return wantarray ? ($set_locale, $set_country) : $set_locale;
768             }
769            
770 0           my @charsets = ($charset);
771 0           my $cleaned = $charset;
772 0 0         push @charsets, $cleaned if $cleaned =~ s/-//g;
773 0           my @lc_charsets = map { lc $charset } @charsets;
  0            
774 0           my @uc_charsets = map { uc $charset } @charsets;
  0            
775 0           push @charsets, @lc_charsets, @uc_charsets;
776            
777 0           %seen = ();
778 0           LINGUA2: foreach my $language (@languages,
779             @lc_languages, @uc_languages) {
780             my @guessed_countries = (LANG2COUNTRY->{lc $language},
781             lc LANG2COUNTRY->{lc $language},
782 0           uc LANG2COUNTRY->{lc $language});
783 0           my $count = 0;
784 0           foreach my $c (@countries, @guessed_countries) {
785 0           ++$count;
786 0 0 0       $c = '' unless defined $c && length $c;
787 0           my $country_try = $language;
788 0 0         $country_try .= (length $c) ? "_$c" : '';
789            
790 0           foreach my $ch (@charsets, @lc_charsets, @uc_charsets) {
791 0           my $try = $country_try . '.' . $ch;
792 0 0         next if $seen{$try}++;
793 0           warn "Trying setlocale '$try'.\n" if DEBUG;
794            
795 0           my $result = POSIX::setlocale ($category, $try);
796 0 0         if ($result) {
797 0           $set_locale = $result;
798 0 0         if ($count >= @countries) {
799 0           $set_country = $c;
800             } else {
801 0           $set_country = $country;
802             }
803            
804 0           last LINGUA2;
805             }
806             }
807             }
808             }
809              
810 0 0 0       return unless defined $set_locale && length $set_locale;
811              
812 0           $locale_cache->{$language}->{$country}->{$charset} =
813             [$set_locale, $set_country];
814              
815 0 0         return wantarray ? ($set_locale, $set_country) : $set_locale;
816             }
817              
818             sub __set_locale_windows {
819 0     0     my ($category, $language, $country, $charset) = @_;
820              
821             # See https://github.com/gflohr/libintl-perl/issues/14!
822 12     12   95 no if $] >= 5.022, warnings => 'locale';
  12         21  
  12         12455  
823              
824 0           my $set_locale;
825              
826 0 0         $country = '' unless defined $country;
827 0 0         $charset = '' unless defined $charset;
828            
829             # First we try to only use the language.
830 0           my $long_language = WIN32LANGUAGE->{lc $language};
831 0           my @languages = ($long_language, $language);
832 0           my %seen = ();
833 0           foreach my $language (@languages) {
834 0 0         next if $seen{$language}++;
835 0           warn "Trying lingua only setlocale '$language'.\n" if DEBUG;
836 0           my $result = POSIX::setlocale ($category, $language);
837 0 0         if ($result) {
838 0 0         $set_locale = $result if $result;
839 0           last;
840             }
841             }
842            
843             # Now try it with the country appended.
844 0           my $set_country;
845 0 0         if (length $country) {
846 0           COMBI: foreach my $language (@languages) {
847             # We do not need a fallback country here, because the "system" already
848             # provides the information.
849 0           my @short_countries = ($country);
850             my @countries = map {
851 0           WIN32COUNTRY->{lc $_}
852 0           } grep { length $_ } @short_countries;
  0            
853 0           foreach my $c (@countries) {
854 0 0 0       next unless defined $c && length $c;
855 0           my $try = $language . '_' . $c;
856 0 0         next if $seen{$try}++;
857 0           warn "Trying setlocale '$try'.\n" if DEBUG;
858 0           my $result = POSIX::setlocale ($category, $try);
859 0 0         if ($result) {
860 0           $set_locale = $result;
861 0           $set_country = $c;
862 0           last COMBI;
863             }
864             }
865             }
866             }
867              
868 0 0 0       return unless defined $set_locale && length $set_locale;
869              
870             # Apparently, there is no point in setting a charset. Even the new
871             # MS-DOS versions like 2000 or XP still have the concept of more or
872             # less fixed codepages. Switching to UTF-8 does not work.
873 0           $locale_cache->{$language}->{$country}->{$charset} =
874             [$set_locale, $set_country];
875 0 0         return wantarray ? ($set_locale, $set_country) : $set_locale;
876             }
877              
878             sub get_locale_cache {
879 0     0 1   $locale_cache;
880             }
881              
882             sub set_locale_cache {
883 0 0 0 0 1   if (ref $_[0] && 'HASH' eq ref $_[0]) {
884 0           $locale_cache = $_[0];
885             } else {
886 0           my %locale_cache = @_;
887 0           $locale_cache = \%locale_cache;
888             }
889             }
890              
891             sub web_set_locale {
892 0     0 1   my ($accept_language, $accept_charset, $category, $available) = @_;
893              
894 0           my %available;
895 0 0         if ($available) {
896 0           foreach (@$available) {
897 0           my $locale = $_;
898 0           $locale =~ s/[_\@\.].*//;
899 0           $available{lc $locale} = 1;
900             }
901             }
902              
903 0           my @languages;
904 0 0 0       if (ref $accept_language && 'ARRAY' eq ref $accept_language) {
905 0           @languages = @$accept_language;
906             } else {
907 0           @languages = parse_http_accept_language $accept_language;
908             }
909              
910 0 0         if ($available) {
911 0           my @all = @languages;
912 0           @languages = ();
913 0           foreach my $locale (@all) {
914 0           my $language = lc $locale;
915 0           $language =~ s/[_\@\.].*//;
916 0 0         push @languages, $locale if $available{$language};
917             }
918             }
919              
920 0           my @charsets;
921 0 0         if (defined $accept_charset) {
922 0 0 0       if (ref $accept_charset && 'ARRAY' eq ref $accept_charset) {
923 0           @charsets = @$accept_charset;
924             } else {
925 0           @charsets = parse_http_accept_charset $accept_charset;
926             }
927             }
928              
929 0 0         unless (defined $category) {
930 0           require POSIX;
931 0           $category = POSIX::LC_ALL();
932             }
933              
934 0           my ($set_locale, $set_language, $set_country, $set_charset);
935 0           foreach my $lang (@languages) {
936 0           my ($language, $country) = split /-/, $lang, 2;
937              
938 0           my ($locale, $country_used) =
939             set_locale ($category, $language, $country, $charsets[0]);
940            
941 0 0         if (defined $locale) {
942             # If a country was specified, we have to check whether it
943             # was actually selected.
944 0 0         if (defined $country) {
945 0 0 0       if (!defined $country
946             || ($country ne $country_used)) {
947 0           $set_language = $language;
948 0           $set_locale = $locale;
949 0           $set_country = $country_used;
950 0           $set_charset = $charsets[0];
951             }
952             }
953              
954 0 0         if (wantarray) {
955 0           return $locale, $lang, $country_used, $charsets[0];
956             } else {
957 0           return $locale;
958             }
959             }
960             }
961            
962 0 0         if (defined $set_locale) {
963 0 0         if (wantarray) {
964 0           return $set_locale, $set_language, $set_country, $set_charset;
965             } else {
966 0           return $set_locale;
967             }
968             }
969              
970 0           return;
971             }
972              
973             1;
974              
975             __END__
976              
977             =head1 NAME
978              
979             Locale::Util - Portable l10n and i10n functions
980              
981             =head1 SYNOPSIS
982              
983             use Locale::Util;
984              
985             my @linguas = parse_http_accept_language $ENV{HTTP_ACCEPT_LANGUAGE};
986              
987             my @charsets = parse_http_accept_charset $ENV{HTTP_ACCEPT_CHARSET};
988              
989             # Trie to set the locale to Brasilian Portuguese in UTF-8.
990             my $set_locale = set_locale LC_ALL, 'pt', 'BR', 'utf-8';
991              
992             set_locale_cache $last_cache;
993            
994             my $cache = get_locale_cache;
995              
996             web_set_locale ($ENV{HTTP_ACCEPT_LANGUAGE}, $ENV_ACCEPT_CHARSET);
997              
998             web_set_locale (['fr-BE', 'fr', 'it'], ['cp1252', 'utf-8']);
999              
1000             =head1 DESCRIPTION
1001              
1002             This module provides portable functions dealing with localization
1003             (l10n) and internationalization(i10n). It doesn't export anything
1004             by default, you have to specify each function you need in the import
1005             list, or use the fully qualified name.
1006              
1007             The functions here have a focus on web development, although they
1008             are general enough to have them in the Locale:: namespace.
1009              
1010             This module is considered alpha code. The interface is not stable.
1011             Please contact the author if you want to use it in production code.
1012              
1013             This module was introduced in libintl-perl 1.17.
1014              
1015             =head1 FUNCTIONS
1016              
1017             =over 4
1018              
1019             =item B<parse_http_accept_language STRING>
1020              
1021             Parses a string as passed in the HTTP header "Accept-Language".
1022             It returns a list of tokens sorted by the quality value, see RFC 2616
1023             for details.
1024              
1025             Example:
1026              
1027             parse_http_accept ("fr-fr, fr; q=0.7, de; q=0.3");
1028              
1029             This means: Give me French for France with a quality value of 1.0
1030             (the maximum). Otherwise I will take any other French version
1031             (quality 0.7), German has a quality of 0.3 for me.
1032              
1033             The function will return a list of tokens in the order of their quality
1034             values, in this case "fr-fr", "fr" and "de".
1035              
1036             The function is more forgiving than RFC 2616. It accepts quality
1037             values greater than 1.0 and with more than 3 decimal places. It
1038             also accepts languages and country names with more than 8 characters.
1039             The language "*" is translated into "C".
1040              
1041             =item B<parse_http_accept_charset STRING>
1042              
1043             Parses a string as passed in the HTTP header "Accept-Charset".
1044             It returns a list of tokens sorted by the quality value, see RFC 2616
1045             for details.
1046              
1047             The special character set "*" (means all character sets) will be
1048             translated to the undefined value.
1049              
1050             =item B<set_locale CATEGORY, LANGUAGE[, COUNTRY, CHARSET]>
1051              
1052             Tries to set the user locale by means of POSIX::setlocale(). The latter
1053             function has the disadvantage, that its second argument (the locale
1054             description string) is completely non-standard and system-dependent.
1055             This function tries its best at guessing the system's notion of a locale
1056             dientifier, with the arguments supplied:
1057              
1058             =over 8
1059              
1060             =item B<CATEGORY>
1061              
1062             An integer argument for a valid locale category. These are the
1063             LC_* constants (LC_ALL, LC_CTIME, LC_COLLATE, ...) defined in both
1064             Locale::Messages(3pm) and POSIX(3pm).
1065              
1066             =item B<LANGUAGE>
1067              
1068             A 2-letter language identifier as per ISO 639. Case doesn't matter,
1069             but an unchanged version (ie. not lower-cased) of the language you
1070             provided will always be tried to.
1071              
1072             =item B<COUNTRY>
1073              
1074             A 2-letter language identifier as per ISO 639. Case doesn't matter,
1075             but an unchanged version (ie. not lower-cased) of the language you
1076             provided will always be tried to.
1077              
1078             This parameter is optional. If it is not defined, the function will
1079             try to guess an appropriate country, otherwise leave it to the
1080             operating system.
1081              
1082             =item B<CHARSET>
1083              
1084             A valid charset name. Valid means valid! The charset "utf8" is not
1085             valid (it is "utf-8"). Charset names that are accepted by the
1086             guessing algorithms in Encode(3pm) are also not necessarily valid.
1087              
1088             If the parameter is undefined, it is ignored. It is always ignored
1089             under Windows.
1090              
1091             =back
1092              
1093             The function tries to approach the desired locale in loops, refining
1094             it on every success. It will first try to set the language (for
1095             any country), then try to select the correct language, and finally
1096             try to select the correct charset.
1097              
1098             The return value is false in case of failure, or the return value
1099             of the underlying POSIX::setlocale() call in case of success.
1100              
1101             In array context, the function returns the country name
1102             that was passed in the successful
1103             call to POSIX::setlocale(). If this string is equal to the country
1104             name you passed as an argument, you can be reasonably sure that
1105             the settings for this country are really used. If it is not
1106             equal, the function has taken a guess at the country (it has a list
1107             of "default" countries for each language). It seems that under
1108             Windows, POSIX::setlocale() also succeeds, if you pass a country
1109             name that is actually not supported. Therefore, the information
1110             is not completely reliable.
1111              
1112             Please note that this function is intended for server processes
1113             (especially web applications) that need to switch in a portable
1114             way to a certain locale. It is B<not> the recommended way to set
1115             the program locale for a regular application. In a regular application
1116             you should do the following:
1117              
1118             use POSIX qw (setlocale LC_ALL);
1119             setlocale LC_ALL, '';
1120              
1121             The empty string as the second argument means, that the system
1122             should switch to the user's default locale.
1123              
1124             =item B<get_locale_cache>
1125              
1126             The function set_locale() is potentially expansive, especially when
1127             it fails, because it can try a lot of different combinations, and
1128             the system may have to load a lot of locale definitions from its
1129             internal database.
1130              
1131             In order to speed up things, results are internally cached in a
1132             hash, keys are the languages, subkeys countries, subsubkeys the
1133             charsets. You can get a reference to this hash with get_locale_cache().
1134              
1135             The function cannot fail.
1136              
1137             =item B<set_locale_cache HASH>
1138              
1139             Sets the internal cache. You can either pass a hash or a hash reference.
1140             The function will use this as its cache, discarding its old cache.
1141             This allows you to keep the hash persistent.
1142              
1143             The function cannot fail.
1144              
1145             =item B<web_set_locale (ACCEPT_LANGUAGE, ACCEPT_CHARSET, CATEGORY,
1146             AVAILABLE)>
1147              
1148             Try to change the locale to the settings described by ACCEPT_LANGUAGE
1149             and ACCEPT_CHARSET. For each argument you can either pass a string
1150             as in the corresponding http header, or a reference to an array
1151             of language resp. charset identifiers.
1152              
1153             Currently only the first charset passed is used as an argument.
1154             You are strongly encouraged to pass a hard-coded value here, so
1155             that you have control about your output.
1156              
1157             The argument B<CATEGORY> specifies the category (one of the LC_*
1158             constants as defined in Locale::Messages(3pm) or in POSIX(3pm)).
1159             The category defaults to LC_ALL.
1160              
1161             You can pass an optional reference to a list of locales in
1162             XPG4 format that are available in your application. This is
1163             useful if you know which languages are supported by your application.
1164             In fact, only the language part of the values in the list are
1165             considered (for example for "en_US", only "en" is used). The
1166             country or other parts are ignored.
1167              
1168             The function returns the return value of the underlying set_locale()
1169             call, or false on failure.
1170              
1171             The function returns false on failure. On success it returns the
1172             return value of the underlying set_locale() call. This value can
1173             be used directly in subsequent calls to POSIX::setlocale(). In
1174             array context, it additionally returns the identifiers for the language,
1175             the country, and the charset actually used.
1176              
1177             =back
1178              
1179             =head1 BUGS
1180              
1181             The function set_locale() probably fails to guess the correct locale
1182             identifier on a lot of systems. If you have found such a case,
1183             please submit it as a bug report.
1184              
1185             The bug tracking system for this packags is at
1186             http://rt.cpan.org/NoAuth/Bugs.html?libintl-perl
1187              
1188             Please note that this module is considered alpha code, and the interface
1189             is not stable. Please contact the author, if you want to use it in
1190             production code.
1191              
1192             =head1 AUTHOR
1193              
1194             Copyright (C) 2002-2026 L<Guido Flohr|http://www.guido-flohr.net/>
1195             (L<mailto:guido.flohr@cantanea.com>), all rights reserved. See the source
1196             code for details!code for details!
1197              
1198             =head1 SEE ALSO
1199              
1200             POSIX(3pm), perl(1)
1201              
1202             =cut
1203             Local Variables:
1204             mode: perl
1205             perl-indent-level: 4
1206             perl-continued-statement-offset: 4
1207             perl-continued-brace-offset: 0
1208             perl-brace-offset: -4
1209             perl-brace-imaginary-offset: 0
1210             perl-label-offset: -4
1211             cperl-indent-level: 4
1212             cperl-continued-statement-offset: 2
1213             tab-width: 4
1214             End: