line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
############################################################## |
2
|
|
|
|
|
|
|
# Text::StripAccents - remove non a-z chars from a string |
3
|
|
|
|
|
|
|
# and replace them with their a-z counterparts |
4
|
|
|
|
|
|
|
############################################################## |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Version information |
7
|
|
|
|
|
|
|
# =================== |
8
|
|
|
|
|
|
|
# |
9
|
|
|
|
|
|
|
# 0.1 CC Apr 05 New module |
10
|
|
|
|
|
|
|
# |
11
|
|
|
|
|
|
|
# 0.11 CC Jun 05 After feedback in cpanrating, |
12
|
|
|
|
|
|
|
# documented that the module is |
13
|
|
|
|
|
|
|
# latin1 only, and pp with no |
14
|
|
|
|
|
|
|
# prereqs |
15
|
|
|
|
|
|
|
# |
16
|
|
|
|
|
|
|
############################################################## |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package Text::StripAccents; |
20
|
2
|
|
|
2
|
|
1005
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
71
|
|
21
|
2
|
|
|
2
|
|
82
|
use vars qw (@ISA $VERSION @EXPORT); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
142
|
|
22
|
2
|
|
|
2
|
|
10
|
use Exporter (); |
|
2
|
|
|
|
|
15
|
|
|
2
|
|
|
|
|
1427
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
25
|
|
|
|
|
|
|
@EXPORT = qw(stripaccents); |
26
|
|
|
|
|
|
|
$VERSION="0.11"; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
############################################################## |
29
|
|
|
|
|
|
|
=pod |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 NAME |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
Text::StripAccents - removes accented & special characters from strings |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
=head1 SYNOPSIS |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
use Text::StripAccents; |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $StripAccents = Text::StripAccents->new(); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
my $convertedString = $StripAccents->strip($unconvertedString); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
OR |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
use Text::StripAccents; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
stripaccents($string); |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 DESCRIPTION |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
This simple module takes accented characters and replaces them with their anglicised ASCII counterparts, e.g. Ü becomes U. It currently ONLY supports Latin1. If there are any characters I've missed out that you think should be included, please mail me and I'll add them in. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
This is a pure perl module with no prerequisites. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 PREREQS |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
None. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head1 SEE ALSO |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
Text::Unaccent is a much more advanced utility to do the same job, but with a C dependency. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 CHANGES |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
0.11 - bugfix to clarify the documentation, as per Dobrica Pavlinusic's feedback. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 LICENSE |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Copyright 2005 by Charles Colbourn, all rights reserved. This program is free software, you can redistribute it and/or modify it under the same terms as Perl itself. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 AUTHOR |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Charles Colbourn - charlesc@g0n.net |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
(Character mapping hash supplied by Nigel Currie). |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=cut |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
############################################################## |
81
|
|
|
|
|
|
|
# Text::StripAccents::new - constructor |
82
|
|
|
|
|
|
|
############################################################## |
83
|
|
|
|
|
|
|
# |
84
|
|
|
|
|
|
|
# Takes as param the character set you are using. Latin1 |
85
|
|
|
|
|
|
|
# support only at present (the param is accepted but currently ignored) |
86
|
|
|
|
|
|
|
# |
87
|
|
|
|
|
|
|
# returns a Text::StripAccents object |
88
|
|
|
|
|
|
|
############################################################## |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub new
{
    # Constructor.  Creates an empty hash and blesses it into the class
    # the method was invoked on.
    #
    # Params:  $class - the class name to bless into
    #          a second argument (character set name) may be supplied
    #          by the caller, but nothing is done with it
    # Returns: the new blessed hash reference
    my ($class) = @_;

    my $self = {};
    bless $self, $class;

    return $self;
}
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
############################################################### |
100
|
|
|
|
|
|
|
# Text::StripAccents::strip |
101
|
|
|
|
|
|
|
############################################################### |
102
|
|
|
|
|
|
|
# |
103
|
|
|
|
|
|
|
# Removes all accented chars from a string and replaces them |
104
|
|
|
|
|
|
|
# with their unaccented equivalents. |
105
|
|
|
|
|
|
|
# |
106
|
|
|
|
|
|
|
# takes a string as a param, returns a converted string |
107
|
|
|
|
|
|
|
# |
108
|
|
|
|
|
|
|
############################################################### |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub strip
{
    # Replace accented Latin-1 letters in a string with plain ASCII
    # letters.
    #
    # Params:  $object - the invocant (class name or object); not used
    #          $string - the text to convert
    # Returns: a converted copy of $string, or '' when $string is undef.
    #          Characters that have no mapping below (including all
    #          plain ASCII) are passed through unchanged.
    #
    # The accented characters are written as code points (\xC0 etc.)
    # rather than as literal letters, so the mapping does not depend on
    # the encoding this file happens to be saved in.  tr/// works on
    # characters, so the same code handles Latin-1 byte strings and
    # decoded character strings.

    my $object = shift;
    my $string = shift;

    # The previous split/join implementation turned undef into ''.
    return '' unless defined $string;

    # When the replacement list is shorter than the search list, tr///
    # repeats its last character, so each range collapses to one letter.
    $string =~ tr/\xC0-\xC5/A/;    # A grave, acute, circumflex, tilde, diaeresis, ring
    $string =~ tr/\xC7/C/;         # C cedilla
    $string =~ tr/\xC8-\xCB/E/;    # E grave, acute, circumflex, diaeresis
    $string =~ tr/\xCC-\xCF/I/;    # I grave, acute, circumflex, diaeresis
    $string =~ tr/\xD1/N/;         # N tilde
    $string =~ tr/\xD2-\xD6/O/;    # O grave, acute, circumflex, tilde, diaeresis
    $string =~ tr/\xD9-\xDC/U/;    # U grave, acute, circumflex, diaeresis
    $string =~ tr/\xDD/Y/;         # Y acute (was missing from the old table)

    $string =~ tr/\xE0-\xE5/a/;    # a grave, acute, circumflex, tilde, diaeresis, ring
    $string =~ tr/\xE7/c/;         # c cedilla
    $string =~ tr/\xE8-\xEB/e/;    # e grave, acute, circumflex, diaeresis
    $string =~ tr/\xEC-\xEF/i/;    # i grave, acute, circumflex, diaeresis
    $string =~ tr/\xF1/n/;         # n tilde
    $string =~ tr/\xF2-\xF6/o/;    # o grave, acute, circumflex, tilde, diaeresis
    $string =~ tr/\xF9-\xFC/u/;    # u grave, acute, circumflex, diaeresis
    $string =~ tr/\xFD\xFF/y/;     # y acute, y diaeresis (the latter was missing)

    # Sharp s is the only one-to-two mapping, so it cannot be done by tr///.
    $string =~ s/\xDF/ss/g;

    return $string;
}
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
################################################################ |
196
|
|
|
|
|
|
|
# stripaccents - function to call ::strip in non OO mode |
197
|
|
|
|
|
|
|
################################################################ |
198
|
|
|
|
|
|
|
sub stripaccents
{
    # Functional (non-OO) entry point, exported by default.
    #
    # Params:  the text to convert (first argument)
    # Returns: whatever the strip method of this package returns for it
    my ($text) = @_;

    # Invoke strip as a class method on this package so no object is needed.
    my $converted = __PACKAGE__->strip($text);

    return $converted;
}
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
1; |
207
|
|
|
|
|
|
|
|