line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Hash::Normalize; |
2
|
|
|
|
|
|
|
|
3
|
6
|
|
|
6
|
|
75917
|
use 5.010; |
|
6
|
|
|
|
|
24
|
|
4
|
|
|
|
|
|
|
|
5
|
6
|
|
|
6
|
|
33
|
use strict; |
|
6
|
|
|
|
|
15
|
|
|
6
|
|
|
|
|
135
|
|
6
|
6
|
|
|
6
|
|
30
|
use warnings; |
|
6
|
|
|
|
|
16
|
|
|
6
|
|
|
|
|
334
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=encoding UTF-8 |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Hash::Normalize - Automatically normalize Unicode hash keys. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 VERSION |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Version 0.01 |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=cut |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our $VERSION; |
21
|
|
|
|
|
|
|
BEGIN { |
22
|
6
|
|
|
6
|
|
125
|
$VERSION = '0.01'; |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=head1 SYNOPSIS |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
use Hash::Normalize qw; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
normalize my %hash, 'NFC'; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$hash{café} = 'coffee'; # NFD, "cafe\x{301}" |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
print $hash{café}; # NFD, "cafe\x{301}" |
34
|
|
|
|
|
|
|
# 'coffee' is printed |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
print $hash{café}; # NFC, "caf\x{e9}" |
37
|
|
|
|
|
|
|
# 'coffee' is also printed |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head1 DESCRIPTION |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
This module provides an utility routine that augments a given Perl hash table so that its keys are automatically normalized following one of the Unicode normalization schemes. |
42
|
|
|
|
|
|
|
All the following actions on this hash will be made regardless of how the key used for the action is normalized. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Since this module does not use the C mechanism, normalized hashes are indistinguishable from regular hashes as far as Perl is concerned, but this module also provides L to identify them if necessary. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
47
|
|
|
|
|
|
|
|
48
|
6
|
|
|
6
|
|
2571
|
use Variable::Magic; |
|
6
|
|
|
|
|
5939
|
|
|
6
|
|
|
|
|
278
|
|
49
|
6
|
|
|
6
|
|
2911
|
use Unicode::Normalize (); |
|
6
|
|
|
|
|
9201
|
|
|
6
|
|
|
|
|
2408
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head1 FUNCTIONS |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=head2 C |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
normalize %hash; |
56
|
|
|
|
|
|
|
normalize %hash, $mode; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Applies the Unicode normalization scheme C<$mode> onto C<%hash>. |
59
|
|
|
|
|
|
|
C<$mode> defaults to C<'NFC'> if omitted, and should match C^(?:(?:nf)?k?|fc)[cd]$/i> otherwise. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
C will first try to forcefully normalize the existing keys in C<%hash> to the new mode, but it will throw an exception if there are distinct keys that have the same normalization. |
62
|
|
|
|
|
|
|
All the keys subsequently used for fetches, stores, exists, deletes and list assignments are then first passed through the according normalization procedure. |
63
|
|
|
|
|
|
|
C will also return the list of normalized keys. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=cut |
66
|
|
|
|
|
|
|
|
67
|
33
|
|
|
33
|
|
10969
|
sub _remap { $_[2] = Unicode::Normalize::normalize($_[1], "$_[2]"); undef } |
|
33
|
|
|
|
|
1240
|
|
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
my $wiz = Variable::Magic::wizard( |
70
|
|
|
|
|
|
|
data => sub { $_[1] }, |
71
|
|
|
|
|
|
|
fetch => \&_remap, |
72
|
|
|
|
|
|
|
store => \&_remap, |
73
|
|
|
|
|
|
|
exists => \&_remap, |
74
|
|
|
|
|
|
|
delete => \&_remap, |
75
|
|
|
|
|
|
|
copy_key => 1, |
76
|
|
|
|
|
|
|
); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub _validate_mode { |
79
|
15
|
|
|
15
|
|
36
|
my $mode = shift; |
80
|
|
|
|
|
|
|
|
81
|
15
|
100
|
|
|
|
81
|
$mode = 'nfc' unless defined $mode; |
82
|
15
|
100
|
|
|
|
130
|
if ($mode =~ /^(?:nf)?(k?[cd])$/i) { |
|
|
100
|
|
|
|
|
|
83
|
12
|
|
|
|
|
60
|
$mode = uc "NF$1"; |
84
|
|
|
|
|
|
|
} elsif ($mode =~ /^(fc[cd])$/i) { |
85
|
2
|
|
|
|
|
10
|
$mode = uc "$1"; |
86
|
|
|
|
|
|
|
} else { |
87
|
1
|
|
|
|
|
13
|
require Carp; |
88
|
1
|
|
|
|
|
216
|
Carp::croak('Invalid normalization'); |
89
|
|
|
|
|
|
|
} |
90
|
|
|
|
|
|
|
|
91
|
14
|
|
|
|
|
42
|
return $mode |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub normalize (\%;$) { |
95
|
15
|
|
|
15
|
1
|
4552
|
my ($hash, $mode) = @_; |
96
|
|
|
|
|
|
|
|
97
|
15
|
|
|
|
|
48
|
my $previous_mode = &get_normalization($hash); |
98
|
15
|
|
|
|
|
49
|
my $new_mode = _validate_mode($mode); |
99
|
14
|
100
|
100
|
|
|
82
|
return $hash if defined $previous_mode and $previous_mode eq $new_mode; |
100
|
|
|
|
|
|
|
|
101
|
13
|
|
|
|
|
58
|
&Variable::Magic::dispell($hash, $wiz); |
102
|
|
|
|
|
|
|
|
103
|
13
|
100
|
|
|
|
45
|
if (%$hash) { |
104
|
6
|
|
|
|
|
10
|
my %dup; |
105
|
6
|
|
|
|
|
24
|
for my $key (keys %$hash) { |
106
|
12
|
|
|
|
|
41
|
my $norm = Unicode::Normalize::normalize($new_mode, $key); |
107
|
12
|
100
|
|
|
|
199
|
if (exists $dup{$norm}) { |
108
|
1
|
|
|
|
|
9
|
require Carp; |
109
|
1
|
|
|
|
|
163
|
Carp::croak('Key collision after normalization'); |
110
|
|
|
|
|
|
|
} |
111
|
11
|
|
|
|
|
38
|
$dup{$norm} = $hash->{$key}; |
112
|
|
|
|
|
|
|
} |
113
|
5
|
|
|
|
|
38
|
%$hash = %dup; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
12
|
|
|
|
|
76
|
&Variable::Magic::cast($hash, $wiz, $new_mode); |
117
|
|
|
|
|
|
|
|
118
|
12
|
|
|
|
|
81
|
return $hash; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head2 C |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
my $mode = get_normalization %hash; |
124
|
|
|
|
|
|
|
normalize %hash, $mode; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Returns the current Unicode normalization scheme in use for C<%hash>, or C if it is a plain hash. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
129
|
|
|
|
|
|
|
|
130
|
23
|
|
|
23
|
1
|
134
|
sub get_normalization (\%) { &Variable::Magic::getdata($_[0], $wiz) } |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head1 NORMALIZED SYMBOL LOOKUPS |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
Stashes (Perl symbol tables) are implemented as plain hashes, therefore one can use C on them to make sure that Unicode symbol lookups are made regardless of normalization. |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
package Foo; |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
BEGIN { |
139
|
|
|
|
|
|
|
require Hash::Normalize; |
140
|
|
|
|
|
|
|
# Enforce NFC normalization |
141
|
|
|
|
|
|
|
Hash::Normalize::normalize(%Foo::, 'NFC') |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub café { # NFD, "cafe\x{301}" |
145
|
|
|
|
|
|
|
return 'coffee' |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub coffee_nfc { |
149
|
|
|
|
|
|
|
café() # NFC, "cafe\x{e9}" |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub coffee_nfd { |
153
|
|
|
|
|
|
|
café() # NFD, "cafe\x{301}" |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Both coffee_nfc() and coffee_nfd() return 'coffee' |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head1 CAVEATS |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Using a normalized hash is slightly slower than a plain hash, due to the normalization procedure and the overhead of magic. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
If a hash is initialized from a normalized hash by list assignment (C<%new = %normalized>), then the normalization scheme will not be carried over to the new hash, although its keys will initially be normalized like the ones from the original hash. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 EXPORT |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
The functions L and L are only exported on request by specifying their names in the module import list. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |
169
|
|
|
|
|
|
|
|
170
|
6
|
|
|
6
|
|
54
|
use base 'Exporter'; |
|
6
|
|
|
|
|
11
|
|
|
6
|
|
|
|
|
576
|
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
our @EXPORT = (); |
173
|
|
|
|
|
|
|
our %EXPORT_TAGS = (); |
174
|
|
|
|
|
|
|
our @EXPORT_OK = qw; |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
L 5.10. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
L, L (core since perl 5). |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
L (core since perl 5.8). |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
L 0.51. |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=head1 AUTHOR |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
Vincent Pit, C<< >>, L. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
You can contact me by mail or on C (vincent). |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head1 BUGS |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Please report any bugs or feature requests to C, or through the web interface at L. |
195
|
|
|
|
|
|
|
I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=head1 SUPPORT |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
perldoc Hash::Normalize |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Copyright 2017 Vincent Pit, all rights reserved. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=cut |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
1; # End of Hash::Normalize |