line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package No::Sort; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
require 5.002; |
4
|
1
|
|
|
1
|
|
634
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
5
|
1
|
|
|
1
|
|
5
|
use vars qw(@ISA @EXPORT @EXPORT_OK $VERSION $DEBUG); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
966
|
|
6
|
|
|
|
|
|
|
require Exporter; |
7
|
|
|
|
|
|
|
@ISA=qw(Exporter); |
8
|
|
|
|
|
|
|
@EXPORT=qw(no_sort); |
9
|
|
|
|
|
|
|
@EXPORT_OK=qw(no_xfrm no_aa_xfrm |
10
|
|
|
|
|
|
|
latin1_uc latin1_lc latin1_ucfirst latin1_lcfirst); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
$VERSION = sprintf("%d.%02d", q$Revision: 1.3 $ =~ /(\d+)\.(\d+)/); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
No::Sort - Norwegian sorting |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use No::Sort; |
22
|
|
|
|
|
|
|
@sortert = no_sort @norske_ord; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
This module provides the function no_sort(), which sorts ISO-8859/1 |
27
|
|
|
|
|
|
|
encoded strings according to Norwegian practice. The routine works |
28
|
|
|
|
|
|
|
like the normal perl sort routine, but the optional first argument is |
29
|
|
|
|
|
|
|
special. It can either be a reference to the strxfrm() function to |
30
|
|
|
|
|
|
|
use while sorting or a reference to a hash used to transform the words |
31
|
|
|
|
|
|
|
while sorting. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
You can also import the no_xfrm() function which is used for standard |
34
|
|
|
|
|
|
|
sorting. It can be useful to base your custom transformation function |
35
|
|
|
|
|
|
|
on it. If we for instance would like to sort "Aa" as "Å" we could |
36
|
|
|
|
|
|
|
implement it like this: |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
use No::Sort qw(no_sort no_xfrm); |
39
|
|
|
|
|
|
|
sub my_xfrm { |
40
|
|
|
|
|
|
|
my $word = shift; |
41
|
|
|
|
|
|
|
$word =~ s/A[aA]/Å/g; |
42
|
|
|
|
|
|
|
$word =~ s/aa/å/g; |
43
|
|
|
|
|
|
|
no_xfrm($word); |
44
|
|
|
|
|
|
|
} |
45
|
|
|
|
|
|
|
@sorted = no_sort \&my_xfrm, @names; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
By the way, the my_xfrm shown in this example can be imported from |
48
|
|
|
|
|
|
|
this module under the name 'no_aa_xfrm': |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
use No::Sort qw(no_sort no_aa_xfrm); |
51
|
|
|
|
|
|
|
@sorted = no_sort \&no_aa_xfrm, @names; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
If you set the $No::Sort::DEBUG variable to a TRUE value, then we will |
54
|
|
|
|
|
|
|
make some extra noise on STDERR while sorting. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The module can also export functions for up/down casing ISO-8859/1 |
57
|
|
|
|
|
|
|
strings. These functions are called latin1_uc(), latin1_lc(), |
58
|
|
|
|
|
|
|
latin1_ucfirst(), latin1_lcfirst(). |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 SEE ALSO |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
L |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=head1 AUTHORS |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Hallvard B Furuseth , Gisle Aas |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=cut |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
sub no_sort { |
71
|
3
|
|
|
3
|
0
|
1140
|
my $xfrm; # ref to sort hash |
72
|
3
|
100
|
|
|
|
9
|
if (ref $_[0]) { |
73
|
1
|
50
|
|
|
|
5
|
if (ref($_[0]) eq "CODE") { |
|
|
0
|
|
|
|
|
|
74
|
1
|
|
|
|
|
2
|
my $code = shift; |
75
|
1
|
|
|
|
|
6
|
@{$xfrm}{@_} = map &$code($_), @_; |
|
1
|
|
|
|
|
14
|
|
76
|
|
|
|
|
|
|
} elsif (ref($_[0]) eq "HASH") { |
77
|
0
|
|
|
|
|
0
|
$xfrm = shift; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
} |
80
|
3
|
100
|
|
|
|
15
|
@{$xfrm}{@_} = map no_xfrm($_), @_ unless $xfrm; |
|
2
|
|
|
|
|
50
|
|
81
|
|
|
|
|
|
|
|
82
|
3
|
100
|
|
|
|
12
|
if ($DEBUG) { |
83
|
2
|
50
|
|
|
|
7
|
my @s = sort { $xfrm->{$a} cmp $xfrm->{$b} || $a cmp $b } @_; |
|
106
|
|
|
|
|
217
|
|
84
|
2
|
|
|
|
|
331
|
printf STDERR "%-20s %s\n", "ORD", "SORTERES SOM"; |
85
|
2
|
|
|
|
|
374
|
print STDERR "-" x 20, " ", "-" x 40, "\n"; |
86
|
2
|
|
|
|
|
6
|
for (@s) { |
87
|
34
|
|
|
|
|
3509
|
printf STDERR "%-20s %s\n", $_, $xfrm->{$_}; |
88
|
|
|
|
|
|
|
} |
89
|
2
|
|
|
|
|
31
|
return @s; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
1
|
50
|
|
|
|
8
|
sort { $xfrm->{$a} cmp $xfrm->{$b} || $a cmp $b } @_; |
|
85
|
|
|
|
|
167
|
|
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub no_xfrm { |
96
|
59
|
|
|
59
|
0
|
92
|
my $p1 = shift; |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
# Ikke-alfanumeriske tegn regnes som en enkelt blank |
99
|
|
|
|
|
|
|
# (eller sikkert litt mer komplisert, f.eks whitespace -> blank, |
100
|
|
|
|
|
|
|
# punktum o.l -> et annet "lite" tegn, med blanke fjernet på begge |
101
|
|
|
|
|
|
|
# sider, osv... |
102
|
59
|
|
|
|
|
62
|
$p1 =~ tr/\0-\040\177\200-\240/ /s; |
103
|
59
|
50
|
|
|
|
103
|
$p1 =~ tr/ 0-9_A-Za-zÀ-ÖØ-ßà-öø-ÿ/,/cs |
104
|
|
|
|
|
|
|
and $p1 =~ s/,[ ,]+/,/g; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# Plasser æøå i riktig rekkefølge. Tar med svensk äö også. |
107
|
|
|
|
|
|
|
# (Egentlig burde *alle* tegn transformeres slik at ting kommer i |
108
|
|
|
|
|
|
|
# riktig rekkefølge her, men da blir resten av programmet så uleselig...) |
109
|
59
|
|
|
|
|
55
|
$p1 =~ tr[æäøöåÆÄØÖÅ] |
110
|
|
|
|
|
|
|
[ååææøÅÅÆÆØ]; |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
# Aksenter telles bare hvis uaksentede tegn er like |
113
|
59
|
|
|
|
|
53
|
my $p2 = $p1; |
114
|
59
|
|
|
|
|
52
|
$p2 =~ tr[ÀÁÂÃÄÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖÙÚÛÜÝÞßàáâãäçèéêëìíîïðñòóôõöùúûüýþÿ] |
115
|
|
|
|
|
|
|
[AAAAÆCEEEEIIIIDNOOOOØUUUUYTSaaaaæceeeeiiiidnooooøuuuuyty]; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Store & små bokstaver er bare forskjellig hvis alt annet er likt |
118
|
59
|
|
|
|
|
56
|
my $p3 = $p2; |
119
|
59
|
|
|
|
|
48
|
$p3 =~ tr[A-ZÆØÅ] |
120
|
|
|
|
|
|
|
[a-zæøå]; |
121
|
|
|
|
|
|
|
|
122
|
59
|
|
|
|
|
153
|
join("\1", $p3, $p2, $p1); |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub no_aa_xfrm { |
126
|
0
|
|
|
0
|
0
|
0
|
my $word = shift; |
127
|
0
|
|
|
|
|
0
|
$word =~ s/A[aA]/Å/g; |
128
|
0
|
|
|
|
|
0
|
$word =~ s/aa/å/g; |
129
|
0
|
|
|
|
|
0
|
no_xfrm($word); |
130
|
|
|
|
|
|
|
} |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
# Some additional case conversion routines that do not really have |
133
|
|
|
|
|
|
|
# much to do with sorting. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
sub latin1_lc |
136
|
|
|
|
|
|
|
{ |
137
|
2
|
|
|
2
|
0
|
131
|
my $str = shift; |
138
|
2
|
|
|
|
|
6
|
$str =~ tr[A-ZÀ-ÖØ-Þ] |
139
|
|
|
|
|
|
|
[a-zà-öø-þ]; |
140
|
2
|
|
|
|
|
7
|
$str; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub latin1_uc |
144
|
|
|
|
|
|
|
{ |
145
|
2
|
|
|
2
|
0
|
254
|
my $str = shift; |
146
|
2
|
|
|
|
|
6
|
$str =~ tr[a-zà-öø-þ] |
147
|
|
|
|
|
|
|
[A-ZÀ-ÖØ-Þ]; |
148
|
2
|
|
|
|
|
8
|
$str; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub latin1_ucfirst |
152
|
|
|
|
|
|
|
{ |
153
|
1
|
|
|
1
|
0
|
126
|
my $str = shift; |
154
|
1
|
|
|
|
|
7
|
$str =~ s/(.)/latin1_uc($1)/es; |
|
1
|
|
|
|
|
6
|
|
155
|
1
|
|
|
|
|
5
|
$str; |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub latin1_lcfirst |
159
|
|
|
|
|
|
|
{ |
160
|
1
|
|
|
1
|
0
|
228
|
my $str = shift; |
161
|
1
|
|
|
|
|
7
|
$str =~ s/(.)/latin1_lc($1)/es; |
|
1
|
|
|
|
|
3
|
|
162
|
1
|
|
|
|
|
5
|
$str; |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
1; |