File Coverage

blib/lib/Sort/filevercmp.pm

Criterion	Covered	Total	%
statement	73	75	97.3
branch	40	48	83.3
condition	32	39	82.0
subroutine	10	10	100.0
pod	2	2	100.0
total	157	174	90.2

line	stmt	bran	cond	sub	pod	time	code
1							package Sort::filevercmp;
2
3	1			1		598	use strict;
	1					3
	1					34
4	1			1		20	use warnings;
	1					4
	1					38
5	1			1		7	use Exporter 'import';
	1					3
	1					1370
6
7							our $VERSION = '0.001';
8
9							our @EXPORT = 'filevercmp';
10							our @EXPORT_OK = 'fileversort';
11
12	55			55	1	28098	sub filevercmp ($$) { _filevercmp(_parse($_[0]), _parse($_[1])) }
13
14							sub fileversort {
15	1			1	1	599	my @parsed = map { _parse($_) } @_;
	56					103
16	1					9	return @_[sort { _filevercmp($parsed[$a], $parsed[$b]) } 0..$#_];
	55					95
17							}
18
19							# Parse strings into metadata
20							sub _parse {
21	166			166		333	my ($name) = @_;
22	166	50				384	$name = '' unless defined $name;
23
24	166	100	100			910	return { name => $name, special => 1 } if $name eq '' or $name eq '.' or $name eq '..';
			100
25
26	158					252	my %meta;
27	158					307	$meta{name} = $name;
28
29	158					392	$meta{hidden} = $name =~ s/^\.//;
30
31	158					253	my (@prefix_parts, @all_parts);
32
33							# Parse name into pairs of non-digit and digit parts
34	158					247	my $with_suffix = $name;
35	158		100			895	while ($with_suffix =~ s/^([^0-9])([0-9])// and (length $1 or length $2)) {
			66
36	431					2759	push @all_parts, $1, $2;
37							}
38
39	158					336	$meta{all_parts} = \@all_parts;
40
41							# Parse name into pairs without suffix
42	158					232	my $prefix = $name;
43	158	50				745	if ($prefix =~ s/(?:\.[A-Za-z~][A-Za-z0-9~])$//) {
44	158					257	my $without_suffix = $prefix;
45	158		100			696	while ($without_suffix =~ s/^([^0-9])([0-9])// and (length $1 or length $2)) {
			66
46	320					2146	push @prefix_parts, $1, $2;
47							}
48							} else {
49	0					0	@prefix_parts = @all_parts;
50							}
51
52	158					341	$meta{prefix} = $prefix;
53	158					240	$meta{prefix_parts} = \@prefix_parts;
54
55	158					406	return \%meta;
56							}
57
58							# tilde sorts first even before end of string, then letters, then everything else
59							sub _lexorder {
60	1172			1172		1875	my ($char) = @_;
61	1172	50				2647	return 0 if $char =~ m/\A[0-9]\z/;
62	1172	100				2846	return ord $char if $char =~ m/\A[a-zA-Z]\z/;
63	342	100				692	return -1 if $char eq '~';
64	322					525	return ord($char) + ord('z') + 1;
65							}
66
67							sub _lexcmp {
68	150			150		252	my ($alex, $blex) = @_;
69	150					428	my @achars = split '', $alex;
70	150					336	my @bchars = split '', $blex;
71	150		100			389	while (@achars or @bchars) {
72	612					1117	my ($achar, $bchar) = (shift(@achars), shift(@bchars));
73	612	100				1375	my $aord = defined $achar ? _lexorder($achar) : 0;
74	612	100				1337	my $bord = defined $bchar ? _lexorder($bchar) : 0;
75	612					892	my $charcmp = $aord <=> $bord;
76	612	100				1873	return $charcmp if $charcmp;
77							}
78	64					130	return 0;
79							}
80
81							# Based on verrevcmp() from GNU filevercmp
82							sub _verrevcmp {
83	102	50		102		138	my @aparts = @{$_[0] \|\| []};
	102					367
84	102	50				171	my @bparts = @{$_[1] \|\| []};
	102					340
85	102		66			266	while (@aparts or @bparts) {
86							# Lexical part
87	150					309	my ($alex, $blex) = (shift(@aparts), shift(@bparts));
88	150	100				332	$alex = '' unless defined $alex;
89	150	100				301	$blex = '' unless defined $blex;
90	150					299	my $lexcmp = _lexcmp($alex, $blex);
91	150	100				548	return $lexcmp if $lexcmp;
92
93							# Numeric part
94	64					128	my ($anum, $bnum) = (shift(@aparts), shift(@bparts));
95	64	50	33			244	$anum = 0 unless defined $anum and length $anum;
96	64	50	33			223	$bnum = 0 unless defined $bnum and length $bnum;
97	64					123	my $numcmp = $anum <=> $bnum;
98	64	100				252	return $numcmp if $numcmp;
99							}
100	0					0	return 0;
101							}
102
103							# Based on filevercmp() from GNU filevercmp
104							sub _filevercmp {
105	110			110		200	my ($first, $second) = @_;
106	110	50				265	return 0 if $first->{name} eq $second->{name};
107
108							# Special files go first (empty string, ., or ..)
109							return $first->{name} cmp $second->{name}
110	110	100	100			320	if $first->{special} and $second->{special};
111	106	100				229	return -1 if $first->{special};
112	105	100				214	return 1 if $second->{special};
113
114							# Hidden files go before unhidden
115	104	100	100			282	return -1 if $first->{hidden} and !$second->{hidden};
116	103	100	100			389	return 1 if !$first->{hidden} and $second->{hidden};
117
118							# Compare parts, including suffixes only if prefixes are equal
119	102	100				224	if ($first->{prefix} eq $second->{prefix}) {
120	18					38	return _verrevcmp($first->{all_parts}, $second->{all_parts});
121							} else {
122	84					175	return _verrevcmp($first->{prefix_parts}, $second->{prefix_parts});
123							}
124							}
125
126							1;
127
128							=head1 NAME
129
130							Sort::filevercmp - Sort version strings as in GNU filevercmp
131
132							=head1 SYNOPSIS
133
134							use Sort::filevercmp;
135							my @sorted = sort filevercmp 'foo-bar-1.2a.tar.gz', 'foo-bar-1.10.zip';
136							my $cmp = filevercmp 'a1b2c3.tar', 'a1b2c3.tar~';
137							say $cmp ? $cmp < 0 ? 'First name' : 'Second name' : 'Names are equal';
138
139							# Pre-parse list for faster sorting
140							use Sort::filevercmp 'fileversort';
141							my @sorted = fileversort @filenames;
142
143							=head1 DESCRIPTION
144
145							Perl implementation of the C function from
146							L. C is used by the
147							L (C<-V> option) and L (C<-v> option) GNU coreutils commands
148							for "natural" sorting of strings (usually filenames) containing mixed version
149							numbers and filename suffixes.
150
151							=head1 FUNCTIONS
152
153							=head2 filevercmp
154
155							my $cmp = filevercmp $string1, $string2;
156							my @sorted = sort filevercmp @strings;
157
158							Takes two strings and returns -1 if the first string sorts first, 1 if the
159							second string sorts first, or 0 if the strings sort equivalently. Can be passed
160							to L directly as a comparison function. Exported by
161							default.
162
163							=head2 fileversort
164
165							my @sorted = fileversort @strings;
166
167							Takes a list of strings and sorts them according to L. The
168							strings are pre-parsed so it may be more efficient than using L
169							as a sort comparison function. Exported by request.
170
171							=head1 ALGORITHM
172
173							The sort algorithm works roughly as follows:
174
175							=over
176
177							=item 1
178
179							Empty strings, C<.>, and C<..> go first
180
181							=item 2
182
183							Hidden files (strings beginning with C<.>) go next, and are sorted among
184							themselves according to the remaining rules
185
186							=item 3
187
188							Each string is split into sequences of non-digit characters and digit (C<0-9>)
189							characters, ignoring any filename suffix as matched by the regex
190							C, unless the strings to be compared are
191							equal with the suffixes removed.
192
193							=item 4
194
195							The first non-digit sequence of the first string is compared lexically with
196							that of the second string, with letters (C) sorting first and other
197							characters sorting after, ordered by character ordinals. The tilde (C<~>)
198							character sorts before all other characters, even the end of the sequence.
199							Continue if the non-digit sequences are lexically equal.
200
201							=item 5
202
203							The first digit sequence of the first string is compared numerically with that
204							of the second string, ignoring leading zeroes. Continue if the digit sequences
205							are numerically equal.
206
207							=item 6
208
209							Repeat steps 4 and 5 with the remaining sequences.
210
211							=back
212
213							=head1 CAVEATS
214
215							This sort algorithm ignores the current locale, and has unique rules for
216							lexically sorting the non-digit components of the strings, designed for sorting
217							filenames. There are better options for general version string sorting; see
218							L.
219
220							=head1 BUGS
221
222							Report any issues on the public bugtracker.
223
224							=head1 AUTHOR
225
226							Dan Book
227
228							=head1 COPYRIGHT AND LICENSE
229
230							This software is Copyright (c) 2017 by Dan Book.
231
232							This is free software, licensed under:
233
234							The Artistic License 2.0 (GPL Compatible)
235
236							=head1 SEE ALSO
237
238							=over
239
240							=item *
241
242							L - for comparing Perl version strings
243
244							=item *
245
246							L - for comparing standard version strings
247
248							=item *
249
250							L - locale-sensitive natural sorting of mixed strings
251
252							=back