File Coverage

blib/lib/Sqids.pm

Criterion	Covered	Total	%
statement	124	124	100.0
branch	48	52	92.3
condition	9	9	100.0
subroutine	19	19	100.0
pod	2	7	28.5
total	202	211	95.7

line	stmt	bran	cond	sub	pod	time	code
1							package Sqids;
2	4			4		1306309	use 5.008001;
	4					18
3	4			4		28	use strict;
	4					8
	4					121
4	4			4		34	use warnings;
	4					8
	4					340
5	4			4		3229	use bignum;
	4					16912
	4					49
6	4			4		597393	use Carp 'croak';
	4					10
	4					260
7	4			4		27	use Encode qw(encode_utf8);
	4					8
	4					453
8	4			4		27	use List::Util qw(first min reduce);
	4					8
	4					295
9	4			4		3229	use Sqids::Constants;
	4					33
	4					346
10							use Class::Tiny {
11							alphabet => Sqids::Constants::DEFAULT_ALPHABET,
12							min_length => Sqids::Constants::DEFAULT_MIN_LENGTH,
13	63					664	blocklist => sub { Sqids::Constants::DEFAULT_BLOCKLIST },
14	4			4		2495	};
	4					9995
	4					52
15
16							our $VERSION = "0.03";
17
18							sub BUILD {
19	70			70	0	240909	my ($self, $args) = @_;
20
21	70					3050	my $alphabet = $self->alphabet;
22	70					2133	my $min_length = $self->min_length;
23	70					1648	my $blocklist = $self->blocklist;
24
25	70	100				758	croak 'Alphabet cannot contain multibyte characters' if length $alphabet != length encode_utf8 $alphabet;
26
27	69	100				392	croak 'Alphabet length must be at least 3' if length $alphabet < 3;
28
29	68					11568	my %alphabet_hash = map { $_ => undef } split '', $alphabet;
	3992					8858
30	68	100				907	croak 'Alphabet must contain unique characters' if keys %alphabet_hash != length $alphabet;
31
32	67					165	my $min_length_limit = 255;
33	67	100	100			260	croak "Minimum length has to be between 0 and $min_length_limit" if $min_length < 0 \|\| $min_length > $min_length_limit;
34
35							# clean up blocklist:
36							# 1. all blocklist words should be lowercase
37							# 2. no words less than 3 chars
38							# 3. if some words contain chars that are not in the alphabet, remove those
39	65					17003	my $alphabet_lower = quotemeta lc $alphabet;
40							@$blocklist =
41	31385	50				3658956	grep { length >= 3 && /^[$alphabet_lower]+$/ }
42	65					463	map { lc } @$blocklist;
	31385					52147
43
44	65					30255	$self->alphabet($self->shuffle($alphabet));
45							}
46
47							# Encodes an array of unsigned integers into an ID
48							#
49							# These are the cases where encoding might fail:
50							# - One of the numbers passed is smaller than 0
51							# - An n-number of attempts has been made to re-generated the ID, where n is alphabet length + 1
52							#
53							# @param {array.} numbers Non-negative integers to encode into an ID (or an arrayref)
54							# @returns {string} Generated ID
55
56							sub encode {
57	111			111	1	63261	my ($self, @numbers) = @_;
58
59							# if no numbers passed, return an empty string
60	111	100				525	return '' unless @numbers;
61
62							# Convert to array if arrayref provided
63	110	100				549	@numbers = @{$numbers[0]} if ref $numbers[0] eq 'ARRAY';
	86					372
64
65	110	100		394		1151	croak "Encoding only supports non-negative numbers" if first { $_ < 0 } @numbers;
	394					38602
66
67	109					14735	return $self->_encode_numbers(0, @numbers);
68							}
69
70							# Internal function that encodes an array of unsigned integers into an ID
71							#
72							# @param {number} increment An internal number used to modify the `offset` variable in order to re-generate the ID
73							# @param {array.} numbers Non-negative integers to encode into an ID
74							# @returns {string} Generated ID
75
76							sub _encode_numbers {
77	122			122		2061	my ($self, $increment, @numbers) = @_;
78
79							# if increment is greater than alphabet length, we've reached max attempts
80	122					4505	my $alphabet = $self->alphabet;
81	122					1000	my $length = length $alphabet;
82	122	100				457	croak 'Reached max attempts to re-generate the ID' if $increment > $length;
83
84							# get a semi-random offset from input numbers
85	121					24097	my $offset = @numbers;
86	121					547	for (0..$#numbers) {
87	412					20892	$offset += ord(substr($alphabet, $numbers[$_] % $length, 1)) + $_;
88							}
89
90							# if there is a non-zero `increment`, it's an internal attempt to re-generated the ID
91	121					4389	$offset = ($offset + $increment) % $length;
92
93							# re-arrange alphabet so that second-half goes in front of the first-half
94	121					69566	$alphabet = substr($alphabet, $offset) . substr($alphabet, 0, $offset);
95
96							# `prefix` is the first character in the generated ID, used for randomization
97	121					13655	my $prefix = substr($alphabet, 0, 1);
98
99							# reverse alphabet (otherwise for [0, x] `offset` and `separator` will be the same char)
100	121					8362	$alphabet = reverse $alphabet;
101
102							# final ID will always have the `prefix` character at the beginning
103	121					444	my @ret = ($prefix);
104
105							# encode input array
106	121					400	for (0..$#numbers) {
107							# the first character of the alphabet is going to be reserved for the `separator`
108	412					9807	push @ret, $self->to_id($numbers[$_], substr($alphabet, 1));
109
110							# if not the last number
111	412	100				2115	if ($_ < @numbers - 1) {
112							# `separator` character is used to isolate numbers within the ID
113	291					159790	push @ret, substr($alphabet, 0, 1);
114
115							# shuffle on every iteration
116	291					23665	$alphabet = $self->shuffle($alphabet);
117							}
118							}
119
120							# join all the parts to form an ID
121	121					65592	my $id = join('', @ret);
122
123							# handle `min_length` requirement, if the ID is too short
124	121	100				6050	if ($self->min_length > length $id) {
125							# append a separator
126	48					589	$id .= substr($alphabet, 0, 1);
127
128							# keep appending `separator` + however much alphabet is needed
129							# for decoding: two separators next to each other is what tells us the rest are junk characters
130	48					5058	while ($self->min_length - length $id > 0) {
131	40					5761	$alphabet = $self->shuffle($alphabet);
132	40					2138	$id .= substr($alphabet, 0, min(length $alphabet, $self->min_length - length $id));
133							}
134							}
135
136							# if ID has a blocked word anywhere, restart with a +1 increment
137	121	100				10887	if ($self->is_blocked_id($id)) {
138	13					354	$id = $self->_encode_numbers($increment + 1, @numbers);
139							}
140
141	117					5524	return $id;
142							}
143
144							# Decodes an ID back into an array of unsigned integers
145							#
146							# These are the cases where the return value might be an empty array:
147							# - Empty ID / empty string
148							# - Non-alphabet character is found within ID
149							#
150							# @param {string} id Encoded ID
151							# @returns {array.} Array of unsigned integers
152
153							sub decode {
154	104			104	1	44031	my ($self, $id) = @_;
155
156	104	100				437	return if $id eq '';
157	103	50				323	return unless defined wantarray;
158
159							# if a character is not in the alphabet, return
160	103					3747	my $alphabet = quotemeta $self->alphabet;
161	103	100				1849	return if $id =~ /[^$alphabet]/;
162
163							# first character is always the `prefix`
164	102					545	my $prefix = substr($id, 0, 1);
165
166							# `offset` is the semi-random position that was generated during encoding
167	102					13322	my $offset = index $self->alphabet, $prefix;
168
169							# re-arrange alphabet back into its original form
170	102					2447	$alphabet = substr($self->alphabet, $offset) . substr($self->alphabet, 0, $offset);
171
172							# reverse alphabet
173	102					5179	$alphabet = reverse $alphabet;
174
175							# now it's safe to remove the prefix character from ID, it's not needed anymore
176	102					359	$id = substr($id, 1);
177
178							# decode
179	102					3790	my @ret;
180	102					347	while (length $id) {
181	402					23795	my $separator = substr($alphabet, 0, 1);
182
183							# we need the first part to the left of the separator to decode the number
184	402					35269	my @chunks = split /\Q$separator\E/, $id, -1;
185	402	50				16759	if (@chunks) {
186							# if chunk is empty, we are done (the rest are junk characters)
187	402	100				1768	return @ret if $chunks[0] eq '';
188
189							# decode the number without using the `separator` character
190	372					1101	push @ret, $self->to_number($chunks[0], substr($alphabet, 1));
191
192							# if this ID has multiple numbers, shuffle the alphabet because that's what encoding function did
193	372	100				215026	if (@chunks > 1) {
194	303					40447	$alphabet = $self->shuffle($alphabet);
195							}
196							}
197
198							# `id` is now going to be everything to the right of the `separator`
199	372					11781	$id = join($separator, @chunks[1..$#chunks]);
200							}
201
202	72	50				6417	return wantarray ? @ret : @ret == 1 ? $ret[0] : \@ret;
		100
203							}
204
205							# consistent shuffle (always produces the same result given the input)
206							sub shuffle {
207	699			699	0	2478	my ($self, $alphabet) = @_;
208	699					8830	my @chars = split '', $alphabet;
209
210	699					2736	for (my ($i, $j) = (0, @chars-1); $j>0; $i++, $j--) {
211	41953					13936177	my $r = ($i * $j + ord($chars[$i]) + ord($chars[$j])) % @chars;
212	41953					44451402	@chars[$i,$r] = @chars[$r,$i];
213							}
214
215	699					248139	return join('', @chars);
216							}
217
218							sub to_id {
219	412			412	0	19548	my ($self, $num, $alphabet) = @_;
220	412					955	my @id;
221	412					857	my $result = $num;
222	412					791	my $length = length $alphabet;
223
224	412					702	do {
225	559					20170	unshift @id, substr($alphabet, $result % $length, 1);
226	559					23819	$result = int($result / $length);
227							} while ($result > 0);
228
229	412					59182	return join('', @id);
230							}
231
232							sub to_number {
233	372			372	0	28852	my ($self, $id, $alphabet) = @_;
234	372			485		6018	reduce { $a * length($alphabet) + index($alphabet, $b) } 0, split '', $id;
	485					61430
235							}
236
237							sub is_blocked_id {
238	121			121	0	365	my ($self, $id) = @_;
239	121					355	$id = lc $id;
240
241	121					206	foreach my $word (@{$self->blocklist}) {
	121					3291
242							# no point in checking words that are longer than the ID
243	55506	100				3429188	next unless length $word <= length $id;
244	35334	100	100			93943	if (length $id <= 3 \|\| length $word <= 3) {
		100
		100
245							# short words have to match completely; otherwise, too many matches
246	618	100				155807	return 1 if $id eq $word;
247							} elsif ($word =~ /\d/) {
248							# words with leet speak replacements are visible mostly on the ends of the ID
249	21871	100	100			5953791	return 1 if $id =~ /^\Q$word\E/ \|\| $id =~ /\Q$word\E$/;
250							} elsif ($id =~ /\Q$word\E/) {
251							# otherwise, check for blocked word anywhere in the string
252	4					1012	return 1;
253							}
254							}
255
256	108					16218	return 0;
257							}
258
259							1;
260							__END__