File Coverage

blib/lib/Sqids.pm
Criterion Covered Total %
statement 124 124 100.0
branch 48 52 92.3
condition 9 9 100.0
subroutine 19 19 100.0
pod 2 7 28.5
total 202 211 95.7


line stmt bran cond sub pod time code
1             package Sqids;
2 4     4   1306309 use 5.008001;
  4         18  
3 4     4   28 use strict;
  4         8  
  4         121  
4 4     4   34 use warnings;
  4         8  
  4         340  
5 4     4   3229 use bignum;
  4         16912  
  4         49  
6 4     4   597393 use Carp 'croak';
  4         10  
  4         260  
7 4     4   27 use Encode qw(encode_utf8);
  4         8  
  4         453  
8 4     4   27 use List::Util qw(first min reduce);
  4         8  
  4         295  
9 4     4   3229 use Sqids::Constants;
  4         33  
  4         346  
10             use Class::Tiny {
11             alphabet => Sqids::Constants::DEFAULT_ALPHABET,
12             min_length => Sqids::Constants::DEFAULT_MIN_LENGTH,
13 63         664 blocklist => sub { Sqids::Constants::DEFAULT_BLOCKLIST },
14 4     4   2495 };
  4         9995  
  4         52  
15              
16             our $VERSION = "0.03";
17              
18             sub BUILD {
19 70     70 0 240909 my ($self, $args) = @_;
20              
21 70         3050 my $alphabet = $self->alphabet;
22 70         2133 my $min_length = $self->min_length;
23 70         1648 my $blocklist = $self->blocklist;
24              
25 70 100       758 croak 'Alphabet cannot contain multibyte characters' if length $alphabet != length encode_utf8 $alphabet;
26              
27 69 100       392 croak 'Alphabet length must be at least 3' if length $alphabet < 3;
28              
29 68         11568 my %alphabet_hash = map { $_ => undef } split '', $alphabet;
  3992         8858  
30 68 100       907 croak 'Alphabet must contain unique characters' if keys %alphabet_hash != length $alphabet;
31              
32 67         165 my $min_length_limit = 255;
33 67 100 100     260 croak "Minimum length has to be between 0 and $min_length_limit" if $min_length < 0 || $min_length > $min_length_limit;
34              
35             # clean up blocklist:
36             # 1. all blocklist words should be lowercase
37             # 2. no words less than 3 chars
38             # 3. if some words contain chars that are not in the alphabet, remove those
39 65         17003 my $alphabet_lower = quotemeta lc $alphabet;
40             @$blocklist =
41 31385 50       3658956 grep { length >= 3 && /^[$alphabet_lower]+$/ }
42 65         463 map { lc } @$blocklist;
  31385         52147  
43              
44 65         30255 $self->alphabet($self->shuffle($alphabet));
45             }
46              
47             # Encodes an array of unsigned integers into an ID
48             #
49             # These are the cases where encoding might fail:
50             # - One of the numbers passed is smaller than 0
51             # - An n-number of attempts has been made to re-generated the ID, where n is alphabet length + 1
52             #
53             # @param {array.} numbers Non-negative integers to encode into an ID (or an arrayref)
54             # @returns {string} Generated ID
55              
56             sub encode {
57 111     111 1 63261 my ($self, @numbers) = @_;
58              
59             # if no numbers passed, return an empty string
60 111 100       525 return '' unless @numbers;
61              
62             # Convert to array if arrayref provided
63 110 100       549 @numbers = @{$numbers[0]} if ref $numbers[0] eq 'ARRAY';
  86         372  
64              
65 110 100   394   1151 croak "Encoding only supports non-negative numbers" if first { $_ < 0 } @numbers;
  394         38602  
66              
67 109         14735 return $self->_encode_numbers(0, @numbers);
68             }
69              
70             # Internal function that encodes an array of unsigned integers into an ID
71             #
72             # @param {number} increment An internal number used to modify the `offset` variable in order to re-generate the ID
73             # @param {array.} numbers Non-negative integers to encode into an ID
74             # @returns {string} Generated ID
75              
76             sub _encode_numbers {
77 122     122   2061 my ($self, $increment, @numbers) = @_;
78              
79             # if increment is greater than alphabet length, we've reached max attempts
80 122         4505 my $alphabet = $self->alphabet;
81 122         1000 my $length = length $alphabet;
82 122 100       457 croak 'Reached max attempts to re-generate the ID' if $increment > $length;
83              
84             # get a semi-random offset from input numbers
85 121         24097 my $offset = @numbers;
86 121         547 for (0..$#numbers) {
87 412         20892 $offset += ord(substr($alphabet, $numbers[$_] % $length, 1)) + $_;
88             }
89              
90             # if there is a non-zero `increment`, it's an internal attempt to re-generated the ID
91 121         4389 $offset = ($offset + $increment) % $length;
92              
93             # re-arrange alphabet so that second-half goes in front of the first-half
94 121         69566 $alphabet = substr($alphabet, $offset) . substr($alphabet, 0, $offset);
95              
96             # `prefix` is the first character in the generated ID, used for randomization
97 121         13655 my $prefix = substr($alphabet, 0, 1);
98              
99             # reverse alphabet (otherwise for [0, x] `offset` and `separator` will be the same char)
100 121         8362 $alphabet = reverse $alphabet;
101              
102             # final ID will always have the `prefix` character at the beginning
103 121         444 my @ret = ($prefix);
104              
105             # encode input array
106 121         400 for (0..$#numbers) {
107             # the first character of the alphabet is going to be reserved for the `separator`
108 412         9807 push @ret, $self->to_id($numbers[$_], substr($alphabet, 1));
109              
110             # if not the last number
111 412 100       2115 if ($_ < @numbers - 1) {
112             # `separator` character is used to isolate numbers within the ID
113 291         159790 push @ret, substr($alphabet, 0, 1);
114              
115             # shuffle on every iteration
116 291         23665 $alphabet = $self->shuffle($alphabet);
117             }
118             }
119              
120             # join all the parts to form an ID
121 121         65592 my $id = join('', @ret);
122              
123             # handle `min_length` requirement, if the ID is too short
124 121 100       6050 if ($self->min_length > length $id) {
125             # append a separator
126 48         589 $id .= substr($alphabet, 0, 1);
127              
128             # keep appending `separator` + however much alphabet is needed
129             # for decoding: two separators next to each other is what tells us the rest are junk characters
130 48         5058 while ($self->min_length - length $id > 0) {
131 40         5761 $alphabet = $self->shuffle($alphabet);
132 40         2138 $id .= substr($alphabet, 0, min(length $alphabet, $self->min_length - length $id));
133             }
134             }
135              
136             # if ID has a blocked word anywhere, restart with a +1 increment
137 121 100       10887 if ($self->is_blocked_id($id)) {
138 13         354 $id = $self->_encode_numbers($increment + 1, @numbers);
139             }
140              
141 117         5524 return $id;
142             }
143              
144             # Decodes an ID back into an array of unsigned integers
145             #
146             # These are the cases where the return value might be an empty array:
147             # - Empty ID / empty string
148             # - Non-alphabet character is found within ID
149             #
150             # @param {string} id Encoded ID
151             # @returns {array.} Array of unsigned integers
152              
153             sub decode {
154 104     104 1 44031 my ($self, $id) = @_;
155              
156 104 100       437 return if $id eq '';
157 103 50       323 return unless defined wantarray;
158              
159             # if a character is not in the alphabet, return
160 103         3747 my $alphabet = quotemeta $self->alphabet;
161 103 100       1849 return if $id =~ /[^$alphabet]/;
162              
163             # first character is always the `prefix`
164 102         545 my $prefix = substr($id, 0, 1);
165              
166             # `offset` is the semi-random position that was generated during encoding
167 102         13322 my $offset = index $self->alphabet, $prefix;
168              
169             # re-arrange alphabet back into its original form
170 102         2447 $alphabet = substr($self->alphabet, $offset) . substr($self->alphabet, 0, $offset);
171              
172             # reverse alphabet
173 102         5179 $alphabet = reverse $alphabet;
174              
175             # now it's safe to remove the prefix character from ID, it's not needed anymore
176 102         359 $id = substr($id, 1);
177              
178             # decode
179 102         3790 my @ret;
180 102         347 while (length $id) {
181 402         23795 my $separator = substr($alphabet, 0, 1);
182              
183             # we need the first part to the left of the separator to decode the number
184 402         35269 my @chunks = split /\Q$separator\E/, $id, -1;
185 402 50       16759 if (@chunks) {
186             # if chunk is empty, we are done (the rest are junk characters)
187 402 100       1768 return @ret if $chunks[0] eq '';
188              
189             # decode the number without using the `separator` character
190 372         1101 push @ret, $self->to_number($chunks[0], substr($alphabet, 1));
191              
192             # if this ID has multiple numbers, shuffle the alphabet because that's what encoding function did
193 372 100       215026 if (@chunks > 1) {
194 303         40447 $alphabet = $self->shuffle($alphabet);
195             }
196             }
197              
198             # `id` is now going to be everything to the right of the `separator`
199 372         11781 $id = join($separator, @chunks[1..$#chunks]);
200             }
201              
202 72 50       6417 return wantarray ? @ret : @ret == 1 ? $ret[0] : \@ret;
    100          
203             }
204              
205             # consistent shuffle (always produces the same result given the input)
206             sub shuffle {
207 699     699 0 2478 my ($self, $alphabet) = @_;
208 699         8830 my @chars = split '', $alphabet;
209              
210 699         2736 for (my ($i, $j) = (0, @chars-1); $j>0; $i++, $j--) {
211 41953         13936177 my $r = ($i * $j + ord($chars[$i]) + ord($chars[$j])) % @chars;
212 41953         44451402 @chars[$i,$r] = @chars[$r,$i];
213             }
214              
215 699         248139 return join('', @chars);
216             }
217              
218             sub to_id {
219 412     412 0 19548 my ($self, $num, $alphabet) = @_;
220 412         955 my @id;
221 412         857 my $result = $num;
222 412         791 my $length = length $alphabet;
223              
224 412         702 do {
225 559         20170 unshift @id, substr($alphabet, $result % $length, 1);
226 559         23819 $result = int($result / $length);
227             } while ($result > 0);
228              
229 412         59182 return join('', @id);
230             }
231              
232             sub to_number {
233 372     372 0 28852 my ($self, $id, $alphabet) = @_;
234 372     485   6018 reduce { $a * length($alphabet) + index($alphabet, $b) } 0, split '', $id;
  485         61430  
235             }
236              
237             sub is_blocked_id {
238 121     121 0 365 my ($self, $id) = @_;
239 121         355 $id = lc $id;
240              
241 121         206 foreach my $word (@{$self->blocklist}) {
  121         3291  
242             # no point in checking words that are longer than the ID
243 55506 100       3429188 next unless length $word <= length $id;
244 35334 100 100     93943 if (length $id <= 3 || length $word <= 3) {
    100          
    100          
245             # short words have to match completely; otherwise, too many matches
246 618 100       155807 return 1 if $id eq $word;
247             } elsif ($word =~ /\d/) {
248             # words with leet speak replacements are visible mostly on the ends of the ID
249 21871 100 100     5953791 return 1 if $id =~ /^\Q$word\E/ || $id =~ /\Q$word\E$/;
250             } elsif ($id =~ /\Q$word\E/) {
251             # otherwise, check for blocked word anywhere in the string
252 4         1012 return 1;
253             }
254             }
255              
256 108         16218 return 0;
257             }
258              
259             1;
260             __END__