File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm
Criterion Covered Total %
statement 129 134 96.2
branch 50 52 96.1
condition 18 20 90.0
subroutine 18 21 85.7
pod 10 10 100.0
total 225 237 94.9


line stmt bran cond sub pod time code
1             package Data::Password::zxcvbn::Match::Dictionary;
2 4     4   13303 use Moo;
  4         16  
  4         31  
3             with 'Data::Password::zxcvbn::Match';
4 4     4   6506 use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
  4         12  
  4         287  
5 4     4   30 use List::AllUtils qw(min);
  4         48  
  4         11017  
6             our $VERSION = '1.1.3'; # VERSION
7             # ABSTRACT: match class for words in passwords
8              
9              
10             has reversed => (is => 'ro', default => 0); # bool
11             has substitutions => ( is => 'ro', default => sub { +{} } );
12             has rank => ( is => 'ro', default => 1 ); # int
13             # this should be constrained to the keys of %ranked_dictionaries, but
14             # we can't do that because users can pass their own dictionaries to
15             # ->make
16             has dictionary_name => ( is => 'ro', default => 'passwords' );
17              
18              
19             sub l33t {
20 451     451 1 52875 return scalar(keys %{shift->substitutions})!=0;
  451         2675  
21             }
22              
23              
24             our %l33t_table = ( ## no critic (ProhibitPackageVars)
25             a => ['4', '@'],
26             b => ['8'],
27             c => ['(', '{', '[', '<'],
28             e => ['3'],
29             g => ['6', '9'],
30             i => ['1', '!', '|'],
31             l => ['1', '|', '7'],
32             o => ['0'],
33             s => ['$', '5'],
34             t => ['+', '7'],
35             x => ['%'],
36             z => ['2'],
37             );
38              
39             sub make {
40 1533     1533 1 114227 my ($class, $password, $opts) = @_;
41             ## no critic (ProhibitPackageVars)
42             my $dictionaries = $opts->{ranked_dictionaries}
43 1533   66     8296 || do {
44             require Data::Password::zxcvbn::RankedDictionaries;
45             \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46             };
47 1533   100     11221 my $l33t_table = $opts->{l33t_table} || \%l33t_table;
48              
49 1533         4036 my @matches;
50 1533         8333 $class->_make_simple(\@matches,$password,$dictionaries);
51 1533         9833 $class->_make_reversed(\@matches,$password,$dictionaries);
52 1533         11150 $class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53              
54 1533         17236 @matches = sort @matches;
55 1533         11332 return \@matches;
56             }
57              
58             sub _make_simple {
59 4648     4648   13420 my ($class, $matches, $password, $dictionaries) = @_;
60 4648         10846 my $password_lc = lc($password);
61             # lc may change the length of the password...
62 4648         9329 my $length = length($password_lc);
63              
64 4648         7954 for my $dictionary_name (keys %{$dictionaries}) {
  4648         20392  
65 27453         56657 my $ranked_dict = $dictionaries->{$dictionary_name};
66 27453         55911 for my $i (0..$length-1) {
67 156745         293583 for my $j ($i..$length-1) {
68 674738         1173616 my $word = substr($password_lc,$i,$j-$i+1);
69 674738 100       1914835 if (my $rank = $ranked_dict->{$word}) {
70 8139         12867 push @{$matches}, $class->new({
  8139         273078  
71             token => substr($password,$i,$j-$i+1),
72             i => $i, j=> $j,
73             rank => $rank,
74             dictionary_name => $dictionary_name,
75             });
76             }
77             }
78             }
79             }
80             }
81              
82             sub _make_reversed {
83 1533     1533   4979 my ($class, $matches, $password, $dictionaries) = @_;
84              
85 1533         4850 my $rev_password = reverse($password);
86 1533         2607 my @rev_matches;
87 1533         5574 $class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88              
89 1533         6541 my $rev_length = length($password)-1;
90 1533         4549 for my $rev_match (@rev_matches) {
91 1553         9086 my $word = $rev_match->token;
92             # no need to add this, the normal matching will have produced
93             # it already
94 1553 100       7704 next if $word eq reverse($word);
95 661         1347 push @{$matches}, $class->new({
  661         23050  
96             token => reverse($word),
97             i => $rev_length - $rev_match->j,
98             j=> $rev_length - $rev_match->i,
99             rank => $rev_match->rank,
100             dictionary_name => $rev_match->dictionary_name,
101             reversed => 1,
102             });
103             }
104             }
105              
106             # makes a pruned copy of l33t_table that only includes password's
107             # possible substitutions
108             sub _relevant_l33t_subtable {
109 1539     1539   21372 my ($class, $password, $l33t_table) = @_;
110             # set of characters
111 1539         3062 my %password_chars; @password_chars{split //,$password} = ();
  1539         15430  
112              
113 1539         3924 my %subtable;
114 1539         3138 for my $letter (keys %{$l33t_table}) {
  1539         10729  
115 34681         71351 my @relevant_subs = grep { exists $password_chars{$_} }
116 18084         27514 @{$l33t_table->{$letter}};
  18084         37059  
117 18084 100       44489 $subtable{$letter} = \@relevant_subs
118             if @relevant_subs;
119             }
120              
121 1539         14705 return \%subtable;
122             }
123              
124             sub _translate {
125 1582     1582   4547 my ($class, $string, $table) = @_;
126 1582         3298 my $keys = join '', keys %{$table};
  1582         5453  
127 1582         72354 $string =~ s{([\Q$keys\E])}
128             {$table->{$1}}g;
129 1582         7297 return $string;
130             }
131              
132             sub _make_l33t {
133 1533     1533   6040 my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134              
135 1533         5608 my $subs = enumerate_substitution_maps(
136             $class->_relevant_l33t_subtable($password,$l33t_table)
137             );
138 1533         6291 for my $sub (@{$subs}) {
  1533         3742  
139 2246 100       9830 next unless %{$sub};
  2246         7359  
140 1582         6689 my $subbed_password = $class->_translate($password,$sub);
141 1582         3421 my @subbed_matches;
142 1582         6911 $class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143              
144 1582         6586 for my $subbed_match (@subbed_matches) {
145 3651         29354 my $token = substr($password,
146             $subbed_match->i,
147             $subbed_match->j - $subbed_match->i + 1);
148             # too short, ignore
149 3651 100       18723 next if length($token) <= 1;
150             # only return the matches that contain an actual substitution
151 1930 100       9907 next if lc($token) eq lc($subbed_match->token);
152             # subset of mappings in $sub that are in use for this match
153             my %min_subs = map {
154             $token =~ m{\Q$_}
155 2164 100       37622 ? ( $_ => $sub->{$_} )
156             : ()
157 589         1251 } keys %{$sub};
  589         2471  
158 589         1509 push @{$matches}, $class->new({
  589         22431  
159             token => $token,
160             substitutions => \%min_subs,
161             i => $subbed_match->i,
162             j=> $subbed_match->j,
163             rank => $subbed_match->rank,
164             dictionary_name => $subbed_match->dictionary_name,
165             });
166             }
167             }
168             }
169              
170              
171             sub estimate_guesses {
172 4179     4179 1 338565 my ($self,$min_guesses) = @_;
173              
174 4179         13872 return $self->rank *
175             $self->_uppercase_variations *
176             $self->_l33t_variations *
177             $self->_reversed_variations;
178             }
179              
180              
181             # an uppercase letter, followed by stuff that is *not* uppercase
182             # letters
183             my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184             # stuff that is *not* uppercase letters, followed by an uppercase
185             # letter
186             my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187             # no characters that are *not* uppercase letters
188             my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189             # no characters that are *not* lowercase letters
190             my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191              
192              
193 386     386 1 4016 sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194 0     0 1 0 sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195 0     0 1 0 sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196 0     0 1 0 sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197 356   100 356 1 3400 sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198              
199             sub _uppercase_variations {
200 4179     4179   8119 my ($self) = @_;
201              
202 4179         9965 my $word = $self->token;
203              
204             # if the word has no uppercase letters, count it as 1 variation
205 4179 100       33469 return 1 if $word =~ $ALL_NOT_UPPER_RE;
206 270 100       1004 return 1 if lc($word) eq $word;
207              
208             # a capitalized word is the most common capitalization scheme, so
209             # it only doubles the search space (uncapitalized + capitalized).
210             # allcaps and end-capitalized are common enough too, underestimate
211             # as 2x factor to be safe.
212 268 100       1691 return 2 if $word =~ $START_UPPER_RE;
213 148 100       732 return 2 if $word =~ $END_UPPER_RE;
214 127 100       838 return 2 if $word =~ $ALL_NOT_LOWER_RE;
215              
216             # otherwise calculate the number of ways to capitalize U+L
217             # uppercase+lowercase letters with U uppercase letters or
218             # less. or, if there's more uppercase than lower (for
219             # eg. PASSwORD), the number of ways to lowercase U+L letters with
220             # L lowercase letters or less.
221 37         247 my $U = () = $word =~ m/\p{Lu}/g;
222 37         192 my $L = () = $word =~ m/\p{Ll}/g;
223              
224 37         83 my $variations = 0;
225 37         287 $variations += nCk($U+$L,$_) for 1..min($U,$L);
226 37         141 return $variations;
227             }
228              
229             sub _l33t_variations {
230 4179     4179   8784 my ($self) = @_;
231              
232 4179         9070 my $word = $self->token;
233              
234 4179         7948 my $variations = 1;
235 4179         7073 for my $subbed (keys %{$self->substitutions}) {
  4179         14787  
236 1098         2845 my $unsubbed = $self->substitutions->{$subbed};
237              
238             # number of Substituted characters
239 1098         13068 my $S = () = $word =~ m{\Q$subbed}gi;
240             # number of Unsubstituted characters
241 1098         8476 my $U = () = $word =~ m{\Q$unsubbed}gi;
242              
243 1098 100 66     5474 if ($S==0 || $U==0) {
244             # for this substitution, password is either fully subbed
245             # (444) or fully unsubbed (aaa); treat that as doubling
246             # the space (attacker needs to try fully subbed chars in
247             # addition to unsubbed.)
248 1054         2634 $variations *= 2;
249             }
250             else {
251             # this case is similar to capitalization: with aa44a, U =
252             # 3, S = 2, attacker needs to try unsubbed + one sub + two
253             # subs
254 44         116 my $possibilities = 0;
255 44         334 $possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256 44         157 $variations *= $possibilities;
257             }
258             }
259              
260 4179         12271 return $variations;
261             }
262              
263             sub _reversed_variations {
264 4179 100   4179   21028 return shift->reversed ? 2 : 1;
265             }
266              
267              
268             sub feedback_warning {
269 386     386 1 1232 my ($self, $is_sole_match) = @_;
270              
271 386 100       3278 if ($self->dictionary_name eq 'passwords') {
    100          
    100          
272 107 100 100     925 if ($is_sole_match && !$self->l33t && !$self->reversed) {
    100 100        
273 26 50       193 if ($self->rank <= 10) {
    50          
274 0         0 return 'This is a top-10 common password';
275             }
276             elsif ($self->rank <= 100) {
277 0         0 return 'This is a top-100 common password';
278             }
279             else {
280 26         172 return 'This is a very common password';
281             }
282             }
283             elsif ($self->guesses_log10 <= 4) {
284 64         1303 return 'This is similar to a commonly used password';
285             }
286             }
287             elsif ($self->dictionary_name =~ /names$/) {
288 131 100       407 if ($is_sole_match) {
289 22         142 return 'Names and surnames by themselves are easy to guess'
290             }
291             else {
292 109         740 return 'Common names and surnames are easy to guess';
293             }
294             }
295             elsif ($is_sole_match) {
296 27         162 return 'A word by itself is easy to guess';
297             }
298              
299 138         1086 return undef;
300             }
301              
302             sub feedback_suggestions {
303 386     386 1 974 my ($self) = @_;
304              
305 386         1229 my $word = $self->token;
306 386         796 my @suggestions;
307              
308 386 100       1300 if ($self->does_word_start_upper($word)) {
    100          
309 30         128 push @suggestions, q{Capitalization doesn't help very much};
310             }
311             elsif ($self->is_word_all_upper($word)) {
312 7         22 push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313             }
314              
315 386 100 100     1846 if ($self->reversed && length($word) >= 4) {
316 13         50 push @suggestions, q{Reversed words aren't much harder to guess};
317             }
318              
319 386 100       1719 if ($self->l33t) {
320 10         54 push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321             }
322              
323 386         2605 return \@suggestions;
324             }
325              
326              
327             around fields_for_json => sub {
328             my ($orig,$self) = @_;
329             ( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330             };
331              
332             1;
333              
334             __END__
335              
336             =pod
337              
338             =encoding UTF-8
339              
340             =for :stopwords Wiktionary xato
341              
342             =head1 NAME
343              
344             Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345              
346             =head1 VERSION
347              
348             version 1.1.3
349              
350             =head1 DESCRIPTION
351              
352             This class represents the guess that a certain substring of a password
353             can be guessed by going through a dictionary.
354              
355             =head1 ATTRIBUTES
356              
357             =head2 C<reversed>
358              
359             Boolean, true if the token appears to be a dictionary word that's been
360             reversed (i.e. last letter first)
361              
362             =head2 C<substitutions>
363              
364             Hashref representing the characters that need to be substituted to
365             make the token match a dictionary work (e.g. if the token is
366             C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367              
368             =head2 C<rank>
369              
370             Number, indicating how common the dictionary word is. 1 means "most
371             common".
372              
373             =head2 C<dictionary_name>
374              
375             String, the name of the dictionary that the word was found in. Usually one of:
376              
377             =over 4
378              
379             =item *
380              
381             C<english_wikipedia>
382              
383             words extracted from a dump of the English edition of Wikipedia
384              
385             =item *
386              
387             C<male_names>, C<female_names>, C<surnames>
388              
389             common names from the 1990 US census
390              
391             =item *
392              
393             C<passwords>
394              
395             most common passwords, extracted from the "xato" password dump
396              
397             =item *
398              
399             C<us_tv_and_film>
400              
401             words from a 2006 Wiktionary word frequency study over American
402             television and movies
403              
404             =back
405              
406             =head1 METHODS
407              
408             =head2 C<l33t>
409              
410             Returns true if the token had any L</substitutions> (i.e. it was
411             written in "l33t-speak")
412              
413             =head2 C<make>
414              
415             my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416             $password,
417             { # these are the defaults
418             ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419             l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420             },
421             ) };
422              
423             Scans the C<$password> for substrings that match words in the
424             C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425             from the C<l33t_table>.
426              
427             The C<ranked_dictionaries> should look like:
428              
429             { some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430             ... }
431              
432             (i.e. a hash of dictionaries, each mapping words to their frequency
433             rank) and the C<l33t_table> should look like:
434              
435             { a => [ '4', '@' ], ... }
436              
437             (i.e. a hash mapping characters to arrays of other characters)
438              
439             =head2 C<estimate_guesses>
440              
441             The number of guesses is the product of the rank of the word, how many
442             case combinations match it, how many substitutions were used, doubled
443             if the token is reversed.
444              
445             =head2 C<does_word_start_upper>
446              
447             =head2 C<does_word_end_upper>
448              
449             =head2 C<is_word_all_not_upper>
450              
451             =head2 C<is_word_all_not_lower>
452              
453             =head2 C<is_word_all_upper>
454              
455             if ($self->does_word_start_upper($word)) { ... }
456              
457             These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458             >> and L<< /C<feedback_suggestions> >>.
459              
460             =head2 C<feedback_warning>
461              
462             =head2 C<feedback_suggestions>
463              
464             This class suggests not using common words or passwords, especially on
465             their own. It also suggests that capitalisation, "special characters"
466             substitutions, and writing things backwards are not very useful.
467              
468             =head2 C<fields_for_json>
469              
470             The JSON serialisation for matches of this class will contain C<token
471             i j guesses guesses_log10 dictionary_name reversed rank
472             substitutions>.
473              
474             =head1 AUTHOR
475              
476             Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477              
478             =head1 COPYRIGHT AND LICENSE
479              
480             This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481              
482             This is free software; you can redistribute it and/or modify it under
483             the same terms as the Perl 5 programming language system itself.
484              
485             =cut