File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm

Criterion	Covered	Total	%
statement	129	134	96.2
branch	50	52	96.1
condition	18	20	90.0
subroutine	18	21	85.7
pod	10	10	100.0
total	225	237	94.9

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Password::zxcvbn::Match::Dictionary;
2	4			4		13303	use Moo;
	4					16
	4					31
3							with 'Data::Password::zxcvbn::Match';
4	4			4		6506	use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
	4					12
	4					287
5	4			4		30	use List::AllUtils qw(min);
	4					48
	4					11017
6							our $VERSION = '1.1.3'; # VERSION
7							# ABSTRACT: match class for words in passwords
8
9
10							has reversed => (is => 'ro', default => 0); # bool
11							has substitutions => ( is => 'ro', default => sub { +{} } );
12							has rank => ( is => 'ro', default => 1 ); # int
13							# this should be constrained to the keys of %ranked_dictionaries, but
14							# we can't do that because users can pass their own dictionaries to
15							# ->make
16							has dictionary_name => ( is => 'ro', default => 'passwords' );
17
18
19							sub l33t {
20	451			451	1	52875	return scalar(keys %{shift->substitutions})!=0;
	451					2675
21							}
22
23
24							our %l33t_table = ( ## no critic (ProhibitPackageVars)
25							a => ['4', '@'],
26							b => ['8'],
27							c => ['(', '{', '[', '<'],
28							e => ['3'],
29							g => ['6', '9'],
30							i => ['1', '!', '\|'],
31							l => ['1', '\|', '7'],
32							o => ['0'],
33							s => ['$', '5'],
34							t => ['+', '7'],
35							x => ['%'],
36							z => ['2'],
37							);
38
39							sub make {
40	1533			1533	1	114227	my ($class, $password, $opts) = @_;
41							## no critic (ProhibitPackageVars)
42							my $dictionaries = $opts->{ranked_dictionaries}
43	1533		66			8296	\|\| do {
44							require Data::Password::zxcvbn::RankedDictionaries;
45							\%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46							};
47	1533		100			11221	my $l33t_table = $opts->{l33t_table} \|\| \%l33t_table;
48
49	1533					4036	my @matches;
50	1533					8333	$class->_make_simple(\@matches,$password,$dictionaries);
51	1533					9833	$class->_make_reversed(\@matches,$password,$dictionaries);
52	1533					11150	$class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53
54	1533					17236	@matches = sort @matches;
55	1533					11332	return \@matches;
56							}
57
58							sub _make_simple {
59	4648			4648		13420	my ($class, $matches, $password, $dictionaries) = @_;
60	4648					10846	my $password_lc = lc($password);
61							# lc may change the length of the password...
62	4648					9329	my $length = length($password_lc);
63
64	4648					7954	for my $dictionary_name (keys %{$dictionaries}) {
	4648					20392
65	27453					56657	my $ranked_dict = $dictionaries->{$dictionary_name};
66	27453					55911	for my $i (0..$length-1) {
67	156745					293583	for my $j ($i..$length-1) {
68	674738					1173616	my $word = substr($password_lc,$i,$j-$i+1);
69	674738	100				1914835	if (my $rank = $ranked_dict->{$word}) {
70	8139					12867	push @{$matches}, $class->new({
	8139					273078
71							token => substr($password,$i,$j-$i+1),
72							i => $i, j=> $j,
73							rank => $rank,
74							dictionary_name => $dictionary_name,
75							});
76							}
77							}
78							}
79							}
80							}
81
82							sub _make_reversed {
83	1533			1533		4979	my ($class, $matches, $password, $dictionaries) = @_;
84
85	1533					4850	my $rev_password = reverse($password);
86	1533					2607	my @rev_matches;
87	1533					5574	$class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88
89	1533					6541	my $rev_length = length($password)-1;
90	1533					4549	for my $rev_match (@rev_matches) {
91	1553					9086	my $word = $rev_match->token;
92							# no need to add this, the normal matching will have produced
93							# it already
94	1553	100				7704	next if $word eq reverse($word);
95	661					1347	push @{$matches}, $class->new({
	661					23050
96							token => reverse($word),
97							i => $rev_length - $rev_match->j,
98							j=> $rev_length - $rev_match->i,
99							rank => $rev_match->rank,
100							dictionary_name => $rev_match->dictionary_name,
101							reversed => 1,
102							});
103							}
104							}
105
106							# makes a pruned copy of l33t_table that only includes password's
107							# possible substitutions
108							sub _relevant_l33t_subtable {
109	1539			1539		21372	my ($class, $password, $l33t_table) = @_;
110							# set of characters
111	1539					3062	my %password_chars; @password_chars{split //,$password} = ();
	1539					15430
112
113	1539					3924	my %subtable;
114	1539					3138	for my $letter (keys %{$l33t_table}) {
	1539					10729
115	34681					71351	my @relevant_subs = grep { exists $password_chars{$_} }
116	18084					27514	@{$l33t_table->{$letter}};
	18084					37059
117	18084	100				44489	$subtable{$letter} = \@relevant_subs
118							if @relevant_subs;
119							}
120
121	1539					14705	return \%subtable;
122							}
123
124							sub _translate {
125	1582			1582		4547	my ($class, $string, $table) = @_;
126	1582					3298	my $keys = join '', keys %{$table};
	1582					5453
127	1582					72354	$string =~ s{([\Q$keys\E])}
128							{$table->{$1}}g;
129	1582					7297	return $string;
130							}
131
132							sub _make_l33t {
133	1533			1533		6040	my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134
135	1533					5608	my $subs = enumerate_substitution_maps(
136							$class->_relevant_l33t_subtable($password,$l33t_table)
137							);
138	1533					6291	for my $sub (@{$subs}) {
	1533					3742
139	2246	100				9830	next unless %{$sub};
	2246					7359
140	1582					6689	my $subbed_password = $class->_translate($password,$sub);
141	1582					3421	my @subbed_matches;
142	1582					6911	$class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143
144	1582					6586	for my $subbed_match (@subbed_matches) {
145	3651					29354	my $token = substr($password,
146							$subbed_match->i,
147							$subbed_match->j - $subbed_match->i + 1);
148							# too short, ignore
149	3651	100				18723	next if length($token) <= 1;
150							# only return the matches that contain an actual substitution
151	1930	100				9907	next if lc($token) eq lc($subbed_match->token);
152							# subset of mappings in $sub that are in use for this match
153							my %min_subs = map {
154							$token =~ m{\Q$_}
155	2164	100				37622	? ( $_ => $sub->{$_} )
156							: ()
157	589					1251	} keys %{$sub};
	589					2471
158	589					1509	push @{$matches}, $class->new({
	589					22431
159							token => $token,
160							substitutions => \%min_subs,
161							i => $subbed_match->i,
162							j=> $subbed_match->j,
163							rank => $subbed_match->rank,
164							dictionary_name => $subbed_match->dictionary_name,
165							});
166							}
167							}
168							}
169
170
171							sub estimate_guesses {
172	4179			4179	1	338565	my ($self,$min_guesses) = @_;
173
174	4179					13872	return $self->rank *
175							$self->_uppercase_variations *
176							$self->_l33t_variations *
177							$self->_reversed_variations;
178							}
179
180
181							# an uppercase letter, followed by stuff that is not uppercase
182							# letters
183							my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184							# stuff that is not uppercase letters, followed by an uppercase
185							# letter
186							my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187							# no characters that are not uppercase letters
188							my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189							# no characters that are not lowercase letters
190							my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191
192
193	386			386	1	4016	sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194	0			0	1	0	sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195	0			0	1	0	sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196	0			0	1	0	sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197	356		100	356	1	3400	sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198
199							sub _uppercase_variations {
200	4179			4179		8119	my ($self) = @_;
201
202	4179					9965	my $word = $self->token;
203
204							# if the word has no uppercase letters, count it as 1 variation
205	4179	100				33469	return 1 if $word =~ $ALL_NOT_UPPER_RE;
206	270	100				1004	return 1 if lc($word) eq $word;
207
208							# a capitalized word is the most common capitalization scheme, so
209							# it only doubles the search space (uncapitalized + capitalized).
210							# allcaps and end-capitalized are common enough too, underestimate
211							# as 2x factor to be safe.
212	268	100				1691	return 2 if $word =~ $START_UPPER_RE;
213	148	100				732	return 2 if $word =~ $END_UPPER_RE;
214	127	100				838	return 2 if $word =~ $ALL_NOT_LOWER_RE;
215
216							# otherwise calculate the number of ways to capitalize U+L
217							# uppercase+lowercase letters with U uppercase letters or
218							# less. or, if there's more uppercase than lower (for
219							# eg. PASSwORD), the number of ways to lowercase U+L letters with
220							# L lowercase letters or less.
221	37					247	my $U = () = $word =~ m/\p{Lu}/g;
222	37					192	my $L = () = $word =~ m/\p{Ll}/g;
223
224	37					83	my $variations = 0;
225	37					287	$variations += nCk($U+$L,$_) for 1..min($U,$L);
226	37					141	return $variations;
227							}
228
229							sub _l33t_variations {
230	4179			4179		8784	my ($self) = @_;
231
232	4179					9070	my $word = $self->token;
233
234	4179					7948	my $variations = 1;
235	4179					7073	for my $subbed (keys %{$self->substitutions}) {
	4179					14787
236	1098					2845	my $unsubbed = $self->substitutions->{$subbed};
237
238							# number of Substituted characters
239	1098					13068	my $S = () = $word =~ m{\Q$subbed}gi;
240							# number of Unsubstituted characters
241	1098					8476	my $U = () = $word =~ m{\Q$unsubbed}gi;
242
243	1098	100	66			5474	if ($S==0 \|\| $U==0) {
244							# for this substitution, password is either fully subbed
245							# (444) or fully unsubbed (aaa); treat that as doubling
246							# the space (attacker needs to try fully subbed chars in
247							# addition to unsubbed.)
248	1054					2634	$variations *= 2;
249							}
250							else {
251							# this case is similar to capitalization: with aa44a, U =
252							# 3, S = 2, attacker needs to try unsubbed + one sub + two
253							# subs
254	44					116	my $possibilities = 0;
255	44					334	$possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256	44					157	$variations *= $possibilities;
257							}
258							}
259
260	4179					12271	return $variations;
261							}
262
263							sub _reversed_variations {
264	4179	100		4179		21028	return shift->reversed ? 2 : 1;
265							}
266
267
268							sub feedback_warning {
269	386			386	1	1232	my ($self, $is_sole_match) = @_;
270
271	386	100				3278	if ($self->dictionary_name eq 'passwords') {
		100
		100
272	107	100	100			925	if ($is_sole_match && !$self->l33t && !$self->reversed) {
		100	100
273	26	50				193	if ($self->rank <= 10) {
		50
274	0					0	return 'This is a top-10 common password';
275							}
276							elsif ($self->rank <= 100) {
277	0					0	return 'This is a top-100 common password';
278							}
279							else {
280	26					172	return 'This is a very common password';
281							}
282							}
283							elsif ($self->guesses_log10 <= 4) {
284	64					1303	return 'This is similar to a commonly used password';
285							}
286							}
287							elsif ($self->dictionary_name =~ /names$/) {
288	131	100				407	if ($is_sole_match) {
289	22					142	return 'Names and surnames by themselves are easy to guess'
290							}
291							else {
292	109					740	return 'Common names and surnames are easy to guess';
293							}
294							}
295							elsif ($is_sole_match) {
296	27					162	return 'A word by itself is easy to guess';
297							}
298
299	138					1086	return undef;
300							}
301
302							sub feedback_suggestions {
303	386			386	1	974	my ($self) = @_;
304
305	386					1229	my $word = $self->token;
306	386					796	my @suggestions;
307
308	386	100				1300	if ($self->does_word_start_upper($word)) {
		100
309	30					128	push @suggestions, q{Capitalization doesn't help very much};
310							}
311							elsif ($self->is_word_all_upper($word)) {
312	7					22	push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313							}
314
315	386	100	100			1846	if ($self->reversed && length($word) >= 4) {
316	13					50	push @suggestions, q{Reversed words aren't much harder to guess};
317							}
318
319	386	100				1719	if ($self->l33t) {
320	10					54	push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321							}
322
323	386					2605	return \@suggestions;
324							}
325
326
327							around fields_for_json => sub {
328							my ($orig,$self) = @_;
329							( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330							};
331
332							1;
333
334							__END__
335
336							=pod
337
338							=encoding UTF-8
339
340							=for :stopwords Wiktionary xato
341
342							=head1 NAME
343
344							Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345
346							=head1 VERSION
347
348							version 1.1.3
349
350							=head1 DESCRIPTION
351
352							This class represents the guess that a certain substring of a password
353							can be guessed by going through a dictionary.
354
355							=head1 ATTRIBUTES
356
357							=head2 C<reversed>
358
359							Boolean, true if the token appears to be a dictionary word that's been
360							reversed (i.e. last letter first)
361
362							=head2 C<substitutions>
363
364							Hashref representing the characters that need to be substituted to
365							make the token match a dictionary work (e.g. if the token is
366							C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367
368							=head2 C<rank>
369
370							Number, indicating how common the dictionary word is. 1 means "most
371							common".
372
373							=head2 C<dictionary_name>
374
375							String, the name of the dictionary that the word was found in. Usually one of:
376
377							=over 4
378
379							=item *
380
381							C<english_wikipedia>
382
383							words extracted from a dump of the English edition of Wikipedia
384
385							=item *
386
387							C<male_names>, C<female_names>, C<surnames>
388
389							common names from the 1990 US census
390
391							=item *
392
393							C<passwords>
394
395							most common passwords, extracted from the "xato" password dump
396
397							=item *
398
399							C<us_tv_and_film>
400
401							words from a 2006 Wiktionary word frequency study over American
402							television and movies
403
404							=back
405
406							=head1 METHODS
407
408							=head2 C<l33t>
409
410							Returns true if the token had any L</substitutions> (i.e. it was
411							written in "l33t-speak")
412
413							=head2 C<make>
414
415							my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416							$password,
417							{ # these are the defaults
418							ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419							l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420							},
421							) };
422
423							Scans the C<$password> for substrings that match words in the
424							C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425							from the C<l33t_table>.
426
427							The C<ranked_dictionaries> should look like:
428
429							{ some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430							... }
431
432							(i.e. a hash of dictionaries, each mapping words to their frequency
433							rank) and the C<l33t_table> should look like:
434
435							{ a => [ '4', '@' ], ... }
436
437							(i.e. a hash mapping characters to arrays of other characters)
438
439							=head2 C<estimate_guesses>
440
441							The number of guesses is the product of the rank of the word, how many
442							case combinations match it, how many substitutions were used, doubled
443							if the token is reversed.
444
445							=head2 C<does_word_start_upper>
446
447							=head2 C<does_word_end_upper>
448
449							=head2 C<is_word_all_not_upper>
450
451							=head2 C<is_word_all_not_lower>
452
453							=head2 C<is_word_all_upper>
454
455							if ($self->does_word_start_upper($word)) { ... }
456
457							These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458							>> and L<< /C<feedback_suggestions> >>.
459
460							=head2 C<feedback_warning>
461
462							=head2 C<feedback_suggestions>
463
464							This class suggests not using common words or passwords, especially on
465							their own. It also suggests that capitalisation, "special characters"
466							substitutions, and writing things backwards are not very useful.
467
468							=head2 C<fields_for_json>
469
470							The JSON serialisation for matches of this class will contain C<token
471							i j guesses guesses_log10 dictionary_name reversed rank
472							substitutions>.
473
474							=head1 AUTHOR
475
476							Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477
478							=head1 COPYRIGHT AND LICENSE
479
480							This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481
482							This is free software; you can redistribute it and/or modify it under
483							the same terms as the Perl 5 programming language system itself.
484
485							=cut