File Coverage

blib/lib/Data/Password/zxcvbn/Match/Dictionary.pm

Criterion	Covered	Total	%
statement	132	137	96.3
branch	50	52	96.1
condition	18	20	90.0
subroutine	19	22	86.3
pod	10	10	100.0
total	229	241	95.0

line	stmt	bran	cond	sub	pod	time	code
1							package Data::Password::zxcvbn::Match::Dictionary;
2	4			4		10051	use Moo;
	4					14
	4					35
3							with 'Data::Password::zxcvbn::Match';
4	4			4		4929	use Data::Password::zxcvbn::Combinatorics qw(nCk enumerate_substitution_maps);
	4					12
	4					273
5	4			4		32	use List::AllUtils qw(min);
	4					9
	4					4629
6							our $VERSION = '1.1.2'; # VERSION
7							# ABSTRACT: match class for words in passwords
8
9
10							has reversed => (is => 'ro', default => 0); # bool
11							has substitutions => ( is => 'ro', default => sub { +{} } );
12							has rank => ( is => 'ro', default => 1 ); # int
13							# this should be constrained to the keys of %ranked_dictionaries, but
14							# we can't do that because users can pass their own dictionaries to
15							# ->make
16							has dictionary_name => ( is => 'ro', default => 'passwords' );
17
18
19							sub l33t {
20	451			451	1	60692	return scalar(keys %{shift->substitutions})!=0;
	451					2615
21							}
22
23
24							our %l33t_table = ( ## no critic (ProhibitPackageVars)
25							a => ['4', '@'],
26							b => ['8'],
27							c => ['(', '{', '[', '<'],
28							e => ['3'],
29							g => ['6', '9'],
30							i => ['1', '!', '\|'],
31							l => ['1', '\|', '7'],
32							o => ['0'],
33							s => ['$', '5'],
34							t => ['+', '7'],
35							x => ['%'],
36							z => ['2'],
37							);
38
39							sub make {
40	1533			1533	1	98953	my ($class, $password, $opts) = @_;
41							## no critic (ProhibitPackageVars)
42							my $dictionaries = $opts->{ranked_dictionaries}
43	1533		66			8091	\|\| do {
44							require Data::Password::zxcvbn::RankedDictionaries;
45							\%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries;
46							};
47	1533		100			9263	my $l33t_table = $opts->{l33t_table} \|\| \%l33t_table;
48
49	1533					3976	my @matches;
50	1533					7295	$class->_make_simple(\@matches,$password,$dictionaries);
51	1533					9556	$class->_make_reversed(\@matches,$password,$dictionaries);
52	1533					10853	$class->_make_l33t(\@matches,$password,$dictionaries, $l33t_table);
53
54	1533					15710	@matches = sort @matches;
55	1533					10503	return \@matches;
56							}
57
58							sub _make_simple {
59	4648			4648		11931	my ($class, $matches, $password, $dictionaries) = @_;
60	4648					9812	my $password_lc = lc($password);
61							# lc may change the length of the password...
62	4648					9014	my $length = length($password_lc);
63
64	4648					8496	for my $dictionary_name (keys %{$dictionaries}) {
	4648					16640
65	27453					49745	my $ranked_dict = $dictionaries->{$dictionary_name};
66	27453					50353	for my $i (0..$length-1) {
67	156745					262934	for my $j ($i..$length-1) {
68	674738					1098536	my $word = substr($password_lc,$i,$j-$i+1);
69	674738	100				1938980	if (my $rank = $ranked_dict->{$word}) {
70	8139					14111	push @{$matches}, $class->new({
	8139					191337
71							token => substr($password,$i,$j-$i+1),
72							i => $i, j=> $j,
73							rank => $rank,
74							dictionary_name => $dictionary_name,
75							});
76							}
77							}
78							}
79							}
80							}
81
82							sub _make_reversed {
83	1533			1533		5458	my ($class, $matches, $password, $dictionaries) = @_;
84
85	1533					5554	my $rev_password = reverse($password);
86	1533					3128	my @rev_matches;
87	1533					6461	$class->_make_simple(\@rev_matches,$rev_password,$dictionaries);
88
89	1533					5399	my $rev_length = length($password)-1;
90	1533					7375	for my $rev_match (@rev_matches) {
91	1553					9099	my $word = $rev_match->token;
92							# no need to add this, the normal matching will have produced
93							# it already
94	1553	100				7438	next if $word eq reverse($word);
95	661					1607	push @{$matches}, $class->new({
	661					17623
96							token => reverse($word),
97							i => $rev_length - $rev_match->j,
98							j=> $rev_length - $rev_match->i,
99							rank => $rev_match->rank,
100							dictionary_name => $rev_match->dictionary_name,
101							reversed => 1,
102							});
103							}
104							}
105
106							# makes a pruned copy of l33t_table that only includes password's
107							# possible substitutions
108							sub _relevant_l33t_subtable {
109	1539			1539		22071	my ($class, $password, $l33t_table) = @_;
110							# set of characters
111	1539					3232	my %password_chars; @password_chars{split //,$password} = ();
	1539					11235
112
113	1539					4769	my %subtable;
114	1539					3606	for my $letter (keys %{$l33t_table}) {
	1539					9244
115	34681					73973	my @relevant_subs = grep { exists $password_chars{$_} }
116	18084					26989	@{$l33t_table->{$letter}};
	18084					38767
117	18084	100				44279	$subtable{$letter} = \@relevant_subs
118							if @relevant_subs;
119							}
120
121	1539					12429	return \%subtable;
122							}
123
124							sub _translate {
125	1582			1582		4973	my ($class, $string, $table) = @_;
126	1582					3102	my $keys = join '', keys %{$table};
	1582					5283
127	1582					41200	$string =~ s{([\Q$keys\E])}
128	1582					7082	{$table->{$1}}g;
129							return $string;
130							}
131
132	1533			1533		5555	sub _make_l33t {
133							my ($class, $matches, $password, $dictionaries, $l33t_table) = @_;
134	1533					5668
135							my $subs = enumerate_substitution_maps(
136							$class->_relevant_l33t_subtable($password,$l33t_table)
137	1533					5920	);
	1533					4879
138	2246	100				8551	for my $sub (@{$subs}) {
	2246					7897
139	1582					6240	next unless %{$sub};
140	1582					3355	my $subbed_password = $class->_translate($password,$sub);
141	1582					5940	my @subbed_matches;
142							$class->_make_simple(\@subbed_matches,$subbed_password,$dictionaries);
143	1582					6453
144	3651					24611	for my $subbed_match (@subbed_matches) {
145							my $token = substr($password,
146							$subbed_match->i,
147							$subbed_match->j - $subbed_match->i + 1);
148	3651	100				11422	# too short, ignore
149							next if length($token) <= 1;
150	1930	100				7759	# only return the matches that contain an actual substitution
151							next if lc($token) eq lc($subbed_match->token);
152							# subset of mappings in $sub that are in use for this match
153							my %min_subs = map {
154	2164	100				20416	$token =~ m{\Q$_}
155							? ( $_ => $sub->{$_} )
156	589					1388	: ()
	589					2008
157	589					1778	} keys %{$sub};
	589					15472
158							push @{$matches}, $class->new({
159							token => $token,
160							substitutions => \%min_subs,
161							i => $subbed_match->i,
162							j=> $subbed_match->j,
163							rank => $subbed_match->rank,
164							dictionary_name => $subbed_match->dictionary_name,
165							});
166							}
167							}
168							}
169
170
171	4179			4179	1	63436	sub estimate_guesses {
172							my ($self,$min_guesses) = @_;
173	4179					16080
174							return $self->rank *
175							$self->_uppercase_variations *
176							$self->_l33t_variations *
177							$self->_reversed_variations;
178							}
179
180
181							# an uppercase letter, followed by stuff that is not uppercase
182	3			3		1829	# letters
	3					47
	3					42
183							my $START_UPPER_RE = qr{\A \p{Lu} \P{Lu}+ \z}x;
184							# stuff that is not uppercase letters, followed by an uppercase
185							# letter
186							my $END_UPPER_RE = qr{\A \P{Lu}+ \p{Lu} \z}x;
187							# no characters that are not uppercase letters
188							my $ALL_NOT_UPPER_RE = qr{\A \P{Lu}+ \z}x;
189							# no characters that are not lowercase letters
190							my $ALL_NOT_LOWER_RE = qr{\A \P{Ll}+ \z}x;
191
192	386			386	1	4430
193	0			0	1	0	sub does_word_start_upper { return $_[1] =~ $START_UPPER_RE }
194	0			0	1	0	sub does_word_end_upper { return $_[1] =~ $END_UPPER_RE }
195	0			0	1	0	sub is_word_all_not_upper { return $_[1] =~ $ALL_NOT_UPPER_RE }
196	356		100	356	1	3564	sub is_word_all_not_lower { return $_[1] =~ $ALL_NOT_LOWER_RE }
197							sub is_word_all_upper { return $_[1] =~ $ALL_NOT_LOWER_RE && $_[1] ne lc($_[1]) }
198
199	4179			4179		8505	sub _uppercase_variations {
200							my ($self) = @_;
201	4179					9749
202							my $word = $self->token;
203
204	4179	100				30188	# if the word has no uppercase letters, count it as 1 variation
205	270	100				1073	return 1 if $word =~ $ALL_NOT_UPPER_RE;
206							return 1 if lc($word) eq $word;
207
208							# a capitalized word is the most common capitalization scheme, so
209							# it only doubles the search space (uncapitalized + capitalized).
210							# allcaps and end-capitalized are common enough too, underestimate
211	268	100				1632	# as 2x factor to be safe.
212	148	100				821	return 2 if $word =~ $START_UPPER_RE;
213	127	100				826	return 2 if $word =~ $END_UPPER_RE;
214							return 2 if $word =~ $ALL_NOT_LOWER_RE;
215
216							# otherwise calculate the number of ways to capitalize U+L
217							# uppercase+lowercase letters with U uppercase letters or
218							# less. or, if there's more uppercase than lower (for
219							# eg. PASSwORD), the number of ways to lowercase U+L letters with
220	37					198	# L lowercase letters or less.
221	37					161	my $U = () = $word =~ m/\p{Lu}/g;
222							my $L = () = $word =~ m/\p{Ll}/g;
223	37					88
224	37					243	my $variations = 0;
225	37					137	$variations += nCk($U+$L,$_) for 1..min($U,$L);
226							return $variations;
227							}
228
229	4179			4179		10325	sub _l33t_variations {
230							my ($self) = @_;
231	4179					9931
232							my $word = $self->token;
233	4179					7587
234	4179					7232	my $variations = 1;
	4179					16445
235	1098					3292	for my $subbed (keys %{$self->substitutions}) {
236							my $unsubbed = $self->substitutions->{$subbed};
237
238	1098					10603	# number of Substituted characters
239							my $S = () = $word =~ m{\Q$subbed}gi;
240	1098					7425	# number of Unsubstituted characters
241							my $U = () = $word =~ m{\Q$unsubbed}gi;
242	1098	100	66			4809
243							if ($S==0 \|\| $U==0) {
244							# for this substitution, password is either fully subbed
245							# (444) or fully unsubbed (aaa); treat that as doubling
246							# the space (attacker needs to try fully subbed chars in
247	1054					3239	# addition to unsubbed.)
248							$variations *= 2;
249							}
250							else {
251							# this case is similar to capitalization: with aa44a, U =
252							# 3, S = 2, attacker needs to try unsubbed + one sub + two
253	44					135	# subs
254	44					305	my $possibilities = 0;
255	44					133	$possibilities += nCk($U+$S,$_) for 1..min($U,$S);
256							$variations *= $possibilities;
257							}
258							}
259	4179					13522
260							return $variations;
261							}
262
263	4179	100		4179		22109	sub _reversed_variations {
264							return shift->reversed ? 2 : 1;
265							}
266
267
268	386			386	1	1565	sub feedback_warning {
269							my ($self, $is_sole_match) = @_;
270	386	100				3422
		100
		100
271	107	100	100			1051	if ($self->dictionary_name eq 'passwords') {
		100	100
272	26	50				171	if ($is_sole_match && !$self->l33t && !$self->reversed) {
		50
273	0					0	if ($self->rank <= 10) {
274							return 'This is a top-10 common password';
275							}
276	0					0	elsif ($self->rank <= 100) {
277							return 'This is a top-100 common password';
278							}
279	26					166	else {
280							return 'This is a very common password';
281							}
282							}
283	64					1757	elsif ($self->guesses_log10 <= 4) {
284							return 'This is similar to a commonly used password';
285							}
286							}
287	131	100				364	elsif ($self->dictionary_name =~ /names$/) {
288	22					134	if ($is_sole_match) {
289							return 'Names and surnames by themselves are easy to guess'
290							}
291	109					769	else {
292							return 'Common names and surnames are easy to guess';
293							}
294							}
295	27					173	elsif ($is_sole_match) {
296							return 'A word by itself is easy to guess';
297							}
298	138					1191
299							return undef;
300							}
301
302	386			386	1	1374	sub feedback_suggestions {
303							my ($self) = @_;
304	386					1295
305	386					824	my $word = $self->token;
306							my @suggestions;
307	386	100				1829
		100
308	30					155	if ($self->does_word_start_upper($word)) {
309							push @suggestions, q{Capitalization doesn't help very much};
310							}
311	7					31	elsif ($self->is_word_all_upper($word)) {
312							push @suggestions, 'All-uppercase is almost as easy to guess as all-lowercase';
313							}
314	386	100	100			2157
315	13					48	if ($self->reversed && length($word) >= 4) {
316							push @suggestions, q{Reversed words aren't much harder to guess};
317							}
318	386	100				1408
319	10					39	if ($self->l33t) {
320							push @suggestions, q{Predictable substitutions like '@' instead of 'a' don't help very much};
321							}
322	386					1991
323							return \@suggestions;
324							}
325
326
327							around fields_for_json => sub {
328							my ($orig,$self) = @_;
329							( $self->$orig(), qw(dictionary_name reversed rank substitutions) )
330							};
331
332							1;
333
334							__END__
335
336							=pod
337
338							=encoding UTF-8
339
340							=for :stopwords Wiktionary xato
341
342							=head1 NAME
343
344							Data::Password::zxcvbn::Match::Dictionary - match class for words in passwords
345
346							=head1 VERSION
347
348							version 1.1.2
349
350							=head1 DESCRIPTION
351
352							This class represents the guess that a certain substring of a password
353							can be guessed by going through a dictionary.
354
355							=head1 ATTRIBUTES
356
357							=head2 C<reversed>
358
359							Boolean, true if the token appears to be a dictionary word that's been
360							reversed (i.e. last letter first)
361
362							=head2 C<substitutions>
363
364							Hashref representing the characters that need to be substituted to
365							make the token match a dictionary work (e.g. if the token is
366							C<s!mpl3>, this hash would be C<< { '!' => 'i', '3' => 'e' } >>).
367
368							=head2 C<rank>
369
370							Number, indicating how common the dictionary word is. 1 means "most
371							common".
372
373							=head2 C<dictionary_name>
374
375							String, the name of the dictionary that the word was found in. Usually one of:
376
377							=over 4
378
379							=item *
380
381							C<english_wikipedia>
382
383							words extracted from a dump of the English edition of Wikipedia
384
385							=item *
386
387							C<male_names>, C<female_names>, C<surnames>
388
389							common names from the 1990 US census
390
391							=item *
392
393							C<passwords>
394
395							most common passwords, extracted from the "xato" password dump
396
397							=item *
398
399							C<us_tv_and_film>
400
401							words from a 2006 Wiktionary word frequency study over American
402							television and movies
403
404							=back
405
406							=head1 METHODS
407
408							=head2 C<l33t>
409
410							Returns true if the token had any L</substitutions> (i.e. it was
411							written in "l33t-speak")
412
413							=head2 C<make>
414
415							my @matches = @{ Data::Password::zxcvbn::Match::Dictionary->make(
416							$password,
417							{ # these are the defaults
418							ranked_dictionaries => \%Data::Password::zxcvbn::RankedDictionaries::ranked_dictionaries,
419							l33t_table => \%Data::Password::zxcvbn::Match::Dictionary::l33t_table,
420							},
421							) };
422
423							Scans the C<$password> for substrings that match words in the
424							C<ranked_dictionaries>, possibly reversed, possibly with substitutions
425							from the C<l33t_table>.
426
427							The C<ranked_dictionaries> should look like:
428
429							{ some_dictionary_name => { 'word' => 156, 'another' => 13, ... },
430							... }
431
432							(i.e. a hash of dictionaries, each mapping words to their frequency
433							rank) and the C<l33t_table> should look like:
434
435							{ a => [ '4', '@' ], ... }
436
437							(i.e. a hash mapping characters to arrays of other characters)
438
439							=head2 C<estimate_guesses>
440
441							The number of guesses is the product of the rank of the word, how many
442							case combinations match it, how many substitutions were used, doubled
443							if the token is reversed.
444
445							=head2 C<does_word_start_upper>
446
447							=head2 C<does_word_end_upper>
448
449							=head2 C<is_word_all_not_upper>
450
451							=head2 C<is_word_all_not_lower>
452
453							=head2 C<is_word_all_upper>
454
455							if ($self->does_word_start_upper($word)) { ... }
456
457							These are mainly for sub-classes, to use in L<< /C<feedback_warning>
458							>> and L<< /C<feedback_suggestions> >>.
459
460							=head2 C<feedback_warning>
461
462							=head2 C<feedback_suggestions>
463
464							This class suggests not using common words or passwords, especially on
465							their own. It also suggests that capitalisation, "special characters"
466							substitutions, and writing things backwards are not very useful.
467
468							=head2 C<fields_for_json>
469
470							The JSON serialisation for matches of this class will contain C<token
471							i j guesses guesses_log10 dictionary_name reversed rank
472							substitutions>.
473
474							=head1 AUTHOR
475
476							Gianni Ceccarelli <gianni.ceccarelli@broadbean.com>
477
478							=head1 COPYRIGHT AND LICENSE
479
480							This software is copyright (c) 2022 by BroadBean UK, a CareerBuilder Company.
481
482							This is free software; you can redistribute it and/or modify it under
483							the same terms as the Perl 5 programming language system itself.
484
485							=cut