Skip to content

Commit f416148

Browse files
musicsnobjdwolfhub
authored and committed
decorator solution for lazy loading frequency_lists library
1 parent 2939b6b commit f416148

File tree

4 files changed

+61
-45
lines changed

4 files changed

+61
-45
lines changed

tests/matching_test.py

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,18 +67,6 @@ def test_build_ranked_dict():
6767
}
6868

6969

70-
def test_add_frequency_lists():
71-
matching.add_frequency_lists({
72-
'test_words': ['qidkviflkdoejjfkd', 'sjdshfidssdkdjdhfkl']
73-
})
74-
75-
assert 'test_words' in matching.RANKED_DICTIONARIES
76-
assert matching.RANKED_DICTIONARIES['test_words'] == {
77-
'qidkviflkdoejjfkd': 1,
78-
'sjdshfidssdkdjdhfkl': 2,
79-
}
80-
81-
8270
def test_matching_utils():
8371
chr_map = {
8472
'a': 'A',
@@ -102,7 +90,7 @@ def test_matching_utils():
10290

10391
def test_dictionary_matching():
10492
def dm(pw):
105-
return matching.dictionary_match(pw, test_dicts)
93+
return matching.dictionary_match(pw, _ranked_dictionaries=test_dicts)
10694

10795
test_dicts = {
10896
'd1': {
@@ -196,7 +184,7 @@ def test_reverse_dictionary_matching():
196184
}
197185
}
198186
password = '0123456789'
199-
matches = matching.reverse_dictionary_match(password, test_dicts)
187+
matches = matching.reverse_dictionary_match(password, _ranked_dictionaries=test_dicts)
200188
msg = 'matches against reversed words'
201189
check_matches(msg, matches, 'dictionary', ['123', '456'], [[1, 3], [4, 6]],
202190
{
@@ -236,7 +224,7 @@ def test_l33t_matching():
236224
assert matching.enumerate_l33t_subs(table) == subs, msg
237225

238226
def lm(pw):
239-
return matching.l33t_match(pw, dicts, test_table)
227+
return matching.l33t_match(pw, _ranked_dictionaries=dicts, _l33t_table=test_table)
240228

241229
dicts = {
242230
'words': {

tests/test_compatibility.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,13 @@ def main(argv):
4242
number_of_passwords = len(d)
4343
scores_collision = 0
4444
guesses_collision = 0
45-
refresh_rate = number_of_passwords/100
45+
refresh_rate = number_of_passwords // 100
4646

4747
i = 0
4848
for js_zxcvbn_score in d:
4949
if i%refresh_rate== 0:
5050
update_console_status(i*100/number_of_passwords)
5151
i += 1
52-
5352
py_zxcvbn_scroe = dict()
5453
py_zxcvbn_scroe_full = zxcvbn(js_zxcvbn_score['password'])
5554
py_zxcvbn_scroe["password"] = py_zxcvbn_scroe_full["password"]
@@ -64,15 +63,15 @@ def main(argv):
6463
expected:
6564
%s
6665
results:
67-
%s\033[00m""")%(js_zxcvbn_score, py_zxcvbn_scroe)
66+
%s\033[00m""" % (js_zxcvbn_score, py_zxcvbn_scroe))
6867

6968
if py_zxcvbn_scroe["score"] != js_zxcvbn_score["score"]:
7069
scores_collision += 1
7170

7271
if (guesses_collision or scores_collision):
7372
print ("""\033[91mFailed!
7473
guesses_collision:%d
75-
guesses_score:%d""")%(guesses_collision, scores_collision)
74+
guesses_score:%d""" % (guesses_collision, scores_collision))
7675
else:
7776
print ("\033[92mPassed!")
7877

zxcvbn/__init__.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,7 @@ def zxcvbn(password, user_inputs=None, max_length=72):
2727
arg = str(arg)
2828
sanitized_inputs.append(arg.lower())
2929

30-
ranked_dictionaries = matching.RANKED_DICTIONARIES
31-
ranked_dictionaries['user_inputs'] = matching.build_ranked_dict(sanitized_inputs)
32-
33-
matches = matching.omnimatch(password, ranked_dictionaries)
30+
matches = matching.omnimatch(password, user_inputs=sanitized_inputs)
3431
result = scoring.most_guessable_match_sequence(password, matches)
3532
result['calc_time'] = datetime.now() - start
3633

zxcvbn/matching.py

Lines changed: 54 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,44 @@
11
from zxcvbn import scoring
22
from . import adjacency_graphs
3-
from zxcvbn.frequency_lists import FREQUENCY_LISTS
43
import re
4+
import functools
55

66
from zxcvbn.scoring import most_guessable_match_sequence
77

88

99
def build_ranked_dict(ordered_list):
1010
return {word: idx for idx, word in enumerate(ordered_list, 1)}
1111

12-
RANKED_DICTIONARIES = {}
13-
14-
15-
def add_frequency_lists(frequency_lists_):
16-
for name, lst in frequency_lists_.items():
17-
RANKED_DICTIONARIES[name] = build_ranked_dict(lst)
18-
19-
20-
add_frequency_lists(FREQUENCY_LISTS)
12+
RANKED_DICTIONARIES = None
13+
14+
def get_ranked_dictionaries():
15+
"""
16+
Lazy-load large dictionary data set.
17+
Return global RANKED_DICTIONARIES, ensuring it is built only once.
18+
"""
19+
global RANKED_DICTIONARIES
20+
21+
if RANKED_DICTIONARIES is None:
22+
# Do the expensive import here only
23+
from zxcvbn.frequency_lists import FREQUENCY_LISTS
24+
25+
# Build the dictionary once
26+
RANKED_DICTIONARIES = {}
27+
for name, lst in FREQUENCY_LISTS.items():
28+
RANKED_DICTIONARIES[name] = build_ranked_dict(lst)
29+
return RANKED_DICTIONARIES
30+
31+
32+
def ensure_ranked_dictionaries(func):
33+
"""Decorator to ensure _ranked_dictionaries argument is always populated."""
34+
@functools.wraps(func)
35+
def wrapper(*args, **kwargs):
36+
# If a non-None _ranked_dictionaries keyword arg was passed, use it.
37+
# Otherwise fetch from the global cache.
38+
if '_ranked_dictionaries' not in kwargs or kwargs['_ranked_dictionaries'] is None:
39+
kwargs['_ranked_dictionaries'] = get_ranked_dictionaries()
40+
return func(*args, **kwargs)
41+
return wrapper
2142

2243
GRAPHS = {
2344
'qwerty': adjacency_graphs.ADJACENCY_GRAPHS['qwerty'],
@@ -75,7 +96,11 @@ def add_frequency_lists(frequency_lists_):
7596

7697

7798
# omnimatch -- perform all matches
78-
def omnimatch(password, _ranked_dictionaries=RANKED_DICTIONARIES):
99+
@ensure_ranked_dictionaries
100+
def omnimatch(password, _ranked_dictionaries=None, user_inputs=[]):
101+
if len(user_inputs):
102+
_ranked_dictionaries['user_inputs'] = build_ranked_dict(user_inputs)
103+
79104
matches = []
80105
for matcher in [
81106
dictionary_match,
@@ -93,7 +118,8 @@ def omnimatch(password, _ranked_dictionaries=RANKED_DICTIONARIES):
93118

94119

95120
# dictionary match (common passwords, english, last names, etc)
96-
def dictionary_match(password, _ranked_dictionaries=RANKED_DICTIONARIES):
121+
@ensure_ranked_dictionaries
122+
def dictionary_match(password, _ranked_dictionaries=None):
97123
matches = []
98124
length = len(password)
99125
password_lower = password.lower()
@@ -117,11 +143,11 @@ def dictionary_match(password, _ranked_dictionaries=RANKED_DICTIONARIES):
117143

118144
return sorted(matches, key=lambda x: (x['i'], x['j']))
119145

120-
146+
@ensure_ranked_dictionaries
121147
def reverse_dictionary_match(password,
122-
_ranked_dictionaries=RANKED_DICTIONARIES):
148+
_ranked_dictionaries=None):
123149
reversed_password = ''.join(reversed(password))
124-
matches = dictionary_match(reversed_password, _ranked_dictionaries)
150+
matches = dictionary_match(reversed_password, _ranked_dictionaries=_ranked_dictionaries)
125151
for match in matches:
126152
match['token'] = ''.join(reversed(match['token']))
127153
match['reversed'] = True
@@ -212,7 +238,8 @@ def translate(string, chr_map):
212238
return ''.join(chars)
213239

214240

215-
def l33t_match(password, _ranked_dictionaries=RANKED_DICTIONARIES,
241+
@ensure_ranked_dictionaries
242+
def l33t_match(password, _ranked_dictionaries=None,
216243
_l33t_table=L33T_TABLE):
217244
matches = []
218245

@@ -222,7 +249,7 @@ def l33t_match(password, _ranked_dictionaries=RANKED_DICTIONARIES,
222249
break
223250

224251
subbed_password = translate(password, sub)
225-
for match in dictionary_match(subbed_password, _ranked_dictionaries):
252+
for match in dictionary_match(subbed_password, _ranked_dictionaries=_ranked_dictionaries):
226253
token = password[match['i']:match['j'] + 1]
227254
if token.lower() == match['matched_word']:
228255
# only return the matches that contain an actual substitution
@@ -247,7 +274,8 @@ def l33t_match(password, _ranked_dictionaries=RANKED_DICTIONARIES,
247274

248275

249276
# repeats (aaa, abcabcabc) and sequences (abcdef)
250-
def repeat_match(password, _ranked_dictionaries=RANKED_DICTIONARIES):
277+
@ensure_ranked_dictionaries
278+
def repeat_match(password, _ranked_dictionaries=None):
251279
matches = []
252280
greedy = re.compile(r'(.+)\1+')
253281
lazy = re.compile(r'(.+?)\1+')
@@ -298,7 +326,8 @@ def repeat_match(password, _ranked_dictionaries=RANKED_DICTIONARIES):
298326
return matches
299327

300328

301-
def spatial_match(password, _graphs=GRAPHS, _ranked_dictionaries=RANKED_DICTIONARIES):
329+
@ensure_ranked_dictionaries
330+
def spatial_match(password, _graphs=GRAPHS, _ranked_dictionaries=None):
302331
matches = []
303332
for graph_name, graph in _graphs.items():
304333
matches.extend(spatial_match_helper(password, graph, graph_name))
@@ -379,7 +408,8 @@ def spatial_match_helper(password, graph, graph_name):
379408
MAX_DELTA = 5
380409

381410

382-
def sequence_match(password, _ranked_dictionaries=RANKED_DICTIONARIES):
411+
@ensure_ranked_dictionaries
412+
def sequence_match(password, _ranked_dictionaries=None):
383413
# Identifies sequences by looking for repeated differences in unicode codepoint.
384414
# this allows skipping, such as 9753, and also matches some extended unicode sequences
385415
# such as Greek and Cyrillic alphabets.
@@ -440,7 +470,8 @@ def update(i, j, delta):
440470
return result
441471

442472

443-
def regex_match(password, _regexen=REGEXEN, _ranked_dictionaries=RANKED_DICTIONARIES):
473+
@ensure_ranked_dictionaries
474+
def regex_match(password, _regexen=REGEXEN, _ranked_dictionaries=None):
444475
matches = []
445476
for name, regex in _regexen.items():
446477
for rx_match in regex.finditer(password):
@@ -456,7 +487,8 @@ def regex_match(password, _regexen=REGEXEN, _ranked_dictionaries=RANKED_DICTIONA
456487
return sorted(matches, key=lambda x: (x['i'], x['j']))
457488

458489

459-
def date_match(password, _ranked_dictionaries=RANKED_DICTIONARIES):
490+
@ensure_ranked_dictionaries
491+
def date_match(password, _ranked_dictionaries=None):
460492
# a "date" is recognized as:
461493
# any 3-tuple that starts or ends with a 2- or 4-digit year,
462494
# with 2 or 0 separator chars (1.1.91 or 1191),

0 commit comments

Comments
 (0)