1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
|
# This file is part of the SBK project
# https://github.com/mbarkhau/sbk
#
# Copyright (c) 2019-2021 Manuel Barkhau (mbarkhau@gmail.com) - MIT License
# SPDX-License-Identifier: MIT
"""Wordlists for SBK."""
import os
import struct
import typing as typ
import pylev
from . import enc_util
# The word table: 256 whitespace separated words, one per possible byte value.
# The position of a word in the list is the byte value it stands for, so the
# order of the words must never change.
WORDLIST_STR = """
abacus abraham academy acrobat admiral albino alcohol aquarium
atlantic attorney augustus avocado bazooka beehive beirut benjamin
bible bicycle bismarck blanket boeing bohemia bolivia bridge
broccoli brussels budapest buffalo button cabbage caesar captain
carolina caucasus cherry church cinnamon claudius coconut coffee
computer cookie coupon cowboy crystal cyprus darwin dentist
deputy detroit diploma doctor dolphin donut dortmund dracula
dublin eagle earpiece edison egypt elephant elvis embassy
ethiopia fairy ferrari firefly flower football france freddie
fujitsu galileo gameboy geisha ghost glasgow google gorilla
gotham gymnast halifax harvard hawaii headset heineken helsinki
hendrix hepburn hitachi hunter hyundai indiana iphone island
jacket jakarta jericho jigsaw joystick judge jukebox julius
kabul kafka kangaroo kashmir keyboard kidney kimono knight
koala kodak kolkata kosovo kurosawa laptop latvia lawyer
leather lebanon leibniz lenin library lobster lunatic macbook
mason meatball mechanic medusa mercury messi michigan miller
miyazaki moldova movie mozart muffin muhammad mumbai mushroom
myanmar nagasaki nairobi nanjing napoleon necklace needle netflix
newton normandy obelix onion ontario oregon orwell oxford
package pakistan pancake papaya peanut pelican penguin pepper
peugeot picasso pigeon pilot pistol pizza plumber podium
popcorn potato present printer prophet pumpkin pyramid python
queen rabbit radio renault reporter rhubarb romania ronaldo
rousseau saddam salmon samurai santiago satoshi sausage school
server sheriff siemens simpson sisyphus slippers slovakia socrates
soldier sparrow spider squid sultan sunlight surgeon suzuki
teacup temple tequila texas titanic tobacco toilet tokyo
trinidad trumpet tshirt tunisia turtle tuxedo twitter ukraine
ulysses unesco uruguay vampire victoria violin virginia vivaldi
vladimir volcano voyager waffle walnut warrior wasabi watanabe
webcam whisky wizard xerox yoghurt yokohama zambia zimbabwe
"""

# Parsed forms of the table: an ordered list (index -> word) and a set for
# fast membership tests.
WORDLIST = WORDLIST_STR.split()
WORDSET = set(WORDLIST)

# Import-time sanity checks on the table itself.
assert len(WORDLIST) == 256  # exactly one word per byte value
assert len(WORDSET) == 256  # no duplicates
assert WORDLIST == sorted(WORDLIST)  # kept in alphabetical order
assert min(map(len, WORDLIST)) >= 5 and max(map(len, WORDLIST)) <= 8
assert len({entry[:3] for entry in WORDLIST}) == 256  # unique 3 letter prefixes

# Reverse lookup (word -> index); `wordlist_index` raises KeyError for a word
# that is not in the table.
WORD_INDEXES = dict(zip(WORDLIST, range(len(WORDLIST))))
wordlist_index = WORD_INDEXES.__getitem__

assert wordlist_index("abacus") == 0
assert wordlist_index("zimbabwe") == 255
assert wordlist_index(WORDLIST[127]) == 127

# Alias used in signatures to mark strings that hold an encoded phrase.
PhraseStr = str
def byte2word(data: bytes) -> str:
    """Return the wordlist entry that stands for a single byte.

    `data` must have a length of exactly one (checked with an assertion).
    The value obtained from `enc_util.char_at(data, 0)` is used as the index
    into WORDLIST.
    """
    assert len(data) == 1
    return WORDLIST[enc_util.char_at(data, 0)]
def _bytes2phrase_words(data: bytes) -> typ.Iterable[str]:
    """Lazily yield one padded word for every position in `data`.

    For each position the value returned by `enc_util.char_at` selects the
    entry of WORDLIST. Every word is left-justified to a width of 9 so that
    the words line up in columns when they are joined (the module asserts
    that no word is longer than 8 characters).
    """
    positions = range(len(data))
    indexes = (enc_util.char_at(data, pos) for pos in positions)
    yield from (WORDLIST[idx].ljust(9) for idx in indexes)
def bytes2phrase(data: bytes) -> PhraseStr:
    """Encode `data` as a human readable phrase.

    Each byte is turned into a padded word by `_bytes2phrase_words`. The
    words are laid out two per line: the words of a pair are separated by a
    single space and the lines are joined with newlines. Empty input gives
    an empty string.

    Raises:
        ValueError: if `len(data)` is not a multiple of 2.
    """
    if len(data) % 2:
        errmsg = f"Invalid len(data), must be multiple of 2, was {len(data)}"
        raise ValueError(errmsg)

    padded_words = iter(_bytes2phrase_words(data))
    # Zipping one iterator with itself consumes the words two at a time; the
    # length check above guarantees that no word is left over.
    lines = [left + " " + right for left, right in zip(padded_words, padded_words)]
    return "\n".join(lines)
def fuzzy_match(word: str) -> str:
    """Return the WORDLIST entry that is closest to `word`.

    Closeness is measured with `pylev.damerau_levenshtein`. Candidates are
    ranked as `(distance, candidate)` tuples, so when several entries share
    the smallest distance the alphabetically first one is chosen.

    Raises:
        ValueError: if even the closest entry has a distance of 4 or more.
            The exception args are the message and the offending word.
    """

    def ranked(candidate: str) -> typ.Tuple[int, str]:
        distance = pylev.damerau_levenshtein(word, candidate)
        assert isinstance(distance, int)
        return (distance, candidate)

    best_distance, best_word = min(map(ranked, WORDLIST))
    if best_distance >= 4:
        errmsg = f"Unknown word: {word}"
        raise ValueError(errmsg, word)
    return best_word
def phrase2words(phrase: PhraseStr) -> typ.Iterable[str]:
    """Lazily yield the wordlist words that make up `phrase`.

    The phrase is split on whitespace and every token is stripped and
    lower-cased. A token that is an exact member of WORDSET is yielded as is;
    any other token is passed to `fuzzy_match`, which either returns the
    closest wordlist entry or raises ValueError.
    """
    for token in phrase.split():
        candidate = token.strip().lower()
        yield candidate if candidate in WORDSET else fuzzy_match(candidate)
def _phrase2bytes(phrase: PhraseStr) -> typ.Iterable[bytes]:
    """Lazily yield one single-byte `bytes` object per word of `phrase`.

    Words come from `phrase2words`; the index of each word in the wordlist
    is packed as an unsigned byte (struct format "B").
    """
    pack_byte = struct.Struct("B").pack
    yield from (pack_byte(wordlist_index(entry)) for entry in phrase2words(phrase))
def phrase2bytes(phrase: PhraseStr) -> bytes:
    """Decode a human readable phrase to the bytes it stands for.

    The result holds one byte per word of `phrase`, in phrase order.
    """
    byte_chunks = _phrase2bytes(phrase)
    return b"".join(byte_chunks)
def main() -> None:
    """Print the phrase encoding of 8 bytes obtained from `os.urandom`."""
    phrase = bytes2phrase(os.urandom(8))
    print(phrase)


# Only run the demo when the module is executed as a script.
if __name__ == '__main__':
    main()
|