sbk.kdf

src/sbk/kdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
# This file is part of the sbk project
# https://github.com/mbarkhau/sbk
#
# Copyright (c) 2019-2021 Manuel Barkhau (mbarkhau@gmail.com) - MIT License
# SPDX-License-Identifier: MIT
"""KDF parameter encoding and key derivation."""

import math
import time
import typing as typ
import importlib
import threading

# Semantic aliases for the plain ints used as KDF parameters.
NumThreads = int  # parallelism (p)
MebiBytes  = int  # memory cost (m)
Iterations = int  # time cost (t)
Seconds    = int

# types for progress bar
# An Increment is a share of the total progress on a scale of 0 to 100
# (digest reports 100 / t per iteration).
Increment             = float
ProgressCallback      = typ.Callable[[Increment], None]
MaybeProgressCallback = typ.Optional[ProgressCallback]


# NOTE (mb 2021-05-29): Since we feed the hash output back into the
#   following iteration (to implement the progress bar), the HASH_LEN
#   is chosen to be much larger than the original input, hopefully
#   this makes loss of entropy between iterations negligible.
#   Feedback welcome.
# Length in bytes of the raw output requested from the hash backends.
HASH_LEN = 128

# We are looking for an equation of the form
#
#   f(n) = ⌊o + s * b ** n⌋
#
# such that f(0) = 1 and f(1) = 2 for any given b
#
# n: [0..63]  (int)
# b: base   (chosen)
# s: scale  (unknown)
# o: offset (1 - s)
#
# Knowing that we can choose o = (1 - s), so that
# f(0) = 1, we can work with g(n) = s * b ** n,
# where for n = 0 it must hold that
#
#   g(0) + 1 = g(1)         # lem 1
#
# Given that
#
#  g(0) = s * b ** 0
#  g(0) = s * 1
#  g(0) = s                 # lem 2
#
#  g(1) = s * b ** 1
#  g(1) = s * b
#
#  g(0) + 1 = s + 1         # +1 to lem 2
#  s + 1 = s * b            # substitute g(1) given lem 1
#
# try to isolate s
#
#       s + 1 = s * b
#           1 = s * b - s             # - s
#           1 = s * (b - 1)           # factor out s
# 1 / (b - 1) = s                     # / (b - 1)
#
# if we choose b = 2     , then s =  1 and o = 0
# if we choose b = 1.5   , then s =  2 and o = -1
# if we choose b = 1.25  , then s =  4 and o = -3
# if we choose b = 1.125 , then s =  8 and o = -7
# if we choose b = 1.0625, then s = 16 and o = -15


def curve_params(base: float) -> typ.Tuple[float, float]:
    """Return (scale, offset) of the curve f(n) = offset + scale * base ** n.

    The pair is chosen so that f(0) = 1 and f(1) = 2 for the given base.
    A base of exactly 1 raises ZeroDivisionError.
    """
    scale  = 1 / (base - 1)
    offset = 1 - scale
    return (scale, offset)


# Bases of the exponential curves used to encode each parameter
# (a smaller base gives finer steps but a narrower range of values).
P_BASE = 1.5
M_BASE = 1.125
T_BASE = 1.125

# Multipliers applied to the decoded curve value of each parameter.
# Since the curve starts at 1, these are also the smallest values
# that can be represented.
MIN_P = 1
MIN_M = 10
MIN_T = 1


# Index on a parameter curve; this is what the bit fields of an
# encoded KDFParams contain.
FieldVal = int


def log(raw_val: int, base: float) -> FieldVal:
    """Return the curve index for raw_val, rounded down.

    This inverts `offset + scale * base ** n` for n and floors the result,
    so values that lie between two points of the curve map to the lower one.
    """
    scale, offset = curve_params(base)
    normalized    = (raw_val - offset) / scale
    return math.floor(math.log(normalized) / math.log(base))


def exp(field_val: FieldVal, base: float) -> int:
    """Return the raw value at index field_val of the curve, rounded up."""
    scale, offset = curve_params(base)
    curve_val     = offset + scale * base ** field_val
    return math.ceil(curve_val)


def _clamp(val: int, lo: int, hi: int) -> int:
    return max(lo, min(hi, val))


class KDFParams(typ.NamedTuple):
    """Cost parameters of the KDF: parallelism, memory and iterations.

    Only some values can be serialized: `encode` packs the parameters
    into 4 + 6 + 6 bits, each as an index on an exponential curve (see
    `log` and `exp`). Use `init_kdf_params` to create instances whose
    raw values survive an encode/decode round trip. The `p`, `m` and `t`
    properties raise AssertionError for instances where they don't.
    """

    p_raw: NumThreads
    m_raw: MebiBytes
    t_raw: Iterations

    @property
    def _field_values(
        self,
    ) -> typ.Tuple[int, int, int]:
        """Curve indexes (f_p, f_m, f_t), clamped to their field widths."""
        f_p = _clamp(val=log(self.p_raw // MIN_P, base=P_BASE), lo=0, hi=2 ** 4 - 1)
        f_m = _clamp(val=log(self.m_raw // MIN_M, base=M_BASE), lo=0, hi=2 ** 6 - 1)
        f_t = _clamp(val=log(self.t_raw // MIN_T, base=T_BASE), lo=0, hi=2 ** 6 - 1)

        assert 0 <= f_p < 2 ** 4, f"f_p={f_p}"
        assert 0 <= f_m < 2 ** 6, f"f_m={f_m}"
        assert 0 <= f_t < 2 ** 6, f"f_t={f_t}"

        return (f_p, f_m, f_t)

    def encode(self) -> int:
        """Convert raw values to serializable representation.

        The resulting integer can be encoded as a 16bit unsigned integer.
        Layout, from the most significant bit: 4 bits for p, 6 bits for m,
        6 bits for t.
        """
        f_p, f_m, f_t = self._field_values
        fields = 0
        fields |= f_p << 12
        fields |= f_m << 6
        fields |= f_t << 0

        assert 0 <= fields < 2 ** 16
        return fields

    @staticmethod
    def decode(fields: int) -> 'KDFParams':
        """Create KDFParams from an integer produced by `encode`.

        Raises AssertionError if fields is not in the range of an
        unsigned 16bit integer.
        """
        if not (0 <= fields < 2 ** 16):
            errmsg = f"Invalid fields, out of bounds: {fields}"
            raise AssertionError(errmsg)

        f_p = (fields >> 12) & 0xF
        f_m = (fields >>  6) & 0x3F
        f_t = (fields >>  0) & 0x3F

        p = exp(f_p, base=P_BASE) * MIN_P
        m = exp(f_m, base=M_BASE) * MIN_M
        t = exp(f_t, base=T_BASE) * MIN_T
        return KDFParams(p, m, t)

    def _verify_encoding(self) -> None:
        """Validator for serialization.

        Helper to make sure we always use KDFParams with values
        that can be serialized correctly. This should not be
        needed if we always use init_kdf_params.

        Raises AssertionError if the raw values change when they
        are encoded and decoded again.
        """
        other = KDFParams.decode(self.encode())
        if self != other:
            errmsg = f"{self} != {other}"
            raise AssertionError(errmsg)

    @property
    def p(self) -> NumThreads:
        """Parallelism, after checking that the parameters are serializable."""
        self._verify_encoding()
        return self.p_raw

    @property
    def m(self) -> MebiBytes:
        """Memory cost, after checking that the parameters are serializable."""
        self._verify_encoding()
        return self.m_raw

    @property
    def t(self) -> Iterations:
        """Iterations, after checking that the parameters are serializable."""
        self._verify_encoding()
        return self.t_raw

    def _replace_any(
        self,
        p: typ.Optional[int] = None,
        m: typ.Optional[int] = None,
        t: typ.Optional[int] = None,
    ) -> 'KDFParams':
        """Return normalized KDFParams with the given parameters replaced.

        Arguments that are None leave the current value unchanged. The
        result always goes through init_kdf_params, so it may differ
        from the requested values.
        """
        updated = self

        # Compare against None rather than relying on truthiness, so that
        # an explicit 0 is applied (and then normalized to the smallest
        # representable value) instead of being silently ignored.
        if p is not None:
            updated = updated._replace(p_raw=p)
        if m is not None:
            updated = updated._replace(m_raw=m)
        if t is not None:
            updated = updated._replace(t_raw=t)

        return init_kdf_params(p=updated.p_raw, m=updated.m_raw, t=updated.t_raw)

    def __repr__(self) -> str:
        return f"KDFParams(p={self.p_raw}, m={self.m_raw}, t={self.t_raw})"


def init_kdf_params(p: NumThreads, m: MebiBytes, t: Iterations) -> KDFParams:
    """Create KDFParams, normalized to values that can be serialized."""
    # NOTE mb: It's important to ALWAYS and ONLY use kdf parameters that have gone through
    #   this function so we always do the kdf parameter normalization.
    #
    # Only certain parameter values can be serialized. The requested values
    # are encoded and decoded again, which yields the values that are
    # actually representable.
    encoded = KDFParams(p, m, t).encode()
    return KDFParams.decode(encoded)


def _hash_pyargon2(
    data: bytes,
    p   : NumThreads,
    m   : MebiBytes,
    t   : Iterations,
) -> bytes:
    """Hash data with the pyargon2 package and return HASH_LEN raw bytes.

    The data is used both as the password and as the salt. The memory
    parameter m is multiplied by 1024 before it is passed as memory_cost.
    """
    # NOTE: only used for testing/validation
    # The module is imported lazily, so it is only required if this
    # function is actually called.
    pyargon2 = importlib.import_module('pyargon2')

    hash_kwargs = {
        'password'   : data,
        'salt'       : data,
        'encoding'   : 'raw',
        'hash_len'   : HASH_LEN,
        'parallelism': p,
        'memory_cost': m * 1024,
        'time_cost'  : t,
        'variant'    : 'id',
        'version'    : 19,
    }
    raw_hash = pyargon2.hash(**hash_kwargs)  # type: ignore
    assert isinstance(raw_hash, bytes)
    return raw_hash


def _hash_argon2_cffi(
    data: bytes,
    p   : NumThreads,
    m   : MebiBytes,
    t   : Iterations,
) -> bytes:
    """Hash data with the argon2 package and return HASH_LEN raw bytes.

    The data is used both as the secret and as the salt. The memory
    parameter m is multiplied by 1024 before it is passed as memory_cost.
    Fails with an AssertionError if the library's ARGON2_VERSION is not 19.
    """
    import argon2

    low_level = argon2.low_level
    version   = low_level.ARGON2_VERSION
    assert version == 19, version

    raw_hash = low_level.hash_secret_raw(
        secret=data,
        salt=data,
        time_cost=t,
        memory_cost=m * 1024,
        parallelism=p,
        hash_len=HASH_LEN,
        type=low_level.Type.ID,
        version=version,
    )
    assert isinstance(raw_hash, bytes)
    return raw_hash


# Hash implementation used by digest and kdf_params_for_duration.
_hash = _hash_argon2_cffi


# Upper bound on the number of _hash calls that digest splits its
# iterations into (a progress increment is reported after each call).
DIGEST_STEPS = 10


class ProgressSmoother:
    """Relay coarse progress increments to a callback at a steady pace.

    The producer records increments (on a scale of 0 to 100) with the
    `progress_cb` method. A background thread, started by the
    constructor, wakes up every 0.1 seconds and invokes the wrapped
    callback. The thread finishes once `total_incr` reaches 100; use
    `join` to wait for it.
    """

    increments: typ.List[float]

    def __init__(self, progress_cb: ProgressCallback) -> None:
        self.increments = [0]

        def fake_progress() -> None:
            step_duration = 0.1
            tzero         = time.time()
            while True:
                time.sleep(step_duration)
                # Read the total once per tick, so that all branches see
                # the same value even if an increment is recorded
                # concurrently by the producer.
                total = self.total_incr()
                if total == 0:
                    # nothing recorded yet, report a token amount
                    progress_cb(0.01)
                elif total >= 100:
                    progress_cb(100)
                    return
                else:
                    # report the average increment per tick so far
                    duration      = time.time() - tzero
                    steps         = duration / step_duration
                    incr_per_step = total / steps
                    progress_cb(incr_per_step)

        # The loop above only ends once the total reaches 100. As a daemon,
        # the thread cannot keep the interpreter alive forever if the
        # producer fails before it has recorded all of its increments.
        self._thread = threading.Thread(target=fake_progress, daemon=True)
        self._thread.start()

    def total_incr(self) -> float:
        """Return the recorded total, plus 55% of the largest increment.

        The extra share lets the reported progress run ahead of what has
        actually been recorded.
        """
        return sum(self.increments) + max(self.increments) * 0.55

    def progress_cb(self, incr: Increment) -> None:
        """Record an increment (called by the producer)."""
        self.increments.append(incr)

    def join(self) -> None:
        """Block until the background thread has finished."""
        self._thread.join()


def digest(
    data       : bytes,
    kdf_params : KDFParams,
    hash_len   : int,
    progress_cb: MaybeProgressCallback = None,
) -> bytes:
    """Derive hash_len bytes from data with the given KDF parameters.

    The kdf_params.t iterations are spread over at most DIGEST_STEPS
    calls of _hash, where the output of each call is the input of the
    next. If progress_cb is given, it is invoked from a ProgressSmoother
    thread, and this function only returns after that thread has finished.
    """
    smoother: typ.Optional[ProgressSmoother] = None
    if progress_cb:
        smoother = ProgressSmoother(progress_cb)

    total_iters  = kdf_params.t
    iters_left   = total_iters
    steps_left   = min(iters_left, DIGEST_STEPS)
    pct_per_iter = 100 / total_iters

    parallelism = kdf_params.p
    memory      = kdf_params.m

    chained = data
    while iters_left > 0:
        # Distribute the remaining iterations evenly over the remaining steps.
        chunk_iters = max(1, round(iters_left / steps_left))
        chained     = _hash(chained, t=chunk_iters, p=parallelism, m=memory)

        if smoother:
            smoother.progress_cb(chunk_iters * pct_per_iter)

        iters_left -= chunk_iters
        steps_left -= 1

    assert iters_left == 0, iters_left
    assert steps_left == 0, steps_left

    if smoother:
        smoother.join()

    return chained[:hash_len]


# In seconds. kdf_params_for_duration stops calibrating as soon as a
# single measurement takes longer than this.
MEASUREMENT_SIGNIFICANCE_THRESHOLD = 2


def kdf_params_for_duration(
    baseline_kdf_params : KDFParams,
    target_duration     : Seconds,
    max_measurement_time: Seconds = 5,
) -> KDFParams:
    """Estimate the KDF parameters for which digest takes target_duration.

    Only t is varied; p and m are taken from baseline_kdf_params. The
    hash is timed with increasing values of t until a measurement
    exceeds the target duration of a single digest step, or exceeds
    MEASUREMENT_SIGNIFICANCE_THRESHOLD, or until the measurements have
    taken more than max_measurement_time in total. The value of t is
    then extrapolated from the last measurement.

    The result is normalized, so its t is one of the values that can
    be serialized.
    """
    test_kdf_params = baseline_kdf_params._replace_any(t=1)
    constant_kwargs = {
        # we only vary t, the baseline should be chosen to max out the others
        'p': test_kdf_params.p,
        'm': test_kdf_params.m,
    }

    tgt_step_duration = target_duration / DIGEST_STEPS
    total_time        = 0.0

    while True:
        # perf_counter is monotonic and has the highest available
        # resolution, whereas time.time() can jump if the system clock
        # is adjusted during the measurement.
        tzero = time.perf_counter()
        _hash(b"\x00\x00\x00\x00\x00\x00\x00\x00\x00", t=test_kdf_params.t, **constant_kwargs)
        # Clamp to a tiny positive value, so the division below cannot
        # raise ZeroDivisionError for a measurement of zero length.
        duration = max(time.perf_counter() - tzero, 1e-9)
        total_time += duration

        iters_per_sec = test_kdf_params.t / duration
        step_iters    = tgt_step_duration * iters_per_sec * 1.25

        is_tgt_exceeded            = duration   > tgt_step_duration
        is_measurement_significant = duration   > MEASUREMENT_SIGNIFICANCE_THRESHOLD
        is_enough_already          = total_time > max_measurement_time
        if is_tgt_exceeded or is_measurement_significant or is_enough_already:
            new_t = round(step_iters * DIGEST_STEPS)
            return test_kdf_params._replace_any(t=new_t)
        else:
            # min_iters is used to make sure we're always measuring with a higher value for t
            min_iters       = math.ceil(test_kdf_params.t * 1.25)
            min_t           = round(1.25 * MEASUREMENT_SIGNIFICANCE_THRESHOLD * iters_per_sec)
            new_t           = max(min_iters, min_t)
            test_kdf_params = test_kdf_params._replace_any(t=new_t)


def debug_params() -> None:
    """Print the largest encodable parameters and a table of sample values.

    For a selection of curve indexes, each row shows the normalized
    value of p, m and t (as returned by init_kdf_params) next to the
    value that was requested.
    """
    # Use the MIN_* multipliers (rather than literals), so that the maxima
    # are computed the same way as the values in the table below.
    max_p = exp(2 ** 4 - 1, base=P_BASE) * MIN_P
    max_m = exp(2 ** 6 - 1, base=M_BASE) * MIN_M
    max_t = exp(2 ** 6 - 1, base=T_BASE) * MIN_T

    print(f"       {max_p=:>12}     {max_m=:>12}     {max_t=:>12}")

    for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 15, 16, 30, 31, 32, 62, 63, 64]:
        p = exp(i, base=P_BASE) * MIN_P
        m = exp(i, base=M_BASE) * MIN_M
        t = exp(i, base=T_BASE) * MIN_T

        # The field for p is only 4 bits wide, so most of the indexes
        # are beyond its range.
        p = min(p, max_p)

        kdf_params = init_kdf_params(p=p, m=m, t=t)
        print(f"{i:>2}"                     , end=" ")
        print(f"p: {kdf_params.p:>9} {p:>9}", end=" ")
        print(f"m: {kdf_params.m:>9} {m:>9}", end=" ")
        print(f"t: {kdf_params.t:>9} {t:>9}")


# When run as a script, print the table of encodable parameter values.
if __name__ == '__main__':
    debug_params()