A. Source code
entropies.py
1
#!/usr/bin/env python
2
# -*- coding: iso-8859-1 -*-
3
4
from __future__ import division
5
from math import *
6
7
# Yes, this is a quite ugly way to get an infinity constant, but it
8
# works, and we really get an IEEE 754 floating point infinity
9
# constant. Module fpconst should solve the problem.
10
infty = 1e300000000000000
11
12
def log2(x):
13
return log(x, 2)
14
15
def shannon_entropy(l):
16
"""Return the Shannon entropy of random variable with probability
17
vector l."""
18
return sum([-p*log2(p) for p in l if p > 0])
19
def min_entropy(l):
20
"""Return the min-entropy of random variable with probability
21
vector l."""
22
return -log2(max(l))
23
def entropy(l, alpha=1):
24
"""Return the Rényi entropy of order alpha of random variable with
25
probability vector l."""
26
if abs(alpha - 1) < 10**-10:
27
return shannon_entropy(l)
28
elif alpha == infty:
29
return min_entropy(l)
30
try:
31
# "if p>0" saves us from 0**0 trouble.
32
return log2(sum([p**float(alpha) for p in l if p>0]))/(1-alpha)
33
except (ZeroDivisionError, OverflowError):
34
return min_entropy(l)
35
36
def guessing_entropy(l):
37
"""Return the Shannon entropy of random variable with probability
38
vector l."""
39
tmp = l[:] # Copy the probability vector.
40
tmp.sort()
41
tmp.reverse() # Highest probability first.
42
return sum([p*i for (i,p) in enumerate(l)]) + 1
43
44
def normalize_inplace(l):
45
"Normalize a probability vector in-place."
46
s = sum(l)
47
for i, p in enumerate(l):
48
l[i] = p/s
49
def normalize(l):
50
"Return a normalized probability vector."
51
s = sum(l)
52
return [ p/s for p in l ]
53
54
def Q(p, n):
55
return [p]+[(1-p)/n]*n
hashfunctions.py
1
#!/usr/bin/env python
2
# -*- coding: iso-8859-1 -*-
3
4
from __future__ import division
5
from math import *
6
import Crypto.Util.number as cn
7
8
def log2(x):
9
return log(x, 2)
10
11
def logint(x, base, __cache=[(8, 2), 3]):
12
"Return int(ceil(log(x, base))) without rounding errors."
13
# Rounding error example:
14
#>>> log(2**96, 2**12)
15
#8.0000000000000018
16
if (x, base) == __cache[0]:
17
return __cache[1]
18
c = int(ceil(log(x, base))) # This works most of the time.
19
while x > base**c:
20
c += 1 # If not, this will fix it.
21
while x <= base**(c-1):
22
c -= 1 # Or this.
23
__cache[:] = (x, base), c
24
return c
25
26
def istwopower(x, __cache={}):
27
c = __cache.get(x)
28
if c is not None:
29
return c
30
c = 0L # This must be a long, or 1<<c will lose bits.
31
while x > 1<<c:
32
c += 1
33
if x != 1<<c:
34
return None
35
__cache[x] = c
36
return c
37
38
def int2list(x, xmax=None, s=256):
39
"""Return the integer x as a little-endian list of bytes with s
40
states each. s is the number of states, not the number of bits.
41
If xmax is given, the returned list will be large enough to
42
contain xmax-1."""
43
if xmax is None:
44
xmax = x+1
45
assert 0 <= x < xmax
46
l = [0] * logint(xmax, s)
47
c = istwopower(s)
48
if c is None:
49
for i in xrange(len(l)):
50
l[i] = int(x%s)
51
x = x//s
52
else: # s is exactly 2**c==1<<c so we can optimize somewhat.
53
mask = (1<<c)-1
54
for i in xrange(len(l)):
55
l[i] = int(x&mask)
56
x = x>>c
57
return l
58
59
def list2int(l, s=256):
60
"""Return list interpreted as a little-endian list of bytes with s
61
states each. s is the number of states, not the number of bits."""
62
while len(l) > 1:
63
l[-2] += l[-1]*s
64
del l[-1]
65
return l[0]
66
67
def nextprime(begin=1, __cache=[7,7]):
68
"""Return the lowest prime >= begin."""
69
if begin == __cache[0]:
70
return __cache[1]
71
n = begin
72
if not n%2: n+=1
73
while not cn.isPrime(long(n)):
74
n += 2
75
__cache[:] = begin, n
76
return int(n)
77
78
# The following functions were first described by J. Lawrence Carter
79
# and Mark N. Wegman in their papers "Universal classes of hash
80
# functions" (1979) and "New hash functions and their use in
81
# authentication and set equality" (1981).
82
# All functions try to follow the Wegman-Carter papers as closely as
83
# possible and all quotes are from the papers.
84
85
def H1(a, b, key=None):
86
"""Return a member of the hash family H_1 from Wegman-Carter 1979
87
Inputs:
88
a -- Number of input states
89
b -- Number of output states
90
key -- a tuple (p, m, n):
91
p -- A (non-secret) prime larger than or equal to a
92
q -- Half of the secret key. 0< q
93
r -- Half of the secret key. 0<=r
94
Output:
95
Normal mode:
96
A hash function mapping range(a) to range(b)
97
Parameter mode:
98
If H1 is called with no key, the smallest possible p is
99
returned to ease construction of a key.
100
"""
101
if key is None:
102
return nextprime(a)
103
p, q, r = key
104
assert (a>b) and (p>=a) and (0and (0<=r
105
def g(x): # "A natural choice for g is the residue modulo b."
106
return x % b
107
def h(m):
108
return (q*m+r) % p
109
def f(m):
110
# Docstring is set below
111
assert 0<=m
112
return g(h(m))
113
f.__doc__ = \
114
"""This is a hash function from the hash family H_1 from
115
Wegman-Carter 1979 using the key %s.
116
Inputs:
117
m -- The integer to be hashed in range(%d)
118
Output:
119
A hash value in range(%d)
120
""" % (str(key), a, b)
121
return f
122
123
def Hprime(aprime, bprime, flist=None):
124
"""Return a member of the hash family H' from Wegman-Carter 1980
125
Inputs:
126
aprime -- Number of input bits
127
bprime -- Number of output states
128
flist -- A sequence of secret hash functions
129
Output:
130
Normal mode:
131
A hash function mapping range(2**aprime) to range(2**bprime)
132
Parameter mode:
133
Hprime needs a sequence flist of hash functions from a
134
universal_2 family. The number of functions, input states and
135
output states are dependent on the inner workings of Hprime
136
and need not be exposed to the outside. Instead, if Hprime is
137
called without flist those specifications are returned as a
138
tuple (a, b, len_f):
139
a -- Number of input states of each hash function
140
b -- Number of output states of each hash function
141
len_f -- Number of hash functions in flist
142
"""
143
s = bprime + int(ceil(log2(log2(aprime))))
144
# "Let H be some strongly universal_2 class of functions which map
145
# bit strings of length 2s to ones of length s"
146
a = 2**(2*s)
147
b = 2**( s)
148
# The "or 1" is needed because the length calculation in W-C-80
149
# doesn't account for the extra padding when the message is
150
# smaller than s from the beginning.
151
len_f = int(ceil(log2(ceil(aprime/s)))) or 1
152
if flist is None:
153
return (a, b, len_f)
154
assert len(flist) == len_f
155
def f(substrings, hashfunction):
156
# Apply hashfunction to all substrings and concatenate pairwise
157
for i in xrange(len(substrings)//2):
158
substrings[i:i+2] = [hashfunction(substrings[i ]) +
159
hashfunction(substrings[i+1]) * 2**s]
160
if len(substrings)%2:
161
substrings[-1] = hashfunction(substrings[-1])
162
def fprime(m):
163
# Docstring is set below
164
assert 0 <= m < 2**aprime
165
# "The message is broken into substrings of length 2s."
166
substrings = int2list(m, xmax=2**aprime, s=2**(2*s))
167
for f_i in flist[:-1]:
168
assert len(substrings) > 1
169
f(substrings, f_i)
170
# "This process is repeated using f_2,f_3,... until only one
171
# substring of length s is left."
172
assert len(substrings) == 1
173
substring = flist[-1](substrings[0])
174
assert 0 <= substring < 2**s
175
# "The tag is the low-order b' bits of this substring."
176
return substring % 2**bprime
177
fprime.__doc__ = "This is a hash function from the hash family H' " \
178
"from Wegman-Carter 1980.\n" \
179
" Inputs:\n" \
180
" m -- A %d bit integer to be hashed\n" \
181
" Output:\n" \
182
" A %d bit hash value\n" \
183
% (aprime, bprime)
184
return fprime
185
186
def Hprime_H1(aprime, bprime, key=None):
187
"""Return a member of the hash family H' from Wegman-Carter 1980
188
using the sub-hash family H_1 from Wegman-Carter 1979
189
Inputs:
190
aprime -- Number of input bits
191
bprime -- Number of output bits
192
key -- A secret key
193
Outputs:
194
Normal mode:
195
A hash function mapping range(2**aprime) to range(2**bprime)
196
Parameter mode:
197
If Hprime_H1 is called without a key an integer maxkey is
198
returned. The key should be in range(maxkey).
199
"""
200
# Get key parameters
201
a, b, len_f = Hprime(aprime, bprime)
202
p = H1(a, b)
203
maxkey = ((p-1)*p)**len_f
204
if key is None:
205
return maxkey
206
assert 0 <= key < maxkey
207
flist = []
208
for thiskey in int2list(key, xmax=maxkey, s=(p-1)*p):
209
q, r = divmod(thiskey, p)
210
q += 1
211
flist.append(H1(a, b, (p, q, r)))
212
return Hprime(aprime, bprime, flist)
213
214
def Hprime_H1_compact(aprime, bprime, key=None):
215
"""A more compact implementation of Wegman-Carter 1980 with H1
216
from W-C 1979.
217
The functions above are written to mimic the language of
218
Wegman-Carter as much as possible. Sometimes it might be easier to
219
understand a more compact language. This code should do exactly
220
the same as the one above, but in far less lines and with no error
221
checking. It is approximately three times faster than Hprime_H1().
222
Inputs:
223
aprime -- Number of input bits
224
bprime -- Number of output bits
225
key -- A secret key
226
Outputs:
227
Normal mode:
228
A hash function mapping range(2**aprime) to range(2**bprime)
229
Parameter mode:
230
If Hprime_H1_compact is called without a key an integer maxkey
231
is returned. The key should be in range(maxkey).
232
"""
233
s = bprime + int(ceil(log2(log2(aprime))))
234
p = nextprime(2**(2*s))
235
len_f = int(ceil(log2(ceil(aprime/s)))) or 1
236
maxkey = ((p-1)*p)**len_f
237
if key is None:
238
return maxkey
239
keys = []
240
for thiskey in int2list(key, xmax=maxkey, s=(p-1)*p):
241
q, r = divmod(thiskey, p)
242
q += 1
243
keys.append( (q,r) )
244
def fprime(m):
245
"This is a hash function returned by Hprime_H1_compact()."
246
substrings = int2list(m, xmax=2**aprime, s=2**(2*s))
247
for q,r in keys:
248
for i in xrange(len(substrings)//2):
249
substrings[i:i+2] = [(((q*substrings[i ]+r)%p)%(2**s)) + \
250
(((q*substrings[i+1]+r)%p)%(2**s)) * 2**s]
251
if len(substrings)%2:
252
substrings[-1] = (((q*substrings[ -1]+r)%p)%(2**s))
253
return substrings[0] % 2**bprime
254
return fprime