line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
* Copyright 1999 Dr. Brian Gladman |
3
|
|
|
|
|
|
|
* Copyright 2001 Abhijit Menon-Sen |
4
|
|
|
|
|
|
|
*/ |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
/* Twofish is a 128-bit symmetric block cipher with a variable length |
7
|
|
|
|
|
|
|
key, developed by Counterpane Labs. It is unpatented and free for all |
8
|
|
|
|
|
|
|
uses, as described at |
9
|
|
|
|
|
|
|
and . |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
This implementation is based on code by Dr. Brian Gladman, at |
12
|
|
|
|
|
|
|
. |
13
|
|
|
|
|
|
|
Some of his comments are reproduced below: |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
"Copyright in this implementation is held by Dr. B R Gladman but I |
16
|
|
|
|
|
|
|
hereby give permission for its free direct or derivative use subject |
17
|
|
|
|
|
|
|
to ackowledgement of its origin and compliance with any conditions |
18
|
|
|
|
|
|
|
that the originators of the algorithm place on its exploitation. |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
My thanks to Doug Whiting and Niels Ferguson for comments that led to |
21
|
|
|
|
|
|
|
improvements in this implementation." */ |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#include "twofish.h" |
24
|
|
|
|
|
|
|
#include "tables.h" |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
/* Extract the n'th byte from a 32-bit word */ |
27
|
|
|
|
|
|
|
#define byte(x,n) ((unsigned char)((x) >> (8 * n))) |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
/* 32 bit rotate-left and right macros */ |
30
|
|
|
|
|
|
|
#define ror(x,n) (((x) >> ((int)(n))) | ((x) << (32 - (int)(n)))) |
31
|
|
|
|
|
|
|
#define rol(x,n) (((x) << ((int)(n))) | ((x) >> (32 - (int)(n)))) |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
/* Endian-independent byte -> word conversion */ |
34
|
|
|
|
|
|
|
#define strtonl(s) (uint32_t)(*(s)|*(s+1)<<8|*(s+2)<<16|*(s+3)<<24) |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
#define nltostr(l, s) \ |
37
|
|
|
|
|
|
|
do { \ |
38
|
|
|
|
|
|
|
*(s )=(unsigned char)((l) ); \ |
39
|
|
|
|
|
|
|
*(s+1)=(unsigned char)((l) >> 8); \ |
40
|
|
|
|
|
|
|
*(s+2)=(unsigned char)((l) >> 16); \ |
41
|
|
|
|
|
|
|
*(s+3)=(unsigned char)((l) >> 24); \ |
42
|
|
|
|
|
|
|
} while (0) |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
static uint32_t mds_rem(uint32_t a, uint32_t b); |
45
|
|
|
|
|
|
|
static uint32_t h(int len, const int x, unsigned char *key, int odd); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
/* The key schedule takes a 128, 192, or 256-bit key, and provides 40 |
48
|
|
|
|
|
|
|
32-bit words of expanded key K0,...,K39 and the 4 key-dependent |
49
|
|
|
|
|
|
|
S-boxes used in the g function. */ |
50
|
|
|
|
|
|
|
|
51
|
20297
|
|
|
|
|
|
struct twofish *twofish_setup(unsigned char *key, int len) |
52
|
|
|
|
|
|
|
{ |
53
|
|
|
|
|
|
|
int i; |
54
|
|
|
|
|
|
|
uint32_t a, b, x; |
55
|
|
|
|
|
|
|
struct twofish *t; |
56
|
|
|
|
|
|
|
unsigned char *s, skey[16]; |
57
|
|
|
|
|
|
|
|
58
|
20297
|
50
|
|
|
|
|
if ((t = malloc(sizeof(struct twofish))) == NULL) |
59
|
0
|
|
|
|
|
|
return NULL; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
/* The key consists of k=len/8 (2, 3 or 4) 64-bit units. */ |
62
|
20297
|
|
|
|
|
|
t->len = len /= 8; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
/* We must derive three vectors Me, Mo, and S, each with k 32-bit |
65
|
|
|
|
|
|
|
words, from the 2k words in the key. |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
Me = (key[0], key[2], ..., key[2k-2]) (even words) |
68
|
|
|
|
|
|
|
Mo = (key[1], key[3], ..., key[2k-1]) (odd words) |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
The third vector is derived by multiplying each of the k groups |
71
|
|
|
|
|
|
|
of 8 bytes from the key by a 4x8 matrix, to get k 32-bit words. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
S = (S[k-1], S[k-2], ..., S[0]) |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
where S[i] are the 4 bytes from the multiplication, interpreted |
76
|
|
|
|
|
|
|
as a 32-bit word. As described later, mds_rem is equivalent to |
77
|
|
|
|
|
|
|
the matrix multiplication, but faster. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Since all these vectors are going to be used byte-by-byte, we |
80
|
|
|
|
|
|
|
avoid converting them to words altogether, and write the bytes of |
81
|
|
|
|
|
|
|
S into the array skey below: */ |
82
|
|
|
|
|
|
|
|
83
|
20297
|
|
|
|
|
|
s = skey + 4*(len - 1); |
84
|
61185
|
100
|
|
|
|
|
for (i = 0; i < len; i++) { |
85
|
40888
|
|
|
|
|
|
x = mds_rem(strtonl(key+8*i), strtonl(key+8*i+4)); |
86
|
40888
|
|
|
|
|
|
nltostr(x, s); |
87
|
40888
|
|
|
|
|
|
s -= 4; |
88
|
|
|
|
|
|
|
} |
89
|
20297
|
|
|
|
|
|
s = skey; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
/* The words of the expanded key K are defined using the h function: |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
rho = 2^24 + 2^16 + 2^8 + 2^0 (0x01010101) |
94
|
|
|
|
|
|
|
A[i] = h(2i*rho, Me) |
95
|
|
|
|
|
|
|
B[i] = ROL(h(2(i+1)*rho, Mo), 8) |
96
|
|
|
|
|
|
|
K[2i] = (A[i] + B[i]) mod 2^32 |
97
|
|
|
|
|
|
|
K[2i+1] = ROL((A[i] + 2B[i]) mod 2^32, 9) |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
rho has the property that, for i = 0..255, the word i*rho |
100
|
|
|
|
|
|
|
consists of four equal bytes, each with the value i. The function |
101
|
|
|
|
|
|
|
h is only applied to words of this type, so we only pass it the |
102
|
|
|
|
|
|
|
value of i. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
We also didn't generate the vectors Me and Mo separately: we pass |
105
|
|
|
|
|
|
|
the entire key, and indicate whether we want the even or odd |
106
|
|
|
|
|
|
|
words to be used. */ |
107
|
|
|
|
|
|
|
|
108
|
426237
|
100
|
|
|
|
|
for (i = 0; i < 40; i += 2) { |
109
|
405940
|
|
|
|
|
|
a = h(len, i, key, 0); |
110
|
405940
|
|
|
|
|
|
b = rol(h(len, i+1, key, 1), 8); |
111
|
|
|
|
|
|
|
|
112
|
405940
|
|
|
|
|
|
t->K[i] = a+b; |
113
|
405940
|
|
|
|
|
|
t->K[i+1] = rol(a+2*b, 9); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
/* The key-dependent S-boxes used in the g() function are created |
117
|
|
|
|
|
|
|
below. They are defined by g(X) = h(X, S), where S is the vector |
118
|
|
|
|
|
|
|
derived from the key. That is, for i=0..3, the S-box S[i] is |
119
|
|
|
|
|
|
|
formed by mapping from x[i] to y[i] in the h function. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
The relevant lookup tables qN have been precomputed and stored in |
122
|
|
|
|
|
|
|
tables.h; we also perform full key precomputations incorporating |
123
|
|
|
|
|
|
|
the MDS matrix multiplications. */ |
124
|
|
|
|
|
|
|
|
125
|
20297
|
|
|
|
|
|
switch (len) { |
126
|
|
|
|
|
|
|
case 2: |
127
|
5165957
|
100
|
|
|
|
|
for (i = 0; i < 256; i++) { |
128
|
5145856
|
|
|
|
|
|
x = (unsigned char)i; |
129
|
5145856
|
|
|
|
|
|
t->S[0][i] = m[0][q[0][q[0][x]^s[4]]^s[0]]; |
130
|
5145856
|
|
|
|
|
|
t->S[1][i] = m[1][q[0][q[1][x]^s[5]]^s[1]]; |
131
|
5145856
|
|
|
|
|
|
t->S[2][i] = m[2][q[1][q[0][x]^s[6]]^s[2]]; |
132
|
5145856
|
|
|
|
|
|
t->S[3][i] = m[3][q[1][q[1][x]^s[7]]^s[3]]; |
133
|
|
|
|
|
|
|
} |
134
|
20101
|
|
|
|
|
|
break; |
135
|
|
|
|
|
|
|
case 3: |
136
|
25186
|
100
|
|
|
|
|
for (i = 0; i < 256; i++) { |
137
|
25088
|
|
|
|
|
|
x = (unsigned char)i; |
138
|
25088
|
|
|
|
|
|
t->S[0][i] = m[0][q[0][q[0][q[1][x]^s[ 8]]^s[4]]^s[0]]; |
139
|
25088
|
|
|
|
|
|
t->S[1][i] = m[1][q[0][q[1][q[1][x]^s[ 9]]^s[5]]^s[1]]; |
140
|
25088
|
|
|
|
|
|
t->S[2][i] = m[2][q[1][q[0][q[0][x]^s[10]]^s[6]]^s[2]]; |
141
|
25088
|
|
|
|
|
|
t->S[3][i] = m[3][q[1][q[1][q[0][x]^s[11]]^s[7]]^s[3]]; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
} |
144
|
98
|
|
|
|
|
|
break; |
145
|
|
|
|
|
|
|
case 4: |
146
|
25186
|
100
|
|
|
|
|
for (i = 0; i < 256; i++) { |
147
|
25088
|
|
|
|
|
|
x = (unsigned char)i; |
148
|
25088
|
|
|
|
|
|
t->S[0][i] = m[0][q[0][q[0][q[1][q[1][x]^s[12]]^s[ 8]]^s[4]]^s[0]]; |
149
|
25088
|
|
|
|
|
|
t->S[1][i] = m[1][q[0][q[1][q[1][q[0][x]^s[13]]^s[ 9]]^s[5]]^s[1]]; |
150
|
25088
|
|
|
|
|
|
t->S[2][i] = m[2][q[1][q[0][q[0][q[0][x]^s[14]]^s[10]]^s[6]]^s[2]]; |
151
|
25088
|
|
|
|
|
|
t->S[3][i] = m[3][q[1][q[1][q[0][q[1][x]^s[15]]^s[11]]^s[7]]^s[3]]; |
152
|
|
|
|
|
|
|
} |
153
|
98
|
|
|
|
|
|
break; |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
20297
|
|
|
|
|
|
return t; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
20297
|
|
|
|
|
|
void twofish_free(struct twofish *self) |
160
|
|
|
|
|
|
|
{ |
161
|
20297
|
|
|
|
|
|
free(self); |
162
|
20297
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
/* The function g splits the input word x into four bytes; each byte is |
165
|
|
|
|
|
|
|
run through its own key-dependent S-box. Each S-box is bijective, |
166
|
|
|
|
|
|
|
takes 8 bits of input and produces 8 bits of output. The four results |
167
|
|
|
|
|
|
|
are interpreted as a vector of length 4 over GF(2^8), and multiplied |
168
|
|
|
|
|
|
|
by the 4x4 MDS matrix. The resulting vector is interpreted as a |
169
|
|
|
|
|
|
|
32-bit word. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Since we have performed the full key precomputations, g consists only |
172
|
|
|
|
|
|
|
of four lookups and three XORs. g0 is g; g1 is a shortcut for |
173
|
|
|
|
|
|
|
g(ROL(x, 8)). */ |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
#define g0(x) \ |
176
|
|
|
|
|
|
|
t->S[0][byte(x,0)]^t->S[1][byte(x,1)]^t->S[2][byte(x,2)]^t->S[3][byte(x,3)] |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
#define g1(x) \ |
179
|
|
|
|
|
|
|
t->S[0][byte(x,3)]^t->S[1][byte(x,0)]^t->S[2][byte(x,1)]^t->S[3][byte(x,2)] |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
/* F is a key-dependent permutation on 64-bit values. It takes two input |
182
|
|
|
|
|
|
|
words R0 and R1, and a round number r: |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
T0 = g(R0) |
185
|
|
|
|
|
|
|
T1 = g(ROL(R1, 8)) |
186
|
|
|
|
|
|
|
F0 = (T0 + T1 + K[2r+8]) |
187
|
|
|
|
|
|
|
F1 = (T0 + 2*T1 + K[2r+9]) |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Each of the 16 encryption rounds consists of the following operations: |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
(F0, F1) = F(R0, R1, r) |
192
|
|
|
|
|
|
|
R0 = ROR(R2 ^ F0, 1) |
193
|
|
|
|
|
|
|
R1 = ROL(R3, 1) ^ F1 |
194
|
|
|
|
|
|
|
R2 = R0 |
195
|
|
|
|
|
|
|
R3 = R1 |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
For efficiency, two rounds are combined into one in the macros below. */ |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
#define f_2rounds(i) \ |
200
|
|
|
|
|
|
|
t0 = g0(R[0]); \ |
201
|
|
|
|
|
|
|
t1 = g1(R[1]); \ |
202
|
|
|
|
|
|
|
R[2] = ror(R[2] ^ (t0 + t1 + t->K[4*i+8]), 1); \ |
203
|
|
|
|
|
|
|
R[3] = rol(R[3], 1) ^ (t0 + 2*t1 + t->K[4*i+9]); \ |
204
|
|
|
|
|
|
|
t0 = g0(R[2]); \ |
205
|
|
|
|
|
|
|
t1 = g1(R[3]); \ |
206
|
|
|
|
|
|
|
R[0] = ror(R[0] ^ (t0 + t1 + t->K[4*i+10]), 1); \ |
207
|
|
|
|
|
|
|
R[1] = rol(R[1], 1) ^ (t0 + 2*t1 + t->K[4*i+11]); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
/* This is the inverse of f_2rounds */ |
210
|
|
|
|
|
|
|
#define i_2rounds(i) \ |
211
|
|
|
|
|
|
|
t0 = g0(R[0]); \ |
212
|
|
|
|
|
|
|
t1 = g1(R[1]); \ |
213
|
|
|
|
|
|
|
R[2] = rol(R[2], 1) ^ (t0 + t1 + t->K[4*i+10]); \ |
214
|
|
|
|
|
|
|
R[3] = ror(R[3] ^ (t0 + 2*t1 + t->K[4*i+11]), 1); \ |
215
|
|
|
|
|
|
|
t0 = g0(R[2]); \ |
216
|
|
|
|
|
|
|
t1 = g1(R[3]); \ |
217
|
|
|
|
|
|
|
R[0] = rol(R[0], 1) ^ (t0 + t1 + t->K[4*i+8]); \ |
218
|
|
|
|
|
|
|
R[1] = ror(R[1] ^ (t0 + 2*t1 + t->K[4*i+9]), 1) |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
/* This function encrypts or decrypts 16 bytes of input data and writes |
221
|
|
|
|
|
|
|
it to output, using the key defined in t. */ |
222
|
|
|
|
|
|
|
|
223
|
40296
|
|
|
|
|
|
void twofish_crypt(struct twofish *t, |
224
|
|
|
|
|
|
|
unsigned char *input, unsigned char *output, |
225
|
|
|
|
|
|
|
int decrypt) |
226
|
|
|
|
|
|
|
{ |
227
|
|
|
|
|
|
|
uint32_t t0, t1, R[4], out[4]; |
228
|
|
|
|
|
|
|
|
229
|
40296
|
100
|
|
|
|
|
if (!decrypt) { |
230
|
|
|
|
|
|
|
/* Whiten four 32-bit input words. */ |
231
|
20148
|
|
|
|
|
|
R[0] = t->K[0] ^ strtonl(input); |
232
|
20148
|
|
|
|
|
|
R[1] = t->K[1] ^ strtonl(input+4); |
233
|
20148
|
|
|
|
|
|
R[2] = t->K[2] ^ strtonl(input+8); |
234
|
20148
|
|
|
|
|
|
R[3] = t->K[3] ^ strtonl(input+12); |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
/* 16 rounds of encryption, combined into 8 pairs. */ |
237
|
20148
|
|
|
|
|
|
f_2rounds(0); f_2rounds(1); f_2rounds(2); f_2rounds(3); |
238
|
20148
|
|
|
|
|
|
f_2rounds(4); f_2rounds(5); f_2rounds(6); f_2rounds(7); |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
/* Output whitening; The order of R[n] undoes the last swap. */ |
241
|
20148
|
|
|
|
|
|
out[0] = t->K[4] ^ R[2]; |
242
|
20148
|
|
|
|
|
|
out[1] = t->K[5] ^ R[3]; |
243
|
20148
|
|
|
|
|
|
out[2] = t->K[6] ^ R[0]; |
244
|
20148
|
|
|
|
|
|
out[3] = t->K[7] ^ R[1]; |
245
|
|
|
|
|
|
|
} else { |
246
|
20148
|
|
|
|
|
|
R[0] = t->K[4] ^ strtonl(input); |
247
|
20148
|
|
|
|
|
|
R[1] = t->K[5] ^ strtonl(input+4); |
248
|
20148
|
|
|
|
|
|
R[2] = t->K[6] ^ strtonl(input+8); |
249
|
20148
|
|
|
|
|
|
R[3] = t->K[7] ^ strtonl(input+12); |
250
|
|
|
|
|
|
|
|
251
|
20148
|
|
|
|
|
|
i_2rounds(7); i_2rounds(6); i_2rounds(5); i_2rounds(4); |
252
|
20148
|
|
|
|
|
|
i_2rounds(3); i_2rounds(2); i_2rounds(1); i_2rounds(0); |
253
|
|
|
|
|
|
|
|
254
|
20148
|
|
|
|
|
|
out[0] = t->K[0] ^ R[2]; |
255
|
20148
|
|
|
|
|
|
out[1] = t->K[1] ^ R[3]; |
256
|
20148
|
|
|
|
|
|
out[2] = t->K[2] ^ R[0]; |
257
|
20148
|
|
|
|
|
|
out[3] = t->K[3] ^ R[1]; |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
/* Write 16 output bytes. */ |
261
|
40296
|
|
|
|
|
|
nltostr(out[0], output); |
262
|
40296
|
|
|
|
|
|
nltostr(out[1], output+4); |
263
|
40296
|
|
|
|
|
|
nltostr(out[2], output+8); |
264
|
40296
|
|
|
|
|
|
nltostr(out[3], output+12); |
265
|
40296
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
/* h takes a 32-bit word X, and a list, L = (L[0],...,L[k-1]), of 32-bit |
268
|
|
|
|
|
|
|
words, and produces one word of output. During each of the k stages |
269
|
|
|
|
|
|
|
of the function, the four bytes from X are each passed through a |
270
|
|
|
|
|
|
|
fixed S-box, and XORed with a byte derived from the list. Finally, |
271
|
|
|
|
|
|
|
the bytes are once again passed through an S-box and multiplied by |
272
|
|
|
|
|
|
|
the MDS matrix, just as in g. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
We use the Lbyte macro to extract a given byte from the list L |
275
|
|
|
|
|
|
|
(expressed in little endian). */ |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
#define Lbyte(w, b) L[4*(2*w+odd)+b] |
278
|
|
|
|
|
|
|
|
279
|
1217820
|
|
|
|
|
|
static uint32_t h(int len, const int X, unsigned char *L, int odd) |
280
|
|
|
|
|
|
|
{ |
281
|
|
|
|
|
|
|
unsigned char b0, b1, b2, b3; |
282
|
|
|
|
|
|
|
|
283
|
1217820
|
|
|
|
|
|
b0 = b1 = b2 = b3 = (unsigned char)X; |
284
|
|
|
|
|
|
|
|
285
|
1217820
|
|
|
|
|
|
switch (len) { |
286
|
|
|
|
|
|
|
case 4: |
287
|
5880
|
|
|
|
|
|
b0 = q[1][b0] ^ Lbyte(3, 0); |
288
|
5880
|
|
|
|
|
|
b1 = q[0][b1] ^ Lbyte(3, 1); |
289
|
5880
|
|
|
|
|
|
b2 = q[0][b2] ^ Lbyte(3, 2); |
290
|
5880
|
|
|
|
|
|
b3 = q[1][b3] ^ Lbyte(3, 3); |
291
|
|
|
|
|
|
|
case 3: |
292
|
11760
|
|
|
|
|
|
b0 = q[1][b0] ^ Lbyte(2, 0); |
293
|
11760
|
|
|
|
|
|
b1 = q[1][b1] ^ Lbyte(2, 1); |
294
|
11760
|
|
|
|
|
|
b2 = q[0][b2] ^ Lbyte(2, 2); |
295
|
11760
|
|
|
|
|
|
b3 = q[0][b3] ^ Lbyte(2, 3); |
296
|
|
|
|
|
|
|
case 2: |
297
|
1217820
|
|
|
|
|
|
b0 = q[0][q[0][b0] ^ Lbyte(1, 0)] ^ Lbyte(0, 0); |
298
|
1217820
|
|
|
|
|
|
b1 = q[0][q[1][b1] ^ Lbyte(1, 1)] ^ Lbyte(0, 1); |
299
|
1217820
|
|
|
|
|
|
b2 = q[1][q[0][b2] ^ Lbyte(1, 2)] ^ Lbyte(0, 2); |
300
|
1217820
|
|
|
|
|
|
b3 = q[1][q[1][b3] ^ Lbyte(1, 3)] ^ Lbyte(0, 3); |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
1217820
|
|
|
|
|
|
return m[0][b0] ^ m[1][b1] ^ m[2][b2] ^ m[3][b3]; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
/* The (12, 8) Reed Solomon code has the generator polynomial: |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1 |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
where the coefficients are in the finite field GF(2^8) with a modular |
311
|
|
|
|
|
|
|
polynomial a^8+a^6+a^3+a^2+1. To generate the remainder, we have to |
312
|
|
|
|
|
|
|
start with a 12th order polynomial with our eight input bytes as the |
313
|
|
|
|
|
|
|
coefficients of the 4th to 11th terms: |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0 |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
We then multiply the generator polynomial by m[7]*x^7 and subtract it |
318
|
|
|
|
|
|
|
(XOR in GF(2^8)) from the above to eliminate the x^7 term (the |
319
|
|
|
|
|
|
|
arithmetic on the coefficients is done in GF(2^8)). We then multiply |
320
|
|
|
|
|
|
|
the generator polynomial by m[6]*x^6 and use this to remove the x^10 |
321
|
|
|
|
|
|
|
term, and so on until the x^4 term is removed, and we are left with: |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0] |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
which give the resulting 4 bytes of the remainder. This is equivalent |
326
|
|
|
|
|
|
|
to the matrix multiplication described in the Twofish paper, but is |
327
|
|
|
|
|
|
|
much faster. */ |
328
|
|
|
|
|
|
|
|
329
|
40888
|
|
|
|
|
|
static uint32_t mds_rem(uint32_t a, uint32_t b) |
330
|
|
|
|
|
|
|
{ |
331
|
|
|
|
|
|
|
int i; |
332
|
|
|
|
|
|
|
uint32_t t, u; |
333
|
|
|
|
|
|
|
enum { G_MOD = 0x0000014d }; |
334
|
|
|
|
|
|
|
|
335
|
367992
|
100
|
|
|
|
|
for (i = 0; i < 8; i++) { |
336
|
|
|
|
|
|
|
/* Get most significant coefficient */ |
337
|
327104
|
|
|
|
|
|
t = b >> 24; |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
/* Shift the others up */ |
340
|
327104
|
|
|
|
|
|
b = (b << 8) | (a >> 24); |
341
|
327104
|
|
|
|
|
|
a <<= 8; |
342
|
|
|
|
|
|
|
|
343
|
327104
|
|
|
|
|
|
u = t << 1; |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
/* Subtract the modular polynomial on overflow */ |
346
|
327104
|
100
|
|
|
|
|
if (t & 0x80) |
347
|
263436
|
|
|
|
|
|
u ^= G_MOD; |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
/* Remove t * (a * x^2 + 1) */ |
350
|
327104
|
|
|
|
|
|
b ^= t ^ (u << 16); |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
/* Form u = a*t + t/a = t*(a + 1/a) */ |
353
|
327104
|
|
|
|
|
|
u ^= t >> 1; |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
/* Add the modular polynomial on underflow */ |
356
|
327104
|
100
|
|
|
|
|
if (t & 0x01) |
357
|
223293
|
|
|
|
|
|
u ^= G_MOD >> 1; |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
/* Remove t * (a + 1/a) * (x^3 + x) */ |
360
|
327104
|
|
|
|
|
|
b ^= (u << 24) | (u << 8); |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
40888
|
|
|
|
|
|
return b; |
364
|
|
|
|
|
|
|
} |