File Coverage

_twofish.c

Criterion	Covered	Total	%
statement	97	98	98.9
branch	19	20	95.0
condition			n/a
subroutine			n/a
pod			n/a
total	116	118	98.3

line	stmt	bran	code
1			/*
2			* Copyright 1999 Dr. Brian Gladman
3			* Copyright 2001 Abhijit Menon-Sen
4			*/
5
6			/* Twofish is a 128-bit symmetric block cipher with a variable length
7			key, developed by Counterpane Labs. It is unpatented and free for all
8			uses, as described at
9			and .
10
11			This implementation is based on code by Dr. Brian Gladman, at
12			.
13			Some of his comments are reproduced below:
14
15			"Copyright in this implementation is held by Dr. B R Gladman but I
16			hereby give permission for its free direct or derivative use subject
17			to ackowledgement of its origin and compliance with any conditions
18			that the originators of the algorithm place on its exploitation.
19
20			My thanks to Doug Whiting and Niels Ferguson for comments that led to
21			improvements in this implementation." */
22
23			#include "twofish.h"
24			#include "tables.h"
25
26			/* Extract the n'th byte from a 32-bit word */
27			#define byte(x,n) ((unsigned char)((x) >> (8 * n)))
28
29			/* 32 bit rotate-left and right macros */
30			#define ror(x,n) (((x) >> ((int)(n))) \| ((x) << (32 - (int)(n))))
31			#define rol(x,n) (((x) << ((int)(n))) \| ((x) >> (32 - (int)(n))))
32
33			/* Endian-independent byte -> word conversion */
34			#define strtonl(s) (uint32_t)((s)\|(s+1)<<8\|(s+2)<<16\|(s+3)<<24)
35
36			#define nltostr(l, s) \
37			do { \
38			*(s )=(unsigned char)((l) ); \
39			*(s+1)=(unsigned char)((l) >> 8); \
40			*(s+2)=(unsigned char)((l) >> 16); \
41			*(s+3)=(unsigned char)((l) >> 24); \
42			} while (0)
43
44			static uint32_t mds_rem(uint32_t a, uint32_t b);
45			static uint32_t h(int len, const int x, unsigned char *key, int odd);
46
47			/* The key schedule takes a 128, 192, or 256-bit key, and provides 40
48			32-bit words of expanded key K0,...,K39 and the 4 key-dependent
49			S-boxes used in the g function. */
50
51	20297		struct twofish twofish_setup(unsigned char key, int len)
52			{
53			int i;
54			uint32_t a, b, x;
55			struct twofish *t;
56			unsigned char *s, skey[16];
57
58	20297	50	if ((t = malloc(sizeof(struct twofish))) == NULL)
59	0		return NULL;
60
61			/* The key consists of k=len/8 (2, 3 or 4) 64-bit units. */
62	20297		t->len = len /= 8;
63
64			/* We must derive three vectors Me, Mo, and S, each with k 32-bit
65			words, from the 2k words in the key.
66
67			Me = (key[0], key[2], ..., key[2k-2]) (even words)
68			Mo = (key[1], key[3], ..., key[2k-1]) (odd words)
69
70			The third vector is derived by multiplying each of the k groups
71			of 8 bytes from the key by a 4x8 matrix, to get k 32-bit words.
72
73			S = (S[k-1], S[k-2], ..., S[0])
74
75			where S[i] are the 4 bytes from the multiplication, interpreted
76			as a 32-bit word. As described later, mds_rem is equivalent to
77			the matrix multiplication, but faster.
78
79			Since all these vectors are going to be used byte-by-byte, we
80			avoid converting them to words altogether, and write the bytes of
81			S into the array skey below: */
82
83	20297		s = skey + 4*(len - 1);
84	61185	100	for (i = 0; i < len; i++) {
85	40888		x = mds_rem(strtonl(key+8i), strtonl(key+8i+4));
86	40888		nltostr(x, s);
87	40888		s -= 4;
88			}
89	20297		s = skey;
90
91			/* The words of the expanded key K are defined using the h function:
92
93			rho = 2^24 + 2^16 + 2^8 + 2^0 (0x01010101)
94			A[i] = h(2i*rho, Me)
95			B[i] = ROL(h(2(i+1)*rho, Mo), 8)
96			K[2i] = (A[i] + B[i]) mod 2^32
97			K[2i+1] = ROL((A[i] + 2B[i]) mod 2^32, 9)
98
99			rho has the property that, for i = 0..255, the word i*rho
100			consists of four equal bytes, each with the value i. The function
101			h is only applied to words of this type, so we only pass it the
102			value of i.
103
104			We also didn't generate the vectors Me and Mo separately: we pass
105			the entire key, and indicate whether we want the even or odd
106			words to be used. */
107
108	426237	100	for (i = 0; i < 40; i += 2) {
109	405940		a = h(len, i, key, 0);
110	405940		b = rol(h(len, i+1, key, 1), 8);
111
112	405940		t->K[i] = a+b;
113	405940		t->K[i+1] = rol(a+2*b, 9);
114			}
115
116			/* The key-dependent S-boxes used in the g() function are created
117			below. They are defined by g(X) = h(X, S), where S is the vector
118			derived from the key. That is, for i=0..3, the S-box S[i] is
119			formed by mapping from x[i] to y[i] in the h function.
120
121			The relevant lookup tables qN have been precomputed and stored in
122			tables.h; we also perform full key precomputations incorporating
123			the MDS matrix multiplications. */
124
125	20297		switch (len) {
126			case 2:
127	5165957	100	for (i = 0; i < 256; i++) {
128	5145856		x = (unsigned char)i;
129	5145856		t->S[0][i] = m[0][q[0][q[0][x]^s[4]]^s[0]];
130	5145856		t->S[1][i] = m[1][q[0][q[1][x]^s[5]]^s[1]];
131	5145856		t->S[2][i] = m[2][q[1][q[0][x]^s[6]]^s[2]];
132	5145856		t->S[3][i] = m[3][q[1][q[1][x]^s[7]]^s[3]];
133			}
134	20101		break;
135			case 3:
136	25186	100	for (i = 0; i < 256; i++) {
137	25088		x = (unsigned char)i;
138	25088		t->S[0][i] = m[0][q[0][q[0][q[1][x]^s[ 8]]^s[4]]^s[0]];
139	25088		t->S[1][i] = m[1][q[0][q[1][q[1][x]^s[ 9]]^s[5]]^s[1]];
140	25088		t->S[2][i] = m[2][q[1][q[0][q[0][x]^s[10]]^s[6]]^s[2]];
141	25088		t->S[3][i] = m[3][q[1][q[1][q[0][x]^s[11]]^s[7]]^s[3]];
142
143			}
144	98		break;
145			case 4:
146	25186	100	for (i = 0; i < 256; i++) {
147	25088		x = (unsigned char)i;
148	25088		t->S[0][i] = m[0][q[0][q[0][q[1][q[1][x]^s[12]]^s[ 8]]^s[4]]^s[0]];
149	25088		t->S[1][i] = m[1][q[0][q[1][q[1][q[0][x]^s[13]]^s[ 9]]^s[5]]^s[1]];
150	25088		t->S[2][i] = m[2][q[1][q[0][q[0][q[0][x]^s[14]]^s[10]]^s[6]]^s[2]];
151	25088		t->S[3][i] = m[3][q[1][q[1][q[0][q[1][x]^s[15]]^s[11]]^s[7]]^s[3]];
152			}
153	98		break;
154			}
155
156	20297		return t;
157			}
158
159	20297		void twofish_free(struct twofish *self)
160			{
161	20297		free(self);
162	20297		}
163
164			/* The function g splits the input word x into four bytes; each byte is
165			run through its own key-dependent S-box. Each S-box is bijective,
166			takes 8 bits of input and produces 8 bits of output. The four results
167			are interpreted as a vector of length 4 over GF(2^8), and multiplied
168			by the 4x4 MDS matrix. The resulting vector is interpreted as a
169			32-bit word.
170
171			Since we have performed the full key precomputations, g consists only
172			of four lookups and three XORs. g0 is g; g1 is a shortcut for
173			g(ROL(x, 8)). */
174
175			#define g0(x) \
176			t->S[0][byte(x,0)]^t->S[1][byte(x,1)]^t->S[2][byte(x,2)]^t->S[3][byte(x,3)]
177
178			#define g1(x) \
179			t->S[0][byte(x,3)]^t->S[1][byte(x,0)]^t->S[2][byte(x,1)]^t->S[3][byte(x,2)]
180
181			/* F is a key-dependent permutation on 64-bit values. It takes two input
182			words R0 and R1, and a round number r:
183
184			T0 = g(R0)
185			T1 = g(ROL(R1, 8))
186			F0 = (T0 + T1 + K[2r+8])
187			F1 = (T0 + 2*T1 + K[2r+9])
188
189			Each of the 16 encryption rounds consists of the following operations:
190
191			(F0, F1) = F(R0, R1, r)
192			R0 = ROR(R2 ^ F0, 1)
193			R1 = ROL(R3, 1) ^ F1
194			R2 = R0
195			R3 = R1
196
197			For efficiency, two rounds are combined into one in the macros below. */
198
199			#define f_2rounds(i) \
200			t0 = g0(R[0]); \
201			t1 = g1(R[1]); \
202			R[2] = ror(R[2] ^ (t0 + t1 + t->K[4*i+8]), 1); \
203			R[3] = rol(R[3], 1) ^ (t0 + 2t1 + t->K[4i+9]); \
204			t0 = g0(R[2]); \
205			t1 = g1(R[3]); \
206			R[0] = ror(R[0] ^ (t0 + t1 + t->K[4*i+10]), 1); \
207			R[1] = rol(R[1], 1) ^ (t0 + 2t1 + t->K[4i+11]);
208
209			/* This is the inverse of f_2rounds */
210			#define i_2rounds(i) \
211			t0 = g0(R[0]); \
212			t1 = g1(R[1]); \
213			R[2] = rol(R[2], 1) ^ (t0 + t1 + t->K[4*i+10]); \
214			R[3] = ror(R[3] ^ (t0 + 2t1 + t->K[4i+11]), 1); \
215			t0 = g0(R[2]); \
216			t1 = g1(R[3]); \
217			R[0] = rol(R[0], 1) ^ (t0 + t1 + t->K[4*i+8]); \
218			R[1] = ror(R[1] ^ (t0 + 2t1 + t->K[4i+9]), 1)
219
220			/* This function encrypts or decrypts 16 bytes of input data and writes
221			it to output, using the key defined in t. */
222
223	40296		void twofish_crypt(struct twofish *t,
224			unsigned char input, unsigned char output,
225			int decrypt)
226			{
227			uint32_t t0, t1, R[4], out[4];
228
229	40296	100	if (!decrypt) {
230			/* Whiten four 32-bit input words. */
231	20148		R[0] = t->K[0] ^ strtonl(input);
232	20148		R[1] = t->K[1] ^ strtonl(input+4);
233	20148		R[2] = t->K[2] ^ strtonl(input+8);
234	20148		R[3] = t->K[3] ^ strtonl(input+12);
235
236			/* 16 rounds of encryption, combined into 8 pairs. */
237	20148		f_2rounds(0); f_2rounds(1); f_2rounds(2); f_2rounds(3);
238	20148		f_2rounds(4); f_2rounds(5); f_2rounds(6); f_2rounds(7);
239
240			/* Output whitening; The order of R[n] undoes the last swap. */
241	20148		out[0] = t->K[4] ^ R[2];
242	20148		out[1] = t->K[5] ^ R[3];
243	20148		out[2] = t->K[6] ^ R[0];
244	20148		out[3] = t->K[7] ^ R[1];
245			} else {
246	20148		R[0] = t->K[4] ^ strtonl(input);
247	20148		R[1] = t->K[5] ^ strtonl(input+4);
248	20148		R[2] = t->K[6] ^ strtonl(input+8);
249	20148		R[3] = t->K[7] ^ strtonl(input+12);
250
251	20148		i_2rounds(7); i_2rounds(6); i_2rounds(5); i_2rounds(4);
252	20148		i_2rounds(3); i_2rounds(2); i_2rounds(1); i_2rounds(0);
253
254	20148		out[0] = t->K[0] ^ R[2];
255	20148		out[1] = t->K[1] ^ R[3];
256	20148		out[2] = t->K[2] ^ R[0];
257	20148		out[3] = t->K[3] ^ R[1];
258			}
259
260			/* Write 16 output bytes. */
261	40296		nltostr(out[0], output);
262	40296		nltostr(out[1], output+4);
263	40296		nltostr(out[2], output+8);
264	40296		nltostr(out[3], output+12);
265	40296		}
266
267			/* h takes a 32-bit word X, and a list, L = (L[0],...,L[k-1]), of 32-bit
268			words, and produces one word of output. During each of the k stages
269			of the function, the four bytes from X are each passed through a
270			fixed S-box, and XORed with a byte derived from the list. Finally,
271			the bytes are once again passed through an S-box and multiplied by
272			the MDS matrix, just as in g.
273
274			We use the Lbyte macro to extract a given byte from the list L
275			(expressed in little endian). */
276
277			#define Lbyte(w, b) L[4(2w+odd)+b]
278
279	1217820		static uint32_t h(int len, const int X, unsigned char *L, int odd)
280			{
281			unsigned char b0, b1, b2, b3;
282
283	1217820		b0 = b1 = b2 = b3 = (unsigned char)X;
284
285	1217820		switch (len) {
286			case 4:
287	5880		b0 = q[1][b0] ^ Lbyte(3, 0);
288	5880		b1 = q[0][b1] ^ Lbyte(3, 1);
289	5880		b2 = q[0][b2] ^ Lbyte(3, 2);
290	5880		b3 = q[1][b3] ^ Lbyte(3, 3);
291			case 3:
292	11760		b0 = q[1][b0] ^ Lbyte(2, 0);
293	11760		b1 = q[1][b1] ^ Lbyte(2, 1);
294	11760		b2 = q[0][b2] ^ Lbyte(2, 2);
295	11760		b3 = q[0][b3] ^ Lbyte(2, 3);
296			case 2:
297	1217820		b0 = q[0][q[0][b0] ^ Lbyte(1, 0)] ^ Lbyte(0, 0);
298	1217820		b1 = q[0][q[1][b1] ^ Lbyte(1, 1)] ^ Lbyte(0, 1);
299	1217820		b2 = q[1][q[0][b2] ^ Lbyte(1, 2)] ^ Lbyte(0, 2);
300	1217820		b3 = q[1][q[1][b3] ^ Lbyte(1, 3)] ^ Lbyte(0, 3);
301			}
302
303	1217820		return m[0][b0] ^ m[1][b1] ^ m[2][b2] ^ m[3][b3];
304			}
305
306			/* The (12, 8) Reed Solomon code has the generator polynomial:
307
308			g(x) = x^4 + (a + 1/a) * x^3 + a * x^2 + (a + 1/a) * x + 1
309
310			where the coefficients are in the finite field GF(2^8) with a modular
311			polynomial a^8+a^6+a^3+a^2+1. To generate the remainder, we have to
312			start with a 12th order polynomial with our eight input bytes as the
313			coefficients of the 4th to 11th terms:
314
315			m[7] * x^11 + m[6] * x^10 ... + m[0] * x^4 + 0 * x^3 +... + 0
316
317			We then multiply the generator polynomial by m[7]*x^7 and subtract it
318			(XOR in GF(2^8)) from the above to eliminate the x^7 term (the
319			arithmetic on the coefficients is done in GF(2^8)). We then multiply
320			the generator polynomial by m[6]*x^6 and use this to remove the x^10
321			term, and so on until the x^4 term is removed, and we are left with:
322
323			r[3] * x^3 + r[2] * x^2 + r[1] 8 x^1 + r[0]
324
325			which give the resulting 4 bytes of the remainder. This is equivalent
326			to the matrix multiplication described in the Twofish paper, but is
327			much faster. */
328
329	40888		static uint32_t mds_rem(uint32_t a, uint32_t b)
330			{
331			int i;
332			uint32_t t, u;
333			enum { G_MOD = 0x0000014d };
334
335	367992	100	for (i = 0; i < 8; i++) {
336			/* Get most significant coefficient */
337	327104		t = b >> 24;
338
339			/* Shift the others up */
340	327104		b = (b << 8) \| (a >> 24);
341	327104		a <<= 8;
342
343	327104		u = t << 1;
344
345			/* Subtract the modular polynomial on overflow */
346	327104	100	if (t & 0x80)
347	263436		u ^= G_MOD;
348
349			/* Remove t * (a * x^2 + 1) */
350	327104		b ^= t ^ (u << 16);
351
352			/* Form u = at + t/a = t(a + 1/a) */
353	327104		u ^= t >> 1;
354
355			/* Add the modular polynomial on underflow */
356	327104	100	if (t & 0x01)
357	223293		u ^= G_MOD >> 1;
358
359			/* Remove t * (a + 1/a) * (x^3 + x) */
360	327104		b ^= (u << 24) \| (u << 8);
361			}
362
363	40888		return b;
364			}