line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/*************************************************************************** |
2
|
|
|
|
|
|
|
TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
Submitters: |
5
|
|
|
|
|
|
|
Bruce Schneier, Counterpane Systems |
6
|
|
|
|
|
|
|
Doug Whiting, Hi/fn |
7
|
|
|
|
|
|
|
John Kelsey, Counterpane Systems |
8
|
|
|
|
|
|
|
Chris Hall, Counterpane Systems |
9
|
|
|
|
|
|
|
David Wagner, UC Berkeley |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Code Author: Doug Whiting, Hi/fn |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Version 1.00 April 1998 |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved. |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Notes: |
18
|
|
|
|
|
|
|
* Optimized version |
19
|
|
|
|
|
|
|
* Tab size is set to 4 characters in this file |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
***************************************************************************/ |
22
|
|
|
|
|
|
|
#include "aes.h" |
23
|
|
|
|
|
|
|
#include "table.h" |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#include |
26
|
|
|
|
|
|
|
/*#include */ |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
/*
 * Accept the lower-case spellings min_key / part_key / zero_key and map each
 * onto the upper-case macro that the rest of this file tests.  Because this
 * is a single #if/#elif chain, only the first matching lower-case name is
 * translated; an upper-case macro that is already defined is left alone.
 */
#if defined(min_key) && !defined(MIN_KEY)
#define MIN_KEY 1 /* toupper() */
#elif defined(part_key) && !defined(PART_KEY)
#define PART_KEY 1
#elif defined(zero_key) && !defined(ZERO_KEY)
#define ZERO_KEY 1
#endif
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#ifdef USE_ASM |
38
|
|
|
|
|
|
|
extern int useAsm; /* ok to use ASM code? */ |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
typedef int cdecl CipherProc |
41
|
|
|
|
|
|
|
(cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer); |
42
|
|
|
|
|
|
|
typedef int cdecl KeySetupProc(keyInstance *key); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
extern CipherProc *blockEncrypt_86; /* ptr to ASM functions */ |
45
|
|
|
|
|
|
|
extern CipherProc *blockDecrypt_86; |
46
|
|
|
|
|
|
|
extern KeySetupProc *reKey_86; |
47
|
|
|
|
|
|
|
extern DWORD cdecl TwofishAsmCodeSize(void); |
48
|
|
|
|
|
|
|
#endif |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
/* |
51
|
|
|
|
|
|
|
+***************************************************************************** |
52
|
|
|
|
|
|
|
* Constants/Macros/Tables |
53
|
|
|
|
|
|
|
-****************************************************************************/ |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
/* CONST expands to nothing, so the "CONST" objects below are writable. */
#define CONST /* help syntax from C++, NOP here */

/* MDStab is filled in by BuildMDS(); needToBuildMDS is cleared at the end of
   BuildMDS() and tested by reKey() so the table is built only once. */
static CONST fullSbox MDStab; /* not actually const. Initialized ONE time */
static int needToBuildMDS=1; /* is MDStab initialized yet? */

/* When nonzero, BuildMDS() also fills bigTab and the 128-bit-key path of
   reKey() reads from it instead of chaining two p8() lookups. */
#define BIG_TAB 0

#if BIG_TAB
/* bigTab[i][j][k] = q0[q1[k]^j] for the permutation pair of byte lane i
   (see the BIG_TAB section of BuildMDS). */
static BYTE bigTab[4][256][256]; /* pre-computed S-box */
#endif

/* number of rounds for various key sizes: 128, 192, 256 */
/* (ignored for now in optimized code!) */
static CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256};

/* _sBox_ names the key-dependent S-box storage: a member of the key instance
   when REENTRANT is nonzero (so any use requires a variable named "key" in
   scope), otherwise one file-static table shared by all keys. */
#if REENTRANT
#define _sBox_ key->sBox8x32
#else
static fullSbox _sBox_; /* permuted MDStab based on keys */
#endif
/* Byte view of _sBox_: address of the N-th 256-byte slice. */
#define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256)
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
/*------- see what level of S-box precomputation we need to do -----*/ |
78
|
|
|
|
|
|
|
/*
 * Exactly one of the four branches below is compiled.  Each one defines
 *   MOD_STRING  - text describing the keying mode,
 *   Fe32_...    - the per-round keyed lookup (four byte lanes XORed together),
 *   sbSet       - how reKey() stores one precomputed S-box entry (not ZERO_KEY),
 *   GetSboxKey  - declaration of whatever local key copy Fe32_ refers to.
 * The branches differ in how much work is moved from Fe32_ into reKey().
 */
#if defined(ZERO_KEY)
/* No key-dependent table: every call walks the p8() permutations, XORing in
   the S-box key bytes from the local SKEY[] copy, then indexes MDStab.
   There is one macro per key size; no generic Fe32_ is defined here. */
#define MOD_STRING "(Zero S-box keying)"
#define Fe32_128(x,R) \
( MDStab[0][p8(01)[p8(02)[_b(x,R )]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_192(x,R) \
( MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )
#define Fe32_256(x,R) \
( MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \
MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \
MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \
MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] )

/* Expands to a declaration followed by a statement, and needs "key" in scope. */
#define GetSboxKey DWORD SKEY[4]; /* local copy */ \
memcpy(SKEY,key->sboxKeys,sizeof(SKEY));
/*----------------------------------------------------------------*/
#elif defined(MIN_KEY)
/* _sBox8_(N) holds a partially keyed 8-bit table built by reKey(); each call
   still does one p8() lookup, one XOR with a byte of SKEY0, and the MDStab
   lookup. */
#define MOD_STRING "(Minimal keying)"
#define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R )]] ^ b0(SKEY0)] ^ \
MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \
MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \
MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)])
/* Store byte v at index i+J of 8-bit table N.  The arguments are pasted in
   unparenthesized, so callers pass simple expressions. */
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
/* No trailing semicolon here: the user of the macro supplies it. */
#define GetSboxKey DWORD SKEY0 = key->sboxKeys[0] /* local copy */
/*----------------------------------------------------------------*/
#elif defined(PART_KEY)
/* _sBox8_(N) holds the complete keyed 8-bit S-box; each call is one byte
   lookup per lane followed by the MDStab lookup. */
#define MOD_STRING "(Partial keying)"
#define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R )]] ^ \
MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \
MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \
MDStab[3][_sBox8_(3)[_b(x,R+3)]])
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; }
#define GetSboxKey
/*----------------------------------------------------------------*/
#else /* default is FULL_KEY */
#ifndef FULL_KEY
#define FULL_KEY 1
#endif
/* TAB_STR is appended to MOD_STRING by string-literal concatenation; it is
   empty unless BIG_TAB is enabled. */
#if BIG_TAB
#define TAB_STR " (Big table)"
#else
#define TAB_STR
#endif
#ifdef COMPILE_KEY
#define MOD_STRING "(Compiled subkeys)" TAB_STR
#else
#define MOD_STRING "(Full keying)" TAB_STR
#endif
/* Fe32_ does a full S-box + MDS lookup. Need to #define _sBox_ before use.
Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions
in optimized assembly language.
Lanes 0 and 1 share _sBox_[0] (even and odd entries respectively) and lanes
2 and 3 share _sBox_[2] in the same way; each of those index expressions
reaches up to 511, so it runs on into the following row of _sBox_.
*/
#define Fe32_(x,R) (_sBox_[0][2*_b(x,R )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \
_sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1])
/* set a single S-box value, given the input byte */
/* Earlier array-indexed form, kept for reference; the live definition below
   computes the same element with flat DWORD pointer arithmetic. */
//#define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; }
#define sbSet(N,i,J,v) { *((DWORD *)_sBox_ + (N&2)*256 + 2*i + (N&1) + 2*J) = MDStab[N][v]; }
#define GetSboxKey
#endif
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
/* macro(s) for debugging help */ |
144
|
|
|
|
|
|
|
#define CHECK_TABLE 0 /* nonzero --> compare against "slow" table */ |
145
|
|
|
|
|
|
|
#define VALIDATE_PARMS 0 /* disable for full speed */ |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
/* end of debug macros */ |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
#ifdef GetCodeSize
/* Here() is defined outside this file, so it must be declared with external
   linkage only: a declaration may carry at most one storage-class specifier,
   and "static extern" is rejected by the compiler. */
extern DWORD Here(DWORD x); /* return caller's address! */
/* Returns whatever Here(0) reports when called from this point in the file;
   used as a marker for the start of the Twofish code. */
static DWORD TwofishCodeStart(void) { return Here(0); }
#endif
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
/* |
155
|
|
|
|
|
|
|
+***************************************************************************** |
156
|
|
|
|
|
|
|
* |
157
|
|
|
|
|
|
|
* Function Name: TableOp |
158
|
|
|
|
|
|
|
* |
159
|
|
|
|
|
|
|
* Function: Handle table use checking |
160
|
|
|
|
|
|
|
* |
161
|
|
|
|
|
|
|
* Arguments: op = what to do (see TAB_* defns in AES.H) |
162
|
|
|
|
|
|
|
* |
163
|
|
|
|
|
|
|
* Return: TRUE --> done (for TAB_QUERY) |
164
|
|
|
|
|
|
|
* |
165
|
|
|
|
|
|
|
* Notes: This routine is for use in generating the tables KAT file. |
166
|
|
|
|
|
|
|
* For this optimized version, we don't actually track table usage, |
167
|
|
|
|
|
|
|
* since it would make the macros incredibly ugly. Instead we just |
168
|
|
|
|
|
|
|
* run for a fixed number of queries and then say we're done. |
169
|
|
|
|
|
|
|
* |
170
|
|
|
|
|
|
|
-****************************************************************************/ |
171
|
0
|
|
|
|
|
|
static int TableOp(int op) |
172
|
|
|
|
|
|
|
{ |
173
|
|
|
|
|
|
|
static int queryCnt=0; |
174
|
|
|
|
|
|
|
|
175
|
0
|
|
|
|
|
|
switch (op) |
176
|
|
|
|
|
|
|
{ |
177
|
|
|
|
|
|
|
case TAB_DISABLE: |
178
|
0
|
|
|
|
|
|
break; |
179
|
|
|
|
|
|
|
case TAB_ENABLE: |
180
|
0
|
|
|
|
|
|
break; |
181
|
|
|
|
|
|
|
case TAB_RESET: |
182
|
0
|
|
|
|
|
|
queryCnt=0; |
183
|
0
|
|
|
|
|
|
break; |
184
|
|
|
|
|
|
|
case TAB_QUERY: |
185
|
0
|
|
|
|
|
|
queryCnt++; |
186
|
0
|
0
|
|
|
|
|
if (queryCnt < TAB_MIN_QUERY) |
187
|
0
|
|
|
|
|
|
return FALSE; |
188
|
|
|
|
|
|
|
} |
189
|
0
|
|
|
|
|
|
return TRUE; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#if CHECK_TABLE |
194
|
|
|
|
|
|
|
/* |
195
|
|
|
|
|
|
|
+***************************************************************************** |
196
|
|
|
|
|
|
|
* |
197
|
|
|
|
|
|
|
* Function Name: f32 |
198
|
|
|
|
|
|
|
* |
199
|
|
|
|
|
|
|
* Function: Run four bytes through keyed S-boxes and apply MDS matrix |
200
|
|
|
|
|
|
|
* |
201
|
|
|
|
|
|
|
* Arguments: x = input to f function |
202
|
|
|
|
|
|
|
* k32 = pointer to key dwords |
203
|
|
|
|
|
|
|
* keyLen = total key length (k32 --> keyLen/2 bits) |
204
|
|
|
|
|
|
|
* |
205
|
|
|
|
|
|
|
* Return: The output of the keyed permutation applied to x. |
206
|
|
|
|
|
|
|
* |
207
|
|
|
|
|
|
|
* Notes: |
208
|
|
|
|
|
|
|
* This function is a keyed 32-bit permutation. It is the major building |
209
|
|
|
|
|
|
|
* block for the Twofish round function, including the four keyed 8x8 |
210
|
|
|
|
|
|
|
* permutations and the 4x4 MDS matrix multiply. This function is used |
211
|
|
|
|
|
|
|
* both for generating round subkeys and within the round function on the |
212
|
|
|
|
|
|
|
* block being encrypted. |
213
|
|
|
|
|
|
|
* |
214
|
|
|
|
|
|
|
* This version is fairly slow and pedagogical, although a smartcard would |
215
|
|
|
|
|
|
|
* probably perform the operation exactly this way in firmware. For |
216
|
|
|
|
|
|
|
* ultimate performance, the entire operation can be completed with four |
217
|
|
|
|
|
|
|
* lookups into four 256x32-bit tables, with three dword xors. |
218
|
|
|
|
|
|
|
* |
219
|
|
|
|
|
|
|
* The MDS matrix is defined in TABLE.H. To multiply by Mij, just use the |
220
|
|
|
|
|
|
|
* macro Mij(x). |
221
|
|
|
|
|
|
|
* |
222
|
|
|
|
|
|
|
-****************************************************************************/ |
223
|
|
|
|
|
|
|
static DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
{
    BYTE b[4];

    /* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
    /* Note that each byte goes through a different combination of S-boxes.*/

    /* Split x so that b[0] = LSB and b[3] = MSB.  Shifts are used instead of
       storing a DWORD through the BYTE array: the result is the same on any
       byte order, needs no particular alignment of b[], and cannot write past
       b[3] if DWORD is wider than 32 bits. */
    b[0] = (BYTE)( x        & 0xFF);
    b[1] = (BYTE)((x >>  8) & 0xFF);
    b[2] = (BYTE)((x >> 16) & 0xFF);
    b[3] = (BYTE)((x >> 24) & 0xFF);

    /* Number of 64-bit key words, reduced mod 4 (so 4 words selects case 0).
       A value of 1 matches no case and leaves b[] untouched. */
    switch (((keyLen + 63)/64) & 3)
    {
        case 0: /* 256 bits of key */
            b[0] = p8(04)[b[0]] ^ b0(k32[3]);
            b[1] = p8(14)[b[1]] ^ b1(k32[3]);
            b[2] = p8(24)[b[2]] ^ b2(k32[3]);
            b[3] = p8(34)[b[3]] ^ b3(k32[3]);
            /* fall thru, having pre-processed b[0]..b[3] with k32[3] */
        case 3: /* 192 bits of key */
            b[0] = p8(03)[b[0]] ^ b0(k32[2]);
            b[1] = p8(13)[b[1]] ^ b1(k32[2]);
            b[2] = p8(23)[b[2]] ^ b2(k32[2]);
            b[3] = p8(33)[b[3]] ^ b3(k32[2]);
            /* fall thru, having pre-processed b[0]..b[3] with k32[2] */
        case 2: /* 128 bits of key */
            b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
            b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
            b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
            b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
    }

    /* Now perform the MDS matrix multiply inline. */
    return ((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3])) ) ^
           ((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) << 8) ^
           ((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
           ((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
}
258
|
|
|
|
|
|
|
#endif /* CHECK_TABLE */ |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
/* |
262
|
|
|
|
|
|
|
+***************************************************************************** |
263
|
|
|
|
|
|
|
* |
264
|
|
|
|
|
|
|
* Function Name: RS_MDS_Encode |
265
|
|
|
|
|
|
|
* |
266
|
|
|
|
|
|
|
* Function: Use (12,8) Reed-Solomon code over GF(256) to produce |
267
|
|
|
|
|
|
|
* a key S-box dword from two key material dwords. |
268
|
|
|
|
|
|
|
* |
269
|
|
|
|
|
|
|
* Arguments: k0 = 1st dword |
270
|
|
|
|
|
|
|
* k1 = 2nd dword |
271
|
|
|
|
|
|
|
* |
272
|
|
|
|
|
|
|
* Return: Remainder polynomial generated using RS code |
273
|
|
|
|
|
|
|
* |
274
|
|
|
|
|
|
|
* Notes: |
275
|
|
|
|
|
|
|
* Since this computation is done only once per reKey per 64 bits of key, |
276
|
|
|
|
|
|
|
* the performance impact of this routine is imperceptible. The RS code |
277
|
|
|
|
|
|
|
* chosen has "simple" coefficients to allow smartcard/hardware implementation |
278
|
|
|
|
|
|
|
* without lookup tables. |
279
|
|
|
|
|
|
|
* |
280
|
|
|
|
|
|
|
-****************************************************************************/ |
281
|
125
|
|
|
|
|
|
static DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
{
    DWORD acc;
    int   step;

    /* First half: start from k1 and shift it through one byte at a time. */
    acc = k1;
    for (step = 0; step < 4; step++)
    {
        RS_rem(acc);
    }

    /* Second half: merge in k0, then shift through another four bytes. */
    acc ^= k0;
    for (step = 0; step < 4; step++)
    {
        RS_rem(acc);
    }

    return acc;
}
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
/* |
297
|
|
|
|
|
|
|
+***************************************************************************** |
298
|
|
|
|
|
|
|
* |
299
|
|
|
|
|
|
|
* Function Name: BuildMDS |
300
|
|
|
|
|
|
|
* |
301
|
|
|
|
|
|
|
* Function: Initialize the MDStab array |
302
|
|
|
|
|
|
|
* |
303
|
|
|
|
|
|
|
* Arguments: None. |
304
|
|
|
|
|
|
|
* |
305
|
|
|
|
|
|
|
* Return: None. |
306
|
|
|
|
|
|
|
* |
307
|
|
|
|
|
|
|
* Notes: |
308
|
|
|
|
|
|
|
* Here we precompute all the fixed MDS table. This only needs to be done |
309
|
|
|
|
|
|
|
* one time at initialization, after which the table is "CONST". |
310
|
|
|
|
|
|
|
* |
311
|
|
|
|
|
|
|
-****************************************************************************/ |
312
|
1
|
|
|
|
|
|
/* Fill MDStab[0..3][0..255] (and bigTab when BIG_TAB is nonzero), then clear
   needToBuildMDS so that reKey() does not call this again. */
static void BuildMDS(void)
{
    int i;
    DWORD d;
    /* Index 0 / 1 of each array corresponds to P8x8[0][i] / P8x8[1][i].
       mY is declared with 4 elements but only [0] and [1] are written. */
    BYTE m1[2],mX[2],mY[4];

    for (i=0;i<256;i++)
    {
        /* These Mul_X / Mul_Y uses appear before the #undef lines below, so
           they still expand to the real multiply macros. */
        m1[0]=P8x8[0][i]; /* compute all the matrix elements */
        mX[0]=(BYTE) Mul_X(m1[0]);
        mY[0]=(BYTE) Mul_Y(m1[0]);

        m1[1]=P8x8[1][i];
        mX[1]=(BYTE) Mul_X(m1[1]);
        mY[1]=(BYTE) Mul_Y(m1[1]);

/* From here to the matching restore after the loop, Mul_1 / Mul_X / Mul_Y are
   plain names of the local arrays above.  Presumably the Mij macros from
   TABLE.H are written in terms of these three names, so that Mij[k] becomes
   an array lookup instead of a multiply -- confirm against TABLE.H. */
#undef Mul_1 /* change what the pre-processor does with Mij */
#undef Mul_X
#undef Mul_Y
#define Mul_1 m1 /* It will now access m1[], mX[], and mY[] */
#define Mul_X mX
#define Mul_Y mY

/* Assemble the four bytes of d from column N of the matrix, using the
   precomputed products selected by P_N0, and store d as MDStab[N][i]. */
#define SetMDS(N) \
        b0(d) = M0##N[P_##N##0]; \
        b1(d) = M1##N[P_##N##0]; \
        b2(d) = M2##N[P_##N##0]; \
        b3(d) = M3##N[P_##N##0]; \
        MDStab[N][i] = d;

        SetMDS(0); /* fill in the matrix with elements computed above */
        SetMDS(1);
        SetMDS(2);
        SetMDS(3);
    }
#undef Mul_1
#undef Mul_X
#undef Mul_Y
#define Mul_1 Mx_1 /* re-enable true multiply */
#define Mul_X Mx_X
#define Mul_Y Mx_Y

#if BIG_TAB
    {
        int j,k;
        BYTE *q0,*q1;

        /* For each byte lane i, tabulate q0[q1[k]^j] for every key byte j and
           input byte k, using that lane's pair of p8() permutations. */
        for (i=0;i<4;i++)
        {
            switch (i)
            {
                case 0: q0=p8(01); q1=p8(02); break;
                case 1: q0=p8(11); q1=p8(12); break;
                case 2: q0=p8(21); q1=p8(22); break;
                case 3: q0=p8(31); q1=p8(32); break;
            }
            for (j=0;j<256;j++)
                for (k=0;k<256;k++)
                    bigTab[i][j][k]=q0[q1[k]^j];
        }
    }
#endif

    needToBuildMDS=0; /* NEVER modify the table again! */
}
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
/* |
379
|
|
|
|
|
|
|
+***************************************************************************** |
380
|
|
|
|
|
|
|
* |
381
|
|
|
|
|
|
|
* Function Name: ReverseRoundSubkeys |
382
|
|
|
|
|
|
|
* |
383
|
|
|
|
|
|
|
* Function: Reverse order of round subkeys to switch between encrypt/decrypt |
384
|
|
|
|
|
|
|
* |
385
|
|
|
|
|
|
|
* Arguments: key = ptr to keyInstance to be reversed |
386
|
|
|
|
|
|
|
* newDir = new direction value |
387
|
|
|
|
|
|
|
* |
388
|
|
|
|
|
|
|
* Return: None. |
389
|
|
|
|
|
|
|
* |
390
|
|
|
|
|
|
|
* Notes: |
391
|
|
|
|
|
|
|
* This optimization allows both blockEncrypt and blockDecrypt to use the same |
392
|
|
|
|
|
|
|
* "fallthru" switch statement based on the number of rounds. |
393
|
|
|
|
|
|
|
* Note that key->numRounds must be even and >= 2 here. |
394
|
|
|
|
|
|
|
* |
395
|
|
|
|
|
|
|
-****************************************************************************/ |
396
|
81
|
|
|
|
|
|
static void ReverseRoundSubkeys(keyInstance *key,BYTE newDir)
{
    /* Round subkeys start ROUND_SUBKEYS entries into subKeys[] and are
       handled as pairs: one pair (two DWORDs) per round. */
    DWORD *rk = key->subKeys+ROUND_SUBKEYS;
    int    lo = 0;                          /* index of first pair */
    int    hi = 2*key->numRounds - 2;       /* index of last pair  */

    /* Exchange pairs from the two ends, moving inward.  The order of the
       pairs is reversed; the two DWORDs inside a pair keep their order. */
    while (lo < hi)
    {
        DWORD first  = rk[lo];
        DWORD second = rk[lo+1];

        rk[lo]   = rk[hi];
        rk[lo+1] = rk[hi+1];
        rk[hi]   = first;
        rk[hi+1] = second;

        lo += 2;
        hi -= 2;
    }

    key->direction=newDir;
}
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
/* |
416
|
|
|
|
|
|
|
+***************************************************************************** |
417
|
|
|
|
|
|
|
* |
418
|
|
|
|
|
|
|
* Function Name: Xor256 |
419
|
|
|
|
|
|
|
* |
420
|
|
|
|
|
|
|
* Function: Copy an 8-bit permutation (256 bytes), xoring with a byte |
421
|
|
|
|
|
|
|
* |
422
|
|
|
|
|
|
|
* Arguments: dst = where to put result |
423
|
|
|
|
|
|
|
* src = where to get data (can be same as dst) |
424
|
|
|
|
|
|
|
* b = byte to xor |
425
|
|
|
|
|
|
|
* |
426
|
|
|
|
|
|
|
* Return: None |
427
|
|
|
|
|
|
|
* |
428
|
|
|
|
|
|
|
* Notes: |
429
|
|
|
|
|
|
|
* BorlandC's optimization is terrible! When we put the code inline, |
430
|
|
|
|
|
|
|
* it generates fairly good code in the *following* segment (not in the Xor256 |
431
|
|
|
|
|
|
|
* code itself). If the call is made, the code following the call is awful! |
432
|
|
|
|
|
|
|
* The penalty is nearly 50%! So we take the code size hit for inlining for |
433
|
|
|
|
|
|
|
* Borland, while Microsoft happily works with a call. |
434
|
|
|
|
|
|
|
* |
435
|
|
|
|
|
|
|
-****************************************************************************/ |
436
|
|
|
|
|
|
|
#if defined(__BORLANDC__) /* do it inline */ |
437
|
|
|
|
|
|
|
#define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; } |
438
|
|
|
|
|
|
|
#define Xor256(dst,src,b) \ |
439
|
|
|
|
|
|
|
{ \ |
440
|
|
|
|
|
|
|
register DWORD tmpX=0x01010101u * b;\ |
441
|
|
|
|
|
|
|
for (i=0;i<64;i+=4) \ |
442
|
|
|
|
|
|
|
{ Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \ |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
#else /* do it as a function call */ |
445
|
224
|
|
|
|
|
|
static void Xor256(void *dst,void *src,BYTE b) |
446
|
|
|
|
|
|
|
{ |
447
|
224
|
|
|
|
|
|
register DWORD x=b*0x01010101u; /* replicate byte to all four bytes */ |
448
|
224
|
|
|
|
|
|
register DWORD *d=(DWORD *)dst; |
449
|
224
|
|
|
|
|
|
register DWORD *s=(DWORD *)src; |
450
|
|
|
|
|
|
|
#define X_8(N) { d[N]=s[N] ^ x; d[N+1]=s[N+1] ^ x; } |
451
|
|
|
|
|
|
|
#define X_32(N) { X_8(N); X_8(N+2); X_8(N+4); X_8(N+6); } |
452
|
224
|
|
|
|
|
|
X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */ |
453
|
224
|
|
|
|
|
|
d+=32; /* keep offsets small! */ |
454
|
224
|
|
|
|
|
|
s+=32; |
455
|
224
|
|
|
|
|
|
X_32(0 ); X_32( 8); X_32(16); X_32(24); /* all inline */ |
456
|
224
|
|
|
|
|
|
} |
457
|
|
|
|
|
|
|
#endif |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
/* |
460
|
|
|
|
|
|
|
+***************************************************************************** |
461
|
|
|
|
|
|
|
* |
462
|
|
|
|
|
|
|
* Function Name: reKey |
463
|
|
|
|
|
|
|
* |
464
|
|
|
|
|
|
|
* Function: Initialize the Twofish key schedule from key32 |
465
|
|
|
|
|
|
|
* |
466
|
|
|
|
|
|
|
* Arguments: key = ptr to keyInstance to be initialized |
467
|
|
|
|
|
|
|
* |
468
|
|
|
|
|
|
|
* Return: TRUE on success |
469
|
|
|
|
|
|
|
* |
470
|
|
|
|
|
|
|
* Notes: |
471
|
|
|
|
|
|
|
* Here we precompute all the round subkeys, although that is not actually |
472
|
|
|
|
|
|
|
* required. For example, on a smartcard, the round subkeys can |
473
|
|
|
|
|
|
|
* be generated on-the-fly using f32() |
474
|
|
|
|
|
|
|
* |
475
|
|
|
|
|
|
|
-****************************************************************************/ |
476
|
41
|
|
|
|
|
|
/*
 * Build the key schedule for *key from key->key32 and key->keyLen:
 *   - key->sboxKeys[] : S-box key dwords (RS code of each even/odd pair),
 *   - key->subKeys[]  : ROUND_SUBKEYS + 2*numRounds subkey dwords,
 *   - the key-dependent S-box tables (unless ZERO_KEY is defined).
 * Returns TRUE; with VALIDATE_PARMS and ALIGN32 both enabled it can instead
 * return BAD_ALIGN32 or BAD_KEY_INSTANCE before doing any work.
 */
static int reKey(keyInstance *key)
{
    int   i,j,k64Cnt,keyLen;
    int   subkeyCnt;
    DWORD A=0,B=0,q;
    DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64];
    BYTE  L0[256],L1[256]; /* small local 8-bit permutations */

#if VALIDATE_PARMS
  #if ALIGN32
    /* NOTE(review): the key-length check is only compiled when ALIGN32 is
       also enabled -- confirm that this nesting is intended. */
    if (((int)key) & 3)
        return BAD_ALIGN32;
    if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS))
        return BAD_KEY_INSTANCE;
  #endif
#endif

    if (needToBuildMDS) /* do this one time only */
        BuildMDS();

/* res = keyed lookup of x using the key dwords in k32[], selected by k64Cnt.
   When (k64Cnt & 3) == 1 no case matches and res is left unchanged. */
#define F32(res,x,k32) \
    { \
    DWORD t=x; \
    switch (k64Cnt & 3) \
        { \
        case 0: /* same as 4 */ \
            b0(t) = p8(04)[b0(t)] ^ b0(k32[3]); \
            b1(t) = p8(14)[b1(t)] ^ b1(k32[3]); \
            b2(t) = p8(24)[b2(t)] ^ b2(k32[3]); \
            b3(t) = p8(34)[b3(t)] ^ b3(k32[3]); \
            /* fall thru, having pre-processed t */ \
        case 3: b0(t) = p8(03)[b0(t)] ^ b0(k32[2]); \
            b1(t) = p8(13)[b1(t)] ^ b1(k32[2]); \
            b2(t) = p8(23)[b2(t)] ^ b2(k32[2]); \
            b3(t) = p8(33)[b3(t)] ^ b3(k32[2]); \
            /* fall thru, having pre-processed t */ \
        case 2: /* 128-bit keys (optimize for this case) */ \
            res= MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^ \
                 MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^ \
                 MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^ \
                 MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ; \
        } \
    }


#if !CHECK_TABLE
#if defined(USE_ASM) /* only do this if not using assembler */
    if (!(useAsm & 4))
#endif
#endif
    {
        subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds;
        keyLen=key->keyLen;
        k64Cnt=(keyLen+63)/64; /* number of 64-bit key words */
        /* i walks the key words forward while j walks the S-box key slots
           backward, so sboxKeys[] ends up in reverse order. */
        for (i=0,j=k64Cnt-1;i<k64Cnt;i++,j--)
        { /* split into even/odd key dwords */
            k32e[i]=key->key32[2*i ];
            k32o[i]=key->key32[2*i+1];
            /* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */
            sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */
        }
    }

#ifdef USE_ASM
    if (useAsm & 4)
    {
#if defined(COMPILE_KEY) && defined(USE_ASM)
        key->keySig = VALID_SIG; /* show that we are initialized */
        key->codeSize = sizeof(key->compiledCode); /* set size */
#endif
        reKey_86(key);
    }
    else
#endif
    {
        /* Two subkey dwords are produced per iteration; q advances by SK_STEP
           each time, i.e. q == i*SK_STEP (the CHECK_TABLE code below
           recomputes the same values that way). */
        for (i=q=0;i<subkeyCnt/2;i++,q+=SK_STEP)
        { /* compute round subkeys for PHT */
            F32(A,q ,k32e); /* A uses even key dwords */
            F32(B,q+SK_BUMP,k32o); /* B uses odd key dwords */
            B = ROL(B,8);
            key->subKeys[2*i ] = A+B; /* combine with a PHT */
            B = A + 2*B;
            key->subKeys[2*i+1] = ROL(B,SK_ROTL);
        }
#if !defined(ZERO_KEY)
        switch (keyLen) /* case out key length for speed in generating S-boxes */
        {
            case 128:
#if defined(FULL_KEY) || defined(PART_KEY)
#if BIG_TAB
#define one128(N,J) sbSet(N,i,J,L0[i+J])
#define sb128(N) { \
                BYTE *qq=bigTab[N][b##N(sKey[1])]; \
                Xor256(L0,qq,b##N(sKey[0])); \
                for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } }
#else
#define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0)
#define sb128(N) { \
                Xor256(L0,p8(N##2),b##N(sKey[1])); \
                { register DWORD k0=b##N(sKey[0]); \
                for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } }
#endif
#elif defined(MIN_KEY)
#define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1]))
#endif
                sb128(0); sb128(1); sb128(2); sb128(3);
                break;
            case 192:
#if defined(FULL_KEY) || defined(PART_KEY)
#define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
#define sb192(N) { \
                Xor256(L0,p8(N##3),b##N(sKey[2])); \
                { register DWORD k0=b##N(sKey[0]); \
                register DWORD k1=b##N(sKey[1]); \
                for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
#elif defined(MIN_KEY)
#define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
#define sb192(N) { \
                Xor256(L0,p8(N##3),b##N(sKey[2])); \
                { register DWORD k1=b##N(sKey[1]); \
                for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } }
#endif
                sb192(0); sb192(1); sb192(2); sb192(3);
                break;
            case 256:
#if defined(FULL_KEY) || defined(PART_KEY)
#define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0)
#define sb256(N) { \
                Xor256(L1,p8(N##4),b##N(sKey[3])); \
                for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \
                L0[i+1]=p8(N##3)[L1[i+1]]; } \
                Xor256(L0,L0,b##N(sKey[2])); \
                { register DWORD k0=b##N(sKey[0]); \
                register DWORD k1=b##N(sKey[1]); \
                for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
#elif defined(MIN_KEY)
#define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1)
#define sb256(N) { \
                Xor256(L1,p8(N##4),b##N(sKey[3])); \
                for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \
                L0[i+1]=p8(N##3)[L1[i+1]]; } \
                Xor256(L0,L0,b##N(sKey[2])); \
                { register DWORD k1=b##N(sKey[1]); \
                for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } }
#endif
                sb256(0); sb256(1); sb256(2); sb256(3);
                break;
        }
#endif
    }

#if CHECK_TABLE /* sanity check vs. pedagogical code*/
    {
        GetSboxKey;
        /* Recompute every subkey pair with the slow f32() and compare. */
        for (i=0;i<subkeyCnt/2;i++)
        {
            A = f32(i*SK_STEP ,k32e,keyLen); /* A uses even key dwords */
            B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd key dwords */
            B = ROL(B,8);
            assert(key->subKeys[2*i ] == A+ B);
            assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL));
        }
#if !defined(ZERO_KEY) /* any S-boxes to check? */
        for (i=q=0;i<256;i++,q+=0x01010101)
            assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0));
#endif
    }
#endif /* CHECK_TABLE */

    /* For DIR_ENCRYPT the round subkeys are stored in reversed order. */
    if (key->direction == DIR_ENCRYPT)
        ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */

    return TRUE;
}
650
|
|
|
|
|
|
|
/* |
651
|
|
|
|
|
|
|
+***************************************************************************** |
652
|
|
|
|
|
|
|
* |
653
|
|
|
|
|
|
|
* Function Name: makeKey |
654
|
|
|
|
|
|
|
* |
655
|
|
|
|
|
|
|
* Function: Initialize the Twofish key schedule |
656
|
|
|
|
|
|
|
* |
657
|
|
|
|
|
|
|
* Arguments: key = ptr to keyInstance to be initialized |
658
|
|
|
|
|
|
|
* direction = DIR_ENCRYPT or DIR_DECRYPT |
659
|
|
|
|
|
|
|
* keyLen = # bits of key text at *keyMaterial |
660
|
|
|
|
|
|
|
* keyMaterial = ptr to hex ASCII chars representing key bits |
661
|
|
|
|
|
|
|
* |
662
|
|
|
|
|
|
|
* Return: TRUE on success |
663
|
|
|
|
|
|
|
* else error code (e.g., BAD_KEY_DIR) |
664
|
|
|
|
|
|
|
* |
665
|
|
|
|
|
|
|
* Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits |
666
|
|
|
|
|
|
|
* |
667
|
|
|
|
|
|
|
-****************************************************************************/ |
668
|
41
|
|
|
|
|
|
static int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial) |
669
|
|
|
|
|
|
|
{ |
670
|
|
|
|
|
|
|
int i; |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
673
|
|
|
|
|
|
|
if (key == NULL) |
674
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */ |
675
|
|
|
|
|
|
|
if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT)) |
676
|
|
|
|
|
|
|
return BAD_KEY_DIR; /* must have valid direction */ |
677
|
|
|
|
|
|
|
if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F)) |
678
|
|
|
|
|
|
|
return BAD_KEY_MAT; /* length must be valid */ |
679
|
|
|
|
|
|
|
key->keySig = VALID_SIG; /* show that we are initialized */ |
680
|
|
|
|
|
|
|
#if ALIGN32 |
681
|
|
|
|
|
|
|
if ((((int)key) & 3) || (((int)key->key32) & 3)) |
682
|
|
|
|
|
|
|
return BAD_ALIGN32; |
683
|
|
|
|
|
|
|
#endif |
684
|
|
|
|
|
|
|
#endif |
685
|
|
|
|
|
|
|
|
686
|
41
|
|
|
|
|
|
key->direction = direction;/* set our cipher direction */ |
687
|
41
|
|
|
|
|
|
key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */ |
688
|
41
|
|
|
|
|
|
key->numRounds = numRounds[(keyLen-1)/64]; |
689
|
41
|
|
|
|
|
|
memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */ |
690
|
|
|
|
|
|
|
|
691
|
41
|
50
|
|
|
|
|
if (keyMaterial == NULL) |
692
|
0
|
|
|
|
|
|
return TRUE; /* allow a "dummy" call */ |
693
|
|
|
|
|
|
|
|
694
|
291
|
100
|
|
|
|
|
for (i=0;i
|
695
|
500
|
|
|
|
|
|
key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0) |
696
|
250
|
|
|
|
|
|
| (((unsigned char *)keyMaterial)[i*4+1] << 8) |
697
|
250
|
|
|
|
|
|
| (((unsigned char *)keyMaterial)[i*4+2] << 16) |
698
|
250
|
|
|
|
|
|
| (((unsigned char *)keyMaterial)[i*4+3] << 24); |
699
|
|
|
|
|
|
|
|
700
|
41
|
|
|
|
|
|
return reKey(key); /* generate round subkeys */ |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
/* |
705
|
|
|
|
|
|
|
+***************************************************************************** |
706
|
|
|
|
|
|
|
* |
707
|
|
|
|
|
|
|
* Function Name: cipherInit |
708
|
|
|
|
|
|
|
* |
709
|
|
|
|
|
|
|
* Function: Initialize the Twofish cipher in a given mode |
710
|
|
|
|
|
|
|
* |
711
|
|
|
|
|
|
|
* Arguments: cipher = ptr to cipherInstance to be initialized |
712
|
|
|
|
|
|
|
* mode = MODE_ECB, MODE_CBC, or MODE_CFB1 |
713
|
|
|
|
|
|
|
* IV = ptr to hex ASCII test representing IV bytes |
714
|
|
|
|
|
|
|
* |
715
|
|
|
|
|
|
|
* Return: TRUE on success |
716
|
|
|
|
|
|
|
* else error code (e.g., BAD_CIPHER_MODE) |
717
|
|
|
|
|
|
|
* |
718
|
|
|
|
|
|
|
-****************************************************************************/ |
719
|
41
|
|
|
|
|
|
static int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV) |
720
|
|
|
|
|
|
|
{ |
721
|
|
|
|
|
|
|
int i; |
722
|
|
|
|
|
|
|
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
723
|
|
|
|
|
|
|
if (cipher == NULL) |
724
|
|
|
|
|
|
|
return BAD_PARAMS; /* must have a cipherInstance to initialize */ |
725
|
|
|
|
|
|
|
if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1)) |
726
|
|
|
|
|
|
|
return BAD_CIPHER_MODE; /* must have valid cipher mode */ |
727
|
|
|
|
|
|
|
cipher->cipherSig = VALID_SIG; |
728
|
|
|
|
|
|
|
#if ALIGN32 |
729
|
|
|
|
|
|
|
if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3)) |
730
|
|
|
|
|
|
|
return BAD_ALIGN32; |
731
|
|
|
|
|
|
|
#endif |
732
|
|
|
|
|
|
|
#endif |
733
|
|
|
|
|
|
|
|
734
|
41
|
100
|
|
|
|
|
if ((mode != MODE_ECB) && (IV)) /* parse the IV */ |
|
|
50
|
|
|
|
|
|
735
|
|
|
|
|
|
|
{ |
736
|
0
|
|
|
|
|
|
memcpy (cipher->iv32, IV, BLOCK_SIZE/32); |
737
|
0
|
0
|
|
|
|
|
for (i=0;i
|
738
|
0
|
|
|
|
|
|
((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]); |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
41
|
|
|
|
|
|
cipher->mode = mode; |
742
|
|
|
|
|
|
|
|
743
|
41
|
|
|
|
|
|
return TRUE; |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
/* |
747
|
|
|
|
|
|
|
+***************************************************************************** |
748
|
|
|
|
|
|
|
* |
749
|
|
|
|
|
|
|
* Function Name: blockEncrypt |
750
|
|
|
|
|
|
|
* |
751
|
|
|
|
|
|
|
* Function: Encrypt block(s) of data using Twofish |
752
|
|
|
|
|
|
|
* |
753
|
|
|
|
|
|
|
* Arguments: cipher = ptr to already initialized cipherInstance |
754
|
|
|
|
|
|
|
* key = ptr to already initialized keyInstance |
755
|
|
|
|
|
|
|
* input = ptr to data blocks to be encrypted |
756
|
|
|
|
|
|
|
* inputLen = # bits to encrypt (multiple of blockSize) |
757
|
|
|
|
|
|
|
* outBuffer = ptr to where to put encrypted blocks |
758
|
|
|
|
|
|
|
* |
759
|
|
|
|
|
|
|
* Return: # bits ciphered (>= 0) |
760
|
|
|
|
|
|
|
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
761
|
|
|
|
|
|
|
* |
762
|
|
|
|
|
|
|
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
763
|
|
|
|
|
|
|
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
764
|
|
|
|
|
|
|
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
765
|
|
|
|
|
|
|
* sizes can be supported. |
766
|
|
|
|
|
|
|
* |
767
|
|
|
|
|
|
|
-****************************************************************************/ |
768
|
41
|
|
|
|
|
|
static int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
769
|
|
|
|
|
|
|
int inputLen, BYTE *outBuffer) |
770
|
|
|
|
|
|
|
{ |
771
|
|
|
|
|
|
|
int i,n; /* loop counters */ |
772
|
|
|
|
|
|
|
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
773
|
|
|
|
|
|
|
DWORD t0,t1; /* temp variables */ |
774
|
41
|
|
|
|
|
|
int rounds=key->numRounds; /* number of rounds */ |
775
|
|
|
|
|
|
|
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
/* make local copies of things for faster access */ |
778
|
41
|
|
|
|
|
|
int mode = cipher->mode; |
779
|
|
|
|
|
|
|
DWORD sk[TOTAL_SUBKEYS]; |
780
|
|
|
|
|
|
|
DWORD IV[BLOCK_SIZE/32]; |
781
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
GetSboxKey; |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
#if VALIDATE_PARMS |
785
|
|
|
|
|
|
|
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
786
|
|
|
|
|
|
|
return BAD_CIPHER_STATE; |
787
|
|
|
|
|
|
|
if ((key == NULL) || (key->keySig != VALID_SIG)) |
788
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
789
|
|
|
|
|
|
|
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
790
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
791
|
|
|
|
|
|
|
if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
792
|
|
|
|
|
|
|
return BAD_INPUT_LEN; |
793
|
|
|
|
|
|
|
#if ALIGN32 |
794
|
|
|
|
|
|
|
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
795
|
|
|
|
|
|
|
(((int)input ) & 3) || (((int)outBuffer) & 3)) |
796
|
|
|
|
|
|
|
return BAD_ALIGN32; |
797
|
|
|
|
|
|
|
#endif |
798
|
|
|
|
|
|
|
#endif |
799
|
|
|
|
|
|
|
|
800
|
41
|
50
|
|
|
|
|
if (mode == MODE_CFB1) |
801
|
|
|
|
|
|
|
{ /* use recursion here to handle CFB, one block at a time */ |
802
|
0
|
|
|
|
|
|
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
803
|
0
|
0
|
|
|
|
|
for (n=0;n
|
804
|
|
|
|
|
|
|
{ |
805
|
0
|
|
|
|
|
|
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
806
|
0
|
|
|
|
|
|
bit0 = 0x80 >> (n & 7);/* which bit position in byte */ |
807
|
0
|
|
|
|
|
|
ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)); |
808
|
0
|
|
|
|
|
|
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit; |
809
|
0
|
|
|
|
|
|
carry = ctBit >> (7 - (n&7)); |
810
|
0
|
0
|
|
|
|
|
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
811
|
|
|
|
|
|
|
{ |
812
|
0
|
|
|
|
|
|
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
813
|
0
|
|
|
|
|
|
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
814
|
0
|
|
|
|
|
|
carry = bit; |
815
|
|
|
|
|
|
|
} |
816
|
|
|
|
|
|
|
} |
817
|
0
|
|
|
|
|
|
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
818
|
0
|
|
|
|
|
|
return inputLen; |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
/* here for ECB, CBC modes */ |
822
|
41
|
50
|
|
|
|
|
if (key->direction != DIR_ENCRYPT) |
823
|
0
|
|
|
|
|
|
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
#ifdef USE_ASM |
826
|
|
|
|
|
|
|
if ((useAsm & 1) && (inputLen)) |
827
|
|
|
|
|
|
|
#ifdef COMPILE_KEY |
828
|
|
|
|
|
|
|
if (key->keySig == VALID_SIG) |
829
|
|
|
|
|
|
|
return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
830
|
|
|
|
|
|
|
#else |
831
|
|
|
|
|
|
|
return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer); |
832
|
|
|
|
|
|
|
#endif |
833
|
|
|
|
|
|
|
#endif |
834
|
|
|
|
|
|
|
/* make local copy of subkeys for speed */ |
835
|
41
|
|
|
|
|
|
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
836
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
837
|
2
|
|
|
|
|
|
BlockCopy(IV,cipher->iv32) |
838
|
|
|
|
|
|
|
else |
839
|
39
|
|
|
|
|
|
IV[0]=IV[1]=IV[2]=IV[3]=0; |
840
|
|
|
|
|
|
|
|
841
|
84
|
100
|
|
|
|
|
for (n=0;n
|
842
|
|
|
|
|
|
|
{ |
843
|
|
|
|
|
|
|
#define LoadBlockE(N) x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N] |
844
|
43
|
|
|
|
|
|
LoadBlockE(0); LoadBlockE(1); LoadBlockE(2); LoadBlockE(3); |
845
|
|
|
|
|
|
|
#define EncryptRound(K,R,id) \ |
846
|
|
|
|
|
|
|
t0 = Fe32##id(x[K ],0); \ |
847
|
|
|
|
|
|
|
t1 = Fe32##id(x[K^1],3); \ |
848
|
|
|
|
|
|
|
x[K^3] = ROL(x[K^3],1); \ |
849
|
|
|
|
|
|
|
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
850
|
|
|
|
|
|
|
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
851
|
|
|
|
|
|
|
x[K^2] = ROR(x[K^2],1); |
852
|
|
|
|
|
|
|
#define Encrypt2(R,id) { EncryptRound(0,R+1,id); EncryptRound(2,R,id); } |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
#if defined(ZERO_KEY) |
855
|
|
|
|
|
|
|
switch (key->keyLen) |
856
|
|
|
|
|
|
|
{ |
857
|
|
|
|
|
|
|
case 128: |
858
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
859
|
|
|
|
|
|
|
Encrypt2(i,_128); |
860
|
|
|
|
|
|
|
break; |
861
|
|
|
|
|
|
|
case 192: |
862
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
863
|
|
|
|
|
|
|
Encrypt2(i,_192); |
864
|
|
|
|
|
|
|
break; |
865
|
|
|
|
|
|
|
case 256: |
866
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
867
|
|
|
|
|
|
|
Encrypt2(i,_256); |
868
|
|
|
|
|
|
|
break; |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
#else |
871
|
43
|
|
|
|
|
|
Encrypt2(14,_); |
872
|
43
|
|
|
|
|
|
Encrypt2(12,_); |
873
|
43
|
|
|
|
|
|
Encrypt2(10,_); |
874
|
43
|
|
|
|
|
|
Encrypt2( 8,_); |
875
|
43
|
|
|
|
|
|
Encrypt2( 6,_); |
876
|
43
|
|
|
|
|
|
Encrypt2( 4,_); |
877
|
43
|
|
|
|
|
|
Encrypt2( 2,_); |
878
|
43
|
|
|
|
|
|
Encrypt2( 0,_); |
879
|
|
|
|
|
|
|
#endif |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
/* need to do (or undo, depending on your point of view) final swap */ |
882
|
|
|
|
|
|
|
#if LittleEndian |
883
|
|
|
|
|
|
|
#define StoreBlockE(N) ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N] |
884
|
|
|
|
|
|
|
#else |
885
|
|
|
|
|
|
|
#define StoreBlockE(N) { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); } |
886
|
|
|
|
|
|
|
#endif |
887
|
43
|
|
|
|
|
|
StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3); |
888
|
43
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
889
|
|
|
|
|
|
|
{ |
890
|
4
|
|
|
|
|
|
IV[0]=Bswap(((DWORD *)outBuffer)[0]); |
891
|
4
|
|
|
|
|
|
IV[1]=Bswap(((DWORD *)outBuffer)[1]); |
892
|
4
|
|
|
|
|
|
IV[2]=Bswap(((DWORD *)outBuffer)[2]); |
893
|
4
|
|
|
|
|
|
IV[3]=Bswap(((DWORD *)outBuffer)[3]); |
894
|
|
|
|
|
|
|
} |
895
|
|
|
|
|
|
|
} |
896
|
|
|
|
|
|
|
|
897
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
898
|
2
|
|
|
|
|
|
BlockCopy(cipher->iv32,IV); |
899
|
|
|
|
|
|
|
|
900
|
41
|
|
|
|
|
|
return inputLen; |
901
|
|
|
|
|
|
|
} |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
/* |
904
|
|
|
|
|
|
|
+***************************************************************************** |
905
|
|
|
|
|
|
|
* |
906
|
|
|
|
|
|
|
* Function Name: blockDecrypt |
907
|
|
|
|
|
|
|
* |
908
|
|
|
|
|
|
|
* Function: Decrypt block(s) of data using Twofish |
909
|
|
|
|
|
|
|
* |
910
|
|
|
|
|
|
|
* Arguments: cipher = ptr to already initialized cipherInstance |
911
|
|
|
|
|
|
|
* key = ptr to already initialized keyInstance |
912
|
|
|
|
|
|
|
* input = ptr to data blocks to be decrypted |
913
|
|
|
|
|
|
|
* inputLen = # bits to encrypt (multiple of blockSize) |
914
|
|
|
|
|
|
|
* outBuffer = ptr to where to put decrypted blocks |
915
|
|
|
|
|
|
|
* |
916
|
|
|
|
|
|
|
* Return: # bits ciphered (>= 0) |
917
|
|
|
|
|
|
|
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
918
|
|
|
|
|
|
|
* |
919
|
|
|
|
|
|
|
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
920
|
|
|
|
|
|
|
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
921
|
|
|
|
|
|
|
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
922
|
|
|
|
|
|
|
* sizes can be supported. |
923
|
|
|
|
|
|
|
* |
924
|
|
|
|
|
|
|
-****************************************************************************/ |
925
|
41
|
|
|
|
|
|
static int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
926
|
|
|
|
|
|
|
int inputLen, BYTE *outBuffer) |
927
|
|
|
|
|
|
|
{ |
928
|
|
|
|
|
|
|
int i,n; /* loop counters */ |
929
|
|
|
|
|
|
|
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
930
|
|
|
|
|
|
|
DWORD t0,t1; /* temp variables */ |
931
|
41
|
|
|
|
|
|
int rounds=key->numRounds; /* number of rounds */ |
932
|
|
|
|
|
|
|
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
/* make local copies of things for faster access */ |
935
|
41
|
|
|
|
|
|
int mode = cipher->mode; |
936
|
|
|
|
|
|
|
DWORD sk[TOTAL_SUBKEYS]; |
937
|
|
|
|
|
|
|
DWORD IV[BLOCK_SIZE/32]; |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
GetSboxKey; |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
#if VALIDATE_PARMS |
942
|
|
|
|
|
|
|
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
943
|
|
|
|
|
|
|
return BAD_CIPHER_STATE; |
944
|
|
|
|
|
|
|
if ((key == NULL) || (key->keySig != VALID_SIG)) |
945
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
946
|
|
|
|
|
|
|
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
947
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
948
|
|
|
|
|
|
|
if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
949
|
|
|
|
|
|
|
return BAD_INPUT_LEN; |
950
|
|
|
|
|
|
|
#if ALIGN32 |
951
|
|
|
|
|
|
|
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
952
|
|
|
|
|
|
|
(((int)input) & 3) || (((int)outBuffer) & 3)) |
953
|
|
|
|
|
|
|
return BAD_ALIGN32; |
954
|
|
|
|
|
|
|
#endif |
955
|
|
|
|
|
|
|
#endif |
956
|
|
|
|
|
|
|
|
957
|
41
|
50
|
|
|
|
|
if (cipher->mode == MODE_CFB1) |
958
|
|
|
|
|
|
|
{ /* use blockEncrypt here to handle CFB, one block at a time */ |
959
|
0
|
|
|
|
|
|
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
960
|
0
|
0
|
|
|
|
|
for (n=0;n
|
961
|
|
|
|
|
|
|
{ |
962
|
0
|
|
|
|
|
|
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
963
|
0
|
|
|
|
|
|
bit0 = 0x80 >> (n & 7); |
964
|
0
|
|
|
|
|
|
ctBit = input[n/8] & bit0; |
965
|
0
|
|
|
|
|
|
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | |
966
|
0
|
|
|
|
|
|
(ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7))); |
967
|
0
|
|
|
|
|
|
carry = ctBit >> (7 - (n&7)); |
968
|
0
|
0
|
|
|
|
|
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
969
|
|
|
|
|
|
|
{ |
970
|
0
|
|
|
|
|
|
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
971
|
0
|
|
|
|
|
|
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
972
|
0
|
|
|
|
|
|
carry = bit; |
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
} |
975
|
0
|
|
|
|
|
|
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
976
|
0
|
|
|
|
|
|
return inputLen; |
977
|
|
|
|
|
|
|
} |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
/* here for ECB, CBC modes */ |
980
|
41
|
100
|
|
|
|
|
if (key->direction != DIR_DECRYPT) |
981
|
40
|
|
|
|
|
|
ReverseRoundSubkeys(key,DIR_DECRYPT); /* reverse the round subkey order */ |
982
|
|
|
|
|
|
|
#ifdef USE_ASM |
983
|
|
|
|
|
|
|
if ((useAsm & 2) && (inputLen)) |
984
|
|
|
|
|
|
|
#ifdef COMPILE_KEY |
985
|
|
|
|
|
|
|
if (key->keySig == VALID_SIG) |
986
|
|
|
|
|
|
|
return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
987
|
|
|
|
|
|
|
#else |
988
|
|
|
|
|
|
|
return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer); |
989
|
|
|
|
|
|
|
#endif |
990
|
|
|
|
|
|
|
#endif |
991
|
|
|
|
|
|
|
/* make local copy of subkeys for speed */ |
992
|
41
|
|
|
|
|
|
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
993
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
994
|
2
|
|
|
|
|
|
BlockCopy(IV,cipher->iv32) |
995
|
|
|
|
|
|
|
else |
996
|
39
|
|
|
|
|
|
IV[0]=IV[1]=IV[2]=IV[3]=0; |
997
|
|
|
|
|
|
|
|
998
|
84
|
100
|
|
|
|
|
for (n=0;n
|
999
|
|
|
|
|
|
|
{ |
1000
|
|
|
|
|
|
|
#define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N] |
1001
|
43
|
|
|
|
|
|
LoadBlockD(0); LoadBlockD(1); LoadBlockD(2); LoadBlockD(3); |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
#define DecryptRound(K,R,id) \ |
1004
|
|
|
|
|
|
|
t0 = Fe32##id(x[K ],0); \ |
1005
|
|
|
|
|
|
|
t1 = Fe32##id(x[K^1],3); \ |
1006
|
|
|
|
|
|
|
x[K^2] = ROL (x[K^2],1); \ |
1007
|
|
|
|
|
|
|
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
1008
|
|
|
|
|
|
|
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
1009
|
|
|
|
|
|
|
x[K^3] = ROR (x[K^3],1); |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
#define Decrypt2(R,id) { DecryptRound(2,R+1,id); DecryptRound(0,R,id); } |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
#if defined(ZERO_KEY) |
1014
|
|
|
|
|
|
|
switch (key->keyLen) |
1015
|
|
|
|
|
|
|
{ |
1016
|
|
|
|
|
|
|
case 128: |
1017
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
1018
|
|
|
|
|
|
|
Decrypt2(i,_128); |
1019
|
|
|
|
|
|
|
break; |
1020
|
|
|
|
|
|
|
case 192: |
1021
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
1022
|
|
|
|
|
|
|
Decrypt2(i,_192); |
1023
|
|
|
|
|
|
|
break; |
1024
|
|
|
|
|
|
|
case 256: |
1025
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
1026
|
|
|
|
|
|
|
Decrypt2(i,_256); |
1027
|
|
|
|
|
|
|
break; |
1028
|
|
|
|
|
|
|
} |
1029
|
|
|
|
|
|
|
#else |
1030
|
|
|
|
|
|
|
{ |
1031
|
43
|
|
|
|
|
|
Decrypt2(14,_); |
1032
|
43
|
|
|
|
|
|
Decrypt2(12,_); |
1033
|
43
|
|
|
|
|
|
Decrypt2(10,_); |
1034
|
43
|
|
|
|
|
|
Decrypt2( 8,_); |
1035
|
43
|
|
|
|
|
|
Decrypt2( 6,_); |
1036
|
43
|
|
|
|
|
|
Decrypt2( 4,_); |
1037
|
43
|
|
|
|
|
|
Decrypt2( 2,_); |
1038
|
43
|
|
|
|
|
|
Decrypt2( 0,_); |
1039
|
|
|
|
|
|
|
} |
1040
|
|
|
|
|
|
|
#endif |
1041
|
43
|
100
|
|
|
|
|
if (cipher->mode == MODE_ECB) |
1042
|
|
|
|
|
|
|
{ |
1043
|
|
|
|
|
|
|
#if LittleEndian |
1044
|
|
|
|
|
|
|
#define StoreBlockD(N) ((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N] |
1045
|
|
|
|
|
|
|
#else |
1046
|
|
|
|
|
|
|
#define StoreBlockD(N) { t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); } |
1047
|
|
|
|
|
|
|
#endif |
1048
|
39
|
|
|
|
|
|
StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3); |
1049
|
|
|
|
|
|
|
#undef StoreBlockD |
1050
|
39
|
|
|
|
|
|
continue; |
1051
|
|
|
|
|
|
|
} |
1052
|
|
|
|
|
|
|
else |
1053
|
|
|
|
|
|
|
{ |
1054
|
|
|
|
|
|
|
#define StoreBlockD(N) x[N] ^= sk[INPUT_WHITEN+N] ^ IV[N]; \ |
1055
|
|
|
|
|
|
|
IV[N] = Bswap(((DWORD *)input)[N]); \ |
1056
|
|
|
|
|
|
|
((DWORD *)outBuffer)[N] = Bswap(x[N]); |
1057
|
4
|
|
|
|
|
|
StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3); |
1058
|
|
|
|
|
|
|
#undef StoreBlockD |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
} |
1061
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) /* restore iv32 to cipher */ |
1062
|
2
|
|
|
|
|
|
BlockCopy(cipher->iv32,IV) |
1063
|
|
|
|
|
|
|
|
1064
|
41
|
|
|
|
|
|
return inputLen; |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
#ifdef GetCodeSize |
1068
|
|
|
|
|
|
|
/*
 * Report a code-size figure for this Twofish implementation.
 *
 * Returns Here(0) minus TwofishCodeStart().  When USE_ASM is compiled in and
 * either of the low two bits of useAsm is set, returns TwofishAsmCodeSize()
 * instead.  Here, TwofishCodeStart and TwofishAsmCodeSize are defined
 * outside this view (presumably code-address markers -- confirm).
 */
static DWORD TwofishCodeSize(void)
	{
	DWORD x= Here(0);
#ifdef USE_ASM
	if (useAsm & 3)
		return TwofishAsmCodeSize();
#endif
	return x - TwofishCodeStart();
	}	/* no ';' here: a stray semicolon is an empty file-scope declaration */
1077
|
|
|
|
|
|
|
#endif |