| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/*************************************************************************** |
|
2
|
|
|
|
|
|
|
TWOFISH2.C -- Optimized C API calls for TWOFISH AES submission |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
Submitters: |
|
5
|
|
|
|
|
|
|
Bruce Schneier, Counterpane Systems |
|
6
|
|
|
|
|
|
|
Doug Whiting, Hi/fn |
|
7
|
|
|
|
|
|
|
John Kelsey, Counterpane Systems |
|
8
|
|
|
|
|
|
|
Chris Hall, Counterpane Systems |
|
9
|
|
|
|
|
|
|
David Wagner, UC Berkeley |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Code Author: Doug Whiting, Hi/fn |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
Version 1.00 April 1998 |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Copyright 1998, Hi/fn and Counterpane Systems. All rights reserved. |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Notes: |
|
18
|
|
|
|
|
|
|
* Optimized version |
|
19
|
|
|
|
|
|
|
* Tab size is set to 4 characters in this file |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
***************************************************************************/ |
|
22
|
|
|
|
|
|
|
#include "aes.h" |
|
23
|
|
|
|
|
|
|
#include "table.h" |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#include |
|
26
|
|
|
|
|
|
|
/*#include */ |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
#if defined(min_key) && !defined(MIN_KEY) |
|
29
|
|
|
|
|
|
|
#define MIN_KEY 1 /* toupper() */ |
|
30
|
|
|
|
|
|
|
#elif defined(part_key) && !defined(PART_KEY) |
|
31
|
|
|
|
|
|
|
#define PART_KEY 1 |
|
32
|
|
|
|
|
|
|
#elif defined(zero_key) && !defined(ZERO_KEY) |
|
33
|
|
|
|
|
|
|
#define ZERO_KEY 1 |
|
34
|
|
|
|
|
|
|
#endif |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#ifdef USE_ASM |
|
38
|
|
|
|
|
|
|
extern int useAsm; /* ok to use ASM code? */ |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
typedef int cdecl CipherProc |
|
41
|
|
|
|
|
|
|
(cipherInstance *cipher, keyInstance *key,BYTE *input,int inputLen,BYTE *outBuffer); |
|
42
|
|
|
|
|
|
|
typedef int cdecl KeySetupProc(keyInstance *key); |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
extern CipherProc *blockEncrypt_86; /* ptr to ASM functions */ |
|
45
|
|
|
|
|
|
|
extern CipherProc *blockDecrypt_86; |
|
46
|
|
|
|
|
|
|
extern KeySetupProc *reKey_86; |
|
47
|
|
|
|
|
|
|
extern DWORD cdecl TwofishAsmCodeSize(void); |
|
48
|
|
|
|
|
|
|
#endif |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
/* |
|
51
|
|
|
|
|
|
|
+***************************************************************************** |
|
52
|
|
|
|
|
|
|
* Constants/Macros/Tables |
|
53
|
|
|
|
|
|
|
-****************************************************************************/ |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
#define CONST /* help syntax from C++, NOP here */ |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
static CONST fullSbox MDStab; /* not actually const. Initialized ONE time */ |
|
58
|
|
|
|
|
|
|
static int needToBuildMDS=1; /* is MDStab initialized yet? */ |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
#define BIG_TAB 0 |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
#if BIG_TAB |
|
63
|
|
|
|
|
|
|
static BYTE bigTab[4][256][256]; /* pre-computed S-box */ |
|
64
|
|
|
|
|
|
|
#endif |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
/* number of rounds for various key sizes: 128, 192, 256 */ |
|
67
|
|
|
|
|
|
|
/* (ignored for now in optimized code!) */ |
|
68
|
|
|
|
|
|
|
static CONST int numRounds[4]= {0,ROUNDS_128,ROUNDS_192,ROUNDS_256}; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
#if REENTRANT |
|
71
|
|
|
|
|
|
|
#define _sBox_ key->sBox8x32 |
|
72
|
|
|
|
|
|
|
#else |
|
73
|
|
|
|
|
|
|
static fullSbox _sBox_; /* permuted MDStab based on keys */ |
|
74
|
|
|
|
|
|
|
#endif |
|
75
|
|
|
|
|
|
|
#define _sBox8_(N) (((BYTE *) _sBox_) + (N)*256) |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
/*------- see what level of S-box precomputation we need to do -----*/ |
|
78
|
|
|
|
|
|
|
#if defined(ZERO_KEY) |
|
79
|
|
|
|
|
|
|
#define MOD_STRING "(Zero S-box keying)" |
|
80
|
|
|
|
|
|
|
#define Fe32_128(x,R) \ |
|
81
|
|
|
|
|
|
|
( MDStab[0][p8(01)[p8(02)[_b(x,R )]^b0(SKEY[1])]^b0(SKEY[0])] ^ \ |
|
82
|
|
|
|
|
|
|
MDStab[1][p8(11)[p8(12)[_b(x,R+1)]^b1(SKEY[1])]^b1(SKEY[0])] ^ \ |
|
83
|
|
|
|
|
|
|
MDStab[2][p8(21)[p8(22)[_b(x,R+2)]^b2(SKEY[1])]^b2(SKEY[0])] ^ \ |
|
84
|
|
|
|
|
|
|
MDStab[3][p8(31)[p8(32)[_b(x,R+3)]^b3(SKEY[1])]^b3(SKEY[0])] ) |
|
85
|
|
|
|
|
|
|
#define Fe32_192(x,R) \ |
|
86
|
|
|
|
|
|
|
( MDStab[0][p8(01)[p8(02)[p8(03)[_b(x,R )]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \ |
|
87
|
|
|
|
|
|
|
MDStab[1][p8(11)[p8(12)[p8(13)[_b(x,R+1)]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \ |
|
88
|
|
|
|
|
|
|
MDStab[2][p8(21)[p8(22)[p8(23)[_b(x,R+2)]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \ |
|
89
|
|
|
|
|
|
|
MDStab[3][p8(31)[p8(32)[p8(33)[_b(x,R+3)]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] ) |
|
90
|
|
|
|
|
|
|
#define Fe32_256(x,R) \ |
|
91
|
|
|
|
|
|
|
( MDStab[0][p8(01)[p8(02)[p8(03)[p8(04)[_b(x,R )]^b0(SKEY[3])]^b0(SKEY[2])]^b0(SKEY[1])]^b0(SKEY[0])] ^ \ |
|
92
|
|
|
|
|
|
|
MDStab[1][p8(11)[p8(12)[p8(13)[p8(14)[_b(x,R+1)]^b1(SKEY[3])]^b1(SKEY[2])]^b1(SKEY[1])]^b1(SKEY[0])] ^ \ |
|
93
|
|
|
|
|
|
|
MDStab[2][p8(21)[p8(22)[p8(23)[p8(24)[_b(x,R+2)]^b2(SKEY[3])]^b2(SKEY[2])]^b2(SKEY[1])]^b2(SKEY[0])] ^ \ |
|
94
|
|
|
|
|
|
|
MDStab[3][p8(31)[p8(32)[p8(33)[p8(34)[_b(x,R+3)]^b3(SKEY[3])]^b3(SKEY[2])]^b3(SKEY[1])]^b3(SKEY[0])] ) |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
#define GetSboxKey DWORD SKEY[4]; /* local copy */ \ |
|
97
|
|
|
|
|
|
|
memcpy(SKEY,key->sboxKeys,sizeof(SKEY)); |
|
98
|
|
|
|
|
|
|
/*----------------------------------------------------------------*/ |
|
99
|
|
|
|
|
|
|
#elif defined(MIN_KEY) |
|
100
|
|
|
|
|
|
|
#define MOD_STRING "(Minimal keying)" |
|
101
|
|
|
|
|
|
|
#define Fe32_(x,R)(MDStab[0][p8(01)[_sBox8_(0)[_b(x,R )]] ^ b0(SKEY0)] ^ \ |
|
102
|
|
|
|
|
|
|
MDStab[1][p8(11)[_sBox8_(1)[_b(x,R+1)]] ^ b1(SKEY0)] ^ \ |
|
103
|
|
|
|
|
|
|
MDStab[2][p8(21)[_sBox8_(2)[_b(x,R+2)]] ^ b2(SKEY0)] ^ \ |
|
104
|
|
|
|
|
|
|
MDStab[3][p8(31)[_sBox8_(3)[_b(x,R+3)]] ^ b3(SKEY0)]) |
|
105
|
|
|
|
|
|
|
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; } |
|
106
|
|
|
|
|
|
|
#define GetSboxKey DWORD SKEY0 = key->sboxKeys[0] /* local copy */ |
|
107
|
|
|
|
|
|
|
/*----------------------------------------------------------------*/ |
|
108
|
|
|
|
|
|
|
#elif defined(PART_KEY) |
|
109
|
|
|
|
|
|
|
#define MOD_STRING "(Partial keying)" |
|
110
|
|
|
|
|
|
|
#define Fe32_(x,R)(MDStab[0][_sBox8_(0)[_b(x,R )]] ^ \ |
|
111
|
|
|
|
|
|
|
MDStab[1][_sBox8_(1)[_b(x,R+1)]] ^ \ |
|
112
|
|
|
|
|
|
|
MDStab[2][_sBox8_(2)[_b(x,R+2)]] ^ \ |
|
113
|
|
|
|
|
|
|
MDStab[3][_sBox8_(3)[_b(x,R+3)]]) |
|
114
|
|
|
|
|
|
|
#define sbSet(N,i,J,v) { _sBox8_(N)[i+J] = v; } |
|
115
|
|
|
|
|
|
|
#define GetSboxKey |
|
116
|
|
|
|
|
|
|
/*----------------------------------------------------------------*/ |
|
117
|
|
|
|
|
|
|
#else /* default is FULL_KEY */ |
|
118
|
|
|
|
|
|
|
#ifndef FULL_KEY |
|
119
|
|
|
|
|
|
|
#define FULL_KEY 1 |
|
120
|
|
|
|
|
|
|
#endif |
|
121
|
|
|
|
|
|
|
#if BIG_TAB |
|
122
|
|
|
|
|
|
|
#define TAB_STR " (Big table)" |
|
123
|
|
|
|
|
|
|
#else |
|
124
|
|
|
|
|
|
|
#define TAB_STR |
|
125
|
|
|
|
|
|
|
#endif |
|
126
|
|
|
|
|
|
|
#ifdef COMPILE_KEY |
|
127
|
|
|
|
|
|
|
#define MOD_STRING "(Compiled subkeys)" TAB_STR |
|
128
|
|
|
|
|
|
|
#else |
|
129
|
|
|
|
|
|
|
#define MOD_STRING "(Full keying)" TAB_STR |
|
130
|
|
|
|
|
|
|
#endif |
|
131
|
|
|
|
|
|
|
/* Fe32_ does a full S-box + MDS lookup. Need to #define _sBox_ before use. |
|
132
|
|
|
|
|
|
|
Note that we "interleave" 0,1, and 2,3 to avoid cache bank collisions |
|
133
|
|
|
|
|
|
|
in optimized assembly language. |
|
134
|
|
|
|
|
|
|
*/ |
|
135
|
|
|
|
|
|
|
#define Fe32_(x,R) (_sBox_[0][2*_b(x,R )] ^ _sBox_[0][2*_b(x,R+1)+1] ^ \ |
|
136
|
|
|
|
|
|
|
_sBox_[2][2*_b(x,R+2)] ^ _sBox_[2][2*_b(x,R+3)+1]) |
|
137
|
|
|
|
|
|
|
/* set a single S-box value, given the input byte */ |
|
138
|
|
|
|
|
|
|
//#define sbSet(N,i,J,v) { _sBox_[N&2][2*i+(N&1)+2*J]=MDStab[N][v]; } |
|
139
|
|
|
|
|
|
|
#define sbSet(N,i,J,v) { *((DWORD *)_sBox_ + (N&2)*256 + 2*i + (N&1) + 2*J) = MDStab[N][v]; } |
|
140
|
|
|
|
|
|
|
#define GetSboxKey |
|
141
|
|
|
|
|
|
|
#endif |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
/* macro(s) for debugging help */ |
|
144
|
|
|
|
|
|
|
#define CHECK_TABLE 0 /* nonzero --> compare against "slow" table */ |
|
145
|
|
|
|
|
|
|
#define VALIDATE_PARMS 0 /* disable for full speed */ |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
/* end of debug macros */ |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
#ifdef GetCodeSize
/*
 * Code-size measurement helpers (only built when GetCodeSize is defined).
 *
 * Here() is only declared in this block, not defined; it is presumably
 * implemented in a separate module (TODO confirm).  It must therefore be
 * declared with external linkage alone: C allows at most one storage-class
 * specifier per declaration, so the former "static extern" did not compile.
 */
extern DWORD Here(DWORD x);		/* return caller's address! */

/* Returns the value Here(0) reports when called from this function. */
static DWORD TwofishCodeStart(void) { return Here(0); }
#endif
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
/* |
|
155
|
|
|
|
|
|
|
+***************************************************************************** |
|
156
|
|
|
|
|
|
|
* |
|
157
|
|
|
|
|
|
|
* Function Name: TableOp |
|
158
|
|
|
|
|
|
|
* |
|
159
|
|
|
|
|
|
|
* Function: Handle table use checking |
|
160
|
|
|
|
|
|
|
* |
|
161
|
|
|
|
|
|
|
* Arguments: op = what to do (see TAB_* defns in AES.H) |
|
162
|
|
|
|
|
|
|
* |
|
163
|
|
|
|
|
|
|
* Return: TRUE --> done (for TAB_QUERY) |
|
164
|
|
|
|
|
|
|
* |
|
165
|
|
|
|
|
|
|
* Notes: This routine is for use in generating the tables KAT file. |
|
166
|
|
|
|
|
|
|
* For this optimized version, we don't actually track table usage, |
|
167
|
|
|
|
|
|
|
* since it would make the macros incredibly ugly. Instead we just |
|
168
|
|
|
|
|
|
|
* run for a fixed number of queries and then say we're done. |
|
169
|
|
|
|
|
|
|
* |
|
170
|
|
|
|
|
|
|
-****************************************************************************/ |
|
171
|
0
|
|
|
|
|
|
static int TableOp(int op) |
|
172
|
|
|
|
|
|
|
{ |
|
173
|
|
|
|
|
|
|
static int queryCnt=0; |
|
174
|
|
|
|
|
|
|
|
|
175
|
0
|
|
|
|
|
|
switch (op) |
|
176
|
|
|
|
|
|
|
{ |
|
177
|
|
|
|
|
|
|
case TAB_DISABLE: |
|
178
|
0
|
|
|
|
|
|
break; |
|
179
|
|
|
|
|
|
|
case TAB_ENABLE: |
|
180
|
0
|
|
|
|
|
|
break; |
|
181
|
|
|
|
|
|
|
case TAB_RESET: |
|
182
|
0
|
|
|
|
|
|
queryCnt=0; |
|
183
|
0
|
|
|
|
|
|
break; |
|
184
|
|
|
|
|
|
|
case TAB_QUERY: |
|
185
|
0
|
|
|
|
|
|
queryCnt++; |
|
186
|
0
|
0
|
|
|
|
|
if (queryCnt < TAB_MIN_QUERY) |
|
187
|
0
|
|
|
|
|
|
return FALSE; |
|
188
|
|
|
|
|
|
|
} |
|
189
|
0
|
|
|
|
|
|
return TRUE; |
|
190
|
|
|
|
|
|
|
} |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
#if CHECK_TABLE |
|
194
|
|
|
|
|
|
|
/* |
|
195
|
|
|
|
|
|
|
+***************************************************************************** |
|
196
|
|
|
|
|
|
|
* |
|
197
|
|
|
|
|
|
|
* Function Name: f32 |
|
198
|
|
|
|
|
|
|
* |
|
199
|
|
|
|
|
|
|
* Function: Run four bytes through keyed S-boxes and apply MDS matrix |
|
200
|
|
|
|
|
|
|
* |
|
201
|
|
|
|
|
|
|
* Arguments: x = input to f function |
|
202
|
|
|
|
|
|
|
* k32 = pointer to key dwords |
|
203
|
|
|
|
|
|
|
* keyLen = total key length (k32 --> keyLen/2 bits) |
|
204
|
|
|
|
|
|
|
* |
|
205
|
|
|
|
|
|
|
* Return: The output of the keyed permutation applied to x. |
|
206
|
|
|
|
|
|
|
* |
|
207
|
|
|
|
|
|
|
* Notes: |
|
208
|
|
|
|
|
|
|
* This function is a keyed 32-bit permutation. It is the major building |
|
209
|
|
|
|
|
|
|
* block for the Twofish round function, including the four keyed 8x8 |
|
210
|
|
|
|
|
|
|
* permutations and the 4x4 MDS matrix multiply. This function is used |
|
211
|
|
|
|
|
|
|
* both for generating round subkeys and within the round function on the |
|
212
|
|
|
|
|
|
|
* block being encrypted. |
|
213
|
|
|
|
|
|
|
* |
|
214
|
|
|
|
|
|
|
* This version is fairly slow and pedagogical, although a smartcard would |
|
215
|
|
|
|
|
|
|
* probably perform the operation exactly this way in firmware. For |
|
216
|
|
|
|
|
|
|
* ultimate performance, the entire operation can be completed with four |
|
217
|
|
|
|
|
|
|
* lookups into four 256x32-bit tables, with three dword xors. |
|
218
|
|
|
|
|
|
|
* |
|
219
|
|
|
|
|
|
|
* The MDS matrix is defined in TABLE.H. To multiply by Mij, just use the |
|
220
|
|
|
|
|
|
|
* macro Mij(x). |
|
221
|
|
|
|
|
|
|
* |
|
222
|
|
|
|
|
|
|
-****************************************************************************/ |
|
223
|
|
|
|
|
|
|
static DWORD f32(DWORD x,CONST DWORD *k32,int keyLen)
{
	/*
	 * Reference implementation of the keyed 32-bit permutation:
	 *   x      = input dword
	 *   k32    = key dwords (k32[0..1] always read; k32[2], k32[3] only
	 *            for the longer key selectors below)
	 *   keyLen = total key length in bits; selects how many S-box stages run
	 * Returns the MDS-multiplied result of the four keyed byte permutations.
	 */
	BYTE b[4];

	/* Run each byte thru 8x8 S-boxes, xoring with key byte at each stage. */
	/* Note that each byte goes through a different combination of S-boxes.*/

	/*
	 * Split x into bytes by value so that b[0] = LSB, b[3] = MSB.  This
	 * replaces a store through (DWORD *)b, which wrote a DWORD into a BYTE
	 * array (effective-type violation, possible misalignment, and an overrun
	 * of b[] wherever DWORD is wider than four bytes).
	 */
	b[0] = (BYTE) (x      );
	b[1] = (BYTE) (x >>  8);
	b[2] = (BYTE) (x >> 16);
	b[3] = (BYTE) (x >> 24);

	/* A selector value of 1 matches no case: the bytes then go straight */
	/* to the MDS multiply without any S-box stage.                       */
	switch (((keyLen + 63)/64) & 3)
	{
		case 0:		/* 256 bits of key */
			b[0] = p8(04)[b[0]] ^ b0(k32[3]);
			b[1] = p8(14)[b[1]] ^ b1(k32[3]);
			b[2] = p8(24)[b[2]] ^ b2(k32[3]);
			b[3] = p8(34)[b[3]] ^ b3(k32[3]);
			/* fall thru, having pre-processed b[0]..b[3] with k32[3] */
		case 3:		/* 192 bits of key */
			b[0] = p8(03)[b[0]] ^ b0(k32[2]);
			b[1] = p8(13)[b[1]] ^ b1(k32[2]);
			b[2] = p8(23)[b[2]] ^ b2(k32[2]);
			b[3] = p8(33)[b[3]] ^ b3(k32[2]);
			/* fall thru, having pre-processed b[0]..b[3] with k32[2] */
		case 2:		/* 128 bits of key */
			b[0] = p8(00)[p8(01)[p8(02)[b[0]] ^ b0(k32[1])] ^ b0(k32[0])];
			b[1] = p8(10)[p8(11)[p8(12)[b[1]] ^ b1(k32[1])] ^ b1(k32[0])];
			b[2] = p8(20)[p8(21)[p8(22)[b[2]] ^ b2(k32[1])] ^ b2(k32[0])];
			b[3] = p8(30)[p8(31)[p8(32)[b[3]] ^ b3(k32[1])] ^ b3(k32[0])];
	}

	/* Now perform the MDS matrix multiply inline. */
	return	((M00(b[0]) ^ M01(b[1]) ^ M02(b[2]) ^ M03(b[3]))      ) ^
			((M10(b[0]) ^ M11(b[1]) ^ M12(b[2]) ^ M13(b[3])) <<  8) ^
			((M20(b[0]) ^ M21(b[1]) ^ M22(b[2]) ^ M23(b[3])) << 16) ^
			((M30(b[0]) ^ M31(b[1]) ^ M32(b[2]) ^ M33(b[3])) << 24) ;
}
|
258
|
|
|
|
|
|
|
#endif /* CHECK_TABLE */ |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
/* |
|
262
|
|
|
|
|
|
|
+***************************************************************************** |
|
263
|
|
|
|
|
|
|
* |
|
264
|
|
|
|
|
|
|
* Function Name: RS_MDS_Encode |
|
265
|
|
|
|
|
|
|
* |
|
266
|
|
|
|
|
|
|
* Function: Use (12,8) Reed-Solomon code over GF(256) to produce |
|
267
|
|
|
|
|
|
|
* a key S-box dword from two key material dwords. |
|
268
|
|
|
|
|
|
|
* |
|
269
|
|
|
|
|
|
|
* Arguments: k0 = 1st dword |
|
270
|
|
|
|
|
|
|
* k1 = 2nd dword |
|
271
|
|
|
|
|
|
|
* |
|
272
|
|
|
|
|
|
|
* Return: Remainder polynomial generated using RS code |
|
273
|
|
|
|
|
|
|
* |
|
274
|
|
|
|
|
|
|
* Notes: |
|
275
|
|
|
|
|
|
|
* Since this computation is done only once per reKey per 64 bits of key, |
|
276
|
|
|
|
|
|
|
* the performance impact of this routine is imperceptible. The RS code |
|
277
|
|
|
|
|
|
|
* chosen has "simple" coefficients to allow smartcard/hardware implementation |
|
278
|
|
|
|
|
|
|
* without lookup tables. |
|
279
|
|
|
|
|
|
|
* |
|
280
|
|
|
|
|
|
|
-****************************************************************************/ |
|
281
|
125
|
|
|
|
|
|
static DWORD RS_MDS_Encode(DWORD k0,DWORD k1)
{
	/*
	 * Folds two key dwords into one S-box key dword: k1 is merged first,
	 * then k0, with four RS_rem() steps after each merge.
	 * (The accumulator keeps the name r and the index the name j because
	 * RS_rem is a macro defined elsewhere.)
	 */
	DWORD r = k1;					/* 0 ^ k1: first 32 key bits */
	int j;

	for (j = 0; j < 4; j++)			/* shift one byte at a time */
	{
		RS_rem(r);
	}

	r ^= k0;						/* merge in 32 more key bits */

	for (j = 0; j < 4; j++)			/* shift one byte at a time */
	{
		RS_rem(r);
	}

	return r;
}
|
294
|
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
/* |
|
297
|
|
|
|
|
|
|
+***************************************************************************** |
|
298
|
|
|
|
|
|
|
* |
|
299
|
|
|
|
|
|
|
* Function Name: BuildMDS |
|
300
|
|
|
|
|
|
|
* |
|
301
|
|
|
|
|
|
|
* Function: Initialize the MDStab array |
|
302
|
|
|
|
|
|
|
* |
|
303
|
|
|
|
|
|
|
* Arguments: None. |
|
304
|
|
|
|
|
|
|
* |
|
305
|
|
|
|
|
|
|
* Return: None. |
|
306
|
|
|
|
|
|
|
* |
|
307
|
|
|
|
|
|
|
* Notes: |
|
308
|
|
|
|
|
|
|
* Here we precompute the entire fixed MDS table. This only needs to be done |
|
309
|
|
|
|
|
|
|
* one time at initialization, after which the table is "CONST". |
|
310
|
|
|
|
|
|
|
* |
|
311
|
|
|
|
|
|
|
-****************************************************************************/ |
|
312
|
1
|
|
|
|
|
|
static void BuildMDS(void) |
|
313
|
|
|
|
|
|
|
{ |
|
314
|
|
|
|
|
|
|
int i; |
|
315
|
|
|
|
|
|
|
DWORD d; |
|
316
|
|
|
|
|
|
|
BYTE m1[2],mX[2],mY[4]; |
|
317
|
|
|
|
|
|
|
|
|
318
|
257
|
100
|
|
|
|
|
for (i=0;i<256;i++) |
|
319
|
|
|
|
|
|
|
{ |
|
320
|
256
|
|
|
|
|
|
m1[0]=P8x8[0][i]; /* compute all the matrix elements */ |
|
321
|
256
|
100
|
|
|
|
|
mX[0]=(BYTE) Mul_X(m1[0]); |
|
|
|
100
|
|
|
|
|
|
|
322
|
256
|
100
|
|
|
|
|
mY[0]=(BYTE) Mul_Y(m1[0]); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
|
|
324
|
256
|
|
|
|
|
|
m1[1]=P8x8[1][i]; |
|
325
|
256
|
100
|
|
|
|
|
mX[1]=(BYTE) Mul_X(m1[1]); |
|
|
|
100
|
|
|
|
|
|
|
326
|
256
|
100
|
|
|
|
|
mY[1]=(BYTE) Mul_Y(m1[1]); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
#undef Mul_1 /* change what the pre-processor does with Mij */ |
|
329
|
|
|
|
|
|
|
#undef Mul_X |
|
330
|
|
|
|
|
|
|
#undef Mul_Y |
|
331
|
|
|
|
|
|
|
#define Mul_1 m1 /* It will now access m01[], m5B[], and mEF[] */ |
|
332
|
|
|
|
|
|
|
#define Mul_X mX |
|
333
|
|
|
|
|
|
|
#define Mul_Y mY |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
#define SetMDS(N) \ |
|
336
|
|
|
|
|
|
|
b0(d) = M0##N[P_##N##0]; \ |
|
337
|
|
|
|
|
|
|
b1(d) = M1##N[P_##N##0]; \ |
|
338
|
|
|
|
|
|
|
b2(d) = M2##N[P_##N##0]; \ |
|
339
|
|
|
|
|
|
|
b3(d) = M3##N[P_##N##0]; \ |
|
340
|
|
|
|
|
|
|
MDStab[N][i] = d; |
|
341
|
|
|
|
|
|
|
|
|
342
|
256
|
|
|
|
|
|
SetMDS(0); /* fill in the matrix with elements computed above */ |
|
343
|
256
|
|
|
|
|
|
SetMDS(1); |
|
344
|
256
|
|
|
|
|
|
SetMDS(2); |
|
345
|
256
|
|
|
|
|
|
SetMDS(3); |
|
346
|
|
|
|
|
|
|
} |
|
347
|
|
|
|
|
|
|
#undef Mul_1 |
|
348
|
|
|
|
|
|
|
#undef Mul_X |
|
349
|
|
|
|
|
|
|
#undef Mul_Y |
|
350
|
|
|
|
|
|
|
#define Mul_1 Mx_1 /* re-enable true multiply */ |
|
351
|
|
|
|
|
|
|
#define Mul_X Mx_X |
|
352
|
|
|
|
|
|
|
#define Mul_Y Mx_Y |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
#if BIG_TAB |
|
355
|
|
|
|
|
|
|
{ |
|
356
|
|
|
|
|
|
|
int j,k; |
|
357
|
|
|
|
|
|
|
BYTE *q0,*q1; |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
for (i=0;i<4;i++) |
|
360
|
|
|
|
|
|
|
{ |
|
361
|
|
|
|
|
|
|
switch (i) |
|
362
|
|
|
|
|
|
|
{ |
|
363
|
|
|
|
|
|
|
case 0: q0=p8(01); q1=p8(02); break; |
|
364
|
|
|
|
|
|
|
case 1: q0=p8(11); q1=p8(12); break; |
|
365
|
|
|
|
|
|
|
case 2: q0=p8(21); q1=p8(22); break; |
|
366
|
|
|
|
|
|
|
case 3: q0=p8(31); q1=p8(32); break; |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
for (j=0;j<256;j++) |
|
369
|
|
|
|
|
|
|
for (k=0;k<256;k++) |
|
370
|
|
|
|
|
|
|
bigTab[i][j][k]=q0[q1[k]^j]; |
|
371
|
|
|
|
|
|
|
} |
|
372
|
|
|
|
|
|
|
} |
|
373
|
|
|
|
|
|
|
#endif |
|
374
|
|
|
|
|
|
|
|
|
375
|
1
|
|
|
|
|
|
needToBuildMDS=0; /* NEVER modify the table again! */ |
|
376
|
1
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
/* |
|
379
|
|
|
|
|
|
|
+***************************************************************************** |
|
380
|
|
|
|
|
|
|
* |
|
381
|
|
|
|
|
|
|
* Function Name: ReverseRoundSubkeys |
|
382
|
|
|
|
|
|
|
* |
|
383
|
|
|
|
|
|
|
* Function: Reverse order of round subkeys to switch between encrypt/decrypt |
|
384
|
|
|
|
|
|
|
* |
|
385
|
|
|
|
|
|
|
* Arguments: key = ptr to keyInstance to be reversed |
|
386
|
|
|
|
|
|
|
* newDir = new direction value |
|
387
|
|
|
|
|
|
|
* |
|
388
|
|
|
|
|
|
|
* Return: None. |
|
389
|
|
|
|
|
|
|
* |
|
390
|
|
|
|
|
|
|
* Notes: |
|
391
|
|
|
|
|
|
|
* This optimization allows both blockEncrypt and blockDecrypt to use the same |
|
392
|
|
|
|
|
|
|
* "fallthru" switch statement based on the number of rounds. |
|
393
|
|
|
|
|
|
|
* Note that key->numRounds must be even and >= 2 here. |
|
394
|
|
|
|
|
|
|
* |
|
395
|
|
|
|
|
|
|
-****************************************************************************/ |
|
396
|
81
|
|
|
|
|
|
static void ReverseRoundSubkeys(keyInstance *key,BYTE newDir)
{
	/*
	 * Reverses the order of the round-subkey pairs stored from
	 * key->subKeys[ROUND_SUBKEYS] onward (2*key->numRounds dwords), keeping
	 * the two dwords of each pair in their original relative order, then
	 * records newDir in key->direction.
	 */
	DWORD *front = key->subKeys + ROUND_SUBKEYS;	/* first pair */
	DWORD *back  = front + 2*key->numRounds - 2;	/* last pair  */
	DWORD saved;

	while (front < back)
	{
		saved    = front[0];		/* exchange first dword of each pair */
		front[0] = back[0];
		back[0]  = saved;

		saved    = front[1];		/* exchange second dword of each pair */
		front[1] = back[1];
		back[1]  = saved;

		front += 2;					/* walk the two cursors toward the middle */
		back  -= 2;
	}

	key->direction = newDir;
}
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
/* |
|
416
|
|
|
|
|
|
|
+***************************************************************************** |
|
417
|
|
|
|
|
|
|
* |
|
418
|
|
|
|
|
|
|
* Function Name: Xor256 |
|
419
|
|
|
|
|
|
|
* |
|
420
|
|
|
|
|
|
|
* Function: Copy an 8-bit permutation (256 bytes), xoring with a byte |
|
421
|
|
|
|
|
|
|
* |
|
422
|
|
|
|
|
|
|
* Arguments: dst = where to put result |
|
423
|
|
|
|
|
|
|
* src = where to get data (can be same as dst) |
|
424
|
|
|
|
|
|
|
* b = byte to xor |
|
425
|
|
|
|
|
|
|
* |
|
426
|
|
|
|
|
|
|
* Return: None |
|
427
|
|
|
|
|
|
|
* |
|
428
|
|
|
|
|
|
|
* Notes: |
|
429
|
|
|
|
|
|
|
* BorlandC's optimization is terrible! When we put the code inline, |
|
430
|
|
|
|
|
|
|
* it generates fairly good code in the *following* segment (not in the Xor256 |
|
431
|
|
|
|
|
|
|
* code itself). If the call is made, the code following the call is awful! |
|
432
|
|
|
|
|
|
|
* The penalty is nearly 50%! So we take the code size hit for inlining for |
|
433
|
|
|
|
|
|
|
* Borland, while Microsoft happily works with a call. |
|
434
|
|
|
|
|
|
|
* |
|
435
|
|
|
|
|
|
|
-****************************************************************************/ |
|
436
|
|
|
|
|
|
|
#if defined(__BORLANDC__) /* do it inline */ |
|
437
|
|
|
|
|
|
|
#define Xor32(dst,src,i) { ((DWORD *)dst)[i] = ((DWORD *)src)[i] ^ tmpX; } |
|
438
|
|
|
|
|
|
|
#define Xor256(dst,src,b) \ |
|
439
|
|
|
|
|
|
|
{ \ |
|
440
|
|
|
|
|
|
|
register DWORD tmpX=0x01010101u * b;\ |
|
441
|
|
|
|
|
|
|
for (i=0;i<64;i+=4) \ |
|
442
|
|
|
|
|
|
|
{ Xor32(dst,src,i ); Xor32(dst,src,i+1); Xor32(dst,src,i+2); Xor32(dst,src,i+3); } \ |
|
443
|
|
|
|
|
|
|
} |
|
444
|
|
|
|
|
|
|
#else /* do it as a function call */ |
|
445
|
224
|
|
|
|
|
|
static void Xor256(void *dst,void *src,BYTE b)
{
	/*
	 * Writes 64 DWORDs to dst, each the corresponding DWORD of src xored
	 * with b replicated into every byte lane.  dst may equal src.
	 * NOTE(review): both buffers are accessed as DWORD arrays, so callers
	 * are assumed to pass suitably aligned storage -- confirm.
	 */
	DWORD fill = b*0x01010101u;		/* replicate byte to all four bytes */
	DWORD *out = (DWORD *)dst;
	DWORD *in  = (DWORD *)src;
	int w;

	for (w = 0; w < 64; w++)
		out[w] = in[w] ^ fill;
}
|
457
|
|
|
|
|
|
|
#endif |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
/* |
|
460
|
|
|
|
|
|
|
+***************************************************************************** |
|
461
|
|
|
|
|
|
|
* |
|
462
|
|
|
|
|
|
|
* Function Name: reKey |
|
463
|
|
|
|
|
|
|
* |
|
464
|
|
|
|
|
|
|
* Function: Initialize the Twofish key schedule from key32 |
|
465
|
|
|
|
|
|
|
* |
|
466
|
|
|
|
|
|
|
* Arguments: key = ptr to keyInstance to be initialized |
|
467
|
|
|
|
|
|
|
* |
|
468
|
|
|
|
|
|
|
* Return: TRUE on success |
|
469
|
|
|
|
|
|
|
* |
|
470
|
|
|
|
|
|
|
* Notes: |
|
471
|
|
|
|
|
|
|
* Here we precompute all the round subkeys, although that is not actually |
|
472
|
|
|
|
|
|
|
* required. For example, on a smartcard, the round subkeys can |
|
473
|
|
|
|
|
|
|
* be generated on-the-fly using f32() |
|
474
|
|
|
|
|
|
|
* |
|
475
|
|
|
|
|
|
|
-****************************************************************************/ |
|
476
|
41
|
|
|
|
|
|
static int reKey(keyInstance *key) |
|
477
|
|
|
|
|
|
|
{ |
|
478
|
|
|
|
|
|
|
int i,j,k64Cnt,keyLen; |
|
479
|
|
|
|
|
|
|
int subkeyCnt; |
|
480
|
41
|
|
|
|
|
|
DWORD A=0,B=0,q; |
|
481
|
|
|
|
|
|
|
DWORD sKey[MAX_KEY_BITS/64],k32e[MAX_KEY_BITS/64],k32o[MAX_KEY_BITS/64]; |
|
482
|
|
|
|
|
|
|
BYTE L0[256],L1[256]; /* small local 8-bit permutations */ |
|
483
|
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
#if VALIDATE_PARMS |
|
485
|
|
|
|
|
|
|
#if ALIGN32 |
|
486
|
|
|
|
|
|
|
if (((int)key) & 3) |
|
487
|
|
|
|
|
|
|
return BAD_ALIGN32; |
|
488
|
|
|
|
|
|
|
if ((key->keyLen % 64) || (key->keyLen < MIN_KEY_BITS)) |
|
489
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
|
490
|
|
|
|
|
|
|
#endif |
|
491
|
|
|
|
|
|
|
#endif |
|
492
|
|
|
|
|
|
|
|
|
493
|
41
|
100
|
|
|
|
|
if (needToBuildMDS) /* do this one time only */ |
|
494
|
1
|
|
|
|
|
|
BuildMDS(); |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
#define F32(res,x,k32) \ |
|
497
|
|
|
|
|
|
|
{ \ |
|
498
|
|
|
|
|
|
|
DWORD t=x; \ |
|
499
|
|
|
|
|
|
|
switch (k64Cnt & 3) \ |
|
500
|
|
|
|
|
|
|
{ \ |
|
501
|
|
|
|
|
|
|
case 0: /* same as 4 */ \ |
|
502
|
|
|
|
|
|
|
b0(t) = p8(04)[b0(t)] ^ b0(k32[3]); \ |
|
503
|
|
|
|
|
|
|
b1(t) = p8(14)[b1(t)] ^ b1(k32[3]); \ |
|
504
|
|
|
|
|
|
|
b2(t) = p8(24)[b2(t)] ^ b2(k32[3]); \ |
|
505
|
|
|
|
|
|
|
b3(t) = p8(34)[b3(t)] ^ b3(k32[3]); \ |
|
506
|
|
|
|
|
|
|
/* fall thru, having pre-processed t */ \ |
|
507
|
|
|
|
|
|
|
case 3: b0(t) = p8(03)[b0(t)] ^ b0(k32[2]); \ |
|
508
|
|
|
|
|
|
|
b1(t) = p8(13)[b1(t)] ^ b1(k32[2]); \ |
|
509
|
|
|
|
|
|
|
b2(t) = p8(23)[b2(t)] ^ b2(k32[2]); \ |
|
510
|
|
|
|
|
|
|
b3(t) = p8(33)[b3(t)] ^ b3(k32[2]); \ |
|
511
|
|
|
|
|
|
|
/* fall thru, having pre-processed t */ \ |
|
512
|
|
|
|
|
|
|
case 2: /* 128-bit keys (optimize for this case) */ \ |
|
513
|
|
|
|
|
|
|
res= MDStab[0][p8(01)[p8(02)[b0(t)] ^ b0(k32[1])] ^ b0(k32[0])] ^ \ |
|
514
|
|
|
|
|
|
|
MDStab[1][p8(11)[p8(12)[b1(t)] ^ b1(k32[1])] ^ b1(k32[0])] ^ \ |
|
515
|
|
|
|
|
|
|
MDStab[2][p8(21)[p8(22)[b2(t)] ^ b2(k32[1])] ^ b2(k32[0])] ^ \ |
|
516
|
|
|
|
|
|
|
MDStab[3][p8(31)[p8(32)[b3(t)] ^ b3(k32[1])] ^ b3(k32[0])] ; \ |
|
517
|
|
|
|
|
|
|
} \ |
|
518
|
|
|
|
|
|
|
} |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
#if !CHECK_TABLE |
|
522
|
|
|
|
|
|
|
#if defined(USE_ASM) /* only do this if not using assembler */ |
|
523
|
|
|
|
|
|
|
if (!(useAsm & 4)) |
|
524
|
|
|
|
|
|
|
#endif |
|
525
|
|
|
|
|
|
|
#endif |
|
526
|
|
|
|
|
|
|
{ |
|
527
|
41
|
|
|
|
|
|
subkeyCnt = ROUND_SUBKEYS + 2*key->numRounds; |
|
528
|
41
|
|
|
|
|
|
keyLen=key->keyLen; |
|
529
|
41
|
|
|
|
|
|
k64Cnt=(keyLen+63)/64; /* number of 64-bit key words */ |
|
530
|
166
|
100
|
|
|
|
|
for (i=0,j=k64Cnt-1;i
|
|
531
|
|
|
|
|
|
|
{ /* split into even/odd key dwords */ |
|
532
|
125
|
|
|
|
|
|
k32e[i]=key->key32[2*i ]; |
|
533
|
125
|
|
|
|
|
|
k32o[i]=key->key32[2*i+1]; |
|
534
|
|
|
|
|
|
|
/* compute S-box keys using (12,8) Reed-Solomon code over GF(256) */ |
|
535
|
125
|
|
|
|
|
|
sKey[j]=key->sboxKeys[j]=RS_MDS_Encode(k32e[i],k32o[i]); /* reverse order */ |
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
} |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
#ifdef USE_ASM |
|
540
|
|
|
|
|
|
|
if (useAsm & 4) |
|
541
|
|
|
|
|
|
|
{ |
|
542
|
|
|
|
|
|
|
#if defined(COMPILE_KEY) && defined(USE_ASM) |
|
543
|
|
|
|
|
|
|
key->keySig = VALID_SIG; /* show that we are initialized */ |
|
544
|
|
|
|
|
|
|
key->codeSize = sizeof(key->compiledCode); /* set size */ |
|
545
|
|
|
|
|
|
|
#endif |
|
546
|
|
|
|
|
|
|
reKey_86(key); |
|
547
|
|
|
|
|
|
|
} |
|
548
|
|
|
|
|
|
|
else |
|
549
|
|
|
|
|
|
|
#endif |
|
550
|
|
|
|
|
|
|
{ |
|
551
|
861
|
100
|
|
|
|
|
for (i=q=0;i
|
|
552
|
|
|
|
|
|
|
{ /* compute round subkeys for PHT */ |
|
553
|
820
|
|
|
|
|
|
F32(A,q ,k32e); /* A uses even key dwords */ |
|
554
|
820
|
|
|
|
|
|
F32(B,q+SK_BUMP,k32o); /* B uses odd key dwords */ |
|
555
|
820
|
|
|
|
|
|
B = ROL(B,8); |
|
556
|
820
|
|
|
|
|
|
key->subKeys[2*i ] = A+B; /* combine with a PHT */ |
|
557
|
820
|
|
|
|
|
|
B = A + 2*B; |
|
558
|
820
|
|
|
|
|
|
key->subKeys[2*i+1] = ROL(B,SK_ROTL); |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
#if !defined(ZERO_KEY) |
|
561
|
41
|
|
|
|
|
|
switch (keyLen) /* case out key length for speed in generating S-boxes */ |
|
562
|
|
|
|
|
|
|
{ |
|
563
|
|
|
|
|
|
|
case 128: |
|
564
|
|
|
|
|
|
|
#if defined(FULL_KEY) || defined(PART_KEY) |
|
565
|
|
|
|
|
|
|
#if BIG_TAB |
|
566
|
|
|
|
|
|
|
#define one128(N,J) sbSet(N,i,J,L0[i+J]) |
|
567
|
|
|
|
|
|
|
#define sb128(N) { \ |
|
568
|
|
|
|
|
|
|
BYTE *qq=bigTab[N][b##N(sKey[1])]; \ |
|
569
|
|
|
|
|
|
|
Xor256(L0,qq,b##N(sKey[0])); \ |
|
570
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } |
|
571
|
|
|
|
|
|
|
#else |
|
572
|
|
|
|
|
|
|
#define one128(N,J) sbSet(N,i,J,p8(N##1)[L0[i+J]]^k0) |
|
573
|
|
|
|
|
|
|
#define sb128(N) { \ |
|
574
|
|
|
|
|
|
|
Xor256(L0,p8(N##2),b##N(sKey[1])); \ |
|
575
|
|
|
|
|
|
|
{ register DWORD k0=b##N(sKey[0]); \ |
|
576
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) { one128(N,0); one128(N,1); } } } |
|
577
|
|
|
|
|
|
|
#endif |
|
578
|
|
|
|
|
|
|
#elif defined(MIN_KEY) |
|
579
|
|
|
|
|
|
|
#define sb128(N) Xor256(_sBox8_(N),p8(N##2),b##N(sKey[1])) |
|
580
|
|
|
|
|
|
|
#endif |
|
581
|
6669
|
100
|
|
|
|
|
sb128(0); sb128(1); sb128(2); sb128(3); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
582
|
13
|
|
|
|
|
|
break; |
|
583
|
|
|
|
|
|
|
case 192: |
|
584
|
|
|
|
|
|
|
#if defined(FULL_KEY) || defined(PART_KEY) |
|
585
|
|
|
|
|
|
|
#define one192(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0) |
|
586
|
|
|
|
|
|
|
#define sb192(N) { \ |
|
587
|
|
|
|
|
|
|
Xor256(L0,p8(N##3),b##N(sKey[2])); \ |
|
588
|
|
|
|
|
|
|
{ register DWORD k0=b##N(sKey[0]); \ |
|
589
|
|
|
|
|
|
|
register DWORD k1=b##N(sKey[1]); \ |
|
590
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } } |
|
591
|
|
|
|
|
|
|
#elif defined(MIN_KEY) |
|
592
|
|
|
|
|
|
|
#define one192(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1) |
|
593
|
|
|
|
|
|
|
#define sb192(N) { \ |
|
594
|
|
|
|
|
|
|
Xor256(L0,p8(N##3),b##N(sKey[2])); \ |
|
595
|
|
|
|
|
|
|
{ register DWORD k1=b##N(sKey[1]); \ |
|
596
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) { one192(N,0); one192(N,1); } } } |
|
597
|
|
|
|
|
|
|
#endif |
|
598
|
6669
|
100
|
|
|
|
|
sb192(0); sb192(1); sb192(2); sb192(3); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
599
|
13
|
|
|
|
|
|
break; |
|
600
|
|
|
|
|
|
|
case 256: |
|
601
|
|
|
|
|
|
|
#if defined(FULL_KEY) || defined(PART_KEY) |
|
602
|
|
|
|
|
|
|
#define one256(N,J) sbSet(N,i,J,p8(N##1)[p8(N##2)[L0[i+J]]^k1]^k0) |
|
603
|
|
|
|
|
|
|
#define sb256(N) { \ |
|
604
|
|
|
|
|
|
|
Xor256(L1,p8(N##4),b##N(sKey[3])); \ |
|
605
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \ |
|
606
|
|
|
|
|
|
|
L0[i+1]=p8(N##3)[L1[i+1]]; } \ |
|
607
|
|
|
|
|
|
|
Xor256(L0,L0,b##N(sKey[2])); \ |
|
608
|
|
|
|
|
|
|
{ register DWORD k0=b##N(sKey[0]); \ |
|
609
|
|
|
|
|
|
|
register DWORD k1=b##N(sKey[1]); \ |
|
610
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } } |
|
611
|
|
|
|
|
|
|
#elif defined(MIN_KEY) |
|
612
|
|
|
|
|
|
|
#define one256(N,J) sbSet(N,i,J,p8(N##2)[L0[i+J]]^k1) |
|
613
|
|
|
|
|
|
|
#define sb256(N) { \ |
|
614
|
|
|
|
|
|
|
Xor256(L1,p8(N##4),b##N(sKey[3])); \ |
|
615
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) {L0[i ]=p8(N##3)[L1[i]]; \ |
|
616
|
|
|
|
|
|
|
L0[i+1]=p8(N##3)[L1[i+1]]; } \ |
|
617
|
|
|
|
|
|
|
Xor256(L0,L0,b##N(sKey[2])); \ |
|
618
|
|
|
|
|
|
|
{ register DWORD k1=b##N(sKey[1]); \ |
|
619
|
|
|
|
|
|
|
for (i=0;i<256;i+=2) { one256(N,0); one256(N,1); } } } |
|
620
|
|
|
|
|
|
|
#endif |
|
621
|
15375
|
100
|
|
|
|
|
sb256(0); sb256(1); sb256(2); sb256(3); |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
622
|
15
|
|
|
|
|
|
break; |
|
623
|
|
|
|
|
|
|
} |
|
624
|
|
|
|
|
|
|
#endif |
|
625
|
|
|
|
|
|
|
} |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
#if CHECK_TABLE /* sanity check vs. pedagogical code*/ |
|
628
|
|
|
|
|
|
|
{ |
|
629
|
|
|
|
|
|
|
GetSboxKey; |
|
630
|
|
|
|
|
|
|
for (i=0;i
|
|
631
|
|
|
|
|
|
|
{ |
|
632
|
|
|
|
|
|
|
A = f32(i*SK_STEP ,k32e,keyLen); /* A uses even key dwords */ |
|
633
|
|
|
|
|
|
|
B = f32(i*SK_STEP+SK_BUMP,k32o,keyLen); /* B uses odd key dwords */ |
|
634
|
|
|
|
|
|
|
B = ROL(B,8); |
|
635
|
|
|
|
|
|
|
assert(key->subKeys[2*i ] == A+ B); |
|
636
|
|
|
|
|
|
|
assert(key->subKeys[2*i+1] == ROL(A+2*B,SK_ROTL)); |
|
637
|
|
|
|
|
|
|
} |
|
638
|
|
|
|
|
|
|
#if !defined(ZERO_KEY) /* any S-boxes to check? */ |
|
639
|
|
|
|
|
|
|
for (i=q=0;i<256;i++,q+=0x01010101) |
|
640
|
|
|
|
|
|
|
assert(f32(q,key->sboxKeys,keyLen) == Fe32_(q,0)); |
|
641
|
|
|
|
|
|
|
#endif |
|
642
|
|
|
|
|
|
|
} |
|
643
|
|
|
|
|
|
|
#endif /* CHECK_TABLE */ |
|
644
|
|
|
|
|
|
|
|
|
645
|
41
|
50
|
|
|
|
|
if (key->direction == DIR_ENCRYPT) |
|
646
|
41
|
|
|
|
|
|
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
|
647
|
|
|
|
|
|
|
|
|
648
|
41
|
|
|
|
|
|
return TRUE; |
|
649
|
|
|
|
|
|
|
} |
|
650
|
|
|
|
|
|
|
/* |
|
651
|
|
|
|
|
|
|
+***************************************************************************** |
|
652
|
|
|
|
|
|
|
* |
|
653
|
|
|
|
|
|
|
* Function Name: makeKey |
|
654
|
|
|
|
|
|
|
* |
|
655
|
|
|
|
|
|
|
* Function: Initialize the Twofish key schedule |
|
656
|
|
|
|
|
|
|
* |
|
657
|
|
|
|
|
|
|
* Arguments: key = ptr to keyInstance to be initialized |
|
658
|
|
|
|
|
|
|
* direction = DIR_ENCRYPT or DIR_DECRYPT |
|
659
|
|
|
|
|
|
|
* keyLen = # bits of key text at *keyMaterial |
|
660
|
|
|
|
|
|
|
* keyMaterial = ptr to hex ASCII chars representing key bits |
|
661
|
|
|
|
|
|
|
* |
|
662
|
|
|
|
|
|
|
* Return: TRUE on success |
|
663
|
|
|
|
|
|
|
* else error code (e.g., BAD_KEY_DIR) |
|
664
|
|
|
|
|
|
|
* |
|
665
|
|
|
|
|
|
|
* Notes: This parses the key bits from keyMaterial. Zeroes out unused key bits |
|
666
|
|
|
|
|
|
|
* |
|
667
|
|
|
|
|
|
|
-****************************************************************************/ |
|
668
|
41
|
|
|
|
|
|
static int makeKey(keyInstance *key, BYTE direction, int keyLen,CONST char *keyMaterial) |
|
669
|
|
|
|
|
|
|
{ |
|
670
|
|
|
|
|
|
|
int i; |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
|
673
|
|
|
|
|
|
|
if (key == NULL) |
|
674
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE;/* must have a keyInstance to initialize */ |
|
675
|
|
|
|
|
|
|
if ((direction != DIR_ENCRYPT) && (direction != DIR_DECRYPT)) |
|
676
|
|
|
|
|
|
|
return BAD_KEY_DIR; /* must have valid direction */ |
|
677
|
|
|
|
|
|
|
if ((keyLen > MAX_KEY_BITS) || (keyLen < 8) || (keyLen & 0x3F)) |
|
678
|
|
|
|
|
|
|
return BAD_KEY_MAT; /* length must be valid */ |
|
679
|
|
|
|
|
|
|
key->keySig = VALID_SIG; /* show that we are initialized */ |
|
680
|
|
|
|
|
|
|
#if ALIGN32 |
|
681
|
|
|
|
|
|
|
if ((((int)key) & 3) || (((int)key->key32) & 3)) |
|
682
|
|
|
|
|
|
|
return BAD_ALIGN32; |
|
683
|
|
|
|
|
|
|
#endif |
|
684
|
|
|
|
|
|
|
#endif |
|
685
|
|
|
|
|
|
|
|
|
686
|
41
|
|
|
|
|
|
key->direction = direction;/* set our cipher direction */ |
|
687
|
41
|
|
|
|
|
|
key->keyLen = (keyLen+63) & ~63; /* round up to multiple of 64 */ |
|
688
|
41
|
|
|
|
|
|
key->numRounds = numRounds[(keyLen-1)/64]; |
|
689
|
41
|
|
|
|
|
|
memset(key->key32,0,sizeof(key->key32)); /* zero unused bits */ |
|
690
|
|
|
|
|
|
|
|
|
691
|
41
|
50
|
|
|
|
|
if (keyMaterial == NULL) |
|
692
|
0
|
|
|
|
|
|
return TRUE; /* allow a "dummy" call */ |
|
693
|
|
|
|
|
|
|
|
|
694
|
291
|
100
|
|
|
|
|
for (i=0;i
|
|
695
|
500
|
|
|
|
|
|
key->key32[i] = (((unsigned char *)keyMaterial)[i*4+0] << 0) |
|
696
|
250
|
|
|
|
|
|
| (((unsigned char *)keyMaterial)[i*4+1] << 8) |
|
697
|
250
|
|
|
|
|
|
| (((unsigned char *)keyMaterial)[i*4+2] << 16) |
|
698
|
250
|
|
|
|
|
|
| (((unsigned char *)keyMaterial)[i*4+3] << 24); |
|
699
|
|
|
|
|
|
|
|
|
700
|
41
|
|
|
|
|
|
return reKey(key); /* generate round subkeys */ |
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
/* |
|
705
|
|
|
|
|
|
|
+***************************************************************************** |
|
706
|
|
|
|
|
|
|
* |
|
707
|
|
|
|
|
|
|
* Function Name: cipherInit |
|
708
|
|
|
|
|
|
|
* |
|
709
|
|
|
|
|
|
|
* Function: Initialize the Twofish cipher in a given mode |
|
710
|
|
|
|
|
|
|
* |
|
711
|
|
|
|
|
|
|
* Arguments: cipher = ptr to cipherInstance to be initialized |
|
712
|
|
|
|
|
|
|
* mode = MODE_ECB, MODE_CBC, or MODE_CFB1 |
|
713
|
|
|
|
|
|
|
* IV = ptr to hex ASCII test representing IV bytes |
|
714
|
|
|
|
|
|
|
* |
|
715
|
|
|
|
|
|
|
* Return: TRUE on success |
|
716
|
|
|
|
|
|
|
* else error code (e.g., BAD_CIPHER_MODE) |
|
717
|
|
|
|
|
|
|
* |
|
718
|
|
|
|
|
|
|
-****************************************************************************/ |
|
719
|
41
|
|
|
|
|
|
static int cipherInit(cipherInstance *cipher, BYTE mode,CONST char *IV) |
|
720
|
|
|
|
|
|
|
{ |
|
721
|
|
|
|
|
|
|
int i; |
|
722
|
|
|
|
|
|
|
#if VALIDATE_PARMS /* first, sanity check on parameters */ |
|
723
|
|
|
|
|
|
|
if (cipher == NULL) |
|
724
|
|
|
|
|
|
|
return BAD_PARAMS; /* must have a cipherInstance to initialize */ |
|
725
|
|
|
|
|
|
|
if ((mode != MODE_ECB) && (mode != MODE_CBC) && (mode != MODE_CFB1)) |
|
726
|
|
|
|
|
|
|
return BAD_CIPHER_MODE; /* must have valid cipher mode */ |
|
727
|
|
|
|
|
|
|
cipher->cipherSig = VALID_SIG; |
|
728
|
|
|
|
|
|
|
#if ALIGN32 |
|
729
|
|
|
|
|
|
|
if ((((int)cipher) & 3) || (((int)cipher->IV) & 3) || (((int)cipher->iv32) & 3)) |
|
730
|
|
|
|
|
|
|
return BAD_ALIGN32; |
|
731
|
|
|
|
|
|
|
#endif |
|
732
|
|
|
|
|
|
|
#endif |
|
733
|
|
|
|
|
|
|
|
|
734
|
41
|
100
|
|
|
|
|
if ((mode != MODE_ECB) && (IV)) /* parse the IV */ |
|
|
|
50
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
{ |
|
736
|
0
|
|
|
|
|
|
memcpy (cipher->iv32, IV, BLOCK_SIZE/32); |
|
737
|
0
|
0
|
|
|
|
|
for (i=0;i
|
|
738
|
0
|
|
|
|
|
|
((DWORD *)cipher->IV)[i] = Bswap(cipher->iv32[i]); |
|
739
|
|
|
|
|
|
|
} |
|
740
|
|
|
|
|
|
|
|
|
741
|
41
|
|
|
|
|
|
cipher->mode = mode; |
|
742
|
|
|
|
|
|
|
|
|
743
|
41
|
|
|
|
|
|
return TRUE; |
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
/* |
|
747
|
|
|
|
|
|
|
+***************************************************************************** |
|
748
|
|
|
|
|
|
|
* |
|
749
|
|
|
|
|
|
|
* Function Name: blockEncrypt |
|
750
|
|
|
|
|
|
|
* |
|
751
|
|
|
|
|
|
|
* Function: Encrypt block(s) of data using Twofish |
|
752
|
|
|
|
|
|
|
* |
|
753
|
|
|
|
|
|
|
* Arguments: cipher = ptr to already initialized cipherInstance |
|
754
|
|
|
|
|
|
|
* key = ptr to already initialized keyInstance |
|
755
|
|
|
|
|
|
|
* input = ptr to data blocks to be encrypted |
|
756
|
|
|
|
|
|
|
* inputLen = # bits to encrypt (multiple of blockSize) |
|
757
|
|
|
|
|
|
|
* outBuffer = ptr to where to put encrypted blocks |
|
758
|
|
|
|
|
|
|
* |
|
759
|
|
|
|
|
|
|
* Return: # bits ciphered (>= 0) |
|
760
|
|
|
|
|
|
|
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
|
761
|
|
|
|
|
|
|
* |
|
762
|
|
|
|
|
|
|
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
|
763
|
|
|
|
|
|
|
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
|
764
|
|
|
|
|
|
|
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
|
765
|
|
|
|
|
|
|
* sizes can be supported. |
|
766
|
|
|
|
|
|
|
* |
|
767
|
|
|
|
|
|
|
-****************************************************************************/ |
|
768
|
41
|
|
|
|
|
|
static int blockEncrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
|
769
|
|
|
|
|
|
|
int inputLen, BYTE *outBuffer) |
|
770
|
|
|
|
|
|
|
{ |
|
771
|
|
|
|
|
|
|
int i,n; /* loop counters */ |
|
772
|
|
|
|
|
|
|
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
|
773
|
|
|
|
|
|
|
DWORD t0,t1; /* temp variables */ |
|
774
|
41
|
|
|
|
|
|
int rounds=key->numRounds; /* number of rounds */ |
|
775
|
|
|
|
|
|
|
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
|
776
|
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
/* make local copies of things for faster access */ |
|
778
|
41
|
|
|
|
|
|
int mode = cipher->mode; |
|
779
|
|
|
|
|
|
|
DWORD sk[TOTAL_SUBKEYS]; |
|
780
|
|
|
|
|
|
|
DWORD IV[BLOCK_SIZE/32]; |
|
781
|
|
|
|
|
|
|
|
|
782
|
|
|
|
|
|
|
GetSboxKey; |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
#if VALIDATE_PARMS |
|
785
|
|
|
|
|
|
|
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
|
786
|
|
|
|
|
|
|
return BAD_CIPHER_STATE; |
|
787
|
|
|
|
|
|
|
if ((key == NULL) || (key->keySig != VALID_SIG)) |
|
788
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
|
789
|
|
|
|
|
|
|
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
|
790
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
|
791
|
|
|
|
|
|
|
if ((mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
|
792
|
|
|
|
|
|
|
return BAD_INPUT_LEN; |
|
793
|
|
|
|
|
|
|
#if ALIGN32 |
|
794
|
|
|
|
|
|
|
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
|
795
|
|
|
|
|
|
|
(((int)input ) & 3) || (((int)outBuffer) & 3)) |
|
796
|
|
|
|
|
|
|
return BAD_ALIGN32; |
|
797
|
|
|
|
|
|
|
#endif |
|
798
|
|
|
|
|
|
|
#endif |
|
799
|
|
|
|
|
|
|
|
|
800
|
41
|
50
|
|
|
|
|
if (mode == MODE_CFB1) |
|
801
|
|
|
|
|
|
|
{ /* use recursion here to handle CFB, one block at a time */ |
|
802
|
0
|
|
|
|
|
|
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
|
803
|
0
|
0
|
|
|
|
|
for (n=0;n
|
|
804
|
|
|
|
|
|
|
{ |
|
805
|
0
|
|
|
|
|
|
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
|
806
|
0
|
|
|
|
|
|
bit0 = 0x80 >> (n & 7);/* which bit position in byte */ |
|
807
|
0
|
|
|
|
|
|
ctBit = (input[n/8] & bit0) ^ ((((BYTE *) x)[0] & 0x80) >> (n&7)); |
|
808
|
0
|
|
|
|
|
|
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | ctBit; |
|
809
|
0
|
|
|
|
|
|
carry = ctBit >> (7 - (n&7)); |
|
810
|
0
|
0
|
|
|
|
|
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
|
811
|
|
|
|
|
|
|
{ |
|
812
|
0
|
|
|
|
|
|
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
|
813
|
0
|
|
|
|
|
|
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
|
814
|
0
|
|
|
|
|
|
carry = bit; |
|
815
|
|
|
|
|
|
|
} |
|
816
|
|
|
|
|
|
|
} |
|
817
|
0
|
|
|
|
|
|
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
|
818
|
0
|
|
|
|
|
|
return inputLen; |
|
819
|
|
|
|
|
|
|
} |
|
820
|
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
/* here for ECB, CBC modes */ |
|
822
|
41
|
50
|
|
|
|
|
if (key->direction != DIR_ENCRYPT) |
|
823
|
0
|
|
|
|
|
|
ReverseRoundSubkeys(key,DIR_ENCRYPT); /* reverse the round subkey order */ |
|
824
|
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
#ifdef USE_ASM |
|
826
|
|
|
|
|
|
|
if ((useAsm & 1) && (inputLen)) |
|
827
|
|
|
|
|
|
|
#ifdef COMPILE_KEY |
|
828
|
|
|
|
|
|
|
if (key->keySig == VALID_SIG) |
|
829
|
|
|
|
|
|
|
return ((CipherProc *)(key->encryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
|
830
|
|
|
|
|
|
|
#else |
|
831
|
|
|
|
|
|
|
return (*blockEncrypt_86)(cipher,key,input,inputLen,outBuffer); |
|
832
|
|
|
|
|
|
|
#endif |
|
833
|
|
|
|
|
|
|
#endif |
|
834
|
|
|
|
|
|
|
/* make local copy of subkeys for speed */ |
|
835
|
41
|
|
|
|
|
|
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
|
836
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
|
837
|
2
|
|
|
|
|
|
BlockCopy(IV,cipher->iv32) |
|
838
|
|
|
|
|
|
|
else |
|
839
|
39
|
|
|
|
|
|
IV[0]=IV[1]=IV[2]=IV[3]=0; |
|
840
|
|
|
|
|
|
|
|
|
841
|
84
|
100
|
|
|
|
|
for (n=0;n
|
|
842
|
|
|
|
|
|
|
{ |
|
843
|
|
|
|
|
|
|
#define LoadBlockE(N) x[N]=Bswap(((DWORD *)input)[N]) ^ sk[INPUT_WHITEN+N] ^ IV[N] |
|
844
|
43
|
|
|
|
|
|
LoadBlockE(0); LoadBlockE(1); LoadBlockE(2); LoadBlockE(3); |
|
845
|
|
|
|
|
|
|
#define EncryptRound(K,R,id) \ |
|
846
|
|
|
|
|
|
|
t0 = Fe32##id(x[K ],0); \ |
|
847
|
|
|
|
|
|
|
t1 = Fe32##id(x[K^1],3); \ |
|
848
|
|
|
|
|
|
|
x[K^3] = ROL(x[K^3],1); \ |
|
849
|
|
|
|
|
|
|
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
|
850
|
|
|
|
|
|
|
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
|
851
|
|
|
|
|
|
|
x[K^2] = ROR(x[K^2],1); |
|
852
|
|
|
|
|
|
|
#define Encrypt2(R,id) { EncryptRound(0,R+1,id); EncryptRound(2,R,id); } |
|
853
|
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
#if defined(ZERO_KEY) |
|
855
|
|
|
|
|
|
|
switch (key->keyLen) |
|
856
|
|
|
|
|
|
|
{ |
|
857
|
|
|
|
|
|
|
case 128: |
|
858
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
|
859
|
|
|
|
|
|
|
Encrypt2(i,_128); |
|
860
|
|
|
|
|
|
|
break; |
|
861
|
|
|
|
|
|
|
case 192: |
|
862
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
|
863
|
|
|
|
|
|
|
Encrypt2(i,_192); |
|
864
|
|
|
|
|
|
|
break; |
|
865
|
|
|
|
|
|
|
case 256: |
|
866
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
|
867
|
|
|
|
|
|
|
Encrypt2(i,_256); |
|
868
|
|
|
|
|
|
|
break; |
|
869
|
|
|
|
|
|
|
} |
|
870
|
|
|
|
|
|
|
#else |
|
871
|
43
|
|
|
|
|
|
Encrypt2(14,_); |
|
872
|
43
|
|
|
|
|
|
Encrypt2(12,_); |
|
873
|
43
|
|
|
|
|
|
Encrypt2(10,_); |
|
874
|
43
|
|
|
|
|
|
Encrypt2( 8,_); |
|
875
|
43
|
|
|
|
|
|
Encrypt2( 6,_); |
|
876
|
43
|
|
|
|
|
|
Encrypt2( 4,_); |
|
877
|
43
|
|
|
|
|
|
Encrypt2( 2,_); |
|
878
|
43
|
|
|
|
|
|
Encrypt2( 0,_); |
|
879
|
|
|
|
|
|
|
#endif |
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
/* need to do (or undo, depending on your point of view) final swap */ |
|
882
|
|
|
|
|
|
|
#if LittleEndian |
|
883
|
|
|
|
|
|
|
#define StoreBlockE(N) ((DWORD *)outBuffer)[N]=x[N^2] ^ sk[OUTPUT_WHITEN+N] |
|
884
|
|
|
|
|
|
|
#else |
|
885
|
|
|
|
|
|
|
#define StoreBlockE(N) { t0=x[N^2] ^ sk[OUTPUT_WHITEN+N]; ((DWORD *)outBuffer)[N]=Bswap(t0); } |
|
886
|
|
|
|
|
|
|
#endif |
|
887
|
43
|
|
|
|
|
|
StoreBlockE(0); StoreBlockE(1); StoreBlockE(2); StoreBlockE(3); |
|
888
|
43
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
|
889
|
|
|
|
|
|
|
{ |
|
890
|
4
|
|
|
|
|
|
IV[0]=Bswap(((DWORD *)outBuffer)[0]); |
|
891
|
4
|
|
|
|
|
|
IV[1]=Bswap(((DWORD *)outBuffer)[1]); |
|
892
|
4
|
|
|
|
|
|
IV[2]=Bswap(((DWORD *)outBuffer)[2]); |
|
893
|
4
|
|
|
|
|
|
IV[3]=Bswap(((DWORD *)outBuffer)[3]); |
|
894
|
|
|
|
|
|
|
} |
|
895
|
|
|
|
|
|
|
} |
|
896
|
|
|
|
|
|
|
|
|
897
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
|
898
|
2
|
|
|
|
|
|
BlockCopy(cipher->iv32,IV); |
|
899
|
|
|
|
|
|
|
|
|
900
|
41
|
|
|
|
|
|
return inputLen; |
|
901
|
|
|
|
|
|
|
} |
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
/* |
|
904
|
|
|
|
|
|
|
+***************************************************************************** |
|
905
|
|
|
|
|
|
|
* |
|
906
|
|
|
|
|
|
|
* Function Name: blockDecrypt |
|
907
|
|
|
|
|
|
|
* |
|
908
|
|
|
|
|
|
|
* Function: Decrypt block(s) of data using Twofish |
|
909
|
|
|
|
|
|
|
* |
|
910
|
|
|
|
|
|
|
* Arguments: cipher = ptr to already initialized cipherInstance |
|
911
|
|
|
|
|
|
|
* key = ptr to already initialized keyInstance |
|
912
|
|
|
|
|
|
|
* input = ptr to data blocks to be decrypted |
|
913
|
|
|
|
|
|
|
* inputLen = # bits to encrypt (multiple of blockSize) |
|
914
|
|
|
|
|
|
|
* outBuffer = ptr to where to put decrypted blocks |
|
915
|
|
|
|
|
|
|
* |
|
916
|
|
|
|
|
|
|
* Return: # bits ciphered (>= 0) |
|
917
|
|
|
|
|
|
|
* else error code (e.g., BAD_CIPHER_STATE, BAD_KEY_MATERIAL) |
|
918
|
|
|
|
|
|
|
* |
|
919
|
|
|
|
|
|
|
* Notes: The only supported block size for ECB/CBC modes is BLOCK_SIZE bits. |
|
920
|
|
|
|
|
|
|
* If inputLen is not a multiple of BLOCK_SIZE bits in those modes, |
|
921
|
|
|
|
|
|
|
* an error BAD_INPUT_LEN is returned. In CFB1 mode, all block |
|
922
|
|
|
|
|
|
|
* sizes can be supported. |
|
923
|
|
|
|
|
|
|
* |
|
924
|
|
|
|
|
|
|
-****************************************************************************/ |
|
925
|
41
|
|
|
|
|
|
static int blockDecrypt(cipherInstance *cipher, keyInstance *key,CONST BYTE *input, |
|
926
|
|
|
|
|
|
|
int inputLen, BYTE *outBuffer) |
|
927
|
|
|
|
|
|
|
{ |
|
928
|
|
|
|
|
|
|
int i,n; /* loop counters */ |
|
929
|
|
|
|
|
|
|
DWORD x[BLOCK_SIZE/32]; /* block being encrypted */ |
|
930
|
|
|
|
|
|
|
DWORD t0,t1; /* temp variables */ |
|
931
|
41
|
|
|
|
|
|
int rounds=key->numRounds; /* number of rounds */ |
|
932
|
|
|
|
|
|
|
BYTE bit,bit0,ctBit,carry; /* temps for CFB */ |
|
933
|
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
/* make local copies of things for faster access */ |
|
935
|
41
|
|
|
|
|
|
int mode = cipher->mode; |
|
936
|
|
|
|
|
|
|
DWORD sk[TOTAL_SUBKEYS]; |
|
937
|
|
|
|
|
|
|
DWORD IV[BLOCK_SIZE/32]; |
|
938
|
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
GetSboxKey; |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
#if VALIDATE_PARMS |
|
942
|
|
|
|
|
|
|
if ((cipher == NULL) || (cipher->cipherSig != VALID_SIG)) |
|
943
|
|
|
|
|
|
|
return BAD_CIPHER_STATE; |
|
944
|
|
|
|
|
|
|
if ((key == NULL) || (key->keySig != VALID_SIG)) |
|
945
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
|
946
|
|
|
|
|
|
|
if ((rounds < 2) || (rounds > MAX_ROUNDS) || (rounds&1)) |
|
947
|
|
|
|
|
|
|
return BAD_KEY_INSTANCE; |
|
948
|
|
|
|
|
|
|
if ((cipher->mode != MODE_CFB1) && (inputLen % BLOCK_SIZE)) |
|
949
|
|
|
|
|
|
|
return BAD_INPUT_LEN; |
|
950
|
|
|
|
|
|
|
#if ALIGN32 |
|
951
|
|
|
|
|
|
|
if ( (((int)cipher) & 3) || (((int)key ) & 3) || |
|
952
|
|
|
|
|
|
|
(((int)input) & 3) || (((int)outBuffer) & 3)) |
|
953
|
|
|
|
|
|
|
return BAD_ALIGN32; |
|
954
|
|
|
|
|
|
|
#endif |
|
955
|
|
|
|
|
|
|
#endif |
|
956
|
|
|
|
|
|
|
|
|
957
|
41
|
50
|
|
|
|
|
if (cipher->mode == MODE_CFB1) |
|
958
|
|
|
|
|
|
|
{ /* use blockEncrypt here to handle CFB, one block at a time */ |
|
959
|
0
|
|
|
|
|
|
cipher->mode = MODE_ECB; /* do encryption in ECB */ |
|
960
|
0
|
0
|
|
|
|
|
for (n=0;n
|
|
961
|
|
|
|
|
|
|
{ |
|
962
|
0
|
|
|
|
|
|
blockEncrypt(cipher,key,cipher->IV,BLOCK_SIZE,(BYTE *)x); |
|
963
|
0
|
|
|
|
|
|
bit0 = 0x80 >> (n & 7); |
|
964
|
0
|
|
|
|
|
|
ctBit = input[n/8] & bit0; |
|
965
|
0
|
|
|
|
|
|
outBuffer[n/8] = (outBuffer[n/8] & ~ bit0) | |
|
966
|
0
|
|
|
|
|
|
(ctBit ^ ((((BYTE *) x)[0] & 0x80) >> (n&7))); |
|
967
|
0
|
|
|
|
|
|
carry = ctBit >> (7 - (n&7)); |
|
968
|
0
|
0
|
|
|
|
|
for (i=BLOCK_SIZE/8-1;i>=0;i--) |
|
969
|
|
|
|
|
|
|
{ |
|
970
|
0
|
|
|
|
|
|
bit = cipher->IV[i] >> 7; /* save next "carry" from shift */ |
|
971
|
0
|
|
|
|
|
|
cipher->IV[i] = (cipher->IV[i] << 1) ^ carry; |
|
972
|
0
|
|
|
|
|
|
carry = bit; |
|
973
|
|
|
|
|
|
|
} |
|
974
|
|
|
|
|
|
|
} |
|
975
|
0
|
|
|
|
|
|
cipher->mode = MODE_CFB1; /* restore mode for next time */ |
|
976
|
0
|
|
|
|
|
|
return inputLen; |
|
977
|
|
|
|
|
|
|
} |
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
/* here for ECB, CBC modes */ |
|
980
|
41
|
100
|
|
|
|
|
if (key->direction != DIR_DECRYPT) |
|
981
|
40
|
|
|
|
|
|
ReverseRoundSubkeys(key,DIR_DECRYPT); /* reverse the round subkey order */ |
|
982
|
|
|
|
|
|
|
#ifdef USE_ASM |
|
983
|
|
|
|
|
|
|
if ((useAsm & 2) && (inputLen)) |
|
984
|
|
|
|
|
|
|
#ifdef COMPILE_KEY |
|
985
|
|
|
|
|
|
|
if (key->keySig == VALID_SIG) |
|
986
|
|
|
|
|
|
|
return ((CipherProc *)(key->decryptFuncPtr))(cipher,key,input,inputLen,outBuffer); |
|
987
|
|
|
|
|
|
|
#else |
|
988
|
|
|
|
|
|
|
return (*blockDecrypt_86)(cipher,key,input,inputLen,outBuffer); |
|
989
|
|
|
|
|
|
|
#endif |
|
990
|
|
|
|
|
|
|
#endif |
|
991
|
|
|
|
|
|
|
/* make local copy of subkeys for speed */ |
|
992
|
41
|
|
|
|
|
|
memcpy(sk,key->subKeys,sizeof(DWORD)*(ROUND_SUBKEYS+2*rounds)); |
|
993
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) |
|
994
|
2
|
|
|
|
|
|
BlockCopy(IV,cipher->iv32) |
|
995
|
|
|
|
|
|
|
else |
|
996
|
39
|
|
|
|
|
|
IV[0]=IV[1]=IV[2]=IV[3]=0; |
|
997
|
|
|
|
|
|
|
|
|
998
|
84
|
100
|
|
|
|
|
for (n=0;n
|
|
999
|
|
|
|
|
|
|
{ |
|
1000
|
|
|
|
|
|
|
#define LoadBlockD(N) x[N^2]=Bswap(((DWORD *)input)[N]) ^ sk[OUTPUT_WHITEN+N] |
|
1001
|
43
|
|
|
|
|
|
LoadBlockD(0); LoadBlockD(1); LoadBlockD(2); LoadBlockD(3); |
|
1002
|
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
#define DecryptRound(K,R,id) \ |
|
1004
|
|
|
|
|
|
|
t0 = Fe32##id(x[K ],0); \ |
|
1005
|
|
|
|
|
|
|
t1 = Fe32##id(x[K^1],3); \ |
|
1006
|
|
|
|
|
|
|
x[K^2] = ROL (x[K^2],1); \ |
|
1007
|
|
|
|
|
|
|
x[K^2]^= t0 + t1 + sk[ROUND_SUBKEYS+2*(R) ]; \ |
|
1008
|
|
|
|
|
|
|
x[K^3]^= t0 + 2*t1 + sk[ROUND_SUBKEYS+2*(R)+1]; \ |
|
1009
|
|
|
|
|
|
|
x[K^3] = ROR (x[K^3],1); |
|
1010
|
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
#define Decrypt2(R,id) { DecryptRound(2,R+1,id); DecryptRound(0,R,id); } |
|
1012
|
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
#if defined(ZERO_KEY) |
|
1014
|
|
|
|
|
|
|
switch (key->keyLen) |
|
1015
|
|
|
|
|
|
|
{ |
|
1016
|
|
|
|
|
|
|
case 128: |
|
1017
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
|
1018
|
|
|
|
|
|
|
Decrypt2(i,_128); |
|
1019
|
|
|
|
|
|
|
break; |
|
1020
|
|
|
|
|
|
|
case 192: |
|
1021
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
|
1022
|
|
|
|
|
|
|
Decrypt2(i,_192); |
|
1023
|
|
|
|
|
|
|
break; |
|
1024
|
|
|
|
|
|
|
case 256: |
|
1025
|
|
|
|
|
|
|
for (i=rounds-2;i>=0;i-=2) |
|
1026
|
|
|
|
|
|
|
Decrypt2(i,_256); |
|
1027
|
|
|
|
|
|
|
break; |
|
1028
|
|
|
|
|
|
|
} |
|
1029
|
|
|
|
|
|
|
#else |
|
1030
|
|
|
|
|
|
|
{ |
|
1031
|
43
|
|
|
|
|
|
Decrypt2(14,_); |
|
1032
|
43
|
|
|
|
|
|
Decrypt2(12,_); |
|
1033
|
43
|
|
|
|
|
|
Decrypt2(10,_); |
|
1034
|
43
|
|
|
|
|
|
Decrypt2( 8,_); |
|
1035
|
43
|
|
|
|
|
|
Decrypt2( 6,_); |
|
1036
|
43
|
|
|
|
|
|
Decrypt2( 4,_); |
|
1037
|
43
|
|
|
|
|
|
Decrypt2( 2,_); |
|
1038
|
43
|
|
|
|
|
|
Decrypt2( 0,_); |
|
1039
|
|
|
|
|
|
|
} |
|
1040
|
|
|
|
|
|
|
#endif |
|
1041
|
43
|
100
|
|
|
|
|
if (cipher->mode == MODE_ECB) |
|
1042
|
|
|
|
|
|
|
{ |
|
1043
|
|
|
|
|
|
|
#if LittleEndian |
|
1044
|
|
|
|
|
|
|
#define StoreBlockD(N) ((DWORD *)outBuffer)[N] = x[N] ^ sk[INPUT_WHITEN+N] |
|
1045
|
|
|
|
|
|
|
#else |
|
1046
|
|
|
|
|
|
|
#define StoreBlockD(N) { t0=x[N]^sk[INPUT_WHITEN+N]; ((DWORD *)outBuffer)[N] = Bswap(t0); } |
|
1047
|
|
|
|
|
|
|
#endif |
|
1048
|
39
|
|
|
|
|
|
StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3); |
|
1049
|
|
|
|
|
|
|
#undef StoreBlockD |
|
1050
|
39
|
|
|
|
|
|
continue; |
|
1051
|
|
|
|
|
|
|
} |
|
1052
|
|
|
|
|
|
|
else |
|
1053
|
|
|
|
|
|
|
{ |
|
1054
|
|
|
|
|
|
|
#define StoreBlockD(N) x[N] ^= sk[INPUT_WHITEN+N] ^ IV[N]; \ |
|
1055
|
|
|
|
|
|
|
IV[N] = Bswap(((DWORD *)input)[N]); \ |
|
1056
|
|
|
|
|
|
|
((DWORD *)outBuffer)[N] = Bswap(x[N]); |
|
1057
|
4
|
|
|
|
|
|
StoreBlockD(0); StoreBlockD(1); StoreBlockD(2); StoreBlockD(3); |
|
1058
|
|
|
|
|
|
|
#undef StoreBlockD |
|
1059
|
|
|
|
|
|
|
} |
|
1060
|
|
|
|
|
|
|
} |
|
1061
|
41
|
100
|
|
|
|
|
if (mode == MODE_CBC) /* restore iv32 to cipher */ |
|
1062
|
2
|
|
|
|
|
|
BlockCopy(cipher->iv32,IV) |
|
1063
|
|
|
|
|
|
|
|
|
1064
|
41
|
|
|
|
|
|
return inputLen; |
|
1065
|
|
|
|
|
|
|
} |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
#ifdef GetCodeSize |
|
1068
|
|
|
|
|
|
|
/*
 * TwofishCodeSize -- report the size of the Twofish code.
 *
 * Return:	TwofishAsmCodeSize() when USE_ASM is compiled in and either of
 *			the two low bits of useAsm is set;
 *			otherwise Here(0) - TwofishCodeStart().
 *
 * Notes:	Here() and TwofishCodeStart() are defined elsewhere; presumably
 *			they yield code addresses, so the difference is a byte count --
 *			confirm against their definitions.  Here(0) is evaluated first,
 *			exactly as in the original code.
 */
static DWORD TwofishCodeSize(void)
	{
	DWORD x= Here(0);
#ifdef USE_ASM
	if (useAsm & 3)
		return TwofishAsmCodeSize();
#endif
	return x - TwofishCodeStart();
	}
|
1077
|
|
|
|
|
|
|
#endif |