| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* Copyright (c) 2016 Thomas Pornin |
|
3
|
|
|
|
|
|
|
* |
|
4
|
|
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining |
|
5
|
|
|
|
|
|
|
* a copy of this software and associated documentation files (the |
|
6
|
|
|
|
|
|
|
* "Software"), to deal in the Software without restriction, including |
|
7
|
|
|
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish, |
|
8
|
|
|
|
|
|
|
* distribute, sublicense, and/or sell copies of the Software, and to |
|
9
|
|
|
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to |
|
10
|
|
|
|
|
|
|
* the following conditions: |
|
11
|
|
|
|
|
|
|
* |
|
12
|
|
|
|
|
|
|
* The above copyright notice and this permission notice shall be |
|
13
|
|
|
|
|
|
|
* included in all copies or substantial portions of the Software. |
|
14
|
|
|
|
|
|
|
* |
|
15
|
|
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
16
|
|
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
17
|
|
|
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
18
|
|
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|
19
|
|
|
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
20
|
|
|
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|
21
|
|
|
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
22
|
|
|
|
|
|
|
* SOFTWARE. |
|
23
|
|
|
|
|
|
|
*/ |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#include "inner.h" |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
/* |
|
28
|
|
|
|
|
|
|
* During key schedule, we need to apply bit extraction PC-2 then permute |
|
29
|
|
|
|
|
|
|
* things into our bitslice representation. PC-2 extracts 48 bits out |
|
30
|
|
|
|
|
|
|
* of two 28-bit words (kl and kr), and we store these bits into two |
|
31
|
|
|
|
|
|
|
* 32-bit words sk0 and sk1. |
|
32
|
|
|
|
|
|
|
* |
|
33
|
|
|
|
|
|
|
* -- bit 16+x of sk0 comes from bit QL0[x] of kl |
|
34
|
|
|
|
|
|
|
* -- bit x of sk0 comes from bit QR0[x] of kr |
|
35
|
|
|
|
|
|
|
* -- bit 16+x of sk1 comes from bit QL1[x] of kl |
|
36
|
|
|
|
|
|
|
* -- bit x of sk1 comes from bit QR1[x] of kr |
|
37
|
|
|
|
|
|
|
*/ |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
static const unsigned char QL0[] = { |
|
40
|
|
|
|
|
|
|
17, 4, 27, 23, 13, 22, 7, 18, |
|
41
|
|
|
|
|
|
|
16, 24, 2, 20, 1, 8, 15, 26 |
|
42
|
|
|
|
|
|
|
}; |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
static const unsigned char QR0[] = { |
|
45
|
|
|
|
|
|
|
25, 19, 9, 1, 5, 11, 23, 8, |
|
46
|
|
|
|
|
|
|
17, 0, 22, 3, 6, 20, 27, 24 |
|
47
|
|
|
|
|
|
|
}; |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
static const unsigned char QL1[] = { |
|
50
|
|
|
|
|
|
|
28, 28, 14, 11, 28, 28, 25, 0, |
|
51
|
|
|
|
|
|
|
28, 28, 5, 9, 28, 28, 12, 21 |
|
52
|
|
|
|
|
|
|
}; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
static const unsigned char QR1[] = { |
|
55
|
|
|
|
|
|
|
28, 28, 15, 4, 28, 28, 26, 16, |
|
56
|
|
|
|
|
|
|
28, 28, 12, 7, 28, 28, 10, 14 |
|
57
|
|
|
|
|
|
|
}; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
/* |
|
60
|
|
|
|
|
|
|
* 32-bit rotation. The C compiler is supposed to recognize it as a |
|
61
|
|
|
|
|
|
|
* rotation and use the local architecture rotation opcode (if available). |
|
62
|
|
|
|
|
|
|
*/ |
|
63
|
|
|
|
|
|
|
static inline uint32_t |
|
64
|
0
|
|
|
|
|
|
rotl(uint32_t x, int n) |
|
65
|
|
|
|
|
|
|
{ |
|
66
|
0
|
|
|
|
|
|
return (x << n) | (x >> (32 - n)); |
|
67
|
|
|
|
|
|
|
} |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
/* |
|
70
|
|
|
|
|
|
|
* Compute key schedule for 8 key bytes (produces 32 subkey words). |
|
71
|
|
|
|
|
|
|
*/ |
|
72
|
|
|
|
|
|
|
static void |
|
73
|
0
|
|
|
|
|
|
keysched_unit(uint32_t *skey, const void *key) |
|
74
|
|
|
|
|
|
|
{ |
|
75
|
|
|
|
|
|
|
int i; |
|
76
|
|
|
|
|
|
|
|
|
77
|
0
|
|
|
|
|
|
br_des_keysched_unit(skey, key); |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
/* |
|
80
|
|
|
|
|
|
|
* Apply PC-2 + bitslicing. |
|
81
|
|
|
|
|
|
|
*/ |
|
82
|
0
|
0
|
|
|
|
|
for (i = 0; i < 16; i ++) { |
|
83
|
|
|
|
|
|
|
uint32_t kl, kr, sk0, sk1; |
|
84
|
|
|
|
|
|
|
int j; |
|
85
|
|
|
|
|
|
|
|
|
86
|
0
|
|
|
|
|
|
kl = skey[(i << 1) + 0]; |
|
87
|
0
|
|
|
|
|
|
kr = skey[(i << 1) + 1]; |
|
88
|
0
|
|
|
|
|
|
sk0 = 0; |
|
89
|
0
|
|
|
|
|
|
sk1 = 0; |
|
90
|
0
|
0
|
|
|
|
|
for (j = 0; j < 16; j ++) { |
|
91
|
0
|
|
|
|
|
|
sk0 <<= 1; |
|
92
|
0
|
|
|
|
|
|
sk1 <<= 1; |
|
93
|
0
|
|
|
|
|
|
sk0 |= ((kl >> QL0[j]) & (uint32_t)1) << 16; |
|
94
|
0
|
|
|
|
|
|
sk0 |= (kr >> QR0[j]) & (uint32_t)1; |
|
95
|
0
|
|
|
|
|
|
sk1 |= ((kl >> QL1[j]) & (uint32_t)1) << 16; |
|
96
|
0
|
|
|
|
|
|
sk1 |= (kr >> QR1[j]) & (uint32_t)1; |
|
97
|
|
|
|
|
|
|
} |
|
98
|
|
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
|
skey[(i << 1) + 0] = sk0; |
|
100
|
0
|
|
|
|
|
|
skey[(i << 1) + 1] = sk1; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
#if 0 |
|
104
|
|
|
|
|
|
|
/* |
|
105
|
|
|
|
|
|
|
* Speed-optimized version for PC-2 + bitslicing. |
|
106
|
|
|
|
|
|
|
* (Unused. Kept for reference only.) |
|
107
|
|
|
|
|
|
|
*/ |
|
108
|
|
|
|
|
|
|
sk0 = kl & (uint32_t)0x00100000; |
|
109
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x08008000) << 2; |
|
110
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00400000) << 4; |
|
111
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00800000) << 5; |
|
112
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00040000) << 6; |
|
113
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00010000) << 7; |
|
114
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00000100) << 10; |
|
115
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00022000) << 14; |
|
116
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00000082) << 18; |
|
117
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00000004) << 19; |
|
118
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x04000000) >> 10; |
|
119
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x00000010) << 26; |
|
120
|
|
|
|
|
|
|
sk0 |= (kl & (uint32_t)0x01000000) >> 2; |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sk0 |= kr & (uint32_t)0x00000100; |
|
123
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00000008) << 1; |
|
124
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00000200) << 4; |
|
125
|
|
|
|
|
|
|
sk0 |= rotl(kr & (uint32_t)0x08000021, 6); |
|
126
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x01000000) >> 24; |
|
127
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00000002) << 11; |
|
128
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00100000) >> 18; |
|
129
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00400000) >> 17; |
|
130
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00800000) >> 14; |
|
131
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x02020000) >> 10; |
|
132
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00080000) >> 5; |
|
133
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00000040) >> 3; |
|
134
|
|
|
|
|
|
|
sk0 |= (kr & (uint32_t)0x00000800) >> 1; |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sk1 = kl & (uint32_t)0x02000000; |
|
137
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00001000) << 5; |
|
138
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00000200) << 11; |
|
139
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00004000) << 15; |
|
140
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00000020) << 16; |
|
141
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00000800) << 17; |
|
142
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00000001) << 24; |
|
143
|
|
|
|
|
|
|
sk1 |= (kl & (uint32_t)0x00200000) >> 5; |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00000010) << 8; |
|
146
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x04000000) >> 17; |
|
147
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00004000) >> 14; |
|
148
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00000400) >> 9; |
|
149
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00010000) >> 8; |
|
150
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00001000) >> 7; |
|
151
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00000080) >> 3; |
|
152
|
|
|
|
|
|
|
sk1 |= (kr & (uint32_t)0x00008000) >> 2; |
|
153
|
|
|
|
|
|
|
#endif |
|
154
|
0
|
|
|
|
|
|
} |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
/* see inner.h */ |
|
157
|
|
|
|
|
|
|
unsigned |
|
158
|
0
|
|
|
|
|
|
br_des_ct_keysched(uint32_t *skey, const void *key, size_t key_len) |
|
159
|
|
|
|
|
|
|
{ |
|
160
|
0
|
|
|
|
|
|
switch (key_len) { |
|
161
|
0
|
|
|
|
|
|
case 8: |
|
162
|
0
|
|
|
|
|
|
keysched_unit(skey, key); |
|
163
|
0
|
|
|
|
|
|
return 1; |
|
164
|
0
|
|
|
|
|
|
case 16: |
|
165
|
0
|
|
|
|
|
|
keysched_unit(skey, key); |
|
166
|
0
|
|
|
|
|
|
keysched_unit(skey + 32, (const unsigned char *)key + 8); |
|
167
|
0
|
|
|
|
|
|
br_des_rev_skey(skey + 32); |
|
168
|
0
|
|
|
|
|
|
memcpy(skey + 64, skey, 32 * sizeof *skey); |
|
169
|
0
|
|
|
|
|
|
return 3; |
|
170
|
0
|
|
|
|
|
|
default: |
|
171
|
0
|
|
|
|
|
|
keysched_unit(skey, key); |
|
172
|
0
|
|
|
|
|
|
keysched_unit(skey + 32, (const unsigned char *)key + 8); |
|
173
|
0
|
|
|
|
|
|
br_des_rev_skey(skey + 32); |
|
174
|
0
|
|
|
|
|
|
keysched_unit(skey + 64, (const unsigned char *)key + 16); |
|
175
|
0
|
|
|
|
|
|
return 3; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
/* |
|
180
|
|
|
|
|
|
|
* DES confusion function. This function performs expansion E (32 to |
|
181
|
|
|
|
|
|
|
* 48 bits), XOR with subkey, S-boxes, and permutation P. |
|
182
|
|
|
|
|
|
|
*/ |
|
183
|
|
|
|
|
|
|
static inline uint32_t |
|
184
|
0
|
|
|
|
|
|
Fconf(uint32_t r0, const uint32_t *sk) |
|
185
|
|
|
|
|
|
|
{ |
|
186
|
|
|
|
|
|
|
/* |
|
187
|
|
|
|
|
|
|
* Each 6->4 S-box is virtually turned into four 6->1 boxes; we |
|
188
|
|
|
|
|
|
|
* thus end up with 32 boxes that we call "T-boxes" here. We will |
|
189
|
|
|
|
|
|
|
* evaluate them with bitslice code. |
|
190
|
|
|
|
|
|
|
* |
|
191
|
|
|
|
|
|
|
* Each T-box is a circuit of multiplexers (sort of) and thus |
|
192
|
|
|
|
|
|
|
* takes 70 inputs: the 6 actual T-box inputs, and 64 constants |
|
193
|
|
|
|
|
|
|
* that describe the T-box output for all combinations of the |
|
194
|
|
|
|
|
|
|
* 6 inputs. With this model, all T-boxes are identical (with |
|
195
|
|
|
|
|
|
|
* distinct inputs) and thus can be executed in parallel with |
|
196
|
|
|
|
|
|
|
* bitslice code. |
|
197
|
|
|
|
|
|
|
* |
|
198
|
|
|
|
|
|
|
* T-boxes are numbered from 0 to 31, in least-to-most |
|
199
|
|
|
|
|
|
|
* significant order. Thus, S-box S1 corresponds to T-boxes 31, |
|
200
|
|
|
|
|
|
|
* 30, 29 and 28, in that order. T-box 'n' is computed with the |
|
201
|
|
|
|
|
|
|
* bits at rank 'n' in the 32-bit words. |
|
202
|
|
|
|
|
|
|
* |
|
203
|
|
|
|
|
|
|
* Words x0 to x5 contain the T-box inputs 0 to 5. |
|
204
|
|
|
|
|
|
|
*/ |
|
205
|
|
|
|
|
|
|
uint32_t x0, x1, x2, x3, x4, x5, z0; |
|
206
|
|
|
|
|
|
|
uint32_t y0, y1, y2, y3, y4, y5, y6, y7, y8, y9; |
|
207
|
|
|
|
|
|
|
uint32_t y10, y11, y12, y13, y14, y15, y16, y17, y18, y19; |
|
208
|
|
|
|
|
|
|
uint32_t y20, y21, y22, y23, y24, y25, y26, y27, y28, y29; |
|
209
|
|
|
|
|
|
|
uint32_t y30; |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
/* |
|
212
|
|
|
|
|
|
|
* Spread input bits over the 6 input words x*. |
|
213
|
|
|
|
|
|
|
*/ |
|
214
|
0
|
|
|
|
|
|
x1 = r0 & (uint32_t)0x11111111; |
|
215
|
0
|
|
|
|
|
|
x2 = (r0 >> 1) & (uint32_t)0x11111111; |
|
216
|
0
|
|
|
|
|
|
x3 = (r0 >> 2) & (uint32_t)0x11111111; |
|
217
|
0
|
|
|
|
|
|
x4 = (r0 >> 3) & (uint32_t)0x11111111; |
|
218
|
0
|
|
|
|
|
|
x1 = (x1 << 4) - x1; |
|
219
|
0
|
|
|
|
|
|
x2 = (x2 << 4) - x2; |
|
220
|
0
|
|
|
|
|
|
x3 = (x3 << 4) - x3; |
|
221
|
0
|
|
|
|
|
|
x4 = (x4 << 4) - x4; |
|
222
|
0
|
|
|
|
|
|
x0 = (x4 << 4) | (x4 >> 28); |
|
223
|
0
|
|
|
|
|
|
x5 = (x1 >> 4) | (x1 << 28); |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
/* |
|
226
|
|
|
|
|
|
|
* XOR with the subkey for this round. |
|
227
|
|
|
|
|
|
|
*/ |
|
228
|
0
|
|
|
|
|
|
x0 ^= sk[0]; |
|
229
|
0
|
|
|
|
|
|
x1 ^= sk[1]; |
|
230
|
0
|
|
|
|
|
|
x2 ^= sk[2]; |
|
231
|
0
|
|
|
|
|
|
x3 ^= sk[3]; |
|
232
|
0
|
|
|
|
|
|
x4 ^= sk[4]; |
|
233
|
0
|
|
|
|
|
|
x5 ^= sk[5]; |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
/* |
|
236
|
|
|
|
|
|
|
* The T-boxes are done in parallel, since they all use a |
|
237
|
|
|
|
|
|
|
* "tree of multiplexer". We use "fake multiplexers": |
|
238
|
|
|
|
|
|
|
* |
|
239
|
|
|
|
|
|
|
* y = a ^ (x & b) |
|
240
|
|
|
|
|
|
|
* |
|
241
|
|
|
|
|
|
|
* computes y as either 'a' (if x == 0) or 'a ^ b' (if x == 1). |
|
242
|
|
|
|
|
|
|
*/ |
|
243
|
0
|
|
|
|
|
|
y0 = (uint32_t)0xEFA72C4D ^ (x0 & (uint32_t)0xEC7AC69C); |
|
244
|
0
|
|
|
|
|
|
y1 = (uint32_t)0xAEAAEDFF ^ (x0 & (uint32_t)0x500FB821); |
|
245
|
0
|
|
|
|
|
|
y2 = (uint32_t)0x37396665 ^ (x0 & (uint32_t)0x40EFA809); |
|
246
|
0
|
|
|
|
|
|
y3 = (uint32_t)0x68D7B833 ^ (x0 & (uint32_t)0xA5EC0B28); |
|
247
|
0
|
|
|
|
|
|
y4 = (uint32_t)0xC9C755BB ^ (x0 & (uint32_t)0x252CF820); |
|
248
|
0
|
|
|
|
|
|
y5 = (uint32_t)0x73FC3606 ^ (x0 & (uint32_t)0x40205801); |
|
249
|
0
|
|
|
|
|
|
y6 = (uint32_t)0xA2A0A918 ^ (x0 & (uint32_t)0xE220F929); |
|
250
|
0
|
|
|
|
|
|
y7 = (uint32_t)0x8222BD90 ^ (x0 & (uint32_t)0x44A3F9E1); |
|
251
|
0
|
|
|
|
|
|
y8 = (uint32_t)0xD6B6AC77 ^ (x0 & (uint32_t)0x794F104A); |
|
252
|
0
|
|
|
|
|
|
y9 = (uint32_t)0x3069300C ^ (x0 & (uint32_t)0x026F320B); |
|
253
|
0
|
|
|
|
|
|
y10 = (uint32_t)0x6CE0D5CC ^ (x0 & (uint32_t)0x7640B01A); |
|
254
|
0
|
|
|
|
|
|
y11 = (uint32_t)0x59A9A22D ^ (x0 & (uint32_t)0x238F1572); |
|
255
|
0
|
|
|
|
|
|
y12 = (uint32_t)0xAC6D0BD4 ^ (x0 & (uint32_t)0x7A63C083); |
|
256
|
0
|
|
|
|
|
|
y13 = (uint32_t)0x21C83200 ^ (x0 & (uint32_t)0x11CCA000); |
|
257
|
0
|
|
|
|
|
|
y14 = (uint32_t)0xA0E62188 ^ (x0 & (uint32_t)0x202F69AA); |
|
258
|
|
|
|
|
|
|
/* y15 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */ |
|
259
|
0
|
|
|
|
|
|
y16 = (uint32_t)0xAF7D655A ^ (x0 & (uint32_t)0x51B33BE9); |
|
260
|
0
|
|
|
|
|
|
y17 = (uint32_t)0xF0168AA3 ^ (x0 & (uint32_t)0x3B0FE8AE); |
|
261
|
0
|
|
|
|
|
|
y18 = (uint32_t)0x90AA30C6 ^ (x0 & (uint32_t)0x90BF8816); |
|
262
|
0
|
|
|
|
|
|
y19 = (uint32_t)0x5AB2750A ^ (x0 & (uint32_t)0x09E34F9B); |
|
263
|
0
|
|
|
|
|
|
y20 = (uint32_t)0x5391BE65 ^ (x0 & (uint32_t)0x0103BE88); |
|
264
|
0
|
|
|
|
|
|
y21 = (uint32_t)0x93372BAF ^ (x0 & (uint32_t)0x49AC8E25); |
|
265
|
0
|
|
|
|
|
|
y22 = (uint32_t)0xF288210C ^ (x0 & (uint32_t)0x922C313D); |
|
266
|
0
|
|
|
|
|
|
y23 = (uint32_t)0x920AF5C0 ^ (x0 & (uint32_t)0x70EF31B0); |
|
267
|
0
|
|
|
|
|
|
y24 = (uint32_t)0x63D312C0 ^ (x0 & (uint32_t)0x6A707100); |
|
268
|
0
|
|
|
|
|
|
y25 = (uint32_t)0x537B3006 ^ (x0 & (uint32_t)0xB97C9011); |
|
269
|
0
|
|
|
|
|
|
y26 = (uint32_t)0xA2EFB0A5 ^ (x0 & (uint32_t)0xA320C959); |
|
270
|
0
|
|
|
|
|
|
y27 = (uint32_t)0xBC8F96A5 ^ (x0 & (uint32_t)0x6EA0AB4A); |
|
271
|
0
|
|
|
|
|
|
y28 = (uint32_t)0xFAD176A5 ^ (x0 & (uint32_t)0x6953DDF8); |
|
272
|
0
|
|
|
|
|
|
y29 = (uint32_t)0x665A14A3 ^ (x0 & (uint32_t)0xF74F3E2B); |
|
273
|
0
|
|
|
|
|
|
y30 = (uint32_t)0xF2EFF0CC ^ (x0 & (uint32_t)0xF0306CAD); |
|
274
|
|
|
|
|
|
|
/* y31 = (uint32_t)0x00000000 ^ (x0 & (uint32_t)0x00000000); */ |
|
275
|
|
|
|
|
|
|
|
|
276
|
0
|
|
|
|
|
|
y0 = y0 ^ (x1 & y1); |
|
277
|
0
|
|
|
|
|
|
y1 = y2 ^ (x1 & y3); |
|
278
|
0
|
|
|
|
|
|
y2 = y4 ^ (x1 & y5); |
|
279
|
0
|
|
|
|
|
|
y3 = y6 ^ (x1 & y7); |
|
280
|
0
|
|
|
|
|
|
y4 = y8 ^ (x1 & y9); |
|
281
|
0
|
|
|
|
|
|
y5 = y10 ^ (x1 & y11); |
|
282
|
0
|
|
|
|
|
|
y6 = y12 ^ (x1 & y13); |
|
283
|
0
|
|
|
|
|
|
y7 = y14; /* was: y14 ^ (x1 & y15) */ |
|
284
|
0
|
|
|
|
|
|
y8 = y16 ^ (x1 & y17); |
|
285
|
0
|
|
|
|
|
|
y9 = y18 ^ (x1 & y19); |
|
286
|
0
|
|
|
|
|
|
y10 = y20 ^ (x1 & y21); |
|
287
|
0
|
|
|
|
|
|
y11 = y22 ^ (x1 & y23); |
|
288
|
0
|
|
|
|
|
|
y12 = y24 ^ (x1 & y25); |
|
289
|
0
|
|
|
|
|
|
y13 = y26 ^ (x1 & y27); |
|
290
|
0
|
|
|
|
|
|
y14 = y28 ^ (x1 & y29); |
|
291
|
0
|
|
|
|
|
|
y15 = y30; /* was: y30 ^ (x1 & y31) */ |
|
292
|
|
|
|
|
|
|
|
|
293
|
0
|
|
|
|
|
|
y0 = y0 ^ (x2 & y1); |
|
294
|
0
|
|
|
|
|
|
y1 = y2 ^ (x2 & y3); |
|
295
|
0
|
|
|
|
|
|
y2 = y4 ^ (x2 & y5); |
|
296
|
0
|
|
|
|
|
|
y3 = y6 ^ (x2 & y7); |
|
297
|
0
|
|
|
|
|
|
y4 = y8 ^ (x2 & y9); |
|
298
|
0
|
|
|
|
|
|
y5 = y10 ^ (x2 & y11); |
|
299
|
0
|
|
|
|
|
|
y6 = y12 ^ (x2 & y13); |
|
300
|
0
|
|
|
|
|
|
y7 = y14 ^ (x2 & y15); |
|
301
|
|
|
|
|
|
|
|
|
302
|
0
|
|
|
|
|
|
y0 = y0 ^ (x3 & y1); |
|
303
|
0
|
|
|
|
|
|
y1 = y2 ^ (x3 & y3); |
|
304
|
0
|
|
|
|
|
|
y2 = y4 ^ (x3 & y5); |
|
305
|
0
|
|
|
|
|
|
y3 = y6 ^ (x3 & y7); |
|
306
|
|
|
|
|
|
|
|
|
307
|
0
|
|
|
|
|
|
y0 = y0 ^ (x4 & y1); |
|
308
|
0
|
|
|
|
|
|
y1 = y2 ^ (x4 & y3); |
|
309
|
|
|
|
|
|
|
|
|
310
|
0
|
|
|
|
|
|
y0 = y0 ^ (x5 & y1); |
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
/* |
|
313
|
|
|
|
|
|
|
* The P permutation: |
|
314
|
|
|
|
|
|
|
* -- Each bit move is converted into a mask + left rotation. |
|
315
|
|
|
|
|
|
|
* -- Rotations that use the same movement are coalesced together. |
|
316
|
|
|
|
|
|
|
* -- Left and right shifts are used as alternatives to a rotation |
|
317
|
|
|
|
|
|
|
* where appropriate (this will help architectures that do not have |
|
318
|
|
|
|
|
|
|
* a rotation opcode). |
|
319
|
|
|
|
|
|
|
*/ |
|
320
|
0
|
|
|
|
|
|
z0 = (y0 & (uint32_t)0x00000004) << 3; |
|
321
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00004000) << 4; |
|
322
|
0
|
|
|
|
|
|
z0 |= rotl(y0 & 0x12020120, 5); |
|
323
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00100000) << 6; |
|
324
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00008000) << 9; |
|
325
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x04000000) >> 22; |
|
326
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00000001) << 11; |
|
327
|
0
|
|
|
|
|
|
z0 |= rotl(y0 & 0x20000200, 12); |
|
328
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00200000) >> 19; |
|
329
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00000040) << 14; |
|
330
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00010000) << 15; |
|
331
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00000002) << 16; |
|
332
|
0
|
|
|
|
|
|
z0 |= rotl(y0 & 0x40801800, 17); |
|
333
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00080000) >> 13; |
|
334
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00000010) << 21; |
|
335
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x01000000) >> 10; |
|
336
|
0
|
|
|
|
|
|
z0 |= rotl(y0 & 0x88000008, 24); |
|
337
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00000480) >> 7; |
|
338
|
0
|
|
|
|
|
|
z0 |= (y0 & (uint32_t)0x00442000) >> 6; |
|
339
|
0
|
|
|
|
|
|
return z0; |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
/* |
|
343
|
|
|
|
|
|
|
* Process one block through 16 successive rounds, omitting the swap |
|
344
|
|
|
|
|
|
|
* in the final round. |
|
345
|
|
|
|
|
|
|
*/ |
|
346
|
|
|
|
|
|
|
static void |
|
347
|
0
|
|
|
|
|
|
process_block_unit(uint32_t *pl, uint32_t *pr, const uint32_t *sk_exp) |
|
348
|
|
|
|
|
|
|
{ |
|
349
|
|
|
|
|
|
|
int i; |
|
350
|
|
|
|
|
|
|
uint32_t l, r; |
|
351
|
|
|
|
|
|
|
|
|
352
|
0
|
|
|
|
|
|
l = *pl; |
|
353
|
0
|
|
|
|
|
|
r = *pr; |
|
354
|
0
|
0
|
|
|
|
|
for (i = 0; i < 16; i ++) { |
|
355
|
|
|
|
|
|
|
uint32_t t; |
|
356
|
|
|
|
|
|
|
|
|
357
|
0
|
|
|
|
|
|
t = l ^ Fconf(r, sk_exp); |
|
358
|
0
|
|
|
|
|
|
l = r; |
|
359
|
0
|
|
|
|
|
|
r = t; |
|
360
|
0
|
|
|
|
|
|
sk_exp += 6; |
|
361
|
|
|
|
|
|
|
} |
|
362
|
0
|
|
|
|
|
|
*pl = r; |
|
363
|
0
|
|
|
|
|
|
*pr = l; |
|
364
|
0
|
|
|
|
|
|
} |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
/* see inner.h */ |
|
367
|
|
|
|
|
|
|
void |
|
368
|
0
|
|
|
|
|
|
br_des_ct_process_block(unsigned num_rounds, |
|
369
|
|
|
|
|
|
|
const uint32_t *sk_exp, void *block) |
|
370
|
|
|
|
|
|
|
{ |
|
371
|
|
|
|
|
|
|
unsigned char *buf; |
|
372
|
|
|
|
|
|
|
uint32_t l, r; |
|
373
|
|
|
|
|
|
|
|
|
374
|
0
|
|
|
|
|
|
buf = block; |
|
375
|
0
|
|
|
|
|
|
l = br_dec32be(buf); |
|
376
|
0
|
|
|
|
|
|
r = br_dec32be(buf + 4); |
|
377
|
0
|
|
|
|
|
|
br_des_do_IP(&l, &r); |
|
378
|
0
|
0
|
|
|
|
|
while (num_rounds -- > 0) { |
|
379
|
0
|
|
|
|
|
|
process_block_unit(&l, &r, sk_exp); |
|
380
|
0
|
|
|
|
|
|
sk_exp += 96; |
|
381
|
|
|
|
|
|
|
} |
|
382
|
0
|
|
|
|
|
|
br_des_do_invIP(&l, &r); |
|
383
|
0
|
|
|
|
|
|
br_enc32be(buf, l); |
|
384
|
0
|
|
|
|
|
|
br_enc32be(buf + 4, r); |
|
385
|
0
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
/* see inner.h */ |
|
388
|
|
|
|
|
|
|
void |
|
389
|
0
|
|
|
|
|
|
br_des_ct_skey_expand(uint32_t *sk_exp, |
|
390
|
|
|
|
|
|
|
unsigned num_rounds, const uint32_t *skey) |
|
391
|
|
|
|
|
|
|
{ |
|
392
|
0
|
|
|
|
|
|
num_rounds <<= 4; |
|
393
|
0
|
0
|
|
|
|
|
while (num_rounds -- > 0) { |
|
394
|
|
|
|
|
|
|
uint32_t v, w0, w1, w2, w3; |
|
395
|
|
|
|
|
|
|
|
|
396
|
0
|
|
|
|
|
|
v = *skey ++; |
|
397
|
0
|
|
|
|
|
|
w0 = v & 0x11111111; |
|
398
|
0
|
|
|
|
|
|
w1 = (v >> 1) & 0x11111111; |
|
399
|
0
|
|
|
|
|
|
w2 = (v >> 2) & 0x11111111; |
|
400
|
0
|
|
|
|
|
|
w3 = (v >> 3) & 0x11111111; |
|
401
|
0
|
|
|
|
|
|
*sk_exp ++ = (w0 << 4) - w0; |
|
402
|
0
|
|
|
|
|
|
*sk_exp ++ = (w1 << 4) - w1; |
|
403
|
0
|
|
|
|
|
|
*sk_exp ++ = (w2 << 4) - w2; |
|
404
|
0
|
|
|
|
|
|
*sk_exp ++ = (w3 << 4) - w3; |
|
405
|
0
|
|
|
|
|
|
v = *skey ++; |
|
406
|
0
|
|
|
|
|
|
w0 = v & 0x11111111; |
|
407
|
0
|
|
|
|
|
|
w1 = (v >> 1) & 0x11111111; |
|
408
|
0
|
|
|
|
|
|
*sk_exp ++ = (w0 << 4) - w0; |
|
409
|
0
|
|
|
|
|
|
*sk_exp ++ = (w1 << 4) - w1; |
|
410
|
|
|
|
|
|
|
} |
|
411
|
0
|
|
|
|
|
|
} |