| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* Copyright (c) 2017 Thomas Pornin |
|
3
|
|
|
|
|
|
|
* |
|
4
|
|
|
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining |
|
5
|
|
|
|
|
|
|
* a copy of this software and associated documentation files (the |
|
6
|
|
|
|
|
|
|
* "Software"), to deal in the Software without restriction, including |
|
7
|
|
|
|
|
|
|
* without limitation the rights to use, copy, modify, merge, publish, |
|
8
|
|
|
|
|
|
|
* distribute, sublicense, and/or sell copies of the Software, and to |
|
9
|
|
|
|
|
|
|
* permit persons to whom the Software is furnished to do so, subject to |
|
10
|
|
|
|
|
|
|
* the following conditions: |
|
11
|
|
|
|
|
|
|
* |
|
12
|
|
|
|
|
|
|
* The above copyright notice and this permission notice shall be |
|
13
|
|
|
|
|
|
|
* included in all copies or substantial portions of the Software. |
|
14
|
|
|
|
|
|
|
* |
|
15
|
|
|
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
|
16
|
|
|
|
|
|
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
|
17
|
|
|
|
|
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
|
18
|
|
|
|
|
|
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
|
19
|
|
|
|
|
|
|
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
|
20
|
|
|
|
|
|
|
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
|
21
|
|
|
|
|
|
|
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
|
22
|
|
|
|
|
|
|
* SOFTWARE. |
|
23
|
|
|
|
|
|
|
*/ |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#define BR_ENABLE_INTRINSICS 1 |
|
26
|
|
|
|
|
|
|
#include "inner.h" |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
/* |
|
29
|
|
|
|
|
|
|
* This code contains the AES key schedule implementation using the |
|
30
|
|
|
|
|
|
|
* AES-NI opcodes. |
|
31
|
|
|
|
|
|
|
*/ |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
#if BR_AES_X86NI |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
/* see inner.h */
int
br_aes_x86ni_supported(void)
{
	/*
	 * Feature bits that must be reported in ECX:
	 *   bit 19 (0x00080000)  SSE4.1 (used for _mm_insert_epi32(),
	 *                        for AES-CTR)
	 *   bit 25 (0x02000000)  AES-NI
	 * The two bits are OR'ed into the ECX mask passed to br_cpuid();
	 * the three other mask arguments are zero.
	 */
	return br_cpuid(0, 0, 0x00080000 | 0x02000000, 0);
}
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
BR_TARGETS_X86_UP |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
/*
 * One AES-128 key expansion step. 'k' is the previous round key and
 * 'k2' is the output of _mm_aeskeygenassist_si128() applied to it.
 * Returned value is the next round key.
 */
BR_TARGET("sse2,aes")
static inline __m128i
expand_step128(__m128i k, __m128i k2)
{
	__m128i acc, top;

	/* Replicate the highest 32-bit word of k2 into all four lanes. */
	top = _mm_shuffle_epi32(k2, 0xFF);

	/*
	 * Three "shift left by 4 bytes, then XOR" rounds: afterwards,
	 * 32-bit word j of acc is the XOR of words 0..j of k.
	 */
	acc = _mm_xor_si128(k, _mm_slli_si128(k, 4));
	acc = _mm_xor_si128(acc, _mm_slli_si128(acc, 4));
	acc = _mm_xor_si128(acc, _mm_slli_si128(acc, 4));

	return _mm_xor_si128(acc, top);
}
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
/*
 * One AES-192 key expansion step, operating in place on the three
 * work registers:
 *   *t1  updated with the running XOR of its own words, combined
 *        with word 1 of the incoming *t2;
 *   *t2  on input, the _mm_aeskeygenassist_si128() output; on output,
 *        the top word of the new *t1 replicated in all lanes;
 *   *t3  updated from its own value (shifted by one word) and the
 *        new *t2.
 * The three pointers are expected to designate distinct objects.
 */
BR_TARGET("sse2,aes")
static inline void
expand_step192(__m128i *t1, __m128i *t2, __m128i *t3)
{
	__m128i a, g, c, sh;

	a = *t1;
	c = *t3;

	/* Replicate 32-bit word 1 of the key-assist value. */
	g = _mm_shuffle_epi32(*t2, 0x55);

	/* a ^= (a << 32) ^ (a << 64) ^ (a << 96), then mix in g. */
	sh = _mm_slli_si128(a, 0x4);
	a = _mm_xor_si128(a, sh);
	sh = _mm_slli_si128(sh, 0x4);
	a = _mm_xor_si128(a, sh);
	sh = _mm_slli_si128(sh, 0x4);
	a = _mm_xor_si128(a, sh);
	a = _mm_xor_si128(a, g);

	/* Top word of the new a feeds the update of the third register. */
	g = _mm_shuffle_epi32(a, 0xFF);
	sh = _mm_slli_si128(c, 0x4);
	c = _mm_xor_si128(c, sh);
	c = _mm_xor_si128(c, g);

	*t1 = a;
	*t2 = g;
	*t3 = c;
}
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
/*
 * First half of an AES-256 key expansion step.
 *   *t1  updated in place: running XOR of its own 32-bit words,
 *        combined with the top word of the incoming *t2;
 *   *t2  on input, the _mm_aeskeygenassist_si128() output; on output,
 *        its top word replicated in all four lanes.
 * The two pointers are expected to designate distinct objects.
 */
BR_TARGET("sse2,aes")
static inline void
expand_step256_1(__m128i *t1, __m128i *t2)
{
	__m128i a, g, sh;

	a = *t1;
	g = _mm_shuffle_epi32(*t2, 0xFF);

	/* a ^= (a << 32) ^ (a << 64) ^ (a << 96) */
	sh = _mm_slli_si128(a, 0x4);
	a = _mm_xor_si128(a, sh);
	sh = _mm_slli_si128(sh, 0x4);
	a = _mm_xor_si128(a, sh);
	sh = _mm_slli_si128(sh, 0x4);
	a = _mm_xor_si128(a, sh);

	*t2 = g;
	*t1 = _mm_xor_si128(a, g);
}
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
/*
 * Second half of an AES-256 key expansion step.
 *   *t1  is only read: it is passed through
 *        _mm_aeskeygenassist_si128() with a zero round constant;
 *   *t3  updated in place: running XOR of its own 32-bit words,
 *        combined with word 2 of that key-assist value.
 */
BR_TARGET("sse2,aes")
static inline void
expand_step256_2(__m128i *t1, __m128i *t3)
{
	__m128i c, g, sh;

	/* Replicate 32-bit word 2 of the key-assist output. */
	g = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*t1, 0x0), 0xAA);

	/* c ^= (c << 32) ^ (c << 64) ^ (c << 96) */
	c = *t3;
	sh = _mm_slli_si128(c, 0x4);
	c = _mm_xor_si128(c, sh);
	sh = _mm_slli_si128(sh, 0x4);
	c = _mm_xor_si128(c, sh);
	sh = _mm_slli_si128(sh, 0x4);
	c = _mm_xor_si128(c, sh);

	*t3 = _mm_xor_si128(c, g);
}
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
/* |
|
114
|
|
|
|
|
|
|
* Perform key schedule for AES, encryption direction. Subkeys are written |
|
115
|
|
|
|
|
|
|
* in sk[], and the number of rounds is returned. Key length MUST be 16, |
|
116
|
|
|
|
|
|
|
* 24 or 32 bytes. |
|
117
|
|
|
|
|
|
|
*/ |
|
118
|
|
|
|
|
|
|
BR_TARGET("sse2,aes") |
|
119
|
|
|
|
|
|
|
static unsigned |
|
120
|
48
|
|
|
|
|
|
x86ni_keysched(__m128i *sk, const void *key, size_t len) |
|
121
|
|
|
|
|
|
|
{ |
|
122
|
|
|
|
|
|
|
const unsigned char *kb; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
#define KEXP128(k, i, rcon) do { \ |
|
125
|
|
|
|
|
|
|
k = expand_step128(k, _mm_aeskeygenassist_si128(k, rcon)); \ |
|
126
|
|
|
|
|
|
|
sk[i] = k; \ |
|
127
|
|
|
|
|
|
|
} while (0) |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
#define KEXP192(i, rcon1, rcon2) do { \ |
|
130
|
|
|
|
|
|
|
sk[(i) + 0] = t1; \ |
|
131
|
|
|
|
|
|
|
sk[(i) + 1] = t3; \ |
|
132
|
|
|
|
|
|
|
t2 = _mm_aeskeygenassist_si128(t3, rcon1); \ |
|
133
|
|
|
|
|
|
|
expand_step192(&t1, &t2, &t3); \ |
|
134
|
|
|
|
|
|
|
sk[(i) + 1] = _mm_castpd_si128(_mm_shuffle_pd( \ |
|
135
|
|
|
|
|
|
|
_mm_castsi128_pd(sk[(i) + 1]), \ |
|
136
|
|
|
|
|
|
|
_mm_castsi128_pd(t1), 0)); \ |
|
137
|
|
|
|
|
|
|
sk[(i) + 2] = _mm_castpd_si128(_mm_shuffle_pd( \ |
|
138
|
|
|
|
|
|
|
_mm_castsi128_pd(t1), \ |
|
139
|
|
|
|
|
|
|
_mm_castsi128_pd(t3), 1)); \ |
|
140
|
|
|
|
|
|
|
t2 = _mm_aeskeygenassist_si128(t3, rcon2); \ |
|
141
|
|
|
|
|
|
|
expand_step192(&t1, &t2, &t3); \ |
|
142
|
|
|
|
|
|
|
} while (0) |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
#define KEXP256(i, rcon) do { \ |
|
145
|
|
|
|
|
|
|
sk[(i) + 0] = t3; \ |
|
146
|
|
|
|
|
|
|
t2 = _mm_aeskeygenassist_si128(t3, rcon); \ |
|
147
|
|
|
|
|
|
|
expand_step256_1(&t1, &t2); \ |
|
148
|
|
|
|
|
|
|
sk[(i) + 1] = t1; \ |
|
149
|
|
|
|
|
|
|
expand_step256_2(&t1, &t3); \ |
|
150
|
|
|
|
|
|
|
} while (0) |
|
151
|
|
|
|
|
|
|
|
|
152
|
48
|
|
|
|
|
|
kb = key; |
|
153
|
48
|
|
|
|
|
|
switch (len) { |
|
154
|
|
|
|
|
|
|
__m128i t1, t2, t3; |
|
155
|
|
|
|
|
|
|
|
|
156
|
9
|
|
|
|
|
|
case 16: |
|
157
|
9
|
|
|
|
|
|
t1 = _mm_loadu_si128((const void *)kb); |
|
158
|
9
|
|
|
|
|
|
sk[0] = t1; |
|
159
|
9
|
|
|
|
|
|
KEXP128(t1, 1, 0x01); |
|
160
|
9
|
|
|
|
|
|
KEXP128(t1, 2, 0x02); |
|
161
|
9
|
|
|
|
|
|
KEXP128(t1, 3, 0x04); |
|
162
|
9
|
|
|
|
|
|
KEXP128(t1, 4, 0x08); |
|
163
|
9
|
|
|
|
|
|
KEXP128(t1, 5, 0x10); |
|
164
|
9
|
|
|
|
|
|
KEXP128(t1, 6, 0x20); |
|
165
|
9
|
|
|
|
|
|
KEXP128(t1, 7, 0x40); |
|
166
|
9
|
|
|
|
|
|
KEXP128(t1, 8, 0x80); |
|
167
|
9
|
|
|
|
|
|
KEXP128(t1, 9, 0x1B); |
|
168
|
9
|
|
|
|
|
|
KEXP128(t1, 10, 0x36); |
|
169
|
9
|
|
|
|
|
|
return 10; |
|
170
|
|
|
|
|
|
|
|
|
171
|
2
|
|
|
|
|
|
case 24: |
|
172
|
2
|
|
|
|
|
|
t1 = _mm_loadu_si128((const void *)kb); |
|
173
|
2
|
|
|
|
|
|
t3 = _mm_loadu_si128((const void *)(kb + 8)); |
|
174
|
2
|
|
|
|
|
|
t3 = _mm_shuffle_epi32(t3, 0x4E); |
|
175
|
14
|
|
|
|
|
|
KEXP192(0, 0x01, 0x02); |
|
176
|
14
|
|
|
|
|
|
KEXP192(3, 0x04, 0x08); |
|
177
|
14
|
|
|
|
|
|
KEXP192(6, 0x10, 0x20); |
|
178
|
14
|
|
|
|
|
|
KEXP192(9, 0x40, 0x80); |
|
179
|
2
|
|
|
|
|
|
sk[12] = t1; |
|
180
|
2
|
|
|
|
|
|
return 12; |
|
181
|
|
|
|
|
|
|
|
|
182
|
37
|
|
|
|
|
|
case 32: |
|
183
|
37
|
|
|
|
|
|
t1 = _mm_loadu_si128((const void *)kb); |
|
184
|
37
|
|
|
|
|
|
t3 = _mm_loadu_si128((const void *)(kb + 16)); |
|
185
|
37
|
|
|
|
|
|
sk[0] = t1; |
|
186
|
37
|
|
|
|
|
|
KEXP256( 1, 0x01); |
|
187
|
37
|
|
|
|
|
|
KEXP256( 3, 0x02); |
|
188
|
37
|
|
|
|
|
|
KEXP256( 5, 0x04); |
|
189
|
37
|
|
|
|
|
|
KEXP256( 7, 0x08); |
|
190
|
37
|
|
|
|
|
|
KEXP256( 9, 0x10); |
|
191
|
37
|
|
|
|
|
|
KEXP256(11, 0x20); |
|
192
|
37
|
|
|
|
|
|
sk[13] = t3; |
|
193
|
37
|
|
|
|
|
|
t2 = _mm_aeskeygenassist_si128(t3, 0x40); |
|
194
|
37
|
|
|
|
|
|
expand_step256_1(&t1, &t2); |
|
195
|
37
|
|
|
|
|
|
sk[14] = t1; |
|
196
|
37
|
|
|
|
|
|
return 14; |
|
197
|
|
|
|
|
|
|
|
|
198
|
0
|
|
|
|
|
|
default: |
|
199
|
0
|
|
|
|
|
|
return 0; |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
#undef KEXP128 |
|
203
|
|
|
|
|
|
|
#undef KEXP192 |
|
204
|
|
|
|
|
|
|
#undef KEXP256 |
|
205
|
|
|
|
|
|
|
} |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
/* see inner.h */ |
|
208
|
|
|
|
|
|
|
BR_TARGET("sse2,aes") |
|
209
|
|
|
|
|
|
|
unsigned |
|
210
|
37
|
|
|
|
|
|
br_aes_x86ni_keysched_enc(unsigned char *skni, const void *key, size_t len) |
|
211
|
|
|
|
|
|
|
{ |
|
212
|
|
|
|
|
|
|
__m128i sk[15]; |
|
213
|
|
|
|
|
|
|
unsigned num_rounds; |
|
214
|
|
|
|
|
|
|
|
|
215
|
37
|
|
|
|
|
|
num_rounds = x86ni_keysched(sk, key, len); |
|
216
|
37
|
|
|
|
|
|
memcpy(skni, sk, (num_rounds + 1) << 4); |
|
217
|
37
|
|
|
|
|
|
return num_rounds; |
|
218
|
|
|
|
|
|
|
} |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
/* see inner.h */ |
|
221
|
|
|
|
|
|
|
BR_TARGET("sse2,aes") |
|
222
|
|
|
|
|
|
|
unsigned |
|
223
|
11
|
|
|
|
|
|
br_aes_x86ni_keysched_dec(unsigned char *skni, const void *key, size_t len) |
|
224
|
|
|
|
|
|
|
{ |
|
225
|
|
|
|
|
|
|
__m128i sk[15]; |
|
226
|
|
|
|
|
|
|
unsigned u, num_rounds; |
|
227
|
|
|
|
|
|
|
|
|
228
|
11
|
|
|
|
|
|
num_rounds = x86ni_keysched(sk, key, len); |
|
229
|
11
|
|
|
|
|
|
_mm_storeu_si128((void *)skni, sk[num_rounds]); |
|
230
|
154
|
100
|
|
|
|
|
for (u = 1; u < num_rounds; u ++) { |
|
231
|
143
|
|
|
|
|
|
_mm_storeu_si128((void *)(skni + (u << 4)), |
|
232
|
143
|
|
|
|
|
|
_mm_aesimc_si128(sk[num_rounds - u])); |
|
233
|
|
|
|
|
|
|
} |
|
234
|
11
|
|
|
|
|
|
_mm_storeu_si128((void *)(skni + (num_rounds << 4)), sk[0]); |
|
235
|
11
|
|
|
|
|
|
return num_rounds; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
BR_TARGETS_X86_DOWN |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
#endif |