line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* |
2
|
|
|
|
|
|
|
* Argon2 reference source code package - reference C implementations |
3
|
|
|
|
|
|
|
* |
4
|
|
|
|
|
|
|
* Copyright 2015 |
5
|
|
|
|
|
|
|
* Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves |
6
|
|
|
|
|
|
|
* |
7
|
|
|
|
|
|
|
* You may use this work under the terms of a Creative Commons CC0 1.0 |
8
|
|
|
|
|
|
|
* License/Waiver or the Apache Public License 2.0, at your option. The terms of |
9
|
|
|
|
|
|
|
* these licenses can be found at: |
10
|
|
|
|
|
|
|
* |
11
|
|
|
|
|
|
|
* - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0 |
12
|
|
|
|
|
|
|
* - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0 |
13
|
|
|
|
|
|
|
* |
14
|
|
|
|
|
|
|
* You should have received a copy of both of these licenses along with this |
15
|
|
|
|
|
|
|
* software. If not, they may be obtained at the above URLs. |
16
|
|
|
|
|
|
|
*/ |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
#include |
19
|
|
|
|
|
|
|
#include |
20
|
|
|
|
|
|
|
#include |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
#include "argon2.h" |
23
|
|
|
|
|
|
|
#include "core.h" |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
#include "blake2/blake2.h" |
26
|
|
|
|
|
|
|
#include "blake2/blamka-round-opt.h" |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
/* |
29
|
|
|
|
|
|
|
* Function fills a new memory block and optionally XORs the old block over the new one. |
30
|
|
|
|
|
|
|
* Memory must be initialized. |
31
|
|
|
|
|
|
|
* @param state Pointer to the just produced block. Content will be updated(!) |
32
|
|
|
|
|
|
|
* @param ref_block Pointer to the reference block |
33
|
|
|
|
|
|
|
* @param next_block Pointer to the block to be XORed over. May coincide with @ref_block |
34
|
|
|
|
|
|
|
* @param with_xor Whether to XOR into the new block (1) or just overwrite (0) |
35
|
|
|
|
|
|
|
* @pre all block pointers must be valid |
36
|
|
|
|
|
|
|
*/ |
37
|
|
|
|
|
|
|
#if defined(__AVX512F__) |
38
|
|
|
|
|
|
|
static void fill_block(__m512i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    /*
     * AVX-512 variant of the compression step.
     *
     * state      - running state, updated in place; on return it holds the
     *              same words that were stored to next_block
     * ref_block  - block XORed into the state before the rounds
     * next_block - destination; its previous content is additionally folded
     *              into the result when with_xor is non-zero
     * with_xor   - non-zero: XOR over the old next_block, zero: overwrite
     *
     * next_block is only read when with_xor is non-zero, and always before
     * any store to it, so it may alias ref_block.
     */
    const __m512i *ref_words = (const __m512i *)ref_block->v;
    __m512i *next_words = (__m512i *)next_block->v;
    __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
    unsigned int w, row, col;

    if (with_xor) {
        for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; ++w) {
            state[w] =
                _mm512_xor_si512(state[w], _mm512_loadu_si512(ref_words + w));
            /* Feed-forward value also absorbs the old destination block. */
            block_XY[w] =
                _mm512_xor_si512(state[w], _mm512_loadu_si512(next_words + w));
        }
    } else {
        for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; ++w) {
            state[w] =
                _mm512_xor_si512(state[w], _mm512_loadu_si512(ref_words + w));
            block_XY[w] = state[w];
        }
    }

    /* First pass: two groups of eight consecutive 512-bit words. */
    for (row = 0; row < 2; ++row) {
        BLAKE2_ROUND_1(
            state[8 * row + 0], state[8 * row + 1], state[8 * row + 2], state[8 * row + 3],
            state[8 * row + 4], state[8 * row + 5], state[8 * row + 6], state[8 * row + 7]);
    }

    /* Second pass: two groups of eight words taken with stride 2. */
    for (col = 0; col < 2; ++col) {
        BLAKE2_ROUND_2(
            state[2 * 0 + col], state[2 * 1 + col], state[2 * 2 + col], state[2 * 3 + col],
            state[2 * 4 + col], state[2 * 5 + col], state[2 * 6 + col], state[2 * 7 + col]);
    }

    /* Feed-forward XOR, then publish the new block. */
    for (w = 0; w < ARGON2_512BIT_WORDS_IN_BLOCK; ++w) {
        state[w] = _mm512_xor_si512(state[w], block_XY[w]);
        _mm512_storeu_si512(next_words + w, state[w]);
    }
}
74
|
|
|
|
|
|
|
#elif defined(__AVX2__) |
75
|
|
|
|
|
|
|
static void fill_block(__m256i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    /*
     * AVX2 variant of the compression step.
     *
     * state      - running state, updated in place; on return it holds the
     *              same words that were stored to next_block
     * ref_block  - block XORed into the state before the rounds
     * next_block - destination; its previous content is additionally folded
     *              into the result when with_xor is non-zero
     * with_xor   - non-zero: XOR over the old next_block, zero: overwrite
     *
     * next_block is only read when with_xor is non-zero, and always before
     * any store to it, so it may alias ref_block.
     */
    const __m256i *ref_words = (const __m256i *)ref_block->v;
    __m256i *next_words = (__m256i *)next_block->v;
    __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
    unsigned int w, row, col;

    if (with_xor) {
        for (w = 0; w < ARGON2_HWORDS_IN_BLOCK; ++w) {
            state[w] =
                _mm256_xor_si256(state[w], _mm256_loadu_si256(ref_words + w));
            /* Feed-forward value also absorbs the old destination block. */
            block_XY[w] =
                _mm256_xor_si256(state[w], _mm256_loadu_si256(next_words + w));
        }
    } else {
        for (w = 0; w < ARGON2_HWORDS_IN_BLOCK; ++w) {
            state[w] =
                _mm256_xor_si256(state[w], _mm256_loadu_si256(ref_words + w));
            block_XY[w] = state[w];
        }
    }

    /*
     * First pass: four groups of eight consecutive 256-bit words. The words
     * are handed to the macro in the order 0,4,1,5,2,6,3,7.
     */
    for (row = 0; row < 4; ++row) {
        BLAKE2_ROUND_1(state[8 * row + 0], state[8 * row + 4], state[8 * row + 1], state[8 * row + 5],
                       state[8 * row + 2], state[8 * row + 6], state[8 * row + 3], state[8 * row + 7]);
    }

    /* Second pass: four groups of eight words taken with stride 4. */
    for (col = 0; col < 4; ++col) {
        BLAKE2_ROUND_2(state[ 0 + col], state[ 4 + col], state[ 8 + col], state[12 + col],
                       state[16 + col], state[20 + col], state[24 + col], state[28 + col]);
    }

    /* Feed-forward XOR, then publish the new block. */
    for (w = 0; w < ARGON2_HWORDS_IN_BLOCK; ++w) {
        state[w] = _mm256_xor_si256(state[w], block_XY[w]);
        _mm256_storeu_si256(next_words + w, state[w]);
    }
}
109
|
|
|
|
|
|
|
#else |
110
|
10318746
|
|
|
|
|
|
static void fill_block(__m128i *state, const block *ref_block,
                       block *next_block, int with_xor) {
    /*
     * 128-bit SIMD variant of the compression step (used when neither
     * __AVX512F__ nor __AVX2__ is defined).
     *
     * state      - running state, updated in place; on return it holds the
     *              same words that were stored to next_block
     * ref_block  - block XORed into the state before the rounds
     * next_block - destination; its previous content is additionally folded
     *              into the result when with_xor is non-zero
     * with_xor   - non-zero: XOR over the old next_block, zero: overwrite
     *
     * next_block is only read when with_xor is non-zero, and always before
     * any store to it, so it may alias ref_block.
     */
    const __m128i *ref_words = (const __m128i *)ref_block->v;
    __m128i *next_words = (__m128i *)next_block->v;
    __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
    unsigned int w, row, col;

    if (with_xor) {
        for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; ++w) {
            state[w] = _mm_xor_si128(state[w], _mm_loadu_si128(ref_words + w));
            /* Feed-forward value also absorbs the old destination block. */
            block_XY[w] =
                _mm_xor_si128(state[w], _mm_loadu_si128(next_words + w));
        }
    } else {
        for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; ++w) {
            state[w] = _mm_xor_si128(state[w], _mm_loadu_si128(ref_words + w));
            block_XY[w] = state[w];
        }
    }

    /* First pass: eight groups of eight consecutive 128-bit words. */
    for (row = 0; row < 8; ++row) {
        BLAKE2_ROUND(state[8 * row + 0], state[8 * row + 1], state[8 * row + 2],
                     state[8 * row + 3], state[8 * row + 4], state[8 * row + 5],
                     state[8 * row + 6], state[8 * row + 7]);
    }

    /* Second pass: eight groups of eight words taken with stride 8. */
    for (col = 0; col < 8; ++col) {
        BLAKE2_ROUND(state[8 * 0 + col], state[8 * 1 + col], state[8 * 2 + col],
                     state[8 * 3 + col], state[8 * 4 + col], state[8 * 5 + col],
                     state[8 * 6 + col], state[8 * 7 + col]);
    }

    /* Feed-forward XOR, then publish the new block. */
    for (w = 0; w < ARGON2_OWORDS_IN_BLOCK; ++w) {
        state[w] = _mm_xor_si128(state[w], block_XY[w]);
        _mm_storeu_si128(next_words + w, state[w]);
    }
}
146
|
|
|
|
|
|
|
#endif |
147
|
|
|
|
|
|
|
|
148
|
78664
|
|
|
|
|
|
static void next_addresses(block *address_block, block *input_block) {
    /*
     * Advances the counter word of input_block (v[6]) and writes the result
     * of two chained fill_block() calls into address_block. Each call starts
     * from its own all-zero state; the two rows of scratch[] provide them.
     */
#if defined(__AVX512F__)
    __m512i scratch[2][ARGON2_512BIT_WORDS_IN_BLOCK];
#elif defined(__AVX2__)
    __m256i scratch[2][ARGON2_HWORDS_IN_BLOCK];
#else
    __m128i scratch[2][ARGON2_OWORDS_IN_BLOCK];
#endif

    memset(scratch, 0, sizeof(scratch));

    /* Bump the index counter before it is consumed. */
    input_block->v[6] += 1;

    /* First application: zero state combined with input_block. */
    fill_block(scratch[0], input_block, address_block, 0);

    /* Second application: fresh zero state combined with the first result. */
    fill_block(scratch[1], address_block, address_block, 0);
}
173
|
|
|
|
|
|
|
|
174
|
260
|
|
|
|
|
|
void fill_segment(const argon2_instance_t *instance, |
175
|
|
|
|
|
|
|
argon2_position_t position) { |
176
|
|
|
|
|
|
|
block *ref_block = NULL, *curr_block = NULL; |
177
|
|
|
|
|
|
|
block address_block, input_block; |
178
|
|
|
|
|
|
|
uint64_t pseudo_rand, ref_index, ref_lane; |
179
|
|
|
|
|
|
|
uint32_t prev_offset, curr_offset; |
180
|
|
|
|
|
|
|
uint32_t starting_index, i; |
181
|
|
|
|
|
|
|
#if defined(__AVX512F__) |
182
|
|
|
|
|
|
|
__m512i state[ARGON2_512BIT_WORDS_IN_BLOCK]; |
183
|
|
|
|
|
|
|
#elif defined(__AVX2__) |
184
|
|
|
|
|
|
|
__m256i state[ARGON2_HWORDS_IN_BLOCK]; |
185
|
|
|
|
|
|
|
#else |
186
|
|
|
|
|
|
|
__m128i state[ARGON2_OWORDS_IN_BLOCK]; |
187
|
|
|
|
|
|
|
#endif |
188
|
|
|
|
|
|
|
int data_independent_addressing; |
189
|
|
|
|
|
|
|
|
190
|
260
|
50
|
|
|
|
|
if (instance == NULL) { |
191
|
0
|
|
|
|
|
|
return; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
data_independent_addressing = |
195
|
260
|
100
|
|
|
|
|
(instance->type == Argon2_i) || |
|
|
50
|
|
|
|
|
|
196
|
8
|
100
|
|
|
|
|
(instance->type == Argon2_id && (position.pass == 0) && |
|
|
100
|
|
|
|
|
|
197
|
4
|
|
|
|
|
|
(position.slice < ARGON2_SYNC_POINTS / 2)); |
198
|
|
|
|
|
|
|
|
199
|
260
|
100
|
|
|
|
|
if (data_independent_addressing) { |
200
|
254
|
|
|
|
|
|
init_block_value(&input_block, 0); |
201
|
|
|
|
|
|
|
|
202
|
254
|
|
|
|
|
|
input_block.v[0] = position.pass; |
203
|
254
|
|
|
|
|
|
input_block.v[1] = position.lane; |
204
|
254
|
|
|
|
|
|
input_block.v[2] = position.slice; |
205
|
254
|
|
|
|
|
|
input_block.v[3] = instance->memory_blocks; |
206
|
254
|
|
|
|
|
|
input_block.v[4] = instance->passes; |
207
|
254
|
|
|
|
|
|
input_block.v[5] = instance->type; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
starting_index = 0; |
211
|
|
|
|
|
|
|
|
212
|
260
|
100
|
|
|
|
|
if ((0 == position.pass) && (0 == position.slice)) { |
|
|
100
|
|
|
|
|
|
213
|
|
|
|
|
|
|
starting_index = 2; /* we have already generated the first two blocks */ |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
/* Don't forget to generate the first block of addresses: */ |
216
|
31
|
50
|
|
|
|
|
if (data_independent_addressing) { |
217
|
31
|
|
|
|
|
|
next_addresses(&address_block, &input_block); |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
/* Offset of the current block */ |
222
|
516
|
|
|
|
|
|
curr_offset = position.lane * instance->lane_length + |
223
|
258
|
|
|
|
|
|
position.slice * instance->segment_length + starting_index; |
224
|
|
|
|
|
|
|
|
225
|
258
|
100
|
|
|
|
|
if (0 == curr_offset % instance->lane_length) { |
226
|
|
|
|
|
|
|
/* Last block in this lane */ |
227
|
32
|
|
|
|
|
|
prev_offset = curr_offset + instance->lane_length - 1; |
228
|
|
|
|
|
|
|
} else { |
229
|
|
|
|
|
|
|
/* Previous block */ |
230
|
226
|
|
|
|
|
|
prev_offset = curr_offset - 1; |
231
|
|
|
|
|
|
|
} |
232
|
|
|
|
|
|
|
|
233
|
258
|
|
|
|
|
|
memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE); |
234
|
|
|
|
|
|
|
|
235
|
10161343
|
100
|
|
|
|
|
for (i = starting_index; i < instance->segment_length; |
236
|
10161085
|
|
|
|
|
|
++i, ++curr_offset, ++prev_offset) { |
237
|
|
|
|
|
|
|
/*1.1 Rotating prev_offset if needed */ |
238
|
10161083
|
100
|
|
|
|
|
if (curr_offset % instance->lane_length == 1) { |
239
|
34
|
|
|
|
|
|
prev_offset = curr_offset - 1; |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
/* 1.2 Computing the index of the reference block */ |
243
|
|
|
|
|
|
|
/* 1.2.1 Taking pseudo-random value from the previous block */ |
244
|
10161083
|
100
|
|
|
|
|
if (data_independent_addressing) { |
245
|
10062779
|
100
|
|
|
|
|
if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { |
246
|
78633
|
|
|
|
|
|
next_addresses(&address_block, &input_block); |
247
|
|
|
|
|
|
|
} |
248
|
10062781
|
|
|
|
|
|
pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK]; |
249
|
|
|
|
|
|
|
} else { |
250
|
98304
|
|
|
|
|
|
pseudo_rand = instance->memory[prev_offset].v[0]; |
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
/* 1.2.2 Computing the lane of the reference block */ |
254
|
10161085
|
|
|
|
|
|
ref_lane = ((pseudo_rand >> 32)) % instance->lanes; |
255
|
|
|
|
|
|
|
|
256
|
10161085
|
100
|
|
|
|
|
if ((position.pass == 0) && (position.slice == 0)) { |
|
|
100
|
|
|
|
|
|
257
|
|
|
|
|
|
|
/* Can not reference other lanes yet */ |
258
|
1245506
|
|
|
|
|
|
ref_lane = position.lane; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
/* 1.2.3 Computing the number of possible reference block within the |
262
|
|
|
|
|
|
|
* lane. |
263
|
|
|
|
|
|
|
*/ |
264
|
10161085
|
|
|
|
|
|
position.index = i; |
265
|
10161085
|
|
|
|
|
|
ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF, |
266
|
10161085
|
|
|
|
|
|
ref_lane == position.lane); |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
/* 2 Creating a new block */ |
269
|
10161085
|
|
|
|
|
|
ref_block = |
270
|
10161085
|
|
|
|
|
|
instance->memory + instance->lane_length * ref_lane + ref_index; |
271
|
10161085
|
|
|
|
|
|
curr_block = instance->memory + curr_offset; |
272
|
10161085
|
50
|
|
|
|
|
if (ARGON2_VERSION_10 == instance->version) { |
273
|
|
|
|
|
|
|
/* version 1.2.1 and earlier: overwrite, not XOR */ |
274
|
0
|
|
|
|
|
|
fill_block(state, ref_block, curr_block, 0); |
275
|
|
|
|
|
|
|
} else { |
276
|
10161085
|
100
|
|
|
|
|
if(0 == position.pass) { |
277
|
4982206
|
|
|
|
|
|
fill_block(state, ref_block, curr_block, 0); |
278
|
|
|
|
|
|
|
} else { |
279
|
5178879
|
|
|
|
|
|
fill_block(state, ref_block, curr_block, 1); |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |