File Coverage

opt/opt.c
Criterion Covered Total %
statement 67 69 97.1
branch 44 48 91.6
condition n/a
subroutine n/a
pod n/a
total 111 117 94.8


line stmt bran cond sub pod time code
1             /*
2             * Argon2 reference source code package - reference C implementations
3             *
4             * Copyright 2015
5             * Daniel Dinu, Dmitry Khovratovich, Jean-Philippe Aumasson, and Samuel Neves
6             *
7             * You may use this work under the terms of a Creative Commons CC0 1.0
8             * License/Waiver or the Apache Public License 2.0, at your option. The terms of
9             * these licenses can be found at:
10             *
11             * - CC0 1.0 Universal : https://creativecommons.org/publicdomain/zero/1.0
12             * - Apache 2.0 : https://www.apache.org/licenses/LICENSE-2.0
13             *
14             * You should have received a copy of both of these licenses along with this
15             * software. If not, they may be obtained at the above URLs.
16             */
17              
18             #include <stdint.h>
19             #include <string.h>
20             #include <stdlib.h>
21              
22             #include "argon2.h"
23             #include "core.h"
24              
25             #include "blake2/blake2.h"
26             #include "blake2/blamka-round-opt.h"
27              
28             /*
29             * Function fills a new memory block and optionally XORs the old block over the new one.
30             * Memory must be initialized.
31             * @param state Pointer to the just produced block. Content will be updated(!)
32             * @param ref_block Pointer to the reference block
33             * @param next_block Pointer to the block to be XORed over. May coincide with @ref_block
34             * @param with_xor Whether to XOR into the new block (1) or just overwrite (0)
35             * @pre all block pointers must be valid
36             */
37             #if defined(__AVX512F__)
38             static void fill_block(__m512i *state, const block *ref_block,
39             block *next_block, int with_xor) {
40             __m512i block_XY[ARGON2_512BIT_WORDS_IN_BLOCK];
41             unsigned int i;
42              
43             if (with_xor) {
44             for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
45             state[i] = _mm512_xor_si512(
46             state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
47             block_XY[i] = _mm512_xor_si512(
48             state[i], _mm512_loadu_si512((const __m512i *)next_block->v + i));
49             }
50             } else {
51             for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
52             block_XY[i] = state[i] = _mm512_xor_si512(
53             state[i], _mm512_loadu_si512((const __m512i *)ref_block->v + i));
54             }
55             }
56              
57             for (i = 0; i < 2; ++i) {
58             BLAKE2_ROUND_1(
59             state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3],
60             state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]);
61             }
62              
63             for (i = 0; i < 2; ++i) {
64             BLAKE2_ROUND_2(
65             state[2 * 0 + i], state[2 * 1 + i], state[2 * 2 + i], state[2 * 3 + i],
66             state[2 * 4 + i], state[2 * 5 + i], state[2 * 6 + i], state[2 * 7 + i]);
67             }
68              
69             for (i = 0; i < ARGON2_512BIT_WORDS_IN_BLOCK; i++) {
70             state[i] = _mm512_xor_si512(state[i], block_XY[i]);
71             _mm512_storeu_si512((__m512i *)next_block->v + i, state[i]);
72             }
73             }
74             #elif defined(__AVX2__)
75             static void fill_block(__m256i *state, const block *ref_block,
76             block *next_block, int with_xor) {
77             __m256i block_XY[ARGON2_HWORDS_IN_BLOCK];
78             unsigned int i;
79              
80             if (with_xor) {
81             for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
82             state[i] = _mm256_xor_si256(
83             state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
84             block_XY[i] = _mm256_xor_si256(
85             state[i], _mm256_loadu_si256((const __m256i *)next_block->v + i));
86             }
87             } else {
88             for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
89             block_XY[i] = state[i] = _mm256_xor_si256(
90             state[i], _mm256_loadu_si256((const __m256i *)ref_block->v + i));
91             }
92             }
93              
94             for (i = 0; i < 4; ++i) {
95             BLAKE2_ROUND_1(state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5],
96             state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]);
97             }
98              
99             for (i = 0; i < 4; ++i) {
100             BLAKE2_ROUND_2(state[ 0 + i], state[ 4 + i], state[ 8 + i], state[12 + i],
101             state[16 + i], state[20 + i], state[24 + i], state[28 + i]);
102             }
103              
104             for (i = 0; i < ARGON2_HWORDS_IN_BLOCK; i++) {
105             state[i] = _mm256_xor_si256(state[i], block_XY[i]);
106             _mm256_storeu_si256((__m256i *)next_block->v + i, state[i]);
107             }
108             }
109             #else
110 10318716           static void fill_block(__m128i *state, const block *ref_block,
111             block *next_block, int with_xor) {
112             __m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
113             unsigned int i;
114              
115 10318716 100         if (with_xor) {
116 336625168 100         for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
117 1325785262           state[i] = _mm_xor_si128(
118 662892631           state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
119 994338894           block_XY[i] = _mm_xor_si128(
120 331446308           state[i], _mm_loadu_si128((const __m128i *)next_block->v + i));
121             }
122             } else {
123 339248984 100         for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
124 1315721192           block_XY[i] = state[i] = _mm_xor_si128(
125 657860596           state[i], _mm_loadu_si128((const __m128i *)ref_block->v + i));
126             }
127             }
128              
129 92865326 100         for (i = 0; i < 8; ++i) {
130 5200439440           BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
131             state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
132             state[8 * i + 6], state[8 * i + 7]);
133             }
134              
135 92865222 100         for (i = 0; i < 8; ++i) {
136 5200440248           BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
137             state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
138             state[8 * 6 + i], state[8 * 7 + i]);
139             }
140              
141 670693958 100         for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
142 1320751090           state[i] = _mm_xor_si128(state[i], block_XY[i]);
143 660375545           _mm_storeu_si128((__m128i *)next_block->v + i, state[i]);
144             }
145 10318413           }
146             #endif
147              
148 78664           static void next_addresses(block *address_block, block *input_block) {
149             /*Temporary zero-initialized blocks*/
150             #if defined(__AVX512F__)
151             __m512i zero_block[ARGON2_512BIT_WORDS_IN_BLOCK];
152             __m512i zero2_block[ARGON2_512BIT_WORDS_IN_BLOCK];
153             #elif defined(__AVX2__)
154             __m256i zero_block[ARGON2_HWORDS_IN_BLOCK];
155             __m256i zero2_block[ARGON2_HWORDS_IN_BLOCK];
156             #else
157             __m128i zero_block[ARGON2_OWORDS_IN_BLOCK];
158             __m128i zero2_block[ARGON2_OWORDS_IN_BLOCK];
159             #endif
160              
161             memset(zero_block, 0, sizeof(zero_block));
162             memset(zero2_block, 0, sizeof(zero2_block));
163              
164             /*Increasing index counter*/
165 78664           input_block->v[6]++;
166              
167             /*First iteration of G*/
168 78664           fill_block(zero_block, input_block, address_block, 0);
169              
170             /*Second iteration of G*/
171 78664           fill_block(zero2_block, address_block, address_block, 0);
172 78664           }
173              
174 260           void fill_segment(const argon2_instance_t *instance,
175             argon2_position_t position) {
176             block *ref_block = NULL, *curr_block = NULL;
177             block address_block, input_block;
178             uint64_t pseudo_rand, ref_index, ref_lane;
179             uint32_t prev_offset, curr_offset;
180             uint32_t starting_index, i;
181             #if defined(__AVX512F__)
182             __m512i state[ARGON2_512BIT_WORDS_IN_BLOCK];
183             #elif defined(__AVX2__)
184             __m256i state[ARGON2_HWORDS_IN_BLOCK];
185             #else
186             __m128i state[ARGON2_OWORDS_IN_BLOCK];
187             #endif
188             int data_independent_addressing;
189              
190 260 50         if (instance == NULL) {
191 0           return;
192             }
193              
194             data_independent_addressing =
195 260 100         (instance->type == Argon2_i) ||
    50          
196 8 100         (instance->type == Argon2_id && (position.pass == 0) &&
    100          
197 4           (position.slice < ARGON2_SYNC_POINTS / 2));
198              
199 260 100         if (data_independent_addressing) {
200 254           init_block_value(&input_block, 0);
201              
202 254           input_block.v[0] = position.pass;
203 254           input_block.v[1] = position.lane;
204 254           input_block.v[2] = position.slice;
205 254           input_block.v[3] = instance->memory_blocks;
206 254           input_block.v[4] = instance->passes;
207 254           input_block.v[5] = instance->type;
208             }
209              
210             starting_index = 0;
211              
212 260 100         if ((0 == position.pass) && (0 == position.slice)) {
    100          
213             starting_index = 2; /* we have already generated the first two blocks */
214              
215             /* Don't forget to generate the first block of addresses: */
216 31 50         if (data_independent_addressing) {
217 31           next_addresses(&address_block, &input_block);
218             }
219             }
220              
221             /* Offset of the current block */
222 520           curr_offset = position.lane * instance->lane_length +
223 260           position.slice * instance->segment_length + starting_index;
224              
225 260 100         if (0 == curr_offset % instance->lane_length) {
226             /* Last block in this lane */
227 34           prev_offset = curr_offset + instance->lane_length - 1;
228             } else {
229             /* Previous block */
230 226           prev_offset = curr_offset - 1;
231             }
232              
233 260           memcpy(state, ((instance->memory + prev_offset)->v), ARGON2_BLOCK_SIZE);
234              
235 10161346 100         for (i = starting_index; i < instance->segment_length;
236 10161086           ++i, ++curr_offset, ++prev_offset) {
237             /*1.1 Rotating prev_offset if needed */
238 10161086 100         if (curr_offset % instance->lane_length == 1) {
239 34           prev_offset = curr_offset - 1;
240             }
241              
242             /* 1.2 Computing the index of the reference block */
243             /* 1.2.1 Taking pseudo-random value from the previous block */
244 10161086 100         if (data_independent_addressing) {
245 10062782 100         if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
246 78633           next_addresses(&address_block, &input_block);
247             }
248 10062781           pseudo_rand = address_block.v[i % ARGON2_ADDRESSES_IN_BLOCK];
249             } else {
250 98304           pseudo_rand = instance->memory[prev_offset].v[0];
251             }
252              
253             /* 1.2.2 Computing the lane of the reference block */
254 10161085           ref_lane = ((pseudo_rand >> 32)) % instance->lanes;
255              
256 10161085 100         if ((position.pass == 0) && (position.slice == 0)) {
    100          
257             /* Can not reference other lanes yet */
258 1245506           ref_lane = position.lane;
259             }
260              
261             /* 1.2.3 Computing the number of possible reference blocks within the
262             * lane.
263             */
264 10161085           position.index = i;
265 10161085           ref_index = index_alpha(instance, &position, pseudo_rand & 0xFFFFFFFF,
266 10161085           ref_lane == position.lane);
267              
268             /* 2 Creating a new block */
269 10161085           ref_block =
270 10161085           instance->memory + instance->lane_length * ref_lane + ref_index;
271 10161085           curr_block = instance->memory + curr_offset;
272 10161085 50         if (ARGON2_VERSION_10 == instance->version) {
273             /* version 1.2.1 and earlier: overwrite, not XOR */
274 0           fill_block(state, ref_block, curr_block, 0);
275             } else {
276 10161085 100         if(0 == position.pass) {
277 4982210           fill_block(state, ref_block, curr_block, 0);
278             } else {
279 5178875           fill_block(state, ref_block, curr_block, 1);
280             }
281             }
282             }
283             }