line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/** |
2
|
|
|
|
|
|
|
* @file pstm_sqr_comba.c |
3
|
|
|
|
|
|
|
* @version 950bba4 (HEAD -> master) |
4
|
|
|
|
|
|
|
* |
5
|
|
|
|
|
|
|
* Multiprecision Squaring with Comba technique. |
6
|
|
|
|
|
|
|
*/ |
7
|
|
|
|
|
|
|
/* |
8
|
|
|
|
|
|
|
* Copyright (c) 2013-2017 INSIDE Secure Corporation |
9
|
|
|
|
|
|
|
* Copyright (c) PeerSec Networks, 2002-2011 |
10
|
|
|
|
|
|
|
* All Rights Reserved |
11
|
|
|
|
|
|
|
* |
12
|
|
|
|
|
|
|
* The latest version of this code is available at http://www.matrixssl.org |
13
|
|
|
|
|
|
|
* |
14
|
|
|
|
|
|
|
* This software is open source; you can redistribute it and/or modify |
15
|
|
|
|
|
|
|
* it under the terms of the GNU General Public License as published by |
16
|
|
|
|
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
17
|
|
|
|
|
|
|
* (at your option) any later version. |
18
|
|
|
|
|
|
|
* |
19
|
|
|
|
|
|
|
* This General Public License does NOT permit incorporating this software |
20
|
|
|
|
|
|
|
* into proprietary programs. If you are unable to comply with the GPL, a |
21
|
|
|
|
|
|
|
* commercial license for this software may be purchased from INSIDE at |
22
|
|
|
|
|
|
|
* http://www.insidesecure.com/ |
23
|
|
|
|
|
|
|
* |
24
|
|
|
|
|
|
|
* This program is distributed in WITHOUT ANY WARRANTY; without even the |
25
|
|
|
|
|
|
|
* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
26
|
|
|
|
|
|
|
* See the GNU General Public License for more details. |
27
|
|
|
|
|
|
|
* |
28
|
|
|
|
|
|
|
* You should have received a copy of the GNU General Public License |
29
|
|
|
|
|
|
|
* along with this program; if not, write to the Free Software |
30
|
|
|
|
|
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
31
|
|
|
|
|
|
|
* http://www.gnu.org/copyleft/gpl.html |
32
|
|
|
|
|
|
|
*/ |
33
|
|
|
|
|
|
|
/******************************************************************************/ |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
#include "../cryptoImpl.h" |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#if defined(USE_MATRIX_RSA) || defined(USE_MATRIX_ECC) || defined(USE_MATRIX_DH) |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
/******************************************************************************/ |
40
|
|
|
|
|
|
|
# if defined(PSTM_X86) |
41
|
|
|
|
|
|
|
/* x86-32 optimized for 32 bit platforms. For 64 bit mode use X86_64 instead */ |
42
|
|
|
|
|
|
|
# if !defined(__GNUC__) || !defined(__i386__) |
43
|
|
|
|
|
|
|
# error "PSTM_X86 option requires GCC and 32 bit mode x86 processor" |
44
|
|
|
|
|
|
|
# endif |
45
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit x86 Assembly Optimizations") */ |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
/*
    32-bit x86 (GCC inline assembly) Comba squaring primitives.

    Every macro works on variables that must already exist in the expanding
    scope: c0, c1, c2 are the three-digit column accumulator (c0 is the least
    significant digit) and sc0, sc1, sc2 are a secondary accumulator used by
    the SQRADDSC / SQRADDAC / SQRADDDB sequence.  Each non-empty macro body
    already ends in ';'.

    __asm__ is used instead of asm so the macros also expand correctly when
    the compiler runs in a strict -std= mode, where the plain asm keyword is
    not available.
 */
# define COMBA_START

/* Zero the column accumulator. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit (c0) into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit (c1) into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

# define COMBA_FINI

/*
    c2:c1:c0 += i * i.
    Only i is read (it has an "m" constraint, so it must be an lvalue in
    memory); j is accepted for symmetry with the other variants and ignored.
 */
# define SQRADD(i, j) \
    __asm__ ( \
        "movl %6,%%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "m" (i) : "%eax", "%edx", "cc");

/* c2:c1:c0 += 2 * (i * j): the product is computed once and added twice. */
# define SQRADD2(i, j) \
    __asm__ ( \
        "movl %6,%%eax \n\t" \
        "mull %7 \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "m" (i), "m" (j) : "%eax", "%edx", "cc");

/*
    Start a secondary sum: sc1:sc0 = i * j, sc2 = 0.
    j is the operand of MUL, which has no immediate form, so it is
    constrained to "rm" (register or memory) rather than "g".
 */
# define SQRADDSC(i, j) \
    __asm__ ( \
        "movl %6,%%eax \n\t" \
        "mull %7 \n\t" \
        "movl %%eax,%0 \n\t" \
        "movl %%edx,%1 \n\t" \
        "xorl %2,%2 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "rm" (j) : "%eax", "%edx", "cc");

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j (j is "rm", see SQRADDSC). */
# define SQRADDAC(i, j) \
    __asm__ ( \
        "movl %6,%%eax \n\t" \
        "mull %7 \n\t" \
        "addl %%eax,%0 \n\t" \
        "adcl %%edx,%1 \n\t" \
        "adcl $0,%2 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "rm" (j) : "%eax", "%edx", "cc");

/* Add the secondary sum to the column accumulator twice: c2:c1:c0 += 2 * sc2:sc1:sc0. */
# define SQRADDDB \
    __asm__ ( \
        "addl %6,%0 \n\t" \
        "adcl %7,%1 \n\t" \
        "adcl %8,%2 \n\t" \
        "addl %6,%0 \n\t" \
        "adcl %7,%1 \n\t" \
        "adcl %8,%2 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (sc0), "r" (sc1), "r" (sc2) : "cc");
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/******************************************************************************/ |
113
|
|
|
|
|
|
|
# elif defined(PSTM_X86_64) |
114
|
|
|
|
|
|
|
/* x86-64 optimized */ |
115
|
|
|
|
|
|
|
# if !defined(__GNUC__) || !defined(__x86_64__) || !defined(PSTM_64BIT) |
116
|
|
|
|
|
|
|
# error "PSTM_X86_64 option requires PSTM_64BIT, GCC and 64 bit mode x86 processor" |
117
|
|
|
|
|
|
|
# endif |
118
|
|
|
|
|
|
|
/* #pragma message ("Using 64 bit x86_64 Assembly Optimizations") */ |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
/*
    x86-64 (GCC inline assembly) Comba squaring primitives.

    Every macro works on variables that must already exist in the expanding
    scope: c0, c1, c2 are the three-digit column accumulator (c0 is the least
    significant digit) and sc0, sc1, sc2 are a secondary accumulator used by
    the SQRADDSC / SQRADDAC / SQRADDDB sequence.  Each non-empty macro body
    already ends in ';'.

    __asm__ is used instead of asm so the macros also expand correctly when
    the compiler runs in a strict -std= mode, where the plain asm keyword is
    not available.
 */
# define COMBA_START

/* Zero the column accumulator. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit (c0) into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit (c1) into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

# define COMBA_FINI

/* c2:c1:c0 += i * i.  Only i is read; j is ignored. */
# define SQRADD(i, j) \
    __asm__ ( \
        "movq %6,%%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax,%0 \n\t" \
        "adcq %%rdx,%1 \n\t" \
        "adcq $0,%2 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "g" (i) : "%rax", "%rdx", "cc");

/*
    c2:c1:c0 += 2 * (i * j): the product is computed once and added twice.
    j is the operand of MUL, which has no immediate form, so it is
    constrained to "rm" (register or memory) rather than "g".
 */
# define SQRADD2(i, j) \
    __asm__ ( \
        "movq %6,%%rax \n\t" \
        "mulq %7 \n\t" \
        "addq %%rax,%0 \n\t" \
        "adcq %%rdx,%1 \n\t" \
        "adcq $0,%2 \n\t" \
        "addq %%rax,%0 \n\t" \
        "adcq %%rdx,%1 \n\t" \
        "adcq $0,%2 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "g" (i), "rm" (j) : "%rax", "%rdx", "cc");

/* Start a secondary sum: sc1:sc0 = i * j, sc2 = 0 (j is "rm", see SQRADD2). */
# define SQRADDSC(i, j) \
    __asm__ ( \
        "movq %6,%%rax \n\t" \
        "mulq %7 \n\t" \
        "movq %%rax,%0 \n\t" \
        "movq %%rdx,%1 \n\t" \
        "xorq %2,%2 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "rm" (j) : "%rax", "%rdx", "cc");

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j (j is "rm", see SQRADD2). */
# define SQRADDAC(i, j) \
    __asm__ ( \
        "movq %6,%%rax \n\t" \
        "mulq %7 \n\t" \
        "addq %%rax,%0 \n\t" \
        "adcq %%rdx,%1 \n\t" \
        "adcq $0,%2 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "g" (i), "rm" (j) : "%rax", "%rdx", "cc");

/* Add the secondary sum to the column accumulator twice: c2:c1:c0 += 2 * sc2:sc1:sc0. */
# define SQRADDDB \
    __asm__ ( \
        "addq %6,%0 \n\t" \
        "adcq %7,%1 \n\t" \
        "adcq %8,%2 \n\t" \
        "addq %6,%0 \n\t" \
        "adcq %7,%1 \n\t" \
        "adcq %8,%2 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (sc0), "r" (sc1), "r" (sc2) : "cc");
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
/******************************************************************************/ |
186
|
|
|
|
|
|
|
# elif defined(PSTM_ARM) |
187
|
|
|
|
|
|
|
/* ARM code */ |
188
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit ARM Assembly Optimizations") */ |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
/*
    32-bit ARM (GCC inline assembly) Comba squaring primitives.

    Every macro works on variables that must already exist in the expanding
    scope: c0, c1, c2 are the three-digit column accumulator (c0 is the least
    significant digit) and sc0, sc1, sc2 are a secondary accumulator used by
    the SQRADDSC / SQRADDAC / SQRADDDB sequence.  Each non-empty macro body
    already ends in ';'.  r0 and r1 are used as fixed scratch registers and
    are listed as clobbers where they are written.
 */
# define COMBA_START

/* Zero the column accumulator. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit (c0) into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit (c1) into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

# define COMBA_FINI

/* c2:c1:c0 += i * i.  Only i is read; j is ignored. */
# define SQRADD(i, j) \
    asm ( \
        " UMULL r0,r1,%6,%6 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i) : "r0", "r1", "cc");

/* For squaring the cross terms are doubled: c2:c1:c0 += 2 * (i * j), product computed once and added twice. */
# define SQRADD2(i, j) \
    asm ( \
        " UMULL r0,r1,%6,%7 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i), "r" (j) : "r0", "r1", "cc");

/* Start a secondary sum: sc1:sc0 = i * j (written directly by UMULL), sc2 = 0. */
# define SQRADDSC(i, j) \
    asm ( \
        " UMULL %0,%1,%6,%7 \n\t" \
        " SUB %2,%2,%2 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "cc");

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
# define SQRADDAC(i, j) \
    asm ( \
        " UMULL r0,r1,%6,%7 \n\t" \
        " ADDS %0,%0,r0 \n\t" \
        " ADCS %1,%1,r1 \n\t" \
        " ADC %2,%2,#0 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "r0", "r1", "cc");

/*
    Add the secondary sum to the column accumulator twice:
    c2:c1:c0 += 2 * sc2:sc1:sc0.
    Note the operand order: sc0..sc2 are inputs %3..%5 here, ahead of the
    matching constraints for c0..c2.
 */
# define SQRADDDB \
    asm ( \
        " ADDS %0,%0,%3 \n\t" \
        " ADCS %1,%1,%4 \n\t" \
        " ADC %2,%2,%5 \n\t" \
        " ADDS %0,%0,%3 \n\t" \
        " ADCS %1,%1,%4 \n\t" \
        " ADC %2,%2,%5 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "r" (sc0), "r" (sc1), "r" (sc2), "0" (c0), "1" (c1), "2" (c2) : "cc");
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
/******************************************************************************/ |
252
|
|
|
|
|
|
|
# elif defined(PSTM_MIPS) |
253
|
|
|
|
|
|
|
/* MIPS32 */ |
254
|
|
|
|
|
|
|
/* #pragma message ("Using 32 bit MIPS Assembly Optimizations") */ |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
/*
    MIPS32 (GCC inline assembly) Comba squaring primitives.

    Every macro works on variables that must already exist in the expanding
    scope: c0, c1, c2 are the three-digit column accumulator (c0 is the least
    significant digit) and sc0, sc1, sc2 are a secondary accumulator used by
    the SQRADDSC / SQRADDAC / SQRADDDB sequence.  Each non-empty macro body
    already ends in ';'.  There is no carry flag in these sequences: each
    carry is recomputed with sltu (sum < addend) into a scratch register.

    NOTE(review): multu writes the HI/LO registers, which do not appear in
    any clobber list below -- confirm whether the target toolchain requires
    them to be declared.
 */
# define COMBA_START

/* Zero the column accumulator. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit (c0) into x. */
# define COMBA_STORE(x) \
    x = c0;

/* Store the middle accumulator digit (c1) into x. */
# define COMBA_STORE2(x) \
    x = c1;

/* Move to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

# define COMBA_FINI

/* c2:c1:c0 += i * i.  Only i is read; j is ignored.  $12/$13 hold LO/HI, then the carries. */
# define SQRADD(i, j) \
    asm ( \
        " multu %6,%6 \n\t" \
        " mflo $12 \n\t" \
        " mfhi $13 \n\t" \
        " addu %0,%0,$12 \n\t" \
        " sltu $12,%0,$12 \n\t" \
        " addu %1,%1,$13 \n\t" \
        " sltu $13,%1,$13 \n\t" \
        " addu %1,%1,$12 \n\t" \
        " sltu $12,%1,$12 \n\t" \
        " addu %2,%2,$13 \n\t" \
        " addu %2,%2,$12 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i) : "$12", "$13");

/*
    For squaring the cross terms are doubled: c2:c1:c0 += 2 * (i * j).
    The product is kept in $12 (LO) / $13 (HI) and added twice; $14/$15
    carry the per-step carries so the product registers stay intact.
 */
# define SQRADD2(i, j) \
    asm ( \
        " multu %6,%7 \n\t" \
        " mflo $12 \n\t" \
        " mfhi $13 \n\t" \
        \
        " addu %0,%0,$12 \n\t" \
        " sltu $14,%0,$12 \n\t" \
        " addu %1,%1,$13 \n\t" \
        " sltu $15,%1,$13 \n\t" \
        " addu %1,%1,$14 \n\t" \
        " sltu $14,%1,$14 \n\t" \
        " addu %2,%2,$15 \n\t" \
        " addu %2,%2,$14 \n\t" \
        \
        " addu %0,%0,$12 \n\t" \
        " sltu $14,%0,$12 \n\t" \
        " addu %1,%1,$13 \n\t" \
        " sltu $15,%1,$13 \n\t" \
        " addu %1,%1,$14 \n\t" \
        " sltu $14,%1,$14 \n\t" \
        " addu %2,%2,$15 \n\t" \
        " addu %2,%2,$14 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "0" (c0), "1" (c1), "2" (c2), "r" (i), "r" (j) : "$12", "$13", "$14", "$15");

/*
    Start a secondary sum: sc0 = LO(i * j), sc1 = HI(i * j), sc2 = 0.
    NOTE(review): the "cc" clobber looks like a carry-over from the other
    ports; nothing in this sequence is flag based -- confirm it is harmless.
 */
# define SQRADDSC(i, j) \
    asm ( \
        " multu %6,%7 \n\t" \
        " mflo %0 \n\t" \
        " mfhi %1 \n\t" \
        " xor %2,%2,%2 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "cc");

/*
    Accumulate into the secondary sum: sc2:sc1:sc0 += i * j.
    NOTE(review): "$14" is listed as clobbered but is not written by this
    sequence.
 */
# define SQRADDAC(i, j) \
    asm ( \
        " multu %6,%7 \n\t" \
        " mflo $12 \n\t" \
        " mfhi $13 \n\t" \
        " addu %0,%0,$12 \n\t" \
        " sltu $12,%0,$12 \n\t" \
        " addu %1,%1,$13 \n\t" \
        " sltu $13,%1,$13 \n\t" \
        " addu %1,%1,$12 \n\t" \
        " sltu $12,%1,$12 \n\t" \
        " addu %2,%2,$13 \n\t" \
        " addu %2,%2,$12 \n\t" \
        : "=r" (sc0), "=r" (sc1), "=r" (sc2) : "0" (sc0), "1" (sc1), "2" (sc2), "r" (i), "r" (j) : "$12", "$13", "$14");

/*
    Add the secondary sum to the column accumulator twice:
    c2:c1:c0 += 2 * sc2:sc1:sc0.
    Note the operand order: sc0..sc2 are inputs %3..%5 here, ahead of the
    matching constraints for c0..c2.  $10/$11 hold the carries.
 */
# define SQRADDDB \
    asm ( \
        " addu %0,%0,%3 \n\t" \
        " sltu $10,%0,%3 \n\t" \
        " addu %1,%1,$10 \n\t" \
        " sltu $10,%1,$10 \n\t" \
        " addu %1,%1,%4 \n\t" \
        " sltu $11,%1,%4 \n\t" \
        " addu %2,%2,$10 \n\t" \
        " addu %2,%2,$11 \n\t" \
        " addu %2,%2,%5 \n\t" \
        \
        " addu %0,%0,%3 \n\t" \
        " sltu $10,%0,%3 \n\t" \
        " addu %1,%1,$10 \n\t" \
        " sltu $10,%1,$10 \n\t" \
        " addu %1,%1,%4 \n\t" \
        " sltu $11,%1,%4 \n\t" \
        " addu %2,%2,$10 \n\t" \
        " addu %2,%2,$11 \n\t" \
        " addu %2,%2,%5 \n\t" \
        : "=r" (c0), "=r" (c1), "=r" (c2) : "r" (sc0), "r" (sc1), "r" (sc2), "0" (c0), "1" (c1), "2" (c2) : "$10", "$11");
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
# else |
361
|
|
|
|
|
|
|
/******************************************************************************/ |
362
|
|
|
|
|
|
|
# define PSTM_ISO
/*
    ISO C portable Comba squaring primitives.

    Every macro works on variables that must already exist in the expanding
    scope: c0, c1, c2 (pstm_digit) are the three-digit column accumulator
    (c0 is the least significant digit) and sc0, sc1, sc2 (pstm_digit) are a
    secondary accumulator used by the SQRADDSC / SQRADDAC / SQRADDDB
    sequence.  SQRADD2 additionally needs a pstm_word variable named tt.
    Each non-empty macro body already ends in ';'.

    Macro parameters are fully parenthesized, so an expression argument is
    evaluated as written before it is widened to pstm_word.
 */

# define COMBA_START

/* Zero the column accumulator. */
# define CLEAR_CARRY \
    c0 = c1 = c2 = 0;

/* Store the low accumulator digit (c0) into x. */
# define COMBA_STORE(x) \
    (x) = c0;

/* Store the middle accumulator digit (c1) into x. */
# define COMBA_STORE2(x) \
    (x) = c1;

/* Move to the next column: c0 <- c1, c1 <- c2, c2 <- 0. */
# define CARRY_FORWARD \
    do { c0 = c1; c1 = c2; c2 = 0; } while (0);

# define COMBA_FINI

/* c2:c1:c0 += i * j (used with i == j for the square term of a column). */
# define SQRADD(i, j) \
    do { pstm_word t; \
         t = c0 + ((pstm_word) (i)) * ((pstm_word) (j)); c0 = (pstm_digit) t; \
         t = c1 + (t >> DIGIT_BIT); \
         c1 = (pstm_digit) t; c2 += (pstm_digit) (t >> DIGIT_BIT); \
    } while (0);


/* for squaring some of the terms are doubled: c2:c1:c0 += 2 * (i * j),
   the product is computed once and added twice (uses the caller's tt) */
# define SQRADD2(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) (i)) * ((pstm_word) (j)); \
         tt = (pstm_word) c0 + t; c0 = (pstm_digit) tt; \
         tt = (pstm_word) c1 + (tt >> DIGIT_BIT); \
         c1 = (pstm_digit) tt; c2 += (pstm_digit) (tt >> DIGIT_BIT); \
         tt = (pstm_word) c0 + t; c0 = (pstm_digit) tt; \
         tt = (pstm_word) c1 + (tt >> DIGIT_BIT); \
         c1 = (pstm_digit) tt; c2 += (pstm_digit) (tt >> DIGIT_BIT); \
    } while (0);

/* Start a secondary sum: sc1:sc0 = i * j, sc2 = 0. */
# define SQRADDSC(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) (i)) * ((pstm_word) (j)); \
         sc0 = (pstm_digit) t; sc1 = (pstm_digit) (t >> DIGIT_BIT); sc2 = 0; \
    } while (0);

/* Accumulate into the secondary sum: sc2:sc1:sc0 += i * j. */
# define SQRADDAC(i, j) \
    do { pstm_word t; \
         t = ((pstm_word) sc0) + ((pstm_word) (i)) * ((pstm_word) (j)); \
         sc0 = (pstm_digit) t; \
         t = ((pstm_word) sc1) + (t >> DIGIT_BIT); sc1 = (pstm_digit) t; \
         sc2 += (pstm_digit) (t >> DIGIT_BIT); \
    } while (0);

/* Add the secondary sum to the column accumulator twice:
   c2:c1:c0 += 2 * sc2:sc1:sc0. */
# define SQRADDDB \
    do { pstm_word t; \
         t = ((pstm_word) sc0) + ((pstm_word) sc0) + ((pstm_word) c0); \
         c0 = (pstm_digit) t; \
         t = ((pstm_word) sc1) + ((pstm_word) sc1) + c1 + (t >> DIGIT_BIT); \
         c1 = (pstm_digit) t; \
         c2 = c2 + sc2 + sc2 + (pstm_digit) (t >> DIGIT_BIT); \
    } while (0);
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# endif /* ISO_C */ |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
/******************************************************************************/ |
428
|
|
|
|
|
|
|
/* |
429
|
|
|
|
|
|
|
Non-unrolled comba squarer |
430
|
|
|
|
|
|
|
*/ |
431
|
5387326
|
|
|
|
|
|
static int32_t pstm_sqr_comba_gen(psPool_t *pool, const pstm_int *A, |
432
|
|
|
|
|
|
|
pstm_int *B, pstm_digit *paD, psSize_t paDlen) |
433
|
|
|
|
|
|
|
{ |
434
|
|
|
|
|
|
|
int16 paDfail, pa; |
435
|
|
|
|
|
|
|
int32 ix, iz; |
436
|
|
|
|
|
|
|
pstm_digit c0, c1, c2, *dst; |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
# ifdef PSTM_ISO |
439
|
|
|
|
|
|
|
pstm_word tt; |
440
|
|
|
|
|
|
|
# endif |
441
|
|
|
|
|
|
|
|
442
|
5387326
|
|
|
|
|
|
paDfail = 0; |
443
|
|
|
|
|
|
|
/* get size of output and trim */ |
444
|
5387326
|
|
|
|
|
|
pa = A->used + A->used; |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
/* number of output digits to produce */ |
447
|
|
|
|
|
|
|
COMBA_START; |
448
|
5387326
|
|
|
|
|
|
CLEAR_CARRY; |
449
|
|
|
|
|
|
|
/* |
450
|
|
|
|
|
|
|
If b is not large enough grow it and continue |
451
|
|
|
|
|
|
|
*/ |
452
|
5387326
|
50
|
|
|
|
|
if (B->alloc < pa) |
453
|
|
|
|
|
|
|
{ |
454
|
0
|
0
|
|
|
|
|
if (pstm_grow(B, pa) != PSTM_OKAY) |
455
|
|
|
|
|
|
|
{ |
456
|
0
|
|
|
|
|
|
return PS_MEM_FAIL; |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
} |
459
|
5387326
|
50
|
|
|
|
|
if (paD != NULL) |
460
|
|
|
|
|
|
|
{ |
461
|
5387326
|
50
|
|
|
|
|
if (paDlen < (sizeof(pstm_digit) * pa)) |
462
|
|
|
|
|
|
|
{ |
463
|
0
|
|
|
|
|
|
paDfail = 1; /* have a paD, but it's not big enough */ |
464
|
0
|
0
|
|
|
|
|
if ((dst = psMalloc(pool, sizeof(pstm_digit) * pa)) == NULL) |
465
|
|
|
|
|
|
|
{ |
466
|
0
|
|
|
|
|
|
return PS_MEM_FAIL; |
467
|
|
|
|
|
|
|
} |
468
|
0
|
|
|
|
|
|
memset(dst, 0x0, sizeof(pstm_digit) * pa); |
469
|
|
|
|
|
|
|
} |
470
|
|
|
|
|
|
|
else |
471
|
|
|
|
|
|
|
{ |
472
|
5387326
|
|
|
|
|
|
dst = paD; |
473
|
5387326
|
|
|
|
|
|
memset(dst, 0x0, paDlen); |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
else |
477
|
|
|
|
|
|
|
{ |
478
|
0
|
0
|
|
|
|
|
if ((dst = psMalloc(pool, sizeof(pstm_digit) * pa)) == NULL) |
479
|
|
|
|
|
|
|
{ |
480
|
0
|
|
|
|
|
|
return PS_MEM_FAIL; |
481
|
|
|
|
|
|
|
} |
482
|
0
|
|
|
|
|
|
memset(dst, 0x0, sizeof(pstm_digit) * pa); |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
101994324
|
100
|
|
|
|
|
for (ix = 0; ix < pa; ix++) |
486
|
|
|
|
|
|
|
{ |
487
|
|
|
|
|
|
|
int32 tx, ty, iy; |
488
|
|
|
|
|
|
|
pstm_digit *tmpy, *tmpx; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
/* get offsets into the two bignums */ |
491
|
96606998
|
|
|
|
|
|
ty = min(A->used - 1, ix); |
492
|
96606998
|
|
|
|
|
|
tx = ix - ty; |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
/* setup temp aliases */ |
495
|
96606998
|
|
|
|
|
|
tmpx = A->dp + tx; |
496
|
96606998
|
|
|
|
|
|
tmpy = A->dp + ty; |
497
|
|
|
|
|
|
|
/* |
498
|
|
|
|
|
|
|
This is the number of times the loop will iterate |
499
|
|
|
|
|
|
|
while (tx++ < a->used && ty-- >= 0) { ... } |
500
|
|
|
|
|
|
|
*/ |
501
|
96606998
|
|
|
|
|
|
iy = min(A->used - tx, ty + 1); |
502
|
|
|
|
|
|
|
/* |
503
|
|
|
|
|
|
|
now for squaring, tx can never equal ty. We halve the distance since |
504
|
|
|
|
|
|
|
they approach at a rate of 2x and we have to round because odd cases |
505
|
|
|
|
|
|
|
need to be executed |
506
|
|
|
|
|
|
|
*/ |
507
|
96606998
|
|
|
|
|
|
iy = min(iy, (ty - tx + 1) >> 1); |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
/* forward carries */ |
510
|
96606998
|
|
|
|
|
|
CARRY_FORWARD; |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
/* execute loop */ |
513
|
289684488
|
100
|
|
|
|
|
for (iz = 0; iz < iy; iz++) |
514
|
|
|
|
|
|
|
{ |
515
|
193077490
|
|
|
|
|
|
SQRADD2(*tmpx++, *tmpy--); |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
/* even columns have the square term in them */ |
519
|
96606998
|
100
|
|
|
|
|
if ((ix & 1) == 0) |
520
|
|
|
|
|
|
|
{ |
521
|
48303499
|
|
|
|
|
|
SQRADD(A->dp[ix >> 1], A->dp[ix >> 1]); |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
/* store it */ |
525
|
96606998
|
|
|
|
|
|
COMBA_STORE(dst[ix]); |
526
|
|
|
|
|
|
|
} |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
COMBA_FINI; |
529
|
|
|
|
|
|
|
/* |
530
|
|
|
|
|
|
|
setup dest |
531
|
|
|
|
|
|
|
*/ |
532
|
5387326
|
|
|
|
|
|
iz = B->used; |
533
|
5387326
|
|
|
|
|
|
B->used = pa; |
534
|
|
|
|
|
|
|
{ |
535
|
|
|
|
|
|
|
pstm_digit *tmpc; |
536
|
5387326
|
|
|
|
|
|
tmpc = B->dp; |
537
|
101994324
|
100
|
|
|
|
|
for (ix = 0; ix < pa; ix++) |
538
|
|
|
|
|
|
|
{ |
539
|
96606998
|
|
|
|
|
|
*tmpc++ = dst[ix]; |
540
|
|
|
|
|
|
|
} |
541
|
|
|
|
|
|
|
/* clear unused digits (that existed in the old copy of c) */ |
542
|
5514152
|
100
|
|
|
|
|
for (; ix < iz; ix++) |
543
|
|
|
|
|
|
|
{ |
544
|
126826
|
|
|
|
|
|
*tmpc++ = 0; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
} |
547
|
5387326
|
|
|
|
|
|
pstm_clamp(B); |
548
|
|
|
|
|
|
|
|
549
|
5387326
|
50
|
|
|
|
|
if ((paD == NULL) || paDfail == 1) |
|
|
50
|
|
|
|
|
|
550
|
|
|
|
|
|
|
{ |
551
|
0
|
|
|
|
|
|
psFree(dst, pool); |
552
|
|
|
|
|
|
|
} |
553
|
5387326
|
|
|
|
|
|
return PS_SUCCESS; |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
/******************************************************************************/ |
557
|
|
|
|
|
|
|
/* |
558
|
|
|
|
|
|
|
Unrolled Comba loop for 1024 bit keys |
559
|
|
|
|
|
|
|
*/ |
560
|
|
|
|
|
|
|
# ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS |
561
|
|
|
|
|
|
|
static int32_t pstm_sqr_comba16(const pstm_int *A, pstm_int *B)
{
    /* c0..c2 and sc0..sc2 are referenced by name from the SQRADD* macros */
    pstm_digit c0, c1, c2;
    pstm_digit sc0, sc1, sc2;
    pstm_digit sq[32];  /* the 32 product digits, copied into B at the end */
    pstm_digit *ad;     /* digits of A; 16 of them are read */

# ifdef PSTM_ISO
    pstm_word tt;
# endif

    /* make sure B can hold all 32 product digits */
    if (B->alloc < 32 && pstm_grow(B, 32) != PSTM_OKAY)
    {
        return PS_MEM_FAIL;
    }
    /* fetched only after the grow above */
    ad = A->dp;
    sc0 = sc1 = sc2 = 0;

    COMBA_START;

    /* start with an empty accumulator */
    CLEAR_CARRY;

    /* column 0 */
    SQRADD(ad[0], ad[0]);
    COMBA_STORE(sq[0]);

    /* column 1 */
    CARRY_FORWARD;
    SQRADD2(ad[0], ad[1]);
    COMBA_STORE(sq[1]);

    /* column 2 */
    CARRY_FORWARD;
    SQRADD2(ad[0], ad[2]);
    SQRADD(ad[1], ad[1]);
    COMBA_STORE(sq[2]);

    /* column 3 */
    CARRY_FORWARD;
    SQRADD2(ad[0], ad[3]);
    SQRADD2(ad[1], ad[2]);
    COMBA_STORE(sq[3]);

    /* column 4 */
    CARRY_FORWARD;
    SQRADD2(ad[0], ad[4]);
    SQRADD2(ad[1], ad[3]);
    SQRADD(ad[2], ad[2]);
    COMBA_STORE(sq[4]);

    /* column 5 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[5]); SQRADDAC(ad[1], ad[4]); SQRADDAC(ad[2], ad[3]);
    SQRADDDB;
    COMBA_STORE(sq[5]);

    /* column 6 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[6]); SQRADDAC(ad[1], ad[5]); SQRADDAC(ad[2], ad[4]);
    SQRADDDB;
    SQRADD(ad[3], ad[3]);
    COMBA_STORE(sq[6]);

    /* column 7 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[7]); SQRADDAC(ad[1], ad[6]); SQRADDAC(ad[2], ad[5]);
    SQRADDAC(ad[3], ad[4]);
    SQRADDDB;
    COMBA_STORE(sq[7]);

    /* column 8 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[8]); SQRADDAC(ad[1], ad[7]); SQRADDAC(ad[2], ad[6]);
    SQRADDAC(ad[3], ad[5]);
    SQRADDDB;
    SQRADD(ad[4], ad[4]);
    COMBA_STORE(sq[8]);

    /* column 9 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[9]); SQRADDAC(ad[1], ad[8]); SQRADDAC(ad[2], ad[7]);
    SQRADDAC(ad[3], ad[6]); SQRADDAC(ad[4], ad[5]);
    SQRADDDB;
    COMBA_STORE(sq[9]);

    /* column 10 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[10]); SQRADDAC(ad[1], ad[9]); SQRADDAC(ad[2], ad[8]);
    SQRADDAC(ad[3], ad[7]); SQRADDAC(ad[4], ad[6]);
    SQRADDDB;
    SQRADD(ad[5], ad[5]);
    COMBA_STORE(sq[10]);

    /* column 11 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[11]); SQRADDAC(ad[1], ad[10]); SQRADDAC(ad[2], ad[9]);
    SQRADDAC(ad[3], ad[8]); SQRADDAC(ad[4], ad[7]); SQRADDAC(ad[5], ad[6]);
    SQRADDDB;
    COMBA_STORE(sq[11]);

    /* column 12 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[12]); SQRADDAC(ad[1], ad[11]); SQRADDAC(ad[2], ad[10]);
    SQRADDAC(ad[3], ad[9]); SQRADDAC(ad[4], ad[8]); SQRADDAC(ad[5], ad[7]);
    SQRADDDB;
    SQRADD(ad[6], ad[6]);
    COMBA_STORE(sq[12]);

    /* column 13 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[13]); SQRADDAC(ad[1], ad[12]); SQRADDAC(ad[2], ad[11]);
    SQRADDAC(ad[3], ad[10]); SQRADDAC(ad[4], ad[9]); SQRADDAC(ad[5], ad[8]);
    SQRADDAC(ad[6], ad[7]);
    SQRADDDB;
    COMBA_STORE(sq[13]);

    /* column 14 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[14]); SQRADDAC(ad[1], ad[13]); SQRADDAC(ad[2], ad[12]);
    SQRADDAC(ad[3], ad[11]); SQRADDAC(ad[4], ad[10]); SQRADDAC(ad[5], ad[9]);
    SQRADDAC(ad[6], ad[8]);
    SQRADDDB;
    SQRADD(ad[7], ad[7]);
    COMBA_STORE(sq[14]);

    /* column 15 */
    CARRY_FORWARD;
    SQRADDSC(ad[0], ad[15]); SQRADDAC(ad[1], ad[14]); SQRADDAC(ad[2], ad[13]);
    SQRADDAC(ad[3], ad[12]); SQRADDAC(ad[4], ad[11]); SQRADDAC(ad[5], ad[10]);
    SQRADDAC(ad[6], ad[9]); SQRADDAC(ad[7], ad[8]);
    SQRADDDB;
    COMBA_STORE(sq[15]);

    /* column 16 */
    CARRY_FORWARD;
    SQRADDSC(ad[1], ad[15]); SQRADDAC(ad[2], ad[14]); SQRADDAC(ad[3], ad[13]);
    SQRADDAC(ad[4], ad[12]); SQRADDAC(ad[5], ad[11]); SQRADDAC(ad[6], ad[10]);
    SQRADDAC(ad[7], ad[9]);
    SQRADDDB;
    SQRADD(ad[8], ad[8]);
    COMBA_STORE(sq[16]);

    /* column 17 */
    CARRY_FORWARD;
    SQRADDSC(ad[2], ad[15]); SQRADDAC(ad[3], ad[14]); SQRADDAC(ad[4], ad[13]);
    SQRADDAC(ad[5], ad[12]); SQRADDAC(ad[6], ad[11]); SQRADDAC(ad[7], ad[10]);
    SQRADDAC(ad[8], ad[9]);
    SQRADDDB;
    COMBA_STORE(sq[17]);

    /* column 18 */
    CARRY_FORWARD;
    SQRADDSC(ad[3], ad[15]); SQRADDAC(ad[4], ad[14]); SQRADDAC(ad[5], ad[13]);
    SQRADDAC(ad[6], ad[12]); SQRADDAC(ad[7], ad[11]); SQRADDAC(ad[8], ad[10]);
    SQRADDDB;
    SQRADD(ad[9], ad[9]);
    COMBA_STORE(sq[18]);

    /* column 19 */
    CARRY_FORWARD;
    SQRADDSC(ad[4], ad[15]); SQRADDAC(ad[5], ad[14]); SQRADDAC(ad[6], ad[13]);
    SQRADDAC(ad[7], ad[12]); SQRADDAC(ad[8], ad[11]); SQRADDAC(ad[9], ad[10]);
    SQRADDDB;
    COMBA_STORE(sq[19]);

    /* column 20 */
    CARRY_FORWARD;
    SQRADDSC(ad[5], ad[15]); SQRADDAC(ad[6], ad[14]); SQRADDAC(ad[7], ad[13]);
    SQRADDAC(ad[8], ad[12]); SQRADDAC(ad[9], ad[11]);
    SQRADDDB;
    SQRADD(ad[10], ad[10]);
    COMBA_STORE(sq[20]);

    /* column 21 */
    CARRY_FORWARD;
    SQRADDSC(ad[6], ad[15]); SQRADDAC(ad[7], ad[14]); SQRADDAC(ad[8], ad[13]);
    SQRADDAC(ad[9], ad[12]); SQRADDAC(ad[10], ad[11]);
    SQRADDDB;
    COMBA_STORE(sq[21]);

    /* column 22 */
    CARRY_FORWARD;
    SQRADDSC(ad[7], ad[15]); SQRADDAC(ad[8], ad[14]); SQRADDAC(ad[9], ad[13]);
    SQRADDAC(ad[10], ad[12]);
    SQRADDDB;
    SQRADD(ad[11], ad[11]);
    COMBA_STORE(sq[22]);

    /* column 23 */
    CARRY_FORWARD;
    SQRADDSC(ad[8], ad[15]); SQRADDAC(ad[9], ad[14]); SQRADDAC(ad[10], ad[13]);
    SQRADDAC(ad[11], ad[12]);
    SQRADDDB;
    COMBA_STORE(sq[23]);

    /* column 24 */
    CARRY_FORWARD;
    SQRADDSC(ad[9], ad[15]); SQRADDAC(ad[10], ad[14]); SQRADDAC(ad[11], ad[13]);
    SQRADDDB;
    SQRADD(ad[12], ad[12]);
    COMBA_STORE(sq[24]);

    /* column 25 */
    CARRY_FORWARD;
    SQRADDSC(ad[10], ad[15]); SQRADDAC(ad[11], ad[14]); SQRADDAC(ad[12], ad[13]);
    SQRADDDB;
    COMBA_STORE(sq[25]);

    /* column 26 */
    CARRY_FORWARD;
    SQRADD2(ad[11], ad[15]);
    SQRADD2(ad[12], ad[14]);
    SQRADD(ad[13], ad[13]);
    COMBA_STORE(sq[26]);

    /* column 27 */
    CARRY_FORWARD;
    SQRADD2(ad[12], ad[15]);
    SQRADD2(ad[13], ad[14]);
    COMBA_STORE(sq[27]);

    /* column 28 */
    CARRY_FORWARD;
    SQRADD2(ad[13], ad[15]);
    SQRADD(ad[14], ad[14]);
    COMBA_STORE(sq[28]);

    /* column 29 */
    CARRY_FORWARD;
    SQRADD2(ad[14], ad[15]);
    COMBA_STORE(sq[29]);

    /* column 30, plus the final carry digit as column 31 */
    CARRY_FORWARD;
    SQRADD(ad[15], ad[15]);
    COMBA_STORE(sq[30]);
    COMBA_STORE2(sq[31]);
    COMBA_FINI;

    /* publish the result: copy the digits into B and clamp it */
    B->used = 32;
    B->sign = PSTM_ZPOS;
    memcpy(B->dp, sq, sizeof(sq));
    pstm_clamp(B);
    return PSTM_OKAY;
}
746
|
|
|
|
|
|
|
# endif /* USE_1024_KEY_SPEED_OPTIMIZATIONS */ |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS |
750
|
|
|
|
|
|
|
/*
    Fully unrolled Comba squaring for a 32-digit operand: B = A * A.

    The dispatcher pstm_sqr_comba() only calls this when A->used == 32; the
    function itself does not check A->used and unconditionally reads
    a[0] .. a[31].

    Result column k (0 <= k <= 62) collects every product a[i] * a[j] with
    i + j == k.  The accumulation macros (SQRADD, SQRADD2, SQRADDSC,
    SQRADDAC, SQRADDDB, CARRY_FORWARD, COMBA_STORE, ...) are defined outside
    this function and operate on the locals c0/c1/c2 and sc0/sc1/sc2 by name.
    NOTE(review): presumably SQRADD adds a product once, SQRADD2 adds it
    twice, and the SC/AC/DB triple sums the cross products in sc0..sc2 and
    adds that sum doubled -- confirm against the macro definitions.

    All 64 result digits are produced in the local array b[] and copied into
    B->dp only after the last column, so B->dp is not written while the
    digits of A are still being read.

    @param[in]  A  Operand; digits a[0] .. a[31] are read
    @param[out] B  Result; grown to 64 digits if B->alloc is smaller
    @return PSTM_OKAY on success, PS_MEM_FAIL if B could not be grown
*/
static int32_t pstm_sqr_comba32(const pstm_int *A, pstm_int *B)
{
    pstm_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;

# ifdef PSTM_ISO
    /* NOTE(review): presumably scratch for the ISO C macro variants */
    pstm_word tt;
# endif

    /* Make room for the 64-digit product before touching any digit. */
    if (B->alloc < 64 && pstm_grow(B, 64) != PSTM_OKAY)
    {
        return PS_MEM_FAIL;
    }

    /* Fetch the digit pointer only after the grow above. */
    a = A->dp;
    sc0 = 0;
    sc1 = 0;
    sc2 = 0;
    COMBA_START;

    /* start with an empty column accumulator */
    CLEAR_CARRY;

    /* column 0 */
    SQRADD(a[0], a[0]);
    COMBA_STORE(b[0]);

    /* column 1 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[1]);
    COMBA_STORE(b[1]);

    /* column 2 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[2]); SQRADD(a[1], a[1]);
    COMBA_STORE(b[2]);

    /* column 3 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[3]); SQRADD2(a[1], a[2]);
    COMBA_STORE(b[3]);

    /* column 4 */
    CARRY_FORWARD;
    SQRADD2(a[0], a[4]); SQRADD2(a[1], a[3]); SQRADD(a[2], a[2]);
    COMBA_STORE(b[4]);

    /* column 5 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[5]); SQRADDAC(a[1], a[4]); SQRADDAC(a[2], a[3]);
    SQRADDDB;
    COMBA_STORE(b[5]);

    /* column 6 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[6]); SQRADDAC(a[1], a[5]); SQRADDAC(a[2], a[4]);
    SQRADDDB; SQRADD(a[3], a[3]);
    COMBA_STORE(b[6]);

    /* column 7 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[7]); SQRADDAC(a[1], a[6]); SQRADDAC(a[2], a[5]); SQRADDAC(a[3], a[4]);
    SQRADDDB;
    COMBA_STORE(b[7]);

    /* column 8 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[8]); SQRADDAC(a[1], a[7]); SQRADDAC(a[2], a[6]); SQRADDAC(a[3], a[5]);
    SQRADDDB; SQRADD(a[4], a[4]);
    COMBA_STORE(b[8]);

    /* column 9 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[9]); SQRADDAC(a[1], a[8]); SQRADDAC(a[2], a[7]); SQRADDAC(a[3], a[6]);
    SQRADDAC(a[4], a[5]);
    SQRADDDB;
    COMBA_STORE(b[9]);

    /* column 10 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[10]); SQRADDAC(a[1], a[9]); SQRADDAC(a[2], a[8]); SQRADDAC(a[3], a[7]);
    SQRADDAC(a[4], a[6]);
    SQRADDDB; SQRADD(a[5], a[5]);
    COMBA_STORE(b[10]);

    /* column 11 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[11]); SQRADDAC(a[1], a[10]); SQRADDAC(a[2], a[9]); SQRADDAC(a[3], a[8]);
    SQRADDAC(a[4], a[7]); SQRADDAC(a[5], a[6]);
    SQRADDDB;
    COMBA_STORE(b[11]);

    /* column 12 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[12]); SQRADDAC(a[1], a[11]); SQRADDAC(a[2], a[10]); SQRADDAC(a[3], a[9]);
    SQRADDAC(a[4], a[8]); SQRADDAC(a[5], a[7]);
    SQRADDDB; SQRADD(a[6], a[6]);
    COMBA_STORE(b[12]);

    /* column 13 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[13]); SQRADDAC(a[1], a[12]); SQRADDAC(a[2], a[11]); SQRADDAC(a[3], a[10]);
    SQRADDAC(a[4], a[9]); SQRADDAC(a[5], a[8]); SQRADDAC(a[6], a[7]);
    SQRADDDB;
    COMBA_STORE(b[13]);

    /* column 14 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[14]); SQRADDAC(a[1], a[13]); SQRADDAC(a[2], a[12]); SQRADDAC(a[3], a[11]);
    SQRADDAC(a[4], a[10]); SQRADDAC(a[5], a[9]); SQRADDAC(a[6], a[8]);
    SQRADDDB; SQRADD(a[7], a[7]);
    COMBA_STORE(b[14]);

    /* column 15 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[15]); SQRADDAC(a[1], a[14]); SQRADDAC(a[2], a[13]); SQRADDAC(a[3], a[12]);
    SQRADDAC(a[4], a[11]); SQRADDAC(a[5], a[10]); SQRADDAC(a[6], a[9]); SQRADDAC(a[7], a[8]);
    SQRADDDB;
    COMBA_STORE(b[15]);

    /* column 16 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[16]); SQRADDAC(a[1], a[15]); SQRADDAC(a[2], a[14]); SQRADDAC(a[3], a[13]);
    SQRADDAC(a[4], a[12]); SQRADDAC(a[5], a[11]); SQRADDAC(a[6], a[10]); SQRADDAC(a[7], a[9]);
    SQRADDDB; SQRADD(a[8], a[8]);
    COMBA_STORE(b[16]);

    /* column 17 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[17]); SQRADDAC(a[1], a[16]); SQRADDAC(a[2], a[15]); SQRADDAC(a[3], a[14]);
    SQRADDAC(a[4], a[13]); SQRADDAC(a[5], a[12]); SQRADDAC(a[6], a[11]); SQRADDAC(a[7], a[10]);
    SQRADDAC(a[8], a[9]);
    SQRADDDB;
    COMBA_STORE(b[17]);

    /* column 18 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[18]); SQRADDAC(a[1], a[17]); SQRADDAC(a[2], a[16]); SQRADDAC(a[3], a[15]);
    SQRADDAC(a[4], a[14]); SQRADDAC(a[5], a[13]); SQRADDAC(a[6], a[12]); SQRADDAC(a[7], a[11]);
    SQRADDAC(a[8], a[10]);
    SQRADDDB; SQRADD(a[9], a[9]);
    COMBA_STORE(b[18]);

    /* column 19 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[19]); SQRADDAC(a[1], a[18]); SQRADDAC(a[2], a[17]); SQRADDAC(a[3], a[16]);
    SQRADDAC(a[4], a[15]); SQRADDAC(a[5], a[14]); SQRADDAC(a[6], a[13]); SQRADDAC(a[7], a[12]);
    SQRADDAC(a[8], a[11]); SQRADDAC(a[9], a[10]);
    SQRADDDB;
    COMBA_STORE(b[19]);

    /* column 20 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[20]); SQRADDAC(a[1], a[19]); SQRADDAC(a[2], a[18]); SQRADDAC(a[3], a[17]);
    SQRADDAC(a[4], a[16]); SQRADDAC(a[5], a[15]); SQRADDAC(a[6], a[14]); SQRADDAC(a[7], a[13]);
    SQRADDAC(a[8], a[12]); SQRADDAC(a[9], a[11]);
    SQRADDDB; SQRADD(a[10], a[10]);
    COMBA_STORE(b[20]);

    /* column 21 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[21]); SQRADDAC(a[1], a[20]); SQRADDAC(a[2], a[19]); SQRADDAC(a[3], a[18]);
    SQRADDAC(a[4], a[17]); SQRADDAC(a[5], a[16]); SQRADDAC(a[6], a[15]); SQRADDAC(a[7], a[14]);
    SQRADDAC(a[8], a[13]); SQRADDAC(a[9], a[12]); SQRADDAC(a[10], a[11]);
    SQRADDDB;
    COMBA_STORE(b[21]);

    /* column 22 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[22]); SQRADDAC(a[1], a[21]); SQRADDAC(a[2], a[20]); SQRADDAC(a[3], a[19]);
    SQRADDAC(a[4], a[18]); SQRADDAC(a[5], a[17]); SQRADDAC(a[6], a[16]); SQRADDAC(a[7], a[15]);
    SQRADDAC(a[8], a[14]); SQRADDAC(a[9], a[13]); SQRADDAC(a[10], a[12]);
    SQRADDDB; SQRADD(a[11], a[11]);
    COMBA_STORE(b[22]);

    /* column 23 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[23]); SQRADDAC(a[1], a[22]); SQRADDAC(a[2], a[21]); SQRADDAC(a[3], a[20]);
    SQRADDAC(a[4], a[19]); SQRADDAC(a[5], a[18]); SQRADDAC(a[6], a[17]); SQRADDAC(a[7], a[16]);
    SQRADDAC(a[8], a[15]); SQRADDAC(a[9], a[14]); SQRADDAC(a[10], a[13]); SQRADDAC(a[11], a[12]);
    SQRADDDB;
    COMBA_STORE(b[23]);

    /* column 24 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[24]); SQRADDAC(a[1], a[23]); SQRADDAC(a[2], a[22]); SQRADDAC(a[3], a[21]);
    SQRADDAC(a[4], a[20]); SQRADDAC(a[5], a[19]); SQRADDAC(a[6], a[18]); SQRADDAC(a[7], a[17]);
    SQRADDAC(a[8], a[16]); SQRADDAC(a[9], a[15]); SQRADDAC(a[10], a[14]); SQRADDAC(a[11], a[13]);
    SQRADDDB; SQRADD(a[12], a[12]);
    COMBA_STORE(b[24]);

    /* column 25 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[25]); SQRADDAC(a[1], a[24]); SQRADDAC(a[2], a[23]); SQRADDAC(a[3], a[22]);
    SQRADDAC(a[4], a[21]); SQRADDAC(a[5], a[20]); SQRADDAC(a[6], a[19]); SQRADDAC(a[7], a[18]);
    SQRADDAC(a[8], a[17]); SQRADDAC(a[9], a[16]); SQRADDAC(a[10], a[15]); SQRADDAC(a[11], a[14]);
    SQRADDAC(a[12], a[13]);
    SQRADDDB;
    COMBA_STORE(b[25]);

    /* column 26 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[26]); SQRADDAC(a[1], a[25]); SQRADDAC(a[2], a[24]); SQRADDAC(a[3], a[23]);
    SQRADDAC(a[4], a[22]); SQRADDAC(a[5], a[21]); SQRADDAC(a[6], a[20]); SQRADDAC(a[7], a[19]);
    SQRADDAC(a[8], a[18]); SQRADDAC(a[9], a[17]); SQRADDAC(a[10], a[16]); SQRADDAC(a[11], a[15]);
    SQRADDAC(a[12], a[14]);
    SQRADDDB; SQRADD(a[13], a[13]);
    COMBA_STORE(b[26]);

    /* column 27 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[27]); SQRADDAC(a[1], a[26]); SQRADDAC(a[2], a[25]); SQRADDAC(a[3], a[24]);
    SQRADDAC(a[4], a[23]); SQRADDAC(a[5], a[22]); SQRADDAC(a[6], a[21]); SQRADDAC(a[7], a[20]);
    SQRADDAC(a[8], a[19]); SQRADDAC(a[9], a[18]); SQRADDAC(a[10], a[17]); SQRADDAC(a[11], a[16]);
    SQRADDAC(a[12], a[15]); SQRADDAC(a[13], a[14]);
    SQRADDDB;
    COMBA_STORE(b[27]);

    /* column 28 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[28]); SQRADDAC(a[1], a[27]); SQRADDAC(a[2], a[26]); SQRADDAC(a[3], a[25]);
    SQRADDAC(a[4], a[24]); SQRADDAC(a[5], a[23]); SQRADDAC(a[6], a[22]); SQRADDAC(a[7], a[21]);
    SQRADDAC(a[8], a[20]); SQRADDAC(a[9], a[19]); SQRADDAC(a[10], a[18]); SQRADDAC(a[11], a[17]);
    SQRADDAC(a[12], a[16]); SQRADDAC(a[13], a[15]);
    SQRADDDB; SQRADD(a[14], a[14]);
    COMBA_STORE(b[28]);

    /* column 29 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[29]); SQRADDAC(a[1], a[28]); SQRADDAC(a[2], a[27]); SQRADDAC(a[3], a[26]);
    SQRADDAC(a[4], a[25]); SQRADDAC(a[5], a[24]); SQRADDAC(a[6], a[23]); SQRADDAC(a[7], a[22]);
    SQRADDAC(a[8], a[21]); SQRADDAC(a[9], a[20]); SQRADDAC(a[10], a[19]); SQRADDAC(a[11], a[18]);
    SQRADDAC(a[12], a[17]); SQRADDAC(a[13], a[16]); SQRADDAC(a[14], a[15]);
    SQRADDDB;
    COMBA_STORE(b[29]);

    /* column 30 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[30]); SQRADDAC(a[1], a[29]); SQRADDAC(a[2], a[28]); SQRADDAC(a[3], a[27]);
    SQRADDAC(a[4], a[26]); SQRADDAC(a[5], a[25]); SQRADDAC(a[6], a[24]); SQRADDAC(a[7], a[23]);
    SQRADDAC(a[8], a[22]); SQRADDAC(a[9], a[21]); SQRADDAC(a[10], a[20]); SQRADDAC(a[11], a[19]);
    SQRADDAC(a[12], a[18]); SQRADDAC(a[13], a[17]); SQRADDAC(a[14], a[16]);
    SQRADDDB; SQRADD(a[15], a[15]);
    COMBA_STORE(b[30]);

    /* column 31 */
    CARRY_FORWARD;
    SQRADDSC(a[0], a[31]); SQRADDAC(a[1], a[30]); SQRADDAC(a[2], a[29]); SQRADDAC(a[3], a[28]);
    SQRADDAC(a[4], a[27]); SQRADDAC(a[5], a[26]); SQRADDAC(a[6], a[25]); SQRADDAC(a[7], a[24]);
    SQRADDAC(a[8], a[23]); SQRADDAC(a[9], a[22]); SQRADDAC(a[10], a[21]); SQRADDAC(a[11], a[20]);
    SQRADDAC(a[12], a[19]); SQRADDAC(a[13], a[18]); SQRADDAC(a[14], a[17]); SQRADDAC(a[15], a[16]);
    SQRADDDB;
    COMBA_STORE(b[31]);

    /* column 32 */
    CARRY_FORWARD;
    SQRADDSC(a[1], a[31]); SQRADDAC(a[2], a[30]); SQRADDAC(a[3], a[29]); SQRADDAC(a[4], a[28]);
    SQRADDAC(a[5], a[27]); SQRADDAC(a[6], a[26]); SQRADDAC(a[7], a[25]); SQRADDAC(a[8], a[24]);
    SQRADDAC(a[9], a[23]); SQRADDAC(a[10], a[22]); SQRADDAC(a[11], a[21]); SQRADDAC(a[12], a[20]);
    SQRADDAC(a[13], a[19]); SQRADDAC(a[14], a[18]); SQRADDAC(a[15], a[17]);
    SQRADDDB; SQRADD(a[16], a[16]);
    COMBA_STORE(b[32]);

    /* column 33 */
    CARRY_FORWARD;
    SQRADDSC(a[2], a[31]); SQRADDAC(a[3], a[30]); SQRADDAC(a[4], a[29]); SQRADDAC(a[5], a[28]);
    SQRADDAC(a[6], a[27]); SQRADDAC(a[7], a[26]); SQRADDAC(a[8], a[25]); SQRADDAC(a[9], a[24]);
    SQRADDAC(a[10], a[23]); SQRADDAC(a[11], a[22]); SQRADDAC(a[12], a[21]); SQRADDAC(a[13], a[20]);
    SQRADDAC(a[14], a[19]); SQRADDAC(a[15], a[18]); SQRADDAC(a[16], a[17]);
    SQRADDDB;
    COMBA_STORE(b[33]);

    /* column 34 */
    CARRY_FORWARD;
    SQRADDSC(a[3], a[31]); SQRADDAC(a[4], a[30]); SQRADDAC(a[5], a[29]); SQRADDAC(a[6], a[28]);
    SQRADDAC(a[7], a[27]); SQRADDAC(a[8], a[26]); SQRADDAC(a[9], a[25]); SQRADDAC(a[10], a[24]);
    SQRADDAC(a[11], a[23]); SQRADDAC(a[12], a[22]); SQRADDAC(a[13], a[21]); SQRADDAC(a[14], a[20]);
    SQRADDAC(a[15], a[19]); SQRADDAC(a[16], a[18]);
    SQRADDDB; SQRADD(a[17], a[17]);
    COMBA_STORE(b[34]);

    /* column 35 */
    CARRY_FORWARD;
    SQRADDSC(a[4], a[31]); SQRADDAC(a[5], a[30]); SQRADDAC(a[6], a[29]); SQRADDAC(a[7], a[28]);
    SQRADDAC(a[8], a[27]); SQRADDAC(a[9], a[26]); SQRADDAC(a[10], a[25]); SQRADDAC(a[11], a[24]);
    SQRADDAC(a[12], a[23]); SQRADDAC(a[13], a[22]); SQRADDAC(a[14], a[21]); SQRADDAC(a[15], a[20]);
    SQRADDAC(a[16], a[19]); SQRADDAC(a[17], a[18]);
    SQRADDDB;
    COMBA_STORE(b[35]);

    /* column 36 */
    CARRY_FORWARD;
    SQRADDSC(a[5], a[31]); SQRADDAC(a[6], a[30]); SQRADDAC(a[7], a[29]); SQRADDAC(a[8], a[28]);
    SQRADDAC(a[9], a[27]); SQRADDAC(a[10], a[26]); SQRADDAC(a[11], a[25]); SQRADDAC(a[12], a[24]);
    SQRADDAC(a[13], a[23]); SQRADDAC(a[14], a[22]); SQRADDAC(a[15], a[21]); SQRADDAC(a[16], a[20]);
    SQRADDAC(a[17], a[19]);
    SQRADDDB; SQRADD(a[18], a[18]);
    COMBA_STORE(b[36]);

    /* column 37 */
    CARRY_FORWARD;
    SQRADDSC(a[6], a[31]); SQRADDAC(a[7], a[30]); SQRADDAC(a[8], a[29]); SQRADDAC(a[9], a[28]);
    SQRADDAC(a[10], a[27]); SQRADDAC(a[11], a[26]); SQRADDAC(a[12], a[25]); SQRADDAC(a[13], a[24]);
    SQRADDAC(a[14], a[23]); SQRADDAC(a[15], a[22]); SQRADDAC(a[16], a[21]); SQRADDAC(a[17], a[20]);
    SQRADDAC(a[18], a[19]);
    SQRADDDB;
    COMBA_STORE(b[37]);

    /* column 38 */
    CARRY_FORWARD;
    SQRADDSC(a[7], a[31]); SQRADDAC(a[8], a[30]); SQRADDAC(a[9], a[29]); SQRADDAC(a[10], a[28]);
    SQRADDAC(a[11], a[27]); SQRADDAC(a[12], a[26]); SQRADDAC(a[13], a[25]); SQRADDAC(a[14], a[24]);
    SQRADDAC(a[15], a[23]); SQRADDAC(a[16], a[22]); SQRADDAC(a[17], a[21]); SQRADDAC(a[18], a[20]);
    SQRADDDB; SQRADD(a[19], a[19]);
    COMBA_STORE(b[38]);

    /* column 39 */
    CARRY_FORWARD;
    SQRADDSC(a[8], a[31]); SQRADDAC(a[9], a[30]); SQRADDAC(a[10], a[29]); SQRADDAC(a[11], a[28]);
    SQRADDAC(a[12], a[27]); SQRADDAC(a[13], a[26]); SQRADDAC(a[14], a[25]); SQRADDAC(a[15], a[24]);
    SQRADDAC(a[16], a[23]); SQRADDAC(a[17], a[22]); SQRADDAC(a[18], a[21]); SQRADDAC(a[19], a[20]);
    SQRADDDB;
    COMBA_STORE(b[39]);

    /* column 40 */
    CARRY_FORWARD;
    SQRADDSC(a[9], a[31]); SQRADDAC(a[10], a[30]); SQRADDAC(a[11], a[29]); SQRADDAC(a[12], a[28]);
    SQRADDAC(a[13], a[27]); SQRADDAC(a[14], a[26]); SQRADDAC(a[15], a[25]); SQRADDAC(a[16], a[24]);
    SQRADDAC(a[17], a[23]); SQRADDAC(a[18], a[22]); SQRADDAC(a[19], a[21]);
    SQRADDDB; SQRADD(a[20], a[20]);
    COMBA_STORE(b[40]);

    /* column 41 */
    CARRY_FORWARD;
    SQRADDSC(a[10], a[31]); SQRADDAC(a[11], a[30]); SQRADDAC(a[12], a[29]); SQRADDAC(a[13], a[28]);
    SQRADDAC(a[14], a[27]); SQRADDAC(a[15], a[26]); SQRADDAC(a[16], a[25]); SQRADDAC(a[17], a[24]);
    SQRADDAC(a[18], a[23]); SQRADDAC(a[19], a[22]); SQRADDAC(a[20], a[21]);
    SQRADDDB;
    COMBA_STORE(b[41]);

    /* column 42 */
    CARRY_FORWARD;
    SQRADDSC(a[11], a[31]); SQRADDAC(a[12], a[30]); SQRADDAC(a[13], a[29]); SQRADDAC(a[14], a[28]);
    SQRADDAC(a[15], a[27]); SQRADDAC(a[16], a[26]); SQRADDAC(a[17], a[25]); SQRADDAC(a[18], a[24]);
    SQRADDAC(a[19], a[23]); SQRADDAC(a[20], a[22]);
    SQRADDDB; SQRADD(a[21], a[21]);
    COMBA_STORE(b[42]);

    /* column 43 */
    CARRY_FORWARD;
    SQRADDSC(a[12], a[31]); SQRADDAC(a[13], a[30]); SQRADDAC(a[14], a[29]); SQRADDAC(a[15], a[28]);
    SQRADDAC(a[16], a[27]); SQRADDAC(a[17], a[26]); SQRADDAC(a[18], a[25]); SQRADDAC(a[19], a[24]);
    SQRADDAC(a[20], a[23]); SQRADDAC(a[21], a[22]);
    SQRADDDB;
    COMBA_STORE(b[43]);

    /* column 44 */
    CARRY_FORWARD;
    SQRADDSC(a[13], a[31]); SQRADDAC(a[14], a[30]); SQRADDAC(a[15], a[29]); SQRADDAC(a[16], a[28]);
    SQRADDAC(a[17], a[27]); SQRADDAC(a[18], a[26]); SQRADDAC(a[19], a[25]); SQRADDAC(a[20], a[24]);
    SQRADDAC(a[21], a[23]);
    SQRADDDB; SQRADD(a[22], a[22]);
    COMBA_STORE(b[44]);

    /* column 45 */
    CARRY_FORWARD;
    SQRADDSC(a[14], a[31]); SQRADDAC(a[15], a[30]); SQRADDAC(a[16], a[29]); SQRADDAC(a[17], a[28]);
    SQRADDAC(a[18], a[27]); SQRADDAC(a[19], a[26]); SQRADDAC(a[20], a[25]); SQRADDAC(a[21], a[24]);
    SQRADDAC(a[22], a[23]);
    SQRADDDB;
    COMBA_STORE(b[45]);

    /* column 46 */
    CARRY_FORWARD;
    SQRADDSC(a[15], a[31]); SQRADDAC(a[16], a[30]); SQRADDAC(a[17], a[29]); SQRADDAC(a[18], a[28]);
    SQRADDAC(a[19], a[27]); SQRADDAC(a[20], a[26]); SQRADDAC(a[21], a[25]); SQRADDAC(a[22], a[24]);
    SQRADDDB; SQRADD(a[23], a[23]);
    COMBA_STORE(b[46]);

    /* column 47 */
    CARRY_FORWARD;
    SQRADDSC(a[16], a[31]); SQRADDAC(a[17], a[30]); SQRADDAC(a[18], a[29]); SQRADDAC(a[19], a[28]);
    SQRADDAC(a[20], a[27]); SQRADDAC(a[21], a[26]); SQRADDAC(a[22], a[25]); SQRADDAC(a[23], a[24]);
    SQRADDDB;
    COMBA_STORE(b[47]);

    /* column 48 */
    CARRY_FORWARD;
    SQRADDSC(a[17], a[31]); SQRADDAC(a[18], a[30]); SQRADDAC(a[19], a[29]); SQRADDAC(a[20], a[28]);
    SQRADDAC(a[21], a[27]); SQRADDAC(a[22], a[26]); SQRADDAC(a[23], a[25]);
    SQRADDDB; SQRADD(a[24], a[24]);
    COMBA_STORE(b[48]);

    /* column 49 */
    CARRY_FORWARD;
    SQRADDSC(a[18], a[31]); SQRADDAC(a[19], a[30]); SQRADDAC(a[20], a[29]); SQRADDAC(a[21], a[28]);
    SQRADDAC(a[22], a[27]); SQRADDAC(a[23], a[26]); SQRADDAC(a[24], a[25]);
    SQRADDDB;
    COMBA_STORE(b[49]);

    /* column 50 */
    CARRY_FORWARD;
    SQRADDSC(a[19], a[31]); SQRADDAC(a[20], a[30]); SQRADDAC(a[21], a[29]); SQRADDAC(a[22], a[28]);
    SQRADDAC(a[23], a[27]); SQRADDAC(a[24], a[26]);
    SQRADDDB; SQRADD(a[25], a[25]);
    COMBA_STORE(b[50]);

    /* column 51 */
    CARRY_FORWARD;
    SQRADDSC(a[20], a[31]); SQRADDAC(a[21], a[30]); SQRADDAC(a[22], a[29]); SQRADDAC(a[23], a[28]);
    SQRADDAC(a[24], a[27]); SQRADDAC(a[25], a[26]);
    SQRADDDB;
    COMBA_STORE(b[51]);

    /* column 52 */
    CARRY_FORWARD;
    SQRADDSC(a[21], a[31]); SQRADDAC(a[22], a[30]); SQRADDAC(a[23], a[29]); SQRADDAC(a[24], a[28]);
    SQRADDAC(a[25], a[27]);
    SQRADDDB; SQRADD(a[26], a[26]);
    COMBA_STORE(b[52]);

    /* column 53 */
    CARRY_FORWARD;
    SQRADDSC(a[22], a[31]); SQRADDAC(a[23], a[30]); SQRADDAC(a[24], a[29]); SQRADDAC(a[25], a[28]);
    SQRADDAC(a[26], a[27]);
    SQRADDDB;
    COMBA_STORE(b[53]);

    /* column 54 */
    CARRY_FORWARD;
    SQRADDSC(a[23], a[31]); SQRADDAC(a[24], a[30]); SQRADDAC(a[25], a[29]); SQRADDAC(a[26], a[28]);
    SQRADDDB; SQRADD(a[27], a[27]);
    COMBA_STORE(b[54]);

    /* column 55 */
    CARRY_FORWARD;
    SQRADDSC(a[24], a[31]); SQRADDAC(a[25], a[30]); SQRADDAC(a[26], a[29]); SQRADDAC(a[27], a[28]);
    SQRADDDB;
    COMBA_STORE(b[55]);

    /* column 56 */
    CARRY_FORWARD;
    SQRADDSC(a[25], a[31]); SQRADDAC(a[26], a[30]); SQRADDAC(a[27], a[29]);
    SQRADDDB; SQRADD(a[28], a[28]);
    COMBA_STORE(b[56]);

    /* column 57 */
    CARRY_FORWARD;
    SQRADDSC(a[26], a[31]); SQRADDAC(a[27], a[30]); SQRADDAC(a[28], a[29]);
    SQRADDDB;
    COMBA_STORE(b[57]);

    /* column 58 */
    CARRY_FORWARD;
    SQRADD2(a[27], a[31]); SQRADD2(a[28], a[30]); SQRADD(a[29], a[29]);
    COMBA_STORE(b[58]);

    /* column 59 */
    CARRY_FORWARD;
    SQRADD2(a[28], a[31]); SQRADD2(a[29], a[30]);
    COMBA_STORE(b[59]);

    /* column 60 */
    CARRY_FORWARD;
    SQRADD2(a[29], a[31]); SQRADD(a[30], a[30]);
    COMBA_STORE(b[60]);

    /* column 61 */
    CARRY_FORWARD;
    SQRADD2(a[30], a[31]);
    COMBA_STORE(b[61]);

    /* column 62, plus the final carry word stored as digit 63 */
    CARRY_FORWARD;
    SQRADD(a[31], a[31]);
    COMBA_STORE(b[62]);
    COMBA_STORE2(b[63]);
    COMBA_FINI;

    /* Publish the product: copy all 64 digits, then let pstm_clamp adjust B. */
    memcpy(B->dp, b, sizeof(b));
    B->sign = PSTM_ZPOS;
    B->used = 64;
    pstm_clamp(B);
    return PSTM_OKAY;
}
1094
|
|
|
|
|
|
|
# endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
/******************************************************************************/ |
1097
|
|
|
|
|
|
|
/** |
1098
|
|
|
|
|
|
|
B = A**2. |
1099
|
|
|
|
|
|
|
@param[in] pool Memory pool |
1100
|
|
|
|
|
|
|
@param[in] A Base |
1101
|
|
|
|
|
|
|
@param[out] B Result |
1102
|
|
|
|
|
|
|
@param[in,out] paD Temporary storage |
1103
|
|
|
|
|
|
|
@param[in] paDlen Number of items pointed to by paD |
1104
|
|
|
|
|
|
|
*/ |
1105
|
5387326
|
|
|
|
|
|
int32_t pstm_sqr_comba(psPool_t *pool, const pstm_int *A, pstm_int *B, |
1106
|
|
|
|
|
|
|
pstm_digit *paD, psSize_t paDlen) |
1107
|
|
|
|
|
|
|
{ |
1108
|
|
|
|
|
|
|
# ifdef USE_1024_KEY_SPEED_OPTIMIZATIONS |
1109
|
|
|
|
|
|
|
if (A->used == 16) |
1110
|
|
|
|
|
|
|
{ |
1111
|
|
|
|
|
|
|
return pstm_sqr_comba16(A, B); |
1112
|
|
|
|
|
|
|
} |
1113
|
|
|
|
|
|
|
else |
1114
|
|
|
|
|
|
|
{ |
1115
|
|
|
|
|
|
|
# ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS |
1116
|
|
|
|
|
|
|
if (A->used == 32) |
1117
|
|
|
|
|
|
|
{ |
1118
|
|
|
|
|
|
|
return pstm_sqr_comba32(A, B); |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
# endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ |
1121
|
|
|
|
|
|
|
return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); |
1122
|
|
|
|
|
|
|
} |
1123
|
|
|
|
|
|
|
# else |
1124
|
|
|
|
|
|
|
# ifdef USE_2048_KEY_SPEED_OPTIMIZATIONS |
1125
|
|
|
|
|
|
|
if (A->used == 32) |
1126
|
|
|
|
|
|
|
{ |
1127
|
|
|
|
|
|
|
return pstm_sqr_comba32(A, B); |
1128
|
|
|
|
|
|
|
} |
1129
|
|
|
|
|
|
|
# endif /* USE_2048_KEY_SPEED_OPTIMIZATIONS */ |
1130
|
5387326
|
|
|
|
|
|
return pstm_sqr_comba_gen(pool, A, B, paD, paDlen); |
1131
|
|
|
|
|
|
|
# endif |
1132
|
|
|
|
|
|
|
} |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
#endif /* defined(USE_MATRIX_RSA) || defined(USE_MATRIX_ECC) */ |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
/******************************************************************************/ |
1137
|
|
|
|
|
|
|
|