File Coverage

src/rsa/rsa_i15_privexp.c

Criterion	Covered	Total	%
statement	0	94	0.0
branch	0	34	0.0
condition			n/a
subroutine			n/a
pod			n/a
total	0	128	0.0

line	stmt	bran	code
1			/*
2			* Copyright (c) 2018 Thomas Pornin
3			*
4			* Permission is hereby granted, free of charge, to any person obtaining
5			* a copy of this software and associated documentation files (the
6			* "Software"), to deal in the Software without restriction, including
7			* without limitation the rights to use, copy, modify, merge, publish,
8			* distribute, sublicense, and/or sell copies of the Software, and to
9			* permit persons to whom the Software is furnished to do so, subject to
10			* the following conditions:
11			*
12			* The above copyright notice and this permission notice shall be
13			* included in all copies or substantial portions of the Software.
14			*
15			* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16			* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17			* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18			* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19			* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20			* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21			* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22			* SOFTWARE.
23			*/
24
25			#include "inner.h"
26
27			/* see bearssl_rsa.h */
28			size_t
29	0		br_rsa_i15_compute_privexp(void *d,
30			const br_rsa_private_key *sk, uint32_t e)
31			{
32			/*
33			* We want to invert e modulo phi = (p-1)(q-1). This first
34			* requires computing phi, which is easy since we have the factors
35			* p and q in the private key structure.
36			*
37			* Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
38			* We could invert e modulo phi/4 then patch the result to
39			* modulo phi, but this would involve assembling three modulus-wide
40			* values (phi/4, 1 and e) and calling moddiv, that requires
41			* three more temporaries, for a total of six big integers, or
42			* slightly more than 3 kB of stack space for RSA-4096. This
43			* exceeds our stack requirements.
44			*
45			* Instead, we first use one step of the extended GCD:
46			*
47			* - We compute phi = k*e + r (Euclidean division of phi by e).
48			* If public exponent e is correct, then r != 0 (e must be
49			* invertible modulo phi). We also have k != 0 since we
50			* enforce non-ridiculously-small factors.
51			*
52			* - We find small u, v such that ue - vr = 1 (using a
53			* binary GCD; we can arrange for u < r and v < e, i.e. all
54			* values fit on 32 bits).
55			*
56			* - Solution is: d = u + v*k
57			* This last computation is exact: since u < r and v < e,
58			* the above implies d < r + e*((phi-r)/e) = phi
59			*/
60
61			uint16_t tmp[4 * ((BR_MAX_RSA_FACTOR + 14) / 15) + 12];
62			uint16_t p, q, k, m, z, phi;
63			const unsigned char pbuf, qbuf;
64			size_t plen, qlen, u, len, dlen;
65			uint32_t r, a, b, u0, v0, u1, v1, he, hr;
66			int i;
67
68			/*
69			* Check that e is correct.
70			*/
71	0	0	if (e < 3 \|\| (e & 1) == 0) {
		0
72	0		return 0;
73			}
74
75			/*
76			* Check lengths of p and q, and that they are both odd.
77			*/
78	0		pbuf = sk->p;
79	0		plen = sk->plen;
80	0	0	while (plen > 0 && *pbuf == 0) {
		0
81	0		pbuf ++;
82	0		plen --;
83			}
84	0	0	if (plen < 5 \|\| plen > (BR_MAX_RSA_FACTOR / 8)
		0
85	0	0	\|\| (pbuf[plen - 1] & 1) != 1)
86			{
87	0		return 0;
88			}
89	0		qbuf = sk->q;
90	0		qlen = sk->qlen;
91	0	0	while (qlen > 0 && *qbuf == 0) {
		0
92	0		qbuf ++;
93	0		qlen --;
94			}
95	0	0	if (qlen < 5 \|\| qlen > (BR_MAX_RSA_FACTOR / 8)
		0
96	0	0	\|\| (qbuf[qlen - 1] & 1) != 1)
97			{
98	0		return 0;
99			}
100
101			/*
102			* Output length is that of the modulus.
103			*/
104	0		dlen = (sk->n_bitlen + 7) >> 3;
105	0	0	if (d == NULL) {
106	0		return dlen;
107			}
108
109	0		p = tmp;
110	0		br_i15_decode(p, pbuf, plen);
111	0		plen = (p[0] + 15) >> 4;
112	0		q = p + 1 + plen;
113	0		br_i15_decode(q, qbuf, qlen);
114	0		qlen = (q[0] + 15) >> 4;
115
116			/*
117			* Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
118			* we do not need anymore). The mulacc function sets the announced
119			* bit length of t to be the sum of the announced bit lengths of
120			* p-1 and q-1, which is usually exact but may overshoot by one 1
121			* bit in some cases; we readjust it to its true length.
122			*/
123	0		p[1] --;
124	0		q[1] --;
125	0		phi = q + 1 + qlen;
126	0		br_i15_zero(phi, p[0]);
127	0		br_i15_mulacc(phi, p, q);
128	0		len = (phi[0] + 15) >> 4;
129	0		memmove(tmp, phi, (1 + len) * sizeof *phi);
130	0		phi = tmp;
131	0		phi[0] = br_i15_bit_length(phi + 1, len);
132	0		len = (phi[0] + 15) >> 4;
133
134			/*
135			* Divide phi by public exponent e. The final remainder r must be
136			* non-zero (otherwise, the key is invalid). The quotient is k,
137			* which we write over phi, since we don't need phi after that.
138			*/
139	0		r = 0;
140	0	0	for (u = len; u >= 1; u --) {
141			/*
142			* Upon entry, r < e, and phi[u] < 2^15; hence,
143			* hi:lo < e*2^15. Thus, the produced word k[u]
144			* must be lower than 2^15, and the new remainder r
145			* is lower than e.
146			*/
147			uint32_t hi, lo;
148
149	0		hi = r >> 17;
150	0		lo = (r << 15) + phi[u];
151	0		phi[u] = br_divrem(hi, lo, e, &r);
152			}
153	0	0	if (r == 0) {
154	0		return 0;
155			}
156	0		k = phi;
157
158			/*
159			* Compute u and v such that ue - vr = GCD(e,r). We use
160			* a binary GCD algorithm, with 6 extra integers a, b,
161			* u0, u1, v0 and v1. Initial values are:
162			* a = e u0 = 1 v0 = 0
163			* b = r u1 = r v1 = e-1
164			* The following invariants are maintained:
165			* a = u0e - v0r
166			* b = u1e - v1r
167			* 0 < a <= e
168			* 0 < b <= r
169			* 0 <= u0 <= r
170			* 0 <= v0 <= e
171			* 0 <= u1 <= r
172			* 0 <= v1 <= e
173			*
174			* At each iteration, we reduce either a or b by one bit, and
175			* adjust u0, u1, v0 and v1 to maintain the invariants:
176			* - if a is even, then a <- a/2
177			* - otherwise, if b is even, then b <- b/2
178			* - otherwise, if a > b, then a <- (a-b)/2
179			* - otherwise, if b > a, then b <- (b-a)/2
180			* Algorithm stops when a = b. At that point, the common value
181			* is the GCD of e and r; it must be 1 (otherwise, the private
182			* key or public exponent is not valid). The (u0,v0) or (u1,v1)
183			* pairs are the solution we are looking for.
184			*
185			* Since either a or b is reduced by at least 1 bit at each
186			* iteration, 62 iterations are enough to reach the end
187			* condition.
188			*
189			* To maintain the invariants, we must compute the same operations
190			* on the u* and v* values that we do on a and b:
191			* - When a is divided by 2, u0 and v0 must be divided by 2.
192			* - When b is divided by 2, u1 and v1 must be divided by 2.
193			* - When b is subtracted from a, u1 and v1 are subtracted from
194			* u0 and v0, respectively.
195			* - When a is subtracted from b, u0 and v0 are subtracted from
196			* u1 and v1, respectively.
197			*
198			* However, we want to keep the u* and v* values in their proper
199			* ranges. The following remarks apply:
200			*
201			* - When a is divided by 2, then a is even. Therefore:
202			*
203			* * If r is odd, then u0 and v0 must have the same parity;
204			* if they are both odd, then adding r to u0 and e to v0
205			* makes them both even, and the division by 2 brings them
206			* back to the proper range.
207			*
208			* * If r is even, then u0 must be even; if v0 is odd, then
209			* adding r to u0 and e to v0 makes them both even, and the
210			* division by 2 brings them back to the proper range.
211			*
212			* Thus, all we need to do is to look at the parity of v0,
213			* and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
214			* a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
215			* division (r+1 does not overflow since r < e; and (e/2)+1
216			* is equal to (e+1)/2 since e is odd).
217			*
218			* - When we subtract b from a, three cases may occur:
219			*
220			* * u1 <= u0 and v1 <= v0: just do the subtractions
221			*
222			* * u1 > u0 and v1 > v0: compute:
223			* (u0, v0) <- (u0 + r - u1, v0 + e - v1)
224			*
225			* * u1 <= u0 and v1 > v0: compute:
226			* (u0, v0) <- (u0 + r - u1, v0 + e - v1)
227			*
228			* The fourth case (u1 > u0 and v1 <= v0) is not possible
229			* because it would contradict "b < a" (which is the reason
230			* why we subtract b from a).
231			*
232			* The tricky case is the third one: from the equations, it
233			* seems that u0 may go out of range. However, the invariants
234			* and ranges of other values imply that, in that case, the
235			* new u0 does not actually exceed the range.
236			*
237			* We can thus handle the subtraction by adding (r,e) based
238			* solely on the comparison between v0 and v1.
239			*/
240	0		a = e;
241	0		b = r;
242	0		u0 = 1;
243	0		v0 = 0;
244	0		u1 = r;
245	0		v1 = e - 1;
246	0		hr = (r + 1) >> 1;
247	0		he = (e >> 1) + 1;
248	0	0	for (i = 0; i < 62; i ++) {
249			uint32_t oa, ob, agtb, bgta;
250			uint32_t sab, sba, da, db;
251			uint32_t ctl;
252
253	0		oa = a & 1; /* 1 if a is odd */
254	0		ob = b & 1; /* 1 if b is odd */
255	0		agtb = GT(a, b); /* 1 if a > b */
256	0		bgta = GT(b, a); /* 1 if b > a */
257
258	0		sab = oa & ob & agtb; /* 1 if a <- a-b */
259	0		sba = oa & ob & bgta; /* 1 if b <- b-a */
260
261			/* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
262	0		ctl = GT(v1, v0);
263	0		a -= b & -sab;
264	0		u0 -= (u1 - (r & -ctl)) & -sab;
265	0		v0 -= (v1 - (e & -ctl)) & -sab;
266
267			/* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
268	0		ctl = GT(v0, v1);
269	0		b -= a & -sba;
270	0		u1 -= (u0 - (r & -ctl)) & -sba;
271	0		v1 -= (v0 - (e & -ctl)) & -sba;
272
273	0		da = NOT(oa) \| sab; /* 1 if a <- a/2 */
274	0		db = (oa & NOT(ob)) \| sba; /* 1 if b <- b/2 */
275
276			/* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
277	0		ctl = v0 & 1;
278	0		a ^= (a ^ (a >> 1)) & -da;
279	0		u0 ^= (u0 ^ ((u0 >> 1) + (hr & -ctl))) & -da;
280	0		v0 ^= (v0 ^ ((v0 >> 1) + (he & -ctl))) & -da;
281
282			/* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
283	0		ctl = v1 & 1;
284	0		b ^= (b ^ (b >> 1)) & -db;
285	0		u1 ^= (u1 ^ ((u1 >> 1) + (hr & -ctl))) & -db;
286	0		v1 ^= (v1 ^ ((v1 >> 1) + (he & -ctl))) & -db;
287			}
288
289			/*
290			* Check that the GCD is indeed 1. If not, then the key is invalid
291			* (and there's no harm in leaking that piece of information).
292			*/
293	0	0	if (a != 1) {
294	0		return 0;
295			}
296
297			/*
298			* Now we have u0e - v0r = 1. Let's compute the result as:
299			* d = u0 + v0*k
300			* We still have k in the tmp[] array, and its announced bit
301			* length is that of phi.
302			*/
303	0		m = k + 1 + len;
304	0		m[0] = (2 << 4) + 2; /* bit length is 32 bits, encoded */
305	0		m[1] = v0 & 0x7FFF;
306	0		m[2] = (v0 >> 15) & 0x7FFF;
307	0		m[3] = v0 >> 30;
308	0		z = m + 4;
309	0		br_i15_zero(z, k[0]);
310	0		z[1] = u0 & 0x7FFF;
311	0		z[2] = (u0 >> 15) & 0x7FFF;
312	0		z[3] = u0 >> 30;
313	0		br_i15_mulacc(z, k, m);
314
315			/*
316			* Encode the result.
317			*/
318	0		br_i15_encode(d, dlen, z);
319	0		return dlen;
320			}